sqlConnString <- "Driver=SQL Server;Server=H2; Database=AdventureWorksDW; Trusted_Connection=true" sqlBikeBuyerView <- "vTargetMail" sqlRowsPerRead = 5000 sqlBikeBuyerDS <- RxSqlServerData(connectionString = sqlConnString, table = sqlBikeBuyerView, rowsPerRead = sqlRowsPerRead) # Set ComputeContext to run agggregation and compute operations on the server. # Needs a temp directory path to serialize R objects back and forth sqlShareDir <- paste("C:\\AllShare\\",Sys.getenv("USERNAME"),sep="") sqlWait <- TRUE sqlConsoleOutput <- FALSE cc <- RxInSqlServer(connectionString = sqlConnString, shareDir = sqlShareDir, wait = sqlWait, consoleOutput = sqlConsoleOutput) rxSetComputeContext(cc) # View available R functions in RevoScaleR package help(RevoScaleR) # Explore data rxGetVarInfo(data = sqlBikeBuyerDS) # rxSummary equires you to enable external scripts # sp_configure 'external scripts enabled', 1; # RECONFIGURE; # restart SQL Server rxSummary(~., data = sqlBikeBuyerDS) # change DS definition to set strings as factors sqlBikeBuyerDS <- RxSqlServerData(connectionString = sqlConnString, table = sqlBikeBuyerView, rowsPerRead = sqlRowsPerRead, stringsAsFactors = TRUE) rxSummary(~., data = sqlBikeBuyerDS) sqlBikeBuyerquery <- "SELECT [CustomerKey] ,[MaritalStatus] ,[Gender] ,[YearlyIncome] ,[TotalChildren] ,[NumberChildrenAtHome] ,[EnglishEducation] ,[EnglishOccupation] ,[HouseOwnerFlag] ,[NumberCarsOwned] ,[CommuteDistance] ,[Region] ,[Age] ,[BikeBuyer] FROM [vTargetMail]" sqlBikeBuyerDS <- RxSqlServerData(sqlQuery = sqlBikeBuyerquery, connectionString = sqlConnString, stringsAsFactors = TRUE, rowsPerRead=sqlRowsPerRead) # ~. means compute statistics for all non-character variables rxSummary(~., data = sqlBikeBuyerDS) # can also specify a single variable rxSummary(~ Region, data = sqlBikeBuyerDS) # Or cross-join rxSummary(~ Region : CommuteDistance, data = sqlBikeBuyerDS) rxHistogram(~Age, data = sqlBikeBuyerDS, title = "Age Histogram", numBreaks=10) # install ggplot2 install.packages("ggplot2")