A supermarket is offering a new line of organic products, and its management wants to determine which customers are likely to purchase them. The supermarket has a loyalty program (think Kroger Card). As an initial buyer incentive, it provided coupons for the organic products to all loyalty program participants and collected data that includes whether these customers purchased any of the organic products.

The target variable is TargetBuy, which indicates whether an organic purchase was made.

Data Pre-Processing and Summary
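
The setup chunk is not shown; the functions used throughout come from the packages below (an assumed, minimal setup):

library(fastDummies)   # dummy_columns()
library(dplyr)         # recode_factor(), select()
library(caret)         # createDataPartition(), trainControl(), train(), createFolds(), varImp()
library(rpart)         # rpart(), prune(), printcp()
library(rpart.plot)    # prp()
library(doParallel)    # registerDoParallel(); also attaches parallel for makePSOCKcluster()
# Model back ends called through caret: MASS (glmStepAIC), ranger, gbm, nnet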

data$ID<-as.factor(data$ID)
# One-hot encode the nominal inputs, dropping the first level of each and the original columns
data<-dummy_columns(data, select_columns = c("DemClusterGroup","DemGender", "DemReg", "DemTVReg", "PromClass"), remove_first_dummy = TRUE, remove_selected_columns = TRUE)
data.summary(data)   # summary helper (definition not shown): mean, sd, min, max, median, length, and missing count per column
##                               mean           sd   min      max median length
## DemAffl               8.774307e+00    3.4787618  0.00    34.00      8   3000
## DemAge                5.346187e+01   13.3625093 18.00    79.00     53   3000
## PromSpend             4.444894e+03 7176.3483976  0.01 81551.03   2000   3000
## PromTime              6.565980e+00    4.6870982  0.00    39.00      5   3000
## TargetBuy             2.556667e-01    0.4363080  0.00     1.00      0   3000
## TargetAmt             3.080000e-01    0.5790058  0.00     3.00      0   3000
## DemClusterGroup_B     1.813333e-01    0.3853583  0.00     1.00      0   3000
## DemClusterGroup_C     2.043333e-01    0.4032808  0.00     1.00      0   3000
## DemClusterGroup_D     1.883333e-01    0.3910433  0.00     1.00      0   3000
## DemClusterGroup_E     1.266667e-01    0.3326546  0.00     1.00      0   3000
## DemClusterGroup_F     1.790000e-01    0.3834162  0.00     1.00      0   3000
## DemClusterGroup_U     3.500000e-02    0.1838104  0.00     1.00      0   3000
## DemGender_M           2.670000e-01    0.4424661  0.00     1.00      0   3000
## DemGender_U           1.803333e-01    0.3845289  0.00     1.00      0   3000
## DemReg_North          1.980000e-01    0.3985586  0.00     1.00      0   3000
## DemReg_Scottish       6.400000e-02    0.2447937  0.00     1.00      0   3000
## DemReg_South East     3.910000e-01    0.4880557  0.00     1.00      0   3000
## DemReg_South West     2.866667e-02    0.1668957  0.00     1.00      0   3000
## DemReg_U              1.700000e-02    0.1292926  0.00     1.00      0   3000
## DemTVReg_C Scotland   3.800000e-02    0.1912281  0.00     1.00      0   3000
## DemTVReg_East         7.766667e-02    0.2676909  0.00     1.00      0   3000
## DemTVReg_London       2.846667e-01    0.4513308  0.00     1.00      0   3000
## DemTVReg_Midlands     1.316667e-01    0.3381844  0.00     1.00      0   3000
## DemTVReg_N East       3.933333e-02    0.1944192  0.00     1.00      0   3000
## DemTVReg_N Scot       1.933333e-02    0.1377167  0.00     1.00      0   3000
## DemTVReg_N West       9.233333e-02    0.2895442  0.00     1.00      0   3000
## DemTVReg_S & S East   1.063333e-01    0.3083152  0.00     1.00      0   3000
## DemTVReg_S West       2.866667e-02    0.1668957  0.00     1.00      0   3000
## DemTVReg_U            1.700000e-02    0.1292926  0.00     1.00      0   3000
## DemTVReg_Ulster       1.433333e-02    0.1188806  0.00     1.00      0   3000
## DemTVReg_Wales & West 7.766667e-02    0.2676909  0.00     1.00      0   3000
## DemTVReg_Yorkshire    6.633333e-02    0.2489054  0.00     1.00      0   3000
## PromClass_Platinum    3.766667e-02    0.1904205  0.00     1.00      0   3000
## PromClass_Silver      3.836667e-01    0.4863593  0.00     1.00      0   3000
## PromClass_Tin         2.976667e-01    0.4573084  0.00     1.00      0   3000
##                       missing
## DemAffl                   151
## DemAge                    194
## PromSpend                   0
## PromTime                   37
## TargetBuy                   0
## TargetAmt                   0
## DemClusterGroup_B           0
## DemClusterGroup_C           0
## DemClusterGroup_D           0
## DemClusterGroup_E           0
## DemClusterGroup_F           0
## DemClusterGroup_U           0
## DemGender_M                 0
## DemGender_U                 0
## DemReg_North                0
## DemReg_Scottish             0
## DemReg_South East           0
## DemReg_South West           0
## DemReg_U                    0
## DemTVReg_C Scotland         0
## DemTVReg_East               0
## DemTVReg_London             0
## DemTVReg_Midlands           0
## DemTVReg_N East             0
## DemTVReg_N Scot             0
## DemTVReg_N West             0
## DemTVReg_S & S East         0
## DemTVReg_S West             0
## DemTVReg_U                  0
## DemTVReg_Ulster             0
## DemTVReg_Wales & West       0
## DemTVReg_Yorkshire          0
## PromClass_Platinum          0
## PromClass_Silver            0
## PromClass_Tin               0
##    levels mode missing
## ID   3000  140       0
data$TargetBuy<-as.factor(data$TargetBuy)
# Put "Yes" first so caret's twoClassSummary treats it as the event of interest
data$TargetBuy<-recode_factor(data$TargetBuy, "1"="Yes", "0"="No")
prop.table(table(data$TargetBuy))
## 
##       Yes        No 
## 0.2556667 0.7443333

The missing values flagged in the summary above (DemAffl, DemAge, and PromTime) were imputed; that code is not shown, but a sketch of one way to do it follows.
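
A minimal sketch, assuming median imputation via caret's preProcess and assuming the identifier and the secondary target TargetAmt were dropped at this point (the model output later reports 33 predictors):

# Sketch only; the original pre-processing step is not shown.
data <- select(data, -ID, -TargetAmt)                     # assumed drop, consistent with the 33 predictors reported below
num  <- c("DemAffl", "DemAge", "PromTime")                # columns with missing values in the summary above
pp   <- preProcess(data[, num], method = "medianImpute")  # caret: replace each NA with the column median
data[, num] <- predict(pp, data[, num])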

Data Partition

set.seed(13)
# Stratified 80/20 split on TargetBuy
trainIndex<-createDataPartition(data$TargetBuy, p=0.8, list=FALSE, times=1)

training<-data[trainIndex,]
valid<-data[-trainIndex,]

Logistic Model OU

# No resampling: a single fit on the training data; class probabilities are kept for ROC-based comparison
ctrl<-trainControl(method="none", summaryFunction = twoClassSummary, classProbs = TRUE, savePredictions = TRUE)
# Full logistic regression on all predictors
lrfull<-train(TargetBuy~., data=training, method="glm", family="binomial", metric="ROC", trControl=ctrl)

Logistic Model MU

# Stepwise selection (both directions) by AIC, starting from the full logistic model
lrstep<-train(TargetBuy~., data=training, method="glmStepAIC", direction="both", metric="ROC", trControl=ctrl, trace=0)
options(scipen = 999)
summary(lrstep)
## 
## Call:
## NULL
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.2318  -0.3340   0.4029   0.6769   2.4404  
## 
## Coefficients:
##                              Estimate Std. Error z value             Pr(>|z|)
## (Intercept)                 -0.630534   0.321636  -1.960              0.04995
## DemAffl                     -0.247788   0.018036 -13.739 < 0.0000000000000002
## DemAge                       0.061123   0.004587  13.326 < 0.0000000000000002
## DemClusterGroup_D           -0.259007   0.141890  -1.825              0.06794
## DemClusterGroup_F           -0.221322   0.144724  -1.529              0.12620
## DemGender_M                  1.039327   0.135854   7.650     0.00000000000002
## DemGender_U                  1.715473   0.192255   8.923 < 0.0000000000000002
## DemReg_North                 0.572612   0.196383   2.916              0.00355
## DemReg_Scottish              1.137944   0.422507   2.693              0.00707
## `\\`DemReg_South East\\``    0.436011   0.174594   2.497              0.01251
## `\\`DemTVReg_C Scotland\\`` -0.780339   0.488511  -1.597              0.11018
## DemTVReg_East                0.664367   0.256088   2.594              0.00948
## DemTVReg_Midlands            0.635626   0.214439   2.964              0.00304
## DemTVReg_Ulster              0.696712   0.491658   1.417              0.15646
##                                
## (Intercept)                 *  
## DemAffl                     ***
## DemAge                      ***
## DemClusterGroup_D           .  
## DemClusterGroup_F              
## DemGender_M                 ***
## DemGender_U                 ***
## DemReg_North                ** 
## DemReg_Scottish             ** 
## `\\`DemReg_South East\\``   *  
## `\\`DemTVReg_C Scotland\\``    
## DemTVReg_East               ** 
## DemTVReg_Midlands           ** 
## DemTVReg_Ulster                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2730.1  on 2400  degrees of freedom
## Residual deviance: 2068.3  on 2387  degrees of freedom
## AIC: 2096.3
## 
## Number of Fisher Scoring iterations: 5

D.T. Model OU

# Grow a deep tree with a small cp; 10-fold cross-validation builds the cp table used for pruning
class.tree.ou<-rpart(TargetBuy~., data=training, control=rpart.control(maxdepth = 30, minsplit=20, cp=0.001, xval=10), method="class", model=TRUE)
printcp(class.tree.ou)
## 
## Classification tree:
## rpart(formula = TargetBuy ~ ., data = training, method = "class", 
##     model = TRUE, control = rpart.control(maxdepth = 30, minsplit = 20, 
##         cp = 0.001, xval = 10))
## 
## Variables actually used in tree construction:
##  [1] DemAffl               DemAge                DemClusterGroup_B    
##  [4] DemClusterGroup_C     DemGender_M           DemGender_U          
##  [7] DemTVReg_London       DemTVReg_Midlands     DemTVReg_Wales & West
## [10] PromSpend             PromTime             
## 
## Root node error: 614/2401 = 0.25573
## 
## n= 2401 
## 
##           CP nsplit rel error  xerror     xstd
## 1  0.0952769      0   1.00000 1.00000 0.034816
## 2  0.0211726      2   0.80945 0.81107 0.032357
## 3  0.0203583      4   0.76710 0.77850 0.031867
## 4  0.0130293      7   0.68893 0.74430 0.031329
## 5  0.0048860      8   0.67590 0.69544 0.030516
## 6  0.0043431      9   0.67101 0.71173 0.030793
## 7  0.0032573     12   0.65798 0.72150 0.030956
## 8  0.0024430     19   0.63518 0.72638 0.031037
## 9  0.0016287     23   0.62541 0.73290 0.031144
## 10 0.0012215     25   0.62215 0.74104 0.031276
## 11 0.0010858     34   0.60912 0.77850 0.031867
## 12 0.0010000     37   0.60586 0.77850 0.031867
prp(class.tree.ou, type=1, extra=1, split.font = 1, varlen = -10, digits = -3)

D.T. Model MU

# Prune back to the cp with the smallest cross-validated error (xerror)
class.tree.mu<-prune(class.tree.ou, cp=class.tree.ou$cptable[which.min(class.tree.ou$cptable[,"xerror"]), "CP"])
printcp(class.tree.mu)
## 
## Classification tree:
## rpart(formula = TargetBuy ~ ., data = training, method = "class", 
##     model = TRUE, control = rpart.control(maxdepth = 30, minsplit = 20, 
##         cp = 0.001, xval = 10))
## 
## Variables actually used in tree construction:
## [1] DemAffl           DemAge            DemClusterGroup_B DemGender_M      
## [5] DemGender_U      
## 
## Root node error: 614/2401 = 0.25573
## 
## n= 2401 
## 
##         CP nsplit rel error  xerror     xstd
## 1 0.095277      0   1.00000 1.00000 0.034816
## 2 0.021173      2   0.80945 0.81107 0.032357
## 3 0.020358      4   0.76710 0.77850 0.031867
## 4 0.013029      7   0.68893 0.74430 0.031329
## 5 0.004886      8   0.67590 0.69544 0.030516
prp(class.tree.mu, type=1, extra=1, split.font = 1, varlen = -10, digits = -3)

Predict this observation (this is the same task shown in your exam). The observation itself is not reproduced here, so the sketch below scores a validation-set row instead.
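
A minimal sketch of scoring a single customer with the pruned tree; valid[1, ] is only a stand-in for the exam observation:

new.obs <- valid[1, ]                                       # hypothetical example row
predict(class.tree.mu, newdata = new.obs, type = "prob")    # P(Yes) and P(No)
predict(class.tree.mu, newdata = new.obs, type = "class")   # predicted label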

R. Forest Model MU (OU student doesn’t know how to do this)

set.seed(13)
# 10-fold CV indices built on the outcome so the folds are stratified by TargetBuy
cvindx<-createFolds(training$TargetBuy, k=10, returnTrain = TRUE)
ctrl <- trainControl(method="cv", index=cvindx, summaryFunction = twoClassSummary, classProbs = TRUE)


# Tuning grid for ranger: candidate mtry values and minimum node sizes
tunegrid <- expand.grid(
  .mtry = c(2, 5, 10, 15),
  .splitrule = "gini",
  .min.node.size = c(200, 250, 300, 350)
)

cl <- makePSOCKcluster(6)   # run the cross-validation folds on 6 parallel workers
registerDoParallel(cl)


# Random forest via ranger, tuned over mtry and min.node.size; impurity importance kept for varImp()
rforest<-train(TargetBuy~., data=training, method="ranger", tuneGrid=tunegrid, metric="ROC",
               num.trees=500, importance="impurity", trControl=ctrl)

stopCluster(cl) 

rforest
## Random Forest 
## 
## 2401 samples
##   33 predictor
##    2 classes: 'Yes', 'No' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 2161, 2161, 2161, 2161, 2161, 2161, ... 
## Resampling results across tuning parameters:
## 
##   mtry  min.node.size  ROC        Sens         Spec     
##    2    200            0.7969406  0.033696041  0.9983176
##    2    250            0.7967645  0.019225416  0.9983176
##    2    300            0.7982899  0.007780217  0.9988732
##    2    350            0.7962571  0.000000000  1.0000000
##    5    200            0.8087844  0.277059656  0.9753118
##    5    250            0.8072548  0.245778560  0.9808688
##    5    300            0.8095142  0.224082294  0.9842611
##    5    350            0.8086102  0.197725128  0.9898877
##   10    200            0.8106792  0.435663260  0.9434860
##   10    250            0.8109584  0.427537038  0.9450925
##   10    300            0.8126413  0.399216913  0.9501683
##   10    350            0.8132080  0.381359958  0.9535594
##   15    200            0.8120443  0.471615526  0.9305568
##   15    250            0.8124344  0.458777433  0.9322623
##   15    300            0.8125042  0.433734757  0.9372375
##   15    350            0.8136489  0.432569593  0.9405111
## 
## Tuning parameter 'splitrule' was held constant at a value of gini
## ROC was used to select the optimal model using the largest value.
## The final values used for the model were mtry = 15, splitrule = gini
##  and min.node.size = 350.
plot(rforest)

varImp(rforest)
## ranger variable importance
## 
##   only 20 most important variables shown (out of 33)
## 
##                          Overall
## DemAge                  100.0000
## DemAffl                  67.0975
## DemGender_U              16.2166
## DemGender_M               9.7908
## PromSpend                 4.5109
## PromTime                  2.4718
## `DemTVReg_Wales & West`   0.6413
## `DemTVReg_S West`         0.4775
## `DemReg_South West`       0.4161
## DemClusterGroup_F         0.3980
## DemClusterGroup_D         0.3369
## DemClusterGroup_B         0.3227
## PromClass_Tin             0.2737
## `DemTVReg_C Scotland`     0.2237
## DemClusterGroup_C         0.1826
## `DemTVReg_N West`         0.1631
## `DemTVReg_S & S East`     0.1545
## DemReg_North              0.1219
## DemTVReg_U                0.1171
## DemClusterGroup_E         0.1135

Boosted Tree Model MU (OU student stood on a stool to make a D.T.)

set.seed(13)
# Grid over number of trees, tree depth, and learning rate; minimum node size held at 25
tunegrid <- expand.grid(n.trees = seq(50, 350, 50), interaction.depth = c(10, 20), shrinkage = c(0.1, 0.01), n.minobsinnode = c(25))

cl <- makePSOCKcluster(6)
registerDoParallel(cl)

# Gradient boosted trees via gbm, cross-validated with the same folds as the random forest
gb.tree <- train(TargetBuy~., data=training, method = 'gbm', trControl=ctrl, tuneGrid=tunegrid, metric='ROC')
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1        1.1314             nan     0.0100    0.0026
##      2        1.1261             nan     0.0100    0.0025
##      3        1.1210             nan     0.0100    0.0025
##      4        1.1161             nan     0.0100    0.0025
##      5        1.1113             nan     0.0100    0.0023
##      6        1.1064             nan     0.0100    0.0024
##      7        1.1012             nan     0.0100    0.0023
##      8        1.0960             nan     0.0100    0.0023
##      9        1.0916             nan     0.0100    0.0019
##     10        1.0872             nan     0.0100    0.0022
##     20        1.0469             nan     0.0100    0.0019
##     40        0.9879             nan     0.0100    0.0012
##     60        0.9448             nan     0.0100    0.0007
##     80        0.9132             nan     0.0100    0.0005
##    100        0.8889             nan     0.0100    0.0004
##    120        0.8702             nan     0.0100    0.0002
##    140        0.8548             nan     0.0100    0.0001
##    150        0.8481             nan     0.0100    0.0002
stopCluster(cl)
gb.tree
## Stochastic Gradient Boosting 
## 
## 2401 samples
##   33 predictor
##    2 classes: 'Yes', 'No' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 2161, 2161, 2161, 2161, 2161, 2161, ... 
## Resampling results across tuning parameters:
## 
##   shrinkage  interaction.depth  n.trees  ROC        Sens         Spec     
##   0.01       10                  50      0.8175928  0.008018313  0.9988795
##   0.01       10                 100      0.8188771  0.299782502  0.9731015
##   0.01       10                 150      0.8194540  0.388241877  0.9529198
##   0.01       10                 200      0.8192479  0.434590725  0.9444266
##   0.01       10                 250      0.8189846  0.457324060  0.9411345
##   0.01       10                 300      0.8179288  0.465725309  0.9395232
##   0.01       10                 350      0.8174871  0.468828901  0.9390446
##   0.01       20                  50      0.8127154  0.048799633  0.9972294
##   0.01       20                 100      0.8137111  0.293580764  0.9702952
##   0.01       20                 150      0.8145173  0.392801281  0.9522775
##   0.01       20                 200      0.8132307  0.435679151  0.9427691
##   0.01       20                 250      0.8133516  0.453194337  0.9389535
##   0.01       20                 300      0.8120375  0.463462412  0.9367696
##   0.01       20                 350      0.8108462  0.470662166  0.9362492
##   0.10       10                  50      0.8100936  0.482113361  0.9339991
##   0.10       10                 100      0.8000114  0.469624716  0.9272212
##   0.10       10                 150      0.7955282  0.470740957  0.9172997
##   0.10       10                 200      0.7918043  0.472215388  0.9095674
##   0.10       10                 250      0.7837146  0.468113206  0.9089614
##   0.10       10                 300      0.7816476  0.468255058  0.9032955
##   0.10       10                 350      0.7772323  0.469358650  0.9040316
##   0.10       20                  50      0.8012344  0.464128990  0.9239922
##   0.10       20                 100      0.7910367  0.478840700  0.9105516
##   0.10       20                 150      0.7829584  0.481327692  0.9039522
##   0.10       20                 200      0.7799780  0.477947126  0.8972636
##   0.10       20                 250      0.7782941  0.486487732  0.8949487
##   0.10       20                 300      0.7759009  0.488122952  0.8829170
##   0.10       20                 350      0.7753893  0.488053545  0.8816929
## 
## Tuning parameter 'n.minobsinnode' was held constant at a value of 25
## ROC was used to select the optimal model using the largest value.
## The final values used for the model were n.trees = 150, interaction.depth =
##  10, shrinkage = 0.01 and n.minobsinnode = 25.

NN Model OU

numWts<-500
# Single hidden layer of 4 units; two weight-decay values
tunegrid<-expand.grid(.size=4, .decay=c(0.1, 0.5))

x<-select(training, -TargetBuy)   # all predictors

cl <- makePSOCKcluster(6)
registerDoParallel(cl)

nnetFit.ou<-train(x, y=training$TargetBuy,
               method="nnet",
               metric="ROC",
               linout=FALSE,            # classification, so logistic output units
               preProcess = c("range"),
               tuneGrid = tunegrid,
               trace=FALSE,
               maxit=200,
               MaxNWts=numWts,
               trControl=ctrl)

stopCluster(cl)

NN Model MU

numWts<-500
# Hidden-layer sizes 1 through 10, each with two weight-decay values
tunegrid<-expand.grid(.size=1:10, .decay=c(0.1, 0.5))
terms<-names(lrstep$finalModel$coefficients)[-1]   # predictors retained by the stepwise model

# Columns chosen from the stepwise terms (listed by hand; a programmatic alternative is sketched after this chunk)
x<-select(training, "DemAffl", "DemAge", "PromTime", "DemClusterGroup_D", "DemGender_M", "DemGender_U", "PromClass_Tin", "DemTVReg_Yorkshire")
cl <- makePSOCKcluster(6)
registerDoParallel(cl)

nnetFit.mu<-train(x, y=training$TargetBuy,
               method="nnet",
               metric="ROC",
               linout=FALSE,
               preProcess = c("range"),
               tuneGrid = tunegrid,
               trace=FALSE,
               maxit=200,
               MaxNWts=numWts,
               trControl=ctrl)

stopCluster(cl)
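
Rather than typing the retained predictors by hand, they could be pulled from the stepwise fit directly. A sketch; the gsub() strips the backticks R adds around names containing spaces:

keep  <- gsub("`", "", terms)               # e.g. `DemReg_South East` becomes DemReg_South East
x.alt <- training[, keep, drop = FALSE]     # alternative to the hand-typed select() above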

nnetFit.mu
## Neural Network 
## 
## 2401 samples
##    8 predictor
##    2 classes: 'Yes', 'No' 
## 
## Pre-processing: re-scaling to [0, 1] (8) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 2161, 2161, 2161, 2161, 2161, 2161, ... 
## Resampling results across tuning parameters:
## 
##   size  decay  ROC        Sens       Spec     
##    1    0.1    0.8112700  0.4416529  0.9344226
##    1    0.5    0.8111686  0.4232376  0.9450799
##    2    0.1    0.8156336  0.4373272  0.9428275
##    2    0.5    0.8135827  0.4202489  0.9506800
##    3    0.1    0.8153818  0.4399508  0.9395891
##    3    0.5    0.8135786  0.4186096  0.9501335
##    4    0.1    0.8125666  0.4492085  0.9429884
##    4    0.5    0.8135252  0.4139792  0.9507218
##    5    0.1    0.8111561  0.4474219  0.9406074
##    5    0.5    0.8137573  0.4156185  0.9507218
##    6    0.1    0.8107045  0.4521594  0.9401323
##    6    0.5    0.8138450  0.4156185  0.9507218
##    7    0.1    0.8103871  0.4510175  0.9418438
##    7    0.5    0.8137632  0.4156185  0.9501335
##    8    0.1    0.8101365  0.4508425  0.9412638
##    8    0.5    0.8136752  0.4156185  0.9501335
##    9    0.1    0.8100122  0.4531133  0.9413644
##    9    0.5    0.8137748  0.4156185  0.9501335
##   10    0.1    0.8100317  0.4544981  0.9413612
##   10    0.5    0.8137838  0.4156185  0.9501335
## 
## ROC was used to select the optimal model using the largest value.
## The final values used for the model were size = 2 and decay = 0.1.
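
The held-out valid set from the partition step gives a common footing for comparing the fitted models. A sketch, assuming the pROC package; the two neural-network fits would need their predictor columns subset to match how each was trained:

library(pROC)   # assumed; not loaded above

# Validation-set probabilities for the "Yes" class
p.lr  <- predict(lrstep,  newdata = valid, type = "prob")[, "Yes"]
p.rf  <- predict(rforest, newdata = valid, type = "prob")[, "Yes"]
p.gbm <- predict(gb.tree, newdata = valid, type = "prob")[, "Yes"]

# Validation AUC for each model
auc(roc(valid$TargetBuy, p.lr))
auc(roc(valid$TargetBuy, p.rf))
auc(roc(valid$TargetBuy, p.gbm))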