pacman::p_load(tidyverse, DataExplorer, nnet, ranger, gbm, xgboost, caret, pROC, ROCR, rpart, rpart.plot, doParallel, glmnet) # load (and install if missing) all required packages
East-West Airlines has entered into a partnership with the wireless phone company Telecon to sell the latter’s service via direct mail. The data used to construct these models are a sample of customers who have already received the offer. We will construct models to predict whether a customer purchases a wireless phone service contract (Phone_sale = Yes) and then use the best model to classify additional customers.
Here is a snapshot of the data; the first, unlabeled column holds the sampled row numbers.
|   | Topflight | Balance | Qual_miles | cc1_miles. | cc2_miles. | cc3_miles. | Bonus_miles | Bonus_trans | Flight_miles_12mo | Flight_trans_12 | Online_12 | Email | Club_member | Any_cc_miles_12mo | Phone_sale |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 884 | 0 | 135168 | 0 | 1 | 0 | 0 | 29156 | 16 | 300 | 2 | 0 | 1 | 0 | 1 | No |
| 3017 | 1 | 97424 | 0 | 1 | 0 | 0 | 63796 | 17 | 0 | 0 | 0 | 1 | 0 | 1 | No |
| 1583 | 0 | 11001 | 0 | 0 | 0 | 0 | 255 | 5 | 0 | 0 | 0 | 1 | 0 | 0 | No |
| 4919 | 0 | 5010 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | No |
| 483 | 1 | 10954 | 500 | 0 | 0 | 0 | 2500 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | No |
| 3605 | 0 | 12964 | 500 | 0 | 0 | 0 | 450 | 3 | 350 | 2 | 0 | 1 | 0 | 0 | No |
Here is the class distribution of the response variable, Phone_sale.
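The proportions below can be reproduced with something like the following (assuming, as in the partitioning code, that the full data frame is named df):
prop.table(table(df$Phone_sale)) # relative frequencies of No/Yes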
##
## No Yes
## 0.8696848 0.1303152
Then we create training (60%) and validation (40%) partitions, stratified on the response.
set.seed(13)
trainIndex <- createDataPartition(df$Phone_sale, p=0.6, list=FALSE, times=1) # stratified 60% sample of row indices
train <- df[trainIndex, ]
valid <- df[-trainIndex, ]
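Because createDataPartition samples within each class, both partitions should preserve the roughly 87/13 split of Phone_sale; a quick check (not part of the original output):
prop.table(table(train$Phone_sale)) # training-set class proportions
prop.table(table(valid$Phone_sale)) # validation-set class proportions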
Your Eastern “Red Brick” colleagues constructed this classification tree.
class.tree.ou <- rpart(Phone_sale~., data=train, control=rpart.control(maxdepth=4, minsplit=20, cp=0.002, xval=10), method="class", model=TRUE) # shallow classification tree; xval=10 gives the cross-validated errors in the CP table
printcp(class.tree.ou)
##
## Classification tree:
## rpart(formula = Phone_sale ~ ., data = train, method = "class",
## model = TRUE, control = rpart.control(maxdepth = 4, minsplit = 20,
## cp = 0.002, xval = 10))
##
## Variables actually used in tree construction:
## [1] Bonus_miles Bonus_trans
##
## Root node error: 313/2400 = 0.13042
##
## n= 2400
##
## CP nsplit rel error xerror xstd
## 1 0.0047923 0 1.00000 1.0000 0.052709
## 2 0.0020000 2 0.99042 1.0128 0.052994
prp(class.tree.ou, type=1, extra=1, split.font=1, varlen=-10, digits=-3) # draw the fitted tree
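The CP table above shows the cross-validated error (xerror) never drops below the root node’s, so the tree adds little signal. For comparison with the models below, it can be scored on the validation partition (a sketch; this AUC is not reported in the original):
p.tree <- predict(class.tree.ou, newdata=valid, type="prob") # class probabilities from the tree
roc(valid$Phone_sale, p.tree[,"Yes"])$auc # validation AUC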
set.seed(13)
cvindx <- createFolds(train$Phone_sale, k=10, returnTrain=TRUE) # stratify the folds on the outcome rather than the row indices
ctrl <- trainControl(method="cv", index=cvindx, summaryFunction=twoClassSummary, classProbs=TRUE) # 10-fold CV reporting ROC, Sens, Spec
tunegrid <- expand.grid(
.mtry = c(2, 5, 10),
.splitrule = c("gini"),
.min.node.size = c(10, 20, 50)
)
tunegrid
## .mtry .splitrule .min.node.size
## 1 2 gini 10
## 2 5 gini 10
## 3 10 gini 10
## 4 2 gini 20
## 5 5 gini 20
## 6 10 gini 20
## 7 2 gini 50
## 8 5 gini 50
## 9 10 gini 50
cl <- makePSOCKcluster(6) # start a 6-worker cluster
registerDoParallel(cl)
rforest <- train(Phone_sale~., data=train, method="ranger", tuneGrid=tunegrid, metric="ROC",
               num.trees=600, importance="impurity", trControl=ctrl) # random forest via ranger
stopCluster(cl) # shut the cluster down
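The rforest object itself is not printed above; the winning hyperparameters and the impurity-based variable importances can be inspected with, e.g.:
rforest$bestTune # best mtry / splitrule / min.node.size combination
plot(varImp(rforest)) # impurity-based importance of each predictor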
set.seed(13)
tuneGrid <- expand.grid(
  n.trees = seq(50, 100, 25),
  interaction.depth = c(10, 20),
  shrinkage = c(0.1, 0.01),
  n.minobsinnode = c(25)
)
tuneGrid
## n.trees interaction.depth shrinkage n.minobsinnode
## 1 50 10 0.10 25
## 2 75 10 0.10 25
## 3 100 10 0.10 25
## 4 50 20 0.10 25
## 5 75 20 0.10 25
## 6 100 20 0.10 25
## 7 50 10 0.01 25
## 8 75 10 0.01 25
## 9 100 10 0.01 25
## 10 50 20 0.01 25
## 11 75 20 0.01 25
## 12 100 20 0.01 25
cl <- makePSOCKcluster(6) #starts the cluster
registerDoParallel(cl)
gb.tree <- train(Phone_sale~.,
data=train,
method = 'gbm',
trControl=ctrl,
tuneGrid=tuneGrid,
metric='ROC')
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.7729 nan 0.0100 0.0004
## 2 0.7713 nan 0.0100 0.0004
## 3 0.7695 nan 0.0100 0.0004
## 4 0.7678 nan 0.0100 0.0005
## 5 0.7661 nan 0.0100 0.0004
## 6 0.7645 nan 0.0100 0.0004
## 7 0.7628 nan 0.0100 0.0002
## 8 0.7613 nan 0.0100 0.0003
## 9 0.7600 nan 0.0100 0.0003
## 10 0.7586 nan 0.0100 0.0003
## 20 0.7454 nan 0.0100 0.0002
## 40 0.7241 nan 0.0100 0.0001
## 60 0.7063 nan 0.0100 0.0000
## 75 0.6950 nan 0.0100 0.0000
stopCluster(cl)
gb.tree
## Stochastic Gradient Boosting
##
## 2400 samples
## 14 predictor
## 2 classes: 'No', 'Yes'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 2160, 2160, 2160, 2160, 2160, 2160, ...
## Resampling results across tuning parameters:
##
## shrinkage interaction.depth n.trees ROC Sens Spec
## 0.01 10 50 0.6366168 1.0000000 0.000000000
## 0.01 10 75 0.6375202 1.0000000 0.000000000
## 0.01 10 100 0.6372970 1.0000000 0.000000000
## 0.01 20 50 0.6330344 1.0000000 0.000000000
## 0.01 20 75 0.6376144 1.0000000 0.000000000
## 0.01 20 100 0.6368114 1.0000000 0.000000000
## 0.10 10 50 0.6220247 0.9961946 0.009703539
## 0.10 10 75 0.6201846 0.9933275 0.006787330
## 0.10 10 100 0.6156804 0.9894588 0.024274301
## 0.10 20 50 0.6256814 0.9918474 0.027084013
## 0.10 20 75 0.6147826 0.9840976 0.028501017
## 0.10 20 100 0.6030173 0.9813216 0.031751791
##
## Tuning parameter 'n.minobsinnode' was held constant at a value of 25
## ROC was used to select the optimal model using the largest value.
## The final values used for the model were n.trees = 75, interaction.depth =
## 20, shrinkage = 0.01 and n.minobsinnode = 25.
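Because Sens is near 1 and Spec near 0 throughout, these boosted models almost always predict No at the default 0.5 cutoff; the ROC column is the more informative comparison. The tuning profile can also be plotted directly:
plot(gb.tree) # cross-validated ROC versus n.trees, by interaction depth and shrinkage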
And the OU folks constructed this logistic regression model.
lrfull <- glm(Phone_sale~., data=train, family="binomial") # logistic regression on all predictors
options(scipen = 999) # print coefficients in fixed rather than scientific notation
summary(lrfull)
##
## Call:
## glm(formula = Phone_sale ~ ., family = "binomial", data = train)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.617141057 0.148624193 -17.609 < 0.0000000000000002 ***
## Topflight 0.187622562 0.190979630 0.982 0.32589
## Balance -0.000003415 0.000001051 -3.248 0.00116 **
## Qual_miles -0.000004201 0.000088819 -0.047 0.96228
## cc1_miles. -0.262885161 0.508008633 -0.517 0.60482
## cc2_miles. 0.071280272 0.337021734 0.212 0.83250
## cc3_miles. 0.347307014 0.614441644 0.565 0.57191
## Bonus_miles 0.000006288 0.000003349 1.878 0.06042 .
## Bonus_trans 0.019978715 0.009336412 2.140 0.03237 *
## Flight_miles_12mo 0.000077401 0.000081928 0.945 0.34479
## Flight_trans_12 -0.023730585 0.033341861 -0.712 0.47663
## Online_12 0.126388443 0.077907955 1.622 0.10474
## Email 0.234557711 0.139833081 1.677 0.09346 .
## Club_member 0.031279370 0.216001187 0.145 0.88486
## Any_cc_miles_12mo 0.838018878 0.525279654 1.595 0.11063
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1858.5 on 2399 degrees of freedom
## Residual deviance: 1777.3 on 2385 degrees of freedom
## AIC: 1807.3
##
## Number of Fisher Scoring iterations: 5
In the full model only Balance and Bonus_trans are significant at the 5% level, so we also fit a lasso-penalized logistic regression (alpha = 1) over a grid of lambda values to see which predictors survive the penalty.
tuneGrid <- expand.grid(
  alpha = 1,
  lambda = seq(0.0001, 1, length = 100)
)
lasso_model <- train(
  Phone_sale ~ .,
  data = train,
  method = "glmnet",
  trControl = ctrl, # reuse the CV control defined above
  metric = "ROC",
  tuneGrid = tuneGrid,
  family = "binomial"
)
glmnet_fit <- lasso_model$finalModel
best_lambda <- lasso_model$bestTune$lambda
coef_mat <- coef(glmnet_fit, s = best_lambda) # coefficients at the chosen penalty
coef_df <- data.frame(
  term = rownames(as.matrix(coef_mat)),
  estimate = as.numeric(as.matrix(coef_mat)),
  stringsAsFactors = FALSE
)
active_vars <- coef_df %>%
  filter(term != "(Intercept)", estimate != 0) # keep the predictors with nonzero coefficients
terms.lasso <- active_vars$term
terms.lasso
## [1] "Topflight" "Balance" "Qual_miles"
## [4] "cc1_miles." "cc2_miles." "cc3_miles."
## [7] "Bonus_miles" "Bonus_trans" "Flight_miles_12mo"
## [10] "Flight_trans_12" "Online_12" "Email"
## [13] "Club_member" "Any_cc_miles_12mo"
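At the chosen penalty the lasso retains all 14 predictors. The selected lambda itself can be checked with:
lasso_model$bestTune # best (alpha, lambda) pair from the grid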
Next we tune a single-hidden-layer neural network, re-creating the stratified folds for a fair comparison.
set.seed(13)
cvindx <- createFolds(train$Phone_sale, k=10, returnTrain=TRUE) # same stratified folds as before
ctrl <- trainControl(method="cv", index=cvindx, summaryFunction=twoClassSummary, classProbs=TRUE)
tunegrid <- expand.grid(.size=1:12, .decay=c(0.01, 0.1)) # hidden-layer size and weight decay
tunegrid
## .size .decay
## 1 1 0.01
## 2 2 0.01
## 3 3 0.01
## 4 4 0.01
## 5 5 0.01
## 6 6 0.01
## 7 7 0.01
## 8 8 0.01
## 9 9 0.01
## 10 10 0.01
## 11 11 0.01
## 12 12 0.01
## 13 1 0.10
## 14 2 0.10
## 15 3 0.10
## 16 4 0.10
## 17 5 0.10
## 18 6 0.10
## 19 7 0.10
## 20 8 0.10
## 21 9 0.10
## 22 10 0.10
## 23 11 0.10
## 24 12 0.10
numWts <- 500 # cap on the weight count; the largest network here needs (14+1)*12 + (12+1) = 193 weights
Note that we use only the variables selected by the lasso (here, all 14 predictors).
cl <- makePSOCKcluster(3) #Starts the parallel computing
registerDoParallel(cl)
nnetFit<-train(x=train[,terms.lasso], y=train$Phone_sale,
method="nnet",
metric="ROC",
linout=FALSE,
preProcess = c("range"),
tuneGrid = tunegrid,
trace=FALSE,
maxit=100,
MaxNWts=numWts,
trControl=ctrl)
stopCluster(cl)
nnetFit
## Neural Network
##
## 2400 samples
## 14 predictor
## 2 classes: 'No', 'Yes'
##
## Pre-processing: re-scaling to [0, 1] (14)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 2160, 2160, 2160, 2160, 2160, 2160, ...
## Resampling results across tuning parameters:
##
## size decay ROC Sens Spec
## 1 0.01 0.6184818 0.9995370 0.000000000
## 1 0.10 0.6251293 0.9990697 0.000000000
## 2 0.01 0.6240871 0.9942632 0.020927215
## 2 0.10 0.6337693 0.9981190 0.006325581
## 3 0.01 0.6130470 0.9937334 0.022970297
## 3 0.10 0.6309296 0.9956595 0.019744491
## 4 0.01 0.6065989 0.9952046 0.006325581
## 4 0.10 0.6258915 0.9971201 0.019434893
## 5 0.01 0.6135885 0.9884860 0.016513932
## 5 0.10 0.6364126 0.9966400 0.012957160
## 6 0.01 0.6109005 0.9861826 0.028237055
## 6 0.10 0.6314031 0.9966323 0.015898337
## 7 0.01 0.6042524 0.9846435 0.026755480
## 7 0.10 0.6368835 0.9961546 0.012957160
## 8 0.01 0.6194569 0.9874658 0.026632449
## 8 0.10 0.6341326 0.9966400 0.012957160
## 9 0.01 0.6205872 0.9875069 0.028773369
## 9 0.10 0.6394672 0.9961823 0.013266758
## 10 0.01 0.6114289 0.9847545 0.019040110
## 10 0.10 0.6366024 0.9956873 0.015898337
## 11 0.01 0.6116548 0.9851891 0.025143727
## 11 0.10 0.6338810 0.9956873 0.022376070
## 12 0.01 0.6178583 0.9856311 0.033858255
## 12 0.10 0.6348118 0.9961524 0.012957160
##
## ROC was used to select the optimal model using the largest value.
## The final values used for the model were size = 9 and decay = 0.1.
p.nnet <- predict(nnetFit, newdata=train, type="prob") # class probabilities on the training set (predict.train takes newdata, not data)
r <- roc(train$Phone_sale, p.nnet[,"Yes"])
r.nnet.auct <- r$auc # training-set AUC
r.nnet.auct
## Area under the curve: 0.6785
p.nnet <- predict(nnetFit, newdata=valid, type="prob") # now score the validation set
r <- roc(valid$Phone_sale, p.nnet[,"Yes"])
r.nnet.auc <- r$auc # validation-set AUC
r.nnet.auc
## Area under the curve: 0.6533
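The same recipe yields a validation AUC for each competing model, so they can be lined up directly (a sketch assuming the objects fitted above; none of these numbers appear in the original output):
auc_valid <- function(p_yes) roc(valid$Phone_sale, p_yes)$auc # helper: AUC from a vector of P(Yes)
auc_valid(predict(class.tree.ou, newdata=valid, type="prob")[,"Yes"]) # classification tree
auc_valid(predict(rforest, newdata=valid, type="prob")[,"Yes"]) # random forest
auc_valid(predict(gb.tree, newdata=valid, type="prob")[,"Yes"]) # boosted trees
auc_valid(predict(lrfull, newdata=valid, type="response")) # full logistic regression
auc_valid(predict(lasso_model, newdata=valid, type="prob")[,"Yes"]) # lasso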