## Optimisation and regularisation steps

```
## ====================================================================
## Step 0: data import and wrangling
## ====================================================================
# source("data_format.R")
# Recode the outcome: glmnet-style binomial fitting wants a 0/1 factor.
y1 <- factor(as.integer(y) - 1) ## Outcome is required to be factor of 0 or 1.
## ====================================================================
## Step 1: settings
## ====================================================================
## Folds
K <- 10
set.seed(3)
# NOTE(review): `c` shadows base::c() — kept because Step 2 passes it as
# foldid; consider renaming (e.g. fold_ids) throughout the script.
c <- caret::createFolds(y = y,
                        k = K,
                        list = FALSE,
                        returnTrain = TRUE) # Fold ids for alpha tuning

## Defining tuning parameters
lambdas <- 2^seq(-10, 5, 1)
alphas <- seq(0, 1, .1)

## Weights for models: inverse class frequency when `weighted` is TRUE
weighted <- TRUE
if (weighted == TRUE) {
  wght <- as.vector(1 - (table(y)[y] / length(y)))
} else {
  # Fix: original used nrow(y), which is NULL for a vector/factor outcome
  # and would make rep() return an empty vector.
  wght <- rep(1, length(y))
}
## Standardise numeric
## Centered and scaled inside cv.glmnet via standardize = TRUE (Step 2)
## ====================================================================
## Step 2: all cross validations for each alpha
## ====================================================================
library(furrr)
library(purrr)
library(doMC)
registerDoMC(cores = 6)
# Nested CVs: for each alpha, cv.glmnet cross-validates over all lambdas
set.seed(3)
cvs <- future_map(alphas, function(a) {
  cv.glmnet(model.matrix(~ . - 1, X), # Dummy-code predictors, no intercept column
            y1,
            weights = wght,
            lambda = lambdas,
            type.measure = "deviance", # This is standard measure and recommended for tuning
            foldid = c, # Per recommendation the folds are kept for alpha optimisation
            alpha = a,
            standardize = TRUE,
            # Fix: bare `quasibinomial` passed the function object itself;
            # glmnet expects a family object (or a string), so call it.
            family = quasibinomial(), # Same as binomial, but not as picky
            keep = TRUE) # Retain prevalidated fits (fit.preval) for the ROC in Step 3
})
## ====================================================================
# Step 3: optimum lambda for each alpha
## ====================================================================
# For each alpha, lambda is chosen for the lowest measure (deviance).
# Result: a matrix with rows "lamb", "alph", "cvm" and one column per alpha.
each_alpha <- sapply(seq_along(alphas), function(id) {
  each_cv <- cvs[[id]]
  alpha_val <- alphas[id]
  # Position of lambda.min within this fit's lambda path
  index_lmin <- match(each_cv$lambda.min,
                      each_cv$lambda)
  c(lamb = each_cv$lambda.min,
    alph = alpha_val,
    cvm = each_cv$cvm[index_lmin])
})
# Best lambda
best_lamb <- min(each_alpha["lamb", ])
# Alpha is chosen for best lambda with lowest model deviance, each_alpha["cvm",]
best_alph <- each_alpha["alph", ][
  each_alpha["cvm", ] == min(each_alpha["cvm", ][each_alpha["lamb", ] %in% best_lamb])]
## https://stackoverflow.com/questions/42007313/plot-an-roc-curve-in-r-with-ggplot2
# NOTE(review): indexing looks suspect — cvs[[1]] is the fit for alphas[1],
# while [[match(best_alph, alphas)]] selects a lambda entry by the best
# alpha's position; confirm cvs[[match(best_alph, alphas)]]$fit.preval was
# the intent before relying on this plot.
p_roc <- roc.glmnet(cvs[[1]]$fit.preval, newy = y)[[match(best_alph, alphas)]] |> # Plots performance from model with best alpha
  ggplot(aes(FPR, TPR)) +
  geom_step() +
  coord_cartesian(xlim = c(0, 1), ylim = c(0, 1)) +
  geom_abline() + # Chance diagonal for reference
  theme_bw()
## ====================================================================
# Step 4: Creating the final model
## ====================================================================
source("regular_fun.R") # Custom function
optimised_model <- regular_fun(X, y1, K, lambdas = best_lamb, alpha = best_alph)
# With lambda and alpha specified, the function is just a k-fold cross-validation wrapper,
# but keeps model performance figures from each fold.
list2env(optimised_model, .GlobalEnv)
# Function outputs a list, which is unwrapped to Env.
# See source script for reference.
## ====================================================================
# Step 5: creating table of coefficients for inference
## ====================================================================
# B is unpacked from optimised_model (Step 4); presumably one coefficient
# vector per fold — TODO confirm against regular_fun.R.
# NOTE(review): ncol = 10 assumes K = 10 folds; keep in sync with K.
Bmatrix <- matrix(unlist(B), ncol = 10)
Bmedian <- apply(Bmatrix, 1, median)
Bmean <- apply(Bmatrix, 1, mean)

reg_coef_tbl <- tibble(
  name = c("Intercept", Hmisc::label(X)),
  medianX = round(Bmedian, 5),
  ORmed = round(exp(Bmedian), 5), # Odds ratio from median coefficient
  meanX = round(Bmean, 5),
  ORmea = round(exp(Bmean), 5)) %>% # Odds ratio from mean coefficient
  # arrange(desc(abs(medianX))) %>%
  gt()
## ====================================================================
# Step 6: plotting predictive performance
## ====================================================================
# cMatTest and auc_test are unpacked from optimised_model (Step 4)
reg_cfm <- confusionMatrix(cMatTest)
reg_auc_sum <- summary(auc_test[, 1])
## ====================================================================
# Step 7: Packing list to save in loop
## ====================================================================
# NOTE(review): `ls` shadows base::ls() — consider renaming the collector.
# The "ConfusionMatrx" key typo is kept intentionally: downstream consumers
# may index by that exact name.
ls[[i]] <- list("RegularisedCoefs" = reg_coef_tbl,
                "bestA" = best_alph,
                "bestL" = best_lamb,
                "ConfusionMatrx" = reg_cfm,
                "AUROC" = reg_auc_sum)
```