Different persons like to write/organize codes in different ways. I generally followed the following points.
More spacious. I would write x = c(2,3) rather than x=c(2,3).
I used “=” instead of “<-” in all places where they are consequentially the same. [Note: Only in particular cases, “<-” is different than “=.” For example, cor(x=1:3,y=4:6) and cor(x<-1:3,y<-4:6) will give the same output 1, but in the latter case only, x and y will also get defined in main environment. As an aside, if you want to define a varible within the body of a function but also in the main environment, use like this: a = function(){ff <<- 2}. Here, on running the function a, ff will also get defined in the main environment.]
Loop: For multiline for/if/while loop, I kept the curly brackets ({}) vertically aligned. Thus, instead of writing
for(i in 1:10){
a = i
b = i + 1
}I wrote
for(i in 1:10)
{
a = i
b = i + 1
}data directory, the source files are in source directory, the analysis result files are saved as R workspace within Results directory, and plots and tables are saved within tables_and_figures directory.# empty workspace
rm(list = ls())
# load library
library(data.table) # for data input/output
library(mice) # for imputation using mice() function
library(crrp) # for penalized regression analysis using gcrrp() function
library(kableExtra) # for printing nice looking tables using functions like column_spec()
library(ggplot2) # for making nice looking compact plots using data.table object
library(riskRegression) # for fitting Fine and Gray model using FGR()
library(pec) # for predicting cumulative incidence from fitted CR object using predictEventProb()
library(prodlim)
# set directories
workdir = "/Users/nsanyal/Downloads/PR_for_CR/"D = fread(paste0(workdir, "data/example_SPLC_data.csv"))
dim(D)## [1] 1000 20
# copy data.table into another object
D0 = copy(D) # D0=D will not copy, just will assign another name, so changing one will change the other
# extract column 'event' as vector
D[, event] # or D$event
# extract column 'event' as data.table
D[, "event"] # or D[,.(event)]
# extract columns 'Time' and 'event' as data.table
D[, c("Time", "event")] # or D[,.(Time,event)]
# extract columns with names saved in a variable
cnames = c("Time", "event", "edu", "sex")
D[, ..cnames] # or D[,cnames,with=F]
# sort data by increasing order of 'Time'
setkey(D, Time) # or setorder(D,Time)
# sort data by increasing order of 'Time' (first) and 'edu' (next)
setkey(D, Time, edu) # or setorder(D,Time,edu)
# sort data by decreasing order of 'Time'
setorder(D, -Time) # or setorderv(D,'Time',order=-1). setkey allows sorting only by increasing order.
# sort data by alphabetically ordered column names (convenient for looking up a column name)
setcolorder(D, sort(names(D)))
# reset data as in the beginning
D = copy(D0)# exclude never smokers (1=never, 2=former, 3=current)
D = D[!D$smkstatus2 == 1, ]
dim(D)
# [1] 6372 102
# Choose variables to include and subset data
vars.include = c("Time", "event", "age.ix.cat", "bmi", "chemo_ix", "cigday2", "copd", "edu", "fh", "hist2.ix",
"packyears2", "ph", "quityears2", "race.cat", "radiation_ix", "sex", "smkstatus2", "stage2.ix", "surgery_ix",
"USPSTF")
data = D[, ..vars.include]
data$age.ix.cat = unname(sapply(data$age.ix.cat, function(x) switch(x, `71-80` = "71.80", `80+` = "80",
`61-70` = "61.70", `<55` = "55", `56-60` = "56.60"))) # replace characters -,< and + by .
# Set missing code, if any, to NA. Then, convert categorical variables to factor.
data$age.ix.cat = relevel(as.factor(data$age.ix.cat), ref = "61.70")
data$chemo_ix = ifelse(data$chemo_ix == 9, NA, data$chemo_ix)
data$chemo_ix = relevel(as.factor(data$chemo_ix), ref = 1)
data$copd = ifelse(data$copd == 9, NA, data$copd)
data$copd = as.factor(as.character(data$copd))
data$edu = relevel(as.factor(as.character(data$edu)), ref = "2") # 2 = college group is reference
data$fh = as.factor(as.character(data$fh))
data$hist2.ix = relevel(as.factor(data$hist2.ix), ref = "SQ")
data$ph = as.factor(as.character(data$ph))
data$race.cat = relevel(as.factor(data$race.cat), ref = "W")
data$radiation_ix = ifelse(data$radiation_ix == 9, NA, data$radiation_ix)
data$radiation_ix = relevel(as.factor(as.character(data$radiation_ix)), ref = "0") # 0 = No radiotherapy
data$sex = relevel(as.factor(as.character(data$sex)), ref = "0") # 0 = Female
data$smkstatus2 = relevel(as.factor(as.character(data$smkstatus2)), ref = "2") # 2 = Former
data$stage2.ix = ifelse(data$stage2.ix == 9, NA, data$stage2.ix)
data$stage2.ix = relevel(as.factor(as.character(data$stage2.ix)), ref = "0") # 0 = No stage 2 primary LC
data$surgery_ix = ifelse(data$surgery_ix == 9, NA, data$surgery_ix)
data$surgery_ix = relevel(as.factor(data$surgery_ix), ref = 1)
data$USPSTF = relevel(as.factor(as.character(data$USPSTF)), ref = "0") # 0 = No stage 2 primary LCseed = c(12)
mi = mice(data = data, m = 1, maxit = 5, print = T, seed = seed)
data.imp = complete(mi, 1)
# save( list = c('mi','data.imp'), file = paste0('Results/data_imp_seed',seed,'.Rdata') )# load imputed data
load(paste0(workdir, "Results/data_imp_seed12.Rdata"))
# create design matrix and
mat = data.imp[, -c(1:2)]
expr = as.formula(paste(" ~ ", paste(colnames(mat), collapse = " + ")))
X = model.matrix(expr, mat)[, -1]
# create group indicator (for inputing in gcrrp() function) by counting number of groups created in X
group = rep(1:ncol(mat), times = sapply(mat, function(x) {
if (class(x) == "numeric") return(1) else return(length(unique(x)) - 1)
}))
# fit LASSO
fit.glasso = gcrrp(time = data.imp$Time, fstatus = data.imp$event, failcode = 1, X = X, group = group,
penalty = "gLASSO", lambda.min = 0.001, nlambda = 50) # default choices.
# run str(fit.glasso) to see the structure of fit.glasso object.
# fit adaptive LASSO
fit.gadalasso = gcrrp(time = data.imp$Time, fstatus = data.imp$event, failcode = 1, X = X, group = group,
penalty = "gLASSO", weighted = T, lambda.min = 0.001, nlambda = 50) # default choices.
# fit SCAD
fit.gscad = gcrrp(time = data.imp$Time, fstatus = data.imp$event, failcode = 1, X = X, group = group,
penalty = "gSCAD", lambda.min = 0.001, nlambda = 50, gamma = 3.7) # default choices.
# fit MCP
fit.gmcp = gcrrp(time = data.imp$Time, fstatus = data.imp$event, failcode = 1, X = X, group = group,
penalty = "gMCP", lambda.min = 0.001, nlambda = 50, , gamma = 2.7) # default choices.
# save fits
save(fit.glasso, file = paste0(workdir, "Results/gLASSO_fit_imp_seed12.Rdata"))
save(fit.gadalasso, file = paste0(workdir, "Results/gADALASSO_fit_imp_seed12.Rdata"))
save(fit.gscad, file = paste0(workdir, "Results/gSCAD_fit_imp_seed12.Rdata"))
save(fit.gmcp, file = paste0(workdir, "Results/gMCP_fit_imp_seed12.Rdata"))# load fits
load(paste0(workdir, "Results/gLASSO_fit_imp_seed12.Rdata"))
load(paste0(workdir, "Results/gADALASSO_fit_imp_seed12.Rdata"))
load(paste0(workdir, "Results/gSCAD_fit_imp_seed12.Rdata"))
load(paste0(workdir, "Results/gMCP_fit_imp_seed12.Rdata"))
# extract coefficients for both GCV and BIC based tuning parameter selection
beta.glasso.bic = fit.glasso$beta[, which.min(fit.glasso$BIC)]
beta.gadalasso.bic = fit.gadalasso$beta[, which.min(fit.gadalasso$BIC)]
beta.gscad.bic = fit.gscad$beta[, which.min(fit.gscad$BIC)]
beta.gmcp.bic = fit.gmcp$beta[, which.min(fit.gmcp$BIC)]
beta.glasso.gcv = fit.glasso$beta[, which.min(fit.glasso$GCV)]
beta.gadalasso.gcv = fit.gadalasso$beta[, which.min(fit.gadalasso$GCV)]
beta.gscad.gcv = fit.gscad$beta[, which.min(fit.gscad$GCV)]
beta.gmcp.gcv = fit.gmcp$beta[, which.min(fit.gmcp$GCV)]
# make coefficient x method table
beta.all.bic = cbind(beta.glasso.bic, beta.gadalasso.bic, beta.gscad.bic, beta.gmcp.bic)
beta.all.gcv = cbind(beta.glasso.gcv, beta.gadalasso.gcv, beta.gscad.gcv, beta.gmcp.gcv)
kable(cbind(beta.all.bic, beta.all.gcv), row.names = T, caption = "GCV and BIC based variable selection for cause 1",
linesep = "") %>% column_spec(5, border_right = T) %>% kable_styling(full_width = F)| beta.glasso.bic | beta.gadalasso.bic | beta.gscad.bic | beta.gmcp.bic | beta.glasso.gcv | beta.gadalasso.gcv | beta.gscad.gcv | beta.gmcp.gcv | |
|---|---|---|---|---|---|---|---|---|
| age.ix.cat55 | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | -1.4575606 | -2.6698418 | -6.8191033 | -6.8150809 |
| age.ix.cat56.60 | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | 0.0747861 | -0.0485878 | -0.1423634 | -0.1618113 |
| age.ix.cat71.80 | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | -0.3730483 | -0.5435110 | -0.6720156 | -0.6657066 |
| age.ix.cat80 | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | -0.0992690 | -0.1745293 | -0.2088444 | -0.1993495 |
| bmi | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | 0.0233010 | 0.0149302 | 0.0406255 | 0.0414460 |
| chemo_ix1 | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | -0.8936411 | -1.0492702 | -1.2220898 | -1.2302037 |
| cigday2 | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 |
| copd1 | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | 0.0052276 | 0.0000000 | 0.0276097 | 0.0000000 |
| edu1 | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | 0.1257211 | 0.0148201 | 0.1081530 | 0.1119839 |
| edu3 | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | 0.3815423 | 0.0459760 | 0.2882186 | 0.2879215 |
| fh1 | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | -0.4381514 | -0.4582016 | -0.4893955 | -0.5254502 |
| hist2.ixAD | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | 0.8633487 | 1.1292301 | 1.1764403 | 1.1829045 |
| hist2.ixLC | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | 1.1540491 | 1.5064344 | 1.6203130 | 1.6199115 |
| hist2.ixOTH | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | 0.5612671 | 0.7131332 | 0.7077201 | 0.7055595 |
| hist2.ixSC | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | -0.4435133 | -1.7466435 | -5.4763283 | -5.4651681 |
| packyears2 | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | -0.0010113 | 0.0000000 | 0.0000000 | 0.0000000 |
| ph1 | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | 0.0103285 | 0.0000000 | 0.0000000 | 0.0000000 |
| quityears2 | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 |
| race.catA | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | -0.0226699 | -0.0784260 | -0.0459666 | -0.0669693 |
| race.catB | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | -0.2293959 | -0.3210262 | -0.4237903 | -0.4832460 |
| race.catH | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | -0.5393309 | -0.8860188 | -0.9087899 | -1.0745480 |
| race.catHW | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | 0.4481362 | 0.6980772 | 0.7266344 | 0.7387063 |
| race.catO | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | -1.5362620 | -3.2426866 | -3.2254721 | -6.8849971 |
| radiation_ix1 | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | 0.4642847 | 0.5082836 | 0.6016434 | 0.5908213 |
| sex1 | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | -0.1925784 | -0.1173216 | -0.1553838 | -0.1590882 |
| smkstatus23 | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | 0.5170287 | 0.6486791 | 0.7606614 | 0.7768355 |
| stage2.ix1 | -0.7600735 | -0.7120922 | 0.000000 | 0.000000 | -1.4745838 | -1.6264616 | -1.6413689 | -1.6396480 |
| surgery_ix1 | 1.2753809 | 0.9382062 | 2.342946 | 2.343131 | 1.3680460 | 1.4165489 | 1.4773960 | 1.4743209 |
| USPSTF1 | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | -0.6529241 | -0.9045143 | -1.0360865 | -1.0507383 |
# do line-plot of GCV based coefficient estimates
range = range(c(beta.all.bic, beta.all.gcv))
par(oma = c(4, 2, 0, 0))
plot(beta.glasso.gcv, main = "GCV based estimates for cause 1", ylim = range, pch = 1, xaxt = "n", xlab = "",
ylab = "beta estimates from crrp", yaxt = "n")
points(beta.gadalasso.gcv, col = "yellow", pch = 20)
points(beta.gscad.gcv, col = "red", pch = 1)
points(beta.gmcp.gcv, col = "green", pch = 20)
axis(side = 1, at = 1:length(beta.glasso.gcv), labels = names(beta.glasso.gcv), las = 3)
axis(side = 2, at = c(-2, -1, -0.5, -0.25, 0, 0.25, 0.5, 1, 2), las = 2)
abline(h = c(-2, -1, -0.5, -0.25, 0, 0.25, 0.5, 1, 2), lty = 3)
abline(v = 1:length(beta.glasso.gcv), lty = 3, col = "grey")
legend("bottomleft", legend = c("gLASSO", "gADLASSO", "gSCAD", "gMCP"), pch = c(1, 20, 1, 20), col = c("black",
"yellow", "red", "green"), horiz = T)# do bar-plot of GCV based coefficient estimates that are > 0.1 in magnitude
vars.chosen = names(which(apply(beta.all.gcv, 1, function(x) {
any(abs(x) > 0.1)
})))
beta.vars.chosen.gcv = beta.all.gcv[rownames(beta.all.gcv) %in% vars.chosen, ]
colnames(beta.vars.chosen.gcv) = c("gLASSO", "gADALASSO", "gSCAD", "gMCP")
beta.vars.chosen.gcv.df = as.data.table(beta.vars.chosen.gcv)
beta.vars.chosen.gcv.df[, `:=`(covariates, rownames(beta.vars.chosen.gcv))]
beta.vars.chosen.gcv.df.melt = melt(beta.vars.chosen.gcv.df, id.var = c("covariates"))
ggplot(beta.vars.chosen.gcv.df.melt) + geom_col(aes(x = variable, y = value, fill = variable), position = "dodge",
show.legend = F) + ylab("Coeff. Estimates") + xlab("Penalized regression penalty") + facet_wrap(covariates ~
., nrow = 4) + theme(axis.text.x = element_text(angle = 45, hjust = 1.1, size = 12), axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 15), axis.title.y = element_text(size = 15), strip.text.x = element_text(size = 12))# save coefficients
namecombs = as.character(Reduce(function(X, Y) outer(X, Y, FUN = paste, sep = "."), list(c("beta"), c("glasso",
"gadalasso", "gscad", "gmcp"), c("gcv", "bic"))))
allcoef = data.frame(coef = names(beta.glasso.bic), mget(namecombs))
# fwrite( allcoef, paste0( 'Results/PR_coef_imp_seed12.csv' ) )There are two ways:
Method 1 (FGR by penalty): Fit FGR model with all variables to obtain an FGR object. But then within that FGR object, substitute the FGR coefficients by the penalized regression coefficients. Then, use the FGR object to predict.
Method 2 (FGR with penalty): Fit FGR model with only those variables which are selected (i.e., |coef|>a given threshold) by penalized regression. Then, use the FGR object to predict.
In Method 1, we are using penalized regression for both variable selection and estimation. In Method 2, we are using penalized regression only for variable seelction whereas estimation is done by FGR.
# fix formula
formula.FGR = as.formula(paste("Hist(Time,event) ~", paste0(colnames(X), collapse = " +")))
# build input data frame
df.FGR = data.frame(Time = data.imp$Time, event = data.imp$event, X)
# fit FGR model
fit.FGR = FGR(formula.FGR, cause = 1, data = df.FGR)
fit.FGR$call$formula = formula.FGRfit.FGR.by_glasso_gcv = fit.FGR.by_gadalasso_gcv = fit.FGR.by_gscad_gcv = fit.FGR.by_gmcp_gcv = fit.FGR
fit.FGR.by_glasso_gcv$crrFit$coef = allcoef$beta.glasso.gcv
fit.FGR.by_gadalasso_gcv$crrFit$coef = allcoef$beta.gadalasso.gcv
fit.FGR.by_gscad_gcv$crrFit$coef = allcoef$beta.gscad.gcv
fit.FGR.by_gmcp_gcv$crrFit$coef = allcoef$beta.gmcp.gcv# set time-points for prediction and evaluation
times = c(5, 10, 15) # time-points to predict at
ref.time = 10 # time-point to use for risk stratification and decision analysis later
# prediction using ordinary FGR and Method 1
pred.FGR = predictEventProb(fit.FGR, newdata = df.FGR, cause = 1, times = times)
pred.FGR.by_glasso_gcv = predictEventProb(fit.FGR.by_glasso_gcv, newdata = df.FGR, cause = 1, times = times)
pred.FGR.by_gadalasso_gcv = predictEventProb(fit.FGR.by_gadalasso_gcv, newdata = df.FGR, cause = 1, times = times)
pred.FGR.by_gscad_gcv = predictEventProb(fit.FGR.by_gscad_gcv, newdata = df.FGR, cause = 1, times = times)
pred.FGR.by_gmcp_gcv = predictEventProb(fit.FGR.by_gmcp_gcv, newdata = df.FGR, cause = 1, times = times)# load prediction evaluation source code
source(paste0(workdir, "source/source_general.R"))
source(paste0(workdir, "source/source_predbased.R"))
# FGR prediction evaluation.
cindex.FGR = cindex_predbased(pred.FGR, formula.FGR, df.FGR, times)$AppCindex[[1]] # Harrell-type C-index
sc = Score_predbased(pred = getpred_for_score(pred.FGR, df.FGR, cause = 1, times), "FGR", Hist(Time,
event) ~ 1, df.FGR, cause = 1, times = times, plots = "cal")
auc.FGR = sc$AUC$score # time-dependent (time-varying) area under the ROC (AUROC)
brier.FGR = sc$Brier$score # time-dependent (time-varying) Brier score
pdf(paste0(workdir, "tables_and_figures/FGR_performance_imp_seed12.pdf"))
### Calibration plot using pec package
j = 2
x = calPlot_predbased(pred = pred.FGR[, j], formula = formula.FGR, cause = 1, data = df.FGR, time = times[j],
method = "quantile", q = 10, ylim = c(0, 0.2), xlim = c(0, 0.2))
calplotFrame = x$plotFrame
prd = as.vector(calplotFrame[, "Pred"])
dif = as.vector(calplotFrame[, "Pred"]) - as.vector(calplotFrame[, "Obs"])
x = plot(prd, dif, xlab = paste0("Predicted ", ref.time, "-year Risk (quantile)"), ylab = "Pred. prob. - Obsd. prob.",
pch = 1, col = "blue", cex.lab = 1, cex.axis = 1, cex.main = 1, ylim = c(-0.05, 0.05))
abline(h = 0, col = "red", lty = "dotted")
### Calibration plot using riskRegression package
plotCalibration(sc, ylim = c(0, 0.5), xlim = c(0, 0.5), cens.method = "local", xlab = "Pred. risk", ylab = "Obsd. freq.",
mgp = c(3, 3, 0), round = T)
### Decile plot using estimated probability (akin to HL method)
x = myDecilePlot2(k = 10, event = df.FGR$event, Time = df.FGR$Time, PROB = pred.FGR[, j], minTime = 1,
maxTime = 20, YLIM = c(0, 0.15), MAIN = paste("Obsd. SPLC incidence by decile of estimated risk"))
### Decision curve analysis
df.FGR1 = as.data.frame(df.FGR) # Without this stdca won't work
df.FGR1$risk = pred.FGR[, j]
x = stdca(data = df.FGR1, outcome = "event", ttoutcome = "Time", timepoint = 10, xby = 0.03, predictors = "risk",
cmprsk = T, probability = T, xstop = 0.2)
title(paste("Decision Curve Analysis", cex = 1))
abline(v = 0.01, col = "gray", lty = "dotted")
legend("left", legend = c("Risk-based", "Screen All", "Screen None"), col = c("red", "blue", "dark green"),
lty = c("solid", "dotted", "solid"), lwd = 2, cex = 1, bg = "white")
mtext("fit.FGR", outer = TRUE, line = -2, cex = 1.5)
dev.off()
# LASSO-based FGR prediction evaluation. Save plots.
cindex.FGR.by_glasso = cindex_predbased(pred.FGR.by_glasso_gcv, formula.FGR, df.FGR, times)$AppCindex[[1]]
sc = Score_predbased(pred = getpred_for_score(pred.FGR.by_glasso_gcv, df.FGR, cause = 1, times), "FGR",
Hist(Time, event) ~ 1, df.FGR, cause = 1, times = times, plots = "cal")
auc.FGR.by_glasso = sc$AUC$score
brier.FGR.by_glasso = sc$Brier$score
# ADALASSO-based FGR prediction evaluation. Save plots.
cindex.FGR.by_gadalasso = cindex_predbased(pred.FGR.by_gadalasso_gcv, formula.FGR, df.FGR, times)$AppCindex[[1]]
sc = Score_predbased(pred = getpred_for_score(pred.FGR.by_gadalasso_gcv, df.FGR, cause = 1, times), "FGR",
Hist(Time, event) ~ 1, df.FGR, cause = 1, times = times, plots = "cal")
auc.FGR.by_gadalasso = sc$AUC$score
brier.FGR.by_gadalasso = sc$Brier$score
# SCAD-based FGR prediction evaluation. Save plots.
cindex.FGR.by_gscad = cindex_predbased(pred.FGR.by_gscad_gcv, formula.FGR, df.FGR, times)$AppCindex[[1]]
sc = Score_predbased(pred = getpred_for_score(pred.FGR.by_gscad_gcv, df.FGR, cause = 1, times), "FGR",
Hist(Time, event) ~ 1, df.FGR, cause = 1, times = times, plots = "cal")
auc.FGR.by_gscad = sc$AUC$score
brier.FGR.by_gscad = sc$Brier$score
# MCP-based FGR prediction evaluation. Save plots.
cindex.FGR.by_gmcp = cindex_predbased(pred.FGR.by_gmcp_gcv, formula.FGR, df.FGR, times)$AppCindex[[1]]
sc = Score_predbased(pred = getpred_for_score(pred.FGR.by_gmcp_gcv, df.FGR, cause = 1, times), "FGR",
Hist(Time, event) ~ 1, df.FGR, cause = 1, times = times, plots = "cal")
auc.FGR.by_gmcp = sc$AUC$score
brier.FGR.by_gmcp = sc$Brier$score
# print all validation measures (C-index,AUC,Brier)
mget(grep("cindex", ls(), value = T)[-1])
mget(grep("auc", ls(), value = T))
mget(grep("brier", ls(), value = T))# Fix selection threshold for penalized regression
sel.thres = 0.01
# LASSO-based selection and FGR fit
glasso.varsel = allcoef$coef[which(abs(allcoef$beta.glasso.gcv) > sel.thres)]
formula.FGR.glasso = as.formula(paste("Hist(Time,event) ~", paste0(glasso.varsel, collapse = " +")))
df.FGR.glasso = data.frame(Time = data.imp$Time, event = data.imp$event, X[, glasso.varsel])
fit.FGR.with_glasso_gcv = FGR(formula.FGR.glasso, cause = 1, data = df.FGR.glasso)
fit.FGR.with_glasso_gcv$call$formula = formula.FGR.glasso
# ADALASSO-based selection and FGR fit
gadalasso.vars = allcoef$coef[which(abs(allcoef$beta.gadalasso.gcv) > sel.thres)]
formula.FGR.gadalasso = as.formula(paste("Hist(Time,event) ~", paste0(gadalasso.vars, collapse = " +")))
df.FGR.gadalasso = data.frame(Time = data.imp$Time, event = data.imp$event, X[, gadalasso.vars])
fit.FGR.with_gadalasso_gcv = FGR(formula.FGR.gadalasso, cause = 1, data = df.FGR.gadalasso)
fit.FGR.with_gadalasso_gcv$call$formula = formula.FGR.gadalasso
# SCAD-based selection and FGR fit
gscad.vars = allcoef$coef[which(abs(allcoef$beta.gscad.gcv) > sel.thres)]
formula.FGR.gscad = as.formula(paste("Hist(Time,event) ~", paste0(gscad.vars, collapse = " +")))
df.FGR.gscad = data.frame(Time = data.imp$Time, event = data.imp$event, X[, gscad.vars])
fit.FGR.with_gscad_gcv = FGR(formula.FGR.gscad, cause = 1, data = df.FGR.gscad)
fit.FGR.with_gscad_gcv$call$formula = formula.FGR.gscad
# MCP-based selection and FGR fit
gmcp.vars = allcoef$coef[which(abs(allcoef$beta.gmcp.gcv) > sel.thres)]
formula.FGR.gmcp = as.formula(paste("Hist(Time,event) ~", paste0(gmcp.vars, collapse = " +")))
df.FGR.gmcp = data.frame(Time = data.imp$Time, event = data.imp$event, X[, gmcp.vars])
fit.FGR.with_gmcp_gcv = FGR(formula.FGR.gmcp, cause = 1, data = df.FGR.gmcp)
fit.FGR.with_gmcp_gcv$call$formula = formula.FGR.gmcp# set time-points for prediction and evaluation
times = c(5, 10, 15) # time-points to predict at
ref.time = 10 # time-point to use for risk stratification and decision analysis later
# prediction using ordinary FGR and Method 1
pred.FGR.with_glasso_gcv = predictEventProb(fit.FGR.with_glasso_gcv, newdata = df.FGR.glasso, cause = 1,
times = times)
pred.FGR.with_gadalasso_gcv = predictEventProb(fit.FGR.with_gadalasso_gcv, newdata = df.FGR.gadalasso,
cause = 1, times = times)
pred.FGR.with_gscad_gcv = predictEventProb(fit.FGR.with_gscad_gcv, newdata = df.FGR.gscad, cause = 1,
times = times)
pred.FGR.with_gmcp_gcv = predictEventProb(fit.FGR.with_gmcp_gcv, newdata = df.FGR.gmcp, cause = 1, times = times)# load prediction evaluation source code
source(paste0(workdir, "source/source_general.R"))
source(paste0(workdir, "source/source_predbased.R"))
# LASSO-based FGR prediction evaluation. Save plots.
cindex.FGR.with_glasso = cindex_predbased(pred.FGR.with_glasso_gcv, formula.FGR.glasso, df.FGR.glasso,
times)$AppCindex[[1]]
sc = Score_predbased(pred = getpred_for_score(pred.FGR.with_glasso_gcv, df.FGR.glasso, cause = 1, times),
"FGR", Hist(Time, event) ~ 1, df.FGR.glasso, cause = 1, times = times, plots = "cal")
auc.FGR.with_glasso = sc$AUC$score
brier.FGR.with_glasso = sc$Brier$score
# ADALASSO-based FGR prediction evaluation. Save plots.
cindex.FGR.with_gadalasso = cindex_predbased(pred.FGR.with_gadalasso_gcv, formula.FGR.gadalasso, df.FGR.gadalasso,
times)$AppCindex[[1]]
sc = Score_predbased(pred = getpred_for_score(pred.FGR.with_gadalasso_gcv, df.FGR.gadalasso, cause = 1,
times), "FGR", Hist(Time, event) ~ 1, df.FGR.gadalasso, cause = 1, times = times, plots = "cal")
auc.FGR.with_gadalasso = sc$AUC$score
brier.FGR.with_gadalasso = sc$Brier$score
# SCAD-based FGR prediction evaluation. Save plots.
cindex.FGR.with_gscad = cindex_predbased(pred.FGR.with_gscad_gcv, formula.FGR.gscad, df.FGR.gscad, times)$AppCindex[[1]]
sc = Score_predbased(pred = getpred_for_score(pred.FGR.with_gscad_gcv, df.FGR.gscad, cause = 1, times),
"FGR", Hist(Time, event) ~ 1, df.FGR.gscad, cause = 1, times = times, plots = "cal")
auc.FGR.with_gscad = sc$AUC$score
brier.FGR.with_gscad = sc$Brier$score
# MCP-based FGR prediction evaluation. Save plots.
cindex.FGR.with_gmcp = cindex_predbased(pred.FGR.with_gmcp_gcv, formula.FGR.gmcp, df.FGR.gmcp, times)$AppCindex[[1]]
sc = Score_predbased(pred = getpred_for_score(pred.FGR.with_gmcp_gcv, df.FGR.gmcp, cause = 1, times),
"FGR", Hist(Time, event) ~ 1, df.FGR.gmcp, cause = 1, times = times, plots = "cal")
auc.FGR.with_gmcp = sc$AUC$score
brier.FGR.with_gmcp = sc$Brier$score
# print all validation measures (C-index,AUC,Brier)
mget(grep("cindex", ls(), value = T)[-1])
mget(grep("auc", ls(), value = T))
mget(grep("brier", ls(), value = T))