Different persons like to write/organize codes in different ways. I generally followed the following points.
More spacious. I would write x = c(2,3)
rather than x=c(2,3)
.
I used “=” instead of “<-” in all places where they are consequentially the same. [Note: Only in particular cases, “<-” is different than “=.” For example, cor(x=1:3,y=4:6)
and cor(x<-1:3,y<-4:6)
will give the same output 1, but in the latter case only, x and y will also get defined in main environment. As an aside, if you want to define a varible within the body of a function but also in the main environment, use like this: a = function(){ff <<- 2}
. Here, on running the function a
, ff
will also get defined in the main environment.]
Loop: For multiline for/if/while loop, I kept the curly brackets ({}) vertically aligned. Thus, instead of writing
for(i in 1:10){
= i
a = i + 1
b }
I wrote
for(i in 1:10)
{= i
a = i + 1
b }
data
directory, the source files are in source
directory, the analysis result files are saved as R workspace within Results
directory, and plots and tables are saved within tables_and_figures
directory.# empty workspace
rm(list = ls())
# load library
library(data.table) # for data input/output
library(mice) # for imputation using mice() function
library(crrp) # for penalized regression analysis using gcrrp() function
library(kableExtra) # for printing nice looking tables using functions like column_spec()
library(ggplot2) # for making nice looking compact plots using data.table object
library(riskRegression) # for fitting Fine and Gray model using FGR()
library(pec) # for predicting cumulative incidence from fitted CR object using predictEventProb()
library(prodlim)
# set directories
= "/Users/nsanyal/Downloads/PR_for_CR/" workdir
= fread(paste0(workdir, "data/example_SPLC_data.csv"))
D dim(D)
## [1] 1000 20
# copy data.table into another object
= copy(D) # D0=D will not copy, just will assign another name, so changing one will change the other
D0
# extract column 'event' as vector
# or D$event
D[, event]
# extract column 'event' as data.table
"event"] # or D[,.(event)]
D[,
# extract columns 'Time' and 'event' as data.table
c("Time", "event")] # or D[,.(Time,event)]
D[,
# extract columns with names saved in a variable
= c("Time", "event", "edu", "sex")
cnames # or D[,cnames,with=F]
D[, ..cnames]
# sort data by increasing order of 'Time'
setkey(D, Time) # or setorder(D,Time)
# sort data by increasing order of 'Time' (first) and 'edu' (next)
setkey(D, Time, edu) # or setorder(D,Time,edu)
# sort data by decreasing order of 'Time'
setorder(D, -Time) # or setorderv(D,'Time',order=-1). setkey allows sorting only by increasing order.
# sort data by alphabetically ordered column names (convenient for looking up a column name)
setcolorder(D, sort(names(D)))
# reset data as in the beginning
= copy(D0) D
# exclude never smokers (1=never, 2=former, 3=current)
= D[!D$smkstatus2 == 1, ]
D dim(D)
# [1] 6372 102
# Choose variables to include and subset data
= c("Time", "event", "age.ix.cat", "bmi", "chemo_ix", "cigday2", "copd", "edu", "fh", "hist2.ix",
vars.include "packyears2", "ph", "quityears2", "race.cat", "radiation_ix", "sex", "smkstatus2", "stage2.ix", "surgery_ix",
"USPSTF")
= D[, ..vars.include]
data $age.ix.cat = unname(sapply(data$age.ix.cat, function(x) switch(x, `71-80` = "71.80", `80+` = "80",
data`61-70` = "61.70", `<55` = "55", `56-60` = "56.60"))) # replace characters -,< and + by .
# Set missing code, if any, to NA. Then, convert categorical variables to factor.
$age.ix.cat = relevel(as.factor(data$age.ix.cat), ref = "61.70")
data
$chemo_ix = ifelse(data$chemo_ix == 9, NA, data$chemo_ix)
data$chemo_ix = relevel(as.factor(data$chemo_ix), ref = 1)
data
$copd = ifelse(data$copd == 9, NA, data$copd)
data$copd = as.factor(as.character(data$copd))
data
$edu = relevel(as.factor(as.character(data$edu)), ref = "2") # 2 = college group is reference
data
$fh = as.factor(as.character(data$fh))
data
$hist2.ix = relevel(as.factor(data$hist2.ix), ref = "SQ")
data
$ph = as.factor(as.character(data$ph))
data
$race.cat = relevel(as.factor(data$race.cat), ref = "W")
data
$radiation_ix = ifelse(data$radiation_ix == 9, NA, data$radiation_ix)
data$radiation_ix = relevel(as.factor(as.character(data$radiation_ix)), ref = "0") # 0 = No radiotherapy
data
$sex = relevel(as.factor(as.character(data$sex)), ref = "0") # 0 = Female
data$smkstatus2 = relevel(as.factor(as.character(data$smkstatus2)), ref = "2") # 2 = Former
data
$stage2.ix = ifelse(data$stage2.ix == 9, NA, data$stage2.ix)
data$stage2.ix = relevel(as.factor(as.character(data$stage2.ix)), ref = "0") # 0 = No stage 2 primary LC
data
$surgery_ix = ifelse(data$surgery_ix == 9, NA, data$surgery_ix)
data$surgery_ix = relevel(as.factor(data$surgery_ix), ref = 1)
data
$USPSTF = relevel(as.factor(as.character(data$USPSTF)), ref = "0") # 0 = No stage 2 primary LC data
= c(12)
seed = mice(data = data, m = 1, maxit = 5, print = T, seed = seed)
mi = complete(mi, 1)
data.imp # save( list = c('mi','data.imp'), file = paste0('Results/data_imp_seed',seed,'.Rdata') )
# load imputed data
load(paste0(workdir, "Results/data_imp_seed12.Rdata"))
# create design matrix and
= data.imp[, -c(1:2)]
mat = as.formula(paste(" ~ ", paste(colnames(mat), collapse = " + ")))
expr = model.matrix(expr, mat)[, -1]
X
# create group indicator (for inputing in gcrrp() function) by counting number of groups created in X
= rep(1:ncol(mat), times = sapply(mat, function(x) {
group if (class(x) == "numeric") return(1) else return(length(unique(x)) - 1)
}))
# fit LASSO
= gcrrp(time = data.imp$Time, fstatus = data.imp$event, failcode = 1, X = X, group = group,
fit.glasso penalty = "gLASSO", lambda.min = 0.001, nlambda = 50) # default choices.
# run str(fit.glasso) to see the structure of fit.glasso object.
# fit adaptive LASSO
= gcrrp(time = data.imp$Time, fstatus = data.imp$event, failcode = 1, X = X, group = group,
fit.gadalasso penalty = "gLASSO", weighted = T, lambda.min = 0.001, nlambda = 50) # default choices.
# fit SCAD
= gcrrp(time = data.imp$Time, fstatus = data.imp$event, failcode = 1, X = X, group = group,
fit.gscad penalty = "gSCAD", lambda.min = 0.001, nlambda = 50, gamma = 3.7) # default choices.
# fit MCP
= gcrrp(time = data.imp$Time, fstatus = data.imp$event, failcode = 1, X = X, group = group,
fit.gmcp penalty = "gMCP", lambda.min = 0.001, nlambda = 50, , gamma = 2.7) # default choices.
# save fits
save(fit.glasso, file = paste0(workdir, "Results/gLASSO_fit_imp_seed12.Rdata"))
save(fit.gadalasso, file = paste0(workdir, "Results/gADALASSO_fit_imp_seed12.Rdata"))
save(fit.gscad, file = paste0(workdir, "Results/gSCAD_fit_imp_seed12.Rdata"))
save(fit.gmcp, file = paste0(workdir, "Results/gMCP_fit_imp_seed12.Rdata"))
# load fits
load(paste0(workdir, "Results/gLASSO_fit_imp_seed12.Rdata"))
load(paste0(workdir, "Results/gADALASSO_fit_imp_seed12.Rdata"))
load(paste0(workdir, "Results/gSCAD_fit_imp_seed12.Rdata"))
load(paste0(workdir, "Results/gMCP_fit_imp_seed12.Rdata"))
# extract coefficients for both GCV and BIC based tuning parameter selection
= fit.glasso$beta[, which.min(fit.glasso$BIC)]
beta.glasso.bic = fit.gadalasso$beta[, which.min(fit.gadalasso$BIC)]
beta.gadalasso.bic = fit.gscad$beta[, which.min(fit.gscad$BIC)]
beta.gscad.bic = fit.gmcp$beta[, which.min(fit.gmcp$BIC)]
beta.gmcp.bic
= fit.glasso$beta[, which.min(fit.glasso$GCV)]
beta.glasso.gcv = fit.gadalasso$beta[, which.min(fit.gadalasso$GCV)]
beta.gadalasso.gcv = fit.gscad$beta[, which.min(fit.gscad$GCV)]
beta.gscad.gcv = fit.gmcp$beta[, which.min(fit.gmcp$GCV)]
beta.gmcp.gcv
# make coefficient x method table
= cbind(beta.glasso.bic, beta.gadalasso.bic, beta.gscad.bic, beta.gmcp.bic)
beta.all.bic = cbind(beta.glasso.gcv, beta.gadalasso.gcv, beta.gscad.gcv, beta.gmcp.gcv)
beta.all.gcv
kable(cbind(beta.all.bic, beta.all.gcv), row.names = T, caption = "GCV and BIC based variable selection for cause 1",
linesep = "") %>% column_spec(5, border_right = T) %>% kable_styling(full_width = F)
beta.glasso.bic | beta.gadalasso.bic | beta.gscad.bic | beta.gmcp.bic | beta.glasso.gcv | beta.gadalasso.gcv | beta.gscad.gcv | beta.gmcp.gcv | |
---|---|---|---|---|---|---|---|---|
age.ix.cat55 | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | -1.4575606 | -2.6698418 | -6.8191033 | -6.8150809 |
age.ix.cat56.60 | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | 0.0747861 | -0.0485878 | -0.1423634 | -0.1618113 |
age.ix.cat71.80 | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | -0.3730483 | -0.5435110 | -0.6720156 | -0.6657066 |
age.ix.cat80 | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | -0.0992690 | -0.1745293 | -0.2088444 | -0.1993495 |
bmi | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | 0.0233010 | 0.0149302 | 0.0406255 | 0.0414460 |
chemo_ix1 | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | -0.8936411 | -1.0492702 | -1.2220898 | -1.2302037 |
cigday2 | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 |
copd1 | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | 0.0052276 | 0.0000000 | 0.0276097 | 0.0000000 |
edu1 | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | 0.1257211 | 0.0148201 | 0.1081530 | 0.1119839 |
edu3 | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | 0.3815423 | 0.0459760 | 0.2882186 | 0.2879215 |
fh1 | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | -0.4381514 | -0.4582016 | -0.4893955 | -0.5254502 |
hist2.ixAD | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | 0.8633487 | 1.1292301 | 1.1764403 | 1.1829045 |
hist2.ixLC | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | 1.1540491 | 1.5064344 | 1.6203130 | 1.6199115 |
hist2.ixOTH | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | 0.5612671 | 0.7131332 | 0.7077201 | 0.7055595 |
hist2.ixSC | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | -0.4435133 | -1.7466435 | -5.4763283 | -5.4651681 |
packyears2 | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | -0.0010113 | 0.0000000 | 0.0000000 | 0.0000000 |
ph1 | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | 0.0103285 | 0.0000000 | 0.0000000 | 0.0000000 |
quityears2 | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 |
race.catA | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | -0.0226699 | -0.0784260 | -0.0459666 | -0.0669693 |
race.catB | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | -0.2293959 | -0.3210262 | -0.4237903 | -0.4832460 |
race.catH | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | -0.5393309 | -0.8860188 | -0.9087899 | -1.0745480 |
race.catHW | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | 0.4481362 | 0.6980772 | 0.7266344 | 0.7387063 |
race.catO | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | -1.5362620 | -3.2426866 | -3.2254721 | -6.8849971 |
radiation_ix1 | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | 0.4642847 | 0.5082836 | 0.6016434 | 0.5908213 |
sex1 | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | -0.1925784 | -0.1173216 | -0.1553838 | -0.1590882 |
smkstatus23 | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | 0.5170287 | 0.6486791 | 0.7606614 | 0.7768355 |
stage2.ix1 | -0.7600735 | -0.7120922 | 0.000000 | 0.000000 | -1.4745838 | -1.6264616 | -1.6413689 | -1.6396480 |
surgery_ix1 | 1.2753809 | 0.9382062 | 2.342946 | 2.343131 | 1.3680460 | 1.4165489 | 1.4773960 | 1.4743209 |
USPSTF1 | 0.0000000 | 0.0000000 | 0.000000 | 0.000000 | -0.6529241 | -0.9045143 | -1.0360865 | -1.0507383 |
# do line-plot of GCV based coefficient estimates
= range(c(beta.all.bic, beta.all.gcv))
range
par(oma = c(4, 2, 0, 0))
plot(beta.glasso.gcv, main = "GCV based estimates for cause 1", ylim = range, pch = 1, xaxt = "n", xlab = "",
ylab = "beta estimates from crrp", yaxt = "n")
points(beta.gadalasso.gcv, col = "yellow", pch = 20)
points(beta.gscad.gcv, col = "red", pch = 1)
points(beta.gmcp.gcv, col = "green", pch = 20)
axis(side = 1, at = 1:length(beta.glasso.gcv), labels = names(beta.glasso.gcv), las = 3)
axis(side = 2, at = c(-2, -1, -0.5, -0.25, 0, 0.25, 0.5, 1, 2), las = 2)
abline(h = c(-2, -1, -0.5, -0.25, 0, 0.25, 0.5, 1, 2), lty = 3)
abline(v = 1:length(beta.glasso.gcv), lty = 3, col = "grey")
legend("bottomleft", legend = c("gLASSO", "gADLASSO", "gSCAD", "gMCP"), pch = c(1, 20, 1, 20), col = c("black",
"yellow", "red", "green"), horiz = T)
# do bar-plot of GCV based coefficient estimates that are > 0.1 in magnitude
= names(which(apply(beta.all.gcv, 1, function(x) {
vars.chosen any(abs(x) > 0.1)
})))= beta.all.gcv[rownames(beta.all.gcv) %in% vars.chosen, ]
beta.vars.chosen.gcv colnames(beta.vars.chosen.gcv) = c("gLASSO", "gADALASSO", "gSCAD", "gMCP")
= as.data.table(beta.vars.chosen.gcv)
beta.vars.chosen.gcv.df `:=`(covariates, rownames(beta.vars.chosen.gcv))]
beta.vars.chosen.gcv.df[, = melt(beta.vars.chosen.gcv.df, id.var = c("covariates"))
beta.vars.chosen.gcv.df.melt
ggplot(beta.vars.chosen.gcv.df.melt) + geom_col(aes(x = variable, y = value, fill = variable), position = "dodge",
show.legend = F) + ylab("Coeff. Estimates") + xlab("Penalized regression penalty") + facet_wrap(covariates ~
nrow = 4) + theme(axis.text.x = element_text(angle = 45, hjust = 1.1, size = 12), axis.text.y = element_text(size = 12),
., axis.title.x = element_text(size = 15), axis.title.y = element_text(size = 15), strip.text.x = element_text(size = 12))
# save coefficients
= as.character(Reduce(function(X, Y) outer(X, Y, FUN = paste, sep = "."), list(c("beta"), c("glasso",
namecombs "gadalasso", "gscad", "gmcp"), c("gcv", "bic"))))
= data.frame(coef = names(beta.glasso.bic), mget(namecombs))
allcoef # fwrite( allcoef, paste0( 'Results/PR_coef_imp_seed12.csv' ) )
There are two ways:
Method 1 (FGR by penalty): Fit FGR model with all variables to obtain an FGR object. But then within that FGR object, substitute the FGR coefficients by the penalized regression coefficients. Then, use the FGR object to predict.
Method 2 (FGR with penalty): Fit FGR model with only those variables which are selected (i.e., |coef|>a given threshold) by penalized regression. Then, use the FGR object to predict.
In Method 1, we are using penalized regression for both variable selection and estimation. In Method 2, we are using penalized regression only for variable seelction whereas estimation is done by FGR.
# fix formula
= as.formula(paste("Hist(Time,event) ~", paste0(colnames(X), collapse = " +")))
formula.FGR
# build input data frame
= data.frame(Time = data.imp$Time, event = data.imp$event, X)
df.FGR
# fit FGR model
= FGR(formula.FGR, cause = 1, data = df.FGR)
fit.FGR $call$formula = formula.FGR fit.FGR
= fit.FGR.by_gadalasso_gcv = fit.FGR.by_gscad_gcv = fit.FGR.by_gmcp_gcv = fit.FGR
fit.FGR.by_glasso_gcv
$crrFit$coef = allcoef$beta.glasso.gcv
fit.FGR.by_glasso_gcv$crrFit$coef = allcoef$beta.gadalasso.gcv
fit.FGR.by_gadalasso_gcv$crrFit$coef = allcoef$beta.gscad.gcv
fit.FGR.by_gscad_gcv$crrFit$coef = allcoef$beta.gmcp.gcv fit.FGR.by_gmcp_gcv
# set time-points for prediction and evaluation
= c(5, 10, 15) # time-points to predict at
times = 10 # time-point to use for risk stratification and decision analysis later
ref.time
# prediction using ordinary FGR and Method 1
= predictEventProb(fit.FGR, newdata = df.FGR, cause = 1, times = times)
pred.FGR = predictEventProb(fit.FGR.by_glasso_gcv, newdata = df.FGR, cause = 1, times = times)
pred.FGR.by_glasso_gcv = predictEventProb(fit.FGR.by_gadalasso_gcv, newdata = df.FGR, cause = 1, times = times)
pred.FGR.by_gadalasso_gcv = predictEventProb(fit.FGR.by_gscad_gcv, newdata = df.FGR, cause = 1, times = times)
pred.FGR.by_gscad_gcv = predictEventProb(fit.FGR.by_gmcp_gcv, newdata = df.FGR, cause = 1, times = times) pred.FGR.by_gmcp_gcv
# load prediction evaluation source code
source(paste0(workdir, "source/source_general.R"))
source(paste0(workdir, "source/source_predbased.R"))
# FGR prediction evaluation.
= cindex_predbased(pred.FGR, formula.FGR, df.FGR, times)$AppCindex[[1]] # Harrell-type C-index
cindex.FGR = Score_predbased(pred = getpred_for_score(pred.FGR, df.FGR, cause = 1, times), "FGR", Hist(Time,
sc ~ 1, df.FGR, cause = 1, times = times, plots = "cal")
event) = sc$AUC$score # time-dependent (time-varying) area under the ROC (AUROC)
auc.FGR = sc$Brier$score # time-dependent (time-varying) Brier score
brier.FGR
pdf(paste0(workdir, "tables_and_figures/FGR_performance_imp_seed12.pdf"))
### Calibration plot using pec package
= 2
j = calPlot_predbased(pred = pred.FGR[, j], formula = formula.FGR, cause = 1, data = df.FGR, time = times[j],
x method = "quantile", q = 10, ylim = c(0, 0.2), xlim = c(0, 0.2))
= x$plotFrame
calplotFrame = as.vector(calplotFrame[, "Pred"])
prd = as.vector(calplotFrame[, "Pred"]) - as.vector(calplotFrame[, "Obs"])
dif = plot(prd, dif, xlab = paste0("Predicted ", ref.time, "-year Risk (quantile)"), ylab = "Pred. prob. - Obsd. prob.",
x pch = 1, col = "blue", cex.lab = 1, cex.axis = 1, cex.main = 1, ylim = c(-0.05, 0.05))
abline(h = 0, col = "red", lty = "dotted")
### Calibration plot using riskRegression package
plotCalibration(sc, ylim = c(0, 0.5), xlim = c(0, 0.5), cens.method = "local", xlab = "Pred. risk", ylab = "Obsd. freq.",
mgp = c(3, 3, 0), round = T)
### Decile plot using estimated probability (akin to HL method)
= myDecilePlot2(k = 10, event = df.FGR$event, Time = df.FGR$Time, PROB = pred.FGR[, j], minTime = 1,
x maxTime = 20, YLIM = c(0, 0.15), MAIN = paste("Obsd. SPLC incidence by decile of estimated risk"))
### Decision curve analysis
= as.data.frame(df.FGR) # Without this stdca won't work
df.FGR1 $risk = pred.FGR[, j]
df.FGR1= stdca(data = df.FGR1, outcome = "event", ttoutcome = "Time", timepoint = 10, xby = 0.03, predictors = "risk",
x cmprsk = T, probability = T, xstop = 0.2)
title(paste("Decision Curve Analysis", cex = 1))
abline(v = 0.01, col = "gray", lty = "dotted")
legend("left", legend = c("Risk-based", "Screen All", "Screen None"), col = c("red", "blue", "dark green"),
lty = c("solid", "dotted", "solid"), lwd = 2, cex = 1, bg = "white")
mtext("fit.FGR", outer = TRUE, line = -2, cex = 1.5)
dev.off()
# LASSO-based FGR prediction evaluation. Save plots.
= cindex_predbased(pred.FGR.by_glasso_gcv, formula.FGR, df.FGR, times)$AppCindex[[1]]
cindex.FGR.by_glasso = Score_predbased(pred = getpred_for_score(pred.FGR.by_glasso_gcv, df.FGR, cause = 1, times), "FGR",
sc Hist(Time, event) ~ 1, df.FGR, cause = 1, times = times, plots = "cal")
= sc$AUC$score
auc.FGR.by_glasso = sc$Brier$score
brier.FGR.by_glasso
# ADALASSO-based FGR prediction evaluation. Save plots.
= cindex_predbased(pred.FGR.by_gadalasso_gcv, formula.FGR, df.FGR, times)$AppCindex[[1]]
cindex.FGR.by_gadalasso = Score_predbased(pred = getpred_for_score(pred.FGR.by_gadalasso_gcv, df.FGR, cause = 1, times), "FGR",
sc Hist(Time, event) ~ 1, df.FGR, cause = 1, times = times, plots = "cal")
= sc$AUC$score
auc.FGR.by_gadalasso = sc$Brier$score
brier.FGR.by_gadalasso
# SCAD-based FGR prediction evaluation. Save plots.
= cindex_predbased(pred.FGR.by_gscad_gcv, formula.FGR, df.FGR, times)$AppCindex[[1]]
cindex.FGR.by_gscad = Score_predbased(pred = getpred_for_score(pred.FGR.by_gscad_gcv, df.FGR, cause = 1, times), "FGR",
sc Hist(Time, event) ~ 1, df.FGR, cause = 1, times = times, plots = "cal")
= sc$AUC$score
auc.FGR.by_gscad = sc$Brier$score
brier.FGR.by_gscad
# MCP-based FGR prediction evaluation. Save plots.
= cindex_predbased(pred.FGR.by_gmcp_gcv, formula.FGR, df.FGR, times)$AppCindex[[1]]
cindex.FGR.by_gmcp = Score_predbased(pred = getpred_for_score(pred.FGR.by_gmcp_gcv, df.FGR, cause = 1, times), "FGR",
sc Hist(Time, event) ~ 1, df.FGR, cause = 1, times = times, plots = "cal")
= sc$AUC$score
auc.FGR.by_gmcp = sc$Brier$score
brier.FGR.by_gmcp
# print all validation measures (C-index,AUC,Brier)
mget(grep("cindex", ls(), value = T)[-1])
mget(grep("auc", ls(), value = T))
mget(grep("brier", ls(), value = T))
# Fix selection threshold for penalized regression
= 0.01
sel.thres
# LASSO-based selection and FGR fit
= allcoef$coef[which(abs(allcoef$beta.glasso.gcv) > sel.thres)]
glasso.varsel = as.formula(paste("Hist(Time,event) ~", paste0(glasso.varsel, collapse = " +")))
formula.FGR.glasso = data.frame(Time = data.imp$Time, event = data.imp$event, X[, glasso.varsel])
df.FGR.glasso = FGR(formula.FGR.glasso, cause = 1, data = df.FGR.glasso)
fit.FGR.with_glasso_gcv $call$formula = formula.FGR.glasso
fit.FGR.with_glasso_gcv
# ADALASSO-based selection and FGR fit
= allcoef$coef[which(abs(allcoef$beta.gadalasso.gcv) > sel.thres)]
gadalasso.vars = as.formula(paste("Hist(Time,event) ~", paste0(gadalasso.vars, collapse = " +")))
formula.FGR.gadalasso = data.frame(Time = data.imp$Time, event = data.imp$event, X[, gadalasso.vars])
df.FGR.gadalasso = FGR(formula.FGR.gadalasso, cause = 1, data = df.FGR.gadalasso)
fit.FGR.with_gadalasso_gcv $call$formula = formula.FGR.gadalasso
fit.FGR.with_gadalasso_gcv
# SCAD-based selection and FGR fit
= allcoef$coef[which(abs(allcoef$beta.gscad.gcv) > sel.thres)]
gscad.vars = as.formula(paste("Hist(Time,event) ~", paste0(gscad.vars, collapse = " +")))
formula.FGR.gscad = data.frame(Time = data.imp$Time, event = data.imp$event, X[, gscad.vars])
df.FGR.gscad = FGR(formula.FGR.gscad, cause = 1, data = df.FGR.gscad)
fit.FGR.with_gscad_gcv $call$formula = formula.FGR.gscad
fit.FGR.with_gscad_gcv
# MCP-based selection and FGR fit
= allcoef$coef[which(abs(allcoef$beta.gmcp.gcv) > sel.thres)]
gmcp.vars = as.formula(paste("Hist(Time,event) ~", paste0(gmcp.vars, collapse = " +")))
formula.FGR.gmcp = data.frame(Time = data.imp$Time, event = data.imp$event, X[, gmcp.vars])
df.FGR.gmcp = FGR(formula.FGR.gmcp, cause = 1, data = df.FGR.gmcp)
fit.FGR.with_gmcp_gcv $call$formula = formula.FGR.gmcp fit.FGR.with_gmcp_gcv
# set time-points for prediction and evaluation
= c(5, 10, 15) # time-points to predict at
times = 10 # time-point to use for risk stratification and decision analysis later
ref.time
# prediction using ordinary FGR and Method 1
= predictEventProb(fit.FGR.with_glasso_gcv, newdata = df.FGR.glasso, cause = 1,
pred.FGR.with_glasso_gcv times = times)
= predictEventProb(fit.FGR.with_gadalasso_gcv, newdata = df.FGR.gadalasso,
pred.FGR.with_gadalasso_gcv cause = 1, times = times)
= predictEventProb(fit.FGR.with_gscad_gcv, newdata = df.FGR.gscad, cause = 1,
pred.FGR.with_gscad_gcv times = times)
= predictEventProb(fit.FGR.with_gmcp_gcv, newdata = df.FGR.gmcp, cause = 1, times = times) pred.FGR.with_gmcp_gcv
# load prediction evaluation source code
source(paste0(workdir, "source/source_general.R"))
source(paste0(workdir, "source/source_predbased.R"))
# LASSO-based FGR prediction evaluation. Save plots.
= cindex_predbased(pred.FGR.with_glasso_gcv, formula.FGR.glasso, df.FGR.glasso,
cindex.FGR.with_glasso $AppCindex[[1]]
times)= Score_predbased(pred = getpred_for_score(pred.FGR.with_glasso_gcv, df.FGR.glasso, cause = 1, times),
sc "FGR", Hist(Time, event) ~ 1, df.FGR.glasso, cause = 1, times = times, plots = "cal")
= sc$AUC$score
auc.FGR.with_glasso = sc$Brier$score
brier.FGR.with_glasso
# ADALASSO-based FGR prediction evaluation. Save plots.
= cindex_predbased(pred.FGR.with_gadalasso_gcv, formula.FGR.gadalasso, df.FGR.gadalasso,
cindex.FGR.with_gadalasso $AppCindex[[1]]
times)= Score_predbased(pred = getpred_for_score(pred.FGR.with_gadalasso_gcv, df.FGR.gadalasso, cause = 1,
sc "FGR", Hist(Time, event) ~ 1, df.FGR.gadalasso, cause = 1, times = times, plots = "cal")
times), = sc$AUC$score
auc.FGR.with_gadalasso = sc$Brier$score
brier.FGR.with_gadalasso
# SCAD-based FGR prediction evaluation. Save plots.
= cindex_predbased(pred.FGR.with_gscad_gcv, formula.FGR.gscad, df.FGR.gscad, times)$AppCindex[[1]]
cindex.FGR.with_gscad = Score_predbased(pred = getpred_for_score(pred.FGR.with_gscad_gcv, df.FGR.gscad, cause = 1, times),
sc "FGR", Hist(Time, event) ~ 1, df.FGR.gscad, cause = 1, times = times, plots = "cal")
= sc$AUC$score
auc.FGR.with_gscad = sc$Brier$score
brier.FGR.with_gscad
# MCP-based FGR prediction evaluation. Save plots.
= cindex_predbased(pred.FGR.with_gmcp_gcv, formula.FGR.gmcp, df.FGR.gmcp, times)$AppCindex[[1]]
cindex.FGR.with_gmcp = Score_predbased(pred = getpred_for_score(pred.FGR.with_gmcp_gcv, df.FGR.gmcp, cause = 1, times),
sc "FGR", Hist(Time, event) ~ 1, df.FGR.gmcp, cause = 1, times = times, plots = "cal")
= sc$AUC$score
auc.FGR.with_gmcp = sc$Brier$score
brier.FGR.with_gmcp
# print all validation measures (C-index,AUC,Brier)
mget(grep("cindex", ls(), value = T)[-1])
mget(grep("auc", ls(), value = T))
mget(grep("brier", ls(), value = T))