Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
1477503
calibration and discrimination stats
solonkarapa Nov 7, 2022
21755f0
recalibration code
solonkarapa Nov 8, 2022
dd9514e
calibration after updating
solonkarapa Nov 11, 2022
895a5f6
subgroup analysis
solonkarapa Nov 18, 2022
b759f18
subgroups
solonkarapa Dec 14, 2022
1dad4ad
sensitivity analysis
solonkarapa Dec 16, 2022
9c565a6
auc plots
solonkarapa Dec 22, 2022
c8f69d7
fix deleted file
solonkarapa Dec 23, 2022
80c79f6
agreement script
solonkarapa Jan 4, 2023
2bda5ab
chech AUC
solonkarapa Jan 11, 2023
96be57a
general update
solonkarapa Jan 27, 2023
c594f1c
add arrange plot function
solonkarapa Feb 17, 2023
4439ba4
plots update
solonkarapa Feb 17, 2023
d16a6b5
add agreement
solonkarapa Feb 17, 2023
96b283a
trial
solonkarapa Feb 17, 2023
7f99b8c
delete a few lines
solonkarapa Feb 17, 2023
da6f79f
com
solonkarapa Feb 17, 2023
456a644
trial 2
solonkarapa Feb 17, 2023
621cf20
commit new ReadMe
solonkarapa Feb 17, 2023
d0dd203
fix commit
solonkarapa Feb 17, 2023
18ab76d
fix agreement.R
solonkarapa Feb 17, 2023
f8144c7
plots
solonkarapa Apr 5, 2023
6f9f934
update plots
solonkarapa Apr 9, 2023
e8582f9
censoring
solonkarapa Jul 5, 2023
bb4a11d
icc calculations
solonkarapa Jul 12, 2023
3aa61e9
table 1 stratified by outcome
solonkarapa Jul 31, 2023
6a423f8
6-month analysis and imputation
solonkarapa Jul 31, 2023
fff6d66
imputation calibration
solonkarapa Aug 1, 2023
153bda7
imputation sensitivity
solonkarapa Aug 2, 2023
877e41a
discrimination imputation
solonkarapa Aug 7, 2023
d72ed4c
fix plots
solonkarapa Aug 7, 2023
bae268b
imputation plots and data
solonkarapa Aug 8, 2023
23233fb
calibration sensitivity
solonkarapa Aug 8, 2023
ba6bc45
imputation plots and write-up
solonkarapa Aug 11, 2023
8864637
tables
solonkarapa Aug 14, 2023
96625ad
discrimination problematic file
solonkarapa Aug 15, 2023
ed95c1d
calibration stats
solonkarapa Aug 21, 2023
aaa298e
6m calibration slope and intercept
solonkarapa Aug 22, 2023
155876c
train/test split
solonkarapa Aug 24, 2023
83a102f
imputation update
solonkarapa Nov 7, 2023
d405aad
optimal cut-off auc
solonkarapa Nov 8, 2023
c820004
net benefit
solonkarapa Nov 9, 2023
075c8b9
nb code
solonkarapa Nov 10, 2023
909df92
nb boot
solonkarapa Nov 15, 2023
7d84ab9
nb boot 2
solonkarapa Nov 15, 2023
48ea787
nb changes
solonkarapa Nov 16, 2023
60c9250
import and process validation dataset
solonkarapa May 3, 2024
0f0c293
save path
solonkarapa May 3, 2024
7bea0f5
calibration validation
solonkarapa May 9, 2024
9702fb4
external validation
solonkarapa May 10, 2024
ef0f205
temporal calibration
solonkarapa May 17, 2024
94e526e
minor changes
solonkarapa May 24, 2024
21715b4
minor changes 2
solonkarapa May 24, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .DS_Store
Binary file not shown.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
.Rproj.user
.Rhistory
.RData
.Ruserdata
13 changes: 13 additions & 0 deletions AH_code.Rproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
Version: 1.0

RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 4
Encoding: UTF-8

RnwWeave: Sweave
LaTeX: pdfLaTeX
176 changes: 176 additions & 0 deletions NB/nb_boot.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@

# load libraries
library(tidyverse)
library(rsample)
library(reshape2)
library(ggplot2)
library(survminer) # for plotting theme
library(ggsci) # for color palette

# funs
path_funs <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/NB"
source(paste0(path_funs, "/net_benefit_fun.R"))
source(paste0(path_funs, "/net_benefit_wrapper_fun.R"))

# data
path_data <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/updating models"
load(paste0(path_data, "/recalibrated_models_default.Rdata"))

test.data.short <- test.data %>% mutate(lille.death.risk = 1 - Lille.surv,
lille.death.risk.updated = 1 -lille.surv.updated,
CLIF.death.risk = 1 - CLIF.surv,
clif.death.risk.updated = 1 - clif.surv.updated,
meld.death.risk = 1 - MELD.surv,
meld.death.risk.updated = 1 - meld.surv.updated,
meld.death.risk2 = 1 - MELD.surv2)

##################################################################
############### Calculate bootstrapped NB diff ##################
##################################################################
thresholds = seq(0.01, 0.99, 0.04)
n_boot <- 1000


set.seed(353)
bt_resamples <- bootstraps(test.data.short, times = n_boot, strata = "D90_DTH")

# models
m_lille <- map_df(bt_resamples$splits, net_benefit_treated_wrapper, model = "Lille")
m_lille$boot_id <- rep(1:n_boot, each = length(thresholds) * 2)

m_cliff <- map_df(bt_resamples$splits, net_benefit_treated_wrapper, model = "CLIF-C ACLF")
m_cliff$boot_id <- rep(1:n_boot, each = length(thresholds) * 2)

meld1 <- map_df(bt_resamples$splits, net_benefit_treated_wrapper, model = "MELD 1")
meld1$boot_id <- rep(1:n_boot, each = length(thresholds) * 2)

meld2 <- map_df(bt_resamples$splits, net_benefit_treated_wrapper, model = "MELD 2")
meld2$boot_id <- rep(1:n_boot, each = length(thresholds) * 2)

combined_df <- rbind(m_lille, m_cliff, meld1, meld2)

alpha <- 0.05 # significance level

data_wide <- dcast(combined_df, threshold + model + boot_id ~ status, value.var = "NB") %>%
group_by(threshold, boot_id, model) %>%
mutate(diff_NB = Original - Updated) %>%
ungroup() %>%
group_by(threshold, model) %>%
summarize(meanNB_diff = mean(diff_NB),
low = quantile(diff_NB, alpha / 2),
high = quantile(diff_NB, 1 - alpha / 2))

#############################################
###################### Plots ###############
#############################################

# Original vs Updated comparison plots
combined_df %>% group_by(threshold, status, model) %>%
summarize(meanNB = mean(NB),
low = quantile(NB, alpha/2, na.rm = T),
high = quantile(NB, 1 - alpha/2, na.rm = T),
mean_NB_all = mean(NB_all)) %>%
ggplot(aes(x = threshold, y = meanNB)) +
geom_line(aes(col = status)) +
#geom_ribbon(aes(ymin = low, ymax = high, fill = model, linetype = NA),
# alpha = 0.3, show.legend = F) +
geom_line(aes(y = mean_NB_all), linetype = "dashed") +
geom_hline(yintercept = 0, linetype = "dotted") +
coord_cartesian(ylim = c(-0.05, 0.4)) +
facet_wrap(.~ model) +
theme_classic2()

# Updated comparison plots
updated <- combined_df %>%
filter(model %in% c("CLIF-C ACLF", "Lille", "MELD 1")) %>%
group_by(threshold, status, model) %>%
summarize(meanNB = mean(NB),
low = quantile(NB, alpha/2, na.rm = T),
high = quantile(NB, 1 - alpha/2, na.rm = T),
mean_NB_all = mean(NB_all)) %>%
filter(status == "Updated") %>%
mutate(model = ifelse(model == "MELD 1", "MELD", model)) %>%
ggplot(aes(x = threshold, y = meanNB)) +
geom_line(aes(col = model), lwd = 1) +
#geom_ribbon(aes(ymin = low, ymax = high, fill = model, linetype = NA),
# alpha = 0.3, show.legend = F) +
geom_line(aes(y = mean_NB_all), linetype = "dashed") +
geom_hline(yintercept = 0, linetype = "dotted") +
coord_cartesian(ylim = c(-0.05, 0.3)) +
labs(col = "Updated Scores", y = "Net Benefit", x = "Threshold Risk probability") +
#facet_wrap(.~ model) +
scale_color_brewer(palette = "Dark2") +
scale_fill_brewer(palette = "Dark2") +
theme_classic2()


# Updated original plots
library(RColorBrewer)
customs_cols <- brewer.pal(6, "Dark2")
customs_cols

original <- combined_df %>%
#filter(model %in% c("CLIF-C ACLF", "Lille", "MELD 1")) %>%
group_by(threshold, status, model) %>%
summarize(meanNB = mean(NB),
low = quantile(NB, alpha/2, na.rm = T),
high = quantile(NB, 1 - alpha/2, na.rm = T),
mean_NB_all = mean(NB_all)) %>%
filter(status == "Original") %>%
#mutate(model = ifelse(model == "MELD 1", "MELD", model)) %>%
ggplot(aes(x = threshold, y = meanNB)) +
geom_line(aes(col = model), lwd = 1) +
#geom_ribbon(aes(ymin = low, ymax = high, fill = model, linetype = NA),
# alpha = 0.3, show.legend = F) +
geom_line(aes(y = mean_NB_all), linetype = "dashed") +
geom_hline(yintercept = 0, linetype = "dotted") +
coord_cartesian(ylim = c(-0.05, 0.3)) +
labs(col = "Original Scores", y = "Net Benefit", x = "Threshold Risk probability") +
#facet_wrap(.~ model) +
#scale_color_brewer(palette = "Dark2", type = "diverging") +
scale_color_manual(values = customs_cols[-3]) +
theme_classic2()

combined_df %>%
#filter(model %in% c("CLIF-C ACLF", "Lille", "MELD 1")) %>%
group_by(threshold, status, model) %>%
summarize(meanNB = mean(NB),
low = quantile(NB, alpha/2, na.rm = T),
high = quantile(NB, 1 - alpha/2, na.rm = T),
mean_NB_all = mean(NB_all),
mean_FPR = mean(FPR)) %>%
#filter(status == "Original") %>%
filter(model == "Lille" & threshold == 0.45)

data_wide %>% filter(model == "Lille" & threshold == 0.45)

tp <- sum(test.data.short$lille.death.risk >= 0.45 & test.data.short$D90_DTH == 1)
tp
fp <- sum(test.data.short$lille.death.risk >= 0.45 & test.data.short$D90_DTH == 0)
fp
fp/tp

tp <- sum(test.data.short$lille.death.risk.updated >= 0.45 & test.data.short$D90_DTH == 1)
tp
fp <- sum(test.data.short$lille.death.risk.update >= 0.45 & test.data.short$D90_DTH == 0)
fp

fp/tp



#################
ggplot(data_wide, aes(x = threshold, y = meanNB_diff)) +
geom_line(lwd = 1) +
geom_ribbon(aes(ymin = low, ymax = high, linetype = NA),
alpha = 0.3, show.legend = F) +
geom_hline(yintercept = 0, linetype = "dashed") +
labs(y = "Difference in Net Benefit", x = "Threshold Risk Probability") +
facet_wrap(.~ model, scales = "free_y") +
theme_classic2()

# lille threshold
data_wide %>% filter(model == "Lille" & threshold == 0.45)

data_wide %>% filter(model == "MELD 1" & threshold == 0.45)
#################
82 changes: 82 additions & 0 deletions NB/net_benefit_fun.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# function to calculate the NB of the treated i.e those
# receiving treatment (risk > risk_threshold)
net_benefit_treated <- function(pred_y, obs_y, risk_threshold){
# pred_y = (vector) of the predicted probabilty of outcome, must be 0 < pred_y < 1
# obs_y = (vector) of the observed outcome, must be 1 = event and 0 = non-event
# risk_threshold = (vector) of the risk thresholds, must be 0 < risk_threshold < 1

# start checks
# if (!all(pred_y >= 0) | !all(pred_y <= 1)){
# stop("The predicted values must range between 0 and 1")
# }

if (!all(between(pred_y, 0, 1))){
stop("The predicted values must range between 0 and 1")
}

if (!all(obs_y == 0 | obs_y == 1)){
stop("Outcome must be coded 0 (non event) and 1 (event)")
}

# if (!all(risk_threshold >= 0) | !all(risk_threshold <= 1)){
# stop("The risk threshold must range between 0 and 1")
# }

if (!all(between(risk_threshold, 0, 1))){
stop("The risk threshold must range between 0 and 1")
}

# end checks

# number of observations
N <- length(obs_y)

# prevalence Pr(obs_y = 1)
rho = mean(obs_y == 1)

# initialize empty vectors
tpr <- numeric(length(risk_threshold))
fpr <- numeric(length(risk_threshold))
nb <- numeric(length(risk_threshold))
snb <- numeric(length(risk_threshold))
U_all <- numeric(length(risk_threshold))
sU_all <- numeric(length(risk_threshold))

# loop through each risk threshold and calcualte tpr, fpr, nb, snb, U_all, sU_all
for(i in 1:length(risk_threshold)){

# true positive rate Pr(pred_y > risk_threshold | obs_y = 1)
tpr[i] <- sum(pred_y >= risk_threshold[i] & obs_y == 1)/sum(obs_y == 1)

# false positive rate Pr(pred_y > risk_threshold | obs_y = 0)
fpr[i] <- sum(pred_y >= risk_threshold[i] & obs_y == 0)/sum(obs_y == 0) # check again it works ok

# net benefit
nb[i] = tpr[i] * rho - (risk_threshold[i]/(1 - risk_threshold[i])) * (1 - rho) * fpr[i]

# standardized net benefit
snb[i] = nb[i]/rho

# treat all NB = (U_all - U_none)
U_all[i] = rho - (1 - rho) * (risk_threshold[i]/(1- risk_threshold[i]))

# treat all sNB
sU_all[i] = U_all[i]/rho

}

# expected utility treat none
U_none = 0

output = data.frame("threshold" = risk_threshold,
"TPR" = tpr,
"FPR" = fpr,
"NB" = nb,
"sNB" = snb,
"rho" = rho,
"NB_none" = U_none,
"NB_all" = U_all,
"sNB_all" = sU_all)
return(output)

}
28 changes: 28 additions & 0 deletions NB/net_benefit_wrapper_fun.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@


net_benefit_treated_wrapper <- function(splits, model){

x <- analysis(splits)

if (model == "Lille"){
nb_original <- net_benefit_treated(x$lille.death.risk, x$D90_DTH, risk_threshold = thresholds)
nb_update <- net_benefit_treated(x$lille.death.risk.updated, x$D90_DTH, risk_threshold = thresholds)
} else if (model == "CLIF-C ACLF"){
nb_original <- net_benefit_treated(x$CLIF.death.risk, x$D90_DTH, risk_threshold = thresholds)
nb_update <- net_benefit_treated(x$clif.death.risk.updated, x$D90_DTH, risk_threshold = thresholds)
} else if ( model == "MELD 1"){
nb_original <- net_benefit_treated(x$meld.death.risk, x$D90_DTH, risk_threshold = thresholds)
nb_update <- net_benefit_treated(x$meld.death.risk.updated, x$D90_DTH, risk_threshold = thresholds)
} else if ( model == "MELD 2"){
nb_original <- net_benefit_treated(x$meld.death.risk2, x$D90_DTH, risk_threshold = thresholds)
nb_update <- net_benefit_treated(x$meld.death.risk.updated, x$D90_DTH, risk_threshold = thresholds)
}

nb_original$status <- "Original"
nb_update$status <- "Updated"

nb <- rbind(nb_original, nb_update)
nb$model <- model

return(nb)
}
5 changes: 5 additions & 0 deletions README.md
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,9 @@ The repository contains several folders, with the following files in them:
- subgroup_comparison.R. File that is used to analyse model performance in sub-groups of patients.

In order to perform the analysis, the files import_data.R and prognostic_scores.R should always be run first. Generally, files may only work after another file is run first. As the data is not published, however, I would recommend using this code as a guideline for comparable analyses, rather than as a perfect example of how model validations and comparisons should always be done.
<<<<<<< HEAD

----
=======
>>>>>>> origin

44 changes: 44 additions & 0 deletions grid_arrange_fun.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@

# https://cran.r-project.org/web/packages/egg/vignettes/Ecosystem.html

grid_arrange_shared_legend <-
function(...,
ncol = length(list(...)),
nrow = 1,
position = c("bottom", "right")) {

plots <- list(...)
position <- match.arg(position)
g <-
ggplotGrob(plots[[1]] + theme(legend.position = position))$grobs
legend <- g[[which(sapply(g, function(x)
x$name) == "guide-box")]]
lheight <- sum(legend$height)
lwidth <- sum(legend$width)
gl <- lapply(plots, function(x)
x + theme(legend.position = "none"))
gl <- c(gl, ncol = ncol, nrow = nrow)

combined <- switch(
position,
"bottom" = arrangeGrob(
do.call(arrangeGrob, gl),
legend,
ncol = 1,
heights = unit.c(unit(1, "npc") - lheight, lheight)
),
"right" = arrangeGrob(
do.call(arrangeGrob, gl),
legend,
ncol = 2,
widths = unit.c(unit(1, "npc") - lwidth, lwidth)
)
)

grid.newpage()
grid.draw(combined)

# return gtable invisibly
invisible(combined)

}
Loading