0958-AH · solonkarapa · Nov 7, 2022 · Nov 8, 2022 · Nov 11, 2022 · Nov 18, 2022
diff --git a/.DS_Store b/.DS_Store
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,4 @@
+.Rproj.user
+.Rhistory
+.RData
+.Ruserdata
diff --git a/AH_code.Rproj b/AH_code.Rproj
@@ -0,0 +1,13 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 4
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
diff --git a/NB/nb_boot.R b/NB/nb_boot.R
@@ -0,0 +1,176 @@
+
+# load libraries
+library(tidyverse)
+library(rsample)
+library(reshape2)
+library(ggplot2)
+library(survminer) # for plotting theme
+library(ggsci)  # for color palette
+
+# funs
+path_funs <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/NB"
+source(paste0(path_funs, "/net_benefit_fun.R"))
+source(paste0(path_funs, "/net_benefit_wrapper_fun.R"))
+
+# data
+path_data <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/updating models"
+load(paste0(path_data, "/recalibrated_models_default.Rdata"))
+
+test.data.short <- test.data %>% mutate(lille.death.risk = 1 - Lille.surv,
+                                        lille.death.risk.updated = 1 -lille.surv.updated,
+                                        CLIF.death.risk = 1 - CLIF.surv,
+                                        clif.death.risk.updated = 1 - clif.surv.updated,
+                                        meld.death.risk = 1 - MELD.surv,
+                                        meld.death.risk.updated = 1 - meld.surv.updated,
+                                        meld.death.risk2 = 1 - MELD.surv2)
+
+##################################################################    
+############### Calculate bootstrapped NB diff  ################## 
+##################################################################  
+thresholds = seq(0.01, 0.99, 0.04)
+n_boot <- 1000
+
+
+set.seed(353)
+bt_resamples <- bootstraps(test.data.short, times = n_boot, strata = "D90_DTH")
+
+# models 
+m_lille <- map_df(bt_resamples$splits, net_benefit_treated_wrapper, model = "Lille")
+m_lille$boot_id <- rep(1:n_boot, each = length(thresholds) * 2)
+
+m_cliff <- map_df(bt_resamples$splits, net_benefit_treated_wrapper, model = "CLIF-C ACLF")
+m_cliff$boot_id <- rep(1:n_boot, each = length(thresholds) * 2)
+
+meld1 <- map_df(bt_resamples$splits, net_benefit_treated_wrapper, model = "MELD 1")
+meld1$boot_id <- rep(1:n_boot, each = length(thresholds) * 2)
+
+meld2 <- map_df(bt_resamples$splits, net_benefit_treated_wrapper, model = "MELD 2")
+meld2$boot_id <- rep(1:n_boot, each = length(thresholds) * 2)
+
+combined_df <- rbind(m_lille, m_cliff, meld1, meld2)
+
+alpha <- 0.05 # significance level
+
+data_wide <- dcast(combined_df, threshold + model + boot_id ~ status, value.var = "NB") %>%
+    group_by(threshold, boot_id, model) %>% 
+    mutate(diff_NB = Original - Updated) %>%
+    ungroup() %>%
+    group_by(threshold, model) %>%
+    summarize(meanNB_diff = mean(diff_NB),
+              low = quantile(diff_NB, alpha / 2),
+              high = quantile(diff_NB, 1 - alpha / 2))
+
+#############################################   
+###################### Plots  ###############
+############################################# 
+
+# Original vs Updated comparison plots
+combined_df %>% group_by(threshold, status, model) %>% 
+    summarize(meanNB = mean(NB),
+              low = quantile(NB, alpha/2, na.rm = T),
+              high = quantile(NB, 1 - alpha/2, na.rm = T),
+              mean_NB_all = mean(NB_all)) %>%
+    ggplot(aes(x = threshold, y = meanNB)) +
+    geom_line(aes(col = status)) +
+    #geom_ribbon(aes(ymin = low, ymax = high, fill = model, linetype = NA),  
+    #            alpha = 0.3, show.legend = F) + 
+    geom_line(aes(y = mean_NB_all), linetype = "dashed") +
+    geom_hline(yintercept = 0, linetype = "dotted") + 
+    coord_cartesian(ylim = c(-0.05, 0.4)) +
+    facet_wrap(.~ model) +
+    theme_classic2() 
+
+# Updated comparison plots
+updated <- combined_df %>% 
+    filter(model %in% c("CLIF-C ACLF", "Lille", "MELD 1")) %>%
+    group_by(threshold, status, model) %>% 
+    summarize(meanNB = mean(NB),
+              low = quantile(NB, alpha/2, na.rm = T),
+              high = quantile(NB, 1 - alpha/2, na.rm = T),
+              mean_NB_all = mean(NB_all)) %>%
+    filter(status == "Updated") %>%
+    mutate(model = ifelse(model == "MELD 1", "MELD", model)) %>%
+    ggplot(aes(x = threshold, y = meanNB)) +
+    geom_line(aes(col = model), lwd = 1) +
+    #geom_ribbon(aes(ymin = low, ymax = high, fill = model, linetype = NA),  
+    #            alpha = 0.3, show.legend = F) + 
+    geom_line(aes(y = mean_NB_all), linetype = "dashed") +
+    geom_hline(yintercept = 0, linetype = "dotted") + 
+    coord_cartesian(ylim = c(-0.05, 0.3)) +
+    labs(col = "Updated Scores", y = "Net Benefit", x = "Threshold Risk probability") +
+    #facet_wrap(.~ model) +
+    scale_color_brewer(palette = "Dark2") +
+    scale_fill_brewer(palette = "Dark2") +
+    theme_classic2() 
+
+
+# Updated original plots
+library(RColorBrewer)
+customs_cols <- brewer.pal(6, "Dark2")
+customs_cols
+
+original <- combined_df %>% 
+    #filter(model %in% c("CLIF-C ACLF", "Lille", "MELD 1")) %>%
+    group_by(threshold, status, model) %>% 
+    summarize(meanNB = mean(NB),
+              low = quantile(NB, alpha/2, na.rm = T),
+              high = quantile(NB, 1 - alpha/2, na.rm = T),
+              mean_NB_all = mean(NB_all)) %>%
+    filter(status == "Original") %>%
+    #mutate(model = ifelse(model == "MELD 1", "MELD", model)) %>%
+    ggplot(aes(x = threshold, y = meanNB)) +
+    geom_line(aes(col = model), lwd = 1) +
+    #geom_ribbon(aes(ymin = low, ymax = high, fill = model, linetype = NA),  
+    #            alpha = 0.3, show.legend = F) + 
+    geom_line(aes(y = mean_NB_all), linetype = "dashed") +
+    geom_hline(yintercept = 0, linetype = "dotted") + 
+    coord_cartesian(ylim = c(-0.05, 0.3)) +
+    labs(col = "Original Scores", y = "Net Benefit", x = "Threshold Risk probability") +
+    #facet_wrap(.~ model) +
+    #scale_color_brewer(palette = "Dark2", type = "diverging") +
+    scale_color_manual(values = customs_cols[-3]) +
+    theme_classic2() 
+
+combined_df %>% 
+    #filter(model %in% c("CLIF-C ACLF", "Lille", "MELD 1")) %>%
+    group_by(threshold, status, model) %>% 
+    summarize(meanNB = mean(NB),
+              low = quantile(NB, alpha/2, na.rm = T),
+              high = quantile(NB, 1 - alpha/2, na.rm = T),
+              mean_NB_all = mean(NB_all), 
+              mean_FPR = mean(FPR)) %>% 
+    #filter(status == "Original") %>% 
+    filter(model == "Lille" & threshold == 0.45)
+
+data_wide %>% filter(model == "Lille" & threshold == 0.45)
+
+tp <- sum(test.data.short$lille.death.risk >= 0.45 & test.data.short$D90_DTH == 1)
+tp    
+fp <- sum(test.data.short$lille.death.risk >= 0.45 & test.data.short$D90_DTH == 0)
+fp
+fp/tp
+
+tp <- sum(test.data.short$lille.death.risk.updated >= 0.45 & test.data.short$D90_DTH == 1)
+tp    
+fp <- sum(test.data.short$lille.death.risk.update >= 0.45 & test.data.short$D90_DTH == 0)
+fp
+
+fp/tp
+
+
+
+#################
+ggplot(data_wide, aes(x = threshold, y = meanNB_diff)) +
+    geom_line(lwd = 1) +
+    geom_ribbon(aes(ymin = low, ymax = high, linetype = NA),  
+                alpha = 0.3, show.legend = F) +
+    geom_hline(yintercept = 0, linetype = "dashed") +
+    labs(y = "Difference in Net Benefit", x = "Threshold Risk Probability") +
+    facet_wrap(.~ model, scales = "free_y") +
+    theme_classic2()
+
+# lille threshold 
+data_wide %>% filter(model == "Lille" & threshold == 0.45)
+
+data_wide %>% filter(model == "MELD 1" & threshold == 0.45)
+#################
diff --git a/NB/net_benefit_fun.R b/NB/net_benefit_fun.R
@@ -0,0 +1,82 @@
+# function to calculate the NB of the treated i.e those 
+# receiving treatment (risk > risk_threshold)
+net_benefit_treated <- function(pred_y, obs_y, risk_threshold){
+    # pred_y = (vector) of the predicted probabilty of outcome, must be 0 < pred_y < 1
+    # obs_y = (vector) of the observed outcome, must be 1 = event and 0 = non-event 
+    # risk_threshold = (vector) of the risk thresholds, must be 0 < risk_threshold < 1 
+
+    # start checks
+    # if (!all(pred_y >= 0) | !all(pred_y <= 1)){
+    #    stop("The predicted values must range between 0 and 1")
+    # }
+
+    if (!all(between(pred_y, 0, 1))){
+        stop("The predicted values must range between 0 and 1")
+    }
+
+    if (!all(obs_y == 0 | obs_y == 1)){
+        stop("Outcome must be coded 0 (non event) and 1 (event)")
+    }
+
+    # if (!all(risk_threshold >= 0) | !all(risk_threshold <= 1)){
+    #    stop("The risk threshold must range between 0 and 1")
+    # }
+
+    if (!all(between(risk_threshold, 0, 1))){
+        stop("The risk threshold must range between 0 and 1")
+    }
+
+    # end checks
+
+    # number of observations
+    N <- length(obs_y)
+
+    # prevalence Pr(obs_y = 1)
+    rho = mean(obs_y == 1)
+
+    # initialize empty vectors
+    tpr <- numeric(length(risk_threshold))
+    fpr <- numeric(length(risk_threshold))
+    nb <- numeric(length(risk_threshold))
+    snb <- numeric(length(risk_threshold))
+    U_all <- numeric(length(risk_threshold))
+    sU_all <- numeric(length(risk_threshold))
+
+    # loop through each risk threshold and calcualte tpr, fpr, nb, snb, U_all, sU_all
+    for(i in 1:length(risk_threshold)){
+
+        # true positive rate Pr(pred_y > risk_threshold | obs_y = 1)
+        tpr[i] <- sum(pred_y >= risk_threshold[i] & obs_y == 1)/sum(obs_y == 1)
+
+        # false positive rate Pr(pred_y > risk_threshold | obs_y = 0)
+        fpr[i] <- sum(pred_y >= risk_threshold[i] & obs_y == 0)/sum(obs_y == 0) # check again it works ok 
+
+        # net benefit
+        nb[i] = tpr[i] * rho - (risk_threshold[i]/(1 - risk_threshold[i])) * (1 - rho) * fpr[i]
+
+        # standardized net benefit
+        snb[i] = nb[i]/rho
+
+        # treat all NB = (U_all - U_none)
+        U_all[i] = rho - (1 - rho) * (risk_threshold[i]/(1- risk_threshold[i]))
+
+        # treat all sNB 
+        sU_all[i] = U_all[i]/rho
+
+    }
+
+    # expected utility treat none
+    U_none = 0
+
+    output  = data.frame("threshold" = risk_threshold,
+                         "TPR" = tpr,
+                         "FPR" = fpr,
+                         "NB" = nb,
+                         "sNB" = snb,
+                         "rho" = rho,
+                         "NB_none" = U_none, 
+                         "NB_all" = U_all,
+                         "sNB_all" = sU_all)
+    return(output)
+
+}
diff --git a/NB/net_benefit_wrapper_fun.R b/NB/net_benefit_wrapper_fun.R
@@ -0,0 +1,28 @@
+
+
+net_benefit_treated_wrapper <- function(splits, model){
+
+    x <- analysis(splits)
+
+    if (model == "Lille"){
+        nb_original <- net_benefit_treated(x$lille.death.risk, x$D90_DTH, risk_threshold = thresholds)
+        nb_update <- net_benefit_treated(x$lille.death.risk.updated, x$D90_DTH, risk_threshold = thresholds)
+    } else if (model == "CLIF-C ACLF"){
+        nb_original <- net_benefit_treated(x$CLIF.death.risk, x$D90_DTH, risk_threshold = thresholds)
+        nb_update <- net_benefit_treated(x$clif.death.risk.updated, x$D90_DTH, risk_threshold = thresholds)
+    } else if ( model == "MELD 1"){
+        nb_original <- net_benefit_treated(x$meld.death.risk, x$D90_DTH, risk_threshold = thresholds)
+        nb_update <- net_benefit_treated(x$meld.death.risk.updated, x$D90_DTH, risk_threshold = thresholds)
+    } else if ( model == "MELD 2"){
+        nb_original <- net_benefit_treated(x$meld.death.risk2, x$D90_DTH, risk_threshold = thresholds)
+        nb_update <- net_benefit_treated(x$meld.death.risk.updated, x$D90_DTH, risk_threshold = thresholds)
+    }
+
+    nb_original$status <- "Original"
+    nb_update$status <- "Updated"
+
+    nb <- rbind(nb_original, nb_update)
+    nb$model <- model
+
+    return(nb)
+}
diff --git a/README.md b/README.md
@@ -23,4 +23,9 @@ The repository contains several folders, with the following files in them:
      - subgroup_comparison.R. File that is used to analyse model performance in sub-groups of patients. 
 
 In order to perform the analysis, the files import_data.R and prognostic_scores.R should always be run first. Generally, files may only work after another file is run first. As the data is not published, however, I would recommend using this code as a guideline for comparable analyses, rather than as a perfect example of how model validations and comparisons should always be done. 
+<<<<<<< HEAD
+
+---- 
+=======
+>>>>>>> origin
 
diff --git a/grid_arrange_fun.R b/grid_arrange_fun.R
@@ -0,0 +1,44 @@
+
+# https://cran.r-project.org/web/packages/egg/vignettes/Ecosystem.html
+
+grid_arrange_shared_legend <-
+    function(...,
+             ncol = length(list(...)),
+             nrow = 1,
+             position = c("bottom", "right")) {
+
+        plots <- list(...)
+        position <- match.arg(position)
+        g <-
+            ggplotGrob(plots[[1]] + theme(legend.position = position))$grobs
+        legend <- g[[which(sapply(g, function(x)
+            x$name) == "guide-box")]]
+        lheight <- sum(legend$height)
+        lwidth <- sum(legend$width)
+        gl <- lapply(plots, function(x)
+            x + theme(legend.position = "none"))
+        gl <- c(gl, ncol = ncol, nrow = nrow)
+
+        combined <- switch(
+            position,
+            "bottom" = arrangeGrob(
+                do.call(arrangeGrob, gl),
+                legend,
+                ncol = 1,
+                heights = unit.c(unit(1, "npc") - lheight, lheight)
+            ),
+            "right" = arrangeGrob(
+                do.call(arrangeGrob, gl),
+                legend,
+                ncol = 2,
+                widths = unit.c(unit(1, "npc") - lwidth, lwidth)
+            )
+        )
+
+        grid.newpage()
+        grid.draw(combined)
+
+        # return gtable invisibly
+        invisible(combined)
+
+    }