diff --git a/.DS_Store b/.DS_Store new file mode 100755 index 0000000..32a24ca Binary files /dev/null and b/.DS_Store differ diff --git a/.gitignore b/.gitignore new file mode 100755 index 0000000..5b6a065 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.Rproj.user +.Rhistory +.RData +.Ruserdata diff --git a/AH_code.Rproj b/AH_code.Rproj new file mode 100755 index 0000000..066341e --- /dev/null +++ b/AH_code.Rproj @@ -0,0 +1,13 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 4 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX diff --git a/NB/nb_boot.R b/NB/nb_boot.R new file mode 100644 index 0000000..587762f --- /dev/null +++ b/NB/nb_boot.R @@ -0,0 +1,176 @@ + +# load libraries +library(tidyverse) +library(rsample) +library(reshape2) +library(ggplot2) +library(survminer) # for plotting theme +library(ggsci) # for color palette + +# funs +path_funs <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/NB" +source(paste0(path_funs, "/net_benefit_fun.R")) +source(paste0(path_funs, "/net_benefit_wrapper_fun.R")) + +# data +path_data <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/updating models" +load(paste0(path_data, "/recalibrated_models_default.Rdata")) + +test.data.short <- test.data %>% mutate(lille.death.risk = 1 - Lille.surv, + lille.death.risk.updated = 1 -lille.surv.updated, + CLIF.death.risk = 1 - CLIF.surv, + clif.death.risk.updated = 1 - clif.surv.updated, + meld.death.risk = 1 - MELD.surv, + meld.death.risk.updated = 1 - meld.surv.updated, + meld.death.risk2 = 1 - MELD.surv2) + +################################################################## +############### Calculate bootstrapped NB diff ################## +################################################################## +thresholds = seq(0.01, 0.99, 0.04) +n_boot <- 1000 + + +set.seed(353) +bt_resamples <- bootstraps(test.data.short, times = n_boot, strata = "D90_DTH") + +# models +m_lille <- map_df(bt_resamples$splits, net_benefit_treated_wrapper, model = "Lille") +m_lille$boot_id <- rep(1:n_boot, each = length(thresholds) * 2) + +m_cliff <- map_df(bt_resamples$splits, net_benefit_treated_wrapper, model = "CLIF-C ACLF") +m_cliff$boot_id <- rep(1:n_boot, each = length(thresholds) * 2) + +meld1 <- map_df(bt_resamples$splits, net_benefit_treated_wrapper, model = "MELD 1") +meld1$boot_id <- rep(1:n_boot, each = length(thresholds) * 2) + +meld2 <- map_df(bt_resamples$splits, net_benefit_treated_wrapper, model = "MELD 2") +meld2$boot_id <- rep(1:n_boot, each = length(thresholds) * 2) + +combined_df <- rbind(m_lille, m_cliff, meld1, meld2) + +alpha <- 0.05 # significance level + +data_wide <- dcast(combined_df, threshold + model + boot_id ~ status, value.var = "NB") %>% + group_by(threshold, boot_id, model) %>% + mutate(diff_NB = Original - Updated) %>% + ungroup() %>% + group_by(threshold, model) %>% + summarize(meanNB_diff = mean(diff_NB), + low = quantile(diff_NB, alpha / 2), + high = quantile(diff_NB, 1 - alpha / 2)) + +############################################# +###################### Plots ############### +############################################# + +# Original vs Updated comparison plots +combined_df %>% group_by(threshold, status, model) %>% + summarize(meanNB = mean(NB), + low = quantile(NB, alpha/2, na.rm = T), + high = quantile(NB, 1 - alpha/2, na.rm = T), + mean_NB_all = mean(NB_all)) %>% + ggplot(aes(x = threshold, y = meanNB)) + + geom_line(aes(col = status)) + 
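+  # dashed line = net benefit of treating all; dotted line = treat none (NB = 0)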
+ #geom_ribbon(aes(ymin = low, ymax = high, fill = model, linetype = NA),
+ # alpha = 0.3, show.legend = F) +
+ geom_line(aes(y = mean_NB_all), linetype = "dashed") +
+ geom_hline(yintercept = 0, linetype = "dotted") +
+ coord_cartesian(ylim = c(-0.05, 0.4)) +
+ facet_wrap(.~ model) +
+ theme_classic2()
+
+# Updated comparison plots
+updated <- combined_df %>%
+ filter(model %in% c("CLIF-C ACLF", "Lille", "MELD 1")) %>%
+ group_by(threshold, status, model) %>%
+ summarize(meanNB = mean(NB),
+ low = quantile(NB, alpha/2, na.rm = T),
+ high = quantile(NB, 1 - alpha/2, na.rm = T),
+ mean_NB_all = mean(NB_all)) %>%
+ filter(status == "Updated") %>%
+ mutate(model = ifelse(model == "MELD 1", "MELD", model)) %>%
+ ggplot(aes(x = threshold, y = meanNB)) +
+ geom_line(aes(col = model), lwd = 1) +
+ #geom_ribbon(aes(ymin = low, ymax = high, fill = model, linetype = NA),
+ # alpha = 0.3, show.legend = F) +
+ geom_line(aes(y = mean_NB_all), linetype = "dashed") +
+ geom_hline(yintercept = 0, linetype = "dotted") +
+ coord_cartesian(ylim = c(-0.05, 0.3)) +
+ labs(col = "Updated Scores", y = "Net Benefit", x = "Threshold Risk Probability") +
+ #facet_wrap(.~ model) +
+ scale_color_brewer(palette = "Dark2") +
+ scale_fill_brewer(palette = "Dark2") +
+ theme_classic2()
+
+
+# Original comparison plots
+library(RColorBrewer)
+customs_cols <- brewer.pal(6, "Dark2")
+customs_cols
+
+original <- combined_df %>%
+ #filter(model %in% c("CLIF-C ACLF", "Lille", "MELD 1")) %>%
+ group_by(threshold, status, model) %>%
+ summarize(meanNB = mean(NB),
+ low = quantile(NB, alpha/2, na.rm = T),
+ high = quantile(NB, 1 - alpha/2, na.rm = T),
+ mean_NB_all = mean(NB_all)) %>%
+ filter(status == "Original") %>%
+ #mutate(model = ifelse(model == "MELD 1", "MELD", model)) %>%
+ ggplot(aes(x = threshold, y = meanNB)) +
+ geom_line(aes(col = model), lwd = 1) +
+ #geom_ribbon(aes(ymin = low, ymax = high, fill = model, linetype = NA),
+ # alpha = 0.3, show.legend = F) +
+ geom_line(aes(y = mean_NB_all), linetype = "dashed") +
+ geom_hline(yintercept = 0, linetype = "dotted") +
+ coord_cartesian(ylim = c(-0.05, 0.3)) +
+ labs(col = "Original Scores", y = "Net Benefit", x = "Threshold Risk Probability") +
+ #facet_wrap(.~ model) +
+ #scale_color_brewer(palette = "Dark2", type = "diverging") +
+ scale_color_manual(values = customs_cols[-3]) +
+ theme_classic2()
+
+combined_df %>%
+ #filter(model %in% c("CLIF-C ACLF", "Lille", "MELD 1")) %>%
+ group_by(threshold, status, model) %>%
+ summarize(meanNB = mean(NB),
+ low = quantile(NB, alpha/2, na.rm = T),
+ high = quantile(NB, 1 - alpha/2, na.rm = T),
+ mean_NB_all = mean(NB_all),
+ mean_FPR = mean(FPR)) %>%
+ #filter(status == "Original") %>%
+ filter(model == "Lille" & threshold == 0.45)
+
+data_wide %>% filter(model == "Lille" & threshold == 0.45)
+
+tp <- sum(test.data.short$lille.death.risk >= 0.45 & test.data.short$D90_DTH == 1)
+tp
+fp <- sum(test.data.short$lille.death.risk >= 0.45 & test.data.short$D90_DTH == 0)
+fp
+fp/tp
+
+tp <- sum(test.data.short$lille.death.risk.updated >= 0.45 & test.data.short$D90_DTH == 1)
+tp
+fp <- sum(test.data.short$lille.death.risk.updated >= 0.45 & test.data.short$D90_DTH == 0)
+fp
+
+fp/tp
+
+
+
+#################
+ggplot(data_wide, aes(x = threshold, y = meanNB_diff)) +
+ geom_line(lwd = 1) +
+ geom_ribbon(aes(ymin = low, ymax = high, linetype = NA),
+ alpha = 0.3, show.legend = F) +
+ geom_hline(yintercept = 0, linetype = "dashed") +
+ labs(y = "Difference in Net Benefit", x = "Threshold Risk Probability") +
+ facet_wrap(.~ model, scales = "free_y") +
+ theme_classic2()
+
+# lille threshold
+data_wide %>% filter(model == "Lille" & threshold == 0.45)
+
+data_wide %>% filter(model == "MELD 1" & threshold == 0.45)
+#################
\ No newline at end of file
diff --git a/NB/net_benefit_fun.R b/NB/net_benefit_fun.R new file mode 100644 index 0000000..4601779 --- /dev/null +++ b/NB/net_benefit_fun.R @@ -0,0 +1,82 @@
+# function to calculate the NB of the treated, i.e. those
+# receiving treatment (risk > risk_threshold)
+net_benefit_treated <- function(pred_y, obs_y, risk_threshold){
+ # pred_y = (vector) of the predicted probability of outcome, must be 0 < pred_y < 1
+ # obs_y = (vector) of the observed outcome, must be 1 = event and 0 = non-event
+ # risk_threshold = (vector) of the risk thresholds, must be 0 < risk_threshold < 1
+
+ # start checks
+ # if (!all(pred_y >= 0) | !all(pred_y <= 1)){
+ # stop("The predicted values must range between 0 and 1")
+ # }
+
+ if (!all(between(pred_y, 0, 1))){
+ stop("The predicted values must range between 0 and 1")
+ }
+
+ if (!all(obs_y == 0 | obs_y == 1)){
+ stop("Outcome must be coded 0 (non event) and 1 (event)")
+ }
+
+ # if (!all(risk_threshold >= 0) | !all(risk_threshold <= 1)){
+ # stop("The risk threshold must range between 0 and 1")
+ # }
+
+ if (!all(between(risk_threshold, 0, 1))){
+ stop("The risk threshold must range between 0 and 1")
+ }
+
+ # end checks
+
+ # number of observations
+ N <- length(obs_y)
+
+ # prevalence Pr(obs_y = 1)
+ rho = mean(obs_y == 1)
+
+ # initialize empty vectors
+ tpr <- numeric(length(risk_threshold))
+ fpr <- numeric(length(risk_threshold))
+ nb <- numeric(length(risk_threshold))
+ snb <- numeric(length(risk_threshold))
+ U_all <- numeric(length(risk_threshold))
+ sU_all <- numeric(length(risk_threshold))
+
+ # loop through each risk threshold and calculate tpr, fpr, nb, snb, U_all, sU_all
+ for(i in 1:length(risk_threshold)){
+
+ # true positive rate Pr(pred_y >= risk_threshold | obs_y = 1)
+ tpr[i] <- sum(pred_y >= risk_threshold[i] & obs_y == 1)/sum(obs_y == 1)
+
+ # false positive rate Pr(pred_y >= risk_threshold | obs_y = 0)
+ fpr[i] <- sum(pred_y >= risk_threshold[i] & obs_y == 0)/sum(obs_y == 0) # check again it works ok
+
+ # net benefit
+ nb[i] = tpr[i] * rho - (risk_threshold[i]/(1 - risk_threshold[i])) * (1 - rho) * fpr[i]
+
+ # standardized net benefit
+ snb[i] = nb[i]/rho
+
+ # treat all NB = (U_all - U_none)
+ U_all[i] = rho - (1 - rho) * (risk_threshold[i]/(1- risk_threshold[i]))
+
+ # treat all sNB
+ sU_all[i] = U_all[i]/rho
+
+ }
+
+ # expected utility treat none
+ U_none = 0
+
+ output = data.frame("threshold" = risk_threshold,
+ "TPR" = tpr,
+ "FPR" = fpr,
+ "NB" = nb,
+ "sNB" = snb,
+ "rho" = rho,
+ "NB_none" = U_none,
+ "NB_all" = U_all,
+ "sNB_all" = sU_all)
+ return(output)
+
+}
diff --git a/NB/net_benefit_wrapper_fun.R b/NB/net_benefit_wrapper_fun.R new file mode 100644 index 0000000..ed4d29c --- /dev/null +++ b/NB/net_benefit_wrapper_fun.R @@ -0,0 +1,28 @@
+
+
+net_benefit_treated_wrapper <- function(splits, model){
+
+ x <- analysis(splits)
+
+ if (model == "Lille"){
+ nb_original <- net_benefit_treated(x$lille.death.risk, x$D90_DTH, risk_threshold = thresholds)
+ nb_update <- net_benefit_treated(x$lille.death.risk.updated, x$D90_DTH, risk_threshold = thresholds)
+ } else if (model == "CLIF-C ACLF"){
+ nb_original <- net_benefit_treated(x$CLIF.death.risk, x$D90_DTH, risk_threshold = thresholds)
+ nb_update <- net_benefit_treated(x$clif.death.risk.updated, x$D90_DTH, risk_threshold = thresholds)
+ } else if ( model == "MELD 
1"){ + nb_original <- net_benefit_treated(x$meld.death.risk, x$D90_DTH, risk_threshold = thresholds) + nb_update <- net_benefit_treated(x$meld.death.risk.updated, x$D90_DTH, risk_threshold = thresholds) + } else if ( model == "MELD 2"){ + nb_original <- net_benefit_treated(x$meld.death.risk2, x$D90_DTH, risk_threshold = thresholds) + nb_update <- net_benefit_treated(x$meld.death.risk.updated, x$D90_DTH, risk_threshold = thresholds) + } + + nb_original$status <- "Original" + nb_update$status <- "Updated" + + nb <- rbind(nb_original, nb_update) + nb$model <- model + + return(nb) +} diff --git a/README.md b/README.md old mode 100644 new mode 100755 index be637f8..4acf3bb --- a/README.md +++ b/README.md @@ -23,4 +23,9 @@ The repository contains several folders, with the following files in them: - subgroup_comparison.R. File that is used to analyse model performance in sub-groups of patients. In order to perform the analysis, the files import_data.R and prognostic_scores.R should always be run first. Generally, files may only work after another file is run first. As the data is not published, however, I would recommend using this code as a guideline for comparable analyses, rather than as a perfect example of how model validations and comparisons should always be done. +<<<<<<< HEAD + +---- +======= +>>>>>>> origin diff --git a/grid_arrange_fun.R b/grid_arrange_fun.R new file mode 100644 index 0000000..9343696 --- /dev/null +++ b/grid_arrange_fun.R @@ -0,0 +1,44 @@ + +# https://cran.r-project.org/web/packages/egg/vignettes/Ecosystem.html + +grid_arrange_shared_legend <- + function(..., + ncol = length(list(...)), + nrow = 1, + position = c("bottom", "right")) { + + plots <- list(...) + position <- match.arg(position) + g <- + ggplotGrob(plots[[1]] + theme(legend.position = position))$grobs + legend <- g[[which(sapply(g, function(x) + x$name) == "guide-box")]] + lheight <- sum(legend$height) + lwidth <- sum(legend$width) + gl <- lapply(plots, function(x) + x + theme(legend.position = "none")) + gl <- c(gl, ncol = ncol, nrow = nrow) + + combined <- switch( + position, + "bottom" = arrangeGrob( + do.call(arrangeGrob, gl), + legend, + ncol = 1, + heights = unit.c(unit(1, "npc") - lheight, lheight) + ), + "right" = arrangeGrob( + do.call(arrangeGrob, gl), + legend, + ncol = 2, + widths = unit.c(unit(1, "npc") - lwidth, lwidth) + ) + ) + + grid.newpage() + grid.draw(combined) + + # return gtable invisibly + invisible(combined) + + } diff --git a/original models/Calibration.R b/original models/Calibration.R old mode 100644 new mode 100755 index 507a51e..20440f2 --- a/original models/Calibration.R +++ b/original models/Calibration.R @@ -1,36 +1,122 @@ # This script performs all calculations of model calibration -library(rms) +library(ggplot2) +library(dplyr) +library(survminer) # for plotting theme + +# funs +path_funs <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/funs" +source(paste0(path_funs, "/calibration_fun.R")) +source(paste0(path_funs, "/val.prob.ci.2_wrapper.R")) + +# data +path_data <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/original models/" +load(paste0(path_data, "original_models.Rdata")) + +############################################# +############### Calculate calibration ###### +############################################# -# Create calibration plots and compute corresponding slopes and intercepts for all models # MELD_1 survival function -val.prob(stph.meld$MELD.surv, stph.meld$D90_surv, pl = TRUE, smooth = FALSE, logistic.cal = TRUE, - xlab = "Predicted 
survival probability", ylab = "Actual survival probability", - legendloc = FALSE, statloc = FALSE) +cal_MELD.surv <- calibration(stph.meld$MELD.surv, y = stph.meld$D90_surv) +cal_MELD.surv$Score <- "MELD_1" # MELD_2 survival function -val.prob(stph.meld$MELD.surv2, stph.meld$D90_surv, pl = TRUE, smooth = FALSE, logistic.cal = TRUE, - xlab = "Predicted survival probability", ylab = "Actual survival probability", - legendloc = FALSE, statloc = FALSE) +cal_MELD.surv2 <- calibration(stph.meld$MELD.surv2, y = stph.meld$D90_surv) +cal_MELD.surv2$Score <- "MELD_2" -# MELD 3.0 -val.prob(stph.meld$MELD3.surv, stph.meld$D90_surv, pl = TRUE, smooth = FALSE, logistic.cal = TRUE, - xlab = "Predicted survival probability", ylab = "Actual survival probability", - legendloc = FALSE, statloc = FALSE) +# MELD VanDerwerken +cal_MELD.VanDerwerken <- calibration(stph.meld$MELD_Van, y = stph.meld$D90_surv) +cal_MELD.VanDerwerken$Score <- "MELD VanDerwerken" +# MELD 3.0 +sum(is.na(stph.meld$MELD3.surv)) # 10 missing values due to Albumin.MELD +cal_MELD3.surv <- calibration(stph.meld$MELD3.surv[!is.na(stph.meld$MELD3.surv)], y = stph.meld$D90_surv[!is.na(stph.meld$MELD3.surv)]) +cal_MELD3.surv$Score <- "MELD 3.0" + # Lille -val.prob(stph.lille$Lille.surv, stph.lille$D90_surv, pl = TRUE, smooth = FALSE, logistic.cal = TRUE, - xlab = "Predicted survival probability", ylab = "Actual survival probability", - legendloc = FALSE, statloc = FALSE) +cal_Lille <- calibration(stph.lille$Lille.surv, y = stph.lille$D90_surv) +cal_Lille$Score <- "Lille" # CLIF-C ACLF -val.prob(stph.clif$CLIF.surv, stph.clif$D90_surv, pl = TRUE, smooth = FALSE, logistic.cal = TRUE, - xlab = "Predicted survival probability", ylab = "Actual survival probability", - legendloc = FALSE, statloc = FALSE) +cal_CLIF <- calibration(stph.clif$CLIF.surv, y = stph.clif$D90_surv) +cal_CLIF$Score <- "CLIF-C ACLF" + +# combine dfs +df_cal <- rbind(cal_MELD.surv, cal_MELD.surv2, cal_MELD.VanDerwerken, cal_MELD3.surv, cal_Lille, cal_CLIF) + +############################################# +###################### Plots ############### +############################################# + +# plot without ribbon and without MELD 3.0 +df_cal %>% filter(Score != "MELD 3.0") %>% + ggplot(., aes(x = pred, y = obs, col = Score)) + + geom_line(lwd = 1) + + geom_abline(slope = 1, intercept = 0, linetype = "dashed") + + #geom_ribbon(aes(ymin = lower, ymax = upper, linetype = NA), + # alpha = 0.3, show.legend = F) + + #scale_fill_manual("", values = col) + + #scale_color_manual(name = "Score", values = col) + + xlim(0, 1) + + ylim(0, 1) + + ylab("Observed proportion") + + xlab("Predicted probability") + + theme_classic() + +# plot with ribbon +p_calibration <- df_cal %>% filter(Score != "MELD 3.0") %>% + ggplot(., aes(x = pred, y = obs, col = Score)) + + facet_grid(. 
~ Score) + + geom_line(lwd = 1) + + geom_abline(slope = 1, intercept = 0, linetype = "dashed") + + geom_ribbon(aes(ymin = lower, ymax = upper, fill = Score, linetype = NA), + alpha = 0.3, show.legend = F) + + #scale_fill_manual("", values = col) + + #scale_color_manual(name = "Score", values = col) + + scale_color_brewer(palette = "Dark2") + + scale_fill_brewer(palette = "Dark2") + + coord_equal() + + xlim(0, 1) + + ylim(0, 1) + + labs(y = "Observed survival proportion", x = "Predicted survival probability") + + theme_classic2() + + theme(legend.position = "none") + +############################################# +############## Summary Stats ############### +############################################# + +# compute calibration slopes and intercepts + +# MELD_1 survival function +MELD1_stats <- val.prob.ci.2_wrapper(stph.meld$MELD.surv, stph.meld$D90_surv, "MELD_1") + +# MELD_2 survival function +MELD2_stats <- val.prob.ci.2_wrapper(stph.meld$MELD.surv2, stph.meld$D90_surv, "MELD_2") + +# MELD VanDerwerken +MELD_Van_stats <- val.prob.ci.2_wrapper(stph.meld$MELD_Van, stph.meld$D90_surv, "MELD VanDerwerken") + +# Lille +Lille_stats <- val.prob.ci.2_wrapper(stph.lille$Lille.surv, stph.lille$D90_surv, "Lille") + +#CLIF-C ACLF +clif_stats <- val.prob.ci.2_wrapper(stph.clif$CLIF.surv, y = stph.clif$D90_surv, "CLIF-C ACLF") + + +df_stats <- rbind(MELD1_stats, MELD2_stats, MELD_Van_stats, Lille_stats, clif_stats) %>% relocate(Score) + +#df_stats$Model <- fct_reorder(df_stats$Model, df_stats$ Point.estimate) +# adding to plot +#p_calibration + geom_text(data = df_stats %>% filter(stat == "intercept"), +# aes(0.1, 0.90, label = +# paste0("Int (95% CI): ", Point.estimate, " (", Lower.confidence.limit, "-", Upper.confidence.limit, ")"), +# hjust = 0), col = "black") #+ + + +library(xtable) +xtable((df_stats)) + + -# Calculate the Hosmer-Lemeshow goodness-of-fit statistic for all models -library(ResourceSelection) -hoslem.test(stph.meld$D90_surv, stph.meld$MELD.surv) -hoslem.test(stph.meld$D90_surv, stph.meld$MELD.surv2) -hoslem.test(stph.lille$D90_surv, stph.lille$Lille.surv) -hoslem.test(stph.clif$D90_surv, stph.clif$CLIF.surv) diff --git a/original models/Calibration_6M.R b/original models/Calibration_6M.R new file mode 100644 index 0000000..efec9f2 --- /dev/null +++ b/original models/Calibration_6M.R @@ -0,0 +1,99 @@ +# This script performs all calculations of model calibration +library(ggplot2) +library(dplyr) +library(survminer) # for plotting theme + +# funs +path_funs <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/funs" +source(paste0(path_funs, "/calibration_fun.R")) +source(paste0(path_funs, "/val.prob.ci.2_wrapper.R")) + +# data +path_data <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/original models/" +load(paste0(path_data, "original_models_6M.Rdata")) + +############################################# +############### Calculate calibration ###### +############################################# + +# MELD_1 survival function +cal_MELD.surv <- calibration(stph.meld$MELD.surv_6M, y = stph.meld$M6_surv) +cal_MELD.surv$Score <- "MELD_1" + +# Lille +cal_Lille <- calibration(stph.lille$Lille.surv_6M, y = stph.lille$M6_surv) +cal_Lille$Score <- "Lille" + +# CLIF-C ACLF +cal_CLIF <- calibration(stph.clif$CLIF.surv_6M, y = stph.clif$M6_surv) +cal_CLIF$Score <- "CLIF-C ACLF" + +# combine dfs +df_cal <- rbind(cal_MELD.surv, cal_Lille, cal_CLIF) + +############################################# +###################### Plots ############### +############################################# + 
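+# --- editorial sketch (assumption): calibration() is sourced from calibration_fun.R, which is
+# --- not part of this diff. The plotting code below expects a data frame with columns
+# --- pred, obs, lower and upper; a minimal loess-based stand-in (named calibration_sketch so
+# --- it does not mask the sourced function) could look like this.
+calibration_sketch <- function(pred, y, conf = 0.95){
+  fit <- loess(y ~ pred)                    # smooth observed outcome against predictions
+  pr  <- predict(fit, se = TRUE)            # smoothed proportion and its standard error
+  z   <- qnorm(1 - (1 - conf)/2)
+  out <- data.frame(pred  = pred,
+                    obs   = pr$fit,
+                    lower = pr$fit - z * pr$se.fit,
+                    upper = pr$fit + z * pr$se.fit)
+  out[order(out$pred), ]                    # sort so geom_line() draws a single smooth curve
+}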
+# plot without ribbon and without MELD 3.0 +df_cal %>% filter(Score != "MELD 3.0") %>% + ggplot(., aes(x = pred, y = obs, col = Score)) + + geom_line(lwd = 1) + + geom_abline(slope = 1, intercept = 0, linetype = "dashed") + + #geom_ribbon(aes(ymin = lower, ymax = upper, linetype = NA), + # alpha = 0.3, show.legend = F) + + #scale_fill_manual("", values = col) + + #scale_color_manual(name = "Score", values = col) + + xlim(0, 1) + + ylim(0, 1) + + ylab("Observed proportion") + + xlab("Predicted probability") + + theme_classic() + +# plot with ribbon +p_calibration <- df_cal %>% filter(Score != "MELD 3.0") %>% + ggplot(., aes(x = pred, y = obs, col = Score)) + + geom_line(lwd = 1) + + geom_abline(slope = 1, intercept = 0, linetype = "dashed") + + geom_ribbon(aes(ymin = lower, ymax = upper, fill = Score, linetype = NA), + alpha = 0.3, show.legend = F) + + #scale_fill_manual("", values = col) + + #scale_color_manual(name = "Score", values = col) + + facet_grid(. ~ Score) + + scale_color_manual(values = c("#1B9E77", "#D95F02", "#E7298A")) + #palette = "Dark2") + + scale_fill_manual(values = c("#1B9E77", "#D95F02", "#E7298A")) + #palette = "Dark2") + + coord_equal() + + xlim(0, 1) + + ylim(0, 1) + + labs(y = "Observed 6-month survival proportion", x = "Predicted 6-month survival probability") + + theme_classic2() + + theme(legend.position = "none") + + +############################################# +############## Summary Stats ############### +############################################# +# compute calibration slopes and intercepts + +# MELD_1 survival function +MELD_stats <- val.prob.ci.2_wrapper(stph.meld$MELD.surv_6M, stph.meld$M6_surv, "MELD_1") + +# Lille +Lille_stats <- val.prob.ci.2_wrapper(stph.lille$Lille.surv_6M, stph.lille$M6_surv, "Lille") + +#CLIF-C ACLF +clif_stats <- val.prob.ci.2_wrapper(stph.clif$CLIF.surv_6M, y = stph.clif$M6_surv, "CLIF-C ACLF") + +df_stats <- rbind(MELD_stats, Lille_stats, clif_stats) %>% relocate(Score) + +# plot +p_calibration + geom_text(data = df_stats %>% filter(stat == "intercept"), + aes(0.05, 0.95, label = + paste0("Intercept (95% CI): ", Point.estimate, " (", Lower.confidence.limit, "-", Upper.confidence.limit, ")"), + hjust = 0), col = "black") + + geom_text(data = df_stats %>% filter(stat == "slope"), + aes(0.05, 0.90, label = + paste0("Slope (95% CI): ", Point.estimate, " (", Lower.confidence.limit, "-", Upper.confidence.limit, ")"), + hjust = 0), col = "black") + + diff --git a/original models/Calibration_imp.R b/original models/Calibration_imp.R new file mode 100644 index 0000000..5225924 --- /dev/null +++ b/original models/Calibration_imp.R @@ -0,0 +1,104 @@ +# This script performs all calculations of model calibration +library(ggplot2) +library(dplyr) +library(survminer) # for plotting theme + +# funs +path_funs <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/funs" +source(paste0(path_funs, "/calibration_fun.R")) + +# imputed data +path_data <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/original models/" +load(paste0(path_data, "imputed_orig_scores.Rdata")) + +############################################# +############### Calculate calibration ###### +############################################# +imp_index <- group_split(imp_data2, .imp) + +imp_ind <- max(imp_data2$.imp) + +imp_index_Bili7 <- group_split(imp_data2_Biliday7, .imp) + +imp_ind_Bili7 <- max(imp_data2_Biliday7$.imp) + +# MELD_1 +cal_MELD.surv1 <- tibble() +for(i in 1:imp_ind){ + # MELD 1 + temp <- calibration(imp_index[[i]]$MELD.surv1, y = 
imp_index[[i]]$D90_surv) + temp$Score <- "MELD_1" + temp$.imp <- i + cal_MELD.surv1 <- rbind(cal_MELD.surv1, temp) +} +rm(temp) + +# MELD 2 +cal_MELD.surv2 <- tibble() +for(i in 1:imp_ind){ + temp <- calibration(imp_index[[i]]$MELD.surv2, y = imp_index[[i]]$D90_surv) + temp$Score <- "MELD_2" + temp$.imp <- i + cal_MELD.surv2 <- rbind(cal_MELD.surv2, temp) +} + +rm(temp) + +# MELD VanDerwerken +cal_MELD.VanDerwerken <- tibble() +for(i in 1:imp_ind){ + temp <- calibration(imp_index[[i]]$MELD_Van, y = imp_index[[i]]$D90_surv) + temp$Score <- "MELD VanDerwerken" + temp$.imp <- i + cal_MELD.VanDerwerken <- rbind(cal_MELD.VanDerwerken, temp) +} + +rm(temp) + +# Lille +cal_Lille <- tibble() +for(i in 1:imp_ind_Bili7){ + temp <- calibration(imp_index_Bili7[[i]]$Lille.surv, y = imp_index_Bili7[[i]]$D90_surv) + temp$Score <- "Lille" + temp$.imp <- i + cal_Lille <- rbind(cal_Lille, temp) +} + +rm(temp) + +# "CLIF-C ACLF" +cal_CLIF <- tibble() +for(i in 1:imp_ind){ + temp <- calibration(imp_index[[i]]$CLIF.surv, y = imp_index[[i]]$D90_surv) + temp$Score <- "CLIF-C ACLF" + temp$.imp <- i + cal_CLIF <- rbind(cal_CLIF, temp) +} + +# combine dfs +df_cal <- rbind(cal_MELD.surv1, cal_MELD.surv2, cal_MELD.VanDerwerken, cal_Lille, cal_CLIF) + +############################################# +###################### Plots ############### +############################################# +# plot without ribbon +df_cal %>% + ggplot(., aes(x = pred, y = obs, col = Score)) + + geom_line(aes(group = .imp), lwd = 0.7) + + geom_abline(slope = 1, intercept = 0, linetype = "dashed") + + #geom_ribbon(aes(ymin = lower, ymax = upper, linetype = NA), + # alpha = 0.3, show.legend = F) + + #scale_fill_manual("", values = col) + + #scale_color_manual(name = "Score", values = col) + + scale_color_brewer(palette = "Dark2") + + scale_fill_brewer(palette = "Dark2") + + facet_grid(. 
~ Score) + + xlim(0, 1) + + ylim(0, 1) + + ylab("Observed proportion") + + xlab("Predicted probability") + + theme_classic() + + theme(legend.position = "none") + + + diff --git a/original models/Calibration_imp_sens.R b/original models/Calibration_imp_sens.R new file mode 100644 index 0000000..415e3f0 --- /dev/null +++ b/original models/Calibration_imp_sens.R @@ -0,0 +1,124 @@ + +# This script performs all calculations of model calibration +library(ggplot2) +library(dplyr) +library(survminer) # for plotting theme + +# funs +path_funs <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/funs" +source(paste0(path_funs, "/calibration_fun.R")) + +# imputed data +path_data <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/original models/" +load(paste0(path_data, "imputed_sens_orig_scores.Rdata")) + +############################################# +############### Calculate calibration ###### +############################################# +imp_index <- group_split(imp_data3, .imp) + +imp_ind <- max(imp_data3$.imp) +delta <- unique(imp_data3$delta) + +# # MELD_1 +# cal_MELD.surv1 <- tibble() +# for(i in 1:imp_ind){ +# # MELD 1 +# temp <- calibration(imp_index[[i]]$MELD.surv1, y = imp_index[[i]]$D90_surv) +# temp$Score <- "MELD_1" +# temp$delta <- rep(unique(imp_index[[i]]$delta), each = 1068) +# temp$.imp <- i +# cal_MELD.surv1 <- rbind(cal_MELD.surv1, temp) +# } +# rm(temp) +# +# # MELD 2 +# cal_MELD.surv2 <- tibble() +# for(i in 1:imp_ind){ +# temp <- calibration(imp_index[[i]]$MELD.surv2, y = imp_index[[i]]$D90_surv) +# temp$Score <- "MELD_2" +# temp$delta <- rep(unique(imp_index[[i]]$delta), each = 1068) +# temp$.imp <- i +# cal_MELD.surv2 <- rbind(cal_MELD.surv2, temp) +# } +# +# rm(temp) +# +# # MELD VanDerwerken +# cal_MELD.VanDerwerken <- tibble() +# for(i in 1:imp_ind){ +# temp <- calibration(imp_index[[i]]$MELD_Van, y = imp_index[[i]]$D90_surv) +# temp$Score <- "MELD VanDerwerken" +# temp$delta <- rep(unique(imp_index[[i]]$delta), each = 1068) +# temp$.imp <- i +# cal_MELD.VanDerwerken <- rbind(cal_MELD.VanDerwerken, temp) +# } +# +# rm(temp) + +# Lille +cal_Lille <- tibble() +for(i in 1:imp_ind){ + for(g in 1:length(delta)){ + d <- unique(imp_index[[i]]$delta) + df <- imp_index[[i]] %>% filter(delta == d[g]) + + temp <- calibration(df$Lille.surv, y = df$D90_surv) + temp$Score <- "Lille" + temp$delta <- unique(df$delta) + temp$.imp <- i + cal_Lille <- rbind(cal_Lille, temp) + } +} + +# rm(temp) +# +# # "CLIF-C ACLF" +# cal_CLIF <- tibble() +# for(i in 1:imp_ind){ +# temp <- calibration(imp_index[[i]]$CLIF.surv, y = imp_index[[i]]$D90_surv) +# temp$Score <- "CLIF-C ACLF" +# temp$delta <- rep(unique(imp_index[[i]]$delta), each = 1068) +# temp$.imp <- i +# cal_CLIF <- rbind(cal_CLIF, temp) +# } +# +# # combine dfs +# df_cal <- rbind(cal_MELD.surv1, cal_MELD.surv2, cal_MELD.VanDerwerken, cal_Lille, cal_CLIF) + +############################################# +###################### Plots ############### +############################################# +# distribution of predictions +p5 <- cal_Lille %>% ggplot(.) 
+ + geom_boxplot(aes(x = pred, fill = as.factor(as.numeric(delta)))) + + scale_fill_brewer(palette = "Dark2") + + labs(fill = expression(delta)) + + xlab("Predicted survival distribution - Lille") + + theme_classic() + + theme(axis.text.y = element_blank(), + axis.ticks.y = element_blank()) + +p6 <- cal_Lille %>% filter(Score == "Lille") %>% + mutate(grp = paste0(delta, "_", .imp), + delta = as.numeric(delta)) %>% + ggplot(., aes(x = pred, y = obs, group = grp)) + + geom_line(aes(group = grp)) + + geom_line(aes(col = as.factor(delta)), alpha = 0.5) + + geom_abline(slope = 1, intercept = 0, linetype = "dashed") + + #geom_ribbon(aes(ymin = lower, ymax = upper, linetype = NA), + # alpha = 0.3, show.legend = F) + + #scale_fill_manual("", values = col) + + #scale_color_manual(name = "Score", values = col) + + #facet_grid(. ~ delta) + + xlim(0, 1) + + ylim(0, 1) + + labs(col = expression(delta)) + + scale_color_brewer(palette = "Dark2") + + ylab("Observed proportion") + + xlab("Predicted probability") + + theme_classic() + + guides(colour = guide_legend(override.aes = list(alpha = 1))) + #theme(legend.position = "none") + + diff --git a/original models/Discimination_imp.R b/original models/Discimination_imp.R new file mode 100644 index 0000000..2086c4c --- /dev/null +++ b/original models/Discimination_imp.R @@ -0,0 +1,106 @@ +# This script performs all calculations for assessing discrimination + +library(pROC) +library(dplyr) +library(ggplot2) +library(tidyr) +library(forcats) +library(purrr) + +# data +#path_data <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/original models/" +#load(paste0(path_data, "original_models.Rdata")) + +# imputed data +path_data <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/original models/" +load(paste0(path_data, "imputed_orig_scores.Rdata")) + +############################################# +############### Calculate calibration ###### +############################################# +imp_index <- group_split(imp_data2, .imp) + +imp_ind <- max(imp_data2$.imp) + +imp_index_Bili7 <- group_split(imp_data2_Biliday7, .imp) + +imp_ind_Bili7 <- max(imp_data2_Biliday7$.imp) + +# MELD_1 +cal_MELD.surv1 <- tibble() +for(i in 1:imp_ind){ + # MELD 1 + temp <- roc(imp_index[[i]]$D90_DTH, imp_index[[i]]$MELD.surv1) + temp2 <- data.frame(sensitivities = temp$sensitivities, + specificities = temp$specificities, + thresholds = temp$thresholds) + temp2$Score <- "MELD" + temp2$.imp <- i + cal_MELD.surv1 <- rbind(cal_MELD.surv1, temp2) +} +rm(temp2) + +# MELD VanDerwerken +cal_MELD.VanDerwerken <- tibble() +for(i in 1:imp_ind){ + temp <- roc(imp_index[[i]]$D90_DTH, imp_index[[i]]$MELD_Van) + temp2 <- data.frame(sensitivities = temp$sensitivities, + specificities = temp$specificities, + thresholds = temp$thresholds) + temp2$Score <- "MELD VanDerwerken" + temp2$.imp <- i + cal_MELD.VanDerwerken <- rbind(cal_MELD.VanDerwerken, temp2) +} +rm(temp2) + +# Lille +cal_Lille <- tibble() +for(i in 1:imp_ind_Bili7){ + temp <- roc(imp_index_Bili7[[i]]$D90_DTH, imp_index_Bili7[[i]]$Lille.surv) + temp2 <- data.frame(sensitivities = temp$sensitivities, + specificities = temp$specificities, + thresholds = temp$thresholds) + temp2$Score <- "Lille" + temp2$.imp <- i + cal_Lille <- rbind(cal_Lille, temp2) +} +rm(temp2) + +# "CLIF-C ACLF" +cal_CLIF <- tibble() +for(i in 1:imp_ind){ + temp <- roc(imp_index[[i]]$D90_DTH, imp_index[[i]]$CLIF.surv) + temp2 <- data.frame(sensitivities = temp$sensitivities, + specificities = temp$specificities, + thresholds = temp$thresholds) + 
temp2$Score <- "CLIF-C ACLF" + temp2$.imp <- i + cal_CLIF <- rbind(cal_CLIF, temp2) +} +rm(temp2) + +# combine dfs +df_cal <- rbind(cal_MELD.surv1, cal_MELD.VanDerwerken, cal_Lille, cal_CLIF) + +############################################# +###################### Plots ############### +############################################# +# plot without ribbon +df_cal %>% + ggplot(., aes(x = 1 - specificities, y = sensitivities, col = Score)) + + geom_line(aes(group = .imp), lwd = 0.7) + + geom_abline(slope = 1, intercept = 0, linetype = "dashed") + + #geom_ribbon(aes(ymin = lower, ymax = upper, linetype = NA), + # alpha = 0.3, show.legend = F) + + #scale_fill_manual("", values = col) + + #scale_color_manual(name = "Score", values = col) + + facet_grid(. ~ Score) + + scale_color_brewer(palette = "Dark2") + + scale_fill_brewer(palette = "Dark2") + + xlim(0, 1) + + ylim(0, 1) + + ylab("Sensitivity") + + xlab("Specificity") + + theme_classic() + + theme(legend.position = "none") + diff --git a/original models/Discrimination.R b/original models/Discrimination.R deleted file mode 100644 index 3d3b3ef..0000000 --- a/original models/Discrimination.R +++ /dev/null @@ -1,88 +0,0 @@ -# This script performs all calculations for assessing discrimination -library(tidyverse) -library(pROC) -library(dplyr) -library(ggplot2) -library(runway) - -# AUROC calculation and figure for MELD -roc_meld <- roc(stph.meld$D90_DTH, stph.meld$MELD.surv) -auc_meld <- auc(roc_meld) -auc_meld_ci <- ci.auc(roc_meld) # Confidence intervals - -roc_plot(stph.meld, "D90_surv", "MELD.surv", ci = TRUE, plot_title = "ROC curve for the MELD score") - -# AUROC calculation and figure for MELD 3.0 -roc_meld3 <- roc(stph.meld$D90_DTH, stph.meld$MELD3.surv) -auc_meld3 <- auc(roc_meld3) -auc_meld3_ci <- ci.auc(roc_meld3) # Confidence intervals - -roc_plot(stph.meld, "D90_surv", "MELD3.surv", ci = TRUE, plot_title = "ROC curve for the MELD score") - -# AUROC calculation and figure for Lille -roc_lille <- roc(stph.lille$D90_DTH, stph.lille$Lille.surv) -auc_lille <- auc(roc_lille) -auc_lille_ci <- ci.auc(roc_lille) - -roc_plot(stph.lille, "D90_surv", "Lille.surv", ci = TRUE, plot_title = "ROC curve for the Lille score") - -# AUROC calculation and figure for CLIF-C ACLF -roc_clif <- roc(stph.clif$D90_DTH, stph.clif$CLIF.surv) -auc_clif <- auc(roc_clif) -auc_clif_ci <- ci.auc(roc_clif) - -roc_plot(stph.clif, "D90_surv", "CLIF.surv", ci = TRUE, plot_title = "ROC curve for the CLIF-C ACLF score") - -# ROC plot of all models combined -library(ggROC) -ggroc(list(MELD = roc_meld, CLIF = roc_clif, Lille = roc_lille)) - -##### -# Formally ompare the c-statistics across models using bootstrap method -compareroc.mc <- roc.test(roc_clif, roc_meld) # comparison between MELD and CLIF -compareroc.ml <- roc.test(roc_meld, roc_lille) # comparison between MELD and Lille -compareroc.cl <- roc.test(roc_clif, roc_lille) # comparison between CLIF and Lille - -# Tabulate the p-values -roc_pvalues <- c(compareroc.mc$p.value, compareroc.ml$p.value, compareroc.cl$p.value) -names(roc_pvalues) <- c("p-value MELD-CLIF", "p-value MELD-Lille", "p-value CLIF-Lille") -roc_pvalues - -##### -# NRI calculations -library(nricens) - -# Define events and probability vectors -event <- stph.c$D90_surv -p.MELD <- stph.c$MELD.surv -p.MELD2 <- stph.c$MELD.surv2 -p.LILLE <- stph.c$Lille.surv -p.CLIF <- stph.c$CLIF.surv - -# Define cut-off points -cut_lille <- 1 - (exp(-0.45)/(1 + exp(-0.45))) -cut_meld <- 0.707^(exp(2.5 - 1.127)) -cut_meld2 <- 0.98465^(exp(0.1635*(25 - 10))) -cut_clif <- 
exp(-0.0079 * exp(0.0869*51)) - -# Calculate NRI for all models -# MELD_1 and Lille -NRI_ML <- nribin(event = event, p.std = p.MELD, p.new = p.LILLE, cut = cut_meld, niter = 0, updown = 'category') -# MELD_2 and Lille -NRI_M2L <- nribin(event = event, p.std = p.MELD2, p.new = p.LILLE, cut = cut_meld2, niter = 0, updown = 'category') -# CLIF-C ACLF and Lille -NRI_CL <- nribin(event = event, p.std = p.CLIF, p.new = p.LILLE, cut = cut_clif, niter = 0, updown = 'category') -# MELD_1 and CLIF-C ACLF -NRI_MC <- nribin(event = event, p.std = p.MELD, p.new = p.CLIF, cut = cut_meld, niter = 0, updown = 'category') -# MELD_2 and CLIF-C ACLF -NRI_M2C <- nribin(event = event, p.std = p.MELD2, p.new = p.CLIF, cut = cut_meld2, niter = 0, updown = 'category') -# Lille and MELD_1 -NRI_LM <- nribin(event = event, p.std = p.LILLE, p.new = p.MELD, cut = cut_lille, niter = 0, updown = 'category') -# Lille and MELD_2 -NRI_LM2 <- nribin(event = event, p.std = p.LILLE, p.new = p.MELD2, cut = cut_lille, niter = 0, updown = 'category') -# Lille and CLIF-C ACLf -NRI_LC <- nribin(event = event, p.std = p.LILLE, p.new = p.CLIF, cut = cut_lille, niter = 0, updown = 'category') -# CLIF-C ACLF and MELD_1 -NRI_CM <- nribin(event = event, p.std = p.CLIF, p.new = p.MELD, cut = cut_clif, niter = 0, updown = 'category') -# CLIF-C ACLF and MELD_2 -NRI_CM2 <- nribin(event = event, p.std = p.CLIF, p.new = p.MELD2, cut = cut_clif, niter = 0, updown = 'category') diff --git a/original models/Discrimination_6M.R b/original models/Discrimination_6M.R new file mode 100644 index 0000000..6bb5d77 --- /dev/null +++ b/original models/Discrimination_6M.R @@ -0,0 +1,126 @@ +# This script performs all calculations for assessing discrimination +library(pROC) +library(dplyr) +library(ggplot2) +library(tidyr) +library(forcats) +library(purrr) + +# data +path_data <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/original models/" +load(paste0(path_data, "original_models_6M.Rdata")) + +############################################# +############### Calculate AUCs ############# +############################################# + +# MELD (survival function 1 and 2 do not matter here) +roc_meld <- roc(stph.meld$M6_DTH, stph.meld$MELD.surv_6M) + +# Lille +roc_lille <- roc(stph.lille$M6_DTH, stph.lille$Lille.surv_6M) + +# CLIF-C ACLF +roc_clif <- roc(stph.clif$M6_DTH, stph.clif$CLIF.surv_6M) + +############################################# +###################### Plots ############### +############################################# +roc.list <- list("CLIF-C ACLF" = roc_clif, + "Lille" = roc_lille, + "MELD" = roc_meld) + +g.list <- ggroc(roc.list) + +# ROC plot of all models combined +g.list + + geom_line(lwd = 1) + + geom_abline(slope = 1, intercept = 1, linetype = "dashed") + + theme_classic() + +# faceting +g.list + + facet_grid(. ~ name) + + geom_line(lwd = 1) + + geom_abline(slope = 1, intercept = 1, linetype = "dashed") + + theme_classic() + + theme(legend.position="none") + +# add confidence bands +ci.list <- lapply(roc.list, ci.se, specificities = seq(0, 1, l = 25)) + +dat.ci.list <- lapply(ci.list, function(ciobj) + data.frame(x = as.numeric(rownames(ciobj)), + lower = ciobj[, 1], + upper = ciobj[, 3])) + +df <- plyr::ldply(dat.ci.list, data.frame, .id = "name") + +# rorder based on AUC scores +#df_cal$Score <- factor(df_cal$Score, labels = levels(data_wide$condition)) +ggroc(roc.list) + + facet_grid(. 
~ name) + + theme_minimal() + + geom_ribbon(data = df, aes(x = x, ymin = lower, ymax = upper, fill = name), alpha = 0.3, inherit.aes = F) + + geom_abline(slope = 1, intercept = 1, linetype = "dashed") + + labs(x = "Specificity", y = "Sensitivity") + + coord_equal() + + theme_classic() + + theme(legend.position = "none") + +############################################# +###################### AUC ################# +############################################# +# CI +#auc_meld <- auc(roc_meld) +#auc_meld_ci <- ci.auc(roc_meld) # Confidence intervals +#roc_plot(stph.meld, "D90_surv", "MELD.surv", ci = TRUE, plot_title = "ROC curve for the MELD score") + +df_AUC <- as.data.frame(map_dfr(roc.list, ci.auc)) +rownames(df_AUC) <- c("low_CL", "mean", "upper_CL") + +df_AUC2 <- tibble::rownames_to_column(df_AUC, var = "AUC") + +df3 <- gather(df_AUC2, condition, measurement, "CLIF-C ACLF":"MELD", factor_key = TRUE) + +#df3 <- gather(df_AUC2, condition, measurement, MELD:Lille, factor_key = TRUE) + +data_wide <- spread(df3, AUC, measurement) %>% arrange(mean) + +# reorder factor levels +data_wide$condition <- fct_reorder(data_wide$condition, data_wide$mean) + + +# p_auc <- ggplot(data_wide, aes(x = mean, y = condition, col = condition)) + +# geom_point(lwd = 2) + +# coord_cartesian(xlim = c(0.5, 0.86)) + +# geom_errorbar(aes(xmin = low_CL, xmax = upper_CL), +# alpha = 1, show.legend = F, lwd = 1, width = 0.5) + +# labs(y = "Score", col = "Score", x = "AUC with 95% limits") + +# scale_color_brewer(palette = "Dark2") + +# scale_fill_brewer(palette = "Dark2") + +# theme_classic2() + +# theme(legend.position = "none") + +##### +p_roc <- ggroc(roc.list, lwd = 1.1) + + facet_grid(. ~ name) + + geom_ribbon(data = df, aes(x = x, ymin = lower, ymax = upper, fill = name), alpha = 0.3, inherit.aes = F) + + geom_abline(slope = 1, intercept = 1, linetype = "dashed") + + labs(x = "Specificity", y = "Sensitivity") + + coord_equal() + + scale_color_brewer(palette = "Dark2") + + scale_fill_brewer(palette = "Dark2") + + theme_classic2() + + theme(legend.position = "none") + +data_wide$name <- data_wide$condition +data_wide$low_CL <- round(data_wide$low_CL, 2) +data_wide$mean <- round(data_wide$mean, 2) +data_wide$upper_CL <- round(data_wide$upper_CL, 2) + +p_roc + geom_text(data = data_wide, + aes(0.02, 0.10, label = paste0("AUC (95% CI): ", mean, " (", low_CL, "-", upper_CL, ")" ), + hjust = 1), col = "black") #+ + + diff --git a/original models/Discrimination_uo.R b/original models/Discrimination_uo.R new file mode 100644 index 0000000..90f5a52 --- /dev/null +++ b/original models/Discrimination_uo.R @@ -0,0 +1,158 @@ +# This script performs all calculations for assessing discrimination + +library(pROC) +library(dplyr) +library(ggplot2) +library(tidyr) +library(forcats) +library(purrr) + +# data +path_data <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/original models/" +load(paste0(path_data, "original_models.Rdata")) + +############################################# +############### Calculate AUCs ############# +############################################# + +# MELD (survival function 1 and 2 do not matter here) +roc_meld <- roc(stph.meld$D90_DTH, stph.meld$MELD.surv) + +# MELD 3.0 +roc_meld3 <- roc(stph.meld$D90_DTH, stph.meld$MELD3.surv) + +# MELD from VanDerwerken et al 2021 +roc_meld.VanDerwerken <- roc(stph.meld$D90_DTH, stph.meld$MELD_Van) + +# Lille +roc_lille <- roc(stph.lille$D90_DTH, stph.lille$Lille.surv) + +# CLIF-C ACLF +roc_clif <- roc(stph.clif$D90_DTH, stph.clif$CLIF.surv) + 
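+# --- editorial note (sketch): the predictors above are survival probabilities while the
+# --- outcome is 90-day death, so cases should have *lower* predicted values than controls.
+# --- pROC::roc() auto-detects this, but spelling out levels/direction guards against a
+# --- silently reversed curve in resamples or subgroups; the call below (hypothetical object
+# --- name roc_meld_explicit) is assumed to be equivalent to roc_meld above.
+roc_meld_explicit <- roc(stph.meld$D90_DTH, stph.meld$MELD.surv,
+                         levels = c(0, 1), direction = ">")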
+############################################# +###################### Plots ############### +############################################# +roc.list <- list("CLIF-C ACLF" = roc_clif, + "Lille" = roc_lille, + "MELD" = roc_meld, + "MELD VanDerwerken" = roc_meld.VanDerwerken) + +#roc.list <- list("MELD" = roc_meld, +# "MELD VanDerwerken" = roc_meld.VanDerwerken, +# "CLIF-C ACLF" = roc_clif, +# "Lille" = roc_lille) + +#setwd(path_data) +#save(roc.list, file = "ROC_original.Rdata") + +g.list <- ggroc(roc.list) + +# ROC plot of all models combined +g.list + + geom_line(lwd = 1) + + geom_abline(slope = 1, intercept = 1, linetype = "dashed") + + theme_classic() + +# faceting +g.list + + facet_grid(. ~ name) + + geom_line(lwd = 1) + + geom_abline(slope = 1, intercept = 1, linetype = "dashed") + + theme_classic() + + theme(legend.position="none") + +# add confidence bands +ci.list <- lapply(roc.list, ci.se, specificities = seq(0, 1, l = 25)) + +dat.ci.list <- lapply(ci.list, function(ciobj) + data.frame(x = as.numeric(rownames(ciobj)), + lower = ciobj[, 1], + upper = ciobj[, 3])) + +df <- plyr::ldply(dat.ci.list, data.frame, .id = "name") + +# rorder based on AUC scores +#df_cal$Score <- factor(df_cal$Score, labels = levels(data_wide$condition)) +ggroc(roc.list) + + facet_grid(. ~ name) + + theme_minimal() + + geom_ribbon(data = df, aes(x = x, ymin = lower, ymax = upper, fill = name), alpha = 0.3, inherit.aes = F) + + geom_abline(slope = 1, intercept = 1, linetype = "dashed") + + labs(x = "Specificity", y = "Sensitivity") + + coord_equal() + + theme_classic() + + theme(legend.position = "none") + +# To have all the curves of the same color, use aes="group": +#g.group <- ggroc(roc.list, aes="group") +#g.group +#g.group + facet_grid(.~name) + +############################################# +###################### AUC ############### +############################################# +# CI +#auc_meld <- auc(roc_meld) +#auc_meld_ci <- ci.auc(roc_meld) # Confidence intervals +#roc_plot(stph.meld, "D90_surv", "MELD.surv", ci = TRUE, plot_title = "ROC curve for the MELD score") + +df_AUC <- as.data.frame(map_dfr(roc.list, ci.auc)) +rownames(df_AUC) <- c("low_CL", "mean", "upper_CL") + +df_AUC2 <- tibble::rownames_to_column(df_AUC, var = "AUC") + + +df3 <- gather(df_AUC2, condition, measurement, `CLIF-C ACLF`:`MELD VanDerwerken`, factor_key = TRUE) + +#df3 <- gather(df_AUC2, condition, measurement, MELD:Lille, factor_key = TRUE) + +data_wide <- spread(df3, AUC, measurement) %>% arrange(mean) + +# reorder factor levels +data_wide$condition <- fct_reorder(data_wide$condition, data_wide$mean) + +p_auc <- ggplot(data_wide, aes(x = mean, y = condition, col = condition)) + + geom_point(lwd = 2) + + coord_cartesian(xlim = c(0.5, 0.86)) + + geom_errorbar(aes(xmin = low_CL, xmax = upper_CL), + alpha = 1, show.legend = F, lwd = 1, width = 0.5) + + labs(y = "Score", col = "Score", x = "AUC with 95% limits") + + scale_color_brewer(palette = "Dark2") + + scale_fill_brewer(palette = "Dark2") + + theme_classic2() + + theme(legend.position = "none") + +##### +p_roc <- ggroc(roc.list, lwd = 1.1) + + facet_grid(. 
~ name) + + geom_ribbon(data = df, aes(x = x, ymin = lower, ymax = upper, fill = name), alpha = 0.3, inherit.aes = F) + + geom_abline(slope = 1, intercept = 1, linetype = "dashed") + + labs(x = "Specificity", y = "Sensitivity") + + coord_equal() + + scale_color_brewer(palette = "Dark2") + + scale_fill_brewer(palette = "Dark2") + + theme_classic2() + + theme(legend.position = "none") + +data_wide$name <- data_wide$condition +data_wide$low_CL <- round(data_wide$low_CL, 2) +data_wide$mean <- round(data_wide$mean, 2) +data_wide$upper_CL <- round(data_wide$upper_CL, 2) + +p_roc + geom_text(data = data_wide, + aes(0.02, 0.10, label = paste0("AUC (95% CI): ", mean, " (", low_CL, "-", upper_CL, ")" ), + hjust = 1), col = "black") #+ +#theme_classic() + + +##### +# Formally compare the c-statistics across models using bootstrap method +compareroc.mc <- roc.test(roc_clif, roc_meld) # comparison between MELD and CLIF +compareroc.ml <- roc.test(roc_meld, roc_lille) # comparison between MELD and Lille +compareroc.cl <- roc.test(roc_clif, roc_lille) # comparison between CLIF and Lille + +# Tabulate the p-values +roc_pvalues <- c(compareroc.mc$p.value, compareroc.ml$p.value, compareroc.cl$p.value) +names(roc_pvalues) <- c("p-value MELD-CLIF", "p-value MELD-Lille", "p-value CLIF-Lille") +roc_pvalues \ No newline at end of file diff --git a/original models/ROC_original.Rdata b/original models/ROC_original.Rdata new file mode 100755 index 0000000..03005cc Binary files /dev/null and b/original models/ROC_original.Rdata differ diff --git a/original models/clinical_utility.R b/original models/clinical_utility.R old mode 100644 new mode 100755 index 631f20d..9a4d6c6 --- a/original models/clinical_utility.R +++ b/original models/clinical_utility.R @@ -1,115 +1,115 @@ -# This script performs all calculations for decision curve analysis. 
- -# add necessary functions to the directory -source("nb_diff.R") -source("dca.R") # dca.R is taken from decisioncurveanalysis.org - -# Since decision curves look better when working with mortality probability rather than survival, first -# define variables corresponding to 90-day mortality -stph.c$clif.mort <- 1 - stph.c$CLIF.surv -stph.c$meld.mort <- 1 - stph.c$MELD.surv -stph.c$meld.mort2 <- 1 - stph.c$MELD.surv2 -stph.c$lille.mort <- 1 - stph.c$Lille.surv -stph.c$meld3.mort <- 1 - stph.c$MELD3.surv - -# Perform and plot DCA for the original models -full_dca <- dca(data = stph.c, outcome = "D90_DTH", predictors = c("clif.mort", "meld.mort", "meld.mort2", "lille.mort"), xstop = 0.75) -nb_data <- full_dca$net.benefit - -plot(nb_data$threshold, nb_data$none, type = "l", lwd = 2, xlab = "Threshold mortality probability", ylab = "Net benefit", ylim = c(-0.05, 0.25)) -lines(nb_data$threshold, nb_data$all, type = "l", col = 8, lwd = 2) -lines(nb_data$threshold, nb_data$meld.mort, type = "l", col = "darkblue", lwd = 2) -lines(nb_data$threshold, nb_data$meld.mort2, type = "l", col = "darkgreen", lwd = 2) -lines(nb_data$threshold, nb_data$clif.mort, type = "l", col = "darkred", lwd = 2) -lines(nb_data$threshold, nb_data$lille.mort, type = "l", col = "orange", lwd = 2) -# Add a legend -legend("topright", cex = 0.8, legend = c("Treat none", "Treat all", "MELD_1", "MELD_2", "CLIF-C ACLF", "Lille"), - col = c(17, 8, "darkblue", "darkgreen", "darkred", "orange"), lwd = c(2, 2, 2, 2, 2, 2)) - -# Create a table of NB values for a set of reasonable threshold probabilities -table_output <- dca(data = stph.c, outcome = "D90_DTH", - predictors = c("clif.mort", "meld.mort", "lille.mort"), - xstart = 0.25, xstop = 0.75, xby = 0.10, graph = F) -table_output - -###### -# Formally compare the NB values for different values of the threshold probability using a bootstrap approach -# The code is based on that proposed by Zhang et al., 2018 -library(boot) -set.seed(34) -R <- 500 # Number of bootstrap samples - -# Compare the CLIF-C ACLF and MELD models -boot.diff.cm <- boot(data=stph.c, statistic = nb_diff, - R = R, outcome = "D90_DTH", pred1 = "clif.mort", - pred2 = "meld.mort", xstart = 0.25, xstop = 0.75, - step = 0.05) -pvalue.cm <- NULL - -for(i in 1:length(boot.diff.cm$t0)){ - pvalue.cm <- c(pvalue.cm, mean(abs(boot.diff.cm$t[,i] - boot.diff.cm$t0[i]) > abs(boot.diff.cm$t0[i]))) -} - -# Compare the MELD and Lille model -boot.diff.ml <- boot(data=stph.c, statistic = nb_diff, - R = R, outcome = "D90_DTH", pred1 = "lille.mort", - pred2 = "meld.mort", xstart = 0.25, xstop = 0.75, - step = 0.05) -pvalue.ml <- NULL - -for(i in 1:length(boot.diff.ml$t0)){ - pvalue.ml <- c(pvalue.ml, mean(abs(boot.diff.ml$t[,i] - boot.diff.ml$t0[i]) > abs(boot.diff.ml$t0[i]))) -} - -# Compare the CLIF-C ACLF and Lille model -boot.diff.cl <- boot(data=stph.c, statistic = nb_diff, - R = R, outcome = "D90_DTH", pred1 = "clif.mort", - pred2 = "lille.mort", xstart = 0.25, xstop = 0.75, - step = 0.05) -pvalue.cl <- NULL - -for(i in 1:length(boot.diff.cl$t0)){ - pvalue.cl <- c(pvalue.cl, mean(abs(boot.diff.cl$t[,i] - boot.diff.cl$t0[i]) > abs(boot.diff.cl$t0[i]))) -} - -# Compare CLIF-C ACLF with the MELD_2 -boot.diff.cm2 <- boot(data=stph.c, statistic = nb_diff, - R = R, outcome = "D90_DTH", pred1 = "clif.mort", - pred2 = "meld.mort2", xstart = 0.25, xstop = 0.75, - step = 0.05) -pvalue.cm2 <- NULL - -for(i in 1:length(boot.diff.cm2$t0)){ - pvalue.cm2 <- c(pvalue.cm2, mean(abs(boot.diff.cm2$t[,i] - boot.diff.cm2$t0[i]) > 
abs(boot.diff.cm2$t0[i]))) -} - -# Compare Lille and the MELD_2 -boot.diff.m2l <- boot(data=stph.c, statistic = nb_diff, - R = R, outcome = "D90_DTH", pred1 = "lille.mort", - pred2 = "meld.mort2", xstart = 0.25, xstop = 0.75, - step = 0.05) -pvalue.m2l <- NULL - -for(i in 1:length(boot.diff.m2l$t0)){ - pvalue.m2l <- c(pvalue.m2l, mean(abs(boot.diff.m2l$t[,i] - boot.diff.m2l$t0[i]) > abs(boot.diff.m2l$t0[i]))) -} - -# Compare both MELD options -boot.diff.mm <- boot(data=stph.c, statistic = nb_diff, - R = R, outcome = "D90_DTH", pred1 = "meld.mort2", - pred2 = "meld.mort", xstart = 0.25, xstop = 0.75, - step = 0.05) -pvalue.mm <- NULL - -for(i in 1:length(boot.diff.mm$t0)){ - pvalue.mm <- c(pvalue.mm, mean(abs(boot.diff.mm$t[,i] - boot.diff.mm$t0[i]) > abs(boot.diff.mm$t0[i]))) -} - -# Append p-values together in a dataframe for simplicity -pvalues_dca <- data.frame("threshold probability" = seq(from = 0.25, to = 0.75, by = 0.05), - "p-values Meld-CLIF" = pvalue.cm, - "p-values MELD-Lille" = pvalue.ml, - "p-values CLIF-Lille" = pvalue.cl, - "p-values MELD2-CLIF" = pvalue.cm2, - "p-values MELD2-Lille" = pvalue.m2l, - "p-values MELD-MELD2" = pvalue.mm) +# # This script performs all calculations for decision curve analysis. +# +# # add necessary functions to the directory +# source("nb_diff.R") +# source("dca.R") # dca.R is taken from decisioncurveanalysis.org +# +# # Since decision curves look better when working with mortality probability rather than survival, first +# # define variables corresponding to 90-day mortality +# stph.c$clif.mort <- 1 - stph.c$CLIF.surv +# stph.c$meld.mort <- 1 - stph.c$MELD.surv +# stph.c$meld.mort2 <- 1 - stph.c$MELD.surv2 +# stph.c$lille.mort <- 1 - stph.c$Lille.surv +# stph.c$meld3.mort <- 1 - stph.c$MELD3.surv +# +# # Perform and plot DCA for the original models +# full_dca <- dca(data = stph.c, outcome = "D90_DTH", predictors = c("clif.mort", "meld.mort", "meld.mort2", "lille.mort"), xstop = 0.75) +# nb_data <- full_dca$net.benefit +# +# plot(nb_data$threshold, nb_data$none, type = "l", lwd = 2, xlab = "Threshold mortality probability", ylab = "Net benefit", ylim = c(-0.05, 0.25)) +# lines(nb_data$threshold, nb_data$all, type = "l", col = 8, lwd = 2) +# lines(nb_data$threshold, nb_data$meld.mort, type = "l", col = "darkblue", lwd = 2) +# lines(nb_data$threshold, nb_data$meld.mort2, type = "l", col = "darkgreen", lwd = 2) +# lines(nb_data$threshold, nb_data$clif.mort, type = "l", col = "darkred", lwd = 2) +# lines(nb_data$threshold, nb_data$lille.mort, type = "l", col = "orange", lwd = 2) +# # Add a legend +# legend("topright", cex = 0.8, legend = c("Treat none", "Treat all", "MELD_1", "MELD_2", "CLIF-C ACLF", "Lille"), +# col = c(17, 8, "darkblue", "darkgreen", "darkred", "orange"), lwd = c(2, 2, 2, 2, 2, 2)) +# +# # Create a table of NB values for a set of reasonable threshold probabilities +# table_output <- dca(data = stph.c, outcome = "D90_DTH", +# predictors = c("clif.mort", "meld.mort", "lille.mort"), +# xstart = 0.25, xstop = 0.75, xby = 0.10, graph = F) +# table_output +# +# ###### +# # Formally compare the NB values for different values of the threshold probability using a bootstrap approach +# # The code is based on that proposed by Zhang et al., 2018 +# library(boot) +# set.seed(34) +# R <- 500 # Number of bootstrap samples +# +# # Compare the CLIF-C ACLF and MELD models +# boot.diff.cm <- boot(data=stph.c, statistic = nb_diff, +# R = R, outcome = "D90_DTH", pred1 = "clif.mort", +# pred2 = "meld.mort", xstart = 0.25, xstop = 0.75, +# step = 0.05) 
+# pvalue.cm <- NULL +# +# for(i in 1:length(boot.diff.cm$t0)){ +# pvalue.cm <- c(pvalue.cm, mean(abs(boot.diff.cm$t[,i] - boot.diff.cm$t0[i]) > abs(boot.diff.cm$t0[i]))) +# } +# +# # Compare the MELD and Lille model +# boot.diff.ml <- boot(data=stph.c, statistic = nb_diff, +# R = R, outcome = "D90_DTH", pred1 = "lille.mort", +# pred2 = "meld.mort", xstart = 0.25, xstop = 0.75, +# step = 0.05) +# pvalue.ml <- NULL +# +# for(i in 1:length(boot.diff.ml$t0)){ +# pvalue.ml <- c(pvalue.ml, mean(abs(boot.diff.ml$t[,i] - boot.diff.ml$t0[i]) > abs(boot.diff.ml$t0[i]))) +# } +# +# # Compare the CLIF-C ACLF and Lille model +# boot.diff.cl <- boot(data=stph.c, statistic = nb_diff, +# R = R, outcome = "D90_DTH", pred1 = "clif.mort", +# pred2 = "lille.mort", xstart = 0.25, xstop = 0.75, +# step = 0.05) +# pvalue.cl <- NULL +# +# for(i in 1:length(boot.diff.cl$t0)){ +# pvalue.cl <- c(pvalue.cl, mean(abs(boot.diff.cl$t[,i] - boot.diff.cl$t0[i]) > abs(boot.diff.cl$t0[i]))) +# } +# +# # Compare CLIF-C ACLF with the MELD_2 +# boot.diff.cm2 <- boot(data=stph.c, statistic = nb_diff, +# R = R, outcome = "D90_DTH", pred1 = "clif.mort", +# pred2 = "meld.mort2", xstart = 0.25, xstop = 0.75, +# step = 0.05) +# pvalue.cm2 <- NULL +# +# for(i in 1:length(boot.diff.cm2$t0)){ +# pvalue.cm2 <- c(pvalue.cm2, mean(abs(boot.diff.cm2$t[,i] - boot.diff.cm2$t0[i]) > abs(boot.diff.cm2$t0[i]))) +# } +# +# # Compare Lille and the MELD_2 +# boot.diff.m2l <- boot(data=stph.c, statistic = nb_diff, +# R = R, outcome = "D90_DTH", pred1 = "lille.mort", +# pred2 = "meld.mort2", xstart = 0.25, xstop = 0.75, +# step = 0.05) +# pvalue.m2l <- NULL +# +# for(i in 1:length(boot.diff.m2l$t0)){ +# pvalue.m2l <- c(pvalue.m2l, mean(abs(boot.diff.m2l$t[,i] - boot.diff.m2l$t0[i]) > abs(boot.diff.m2l$t0[i]))) +# } +# +# # Compare both MELD options +# boot.diff.mm <- boot(data=stph.c, statistic = nb_diff, +# R = R, outcome = "D90_DTH", pred1 = "meld.mort2", +# pred2 = "meld.mort", xstart = 0.25, xstop = 0.75, +# step = 0.05) +# pvalue.mm <- NULL +# +# for(i in 1:length(boot.diff.mm$t0)){ +# pvalue.mm <- c(pvalue.mm, mean(abs(boot.diff.mm$t[,i] - boot.diff.mm$t0[i]) > abs(boot.diff.mm$t0[i]))) +# } +# +# # Append p-values together in a dataframe for simplicity +# pvalues_dca <- data.frame("threshold probability" = seq(from = 0.25, to = 0.75, by = 0.05), +# "p-values Meld-CLIF" = pvalue.cm, +# "p-values MELD-Lille" = pvalue.ml, +# "p-values CLIF-Lille" = pvalue.cl, +# "p-values MELD2-CLIF" = pvalue.cm2, +# "p-values MELD2-Lille" = pvalue.m2l, +# "p-values MELD-MELD2" = pvalue.mm) diff --git a/original models/complete_cases_models.Rdata b/original models/complete_cases_models.Rdata new file mode 100755 index 0000000..6ff94c7 Binary files /dev/null and b/original models/complete_cases_models.Rdata differ diff --git a/original models/dca.r b/original models/dca.r deleted file mode 100644 index 9c17e58..0000000 --- a/original models/dca.r +++ /dev/null @@ -1,232 +0,0 @@ -#' Function that performs and plots the dca for prognostic models -#' Code taken from decisioncurveanalysis.org, referenced by -#' Vickers AJ, Elkin EB. Decision curve analysis: a novel method for evaluating prediction models. Medical Decision Making. 2006;26(6):565-74. 
-#' -#' @param data dataset that includes the predictor and outcome variables -#' @param outcome 0/1 outcome variable -#' @param predictors vector of predicted risks of the models to plot -#' @param xstart lowest value of threshold probability to be looked at -#' @param xstop highest value of threshold probability to consider -#' @param xby stepsize for looking at different threshold probabilities -#' @param ymin minimum of y-axis in the plot -#' @param probablity -#' @param harm -#' @param graph TRUE if we want a graph of NB curves to print -#' @param intervention -#' @param interventionper -#' @param smooth -#' @param loess.span - - -dca <- function(data, outcome, predictors, xstart=0.01, xstop=0.99, xby=0.01, - ymin=-0.05, probability=NULL, harm=NULL,graph=TRUE, intervention=FALSE, - interventionper=100, smooth=FALSE,loess.span=0.10) { - - # LOADING REQUIRED LIBRARIES - require(stats) - - # data MUST BE A DATA FRAME - if (class(data)!="data.frame") { - stop("Input data must be class data.frame") - } - - #ONLY KEEPING COMPLETE CASES - data=data[complete.cases(data[append(outcome,predictors)]),append(outcome,predictors)] - - # outcome MUST BE CODED AS 0 AND 1 - if (max(data[[outcome]])>1 | min(data[[outcome]])<0) { - stop("outcome cannot be less than 0 or greater than 1") - } - # xstart IS BETWEEN 0 AND 1 - if (xstart<0 | xstart>1) { - stop("xstart must lie between 0 and 1") - } - - # xstop IS BETWEEN 0 AND 1 - if (xstop<0 | xstop>1) { - stop("xstop must lie between 0 and 1") - } - - # xby IS BETWEEN 0 AND 1 - if (xby<=0 | xby>=1) { - stop("xby must lie between 0 and 1") - } - - # xstart IS BEFORE xstop - if (xstart>=xstop) { - stop("xstop must be larger than xstart") - } - - #STORING THE NUMBER OF PREDICTORS SPECIFIED - pred.n=length(predictors) - - #IF probability SPECIFIED ENSURING THAT EACH PREDICTOR IS INDICATED AS A YES OR NO - if (length(probability)>0 & pred.n!=length(probability)) { - stop("Number of probabilities specified must be the same as the number of predictors being checked.") - } - - #IF harm SPECIFIED ENSURING THAT EACH PREDICTOR HAS A SPECIFIED HARM - if (length(harm)>0 & pred.n!=length(harm)) { - stop("Number of harms specified must be the same as the number of predictors being checked.") - } - - #INITIALIZING DEFAULT VALUES FOR PROBABILITES AND HARMS IF NOT SPECIFIED - if (length(harm)==0) { - harm=rep(0,pred.n) - } - if (length(probability)==0) { - probability=rep(TRUE,pred.n) - } - - - #CHECKING THAT EACH probability ELEMENT IS EQUAL TO YES OR NO, - #AND CHECKING THAT PROBABILITIES ARE BETWEEN 0 and 1 - #IF NOT A PROB THEN CONVERTING WITH A LOGISTIC REGRESSION - for(m in 1:pred.n) { - if (probability[m]!=TRUE & probability[m]!=FALSE) { - stop("Each element of probability vector must be TRUE or FALSE") - } - if (probability[m]==TRUE & (max(data[predictors[m]])>1 | min(data[predictors[m]])<0)) { - stop(paste(predictors[m],"must be between 0 and 1 OR sepcified as a non-probability in the probability option",sep=" ")) - } - if(probability[m]==FALSE) { - model=NULL - pred=NULL - model=glm(data.matrix(data[outcome]) ~ data.matrix(data[predictors[m]]), family=binomial("logit")) - pred=data.frame(model$fitted.values) - pred=data.frame(pred) - names(pred)=predictors[m] - data=cbind(data[names(data)!=predictors[m]],pred) - print(paste(predictors[m],"converted to a probability with logistic regression. Due to linearity assumption, miscalibration may occur.",sep=" ")) - } - } - - # THE PREDICTOR NAMES CANNOT BE EQUAL TO all OR none. 
- if (length(predictors[predictors=="all" | predictors=="none"])) { - stop("Prediction names cannot be equal to all or none.") - } - - ######### CALCULATING NET BENEFIT ######### - N=dim(data)[1] - event.rate=colMeans(data[outcome]) - - # CREATING DATAFRAME THAT IS ONE LINE PER THRESHOLD PER all AND none STRATEGY - nb=data.frame(seq(from=xstart, to=xstop, by=xby)) - names(nb)="threshold" - interv=nb - - nb["all"]=event.rate - (1-event.rate)*nb$threshold/(1-nb$threshold) - nb["none"]=0 - - # CYCLING THROUGH EACH PREDICTOR AND CALCULATING NET BENEFIT - for(m in 1:pred.n){ - for(t in 1:length(nb$threshold)){ - # COUNTING TRUE POSITIVES AT EACH THRESHOLD - tp=mean(data[data[[predictors[m]]]>=nb$threshold[t],outcome])*sum(data[[predictors[m]]]>=nb$threshold[t]) - # COUNTING FALSE POSITIVES AT EACH THRESHOLD - fp=(1-mean(data[data[[predictors[m]]]>=nb$threshold[t],outcome]))*sum(data[[predictors[m]]]>=nb$threshold[t]) - #setting TP and FP to 0 if no observations meet threshold prob. - if (sum(data[[predictors[m]]]>=nb$threshold[t])==0) { - tp=0 - fp=0 - } - - # CALCULATING NET BENEFIT - nb[t,predictors[m]]=tp/N - fp/N*(nb$threshold[t]/(1-nb$threshold[t])) - harm[m] - } - interv[predictors[m]]=(nb[predictors[m]] - nb["all"])*interventionper/(interv$threshold/(1-interv$threshold)) - } - - # CYCLING THROUGH EACH PREDICTOR AND SMOOTH NET BENEFIT AND INTERVENTIONS AVOIDED - for(m in 1:pred.n) { - if (smooth==TRUE){ - lws=loess(data.matrix(nb[!is.na(nb[[predictors[m]]]),predictors[m]]) ~ data.matrix(nb[!is.na(nb[[predictors[m]]]),"threshold"]),span=loess.span) - nb[!is.na(nb[[predictors[m]]]),paste(predictors[m],"_sm",sep="")]=lws$fitted - - lws=loess(data.matrix(interv[!is.na(nb[[predictors[m]]]),predictors[m]]) ~ data.matrix(interv[!is.na(nb[[predictors[m]]]),"threshold"]),span=loess.span) - interv[!is.na(nb[[predictors[m]]]),paste(predictors[m],"_sm",sep="")]=lws$fitted - } - } - - # PLOTTING GRAPH IF REQUESTED - if (graph==TRUE) { - require(graphics) - - # PLOTTING INTERVENTIONS AVOIDED IF REQUESTED - if(intervention==TRUE) { - # initialize the legend label, color, and width using the standard specs of the none and all lines - legendlabel <- NULL - legendcolor <- NULL - legendwidth <- NULL - legendpattern <- NULL - - #getting maximum number of avoided interventions - ymax=max(interv[predictors],na.rm = TRUE) - - #INITIALIZING EMPTY PLOT WITH LABELS - plot(x=nb$threshold, y=nb$all, type="n" ,xlim=c(xstart, xstop), ylim=c(ymin, ymax), xlab="Threshold probability", ylab=paste("Net reduction in interventions per",interventionper,"patients")) - - #PLOTTING INTERVENTIONS AVOIDED FOR EACH PREDICTOR - for(m in 1:pred.n) { - if (smooth==TRUE){ - lines(interv$threshold,data.matrix(interv[paste(predictors[m],"_sm",sep="")]),col=m,lty=2) - } else { - lines(interv$threshold,data.matrix(interv[predictors[m]]),col=m,lty=2) - } - - # adding each model to the legend - legendlabel <- c(legendlabel, predictors[m]) - legendcolor <- c(legendcolor, m) - legendwidth <- c(legendwidth, 1) - legendpattern <- c(legendpattern, 2) - } - } else { - # PLOTTING NET BENEFIT IF REQUESTED - - # initialize the legend label, color, and width using the standard specs of the none and all lines - legendlabel <- c("None", "All") - legendcolor <- c(17, 8) - legendwidth <- c(2, 2) - legendpattern <- c(1, 1) - - #getting maximum net benefit - ymax=max(nb[names(nb)!="threshold"],na.rm = TRUE) - - # inializing new benfit plot with treat all option - plot(x=nb$threshold, y=nb$all, type="l", col=8, lwd=2 ,xlim=c(xstart, xstop), ylim=c(ymin, 
ymax), xlab="Threshold probability", ylab="Net benefit") - # adding treat none option - lines(x=nb$threshold, y=nb$none,lwd=2) - #PLOTTING net benefit FOR EACH PREDICTOR - for(m in 1:pred.n) { - if (smooth==TRUE){ - lines(nb$threshold,data.matrix(nb[paste(predictors[m],"_sm",sep="")]),col=m,lty=2) - } else { - lines(nb$threshold,data.matrix(nb[predictors[m]]),col=m,lty=2) - } - # adding each model to the legend - legendlabel <- c(legendlabel, predictors[m]) - legendcolor <- c(legendcolor, m) - legendwidth <- c(legendwidth, 1) - legendpattern <- c(legendpattern, 2) - } - } - # then add the legend - legend("topright", legendlabel, cex=0.8, col=legendcolor, lwd=legendwidth, lty=legendpattern) - - } - - #RETURNING RESULTS - results=list() - results$N=N - results$predictors=data.frame(cbind(predictors,harm,probability)) - names(results$predictors)=c("predictor","harm.applied","probability") - results$interventions.avoided.per=interventionper - results$net.benefit=nb - results$interventions.avoided=interv - - return(results) - -} - - - diff --git a/original models/imputed_orig_scores.Rdata b/original models/imputed_orig_scores.Rdata new file mode 100644 index 0000000..a58f11a Binary files /dev/null and b/original models/imputed_orig_scores.Rdata differ diff --git a/original models/imputed_sens_orig_scores.Rdata b/original models/imputed_sens_orig_scores.Rdata new file mode 100644 index 0000000..673edcc Binary files /dev/null and b/original models/imputed_sens_orig_scores.Rdata differ diff --git a/original models/nb_diff.R b/original models/nb_diff.R deleted file mode 100644 index 19dca3b..0000000 --- a/original models/nb_diff.R +++ /dev/null @@ -1,28 +0,0 @@ -#' Function that calculates the difference between net benefit and calculates the p-value for -#' formal comparison between the net benefits for different values of the threshold probability -#' The idea for this function is taken from: -#' Zhang Z, Rousson V, Lee WC, et al. Decision curve analysis: a technical note. Ann Transl Med. 2018;6(15):308. 
doi:10.21037/atm.2018.07.02 -#' link: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6123195/ -#' -#' @param data dataset that includes the predictor and outcome variables -#' @param ii indices parameter for bootstrap -#' @param outcome 0/1 outcome variable -#' @param pred1 first predictor for survival -#' @param pred2 second predictor for survival -#' @param xstart lowest value of threshold probability to be looked at -#' @param xstop highest value of threshold probability to consider -#' @param step step size for looking at different threshold probabilities - -nb_diff <- function(data, ii, outcome, pred1, pred2, xstart, xstop, step){ - dd <- data[ii,] - nb1 <- dca(data = dd, outcome = outcome, predictors = pred1, - xstart = 0.25, xstop = 0.75, xby = 0.05, graph = F) - nb2 <- dca(data = dd, outcome = outcome, predictors = pred2, - xstart = 0.25, xstop = 0.75, xby = 0.05, graph = F) - - nb.diff <- nb2$net.benefit[,4] - nb1$net.benefit[,4] - return(nb.diff) -} - - - diff --git a/original models/original_models.Rdata b/original models/original_models.Rdata new file mode 100755 index 0000000..afe8845 Binary files /dev/null and b/original models/original_models.Rdata differ diff --git a/original models/original_models_6M.Rdata b/original models/original_models_6M.Rdata new file mode 100644 index 0000000..a23af4a Binary files /dev/null and b/original models/original_models_6M.Rdata differ diff --git a/original models/plots_imputation_sens.R b/original models/plots_imputation_sens.R new file mode 100644 index 0000000..300e697 --- /dev/null +++ b/original models/plots_imputation_sens.R @@ -0,0 +1,46 @@ + +library(ggplot2) +library(gridExtra) + + +path_imp <- "~/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/pre-analysis/" + +# imputed data - sensitivity analysis +load(paste0(path_imp, "imputed_data_sens.Rdata")) + + +p1 <- densityplot(imp.all[[1]], ~ Bilirubin.day.7, lwd = 3, xlab = "", ylab = "", + main = expression(paste(delta, " = 0"))) +p2 <- densityplot(imp.all[[2]], ~ Bilirubin.day.7, lwd = 3, xlab = "", ylab = "", + main = expression(paste(delta, " = -50"))) +p3 <- densityplot(imp.all[[3]], ~ Bilirubin.day.7, lwd = 3, xlab = "", ylab = "", + main = expression(paste(delta, " = -100"))) +p4 <- densityplot(imp.all[[4]], ~ Bilirubin.day.7, lwd = 3, xlab = "", ylab = "", + main = expression(paste(delta, " = -200"))) + +library(patchwork) +p_prelim <- grid.arrange(p1, p2, p3, p4, nrow = 1, + bottom = textGrob("Bilirubin day 7", gp = gpar(fontsize = 11), vjust = -3), + left = textGrob("Density", gp = gpar(fontsize = 11), rot = 90, vjust = 1.5, hjust = 0) +) + +grid.arrange(p_prelim, p5, p6, nrow = 2, ncol = 3, layout_matrix = rbind(c(1,1), c(2,3))) + + + +top_row <- plot_grid(p1, p2, p3, p4, labels = "a", nrow = 1) +title <- ggdraw() + + draw_label("Bilirubin day 7", x = 0.5, y = 0.5, size = 11) + +top_row_with_title <- plot_grid( + title, top_row, + ncol = 1 + # rel_heights values control vertical title margins +) + +top_row_with_title + +bottom_row <- plot_grid(p5, p6, labels = c('b', 'c'), nrow = 1) +plot_grid(top_row_with_title, bottom_row, nrow = 2, rel_heights = c(4, 4)) + + diff --git a/original models/prognostic_scores.R b/original models/prognostic_scores.R old mode 100644 new mode 100755 index 69b7afb..c2d9c4d --- a/original models/prognostic_scores.R +++ b/original models/prognostic_scores.R @@ -1,11 +1,13 @@ ###### -# This script calculates the prognostic scores (MELD, Lille, CLIF-C ACLF) and corresponding survival probabilities. 
+# This script calculates the prognostic scores (MELD, Lille, CLIF-C ACLF) and corresponding 90-day survival probabilities. library(tidyverse) library(dplyr) library(ggplot2) -###### -# MELD score calculations +############################################# +############### MELD score ################## +############################################# +##### # First constrain INR, Creatinine, and Bilirubin to be within certain bandwidth stph.meld$INR <- ifelse(stph.meld$INR < 1, 1, stph.meld$INR) stph.meld$Creatinine.mg.dl.MELD <- ifelse(stph.meld$Creatinine.mg.dl < 1, 1, stph.meld$Creatinine.mg.dl) @@ -13,8 +15,9 @@ stph.meld$Bilirubin.mg.dl.MELD <- ifelse(stph.meld$Bilirubin.mg.dl < 1, 1, stph. stph.meld$Creatinine.mg.dl.MELD <- ifelse(stph.meld$Creatinine.mg.dl > 4, 4, stph.meld$Creatinine.mg.dl) # Calculate basic MELD score -stph.meld$MELD.calc <- 0.378*log(stph.meld$Bilirubin.mg.dl.MELD) + - 1.120*log(stph.meld$INR) + 0.957*log(stph.meld$Creatinine.mg.dl.MELD) +stph.meld$MELD.calc <- 0.378*log(stph.meld$Bilirubin.mg.dl.MELD) + + 1.120*log(stph.meld$INR) + + 0.957*log(stph.meld$Creatinine.mg.dl.MELD) # Round score to the nearest tenth stph.meld$MELD.calc <- round(stph.meld$MELD.calc, 1) @@ -48,8 +51,16 @@ stph.meld$MELD.calc <- ifelse(stph.meld$MELD.calc > 40, 40, stph.meld$MELD.calc) stph.meld$MELD.surv <- 0.707^(exp((stph.meld$MELD.calc/10) - 1.127)) stph.meld$MELD.surv2 <- 0.98465^(exp(0.1635*(stph.meld$MELD.calc - 10))) +# Extract 90-day survival probability from VanDerwerke et al, 2021 +library(readxl) +MELD_VanDerwerken <- read_excel("~/IDrive-Sync/Projects/MIMAH/data/MELD_VanDerwerken.xlsx") + +for(i in 1:nrow(stph.meld)){ + stph.meld$MELD_Van[i] <- MELD_VanDerwerken[stph.meld$MELD.calc[i] - 5, ]$SURV +} + #### -# Calculate the MELD3.0 score (sex-adjusted version of the MELD) +# Calculate the MELD 3.0 score (sex-adjusted version of the MELD) # Constrain Albumin stph.meld$Albumin.MELD <- ifelse(stph.meld$Albumin < 1.5, 1.5, stph.meld$Albumin) stph.meld$Albumin.MELD <- ifelse(stph.meld$Albumin > 3.5, 3.5, stph.meld$Albumin) @@ -63,8 +74,10 @@ stph.meld$MELD3 <- 1.33*stph.meld$Gender + 4.56*log(stph.meld$Bilirubin.mg.dl.ME stph.meld$MELD3 <- round(stph.meld$MELD3, 0) stph.meld$MELD3.surv <- 0.946^(exp(0.17698*stph.meld$MELD3 - 3.56)) +############################################# +######### CLIF-C ACLF Score ################# +############################################# ##### -# CLIF-C ACLF Score calculations # Start by calculating organ-failure sub-scores stph.clif$liver.score <- ifelse(stph.clif$Bilirubin.mg.dl < 6, 1, 2) stph.clif$liver.score <- ifelse(stph.clif$Bilirubin.mg.dl < 12, stph.clif$liver.score, 3) @@ -88,8 +101,10 @@ stph.clif$CLIF.OF <- stph.clif$liver.score + stph.clif$kidney.score + stph.clif$CLIF.C <- 10*(0.33*stph.clif$CLIF.OF + 0.04*stph.clif$Age + 0.63*log(stph.clif$WBC) - 2) stph.clif$CLIF.surv <- exp(-0.0079 * exp(0.0869*stph.clif$CLIF.C)) -###### -# Lille score calculations +############################################# +################### Lille score ############# +############################################# +##### # First create renal insufficiency dummy stph.lille$ren.insuf <- ifelse(stph.lille$Creatinine.mg.dl < 1.3, 0, 1) @@ -97,19 +112,56 @@ stph.lille$ren.insuf <- ifelse(stph.lille$Creatinine.mg.dl < 1.3, 0, 1) stph.lille$delta.bili <- stph.lille$Bilirubin.Merged - stph.lille$Bilirubin.day.7 # Calculate the Lille score and corresponding survival probability -stph.lille$LILLE <- 3.19 - 0.101*stph.lille$Age + 0.147*stph.lille$Albumin + 
0.0165*stph.lille$delta.bili - - 0.206*stph.lille$ren.insuf - 0.0065*stph.lille$Bilirubin.Merged - 0.0096*stph.lille$protime +stph.lille$LILLE <- 3.19 - + 0.101*stph.lille$Age.at.randomisation..calc. + + 0.147*stph.lille$Albumin + 0.0165*stph.lille$delta.bili - + 0.206*stph.lille$ren.insuf - + 0.0065*stph.lille$Bilirubin.Merged - + 0.0096*stph.lille$protime stph.lille$Lille.surv <- 1 - (exp(-stph.lille$LILLE)/(1 + exp(-stph.lille$LILLE))) + +#stph.lille[240,] %>% select(Bilirubin.Merged, Bilirubin.mg.dl, Bilirubin.day.7, Bilirubin.mg.dl.day.7, +# Lille...R, Lille.index, LILLE, Lille.surv, Age.at.randomisation..calc., Albumin, Creatinine, protime) ##### # Create dataframe of all complete cases including prognostic scores (used later) stph.c <- merge(stph.meld, stph.clif, by = "Subject") stph.c <- merge(stph.c, stph.lille, by = "Subject") +# +#path <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/original models/" +#setwd(path) +#save(stph.meld, stph.lille, stph.clif, file = "original_models.Rdata") +#save(stph.c, file = "complete_cases_models.Rdata") +#save(stph, file = "full_sample.Rdata") + # Tabulate the calculated prognostic scores and survival probabilities library(table1) -table1::table1(~MELD.calc + MELD.surv + MELD.surv2 + MELD3.surv, data = stph.meld) -table1::table1(~LILLE + Lille.surv, data = stph.lille) -table1::table1(~CLIF.C + CLIF.surv, data = stph.clif) - - +tb_MELD <- table1(~ MELD.calc + MELD.surv + MELD.surv2 + MELD3.surv | factor(D90_surv), data = stph.meld) +tb_Lille <- table1(~ LILLE + Lille.surv | factor(D90_surv), data = stph.lille) +tb_CLIF <- table1(~ CLIF.C + CLIF.surv| factor(D90_surv), data = stph.clif) + +library(xtable) +xtable(as_tibble(tb_MELD)) +xtable(as_tibble(tb_Lille)) +xtable(as_tibble(tb_CLIF)) + +# KM estimates and plots +library(ggsurvfit) +survfit2(Surv(Time_to_death_from_rand, Death_event) ~ 1, data = stph) %>% + ggsurvfit() + + labs( + x = "Days", + y = "Overall survival probability" + ) + + add_confidence_interval() + + add_risktable() + +summary(survfit(Surv(Time_to_death_from_rand, Death_event) ~ 1, data = stph), times = 90) +summary(survfit(Surv(Time_to_death_from_rand, Death_event) ~ 1, data = stph), times = 183) + +survfit(Surv(Time_to_death_from_rand, Death_event) ~ 1, data = stph) %>% + gtsummary::tbl_survfit( + times = 90, + label_header = "**1-year survival (95% CI)**" + ) diff --git a/original models/prognostic_scores_6M.R b/original models/prognostic_scores_6M.R new file mode 100644 index 0000000..3da8e72 --- /dev/null +++ b/original models/prognostic_scores_6M.R @@ -0,0 +1,102 @@ +###### +# This script calculates the prognostic scores (MELD, Lille, CLIF-C ACLF) and corresponding 90-day survival probabilities. 
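+# (Note: the survival probabilities computed below are evaluated at the 6-month
+# horizon; see MELD.surv_6M, CLIF.surv_6M and Lille.surv_6M further down.)
+#
+# Optional sketch, not part of the original script: the value constraints below
+# use two separate ifelse() calls per variable, and for creatinine the second
+# call starts again from the raw column, so the lower bound is overwritten.
+# A single clamp helper would keep both bounds, e.g.
+#   clamp <- function(x, lo, hi) pmin(pmax(x, lo), hi)
+#   stph.meld$Creatinine.mg.dl.MELD <- clamp(stph.meld$Creatinine.mg.dl, 1, 4)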
+library(tidyverse) +library(dplyr) +library(ggplot2) + +############################################# +############### MELD score ################## +############################################# +##### +# First constrain INR, Creatinine, and Bilirubin to be within certain bandwidth +stph.meld$INR <- ifelse(stph.meld$INR < 1, 1, stph.meld$INR) +stph.meld$Creatinine.mg.dl.MELD <- ifelse(stph.meld$Creatinine.mg.dl < 1, 1, stph.meld$Creatinine.mg.dl) +stph.meld$Bilirubin.mg.dl.MELD <- ifelse(stph.meld$Bilirubin.mg.dl < 1, 1, stph.meld$Bilirubin.mg.dl) +stph.meld$Creatinine.mg.dl.MELD <- ifelse(stph.meld$Creatinine.mg.dl > 4, 4, stph.meld$Creatinine.mg.dl) + +# Calculate basic MELD score +stph.meld$MELD.calc <- 0.378*log(stph.meld$Bilirubin.mg.dl.MELD) + + 1.120*log(stph.meld$INR) + + 0.957*log(stph.meld$Creatinine.mg.dl.MELD) + +# Round score to the nearest tenth +stph.meld$MELD.calc <- round(stph.meld$MELD.calc, 1) + +# Multiply score by ten +stph.meld$MELD.calc <- 10*stph.meld$MELD.calc + +# Constrain serum sodium to be within certain bandwidth +stph.meld$Sodium <- ifelse(stph.meld$Sodium < 125, 125, stph.meld$Sodium) +stph.meld$Sodium <- ifelse(stph.meld$Sodium > 137, 137, stph.meld$Sodium) + +# Recalculate to get the MELD-Na score +for(i in 1:nrow(stph.meld)){ + if(stph.meld$MELD.calc[i] > 11){ + temp <- stph.meld$MELD.calc[i] + 1.32*(137 - stph.meld$Sodium[i]) - + 0.033*stph.meld$MELD.calc[i]*(137 - stph.meld$Sodium[i]) + stph.meld$MELD.calc[i] <- temp + } else { + stph.meld$MELD.calc[i] <- stph.meld$MELD.calc[i] + } +} + +# Round to nearest integer +stph.meld$MELD.calc <- round(stph.meld$MELD.calc, 0) + +# Constrain MELD to be between 6 and 40 +stph.meld$MELD.calc <- ifelse(stph.meld$MELD.calc < 6, 6, stph.meld$MELD.calc) +stph.meld$MELD.calc <- ifelse(stph.meld$MELD.calc > 40, 40, stph.meld$MELD.calc) + +# Calculate 90-day survival probability based on MELD-Na score (2 different functions) +stph.meld$MELD.surv_6M <- 0.621^(exp((stph.meld$MELD.calc/10) - 1.127)) +#stph.meld$MELD.surv2 <- 0.98465^(exp(0.1635*(stph.meld$MELD.calc - 10))) + +############################################# +######### CLIF-C ACLF Score ################# +############################################# +##### +# Start by calculating organ-failure sub-scores +stph.clif$liver.score <- ifelse(stph.clif$Bilirubin.mg.dl < 6, 1, 2) +stph.clif$liver.score <- ifelse(stph.clif$Bilirubin.mg.dl < 12, stph.clif$liver.score, 3) + +stph.clif$kidney.score <- ifelse(stph.clif$Creatinine.mg.dl < 2, 1, 2) +stph.clif$kidney.score <- ifelse(stph.clif$Creatinine.mg.dl < 3.5, stph.clif$kidney.score, 3) + +stph.clif$brain.score <- ifelse(stph.clif$HE == 0, 1, 2) +stph.clif$brain.score <- ifelse(stph.clif$HE > 2, 3, stph.clif$brain.score) + +stph.clif$coag.score <- ifelse(stph.clif$INR < 2, 1, 2) +stph.clif$coag.score <- ifelse(stph.clif$INR < 2.5, stph.clif$coag.score, 3) + +stph.clif$circ.score <- ifelse(stph.clif$MAP >= 70, 1, 2) + +# Calculate CLIF-OF score, which is the sum of sub-scores +stph.clif$CLIF.OF <- stph.clif$liver.score + stph.clif$kidney.score + + stph.clif$brain.score + stph.clif$coag.score + stph.clif$circ.score + 1 # respiratory score equal to 1 for all patients + +# Calculate the CLIF-C ACLF score and corresponding survival +stph.clif$CLIF.C <- 10*(0.33*stph.clif$CLIF.OF + 0.04*stph.clif$Age + 0.63*log(stph.clif$WBC) - 2) +stph.clif$CLIF.surv_6M <- exp(-0.0115 * exp(0.0824*stph.clif$CLIF.C)) + +############################################# +################### Lille score ############# 
+############################################# +##### +# First create renal insufficiency dummy +stph.lille$ren.insuf <- ifelse(stph.lille$Creatinine.mg.dl < 1.3, 0, 1) + +# Calculate the change in bilirubin +stph.lille$delta.bili <- stph.lille$Bilirubin.Merged - stph.lille$Bilirubin.day.7 + +# Calculate the Lille score and corresponding survival probability +stph.lille$LILLE <- 3.19 - 0.101*stph.lille$Age + 0.147*stph.lille$Albumin + 0.0165*stph.lille$delta.bili - + 0.206*stph.lille$ren.insuf - 0.0065*stph.lille$Bilirubin.Merged - 0.0096*stph.lille$protime +stph.lille$Lille.surv_6M <- 1 - (exp(-stph.lille$LILLE)/(1 + exp(-stph.lille$LILLE))) + + +#path <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/original models/" +#setwd(path) +#save(stph.meld, stph.lille, stph.clif, file = "original_models_6M.Rdata") + + + diff --git a/original models/prognostic_scores_imp.R b/original models/prognostic_scores_imp.R new file mode 100644 index 0000000..78bb7ef --- /dev/null +++ b/original models/prognostic_scores_imp.R @@ -0,0 +1,160 @@ + +library(readxl) +library(dplyr) + +############################################# +################ load data ################## +############################################# +# imputed data path +path_data <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/pre-analysis/" + +# imputed data - default +load(paste0(path_data, "imputed_data.Rdata")) + +# imputed data - sensitivity analysis +load(paste0(path_data, "imputed_data_sens.Rdata")) + +##### +# Extract 90-day survival probability from VanDerwerke et al, 2021 +MELD_VanDerwerken <- read_excel("~/IDrive-Sync/Projects/MIMAH/data/MELD_VanDerwerken.xlsx") + +############################################# +############### MELD score ################## +############################################# +# data preparation +imp_data2 <- imp_data %>% + dplyr::mutate( + # Survival indicator + D90_surv = 1 - D90_DTH, + # MELD + INR = ifelse(INR < 1, 1, INR), + Creatinine.mg.dl.MELD = ifelse(Creatinine.mg.dl < 1, 1, Creatinine.mg.dl), + Bilirubin.mg.dl.MELD = ifelse(Bilirubin.mg.dl < 1, 1, Bilirubin.mg.dl), + Creatinine.mg.dl.MELD = ifelse(Creatinine.mg.dl > 4, 4, Creatinine.mg.dl), + MELD.calc = (0.378 * log(Bilirubin.mg.dl.MELD) + 1.120 * log(INR) + 0.957 * log(Creatinine.mg.dl.MELD)), + MELD.calc = 10 * round(MELD.calc, 1), + Sodium = ifelse(Sodium < 125, 125, Sodium), + Sodium = ifelse(Sodium > 137, 137, Sodium), + MELD.calc = ifelse(MELD.calc > 11, + (MELD.calc + 1.32 * (137 - Sodium) - 0.033 * MELD.calc * (137 - Sodium)), + MELD.calc), + MELD.calc = round(MELD.calc, 0), + MELD.calc = ifelse(MELD.calc < 6, 6, MELD.calc), + MELD.calc = ifelse(MELD.calc > 40, 40, MELD.calc), + MELD.surv1 = 0.707^(exp((MELD.calc/10) - 1.127)), + MELD.surv2= 0.98465^(exp(0.1635*(MELD.calc - 10))), + MELD_Van = MELD_VanDerwerken[MELD.calc - 5, ]$SURV, + Albumin.MELD = ifelse(Albumin < 1.5, 1.5, Albumin), + Albumin.MELD = ifelse(Albumin > 3.5, 3.5, Albumin), + MELD3 = 1.33 * Gender + + 4.56 * log(Bilirubin.mg.dl.MELD) + + 0.82 * (137 - Sodium) - + (0.24 * (137 - Sodium) * log(Bilirubin.mg.dl.MELD)) + + 9.09 * log(INR) + + 11.14 * log(Creatinine.mg.dl.MELD) + + 1.85 * (3.5 - Albumin.MELD) - + (1.83 * (3.5 - Albumin.MELD) * log(Creatinine.mg.dl.MELD)) + 6, + MELD3 = round(MELD3, 0), + MELD3.surv = 0.946^(exp(0.17698*MELD3 - 3.56)), + # CLIF + liver.score = ifelse(Bilirubin.mg.dl < 6, 1, 2), + liver.score = ifelse(Bilirubin.mg.dl < 12, liver.score, 3), + kidney.score = ifelse(Creatinine.mg.dl < 2, 1, 2), + kidney.score = 
ifelse(Creatinine.mg.dl < 3.5, kidney.score, 3), + brain.score = ifelse(HE == 0, 1, 2), + brain.score = ifelse(HE > 2, 3, brain.score), + coag.score = ifelse(INR < 2, 1, 2), + coag.score = ifelse(INR < 2.5, coag.score, 3), + circ.score = ifelse(MAP >= 70, 1, 2), + CLIF.OF = liver.score + kidney.score + brain.score + coag.score + circ.score + 1, + CLIF.C = 10 * (0.33 * CLIF.OF + 0.04 * Age + 0.63 * log(WBC) - 2), + CLIF.surv = exp(-0.0079 * exp(0.0869 * CLIF.C)), + # Lille + #ren.insuf = ifelse(Creatinine.mg.dl < 1.3, 0, 1), + #Bilirubin.Merged = Bilirubin.mg.dl * 17, + #delta.bili = Bilirubin.Merged - Bilirubin.day.7, + #LILLE = 3.19 - 0.101 * Age + 0.147 * Albumin + 0.0165 * delta.bili - 0.206 * ren.insuf - 0.0065 * Bilirubin.Merged - 0.0096 * protime, + #Lille.surv = 1 - (exp(-LILLE)/(1 + exp(-LILLE))) + + ) + +colnames(imp_data2) + +imp_data2_Biliday7 <- imp_data_day7 %>% + dplyr::mutate( + # Survival indicator + D90_surv = 1 - D90_DTH, + # Lille + ren.insuf = ifelse(Creatinine.mg.dl < 1.3, 0, 1), + Bilirubin.Merged = Bilirubin.mg.dl * 17, + delta.bili = Bilirubin.Merged - Bilirubin.day.7, + LILLE = 3.19 - 0.101 * Age + 0.147 * Albumin + 0.0165 * delta.bili - 0.206 * ren.insuf - 0.0065 * Bilirubin.Merged - 0.0096 * protime, + Lille.surv = 1 - (exp(-LILLE)/(1 + exp(-LILLE))) + ) + +#setwd("~/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/original models") +#save(imp_data2, imp_data2_Biliday7, file = "imputed_orig_scores.Rdata") + +# data preparation sensitivity analysis +imp_data3 <- imp_sens_df %>% + dplyr::mutate( + # Survival indicator + D90_surv = 1 - D90_DTH, + # MELD + INR = ifelse(INR < 1, 1, INR), + Creatinine.mg.dl.MELD = ifelse(Creatinine.mg.dl < 1, 1, Creatinine.mg.dl), + Bilirubin.mg.dl.MELD = ifelse(Bilirubin.mg.dl < 1, 1, Bilirubin.mg.dl), + Creatinine.mg.dl.MELD = ifelse(Creatinine.mg.dl > 4, 4, Creatinine.mg.dl), + MELD.calc = (0.378 * log(Bilirubin.mg.dl.MELD) + 1.120 * log(INR) + 0.957 * log(Creatinine.mg.dl.MELD)), + MELD.calc = 10 * round(MELD.calc, 1), + Sodium = ifelse(Sodium < 125, 125, Sodium), + Sodium = ifelse(Sodium > 137, 137, Sodium), + MELD.calc = ifelse(MELD.calc > 11, + (MELD.calc + 1.32 * (137 - Sodium) - 0.033 * MELD.calc * (137 - Sodium)), + MELD.calc), + MELD.calc = round(MELD.calc, 0), + MELD.calc = ifelse(MELD.calc < 6, 6, MELD.calc), + MELD.calc = ifelse(MELD.calc > 40, 40, MELD.calc), + MELD.surv1 = 0.707^(exp((MELD.calc/10) - 1.127)), + MELD.surv2= 0.98465^(exp(0.1635*(MELD.calc - 10))), + MELD_Van = MELD_VanDerwerken[MELD.calc - 5, ]$SURV, + Albumin.MELD = ifelse(Albumin < 1.5, 1.5, Albumin), + Albumin.MELD = ifelse(Albumin > 3.5, 3.5, Albumin), + MELD3 = 1.33 * Gender + + 4.56 * log(Bilirubin.mg.dl.MELD) + + 0.82 * (137 - Sodium) - + (0.24 * (137 - Sodium) * log(Bilirubin.mg.dl.MELD)) + + 9.09 * log(INR) + + 11.14 * log(Creatinine.mg.dl.MELD) + + 1.85 * (3.5 - Albumin.MELD) - + (1.83 * (3.5 - Albumin.MELD) * log(Creatinine.mg.dl.MELD)) + 6, + MELD3 = round(MELD3, 0), + MELD3.surv = 0.946^(exp(0.17698*MELD3 - 3.56)), + # CLIF + liver.score = ifelse(Bilirubin.mg.dl < 6, 1, 2), + liver.score = ifelse(Bilirubin.mg.dl < 12, liver.score, 3), + kidney.score = ifelse(Creatinine.mg.dl < 2, 1, 2), + kidney.score = ifelse(Creatinine.mg.dl < 3.5, kidney.score, 3), + brain.score = ifelse(HE == 0, 1, 2), + brain.score = ifelse(HE > 2, 3, brain.score), + coag.score = ifelse(INR < 2, 1, 2), + coag.score = ifelse(INR < 2.5, coag.score, 3), + circ.score = ifelse(MAP >= 70, 1, 2), + CLIF.OF = liver.score + kidney.score + brain.score + coag.score + circ.score + 1, 
+ CLIF.C = 10 * (0.33 * CLIF.OF + 0.04 * Age + 0.63 * log(WBC) - 2), + CLIF.surv = exp(-0.0079 * exp(0.0869 * CLIF.C)), + # Lille + ren.insuf = ifelse(Creatinine.mg.dl < 1.3, 0, 1), + Bilirubin.Merged = Bilirubin.mg.dl * 17, + delta.bili = Bilirubin.Merged - Bilirubin.day.7, + LILLE = 3.19 - 0.101 * Age + 0.147 * Albumin + 0.0165 * delta.bili - 0.206 * ren.insuf - 0.0065 * Bilirubin.Merged - 0.0096 * protime, + Lille.surv = 1 - (exp(-LILLE)/(1 + exp(-LILLE))) + + ) + +colnames(imp_data3) +dim(imp_data3) + +#setwd("~/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/original models") +#save(imp_data3, file = "imputed_sens_orig_scores.Rdata") + diff --git a/original models/sample_size.R b/original models/sample_size.R new file mode 100644 index 0000000..7fd89c6 --- /dev/null +++ b/original models/sample_size.R @@ -0,0 +1,10 @@ +# Sample sizes for original analysis + +# data +path_data <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/original models/" +load(paste0(path_data, "original_models.Rdata")) + +# overall sample size per model +nrow(stph.meld); nrow(stph.lille); nrow(stph.clif) + + diff --git a/original models/val_data/models_validation.Rdata b/original models/val_data/models_validation.Rdata new file mode 100644 index 0000000..b29b482 Binary files /dev/null and b/original models/val_data/models_validation.Rdata differ diff --git a/original models/val_data/prognostic_scores_validation.R b/original models/val_data/prognostic_scores_validation.R new file mode 100644 index 0000000..296e5aa --- /dev/null +++ b/original models/val_data/prognostic_scores_validation.R @@ -0,0 +1,142 @@ + +############################################# +# Load data +path <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/pre-analysis/val_data/" +load(paste0(path, "full_sample_Global_AlcHep.Rdata")) + +############################################# +############### MELD score ################## +############################################# +##### +# First constrain INR, Creatinine, and Bilirubin to be within certain bandwidth +Global_AlcHep.meld$INR.MELD <- ifelse(Global_AlcHep.meld$INR < 1, 1, Global_AlcHep.meld$INR) +Global_AlcHep.meld$Creatinine.MELD <- ifelse(Global_AlcHep.meld$Creatinine < 1, 1, + ifelse(Global_AlcHep.meld$Creatinine > 4, 4, + Global_AlcHep.meld$Creatinine)) +#Global_AlcHep.meld$Bilirubin.mg.dl.MELD <- Global_AlcHep.meld$Bilirubin / 10 +Global_AlcHep.meld$Bilirubin.MELD <- ifelse(Global_AlcHep.meld$Bilirubin < 1, 1, + Global_AlcHep.meld$Bilirubin) + +# Calculate basic MELD score +Global_AlcHep.meld$MELD.calc <- 0.378 * log(Global_AlcHep.meld$Bilirubin.MELD) + + 1.120 * log(Global_AlcHep.meld$INR.MELD) + + 0.957 * log(Global_AlcHep.meld$Creatinine.MELD) + +# Round score to the nearest tenth +Global_AlcHep.meld$MELD.calc <- round(Global_AlcHep.meld$MELD.calc, 1) + +# Multiply score by ten +Global_AlcHep.meld$MELD.calc <- 10 * Global_AlcHep.meld$MELD.calc + +# Constrain serum sodium to be within certain bandwidth +Global_AlcHep.meld$Sodium <- ifelse(Global_AlcHep.meld$Sodium < 125, 125, Global_AlcHep.meld$Sodium) +Global_AlcHep.meld$Sodium <- ifelse(Global_AlcHep.meld$Sodium > 137, 137, Global_AlcHep.meld$Sodium) + +# Recalculate to get the MELD-Na score +for(i in 1:nrow(Global_AlcHep.meld)){ + if(Global_AlcHep.meld$MELD.calc[i] > 11){ + temp <- Global_AlcHep.meld$MELD.calc[i] + 1.32 * (137 - Global_AlcHep.meld$Sodium[i]) - + 0.033 * Global_AlcHep.meld$MELD.calc[i]*(137 - Global_AlcHep.meld$Sodium[i]) + Global_AlcHep.meld$MELD.calc[i] <- temp + } else { + 
Global_AlcHep.meld$MELD.calc[i] <- Global_AlcHep.meld$MELD.calc[i] + } +} + +# Round to nearest integer +Global_AlcHep.meld$MELD.calc <- round(Global_AlcHep.meld$MELD.calc, 0) + +# Constrain MELD to be between 6 and 40 +Global_AlcHep.meld$MELD.calc <- ifelse(Global_AlcHep.meld$MELD.calc < 6, 6, Global_AlcHep.meld$MELD.calc) +Global_AlcHep.meld$MELD.calc <- ifelse(Global_AlcHep.meld$MELD.calc > 40, 40, Global_AlcHep.meld$MELD.calc) + +# Calculate 90-day survival probability based on MELD-Na score (2 different functions) +Global_AlcHep.meld$MELD.surv <- 0.707^(exp((Global_AlcHep.meld$MELD.calc/10) - 1.127)) +Global_AlcHep.meld$MELD.surv2 <- 0.98465^(exp(0.1635*(Global_AlcHep.meld$MELD.calc - 10))) + +# Extract 90-day survival probability from VanDerwerke et al, 2021 +library(readxl) +MELD_VanDerwerken <- read_excel("~/IDrive-Sync/Projects/MIMAH/data/MELD_VanDerwerken.xlsx") + +for(i in 1:nrow(Global_AlcHep.meld)){ + Global_AlcHep.meld$MELD_Van[i] <- MELD_VanDerwerken[Global_AlcHep.meld$MELD.calc[i] - 5, ]$SURV +} + +#### +# Calculate the MELD 3.0 score (sex-adjusted version of the MELD) +# Constrain Albumin +Global_AlcHep.meld$Albumin.MELD <- ifelse(Global_AlcHep.meld$Albumin < 1.5, 1.5, Global_AlcHep.meld$Albumin) +Global_AlcHep.meld$Albumin.MELD <- ifelse(Global_AlcHep.meld$Albumin> 3.5, 3.5, Global_AlcHep.meld$Albumin) + +# Calculate MELD 3.0, round to nearest integer, and calculate corresponding survival +#Global_AlcHep.meld$MELD3 <- 1.33 * Global_AlcHep.meld$Gender + +# 4.56 * log(Global_AlcHep.meld$Bilirubin.mg.dl.MELD) + +# 0.82 * (137 - Global_AlcHep.meld$Sodium) - +# (0.24 * (137 - Global_AlcHep.meld$Sodium) * log(Global_AlcHep.meld$Bilirubin.mg.dl.MELD)) + +# 9.09 * log(Global_AlcHep.meld$INR) + +# 11.14 * log(Global_AlcHep.meld$Creatinine.mg.dl.MELD) + +# 1.85 * (3.5 - Global_AlcHep.meld$Albumin.MELD) - +# (1.83 * (3.5 - Global_AlcHep.meld$Albumin.MELD) * log(Global_AlcHep.meld$Creatinine.mg.dl.MELD)) +#+ 6 + +#stph.meld$MELD3 <- round(stph.meld$MELD3, 0) +#stph.meld$MELD3.surv <- 0.946^(exp(0.17698*stph.meld$MELD3 - 3.56)) + +############################################# +################### Lille score ############# +############################################# +##### +# First create renal insufficiency dummy +Global_AlcHep.lille$ren.insuf <- ifelse(Global_AlcHep.lille$Creatinine <= 1.3, 0, 1) + +# convert from mg/L to micromole per litre (μmol/L) +Global_AlcHep.lille$Bilirubin.Lille <- Global_AlcHep.lille$Bilirubin * 17.1 #5.5506216696 +Global_AlcHep.lille$Bilirubin.day.7.Lille <- Global_AlcHep.lille$Bilirubin.day.7 * 17.1 #5.5506216696 + +# convert from g/dL to g/L +Global_AlcHep.lille$Albumin.Lille <- Global_AlcHep.lille$Albumin * 10 + +# Calculate the change in bilirubin +Global_AlcHep.lille$delta.bili <- Global_AlcHep.lille$Bilirubin.Lille - Global_AlcHep.lille$Bilirubin.day.7.Lille + +# Calculate the Lille score and corresponding survival probability +Global_AlcHep.lille$LILLE <- 3.19 - + 0.101 * Global_AlcHep.lille$Age + + 0.147 * Global_AlcHep.lille$Albumin.Lille + + 0.0165 * Global_AlcHep.lille$delta.bili - + 0.206 * Global_AlcHep.lille$ren.insuf - + 0.0065 * Global_AlcHep.lille$Bilirubin.Lille - + 0.0096 * Global_AlcHep.lille$protime + +Global_AlcHep.lille$Lille.risk <- (exp(-Global_AlcHep.lille$LILLE)/(1 + exp(-Global_AlcHep.lille$LILLE))) +Global_AlcHep.lille$Lille.surv <- 1 - Global_AlcHep.lille$Lille.risk + +#Global_AlcHep.lille[100,] %>% +# select(Bilirubin, Bilirubin.Lille, Age, Bilirubin.day.7, Bilirubin.day.7.Lille, +# Albumin, Creatinine, protime, LILLE, 
Lille.risk) + +############################################# +############# Add updated models ############ +############################################# +Global_AlcHep.meld <- Global_AlcHep.meld %>% + mutate(meld_updated = 2.54 - 0.09 * MELD.calc, + meld.surv.updated = 1/(1 + exp(- meld_updated))) +Global_AlcHep.lille <- Global_AlcHep.lille %>% + mutate(lille_updated = 0.98 + 0.36 * LILLE, + lille.surv.updated = 1/(1 + exp(- lille_updated))) + +#path <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/original models/val_data" +#setwd(path) +#save(Global_AlcHep.meld, Global_AlcHep.lille, file = "models_validation.Rdata") + +# Tabulate the calculated prognostic scores and survival probabilities +library(table1) +tb_MELD <- table1(~ MELD.calc + MELD.surv + MELD.surv2 | factor(D90_surv), data = Global_AlcHep.meld) +tb_Lille <- table1(~ LILLE + Lille.surv | factor(D90_surv), data = Global_AlcHep.lille) + +library(xtable) +xtable(as_tibble(tb_MELD)) +xtable(as_tibble(tb_Lille)) + + + diff --git a/pre-analysis/.DS_Store b/pre-analysis/.DS_Store new file mode 100755 index 0000000..5008ddf Binary files /dev/null and b/pre-analysis/.DS_Store differ diff --git a/pre-analysis/full_sample.Rdata b/pre-analysis/full_sample.Rdata new file mode 100755 index 0000000..bd82c7d Binary files /dev/null and b/pre-analysis/full_sample.Rdata differ diff --git a/pre-analysis/import_data.R b/pre-analysis/import_data.R old mode 100644 new mode 100755 index 7e54aa9..063f060 --- a/pre-analysis/import_data.R +++ b/pre-analysis/import_data.R @@ -4,9 +4,11 @@ library(tidyverse) library(dplyr) # Load data -load("stopah_plus_scores.Rdata") +path <- "/Users/work/IDrive-Sync/Projects/MIMAH/data" +load(paste0(path, "/stopah_plus_scores.Rdata")) stph <- rename(data_stph_prelim3) + # Rename relevant variables stph <- rename(stph, Bilirubin.mg.dl = Bilirubin.Merged..mg.dL..Merged..calc.) 
stph <- rename(stph, Creatinine = Creatinine...Merged) @@ -16,12 +18,47 @@ stph <- rename(stph, INR = INR...Merged.clinical.and.calc) stph <- rename(stph, protime = Prothrombin.Time..patient....Merged) # Prothrombin time stph <- rename(stph, HE = Hepatic.Encephalopathy...Merged) # Brain function stph <- rename(stph, Sodium = Sodium...Merged) - + # Create survival variable stph$D90_surv <- 1 - stph$D90_DTH # Transform creatinine into mg/dl -stph$Creatinine.mg.dl <- 0.0113*stph$Creatinine +stph$Creatinine.mg.dl <- 0.0113 * stph$Creatinine + +# Time-to-event +stph <- stph %>% + mutate(Date_of_death = as.Date(Date_of_death, format = "%d/%m/%y"), + Date_rand = as.Date(Treatment.start.date..randomisation.date.if.rx.start.date.missing., format = "%d/%m/%y"), + Date_Last_day_study_contact = as.Date(Last_day_study_contact, format = "%d-%B-%y"), + Death_event = ifelse(is.na(Date_of_death), 0, 1), + Time_to_death_from_rand = ifelse(Death_event == 1, + (Date_of_death - Date_rand), (Date_Last_day_study_contact - Date_rand))) %>% + mutate(M6_DTH = ifelse((Death_event == 1 & Time_to_death_from_rand <= 183), 1, 0), # Calculate 6-month death indicator + M6_surv = 1 - M6_DTH) # Calculate 6-month survival indicator + +#stph2$Time_to_death_from_rand +#stph2$Death_event +#stph2 %>% filter(Death_event == 0 & Time_to_death_from_rand < 90) %>% +# select(Time_to_death_from_rand, Death_event, Date_of_death, +# Time.to.death..calculated.by.MRIS.retrieved.data., Date_Last_day_study_contact, Date_rand) #%>% + #summarise(n()) + +#stph$Last_day_study_contact[1] +#as.Date(stph$Last_day_study_contact[1], format = "%d-%B-%y") +#stph2 <- stph %>% mutate(Event = ifelse(is.na(Time.to.death..calculated.by.MRIS.retrieved.data.), 0, 1)) +#stph2 %>% filter(Time.to.death..calculated.by.MRIS.retrieved.data. < 90) +#range(stph$Time.to.death..calculated.by.MRIS.retrieved.data., na.rm = T) +#stph$Y1_DTH +#plot(density(na.omit(stph$Time.to.death..calculated.by.MRIS.retrieved.data.))) +#as.Date(Date_of_death, format = "%d/%m/%y") +#ind <-str_detect(colnames(stph), "rand") +#stph[,ind] + +# check last day contact +#The trial is currently recruiting. Recruitment commenced +#on 1 February 2011 and will finish on 28 February 2014 - +#from https://link.springer.com/content/pdf/10.1186/1745-6215-14-262.pdf +max(stph$Date_Last_day_study_contact) # Handle missing data and create data frames for each of the prognostic scores stph.meld <- stph[complete.cases(stph$Bilirubin.mg.dl, stph$Creatinine, stph$INR, stph$Sodium),] @@ -30,13 +67,44 @@ stph.clif <- stph[complete.cases(stph$Bilirubin.mg.dl, stph$Creatinine, stph$INR stph.lille <- stph[complete.cases(stph$Bilirubin.day.7, stph$Bilirubin.Merged, stph$Creatinine, stph$protime, stph$Albumin),] +#path <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/pre-analysis/" +#setwd(path) +#save(stph, file = "full_sample.Rdata") + # Create table of descriptive statistics and number of missing values per variable library(table1) # Some factor variables stph$HE_f <- as.factor(stph$HE) stph$D90_surv_f <- as.factor(stph$D90_surv) stph$Gender_f <- as.factor(stph$Gender) -table1::table1(~Bilirubin.mg.dl + Creatinine.mg.dl + Albumin + WBC + protime + - INR + HE_f + Bilirubin.day.7 + Gender_f + D90_surv_f + Sodium, data = stph) +# table stratified by outcome +tb1 <- table1::table1(~ Age.at.randomisation..calc. 
+ Bilirubin.mg.dl + Creatinine.mg.dl + Albumin + WBC + protime + + INR + HE_f + Bilirubin.day.7 + Gender_f + Sodium | factor(D90_surv_f), data = stph) + +library(xtable) +xtable(as_tibble(tb1)) + +# 90-day death rate +mean(stph$D90_DTH) + +# KM estimates and plots +library(ggsurvfit) +survfit2(Surv(Time_to_death_from_rand, Death_event) ~ 1, data = stph) %>% + ggsurvfit() + + labs( + x = "Days", + y = "Overall survival probability" + ) + + add_confidence_interval() + + add_risktable() + +summary(survfit(Surv(Time_to_death_from_rand, Death_event) ~ 1, data = stph), times = 90) +summary(survfit(Surv(Time_to_death_from_rand, Death_event) ~ 1, data = stph), times = 183) + +survfit(Surv(Time_to_death_from_rand, Death_event) ~ 1, data = stph) %>% + gtsummary::tbl_survfit( + times = 90, + label_header = "**1-year survival (95% CI)**" + ) diff --git a/pre-analysis/imputation.R b/pre-analysis/imputation.R new file mode 100644 index 0000000..b1aec17 --- /dev/null +++ b/pre-analysis/imputation.R @@ -0,0 +1,168 @@ + +################################################## +###################### pkgs ###################### +################################################## +library(mice) +library(dplyr) +library(purrr) +library(forcats) +library(VIM) +library(ggplot2) +library(gridExtra) + +################################################## +###################### data ###################### +################################################## +path_data <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/pre-analysis/" +load(paste0(path_data, "full_sample.Rdata")) + +# select patients that died within 7 days of randomisation +dead_before_day7 <- stph %>% filter(Time_to_death_from_rand <= 7 & Death_event == 1) + +# these are 49 in total +dead_before_day7 %>% summarise(n()) + +# exclude patients that died within 7 days from analysis +stph_day7 <- stph %>% filter(! (Subject %in% dead_before_day7$Subject)) + +################################################## +################## logistic ###################### +################################################## +# association between MELD and missing bilirubin day 7 +stph_missing_day7 <- stph_day7 %>% mutate(miss_bili_day7 = ifelse(is.na(Bilirubin.day.7), 1, 0)) + +fit <- glm(miss_bili_day7 ~ MELD, data = stph_missing_day7, family = binomial("logit")) +exp(coef(fit)) # odds ratio +exp(confint.default(fit)) # CIs +summary(fit) + +################################################## +################ imputation ###################### +################################################## +# vars to use in the imputation +vars_MELD <- c("Creatinine.mg.dl", "Bilirubin.mg.dl", "INR", "Sodium") +vars_MELD3 <- c(vars_MELD, "Albumin", "Gender") +vars_Lille_without_Bili7 <- c("Age", "Albumin", "Bilirubin.mg.dl", "protime") +vars_CLIF <- c("Bilirubin.mg.dl", "Creatinine.mg.dl", "HE", "INR", "MAP", "Age", "WBC") + +vars_Lille_with_Bili7 <- c("Age", "Albumin", "Bilirubin.day.7", "Bilirubin.mg.dl", "protime") + +# select dataframe for all models excluding Bilirubin.day.7 +stph_short <- stph %>% + rename(Age = Age.at.randomisation..calc.) %>% + select(D90_DTH, all_of(vars_MELD), all_of(vars_MELD3), all_of(vars_Lille_without_Bili7), all_of(vars_CLIF)) + +#select dataframe for Lille +stph_short_BiliDay7 <- stph %>% + filter(! (Subject %in% dead_before_day7$Subject)) %>% #exclude patients that died within 7 days from analysis + rename(Age = Age.at.randomisation..calc.) 
%>% + select(D90_DTH, all_of(vars_MELD), all_of(vars_MELD3), all_of(vars_Lille_with_Bili7), all_of(vars_CLIF)) + +# create df for plot of missing values +stph_plot <- stph %>% + rename(Age = Age.at.randomisation..calc.) %>% + select(D90_DTH, all_of(vars_MELD), all_of(vars_MELD3), all_of(vars_Lille_with_Bili7), all_of(vars_CLIF)) + +# inspect the missing data pattern +#md.pattern(stph_short, rotate.names = T) + +# missingness plot +a <- aggr(stph_plot, plot = F) +#plot(a, numbers = TRUE, prop = FALSE, only.miss = T, combined = T) + +a$missings %>% filter(Count != 0) %>% # filter out complete obs + mutate(Variable = + case_when( + Variable == "Bilirubin.mg.dl" ~ "Bilirubin day 0", + Variable == "Creatinine.mg.dl" ~ "Creatinine", + Variable == "Bilirubin.day.7" ~ "Bilirubin day 7", + Variable == "proctime" ~ "Prothrombin time", + TRUE ~ Variable)) %>% + mutate(Variable = fct_reorder(Variable, Count)) %>% # reorder factor + ggplot(., aes(x = Variable, y = Count)) + + geom_bar(stat = "identity") + + geom_text(aes(label = Count), hjust = -0.5, size = 3.5) + + labs(x = "") + + coord_flip() + + theme_classic(12) + + theme(axis.text.x = element_blank(), + axis.ticks.x = element_blank()) + +# multiple imputation +imp <- mice(stph_short, m = 20, method = "pmm", seed = 5678) + +imp_day7 <- mice(stph_short_BiliDay7, m = 20, method = "pmm", seed = 5678) + +# predictor matrix +# https://www.gerkovink.com/miceVignettes/Convergence_pooling/Convergence_and_pooling.html +imp$pred + +# inspect the convergence of the algorithm +plot(imp) + +# imputation method +imp$method #pmm method + +# diagnostic checking +stripplot(imp, Bilirubin.day.7 ~ .imp, pch = 20, cex = 2) +stripplot(imp) + +summary(stph_short$Bilirubin.day.7) +summary(with(imp, mean(Bilirubin.day.7))) + +densityplot(imp, ~ Bilirubin.day.7) + +# collection of the m imputed data sets +imp_data <- complete(imp, action = "long") +imp_data_day7 <- complete(imp_day7, action = "long") + +imp_data[which(is.na(imp_data)),] + +#setwd("~/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/pre-analysis") +#save(imp_data, imp_data_day7, file = "imputed_data.Rdata") + +########################################################### +################### Sensitivity analysis ################## +########################################################### +# create vector that represent the following adjustment values +#for Bili day 7: 0 for MAR, and -50, -100, and -200 for MNAR. 
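+# Each delta below is added to the imputed Bilirubin.day.7 values through
+# mice's post-processing step (the post argument); squeeze() then constrains
+# the shifted values to c(0, 1061), presumably the plausible range for this
+# variable, so the MNAR scenarios shift the imputations downwards without
+# producing negative bilirubin values.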
+delta <- c(0, -50, -100, -200) + +# perform a dry run (using maxit = 0) +ini <- mice(stph_short_BiliDay7, maxit = 0) + +imp.all <- vector("list", length(delta)) +post <- ini$post +for (i in 1:length(delta)){ + d <- delta[i] + cmd <- paste("imp[[j]][,i] <- squeeze(imp[[j]][,i] +", d, ",c(0, 1061))") #squeeze values to observed ranges + post["Bilirubin.day.7"] <- cmd + imp <- mice(stph_short_BiliDay7, post = post, m = 20, maxit = 30, seed = i, print = FALSE) + imp.all[[i]] <- imp +} + +#bwplot(imp.all[[1]]) +#bwplot(imp.all[[4]]) + +p1 <- densityplot(imp.all[[1]], ~ Bilirubin.day.7, lwd = 3, xlab = "", ylab = "", main = expression(paste(delta, " = 0"))) +p2 <- densityplot(imp.all[[2]], ~ Bilirubin.day.7, lwd = 3, xlab = "", ylab = "", main = expression(paste(delta, " = -50"))) +p3 <- densityplot(imp.all[[3]], ~ Bilirubin.day.7, lwd = 3, xlab = "", ylab = "", main = expression(paste(delta, " = -100"))) +p4 <- densityplot(imp.all[[4]], ~ Bilirubin.day.7, lwd = 3, xlab = "", ylab = "", main = expression(paste(delta, " = -200"))) + +grid.arrange(p1, p2, p3, p4, nrow = 1, + bottom = textGrob("Bilirubin day 7", gp = gpar(fontsize = 13), vjust = -1.5), + left = textGrob("Density", gp = gpar(fontsize = 13), rot = 90, vjust = 1.5, hjust = 0) + ) + +#densityplot(imp.all[[1]], ~ Bilirubin.day.7, lwd = 3) +#densityplot(imp.all[[4]], ~ Bilirubin.day.7, lwd = 3) +#summary(with(imp.all[[3]], range(Bilirubin.day.7))) + +## post-process +names(imp.all) <- delta # add names to list +complete_list <- lapply(imp.all, complete, action = "long") +imp_sens_df <- map_df(complete_list, ~ as.data.frame(.x), .id = "delta") + +#setwd("~/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/pre-analysis") +#save(imp.all, imp_sens_df, file = "imputed_data_sens.Rdata") + diff --git a/pre-analysis/imputed_data.Rdata b/pre-analysis/imputed_data.Rdata new file mode 100644 index 0000000..76dcbf7 Binary files /dev/null and b/pre-analysis/imputed_data.Rdata differ diff --git a/pre-analysis/imputed_data_sens.Rdata b/pre-analysis/imputed_data_sens.Rdata new file mode 100644 index 0000000..0372d9f Binary files /dev/null and b/pre-analysis/imputed_data_sens.Rdata differ diff --git a/pre-analysis/missing_data.R b/pre-analysis/missing_data.R old mode 100644 new mode 100755 diff --git a/pre-analysis/val_data/full_sample_Global_AlcHep.Rdata b/pre-analysis/val_data/full_sample_Global_AlcHep.Rdata new file mode 100644 index 0000000..99682fb Binary files /dev/null and b/pre-analysis/val_data/full_sample_Global_AlcHep.Rdata differ diff --git a/pre-analysis/val_data/import_data_validation.R b/pre-analysis/val_data/import_data_validation.R new file mode 100644 index 0000000..16a4ea3 --- /dev/null +++ b/pre-analysis/val_data/import_data_validation.R @@ -0,0 +1,94 @@ +###### +# This script reads in the Global_AlcHep data and creates workable data frames +library(readxl) +library(tidyverse) +library(dplyr) + +# Load data +path <- "/Users/work/IDrive-Sync/Projects/MIMAH/data" +Global_AlcHep <- read_excel(paste0(path, "/Global_AlcHep.xlsx")) + +# Rename relevant variables +Global_AlcHep <- rename(Global_AlcHep, Bilirubin = Bili_admission) # mg/L +Global_AlcHep <- rename(Global_AlcHep, Creatinine = "Creatinine\r\nadmission") # mg/dL or mg/L +Global_AlcHep <- rename(Global_AlcHep, Albumin = Alb_admission) #g/dL +Global_AlcHep <- rename(Global_AlcHep, INR = INR_admission) +Global_AlcHep <- rename(Global_AlcHep, protime = PT_admission) # Prothrombin time (sec) +Global_AlcHep <- rename(Global_AlcHep, Sodium = "Sodium at admission") +Global_AlcHep <- 
rename(Global_AlcHep, Bilirubin.day.7 = "Total bili day 7") + +# to check again - the discrepancy +#diff <- (Global_AlcHep$`INR admission` - Global_AlcHep$INR_admission) +#ind <- which(diff !=0 ) +#Global_AlcHep[ind, ] + +# Create survival variable +Global_AlcHep <- rename(Global_AlcHep, D90_surv = "Alive at day 90\r\n") +Global_AlcHep$D90_DTH <- 1 - Global_AlcHep$D90_surv + +# Time-to-event +Global_AlcHep <- Global_AlcHep %>% + mutate(Date_of_death = as.Date(`Date of death`, format = "%d/%m/%y"), + Date_rand = as.Date(`Date of admission`, format = "%d/%m/%y"), + Date_Last_day_study_contact = as.Date(`Last follow up`, format = "%d-%B-%y"), + Death_event = ifelse(is.na(Date_of_death), 0, 1), + Time_to_death_from_rand = ifelse(Death_event == 1, + (Date_of_death - Date_rand), (Date_Last_day_study_contact - Date_rand))) + +# Handle missing data and create data frames for each of the prognostic scores +Global_AlcHep.meld <- Global_AlcHep[complete.cases(Global_AlcHep$Bilirubin, + Global_AlcHep$Creatinine, + Global_AlcHep$INR, + Global_AlcHep$Sodium),] + +Global_AlcHep.lille <- Global_AlcHep[complete.cases(Global_AlcHep$Bilirubin.day.7, + Global_AlcHep$Bilirubin, + Global_AlcHep$Creatinine, + Global_AlcHep$protime, + Global_AlcHep$Albumin),] + +#path <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/pre-analysis/val_data" +#setwd(path) +#save(Global_AlcHep.meld, Global_AlcHep.lille, file = "full_sample_Global_AlcHep.Rdata") + +# Create table of descriptive statistics and number of missing values per variable +library(table1) +# Some factor variables +Global_AlcHep$D90_surv_f <- as.factor(Global_AlcHep$D90_surv) +Global_AlcHep$Gender_f <- as.factor(Global_AlcHep$Gender) +# table stratified by outcome +tb1 <- table1::table1(~ Age + Bilirubin + Creatinine + Albumin + protime + + INR + Bilirubin.day.7 + Gender_f + Sodium | factor(D90_surv_f), data = Global_AlcHep) +tb1 + + +library(xtable) +xtable(as_tibble(tb1)) + +# 90-day death rate +mean(Global_AlcHep$D90_DTH) + +# +unique(Global_AlcHep$Country) + +# KM estimates and plots +library(ggsurvfit) +survfit2(Surv(Time_to_death_from_rand, Death_event) ~ 1, data = Global_AlcHep) %>% + ggsurvfit() + + labs( + x = "Days", + y = "Overall survival probability" + ) + + add_confidence_interval() + + add_risktable() + +summary(survfit(Surv(Time_to_death_from_rand, Death_event) ~ 1, data = Global_AlcHep), times = 90) +summary(survfit(Surv(Time_to_death_from_rand, Death_event) ~ 1, data = Global_AlcHep), times = 183) + +survfit(Surv(Time_to_death_from_rand, Death_event) ~ 1, data = Global_AlcHep) %>% + gtsummary::tbl_survfit( + times = 90, + label_header = "**1-year survival (95% CI)**" + ) + + diff --git a/subgroup comparison/.DS_Store b/subgroup comparison/.DS_Store new file mode 100644 index 0000000..5008ddf Binary files /dev/null and b/subgroup comparison/.DS_Store differ diff --git a/subgroup comparison/subgroup_age.R b/subgroup comparison/subgroup_age.R new file mode 100755 index 0000000..7fb31bb --- /dev/null +++ b/subgroup comparison/subgroup_age.R @@ -0,0 +1,170 @@ + +library(pROC) +library(ggplot2) +library(dplyr) + +# data original models +#path_data <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/original models/" +#load(paste0(path_data, "original_models.Rdata")) + +# data recalibrated models +path_data_rec <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/updating models" +load(paste0(path_data_rec, "/recalibrated_models_default.Rdata")) + +# funs +path_funs <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/funs" 
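+# calibration_fun.R provides the calibration() helper used for the calibration
+# curves further below.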
+source(paste0(path_funs, "/calibration_fun.R")) + + +age_cut_off <- 49 # stratify by the median age of 49 years + +test.y <- test.data[test.data$Age.at.randomisation..calc. < age_cut_off,] +test.o <- test.data[test.data$Age.at.randomisation..calc. >= age_cut_off,] + +# choose age-group to calculate statistics +age_group <- "o" # "y" or "o" + +if(age_group == "y"){ + dataset <- test.y +}else if(age_group == "o"){ + dataset <- test.o +} + +#### Discrimination ############### +# MELD updated +roc_meld <- roc(dataset$D90_DTH, dataset$meld.surv.updated) +# CLIF - updated +roc_clif <- roc(dataset$D90_DTH, dataset$clif.surv.updated) +# Lille - updated +roc_lille <- roc(dataset$D90_DTH, dataset$lille.surv.updated) + +## Plots ############### +roc.list <- list("MELD updated" = roc_meld, + #"MELD 3.0" = roc_meld3, + "CLIF-C ACLF updated" = roc_clif, + "Lille updated" = roc_lille) + +#save(roc.list, file = "ROC_updated.Rdata") + +g.list <- ggroc(roc.list) + +# ROC plot of all models combined +g.list + + geom_line(lwd = 1) + + geom_abline(slope = 1, intercept = 1, linetype = "dashed") + + theme_classic() + +# faceting +g.list + + facet_grid(. ~ name) + + ggtitle(paste0("age_group ", age_group)) + + geom_line(lwd = 1) + + geom_abline(slope = 1, intercept = 1, linetype = "dashed") + + theme_classic() + + theme(legend.position="none") + +# add confidence bands +ci.list <- lapply(roc.list, ci.se, specificities = seq(0, 1, l = 25)) + +dat.ci.list <- lapply(ci.list, function(ciobj) + data.frame(x = as.numeric(rownames(ciobj)), + lower = ciobj[, 1], + upper = ciobj[, 3])) + +df <- plyr::ldply(dat.ci.list, data.frame, .id = "name") + +ggroc(roc.list) + + facet_grid(. ~ name) + + theme_minimal() + + ggtitle(paste0("age_group ", age_group)) + + geom_ribbon(data = df, aes(x = x, ymin = lower, ymax = upper, fill = name), alpha = 0.3, inherit.aes = F) + + geom_abline(slope = 1, intercept = 1, linetype = "dashed") + + labs(x = "Specificity", y = "Sensitivity") + + coord_equal() + + theme_classic() + + theme(legend.position = "none") + +############################################# +###################### AUC ################# +############################################# +df_AUC <- as.data.frame(map_dfr(roc.list, ci.auc)) +rownames(df_AUC) <- c("low_CL", "mean", "upper_CL") + +df_AUC2 <- tibble::rownames_to_column(df_AUC, var = "AUC") + +df3 <- gather(df_AUC2, condition, measurement, `MELD updated`:`Lille updated`, factor_key = TRUE) +data_wide <- spread(df3, AUC, measurement) %>% arrange(mean) + +# reorder factor levels +data_wide$condition <- fct_reorder(data_wide$condition, data_wide$mean) + +p_auc_young <- ggplot(data_wide, aes(x = mean, y = condition, col = condition)) + + #ggtitle(age_group) + + geom_point(lwd = 2) + + coord_cartesian(xlim = c(0.5, 0.86)) + + geom_errorbar(aes(xmin = low_CL, xmax = upper_CL), + alpha = 1, show.legend = F, lwd = 1, width = 0.5) + + labs(y = "Score", col = "Score", x = "AUC with 95% limits") + + scale_color_brewer(palette = "Dark2") + + scale_fill_brewer(palette = "Dark2") + + theme_classic() + +#### Calibration ############### +# MELD updated +cal_meld <- calibration(dataset$meld.surv.updated, dataset$D90_surv) +cal_meld$Score <- "MELD updated" +# CLIF - updated +cal_clif <- calibration(dataset$clif.surv.updated, dataset$D90_surv) +cal_clif$Score <- "CLIF-C ACLF updated" +# Lille - updated +cal_lille <- calibration(dataset$lille.surv.updated, dataset$D90_surv) +cal_lille$Score <- "Lille updated" + +# combine dfs +df_cal <- rbind(cal_meld, cal_clif, cal_lille) + +df_cal$Score 
<- factor(df_cal$Score, labels = levels(data_wide$condition)) + +# plot without ribbon +df_cal %>% + ggplot(., aes(x = pred, y = obs, col = Score)) + + ggtitle(paste0("age_group ", age_group)) + + geom_line(lwd = 1) + + geom_abline(slope = 1, intercept = 0, linetype = "dashed") + + #geom_ribbon(aes(ymin = lower, ymax = upper, linetype = NA), + # alpha = 0.3, show.legend = F) + + #scale_fill_manual("", values = col) + + #scale_color_manual(name = "Score", values = col) + + xlim(0, 1) + + ylim(0, 1) + + ylab("Observed proportion") + + xlab("Predicted probability") + + theme_classic() + +# plot with ribbon +p_calibration_young <- df_cal %>% + ggplot(., aes(x = pred, y = obs, col = Score)) + + geom_line(lwd = 1) + + geom_abline(slope = 1, intercept = 0, linetype = "dashed") + + geom_ribbon(aes(ymin = lower, ymax = upper, fill = Score, linetype = NA), + alpha = 0.3, show.legend = F) + + #scale_fill_manual("", values = col) + + #scale_color_manual(name = "Score", values = col) + + facet_grid(. ~ Score) + + coord_equal() + + xlim(0, 1) + + ylim(0, 1) + + scale_color_brewer(palette = "Dark2") + + scale_fill_brewer(palette = "Dark2") + + ylab("Observed survival proportion") + + xlab("Predicted survival probability") + + theme_classic() + + +##### combine plots +source("/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/grid_arrange_fun.R") +library(gtable) +p1 <- grid_arrange_shared_legend(p_auc_young, p_calibration_young) + +p2 <- grid_arrange_shared_legend(p_auc_old, p_calibration_old) + diff --git a/subgroup comparison/subgroup_comparison.R b/subgroup comparison/subgroup_comparison.R old mode 100644 new mode 100755 index fa77439..e332a76 --- a/subgroup comparison/subgroup_comparison.R +++ b/subgroup comparison/subgroup_comparison.R @@ -7,11 +7,12 @@ source("dca.R") ###### # Comparison of ROC between MELD3.0 and original MELD -roc_meld_u <- roc(test.meld$D90_surv, test.meld$meld.surv.updated) -roc_meld3 <- roc(stph.meld$D90_DTH, stph.meld$MELD3.surv) +#roc_meld_u <- roc(test.meld$D90_surv, test.meld$meld.surv.updated) +#roc_meld3 <- roc(stph.meld$D90_DTH, stph.meld$MELD3.surv) + +#roc.test(roc_meld3, roc_meld) +#roc.test(roc_meld3, roc_meld_u) -roc.test(roc_meld3, roc_meld) -roc.test(roc_meld3, roc_meld_u) ###### # Sub-group based on sex diff --git a/subgroup comparison/subgroup_sex.R b/subgroup comparison/subgroup_sex.R new file mode 100755 index 0000000..b310152 --- /dev/null +++ b/subgroup comparison/subgroup_sex.R @@ -0,0 +1,207 @@ + +library(pROC) +library(survminer) # for plotting theme + +# data original models +path_data <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/original models/" +load(paste0(path_data, "original_models.Rdata")) + +# data recalibrated models +path_data_rec <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/updating models" +load(paste0(path_data_rec, "/recalibrated_models_default.Rdata")) + +# funs +path_funs <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/funs" +source(paste0(path_funs, "/calibration_fun.R")) + +################################################################################ +############################ Sub-group based on sex ############################ +################################################################################ +# Calculations on the MELD 3.0 score - first create two separate dfs per sex +stph.meld.males <- stph.meld[stph.meld$Gender == 0,] +stph.meld.females <- stph.meld[stph.meld$Gender == 1,] + +# Calculations on the updated MELD score +test.males <- test.data[test.data$Gender == 0,] 
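# Optional sanity check (assumes Gender is coded 0 = male and 1 = female, as
# implied by the *.males / *.females names above and below); reports the coding
# and the resulting subgroup sizes before the sex-stratified analysis is run:
table(stph.meld$Gender, useNA = "ifany")
table(test.data$Gender, useNA = "ifany")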
+test.females <- test.data[test.data$Gender == 1,] + +# choose sex to calculate statistics +sex <- "m" # or "f +#sex <- "m" # or "f + +if(sex == "m"){ + dataset_original <- stph.meld.males + dataset_updated <- test.males + }else if(sex == "f"){ + dataset_original <- stph.meld.females + dataset_updated <- test.females + } + +#### Discrimination ############### +# MELD 3.0 - original +roc_meld3 <- roc(dataset_original$D90_DTH, dataset_original$MELD3.surv) +# MELD updated +roc_meld <- roc(dataset_updated$D90_DTH, dataset_updated$meld.surv.updated) +# CLIF - updated +roc_clif <- roc(dataset_updated$D90_DTH, dataset_updated$clif.surv.updated) +# Lille - updated +roc_lille <- roc(dataset_updated$D90_DTH, dataset_updated$lille.surv.updated) + + +## Plots ############### +roc.list <- list("MELD updated" = roc_meld, + "MELD 3.0" = roc_meld3, + "CLIF-C ACLF updated" = roc_clif, + "Lille updated" = roc_lille) + +#save(roc.list, file = "ROC_updated.Rdata") + +g.list <- ggroc(roc.list) + +# ROC plot of all models combined +g.list + + geom_line(lwd = 1) + + geom_abline(slope = 1, intercept = 1, linetype = "dashed") + + theme_classic() + +# faceting +g.list + + facet_grid(. ~ name) + + ggtitle(sex) + + geom_line(lwd = 1) + + geom_abline(slope = 1, intercept = 1, linetype = "dashed") + + theme_classic2() + + scale_color_brewer(palette = "Dark2") + + scale_fill_brewer(palette = "Dark2") + + theme_classic() + + theme(legend.position="none") + +# add confidence bands +ci.list <- lapply(roc.list, ci.se, specificities = seq(0, 1, l = 25)) + +dat.ci.list <- lapply(ci.list, function(ciobj) + data.frame(x = as.numeric(rownames(ciobj)), + lower = ciobj[, 1], + upper = ciobj[, 3])) + +df <- plyr::ldply(dat.ci.list, data.frame, .id = "name") + +ggroc(roc.list) + + facet_grid(. 
~ name) + + theme_minimal() + + ggtitle(sex) + + geom_ribbon(data = df, aes(x = x, ymin = lower, ymax = upper, fill = name), alpha = 0.3, inherit.aes = F) + + geom_abline(slope = 1, intercept = 1, linetype = "dashed") + + labs(x = "Specificity", y = "Sensitivity") + + coord_equal() + + theme_classic() + + theme(legend.position = "none") + +############################################# +###################### AUC ################# +############################################# +df_AUC <- as.data.frame(map_dfr(roc.list, ci.auc)) +rownames(df_AUC) <- c("low_CL", "mean", "upper_CL") + +df_AUC2 <- tibble::rownames_to_column(df_AUC, var = "AUC") + +df3 <- gather(df_AUC2, condition, measurement, `MELD updated`:`Lille updated`, factor_key = TRUE) +data_wide <- spread(df3, AUC, measurement) %>% arrange(mean) + +# reorder factor levels +data_wide$condition <- fct_reorder(data_wide$condition, data_wide$mean) + +p_auc_females <- ggplot(data_wide, aes(x = mean, y = condition, col = condition)) + + #ggtitle(sex) + + geom_point(lwd = 2) + + coord_cartesian(xlim = c(0.5, 0.86)) + + geom_errorbar(aes(xmin = low_CL, xmax = upper_CL), + alpha = 1, show.legend = F, lwd = 1, width = 0.5) + + labs(y = "Score", col = "Score", x = "AUC with 95% limits") + + scale_color_brewer(palette = "Dark2") + + scale_fill_brewer(palette = "Dark2") + + theme_classic2() + + theme(legend.position = "none") + theme_classic() + +#### Calibration ############### +# MELD 3.0 - original +cal_meld3 <- calibration(dataset_original$MELD3.surv[!is.na(dataset_original$MELD3.surv)], dataset_original$D90_surv[!is.na(dataset_original$MELD3.surv)]) +cal_meld3$Score <- "MELD 3.0" +# MELD updated +cal_meld <- calibration(dataset_updated$meld.surv.updated, dataset_updated$D90_surv) +cal_meld$Score <- "MELD updated" +# CLIF - updated +cal_clif <- calibration(dataset_updated$clif.surv.updated, dataset_updated$D90_surv) +cal_clif$Score <- "CLIF-C ACLF updated" +# Lille - updated +cal_lille <- calibration(dataset_updated$lille.surv.updated, dataset_updated$D90_surv) +cal_lille$Score <- "Lille updated" + +# combine dfs +df_cal <- rbind(cal_meld3, cal_meld, cal_clif, cal_lille) + +df_cal$Score <- factor(df_cal$Score, labels = levels(data_wide$condition)) + +# plot without ribbon +df_cal %>% + ggplot(., aes(x = pred, y = obs, col = Score)) + + ggtitle(sex) + + geom_line(lwd = 1) + + geom_abline(slope = 1, intercept = 0, linetype = "dashed") + + #geom_ribbon(aes(ymin = lower, ymax = upper, linetype = NA), + # alpha = 0.3, show.legend = F) + + #scale_fill_manual("", values = col) + + #scale_color_manual(name = "Score", values = col) + + xlim(0, 1) + + ylim(0, 1) + + ylab("Observed proportion") + + xlab("Predicted probability") + + scale_color_brewer(palette = "Dark2") + + scale_fill_brewer(palette = "Dark2") + + theme_classic() + +# plot with ribbon +p_calibration_females <- df_cal %>% + ggplot(., aes(x = pred, y = obs, col = Score)) + + #ggtitle(sex) + + geom_line(lwd = 1) + + geom_abline(slope = 1, intercept = 0, linetype = "dashed") + + geom_ribbon(aes(ymin = lower, ymax = upper, fill = Score, linetype = NA), + alpha = 0.3, show.legend = F) + + #scale_fill_manual("", values = col) + + #scale_color_manual(name = "Score", values = col) + + facet_grid(. 
~ Score) + + coord_equal() + + xlim(0, 1) + + ylim(0, 1) + + scale_color_brewer(palette = "Dark2") + + scale_fill_brewer(palette = "Dark2") + + ylab("Observed survival proportion") + + xlab("Predicted survival probability") + + theme_classic() + +# +source("/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/grid_arrange_fun.R") +library(gtable) +p1 <- grid_arrange_shared_legend(p_auc_males, p_calibration_males) +p2 <- grid_arrange_shared_legend(p_auc_females, p_calibration_females) + theme_bw() + +# plot with ribbon +df_cal %>% + ggplot(., aes(x = pred, y = obs, col = Score)) + + ggtitle(sex) + + geom_line(lwd = 1) + + geom_abline(slope = 1, intercept = 0, linetype = "dashed") + + geom_ribbon(aes(ymin = lower, ymax = upper, fill = Score, linetype = NA), + alpha = 0.3, show.legend = F) + + #scale_fill_manual("", values = col) + + #scale_color_manual(name = "Score", values = col) + + facet_grid(. ~ Score) + + coord_equal() + + xlim(0, 1) + + ylim(0, 1) + + ylab("Observed proportion") + + xlab("Predicted probability") + + theme_classic() + diff --git a/updating models/.DS_Store b/updating models/.DS_Store new file mode 100755 index 0000000..5008ddf Binary files /dev/null and b/updating models/.DS_Store differ diff --git a/updating models/Calibration_updated.R b/updating models/Calibration_updated.R new file mode 100755 index 0000000..2b9c08c --- /dev/null +++ b/updating models/Calibration_updated.R @@ -0,0 +1,96 @@ +# This script performs all calculations of model calibration +library(ggplot2) +library(survminer) # for plotting theme + +path_funs <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/funs" +source(paste0(path_funs, "/calibration_fun.R")) +source(paste0(path_funs, "/val.prob.ci.2_wrapper.R")) + +path_data <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/updating models" +load(paste0(path_data, "/recalibrated_models_default.Rdata")) + +############################################# +############### Calculate calibration ###### +############################################# +# MELD +cal_MELD.surv <- calibration(test.data$meld.surv.updated, y = test.data$D90_surv) +cal_MELD.surv$Score <- "MELD" + +# Lille +cal_Lille <- calibration(test.data$lille.surv.updated, y = test.data$D90_surv) +cal_Lille$Score <- "Lille" + +# CLIF-C ACLF +cal_CLIF <- calibration(test.data$clif.surv.updated, y = test.data$D90_surv) +cal_CLIF$Score <- "CLIF-C ACLF" + +# combine dfs +df_cal <- rbind(cal_MELD.surv, cal_Lille, cal_CLIF) + +############################################# +###################### Plots ############### +############################################# + +# plot without ribbon and without MELD 3.0 +df_cal %>% + ggplot(., aes(x = pred, y = obs, col = Score)) + + geom_line(lwd = 1) + + geom_abline(slope = 1, intercept = 0, linetype = "dashed") + + #geom_ribbon(aes(ymin = lower, ymax = upper, linetype = NA), + # alpha = 0.3, show.legend = F) + + #scale_fill_manual("", values = col) + + #scale_color_manual(name = "Score", values = col) + + xlim(0, 1) + + ylim(0, 1) + + ylab("Observed proportion") + + xlab("Predicted probability") + + theme_classic() + +# plot with ribbon +p_calibration <- df_cal %>% + ggplot(., aes(x = pred, y = obs, col = Score)) + + geom_line(lwd = 1) + + geom_abline(slope = 1, intercept = 0, linetype = "dashed") + + geom_ribbon(aes(ymin = lower, ymax = upper, fill = Score, linetype = NA), + alpha = 0.3, show.legend = F) + + scale_color_brewer(palette = "Dark2") + + scale_fill_brewer(palette = "Dark2") + + #scale_fill_manual("", values = col) + + 
#scale_color_manual(name = "Score", values = col) + + facet_grid(. ~ Score) + + coord_equal() + + xlim(0, 1) + + ylim(0, 1) + + ylab("Observed survival proportion") + + xlab("Predicted survival probability") + + theme_classic2() + + theme(legend.position = "none") + +############################################# +############## Summary Stats ############### +############################################# + +# compute calibration slopes and intercepts + +# MELD_1 survival function +MELD_stats <- val.prob.ci.2_wrapper(test.data$meld.surv.updated, test.data$D90_surv, "MELD") + +# Lille +Lille_stats <- val.prob.ci.2_wrapper(test.data$lille.surv.updated, test.data$D90_surv, "Lille") + +#CLIF-C ACLF +clif_stats <- val.prob.ci.2_wrapper(test.data$clif.surv.updated, y = test.data$D90_surv, "CLIF-C ACLF") + + +df_stats <- rbind(MELD_stats, Lille_stats, clif_stats) %>% relocate(Score) + + +p_calibration + geom_text(data = df_stats %>% filter(stat == "intercept"), + aes(0.1, 0.95, label = + paste0("Intercept (95% CI): ", Point.estimate, " (", Lower.confidence.limit, "-", Upper.confidence.limit, ")"), + hjust = 0), col = "black") + + geom_text(data = df_stats %>% filter(stat == "slope"), + aes(0.1, 0.90, label = + paste0("Slope (95% CI): ", Point.estimate, " (", Lower.confidence.limit, "-", Upper.confidence.limit, ")"), + hjust = 0), col = "black") + diff --git a/updating models/Calibration_updated_sens.R b/updating models/Calibration_updated_sens.R new file mode 100755 index 0000000..2642cc0 --- /dev/null +++ b/updating models/Calibration_updated_sens.R @@ -0,0 +1,125 @@ + +# This script performs all calculations of model calibration +library(ggplot2) + +path_funs <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/funs" +source(paste0(path_funs, "/calibration_fun.R")) +source(paste0(path_funs, "/val.prob.ci.2_wrapper.R")) + +path_data <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/updating models" +load(paste0(path_data, "/recalibrated_models_sens.Rdata")) + +############################################# +############### Calculate calibration ###### +############################################# +# MELD +cal_MELD.surv <- calibration(test.meld$meld.surv.updated, y = test.meld$D90_surv) +cal_MELD.surv$Score <- "MELD" + +# Lille +cal_Lille <- calibration(test.lille$lille.surv.updated, y = test.lille$D90_surv) +cal_Lille$Score <- "Lille" + +# CLIF-C ACLF +cal_CLIF <- calibration(test.clif$clif.surv.updated, y = test.clif$D90_surv) +cal_CLIF$Score <- "CLIF-C ACLF" + +############################################# +############### Calculate stats ############ +############################################# +# MELD +cal_MELD.stats <- val.prob.ci.2_wrapper(test.meld$meld.surv.updated, y = test.meld$D90_surv, "MELD") + +# Lille +cal_Lille.stats <- val.prob.ci.2_wrapper(test.lille$lille.surv.updated, y = test.lille$D90_surv, "Lille") + +# CLIF-C ACLF +cal_CLIF.stats <- val.prob.ci.2_wrapper(test.clif$clif.surv.updated, y = test.clif$D90_surv, "CLIF-C ACLF") + +############################################# +###################### Plots ############### +############################################# +# plot with ribbon +p1 <- cal_MELD.surv %>% + ggplot(., aes(x = pred, y = obs)) + + geom_line(linewidth = 1) + + geom_abline(slope = 1, intercept = 0, linetype = "dashed") + + geom_ribbon(aes(ymin = lower, ymax = upper, linetype = NA), + alpha = 0.3, show.legend = F) + + scale_color_brewer(palette = "Dark2") + + scale_fill_brewer(palette = "Dark2") + + #scale_fill_manual("", values = col) + + 
#scale_color_manual(name = "Score", values = col) + + facet_grid(. ~ Score) + + coord_equal() + + xlim(0, 1) + + ylim(0, 1) + + ylab("Observed proportion") + + xlab("Predicted probability") + + theme_classic2() + + theme_classic() + + geom_text(data = cal_MELD.stats %>% filter(stat == "intercept"), + aes(0.1, 0.90, label = + paste0("Intercept: ", Point.estimate, " (", Lower.confidence.limit, "-", Upper.confidence.limit, ")"), + hjust = 0), col = "black") + + geom_text(data = cal_MELD.stats %>% filter(stat == "slope"), + aes(0.1, 0.85, label = + paste0("Slope: ", Point.estimate, " (", Lower.confidence.limit, "-", Upper.confidence.limit, ")"), + hjust = 0), col = "black") + + +p2 <- cal_Lille %>% + ggplot(., aes(x = pred, y = obs)) + + geom_line(lwd = 1) + + geom_abline(slope = 1, intercept = 0, linetype = "dashed") + + geom_ribbon(aes(ymin = lower, ymax = upper, linetype = NA), + alpha = 0.3, show.legend = F) + + scale_color_brewer(palette = "Dark2") + + scale_fill_brewer(palette = "Dark2") + + #scale_fill_manual("", values = col) + + #scale_color_manual(name = "Score", values = col) + + facet_grid(. ~ Score) + + coord_equal() + + xlim(0, 1) + + ylim(0, 1) + + ylab("Observed proportion") + + xlab("Predicted probability") + + theme_classic2() + + theme_classic() + + geom_text(data = cal_Lille.stats %>% filter(stat == "intercept"), + aes(0.1, 0.90, label = + paste0("Intercept: ", Point.estimate, " (", Lower.confidence.limit, "-", Upper.confidence.limit, ")"), + hjust = 0), col = "black") + + geom_text(data = cal_Lille.stats %>% filter(stat == "slope"), + aes(0.1, 0.85, label = + paste0("Slope: ", Point.estimate, " (", Lower.confidence.limit, "-", Upper.confidence.limit, ")"), + hjust = 0), col = "black") + +p3 <- cal_CLIF %>% + ggplot(., aes(x = pred, y = obs)) + + geom_line(lwd = 1) + + geom_abline(slope = 1, intercept = 0, linetype = "dashed") + + geom_ribbon(aes(ymin = lower, ymax = upper, linetype = NA), + alpha = 0.3, show.legend = F) + + scale_color_brewer(palette = "Dark2") + + scale_fill_brewer(palette = "Dark2") + + #scale_fill_manual("", values = col) + + #scale_color_manual(name = "Score", values = col) + + facet_grid(. 
~ Score) + + coord_equal() + + xlim(0, 1) + + ylim(0, 1) + + ylab("Observed proportion") + + xlab("Predicted probability") + + theme_classic2() + + theme_classic() + + geom_text(data = cal_CLIF.stats %>% filter(stat == "intercept"), + aes(0.1, 0.90, label = + paste0("Intercept: ", Point.estimate, " (", Lower.confidence.limit, "-", Upper.confidence.limit, ")"), + hjust = 0), col = "black") + + geom_text(data = cal_CLIF.stats %>% filter(stat == "slope"), + aes(0.1, 0.85, label = + paste0("Slope: ", Point.estimate, " (", Lower.confidence.limit, "-", Upper.confidence.limit, ")"), + hjust = 0), col = "black") + +ggarrange(p1, p2, p3, nrow = 1, ncol = 3, common.legend = TRUE) diff --git a/updating models/Discrimination_updated.R b/updating models/Discrimination_updated.R new file mode 100755 index 0000000..5c36b61 --- /dev/null +++ b/updating models/Discrimination_updated.R @@ -0,0 +1,116 @@ + +library(pROC) +library(dplyr) +library(ggplot2) +library(purrr) +library(tidyr) +library(forcats) +#library(ggROC) + +# load data +path_data <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/updating models" +load(paste0(path_data, "/recalibrated_models_default.Rdata")) + +############################################# +############### Calculate AUCs ############# +############################################# + +# MELD (survival function 1 and 2 do not matter here) +roc_meld <- roc(test.data$D90_surv, test.data$meld.surv.updated) + +# MELD 3.0 +#roc_meld3 <- roc(stph.meld$D90_DTH, stph.meld$MELD3.surv) + +# MELD from VanDerwerken et al 2021 +#roc_meld.VanDerwerken <- roc(stph.meld$D90_DTH, stph.meld$MELD_Van) + +# Lille +roc_lille <- roc(test.data$D90_surv, test.data$lille.surv.updated) + +# CLIF-C ACLF +roc_clif <- roc(test.data$D90_surv, test.data$clif.surv.updated) + +############################################# +###################### Plots ############### +############################################# +roc.list <- list(#"MELD VanDerwerken" = roc_meld.VanDerwerken, + "CLIF-C ACLF" = roc_clif, + "Lille" = roc_lille, + "MELD" = roc_meld) +#setwd(path_data) +#save(roc.list, file = "ROC_updated.Rdata") + +g.list <- ggroc(roc.list) + +# ROC plot of all models combined +g.list + + geom_line(lwd = 1) + + geom_abline(slope = 1, intercept = 1, linetype = "dashed") + + theme_classic() + +# faceting +g.list + + facet_grid(. ~ name) + + geom_line(lwd = 1) + + geom_abline(slope = 1, intercept = 1, linetype = "dashed") + + theme_classic() + + theme(legend.position="none") + +# add confidence bands +ci.list <- lapply(roc.list, ci.se, specificities = seq(0, 1, l = 25)) + +dat.ci.list <- lapply(ci.list, function(ciobj) + data.frame(x = as.numeric(rownames(ciobj)), + lower = ciobj[, 1], + upper = ciobj[, 3])) + +df <- plyr::ldply(dat.ci.list, data.frame, .id = "name") + +pl <- ggroc(roc.list) + + facet_grid(. 
~ name) + + theme_minimal() + + geom_ribbon(data = df, aes(x = x, ymin = lower, ymax = upper, fill = name), alpha = 0.3, inherit.aes = F) + + geom_abline(slope = 1, intercept = 1, linetype = "dashed") + + labs(x = "Specificity", y = "Sensitivity") + + scale_color_brewer(palette = "Dark2") + + scale_fill_brewer(palette = "Dark2") + + coord_equal() + + theme_classic() + + theme(legend.position = "none") + +data_wide$name <- data_wide$condition +data_wide$low_CL <- round(data_wide$low_CL, 2) +data_wide$mean <- round(data_wide$mean, 2) +data_wide$upper_CL <- round(data_wide$upper_CL, 2) + +pl + geom_text(data = data_wide, aes(0.05, 0.25, label = paste0("AUC (95% CI): ", mean, " (", low_CL, "-", upper_CL, ")" ), + hjust = 1), col = "black") + +############################################# +###################### AUC ################# +############################################# +# CI +#auc_meld <- auc(roc_meld) +#auc_meld_ci <- ci.auc(roc_meld) # Confidence intervals +#roc_plot(stph.meld, "D90_surv", "MELD.surv", ci = TRUE, plot_title = "ROC curve for the MELD score") + +df_AUC <- as.data.frame(map_dfr(roc.list, ci.auc)) +rownames(df_AUC) <- c("low_CL", "mean", "upper_CL") + +df_AUC2 <- tibble::rownames_to_column(df_AUC, var = "AUC") + +df3 <- gather(df_AUC2, condition, measurement, `CLIF-C ACLF`:MELD, factor_key = TRUE) +data_wide <- spread(df3, AUC, measurement) %>% arrange(mean) + +# reorder factor levels +data_wide$condition <- fct_reorder(data_wide$condition, data_wide$mean) + +ggplot(data_wide, aes(x = mean, y = condition, col = condition)) + + geom_point(lwd = 2) + + coord_cartesian(xlim = c(0.5, 0.86)) + + geom_errorbar(aes(xmin = low_CL, xmax = upper_CL), + alpha = 1, show.legend = F, lwd = 1, width = 0.5) + + labs(y = "Score", col = "Score", x = "AUC with 95% limits") + + theme_classic() + + diff --git a/updating models/Discrimination_updated_sens.R b/updating models/Discrimination_updated_sens.R new file mode 100755 index 0000000..edc481d --- /dev/null +++ b/updating models/Discrimination_updated_sens.R @@ -0,0 +1,117 @@ + +library(pROC) +library(dplyr) +library(ggplot2) +#library(ggROC) + +# load data +path_data <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/updating models" +load(paste0(path_data, "/recalibrated_models_sens.Rdata")) + +############################################# +############### Calculate AUCs ############# +############################################# + +# MELD (survival function 1 and 2 do not matter here) +roc_meld <- roc(test.meld$D90_surv, test.meld$meld.surv.updated) + +# MELD 3.0 +#roc_meld3 <- roc(stph.meld$D90_DTH, stph.meld$MELD3.surv) + +# MELD from VanDerwerken et al 2021 +#roc_meld.VanDerwerken <- roc(stph.meld$D90_DTH, stph.meld$MELD_Van) + +# Lille +roc_lille <- roc(test.lille$D90_surv, test.lille$lille.surv.updated) + +# CLIF-C ACLF +roc_clif <- roc(test.clif$D90_surv, test.clif$clif.surv.updated) + +############################################# +###################### Plots ############### +############################################# +roc.list <- list("CLIF-C ACLF" = roc_clif, + #"MELD VanDerwerken" = roc_meld.VanDerwerken, + "Lille" = roc_lille, + "MELD" = roc_meld) +#save(roc.list, file = "ROC_updated.Rdata") + +g.list <- ggroc(roc.list) + +# ROC plot of all models combined +g.list + + geom_line(lwd = 1) + + geom_abline(slope = 1, intercept = 1, linetype = "dashed") + + theme_classic() + +# faceting +g.list + + facet_grid(. 
~ name) + + geom_line(lwd = 1) + + geom_abline(slope = 1, intercept = 1, linetype = "dashed") + + theme_classic() + + theme(legend.position="none") + +# add confidence bands +ci.list <- lapply(roc.list, ci.se, specificities = seq(0, 1, l = 25)) + +dat.ci.list <- lapply(ci.list, function(ciobj) + data.frame(x = as.numeric(rownames(ciobj)), + lower = ciobj[, 1], + upper = ciobj[, 3])) + +df <- plyr::ldply(dat.ci.list, data.frame, .id = "name") + + +pl <- ggroc(roc.list) + + facet_grid(. ~ name) + + theme_minimal() + + geom_ribbon(data = df, aes(x = x, ymin = lower, ymax = upper, fill = name), alpha = 0.3, inherit.aes = F) + + geom_abline(slope = 1, intercept = 1, linetype = "dashed") + + labs(x = "Specificity", y = "Sensitivity") + + scale_color_brewer(palette = "Dark2") + + scale_fill_brewer(palette = "Dark2") + + coord_equal() + + theme_classic() + + theme(legend.position = "none") + +data_wide$name <- data_wide$condition +data_wide$low_CL <- round(data_wide$low_CL, 2) +data_wide$mean <- round(data_wide$mean, 2) +data_wide$upper_CL <- round(data_wide$upper_CL, 2) +pl + geom_text(data = data_wide, aes(0.05, 0.25, label = paste0("AUC (95% CI): ", mean, " (", low_CL, "-", upper_CL, ")" ), + hjust = 1), col = "black") + +############################################# +###################### AUC ################# +############################################# +# CI +#auc_meld <- auc(roc_meld) +#auc_meld_ci <- ci.auc(roc_meld) # Confidence intervals +#roc_plot(stph.meld, "D90_surv", "MELD.surv", ci = TRUE, plot_title = "ROC curve for the MELD score") + +df_AUC <- as.data.frame(map_dfr(roc.list, ci.auc)) +rownames(df_AUC) <- c("low_CL", "mean", "upper_CL") + +df_AUC2 <- tibble::rownames_to_column(df_AUC, var = "AUC") + +df3 <- gather(df_AUC2, condition, measurement, `CLIF-C ACLF`:MELD, factor_key = TRUE) +data_wide <- spread(df3, AUC, measurement) %>% arrange(mean) + +# reorder factor levels +data_wide$condition <- fct_reorder(data_wide$condition, data_wide$mean) + +ggplot(data_wide, aes(x = mean, y = condition, col = condition)) + + geom_point(lwd = 2) + + coord_cartesian(xlim = c(0.5, 0.86)) + + geom_errorbar(aes(xmin = low_CL, xmax = upper_CL), + alpha = 1, show.legend = F, lwd = 1, width = 0.5) + + labs(y = "Score", col = "Score", x = "AUC with 95% limits") + + theme_classic() + + + + + + + diff --git a/updating models/ROC_updated.Rdata b/updating models/ROC_updated.Rdata new file mode 100755 index 0000000..8b3f290 Binary files /dev/null and b/updating models/ROC_updated.Rdata differ diff --git a/updating models/agreement.R b/updating models/agreement.R new file mode 100644 index 0000000..898a2c6 --- /dev/null +++ b/updating models/agreement.R @@ -0,0 +1,267 @@ + +library(dplyr) +library(ggplot2) +library(ggpubr) +library(tidyr) +library(forcats) +library(purrr) +library(survminer) +library(psych) + +### data +path_data <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/updating models" +load(paste0(path_data, "/recalibrated_models_default.Rdata")) + +# subset data +data <- test.data %>% + select(Subject, meld.surv.updated, lille.surv.updated, clif.surv.updated, D90_surv) + +############################################# +################### ICC #################### +############################################# +coords <- c(0.25, 1) + +icc_meld_lille <- ICC(data[,c("meld.surv.updated", "lille.surv.updated")]) + +ICC_meld_lille <- paste0("ICC = ", round(icc_meld_lille$results$ICC[3], 2), + ", p = ", signif(icc_meld_lille$results$p[3], 2)) + +p1 <- ggscatter(data, x = 
"meld.surv.updated", y = "lille.surv.updated", + add = "reg.line", conf.int = TRUE, + cor.coef = F, cor.method = "pearson", cor.coef.coord = coords, + color = "grey", alpha = 0.7, + add.params = list(color = "purple2")) + + geom_abline(intercept = 0, slope = 1, linetype = "dashed") + + labs(x = "MELD", y = "Lille") + + coord_cartesian(xlim = c(0, 1), ylim = c(0, 1)) + + annotate("text", x = coords[1], y = coords[2], label = ICC_meld_lille, size = 4, fontface = 2) + + theme_classic2() + +icc_clif_meld <- ICC(data[,c("meld.surv.updated", "clif.surv.updated")]) + +ICC_CLIF_MELD <- paste0("ICC = ", round(icc_clif_meld$results$ICC[3], 2), ", p = ", signif(icc_clif_meld$results$p[3], 2)) + +p2 <- ggscatter(data, x = "meld.surv.updated", y = "clif.surv.updated", + add = "reg.line", conf.int = TRUE, + cor.coef = F, cor.method = "pearson", cor.coef.coord = coords, + color = "grey", alpha = 0.7, + add.params = list(color = "purple2")) + + geom_abline(intercept = 0, slope = 1, linetype = "dashed") + + labs(x = "MELD", y = "CLIF-C ACLF") + + coord_cartesian(xlim = c(0, 1), ylim = c(0, 1)) + + annotate("text", x = coords[1], y = coords[2], label = ICC_CLIF_MELD, size = 4, fontface = 2) + + theme_classic2() + +icc_clif_lille <- ICC(data[,c("clif.surv.updated", "lille.surv.updated")]) + +ICC_CLIF_lille <- paste0("ICC = ", round(icc_clif_lille$results$ICC[3], 2), ", p = ", signif(icc_clif_lille$results$p[3], 2)) + +p3 <- ggscatter(data, x = "lille.surv.updated", y = "clif.surv.updated", + add = "reg.line", conf.int = TRUE, + cor.coef = F, cor.method = "pearson", cor.coef.coord = coords, + color = "grey", alpha = 0.7, + add.params = list(color = "purple2")) + + geom_abline(intercept = 0, slope = 1, linetype = "dashed") + + labs(x = "Lille", y = "CLIF-C ACLF") + + coord_cartesian(xlim = c(0, 1), ylim = c(0, 1)) + + annotate("text", x = coords[1], y = coords[2], label = ICC_CLIF_lille, size = 4, fontface = 2) + + theme_classic2() + +############################################# +################ Correlation ############### +############################################# +#coords <- c(0.01, 1) +# +# p1 <- ggscatter(data, x = "meld.surv.updated", y = "lille.surv.updated", +# add = "reg.line", conf.int = TRUE, +# cor.coef = TRUE, cor.method = "pearson", cor.coef.coord = coords, +# color = "grey", alpha = 0.7, +# add.params = list(color = "purple2")) + +# geom_abline(intercept = 0, slope = 1, linetype = "dashed") + +# labs(x = "MELD", y = "Lille") + +# coord_cartesian(xlim = c(0, 1), ylim = c(0, 1)) + +# theme_classic2() +# +# p2 <- ggscatter(data, x = "meld.surv.updated", y = "clif.surv.updated", +# add = "reg.line", conf.int = TRUE, +# cor.coef = TRUE, cor.method = "pearson", cor.coef.coord = coords, +# color = "grey", alpha = 0.7, +# add.params = list(color = "purple2")) + +# geom_abline(intercept = 0, slope = 1, linetype = "dashed") + +# labs(x = "MELD", y = "CLIF-C ACLF") + +# coord_cartesian(xlim = c(0, 1), ylim = c(0, 1)) + +# theme_classic2() +# +# p3 <- ggscatter(data, x = "lille.surv.updated", y = "clif.surv.updated", +# add = "reg.line", conf.int = TRUE, +# cor.coef = TRUE, cor.method = "pearson", cor.coef.coord = coords, +# color = "grey", alpha = 0.7, +# add.params = list(color = "purple2")) + +# geom_abline(intercept = 0, slope = 1, linetype = "dashed") + +# labs(x = "Lille", y = "CLIF-C ACLF") + +# coord_cartesian(xlim = c(0, 1), ylim = c(0, 1)) + +# theme_classic2() + +############################################# +################ Correlation ############### 
+############################################# +# long format +data_long <- gather(data, model, probs, meld.surv.updated:clif.surv.updated, factor_key = TRUE) + +data_long2 <- data_long %>% + group_by(Subject) %>% + mutate(dev = sd(probs), # sd + mad_dev = mad(probs), #median absolute deviation + max_min_dev = max(probs) - min(probs)) # range + +# plot +p4 <- data_long2 %>% + #mutate(Subject = fct_reorder(as.factor(Subject), desc(max_min_dev))) %>% + ggplot(.) + + geom_point(aes(x = max_min_dev, y = reorder(Subject, max_min_dev))) + + labs(x = "Range probabilities ", y = "Subject") + + theme_classic2() + + theme(axis.text.y = element_blank(), + axis.ticks.y = element_blank()) + +p4 + +# overall survival rate +data_long2 %>% ungroup() %>% summarise(mean(D90_surv)) + +# fun +cut_off_surv_rate <- function(data, thres, var_name){ + # calculates mean of `var_name` and sample size for chosen `thres` value + + df <- data %>% + filter(max_min_dev >= thres) %>% + ungroup() %>% + summarise(sum_var = mean({{ var_name }}, na.rm = T)) + + df_n <- data %>% + filter(max_min_dev >= thres) %>% + ungroup() %>% + summarise(n = length(unique(Subject))) + + out <- data.frame(surv_rate = df$sum_var, thres = thres, n = df_n$n) + + return(out) +} + +data_long23 <- data_long2 %>% distinct(Subject, .keep_all = T) +data_long23%>% filter(Subject == 1005003) + + +thresholds <- seq(0, 0.5, by = 0.01) +res <- map_df(thresholds, cut_off_surv_rate, data = data_long2, var_name = D90_surv) + +# plot +ind <- c(1, 5, 9, 13, 25, 41, 49) # select a subset to visualise +p5 <- ggplot(res, aes(y = surv_rate, x = thres)) + + geom_point() + + geom_line() + + geom_text(data = res[ind, ], aes(label = n), check_overlap = TRUE, nudge_y = 0.04, nudge_x = 0.015) + + labs(x = "Range probabilities", y = "Survival Rate") + + theme_classic2() + +p5 + +# fun +sum_fun <- function(df1, df2, thres){ + # choses ids based the `thres` value + + df_prelim <- df1 %>% filter(max_min_dev > thres) %>% arrange(Subject) + + df_final <- df2 %>% filter(Subject %in% df_prelim$Subject) + + df_final$threshold <- thres + + return(df_final) +} + +res <- map_df(thresholds, sum_fun, df1 = data_long2, df2 = test.data) + +vars <- c("Subject", "Bilirubin.mg.dl", "Bilirubin.day.7", "delta.bili", "INR", + "Creatinine.mg.dl", "Albumin", "WBC", "INR", "protime", "HE", + "MAP", "Sodium", "CLIF.OF", + "kidney.score", "liver.score", "brain.score", "coag.score", "circ.score", + "Gender.x", "Age.at.randomisation..calc..x") + +sum_df <- res %>% + group_by(threshold) %>% + select(all_of(vars)) %>% + summarise_all(mean) %>% + gather(., variable, value, Bilirubin.mg.dl:Age.at.randomisation..calc..x, factor_key=TRUE) + +vars_to_keep <- c("CLIF.OF") + +p6 <- sum_df %>% + filter(variable %in% vars_to_keep) %>% + mutate(variable = ifelse(variable == "CLIF.OF", "CLIF OF", NA)) %>% + ggplot(., aes(x = threshold, y = value)) + + geom_point() + + geom_line() + + facet_wrap(. 
~ variable, scales = "free_y") + + labs(x = "Range probabilities", y = "") + + theme_classic2() + + theme(strip.background = element_blank()) + +p6 + +vars_to_remove <- c("Gender.x", "Age.at.randomisation..calc..x", + "liver.score", "kidney.score", "brain.score", "coag.score", + "circ.score", + "protime", "delta.bili") + +#library(gridExtra) +ggarrange(p1, p2, p3, p4, p5, p6, nrow = 2, ncol = 3, labels = "auto", align = "hv") + +################# +################# +################# + +sum_df %>% filter(!(variable %in% vars_to_remove)) %>% + filter(!variable %in% "CLIF.OF") %>% + mutate(variable = case_when(variable == "Bilirubin.mg.dl" ~ "Bilirubin (mg/dL)", + variable == "Bilirubin.day.7" ~ "Bilirubin (day 7)", + variable == "Creatinine.mg.dl" ~ "Creatinine (mg/dL)", + TRUE ~ variable)) %>% + ggplot(., aes(x = threshold, y = value)) + + geom_point() + + geom_line() + + facet_wrap(. ~ variable, scales = "free_y") + + labs(x = "Range probabilities", y = "") + + theme_bw() + + theme(strip.background = element_blank()) + +############################################# +########### ICC exploration ################ +############################################# +#https://www.datanovia.com/en/lessons/intraclass-correlation-coefficient-in-r/ +# library(psych) +# ICC(data[,c(2:4)]) +# +# ICC(data[,c(2,4)]) +# +#cor.test(data[,c(2)],data[,c(4)], method = c("pearson")) +#ICC(data[,c(2,4)]) +# +#cor.test(data[,c(2)],data[,c(3)], method = c("pearson")) +#ICC(data[,c(2,3)]) +# +#cor.test(data[,c(3)],data[,c(4)], method = c("pearson")) +#ICC(data[,c(3,4)]) +# +# #The intra-class correlation coefficient was computed to assess the agreement between three scores +# #in predicting 90-day survival. There was a poor agreement between the three scores, +# # using the two-way mixed-effects model and “single rater” unit, kappa = 0.2, p = 0.056. +# +# #ICC estimates and their 95% confident intervals were calculated using the R package psych (2.3.6) based on a "single rating" unit, +# #two-way mixed-effects model. 
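# A minimal sketch of the psych::ICC call summarised above, kept commented out
# like the rest of this exploratory block. Row 3 of the results table is the
# two-way mixed-effects, single-rater coefficient (ICC3) that the plots at the
# top of this script pull out via results$ICC[3]:
# icc_fit <- psych::ICC(data[, c("meld.surv.updated", "lille.surv.updated", "clif.surv.updated")])
# icc_fit$results[3, c("type", "ICC", "p")]   # "ICC3" row: point estimate and p-value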
- add https://www.sciencedirect.com/science/article/pii/S1556370716000158 +# +# #library("irr") # an alternative package +# #icc(data[,c(2:4)], model = "oneway", +# # type = "consistency", unit = "single") + + + diff --git a/updating models/compare_newmodels.R b/updating models/compare_newmodels.R old mode 100644 new mode 100755 diff --git a/updating models/compare_oldnew.R b/updating models/compare_oldnew.R old mode 100644 new mode 100755 index 2f34387..639b545 --- a/updating models/compare_oldnew.R +++ b/updating models/compare_oldnew.R @@ -6,13 +6,13 @@ library(pROC) ###### # Comparison of areas under the ROC curves # MELD -roc_meld_new <- roc(test.data.c$D90_surv, test.data.c$meld.surv.updated) -roc_meld_old <- roc(test.data.c$D90_surv, test.data.c$MELD.surv) +roc_meld_new <- roc(test.data$D90_surv, test.data$meld.surv.updated) +roc_meld_old <- roc(test.data$D90_surv, test.data$MELD.surv) roc.test(roc_meld_old, roc_meld_new) # CLIF-C ACLF -roc_clif_new <- roc(test.data.c$D90_surv, test.data.c$clif.surv.updated) -roc_clif_old <- roc(test.data.c$D90_surv, test.data.c$CLIF.surv) +roc_clif_new <- roc(test.data$D90_surv, test.data$clif.surv.updated) +roc_clif_old <- roc(test.data$D90_surv, test.data$CLIF.surv) roc.test(roc_clif_new, roc_clif_old) # Lille diff --git a/updating models/comparisons.R b/updating models/comparisons.R new file mode 100755 index 0000000..3c89041 --- /dev/null +++ b/updating models/comparisons.R @@ -0,0 +1,144 @@ + +library(ggsci) # for color palette +library(grid) +library(gridExtra) + +path1 <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/original models/" +load(paste0(path1, "ROC_original.Rdata")) + +roc_list_orig <- roc.list + +path2 <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/updating models/" +load(paste0(path2, "ROC_updated.Rdata")) + +roc_list_update <- roc.list + + +#model <- "meld" # meld, lille, clif + +model <- "clif" # meld, lille, clif + +if(model == "meld"){ + # MELD + model_list <- list("meld_orig" = roc_list_orig$MELD, + "meld_update" = roc_list_update$MELD) + + P_value_boot <- roc.test(roc_list_orig$MELD, roc_list_update$MELD, method = "bootstrap") # bootstrap p_value + +}else if(model == "lille"){ + #Lille + model_list <- list("lille_orig" = roc_list_orig$Lille, + "lille_update" = roc_list_update$Lille) + + P_value_boot <- roc.test(roc_list_orig$Lille, roc_list_update$Lille, method = "bootstrap") + +}else{ + # CLIF + model_list <- list("clif_orig" = roc_list_orig$`CLIF-C ACLF`, + "clif_update" = roc_list_update$`CLIF-C ACLF`) + + P_value_boot <- roc.test(roc_list_orig$`CLIF-C ACLF`, roc_list_update$`CLIF-C ACLF`, method = "bootstrap") + +} + +g.list <- ggroc(model_list) + +# ROC plot of all models combined +g.list + + geom_line(lwd = 1) + + geom_abline(slope = 1, intercept = 1, linetype = "dashed") + + theme_classic() + +# add confidence bands +ci.list <- lapply(model_list, ci.se, specificities = seq(0, 1, l = 25)) + +dat.ci.list <- lapply(ci.list, function(ciobj) + data.frame(x = as.numeric(rownames(ciobj)), + lower = ciobj[, 1], + upper = ciobj[, 3])) + +df <- plyr::ldply(dat.ci.list, data.frame, .id = "name") + +# individual plot +ggroc(model_list) + + ggtitle(model) + + facet_grid(. ~ name) + + +ggroc(model_list) + + ggtitle(model) + + #facet_grid(. 
~ name) + + theme_minimal() + + geom_ribbon(data = df, aes(x = x, ymin = lower, ymax = upper, fill = name), alpha = 0.3, inherit.aes = F) + + geom_abline(slope = 1, intercept = 1, linetype = "dashed") + + #geom_text(x = 0.5, y = 0.5, label = paste0("p.value")) + + annotate(geom = "text", x = 0.3, y = 0.5, label = paste0("p.value = ", round(P_value_boot$p.value, 2)), fontsize = 12) + + labs(x = "Specificity", y = "Sensitivity") + + scale_fill_jco() + + scale_color_jco() + + annotate(geom = "text", x = 0.3, y = 0.5, label = paste0("p.value ", round(P_value_boot$p.value, 2)), fontsize = 12) + + labs(x = "Specificity", y = "Sensitivity") + + coord_equal() + + theme_classic() + + theme(legend.position = "none") + + +#### arranged plots +linewidth <- 1.3 +text_size <- 4.5 + +roc1 <- ggroc(model_list, lwd = linewidth) + + ggtitle("CLIF-C ACLF") + + #facet_grid(. ~ name) + + #theme_minimal() + + geom_ribbon(data = df, aes(x = x, ymin = lower, ymax = upper, fill = name), alpha = 0.3, inherit.aes = F) + + geom_abline(slope = 1, intercept = 1, linetype = "dashed") + + #geom_text(x = 0.5, y = 0.5, label = paste0("p.value")) + + annotate(geom = "text", x = 0.3, y = 0.5, label = paste0("p.value = ", round(P_value_boot$p.value, 2)), + size = text_size) + + labs(x = " ", y = " ") + + scale_fill_jco() + + scale_color_jco() + + coord_equal() + + theme_classic2() + + theme(legend.position = "none") + +roc2 <- ggroc(model_list, lwd = linewidth) + + ggtitle("Lille") + + #facet_grid(. ~ name) + + #theme_minimal() + + geom_ribbon(data = df, aes(x = x, ymin = lower, ymax = upper, fill = name), alpha = 0.3, inherit.aes = F) + + geom_abline(slope = 1, intercept = 1, linetype = "dashed") + + #geom_text(x = 0.5, y = 0.5, label = paste0("p.value")) + + annotate(geom = "text", x = 0.3, y = 0.5, label = paste0("p.value = ", round(P_value_boot$p.value, 2)), + size = text_size) + + labs(x = " ", y = " ") + + scale_fill_jco() + + scale_color_jco() + + coord_equal() + + theme_classic2() + + theme(legend.position = "none") + +roc3 <- ggroc(model_list, lwd = linewidth) + + ggtitle("MELD") + + #facet_grid(. ~ name) + + #theme_minimal() + + geom_ribbon(data = df, aes(x = x, ymin = lower, ymax = upper, fill = name), alpha = 0.3, inherit.aes = F) + + geom_abline(slope = 1, intercept = 1, linetype = "dashed") + + #geom_text(x = 0.5, y = 0.5, label = paste0("p.value")) + + annotate(geom = "text", x = 0.3, y = 0.5, label = paste0("p.value = ", round(P_value_boot$p.value, 2)), + size = text_size) + + labs(x = " ", y = " ") + + scale_fill_jco() + + scale_color_jco() + + coord_equal() + + theme_classic2() + + theme(legend.position = "none") + +grid.arrange(arrangeGrob(roc1), + arrangeGrob(roc2), + arrangeGrob(roc3), + ncol = 3, left = "Sensitivity", bottom = "Specificity") + + + + diff --git a/updating models/recalibrate.R b/updating models/recalibrate.R old mode 100644 new mode 100755 index f48b864..3051b4c --- a/updating models/recalibrate.R +++ b/updating models/recalibrate.R @@ -3,206 +3,94 @@ # find recalibration parameters and the survival probability is re-calculated using these parameters. 
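# Illustrative sketch on simulated data (the names below are hypothetical, not
# from this script): an existing score is refit with a logistic model and the
# survival probability is recomputed from the new intercept and slope, which is
# the same pattern the MELD, Lille and CLIF-C ACLF blocks below apply to the
# real data.
set.seed(1)
toy <- data.frame(score = rnorm(200, mean = 20, sd = 5))
toy$D90_surv <- rbinom(200, 1, plogis(4 - 0.2 * toy$score))      # simulated 90-day survival
toy_recal <- glm(D90_surv ~ score, family = binomial("logit"), data = toy)
toy_lp <- coef(toy_recal)[1] + coef(toy_recal)[2] * toy$score    # updated linear predictor
toy$surv.updated <- plogis(toy_lp)                               # same as 1/(1 + exp(-toy_lp))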
library(rms) library(pROC) -source("val_prob_confidence.R") + +# load data +path_data <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/original models/" +load(paste0(path_data, "complete_cases_models.Rdata")) # Set random seed -set.seed(34) +set.seed(111) ###### # Split complete-case sample in training and test observations -dt <- sort(sample(nrow(stph.c), nrow(stph.c)*.8)) -test.data.c <- stph.c[dt,] -train.data.c <- stph.c[-dt,] - +fraction <- 0.7 +dt <- sort(sample(nrow(stph.c), nrow(stph.c)*fraction)) +test.data <- stph.c[dt,] +train.data <- stph.c[-dt,] + +############################################# +############### MELD score ################## +############################################# # Fit a logistic regression model on the training data -meld_regr <- glm(D90_surv ~ MELD.calc, family = binomial, data = train.data.c) +meld_regr <- glm(D90_surv ~ MELD.calc, family = binomial("logit"), data = train.data) # Save the regression coefficients (alpha and beta) ic_meld_c <- meld_regr$coefficients[1] slope_meld_c <- meld_regr$coefficients[2] # Calculate adjusted MELD score and survival probability on training set -train.data.c$updated.meld <- ic_meld_c + slope_meld_c*train.data.c$MELD.calc -train.data.c$meld.surv.updated <- 1/(1+exp(-train.data.c$updated.meld)) +train.data$updated.meld <- ic_meld_c + slope_meld_c*train.data$MELD.calc +train.data$meld.surv.updated <- 1/(1 + exp(-train.data$updated.meld)) # Calculate adjusted scores on the test set and assess performance -test.data.c$updated.meld <- ic_meld_c + slope_meld_c*test.data.c$MELD.calc -test.data.c$meld.surv.updated <- 1/(1+exp(-test.data.c$updated.meld)) - -roc_meld_uc <- roc(test.data.c$D90_surv, test.data.c$meld.surv.updated) -ci.auc(roc_meld_uc) +test.data$updated.meld <- ic_meld_c + slope_meld_c*test.data$MELD.calc +test.data$meld.surv.updated <- 1/(1 + exp(-test.data$updated.meld)) -val.prob(test.data.c$meld.surv.updated, test.data.c$D90_surv, pl = TRUE, smooth = FALSE, logistic.cal = TRUE, - xlab = "Predicted survival probability", ylab = "Actual survival probability", - legendloc = FALSE, statloc = FALSE) - -# Lille score re-calibration -# Run logistic regression and save regression coefficientss -lille_regr <- glm(D90_surv ~ LILLE, family = binomial, data = train.data.c) +############################################# +################### Lille score ############# +############################################# +# Run logistic regression and save regression coefficients +lille_regr <- glm(D90_surv ~ LILLE, family = binomial("logit"), data = train.data) ic_lille_c <- lille_regr$coefficients[1] slope_lille_c <- lille_regr$coefficients[2] # Update scores in training set -train.data.c$updated.lille <- ic_lille_c + slope_lille_c*train.data.c$LILLE -train.data.c$lille.surv.updated <- 1/(1 + exp(-train.data.c$updated.lille)) +train.data$updated.lille <- ic_lille_c + slope_lille_c*train.data$LILLE +train.data$lille.surv.updated <- 1/(1 + exp(-train.data$updated.lille)) # Assess performance on the test set -test.data.c$updated.lille <- ic_lille_c + slope_lille_c*test.data.c$LILLE -test.data.c$lille.surv.updated <- 1/(1 + exp(-test.data.c$updated.lille)) - -roc_lille_uc <- roc(test.data.c$D90_surv, test.data.c$lille.surv.updated) -ci.auc(roc_lille_uc) +test.data$updated.lille <- ic_lille_c + slope_lille_c*test.data$LILLE +test.data$lille.surv.updated <- 1/(1 + exp(-test.data$updated.lille)) -val.prob(test.data.c$lille.surv.updated, test.data.c$D90_surv, pl = TRUE, smooth = FALSE, logistic.cal = TRUE, - xlab = 
"Predicted survival probability", ylab = "Actual survival probability", - legendloc = FALSE, statloc = FALSE) - -# CLIF-C ACLF score re-calibration -# Run logistic regression and save regression coefficientss -clif_regr <- glm(D90_surv ~ CLIF.C, family = binomial, data = train.data.c) +############################################# +######### CLIF-C ACLF Score ################# +############################################# +# Run logistic regression and save regression coefficients +clif_regr <- glm(D90_surv ~ CLIF.C, family = binomial("logit"), data = train.data) ic_clif_c <- clif_regr$coefficients[1] slope_clif_c <- clif_regr$coefficients[2] # Update scores in training set -train.data.c$updated.clif <- ic_clif_c + slope_clif_c*train.data.c$CLIF.C -train.data.c$clif.surv.updated <- 1/(1 + exp(-train.data.c$updated.clif)) +train.data$updated.clif <- ic_clif_c + slope_clif_c*train.data$CLIF.C +train.data$clif.surv.updated <- 1/(1 + exp(-train.data$updated.clif)) # Assess performance on the test set -test.data.c$updated.clif <- ic_clif_c + slope_clif_c*test.data.c$CLIF.C -test.data.c$clif.surv.updated <- 1/(1 + exp(-test.data.c$updated.clif)) - -roc_clif_uc <- roc(test.data.c$D90_surv, test.data.c$clif.surv.updated) -ci.auc(roc_clif_uc) - -val.prob(test.data.c$clif.surv.updated, test.data.c$D90_surv, pl = TRUE, smooth = FALSE, logistic.cal = TRUE, - xlab = "Predicted survival probability", ylab = "Actual survival probability", - legendloc = FALSE, statloc = FALSE) - -# Confidence intervals/SEs for the slope and intercept -# val_prob_confidence is used to get SEs for the calibration intercepts and slopes (and CIs in the plots) -val_prob_confidence(test.data.c$meld.surv.updated, test.data.c$D90_surv, pl = TRUE, smooth = FALSE, logistic.cal = TRUE, - xlab = "Predicted survival probability", ylab = "Actual survival probability", - legendloc = T, roundstats = 3) - -val_prob_confidence(test.data.c$clif.surv.updated, test.data.c$D90_surv, pl = TRUE, smooth = FALSE, logistic.cal = TRUE, - xlab = "Predicted survival probability", ylab = "Actual survival probability", - legendloc = T, dostats = TRUE, roundstats = 3) - -val_prob_confidence(test.data.c$lille.surv.updated, test.data.c$D90_surv, pl = TRUE, smooth = FALSE, logistic.cal = TRUE, - xlab = "Predicted survival probability", ylab = "Actual survival probability", - legendloc = T, dostats = TRUE) - -###### -# Sensitivity analysis; split data on full stph sample and then select the complete cases per model -# Split full sample in training and test observations -dt <- sort(sample(nrow(stph), nrow(stph)*.8)) -test.data <- stph[dt,] -train.data <- stph[-dt,] - -# MELD score re-calibration -test.meld <- stph.meld[stph.meld$Subject %in% test.data$Subject,] -train.meld <- stph.meld[stph.meld$Subject %in% train.data$Subject,] - -# Fit a logistic regression model on the training data -meld_regr <- glm(D90_surv ~ MELD.calc, family = binomial, data = train.meld) - -# Save the regression coefficients (alpha and beta) -ic_meld <- meld_regr$coefficients[1] -slope_meld <- meld_regr$coefficients[2] - -# Calculate adjusted MELD score and survival probability on training set -train.meld$updated.meld <- ic_meld + slope_meld*train.meld$MELD.calc -train.meld$meld.surv.updated <- 1/(1+exp(-train.meld$updated.meld)) +test.data$updated.clif <- ic_clif_c + slope_clif_c*test.data$CLIF.C +test.data$clif.surv.updated <- 1/(1 + exp(-test.data$updated.clif)) -# Calculate adjusted scores on the test set and assess performance -test.meld$updated.meld <- ic_meld + 
slope_meld*test.meld$MELD.calc -test.meld$meld.surv.updated <- 1/(1+exp(-test.meld$updated.meld)) - -roc_meld_u <- roc(test.meld$D90_surv, test.meld$meld.surv.updated) -ci.auc(roc_meld_u) - -val.prob(test.meld$meld.surv.updated, test.meld$D90_surv, pl = TRUE, smooth = FALSE, logistic.cal = TRUE, - xlab = "Predicted survival probability", ylab = "Actual survival probability", - legendloc = FALSE, statloc = FALSE) - -# Finally, add the updated scores to the full MELD df -stph.meld$updated.meld <- ic_meld + slope_meld*stph.meld$MELD.calc -stph.meld$meld.surv.updated <- 1/(1+exp(-stph.meld$updated.meld)) - -# Lille score re-calibration -# Split Lille dataframe into training and test -test.lille <- stph.lille[stph.lille$Subject %in% test.data$Subject,] -train.lille <- stph.lille[stph.lille$Subject %in% train.data$Subject,] - -# Run logistic regression and save regression coefficientss -lille_regr <- glm(D90_surv ~ LILLE, family = binomial, data = train.lille) - -ic_lille <- lille_regr$coefficients[1] -slope_lille <- lille_regr$coefficients[2] - -# Update scores in training set and assess performance -train.lille$updated.lille <- ic_lille + slope_lille*train.lille$LILLE -train.lille$lille.surv.updated <- 1/(1 + exp(-train.lille$updated.lille)) - -# Assess performance on the test set -test.lille$updated.lille <- ic_lille + slope_lille*test.lille$LILLE -test.lille$lille.surv.updated <- 1/(1 + exp(-test.lille$updated.lille)) - -roc_lille_u <- roc(test.lille$D90_surv, test.lille$lille.surv.updated) -ci.auc(roc_lille_u) - -val.prob(test.lille$lille.surv.updated, test.lille$D90_surv, pl = TRUE, smooth = FALSE, logistic.cal = TRUE, - xlab = "Predicted survival probability", ylab = "Actual survival probability", - legendloc = FALSE, statloc = FALSE) - -# Finally, add the updated scores to the full Lille df -stph.lille$updated.lille <- ic_lille + slope_lille*stph.lille$LILLE -stph.lille$lille.surv.updated <- 1/(1 + exp(-stph.lille$updated.lille)) - -# CLIF-C ACLF score re-calibration -# Split CLIF-C ACLF dataframe into training and test -test.clif <- stph.clif[stph.clif$Subject %in% test.data$Subject,] -train.clif <- stph.clif[stph.clif$Subject %in% train.data$Subject,] - -# Run logistic regression and save regression coefficientss -clif_regr <- glm(D90_surv ~ CLIF.C, family = binomial, data = train.clif) - -ic_clif <- clif_regr$coefficients[1] -slope_clif <- clif_regr$coefficients[2] - -# Update scores in training set and assess performance -train.clif$updated.clif <- ic_clif + slope_clif*train.clif$CLIF.C -train.clif$clif.surv.updated <- 1/(1 + exp(-train.clif$updated.clif)) - -# Assess performance on the test set -test.clif$updated.clif <- ic_clif + slope_clif*test.clif$CLIF.C -test.clif$clif.surv.updated <- 1/(1 + exp(-test.clif$updated.clif)) +############################################# +######### collect all params ################ +############################################# +df_meld <- data.frame(intercept = ic_meld_c, slope = slope_meld_c) +df_meld$Score <- "MELD" -roc_clif_u <- roc(test.clif$D90_surv, test.clif$clif.surv.updated) -ci.auc(roc_clif_u) +df_lille <- data.frame(intercept = ic_lille_c, slope = slope_lille_c) +df_lille$Score <- "Lille" -val.prob(test.clif$clif.surv.updated, test.clif$D90_surv, pl = TRUE, smooth = FALSE, logistic.cal = TRUE, - xlab = "Predicted survival probability", ylab = "Actual survival probability", - legendloc = FALSE, statloc = FALSE) +df_clif <- data.frame(intercept = ic_clif_c, slope = slope_clif_c) +df_clif$Score <- "CLIF-C ACLF" -# Finally, add the 
updated scores to the full CLIF-C ACLF df -stph.clif$updated.clif <- ic_clif + slope_clif*stph.clif$CLIF.C -stph.clif$clif.surv.updated <- 1/(1 + exp(-stph.clif$updated.clif)) +df <- rbind(df_meld, df_lille, df_clif) +rownames(df) <- NULL -# Confidence intervals/SEs for the slope and intercept -# val_prob_confidence is used to get SEs for the calibration intercepts and slopes (and CIs in the plots) -val_prob_confidence(test.meld$meld.surv.updated, test.meld$D90_surv, pl = TRUE, smooth = FALSE, logistic.cal = TRUE, - xlab = "Predicted survival probability", ylab = "Actual survival probability", - legendloc = T, dostats = TRUE, roundstats = 3) +library(xtable) +xtable(df[c(3, 1, 2)]) -val_prob_confidence(test.clif$clif.surv.updated, test.clif$D90_surv, pl = TRUE, smooth = FALSE, logistic.cal = TRUE, - xlab = "Predicted survival probability", ylab = "Actual survival probability", - legendloc = T, dostats = TRUE, roundstats = 3) +#path_to_save <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/updating models/" +#setwd(path_to_save) +#save(test.data, file = "recalibrated_models_default.Rdata") -val_prob_confidence(test.lille$lille.surv.updated, test.lille$D90_surv, pl = TRUE, smooth = FALSE, logistic.cal = TRUE, - xlab = "Predicted survival probability", ylab = "Actual survival probability", - legendloc = T, dostats = TRUE, roundstats = 3) diff --git a/updating models/recalibrated_models_default.Rdata b/updating models/recalibrated_models_default.Rdata new file mode 100755 index 0000000..491b659 Binary files /dev/null and b/updating models/recalibrated_models_default.Rdata differ diff --git a/updating models/recalibrated_models_sens.Rdata b/updating models/recalibrated_models_sens.Rdata new file mode 100755 index 0000000..441b32f Binary files /dev/null and b/updating models/recalibrated_models_sens.Rdata differ diff --git a/updating models/recalibration_sens.R b/updating models/recalibration_sens.R new file mode 100755 index 0000000..25461bc --- /dev/null +++ b/updating models/recalibration_sens.R @@ -0,0 +1,121 @@ + +############################################# +############### Sensitivity Analysis ####### +############################################# + +# load full data +path_data1 <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/pre-analysis/" +load(paste0(path_data1, "full_sample.Rdata")) + +# load data with original models +path_data2 <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/original models/" +load(paste0(path_data2, "original_models.Rdata")) + +#split data on full stph sample and then select the complete cases per model + +# Split full sample in training and test observations +## make sure to use the same seed and fraction as in recalibrate.R script +fraction <- 0.7 + +#dt <- sort(sample(nrow(stph), nrow(stph) * fraction)) +#test.data <- stph[dt,] +#train.data <- stph[-dt,] + +############################################# +############### MELD score ################## +############################################# +# MELD score re-calibration - subset data +#test.meld <- stph.meld[stph.meld$Subject %in% test.data$Subject,] +#train.meld <- stph.meld[stph.meld$Subject %in% train.data$Subject,] + +set.seed(111) # Set random seed +dt <- sort(sample(nrow(stph.meld), nrow(stph.meld) * fraction)) +test.meld <- stph.meld[dt,] +train.meld <- stph.meld[-dt,] + +# Fit a logistic regression model on the training data +meld_regr <- glm(D90_surv ~ MELD.calc, family = binomial("logit"), data = train.meld) + +# Save the regression coefficients (alpha 
and beta) +ic_meld <- meld_regr$coefficients[1] +slope_meld <- meld_regr$coefficients[2] + +# Update scores on training set +train.meld$updated.meld <- ic_meld + slope_meld*train.meld$MELD.calc +train.meld$meld.surv.updated <- 1/(1 + exp(-train.meld$updated.meld)) + +# Update scores on test set +test.meld$updated.meld <- ic_meld + slope_meld*test.meld$MELD.calc +test.meld$meld.surv.updated <- 1/(1 + exp(-test.meld$updated.meld)) + +############################################# +############### Lille score ################# +############################################# +# Lille score re-calibration - subset data +#test.lille <- stph.lille[stph.lille$Subject %in% test.data$Subject,] +#train.lille <- stph.lille[stph.lille$Subject %in% train.data$Subject,] + +set.seed(111) +dt <- sort(sample(nrow(stph.lille), nrow(stph.lille) * fraction)) +test.lille <- stph.lille[dt,] +train.lille <- stph.lille[-dt,] + +# Run logistic regression and save regression coefficients +lille_regr <- glm(D90_surv ~ LILLE, family = binomial("logit"), data = train.lille) + +# Save the regression coefficients (alpha and beta) +ic_lille <- lille_regr$coefficients[1] +slope_lille <- lille_regr$coefficients[2] + +# Update scores on training set +train.lille$updated.lille <- ic_lille + slope_lille*train.lille$LILLE +train.lille$lille.surv.updated <- 1/(1 + exp(-train.lille$updated.lille)) + +# Update scores on test set +test.lille$updated.lille <- ic_lille + slope_lille*test.lille$LILLE +test.lille$lille.surv.updated <- 1/(1 + exp(-test.lille$updated.lille)) + +############################################# +############### CLIF-C ACLF score ########### +############################################# +# CLIF-C ACLF score re-calibration - subset data +#test.clif <- stph.clif[stph.clif$Subject %in% test.data$Subject,] +#train.clif <- stph.clif[stph.clif$Subject %in% train.data$Subject,] + +set.seed(111) +dt <- sort(sample(nrow(stph.clif), nrow(stph.clif) * fraction)) +test.clif <- stph.clif[dt,] +train.clif <- stph.clif[-dt,] + +# Run logistic regression and save regression coefficients +clif_regr <- glm(D90_surv ~ CLIF.C, family = binomial("logit"), data = train.clif) + +# Save the regression coefficients (alpha and beta) +ic_clif <- clif_regr$coefficients[1] +slope_clif <- clif_regr$coefficients[2] + +# Update scores on training set +train.clif$updated.clif <- ic_clif + slope_clif*train.clif$CLIF.C +train.clif$clif.surv.updated <- 1/(1 + exp(-train.clif$updated.clif)) + +# Assess performance on the test set +test.clif$updated.clif <- ic_clif + slope_clif*test.clif$CLIF.C +test.clif$clif.surv.updated <- 1/(1 + exp(-test.clif$updated.clif)) + +# overall sample size per model +nrow(stph.meld); nrow(stph.lille); nrow(stph.clif) + +# train/test sample size +model <- c("MELD", "Lille", "CLIF") +train_size <- c(nrow(train.meld), nrow(train.lille), nrow(train.clif)) +test_size <- c(nrow(test.meld), nrow(test.lille), nrow(test.clif)) +df <- data.frame(model, train_size, test_size) +df + +library(xtable) +xtable(t(df)) + +#path_to_save <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/updating models/" +#setwd(path_to_save) +#save(test.meld, test.lille, test.clif, file = "recalibrated_models_sens.Rdata") + diff --git a/updating models/val/Calibration_severity.R b/updating models/val/Calibration_severity.R new file mode 100644 index 0000000..38ad8ed --- /dev/null +++ b/updating models/val/Calibration_severity.R @@ -0,0 +1,165 @@ + +library(ggplot2) +library(dplyr) +library(survminer) # for plotting theme + +path_funs <- 
"/Users/work/IDrive-Sync/Projects/MIMAH/code/funs" +source(paste0(path_funs, "/calibration_fun.R")) +source(paste0(path_funs, "/val.prob.ci.2_wrapper.R")) + +path_data <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/original models/val_data" +load(paste0(path_data, "/models_validation.Rdata")) + +############################################# +############### Calculate calibration ###### +############################################# + +# stratify by severity +severity <- "non-severe" # "severe" or non-severe + +if(severity == "severe"){ + data_meld <- Global_AlcHep.meld %>% filter(`DF at admission` >= 32) + data_lille <- Global_AlcHep.lille %>% filter(`DF at admission` >= 32) + }else{ + data_meld <- Global_AlcHep.meld %>% filter(`DF at admission` < 32) + data_lille <- Global_AlcHep.lille %>% filter(`DF at admission` < 32) + } + + +############################################# +# MELD 1 survival function +cal_MELD.surv <- calibration(data_meld$MELD.surv, y = data_meld$D90_surv) +cal_MELD.surv$Score <- "MELD 1" + +# MELD_2 survival function +cal_MELD.surv2 <- calibration(data_meld$MELD.surv2, y = data_meld$D90_surv) +cal_MELD.surv2$Score <- "MELD 2" + +# MELD_2 survival function +cal_MELD.surv2 <- calibration(data_meld$MELD.surv2, y = data_meld$D90_surv) +cal_MELD.surv2$Score <- "MELD 2" + +# MELD VanDerwerken +cal_MELD.VanDerwerken <- calibration(data_meld$MELD_Van, y = data_meld$D90_surv) +cal_MELD.VanDerwerken$Score <- "MELD VanDerwerken" + +# MELD updated +cal_MELD.updated <- calibration(data_meld$meld.surv.updated, y = data_meld$D90_surv) +cal_MELD.updated$Score <- "MELD updated" + +############################################# +# Lille +cal_Lille <- calibration(data_lille$Lille.surv, y = data_lille$D90_surv) +cal_Lille$Score <- "Lille original" + +# Lille updated +cal_Lille.updated <- calibration(data_lille$lille.surv.updated, y = data_lille$D90_surv) +cal_Lille.updated$Score <- "Lille updated" + +# combine dfs +df_cal <- rbind(cal_MELD.surv, cal_MELD.surv2, cal_MELD.VanDerwerken, cal_MELD.updated, cal_Lille, cal_Lille.updated) + +df_cal <- df_cal %>% mutate(Status = ifelse(Score == "Lille updated" | Score == "MELD updated", "Updated", "Original")) + +############################################# + +if(severity == "severe"){ + df_cal_severe <- df_cal + df_cal_severe$severity <- "Severe AH" + }else{ + df_cal_non_severe <- df_cal + df_cal_non_severe$severity <- "Non-severe AH" +} + +df_cal_final <- rbind(df_cal_severe, df_cal_non_severe) + +############################################# + +# plot with ribbon +# display.brewer.pal(n = 8, name = 'Dark2') +col_updated <- brewer.pal(n = 8, name = "Dark2")[c(2, 3)] + +p_calibration_updated <- df_cal_final %>% filter(Status == "Updated") %>% + ggplot(., aes(x = pred, y = obs, col = Score)) + + geom_line(lwd = 1) + + geom_abline(slope = 1, intercept = 0, linetype = "dashed") + + geom_ribbon(aes(ymin = lower, ymax = upper, fill = Score, linetype = NA), + alpha = 0.3, show.legend = F) + + #scale_color_brewer(palette = "Dark2") + + #scale_fill_brewer(palette = "Dark2") + + scale_fill_manual("", values = col_updated) + + scale_color_manual(name = "Score", values = col_updated) + + facet_grid(severity ~ Score) + + coord_equal() + + xlim(0, 1) + + ylim(0, 1) + + ylab("Observed survival proportion") + + xlab("Predicted survival probability") + + theme_classic2() + + theme(legend.position = "none") + + scale_x_continuous(labels = c(0, 0.25, 0.5, 0.75, 1)) + +p_calibration_updated + +col_original <- brewer.pal(n = 8, name = "Dark2")[c(2, 
4, 5, 3)] + +p_calibration_original <- df_cal_final %>% filter(Status == "Original") %>% + ggplot(., aes(x = pred, y = obs, col = Score)) + + geom_line(lwd = 1) + + geom_abline(slope = 1, intercept = 0, linetype = "dashed") + + geom_ribbon(aes(ymin = lower, ymax = upper, fill = Score, linetype = NA), + alpha = 0.3, show.legend = F) + + #scale_color_brewer(palette = "Dark2") + + #scale_fill_brewer(palette = "Dark2") + + scale_fill_manual("", values = col_original) + + scale_color_manual(name = "Score", values = col_original) + + facet_grid(severity ~ Score) + + coord_equal() + + xlim(0, 1) + + ylim(0, 1) + + ylab("Observed survival proportion") + + xlab(" ") + + theme_classic2() + + theme(legend.position = "none") + + #scale_y_continuous(labels = scientific) + scale_x_continuous(labels = c(0, 0.25, 0.5, 0.75, 1.00)) + +p_calibration_original + +ggarrange(p_calibration_original, p_calibration_updated, nrow = 1) + +cowplot::plot_grid(p_calibration_original, p_calibration_updated, nrow = 1) + + +# re-arrange factor levels +df_cal_final2 <- df_cal_final %>% + mutate(Score = factor(Score, + levels = c("Lille updated", "MELD updated", "Lille original", "MELD VanDerwerken", "MELD 1", "MELD 2"))) + +display.brewer.pal(n = 8, name = 'Paired') + +col_2 <- brewer.pal(n = 8, name = "Paired")[c(1, 2, 5, 6, 7, 8)] + +df_cal_final2 %>% #filter(Status == "Original") %>% + ggplot(., aes(x = pred, y = obs, col = Score)) + + geom_line(lwd = 1) + + geom_abline(slope = 1, intercept = 0, linetype = "dashed") + + geom_ribbon(aes(ymin = lower, ymax = upper, fill = Score, linetype = NA), + alpha = 0.3, show.legend = F) + + #scale_color_brewer(palette = "Paired") + + #scale_fill_brewer(palette = "Paired") + + scale_fill_manual("", values = col_2) + + scale_color_manual(name = "Score", values = col_2) + + facet_grid(severity ~ Score) + + coord_equal() + + xlim(0, 1) + + ylim(0, 1) + + ylab("Observed survival proportion") + + xlab("Predicted survival probability") + + #theme_minimal() + + theme_classic2() + + theme(legend.position = "none") + + #scale_y_continuous(labels = scientific) + scale_x_continuous(labels = c(0, 0.25, 0.5, 0.75, 1.00)) + + diff --git a/updating models/val/Calibration_temporal.R b/updating models/val/Calibration_temporal.R new file mode 100644 index 0000000..79c8085 --- /dev/null +++ b/updating models/val/Calibration_temporal.R @@ -0,0 +1,125 @@ + +# This script evaluated all calculations of model calibration +library(ggplot2) +library(dplyr) +library(survminer) # for plotting theme + +path_funs <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/funs" +source(paste0(path_funs, "/calibration_fun.R")) +source(paste0(path_funs, "/val.prob.ci.2_wrapper.R")) + +path_data <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/original models/val_data" +load(paste0(path_data, "/models_validation.Rdata")) + +############################################# +############### Calculate calibration ###### +############################################# + +data_meld <- Global_AlcHep.meld %>% filter(`Date of admission`> "2015-01-01") +data_lille <- Global_AlcHep.lille %>% filter(`Date of admission`> "2015-01-01") + +############################################# +# MELD 1 survival function +cal_MELD.surv <- calibration(data_meld$MELD.surv, y = data_meld$D90_surv) +cal_MELD.surv$Score <- "MELD 1" + +# MELD_2 survival function +cal_MELD.surv2 <- calibration(data_meld$MELD.surv2, y = data_meld$D90_surv) +cal_MELD.surv2$Score <- "MELD 2" + +# MELD_2 survival function +cal_MELD.surv2 <- 
calibration(data_meld$MELD.surv2, y = data_meld$D90_surv) +cal_MELD.surv2$Score <- "MELD 2" + +# MELD VanDerwerken +cal_MELD.VanDerwerken <- calibration(data_meld$MELD_Van, y = data_meld$D90_surv) +cal_MELD.VanDerwerken$Score <- "MELD VanDerwerken" + +# MELD updated +cal_MELD.updated <- calibration(data_meld$meld.surv.updated, y = data_meld$D90_surv) +cal_MELD.updated$Score <- "MELD updated" + +############################################# +# Lille +cal_Lille <- calibration(data_lille$Lille.surv, y = data_lille$D90_surv) +cal_Lille$Score <- "Lille original" + +# Lille updated +cal_Lille.updated <- calibration(data_lille$lille.surv.updated, y = data_lille$D90_surv) +cal_Lille.updated$Score <- "Lille updated" + +# combine dfs +df_cal <- rbind(cal_MELD.surv, cal_MELD.surv2, cal_MELD.VanDerwerken, cal_MELD.updated, cal_Lille, cal_Lille.updated) + +df_cal <- df_cal %>% mutate(Status = ifelse(Score == "Lille updated" | Score == "MELD updated", "Updated", "Original")) + +############################################# +# plot without ribbon and without MELD 3.0 +df_cal %>% + ggplot(., aes(x = pred, y = obs, col = Score)) + + geom_line(lwd = 1) + + geom_abline(slope = 1, intercept = 0, linetype = "dashed") + + #geom_ribbon(aes(ymin = lower, ymax = upper, linetype = NA), + # alpha = 0.3, show.legend = F) + + #scale_fill_manual("", values = col) + + #scale_color_manual(name = "Score", values = col) + + xlim(0, 1) + + ylim(0, 1) + + ylab("Observed proportion") + + xlab("Predicted probability") + + theme_classic() + +# plot with ribbon +# display.brewer.pal(n = 8, name = 'Dark2') +col_updated <- brewer.pal(n = 8, name = "Dark2")[c(2, 3)] + +p_calibration_updated <- df_cal %>% filter(Status == "Updated") %>% + ggplot(., aes(x = pred, y = obs, col = Score)) + + geom_line(lwd = 1) + + geom_abline(slope = 1, intercept = 0, linetype = "dashed") + + geom_ribbon(aes(ymin = lower, ymax = upper, fill = Score, linetype = NA), + alpha = 0.3, show.legend = F) + + #scale_color_brewer(palette = "Dark2") + + #scale_fill_brewer(palette = "Dark2") + + scale_fill_manual("", values = col_updated) + + scale_color_manual(name = "Score", values = col_updated) + + facet_grid(. ~ Score) + + coord_equal() + + xlim(0, 1) + + ylim(0, 1) + + ylab("Observed survival proportion") + + xlab("Predicted survival probability") + + theme_classic2() + + theme(legend.position = "none") + + scale_x_continuous(labels = c(0, 0.25, 0.5, 0.75, 1)) + +p_calibration_updated + +col_original <- brewer.pal(n = 8, name = "Dark2")[c(2, 4, 5, 3)] + +p_calibration_original <- df_cal %>% filter(Status == "Original") %>% + ggplot(., aes(x = pred, y = obs, col = Score)) + + geom_line(lwd = 1) + + geom_abline(slope = 1, intercept = 0, linetype = "dashed") + + geom_ribbon(aes(ymin = lower, ymax = upper, fill = Score, linetype = NA), + alpha = 0.3, show.legend = F) + + #scale_color_brewer(palette = "Dark2") + + #scale_fill_brewer(palette = "Dark2") + + scale_fill_manual("", values = col_original) + + scale_color_manual(name = "Score", values = col_original) + + facet_grid(. 
~ Score) + + coord_equal() + + xlim(0, 1) + + ylim(0, 1) + + ylab("Observed survival proportion") + + xlab(" ") + + theme_classic2() + + theme(legend.position = "none") + + #scale_y_continuous(labels = scientific) + scale_x_continuous(labels = c(0, 0.25, 0.5, 0.75, 1.00)) + +p_calibration_original + +ggarrange(p_calibration_original, p_calibration_updated, nrow = 2) + +cowplot::plot_grid(p_calibration_original, p_calibration_updated, nrow = 2, scale = 1) \ No newline at end of file diff --git a/updating models/val/Calibration_validation.R b/updating models/val/Calibration_validation.R new file mode 100644 index 0000000..f959a66 --- /dev/null +++ b/updating models/val/Calibration_validation.R @@ -0,0 +1,172 @@ + +# This script evaluated all calculations of model calibration +library(ggplot2) +library(dplyr) +library(survminer) # for plotting theme +library(RColorBrewer) + +path_funs <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/funs" +source(paste0(path_funs, "/calibration_fun.R")) +source(paste0(path_funs, "/val.prob.ci.2_wrapper.R")) + +path_data <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/original models/val_data" +load(paste0(path_data, "/models_validation.Rdata")) + +############################################# +############### Calculate calibration ###### +############################################# + +data_to_use <- "all" # all, severe, non_severe "no" + +if(data_to_use == "all"){ + data_meld <- Global_AlcHep.meld + data_lille <- Global_AlcHep.lille + }else if(data_to_use == "severe"){ + data_meld <- Global_AlcHep.meld %>% filter(`DF at admission` >= 32) + data_lille <- Global_AlcHep.lille %>% filter(`DF at admission` >= 32) + }else if(data_to_use == "non_severe"){ + data_meld <- Global_AlcHep.meld %>% filter(`DF at admission` < 32) + data_lille <- Global_AlcHep.lille %>% filter(`DF at admission` < 32) + } + +############################################# +# MELD 1 survival function +cal_MELD.surv <- calibration(data_meld$MELD.surv, y = data_meld$D90_surv) +cal_MELD.surv$Score <- "MELD 1" + +# MELD_2 survival function +cal_MELD.surv2 <- calibration(data_meld$MELD.surv2, y = data_meld$D90_surv) +cal_MELD.surv2$Score <- "MELD 2" + +# MELD VanDerwerken +cal_MELD.VanDerwerken <- calibration(data_meld$MELD_Van, y = data_meld$D90_surv) +cal_MELD.VanDerwerken$Score <- "MELD VanDerwerken" + +# MELD updated +cal_MELD.updated <- calibration(data_meld$meld.surv.updated, y = data_meld$D90_surv) +cal_MELD.updated$Score <- "MELD updated" + +############################################# +# Lille +cal_Lille <- calibration(data_lille$Lille.surv, y = data_lille$D90_surv) +cal_Lille$Score <- "Lille original" + +# Lille updated +cal_Lille.updated <- calibration(data_lille$lille.surv.updated, y = data_lille$D90_surv) +cal_Lille.updated$Score <- "Lille updated" + +# combine dfs +df_cal <- rbind(cal_MELD.surv, cal_MELD.surv2, cal_MELD.VanDerwerken, cal_MELD.updated, cal_Lille, cal_Lille.updated) + +df_cal <- df_cal %>% mutate(Status = ifelse(Score == "Lille updated" | Score == "MELD updated", "Updated", "Original")) + +############################################# +# plot without ribbon and without MELD 3.0 +df_cal %>% + ggplot(., aes(x = pred, y = obs, col = Score)) + + geom_line(lwd = 1) + + geom_abline(slope = 1, intercept = 0, linetype = "dashed") + + #geom_ribbon(aes(ymin = lower, ymax = upper, linetype = NA), + # alpha = 0.3, show.legend = F) + + #scale_fill_manual("", values = col) + + #scale_color_manual(name = "Score", values = col) + + xlim(0, 1) + + ylim(0, 1) + + 
ylab("Observed proportion") + + xlab("Predicted probability") + + theme_classic() + +# plot with ribbon +# display.brewer.pal(n = 8, name = 'Dark2') +col_updated <- brewer.pal(n = 8, name = "Dark2")[c(2, 3)] + +p_calibration_updated <- df_cal %>% filter(Status == "Updated") %>% + ggplot(., aes(x = pred, y = obs, col = Score)) + + geom_line(lwd = 1) + + geom_abline(slope = 1, intercept = 0, linetype = "dashed") + + geom_ribbon(aes(ymin = lower, ymax = upper, fill = Score, linetype = NA), + alpha = 0.3, show.legend = F) + + #scale_color_brewer(palette = "Dark2") + + #scale_fill_brewer(palette = "Dark2") + + scale_fill_manual("", values = col_updated) + + scale_color_manual(name = "Score", values = col_updated) + + facet_grid(. ~ Score) + + coord_equal() + + xlim(0, 1) + + ylim(0, 1) + + ylab("Observed survival proportion") + + xlab("Predicted survival probability") + + theme_classic2() + + theme(legend.position = "none") + + scale_x_continuous(labels = c(0, 0.25, 0.5, 0.75, 1)) + +p_calibration_updated + +col_original <- brewer.pal(n = 8, name = "Dark2")[c(2, 4, 5, 3)] + +p_calibration_original <- df_cal %>% filter(Status == "Original") %>% + ggplot(., aes(x = pred, y = obs, col = Score)) + + geom_line(lwd = 1) + + geom_abline(slope = 1, intercept = 0, linetype = "dashed") + + geom_ribbon(aes(ymin = lower, ymax = upper, fill = Score, linetype = NA), + alpha = 0.3, show.legend = F) + + #scale_color_brewer(palette = "Dark2") + + #scale_fill_brewer(palette = "Dark2") + + scale_fill_manual("", values = col_original) + + scale_color_manual(name = "Score", values = col_original) + + facet_grid(. ~ Score) + + coord_equal() + + xlim(0, 1) + + ylim(0, 1) + + ylab("Observed survival proportion") + + xlab(" ") + + theme_classic2() + + theme(legend.position = "none") + + #scale_y_continuous(labels = scientific) + scale_x_continuous(labels = c(0, 0.25, 0.5, 0.75, 1.00)) + +p_calibration_original + +ggarrange(p_calibration_original, p_calibration_updated, nrow = 2) + +cowplot::plot_grid(p_calibration_original, p_calibration_updated, nrow = 2, scale = 1) + +############################################# +############## Summary Stats ############### +############################################# + +# compute calibration slopes and intercepts +# MELD updated +MELD_stats <- val.prob.ci.2_wrapper(data_meld$meld.surv.updated, data_meld$D90_surv, "MELD updated") + +# Lille updated +Lille_stats <- val.prob.ci.2_wrapper(data_lille$lille.surv.updated, data_lille$D90_surv, "Lille updated") + +df_stats <- rbind(MELD_stats, Lille_stats) %>% relocate(Score) + +p_calibration_updated + geom_text(data = df_stats %>% filter(stat == "intercept"), + aes(0.1, 0.95, label = + paste0("Intercept (95% CI): ", Point.estimate, " (", Lower.confidence.limit, "-", Upper.confidence.limit, ")"), + hjust = 0), col = "black") + + geom_text(data = df_stats %>% filter(stat == "slope"), + aes(0.1, 0.90, label = + paste0("Slope (95% CI): ", Point.estimate, " (", Lower.confidence.limit, "-", Upper.confidence.limit, ")"), + hjust = 0), col = "black") + + +##### original models +# MELD +MELD_stats1 <- val.prob.ci.2_wrapper(data_meld$MELD.surv, data_meld$D90_surv, "MELD 1") +MELD_stats2 <- val.prob.ci.2_wrapper(data_meld$MELD.surv2, data_meld$D90_surv, "MELD 2") +MELD_Van <- val.prob.ci.2_wrapper(data_meld$MELD_Van, data_meld$D90_surv, "MELD Van") + +# Lille updated +Lille_stats_orig <- val.prob.ci.2_wrapper(data_lille$Lille.surv, data_lille$D90_surv, "Lille") + +df_original_stats <- rbind(MELD_stats1,MELD_stats2, MELD_Van, Lille_stats_orig) + 
+df_original_stats %>% filter(stat == "slope") +library(xtable) +xtable((df_original_stats)) + + diff --git a/updating models/val/Discrimiation_validation.R b/updating models/val/Discrimiation_validation.R new file mode 100644 index 0000000..ec24340 --- /dev/null +++ b/updating models/val/Discrimiation_validation.R @@ -0,0 +1,130 @@ +library(pROC) +library(dplyr) +library(ggplot2) +library(purrr) +library(tidyr) +library(forcats) +#library(ggROC) + +# load data +path_data <- "/Users/work/IDrive-Sync/Projects/MIMAH/code/AH_code/AH_code/original models/val_data" +load(paste0(path_data, "/models_validation.Rdata")) + +############################################# +############### Calculate AUCs ############# +############################################# + +data_to_use <- "all" # all, severe, non-severe "no" + +if(data_to_use == "all"){ + data_meld <- Global_AlcHep.meld + data_lille <- Global_AlcHep.lille +}else if(data_to_use == "severe"){ + data_meld <- Global_AlcHep.meld %>% filter(`DF at admission` >= 32) + data_lille <- Global_AlcHep.lille %>% filter(`DF at admission` >= 32) +}else if(data_to_use == "non-severe"){ + data_meld <- Global_AlcHep.meld %>% filter(`DF at admission` < 32) + data_lille <- Global_AlcHep.lille %>% filter(`DF at admission` < 32) +} + +############################################# +# MELD original +roc_meld_original <- roc(data_meld$D90_surv, data_meld$MELD.surv) + +# MELD updated +roc_meld_updated <- roc(data_meld$D90_surv, data_meld$meld.surv.updated) + +# Lille original +roc_lille_original <- roc(data_lille$D90_surv, data_lille$LILLE) + +# Lille updated +roc_lille_updated <- roc(data_lille$D90_surv, data_lille$lille.surv.updated) + +############################################# +###################### Plots ############### +############################################# +roc.list <- list( + "Lille original" = roc_lille_original, + "Lille updated" = roc_lille_updated, + "MELD original" = roc_meld_original, + "MELD updated" = roc_meld_updated) + +#setwd(path_data) +#save(roc.list, file = "ROC_updated.Rdata") + +g.list <- ggroc(roc.list) + +# ROC plot of all models combined +g.list + + geom_line(lwd = 1) + + geom_abline(slope = 1, intercept = 1, linetype = "dashed") + + theme_classic() + +# faceting +g.list + + facet_grid(. ~ name) + + geom_line(lwd = 1) + + geom_abline(slope = 1, intercept = 1, linetype = "dashed") + + theme_classic() + + theme(legend.position="none") + +# add confidence bands +ci.list <- lapply(roc.list, ci.se, specificities = seq(0, 1, l = 25)) + +dat.ci.list <- lapply(ci.list, function(ciobj) + data.frame(x = as.numeric(rownames(ciobj)), + lower = ciobj[, 1], + upper = ciobj[, 3])) + +df <- plyr::ldply(dat.ci.list, data.frame, .id = "name") + +pl <- ggroc(roc.list, lwd = 1.1) + + facet_grid(. 
~ name) +
+  #theme_minimal() +
+  geom_ribbon(data = df, aes(x = x, ymin = lower, ymax = upper, fill = name), alpha = 0.3, inherit.aes = F) +
+  geom_abline(slope = 1, intercept = 1, linetype = "dashed") +
+  labs(x = "Specificity", y = "Sensitivity") +
+  scale_color_brewer(palette = "Dark2") +
+  scale_fill_brewer(palette = "Dark2") +
+  coord_equal() +
+  theme_classic2() +
+  theme(legend.position = "none")
+
+#############################################
+###################### AUC #################
+#############################################
+# CI
+#auc_meld <- auc(roc_meld)
+#auc_meld_ci <- ci.auc(roc_meld) # Confidence intervals
+#roc_plot(stph.meld, "D90_surv", "MELD.surv", ci = TRUE, plot_title = "ROC curve for the MELD score")
+
+df_AUC <- as.data.frame(map_dfr(roc.list, ci.auc))
+rownames(df_AUC) <- c("low_CL", "mean", "upper_CL")
+
+df_AUC2 <- tibble::rownames_to_column(df_AUC, var = "AUC")
+
+df3 <- gather(df_AUC2, condition, measurement, `Lille original`:`MELD updated`, factor_key = TRUE)
+data_wide <- spread(df3, AUC, measurement) %>% arrange(mean)
+
+# reorder factor levels
+data_wide$condition <- fct_reorder(data_wide$condition, data_wide$mean)
+
+ggplot(data_wide, aes(x = mean, y = condition, col = condition)) +
+  geom_point(lwd = 2) +
+  coord_cartesian(xlim = c(0.5, 0.86)) +
+  geom_errorbar(aes(xmin = low_CL, xmax = upper_CL),
+                alpha = 1, show.legend = F, lwd = 1, width = 0.5) +
+  labs(y = "Score", col = "Score", x = "AUC with 95% limits") +
+  theme_classic()
+
+# label the faceted ROC plot with AUC and 95% CI (uses data_wide from the AUC section above)
+data_wide$name <- data_wide$condition
+data_wide$low_CL <- round(data_wide$low_CL, 2)
+data_wide$mean <- round(data_wide$mean, 2)
+data_wide$upper_CL <- round(data_wide$upper_CL, 2)
+
+pl + geom_text(data = data_wide, aes(0.05, 0.25, label = paste0("AUC (95% CI): ", mean, " (", low_CL, "-", upper_CL, ")"),
+                                     hjust = 1), col = "black")
diff --git a/updating models/val_prob_confidence.R b/updating models/val_prob_confidence.R old mode 100644 new mode 100755
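# --- Note on the recalibration step (sketch, refers to recalibration_sens.R above) ---
# The manual update  linear predictor = intercept + slope * score,
# survival = 1/(1 + exp(-linear predictor))  is logistic recalibration.
# An equivalent, more compact form uses predict() and plogis(); assuming the
# train.meld / test.meld split created in that script, this refits the same
# model and reproduces test.meld$meld.surv.updated.
meld_recal <- glm(D90_surv ~ MELD.calc, family = binomial("logit"), data = train.meld)
surv_updated_check <- plogis(predict(meld_recal, newdata = test.meld))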
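# --- Possible refactor of Calibration_severity.R (sketch, not in the repo) ---
# Both severity strata can be built in one pass instead of re-running the script
# with the `severity` flag flipped by hand; assumes Global_AlcHep.meld and the
# project's calibration() function as loaded in that script. The same pattern
# extends to the Lille data and the other MELD variants.
library(dplyr)
library(purrr)
cal_meld_by_severity <- map_dfr(c("Severe AH", "Non-severe AH"), function(lab) {
  d <- if (lab == "Severe AH") {
    Global_AlcHep.meld %>% filter(`DF at admission` >= 32)
  } else {
    Global_AlcHep.meld %>% filter(`DF at admission` < 32)
  }
  out <- calibration(d$meld.surv.updated, y = d$D90_surv)
  out$Score <- "MELD updated"
  out$severity <- lab
  out
})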
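# --- Possible addition to Discrimiation_validation.R (sketch) ---
# A formal paired comparison of the original and updated AUCs can be added with
# pROC::roc.test() (DeLong test); roc_meld_* and roc_lille_* are the roc()
# objects created in that script.
library(pROC)
roc.test(roc_meld_original, roc_meld_updated, paired = TRUE, method = "delong")
roc.test(roc_lille_original, roc_lille_updated, paired = TRUE, method = "delong")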