From 9e30ba37f912b80bfaffced8da1ff2216415daf9 Mon Sep 17 00:00:00 2001 From: Daena Rys Date: Tue, 6 May 2025 10:41:37 +0300 Subject: [PATCH 1/8] add getSurvival Signed-off-by: Daena Rys --- DESCRIPTION | 3 +- NAMESPACE | 3 + NEWS | 3 + R/AllGenerics.R | 5 ++ R/getSurvival.R | 157 +++++++++++++++++++++++++++++++++++++++++++++ man/getSurvival.Rd | 107 ++++++++++++++++++++++++++++++ 6 files changed, 277 insertions(+), 1 deletion(-) create mode 100644 R/getSurvival.R create mode 100644 man/getSurvival.Rd diff --git a/DESCRIPTION b/DESCRIPTION index f37559e..86ce5e2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: miaTime Type: Package Title: Microbiome Time Series Analysis -Version: 0.99.8 +Version: 0.99.9 Authors@R: c(person(given = "Leo", family = "Lahti", role = c("aut"), email = "leo.lahti@iki.fi", @@ -25,6 +25,7 @@ Depends: R (>= 4.4.0), mia Imports: + coda4microbiome, dplyr, methods, S4Vectors, diff --git a/NAMESPACE b/NAMESPACE index 3380ac6..eda442d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,6 +10,7 @@ export(getBimodality) export(getShortTermChange) export(getStability) export(getStepwiseDivergence) +export(getSurvival) exportMethods(addBaselineDivergence) exportMethods(addBimodality) exportMethods(addShortTermChange) @@ -20,12 +21,14 @@ exportMethods(getBimodality) exportMethods(getShortTermChange) exportMethods(getStability) exportMethods(getStepwiseDivergence) +exportMethods(getSurvival) import(S4Vectors) import(SingleCellExperiment) import(SummarizedExperiment) import(TreeSummarizedExperiment) import(methods) import(mia) +importFrom(coda4microbiome,coda_coxnet) importFrom(dplyr,across) importFrom(dplyr,all_of) importFrom(dplyr,all_vars) diff --git a/NEWS b/NEWS index 36184a8..944024a 100755 --- a/NEWS +++ b/NEWS @@ -1,3 +1,6 @@ +Changes in version 0.99.8 ++ Added wrapper for survival analysis + Changes in version 0.99.6 + Final fixes for Bioconductor submission diff --git a/R/AllGenerics.R b/R/AllGenerics.R index 40a420e..7f985b0 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -50,3 +50,8 @@ setGeneric("getStability", signature = "x", function(x, ...) #' @export setGeneric("addStability", signature = "x", function(x, ...) standardGeneric("addStability")) + +#' @rdname getSurvival +#' @export +setGeneric("getSurvival", signature = "x", function(x, ...) + standardGeneric("getSurvival")) diff --git a/R/getSurvival.R b/R/getSurvival.R new file mode 100644 index 0000000..e2106ea --- /dev/null +++ b/R/getSurvival.R @@ -0,0 +1,157 @@ +#' @name +#' getSurvival +#' +#' @export +#' +#' @title +#' Survival analysis +#' +#' @description +#' Survival modeling wrapper for TreeSummarizedExperiment using coda_coxnet +#' +#' @details +#' This function extracts compositional count data and survival metadata from a +#' \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}} +#' object and applies penalized Cox regression via \code{\link[coda4microbiome]{coda_coxnet}}, +#' using pairwise log-ratio transformations. +#' +#' +#' @param time.col \code{Character scalar}. Column name in \code{colData(x)} +#' representing time to event or follow-up time. Must be numeric. +#' +#' @param status.col \code{Character scalar}. Column name in \code{colData(x)} +#' representing event occurrence. Accepts numeric (0/1) or logical (FALSE/TRUE) +#' values. +#' +#' @param covar.cols \code{Character vector}. Optional. Specifies covariate +#' columns in \code{colData(x)} to adjust for in the survival model. +#' (Default: \code{NULL}) +#' +#' @param lambda \code{Character or numeric}. Penalization parameter passed to +#' \code{\link[glmnet]{cv.glmnet}}. Use \code{"lambda.1se"}, +#' \code{"lambda.min"}, or a numeric value. (Default: \code{"lambda.1se"}) +#' +#' @param nvar \code{Integer scalar}. Optional. Maximum number of variables +#' (log-ratios) to include in the model. (Default: \code{NULL}) +#' +#' @param alpha \code{Numeric scalar}. Elastic net mixing parameter: 1 = Lasso, +#' 0 = Ridge. (Default: \code{0.9}) +#' +#' @param nfolds \code{Integer scalar}. Number of cross-validation folds for +#' \code{cv.glmnet}. (Default: \code{10}) +#' +#' @param showPlots \code{Logical}. If \code{TRUE}, generates plots for +#' cross-validation, risk scores, and selected signature. (Default: \code{TRUE}) +#' +#' @param coef_threshold \code{Numeric scalar}. Minimum absolute value for a +#' coefficient to be included in the final model. (Default: \code{0}) +#' +#' @inheritParams addBaselineDivergence +#' +#' @return A named \code{list} containing: +#' \itemize{ +#' \item \code{taxa.num}: indices of selected taxa +#' \item \code{taxa.name}: names of selected taxa +#' \item \code{log-contrast coefficients}: coefficients of selected taxa in +#' the final log-contrast model +#' \item \code{risk.score}: predicted risk scores +#' \item \code{apparent Cindex}: concordance index on training data +#' \item \code{mean cv-Cindex}: mean cross-validated concordance index +#' \item \code{sd cv-Cindex}: standard deviation of cross-validated +#' concordance index +#' \item \code{risk score plot}: ggplot object showing risk score vs survival +#' \item \code{signature plot}: ggplot object of selected taxa and coefficients +#' } +#' +#' @seealso \code{\link[coda4microbiome]{coda_coxnet}}, +#' \code{\link[glmnet]{cv.glmnet}} +#' +#' @references +#' Meritxell Pujolassos , Antoni Susín , M.Luz Calle (2024). +#' \emph{Microbiome compositional data analysis for survival studies } +#' NAR Genomics and Bioinformatics, 6(2), lqae038. +#' \doi{10.1093/nargab/lqae038} +#' +#' @examples +#' # data(SurvivalData) +#' # tse <- SurvivalData +#' # getSurvival(tse, time.col = "T1Dweek", status.col = "T1D", +#' # covar.cols = c("Sex", "Antibiotics")) +#' +NULL + +#' @rdname getSurvival +#' @export +#' @importFrom coda4microbiome coda_coxnet +setMethod("getSurvival", signature(x = "SummarizedExperiment"), + function( + x, + time.col, + status.col, + assay.type = "counts", + covar.cols = NULL, + lambda = "lambda.1se", + nvar = NULL, + alpha = 0.9, + nfolds = 10, + showPlots = TRUE, + coef_threshold = 0, + ... + ) { + ############################# INPUT CHECK ######################## + x <- .check_and_get_altExp(x, ...) + + # Check required colData fields + .check_input(time.col, list("character scalar"), + colnames(colData(x))) + .check_input(status.col, list("character scalar"), + colnames(colData(x))) + + if (!is.numeric(x[[time.col]])) { + stop("'time.col' must specify a numeric column from colData(x)", + call. = FALSE) + } + if (!is.logical(x[[status.col]]) && !is.numeric(x[[status.col]])) { + stop("'status.col' must be numeric (0/1) or logical (FALSE/TRUE)", + call. = FALSE) + } + + .check_assay_present(assay.type, x) + + if (!is.null(covar.cols)) { + .check_input(covar.cols, list("character vector"), + colnames(colData(x))) + } + + ########################### INPUT CHECK END ###################### + + # Extract matrix + mat <- t(assay(x, assay.type)) + + # Extract survival time and status + time <- as.numeric(x[[time.col]]) + status <- as.numeric(x[[status.col]]) + + # Extract covariates (if specified) + covar <- NULL + if (!is.null(covar.cols)) { + covar <- as.data.frame(colData(x)[, covar.cols, drop = FALSE]) + } + + # Run core model + result <- coda_coxnet( + x = mat, + time = time, + status = status, + covar = covar, + lambda = lambda, + nvar = nvar, + alpha = alpha, + nfolds = nfolds, + showPlots = showPlots, + coef_threshold = coef_threshold + ) + + return(result) + } +) \ No newline at end of file diff --git a/man/getSurvival.Rd b/man/getSurvival.Rd new file mode 100644 index 0000000..4d77ab4 --- /dev/null +++ b/man/getSurvival.Rd @@ -0,0 +1,107 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllGenerics.R, R/getSurvival.R +\name{getSurvival} +\alias{getSurvival} +\alias{getSurvival,SummarizedExperiment-method} +\title{Survival analysis} +\usage{ +getSurvival(x, ...) + +\S4method{getSurvival}{SummarizedExperiment}( + x, + time.col, + status.col, + assay.type = "counts", + covar.cols = NULL, + lambda = "lambda.1se", + nvar = NULL, + alpha = 0.9, + nfolds = 10, + showPlots = TRUE, + coef_threshold = 0, + ... +) +} +\arguments{ +\item{x}{A +\code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} +object.} + +\item{...}{Optional arguments passed into +\code{\link[mia:addDivergence]{mia::addDivergence()}}.} + +\item{time.col}{\code{Character scalar}. Column name in \code{colData(x)} +representing time to event or follow-up time. Must be numeric.} + +\item{status.col}{\code{Character scalar}. Column name in \code{colData(x)} +representing event occurrence. Accepts numeric (0/1) or logical (FALSE/TRUE) +values.} + +\item{assay.type}{\code{Character scalar}. Specifies which assay values are +used in the dissimilarity estimation. (Default: \code{"counts"})} + +\item{covar.cols}{\code{Character vector}. Optional. Specifies covariate +columns in \code{colData(x)} to adjust for in the survival model. +(Default: \code{NULL})} + +\item{lambda}{\code{Character or numeric}. Penalization parameter passed to +\code{\link[glmnet]{cv.glmnet}}. Use \code{"lambda.1se"}, +\code{"lambda.min"}, or a numeric value. (Default: \code{"lambda.1se"})} + +\item{nvar}{\code{Integer scalar}. Optional. Maximum number of variables +(log-ratios) to include in the model. (Default: \code{NULL})} + +\item{alpha}{\code{Numeric scalar}. Elastic net mixing parameter: 1 = Lasso, +0 = Ridge. (Default: \code{0.9})} + +\item{nfolds}{\code{Integer scalar}. Number of cross-validation folds for +\code{cv.glmnet}. (Default: \code{10})} + +\item{showPlots}{\code{Logical}. If \code{TRUE}, generates plots for +cross-validation, risk scores, and selected signature. (Default: \code{TRUE})} + +\item{coef_threshold}{\code{Numeric scalar}. Minimum absolute value for a +coefficient to be included in the final model. (Default: \code{0})} +} +\value{ +A named \code{list} containing: +\itemize{ +\item \code{taxa.num}: indices of selected taxa +\item \code{taxa.name}: names of selected taxa +\item \code{log-contrast coefficients}: coefficients of selected taxa in +the final log-contrast model +\item \code{risk.score}: predicted risk scores +\item \code{apparent Cindex}: concordance index on training data +\item \code{mean cv-Cindex}: mean cross-validated concordance index +\item \code{sd cv-Cindex}: standard deviation of cross-validated +concordance index +\item \code{risk score plot}: ggplot object showing risk score vs survival +\item \code{signature plot}: ggplot object of selected taxa and coefficients +} +} +\description{ +Survival modeling wrapper for TreeSummarizedExperiment using coda_coxnet +} +\details{ +This function extracts compositional count data and survival metadata from a +\code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}} +object and applies penalized Cox regression via \code{\link[coda4microbiome]{coda_coxnet}}, +using pairwise log-ratio transformations. +} +\examples{ +# data(SurvivalData) +# tse <- SurvivalData +# getSurvival(tse, time.col = "T1Dweek", status.col = "T1D", +# covar.cols = c("Sex", "Antibiotics")) + +} +\references{ +Meritxell Pujolassos , Antoni Susín , M.Luz Calle (2024). +\emph{Microbiome compositional data analysis for survival studies } +NAR Genomics and Bioinformatics, 6(2), lqae038. +\doi{10.1093/nargab/lqae038} +} +\seealso{ +\code{\link[coda4microbiome]{coda_coxnet}}, +\code{\link[glmnet]{cv.glmnet}} +} From 2b1fead1e8116e1abd244ec10e74fc502bc930fe Mon Sep 17 00:00:00 2001 From: Daena Rys Date: Fri, 22 Aug 2025 14:21:12 +0300 Subject: [PATCH 2/8] up Signed-off-by: Daena Rys --- NAMESPACE | 5 +- R/getSurvival.R | 306 ++++++++++++++++++++++++++++++++------------- man/getSurvival.Rd | 55 ++++---- 3 files changed, 245 insertions(+), 121 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index eda442d..3e71997 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -28,7 +28,6 @@ import(SummarizedExperiment) import(TreeSummarizedExperiment) import(methods) import(mia) -importFrom(coda4microbiome,coda_coxnet) importFrom(dplyr,across) importFrom(dplyr,all_of) importFrom(dplyr,all_vars) @@ -47,9 +46,13 @@ importFrom(dplyr,summarize) importFrom(dplyr,sym) importFrom(dplyr,ungroup) importFrom(dplyr,vars) +importFrom(glmnet,cv.glmnet) +importFrom(glmnet,glmnet) importFrom(stats,coef) importFrom(stats,lm) importFrom(stats,median) importFrom(stats,setNames) +importFrom(survival,Surv) +importFrom(survival,coxph) importFrom(tidyr,pivot_wider) importFrom(tidyr,unnest) diff --git a/R/getSurvival.R b/R/getSurvival.R index e2106ea..a764a52 100644 --- a/R/getSurvival.R +++ b/R/getSurvival.R @@ -7,14 +7,11 @@ #' Survival analysis #' #' @description -#' Survival modeling wrapper for TreeSummarizedExperiment using coda_coxnet -#' -#' @details -#' This function extracts compositional count data and survival metadata from a -#' \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}} -#' object and applies penalized Cox regression via \code{\link[coda4microbiome]{coda_coxnet}}, -#' using pairwise log-ratio transformations. -#' +#' Fit a (penalized) Cox proportional hazards model on microbiome data contained +#' in a SummarizedExperiment object. +#' Data transformations (e.g. pairwise log-ratios) should be handled upstream +#' (e.g. with mia::transformAssay). This function focuses on the statistical +#' model. #' #' @param time.col \code{Character scalar}. Column name in \code{colData(x)} #' representing time to event or follow-up time. Must be numeric. @@ -26,41 +23,38 @@ #' @param covar.cols \code{Character vector}. Optional. Specifies covariate #' columns in \code{colData(x)} to adjust for in the survival model. #' (Default: \code{NULL}) +#' +#' @param penalized \code{Logical}. If TRUE, fit penalized Cox regression +#' using \code{glmnet}. If FALSE, fit standard Cox model using +#' \code{survival::coxph}. (Default: \code{TRUE}) #' #' @param lambda \code{Character or numeric}. Penalization parameter passed to #' \code{\link[glmnet]{cv.glmnet}}. Use \code{"lambda.1se"}, #' \code{"lambda.min"}, or a numeric value. (Default: \code{"lambda.1se"}) #' -#' @param nvar \code{Integer scalar}. Optional. Maximum number of variables -#' (log-ratios) to include in the model. (Default: \code{NULL}) -#' #' @param alpha \code{Numeric scalar}. Elastic net mixing parameter: 1 = Lasso, #' 0 = Ridge. (Default: \code{0.9}) #' #' @param nfolds \code{Integer scalar}. Number of cross-validation folds for #' \code{cv.glmnet}. (Default: \code{10}) -#' -#' @param showPlots \code{Logical}. If \code{TRUE}, generates plots for -#' cross-validation, risk scores, and selected signature. (Default: \code{TRUE}) -#' +#' +#' @param nvar \code{Integer scalar}. Optional. Maximum number of variables +#' (log-ratios) to include in the model. (Default: \code{NULL}) +#' #' @param coef_threshold \code{Numeric scalar}. Minimum absolute value for a #' coefficient to be included in the final model. (Default: \code{0}) #' #' @inheritParams addBaselineDivergence #' -#' @return A named \code{list} containing: +#' @return A list with model summaries: #' \itemize{ -#' \item \code{taxa.num}: indices of selected taxa -#' \item \code{taxa.name}: names of selected taxa -#' \item \code{log-contrast coefficients}: coefficients of selected taxa in -#' the final log-contrast model -#' \item \code{risk.score}: predicted risk scores -#' \item \code{apparent Cindex}: concordance index on training data -#' \item \code{mean cv-Cindex}: mean cross-validated concordance index -#' \item \code{sd cv-Cindex}: standard deviation of cross-validated -#' concordance index -#' \item \code{risk score plot}: ggplot object showing risk score vs survival -#' \item \code{signature plot}: ggplot object of selected taxa and coefficients +#' \item \code{coefficients}: estimated model coefficients +#' \item \code{selected.features}: nonzero features in penalized model +#' \item \code{risk.scores}: predicted risk scores +#' \item \code{cindex.apparent}: apparent concordance index +#' \item \code{cv.cindex.mean}: mean cross-validated C-index (if penalized) +#' \item \code{cv.cindex.sd}: SD of cross-validated C-index (if penalized) +#' \item \code{fit}: fitted model object (coxph or cv.glmnet) #' } #' #' @seealso \code{\link[coda4microbiome]{coda_coxnet}}, @@ -82,7 +76,8 @@ NULL #' @rdname getSurvival #' @export -#' @importFrom coda4microbiome coda_coxnet +#' @importFrom survival Surv coxph +#' @importFrom glmnet cv.glmnet glmnet setMethod("getSurvival", signature(x = "SummarizedExperiment"), function( x, @@ -90,68 +85,201 @@ setMethod("getSurvival", signature(x = "SummarizedExperiment"), status.col, assay.type = "counts", covar.cols = NULL, - lambda = "lambda.1se", - nvar = NULL, - alpha = 0.9, - nfolds = 10, - showPlots = TRUE, - coef_threshold = 0, ... ) { - ############################# INPUT CHECK ######################## - x <- .check_and_get_altExp(x, ...) - - # Check required colData fields - .check_input(time.col, list("character scalar"), - colnames(colData(x))) - .check_input(status.col, list("character scalar"), - colnames(colData(x))) + # input checks + x <- .check_data_for_survival(x, time.col, + status.col, covar.cols, assay.type, ...) + + # extract data + dat <- .get_data_for_survival(x, time.col, + status.col, covar.cols, assay.type) + + # fit survival model + res <- .calc_survival( + mat = dat$mat, + time = dat$time, + status = dat$status, + covar = dat$covar, + ... + ) - if (!is.numeric(x[[time.col]])) { - stop("'time.col' must specify a numeric column from colData(x)", - call. = FALSE) - } - if (!is.logical(x[[status.col]]) && !is.numeric(x[[status.col]])) { - stop("'status.col' must be numeric (0/1) or logical (FALSE/TRUE)", - call. = FALSE) - } - - .check_assay_present(assay.type, x) - - if (!is.null(covar.cols)) { - .check_input(covar.cols, list("character vector"), - colnames(colData(x))) - } - - ########################### INPUT CHECK END ###################### - - # Extract matrix - mat <- t(assay(x, assay.type)) - - # Extract survival time and status - time <- as.numeric(x[[time.col]]) - status <- as.numeric(x[[status.col]]) - - # Extract covariates (if specified) - covar <- NULL - if (!is.null(covar.cols)) { - covar <- as.data.frame(colData(x)[, covar.cols, drop = FALSE]) - } - - # Run core model - result <- coda_coxnet( - x = mat, - time = time, - status = status, - covar = covar, - lambda = lambda, - nvar = nvar, - alpha = alpha, - nfolds = nfolds, - showPlots = showPlots, - coef_threshold = coef_threshold - ) - - return(result) + return(res) } -) \ No newline at end of file +) + +############################# internal helpers ################################# + +.check_data_for_survival <- function(x, time.col, + status.col, covar.cols, assay.type, ...) +{ + x <- .check_and_get_altExp(x, ...) + .check_input(time.col, list("character scalar"), colnames(colData(x))) + .check_input(status.col, list("character scalar"), colnames(colData(x))) + .check_assay_present(assay.type, x) + + if (!is.numeric(x[[time.col]])) { + stop("'time.col' must be numeric", call. = FALSE) + } + if (!is.logical(x[[status.col]]) && !is.numeric(x[[status.col]])) { + stop("'status.col' must be numeric (0/1) or logical", call. = FALSE) + } + if (!is.null(covar.cols)) { + .check_input(covar.cols, list("character vector"), colnames(colData(x))) + } + + return(x) +} + +.get_data_for_survival <- function(x, time.col, + status.col, covar.cols, assay.type) +{ + mat <- t(assay(x, assay.type)) + time <- as.numeric(x[[time.col]]) + status <- as.numeric(x[[status.col]]) + + covar <- NULL + if (!is.null(covar.cols)) { + covar <- as.data.frame(colData(x)[, covar.cols, drop = FALSE]) + } + + list( + mat = mat, + time = time, + status = status, + covar = covar + ) +} + +.calc_survival <- function(mat, time, status, covar = NULL, + penalized = TRUE, alpha = 0.9, nfolds = 10, + lambda = "lambda.1se", nvar = NULL, + coef_threshold = 0, ...) +{ + y <- survival::Surv(time, status) + + if (penalized) { + ## --- penalized Cox with glmnet --- + if (is.null(covar)) { + cvfit <- glmnet::cv.glmnet( + x = mat, + y = y, + family = "cox", + type.measure = "C", + alpha = alpha, + nfolds = nfolds, + keep = TRUE + ) + offset <- NULL + } else { + ## fit Cox model for covariates -> use offset + df0 <- data.frame(time = time, status = status, covar) + model0 <- survival::coxph(Surv(time, status) ~ ., data = df0) + offset <- predict(model0) + cvfit <- glmnet::cv.glmnet( + x = mat, + y = y, + family = "cox", + type.measure = "C", + alpha = alpha, + nfolds = nfolds, + keep = TRUE, + offset = offset + ) + } + + ## --- nvar cutoff handling --- + if (!is.null(nvar)) { + rowlasso <- max(which(cvfit$glmnet.fit$df <= nvar)) + lambda <- cvfit$glmnet.fit$lambda[rowlasso] + } + + ## --- lambda selection --- + if (is.character(lambda)) { + if (lambda == "lambda.1se") lambda.value <- cvfit$lambda.1se + if (lambda == "lambda.min") lambda.value <- cvfit$lambda.min + } else { + lambda.value <- lambda + } + + ## --- coefficients --- + coefs <- as.vector(coef(cvfit, s = lambda.value)) + names(coefs) <- rownames(coef(cvfit, s = lambda.value)) + selected <- which(abs(coefs) > coef_threshold) + coef.filtered <- coefs[selected] + + ## normalize coefficients + if (length(coef.filtered) > 0) { + coef.filtered <- 2 * coef.filtered / sum(abs(coef.filtered)) + } + + ## --- predictions --- + if (is.null(offset)) { + risk.scores <- as.numeric(predict(cvfit, mat, s = lambda.value)) + } else { + risk.scores <- as.numeric(predict(cvfit, mat, s = lambda.value, + newoffset = offset)) + } + + ## --- apparent C-index --- + if (length(coef.filtered) == 0) { + Cindex_signature <- 0.5 + } else { + Cindex_signature <- glmnet::Cindex(pred = risk.scores, y) + } + + ## --- CV stats at chosen lambda --- + idrow <- max(which(cvfit$glmnet.fit$lambda >= lambda.value)) + mcvCindex <- cvfit$cvm[idrow] + sdcvCindex <- cvfit$cvsd[idrow] + + out <- list( + coefficients = coef.filtered, + selected.features = names(coef.filtered), + risk.scores = risk.scores, + cindex.apparent = Cindex_signature, + cv.cindex.mean = mcvCindex, + cv.cindex.sd = sdcvCindex, + fit = cvfit + ) + + } else { + ## --- standard CoxPH --- + df <- data.frame(time = time, status = status, mat, covar) + formula <- as.formula(paste("Surv(time, status) ~ .")) + fit <- survival::coxph(formula, data = df) + + ## raw coefficients + coefs <- coef(fit) + + ## apply coef_threshold + selected <- which(abs(coefs) > coef_threshold) + coef.filtered <- coefs[selected] + + ## normalize + if (length(coef.filtered) > 0) { + coef.filtered <- 2 * coef.filtered / sum(abs(coef.filtered)) + } + + risk.scores <- predict(fit, type = "lp") + + ## apparent C-index + if (length(coef.filtered) == 0) { + Cindex_signature <- 0.5 + } else { + Cindex_signature <- summary(fit)$concordance[1] + } + + out <- list( + coefficients = coef.filtered, + selected.features = names(coef.filtered), + risk.scores = as.numeric(risk.scores), + cindex.apparent = Cindex_signature, + cv.cindex.mean = NULL, + cv.cindex.sd = NULL, + fit = fit + ) + } + + return(out) +} diff --git a/man/getSurvival.Rd b/man/getSurvival.Rd index 4d77ab4..f70ebe5 100644 --- a/man/getSurvival.Rd +++ b/man/getSurvival.Rd @@ -13,12 +13,10 @@ getSurvival(x, ...) status.col, assay.type = "counts", covar.cols = NULL, - lambda = "lambda.1se", - nvar = NULL, + penalized = TRUE, alpha = 0.9, nfolds = 10, - showPlots = TRUE, - coef_threshold = 0, + lambda = "lambda.1se", ... ) } @@ -44,12 +42,9 @@ used in the dissimilarity estimation. (Default: \code{"counts"})} columns in \code{colData(x)} to adjust for in the survival model. (Default: \code{NULL})} -\item{lambda}{\code{Character or numeric}. Penalization parameter passed to -\code{\link[glmnet]{cv.glmnet}}. Use \code{"lambda.1se"}, -\code{"lambda.min"}, or a numeric value. (Default: \code{"lambda.1se"})} - -\item{nvar}{\code{Integer scalar}. Optional. Maximum number of variables -(log-ratios) to include in the model. (Default: \code{NULL})} +\item{penalized}{\code{Logical}. If TRUE, fit penalized Cox regression +using \code{glmnet}. If FALSE, fit standard Cox model using +\code{survival::coxph}. (Default: \code{TRUE})} \item{alpha}{\code{Numeric scalar}. Elastic net mixing parameter: 1 = Lasso, 0 = Ridge. (Default: \code{0.9})} @@ -57,36 +52,34 @@ columns in \code{colData(x)} to adjust for in the survival model. \item{nfolds}{\code{Integer scalar}. Number of cross-validation folds for \code{cv.glmnet}. (Default: \code{10})} -\item{showPlots}{\code{Logical}. If \code{TRUE}, generates plots for -cross-validation, risk scores, and selected signature. (Default: \code{TRUE})} +\item{lambda}{\code{Character or numeric}. Penalization parameter passed to +\code{\link[glmnet]{cv.glmnet}}. Use \code{"lambda.1se"}, +\code{"lambda.min"}, or a numeric value. (Default: \code{"lambda.1se"})} + +\item{nvar}{\code{Integer scalar}. Optional. Maximum number of variables +(log-ratios) to include in the model. (Default: \code{NULL})} \item{coef_threshold}{\code{Numeric scalar}. Minimum absolute value for a coefficient to be included in the final model. (Default: \code{0})} } \value{ -A named \code{list} containing: +A list with model summaries: \itemize{ -\item \code{taxa.num}: indices of selected taxa -\item \code{taxa.name}: names of selected taxa -\item \code{log-contrast coefficients}: coefficients of selected taxa in -the final log-contrast model -\item \code{risk.score}: predicted risk scores -\item \code{apparent Cindex}: concordance index on training data -\item \code{mean cv-Cindex}: mean cross-validated concordance index -\item \code{sd cv-Cindex}: standard deviation of cross-validated -concordance index -\item \code{risk score plot}: ggplot object showing risk score vs survival -\item \code{signature plot}: ggplot object of selected taxa and coefficients +\item \code{coefficients}: estimated model coefficients +\item \code{selected.features}: nonzero features in penalized model +\item \code{risk.scores}: predicted risk scores +\item \code{cindex.apparent}: apparent concordance index +\item \code{cv.cindex.mean}: mean cross-validated C-index (if penalized) +\item \code{cv.cindex.sd}: SD of cross-validated C-index (if penalized) +\item \code{fit}: fitted model object (coxph or cv.glmnet) } } \description{ -Survival modeling wrapper for TreeSummarizedExperiment using coda_coxnet -} -\details{ -This function extracts compositional count data and survival metadata from a -\code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}} -object and applies penalized Cox regression via \code{\link[coda4microbiome]{coda_coxnet}}, -using pairwise log-ratio transformations. +Fit a (penalized) Cox proportional hazards model on microbiome data contained +in a SummarizedExperiment object. +Data transformations (e.g. pairwise log-ratios) should be handled upstream +(e.g. with mia::transformAssay). This function focuses on the statistical +model. } \examples{ # data(SurvivalData) From 38fd536cf4e1d5cd08f86280827bd7155e410972 Mon Sep 17 00:00:00 2001 From: Daena Rys Date: Fri, 22 Aug 2025 14:56:29 +0300 Subject: [PATCH 3/8] up Signed-off-by: Daena Rys --- DESCRIPTION | 3 ++- man/getSurvival.Rd | 12 ++++-------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 3095cec..43eed37 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -25,13 +25,14 @@ Depends: R (>= 4.5.0), mia Imports: - coda4microbiome, dplyr, + glmnet, methods, S4Vectors, SingleCellExperiment, stats, SummarizedExperiment, + survival, tidyr, TreeSummarizedExperiment Suggests: diff --git a/man/getSurvival.Rd b/man/getSurvival.Rd index f70ebe5..34ecb5b 100644 --- a/man/getSurvival.Rd +++ b/man/getSurvival.Rd @@ -13,10 +13,6 @@ getSurvival(x, ...) status.col, assay.type = "counts", covar.cols = NULL, - penalized = TRUE, - alpha = 0.9, - nfolds = 10, - lambda = "lambda.1se", ... ) } @@ -46,16 +42,16 @@ columns in \code{colData(x)} to adjust for in the survival model. using \code{glmnet}. If FALSE, fit standard Cox model using \code{survival::coxph}. (Default: \code{TRUE})} +\item{lambda}{\code{Character or numeric}. Penalization parameter passed to +\code{\link[glmnet]{cv.glmnet}}. Use \code{"lambda.1se"}, +\code{"lambda.min"}, or a numeric value. (Default: \code{"lambda.1se"})} + \item{alpha}{\code{Numeric scalar}. Elastic net mixing parameter: 1 = Lasso, 0 = Ridge. (Default: \code{0.9})} \item{nfolds}{\code{Integer scalar}. Number of cross-validation folds for \code{cv.glmnet}. (Default: \code{10})} -\item{lambda}{\code{Character or numeric}. Penalization parameter passed to -\code{\link[glmnet]{cv.glmnet}}. Use \code{"lambda.1se"}, -\code{"lambda.min"}, or a numeric value. (Default: \code{"lambda.1se"})} - \item{nvar}{\code{Integer scalar}. Optional. Maximum number of variables (log-ratios) to include in the model. (Default: \code{NULL})} From 059f1091c65c768e5b50b43dd151a13258587a34 Mon Sep 17 00:00:00 2001 From: Daena Rys Date: Fri, 22 Aug 2025 15:33:04 +0300 Subject: [PATCH 4/8] up Signed-off-by: Daena Rys --- R/getSurvival.R | 31 +++++++++++++++++-------------- man/getSurvival.Rd | 44 ++++++++++++++++++++++---------------------- 2 files changed, 39 insertions(+), 36 deletions(-) diff --git a/R/getSurvival.R b/R/getSurvival.R index a764a52..cfa30fc 100644 --- a/R/getSurvival.R +++ b/R/getSurvival.R @@ -24,25 +24,28 @@ #' columns in \code{colData(x)} to adjust for in the survival model. #' (Default: \code{NULL}) #' -#' @param penalized \code{Logical}. If TRUE, fit penalized Cox regression -#' using \code{glmnet}. If FALSE, fit standard Cox model using -#' \code{survival::coxph}. (Default: \code{TRUE}) +#' @param ... additional arguments. +#' \itemize{ +#' \item \code{penalized}: \code{Logical}. If TRUE, fit penalized Cox +#' regression using \code{glmnet}. If FALSE, fit standard Cox model using +#' \code{survival::coxph}. (Default: \code{TRUE}) #' -#' @param lambda \code{Character or numeric}. Penalization parameter passed to -#' \code{\link[glmnet]{cv.glmnet}}. Use \code{"lambda.1se"}, -#' \code{"lambda.min"}, or a numeric value. (Default: \code{"lambda.1se"}) +#' \item \code{lambda}: \code{Character or numeric}. Penalization parameter +#' passed to \code{\link[glmnet]{cv.glmnet}}. Use \code{"lambda.1se"}, +#' \code{"lambda.min"}, or a numeric value. (Default: \code{"lambda.1se"}) #' -#' @param alpha \code{Numeric scalar}. Elastic net mixing parameter: 1 = Lasso, -#' 0 = Ridge. (Default: \code{0.9}) +#' \item \code{alpha}: \code{Numeric scalar}. Elastic net mixing parameter: +#' 1 = Lasso, 0 = Ridge. (Default: \code{0.9}) #' -#' @param nfolds \code{Integer scalar}. Number of cross-validation folds for -#' \code{cv.glmnet}. (Default: \code{10}) +#' \item \code{nfolds}: \code{Integer scalar}. Number of cross-validation +#' folds for \code{cv.glmnet}. (Default: \code{10}) #' -#' @param nvar \code{Integer scalar}. Optional. Maximum number of variables -#' (log-ratios) to include in the model. (Default: \code{NULL}) +#' \item \code{nvar}: \code{Integer scalar}. Optional. Maximum number of +#' variables (log-ratios) to include in the model. (Default: \code{NULL}) #' -#' @param coef_threshold \code{Numeric scalar}. Minimum absolute value for a -#' coefficient to be included in the final model. (Default: \code{0}) +#' \item \code{coef_threshold}: \code{Numeric scalar}. Minimum absolute value +#' for a coefficient to be included in the final model. (Default: \code{0}) +#' } #' #' @inheritParams addBaselineDivergence #' diff --git a/man/getSurvival.Rd b/man/getSurvival.Rd index 34ecb5b..daffa53 100644 --- a/man/getSurvival.Rd +++ b/man/getSurvival.Rd @@ -21,8 +21,28 @@ getSurvival(x, ...) \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} object.} -\item{...}{Optional arguments passed into -\code{\link[mia:addDivergence]{mia::addDivergence()}}.} +\item{...}{additional arguments. +\itemize{ +\item \code{penalized}: \code{Logical}. If TRUE, fit penalized Cox +regression using \code{glmnet}. If FALSE, fit standard Cox model using +\code{survival::coxph}. (Default: \code{TRUE}) + +\item \code{lambda}: \code{Character or numeric}. Penalization parameter +passed to \code{\link[glmnet]{cv.glmnet}}. Use \code{"lambda.1se"}, +\code{"lambda.min"}, or a numeric value. (Default: \code{"lambda.1se"}) + +\item \code{alpha}: \code{Numeric scalar}. Elastic net mixing parameter: +1 = Lasso, 0 = Ridge. (Default: \code{0.9}) + +\item \code{nfolds}: \code{Integer scalar}. Number of cross-validation +folds for \code{cv.glmnet}. (Default: \code{10}) + +\item \code{nvar}: \code{Integer scalar}. Optional. Maximum number of +variables (log-ratios) to include in the model. (Default: \code{NULL}) + +\item \code{coef_threshold}: \code{Numeric scalar}. Minimum absolute value +for a coefficient to be included in the final model. (Default: \code{0}) +}} \item{time.col}{\code{Character scalar}. Column name in \code{colData(x)} representing time to event or follow-up time. Must be numeric.} @@ -37,26 +57,6 @@ used in the dissimilarity estimation. (Default: \code{"counts"})} \item{covar.cols}{\code{Character vector}. Optional. Specifies covariate columns in \code{colData(x)} to adjust for in the survival model. (Default: \code{NULL})} - -\item{penalized}{\code{Logical}. If TRUE, fit penalized Cox regression -using \code{glmnet}. If FALSE, fit standard Cox model using -\code{survival::coxph}. (Default: \code{TRUE})} - -\item{lambda}{\code{Character or numeric}. Penalization parameter passed to -\code{\link[glmnet]{cv.glmnet}}. Use \code{"lambda.1se"}, -\code{"lambda.min"}, or a numeric value. (Default: \code{"lambda.1se"})} - -\item{alpha}{\code{Numeric scalar}. Elastic net mixing parameter: 1 = Lasso, -0 = Ridge. (Default: \code{0.9})} - -\item{nfolds}{\code{Integer scalar}. Number of cross-validation folds for -\code{cv.glmnet}. (Default: \code{10})} - -\item{nvar}{\code{Integer scalar}. Optional. Maximum number of variables -(log-ratios) to include in the model. (Default: \code{NULL})} - -\item{coef_threshold}{\code{Numeric scalar}. Minimum absolute value for a -coefficient to be included in the final model. (Default: \code{0})} } \value{ A list with model summaries: From 63510e55a41d5c25a858d9585d3d95b4719d80a7 Mon Sep 17 00:00:00 2001 From: Daena Rys Date: Mon, 1 Sep 2025 13:33:02 +0300 Subject: [PATCH 5/8] up Signed-off-by: Daena Rys --- NAMESPACE | 2 + R/AllGenerics.R | 5 + R/getSurvival.R | 293 +++++++++++++++++++++++---------------------- man/getSurvival.Rd | 20 +++- 4 files changed, 170 insertions(+), 150 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 3e71997..be97c72 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,6 +5,7 @@ export(addBimodality) export(addShortTermChange) export(addStability) export(addStepwiseDivergence) +export(addSurvival) export(getBaselineDivergence) export(getBimodality) export(getShortTermChange) @@ -16,6 +17,7 @@ exportMethods(addBimodality) exportMethods(addShortTermChange) exportMethods(addStability) exportMethods(addStepwiseDivergence) +exportMethods(addSurvival) exportMethods(getBaselineDivergence) exportMethods(getBimodality) exportMethods(getShortTermChange) diff --git a/R/AllGenerics.R b/R/AllGenerics.R index 7f985b0..ca65f48 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -55,3 +55,8 @@ setGeneric("addStability", signature = "x", function(x, ...) #' @export setGeneric("getSurvival", signature = "x", function(x, ...) standardGeneric("getSurvival")) + +#' @rdname getSurvival +#' @export +setGeneric("addSurvival", signature = "x", function(x, ...) + standardGeneric("addSurvival")) \ No newline at end of file diff --git a/R/getSurvival.R b/R/getSurvival.R index cfa30fc..05b4456 100644 --- a/R/getSurvival.R +++ b/R/getSurvival.R @@ -20,7 +20,7 @@ #' representing event occurrence. Accepts numeric (0/1) or logical (FALSE/TRUE) #' values. #' -#' @param covar.cols \code{Character vector}. Optional. Specifies covariate +#' @param col.var \code{Character vector}. Optional. Specifies covariate #' columns in \code{colData(x)} to adjust for in the survival model. #' (Default: \code{NULL}) #' @@ -43,7 +43,7 @@ #' \item \code{nvar}: \code{Integer scalar}. Optional. Maximum number of #' variables (log-ratios) to include in the model. (Default: \code{NULL}) #' -#' \item \code{coef_threshold}: \code{Numeric scalar}. Minimum absolute value +#' \item \code{coef.threshold}: \code{Numeric scalar}. Minimum absolute value #' for a coefficient to be included in the final model. (Default: \code{0}) #' } #' @@ -53,7 +53,7 @@ #' \itemize{ #' \item \code{coefficients}: estimated model coefficients #' \item \code{selected.features}: nonzero features in penalized model -#' \item \code{risk.scores}: predicted risk scores +#' \item \code{risk_scores}: predicted risk scores #' \item \code{cindex.apparent}: apparent concordance index #' \item \code{cv.cindex.mean}: mean cross-validated C-index (if penalized) #' \item \code{cv.cindex.sd}: SD of cross-validated C-index (if penalized) @@ -73,30 +73,38 @@ #' # data(SurvivalData) #' # tse <- SurvivalData #' # getSurvival(tse, time.col = "T1Dweek", status.col = "T1D", -#' # covar.cols = c("Sex", "Antibiotics")) +#' # col.var = c("Sex", "Antibiotics")) #' NULL #' @rdname getSurvival #' @export -#' @importFrom survival Surv coxph -#' @importFrom glmnet cv.glmnet glmnet +setMethod("addSurvival", signature = c(x = "SummarizedExperiment"), + function(x, time.col, status.col, name = "survival", ...){ + .check_input(name, "character scalar") + x <- .check_and_get_altExp(x, ...) + args <- c( + list(x = x, time.col = time.col, status.col = status.col, + name = name), + list(...)) + res <- do.call(getSurvival, args) |> as.list() + x <- .add_values_to_metadata(x, name, res, ...) + return(x) + } +) + +#' @rdname getSurvival +#' @export setMethod("getSurvival", signature(x = "SummarizedExperiment"), - function( - x, - time.col, - status.col, - assay.type = "counts", - covar.cols = NULL, - ... - ) { + function(x, time.col, status.col, assay.type = "counts", col.var = NULL, + ...){ # input checks x <- .check_data_for_survival(x, time.col, - status.col, covar.cols, assay.type, ...) + status.col, col.var, assay.type, ...) # extract data dat <- .get_data_for_survival(x, time.col, - status.col, covar.cols, assay.type) + status.col, col.var, assay.type) # fit survival model res <- .calc_survival( @@ -108,13 +116,13 @@ setMethod("getSurvival", signature(x = "SummarizedExperiment"), ) return(res) - } + } ) ############################# internal helpers ################################# .check_data_for_survival <- function(x, time.col, - status.col, covar.cols, assay.type, ...) + status.col, col.var, assay.type, ...) { x <- .check_and_get_altExp(x, ...) .check_input(time.col, list("character scalar"), colnames(colData(x))) @@ -127,25 +135,23 @@ setMethod("getSurvival", signature(x = "SummarizedExperiment"), if (!is.logical(x[[status.col]]) && !is.numeric(x[[status.col]])) { stop("'status.col' must be numeric (0/1) or logical", call. = FALSE) } - if (!is.null(covar.cols)) { - .check_input(covar.cols, list("character vector"), colnames(colData(x))) + if (!is.null(col.var)) { + .check_input(col.var, list("character vector"), colnames(colData(x))) } return(x) } .get_data_for_survival <- function(x, time.col, - status.col, covar.cols, assay.type) + status.col, col.var, assay.type) { mat <- t(assay(x, assay.type)) time <- as.numeric(x[[time.col]]) status <- as.numeric(x[[status.col]]) covar <- NULL - if (!is.null(covar.cols)) { - covar <- as.data.frame(colData(x)[, covar.cols, drop = FALSE]) - } - + if (!is.null(col.var)) + covar <- as.data.frame(colData(x)[, col.var, drop = FALSE]) list( mat = mat, time = time, @@ -154,135 +160,132 @@ setMethod("getSurvival", signature(x = "SummarizedExperiment"), ) } +# dispatcher to calculate survival .calc_survival <- function(mat, time, status, covar = NULL, penalized = TRUE, alpha = 0.9, nfolds = 10, lambda = "lambda.1se", nvar = NULL, - coef_threshold = 0, ...) + coef.threshold = 0, ...) { - y <- survival::Surv(time, status) - if (penalized) { ## --- penalized Cox with glmnet --- - if (is.null(covar)) { - cvfit <- glmnet::cv.glmnet( - x = mat, - y = y, - family = "cox", - type.measure = "C", - alpha = alpha, - nfolds = nfolds, - keep = TRUE - ) - offset <- NULL - } else { - ## fit Cox model for covariates -> use offset - df0 <- data.frame(time = time, status = status, covar) - model0 <- survival::coxph(Surv(time, status) ~ ., data = df0) - offset <- predict(model0) - cvfit <- glmnet::cv.glmnet( - x = mat, - y = y, - family = "cox", - type.measure = "C", - alpha = alpha, - nfolds = nfolds, - keep = TRUE, - offset = offset - ) - } - - ## --- nvar cutoff handling --- - if (!is.null(nvar)) { - rowlasso <- max(which(cvfit$glmnet.fit$df <= nvar)) - lambda <- cvfit$glmnet.fit$lambda[rowlasso] - } - - ## --- lambda selection --- - if (is.character(lambda)) { - if (lambda == "lambda.1se") lambda.value <- cvfit$lambda.1se - if (lambda == "lambda.min") lambda.value <- cvfit$lambda.min - } else { - lambda.value <- lambda - } - - ## --- coefficients --- - coefs <- as.vector(coef(cvfit, s = lambda.value)) - names(coefs) <- rownames(coef(cvfit, s = lambda.value)) - selected <- which(abs(coefs) > coef_threshold) - coef.filtered <- coefs[selected] - - ## normalize coefficients - if (length(coef.filtered) > 0) { - coef.filtered <- 2 * coef.filtered / sum(abs(coef.filtered)) - } - - ## --- predictions --- - if (is.null(offset)) { - risk.scores <- as.numeric(predict(cvfit, mat, s = lambda.value)) - } else { - risk.scores <- as.numeric(predict(cvfit, mat, s = lambda.value, - newoffset = offset)) - } - - ## --- apparent C-index --- - if (length(coef.filtered) == 0) { - Cindex_signature <- 0.5 - } else { - Cindex_signature <- glmnet::Cindex(pred = risk.scores, y) - } - - ## --- CV stats at chosen lambda --- - idrow <- max(which(cvfit$glmnet.fit$lambda >= lambda.value)) - mcvCindex <- cvfit$cvm[idrow] - sdcvCindex <- cvfit$cvsd[idrow] - - out <- list( - coefficients = coef.filtered, - selected.features = names(coef.filtered), - risk.scores = risk.scores, - cindex.apparent = Cindex_signature, - cv.cindex.mean = mcvCindex, - cv.cindex.sd = sdcvCindex, - fit = cvfit + .fit_penalized_cox( + mat, time, status, covar, + alpha, nfolds, nvar, lambda, coef.threshold ) - } else { ## --- standard CoxPH --- - df <- data.frame(time = time, status = status, mat, covar) - formula <- as.formula(paste("Surv(time, status) ~ .")) - fit <- survival::coxph(formula, data = df) - - ## raw coefficients - coefs <- coef(fit) - - ## apply coef_threshold - selected <- which(abs(coefs) > coef_threshold) - coef.filtered <- coefs[selected] - - ## normalize - if (length(coef.filtered) > 0) { - coef.filtered <- 2 * coef.filtered / sum(abs(coef.filtered)) - } - - risk.scores <- predict(fit, type = "lp") - - ## apparent C-index - if (length(coef.filtered) == 0) { - Cindex_signature <- 0.5 - } else { - Cindex_signature <- summary(fit)$concordance[1] - } - - out <- list( - coefficients = coef.filtered, - selected.features = names(coef.filtered), - risk.scores = as.numeric(risk.scores), - cindex.apparent = Cindex_signature, - cv.cindex.mean = NULL, - cv.cindex.sd = NULL, - fit = fit - ) + .fit_standard_cox(mat, time, status, covar, coef.threshold) + } +} + +#' @importFrom survival Surv coxph +#' @importFrom glmnet cv.glmnet glmnet +# Penalized cox function +.fit_penalized_cox <- function(mat, time, status, covar, + alpha, nfolds, nvar, lambda, coef.threshold) +{ + y <- Surv(time, status) + + ## fit Cox model for covariates -> use offset + offset <- NULL + if (!is.null(covar)) { + df0 <- data.frame(time = time, status = status, covar) + model0 <- coxph(y ~ ., data = df0) + offset <- predict(model0, type = "lp") } - return(out) + cvfit <- cv.glmnet( + x = mat, y = y, family = "cox", type.measure = "C", + alpha = alpha, nfolds = nfolds, keep = TRUE, + offset = offset + ) + ## --- lambda selection --- + lambda.value <- .lambda_selector(cvfit, lambda, nvar) + + ## --- coefficients --- + coefs <- as.vector(coef(cvfit, s = lambda.value)) + names(coefs) <- rownames(coef(cvfit, s = lambda.value)) + coef.res <- .filter_and_normalize_coefs(coefs, coef.threshold) + + ## --- predictions --- + risk_scores <- as.numeric( + predict(cvfit, mat, s = lambda.value, newoffset = offset) + ) + ## --- apparent C-index --- + Cindex_signature <- .compute_cindex(risk_scores, y, penalized = TRUE) + ## --- CV stats at chosen lambda --- + idrow <- max(which(cvfit$glmnet.fit$lambda >= lambda.value)) + + list( + coefficients = coef.res$coefficients, + selected.features = coef.res$selected.features, + risk_scores = risk_scores, + cindex.apparent = Cindex_signature, + cv.cindex.mean = cvfit$cvm[idrow], + cv.cindex.sd = cvfit$cvsd[idrow], + fit = cvfit + ) +} + +# Standard cox function +.fit_standard_cox <- function(mat, time, status, covar, coef.threshold) { + ## --- standard CoxPH --- + y <- Surv(time, status) + df <- data.frame(time = time, status = status, mat, covar) + fit <- coxph(y ~ ., data = df) + + ## raw coefficients + coefs <- coef(fit) + ## normalize + coef.res <- .filter_and_normalize_coefs(coefs, coef.threshold) + ## predictions + risk_scores <- predict(fit, type = "lp") + ## apparent C-index + Cindex_signature <- .compute_cindex(risk_scores, y, fit, penalized = FALSE) + + list( + coefficients = coef.res$coefficients, + selected.features = coef.res$selected.features, + risk_scores = as.numeric(risk_scores), + cindex.apparent = Cindex_signature, + cv.cindex.mean = NULL, + cv.cindex.sd = NULL, + fit = fit + ) +} + +.filter_and_normalize_coefs <- function(coefs, threshold) { + selected <- which(abs(coefs) > threshold) + coef.filtered <- coefs[selected] + if (length(coef.filtered) > 0) { + coef.filtered <- 2 * coef.filtered / sum(abs(coef.filtered)) + } + list( + coefficients = coef.filtered, + selected.features = names(coef.filtered) + ) } + +.compute_cindex <- function(risk_scores, y, fit = NULL, penalized = TRUE) { + if (length(risk_scores) == 0 || all(risk_scores == 0)) return(0.5) + if (penalized) { + glmnet::Cindex(pred = risk_scores, y) + } else { + summary(fit)$concordance[1] + } +} + +.lambda_selector <- function(cvfit, lambda, nvar = NULL) { + ## --- nvar cutoff handling --- + if (!is.null(nvar)) { + valid <- which(cvfit$glmnet.fit$df <= nvar) + if (length(valid) > 0) lambda <- cvfit$glmnet.fit$lambda[max(valid)] + } + ## --- lambda selection --- + if (is.character(lambda)) { + switch(lambda, + "lambda.1se" = cvfit$lambda.1se, + "lambda.min" = cvfit$lambda.min + ) + } else lambda +} \ No newline at end of file diff --git a/man/getSurvival.Rd b/man/getSurvival.Rd index daffa53..2ec0ca6 100644 --- a/man/getSurvival.Rd +++ b/man/getSurvival.Rd @@ -2,17 +2,23 @@ % Please edit documentation in R/AllGenerics.R, R/getSurvival.R \name{getSurvival} \alias{getSurvival} +\alias{addSurvival} +\alias{addSurvival,SummarizedExperiment-method} \alias{getSurvival,SummarizedExperiment-method} \title{Survival analysis} \usage{ getSurvival(x, ...) +addSurvival(x, ...) + +\S4method{addSurvival}{SummarizedExperiment}(x, time.col, status.col, name = "survival", ...) + \S4method{getSurvival}{SummarizedExperiment}( x, time.col, status.col, assay.type = "counts", - covar.cols = NULL, + col.var = NULL, ... ) } @@ -40,7 +46,7 @@ folds for \code{cv.glmnet}. (Default: \code{10}) \item \code{nvar}: \code{Integer scalar}. Optional. Maximum number of variables (log-ratios) to include in the model. (Default: \code{NULL}) -\item \code{coef_threshold}: \code{Numeric scalar}. Minimum absolute value +\item \code{coef.threshold}: \code{Numeric scalar}. Minimum absolute value for a coefficient to be included in the final model. (Default: \code{0}) }} @@ -51,10 +57,14 @@ representing time to event or follow-up time. Must be numeric.} representing event occurrence. Accepts numeric (0/1) or logical (FALSE/TRUE) values.} +\item{name}{\code{Character vector}. Specifies a column name for storing +divergence results. +(Default: \code{c("divergence", "time_diff", "ref_samples")})} + \item{assay.type}{\code{Character scalar}. Specifies which assay values are used in the dissimilarity estimation. (Default: \code{"counts"})} -\item{covar.cols}{\code{Character vector}. Optional. Specifies covariate +\item{col.var}{\code{Character vector}. Optional. Specifies covariate columns in \code{colData(x)} to adjust for in the survival model. (Default: \code{NULL})} } @@ -63,7 +73,7 @@ A list with model summaries: \itemize{ \item \code{coefficients}: estimated model coefficients \item \code{selected.features}: nonzero features in penalized model -\item \code{risk.scores}: predicted risk scores +\item \code{risk_scores}: predicted risk scores \item \code{cindex.apparent}: apparent concordance index \item \code{cv.cindex.mean}: mean cross-validated C-index (if penalized) \item \code{cv.cindex.sd}: SD of cross-validated C-index (if penalized) @@ -81,7 +91,7 @@ model. # data(SurvivalData) # tse <- SurvivalData # getSurvival(tse, time.col = "T1Dweek", status.col = "T1D", -# covar.cols = c("Sex", "Antibiotics")) +# col.var = c("Sex", "Antibiotics")) } \references{ From 1492bb15acffa8ae02439520ffc2e36682c98099 Mon Sep 17 00:00:00 2001 From: Tuomas Borman Date: Wed, 10 Sep 2025 16:29:03 +0300 Subject: [PATCH 6/8] up --- NAMESPACE | 1 + R/getSurvival.R | 422 ++++++++++++++++++++++++++------------------- man/getSurvival.Rd | 31 ++-- 3 files changed, 261 insertions(+), 193 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index be97c72..7775cb9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -48,6 +48,7 @@ importFrom(dplyr,summarize) importFrom(dplyr,sym) importFrom(dplyr,ungroup) importFrom(dplyr,vars) +importFrom(glmnet,Cindex) importFrom(glmnet,cv.glmnet) importFrom(glmnet,glmnet) importFrom(stats,coef) diff --git a/R/getSurvival.R b/R/getSurvival.R index 05b4456..caedcb8 100644 --- a/R/getSurvival.R +++ b/R/getSurvival.R @@ -1,79 +1,83 @@ #' @name #' getSurvival -#' +#' #' @export -#' +#' #' @title #' Survival analysis -#' +#' #' @description -#' Fit a (penalized) Cox proportional hazards model on microbiome data contained -#' in a SummarizedExperiment object. -#' Data transformations (e.g. pairwise log-ratios) should be handled upstream -#' (e.g. with mia::transformAssay). This function focuses on the statistical -#' model. +#' Fit a (penalized) Cox proportional hazards model on microbiome data contained +#' in a SummarizedExperiment object. Data transformations (e.g. pairwise +#' log-ratios) should be handled upstream (e.g. with mia::transformAssay). #' -#' @param time.col \code{Character scalar}. Column name in \code{colData(x)} +#' @param time.col \code{Character scalar}. Column name in \code{colData(x)} #' representing time to event or follow-up time. Must be numeric. #' -#' @param status.col \code{Character scalar}. Column name in \code{colData(x)} -#' representing event occurrence. Accepts numeric (0/1) or logical (FALSE/TRUE) -#' values. +#' @param status.col \code{Character scalar}. Column name in \code{colData(x)} +#' representing event occurrence. Accepts numeric (\code{0}/\code{1}) or +#' logical (\code{TRUE}/\code{FALSE}) values. #' -#' @param col.var \code{Character vector}. Optional. Specifies covariate -#' columns in \code{colData(x)} to adjust for in the survival model. +#' @param col.var \code{Character vector}. Optional. Specifies covariate +#' columns in \code{colData(x)} to adjust for in the survival model. #' (Default: \code{NULL}) -#' +#' #' @param ... additional arguments. #' \itemize{ -#' \item \code{penalized}: \code{Logical}. If TRUE, fit penalized Cox -#' regression using \code{glmnet}. If FALSE, fit standard Cox model using -#' \code{survival::coxph}. (Default: \code{TRUE}) +#' \item \code{penalized}: \code{Logical}. If \code{TRUE}, fit penalized Cox +#' regression using \code{glmnet}. If \code{FALSE}, fit standard Cox model +#' using \code{survival::coxph}. (Default: \code{TRUE}) #' -#' \item \code{lambda}: \code{Character or numeric}. Penalization parameter -#' passed to \code{\link[glmnet]{cv.glmnet}}. Use \code{"lambda.1se"}, +#' \item \code{lambda}: \code{Character or numeric}. Penalization parameter +#' passed to \code{\link[glmnet]{cv.glmnet}}. Use \code{"lambda.1se"}, #' \code{"lambda.min"}, or a numeric value. (Default: \code{"lambda.1se"}) #' -#' \item \code{alpha}: \code{Numeric scalar}. Elastic net mixing parameter: -#' 1 = Lasso, 0 = Ridge. (Default: \code{0.9}) +#' \item \code{alpha}: \code{Numeric scalar}. Elastic net mixing parameter +#' that controls the balance between Lasso and Ridge regression: +#' \code{alpha = 1} corresponds to Lasso, +#' \code{alpha = 0} corresponds to Ridge. +#' Values between 0 and 1 specify a combination of the two. +#' (Default: \code{0.9}) #' -#' \item \code{nfolds}: \code{Integer scalar}. Number of cross-validation +#' \item \code{nfolds}: \code{Integer scalar}. Number of cross-validation #' folds for \code{cv.glmnet}. (Default: \code{10}) -#' -#' \item \code{nvar}: \code{Integer scalar}. Optional. Maximum number of +#' +#' \item \code{nvar}: \code{Integer scalar}. Optional. Maximum number of #' variables (log-ratios) to include in the model. (Default: \code{NULL}) -#' -#' \item \code{coef.threshold}: \code{Numeric scalar}. Minimum absolute value +#' +#' \item \code{coef.threshold}: \code{Numeric scalar}. Minimum absolute value #' for a coefficient to be included in the final model. (Default: \code{0}) -#' } -#' +#' } +#' #' @inheritParams addBaselineDivergence #' #' @return A list with model summaries: #' \itemize{ #' \item \code{coefficients}: estimated model coefficients -#' \item \code{selected.features}: nonzero features in penalized model #' \item \code{risk_scores}: predicted risk scores -#' \item \code{cindex.apparent}: apparent concordance index -#' \item \code{cv.cindex.mean}: mean cross-validated C-index (if penalized) -#' \item \code{cv.cindex.sd}: SD of cross-validated C-index (if penalized) +#' \item \code{c_index}: apparent concordance index +#' \item \code{c_index_cv_mean}: mean cross-validated C-index (if penalized) +#' \item \code{c_index_cv_sd}: SD of cross-validated C-index (if penalized) #' \item \code{fit}: fitted model object (coxph or cv.glmnet) #' } #' -#' @seealso \code{\link[coda4microbiome]{coda_coxnet}}, +#' @seealso \code{\link[coda4microbiome]{coda_coxnet}}, #' \code{\link[glmnet]{cv.glmnet}} -#' +#' #' @references -#' Meritxell Pujolassos , Antoni Susín , M.Luz Calle (2024). +#' Meritxell Pujolassos , Antoni Susín , M.Luz Calle (2024). #' \emph{Microbiome compositional data analysis for survival studies } -#' NAR Genomics and Bioinformatics, 6(2), lqae038. +#' NAR Genomics and Bioinformatics, 6(2), lqae038. #' \doi{10.1093/nargab/lqae038} #' #' @examples -#' # data(SurvivalData) -#' # tse <- SurvivalData -#' # getSurvival(tse, time.col = "T1Dweek", status.col = "T1D", -#' # col.var = c("Sex", "Antibiotics")) +#' data(crohn_survival) +#' tse <- crohn_survival +#' tse <- transformAssay(tse, method = "relabundance") +#' fit <- getSurvival( +#' tse, assay.type = "relabundance", +#' time.col = "event_time", status.col = "event" +#' ) #' NULL @@ -83,11 +87,13 @@ setMethod("addSurvival", signature = c(x = "SummarizedExperiment"), function(x, time.col, status.col, name = "survival", ...){ .check_input(name, "character scalar") x <- .check_and_get_altExp(x, ...) + # Run analysis args <- c( list(x = x, time.col = time.col, status.col = status.col, - name = name), - list(...)) - res <- do.call(getSurvival, args) |> as.list() + name = name), + list(...)[!names(list(...)) %in% c("altexp")]) + res <- do.call(getSurvival, args) + # Add results to metadata x <- .add_values_to_metadata(x, name, res, ...) return(x) } @@ -98,194 +104,248 @@ setMethod("addSurvival", signature = c(x = "SummarizedExperiment"), setMethod("getSurvival", signature(x = "SummarizedExperiment"), function(x, time.col, status.col, assay.type = "counts", col.var = NULL, ...){ - # input checks - x <- .check_data_for_survival(x, time.col, - status.col, col.var, assay.type, ...) - - # extract data - dat <- .get_data_for_survival(x, time.col, - status.col, col.var, assay.type) - - # fit survival model - res <- .calc_survival( - mat = dat$mat, - time = dat$time, - status = dat$status, - covar = dat$covar, - ... - ) - + # Input checks + x <- .check_data_for_survival( + x, time.col, status.col, col.var, assay.type, ...) + # Extract data + args <- .get_data_for_survival( + x, time.col, status.col, col.var, assay.type) + args <- c(args, list(...)) + # Fit survival model + res <- do.call(.calc_survival, args) return(res) } ) -############################# internal helpers ################################# +############################# Internal helpers ################################# -.check_data_for_survival <- function(x, time.col, - status.col, col.var, assay.type, ...) -{ +# Check input validity for survival analysis +# +# Ensures that the required columns for survival analysis are present in +# colData, are of the correct type, and that the requested assay exists. +.check_data_for_survival <- function( + x, time.col, status.col, col.var, assay.type, ...){ + # Ensure we are working with the correct alternative experiment x <- .check_and_get_altExp(x, ...) + # Check that 'time.col' exists in colData and is a character scalar .check_input(time.col, list("character scalar"), colnames(colData(x))) + # Check that 'status.col' exists in colData and is a character scalar .check_input(status.col, list("character scalar"), colnames(colData(x))) + # Check that the requested assay is present in the object .check_assay_present(assay.type, x) - - if (!is.numeric(x[[time.col]])) { - stop("'time.col' must be numeric", call. = FALSE) + # Verify that the time column contains numeric values + if( !is.numeric(x[[time.col]]) ){ + stop("'time.col' must be numeric.", call. = FALSE) } - if (!is.logical(x[[status.col]]) && !is.numeric(x[[status.col]])) { - stop("'status.col' must be numeric (0/1) or logical", call. = FALSE) + # Verify that the status column is either logical or numeric (0/1) + if( !is.logical(x[[status.col]]) && !is.numeric(x[[status.col]]) ){ + stop("'status.col' must be numeric (0/1) or logical.", call. = FALSE) } - if (!is.null(col.var)) { + # If a grouping variable is provided, check it exists in colData + if( !is.null(col.var) ){ .check_input(col.var, list("character vector"), colnames(colData(x))) } - return(x) } -.get_data_for_survival <- function(x, time.col, - status.col, col.var, assay.type) -{ - mat <- t(assay(x, assay.type)) - time <- as.numeric(x[[time.col]]) - status <- as.numeric(x[[status.col]]) - - covar <- NULL - if (!is.null(col.var)) - covar <- as.data.frame(colData(x)[, col.var, drop = FALSE]) - list( +# Extract and prepare data for survival analysis +# +# Retrieves the assay matrix, survival time, survival status, +# and optional covariates from the input object, +# formatted for downstream survival models. +.get_data_for_survival <- function( + x, time.col, status.col, col.var, assay.type){ + # Extract assay data (samples as rows, features as columns) + mat <- assay(x, assay.type) |> t() + # Extract survival time column and ensure numeric + time <- x[[time.col]] |> as.numeric() + # Extract survival status column and ensure numeric (0/1) + status <- x[[status.col]] |> as.numeric() + # Extract optional covariates from colData + if( !is.null(col.var) ){ + col.var <- colData(x)[, col.var, drop = FALSE] |> as.data.frame() + } + # Return prepared components in a list + res <- list( mat = mat, time = time, status = status, - covar = covar + col.var = col.var ) + return(res) } -# dispatcher to calculate survival -.calc_survival <- function(mat, time, status, covar = NULL, - penalized = TRUE, alpha = 0.9, nfolds = 10, - lambda = "lambda.1se", nvar = NULL, - coef.threshold = 0, ...) -{ - if (penalized) { - ## --- penalized Cox with glmnet --- - .fit_penalized_cox( - mat, time, status, covar, - alpha, nfolds, nvar, lambda, coef.threshold - ) - } else { - ## --- standard CoxPH --- - .fit_standard_cox(mat, time, status, covar, coef.threshold) + +# Dispatcher to fit a survival model +# +# Chooses between a penalized Cox model (via glmnet) and a standard Cox +# proportional hazards model, based on the `penalized` argument. +.calc_survival <- function(mat, time, status, col.var, penalized = TRUE, ...){ + if( !.is_a_bool(penalized) ){ + stop("'penalized' must be TRUE or FALSE.", call. = FALSE) } + FUN <- if( penalized ) .fit_penalized_cox else .fit_standard_cox + res <- FUN(mat = mat, time = time, status = status, col.var = col.var, ...) + return(res) } +# Fit a penalized Cox proportional hazards model using glmnet +# +# Performs cross-validated elastic net penalized Cox regression. Optional +# covariates +# can be included via an offset. #' @importFrom survival Surv coxph #' @importFrom glmnet cv.glmnet glmnet -# Penalized cox function -.fit_penalized_cox <- function(mat, time, status, covar, - alpha, nfolds, nvar, lambda, coef.threshold) -{ +.fit_penalized_cox <- function( + mat, time, status, col.var, alpha = 0.9, nfolds = 10, + lambda = "lambda.1se", ...){ + # Create survival response object as glmnet requires a Surv object for Cox + # regression y <- Surv(time, status) - - ## fit Cox model for covariates -> use offset + + # Fit Cox model for covariates (if provided) and use as offset. This is done + # to estimate how microbiome profile improves the prediction compared to + # conventional clinical variables. offset <- NULL - if (!is.null(covar)) { - df0 <- data.frame(time = time, status = status, covar) - model0 <- coxph(y ~ ., data = df0) - offset <- predict(model0, type = "lp") + if( !is.null(col.var) ){ + # Fit standard Cox on covariates only + df_covar <- data.frame(time = time, status = status, col.var) + model_covar <- coxph(y ~ ., data = df_covar) + # Compute the linear predictor (log-hazard ratio) from the + # covariate-only Cox model. + offset <- predict(model_covar, type = "lp") } - - cvfit <- cv.glmnet( + + # Fit penalized Cox model with cross-validation. Cross-validation identifies + # the optimal penalty (lambda) while controlling overfitting. This model + # uses microbial profile as predictor. If covariates where specified, the + # model is built on top of the covariate-only model, i.e., to assess how + # much microbes improve the prediction. + fit <- cv.glmnet( x = mat, y = y, family = "cox", type.measure = "C", alpha = alpha, nfolds = nfolds, keep = TRUE, offset = offset ) - ## --- lambda selection --- - lambda.value <- .lambda_selector(cvfit, lambda, nvar) - - ## --- coefficients --- - coefs <- as.vector(coef(cvfit, s = lambda.value)) - names(coefs) <- rownames(coef(cvfit, s = lambda.value)) - coef.res <- .filter_and_normalize_coefs(coefs, coef.threshold) - - ## --- predictions --- - risk_scores <- as.numeric( - predict(cvfit, mat, s = lambda.value, newoffset = offset) - ) - ## --- apparent C-index --- - Cindex_signature <- .compute_cindex(risk_scores, y, penalized = TRUE) - ## --- CV stats at chosen lambda --- - idrow <- max(which(cvfit$glmnet.fit$lambda >= lambda.value)) - - list( - coefficients = coef.res$coefficients, - selected.features = coef.res$selected.features, + + # Select lambda value based on user input and optional nvar constraint to + # allow selecting lambda that balances model sparsity and predictive + # performance + lambda_value <- .lambda_selector(fit, lambda, ...) + # Extract coefficients at chosen lambda + coefs <- coef(fit, s = lambda_value) + coefs <- setNames(as.vector(coefs), rownames(coefs)) + coefs <- .filter_and_normalize_coefs(coefs, ...) + + # Compute risk scores for all samples + risk_scores <- predict(fit, mat, s = lambda_value, newoffset = offset) |> + as.numeric() + # Compute concordance index + c_index <- .compute_c_index(risk_scores, y, penalized = TRUE) + + # Identify row in CV results corresponding to chosen lambda + id_row <- which(fit[["glmnet.fit"]][["lambda"]] >= lambda_value) |> max() + + # Return all results as a list + res <- list( + coefficients = coefs, risk_scores = risk_scores, - cindex.apparent = Cindex_signature, - cv.cindex.mean = cvfit$cvm[idrow], - cv.cindex.sd = cvfit$cvsd[idrow], - fit = cvfit + c_index = c_index, + c_index_cv_mean = fit[["cvm"]][[id_row]], + c_index_cv_sd = fit[["cvsd"]][[id_row]],, + fit = fit ) + return(res) } -# Standard cox function -.fit_standard_cox <- function(mat, time, status, covar, coef.threshold) { - ## --- standard CoxPH --- +# Fit a standard Cox proportional hazards model +# +# Performs a standard (unpenalized) Cox regression on features and optional +# covariates. This is used when no penalization is desired. +.fit_standard_cox <- function(mat, time, status, col.var, ...){ + # Create survival response object as it is required input format for coxph y <- Surv(time, status) - df <- data.frame(time = time, status = status, mat, covar) + # Combine survival times, status, features, and optional covariates into one + # data frame + df <- data.frame(time = time, status = status, mat, col.var) + # Fit standard Cox proportional hazards model fit <- coxph(y ~ ., data = df) - - ## raw coefficients + + # Extract raw coefficients coefs <- coef(fit) - ## normalize - coef.res <- .filter_and_normalize_coefs(coefs, coef.threshold) - ## predictions + # Filter and normalize coefficients + coefs <- .filter_and_normalize_coefs(coefs, ...) + # Compute risk scores for all samples risk_scores <- predict(fit, type = "lp") - ## apparent C-index - Cindex_signature <- .compute_cindex(risk_scores, y, fit, penalized = FALSE) - - list( - coefficients = coef.res$coefficients, - selected.features = coef.res$selected.features, + # Compute apparent concordance index + c_index <- .compute_c_index(risk_scores, y, fit, penalized = FALSE) + + # Return results as a list + res <- list( + coefficients = coefs, risk_scores = as.numeric(risk_scores), - cindex.apparent = Cindex_signature, - cv.cindex.mean = NULL, - cv.cindex.sd = NULL, + c_index = c_index, fit = fit ) + return(res) } -.filter_and_normalize_coefs <- function(coefs, threshold) { - selected <- which(abs(coefs) > threshold) - coef.filtered <- coefs[selected] - if (length(coef.filtered) > 0) { - coef.filtered <- 2 * coef.filtered / sum(abs(coef.filtered)) +# Filter and normalize coefficients +# +# Removes coefficients with magnitude below a threshold and rescales the +# remaining coefficients. +.filter_and_normalize_coefs <- function(coefs, coef.threshold = 0, ...) { + # Identify coefficients exceeding the threshold in absolute value + # to remove very small coefficients so that we can focus on meaningful + # predictors + res <- coefs[ abs(coefs) > coef.threshold ] + # Rescale the remaining coefficients + if( length(res) > 0L ){ + res <- 2*res / sum(abs(res)) } - list( - coefficients = coef.filtered, - selected.features = names(coef.filtered) - ) + return(res) } -.compute_cindex <- function(risk_scores, y, fit = NULL, penalized = TRUE) { - if (length(risk_scores) == 0 || all(risk_scores == 0)) return(0.5) - if (penalized) { - glmnet::Cindex(pred = risk_scores, y) - } else { - summary(fit)$concordance[1] +# Compute concordance index (C-index) for survival predictions +# +# Evaluates the discriminative ability of a survival model: +# how well the predicted risk scores rank patients according to observed +# survival. +#' @importFrom glmnet Cindex +.compute_c_index <- function(risk_scores, y, fit = NULL, penalized = TRUE){ + res <- NA + if( !(length(risk_scores) == 0 || all(risk_scores == 0)) ){ + if( penalized ){ + # Penalized Cox: use glmnet's Cindex function + res <- Cindex(pred = risk_scores, y) + } else { + # Standard Cox: extract C-index from the model summary + res <- summary(fit)[["concordance"]][[1L]] + } } + return(res) } -.lambda_selector <- function(cvfit, lambda, nvar = NULL) { - ## --- nvar cutoff handling --- - if (!is.null(nvar)) { - valid <- which(cvfit$glmnet.fit$df <= nvar) - if (length(valid) > 0) lambda <- cvfit$glmnet.fit$lambda[max(valid)] +# Select lambda value from a cross-validated glmnet fit +# +# Allows choosing lambda based on CV results or restricting the model to a +# maximum number of variables. +.lambda_selector <- function(fit, lambda, nvar = NULL, ...) { + # Handle nvar cutoff + if( !is.null(nvar) ){ + # Identify lambda values with number of non-zero coefficients <= nvar. + valid <- which(fit[["glmnet.fit"]][["df"]] <= nvar) + # If such lambdas exist, choose the lambda for model that has + # highest number of taxa. + if( length(valid) > 0L ){ + lambda <- fit[["glmnet.fit"]][max(valid), "lambda"] + } + } + # Handle lambda selection by name + if( is.character(lambda) ){ + # Convert character string to actual numeric lambda value from fit + # Why: "lambda.min" or "lambda.1se" are stored inside fit; this + # retrieves the correct numeric value + lambda <- fit[[lambda]] } - ## --- lambda selection --- - if (is.character(lambda)) { - switch(lambda, - "lambda.1se" = cvfit$lambda.1se, - "lambda.min" = cvfit$lambda.min - ) - } else lambda -} \ No newline at end of file + return(lambda) +} diff --git a/man/getSurvival.Rd b/man/getSurvival.Rd index 2ec0ca6..0ee7dac 100644 --- a/man/getSurvival.Rd +++ b/man/getSurvival.Rd @@ -29,16 +29,20 @@ object.} \item{...}{additional arguments. \itemize{ -\item \code{penalized}: \code{Logical}. If TRUE, fit penalized Cox -regression using \code{glmnet}. If FALSE, fit standard Cox model using -\code{survival::coxph}. (Default: \code{TRUE}) +\item \code{penalized}: \code{Logical}. If \code{TRUE}, fit penalized Cox +regression using \code{glmnet}. If \code{FALSE}, fit standard Cox model +using \code{survival::coxph}. (Default: \code{TRUE}) \item \code{lambda}: \code{Character or numeric}. Penalization parameter passed to \code{\link[glmnet]{cv.glmnet}}. Use \code{"lambda.1se"}, \code{"lambda.min"}, or a numeric value. (Default: \code{"lambda.1se"}) -\item \code{alpha}: \code{Numeric scalar}. Elastic net mixing parameter: -1 = Lasso, 0 = Ridge. (Default: \code{0.9}) +\item \code{alpha}: \code{Numeric scalar}. Elastic net mixing parameter +that controls the balance between Lasso and Ridge regression: +\code{alpha = 1} corresponds to Lasso, +\code{alpha = 0} corresponds to Ridge. +Values between 0 and 1 specify a combination of the two. +(Default: \code{0.9}) \item \code{nfolds}: \code{Integer scalar}. Number of cross-validation folds for \code{cv.glmnet}. (Default: \code{10}) @@ -54,8 +58,8 @@ for a coefficient to be included in the final model. (Default: \code{0}) representing time to event or follow-up time. Must be numeric.} \item{status.col}{\code{Character scalar}. Column name in \code{colData(x)} -representing event occurrence. Accepts numeric (0/1) or logical (FALSE/TRUE) -values.} +representing event occurrence. Accepts numeric (\code{0}/\code{1}) or +logical (\code{TRUE}/\code{FALSE}) values.} \item{name}{\code{Character vector}. Specifies a column name for storing divergence results. @@ -72,7 +76,7 @@ columns in \code{colData(x)} to adjust for in the survival model. A list with model summaries: \itemize{ \item \code{coefficients}: estimated model coefficients -\item \code{selected.features}: nonzero features in penalized model +\item \code{selected_features}: nonzero features in penalized model \item \code{risk_scores}: predicted risk scores \item \code{cindex.apparent}: apparent concordance index \item \code{cv.cindex.mean}: mean cross-validated C-index (if penalized) @@ -88,10 +92,13 @@ Data transformations (e.g. pairwise log-ratios) should be handled upstream model. } \examples{ -# data(SurvivalData) -# tse <- SurvivalData -# getSurvival(tse, time.col = "T1Dweek", status.col = "T1D", -# col.var = c("Sex", "Antibiotics")) +data(crohn_survival) +tse <- crohn_survival +tse <- transformAssay(tse, method = "relabundance") +fit <- getSurvival( + tse, assay.type = "relabundance", + time.col = "event_time", status.col = "event" +) } \references{ From 9fc9a92bbb33bf082db4c6fc75de5fafc2a53c40 Mon Sep 17 00:00:00 2001 From: Tuomas Borman Date: Wed, 10 Sep 2025 16:30:04 +0300 Subject: [PATCH 7/8] up --- R/getSurvival.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/getSurvival.R b/R/getSurvival.R index caedcb8..8206b99 100644 --- a/R/getSurvival.R +++ b/R/getSurvival.R @@ -9,7 +9,8 @@ #' @description #' Fit a (penalized) Cox proportional hazards model on microbiome data contained #' in a SummarizedExperiment object. Data transformations (e.g. pairwise -#' log-ratios) should be handled upstream (e.g. with mia::transformAssay). +#' log-ratios) should be handled upstream +#' (e.g. with \code{mia::transformAssay()}). #' #' @param time.col \code{Character scalar}. Column name in \code{colData(x)} #' representing time to event or follow-up time. Must be numeric. From a33c6d120c2dccafc729ea1beb91be38efe91998 Mon Sep 17 00:00:00 2001 From: Daena Rys Date: Fri, 12 Sep 2025 14:21:20 +0300 Subject: [PATCH 8/8] add unit tests Signed-off-by: Daena Rys --- R/getSurvival.R | 43 ++-- man/getSurvival.Rd | 31 +-- tests/testthat/test-getSurvival.R | 345 ++++++++++++++++++++++++++++++ 3 files changed, 380 insertions(+), 39 deletions(-) create mode 100644 tests/testthat/test-getSurvival.R diff --git a/R/getSurvival.R b/R/getSurvival.R index 8206b99..422a1c7 100644 --- a/R/getSurvival.R +++ b/R/getSurvival.R @@ -15,7 +15,7 @@ #' @param time.col \code{Character scalar}. Column name in \code{colData(x)} #' representing time to event or follow-up time. Must be numeric. #' -#' @param status.col \code{Character scalar}. Column name in \code{colData(x)} +#' @param event.col \code{Character scalar}. Column name in \code{colData(x)} #' representing event occurrence. Accepts numeric (\code{0}/\code{1}) or #' logical (\code{TRUE}/\code{FALSE}) values. #' @@ -54,7 +54,7 @@ #' #' @return A list with model summaries: #' \itemize{ -#' \item \code{coefficients}: estimated model coefficients +#' \item \code{coef}: estimated model coefficients #' \item \code{risk_scores}: predicted risk scores #' \item \code{c_index}: apparent concordance index #' \item \code{c_index_cv_mean}: mean cross-validated C-index (if penalized) @@ -77,7 +77,7 @@ #' tse <- transformAssay(tse, method = "relabundance") #' fit <- getSurvival( #' tse, assay.type = "relabundance", -#' time.col = "event_time", status.col = "event" +#' time.col = "event_time", event.col = "event" #' ) #' NULL @@ -85,12 +85,12 @@ NULL #' @rdname getSurvival #' @export setMethod("addSurvival", signature = c(x = "SummarizedExperiment"), - function(x, time.col, status.col, name = "survival", ...){ + function(x, time.col, event.col, name = "survival", ...){ .check_input(name, "character scalar") x <- .check_and_get_altExp(x, ...) # Run analysis args <- c( - list(x = x, time.col = time.col, status.col = status.col, + list(x = x, time.col = time.col, event.col = event.col, name = name), list(...)[!names(list(...)) %in% c("altexp")]) res <- do.call(getSurvival, args) @@ -103,14 +103,14 @@ setMethod("addSurvival", signature = c(x = "SummarizedExperiment"), #' @rdname getSurvival #' @export setMethod("getSurvival", signature(x = "SummarizedExperiment"), - function(x, time.col, status.col, assay.type = "counts", col.var = NULL, + function(x, time.col, event.col, assay.type = "counts", col.var = NULL, ...){ # Input checks x <- .check_data_for_survival( - x, time.col, status.col, col.var, assay.type, ...) + x, time.col, event.col, col.var, assay.type, ...) # Extract data args <- .get_data_for_survival( - x, time.col, status.col, col.var, assay.type) + x, time.col, event.col, col.var, assay.type) args <- c(args, list(...)) # Fit survival model res <- do.call(.calc_survival, args) @@ -125,13 +125,13 @@ setMethod("getSurvival", signature(x = "SummarizedExperiment"), # Ensures that the required columns for survival analysis are present in # colData, are of the correct type, and that the requested assay exists. .check_data_for_survival <- function( - x, time.col, status.col, col.var, assay.type, ...){ + x, time.col, event.col, col.var, assay.type, ...){ # Ensure we are working with the correct alternative experiment x <- .check_and_get_altExp(x, ...) # Check that 'time.col' exists in colData and is a character scalar .check_input(time.col, list("character scalar"), colnames(colData(x))) - # Check that 'status.col' exists in colData and is a character scalar - .check_input(status.col, list("character scalar"), colnames(colData(x))) + # Check that 'event.col' exists in colData and is a character scalar + .check_input(event.col, list("character scalar"), colnames(colData(x))) # Check that the requested assay is present in the object .check_assay_present(assay.type, x) # Verify that the time column contains numeric values @@ -139,8 +139,8 @@ setMethod("getSurvival", signature(x = "SummarizedExperiment"), stop("'time.col' must be numeric.", call. = FALSE) } # Verify that the status column is either logical or numeric (0/1) - if( !is.logical(x[[status.col]]) && !is.numeric(x[[status.col]]) ){ - stop("'status.col' must be numeric (0/1) or logical.", call. = FALSE) + if( !is.logical(x[[event.col]]) && !is.numeric(x[[event.col]]) ){ + stop("'event.col' must be numeric (0/1) or logical.", call. = FALSE) } # If a grouping variable is provided, check it exists in colData if( !is.null(col.var) ){ @@ -155,13 +155,13 @@ setMethod("getSurvival", signature(x = "SummarizedExperiment"), # and optional covariates from the input object, # formatted for downstream survival models. .get_data_for_survival <- function( - x, time.col, status.col, col.var, assay.type){ + x, time.col, event.col, col.var, assay.type){ # Extract assay data (samples as rows, features as columns) mat <- assay(x, assay.type) |> t() # Extract survival time column and ensure numeric time <- x[[time.col]] |> as.numeric() # Extract survival status column and ensure numeric (0/1) - status <- x[[status.col]] |> as.numeric() + status <- x[[event.col]] |> as.numeric() # Extract optional covariates from colData if( !is.null(col.var) ){ col.var <- colData(x)[, col.var, drop = FALSE] |> as.data.frame() @@ -248,11 +248,11 @@ setMethod("getSurvival", signature(x = "SummarizedExperiment"), # Return all results as a list res <- list( - coefficients = coefs, + coef = coefs, risk_scores = risk_scores, c_index = c_index, c_index_cv_mean = fit[["cvm"]][[id_row]], - c_index_cv_sd = fit[["cvsd"]][[id_row]],, + c_index_cv_sd = fit[["cvsd"]][[id_row]], fit = fit ) return(res) @@ -267,7 +267,12 @@ setMethod("getSurvival", signature(x = "SummarizedExperiment"), y <- Surv(time, status) # Combine survival times, status, features, and optional covariates into one # data frame - df <- data.frame(time = time, status = status, mat, col.var) + if( is.null(col.var) ){ + df <- data.frame(time = time, status = status, mat) + } else { + df <- data.frame(time = time, status = status, mat, col.var) + } + # Fit standard Cox proportional hazards model fit <- coxph(y ~ ., data = df) @@ -282,7 +287,7 @@ setMethod("getSurvival", signature(x = "SummarizedExperiment"), # Return results as a list res <- list( - coefficients = coefs, + coef = coefs, risk_scores = as.numeric(risk_scores), c_index = c_index, fit = fit diff --git a/man/getSurvival.Rd b/man/getSurvival.Rd index 0ee7dac..9eac72c 100644 --- a/man/getSurvival.Rd +++ b/man/getSurvival.Rd @@ -11,16 +11,9 @@ getSurvival(x, ...) addSurvival(x, ...) -\S4method{addSurvival}{SummarizedExperiment}(x, time.col, status.col, name = "survival", ...) +\S4method{addSurvival}{SummarizedExperiment}(x, time.col, event.col, name = "survival", ...) -\S4method{getSurvival}{SummarizedExperiment}( - x, - time.col, - status.col, - assay.type = "counts", - col.var = NULL, - ... -) +\S4method{getSurvival}{SummarizedExperiment}(x, time.col, event.col, assay.type = "counts", col.var = NULL, ...) } \arguments{ \item{x}{A @@ -57,7 +50,7 @@ for a coefficient to be included in the final model. (Default: \code{0}) \item{time.col}{\code{Character scalar}. Column name in \code{colData(x)} representing time to event or follow-up time. Must be numeric.} -\item{status.col}{\code{Character scalar}. Column name in \code{colData(x)} +\item{event.col}{\code{Character scalar}. Column name in \code{colData(x)} representing event occurrence. Accepts numeric (\code{0}/\code{1}) or logical (\code{TRUE}/\code{FALSE}) values.} @@ -75,21 +68,19 @@ columns in \code{colData(x)} to adjust for in the survival model. \value{ A list with model summaries: \itemize{ -\item \code{coefficients}: estimated model coefficients -\item \code{selected_features}: nonzero features in penalized model +\item \code{coef}: estimated model coefficients \item \code{risk_scores}: predicted risk scores -\item \code{cindex.apparent}: apparent concordance index -\item \code{cv.cindex.mean}: mean cross-validated C-index (if penalized) -\item \code{cv.cindex.sd}: SD of cross-validated C-index (if penalized) +\item \code{c_index}: apparent concordance index +\item \code{c_index_cv_mean}: mean cross-validated C-index (if penalized) +\item \code{c_index_cv_sd}: SD of cross-validated C-index (if penalized) \item \code{fit}: fitted model object (coxph or cv.glmnet) } } \description{ Fit a (penalized) Cox proportional hazards model on microbiome data contained -in a SummarizedExperiment object. -Data transformations (e.g. pairwise log-ratios) should be handled upstream -(e.g. with mia::transformAssay). This function focuses on the statistical -model. +in a SummarizedExperiment object. Data transformations (e.g. pairwise +log-ratios) should be handled upstream +(e.g. with \code{mia::transformAssay()}). } \examples{ data(crohn_survival) @@ -97,7 +88,7 @@ tse <- crohn_survival tse <- transformAssay(tse, method = "relabundance") fit <- getSurvival( tse, assay.type = "relabundance", - time.col = "event_time", status.col = "event" + time.col = "event_time", event.col = "event" ) } diff --git a/tests/testthat/test-getSurvival.R b/tests/testthat/test-getSurvival.R new file mode 100644 index 0000000..b4bfccb --- /dev/null +++ b/tests/testthat/test-getSurvival.R @@ -0,0 +1,345 @@ +# Check that input checks work +test_that("getSurvival validates input", { + # Load and prepare data + data(crohn_survival) + tse <- crohn_survival + tse <- transformAssay(tse, method = "relabundance") + + # Test that function works with valid inputs + expect_no_error({ + fit <- getSurvival( + tse, + assay.type = "relabundance", + time.col = "event_time", + event.col = "event" + ) + }) + + expect_error({ + getSurvival( + tse, + assay.type = "relabundance" + ) + }) + expect_error({ + getSurvival( + tse + ) + }) + + # Test error with non-existent time column + expect_error({ + getSurvival( + tse, + assay.type = "relabundance", + time.col = "nonexistent_time", + event.col = "event" + ) + }) + + expect_error({ + getSurvival( + tse, + assay.type = "relabundance", + event.col = "event" + ) + }) + + # Test error with non-existent event column + expect_error({ + getSurvival( + tse, + assay.type = "relabundance", + time.col = "event_time", + event.col = "nonexistent_event" + ) + }) + + expect_error({ + getSurvival( + tse, + assay.type = "relabundance", + time.col = "event_time" + ) + }) + + # Test error with non-existent assay + expect_error({ + getSurvival( + tse, + assay.type = "nonexistent_assay", + time.col = "event_time", + event.col = "event" + ) + }) +}) + +# Check that method works correctly +test_that("getSurvival works correctly", { + # Load the example data + data(crohn_survival) + tse <- crohn_survival + + # Check that the data loaded correctly + expect_s4_class(tse, "TreeSummarizedExperiment") + + # Verify required columns exist + expect_true("event_time" %in% colnames(colData(tse))) + expect_true("event" %in% colnames(colData(tse))) + + # Transform to relative abundance as in example + tse <- transformAssay(tse, method = "relabundance") + + # Test basic getSurvival functionality + fit <- getSurvival( + tse, + assay.type = "relabundance", + time.col = "event_time", + event.col = "event" + ) + + # Check that result is a list with expected components + expect_type(fit, "list") + expect_true(all(c("coef", "risk_scores", "c_index", "fit") %in% names(fit))) + + # Check coefficients + expect_type(fit$coef, "double") + expect_true(is.numeric(fit$coef)) + + # Check risk scores match number of samples + expect_length(fit$risk_scores, ncol(tse)) + expect_true(is.numeric(fit$risk_scores)) + expect_true(all(is.finite(fit$risk_scores))) + + # Check C-index is valid + expect_true(is.numeric(fit$c_index)) + expect_true(fit$c_index >= 0 && fit$c_index <= 1) + + # Check that fit object exists and is correct type + expect_true(!is.null(fit$fit)) + expect_s3_class(fit$fit, "cv.glmnet") + + # Check cross-validation results are present (penalized = TRUE by default) + expect_true("c_index_cv_mean" %in% names(fit)) + expect_true("c_index_cv_sd" %in% names(fit)) + expect_true(is.numeric(fit$c_index_cv_mean)) + expect_true(is.numeric(fit$c_index_cv_sd)) +}) + +# Check that method handles different params passed in ... +test_that("getSurvival handles different parameters", { + # Load and prepare data + data(crohn_survival) + tse <- crohn_survival + tse <- transformAssay(tse, method = "relabundance") + + # Test with different alpha values + fit_ridge <- getSurvival( + tse, + assay.type = "relabundance", + time.col = "event_time", + event.col = "event", + alpha = 0.1 # Ridge regression + ) + + fit_lasso <- getSurvival( + tse, + assay.type = "relabundance", + time.col = "event_time", + event.col = "event", + alpha = 1.0 # Lasso regression + ) + + # Both should return valid results + expect_type(fit_ridge, "list") + expect_type(fit_lasso, "list") + + # Results should be different due to different regularization + expect_false(identical(fit_ridge$coef, fit_lasso$coef)) + expect_false(identical(fit_ridge$risk_scores, fit_lasso$risk_scores)) + + # Test with standard Cox regression (no penalization) + fit_standard <- getSurvival( + tse, + assay.type = "relabundance", + time.col = "event_time", + event.col = "event", + penalized = FALSE + ) + + expect_type(fit_standard, "list") + expect_s3_class(fit_standard$fit, "coxph") + + # Standard model should not have CV results + expect_false("c_index_cv_mean" %in% names(fit_standard)) + expect_false("c_index_cv_sd" %in% names(fit_standard)) +}) + +# Check that method handles covariates +test_that("getSurvival handles covariates", { + # Load and prepare data + data(crohn_survival) + tse <- crohn_survival + tse <- transformAssay(tse, method = "relabundance") + + set.seed(42) # For reproducibility + + # Determine sample size + n <- ncol(tse) + + # Generate synthetic covariates + colData(tse)$age <- round(rnorm(n, mean = 40, sd = 12)) # ages, e.g. 20–70 + colData(tse)$sex <- factor(sample(c("M", "F"), n, replace = TRUE)) + + # Test with available covariates (use first available column) + fit_with_covs <- getSurvival( + tse, + assay.type = "relabundance", + time.col = "event_time", + event.col = "event", + col.var = c("age", "sex") + ) + + # Test without covariates for comparison + fit_without_covs <- getSurvival( + tse, + assay.type = "relabundance", + time.col = "event_time", + event.col = "event" + ) + + # Both should return valid results + expect_type(fit_with_covs, "list") + expect_type(fit_without_covs, "list") + + # Results should be different when covariates are included + expect_false(identical(fit_with_covs$risk_scores, fit_without_covs$risk_scores)) + + # Both should have same structure + essential_components <- c("coef", "risk_scores", "c_index", "fit") + expect_true(all(essential_components %in% names(fit_with_covs))) + expect_true(all(essential_components %in% names(fit_without_covs))) +}) + +# Check that method handles coefficient filtering +test_that("getSurvival handles coefficient filtering", { + # Load and prepare data + data(crohn_survival) + tse <- crohn_survival + tse <- transformAssay(tse, method = "relabundance") + + # Test with different coefficient thresholds + fit_no_filter <- getSurvival( + tse, + assay.type = "relabundance", + time.col = "event_time", + event.col = "event", + coef.threshold = 0 # No filtering + ) + + fit_filtered <- getSurvival( + tse, + assay.type = "relabundance", + time.col = "event_time", + event.col = "event", + coef.threshold = 0.1 # Filter small coefficients + ) + + # Filtered result should have fewer or equal coefficients + expect_true(length(fit_filtered$coef) <= length(fit_no_filter$coef)) + + # All remaining coefficients should exceed the threshold + if(length(fit_filtered$coef) > 0) { + expect_true(all(abs(fit_filtered$coef) > 0.1)) + } + + # Test with high threshold that might filter out all coefficients + fit_high_filter <- getSurvival( + tse, + assay.type = "relabundance", + time.col = "event_time", + event.col = "event", + coef.threshold = 10 # Very high threshold + ) + + # Should still return valid structure even if no coefficients pass threshold + expect_type(fit_high_filter, "list") + expect_true("coef" %in% names(fit_high_filter)) +}) + +# Check that method handles different transformations +test_that("getSurvival handles different assay types", { + # Load data + data(crohn_survival) + tse <- crohn_survival + + # Test with original counts + fit_counts <- getSurvival( + tse, + assay.type = "counts", + time.col = "event_time", + event.col = "event" + ) + + # Transform and test with relative abundances + tse <- transformAssay(tse, method = "relabundance") + fit_relabundance <- getSurvival( + tse, + assay.type = "relabundance", + time.col = "event_time", + event.col = "event" + ) + + # Both should return valid results + expect_type(fit_counts, "list") + expect_type(fit_relabundance, "list") + + # Results should be different due to different data scaling + expect_false(identical(fit_counts$coef, fit_relabundance$coef)) + expect_false(identical(fit_counts$risk_scores, fit_relabundance$risk_scores)) + + # Add CLR transformation and test + tse <- transformAssay(tse, method = "clr", pseudocount = TRUE) + fit_clr <- getSurvival( + tse, + assay.type = "clr", + time.col = "event_time", + event.col = "event" + ) + + expect_type(fit_clr, "list") + expect_false(identical(fit_clr$coef, fit_relabundance$coef)) +}) + +# Check that methods results are reproducible +test_that("getSurvival results are reproducible", { + # Load and prepare data + data(crohn_survival) + tse <- crohn_survival + tse <- transformAssay(tse, method = "relabundance") + + # Set seed and run analysis + set.seed(123) + fit1 <- getSurvival( + tse, + assay.type = "relabundance", + time.col = "event_time", + event.col = "event", + nfolds = 5 # Use fewer folds for faster testing + ) + + # Set same seed and run analysis again + set.seed(123) + fit2 <- getSurvival( + tse, + assay.type = "relabundance", + time.col = "event_time", + event.col = "event", + nfolds = 5 + ) + + # Results should be identical with same seed + expect_identical(fit1$coef, fit2$coef) + expect_identical(fit1$risk_scores, fit2$risk_scores) + expect_equal(fit1$c_index, fit2$c_index) +}) \ No newline at end of file