From 9e30ba37f912b80bfaffced8da1ff2216415daf9 Mon Sep 17 00:00:00 2001
From: Daena Rys <rysdaena8@gmail.com>
Date: Tue, 6 May 2025 10:41:37 +0300
Subject: [PATCH 1/8] add getSurvival

Signed-off-by: Daena Rys <rysdaena8@gmail.com>
---
 DESCRIPTION        |   3 +-
 NAMESPACE          |   3 +
 NEWS               |   3 +
 R/AllGenerics.R    |   5 ++
 R/getSurvival.R    | 157 +++++++++++++++++++++++++++++++++++++++++++++
 man/getSurvival.Rd | 107 ++++++++++++++++++++++++++++++
 6 files changed, 277 insertions(+), 1 deletion(-)
 create mode 100644 R/getSurvival.R
 create mode 100644 man/getSurvival.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index f37559e..86ce5e2 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: miaTime
 Type: Package
 Title: Microbiome Time Series Analysis
-Version: 0.99.8
+Version: 0.99.9
 Authors@R:
      c(person(given = "Leo", family = "Lahti", role = c("aut"),
               email = "leo.lahti@iki.fi",
@@ -25,6 +25,7 @@ Depends:
     R (>= 4.4.0),
     mia
 Imports:
+    coda4microbiome,
     dplyr,
     methods,
     S4Vectors,
diff --git a/NAMESPACE b/NAMESPACE
index 3380ac6..eda442d 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -10,6 +10,7 @@ export(getBimodality)
 export(getShortTermChange)
 export(getStability)
 export(getStepwiseDivergence)
+export(getSurvival)
 exportMethods(addBaselineDivergence)
 exportMethods(addBimodality)
 exportMethods(addShortTermChange)
@@ -20,12 +21,14 @@ exportMethods(getBimodality)
 exportMethods(getShortTermChange)
 exportMethods(getStability)
 exportMethods(getStepwiseDivergence)
+exportMethods(getSurvival)
 import(S4Vectors)
 import(SingleCellExperiment)
 import(SummarizedExperiment)
 import(TreeSummarizedExperiment)
 import(methods)
 import(mia)
+importFrom(coda4microbiome,coda_coxnet)
 importFrom(dplyr,across)
 importFrom(dplyr,all_of)
 importFrom(dplyr,all_vars)
diff --git a/NEWS b/NEWS
index 36184a8..944024a 100755
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,6 @@
+Changes in version 0.99.8
++ Added wrapper for survival analysis
+
 Changes in version 0.99.6
 + Final fixes for Bioconductor submission
 
diff --git a/R/AllGenerics.R b/R/AllGenerics.R
index 40a420e..7f985b0 100644
--- a/R/AllGenerics.R
+++ b/R/AllGenerics.R
@@ -50,3 +50,8 @@ setGeneric("getStability", signature = "x", function(x, ...)
 #' @export
 setGeneric("addStability", signature = "x", function(x, ...)
     standardGeneric("addStability"))
+
+#' @rdname getSurvival
+#' @export
+setGeneric("getSurvival", signature = "x", function(x, ...)
+    standardGeneric("getSurvival"))
diff --git a/R/getSurvival.R b/R/getSurvival.R
new file mode 100644
index 0000000..e2106ea
--- /dev/null
+++ b/R/getSurvival.R
@@ -0,0 +1,157 @@
+#' @name
+#' getSurvival
+#' 
+#' @export
+#' 
+#' @title
+#' Survival analysis
+#' 
+#' @description
+#' Survival modeling wrapper for TreeSummarizedExperiment using coda_coxnet
+#' 
+#' @details
+#' This function extracts compositional count data and survival metadata from a
+#' \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}}
+#' object and applies penalized Cox regression via \code{\link[coda4microbiome]{coda_coxnet}}, 
+#' using pairwise log-ratio transformations.
+#'
+#'
+#' @param time.col \code{Character scalar}. Column name in \code{colData(x)} 
+#' representing time to event or follow-up time. Must be numeric.
+#'
+#' @param status.col \code{Character scalar}. Column name in \code{colData(x)} 
+#' representing event occurrence. Accepts numeric (0/1) or logical (FALSE/TRUE) 
+#' values.
+#'
+#' @param covar.cols \code{Character vector}. Optional. Specifies covariate 
+#' columns in \code{colData(x)} to adjust for in the survival model. 
+#' (Default: \code{NULL})
+#'
+#' @param lambda \code{Character or numeric}. Penalization parameter passed to 
+#' \code{\link[glmnet]{cv.glmnet}}. Use \code{"lambda.1se"}, 
+#' \code{"lambda.min"}, or a numeric value. (Default: \code{"lambda.1se"})
+#'
+#' @param nvar \code{Integer scalar}. Optional. Maximum number of variables 
+#' (log-ratios) to include in the model. (Default: \code{NULL})
+#'
+#' @param alpha \code{Numeric scalar}. Elastic net mixing parameter: 1 = Lasso, 
+#' 0 = Ridge. (Default: \code{0.9})
+#'
+#' @param nfolds \code{Integer scalar}. Number of cross-validation folds for 
+#' \code{cv.glmnet}. (Default: \code{10})
+#'
+#' @param showPlots \code{Logical}. If \code{TRUE}, generates plots for 
+#' cross-validation, risk scores, and selected signature. (Default: \code{TRUE})
+#'
+#' @param coef_threshold \code{Numeric scalar}. Minimum absolute value for a 
+#' coefficient to be included in the final model. (Default: \code{0})
+#' 
+#' @inheritParams addBaselineDivergence
+#'
+#' @return A named \code{list} containing:
+#' \itemize{
+#'   \item \code{taxa.num}: indices of selected taxa
+#'   \item \code{taxa.name}: names of selected taxa
+#'   \item \code{log-contrast coefficients}: coefficients of selected taxa in 
+#'   the final log-contrast model
+#'   \item \code{risk.score}: predicted risk scores
+#'   \item \code{apparent Cindex}: concordance index on training data
+#'   \item \code{mean cv-Cindex}: mean cross-validated concordance index
+#'   \item \code{sd cv-Cindex}: standard deviation of cross-validated 
+#'   concordance index
+#'   \item \code{risk score plot}: ggplot object showing risk score vs survival
+#'   \item \code{signature plot}: ggplot object of selected taxa and coefficients
+#' }
+#'
+#' @seealso \code{\link[coda4microbiome]{coda_coxnet}}, 
+#' \code{\link[glmnet]{cv.glmnet}}
+#' 
+#' @references
+#' Meritxell Pujolassos , Antoni Susín , M.Luz Calle (2024). 
+#' \emph{Microbiome compositional data analysis for survival studies }
+#' NAR Genomics and Bioinformatics, 6(2), lqae038. 
+#' \doi{10.1093/nargab/lqae038}
+#'
+#' @examples
+#' # data(SurvivalData)
+#' # tse <- SurvivalData
+#' # getSurvival(tse, time.col = "T1Dweek", status.col = "T1D", 
+#' # covar.cols = c("Sex", "Antibiotics"))
+#'
+NULL
+
+#' @rdname getSurvival
+#' @export
+#' @importFrom coda4microbiome coda_coxnet
+setMethod("getSurvival", signature(x = "SummarizedExperiment"),
+    function(
+        x,
+        time.col,
+        status.col,
+        assay.type = "counts",
+        covar.cols = NULL,
+        lambda = "lambda.1se",
+        nvar = NULL,
+        alpha = 0.9,
+        nfolds = 10,
+        showPlots = TRUE,
+        coef_threshold = 0,
+        ...
+          ) {
+              ############################# INPUT CHECK ########################
+              x <- .check_and_get_altExp(x, ...)
+              
+              # Check required colData fields
+              .check_input(time.col, list("character scalar"), 
+                           colnames(colData(x)))
+              .check_input(status.col, list("character scalar"), 
+                           colnames(colData(x)))
+              
+              if (!is.numeric(x[[time.col]])) {
+                  stop("'time.col' must specify a numeric column from colData(x)", 
+                       call. = FALSE)
+              }
+              if (!is.logical(x[[status.col]]) && !is.numeric(x[[status.col]])) {
+                  stop("'status.col' must be numeric (0/1) or logical (FALSE/TRUE)", 
+                       call. = FALSE)
+              }
+              
+              .check_assay_present(assay.type, x)
+              
+              if (!is.null(covar.cols)) {
+                  .check_input(covar.cols, list("character vector"), 
+                               colnames(colData(x)))
+              }
+              
+              ########################### INPUT CHECK END ######################
+              
+              # Extract matrix
+              mat <- t(assay(x, assay.type))
+              
+              # Extract survival time and status
+              time <- as.numeric(x[[time.col]])
+              status <- as.numeric(x[[status.col]])
+              
+              # Extract covariates (if specified)
+              covar <- NULL
+              if (!is.null(covar.cols)) {
+                  covar <- as.data.frame(colData(x)[, covar.cols, drop = FALSE])
+              }
+              
+              # Run core model
+              result <- coda_coxnet(
+                  x = mat,
+                  time = time,
+                  status = status,
+                  covar = covar,
+                  lambda = lambda,
+                  nvar = nvar,
+                  alpha = alpha,
+                  nfolds = nfolds,
+                  showPlots = showPlots,
+                  coef_threshold = coef_threshold
+              )
+              
+              return(result)
+          }
+)
\ No newline at end of file
diff --git a/man/getSurvival.Rd b/man/getSurvival.Rd
new file mode 100644
index 0000000..4d77ab4
--- /dev/null
+++ b/man/getSurvival.Rd
@@ -0,0 +1,107 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/AllGenerics.R, R/getSurvival.R
+\name{getSurvival}
+\alias{getSurvival}
+\alias{getSurvival,SummarizedExperiment-method}
+\title{Survival analysis}
+\usage{
+getSurvival(x, ...)
+
+\S4method{getSurvival}{SummarizedExperiment}(
+  x,
+  time.col,
+  status.col,
+  assay.type = "counts",
+  covar.cols = NULL,
+  lambda = "lambda.1se",
+  nvar = NULL,
+  alpha = 0.9,
+  nfolds = 10,
+  showPlots = TRUE,
+  coef_threshold = 0,
+  ...
+)
+}
+\arguments{
+\item{x}{A
+\code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}}
+object.}
+
+\item{...}{Optional arguments passed into
+\code{\link[mia:addDivergence]{mia::addDivergence()}}.}
+
+\item{time.col}{\code{Character scalar}. Column name in \code{colData(x)}
+representing time to event or follow-up time. Must be numeric.}
+
+\item{status.col}{\code{Character scalar}. Column name in \code{colData(x)}
+representing event occurrence. Accepts numeric (0/1) or logical (FALSE/TRUE)
+values.}
+
+\item{assay.type}{\code{Character scalar}. Specifies which assay values are
+used in the dissimilarity estimation. (Default: \code{"counts"})}
+
+\item{covar.cols}{\code{Character vector}. Optional. Specifies covariate
+columns in \code{colData(x)} to adjust for in the survival model.
+(Default: \code{NULL})}
+
+\item{lambda}{\code{Character or numeric}. Penalization parameter passed to
+\code{\link[glmnet]{cv.glmnet}}. Use \code{"lambda.1se"},
+\code{"lambda.min"}, or a numeric value. (Default: \code{"lambda.1se"})}
+
+\item{nvar}{\code{Integer scalar}. Optional. Maximum number of variables
+(log-ratios) to include in the model. (Default: \code{NULL})}
+
+\item{alpha}{\code{Numeric scalar}. Elastic net mixing parameter: 1 = Lasso,
+0 = Ridge. (Default: \code{0.9})}
+
+\item{nfolds}{\code{Integer scalar}. Number of cross-validation folds for
+\code{cv.glmnet}. (Default: \code{10})}
+
+\item{showPlots}{\code{Logical}. If \code{TRUE}, generates plots for
+cross-validation, risk scores, and selected signature. (Default: \code{TRUE})}
+
+\item{coef_threshold}{\code{Numeric scalar}. Minimum absolute value for a
+coefficient to be included in the final model. (Default: \code{0})}
+}
+\value{
+A named \code{list} containing:
+\itemize{
+\item \code{taxa.num}: indices of selected taxa
+\item \code{taxa.name}: names of selected taxa
+\item \code{log-contrast coefficients}: coefficients of selected taxa in
+the final log-contrast model
+\item \code{risk.score}: predicted risk scores
+\item \code{apparent Cindex}: concordance index on training data
+\item \code{mean cv-Cindex}: mean cross-validated concordance index
+\item \code{sd cv-Cindex}: standard deviation of cross-validated
+concordance index
+\item \code{risk score plot}: ggplot object showing risk score vs survival
+\item \code{signature plot}: ggplot object of selected taxa and coefficients
+}
+}
+\description{
+Survival modeling wrapper for TreeSummarizedExperiment using coda_coxnet
+}
+\details{
+This function extracts compositional count data and survival metadata from a
+\code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}}
+object and applies penalized Cox regression via \code{\link[coda4microbiome]{coda_coxnet}},
+using pairwise log-ratio transformations.
+}
+\examples{
+# data(SurvivalData)
+# tse <- SurvivalData
+# getSurvival(tse, time.col = "T1Dweek", status.col = "T1D", 
+# covar.cols = c("Sex", "Antibiotics"))
+
+}
+\references{
+Meritxell Pujolassos , Antoni Susín , M.Luz Calle (2024).
+\emph{Microbiome compositional data analysis for survival studies }
+NAR Genomics and Bioinformatics, 6(2), lqae038.
+\doi{10.1093/nargab/lqae038}
+}
+\seealso{
+\code{\link[coda4microbiome]{coda_coxnet}},
+\code{\link[glmnet]{cv.glmnet}}
+}

From 2b1fead1e8116e1abd244ec10e74fc502bc930fe Mon Sep 17 00:00:00 2001
From: Daena Rys <rysdaena8@gmail.com>
Date: Fri, 22 Aug 2025 14:21:12 +0300
Subject: [PATCH 2/8] up

Signed-off-by: Daena Rys <rysdaena8@gmail.com>
---
 NAMESPACE          |   5 +-
 R/getSurvival.R    | 306 ++++++++++++++++++++++++++++++++-------------
 man/getSurvival.Rd |  55 ++++----
 3 files changed, 245 insertions(+), 121 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index eda442d..3e71997 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -28,7 +28,6 @@ import(SummarizedExperiment)
 import(TreeSummarizedExperiment)
 import(methods)
 import(mia)
-importFrom(coda4microbiome,coda_coxnet)
 importFrom(dplyr,across)
 importFrom(dplyr,all_of)
 importFrom(dplyr,all_vars)
@@ -47,9 +46,13 @@ importFrom(dplyr,summarize)
 importFrom(dplyr,sym)
 importFrom(dplyr,ungroup)
 importFrom(dplyr,vars)
+importFrom(glmnet,cv.glmnet)
+importFrom(glmnet,glmnet)
 importFrom(stats,coef)
 importFrom(stats,lm)
 importFrom(stats,median)
 importFrom(stats,setNames)
+importFrom(survival,Surv)
+importFrom(survival,coxph)
 importFrom(tidyr,pivot_wider)
 importFrom(tidyr,unnest)
diff --git a/R/getSurvival.R b/R/getSurvival.R
index e2106ea..a764a52 100644
--- a/R/getSurvival.R
+++ b/R/getSurvival.R
@@ -7,14 +7,11 @@
 #' Survival analysis
 #' 
 #' @description
-#' Survival modeling wrapper for TreeSummarizedExperiment using coda_coxnet
-#' 
-#' @details
-#' This function extracts compositional count data and survival metadata from a
-#' \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}}
-#' object and applies penalized Cox regression via \code{\link[coda4microbiome]{coda_coxnet}}, 
-#' using pairwise log-ratio transformations.
-#'
+#' Fit a (penalized) Cox proportional hazards model on microbiome data contained 
+#' in a SummarizedExperiment object. 
+#' Data transformations (e.g. pairwise log-ratios) should be handled upstream 
+#' (e.g. with mia::transformAssay). This function focuses on the statistical 
+#' model.
 #'
 #' @param time.col \code{Character scalar}. Column name in \code{colData(x)} 
 #' representing time to event or follow-up time. Must be numeric.
@@ -26,41 +23,38 @@
 #' @param covar.cols \code{Character vector}. Optional. Specifies covariate 
 #' columns in \code{colData(x)} to adjust for in the survival model. 
 #' (Default: \code{NULL})
+#' 
+#' @param penalized \code{Logical}. If TRUE, fit penalized Cox regression 
+#' using \code{glmnet}. If FALSE, fit standard Cox model using 
+#' \code{survival::coxph}. (Default: \code{TRUE})
 #'
 #' @param lambda \code{Character or numeric}. Penalization parameter passed to 
 #' \code{\link[glmnet]{cv.glmnet}}. Use \code{"lambda.1se"}, 
 #' \code{"lambda.min"}, or a numeric value. (Default: \code{"lambda.1se"})
 #'
-#' @param nvar \code{Integer scalar}. Optional. Maximum number of variables 
-#' (log-ratios) to include in the model. (Default: \code{NULL})
-#'
 #' @param alpha \code{Numeric scalar}. Elastic net mixing parameter: 1 = Lasso, 
 #' 0 = Ridge. (Default: \code{0.9})
 #'
 #' @param nfolds \code{Integer scalar}. Number of cross-validation folds for 
 #' \code{cv.glmnet}. (Default: \code{10})
-#'
-#' @param showPlots \code{Logical}. If \code{TRUE}, generates plots for 
-#' cross-validation, risk scores, and selected signature. (Default: \code{TRUE})
-#'
+#' 
+#' @param nvar \code{Integer scalar}. Optional. Maximum number of variables 
+#' (log-ratios) to include in the model. (Default: \code{NULL})
+#' 
 #' @param coef_threshold \code{Numeric scalar}. Minimum absolute value for a 
 #' coefficient to be included in the final model. (Default: \code{0})
 #' 
 #' @inheritParams addBaselineDivergence
 #'
-#' @return A named \code{list} containing:
+#' @return A list with model summaries:
 #' \itemize{
-#'   \item \code{taxa.num}: indices of selected taxa
-#'   \item \code{taxa.name}: names of selected taxa
-#'   \item \code{log-contrast coefficients}: coefficients of selected taxa in 
-#'   the final log-contrast model
-#'   \item \code{risk.score}: predicted risk scores
-#'   \item \code{apparent Cindex}: concordance index on training data
-#'   \item \code{mean cv-Cindex}: mean cross-validated concordance index
-#'   \item \code{sd cv-Cindex}: standard deviation of cross-validated 
-#'   concordance index
-#'   \item \code{risk score plot}: ggplot object showing risk score vs survival
-#'   \item \code{signature plot}: ggplot object of selected taxa and coefficients
+#'   \item \code{coefficients}: estimated model coefficients
+#'   \item \code{selected.features}: nonzero features in penalized model
+#'   \item \code{risk.scores}: predicted risk scores
+#'   \item \code{cindex.apparent}: apparent concordance index
+#'   \item \code{cv.cindex.mean}: mean cross-validated C-index (if penalized)
+#'   \item \code{cv.cindex.sd}: SD of cross-validated C-index (if penalized)
+#'   \item \code{fit}: fitted model object (coxph or cv.glmnet)
 #' }
 #'
 #' @seealso \code{\link[coda4microbiome]{coda_coxnet}}, 
@@ -82,7 +76,8 @@ NULL
 
 #' @rdname getSurvival
 #' @export
-#' @importFrom coda4microbiome coda_coxnet
+#' @importFrom survival Surv coxph
+#' @importFrom glmnet cv.glmnet glmnet
 setMethod("getSurvival", signature(x = "SummarizedExperiment"),
     function(
         x,
@@ -90,68 +85,201 @@ setMethod("getSurvival", signature(x = "SummarizedExperiment"),
         status.col,
         assay.type = "counts",
         covar.cols = NULL,
-        lambda = "lambda.1se",
-        nvar = NULL,
-        alpha = 0.9,
-        nfolds = 10,
-        showPlots = TRUE,
-        coef_threshold = 0,
         ...
           ) {
-              ############################# INPUT CHECK ########################
-              x <- .check_and_get_altExp(x, ...)
-              
-              # Check required colData fields
-              .check_input(time.col, list("character scalar"), 
-                           colnames(colData(x)))
-              .check_input(status.col, list("character scalar"), 
-                           colnames(colData(x)))
+        # input checks
+        x <- .check_data_for_survival(x, time.col, 
+            status.col, covar.cols, assay.type, ...)
+        
+        # extract data
+        dat <- .get_data_for_survival(x, time.col, 
+            status.col, covar.cols, assay.type)
+        
+        # fit survival model
+        res <- .calc_survival(
+            mat = dat$mat,
+            time = dat$time,
+            status = dat$status,
+            covar = dat$covar,
+            ...
+        )
               
-              if (!is.numeric(x[[time.col]])) {
-                  stop("'time.col' must specify a numeric column from colData(x)", 
-                       call. = FALSE)
-              }
-              if (!is.logical(x[[status.col]]) && !is.numeric(x[[status.col]])) {
-                  stop("'status.col' must be numeric (0/1) or logical (FALSE/TRUE)", 
-                       call. = FALSE)
-              }
-              
-              .check_assay_present(assay.type, x)
-              
-              if (!is.null(covar.cols)) {
-                  .check_input(covar.cols, list("character vector"), 
-                               colnames(colData(x)))
-              }
-              
-              ########################### INPUT CHECK END ######################
-              
-              # Extract matrix
-              mat <- t(assay(x, assay.type))
-              
-              # Extract survival time and status
-              time <- as.numeric(x[[time.col]])
-              status <- as.numeric(x[[status.col]])
-              
-              # Extract covariates (if specified)
-              covar <- NULL
-              if (!is.null(covar.cols)) {
-                  covar <- as.data.frame(colData(x)[, covar.cols, drop = FALSE])
-              }
-              
-              # Run core model
-              result <- coda_coxnet(
-                  x = mat,
-                  time = time,
-                  status = status,
-                  covar = covar,
-                  lambda = lambda,
-                  nvar = nvar,
-                  alpha = alpha,
-                  nfolds = nfolds,
-                  showPlots = showPlots,
-                  coef_threshold = coef_threshold
-              )
-              
-              return(result)
+        return(res)
           }
-)
\ No newline at end of file
+)
+
+############################# internal helpers #################################
+
+.check_data_for_survival <- function(x, time.col, 
+    status.col, covar.cols, assay.type, ...) 
+{
+    x <- .check_and_get_altExp(x, ...)
+    .check_input(time.col, list("character scalar"), colnames(colData(x)))
+    .check_input(status.col, list("character scalar"), colnames(colData(x)))
+    .check_assay_present(assay.type, x)
+    
+    if (!is.numeric(x[[time.col]])) {
+        stop("'time.col' must be numeric", call. = FALSE)
+    }
+    if (!is.logical(x[[status.col]]) && !is.numeric(x[[status.col]])) {
+        stop("'status.col' must be numeric (0/1) or logical", call. = FALSE)
+    }
+    if (!is.null(covar.cols)) {
+        .check_input(covar.cols, list("character vector"), colnames(colData(x)))
+    }
+    
+    return(x)
+}
+
+.get_data_for_survival <- function(x, time.col, 
+    status.col, covar.cols, assay.type) 
+{
+    mat <- t(assay(x, assay.type))
+    time <- as.numeric(x[[time.col]])
+    status <- as.numeric(x[[status.col]])
+    
+    covar <- NULL
+    if (!is.null(covar.cols)) {
+        covar <- as.data.frame(colData(x)[, covar.cols, drop = FALSE])
+    }
+    
+    list(
+        mat = mat,
+        time = time,
+        status = status,
+        covar = covar
+    )
+}
+
+.calc_survival <- function(mat, time, status, covar = NULL,
+    penalized = TRUE, alpha = 0.9, nfolds = 10,
+    lambda = "lambda.1se", nvar = NULL,
+    coef_threshold = 0, ...) 
+{
+    y <- survival::Surv(time, status)
+    
+    if (penalized) {
+        ## --- penalized Cox with glmnet ---
+        if (is.null(covar)) {
+            cvfit <- glmnet::cv.glmnet(
+                x = mat,
+                y = y,
+                family = "cox",
+                type.measure = "C",
+                alpha = alpha,
+                nfolds = nfolds,
+                keep = TRUE
+            )
+            offset <- NULL
+        } else {
+            ## fit Cox model for covariates -> use offset
+            df0 <- data.frame(time = time, status = status, covar)
+            model0 <- survival::coxph(Surv(time, status) ~ ., data = df0)
+            offset <- predict(model0)
+            cvfit <- glmnet::cv.glmnet(
+                x = mat,
+                y = y,
+                family = "cox",
+                type.measure = "C",
+                alpha = alpha,
+                nfolds = nfolds,
+                keep = TRUE,
+                offset = offset
+            )
+        }
+        
+        ## --- nvar cutoff handling ---
+        if (!is.null(nvar)) {
+            rowlasso <- max(which(cvfit$glmnet.fit$df <= nvar))
+            lambda <- cvfit$glmnet.fit$lambda[rowlasso]
+        }
+        
+        ## --- lambda selection ---
+        if (is.character(lambda)) {
+            if (lambda == "lambda.1se") lambda.value <- cvfit$lambda.1se
+            if (lambda == "lambda.min") lambda.value <- cvfit$lambda.min
+        } else {
+            lambda.value <- lambda
+        }
+        
+        ## --- coefficients ---
+        coefs <- as.vector(coef(cvfit, s = lambda.value))
+        names(coefs) <- rownames(coef(cvfit, s = lambda.value))
+        selected <- which(abs(coefs) > coef_threshold)
+        coef.filtered <- coefs[selected]
+        
+        ## normalize coefficients
+        if (length(coef.filtered) > 0) {
+            coef.filtered <- 2 * coef.filtered / sum(abs(coef.filtered))
+        }
+        
+        ## --- predictions ---
+        if (is.null(offset)) {
+            risk.scores <- as.numeric(predict(cvfit, mat, s = lambda.value))
+        } else {
+            risk.scores <- as.numeric(predict(cvfit, mat, s = lambda.value,
+                                              newoffset = offset))
+        }
+        
+        ## --- apparent C-index ---
+        if (length(coef.filtered) == 0) {
+            Cindex_signature <- 0.5
+        } else {
+            Cindex_signature <- glmnet::Cindex(pred = risk.scores, y)
+        }
+        
+        ## --- CV stats at chosen lambda ---
+        idrow <- max(which(cvfit$glmnet.fit$lambda >= lambda.value))
+        mcvCindex <- cvfit$cvm[idrow]
+        sdcvCindex <- cvfit$cvsd[idrow]
+        
+        out <- list(
+            coefficients = coef.filtered,
+            selected.features = names(coef.filtered),
+            risk.scores = risk.scores,
+            cindex.apparent = Cindex_signature,
+            cv.cindex.mean = mcvCindex,
+            cv.cindex.sd = sdcvCindex,
+            fit = cvfit
+        )
+        
+    } else {
+        ## --- standard CoxPH ---
+        df <- data.frame(time = time, status = status, mat, covar)
+        formula <- as.formula(paste("Surv(time, status) ~ ."))
+        fit <- survival::coxph(formula, data = df)
+        
+        ## raw coefficients
+        coefs <- coef(fit)
+        
+        ## apply coef_threshold
+        selected <- which(abs(coefs) > coef_threshold)
+        coef.filtered <- coefs[selected]
+        
+        ## normalize
+        if (length(coef.filtered) > 0) {
+            coef.filtered <- 2 * coef.filtered / sum(abs(coef.filtered))
+        }
+        
+        risk.scores <- predict(fit, type = "lp")
+        
+        ## apparent C-index
+        if (length(coef.filtered) == 0) {
+            Cindex_signature <- 0.5
+        } else {
+            Cindex_signature <- summary(fit)$concordance[1]
+        }
+        
+        out <- list(
+            coefficients = coef.filtered,
+            selected.features = names(coef.filtered),
+            risk.scores = as.numeric(risk.scores),
+            cindex.apparent = Cindex_signature,
+            cv.cindex.mean = NULL,
+            cv.cindex.sd = NULL,
+            fit = fit
+        )
+    }
+    
+    return(out)
+}
diff --git a/man/getSurvival.Rd b/man/getSurvival.Rd
index 4d77ab4..f70ebe5 100644
--- a/man/getSurvival.Rd
+++ b/man/getSurvival.Rd
@@ -13,12 +13,10 @@ getSurvival(x, ...)
   status.col,
   assay.type = "counts",
   covar.cols = NULL,
-  lambda = "lambda.1se",
-  nvar = NULL,
+  penalized = TRUE,
   alpha = 0.9,
   nfolds = 10,
-  showPlots = TRUE,
-  coef_threshold = 0,
+  lambda = "lambda.1se",
   ...
 )
 }
@@ -44,12 +42,9 @@ used in the dissimilarity estimation. (Default: \code{"counts"})}
 columns in \code{colData(x)} to adjust for in the survival model.
 (Default: \code{NULL})}
 
-\item{lambda}{\code{Character or numeric}. Penalization parameter passed to
-\code{\link[glmnet]{cv.glmnet}}. Use \code{"lambda.1se"},
-\code{"lambda.min"}, or a numeric value. (Default: \code{"lambda.1se"})}
-
-\item{nvar}{\code{Integer scalar}. Optional. Maximum number of variables
-(log-ratios) to include in the model. (Default: \code{NULL})}
+\item{penalized}{\code{Logical}. If TRUE, fit penalized Cox regression
+using \code{glmnet}. If FALSE, fit standard Cox model using
+\code{survival::coxph}. (Default: \code{TRUE})}
 
 \item{alpha}{\code{Numeric scalar}. Elastic net mixing parameter: 1 = Lasso,
 0 = Ridge. (Default: \code{0.9})}
@@ -57,36 +52,34 @@ columns in \code{colData(x)} to adjust for in the survival model.
 \item{nfolds}{\code{Integer scalar}. Number of cross-validation folds for
 \code{cv.glmnet}. (Default: \code{10})}
 
-\item{showPlots}{\code{Logical}. If \code{TRUE}, generates plots for
-cross-validation, risk scores, and selected signature. (Default: \code{TRUE})}
+\item{lambda}{\code{Character or numeric}. Penalization parameter passed to
+\code{\link[glmnet]{cv.glmnet}}. Use \code{"lambda.1se"},
+\code{"lambda.min"}, or a numeric value. (Default: \code{"lambda.1se"})}
+
+\item{nvar}{\code{Integer scalar}. Optional. Maximum number of variables
+(log-ratios) to include in the model. (Default: \code{NULL})}
 
 \item{coef_threshold}{\code{Numeric scalar}. Minimum absolute value for a
 coefficient to be included in the final model. (Default: \code{0})}
 }
 \value{
-A named \code{list} containing:
+A list with model summaries:
 \itemize{
-\item \code{taxa.num}: indices of selected taxa
-\item \code{taxa.name}: names of selected taxa
-\item \code{log-contrast coefficients}: coefficients of selected taxa in
-the final log-contrast model
-\item \code{risk.score}: predicted risk scores
-\item \code{apparent Cindex}: concordance index on training data
-\item \code{mean cv-Cindex}: mean cross-validated concordance index
-\item \code{sd cv-Cindex}: standard deviation of cross-validated
-concordance index
-\item \code{risk score plot}: ggplot object showing risk score vs survival
-\item \code{signature plot}: ggplot object of selected taxa and coefficients
+\item \code{coefficients}: estimated model coefficients
+\item \code{selected.features}: nonzero features in penalized model
+\item \code{risk.scores}: predicted risk scores
+\item \code{cindex.apparent}: apparent concordance index
+\item \code{cv.cindex.mean}: mean cross-validated C-index (if penalized)
+\item \code{cv.cindex.sd}: SD of cross-validated C-index (if penalized)
+\item \code{fit}: fitted model object (coxph or cv.glmnet)
 }
 }
 \description{
-Survival modeling wrapper for TreeSummarizedExperiment using coda_coxnet
-}
-\details{
-This function extracts compositional count data and survival metadata from a
-\code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}}
-object and applies penalized Cox regression via \code{\link[coda4microbiome]{coda_coxnet}},
-using pairwise log-ratio transformations.
+Fit a (penalized) Cox proportional hazards model on microbiome data contained
+in a SummarizedExperiment object.
+Data transformations (e.g. pairwise log-ratios) should be handled upstream
+(e.g. with mia::transformAssay). This function focuses on the statistical
+model.
 }
 \examples{
 # data(SurvivalData)

From 38fd536cf4e1d5cd08f86280827bd7155e410972 Mon Sep 17 00:00:00 2001
From: Daena Rys <rysdaena8@gmail.com>
Date: Fri, 22 Aug 2025 14:56:29 +0300
Subject: [PATCH 3/8] up

Signed-off-by: Daena Rys <rysdaena8@gmail.com>
---
 DESCRIPTION        |  3 ++-
 man/getSurvival.Rd | 12 ++++--------
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 3095cec..43eed37 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -25,13 +25,14 @@ Depends:
     R (>= 4.5.0),
     mia
 Imports:
-    coda4microbiome,
     dplyr,
+    glmnet,
     methods,
     S4Vectors,
     SingleCellExperiment,
     stats,
     SummarizedExperiment,
+    survival,
     tidyr,
     TreeSummarizedExperiment
 Suggests:
diff --git a/man/getSurvival.Rd b/man/getSurvival.Rd
index f70ebe5..34ecb5b 100644
--- a/man/getSurvival.Rd
+++ b/man/getSurvival.Rd
@@ -13,10 +13,6 @@ getSurvival(x, ...)
   status.col,
   assay.type = "counts",
   covar.cols = NULL,
-  penalized = TRUE,
-  alpha = 0.9,
-  nfolds = 10,
-  lambda = "lambda.1se",
   ...
 )
 }
@@ -46,16 +42,16 @@ columns in \code{colData(x)} to adjust for in the survival model.
 using \code{glmnet}. If FALSE, fit standard Cox model using
 \code{survival::coxph}. (Default: \code{TRUE})}
 
+\item{lambda}{\code{Character or numeric}. Penalization parameter passed to
+\code{\link[glmnet]{cv.glmnet}}. Use \code{"lambda.1se"},
+\code{"lambda.min"}, or a numeric value. (Default: \code{"lambda.1se"})}
+
 \item{alpha}{\code{Numeric scalar}. Elastic net mixing parameter: 1 = Lasso,
 0 = Ridge. (Default: \code{0.9})}
 
 \item{nfolds}{\code{Integer scalar}. Number of cross-validation folds for
 \code{cv.glmnet}. (Default: \code{10})}
 
-\item{lambda}{\code{Character or numeric}. Penalization parameter passed to
-\code{\link[glmnet]{cv.glmnet}}. Use \code{"lambda.1se"},
-\code{"lambda.min"}, or a numeric value. (Default: \code{"lambda.1se"})}
-
 \item{nvar}{\code{Integer scalar}. Optional. Maximum number of variables
 (log-ratios) to include in the model. (Default: \code{NULL})}
 

From 059f1091c65c768e5b50b43dd151a13258587a34 Mon Sep 17 00:00:00 2001
From: Daena Rys <rysdaena8@gmail.com>
Date: Fri, 22 Aug 2025 15:33:04 +0300
Subject: [PATCH 4/8] up

Signed-off-by: Daena Rys <rysdaena8@gmail.com>
---
 R/getSurvival.R    | 31 +++++++++++++++++--------------
 man/getSurvival.Rd | 44 ++++++++++++++++++++++----------------------
 2 files changed, 39 insertions(+), 36 deletions(-)

diff --git a/R/getSurvival.R b/R/getSurvival.R
index a764a52..cfa30fc 100644
--- a/R/getSurvival.R
+++ b/R/getSurvival.R
@@ -24,25 +24,28 @@
 #' columns in \code{colData(x)} to adjust for in the survival model. 
 #' (Default: \code{NULL})
 #' 
-#' @param penalized \code{Logical}. If TRUE, fit penalized Cox regression 
-#' using \code{glmnet}. If FALSE, fit standard Cox model using 
-#' \code{survival::coxph}. (Default: \code{TRUE})
+#' @param ... additional arguments.
+#' \itemize{
+#'   \item \code{penalized}: \code{Logical}. If TRUE, fit penalized Cox 
+#'   regression using \code{glmnet}. If FALSE, fit standard Cox model using 
+#'   \code{survival::coxph}. (Default: \code{TRUE})
 #'
-#' @param lambda \code{Character or numeric}. Penalization parameter passed to 
-#' \code{\link[glmnet]{cv.glmnet}}. Use \code{"lambda.1se"}, 
-#' \code{"lambda.min"}, or a numeric value. (Default: \code{"lambda.1se"})
+#'   \item \code{lambda}: \code{Character or numeric}. Penalization parameter 
+#'   passed to \code{\link[glmnet]{cv.glmnet}}. Use \code{"lambda.1se"}, 
+#'   \code{"lambda.min"}, or a numeric value. (Default: \code{"lambda.1se"})
 #'
-#' @param alpha \code{Numeric scalar}. Elastic net mixing parameter: 1 = Lasso, 
-#' 0 = Ridge. (Default: \code{0.9})
+#'   \item \code{alpha}: \code{Numeric scalar}. Elastic net mixing parameter: 
+#'   1 = Lasso, 0 = Ridge. (Default: \code{0.9})
 #'
-#' @param nfolds \code{Integer scalar}. Number of cross-validation folds for 
-#' \code{cv.glmnet}. (Default: \code{10})
+#'   \item \code{nfolds}: \code{Integer scalar}. Number of cross-validation 
+#'   folds for \code{cv.glmnet}. (Default: \code{10})
 #' 
-#' @param nvar \code{Integer scalar}. Optional. Maximum number of variables 
-#' (log-ratios) to include in the model. (Default: \code{NULL})
+#'   \item \code{nvar}: \code{Integer scalar}. Optional. Maximum number of 
+#'   variables (log-ratios) to include in the model. (Default: \code{NULL})
 #' 
-#' @param coef_threshold \code{Numeric scalar}. Minimum absolute value for a 
-#' coefficient to be included in the final model. (Default: \code{0})
+#'   \item \code{coef_threshold}: \code{Numeric scalar}. Minimum absolute value 
+#'   for a coefficient to be included in the final model. (Default: \code{0})
+#' }  
 #' 
 #' @inheritParams addBaselineDivergence
 #'
diff --git a/man/getSurvival.Rd b/man/getSurvival.Rd
index 34ecb5b..daffa53 100644
--- a/man/getSurvival.Rd
+++ b/man/getSurvival.Rd
@@ -21,8 +21,28 @@ getSurvival(x, ...)
 \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}}
 object.}
 
-\item{...}{Optional arguments passed into
-\code{\link[mia:addDivergence]{mia::addDivergence()}}.}
+\item{...}{additional arguments.
+\itemize{
+\item \code{penalized}: \code{Logical}. If TRUE, fit penalized Cox
+regression using \code{glmnet}. If FALSE, fit standard Cox model using
+\code{survival::coxph}. (Default: \code{TRUE})
+
+\item \code{lambda}: \code{Character or numeric}. Penalization parameter
+passed to \code{\link[glmnet]{cv.glmnet}}. Use \code{"lambda.1se"},
+\code{"lambda.min"}, or a numeric value. (Default: \code{"lambda.1se"})
+
+\item \code{alpha}: \code{Numeric scalar}. Elastic net mixing parameter:
+1 = Lasso, 0 = Ridge. (Default: \code{0.9})
+
+\item \code{nfolds}: \code{Integer scalar}. Number of cross-validation
+folds for \code{cv.glmnet}. (Default: \code{10})
+
+\item \code{nvar}: \code{Integer scalar}. Optional. Maximum number of
+variables (log-ratios) to include in the model. (Default: \code{NULL})
+
+\item \code{coef_threshold}: \code{Numeric scalar}. Minimum absolute value
+for a coefficient to be included in the final model. (Default: \code{0})
+}}
 
 \item{time.col}{\code{Character scalar}. Column name in \code{colData(x)}
 representing time to event or follow-up time. Must be numeric.}
@@ -37,26 +57,6 @@ used in the dissimilarity estimation. (Default: \code{"counts"})}
 \item{covar.cols}{\code{Character vector}. Optional. Specifies covariate
 columns in \code{colData(x)} to adjust for in the survival model.
 (Default: \code{NULL})}
-
-\item{penalized}{\code{Logical}. If TRUE, fit penalized Cox regression
-using \code{glmnet}. If FALSE, fit standard Cox model using
-\code{survival::coxph}. (Default: \code{TRUE})}
-
-\item{lambda}{\code{Character or numeric}. Penalization parameter passed to
-\code{\link[glmnet]{cv.glmnet}}. Use \code{"lambda.1se"},
-\code{"lambda.min"}, or a numeric value. (Default: \code{"lambda.1se"})}
-
-\item{alpha}{\code{Numeric scalar}. Elastic net mixing parameter: 1 = Lasso,
-0 = Ridge. (Default: \code{0.9})}
-
-\item{nfolds}{\code{Integer scalar}. Number of cross-validation folds for
-\code{cv.glmnet}. (Default: \code{10})}
-
-\item{nvar}{\code{Integer scalar}. Optional. Maximum number of variables
-(log-ratios) to include in the model. (Default: \code{NULL})}
-
-\item{coef_threshold}{\code{Numeric scalar}. Minimum absolute value for a
-coefficient to be included in the final model. (Default: \code{0})}
 }
 \value{
 A list with model summaries:

From 63510e55a41d5c25a858d9585d3d95b4719d80a7 Mon Sep 17 00:00:00 2001
From: Daena Rys <rysdaena8@gmail.com>
Date: Mon, 1 Sep 2025 13:33:02 +0300
Subject: [PATCH 5/8] up

Signed-off-by: Daena Rys <rysdaena8@gmail.com>
---
 NAMESPACE          |   2 +
 R/AllGenerics.R    |   5 +
 R/getSurvival.R    | 293 +++++++++++++++++++++++----------------------
 man/getSurvival.Rd |  20 +++-
 4 files changed, 170 insertions(+), 150 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 3e71997..be97c72 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -5,6 +5,7 @@ export(addBimodality)
 export(addShortTermChange)
 export(addStability)
 export(addStepwiseDivergence)
+export(addSurvival)
 export(getBaselineDivergence)
 export(getBimodality)
 export(getShortTermChange)
@@ -16,6 +17,7 @@ exportMethods(addBimodality)
 exportMethods(addShortTermChange)
 exportMethods(addStability)
 exportMethods(addStepwiseDivergence)
+exportMethods(addSurvival)
 exportMethods(getBaselineDivergence)
 exportMethods(getBimodality)
 exportMethods(getShortTermChange)
diff --git a/R/AllGenerics.R b/R/AllGenerics.R
index 7f985b0..ca65f48 100644
--- a/R/AllGenerics.R
+++ b/R/AllGenerics.R
@@ -55,3 +55,8 @@ setGeneric("addStability", signature = "x", function(x, ...)
 #' @export
 setGeneric("getSurvival", signature = "x", function(x, ...)
     standardGeneric("getSurvival"))
+
+#' @rdname getSurvival
+#' @export
+setGeneric("addSurvival", signature = "x", function(x, ...)
+    standardGeneric("addSurvival"))
\ No newline at end of file
diff --git a/R/getSurvival.R b/R/getSurvival.R
index cfa30fc..05b4456 100644
--- a/R/getSurvival.R
+++ b/R/getSurvival.R
@@ -20,7 +20,7 @@
 #' representing event occurrence. Accepts numeric (0/1) or logical (FALSE/TRUE) 
 #' values.
 #'
-#' @param covar.cols \code{Character vector}. Optional. Specifies covariate 
+#' @param col.var \code{Character vector}. Optional. Specifies covariate 
 #' columns in \code{colData(x)} to adjust for in the survival model. 
 #' (Default: \code{NULL})
 #' 
@@ -43,7 +43,7 @@
 #'   \item \code{nvar}: \code{Integer scalar}. Optional. Maximum number of 
 #'   variables (log-ratios) to include in the model. (Default: \code{NULL})
 #' 
-#'   \item \code{coef_threshold}: \code{Numeric scalar}. Minimum absolute value 
+#'   \item \code{coef.threshold}: \code{Numeric scalar}. Minimum absolute value 
 #'   for a coefficient to be included in the final model. (Default: \code{0})
 #' }  
 #' 
@@ -53,7 +53,7 @@
 #' \itemize{
 #'   \item \code{coefficients}: estimated model coefficients
 #'   \item \code{selected.features}: nonzero features in penalized model
-#'   \item \code{risk.scores}: predicted risk scores
+#'   \item \code{risk_scores}: predicted risk scores
 #'   \item \code{cindex.apparent}: apparent concordance index
 #'   \item \code{cv.cindex.mean}: mean cross-validated C-index (if penalized)
 #'   \item \code{cv.cindex.sd}: SD of cross-validated C-index (if penalized)
@@ -73,30 +73,38 @@
 #' # data(SurvivalData)
 #' # tse <- SurvivalData
 #' # getSurvival(tse, time.col = "T1Dweek", status.col = "T1D", 
-#' # covar.cols = c("Sex", "Antibiotics"))
+#' # col.var = c("Sex", "Antibiotics"))
 #'
 NULL
 
 #' @rdname getSurvival
 #' @export
-#' @importFrom survival Surv coxph
-#' @importFrom glmnet cv.glmnet glmnet
+setMethod("addSurvival", signature = c(x = "SummarizedExperiment"),
+    function(x, time.col, status.col, name = "survival", ...){
+        .check_input(name, "character scalar")
+        x <- .check_and_get_altExp(x, ...)
+        args <- c(
+            list(x = x, time.col = time.col, status.col = status.col,
+                    name = name),
+            list(...))
+        res <- do.call(getSurvival, args) |> as.list()
+        x <- .add_values_to_metadata(x, name, res, ...)
+        return(x)
+    }
+)
+
+#' @rdname getSurvival
+#' @export
 setMethod("getSurvival", signature(x = "SummarizedExperiment"),
-    function(
-        x,
-        time.col,
-        status.col,
-        assay.type = "counts",
-        covar.cols = NULL,
-        ...
-          ) {
+    function(x, time.col, status.col, assay.type = "counts", col.var = NULL,
+        ...){
         # input checks
         x <- .check_data_for_survival(x, time.col, 
-            status.col, covar.cols, assay.type, ...)
+            status.col, col.var, assay.type, ...)
         
         # extract data
         dat <- .get_data_for_survival(x, time.col, 
-            status.col, covar.cols, assay.type)
+            status.col, col.var, assay.type)
         
         # fit survival model
         res <- .calc_survival(
@@ -108,13 +116,13 @@ setMethod("getSurvival", signature(x = "SummarizedExperiment"),
         )
               
         return(res)
-          }
+    }
 )
 
 ############################# internal helpers #################################
 
 .check_data_for_survival <- function(x, time.col, 
-    status.col, covar.cols, assay.type, ...) 
+    status.col, col.var, assay.type, ...) 
 {
     x <- .check_and_get_altExp(x, ...)
     .check_input(time.col, list("character scalar"), colnames(colData(x)))
@@ -127,25 +135,23 @@ setMethod("getSurvival", signature(x = "SummarizedExperiment"),
     if (!is.logical(x[[status.col]]) && !is.numeric(x[[status.col]])) {
         stop("'status.col' must be numeric (0/1) or logical", call. = FALSE)
     }
-    if (!is.null(covar.cols)) {
-        .check_input(covar.cols, list("character vector"), colnames(colData(x)))
+    if (!is.null(col.var)) {
+        .check_input(col.var, list("character vector"), colnames(colData(x)))
     }
     
     return(x)
 }
 
 .get_data_for_survival <- function(x, time.col, 
-    status.col, covar.cols, assay.type) 
+    status.col, col.var, assay.type) 
 {
     mat <- t(assay(x, assay.type))
     time <- as.numeric(x[[time.col]])
     status <- as.numeric(x[[status.col]])
     
     covar <- NULL
-    if (!is.null(covar.cols)) {
-        covar <- as.data.frame(colData(x)[, covar.cols, drop = FALSE])
-    }
-    
+    if (!is.null(col.var))
+        covar <- as.data.frame(colData(x)[, col.var, drop = FALSE])
     list(
         mat = mat,
         time = time,
@@ -154,135 +160,132 @@ setMethod("getSurvival", signature(x = "SummarizedExperiment"),
     )
 }
 
+# dispatcher to calculate survival
 .calc_survival <- function(mat, time, status, covar = NULL,
     penalized = TRUE, alpha = 0.9, nfolds = 10,
     lambda = "lambda.1se", nvar = NULL,
-    coef_threshold = 0, ...) 
+    coef.threshold = 0, ...) 
 {
-    y <- survival::Surv(time, status)
-    
     if (penalized) {
         ## --- penalized Cox with glmnet ---
-        if (is.null(covar)) {
-            cvfit <- glmnet::cv.glmnet(
-                x = mat,
-                y = y,
-                family = "cox",
-                type.measure = "C",
-                alpha = alpha,
-                nfolds = nfolds,
-                keep = TRUE
-            )
-            offset <- NULL
-        } else {
-            ## fit Cox model for covariates -> use offset
-            df0 <- data.frame(time = time, status = status, covar)
-            model0 <- survival::coxph(Surv(time, status) ~ ., data = df0)
-            offset <- predict(model0)
-            cvfit <- glmnet::cv.glmnet(
-                x = mat,
-                y = y,
-                family = "cox",
-                type.measure = "C",
-                alpha = alpha,
-                nfolds = nfolds,
-                keep = TRUE,
-                offset = offset
-            )
-        }
-        
-        ## --- nvar cutoff handling ---
-        if (!is.null(nvar)) {
-            rowlasso <- max(which(cvfit$glmnet.fit$df <= nvar))
-            lambda <- cvfit$glmnet.fit$lambda[rowlasso]
-        }
-        
-        ## --- lambda selection ---
-        if (is.character(lambda)) {
-            if (lambda == "lambda.1se") lambda.value <- cvfit$lambda.1se
-            if (lambda == "lambda.min") lambda.value <- cvfit$lambda.min
-        } else {
-            lambda.value <- lambda
-        }
-        
-        ## --- coefficients ---
-        coefs <- as.vector(coef(cvfit, s = lambda.value))
-        names(coefs) <- rownames(coef(cvfit, s = lambda.value))
-        selected <- which(abs(coefs) > coef_threshold)
-        coef.filtered <- coefs[selected]
-        
-        ## normalize coefficients
-        if (length(coef.filtered) > 0) {
-            coef.filtered <- 2 * coef.filtered / sum(abs(coef.filtered))
-        }
-        
-        ## --- predictions ---
-        if (is.null(offset)) {
-            risk.scores <- as.numeric(predict(cvfit, mat, s = lambda.value))
-        } else {
-            risk.scores <- as.numeric(predict(cvfit, mat, s = lambda.value,
-                                              newoffset = offset))
-        }
-        
-        ## --- apparent C-index ---
-        if (length(coef.filtered) == 0) {
-            Cindex_signature <- 0.5
-        } else {
-            Cindex_signature <- glmnet::Cindex(pred = risk.scores, y)
-        }
-        
-        ## --- CV stats at chosen lambda ---
-        idrow <- max(which(cvfit$glmnet.fit$lambda >= lambda.value))
-        mcvCindex <- cvfit$cvm[idrow]
-        sdcvCindex <- cvfit$cvsd[idrow]
-        
-        out <- list(
-            coefficients = coef.filtered,
-            selected.features = names(coef.filtered),
-            risk.scores = risk.scores,
-            cindex.apparent = Cindex_signature,
-            cv.cindex.mean = mcvCindex,
-            cv.cindex.sd = sdcvCindex,
-            fit = cvfit
+        .fit_penalized_cox(
+            mat, time, status, covar,
+            alpha, nfolds, nvar, lambda, coef.threshold
         )
-        
     } else {
         ## --- standard CoxPH ---
-        df <- data.frame(time = time, status = status, mat, covar)
-        formula <- as.formula(paste("Surv(time, status) ~ ."))
-        fit <- survival::coxph(formula, data = df)
-        
-        ## raw coefficients
-        coefs <- coef(fit)
-        
-        ## apply coef_threshold
-        selected <- which(abs(coefs) > coef_threshold)
-        coef.filtered <- coefs[selected]
-        
-        ## normalize
-        if (length(coef.filtered) > 0) {
-            coef.filtered <- 2 * coef.filtered / sum(abs(coef.filtered))
-        }
-        
-        risk.scores <- predict(fit, type = "lp")
-        
-        ## apparent C-index
-        if (length(coef.filtered) == 0) {
-            Cindex_signature <- 0.5
-        } else {
-            Cindex_signature <- summary(fit)$concordance[1]
-        }
-        
-        out <- list(
-            coefficients = coef.filtered,
-            selected.features = names(coef.filtered),
-            risk.scores = as.numeric(risk.scores),
-            cindex.apparent = Cindex_signature,
-            cv.cindex.mean = NULL,
-            cv.cindex.sd = NULL,
-            fit = fit
-        )
+        .fit_standard_cox(mat, time, status, covar, coef.threshold)
+    }
+}
+
+#' @importFrom survival Surv coxph
+#' @importFrom glmnet cv.glmnet glmnet
+# Penalized cox function
+.fit_penalized_cox <- function(mat, time, status, covar,
+    alpha, nfolds, nvar, lambda, coef.threshold) 
+{
+    y <- Surv(time, status)
+    
+    ## fit Cox model for covariates -> use offset
+    offset <- NULL
+    if (!is.null(covar)) {
+        df0 <- data.frame(time = time, status = status, covar)
+        model0 <- coxph(y ~ ., data = df0)
+        offset <- predict(model0, type = "lp")
     }
     
-    return(out)
+    cvfit <- cv.glmnet(
+        x = mat, y = y, family = "cox", type.measure = "C",
+        alpha = alpha, nfolds = nfolds, keep = TRUE,
+        offset = offset
+    )
+    ## --- lambda selection ---
+    lambda.value <- .lambda_selector(cvfit, lambda, nvar)
+    
+    ## --- coefficients ---
+    coefs <- as.vector(coef(cvfit, s = lambda.value))
+    names(coefs) <- rownames(coef(cvfit, s = lambda.value))
+    coef.res <- .filter_and_normalize_coefs(coefs, coef.threshold)
+    
+    ## --- predictions ---
+    risk_scores <- as.numeric(
+        predict(cvfit, mat, s = lambda.value, newoffset = offset)
+    )
+    ## --- apparent C-index ---
+    Cindex_signature <- .compute_cindex(risk_scores, y, penalized = TRUE)
+    ## --- CV stats at chosen lambda ---
+    idrow <- max(which(cvfit$glmnet.fit$lambda >= lambda.value))
+    
+    list(
+        coefficients = coef.res$coefficients,
+        selected.features = coef.res$selected.features,
+        risk_scores = risk_scores,
+        cindex.apparent = Cindex_signature,
+        cv.cindex.mean = cvfit$cvm[idrow],
+        cv.cindex.sd = cvfit$cvsd[idrow],
+        fit = cvfit
+    )
+}
+
+# Standard cox function
+.fit_standard_cox <- function(mat, time, status, covar, coef.threshold) {
+    ## --- standard CoxPH ---
+    y <- Surv(time, status)
+    df <- data.frame(time = time, status = status, mat, covar)
+    fit <- coxph(y ~ ., data = df)
+    
+    ## raw coefficients
+    coefs <- coef(fit)
+    ## normalize
+    coef.res <- .filter_and_normalize_coefs(coefs, coef.threshold)
+    ## predictions
+    risk_scores <- predict(fit, type = "lp")
+    ## apparent C-index
+    Cindex_signature <- .compute_cindex(risk_scores, y, fit, penalized = FALSE)
+    
+    list(
+        coefficients = coef.res$coefficients,
+        selected.features = coef.res$selected.features,
+        risk_scores = as.numeric(risk_scores),
+        cindex.apparent = Cindex_signature,
+        cv.cindex.mean = NULL,
+        cv.cindex.sd = NULL,
+        fit = fit
+    )
+}
+
+.filter_and_normalize_coefs <- function(coefs, threshold) {
+    selected <- which(abs(coefs) > threshold)
+    coef.filtered <- coefs[selected]
+    if (length(coef.filtered) > 0) {
+        coef.filtered <- 2 * coef.filtered / sum(abs(coef.filtered))
+    }
+    list(
+        coefficients = coef.filtered,
+        selected.features = names(coef.filtered)
+    )
 }
+
+.compute_cindex <- function(risk_scores, y, fit = NULL, penalized = TRUE) {
+    if (length(risk_scores) == 0 || all(risk_scores == 0)) return(0.5)
+    if (penalized) {
+        glmnet::Cindex(pred = risk_scores, y)
+    } else {
+        summary(fit)$concordance[1]
+    }
+}
+
+.lambda_selector <- function(cvfit, lambda, nvar = NULL) {
+    ## --- nvar cutoff handling ---
+    if (!is.null(nvar)) {
+        valid <- which(cvfit$glmnet.fit$df <= nvar)
+        if (length(valid) > 0) lambda <- cvfit$glmnet.fit$lambda[max(valid)]
+    }
+    ## --- lambda selection ---
+    if (is.character(lambda)) {
+        switch(lambda,
+            "lambda.1se" = cvfit$lambda.1se,
+            "lambda.min" = cvfit$lambda.min
+        )
+    } else lambda
+}
\ No newline at end of file
diff --git a/man/getSurvival.Rd b/man/getSurvival.Rd
index daffa53..2ec0ca6 100644
--- a/man/getSurvival.Rd
+++ b/man/getSurvival.Rd
@@ -2,17 +2,23 @@
 % Please edit documentation in R/AllGenerics.R, R/getSurvival.R
 \name{getSurvival}
 \alias{getSurvival}
+\alias{addSurvival}
+\alias{addSurvival,SummarizedExperiment-method}
 \alias{getSurvival,SummarizedExperiment-method}
 \title{Survival analysis}
 \usage{
 getSurvival(x, ...)
 
+addSurvival(x, ...)
+
+\S4method{addSurvival}{SummarizedExperiment}(x, time.col, status.col, name = "survival", ...)
+
 \S4method{getSurvival}{SummarizedExperiment}(
   x,
   time.col,
   status.col,
   assay.type = "counts",
-  covar.cols = NULL,
+  col.var = NULL,
   ...
 )
 }
@@ -40,7 +46,7 @@ folds for \code{cv.glmnet}. (Default: \code{10})
 \item \code{nvar}: \code{Integer scalar}. Optional. Maximum number of
 variables (log-ratios) to include in the model. (Default: \code{NULL})
 
-\item \code{coef_threshold}: \code{Numeric scalar}. Minimum absolute value
+\item \code{coef.threshold}: \code{Numeric scalar}. Minimum absolute value
 for a coefficient to be included in the final model. (Default: \code{0})
 }}
 
@@ -51,10 +57,14 @@ representing time to event or follow-up time. Must be numeric.}
 representing event occurrence. Accepts numeric (0/1) or logical (FALSE/TRUE)
 values.}
 
+\item{name}{\code{Character vector}. Specifies a column name for storing
+divergence results.
+(Default: \code{c("divergence", "time_diff", "ref_samples")})}
+
 \item{assay.type}{\code{Character scalar}. Specifies which assay values are
 used in the dissimilarity estimation. (Default: \code{"counts"})}
 
-\item{covar.cols}{\code{Character vector}. Optional. Specifies covariate
+\item{col.var}{\code{Character vector}. Optional. Specifies covariate
 columns in \code{colData(x)} to adjust for in the survival model.
 (Default: \code{NULL})}
 }
@@ -63,7 +73,7 @@ A list with model summaries:
 \itemize{
 \item \code{coefficients}: estimated model coefficients
 \item \code{selected.features}: nonzero features in penalized model
-\item \code{risk.scores}: predicted risk scores
+\item \code{risk_scores}: predicted risk scores
 \item \code{cindex.apparent}: apparent concordance index
 \item \code{cv.cindex.mean}: mean cross-validated C-index (if penalized)
 \item \code{cv.cindex.sd}: SD of cross-validated C-index (if penalized)
@@ -81,7 +91,7 @@ model.
 # data(SurvivalData)
 # tse <- SurvivalData
 # getSurvival(tse, time.col = "T1Dweek", status.col = "T1D", 
-# covar.cols = c("Sex", "Antibiotics"))
+# col.var = c("Sex", "Antibiotics"))
 
 }
 \references{

From 1492bb15acffa8ae02439520ffc2e36682c98099 Mon Sep 17 00:00:00 2001
From: Tuomas Borman <tvborm@utu.fi>
Date: Wed, 10 Sep 2025 16:29:03 +0300
Subject: [PATCH 6/8] up

---
 NAMESPACE          |   1 +
 R/getSurvival.R    | 422 ++++++++++++++++++++++++++-------------------
 man/getSurvival.Rd |  31 ++--
 3 files changed, 261 insertions(+), 193 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index be97c72..7775cb9 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -48,6 +48,7 @@ importFrom(dplyr,summarize)
 importFrom(dplyr,sym)
 importFrom(dplyr,ungroup)
 importFrom(dplyr,vars)
+importFrom(glmnet,Cindex)
 importFrom(glmnet,cv.glmnet)
 importFrom(glmnet,glmnet)
 importFrom(stats,coef)
diff --git a/R/getSurvival.R b/R/getSurvival.R
index 05b4456..caedcb8 100644
--- a/R/getSurvival.R
+++ b/R/getSurvival.R
@@ -1,79 +1,83 @@
 #' @name
 #' getSurvival
-#' 
+#'
 #' @export
-#' 
+#'
 #' @title
 #' Survival analysis
-#' 
+#'
 #' @description
-#' Fit a (penalized) Cox proportional hazards model on microbiome data contained 
-#' in a SummarizedExperiment object. 
-#' Data transformations (e.g. pairwise log-ratios) should be handled upstream 
-#' (e.g. with mia::transformAssay). This function focuses on the statistical 
-#' model.
+#' Fit a (penalized) Cox proportional hazards model on microbiome data contained
+#' in a SummarizedExperiment object. Data transformations (e.g. pairwise
+#' log-ratios) should be handled upstream (e.g. with mia::transformAssay).
 #'
-#' @param time.col \code{Character scalar}. Column name in \code{colData(x)} 
+#' @param time.col \code{Character scalar}. Column name in \code{colData(x)}
 #' representing time to event or follow-up time. Must be numeric.
 #'
-#' @param status.col \code{Character scalar}. Column name in \code{colData(x)} 
-#' representing event occurrence. Accepts numeric (0/1) or logical (FALSE/TRUE) 
-#' values.
+#' @param status.col \code{Character scalar}. Column name in \code{colData(x)}
+#' representing event occurrence. Accepts numeric (\code{0}/\code{1}) or
+#' logical (\code{TRUE}/\code{FALSE}) values.
 #'
-#' @param col.var \code{Character vector}. Optional. Specifies covariate 
-#' columns in \code{colData(x)} to adjust for in the survival model. 
+#' @param col.var \code{Character vector}. Optional. Specifies covariate
+#' columns in \code{colData(x)} to adjust for in the survival model.
 #' (Default: \code{NULL})
-#' 
+#'
 #' @param ... additional arguments.
 #' \itemize{
-#'   \item \code{penalized}: \code{Logical}. If TRUE, fit penalized Cox 
-#'   regression using \code{glmnet}. If FALSE, fit standard Cox model using 
-#'   \code{survival::coxph}. (Default: \code{TRUE})
+#'   \item \code{penalized}: \code{Logical}. If \code{TRUE}, fit penalized Cox
+#'   regression using \code{glmnet}. If \code{FALSE}, fit standard Cox model
+#'   using \code{survival::coxph}. (Default: \code{TRUE})
 #'
-#'   \item \code{lambda}: \code{Character or numeric}. Penalization parameter 
-#'   passed to \code{\link[glmnet]{cv.glmnet}}. Use \code{"lambda.1se"}, 
+#'   \item \code{lambda}: \code{Character or numeric}. Penalization parameter
+#'   passed to \code{\link[glmnet]{cv.glmnet}}. Use \code{"lambda.1se"},
 #'   \code{"lambda.min"}, or a numeric value. (Default: \code{"lambda.1se"})
 #'
-#'   \item \code{alpha}: \code{Numeric scalar}. Elastic net mixing parameter: 
-#'   1 = Lasso, 0 = Ridge. (Default: \code{0.9})
+#'   \item \code{alpha}: \code{Numeric scalar}. Elastic net mixing parameter
+#'   that controls the balance between Lasso and Ridge regression:
+#'   \code{alpha = 1} corresponds to Lasso,
+#'   \code{alpha = 0} corresponds to Ridge.
+#'   Values between 0 and 1 specify a combination of the two.
+#'   (Default: \code{0.9})
 #'
-#'   \item \code{nfolds}: \code{Integer scalar}. Number of cross-validation 
+#'   \item \code{nfolds}: \code{Integer scalar}. Number of cross-validation
 #'   folds for \code{cv.glmnet}. (Default: \code{10})
-#' 
-#'   \item \code{nvar}: \code{Integer scalar}. Optional. Maximum number of 
+#'
+#'   \item \code{nvar}: \code{Integer scalar}. Optional. Maximum number of
 #'   variables (log-ratios) to include in the model. (Default: \code{NULL})
-#' 
-#'   \item \code{coef.threshold}: \code{Numeric scalar}. Minimum absolute value 
+#'
+#'   \item \code{coef.threshold}: \code{Numeric scalar}. Minimum absolute value
 #'   for a coefficient to be included in the final model. (Default: \code{0})
-#' }  
-#' 
+#' }
+#'
 #' @inheritParams addBaselineDivergence
 #'
 #' @return A list with model summaries:
 #' \itemize{
 #'   \item \code{coefficients}: estimated model coefficients
-#'   \item \code{selected.features}: nonzero features in penalized model
 #'   \item \code{risk_scores}: predicted risk scores
-#'   \item \code{cindex.apparent}: apparent concordance index
-#'   \item \code{cv.cindex.mean}: mean cross-validated C-index (if penalized)
-#'   \item \code{cv.cindex.sd}: SD of cross-validated C-index (if penalized)
+#'   \item \code{c_index}: apparent concordance index
+#'   \item \code{c_index_cv_mean}: mean cross-validated C-index (if penalized)
+#'   \item \code{c_index_cv_sd}: SD of cross-validated C-index (if penalized)
 #'   \item \code{fit}: fitted model object (coxph or cv.glmnet)
 #' }
 #'
-#' @seealso \code{\link[coda4microbiome]{coda_coxnet}}, 
+#' @seealso \code{\link[coda4microbiome]{coda_coxnet}},
 #' \code{\link[glmnet]{cv.glmnet}}
-#' 
+#'
 #' @references
-#' Meritxell Pujolassos , Antoni Susín , M.Luz Calle (2024). 
+#' Meritxell Pujolassos , Antoni Susín , M.Luz Calle (2024).
 #' \emph{Microbiome compositional data analysis for survival studies }
-#' NAR Genomics and Bioinformatics, 6(2), lqae038. 
+#' NAR Genomics and Bioinformatics, 6(2), lqae038.
 #' \doi{10.1093/nargab/lqae038}
 #'
 #' @examples
-#' # data(SurvivalData)
-#' # tse <- SurvivalData
-#' # getSurvival(tse, time.col = "T1Dweek", status.col = "T1D", 
-#' # col.var = c("Sex", "Antibiotics"))
+#' data(crohn_survival)
+#' tse <- crohn_survival
+#' tse <- transformAssay(tse, method = "relabundance")
+#' fit <- getSurvival(
+#'     tse, assay.type = "relabundance",
+#'     time.col = "event_time", status.col = "event"
+#' )
 #'
 NULL
 
@@ -83,11 +87,13 @@ setMethod("addSurvival", signature = c(x = "SummarizedExperiment"),
     function(x, time.col, status.col, name = "survival", ...){
         .check_input(name, "character scalar")
         x <- .check_and_get_altExp(x, ...)
+        # Run analysis
         args <- c(
             list(x = x, time.col = time.col, status.col = status.col,
-                    name = name),
-            list(...))
-        res <- do.call(getSurvival, args) |> as.list()
+                name = name),
+            list(...)[!names(list(...)) %in% c("altexp")])
+        res <- do.call(getSurvival, args)
+        # Add results to metadata
         x <- .add_values_to_metadata(x, name, res, ...)
         return(x)
     }
@@ -98,194 +104,248 @@ setMethod("addSurvival", signature = c(x = "SummarizedExperiment"),
 setMethod("getSurvival", signature(x = "SummarizedExperiment"),
     function(x, time.col, status.col, assay.type = "counts", col.var = NULL,
         ...){
-        # input checks
-        x <- .check_data_for_survival(x, time.col, 
-            status.col, col.var, assay.type, ...)
-        
-        # extract data
-        dat <- .get_data_for_survival(x, time.col, 
-            status.col, col.var, assay.type)
-        
-        # fit survival model
-        res <- .calc_survival(
-            mat = dat$mat,
-            time = dat$time,
-            status = dat$status,
-            covar = dat$covar,
-            ...
-        )
-              
+        # Input checks
+        x <- .check_data_for_survival(
+            x, time.col, status.col, col.var, assay.type, ...)
+        # Extract data
+        args <- .get_data_for_survival(
+            x, time.col, status.col, col.var, assay.type)
+        args <- c(args, list(...))
+        # Fit survival model
+        res <- do.call(.calc_survival, args)
         return(res)
     }
 )
 
-############################# internal helpers #################################
+############################# Internal helpers #################################
 
-.check_data_for_survival <- function(x, time.col, 
-    status.col, col.var, assay.type, ...) 
-{
+# Check input validity for survival analysis
+#
+# Ensures that the required columns for survival analysis are present in
+# colData, are of the correct type, and that the requested assay exists.
+.check_data_for_survival <- function(
+        x, time.col, status.col, col.var, assay.type, ...){
+    # Ensure we are working with the correct alternative experiment
     x <- .check_and_get_altExp(x, ...)
+    # Check that 'time.col' exists in colData and is a character scalar
     .check_input(time.col, list("character scalar"), colnames(colData(x)))
+    # Check that 'status.col' exists in colData and is a character scalar
     .check_input(status.col, list("character scalar"), colnames(colData(x)))
+    # Check that the requested assay is present in the object
     .check_assay_present(assay.type, x)
-    
-    if (!is.numeric(x[[time.col]])) {
-        stop("'time.col' must be numeric", call. = FALSE)
+    # Verify that the time column contains numeric values
+    if( !is.numeric(x[[time.col]]) ){
+        stop("'time.col' must be numeric.", call. = FALSE)
     }
-    if (!is.logical(x[[status.col]]) && !is.numeric(x[[status.col]])) {
-        stop("'status.col' must be numeric (0/1) or logical", call. = FALSE)
+    # Verify that the status column is either logical or numeric (0/1)
+    if( !is.logical(x[[status.col]]) && !is.numeric(x[[status.col]]) ){
+        stop("'status.col' must be numeric (0/1) or logical.", call. = FALSE)
     }
-    if (!is.null(col.var)) {
+    # If a grouping variable is provided, check it exists in colData
+    if( !is.null(col.var) ){
         .check_input(col.var, list("character vector"), colnames(colData(x)))
     }
-    
     return(x)
 }
 
-.get_data_for_survival <- function(x, time.col, 
-    status.col, col.var, assay.type) 
-{
-    mat <- t(assay(x, assay.type))
-    time <- as.numeric(x[[time.col]])
-    status <- as.numeric(x[[status.col]])
-    
-    covar <- NULL
-    if (!is.null(col.var))
-        covar <- as.data.frame(colData(x)[, col.var, drop = FALSE])
-    list(
+# Extract and prepare data for survival analysis
+#
+# Retrieves the assay matrix, survival time, survival status,
+# and optional covariates from the input object,
+# formatted for downstream survival models.
+.get_data_for_survival <- function(
+        x, time.col, status.col, col.var, assay.type){
+    # Extract assay data (samples as rows, features as columns)
+    mat <- assay(x, assay.type) |> t()
+    # Extract survival time column and ensure numeric
+    time <- x[[time.col]] |> as.numeric()
+    # Extract survival status column and ensure numeric (0/1)
+    status <- x[[status.col]] |> as.numeric()
+    # Extract optional covariates from colData
+    if( !is.null(col.var) ){
+        col.var <- colData(x)[, col.var, drop = FALSE] |> as.data.frame()
+    }
+    # Return prepared components in a list
+    res <- list(
         mat = mat,
         time = time,
         status = status,
-        covar = covar
+        col.var = col.var
     )
+    return(res)
 }
 
-# dispatcher to calculate survival
-.calc_survival <- function(mat, time, status, covar = NULL,
-    penalized = TRUE, alpha = 0.9, nfolds = 10,
-    lambda = "lambda.1se", nvar = NULL,
-    coef.threshold = 0, ...) 
-{
-    if (penalized) {
-        ## --- penalized Cox with glmnet ---
-        .fit_penalized_cox(
-            mat, time, status, covar,
-            alpha, nfolds, nvar, lambda, coef.threshold
-        )
-    } else {
-        ## --- standard CoxPH ---
-        .fit_standard_cox(mat, time, status, covar, coef.threshold)
+
+# Dispatcher to fit a survival model
+#
+# Chooses between a penalized Cox model (via glmnet) and a standard Cox
+# proportional hazards model, based on the `penalized` argument.
+.calc_survival <- function(mat, time, status, col.var, penalized = TRUE, ...){
+    if( !.is_a_bool(penalized) ){
+        stop("'penalized' must be TRUE or FALSE.", call. = FALSE)
     }
+    FUN <- if( penalized ) .fit_penalized_cox else .fit_standard_cox
+    res <- FUN(mat = mat, time = time, status = status, col.var = col.var, ...)
+    return(res)
 }
 
+# Fit a penalized Cox proportional hazards model using glmnet
+#
+# Performs cross-validated elastic net penalized Cox regression. Optional
+# covariates
+# can be included via an offset.
 #' @importFrom survival Surv coxph
 #' @importFrom glmnet cv.glmnet glmnet
-# Penalized cox function
-.fit_penalized_cox <- function(mat, time, status, covar,
-    alpha, nfolds, nvar, lambda, coef.threshold) 
-{
+.fit_penalized_cox <- function(
+        mat, time, status, col.var, alpha = 0.9, nfolds = 10,
+        lambda = "lambda.1se", ...){
+    # Create survival response object as glmnet requires a Surv object for Cox
+    # regression
     y <- Surv(time, status)
-    
-    ## fit Cox model for covariates -> use offset
+
+    # Fit Cox model for covariates (if provided) and use as offset. This is done
+    # to estimate how microbiome profile improves the prediction compared to
+    # conventional clinical variables.
     offset <- NULL
-    if (!is.null(covar)) {
-        df0 <- data.frame(time = time, status = status, covar)
-        model0 <- coxph(y ~ ., data = df0)
-        offset <- predict(model0, type = "lp")
+    if( !is.null(col.var) ){
+        # Fit standard Cox on covariates only
+        df_covar <- data.frame(time = time, status = status, col.var)
+        model_covar <- coxph(y ~ ., data = df_covar)
+        # Compute the linear predictor (log-hazard ratio) from the
+        # covariate-only Cox model.
+        offset <- predict(model_covar, type = "lp")
     }
-    
-    cvfit <- cv.glmnet(
+
+    # Fit penalized Cox model with cross-validation. Cross-validation identifies
+    # the optimal penalty (lambda) while controlling overfitting. This model
+    # uses microbial profile as predictor. If covariates where specified, the
+    # model is built on top of the covariate-only model, i.e., to assess how
+    # much microbes improve the prediction.
+    fit <- cv.glmnet(
         x = mat, y = y, family = "cox", type.measure = "C",
         alpha = alpha, nfolds = nfolds, keep = TRUE,
         offset = offset
     )
-    ## --- lambda selection ---
-    lambda.value <- .lambda_selector(cvfit, lambda, nvar)
-    
-    ## --- coefficients ---
-    coefs <- as.vector(coef(cvfit, s = lambda.value))
-    names(coefs) <- rownames(coef(cvfit, s = lambda.value))
-    coef.res <- .filter_and_normalize_coefs(coefs, coef.threshold)
-    
-    ## --- predictions ---
-    risk_scores <- as.numeric(
-        predict(cvfit, mat, s = lambda.value, newoffset = offset)
-    )
-    ## --- apparent C-index ---
-    Cindex_signature <- .compute_cindex(risk_scores, y, penalized = TRUE)
-    ## --- CV stats at chosen lambda ---
-    idrow <- max(which(cvfit$glmnet.fit$lambda >= lambda.value))
-    
-    list(
-        coefficients = coef.res$coefficients,
-        selected.features = coef.res$selected.features,
+
+    # Select lambda value based on user input and optional nvar constraint to
+    # allow selecting lambda that balances model sparsity and predictive
+    # performance
+    lambda_value <- .lambda_selector(fit, lambda, ...)
+    # Extract coefficients at chosen lambda
+    coefs <- coef(fit, s = lambda_value)
+    coefs <- setNames(as.vector(coefs), rownames(coefs))
+    coefs <- .filter_and_normalize_coefs(coefs, ...)
+
+    # Compute risk scores for all samples
+    risk_scores <- predict(fit, mat, s = lambda_value, newoffset = offset) |>
+        as.numeric()
+    # Compute concordance index
+    c_index <- .compute_c_index(risk_scores, y, penalized = TRUE)
+
+    # Identify row in CV results corresponding to chosen lambda
+    id_row <- which(fit[["glmnet.fit"]][["lambda"]] >= lambda_value) |> max()
+
+    # Return all results as a list
+    res <- list(
+        coefficients = coefs,
         risk_scores = risk_scores,
-        cindex.apparent = Cindex_signature,
-        cv.cindex.mean = cvfit$cvm[idrow],
-        cv.cindex.sd = cvfit$cvsd[idrow],
-        fit = cvfit
+        c_index = c_index,
+        c_index_cv_mean = fit[["cvm"]][[id_row]],
+        c_index_cv_sd = fit[["cvsd"]][[id_row]],,
+        fit = fit
     )
+    return(res)
 }
 
-# Standard cox function
-.fit_standard_cox <- function(mat, time, status, covar, coef.threshold) {
-    ## --- standard CoxPH ---
+# Fit a standard Cox proportional hazards model
+#
+# Performs a standard (unpenalized) Cox regression on features and optional
+# covariates. This is used when no penalization is desired.
+.fit_standard_cox <- function(mat, time, status, col.var, ...){
+    # Create survival response object as it is required input format for coxph
     y <- Surv(time, status)
-    df <- data.frame(time = time, status = status, mat, covar)
+    # Combine survival times, status, features, and optional covariates into one
+    # data frame
+    df <- data.frame(time = time, status = status, mat, col.var)
+    # Fit standard Cox proportional hazards model
     fit <- coxph(y ~ ., data = df)
-    
-    ## raw coefficients
+
+    # Extract raw coefficients
     coefs <- coef(fit)
-    ## normalize
-    coef.res <- .filter_and_normalize_coefs(coefs, coef.threshold)
-    ## predictions
+    # Filter and normalize coefficients
+    coefs <- .filter_and_normalize_coefs(coefs, ...)
+    #  Compute risk scores for all samples
     risk_scores <- predict(fit, type = "lp")
-    ## apparent C-index
-    Cindex_signature <- .compute_cindex(risk_scores, y, fit, penalized = FALSE)
-    
-    list(
-        coefficients = coef.res$coefficients,
-        selected.features = coef.res$selected.features,
+    #  Compute apparent concordance index
+    c_index <- .compute_c_index(risk_scores, y, fit, penalized = FALSE)
+
+    # Return results as a list
+    res <- list(
+        coefficients = coefs,
         risk_scores = as.numeric(risk_scores),
-        cindex.apparent = Cindex_signature,
-        cv.cindex.mean = NULL,
-        cv.cindex.sd = NULL,
+        c_index = c_index,
         fit = fit
     )
+    return(res)
 }
 
-.filter_and_normalize_coefs <- function(coefs, threshold) {
-    selected <- which(abs(coefs) > threshold)
-    coef.filtered <- coefs[selected]
-    if (length(coef.filtered) > 0) {
-        coef.filtered <- 2 * coef.filtered / sum(abs(coef.filtered))
+# Filter and normalize coefficients
+#
+# Removes coefficients with magnitude below a threshold and rescales the
+# remaining coefficients.
+.filter_and_normalize_coefs <- function(coefs, coef.threshold = 0, ...) {
+    # Identify coefficients exceeding the threshold in absolute value
+    # to remove very small coefficients so that we can focus on meaningful
+    # predictors
+    res <- coefs[ abs(coefs) > coef.threshold ]
+    # Rescale the remaining coefficients
+    if( length(res) > 0L ){
+        res <- 2*res / sum(abs(res))
     }
-    list(
-        coefficients = coef.filtered,
-        selected.features = names(coef.filtered)
-    )
+    return(res)
 }
 
-.compute_cindex <- function(risk_scores, y, fit = NULL, penalized = TRUE) {
-    if (length(risk_scores) == 0 || all(risk_scores == 0)) return(0.5)
-    if (penalized) {
-        glmnet::Cindex(pred = risk_scores, y)
-    } else {
-        summary(fit)$concordance[1]
+# Compute concordance index (C-index) for survival predictions
+#
+# Evaluates the discriminative ability of a survival model:
+# how well the predicted risk scores rank patients according to observed
+# survival.
+#' @importFrom glmnet Cindex
+.compute_c_index <- function(risk_scores, y, fit = NULL, penalized = TRUE){
+    res <- NA
+    if( !(length(risk_scores) == 0 || all(risk_scores == 0)) ){
+        if( penalized ){
+            #  Penalized Cox: use glmnet's Cindex function
+            res <- Cindex(pred = risk_scores, y)
+        } else {
+            # Standard Cox: extract C-index from the model summary
+            res <- summary(fit)[["concordance"]][[1L]]
+        }
     }
+    return(res)
 }
 
-.lambda_selector <- function(cvfit, lambda, nvar = NULL) {
-    ## --- nvar cutoff handling ---
-    if (!is.null(nvar)) {
-        valid <- which(cvfit$glmnet.fit$df <= nvar)
-        if (length(valid) > 0) lambda <- cvfit$glmnet.fit$lambda[max(valid)]
+# Select lambda value from a cross-validated glmnet fit
+#
+# Allows choosing lambda based on CV results or restricting the model to a
+# maximum number of variables.
+.lambda_selector <- function(fit, lambda, nvar = NULL, ...) {
+    # Handle nvar cutoff
+    if( !is.null(nvar) ){
+        # Identify lambda values with number of non-zero coefficients <= nvar.
+        valid <- which(fit[["glmnet.fit"]][["df"]] <= nvar)
+        # If such lambdas exist, choose the lambda for model that has
+        # highest number of taxa.
+        if( length(valid) > 0L ){
+            lambda <- fit[["glmnet.fit"]][max(valid), "lambda"]
+        }
+    }
+    # Handle lambda selection by name
+    if( is.character(lambda) ){
+        # Convert character string to actual numeric lambda value from fit
+        # Why: "lambda.min" or "lambda.1se" are stored inside fit; this
+        # retrieves the correct numeric value
+        lambda <- fit[[lambda]]
     }
-    ## --- lambda selection ---
-    if (is.character(lambda)) {
-        switch(lambda,
-            "lambda.1se" = cvfit$lambda.1se,
-            "lambda.min" = cvfit$lambda.min
-        )
-    } else lambda
-}
\ No newline at end of file
+    return(lambda)
+}
diff --git a/man/getSurvival.Rd b/man/getSurvival.Rd
index 2ec0ca6..0ee7dac 100644
--- a/man/getSurvival.Rd
+++ b/man/getSurvival.Rd
@@ -29,16 +29,20 @@ object.}
 
 \item{...}{additional arguments.
 \itemize{
-\item \code{penalized}: \code{Logical}. If TRUE, fit penalized Cox
-regression using \code{glmnet}. If FALSE, fit standard Cox model using
-\code{survival::coxph}. (Default: \code{TRUE})
+\item \code{penalized}: \code{Logical}. If \code{TRUE}, fit penalized Cox
+regression using \code{glmnet}. If \code{FALSE}, fit standard Cox model
+using \code{survival::coxph}. (Default: \code{TRUE})
 
 \item \code{lambda}: \code{Character or numeric}. Penalization parameter
 passed to \code{\link[glmnet]{cv.glmnet}}. Use \code{"lambda.1se"},
 \code{"lambda.min"}, or a numeric value. (Default: \code{"lambda.1se"})
 
-\item \code{alpha}: \code{Numeric scalar}. Elastic net mixing parameter:
-1 = Lasso, 0 = Ridge. (Default: \code{0.9})
+\item \code{alpha}: \code{Numeric scalar}. Elastic net mixing parameter
+that controls the balance between Lasso and Ridge regression:
+\code{alpha = 1} corresponds to Lasso,
+\code{alpha = 0} corresponds to Ridge.
+Values between 0 and 1 specify a combination of the two.
+(Default: \code{0.9})
 
 \item \code{nfolds}: \code{Integer scalar}. Number of cross-validation
 folds for \code{cv.glmnet}. (Default: \code{10})
@@ -54,8 +58,8 @@ for a coefficient to be included in the final model. (Default: \code{0})
 representing time to event or follow-up time. Must be numeric.}
 
 \item{status.col}{\code{Character scalar}. Column name in \code{colData(x)}
-representing event occurrence. Accepts numeric (0/1) or logical (FALSE/TRUE)
-values.}
+representing event occurrence. Accepts numeric (\code{0}/\code{1}) or
+logical (\code{TRUE}/\code{FALSE}) values.}
 
 \item{name}{\code{Character vector}. Specifies a column name for storing
 divergence results.
@@ -72,7 +76,7 @@ columns in \code{colData(x)} to adjust for in the survival model.
 A list with model summaries:
 \itemize{
 \item \code{coefficients}: estimated model coefficients
-\item \code{selected.features}: nonzero features in penalized model
+\item \code{selected_features}: nonzero features in penalized model
 \item \code{risk_scores}: predicted risk scores
 \item \code{cindex.apparent}: apparent concordance index
 \item \code{cv.cindex.mean}: mean cross-validated C-index (if penalized)
@@ -88,10 +92,13 @@ Data transformations (e.g. pairwise log-ratios) should be handled upstream
 model.
 }
 \examples{
-# data(SurvivalData)
-# tse <- SurvivalData
-# getSurvival(tse, time.col = "T1Dweek", status.col = "T1D", 
-# col.var = c("Sex", "Antibiotics"))
+data(crohn_survival)
+tse <- crohn_survival
+tse <- transformAssay(tse, method = "relabundance")
+fit <- getSurvival(
+    tse, assay.type = "relabundance",
+    time.col = "event_time", status.col = "event"
+)
 
 }
 \references{

From 9fc9a92bbb33bf082db4c6fc75de5fafc2a53c40 Mon Sep 17 00:00:00 2001
From: Tuomas Borman <tvborm@utu.fi>
Date: Wed, 10 Sep 2025 16:30:04 +0300
Subject: [PATCH 7/8] up

---
 R/getSurvival.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/getSurvival.R b/R/getSurvival.R
index caedcb8..8206b99 100644
--- a/R/getSurvival.R
+++ b/R/getSurvival.R
@@ -9,7 +9,8 @@
 #' @description
 #' Fit a (penalized) Cox proportional hazards model on microbiome data contained
 #' in a SummarizedExperiment object. Data transformations (e.g. pairwise
-#' log-ratios) should be handled upstream (e.g. with mia::transformAssay).
+#' log-ratios) should be handled upstream
+#' (e.g. with \code{mia::transformAssay()}).
 #'
 #' @param time.col \code{Character scalar}. Column name in \code{colData(x)}
 #' representing time to event or follow-up time. Must be numeric.

From a33c6d120c2dccafc729ea1beb91be38efe91998 Mon Sep 17 00:00:00 2001
From: Daena Rys <rysdaena8@gmail.com>
Date: Fri, 12 Sep 2025 14:21:20 +0300
Subject: [PATCH 8/8] add unit tests

Signed-off-by: Daena Rys <rysdaena8@gmail.com>
---
 R/getSurvival.R                   |  43 ++--
 man/getSurvival.Rd                |  31 +--
 tests/testthat/test-getSurvival.R | 345 ++++++++++++++++++++++++++++++
 3 files changed, 380 insertions(+), 39 deletions(-)
 create mode 100644 tests/testthat/test-getSurvival.R

diff --git a/R/getSurvival.R b/R/getSurvival.R
index 8206b99..422a1c7 100644
--- a/R/getSurvival.R
+++ b/R/getSurvival.R
@@ -15,7 +15,7 @@
 #' @param time.col \code{Character scalar}. Column name in \code{colData(x)}
 #' representing time to event or follow-up time. Must be numeric.
 #'
-#' @param status.col \code{Character scalar}. Column name in \code{colData(x)}
+#' @param event.col \code{Character scalar}. Column name in \code{colData(x)}
 #' representing event occurrence. Accepts numeric (\code{0}/\code{1}) or
 #' logical (\code{TRUE}/\code{FALSE}) values.
 #'
@@ -54,7 +54,7 @@
 #'
 #' @return A list with model summaries:
 #' \itemize{
-#'   \item \code{coefficients}: estimated model coefficients
+#'   \item \code{coef}: estimated model coefficients
 #'   \item \code{risk_scores}: predicted risk scores
 #'   \item \code{c_index}: apparent concordance index
 #'   \item \code{c_index_cv_mean}: mean cross-validated C-index (if penalized)
@@ -77,7 +77,7 @@
 #' tse <- transformAssay(tse, method = "relabundance")
 #' fit <- getSurvival(
 #'     tse, assay.type = "relabundance",
-#'     time.col = "event_time", status.col = "event"
+#'     time.col = "event_time", event.col = "event"
 #' )
 #'
 NULL
@@ -85,12 +85,12 @@ NULL
 #' @rdname getSurvival
 #' @export
 setMethod("addSurvival", signature = c(x = "SummarizedExperiment"),
-    function(x, time.col, status.col, name = "survival", ...){
+    function(x, time.col, event.col, name = "survival", ...){
         .check_input(name, "character scalar")
         x <- .check_and_get_altExp(x, ...)
         # Run analysis
         args <- c(
-            list(x = x, time.col = time.col, status.col = status.col,
+            list(x = x, time.col = time.col, event.col = event.col,
                 name = name),
             list(...)[!names(list(...)) %in% c("altexp")])
         res <- do.call(getSurvival, args)
@@ -103,14 +103,14 @@ setMethod("addSurvival", signature = c(x = "SummarizedExperiment"),
 #' @rdname getSurvival
 #' @export
 setMethod("getSurvival", signature(x = "SummarizedExperiment"),
-    function(x, time.col, status.col, assay.type = "counts", col.var = NULL,
+    function(x, time.col, event.col, assay.type = "counts", col.var = NULL,
         ...){
         # Input checks
         x <- .check_data_for_survival(
-            x, time.col, status.col, col.var, assay.type, ...)
+            x, time.col, event.col, col.var, assay.type, ...)
         # Extract data
         args <- .get_data_for_survival(
-            x, time.col, status.col, col.var, assay.type)
+            x, time.col, event.col, col.var, assay.type)
         args <- c(args, list(...))
         # Fit survival model
         res <- do.call(.calc_survival, args)
@@ -125,13 +125,13 @@ setMethod("getSurvival", signature(x = "SummarizedExperiment"),
 # Ensures that the required columns for survival analysis are present in
 # colData, are of the correct type, and that the requested assay exists.
 .check_data_for_survival <- function(
-        x, time.col, status.col, col.var, assay.type, ...){
+        x, time.col, event.col, col.var, assay.type, ...){
     # Ensure we are working with the correct alternative experiment
     x <- .check_and_get_altExp(x, ...)
     # Check that 'time.col' exists in colData and is a character scalar
     .check_input(time.col, list("character scalar"), colnames(colData(x)))
-    # Check that 'status.col' exists in colData and is a character scalar
-    .check_input(status.col, list("character scalar"), colnames(colData(x)))
+    # Check that 'event.col' exists in colData and is a character scalar
+    .check_input(event.col, list("character scalar"), colnames(colData(x)))
     # Check that the requested assay is present in the object
     .check_assay_present(assay.type, x)
     # Verify that the time column contains numeric values
@@ -139,8 +139,8 @@ setMethod("getSurvival", signature(x = "SummarizedExperiment"),
         stop("'time.col' must be numeric.", call. = FALSE)
     }
     # Verify that the status column is either logical or numeric (0/1)
-    if( !is.logical(x[[status.col]]) && !is.numeric(x[[status.col]]) ){
-        stop("'status.col' must be numeric (0/1) or logical.", call. = FALSE)
+    if( !is.logical(x[[event.col]]) && !is.numeric(x[[event.col]]) ){
+        stop("'event.col' must be numeric (0/1) or logical.", call. = FALSE)
     }
     # If a grouping variable is provided, check it exists in colData
     if( !is.null(col.var) ){
@@ -155,13 +155,13 @@ setMethod("getSurvival", signature(x = "SummarizedExperiment"),
 # and optional covariates from the input object,
 # formatted for downstream survival models.
 .get_data_for_survival <- function(
-        x, time.col, status.col, col.var, assay.type){
+        x, time.col, event.col, col.var, assay.type){
     # Extract assay data (samples as rows, features as columns)
     mat <- assay(x, assay.type) |> t()
     # Extract survival time column and ensure numeric
     time <- x[[time.col]] |> as.numeric()
     # Extract survival status column and ensure numeric (0/1)
-    status <- x[[status.col]] |> as.numeric()
+    status <- x[[event.col]] |> as.numeric()
     # Extract optional covariates from colData
     if( !is.null(col.var) ){
         col.var <- colData(x)[, col.var, drop = FALSE] |> as.data.frame()
@@ -248,11 +248,11 @@ setMethod("getSurvival", signature(x = "SummarizedExperiment"),
 
     # Return all results as a list
     res <- list(
-        coefficients = coefs,
+        coef = coefs,
         risk_scores = risk_scores,
         c_index = c_index,
         c_index_cv_mean = fit[["cvm"]][[id_row]],
-        c_index_cv_sd = fit[["cvsd"]][[id_row]],,
+        c_index_cv_sd = fit[["cvsd"]][[id_row]],
         fit = fit
     )
     return(res)
@@ -267,7 +267,12 @@ setMethod("getSurvival", signature(x = "SummarizedExperiment"),
     y <- Surv(time, status)
     # Combine survival times, status, features, and optional covariates into one
     # data frame
-    df <- data.frame(time = time, status = status, mat, col.var)
+    if( is.null(col.var) ){
+        df <- data.frame(time = time, status = status, mat)
+    } else {
+        df <- data.frame(time = time, status = status, mat, col.var)
+    }
+    
     # Fit standard Cox proportional hazards model
     fit <- coxph(y ~ ., data = df)
 
@@ -282,7 +287,7 @@ setMethod("getSurvival", signature(x = "SummarizedExperiment"),
 
     # Return results as a list
     res <- list(
-        coefficients = coefs,
+        coef = coefs,
         risk_scores = as.numeric(risk_scores),
         c_index = c_index,
         fit = fit
diff --git a/man/getSurvival.Rd b/man/getSurvival.Rd
index 0ee7dac..9eac72c 100644
--- a/man/getSurvival.Rd
+++ b/man/getSurvival.Rd
@@ -11,16 +11,9 @@ getSurvival(x, ...)
 
 addSurvival(x, ...)
 
-\S4method{addSurvival}{SummarizedExperiment}(x, time.col, status.col, name = "survival", ...)
+\S4method{addSurvival}{SummarizedExperiment}(x, time.col, event.col, name = "survival", ...)
 
-\S4method{getSurvival}{SummarizedExperiment}(
-  x,
-  time.col,
-  status.col,
-  assay.type = "counts",
-  col.var = NULL,
-  ...
-)
+\S4method{getSurvival}{SummarizedExperiment}(x, time.col, event.col, assay.type = "counts", col.var = NULL, ...)
 }
 \arguments{
 \item{x}{A
@@ -57,7 +50,7 @@ for a coefficient to be included in the final model. (Default: \code{0})
 \item{time.col}{\code{Character scalar}. Column name in \code{colData(x)}
 representing time to event or follow-up time. Must be numeric.}
 
-\item{status.col}{\code{Character scalar}. Column name in \code{colData(x)}
+\item{event.col}{\code{Character scalar}. Column name in \code{colData(x)}
 representing event occurrence. Accepts numeric (\code{0}/\code{1}) or
 logical (\code{TRUE}/\code{FALSE}) values.}
 
@@ -75,21 +68,19 @@ columns in \code{colData(x)} to adjust for in the survival model.
 \value{
 A list with model summaries:
 \itemize{
-\item \code{coefficients}: estimated model coefficients
-\item \code{selected_features}: nonzero features in penalized model
+\item \code{coef}: estimated model coefficients
 \item \code{risk_scores}: predicted risk scores
-\item \code{cindex.apparent}: apparent concordance index
-\item \code{cv.cindex.mean}: mean cross-validated C-index (if penalized)
-\item \code{cv.cindex.sd}: SD of cross-validated C-index (if penalized)
+\item \code{c_index}: apparent concordance index
+\item \code{c_index_cv_mean}: mean cross-validated C-index (if penalized)
+\item \code{c_index_cv_sd}: SD of cross-validated C-index (if penalized)
 \item \code{fit}: fitted model object (coxph or cv.glmnet)
 }
 }
 \description{
 Fit a (penalized) Cox proportional hazards model on microbiome data contained
-in a SummarizedExperiment object.
-Data transformations (e.g. pairwise log-ratios) should be handled upstream
-(e.g. with mia::transformAssay). This function focuses on the statistical
-model.
+in a SummarizedExperiment object. Data transformations (e.g. pairwise
+log-ratios) should be handled upstream
+(e.g. with \code{mia::transformAssay()}).
 }
 \examples{
 data(crohn_survival)
@@ -97,7 +88,7 @@ tse <- crohn_survival
 tse <- transformAssay(tse, method = "relabundance")
 fit <- getSurvival(
     tse, assay.type = "relabundance",
-    time.col = "event_time", status.col = "event"
+    time.col = "event_time", event.col = "event"
 )
 
 }
diff --git a/tests/testthat/test-getSurvival.R b/tests/testthat/test-getSurvival.R
new file mode 100644
index 0000000..b4bfccb
--- /dev/null
+++ b/tests/testthat/test-getSurvival.R
@@ -0,0 +1,345 @@
+# Check that input checks work
+test_that("getSurvival validates input", {
+    # Load and prepare data
+    data(crohn_survival)
+    tse <- crohn_survival
+    tse <- transformAssay(tse, method = "relabundance")
+    
+    # Test that function works with valid inputs
+    expect_no_error({
+        fit <- getSurvival(
+            tse, 
+            assay.type = "relabundance",
+            time.col = "event_time", 
+            event.col = "event"
+        )
+    })
+    
+    expect_error({
+        getSurvival(
+            tse, 
+            assay.type = "relabundance"
+        )
+    })
+    expect_error({
+        getSurvival(
+            tse
+        )
+    })
+    
+    # Test error with non-existent time column
+    expect_error({
+        getSurvival(
+            tse, 
+            assay.type = "relabundance",
+            time.col = "nonexistent_time", 
+            event.col = "event"
+        )
+    })
+    
+    expect_error({
+        getSurvival(
+            tse, 
+            assay.type = "relabundance",
+            event.col = "event"
+        )
+    })
+    
+    # Test error with non-existent event column
+    expect_error({
+        getSurvival(
+            tse, 
+            assay.type = "relabundance",
+            time.col = "event_time", 
+            event.col = "nonexistent_event"
+        )
+    })
+    
+    expect_error({
+        getSurvival(
+            tse, 
+            assay.type = "relabundance",
+            time.col = "event_time"
+        )
+    })
+    
+    # Test error with non-existent assay
+    expect_error({
+        getSurvival(
+            tse, 
+            assay.type = "nonexistent_assay",
+            time.col = "event_time", 
+            event.col = "event"
+        )
+    })
+})
+
+# Check that method works correctly
+test_that("getSurvival works correctly", {
+    # Load the example data
+    data(crohn_survival)
+    tse <- crohn_survival
+    
+    # Check that the data loaded correctly
+    expect_s4_class(tse, "TreeSummarizedExperiment")
+    
+    # Verify required columns exist
+    expect_true("event_time" %in% colnames(colData(tse)))
+    expect_true("event" %in% colnames(colData(tse)))
+    
+    # Transform to relative abundance as in example
+    tse <- transformAssay(tse, method = "relabundance")
+    
+    # Test basic getSurvival functionality
+    fit <- getSurvival(
+        tse, 
+        assay.type = "relabundance",
+        time.col = "event_time", 
+        event.col = "event"
+    )
+    
+    # Check that result is a list with expected components
+    expect_type(fit, "list")
+    expect_true(all(c("coef", "risk_scores", "c_index", "fit") %in% names(fit)))
+    
+    # Check coefficients
+    expect_type(fit$coef, "double")
+    expect_true(is.numeric(fit$coef))
+    
+    # Check risk scores match number of samples
+    expect_length(fit$risk_scores, ncol(tse))
+    expect_true(is.numeric(fit$risk_scores))
+    expect_true(all(is.finite(fit$risk_scores)))
+    
+    # Check C-index is valid
+    expect_true(is.numeric(fit$c_index))
+    expect_true(fit$c_index >= 0 && fit$c_index <= 1)
+    
+    # Check that fit object exists and is correct type
+    expect_true(!is.null(fit$fit))
+    expect_s3_class(fit$fit, "cv.glmnet")
+    
+    # Check cross-validation results are present (penalized = TRUE by default)
+    expect_true("c_index_cv_mean" %in% names(fit))
+    expect_true("c_index_cv_sd" %in% names(fit))
+    expect_true(is.numeric(fit$c_index_cv_mean))
+    expect_true(is.numeric(fit$c_index_cv_sd))
+})
+
+# Check that method handles different params passed in ...
+test_that("getSurvival handles different parameters", {
+    # Load and prepare data
+    data(crohn_survival)
+    tse <- crohn_survival
+    tse <- transformAssay(tse, method = "relabundance")
+    
+    # Test with different alpha values
+    fit_ridge <- getSurvival(
+        tse, 
+        assay.type = "relabundance",
+        time.col = "event_time", 
+        event.col = "event",
+        alpha = 0.1  # Ridge regression
+    )
+    
+    fit_lasso <- getSurvival(
+        tse, 
+        assay.type = "relabundance",
+        time.col = "event_time", 
+        event.col = "event", 
+        alpha = 1.0  # Lasso regression
+    )
+    
+    # Both should return valid results
+    expect_type(fit_ridge, "list")
+    expect_type(fit_lasso, "list")
+    
+    # Results should be different due to different regularization
+    expect_false(identical(fit_ridge$coef, fit_lasso$coef))
+    expect_false(identical(fit_ridge$risk_scores, fit_lasso$risk_scores))
+    
+    # Test with standard Cox regression (no penalization)
+    fit_standard <- getSurvival(
+        tse, 
+        assay.type = "relabundance",
+        time.col = "event_time", 
+        event.col = "event",
+        penalized = FALSE
+    )
+    
+    expect_type(fit_standard, "list")
+    expect_s3_class(fit_standard$fit, "coxph")
+    
+    # Standard model should not have CV results
+    expect_false("c_index_cv_mean" %in% names(fit_standard))
+    expect_false("c_index_cv_sd" %in% names(fit_standard))
+})
+
+# Check that method handles covariates
+test_that("getSurvival handles covariates", {
+    # Load and prepare data
+    data(crohn_survival)
+    tse <- crohn_survival
+    tse <- transformAssay(tse, method = "relabundance")
+    
+    set.seed(42)  # For reproducibility
+    
+    # Determine sample size
+    n <- ncol(tse)
+    
+    # Generate synthetic covariates
+    colData(tse)$age <- round(rnorm(n, mean = 40, sd = 12))  # ages, e.g. 20–70
+    colData(tse)$sex <- factor(sample(c("M", "F"), n, replace = TRUE))
+    
+    # Test with available covariates (use first available column)
+    fit_with_covs <- getSurvival(
+        tse, 
+        assay.type = "relabundance",
+        time.col = "event_time", 
+        event.col = "event",
+        col.var = c("age", "sex")
+    )
+    
+    # Test without covariates for comparison
+    fit_without_covs <- getSurvival(
+        tse, 
+        assay.type = "relabundance",
+        time.col = "event_time", 
+        event.col = "event"
+    )
+    
+    # Both should return valid results
+    expect_type(fit_with_covs, "list")
+    expect_type(fit_without_covs, "list")
+    
+    # Results should be different when covariates are included
+    expect_false(identical(fit_with_covs$risk_scores, fit_without_covs$risk_scores))
+    
+    # Both should have same structure
+    essential_components <- c("coef", "risk_scores", "c_index", "fit")
+    expect_true(all(essential_components %in% names(fit_with_covs)))
+    expect_true(all(essential_components %in% names(fit_without_covs)))
+})
+
+# Check that method handles coefficient filtering
+test_that("getSurvival handles coefficient filtering", {
+    # Load and prepare data
+    data(crohn_survival)
+    tse <- crohn_survival
+    tse <- transformAssay(tse, method = "relabundance")
+    
+    # Test with different coefficient thresholds
+    fit_no_filter <- getSurvival(
+        tse, 
+        assay.type = "relabundance",
+        time.col = "event_time", 
+        event.col = "event",
+        coef.threshold = 0  # No filtering
+    )
+    
+    fit_filtered <- getSurvival(
+        tse, 
+        assay.type = "relabundance",
+        time.col = "event_time", 
+        event.col = "event",
+        coef.threshold = 0.1  # Filter small coefficients
+    )
+    
+    # Filtered result should have fewer or equal coefficients
+    expect_true(length(fit_filtered$coef) <= length(fit_no_filter$coef))
+    
+    # All remaining coefficients should exceed the threshold
+    if(length(fit_filtered$coef) > 0) {
+        expect_true(all(abs(fit_filtered$coef) > 0.1))
+    }
+    
+    # Test with high threshold that might filter out all coefficients
+    fit_high_filter <- getSurvival(
+        tse, 
+        assay.type = "relabundance",
+        time.col = "event_time", 
+        event.col = "event",
+        coef.threshold = 10  # Very high threshold
+    )
+    
+    # Should still return valid structure even if no coefficients pass threshold
+    expect_type(fit_high_filter, "list")
+    expect_true("coef" %in% names(fit_high_filter))
+})
+
+# Check that method handles different transformations
+test_that("getSurvival handles different assay types", {
+    # Load data
+    data(crohn_survival)
+    tse <- crohn_survival
+    
+    # Test with original counts
+    fit_counts <- getSurvival(
+        tse, 
+        assay.type = "counts",
+        time.col = "event_time", 
+        event.col = "event"
+    )
+    
+    # Transform and test with relative abundances
+    tse <- transformAssay(tse, method = "relabundance")
+    fit_relabundance <- getSurvival(
+        tse, 
+        assay.type = "relabundance",
+        time.col = "event_time", 
+        event.col = "event"
+    )
+    
+    # Both should return valid results
+    expect_type(fit_counts, "list")
+    expect_type(fit_relabundance, "list")
+    
+    # Results should be different due to different data scaling
+    expect_false(identical(fit_counts$coef, fit_relabundance$coef))
+    expect_false(identical(fit_counts$risk_scores, fit_relabundance$risk_scores))
+    
+    # Add CLR transformation and test
+    tse <- transformAssay(tse, method = "clr", pseudocount = TRUE)
+    fit_clr <- getSurvival(
+        tse, 
+        assay.type = "clr",
+        time.col = "event_time", 
+        event.col = "event"
+    )
+    
+    expect_type(fit_clr, "list")
+    expect_false(identical(fit_clr$coef, fit_relabundance$coef))
+})
+
+# Check that methods results are reproducible
+test_that("getSurvival results are reproducible", {
+    # Load and prepare data
+    data(crohn_survival)
+    tse <- crohn_survival
+    tse <- transformAssay(tse, method = "relabundance")
+    
+    # Set seed and run analysis
+    set.seed(123)
+    fit1 <- getSurvival(
+        tse, 
+        assay.type = "relabundance",
+        time.col = "event_time", 
+        event.col = "event",
+        nfolds = 5  # Use fewer folds for faster testing
+    )
+    
+    # Set same seed and run analysis again
+    set.seed(123)
+    fit2 <- getSurvival(
+        tse, 
+        assay.type = "relabundance",
+        time.col = "event_time", 
+        event.col = "event",
+        nfolds = 5
+    )
+    
+    # Results should be identical with same seed
+    expect_identical(fit1$coef, fit2$coef)
+    expect_identical(fit1$risk_scores, fit2$risk_scores)
+    expect_equal(fit1$c_index, fit2$c_index)
+})
\ No newline at end of file