diff --git a/DESCRIPTION b/DESCRIPTION index 1a011cd..ccd716c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: miaTime Type: Package Title: Microbiome Time Series Analysis -Version: 0.99.10 +Version: 0.99.11 Authors@R: c(person(given = "Leo", family = "Lahti", role = c("aut"), email = "leo.lahti@iki.fi", diff --git a/NEWS b/NEWS index ca47b03..37fae73 100755 --- a/NEWS +++ b/NEWS @@ -1,3 +1,7 @@ +Changes in version 0.99.11 +Date: 2025-09-22 ++ Added Kumaraswamy2024 data + Changes in version 0.99.10 Date: 2025-09-06 + Added survival data on Crohn's disease (crohn_survival) diff --git a/R/miaTime.R b/R/miaTime.R index 818478e..588f3d0 100755 --- a/R/miaTime.R +++ b/R/miaTime.R @@ -185,3 +185,46 @@ NULL #' @keywords #' data NULL + +#' @title Kumaraswamy2024 +#' @description +#' The Kumaraswamy2024 includes microbiota and metabolite profiling data +#' from 78 Indian individuals (40 males, 38 females). +#' +#' The Indian subjects were grouped into four diet groups (~20 subjects per group), +#' and fecal samples were collected across three seasonal time points. +#' +#' The microbiota profiling was performed using HITChip microarray analysis +#' (in duplicate), qPCR (in triplicate with eight-point standard curves), and +#' LC-HRMS and HPLC metabolite profiling with internal standards. +#' +#' Column metadata includes diet group assignment, sampling season, sex, BMI, +#' age, and questionnaire-based lifestyle metadata. +#' +#' Quality control metrics include Pearson correlation (>0.98) for HITChip, +#' qPCR assay efficiency (>0.99), and technical replicates for HPLC and qPCR. 
+#' +#' Data sources: +#' - Microbiota HITChip microarray data +#' - qPCR absolute abundance data +#' - Chemical profiling data (HPLC, LC-HRMS) +#' - Sample metadata (diet, lifestyle) +#' +#' Processed and raw data are available via: +#' - Zenodo (DOI: https://doi.org/10.5281/zenodo.14424024) +#' - NCBI-SRA (fermented foods 16S rRNA sequencing, accession: PRJNA1191989) +#' +#' @name Kumaraswamy2024 +#' @docType data +#' @author Jeyaram, K., Lahti, L., Tims, S. et al +#' @return Loads the data set in R. +#' @references +#' Jeyaram, K., Lahti, L., Tims, S. et al. Fermented foods affect the seasonal +#' stability of gut bacteria in an Indian rural population. +#' Nat Commun 16, 771 \url{https://doi.org/10.1038/s41467-025-56014-6} +#' @usage data(Kumaraswamy2024) +#' @format The data set in +#' \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}} +#' format. +#' @keywords data +NULL diff --git a/data/Kumaraswamy2024.rda b/data/Kumaraswamy2024.rda new file mode 100644 index 0000000..9ab0423 Binary files /dev/null and b/data/Kumaraswamy2024.rda differ diff --git a/inst/scripts/Kumaraswamy2024.R b/inst/scripts/Kumaraswamy2024.R new file mode 100644 index 0000000..b50b7da --- /dev/null +++ b/inst/scripts/Kumaraswamy2024.R @@ -0,0 +1,105 @@ +# Load required libraries +library(TreeSummarizedExperiment) +library(Cairo) +library(dplyr) +library(readxl) + +# Read an Excel file into a matrix, using its first column as the row names +read_data <- function (f) { + x <- read_excel(f) + rownams <- unname(unlist(x[,1])) + x <- x[, -1] + x <- as.matrix(x) + rownames(x) <- rownams + return(x) +} +# Get data from: https://zenodo.org/records/14424024 (files are read from ../data) +# Abundance profiles (genus, phylum and oligo level HITChip tables) +gen <- read_data(file.path("..", "data", "Genus_hitchip.xlsx")) +phy <- read_data(file.path("..", "data", "Phylum_hitchip.xlsx")) +oli <- read_data(file.path("..", "data", "Oligo_hitchip.xlsx")) + +# Sample metadata; row names are reset from the "sample" column +md <- read_data(file.path("..", "data", "modified_file.xlsx")) +rownames(md) <- unname(md[, "sample"]) +md <- 
as.data.frame(md) +md[14:61] <- lapply(md[14:61], as.logical) +# Group-A: never consumed Hawaijar and Dahi (n=20, control) +# Group-B: consume Hawaijar and Dahi (n=21) +# Group-C: consume Hawaijar, not Dahi (n=23) +# Group-D: consume Dahi, not Hawaijar (n=14) +md[, "timepoint"] <- as.numeric(unlist(md[, "timepoint"])) +md[, "season"] <- factor(unlist(md[, "season"]), + levels=c("summer", "autumn", "winter")) +factors <- c("age", "sex", "bmi", "clan", "nature_of_birth", + "marital_status", "residence", "subject", "group") +for(f in factors) { + md[, f] <- factor(unlist(md[, f]), levels=sort(unique(md[, f]))) +} + +# Create tse data object (genus-level signal as the assay, md as the sample metadata) +tse <-TreeSummarizedExperiment( + assays=SimpleList(signal=gen), colData=DataFrame(md)) +# Add altExps (phylum- and oligo-level signal) +altExp(tse, "phylum") <- TreeSummarizedExperiment( + assays=SimpleList(signal=phy)) + +altExp(tse, "oligo") <- TreeSummarizedExperiment( + assays=SimpleList(signal=oli)) +# There is one NA, replace it with min value +assay(altExp(tse, "oligo"), "signal")[is.na(assay(altExp(tse, "oligo"), "signal"))] <- min(assay(altExp(tse, "oligo"), "signal"), na.rm=TRUE) + +# ------------------------------------------- + +# Total load in LOG10_16S _RNA_gene copies_per_g +# tabs 6 and 8 have different sample names +tabs <- list() +for (i in 1:11) { + tabs[[i]] <- read_excel(file.path("..", "data", "AbsoluteloadTaxaspecificqPCRdata.xlsx"), sheet = i) +} +tabs <- tabs[-c(6,8)] +d <- Reduce(function(dtf1,dtf2) dplyr::full_join(dtf1,dtf2,by="sample"), tabs) +d <- data.frame(d) +rownams <- unname(unlist(d[, "sample"])) +d <- d[, -1] +d[d %in% c("missing data", "NA")] <- NA +d <- apply(d, 2, as.numeric) +rownames(d) <- rownams +altExp(tse, "total_loads") <- TreeSummarizedExperiment(assays=SimpleList(signal=t(d))) + +# 'Fecal metabolite profile_LC-HRMS Data.xlsx' +x <- read_excel(file.path("..", "data", "Fecal\ metabolite\ profile_LC-HRMS Data.xlsx"), sheet = 1) +colnams <- as.character(x[3,]) +x <- x[-c(1,2,3),] +colnames(x) <- colnams +xr <- x[, 1:5] 
+rownames(xr) <- paste0("feature_", 1:nrow(xr)) +xd <- apply(as.matrix(x[, 6:ncol(x)]), 2, as.numeric) +M <- matrix(NA, nrow=nrow(xd), ncol=ncol(tse)) +colnames(M) <- colnames(tse) +# Match samples: fill the columns of M named in xd; the remaining samples stay NA +M[, colnames(xd)] <- xd +rownames(M) <- rownames(xr) +altExp(tse, "metabolites") <- TreeSummarizedExperiment( + assays=SimpleList(signal=M), rowData=xr) + +# 'SCFA data-HPLC.xlsx' (the first row holds the column names) +x <- read_excel(file.path("..", "data", "SCFA\ data-HPLC.xlsx")) +colnams <- unname(unlist(x[1,])) +x <- x[-1, ] +colnames(x) <- colnams +rownams <- x$sample +x <- x[,-1] +x <- as.matrix(x) +x <- apply(x,2,as.numeric) +scfa <- t(x) +colnames(scfa) <- rownams +M <- matrix(NA, nrow=nrow(scfa), ncol=ncol(tse)) +colnames(M) <- colnames(tse) +M[, colnames(scfa)] <- scfa +rownames(M) <- colnams[-1] +altExp(tse, "scfa") <- TreeSummarizedExperiment(assays=SimpleList(signal=M)) + +# ----------------------------------------------------------------------------- + +save(tse, file="Kumaraswamy2024.rda") diff --git a/man/Kumaraswamy2024.Rd b/man/Kumaraswamy2024.Rd new file mode 100644 index 0000000..c5a423e --- /dev/null +++ b/man/Kumaraswamy2024.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/miaTime.R +\docType{data} +\name{Kumaraswamy2024} +\alias{Kumaraswamy2024} +\title{Kumaraswamy2024} +\format{ +The data set in +\code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}} +format. +} +\usage{ +data(Kumaraswamy2024) +} +\value{ +Loads the data set in R. +} +\description{ +The Kumaraswamy2024 includes microbiota and metabolite profiling data +from 78 Indian individuals (40 males, 38 females). + +The Indian subjects were grouped into four diet groups (~20 subjects per group), +and fecal samples were collected across three seasonal time points. 
+ +The microbiota profiling was performed using HITChip microarray analysis +(in duplicate), qPCR (in triplicate with eight-point standard curves), and +LC-HRMS and HPLC metabolite profiling with internal standards. + +Column metadata includes diet group assignment, sampling season, sex, BMI, +age, and questionnaire-based lifestyle metadata. + +Quality control metrics include Pearson correlation (>0.98) for HITChip, +qPCR assay efficiency (>0.99), and technical replicates for HPLC and qPCR. + +Data sources: +\itemize{ +\item Microbiota HITChip microarray data +\item qPCR absolute abundance data +\item Chemical profiling data (HPLC, LC-HRMS) +\item Sample metadata (diet, lifestyle) +} + +Processed and raw data are available via: +\itemize{ +\item Zenodo (DOI: https://doi.org/10.5281/zenodo.14424024) +\item NCBI-SRA (fermented foods 16S rRNA sequencing, accession: PRJNA1191989) +} +} +\references{ +Jeyaram, K., Lahti, L., Tims, S. et al. Fermented foods affect the seasonal +stability of gut bacteria in an Indian rural population. +Nat Commun 16, 771 \url{https://doi.org/10.1038/s41467-025-56014-6} +} +\author{ +Jeyaram, K., Lahti, L., Tims, S. et al +} +\keyword{data}