Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: miaTime
Type: Package
Title: Microbiome Time Series Analysis
Version: 0.99.10
Version: 0.99.11
Authors@R:
c(person(given = "Leo", family = "Lahti", role = c("aut"),
email = "leo.lahti@iki.fi",
Expand Down
4 changes: 4 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
Changes in version 0.99.11
Date: 2025-09-22
+ Added Kumaraswamy2024 data

Changes in version 0.99.10
Date: 2025-09-06
+ Added survival data on Crohn's disease (crohn_survival)
Expand Down
43 changes: 43 additions & 0 deletions R/miaTime.R
Original file line number Diff line number Diff line change
Expand Up @@ -185,3 +185,46 @@ NULL
#' @keywords
#' data
NULL

#' @title Kumaraswamy2024
#' @description
#' The Kumaraswamy2024 data set includes microbiota and metabolite profiling
#' data from 78 Indian individuals (40 males, 38 females).
#'
#' The Indian subjects were grouped into four diet groups (~20 subjects per group),
#' and fecal samples were collected across three seasonal time points.
#'
#' Microbiota profiling was performed using HITChip microarray analysis
#' (in duplicate) and qPCR (in triplicate with eight-point standard curves).
#' Metabolite profiling was performed using LC-HRMS and HPLC with internal
#' standards.
#'
#' Column metadata includes diet group assignment, sampling season, sex, BMI,
#' age, and questionnaire-based lifestyle metadata.
#'
#' Quality control metrics include Pearson correlation (>0.98) for HITChip,
#' qPCR assay efficiency (>0.99), and technical replicates for HPLC and qPCR.
#'
#' Data sources:
#' - Microbiota HITChip microarray data
#' - qPCR absolute abundance data
#' - Chemical profiling data (HPLC, LC-HRMS)
#' - Sample metadata (diet, lifestyle)
#'
#' Processed and raw data are available via:
#' - Zenodo (DOI: https://doi.org/10.5281/zenodo.14424024)
#' - NCBI-SRA (fermented foods 16S rRNA sequencing, accession: PRJNA1191989)
#'
#' @name Kumaraswamy2024
#' @docType data
#' @author Jeyaram, K., Lahti, L., Tims, S. et al
#' @return Loads the data set in R.
#' @references
#' Jeyaram, K., Lahti, L., Tims, S. et al. Fermented foods affect the seasonal
#' stability of gut bacteria in an Indian rural population.
#' Nat Commun 16, 771 (2025). \url{https://doi.org/10.1038/s41467-025-56014-6}
#' @usage data(Kumaraswamy2024)
#' @format The data set in
#' \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}}
#' format.
#' @keywords data
NULL
Binary file added data/Kumaraswamy2024.rda
Binary file not shown.
105 changes: 105 additions & 0 deletions inst/scripts/Kumaraswamy2024.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# Load required libraries
library(TreeSummarizedExperiment)
library(Cairo)
library(dplyr)
library(readxl)

# Read an Excel sheet (read_excel()'s default sheet) into a matrix.
#
# The first column of the sheet supplies the row names; every remaining
# column becomes a matrix column, keeping the header names from the sheet.
#
# f: path to the Excel file, handed unchanged to read_excel().
# Returns: a matrix as produced by as.matrix() on the remaining columns,
#   with row names taken from the first column.
read_data <- function(f) {
  sheet <- read_excel(f)
  row_ids <- unname(unlist(sheet[, 1]))
  mat <- as.matrix(sheet[, -1])
  rownames(mat) <- row_ids
  mat
}
# Source data: https://zenodo.org/records/14424024
# The Excel files are expected in ../data relative to the working directory.

# HITChip abundance profiles at genus, phylum and oligo level. They are used
# below as assays, so samples are expected in the columns.
gen <- read_data(file.path("..", "data", "Genus_hitchip.xlsx"))
phy <- read_data(file.path("..", "data", "Phylum_hitchip.xlsx"))
oli <- read_data(file.path("..", "data", "Oligo_hitchip.xlsx"))

# Sample metadata. read_data() returns a matrix whose row names come from the
# first sheet column; they are replaced here by the "sample" column so that
# the metadata rows are keyed by sample name.
md <- read_data(file.path("..", "data", "modified_file.xlsx"))
rownames(md) <- unname(md[, "sample"])
md <- as.data.frame(md)

# Columns 14 to 61 are converted to logical
# (presumably the yes/no questionnaire items -- verify against the sheet).
md[14:61] <- lapply(md[14:61], as.logical)

# Diet groups:
# Group-A: never consumed Hawaijar and Dahi (n=20, control)
# Group-B: consume Hawaijar and Dahi (n=21)
# Group-C: consume Hawaijar, not Dahi (n=23)
# Group-D: consume Dahi, not Hawaijar (n=14)
md[["timepoint"]] <- as.numeric(md[["timepoint"]])

# Fixed level order so that seasons sort chronologically rather than
# alphabetically. The argument must be spelled `levels`: factor() has no
# `...`, so a misspelled name aborts with an "unused argument" error.
md[["season"]] <- factor(
  md[["season"]],
  levels = c("summer", "autumn", "winter")
)

# Remaining categorical variables; levels are the sorted unique values.
# NOTE(review): "age" and "bmi" are turned into factors here, and because the
# metadata went through a matrix their values are sorted as text -- confirm
# that this is intended.
factors <- c("age", "sex", "bmi", "clan", "nature_of_birth",
             "marital_status", "residence", "subject", "group")
for (f in factors) {
  md[[f]] <- factor(md[[f]], levels = sort(unique(md[[f]])))
}

# Main container: genus-level HITChip signal plus the sample metadata.
tse <- TreeSummarizedExperiment(
  assays = SimpleList(signal = gen),
  colData = DataFrame(md)
)

# Phylum- and oligo-level profiles are attached as alternative experiments.
phylum_tse <- TreeSummarizedExperiment(assays = SimpleList(signal = phy))
altExp(tse, "phylum") <- phylum_tse

oligo_tse <- TreeSummarizedExperiment(assays = SimpleList(signal = oli))
altExp(tse, "oligo") <- oligo_tse

# The oligo signal contains one NA; replace it with the smallest observed
# value. The assay is pulled out, patched and written back.
oligo_signal <- assay(altExp(tse, "oligo"), "signal")
oligo_signal[is.na(oligo_signal)] <- min(oligo_signal, na.rm = TRUE)
assay(altExp(tse, "oligo"), "signal") <- oligo_signal

# -------------------------------------------

# Total load from taxa-specific qPCR, in log10 16S rRNA gene copies per g.
# The workbook has 11 sheets; sheets 6 and 8 use different sample names and
# are therefore left out of the merge.
qpcr_file <- file.path("..", "data", "AbsoluteloadTaxaspecificqPCRdata.xlsx")
tabs <- lapply(1:11, function(i) read_excel(qpcr_file, sheet = i))
tabs <- tabs[-c(6, 8)]

# Merge the remaining sheets on the sample identifier, keeping every sample
# that occurs in any sheet.
d <- Reduce(
  function(dtf1, dtf2) dplyr::full_join(dtf1, dtf2, by = "sample"),
  tabs
)
d <- data.frame(d)
rownams <- unname(unlist(d[, "sample"]))

# Drop the identifier by name rather than by position, so the right column is
# removed wherever "sample" sits in the merged table.
d <- d[, setdiff(colnames(d), "sample"), drop = FALSE]

# Recode textual missing-value markers cell by cell. `%in%` applied to a whole
# data.frame matches per column, not per cell, so it has to run per column.
d[] <- lapply(d, function(column) {
  column[column %in% c("missing data", "NA")] <- NA
  column
})

# Convert each column on its own. apply() would first coerce the data.frame
# with as.matrix(), which format()s numeric columns (7 significant digits)
# whenever a character column is present.
d <- do.call(cbind, lapply(d, as.numeric))
rownames(d) <- rownams

# Transposed so that samples are in the columns, as in `tse`.
altExp(tse, "total_loads") <- TreeSummarizedExperiment(
  assays = SimpleList(signal = t(d))
)

# 'Fecal metabolite profile_LC-HRMS Data.xlsx'
# The file name contains plain spaces. They must not be backslash-escaped:
# "\ " is not a valid escape in an R string and stops the script from parsing.
x <- read_excel(
  file.path("..", "data", "Fecal metabolite profile_LC-HRMS Data.xlsx"),
  sheet = 1
)

# The third row of the sheet holds the real column headers; rows 1-3 are
# dropped once the headers have been taken from row 3.
colnams <- as.character(x[3, ])
x <- x[-c(1, 2, 3), ]
colnames(x) <- colnams

# The first five columns describe the features (row data). Converted to a
# plain data.frame because setting row names on a tibble is deprecated.
xr <- as.data.frame(x[, 1:5])
rownames(xr) <- paste0("feature_", seq_len(nrow(xr)))

# The remaining columns are per-sample measurements.
xd <- apply(as.matrix(x[, 6:ncol(x)]), 2, as.numeric)

# Fail with a readable message instead of a bare subscript error when the
# sheet contains samples that are not part of `tse`.
unknown_samples <- setdiff(colnames(xd), colnames(tse))
if (length(unknown_samples) > 0) {
  stop(
    "Metabolite samples not found in tse: ",
    paste(unknown_samples, collapse = ", "),
    call. = FALSE
  )
}

# Match samples: one column per sample in `tse`; samples without metabolite
# measurements stay NA.
M <- matrix(NA_real_, nrow = nrow(xd), ncol = ncol(tse))
colnames(M) <- colnames(tse)
M[, colnames(xd)] <- xd
rownames(M) <- rownames(xr)
altExp(tse, "metabolites") <- TreeSummarizedExperiment(
  assays = SimpleList(signal = M), rowData = xr
)

# 'SCFA data-HPLC.xlsx'
# The file name contains a plain space. It must not be backslash-escaped:
# "\ " is not a valid escape in an R string and stops the script from parsing.
x <- read_excel(file.path("..", "data", "SCFA data-HPLC.xlsx"))

# The first data row holds the real column headers.
colnams <- unname(unlist(x[1, ]))
x <- x[-1, ]
colnames(x) <- colnams

# The first column holds the sample names; the remaining columns hold the
# measured values.
rownams <- x[["sample"]]
x <- as.matrix(x[, -1])
x <- apply(x, 2, as.numeric)

# Transpose so that samples are in the columns, as in `tse`.
scfa <- t(x)
colnames(scfa) <- rownams

# Fail with a readable message instead of a bare subscript error when the
# sheet contains samples that are not part of `tse`.
unknown_samples <- setdiff(colnames(scfa), colnames(tse))
if (length(unknown_samples) > 0) {
  stop(
    "SCFA samples not found in tse: ",
    paste(unknown_samples, collapse = ", "),
    call. = FALSE
  )
}

# One column per sample in `tse`; samples without SCFA measurements stay NA.
M <- matrix(NA_real_, nrow = nrow(scfa), ncol = ncol(tse))
colnames(M) <- colnames(tse)
M[, colnames(scfa)] <- scfa
rownames(M) <- colnams[-1]
altExp(tse, "scfa") <- TreeSummarizedExperiment(
  assays = SimpleList(signal = M)
)

# -----------------------------------------------------------------------------

# NOTE(review): save() stores the object under its variable name, so loading
# this file restores a variable called `tse`. Confirm whether the object should
# be renamed to Kumaraswamy2024 before saving, to match the data set name used
# in the package documentation.
save(tse, file = "Kumaraswamy2024.rda")
57 changes: 57 additions & 0 deletions man/Kumaraswamy2024.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading