diff --git a/inst/assets/bibliography.bib b/inst/assets/bibliography.bib index f475b347..dc1b1ec8 100644 --- a/inst/assets/bibliography.bib +++ b/inst/assets/bibliography.bib @@ -3068,3 +3068,24 @@ @article{Robinson2010 url = {https://genomebiology.biomedcentral.com/articles/10.1186/gb-2010-11-3-r25}, year = {2010} } + +@article{Gamboa-Tuz2025, + author = {Gamboa-Tuz, Samuel D. and Ramos, Marcel and Franzosa, Eric and Huttenhower, Curtis and Segata, Nicola and Oh, Sehyun and Waldron, Levi}, + title = {Commonly used compositional data analysis implementations are not advantageous in microbial differential abundance analyses benchmarked against biological ground truth}, + elocation-id = {2025.02.13.638109}, + year = {2025}, + doi = {10.1101/2025.02.13.638109}, + publisher = {Cold Spring Harbor Laboratory}, + abstract = {Previous benchmarking of differential abundance (DA) analysis methods in microbiome studies have employed synthetic data, simulations, and {\textquotedblleft}real data{\textquotedblright} examples, but to the best of our knowledge, none have yet employed experimental data with known {\textquotedblleft}ground truth{\textquotedblright} differential abundance. A key debate in the field centers on whether compositional methods are necessary for DA analysis, which is challenging to answer due to the lack of ground truth data. To address this gap, we created the Bioconductor data package MicrobiomeBenchmarkData, featuring three microbiome datasets with established biological ground truths: 1) diverse oral microbiomes from supragingival and subgingival plaques, expected to favor aerobic and anaerobic bacteria, respectively, 2) low-diversity microbiomes from healthy vaginas and bacterial vaginosis, conditions that have been well-characterized through cell culture and microscopy, and 3) a spike-in dataset with constant, known absolute abundances of three bacteria. 
We benchmarked 17 DA approaches and demonstrated that compositional DA methods are not beneficial but rather lack sensitivity, show increased variability in constant-abundance spike-ins, and, most surprisingly, more frequently produce paradoxical results with DA in the wrong direction for the low-diversity microbiome. Conversely, commonly used methods in microbiome literature, such as LEfSe, the Wilcoxon test, and RNA-seq-derived methods, performed best. We conclude that researchers continue using widely adopted non-parametric or RNA-seq DA methods and that further development of compositional methods includes benchmarking against datasets with known biological ground truth.}, + url = {https://www.biorxiv.org/content/early/2025/02/17/2025.02.13.638109}, + eprint = {https://www.biorxiv.org/content/early/2025/02/17/2025.02.13.638109.full.pdf}, + journal = {bioRxiv} +} + +@manual{HMP2data, + author = {Stansfield, John and Smirnova, Ekaterina and Zhao, Ni and Fettweis, Jennifer and Waldron, Levi and Dozmorov, Mikhail}, + title = {{HMP2Data}: {16S} {rRNA} sequencing data from the {Human Microbiome Project} 2}, + year = {2025}, + note = {R package version 1.22.0}, + doi = {10.18129/B9.bioc.HMP2Data} +} diff --git a/inst/pages/import.qmd b/inst/pages/import.qmd index 22764660..d70c19c0 100644 --- a/inst/pages/import.qmd +++ b/inst/pages/import.qmd @@ -525,6 +525,21 @@ sequencing data, all processed using the same pipeline and reference database. For more use examples in R/Bioconductor, see the [MicroBioMap vignette](https://blekhmanlab.github.io/MicroBioMap/articles/overview.html). +### Integrative Human Microbiome Project + +Datasets from the second phase of the Human Microbiome Project, also known as the +Integrative Human Microbiome Project, are made available with `r BiocStyle::Biocpkg("HMP2Data")` [@HMP2data]. +The datasets are offered as `TreeSE` objects. 
Additional data, *e.g.* cytokines, +is included when available. + +### Microbiome benchmark data + +The `r BiocStyle::Biocpkg("MicrobiomeBenchmarkData")` package gives access to datasets +with some ground truth available [@Gamboa-Tuz2025]. These datasets are compiled for the purpose of +benchmarking differential abundance methods, but the ground truth may be useful +for other benchmarking applications. Datasets are offered directly as `TreeSE` +objects. + ### Other data sources The current collections provide access to vast microbiome data