From 15c15f62cd8982ba5995875c6a442e069c913e46 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Tue, 12 Aug 2025 14:32:57 -0400 Subject: [PATCH 01/37] first commit for custom/fingerprintvcfparser --- .../fingerprintvcfparser/environment.yml | 10 +++ .../msk/custom/fingerprintvcfparser/main.nf | 49 ++++++++++++ .../msk/custom/fingerprintvcfparser/meta.yml | 37 +++++++++ .../usr/bin/parse_fingerprint_vcf.py | 70 +++++++++++++++++ .../fingerprintvcfparser/tests/main.nf.test | 77 +++++++++++++++++++ 5 files changed, 243 insertions(+) create mode 100644 modules/msk/custom/fingerprintvcfparser/environment.yml create mode 100644 modules/msk/custom/fingerprintvcfparser/main.nf create mode 100644 modules/msk/custom/fingerprintvcfparser/meta.yml create mode 100755 modules/msk/custom/fingerprintvcfparser/resources/usr/bin/parse_fingerprint_vcf.py create mode 100644 modules/msk/custom/fingerprintvcfparser/tests/main.nf.test diff --git a/modules/msk/custom/fingerprintvcfparser/environment.yml b/modules/msk/custom/fingerprintvcfparser/environment.yml new file mode 100644 index 00000000..cc119fe8 --- /dev/null +++ b/modules/msk/custom/fingerprintvcfparser/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # TODO nf-core: List required Conda package(s). + # Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). + # For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. 
+ - "bioconda::pysam=0.23.3" diff --git a/modules/msk/custom/fingerprintvcfparser/main.nf b/modules/msk/custom/fingerprintvcfparser/main.nf new file mode 100644 index 00000000..78beae2a --- /dev/null +++ b/modules/msk/custom/fingerprintvcfparser/main.nf @@ -0,0 +1,49 @@ +process CUSTOM_FINGERPRINTVCFPARSER { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/pysam:0.23.0--py39hdd5828d_0': + 'biocontainers/pysam:0.23.0--py39hdd5828d_0' }" + + input: + tuple val(meta), path(vcf) + + output: + tuple val(meta), path("*.tsv"), emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + parse_fingerprint_vcf.py \\ + --input ${vcf} \\ + --output ${prefix}.fp.tsv \\ + --samplename ${prefix} \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + parse_fingerprint_vcf.py: 0.1.0 + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo $args + + touch ${prefix}.fp.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + parse_fingerprint_vcf.py: 0.1.0 + END_VERSIONS + """ +} diff --git a/modules/msk/custom/fingerprintvcfparser/meta.yml b/modules/msk/custom/fingerprintvcfparser/meta.yml new file mode 100644 index 00000000..453b71bd --- /dev/null +++ b/modules/msk/custom/fingerprintvcfparser/meta.yml @@ -0,0 +1,37 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +# # TODO nf-core: Add a description of the module and list keywords +name: "custom_fingerprintvcfparser" +description: write your description here +keywords: +- sort +- example +- genomics +tools: +## TODO nf-core: Add a description and 
other details for the software below +- "custom": + description: "Pysam is a Python module for reading and manipulating SAM/BAM/VCF/BCF + files. It's a lightweight wrapper of the htslib C-API, the same one that powers + samtools, bcftools, and tabix." + homepage: "None" + documentation: "None" + tool_dev_url: "None" + doi: "" + licence: ['MIT'] + identifier: biotools:pysam + +input: +# TODO nf-core: Update the information obtained from bio.tools and make sure that it is correct + [] +output: +# TODO nf-core: Update the information obtained from bio.tools and make sure that it is correct + versions: - + versions.yml: + type: file + description: File containing software versions + pattern: versions.yml + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: +- "@anoronh4" +maintainers: +- "@anoronh4" diff --git a/modules/msk/custom/fingerprintvcfparser/resources/usr/bin/parse_fingerprint_vcf.py b/modules/msk/custom/fingerprintvcfparser/resources/usr/bin/parse_fingerprint_vcf.py new file mode 100755 index 00000000..851bd498 --- /dev/null +++ b/modules/msk/custom/fingerprintvcfparser/resources/usr/bin/parse_fingerprint_vcf.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python +import argparse + +""" +Converts fingerprint vcf to a formatted table +""" + +__author__ = "Anne Marie Noronha" +__email__ = "noronhaa@mskcc.org" +__version__ = "0.1.0" +__status__ = "Dev" + +import sys, os +from pysam import VariantFile # version >= 0.15.2 +from itertools import groupby + +def usage(): + parser = argparse.ArgumentParser() + parser.add_argument('--input','-i', help = 'input file', required = True) + parser.add_argument('--samplename','-n', help = 'sample name', required = True) + parser.add_argument('--output','-o', help = 'output file', required = True) + parser.add_argument('--depth-filter','-d', default = 20, type = int, help = 'minimum read depth for outputting a minor allele frequency [default = 20]') + return parser.parse_args() + +def main(): + args = usage() + + 
fp_out_list = [] + + vcf_in = VariantFile(args.input, "r") + for vcf_rec in vcf_in.fetch(): + ref_allele = vcf_rec.ref + alt_allele = vcf_rec.alts[0] + ref_allele_count = vcf_rec.samples[args.samplename]["RD"] + alt_allele_count = vcf_rec.samples[args.samplename]["AD"] + if ref_allele_count >= alt_allele_count and ref_allele_count > 0: + maf = alt_allele_count / float(ref_allele_count + alt_allele_count) + if maf < .1: + genotype = ref_allele*2 + else: + genotype = ref_allele + alt_allele + elif alt_allele_count > ref_allele_count: + maf = ref_allele_count / float(ref_allele_count + alt_allele_count) + if maf < .1: + genotype = alt_allele*2 + #else: genotype = alt_allele + ref_allele + else: + genotype = ref_allele + alt_allele + elif ref_allele_count == 0: + genotype = "--" + else: + genotype = ref_allele + alt_allele + if ref_allele_count + alt_allele_count < args.depth_filter or genotype == "--": + maf = "" + + + formatted_counts = "{}:{} {}:{}".format(ref_allele,ref_allele_count,alt_allele,alt_allele_count) + + locus = "{}:{}".format(vcf_rec.chrom,vcf_rec.pos) + depth = vcf_rec.samples[args.samplename]["DP"] + + fp_out_list += [[locus,formatted_counts, genotype, maf]] + + with open(args.output,'w') as f: + f.write("\t".join(['Locus', args.samplename + '_Counts', args.samplename + '_Genotypes', args.samplename + '_MinorAlleleFreq']) + "\n") + for i in fp_out_list: + f.write("\t".join([str(j) for j in i]) + "\n") + +if __name__ == "__main__": + main() diff --git a/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test b/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test new file mode 100644 index 00000000..e8254d24 --- /dev/null +++ b/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test @@ -0,0 +1,77 @@ +// nf-core modules test custom/fingerprintvcfparser +nextflow_process { + + name "Test Process CUSTOM_FINGERPRINTVCFPARSER" + script "../main.nf" + process "CUSTOM_FINGERPRINTVCFPARSER" + + tag "modules" + tag "modules_msk" + tag "custom" + tag 
"custom/fingerprintvcfparser" + tag "gbcms" + + test("sarscov2 - vcf") { + setup { + run("GBCMS"){ + script "../../gbcms/main.nf" + process { + """ + input[0] = [ + [ id:'test', sample:'197' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), + "variant_file.vcf" + ] + input[1] = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + input[2] = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + """ + } + } + } + when { + process { + """ + input[0] = GBCMS.out.variant_file + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + // TODO nf-core: Change the test name preferably indicating the test-data and file-format used but keep the " - stub" suffix. + test("sarscov2 - vcf - stub") { + + options "-stub" + + when { + process { + """ + // TODO nf-core: define inputs of the process here. 
Example: + + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} From 785cd7c1696b6c9177ddd2a361193f36b26449f7 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Tue, 12 Aug 2025 15:42:12 -0400 Subject: [PATCH 02/37] update module and nf-test --- .../msk/custom/fingerprintvcfparser/main.nf | 8 +-- .../fingerprintvcfparser/tests/main.nf.test | 13 ++-- .../tests/main.nf.test.snap | 72 +++++++++++++++++++ 3 files changed, 84 insertions(+), 9 deletions(-) create mode 100644 modules/msk/custom/fingerprintvcfparser/tests/main.nf.test.snap diff --git a/modules/msk/custom/fingerprintvcfparser/main.nf b/modules/msk/custom/fingerprintvcfparser/main.nf index 78beae2a..53e7d7b2 100644 --- a/modules/msk/custom/fingerprintvcfparser/main.nf +++ b/modules/msk/custom/fingerprintvcfparser/main.nf @@ -23,9 +23,9 @@ process CUSTOM_FINGERPRINTVCFPARSER { """ parse_fingerprint_vcf.py \\ --input ${vcf} \\ - --output ${prefix}.fp.tsv \\ - --samplename ${prefix} \\ - $args + --output ${prefix}.fp.tsv \\ + --samplename ${prefix} \\ + $args cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -38,7 +38,7 @@ process CUSTOM_FINGERPRINTVCFPARSER { def prefix = task.ext.prefix ?: "${meta.id}" """ echo $args - + touch ${prefix}.fp.tsv cat <<-END_VERSIONS > versions.yml diff --git a/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test b/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test index e8254d24..3d5f6a03 100644 --- a/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test +++ b/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test @@ -1,3 +1,4 @@ +// TODO nf-core: Once you have added the required tests, please run the following command to build this file: // nf-core modules test custom/fingerprintvcfparser 
nextflow_process { @@ -11,14 +12,15 @@ nextflow_process { tag "custom/fingerprintvcfparser" tag "gbcms" + // TODO nf-core: Change the test name preferably indicating the test-data and file-format used test("sarscov2 - vcf") { setup { run("GBCMS"){ - script "../../gbcms/main.nf" + script "../../../gbcms/main.nf" process { """ input[0] = [ - [ id:'test', sample:'197' ], // meta map + [ id:'test', sample:'test' ], // meta map file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), @@ -33,6 +35,7 @@ nextflow_process { when { process { """ + // TODO nf-core: define inputs of the process here. Example: input[0] = GBCMS.out.variant_file """ } @@ -42,6 +45,8 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot(process.out).match() } + //TODO nf-core: Add all required assertions to verify the test output. + // See https://nf-co.re/docs/contributing/tutorials/nf-test_assertions for more information and examples. ) } @@ -55,11 +60,9 @@ nextflow_process { when { process { """ - // TODO nf-core: define inputs of the process here. 
Example: - input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) ] """ } diff --git a/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test.snap b/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test.snap new file mode 100644 index 00000000..5751d885 --- /dev/null +++ b/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "sarscov2 - vcf": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "sample": "test" + }, + "test.fp.tsv:md5,9fa9a081f17ee52f03463c96d46a23aa" + ] + ], + "1": [ + "versions.yml:md5,c8c9b3fa1b9110ca83a71490a317f2fa" + ], + "tsv": [ + [ + { + "id": "test", + "sample": "test" + }, + "test.fp.tsv:md5,9fa9a081f17ee52f03463c96d46a23aa" + ] + ], + "versions": [ + "versions.yml:md5,c8c9b3fa1b9110ca83a71490a317f2fa" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-08-12T15:07:39.656085692" + }, + "sarscov2 - vcf - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fp.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,c8c9b3fa1b9110ca83a71490a317f2fa" + ], + "tsv": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fp.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,c8c9b3fa1b9110ca83a71490a317f2fa" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-08-12T15:07:46.098292727" + } +} \ No newline at end of file From 7f47996637653701c5ce1ec8b052f4a080220f39 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Wed, 13 Aug 2025 23:43:18 -0400 Subject: [PATCH 03/37] set enable_conda to false --- modules/msk/custom/fingerprintvcfparser/tests/main.nf.test | 1 + 
modules/msk/custom/fingerprintvcfparser/tests/nextflow.config | 3 +++ 2 files changed, 4 insertions(+) create mode 100644 modules/msk/custom/fingerprintvcfparser/tests/nextflow.config diff --git a/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test b/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test index 3d5f6a03..574668ea 100644 --- a/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test +++ b/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test @@ -14,6 +14,7 @@ nextflow_process { // TODO nf-core: Change the test name preferably indicating the test-data and file-format used test("sarscov2 - vcf") { + config "./nextflow.config" setup { run("GBCMS"){ script "../../../gbcms/main.nf" diff --git a/modules/msk/custom/fingerprintvcfparser/tests/nextflow.config b/modules/msk/custom/fingerprintvcfparser/tests/nextflow.config new file mode 100644 index 00000000..f2cf46a3 --- /dev/null +++ b/modules/msk/custom/fingerprintvcfparser/tests/nextflow.config @@ -0,0 +1,3 @@ +params { + enable_conda = false +} From af7d9028759724ac12a55a2f2347d8c60416a23b Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Wed, 13 Aug 2025 23:44:00 -0400 Subject: [PATCH 04/37] update meta.yml --- .../msk/custom/fingerprintvcfparser/main.nf | 4 +- .../msk/custom/fingerprintvcfparser/meta.yml | 59 ++++++++++++------- 2 files changed, 41 insertions(+), 22 deletions(-) diff --git a/modules/msk/custom/fingerprintvcfparser/main.nf b/modules/msk/custom/fingerprintvcfparser/main.nf index 53e7d7b2..2e1f4a4b 100644 --- a/modules/msk/custom/fingerprintvcfparser/main.nf +++ b/modules/msk/custom/fingerprintvcfparser/main.nf @@ -11,7 +11,7 @@ process CUSTOM_FINGERPRINTVCFPARSER { tuple val(meta), path(vcf) output: - tuple val(meta), path("*.tsv"), emit: tsv + tuple val(meta), path("${prefix}.fp.tsv"), emit: tsv path "versions.yml" , emit: versions when: @@ -19,7 +19,7 @@ process CUSTOM_FINGERPRINTVCFPARSER { script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: 
"${meta.id}" + prefix = task.ext.prefix ?: "${meta.id}" """ parse_fingerprint_vcf.py \\ --input ${vcf} \\ diff --git a/modules/msk/custom/fingerprintvcfparser/meta.yml b/modules/msk/custom/fingerprintvcfparser/meta.yml index 453b71bd..d4ae3ed7 100644 --- a/modules/msk/custom/fingerprintvcfparser/meta.yml +++ b/modules/msk/custom/fingerprintvcfparser/meta.yml @@ -1,36 +1,55 @@ # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json -# # TODO nf-core: Add a description of the module and list keywords name: "custom_fingerprintvcfparser" -description: write your description here +description: Custom script to parse fingerprint VCF files, generated by the GBCMS module. keywords: -- sort -- example -- genomics +- custom +- fingerprint +- vcf +- pysam tools: -## TODO nf-core: Add a description and other details for the software below - "custom": description: "Pysam is a Python module for reading and manipulating SAM/BAM/VCF/BCF files. It's a lightweight wrapper of the htslib C-API, the same one that powers samtools, bcftools, and tabix." - homepage: "None" - documentation: "None" - tool_dev_url: "None" - doi: "" + homepage: "https://pysam.readthedocs.io/en/latest/api.html" + documentation: "https://pysam.readthedocs.io/en/latest/api.html" + tool_dev_url: "https://github.com/pysam-developers/pysam" licence: ['MIT'] identifier: biotools:pysam input: -# TODO nf-core: Update the information obtained from bio.tools and make sure that it is correct - [] + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - vcf: + type: file + description: Fasta file containing scaffold + pattern: "*.vcf" + ontologies: + - edam: http://edamontology.org/format_3016 # VCF output: -# TODO nf-core: Update the information obtained from bio.tools and make sure that it is correct - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: versions.yml - ontologies: - - edam: http://edamontology.org/format_3750 # YAML + tsv: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - ${prefix}.fp.tsv: + type: file + description: Tab-separated values (TSV) file containing parsed fingerprint data + pattern: "${prefix}.fp.tsv" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: versions.yml + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@anoronh4" maintainers: From ed05eb0198f1c4ff27165320c87975d2bf9128ec Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Wed, 13 Aug 2025 23:47:16 -0400 Subject: [PATCH 05/37] remove TODO lines --- modules/msk/custom/fingerprintvcfparser/environment.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/modules/msk/custom/fingerprintvcfparser/environment.yml b/modules/msk/custom/fingerprintvcfparser/environment.yml index cc119fe8..a5547b5c 100644 --- a/modules/msk/custom/fingerprintvcfparser/environment.yml +++ b/modules/msk/custom/fingerprintvcfparser/environment.yml @@ -4,7 +4,4 @@ channels: - conda-forge - bioconda dependencies: - # TODO nf-core: List required Conda package(s). - # Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). - # For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. 
- "bioconda::pysam=0.23.3" From 5dc010eb578721a1ebfe0c7f581b4bfcc5027af7 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Wed, 13 Aug 2025 23:48:33 -0400 Subject: [PATCH 06/37] remove more TODO lines --- modules/msk/custom/fingerprintvcfparser/tests/main.nf.test | 6 ------ 1 file changed, 6 deletions(-) diff --git a/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test b/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test index 574668ea..e4454cba 100644 --- a/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test +++ b/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test @@ -1,4 +1,3 @@ -// TODO nf-core: Once you have added the required tests, please run the following command to build this file: // nf-core modules test custom/fingerprintvcfparser nextflow_process { @@ -12,7 +11,6 @@ nextflow_process { tag "custom/fingerprintvcfparser" tag "gbcms" - // TODO nf-core: Change the test name preferably indicating the test-data and file-format used test("sarscov2 - vcf") { config "./nextflow.config" setup { @@ -36,7 +34,6 @@ nextflow_process { when { process { """ - // TODO nf-core: define inputs of the process here. Example: input[0] = GBCMS.out.variant_file """ } @@ -46,14 +43,11 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot(process.out).match() } - //TODO nf-core: Add all required assertions to verify the test output. - // See https://nf-co.re/docs/contributing/tutorials/nf-test_assertions for more information and examples. ) } } - // TODO nf-core: Change the test name preferably indicating the test-data and file-format used but keep the " - stub" suffix. 
test("sarscov2 - vcf - stub") { options "-stub" From 112ac67c9ac0e5366a85a6c3db7877fc9da75383 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Wed, 13 Aug 2025 23:53:24 -0400 Subject: [PATCH 07/37] add module to skipped nf-tests for conda --- .github/skip_nf_test.json | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/skip_nf_test.json b/.github/skip_nf_test.json index 4ea16616..25ca5dce 100644 --- a/.github/skip_nf_test.json +++ b/.github/skip_nf_test.json @@ -1,5 +1,6 @@ { "conda": [ + "modules/msk/custom/fingerprintvcfparser", "modules/msk/calculatenoise", "modules/msk/ppflagfixer", "modules/msk/facets", From e09936f8569542d0bcad713a832bfa5e930a3e33 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Wed, 13 Aug 2025 23:56:03 -0400 Subject: [PATCH 08/37] bugfix --- modules/msk/custom/fingerprintvcfparser/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/msk/custom/fingerprintvcfparser/main.nf b/modules/msk/custom/fingerprintvcfparser/main.nf index 2e1f4a4b..6a46b512 100644 --- a/modules/msk/custom/fingerprintvcfparser/main.nf +++ b/modules/msk/custom/fingerprintvcfparser/main.nf @@ -35,7 +35,7 @@ process CUSTOM_FINGERPRINTVCFPARSER { stub: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + prefix = task.ext.prefix ?: "${meta.id}" """ echo $args From a8805ae4765c93276ac9e69ede0b858044734099 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Thu, 14 Aug 2025 19:51:39 -0400 Subject: [PATCH 09/37] add subworkflow for generating fingerprints with gbcms --- subworkflows/msk/fingerprint_gbcms/main.nf | 35 +++++++++++ subworkflows/msk/fingerprint_gbcms/meta.yml | 62 +++++++++++++++++++ .../msk/fingerprint_gbcms/tests/main.nf.test | 43 +++++++++++++ .../fingerprint_gbcms/tests/main.nf.test.snap | 39 ++++++++++++ 4 files changed, 179 insertions(+) create mode 100644 subworkflows/msk/fingerprint_gbcms/main.nf create mode 100644 subworkflows/msk/fingerprint_gbcms/meta.yml create mode 100644 
subworkflows/msk/fingerprint_gbcms/tests/main.nf.test create mode 100644 subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap diff --git a/subworkflows/msk/fingerprint_gbcms/main.nf b/subworkflows/msk/fingerprint_gbcms/main.nf new file mode 100644 index 00000000..d121fd55 --- /dev/null +++ b/subworkflows/msk/fingerprint_gbcms/main.nf @@ -0,0 +1,35 @@ +include { GBCMS } from '../../../modules/msk/gbcms/main' +include { CUSTOM_FINGERPRINTVCFPARSER } from '../../../modules/msk/custom/fingerprintvcfparser/main' + +workflow FINGERPRINT_GBCMS { + + take: + ch_bam // channel: [ val(meta), [ bam ] ] + ch_bai // channel: [ val(meta), [ bai ] ] + ch_fp_vcf // channel: [ val(meta), [ vcf ] ] + ch_fasta // channel: [ fasta ] + ch_fastafai // channel: [ fastafai ] + + main: + + ch_versions = Channel.empty() + + GBCMS ( + ch_bam + .combine(ch_bai, by:[0]) + .combine(ch_fp_vcf.map{ if (it.size() > 1){ it[1] } else { it }}.first()) + .map{ meta, bam, bai, vcf -> [meta, bam, bai, vcf, meta.id + ".fp.vcf" ] }, + ch_fasta.first(), + ch_fastafai.first() + //ch_fasta.view().map{ if (it[0] instanceof Map){ it[1] } else { it }}.first(), + //ch_fastafai.view().map{ if (it[0] instanceof Map){ it[1] } else { it }}.first() + ) + ch_versions = ch_versions.mix(GBCMS.out.versions.first()) + + CUSTOM_FINGERPRINTVCFPARSER ( GBCMS.out.variant_file ) + ch_versions = ch_versions.mix(CUSTOM_FINGERPRINTVCFPARSER.out.versions.first()) + + emit: + fp_tsv = CUSTOM_FINGERPRINTVCFPARSER.out.tsv // channel: [ val(meta), tsv ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/msk/fingerprint_gbcms/meta.yml b/subworkflows/msk/fingerprint_gbcms/meta.yml new file mode 100644 index 00000000..f90038e5 --- /dev/null +++ b/subworkflows/msk/fingerprint_gbcms/meta.yml @@ -0,0 +1,62 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "fingerprint_gbcms" +description: | + Get base counts for all 
fingerprinting sites from BAM/CRAM/SAM files using the GBCMS module, + and parse the resulting VCF files into standardized TSV format using a custom parser. +keywords: + - fingerprint + - fingerprinting + - loci + - vcf + - bam +components: + - gbcms + - custom/fingerprintvcfparser +input: + - ch_bam: + type: file + description: | + The input channel containing the BAM/CRAM/SAM files + Structure: [ val(meta), path(bam) ] + pattern: "*.{bam/cram/sam}" + - ch_bai: + type: file + description: | + The input channel containing the BAM index files (BAI/CSI) + Structure: [ val(meta), path(bai) ] # or path(csi) + pattern: "*.{bai/csi}" + - ch_fp_vcf: + type: file + description: | + Channel containing fingerprint VCF files + Structure: [ val(meta), path(vcf) ] + pattern: "*.vcf" + - ch_fasta: + type: file + description: | + Channel containing reference FASTA files + Structure: [ path(fasta) ] + pattern: "*.{fasta,fa}" + - ch_fastafai: + type: file + description: | + Channel containing reference FASTA index files + Structure: [ path(fasta.fai) ] + pattern: "*.{fasta,fa}.fai" +output: + - tsv: + type: file + description: | + Channel containing standardized fingerprint TSV files + Structure: [ val(meta), path(tsv) ] + pattern: "*.fp.tsv" + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" +authors: + - "@anoronh4" +maintainers: + - "@anoronh4" diff --git a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test new file mode 100644 index 00000000..55de93ca --- /dev/null +++ b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test @@ -0,0 +1,43 @@ +nextflow_workflow { + + name "Test Subworkflow FINGERPRINT_GBCMS" + script "../main.nf" + workflow "FINGERPRINT_GBCMS" + + tag "subworkflows" + tag "subworkflows_msk" + tag "subworkflows/fingerprint_gbcms" + tag "gbcms" + tag "custom/fingerprintvcfparser" + + test("sarscov2 - bam") { + + when 
{ + workflow { + """ + input[0] = Channel.of([ + [ id:'test', sample:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'test', sample:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true), + ]) + input[2] = Channel.of([ + [:], + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) + ]) + input[3] = Channel.of(file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)) + input[4] = Channel.of(file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match()} + ) + } + } +} diff --git a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap new file mode 100644 index 00000000..9952b0bd --- /dev/null +++ b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap @@ -0,0 +1,39 @@ +{ + "sarscov2 - bam": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "sample": "test" + }, + "test.fp.tsv:md5,9fa9a081f17ee52f03463c96d46a23aa" + ] + ], + "1": [ + "versions.yml:md5,41ff30ed71b1d19e95a6095b9ac3ca94", + "versions.yml:md5,4b4b3ad40aa1c2c3a002c0c347c385b8" + ], + "fp_tsv": [ + [ + { + "id": "test", + "sample": "test" + }, + "test.fp.tsv:md5,9fa9a081f17ee52f03463c96d46a23aa" + ] + ], + "versions": [ + "versions.yml:md5,41ff30ed71b1d19e95a6095b9ac3ca94", + "versions.yml:md5,4b4b3ad40aa1c2c3a002c0c347c385b8" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-08-14T19:50:44.771060618" + } +} \ No newline at end of file From c9cf10ffa2977bd2038fa8be3e80baf295d827be Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Thu, 14 Aug 2025 20:00:11 -0400 Subject: [PATCH 10/37] add subworkflow to skipped 
nf-tests for conda --- .github/skip_nf_test.json | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/skip_nf_test.json b/.github/skip_nf_test.json index 25ca5dce..2991ca79 100644 --- a/.github/skip_nf_test.json +++ b/.github/skip_nf_test.json @@ -29,6 +29,7 @@ "modules/msk/phylowgs/parsecnvs", "modules/msk/pvmaf/concat", "modules/msk/pvmaf/tagtraceback", + "subworkflows/msk/fingerprint_gbcms", "subworkflows/msk/genome_nexus" ], "docker": [ From e3a7bedd6b5b5933c04c481ff439a9a0e2a249c8 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Thu, 25 Sep 2025 17:51:43 -0400 Subject: [PATCH 11/37] add fingerprint contamination module --- .../fingerprintcontamination/environment.yml | 7 ++ .../custom/fingerprintcontamination/main.nf | 47 ++++++++ .../custom/fingerprintcontamination/meta.yml | 60 ++++++++++ .../usr/bin/calculate_contamination.py | 100 ++++++++++++++++ .../tests/main.nf.test | 108 +++++++++++++++++ .../tests/main.nf.test.snap | 72 +++++++++++ .../tests/nextflow.config | 10 ++ .../tests/stash_main.nf.test_stash | 113 ++++++++++++++++++ subworkflows/msk/fingerprint_gbcms/main.nf | 40 ++++++- subworkflows/msk/fingerprint_gbcms/meta.yml | 1 + 10 files changed, 552 insertions(+), 6 deletions(-) create mode 100644 modules/msk/custom/fingerprintcontamination/environment.yml create mode 100644 modules/msk/custom/fingerprintcontamination/main.nf create mode 100644 modules/msk/custom/fingerprintcontamination/meta.yml create mode 100755 modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py create mode 100644 modules/msk/custom/fingerprintcontamination/tests/main.nf.test create mode 100644 modules/msk/custom/fingerprintcontamination/tests/main.nf.test.snap create mode 100644 modules/msk/custom/fingerprintcontamination/tests/nextflow.config create mode 100644 modules/msk/custom/fingerprintcontamination/tests/stash_main.nf.test_stash diff --git a/modules/msk/custom/fingerprintcontamination/environment.yml 
b/modules/msk/custom/fingerprintcontamination/environment.yml new file mode 100644 index 00000000..21c00633 --- /dev/null +++ b/modules/msk/custom/fingerprintcontamination/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda + +dependencies: + - numpy=2.3.3 + - pandas=2.3.2 diff --git a/modules/msk/custom/fingerprintcontamination/main.nf b/modules/msk/custom/fingerprintcontamination/main.nf new file mode 100644 index 00000000..bbdcdb51 --- /dev/null +++ b/modules/msk/custom/fingerprintcontamination/main.nf @@ -0,0 +1,47 @@ +process CUSTOM_FINGERPRINTCONTAMINATION { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + //'oras://community.wave.seqera.io/library/numpy_pandas:1f8cb70bfdb82865': + 'docker://community.wave.seqera.io/library/numpy_pandas:f27ed83387b3c038': + 'community.wave.seqera.io/library/numpy_pandas:f27ed83387b3c038' }" + + input: + tuple val(meta), path(fp_tumor), path(fp_normal) + + output: + tuple val(meta), path("*.contamination.tsv"), emit: contamination_tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + calculate_contamination.py \\ + -t ${fp_tumor} \\ + -n ${fp_normal ?: fp_tumor} \\ + -o ${prefix}.contamination.tsv \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + calculate_contamination.py: \$( calculate_contamination.py --version | rev | cut -f 1 -d " " | rev ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.contamination.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + calculate_contamination.py: \$( calculate_contamination.py --version | rev | cut -f 1 -d " " | rev ) + END_VERSIONS + """ +} diff --git 
a/modules/msk/custom/fingerprintcontamination/meta.yml b/modules/msk/custom/fingerprintcontamination/meta.yml new file mode 100644 index 00000000..4fde47a5 --- /dev/null +++ b/modules/msk/custom/fingerprintcontamination/meta.yml @@ -0,0 +1,60 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "custom_fingerprintcontamination" +description: "Calculate major and minor contamination from fingerprint tables" +version: "0.1.0" +keywords: + - fingerprint + - contamination + - qc +tools: + - "pandas": + description: "Python Data Analysis Library" + homepage: "https://pandas.pydata.org/" + documentation: "https://pandas.pydata.org/docs/" + - "numpy": + description: "Scientific computing library for Python" + homepage: "https://numpy.org/" + documentation: "https://numpy.org/doc/" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - fp_tumor: + type: file + description: Fingerprint table file for tumor sample + pattern: "*.fp.tsv" + ontologies: + - edam: "http://edamontology.org/format_3750" # TSV + - fp_normal: + type: file + description: Fingerprint table file for normal sample + pattern: "*.fp.tsv" + ontologies: + - edam: "http://edamontology.org/format_3750" + +output: + - contamination_tsv: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]`
+      - "*.contamination.tsv":
+          type: file
+          description: Contamination results table
+          pattern: "*.contamination.tsv"
+          ontologies:
+            - edam: "http://edamontology.org/format_3750" # TSV
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+
+authors:
+  - "@anoronh4"
+maintainers:
+  - "@anoronh4"
diff --git a/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py b/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py
new file mode 100755
index 00000000..dea92003
--- /dev/null
+++ b/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py
@@ -0,0 +1,116 @@
+#!/usr/bin/env python
+
+
+"""
+Calculates contamination from fingerprint table
+"""
+
+__author__ = "Hanan Salim"
+__email__ = "salimh@mskcc.org"
+__contributors__ = "Anne Marie Noronha (noronhaa@mskcc.org)"
+__version__ = "0.1.0"
+__status__ = "Dev"
+
+import argparse
+import pandas as pd
+import numpy as np
+import os
+import sys
+
+def major_contamination(tumor, depth_filter):
+    """Return the fraction of depth-filtered tumor loci whose genotype is not homozygous.
+
+    Returns NaN when no locus passes the depth filter.
+    """
+    tumor_filtered = get_coverage(tumor, depth_filter)
+    if tumor_filtered.empty:
+        # Nothing left to count: report NaN rather than dividing by zero.
+        return np.nan
+
+    homozygous = ['AA','CC','GG','TT','A','C','G','T']
+    heterozygous = ~tumor_filtered['Genotype'].isin(homozygous)
+
+    return heterozygous.sum()/tumor_filtered.shape[0]
+
+def get_coverage(file, depth_filter):
+    """Return the rows of a fingerprint table whose total depth exceeds depth_filter.
+
+    'Alleles' must hold two space-separated '<allele>:<count>' entries per row.
+    The result is a copy carrying the helper columns 'A1', 'A2' and 'coverage';
+    the caller's DataFrame is not modified.
+    """
+    table = file.copy()
+    if table.empty:
+        # str.split(expand=True) yields no columns to assign on an empty table.
+        table['coverage'] = pd.Series(dtype=int)
+        return table
+
+    table[['A1', 'A2']] = table['Alleles'].str.split(' ', expand=True)
+
+    a1_count = table['A1'].str.split(':', expand=True)[1].astype(int)
+    a2_count = table['A2'].str.split(':', expand=True)[1].astype(int)
+    table['coverage'] = a1_count + a2_count
+
+    return table[table['coverage'] > depth_filter]
+
+def minor_contamination(normal, tumor, depth_filter):
+    """Return the mean tumor MAF over loci with MAF < 0.10 in the normal.
+
+    Loci are matched on 'Position' instead of row order, so the two tables may
+    list their loci differently. Only tumor loci passing the depth filter are
+    averaged; the result is NaN when none remain.
+    """
+    homozygous_positions = normal.loc[normal['MAF'] < .10, 'Position']
+    tumor_homozygous = tumor[tumor['Position'].isin(homozygous_positions)]
+    tumor_homozygous_filtered = get_coverage(tumor_homozygous, depth_filter)
+
+    return tumor_homozygous_filtered['MAF'].mean()
+
+def main():
+    """Parse the command line, compute both contamination metrics and write the output TSV."""
+    parser = argparse.ArgumentParser(prog=sys.argv[0], description='Calculate major and minor contamination')
+
+    parser.add_argument('-t','--tumor',
+                        required=True,
+                        help='Tumor fingerprint table file')
+
+    parser.add_argument('-n','--normal',
+                        required=True,
+                        help='Normal fingerprint table file')
+
+    parser.add_argument('-o','--output',
+                        required=True,
+                        help='Output file for contamination results')
+
+    parser.add_argument('-d','--depthfilter',
+                        required=False,
+                        default=20,
+                        type=int,
+                        help='Depth filter for coverage (default: 20)'
+                        )
+
+    parser.add_argument('--version',
+                        action='version',
+                        version='%(prog)s ' + __version__
+                        )
+
+    args = parser.parse_args()
+
+    fields = ['Position', 'Alleles', 'Genotype', 'MAF']
+
+    tumor = pd.read_csv(args.tumor, sep='\t',names=fields,header=0)
+    normal = pd.read_csv(args.normal, sep='\t',names=fields,header=0)
+
+    major_contam = major_contamination(tumor, depth_filter=args.depthfilter)
+    minor_contam = minor_contamination(normal, tumor, depth_filter=args.depthfilter)
+
+    with open(args.output,'w') as f:
+        f.write("Tumor\tNormal\tMajor_Contamination\tMinor_Contamination\n")
+        f.write("{}\t{}\t{:.4f}\t{:.4f}\n".format(
+            os.path.basename(args.tumor),
+            os.path.basename(args.normal),
+            major_contam,
+            minor_contam))
+
+if __name__== "__main__":
+    main()
diff --git a/modules/msk/custom/fingerprintcontamination/tests/main.nf.test b/modules/msk/custom/fingerprintcontamination/tests/main.nf.test
new file mode 100644
index 00000000..bb89f9e0
--- /dev/null
+++ b/modules/msk/custom/fingerprintcontamination/tests/main.nf.test
@@ -0,0 +1,108 @@
+nextflow_process {
+
+    name "Test Process CUSTOM_FINGERPRINTCONTAMINATION"
+    script "../main.nf"
+    process "CUSTOM_FINGERPRINTCONTAMINATION"
+
config "./nextflow.config" + + tag "modules" + tag "modules_msk" + tag "custom" + tag "custom/fingerprintcontamination" + tag "gbcms" + tag "custom/fingerprintvcfparser" + + test("sarscov2 - bam") { + setup { + run("GBCMS"){ + script "../../../gbcms/main.nf" + process { + """ + input[0] = [ + [ id:'test', sample:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), + "variant_file.vcf" + ] + input[1] = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + input[2] = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + """ + } + } + run("CUSTOM_FINGERPRINTVCFPARSER"){ + script "../../fingerprintvcfparser/main.nf" + process { + """ + input[0] = GBCMS.out.variant_file + """ + } + } + } + + when { + process { + """ + input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv.map{ meta, tsv -> [meta,tsv,[]]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam - stub") { + + options "-stub" + + setup { + run("GBCMS"){ + script "../../../gbcms/main.nf" + process { + """ + input[0] = [ + [ id:'test', sample:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), + "variant_file.vcf" + ] + input[1] = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + input[2] = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + """ + } + } + 
run("CUSTOM_FINGERPRINTVCFPARSER"){ + script "../../fingerprintvcfparser/main.nf" + process { + """ + input[0] = GBCMS.out.variant_file + """ + } + } + } + + when { + process { + """ + input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv.map{ meta, tsv -> [meta,tsv,[]]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/msk/custom/fingerprintcontamination/tests/main.nf.test.snap b/modules/msk/custom/fingerprintcontamination/tests/main.nf.test.snap new file mode 100644 index 00000000..7ceedf6b --- /dev/null +++ b/modules/msk/custom/fingerprintcontamination/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "sarscov2 - bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "sample": "test" + }, + "test.contamination.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,f1635e715bcdf39792aa9f7fc3cf4d84" + ], + "contamination_tsv": [ + [ + { + "id": "test", + "sample": "test" + }, + "test.contamination.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,f1635e715bcdf39792aa9f7fc3cf4d84" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-25T17:13:52.297869395" + }, + "sarscov2 - bam": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "sample": "test" + }, + "test.contamination.tsv:md5,5b533c60b8eff1f4d2c5fe58a8262303" + ] + ], + "1": [ + "versions.yml:md5,f1635e715bcdf39792aa9f7fc3cf4d84" + ], + "contamination_tsv": [ + [ + { + "id": "test", + "sample": "test" + }, + "test.contamination.tsv:md5,5b533c60b8eff1f4d2c5fe58a8262303" + ] + ], + "versions": [ + "versions.yml:md5,f1635e715bcdf39792aa9f7fc3cf4d84" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-25T17:13:42.805178656" + } +} \ No newline at end of file diff --git a/modules/msk/custom/fingerprintcontamination/tests/nextflow.config 
b/modules/msk/custom/fingerprintcontamination/tests/nextflow.config new file mode 100644 index 00000000..fbd2b2d0 --- /dev/null +++ b/modules/msk/custom/fingerprintcontamination/tests/nextflow.config @@ -0,0 +1,10 @@ +process { + + withName: 'CUSTOM_FINGERPRINTCONTAMINATION' { + ext.args = "-d 0" + } + + withName: 'CUSTOM_FINGERPRINTVCFPARSER' { + ext.args = "-d 0" + } +} diff --git a/modules/msk/custom/fingerprintcontamination/tests/stash_main.nf.test_stash b/modules/msk/custom/fingerprintcontamination/tests/stash_main.nf.test_stash new file mode 100644 index 00000000..ba4496a5 --- /dev/null +++ b/modules/msk/custom/fingerprintcontamination/tests/stash_main.nf.test_stash @@ -0,0 +1,113 @@ +nextflow_process { + + name "Test Process CUSTOM_FINGERPRINTCONTAMINATION" + script "../main.nf" + process "CUSTOM_FINGERPRINTCONTAMINATION" + config "./nextflow.config" + + tag "modules" + tag "modules_msk" + tag "custom" + tag "custom/fingerprintcontamination" + + test("homo sapiens - chr 22 bam") { + setup { + run("GBCMS"){ + script "../../../gbcms/main.nf" + + params{ + input = "NA12878_GIAB.chr22.vcf" + } + + process { + """ + input[0] = [ + [ id:'test', sample:'test' ], // meta map + file("https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/homo_sapiens/illumina/bam/NA12878.chr22.bam", checkIfExists:true), + file("https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/homo_sapiens/illumina/bam/NA12878.chr22.bam.bai", checkIfExists:true), + file("$baseDir/modules/msk/custom/fingerprintcontamination/tests/NA12878_GIAB.chr22.vcf", checkIfExists:true), + "variant_file.vcf" + ] + input[1] = file(params.test_data_mskcc['calculate_noise']['test_chr22_fa'], checkIfExists: true) + input[2] = file(params.test_data_mskcc['calculate_noise']['test_chr22_fa_fai'], checkIfExists: true) + //input[1] = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + //input[2] = 
file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + """ + } + } + run("CUSTOM_FINGERPRINTVCFPARSER"){ + script "../../fingerprintvcfparser/main.nf" + process { + """ + input[0] = GBCMS.out.variant_file + """ + } + } + } + + when { + process { + """ + input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv.map{ meta, tsv -> [meta,tsv,[]]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam - stub") { + + options "-stub" + + setup { + run("GBCMS"){ + script "../../../gbcms/main.nf" + process { + """ + input[0] = [ + [ id:'test', sample:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), + "variant_file.vcf" + ] + input[1] = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + input[2] = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + """ + } + } + run("CUSTOM_FINGERPRINTVCFPARSER"){ + script "../../fingerprintvcfparser/main.nf" + process { + """ + input[0] = GBCMS.out.variant_file + """ + } + } + } + + when { + process { + """ + input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv.map{ meta, tsv -> [meta,tsv,[]]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/subworkflows/msk/fingerprint_gbcms/main.nf b/subworkflows/msk/fingerprint_gbcms/main.nf index d121fd55..823225a3 100644 --- a/subworkflows/msk/fingerprint_gbcms/main.nf +++ b/subworkflows/msk/fingerprint_gbcms/main.nf @@ -1,12 +1,20 @@ -include { GBCMS } from '../../../modules/msk/gbcms/main' -include { CUSTOM_FINGERPRINTVCFPARSER } from 
'../../../modules/msk/custom/fingerprintvcfparser/main'
+include { GBCMS                           } from '../../../modules/msk/gbcms/main'
+include { CUSTOM_FINGERPRINTVCFPARSER     } from '../../../modules/msk/custom/fingerprintvcfparser/main'
+include { CUSTOM_FINGERPRINTCONTAMINATION } from '../../../modules/msk/custom/fingerprintcontamination/main'
 
 workflow FINGERPRINT_GBCMS {
 
     take:
+    //ch_bam                   // channel: [ val(meta), [ bam ] ]
+    //ch_bai                   // channel: [ val(meta), [ bai ] ]
+    //ch_fp_loci_vcf           // channel: [ val(meta), [ vcf ] ]
+    //ch_fasta                 // channel: [ fasta ]
+    //ch_fastafai              // channel: [ fastafai ]
     ch_bam                     // channel: [ val(meta), [ bam ] ]
     ch_bai                     // channel: [ val(meta), [ bai ] ]
-    ch_fp_vcf                  // channel: [ val(meta), [ vcf ] ]
+    ch_fp_tsv                  // channel: [ val(meta), [ tsv ] ]
+    ch_fp_loci_vcf             // channel: [ val(meta), [ vcf ] ]
+    ch_liftover_loci_mapping   // channel: [ liftover_loci_mapping ]
     ch_fasta                   // channel: [ fasta ]
     ch_fastafai                // channel: [ fastafai ]
 
@@ -17,7 +25,7 @@ workflow FINGERPRINT_GBCMS {
     GBCMS (
         ch_bam
             .combine(ch_bai, by:[0])
-            .combine(ch_fp_vcf.map{ if (it.size() > 1){ it[1] } else { it }}.first())
+            .combine(ch_fp_loci_vcf.map{ if (it.size() > 1){ it[1] } else { it }}.first())
             .map{ meta, bam, bai, vcf -> [meta, bam, bai, vcf, meta.id + ".fp.vcf" ] },
         ch_fasta.first(),
         ch_fastafai.first()
@@ -29,7 +37,29 @@ workflow FINGERPRINT_GBCMS {
     CUSTOM_FINGERPRINTVCFPARSER ( GBCMS.out.variant_file )
     ch_versions = ch_versions.mix(CUSTOM_FINGERPRINTVCFPARSER.out.versions.first())
 
+    all_fps = CUSTOM_FINGERPRINTVCFPARSER.out.tsv.mix(ch_fp_tsv)
+
+    // Case samples with a control: attach the fingerprint whose meta.id equals meta.control_id.
+    paired_fps = all_fps
+        .filter{ meta, tsv -> meta.case_id != null && meta.control_id != null && meta.id == meta.case_id }
+        .combine(all_fps)
+        .filter{ meta1, fp1, meta2, fp2 ->
+            meta1.control_id == meta2.id
+        }.map{ meta1, fp1, meta2, fp2 ->
+            [ meta1, fp1, fp2]
+        }
+
+    // Everything else runs without a normal; [] is the empty placeholder for the path(fp_normal) input.
+    unpaired_fps = all_fps
+        .filter{ meta, tsv -> meta.id != meta.case_id || meta.control_id == null }
+        .map{ meta, tsv -> [ meta, tsv, [] ] }
+
+
CUSTOM_FINGERPRINTCONTAMINATION ( paired_fps.mix(unpaired_fps) ) + ch_versions = ch_versions.mix(CUSTOM_FINGERPRINTCONTAMINATION.out.versions.first()) + + emit: - fp_tsv = CUSTOM_FINGERPRINTVCFPARSER.out.tsv // channel: [ val(meta), tsv ] - versions = ch_versions // channel: [ versions.yml ] + fp_tsv = CUSTOM_FINGERPRINTVCFPARSER.out.tsv // channel: [ val(meta), tsv ] + contamination_tsv = CUSTOM_FINGERPRINTCONTAMINATION.out.contamination_tsv // channel: [ val(meta), contamination_tsv ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/msk/fingerprint_gbcms/meta.yml b/subworkflows/msk/fingerprint_gbcms/meta.yml index f90038e5..c0002dbb 100644 --- a/subworkflows/msk/fingerprint_gbcms/meta.yml +++ b/subworkflows/msk/fingerprint_gbcms/meta.yml @@ -9,6 +9,7 @@ keywords: - loci - vcf - bam + - qc components: - gbcms - custom/fingerprintvcfparser From 95548d1980cbb95f3e993bad974bc5c5ea01f735 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Thu, 25 Sep 2025 23:50:37 -0400 Subject: [PATCH 12/37] add custom/fingerprintcombine module --- .../custom/fingerprintcombine/environment.yml | 11 ++ modules/msk/custom/fingerprintcombine/main.nf | 66 +++++++++++ .../msk/custom/fingerprintcombine/meta.yml | 55 +++++++++ .../resources/usr/bin/complete_FP_table.R | 108 ++++++++++++++++++ .../fingerprintcombine/tests/loci_mapping.tsv | 10 ++ .../fingerprintcombine/tests/main.nf.test | 102 +++++++++++++++++ .../tests/main.nf.test.snap | 48 ++++++++ .../fingerprintcombine/tests/nextflow.config | 5 + 8 files changed, 405 insertions(+) create mode 100644 modules/msk/custom/fingerprintcombine/environment.yml create mode 100644 modules/msk/custom/fingerprintcombine/main.nf create mode 100644 modules/msk/custom/fingerprintcombine/meta.yml create mode 100755 modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R create mode 100644 modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv create mode 100644 
modules/msk/custom/fingerprintcombine/tests/main.nf.test
 create mode 100644 modules/msk/custom/fingerprintcombine/tests/main.nf.test.snap
 create mode 100644 modules/msk/custom/fingerprintcombine/tests/nextflow.config

diff --git a/modules/msk/custom/fingerprintcombine/environment.yml b/modules/msk/custom/fingerprintcombine/environment.yml
new file mode 100644
index 00000000..8a3b7591
--- /dev/null
+++ b/modules/msk/custom/fingerprintcombine/environment.yml
@@ -0,0 +1,11 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+- conda-forge
+- bioconda
+dependencies:
+- conda-forge::r-argparse=2.2.5
+- conda-forge::r-data.table=1.17.8
+- conda-forge::r-dplyr=1.1.4
+- conda-forge::r-plyr=1.8.9
+- conda-forge::r-tidyverse=2.0.0
diff --git a/modules/msk/custom/fingerprintcombine/main.nf b/modules/msk/custom/fingerprintcombine/main.nf
new file mode 100644
index 00000000..4d7b5e6d
--- /dev/null
+++ b/modules/msk/custom/fingerprintcombine/main.nf
@@ -0,0 +1,68 @@
+// Combines per-sample fingerprint TSVs into one wide table with complete_FP_table.R.
+process CUSTOM_FINGERPRINTCOMBINE {
+    tag "${sample.size()} samples"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'docker://community.wave.seqera.io/library/r-argparse_r-data.table_r-dplyr_r-plyr_r-tidyverse:8c0daffb3624cb66':
+        'community.wave.seqera.io/library/r-argparse_r-data.table_r-dplyr_r-plyr_r-tidyverse:8c0daffb3624cb66' }"
+        //' oras://community.wave.seqera.io/library/r-argparse_r-data.table_r-dplyr_r-plyr_r-tidyverse:d96a65055f79744c':
+
+
+    input:
+    tuple path(fp_tsv),      // list of paths to fingerprint TSV files
+          val(sample),       // list of sample identifiers, one per TSV file, in the same order
+          val(genome_build)  // list of genome builds, one per TSV file, in the same order
+    path(liftover_loci_mapping)
+
+    output:
+    path "*DPfilter_ALL_FP.txt", emit: combined_fp_tsv
+    path "versions.yml"        , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // The three parallel input lists are zipped into input.tsv (one row per sample) before the R script runs.
+    def args = task.ext.args ?: ''
+    """
+    declare -a fp_tsv_list
+    declare -a sample_list
+    declare -a genome_build_list
+    fp_tsv_list=(${fp_tsv.join(' ')})
+    sample_list=(${sample.join(' ')})
+    genome_build_list=(${genome_build.join(' ')})
+    echo -e "sample_id\tgenome_build\tfp_tsv" > input.tsv
+    for i in \$(seq 0 1 \$((\${#fp_tsv_list[@]}-1)) ) ; do
+        fp_tsv=\${fp_tsv_list[i]}
+        sample=\${sample_list[i]}
+        genome=\${genome_build_list[i]}
+        echo -e "\$sample\t\$genome\t\$fp_tsv"
+    done >> input.tsv
+
+    complete_FP_table.R \\
+        -i input.tsv \\
+        -l $liftover_loci_mapping \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        complete_FP_table.R: 0.1.0
+    END_VERSIONS
+    """
+
+    stub:
+    def args = task.ext.args ?: ''
+
+    """
+    echo $args
+
+    touch XDPfilter_ALL_FP.txt
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        complete_FP_table.R: 0.1.0
+    END_VERSIONS
+    """
+}
diff --git a/modules/msk/custom/fingerprintcombine/meta.yml b/modules/msk/custom/fingerprintcombine/meta.yml
new file mode 100644
index 00000000..52b4ed5a
--- /dev/null
+++ b/modules/msk/custom/fingerprintcombine/meta.yml
@@ -0,0 +1,55 @@
+# yaml-language-server:
$schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +# # TODO nf-core: Add a description of the module and list keywords +name: "custom_fingerprintcombine" +description: | + A module to combine multiple fingerprint TSV files into a single comprehensive + table, with optional liftover of loci coordinates. +keywords: +- fingerprint +- qc +- loci +- tsv +- correlation +tools: +## TODO nf-core: Add a description and other details for the software below +- "custom": + description: "A custom R script to combine fingerprint TSV files" + homepage: "https://github.com/mskcc-omics-workflows/modules/tree/main/modules/msk/custom/fingerprintcombine/meta.yml" + +input: + - - fp_tsv: + type: file + description: | + Fingerprint TSV files to be combined. + Structure: [ val(sample), val(genome_build), path(fp_tsv) ] + - sample: + type: string + description: Sample identifier corresponding to each fingerprint TSV file. + - genome_build: + type: string + description: Genome build (e.g., hg19, hg38) corresponding to each fingerprint TSV file. + - liftover_loci_mapping: + type: file + description: | + A TSV file mapping original loci to liftover loci. + Format: original_chr, original_pos, liftover_chr, liftover_pos + pattern: "*.tsv" +output: + combined_fp_tsv: + - "*DPfilter_ALL_FP.txt": + type: file + description: Wide table combining all input fingerprint TSV files. 
+      pattern: '*DPfilter_ALL_FP.txt'
+      ontologies:
+      - edam: http://edamontology.org/format_3475 # TSV
+  versions:
+  - versions.yml:
+      type: file
+      description: File containing software versions
+      pattern: versions.yml
+      ontologies:
+      - edam: http://edamontology.org/format_3750 # YAML
+authors:
+- "@anoronh4"
+maintainers:
+- "@anoronh4"
diff --git a/modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R b/modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R
new file mode 100755
index 00000000..3265f903
--- /dev/null
+++ b/modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R
@@ -0,0 +1,110 @@
+#! /usr/bin/env Rscript
+
+#-------------------------------------------------------------------------------
+# Script: complete_FP_table.R
+# Author: Erika Gedvilaite
+# Date: 2025-09-23
+# Version: 0.1.0
+#
+# Description: This script takes in standard fingerprint tables and combines
+# them into a single, wide table for downstream plotting and analysis.
+#
+# Annotation:
+# - Input table should have three columns: sample_id, genome_build, fp_tsv
+# - Genome build should be either "hg19" or "hg38" or "GRCh37" or "GRCh38"
+#   (case insensitive)
+#
+#-------------------------------------------------------------------------------
+
+
+rm(list=ls())
+
+library(argparse, quietly = T)
+library(plyr, quietly = T)
+library(dplyr, quietly = T)
+library(data.table, quietly = T)
+library(tidyverse, quietly = T)
+
+`%notin%` <- Negate(`%in%`)
+`%notlike%` <- Negate(`%like%`)
+
+parser = ArgumentParser(description = 'Generate FP tables for plotting')
+parser$add_argument('-i', '--input_table', required = TRUE,
+    help = 'Input table with paths to individual fingerprint TSV files, sample ids, and genome build')
+parser$add_argument('-o', '--analysis_folder', required = FALSE, default = ".",
+    help = 'Output folder')
+parser$add_argument('-l', '--loci_mapper', required = TRUE,
+    help = 'Loci mapper file')
+parser$add_argument('-d', '--depth_filter', required = FALSE, default = 20, type = "integer",
+    help = 'Depth filter to apply to individual fingerprint TSV files (default: 20)')
+args = parser$parse_args()
+
+
+
+message("Reading in Liftover file")
+
+hg19_hg38_mapper = fread(args$loci_mapper,header = T)
+hg19_hg38_mapper$Loci_hg19 = paste(hg19_hg38_mapper$GRCH37_CHROM,hg19_hg38_mapper$GRCH37_POS,sep=":")
+hg19_hg38_mapper$Loci_hg38 = paste(hg19_hg38_mapper$GRCH38_CHROM,hg19_hg38_mapper$GRCH38_POS,sep=":")
+hg19_hg38_mapper = hg19_hg38_mapper %>% select(Loci_hg19, Loci_hg38) %>% unique()
+
+message("Loading Samples")
+input_table = fread(args$input_table, header = T)
+if (nrow(input_table) == 0){
+  stop("Input table contains no samples.")
+}
+fp_tables = vector("list", nrow(input_table))
+for (i in seq_len(nrow(input_table))){
+  sample = input_table$sample_id[i]
+  genome_build = input_table$genome_build[i]
+  print(genome_build)
+  if (tolower(genome_build) %notin% c("hg19","grch37","hg38","grch38")){
+    stop(paste0("Genome build not recognized: ", genome_build, ". Must be in the following list: hg19, hg38, grch37, grch38 (case will be ignored)."))
+  }
+  file = input_table$fp_tsv[i]
+  if (!file.exists(file)){
+    stop(paste0("File does not exist: ", input_table$fp_tsv[i]))
+  }
+  temp_dataset <- fread(file, header = T, sep="\t")
+  colnames(temp_dataset) = c("Locus", "Count", "Genotype","VAF")
+  temp_dataset = separate(temp_dataset, Count, into = c(NA,'DP1',NA,'DP2'), remove = F)
+  temp_dataset$DP2[is.na(temp_dataset$DP2)==T] <- 0
+  temp_dataset$DP = as.numeric(temp_dataset$DP1) + as.numeric(temp_dataset$DP2)
+  temp_dataset = temp_dataset[temp_dataset$DP >= args$depth_filter,] ## keeping loci >= 20 dp by default
+  temp_dataset$VAF[is.na(temp_dataset$VAF)==T] <- 0
+  temp_dataset$Sample = sample #only loci with DP >= depth filter will have Sample info
+  temp_dataset = temp_dataset %>% select("Locus","Genotype","Sample","VAF")
+  temp_dataset$Locus = str_replace(temp_dataset$Locus,"^chr","") # strip only a leading "chr" prefix
+
+  if (tolower(genome_build) %in% c("hg19","grch37")){
+    temp_dataset = merge(hg19_hg38_mapper, temp_dataset, by.x = "Loci_hg19", by.y = "Locus", all.x = T)
+    temp_dataset$VAF[is.na(temp_dataset$VAF)==T] <- 0
+  } else if (tolower(genome_build) %in% c("hg38","grch38")){
+    temp_dataset = merge(hg19_hg38_mapper, temp_dataset, by.x = "Loci_hg38", by.y = "Locus", all.x = T)
+    temp_dataset$VAF[is.na(temp_dataset$VAF)==T] <- 0
+  }
+
+  fp_tables[[i]] = temp_dataset
+}
+# Bind by column name: merge() puts its key column first, so hg19 and hg38 tables order the Loci columns differently.
+all_gbcm = rbindlist(fp_tables, use.names = TRUE)
+all_gbcm = all_gbcm[is.na(all_gbcm$Sample)==F,] # filters out loci that don't have Sample info (i.e. loci not passing DP filter)
+all_gbcm$VAF = round(as.numeric(all_gbcm$VAF), 5)
+
+wide_all_gbcm = all_gbcm %>% pivot_wider(names_from = Sample, values_from = c(Genotype, VAF))
+
+message("Creating final GBCM file")
+
+all_fp_gbcm_final = merge(hg19_hg38_mapper, wide_all_gbcm,all.x = T)
+
+if (!dir.exists(args$analysis_folder)) {
+  dir.create(args$analysis_folder, recursive = TRUE)
+} else {
+  print(paste("Directory already exists:", args$analysis_folder))
+}
+
+message(paste("Output file: ", args$analysis_folder,"/",args$depth_filter,"DPfilter_ALL_FP.txt", sep=""))
+
+write.table(all_fp_gbcm_final, file = paste(args$analysis_folder,"/",args$depth_filter,"DPfilter_ALL_FP.txt", sep=""), append = F, sep = "\t", row.names = F, quote = F)
+
+message("FP file completed")
diff --git a/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv b/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv
new file mode 100644
index 00000000..7592a2e3
--- /dev/null
+++ b/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv
@@ -0,0 +1,10 @@
+GRCH37_CHROM	GRCH37_POS	GRCH38_CHROM	GRCH38_POS
+MT192765.1	197	MT192765.1	199
+MT192765.1	4788	MT192765.1	4900
+MT192765.1	8236	MT192765.1	8257
+MT192765.1	10506	MT192765.1	10528
+MT192765.1	11037	MT192765.1	11059
+MT192765.1	15009	MT192765.1	15500
+MT192765.1	18807	MT192765.1	18929
+MT192765.1	23813	MT192765.1	24835
+MT192765.1	24103	MT192765.1	25125
diff --git a/modules/msk/custom/fingerprintcombine/tests/main.nf.test b/modules/msk/custom/fingerprintcombine/tests/main.nf.test
new file mode 100644
index 00000000..11176e34
--- /dev/null
+++ b/modules/msk/custom/fingerprintcombine/tests/main.nf.test
@@ -0,0 +1,102 @@
+// nf-core modules test custom/fingerprintcombine
+nextflow_process {
+
+    name "Test Process CUSTOM_FINGERPRINTCOMBINE"
+    script "../main.nf"
+    process "CUSTOM_FINGERPRINTCOMBINE"
+    config "./nextflow.config"
+
+    tag "modules"
+    tag "modules_msk"
+    tag "custom"
+    tag "custom/fingerprintcombine"
+    tag "gbcms"
+    tag
"custom/fingerprintvcfparser" + + test("sarscov2 - bam") { + + setup { + run("GBCMS"){ + script "../../../gbcms/main.nf" + process { + """ + input[0] = Channel.of( + [ + [ id:'test', sample:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), + "variant_file.vcf" + ], + [ + [ id:'test2', sample:'test2' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), + "variant_file.vcf" + ], + ) + input[1] = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + input[2] = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + """ + } + } + run("CUSTOM_FINGERPRINTVCFPARSER"){ + script "../../fingerprintvcfparser/main.nf" + process { + """ + input[0] = GBCMS.out.variant_file + """ + } + } + } + + when { + process { + """ + input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv + .map{ meta, tsv -> ["placeholder",tsv, meta.id, "hg19"] } + .groupTuple(by:[0]) + .map{ placeholder, tsv, sampleid, genome -> [tsv, sampleid, genome] } + input[1] = file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + + test("sarscov2 - bam - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true)], + ["testsample"], + ["hg19"] + ] + input[1] = 
file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/msk/custom/fingerprintcombine/tests/main.nf.test.snap b/modules/msk/custom/fingerprintcombine/tests/main.nf.test.snap new file mode 100644 index 00000000..e8576d2f --- /dev/null +++ b/modules/msk/custom/fingerprintcombine/tests/main.nf.test.snap @@ -0,0 +1,48 @@ +{ + "sarscov2 - bam - stub": { + "content": [ + { + "0": [ + "XDPfilter_ALL_FP.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "1": [ + "versions.yml:md5,3bd40a0fd11a907f31110dd113fd88c2" + ], + "combined_fp_tsv": [ + "XDPfilter_ALL_FP.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "versions": [ + "versions.yml:md5,3bd40a0fd11a907f31110dd113fd88c2" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-25T22:47:09.499353594" + }, + "sarscov2 - bam": { + "content": [ + { + "0": [ + "0DPfilter_ALL_FP.txt:md5,509d9f7c1d89b9f8e2825bcc4793da3a" + ], + "1": [ + "versions.yml:md5,3bd40a0fd11a907f31110dd113fd88c2" + ], + "combined_fp_tsv": [ + "0DPfilter_ALL_FP.txt:md5,509d9f7c1d89b9f8e2825bcc4793da3a" + ], + "versions": [ + "versions.yml:md5,3bd40a0fd11a907f31110dd113fd88c2" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-25T22:47:03.219089934" + } +} \ No newline at end of file diff --git a/modules/msk/custom/fingerprintcombine/tests/nextflow.config b/modules/msk/custom/fingerprintcombine/tests/nextflow.config new file mode 100644 index 00000000..583ce385 --- /dev/null +++ b/modules/msk/custom/fingerprintcombine/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'CUSTOM_FINGERPRINTCOMBINE' { + ext.args = "-d 0" + } +} From b6e5229b2b74f50ce2a162e46708a5d7ada8bdd6 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Thu, 25 Sep 2025 23:55:57 -0400 Subject: 
[PATCH 13/37] add subworkflow fingerprint_gbcms_batch --- .../msk/fingerprint_gbcms_batch/main.nf | 27 +++++++ .../msk/fingerprint_gbcms_batch/meta.yml | 41 +++++++++++ .../tests/main.nf.test | 72 +++++++++++++++++++ .../tests/main.nf.test.snap | 25 +++++++ .../tests/nextflow.config | 5 ++ 5 files changed, 170 insertions(+) create mode 100644 subworkflows/msk/fingerprint_gbcms_batch/main.nf create mode 100644 subworkflows/msk/fingerprint_gbcms_batch/meta.yml create mode 100644 subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test create mode 100644 subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test.snap create mode 100644 subworkflows/msk/fingerprint_gbcms_batch/tests/nextflow.config diff --git a/subworkflows/msk/fingerprint_gbcms_batch/main.nf b/subworkflows/msk/fingerprint_gbcms_batch/main.nf new file mode 100644 index 00000000..98e227b7 --- /dev/null +++ b/subworkflows/msk/fingerprint_gbcms_batch/main.nf @@ -0,0 +1,27 @@ + +include { CUSTOM_FINGERPRINTCOMBINE } from '../../../modules/msk/custom/fingerprintcombine/main' + +workflow FINGERPRINT_GBCMS_BATCH { + + take: + ch_fp // channel: [ val(meta), [ bam ] ] + ch_liftover_loci_mapping // channel: [ liftover_loci_mapping ] + + main: + + ch_versions = Channel.empty() + + + CUSTOM_FINGERPRINTCOMBINE( + ch_fp + .map{ meta, tsv -> ["placeholder",tsv, meta.id, "hg19"] } + .groupTuple(by:[0]) + .map{ placeholder, tsv, sampleid, genome -> [tsv, sampleid, genome] }, + ch_liftover_loci_mapping.first() + ) + ch_versions = ch_versions.mix(CUSTOM_FINGERPRINTCOMBINE.out.versions.first()) + + emit: + combined_fp_tsv = CUSTOM_FINGERPRINTCOMBINE.out.combined_fp_tsv // channel: [ val(meta), [ bam ] ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/msk/fingerprint_gbcms_batch/meta.yml b/subworkflows/msk/fingerprint_gbcms_batch/meta.yml new file mode 100644 index 00000000..ca573350 --- /dev/null +++ b/subworkflows/msk/fingerprint_gbcms_batch/meta.yml @@ -0,0 +1,41 @@ +# 
yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "fingerprint_gbcms_batch" +description: "Subworkflow to combine and compare Fingerprint files from different samples" +keywords: + - fingerprint + - qc + - liftover + - batch + - pool +components: + - modules/msk/custom/fingerprintcombine +input: + - ch_fp: + type: file + description: | + The input channel containing one fingerprint file per sample + Structure: [ val(meta), path(fp_tsv) ] + pattern: "*.fp.tsv" + - ch_liftover_loci_mapping: + type: file + description: | + The input channel containing the loci mapping file for liftover + Structure: [ path(loci_mapping.tsv) ] + pattern: "*.tsv" +output: + - combined_fp_tsv: + type: file + description: | + Channel containing combined fingerprint TSV file + Structure: [ path(combined_fp_tsv) ] + pattern: "*DPfilter_ALL_FP.txt" + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" +authors: + - "@anoronh4" +maintainers: + - "@anoronh4" diff --git a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test new file mode 100644 index 00000000..bfdd825a --- /dev/null +++ b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test @@ -0,0 +1,72 @@ +// nf-core subworkflows test fingerprint_gbcms_batch +nextflow_workflow { + + name "Test Subworkflow FINGERPRINT_GBCMS_BATCH" + script "../main.nf" + config "./nextflow.config" + workflow "FINGERPRINT_GBCMS_BATCH" + + tag "subworkflows" + tag "subworkflows_msk" + tag "subworkflows/fingerprint_gbcms_batch" + tag "gbcms" + tag "custom/fingerprintvcfparser" + tag "custom/fingerprintcombine" + + + test("sarscov2 - bam - single_end") { + + setup { + run("GBCMS"){ + script "../../../../modules/msk/gbcms/main.nf" + process { + """ + input[0] = Channel.of( + [ + [ id:'test', sample:'test' ], // meta map + 
file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), + "variant_file.vcf" + ], + [ + [ id:'test2', sample:'test2' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), + "variant_file.vcf" + ], + ) + input[1] = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + input[2] = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + """ + } + } + run("CUSTOM_FINGERPRINTVCFPARSER"){ + script "../../../../modules/msk/custom/fingerprintvcfparser/main.nf" + process { + """ + input[0] = GBCMS.out.variant_file + """ + } + } + } + + when { + workflow { + """ + input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv + input[1] = [file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true)] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + + } +} diff --git a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test.snap b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test.snap new file mode 100644 index 00000000..7611bc83 --- /dev/null +++ b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test.snap @@ -0,0 +1,25 @@ +{ + "sarscov2 - bam - single_end": { + "content": [ + { + "0": [ + "0DPfilter_ALL_FP.txt:md5,509d9f7c1d89b9f8e2825bcc4793da3a" + ], + "1": [ + "versions.yml:md5,bc54e025756d97cd9b14d51a3c9e3667" + ], + "combined_fp_tsv": [ + "0DPfilter_ALL_FP.txt:md5,509d9f7c1d89b9f8e2825bcc4793da3a" + ], + "versions": [ + 
"versions.yml:md5,bc54e025756d97cd9b14d51a3c9e3667" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-25T23:49:26.050835746" + } +} \ No newline at end of file diff --git a/subworkflows/msk/fingerprint_gbcms_batch/tests/nextflow.config b/subworkflows/msk/fingerprint_gbcms_batch/tests/nextflow.config new file mode 100644 index 00000000..583ce385 --- /dev/null +++ b/subworkflows/msk/fingerprint_gbcms_batch/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'CUSTOM_FINGERPRINTCOMBINE' { + ext.args = "-d 0" + } +} From 257e8f9f134ff3f5ff42c7c592031d134ece774f Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Fri, 26 Sep 2025 00:27:27 -0400 Subject: [PATCH 14/37] update subworkflow to designate a genome to each sample fingerprint file --- subworkflows/msk/fingerprint_gbcms_batch/main.nf | 3 ++- subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/subworkflows/msk/fingerprint_gbcms_batch/main.nf b/subworkflows/msk/fingerprint_gbcms_batch/main.nf index 98e227b7..ee16fcd6 100644 --- a/subworkflows/msk/fingerprint_gbcms_batch/main.nf +++ b/subworkflows/msk/fingerprint_gbcms_batch/main.nf @@ -6,6 +6,7 @@ workflow FINGERPRINT_GBCMS_BATCH { take: ch_fp // channel: [ val(meta), [ bam ] ] ch_liftover_loci_mapping // channel: [ liftover_loci_mapping ] + default_genome main: @@ -14,7 +15,7 @@ workflow FINGERPRINT_GBCMS_BATCH { CUSTOM_FINGERPRINTCOMBINE( ch_fp - .map{ meta, tsv -> ["placeholder",tsv, meta.id, "hg19"] } + .map{ meta, tsv -> ["placeholder", tsv, meta.id, meta.genome ?: default_genome ] } .groupTuple(by:[0]) .map{ placeholder, tsv, sampleid, genome -> [tsv, sampleid, genome] }, ch_liftover_loci_mapping.first() diff --git a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test index bfdd825a..8500dcfd 100644 --- 
a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test +++ b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test @@ -57,6 +57,7 @@ nextflow_workflow { """ input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv input[1] = [file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true)] + input[2] = "hg19" """ } } From 33c6cbda5ede1535a7249c1713e071c79e9cf81a Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Fri, 26 Sep 2025 01:29:19 -0400 Subject: [PATCH 15/37] remove tag attribute on custom/fingerprintcombine --- modules/msk/custom/fingerprintcombine/main.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/msk/custom/fingerprintcombine/main.nf b/modules/msk/custom/fingerprintcombine/main.nf index 4d7b5e6d..be6b873d 100644 --- a/modules/msk/custom/fingerprintcombine/main.nf +++ b/modules/msk/custom/fingerprintcombine/main.nf @@ -1,5 +1,4 @@ process CUSTOM_FINGERPRINTCOMBINE { - tag '$bam' label 'process_single' conda "${moduleDir}/environment.yml" From 404b8ef8f38597f47f272eed4ddb8937ab3476a0 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Fri, 26 Sep 2025 01:31:16 -0400 Subject: [PATCH 16/37] update fingerprint_gbcms subworkflow, including fingerprint_gbcms_batch subworkflow --- subworkflows/msk/fingerprint_gbcms/main.nf | 38 +++++++---- .../msk/fingerprint_gbcms/tests/main.nf.test | 43 ++++++++---- .../fingerprint_gbcms/tests/main.nf.test.snap | 66 +++++++++++++++++-- .../fingerprint_gbcms/tests/nextflow.config | 13 ++++ 4 files changed, 130 insertions(+), 30 deletions(-) create mode 100644 subworkflows/msk/fingerprint_gbcms/tests/nextflow.config diff --git a/subworkflows/msk/fingerprint_gbcms/main.nf b/subworkflows/msk/fingerprint_gbcms/main.nf index 823225a3..5f67f594 100644 --- a/subworkflows/msk/fingerprint_gbcms/main.nf +++ b/subworkflows/msk/fingerprint_gbcms/main.nf @@ -1,15 +1,11 @@ include { GBCMS } from '../../../modules/msk/gbcms/main' include { CUSTOM_FINGERPRINTVCFPARSER } from 
'../../../modules/msk/custom/fingerprintvcfparser/main' include { CUSTOM_FINGERPRINTCONTAMINATION } from '../../../modules/msk/custom/fingerprintcontamination/main' +include { FINGERPRINT_GBCMS_BATCH } from '../fingerprint_gbcms_batch/main' workflow FINGERPRINT_GBCMS { take: - //ch_bam // channel: [ val(meta), [ bam ] ] - //ch_bai // channel: [ val(meta), [ bai ] ] - //ch_fp_loci_vcf // channel: [ val(meta), [ vcf ] ] - //ch_fasta // channel: [ fasta ] - //ch_fastafai // channel: [ fastafai ] ch_bam // channel: [ val(meta), [ bam ] ] ch_bai // channel: [ val(meta), [ bai ] ] ch_fp_tsv // channel: [ val(meta), [ tsv ] ] @@ -17,16 +13,21 @@ workflow FINGERPRINT_GBCMS { ch_liftover_loci_mapping // channel: [ liftover_loci_mapping ] ch_fasta // channel: [ fasta ] ch_fastafai // channel: [ fastafai ] + default_genome // channel: [ genome ] + run_correlation main: ch_versions = Channel.empty() - GBCMS ( + println ch_fp_loci_vcf.getClass() + println ch_fasta.getClass() + + GBCMS( ch_bam .combine(ch_bai, by:[0]) - .combine(ch_fp_loci_vcf.map{ if (it.size() > 1){ it[1] } else { it }}.first()) - .map{ meta, bam, bai, vcf -> [meta, bam, bai, vcf, meta.id + ".fp.vcf" ] }, + .combine(ch_fp_loci_vcf.map{ if ( [it].flatten().size() > 1){ it[1] } else { it }}.first()) + .map{ meta, bam, bai, vcf -> [ meta, bam, bai, vcf, meta.id + ".fp.vcf" ] }.view(), ch_fasta.first(), ch_fastafai.first() //ch_fasta.view().map{ if (it[0] instanceof Map){ it[1] } else { it }}.first(), @@ -34,14 +35,17 @@ workflow FINGERPRINT_GBCMS { ) ch_versions = ch_versions.mix(GBCMS.out.versions.first()) + + CUSTOM_FINGERPRINTVCFPARSER ( GBCMS.out.variant_file ) ch_versions = ch_versions.mix(CUSTOM_FINGERPRINTVCFPARSER.out.versions.first()) all_fps = CUSTOM_FINGERPRINTVCFPARSER.out.tsv.mix(ch_fp_tsv) + paired_fps = all_fps .filter{ meta, tsv -> meta.case_id != null && meta.control_id != null && meta.id == meta.case_id } - .combine(all_fps.out.tsv) + .combine(all_fps) .filter{ meta1, fp1, meta2, fp2 -> 
meta1.control_id == meta2.id }.map{ meta1, fp1, meta2, fp2 -> @@ -50,14 +54,26 @@ workflow FINGERPRINT_GBCMS { unpaired_fps = all_fps .filter{ meta, tsv -> meta.id != meta.case_id || meta.control_id == null } - .map{ meta, tsv -> [ meta, tsv, null ] } + .map{ meta, tsv -> [ meta, tsv, [] ] } - CUSTOM_FINGERPRINTCONTAMINATION ( paired_fps.mix(unpaired_fps) ) + CUSTOM_FINGERPRINTCONTAMINATION ( paired_fps.mix(unpaired_fps).view() ) ch_versions = ch_versions.mix(CUSTOM_FINGERPRINTCONTAMINATION.out.versions.first()) + if (run_correlation) { + FINGERPRINT_GBCMS_BATCH ( + all_fps, + ch_liftover_loci_mapping, + default_genome + ) + ch_versions = ch_versions.mix(FINGERPRINT_GBCMS_BATCH.out.versions.first()) + } else { + FINGERPRINT_GBCMS_BATCH.out.combined_fp_tsv = Channel.empty() + } emit: fp_tsv = CUSTOM_FINGERPRINTVCFPARSER.out.tsv // channel: [ val(meta), tsv ] contamination_tsv = CUSTOM_FINGERPRINTCONTAMINATION.out.contamination_tsv // channel: [ val(meta), contamination_tsv ] + combined_fp_tsv = FINGERPRINT_GBCMS_BATCH.out.combined_fp_tsv // channel: [ tsv ] versions = ch_versions // channel: [ versions.yml ] + } diff --git a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test index 55de93ca..46a8bdd8 100644 --- a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test +++ b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test @@ -2,11 +2,13 @@ nextflow_workflow { name "Test Subworkflow FINGERPRINT_GBCMS" script "../main.nf" + config "./nextflow.config" workflow "FINGERPRINT_GBCMS" tag "subworkflows" tag "subworkflows_msk" tag "subworkflows/fingerprint_gbcms" + tag "subworkflows/fingerprint_gbcms_batch" tag "gbcms" tag "custom/fingerprintvcfparser" @@ -15,20 +17,33 @@ nextflow_workflow { when { workflow { """ - input[0] = Channel.of([ - [ id:'test', sample:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true) - ]) - input[1] = Channel.of([ - [ 
id:'test', sample:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true), - ]) - input[2] = Channel.of([ - [:], - file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) - ]) - input[3] = Channel.of(file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)) - input[4] = Channel.of(file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)) + input[0] = Channel.of( + [ + [ id:'test', sample:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true) + ], + [ + [ id:'test2', sample:'test2' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true) + ], + ) + input[1] = Channel.of( + [ + [ id:'test', sample:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true) + ], + [ + [ id:'test2', sample:'test2' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true) + ], + ) + input[2] = Channel.empty() + input[3] = Channel.of(file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)) + input[4] = [file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true)] + input[5] = Channel.of(file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)) + input[6] = Channel.of(file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)) + input[7] = "hg19" + input[8] = true """ } } diff --git a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap index 9952b0bd..17ed67dc 100644 --- a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap +++ b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap @@ -8,12 +8,59 @@ "id": 
"test", "sample": "test" }, - "test.fp.tsv:md5,9fa9a081f17ee52f03463c96d46a23aa" + "test.fp.tsv:md5,c467328eb3c7fb534b555b83b0227206" + ], + [ + { + "id": "test2", + "sample": "test2" + }, + "test2.fp.tsv:md5,c3fbcee584048e9bc4fc93bc6ca487d2" ] ], "1": [ + [ + { + "id": "test", + "sample": "test" + }, + "test.contamination.tsv:md5,5b533c60b8eff1f4d2c5fe58a8262303" + ], + [ + { + "id": "test2", + "sample": "test2" + }, + "test2.contamination.tsv:md5,2eb950d4d5e0f9b4f7ae53d41d22fb5f" + ] + ], + "2": [ + "0DPfilter_ALL_FP.txt:md5,509d9f7c1d89b9f8e2825bcc4793da3a" + ], + "3": [ + "versions.yml:md5,16a7edd0fbcb47825904a8cb939c7620", "versions.yml:md5,41ff30ed71b1d19e95a6095b9ac3ca94", - "versions.yml:md5,4b4b3ad40aa1c2c3a002c0c347c385b8" + "versions.yml:md5,4b4b3ad40aa1c2c3a002c0c347c385b8", + "versions.yml:md5,4fb29c6ff25ce4e29f6cf293a70aa8e6" + ], + "combined_fp_tsv": [ + "0DPfilter_ALL_FP.txt:md5,509d9f7c1d89b9f8e2825bcc4793da3a" + ], + "contamination_tsv": [ + [ + { + "id": "test", + "sample": "test" + }, + "test.contamination.tsv:md5,5b533c60b8eff1f4d2c5fe58a8262303" + ], + [ + { + "id": "test2", + "sample": "test2" + }, + "test2.contamination.tsv:md5,2eb950d4d5e0f9b4f7ae53d41d22fb5f" + ] ], "fp_tsv": [ [ @@ -21,12 +68,21 @@ "id": "test", "sample": "test" }, - "test.fp.tsv:md5,9fa9a081f17ee52f03463c96d46a23aa" + "test.fp.tsv:md5,c467328eb3c7fb534b555b83b0227206" + ], + [ + { + "id": "test2", + "sample": "test2" + }, + "test2.fp.tsv:md5,c3fbcee584048e9bc4fc93bc6ca487d2" ] ], "versions": [ + "versions.yml:md5,16a7edd0fbcb47825904a8cb939c7620", "versions.yml:md5,41ff30ed71b1d19e95a6095b9ac3ca94", - "versions.yml:md5,4b4b3ad40aa1c2c3a002c0c347c385b8" + "versions.yml:md5,4b4b3ad40aa1c2c3a002c0c347c385b8", + "versions.yml:md5,4fb29c6ff25ce4e29f6cf293a70aa8e6" ] } ], @@ -34,6 +90,6 @@ "nf-test": "0.9.2", "nextflow": "25.04.6" }, - "timestamp": "2025-08-14T19:50:44.771060618" + "timestamp": "2025-09-26T01:19:25.852151971" } } \ No newline at end of file diff --git 
a/subworkflows/msk/fingerprint_gbcms/tests/nextflow.config b/subworkflows/msk/fingerprint_gbcms/tests/nextflow.config new file mode 100644 index 00000000..17e225ec --- /dev/null +++ b/subworkflows/msk/fingerprint_gbcms/tests/nextflow.config @@ -0,0 +1,13 @@ +process { + withName: 'CUSTOM_FINGERPRINTCOMBINE' { + ext.args = "-d 0" + } + + withName: 'CUSTOM_FINGERPRINTCONTAMINATION' { + ext.args = "-d 0" + } + + withName: 'CUSTOM_FINGERPRINTVCFPARSER' { + ext.args = "-d 0" + } +} From ebf5800fff8c95b95434aa519bef8f80236397fa Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Fri, 26 Sep 2025 10:19:22 -0400 Subject: [PATCH 17/37] skip conda tests for fingerprint modules and subworkflows --- .github/skip_nf_test.json | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/skip_nf_test.json b/.github/skip_nf_test.json index 2991ca79..443e1390 100644 --- a/.github/skip_nf_test.json +++ b/.github/skip_nf_test.json @@ -1,6 +1,8 @@ { "conda": [ "modules/msk/custom/fingerprintvcfparser", + "modules/msk/custom/fingerprintcontamination", + "modules/msk/custom/fingerprintcombine", "modules/msk/calculatenoise", "modules/msk/ppflagfixer", "modules/msk/facets", From eb9e073ad88e221e897feb5acaf43bf00cfbbdcc Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Fri, 26 Sep 2025 10:32:13 -0400 Subject: [PATCH 18/37] update version output of contamination script --- .../resources/usr/bin/calculate_contamination.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py b/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py index dea92003..40e1a210 100755 --- a/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py +++ b/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py @@ -52,7 +52,7 @@ def minor_contamination(normal, tumor, depth_filter): return 
tumor_homozygous_filtered['MAF'].mean() def main(): - parser = argparse.ArgumentParser(prog=sys.argv[0], description='Calculate major and minor contamination') + parser = argparse.ArgumentParser(prog=os.path.basename(sys.argv[0]), description='Calculate major and minor contamination') parser.add_argument('-t','--tumor', required=True, From e0cf039445a2b64df9c3cc103e27e296964a7c68 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Fri, 26 Sep 2025 10:34:32 -0400 Subject: [PATCH 19/37] update snapshot --- .../fingerprintcontamination/tests/main.nf.test.snap | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/msk/custom/fingerprintcontamination/tests/main.nf.test.snap b/modules/msk/custom/fingerprintcontamination/tests/main.nf.test.snap index 7ceedf6b..5b51c22a 100644 --- a/modules/msk/custom/fingerprintcontamination/tests/main.nf.test.snap +++ b/modules/msk/custom/fingerprintcontamination/tests/main.nf.test.snap @@ -12,7 +12,7 @@ ] ], "1": [ - "versions.yml:md5,f1635e715bcdf39792aa9f7fc3cf4d84" + "versions.yml:md5,904a4c1ae690600f67c1ceb3d72c5ce1" ], "contamination_tsv": [ [ @@ -24,7 +24,7 @@ ] ], "versions": [ - "versions.yml:md5,f1635e715bcdf39792aa9f7fc3cf4d84" + "versions.yml:md5,904a4c1ae690600f67c1ceb3d72c5ce1" ] } ], @@ -32,7 +32,7 @@ "nf-test": "0.9.2", "nextflow": "25.04.6" }, - "timestamp": "2025-09-25T17:13:52.297869395" + "timestamp": "2025-09-26T10:33:23.354208776" }, "sarscov2 - bam": { "content": [ @@ -47,7 +47,7 @@ ] ], "1": [ - "versions.yml:md5,f1635e715bcdf39792aa9f7fc3cf4d84" + "versions.yml:md5,904a4c1ae690600f67c1ceb3d72c5ce1" ], "contamination_tsv": [ [ @@ -59,7 +59,7 @@ ] ], "versions": [ - "versions.yml:md5,f1635e715bcdf39792aa9f7fc3cf4d84" + "versions.yml:md5,904a4c1ae690600f67c1ceb3d72c5ce1" ] } ], @@ -67,6 +67,6 @@ "nf-test": "0.9.2", "nextflow": "25.04.6" }, - "timestamp": "2025-09-25T17:13:42.805178656" + "timestamp": "2025-09-26T10:33:12.245205382" } } \ No newline at end of file From 
66910630f9ee65a6b1326042c81b8963ba9aeb6c Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Fri, 26 Sep 2025 11:07:12 -0400 Subject: [PATCH 20/37] update snapshot --- subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap index 17ed67dc..492f6ee5 100644 --- a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap +++ b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap @@ -38,7 +38,7 @@ "0DPfilter_ALL_FP.txt:md5,509d9f7c1d89b9f8e2825bcc4793da3a" ], "3": [ - "versions.yml:md5,16a7edd0fbcb47825904a8cb939c7620", + "versions.yml:md5,1d2160eb0eb102d00d5786a8d056328e", "versions.yml:md5,41ff30ed71b1d19e95a6095b9ac3ca94", "versions.yml:md5,4b4b3ad40aa1c2c3a002c0c347c385b8", "versions.yml:md5,4fb29c6ff25ce4e29f6cf293a70aa8e6" @@ -79,7 +79,7 @@ ] ], "versions": [ - "versions.yml:md5,16a7edd0fbcb47825904a8cb939c7620", + "versions.yml:md5,1d2160eb0eb102d00d5786a8d056328e", "versions.yml:md5,41ff30ed71b1d19e95a6095b9ac3ca94", "versions.yml:md5,4b4b3ad40aa1c2c3a002c0c347c385b8", "versions.yml:md5,4fb29c6ff25ce4e29f6cf293a70aa8e6" @@ -90,6 +90,6 @@ "nf-test": "0.9.2", "nextflow": "25.04.6" }, - "timestamp": "2025-09-26T01:19:25.852151971" + "timestamp": "2025-09-26T11:05:45.091814897" } } \ No newline at end of file From de1c5f604ccb7906711cb162fea9d956f6c37fa3 Mon Sep 17 00:00:00 2001 From: anoronh4 Date: Wed, 5 Nov 2025 16:41:53 -0500 Subject: [PATCH 21/37] exclude X, Y chromosomes from contamination calculations --- .../resources/usr/bin/calculate_contamination.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py b/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py index 40e1a210..a8d23c06 100755 --- 
a/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py +++ b/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py @@ -42,11 +42,7 @@ def get_coverage(file, depth_filter): def minor_contamination(normal, tumor, depth_filter): homozygous_sites = normal.index[normal['MAF'] < .10] - - print(homozygous_sites) - tumor_homozygous = tumor.loc[homozygous_sites] - print(tumor_homozygous) tumor_homozygous_filtered = get_coverage(tumor_homozygous, depth_filter) return tumor_homozygous_filtered['MAF'].mean() @@ -83,7 +79,9 @@ def main(): fields = ['Position', 'Alleles', 'Genotype', 'MAF'] tumor = pd.read_csv(args.tumor, sep='\t',names=fields,header=0) + tumor = tumor[~tumor['Position'].str.contains('X|Y', na=False)] normal = pd.read_csv(args.normal, sep='\t',names=fields,header=0) + normal = normal[~normal['Position'].str.contains('X|Y', na=False)] major_contam = major_contamination(tumor, depth_filter=args.depthfilter) minor_contam = minor_contamination(normal, tumor, depth_filter=args.depthfilter) From a461d1eaa857d2e6f21586d2a2ed6765544a2e90 Mon Sep 17 00:00:00 2001 From: anoronh4 Date: Thu, 6 Nov 2025 15:57:23 -0500 Subject: [PATCH 22/37] set index of table to 'Position' --- .../resources/usr/bin/calculate_contamination.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py b/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py index a8d23c06..8a0dc2ee 100755 --- a/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py +++ b/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py @@ -80,8 +80,10 @@ def main(): tumor = pd.read_csv(args.tumor, sep='\t',names=fields,header=0) tumor = tumor[~tumor['Position'].str.contains('X|Y', na=False)] + tumor = tumor.set_index('Position') normal = pd.read_csv(args.normal, 
sep='\t',names=fields,header=0) normal = normal[~normal['Position'].str.contains('X|Y', na=False)] + normal = normal.set_index('Position') major_contam = major_contamination(tumor, depth_filter=args.depthfilter) minor_contam = minor_contamination(normal, tumor, depth_filter=args.depthfilter) From a280d3020773097ee1ed08245adb6369724a18b7 Mon Sep 17 00:00:00 2001 From: anoronh4 Date: Thu, 6 Nov 2025 19:31:17 -0500 Subject: [PATCH 23/37] fixed filtering of table by index labels --- .../resources/usr/bin/calculate_contamination.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py b/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py index 8a0dc2ee..3f5ece93 100755 --- a/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py +++ b/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py @@ -42,7 +42,7 @@ def get_coverage(file, depth_filter): def minor_contamination(normal, tumor, depth_filter): homozygous_sites = normal.index[normal['MAF'] < .10] - tumor_homozygous = tumor.loc[homozygous_sites] + tumor_homozygous = tumor.loc[[i for i in homozygous_sites if i in tumor.index]] tumor_homozygous_filtered = get_coverage(tumor_homozygous, depth_filter) return tumor_homozygous_filtered['MAF'].mean() From 04ff5ad4590ab2ca48623e782cace313fdf8e339 Mon Sep 17 00:00:00 2001 From: anoronh4 Date: Thu, 6 Nov 2025 19:32:49 -0500 Subject: [PATCH 24/37] fix indentation --- .github/skip_nf_test.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/skip_nf_test.json b/.github/skip_nf_test.json index 443e1390..41eb3ca5 100644 --- a/.github/skip_nf_test.json +++ b/.github/skip_nf_test.json @@ -1,8 +1,8 @@ { "conda": [ "modules/msk/custom/fingerprintvcfparser", - "modules/msk/custom/fingerprintcontamination", - "modules/msk/custom/fingerprintcombine", + 
"modules/msk/custom/fingerprintcontamination", + "modules/msk/custom/fingerprintcombine", "modules/msk/calculatenoise", "modules/msk/ppflagfixer", "modules/msk/facets", From e5fb9eda245ecc87f5a59648b9ca5ff660ff73f5 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Tue, 16 Dec 2025 23:26:09 -0500 Subject: [PATCH 25/37] fix file formatting and spacing --- .../fingerprintcombine/tests/loci_mapping.tsv | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv b/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv index 7592a2e3..0339b805 100644 --- a/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv +++ b/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv @@ -1,10 +1,10 @@ -GRCH37_CHROM GRCH37_POS GRCH38_CHROM GRCH38_POS -MT192765.1 197 MT192765.1 199 -MT192765.1 4788 MT192765.1 4900 -MT192765.1 8236 MT192765.1 8257 -MT192765.1 10506 MT192765.1 10528 -MT192765.1 11037 MT192765.1 11059 -MT192765.1 15009 MT192765.1 15500 -MT192765.1 18807 MT192765.1 18929 -MT192765.1 23813 MT192765.1 24835 -MT192765.1 24103 MT192765.1 25125 +GRCH37_CHROM GRCH37_POS GRCH38_CHROM GRCH38_POS +MT192765.1 197 MT192765.1 199 +MT192765.1 4788 MT192765.1 4900 +MT192765.1 8236 MT192765.1 8257 +MT192765.1 10506 MT192765.1 10528 +MT192765.1 11037 MT192765.1 11059 +MT192765.1 15009 MT192765.1 15500 +MT192765.1 18807 MT192765.1 18929 +MT192765.1 23813 MT192765.1 24835 +MT192765.1 24103 MT192765.1 25125 From 0c3c341de7152f696929ad86e5a4b4805f8333d3 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Tue, 16 Dec 2025 23:27:11 -0500 Subject: [PATCH 26/37] add meta map to custom/fingerprintcombine --- modules/msk/custom/fingerprintcombine/main.nf | 8 +++++--- .../msk/custom/fingerprintcombine/tests/main.nf.test | 11 ++++++----- subworkflows/msk/fingerprint_gbcms_batch/main.nf | 10 +++++++--- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git 
a/modules/msk/custom/fingerprintcombine/main.nf b/modules/msk/custom/fingerprintcombine/main.nf index be6b873d..175a140c 100644 --- a/modules/msk/custom/fingerprintcombine/main.nf +++ b/modules/msk/custom/fingerprintcombine/main.nf @@ -1,4 +1,5 @@ process CUSTOM_FINGERPRINTCOMBINE { + tag '$meta.id' label 'process_single' conda "${moduleDir}/environment.yml" @@ -9,14 +10,15 @@ process CUSTOM_FINGERPRINTCOMBINE { input: - tuple path(fp_tsv), // list of paths to fingerprint TSV files + tuple val(meta), + path(fp_tsv), // list of paths to fingerprint TSV files val(sample), // list of sample identifiers, one per TSV file, in the same order val(genome_build) // list of genome builds, one per TSV file, in the same order path(liftover_loci_mapping) output: - path "*DPfilter_ALL_FP.txt", emit: combined_fp_tsv - path "versions.yml" , emit: versions + tuple val(meta), path("*DPfilter_ALL_FP.txt"), emit: combined_fp_tsv + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/msk/custom/fingerprintcombine/tests/main.nf.test b/modules/msk/custom/fingerprintcombine/tests/main.nf.test index 11176e34..005f6a8c 100644 --- a/modules/msk/custom/fingerprintcombine/tests/main.nf.test +++ b/modules/msk/custom/fingerprintcombine/tests/main.nf.test @@ -22,14 +22,14 @@ nextflow_process { """ input[0] = Channel.of( [ - [ id:'test', sample:'test' ], // meta map + [ id:'test', sample:'test', pool:'mypool' ], // meta map file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), "variant_file.vcf" ], [ - [ id:'test2', sample:'test2' ], // meta map + [ id:'test2', sample:'test2', pool:'mypool' ], // meta map file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true), 
file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), @@ -55,9 +55,10 @@ nextflow_process { process { """ input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv - .map{ meta, tsv -> ["placeholder",tsv, meta.id, "hg19"] } - .groupTuple(by:[0]) - .map{ placeholder, tsv, sampleid, genome -> [tsv, sampleid, genome] } + .map{ meta, tsv -> + def meta2 = [id:meta.pool] + [[id:meta.pool], tsv, meta.id, "hg19"] + }.groupTuple(by:[0]) input[1] = file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true) """ } diff --git a/subworkflows/msk/fingerprint_gbcms_batch/main.nf b/subworkflows/msk/fingerprint_gbcms_batch/main.nf index ee16fcd6..2b42fe0c 100644 --- a/subworkflows/msk/fingerprint_gbcms_batch/main.nf +++ b/subworkflows/msk/fingerprint_gbcms_batch/main.nf @@ -15,9 +15,13 @@ workflow FINGERPRINT_GBCMS_BATCH { CUSTOM_FINGERPRINTCOMBINE( ch_fp - .map{ meta, tsv -> ["placeholder", tsv, meta.id, meta.genome ?: default_genome ] } - .groupTuple(by:[0]) - .map{ placeholder, tsv, sampleid, genome -> [tsv, sampleid, genome] }, + .map{ meta, tsv -> + def meta2 = [id:'defaultbatch'] + if (meta.pool) { + meta2.id = meta.pool + } + [meta2, tsv, meta.id, meta.genome ?: default_genome ] + }.groupTuple(by:[0]), ch_liftover_loci_mapping.first() ) ch_versions = ch_versions.mix(CUSTOM_FINGERPRINTCOMBINE.out.versions.first()) From 3cc109ef3f4840d5a571b767cd2ee0623808179a Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Wed, 17 Dec 2025 19:10:50 -0500 Subject: [PATCH 27/37] add custom/fingerprintcorrelation --- modules/msk/custom/fingerprintcombine/main.nf | 19 +- .../msk/custom/fingerprintcombine/meta.yml | 93 +++++--- .../resources/usr/bin/complete_FP_table.R | 2 +- .../fingerprintcombine/tests/main.nf.test | 3 +- .../tests/main.nf.test.snap | 68 ++++-- .../custom/fingerprintcontamination/main.nf | 12 +- 
.../custom/fingerprintcontamination/meta.yml | 21 +- .../tests/main.nf.test.snap | 40 +++- .../fingerprintcorrelation/environment.yml | 18 ++ .../msk/custom/fingerprintcorrelation/main.nf | 41 ++++ .../custom/fingerprintcorrelation/meta.yml | 51 +++++ .../resources/usr/bin/plot_gbcm.R | 209 ++++++++++++++++++ .../fingerprintcorrelation/tests/main.nf.test | 109 +++++++++ .../tests/main.nf.test.snap | 50 +++++ .../tests/nextflow.config | 8 + .../msk/custom/fingerprintvcfparser/main.nf | 12 +- .../msk/custom/fingerprintvcfparser/meta.yml | 52 ++--- .../usr/bin/parse_fingerprint_vcf.py | 3 +- .../tests/main.nf.test.snap | 40 +++- modules/msk/gbcms/main.nf | 10 +- modules/msk/gbcms/meta.yml | 63 ++++-- modules/msk/gbcms/tests/main.nf.test.snap | 20 +- subworkflows/msk/fingerprint_gbcms/main.nf | 8 - subworkflows/msk/fingerprint_gbcms/meta.yml | 2 + .../msk/fingerprint_gbcms/tests/main.nf.test | 1 + .../fingerprint_gbcms/tests/main.nf.test.snap | 32 ++- .../msk/fingerprint_gbcms_batch/main.nf | 13 +- .../msk/fingerprint_gbcms_batch/meta.yml | 3 +- .../tests/main.nf.test | 1 + .../tests/main.nf.test.snap | 26 ++- 30 files changed, 803 insertions(+), 227 deletions(-) create mode 100644 modules/msk/custom/fingerprintcorrelation/environment.yml create mode 100644 modules/msk/custom/fingerprintcorrelation/main.nf create mode 100644 modules/msk/custom/fingerprintcorrelation/meta.yml create mode 100755 modules/msk/custom/fingerprintcorrelation/resources/usr/bin/plot_gbcm.R create mode 100644 modules/msk/custom/fingerprintcorrelation/tests/main.nf.test create mode 100644 modules/msk/custom/fingerprintcorrelation/tests/main.nf.test.snap create mode 100644 modules/msk/custom/fingerprintcorrelation/tests/nextflow.config diff --git a/modules/msk/custom/fingerprintcombine/main.nf b/modules/msk/custom/fingerprintcombine/main.nf index 175a140c..121beb5f 100644 --- a/modules/msk/custom/fingerprintcombine/main.nf +++ b/modules/msk/custom/fingerprintcombine/main.nf @@ -10,15 +10,12 @@ 
process CUSTOM_FINGERPRINTCOMBINE { input: - tuple val(meta), - path(fp_tsv), // list of paths to fingerprint TSV files - val(sample), // list of sample identifiers, one per TSV file, in the same order - val(genome_build) // list of genome builds, one per TSV file, in the same order + tuple val(meta), path(fp_tsv), val(sample), val(genome_build) path(liftover_loci_mapping) output: - tuple val(meta), path("*DPfilter_ALL_FP.txt"), emit: combined_fp_tsv - path "versions.yml" , emit: versions + tuple val(meta), path("*DPfilter_ALL_FP.txt") , emit: combined_fp_tsv + tuple val("${task.process}"), val('complete_FP_table.R'), val('0.1.0'), emit: versions_fingerprintcombine, topic: versions when: task.ext.when == null || task.ext.when @@ -44,11 +41,6 @@ process CUSTOM_FINGERPRINTCOMBINE { -i input.tsv \\ -l $liftover_loci_mapping \\ $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - complete_FP_table.R: 0.1.0 - END_VERSIONS """ stub: @@ -58,10 +50,5 @@ process CUSTOM_FINGERPRINTCOMBINE { echo $args touch XDPfilter_ALL_FP.txt - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - complete_FP_table.R: 0.1.0 - END_VERSIONS """ } diff --git a/modules/msk/custom/fingerprintcombine/meta.yml b/modules/msk/custom/fingerprintcombine/meta.yml index 52b4ed5a..7ed95b68 100644 --- a/modules/msk/custom/fingerprintcombine/meta.yml +++ b/modules/msk/custom/fingerprintcombine/meta.yml @@ -1,55 +1,82 @@ # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json -# # TODO nf-core: Add a description of the module and list keywords name: "custom_fingerprintcombine" description: | A module to combine multiple fingerprint TSV files into a single comprehensive table, with optional liftover of loci coordinates. 
keywords: -- fingerprint -- qc -- loci -- tsv -- correlation + - fingerprint + - qc + - loci + - tsv + - correlation tools: -## TODO nf-core: Add a description and other details for the software below -- "custom": - description: "A custom R script to combine fingerprint TSV files" - homepage: "https://github.com/mskcc-omics-workflows/modules/tree/main/modules/msk/custom/fingerprintcombine/meta.yml" - + - "custom": + description: "A custom R script to combine fingerprint TSV files" + homepage: "https://github.com/mskcc-omics-workflows/modules/tree/main/modules/msk/custom/fingerprintcombine/meta.yml" + identifier: "" input: - - - fp_tsv: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - fp_tsv: type: file description: | Fingerprint TSV files to be combined. Structure: [ val(sample), val(genome_build), path(fp_tsv) ] + ontologies: [] - sample: type: string description: Sample identifier corresponding to each fingerprint TSV file. - genome_build: type: string - description: Genome build (e.g., hg19, hg38) corresponding to each fingerprint TSV file. - - liftover_loci_mapping: - type: file - description: | - A TSV file mapping original loci to liftover loci. - Format: original_chr, original_pos, liftover_chr, liftover_pos - pattern: "*.tsv" -output: - combined_fp_tsv: - - "*DPfilter_ALL_FP.txt": + description: + Genome build (e.g., hg19, hg38) corresponding to each fingerprint + TSV file. + - - liftover_loci_mapping: type: file - description: Wide table combining all input fingerprint TSV files. - pattern: '*DPfilter_ALL_FP.txt' + description: | + A TSV file mapping original loci to liftover loci. + Format: original_chr, original_pos, liftover_chr, liftover_pos + pattern: "*.tsv" ontologies: - - edam: http://edamontology.org/format_3750 # TSV + - edam: http://edamontology.org/format_3475 # TSV +output: + combined_fp_tsv: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - "*DPfilter_ALL_FP.txt": + type: file + description: Wide table combining all input fingerprint TSV files. + pattern: "*DPfilter_ALL_FP.txt" + ontologies: + - edam: http://edamontology.org/format_3750 # TSV + versions_fingerprintcombine: + - - ${task.process}: + type: string + description: The name of the process + - complete_FP_table.R: + type: string + description: The name of the tool + - 0.1.0: + type: string + description: Version of the custom script +topics: versions: - - versions.yml: - type: file - description: File containing software versions - pattern: versions.yml - ontologies: - - edam: http://edamontology.org/format_3750 # YAML + - - ${task.process}: + type: string + description: The name of the process + - complete_FP_table.R: + type: string + description: The name of the tool + - 0.1.0: + type: string + description: Version of the custom script authors: -- "@anoronh4" + - "@anoronh4" maintainers: -- "@anoronh4" + - "@anoronh4" diff --git a/modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R b/modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R index 3265f903..7a8e3ad5 100755 --- a/modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R +++ b/modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R @@ -49,7 +49,7 @@ hg19_hg38_mapper$Loci_hg38 = paste(hg19_hg38_mapper$GRCH38_CHROM,hg19_hg38_mappe hg19_hg38_mapper = hg19_hg38_mapper %>% select(Loci_hg19, Loci_hg38) %>% unique() message("Loading Samples") -input_table = fread(args$input_table, header = T) +input_table = fread(args$input_table, header = T) %>% arrange(sample_id) for (i in 1:nrow(input_table)){ sample = input_table$sample_id[i] genome_build = input_table$genome_build[i] diff --git a/modules/msk/custom/fingerprintcombine/tests/main.nf.test b/modules/msk/custom/fingerprintcombine/tests/main.nf.test index 005f6a8c..03b3388b 100644 --- 
a/modules/msk/custom/fingerprintcombine/tests/main.nf.test +++ b/modules/msk/custom/fingerprintcombine/tests/main.nf.test @@ -56,7 +56,7 @@ nextflow_process { """ input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv .map{ meta, tsv -> - def meta2 = [id:meta.pool] + println meta [[id:meta.pool], tsv, meta.id, "hg19"] }.groupTuple(by:[0]) input[1] = file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true) @@ -82,6 +82,7 @@ nextflow_process { process { """ input[0] = [ + [id:"testsample"], [file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true)], ["testsample"], ["hg19"] diff --git a/modules/msk/custom/fingerprintcombine/tests/main.nf.test.snap b/modules/msk/custom/fingerprintcombine/tests/main.nf.test.snap index e8576d2f..68a0b5c4 100644 --- a/modules/msk/custom/fingerprintcombine/tests/main.nf.test.snap +++ b/modules/msk/custom/fingerprintcombine/tests/main.nf.test.snap @@ -3,46 +3,82 @@ "content": [ { "0": [ - "XDPfilter_ALL_FP.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + [ + { + "id": "testsample" + }, + "XDPfilter_ALL_FP.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] ], "1": [ - "versions.yml:md5,3bd40a0fd11a907f31110dd113fd88c2" + [ + "CUSTOM_FINGERPRINTCOMBINE", + "complete_FP_table.R", + "0.1.0" + ] ], "combined_fp_tsv": [ - "XDPfilter_ALL_FP.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + [ + { + "id": "testsample" + }, + "XDPfilter_ALL_FP.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] ], - "versions": [ - "versions.yml:md5,3bd40a0fd11a907f31110dd113fd88c2" + "versions_fingerprintvcfparser": [ + [ + "CUSTOM_FINGERPRINTCOMBINE", + "complete_FP_table.R", + "0.1.0" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-25T22:47:09.499353594" + "timestamp": "2025-12-17T13:28:48.061258305" }, "sarscov2 - bam": { "content": [ { "0": [ - "0DPfilter_ALL_FP.txt:md5,509d9f7c1d89b9f8e2825bcc4793da3a" + [ + { + "id": 
"mypool" + }, + "0DPfilter_ALL_FP.txt:md5,66113c255cf1f52e27802183764a406d" + ] ], "1": [ - "versions.yml:md5,3bd40a0fd11a907f31110dd113fd88c2" + [ + "CUSTOM_FINGERPRINTCOMBINE", + "complete_FP_table.R", + "0.1.0" + ] ], "combined_fp_tsv": [ - "0DPfilter_ALL_FP.txt:md5,509d9f7c1d89b9f8e2825bcc4793da3a" + [ + { + "id": "mypool" + }, + "0DPfilter_ALL_FP.txt:md5,66113c255cf1f52e27802183764a406d" + ] ], - "versions": [ - "versions.yml:md5,3bd40a0fd11a907f31110dd113fd88c2" + "versions_fingerprintvcfparser": [ + [ + "CUSTOM_FINGERPRINTCOMBINE", + "complete_FP_table.R", + "0.1.0" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-25T22:47:03.219089934" + "timestamp": "2025-12-17T13:28:39.908034467" } } \ No newline at end of file diff --git a/modules/msk/custom/fingerprintcontamination/main.nf b/modules/msk/custom/fingerprintcontamination/main.nf index bbdcdb51..4d48deda 100644 --- a/modules/msk/custom/fingerprintcontamination/main.nf +++ b/modules/msk/custom/fingerprintcontamination/main.nf @@ -12,8 +12,8 @@ process CUSTOM_FINGERPRINTCONTAMINATION { tuple val(meta), path(fp_tumor), path(fp_normal) output: - tuple val(meta), path("*.contamination.tsv"), emit: contamination_tsv - path "versions.yml" , emit: versions + tuple val(meta), path("*.contamination.tsv") , emit: contamination_tsv + tuple val("${task.process}"), val('calculate_contamination.py'), eval('calculate_contamination.py -v | cut -f 2 -d" "'), emit: versions_fingerprintvcfparser, topic: versions when: task.ext.when == null || task.ext.when @@ -28,10 +28,6 @@ process CUSTOM_FINGERPRINTCONTAMINATION { -o ${prefix}.contamination.tsv \\ ${args} - cat <<-END_VERSIONS > versions.yml - "${task.process}": - calculate_contamination.py: \$( calculate_contamination.py --version | rev | cut -f 1 -d " " | rev ) - END_VERSIONS """ stub: @@ -39,9 +35,5 @@ process CUSTOM_FINGERPRINTCONTAMINATION { """ touch ${prefix}.contamination.tsv - 
cat <<-END_VERSIONS > versions.yml - "${task.process}": - calculate_contamination.py: \$( calculate_contamination.py --version | rev | cut -f 1 -d " " | rev ) - END_VERSIONS """ } diff --git a/modules/msk/custom/fingerprintcontamination/meta.yml b/modules/msk/custom/fingerprintcontamination/meta.yml index 4fde47a5..162fff70 100644 --- a/modules/msk/custom/fingerprintcontamination/meta.yml +++ b/modules/msk/custom/fingerprintcontamination/meta.yml @@ -11,11 +11,13 @@ tools: description: "Python Data Analysis Library" homepage: "https://pandas.pydata.org/" documentation: "https://pandas.pydata.org/docs/" + identifier: biotools:pandas - "numpy": description: "Scientific computing library for Python" homepage: "https://numpy.org/" documentation: "https://numpy.org/doc/" + identifier: biotools:numpy input: - - meta: type: map @@ -27,7 +29,8 @@ input: description: Fingerprint table file for tumor sample pattern: "*.fp.tsv" ontologies: - - edam: "http://edamontology.org/format_3750" # TSV + - edam: "http://edamontology.org/format_3750" # TSV + - edam: http://edamontology.org/format_3475 # TSV - fp_normal: type: file description: Fingerprint table file for normal sample @@ -35,9 +38,10 @@ input: ontologies: - edam: "http://edamontology.org/format_3750" + - edam: http://edamontology.org/format_3475 # TSV output: - - contamination_tsv: - - meta: + contamination_tsv: + - - meta: type: map description: | Groovy Map containing sample information @@ -48,12 +52,11 @@ output: pattern: "*.contamination.tsv" ontologies: - edam: "http://edamontology.org/format_3750" # TSV - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" - + - edam: http://edamontology.org/format_3475 # TSV + versions_fingerprintvcfparser: + - - ${task.process}: {} + - calculate_contamination.py: {} + - 'calculate_contamination.py -v | cut -f 2 -d" ': {} authors: - "@anoronh4" maintainers: diff --git 
a/modules/msk/custom/fingerprintcontamination/tests/main.nf.test.snap b/modules/msk/custom/fingerprintcontamination/tests/main.nf.test.snap index 5b51c22a..233a4680 100644 --- a/modules/msk/custom/fingerprintcontamination/tests/main.nf.test.snap +++ b/modules/msk/custom/fingerprintcontamination/tests/main.nf.test.snap @@ -12,7 +12,11 @@ ] ], "1": [ - "versions.yml:md5,904a4c1ae690600f67c1ceb3d72c5ce1" + [ + "CUSTOM_FINGERPRINTCONTAMINATION", + "calculate_contamination.py", + "" + ] ], "contamination_tsv": [ [ @@ -23,16 +27,20 @@ "test.contamination.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,904a4c1ae690600f67c1ceb3d72c5ce1" + "versions_fingerprintvcfparser": [ + [ + "CUSTOM_FINGERPRINTCONTAMINATION", + "calculate_contamination.py", + "" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-26T10:33:23.354208776" + "timestamp": "2025-12-17T13:12:25.869022442" }, "sarscov2 - bam": { "content": [ @@ -47,7 +55,11 @@ ] ], "1": [ - "versions.yml:md5,904a4c1ae690600f67c1ceb3d72c5ce1" + [ + "CUSTOM_FINGERPRINTCONTAMINATION", + "calculate_contamination.py", + "" + ] ], "contamination_tsv": [ [ @@ -58,15 +70,19 @@ "test.contamination.tsv:md5,5b533c60b8eff1f4d2c5fe58a8262303" ] ], - "versions": [ - "versions.yml:md5,904a4c1ae690600f67c1ceb3d72c5ce1" + "versions_fingerprintvcfparser": [ + [ + "CUSTOM_FINGERPRINTCONTAMINATION", + "calculate_contamination.py", + "" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-26T10:33:12.245205382" + "timestamp": "2025-12-17T13:12:16.153445117" } } \ No newline at end of file diff --git a/modules/msk/custom/fingerprintcorrelation/environment.yml b/modules/msk/custom/fingerprintcorrelation/environment.yml new file mode 100644 index 00000000..acabcada --- /dev/null +++ b/modules/msk/custom/fingerprintcorrelation/environment.yml @@ 
-0,0 +1,18 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::r-argparse=2.3.1 + - conda-forge::r-data.table=1.17.8 + - conda-forge::r-dplyr=1.1.4 + - conda-forge::r-ggforce=0.5.0 + - conda-forge::r-ggiraph=0.8.12 + - conda-forge::r-gtools=3.9.5 + - conda-forge::r-htmlwidgets=1.6.4 + - conda-forge::r-plotly=4.11.0 + - conda-forge::r-plyr=1.8.9 + - conda-forge::r-reshape2=1.4.4 + - conda-forge::r-scales=1.4.0 + - conda-forge::r-tidyverse=2.0.0 diff --git a/modules/msk/custom/fingerprintcorrelation/main.nf b/modules/msk/custom/fingerprintcorrelation/main.nf new file mode 100644 index 00000000..eafc51a7 --- /dev/null +++ b/modules/msk/custom/fingerprintcorrelation/main.nf @@ -0,0 +1,41 @@ +process CUSTOM_FINGERPRINTCORRELATION { + tag {'$prefix'} + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'docker://community.wave.seqera.io/library/r-argparse_r-data.table_r-dplyr_r-ggforce_pruned:5c045bc9fea1dbd5': + 'community.wave.seqera.io/library/r-argparse_r-data.table_r-dplyr_r-ggforce_pruned:5c045bc9fea1dbd5' } " + // 'oras://community.wave.seqera.io/library/r-argparse_r-data.table_r-dplyr_r-ggforce_pruned:8211a2010a4712ea': + + input: + tuple val(meta), path(combined_fp_tsv) + + output: + tuple val(meta), path("*_gbcm_sample-to-sample4.pdf"), emit: heatmap_pdf + tuple val(meta), path("*_interactive4.html"), emit: heatmap_html + tuple val(meta), path("*_observations.tab"), emit: observations_tab + tuple val("${task.process}"), val('plot_gbcm.R'), val("0.1.0"), topic: versions, emit: versions_fingerprintcorrelation + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = meta.id ?: "batch" + """ + plot_gbcm.R \\ + -t ${combined_fp_tsv} \\ + -o ./ \\ + -p ${prefix} + """ + + stub: + def args = task.ext.args ?: '' + def prefix = meta.id ?: "batch" + """ + touch ${prefix}_gbcm_sample-to-sample4.pdf + touch ${prefix}_interactive4.html + touch ${prefix}_observations.tab + """ +} diff --git a/modules/msk/custom/fingerprintcorrelation/meta.yml b/modules/msk/custom/fingerprintcorrelation/meta.yml new file mode 100644 index 00000000..8e5e1d37 --- /dev/null +++ b/modules/msk/custom/fingerprintcorrelation/meta.yml @@ -0,0 +1,51 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "custom_fingerprintcorrelation" +description: null +keywords: + - sort + - example + - genomics +tools: + - "custom": + description: "" + homepage: "" + documentation: "" + tool_dev_url: "" + doi: "" + licence: null + identifier: null + +input: + - - meta: {} + - combined_fp_tsv: {} +output: + heatmap_pdf: + - - meta: {} + - "*_gbcm_sample-to-sample4.pdf": {} + heatmap_html: + - - meta: {} + - "*_interactive4.html": {} + observations_tab: + - - meta: {} + - 
"*_observations.tab": {} + versions_fingerprintcorrelation: + - - ${task.process}: + type: string + description: The name of the process + - plot_gbcm.R: {} + - 0.1.0: {} +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - plot_gbcm.R: + type: string + description: The name of the tool + - 0.1.0: + type: eval + description: The expression to obtain the version of the tool +authors: + - "@anoronh4" +maintainers: + - "@anoronh4" diff --git a/modules/msk/custom/fingerprintcorrelation/resources/usr/bin/plot_gbcm.R b/modules/msk/custom/fingerprintcorrelation/resources/usr/bin/plot_gbcm.R new file mode 100755 index 00000000..5ba3460c --- /dev/null +++ b/modules/msk/custom/fingerprintcorrelation/resources/usr/bin/plot_gbcm.R @@ -0,0 +1,209 @@ +#!/usr/bin/env Rscript +#------------------------------------------------------------------------------- +# Script: plot_gbcm.R +# Author: Hanan Salim +# Date: 2025-11-03 +# Version: 0.1.0 +# +# Description: This script takes in a wide fingerprinting table pertaining +# to multiple samples and plots in pdf and html formats. +# Additionally, a table with the number of observations for each correlation +# is also written to an output file. 
+# +#------------------------------------------------------------------------------- + + +rm(list=ls()) + +library(argparse, quietly = T) +library(plyr, quietly = T) +library(dplyr, quietly = T) +library(data.table, quietly = T) +library(tidyverse, quietly = T) +library(scales, quietly = T) +library(ggforce, quietly = T) +library(gtools, quietly = T) +library(plotly) +library(htmlwidgets) +library(ggiraph) +library(reshape2) + +`%notin%` <- Negate(`%in%`) +`%notlike%` <- Negate(`%like%`) + +parser = ArgumentParser(description = 'create correlation plots for a given sample') + +parser$add_argument('-t', '--table', required = TRUE, + help = 'summary table') + +parser$add_argument('-o', '--analysis_folder', required = TRUE, + help = 'output folder') + +parser$add_argument('-p', '--pool', required = TRUE, + help = 'pool ID') + +args = parser$parse_args() + +all_fp_gbcm_final = fread(args$table, sep = '\t') +outdir = args$analysis_folder +sample = args$pool + +all_fp_gbcm_final <- all_fp_gbcm_final %>% select(-contains(c('Loci_hg19', 'Loci_hg38'))) +cols <- grep("VAF", names(all_fp_gbcm_final), value = TRUE) +#print(class(all_fp_gbcm_final)) +all_fp_gbcm_final <- all_fp_gbcm_final[, ..cols] + +for ( col in 1:ncol(all_fp_gbcm_final)){ + colnames(all_fp_gbcm_final)[col] <- sub("VAF_", "", colnames(all_fp_gbcm_final)[col]) +} + +title = paste("Patient:", sample,"; ", nrow(all_fp_gbcm_final)," Loci used",sep = "") + +all_fp_gbcm_final_matrix <- data.matrix(all_fp_gbcm_final) +all_fp_gbcm_final_matrix = cor(as.matrix(all_fp_gbcm_final_matrix), method = c("pearson"), use = "pairwise.complete.obs") + +gbcm_data_long <- reshape2::melt(all_fp_gbcm_final_matrix) +gbcm_observation = crossprod(!is.na(all_fp_gbcm_final)) +gbcm_obs_long <- reshape2::melt(gbcm_observation) +gbcm_combo_data <- data.frame(gbcm_data_long, size = gbcm_obs_long$value) + +# plot +#pdf(paste(outdir,"/",sample,'_sample-to-sample.pdf', sep = ""), width = 25, height = 25) + +n_x <- 
length(unique(gbcm_combo_data$Var1)) +n_y <- length(unique(gbcm_combo_data$Var2)) + +# Define your plot size (in inches) +plot_width_in <- 20 +plot_height_in <- 20 + +# Convert to mm (1 inch = 25.4 mm) +plot_width_mm <- plot_width_in * 25.4 +plot_height_mm <- plot_height_in * 25.4 + +# Calculate tile size in mm +tile_width_mm <- plot_width_mm / n_x +tile_height_mm <- plot_height_mm / n_y + +# Max circle diameter (fits inside smallest tile dimension) +max_diameter_mm <- min(tile_width_mm, tile_height_mm) + +# Approximate max point size for geom_point (radius in mm) +max_point_size <- max_diameter_mm + +# Calculate log2 size column +gbcm_combo_data$log2_size <- log2(gbcm_combo_data$size) +#print(gbcm_combo_data$log2_size) + + +gbcm_combo_data$Var1 <- factor(gbcm_combo_data$Var1, levels = mixedsort(unique(gbcm_combo_data$Var1))) +gbcm_combo_data$Var2 <- factor(gbcm_combo_data$Var2, levels = mixedsort(unique(gbcm_combo_data$Var2))) + +p <- ggplot(gbcm_combo_data, aes(x = Var1, y = Var2)) + + geom_tile(color = "black", linewidth = 0.5, fill = NA) + + geom_point(aes(size = log2_size, fill = value), shape = 21, color = "black") + + #geom_text(aes(label = size), color = "white", size = 4) + + scale_x_discrete(limits = sort(levels(gbcm_combo_data$Var1))) + + scale_y_discrete(limits = sort(levels(gbcm_combo_data$Var2))) + + scale_fill_viridis_c( + name = "Correlation", + option = "viridis", + direction = -1, + alpha = 0.75, + begin = 0, + end = 1, + limits = c(-1, 1), + guide = guide_colorbar(direction = "vertical", + title.position = "top" + )) + + scale_size_continuous(limits = c(0, 14.2), # known max of log2(size) + range = c(0, max_point_size), + name = "Sites (log2)", + guide = guide_legend(direction = "vertical", + title.position = "top") + ) + + #scale_size_identity(name = "Sites (log2)", + # guide = guide_legend(direction = "vertical", + # title.position = "top"), + # breaks = rescale(c(2, 5, 10, 14.2), to = c(1, 10), from = c(0, max_log2)), + # labels = c("2", "5", 
"10", "14.2")) + + + #scale_size_continuous(name = "Sites (log2)", + #range = c(4, 32), + #guide = guide_legend(direction = "vertical", + #title.position = "top")) + + labs(title = title) + + theme_minimal() + + theme( + panel.grid = element_blank(), + axis.text.x = element_text(angle = 90, hjust = 1, size = 10, color = "black"), + axis.text.y = element_text(size = 10, color = "black"), + axis.title = element_blank(), + plot.title = element_text(hjust = 0.5, size = 20, margin = margin(b = 15)), + legend.position = "right", + legend.box = "horizontal", + legend.box.just = "left", + legend.title.align = 0.5, + legend.spacing.x = unit(1, "cm"), + aspect.ratio = 1 + ) + +p2 <- ggplot(gbcm_combo_data, aes(x = Var1, y = Var2)) + + geom_tile(color = "black", linewidth = 0.5, fill = NA) + + geom_point_interactive( + aes(size = log2_size, + fill = value, + tooltip = paste0( + "x: ", Var1, "\n", + "y: ", Var2, "\n", + "Size: ", size, "\n", + "Correlation: ", round(value, 2) + )), + shape = 21, + color = "black" + ) + + scale_x_discrete(limits = sort(levels(gbcm_combo_data$Var1))) + + scale_y_discrete(limits = sort(levels(gbcm_combo_data$Var2))) + + scale_fill_viridis_c( + name = "Correlation", + option = "viridis", + direction = -1, + alpha = 0.75, + begin = 0, + end = 1, + limits = c(-1, 1), + guide = guide_colorbar(direction = "vertical", + title.position = "top" + )) + + scale_size_continuous(limits = c(0, 14.2), # known max of log2(size) + range = c(0, max_point_size), + name = "Sites (log2)", + guide = guide_legend(direction = "vertical", + title.position = "top") + ) + + labs(title = title) + + theme_minimal() + + theme( + text = element_text(family = "Courier"), + panel.grid = element_blank(), + axis.text.x = element_text(angle = 90, hjust = 1, size = 10, color = "black"), + axis.text.y = element_text(size = 10, color = "black"), + axis.title = element_blank(), + plot.title = element_text(hjust = 0.5, size = 24, margin = margin(b = 15)), + legend.position = "right", + 
legend.box = "horizontal", + legend.box.just = "left", + legend.title.align = 0.5, + legend.spacing.x = unit(1, "cm"), + aspect.ratio = 1, + ) + + +pg = girafe(ggobj = p2, width_svg = 25, height_svg = 25, + options = list(opts_tooltip(css = "padding:5pt; font-size:16pt; color:white; background-color:black;"))) + +saveWidget(pg, paste(outdir,"/",sample,'_interactive4.html', sep = ""), selfcontained = TRUE) + + +ggsave(paste(outdir,"/",sample,'_gbcm_sample-to-sample4.pdf', sep = ""), plot = p, width = 25, height = 25, units = "in", device = cairo_pdf) +write.table(gbcm_observation, paste(outdir,"/",sample,'_observations.tab', sep = ''), sep = '\t') diff --git a/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test b/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test new file mode 100644 index 00000000..e6231546 --- /dev/null +++ b/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test @@ -0,0 +1,109 @@ +nextflow_process { + + name "Test Process CUSTOM_FINGERPRINTCORRELATION" + script "../main.nf" + process "CUSTOM_FINGERPRINTCORRELATION" + config "./nextflow.config" + + tag "modules" + tag "modules_msk" + tag "custom" + tag "custom/fingerprintcorrelation" + tag "custom/fingerprintcombine" + tag "gbcms" + tag "custom/fingerprintvcfparser" + + test("sarscov2 - bam") { + setup { + run("GBCMS"){ + script "../../../gbcms/main.nf" + process { + """ + input[0] = Channel.of( + [ + [ id:'test', sample:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), + "variant_file.vcf" + ], + [ + [ id:'test2', sample:'test2' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true), + 
file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), + "variant_file.vcf" + ], + ) + input[1] = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + input[2] = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + """ + } + } + run("CUSTOM_FINGERPRINTVCFPARSER"){ + script "../../fingerprintvcfparser/main.nf" + process { + """ + input[0] = GBCMS.out.variant_file + """ + } + } + run("CUSTOM_FINGERPRINTCOMBINE"){ + script "../../fingerprintcombine/main.nf" + process { + """ + input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv + .map{ meta, tsv -> + def meta2 = [id:meta.pool] + [[id:meta.pool], tsv, meta.id, "hg19"] + }.groupTuple(by:[0]) + input[1] = file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true) + """ + } + } + } + when { + process { + """ + input[0] = CUSTOM_FINGERPRINTCOMBINE.out.combined_fp_tsv + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.observations_tab, + process.out.versions_fingerprintcorrelation + ).match() } + ) + } + + } + + test("sarscov2 - bam - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [[id:'thispool'], file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true)] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.observations_tab, + process.out.versions_fingerprintcorrelation + ).match() } + ) + } + + } + +} diff --git a/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test.snap b/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test.snap new file mode 100644 index 00000000..98ee5274 --- /dev/null +++ b/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test.snap @@ -0,0 +1,50 @@ +{ + "sarscov2 - bam - stub": { + "content": [ + [ 
+ [ + { + "id": "thispool" + }, + "thispool_observations.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ + [ + "CUSTOM_FINGERPRINTCORRELATION", + "plot_gbcm.R", + "0.1.0" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-17T12:40:10.210681589" + }, + "sarscov2 - bam": { + "content": [ + [ + [ + { + "id": null + }, + "batch_observations.tab:md5,858d6d115a4da81652bb98dcc8b8077f" + ] + ], + [ + [ + "CUSTOM_FINGERPRINTCORRELATION", + "plot_gbcm.R", + "0.1.0" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-17T12:40:03.812030372" + } +} \ No newline at end of file diff --git a/modules/msk/custom/fingerprintcorrelation/tests/nextflow.config b/modules/msk/custom/fingerprintcorrelation/tests/nextflow.config new file mode 100644 index 00000000..b676d906 --- /dev/null +++ b/modules/msk/custom/fingerprintcorrelation/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + withName: 'CUSTOM_FINGERPRINTCOMBINE' { + ext.args = "-d 0" + } + withName: 'CUSTOM_FINGERPRINTVCFPARSER' { + ext.args = "-d 0" + } +} diff --git a/modules/msk/custom/fingerprintvcfparser/main.nf b/modules/msk/custom/fingerprintvcfparser/main.nf index 6a46b512..b5924ce8 100644 --- a/modules/msk/custom/fingerprintvcfparser/main.nf +++ b/modules/msk/custom/fingerprintvcfparser/main.nf @@ -11,8 +11,8 @@ process CUSTOM_FINGERPRINTVCFPARSER { tuple val(meta), path(vcf) output: - tuple val(meta), path("${prefix}.fp.tsv"), emit: tsv - path "versions.yml" , emit: versions + tuple val(meta), path("${prefix}.fp.tsv") , emit: tsv + tuple val("${task.process}"), val('parse_fingerprint_vcf.py'), eval('parse_fingerprint_vcf.py -v | cut -f 2 -d" "'), emit: versions_fingerprintvcfparser, topic: versions when: task.ext.when == null || task.ext.when @@ -27,10 +27,6 @@ process CUSTOM_FINGERPRINTVCFPARSER { --samplename ${prefix} \\ $args - cat <<-END_VERSIONS > versions.yml - "${task.process}": - parse_fingerprint_vcf.py: 
0.1.0 - END_VERSIONS """ stub: @@ -41,9 +37,5 @@ process CUSTOM_FINGERPRINTVCFPARSER { touch ${prefix}.fp.tsv - cat <<-END_VERSIONS > versions.yml - "${task.process}": - parse_fingerprint_vcf.py: 0.1.0 - END_VERSIONS """ } diff --git a/modules/msk/custom/fingerprintvcfparser/meta.yml b/modules/msk/custom/fingerprintvcfparser/meta.yml index d4ae3ed7..922f1504 100644 --- a/modules/msk/custom/fingerprintvcfparser/meta.yml +++ b/modules/msk/custom/fingerprintvcfparser/meta.yml @@ -1,21 +1,24 @@ # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "custom_fingerprintvcfparser" -description: Custom script to parse fingerprint VCF files, generated by the GBCMS module. +description: + Custom script to parse fingerprint VCF files, generated by the GBCMS + module. keywords: -- custom -- fingerprint -- vcf -- pysam + - custom + - fingerprint + - vcf + - pysam tools: -- "custom": - description: "Pysam is a Python module for reading and manipulating SAM/BAM/VCF/BCF - files. It's a lightweight wrapper of the htslib C-API, the same one that powers - samtools, bcftools, and tabix." - homepage: "https://pysam.readthedocs.io/en/latest/api.html" - documentation: "https://pysam.readthedocs.io/en/latest/api.html" - tool_dev_url: "https://github.com/pysam-developers/pysam" - licence: ['MIT'] - identifier: biotools:pysam + - "custom": + description: + "Pysam is a Python module for reading and manipulating SAM/BAM/VCF/BCF + files. It's a lightweight wrapper of the htslib C-API, the same one that powers + samtools, bcftools, and tabix." + homepage: "https://pysam.readthedocs.io/en/latest/api.html" + documentation: "https://pysam.readthedocs.io/en/latest/api.html" + tool_dev_url: "https://github.com/pysam-developers/pysam" + licence: ["MIT"] + identifier: biotools:pysam input: - - meta: @@ -38,19 +41,18 @@ output: e.g. 
[ id:'test' ] - ${prefix}.fp.tsv: type: file - description: Tab-separated values (TSV) file containing parsed fingerprint data + description: + Tab-separated values (TSV) file containing parsed fingerprint + data pattern: "${prefix}.fp.tsv" ontologies: - - edam: http://edamontology.org/format_3475 # TSV + - edam: http://edamontology.org/format_3475 # TSV - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: versions.yml - ontologies: - - edam: http://edamontology.org/format_3750 # YAML + versions_fingerprintvcfparser: + - - ${task.process}: {} + - parse_fingerprint_vcf.py: {} + - 'parse_fingerprint_vcf.py -v | cut -f 2 -d" ': {} authors: -- "@anoronh4" + - "@anoronh4" maintainers: -- "@anoronh4" + - "@anoronh4" diff --git a/modules/msk/custom/fingerprintvcfparser/resources/usr/bin/parse_fingerprint_vcf.py b/modules/msk/custom/fingerprintvcfparser/resources/usr/bin/parse_fingerprint_vcf.py index 851bd498..b4ddd044 100755 --- a/modules/msk/custom/fingerprintvcfparser/resources/usr/bin/parse_fingerprint_vcf.py +++ b/modules/msk/custom/fingerprintvcfparser/resources/usr/bin/parse_fingerprint_vcf.py @@ -15,11 +15,12 @@ from itertools import groupby def usage(): - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser(prog='parse_fingerprint_vcf.py') parser.add_argument('--input','-i', help = 'input file', required = True) parser.add_argument('--samplename','-n', help = 'sample name', required = True) parser.add_argument('--output','-o', help = 'output file', required = True) parser.add_argument('--depth-filter','-d', default = 20, type = int, help = 'minimum read depth for outputting a minor allele frequency [default = 20]') + parser.add_argument('--version','-v',action='version',version='%(prog)s ' + __version__, help="Show program's version number and exit.") return parser.parse_args() def main(): diff --git a/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test.snap 
b/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test.snap index 5751d885..fb734f9f 100644 --- a/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test.snap +++ b/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test.snap @@ -12,7 +12,11 @@ ] ], "1": [ - "versions.yml:md5,c8c9b3fa1b9110ca83a71490a317f2fa" + [ + "CUSTOM_FINGERPRINTVCFPARSER", + "parse_fingerprint_vcf.py", + "0.1.0" + ] ], "tsv": [ [ @@ -23,16 +27,20 @@ "test.fp.tsv:md5,9fa9a081f17ee52f03463c96d46a23aa" ] ], - "versions": [ - "versions.yml:md5,c8c9b3fa1b9110ca83a71490a317f2fa" + "versions_fingerprintvcfparser": [ + [ + "CUSTOM_FINGERPRINTVCFPARSER", + "parse_fingerprint_vcf.py", + "0.1.0" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-08-12T15:07:39.656085692" + "timestamp": "2025-12-17T13:02:44.951823372" }, "sarscov2 - vcf - stub": { "content": [ @@ -47,7 +55,11 @@ ] ], "1": [ - "versions.yml:md5,c8c9b3fa1b9110ca83a71490a317f2fa" + [ + "CUSTOM_FINGERPRINTVCFPARSER", + "parse_fingerprint_vcf.py", + "0.1.0" + ] ], "tsv": [ [ @@ -58,15 +70,19 @@ "test.fp.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,c8c9b3fa1b9110ca83a71490a317f2fa" + "versions_fingerprintvcfparser": [ + [ + "CUSTOM_FINGERPRINTVCFPARSER", + "parse_fingerprint_vcf.py", + "0.1.0" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-08-12T15:07:46.098292727" + "timestamp": "2025-12-17T13:02:51.967429606" } } \ No newline at end of file diff --git a/modules/msk/gbcms/main.nf b/modules/msk/gbcms/main.nf index 38922559..bd0d8dbf 100644 --- a/modules/msk/gbcms/main.nf +++ b/modules/msk/gbcms/main.nf @@ -12,7 +12,7 @@ process GBCMS { output: tuple val(meta), path('*.{vcf,maf}'), emit: variant_file - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('gbcms'), eval("GetBaseCountsMultiSample --help | 
grep -oP '[0-9]\\.[0-9]\\.[0-9]'"), emit: versions_gbcms, topic: versions when: task.ext.when == null || task.ext.when @@ -44,10 +44,6 @@ process GBCMS { --output ${output} \\ --bam $sample:${bam} $args - cat <<-END_VERSIONS > versions.yml - "${task.process}": - GetBaseCountsMultiSample: \$(echo \$(GetBaseCountsMultiSample --help) | grep -oP '[0-9]\\.[0-9]\\.[0-9]') - END_VERSIONS """ stub: @@ -56,9 +52,5 @@ process GBCMS { """ touch variant_file.maf - cat <<-END_VERSIONS > versions.yml - "${task.process}": - GetBaseCountsMultiSample: 1.2.5 - END_VERSIONS """ } diff --git a/modules/msk/gbcms/meta.yml b/modules/msk/gbcms/meta.yml index 170a3e3c..a782f77a 100644 --- a/modules/msk/gbcms/meta.yml +++ b/modules/msk/gbcms/meta.yml @@ -14,10 +14,9 @@ tools: in a given VCF file or MAF file" homepage: "https://github.com/msk-access/GetBaseCountsMultiSample" documentation: "https://github.com/msk-access/GetBaseCountsMultiSample/blob/master/README.md" - identifier: "" + input: - # Only when we have meta - - meta: type: map description: | @@ -29,45 +28,65 @@ input: Input bam file, in the format of SAMPLE_NAME:BAM_FILE. This paramter need to be specified at least once pattern: "*.bam" + ontologies: [] - bambai: type: file description: Index of Bam pattern: "*.bai" + ontologies: [] - variant_file: type: file description: Input variant file in TCGA maf format. --maf or --vcf need to be specified at least once. 
But --maf and --vcf are mutually exclusive pattern: "*.{maf,vcf}" + ontologies: [] - output: type: string description: Output file - - - fasta: - type: file - description: Input reference sequence file - pattern: "*.fasta" - - - fastafai: - type: file - description: Index of the reference Fasta - pattern: "*.fai" + - fasta: + type: file + description: Input reference sequence file + pattern: "*.fasta" + ontologies: [] + - fastafai: + type: file + description: Index of the reference Fasta + pattern: "*.fai" + + ontologies: [] output: - - variant_file: - - meta: - type: file - description: - base counts in multiple BAM files for all the sites in a given - VCF file or MAF file - pattern: "*.{vcf,maf}" + variant_file: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` - "*.{vcf,maf}": type: file description: base counts in multiple BAM files for all the sites in a given VCF file or MAF file pattern: "*.{vcf,maf}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions_gbcms: + - - ${task.process}: + type: string + description: The name of the process + - gbcms: + type: string + description: The name of the tool + - GetBaseCountsMultiSample --help | grep -oP '[0-9]\\.[0-9]\\.[0-9]': {} +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - gbcms: + type: string + description: The name of the tool + - GetBaseCountsMultiSample --help | grep -oP '[0-9]\\.[0-9]\\.[0-9]': + type: eval + description: The expression to obtain the version of the tool authors: - "@buehlere" diff --git a/modules/msk/gbcms/tests/main.nf.test.snap b/modules/msk/gbcms/tests/main.nf.test.snap index 31b547e2..60ff40c5 100644 --- a/modules/msk/gbcms/tests/main.nf.test.snap +++ b/modules/msk/gbcms/tests/main.nf.test.snap @@ -12,7 +12,11 @@ ] ], "1": [ - 
"versions.yml:md5,a94265ed3bc4b5631d85b9b9b5d2b7e5" + [ + "GBCMS", + "gbcms", + "1.2.4" + ] ], "variant_file": [ [ @@ -23,15 +27,19 @@ "variant_file.vcf:md5,28c8df33c7ea5ed5d1cf9997d8e00ffa" ] ], - "versions": [ - "versions.yml:md5,a94265ed3bc4b5631d85b9b9b5d2b7e5" + "versions_gbcms": [ + [ + "GBCMS", + "gbcms", + "1.2.4" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-02-13T17:19:51.302342" + "timestamp": "2025-12-17T12:07:13.813792199" } } \ No newline at end of file diff --git a/subworkflows/msk/fingerprint_gbcms/main.nf b/subworkflows/msk/fingerprint_gbcms/main.nf index 5f67f594..d6c90f14 100644 --- a/subworkflows/msk/fingerprint_gbcms/main.nf +++ b/subworkflows/msk/fingerprint_gbcms/main.nf @@ -18,8 +18,6 @@ workflow FINGERPRINT_GBCMS { main: - ch_versions = Channel.empty() - println ch_fp_loci_vcf.getClass() println ch_fasta.getClass() @@ -33,16 +31,13 @@ workflow FINGERPRINT_GBCMS { //ch_fasta.view().map{ if (it[0] instanceof Map){ it[1] } else { it }}.first(), //ch_fastafai.view().map{ if (it[0] instanceof Map){ it[1] } else { it }}.first() ) - ch_versions = ch_versions.mix(GBCMS.out.versions.first()) CUSTOM_FINGERPRINTVCFPARSER ( GBCMS.out.variant_file ) - ch_versions = ch_versions.mix(CUSTOM_FINGERPRINTVCFPARSER.out.versions.first()) all_fps = CUSTOM_FINGERPRINTVCFPARSER.out.tsv.mix(ch_fp_tsv) - paired_fps = all_fps .filter{ meta, tsv -> meta.case_id != null && meta.control_id != null && meta.id == meta.case_id } .combine(all_fps) @@ -57,7 +52,6 @@ workflow FINGERPRINT_GBCMS { .map{ meta, tsv -> [ meta, tsv, [] ] } CUSTOM_FINGERPRINTCONTAMINATION ( paired_fps.mix(unpaired_fps).view() ) - ch_versions = ch_versions.mix(CUSTOM_FINGERPRINTCONTAMINATION.out.versions.first()) if (run_correlation) { FINGERPRINT_GBCMS_BATCH ( @@ -65,7 +59,6 @@ workflow FINGERPRINT_GBCMS { ch_liftover_loci_mapping, default_genome ) - ch_versions = 
ch_versions.mix(FINGERPRINT_GBCMS_BATCH.out.versions.first()) } else { FINGERPRINT_GBCMS_BATCH.out.combined_fp_tsv = Channel.empty() } @@ -74,6 +67,5 @@ workflow FINGERPRINT_GBCMS { fp_tsv = CUSTOM_FINGERPRINTVCFPARSER.out.tsv // channel: [ val(meta), tsv ] contamination_tsv = CUSTOM_FINGERPRINTCONTAMINATION.out.contamination_tsv // channel: [ val(meta), contamination_tsv ] combined_fp_tsv = FINGERPRINT_GBCMS_BATCH.out.combined_fp_tsv // channel: [ tsv ] - versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/msk/fingerprint_gbcms/meta.yml b/subworkflows/msk/fingerprint_gbcms/meta.yml index c0002dbb..8deac1af 100644 --- a/subworkflows/msk/fingerprint_gbcms/meta.yml +++ b/subworkflows/msk/fingerprint_gbcms/meta.yml @@ -13,6 +13,8 @@ keywords: components: - gbcms - custom/fingerprintvcfparser + - custom/fingerprintcontamination + - fingerprint_gbcms_batch input: - ch_bam: type: file diff --git a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test index 46a8bdd8..171495b5 100644 --- a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test +++ b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test @@ -11,6 +11,7 @@ nextflow_workflow { tag "subworkflows/fingerprint_gbcms_batch" tag "gbcms" tag "custom/fingerprintvcfparser" + tag "custom/fingerprintcontamination" test("sarscov2 - bam") { diff --git a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap index 492f6ee5..4f6067f4 100644 --- a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap +++ b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap @@ -35,16 +35,20 @@ ] ], "2": [ - "0DPfilter_ALL_FP.txt:md5,509d9f7c1d89b9f8e2825bcc4793da3a" - ], - "3": [ - "versions.yml:md5,1d2160eb0eb102d00d5786a8d056328e", - "versions.yml:md5,41ff30ed71b1d19e95a6095b9ac3ca94", - "versions.yml:md5,4b4b3ad40aa1c2c3a002c0c347c385b8", - 
"versions.yml:md5,4fb29c6ff25ce4e29f6cf293a70aa8e6" + [ + { + "id": "defaultbatch" + }, + "0DPfilter_ALL_FP.txt:md5,21ecaf823768ac5d6787fa6a6b2aca37" + ] ], "combined_fp_tsv": [ - "0DPfilter_ALL_FP.txt:md5,509d9f7c1d89b9f8e2825bcc4793da3a" + [ + { + "id": "defaultbatch" + }, + "0DPfilter_ALL_FP.txt:md5,21ecaf823768ac5d6787fa6a6b2aca37" + ] ], "contamination_tsv": [ [ @@ -77,19 +81,13 @@ }, "test2.fp.tsv:md5,c3fbcee584048e9bc4fc93bc6ca487d2" ] - ], - "versions": [ - "versions.yml:md5,1d2160eb0eb102d00d5786a8d056328e", - "versions.yml:md5,41ff30ed71b1d19e95a6095b9ac3ca94", - "versions.yml:md5,4b4b3ad40aa1c2c3a002c0c347c385b8", - "versions.yml:md5,4fb29c6ff25ce4e29f6cf293a70aa8e6" ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-26T11:05:45.091814897" + "timestamp": "2025-12-17T13:30:35.667469411" } } \ No newline at end of file diff --git a/subworkflows/msk/fingerprint_gbcms_batch/main.nf b/subworkflows/msk/fingerprint_gbcms_batch/main.nf index 2b42fe0c..e0df5c92 100644 --- a/subworkflows/msk/fingerprint_gbcms_batch/main.nf +++ b/subworkflows/msk/fingerprint_gbcms_batch/main.nf @@ -1,5 +1,5 @@ - -include { CUSTOM_FINGERPRINTCOMBINE } from '../../../modules/msk/custom/fingerprintcombine/main' +include { CUSTOM_FINGERPRINTCOMBINE } from '../../../modules/msk/custom/fingerprintcombine/main' +include { CUSTOM_FINGERPRINTCORRELATION } from '../../../modules/msk/custom/fingerprintcorrelation/main' workflow FINGERPRINT_GBCMS_BATCH { @@ -10,9 +10,6 @@ workflow FINGERPRINT_GBCMS_BATCH { main: - ch_versions = Channel.empty() - - CUSTOM_FINGERPRINTCOMBINE( ch_fp .map{ meta, tsv -> @@ -24,9 +21,11 @@ workflow FINGERPRINT_GBCMS_BATCH { }.groupTuple(by:[0]), ch_liftover_loci_mapping.first() ) - ch_versions = ch_versions.mix(CUSTOM_FINGERPRINTCOMBINE.out.versions.first()) + + CUSTOM_FINGERPRINTCORRELATION( + CUSTOM_FINGERPRINTCOMBINE.out.combined_fp_tsv + ) emit: combined_fp_tsv = 
CUSTOM_FINGERPRINTCOMBINE.out.combined_fp_tsv // channel: [ val(meta), [ bam ] ] - versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/msk/fingerprint_gbcms_batch/meta.yml b/subworkflows/msk/fingerprint_gbcms_batch/meta.yml index ca573350..1646d1c9 100644 --- a/subworkflows/msk/fingerprint_gbcms_batch/meta.yml +++ b/subworkflows/msk/fingerprint_gbcms_batch/meta.yml @@ -8,7 +8,8 @@ keywords: - batch - pool components: - - modules/msk/custom/fingerprintcombine + - custom/fingerprintcombine + - custom/fingerprintcorrelation input: - ch_fp: type: file diff --git a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test index 8500dcfd..c1bd3701 100644 --- a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test +++ b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test @@ -12,6 +12,7 @@ nextflow_workflow { tag "gbcms" tag "custom/fingerprintvcfparser" tag "custom/fingerprintcombine" + tag "custom/fingerprintcorrelation" test("sarscov2 - bam - single_end") { diff --git a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test.snap b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test.snap index 7611bc83..3441d510 100644 --- a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test.snap +++ b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test.snap @@ -3,23 +3,27 @@ "content": [ { "0": [ - "0DPfilter_ALL_FP.txt:md5,509d9f7c1d89b9f8e2825bcc4793da3a" - ], - "1": [ - "versions.yml:md5,bc54e025756d97cd9b14d51a3c9e3667" + [ + { + "id": "defaultbatch" + }, + "0DPfilter_ALL_FP.txt:md5,66113c255cf1f52e27802183764a406d" + ] ], "combined_fp_tsv": [ - "0DPfilter_ALL_FP.txt:md5,509d9f7c1d89b9f8e2825bcc4793da3a" - ], - "versions": [ - "versions.yml:md5,bc54e025756d97cd9b14d51a3c9e3667" + [ + { + "id": "defaultbatch" + }, + "0DPfilter_ALL_FP.txt:md5,66113c255cf1f52e27802183764a406d" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + 
"nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-25T23:49:26.050835746" + "timestamp": "2025-12-17T13:30:57.724162129" } } \ No newline at end of file From 54fff1bcae4ce676a70b20276bb91e70697a354c Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Wed, 17 Dec 2025 20:28:30 -0500 Subject: [PATCH 28/37] fix failing test --- .../msk/custom/fingerprintcombine/tests/main.nf.test.snap | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/msk/custom/fingerprintcombine/tests/main.nf.test.snap b/modules/msk/custom/fingerprintcombine/tests/main.nf.test.snap index 68a0b5c4..85f90edd 100644 --- a/modules/msk/custom/fingerprintcombine/tests/main.nf.test.snap +++ b/modules/msk/custom/fingerprintcombine/tests/main.nf.test.snap @@ -25,7 +25,7 @@ "XDPfilter_ALL_FP.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions_fingerprintvcfparser": [ + "versions_fingerprintcombine": [ [ "CUSTOM_FINGERPRINTCOMBINE", "complete_FP_table.R", @@ -38,7 +38,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.2" }, - "timestamp": "2025-12-17T13:28:48.061258305" + "timestamp": "2025-12-17T20:26:07.925718004" }, "sarscov2 - bam": { "content": [ @@ -66,7 +66,7 @@ "0DPfilter_ALL_FP.txt:md5,66113c255cf1f52e27802183764a406d" ] ], - "versions_fingerprintvcfparser": [ + "versions_fingerprintcombine": [ [ "CUSTOM_FINGERPRINTCOMBINE", "complete_FP_table.R", @@ -79,6 +79,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.2" }, - "timestamp": "2025-12-17T13:28:39.908034467" + "timestamp": "2025-12-17T20:25:58.985229402" } } \ No newline at end of file From 53a03e7847ddfff0de5a71564d6286fda1a33f74 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Wed, 17 Dec 2025 20:31:09 -0500 Subject: [PATCH 29/37] exclude one more module from conda tests --- .github/skip_nf_test.json | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/skip_nf_test.json b/.github/skip_nf_test.json index b2ec96b8..8b5688a8 100644 --- a/.github/skip_nf_test.json +++ 
b/.github/skip_nf_test.json @@ -3,6 +3,7 @@ "modules/msk/custom/fingerprintvcfparser", "modules/msk/custom/fingerprintcontamination", "modules/msk/custom/fingerprintcombine", + "modules/msk/custom/fingerprintcorrelation", "modules/msk/calculatenoise", "modules/msk/ppflagfixer", "modules/msk/facets", From 8a596c9921a7aa4f94e99e10671e57050501a296 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Thu, 18 Dec 2025 20:23:34 -0500 Subject: [PATCH 30/37] update custom/fingerprintcorrelation to output table with correlation values --- .../msk/custom/fingerprintcorrelation/main.nf | 8 +++++--- .../resources/usr/bin/plot_gbcm.R | 1 + .../fingerprintcorrelation/tests/main.nf.test | 2 ++ .../tests/main.nf.test.snap | 20 +++++++++++++++++-- 4 files changed, 26 insertions(+), 5 deletions(-) diff --git a/modules/msk/custom/fingerprintcorrelation/main.nf b/modules/msk/custom/fingerprintcorrelation/main.nf index eafc51a7..fbd5cfeb 100644 --- a/modules/msk/custom/fingerprintcorrelation/main.nf +++ b/modules/msk/custom/fingerprintcorrelation/main.nf @@ -12,9 +12,10 @@ process CUSTOM_FINGERPRINTCORRELATION { tuple val(meta), path(combined_fp_tsv) output: - tuple val(meta), path("*_gbcm_sample-to-sample4.pdf"), emit: heatmap_pdf - tuple val(meta), path("*_interactive4.html"), emit: heatmap_html - tuple val(meta), path("*_observations.tab"), emit: observations_tab + tuple val(meta), path("*_gbcm_sample-to-sample4.pdf") , emit: heatmap_pdf + tuple val(meta), path("*_interactive4.html") , emit: heatmap_html + tuple val(meta), path("*_observations.tab") , emit: observations_tab + tuple val(meta), path("*_correlations.tab") , emit: correlations_tab tuple val("${task.process}"), val('plot_gbcm.R'), val("0.1.0"), topic: versions, emit: versions_fingerprintcorrelation when: @@ -37,5 +38,6 @@ process CUSTOM_FINGERPRINTCORRELATION { touch ${prefix}_gbcm_sample-to-sample4.pdf touch ${prefix}_interactive4.html touch ${prefix}_observations.tab + touch ${prefix}_correlations.tab """ } diff 
--git a/modules/msk/custom/fingerprintcorrelation/resources/usr/bin/plot_gbcm.R b/modules/msk/custom/fingerprintcorrelation/resources/usr/bin/plot_gbcm.R index 5ba3460c..8fbf5971 100755 --- a/modules/msk/custom/fingerprintcorrelation/resources/usr/bin/plot_gbcm.R +++ b/modules/msk/custom/fingerprintcorrelation/resources/usr/bin/plot_gbcm.R @@ -61,6 +61,7 @@ title = paste("Patient:", sample,"; ", nrow(all_fp_gbcm_final)," Loci used",sep all_fp_gbcm_final_matrix <- data.matrix(all_fp_gbcm_final) all_fp_gbcm_final_matrix = cor(as.matrix(all_fp_gbcm_final_matrix), method = c("pearson"), use = "pairwise.complete.obs") +write.table(all_fp_gbcm_final_matrix, paste(outdir,"/",sample,'_correlations.tab', sep = ''), sep = '\t',quote=F) gbcm_data_long <- reshape2::melt(all_fp_gbcm_final_matrix) gbcm_observation = crossprod(!is.na(all_fp_gbcm_final)) diff --git a/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test b/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test index e6231546..b23d8356 100644 --- a/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test +++ b/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test @@ -74,6 +74,7 @@ nextflow_process { assert process.success assertAll( { assert snapshot( + process.out.correlations_tab, process.out.observations_tab, process.out.versions_fingerprintcorrelation ).match() } @@ -98,6 +99,7 @@ nextflow_process { assert process.success assertAll( { assert snapshot( + process.out.correlations_tab, process.out.observations_tab, process.out.versions_fingerprintcorrelation ).match() } diff --git a/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test.snap b/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test.snap index 98ee5274..c261cb94 100644 --- a/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test.snap +++ b/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test.snap @@ -1,6 +1,14 @@ { "sarscov2 - bam - stub": { "content": [ + [ + [ + { + "id": "thispool" + }, + 
"thispool_correlations.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], [ [ { @@ -21,10 +29,18 @@ "nf-test": "0.9.3", "nextflow": "25.10.2" }, - "timestamp": "2025-12-17T12:40:10.210681589" + "timestamp": "2025-12-18T20:20:30.919363465" }, "sarscov2 - bam": { "content": [ + [ + [ + { + "id": null + }, + "batch_correlations.tab:md5,4622cb8a7eff25e7bbd28ed23b74b239" + ] + ], [ [ { @@ -45,6 +61,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.2" }, - "timestamp": "2025-12-17T12:40:03.812030372" + "timestamp": "2025-12-18T20:20:24.501583538" } } \ No newline at end of file From 51047f1752fa13cdd98fe9aaaaa3b31a97b2c6af Mon Sep 17 00:00:00 2001 From: NoronhaA Date: Thu, 8 Jan 2026 22:58:19 -0500 Subject: [PATCH 31/37] add grouping logic for ordering samples in fingerprintcombine --- modules/msk/custom/fingerprintcombine/main.nf | 9 ++++++--- .../resources/usr/bin/complete_FP_table.R | 3 ++- subworkflows/msk/fingerprint_gbcms_batch/main.nf | 2 +- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/modules/msk/custom/fingerprintcombine/main.nf b/modules/msk/custom/fingerprintcombine/main.nf index 121beb5f..47a32ba7 100644 --- a/modules/msk/custom/fingerprintcombine/main.nf +++ b/modules/msk/custom/fingerprintcombine/main.nf @@ -10,7 +10,7 @@ process CUSTOM_FINGERPRINTCOMBINE { input: - tuple val(meta), path(fp_tsv), val(sample), val(genome_build) + tuple val(meta), path(fp_tsv), val(sample), val(genome_build), val(group) path(liftover_loci_mapping) output: @@ -26,15 +26,18 @@ process CUSTOM_FINGERPRINTCOMBINE { declare -a fp_tsv_list declare -a sample_list declare -a genome_build_list + declare -a group_list fp_tsv_list=(${fp_tsv.join(' ')}) sample_list=(${sample.join(' ')}) genome_build_list=(${genome_build.join(' ')}) - echo -e "sample_id\tgenome_build\tfp_tsv" > input.tsv + group_list=(${group.join(' ')}) + echo -e "sample_id\tgenome_build\tfp_tsv\tgroup" > input.tsv for i in \$(seq 0 1 \$((\${#fp_tsv_list[@]}-1)) ) ; do fp_tsv=\${fp_tsv_list[i]} 
sample=\${sample_list[i]} genome=\${genome_build_list[i]} - echo -e "\$sample\t\$genome\t\$fp_tsv" + group=\${group_list[i]} + echo -e "\$sample\t\$genome\t\$fp_tsv\t\$group" done >> input.tsv complete_FP_table.R \\ diff --git a/modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R b/modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R index 7a8e3ad5..b9d4567f 100755 --- a/modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R +++ b/modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R @@ -49,7 +49,7 @@ hg19_hg38_mapper$Loci_hg38 = paste(hg19_hg38_mapper$GRCH38_CHROM,hg19_hg38_mappe hg19_hg38_mapper = hg19_hg38_mapper %>% select(Loci_hg19, Loci_hg38) %>% unique() message("Loading Samples") -input_table = fread(args$input_table, header = T) %>% arrange(sample_id) +input_table = fread(args$input_table, header = T) %>% arrange(group, sample_id) for (i in 1:nrow(input_table)){ sample = input_table$sample_id[i] genome_build = input_table$genome_build[i] @@ -103,6 +103,7 @@ if (!dir.exists(args$analysis_folder)) { message(paste("Output file: ", args$analysis_folder,"/",args$depth_filter,"DPfilter_ALL_FP.txt", sep="")) +all_fp_gbcm_final <- apply(all_fp_gbcm_final,2,as.character) write.table(all_fp_gbcm_final, file = paste(args$analysis_folder,"/",args$depth_filter,"DPfilter_ALL_FP.txt", sep=""), append = F, sep = "\t", row.names = F, quote = F) message("FP file completed") diff --git a/subworkflows/msk/fingerprint_gbcms_batch/main.nf b/subworkflows/msk/fingerprint_gbcms_batch/main.nf index e0df5c92..9f2e10c3 100644 --- a/subworkflows/msk/fingerprint_gbcms_batch/main.nf +++ b/subworkflows/msk/fingerprint_gbcms_batch/main.nf @@ -17,7 +17,7 @@ workflow FINGERPRINT_GBCMS_BATCH { if (meta.pool) { meta2.id = meta.pool } - [meta2, tsv, meta.id, meta.genome ?: default_genome ] + [meta2, tsv, meta.id, meta.genome ?: default_genome, meta.group ?: "default" ] }.groupTuple(by:[0]), 
ch_liftover_loci_mapping.first() ) From f45b4d99e1a09a44a1a84f6b2a449bde5355b265 Mon Sep 17 00:00:00 2001 From: NoronhaA Date: Thu, 8 Jan 2026 23:01:08 -0500 Subject: [PATCH 32/37] add logic to handle exception for when denominator of fraction is zero --- .../resources/usr/bin/calculate_contamination.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py b/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py index 3f5ece93..23febaff 100755 --- a/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py +++ b/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py @@ -23,7 +23,10 @@ def major_contamination(tumor, depth_filter): homozygous = ['AA','CC','GG','TT','A','C','G','T'] heterozygous = ~tumor_filtered['Genotype'].isin(homozygous) - return sum(heterozygous)/tumor_filtered.shape[0] + try: + return sum(heterozygous)/tumor_filtered.shape[0] + except Exception as e: + return 0 def get_coverage(file, depth_filter): #print(file['Alleles'].str.split(' ', expand=True)) From 18db500ec5f5f3a1fdae3787ff15bb76f11839d1 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Thu, 8 Jan 2026 23:38:30 -0500 Subject: [PATCH 33/37] fix failing nf-tests --- modules/msk/custom/fingerprintcombine/tests/main.nf.test | 5 +++-- .../msk/custom/fingerprintcorrelation/tests/main.nf.test | 2 +- subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap | 6 +++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/modules/msk/custom/fingerprintcombine/tests/main.nf.test b/modules/msk/custom/fingerprintcombine/tests/main.nf.test index 03b3388b..0cb6e4d9 100644 --- a/modules/msk/custom/fingerprintcombine/tests/main.nf.test +++ b/modules/msk/custom/fingerprintcombine/tests/main.nf.test @@ -57,7 +57,7 @@ nextflow_process { input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv .map{ 
meta, tsv -> println meta - [[id:meta.pool], tsv, meta.id, "hg19"] + [[id:meta.pool], tsv, meta.id, "hg19","default"] }.groupTuple(by:[0]) input[1] = file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true) """ @@ -85,7 +85,8 @@ nextflow_process { [id:"testsample"], [file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true)], ["testsample"], - ["hg19"] + ["hg19"], + ["default"] ] input[1] = file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true) """ diff --git a/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test b/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test index b23d8356..00d54193 100644 --- a/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test +++ b/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test @@ -55,7 +55,7 @@ nextflow_process { input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv .map{ meta, tsv -> def meta2 = [id:meta.pool] - [[id:meta.pool], tsv, meta.id, "hg19"] + [[id:meta.pool], tsv, meta.id, "hg19", "default"] }.groupTuple(by:[0]) input[1] = file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true) """ diff --git a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap index 4f6067f4..c0e01878 100644 --- a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap +++ b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap @@ -39,7 +39,7 @@ { "id": "defaultbatch" }, - "0DPfilter_ALL_FP.txt:md5,21ecaf823768ac5d6787fa6a6b2aca37" + "0DPfilter_ALL_FP.txt:md5,2b376a207fd1bd6bec55fa765e3a3947" ] ], "combined_fp_tsv": [ @@ -47,7 +47,7 @@ { "id": "defaultbatch" }, - "0DPfilter_ALL_FP.txt:md5,21ecaf823768ac5d6787fa6a6b2aca37" + "0DPfilter_ALL_FP.txt:md5,2b376a207fd1bd6bec55fa765e3a3947" ] ], "contamination_tsv": [ @@ -88,6 +88,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.2" }, - "timestamp": 
"2025-12-17T13:30:35.667469411" + "timestamp": "2026-01-08T23:23:53.894051098" } } \ No newline at end of file From d6ecf3c586ad88a583b08f19bf05db3358207a18 Mon Sep 17 00:00:00 2001 From: NoronhaA Date: Fri, 30 Jan 2026 14:28:11 -0500 Subject: [PATCH 34/37] put process tag in double quotes --- modules/msk/custom/fingerprintcombine/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/msk/custom/fingerprintcombine/main.nf b/modules/msk/custom/fingerprintcombine/main.nf index 47a32ba7..60abf46d 100644 --- a/modules/msk/custom/fingerprintcombine/main.nf +++ b/modules/msk/custom/fingerprintcombine/main.nf @@ -1,5 +1,5 @@ process CUSTOM_FINGERPRINTCOMBINE { - tag '$meta.id' + tag "$meta.id" label 'process_single' conda "${moduleDir}/environment.yml" From 6d6c0e1a3f59f7cd9f968dd88d886659c157b2fc Mon Sep 17 00:00:00 2001 From: NoronhaA Date: Fri, 30 Jan 2026 14:28:58 -0500 Subject: [PATCH 35/37] change method of adding a column in order to handle empty table --- .../fingerprintcombine/resources/usr/bin/complete_FP_table.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R b/modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R index b9d4567f..aa2869ef 100755 --- a/modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R +++ b/modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R @@ -68,7 +68,8 @@ for (i in 1:nrow(input_table)){ temp_dataset$DP = as.numeric(temp_dataset$DP1) + as.numeric(temp_dataset$DP2) temp_dataset = temp_dataset[temp_dataset$DP >= args$depth_filter,] ## keeping loci >= 20 dp by default temp_dataset$VAF[is.na(temp_dataset$VAF)==T] <- 0 - temp_dataset$Sample = sample #only loci with DP >= depth filter will have Sample info + #temp_dataset$Sample = sample #only loci with DP >= depth filter will have Sample info + temp_dataset$Sample <- rep(sample, nrow(temp_dataset)) 
temp_dataset = temp_dataset %>% select("Locus","Genotype","Sample","VAF") temp_dataset$Locus = str_replace(temp_dataset$Locus,"chr","") From acfa784029bb3ae2fd0b301de6264219dbff4146 Mon Sep 17 00:00:00 2001 From: NoronhaA Date: Fri, 30 Jan 2026 14:34:24 -0500 Subject: [PATCH 36/37] change output channel to include mix of run-computed and previously-computed FPs --- subworkflows/msk/fingerprint_gbcms/main.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/subworkflows/msk/fingerprint_gbcms/main.nf b/subworkflows/msk/fingerprint_gbcms/main.nf index d6c90f14..1cdd7ea9 100644 --- a/subworkflows/msk/fingerprint_gbcms/main.nf +++ b/subworkflows/msk/fingerprint_gbcms/main.nf @@ -64,7 +64,8 @@ workflow FINGERPRINT_GBCMS { } emit: - fp_tsv = CUSTOM_FINGERPRINTVCFPARSER.out.tsv // channel: [ val(meta), tsv ] + fp_tsv_from_bam = CUSTOM_FINGERPRINTVCFPARSER.out.tsv // channel: [ val(meta), tsv ] + fp_tsv = all_fps // channel: [ val(meta), tsv ] contamination_tsv = CUSTOM_FINGERPRINTCONTAMINATION.out.contamination_tsv // channel: [ val(meta), contamination_tsv ] combined_fp_tsv = FINGERPRINT_GBCMS_BATCH.out.combined_fp_tsv // channel: [ tsv ] From baff4ae32cebe7c098f0ee95016726cec26d5be8 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Thu, 12 Feb 2026 10:38:37 -0500 Subject: [PATCH 37/37] updates --- modules/msk/custom/fingerprintcombine/main.nf | 12 +- .../resources/usr/bin/complete_FP_table.R | 2 +- .../msk/custom/fingerprintcorrelation/main.nf | 12 +- .../resources/usr/bin/plot_gbcm.R | 331 +++++++++--------- .../fingerprintcorrelation/tests/main.nf.test | 2 + .../tests/main.nf.test.snap | 12 +- subworkflows/msk/fingerprint_gbcms/main.nf | 8 +- .../fingerprint_gbcms/tests/main.nf.test.snap | 38 +- .../msk/fingerprint_gbcms_batch/main.nf | 8 +- .../tests/main.nf.test | 2 + 10 files changed, 235 insertions(+), 192 deletions(-) diff --git a/modules/msk/custom/fingerprintcombine/main.nf b/modules/msk/custom/fingerprintcombine/main.nf index 
60abf46d..ffb406d5 100644 --- a/modules/msk/custom/fingerprintcombine/main.nf +++ b/modules/msk/custom/fingerprintcombine/main.nf @@ -10,7 +10,7 @@ process CUSTOM_FINGERPRINTCOMBINE { input: - tuple val(meta), path(fp_tsv), val(sample), val(genome_build), val(group) + tuple val(meta), path(fp_tsv), val(sample), val(genome_build), val(patient) path(liftover_loci_mapping) output: @@ -26,18 +26,18 @@ process CUSTOM_FINGERPRINTCOMBINE { declare -a fp_tsv_list declare -a sample_list declare -a genome_build_list - declare -a group_list + declare -a patient_list fp_tsv_list=(${fp_tsv.join(' ')}) sample_list=(${sample.join(' ')}) genome_build_list=(${genome_build.join(' ')}) - group_list=(${group.join(' ')}) - echo -e "sample_id\tgenome_build\tfp_tsv\tgroup" > input.tsv + patient_list=(${patient.join(' ')}) + echo -e "sample_id\tgenome_build\tfp_tsv\tpatient" > input.tsv for i in \$(seq 0 1 \$((\${#fp_tsv_list[@]}-1)) ) ; do fp_tsv=\${fp_tsv_list[i]} sample=\${sample_list[i]} genome=\${genome_build_list[i]} - group=\${group_list[i]} - echo -e "\$sample\t\$genome\t\$fp_tsv\t\$group" + patient=\${patient_list[i]} + echo -e "\$sample\t\$genome\t\$fp_tsv\t\$patient" done >> input.tsv complete_FP_table.R \\ diff --git a/modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R b/modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R index aa2869ef..f551459e 100755 --- a/modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R +++ b/modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R @@ -49,7 +49,7 @@ hg19_hg38_mapper$Loci_hg38 = paste(hg19_hg38_mapper$GRCH38_CHROM,hg19_hg38_mappe hg19_hg38_mapper = hg19_hg38_mapper %>% select(Loci_hg19, Loci_hg38) %>% unique() message("Loading Samples") -input_table = fread(args$input_table, header = T) %>% arrange(group, sample_id) +input_table = fread(args$input_table, header = T) %>% arrange(patient, sample_id) for (i in 1:nrow(input_table)){ sample = 
input_table$sample_id[i] genome_build = input_table$genome_build[i] diff --git a/modules/msk/custom/fingerprintcorrelation/main.nf b/modules/msk/custom/fingerprintcorrelation/main.nf index fbd5cfeb..1eec8ff8 100644 --- a/modules/msk/custom/fingerprintcorrelation/main.nf +++ b/modules/msk/custom/fingerprintcorrelation/main.nf @@ -10,10 +10,11 @@ process CUSTOM_FINGERPRINTCORRELATION { input: tuple val(meta), path(combined_fp_tsv) + val(filter_term) output: - tuple val(meta), path("*_gbcm_sample-to-sample4.pdf") , emit: heatmap_pdf - tuple val(meta), path("*_interactive4.html") , emit: heatmap_html + tuple val(meta), path("*.pdf") , emit: heatmap_pdf + tuple val(meta), path("*.html") , emit: heatmap_html tuple val(meta), path("*_observations.tab") , emit: observations_tab tuple val(meta), path("*_correlations.tab") , emit: correlations_tab tuple val("${task.process}"), val('plot_gbcm.R'), val("0.1.0"), topic: versions, emit: versions_fingerprintcorrelation @@ -24,19 +25,20 @@ process CUSTOM_FINGERPRINTCORRELATION { script: def args = task.ext.args ?: '' def prefix = meta.id ?: "batch" + def filter_args = (filter_term && filter_term != "") ? 
"-p ${filter_term} -f" : "" """ plot_gbcm.R \\ -t ${combined_fp_tsv} \\ -o ./ \\ - -p ${prefix} + ${filter_args} """ stub: def args = task.ext.args ?: '' def prefix = meta.id ?: "batch" """ - touch ${prefix}_gbcm_sample-to-sample4.pdf - touch ${prefix}_interactive4.html + touch ${prefix}.pdf + touch ${prefix}.html touch ${prefix}_observations.tab touch ${prefix}_correlations.tab """ diff --git a/modules/msk/custom/fingerprintcorrelation/resources/usr/bin/plot_gbcm.R b/modules/msk/custom/fingerprintcorrelation/resources/usr/bin/plot_gbcm.R index 8fbf5971..7e985759 100755 --- a/modules/msk/custom/fingerprintcorrelation/resources/usr/bin/plot_gbcm.R +++ b/modules/msk/custom/fingerprintcorrelation/resources/usr/bin/plot_gbcm.R @@ -1,9 +1,10 @@ #!/usr/bin/env Rscript + #------------------------------------------------------------------------------- # Script: plot_gbcm.R # Author: Hanan Salim -# Date: 2025-11-03 -# Version: 0.1.0 +# Date: 2026-02-09 +# Version: 0.2.0 # # Description: This script takes in a wide fingerprinting table pertaining # to multiple samples and plots in pdf and html formats. 
@@ -12,8 +13,7 @@ # #------------------------------------------------------------------------------- - -rm(list=ls()) +rm(list=ls()) library(argparse, quietly = T) library(plyr, quietly = T) @@ -23,14 +23,112 @@ library(tidyverse, quietly = T) library(scales, quietly = T) library(ggforce, quietly = T) library(gtools, quietly = T) -library(plotly) library(htmlwidgets) library(ggiraph) -library(reshape2) + `%notin%` <- Negate(`%in%`) `%notlike%` <- Negate(`%like%`) + +#function to size the dots +calculate_point_size <- function(x,y) { + n_x <- length(unique(x)) + n_y <- length(unique(y)) + + #define your plot size (in inches) + plot_width_in <- 20 + plot_height_in <- 20 + + #convert to mm (1 inch = 25.4 mm) + plot_width_mm <- plot_width_in * 25.4 + plot_height_mm <- plot_height_in * 25.4 + + #calculate tile size in mm + tile_width_mm <- plot_width_mm / n_x + tile_height_mm <- plot_height_mm / n_y + + #max circle diameter (fits inside smallest tile dimension) + max_diameter_mm <- min(tile_width_mm, tile_height_mm) + + #approximate max point size for geom_point (radius in mm) + max_point_size <- max_diameter_mm + + return(max_point_size) +} + + +#function to create static plots +static_plot <- function(data, max_point_size) { + n = length(unique(data$Var1)) + legend_size = max_point_size * n * .4 + + axis_text_size = if (n < 25) 14 else 10 + + p <- ggplot(data, aes(x = Var1, y = Var2)) + + geom_tile(color = "gray50", linewidth = 0.25, fill = NA) + + geom_point_interactive( + aes(size = log2_size, + fill = value, + tooltip = paste0( + "x: ", Var1, "\n", + "y: ", Var2, "\n", + "Loci Overlap: ", size, "\n", + "Correlation: ", round(value, 2) + )), + shape = 21, + color="NA" + ) + + scale_x_discrete(limits = sort(levels(data$Var1))) + + scale_y_discrete(limits = rev(sort(levels(data$Var2)))) + + scale_fill_viridis_c( + name = "Correlation", + option = "viridis", + direction = -1, + alpha = 0.75, + begin = 0, + end = 1, + limits = c(-1, 1), + breaks = seq(-1, 1, by = .25), 
+ guide = guide_colorbar(direction = "vertical", + title.position = "top", + barheight = unit(legend_size, "mm"), + barwidth = unit(legend_size*.05, "mm") + )) + + scale_size_continuous( + limits = c(0, 14.2), #known max of log2(size) + range = c(0, max_point_size), + breaks = seq(2, 14, by = 4), + name = "Loci Overlap (log2)", + guide = guide_legend(direction = "vertical", + title.position = "top", + keyheight = unit(legend_size/4, "mm"), + override.aes = list( + color = "black", + stroke = 0.5 + )) + ) + + labs(title = title) + + theme_minimal() + + theme( + text = element_text(family = "Courier"), + panel.grid = element_blank(), + axis.text.x = element_text(angle = 90, hjust = 1, size = 10, color = "black"), + axis.text.y = element_text(size = 10, color = "black"), + axis.title = element_blank(), + plot.title = element_text(hjust = 0.5, size = 24, margin = margin(b = 15)), + legend.position = "right", + legend.box = "horizontal", + legend.box.just = "left", + legend.title.align = 0.5, + legend.spacing.x = unit(1, "cm"), + aspect.ratio = 1 + ) + + return(p) +} + + parser = ArgumentParser(description = 'create correlation plots for a given sample') parser$add_argument('-t', '--table', required = TRUE, @@ -39,172 +137,75 @@ parser$add_argument('-t', '--table', required = TRUE, parser$add_argument('-o', '--analysis_folder', required = TRUE, help = 'output folder') -parser$add_argument('-p', '--pool', required = TRUE, +parser$add_argument('-p', '--pool', required = FALSE, + default = "fp_plots", help = 'pool ID') +parser$add_argument('-f', '--filter', + action = "store_true", + default = FALSE, + help = "create pool levelel plots instead of extended plots" +) + args = parser$parse_args() -all_fp_gbcm_final = fread(args$table, sep = '\t') +fingerprints = fread(args$table, sep = '\t') outdir = args$analysis_folder sample = args$pool -all_fp_gbcm_final <- all_fp_gbcm_final %>% select(-contains(c('Loci_hg19', 'Loci_hg38'))) -cols <- grep("VAF", names(all_fp_gbcm_final), 
value = TRUE) -#print(class(all_fp_gbcm_final)) -all_fp_gbcm_final <- all_fp_gbcm_final[, ..cols] -for ( col in 1:ncol(all_fp_gbcm_final)){ - colnames(all_fp_gbcm_final)[col] <- sub("VAF_", "", colnames(all_fp_gbcm_final)[col]) +#format data +fingerprints <- fingerprints %>% select(-contains(c('Loci_hg19', 'Loci_hg38'))) +cols <- grep("VAF", names(fingerprints), value = TRUE) +fingerprints <- fingerprints[, ..cols] + +for ( col in 1:ncol(fingerprints)){ + colnames(fingerprints)[col] <- sub("VAF_", "", colnames(fingerprints)[col]) +} + +title = paste("Pool:", sample,"; ", nrow(fingerprints)," Loci used",sep = "") + +fp_matrix <- data.matrix(fingerprints) +fp_matrix = cor(as.matrix(fp_matrix), method = c("pearson"), use = "pairwise.complete.obs") + +fp_long <- reshape2::melt(fp_matrix) +observations = crossprod(!is.na(fingerprints)) +obs_long <- reshape2::melt(observations) +final <- data.frame(fp_long, size = obs_long$value) + +#calculate log2 size column +final$log2_size <- log2(final$size) + +if (args$filter) { + + if (identical(args$pool, "fp_plots")) { + message("A pool ID is required to create pool level plots") + quit(status = 1) + } + + message("Creating pool level plots") + type="pool" + + final = final %>% filter(grepl(args$pool, Var1) & grepl(args$pool, Var2)) + final = droplevels(final) + +} else { + message("Creating extended plots") + type="extended" } -title = paste("Patient:", sample,"; ", nrow(all_fp_gbcm_final)," Loci used",sep = "") - -all_fp_gbcm_final_matrix <- data.matrix(all_fp_gbcm_final) -all_fp_gbcm_final_matrix = cor(as.matrix(all_fp_gbcm_final_matrix), method = c("pearson"), use = "pairwise.complete.obs") -write.table(all_fp_gbcm_final_matrix, paste(outdir,"/",sample,'_correlations.tab', sep = ''), sep = '\t',quote=F) - -gbcm_data_long <- reshape2::melt(all_fp_gbcm_final_matrix) -gbcm_observation = crossprod(!is.na(all_fp_gbcm_final)) -gbcm_obs_long <- reshape2::melt(gbcm_observation) -gbcm_combo_data <- data.frame(gbcm_data_long, size = 
gbcm_obs_long$value) - -# plot -#pdf(paste(outdir,"/",sample,'_sample-to-sample.pdf', sep = ""), width = 25, height = 25) - -n_x <- length(unique(gbcm_combo_data$Var1)) -n_y <- length(unique(gbcm_combo_data$Var2)) - -# Define your plot size (in inches) -plot_width_in <- 20 -plot_height_in <- 20 - -# Convert to mm (1 inch = 25.4 mm) -plot_width_mm <- plot_width_in * 25.4 -plot_height_mm <- plot_height_in * 25.4 - -# Calculate tile size in mm -tile_width_mm <- plot_width_mm / n_x -tile_height_mm <- plot_height_mm / n_y - -# Max circle diameter (fits inside smallest tile dimension) -max_diameter_mm <- min(tile_width_mm, tile_height_mm) - -# Approximate max point size for geom_point (radius in mm) -max_point_size <- max_diameter_mm - -# Calculate log2 size column -gbcm_combo_data$log2_size <- log2(gbcm_combo_data$size) -#print(gbcm_combo_data$log2_size) - - -gbcm_combo_data$Var1 <- factor(gbcm_combo_data$Var1, levels = mixedsort(unique(gbcm_combo_data$Var1))) -gbcm_combo_data$Var2 <- factor(gbcm_combo_data$Var2, levels = mixedsort(unique(gbcm_combo_data$Var2))) - -p <- ggplot(gbcm_combo_data, aes(x = Var1, y = Var2)) + - geom_tile(color = "black", linewidth = 0.5, fill = NA) + - geom_point(aes(size = log2_size, fill = value), shape = 21, color = "black") + - #geom_text(aes(label = size), color = "white", size = 4) + - scale_x_discrete(limits = sort(levels(gbcm_combo_data$Var1))) + - scale_y_discrete(limits = sort(levels(gbcm_combo_data$Var2))) + - scale_fill_viridis_c( - name = "Correlation", - option = "viridis", - direction = -1, - alpha = 0.75, - begin = 0, - end = 1, - limits = c(-1, 1), - guide = guide_colorbar(direction = "vertical", - title.position = "top" - )) + - scale_size_continuous(limits = c(0, 14.2), # known max of log2(size) - range = c(0, max_point_size), - name = "Sites (log2)", - guide = guide_legend(direction = "vertical", - title.position = "top") - ) + - #scale_size_identity(name = "Sites (log2)", - # guide = guide_legend(direction = "vertical", - 
# title.position = "top"), - # breaks = rescale(c(2, 5, 10, 14.2), to = c(1, 10), from = c(0, max_log2)), - # labels = c("2", "5", "10", "14.2")) + - - #scale_size_continuous(name = "Sites (log2)", - #range = c(4, 32), - #guide = guide_legend(direction = "vertical", - #title.position = "top")) + - labs(title = title) + - theme_minimal() + - theme( - panel.grid = element_blank(), - axis.text.x = element_text(angle = 90, hjust = 1, size = 10, color = "black"), - axis.text.y = element_text(size = 10, color = "black"), - axis.title = element_blank(), - plot.title = element_text(hjust = 0.5, size = 20, margin = margin(b = 15)), - legend.position = "right", - legend.box = "horizontal", - legend.box.just = "left", - legend.title.align = 0.5, - legend.spacing.x = unit(1, "cm"), - aspect.ratio = 1 - ) - -p2 <- ggplot(gbcm_combo_data, aes(x = Var1, y = Var2)) + - geom_tile(color = "black", linewidth = 0.5, fill = NA) + - geom_point_interactive( - aes(size = log2_size, - fill = value, - tooltip = paste0( - "x: ", Var1, "\n", - "y: ", Var2, "\n", - "Size: ", size, "\n", - "Correlation: ", round(value, 2) - )), - shape = 21, - color = "black" - ) + - scale_x_discrete(limits = sort(levels(gbcm_combo_data$Var1))) + - scale_y_discrete(limits = sort(levels(gbcm_combo_data$Var2))) + - scale_fill_viridis_c( - name = "Correlation", - option = "viridis", - direction = -1, - alpha = 0.75, - begin = 0, - end = 1, - limits = c(-1, 1), - guide = guide_colorbar(direction = "vertical", - title.position = "top" - )) + - scale_size_continuous(limits = c(0, 14.2), # known max of log2(size) - range = c(0, max_point_size), - name = "Sites (log2)", - guide = guide_legend(direction = "vertical", - title.position = "top") - ) + - labs(title = title) + - theme_minimal() + - theme( - text = element_text(family = "Courier"), - panel.grid = element_blank(), - axis.text.x = element_text(angle = 90, hjust = 1, size = 10, color = "black"), - axis.text.y = element_text(size = 10, color = "black"), - 
axis.title = element_blank(), - plot.title = element_text(hjust = 0.5, size = 24, margin = margin(b = 15)), - legend.position = "right", - legend.box = "horizontal", - legend.box.just = "left", - legend.title.align = 0.5, - legend.spacing.x = unit(1, "cm"), - aspect.ratio = 1, - ) - - -pg = girafe(ggobj = p2, width_svg = 25, height_svg = 25, - options = list(opts_tooltip(css = "padding:5pt; font-size:16pt; color:white; background-color:black;"))) - -saveWidget(pg, paste(outdir,"/",sample,'_interactive4.html', sep = ""), selfcontained = TRUE) - - -ggsave(paste(outdir,"/",sample,'_gbcm_sample-to-sample4.pdf', sep = ""), plot = p, width = 25, height = 25, units = "in", device = cairo_pdf) -write.table(gbcm_observation, paste(outdir,"/",sample,'_observations.tab', sep = ''), sep = '\t') +#get max point size +max_point_size = calculate_point_size(final$Var1, final$Var2) + +#create static plot +s <- static_plot(final, max_point_size) +ggsave(paste(outdir,"/",sample,"_", type, '.pdf', sep = ""), plot = s, width = 25, height = 25, units = "in", device = cairo_pdf) + +#create interactive plot +i = girafe(ggobj = s, width_svg = 25, height_svg = 25, + options = list(opts_tooltip(css = "padding:5pt; font-size:16pt; color:white; background-color:black;"))) +saveWidget(i, paste(outdir,"/",sample,"_", type,'.html', sep = ""), selfcontained = TRUE) + +#save tables +write.table(observations, paste(outdir,"/",sample, '_observations.tab', sep = ''), sep = '\t') +write.table(fp_matrix, paste(outdir,"/",sample, '_correlations.tab', sep = ''), sep = '\t') diff --git a/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test b/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test index 00d54193..8142af7b 100644 --- a/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test +++ b/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test @@ -66,6 +66,7 @@ nextflow_process { process { """ input[0] = CUSTOM_FINGERPRINTCOMBINE.out.combined_fp_tsv + input[1] = "" """ } } @@ 
-91,6 +92,7 @@ nextflow_process { process { """ input[0] = [[id:'thispool'], file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true)] + input[1] = "" """ } } diff --git a/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test.snap b/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test.snap index c261cb94..13fe33ac 100644 --- a/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test.snap +++ b/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test.snap @@ -27,9 +27,9 @@ ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.2" + "nextflow": "25.10.3" }, - "timestamp": "2025-12-18T20:20:30.919363465" + "timestamp": "2026-02-11T12:20:48.405942771" }, "sarscov2 - bam": { "content": [ @@ -38,7 +38,7 @@ { "id": null }, - "batch_correlations.tab:md5,4622cb8a7eff25e7bbd28ed23b74b239" + "fp_plots_correlations.tab:md5,dbc55d8829950501d3ed2db9a832165c" ] ], [ @@ -46,7 +46,7 @@ { "id": null }, - "batch_observations.tab:md5,858d6d115a4da81652bb98dcc8b8077f" + "fp_plots_observations.tab:md5,858d6d115a4da81652bb98dcc8b8077f" ] ], [ @@ -59,8 +59,8 @@ ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.2" + "nextflow": "25.10.3" }, - "timestamp": "2025-12-18T20:20:24.501583538" + "timestamp": "2026-02-11T12:20:41.807879336" } } \ No newline at end of file diff --git a/subworkflows/msk/fingerprint_gbcms/main.nf b/subworkflows/msk/fingerprint_gbcms/main.nf index 1cdd7ea9..bc63c22f 100644 --- a/subworkflows/msk/fingerprint_gbcms/main.nf +++ b/subworkflows/msk/fingerprint_gbcms/main.nf @@ -57,16 +57,18 @@ workflow FINGERPRINT_GBCMS { FINGERPRINT_GBCMS_BATCH ( all_fps, ch_liftover_loci_mapping, - default_genome + default_genome, + [] ) + combined_fp_tsv = FINGERPRINT_GBCMS_BATCH.out.combined_fp_tsv } else { - FINGERPRINT_GBCMS_BATCH.out.combined_fp_tsv = Channel.empty() + combined_fp_tsv = Channel.empty() } emit: fp_tsv_from_bam = CUSTOM_FINGERPRINTVCFPARSER.out.tsv // channel: [ val(meta), tsv ] fp_tsv = all_fps // 
channel: [ val(meta), tsv ] contamination_tsv = CUSTOM_FINGERPRINTCONTAMINATION.out.contamination_tsv // channel: [ val(meta), contamination_tsv ] - combined_fp_tsv = FINGERPRINT_GBCMS_BATCH.out.combined_fp_tsv // channel: [ tsv ] + combined_fp_tsv = combined_fp_tsv // channel: [ tsv ] } diff --git a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap index c0e01878..414032c9 100644 --- a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap +++ b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap @@ -19,6 +19,22 @@ ] ], "1": [ + [ + { + "id": "test", + "sample": "test" + }, + "test.fp.tsv:md5,c467328eb3c7fb534b555b83b0227206" + ], + [ + { + "id": "test2", + "sample": "test2" + }, + "test2.fp.tsv:md5,c3fbcee584048e9bc4fc93bc6ca487d2" + ] + ], + "2": [ [ { "id": "test", @@ -34,7 +50,7 @@ "test2.contamination.tsv:md5,2eb950d4d5e0f9b4f7ae53d41d22fb5f" ] ], - "2": [ + "3": [ [ { "id": "defaultbatch" @@ -81,13 +97,29 @@ }, "test2.fp.tsv:md5,c3fbcee584048e9bc4fc93bc6ca487d2" ] + ], + "fp_tsv_from_bam": [ + [ + { + "id": "test", + "sample": "test" + }, + "test.fp.tsv:md5,c467328eb3c7fb534b555b83b0227206" + ], + [ + { + "id": "test2", + "sample": "test2" + }, + "test2.fp.tsv:md5,c3fbcee584048e9bc4fc93bc6ca487d2" + ] ] } ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.2" + "nextflow": "25.10.3" }, - "timestamp": "2026-01-08T23:23:53.894051098" + "timestamp": "2026-02-10T14:58:55.67145979" } } \ No newline at end of file diff --git a/subworkflows/msk/fingerprint_gbcms_batch/main.nf b/subworkflows/msk/fingerprint_gbcms_batch/main.nf index 9f2e10c3..19a19169 100644 --- a/subworkflows/msk/fingerprint_gbcms_batch/main.nf +++ b/subworkflows/msk/fingerprint_gbcms_batch/main.nf @@ -4,9 +4,10 @@ include { CUSTOM_FINGERPRINTCORRELATION } from '../../../modules/msk/custom/fing workflow FINGERPRINT_GBCMS_BATCH { take: - ch_fp // channel: [ val(meta), [ bam ] ] + ch_fp // channel: [ val(meta), [ bam ] 
] ch_liftover_loci_mapping // channel: [ liftover_loci_mapping ] default_genome + filter_terms // channel: filterterm main: @@ -17,13 +18,14 @@ workflow FINGERPRINT_GBCMS_BATCH { if (meta.pool) { meta2.id = meta.pool } - [meta2, tsv, meta.id, meta.genome ?: default_genome, meta.group ?: "default" ] + [meta2, tsv, meta.id, meta.genome ?: default_genome, meta.patient ?: meta.sample ] }.groupTuple(by:[0]), ch_liftover_loci_mapping.first() ) CUSTOM_FINGERPRINTCORRELATION( - CUSTOM_FINGERPRINTCOMBINE.out.combined_fp_tsv + CUSTOM_FINGERPRINTCOMBINE.out.combined_fp_tsv, + filter_terms.unique() ) emit: diff --git a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test index c1bd3701..d705e2c7 100644 --- a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test +++ b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test @@ -59,6 +59,8 @@ nextflow_workflow { input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv input[1] = [file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true)] input[2] = "hg19" + input[3] = Channel.empty() + //input[3] = Channel.of("") """ } }