diff --git a/install_data.sh b/install_data.sh
new file mode 100644
index 000000000000..aed5c33bd36c
--- /dev/null
+++ b/install_data.sh
@@ -0,0 +1,83 @@
+#!/usr/bin/env bash
+#
+# Download and unpack the test data for the
+# fastq_extractumi_fgbio_picard_gatk4_fastp subworkflow test.
+#
+# Test data is hosted on Google Drive at:
+# https://drive.google.com/file/d/1GtT8jsBGwRoQC-5wHh06r8RFkiFBuirp/view?usp=sharing
+#
+# Usage: bash install_data.sh
+# Unpacks the archive into the current working directory; does nothing when
+# the test_nucleo directory is already there.
+
+set -euo pipefail
+
+readonly FILE_ID='1GtT8jsBGwRoQC-5wHh06r8RFkiFBuirp'
+readonly ARCHIVE='test_nucleo.tar.gz'
+readonly DATA_DIR='test_nucleo'
+readonly DOWNLOAD_URL='https://drive.google.com/uc?export=download'
+
+cookie_jar=''
+partial=''
+
+# Remove temporary files on every exit path.
+cleanup() {
+  [[ -z "${cookie_jar}" ]] || rm -f -- "${cookie_jar}"
+  [[ -z "${partial}" ]] || rm -f -- "${partial}"
+}
+
+# Download the archive from Google Drive into ${ARCHIVE}.
+# The first request only stores the cookies; the confirmation token is read
+# from the cookie jar and sent back with the second request.
+download_archive() {
+  local token
+
+  cookie_jar=$(mktemp)
+  partial=$(mktemp "${ARCHIVE}.XXXXXX")
+
+  curl --fail -sS -L -c "${cookie_jar}" \
+    "${DOWNLOAD_URL}&id=${FILE_ID}" >/dev/null
+  token=$(awk '/download/ {print $NF}' "${cookie_jar}")
+
+  curl --fail -L -b "${cookie_jar}" -o "${partial}" \
+    "${DOWNLOAD_URL}&confirm=${token}&id=${FILE_ID}"
+
+  # Only a finished download gets the final name, so an interrupted run
+  # never leaves a truncated archive behind for the next run to pick up.
+  mv -- "${partial}" "${ARCHIVE}"
+  partial=''
+}
+
+# Unpack ${ARCHIVE} into the current directory.
+extract_archive() {
+  local tar_version
+  local -a tar_args=(-xzvf "${ARCHIVE}")
+
+  # Suppress the warnings GNU tar prints for tar.gz files made on macOS.
+  # The flag is GNU-specific, so it is only added when GNU tar is in use.
+  tar_version=$(tar --version 2>/dev/null || true)
+  if [[ "${tar_version}" == *'GNU tar'* ]]; then
+    tar_args=(--warning=no-unknown-keyword "${tar_args[@]}")
+  fi
+
+  tar "${tar_args[@]}"
+}
+
+main() {
+  trap cleanup EXIT
+
+  # Skip if the test data is already unpacked.
+  if [[ -d "${DATA_DIR}" ]]; then
+    return 0
+  fi
+
+  # Reuse an archive that is already present instead of downloading it again.
+  if [[ ! -f "${ARCHIVE}" ]]; then
+    download_archive
+  fi
+
+  extract_archive
+  rm -f -- "${ARCHIVE}"
+}
+
+main "$@"
diff --git a/subworkflows/nf-core/fastq_extractumi_fgbio_picard_gatk4_fastp/main.nf b/subworkflows/nf-core/fastq_extractumi_fgbio_picard_gatk4_fastp/main.nf
new file mode 100644
index 000000000000..992105eaa0a2
--- /dev/null
+++ b/subworkflows/nf-core/fastq_extractumi_fgbio_picard_gatk4_fastp/main.nf
@@ -0,0 +1,57 @@
+//
+// Convert FASTQ files to BAMs (fgbio), merge them into a single BAM (Picard),
+// convert the merged BAM back to FASTQ (GATK4) and run fastp on the merged reads
+//
+
+include { FGBIO_FASTQTOBAM     } from '../../../modules/nf-core/fgbio/fastqtobam/main'
+include { PICARD_MERGESAMFILES } from '../../../modules/nf-core/picard/mergesamfiles/main'
+include { GATK4_SAMTOFASTQ     } from '../../../modules/nf-core/gatk4/samtofastq/main'
+include { FASTP                } from '../../../modules/nf-core/fastp/main'
+
+workflow FASTQ_EXTRACTUMI_FGBIO_PICARD_GATK4_FASTP {
+
+    take:
+    ch_fastq // channel: [ val(meta), [ fastq ] ]
+
+    main:
+
+    ch_versions = Channel.empty()
+
+    // FGBIO_FASTQTOBAM: build the unmerged BAMs, one per channel item.
+    // Channel items are independent, which lets them be processed in parallel:
+    // https://www.nextflow.io/docs/latest/faq.html?highlight=parallel
+    FGBIO_FASTQTOBAM ( ch_fastq )
+    ch_versions = ch_versions.mix(FGBIO_FASTQTOBAM.out.versions)
+
+    // Drop the per-sample meta and gather every BAM into one list under a shared id
+    ch_unmerged_bams = FGBIO_FASTQTOBAM.out.bam
+        .map { meta, bam -> [ bam ] }
+        .collect()
+        .map { bams -> [ [ id:'unmerged_bams' ], bams ] }
+
+    // PICARD_MERGESAMFILES: merge all BAMs into a single BAM
+    PICARD_MERGESAMFILES ( ch_unmerged_bams )
+    ch_versions = ch_versions.mix(PICARD_MERGESAMFILES.out.versions)
+
+    // Re-label the merged BAM. The new meta map is built inline so the closure
+    // does not assign to an undeclared (script-global) variable.
+    ch_merged_bam = PICARD_MERGESAMFILES.out.bam
+        .map { meta, bam -> [ [ id:'merged_bam' ], bam ] }
+
+    // GATK4_SAMTOFASTQ: get FASTQ files from the merged BAM
+    GATK4_SAMTOFASTQ ( ch_merged_bam )
+    ch_versions = ch_versions.mix(GATK4_SAMTOFASTQ.out.versions)
+
+    ch_merged_fastq = GATK4_SAMTOFASTQ.out.fastq
+        .map { meta, fastq -> [ [ id:'merged_fastq' ], fastq ] }
+
+    // FASTP: run fastp on the merged FASTQ files
+    FASTP ( ch_merged_fastq, [], false, false )
+    ch_versions = ch_versions.mix(FASTP.out.versions)
+
+    emit:
+    reads    = FASTP.out.reads // channel: [ val(meta), [ reads ] ]
+    bam      = FASTP.out.reads // same channel as `reads`; name kept for existing callers
+    versions = ch_versions     // channel: [ versions.yml ]
+
+}
diff --git a/subworkflows/nf-core/fastq_extractumi_fgbio_picard_gatk4_fastp/meta.yml b/subworkflows/nf-core/fastq_extractumi_fgbio_picard_gatk4_fastp/meta.yml
new file mode 100644
index 000000000000..988903096ad9
--- /dev/null
+++ b/subworkflows/nf-core/fastq_extractumi_fgbio_picard_gatk4_fastp/meta.yml
@@ -0,0 +1,43 @@
+name: "fastq_extractumi_fgbio_picard_gatk4_fastp"
+description: Convert FASTQ files to BAMs with fgbio, merge them with Picard, convert the merged BAM back to FASTQ with GATK4 and run fastp on the merged reads
+keywords:
+  - fastq
+  - bam
+  - umi
+  - merge
+  - fastp
+modules:
+  - fgbio/fastqtobam
+  - picard/mergesamfiles
+  - gatk4/samtofastq
+  - fastp
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - fastq:
+      type: file
+      description: FASTQ file(s) of one sample, e.g. the R1/R2 pair of a paired-end library
+      pattern: "*.{fastq,fastq.gz,fq,fq.gz}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map set by the subworkflow for the merged data
+        e.g. [ id:'merged_fastq' ]
+  - reads:
+      type: file
+      description: Reads emitted by fastp for the merged FASTQ files
+      pattern: "*.fastq.gz"
+  - bam:
+      type: file
+      description: "Same channel as `reads` (FASTQ files, not BAM); name kept for backwards compatibility"
+      pattern: "*.fastq.gz"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@buehlere"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 0144bc892ef7..8fe443976f0f 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -3736,6 +3736,10 @@ subworkflows/fastq_download_prefetch_fasterqdump_sratools:
   - subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/**
   - tests/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/**
 
+subworkflows/fastq_extractumi_fgbio_picard_gatk4_fastp:
+  - subworkflows/nf-core/fastq_extractumi_fgbio_picard_gatk4_fastp/**
+  - tests/subworkflows/nf-core/fastq_extractumi_fgbio_picard_gatk4_fastp/**
+
 subworkflows/fastq_fastqc_umitools_fastp:
   - subworkflows/nf-core/fastq_fastqc_umitools_fastp/**
   - tests/subworkflows/nf-core/fastq_fastqc_umitools_fastp/**
diff --git a/tests/subworkflows/nf-core/fastq_extractumi_fgbio_picard_gatk4_fastp/main.nf b/tests/subworkflows/nf-core/fastq_extractumi_fgbio_picard_gatk4_fastp/main.nf
new file mode 100644
index 000000000000..0458c1ef02fb
--- /dev/null
+++ b/tests/subworkflows/nf-core/fastq_extractumi_fgbio_picard_gatk4_fastp/main.nf
@@ -0,0 +1,41 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { FASTQ_EXTRACTUMI_FGBIO_PICARD_GATK4_FASTP } from '../../../../subworkflows/nf-core/fastq_extractumi_fgbio_picard_gatk4_fastp/main.nf'
+
+workflow test_fastq_extractumi_fgbio_picard_gatk4_fastp {
+
+    // Fetch the test data: run install_data.sh from the directory Nextflow was
+    // launched in, forwarding its stdout/stderr to the console
+    def installer = new ProcessBuilder('bash', 'install_data.sh')
+    installer.redirectOutput(ProcessBuilder.Redirect.INHERIT)
+    installer.redirectError(ProcessBuilder.Redirect.INHERIT)
+
+    // Stop here when the installer failed instead of running on missing inputs
+    def exit_status = installer.start().waitFor()
+    if (exit_status != 0) {
+        error "install_data.sh exited with status ${exit_status}"
+    }
+
+    // One channel item per FASTQ pair; channel items can be processed in parallel:
+    // https://www.nextflow.io/docs/latest/faq.html?highlight=parallel
+    ch_fastq = Channel.fromList([
+        [
+            [ id:'gene1', single_end:false ],
+            [
+                file('test_nucleo/fastq/seracare_0-5_R1_001ad.fastq.gz', checkIfExists: true),
+                file('test_nucleo/fastq/seracare_0-5_R2_001ad.fastq.gz', checkIfExists: true)
+            ]
+        ],
+        [
+            [ id:'gene2', single_end:false ],
+            [
+                file('test_nucleo/fastq/seracare_0-5_R1_001ae.fastq.gz', checkIfExists: true),
+                file('test_nucleo/fastq/seracare_0-5_R2_001ae.fastq.gz', checkIfExists: true)
+            ]
+        ]
+    ])
+
+    FASTQ_EXTRACTUMI_FGBIO_PICARD_GATK4_FASTP ( ch_fastq )
+}
diff --git a/tests/subworkflows/nf-core/fastq_extractumi_fgbio_picard_gatk4_fastp/nextflow.config b/tests/subworkflows/nf-core/fastq_extractumi_fgbio_picard_gatk4_fastp/nextflow.config
new file mode 100644
index 000000000000..8730f1c4b930
--- /dev/null
+++ b/tests/subworkflows/nf-core/fastq_extractumi_fgbio_picard_gatk4_fastp/nextflow.config
@@ -0,0 +1,5 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+}
diff --git a/tests/subworkflows/nf-core/fastq_extractumi_fgbio_picard_gatk4_fastp/test.yml b/tests/subworkflows/nf-core/fastq_extractumi_fgbio_picard_gatk4_fastp/test.yml
new file mode 100644
index 000000000000..932d3b646266
--- /dev/null
+++ b/tests/subworkflows/nf-core/fastq_extractumi_fgbio_picard_gatk4_fastp/test.yml
@@ -0,0 +1,23 @@
+- name: fastq_extractumi_fgbio_picard_gatk4_fastp test_fastq_extractumi_fgbio_picard_gatk4_fastp
+  command: nextflow run ./tests/subworkflows/nf-core/fastq_extractumi_fgbio_picard_gatk4_fastp -entry test_fastq_extractumi_fgbio_picard_gatk4_fastp -c ./tests/config/nextflow.config
+  tags:
+    - fastp
+    - fgbio
+    - fgbio/fastqtobam
+    - gatk4
+    - gatk4/samtofastq
+    - picard
+    - picard/mergesamfiles
+    - subworkflows
+    - subworkflows/fastq_extractumi_fgbio_picard_gatk4_fastp
+  files:
+    # NOTE(review): directories follow the publishDir rule of this test's nextflow.config;
+    # file names assume each module names its output after meta.id and that params.outdir
+    # is "output" - confirm against a real run, then add md5sums (nf-core subworkflows create-test-yml).
+    - path: output/fgbio/gene1.bam
+    - path: output/fgbio/gene2.bam
+    - path: output/picard/unmerged_bams.bam
+    - path: output/gatk4/merged_bam_1.fastq.gz
+    - path: output/gatk4/merged_bam_2.fastq.gz
+    - path: output/fastp/merged_fastq_1.fastp.fastq.gz
+    - path: output/fastp/merged_fastq_2.fastp.fastq.gz