From 48c932dd2b5e44bf5ac26516145d48a4f1ed736f Mon Sep 17 00:00:00 2001 From: nservant <nicolas.servant@curie.fr> Date: Mon, 21 Feb 2022 19:42:13 +0100 Subject: [PATCH] [MODIF] HICPRO workflow is running --- conf/igenomes.config | 584 +++++------------- conf/modules.config | 94 ++- conf/test.config | 57 +- lib/WorkflowHic.groovy | 14 +- main.nf | 1 + modules.json | 18 +- modules/local/bowtie2_merge_mapping_steps.nf | 47 -- modules/local/build_contact_maps.nf | 26 - modules/local/combine_mates.nf | 36 -- modules/local/converts_to_pairs.nf | 27 - modules/local/getRestictionFragments.nf | 23 - modules/local/get_chromsize.nf | 16 + modules/local/get_restriction_fragments.nf | 16 + modules/local/get_valid_interaction.nf | 45 -- modules/local/hicpro/bowtie2_merge.nf | 37 ++ modules/local/hicpro/build_contact_maps.nf | 21 + modules/local/hicpro/combine_mates.nf | 18 + modules/local/hicpro/combine_mates.nf~ | 18 + modules/local/hicpro/get_valid_interaction.nf | 31 + .../get_valid_interaction_dnase.nf | 0 modules/local/hicpro/hicpro2pairs.nf | 18 + .../local/hicpro/merge_valid_interaction.nf | 51 ++ modules/local/hicpro/run_ice.nf | 20 + modules/local/hicpro/trim_reads.nf | 23 + modules/local/makeChromSize.nf | 24 - modules/local/remove_duplicates.nf | 58 -- modules/local/run_ice.nf | 30 - modules/local/samplesheet_check.nf | 8 +- .../local/{ => trash}/bowtie2_end_to_end.nf | 0 .../{ => trash}/bowtie2_on_trimmed_reads.nf | 0 modules/local/trim_reads.nf | 29 - modules/nf-core/modules/bowtie2/align/main.nf | 77 +++ .../nf-core/modules/bowtie2/align/meta.yml | 51 ++ modules/nf-core/modules/bowtie2/build/main.nf | 30 + .../nf-core/modules/bowtie2/build/meta.yml | 33 + modules/nf-core/modules/fastqc/functions.nf | 68 -- modules/nf-core/modules/fastqc/main.nf | 42 +- modules/nf-core/modules/fastqc/meta.yml | 89 +-- modules/nf-core/modules/multiqc/functions.nf | 68 -- modules/nf-core/modules/multiqc/main.nf | 34 +- modules/nf-core/modules/multiqc/meta.yml | 65 +- 
.../nf-core/modules/samtools/merge/main.nf | 41 ++ .../nf-core/modules/samtools/merge/meta.yml | 54 ++ modules/nf-core/modules/samtools/sort/main.nf | 31 + .../nf-core/modules/samtools/sort/meta.yml | 44 ++ nextflow.config | 71 +++ nextflow_schema.json | 475 ++++++++++---- subworkflows/local/hicpro.nf | 81 ++- subworkflows/local/hicpro_mapping.nf | 102 +++ subworkflows/local/hicpro_mapping.nf~ | 102 +++ subworkflows/local/input_check.nf | 32 +- subworkflows/local/prepare_genome.nf | 60 ++ subworkflows/local/prepare_genome.nf~ | 60 ++ workflows/hic.nf | 214 ++++--- 54 files changed, 2009 insertions(+), 1305 deletions(-) delete mode 100644 modules/local/bowtie2_merge_mapping_steps.nf delete mode 100644 modules/local/build_contact_maps.nf delete mode 100644 modules/local/combine_mates.nf delete mode 100644 modules/local/converts_to_pairs.nf delete mode 100644 modules/local/getRestictionFragments.nf create mode 100644 modules/local/get_chromsize.nf create mode 100644 modules/local/get_restriction_fragments.nf delete mode 100644 modules/local/get_valid_interaction.nf create mode 100644 modules/local/hicpro/bowtie2_merge.nf create mode 100644 modules/local/hicpro/build_contact_maps.nf create mode 100644 modules/local/hicpro/combine_mates.nf create mode 100644 modules/local/hicpro/combine_mates.nf~ create mode 100644 modules/local/hicpro/get_valid_interaction.nf rename modules/local/{ => hicpro}/get_valid_interaction_dnase.nf (100%) create mode 100644 modules/local/hicpro/hicpro2pairs.nf create mode 100644 modules/local/hicpro/merge_valid_interaction.nf create mode 100644 modules/local/hicpro/run_ice.nf create mode 100644 modules/local/hicpro/trim_reads.nf delete mode 100644 modules/local/makeChromSize.nf delete mode 100644 modules/local/remove_duplicates.nf delete mode 100644 modules/local/run_ice.nf rename modules/local/{ => trash}/bowtie2_end_to_end.nf (100%) rename modules/local/{ => trash}/bowtie2_on_trimmed_reads.nf (100%) delete mode 100644 
modules/local/trim_reads.nf create mode 100644 modules/nf-core/modules/bowtie2/align/main.nf create mode 100644 modules/nf-core/modules/bowtie2/align/meta.yml create mode 100644 modules/nf-core/modules/bowtie2/build/main.nf create mode 100644 modules/nf-core/modules/bowtie2/build/meta.yml delete mode 100644 modules/nf-core/modules/fastqc/functions.nf delete mode 100644 modules/nf-core/modules/multiqc/functions.nf create mode 100644 modules/nf-core/modules/samtools/merge/main.nf create mode 100644 modules/nf-core/modules/samtools/merge/meta.yml create mode 100644 modules/nf-core/modules/samtools/sort/main.nf create mode 100644 modules/nf-core/modules/samtools/sort/meta.yml create mode 100644 subworkflows/local/hicpro_mapping.nf create mode 100644 subworkflows/local/hicpro_mapping.nf~ create mode 100644 subworkflows/local/prepare_genome.nf create mode 100644 subworkflows/local/prepare_genome.nf~ diff --git a/conf/igenomes.config b/conf/igenomes.config index 855948d..1ba2588 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -1,432 +1,162 @@ /* -======================================================================================== - Nextflow config file for iGenomes paths -======================================================================================== - Defines reference genomes using iGenome paths. 
- Can be used by any config that customises the base path using: - $params.igenomes_base / --igenomes_base ----------------------------------------------------------------------------------------- -*/ + * ------------------------------------------------- + * Nextflow config file for iGenomes paths + * ------------------------------------------------- + * Defines reference genomes, using iGenome paths + * Can be used by any config that customises the base + * path using $params.igenomes_base / --igenomes_base + */ params { - // illumina iGenomes reference file paths - genomes { - 'GRCh37' { - fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt" - mito_name = "MT" - macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/GRCh37-blacklist.bed" - } - 'GRCh38' { - fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf" - bed12 = 
"${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" - } - 'GRCm38' { - fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/README.txt" - mito_name = "MT" - macs_gsize = "1.87e9" - blacklist = "${projectDir}/assets/blacklists/GRCm38-blacklist.bed" - } - 'TAIR10' { - fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/README.txt" - mito_name = "Mt" - } - 'EB2' { - fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" - bwa = 
"${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/README.txt" - } - 'UMD3.1' { - fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt" - mito_name = "MT" - } - 'WBcel235' { - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BismarkIndex/" - gtf = 
"${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" - mito_name = "MtDNA" - macs_gsize = "9e7" - } - 'CanFam3.1' { - fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt" - mito_name = "MT" - } - 'GRCz10' { - fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" - mito_name = "MT" - } - 'BDGP6' { - fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/genome.fa" - bowtie2 = 
"${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" - mito_name = "M" - macs_gsize = "1.2e8" - } - 'EquCab2' { - fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/README.txt" - mito_name = "MT" - } - 'EB1' { - fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" - bed12 = 
"${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/README.txt" - } - 'Galgal4' { - fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" - mito_name = "MT" - } - 'Gm01' { - fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/README.txt" - } - 'Mmul_1' { - fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" - bismark = 
"${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/README.txt" - mito_name = "MT" - } - 'IRGSP-1.0' { - fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" - mito_name = "Mt" - } - 'CHIMP2.1.4' { - fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/README.txt" - mito_name = "MT" - } - 'Rnor_5.0' { - fasta = 
"${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.bed" - mito_name = "MT" - } - 'Rnor_6.0' { - fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" - mito_name = "MT" - } - 'R64-1-1' { - fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BismarkIndex/" - gtf = 
"${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" - mito_name = "MT" - macs_gsize = "1.2e7" - } - 'EF2' { - fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/README.txt" - mito_name = "MT" - macs_gsize = "1.21e7" - } - 'Sbi1' { - fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/README.txt" - } - 'Sscrofa10.2' { - fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" - bwa = 
"${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/README.txt" - mito_name = "MT" - } - 'AGPv3' { - fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" - mito_name = "Mt" - } - 'hg38' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = 
"${projectDir}/assets/blacklists/hg38-blacklist.bed" - } - 'hg19' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/hg19-blacklist.bed" - } - 'mm10' { - fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "1.87e9" - blacklist = "${projectDir}/assets/blacklists/mm10-blacklist.bed" - } - 'bosTau8' { - fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/" - star = 
"${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.bed" - mito_name = "chrM" - } - 'ce10' { - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "9e7" - } - 'canFam3' { - fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/README.txt" - mito_name = "chrM" - } - 'danRer10' { - fasta = 
"${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "1.37e9" - } - 'dm6' { - fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "1.2e8" - } - 'equCab2' { - fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.gtf" - bed12 = 
"${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/README.txt" - mito_name = "chrM" - } - 'galGal4' { - fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/README.txt" - mito_name = "chrM" - } - 'panTro4' { - fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/README.txt" - mito_name = "chrM" - } - 'rn6' { - fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/" - star 
= "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.bed" - mito_name = "chrM" - } - 'sacCer3' { - fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BismarkIndex/" - readme = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "1.2e7" - } - 'susScr3' { - fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/README.txt" - mito_name = "chrM" - } + // illumina iGenomes reference file paths + genomes { + 'GRCh37' { + fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = 
"${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/" } + 'GRCh38' { + fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/" + } + 'GRCm38' { + fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/" + } + 'TAIR10' { + fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" + } + 'EB2' { + fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/" + } + 'UMD3.1' { + fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" + } + 'WBcel235' { + fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" + } + 'CanFam3.1' { + fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" + } + 'GRCz10' { + fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" + } + 'BDGP6' { + fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = 
"${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" + } + 'EquCab2' { + fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/" + } + 'EB1' { + fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/" + } + 'Galgal4' { + fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/" + } + 'Gm01' { + fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/" + } + 'Mmul_1' { + fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" + } + 'IRGSP-1.0' { + fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/" + } + 'CHIMP2.1.4' { + fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" + } + 'Rnor_6.0' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/" + } + 'R64-1-1' { + fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = 
"${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/" + } + 'EF2' { + fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/" + } + 'Sbi1' { + fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/" + } + 'Sscrofa10.2' { + fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/" + } + 'AGPv3' { + fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" + } + 'hg38' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" + } + 'hg19' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" + } + 'mm10' { + fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" + } + 'bosTau8' { + fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/" + } + 'ce10' { + fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/" + } + 'canFam3' { + fasta = 
"${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" + } + 'danRer10' { + fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/" + } + 'dm6' { + fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/" + } + 'equCab2' { + fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/" + } + 'galGal4' { + fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/" + } + 'panTro4' { + fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/" + } + 'rn6' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/" + } + 'sacCer3' { + fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/" + } + 'susScr3' { + fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" + bowtie2 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/" + } + } } diff --git a/conf/modules.config b/conf/modules.config index 0b1bfde..5b8d560 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ 
-1,32 +1,64 @@ -/* -======================================================================================== - Config file for defining DSL2 per module options -======================================================================================== - Available keys to override module options: - args = Additional arguments appended to command in module. - args2 = Second set of arguments appended to command in module (multi-tool modules). - args3 = Third set of arguments appended to command in module (multi-tool modules). - publish_dir = Directory to publish results. - publish_by_meta = Groovy list of keys available in meta map to append as directories to "publish_dir" path - If publish_by_meta = true - Value of ${meta['id']} is appended as a directory to "publish_dir" path - If publish_by_meta = ['id', 'custompath'] - If "id" is in meta map and "custompath" isn't then "${meta['id']}/custompath/" - is appended as a directory to "publish_dir" path - If publish_by_meta = false / null - No directories are appended to "publish_dir" path - publish_files = Groovy map where key = "file_ext" and value = "directory" to publish results for that file extension - The value of "directory" is appended to the standard "publish_dir" path as defined above. - If publish_files = null (unspecified) - All files are published. - If publish_files = false - No files are published. - suffix = File name suffix for output files. ----------------------------------------------------------------------------------------- -*/ - -params { - modules { - 'fastqc' { - args = "--quiet" - } - 'multiqc' { - args = "" - } - } +process { + + //Default + publishDir = [ + path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, + mode: 'copy', + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + + // PREPARE_GENOME + withName: 'BOWTIE2_BUILD' { + publishDir = [ + path: { "${params.outdir}/genome/bowtie2" }, + mode: 'copy', + enabled: params.save_reference + ] + } + + withName: 'GET_CHROMSIZE' { + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: 'copy', + enabled: params.save_reference + ] + } + + withName: 'GET_RESTRICTION_FRAGMENTS' { + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: 'copy', + enabled: params.save_reference + ] + } + + // HICPRO + withName:'BOWTIE2_ALIGN' { + ext.prefix = { "${meta.id}_${meta.mates}" } + ext.args = params.bwt2_opts_end2end ?: '' + } + + withName:'BOWTIE2_ALIGN_TRIMMED' { + ext.prefix = { "${meta.id}_${meta.mates}_trimmed" } + ext.args = params.bwt2_opts_trimmed ?: '' + } + + withName: 'COMBINE_MATES' { + ext.args = [ + "-t", + params.keep_multi ? "--multi" : "", + params.min_mapq ? "-q ${params.min_mapq}" : "" + ].join(' ').trim() + } + + withName: 'GET_VALID_INTERACTION' { + ext.args = [ + params.min_cis_dist > 0 ? " -d ${params.min_cis_dist}" : '', + params.min_insert_size > 0 ? " -s ${params.min_insert_size}" : '', + params.max_insert_size > 0 ? " -l ${params.max_insert_size}" : '', + params.min_restriction_fragment_size > 0 ? " -t ${params.min_restriction_fragment_size}" : '', + params.max_restriction_fragment_size > 0 ? " -m ${params.max_restriction_fragment_size}" : '', + params.save_interaction_bam ? " --sam" : '' + ].join(' ').trim() + } } diff --git a/conf/test.config b/conf/test.config index d117e41..0e7285e 100644 --- a/conf/test.config +++ b/conf/test.config @@ -1,29 +1,42 @@ /* -======================================================================================== - Nextflow config file for running minimal tests -======================================================================================== - Defines input files and everything required to run a fast and simple pipeline test. 
- - Use as follows: - nextflow run nf-core/hic -profile test,<docker/singularity> - ----------------------------------------------------------------------------------------- -*/ + * ------------------------------------------------- + * Nextflow config file for running tests + * ------------------------------------------------- + * Defines bundled input files and everything required + * to run a fast and simple test. Use as follows: + * nextflow run nf-core/hic -profile test,<docker/singularity> + */ params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' + config_profile_name = 'Hi-C test data from Schalbetter et al. (2017)' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on Travis + max_cpus = 2 + max_memory = 4.GB + max_time = 1.h - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = 6.GB - max_time = 6.h + // Input data + //input_paths = [ + // ['SRR4292758_00', ['https://github.com/nf-core/test-datasets/raw/hic/data/SRR4292758_00_R1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/hic/data/SRR4292758_00_R2.fastq.gz']] + // ] + input = '/home/nservant/tmp/nf-core-hic/samplesheet.csv' - // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv' + // Annotations + fasta = 'https://github.com/nf-core/test-datasets/raw/hic/reference/W303_SGD_2015_JRIU00000000.fsa' + digestion = 'hindiii' + min_mapq = 10 + min_restriction_fragment_size = 100 + max_restriction_fragment_size = 100000 + min_insert_size = 100 + max_insert_size = 600 - // Genome references - genome = 'R64-1-1' + bin_size = '1000' + res_dist_decay = '1000' 
+ res_tads = '1000' + tads_caller = 'insulation,hicexplorer' + res_compartments = '1000' + + // Ignore `--input` as otherwise the parameter validation will throw an error + schema_ignore_params = 'genomes,digest,input_paths,input' } diff --git a/lib/WorkflowHic.groovy b/lib/WorkflowHic.groovy index 5381157..2c21fc5 100755 --- a/lib/WorkflowHic.groovy +++ b/lib/WorkflowHic.groovy @@ -10,10 +10,18 @@ class WorkflowHic { public static void initialise(params, log) { genomeExistsError(params, log) - if (!params.fasta) { - log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." - System.exit(1) + // digestion parameters + if (params.digest && params.digestion && !params.digest.containsKey(params.digestion)) { + log.error "Unknown digestion protocol. Currently, the available digestion options are ${params.digest.keySet().join(", ")}. Please set manually the '--restriction_site' and '--ligation_site' parameters." + System.exit(1) } + + // Check Digestion or DNase Hi-C mode + //if (!params.dnase && !params.ligation_site) { + // log.error "Ligation motif not found. Please either use the `--digestion` parameters or specify the `--restriction_site` and `--ligation_site`. 
For DNase Hi-C, please use '--dnase' option" + // System.exit(1) + //} + } // diff --git a/main.nf b/main.nf index 59b9663..18b7438 100644 --- a/main.nf +++ b/main.nf @@ -18,6 +18,7 @@ nextflow.enable.dsl = 2 */ params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') +params.bwt2_index = WorkflowMain.getGenomeAttribute(params, 'bowtie2') /* ======================================================================================== diff --git a/modules.json b/modules.json index a68b1c1..bba8150 100644 --- a/modules.json +++ b/modules.json @@ -3,12 +3,24 @@ "homePage": "https://github.com/nf-core/hic", "repos": { "nf-core/modules": { + "bowtie2/align": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "bowtie2/build": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, "fastqc": { - "git_sha": "e937c7950af70930d1f34bb961403d9d2aa81c7d" + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "multiqc": { - "git_sha": "e937c7950af70930d1f34bb961403d9d2aa81c7d" + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "samtools/merge": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "samtools/sort": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" } } } -} +} \ No newline at end of file diff --git a/modules/local/bowtie2_merge_mapping_steps.nf b/modules/local/bowtie2_merge_mapping_steps.nf deleted file mode 100644 index 148acd0..0000000 --- a/modules/local/bowtie2_merge_mapping_steps.nf +++ /dev/null @@ -1,47 +0,0 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - -process bowtie2_merge_mapping_steps{ - tag "$prefix = $bam1 + $bam2" - label 'process_medium' - publishDir "${params.outdir}/hicpro/mapping", mode: params.publish_dir_mode, - saveAs: { filename -> if (params.save_aligned_intermediates && filename.endsWith("stat")) "stats/$filename" - else if 
(params.save_aligned_intermediates) filename} - - input: - tuple val(prefix), path(bam1), path(bam2) - - output: - tuple val(sample), path("${prefix}_bwt2merged.bam"), emit:bwt2_merged_bam - tuple val(oname), path("${prefix}.mapstat"), emit:all_mapstat - - script: - sample = prefix.toString() - ~/(_R1|_R2)/ - tag = prefix.toString() =~/_R1/ ? "R1" : "R2" - oname = prefix.toString() - ~/(\.[0-9]+)$/ - """ - samtools merge -@ ${task.cpus} \\ - -f ${prefix}_bwt2merged.bam \\ - ${bam1} ${bam2} - - samtools sort -@ ${task.cpus} -m 800M \\ - -n \\ - -o ${prefix}_bwt2merged.sorted.bam \\ - ${prefix}_bwt2merged.bam - - mv ${prefix}_bwt2merged.sorted.bam ${prefix}_bwt2merged.bam - - echo "## ${prefix}" > ${prefix}.mapstat - echo -n "total_${tag}\t" >> ${prefix}.mapstat - samtools view -c ${prefix}_bwt2merged.bam >> ${prefix}.mapstat - echo -n "mapped_${tag}\t" >> ${prefix}.mapstat - samtools view -c -F 4 ${prefix}_bwt2merged.bam >> ${prefix}.mapstat - echo -n "global_${tag}\t" >> ${prefix}.mapstat - samtools view -c -F 4 ${bam1} >> ${prefix}.mapstat - echo -n "local_${tag}\t" >> ${prefix}.mapstat - samtools view -c -F 4 ${bam2} >> ${prefix}.mapstat - """ -} diff --git a/modules/local/build_contact_maps.nf b/modules/local/build_contact_maps.nf deleted file mode 100644 index 764f482..0000000 --- a/modules/local/build_contact_maps.nf +++ /dev/null @@ -1,26 +0,0 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - -process build_contact_maps{ - tag "$sample - $mres" - label 'process_highmem' - publishDir "${params.outdir}/hicpro/matrix/raw", mode: params.publish_dir_mode - - when: - !params.skip_maps && params.hicpro_maps - - input: - tuple val(sample), path(vpairs), val(mres) - path chrsize - - output: - tuple val(sample), val(mres), path("*.matrix"), path("*.bed"), emit: raw_maps_4cool - - script: - """ - build_matrix --matrix-format upper --binsize 
${mres} --chrsizes ${chrsize} --ipath ${vpairs} --oprefix ${sample}_${mres} - """ -} diff --git a/modules/local/combine_mates.nf b/modules/local/combine_mates.nf deleted file mode 100644 index 503911e..0000000 --- a/modules/local/combine_mates.nf +++ /dev/null @@ -1,36 +0,0 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - -process combine_mates{ - tag "$sample = $r1_prefix + $r2_prefix" - label 'process_low' - publishDir "${params.outdir}/hicpro/mapping", mode: params.publish_dir_mode, - saveAs: {filename -> filename.endsWith(".pairstat") ? "stats/$filename" : "$filename"} - - input: - tuple val(sample), path(aligned_bam) - - output: - tuple val(oname), path("${sample}_bwt2pairs.bam"), emit:paired_bam - tuple val(oname), path("*.pairstat"), emit:all_pairstat - - script: - r1_bam = aligned_bam[0] - r1_prefix = r1_bam.toString() - ~/_bwt2merged.bam$/ - r2_bam = aligned_bam[1] - r2_prefix = r2_bam.toString() - ~/_bwt2merged.bam$/ - oname = sample.toString() - ~/(\.[0-9]+)$/ - - def opts = "-t" - if (params.keep_multi) { - opts="${opts} --multi" - }else if (params.min_mapq){ - opts="${opts} -q ${params.min_mapq}" - } - """ - mergeSAM.py -f ${r1_bam} -r ${r2_bam} -o ${sample}_bwt2pairs.bam ${opts} - """ -} diff --git a/modules/local/converts_to_pairs.nf b/modules/local/converts_to_pairs.nf deleted file mode 100644 index 60554d4..0000000 --- a/modules/local/converts_to_pairs.nf +++ /dev/null @@ -1,27 +0,0 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - -process convert_to_pairs { - tag "$sample" - label 'process_medium' - - when: - !params.skip_maps - - input: - tuple val(sample), path(vpairs) - path chrsize - - output: - tuple val(sample), path("*.txt.gz"), emit: cool_build_zoom - - script: - """ - ## 
chr/pos/strand/chr/pos/strand - awk '{OFS="\t";print \$1,\$2,\$3,\$5,\$6,\$4,\$7}' $vpairs > contacts.txt - gzip contacts.txt - """ -} diff --git a/modules/local/getRestictionFragments.nf b/modules/local/getRestictionFragments.nf deleted file mode 100644 index 00c0dda..0000000 --- a/modules/local/getRestictionFragments.nf +++ /dev/null @@ -1,23 +0,0 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - -process getRestrictionFragments { - tag "$fasta ${params.restriction_site}" - label 'process_low' - publishDir path: { params.save_reference ? "${params.outdir}/reference_genome" : params.outdir }, - saveAs: { params.save_reference ? it : null }, mode: params.publish_dir_mode - - input: - path fasta - - output: - path "*.bed", emit:res_frag_file - - script: - """ - digest_genome.py -r ${params.restriction_site} -o restriction_fragments.bed ${fasta} - """ -} diff --git a/modules/local/get_chromsize.nf b/modules/local/get_chromsize.nf new file mode 100644 index 0000000..f6b8c8a --- /dev/null +++ b/modules/local/get_chromsize.nf @@ -0,0 +1,16 @@ +process GET_CHROMSIZE { + tag "$fasta" + label 'process_low' + + input: + path fasta + + output: + path "*.size", emit: results + + script: + """ + samtools faidx ${fasta} + cut -f1,2 ${fasta}.fai > chrom.size + """ +} diff --git a/modules/local/get_restriction_fragments.nf b/modules/local/get_restriction_fragments.nf new file mode 100644 index 0000000..93258b7 --- /dev/null +++ b/modules/local/get_restriction_fragments.nf @@ -0,0 +1,16 @@ +process GET_RESTRICTION_FRAGMENTS { + tag "$res_site" + label 'process_low' + + input: + path fasta + val(res_site) + + output: + path "*.bed", emit: results + + script: + """ + digest_genome.py -r ${res_site} -o restriction_fragments.bed ${fasta} + """ +} diff --git a/modules/local/get_valid_interaction.nf b/modules/local/get_valid_interaction.nf deleted file mode 
100644 index 622c08e..0000000 --- a/modules/local/get_valid_interaction.nf +++ /dev/null @@ -1,45 +0,0 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - -process get_valid_interaction{ - tag "$sample" - label 'process_low' - publishDir "${params.outdir}/hicpro/valid_pairs", mode: params.publish_dir_mode, - saveAs: {filename -> if (filename.endsWith("RSstat")) "stats/$filename" - else if (filename.endsWith(".validPairs")) filename - else if (params.save_nonvalid_pairs) filename} - - input: - tuple val(sample), path(pe_bam) - path frag_path - - output: - tuple val(sample), path("*.validPairs"), emit:valid_pairs - tuple val(sample), path("*.validPairs"), emit:valid_pairs_4cool - tuple val(sample), path("*.DEPairs"), emit:de_pairs - tuple val(sample), path("*.SCPairs"), emit:sc_pairs - tuple val(sample), path("*.REPairs"), emit:re_pairs - tuple val(sample), path("*.FiltPairs"), emit:filt_pairs - tuple val(sample), path("*RSstat"), emit:all_rsstat - - script: - if (params.split_fastq){ - sample = sample.toString() - ~/(\.[0-9]+)$/ - } - - def opts = "" - opts += params.min_cis_dist > 0 ? " -d ${params.min_cis_dist}" : '' - opts += params.min_insert_size > 0 ? " -s ${params.min_insert_size}" : '' - opts += params.max_insert_size > 0 ? " -l ${params.max_insert_size}" : '' - opts += params.min_restriction_fragment_size > 0 ? " -t ${params.min_restriction_fragment_size}" : '' - opts += params.max_restriction_fragment_size > 0 ? " -m ${params.max_restriction_fragment_size}" : '' - opts += params.save_interaction_bam ? 
" --sam" : '' - prefix = pe_bam.toString() - ~/.bam/ - """ - mapped_2hic_fragments.py -f ${frag_file} -r ${pe_bam} --all ${opts} - sort -k2,2V -k3,3n -k5,5V -k6,6n -o ${prefix}.validPairs ${prefix}.validPairs - """ -} diff --git a/modules/local/hicpro/bowtie2_merge.nf b/modules/local/hicpro/bowtie2_merge.nf new file mode 100644 index 0000000..0dfd25c --- /dev/null +++ b/modules/local/hicpro/bowtie2_merge.nf @@ -0,0 +1,37 @@ +process MERGE_BOWTIE2{ + tag "$prefix" + label 'process_medium' + + input: + tuple val(meta), path(bam1), path(bam2) + + output: + tuple val(meta), path("${prefix}_bwt2merged.bam"), emit: bam + tuple val(meta), path("${prefix}.mapstat"), emit: stats + + script: + prefix = meta.id + "_" + meta.mates + tag = meta.mates + """ + samtools merge -@ ${task.cpus} \\ + -f ${prefix}_bwt2merged.bam \\ + ${bam1} ${bam2} + + samtools sort -@ ${task.cpus} -m 800M \\ + -n \\ + -o ${prefix}_bwt2merged.sorted.bam \\ + ${prefix}_bwt2merged.bam + + mv ${prefix}_bwt2merged.sorted.bam ${prefix}_bwt2merged.bam + + echo "## ${prefix}" > ${prefix}.mapstat + echo -n "total_${tag}\t" >> ${prefix}.mapstat + samtools view -c ${prefix}_bwt2merged.bam >> ${prefix}.mapstat + echo -n "mapped_${tag}\t" >> ${prefix}.mapstat + samtools view -c -F 4 ${prefix}_bwt2merged.bam >> ${prefix}.mapstat + echo -n "global_${tag}\t" >> ${prefix}.mapstat + samtools view -c -F 4 ${bam1} >> ${prefix}.mapstat + echo -n "local_${tag}\t" >> ${prefix}.mapstat + samtools view -c -F 4 ${bam2} >> ${prefix}.mapstat + """ +} diff --git a/modules/local/hicpro/build_contact_maps.nf b/modules/local/hicpro/build_contact_maps.nf new file mode 100644 index 0000000..0a9a35b --- /dev/null +++ b/modules/local/hicpro/build_contact_maps.nf @@ -0,0 +1,21 @@ +process BUILD_CONTACT_MAPS{ + tag "$meta.id - $res" + label 'process_highmem' + + input: + tuple val(meta), path(vpairs), val(res) + path chrsize + + output: + tuple val(meta), val(res), path("*.matrix"), path("*.bed"), emit: maps + + script: + """ + 
build_matrix \\ + --matrix-format upper \\ + --binsize ${res} \\ + --chrsizes ${chrsize} \\ + --ifile ${vpairs} \\ + --oprefix ${meta.id}_${res} + """ +} diff --git a/modules/local/hicpro/combine_mates.nf b/modules/local/hicpro/combine_mates.nf new file mode 100644 index 0000000..2f5f2d0 --- /dev/null +++ b/modules/local/hicpro/combine_mates.nf @@ -0,0 +1,18 @@ +process COMBINE_MATES { + tag "$prefix" + label 'process_low' + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*bwt2pairs.bam"), emit:bam + tuple val(meta), path("*.pairstat"), optional:true, emit:stats + + script: + prefix = meta.id + def args = task.ext.args ?: '' + """ + mergeSAM.py -f ${bam[0]} -r ${bam[1]} -o ${prefix}_bwt2pairs.bam ${args} + """ +} diff --git a/modules/local/hicpro/combine_mates.nf~ b/modules/local/hicpro/combine_mates.nf~ new file mode 100644 index 0000000..d4ea96d --- /dev/null +++ b/modules/local/hicpro/combine_mates.nf~ @@ -0,0 +1,18 @@ +process COMBINE_MATES { + tag "$prefix" + label 'process_low' + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*bwt2pairs.bam"), emit:bam + tuple val(meta), path("*.pairstat"), emit:stats + + script: + prefix = meta.id + def args = task.ext.args ?: '' + """ + mergeSAM.py -f ${bam[0]} -r ${bam[1]} -o ${prefix}_bwt2pairs.bam ${args} + """ +} diff --git a/modules/local/hicpro/get_valid_interaction.nf b/modules/local/hicpro/get_valid_interaction.nf new file mode 100644 index 0000000..3858576 --- /dev/null +++ b/modules/local/hicpro/get_valid_interaction.nf @@ -0,0 +1,31 @@ +process GET_VALID_INTERACTION { + tag "$meta.id" + label 'process_low' + + input: + tuple val(meta), path(bam) + path(resfrag) + + output: + tuple val(meta), path("*.validPairs"), emit:valid_pairs + tuple val(meta), path("*.DEPairs"), emit:de_pairs + tuple val(meta), path("*.SCPairs"), emit:sc_pairs + tuple val(meta), path("*.REPairs"), emit:re_pairs + tuple val(meta), path("*.FiltPairs"), emit:filt_pairs + tuple val(meta), 
path("*RSstat"), emit:stats + + script: + if (params.split_fastq){ + sample = sample.toString() - ~/(\.[0-9]+)$/ + } + def args = task.ext.args ?: '' + """ + mapped_2hic_fragments.py \\ + -f ${resfrag} \\ + -r ${bam} \\ + --all \\ + ${args} + + sort -k2,2V -k3,3n -k5,5V -k6,6n -o ${bam.baseName}.validPairs ${bam.baseName}.validPairs + """ +} diff --git a/modules/local/get_valid_interaction_dnase.nf b/modules/local/hicpro/get_valid_interaction_dnase.nf similarity index 100% rename from modules/local/get_valid_interaction_dnase.nf rename to modules/local/hicpro/get_valid_interaction_dnase.nf diff --git a/modules/local/hicpro/hicpro2pairs.nf b/modules/local/hicpro/hicpro2pairs.nf new file mode 100644 index 0000000..02abfb8 --- /dev/null +++ b/modules/local/hicpro/hicpro2pairs.nf @@ -0,0 +1,18 @@ +process HICPRO2PAIRS { + tag "$meta.id" + label 'process_medium' + + input: + tuple val(meta), path(vpairs) + path chrsize + + output: + tuple val(meta), path("*.txt.gz"), emit: pairs + + script: + """ + ## chr/pos/strand/chr/pos/strand + awk '{OFS="\t";print \$1,\$2,\$3,\$5,\$6,\$4,\$7}' $vpairs > contacts.txt + gzip contacts.txt + """ +} diff --git a/modules/local/hicpro/merge_valid_interaction.nf b/modules/local/hicpro/merge_valid_interaction.nf new file mode 100644 index 0000000..0819675 --- /dev/null +++ b/modules/local/hicpro/merge_valid_interaction.nf @@ -0,0 +1,51 @@ +process MERGE_VALID_INTERACTION { + tag "$prefix" + label 'process_highmem' + + input: + tuple val(meta), path(vpairs) + + output: + tuple val(meta), path("*.allValidPairs"), emit: valid_pairs + path("stats/"), emit:mqc + path("*mergestat"), emit:stats + + script: + prefix = meta.id + if ( ! 
params.keep_dups ){ + """ + mkdir -p stats/${prefix} + + ## Sort valid pairs and remove read pairs with same starts (i.e duplicated read pairs) + sort -S 50% -k2,2V -k3,3n -k5,5V -k6,6n -m ${vpairs} | \\ + awk -F"\\t" 'BEGIN{c1=0;c2=0;s1=0;s2=0}(c1!=\$2 || c2!=\$5 || s1!=\$3 || s2!=\$6){print;c1=\$2;c2=\$5;s1=\$3;s2=\$6}' > ${prefix}.allValidPairs + + echo -n "valid_interaction\t" > ${prefix}_allValidPairs.mergestat + cat ${vpairs} | wc -l >> ${prefix}_allValidPairs.mergestat + echo -n "valid_interaction_rmdup\t" >> ${prefix}_allValidPairs.mergestat + cat ${prefix}.allValidPairs | wc -l >> ${prefix}_allValidPairs.mergestat + + ## Count short range (<20000) vs long range contacts + awk 'BEGIN{cis=0;trans=0;sr=0;lr=0} \$2 == \$5{cis=cis+1; d=\$6>\$3?\$6-\$3:\$3-\$6; if (d<=20000){sr=sr+1}else{lr=lr+1}} \$2!=\$5{trans=trans+1}END{print "trans_interaction\\t"trans"\\ncis_interaction\\t"cis"\\ncis_shortRange\\t"sr"\\ncis_longRange\\t"lr}' ${prefix}.allValidPairs >> ${prefix}_allValidPairs.mergestat + + ## For MultiQC + mkdir -p stats/${prefix} + cp ${prefix}_allValidPairs.mergestat stats/${prefix}/ + """ + }else{ + """ + cat ${vpairs} > ${prefix}.allValidPairs + echo -n "valid_interaction\t" > ${prefix}_allValidPairs.mergestat + cat ${vpairs} | wc -l >> ${prefix}_allValidPairs.mergestat + echo -n "valid_interaction_rmdup\t" >> ${prefix}_allValidPairs.mergestat + cat ${prefix}.allValidPairs | wc -l >> ${prefix}_allValidPairs.mergestat + + ## Count short range (<20000) vs long range contacts + awk 'BEGIN{cis=0;trans=0;sr=0;lr=0} \$2 == \$5{cis=cis+1; d=\$6>\$3?\$6-\$3:\$3-\$6; if (d<=20000){sr=sr+1}else{lr=lr+1}} \$2!=\$5{trans=trans+1}END{print "trans_interaction\\t"trans"\\ncis_interaction\\t"cis"\\ncis_shortRange\\t"sr"\\ncis_longRange\\t"lr}' ${prefix}.allValidPairs >> ${prefix}_allValidPairs.mergestat + + ## For MultiQC + mkdir -p stats/${prefix} + cp ${prefix}_allValidPairs.mergestat stats/${prefix}/ + """ + } +} diff --git a/modules/local/hicpro/run_ice.nf 
b/modules/local/hicpro/run_ice.nf new file mode 100644 index 0000000..521cb31 --- /dev/null +++ b/modules/local/hicpro/run_ice.nf @@ -0,0 +1,20 @@ +process ICE_NORMALIZATION{ + tag "$rmaps" + label 'process_highmem' + + input: + tuple val(meta), val(res), path(rmaps), path(bed) + + output: + tuple val(meta), val(res), path("*iced.matrix"), path(bed), emit:maps + path ("*.biases"), emit:bias + + script: + prefix = rmaps.toString() - ~/(\.matrix)?$/ + """ + ice --filter_low_counts_perc ${params.ice_filter_low_count_perc} \ + --results_filename ${prefix}_iced.matrix \ + --filter_high_counts_perc ${params.ice_filter_high_count_perc} \ + --max_iter ${params.ice_max_iter} --eps ${params.ice_eps} --remove-all-zeros-loci --output-bias 1 --verbose 1 ${rmaps} + """ +} diff --git a/modules/local/hicpro/trim_reads.nf b/modules/local/hicpro/trim_reads.nf new file mode 100644 index 0000000..5eafc43 --- /dev/null +++ b/modules/local/hicpro/trim_reads.nf @@ -0,0 +1,23 @@ +process TRIM_READS { + tag "$meta.id" + label 'process_low' + + input: + tuple val(meta), path(reads) + val(motif) + + output: + tuple val(meta), path("*trimmed.fastq"), emit: fastq + path("versions.yml") , emit: versions + + script: + """ + cutsite_trimming --fastq $reads \\ + --cutsite ${motif} \\ + --out ${meta.id}_trimmed.fastq + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(echo \$(python --version 2>&1) | sed 's/^Python//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/makeChromSize.nf b/modules/local/makeChromSize.nf deleted file mode 100644 index ffe198c..0000000 --- a/modules/local/makeChromSize.nf +++ /dev/null @@ -1,24 +0,0 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - -process makeChromSize { - tag "$fasta" - label 'process_low' - publishDir path: { params.save_reference ? 
"${params.outdir}/reference_genome" : params.outdir }, - saveAs: { params.save_reference ? it : null }, mode: params.publish_dir_mode - - input: - path fasta - - output: - path "*.size", emit: chrsize_compartments - - script: - """ - samtools faidx ${fasta} - cut -f1,2 ${fasta}.fai > chrom.size - """ -} diff --git a/modules/local/remove_duplicates.nf b/modules/local/remove_duplicates.nf deleted file mode 100644 index a8b1093..0000000 --- a/modules/local/remove_duplicates.nf +++ /dev/null @@ -1,58 +0,0 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - -process remove_duplicates { - tag "$sample" - label 'process_highmem' - publishDir "${params.outdir}/hicpro/valid_pairs", mode: params.publish_dir_mode, - saveAs: {filename -> if (filename.endsWith("mergestat")) "stats/$filename" - else if (filename.endsWith("allValidPairs")) "$filename"} - input: - tuple val(sample), path(vpairs) - - output: - tuple val(sample), path("*.allValidPairs"), emit: ch_vpairs_cool - path("stats/"), emit:mqc_mergestat - path("*mergestat"), emit:all_mergestat - - script: - if ( ! 
params.keep_dups ){ - """ - mkdir -p stats/${sample} - - ## Sort valid pairs and remove read pairs with same starts (i.e duplicated read pairs) - sort -S 50% -k2,2V -k3,3n -k5,5V -k6,6n -m ${vpairs} | \\ - awk -F"\\t" 'BEGIN{c1=0;c2=0;s1=0;s2=0}(c1!=\$2 || c2!=\$5 || s1!=\$3 || s2!=\$6){print;c1=\$2;c2=\$5;s1=\$3;s2=\$6}' > ${sample}.allValidPairs - - echo -n "valid_interaction\t" > ${sample}_allValidPairs.mergestat - cat ${vpairs} | wc -l >> ${sample}_allValidPairs.mergestat - echo -n "valid_interaction_rmdup\t" >> ${sample}_allValidPairs.mergestat - cat ${sample}.allValidPairs | wc -l >> ${sample}_allValidPairs.mergestat - - ## Count short range (<20000) vs long range contacts - awk 'BEGIN{cis=0;trans=0;sr=0;lr=0} \$2 == \$5{cis=cis+1; d=\$6>\$3?\$6-\$3:\$3-\$6; if (d<=20000){sr=sr+1}else{lr=lr+1}} \$2!=\$5{trans=trans+1}END{print "trans_interaction\\t"trans"\\ncis_interaction\\t"cis"\\ncis_shortRange\\t"sr"\\ncis_longRange\\t"lr}' ${sample}.allValidPairs >> ${sample}_allValidPairs.mergestat - - ## For MultiQC - mkdir -p stats/${sample} - cp ${sample}_allValidPairs.mergestat stats/${sample}/ - """ - }else{ - """ - cat ${vpairs} > ${sample}.allValidPairs - echo -n "valid_interaction\t" > ${sample}_allValidPairs.mergestat - cat ${vpairs} | wc -l >> ${sample}_allValidPairs.mergestat - echo -n "valid_interaction_rmdup\t" >> ${sample}_allValidPairs.mergestat - cat ${sample}.allValidPairs | wc -l >> ${sample}_allValidPairs.mergestat - - ## Count short range (<20000) vs long range contacts - awk 'BEGIN{cis=0;trans=0;sr=0;lr=0} \$2 == \$5{cis=cis+1; d=\$6>\$3?\$6-\$3:\$3-\$6; if (d<=20000){sr=sr+1}else{lr=lr+1}} \$2!=\$5{trans=trans+1}END{print "trans_interaction\\t"trans"\\ncis_interaction\\t"cis"\\ncis_shortRange\\t"sr"\\ncis_longRange\\t"lr}' ${sample}.allValidPairs >> ${sample}_allValidPairs.mergestat - - ## For MultiQC - mkdir -p stats/${sample} - cp ${sample}_allValidPairs.mergestat stats/${sample}/ - """ - } -} diff --git a/modules/local/run_ice.nf 
b/modules/local/run_ice.nf deleted file mode 100644 index ffdafb5..0000000 --- a/modules/local/run_ice.nf +++ /dev/null @@ -1,30 +0,0 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - -process run_ice{ - tag "$rmaps" - label 'process_highmem' - publishDir "${params.outdir}/hicpro/matrix/iced", mode: params.publish_dir_mode - - when: - !params.skip_maps && !params.skip_balancing && params.hicpro_maps - - input: - tuple val(sample), val(res), path(rmaps), path(bed) - - output: - tuple val(sample), val(res), path("*iced.matrix"), path(bed), emit:hicpro_iced_maps - path ("*.biases"), emit:hicpro_iced_bias - - script: - prefix = rmaps.toString() - ~/(\.matrix)?$/ - """ - ice --filter_low_counts_perc ${params.ice_filter_low_count_perc} \ - --results_filename ${prefix}_iced.matrix \ - --filter_high_counts_perc ${params.ice_filter_high_count_perc} \ - --max_iter ${params.ice_max_iter} --eps ${params.ice_eps} --remove-all-zeros-loci --output-bias 1 --verbose 1 ${rmaps} - """ -} diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf index 7bbf09f..f95f003 100644 --- a/modules/local/samplesheet_check.nf +++ b/modules/local/samplesheet_check.nf @@ -1,13 +1,7 @@ // Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' -params.options = [:] -options = initOptions(params.options) process SAMPLESHEET_CHECK { tag "$samplesheet" - publishDir "${params.outdir}", - mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'pipeline_info', meta:[:], publish_by_meta:[]) } conda (params.enable_conda ? 
"conda-forge::python=3.8.3" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { @@ -22,7 +16,7 @@ process SAMPLESHEET_CHECK { output: path '*.csv' - script: // This script is bundled with the pipeline, in nf-core/hic/bin/ + script: """ check_samplesheet.py \\ $samplesheet \\ diff --git a/modules/local/bowtie2_end_to_end.nf b/modules/local/trash/bowtie2_end_to_end.nf similarity index 100% rename from modules/local/bowtie2_end_to_end.nf rename to modules/local/trash/bowtie2_end_to_end.nf diff --git a/modules/local/bowtie2_on_trimmed_reads.nf b/modules/local/trash/bowtie2_on_trimmed_reads.nf similarity index 100% rename from modules/local/bowtie2_on_trimmed_reads.nf rename to modules/local/trash/bowtie2_on_trimmed_reads.nf diff --git a/modules/local/trim_reads.nf b/modules/local/trim_reads.nf deleted file mode 100644 index 3b893cc..0000000 --- a/modules/local/trim_reads.nf +++ /dev/null @@ -1,29 +0,0 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - -process trim_reads { - tag "$sample" - label 'process_low' - publishDir path: { params.save_aligned_intermediates ? 
"${params.outdir}/mapping/bwt2_trimmed" : params.outdir }, - saveAs: { filename -> if (params.save_aligned_intermediates) filename }, mode: params.publish_dir_mode - - when: - !params.dnase - - input: - tuple val(sample), path(reads) - - output: - tuple val(sample), path("${prefix}_trimmed.fastq"), emit:trimmed_reads - - script: - prefix = reads.toString() - ~/(\.fq)?(\.fastq)?(\.gz)?$/ - """ - cutsite_trimming --fastq $reads \\ - --cutsite ${params.ligation_site} \\ - --out ${prefix}_trimmed.fastq - """ -} diff --git a/modules/nf-core/modules/bowtie2/align/main.nf b/modules/nf-core/modules/bowtie2/align/main.nf new file mode 100644 index 0000000..7e8a965 --- /dev/null +++ b/modules/nf-core/modules/bowtie2/align/main.nf @@ -0,0 +1,77 @@ +process BOWTIE2_ALIGN { + tag "$meta.id" + label 'process_high' + + conda (params.enable_conda ? 'bioconda::bowtie2=2.4.4 bioconda::samtools=1.14 conda-forge::pigz=2.6' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:4d235f41348a00533f18e47c9669f1ecb327f629-0' : + 'quay.io/biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:4d235f41348a00533f18e47c9669f1ecb327f629-0' }" + + input: + tuple val(meta), path(reads) + path index + val save_unaligned + + output: + tuple val(meta), path('*.bam') , emit: bam + tuple val(meta), path('*.log') , emit: log + tuple val(meta), path('*fastq.gz'), emit: fastq, optional:true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if (meta.single_end) { + def unaligned = save_unaligned ? 
"--un-gz ${prefix}.unmapped.fastq.gz" : '' + """ + INDEX=`find -L ./ -name "*.rev.1.bt2" | sed 's/.rev.1.bt2//'` + bowtie2 \\ + -x \$INDEX \\ + -U $reads \\ + --threads $task.cpus \\ + $unaligned \\ + $args \\ + 2> ${prefix}.bowtie2.log \\ + | samtools view -@ $task.cpus $args2 -bhS -o ${prefix}.bam - + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + } else { + def unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : '' + """ + INDEX=`find -L ./ -name "*.rev.1.bt2" | sed 's/.rev.1.bt2//'` + bowtie2 \\ + -x \$INDEX \\ + -1 ${reads[0]} \\ + -2 ${reads[1]} \\ + --threads $task.cpus \\ + $unaligned \\ + $args \\ + 2> ${prefix}.bowtie2.log \\ + | samtools view -@ $task.cpus $args2 -bhS -o ${prefix}.bam - + + if [ -f ${prefix}.unmapped.fastq.1.gz ]; then + mv ${prefix}.unmapped.fastq.1.gz ${prefix}.unmapped_1.fastq.gz + fi + if [ -f ${prefix}.unmapped.fastq.2.gz ]; then + mv ${prefix}.unmapped.fastq.2.gz ${prefix}.unmapped_2.fastq.gz + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/modules/bowtie2/align/meta.yml b/modules/nf-core/modules/bowtie2/align/meta.yml new file mode 100644 index 0000000..f80421e --- /dev/null +++ b/modules/nf-core/modules/bowtie2/align/meta.yml @@ -0,0 +1,51 @@ +name: bowtie2_align +description: Align reads to a reference genome using bowtie2 +keywords: + - align + - fasta + - genome + - reference +tools: + - bowtie2: + description: | + Bowtie 2 is an 
ultrafast and memory-efficient tool for aligning + sequencing reads to long reference sequences. + homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml + documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml + doi: 10.1038/nmeth.1923 + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - index: + type: file + description: Bowtie2 genome index files + pattern: "*.ebwt" +output: + - bam: + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - fastq: + type: file + description: Unaligned FastQ files + pattern: "*.fastq.gz" + - log: + type: file + description: Aligment log + pattern: "*.log" +authors: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/modules/bowtie2/build/main.nf b/modules/nf-core/modules/bowtie2/build/main.nf new file mode 100644 index 0000000..a4da62d --- /dev/null +++ b/modules/nf-core/modules/bowtie2/build/main.nf @@ -0,0 +1,30 @@ +process BOWTIE2_BUILD { + tag "$fasta" + label 'process_high' + + conda (params.enable_conda ? 'bioconda::bowtie2=2.4.4' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bowtie2:2.4.4--py39hbb4e92a_0' : + 'quay.io/biocontainers/bowtie2:2.4.4--py39hbb4e92a_0' }" + + input: + path fasta + + output: + path 'bowtie2' , emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + mkdir bowtie2 + bowtie2-build $args --threads $task.cpus $fasta bowtie2/${fasta.baseName} + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/bowtie2/build/meta.yml b/modules/nf-core/modules/bowtie2/build/meta.yml new file mode 100644 index 0000000..2da9a21 --- /dev/null +++ b/modules/nf-core/modules/bowtie2/build/meta.yml @@ -0,0 +1,33 @@ +name: bowtie2_build +description: Builds bowtie index for reference genome +keywords: + - build + - index + - fasta + - genome + - reference +tools: + - bowtie2: + description: | + Bowtie 2 is an ultrafast and memory-efficient tool for aligning + sequencing reads to long reference sequences. 
+ homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml + documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml + doi: 10.1038/nmeth.1923 + licence: ["GPL-3.0-or-later"] +input: + - fasta: + type: file + description: Input genome fasta file +output: + - index: + type: file + description: Bowtie2 genome index files + pattern: "*.bt2" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/modules/fastqc/functions.nf b/modules/nf-core/modules/fastqc/functions.nf deleted file mode 100644 index da9da09..0000000 --- a/modules/nf-core/modules/fastqc/functions.nf +++ /dev/null @@ -1,68 +0,0 @@ -// -// Utility functions used in nf-core DSL2 module files -// - -// -// Extract name of software tool from process name using $task.process -// -def getSoftwareName(task_process) { - return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() -} - -// -// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules -// -def initOptions(Map args) { - def Map options = [:] - options.args = args.args ?: '' - options.args2 = args.args2 ?: '' - options.args3 = args.args3 ?: '' - options.publish_by_meta = args.publish_by_meta ?: [] - options.publish_dir = args.publish_dir ?: '' - options.publish_files = args.publish_files - options.suffix = args.suffix ?: '' - return options -} - -// -// Tidy up and join elements of a list to return a path string -// -def getPathFromList(path_list) { - def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries - paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes - return paths.join('/') -} - -// -// Function to save/publish module results -// -def saveFiles(Map args) { - if (!args.filename.endsWith('.version.txt')) { - def ioptions = initOptions(args.options) - def 
path_list = [ ioptions.publish_dir ?: args.publish_dir ] - if (ioptions.publish_by_meta) { - def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta - for (key in key_list) { - if (args.meta && key instanceof String) { - def path = key - if (args.meta.containsKey(key)) { - path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] - } - path = path instanceof String ? path : '' - path_list.add(path) - } - } - } - if (ioptions.publish_files instanceof Map) { - for (ext in ioptions.publish_files) { - if (args.filename.endsWith(ext.key)) { - def ext_list = path_list.collect() - ext_list.add(ext.value) - return "${getPathFromList(ext_list)}/$args.filename" - } - } - } else if (ioptions.publish_files == null) { - return "${getPathFromList(path_list)}/$args.filename" - } - } -} diff --git a/modules/nf-core/modules/fastqc/main.nf b/modules/nf-core/modules/fastqc/main.nf index 39c327b..ed6b8c5 100644 --- a/modules/nf-core/modules/fastqc/main.nf +++ b/modules/nf-core/modules/fastqc/main.nf @@ -1,22 +1,11 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - process FASTQC { tag "$meta.id" label 'process_medium' - publishDir "${params.outdir}", - mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } conda (params.enable_conda ? "bioconda::fastqc=0.11.9" : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0" - } else { - container "quay.io/biocontainers/fastqc:0.11.9--0" - } + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : + 'quay.io/biocontainers/fastqc:0.11.9--0' }" input: tuple val(meta), path(reads) @@ -24,24 +13,35 @@ process FASTQC { output: tuple val(meta), path("*.html"), emit: html tuple val(meta), path("*.zip") , emit: zip - path "*.version.txt" , emit: version + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when script: + def args = task.ext.args ?: '' // Add soft-links to original FastQs for consistent naming in pipeline - def software = getSoftwareName(task.process) - def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + def prefix = task.ext.prefix ?: "${meta.id}" if (meta.single_end) { """ [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz - fastqc $options.args --threads $task.cpus ${prefix}.fastq.gz - fastqc --version | sed -e "s/FastQC v//g" > ${software}.version.txt + fastqc $args --threads $task.cpus ${prefix}.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + END_VERSIONS """ } else { """ [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz [ ! 
-f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz - fastqc $options.args --threads $task.cpus ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz - fastqc --version | sed -e "s/FastQC v//g" > ${software}.version.txt + fastqc $args --threads $task.cpus ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + END_VERSIONS """ } } diff --git a/modules/nf-core/modules/fastqc/meta.yml b/modules/nf-core/modules/fastqc/meta.yml index 8eb9953..4da5bb5 100644 --- a/modules/nf-core/modules/fastqc/meta.yml +++ b/modules/nf-core/modules/fastqc/meta.yml @@ -1,51 +1,52 @@ name: fastqc description: Run FastQC on sequenced reads keywords: - - quality control - - qc - - adapters - - fastq + - quality control + - qc + - adapters + - fastq tools: - - fastqc: - description: | - FastQC gives general quality metrics about your reads. - It provides information about the quality score distribution - across your reads, the per base sequence content (%A/C/G/T). - You get information about adapter contamination and other - overrepresented sequences. - homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ - documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ + - fastqc: + description: | + FastQC gives general quality metrics about your reads. + It provides information about the quality score distribution + across your reads, the per base sequence content (%A/C/G/T). + You get information about adapter contamination and other + overrepresented sequences. + homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ + documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ + licence: ["GPL-2.0-only"] input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - html: - type: file - description: FastQC report - pattern: "*_{fastqc.html}" - - zip: - type: file - description: FastQC report archive - pattern: "*_{fastqc.zip}" - - version: - type: file - description: File containing software version - pattern: "*.{version.txt}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - html: + type: file + description: FastQC report + pattern: "*_{fastqc.html}" + - zip: + type: file + description: FastQC report archive + pattern: "*_{fastqc.zip}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - - "@drpatelh" - - "@grst" - - "@ewels" - - "@FelixKrueger" + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" diff --git a/modules/nf-core/modules/multiqc/functions.nf b/modules/nf-core/modules/multiqc/functions.nf deleted file mode 100644 index da9da09..0000000 --- a/modules/nf-core/modules/multiqc/functions.nf +++ /dev/null @@ -1,68 +0,0 @@ -// -// Utility functions used in nf-core DSL2 module files -// - -// -// Extract name of software tool from process name using $task.process -// -def getSoftwareName(task_process) { - return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() -} - -// -// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules -// -def initOptions(Map args) { - def 
Map options = [:] - options.args = args.args ?: '' - options.args2 = args.args2 ?: '' - options.args3 = args.args3 ?: '' - options.publish_by_meta = args.publish_by_meta ?: [] - options.publish_dir = args.publish_dir ?: '' - options.publish_files = args.publish_files - options.suffix = args.suffix ?: '' - return options -} - -// -// Tidy up and join elements of a list to return a path string -// -def getPathFromList(path_list) { - def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries - paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes - return paths.join('/') -} - -// -// Function to save/publish module results -// -def saveFiles(Map args) { - if (!args.filename.endsWith('.version.txt')) { - def ioptions = initOptions(args.options) - def path_list = [ ioptions.publish_dir ?: args.publish_dir ] - if (ioptions.publish_by_meta) { - def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta - for (key in key_list) { - if (args.meta && key instanceof String) { - def path = key - if (args.meta.containsKey(key)) { - path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] - } - path = path instanceof String ? 
path : '' - path_list.add(path) - } - } - } - if (ioptions.publish_files instanceof Map) { - for (ext in ioptions.publish_files) { - if (args.filename.endsWith(ext.key)) { - def ext_list = path_list.collect() - ext_list.add(ext.value) - return "${getPathFromList(ext_list)}/$args.filename" - } - } - } else if (ioptions.publish_files == null) { - return "${getPathFromList(path_list)}/$args.filename" - } - } -} diff --git a/modules/nf-core/modules/multiqc/main.nf b/modules/nf-core/modules/multiqc/main.nf index da78080..1264aac 100644 --- a/modules/nf-core/modules/multiqc/main.nf +++ b/modules/nf-core/modules/multiqc/main.nf @@ -1,21 +1,10 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - process MULTIQC { label 'process_medium' - publishDir "${params.outdir}", - mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) } - conda (params.enable_conda ? "bioconda::multiqc=1.10.1" : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/multiqc:1.10.1--py_0" - } else { - container "quay.io/biocontainers/multiqc:1.10.1--py_0" - } + conda (params.enable_conda ? 'bioconda::multiqc=1.12' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/multiqc:1.12--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.12--pyhdfd78af_0' }" input: path multiqc_files @@ -24,12 +13,19 @@ process MULTIQC { path "*multiqc_report.html", emit: report path "*_data" , emit: data path "*_plots" , optional:true, emit: plots - path "*.version.txt" , emit: version + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when script: - def software = getSoftwareName(task.process) + def args = task.ext.args ?: '' """ - multiqc -f $options.args . - multiqc --version | sed -e "s/multiqc, version //g" > ${software}.version.txt + multiqc -f $args . + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS """ } diff --git a/modules/nf-core/modules/multiqc/meta.yml b/modules/nf-core/modules/multiqc/meta.yml index 532a8bb..6fa891e 100644 --- a/modules/nf-core/modules/multiqc/meta.yml +++ b/modules/nf-core/modules/multiqc/meta.yml @@ -1,39 +1,40 @@ name: MultiQC description: Aggregate results from bioinformatics analyses across many samples into a single report keywords: - - QC - - bioinformatics tools - - Beautiful stand-alone HTML report + - QC + - bioinformatics tools + - Beautiful stand-alone HTML report tools: - - multiqc: - description: | - MultiQC searches a given directory for analysis logs and compiles a HTML report. - It's a general use tool, perfect for summarising the output from numerous bioinformatics tools. - homepage: https://multiqc.info/ - documentation: https://multiqc.info/docs/ + - multiqc: + description: | + MultiQC searches a given directory for analysis logs and compiles a HTML report. + It's a general use tool, perfect for summarising the output from numerous bioinformatics tools. 
+ homepage: https://multiqc.info/ + documentation: https://multiqc.info/docs/ + licence: ["GPL-3.0-or-later"] input: - - multiqc_files: - type: file - description: | - List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + - multiqc_files: + type: file + description: | + List of reports / files recognised by MultiQC, for example the html and zip output of FastQC output: - - report: - type: file - description: MultiQC report file - pattern: "multiqc_report.html" - - data: - type: dir - description: MultiQC data dir - pattern: "multiqc_data" - - plots: - type: file - description: Plots created by MultiQC - pattern: "*_data" - - version: - type: file - description: File containing software version - pattern: "*.{version.txt}" + - report: + type: file + description: MultiQC report file + pattern: "multiqc_report.html" + - data: + type: dir + description: MultiQC data dir + pattern: "multiqc_data" + - plots: + type: file + description: Plots created by MultiQC + pattern: "*_data" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - - "@abhi18av" - - "@bunop" - - "@drpatelh" + - "@abhi18av" + - "@bunop" + - "@drpatelh" diff --git a/modules/nf-core/modules/samtools/merge/main.nf b/modules/nf-core/modules/samtools/merge/main.nf new file mode 100644 index 0000000..be6fe32 --- /dev/null +++ b/modules/nf-core/modules/samtools/merge/main.nf @@ -0,0 +1,41 @@ +process SAMTOOLS_MERGE { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::samtools=1.14" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : + 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" + + input: + tuple val(meta), path(input_files) + path fasta + + output: + tuple val(meta), path("${prefix}.bam") , optional:true, emit: bam + tuple val(meta), path("${prefix}.cram"), optional:true, emit: cram + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def file_type = input_files[0].getExtension() + def reference = fasta ? "--reference ${fasta}" : "" + """ + samtools \\ + merge \\ + --threads ${task.cpus-1} \\ + $args \\ + ${reference} \\ + ${prefix}.${file_type} \\ + $input_files + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/samtools/merge/meta.yml b/modules/nf-core/modules/samtools/merge/meta.yml new file mode 100644 index 0000000..fb78e55 --- /dev/null +++ b/modules/nf-core/modules/samtools/merge/meta.yml @@ -0,0 +1,54 @@ +name: samtools_merge +description: Merge BAM or CRAM file +keywords: + - merge + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - input_files: + type: file + description: BAM/CRAM file + pattern: "*.{bam,cram,sam}" + - fasta: + type: optional file + description: Reference file the CRAM was created with + pattern: "*.{fasta,fa}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM file + pattern: "*.{bam}" + - cram: + type: file + description: CRAM file + pattern: "*.{cram}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@yuukiiwa " + - "@maxulysse" + - "@FriederikeHanssen" diff --git a/modules/nf-core/modules/samtools/sort/main.nf b/modules/nf-core/modules/samtools/sort/main.nf new file mode 100644 index 0000000..0f2237c --- /dev/null +++ b/modules/nf-core/modules/samtools/sort/main.nf @@ -0,0 +1,31 @@ +process SAMTOOLS_SORT { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::samtools=1.14" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : + 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*.bam"), emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
+ """ + samtools sort $args -@ $task.cpus -o ${prefix}.bam -T $prefix $bam + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/samtools/sort/meta.yml b/modules/nf-core/modules/samtools/sort/meta.yml new file mode 100644 index 0000000..a820c55 --- /dev/null +++ b/modules/nf-core/modules/samtools/sort/meta.yml @@ -0,0 +1,44 @@ +name: samtools_sort +description: Sort SAM/BAM/CRAM file +keywords: + - sort + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" diff --git a/nextflow.config b/nextflow.config index 8c5d7ac..4a7d9d6 100644 --- a/nextflow.config +++ b/nextflow.config @@ -17,6 +17,77 @@ params { genome = null igenomes_base = 's3://ngi-igenomes/igenomes' igenomes_ignore = false + chromosome_size = false + restriction_fragments = false + save_reference = false + + // Mapping + split_fastq = false + fastq_chunks_size = 20000000 + save_interaction_bam = false + save_aligned_intermediates = false + bwt2_opts_end2end = '--very-sensitive -L 30 --score-min L,-0.6,-0.2 --end-to-end --reorder' + bwt2_opts_trimmed = '--very-sensitive -L 20 --score-min L,-0.6,-0.2 --end-to-end --reorder' + keep_dups = false + keep_multi = false + min_mapq = 10 + + // Digestion Hi-C + digestion = false + ligation_site = null + restriction_site = null + digest { + 'hindiii'{ + restriction_site='A^AGCTT' + ligation_site='AAGCTAGCTT' + } + 'mboi' { + restriction_site='^GATC' + ligation_site='GATCGATC' + } + 'dpnii' { + restriction_site='^GATC' + ligation_site='GATCGATC' + } + 'arima' { + restriction_site='^GATC,G^ANT' + ligation_site='GATCGATC,GATCGANT,GANTGATC,GANTGANT' + } + } + + min_restriction_fragment_size = 0 + max_restriction_fragment_size = 0 + min_insert_size = 0 + max_insert_size = 0 + save_nonvalid_pairs = false + + // Dnase Hi-C + dnase = false + min_cis_dist = 0 + + // Contact maps + bin_size = '1000000' + res_zoomify = '5000' + hicpro_maps = false + ice_max_iter = 100 + ice_filter_low_count_perc = 0.02 + ice_filter_high_count_perc = 0 + ice_eps = 0.1 + + // Downstream Analysis + res_dist_decay = '250000' + tads_caller = 'insulation' + res_tads = '40000' + res_compartments = '250000' + + // Workflow + skip_maps = false + skip_balancing = false + skip_mcool = false + 
skip_dist_decay = false + skip_compartments = false + skip_tads = false + skip_multiqc = false // MultiQC options multiqc_config = null diff --git a/nextflow_schema.json b/nextflow_schema.json index f8154bd..7fe34b7 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -16,17 +16,19 @@ "properties": { "input": { "type": "string", - "format": "file-path", - "mimetype": "text/csv", - "pattern": "^\\S+\\.csv$", - "schema": "assets/schema_input.json", - "description": "Path to comma-separated file containing information about the samples in the experiment.", - "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/hic/usage#samplesheet-input).", - "fa_icon": "fas fa-file-csv" + "fa_icon": "fas fa-dna", + "description": "Input FastQ files.", + "help_text": "Use this to specify the location of your input FastQ files. For example:\n\n```bash\n--input 'path/to/data/sample_*_{1,2}.fastq'\n```\n\nPlease note the following requirements:\n\n1. The path must be enclosed in quotes\n2. The path must have at least one `*` wildcard character\n3. 
When using the pipeline with paired end data, the path must use `{1,2}` notation to specify read pairs.\n\nIf left unspecified, a default pattern is used: `data/*{1,2}.fastq.gz`" + }, + "input_paths": { + "type": "string", + "hidden": true, + "description": "Input FastQ files for test only", + "default": "undefined" }, "outdir": { "type": "string", - "description": "Path to the output directory where the results will be saved.", + "description": "The output directory where the results will be saved.", "default": "./results", "fa_icon": "fas fa-folder-open" }, @@ -36,11 +38,6 @@ "fa_icon": "fas fa-envelope", "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" - }, - "multiqc_title": { - "type": "string", - "description": "MultiQC report title. Printed as page header, used for filename if not otherwise specified.", - "fa_icon": "fas fa-file-signature" } } }, @@ -48,26 +45,22 @@ "title": "Reference genome options", "type": "object", "fa_icon": "fas fa-dna", - "description": "Reference genome related files and options required for the workflow.", + "description": "Options for the reference genome indices used to align reads.", "properties": { "genome": { "type": "string", "description": "Name of iGenomes reference.", "fa_icon": "fas fa-book", - "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." 
+ "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`.\n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." }, "fasta": { "type": "string", - "format": "file-path", - "mimetype": "text/plain", - "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", + "fa_icon": "fas fa-font", "description": "Path to FASTA genome file.", - "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.", - "fa_icon": "far fa-file-code" + "help_text": "If you have no genome reference available, the pipeline can build one using a FASTA file. This requires additional time and resources, so it's better to use a pre-build index if possible." }, "igenomes_base": { "type": "string", - "format": "directory-path", "description": "Directory / URL base for iGenomes references.", "default": "s3://ngi-igenomes/igenomes", "fa_icon": "fas fa-cloud-download-alt", @@ -79,95 +72,260 @@ "fa_icon": "fas fa-ban", "hidden": true, "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." + }, + "bwt2_index": { + "type": "string", + "description": "Full path to directory containing Bowtie index including base name. i.e. `/path/to/index/base`.", + "fa_icon": "far fa-file-alt" } } }, - "institutional_config_options": { - "title": "Institutional config options", + "digestion_hi_c": { + "title": "Digestion Hi-C", "type": "object", - "fa_icon": "fas fa-university", - "description": "Parameters used to describe centralised config profiles. 
These should not be edited.", - "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. You should not need to change these values when you run a pipeline.", + "description": "Parameters for protocols based on restriction enzyme", + "default": "", "properties": { - "custom_config_version": { + "digestion": { "type": "string", - "description": "Git commit id for Institutional configs.", - "default": "master", - "hidden": true, - "fa_icon": "fas fa-users-cog" + "default": "hindiii", + "description": "Name of restriction enzyme to automatically set the restriction_site and ligation_site options" }, - "custom_config_base": { + "restriction_site": { "type": "string", - "description": "Base directory for Institutional configs.", - "default": "https://raw.githubusercontent.com/nf-core/configs/master", - "hidden": true, - "help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.", - "fa_icon": "fas fa-users-cog" + "default": "'A^AGCTT'", + "description": "Restriction motifs used during digestion. Several motifs (comma separated) can be provided." }, - "hostnames": { + "ligation_site": { "type": "string", - "description": "Institutional configs hostname.", - "hidden": true, - "fa_icon": "fas fa-users-cog" + "default": "'AAGCTAGCTT'", + "description": "Expected motif after DNA ligation. Several motifs (comma separated) can be provided."
}, - "config_profile_name": { + "chromosome_size": { "type": "string", - "description": "Institutional config name.", - "hidden": true, - "fa_icon": "fas fa-users-cog" + "description": "Full path to file specifying chromosome sizes (tab separated with chromosome name and size).", + "fa_icon": "far fa-file-alt", + "help_text": "If not specified, the pipeline will build this file from the reference genome file" }, - "config_profile_description": { + "restriction_fragments": { "type": "string", - "description": "Institutional config description.", - "hidden": true, - "fa_icon": "fas fa-users-cog" + "description": "Full path to restriction fragment (bed) file.", + "fa_icon": "far fa-file-alt", + "help_text": "This file depends on the Hi-C protocols and digestion strategy. If not provided, the pipeline will build it using the --restriction_site option" }, - "config_profile_contact": { + "save_reference": { + "type": "boolean", + "description": "If generated by the pipeline, save the annotation and indexes in the results directory.", + "help_text": "Use this parameter to save all annotations to your results folder. These can then be used for future pipeline runs, reducing processing times.", + "fa_icon": "fas fa-save" + }, + "save_nonvalid_pairs": { + "type": "boolean", + "description": "Save the non valid pairs detected by HiC-Pro.", + "help_text": "Use this parameter to save non valid pairs detected by HiC-Pro (dangling-end, self-circle, re-ligation, filtered).", + "fa_icon": "fas fa-save" + } + } + }, + "dnase_hi_c": { + "title": "DNAse Hi-C", + "type": "object", + "description": "Parameters for protocols based on DNAse digestion", + "default": "", + "properties": { + "dnase": { + "type": "boolean", + "description": "For Hi-C protocols which are not based on enzyme digestion such as DNase Hi-C" + }, + "min_cis_dist": { + "type": "integer", + "description": "Minimum distance between loci to consider. Useful for --dnase mode to remove spurious ligation products.
Only values > 0 are considered" + } + } + }, + "alignments": { + "title": "Alignments", + "type": "object", + "description": "Parameters for read alignments", + "default": "", + "fa_icon": "fas fa-bahai", + "properties": { + "split_fastq": { + "type": "boolean", + "description": "Split the reads into chunks before running the pipeline", + "fa_icon": "fas fa-dna" + }, + "fastq_chunks_size": { + "type": "integer", + "description": "Number of reads per chunk if split_fastq is used", + "default": 20000000 + }, + "min_mapq": { + "type": "integer", + "default": 10, + "description": "Keep aligned reads with a minimum quality value" + }, + "bwt2_opts_end2end": { "type": "string", - "description": "Institutional config contact information.", - "hidden": true, - "fa_icon": "fas fa-users-cog" + "default": "'--very-sensitive -L 30 --score-min L,-0.6,-0.2 --end-to-end --reorder'", + "description": "Option for HiC-Pro end-to-end bowtie mapping" }, - "config_profile_url": { + "bwt2_opts_trimmed": { "type": "string", - "description": "Institutional config URL link.", - "hidden": true, - "fa_icon": "fas fa-users-cog" + "default": "'--very-sensitive -L 20 --score-min L,-0.6,-0.2 --end-to-end --reorder'", + "description": "Option for HiC-Pro trimmed reads mapping" + }, + "save_aligned_intermediates": { + "type": "boolean", + "description": "Save all BAM files during two-steps mapping" } } }, - "max_job_request_options": { - "title": "Max job request options", + "valid_pairs_detection": { + "title": "Valid Pairs Detection", "type": "object", - "fa_icon": "fab fa-acquisitions-incorporated", - "description": "Set the top limit for requested resources for any single job.", - "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error.
These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.", + "description": "Options to call significant interactions", + "default": "", + "fa_icon": "fas fa-signature", "properties": { - "max_cpus": { + "keep_dups": { + "type": "boolean", + "description": "Keep duplicated reads" + }, + "keep_multi": { + "type": "boolean", + "description": "Keep multi-aligned reads" + }, + "max_insert_size": { "type": "integer", - "description": "Maximum number of CPUs that can be requested for any single job.", - "default": 16, - "fa_icon": "fas fa-microchip", - "hidden": true, - "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`" + "description": "Maximum fragment size to consider. Only values > 0 are considered" }, - "max_memory": { + "min_insert_size": { + "type": "integer", + "description": "Minimum fragment size to consider. Only values > 0 are considered" + }, + "max_restriction_fragment_size": { + "type": "integer", + "description": "Maximum restriction fragment size to consider. Only values > 0 are considered" + }, + "min_restriction_fragment_size": { + "type": "integer", + "description": "Minimum restriction fragment size to consider. 
Only values > 0 are considered" + }, + "save_interaction_bam": { + "type": "boolean", + "description": "Save a BAM file where all reads are flagged by their interaction classes" + } + } + }, + "contact_maps": { + "title": "Contact maps", + "type": "object", + "description": "Options to build Hi-C contact maps", + "default": "", + "fa_icon": "fas fa-chess-board", + "properties": { + "bin_size": { "type": "string", - "description": "Maximum amount of memory that can be requested for any single job.", - "default": "128.GB", - "fa_icon": "fas fa-memory", - "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", - "hidden": true, - "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`" + "pattern": "^(\\d+)(,\\d+)*$", + "default": "1000000,500000", + "description": "Resolution to build the maps (comma separated)" }, - "max_time": { + "hicpro_maps": { + "type": "boolean", + "description": "Generate raw and normalized contact maps with HiC-Pro" + }, + "ice_filter_low_count_perc": { + "type": "number", + "default": 0.02, + "description": "Filter low counts rows before HiC-Pro normalization" + }, + "ice_filter_high_count_perc": { + "type": "integer", + "description": "Filter high counts rows before HiC-Pro normalization" + }, + "ice_eps": { + "type": "number", + "default": 0.1, + "description": "Threshold for HiC-Pro ICE convergence" + }, + "ice_max_iter": { + "type": "integer", + "default": 100, + "description": "Maximum number of iterations for HiC-Pro ICE normalization" + }, + "res_zoomify": { "type": "string", - "description": "Maximum amount of time that can be requested for any single job.", - "default": "240.h", - "fa_icon": "far fa-clock", - "pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$", - "hidden": true, - "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g.
`--max_time '2.h'`" + "default": "5000", + "description": "Maximum resolution to build mcool file" + } + } + }, + "downstream_analysis": { + "title": "Downstream Analysis", + "type": "object", + "description": "Set up downstream analysis from contact maps", + "default": "", + "properties": { + "res_dist_decay": { + "type": "string", + "pattern": "^(\\d+)(,\\d+)*$", + "default": "1000000", + "description": "Resolution to build count/distance plot" + }, + "tads_caller": { + "type": "string", + "default": "hicexplorer,insulation", + "description": "Define methods for TADs calling" + }, + "res_tads": { + "type": "string", + "pattern": "^(\\d+)(,\\d+)*$", + "default": "40000,20000", + "description": "Resolution to run TADs callers (comma separated)" + }, + "res_compartments": { + "type": "string", + "pattern": "^(\\d+)(,\\d+)*$", + "default": "250000", + "description": "Resolution for compartments calling" + } + } + }, + "skip_options": { + "title": "Skip options", + "type": "object", + "description": "Skip some steps of the pipeline", + "default": "", + "fa_icon": "fas fa-random", + "properties": { + "skip_maps": { + "type": "boolean", + "description": "Do not build contact maps" + }, + "skip_dist_decay": { + "type": "boolean", + "description": "Do not run distance/decay plot" + }, + "skip_tads": { + "type": "boolean", + "description": "Do not run TADs calling" + }, + "skip_compartments": { + "type": "boolean", + "description": "Do not run compartments calling" + }, + "skip_balancing": { + "type": "boolean", + "description": "Do not run cooler balancing normalization" + }, + "skip_mcool": { + "type": "boolean", + "description": "Do not generate mcool file for Higlass visualization" + }, + "skip_multiqc": { + "type": "boolean", + "description": "Do not generate MultiQC report" } } }, @@ -181,12 +339,13 @@ "help": { "type": "boolean", "description": "Display help text.", - "fa_icon": "fas fa-question-circle", - "hidden": true + "hidden": true, + "fa_icon": "fas
fa-question-circle" }, "publish_dir_mode": { "type": "string", "default": "copy", + "hidden": true, "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", @@ -197,7 +356,13 @@ "copy", "copyNoFollow", "move" - ], + ] + }, + "validate_params": { + "type": "boolean", + "description": "Boolean whether to validate parameters against the schema at runtime", + "default": true, + "fa_icon": "fas fa-check-square", "hidden": true }, "email_on_fail": { @@ -205,28 +370,30 @@ "description": "Email address for completion summary, only when pipeline fails.", "fa_icon": "fas fa-exclamation-triangle", "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", - "help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.", - "hidden": true + "hidden": true, + "help_text": "This works exactly as with `--email`, except emails are only sent if the workflow is not successful." }, "plaintext_email": { "type": "boolean", "description": "Send plain-text email instead of HTML.", "fa_icon": "fas fa-remove-format", - "hidden": true + "hidden": true, + "help_text": "Set to receive plain-text e-mails instead of HTML formatted." }, "max_multiqc_email_size": { "type": "string", "description": "File size limit when attaching MultiQC reports to summary emails.", - "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", "default": "25.MB", "fa_icon": "fas fa-file-upload", - "hidden": true + "hidden": true, + "help_text": "If file generated by pipeline exceeds the threshold, it will not be attached." 
}, "monochrome_logs": { "type": "boolean", "description": "Do not use coloured log outputs.", "fa_icon": "fas fa-palette", - "hidden": true + "hidden": true, + "help_text": "Set to disable colourful command line output and live life in monochrome." }, "multiqc_config": { "type": "string", @@ -241,32 +408,101 @@ "fa_icon": "fas fa-cogs", "hidden": true }, - "validate_params": { - "type": "boolean", - "description": "Boolean whether to validate parameters against the schema at runtime", - "default": true, - "fa_icon": "fas fa-check-square", - "hidden": true - }, "show_hidden_params": { "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all params when using `--help`", "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." + } + } + }, + "max_job_request_options": { + "title": "Max job request options", + "type": "object", + "fa_icon": "fab fa-acquisitions-incorporated", + "description": "Set the top limit for requested resources for any single job.", + "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.", + "properties": { + "max_cpus": { + "type": "integer", + "description": "Maximum number of CPUs that can be requested for any single job.", + "default": 16, + "fa_icon": "fas fa-microchip", + "hidden": true, + "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. 
`--max_cpus 1`" }, - "enable_conda": { - "type": "boolean", - "description": "Run this workflow with Conda. You can also use '-profile conda' instead of providing this parameter.", + "max_memory": { + "type": "string", + "description": "Maximum amount of memory that can be requested for any single job.", + "default": "128.GB", + "fa_icon": "fas fa-memory", + "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", "hidden": true, - "fa_icon": "fas fa-bacon" + "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`" }, - "singularity_pull_docker_container": { - "type": "boolean", - "description": "Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.", + "max_time": { + "type": "string", + "description": "Maximum amount of time that can be requested for any single job.", + "default": "240.h", + "fa_icon": "far fa-clock", + "pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$", + "hidden": true, + "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" + } + } + }, + "institutional_config_options": { + "title": "Institutional config options", + "type": "object", + "fa_icon": "fas fa-university", + "description": "Parameters used to describe centralised config profiles. These should not be edited.", + "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. 
You should not need to change these values when you run a pipeline.", + "properties": { + "custom_config_version": { + "type": "string", + "description": "Git commit id for Institutional configs.", + "default": "master", + "hidden": true, + "fa_icon": "fas fa-users-cog", + "help_text": "Provide git commit id for custom Institutional configs hosted at `nf-core/configs`. This was implemented for reproducibility purposes. Default: `master`.\n\n```bash\n## Download and use config file with following git commit id\n--custom_config_version d52db660777c4bf36546ddb188ec530c3ada1b96\n```" + }, + "custom_config_base": { + "type": "string", + "description": "Base directory for Institutional configs.", + "default": "https://raw.githubusercontent.com/nf-core/configs/master", + "hidden": true, + "help_text": "If you're running offline, nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell nextflow where to find them with the `custom_config_base` option. For example:\n\n```bash\n## Download and unzip the config files\ncd /path/to/my/configs\nwget https://github.com/nf-core/configs/archive/master.zip\nunzip master.zip\n\n## Run the pipeline\ncd /path/to/my/data\nnextflow run /path/to/pipeline/ --custom_config_base /path/to/my/configs/configs-master/\n```\n\n> Note that the nf-core/tools helper package has a `download` command to download all required pipeline files + singularity containers + institutional configs in one go for you, to make this process easier.", + "fa_icon": "fas fa-users-cog" + }, + "hostnames": { + "type": "string", + "description": "Institutional configs hostname.", "hidden": true, - "fa_icon": "fas fa-toolbox", - "help_text": "This may be useful for example if you are unable to directly pull Singularity containers to run the pipeline due to http/https proxy issues." 
+ "fa_icon": "fas fa-users-cog" + }, + "config_profile_name": { + "type": "string", + "description": "Institutional config name", + "hidden": true + }, + "config_profile_description": { + "type": "string", + "description": "Institutional config description.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_contact": { + "type": "string", + "description": "Institutional config contact information.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_url": { + "type": "string", + "description": "Institutional config URL link.", + "hidden": true, + "fa_icon": "fas fa-users-cog" } } } @@ -279,13 +515,34 @@ "$ref": "#/definitions/reference_genome_options" }, { - "$ref": "#/definitions/institutional_config_options" + "$ref": "#/definitions/digestion_hi_c" }, { - "$ref": "#/definitions/max_job_request_options" + "$ref": "#/definitions/dnase_hi_c" + }, + { + "$ref": "#/definitions/alignments" + }, + { + "$ref": "#/definitions/valid_pairs_detection" + }, + { + "$ref": "#/definitions/contact_maps" + }, + { + "$ref": "#/definitions/downstream_analysis" + }, + { + "$ref": "#/definitions/skip_options" }, { "$ref": "#/definitions/generic_options" + }, + { + "$ref": "#/definitions/max_job_request_options" + }, + { + "$ref": "#/definitions/institutional_config_options" } ] } diff --git a/subworkflows/local/hicpro.nf b/subworkflows/local/hicpro.nf index 46a418a..00359de 100644 --- a/subworkflows/local/hicpro.nf +++ b/subworkflows/local/hicpro.nf @@ -1,26 +1,71 @@ -params.options = [:] +/* + * HICPRO MAIN WORKFLOW + * INPUT : paired-end sequencing data + * OUTPUT : .pairs file with the list of valid interaction + */ + +include { HICPRO_MAPPING } from './hicpro_mapping' +include { GET_VALID_INTERACTION } from '../../modules/local/hicpro/get_valid_interaction' +include { MERGE_VALID_INTERACTION } from '../../modules/local/hicpro/merge_valid_interaction' +include { HICPRO2PAIRS } from '../../modules/local/hicpro/hicpro2pairs' +include { 
BUILD_CONTACT_MAPS } from '../../modules/local/hicpro/build_contact_maps' +include { ICE_NORMALIZATION } from '../../modules/local/hicpro/run_ice' -include { BOWTIE2_END_TO_END } from '../../modules/local/bowtie2_end_to_end' addParams( options: params.options ) -include { BOWTIE2_ON_TRIMED_READS } from '../../modules/local/bowtie2_on_trimmed_reads' addParams( options: params.options ) -include { BOWTIE2_MERGE_MAPPING_STEPS } from '../../modules/local/bowtie2_merge_mapping_steps' addParams( options: params.options ) -include { DNASE_MAPPING_STATS } from '../../modules/local/dnase_mapping_stats' addParams( options: params.options ) -include { COMBINE_MATES } from '../../modules/local/combine_mates' addParams( options: params.options ) -include { GET_VALID_INTERACTION } from '../../modules/local/get_valid_interaction' addParams( options: params.options ) -include { GET_VALID_INTERACTION_DNASE } from '../../modules/local/get_valid_interaction_dnase' addParams( options: params.options ) -include { REMOVE_DUPLICATES } from '../../modules/local/remove_duplicates' addParams( options: params.options ) -include { MERGE_STATS } from '../../modules/local/merge_stats' addParams( options: params.options ) -include { BUILD_CONTACT_MAPS } from '../../modules/local/build_contact_maps' addParams( options: params.options ) -include { RUN_ICE } from '../../modules/local/run_ice' addParams( options: params.options ) -include { CONVERTS_TO_PAIRS } from '../../modules/local/convert_to_pairs' addParams( options: params.options ) +workflow HICPRO { -workflow HIC_PRO { + take: + reads // [meta, read1, read2] + index + fragments + chrsize + ligation_site + map_res - take: + main: + ch_versions = Channel.empty() + // fastq to paired-end bam + HICPRO_MAPPING( + reads, + index, + ligation_site + ) - main: - + // get valid interaction + GET_VALID_INTERACTION ( + HICPRO_MAPPING.out.bam, + fragments + ) + + // merge valid interactions and remove duplicates + MERGE_VALID_INTERACTION ( + 
GET_VALID_INTERACTION.out.valid_pairs + ) + + // convert to pairs + HICPRO2PAIRS ( + MERGE_VALID_INTERACTION.out.valid_pairs, + chrsize + ) + + //merge stats - emit: + + if (!params.hicpro_maps){ + + //build_contact_maps + BUILD_CONTACT_MAPS( + MERGE_VALID_INTERACTION.out.valid_pairs.combine(map_res), + chrsize + ) + // run_ice + ICE_NORMALIZATION( + BUILD_CONTACT_MAPS.out.maps + ) + } + + emit: + versions = ch_versions + pairs = HICPRO2PAIRS.out.pairs } diff --git a/subworkflows/local/hicpro_mapping.nf b/subworkflows/local/hicpro_mapping.nf new file mode 100644 index 0000000..daa53c6 --- /dev/null +++ b/subworkflows/local/hicpro_mapping.nf @@ -0,0 +1,102 @@ +include { BOWTIE2_ALIGN } from '../../modules/nf-core/modules/bowtie2/align/main' +include { TRIM_READS } from '../../modules/local/hicpro/trim_reads' +include { BOWTIE2_ALIGN as BOWTIE2_ALIGN_TRIMMED } from '../../modules/nf-core/modules/bowtie2/align/main' +include { MERGE_BOWTIE2 } from '../../modules/local/hicpro/bowtie2_merge' +include { COMBINE_MATES} from '../../modules/local/hicpro/combine_mates' + +//include { BOWTIE2_ON_TRIMED_READS } from '../../modules/local/bowtie2_on_trimmed_reads' addParams( options: params.options ) +//include { BOWTIE2_MERGE_MAPPING_STEPS } from '../../modules/local/bowtie2_merge_mapping_steps' addParams( options: params.options ) +//include { DNASE_MAPPING_STATS } from '../../modules/local/dnase_mapping_stats' addParams( options: params.options ) +//include { COMBINE_MATES } from '../../modules/local/combine_mates' addParams( options: params.options ) +//include { GET_VALID_INTERACTION } from '../../modules/local/get_valid_interaction' addParams( options: params.options ) +//include { GET_VALID_INTERACTION_DNASE } from '../../modules/local/get_valid_interaction_dnase' addParams( options: params.options ) +//include { REMOVE_DUPLICATES } from '../../modules/local/remove_duplicates' addParams( options: params.options ) +//include { MERGE_STATS } from 
'../../modules/local/merge_stats' addParams( options: params.options ) +//include { BUILD_CONTACT_MAPS } from '../../modules/local/build_contact_maps' addParams( options: params.options ) +//include { RUN_ICE } from '../../modules/local/run_ice' addParams( options: params.options ) +//include { CONVERTS_TO_PAIRS } from '../../modules/local/convert_to_pairs' addParams( options: params.options ) + +// Paired-end to Single-end +def pairToSingle(row, mates) { + def meta = [:] + meta.id = row[0].id + meta.single_end = true + meta.mates = mates + def array = [] + if (mates == "R1") { + return [meta, [ row[1][0]] ] + }else if (mates == "R2"){ + return [meta, [ row[1][1]] ] + } +} + +def singleToPair(row){ + def meta = [:] + meta.id = row[0].id + meta.single_end = false + return [ meta, row[1] ] +} + +workflow HICPRO_MAPPING { + + take: + reads // [meta, read1, read2] + index + ligation_site + + main: + ch_versions = Channel.empty() + + // Align each mates separetly + ch_reads_r1 = reads.map{it -> pairToSingle(it,"R1")} + ch_reads_r2 = reads.map{pairToSingle(it,"R2")} + ch_reads = ch_reads_r1.concat(ch_reads_r2) + + // bowtie2 + BOWTIE2_ALIGN( + ch_reads, + index.collect(), + Channel.value(true).collect() + ) + ch_versions = ch_versions.mix(BOWTIE2_ALIGN.out.versions) + + // trim reads + TRIM_READS( + BOWTIE2_ALIGN.out.fastq, + ligation_site.collect() + ) + ch_versions = ch_versions.mix(TRIM_READS.out.versions) + + // bowtie2 on trimmed reads + BOWTIE2_ALIGN_TRIMMED( + TRIM_READS.out.fastq, + index.collect(), + Channel.value(false).collect() + ) + ch_versions = ch_versions.mix(BOWTIE2_ALIGN_TRIMMED.out.versions) + + // Merge the two mapping steps + BOWTIE2_ALIGN.out.bam + .combine(BOWTIE2_ALIGN_TRIMMED.out.bam, by:[0]) + .view() + .set { ch_bowtie2_align} + + MERGE_BOWTIE2( + ch_bowtie2_align + ) + + // Combine mates + MERGE_BOWTIE2.out.bam + .map { singleToPair(it) } + .groupTuple() + .view() + .set {ch_bams} + + COMBINE_MATES ( + ch_bams + ) + + emit: + versions = 
ch_versions + bam = COMBINE_MATES.out.bam +} diff --git a/subworkflows/local/hicpro_mapping.nf~ b/subworkflows/local/hicpro_mapping.nf~ new file mode 100644 index 0000000..249f656 --- /dev/null +++ b/subworkflows/local/hicpro_mapping.nf~ @@ -0,0 +1,102 @@ +include { BOWTIE2_ALIGN } from '../../modules/nf-core/modules/bowtie2/align/main' +include { HICPRO_TRIM_READS } from '../../modules/local/hicpro/trim_reads' +include { BOWTIE2_ALIGN as BOWTIE2_ALIGN_TRIMMED } from '../../modules/nf-core/modules/bowtie2/align/main' +include { HICPRO_MERGE_BOWTIE2 } from '../../modules/local/hicpro/bowtie2_merge' +include { HICPRO_COMBINE_MATES} from '../../modules/local/hicpro/combine_mates' + +//include { BOWTIE2_ON_TRIMED_READS } from '../../modules/local/bowtie2_on_trimmed_reads' addParams( options: params.options ) +//include { BOWTIE2_MERGE_MAPPING_STEPS } from '../../modules/local/bowtie2_merge_mapping_steps' addParams( options: params.options ) +//include { DNASE_MAPPING_STATS } from '../../modules/local/dnase_mapping_stats' addParams( options: params.options ) +//include { COMBINE_MATES } from '../../modules/local/combine_mates' addParams( options: params.options ) +//include { GET_VALID_INTERACTION } from '../../modules/local/get_valid_interaction' addParams( options: params.options ) +//include { GET_VALID_INTERACTION_DNASE } from '../../modules/local/get_valid_interaction_dnase' addParams( options: params.options ) +//include { REMOVE_DUPLICATES } from '../../modules/local/remove_duplicates' addParams( options: params.options ) +//include { MERGE_STATS } from '../../modules/local/merge_stats' addParams( options: params.options ) +//include { BUILD_CONTACT_MAPS } from '../../modules/local/build_contact_maps' addParams( options: params.options ) +//include { RUN_ICE } from '../../modules/local/run_ice' addParams( options: params.options ) +//include { CONVERTS_TO_PAIRS } from '../../modules/local/convert_to_pairs' addParams( options: params.options ) + +// Paired-end to 
Single-end +def pairToSingle(row, mates) { + def meta = [:] + meta.id = row[0].id + meta.single_end = true + meta.mates = mates + def array = [] + if (mates == "R1") { + return [meta, [ row[1][0]] ] + }else if (mates == "R2"){ + return [meta, [ row[1][1]] ] + } +} + +def singleToPair(row){ + def meta = [:] + meta.id = row[0].id + meta.single_end = false + return [ meta, row[1] ] +} + +workflow HICPRO_MAPPING { + + take: + reads // [meta, read1, read2] + index + ligation_site + + main: + ch_versions = Channel.empty() + + // Align each mates separetly + ch_reads_r1 = reads.map{it -> pairToSingle(it,"R1")} + ch_reads_r2 = reads.map{pairToSingle(it,"R2")} + ch_reads = ch_reads_r1.concat(ch_reads_r2) + + // bowtie2 + BOWTIE2_ALIGN( + ch_reads, + index.collect(), + Channel.value(true).collect() + ) + ch_versions = ch_versions.mix(BOWTIE2_ALIGN.out.versions) + + // trim reads + HICPRO_TRIM_READS( + BOWTIE2_ALIGN.out.fastq, + ligation_site.collect() + ) + ch_versions = ch_versions.mix(HICPRO_TRIM_READS.out.versions) + + // bowtie2 on trimmed reads + BOWTIE2_ALIGN_TRIMMED( + HICPRO_TRIM_READS.out.fastq, + index.collect(), + Channel.value(false).collect() + ) + ch_versions = ch_versions.mix(BOWTIE2_ALIGN_TRIMMED.out.versions) + + // Merge the two mapping steps + BOWTIE2_ALIGN.out.bam + .combine(BOWTIE2_ALIGN_TRIMMED.out.bam, by:[0]) + .view() + .set { ch_bowtie2_align} + + HICPRO_MERGE_BOWTIE2( + ch_bowtie2_align + ) + + // Combine mates + HICPRO_MERGE_BOWTIE2.out.bam + .map { singleToPair(it) } + .groupTuple() + .view() + .set {ch_bams} + + HICPRO_COMBINE_MATES ( + ch_bams + ) + + emit: + versions = ch_versions + bam = HICPRO_COMBINE_MATES.out.bam +} diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index b664bc8..267ea4f 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -2,9 +2,7 @@ // Check input samplesheet and get read channels // -params.options = [:] - -include { SAMPLESHEET_CHECK } from 
'../../modules/local/samplesheet_check' addParams( options: params.options ) +include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' workflow INPUT_CHECK { take: @@ -22,21 +20,17 @@ workflow INPUT_CHECK { // Function to get list of [ meta, [ fastq_1, fastq_2 ] ] def create_fastq_channels(LinkedHashMap row) { - def meta = [:] - meta.id = row.sample - meta.single_end = row.single_end.toBoolean() + def meta = [:] + meta.id = row.sample + meta.single_end = false - def array = [] - if (!file(row.fastq_1).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}" - } - if (meta.single_end) { - array = [ meta, [ file(row.fastq_1) ] ] - } else { - if (!file(row.fastq_2).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}" - } - array = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ] - } - return array + def array = [] + if (!file(row.fastq_1).exists()) { + exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}" + } + if (!file(row.fastq_2).exists()) { + exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}" + } + array = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ] + return array } diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf new file mode 100644 index 0000000..2c4a226 --- /dev/null +++ b/subworkflows/local/prepare_genome.nf @@ -0,0 +1,60 @@ +/* + * Prepare Annotation Genome for Hi-C data analysis + */ + +include { BOWTIE2_BUILD } from '../../modules/nf-core/modules/bowtie2/build/main' +include { GET_CHROMSIZE } from '../../modules/local/get_chromsize' +include { GET_RESTRICTION_FRAGMENTS } from '../../modules/local/get_restriction_fragments' + +workflow PREPARE_GENOME { + + take: + fasta + restriction_site + + main: + ch_versions = Channel.empty() + + //*************************************** + 
// Bowtie Index + if(!params.bwt2_index){ + BOWTIE2_BUILD ( + fasta + ) + ch_index = BOWTIE2_BUILD.out.index + }else{ + Channel.fromPath( params.bwt2_index , checkIfExists: true) + .ifEmpty { exit 1, "Genome index: Provided index not found: ${params.bwt2_index}" } + .into { ch_index } + } + + //*************************************** + // Chromosome size + if(!params.chromosome_size){ + GET_CHROMSIZE( + fasta + ) + ch_chromsize = GET_CHROMSIZE.out.results + }else{ + Channel.fromPath( params.chromosome_size , checkIfExists: true) + .into {ch_chromsize} + } + + //*************************************** + // Restriction fragments + if(!params.restriction_fragments && !params.dnase){ + GET_RESTRICTION_FRAGMENTS( + fasta, + restriction_site + ) + ch_resfrag = GET_RESTRICTION_FRAGMENTS.out.results + }else{ + Channel.fromPath( params.restriction_fragments, checkIfExists: true ) + .set {ch_resfrag} + } + + emit: + index = ch_index + chromosome_size = ch_chromsize + res_frag = ch_resfrag +} diff --git a/subworkflows/local/prepare_genome.nf~ b/subworkflows/local/prepare_genome.nf~ new file mode 100644 index 0000000..e989f87 --- /dev/null +++ b/subworkflows/local/prepare_genome.nf~ @@ -0,0 +1,60 @@ +/* + * Prepare Annotation Genome + */ + +include { BOWTIE2_BUILD } from '../../modules/nf-core/modules/bowtie2/build/main' +include { GET_CHROMSIZE } from '../../modules/local/get_chromsize' +include { GET_RESTRICTION_FRAGMENTS } from '../../modules/local/get_restriction_fragments' + +workflow PREPARE_GENOME { + + take: + fasta + restriction_site + + main: + ch_versions = Channel.empty() + + //*************************************** + // Bowtie Index + if(!params.bwt2_index){ + BOWTIE2_BUILD ( + fasta + ) + ch_index = BOWTIE2_BUILD.out.index + }else{ + Channel.fromPath( params.bwt2_index , checkIfExists: true) + .ifEmpty { exit 1, "Genome index: Provided index not found: ${params.bwt2_index}" } + .into { ch_index } + } + + //*************************************** + // Chromosome 
size + if(!params.chromosome_size){ + GET_CHROMSIZE( + fasta + ) + ch_chromsize = GET_CHROMSIZE.out.results + }else{ + Channel.fromPath( params.chromosome_size , checkIfExists: true) + .into {ch_chromsize} + } + + //*************************************** + // Restriction fragments + if(!params.restriction_fragments && !params.dnase){ + GET_RESTRICTION_FRAGMENTS( + fasta, + restriction_site + ) + ch_resfrag = GET_RESTRICTION_FRAGMENTS.out.results + }else{ + Channel.fromPath( params.restriction_fragments, checkIfExists: true ) + .set {ch_resfrag} + } + + emit: + index = ch_index + chromosome_size = ch_chromsize + res_frag = ch_resfrag +} diff --git a/workflows/hic.nf b/workflows/hic.nf index e0e8096..e50af0f 100644 --- a/workflows/hic.nf +++ b/workflows/hic.nf @@ -7,16 +7,75 @@ def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) // Validate input parameters -WorkflowHic.initialise(params, log) +//WorkflowHic.initialise(params, log) // TODO nf-core: Add all file path parameters for the pipeline to the list below // Check input path parameters to see if they exist -def checkPathParamList = [ params.input, params.multiqc_config, params.fasta ] -for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } +//def checkPathParamList = [ params.input ] +//for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } // Check mandatory parameters if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } +// Digestion parameters +if (params.digestion){ + restriction_site = params.digestion ? params.digest[ params.digestion ].restriction_site ?: false : false + ch_restriction_site = Channel.value(restriction_site) + + ligation_site = params.digestion ? 
params.digest[ params.digestion ].ligation_site ?: false : false + ch_ligation_site = Channel.value(ligation_site) +}else{ + ch_restriction_site = Channel.empty() + ch_ligation_site = Channel.empty() +} + +// Resolutions for contact maps +ch_map_res = Channel.from( params.bin_size ).splitCsv().flatten() +if (params.res_tads && !params.skip_tads){ + Channel.from( "${params.res_tads}" ) + .splitCsv() + .flatten() + .set {ch_tads_res} + ch_map_res = ch_map_res.concat(ch_tads_res) +}else{ + ch_tads_res=Channel.empty() + if (!params.skip_tads){ + log.warn "[nf-core/hic] Hi-C resolution for TADs calling not specified. See --res_tads" + } +} + +if (params.res_dist_decay && !params.skip_dist_decay){ + Channel.from( "${params.res_dist_decay}" ) + .splitCsv() + .flatten() + .set {ch_ddecay_res} + ch_map_res = ch_map_res.concat(ch_ddecay_res) +}else{ + ch_ddecay_res = Channel.create() + if (!params.skip_dist_decay){ + log.warn "[nf-core/hic] Hi-C resolution for distance decay not specified. See --res_dist_decay" + } +} + +if (params.res_compartments && !params.skip_compartments){ + //Channel.fromPath( params.fasta ) + // .ifEmpty { exit 1, "Compartments calling: Fasta file not found: ${params.fasta}" } + // .set { fasta_for_compartments } + Channel.from( "${params.res_compartments}" ) + .splitCsv() + .flatten() + .set {ch_comp_res} + ch_map_res = ch_map_res.concat(ch_comp_res) +}else{ + //fasta_for_compartments = Channel.empty() + ch_comp_res = Channel.create() + if (!params.skip_compartments){ + log.warn "[nf-core/hic] Hi-C resolution for compartment calling not specified. See --res_compartments" + } +} + +ch_map_res = ch_map_res.unique() + /* ======================================================================================== CONFIG FILES @@ -33,22 +92,23 @@ ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multi */ // Don't overwrite global params.modules, create a copy instead and use that within the main script. 
-def modules = params.modules.clone() +//def modules = params.modules.clone() // // MODULE: Local to the pipeline // -include { GET_SOFTWARE_VERSIONS } from '../modules/local/get_software_versions' addParams( options: [publish_files : ['tsv':'']] ) -include { OUTPUT_DOCUMENTATION } from '../modules/local/output_documentation' addParams( options: [publish_files : ['tsv':'']] ) +//include { GET_SOFTWARE_VERSIONS } from '../modules/local/get_software_versions' addParams( options: [publish_files : ['tsv':'']] ) +//include { OUTPUT_DOCUMENTATION } from '../modules/local/output_documentation' addParams( options: [publish_files : ['tsv':'']] ) // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -include { INPUT_CHECK } from '../subworkflows/local/input_check' addParams( options: [:] ) -include { HIC_PRO } from '../subworkflows/local/hicpro' addParams( options: [:] ) -include { COOLER } from '../subworkflows/local/cooler' addParams( options: [:] ) -include { COMPARTMENTS } from '../subworkflows/local/compartments' addParams( options: [:] ) -include { TADS } from '../subworkflows/local/tads' addParams( options: [:] ) +include { INPUT_CHECK } from '../subworkflows/local/input_check' +include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' +include { HICPRO } from '../subworkflows/local/hicpro' +//include { COOLER } from '../subworkflows/local/cooler' +//include { COMPARTMENTS } from '../subworkflows/local/compartments' +//include { TADS } from '../subworkflows/local/tads' /* ======================================================================================== @@ -56,14 +116,24 @@ include { TADS } from '../subworkflows/local/tads' addParams( options: [:] ) ======================================================================================== */ -def multiqc_options = modules['multiqc'] -multiqc_options.args += params.multiqc_title ? 
Utils.joinModuleArgs(["--title \"$params.multiqc_title\""]) : '' +//def multiqc_options = modules['multiqc'] +//multiqc_options.args += params.multiqc_title ? Utils.joinModuleArgs(["--title \"$params.multiqc_title\""]) : '' // // MODULE: Installed directly from nf-core/modules // -include { FASTQC } from '../modules/nf-core/modules/fastqc/main' addParams( options: modules['fastqc'] ) -include { MULTIQC } from '../modules/nf-core/modules/multiqc/main' addParams( options: multiqc_options ) +include { FASTQC } from '../modules/nf-core/modules/fastqc/main' +//include { MULTIQC } from '../modules/nf-core/modules/multiqc/main' addParams( options: multiqc_options ) + +/* +======================================================================================== + CHANNELS +======================================================================================== +*/ + +Channel.fromPath( params.fasta ) + .ifEmpty { exit 1, "Genome index: Fasta file not found: ${params.fasta}" } + .set { ch_fasta } /* ======================================================================================== @@ -76,56 +146,62 @@ def multiqc_report = [] workflow HIC { - ch_software_versions = Channel.empty() - - // - // SUBWORKFLOW: Read in samplesheet, validate and stage input files - // - INPUT_CHECK ( - ch_input - ) - - // - // MODULE: Run FastQC - // - FASTQC ( - INPUT_CHECK.out.reads - ) - ch_software_versions = ch_software_versions.mix(FASTQC.out.version.first().ifEmpty(null)) - - // - // MODULE: Pipeline reporting - // - ch_software_versions - .map { it -> if (it) [ it.baseName, it ] } - .groupTuple() - .map { it[1][0] } - .flatten() - .collect() - .set { ch_software_versions } - - GET_SOFTWARE_VERSIONS ( - ch_software_versions.map { it }.collect() - ) - - // - // MODULE: MultiQC - // - workflow_summary = WorkflowHic.paramsSummaryMultiqc(workflow, summary_params) - ch_workflow_summary = Channel.value(workflow_summary) - - ch_multiqc_files = Channel.empty() - ch_multiqc_files = 
ch_multiqc_files.mix(Channel.from(ch_multiqc_config)) - ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_custom_config.collect().ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(GET_SOFTWARE_VERSIONS.out.yaml.collect()) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) - - MULTIQC ( - ch_multiqc_files.collect() - ) - multiqc_report = MULTIQC.out.report.toList() - ch_software_versions = ch_software_versions.mix(MULTIQC.out.version.ifEmpty(null)) + ch_software_versions = Channel.empty() + + // + // SUBWORKFLOW: Read in samplesheet, validate and stage input files + // + INPUT_CHECK ( + ch_input + ) + + // + // SUBWORKFLOW: Prepare genome annotation + // + PREPARE_GENOME( + ch_fasta, + ch_restriction_site + ) + + // + // MODULE: Run FastQC + // + FASTQC ( + INPUT_CHECK.out.reads + ) + + // + // SUB-WORFLOW: HiC-Pro + // + HICPRO ( + INPUT_CHECK.out.reads, + PREPARE_GENOME.out.index, + PREPARE_GENOME.out.res_frag, + PREPARE_GENOME.out.chromosome_size, + ch_ligation_site, + ch_map_res + ) + + + // + // MODULE: MultiQC + // + workflow_summary = WorkflowHic.paramsSummaryMultiqc(workflow, summary_params) + ch_workflow_summary = Channel.value(workflow_summary) + + ch_multiqc_files = Channel.empty() + ch_multiqc_files = ch_multiqc_files.mix(Channel.from(ch_multiqc_config)) + ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_custom_config.collect().ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + //ch_multiqc_files = ch_multiqc_files.mix(GET_SOFTWARE_VERSIONS.out.yaml.collect()) + //ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) + + //MULTIQC ( + // ch_multiqc_files.collect() + //) + //multiqc_report = MULTIQC.out.report.toList() + multiqc_report = [] + //ch_software_versions = 
ch_software_versions.mix(MULTIQC.out.version.ifEmpty(null)) } /* @@ -135,10 +211,10 @@ workflow HIC { */ workflow.onComplete { - if (params.email || params.email_on_fail) { - NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) - } - NfcoreTemplate.summary(workflow, params, log) + if (params.email || params.email_on_fail) { + NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) + } + NfcoreTemplate.summary(workflow, params, log) } /* -- GitLab