diff --git a/conf/modules.config b/conf/modules.config index 5b8d560be75de6ec74302f6e4e3149f316d25913..ff144483df14fede36406729bbfb8541139a9f82 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -7,6 +7,7 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] + //********************************************** // PREPARE_GENOME withName: 'BOWTIE2_BUILD' { publishDir = [ @@ -32,18 +33,49 @@ process { ] } + //******************************************* // HICPRO - withName:'BOWTIE2_ALIGN' { + withName: 'BOWTIE2_ALIGN' { + publishDir = [ + path: { "${params.outdir}/hicpro/mapping" }, + mode: 'copy', + enabled: params.save_aligned_intermediates + ] ext.prefix = { "${meta.id}_${meta.mates}" } ext.args = params.bwt2_opts_end2end ?: '' } - withName:'BOWTIE2_ALIGN_TRIMMED' { + withName: 'TRIM_READS' { + publishDir = [ + path: { "${params.outdir}/hicpro/mapping/" }, + mode: 'copy', + enabled: params.save_aligned_intermediates + ] + } + + withName: 'BOWTIE2_ALIGN_TRIMMED' { + publishDir = [ + path: { "${params.outdir}/hicpro/mapping" }, + mode: 'copy', + enabled: params.save_aligned_intermediates + ] ext.prefix = { "${meta.id}_${meta.mates}_trimmed" } ext.args = params.bwt2_opts_trimmed ?: '' } + withName: 'MERGE_BOWTIE2' { + publishDir = [ + path: { "${params.outdir}/hicpro/mapping" }, + mode: 'copy', + enabled: params.save_aligned_intermediates + ] + } + withName: 'COMBINE_MATES' { + publishDir = [ + path: { "${params.outdir}/hicpro/mapping" }, + mode: 'copy' + ] ext.args = [ "-t", params.keep_multi ? "--multi" : "", @@ -52,6 +84,10 @@ process { } withName: 'GET_VALID_INTERACTION' { + publishDir = [ + path: { "${params.outdir}/hicpro/valid_pairs" }, + mode: 'copy' + ] ext.args = [ params.min_cis_dist > 0 ? " -d ${params.min_cis_dist}" : '', params.min_insert_size > 0 ? " -s ${params.min_insert_size}" : '', @@ -61,4 +97,96 @@ process { params.save_interaction_bam ? 
" --sam" : '' ].join(' ').trim() } + + withName: 'MERGE_VALID_INTERACTION' { + publishDir = [ + path: { "${params.outdir}/hicpro/valid_pairs" }, + mode: 'copy' + ] + } + + withName: 'HICPRO2PAIRS' { + publishDir = [ + path: { "${params.outdir}/hicpro/valid_pairs/pairix/" }, + mode: 'copy' + ] + } + + withName: 'BUILD_CONTACT_MAPS' { + publishDir = [ + path: { "${params.outdir}/hicpro/matrix/raw" }, + mode: 'copy', + enabled: params.hicpro_maps + ] + } + + withName: 'ICE_NORMALIZATION' { + publishDir = [ + path: { "${params.outdir}/hicpro/matrix/iced" }, + mode: 'copy', + enabled: params.hicpro_maps + ] + } + + //***************************************** + // COOLER + + withName: 'COOLER_MAKEBINS' { + publishDir = [ + path: { "${params.outdir}/contact_maps/bins/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + mode: 'copy' + ] + } + + withName: 'COOLER_CLOAD' { + publishDir = [ + path: { "${params.outdir}/contact_maps/cool/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + mode: 'copy', + enabled : params.save_raw_maps + ] + ext.args = "pairs -c1 2 -p1 3 -c2 4 -p2 5" + } + + withName: 'COOLER_BALANCE' { + publishDir = [ + path: { "${params.outdir}/contact_maps/cool/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + mode: 'copy' + ] + ext.args = '--force' + } + + withName: 'COOLER_DUMP' { + publishDir = [ + enabled: false + ] + ext.args = "--one-based-ids --balanced --na-rep 0" + } + + withName:'SPLIT_COOLER_DUMP' { + publishDir = [ + [ + path: { "${params.outdir}/contact_maps/txt/" }, + mode: 'copy', + pattern: "*_raw.txt", + enabled: params.save_raw_maps + ], + [ + path: { "${params.outdir}/contact_maps/txt/" }, + mode: 'copy', + pattern: "*_norm.txt" + ] + ] + } + + withName: 'COOLER_ZOOMIFY' { + publishDir = [ + path: { "${params.outdir}/contact_maps/cool/" }, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + mode: 'copy' + ] + ext.args = "--balance" + } } diff --git a/conf/test.config b/conf/test.config index 0e7285ea8259aee9c7360942536f29a19f04d40d..faebd8bedf252ccb8b46e54ae8338aaf304ba682 100644 --- a/conf/test.config +++ b/conf/test.config @@ -31,11 +31,11 @@ params { min_insert_size = 100 max_insert_size = 600 - bin_size = '1000' + bin_size = '2000,1000' res_dist_decay = '1000' res_tads = '1000' tads_caller = 'insulation,hicexplorer' - res_compartments = '1000' + res_compartments = '2000' // Ignore `--input` as otherwise the parameter validation will throw an error schema_ignore_params = 'genomes,digest,input_paths,input' diff --git a/modules.json b/modules.json index bba8150db8e631c68a04dadd2f8c007991da405a..b7bfabedbf2ff583704d754b3432519ab329d941 100644 --- a/modules.json +++ b/modules.json @@ -9,16 +9,19 @@ "bowtie2/build": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, - "fastqc": { + "cooler/cload": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, - "multiqc": { + "cooler/dump": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "cooler/zoomify": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, - "samtools/merge": { + "fastqc": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, - "samtools/sort": { + "multiqc": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" } } diff --git a/modules/local/balance.nf b/modules/local/balance.nf new file mode 100644 index 0000000000000000000000000000000000000000..ce6e3bb5951edd34aab6aa4aa743ec2ee92a837f --- /dev/null +++ b/modules/local/balance.nf @@ -0,0 +1,31 @@ +process COOLER_BALANCE { + tag "$meta.id" + label 'process_high' + + conda (params.enable_conda ? "bioconda::cooler=0.8.11" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/cooler:0.8.11--pyh3252c3a_0' : + 'quay.io/biocontainers/cooler:0.8.11--pyh3252c3a_0' }" + + input: + tuple val(meta), val(resolution), path(cool) + + output: + tuple val(meta), val(resolution), path("*_norm.cool"), emit: cool + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + """ + cp ${cool} ${cool.baseName}_norm.cool + cooler balance \\ + $args \\ + -p ${task.cpus} \\ + ${cool.baseName}_norm.cool + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cooler: \$(cooler --version 2>&1 | sed 's/cooler, version //') + END_VERSIONS + """ +} diff --git a/modules/local/balance.nf~ b/modules/local/balance.nf~ new file mode 100644 index 0000000000000000000000000000000000000000..728a04e10a5816ed6b1f76768e9a16c54cfb4543 --- /dev/null +++ b/modules/local/balance.nf~ @@ -0,0 +1,31 @@ +process COOLER_BALANCE { + tag "$meta.id" + label 'process_high' + + conda (params.enable_conda ? "bioconda::cooler=0.8.11" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/cooler:0.8.11--pyh3252c3a_0' : + 'quay.io/biocontainers/cooler:0.8.11--pyh3252c3a_0' }" + + input: + tuple val(meta), path(cool) + + output: + tuple val(meta), path("*.cool"), emit: cool + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + """ + cp ${cool} ${cool.baseName}_norm.cool + cooler balance \\ + $args \\ + -p ${task.cpus} \\ + ${cool.baseName}_norm.cool + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cooler: \$(cooler --version 2>&1 | sed 's/cooler, version //') + END_VERSIONS + """ +} diff --git a/modules/local/hicpro/combine_mates.nf~ b/modules/local/hicpro/combine_mates.nf~ deleted file mode 100644 index d4ea96d37f6da7667845e607fb786debdcae9a1e..0000000000000000000000000000000000000000 --- a/modules/local/hicpro/combine_mates.nf~ +++ /dev/null @@ -1,18 +0,0 @@ -process COMBINE_MATES { - tag "$prefix" - label 'process_low' - - input: - tuple val(meta), path(bam) - - output: - tuple val(meta), path("*bwt2pairs.bam"), emit:bam - tuple val(meta), path("*.pairstat"), emit:stats - - script: - prefix = meta.id - def args = task.ext.args ?: '' - """ - mergeSAM.py -f ${bam[0]} -r ${bam[1]} -o ${prefix}_bwt2pairs.bam ${args} - """ -} diff --git a/modules/local/hicpro/hicpro2pairs.nf b/modules/local/hicpro/hicpro2pairs.nf index 02abfb857f2d3103024e2fed0c20081ccdfdf9dd..698225da80c9474ef898c8c3238c3173eccab40b 100644 --- a/modules/local/hicpro/hicpro2pairs.nf +++ b/modules/local/hicpro/hicpro2pairs.nf @@ -7,12 +7,14 @@ process HICPRO2PAIRS { path chrsize output: - tuple val(meta), path("*.txt.gz"), emit: pairs + tuple val(meta), path("*.pairs.gz"), path("*.pairs.gz.px2"), emit: pairs script: + prefix = "${meta.id}" """ - ## chr/pos/strand/chr/pos/strand - awk '{OFS="\t";print \$1,\$2,\$3,\$5,\$6,\$4,\$7}' $vpairs > contacts.txt - gzip contacts.txt + ##columns: readID chr1 pos1 chr2 pos2 strand1 strand2 + awk '{OFS="\t";print \$1,\$2,\$3,\$5,\$6,\$4,\$7}' $vpairs > 
${prefix}_contacts.pairs + sort -k2,2 -k4,4 -k3,3n -k5,5n ${prefix}_contacts.pairs | bgzip -c > ${prefix}_contacts.pairs.gz + pairix -f ${prefix}_contacts.pairs.gz """ } diff --git a/modules/local/makebins.nf b/modules/local/makebins.nf new file mode 100644 index 0000000000000000000000000000000000000000..0dc4dee4140b89c4a2eb07ee666f9ad936725a9a --- /dev/null +++ b/modules/local/makebins.nf @@ -0,0 +1,30 @@ +process COOLER_MAKEBINS { + tag "${cool_bin}" + label 'process_low' + + conda (params.enable_conda ? "bioconda::cooler=0.8.11" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/cooler:0.8.11--pyh3252c3a_0' : + 'quay.io/biocontainers/cooler:0.8.11--pyh3252c3a_0' }" + + input: + tuple path(chromsizes), val(cool_bin) + + output: + path ("*.bed") , emit: bed + path "versions.yml", emit: versions + + script: + def args = task.ext.args ?: '' + """ + cooler makebins \\ + $args \\ + ${chromsizes} \\ + ${cool_bin} > cooler_bins_${cool_bin}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cooler: \$(cooler --version 2>&1 | sed 's/cooler, version //') + END_VERSIONS + """ +} diff --git a/modules/local/makebins.nf~ b/modules/local/makebins.nf~ new file mode 100644 index 0000000000000000000000000000000000000000..4d3f307d404ca52338e3e9b429d6a4261983e53a --- /dev/null +++ b/modules/local/makebins.nf~ @@ -0,0 +1,30 @@ +process COOLER_MAKEBINS { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::cooler=0.8.11" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+        'https://depot.galaxyproject.org/singularity/cooler:0.8.11--pyh3252c3a_0' :
+        'quay.io/biocontainers/cooler:0.8.11--pyh3252c3a_0' }"
+
+    input:
+    tuple path(chromsizes), val(cool_bin)
+
+    output:
+    path ("*.bed") , emit: bed
+    path "versions.yml", emit: versions
+
+    script:
+    def args = task.ext.args ?: ''
+    """
+    cooler makebins \\
+        $args \\
+        ${chromsizes} \\
+        ${cool_bin} > cooler_bins_${cool_bin}.bed
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        cooler: \$(cooler --version 2>&1 | sed 's/cooler, version //')
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/split_cooler_dump.nf b/modules/local/split_cooler_dump.nf
new file mode 100644
index 0000000000000000000000000000000000000000..3ce1aaaed39fce9a1e573f439a802baec5925a25
--- /dev/null
+++ b/modules/local/split_cooler_dump.nf
@@ -0,0 +1,26 @@
+// Split one bedpe table into two three-column text matrices:
+// columns 1,2,3 go to <prefix>_raw.txt and columns 1,2,4 go to <prefix>_norm.txt.
+process SPLIT_COOLER_DUMP {
+    tag "$meta.id"
+    label 'process_low'
+
+    input:
+    tuple val(meta), path(bedpe)
+
+    output:
+    path "*.txt", emit: matrix
+    path "versions.yml", emit: versions
+
+    script:
+    // Output prefix = input file name without the optional "_norm" suffix and the literal ".bedpe" extension
+    def prefix = bedpe.toString() - ~/(_norm)?\.bedpe$/
+    """
+    awk '{OFS="\t"; print \$1,\$2,\$3}' ${bedpe} > ${prefix}_raw.txt
+    awk '{OFS="\t"; print \$1,\$2,\$4}' ${bedpe} > ${prefix}_norm.txt
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        awk: \$(awk --version | head -1 | cut -f1 -d, | sed -e 's/GNU Awk //')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/modules/cooler/cload/main.nf b/modules/nf-core/modules/cooler/cload/main.nf
new file mode 100644
index 0000000000000000000000000000000000000000..8602fb44862bceb4d6f398404318a10c08c99ad6
--- /dev/null
+++ b/modules/nf-core/modules/cooler/cload/main.nf
@@ -0,0 +1,39 @@
+process COOLER_CLOAD {
+    tag "$meta.id"
+    label 'process_high'
+
+    conda (params.enable_conda ? "bioconda::cooler=0.8.11" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/cooler:0.8.11--pyh3252c3a_0' : + 'quay.io/biocontainers/cooler:0.8.11--pyh3252c3a_0' }" + + input: + tuple val(meta), path(pairs), path(index), val(cool_bin) + path chromsizes + + output: + tuple val(meta), val(cool_bin), path("*.cool"), emit: cool + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def nproc = args.contains('pairix') || args.contains('tabix')? "--nproc $task.cpus" : '' + + """ + cooler cload \\ + $args \\ + $nproc \\ + ${chromsizes}:${cool_bin} \\ + $pairs \\ + ${prefix}.${cool_bin}.cool + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cooler: \$(cooler --version 2>&1 | sed 's/cooler, version //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/cooler/cload/main.nf~ b/modules/nf-core/modules/cooler/cload/main.nf~ new file mode 100644 index 0000000000000000000000000000000000000000..52964b8dd931d30606bb69a122d1529c15470279 --- /dev/null +++ b/modules/nf-core/modules/cooler/cload/main.nf~ @@ -0,0 +1,40 @@ +process COOLER_CLOAD { + tag "$meta.id" + label 'process_high' + + conda (params.enable_conda ? "bioconda::cooler=0.8.11" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/cooler:0.8.11--pyh3252c3a_0' : + 'quay.io/biocontainers/cooler:0.8.11--pyh3252c3a_0' }" + + input: + tuple val(meta), path(pairs), path(index) + val cool_bin + path chromsizes + + output: + tuple val(meta), val(cool_bin), path("*.cool"), emit: cool + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def nproc = args.contains('pairix') || args.contains('tabix')? 
"--nproc $task.cpus" : '' + + """ + cooler cload \\ + $args \\ + $nproc \\ + ${chromsizes}:${cool_bin} \\ + $pairs \\ + ${prefix}.${cool_bin}.cool + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cooler: \$(cooler --version 2>&1 | sed 's/cooler, version //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/cooler/cload/meta.yml b/modules/nf-core/modules/cooler/cload/meta.yml new file mode 100644 index 0000000000000000000000000000000000000000..ddb0443b23f7795c3db92c252e227babb10a7aeb --- /dev/null +++ b/modules/nf-core/modules/cooler/cload/meta.yml @@ -0,0 +1,52 @@ +name: cooler_cload +description: Create a cooler from genomic pairs and bins +keywords: + - cool +tools: + - cooler: + description: Sparse binary format for genomic interaction matrices + homepage: https://cooler.readthedocs.io/en/latest/index.html + documentation: https://cooler.readthedocs.io/en/latest/index.html + tool_dev_url: https://github.com/open2c/cooler + doi: "10.1093/bioinformatics/btz540" + licence: ["BSD-3-clause"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - pairs: + type: file + description: Path to contacts (i.e. read pairs) file. + - index: + type: file + description: Path to index file of the contacts. + - cool_bin: + type: value + description: Bins size in bp + - chromsizes: + type: file + description: Path to a chromsizes file. + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - version: + type: file + description: File containing software version + pattern: "versions.yml" + - cool: + type: file + description: Output COOL file path + pattern: "*.cool" + - cool_bin: + type: value + description: Bins size in bp + +authors: + - "@jianhong" diff --git a/modules/nf-core/modules/cooler/dump/main.nf b/modules/nf-core/modules/cooler/dump/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..5aa98a1723013a0cf19ef409e8ec7e47e2d3b46d --- /dev/null +++ b/modules/nf-core/modules/cooler/dump/main.nf @@ -0,0 +1,35 @@ +process COOLER_DUMP { + tag "$meta.id" + label 'process_high' + + conda (params.enable_conda ? "bioconda::cooler=0.8.11" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/cooler:0.8.11--pyh3252c3a_0' : + 'quay.io/biocontainers/cooler:0.8.11--pyh3252c3a_0' }" + + input: + tuple val(meta), val(resolution), path(cool) + + output: + tuple val(meta), path("*.bedpe"), emit: bedpe + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${cool.baseName}" + def suffix = resolution ? "::/resolutions/$resolution" : "" + """ + cooler dump \\ + $args \\ + -o ${prefix}.bedpe \\ + $cool$suffix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cooler: \$(cooler --version 2>&1 | sed 's/cooler, version //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/cooler/dump/main.nf~ b/modules/nf-core/modules/cooler/dump/main.nf~ new file mode 100644 index 0000000000000000000000000000000000000000..ffa36a1aaef96618328ecf280bebdab739625694 --- /dev/null +++ b/modules/nf-core/modules/cooler/dump/main.nf~ @@ -0,0 +1,35 @@ +process COOLER_DUMP { + tag "$meta.id" + label 'process_high' + + conda (params.enable_conda ? 
"bioconda::cooler=0.8.11" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/cooler:0.8.11--pyh3252c3a_0' : + 'quay.io/biocontainers/cooler:0.8.11--pyh3252c3a_0' }" + + input: + tuple val(meta), val(resolution), path(cool) + + output: + tuple val(meta), path("*.bedpe"), emit: bedpe + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${cool.baseName}" + def suffix = resolution ? "::$resolution" : "" + """ + cooler dump \\ + $args \\ + -o ${prefix}.bedpe \\ + $cool$suffix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cooler: \$(cooler --version 2>&1 | sed 's/cooler, version //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/cooler/dump/meta.yml b/modules/nf-core/modules/cooler/dump/meta.yml new file mode 100644 index 0000000000000000000000000000000000000000..fc12cdf3f5b4faf8107dd68852240ba25aeade28 --- /dev/null +++ b/modules/nf-core/modules/cooler/dump/meta.yml @@ -0,0 +1,44 @@ +name: cooler_dump +description: Dump a cooler’s data to a text stream. +keywords: + - dump +tools: + - cooler: + description: Sparse binary format for genomic interaction matrices + homepage: https://cooler.readthedocs.io/en/latest/index.html + documentation: https://cooler.readthedocs.io/en/latest/index.html + tool_dev_url: https://github.com/open2c/cooler + doi: "10.1093/bioinformatics/btz540" + licence: ["BSD-3-Clause"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - cool: + type: file + description: Path to COOL file + pattern: "*.{cool,mcool}" + - resolution: + type: value + description: Resolution + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bedpe: + type: file + description: Output text file + pattern: "*.bedpe" + +authors: + - "@jianhong" diff --git a/modules/nf-core/modules/cooler/zoomify/main.nf b/modules/nf-core/modules/cooler/zoomify/main.nf new file mode 100644 index 0000000000000000000000000000000000000000..942282c0d6a8ab161502a375863185579bae9f47 --- /dev/null +++ b/modules/nf-core/modules/cooler/zoomify/main.nf @@ -0,0 +1,35 @@ +process COOLER_ZOOMIFY { + tag "$meta.id" + label 'process_high' + + conda (params.enable_conda ? "bioconda::cooler=0.8.11" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/cooler:0.8.11--pyh3252c3a_0' : + 'quay.io/biocontainers/cooler:0.8.11--pyh3252c3a_0' }" + + input: + tuple val(meta), path(cool) + + output: + tuple val(meta), path("*.mcool"), emit: mcool + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + cooler zoomify \\ + $args \\ + -n $task.cpus \\ + -o ${prefix}.mcool \\ + $cool + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cooler: \$(cooler --version 2>&1 | sed 's/cooler, version //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/cooler/zoomify/meta.yml b/modules/nf-core/modules/cooler/zoomify/meta.yml new file mode 100644 index 0000000000000000000000000000000000000000..d9e12b0587d3500e9592402bd5788615cad9d17a --- /dev/null +++ b/modules/nf-core/modules/cooler/zoomify/meta.yml @@ -0,0 +1,41 @@ +name: cooler_zoomify +description: Generate a multi-resolution cooler file by coarsening +keywords: + - mcool +tools: + - cooler: + description: Sparse binary format for genomic interaction matrices + homepage: 
https://cooler.readthedocs.io/en/latest/index.html + documentation: https://cooler.readthedocs.io/en/latest/index.html + tool_dev_url: https://github.com/open2c/cooler + doi: "10.1093/bioinformatics/btz540" + licence: ["BSD-3-clause"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - cool: + type: file + description: Path to COOL file + pattern: "*.{cool,mcool}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - mcool: + type: file + description: Output mcool file + pattern: "*.mcool" + +authors: + - "@jianhong" diff --git a/modules/nf-core/modules/samtools/merge/main.nf b/modules/nf-core/modules/samtools/merge/main.nf deleted file mode 100644 index be6fe32ebc5945652c80c25bea2ce8fb39c14455..0000000000000000000000000000000000000000 --- a/modules/nf-core/modules/samtools/merge/main.nf +++ /dev/null @@ -1,41 +0,0 @@ -process SAMTOOLS_MERGE { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::samtools=1.14" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : - 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" - - input: - tuple val(meta), path(input_files) - path fasta - - output: - tuple val(meta), path("${prefix}.bam") , optional:true, emit: bam - tuple val(meta), path("${prefix}.cram"), optional:true, emit: cram - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - def file_type = input_files[0].getExtension() - def reference = fasta ? 
"--reference ${fasta}" : "" - """ - samtools \\ - merge \\ - --threads ${task.cpus-1} \\ - $args \\ - ${reference} \\ - ${prefix}.${file_type} \\ - $input_files - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/samtools/merge/meta.yml b/modules/nf-core/modules/samtools/merge/meta.yml deleted file mode 100644 index fb78e55cd3903c6b8717a464357b8d3233560d7e..0000000000000000000000000000000000000000 --- a/modules/nf-core/modules/samtools/merge/meta.yml +++ /dev/null @@ -1,54 +0,0 @@ -name: samtools_merge -description: Merge BAM or CRAM file -keywords: - - merge - - bam - - sam - - cram -tools: - - samtools: - description: | - SAMtools is a set of utilities for interacting with and post-processing - short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. - These files are generated as output by short read aligners like BWA. - homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html - doi: 10.1093/bioinformatics/btp352 - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input_files: - type: file - description: BAM/CRAM file - pattern: "*.{bam,cram,sam}" - - fasta: - type: optional file - description: Reference file the CRAM was created with - pattern: "*.{fasta,fa}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - bam: - type: file - description: BAM file - pattern: "*.{bam}" - - cram: - type: file - description: CRAM file - pattern: "*.{cram}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@yuukiiwa " - - "@maxulysse" - - "@FriederikeHanssen" diff --git a/modules/nf-core/modules/samtools/sort/main.nf b/modules/nf-core/modules/samtools/sort/main.nf deleted file mode 100644 index 0f2237cc1816688b332f319f2f20adf1283a042a..0000000000000000000000000000000000000000 --- a/modules/nf-core/modules/samtools/sort/main.nf +++ /dev/null @@ -1,31 +0,0 @@ -process SAMTOOLS_SORT { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::samtools=1.14" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : - 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" - - input: - tuple val(meta), path(bam) - - output: - tuple val(meta), path("*.bam"), emit: bam - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
- """ - samtools sort $args -@ $task.cpus -o ${prefix}.bam -T $prefix $bam - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/samtools/sort/meta.yml b/modules/nf-core/modules/samtools/sort/meta.yml deleted file mode 100644 index a820c55a36004ef4f83d0955f05df5116ffb381a..0000000000000000000000000000000000000000 --- a/modules/nf-core/modules/samtools/sort/meta.yml +++ /dev/null @@ -1,44 +0,0 @@ -name: samtools_sort -description: Sort SAM/BAM/CRAM file -keywords: - - sort - - bam - - sam - - cram -tools: - - samtools: - description: | - SAMtools is a set of utilities for interacting with and post-processing - short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. - These files are generated as output by short read aligners like BWA. - homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html - doi: 10.1093/bioinformatics/btp352 - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - bam: - type: file - description: Sorted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@ewels" diff --git a/nextflow.config b/nextflow.config index 4a7d9d6e42e3f1d21fc6069af6c5f5e1b89bf95e..586c6b4ff1a5cda58efb1f5539d409ccb49b3297 100644 --- a/nextflow.config +++ b/nextflow.config @@ -66,6 +66,7 @@ params { min_cis_dist = 0 // Contact maps + save_raw_maps = false bin_size = '1000000' res_zoomify = '5000' hicpro_maps = false diff --git a/subworkflows/local/cooler.nf b/subworkflows/local/cooler.nf index e4a908d190e50b2f7afcb3271351e5820c47a56d..e24aed4eddb1063285c0df44fba6f0000d5245d0 100644 --- a/subworkflows/local/cooler.nf +++ b/subworkflows/local/cooler.nf @@ -1,17 +1,76 @@ -params.options = [:] +/* + * COOLER MAIN WORKFLOW + * INPUT : pair text file with the list of valid interaction + * OUTPUT : cooler files + */ -include { COOLER_RAW } from '../../modules/local/cooler_raw' addParams( options: params.options ) -include { COOLER_BALANCE } from '../../modules/local/cooler_balance' addParams( options: params.options ) -include { COOLER_ZOOMIFY } from '../../modules/local/cooler_zoomify' addParams( options: params.options ) +include { COOLER_CLOAD } from '../../modules/nf-core/modules/cooler/cload/main' +include { COOLER_DUMP } from '../../modules/nf-core/modules/cooler/dump/main' +include { COOLER_ZOOMIFY } from '../../modules/nf-core/modules/cooler/zoomify/main' + +include { COOLER_BALANCE } from '../../modules/local/balance' +include { SPLIT_COOLER_DUMP } from '../../modules/local/split_cooler_dump' +include { COOLER_MAKEBINS } from '../../modules/local/makebins' workflow COOLER { - take: + take: + pairs // [meta, pairs, index] + chromsize + cool_bins + main: + ch_versions = Channel.empty() - main: - + //***************************************** + // EXPORT BINS - emit: + COOLER_MAKEBINS( + 
chromsize.combine(cool_bins) + ) + //***************************************** + // BUILD COOL FILE PER RESOLUTION + // [meta, pairs, resolution] + + COOLER_CLOAD( + pairs.combine(cool_bins), + chromsize.collect() + ) + + COOLER_BALANCE( + COOLER_CLOAD.out.cool + ) + + // Zoomify at minimum bin resolution + COOLER_CLOAD.out.cool + .combine(cool_bins.min()) + .filter{ it [1] == it[3] } + .map{it->[it[0], it[2]]} + .set{ch_cool_zoomify} + + COOLER_ZOOMIFY( + ch_cool_zoomify + ) + + //***************************************** + // DUMP DATA + // [meta, cool] / resolution + + COOLER_DUMP( + COOLER_BALANCE.out.cool.map{[it[0], "", it[2]]} + ) + + //COOLER_DUMP( + // COOLER_ZOOMIFY.out.mcool.combine(cool_bins).map{it->[it[0], it[2], it[1]]} + //) + + SPLIT_COOLER_DUMP( + COOLER_DUMP.out.bedpe + ) + + emit: + versions = ch_versions.mix(COOLER_MAKEBINS.out.versions, COOLER_CLOAD.out.versions, COOLER_BALANCE.out.versions, COOLER_ZOOMIFY.out.versions, COOLER_DUMP.out.versions, SPLIT_COOLER_DUMP.out.versions) // versions.yml of every module called above; ch_versions alone is always empty + cool = COOLER_BALANCE.out.cool + mcool = COOLER_ZOOMIFY.out.mcool } \ No newline at end of file diff --git a/subworkflows/local/hicpro.nf b/subworkflows/local/hicpro.nf index 00359dec181d173b7441ce458d7e7023bc17948a..7ea1bcceb646c83832706dd6e1c415ddcfe2d2e0 100644 --- a/subworkflows/local/hicpro.nf +++ b/subworkflows/local/hicpro.nf @@ -49,14 +49,14 @@ workflow HICPRO { ) //merge stats + // TODO - - if (!params.hicpro_maps){ - + if (params.hicpro_maps){ + //build_contact_maps BUILD_CONTACT_MAPS( MERGE_VALID_INTERACTION.out.valid_pairs.combine(map_res), - chrsize + chrsize.collect() ) // run_ice diff --git a/subworkflows/local/hicpro_mapping.nf~ b/subworkflows/local/hicpro_mapping.nf~ deleted file mode 100644 index 249f656096094b3df237b03aa4dbfd00aa1a6dea..0000000000000000000000000000000000000000 --- a/subworkflows/local/hicpro_mapping.nf~ +++ /dev/null @@ -1,102 +0,0 @@ -include { BOWTIE2_ALIGN } from '../../modules/nf-core/modules/bowtie2/align/main' -include { HICPRO_TRIM_READS } from '../../modules/local/hicpro/trim_reads' -include { BOWTIE2_ALIGN as BOWTIE2_ALIGN_TRIMMED } from '../../modules/nf-core/modules/bowtie2/align/main' -include { 
HICPRO_MERGE_BOWTIE2 } from '../../modules/local/hicpro/bowtie2_merge' -include { HICPRO_COMBINE_MATES} from '../../modules/local/hicpro/combine_mates' - -//include { BOWTIE2_ON_TRIMED_READS } from '../../modules/local/bowtie2_on_trimmed_reads' addParams( options: params.options ) -//include { BOWTIE2_MERGE_MAPPING_STEPS } from '../../modules/local/bowtie2_merge_mapping_steps' addParams( options: params.options ) -//include { DNASE_MAPPING_STATS } from '../../modules/local/dnase_mapping_stats' addParams( options: params.options ) -//include { COMBINE_MATES } from '../../modules/local/combine_mates' addParams( options: params.options ) -//include { GET_VALID_INTERACTION } from '../../modules/local/get_valid_interaction' addParams( options: params.options ) -//include { GET_VALID_INTERACTION_DNASE } from '../../modules/local/get_valid_interaction_dnase' addParams( options: params.options ) -//include { REMOVE_DUPLICATES } from '../../modules/local/remove_duplicates' addParams( options: params.options ) -//include { MERGE_STATS } from '../../modules/local/merge_stats' addParams( options: params.options ) -//include { BUILD_CONTACT_MAPS } from '../../modules/local/build_contact_maps' addParams( options: params.options ) -//include { RUN_ICE } from '../../modules/local/run_ice' addParams( options: params.options ) -//include { CONVERTS_TO_PAIRS } from '../../modules/local/convert_to_pairs' addParams( options: params.options ) - -// Paired-end to Single-end -def pairToSingle(row, mates) { - def meta = [:] - meta.id = row[0].id - meta.single_end = true - meta.mates = mates - def array = [] - if (mates == "R1") { - return [meta, [ row[1][0]] ] - }else if (mates == "R2"){ - return [meta, [ row[1][1]] ] - } -} - -def singleToPair(row){ - def meta = [:] - meta.id = row[0].id - meta.single_end = false - return [ meta, row[1] ] -} - -workflow HICPRO_MAPPING { - - take: - reads // [meta, read1, read2] - index - ligation_site - - main: - ch_versions = Channel.empty() - - // Align 
each mates separetly - ch_reads_r1 = reads.map{it -> pairToSingle(it,"R1")} - ch_reads_r2 = reads.map{pairToSingle(it,"R2")} - ch_reads = ch_reads_r1.concat(ch_reads_r2) - - // bowtie2 - BOWTIE2_ALIGN( - ch_reads, - index.collect(), - Channel.value(true).collect() - ) - ch_versions = ch_versions.mix(BOWTIE2_ALIGN.out.versions) - - // trim reads - HICPRO_TRIM_READS( - BOWTIE2_ALIGN.out.fastq, - ligation_site.collect() - ) - ch_versions = ch_versions.mix(HICPRO_TRIM_READS.out.versions) - - // bowtie2 on trimmed reads - BOWTIE2_ALIGN_TRIMMED( - HICPRO_TRIM_READS.out.fastq, - index.collect(), - Channel.value(false).collect() - ) - ch_versions = ch_versions.mix(BOWTIE2_ALIGN_TRIMMED.out.versions) - - // Merge the two mapping steps - BOWTIE2_ALIGN.out.bam - .combine(BOWTIE2_ALIGN_TRIMMED.out.bam, by:[0]) - .view() - .set { ch_bowtie2_align} - - HICPRO_MERGE_BOWTIE2( - ch_bowtie2_align - ) - - // Combine mates - HICPRO_MERGE_BOWTIE2.out.bam - .map { singleToPair(it) } - .groupTuple() - .view() - .set {ch_bams} - - HICPRO_COMBINE_MATES ( - ch_bams - ) - - emit: - versions = ch_versions - bam = HICPRO_COMBINE_MATES.out.bam -} diff --git a/subworkflows/local/prepare_genome.nf~ b/subworkflows/local/prepare_genome.nf~ deleted file mode 100644 index e989f87bda6918747a76e222c25af320f332b34b..0000000000000000000000000000000000000000 --- a/subworkflows/local/prepare_genome.nf~ +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Prepare Annotation Genome - */ - -include { BOWTIE2_BUILD } from '../../modules/nf-core/modules/bowtie2/build/main' -include { GET_CHROMSIZE } from '../../modules/local/get_chromsize' -include { GET_RESTRICTION_FRAGMENTS } from '../../modules/local/get_restriction_fragments' - -workflow PREPARE_GENOME { - - take: - fasta - restriction_site - - main: - ch_versions = Channel.empty() - - //*************************************** - // Bowtie Index - if(!params.bwt2_index){ - BOWTIE2_BUILD ( - fasta - ) - ch_index = BOWTIE2_BUILD.out.index - }else{ - Channel.fromPath( 
params.bwt2_index , checkIfExists: true) - .ifEmpty { exit 1, "Genome index: Provided index not found: ${params.bwt2_index}" } - .into { ch_index } - } - - //*************************************** - // Chromosome size - if(!params.chromosome_size){ - GET_CHROMSIZE( - fasta - ) - ch_chromsize = GET_CHROMSIZE.out.results - }else{ - Channel.fromPath( params.chromosome_size , checkIfExists: true) - .into {ch_chromsize} - } - - //*************************************** - // Restriction fragments - if(!params.restriction_fragments && !params.dnase){ - GET_RESTRICTION_FRAGMENTS( - fasta, - restriction_site - ) - ch_resfrag = GET_RESTRICTION_FRAGMENTS.out.results - }else{ - Channel.fromPath( params.restriction_fragments, checkIfExists: true ) - .set {ch_resfrag} - } - - emit: - index = ch_index - chromosome_size = ch_chromsize - res_frag = ch_resfrag -} diff --git a/workflows/hic.nf b/workflows/hic.nf index e50af0f3089594eb3c59066d38769d847c6ee321..a65a385b05b26d7069e2b99d5d6a29864f2fec82 100644 --- a/workflows/hic.nf +++ b/workflows/hic.nf @@ -75,7 +75,6 @@ if (params.res_compartments && !params.skip_compartments){ } ch_map_res = ch_map_res.unique() - /* ======================================================================================== CONFIG FILES @@ -106,7 +105,7 @@ ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multi include { INPUT_CHECK } from '../subworkflows/local/input_check' include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' include { HICPRO } from '../subworkflows/local/hicpro' -//include { COOLER } from '../subworkflows/local/cooler' +include { COOLER } from '../subworkflows/local/cooler' //include { COMPARTMENTS } from '../subworkflows/local/compartments' //include { TADS } from '../subworkflows/local/tads' @@ -182,6 +181,14 @@ workflow HIC { ch_map_res ) + // + // SUB-WORKFLOW: COOLER + // + COOLER ( + HICPRO.out.pairs, + PREPARE_GENOME.out.chromosome_size, + ch_map_res + ) // // MODULE: MultiQC