From c4a7c59207cd323f52cfceab0dc6f972c557ab7e Mon Sep 17 00:00:00 2001 From: Mia Croiset <mia.croiset@ens-lyon.fr> Date: Wed, 3 May 2023 10:29:12 +0200 Subject: [PATCH] Different matrix building --- conf/hicstuff.config | 14 ++++++++-- conf/modules.config | 9 ++++++ modules/local/hicexplorer/hicPlotMatrix.nf | 24 ++++++++++++++++ modules/local/hicstuff/bam2pairs.nf | 10 ++++--- .../local/hicstuff/build_matrix_cool_alt.nf | 28 +++++++++++++++++++ subworkflows/local/hicstuff_sub.nf | 17 +++++++++-- workflows/hicstuff.nf | 3 +- 7 files changed, 95 insertions(+), 10 deletions(-) create mode 100644 modules/local/hicexplorer/hicPlotMatrix.nf create mode 100644 modules/local/hicstuff/build_matrix_cool_alt.nf diff --git a/conf/hicstuff.config b/conf/hicstuff.config index e670077..0c0010d 100644 --- a/conf/hicstuff.config +++ b/conf/hicstuff.config @@ -130,6 +130,7 @@ params { hicstuff_valid_idx = 'valid_idx.pairs' hicstuff_min_qual = 30 hicstuff_matrix = 'abs_fragments_contacts_weighted.txt' + hicstuff_bin = 10000 } process { @@ -165,9 +166,9 @@ process { } withName: 'BAM2PAIRS' { + ext.pairs = params.hicstuff_valid_pairs + ext.index = params.hicstuff_valid_idx ext.args = { [ - " -o ${params.hicstuff_valid_pairs}", - " -x ${params.hicstuff_valid_idx}", " -q ${params.hicstuff_min_qual}", " -c ${params.hicstuff_circular}" ].join('').trim() } @@ -190,6 +191,15 @@ process { mode: 'copy' ] } + withName: 'BUILD_MATRIX_COOL_ALT' { + ext.outname = params.hicstuff_matrix + ext.bin = params.hicstuff_bin + ext.args = {"-c1 2 -p1 3 -p2 4 -c2 5"} + publishDir = [ + path: { "${params.outdir}/hicstuff/matrix" }, + mode: 'copy' + ] + } //********************************************** // PREPARE_GENOME diff --git a/conf/modules.config b/conf/modules.config index 096a860..dc39872 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -286,4 +286,13 @@ process { ext.args = '--correctForMultipleTesting fdr' ext.prefix = { "${cool.baseName}" } } + + withName: 'HIC_PLOT_MATRIX' { + publishDir = [ + path: { "${params.outdir}/contact_maps/cool/" }, + mode: 'copy' + ] + ext.args = '--log --perChromosome' + ext.prefix = { "${cool.baseName}" } + } } diff --git a/modules/local/hicexplorer/hicPlotMatrix.nf b/modules/local/hicexplorer/hicPlotMatrix.nf new file mode 100644 index 0000000..8041aa1 --- /dev/null +++ b/modules/local/hicexplorer/hicPlotMatrix.nf @@ -0,0 +1,24 @@ +process HIC_PLOT_MATRIX { + tag "$meta.id" + label 'process_single' + + conda "bioconda::hicexplorer=3.7.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/hicexplorer:3.7.2--pyhdfd78af_1' : + 'quay.io/biocontainers/hicexplorer:3.7.2--pyhdfd78af_1' }" + + + input: + tuple val(meta), path(cool) + + output: + tuple val(meta), path("*.pdf"), emit: pdf + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + hicPlotMatrix -m ${cool} -o ${prefix}_cool_log.pdf ${args} + """ +} diff --git a/modules/local/hicstuff/bam2pairs.nf b/modules/local/hicstuff/bam2pairs.nf index c8a5a14..4b2b26e 100644 --- a/modules/local/hicstuff/bam2pairs.nf +++ b/modules/local/hicstuff/bam2pairs.nf @@ -1,5 +1,5 @@ process BAM2PAIRS { - tag "$meta1" + tag "$meta1.id" label 'process_high' conda "conda-forge::python=3.9 conda-forge::biopython=1.80 conda-forge::numpy=1.22.3 conda-forge::matplotlib=3.6.3 conda-forge::pandas=1.5.3" @@ -12,15 +12,17 @@ process BAM2PAIRS { tuple val(meta), path(fasta) output: - tuple val(meta1), path("valid.pairs"), emit: valid_pairs - tuple val(meta1), path("valid_idx.pairs"), emit: idx_pairs + tuple val(meta1), path("*_valid.pairs"), emit: valid_pairs + tuple val(meta1), path("*_valid_idx.pairs"), emit: idx_pairs script: def args = task.ext.args ?: '' + def pairs = task.ext.pairs ?: '' + def index = task.ext.index ?: '' """ - hicstuff_bam2pairs.py -b1 ${bam1} -b2 ${bam2} -i ${info_contigs} -e ${digestion} -f ${fasta} ${args} + hicstuff_bam2pairs.py -b1 ${bam1} -b2 ${bam2} -i ${info_contigs} -e ${digestion} -f ${fasta} -o ${meta1.id}_${pairs} -x ${meta1.id}_${index} ${args} """ } diff --git a/modules/local/hicstuff/build_matrix_cool_alt.nf b/modules/local/hicstuff/build_matrix_cool_alt.nf new file mode 100644 index 0000000..a52465d --- /dev/null +++ b/modules/local/hicstuff/build_matrix_cool_alt.nf @@ -0,0 +1,28 @@ +process BUILD_MATRIX_COOL_ALT { + tag "$meta1.id" + label 'process_single' + + conda "conda-forge::python=3.9 conda-forge::biopython=1.80 conda-forge::numpy=1.22.3 conda-forge::matplotlib=3.6.3 conda-forge::pandas=1.5.3" + container = "lbmc/hicstuff:3.1.3" + + input: + tuple val(meta), path(chromosome_size) + tuple val(meta1), path(idx_pairs) + + + output: + tuple val(meta), path("${meta1.id}_*.cool"), emit: matrix + + script: + + def args = task.ext.args ?: '' + def bin = task.ext.bin.toInteger() ?: '' + def outname = task.ext.outname ?: '' + def base = outname.replaceFirst(/.txt/,"") + + """ + cooler cload pairs ${args} ${chromosome_size}:${bin} ${idx_pairs} ${base}.cool + + mv ${base}.cool ${meta1.id}_${base}_${bin}_alt.cool + """ +} diff --git a/subworkflows/local/hicstuff_sub.nf b/subworkflows/local/hicstuff_sub.nf index 60f1f79..47a005c 100644 --- a/subworkflows/local/hicstuff_sub.nf +++ b/subworkflows/local/hicstuff_sub.nf @@ -4,6 +4,7 @@ include { FRAGMENT_ENZYME } from '../../modules/local/hicstuff/fragment_enzyme' include { BAM2PAIRS } from '../../modules/local/hicstuff/bam2pairs' include { BUILD_MATRIX } from '../../modules/local/hicstuff/build_matrix' include { BUILD_MATRIX_COOL } from '../../modules/local/hicstuff/build_matrix_cool' +include { BUILD_MATRIX_COOL_ALT } from '../../modules/local/hicstuff/build_matrix_cool_alt' // Paired-end to Single-end def pairToSingle(row, mates) { @@ -33,6 +34,7 @@ workflow HICSTUFF_SUB { ligation_site digestion fasta + chromosome_size main: @@ -55,20 +57,29 @@ workflow HICSTUFF_SUB { BOWTIE2_ALIGNMENT.out.bam.combine(BOWTIE2_ALIGNMENT.out.bam) .map { meta1, bam1, meta2, bam2 -> - meta1.id == meta2.id && meta1.mates == "R1" && meta2.mates == "R2" ? [ meta1, bam1, meta2, bam2 ] : null + meta1.id == meta2.id && meta1.chunk == meta2.chunk && meta1.mates == "R1" && meta2.mates == "R2" ? [ meta1, bam1, meta2, bam2 ] : null }, FRAGMENT_ENZYME.out.info_contigs.collect(), digestion, fasta.collect() ) + + //TODO rajouter filtres + distance law + filtres PCR en options + // pour les PCR filter, soit le hicstuff soit PICARD + BUILD_MATRIX( BAM2PAIRS.out.idx_pairs, - FRAGMENT_ENZYME.out.fragments_list + FRAGMENT_ENZYME.out.fragments_list.collect() ) BUILD_MATRIX_COOL( BAM2PAIRS.out.idx_pairs, - FRAGMENT_ENZYME.out.fragments_list + FRAGMENT_ENZYME.out.fragments_list.collect() + ) + + BUILD_MATRIX_COOL_ALT( + chromosome_size.collect(), + BAM2PAIRS.out.idx_pairs ) } diff --git a/workflows/hicstuff.nf b/workflows/hicstuff.nf index 4a69b68..8263bd3 100644 --- a/workflows/hicstuff.nf +++ b/workflows/hicstuff.nf @@ -39,6 +39,7 @@ workflow HICSTUFF { PREPARE_GENOME.out.index, ch_ligation_site, params.digestion, //str enzyme - ch_fasta + ch_fasta, + PREPARE_GENOME.out.chromosome_size ) } -- GitLab