From ce06ca062bd2b101eb604f53a63767676ece7c82 Mon Sep 17 00:00:00 2001 From: Mia Croiset <mia.croiset@ens-lyon.fr> Date: Wed, 20 Sep 2023 10:36:47 +0200 Subject: [PATCH] correct picard pcr filter for hicpro --- modules/local/hicpro/hicpro2pairs.nf | 6 ++++-- subworkflows/local/filter_pcr_dup.nf | 9 +++------ subworkflows/local/hicpro.nf | 15 -------------- subworkflows/local/hicpro_mapping.nf | 30 +++++++++++++++++++++++++++- 4 files changed, 36 insertions(+), 24 deletions(-) diff --git a/modules/local/hicpro/hicpro2pairs.nf b/modules/local/hicpro/hicpro2pairs.nf index eb9b86b..bdb111b 100644 --- a/modules/local/hicpro/hicpro2pairs.nf +++ b/modules/local/hicpro/hicpro2pairs.nf @@ -19,8 +19,10 @@ process HICPRO2PAIRS { prefix = "${meta.id}" """ ##columns: readID chr1 pos1 chr2 pos2 strand1 strand2 - awk '{OFS="\t";print \$1,\$2,\$3,\$5,\$6,\$4,\$7}' $vpairs | bgzip -c > ${prefix}_contacts.pairs.gz - ##sort -k2,2 -k4,4 -k3,3n -k5,5n ${prefix}_contacts.pairs | bgzip -c > ${prefix}_contacts.pairs.gz + ## bgzip -c > ${prefix}_contacts.pairs.gz + awk '{OFS="\t";print \$1,\$2,\$3,\$5,\$6,\$4,\$7}' $vpairs |\\ + + sort -k2,2 -k4,4 -k3,3n -k5,5n | bgzip -c > ${prefix}_contacts.pairs.gz pairix -f ${prefix}_contacts.pairs.gz cat <<-END_VERSIONS > versions.yml diff --git a/subworkflows/local/filter_pcr_dup.nf b/subworkflows/local/filter_pcr_dup.nf index 344c1c9..cd5eece 100644 --- a/subworkflows/local/filter_pcr_dup.nf +++ b/subworkflows/local/filter_pcr_dup.nf @@ -32,18 +32,15 @@ workflow FILTER_PCR_DUP { ) SAMTOOLS_SORT_N.out.bam.set{ ch_bam } - if (params.workflow == "hicstuff"){ - FILTER_PAIR( + + FILTER_PAIR( ch_bam.combine(ch_bam) .map { meta1, bam1, meta2, bam2 -> meta1.id == meta2.id && meta1.chunk == meta2.chunk && meta1.mates == "R1" && meta2.mates == "R2" ? [ meta1, bam1, meta2, bam2 ] : null }) FILTER_PAIR.out.bam.set{ new_ch_bam } - } - else{ - ch_bam.set{ new_ch_bam } - } + emit: diff --git a/subworkflows/local/hicpro.nf b/subworkflows/local/hicpro.nf index cf1915c..2553e22 100644 --- a/subworkflows/local/hicpro.nf +++ b/subworkflows/local/hicpro.nf @@ -46,22 +46,7 @@ workflow HICPRO { HICPRO_MAPPING.out.bam.set{ ch_bam } - //*************************************** - // FILTER PCR DUPLICATES - if (params.filter_pcr_picard && !params.keep_dups){ - error "Error: cannot filter PCR duplicates with both methods! If filter_pcr_picard is true, keep_dups should be true too" - } - else if (params.filter_pcr_picard){ - FILTER_PCR_DUP( - HICPRO_MAPPING.out.bam, - fasta, - index - ) - FILTER_PCR_DUP.out.bam - .set {ch_bam} - } - //TODO add versions //*************************************** // DIGESTION PROTOCOLS diff --git a/subworkflows/local/hicpro_mapping.nf b/subworkflows/local/hicpro_mapping.nf index 61a95a8..e544ebd 100644 --- a/subworkflows/local/hicpro_mapping.nf +++ b/subworkflows/local/hicpro_mapping.nf @@ -9,6 +9,7 @@ include { BOWTIE2_ALIGN as BOWTIE2_ALIGN_TRIMMED } from '../../modules/nf-core/b include { MERGE_BOWTIE2 } from '../../modules/local/hicpro/bowtie2_merge' include { COMBINE_MATES} from '../../modules/local/hicpro/combine_mates' include { MAPPING_STATS_DNASE } from '../../modules/local/hicpro/dnase_mapping_stats' +include { FILTER_PCR_DUP } from './filter_pcr_dup' // Paired-end to Single-end def pairToSingle(row, mates) { @@ -88,15 +89,42 @@ workflow HICPRO_MAPPING { MERGE_BOWTIE2( ch_bowtie2_align ) + MERGE_BOWTIE2.out.bam + .set{ ch_bowtie2_merged } + + ch_bowtie2_merged.view() + ch_versions = ch_versions.mix(MERGE_BOWTIE2.out.versions) ch_mapping_stats = MERGE_BOWTIE2.out.stats + //*************************************** + // FILTER PCR DUPLICATES + + if (params.filter_pcr_picard && !params.keep_dups){ + error "Error: cannot filter PCR duplicates with both methods! If filter_pcr_picard is true, keep_dups should be true too" + } + else if (params.filter_pcr_picard){ + FILTER_PCR_DUP( + ch_bowtie2_merged, + fasta, + index + ) + FILTER_PCR_DUP.out.bam + .flatten() + .buffer( size: 2 ) + .set {ch_bowtie2_merged} + } + //TODO add versions + ch_bowtie2_merged.view() + // Combine mates - MERGE_BOWTIE2.out.bam + ch_bowtie2_merged .map { singleToPair(it) } .groupTuple() .set {ch_bams} + ch_bams.view() + }else{ MAPPING_STATS_DNASE( -- GitLab