diff --git a/conf/modules.config b/conf/modules.config index dc39872faf4da1cf93ef4ba05acfa9ea31ee1371..c289ffbf9f52ebf1230baa4f7fb87b25e81eb9bd 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -287,12 +287,44 @@ process { ext.prefix = { "${cool.baseName}" } } - withName: 'HIC_PLOT_MATRIX' { +// withName: 'HIC_PLOT_MATRIX' { +// publishDir = [ +// path: { "${params.outdir}/contact_maps/cool/" }, +// mode: 'copy' +// ] +// ext.args = '--log --perChromosome' +// ext.prefix = { "${cool.baseName}" } +// } + + //******************************** + // FILTER_PCR_DUP + + withName: 'PICARD_MARKDUPLICATES' { + ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}" } + ext.args = { [ + "--REMOVE_DUPLICATES true" + ].join('').trim() } publishDir = [ - path: { "${params.outdir}/contact_maps/cool/" }, + path: { "${params.outdir}/picard/bam" }, + mode: 'copy' + ] + } + + withName: 'SAMTOOLS_SORT' { + ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}_sorted" } + ext.args = { [ + "" + ].join('').trim() } + } + + withName: 'SAMTOOLS_SORT_N' { + ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}_nsorted" } + ext.args = { [ + "-n" + ].join('').trim() } + publishDir = [ + path: { "${params.outdir}/picard/bam" }, mode: 'copy' ] - ext.args = '--log --perChromosome' - ext.prefix = { "${cool.baseName}" } } } diff --git a/nextflow.config b/nextflow.config index 83e09fb64cadef9db5592792d9f133b4c5d7520d..38a505162cecd2037adc739dc820b264810fd9c3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -28,8 +28,9 @@ params { save_aligned_intermediates = false bwt2_opts_end2end = '--very-sensitive -L 30 --score-min L,-0.6,-0.2 --end-to-end --reorder' bwt2_opts_trimmed = '--very-sensitive -L 20 --score-min L,-0.6,-0.2 --end-to-end --reorder' - keep_dups = false + keep_dups = true keep_multi = false + filter_pcr_picard = true min_mapq = 10 // Digestion Hi-C @@ -54,13 +55,13 @@ params { ligation_site='GATCGATC,GATCANTC,GANTGATC,GANTANTC' } } - + min_restriction_fragment_size = 0 max_restriction_fragment_size = 0 min_insert_size = 0 max_insert_size = 0 save_pairs_intermediates = false - + // Dnase Hi-C dnase = false min_cis_dist = 0 @@ -111,7 +112,7 @@ params { validate_params = true show_hidden_params = false schema_ignore_params = 'genomes,digest' - + // Config options custom_config_version = 'master' custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" diff --git a/subworkflows/local/filter_pcr_dup.nf b/subworkflows/local/filter_pcr_dup.nf new file mode 100644 index 0000000000000000000000000000000000000000..7fb3fa55f498e66c9ebc6cbc386946ce6e3000e9 --- /dev/null +++ b/subworkflows/local/filter_pcr_dup.nf @@ -0,0 +1,46 @@ +/* + * FILTER_PCR_DUP + * Delete PCR duplicates reads with PICARD MarkDuplicates + */ + +include { SAMTOOLS_SORT } from '../../modules/nf-core/custom/samtools/sort/main' +include { SAMTOOLS_SORT_N } from '../../modules/nf-core/custom/samtools_n/sort/main' +include { FILTER_PAIR } from '../../modules/local/filterbam/main' +include { SAMTOOLS_INDEX } from '../../modules/nf-core/custom/samtools/index/main' +include { PICARD_MARKDUPLICATES } from '../../modules/nf-core/custom/picard/markduplicates/main' + +workflow FILTER_PCR_DUP { + + take: + bam + fasta + index + + main: + + SAMTOOLS_SORT( + bam + ) + + PICARD_MARKDUPLICATES( + SAMTOOLS_SORT.out.bam, + fasta.collect(), + index.collect() + ) + + SAMTOOLS_SORT_N( + PICARD_MARKDUPLICATES.out.bam + ) + SAMTOOLS_SORT_N.out.bam.set{ ch_bam } + + FILTER_PAIR( + ch_bam.combine(ch_bam) + .map { + meta1, bam1, meta2, bam2 -> + meta1.id == meta2.id && meta1.chunk == meta2.chunk && meta1.mates == "R1" && meta2.mates == "R2" ? [ meta1, bam1, meta2, bam2 ] : null + }) + FILTER_PAIR.out.bam.set{ new_ch_bam } + + emit: + bam = new_ch_bam +} diff --git a/subworkflows/local/hicpro.nf b/subworkflows/local/hicpro.nf index 8b106a0820cf808501add6e5ad886cc726626107..0cea0ff0cf49d7aa84a49d9b042e349ca7610d6f 100644 --- a/subworkflows/local/hicpro.nf +++ b/subworkflows/local/hicpro.nf @@ -3,7 +3,7 @@ * MAIN WORKFLOW * From the raw sequencing reads to the list of valid interactions */ - + include { HICPRO_MAPPING } from './hicpro_mapping' include { GET_VALID_INTERACTION } from '../../modules/local/hicpro/get_valid_interaction' include { GET_VALID_INTERACTION_DNASE } from '../../modules/local/hicpro/get_valid_interaction_dnase' @@ -12,121 +12,142 @@ include { MERGE_STATS } from '../../modules/local/hicpro/merge_stats' include { HICPRO2PAIRS } from '../../modules/local/hicpro/hicpro2pairs' include { BUILD_CONTACT_MAPS } from '../../modules/local/hicpro/build_contact_maps' include { ICE_NORMALIZATION } from '../../modules/local/hicpro/run_ice' +include { FILTER_PCR_DUP } from './filter_pcr_dup' // Remove meta.chunks def removeChunks(row){ - meta = row[0].clone() - meta.remove('chunk') - return [meta, row[1]] + meta = row[0].clone() + meta.remove('chunk') + return [meta, row[1]] } workflow HICPRO { - take: - reads // [meta, read1, read2] - index // path - fragments // path - chrsize // path - ligation_site // value - map_res // values - - main: - ch_versions = Channel.empty() - - // Fastq to paired-end bam - HICPRO_MAPPING( - reads, - index, - ligation_site - ) - ch_versions = ch_versions.mix(HICPRO_MAPPING.out.versions) - - //*************************************** - // DIGESTION PROTOCOLS - - if (!params.dnase){ - GET_VALID_INTERACTION ( - HICPRO_MAPPING.out.bam, - fragments.collect() + take: + reads // [meta, read1, read2] + index // path + fragments // path + chrsize // path + ligation_site // value + map_res // values + fasta + + main: + ch_versions = Channel.empty() + + // Fastq to paired-end bam + HICPRO_MAPPING( + reads, + index, + ligation_site ) - ch_versions = ch_versions.mix(GET_VALID_INTERACTION.out.versions) - ch_valid_pairs = GET_VALID_INTERACTION.out.valid_pairs - ch_valid_stats = GET_VALID_INTERACTION.out.stats - - }else{ - - //**************************************** - // DNASE-LIKE PROTOCOLS - - GET_VALID_INTERACTION_DNASE ( - HICPRO_MAPPING.out.bam + ch_versions = ch_versions.mix(HICPRO_MAPPING.out.versions) + + //*************************************** + // FILTER PCR DUPLICATES + + if (params.filter_pcr_picard && !params.keep_dups){ + error "Error: cannot filter PCR duplicates with both methods! If filter_pcr_picard is true, keep_dups should be true too" + } + else if (params.filter_pcr_picard){ + FILTER_PCR_DUP( + HICPRO_MAPPING.out.bam, + fasta, + index + ) + FILTER_PCR_DUP.out.bam.set{ ch_bam } + } + else { + HICPRO_MAPPING.out.bam.set{ ch_bam } + } + + + //*************************************** + // DIGESTION PROTOCOLS + + if (!params.dnase){ + GET_VALID_INTERACTION ( + ch_bam, + fragments.collect() + ) + ch_versions = ch_versions.mix(GET_VALID_INTERACTION.out.versions) + ch_valid_pairs = GET_VALID_INTERACTION.out.valid_pairs + ch_valid_stats = GET_VALID_INTERACTION.out.stats + + }else{ + + //**************************************** + // DNASE-LIKE PROTOCOLS + + GET_VALID_INTERACTION_DNASE ( + ch_bam + ) + ch_versions = ch_versions.mix(GET_VALID_INTERACTION_DNASE.out.versions) + ch_valid_pairs = GET_VALID_INTERACTION_DNASE.out.valid_pairs + ch_valid_stats = GET_VALID_INTERACTION_DNASE.out.stats + } + + + //************************************** + // MERGE AND REMOVE DUPLICATES + + //if (params.split_fastq){ + ch_valid_pairs = ch_valid_pairs.map{ it -> removeChunks(it)}.groupTuple() + ch_hicpro_stats = HICPRO_MAPPING.out.mapstats.map{it->removeChunks(it)}.groupTuple() + .concat(HICPRO_MAPPING.out.pairstats.map{it->removeChunks(it)}.groupTuple(), + ch_valid_stats.map{it->removeChunks(it)}.groupTuple()) + //}else{ + // ch_hicpro_stats = HICPRO_MAPPING.out.mapstats.groupTuple() + // .concat(HICPRO_MAPPING.out.pairstats.groupTuple(), + // ch_valid_stats.groupTuple()) + //} + + MERGE_VALID_INTERACTION ( + ch_valid_pairs ) - ch_versions = ch_versions.mix(GET_VALID_INTERACTION_DNASE.out.versions) - ch_valid_pairs = GET_VALID_INTERACTION_DNASE.out.valid_pairs - ch_valid_stats = GET_VALID_INTERACTION_DNASE.out.stats - } - - - //************************************** - // MERGE AND REMOVE DUPLICATES - - //if (params.split_fastq){ - ch_valid_pairs = ch_valid_pairs.map{ it -> removeChunks(it)}.groupTuple() - ch_hicpro_stats = HICPRO_MAPPING.out.mapstats.map{it->removeChunks(it)}.groupTuple() - .concat(HICPRO_MAPPING.out.pairstats.map{it->removeChunks(it)}.groupTuple(), - ch_valid_stats.map{it->removeChunks(it)}.groupTuple()) - //}else{ - // ch_hicpro_stats = HICPRO_MAPPING.out.mapstats.groupTuple() - // .concat(HICPRO_MAPPING.out.pairstats.groupTuple(), - // ch_valid_stats.groupTuple()) - //} - - MERGE_VALID_INTERACTION ( - ch_valid_pairs - ) - ch_versions = ch_versions.mix(MERGE_VALID_INTERACTION.out.versions) - - MERGE_STATS( - ch_hicpro_stats - ) - ch_versions = ch_versions.mix(MERGE_STATS.out.versions) - - //*************************************** - // CONVERTS TO PAIRS - HICPRO2PAIRS ( - MERGE_VALID_INTERACTION.out.valid_pairs, - chrsize.collect() - ) - ch_versions = ch_versions.mix(HICPRO2PAIRS.out.versions) - - //*************************************** - // CONTACT MAPS - - if (params.hicpro_maps){ - - //build_contact_maps - BUILD_CONTACT_MAPS( - MERGE_VALID_INTERACTION.out.valid_pairs.combine(map_res), - chrsize.collect() + ch_versions = ch_versions.mix(MERGE_VALID_INTERACTION.out.versions) + + MERGE_STATS( + ch_hicpro_stats ) - ch_hicpro_raw_maps = BUILD_CONTACT_MAPS.out.maps - - // run_ice - ICE_NORMALIZATION( - BUILD_CONTACT_MAPS.out.maps + ch_versions = ch_versions.mix(MERGE_STATS.out.versions) + + //*************************************** + // CONVERTS TO PAIRS + HICPRO2PAIRS ( + MERGE_VALID_INTERACTION.out.valid_pairs, + chrsize.collect() ) - ch_hicpro_iced_maps = ICE_NORMALIZATION.out.maps - ch_versions = ch_versions.mix(ICE_NORMALIZATION.out.versions) - - }else{ - ch_hicpro_raw_maps = Channel.empty() - ch_hicpro_iced_maps = Channel.empty() - } - - emit: - versions = ch_versions - pairs = HICPRO2PAIRS.out.pairs - mqc = MERGE_VALID_INTERACTION.out.mqc.concat(MERGE_STATS.out.mqc) - raw_maps = ch_hicpro_raw_maps - iced_maps = ch_hicpro_iced_maps + ch_versions = ch_versions.mix(HICPRO2PAIRS.out.versions) + + //*************************************** + // CONTACT MAPS + + if (params.hicpro_maps){ + + //build_contact_maps + BUILD_CONTACT_MAPS( + MERGE_VALID_INTERACTION.out.valid_pairs.combine(map_res), + chrsize.collect() + ) + ch_hicpro_raw_maps = BUILD_CONTACT_MAPS.out.maps + + // run_ice + ICE_NORMALIZATION( + BUILD_CONTACT_MAPS.out.maps + ) + ch_hicpro_iced_maps = ICE_NORMALIZATION.out.maps + ch_versions = ch_versions.mix(ICE_NORMALIZATION.out.versions) + + }else{ + ch_hicpro_raw_maps = Channel.empty() + ch_hicpro_iced_maps = Channel.empty() + } + + emit: + versions = ch_versions + pairs = HICPRO2PAIRS.out.pairs + mqc = MERGE_VALID_INTERACTION.out.mqc.concat(MERGE_STATS.out.mqc) + raw_maps = ch_hicpro_raw_maps + iced_maps = ch_hicpro_iced_maps } diff --git a/workflows/hic.nf b/workflows/hic.nf index 50c6a0a0f7e8045e48f32be47120093ad20a0173..98ea236523a1841fb4d699883b630ee5f53f0d4a 100644 --- a/workflows/hic.nf +++ b/workflows/hic.nf @@ -179,7 +179,8 @@ workflow HIC { PREPARE_GENOME.out.res_frag, PREPARE_GENOME.out.chromosome_size, ch_ligation_site, - ch_map_res + ch_map_res, + ch_fasta ) ch_versions = ch_versions.mix(HICPRO.out.versions)