From 168039301f5995afa18711b4118f6678a8e4990d Mon Sep 17 00:00:00 2001 From: nservant <nicolas.servant@curie.fr> Date: Tue, 2 Apr 2019 10:15:23 +0200 Subject: [PATCH] running version + first docker,conda,singularity env --- bin/mapped_2hic_fragments.py | 4 +- conf/base.config | 24 ++++++- conf/hicpro.config | 27 +++++++- conf/test.config | 5 +- environment.yml | 10 ++- main.nf | 121 +++++++++++++++++++++++++++++------ nextflow.config | 4 +- 7 files changed, 163 insertions(+), 32 deletions(-) diff --git a/bin/mapped_2hic_fragments.py b/bin/mapped_2hic_fragments.py index 391a58b..efa32e6 100755 --- a/bin/mapped_2hic_fragments.py +++ b/bin/mapped_2hic_fragments.py @@ -501,9 +501,9 @@ if __name__ == "__main__": minInsertSize = arg elif opt in ("-l", "--longestInsertSize"): maxInsertSize = arg - elif opt in ("-t", "--shortestFragmentLength"): + elif opt in ("-t", "--shortestFragmentSize"): minFragSize = arg - elif opt in ("-m", "--longestFragmentLength"): + elif opt in ("-m", "--longestFragmentSize"): maxFragSize = arg elif opt in ("-d", "--minCisDist"): minDist = arg diff --git a/conf/base.config b/conf/base.config index a8413de..a550a4a 100644 --- a/conf/base.config +++ b/conf/base.config @@ -33,13 +33,33 @@ process { time = { check_max( 5.h * task.attempt, 'time' ) } } withName:merge_mapping_steps { - cpus = { check_max( 1, 'cpus' ) } + cpus = { check_max( 4, 'cpus' ) } memory = { check_max( 20.GB * task.attempt, 'memory' ) } time = { check_max( 5.h * task.attempt, 'time' ) } } withName:trim_reads { cpus = { check_max (1, 'cpus')} - memory = { check_max( 10.GB * task.attempt, 'memory' ) } + memory = { check_max( 1.GB * task.attempt, 'memory' ) } + time = { check_max( 5.h * task.attempt, 'time' ) } + } + withName:combine_mapped_files { + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 4.GB * task.attempt, 'memory' ) } + time = { check_max( 5.h * task.attempt, 'time' ) } + } + withName:get_valid_interaction { + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 4.GB * task.attempt, 'memory' ) } + time = { check_max( 5.h * task.attempt, 'time' ) } + } + withName:build_matrix { + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 5.h * task.attempt, 'time' ) } + } +withName:run_iced { + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 20.GB * task.attempt, 'memory' ) } time = { check_max( 5.h * task.attempt, 'time' ) } } } diff --git a/conf/hicpro.config b/conf/hicpro.config index 3eafbd1..9ef8dc6 100644 --- a/conf/hicpro.config +++ b/conf/hicpro.config @@ -8,10 +8,35 @@ */ params { + + // Alignment options bwt2_index = '/data/annotations/pipelines/Human/hg19/indexes/bowtie2/hg19' bwt2_opts_end2end = '--very-sensitive -L 30 --score-min L,-0.6,-0.2 --end-to-end --reorder' bwt2_opts_trimmed = '--very-sensitive -L 20 --score-min L,-0.6,-0.2 --end-to-end --reorder' - restriction_fragment_bed = '/data/users/nservant/Apps/HiC-Pro_annotation/HindIII_resfrag_hg19.bed' + min_mapq = 10 + + // Genome Reference chromosome_size = '/data/users/nservant/Apps/HiC-Pro_annotation/chrom_hg19.sizes' + + // Digestion Hi-C + restriction_fragment_bed = '/data/users/nservant/Apps/HiC-Pro_annotation/HindIII_resfrag_hg19.bed' + ligation_site = 'AAGCTAGCTT' + min_restriction_fragment_size = 0 + max_restriction_fragment_size = 100 + min_insert_size = 0 + max_insert_size = 500 + + // Hi-C Processing + min_cis_dist = 1000 + rm_singleton = true + rm_multi = true + rm_dup = true + + bins_size = '1000000,500000' + + ice_max_iter = 100 + ice_filer_low_count_perc = 0.02 + ice_filer_high_count_perc = 0 + ice_eps = 0.1 } diff --git a/conf/test.config b/conf/test.config index e8a8e0e..4a8a3ad 100644 --- a/conf/test.config +++ b/conf/test.config @@ -13,8 +13,7 @@ params { max_memory = 6.GB max_time = 48.h // Input data - readPaths = [ - ['Testdata2', ['/bioinfo/users/nservant/GIT/HiCPro/test-op/hSRR400264_00_R1.fastq.gz', '/bioinfo/users/nservant/GIT/HiCPro/test-op/hSRR400264_00_R1.fastq.gz']], - ['Testdata1', ['/bioinfo/users/nservant/GIT/HiCPro/test-op/hSRR400264_01_R1.fastq.gz', '/bioinfo/users/nservant/GIT/HiCPro/test-op/hSRR400264_01_R1.fastq.gz']], + ['SRR400264_00', ['https://github.com/nf-core/test-datasets/raw/hic/SRR400264_00_R1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/hic/SRR400264_00_R1.fastq.gz']], + ['SRR400264_01', ['https://github.com/nf-core/test-datasets/raw/hic/SRR400264_01_R1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/hic/SRR400264_01_R1.fastq.gz']], ] } diff --git a/environment.yml b/environment.yml index 42c74b4..542fb9b 100644 --- a/environment.yml +++ b/environment.yml @@ -4,6 +4,12 @@ channels: - bioconda - defaults dependencies: - # TODO nf-core: Add required software dependencies here - - fastqc=0.11.8 + - python=2.7.11 + - conda-forge::scipy=0.18.1 + - conda-forge::numpy=1.12.1 + - bcbio::bx-python=0.5.0 + - bioconda::pysam=0.8.3 + - cooler=0.8.3 + - bowtie2=2.2.9 + - samtools=1.9 - multiqc=1.6 diff --git a/main.nf b/main.nf index 49b3d61..6ce7933 100644 --- a/main.nf +++ b/main.nf @@ -10,8 +10,16 @@ */ +/*TOOO +- outputs +- env +- multiqc +- update version tools +- install + compile +*/ + + def helpMessage() { - // TODO nf-core: Add to this help message with new command line parameters log.info""" ======================================================= ,--./,-. @@ -39,10 +47,33 @@ def helpMessage() { Available: conda, docker, singularity, awsbatch, test and more. Options: - - - References If not specified in the configuration file or you wish to overwrite any of the references. - --fasta Path to Fasta reference + --bwt2_index Path to bowtie2 indexes (including indexes prefix) + --bwt2_opts_end2end Option for bowtie2 end-to-end mappinf (first mapping step) + --bwt2_opts_trimmed Option for bowtie2 mapping after ligation site trimming + --min_mapq Minimum mapping quality values to consider + + --chromosome_size Path to chromosome size file + --restriction_fragment_bed Path to restriction fragment file (bed) + --ligation-site Ligation motifs to trim (comma separated) + + --min_restriction_fragment_size Minimum size of restriction fragments to consider + --max_restriction_framgnet_size Maximum size of restriction fragmants to consider + --min_insert_size Minimum insert size of mapped reads to consider + --max_insert_size Maximum insert size of mapped reads to consider + --min_cis_dist Minimum intra-chromosomal distance to consider + --rm_singleton Remove singleton reads + --rm_multi Remove multi-mapped reads + --rm_dup Remove duplicates + + --bin_size Bin size for contact maps (comma separated) + --ice_max_iter Maximum number of iteration for ICE normalization + --ice_filter_low_count_perc Percentage of low counts columns/rows to filter before ICE normalization + --ice_filter_high_count_perc Percentage of high counts columns/rows to filter before ICE normalization + --ice_eps Convergence criteria for ICE normalization + + + //References If not specified in the configuration file or you wish to overwrite any of the references. + // --fasta Path to Fasta reference Other options: --outdir The output directory where the results will be saved @@ -129,11 +160,10 @@ bwt2_file = file("${params.bwt2_index}.1.bt2") if( !bwt2_file.exists() ) exit 1, "Reference genome Bowtie 2 not found: ${params.bwt2_index}" bwt2_index = Channel.value( "${params.bwt2_index}" ) -// Restriction fragment -res_frag_file = Channel.value( "${params.restriction_fragment_bed}" ) -// Chromosome size +res_frag_file = Channel.value( "${params.restriction_fragment_bed}" ) chr_size = Channel.value( "${params.chromosome_size}" ) +map_res = Channel.from( params.bins_size.tokenize(',') ) @@ -157,8 +187,7 @@ summary['Pipeline Version'] = workflow.manifest.version summary['Run Name'] = custom_runName ?: workflow.runName // TODO nf-core: Report custom parameters here summary['Reads'] = params.reads -summary['Fasta Ref'] = params.fasta -//summary['Data Type'] = params.singleEnd ? 'Single-End' : 'Paired-End' +//summary['Fasta Ref'] = params.fasta summary['Max Memory'] = params.max_memory summary['Max CPUs'] = params.max_cpus summary['Max Time'] = params.max_time @@ -264,7 +293,7 @@ process trim_reads { script: """ cutsite_trimming --fastq $reads \\ - --cutsite params.ligation_motifs \\ + --cutsite ${params.ligation_site} \\ --out ${prefix}_trimmed.fastq """ } @@ -280,10 +309,9 @@ process bowtie2_on_trimmed_reads { script: prefix = reads.toString() - ~/(_trimmed)?(\.fq)?(\.fastq)?(\.gz)?$/ - def bwt2_opts = params.bwt2_opts_trimmed """ bowtie2 --rg-id BMG --rg SM:${prefix} \\ - ${bwt2_opts} \\ + ${params.bwt2_opts_trimmed} \\ -p ${task.cpus} \\ -x ${bt2_index} \\ -U ${reads} | samtools view -bS - > ${prefix}_trimmed.bam @@ -328,8 +356,12 @@ process combine_mapped_files{ r1_prefix = r1_bam.toString() - ~/_bwt2merged.bam$/ r2_bam = aligned_bam[1] r2_prefix = r2_bam.toString() - ~/_bwt2merged.bam$/ + + def opts = "" + opts = params.rm_singleton ? "${opts}" : "--single ${opts}" + opts = params.rm_multi ? "${opts}" : "--multi ${opts}" """ - mergeSAM.py -f ${r1_bam} -r ${r2_bam} -o ${sample}_bwt2pairs.bam + mergeSAM.py -f ${r1_bam} -r ${r2_bam} -o ${sample}_bwt2pairs.bam -q ${params.min_mapq} ${opts} """ } @@ -345,10 +377,18 @@ process get_valid_interaction{ output: set val(sample), file("*.validPairs") into valid_pairs + set val(sample), file("*.validPairs") into valid_pairs_4cool script: + def opts = "" + if (params.min_cis_dist != "") opts="${opts} -d ${params.min_cis_dist}" + if (params.min_insert_size != "") opts="${opts} -s ${params.min_insert_size}" + if (params.max_insert_size != "") opts="${opts} -l ${params.max_insert_size}" + if (params.min_restriction_fragment_size != "") opts="${opts} -t ${params.min_restriction_fragment_size}" + if (params.max_restriction_fragment_size != "") opts="${opts} -m ${params.max_restriction_fragment_size}" + """ - mapped_2hic_fragments.py -f ${frag_file} -r ${pe_bam} + mapped_2hic_fragments.py -f ${frag_file} -r ${pe_bam} ${opts} """ } @@ -358,24 +398,64 @@ process get_valid_interaction{ */ process build_contact_maps{ - tag "$sample" + tag "$sample - $mres" input: - set val(sample), file(vpairs) from valid_pairs + set val(sample), file(vpairs), val(mres) from valid_pairs.combine(map_res) val chrsize from chr_size output: - set val(sample), file("*.matrix") into matrix_file + file("*.matrix") into raw_maps script: """ - build_matrix --matrix-format upper --binsize 1000000 --chrsizes ${chrsize} --ifile ${vpairs} --oprefix ${sample}_1000000 + build_matrix --matrix-format upper --binsize ${mres} --chrsizes ${chrsize} --ifile ${vpairs} --oprefix ${sample}_${mres} """ +} + +/* + * STEP 4 - NORMALIZE MATRIX +*/ + +process run_iced{ + tag "$rmaps" + input: + file(rmaps) from raw_maps + + output: + file("*iced.matrix") into iced_maps + script: + prefix = rmaps.toString() - ~/(\.matrix)?$/ + """ + ice --filter_low_counts_perc ${params.ice_filer_low_count_perc} \ + --results_filename ${prefix}_iced.matrix \ + --filter_high_counts_perc ${params.ice_filer_high_count_perc} \ + --max_iter ${params.ice_max_iter} --eps ${params.ice_eps} --remove-all-zeros-loci --output-bias 1 --verbose 1 ${rmaps} + """ } +/* + * STEP 5 - COOLER FILE + + +process generate_cool{ + tag "$sample" + input: + set val(sample), file(vpairs) from valid_pairs_4cool + val chrsize from chr_size + + output: + file("*mcool") into cool_maps + + script: + """ + hicpro2higlass.sh -i $vpairs -r 5000 -c ${chrsize} -n + """ +} +*/ /* - // STEP 2 - MultiQC + // STEP 5 - MultiQC process multiqc { publishDir "${params.outdir}/MultiQC", mode: 'copy' @@ -495,5 +575,4 @@ workflow.onComplete { output_tf.withWriter { w -> w << email_txt } log.info "[nf-core/hic] Pipeline Complete" - } diff --git a/nextflow.config b/nextflow.config index 0e1bb10..7bf286a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -17,7 +17,7 @@ params { // Workflow flags // TODO nf-core: Specify your pipeline's command line flags - reads = "data/*{1,2}.fastq.gz" + reads = "*{1,2}.fastq.gz" outdir = './results' // Boilerplate options @@ -41,8 +41,10 @@ includeConfig 'conf/base.config' // Load nf-core custom profiles from different Institutions includeConfig "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}/nfcore_custom.config" +// Load hic config file includeConfig 'conf/hicpro.config' +// Create profiles profiles { awsbatch { includeConfig 'conf/awsbatch.config' } conda { process.conda = "$baseDir/environment.yml" } -- GitLab