diff --git a/.travis.yml b/.travis.yml index 43a67666d7f1bcdeb18ba4e2874dd26dfdf613f3..966de4b291e58f20cafbedcf791057f2789dbd8a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -28,7 +28,6 @@ install: env: - NXF_VER='0.32.0' # Specify a minimum NF version that should be tested and work - - NXF_VER='' # Plus: get the latest NF version and check that it works script: # Lint the pipeline code diff --git a/conf/base.config b/conf/base.config index 11bc185704befdf72dee8b90592695f4c74fbe4a..ad6b710c7c11eec30b0bd9b7396dfe0541691bec 100644 --- a/conf/base.config +++ b/conf/base.config @@ -22,19 +22,26 @@ process { maxErrors = '-1' // Process-specific resource requirements + + withName:makeBowtie2Index { + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 10.GB * task.attempt, 'memory' ) } + time = { check_max( 12.h * task.attempt, 'time' ) } + } + withName:bowtie2_end_to_end { cpus = { check_max( 2, 'cpus' ) } - memory = { check_max( 16.GB * task.attempt, 'memory' ) } + memory = { check_max( 4.GB * task.attempt, 'memory' ) } time = { check_max( 5.h * task.attempt, 'time' ) } } withName:bowtie2_on_trimmed_reads { cpus = { check_max( 2, 'cpus' ) } - memory = { check_max( 16.GB * task.attempt, 'memory' ) } + memory = { check_max( 4.GB * task.attempt, 'memory' ) } time = { check_max( 5.h * task.attempt, 'time' ) } } withName:merge_mapping_steps { cpus = { check_max( 4, 'cpus' ) } - memory = { check_max( 20.GB * task.attempt, 'memory' ) } + memory = { check_max( 8.GB * task.attempt, 'memory' ) } time = { check_max( 5.h * task.attempt, 'time' ) } } withName:trim_reads { @@ -59,15 +66,15 @@ process { } withName:run_iced { cpus = { check_max( 1, 'cpus' ) } - memory = { check_max( 20.GB * task.attempt, 'memory' ) } + memory = { check_max( 10.GB * task.attempt, 'memory' ) } time = { check_max( 5.h * task.attempt, 'time' ) } } } params { // Defaults only, expecting to be overwritten - max_memory = 20.GB - max_cpus = 1 + max_memory = 8.GB + max_cpus = 2 max_time = 24.h igenomes_base = 's3://ngi-igenomes/igenomes/' } diff --git a/conf/hicpro.config b/conf/hicpro.config index 63b10192f1034ec0dc93920670a86fe782537812..163b0863cc181bdc12724a395ae4a5fa561f47af 100644 --- a/conf/hicpro.config +++ b/conf/hicpro.config @@ -18,7 +18,7 @@ params { restriction_site = 'A^AGGCT' ligation_site = 'AAGCTAGCTT' min_restriction_fragment_size = 0 - max_restriction_fragment_size = 100 + max_restriction_fragment_size = 1000 min_insert_size = 0 max_insert_size = 500 diff --git a/main.nf b/main.nf index 1e9ed2fe7f1e060132e81b4d4e003eca6908a3ed..ba99843af75bc234a3c7d8d592edcd6504d2291f 100644 --- a/main.nf +++ b/main.nf @@ -104,6 +104,8 @@ if (params.genomes && params.genome && !params.genomes.containsKey(params.genome // Define these here - after the profiles are loaded with the iGenomes paths params.bwt2_index = params.genome ? params.genomes[ params.genome ].bowtie2 ?: false : false params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false + + //params.chromosome_size = false //params.restriction_fragments = false @@ -139,11 +141,6 @@ ch_output_docs = Channel.fromPath("$baseDir/docs/output.md") */ if (params.readPaths){ - Channel - .from( params.readPaths ) - .map { row -> [ row[0], [file(row[1][0]), file(row[1][1])]] } - .ifEmpty { exit 1, "params.readPaths was empty - no input files supplied" } - .set { raw_reads_pairs } raw_reads = Channel.create() raw_reads_2 = Channel.create() @@ -152,11 +149,8 @@ if (params.readPaths){ .from( params.readPaths ) .map { row -> [ row[0], [file(row[1][0]), file(row[1][1])]] } .separate( raw_reads, raw_reads_2 ) { a -> [tuple(a[0], a[1][0]), tuple(a[0], a[1][1])] } + .println() }else{ - Channel - .fromFilePairs( params.reads ) - .ifEmpty { exit 1, "params.readPaths was empty - no input files supplied" } - .set { raw_reads_pairs } raw_reads = Channel.create() raw_reads_2 = Channel.create() @@ -166,6 +160,8 @@ if (params.readPaths){ .separate( raw_reads, raw_reads_2 ) { a -> [tuple(a[0], a[1][0]), tuple(a[0], a[1][1])] } } +raw_reads = raw_reads.concat( raw_reads_2 ) + // SPlit fastq files // https://www.nextflow.io/docs/latest/operator.html#splitfastq @@ -174,21 +170,21 @@ if (params.readPaths){ */ // Reference genome - if ( params.bwt2_index ){ lastPath = params.bwt2_index.lastIndexOf(File.separator) bwt2_dir = params.bwt2_index.substring(0,lastPath+1) bwt2_base = params.bwt2_index.substring(lastPath+1) - Channel.fromPath( bwt2_dir, checkIfExists: true ) + Channel.fromPath( bwt2_dir , checkIfExists: true) .ifEmpty { exit 1, "Genome index: Provided index not found: ${params.bwt2_index}" } - .into { bwt2_index_end2end; bwt2_index_trim } + .into { bwt2_index_end2end; bwt2_index_trim } + } else if ( params.fasta ) { lastPath = params.fasta.lastIndexOf(File.separator) bwt2_base = params.fasta.substring(lastPath+1) - Channel.fromPath( params.fasta, checkIfExists: true ) + Channel.fromPath( params.fasta ) .ifEmpty { exit 1, "Genome index: Fasta file not found: ${params.fasta}" } .set { fasta_for_index } } @@ -196,14 +192,18 @@ else { exit 1, "No reference genome specified!" } +//println (bwt2_dir) +//println (bwt2_base) + + // Chromosome size if ( params.chromosome_size ){ - Channel.FromPath( params.chromosome_size, checkIfExists: true ) - .set {chromosome_size} + Channel.fromPath( params.chromosome_size , checkIfExists: true) + .set {chromosome_size} } else if ( params.fasta ){ - Channel.fromPath( params.fasta, checkIfExists: true ) + Channel.fromPath( params.fasta ) .ifEmpty { exit 1, "Chromosome sizes: Fasta file not found: ${params.fasta}" } .set { fasta_for_chromsize } } @@ -213,11 +213,11 @@ else { // Restriction fragments if ( params.restriction_fragments ){ - Channel.FromPath( params.restriction_fragments, checkIfExists: true ) + Channel.fromPath( params.restriction_fragments, checkIfExists: true ) .set {res_frag_file} } else if ( params.fasta && params.restriction_site ){ - Channel.fromPath(params.fasta, checkIfExists: true) + Channel.fromPath( params.fasta ) .ifEmpty { exit 1, "Restriction fragments: Fasta file not found: ${params.fasta}" } .set { fasta_for_resfrag } } @@ -247,10 +247,11 @@ def summary = [:] summary['Pipeline Name'] = 'nf-core/hic' summary['Pipeline Version'] = workflow.manifest.version summary['Run Name'] = custom_runName ?: workflow.runName -// TODO nf-core: Report custom parameters here + summary['Reads'] = params.reads summary['Fasta Ref'] = params.fasta + summary['Max Memory'] = params.max_memory summary['Max CPUs'] = params.max_cpus summary['Max Time'] = params.max_time @@ -302,13 +303,13 @@ process get_software_versions { script: """ - echo $workflow.manifest.version > v_pipeline.txt - echo $workflow.nextflow.version > v_nextflow.txt - bowtie2 --version > v_bowtie2.txt - python --version > v_python.txt - samtools --version > v_samtools.txt - scrape_software_versions.py > software_versions_mqc.yaml - """ + echo $workflow.manifest.version > v_pipeline.txt + echo $workflow.nextflow.version > v_nextflow.txt + bowtie2 --version > v_bowtie2.txt + python --version > v_python.txt + samtools --version > v_samtools.txt + scrape_software_versions.py > software_versions_mqc.yaml + """ } @@ -317,8 +318,8 @@ process get_software_versions { */ if(!params.bwt2_index && params.fasta){ - process makeBowtieIndex { - tag "$fasta" + process makeBowtie2Index { + tag "$bwt2_base" //publishDir path: { params.saveReference ? "${params.outdir}/reference_genome" : params.outdir }, // saveAs: { params.saveReference ? it : null }, mode: 'copy' @@ -326,12 +327,14 @@ if(!params.bwt2_index && params.fasta){ file fasta from fasta_for_index output: - file "bowtie2" into bwt2_index + file "bowtie2_index" into bwt2_index_end2end + file "bowtie2_index" into bwt2_index_trim script: + bwt2_base = fasta.toString() - ~/(\.fa)?(\.fasta)?(\.fas)?$/ """ - mkdir bwt2_index - + mkdir bowtie2_index + bowtie2-build ${fasta} bowtie2_index/${bwt2_base} """ } } @@ -351,7 +354,8 @@ if(!params.chromosome_size && params.fasta){ script: """ - samtools faidx ${fasta} | cut -f1,2 > chrom.size + samtools faidx ${fasta} + cut -f1,2 ${fasta}.fai > chrom.size """ } } @@ -383,13 +387,11 @@ if(!params.restriction_fragments && params.fasta){ * STEP 1 - Two-steps Reads Mapping */ -raw_reads = raw_reads.concat( raw_reads_2 ) - process bowtie2_end_to_end { tag "$prefix" input: set val(sample), file(reads) from raw_reads - file index from bwt2_index_end2end + file index from bwt2_index_end2end.collect() output: set val(prefix), file("${prefix}_unmap.fastq") into unmapped_end_to_end @@ -428,7 +430,7 @@ process bowtie2_on_trimmed_reads { tag "$prefix" input: set val(prefix), file(reads) from trimmed_reads - file index from bwt2_index_trim + file index from bwt2_index_trim.collect() output: set val(prefix), file("${prefix}_trimmed.bam") into trimmed_bam @@ -468,7 +470,6 @@ process merge_mapping_steps{ """ } - process combine_mapped_files{ tag "$sample = $r1_prefix + $r2_prefix" input: @@ -491,10 +492,12 @@ process combine_mapped_files{ """ } + /* * STEP2 - DETECT VALID PAIRS */ + process get_valid_interaction{ tag "$sample" input: @@ -523,6 +526,7 @@ process get_valid_interaction{ * STEP3 - BUILD MATRIX */ +/* process build_contact_maps{ tag "$sample - $mres" input: @@ -537,11 +541,13 @@ process build_contact_maps{ build_matrix --matrix-format upper --binsize ${mres} --chrsizes ${chrsize} --ifile ${vpairs} --oprefix ${sample}_${mres} """ } +*/ /* * STEP 4 - NORMALIZE MATRIX */ +/* process run_iced{ tag "$rmaps" input: @@ -559,11 +565,11 @@ process run_iced{ --max_iter ${params.ice_max_iter} --eps ${params.ice_eps} --remove-all-zeros-loci --output-bias 1 --verbose 1 ${rmaps} """ } +*/ /* * STEP 5 - COOLER FILE - process generate_cool{ tag "$sample" input: diff --git a/nextflow.config b/nextflow.config index fe584c309301808d8aa860cec38a868beb01bb21..0075f7f7e82e408308915d5c82bb219bef2eff6f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -20,6 +20,11 @@ params { // TODO nf-core: Specify your pipeline's command line flags reads = "*{1,2}.fastq.gz" outdir = './results' + genome = false + readPaths = false + chromosome_size = false + restriction_fragments = false + // Boilerplate options name = false @@ -27,7 +32,7 @@ params { email = false plaintext_email = false help = false - //igenomes_base = "./iGenomes" + igenomes_base = "./iGenomes" tracedir = "${params.outdir}/pipeline_info" clusterOptions = false awsqueue = false