diff --git a/conf/test.config b/conf/test.config index d06ad508f9c3cd0db4b2173b5fe7a890216369c5..5c5fc84c35989f039418aeba4bc5b5b1c10da1a6 100644 --- a/conf/test.config +++ b/conf/test.config @@ -35,4 +35,7 @@ params { res_tads = '1000' tads_caller = 'insulation,hicexplorer' res_compartments = '1000' + + // Ignore `--input` as otherwise the parameter validation will throw an error + schema_ignore_params = 'genomes,digest,input_paths,input' } diff --git a/conf/test_full.config b/conf/test_full.config index 65dcbf8f5ddbce6c5e46c3160461e87a3ee56e98..1e793cc57628bdbed6bbe322e558bffc0e15a3d1 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -11,8 +11,6 @@ params { config_profile_name = 'Full test profile' config_profile_description = 'Full test dataset to check pipeline function' - // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA) - // TODO nf-core: Give any required params for the test so that command line flags are not needed // Input data for full size test input_paths = [ ['SRR4292758_00', ['https://github.com/nf-core/test-datasets/raw/hic/data/SRR4292758_00_R1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/hic/data/SRR4292758_00_R2.fastq.gz']] @@ -20,19 +18,19 @@ params { // Annotations fasta = 'https://github.com/nf-core/test-datasets/raw/hic/reference/W303_SGD_2015_JRIU00000000.fsa' - restriction_site = 'A^AGCTT' - ligation_site = 'AAGCTAGCTT' - - min_mapq = 2 - rm_dup = true - rm_singleton = true - rm_multi = true - + digestion = 'hindiii' + min_mapq = 10 min_restriction_fragment_size = 100 max_restriction_fragment_size = 100000 min_insert_size = 100 max_insert_size = 600 + + bin_size = '1000' + res_dist_decay = '1000' + res_tads = '1000' + tads_caller = 'insulation,hicexplorer' + res_compartments = '1000' - // Options - skip_cool = true + // Ignore `--input` as otherwise the parameter validation will throw an error + schema_ignore_params = 'genomes,digest,input_paths,input' } diff --git a/main.nf b/main.nf index 776a48e6cfe7a366fdfa07d385fe1d14200a7e78..e45087a3d979c6a75766868af11fd8de0a5c1688 100644 --- a/main.nf +++ b/main.nf @@ -121,19 +121,19 @@ if (params.split_fastq ){ // Reference genome if ( params.bwt2_index ){ - lastPath = params.bwt2_index.lastIndexOf(File.separator) - bwt2_dir = params.bwt2_index.substring(0,lastPath+1) - bwt2_base = params.bwt2_index.substring(lastPath+1) + //lastPath = params.bwt2_index.lastIndexOf(File.separator) + //bwt2_dir = params.bwt2_index.substring(0,lastPath+1) + //bwt2_base = params.bwt2_index.substring(lastPath+1) - Channel.fromPath( bwt2_dir , checkIfExists: true) + Channel.fromPath( params.bwt2_index , checkIfExists: true) .ifEmpty { exit 1, "Genome index: Provided index not found: ${params.bwt2_index}" } .into { bwt2_index_end2end; bwt2_index_trim } } else if ( params.fasta ) { - lastPath = params.fasta.lastIndexOf(File.separator) - fasta_base = params.fasta.substring(lastPath+1) - bwt2_base = fasta_base.toString() - ~/(\.fa)?(\.fasta)?(\.fas)?(\.fsa)?$/ + //lastPath = params.fasta.lastIndexOf(File.separator) + //fasta_base = params.fasta.substring(lastPath+1) + //fasta_base = fasta_base.toString() - ~/(\.fa)?(\.fasta)?(\.fas)?(\.fsa)?$/ Channel.fromPath( params.fasta ) .ifEmpty { exit 1, "Genome index: Fasta file not found: ${params.fasta}" } @@ -329,7 +329,7 @@ process get_software_versions { if(!params.bwt2_index && params.fasta){ process makeBowtie2Index { - tag "$bwt2_base" + tag "$fasta_base" label 'process_highmem' publishDir path: { params.save_reference ? "${params.outdir}/reference_genome" : params.outdir }, saveAs: { params.save_reference ? it : null }, mode: params.publish_dir_mode @@ -342,9 +342,10 @@ if(!params.bwt2_index && params.fasta){ file "bowtie2_index" into bwt2_index_trim script: + fasta_base = fasta.toString() - ~/(\.fa)?(\.fasta)?(\.fas)?(\.fsa)?$/ """ mkdir bowtie2_index - bowtie2-build ${fasta} bowtie2_index/${bwt2_base} + bowtie2-build ${fasta} bowtie2_index/${fasta_base} """ } } @@ -418,19 +419,21 @@ process bowtie2_end_to_end { def bwt2_opts = params.bwt2_opts_end2end if (!params.dnase){ """ + INDEX=`find -L ./ -name "*.rev.1.bt2" | sed 's/.rev.1.bt2//'` bowtie2 --rg-id BMG --rg SM:${prefix} \\ ${bwt2_opts} \\ -p ${task.cpus} \\ - -x ${index}/${bwt2_base} \\ + -x \${INDEX} \\ --un ${prefix}_unmap.fastq \\ -U ${reads} | samtools view -F 4 -bS - > ${prefix}.bam """ }else{ """ + INDEX=`find -L ./ -name "*.rev.1.bt2" | sed 's/.rev.1.bt2//'` bowtie2 --rg-id BMG --rg SM:${prefix} \\ ${bwt2_opts} \\ -p ${task.cpus} \\ - -x ${index}/${bwt2_base} \\ + -x \${INDEX} \\ --un ${prefix}_unmap.fastq \\ -U ${reads} > ${prefix}.bam """ @@ -480,10 +483,11 @@ process bowtie2_on_trimmed_reads { script: prefix = reads.toString() - ~/(_trimmed)?(\.fq)?(\.fastq)?(\.gz)?$/ """ + INDEX=`find -L ./ -name "*.rev.1.bt2" | sed 's/.rev.1.bt2//'` bowtie2 --rg-id BMG --rg SM:${prefix} \\ ${params.bwt2_opts_trimmed} \\ -p ${task.cpus} \\ - -x ${index}/${bwt2_base} \\ + -x \${INDEX} \\ -U ${reads} | samtools view -bS - > ${prefix}_trimmed.bam """ } @@ -784,9 +788,9 @@ process run_ice{ script: prefix = rmaps.toString() - ~/(\.matrix)?$/ """ - ice --filter_low_counts_perc ${params.ice_filer_low_count_perc} \ + ice --filter_low_counts_perc ${params.ice_filter_low_count_perc} \ --results_filename ${prefix}_iced.matrix \ - --filter_high_counts_perc ${params.ice_filer_high_count_perc} \ + --filter_high_counts_perc ${params.ice_filter_high_count_perc} \ --max_iter ${params.ice_max_iter} --eps ${params.ice_eps} --remove-all-zeros-loci --output-bias 1 --verbose 1 ${rmaps} """ } diff --git a/nextflow.config b/nextflow.config index 48a1cbcd6fe0a431fdcb8afc11678dc28e57298c..c5b39add6663b624d616043b62bc5fa75a749b30 100644 --- a/nextflow.config +++ b/nextflow.config @@ -63,8 +63,8 @@ params { bin_size = '1000000' res_zoomify = '5000' ice_max_iter = 100 - ice_filer_low_count_perc = 0.02 - ice_filer_high_count_perc = 0 + ice_filter_low_count_perc = 0.02 + ice_filter_high_count_perc = 0 ice_eps = 0.1 // Downstream Analysis @@ -86,7 +86,6 @@ params { // Boilerplate options publish_dir_mode = 'copy' multiqc_config = false - name = false email = false email_on_fail = false max_multiqc_email_size = 25.MB @@ -101,12 +100,13 @@ params { custom_config_version = 'master' custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" hostnames = false + config_profile_name = false config_profile_description = false config_profile_contact = false config_profile_url = false validate_params = true show_hidden_params = false - schema_ignore_params = 'genomes,input_paths' + schema_ignore_params = 'genomes,digest,input_paths' // Defaults only, expecting to be overwritten max_memory = 24.GB diff --git a/nextflow_schema.json b/nextflow_schema.json index f866246b95c35b2940e30606476e05215a68728c..92748776bce1616a11b79c049c14601d93f91bc4 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -133,7 +133,6 @@ }, "min_cis_dist": { "type": "integer", - "default": "O", "description": "Minimum distance between loci to consider. Useful for --dnase mode to remove spurious ligation products. Only values > 0 are considered" } } @@ -148,17 +147,16 @@ "split_fastq": { "type": "boolean", "description": "Split the reads into chunks before running the pipelne", - "fa_icon": "fas fa-dna", - "default": "false" + "fa_icon": "fas fa-dna" }, "fastq_chunks_size": { "type": "integer", "description": "Read number per chunks if split_fastq is used", - "default": "20000000" + "default": 20000000 }, "min_mapq": { "type": "integer", - "default": "10", + "default": 10, "description": "Keep aligned reads with a minimum quality value" }, "bwt2_opts_end2end": { @@ -185,33 +183,27 @@ "fa_icon": "fas fa-signature", "properties": { "keep_dups": { - "type": "string", - "description": "Keep duplicated reads", - "default": "False" + "type": "boolean", + "description": "Keep duplicated reads" }, "keep_multi": { - "type": "string", - "description": "Keep multi-aligned reads", - "default": "False" + "type": "boolean", + "description": "Keep multi-aligned reads" }, "max_insert_size": { "type": "integer", - "default": "0", "description": "Maximum fragment size to consider. Only values > 0 are considered" }, "min_insert_size": { "type": "integer", - "default": "0", "description": "Minimum fragment size to consider. Only values > 0 are considered" }, "max_restriction_fragment_size": { "type": "integer", - "default": "0", "description": "Maximum restriction fragment size to consider. Only values > 0 are considered" }, "min_restriction_fragment_size": { "type": "integer", - "default": "0", "description": "Minimum restriction fragment size to consider. Only values > 0 are considered" }, "save_interaction_bam": { @@ -232,29 +224,28 @@ "default": "'1000000,500000'", "description": "Resolution to build the maps (comma separated)" }, - "ice_filer_low_count_perc": { - "type": "string", + "ice_filter_low_count_perc": { + "type": "number", "default": 0.02, "description": "Filter low counts rows before normalization" }, - "ice_filer_high_count_perc": { + "ice_filter_high_count_perc": { "type": "integer", - "default": "0", "description": "Filter high counts rows before normalization" }, "ice_eps": { - "type": "string", - "default": "0.1", + "type": "number", + "default": 0.1, "description": "Threshold for ICE convergence" }, "ice_max_iter": { "type": "integer", - "default": "100", + "default": 100, "description": "Maximum number of iteraction for ICE normalization" }, "res_zoomify": { - "type": "integer", - "default": 5000, + "type": "string", + "default": "5000", "description": "Maximum resolution to build mcool file" } } @@ -266,8 +257,8 @@ "default": "", "properties": { "res_dist_decay": { - "type": "integer", - "default": 1000000, + "type": "string", + "default": "1000000", "description": "Resolution to build count/distance plot" }, "tads_caller": { @@ -281,8 +272,8 @@ "description": "Resolution to run TADs callers (comma separated)" }, "res_compartments": { - "type": "integer", - "default": 250000, + "type": "string", + "default": "250000", "description": "Resolution for compartments calling" } } @@ -299,33 +290,28 @@ "description": "Do not build contact maps" }, "skip_ice": { - "type": "string", - "description": "Do not run ICE normalization", - "default": "False" + "type": "boolean", + "description": "Do not run ICE normalization" }, "skip_dist_decay": { - "type": "string", - "description": "Do not run distance/decay plot", - "default": "False" + "type": "boolean", + "description": "Do not run distance/decay plot" }, "skip_tads": { - "type": "string", - "description": "Do not run TADs calling", - "default": "False" + "type": "boolean", + "description": "Do not run TADs calling" }, "skip_compartments": { "type": "string", "description": "Do not run compartments calling" }, "skip_balancing": { - "type": "string", - "description": "Do not run cooler balancing normalization", - "default": "False" + "type": "boolean", + "description": "Do not run cooler balancing normalization" }, "skip_mcool": { - "type": "string", - "description": "Do not generate mcool file for Higlass visualization", - "default": "False" + "type": "boolean", + "description": "Do not generate mcool file for Higlass visualization" }, "skip_multiqc": { "type": "boolean", @@ -487,9 +473,8 @@ }, "config_profile_name": { "type": "string", - "description": "Institutional config name.", - "hidden": true, - "fa_icon": "fas fa-users-cog" + "description": "Institutional config name", + "hidden": true }, "config_profile_description": { "type": "string",