diff --git a/CHANGELOG.md b/CHANGELOG.md index b982a5782b737510d5eccd2fe6331c73fd192480..019a0e11fb5808c9d240033171694226a613c5e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # nf-core/hic: Changelog +## v1.1dev + +* Fix bug for reads extension _1/_2 (#30) + +* Update manual (#28) + ## v1.0dev - 2019-04-09 First version of nf-core-hic pipeline which is a Nextflow implementation of the [HiC-Pro pipeline](https://github.com/nservant/HiC-Pro/). diff --git a/docs/usage.md b/docs/usage.md index 9b2bb6a5b160dbb19632188ee91c66e58ce58a21..03acd69bbca376826643231be66742d4cc3a28e1 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -85,7 +85,7 @@ NXF_OPTS='-Xms1g -Xmx4g' The typical command for running the pipeline is as follows: ```bash -nextflow run nf-core/hic --reads '*_R{1,2}.fastq.gz' -genome GRCh37 -profile docker +nextflow run nf-core/hic --reads '*_R{1,2}.fastq.gz' --genome GRCh37 -profile docker ``` This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. diff --git a/main.nf b/main.nf index eeb692312a671226acc27244bd86c7f8953e6ccb..0d51db3548aff9e55c2f3fd38b5ea9d42e722b57 100644 --- a/main.nf +++ b/main.nf @@ -22,57 +22,56 @@ def helpMessage() { nextflow run nf-core/hic --reads '*_R{1,2}.fastq.gz' -profile conda Mandatory arguments: - --reads Path to input data (must be surrounded with quotes) - -profile Configuration profile to use. Can use multiple (comma separated) - Available: conda, docker, singularity, awsbatch, test and more. + --reads Path to input data (must be surrounded with quotes) + -profile Configuration profile to use. Can use multiple (comma separated) + Available: conda, docker, singularity, awsbatch, test and more. - References: If not specified in the configuration file or you wish to overwrite any of the references. 
- --genome Name of iGenomes reference - --bwt2_index Path to Bowtie2 index - --fasta Path to Fasta reference - --chromosome_size Path to chromosome size file - --restriction_fragments Path to restriction fragment file (bed) + References: If not specified in the configuration file or you wish to overwrite any of the references. + --genome Name of iGenomes reference + --bwt2_index Path to Bowtie2 index + --fasta Path to Fasta reference + --chromosome_size Path to chromosome size file + --restriction_fragments Path to restriction fragment file (bed) Options: - --bwt2_opts_end2end Options for bowtie2 end-to-end mappinf (first mapping step) - --bwt2_opts_trimmed Options for bowtie2 mapping after ligation site trimming - --min_mapq Minimum mapping quality values to consider - - --restriction_site Cutting motif(s) of restriction enzyme(s) (comma separated) - --ligation_site Ligation motifs to trim (comma separated) - --min_restriction_fragment_size Minimum size of restriction fragments to consider - --max_restriction_framgnet_size Maximum size of restriction fragmants to consider - --min_insert_size Minimum insert size of mapped reads to consider - --max_insert_size Maximum insert size of mapped reads to consider - - --dnase Run DNase Hi-C mode. All options related to restriction fragments are not considered - - --min_cis_dist Minimum intra-chromosomal distance to consider - --rm_singleton Remove singleton reads - --rm_multi Remove multi-mapped reads - --rm_dup Remove duplicates - - --bin_size Bin size for contact maps (comma separated) - --ice_max_iter Maximum number of iteration for ICE normalization - --ice_filter_low_count_perc Percentage of low counts columns/rows to filter before ICE normalization - --ice_filter_high_count_perc Percentage of high counts columns/rows to filter before ICE normalization - --ice_eps Convergence criteria for ICE normalization + --bwt2_opts_end2end Options for bowtie2 end-to-end mapping (first mapping step). See hic.config for default. 
+ --bwt2_opts_trimmed Options for bowtie2 mapping after ligation site trimming. See hic.config for default. + --min_mapq Minimum mapping quality values to consider. Default: 10 + --restriction_site Cutting motif(s) of restriction enzyme(s) (comma separated). Default: 'A^AGCTT' + --ligation_site Ligation motifs to trim (comma separated). Default: 'AAGCTAGCTT' + --min_restriction_fragment_size Minimum size of restriction fragments to consider. Default: None + --max_restriction_framgnet_size Maximum size of restriction fragments to consider. Default: None + --min_insert_size Minimum insert size of mapped reads to consider. Default: None + --max_insert_size Maximum insert size of mapped reads to consider. Default: None + + --dnase Run DNase Hi-C mode. All options related to restriction fragments are not considered. Default: false + + --min_cis_dist Minimum intra-chromosomal distance to consider. Default: None + --rm_singleton Remove singleton reads. Default: true + --rm_multi Remove multi-mapped reads. Default: true + --rm_dup Remove duplicates. Default: true + + --bin_size Bin size for contact maps (comma separated). Default: '1000000,500000' + --ice_max_iter Maximum number of iterations for ICE normalization. Default: 100 + --ice_filter_low_count_perc Percentage of low counts columns/rows to filter before ICE normalization. Default: 0.02 + --ice_filter_high_count_perc Percentage of high counts columns/rows to filter before ICE normalization. Default: 0 + --ice_eps Convergence criteria for ICE normalization. Default: 0.1 Other options: - --splitFastq Size of read chuncks to use to speed up the workflow - --outdir The output directory where the results will be saved - --email Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits - -name Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic. 
+ --splitFastq Size of read chunks to use to speed up the workflow. Default: None + --outdir The output directory where the results will be saved. Default: './results' + --email Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. Default: None + -name Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic. Default: None Step options: - --skip_maps Skip generation of contact maps. Useful for capture-C - --skip_ice Skip ICE normalization - --skip_cool Skip generation of cool files - --skip_multiQC Skip MultiQC + --skip_maps Skip generation of contact maps. Useful for capture-C. Default: false + --skip_ice Skip ICE normalization. Default: false + --skip_cool Skip generation of cool files. Default: false + --skip_multiQC Skip MultiQC. Default: false AWSBatch options: - --awsqueue The AWSBatch JobQueue that needs to be set when running on AWSBatch - --awsregion The AWS Region for your AWS Batch job to run on + --awsqueue The AWSBatch JobQueue that needs to be set when running on AWSBatch + --awsregion The AWS Region for your AWS Batch job to run on """.stripIndent() } @@ -494,8 +493,8 @@ if (!params.dnase){ set val(oname), file("${prefix}.mapstat") into all_mapstat script: - sample = prefix.toString() - ~/(_R1|_R2|_val_1|_val_2)/ - tag = prefix.toString() =~/_R1|_val_1/ ? "R1" : "R2" + sample = prefix.toString() - ~/(_R1$|_R2$|_val_1$|_val_2$|_1$|_2$)/ + tag = prefix.toString() =~/_R1$|_val_1$|_1$/ ? "R1" : "R2" oname = prefix.toString() - ~/(\.[0-9]+)$/ """ @@ -535,8 +534,8 @@ if (!params.dnase){ set val(oname), file("${prefix}.mapstat") into all_mapstat script: - sample = prefix.toString() - ~/(_R1|_R2|_val_1|_val_2)/ - tag = prefix.toString() =~/_R1|_val_1/ ? "R1" : "R2" + sample = prefix.toString() - ~/(_R1$|_R2$|_val_1$|_val_2$|_1$|_2$)/ + tag = prefix.toString() =~/_R1$|_val_1$|_1$/ ? 
"R1" : "R2" oname = prefix.toString() - ~/(\.[0-9]+)$/ """ @@ -552,6 +551,7 @@ if (!params.dnase){ } } + process combine_mapped_files{ tag "$sample = $r1_prefix + $r2_prefix" publishDir "${params.outdir}/mapping", mode: 'copy', @@ -699,7 +699,7 @@ process merge_sample { file("mstats/") into all_mstats script: - sample = prefix.toString() - ~/(_R1|_R2|_val_1|_val_2)/ + sample = prefix.toString() - ~/(_R1$|_R2$|_val_1$|_val_2$|_1$|_2$)/ if ( (fstat =~ /.mapstat/) ){ ext = "mmapstat" } if ( (fstat =~ /.pairstat/) ){ ext = "mpairstat" } if ( (fstat =~ /.RSstat/) ){ ext = "mRSstat" }