diff --git a/CHANGELOG.md b/CHANGELOG.md index 25397928ef27bbee77b3d2af2c30edb9aa55233a..b982a5782b737510d5eccd2fe6331c73fd192480 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,14 +2,15 @@ ## v1.0dev - 2019-04-09 - First version of nf-core-hic pipeline which is a Nextflow implementation of the HiC-Pro pipeline [https://github.com/nservant/HiC-Pro]. - Note that all HiC-Pro functionalities are not yet all implemented. The current version is designed for protocols based on restriction enzyme digestion. - - In summary, this version allows : - * Automatic detection and generation of annotation files based on igenomes if not provided. - * Two-steps alignment of raw sequencing reads - * Reads filtering and detection of valid interaction products - * Generation of raw contact matrices for a set of resolutions - * Normalization of the contact maps using the ICE algorithm - * Generation of cooler file for visualization on higlass [https://higlass.io/] - * Quality report based on HiC-Pro MultiQC module +First version of nf-core-hic pipeline which is a Nextflow implementation of the [HiC-Pro pipeline](https://github.com/nservant/HiC-Pro/). +Note that not all HiC-Pro functionalities are implemented yet. The current version is designed for protocols based on restriction enzyme digestion. + +In summary, this version allows: + +* Automatic detection and generation of annotation files based on igenomes if not provided. 
+* Two-steps alignment of raw sequencing reads +* Reads filtering and detection of valid interaction products +* Generation of raw contact matrices for a set of resolutions +* Normalization of the contact maps using the ICE algorithm +* Generation of cooler file for visualization on [higlass](https://higlass.io/) +* Quality report based on HiC-Pro MultiQC module diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py index 8cf977c143a81ee4acc6a958a492f269739bf2b1..7a38feec0135d37268ff82fa0c92dab46c84ac6a 100755 --- a/bin/scrape_software_versions.py +++ b/bin/scrape_software_versions.py @@ -3,17 +3,21 @@ from __future__ import print_function from collections import OrderedDict import re -# TODO nf-core: Add additional regexes for new tools in process get_software_versions +# Add additional regexes for new tools in process get_software_versions regexes = { 'nf-core/hic': ['v_pipeline.txt', r"(\S+)"], 'Nextflow': ['v_nextflow.txt', r"(\S+)"], - 'FastQC': ['v_fastqc.txt', r"FastQC v(\S+)"], + 'Bowtie2': ['v_bowtie2.txt', r"Bowtie2 v(\S+)"], + 'Python': ['v_python.txt', r"Python v(\S+)"], + 'Samtools': ['v_samtools.txt', r"Samtools v(\S+)"], 'MultiQC': ['v_multiqc.txt', r"multiqc, version (\S+)"], } results = OrderedDict() results['nf-core/hic'] = '<span style="color:#999999;\">N/A</span>' results['Nextflow'] = '<span style="color:#999999;\">N/A</span>' -results['FastQC'] = '<span style="color:#999999;\">N/A</span>' +results['Bowtie2'] = '<span style="color:#999999;\">N/A</span>' +results['Python'] = '<span style="color:#999999;\">N/A</span>' +results['Samtools'] = '<span style="color:#999999;\">N/A</span>' results['MultiQC'] = '<span style="color:#999999;\">N/A</span>' # Search each file using its regex diff --git a/conf/base.config b/conf/base.config index 156fa28b432d8742d4252580f30329a12dfd6819..28b467901007da4efaf10945d3c2000644f69d90 100644 --- a/conf/base.config +++ b/conf/base.config @@ -11,7 +11,7 @@ process { - // TODO nf-core: Check the 
defaults for all processes + // Check the defaults for all processes cpus = { check_max( 1 * task.attempt, 'cpus' ) } memory = { check_max( 8.GB * task.attempt, 'memory' ) } time = { check_max( 2.h * task.attempt, 'time' ) } diff --git a/docs/configuration/local.md b/docs/configuration/local.md index 9cd485e2cd60b7670e242f4a399fde8e15bdaf92..d4530fa9007866b32cf2dda77ed780c4fe19f1e8 100644 --- a/docs/configuration/local.md +++ b/docs/configuration/local.md @@ -10,6 +10,7 @@ Nextflow has [excellent integration](https://www.nextflow.io/docs/latest/docker. First, install docker on your system: [Docker Installation Instructions](https://docs.docker.com/engine/installation/) Then, simply run the analysis pipeline: + ```bash nextflow run nf-core/hic -profile docker --genome '<genome ID>' ``` diff --git a/docs/configuration/reference_genomes.md b/docs/configuration/reference_genomes.md index 1fafa8feeaaef766fa294b9d1de1bd32ba1f7dc2..c52faf821ad7e676ed56dc04c92aae165ea573c7 100644 --- a/docs/configuration/reference_genomes.md +++ b/docs/configuration/reference_genomes.md @@ -39,11 +39,12 @@ Multiple reference index types are held together with consistent structure for m We have put a copy of iGenomes up onto AWS S3 hosting and this pipeline is configured to use this by default. The hosting fees for AWS iGenomes are currently kindly funded by a grant from Amazon. The pipeline will automatically download the required reference files when you run the pipeline. -For more information about the AWS iGenomes, see https://ewels.github.io/AWS-iGenomes/ +For more information about the AWS iGenomes, see [AWS-iGenomes](https://ewels.github.io/AWS-iGenomes/) Downloading the files takes time and bandwidth, so we recommend making a local copy of the iGenomes resource. Once downloaded, you can customise the variable `params.igenomes_base` in your custom configuration file to point to the reference location. 
For example: + ```nextflow params.igenomes_base = '/path/to/data/igenomes/' ``` diff --git a/docs/installation.md b/docs/installation.md index 70c4a6d472f51c28925ab2bedf4f7ec4b468ad7e..9ac66d585871d374c90df1f14b2c192f2d24b7a8 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -74,7 +74,7 @@ Be warned of two important points about this default configuration: #### 3.1) Software deps: Docker First, install docker on your system: [Docker Installation Instructions](https://docs.docker.com/engine/installation/) -Then, running the pipeline with the option `-profile docker` tells Nextflow to enable Docker for this run. An image containing all of the software requirements will be automatically fetched and used from dockerhub (https://hub.docker.com/r/nfcore/hic). +Then, running the pipeline with the option `-profile docker` tells Nextflow to enable Docker for this run. An image containing all of the software requirements will be automatically fetched and used from [dockerhub](https://hub.docker.com/r/nfcore/hic). #### 3.1) Software deps: Singularity If you're not able to use Docker then [Singularity](http://singularity.lbl.gov/) is a great alternative. diff --git a/docs/output.md b/docs/output.md index f395dcd111f5c8c09371e84f0581297f326bed05..53c9c0c7c20b11e85acd758e4f7b157116ef2378 100644 --- a/docs/output.md +++ b/docs/output.md @@ -64,7 +64,7 @@ Short range interactions that are likely to be spurious ligation products can th The validPairs are stored using a simple tab-delimited text format ; -``` +```bash read name / chr_reads1 / pos_reads1 / strand_reads1 / chr_reads2 / pos_reads2 / strand_reads2 / fragment_size / res frag name R1 / res frag R2 / mapping qual R1 / mapping qual R2 [/ allele_specific_tag] ``` @@ -102,7 +102,7 @@ A contact map is defined by : Based on the observation that a contact map is symmetric and usually sparse, only non-zero values are stored for half of the matrix. 
The user can specified if the 'upper', 'lower' or 'complete' matrix has to be stored. The 'asis' option allows to store the contacts as they are observed from the valid pairs files. -``` +```bash A B 10 A C 23 B C 24 diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index e6772eb34bd66f12b8477547a1c3cc250d34f33d..e0f2d0774afa327390d3e3cb33c7c3b1e6c829fb 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -7,11 +7,11 @@ If only no file, only one input file , or only read one and not read two is pick 1. The path must be enclosed in quotes (`'` or `"`) 2. The path must have at least one `*` wildcard character. This is even if you are only running one paired end sample. 3. When using the pipeline with paired end data, the path must use `{1,2}` or `{R1,R2}` notation to specify read pairs. -4. If you are running Single end data make sure to specify `--singleEnd` +4. If you are running Single end data make sure to specify `--singleEnd` If the pipeline can't find your files then you will get the following error -``` +```bash ERROR ~ Cannot find any reads matching: *{1,2}.fastq.gz ``` diff --git a/docs/usage.md b/docs/usage.md index 4f6825eb506b05db2e73afc415863c6adcaef056..9b2bb6a5b160dbb19632188ee91c66e58ce58a21 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -7,69 +7,69 @@ * [Updating the pipeline](#updating-the-pipeline) * [Reproducibility](#reproducibility) * [Main arguments](#main-arguments) - * [`-profile`](#-profile-single-dash) - * [`awsbatch`](#awsbatch) - * [`conda`](#conda) - * [`docker`](#docker) - * [`singularity`](#singularity) - * [`test`](#test) - * [`--reads`](#--reads) - * [`--singleEnd`](#--singleend) + * [`-profile`](#-profile-single-dash) + * [`awsbatch`](#awsbatch) + * [`conda`](#conda) + * [`docker`](#docker) + * [`singularity`](#singularity) + * [`test`](#test) + * [`--reads`](#--reads) + * [`--singleEnd`](#--singleend) * [Reference genomes](#reference-genomes) - * [`--genome`](#--genome) - * [`--fasta`](#--fasta) - 
* [`--igenomesIgnore`](#--igenomesignore) - * [`--bwt2_index`](#--bwt2_index) - * [`--chromosome_size`](#--chromosome_size) - * [`--restriction_fragments`](#--restriction_fragments) + * [`--genome`](#--genome) + * [`--fasta`](#--fasta) + * [`--igenomesIgnore`](#--igenomesignore) + * [`--bwt2_index`](#--bwt2_index) + * [`--chromosome_size`](#--chromosome_size) + * [`--restriction_fragments`](#--restriction_fragments) * [Hi-C specific options](#hi-c-specific-options) - * [Reads mapping](#reads-mapping) - * [`--bwt2_opts_end2end`](#--bwt2_opts_end2end) - * [`--bwt2_opts_trimmed`](#--bwt2_opts_trimmed) - * [`--min_mapq`](#--min_mapq) - * [Digestion Hi-C](#digestion-hi-c) - * [`--restriction_site`](#--restriction_site) - * [`--ligation_site`](#--ligation_site) - * [`--min_restriction_fragment_size`](#--min_restriction_fragment_size) - * [`--max_restriction_fragment_size`](#--max_restriction_fragment_size) - * [`--min_insert_size`](#--min_insert_size) - * [`--max_insert_size`](#--max_insert_size) - * [DNase Hi-C](#dnase-hi-c) - * [`--dnase`](#--dnase) - * [Hi-C Processing](#hi-c-processing) - * [`--min_cis_dist`](#--min_cis_dist) - * [`--rm_singleton`](#--rm_singleton) - * [`--rm_dup`](#--rm_dup) - * [`--rm_multi`](#--rm_multi) - * [Genome-wide contact maps](#genome-wide-contact-maps) - * [`--bins_size`](#--bins_size) - * [`--ice_max_iter`](#--ice_max_iter) - * [`--ice_filer_low_count_perc`](#--ice_filer_low_count_perc) - * [`--ice_filer_high_count_perc`](#--ice_filer_high_count_perc) - * [`--ice_eps`](#--ice_eps) - * [Inputs/Outputs](#inputs-outputs) - * [`--splitFastq`](#--splitFastq) - * [`--saveReference`](#--saveReference) - * [`--saveAlignedIntermediates`](#--saveAlignedIntermediates) + * [Reads mapping](#reads-mapping) + * [`--bwt2_opts_end2end`](#--bwt2_opts_end2end) + * [`--bwt2_opts_trimmed`](#--bwt2_opts_trimmed) + * [`--min_mapq`](#--min_mapq) + * [Digestion Hi-C](#digestion-hi-c) + * [`--restriction_site`](#--restriction_site) + * 
[`--ligation_site`](#--ligation_site) + * [`--min_restriction_fragment_size`](#--min_restriction_fragment_size) + * [`--max_restriction_fragment_size`](#--max_restriction_fragment_size) + * [`--min_insert_size`](#--min_insert_size) + * [`--max_insert_size`](#--max_insert_size) + * [DNase Hi-C](#dnase-hi-c) + * [`--dnase`](#--dnase) + * [Hi-C Processing](#hi-c-processing) + * [`--min_cis_dist`](#--min_cis_dist) + * [`--rm_singleton`](#--rm_singleton) + * [`--rm_dup`](#--rm_dup) + * [`--rm_multi`](#--rm_multi) + * [Genome-wide contact maps](#genome-wide-contact-maps) + * [`--bins_size`](#--bins_size) + * [`--ice_max_iter`](#--ice_max_iter) + * [`--ice_filer_low_count_perc`](#--ice_filer_low_count_perc) + * [`--ice_filer_high_count_perc`](#--ice_filer_high_count_perc) + * [`--ice_eps`](#--ice_eps) + * [Inputs/Outputs](#inputs-outputs) + * [`--splitFastq`](#--splitFastq) + * [`--saveReference`](#--saveReference) + * [`--saveAlignedIntermediates`](#--saveAlignedIntermediates) * [Job resources](#job-resources) * [Automatic resubmission](#automatic-resubmission) * [Custom resource requests](#custom-resource-requests) * [AWS batch specific parameters](#aws-batch-specific-parameters) - * [`-awsbatch`](#-awsbatch) - * [`--awsqueue`](#--awsqueue) - * [`--awsregion`](#--awsregion) + * [`-awsbatch`](#-awsbatch) + * [`--awsqueue`](#--awsqueue) + * [`--awsregion`](#--awsregion) * [Other command line parameters](#other-command-line-parameters) - * [`--outdir`](#--outdir) - * [`--email`](#--email) - * [`-name`](#-name-single-dash) - * [`-resume`](#-resume-single-dash) - * [`-c`](#-c-single-dash) - * [`--custom_config_version`](#--custom_config_version) - * [`--max_memory`](#--max_memory) - * [`--max_time`](#--max_time) - * [`--max_cpus`](#--max_cpus) - * [`--plaintext_email`](#--plaintext_email) - * [`--multiqc_config`](#--multiqc_config) + * [`--outdir`](#--outdir) + * [`--email`](#--email) + * [`-name`](#-name-single-dash) + * [`-resume`](#-resume-single-dash) + * 
[`-c`](#-c-single-dash) + * [`--custom_config_version`](#--custom_config_version) + * [`--max_memory`](#--max_memory) + * [`--max_time`](#--max_time) + * [`--max_cpus`](#--max_cpus) + * [`--plaintext_email`](#--plaintext_email) + * [`--multiqc_config`](#--multiqc_config) ## General Nextflow info @@ -83,6 +83,7 @@ NXF_OPTS='-Xms1g -Xmx4g' ## Running the pipeline The typical command for running the pipeline is as follows: + ```bash nextflow run nf-core/hic --reads '*_R{1,2}.fastq.gz' -genome GRCh37 -profile docker ``` @@ -135,8 +136,6 @@ If `-profile` is not specified at all the pipeline will be run locally and expec * A profile with a complete configuration for automated testing * Includes links to test data so needs no other parameters -<!-- TODO nf-core: Document required command line parameters --> - ### `--reads` Use this to specify the location of your input FastQ files. For example: @@ -211,7 +210,8 @@ The bowtie2 indexes are required to run the Hi-C pipeline. If the `--bwt2_index` The Hi-C pipeline will also requires a two-columns text file with the chromosome name and its size (tab separated). If not specified, this file will be automatically created by the pipeline. In the latter case, the `--fasta` reference genome has to be specified. -``` + +```bash chr1 249250621 chr2 243199373 chr3 198022430 @@ -233,7 +233,7 @@ If not specified, this file will be automatically created by the pipeline. In th Finally, Hi-C experiments based on restriction enzyme digestion requires a BED file with coordinates of restriction fragments. -``` +```bash chr1 0 16007 HIC_chr1_1 0 + chr1 16007 24571 HIC_chr1_2 0 + chr1 24571 27981 HIC_chr1_3 0 + @@ -445,7 +445,7 @@ The `--splitFastq` option allows to automatically split input read pairs into ch If specified, annotation files automatically generated from the `--fasta` file are exported in the results folder. 
Default: false -``` +```bash --saveReference ``` @@ -453,7 +453,7 @@ If specified, annotation files automatically generated from the `--fasta` file a If specified, all intermediate mapping files are saved and exported in the results folder. Default: false -``` +```bash --saveReference ``` diff --git a/main.nf b/main.nf index bccbb0d121382893c819ecbad7e54c2136a902d1..17ff4d32ccb99f1c9f783376a9c05b73b6a1657e 100644 --- a/main.nf +++ b/main.nf @@ -11,7 +11,7 @@ def helpMessage() { - // TODO nf-core: Add to this help message with new command line parameters + // Add to this help message with new command line parameters log.info nfcoreHeader() log.info""" @@ -868,7 +868,7 @@ workflow.onComplete { email_fields['summary']['Nextflow Build'] = workflow.nextflow.build email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - // TODO nf-core: If not using MultiQC, strip out this code (including params.maxMultiqcEmailFileSize) + // If not using MultiQC, strip out this code (including params.maxMultiqcEmailFileSize) // On success try attach the multiqc report def mqc_report = null try { diff --git a/nextflow.config b/nextflow.config index a526e9eb077b2a1b40be644d5b1da4b694481a27..eba513985951add5b77fd942c9064026e05c2e07 100644 --- a/nextflow.config +++ b/nextflow.config @@ -9,7 +9,7 @@ params { // Workflow flags - // TODO nf-core: Specify your pipeline's command line flags + // Specify your pipeline's command line flags reads = "*{1,2}.fastq.gz" outdir = './results' genome = false