diff --git a/.travis.yml b/.travis.yml index b77371b03dae59b22b900792fe7f76b2d07faab1..2dd43f74a0f277205eeb13f0e52c970038c2c400 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,13 +8,12 @@ matrix: fast_finish: true before_install: - # PRs to master are only ok if coming from dev branch - - '[ $TRAVIS_PULL_REQUEST = "false" ] || [ $TRAVIS_BRANCH != "master" ] || ([ $TRAVIS_PULL_REQUEST_SLUG = $TRAVIS_REPO_SLUG ] && [ $TRAVIS_PULL_REQUEST_BRANCH = "dev" ])' + - '[ $TRAVIS_PULL_REQUEST = "false" ] || [ $TRAVIS_BRANCH != "master" ] || ([ $TRAVIS_PULL_REQUEST_SLUG = $TRAVIS_REPO_SLUG ] && ([ $TRAVIS_PULL_REQUEST_BRANCH = "dev" ] || [ $TRAVIS_PULL_REQUEST_BRANCH = "patch" ]))' # Pull the docker image first so the test doesn't wait for this - docker pull nfcore/hic:dev # Fake the tag locally so that the pipeline runs properly # Looks weird when this is :dev to :dev, but makes sense when testing code for a release (:dev to :1.0.1) - - docker tag nfcore/hic:dev nfcore/hic:1.1.0 + - docker tag nfcore/hic:dev nfcore/hic:dev install: # Install Nextflow @@ -30,7 +29,7 @@ install: - sudo apt-get install npm && npm install -g markdownlint-cli env: - - NXF_VER='0.32.0' # Specify a minimum NF version that should be tested and work + - NXF_VER='19.04.0' # Specify a minimum NF version that should be tested and work - NXF_VER='' # Plus: get the latest NF version and check that it works script: diff --git a/CHANGELOG.md b/CHANGELOG.md index 7142fd883baa866d8d117030139d6261ef11a2d7..aac514667058a584d1fba7e70cd1a61a4ebd7540 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,13 +1,13 @@ # nf-core/hic: Changelog -## v1.1dev +## v1.1.0 - 2019-10-15 * Support 'N' base motif in restriction/ligation sites -* Support multiple restriction enzymes/ligattion sites (comma separated) (#31) +* Support multiple restriction enzymes/ligattion sites (comma separated) ([#31](https://github.com/nf-core/hic/issues/31)) * Add --saveInteractionBAM option -* Add DOI (#29) -* Fix bug for reads extension _1/_2 (#30) -* Update manual (#28) +* Add DOI ([#29](https://github.com/nf-core/hic/issues/29)) +* Fix bug for reads extension _1/_2 ([#30](https://github.com/nf-core/hic/issues/30)) +* Update manual ([#28](https://github.com/nf-core/hic/issues/28)) ## v1.0 - 2019-05-06 diff --git a/Dockerfile b/Dockerfile index bd89e7eed206414d6257f30f7ed2ebd91fb23971..4714783d6d4c757834980a200f109612ab56cd48 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM nfcore/base +FROM nfcore/base:1.7 LABEL authors="Nicolas Servant" \ description="Docker image containing all requirements for nf-core/hic pipeline" @@ -7,4 +7,4 @@ RUN apt-get update && apt-get install -y gcc g++ && apt-get clean -y COPY environment.yml / RUN conda env create -f /environment.yml && conda clean -a -ENV PATH /opt/conda/envs/nf-core-hic-1.1.0dev/bin:$PATH +ENV PATH /opt/conda/envs/nf-core-hic-1.1.1dev/bin:$PATH diff --git a/README.md b/README.md index 2227cb44fc2348a16c7b48a509eb9dafb2aa3a54..def8c35381f7dc28007dbf53de53bbde7a093b43 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ **Analysis of Chromosome Conformation Capture data (Hi-C)**. [](https://travis-ci.com/nf-core/hic) -[](https://www.nextflow.io/) +[](https://www.nextflow.io/) [](http://bioconda.github.io/) [](https://hub.docker.com/r/nfcore/hic) @@ -15,7 +15,7 @@ This pipeline is based on the [HiC-Pro workflow](https://github.com/nservant/HiC-Pro). 
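The tightened branch guard above is a single shell conditional: it passes for non-PR builds, for PRs that do not target `master`, and for same-repo PRs opened from `dev` or (newly) `patch`. A minimal sketch of the same logic for checking it outside Travis — the variable values below are stand-ins for illustration, not real build settings:

```bash
# Stand-in values simulating a same-repo PR from the patch branch
TRAVIS_PULL_REQUEST="42"
TRAVIS_BRANCH="master"
TRAVIS_PULL_REQUEST_SLUG="nf-core/hic"
TRAVIS_REPO_SLUG="nf-core/hic"
TRAVIS_PULL_REQUEST_BRANCH="patch"

# Same guard as in .travis.yml: only block PRs to master that do not come
# from the dev or patch branch of the same repository
[ "$TRAVIS_PULL_REQUEST" = "false" ] || [ "$TRAVIS_BRANCH" != "master" ] || \
  ( [ "$TRAVIS_PULL_REQUEST_SLUG" = "$TRAVIS_REPO_SLUG" ] && \
    ( [ "$TRAVIS_PULL_REQUEST_BRANCH" = "dev" ] || [ "$TRAVIS_PULL_REQUEST_BRANCH" = "patch" ] ) ) \
  && echo "PR allowed" || echo "PR blocked"
```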
-It was designed to process Hi-C data from raw fastq files (paired-end Illumina +It was designed to process Hi-C data from raw FastQ files (paired-end Illumina data) to normalized contact maps. The current version supports most protocols, including digestion protocols as well as protocols that do not require restriction enzymes such as DNase Hi-C. @@ -39,24 +39,58 @@ sites (bowtie2) 6. Quality controls and report (MultiQC) 7. Addition export for visualisation and downstream analysis (cooler) +## Quick Start + +i. Install [`nextflow`](https://nf-co.re/usage/installation) + +ii. Install one of [`docker`](https://docs.docker.com/engine/installation/), +[`singularity`](https://www.sylabs.io/guides/3.0/user-guide/) or +[`conda`](https://conda.io/miniconda.html) + +iii. Download the pipeline and test it on a minimal dataset with a single command + +```bash +nextflow run nf-core/hic -profile test,<docker/singularity/conda/institute> +``` + +iv. Start running your own analysis! + +```bash +nextflow run nf-core/hic -profile <docker/singularity/conda/institute> --reads '*_R{1,2}.fastq.gz' --genome GRCh37 +``` + +See [usage docs](docs/usage.md) for all of the available options when running the pipeline. + ## Documentation The nf-core/hic pipeline comes with documentation about the pipeline, found in the `docs/` directory: -1. [Installation](docs/installation.md) +1. [Installation](https://nf-co.re/usage/installation) 2. Pipeline configuration - * [Local installation](docs/configuration/local.md) - * [Adding your own system](docs/configuration/adding_your_own.md) - * [Reference genomes](docs/configuration/reference_genomes.md) + * [Local installation](https://nf-co.re/usage/local_installation) + * [Adding your own system config](https://nf-co.re/usage/adding_own_config) + * [Reference genomes](https://nf-co.re/usage/reference_genomes) 3. [Running the pipeline](docs/usage.md) 4. [Output and how to interpret the results](docs/output.md) -5. [Troubleshooting](docs/troubleshooting.md) +5. [Troubleshooting](https://nf-co.re/usage/troubleshooting) + +## Contributions and Support + +If you would like to contribute to this pipeline, please see the +[contributing guidelines](.github/CONTRIBUTING.md). + +For further information or help, don't hesitate to get in touch on +[Slack](https://nfcore.slack.com/channels/hic). +You can join with [this invite](https://nf-co.re/join/slack). + ## Credits nf-core/hic was originally written by Nicolas Servant. 
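The Quick Start above uses the iGenomes shortcut; for a digestion protocol the enzyme motifs are usually given explicitly, and DNase Hi-C data is handled with a dedicated flag. A sketch assuming a HindIII library, Docker, and placeholder read paths — the motif and bin-size values shown are the pipeline defaults documented in `docs/usage.md` and the `main.nf` help further down this patch:

```bash
# HindIII digestion: cutting motif A^AGCTT, ligation motif AAGCTAGCTT
nextflow run nf-core/hic -profile docker \
    --reads 'data/*_R{1,2}.fastq.gz' \
    --genome GRCh37 \
    --restriction_site 'A^AGCTT' \
    --ligation_site 'AAGCTAGCTT' \
    --bin_size '1000000,500000'

# DNase Hi-C: restriction-fragment options are ignored in this mode
nextflow run nf-core/hic -profile docker \
    --reads 'data/*_R{1,2}.fastq.gz' \
    --genome GRCh37 \
    --dnase
```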
+## Citation + If you use nf-core/hic for your analysis, please cite it using the following doi: [10.5281/zenodo.2669513](https://doi.org/10.5281/zenodo.2669513) diff --git a/conf/curie.config b/conf/curie.config deleted file mode 100644 index ab85a2d9d778ac3ca875a273e9bbcb7eb966253d..0000000000000000000000000000000000000000 --- a/conf/curie.config +++ /dev/null @@ -1,16 +0,0 @@ -singularity { - enabled = false -} - -process { - executor = 'pbs' - queue = params.queue - //beforeScript = 'export PATH=/bioinfo/pipelines/sandbox/dev/nfcore/rnaseq/modules/conda/envs/nf-core-rnaseq-1.2/bin:$PATH' -} - -params { - clusterOptions = false - max_memory = 128.GB - max_cpus = 4 - max_time = 240.h -} diff --git a/conf/multiqc_config.yaml b/conf/multiqc_config.yaml deleted file mode 100644 index f2a738c43be4dae15db5075017559607c66c0542..0000000000000000000000000000000000000000 --- a/conf/multiqc_config.yaml +++ /dev/null @@ -1,7 +0,0 @@ -report_comment: > - This report has been generated by the <a href="https://github.com/nf-core/hic" target="_blank">nf-core/hic</a> - analysis pipeline. For information about how to interpret these results, please see the - <a href="https://github.com/nf-core/hic" target="_blank">documentation</a>. -report_section_order: - nf-core/hic-software-versions: - order: -1000 diff --git a/conf/test.config b/conf/test.config index 592e3a40d8bce4cf22b5fe1ad9014ded48d439ce..00c47f85cd86d5d5a05ce3123024a37a9cc9e466 100644 --- a/conf/test.config +++ b/conf/test.config @@ -29,5 +29,5 @@ params { min_mapq = 0 // Options - skip_cool = true + skipCool = true } diff --git a/docs/README.md b/docs/README.md index d7dbdac40b9452baa3d7d7747d264077276fb679..e160867d029e09c793168dd764f8a0ea01dcbd59 100644 --- a/docs/README.md +++ b/docs/README.md @@ -2,11 +2,11 @@ The nf-core/hic documentation is split into the following files: -1. [Installation](installation.md) +1. [Installation](https://nf-co.re/usage/installation) 2. Pipeline configuration - * [Local installation](configuration/local.md) - * [Adding your own system](configuration/adding_your_own.md) - * [Reference genomes](configuration/reference_genomes.md) + * [Local installation](https://nf-co.re/usage/local_installation) + * [Adding your own system config](https://nf-co.re/usage/adding_own_config) + * [Reference genomes](https://nf-co.re/usage/reference_genomes) 3. [Running the pipeline](usage.md) 4. [Output and how to interpret the results](output.md) -5. [Troubleshooting](troubleshooting.md) +5. [Troubleshooting](https://nf-co.re/usage/troubleshooting) diff --git a/docs/configuration/adding_your_own.md b/docs/configuration/adding_your_own.md deleted file mode 100644 index b1703c1e10d064a0d989e2520642d6e77d742e66..0000000000000000000000000000000000000000 --- a/docs/configuration/adding_your_own.md +++ /dev/null @@ -1,126 +0,0 @@ -# nf-core/hic: Configuration for other clusters - -It is entirely possible to run this pipeline on other clusters, though you will -need to set up your own config file so that the pipeline knows how to work with -your cluster. - -> If you think that there are other people using the pipeline who would benefit -from your configuration (eg. other common cluster setups), please let us know. -We can add a new configuration and profile which can used by specifying -`-profile <name>` when running the pipeline. The config file will then be -hosted at `nf-core/configs` and will be pulled automatically before the pipeline -is executed. 
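In practice the two routes described above look like this — a shared profile pulled from nf-core/configs versus a one-off config file passed on the command line (covered in the next paragraph). The profile and file names below are placeholders for illustration:

```bash
# Shared setup: an institutional profile hosted in nf-core/configs,
# fetched automatically before the pipeline starts
nextflow run nf-core/hic -profile docker,my_institute --genome GRCh37 --reads 'data/*_R{1,2}.fastq.gz'

# One-off setup: a local config layered on top of the pipeline defaults
nextflow run nf-core/hic -profile docker -c my_cluster.config --genome GRCh37 --reads 'data/*_R{1,2}.fastq.gz'
```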
- -If you are the only person to be running this pipeline, you can create your -config file as `~/.nextflow/config` and it will be applied every time you run -Nextflow. Alternatively, save the file anywhere and reference it when running -the pipeline with `-c path/to/config` (see the -[Nextflow documentation](https://www.nextflow.io/docs/latest/config.html) -for more). - -A basic configuration comes with the pipeline, which loads the -[`conf/base.config`](../../conf/base.config) by default. This means that you -only need to configure the specifics for your system and overwrite any defaults -that you want to change. - -## Cluster Environment - -By default, pipeline uses the `local` Nextflow executor - in other words, all -jobs are run in the login session. If you're using a simple server, this may be -fine. If you're using a compute cluster, this is bad as all jobs will run on -the head node. - -To specify your cluster environment, add the following line to your config -file: - -```nextflow -process.executor = 'YOUR_SYSTEM_TYPE' -``` - -Many different cluster types are supported by Nextflow. For more information, -please see the -[Nextflow documentation](https://www.nextflow.io/docs/latest/executor.html). - -Note that you may need to specify cluster options, such as a project or queue. -To do so, use the `clusterOptions` config option: - -```nextflow -process { - executor = 'SLURM' - clusterOptions = '-A myproject' -} -``` - -## Software Requirements - -To run the pipeline, several software packages are required. How you satisfy -these requirements is essentially up to you and depends on your system. -If possible, we _highly_ recommend using either Docker or Singularity. - -Please see the [`installation documentation`](../installation.md) for how to -run using the below as a one-off. These instructions are about configuring a -config file for repeated use. - -### Docker - -Docker is a great way to run nf-core/hic, as it manages all software -installations and allows the pipeline to be run in an identical software -environment across a range of systems. - -Nextflow has -[excellent integration](https://www.nextflow.io/docs/latest/docker.html) -with Docker, and beyond installing the two tools, not much else is required - -nextflow will automatically fetch the -[nfcore/hic](https://hub.docker.com/r/nfcore/hic/) image that we have created -and is hosted at dockerhub at run time. - -To add docker support to your own config file, add the following: - -```nextflow -docker.enabled = true -process.container = "nfcore/hic" -``` - -Note that the dockerhub organisation name annoyingly can't have a hyphen, -so is `nfcore` and not `nf-core`. - -### Singularity image - -Many HPC environments are not able to run Docker due to security issues. -[Singularity](http://singularity.lbl.gov/) is a tool designed to run on such -HPC systems which is very similar to Docker. - -To specify singularity usage in your pipeline config file, add the following: - -```nextflow -singularity.enabled = true -process.container = "shub://nf-core/hic" -``` - -If you intend to run the pipeline offline, nextflow will not be able to -automatically download the singularity image for you. -Instead, you'll have to do this yourself manually first, transfer the image -file and then point to that. 
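Before relying on the Docker settings above in a batch environment, it can help to pull and inspect the image by hand. A small sketch, assuming the `dev` tag used elsewhere in this patch and that the aligner and samtools sit on the image's PATH (they come from the conda environment baked into the Dockerfile):

```bash
# Fetch the image referenced by process.container
docker pull nfcore/hic:dev

# Confirm the core tools are visible inside the container
docker run --rm nfcore/hic:dev bowtie2 --version
docker run --rm nfcore/hic:dev samtools --version
```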
- -First, pull the image file where you have an internet connection: - -```bash -singularity pull --name nf-core-hic.simg shub://nf-core/hic -``` - -Then transfer this file and point the config file to the image: - -```nextflow -singularity.enabled = true -process.container = "/path/to/nf-core-hic.simg" -``` - -### Conda - -If you're not able to use Docker or Singularity, you can instead use conda to -manage the software requirements. -To use conda in your own config file, add the following: - -```nextflow -process.conda = "$baseDir/environment.yml" -``` diff --git a/docs/configuration/local.md b/docs/configuration/local.md deleted file mode 100644 index c3a047fbd856182b6ce823f8f91548b1a2bccc8a..0000000000000000000000000000000000000000 --- a/docs/configuration/local.md +++ /dev/null @@ -1,76 +0,0 @@ -# nf-core/hic: Local Configuration - -If running the pipeline in a local environment, we highly recommend using -either Docker or Singularity. - -## Docker - -Docker is a great way to run `nf-core/hic`, as it manages all software -installations and allows the pipeline to be run in an identical software -environment across a range of systems. - -Nextflow has -[excellent integration](https://www.nextflow.io/docs/latest/docker.html) with -Docker, and beyond installing the two tools, not much else is required. -The `nf-core/hic` profile comes with a configuration profile for docker, making -it very easy to use. This also comes with the required presets to use the AWS -iGenomes resource, meaning that if using common reference genomes you just -specify the reference ID and it will be automatically downloaded from AWS S3. - -First, install docker on your system: -[Docker Installation Instructions](https://docs.docker.com/engine/installation/) - -Then, simply run the analysis pipeline: - -```bash -nextflow run nf-core/hic -profile docker --genome '<genome ID>' -``` - -Nextflow will recognise `nf-core/hic` and download the pipeline from GitHub. -The `-profile docker` configuration lists the -[nf-core/hic](https://hub.docker.com/r/nfcore/hic/) image that we have created -and is hosted at dockerhub, and this is downloaded. - -For more information about how to work with reference genomes, see -[`docs/configuration/reference_genomes.md`](reference_genomes.md). - -### Pipeline versions - -The public docker images are tagged with the same version numbers as the code, -which you can use to ensure reproducibility. When running the pipeline, -specify the pipeline version with `-r`, for example `-r 1.0`. This uses -pipeline code and docker image from this tagged version. - -## Singularity image - -Many HPC environments are not able to run Docker due to security issues. -[Singularity](http://singularity.lbl.gov/) is a tool designed to run on such -HPC systems which is very similar to Docker. Even better, it can use create -images directly from dockerhub. - -To use the singularity image for a single run, use `-with-singularity`. -This will download the docker container from dockerhub and create a singularity -image for you dynamically. - -If you intend to run the pipeline offline, nextflow will not be able to -automatically download the singularity image for you. Instead, you'll have -to do this yourself manually first, transfer the image file and then point to -that. - -First, pull the image file where you have an internet connection: - -> NB: The "tag" at the end of this command corresponds to the pipeline version. 
-> Here, we're pulling the docker image for version 1.0 of the nf-core/hic -pipeline -> Make sure that this tag corresponds to the version of the pipeline that -you're using - -```bash -singularity pull --name nf-core-hic-1.0.img docker://nf-core/hic:1.0 -``` - -Then transfer this file and run the pipeline with this path: - -```bash -nextflow run /path/to/nf-core-hic -with-singularity /path/to/nf-core-hic-1.0.img -``` diff --git a/docs/configuration/reference_genomes.md b/docs/configuration/reference_genomes.md deleted file mode 100644 index d584c0c8e0c9000b150406665f7d2edc33615edf..0000000000000000000000000000000000000000 --- a/docs/configuration/reference_genomes.md +++ /dev/null @@ -1,68 +0,0 @@ -# nf-core/hic: Reference Genomes Configuration - -The nf-core/hic pipeline needs a reference genome for alignment and annotation. - -These paths can be supplied on the command line at run time (see the -[usage docs](../usage.md)), -but for convenience it's often better to save these paths in a nextflow config -file. -See below for instructions on how to do this. -Read [Adding your own system](adding_your_own.md) to find out how to set up -custom config files. - -## Adding paths to a config file - -Specifying long paths every time you run the pipeline is a pain. -To make this easier, the pipeline comes configured to understand reference -genome keywords which correspond to preconfigured paths, meaning that you can -just specify `--genome ID` when running the pipeline. - -Note that this genome key can also be specified in a config file if you always -use the same genome. - -To use this system, add paths to your config file using the following template: - -```nextflow -params { - genomes { - 'YOUR-ID' { - fasta = '<PATH TO FASTA FILE>/genome.fa' - } - 'OTHER-GENOME' { - // [..] - } - } - // Optional - default genome. Ignored if --genome 'OTHER-GENOME' specified - // on command line - genome = 'YOUR-ID' -} -``` - -You can add as many genomes as you like as long as they have unique IDs. - -## illumina iGenomes - -To make the use of reference genomes easier, illumina has developed a -centralised resource called -[iGenomes](https://support.illumina.com/sequencing/sequencing_software/igenome.html). -Multiple reference index types are held together with consistent structure for -multiple genomes. - -We have put a copy of iGenomes up onto AWS S3 hosting and this pipeline is -configured to use this by default. -The hosting fees for AWS iGenomes are currently kindly funded by a grant from -Amazon. -The pipeline will automatically download the required reference files when you -run the pipeline. -For more information about the AWS iGenomes, see -[AWS-iGenomes](https://ewels.github.io/AWS-iGenomes/) - -Downloading the files takes time and bandwidth, so we recommend making a local -copy of the iGenomes resource. -Once downloaded, you can customise the variable `params.igenomes_base` in your -custom configuration file to point to the reference location. -For example: - -```nextflow -params.igenomes_base = '/path/to/data/igenomes/' -``` diff --git a/docs/installation.md b/docs/installation.md deleted file mode 100644 index c3dc01893dd774c1cf5925ddbf441a26fdc24f93..0000000000000000000000000000000000000000 --- a/docs/installation.md +++ /dev/null @@ -1,148 +0,0 @@ -# nf-core/hic: Installation - -To start using the nf-core/hic pipeline, follow the steps below: - -1. [Install Nextflow](#1-install-nextflow) -2. 
[Install the pipeline](#2-install-the-pipeline) - * [Automatic](#21-automatic) - * [Offline](#22-offline) - * [Development](#23-development) -3. [Pipeline configuration](#3-pipeline-configuration) - * [Software deps: Docker and Singularity](#31-software-deps-docker-and-singularity) - * [Software deps: Bioconda](#32-software-deps-bioconda) - * [Configuration profiles](#33-configuration-profiles) -4. [Reference genomes](#4-reference-genomes) - -## 1) Install NextFlow - -Nextflow runs on most POSIX systems (Linux, Mac OSX etc). It can be installed -by running the following commands: - -```bash -# Make sure that Java v8+ is installed: -java -version - -# Install Nextflow -curl -fsSL get.nextflow.io | bash - -# Add Nextflow binary to your PATH: -mv nextflow ~/bin/ -# OR system-wide installation: -# sudo mv nextflow /usr/local/bin -``` - -See [nextflow.io](https://www.nextflow.io/) for further instructions on how to -install and configure Nextflow. - -## 2) Install the pipeline - -### 2.1) Automatic - -This pipeline itself needs no installation - NextFlow will automatically fetch -it from GitHub if `nf-core/hic` is specified as the pipeline name. - -### 2.2) Offline - -The above method requires an internet connection so that Nextflow can download -the pipeline files. If you're running on a system that has no internet -connection, you'll need to download and transfer the pipeline files manually: - -```bash -wget https://github.com/nf-core/hic/archive/master.zip -mkdir -p ~/my-pipelines/nf-core/ -unzip master.zip -d ~/my-pipelines/nf-core/ -cd ~/my_data/ -nextflow run ~/my-pipelines/nf-core/hic-master -``` - -To stop nextflow from looking for updates online, you can tell it to run in -offline mode by specifying the following environment variable in your -~/.bashrc file: - -```bash -export NXF_OFFLINE='TRUE' -``` - -### 2.3) Development - -If you would like to make changes to the pipeline, it's best to make a fork on -GitHub and then clone the files. Once cloned you can run the pipeline directly -as above. - -## 3) Pipeline configuration - -By default, the pipeline loads a basic server configuration -[`conf/base.config`](../conf/base.config) -This uses a number of sensible defaults for process requirements and is -suitable for running on a simple (if powerful!) local server. - -Be warned of two important points about this default configuration: - -1. The default profile uses the `local` executor - * All jobs are run in the login session. If you're using a simple server, -this may be fine. If you're using a compute cluster, this is bad as all jobs -will run on the head node. - * See the -[nextflow docs](https://www.nextflow.io/docs/latest/executor.html) for -information about running with other hardware backends. Most job scheduler -systems are natively supported. -2. Nextflow will expect all software to be installed and available on the -`PATH` - * It's expected to use an additional config profile for docker, singularity -or conda support. See below. - -### 3.1) Software deps: Docker - -First, install docker on your system: -[Docker Installation Instructions](https://docs.docker.com/engine/installation/) - -Then, running the pipeline with the option `-profile docker` tells Nextflow to -enable Docker for this run. An image containing all of the software -requirements will be automatically fetched and used from -[dockerhub](https://hub.docker.com/r/nfcore/hic). 
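This patch also raises the minimum supported Nextflow from 0.32.0 to 19.04.0 (see `.travis.yml` and `nextflow.config`), so it is worth checking the local installation before configuring anything else. A short sketch — pinning via the `NXF_VER` environment variable is standard Nextflow behaviour, used here with the version from this patch:

```bash
# Show the currently installed Nextflow version
nextflow -version

# Run the bundled test profile under the pinned minimum version
NXF_VER=19.04.0 nextflow run nf-core/hic -profile test,docker
```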
- -### 3.1) Software deps: Singularity - -If you're not able to use Docker then -[Singularity](http://singularity.lbl.gov/) is a great alternative. -The process is very similar: running the pipeline with the option -`-profile singularity` tells Nextflow to enable singularity for this run. -An image containing all of the software requirements will be automatically -fetched and used from singularity hub. - -If running offline with Singularity, you'll need to download and transfer the -Singularity image first: - -```bash -singularity pull --name nf-core-hic.simg shub://nf-core/hic -``` - -Once transferred, use `-with-singularity` and specify the path to the image -file: - -```bash -nextflow run /path/to/nf-core-hic -with-singularity nf-core-hic.simg -``` - -Remember to pull updated versions of the singularity image if you update the -pipeline. - -### 3.2) Software deps: conda - -If you're not able to use Docker _or_ Singularity, you can instead use conda to -manage the software requirements. -This is slower and less reproducible than the above, but is still better than -having to install all requirements yourself! -The pipeline ships with a conda environment file and nextflow has built-in -support for this. -To use it first ensure that you have conda installed (we recommend -[miniconda](https://conda.io/miniconda.html)), then follow the same pattern -as above and use the flag `-profile conda` - -### 3.3) Configuration profiles - -See [`docs/configuration/adding_your_own.md`](configuration/adding_your_own.md) - -## 4) Reference genomes - -See [`docs/configuration/reference_genomes.md`](configuration/reference_genomes.md) diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md deleted file mode 100644 index df43e8a755881b646991815e75e159069972c459..0000000000000000000000000000000000000000 --- a/docs/troubleshooting.md +++ /dev/null @@ -1,43 +0,0 @@ -# nf-core/hic: Troubleshooting - -## Input files not found - -If only no file, only one input file , or only read one and not read two is -picked up then something is wrong with your input file declaration - -1. The path must be enclosed in quotes (`'` or `"`) -2. The path must have at least one `*` wildcard character. This is even if -you are only running one paired end sample. -3. When using the pipeline with paired end data, the path must use `{1,2}` or -`{R1,R2}` notation to specify read pairs. -4. If you are running Single end data make sure to specify `--singleEnd` - -If the pipeline can't find your files then you will get the following error - -```bash -ERROR ~ Cannot find any reads matching: *{1,2}.fastq.gz -``` - -Note that if your sample name is "messy" then you have to be very particular -with your glob specification. A file name like -`L1-1-D-2h_S1_L002_R1_001.fastq.gz` can be difficult enough for a human to -read. Specifying `*{1,2}*.gz` wont work whilst `*{R1,R2}*.gz` will. - -## Data organization - -The pipeline can't take a list of multiple input files - it takes a glob -expression. If your input files are scattered in different paths then we -recommend that you generate a directory with symlinked files. If running -in paired end mode please make sure that your files are sensibly named so -that they can be properly paired. See the previous point. - -## Extra resources and getting help - -If you still have an issue with running the pipeline then feel free to -contact us. -Have a look at the [pipeline website](https://github.com/nf-core/hic) to -find out how. 
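The glob rules above are where most input problems come from; two hedged examples of `--reads` values that satisfy them (paths and sample names are made up for illustration):

```bash
# Paired-end: quote the glob, keep at least one '*', and use {R1,R2} to pair the mates
nextflow run nf-core/hic -profile docker \
    --reads '/data/run1/*_{R1,R2}_001.fastq.gz' \
    --genome GRCh37

# Files scattered across runs: gather them with symlinks first, then use a single glob
mkdir -p reads
ln -s /data/run1/*.fastq.gz /data/run2/*.fastq.gz reads/
nextflow run nf-core/hic -profile docker --reads 'reads/*_{R1,R2}_001.fastq.gz' --genome GRCh37
```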
- -If you have problems that are related to Nextflow and not our pipeline then -check out the [Nextflow gitter channel](https://gitter.im/nextflow-io/nextflow) -or the [google group](https://groups.google.com/forum/#!forum/nextflow). diff --git a/docs/usage.md b/docs/usage.md index 57f1e3edb293ca20830c41c145f06eb03c5e5d30..f1cd3a56a220a077cdd835c4d1e8e87e13cf726e 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -53,10 +53,10 @@ * [`--saveAlignedIntermediates`](#--saveAlignedIntermediates) * [`--saveInteractionBAM`](#--saveInteractionBAM) * [Skip options](#skip-options) - * [--skip_maps](#--skip_maps) - * [--skip_ice](#--skip_ice) - * [--skip_cool](#--skip_cool) - * [--skip_multiqc](#--skip_multiqc) + * [--skipMaps](#--skipMaps) + * [--skipIce](#--skipIce) + * [--skipCool](#--skipCool) + * [--skipMultiQC](#--skipMultiQC) * [Job resources](#job-resources) * [Automatic resubmission](#automatic-resubmission) * [Custom resource requests](#custom-resource-requests) @@ -561,38 +561,38 @@ dangling end, self-circle, etc.) and its tags. ## Skip options -### `--skip_maps` +### `--skipMaps` If defined, the workflow stops with the list of valid interactions, and the genome-wide maps are not built. Usefult for capture-C analysis. Default: false ```bash ---skip_maps +--skipMaps ``` -### `--skip_ice` +### `--skipIce` If defined, the ICE normalization is not run on the raw contact maps. Default: false ```bash ---skip_ice +--skipIce ``` -### `--skip_cool` +### `--skipCool` If defined, cooler files are not generated. Default: false ```bash ---skip_cool +--skipCool ``` -### `--skip_multiqc` +### `--skipMultiQC` If defined, the MultiQC report is not generated. Default: false ```bash ---skip_multiqc +--skipMultiQC ``` ## Job resources diff --git a/environment.yml b/environment.yml index 1a69ba7ac34662b5d0c7f808d50d7dd5b86899c3..271f3f5273246d63738d15c6f49cc7c07d1fdb94 100644 --- a/environment.yml +++ b/environment.yml @@ -1,10 +1,9 @@ # You can use this file to create a conda environment for this pipeline: # conda env create -f environment.yml -name: nf-core-hic-1.1.0 +name: nf-core-hic-1.1.1dev channels: - conda-forge - bioconda - - bioconda/label/cf201901 - defaults dependencies: - python=2.7.15 diff --git a/main.nf b/main.nf index 2b8c2ecd67c164e95e61ae2451d2e0bf40f4d8f5..e531cb85226cf6615236624120e94ff39bd8d5c3 100644 --- a/main.nf +++ b/main.nf @@ -26,7 +26,7 @@ def helpMessage() { -profile Configuration profile to use. Can use multiple (comma separated) Available: conda, docker, singularity, awsbatch, test and more. - References: If not specified in the configuration file or you wish to overwrite any of the references. + References If not specified in the configuration file or you wish to overwrite any of the references. --genome Name of iGenomes reference --bwt2_index Path to Bowtie2 index --fasta Path to Fasta reference @@ -35,12 +35,17 @@ def helpMessage() { --saveReference Save reference genome to output folder. Default: False --saveAlignedIntermediates Save intermediates alignment files. Default: False - Options: + Alignments --bwt2_opts_end2end Options for bowtie2 end-to-end mappinf (first mapping step). See hic.config for default. --bwt2_opts_trimmed Options for bowtie2 mapping after ligation site trimming. See hic.config for default. --min_mapq Minimum mapping quality values to consider. Default: 10 --restriction_site Cutting motif(s) of restriction enzyme(s) (comma separated). Default: 'A^AGCTT' --ligation_site Ligation motifs to trim (comma separated). 
Default: 'AAGCTAGCTT' + --rm_singleton Remove singleton reads. Default: true + --rm_multi Remove multi-mapped reads. Default: true + --rm_dup Remove duplicates. Default: true + + Contacts calling --min_restriction_fragment_size Minimum size of restriction fragments to consider. Default: None --max_restriction_fragment_size Maximum size of restriction fragments to consider. Default: None --min_insert_size Minimum insert size of mapped reads to consider. Default: None @@ -48,32 +53,29 @@ def helpMessage() { --saveInteractionBAM Save BAM file with interaction tags (dangling-end, self-circle, etc.). Default: False --dnase Run DNase Hi-C mode. All options related to restriction fragments are not considered. Default: False - --min_cis_dist Minimum intra-chromosomal distance to consider. Default: None - --rm_singleton Remove singleton reads. Default: true - --rm_multi Remove multi-mapped reads. Default: true - --rm_dup Remove duplicates. Default: true + Contact maps --bin_size Bin size for contact maps (comma separated). Default: '1000000,500000' --ice_max_iter Maximum number of iteration for ICE normalization. Default: 100 --ice_filter_low_count_perc Percentage of low counts columns/rows to filter before ICE normalization. Default: 0.02 --ice_filter_high_count_perc Percentage of high counts columns/rows to filter before ICE normalization. Default: 0 --ice_eps Convergence criteria for ICE normalization. Default: 0.1 - Other options: + + Workflow + --skipMaps Skip generation of contact maps. Useful for capture-C. Default: False + --skipIce Skip ICE normalization. Default: False + --skipCool Skip generation of cool files. Default: False + --skipMultiQC Skip MultiQC. Default: False + + Other --splitFastq Size of read chuncks to use to speed up the workflow. Default: None --outdir The output directory where the results will be saved. Default: './results' --email Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. Default: None -name Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic. Default: None - Step options: - - --skip_maps Skip generation of contact maps. Useful for capture-C. Default: False - --skip_ice Skip ICE normalization. Default: False - --skip_cool Skip generation of cool files. Default: False - --skip_multiQC Skip MultiQC. 
Default: False - - AWSBatch options: + AWSBatch --awsqueue The AWSBatch JobQueue that needs to be set when running on AWSBatch --awsregion The AWS Region for your AWS Batch job to run on """.stripIndent() @@ -83,7 +85,7 @@ def helpMessage() { * SET UP CONFIGURATION VARIABLES */ -// Show help emssage +// Show help message if (params.help){ helpMessage() exit 0 @@ -243,6 +245,12 @@ summary['Restriction Motif']= params.restriction_site summary['Ligation Motif'] = params.ligation_site summary['DNase Mode'] = params.dnase summary['Remove Dup'] = params.rm_dup +summary['Min MAPQ'] = params.min_mapq +summary['Min Fragment Size']= params.min_restriction_fragment_size +summary['Max Fragment Size']= params.max_restriction_framgnet_size +summary['Min Insert Size'] = params.min_insert_size +summary['Max Insert Size'] = params.max_insert_size +summary['Min CIS dist'] = params.min_cis_dist summary['Maps resolution'] = params.bin_size summary['Max Memory'] = params.max_memory @@ -273,7 +281,7 @@ if(params.email) { summary['MultiQC maxsize'] = params.maxMultiqcEmailFileSize } log.info summary.collect { k,v -> "${k.padRight(18)}: $v" }.join("\n") -log.info "\033[2m----------------------------------------------------\033[0m" +log.info "-\033[2m--------------------------------------------------\033[0m-" // Check the hostnames against configured profiles checkHostname() @@ -403,36 +411,36 @@ process bowtie2_end_to_end { saveAs: { params.saveAlignedIntermediates ? it : null }, mode: 'copy' input: - set val(sample), file(reads) from raw_reads - file index from bwt2_index_end2end.collect() + set val(sample), file(reads) from raw_reads + file index from bwt2_index_end2end.collect() output: - set val(prefix), file("${prefix}_unmap.fastq") into unmapped_end_to_end - set val(prefix), file("${prefix}.bam") into end_to_end_bam + set val(prefix), file("${prefix}_unmap.fastq") into unmapped_end_to_end + set val(prefix), file("${prefix}.bam") into end_to_end_bam script: - prefix = reads.toString() - ~/(\.fq)?(\.fastq)?(\.gz)?$/ - def bwt2_opts = params.bwt2_opts_end2end - - if (!params.dnase){ - """ - bowtie2 --rg-id BMG --rg SM:${prefix} \\ - ${bwt2_opts} \\ - -p ${task.cpus} \\ - -x ${index}/${bwt2_base} \\ - --un ${prefix}_unmap.fastq \\ - -U ${reads} | samtools view -F 4 -bS - > ${prefix}.bam - """ - }else{ - """ - bowtie2 --rg-id BMG --rg SM:${prefix} \\ - ${bwt2_opts} \\ - -p ${task.cpus} \\ - -x ${index}/${bwt2_base} \\ - --un ${prefix}_unmap.fastq \\ - -U ${reads} > ${prefix}.bam - """ - } + prefix = reads.toString() - ~/(\.fq)?(\.fastq)?(\.gz)?$/ + def bwt2_opts = params.bwt2_opts_end2end + + if (!params.dnase){ + """ + bowtie2 --rg-id BMG --rg SM:${prefix} \\ + ${bwt2_opts} \\ + -p ${task.cpus} \\ + -x ${index}/${bwt2_base} \\ + --un ${prefix}_unmap.fastq \\ + -U ${reads} | samtools view -F 4 -bS - > ${prefix}.bam + """ + }else{ + """ + bowtie2 --rg-id BMG --rg SM:${prefix} \\ + ${bwt2_opts} \\ + -p ${task.cpus} \\ + -x ${index}/${bwt2_base} \\ + --un ${prefix}_unmap.fastq \\ + -U ${reads} > ${prefix}.bam + """ + } } process trim_reads { @@ -441,20 +449,20 @@ process trim_reads { saveAs: { params.saveAlignedIntermediates ? 
it : null }, mode: 'copy' when: - !params.dnase + !params.dnase input: - set val(prefix), file(reads) from unmapped_end_to_end + set val(prefix), file(reads) from unmapped_end_to_end output: - set val(prefix), file("${prefix}_trimmed.fastq") into trimmed_reads + set val(prefix), file("${prefix}_trimmed.fastq") into trimmed_reads script: - """ - cutsite_trimming --fastq $reads \\ - --cutsite ${params.ligation_site} \\ - --out ${prefix}_trimmed.fastq - """ + """ + cutsite_trimming --fastq $reads \\ + --cutsite ${params.ligation_site} \\ + --out ${prefix}_trimmed.fastq + """ } process bowtie2_on_trimmed_reads { @@ -463,24 +471,24 @@ process bowtie2_on_trimmed_reads { saveAs: { params.saveAlignedIntermediates ? it : null }, mode: 'copy' when: - !params.dnase + !params.dnase input: - set val(prefix), file(reads) from trimmed_reads - file index from bwt2_index_trim.collect() + set val(prefix), file(reads) from trimmed_reads + file index from bwt2_index_trim.collect() output: - set val(prefix), file("${prefix}_trimmed.bam") into trimmed_bam + set val(prefix), file("${prefix}_trimmed.bam") into trimmed_bam script: - prefix = reads.toString() - ~/(_trimmed)?(\.fq)?(\.fastq)?(\.gz)?$/ - """ - bowtie2 --rg-id BMG --rg SM:${prefix} \\ - ${params.bwt2_opts_trimmed} \\ - -p ${task.cpus} \\ - -x ${index}/${bwt2_base} \\ - -U ${reads} | samtools view -bS - > ${prefix}_trimmed.bam - """ + prefix = reads.toString() - ~/(_trimmed)?(\.fq)?(\.fastq)?(\.gz)?$/ + """ + bowtie2 --rg-id BMG --rg SM:${prefix} \\ + ${params.bwt2_opts_trimmed} \\ + -p ${task.cpus} \\ + -x ${index}/${bwt2_base} \\ + -U ${reads} | samtools view -bS - > ${prefix}_trimmed.bam + """ } if (!params.dnase){ @@ -490,39 +498,38 @@ if (!params.dnase){ saveAs: { params.saveAlignedIntermediates ? it : null }, mode: 'copy' input: - set val(prefix), file(bam1), file(bam2) from end_to_end_bam.join( trimmed_bam ) + set val(prefix), file(bam1), file(bam2) from end_to_end_bam.join( trimmed_bam ) output: - set val(sample), file("${prefix}_bwt2merged.bam") into bwt2_merged_bam - set val(oname), file("${prefix}.mapstat") into all_mapstat + set val(sample), file("${prefix}_bwt2merged.bam") into bwt2_merged_bam + set val(oname), file("${prefix}.mapstat") into all_mapstat script: - sample = prefix.toString() - ~/(_R1|_R2|_val_1|_val_2|_1$|_2)/ - tag = prefix.toString() =~/_R1|_val_1|_1/ ? "R1" : "R2" - oname = prefix.toString() - ~/(\.[0-9]+)$/ - - """ - samtools merge -@ ${task.cpus} \\ - -f ${prefix}_bwt2merged.bam \\ - ${bam1} ${bam2} + sample = prefix.toString() - ~/(_R1|_R2|_val_1|_val_2|_1$|_2)/ + tag = prefix.toString() =~/_R1|_val_1|_1/ ? 
"R1" : "R2" + oname = prefix.toString() - ~/(\.[0-9]+)$/ + """ + samtools merge -@ ${task.cpus} \\ + -f ${prefix}_bwt2merged.bam \\ + ${bam1} ${bam2} - samtools sort -@ ${task.cpus} -m 800M \\ + samtools sort -@ ${task.cpus} -m 800M \\ -n -T /tmp/ \\ -o ${prefix}_bwt2merged.sorted.bam \\ ${prefix}_bwt2merged.bam - mv ${prefix}_bwt2merged.sorted.bam ${prefix}_bwt2merged.bam - - echo "## ${prefix}" > ${prefix}.mapstat - echo -n "total_${tag}\t" >> ${prefix}.mapstat - samtools view -c ${prefix}_bwt2merged.bam >> ${prefix}.mapstat - echo -n "mapped_${tag}\t" >> ${prefix}.mapstat - samtools view -c -F 4 ${prefix}_bwt2merged.bam >> ${prefix}.mapstat - echo -n "global_${tag}\t" >> ${prefix}.mapstat - samtools view -c -F 4 ${bam1} >> ${prefix}.mapstat - echo -n "local_${tag}\t" >> ${prefix}.mapstat - samtools view -c -F 4 ${bam2} >> ${prefix}.mapstat - """ + mv ${prefix}_bwt2merged.sorted.bam ${prefix}_bwt2merged.bam + + echo "## ${prefix}" > ${prefix}.mapstat + echo -n "total_${tag}\t" >> ${prefix}.mapstat + samtools view -c ${prefix}_bwt2merged.bam >> ${prefix}.mapstat + echo -n "mapped_${tag}\t" >> ${prefix}.mapstat + samtools view -c -F 4 ${prefix}_bwt2merged.bam >> ${prefix}.mapstat + echo -n "global_${tag}\t" >> ${prefix}.mapstat + samtools view -c -F 4 ${bam1} >> ${prefix}.mapstat + echo -n "local_${tag}\t" >> ${prefix}.mapstat + samtools view -c -F 4 ${bam2} >> ${prefix}.mapstat + """ } }else{ process dnase_mapping_stats{ @@ -531,27 +538,26 @@ if (!params.dnase){ saveAs: { params.saveAlignedIntermediates ? it : null }, mode: 'copy' input: - set val(prefix), file(bam1) from end_to_end_bam + set val(prefix), file(bam1) from end_to_end_bam output: - set val(sample), file(bam1) into bwt2_merged_bam - set val(oname), file("${prefix}.mapstat") into all_mapstat + set val(sample), file(bam1) into bwt2_merged_bam + set val(oname), file("${prefix}.mapstat") into all_mapstat script: - sample = prefix.toString() - ~/(_R1|_R2|_val_1|_val_2|_1|_2)/ - tag = prefix.toString() =~/_R1|_val_1|_1/ ? "R1" : "R2" - oname = prefix.toString() - ~/(\.[0-9]+)$/ - - """ - echo "## ${prefix}" > ${prefix}.mapstat - echo -n "total_${tag}\t" >> ${prefix}.mapstat - samtools view -c ${bam1} >> ${prefix}.mapstat - echo -n "mapped_${tag}\t" >> ${prefix}.mapstat - samtools view -c -F 4 ${bam1} >> ${prefix}.mapstat - echo -n "global_${tag}\t" >> ${prefix}.mapstat - samtools view -c -F 4 ${bam1} >> ${prefix}.mapstat - echo -n "local_${tag}\t0" >> ${prefix}.mapstat - """ + sample = prefix.toString() - ~/(_R1|_R2|_val_1|_val_2|_1|_2)/ + tag = prefix.toString() =~/_R1|_val_1|_1/ ? "R1" : "R2" + oname = prefix.toString() - ~/(\.[0-9]+)$/ + """ + echo "## ${prefix}" > ${prefix}.mapstat + echo -n "total_${tag}\t" >> ${prefix}.mapstat + samtools view -c ${bam1} >> ${prefix}.mapstat + echo -n "mapped_${tag}\t" >> ${prefix}.mapstat + samtools view -c -F 4 ${bam1} >> ${prefix}.mapstat + echo -n "global_${tag}\t" >> ${prefix}.mapstat + samtools view -c -F 4 ${bam1} >> ${prefix}.mapstat + echo -n "local_${tag}\t0" >> ${prefix}.mapstat + """ } } @@ -563,26 +569,26 @@ process combine_mapped_files{ saveAs: {filename -> filename.indexOf(".pairstat") > 0 ? 
"stats/$filename" : "$filename"} input: - set val(sample), file(aligned_bam) from bwt2_merged_bam.groupTuple() + set val(sample), file(aligned_bam) from bwt2_merged_bam.groupTuple() output: - set val(sample), file("${sample}_bwt2pairs.bam") into paired_bam - set val(oname), file("*.pairstat") into all_pairstat + set val(sample), file("${sample}_bwt2pairs.bam") into paired_bam + set val(oname), file("*.pairstat") into all_pairstat script: - r1_bam = aligned_bam[0] - r1_prefix = r1_bam.toString() - ~/_bwt2merged.bam$/ - r2_bam = aligned_bam[1] - r2_prefix = r2_bam.toString() - ~/_bwt2merged.bam$/ - oname = sample.toString() - ~/(\.[0-9]+)$/ - - def opts = "-t" - opts = params.rm_singleton ? "${opts}" : "--single ${opts}" - opts = params.rm_multi ? "${opts}" : "--multi ${opts}" - if ("$params.min_mapq".isInteger()) opts="${opts} -q ${params.min_mapq}" - """ - mergeSAM.py -f ${r1_bam} -r ${r2_bam} -o ${sample}_bwt2pairs.bam ${opts} - """ + r1_bam = aligned_bam[0] + r1_prefix = r1_bam.toString() - ~/_bwt2merged.bam$/ + r2_bam = aligned_bam[1] + r2_prefix = r2_bam.toString() - ~/_bwt2merged.bam$/ + oname = sample.toString() - ~/(\.[0-9]+)$/ + + def opts = "-t" + opts = params.rm_singleton ? "${opts}" : "--single ${opts}" + opts = params.rm_multi ? "${opts}" : "--multi ${opts}" + if ("$params.min_mapq".isInteger()) opts="${opts} -q ${params.min_mapq}" + """ + mergeSAM.py -f ${r1_bam} -r ${r2_bam} -o ${sample}_bwt2pairs.bam ${opts} + """ } @@ -597,34 +603,33 @@ if (!params.dnase){ saveAs: {filename -> filename.indexOf("*stat") > 0 ? "stats/$filename" : "$filename"} input: - set val(sample), file(pe_bam) from paired_bam - file frag_file from res_frag_file.collect() + set val(sample), file(pe_bam) from paired_bam + file frag_file from res_frag_file.collect() output: - set val(sample), file("*.validPairs") into valid_pairs - set val(sample), file("*.validPairs") into valid_pairs_4cool - set val(sample), file("*.DEPairs") into de_pairs - set val(sample), file("*.SCPairs") into sc_pairs - set val(sample), file("*.REPairs") into re_pairs - set val(sample), file("*.FiltPairs") into filt_pairs - set val(sample), file("*RSstat") into all_rsstat + set val(sample), file("*.validPairs") into valid_pairs + set val(sample), file("*.validPairs") into valid_pairs_4cool + set val(sample), file("*.DEPairs") into de_pairs + set val(sample), file("*.SCPairs") into sc_pairs + set val(sample), file("*.REPairs") into re_pairs + set val(sample), file("*.FiltPairs") into filt_pairs + set val(sample), file("*RSstat") into all_rsstat script: - if (params.splitFastq){ - sample = sample.toString() - ~/(\.[0-9]+)$/ - } - - def opts = "" - if ("$params.min_cis_dist".isInteger()) opts="${opts} -d ${params.min_cis_dist}" - if ("$params.min_insert_size".isInteger()) opts="${opts} -s ${params.min_insert_size}" - if ("$params.max_insert_size".isInteger()) opts="${opts} -l ${params.max_insert_size}" - if ("$params.min_restriction_fragment_size".isInteger()) opts="${opts} -t ${params.min_restriction_fragment_size}" - if ("$params.max_restriction_fragment_size".isInteger()) opts="${opts} -m ${params.max_restriction_fragment_size}" - if (params.saveInteractionBAM) opts="${opts} --sam" - - """ - mapped_2hic_fragments.py -f ${frag_file} -r ${pe_bam} --all ${opts} - """ + if (params.splitFastq){ + sample = sample.toString() - ~/(\.[0-9]+)$/ + } + + def opts = "" + if ("$params.min_cis_dist".isInteger()) opts="${opts} -d ${params.min_cis_dist}" + if ("$params.min_insert_size".isInteger()) opts="${opts} -s ${params.min_insert_size}" + if 
("$params.max_insert_size".isInteger()) opts="${opts} -l ${params.max_insert_size}" + if ("$params.min_restriction_fragment_size".isInteger()) opts="${opts} -t ${params.min_restriction_fragment_size}" + if ("$params.max_restriction_fragment_size".isInteger()) opts="${opts} -m ${params.max_restriction_fragment_size}" + if (params.saveInteractionBAM) opts="${opts} --sam" + """ + mapped_2hic_fragments.py -f ${frag_file} -r ${pe_bam} --all ${opts} + """ } } else{ @@ -634,23 +639,23 @@ else{ saveAs: {filename -> filename.indexOf("*stat") > 0 ? "stats/$filename" : "$filename"} input: - set val(sample), file(pe_bam) from paired_bam + set val(sample), file(pe_bam) from paired_bam output: - set val(sample), file("*.validPairs") into valid_pairs - set val(sample), file("*.validPairs") into valid_pairs_4cool - set val(sample), file("*RSstat") into all_rsstat + set val(sample), file("*.validPairs") into valid_pairs + set val(sample), file("*.validPairs") into valid_pairs_4cool + set val(sample), file("*RSstat") into all_rsstat script: - if (params.splitFastq){ - sample = sample.toString() - ~/(\.[0-9]+)$/ - } - - def opts = "" - if ("$params.min_cis_dist".isInteger()) opts="${opts} -d ${params.min_cis_dist}" - """ - mapped_2hic_dnase.py -r ${pe_bam} ${opts} - """ + if (params.splitFastq){ + sample = sample.toString() - ~/(\.[0-9]+)$/ + } + + def opts = "" + if ("$params.min_cis_dist".isInteger()) opts="${opts} -d ${params.min_cis_dist}" + """ + mapped_2hic_dnase.py -r ${pe_bam} ${opts} + """ } } @@ -665,12 +670,12 @@ process remove_duplicates { saveAs: {filename -> filename.indexOf("*stat") > 0 ? "stats/$sample/$filename" : "$filename"} input: - set val(sample), file(vpairs) from valid_pairs.groupTuple() + set val(sample), file(vpairs) from valid_pairs.groupTuple() output: - set val(sample), file("*.allValidPairs") into all_valid_pairs - set val(sample), file("*.allValidPairs") into all_valid_pairs_4cool - file("stats/") into all_mergestat + set val(sample), file("*.allValidPairs") into all_valid_pairs + set val(sample), file("*.allValidPairs") into all_valid_pairs_4cool + file("stats/") into all_mergestat script: if ( params.rm_dup ){ @@ -710,21 +715,20 @@ process merge_sample { publishDir "${params.outdir}/hic_results/stats/${sample}", mode: 'copy' input: - set val(prefix), file(fstat) from all_mapstat.groupTuple().concat(all_pairstat.groupTuple(), all_rsstat.groupTuple()) + set val(prefix), file(fstat) from all_mapstat.groupTuple().concat(all_pairstat.groupTuple(), all_rsstat.groupTuple()) - output: - file("mstats/") into all_mstats + output: + file("mstats/") into all_mstats script: - sample = prefix.toString() - ~/(_R1|_R2|_val_1|_val_2|_1|_2)/ - if ( (fstat =~ /.mapstat/) ){ ext = "mmapstat" } - if ( (fstat =~ /.pairstat/) ){ ext = "mpairstat" } - if ( (fstat =~ /.RSstat/) ){ ext = "mRSstat" } - - """ - mkdir -p mstats/${sample} - merge_statfiles.py -f ${fstat} > mstats/${sample}/${prefix}.${ext} - """ + sample = prefix.toString() - ~/(_R1|_R2|_val_1|_val_2|_1|_2)/ + if ( (fstat =~ /.mapstat/) ){ ext = "mmapstat" } + if ( (fstat =~ /.pairstat/) ){ ext = "mpairstat" } + if ( (fstat =~ /.RSstat/) ){ ext = "mRSstat" } + """ + mkdir -p mstats/${sample} + merge_statfiles.py -f ${fstat} > mstats/${sample}/${prefix}.${ext} + """ } @@ -733,15 +737,15 @@ process build_contact_maps{ publishDir "${params.outdir}/hic_results/matrix/raw", mode: 'copy' when: - !params.skip_maps + !params.skipMaps input: - set val(sample), file(vpairs), val(mres) from all_valid_pairs.combine(map_res) - file chrsize from 
chromosome_size.collect() + set val(sample), file(vpairs), val(mres) from all_valid_pairs.combine(map_res) + file chrsize from chromosome_size.collect() output: - file("*.matrix") into raw_maps - file "*.bed" + file("*.matrix") into raw_maps + file "*.bed" script: """ @@ -758,14 +762,14 @@ process run_ice{ publishDir "${params.outdir}/hic_results/matrix/iced", mode: 'copy' when: - !params.skip_maps && !params.skip_ice + !params.skipMaps && !params.skipIce input: - file(rmaps) from raw_maps - file "*.biases" + file(rmaps) from raw_maps + file "*.biases" output: - file("*iced.matrix") into iced_maps + file("*iced.matrix") into iced_maps script: prefix = rmaps.toString() - ~/(\.matrix)?$/ @@ -786,14 +790,14 @@ process generate_cool{ publishDir "${params.outdir}/export/cool", mode: 'copy' when: - !params.skip_cool + !params.skipCool input: - set val(sample), file(vpairs) from all_valid_pairs_4cool - file chrsize from chromosome_size_cool.collect() + set val(sample), file(vpairs) from all_valid_pairs_4cool + file chrsize from chromosome_size_cool.collect() output: - file("*mcool") into cool_maps + file("*mcool") into cool_maps script: """ @@ -803,51 +807,50 @@ process generate_cool{ /* - * STEP 5 - MultiQC + * STEP 6 - MultiQC */ process multiqc { - publishDir "${params.outdir}/MultiQC", mode: 'copy' + publishDir "${params.outdir}/MultiQC", mode: 'copy' - when: - !params.skip_multiqc - - input: - file multiqc_config from ch_multiqc_config - file ('input_*/*') from all_mstats.concat(all_mergestat).collect() - file ('software_versions/*') from software_versions_yaml - file workflow_summary from create_workflow_summary(summary) + when: + !params.skipMultiQC - output: - file "*multiqc_report.html" into multiqc_report - file "*_data" + input: + file multiqc_config from ch_multiqc_config + file ('input_*/*') from all_mstats.concat(all_mergestat).collect() + file ('software_versions/*') from software_versions_yaml + file workflow_summary from create_workflow_summary(summary) - script: - rtitle = custom_runName ? "--title \"$custom_runName\"" : '' - rfilename = custom_runName ? "--filename " + custom_runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" : '' + output: + file "*multiqc_report.html" into multiqc_report + file "*_data" - """ - multiqc -f $rtitle $rfilename --config $multiqc_config . - """ + script: + rtitle = custom_runName ? "--title \"$custom_runName\"" : '' + rfilename = custom_runName ? "--filename " + custom_runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" : '' + """ + multiqc -f $rtitle $rfilename --config $multiqc_config . + """ } /* - * STEP 3 - Output Description HTML + * STEP 7 - Output Description HTML */ process output_documentation { - publishDir "${params.outdir}/pipeline_info", mode: 'copy' + publishDir "${params.outdir}/pipeline_info", mode: 'copy' - input: - file output_docs from ch_output_docs + input: + file output_docs from ch_output_docs - output: - file "results_description.html" + output: + file "results_description.html" - script: - """ - markdown_to_html.r $output_docs results_description.html - """ + script: + """ + markdown_to_html.r $output_docs results_description.html + """ } @@ -948,10 +951,10 @@ workflow.onComplete { c_green = params.monochrome_logs ? '' : "\033[0;32m"; c_red = params.monochrome_logs ? 
'' : "\033[0;31m"; - if (workflow.stats.ignoredCountFmt > 0 && workflow.success) { + if (workflow.stats.ignoredCount > 0 && workflow.success) { log.info "${c_purple}Warning, pipeline completed, but with errored process(es) ${c_reset}" - log.info "${c_red}Number of ignored errored process(es) : ${workflow.stats.ignoredCountFmt} ${c_reset}" - log.info "${c_green}Number of successfully ran process(es) : ${workflow.stats.succeedCountFmt} ${c_reset}" + log.info "${c_red}Number of ignored errored process(es) : ${workflow.stats.ignoredCount} ${c_reset}" + log.info "${c_green}Number of successfully ran process(es) : ${workflow.stats.succeedCount} ${c_reset}" } if(workflow.success){ @@ -976,14 +979,14 @@ def nfcoreHeader(){ c_cyan = params.monochrome_logs ? '' : "\033[0;36m"; c_white = params.monochrome_logs ? '' : "\033[0;37m"; - return """ ${c_dim}----------------------------------------------------${c_reset} + return """ -${c_dim}--------------------------------------------------${c_reset}- ${c_green},--.${c_black}/${c_green},-.${c_reset} ${c_blue} ___ __ __ __ ___ ${c_green}/,-._.--~\'${c_reset} ${c_blue} |\\ | |__ __ / ` / \\ |__) |__ ${c_yellow}} {${c_reset} ${c_blue} | \\| | \\__, \\__/ | \\ |___ ${c_green}\\`-._,-`-,${c_reset} ${c_green}`._,._,\'${c_reset} - ${c_purple} nf-core/hic v${workflow.manifest.version}${c_reset} - ${c_dim}----------------------------------------------------${c_reset} + ${c_purple} nf-core/atacseq v${workflow.manifest.version}${c_reset} + -${c_dim}--------------------------------------------------${c_reset}- """.stripIndent() } diff --git a/nextflow.config b/nextflow.config index 37a7d3c86ea1e5af718b17bfdd94f6b2bbe3795d..5d69802ee9402cae44e25e6880d3353e4d236561 100644 --- a/nextflow.config +++ b/nextflow.config @@ -16,10 +16,10 @@ params { readPaths = false chromosome_size = false restriction_fragments = false - skip_maps = false - skip_ice = false - skip_cool = false - skip_multiqc = false + skipMaps = false + skipIce = false + skipCool = false + skipMultiQC = false dnase = false // Boilerplate options @@ -45,7 +45,7 @@ params { // Container slug. Stable releases should specify release tag! // Developmental code should specify :dev -process.container = 'nfcore/hic:1.1.0' +process.container = 'nfcore/hic:dev' // Load base.config by default for all pipelines includeConfig 'conf/base.config' @@ -101,8 +101,8 @@ manifest { homePage = 'https://github.com/nf-core/hic' description = 'Analysis of Chromosome Conformation Capture data (Hi-C)' mainScript = 'main.nf' - nextflowVersion = '>=0.32.0' - version = '1.1.0' + nextflowVersion = '>=19.04.0' + version = '1.1.1dev' } // Function to ensure that resource requirements don't go beyond