diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 26d2e7e9f9223900ddc066163f442f821d1a62f3..a43253198aa92a5832082dcd687516047b7af65b 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -20,7 +20,6 @@ jobs: - name: Install awscli run: conda install -c conda-forge awscli - name: Start AWS batch job - # TODO nf-core: You can customise AWS full pipeline tests as required # Add full size test data (but still relatively small datasets for few samples) # on the `test_full.config` test runs with only one set of parameters # Then specify `-profile test_full` instead of `-profile test` on the AWS batch command diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 78ee5bc67a2ad5565bca78f0de19cb639bf51998..94bca240004c28e2608c4e1a8c2a82bb054ce428 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -21,7 +21,6 @@ jobs: - name: Install awscli run: conda install -c conda-forge awscli - name: Start AWS batch job - # TODO nf-core: You can customise CI pipeline run tests as required # For example: adding multiple test runs with different parameters # Remember that you can parallelise this by using strategy.matrix env: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fe636dd4fdc884d7ffc18dfc0b7ae81a186796f4..a8a8ba508df126c0af56f37daa5fe7e94a976875 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -34,13 +34,13 @@ jobs: - name: Build new docker image if: env.GIT_DIFF - run: docker build --no-cache . -t nfcore/hic:dev + run: docker build --no-cache . 
-t nfcore/hic:1.2.2 - name: Pull docker image if: ${{ !env.GIT_DIFF }} run: | docker pull nfcore/hic:dev - docker tag nfcore/hic:dev nfcore/hic:dev + docker tag nfcore/hic:dev nfcore/hic:1.2.2 - name: Install Nextflow run: | @@ -48,8 +48,5 @@ jobs: sudo mv nextflow /usr/local/bin/ - name: Run pipeline with test data - # TODO nf-core: You can customise CI pipeline run tests as required - # For example: adding multiple test runs with different parameters - # Remember that you can parallelise this by using strategy.matrix run: | nextflow run ${GITHUB_WORKSPACE} -profile test,docker diff --git a/Dockerfile b/Dockerfile index 1f8dacfeed27d70458e18e57bd0aac77045df08c..3c8d019dc4fb85111fe16279257c734d8aeafd43 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,10 +10,10 @@ COPY environment.yml / RUN conda env create --quiet -f /environment.yml && conda clean -a # Add conda installation dir to PATH (instead of doing 'conda activate') -ENV PATH /opt/conda/envs/nf-core-hic-1.2.1/bin:$PATH +ENV PATH /opt/conda/envs/nf-core-hic-1.2.2/bin:$PATH # Dump the details of the installed packages to a file for posterity -RUN conda env export --name nf-core-hic-1.2.1 > nf-core-hic-1.2.1.yml +RUN conda env export --name nf-core-hic-1.2.2 > nf-core-hic-1.2.2.yml # Instruct R processes to use these empty files instead of clashing with a local version RUN touch .Rprofile diff --git a/README.md b/README.md index cde30f54178cda6f5a4cc4915d06795359c37f27..da5877743ea00743c8e3e50a01275dece0a2b1bd 100644 --- a/README.md +++ b/README.md @@ -104,8 +104,6 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool 4. Start running your own analysis! 
- <!-- TODO nf-core: Update the example "typical command" below used to run the pipeline --> - ```bash nextflow run nf-core/hic -profile <docker/singularity/conda/institute> --input '*_R{1,2}.fastq.gz' --genome GRCh37 ``` diff --git a/conf/hicpro.config b/conf/hicpro.config index 01b755a955c5aee521a6cf43b00847cfbc8d0cd3..cd0cf0b5a54f860312f49ac193802d53964ce686 100644 --- a/conf/hicpro.config +++ b/conf/hicpro.config @@ -10,7 +10,6 @@ params { // Alignment options - splitFastq = false bwt2_opts_end2end = '--very-sensitive -L 30 --score-min L,-0.6,-0.2 --end-to-end --reorder' bwt2_opts_trimmed = '--very-sensitive -L 20 --score-min L,-0.6,-0.2 --end-to-end --reorder' min_mapq = 10 @@ -35,9 +34,5 @@ params { ice_filer_low_count_perc = 0.02 ice_filer_high_count_perc = 0 ice_eps = 0.1 - - saveReference = false - saveAlignedIntermediates = false - saveInteractionBAM = false } diff --git a/conf/test_full.config b/conf/test_full.config index 921372eec05fe6147c540a39463c54f0ebf09bce..47d31760585c66025666f112dcd03a23faeac543 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -12,11 +12,25 @@ params { config_profile_description = 'Full test dataset to check pipeline function' // Input data for full size test - // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. 
SRA) - // TODO nf-core: Give any required params for the test so that command line flags are not needed - single_end = false input_paths = [ - ['Testdata', ['https://github.com/nf-core/test-datasets/raw/exoseq/testdata/Testdata_R1.tiny.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/exoseq/testdata/Testdata_R2.tiny.fastq.gz']], - ['SRR389222', ['https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub2.fastq.gz']] - ] + ['SRR4292758_00', ['https://github.com/nf-core/test-datasets/raw/hic/data/SRR4292758_00_R1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/hic/data/SRR4292758_00_R2.fastq.gz']] + ] + + // Annotations + fasta = 'https://github.com/nf-core/test-datasets/raw/hic/reference/W303_SGD_2015_JRIU00000000.fsa' + restriction_site = 'A^AGCTT' + ligation_site = 'AAGCTAGCTT' + + min_mapq = 2 + rm_dup = true + rm_singleton = true + rm_multi = true + + min_restriction_fragment_size = 100 + max_restriction_fragment_size = 100000 + min_insert_size = 100 + max_insert_size = 600 + + // Options + skip_cool = true } diff --git a/docs/README.md b/docs/README.md index 67df846ef262b9918e55dff0147baf0d3cc03a9e..a6889549c7f27bda0aed81947685713781fe2d1b 100644 --- a/docs/README.md +++ b/docs/README.md @@ -2,8 +2,6 @@ The nf-core/hic documentation is split into the following pages: -<!-- TODO nf-core: If you write more documentation pages, add them to the docs index page here --> - * [Usage](usage.md) * An overview of how the pipeline works, how to run it and a description of all of the different command-line flags. 
* [Output](output.md) diff --git a/environment.yml b/environment.yml index e8944c64299636c7796f9ff22ff64cc48a7fb22f..ccca9c3d12e94380287c1be0f9f34ca9813890ae 100644 --- a/environment.yml +++ b/environment.yml @@ -1,6 +1,6 @@ # You can use this file to create a conda environment for this pipeline: # conda env create -f environment.yml -name: nf-core-hic-1.2.1 +name: nf-core-hic-1.2.2 channels: - conda-forge - bioconda diff --git a/main.nf b/main.nf index f33f88c9c4c1d0920a9af77f3bec57f8063fb58d..0cfcbc67001461bd73d04d497056d11387c722aa 100644 --- a/main.nf +++ b/main.nf @@ -32,10 +32,10 @@ def helpMessage() { --chromosome_size [file] Path to chromosome size file --restriction_fragments [file] Path to restriction fragment file (bed) --save_reference [bool] Save reference genome to output folder. Default: False - --save_aligned_intermediates [bool] Save intermediates alignment files. Default: False Alignments --split_fastq [bool] Size of read chuncks to use to speed up the workflow. Default: None + --save_aligned_intermediates [bool] Save intermediates alignment files. Default: False --bwt2_opts_end2end [str] Options for bowtie2 end-to-end mappinf (first mapping step). See hic.config for default. --bwt2_opts_trimmed [str] Options for bowtie2 mapping after ligation site trimming. See hic.config for default. --min_mapq [int] Minimum mapping quality values to consider. 
Default: 10 diff --git a/nextflow.config b/nextflow.config index 24ff566a6356cdcadf02f951436b31d8fc29a55c..c598df934ea0bd8b61c414117332211b792a99f8 100644 --- a/nextflow.config +++ b/nextflow.config @@ -15,7 +15,8 @@ params { outdir = './results' genome = false - readPaths = false + input_paths = false + split_fastq = false chromosome_size = false restriction_fragments = false skip_maps = false @@ -25,16 +26,28 @@ params { save_reference = false save_interaction_bam = false save_aligned_intermediates = false - - dnase = false - rm_dup = false - rm_singleton = false - rm_multi = false + + bwt2_opts_end2end = '--very-sensitive -L 30 --score-min L,-0.6,-0.2 --end-to-end --reorder' + bwt2_opts_trimmed = '--very-sensitive -L 20 --score-min L,-0.6,-0.2 --end-to-end --reorder' + min_mapq = 10 + + // Digestion Hi-C + restriction_site = 'A^AGCTT' + ligation_site = 'AAGCTAGCTT' min_restriction_fragment_size = false max_restriction_fragment_size = false min_insert_size = false max_insert_size = false - min_cis_dist = false + dnase = false + rm_dup = false + rm_singleton = false + rm_multi = false + bin_size = '1000000,500000' + ice_max_iter = 100 + ice_filer_low_count_perc = 0.02 + ice_filer_high_count_perc = 0 + ice_eps = 0.1 + publish_dir_mode = 'copy' // Boilerplate options @@ -65,7 +78,7 @@ params { // Container slug. Stable releases should specify release tag! 
// Developmental code should specify :dev -process.container = 'nfcore/hic:1.2.1' +process.container = 'nfcore/hic:1.2.2' // Load base.config by default for all pipelines includeConfig 'conf/base.config' @@ -77,9 +90,6 @@ try { System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") } -// Load hic config file -includeConfig 'conf/hicpro.config' - // Create profiles profiles { conda { process.conda = "$baseDir/environment.yml" } @@ -138,7 +148,7 @@ manifest { description = 'Analysis of Chromosome Conformation Capture data (Hi-C)' mainScript = 'main.nf' nextflowVersion = '>=19.10.0' - version = '1.2.1' + version = '1.2.2' } // Function to ensure that resource requirements don't go beyond diff --git a/nextflow_schema.json b/nextflow_schema.json index ccbfe4b5162528808d9ffb5289a826e15ece76e7..bb4529597d769dc9e5b74ceed8ebcd2567b5ac8a 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -20,6 +20,17 @@ "description": "Input FastQ files.", "help_text": "Use this to specify the location of your input FastQ files. For example:\n\n```bash\n--input 'path/to/data/sample_*_{1,2}.fastq'\n```\n\nPlease note the following requirements:\n\n1. The path must be enclosed in quotes\n2. The path must have at least one `*` wildcard character\n3. When using the pipeline with paired end data, the path must use `{1,2}` notation to specify read pairs.\n\nIf left unspecified, a default pattern is used: `data/*{1,2}.fastq.gz`" }, + "input_paths": { + "type": "string", + "hidden": true, + "description": "Input FastQ files for test only", + "default": "undefined" + }, + "split_fastq": { + "type": "number", + "description": "Split the reads into chunks before running. 
Specify the number of reads per chunk as --split_fastq 20000000.", + "fa_icon": "fas fa-dna" + }, "single_end": { "type": "boolean", "description": "Specifies that the input is single-end reads.", @@ -72,6 +83,180 @@ "fa_icon": "fas fa-ban", "hidden": true, "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." + }, + "bwt2_index": { + "type": "string", + "description": "Full path to directory containing Bowtie index including base name. i.e. `/path/to/index/base`.", + "fa_icon": "far fa-file-alt" + }, + "chromosome_size": { + "type": "string", + "description": "Full path to file specifying chromosome sizes (tab separated with chromosome name and size).", + "fa_icon": "far fa-file-alt", + "help_text": "If not specified, the pipeline will build this file from the reference genome file" + }, + "restriction_fragments": { + "type": "string", + "description": "Full path to restriction fragment (bed) file.", + "fa_icon": "far fa-file-alt", + "help_text": "This file depends on the Hi-C protocols and digestion strategy. If not provided, the pipeline will build it using the --restriction_site option" + }, + "save_reference": { + "type": "boolean", + "description": "If generated by the pipeline save the annotation and indexes in the results directory.", + "help_text": "Use this parameter to save all annotations to your results folder. 
These can then be used for future pipeline runs, reducing processing times.", + "fa_icon": "fas fa-save" + } + } + }, + "data_processing_options": { + "title": "Data processing", + "type": "object", + "description": "Parameters for Hi-C data processing", + "default": "", + "fa_icon": "fas fa-bahai", + "properties": { + "dnase": { + "type": "boolean", + "description": "For Hi-C protocols which are not based on enzyme digestion such as DNase Hi-C" + }, + "restriction_site": { + "type": "string", + "default": "'A^AGCTT'", + "description": "Restriction motifs used during digestion. Several motifs (comma separated) can be provided." + }, + "ligation_site": { + "type": "string", + "default": "'AAGCTAGCTT'", + "description": "Expected motif after DNA ligation. Several motifs (comma separated) can be provided." + }, + "rm_dup": { + "type": "boolean", + "description": "Remove duplicates" + }, + "rm_multi": { + "type": "boolean", + "description": "Remove multi-mapped reads" + }, + "rm_singleton": { + "type": "boolean", + "description": "Remove singleton" + }, + "min_mapq": { + "type": "integer", + "default": "10", + "description": "Keep aligned reads with a minimum quality value" + }, + "bwt2_opts_end2end": { + "type": "string", + "default": "'--very-sensitive -L 30 --score-min L,-0.6,-0.2 --end-to-end --reorder'", + "description": "Option for end-to-end bowtie mapping" + }, + "bwt2_opts_trimmed": { + "type": "string", + "default": "'--very-sensitive -L 20 --score-min L,-0.6,-0.2 --end-to-end --reorder'", + "description": "Option for trimmed reads mapping" + }, + "save_interaction_bam": { + "type": "boolean", + "description": "Save a BAM file where all reads are flagged by their interaction classes" + }, + "save_aligned_intermediates": { + "type": "boolean", + "description": "Save all BAM files during two-step mapping" + } + } + }, + "contacts_calling_options": { + "title": "Contacts calling", + "type": "object", + "description": "Options to call significant interactions", + 
"default": "", + "fa_icon": "fas fa-signature", + "properties": { + "min_cis_dist": { + "type": "string", + "default": "undefined", + "description": "Minimum distance between loci to consider. Useful for --dnase mode to remove spurious ligation products" + }, + "max_insert_size": { + "type": "string", + "default": "undefined", + "description": "Maximum fragment size to consider" + }, + "min_insert_size": { + "type": "string", + "default": "undefined", + "description": "Minimum fragment size to consider" + }, + "max_restriction_fragment_size": { + "type": "string", + "default": "undefined", + "description": "Maximum restriction fragment size to consider" + }, + "min_restriction_fragment_size": { + "type": "string", + "default": "undefined", + "description": "Minimum restriction fragment size to consider" + } + } + }, + "contact_maps_options": { + "title": "Contact maps", + "type": "object", + "description": "Options to build Hi-C contact maps", + "default": "", + "fa_icon": "fas fa-chess-board", + "properties": { + "bin_size": { + "type": "string", + "default": "'1000000,500000'", + "description": "Resolution to build the maps (comma separated)" + }, + "ice_filer_low_count_perc": { + "type": "string", + "default": 0.02, + "description": "Filter low counts rows before normalization" + }, + "ice_filer_high_count_perc": { + "type": "integer", + "default": "0", + "description": "Filter high counts rows before normalization" + }, + "ice_eps": { + "type": "string", + "default": "0.1", + "description": "Threshold for ICE convergence" + }, + "ice_max_iter": { + "type": "integer", + "default": "100", + "description": "Maximum number of iterations for ICE normalization" + } + } + }, + "skip_options": { + "title": "Skip options", + "type": "object", + "description": "Skip some steps of the pipeline", + "default": "", + "fa_icon": "fas fa-random", + "properties": { + "skip_maps": { + "type": "boolean", + "description": "Do not build contact maps" + }, + "skip_ice": { + "type": 
"boolean", + "description": "Do not normalize contact maps" + }, + "skip_cool": { + "type": "boolean", + "description": "Do not generate cooler file" + }, + "skip_multiqc": { + "type": "boolean", + "description": "Do not generate MultiQC report" } } }, @@ -246,6 +431,18 @@ { "$ref": "#/definitions/reference_genome_options" }, + { + "$ref": "#/definitions/data_processing_options" + }, + { + "$ref": "#/definitions/contacts_calling_options" + }, + { + "$ref": "#/definitions/contact_maps_options" + }, + { + "$ref": "#/definitions/skip_options" + }, { "$ref": "#/definitions/generic_options" },