From c93fa3be844bb7ff08fa6cf15f4214cfccf6fd52 Mon Sep 17 00:00:00 2001 From: nservant <nservant@curie.fr> Date: Tue, 2 Apr 2019 23:07:08 +0200 Subject: [PATCH] add igenomes --- conf/base.config | 1 + conf/hicpro.config | 11 +-- conf/igenomes.config | 89 ++++++---------------- conf/test.config | 10 ++- main.nf | 173 +++++++++++++++++++++++++++++++++---------- nextflow.config | 6 +- 6 files changed, 175 insertions(+), 115 deletions(-) diff --git a/conf/base.config b/conf/base.config index 311c186..11bc185 100644 --- a/conf/base.config +++ b/conf/base.config @@ -69,4 +69,5 @@ params { max_memory = 20.GB max_cpus = 1 max_time = 24.h + igenomes_base = 's3://ngi-igenomes/igenomes/' } diff --git a/conf/hicpro.config b/conf/hicpro.config index 9ef8dc6..268d4d4 100644 --- a/conf/hicpro.config +++ b/conf/hicpro.config @@ -9,17 +9,17 @@ params { + // Genome Reference + chromosome_size = false + // Alignment options - bwt2_index = '/data/annotations/pipelines/Human/hg19/indexes/bowtie2/hg19' bwt2_opts_end2end = '--very-sensitive -L 30 --score-min L,-0.6,-0.2 --end-to-end --reorder' bwt2_opts_trimmed = '--very-sensitive -L 20 --score-min L,-0.6,-0.2 --end-to-end --reorder' min_mapq = 10 - // Genome Reference - chromosome_size = '/data/users/nservant/Apps/HiC-Pro_annotation/chrom_hg19.sizes' - // Digestion Hi-C - restriction_fragment_bed = '/data/users/nservant/Apps/HiC-Pro_annotation/HindIII_resfrag_hg19.bed' + restriction_site = 'A^AGCTT' + restriction_fragments = false ligation_site = 'AAGCTAGCTT' min_restriction_fragment_size = 0 max_restriction_fragment_size = 100 diff --git a/conf/igenomes.config b/conf/igenomes.config index d19e61f..5cb5f85 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -9,139 +9,94 @@ params { // illumina iGenomes reference file paths - // TODO nf-core: Add new reference types and strip out those that are not needed genomes { 'GRCh37' { - bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" fasta =
"${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/genome" } 'GRCm38' { - bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" + bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/genome" } 'TAIR10' { - bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" + bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/genome" } 'EB2' { - bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/" + bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/genome" } 'UMD3.1' { - bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" fasta = 
"${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" + bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/genome" } 'WBcel235' { - bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" + bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/genome" } 'CanFam3.1' { - bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" + bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/genome" } 'GRCz10' { - bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" + bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/genome" } 'BDGP6' { - bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" fasta = 
"${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" + bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/genome" } 'EquCab2' { - bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" + bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/genome" } 'EB1' { - bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" + bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/genome" } 'Galgal4' { - bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" + bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/genome" } 'Gm01' { - bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" fasta = 
"${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/" + bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/genome" } 'Mmul_1' { - bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" + bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/genome" } 'IRGSP-1.0' { - bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" + bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/genome" } 'CHIMP2.1.4' { - bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" + bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/genome" } 'Rnor_6.0' { - bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" fasta = 
"${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/genome" } 'R64-1-1' { - bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" + bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/genome" } 'EF2' { - bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" + bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/genome" } 'Sbi1' { - bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" + bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/genome" } 'Sscrofa10.2' { - bed12 = 
"${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" + bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/genome" } 'AGPv3' { - bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" + bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/genome" } } } diff --git a/conf/test.config b/conf/test.config index caad86c..e13858c 100644 --- a/conf/test.config +++ b/conf/test.config @@ -9,6 +9,9 @@ params { + config_profile_name = 'Hi-C test data from Dixon et al. 
(2012)' + config_profile_description = 'Minimal test dataset to check pipeline function' + // Limit resources so that this can run on Travis max_cpus = 4 max_memory = 6.GB @@ -16,7 +19,10 @@ params { // Input data readPaths = [ - ['SRR400264_00', ['https://github.com/nf-core/test-datasets/raw/hic/SRR400264_00_R1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/hic/SRR400264_00_R2.fastq.gz']], - ['SRR400264_01', ['https://github.com/nf-core/test-datasets/raw/hic/SRR400264_01_R1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/hic/SRR400264_01_R2.fastq.gz']] + ['SRR400264_00', ['https://github.com/nf-core/test-datasets/raw/hic/testdata/SRR400264_00_R1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/hic/testdata/SRR400264_00_R2.fastq.gz']], + ['SRR400264_01', ['https://github.com/nf-core/test-datasets/raw/hic/testdata/SRR400264_01_R1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/hic/testdata/SRR400264_01_R2.fastq.gz']] ] + + // Annotations + genome = 'GRCh37' } diff --git a/main.nf b/main.nf index 618977b..ee1945d 100644 --- a/main.nf +++ b/main.nf @@ -12,7 +12,6 @@ /*TOOO - outputs -- env - multiqc - update version tools - install + compile @@ -38,23 +37,27 @@ def helpMessage() { The typical command for running the pipeline is as follows: - nextflow run nf-core/hic --reads '*_R{1,2}.fastq.gz' -profile docker + nextflow run nf-core/hic --reads '*_R{1,2}.fastq.gz' -profile conda Mandatory arguments: - --reads Path to input data (must be surrounded with quotes) - // --genome Name of iGenomes reference - -profile Configuration profile to use. Can use multiple (comma separated) - Available: conda, docker, singularity, awsbatch, test and more. + --reads Path to input data (must be surrounded with quotes) + --genome Name of iGenomes reference + -profile Configuration profile to use. Can use multiple (comma separated) + Available: conda, docker, singularity, awsbatch, test and more.
+ + References If not specified in the configuration file or you wish to overwrite any of the references. + --bwt2_index Path to Bowtie2 index (including index prefix) + --fasta Path to Fasta reference + --chromosome_size Path to chromosome size file + --restriction_fragments Path to restriction fragment file (bed) Options: - --bwt2_index Path to bowtie2 indexes (including indexes prefix) - --bwt2_opts_end2end Option for bowtie2 end-to-end mappinf (first mapping step) - --bwt2_opts_trimmed Option for bowtie2 mapping after ligation site trimming - --min_mapq Minimum mapping quality values to consider + --bwt2_opts_end2end Options for bowtie2 end-to-end mapping (first mapping step) + --bwt2_opts_trimmed Options for bowtie2 mapping after ligation site trimming + --min_mapq Minimum mapping quality values to consider - --chromosome_size Path to chromosome size file - --restriction_fragment_bed Path to restriction fragment file (bed) - --ligation-site Ligation motifs to trim (comma separated) + --restriction_site Cutting motif(s) of restriction enzyme(s) (comma separated) + --ligation_site Ligation motifs to trim (comma separated) --min_restriction_fragment_size Minimum size of restriction fragments to consider --max_restriction_framgnet_size Maximum size of restriction fragmants to consider @@ -71,18 +74,14 @@ def helpMessage() { --ice_filter_high_count_perc Percentage of high counts columns/rows to filter before ICE normalization --ice_eps Convergence criteria for ICE normalization - - //References If not specified in the configuration file or you wish to overwrite any of the references. - // --fasta Path to Fasta reference - Other options: - --outdir The output directory where the results will be saved - --email Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits - -name Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic.
+ --outdir The output directory where the results will be saved + --email Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits + -name Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic. AWSBatch options: - --awsqueue The AWSBatch JobQueue that needs to be set when running on AWSBatch - --awsregion The AWS Region for your AWS Batch job to run on + --awsqueue The AWSBatch JobQueue that needs to be set when running on AWSBatch + --awsregion The AWS Region for your AWS Batch job to run on """.stripIndent() } @@ -96,13 +95,15 @@ if (params.help){ exit 0 } -// TODO nf-core: Add any reference files that are needed -// Configurable reference genomes -//fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false -//if ( params.fasta ){ -// fasta = file(params.fasta) -// if( !fasta.exists() ) exit 1, "Fasta file not found: ${params.fasta}" -//} +// Check if genome exists in the config file +if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { + exit 1, "The provided genome '${params.genome}' is not available in the iGenomes file. Currently the available genomes are ${params.genomes.keySet().join(", ")}" +} + +// Reference index path configuration +// Define these here - after the profiles are loaded with the iGenomes paths +params.bwt2_index = params.genome ? params.genomes[ params.genome ].bowtie2 ?: false : false +params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false // Has the run name been specified by the user? @@ -112,7 +113,6 @@ if( !(workflow.runName ==~ /[a-z]+_[a-z]+/) ){ custom_runName = workflow.runName } - if( workflow.profile == 'awsbatch') { // AWSBatch sanity checking if (!params.awsqueue || !params.awsregion) exit 1, "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" 
@@ -172,16 +172,51 @@ if (params.readPaths){ * Other input channels */ -// Bowtie2 Index -bwt2_file = file("${params.bwt2_index}.1.bt2") -if( !bwt2_file.exists() ) exit 1, "Reference genome Bowtie 2 not found: ${params.bwt2_index}" -bwt2_index = Channel.value( "${params.bwt2_index}" ) +// Reference genome +if ( params.bwt2_index ){ + bwt2_file = file("${params.bwt2_index}.1.bt2") + if( !bwt2_file.exists() ) exit 1, "Reference genome Bowtie 2 not found: ${params.bwt2_index}" + bwt2_index = Channel.value( "${params.bwt2_index}" ) +} +else if ( params.fasta ) { + Channel.fromPath(params.fasta) + .ifEmpty { exit 1, "Fasta file not found: ${params.fasta}" } + .set { fasta_for_index } +} +else { + exit 1, "No reference genome specified!" +} -res_frag_file = Channel.value( "${params.restriction_fragment_bed}" ) -chr_size = Channel.value( "${params.chromosome_size}" ) -map_res = Channel.from( params.bins_size.tokenize(',') ) +// Chromosome size +if ( params.chromosome_size ){ + chromosome_size = Channel.value( "${params.chromosome_size}" ) +} +else if ( params.fasta ){ + Channel.fromPath(params.fasta) + .ifEmpty { exit 1, "Fasta file not found: ${params.fasta}" } + .set { fasta_for_chromsize } +} +else { + exit 1, "No chromosome size specified!" +} + +// Restriction fragments +if ( params.restriction_fragments ){ + res_frag_file = Channel.value( "${params.restriction_fragments}" ) +} +else if ( params.fasta && params.restriction_site ){ + Channel.fromPath(params.fasta) + .ifEmpty { exit 1, "Fasta file not found: ${params.fasta}" } + .set { fasta_for_resfrag } +} +else { + exit 1, "No restriction fragments file specified!"
+} + +// Resolutions for contact maps +map_res = Channel.from( params.bins_size.tokenize(',') ) /********************************************************** @@ -204,7 +238,8 @@ summary['Pipeline Version'] = workflow.manifest.version summary['Run Name'] = custom_runName ?: workflow.runName // TODO nf-core: Report custom parameters here summary['Reads'] = params.reads -//summary['Fasta Ref'] = params.fasta +summary['Fasta Ref'] = params.fasta + summary['Max Memory'] = params.max_memory summary['Max CPUs'] = params.max_cpus summary['Max Time'] = params.max_time @@ -266,6 +301,68 @@ process get_software_versions { } +/**************************************************** + * PRE-PROCESSING + */ + +if(!params.bwt2_index && params.fasta){ + process makeBowtieIndex { + tag "$fasta" + publishDir path: { params.saveReference ? "${params.outdir}/reference_genome" : params.outdir }, + saveAs: { params.saveReference ? it : null }, mode: 'copy' + + input: + file fasta from fasta_for_index + + output: + file "bowtie2" into bwt2_index + + script: + """ + mkdir bowtie2 && bowtie2-build ${fasta} bowtie2/${fasta.baseName} + """ + } + } + + +if(!params.chromosome_size && params.fasta){ + process makeChromSize { + tag "$fasta" + publishDir path: { params.saveReference ? "${params.outdir}/reference_genome" : params.outdir }, + saveAs: { params.saveReference ? it : null }, mode: 'copy' + + input: + file fasta from fasta_for_chromsize + + output: + file "*.size" into chromosome_size + + script: + """ + samtools faidx ${fasta} && cut -f1,2 ${fasta}.fai > chrom.size + """ + } + } + +if(!params.restriction_fragments && params.fasta){ + process makeRestrictionFragments { + tag "$fasta" + publishDir path: { params.saveReference ? "${params.outdir}/reference_genome" : params.outdir }, + saveAs: { params.saveReference ?
it : null }, mode: 'copy' + + input: + file fasta from fasta_for_resfrag + + output: + file "*.bed" into res_frag_file + + script: + """ + python digest_genome.py -r ${params.restriction_site} -o restriction_fragments.bed ${fasta} + """ + } + } + /**************************************************** * MAIN WORKFLOW */ diff --git a/nextflow.config b/nextflow.config index 27c12fe..fe584c3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -63,9 +63,9 @@ profiles { } // Load igenomes.config if required -//if(!params.igenomesIgnore){ -// includeConfig 'conf/igenomes.config' -//} +if(!params.igenomesIgnore){ + includeConfig 'conf/igenomes.config' +} // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] -- GitLab