From c93fa3be844bb7ff08fa6cf15f4214cfccf6fd52 Mon Sep 17 00:00:00 2001
From: nservant <nservant@curie.fr>
Date: Tue, 2 Apr 2019 23:07:08 +0200
Subject: [PATCH] add igenomes

---
 conf/base.config     |   1 +
 conf/hicpro.config   |  11 +--
 conf/igenomes.config |  89 ++++++----------------
 conf/test.config     |  10 ++-
 main.nf              | 173 +++++++++++++++++++++++++++++++++----------
 nextflow.config      |   6 +-
 6 files changed, 175 insertions(+), 115 deletions(-)

diff --git a/conf/base.config b/conf/base.config
index 311c186..11bc185 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -69,4 +69,5 @@ params {
   max_memory = 20.GB
   max_cpus = 1
   max_time = 24.h
+  igenomes_base = 's3://ngi-igenomes/igenomes/'
 }
diff --git a/conf/hicpro.config b/conf/hicpro.config
index 9ef8dc6..268d4d4 100644
--- a/conf/hicpro.config
+++ b/conf/hicpro.config
@@ -9,17 +9,18 @@
 
 params {
 
+       // Genome Reference
+       // bwt2_index is deliberately not set here: main.nf derives it from --genome (iGenomes), and a config value would take precedence
+       chromosome_size = false
+
        // Alignment options
-       bwt2_index = '/data/annotations/pipelines/Human/hg19/indexes/bowtie2/hg19'
        bwt2_opts_end2end = '--very-sensitive -L 30 --score-min L,-0.6,-0.2 --end-to-end --reorder'
        bwt2_opts_trimmed = '--very-sensitive -L 20 --score-min L,-0.6,-0.2 --end-to-end --reorder'
        min_mapq = 10       
 
-       // Genome Reference
-       chromosome_size = '/data/users/nservant/Apps/HiC-Pro_annotation/chrom_hg19.sizes'
-
        // Digestion Hi-C
-       restriction_fragment_bed = '/data/users/nservant/Apps/HiC-Pro_annotation/HindIII_resfrag_hg19.bed'
+       restriction_site = 'A^AGCTT'   // HindIII cutting motif; consistent with ligation_site below (AAGCT + AGCTT)
+       restriction_fragments = false
        ligation_site = 'AAGCTAGCTT'
        min_restriction_fragment_size = 0
        max_restriction_fragment_size = 100
diff --git a/conf/igenomes.config b/conf/igenomes.config
index d19e61f..5cb5f85 100644
--- a/conf/igenomes.config
+++ b/conf/igenomes.config
@@ -9,139 +9,94 @@
 
 params {
   // illumina iGenomes reference file paths
-  // TODO nf-core: Add new reference types and strip out those that are not needed
   genomes {
     'GRCh37' {
-      bed12   = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed"
       fasta   = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa"
-      gtf     = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf"
-      star    = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/"
+      bowtie2    = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/genome"
     }
     'GRCm38' {
-      bed12   = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed"
       fasta   = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa"
-      gtf     = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf"
-      star    = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/"
+      bowtie2    = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/genome"
     }
     'TAIR10' {
-      bed12   = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed"
       fasta   = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa"
-      gtf     = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf"
-      star    = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/"
+      bowtie2    = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/genome"
     }
     'EB2' {
-      bed12   = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed"
       fasta   = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa"
-      gtf     = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf"
-      star    = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/"
+      bowtie2    = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/genome"
     }
     'UMD3.1' {
-      bed12   = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed"
       fasta   = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa"
-      gtf     = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf"
-      star    = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/"
+      bowtie2    = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/genome"
     }
     'WBcel235' {
-      bed12   = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed"
       fasta   = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa"
-      gtf     = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf"
-      star    = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/"
+      bowtie2    = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/genome"
     }
     'CanFam3.1' {
-      bed12   = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed"
       fasta   = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa"
-      gtf     = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf"
-      star    = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/"
+      bowtie2    = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/genome"
     }
     'GRCz10' {
-      bed12   = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed"
       fasta   = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa"
-      gtf     = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf"
-      star    = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/"
+      bowtie2    = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/genome"
     }
     'BDGP6' {
-      bed12   = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed"
       fasta   = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa"
-      gtf     = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf"
-      star    = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/"
+      bowtie2    = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/genome"
     }
     'EquCab2' {
-      bed12   = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed"
       fasta   = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa"
-      gtf     = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf"
-      star    = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/"
+      bowtie2    = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/genome"
     }
     'EB1' {
-      bed12   = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed"
       fasta   = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa"
-      gtf     = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf"
-      star    = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/"
+      bowtie2    = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/genome"
     }
     'Galgal4' {
-      bed12   = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed"
       fasta   = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa"
-      gtf     = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf"
-      star    = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/"
+      bowtie2    = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/genome"
     }
     'Gm01' {
-      bed12   = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed"
       fasta   = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa"
-      gtf     = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf"
-      star    = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/"
+      bowtie2    = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/genome"
     }
     'Mmul_1' {
-      bed12   = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed"
       fasta   = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa"
-      gtf     = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf"
-      star    = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/"
+      bowtie2    = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/genome"
     }
     'IRGSP-1.0' {
-      bed12   = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed"
       fasta   = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa"
-      gtf     = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf"
-      star    = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/"
+      bowtie2    = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/genome"
     }
     'CHIMP2.1.4' {
-      bed12   = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed"
       fasta   = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa"
-      gtf     = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf"
-      star    = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/"
+      bowtie2    = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/genome"
     }
     'Rnor_6.0' {
-      bed12   = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed"
       fasta   = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa"
-      gtf     = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf"
-      star    = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/"
+      bowtie2    = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/genome"
     }
     'R64-1-1' {
-      bed12   = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed"
       fasta   = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa"
-      gtf     = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf"
-      star    = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/"
+      bowtie2    = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/genome"
     }
     'EF2' {
-      bed12   = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed"
       fasta   = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa"
-      gtf     = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf"
-      star    = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/"
+      bowtie2    = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/genome"
     }
     'Sbi1' {
-      bed12   = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed"
       fasta   = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa"
-      gtf     = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf"
-      star    = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/"
+      bowtie2    = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/genome"
     }
     'Sscrofa10.2' {
-      bed12   = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed"
       fasta   = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa"
-      gtf     = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf"
-      star    = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/"
+      bowtie2    = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/genome"
     }
     'AGPv3' {
-      bed12   = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed"
       fasta   = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa"
-      gtf     = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf"
-      star    = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/"
+      bowtie2    = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/genome"
     }
   }
 }
diff --git a/conf/test.config b/conf/test.config
index caad86c..e13858c 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -9,6 +9,9 @@
 
 params {
 
+  config_profile_name = 'Hi-C test data from Dixon et al. (2012)'
+  config_profile_description = 'Minimal test dataset to check pipeline function'
+
   // Limit resources so that this can run on Travis
   max_cpus = 4
   max_memory = 6.GB
@@ -16,7 +19,10 @@ params {
   
   // Input data
   readPaths = [
-    ['SRR400264_00', ['https://github.com/nf-core/test-datasets/raw/hic/SRR400264_00_R1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/hic/SRR400264_00_R2.fastq.gz']],
-    ['SRR400264_01', ['https://github.com/nf-core/test-datasets/raw/hic/SRR400264_01_R1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/hic/SRR400264_01_R2.fastq.gz']]
+    ['SRR400264_00', ['https://github.com/nf-core/test-datasets/raw/hic/testdata/SRR400264_00_R1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/hic/testdata/SRR400264_00_R2.fastq.gz']],
+    ['SRR400264_01', ['https://github.com/nf-core/test-datasets/raw/hic/testdata/SRR400264_01_R1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/hic/testdata/SRR400264_01_R2.fastq.gz']]
   ]
+
+  // Annotations
+  genome = 'GRCh37'
 }
diff --git a/main.nf b/main.nf
index 618977b..ee1945d 100644
--- a/main.nf
+++ b/main.nf
@@ -12,7 +12,6 @@
 
 /*TOOO
 - outputs
-- env
 - multiqc
 - update version tools
 - install + compile
@@ -38,23 +37,27 @@ def helpMessage() {
 
     The typical command for running the pipeline is as follows:
 
-    nextflow run nf-core/hic --reads '*_R{1,2}.fastq.gz' -profile docker
+    nextflow run nf-core/hic --reads '*_R{1,2}.fastq.gz' -profile conda
 
     Mandatory arguments:
-      --reads                   Path to input data (must be surrounded with quotes)
-     // --genome                      Name of iGenomes reference
-      -profile                      Configuration profile to use. Can use multiple (comma separated)
-                                    Available: conda, docker, singularity, awsbatch, test and more.
+      --reads				    Path to input data (must be surrounded with quotes)
+      --genome                       	    Name of iGenomes reference
+      -profile                      	    Configuration profile to use. Can use multiple (comma separated)
+                                    	    Available: conda, docker, singularity, awsbatch, test and more.
+
+    References                      	    If not specified in the configuration file or you wish to overwrite any of the references.
+      --bwt2_index                     	    Path to Bowtie2 index
+      --fasta                       	    Path to Fasta reference
+      --chromosome_size             	    Path to chromosome size file
+      --restriction_fragments               Path to restriction fragment file (bed)
 
     Options:
-      --bwt2_index		   Path to bowtie2 indexes (including indexes prefix)
-      --bwt2_opts_end2end	   Option for bowtie2 end-to-end mappinf (first mapping step)
-      --bwt2_opts_trimmed	   Option for bowtie2 mapping after ligation site trimming
-      --min_mapq		   Minimum mapping quality values to consider
+      --bwt2_opts_end2end		    Options for bowtie2 end-to-end mappinf (first mapping step)
+      --bwt2_opts_trimmed	    	    Options for bowtie2 mapping after ligation site trimming
+      --min_mapq		    	    Minimum mapping quality values to consider
 
-      --chromosome_size		   Path to chromosome size file
-      --restriction_fragment_bed   Path to restriction fragment file (bed)
-      --ligation-site		   Ligation motifs to trim (comma separated)
+      --restriction_site                    Cutting motif(s) of restriction enzyme(s) (comma separated)
+      --ligation_site                       Ligation motifs to trim (comma separated)
 
       --min_restriction_fragment_size	    Minimum size of restriction fragments to consider
       --max_restriction_framgnet_size	    Maximum size of restriction fragmants to consider
@@ -71,18 +74,14 @@ def helpMessage() {
       --ice_filter_high_count_perc	    Percentage of high counts columns/rows to filter before ICE normalization
       --ice_eps				    Convergence criteria for ICE normalization
 
-
-    //References                      If not specified in the configuration file or you wish to overwrite any of the references.
-    //  --fasta                       Path to Fasta reference
-
     Other options:
-      --outdir                      The output directory where the results will be saved
-      --email                       Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits
-      -name                         Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic.
+      --outdir				    The output directory where the results will be saved
+      --email                       	    Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits
+      -name                         	    Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic.
 
     AWSBatch options:
-      --awsqueue                    The AWSBatch JobQueue that needs to be set when running on AWSBatch
-      --awsregion                   The AWS Region for your AWS Batch job to run on
+      --awsqueue			    The AWSBatch JobQueue that needs to be set when running on AWSBatch
+      --awsregion                   	    The AWS Region for your AWS Batch job to run on
     """.stripIndent()
 }
 
@@ -96,13 +95,15 @@ if (params.help){
     exit 0
 }
 
-// TODO nf-core: Add any reference files that are needed
-// Configurable reference genomes
-//fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false
-//if ( params.fasta ){
-//    fasta = file(params.fasta)
-//    if( !fasta.exists() ) exit 1, "Fasta file not found: ${params.fasta}"
-//}
+// Check if genome exists in the config file
+if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) {
+    exit 1, "The provided genome '${params.genome}' is not available in the iGenomes file. Currently the available genomes are ${params.genomes.keySet().join(", ")}"
+}
+
+// Reference index path configuration
+// Define these here - after the profiles are loaded with the iGenomes paths
+params.bwt2_index = params.genome ? params.genomes[ params.genome ].bowtie2 ?: false : false
+params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false
 
 
 // Has the run name been specified by the user?
@@ -112,7 +113,6 @@ if( !(workflow.runName ==~ /[a-z]+_[a-z]+/) ){
   custom_runName = workflow.runName
 }
 
-
 if( workflow.profile == 'awsbatch') {
   // AWSBatch sanity checking
   if (!params.awsqueue || !params.awsregion) exit 1, "Specify correct --awsqueue and --awsregion parameters on AWSBatch!"
@@ -172,16 +172,50 @@ if (params.readPaths){
  * Other input channels
  */
 
-// Bowtie2 Index
-bwt2_file = file("${params.bwt2_index}.1.bt2")
-if( !bwt2_file.exists() ) exit 1, "Reference genome Bowtie 2 not found: ${params.bwt2_index}"
-bwt2_index = Channel.value( "${params.bwt2_index}" )
+// Reference genome
 
+if ( params.bwt2_index ){
+   bwt2_file = file("${params.bwt2_index}.1.bt2")
+   if( !bwt2_file.exists() ) exit 1, "Reference genome Bowtie 2 not found: ${params.bwt2_index}"
+   bwt2_index = Channel.value( "${params.bwt2_index}" )
+}
+else if ( params.fasta ) {
+   Channel.fromPath(params.fasta)
+	.ifEmpty { exit 1, "Fasta file not found: ${params.fasta}" }
+        .set { fasta_for_index }
+}
+else {
+   exit 1, "No reference genome specified!"
+}
 
-res_frag_file = Channel.value( "${params.restriction_fragment_bed}" )
-chr_size = Channel.value( "${params.chromosome_size}" )
-map_res = Channel.from( params.bins_size.tokenize(',') )
+// Chromosome size
 
+if ( params.chromosome_size ){
+   chromosome_size = Channel.value( "${params.chromosome_size}" )
+}
+else if ( params.fasta ){
+   Channel.fromPath(params.fasta)
+	.ifEmpty { exit 1, "Fasta file not found: ${params.fasta}" }
+       	.set { fasta_for_chromsize }
+}
+else {
+   exit 1, "No chromosome size specified!"
+}
+
+// Restriction fragments: use the supplied BED file, otherwise digest the fasta with the given restriction site
+if ( params.restriction_fragments ){
+   res_frag_file = Channel.value( "${params.restriction_fragments}" )
+}
+else if ( params.fasta && params.restriction_site ){
+   Channel.fromPath(params.fasta)
+           .ifEmpty { exit 1, "Fasta file not found: ${params.fasta}" }
+           .set { fasta_for_resfrag }
+} else {   // neither a fragment file nor fasta + restriction site was provided
+    exit 1, "No restriction fragments file specified!"
+}
+
+// Resolutions for contact maps
+map_res = Channel.from( params.bins_size.tokenize(',') )
 
 
 /**********************************************************
@@ -204,7 +238,8 @@ summary['Pipeline Version'] = workflow.manifest.version
 summary['Run Name']     = custom_runName ?: workflow.runName
 // TODO nf-core: Report custom parameters here
 summary['Reads']        = params.reads
-//summary['Fasta Ref']    = params.fasta
+summary['Fasta Ref']    = params.fasta
+
 summary['Max Memory']   = params.max_memory
 summary['Max CPUs']     = params.max_cpus
 summary['Max Time']     = params.max_time
@@ -266,6 +301,68 @@ process get_software_versions {
 }
 
 
+/****************************************************
+ * PRE-PROCESSING
+ */
+
+if(!params.bwt2_index && params.fasta){  // build a Bowtie2 index only when none was supplied
+    process makeBowtieIndex {
+        tag "$fasta"
+        publishDir path: { params.saveReference ? "${params.outdir}/reference_genome" : params.outdir },
+                   saveAs: { params.saveReference ? it : null }, mode: 'copy'
+
+        input:
+        file fasta from fasta_for_index
+
+        output:
+        file "bowtie2" into bwt2_index  // NOTE(review): a directory here vs. an index prefix string when --bwt2_index is set; confirm consumers
+
+        script:  // the script must create the declared "bowtie2" output and write the index files inside it
+        """
+        mkdir bowtie2 && bowtie2-build ${fasta} bowtie2/${fasta.baseName}
+        """
+      }
+ }
+
+
+if(!params.chromosome_size && params.fasta){  // derive chromosome sizes only when no file was supplied
+    process makeChromSize {
+        tag "$fasta"
+        publishDir path: { params.saveReference ? "${params.outdir}/reference_genome" : params.outdir },
+                   saveAs: { params.saveReference ? it : null }, mode: 'copy'
+
+        input:
+        file fasta from fasta_for_chromsize
+
+        output:
+        file "*.size" into chromosome_size
+
+        script:  // faidx without regions writes <fasta>.fai and prints nothing, so read name/length from the index file
+        """
+        samtools faidx ${fasta} && cut -f1,2 ${fasta}.fai > chrom.size
+        """
+      }
+ }
+
+if(!params.restriction_fragments && params.fasta){  // digest the genome only when no fragment file was supplied
+    process makeRestrictionFragments {
+        tag "$fasta"
+        publishDir path: { params.saveReference ? "${params.outdir}/reference_genome" : params.outdir },
+                   saveAs: { params.saveReference ? it : null }, mode: 'copy'
+
+        input:
+        file fasta from fasta_for_resfrag
+
+        output:
+        file "*.bed" into res_frag_file  // same channel name as the --restriction_fragments branch
+
+        script:
+        """
+        python digest_genome.py -r ${params.restriction_site} -o restriction_fragments.bed ${fasta}
+        """
+      }
+ }
+
 /****************************************************
  * MAIN WORKFLOW
  */
diff --git a/nextflow.config b/nextflow.config
index 27c12fe..fe584c3 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -63,9 +63,9 @@ profiles {
 }
 
 // Load igenomes.config if required
-//if(!params.igenomesIgnore){
-//  includeConfig 'conf/igenomes.config'
-//}
+if(!params.igenomesIgnore){
+  includeConfig 'conf/igenomes.config'
+}
 
 // Capture exit codes from upstream processes when piping
 process.shell = ['/bin/bash', '-euo', 'pipefail']
-- 
GitLab