From fe439e9deb1c339aa2fec026ed868acefe2908c9 Mon Sep 17 00:00:00 2001 From: nservant <nservant@curie.fr> Date: Wed, 3 Apr 2019 10:03:34 +0200 Subject: [PATCH] fix bug in igenomes --- conf/hicpro.config | 8 ++---- conf/igenomes.config | 44 +++++++++++++++--------------- conf/test.config | 4 +-- main.nf | 65 ++++++++++++++++++++++++++------------------ 4 files changed, 65 insertions(+), 56 deletions(-) diff --git a/conf/hicpro.config b/conf/hicpro.config index ed7c3b3..63b1019 100644 --- a/conf/hicpro.config +++ b/conf/hicpro.config @@ -9,10 +9,6 @@ params { - // Reference - chromosome_size = false - bwt2_index = false - // Alignment options bwt2_opts_end2end = '--very-sensitive -L 30 --score-min L,-0.6,-0.2 --end-to-end --reorder' bwt2_opts_trimmed = '--very-sensitive -L 20 --score-min L,-0.6,-0.2 --end-to-end --reorder' @@ -20,7 +16,6 @@ params { // Digestion Hi-C restriction_site = 'A^AGGCT' - restriction_fragments = false ligation_site = 'AAGCTAGCTT' min_restriction_fragment_size = 0 max_restriction_fragment_size = 100 @@ -39,5 +34,8 @@ params { ice_filer_low_count_perc = 0.02 ice_filer_high_count_perc = 0 ice_eps = 0.1 + + saveReference = false + saveAlignedIntermediates = false } diff --git a/conf/igenomes.config b/conf/igenomes.config index 5cb5f85..26950cf 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -12,91 +12,91 @@ params { genomes { 'GRCh37' { fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/genome" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/genome" } 'GRCm38' { fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/genome" + bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/genome" } 'TAIR10' { 
fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/genome" + bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/genome" } 'EB2' { fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/genome" + bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/genome" } 'UMD3.1' { fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/genome" + bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/genome" } 'WBcel235' { fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/genome" + bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/genome" } 'CanFam3.1' { fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/genome" + bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/genome" } 'GRCz10' { fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/genome" + bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/genome" } 'BDGP6' { fasta = 
"${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/genome" + bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/genome" } 'EquCab2' { fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/genome" + bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/genome" } 'EB1' { fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/genome" + bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/genome" } 'Galgal4' { fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/genome" + bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/genome" } 'Gm01' { fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/genome" + bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/genome" } 'Mmul_1' { fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/genome" + bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/genome" } 'IRGSP-1.0' { fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = 
"${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/genome" + bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/genome" } 'CHIMP2.1.4' { fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/genome" + bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/genome" } 'Rnor_6.0' { fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/genome" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/genome" } 'R64-1-1' { fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/genome" + bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/genome" } 'EF2' { fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/genome" + bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/genome" } 'Sbi1' { fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/genome" + bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/genome" } 'Sscrofa10.2' { fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = 
"${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/genome" + bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/genome" } 'AGPv3' { fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" - bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/genome" + bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/genome" } } } diff --git a/conf/test.config b/conf/test.config index 78c37e9..1dd0992 100644 --- a/conf/test.config +++ b/conf/test.config @@ -19,8 +19,8 @@ params { // Input data readPaths = [ - ['SRR400264_00', ['https://github.com/nf-core/test-datasets/raw/hic/testdata/SRR400264_00_R1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/hic/testdata/SRR400264_00_R2.fastq.gz']], - ['SRR400264_01', ['https://github.com/nf-core/test-datasets/raw/hic/testdata/SRR400264_01_R1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/hic/testdata/SRR400264_01_R2.fastq.gz']] + ['SRR400264_00', ['https://github.com/nf-core/test-datasets/raw/hic/SRR400264_00_R1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/hic/SRR400264_00_R2.fastq.gz']], + ['SRR400264_01', ['https://github.com/nf-core/test-datasets/raw/hic/SRR400264_01_R1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/hic/SRR400264_01_R2.fastq.gz']] ] // Annotations diff --git a/main.nf b/main.nf index a891d4e..1e9ed2f 100644 --- a/main.nf +++ b/main.nf @@ -102,9 +102,10 @@ if (params.genomes && params.genome && !params.genomes.containsKey(params.genome // Reference index path configuration // Define these here - after the profiles are loaded with the iGenomes paths -params.bwt2_index = params.genome ? params.genomes[ params.genome ].bowtie2 ?: false : false +params.bwt2_index = params.genome ? params.genomes[ params.genome ].bowtie2 ?: false : false params.fasta = params.genome ? 
params.genomes[ params.genome ].fasta ?: false : false - +//params.chromosome_size = false +//params.restriction_fragments = false // Has the run name been specified by the user? // this has the bonus effect of catching both -name and --name @@ -175,13 +176,20 @@ if (params.readPaths){ // Reference genome if ( params.bwt2_index ){ - bwt2_file = file("${params.bwt2_index}.1.bt2") - if( !bwt2_file.exists() ) exit 1, "Reference genome Bowtie 2 not found: ${params.bwt2_index}" - bwt2_index = Channel.value( "${params.bwt2_index}" ) + lastPath = params.bwt2_index.lastIndexOf(File.separator) + bwt2_dir = params.bwt2_index.substring(0,lastPath+1) + bwt2_base = params.bwt2_index.substring(lastPath+1) + + Channel.fromPath( bwt2_dir, checkIfExists: true ) + .ifEmpty { exit 1, "Genome index: Provided index not found: ${params.bwt2_index}" } + .into { bwt2_index_end2end; bwt2_index_trim } } else if ( params.fasta ) { - Channel.fromPath(params.fasta) - .ifEmpty { exit 1, "Fasta file not found: ${params.fasta}" } + lastPath = params.fasta.lastIndexOf(File.separator) + bwt2_base = params.fasta.substring(lastPath+1) + + Channel.fromPath( params.fasta, checkIfExists: true ) + .ifEmpty { exit 1, "Genome index: Fasta file not found: ${params.fasta}" } .set { fasta_for_index } } else { @@ -191,11 +199,12 @@ else { // Chromosome size if ( params.chromosome_size ){ - chromosome_size = Channel.value( "${params.chromosome_size}" ) + Channel.fromPath( params.chromosome_size, checkIfExists: true ) + .set {chromosome_size} } else if ( params.fasta ){ - Channel.fromPath(params.fasta) - .ifEmpty { exit 1, "Fasta file not found: ${params.fasta}" } + Channel.fromPath( params.fasta, checkIfExists: true ) + .ifEmpty { exit 1, "Chromosome sizes: Fasta file not found: ${params.fasta}" } .set { fasta_for_chromsize } } else { @@ -204,11 +213,12 @@ else { // Restriction fragments if ( params.restriction_fragments ){ - res_frag_file = Channel.value( "${params.restriction_fragments}" ) + Channel.fromPath( 
params.restriction_fragments, checkIfExists: true ) + .set {res_frag_file} } else if ( params.fasta && params.restriction_site ){ - Channel.fromPath(params.fasta) - .ifEmpty { exit 1, "Fasta file not found: ${params.fasta}" } + Channel.fromPath(params.fasta, checkIfExists: true) + .ifEmpty { exit 1, "Restriction fragments: Fasta file not found: ${params.fasta}" } .set { fasta_for_resfrag } } else { @@ -309,8 +319,8 @@ process get_software_versions { if(!params.bwt2_index && params.fasta){ process makeBowtieIndex { tag "$fasta" - publishDir path: { params.saveReference ? "${params.outdir}/reference_genome" : params.outdir }, - saveAs: { params.saveReference ? it : null }, mode: 'copy' + //publishDir path: { params.saveReference ? "${params.outdir}/reference_genome" : params.outdir }, + // saveAs: { params.saveReference ? it : null }, mode: 'copy' input: file fasta from fasta_for_index @@ -321,6 +331,7 @@ if(!params.bwt2_index && params.fasta){ script: """ mkdir bwt2_index + """ } } @@ -329,8 +340,8 @@ if(!params.bwt2_index && params.fasta){ if(!params.chromosome_size && params.fasta){ process makeChromSize { tag "$fasta" - publishDir path: { params.saveReference ? "${params.outdir}/reference_genome" : params.outdir }, - saveAs: { params.saveReference ? it : null }, mode: 'copy' + //publishDir path: { params.saveReference ? "${params.outdir}/reference_genome" : params.outdir }, + // saveAs: { params.saveReference ? it : null }, mode: 'copy' input: file fasta from fasta_for_chromsize @@ -348,18 +359,18 @@ if(!params.chromosome_size && params.fasta){ if(!params.restriction_fragments && params.fasta){ process makeRestrictionFragments { tag "$fasta" - publishDir path: { params.saveReference ? "${params.outdir}/reference_genome" : params.outdir }, - saveAs: { params.saveReference ? it : null }, mode: 'copy' + //publishDir path: { params.saveReference ? "${params.outdir}/reference_genome" : params.outdir }, + // saveAs: { params.saveReference ? 
it : null }, mode: 'copy' input: file fasta from fasta_for_resfrag output: - file "*.bed" into restriction_fragments + file "*.bed" into res_frag_file script: """ - python digest_genome.py -r ${params.restriction_site} -o restriction_fragments.bed ${fasta} + digest_genome.py -r ${params.restriction_site} -o restriction_fragments.bed ${fasta} """ } } @@ -378,7 +389,7 @@ process bowtie2_end_to_end { tag "$prefix" input: set val(sample), file(reads) from raw_reads - val bt2_index from bwt2_index + file index from bwt2_index_end2end output: set val(prefix), file("${prefix}_unmap.fastq") into unmapped_end_to_end @@ -391,7 +402,7 @@ process bowtie2_end_to_end { bowtie2 --rg-id BMG --rg SM:${prefix} \\ ${bwt2_opts} \\ -p ${task.cpus} \\ - -x ${bt2_index} \\ + -x ${index}/${bwt2_base} \\ --un ${prefix}_unmap.fastq \\ -U ${reads} | samtools view -F 4 -bS - > ${prefix}.bam """ @@ -417,7 +428,7 @@ process bowtie2_on_trimmed_reads { tag "$prefix" input: set val(prefix), file(reads) from trimmed_reads - val bt2_index from bwt2_index + file index from bwt2_index_trim output: set val(prefix), file("${prefix}_trimmed.bam") into trimmed_bam @@ -428,7 +439,7 @@ process bowtie2_on_trimmed_reads { bowtie2 --rg-id BMG --rg SM:${prefix} \\ ${params.bwt2_opts_trimmed} \\ -p ${task.cpus} \\ - -x ${bt2_index} \\ + -x ${index}/${bwt2_base} \\ -U ${reads} | samtools view -bS - > ${prefix}_trimmed.bam """ } @@ -516,7 +527,7 @@ process build_contact_maps{ tag "$sample - $mres" input: set val(sample), file(vpairs), val(mres) from valid_pairs.combine(map_res) - val chrsize from chr_size + val chrsize from chromosome_size output: file("*.matrix") into raw_maps @@ -557,7 +568,7 @@ process generate_cool{ tag "$sample" input: set val(sample), file(vpairs) from valid_pairs_4cool - val chrsize from chr_size + val chrsize from chromosome_size output: file("*mcool") into cool_maps -- GitLab