diff --git a/src/chipster.nf b/src/chipster.nf index 903711d25789a1a2f763f91ab82bc83aa9cac54e..61cf30411b0bdb7ec0dcc50b4d6a20de0d0e2fb8 100755 --- a/src/chipster.nf +++ b/src/chipster.nf @@ -13,24 +13,30 @@ nextflow.enable.dsl=2 **************************************************************** */ -params.paired_end = true +params.paired_end = false /* false for single end data, true for paired-end data @type: Boolean */ -params.fastq = "data/tinyTestDataset/fastq/*_{1,2}.fastq" +params.fastq = "./data/tiny-delta-te-dataset/fastq_rnaseq/*.gz" /* Fastq files @type: Files */ -params.genome = "data/tinyTestDataset/reference.fasta" +params.genome = "./data/tiny-delta-te-dataset/synth.fasta" /* A genome file @type: File */ +params.chrom_sizes = "./data/tiny-delta-te-dataset/chrom.sizes" +/* Chromosome sizes file generated with samtools: samtools faidx synth.fasta && cut -f 1,2 synth.fasta.fai > chrom.sizes + +@type: File +*/ + // params.idx = "" /* already indexed reference genome ? enter path... @@ -50,6 +56,8 @@ params.sort_bam_out = "$params.project/Bam_filtered_sorted/" params.index_bam_out = "$params.project/Bam_filt_sort_indexed/" params.bam_to_bigwig_out = "$params.project/BigWig/" params.peak_calling_bg_out = "$params.project/Peak_calling/" +params.bam_to_bed_out = "$params.project/Bed/" +params.bed_slop_out = "$params.project/Bed_sloped/" /* **************************************************************** @@ -59,6 +67,7 @@ params.peak_calling_bg_out = "$params.project/Peak_calling/" log.info "fastq files : ${params.fastq}" log.info "genome file : ${params.genome}" +log.info "genome sizes : ${params.chrom_sizes}" /* log.info "indexed genome file : ${params.idxgenome}" */ /* @@ -99,7 +108,7 @@ Channel .fromPath( params.genome ) .ifEmpty { error "Cannot find any files matching: ${params.genome}" } .map{it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]} - .set { genome_file } + .set { genome_file } /* Channel // IP & CTRL names @@ -116,6 +125,12 @@ Channel .set { genome_idx } */ 
+Channel + .fromPath( params.chrom_sizes ) + .ifEmpty { error "Cannot find any files matching: ${params.chrom_sizes}" } + .map{it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]} + .set{ genome_sizes } + /* **************************************************************** Imports @@ -134,6 +149,8 @@ include { sort_bam } from "./nf_modules/samtools/main.nf" include { index_bam } from "./nf_modules/samtools/main.nf" include { bam_to_bigwig } from "./nf_modules/deeptools/main.nf" include { peak_calling_bg } from "./nf_modules/macs3/main.nf" +include { bam_to_bed } from "./nf_modules/bedtools/main.nf" +include { bed_slop } from "./nf_modules/bedtools/main.nf" /* **************************************************************** @@ -179,10 +196,16 @@ workflow { sort_bam(filter_bam_quality.out.bam) // samtools_index - index_bam(sort_bam.out.bam) + // index_bam(sort_bam.out.bam.collect()) // Create a bigwig file - bam_to_bigwig(index_bam.out.bam_idx) + // bam_to_bigwig(index_bam.out.bam_idx) + + // From Bam to Bed + bam_to_bed(sort_bam.out.bam) + + // Extension of reads with bedtools slop + bed_slop(bam_to_bed.out.bed, genome_sizes.collect()) // peak calling using MACS3, which takes BED or BAM files as input... 
// peak_calling_bg()
diff --git a/src/nf_modules/bedtools/main.nf b/src/nf_modules/bedtools/main.nf
index 9400abf4e55bcf57f52e896b5f1d41e1a8fe8bfa..6826615fac6b1f8fbb096c17731f4ad30d84f179 100644
--- a/src/nf_modules/bedtools/main.nf
+++ b/src/nf_modules/bedtools/main.nf
@@ -119,3 +119,79 @@ bedtools genomecov \
   -bg > ${bam.simpleName}.bg
 """
 }
+
+/*
+ * bam_to_bed: convert a BAM file to BED with `bedtools bamtobed`.
+ *
+ * params.bam_to_bed      extra options inserted after `bedtools bamtobed`
+ *                        (default "": none)
+ * params.bam_to_bed_out  when non-empty, outputs are copied to
+ *                        results/<params.bam_to_bed_out>
+ *
+ * input:  tuple (bam_id, bam)
+ * output: bed = tuple (bam_id, <bam.simpleName>.bed)
+ */
+params.bam_to_bed = ""
+params.bam_to_bed_out = ""
+process bam_to_bed {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${bam_id}"
+  if (params.bam_to_bed_out != "") {
+    publishDir "results/${params.bam_to_bed_out}", mode: 'copy'
+  }
+
+  input:
+  tuple val(bam_id), path(bam)
+
+  output:
+  tuple val(bam_id), path("*.bed"), emit: bed
+
+  script:
+"""
+bedtools bamtobed ${params.bam_to_bed} \
+  -i ${bam} \
+  > ${bam.simpleName}.bed
+"""
+}
+
+/*
+ * bed_slop: extend BED intervals with `bedtools slop`.
+ *
+ * Fixed options: -s -r 100 -l 0 (per the bedtools documentation: 100 bases
+ * added on the strand-aware end side, none on the start side).
+ *
+ * params.bed_slop      extra options appended after the fixed ones
+ *                      (default "": none)
+ * params.bed_slop_out  when non-empty, outputs are copied to
+ *                      results/<params.bed_slop_out>
+ *
+ * input:  tuple (bed_id, bed)
+ *         tuple (file_id, chromsizes); file_id is not used by the script
+ * output: bed_sloped = tuple (bed_id, <bed.simpleName>_sloped.bed)
+ */
+params.bed_slop = ""
+params.bed_slop_out = ""
+process bed_slop {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${bed_id}"
+  if (params.bed_slop_out != "") {
+    publishDir "results/${params.bed_slop_out}", mode: 'copy'
+  }
+
+  input:
+  tuple val(bed_id), path(bed)
+  tuple val(file_id), path(chromsizes)
+
+  output:
+  tuple val(bed_id), path("*_sloped.bed"), emit: bed_sloped
+
+  script:
+"""
+bedtools slop -s -r 100 -l 0 ${params.bed_slop} \
+  -i ${bed} \
+  -g ${chromsizes} \
+  > ${bed.simpleName}_sloped.bed
+"""
+}
\ No newline at end of file