From 956fd73f32d7418e62e012113770fb69b9d30456 Mon Sep 17 00:00:00 2001 From: xgrand <xavier.grand@ens-lyon.fr> Date: Mon, 6 Sep 2021 18:12:21 +0200 Subject: [PATCH] =?UTF-8?q?modif=20chipster.nf=20&=20bedtools/main.nf=20aj?= =?UTF-8?q?out=20des=20fonction=20de=20cr=C3=A9ation=20et=20sloping=20des?= =?UTF-8?q?=20bed=20files.=20Il=20manque=20encore=20la=20transformation=20?= =?UTF-8?q?des=20bed=20en=20BigWig.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/chipster.nf | 35 +++++++++++++++++++---- src/nf_modules/bedtools/main.nf | 50 +++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 6 deletions(-) diff --git a/src/chipster.nf b/src/chipster.nf index 903711d2..61cf3041 100755 --- a/src/chipster.nf +++ b/src/chipster.nf @@ -13,24 +13,30 @@ nextflow.enable.dsl=2 **************************************************************** */ -params.paired_end = true +params.paired_end = false /* false for single end data, true for paired-end data @type: Boolean */ -params.fastq = "data/tinyTestDataset/fastq/*_{1,2}.fastq" +params.fastq = "./data/tiny-delta-te-dataset/fastq_rnaseq/*.gz" /* Fastq files @type: Files */ -params.genome = "data/tinyTestDataset/reference.fasta" +params.genome = "./data/tiny-delta-te-dataset/synth.fasta" /* A genome file @type: File */ +params.chrom_sizes = "./data/tiny-delta-te-dataset/chrom.sizes" +/* samtools generated genome.sizes file: samtools faidx synth.fasta and cut -f 1,2 synth.fasta.fai > chrom.sizes + +@type: File +*/ + // params.idx = "" /* already indexed reference genome ? enter path... 
@@ -50,6 +56,8 @@ params.sort_bam_out = "$params.project/Bam_filtered_sorted/" params.index_bam_out = "$params.project/Bam_filt_sort_indexed/" params.bam_to_bigwig_out = "$params.project/BigWig/" params.peak_calling_bg_out = "$params.project/Peak_calling/" +params.bam_to_bed_out = "$params.project/Bed/" +params.bed_slop_out = "$params.project/Bed_sloped/" /* **************************************************************** @@ -59,6 +67,7 @@ params.peak_calling_bg_out = "$params.project/Peak_calling/" log.info "fastq files : ${params.fastq}" log.info "genome file : ${params.genome}" +log.info "genome sizes : ${params.chrom_sizes}" /* log.info "indexed genome file : ${params.idxgenome}" */ /* @@ -99,7 +108,7 @@ Channel .fromPath( params.genome ) .ifEmpty { error "Cannot find any files matching: ${params.genome}" } .map{it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]} - .set { genome_file } + .set { genome_file } /* Channel // IP & CTRL names @@ -116,6 +125,12 @@ Channel .set { genome_idx } */ +Channel + .fromPath( params.chrom_sizes ) + .ifEmpty { error "Cannot find any files matching: ${params.chrom_sizes}" } + .map{it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]} + .set{ genome_sizes } + /* **************************************************************** Imports @@ -134,6 +149,8 @@ include { sort_bam } from "./nf_modules/samtools/main.nf" include { index_bam } from "./nf_modules/samtools/main.nf" include { bam_to_bigwig } from "./nf_modules/deeptools/main.nf" include { peak_calling_bg } from "./nf_modules/macs3/main.nf" +include { bam_to_bed } from "./nf_modules/bedtools/main.nf" +include { bed_slop } from "./nf_modules/bedtools/main.nf" /* **************************************************************** @@ -179,10 +196,16 @@ workflow { sort_bam(filter_bam_quality.out.bam) // samtools_index - index_bam(sort_bam.out.bam) + // index_bam(sort_bam.out.bam.collect()) // Create a bigwig file - bam_to_bigwig(index_bam.out.bam_idx) + // bam_to_bigwig(index_bam.out.bam_idx) + + 
// From Bam to Bed
+  bam_to_bed(sort_bam.out.bam)
+
+  // Extension of reads with bedtools slop
+  bed_slop(bam_to_bed.out.bed, genome_sizes.collect()) // collect() gathers the [id, chrom.sizes] items into one list so it can be paired with every bed tuple
 
   // peak calling using MACS3 Prend des bed ou des bam en entrée...
   // peak_calling_bg()
diff --git a/src/nf_modules/bedtools/main.nf b/src/nf_modules/bedtools/main.nf
index 9400abf4..6826615f 100644
--- a/src/nf_modules/bedtools/main.nf
+++ b/src/nf_modules/bedtools/main.nf
@@ -119,3 +119,53 @@ bedtools genomecov \
   -bg > ${bam.simpleName}.bg
 """
 }
+
+params.bam_to_bed = "" // NOTE(review): declared but not referenced by the process below
+params.bam_to_bed_out = "" // sub-directory of results/ to publish into; "" disables publishing
+process bam_to_bed { // converts one BAM file into a BED file with `bedtools bamtobed`
+  container = "${container_url}" // container_url is defined outside this hunk
+  label "big_mem_mono_cpus"
+  tag "${bam_id}"
+  if (params.bam_to_bed_out != "") { // publish (copy) the outputs only when an output directory was configured
+    publishDir "results/${params.bam_to_bed_out}", mode: 'copy'
+  }
+
+  input:
+  tuple val(bam_id), path(bam) // sample identifier and the BAM file to convert
+
+  output:
+  tuple val(bam_id), path("*.bed"), emit: bed // same identifier paired with the BED written by the script (<bam.simpleName>.bed)
+
+  script:
+"""
+bedtools bamtobed \
+  -i ${bam} \
+  > ${bam.simpleName}.bed
+"""
+}
+
+params.bed_slop = "" // NOTE(review): declared but not referenced by the process below
+params.bed_slop_out = "" // sub-directory of results/ to publish into; "" disables publishing
+process bed_slop { // runs `bedtools slop -s -r 100 -l 0`: per the bedtools docs, strand-aware, +100 bp on the downstream side, start side unchanged (values are hard-coded)
+  container = "${container_url}" // container_url is defined outside this hunk
+  label "big_mem_mono_cpus"
+  tag "${bed_id}"
+  if (params.bed_slop_out != "") { // publish (copy) the outputs only when an output directory was configured
+    publishDir "results/${params.bed_slop_out}", mode: 'copy'
+  }
+
+  input:
+  tuple val(bed_id), path(bed) // sample identifier and the BED file to extend
+  tuple val(file_id), path(chromsizes) // genome file passed to `-g`; file_id is bound but not used in the script
+
+  output:
+  tuple val(bed_id), path("*_sloped.bed"), emit: bed_sloped // same identifier paired with <bed.simpleName>_sloped.bed
+
+  script:
+"""
+bedtools slop -s -r 100 -l 0 \
+  -i ${bed} \
+  -g ${chromsizes} \
+  > ${bed.simpleName}_sloped.bed
+"""
+}
\ No newline at end of file
-- 
GitLab