From b6467be237119077e798d26dd38e4da154173c9b Mon Sep 17 00:00:00 2001
From: Xavier Grand <xavier.grand@ens-lyon.fr>
Date: Thu, 25 Nov 2021 07:43:16 +0100
Subject: [PATCH] Demarrage modif pour distance exon-pics FasterDB avec
 bedtools

---
 src/CTCF2.yml                   |  3 +++
 src/MYCN.yml                    |  3 +++
 src/chipster.nf                 | 15 +++++++++++++--
 src/nf_modules/bedtools/main.nf | 23 +++++++++++++++++++++++
 4 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/src/CTCF2.yml b/src/CTCF2.yml
index 0a24500b..857deb6c 100644
--- a/src/CTCF2.yml
+++ b/src/CTCF2.yml
@@ -25,6 +25,9 @@ idx: "/home/adminxavier/CTCF/Ref/genomeindex/"
 # output folder under results/ directory
 project: "CTCF_peak_calling_input_Genome_part2"
 
+# FasterDB exons BED file (expected position-sorted), used to find the exon nearest to each peak
+exons: "/home/adminxavier/CTCF/ChIPster/data/exon_sorted.bed"
+
 input:
   
   row1:
diff --git a/src/MYCN.yml b/src/MYCN.yml
index d1f4898a..495874e0 100644
--- a/src/MYCN.yml
+++ b/src/MYCN.yml
@@ -25,6 +25,9 @@ idx: "/home/adminxavier/CTCF/Ref/genomeindex/"
 # output folder under results/ directory
 project: "SRA293647_MYCN_BE2"
 
+# FasterDB exons BED file (expected position-sorted), used to find the exon nearest to each peak
+exons: "/home/adminxavier/CTCF/ChIPster/data/exon_sorted.bed"
+
 input:
   
   row1:
diff --git a/src/chipster.nf b/src/chipster.nf
index db90dd91..052ae129 100755
--- a/src/chipster.nf
+++ b/src/chipster.nf
@@ -17,7 +17,8 @@ params.peak_calling_out = "$params.project/Peak_calling/"
 params.bam_to_bed_out = "$params.project/Bed/"
 params.bed_slop_out = "$params.project/Bed_sloped/"
 params.bedGraph_out = "$params.project/BedGraph/"
-params.chipseq_bam2BW_out = "$params.project/chipseq_BigWig"
+params.chipseq_bam2BW_out = "$params.project/chipseq_BigWig/"
+params.nearestExonDist_out = "$params.project/nearest_Exon_From_Peak/"
 
 /*
  ****************************************************************
@@ -28,6 +29,7 @@ params.chipseq_bam2BW_out = "$params.project/chipseq_BigWig"
 log.info "fastq folder : ${params.fastq_folder}"
 log.info "genome file : ${params.genome}"
 log.info "genome sizes : ${params.chrom_sizes}"
+log.info "fasterDB exons bed file : ${params.exons}"
 
 /*
  ****************************************************************
@@ -72,6 +74,11 @@ Channel
   .map{it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]}
   .set{ genome_sizes }
 
+Channel
+  .fromPath( params.exons, checkIfExists: true )  // fail early: a literal path is emitted even when the file is missing
+  .ifEmpty { error "Cannot find any files matching: ${params.exons }" }
+  .first().set { exons }  // value channel, so the single exons file can be reused for every peak file
+
 /*
  ****************************************************************
                           Imports
@@ -90,6 +97,7 @@ include { sort_bam_chipster } from "./nf_modules/samtools/main.nf"
 include { index_bam_chipster } from "./nf_modules/samtools/main.nf"
 include { chipseq_bam2BW_chipster } from "./nf_modules/deeptools/main.nf"
 include { peak_calling } from "./nf_modules/macs3/main.nf"
+include { nearestExon_To_Peak } from "./nf_modules/bedtools/main.nf"
 
 /*
  ****************************************************************
@@ -162,6 +170,9 @@ workflow {
   index_bam_chipster.out.bam_idx.groupTuple(by: 3).set { combined_bams }
   combined_bams.map { it -> if(it[4][0] == 'IP') { [it[3], it[1][0], it[1][1]] } else {[ it[3], it[1][1], it[1][0]]} }.set { peak_calling_channel_in }
 
-  // peak calling using MACS3 Prend des bed ou des bam en entrée...
+  // peak calling using MACS3, bed or bam files as input...
   peak_calling(peak_calling_channel_in)
+
+  // Nearest fasterDB exons detection and distance calculation.
+  // TODO: enable once inputs are confirmed: nearestExon_To_Peak(peak_calling.out.bed, exons)
 }
\ No newline at end of file
diff --git a/src/nf_modules/bedtools/main.nf b/src/nf_modules/bedtools/main.nf
index bc017708..18ac4b7e 100644
--- a/src/nf_modules/bedtools/main.nf
+++ b/src/nf_modules/bedtools/main.nf
@@ -194,4 +194,40 @@ bedtools genomecov -bg\
   -g ${chromsizes} \
   > ${bed.simpleName}.bg
 """
+}
+
+// Declared but not referenced by the process below.
+params.nearestExonDist = ""
+// Folder under results/ where outputs are published; left empty, nothing is published.
+params.nearestExonDist_out = ""
+
+/*
+ * Runs `bedtools closest -d` with the peak BED file as -a and the exons BED
+ * file as -b, writing the result to <bed_id>_nearestExon.bed.
+ *
+ * input:  [bed_id, bed, condition, type] tuple, plus the exons BED file
+ * output: bed_exon -> [bed_id, *_nearestExon.bed, condition, type]
+ *
+ * NOTE(review): bedtools closest expects both inputs sorted by chromosome
+ * then start, in the same chromosome order -- confirm for the peak files.
+ */
+process nearestExon_To_Peak {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${bed_id}"
+  if (params.nearestExonDist_out != "") {
+    publishDir "results/${params.nearestExonDist_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(bed_id), path(bed), val(condition), val(type)
+    path( exons )
+
+  output:
+    tuple val(bed_id), path("*_nearestExon.bed"), val(condition), val(type), emit: bed_exon
+
+  script:
+"""
+bedtools closest -d -a ${bed} -b ${exons} > ${bed_id}_nearestExon.bed
+"""
 }
\ No newline at end of file
-- 
GitLab