From f643b343b1e0d9d2f12a17ace87c5ec1e3d34573 Mon Sep 17 00:00:00 2001
From: Laurent Modolo <laurent.modolo@ens-lyon.fr>
Date: Tue, 28 Mar 2023 16:15:54 +0200
Subject: [PATCH] add --intermediate-bigwig option

---
 Cargo.lock    |  2 +-
 src/bed.rs    | 85 ++++++++++++++++++++++++++++++++++++++++++++++++---
 src/main.rs   | 12 ++++++++
 src/pileup.rs |  5 +++
 src/stats.rs  | 57 +++++++++++++++++++++++++++++++---
 5 files changed, 151 insertions(+), 10 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 8332ca1..4efb1cf 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -65,7 +65,7 @@ dependencies = [
 
 [[package]]
 name = "bamcalib"
-version = "0.1.1"
+version = "0.1.2"
 dependencies = [
  "bam",
  "bigtools",
diff --git a/src/bed.rs b/src/bed.rs
index fa12b61..afd1275 100644
--- a/src/bed.rs
+++ b/src/bed.rs
@@ -5,6 +5,7 @@
 use crate::stats;
 use bigtools;
 use std::collections::HashMap;
+use std::path::PathBuf;
 
 /// BedGraphLine store information on a bedgraph line
 #[derive(Debug, Clone)]
@@ -62,7 +63,7 @@ impl BedGraphLine {
     }
 }
 
-/// BedGraph store information necessary to generate a normalized bed file
+// BedGraph store information necessary to generate a normalized bed file
 /// It use bio::Record to store the current output bed line
 /// direct value to store the chr id and current value in numeric format
 /// or_stats to store everything to compute the normalization from bam files
@@ -130,6 +131,13 @@ impl BedGraph {
             None => self.or_stats.next(),
         }
     }
+
+    pub fn reset(&mut self) {
+        self.or_stats.reset();
+        self.future_bedline = None;
+        self.current_chr_id = 0;
+        self.current_value = 0.0;
+    }
 }
 
 impl Iterator for BedGraph {
@@ -168,6 +176,7 @@ impl Iterator for BedGraph {
 }
 
 /// Structure to make an interface with the bigtools crate
+#[derive(Debug, Clone)]
 pub struct BedGraphWrapper {
     bed: BedGraph,
     chr_name: String,
@@ -203,7 +212,52 @@ impl BedGraphWrapper {
         no_bits: u16,
         file_ip: &str,
         file_wce: &str,
+        fragment_estimation: bool,
+        intermediate_bigwig: bool,
     ) {
+        let (mut bedwrapper, chrom_map_wce) = BedGraphWrapper::write_bigwig_ip(
+            bam_ip,
+            bam_wce,
+            prefix,
+            scaling,
+            thread_local,
+            no_bits,
+            file_ip,
+            fragment_estimation,
+        );
+        BedGraphWrapper::write_bigwig_wce(file_wce, chrom_map_wce, thread_local);
+
+        if intermediate_bigwig {
+            bedwrapper.bed.reset();
+            bedwrapper.bed.or_stats.intermediate_bigwig(true, true);
+            let mut path = PathBuf::from(file_ip);
+            path.set_extension("ip.bigwig");
+            BedGraphWrapper::write_bigwig_old_norm(
+                bedwrapper.clone(),
+                &path.to_str().unwrap(),
+                thread_local,
+            );
+            bedwrapper.bed.reset();
+            bedwrapper.bed.or_stats.intermediate_bigwig(true, false);
+            path.set_extension("wce.bigwig");
+            BedGraphWrapper::write_bigwig_old_norm(
+                bedwrapper,
+                &path.to_str().unwrap(),
+                thread_local,
+            );
+        }
+    }
+
+    fn write_bigwig_ip(
+        bam_ip: &str,
+        bam_wce: &str,
+        prefix: &str,
+        scaling: f64,
+        thread_local: u16,
+        no_bits: u16,
+        file: &str,
+        fragment_estimation: bool,
+    ) -> (BedGraphWrapper, HashMap<String, u32>) {
         let pool = futures::executor::ThreadPoolBuilder::new()
             .pool_size(thread_local as usize)
             .create()
@@ -211,26 +265,32 @@ impl BedGraphWrapper {
         let mut bed = BedGraph::new();
         bed.init(&bam_ip, &bam_wce, &prefix, scaling, thread_local, no_bits);
         let bedwrapper = BedGraphWrapper::new(bed);
+        let bedwrapper_return = bedwrapper.clone();
 
+        // write ratio IP
         let chrom_map = bedwrapper.bed.or_stats.get_chr().to_hashmap_filtered(false);
         let chrom_map_wce = bedwrapper.bed.or_stats.get_chr().to_hashmap_filtered(false);
-        let bed_wce = EmptyBed::new(&chrom_map);
 
         let vals_iter = bigtools::bed::bedparser::BedParser::new(bedwrapper);
-        let writer = bigtools::bigwig::BigWigWrite::create_file(String::from(file_ip));
+        let writer = bigtools::bigwig::BigWigWrite::create_file(String::from(file));
         let chsi = bigtools::bed::bedparser::BedParserStreamingIterator::new(
             vals_iter,
             chrom_map.clone(),
             true,
         );
         writer.write(chrom_map, chsi, pool).unwrap();
+        return (bedwrapper_return, chrom_map_wce);
+    }
 
+    fn write_bigwig_wce(file: &str, chrom_map_wce: HashMap<String, u32>, thread_local: u16) {
+        // write ratio WCE
         let pool = futures::executor::ThreadPoolBuilder::new()
             .pool_size(thread_local as usize)
             .create()
             .expect("Unable to create thread pool.");
+        let bed_wce = EmptyBed::new(&chrom_map_wce);
         let vals_iter = bigtools::bed::bedparser::BedParser::new(bed_wce);
-        let writer = bigtools::bigwig::BigWigWrite::create_file(String::from(file_wce));
+        let writer = bigtools::bigwig::BigWigWrite::create_file(String::from(file));
         let chsi = bigtools::bed::bedparser::BedParserStreamingIterator::new(
             vals_iter,
             chrom_map_wce.clone(),
@@ -238,6 +298,23 @@ impl BedGraphWrapper {
         );
         writer.write(chrom_map_wce, chsi, pool).unwrap();
     }
+
+    fn write_bigwig_old_norm(bedwrapper: BedGraphWrapper, file: &str, thread_local: u16) {
+        let pool = futures::executor::ThreadPoolBuilder::new()
+            .pool_size(thread_local as usize)
+            .create()
+            .expect("Unable to create thread pool.");
+        let chrom_map = bedwrapper.bed.or_stats.get_chr().to_hashmap_filtered(false);
+
+        let vals_iter = bigtools::bed::bedparser::BedParser::new(bedwrapper);
+        let writer = bigtools::bigwig::BigWigWrite::create_file(String::from(file));
+        let chsi = bigtools::bed::bedparser::BedParserStreamingIterator::new(
+            vals_iter,
+            chrom_map.clone(),
+            true,
+        );
+        writer.write(chrom_map, chsi, pool).unwrap();
+    }
 }
 
 impl bigtools::bed::bedparser::StreamingBedValues for BedGraphWrapper {
diff --git a/src/main.rs b/src/main.rs
index 7a8ebfd..82bf329 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -47,6 +47,14 @@ struct Args {
     /// number of threads to parse bam files
     #[arg(short, long, default_value_t = 1)]
     thread: u16,
+
+    /// compute coverage from read information instead of fragment estimation
+    #[arg(long)]
+    no_fragment_estimation: bool,
+
+    /// output normIP and normWCE bigwig files (not just the ratio of the two)
+    #[arg(long)]
+    intermediate_bigwig: bool,
 }
 
 fn main() {
@@ -57,6 +65,8 @@ fn main() {
     let bigwig_wce = matches.bigwig_wce;
     let cal_prefix = matches.cal_prefix;
     let scaling = matches.scaling;
+    let fragment_estimation = !matches.no_fragment_estimation;
+    let intermediate_bigwig = matches.intermediate_bigwig;
     let mut thread: u16 = matches.thread;
     if thread == 0 {
         thread = 1;
@@ -71,5 +81,7 @@ fn main() {
         no_bits,
         &bigwig_ip,
         &bigwig_wce,
+        fragment_estimation,
+        intermediate_bigwig,
     );
 }
diff --git a/src/pileup.rs b/src/pileup.rs
index 31d7d2b..305442a 100644
--- a/src/pileup.rs
+++ b/src/pileup.rs
@@ -228,6 +228,11 @@ impl Depth {
             None => return None,
         }
     }
+
+    pub fn reset(&mut self) {
+        self.chr_id = 0;
+        self.pos = 0;
+    }
 }
 
 impl Iterator for Depth {
diff --git a/src/stats.rs b/src/stats.rs
index 4b08093..a845f69 100644
--- a/src/stats.rs
+++ b/src/stats.rs
@@ -62,6 +62,8 @@ pub struct ORStats {
     wce_depth: pileup::Depth,
     pos_chr_id: u32,
     pos_chr_pos: u32,
+    intermediate_bigwig: bool,
+    intermediate_bigwig_ip: bool,
 }
 
 impl ORStats {
@@ -76,6 +78,8 @@ impl ORStats {
             wce_depth: pileup::Depth::new(),
             pos_chr_id: 0,
             pos_chr_pos: 0,
+            intermediate_bigwig: false,
+            intermediate_bigwig_ip: true,
         }
     }
 
@@ -214,6 +218,47 @@ impl ORStats {
     pub fn get_alpha(&self) -> f64 {
         return self.alpha;
     }
+
+    fn norm_ip(&self, pos_chr_id: u32, pos_chr_pos: u32, ip_depth: u32) -> Option<(u32, u32, f64)> {
+        Some((pos_chr_id, pos_chr_pos, self.or * (ip_depth as f64)))
+    }
+
+    fn norm_wce(
+        &self,
+        pos_chr_id: u32,
+        pos_chr_pos: u32,
+        wce_depth: u32,
+    ) -> Option<(u32, u32, f64)> {
+        Some((
+            pos_chr_id,
+            pos_chr_pos,
+            self.alpha * (wce_depth as f64) + 1.0,
+        ))
+    }
+
+    fn ratio_norm(
+        &self,
+        pos_chr_id: u32,
+        pos_chr_pos: u32,
+        ip_depth: u32,
+        wce_depth: u32,
+    ) -> Option<(u32, u32, f64)> {
+        Some((
+            pos_chr_id,
+            pos_chr_pos,
+            self.or * (ip_depth as f64) / (self.alpha * (wce_depth as f64) + 1.0),
+        ))
+    }
+
+    pub fn reset(&mut self) {
+        self.ip_depth.reset();
+        self.wce_depth.reset();
+    }
+
+    pub fn intermediate_bigwig(&mut self, activate: bool, ip: bool) {
+        self.intermediate_bigwig = activate;
+        self.intermediate_bigwig_ip = ip;
+    }
 }
 
 impl Iterator for ORStats {
@@ -240,11 +285,13 @@ impl Iterator for ORStats {
                 Some(pos) => pos,
                 None => return None,
             };
-        return Some((
-            pos_chr_id,
-            pos_chr_pos,
-            self.or * (ip_depth as f64) / (self.alpha * (wce_depth as f64) + 1.0),
-        ));
+        if self.intermediate_bigwig && self.intermediate_bigwig_ip {
+            return self.norm_ip(pos_chr_id, pos_chr_pos, ip_depth);
+        }
+        if self.intermediate_bigwig && !self.intermediate_bigwig_ip {
+            return self.norm_wce(pos_chr_id, pos_chr_pos, wce_depth);
+        }
+        return self.ratio_norm(pos_chr_id, pos_chr_pos, ip_depth, wce_depth);
     }
 }
 
-- 
GitLab