From f23458c1318927336cb76197da440fbc4c7eff3b Mon Sep 17 00:00:00 2001
From: Laurent Modolo <laurent.modolo@ens-lyon.fr>
Date: Fri, 10 Mar 2023 16:11:09 +0100
Subject: [PATCH] compte mean read count instead of read count

---
 src/pileup.rs | 17 +++++++++++++----
 src/stats.rs  | 18 +++++++++---------
 2 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/src/pileup.rs b/src/pileup.rs
index 31d7d2b..2243e7c 100644
--- a/src/pileup.rs
+++ b/src/pileup.rs
@@ -2,9 +2,9 @@
 //
 // SPDX-License-Identifier: AGPL-3.0-or-later
 
-use crate::multimapping::MultiMappingReads;
-use crate::utils::{open_bam, Chromosomes};
+use create::multimapping::MultiMappingReads;
 use bam;
+use create::utils::{open_bam, Chromosomes};
 use statrs::distribution::{ContinuousCDF, Normal};
 use std::fs::File;
 use std::rc::Rc;
@@ -269,7 +269,7 @@ pub fn compute_depth(
     prefix: &str,
     thread_local: u16,
     no_bits: u16,
-) -> (Depth, u64, u64, Chromosomes) {
+) -> (Depth, f64, f64, Chromosomes) {
     let (multimap, chr, fragment_len) =
         MultiMappingReads::init(file, prefix, thread_local, no_bits);
     let mut reader = open_bam(file, thread_local);
@@ -278,6 +278,8 @@ pub fn compute_depth(
     depth.init(&chr);
     let mut ref_count: u64 = 0;
     let mut calib_count: u64 = 0;
+    let mut ref_size: u64 = 0;
+    let mut calib_size: u64 = 0;
     let mut ref_depth = ColumnDepth::new(fragment_len);
     let mut calib_depth = ColumnDepth::new(fragment_len);
     for column in pileup {
@@ -287,8 +289,10 @@ pub fn compute_depth(
                 column.ref_pos(),
                 ref_depth.depth(&column) as u64,
             );
+            ref_size += 1;
         } else {
             calib_count += calib_depth.depth(&column) as u64;
+            calib_size += 1;
         }
     }
     if ref_count == 0 {
@@ -297,5 +301,10 @@ pub fn compute_depth(
     if calib_count == 0 {
         panic!("no read detected in the calibration genome (maybe the no_bits is filtering too stringent)")
     }
-    return (depth, ref_count, calib_count, chr);
+    return (
+        depth,
+        (ref_count as f64 / ref_size as f64),
+        (calib_count as f64 / calib_size as f64),
+        chr,
+    );
 }
diff --git a/src/stats.rs b/src/stats.rs
index 953e6e3..5196281 100644
--- a/src/stats.rs
+++ b/src/stats.rs
@@ -2,21 +2,21 @@
 //
 // SPDX-License-Identifier: AGPL-3.0-or-later
 
-use crate::pileup;
-use crate::utils::Chromosomes;
+use create::pileup;
+use create::utils::Chromosomes;
 use std::sync::mpsc;
 use std::thread;
 
 /// Store genome statistics necessary for normalization
 #[derive(Debug, Copy, Clone)]
 struct GenomeStats {
-    read_count: u64,
+    read_count: f64,
 }
 
 impl GenomeStats {
     /// Create a new GenomeStats with read_count set to 0
     pub fn new() -> GenomeStats {
-        GenomeStats { read_count: 0 }
+        GenomeStats { read_count: 0.0 }
     }
 }
 
@@ -93,7 +93,7 @@ impl ORStats {
         self.compute_depth(bam_ip, bam_wce, prefix, thread_local, no_bits);
         (self.pos_chr_id, self.pos_chr_pos) = match self.ip.chromosomes.get_start_pos(true) {
             Some(pos) => pos,
-            None => panic!("init(): no valide position in the reference genome"),
+            None => panic!("init(): no valid position in the reference genome"),
         };
         self.compute_or();
         self.compute_alpha();
@@ -167,7 +167,7 @@ impl ORStats {
 
     /// Function to compute the OR from the results of the compute_depth() function
     pub fn compute_or(&mut self) {
-        self.or = 1.0e9 * (self.wce.calibration.read_count as f64)
+        self.or = (self.wce.calibration.read_count as f64)
             / ((self.ip.calibration.read_count as f64) * (self.wce.reference.read_count as f64));
     }
 
@@ -178,7 +178,7 @@ impl ORStats {
         let mut chr_pos;
         (chr_id, chr_pos) = match self.ip.chromosomes.get_start_pos(true) {
             Some(pos) => pos,
-            None => panic!("compute_alpha(): no valide position in the reference genome"),
+            None => panic!("compute_alpha(): no valid position in the reference genome"),
         };
         loop {
             let ip_depth = match self.ip_depth.get(chr_id, chr_pos) {
@@ -198,7 +198,7 @@ impl ORStats {
             }
         }
         self.alpha = (self.alpha / (self.ip.reference.read_count as f64))
-            * ((self.wce.calibration.read_count as f64) / 1.0e9);
+            * (self.wce.calibration.read_count as f64);
     }
 
     /// Return the OR value to normalize the IP coverage
@@ -246,7 +246,7 @@ impl Iterator for ORStats {
 
 #[cfg(test)]
 mod tests {
-    use crate::stats::*;
+    use create::stats::*;
 
     #[test]
     fn test_chromosomes() {
-- 
GitLab