From a2b8cde90f451e49376c9978a0c078d18d1350d0 Mon Sep 17 00:00:00 2001
From: Laurent Modolo <laurent.modolo@ens-lyon.fr>
Date: Tue, 21 Mar 2023 15:19:34 +0100
Subject: [PATCH] v1.0.3: fix OR computation and update README formula

---
 Cargo.lock   | 2 +-
 Cargo.toml   | 2 +-
 README.md    | 6 +++---
 src/main.rs  | 2 +-
 src/stats.rs | 8 +++-----
 5 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 8332ca1..9803dee 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -65,7 +65,7 @@ dependencies = [
 
 [[package]]
 name = "bamcalib"
-version = "0.1.1"
+version = "0.1.3"
 dependencies = [
  "bam",
  "bigtools",
diff --git a/Cargo.toml b/Cargo.toml
index 6479255..1650804 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -4,7 +4,7 @@
 
 [package]
 name = "bamcalib"
-version = "0.1.2"
+version = "0.1.3"
 edition = "2021"
 description = "A command line tools to compute normalized bigwig from calibrated bam of a Chip-Seq experiment"
 license = "AGPL-3.0-or-later"
diff --git a/README.md b/README.md
index 4e0498b..5ef8931 100644
--- a/README.md
+++ b/README.md
@@ -73,7 +73,7 @@ The `bamcalib` expect the following parameters:
 - $WCE_{c}\left(t\right)$ the coverage at position $t$ in the WCE sample on the calibration genome
 
 ```math
-\text{norm}IP\left(t\right) = IP_{x}\left(t\right) \frac{\frac{1}{T_c}\sum_{t^{'}=1}^{T_c}WCE_{c}\left(t^{'}\right)}{\frac{1}{T_c}\sum_{t^{'}=1}^{T_c}IP_{c}\left(t^{'}\right)\frac{1}{T_x}\sum_{t^{'}=1}^{T_x}WCE_{x}\left(t^{'}\right)}
+\text{norm}IP\left(t\right) = IP_{x}\left(t\right) \frac{\beta \frac{1}{T_c}\sum_{t^{'}=1}^{T_c}WCE_{c}\left(t^{'}\right)}{\frac{1}{T_c}\sum_{t^{'}=1}^{T_c}IP_{c}\left(t^{'}\right)\frac{1}{T_x}\sum_{t^{'}=1}^{T_x}WCE_{x}\left(t^{'}\right)}
 ```
 
 In a reference genome of size $`T_x`$ (ignoring the chromosome segmentation), and in a calibration genome of size $`T_c`$.
@@ -99,10 +99,10 @@ E\left(\text{norm}IP\left(t\right)\right)= E\left(\frac{\text{norm}IP\left(t\rig
 which gives
 
 ```math
-\alpha = \frac{\sum_{t^{'}=1}^{T_c}WCE_{c}\left(t^{'}\right)}{10^9} \frac{\sum_{t=1}^{T_x}\frac{IP_{x}\left(t\right)}{WCE_{x}\left(t\right)}}{\sum_{t=1}^{T_x}IP_{x}\left(t\right)}
+\text{norm}WCE\left(t\right) = WCE_{x}\left(t\right) \sum_{t=1}^{T_x}\frac{IP_{x}\left(t\right)}{WCE_{x}\left(t\right)} \frac{1}{\sum_{t=1}^{T_x}IP_{x}\left(t\right)}
 ```
 
-Not that all the computation are scaled by the genome size and not at the read number as in [Hu et al.](https://doi.org/10.1093/nar/gkv670), this is also why we add a scaling factor (default to $10^3$).
+Not that all the computation are scaled by the genome size and not at the read number as in [Hu et al.](https://doi.org/10.1093/nar/gkv670), this is also why we add a scaling factor $\beta$ (default to $10^3$).
 The $\text{ratio}IP\left(t\right)$ is multiplied by this scaling factor.
 
 With this method, we retain the interesting properties of [Hu et al.](https://doi.org/10.1093/nar/gkv670) normalization on the average read density between samples (i.e., we can compare two different samples in a quantitative way) and we account for the local bias of read density observed in the WCE samples (differential chromatin accessibility, repetition, low mappability region, etc.).
diff --git a/src/main.rs b/src/main.rs
index 7a8ebfd..8a78cf6 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -13,7 +13,7 @@ use clap::Parser;
 #[derive(Parser, Debug)]
 #[command(name = "bamcalib")]
 #[command(author = "Laurent Modolo <laurent.modolo@ens-lyon.fr>")]
-#[command(version = "1.0")]
+#[command(version = "1.0.3")]
 #[command(about = "Compute calibrated density from calibration mapping data", long_about = None)]
 struct Args {
     /// sorted bam file for the IP data
diff --git a/src/stats.rs b/src/stats.rs
index 4b08093..0b3d153 100644
--- a/src/stats.rs
+++ b/src/stats.rs
@@ -166,7 +166,7 @@ impl ORStats {
         );
     }
 
-    /// Function to compute the OR from the results of the compute_depth() function
+    /// Function to compute the scaling factor for the IP data
     pub fn compute_or(&mut self, scaling: f64) {
         let ref_size = self.ip.chromosomes.get_genomesize(true) as f64;
         self.or = scaling * (self.wce.calibration.read_count as f64)
@@ -174,7 +174,7 @@ impl ORStats {
                 * (self.wce.reference.read_count as f64 / ref_size));
     }
 
-    /// Function to compute the alpha from the results of the compute_depth() function
+    /// Function to compute the scaling factor for the WCE data
     pub fn compute_alpha(&mut self) {
         self.alpha = 0.0;
         let mut chr_id;
@@ -200,9 +200,7 @@ impl ORStats {
                 self.alpha += (ip_depth as f64) / (wce_depth as f64);
             }
         }
-        let calib_size = self.ip.chromosomes.get_genomesize(false) as f64;
-        self.alpha = (self.alpha / (self.ip.reference.read_count as f64))
-            * (self.wce.calibration.read_count as f64 / calib_size);
+        self.alpha = self.alpha / (self.ip.reference.read_count as f64);
     }
 
     /// Return the OR value to normalize the IP coverage
-- 
GitLab