v1.0.3: fix OR computation and update README formula

a2b8cde9 · Laurent Modolo · 8bacdb89 · a2b8cde9 · a2b8cde9 · a2b8cde9
Verified Commit a2b8cde9 authored Mar 21, 2023 by Laurent Modolo
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -65,7 +65,7 @@ dependencies = [
 [[package]]
 name = "bamcalib"
-version = "0.1.1"
+version = "0.1.3"
 dependencies = [
 "bam",
 "bigtools",

--- a/Cargo.toml
+++ b/Cargo.toml
@@ -4,7 +4,7 @@
 [package]
 name = "bamcalib"
-version = "0.1.2"
+version = "0.1.3"
 edition = "2021"
 description = "A command line tools to compute normalized bigwig from calibrated bam of a Chip-Seq experiment"
 license = "AGPL-3.0-or-later"

--- a/README.md
+++ b/README.md
@@ -73,7 +73,7 @@ The `bamcalib` expect the following parameters:
 - $WCE_{c}\left(t\right)$ the coverage at position $t$ in the WCE sample on the calibration genome
 ```math
-\text{norm}IP\left(t\right) = IP_{x}\left(t\right) \frac{\frac{1}{T_c}\sum_{t^{'}=1}^{T_c}WCE_{c}\left(t^{'}\right)}{\frac{1}{T_c}\sum_{t^{'}=1}^{T_c}IP_{c}\left(t^{'}\right)\frac{1}{T_x}\sum_{t^{'}=1}^{T_x}WCE_{x}\left(t^{'}\right)}
+\text{norm}IP\left(t\right) = IP_{x}\left(t\right) \frac{\beta \frac{1}{T_c}\sum_{t^{'}=1}^{T_c}WCE_{c}\left(t^{'}\right)}{\frac{1}{T_c}\sum_{t^{'}=1}^{T_c}IP_{c}\left(t^{'}\right)\frac{1}{T_x}\sum_{t^{'}=1}^{T_x}WCE_{x}\left(t^{'}\right)}
 ```
 In a reference genome of size $`T_x`$ (ignoring the chromosome segmentation), and in a calibration genome of size $`T_c`$.
@@ -99,10 +99,10 @@ E\left(\text{norm}IP\left(t\right)\right)= E\left(\frac{\text{norm}IP\left(t\rig
 which gives
 ```math
-\alpha = \frac{\sum_{t^{'}=1}^{T_c}WCE_{c}\left(t^{'}\right)}{10^9} \frac{\sum_{t=1}^{T_x}\frac{IP_{x}\left(t\right)}{WCE_{x}\left(t\right)}}{\sum_{t=1}^{T_x}IP_{x}\left(t\right)}
+\text{norm}WCE\left(t\right) = WCE_{x}\left(t\right) \sum_{t=1}^{T_x}\frac{IP_{x}\left(t\right)}{WCE_{x}\left(t\right)} \frac{1}{\sum_{t=1}^{T_x}IP_{x}\left(t\right)}
 ```
-Not that all the computation are scaled by the genome size and not at the read number as in [Hu et al.](https://doi.org/10.1093/nar/gkv670), this is also why we add a scaling factor (default to $10^3$).
+Not that all the computation are scaled by the genome size and not at the read number as in [Hu et al.](https://doi.org/10.1093/nar/gkv670), this is also why we add a scaling factor $\beta$ (default to $10^3$).
 The $\text{ratio}IP\left(t\right)$ is multiplied by this scaling factor.
 With this method, we retain the interesting properties of [Hu et al.](https://doi.org/10.1093/nar/gkv670) normalization on the average read density between samples (i.e., we can compare two different samples in a quantitative way) and we account for the local bias of read density observed in the WCE samples (differential chromatin accessibility, repetition, low mappability region, etc.).

--- a/src/main.rs
+++ b/src/main.rs
@@ -13,7 +13,7 @@ use clap::Parser;
 #[derive(Parser, Debug)]
 #[command(name = "bamcalib")]
 #[command(author = "Laurent Modolo <laurent.modolo@ens-lyon.fr>")]
-#[command(version = "1.0")]
+#[command(version = "1.0.3")]
 #[command(about = "Compute calibrated density from calibration mapping data", long_about = None)]
 struct Args {
    /// sorted bam file for the IP data

--- a/src/stats.rs
+++ b/src/stats.rs
@@ -166,7 +166,7 @@ impl ORStats {
        );
    }
-    /// Function to compute the OR from the results of the compute_depth() function
+    /// Function to compute the scaling factor for the IP data
    pub fn compute_or(&mut self, scaling: f64) {
        let ref_size = self.ip.chromosomes.get_genomesize(true) as f64;
        self.or = scaling * (self.wce.calibration.read_count as f64)
@@ -174,7 +174,7 @@ impl ORStats {
                * (self.wce.reference.read_count as f64 / ref_size));
    }
-    /// Function to compute the alpha from the results of the compute_depth() function
+    /// Function to compute the scaling factor for the WCE data
    pub fn compute_alpha(&mut self) {
        self.alpha = 0.0;
        let mut chr_id;
@@ -200,9 +200,7 @@ impl ORStats {
                self.alpha += (ip_depth as f64) / (wce_depth as f64);
            }
        }
-        let calib_size = self.ip.chromosomes.get_genomesize(false) as f64;
+        self.alpha = self.alpha / (self.ip.reference.read_count as f64);
-        self.alpha = (self.alpha / (self.ip.reference.read_count as f64))
-            * (self.wce.calibration.read_count as f64 / calib_size);
    }
    /// Return the OR value to normalize the IP coverage