From a2b8cde90f451e49376c9978a0c078d18d1350d0 Mon Sep 17 00:00:00 2001 From: Laurent Modolo <laurent.modolo@ens-lyon.fr> Date: Tue, 21 Mar 2023 15:19:34 +0100 Subject: [PATCH] v1.0.3: fix OR computation and update README formula --- Cargo.lock | 2 +- Cargo.toml | 2 +- README.md | 6 +++--- src/main.rs | 2 +- src/stats.rs | 8 +++----- 5 files changed, 9 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8332ca1..9803dee 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -65,7 +65,7 @@ dependencies = [ [[package]] name = "bamcalib" -version = "0.1.1" +version = "0.1.3" dependencies = [ "bam", "bigtools", diff --git a/Cargo.toml b/Cargo.toml index 6479255..1650804 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,7 +4,7 @@ [package] name = "bamcalib" -version = "0.1.2" +version = "0.1.3" edition = "2021" description = "A command line tools to compute normalized bigwig from calibrated bam of a Chip-Seq experiment" license = "AGPL-3.0-or-later" diff --git a/README.md b/README.md index 4e0498b..5ef8931 100644 --- a/README.md +++ b/README.md @@ -73,7 +73,7 @@ The `bamcalib` expect the following parameters: - $WCE_{c}\left(t\right)$ the coverage at position $t$ in the WCE sample on the calibration genome ```math -\text{norm}IP\left(t\right) = IP_{x}\left(t\right) \frac{\frac{1}{T_c}\sum_{t^{'}=1}^{T_c}WCE_{c}\left(t^{'}\right)}{\frac{1}{T_c}\sum_{t^{'}=1}^{T_c}IP_{c}\left(t^{'}\right)\frac{1}{T_x}\sum_{t^{'}=1}^{T_x}WCE_{x}\left(t^{'}\right)} +\text{norm}IP\left(t\right) = IP_{x}\left(t\right) \frac{\beta \frac{1}{T_c}\sum_{t^{'}=1}^{T_c}WCE_{c}\left(t^{'}\right)}{\frac{1}{T_c}\sum_{t^{'}=1}^{T_c}IP_{c}\left(t^{'}\right)\frac{1}{T_x}\sum_{t^{'}=1}^{T_x}WCE_{x}\left(t^{'}\right)} ``` In a reference genome of size $`T_x`$ (ignoring the chromosome segmentation), and in a calibration genome of size $`T_c`$. @@ -99,10 +99,10 @@ E\left(\text{norm}IP\left(t\right)\right)= E\left(\frac{\text{norm}IP\left(t\rig which gives ```math -\alpha = \frac{\sum_{t^{'}=1}^{T_c}WCE_{c}\left(t^{'}\right)}{10^9} \frac{\sum_{t=1}^{T_x}\frac{IP_{x}\left(t\right)}{WCE_{x}\left(t\right)}}{\sum_{t=1}^{T_x}IP_{x}\left(t\right)} +\text{norm}WCE\left(t\right) = WCE_{x}\left(t\right) \sum_{t=1}^{T_x}\frac{IP_{x}\left(t\right)}{WCE_{x}\left(t\right)} \frac{1}{\sum_{t=1}^{T_x}IP_{x}\left(t\right)} ``` -Not that all the computation are scaled by the genome size and not at the read number as in [Hu et al.](https://doi.org/10.1093/nar/gkv670), this is also why we add a scaling factor (default to $10^3$). +Not that all the computation are scaled by the genome size and not at the read number as in [Hu et al.](https://doi.org/10.1093/nar/gkv670), this is also why we add a scaling factor $\beta$ (default to $10^3$). The $\text{ratio}IP\left(t\right)$ is multiplied by this scaling factor. With this method, we retain the interesting properties of [Hu et al.](https://doi.org/10.1093/nar/gkv670) normalization on the average read density between samples (i.e., we can compare two different samples in a quantitative way) and we account for the local bias of read density observed in the WCE samples (differential chromatin accessibility, repetition, low mappability region, etc.). diff --git a/src/main.rs b/src/main.rs index 7a8ebfd..8a78cf6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -13,7 +13,7 @@ use clap::Parser; #[derive(Parser, Debug)] #[command(name = "bamcalib")] #[command(author = "Laurent Modolo <laurent.modolo@ens-lyon.fr>")] -#[command(version = "1.0")] +#[command(version = "1.0.3")] #[command(about = "Compute calibrated density from calibration mapping data", long_about = None)] struct Args { /// sorted bam file for the IP data diff --git a/src/stats.rs b/src/stats.rs index 4b08093..0b3d153 100644 --- a/src/stats.rs +++ b/src/stats.rs @@ -166,7 +166,7 @@ impl ORStats { ); } - /// Function to compute the OR from the results of the compute_depth() function + /// Function to compute the scaling factor for the IP data pub fn compute_or(&mut self, scaling: f64) { let ref_size = self.ip.chromosomes.get_genomesize(true) as f64; self.or = scaling * (self.wce.calibration.read_count as f64) @@ -174,7 +174,7 @@ impl ORStats { * (self.wce.reference.read_count as f64 / ref_size)); } - /// Function to compute the alpha from the results of the compute_depth() function + /// Function to compute the scaling factor for the WCE data pub fn compute_alpha(&mut self) { self.alpha = 0.0; let mut chr_id; @@ -200,9 +200,7 @@ impl ORStats { self.alpha += (ip_depth as f64) / (wce_depth as f64); } } - let calib_size = self.ip.chromosomes.get_genomesize(false) as f64; - self.alpha = (self.alpha / (self.ip.reference.read_count as f64)) - * (self.wce.calibration.read_count as f64 / calib_size); + self.alpha = self.alpha / (self.ip.reference.read_count as f64); } /// Return the OR value to normalize the IP coverage -- GitLab