Skip to content
Snippets Groups Projects
Verified Commit a2b8cde9 authored by Laurent Modolo's avatar Laurent Modolo
Browse files

v1.0.3: fix OR computation and update README formula

parent 8bacdb89
No related branches found
No related tags found
No related merge requests found
...@@ -65,7 +65,7 @@ dependencies = [ ...@@ -65,7 +65,7 @@ dependencies = [
[[package]] [[package]]
name = "bamcalib" name = "bamcalib"
version = "0.1.1" version = "0.1.3"
dependencies = [ dependencies = [
"bam", "bam",
"bigtools", "bigtools",
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
[package] [package]
name = "bamcalib" name = "bamcalib"
version = "0.1.2" version = "0.1.3"
edition = "2021" edition = "2021"
description = "A command line tools to compute normalized bigwig from calibrated bam of a Chip-Seq experiment" description = "A command line tools to compute normalized bigwig from calibrated bam of a Chip-Seq experiment"
license = "AGPL-3.0-or-later" license = "AGPL-3.0-or-later"
......
...@@ -73,7 +73,7 @@ The `bamcalib` expect the following parameters: ...@@ -73,7 +73,7 @@ The `bamcalib` expect the following parameters:
- $WCE_{c}\left(t\right)$ the coverage at position $t$ in the WCE sample on the calibration genome - $WCE_{c}\left(t\right)$ the coverage at position $t$ in the WCE sample on the calibration genome
```math ```math
\text{norm}IP\left(t\right) = IP_{x}\left(t\right) \frac{\frac{1}{T_c}\sum_{t^{'}=1}^{T_c}WCE_{c}\left(t^{'}\right)}{\frac{1}{T_c}\sum_{t^{'}=1}^{T_c}IP_{c}\left(t^{'}\right)\frac{1}{T_x}\sum_{t^{'}=1}^{T_x}WCE_{x}\left(t^{'}\right)} \text{norm}IP\left(t\right) = IP_{x}\left(t\right) \frac{\beta \frac{1}{T_c}\sum_{t^{'}=1}^{T_c}WCE_{c}\left(t^{'}\right)}{\frac{1}{T_c}\sum_{t^{'}=1}^{T_c}IP_{c}\left(t^{'}\right)\frac{1}{T_x}\sum_{t^{'}=1}^{T_x}WCE_{x}\left(t^{'}\right)}
``` ```
In a reference genome of size $`T_x`$ (ignoring the chromosome segmentation), and in a calibration genome of size $`T_c`$. In a reference genome of size $`T_x`$ (ignoring the chromosome segmentation), and in a calibration genome of size $`T_c`$.
...@@ -99,10 +99,10 @@ E\left(\text{norm}IP\left(t\right)\right)= E\left(\frac{\text{norm}IP\left(t\rig ...@@ -99,10 +99,10 @@ E\left(\text{norm}IP\left(t\right)\right)= E\left(\frac{\text{norm}IP\left(t\rig
which gives which gives
```math ```math
\alpha = \frac{\sum_{t^{'}=1}^{T_c}WCE_{c}\left(t^{'}\right)}{10^9} \frac{\sum_{t=1}^{T_x}\frac{IP_{x}\left(t\right)}{WCE_{x}\left(t\right)}}{\sum_{t=1}^{T_x}IP_{x}\left(t\right)} \text{norm}WCE\left(t\right) = WCE_{x}\left(t\right) \sum_{t=1}^{T_x}\frac{IP_{x}\left(t\right)}{WCE_{x}\left(t\right)} \frac{1}{\sum_{t=1}^{T_x}IP_{x}\left(t\right)}
``` ```
Not that all the computation are scaled by the genome size and not at the read number as in [Hu et al.](https://doi.org/10.1093/nar/gkv670), this is also why we add a scaling factor (default to $10^3$). Not that all the computation are scaled by the genome size and not at the read number as in [Hu et al.](https://doi.org/10.1093/nar/gkv670), this is also why we add a scaling factor $\beta$ (default to $10^3$).
The $\text{ratio}IP\left(t\right)$ is multiplied by this scaling factor. The $\text{ratio}IP\left(t\right)$ is multiplied by this scaling factor.
With this method, we retain the interesting properties of [Hu et al.](https://doi.org/10.1093/nar/gkv670) normalization on the average read density between samples (i.e., we can compare two different samples in a quantitative way) and we account for the local bias of read density observed in the WCE samples (differential chromatin accessibility, repetition, low mappability region, etc.). With this method, we retain the interesting properties of [Hu et al.](https://doi.org/10.1093/nar/gkv670) normalization on the average read density between samples (i.e., we can compare two different samples in a quantitative way) and we account for the local bias of read density observed in the WCE samples (differential chromatin accessibility, repetition, low mappability region, etc.).
......
...@@ -13,7 +13,7 @@ use clap::Parser; ...@@ -13,7 +13,7 @@ use clap::Parser;
#[derive(Parser, Debug)] #[derive(Parser, Debug)]
#[command(name = "bamcalib")] #[command(name = "bamcalib")]
#[command(author = "Laurent Modolo <laurent.modolo@ens-lyon.fr>")] #[command(author = "Laurent Modolo <laurent.modolo@ens-lyon.fr>")]
#[command(version = "1.0")] #[command(version = "1.0.3")]
#[command(about = "Compute calibrated density from calibration mapping data", long_about = None)] #[command(about = "Compute calibrated density from calibration mapping data", long_about = None)]
struct Args { struct Args {
/// sorted bam file for the IP data /// sorted bam file for the IP data
......
...@@ -166,7 +166,7 @@ impl ORStats { ...@@ -166,7 +166,7 @@ impl ORStats {
); );
} }
/// Function to compute the OR from the results of the compute_depth() function /// Function to compute the scaling factor for the IP data
pub fn compute_or(&mut self, scaling: f64) { pub fn compute_or(&mut self, scaling: f64) {
let ref_size = self.ip.chromosomes.get_genomesize(true) as f64; let ref_size = self.ip.chromosomes.get_genomesize(true) as f64;
self.or = scaling * (self.wce.calibration.read_count as f64) self.or = scaling * (self.wce.calibration.read_count as f64)
...@@ -174,7 +174,7 @@ impl ORStats { ...@@ -174,7 +174,7 @@ impl ORStats {
* (self.wce.reference.read_count as f64 / ref_size)); * (self.wce.reference.read_count as f64 / ref_size));
} }
/// Function to compute the alpha from the results of the compute_depth() function /// Function to compute the scaling factor for the WCE data
pub fn compute_alpha(&mut self) { pub fn compute_alpha(&mut self) {
self.alpha = 0.0; self.alpha = 0.0;
let mut chr_id; let mut chr_id;
...@@ -200,9 +200,7 @@ impl ORStats { ...@@ -200,9 +200,7 @@ impl ORStats {
self.alpha += (ip_depth as f64) / (wce_depth as f64); self.alpha += (ip_depth as f64) / (wce_depth as f64);
} }
} }
let calib_size = self.ip.chromosomes.get_genomesize(false) as f64; self.alpha = self.alpha / (self.ip.reference.read_count as f64);
self.alpha = (self.alpha / (self.ip.reference.read_count as f64))
* (self.wce.calibration.read_count as f64 / calib_size);
} }
/// Return the OR value to normalize the IP coverage /// Return the OR value to normalize the IP coverage
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment