From f643b343b1e0d9d2f12a17ace87c5ec1e3d34573 Mon Sep 17 00:00:00 2001 From: Laurent Modolo <laurent.modolo@ens-lyon.fr> Date: Tue, 28 Mar 2023 16:15:54 +0200 Subject: [PATCH] add --intermediate-bigwig option --- Cargo.lock | 2 +- src/bed.rs | 85 ++++++++++++++++++++++++++++++++++++++++++++++++--- src/main.rs | 12 ++++++++ src/pileup.rs | 5 +++ src/stats.rs | 57 +++++++++++++++++++++++++++++++--- 5 files changed, 151 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8332ca1..4efb1cf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -65,7 +65,7 @@ dependencies = [ [[package]] name = "bamcalib" -version = "0.1.1" +version = "0.1.2" dependencies = [ "bam", "bigtools", diff --git a/src/bed.rs b/src/bed.rs index fa12b61..afd1275 100644 --- a/src/bed.rs +++ b/src/bed.rs @@ -5,6 +5,7 @@ use crate::stats; use bigtools; use std::collections::HashMap; +use std::path::PathBuf; /// BedGraphLine store information on a bedgraph line #[derive(Debug, Clone)] @@ -62,7 +63,7 @@ impl BedGraphLine { } } -/// BedGraph store information necessary to generate a normalized bed file +// BedGraph store information necessary to generate a normalized bed file /// It use bio::Record to store the current output bed line /// direct value to store the chr id and current value in numeric format /// or_stats to store everything to compute the normalization from bam files @@ -130,6 +131,13 @@ impl BedGraph { None => self.or_stats.next(), } } + + pub fn reset(&mut self) { + self.or_stats.reset(); + self.future_bedline = None; + self.current_chr_id = 0; + self.current_value = 0.0; + } } impl Iterator for BedGraph { @@ -168,6 +176,7 @@ impl Iterator for BedGraph { } /// Structure to make an interface with the bigtools crate +#[derive(Debug, Clone)] pub struct BedGraphWrapper { bed: BedGraph, chr_name: String, @@ -203,7 +212,52 @@ impl BedGraphWrapper { no_bits: u16, file_ip: &str, file_wce: &str, + fragment_estimation: bool, + intermediate_bigwig: bool, ) { + let (mut bedwrapper, chrom_map_wce) = BedGraphWrapper::write_bigwig_ip( + bam_ip, + bam_wce, + prefix, + scaling, + thread_local, + no_bits, + file_ip, + fragment_estimation, + ); + BedGraphWrapper::write_bigwig_wce(file_wce, chrom_map_wce, thread_local); + + if intermediate_bigwig { + bedwrapper.bed.reset(); + bedwrapper.bed.or_stats.intermediate_bigwig(true, true); + let mut path = PathBuf::from(file_ip); + path.set_extension("ip.bigwig"); + BedGraphWrapper::write_bigwig_old_norm( + bedwrapper.clone(), + &path.to_str().unwrap(), + thread_local, + ); + bedwrapper.bed.reset(); + bedwrapper.bed.or_stats.intermediate_bigwig(true, false); + path.set_extension("wce.bigwig"); + BedGraphWrapper::write_bigwig_old_norm( + bedwrapper, + &path.to_str().unwrap(), + thread_local, + ); + } + } + + fn write_bigwig_ip( + bam_ip: &str, + bam_wce: &str, + prefix: &str, + scaling: f64, + thread_local: u16, + no_bits: u16, + file: &str, + fragment_estimation: bool, + ) -> (BedGraphWrapper, HashMap<String, u32>) { let pool = futures::executor::ThreadPoolBuilder::new() .pool_size(thread_local as usize) .create() @@ -211,26 +265,32 @@ impl BedGraphWrapper { let mut bed = BedGraph::new(); bed.init(&bam_ip, &bam_wce, &prefix, scaling, thread_local, no_bits); let bedwrapper = BedGraphWrapper::new(bed); + let bedwrapper_return = bedwrapper.clone(); + // write ratio IP let chrom_map = bedwrapper.bed.or_stats.get_chr().to_hashmap_filtered(false); let chrom_map_wce = bedwrapper.bed.or_stats.get_chr().to_hashmap_filtered(false); - let bed_wce = EmptyBed::new(&chrom_map); let vals_iter = bigtools::bed::bedparser::BedParser::new(bedwrapper); - let writer = bigtools::bigwig::BigWigWrite::create_file(String::from(file_ip)); + let writer = bigtools::bigwig::BigWigWrite::create_file(String::from(file)); let chsi = bigtools::bed::bedparser::BedParserStreamingIterator::new( vals_iter, chrom_map.clone(), true, ); writer.write(chrom_map, chsi, pool).unwrap(); + return (bedwrapper_return, chrom_map_wce); + } + fn write_bigwig_wce(file: &str, chrom_map_wce: HashMap<String, u32>, thread_local: u16) { + // write ratio WCE let pool = futures::executor::ThreadPoolBuilder::new() .pool_size(thread_local as usize) .create() .expect("Unable to create thread pool."); + let bed_wce = EmptyBed::new(&chrom_map_wce); let vals_iter = bigtools::bed::bedparser::BedParser::new(bed_wce); - let writer = bigtools::bigwig::BigWigWrite::create_file(String::from(file_wce)); + let writer = bigtools::bigwig::BigWigWrite::create_file(String::from(file)); let chsi = bigtools::bed::bedparser::BedParserStreamingIterator::new( vals_iter, chrom_map_wce.clone(), @@ -238,6 +298,23 @@ impl BedGraphWrapper { ); writer.write(chrom_map_wce, chsi, pool).unwrap(); } + + fn write_bigwig_old_norm(bedwrapper: BedGraphWrapper, file: &str, thread_local: u16) { + let pool = futures::executor::ThreadPoolBuilder::new() + .pool_size(thread_local as usize) + .create() + .expect("Unable to create thread pool."); + let chrom_map = bedwrapper.bed.or_stats.get_chr().to_hashmap_filtered(false); + + let vals_iter = bigtools::bed::bedparser::BedParser::new(bedwrapper); + let writer = bigtools::bigwig::BigWigWrite::create_file(String::from(file)); + let chsi = bigtools::bed::bedparser::BedParserStreamingIterator::new( + vals_iter, + chrom_map.clone(), + true, + ); + writer.write(chrom_map, chsi, pool).unwrap(); + } } impl bigtools::bed::bedparser::StreamingBedValues for BedGraphWrapper { diff --git a/src/main.rs b/src/main.rs index 7a8ebfd..82bf329 100644 --- a/src/main.rs +++ b/src/main.rs @@ -47,6 +47,14 @@ struct Args { /// number of threads to parse bam files #[arg(short, long, default_value_t = 1)] thread: u16, + + /// compute coverage from read information instead of fragment estimation + #[arg(long)] + no_fragment_estimation: bool, + + /// output normIP and normWCE bigwig files (not just the ratio of the two) + #[arg(long)] + intermediate_bigwig: bool, } fn main() { @@ -57,6 +65,8 @@ fn main() { let bigwig_wce = matches.bigwig_wce; let cal_prefix = matches.cal_prefix; let scaling = matches.scaling; + let fragment_estimation = !matches.no_fragment_estimation; + let intermediate_bigwig = matches.intermediate_bigwig; let mut thread: u16 = matches.thread; if thread == 0 { thread = 1; @@ -71,5 +81,7 @@ fn main() { no_bits, &bigwig_ip, &bigwig_wce, + fragment_estimation, + intermediate_bigwig, ); } diff --git a/src/pileup.rs b/src/pileup.rs index 31d7d2b..305442a 100644 --- a/src/pileup.rs +++ b/src/pileup.rs @@ -228,6 +228,11 @@ impl Depth { None => return None, } } + + pub fn reset(&mut self) { + self.chr_id = 0; + self.pos = 0; + } } impl Iterator for Depth { diff --git a/src/stats.rs b/src/stats.rs index 4b08093..a845f69 100644 --- a/src/stats.rs +++ b/src/stats.rs @@ -62,6 +62,8 @@ pub struct ORStats { wce_depth: pileup::Depth, pos_chr_id: u32, pos_chr_pos: u32, + intermediate_bigwig: bool, + intermediate_bigwig_ip: bool, } impl ORStats { @@ -76,6 +78,8 @@ impl ORStats { wce_depth: pileup::Depth::new(), pos_chr_id: 0, pos_chr_pos: 0, + intermediate_bigwig: false, + intermediate_bigwig_ip: true, } } @@ -214,6 +218,47 @@ impl ORStats { pub fn get_alpha(&self) -> f64 { return self.alpha; } + + fn norm_ip(&self, pos_chr_id: u32, pos_chr_pos: u32, ip_depth: u32) -> Option<(u32, u32, f64)> { + Some((pos_chr_id, pos_chr_pos, self.or * (ip_depth as f64))) + } + + fn norm_wce( + &self, + pos_chr_id: u32, + pos_chr_pos: u32, + wce_depth: u32, + ) -> Option<(u32, u32, f64)> { + Some(( + pos_chr_id, + pos_chr_pos, + self.alpha * (wce_depth as f64) + 1.0, + )) + } + + fn ratio_norm( + &self, + pos_chr_id: u32, + pos_chr_pos: u32, + ip_depth: u32, + wce_depth: u32, + ) -> Option<(u32, u32, f64)> { + Some(( + pos_chr_id, + pos_chr_pos, + self.or * (ip_depth as f64) / (self.alpha * (wce_depth as f64) + 1.0), + )) + } + + pub fn reset(&mut self) { + self.ip_depth.reset(); + self.wce_depth.reset(); + } + + pub fn intermediate_bigwig(&mut self, activate: bool, ip: bool) { + self.intermediate_bigwig = activate; + self.intermediate_bigwig_ip = ip; + } } impl Iterator for ORStats { @@ -240,11 +285,13 @@ impl Iterator for ORStats { Some(pos) => pos, None => return None, }; - return Some(( - pos_chr_id, - pos_chr_pos, - self.or * (ip_depth as f64) / (self.alpha * (wce_depth as f64) + 1.0), - )); + if self.intermediate_bigwig && self.intermediate_bigwig_ip { + return self.norm_ip(pos_chr_id, pos_chr_pos, ip_depth); + } + if self.intermediate_bigwig && !self.intermediate_bigwig_ip { + return self.norm_wce(pos_chr_id, pos_chr_pos, wce_depth); + } + return self.ratio_norm(pos_chr_id, pos_chr_pos, ip_depth, wce_depth); } } -- GitLab