From 79c3092cfda56bb25117d57f1803b75c736dcb2f Mon Sep 17 00:00:00 2001 From: aduvermy <arnaud.duvermy@ens-lyon.fr> Date: Wed, 29 Jun 2022 09:46:23 +0200 Subject: [PATCH] add src to build table counts from h5 files --- src/kallisto_output2TablCnts.R | 49 ++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 src/kallisto_output2TablCnts.R diff --git a/src/kallisto_output2TablCnts.R b/src/kallisto_output2TablCnts.R new file mode 100644 index 0000000..66fadca --- /dev/null +++ b/src/kallisto_output2TablCnts.R @@ -0,0 +1,49 @@ +library(tidyverse) +library(rhdf5) +library(tximport) + +# code contributed from Andrew Morgan +read_kallisto_h5 <- function(fpath, ...) { + if (!requireNamespace("rhdf5", quietly=TRUE)) { + stop("reading kallisto results from hdf5 files requires Bioconductor package `rhdf5`") + } + counts <- rhdf5::h5read(fpath, "est_counts") + ids <- rhdf5::h5read(fpath, "aux/ids") + efflens <- rhdf5::h5read(fpath, "aux/eff_lengths") + + # as suggested by https://support.bioconductor.org/p/96958/#101090 + ids <- as.character(ids) + + stopifnot(length(counts) == length(ids)) + stopifnot(length(efflens) == length(ids)) + + result <- data.frame(target_id = ids, + eff_length = efflens, + est_counts = counts, + stringsAsFactors = FALSE) + normfac <- with(result, (1e6)/sum(est_counts/eff_length)) + result$tpm <- with(result, normfac*(est_counts/eff_length)) + return(result) +} + + +setwd("/home/arnux/counts_simulation/data/SRP217588/") +listFile = list.files(path= ".", pattern=NULL, all.files=FALSE, full.names=FALSE) + +h52dtf <- function(fn){ + print(fn) + h5 = tximport(files = fn, + type = "kallisto", + txOut = TRUE, importer = read_kallisto_h5 ) + return(h5$counts) +} + +a = listFile %>% map(~h52dtf(.x)) +tbl_cnts = do.call("cbind", a) +colnames(tbl_cnts) = listFile %>% str_split(pattern = ".h5", simplify = TRUE)%>% .[,1] %>% str_replace(., '-', '_' ) %>% str_replace(., '-', '_' ) +tbl_cnts= tbl_cnts %>% data.frame() +tbl_cnts = tbl_cnts %>% rownames_to_column("gene_id") +write_tsv(tbl_cnts, "../../src/htrsim/inst/extdata/SRP217588.tsv" ) +#write_tsv(tbl_cnts, "../../src/htrsim/inst/extdata/SRP217588.tsv" ) + + -- GitLab