Skip to content
Snippets Groups Projects
Commit 96e96a1a authored by Arnaud Duvermy's avatar Arnaud Duvermy
Browse files

update package

parent a3dbde3b
No related branches found
No related tags found
No related merge requests found
...@@ -27,9 +27,14 @@ rn_sim <- function(mu, alpha, n_replicates, ...){ ...@@ -27,9 +27,14 @@ rn_sim <- function(mu, alpha, n_replicates, ...){
#' @import plyr #' @import plyr
#' @import BiocGenerics #' @import BiocGenerics
#' @import purrr #' @import purrr
#' @import readr
#' #'
#' @examples #' @examples
generate_counts <- function(setup_dtf, export = FALSE){ generate_counts <- function(setup_dtf, export = FALSE){
full_name <- NULL
name <- NULL
variable <- NULL
message("reading and processing counts per genes ...") message("reading and processing counts per genes ...")
...@@ -46,7 +51,7 @@ generate_counts <- function(setup_dtf, export = FALSE){ ...@@ -46,7 +51,7 @@ generate_counts <- function(setup_dtf, export = FALSE){
tidyr::drop_na(counts) %>% tidyr::drop_na(counts) %>%
reshape2::dcast(., gene_id ~ full_name, value.var= "counts") reshape2::dcast(., gene_id ~ full_name, value.var= "counts")
if (export == TRUE) write.tsv(cnt.dtf, 'results/2022-03-03/estimate_dispersion.tsv') if (export == TRUE) readr::write_tsv(cnt.dtf, 'results/2022-03-03/estimate_dispersion.tsv')
return(cnt.dtf) return(cnt.dtf)
} }
......
...@@ -12,21 +12,26 @@ ...@@ -12,21 +12,26 @@
#' @examples #' @examples
htrsim <- function(countData, bioDesign, N_replicates){ htrsim <- function(countData, bioDesign, N_replicates){
# launch standard DESEQ2 analysis # launch standard DESEQ2 analysis
dds = run.deseq(tabl_cnts, bioDesign = bioDesign) dds = run.deseq(tabl_cnts = countData, bioDesign = bioDesign)
## Model matrix per samples
mm <- model.matrix(~genotype + env + genotype:env, bioDesign)
## Input estimation ## Input estimation
mu.input = estim.mu(dds) res = estim.mu(dds, mm)
mu.input = res$mu
alpha.input = estim.alpha(dds) alpha.input = estim.alpha(dds)
# Setup simulation # Setup simulation
input = reshape_input2setup(mu.dtf = mu.input, alpha.dtf = alpha.input) input = reshape_input2setup(mu.dtf = mu.input, alpha.dtf = alpha.input, average_rep = FALSE)
setup.simulation <- setup_countGener(bioSample_id = input$bioSample_id, setup.simulation <- setup_countGener(bioSample_id = input$bioSample_id,
n_rep = N_replicates, n_rep = 1,
alpha = input$alpha, alpha = input$alpha,
gene_id = input$gene_id, gene_id = input$gene_id,
mu = input$mu) mu = input$mu)
# Simulate counts # Simulate counts
htrs <- generate_counts(setup.simulation) htrs <- generate_counts(setup.simulation)
return(list(countDataSim = htrs, input = input, dds = dds)) return(list(countDataSim = htrs, input = input, dds = dds))
......
...@@ -11,15 +11,20 @@ ...@@ -11,15 +11,20 @@
#' @import dplyr #' @import dplyr
#' @import tibble #' @import tibble
#' @import BiocGenerics #' @import BiocGenerics
#' @import S4Vectors
#' @import readr
#' @examples #' @examples
#' @importFrom rlang .data #' @importFrom rlang .data
estim.alpha <- function(dds, export = FALSE){ estim.alpha <- function(dds, export = FALSE){
gene_id <- NULL gene_id <- NULL
expressed_gene_dispersion <- NULL
################### Estimate alpha per gene ######################## ################### Estimate alpha per gene ########################
#N.B: alpha = dispersion per gene #N.B: alpha = dispersion per gene
#dds <- DESeq2::estimateDispersions(dds, quiet = F) #dds <- DESeq2::estimateDispersions(dds, quiet = F)
#dispersion estimation #dispersion estimation
dispersion_estimate <- DESeq2::dispersions(dds) dds.mcols = S4Vectors::mcols(dds,use.names=TRUE)
dispersion_estimate = dds.mcols$dispersion
#dispersion_estimate <- DESeq2::dispersions(dds)
## Shape and export ## Shape and export
names(dispersion_estimate) <- names(dds@rowRanges) names(dispersion_estimate) <- names(dds@rowRanges)
...@@ -31,12 +36,103 @@ estim.alpha <- function(dds, export = FALSE){ ...@@ -31,12 +36,103 @@ estim.alpha <- function(dds, export = FALSE){
tibble::rownames_to_column() %>% tibble::rownames_to_column() %>%
dplyr::rename("alpha" = .data$., gene_id = "rowname") dplyr::rename("alpha" = .data$., gene_id = "rowname")
if (export == TRUE) write_tsv(disp_gene_express, 'results/2022-03-03/estimate_dispersion.tsv') if (export == TRUE) readr::write_tsv(expressed_gene_dispersion, 'results/2022-03-03/estimate_dispersion.tsv')
return(expressed_gene_dispersion) return(expressed_gene_dispersion)
} }
#' Estimate mu_ij
#'
#' @param dds DESEQ2 object
#' @param export Boolean
#' @param mm a model matrix
#'
#' @return mu_ij only for gene expressed c_ij != 0
#' @export
#' @import stats
#' @import dplyr
#' @import tibble
#' @import BiocGenerics
#' @import S4Vectors
#' @examples
#' @importFrom rlang .data
estim.mu <- function(dds, mm, export = FALSE){
gene_id <- NULL
nb_sples = BiocGenerics::rownames(dds@colData) %>% length()
nb_genes = BiocGenerics::rownames(dds@assays@data$counts) %>% length()
mm_epsi = rep(1, nb_sples)
names(mm_epsi) = 1 : nb_sples
dds.mcols = S4Vectors::mcols(dds,use.names=TRUE)
## BETA
B0 <- dds.mcols$Intercept
B1 <- dds.mcols$genotype_msn2D_vs_wt
B2 <- dds.mcols$genotype_msn4D_vs_wt
B3 <- dds.mcols$env_kcl_vs_control
B4 <- dds.mcols$genotypemsn2D.envkcl
B5 <- dds.mcols$genotypemsn4D.envkcl
#print(max(B0, na.rm=TRUE))
#print(max(B1, na.rm=TRUE))
#print(max(B2, na.rm=TRUE))
#print(max(B3, na.rm=TRUE))
#print(max(B4, na.rm=TRUE))
#print(max(B5, na.rm = TRUE))
## deviance = sigma2 -> estimate epsilon
deviance_i.sqrt = sqrt(dds.mcols$deviance)
beta.matrix = cbind(B0, B1,B2,B3,B4,B5) %>% as.matrix()
#p_ij = B0_i*mm1_j + B1_i*mm2_j + B3_i*mm3_j + B4_i*mm4_j + B5_i*mm5_j
p_ij = beta.matrix %*% t(mm)
#epsilon_ij ~ N(0, deviance)
#epsilon_ij = mm[,1] %>% map(., ~rnorm(deviance_i.sqrt, mean = 0, sd = 0 )) %>% data.frame() %>% as.matrix()
epsilon_ij = mm[,1] %>% map(., ~rnorm(deviance_i.sqrt, mean = 0, sd = deviance_i.sqrt )) %>% data.frame() %>% as.matrix()
## log_qij = p_ij + epsilon_ij
log_qij <- p_ij + epsilon_ij
## s_j
s_j = dds$sizeFactor
mu_ij = s_j * 2^log_qij
################# Estimate mu #########################
mu_estimate <- dds@assays@data$mu
#dds@assays@data$mu %>% dim()
#mu_estimate %>% dim()
rownames(mu_ij) = BiocGenerics::rownames(dds@assays@data$counts)
## drop NA in dispersion estimate (link to unexpressed gene)
### and convert to lovely dataframe
mu_gene_express = mu_ij %>%
stats::na.omit() %>%
data.frame()
colnames(mu_gene_express) <- rownames(dds@colData)
mu_gene_express <- mu_gene_express %>%
tibble::rownames_to_column(var = "gene_id")
if (export == TRUE) readr::write_tsv(mu_gene_express, 'results/2022-03-03/estimate_mu.tsv')
res = list(mu = mu_gene_express, beta.matrix = beta.matrix, deviance.sqrt = deviance_i.sqrt, dds.mcols = dds.mcols)
return(res)
}
#.xMm.foo <- function(b, m) return(b * m)
#.epsilon.foo <- function(x) return(rnorm(mean = 0 ,sd = x, n = 1 ))
#.epsilon_i <- function(dev_i) return(dev_i %>% map(., ~.epsilon.foo(.))%>% unlist())
#.getMu_i <- function(s, qi) return(2^(qi))
#' Estimate mu_ij #' Estimate mu_ij
...@@ -52,7 +148,7 @@ estim.alpha <- function(dds, export = FALSE){ ...@@ -52,7 +148,7 @@ estim.alpha <- function(dds, export = FALSE){
#' @import BiocGenerics #' @import BiocGenerics
#' @examples #' @examples
#' @importFrom rlang .data #' @importFrom rlang .data
estim.mu <- function(dds, export = FALSE){ estim.mu_beta <- function(dds, export = FALSE){
gene_id <- NULL gene_id <- NULL
################# Estimate mu ######################### ################# Estimate mu #########################
mu_estimate <- dds@assays@data$mu mu_estimate <- dds@assays@data$mu
......
...@@ -10,9 +10,12 @@ ...@@ -10,9 +10,12 @@
#' @examples #' @examples
run.deseq <- function(tabl_cnts, bioDesign ){ run.deseq <- function(tabl_cnts, bioDesign ){
########### LAUNCH DESEQ ############# ########### LAUNCH DESEQ #############
## Design model ## Design model - specify reference
dds = DESeq2::DESeqDataSetFromMatrix( countData = round(tabl_cnts), colData = bioDesign , design = ~ mutant + env + mutant:env) bioDesign$genotype <- factor(x = bioDesign$genotype,levels = c('wt','msn2D', 'msn4D'))
bioDesign$env <- factor(x = bioDesign$env,levels = c('control', 'kcl'))
## DESEQ standard analysis ## DESEQ standard analysis
dds = DESeq2::DESeqDataSetFromMatrix( countData = round(tabl_cnts), colData = bioDesign , design = ~ genotype + env + genotype:env)
dds <- DESeq2::DESeq(dds) dds <- DESeq2::DESeq(dds)
return(dds) return(dds)
} }
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
#' #'
#' @param mu.dtf dataframe of mu_ij #' @param mu.dtf dataframe of mu_ij
#' @param alpha.dtf dataframe of alpha_i #' @param alpha.dtf dataframe of alpha_i
#' @param average_rep bool
#' #'
#' @return #' @return
#' @import purrr #' @import purrr
...@@ -11,103 +12,122 @@ ...@@ -11,103 +12,122 @@
#' @export #' @export
#' #'
#' @examples #' @examples
reshape_input2setup <- function(mu.dtf, alpha.dtf){ reshape_input2setup <- function(mu.dtf, alpha.dtf, average_rep = FALSE){
## Defining sample names gene_id <- NULL
bioSample_id <- mu.dtf %>%
if(average_rep == TRUE){
## Defining sample names
bioSample_id <- mu.dtf %>%
dplyr::select(-gene_id) %>%
BiocGenerics::colnames() %>%
purrr::map(., ~stringr::str_split(.,"_")[[1]][1:2] %>%
BiocGenerics::paste(., collapse='_') ) %>%
BiocGenerics::unlist() %>% BiocGenerics::unique()
############### Mu is equal for biosample replicate #############
### case 1: choose 1 replicate
#mu_params <- mu_params %>% dplyr::select(., contains("rep1"))
## rename mu_params colnames to ensure corresponding with sample_names
#colnames(mu_params) <- sample_names
### case 2: average replicates
average_rep <- function(x, dtf) {
varname <- x
dtf %>%
dplyr::select(.,contains(x)) %>%
dplyr::mutate(!!varname := rowMeans(.)) %>%
dplyr::select(varname)
}
mu_ij <- bioSample_id %>% purrr::map(.x = ., .f = ~average_rep(.x, mu.dtf)) %>% data.frame()
mu_ij$gene_id <- alpha.dtf$gene_id
}
else {
bioSample_id <- mu.dtf %>%
dplyr::select(-gene_id) %>% dplyr::select(-gene_id) %>%
BiocGenerics::colnames() %>% BiocGenerics::colnames()
purrr::map(., ~stringr::str_split(.,"_")[[1]][1:2] %>%
BiocGenerics::paste(., collapse='_') ) %>% mu_ij = mu.dtf
BiocGenerics::unlist() %>% BiocGenerics::unique()
############### Mu is same for biosample replicate #############
### case 1: choose 1 replicate
#mu_params <- mu_params %>% dplyr::select(., contains("rep1"))
## rename mu_params colnames to ensure corresponding with sample_names
#colnames(mu_params) <- sample_names
### case 2: average replicates
average_rep <- function(x, dtf) {
varname <- x
dtf %>%
dplyr::select(.,contains(x)) %>%
dplyr::mutate(!!varname := rowMeans(.)) %>%
dplyr::select(varname)
} }
mu_avg.dtf <- bioSample_id %>% purrr::map(.x = ., .f = ~average_rep(.x, mu.dtf)) %>% data.frame()
mu_avg.dtf$gene_id <- alpha.dtf$gene_id
return(list(alpha = alpha.dtf , mu = mu_avg.dtf, bioSample_id = bioSample_id, gene_id = alpha.dtf$gene_id)) return(list(alpha = alpha.dtf , mu = mu_ij, bioSample_id = bioSample_id, gene_id = alpha.dtf$gene_id))
} }
#' Handle exception #' Handle exception
#' #'
#' @param bioSample_id vector of id for each bioSample #' @param bioSample vector of id for each bioSample
#' @param n_rep number of replicates #' @param n_rep number of replicates
#' @param gene_id vector of id for each gene #' @param gene_id vector of id for each gene
#' @param alpha vector of alpha_i #' @param alpha vector of alpha_i
#' @param n_genes number of genes
#' #'
#' @return #' @return
#' @export #' @export
#' #'
#' @examples #' @examples
handle_except <- function(bioSample, n_rep , gene_id , alpha, n_genes){ handle_except <- function(bioSample, n_rep , gene_id , alpha){
if(is.numeric(n_rep) && length(n_rep) == 1){
message("Homogeneous number of replicates between samples: ", n_rep, " replicates per samples\n")
n_rep = rep(n_rep, length(bioSample)) if(is.null(bioSample)){
message("BioSample ID: NULL" )
bioSample = "my_first_lib"
message("Assuming only one library will be setup. A library named 'my_first_lib'")
} }
else message("BioSample ID: ", "OK")
if(!is.null(n_rep) && length(bioSample) != length(n_rep)) stop("ERROR: unconsistent length between samples_names and n_rep\n") if(is.null(n_rep)){
message("N_rep: ", "NULL")
n_rep = 1
message("Number of replicates unspecified\nAssuming n_rep = 1")
}
else message("N_rep: ", "OK")
if(is.numeric(n_rep) && length(n_rep) == 1){
message(n_rep, " replicates per samples")
n_rep = rep(n_rep, length(bioSample))
}
if(is.null(n_genes)) { if(is.null(gene_id)){
if(!is.null(gene_id) || !is.null(alpha)){ message("Gene_id: ", "NULL")
ifelse(length(alpha) == length(alpha),
(n_genes = length(gene_id)), ## if
stop("ERROR: unconsistent value between n_genes, length(gene_names) and length(gene_disp)\n")) ## else
}
} }
else message("Gene_id: ", "OK")
if(!is.null(n_genes)) { if(is.null(alpha)){
if(is.null(gene_id) && is.null(alpha)) { message("Alpha: ", "NULL")
### Precised alpha params for each genes
alpha = runif(0.2,120, n = n_genes) ## randomly defined between 2 and 100
id = paste0('gene', 1:n_genes)
alpha <- list(gene_id = id, alpha = alpha)
}
} }
else message("Alpha: ", "OK")
if(!is.null(n_rep) && length(bioSample) != length(n_rep)) stop("ERROR: unconsistent length between samples_names and n_rep")
if(is.null(n_genes) && is.null(gene_disp) && is.null(gene_id)) { if(!is.null(gene_id) && !is.null(alpha)) {
message("Number of genes unspecified\nAssuming n_genes = 3\nAssuming gene dispersion (alpha) follow a uniform law between 2 and 100\n") if (is.data.frame(alpha)) if (length(gene_id) != dim(alpha)[1]) stop("ERROR: unconsistent length between gene_id and alpha")
if (is.vector(alpha)) if (length(gene_id) != length(alpha)) stop("ERROR: unconsistent length between gene_id and alpha")
else n_genes = length(alpha)
}
if( is.null(gene_id) && is.null(alpha) ){
message("Assuming n_genes = 3")
n_genes = 3 n_genes = 3
alpha = runif(2,100, n = n_genes)
gene_id = paste0('gene', 1:n_genes) gene_id = paste0('gene', 1:n_genes)
} }
if(is.null(gene_id) && is.null(alpha)) { if(is.null(gene_id) && !is.null(alpha)) {
message("n_genes = ", n_genes, "\nAssuming gene dispersion (alpha) follow a uniform law between 2 and 100\n") message("Built gene_id")
alpha = runif(2,100, n = n_genes) n_genes = length(alpha)
gene_id = paste0('gene', 1:n_genes) gene_id = paste0('gene', 1:n_genes)
} }
if(length(bioSample) == 1 && bioSample == "my_first_lib") message("No sample name is provided.\nAssuming only one library will be setup\n") if(!is.null(gene_id) && is.null(alpha)) {
message("Alpha randomly defined from uniform law between 2 and 100")
if(is.null(n_rep)){ n_genes = length(gene_id)
message("Number of replicates not provided.\nAssuming 10 replicates per sample will be setup") alpha = runif(0.2,120, n = n_genes) ## randomly defined between 2 and 100
n_rep = 10
} }
if ( !exists("n_genes")) n_genes = length(gene_id)
if(is.null(gene_id)) gene_id = paste0('gene', 1:n_genes) my_list = list(bioSample = bioSample, rep = n_rep, n_g = n_genes, alpha = alpha, gene_id = gene_id)
my_list = list(bioSample = bioSample, rep = n_rep, n_g = n_genes, alpha = alpha)
return(my_list) return(my_list)
} }
...@@ -120,7 +140,6 @@ handle_except <- function(bioSample, n_rep , gene_id , alpha, n_genes){ ...@@ -120,7 +140,6 @@ handle_except <- function(bioSample, n_rep , gene_id , alpha, n_genes){
#' @param n_rep number of replicates #' @param n_rep number of replicates
#' @param gene_id vector of id for each gene #' @param gene_id vector of id for each gene
#' @param alpha vector of alpha_i #' @param alpha vector of alpha_i
#' @param n_genes number of genes
#' @param mu dataframe of mu_ij #' @param mu dataframe of mu_ij
#' @import purrr #' @import purrr
#' @import data.table #' @import data.table
...@@ -128,10 +147,11 @@ handle_except <- function(bioSample, n_rep , gene_id , alpha, n_genes){ ...@@ -128,10 +147,11 @@ handle_except <- function(bioSample, n_rep , gene_id , alpha, n_genes){
#' @export #' @export
#' #'
#' @examples #' @examples
setup_countGener <- function(bioSample_id = "my_first_lib", n_rep = NULL , gene_id = NULL , alpha = NULL, n_genes = NULL, mu = NULL ){ setup_countGener <- function(bioSample_id = NULL, n_rep = NULL , gene_id = NULL , alpha = NULL, mu = NULL ){
message("\nSetup counts generator ...")
######### HANDLE EXCEPTION ####### ######### HANDLE EXCEPTION #######
setup = handle_except(bioSample_id, n_rep , gene_id , alpha, n_genes) setup = handle_except(bioSample_id, n_rep , gene_id , alpha)
######## HANDLE TYPE MU ########## ######## HANDLE TYPE MU ##########
if(is.null(mu)) mu = .mu_generator # default function to generate mu if(is.null(mu)) mu = .mu_generator # default function to generate mu
...@@ -141,9 +161,10 @@ setup_countGener <- function(bioSample_id = "my_first_lib", n_rep = NULL , gene_ ...@@ -141,9 +161,10 @@ setup_countGener <- function(bioSample_id = "my_first_lib", n_rep = NULL , gene_
nBinom_params <- purrr::map2(.x= setup$bioSample, .y = setup$rep, nBinom_params <- purrr::map2(.x= setup$bioSample, .y = setup$rep,
~(list(name=.x, #sample_name ~(list(name=.x, #sample_name
n_replicates = .y, # random int between 1 & max_N_replicates n_replicates = .y, # random int between 1 & max_N_replicates
gene_id = setup$alpha$gene_id, # gene_id gene_id = setup$gene_id, # gene_id
mu = mu.set , #mu(ij) mu = mu.set , #mu(ij)
alpha = setup$alpha$alpha))) %>% # alpha(i) alpha = setup$alpha))) %>% # alpha(i)
data.table::rbindlist(.) %>% as.data.frame() ## convert to lovely dtf data.table::rbindlist(.) %>% as.data.frame() ## convert to lovely dtf
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment