From d540e6faea32dc261ffbe04bbb46659f06f12d51 Mon Sep 17 00:00:00 2001 From: aduvermy <arnaud.duvermy@ens-lyon.fr> Date: Wed, 6 Apr 2022 15:27:45 +0200 Subject: [PATCH] update htrsim to devtools package --- src/htrsim/DESCRIPTION | 27 +++ src/htrsim/NAMESPACE | 23 +++ src/htrsim/R/generate_counts.R | 53 ++++++ src/htrsim/R/htrsim_workflow.R | 33 ++++ src/htrsim/R/input_estimation.R | 78 ++++++++ src/htrsim/R/launch_deseq.R | 18 ++ src/htrsim/R/setup_cntsGenerator.R | 168 ++++++++++++++++++ src/htrsim/devtools_history.R | 14 ++ src/htrsim/inst/extdata/public_bioDesign.csv | 13 ++ src/htrsim/man/estim.alpha.Rd | 19 ++ src/htrsim/man/estim.mu.Rd | 19 ++ src/htrsim/man/generate_counts.Rd | 19 ++ src/htrsim/man/handle_except.Rd | 25 +++ src/htrsim/man/htrsim.Rd | 21 +++ src/htrsim/man/reshape_input2setup.Rd | 19 ++ src/htrsim/man/rn_sim.Rd | 23 +++ src/htrsim/man/run.deseq.Rd | 19 ++ src/htrsim/man/setup_countGener.Rd | 34 ++++ .../comment-utiliser-mon-package.Rmd | 84 +++++++++ .../counts_generator.R | 0 .../input_estimation.R | 0 src/{htrsim => htrsim_beta}/launch_deseq.R | 0 src/{htrsim => htrsim_beta}/main.R | 0 .../setup_cntsGenerator.R | 0 src/{htrsim => htrsim_beta}/setup_deseq.R | 0 src/{ => tuto_beta}/tutorial_htrsim.R | 0 src/{ => tuto_beta}/tutorial_htrsim.Rmd | 0 27 files changed, 709 insertions(+) create mode 100644 src/htrsim/DESCRIPTION create mode 100644 src/htrsim/NAMESPACE create mode 100644 src/htrsim/R/generate_counts.R create mode 100644 src/htrsim/R/htrsim_workflow.R create mode 100644 src/htrsim/R/input_estimation.R create mode 100644 src/htrsim/R/launch_deseq.R create mode 100644 src/htrsim/R/setup_cntsGenerator.R create mode 100644 src/htrsim/devtools_history.R create mode 100644 src/htrsim/inst/extdata/public_bioDesign.csv create mode 100644 src/htrsim/man/estim.alpha.Rd create mode 100644 src/htrsim/man/estim.mu.Rd create mode 100644 src/htrsim/man/generate_counts.Rd create mode 100644 src/htrsim/man/handle_except.Rd create mode 100644 src/htrsim/man/htrsim.Rd create mode 100644 src/htrsim/man/reshape_input2setup.Rd create mode 100644 src/htrsim/man/rn_sim.Rd create mode 100644 src/htrsim/man/run.deseq.Rd create mode 100644 src/htrsim/man/setup_countGener.Rd create mode 100644 src/htrsim/vignettes/comment-utiliser-mon-package.Rmd rename src/{htrsim => htrsim_beta}/counts_generator.R (100%) rename src/{htrsim => htrsim_beta}/input_estimation.R (100%) rename src/{htrsim => htrsim_beta}/launch_deseq.R (100%) rename src/{htrsim => htrsim_beta}/main.R (100%) rename src/{htrsim => htrsim_beta}/setup_cntsGenerator.R (100%) rename src/{htrsim => htrsim_beta}/setup_deseq.R (100%) rename src/{ => tuto_beta}/tutorial_htrsim.R (100%) rename src/{ => tuto_beta}/tutorial_htrsim.Rmd (100%) diff --git a/src/htrsim/DESCRIPTION b/src/htrsim/DESCRIPTION new file mode 100644 index 0000000..305c99c --- /dev/null +++ b/src/htrsim/DESCRIPTION @@ -0,0 +1,27 @@ +Package: htrsim +Title: Hightoughtput RNA-seq simulation +Version: 0.1 +Authors@R: person('Duvermy', 'Arnaud', email = 'arnaud.duvermy@ens-lyon.Fr', role = c('aut', 'cre')) +Description: blabla. +License: GPL-3 +Encoding: UTF-8 +Roxygen: list(markdown = TRUE) +RoxygenNote: 7.1.2 +Suggests: + rmarkdown, + knitr +VignetteBuilder: knitr +Imports: + stats, + DESeq2, + magrittr, + dplyr, + BiocGenerics, + tibble, + rlang, + stringr, + purrr, + data.table, + plyr, + reshape2, + tidyr diff --git a/src/htrsim/NAMESPACE b/src/htrsim/NAMESPACE new file mode 100644 index 0000000..5958cec --- /dev/null +++ b/src/htrsim/NAMESPACE @@ -0,0 +1,23 @@ +# Generated by roxygen2: do not edit by hand + +export(estim.alpha) +export(estim.mu) +export(generate_counts) +export(handle_except) +export(htrsim) +export(reshape_input2setup) +export(rn_sim) +export(run.deseq) +export(setup_countGener) +import(BiocGenerics) +import(DESeq2) +import(data.table) +import(dplyr) +import(plyr) +import(purrr) +import(reshape2) +import(stats) +import(stringr) +import(tibble) +import(tidyr) +importFrom(rlang,.data) diff --git a/src/htrsim/R/generate_counts.R b/src/htrsim/R/generate_counts.R new file mode 100644 index 0000000..beeaa85 --- /dev/null +++ b/src/htrsim/R/generate_counts.R @@ -0,0 +1,53 @@ +#' Sampling counts from Negative Binomial distribution +#' +#' @param mu mu_ij value +#' @param alpha alpha_i value +#' @param n_replicates number of replicates +#' @param ... everything else +#' +#' @return vector of length n_replicates +#' @export +#' +#' @examples +rn_sim <- function(mu, alpha, n_replicates, ...){ + simul<- rnbinom(mu=mu, size=alpha, n = n_replicates) + return(simul) +} + + +#' Simulate counts and convert to lovely deseq input +#' +#' @param setup_dtf Output from setup_cntsGenerator.R +#' @param export Boolean +#' +#' @return dataframe with counts c_ij +#' @export +#' @import tidyr +#' @import reshape2 +#' @import plyr +#' @import BiocGenerics +#' @import purrr +#' +#' @examples +generate_counts <- function(setup_dtf, export = FALSE){ + + message("reading and processing counts per genes ...") + + cnt.list = setup_dtf %>% + purrr::pmap(rn_sim) + + message("reshaping to dataframe ...") + + cnt.dtf <- cnt.list %>% + plyr::ldply(., rbind) %>% + BiocGenerics::cbind(setup_dtf %>% select(c("gene_id", "name"))) %>% + reshape2::melt(.,id=c('name','gene_id'),value.name = "counts") %>% + tidyr::unite(full_name, name, variable) %>% + tidyr::drop_na(counts) %>% + reshape2::dcast(., gene_id ~ full_name, value.var= "counts") + + if (export == TRUE) write.tsv(cnt.dtf, 'results/2022-03-03/estimate_dispersion.tsv') + + return(cnt.dtf) +} + diff --git a/src/htrsim/R/htrsim_workflow.R b/src/htrsim/R/htrsim_workflow.R new file mode 100644 index 0000000..3569b1f --- /dev/null +++ b/src/htrsim/R/htrsim_workflow.R @@ -0,0 +1,33 @@ + + +#' HTRSIM workflow +#' +#' @param countData dataframe with actual count per gene +#' @param bioDesign dataframe defining bioDesign +#' @param N_replicates Number of replicate +#' +#' @return dataframe with simulated count per gene +#' @export +#' +#' @examples +htrsim <- function(countData, bioDesign, N_replicates){ + + # launch standard DESEQ2 analysis + dds = run.deseq(tabl_cnts, bioDesign = bioDesign) + + ## Input estimation + mu.input = estim.mu(dds) + alpha.input = estim.alpha(dds) + + # Setup simulation + input = reshape_input2setup(mu.dtf = mu.input, alpha.dtf = alpha.input) + setup.simulation <- setup_countGener(bioSample_id = input$bioSample_id, + n_rep = N_replicates, + alpha = input$alpha, + gene_id = input$gene_id, + mu = input$mu) + + # Simulate counts + htrs <- generate_counts(setup.simulation) + return(list(countDataSim = htrs, input = input, dds = dds)) +} diff --git a/src/htrsim/R/input_estimation.R b/src/htrsim/R/input_estimation.R new file mode 100644 index 0000000..7699137 --- /dev/null +++ b/src/htrsim/R/input_estimation.R @@ -0,0 +1,78 @@ + +#' Estimate alpha_i +#' +#' @param dds DESEQ2 object +#' @param export Boolean +#' +#' @return alpha_i per gene only for gene expressed c_ij != 0 +#' @export +#' @import DESeq2 +#' @import stats +#' @import dplyr +#' @import tibble +#' @import BiocGenerics +#' @examples +#' @importFrom rlang .data +estim.alpha <- function(dds, export = FALSE){ + gene_id <- NULL + ################### Estimate alpha per gene ######################## + #N.B: alpha = dispersion per gene + #dds <- DESeq2::estimateDispersions(dds, quiet = F) + #dispersion estimation + dispersion_estimate <- DESeq2::dispersions(dds) + + ## Shape and export + names(dispersion_estimate) <- names(dds@rowRanges) + + ## drop NA in dispersion estimate (link to unexpressed gene) + ### and convert to lovely dataframe + expressed_gene_dispersion <- dispersion_estimate[!is.na(dispersion_estimate)] %>% + data.frame() %>% + tibble::rownames_to_column() %>% + dplyr::rename("alpha" = .data$., gene_id = "rowname") + + if (export == TRUE) write_tsv(disp_gene_express, 'results/2022-03-03/estimate_dispersion.tsv') + + return(expressed_gene_dispersion) + +} + + + +#' Estimate mu_ij +#' +#' @param dds DESEQ2 object +#' @param export Boolean +#' +#' @return mu_ij only for gene expressed c_ij != 0 +#' @export +#' @import stats +#' @import dplyr +#' @import tibble +#' @import BiocGenerics +#' @examples +#' @importFrom rlang .data +estim.mu <- function(dds, export = FALSE){ + gene_id <- NULL + ################# Estimate mu ######################### + mu_estimate <- dds@assays@data$mu + #dds@assays@data$mu %>% dim() + #mu_estimate %>% dim() + rownames(mu_estimate) = BiocGenerics::rownames(dds@assays@data$counts) + ## drop NA in dispersion estimate (link to unexpressed gene) + ### and convert to lovely dataframe + mu_gene_express = mu_estimate %>% + stats::na.omit() %>% + data.frame() + + colnames(mu_gene_express) <- rownames(dds@colData) + mu_gene_express <- mu_gene_express %>% + tibble::rownames_to_column(var = "gene_id") + + + if (export == TRUE) write_tsv(mu_gene_express, 'results/2022-03-03/estimate_mu.tsv') + + return(mu_gene_express) + + +} diff --git a/src/htrsim/R/launch_deseq.R b/src/htrsim/R/launch_deseq.R new file mode 100644 index 0000000..6650ac1 --- /dev/null +++ b/src/htrsim/R/launch_deseq.R @@ -0,0 +1,18 @@ + +#' Title +#' +#' @param tabl_cnts table containing counts per genes & samples +#' @param bioDesign table describing bioDesgin of input +#' @import DESeq2 +#' @return DESEQ2 object +#' @export +#' +#' @examples +run.deseq <- function(tabl_cnts, bioDesign ){ + ########### LAUNCH DESEQ ############# + ## Design model + dds = DESeq2::DESeqDataSetFromMatrix( countData = round(tabl_cnts), colData = bioDesign , design = ~ mutant + env + mutant:env) + ## DESEQ standard analysis + dds <- DESeq2::DESeq(dds) + return(dds) +} diff --git a/src/htrsim/R/setup_cntsGenerator.R b/src/htrsim/R/setup_cntsGenerator.R new file mode 100644 index 0000000..0558c19 --- /dev/null +++ b/src/htrsim/R/setup_cntsGenerator.R @@ -0,0 +1,168 @@ +#' Reshape input before building setup +#' +#' @param mu.dtf dataframe of mu_ij +#' @param alpha.dtf dataframe of alpha_i +#' +#' @return +#' @import purrr +#' @import stringr +#' @import dplyr +#' @import BiocGenerics +#' @export +#' +#' @examples +reshape_input2setup <- function(mu.dtf, alpha.dtf){ + ## Defining sample names + bioSample_id <- mu.dtf %>% + dplyr::select(-gene_id) %>% + BiocGenerics::colnames() %>% + purrr::map(., ~stringr::str_split(.,"_")[[1]][1:2] %>% + BiocGenerics::paste(., collapse='_') ) %>% + BiocGenerics::unlist() %>% BiocGenerics::unique() + + + ############### Mu is same for biosample replicate ############# + ### case 1: choose 1 replicate + #mu_params <- mu_params %>% dplyr::select(., contains("rep1")) + ## rename mu_params colnames to ensure corresponding with sample_names + #colnames(mu_params) <- sample_names + ### case 2: average replicates + average_rep <- function(x, dtf) { + varname <- x + dtf %>% + dplyr::select(.,contains(x)) %>% + dplyr::mutate(!!varname := rowMeans(.)) %>% + dplyr::select(varname) + } + mu_avg.dtf <- bioSample_id %>% purrr::map(.x = ., .f = ~average_rep(.x, mu.dtf)) %>% data.frame() + mu_avg.dtf$gene_id <- alpha.dtf$gene_id + + return(list(alpha = alpha.dtf , mu = mu_avg.dtf, bioSample_id = bioSample_id, gene_id = alpha.dtf$gene_id)) +} + + + +#' Handle exception +#' +#' @param bioSample_id vector of id for each bioSample +#' @param n_rep number of replicates +#' @param gene_id vector of id for each gene +#' @param alpha vector of alpha_i +#' @param n_genes number of genes +#' +#' @return +#' @export +#' +#' @examples +handle_except <- function(bioSample, n_rep , gene_id , alpha, n_genes){ + + if(is.numeric(n_rep) && length(n_rep) == 1){ + message("Homogeneous number of replicates between samples: ", n_rep, " replicates per samples\n") + n_rep = rep(n_rep, length(bioSample)) + } + + if(!is.null(n_rep) && length(bioSample) != length(n_rep)) stop("ERROR: unconsistent length between samples_names and n_rep\n") + + + if(is.null(n_genes)) { + if(!is.null(gene_id) || !is.null(alpha)){ + ifelse(length(alpha) == length(alpha), + (n_genes = length(gene_id)), ## if + stop("ERROR: unconsistent value between n_genes, length(gene_names) and length(gene_disp)\n")) ## else + } + } + + if(!is.null(n_genes)) { + if(is.null(gene_id) && is.null(alpha)) { + ### Precised alpha params for each genes + alpha = runif(0.2,120, n = n_genes) ## randomly defined between 2 and 100 + id = paste0('gene', 1:n_genes) + alpha <- list(gene_id = id, alpha = alpha) + } + } + + + + if(is.null(n_genes) && is.null(gene_disp) && is.null(gene_id)) { + message("Number of genes unspecified\nAssuming n_genes = 3\nAssuming gene dispersion (alpha) follow a uniform law between 2 and 100\n") + n_genes = 3 + alpha = runif(2,100, n = n_genes) + gene_id = paste0('gene', 1:n_genes) + } + + if(is.null(gene_id) && is.null(alpha)) { + message("n_genes = ", n_genes, "\nAssuming gene dispersion (alpha) follow a uniform law between 2 and 100\n") + alpha = runif(2,100, n = n_genes) + gene_id = paste0('gene', 1:n_genes) + } + + if(length(bioSample) == 1 && bioSample == "my_first_lib") message("No sample name is provided.\nAssuming only one library will be setup\n") + + if(is.null(n_rep)){ + message("Number of replicates not provided.\nAssuming 10 replicates per sample will be setup") + n_rep = 10 + } + + + if(is.null(gene_id)) gene_id = paste0('gene', 1:n_genes) + + + my_list = list(bioSample = bioSample, rep = n_rep, n_g = n_genes, alpha = alpha) + return(my_list) +} + + + + +#' Build setup for counts generator +#' +#' @param bioSample_id vector of id for each bioSample +#' @param n_rep number of replicates +#' @param gene_id vector of id for each gene +#' @param alpha vector of alpha_i +#' @param n_genes number of genes +#' @param mu dataframe of mu_ij +#' @import purrr +#' @import data.table +#' @return +#' @export +#' +#' @examples +setup_countGener <- function(bioSample_id = "my_first_lib", n_rep = NULL , gene_id = NULL , alpha = NULL, n_genes = NULL, mu = NULL ){ + + ######### HANDLE EXCEPTION ####### + setup = handle_except(bioSample_id, n_rep , gene_id , alpha, n_genes) + ######## HANDLE TYPE MU ########## + if(is.null(mu)) mu = .mu_generator # default function to generate mu + + if(is.function(mu)) { #mu = function + mu.set = mu(setup$n_g) + ######## BUILD AN INPUT DTF FOR count_generator ############ + nBinom_params <- purrr::map2(.x= setup$bioSample, .y = setup$rep, + ~(list(name=.x, #sample_name + n_replicates = .y, # random int between 1 & max_N_replicates + gene_id = setup$alpha$gene_id, # gene_id + mu = mu.set , #mu(ij) + alpha = setup$alpha$alpha))) %>% # alpha(i) + data.table::rbindlist(.) %>% as.data.frame() ## convert to lovely dtf + } + + + if(is.data.frame(mu)) { # mu = data.frame + mu.dtf = mu + ######## BUILD AN INPUT DTF FOR count_generator ############ + nBinom_params <- purrr::map2(.x= setup$bioSample, .y = setup$rep, + ~(list(name=.x, #sample_name + n_replicates = .y, # number replicates + gene_id = setup$alpha$gene_id, # gene_name + mu = mu.dtf %>% dplyr::select(all_of(.x)) %>% unlist() , #mu(ij) + alpha = setup$alpha$alpha))) %>% # alpha(i) + data.table::rbindlist(.) %>% as.data.frame() ## convert to lovely dtf + } + + return(nBinom_params) + +} + + +.mu_generator <- function(x) return(runif(100,1000, n = x )) diff --git a/src/htrsim/devtools_history.R b/src/htrsim/devtools_history.R new file mode 100644 index 0000000..69ccee8 --- /dev/null +++ b/src/htrsim/devtools_history.R @@ -0,0 +1,14 @@ +usethis::use_build_ignore("devtools_history.R") +usethis::use_package('DESeq2') +usethis::use_package('magrittr') +usethis::use_package('stats') +usethis::use_package('dplyr') +usethis::use_package("BiocGenerics") +usethis::use_package("tibble") +usethis::use_package("stringr") +usethis::use_package("purrr") +usethis::use_package("data.table") +usethis::use_package("plyr") +usethis::use_package("reshape2") +usethis::use_package("tidyr") +devtools::load_all() diff --git a/src/htrsim/inst/extdata/public_bioDesign.csv b/src/htrsim/inst/extdata/public_bioDesign.csv new file mode 100644 index 0000000..a4e913c --- /dev/null +++ b/src/htrsim/inst/extdata/public_bioDesign.csv @@ -0,0 +1,13 @@ +sample;env;mutant;mutant_env +Msn2D_KCl_rep1;kcl;msn2D;msn2D_kcl +Msn2D_KCl_rep2;kcl;msn2D;msn2D_kcl +Msn4D_KCl_rep1;kcl;msn4D;msn4D_kcl +Msn4D_KCl_rep2;kcl;msn4D;msn4D_kcl +Msn2D_control_rep1;control;msn2D;msn2D_control +Msn2D_control_rep1;control;msn2D;msn2D_control +Msn4D_control_rep1;control;msn4D;msn4D_control +Msn4D_control_rep2;control;msn4D;msn4D_control +WT_control_rep1;control;wt;wt_control +WT_control_rep2;control;wt;wt_control +WT_KCl_rep1;kcl;wt;wt_kcl +WT_KCl_rep2;kcl;wt;wt_kcl diff --git a/src/htrsim/man/estim.alpha.Rd b/src/htrsim/man/estim.alpha.Rd new file mode 100644 index 0000000..24d1213 --- /dev/null +++ b/src/htrsim/man/estim.alpha.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/input_estimation.R +\name{estim.alpha} +\alias{estim.alpha} +\title{Estimate alpha_i} +\usage{ +estim.alpha(dds, export = FALSE) +} +\arguments{ +\item{dds}{DESEQ2 object} + +\item{export}{Boolean} +} +\value{ +alpha_i per gene only for gene expressed c_ij != 0 +} +\description{ +Estimate alpha_i +} diff --git a/src/htrsim/man/estim.mu.Rd b/src/htrsim/man/estim.mu.Rd new file mode 100644 index 0000000..55ffd21 --- /dev/null +++ b/src/htrsim/man/estim.mu.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/input_estimation.R +\name{estim.mu} +\alias{estim.mu} +\title{Estimate mu_ij} +\usage{ +estim.mu(dds, export = FALSE) +} +\arguments{ +\item{dds}{DESEQ2 object} + +\item{export}{Boolean} +} +\value{ +mu_ij only for gene expressed c_ij != 0 +} +\description{ +Estimate mu_ij +} diff --git a/src/htrsim/man/generate_counts.Rd b/src/htrsim/man/generate_counts.Rd new file mode 100644 index 0000000..89cd640 --- /dev/null +++ b/src/htrsim/man/generate_counts.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generate_counts.R +\name{generate_counts} +\alias{generate_counts} +\title{Simulate counts and convert to lovely deseq input} +\usage{ +generate_counts(setup_dtf, export = FALSE) +} +\arguments{ +\item{setup_dtf}{Output from setup_cntsGenerator.R} + +\item{export}{Boolean} +} +\value{ +dataframe with counts c_ij +} +\description{ +Simulate counts and convert to lovely deseq input +} diff --git a/src/htrsim/man/handle_except.Rd b/src/htrsim/man/handle_except.Rd new file mode 100644 index 0000000..563ca47 --- /dev/null +++ b/src/htrsim/man/handle_except.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/setup_cntsGenerator.R +\name{handle_except} +\alias{handle_except} +\title{Handle exception} +\usage{ +handle_except(bioSample, n_rep, gene_id, alpha, n_genes) +} +\arguments{ +\item{n_rep}{number of replicates} + +\item{gene_id}{vector of id for each gene} + +\item{alpha}{vector of alpha_i} + +\item{n_genes}{number of genes} + +\item{bioSample_id}{vector of id for each bioSample} +} +\value{ + +} +\description{ +Handle exception +} diff --git a/src/htrsim/man/htrsim.Rd b/src/htrsim/man/htrsim.Rd new file mode 100644 index 0000000..ae549a9 --- /dev/null +++ b/src/htrsim/man/htrsim.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/htrsim_workflow.R +\name{htrsim} +\alias{htrsim} +\title{HTRSIM workflow} +\usage{ +htrsim(countData, bioDesign, N_replicates) +} +\arguments{ +\item{countData}{dataframe with actual count per gene} + +\item{bioDesign}{dataframe defining bioDesign} + +\item{N_replicates}{Number of replicate} +} +\value{ +dataframe with simulated count per gene +} +\description{ +HTRSIM workflow +} diff --git a/src/htrsim/man/reshape_input2setup.Rd b/src/htrsim/man/reshape_input2setup.Rd new file mode 100644 index 0000000..eb85d5a --- /dev/null +++ b/src/htrsim/man/reshape_input2setup.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/setup_cntsGenerator.R +\name{reshape_input2setup} +\alias{reshape_input2setup} +\title{Reshape input before building setup} +\usage{ +reshape_input2setup(mu.dtf, alpha.dtf) +} +\arguments{ +\item{mu.dtf}{dataframe of mu_ij} + +\item{alpha.dtf}{dataframe of alpha_i} +} +\value{ + +} +\description{ +Reshape input before building setup +} diff --git a/src/htrsim/man/rn_sim.Rd b/src/htrsim/man/rn_sim.Rd new file mode 100644 index 0000000..8915b7f --- /dev/null +++ b/src/htrsim/man/rn_sim.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generate_counts.R +\name{rn_sim} +\alias{rn_sim} +\title{Sampling counts from Negative Binomial distribution} +\usage{ +rn_sim(mu, alpha, n_replicates, ...) +} +\arguments{ +\item{mu}{mu_ij value} + +\item{alpha}{alpha_i value} + +\item{n_replicates}{number of replicates} + +\item{...}{everything else} +} +\value{ +vector of length n_replicates +} +\description{ +Sampling counts from Negative Binomial distribution +} diff --git a/src/htrsim/man/run.deseq.Rd b/src/htrsim/man/run.deseq.Rd new file mode 100644 index 0000000..dc4d447 --- /dev/null +++ b/src/htrsim/man/run.deseq.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/launch_deseq.R +\name{run.deseq} +\alias{run.deseq} +\title{Title} +\usage{ +run.deseq(tabl_cnts, bioDesign) +} +\arguments{ +\item{tabl_cnts}{table containing counts per genes & samples} + +\item{bioDesign}{table describing bioDesgin of input} +} +\value{ +DESEQ2 object +} +\description{ +Title +} diff --git a/src/htrsim/man/setup_countGener.Rd b/src/htrsim/man/setup_countGener.Rd new file mode 100644 index 0000000..c9b1556 --- /dev/null +++ b/src/htrsim/man/setup_countGener.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/setup_cntsGenerator.R +\name{setup_countGener} +\alias{setup_countGener} +\title{Build setup for counts generator} +\usage{ +setup_countGener( + bioSample_id = "my_first_lib", + n_rep = NULL, + gene_id = NULL, + alpha = NULL, + n_genes = NULL, + mu = NULL +) +} +\arguments{ +\item{bioSample_id}{vector of id for each bioSample} + +\item{n_rep}{number of replicates} + +\item{gene_id}{vector of id for each gene} + +\item{alpha}{vector of alpha_i} + +\item{n_genes}{number of genes} + +\item{mu}{dataframe of mu_ij} +} +\value{ + +} +\description{ +Build setup for counts generator +} diff --git a/src/htrsim/vignettes/comment-utiliser-mon-package.Rmd b/src/htrsim/vignettes/comment-utiliser-mon-package.Rmd new file mode 100644 index 0000000..5f5daa5 --- /dev/null +++ b/src/htrsim/vignettes/comment-utiliser-mon-package.Rmd @@ -0,0 +1,84 @@ +--- +title: "HTRSIM tutorial" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{HTRSIM} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +# A. Introduction + +$$ +Phenotype = Genotype + Environment + Genotype.Environment +$$ +From this expression, $\beta_{G}$, $\beta_{E}$, $\beta_{G*E}$ can be seen as coefficients which allow quantifying the participation of each factors (Genotype, Environment and interaction Genotype/Environment). +In mathematical term, it leads to a linear expression such: +$$ +P = \beta_{G}*G + \beta_{E}*E + \beta_{G*E}*G.E + \beta_{0} +$$ + +In order to estimate these coefficients, a Generalized Linear Model (GLM) can be used. + +# B. HTRSIM getting started + + <u>a. Required</u> + +```{r, setupWD, include=FALSE} +#knitr::opts_knit$set(root.dir = '~/counts_simulation/') +``` + +```{r setup} +library(htrsim) +library(tidyverse) +``` + + <u>b. Workflow</u> + + +```{r echo=FALSE, out.width='50%'} +#knitr::include_graphics('img/schema_loop.jpg') +``` + + <u>c. RNA-seq pipeline</u> + +You can used your favorite pipeline to obtain table counts from real data. +If you don't have any idea of how to obtain such table counts rendez-vous [at](https://gitbio.ens-lyon.fr/aduvermy/rna-seq_public_library_investigations) + + + <u> d. BioProject PRJNA675209b as input</u> + +To easily test *HTRSIM* we produced an usual table counts from BioProject PRJNA675209b. +Take the time to clean up your table counts before using it as input of htrsim. + +```{r} +fn = system.file("extdata/", "public_tablCnts.tsv", package = "htrsim") + +tabl_cnts <- read.table(file = fn, header = TRUE) +rownames(tabl_cnts) <- tabl_cnts$gene_id +tabl_cnts <- tabl_cnts %>% select(-gene_id)##suppr colonne GeneID +tabl_cnts <- tabl_cnts %>% select(-gene_name) ##suppr colonne GeneName +``` + + <u> e. Launch HTRSIM</u> + +```{r message=FALSE, warning=FALSE} +fn = system.file("extdata/", "public_bioDesign.csv", package = "htrsim") + +## import design of bioProject +bioDesign <- read.table(file = fn, header = T, sep = ';') +#bioDesign +#source(file = "htrsim/main.R") +tabl_cnts %>% dim() +bioDesign %>% dim() +#simul_cnts = htrsim(tabl_cnts, bioDesign = bioDesign, 2) + +htrsim.results = htrsim(countData = tabl_cnts, bioDesign= bioDesign, N_replicates=2) + +``` + + <u> f. Visualize results</u> + +```{r message=FALSE, warning=FALSE} +htrsim.results$input$d +``` diff --git a/src/htrsim/counts_generator.R b/src/htrsim_beta/counts_generator.R similarity index 100% rename from src/htrsim/counts_generator.R rename to src/htrsim_beta/counts_generator.R diff --git a/src/htrsim/input_estimation.R b/src/htrsim_beta/input_estimation.R similarity index 100% rename from src/htrsim/input_estimation.R rename to src/htrsim_beta/input_estimation.R diff --git a/src/htrsim/launch_deseq.R b/src/htrsim_beta/launch_deseq.R similarity index 100% rename from src/htrsim/launch_deseq.R rename to src/htrsim_beta/launch_deseq.R diff --git a/src/htrsim/main.R b/src/htrsim_beta/main.R similarity index 100% rename from src/htrsim/main.R rename to src/htrsim_beta/main.R diff --git a/src/htrsim/setup_cntsGenerator.R b/src/htrsim_beta/setup_cntsGenerator.R similarity index 100% rename from src/htrsim/setup_cntsGenerator.R rename to src/htrsim_beta/setup_cntsGenerator.R diff --git a/src/htrsim/setup_deseq.R b/src/htrsim_beta/setup_deseq.R similarity index 100% rename from src/htrsim/setup_deseq.R rename to src/htrsim_beta/setup_deseq.R diff --git a/src/tutorial_htrsim.R b/src/tuto_beta/tutorial_htrsim.R similarity index 100% rename from src/tutorial_htrsim.R rename to src/tuto_beta/tutorial_htrsim.R diff --git a/src/tutorial_htrsim.Rmd b/src/tuto_beta/tutorial_htrsim.Rmd similarity index 100% rename from src/tutorial_htrsim.Rmd rename to src/tuto_beta/tutorial_htrsim.Rmd -- GitLab