rnaseq_analysis.Rmd

title: "RNAseq analysis"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{RNAseq analysis}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
## -- global knitr chunk options applied to every chunk of the vignette
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
library(HTRfit)
## -- hidden in vignette
## -- simulate a small example so that building the vignette does not take too long
## -- two variables ("genotype", "environment"), 2 levels each, plus their interaction
## -- NOTE(review): `%>%` is assumed to be available once HTRfit is attached -- confirm
list_var <- init_variable(name = "genotype", mu = 3, sd = 0.2, level = 2) %>%
  init_variable(name = "environment", mu = 2, sd = 0.43, level = 2) %>%
  add_interaction(
    between_var = c("genotype", "environment"),
    mu = 0.44,
    sd = 0.2
  )
## -- simulation settings passed to mock_rnaseq()
N_GENES <- 30          # number of simulated genes
MIN_REPLICATES <- 4    # lower bound on replicates
MAX_REPLICATES <- 4    # upper bound on replicates (same as the lower bound here)
BASAL_EXPR <- 3        # value given to `basal_expression`
## -- simulate the RNAseq experiment from the variables and settings defined above
mock_data <- mock_rnaseq(
  list_var,
  N_GENES,
  min_replicates = MIN_REPLICATES,
  max_replicates = MAX_REPLICATES,
  basal_expression = BASAL_EXPR
)
########################

## -- inputs for the analysis: taken from the simulation here,
## -- real data can be used instead
count_matrix <- mock_data$counts
metaData <- mock_data$metadata
##############################
## -- first rows of the gene count matrix
head(count_matrix)
## -- first rows of the samples metadata
head(metaData)
## -- convert the count matrix and the samples metadata into a data frame for fitting
## -- normalization is disabled here; `response_name = "kij"` is the response
## -- used on the left-hand side of the model formulas below
## -- (TRUE/FALSE are spelled out: `T`/`F` are ordinary variables that can be reassigned)
data2fit <- prepareData2fit(
  countMatrix = count_matrix,
  metadata = metaData,
  normalization = FALSE,
  response_name = "kij"
)


## -- same conversion, with median ratio normalization enabled
## -- (TRUE is spelled out: `T` is an ordinary variable that can be reassigned)
data2fit <- prepareData2fit(
  countMatrix = count_matrix,
  metadata = metaData,
  normalization = TRUE,
  response_name = "kij"
)
## -- fit one model per gene (group_by = "geneID"):
## -- genotype, environment and their interaction,
## -- negative binomial family (nbinom2) with a log link, on a single core
l_tmb <- fitModelParallel(
  formula = kij ~ genotype + environment + genotype:environment,
  data = data2fit,
  group_by = "geneID",
  family = glmmTMB::nbinom2(link = "log"),
  n.cores = 1
)
## -- same fit with a mixed formula: genotype plus a `( 1 | environment )` term
l_tmb <- fitModelParallel(
  formula = kij ~ genotype + (1 | environment),
  data = data2fit,
  group_by = "geneID",
  family = glmmTMB::nbinom2(link = "log"),
  n.cores = 1
)
## -- interaction model again, passing extra control settings to the fit:
## -- optimizer limits iter.max and eval.max are both set to 1e5
l_tmb <- fitModelParallel(
  formula = kij ~ genotype + environment + genotype:environment,
  data = data2fit,
  group_by = "geneID",
  n.cores = 1,
  family = glmmTMB::nbinom2(link = "log"),
  control = glmmTMB::glmmTMBControl(
    optCtrl = list(iter.max = 1e5, eval.max = 1e5)
  )
)
## -- tidy results for the fitted models
## -- (coefficient threshold 0.1, alternative hypothesis "greaterAbs")
tidy_results(
  l_tmb,
  coeff_threshold = 0.1,
  alternative_hypothesis = "greaterAbs"
)
## -- update the existing fits, changing the model family to gaussian()
l_tmb <- updateParallel(
  formula = kij ~ genotype + environment + genotype:environment,
  list_tmb = l_tmb,
  family = gaussian(),
  n.cores = 1
)

## -- update the fits with additional model control settings
## -- (optimizer limits iter.max and eval.max both set to 1e3)
l_tmb <- updateParallel(
  formula = kij ~ genotype + environment + genotype:environment,
  list_tmb = l_tmb,
  family = gaussian(),
  n.cores = 1,
  control = glmmTMB::glmmTMBControl(
    optCtrl = list(iter.max = 1e3, eval.max = 1e3)
  )
)


## -- update both the model formula (interaction term dropped)
## -- and the model family (back to nbinom2 with a log link)
l_tmb <- updateParallel(
  formula = kij ~ genotype + environment,
  list_tmb = l_tmb,
  family = glmmTMB::nbinom2(link = "log"),
  n.cores = 1
)
## -- top-level structure of the entry stored for gene1
str(l_tmb$gene1, max.level = 1)
## -- diagnostic plot with every available metric
diagnostic_plot(list_tmb = l_tmb)
## -- diagnostic plot restricted to selected metrics
diagnostic_plot(
  list_tmb = l_tmb,
  focus = c("dispersion", "logLik")
)
## -- ANOVA on the fitted models, default settings
l_anova <- anovaParallel(list_tmb = l_tmb)

## -- additional settings: request type "III"
l_anova <- anovaParallel(
  list_tmb = l_tmb,
  type = "III"
)