rnaseq_analysis.Rmd

title: "RNAseq analysis"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{RNAseq analysis}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
## -- global knitr chunk options used for every chunk of the vignette
knitr::opts_chunk$set(comment = "#>", collapse = TRUE)
library(HTRfit)
## -- hidden in vignette
## -- simulate a small example to keep the vignette build time short
## -- design: "genotype" and "environment" (each declared with level = 2)
## -- plus a genotype:environment interaction term
list_var <- init_variable(name = "genotype", mu = 3, sd = 0.2, level = 2) %>%
  init_variable(name = "environment", mu = 2, sd = 0.43, level = 2) %>%
  add_interaction(
    between_var = c("genotype", "environment"),
    mu = 0.44,
    sd = 0.2
  )

## -- simulation settings (min == max, so the replicate count is fixed at 4)
N_GENES <- 30
MIN_REPLICATES <- 4
MAX_REPLICATES <- 4
BASAL_EXPR <- 3

mock_data <- mock_rnaseq(
  list_var,
  N_GENES,
  min_replicates = MIN_REPLICATES,
  max_replicates = MAX_REPLICATES,
  basal_expression = BASAL_EXPR
)
########################

## -- inputs of the workflow: taken from the simulation here,
## -- real data can be plugged in at this point instead
count_matrix <- mock_data$counts
metaData <- mock_data$metadata

## -- peek at the gene count matrix (first 4 rows, first 2 columns)
count_matrix[seq_len(4), seq_len(2)]

## -- peek at the samples metadata
head(metaData)
## -- convert the count matrix and the samples metadata into a data frame
## -- for fitting; the response column is named "kij" (normalization off)
data2fit <- prepareData2fit(
  countMatrix = count_matrix,
  metadata = metaData,
  normalization = FALSE,
  response_name = "kij"
)

## -- same conversion with median ratio normalization enabled
## -- (this replaces the un-normalized data2fit created just above)
data2fit <- prepareData2fit(
  countMatrix = count_matrix,
  metadata = metaData,
  normalization = TRUE,
  response_name = "kij"
)
## -- fit with genotype, environment and their interaction as model terms,
## -- rows grouped by "geneID", negative binomial (nbinom2) family, log link
l_tmb <- fitModelParallel(
  data = data2fit,
  group_by = "geneID",
  formula = kij ~ genotype + environment + genotype:environment,
  family = glmmTMB::nbinom2(link = "log"),
  n.cores = 1
)

## -- alternative formula: environment enters as a `(1 | environment)` term
l_tmb <- fitModelParallel(
  data = data2fit,
  group_by = "geneID",
  formula = kij ~ genotype + (1 | environment),
  family = glmmTMB::nbinom2(link = "log"),
  n.cores = 1
)

## -- first formula again, this time passing glmmTMB control settings
## -- (iter.max / eval.max of the optimizer set to 1e5)
l_tmb <- fitModelParallel(
  data = data2fit,
  group_by = "geneID",
  formula = kij ~ genotype + environment + genotype:environment,
  family = glmmTMB::nbinom2(link = "log"),
  control = glmmTMB::glmmTMBControl(
    optCtrl = list(iter.max = 1e5, eval.max = 1e5)
  ),
  n.cores = 1
)
## -- gather the fit results in tidy form; coeff_threshold and
## -- alternative_hypothesis configure the reported test (see ?tidy_results)
my_tidy_res <- tidy_results(
  l_tmb,
  coeff_threshold = 0.1,
  alternative_hypothesis = "greaterAbs"
)

## -- first three rows
my_tidy_res[seq_len(3), ]
## -- refit the models stored in l_tmb, changing only the family (gaussian)
l_tmb <- updateParallel(
  list_tmb = l_tmb,
  formula = kij ~ genotype + environment + genotype:environment,
  family = gaussian(),
  n.cores = 1
)

## -- same update, additionally passing glmmTMB control settings
## -- (iter.max / eval.max of the optimizer set to 1e3)
l_tmb <- updateParallel(
  list_tmb = l_tmb,
  formula = kij ~ genotype + environment + genotype:environment,
  family = gaussian(),
  control = glmmTMB::glmmTMBControl(
    optCtrl = list(iter.max = 1e3, eval.max = 1e3)
  ),
  n.cores = 1
)

## -- change both the formula (interaction term dropped) and the family
l_tmb <- updateParallel(
  list_tmb = l_tmb,
  formula = kij ~ genotype + environment,
  family = glmmTMB::nbinom2(link = "log"),
  n.cores = 1
)

## -- top-level structure of the entry stored for gene1
str(l_tmb$gene1, max.level = 1)
## -- diagnostic plot with every available metric
diagnostic_plot(
  list_tmb = l_tmb
)

## -- diagnostic plot restricted to the "dispersion" and "logLik" metrics
diagnostic_plot(
  list_tmb = l_tmb,
  focus = c("dispersion", "logLik")
)
## -- run anovaParallel on the fits stored in l_tmb (default settings)
l_anova <- anovaParallel(
  list_tmb = l_tmb
)

## -- additional settings: request type = "III"
## -- (presumably type III tests; confirm in ?anovaParallel)
l_anova <- anovaParallel(
  list_tmb = l_tmb,
  type = "III"
)