session_4.Rmd

title: "R.4: data transformation"
author: "Laurent Modolo [laurent.modolo@ens-lyon.fr](mailto:laurent.modolo@ens-lyon.fr), Hélène Polvèche [hpolveche@istem.fr](mailto:hpolveche@istem.fr)"
date: "2021"
output:
  rmdformats::downcute:
    self_contained: true
    use_bookdown: true
    default_style: "dark"
    lightbox: true
    css: "http://perso.ens-lyon.fr/laurent.modolo/R/src/style.css"
# Document-wide knitr setup.
# The workspace is deliberately NOT cleared with rm(list = ls()): doing so
# wipes the caller's objects when the document is rendered from an
# interactive session, and it does not give a clean state anyway (attached
# packages and options are left untouched).
# echo = TRUE shows the code of every chunk; comment = NA prints results
# without the default "##" prefix.
knitr::opts_chunk$set(echo = TRUE, comment = NA)
# klippy widget configuration; judging by the tooltip texts it adds a
# copy-to-clipboard button, placed at the top right of the code chunks.
klippy::klippy(
  position = c("top", "right"),
  color = "white",
  tooltip_message = "Click to copy",
  tooltip_success = "Copied !"
)
# Install nycflights13 only when it is missing, so that re-running the
# document does not download and reinstall the package every time.
if (!requireNamespace("nycflights13", quietly = TRUE)) {
  install.packages("nycflights13")
}
library("tidyverse")
library("nycflights13")
# Print the flights table used by the examples below.
flights
# filter() keeps the rows for which every condition holds: here the flights
# with month == 1 and day == 1.
flights %>% filter(month == 1, day == 1)
# Same selection, stored in a variable instead of being printed.
jan1 <- flights %>% filter(month == 1, day == 1)
# Store the flights with month == 12 and day == 25, then print them.
dec25 <- flights %>% filter(month == 12, day == 25)
dec25
# Two equivalent ways of keeping the flights of month 11 or month 12.
flights %>% filter(month == 11 | month == 12)
flights %>% filter(month %in% c(11, 12))
# Two equivalent ways of keeping the flights for which neither the arrival
# delay nor the departure delay exceeds 120.
flights %>% filter(!(arr_delay > 120 | dep_delay > 120))
flights %>% filter(arr_delay <= 120, dep_delay <= 120)
# Comparisons and arithmetic involving NA return NA.
NA > 5
10 == NA
NA + 10
# is.na() is the proper way to test for a missing value.
is.na(NA)
# filter() only keeps the rows where the condition is TRUE: the row where
# x is NA is dropped unless it is requested explicitly with is.na().
df <- tibble(x = c(1, NA, 3))
df %>% filter(x > 1)
df %>% filter(is.na(x) | x > 1)
# Flights whose arrival delay lies between 60 and 120 (both included).
# Both bounds must hold at the same time, hence `&`: with `|` the condition
# is TRUE for every non-missing delay and no row is filtered out.
filter(flights, arr_delay >= 60 & arr_delay <= 120)
# Flights whose destination is IAH or HOU.
filter(flights, dest %in% c("IAH", "HOU"))
# Flights with a missing departure time.
filter(flights, is.na(dep_time))
NA ^ 0 # x ^ 0 is always 1: it is a convention, not a computation
NA | TRUE # if one member of an OR operation is TRUE, the result is TRUE
FALSE & NA # if one member of an AND operation is FALSE, the result is FALSE
NA * 0 # here a real computation is needed, so the result is NA
# Sort by year, ties broken by month, then by day.
flights %>% arrange(year, month, day)
# desc() reverses the order: largest departure delay first.
flights %>% arrange(desc(dep_delay))
# arrange() puts missing values last, in ascending and descending order.
tibble(x = c(5, 2, NA)) %>% arrange(x)
tibble(x = c(5, 2, NA)) %>% arrange(desc(x))
# Largest departure delays first, then smallest departure delays first.
flights %>% arrange(desc(dep_delay))
flights %>% arrange(dep_delay)
# Sorting on is.na(x) in descending order brings the missing values first.
tibble(x = c(5, 2, NA)) %>% arrange(desc(is.na(x)))
# Select columns by name.
select(flights, year, month, day)
# Select every column from year to day (inclusive).
select(flights, year:day)
# Select every column except those from year to day.
select(flights, -(year:day))
# Selection helpers can be combined with | (union), & (intersection) and
# ! (complement).
select(flights, contains("time") | contains("delay"))
select(
  flights,
  contains("_") & !starts_with("sched") & !starts_with("time")
)
# Select columns whose names are stored in a character vector. all_of()
# replaces the superseded one_of(); it fails loudly if a name is missing
# instead of only warning.
vars <- c("year", "month", "day", "dep_delay", "arr_delay")
select(flights, all_of(vars))
# contains() ignores case by default; ignore.case = FALSE makes the match
# case-sensitive.
select(flights, contains("TIME"))
select(flights, contains("TIME", ignore.case = FALSE))
# Store a narrower table, then print it (outer parentheses).
(flights_sml <- select(
  flights,
  year:day, ends_with("delay"), distance, air_time
))
# General shape of a mutate() call.
# NOTE(review): the operation_* names are placeholders that are not defined
# anywhere visible; presumably this line is shown but not evaluated - confirm.
mutate(tbl, new_var_a = operation_a, ..., new_var_n = operation_n)
# Add a gain column (departure delay minus arrival delay) and print the
# result without storing it.
mutate(flights_sml, gain = dep_delay - arr_delay)
# Store the table with two new columns. speed is distance per unit of
# air_time, scaled by 60 (presumably minutes to hours - confirm the units).
flights_sml <- mutate(
  flights_sml,
  gain = dep_delay - arr_delay,
  speed = distance / air_time * 60
)
# Convert HHMM-style times into minutes since midnight: the integer division
# by 100 gives the hours, the remainder gives the minutes. The final
# %% 1440 maps a time recorded as 2400 (midnight) to 0 instead of 1440.
mutate(
  flights,
  dep_time = ((dep_time %/% 100) * 60 + dep_time %% 100) %% 1440,
  sched_dep_time =
    ((sched_dep_time %/% 100) * 60 + sched_dep_time %% 100) %% 1440
)
# Install only the colour-palette packages that are missing, so that
# re-running the document does not reinstall them every time. local() keeps
# the helper variables out of the global environment.
local({
  palette_pkgs <- c("ghibli", "RColorBrewer", "viridis")
  is_installed <- vapply(
    palette_pkgs, requireNamespace, logical(1),
    quietly = TRUE
  )
  if (!all(is_installed)) {
    install.packages(palette_pkgs[!is_installed])
  }
})
library(tidyverse)

library(RColorBrewer)
library(ghibli)
library(viridis)
# Scatter plot of hwy against displ, coloured by class, with the default
# discrete colour scale.
mpg %>%
  ggplot(aes(x = displ, y = hwy, colour = class)) +
  geom_point()
# Same plot with the "Dark2" ColorBrewer palette.
mpg %>%
  ggplot(aes(x = displ, y = hwy, colour = class)) +
  geom_point() +
  scale_colour_brewer(palette = "Dark2")
# Same plot with the "MononokeMedium" palette from the ghibli package.
mpg %>%
  ggplot(aes(x = displ, y = hwy, colour = class)) +
  geom_point() +
  scale_colour_ghibli_d("MononokeMedium")
# Show the ColorBrewer palettes flagged as colour-blind friendly.
display.brewer.all(colorblindFriendly = TRUE)
# Read the long-format expression table (read_csv2: ";" as field separator
# and "," as decimal mark), then print it.
expr_DM1 <- read_csv2(
  file = "http://perso.ens-lyon.fr/laurent.modolo/R/session_4/Expression_matrice_pivot_longer_DEGs_GSE86356.csv"
)

expr_DM1
# Heatmap with one tile per (samples, Genes) pair, filled with
# log1p(counts) = log(1 + counts), using the default continuous fill scale.
expr_DM1 %>%
  ggplot(aes(x = samples, y = Genes, fill = log1p(counts))) +
  geom_tile() +
  labs(x = "Samples", y = "Genes") +
  theme(
    axis.text.x = element_text(size = 4, angle = 90),
    axis.text.y = element_text(size = 4)
  )
# Same heatmap with a white-to-green gradient.
expr_DM1 %>%
  ggplot(aes(x = samples, y = Genes, fill = log1p(counts))) +
  geom_tile() +
  scale_fill_gradient2(low = "white", high = "springgreen4") +
  labs(x = "Samples", y = "Genes") +
  theme(
    axis.text.x = element_text(size = 4, angle = 90),
    axis.text.y = element_text(size = 4)
  )
# Same heatmap with the continuous viridis fill scale.
expr_DM1 %>%
  ggplot(aes(x = samples, y = Genes, fill = log1p(counts))) +
  geom_tile() +
  scale_fill_viridis_c() +
  labs(x = "Samples", y = "Genes") +
  theme(
    axis.text.x = element_text(size = 4, angle = 90),
    axis.text.y = element_text(size = 4)
  )
# Read the differential-expression table, then print it.
tab <- read_csv2(
  file = "http://perso.ens-lyon.fr/laurent.modolo/R/session_4/EWang_Tibialis_DEGs_GRCH37-87_GSE86356.csv"
)

tab
# Flag each row:
# - sig:    baseMean > 20, padj < 0.05 and |log2FoldChange| >= 1.5
# - UpDown: "Up" / "Down" for rows passing the same baseMean and padj
#           thresholds with log2FoldChange >= 1.5 / <= -1.5, "NO" otherwise.
tab.sig <- mutate(
  tab,
  sig = baseMean > 20 & padj < 0.05 & abs(log2FoldChange) >= 1.5,
  UpDown = ifelse(
    baseMean > 20 & padj < 0.05 & log2FoldChange >= 1.5,
    "Up",
    ifelse(
      baseMean > 20 & padj < 0.05 & log2FoldChange <= -1.5,
      "Down",
      "NO"
    )
  )
)

tab.sig
# Install ggrepel only when it is missing, so that re-running the document
# does not reinstall the package every time.
if (!requireNamespace("ggrepel", quietly = TRUE)) {
  install.packages("ggrepel")
}
library(ggrepel)
# Among the rows flagged as significant, keep the 10 with the smallest padj
# (slice_min() keeps ties by default, so more than 10 rows may be returned).
top10 <- tab.sig %>%
  filter(sig) %>%
  slice_min(padj, n = 10)
# Volcano plot: log2FoldChange on x, -log10(padj) on y, points coloured by
# their UpDown category, with the rows of top10 labelled by gene_symbol.
# The colours are named so that each category keeps its colour even when one
# of them is absent from the data (unnamed values are matched by position).
# The y axis is computed from padj, hence the "adjusted p-value" label.
# The black lines mark the thresholds: padj = 0.05 and
# log2FoldChange = -1.5 / 1.5.
ggplot(tab.sig, aes(x = log2FoldChange, y = -log10(padj), color = UpDown)) +
  geom_point() +
  scale_color_manual(
    values = c(Down = "steelblue", NO = "lightgrey", Up = "firebrick")
  ) +
  geom_hline(yintercept = -log10(0.05), col = "black") +
  geom_vline(xintercept = c(-1.5, 1.5), col = "black") +
  theme_minimal() +
  theme(
    legend.position = "none"
  ) +
  labs(y = "-log10(adjusted p-value)", x = "log2(FoldChange)") +
  geom_label_repel(data = top10, mapping = aes(label = gene_symbol))

# NOTE(review): the same plot is built a second time with identical code;
# presumably one copy is display-only in the source document - confirm.
ggplot(tab.sig, aes(x = log2FoldChange, y = -log10(padj), color = UpDown)) +
  geom_point() +
  scale_color_manual(
    values = c(Down = "steelblue", NO = "lightgrey", Up = "firebrick")
  ) +
  geom_hline(yintercept = -log10(0.05), col = "black") +
  geom_vline(xintercept = c(-1.5, 1.5), col = "black") +
  theme_minimal() +
  theme(
    legend.position = "none"
  ) +
  labs(y = "-log10(adjusted p-value)", x = "log2(FoldChange)") +
  geom_label_repel(data = top10, mapping = aes(label = gene_symbol))