From ebf0db051e6f5ca0cc6a9e00c669b8d0791c6288 Mon Sep 17 00:00:00 2001
From: Laurent Modolo <laurent.modolo@ens-lyon.fr>
Date: Thu, 9 Sep 2021 15:31:31 +0200
Subject: [PATCH] code cleanup

---
 session_2/slides.Rmd        | 404 ------------------------------------
 session_2/tp.R              |   0
 session_2/tp.md             |   0
 session_3/slides.Rmd        | 279 -------------------------
 session_3/tp.R              |   0
 session_3/tp.md             |   0
 session_4/challengeTime.Rmd | 139 -------------
 session_4/slides.Rmd        | 315 ----------------------------
 8 files changed, 1137 deletions(-)
 delete mode 100644 session_2/slides.Rmd
 delete mode 100644 session_2/tp.R
 delete mode 100644 session_2/tp.md
 delete mode 100644 session_3/slides.Rmd
 delete mode 100644 session_3/tp.R
 delete mode 100644 session_3/tp.md
 delete mode 100644 session_4/challengeTime.Rmd
 delete mode 100644 session_4/slides.Rmd

diff --git a/session_2/slides.Rmd b/session_2/slides.Rmd
deleted file mode 100644
index 3695714..0000000
--- a/session_2/slides.Rmd
+++ /dev/null
@@ -1,404 +0,0 @@
----
-title: "R#2: introduction to Tidyverse"
-author: "Laurent Modolo [laurent.modolo@ens-lyon.fr](mailto:laurent.modolo@ens-lyon.fr)"
-date: "24 Oct 2019"
-output:
-  slidy_presentation:
-    highlight: tango
-  beamer_presentation:
-    theme: metropolis
-    slide_level: 3
-    fig_caption: no
-    df_print: tibble
-    highlight: tango
-    latex_engine: xelatex
----
-
-```{r setup, include=FALSE, cache=TRUE}
-knitr::opts_chunk$set(echo = FALSE)
-library(tidyverse)
-tmp <- tempfile(fileext = ".zip")
-download.file("http://www.fueleconomy.gov/feg/epadata/vehicles.csv.zip",
-              tmp,
-              quiet = TRUE)
-unzip(tmp, exdir = "data-raw")
-new_class_level <- c(
-  "Compact Cars",
-  "Large Cars",
-  "Midsize Cars",
-  "Midsize Cars",
-  "Midsize Cars",
-  "Compact Cars",
-  "Minivan",
-  "Minivan",
-  "Pickup Trucks",
-  "Pickup Trucks",
-  "Pickup Trucks",
-  "Sport Utility Vehicle",
-  "Sport Utility Vehicle",
-  "Compact Cars",
-  "Special Purpose Vehicle",
-  "Special Purpose Vehicle",
-  "Special Purpose Vehicle",
-  "Special Purpose Vehicle",
-  "Special Purpose Vehicle",
-  "Special Purpose Vehicle",
-  "Sport Utility Vehicle",
-  "Sport Utility Vehicle",
-  "Pickup Trucks",
-  "Pickup Trucks",
-  "Pickup Trucks",
-  "Pickup Trucks",
-  "Sport Utility Vehicle",
-  "Sport Utility Vehicle",
-  "Compact Cars",
-  "Two Seaters",
-  "Vans",
-  "Vans",
-  "Vans",
-  "Vans"
-)
-new_fuel_level <- c(
-  "gas",
-  "Diesel",
-  "Regular",
-  "gas",
-  "gas",
-  "Regular",
-  "Regular",
-  "Hybrid",
-  "Hybrid",
-  "Regular",
-  "Regular",
-  "Hybrid",
-  "Hybrid"
-)
-read_csv("data-raw/vehicles.csv") %>%
-  select(
-    "id",
-    "make",
-    "model",
-    "year",
-    "VClass",
-    "trany",
-    "drive",
-    "cylinders",
-    "displ",
-    "fuelType",
-    "highway08",
-    "city08"
-  ) %>% 
-  rename(
-    "class" = "VClass",
-    "trans" = "trany",
-    "drive" = "drive",
-    "cyl" = "cylinders",
-    "displ" = "displ",
-    "fuel" = "fuelType",
-    "hwy" = "highway08",
-    "cty" = "city08"
-  ) %>%
-  filter(drive != "") %>%
-  drop_na() %>% 
-  arrange(make, model, year) %>%
-  mutate(class = factor(as.factor(class), labels = new_class_level)) %>%
-  mutate(fuel = factor(as.factor(fuel), labels = new_fuel_level)) %>%
-  write_csv("2_data.csv")
-```
-
-## R#2: introduction to Tidyverse
-The goal of this practical is to familiarize yourself with `ggplot2`.
-
-The objectives of this session will be to:
-
-- Create basic plot with `ggplot2`
-- Understand the `tibble` type
-- Learn the different aesthetics in R plots
-- Compose graphics
-
-## Tidyverse
-
-The tidyverse is a collection of R packages designed for data science.
-
-All packages share an underlying design philosophy, grammar, and data structures.
-
-```{r install_tidyverse, cache = TRUE, eval = FALSE}
-install.packages("tidyverse")
-```
-
-```{r load_tidyverse, cache = TRUE}
-library("tidyverse")
-```
-
-
-## Toy data set `mpg`
-
-This dataset contains a subset of the fuel economy data that the EPA makes available on **http://fueleconomy.gov**. It contains only models which had a new release every year between 1999 and 2008.
-
-
-```{r mpg_inspect, cache = TRUE, eval=FALSE}
-?mpg
-mpg
-dim(mpg)
-View(mpg)
-```
-
-## Updated version of the data
-
-`mpg` is loaded with tidyverse, we want to be able to read our own data from
-**http://perso.ens-lyon.fr/laurent.modolo/R/2_data.csv**
-
-```{r mpg_download, cache=TRUE, message=FALSE}
-new_mpg <- read_csv(
-  "http://perso.ens-lyon.fr/laurent.modolo/R/2_data.csv"
-  )
-```
-
-**http://perso.ens-lyon.fr/laurent.modolo/R/2_a**
-
-## First plot with `ggplot2`
-
-Relationship between engine size `displ` and fuel efficiency `hwy`.
-```{r new_mpg_plot_a, cache = TRUE, fig.width=8, fig.height=4.5}
-ggplot(data = new_mpg) + 
-  geom_point(mapping = aes(x = displ, y = hwy))
-```
-
-## Composition of plot with `ggplot2`
-
-Composition of plot with `ggplot2`
-
-```R
-ggplot(data = <DATA>) + 
-  <GEOM_FUNCTION>(mapping = aes(<MAPPINGS>))
-```
-
-- you begin a plot with the function `ggplot()`
-- you complete your graph by adding one or more layers
-- `geom_point()` adds a layer with a scatterplot
-- each geom function in `ggplot2` takes a `mapping` argument
-- the `mapping` argument is always paired with `aes()`
-
-
-## First challenge
-
-- Run `ggplot(data = new_mpg)`. What do you see?
-- How many rows are in `new_mpg`? How many columns?
-- What does the `cty` variable describe? Read the help for `?mpg` to find out.
-- Make a scatterplot of `hwy` vs. `cyl`.
-- What happens if you make a scatterplot of `class` vs. `drive`? Why is the plot not useful?
-
-## Run `ggplot(data = mpg)`. What do you see?
-
-```{r empty_plot, cache = TRUE, fig.width=8, fig.height=4.5}
-ggplot(data = new_mpg)
-```
-
-## How many rows are in `new_mpg`? How many columns?
-
-```{r size_of_mpg, cache = TRUE, fig.width=8, fig.height=4.5}
-new_mpg
-```
-
-## Make a scatterplot of `hwy` vs. `cyl`.
-
-```{r new_mpg_plot_b, cache = TRUE, fig.width=8, fig.height=4.5}
-ggplot(data = new_mpg) + 
-  geom_point(mapping = aes(x = hwy, y = cyl))
-```
-
-## What happens if you make a scatterplot of `class` vs. `drive`?
-Why is the plot not useful?
-
-```{r new_mpg_plot_c, cache = TRUE, fig.width=8, fig.height=4.5}
-ggplot(data = new_mpg) + 
-  geom_point(mapping = aes(x = class, y = drive))
-```
-
-## Aesthetic mappings
-
-How can you explain these cars?
-
-```{r new_mpg_plot_d, echo = FALSE, cache = TRUE, fig.width=8, fig.height=4.5}
-ggplot(data = mpg) + 
-  geom_point(mapping = aes(x = displ, y = hwy)) +
-  geom_point(data = mpg %>% filter(class == "2seater"),
-             mapping = aes(x = displ, y = hwy), color = "red")
-```
-
-##  Aesthetic mapping `color`
-
-```{r new_mpg_plot_e, cache = TRUE, fig.width=8, fig.height=4.5}
-ggplot(data = mpg) + 
-  geom_point(mapping = aes(x = displ, y = hwy, color = class))
-```
-
-##  Aesthetic mappings
-
-`ggplot2` will automatically assign a unique level of the aesthetic (here a unique color) to each unique value of the variable, a process known as scaling. `ggplot2` will also add a legend that explains which levels correspond to which values.
-
-Try the following aesthetic:
-
-- `size`
-- `alpha`
-- `shape`
-
-##  Aesthetic mapping `size`
-
-```{r new_mpg_plot_f, cache = TRUE, fig.width=8, fig.height=4.5, warning=FALSE}
-ggplot(data = mpg) + 
-  geom_point(mapping = aes(x = displ, y = hwy, size = class))
-```
-
-##  Aesthetic mapping `alpha`
-
-```{r new_mpg_plot_g, cache = TRUE, fig.width=8, fig.height=4.5, warning=FALSE}
-ggplot(data = mpg) + 
-  geom_point(mapping = aes(x = displ, y = hwy, alpha = class))
-```
-
-##  Aesthetic mapping `shape`
-
-```{r new_mpg_plot_h, cache = TRUE, fig.width=8, fig.height=4.5, warning=FALSE}
-ggplot(data = mpg) + 
-  geom_point(mapping = aes(x = displ, y = hwy, shape = class))
-```
-
-##  Aesthetic
-
-You can also set the aesthetic properties of your geom manually. For example, we can make all of the points in our plot blue:
-
-```{r new_mpg_plot_i, cache = TRUE, fig.width=8, fig.height=4.5}
-ggplot(data = mpg) + 
-  geom_point(mapping = aes(x = displ, y = hwy), color = "blue")
-```
-
-## Second challenge
-
-- What’s gone wrong with this code? Why are the points not blue?
-
-```R
-ggplot(data = mpg) + 
-  geom_point(mapping = aes(x = displ, y = hwy, color = "blue"))
-```
-
-- Which variables in `mpg` are **categorical**? Which variables are **continuous**? (Hint: type `mpg`)
-- Map a **continuous** variable to color, size, and shape.
-- What does the `stroke` aesthetic do? What shapes does it work with? (Hint: use ?geom_point)
-- What happens if you map an aesthetic to something other than a variable name, like `color = displ < 5`?
-
-## Facets
-
-```{r new_mpg_plot_j, cache = TRUE, fig.width=8, fig.height=4.5}
-ggplot(data = mpg) + 
-  geom_point(mapping = aes(x = displ, y = hwy)) + 
-  facet_wrap(~class)
-```
-
-## Facets
-
-```{r new_mpg_plot_k, cache = TRUE, fig.width=8, fig.height=4.5}
-ggplot(data = mpg) + 
-  geom_point(mapping = aes(x = displ, y = hwy)) + 
-  facet_wrap(~class, nrow = 2)
-```
-
-## Facets
-
-```{r new_mpg_plot_l, cache = TRUE, fig.width=8, fig.height=4.5}
-ggplot(data = mpg) + 
-  geom_point(mapping = aes(x = displ, y = hwy)) + 
-  facet_wrap(~ fl + class, nrow = 2)
-```
-
-## Composition
-
-There are different ways to represent the information
-
-```{r new_mpg_plot_o, cache = TRUE, fig.width=8, fig.height=4.5}
-ggplot(data = mpg) + 
-  geom_point(mapping = aes(x = displ, y = hwy))
-```
-
-## Composition
-
-There are different ways to represent the information
-
-```{r new_mpg_plot_p, cache = TRUE, fig.width=8, fig.height=4.5, message=FALSE}
-ggplot(data = mpg) + 
-  geom_smooth(mapping = aes(x = displ, y = hwy))
-```
-
-
-## Composition
-
-We can add as many layers as we want
-
-```{r new_mpg_plot_q, cache = TRUE, fig.width=8, fig.height=4.5, message=FALSE}
-ggplot(data = mpg) + 
-  geom_point(mapping = aes(x = displ, y = hwy)) +
-  geom_smooth(mapping = aes(x = displ, y = hwy))
-```
-
-
-## Composition
-
-We can avoid code duplication
-
-```{r new_mpg_plot_r, cache = TRUE, fig.width=8, fig.height=4.5, message=FALSE}
-ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) + 
-  geom_point() +
-  geom_smooth()
-```
-
-
-## Composition
-
-We can make `mapping` layer specific
-
-```{r new_mpg_plot_s, cache = TRUE, fig.width=8, fig.height=4.5, message=FALSE}
-ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) + 
-  geom_point(mapping = aes(color = class)) +
-  geom_smooth()
-```
-
-## Composition
-
-We can use different `data` for different layer (You will lean more on `filter()` later)
-
-```{r new_mpg_plot_t, cache = TRUE, fig.width=8, fig.height=4.5, message=FALSE}
-ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) + 
-  geom_point(mapping = aes(color = class)) +
-  geom_smooth(data = filter(mpg, class == "subcompact"))
-```
-
-## Fird challenge
-
-- Run this code in your head and predict what the output will look like. Then, run the code in R and check your predictions.
-```R
-ggplot(data = mpg, mapping = aes(x = displ, y = hwy, color = drv)) + 
-  geom_point() + 
-  geom_smooth(se = FALSE)
-```
-**http://perso.ens-lyon.fr/laurent.modolo/R/2_d**
-
-- What does `show.legend = FALSE` do?
-- What does the `se` argument to `geom_smooth()` do?
-
-## Third challenge
-
-- Recreate the R code necessary to generate the following graph
-
-```{r new_mpg_plot_u, echo = FALSE, cache = TRUE, fig.width=8, fig.height=4.5, message=FALSE}
-ggplot(data = mpg, mapping = aes(x = displ, y = hwy, color = drv)) + 
-  geom_point() +
-  geom_smooth(mapping = aes(linetype = drv))
-```
-
-## Third challenge
-
-```{r new_mpg_plot_v, cache = TRUE, fig.width=8, fig.height=4.5, message=FALSE}
-ggplot(data = mpg, mapping = aes(x = displ, y = hwy, color = drv)) + 
-  geom_point() +
-  geom_smooth(mapping = aes(linetype = drv))
-```
\ No newline at end of file
diff --git a/session_2/tp.R b/session_2/tp.R
deleted file mode 100644
index e69de29..0000000
diff --git a/session_2/tp.md b/session_2/tp.md
deleted file mode 100644
index e69de29..0000000
diff --git a/session_3/slides.Rmd b/session_3/slides.Rmd
deleted file mode 100644
index 78123e1..0000000
--- a/session_3/slides.Rmd
+++ /dev/null
@@ -1,279 +0,0 @@
----
-title: "R#3: stats with ggplot2"
-author: "Laurent Modolo [laurent.modolo@ens-lyon.fr](mailto:laurent.modolo@ens-lyon.fr)"
-date: "08 Nov 2019"
-output:
-  slidy_presentation:
-    highlight: tango
-  beamer_presentation:
-    theme: metropolis
-    slide_level: 3
-    fig_caption: no
-    df_print: tibble
-    highlight: tango
-    latex_engine: xelatex
----
-
-```{r setup, include=FALSE, cache=TRUE}
-knitr::opts_chunk$set(echo = FALSE)
-library(tidyverse)
-```
-
-## R#3: stats with ggplot2
-The goal of this practical is to practices advanced features of `ggplot2`.
-
-The objectives of this session will be to:
-
-- learn about statistical transformations
-- practices position adjustments
-- change the coordinate systems
-
-## `ggplot2` statistical transformations
-
-We are going to use the `diamonds` data set included in `tidyverse`.
-
-- Use the `help` and `View` command to explore this data set.
-- Try the `str` command, which information are displayed ?
-
-## `ggplot2` statistical transformations
-
-```{r diamonds_barplot, cache = TRUE, fig.width=8, fig.height=4.5, message=FALSE}
-ggplot(data = diamonds) + 
-  geom_bar(mapping = aes(x = cut))
-```
-
-More diamonds are available with high quality cuts.
-
-## `ggplot2` statistical transformations
-
-On the x-axis, the chart displays cut, a variable from diamonds. On the y-axis, it displays count, but count is not a variable in diamonds!
-
-The algorithm used to calculate new values for a graph is called a **stat**, short for statistical transformation. The figure below describes how this process works with `geom_bar()`.
-
-\includegraphics[width=\textwidth]{img/visualization-stat-bar.png}
-
-## `ggplot2` statistical transformations
-
-You can generally use geoms and stats interchangeably. For example, you can recreate the previous plot using `stat_count()` instead of `geom_bar()`:
-
-```{r diamonds_stat_count, eval=FALSE, message=FALSE}
-ggplot(data = diamonds) + 
-  stat_count(mapping = aes(x = cut))
-```
-
-## `ggplot2` statistical transformations
-
-Every geom has a default stat; and every stat has a default geom. This means that you can typically use geoms without worrying about the underlying statistical transformation. There are three reasons you might need to use a stat explicitly:
-
-- You might want to override the default stat. **3_a**
-- You might want to override the default mapping from transformed variables to aesthetics. **3_b**
-- You might want to draw greater attention to the statistical transformation in your code. **3_c**
-
-## Statistical transformation challenge
-
-- What does `geom_col()` do? How is it different to `geom_bar()`?
-- What variables does `stat_smooth()` compute? What parameters control its behaviour?
-- In our proportion bar chart, we need to set `group = 1`. Why? In other words what is the problem with these two graphs?
-
-```{r diamonds_stats_challenge, eval=FALSE, message=FALSE}
-ggplot(data = diamonds) + 
-  geom_bar(mapping = aes(x = cut, y = ..prop..))
-ggplot(data = diamonds) + 
-  geom_bar(mapping = aes(x = cut, fill = color, y = ..prop..))
-```
-
-## Position adjustments
-You can colour a bar chart using either the `colour` aesthetic, or, more usefully, `fill`:
-
-```{r diamonds_barplot_color, cache = TRUE, fig.width=8, fig.height=4.5, message=FALSE}
-ggplot(data = diamonds) + 
-  geom_bar(mapping = aes(x = cut, colour = cut))
-```
-
-## Position adjustments
-You can colour a bar chart using either the `colour` aesthetic, or, more usefully, `fill`:
-
-```{r diamonds_barplot_fill, cache = TRUE, fig.width=8, fig.height=4.5, message=FALSE}
-ggplot(data = diamonds) + 
-  geom_bar(mapping = aes(x = cut, fill = cut))
-```
-
-## Position adjustments
-
-You can also use `fill` with another variable:
-
-```{r diamonds_barplot_fill_clarity, cache = TRUE, fig.width=8, fig.height=4.5, message=FALSE}
-ggplot(data = diamonds) + 
-  geom_bar(mapping = aes(x = cut, fill = clarity))
-```
-
-## Position adjustments
-
-The stacking is performed by the position adjustment `position`
-
-```{r diamonds_barplot_pos_identity, cache = TRUE, fig.width=8, fig.height=4.5, message=FALSE}
-ggplot(data = diamonds,
-       mapping = aes(x = cut, colour = clarity)) + 
-  geom_bar(fill = NA, position = "identity")
-```
-
-## Position adjustments
-
-The stacking is performed by the position adjustment `position`
-
-```{r diamonds_barplot_pos_fill, cache = TRUE, fig.width=8, fig.height=4.5, message=FALSE}
-ggplot(data = diamonds) + 
-  geom_bar(mapping = aes(x = cut, fill = clarity),
-           position = "fill")
-```
-
-## Position adjustments
-
-The stacking is performed by the position adjustment `position`
-
-```{r diamonds_barplot_pos_dodge, cache = TRUE, fig.width=8, fig.height=4.5, message=FALSE}
-ggplot(data = diamonds) + 
-  geom_bar(mapping = aes(x = cut, fill = clarity),
-           position = "dodge")
-```
-
-## Position adjustments
-
-The stacking is performed by the position adjustment `position`
-
-```{r mpg_point_pos_jitter, cache = TRUE, fig.width=8, fig.height=4.5, message=FALSE}
-ggplot(data = mpg) + 
-  geom_point(mapping = aes(x = displ, y = hwy),
-             position = "jitter")
-```
-
-## Position adjustments
-
-The stacking is performed by the position adjustment `position`
-
-```{r mpg_jitter, cache = TRUE, fig.width=8, fig.height=4.5, message=FALSE}
-ggplot(data = mpg) + 
-  geom_jitter(mapping = aes(x = displ, y = hwy))
-```
-
-## Position adjustments challenges
-
-- What is the problem with this plot? How could you improve it?
-```{r mpg_point, eval=F, message=FALSE}
-ggplot(data = mpg, mapping = aes(x = cty, y = hwy)) + 
-  geom_point()
-```
-- What parameters to `geom_jitter()` control the amount of jittering?
-- Compare and contrast `geom_jitter()` with `geom_count()`
-- What’s the default position adjustment for `geom_boxplot()` ? Create a visualisation of the `mpg` dataset that demonstrates it.
-
-## Coordinate systems
-
-Cartesian coordinate system where the x and y positions act independently to determine the location of each point. There are a number of other coordinate systems that are occasionally helpful.
-
-## Coordinate systems
-
-```{r mpg_boxplot, cache = TRUE, fig.width=8, fig.height=4.5, message=FALSE}
-ggplot(data = mpg, mapping = aes(x = class, y = hwy)) + 
-  geom_boxplot()
-```
-
-## Coordinate systems
-
-```{r mpg_boxplot_flip, cache = TRUE, fig.width=8, fig.height=4.5, message=FALSE}
-ggplot(data = mpg, mapping = aes(x = class, y = hwy)) + 
-  geom_boxplot() +
-  coord_flip()
-```
-
-## Coordinate systems
-
-```{r diamonds_bar, cache = TRUE, fig.width=8, fig.height=4.5, message=FALSE}
-bar <- ggplot(data = diamonds) + 
-  geom_bar(
-    mapping = aes(x = cut, fill = cut), 
-    show.legend = FALSE,
-    width = 1
-  ) + 
-  theme(aspect.ratio = 1) +
-  labs(x = NULL, y = NULL)
-```
-**3_d**
-
-## Coordinate systems
-
-```{r diamonds_bar_plot, echo=F, cache = TRUE, fig.width=8, fig.height=4.5, message=FALSE}
-bar
-```
-
-**3_d**
-
-## Coordinate systems
-```{r diamonds_bar_flip, cache = TRUE, fig.width=8, fig.height=4.5, message=FALSE}
-bar + coord_flip()
-```
-
-## Coordinate systems
-
-```{r mpg_jitter_noquickmap, cache = TRUE, fig.width=8, fig.height=4.5, message=FALSE}
-ggplot(data = mpg) + 
-  geom_jitter(mapping = aes(x = cty, y = hwy))
-```
-
-## Coordinate systems
-
-```{r mpg_jitter_quickmap, cache = TRUE, fig.width=3.5, fig.height=3.5, message=FALSE}
-ggplot(data = mpg) + 
-  geom_jitter(mapping = aes(x = cty, y = hwy)) +
-  coord_quickmap()
-```
-
-## Coordinate systems
-
-```{r mpg_jitter_log, cache = TRUE, fig.width=8.5, fig.height=3.5, message=FALSE}
-ggplot(data = mpg) + 
-  geom_jitter(mapping = aes(x = cty, y = hwy)) +
-  scale_y_log10() +
-  scale_x_log10()
-```
-
-## Coordinate systems
-```{r diamonds_bar_polar, cache = TRUE, fig.width=5, fig.height=3.5, message=FALSE}
-bar + coord_polar()
-```
-
-## Coordinate systems challenges
-
-- Turn a stacked bar chart into a pie chart using `coord_polar()`.
-- What does `labs()` do? Read the documentation.
-- What does the plot below tell you about the relationship between `city` and highway `mpg`? Why is `coord_fixed()` important? What does `geom_abline()` do?
-
-```{r mpg_point_fixed, eval = F, cache = TRUE, fig.width=4.5, fig.height=3.5, message=FALSE}
-ggplot(data = mpg, mapping = aes(x = cty, y = hwy)) +
-  geom_point() + 
-  geom_abline() +
-  coord_fixed()
-```
-
-## Coordinate systems challenges
-
-```{r diamonds_barplot_pos_fill_polar, cache = TRUE, fig.width=8, fig.height=4.5, message=FALSE}
-ggplot(data = diamonds) + 
-  geom_bar(mapping = aes(x = cut, fill = clarity),
-           position = "fill") +
-  coord_polar()
-```
-
-## Coordinate systems challenges
-
-```{r mpg_point_nofixed_plot, eval = T, cache = TRUE, fig.width=8, fig.height=3.5, message=FALSE}
-ggplot(data = mpg, mapping = aes(x = cty, y = hwy)) +
-  geom_point() +  geom_abline()
-```
-
-## Coordinate systems challenges
-
-```{r mpg_point_fixed_plot, eval = T, cache = TRUE, fig.width=8, fig.height=3.5, message=FALSE}
-ggplot(data = mpg, mapping = aes(x = cty, y = hwy)) +
-  geom_point() +  geom_abline() + coord_fixed()
-```
diff --git a/session_3/tp.R b/session_3/tp.R
deleted file mode 100644
index e69de29..0000000
diff --git a/session_3/tp.md b/session_3/tp.md
deleted file mode 100644
index e69de29..0000000
diff --git a/session_4/challengeTime.Rmd b/session_4/challengeTime.Rmd
deleted file mode 100644
index 1986436..0000000
--- a/session_4/challengeTime.Rmd
+++ /dev/null
@@ -1,139 +0,0 @@
----
-title: "Challenge time!"
-author: "Laurent Modolo [laurent.modolo@ens-lyon.fr](mailto:laurent.modolo@ens-lyon.fr), Hélène Polvèche [hpolveche@istem.fr](mailto:hpolveche@istem.fr)"
-date: "Mars 2020"
-output:
-  html_document: default
-  pdf_document: default
----
-  <style type="text/css">
-  h3 { /* Header 3 */
-      position: relative ;
-    color: #729FCF ;
-      left: 5%;
-  }
-h2 { /* Header 2 */
-    color: darkblue ;
-  left: 10%;
-} 
-h1 { /* Header 1 */
-    color: #034b6f ;
-} 
-#pencadre{
-border:1px; 
-border-style:solid; 
-border-color: #034b6f; 
-  background-color: #EEF3F9; 
-  padding: 1em;
-text-align: center ;
-border-radius : 5px 4px 3px 2px;
-}
-legend{
-  color: #034b6f ;
-}
-#pquestion {
-color: darkgreen;
-font-weight: bold;
-}
-</style>
-  
-  ```{r setup, include=FALSE, cache=TRUE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-
-### Filter challenges :
-
-Find all flights that:
-  
-  - Had an arrival delay of two or more hours
-- Were operated by United, American, or Delta
-- Departed between midnight and 6am (inclusive)
-
-Another useful dplyr filtering helper is `between()`. What does it do? Can you use it to simplify the code needed to answer the previous challenges?
-
-How many flights have a missing `dep_time`? What other variables are missing? What might these rows represent?
-
-Why is `NA ^ 0` not `NA`? Why is `NA | TRUE` not `NA`? Why is `FALSE & NA` not `NA`? Can you figure out the general rule? (`NA * 0` is a tricky counter-example!)
-
-### Arrange challenges :
-
-- Sort flights to find the most delayed flights. Find the flights that left earliest.
-- Sort flights to find the fastest flights.
-- Which flights traveled the longest? Which traveled the shortest?
-
-### Select challenges :
-
-- Brainstorm as many ways as possible to select `dep_time`, `dep_delay`, `arr_time`, and `arr_delay` from `flights`.
-- What does the `one_of()` function do? Why might it be helpful in conjunction with this vector?
-```{r select_one_of, eval=F, message=F, cache=T}
-vars <- c("year", "month", "day", "dep_delay", "arr_delay")
-```
-- Does the result of running the following code surprise you? How do the select helpers deal with case by default? How can you change that default?
-```{r select_contains, eval=F, message=F, cache=T}
-select(flights, contains("TIME"))
-```
-
-
-### Mutate challenges :
-
-- Currently `dep_time` and `sched_dep_time` are convenient to look at, but hard to compute with because they’re not really continuous numbers. Convert them to a more convenient representation of number of minutes since midnight.
-
-
-```{r mutate_challenges_a, eval=F, message=F, cache=T}
-mutate(
-  flights,
-  dep_time = (dep_time %/% 100) * 60 +
-    dep_time %% 100,
-  sched_dep_time = (sched_dep_time %/% 100) * 60 +
-    sched_dep_time %% 100
-)
-```
-
-\ 
-
-- Compare `dep_time`, `sched_dep_time`, and `dep_delay`. How would you expect those three numbers to be related?
-
-```{r mutate_challenge_b, eval=F, message=F, cache=T}
-mutate(
-  flights,
-  dep_time = (dep_time %/% 100) * 60 + 
-    dep_time %% 100,
-  sched_dep_time = (sched_dep_time %/% 100) * 60 +
-    sched_dep_time %% 100
-)
-```
-
-\ 
-
-### Challenge with `summarise()` and `group_by()`
-
-Imagine that we want to explore the relationship between the distance and average delay for each location. 
-here are three steps to prepare this data: 
-
-- Group flights by destination.
-- Summarise to compute distance, average delay, and number of flights.
-- Filter to remove noisy points and Honolulu airport, which is almost twice as far away as the next closest airport.
-
-```{r summarise_group_by_ggplot_a, eval = F}
-flights %>% 
-  group_by(dest)
-```
-
- \ 
-
-Imagine that we want to explore the relationship between the distance and average delay for each location. 
-
-- Filter to remove noisy points and Honolulu airport, which is almost twice as far away as the next closest airport.
-
-```{r summarise_group_by_ggplot_b, eval = F}
-flights %>% 
-  group_by(dest) %>% 
-  summarise(
-    count = n(),
-    dist = mean(distance, na.rm = TRUE),
-    delay = mean(arr_delay, na.rm = TRUE)
-  )
-```
-
-
diff --git a/session_4/slides.Rmd b/session_4/slides.Rmd
deleted file mode 100644
index 3aec2bd..0000000
--- a/session_4/slides.Rmd
+++ /dev/null
@@ -1,315 +0,0 @@
----
-title: "R#4: data transformation"
-author: "Laurent Modolo [laurent.modolo@ens-lyon.fr](mailto:laurent.modolo@ens-lyon.fr)"
-date: "08 Nov 2019"
-output:
-  beamer_presentation:
-    theme: metropolis
-    slide_level: 3
-    fig_caption: no
-    df_print: tibble
-    highlight: tango
-    latex_engine: xelatex
-  slidy_presentation:
-    highlight: tango
----
-```{r setup, include=FALSE, cache=TRUE}
-knitr::opts_chunk$set(echo = FALSE)
-library(tidyverse)
-```
-
-## R#4: data transformation
-The goal of this practical is to practice data transformation with `tidyverse`.
-The objectives of this session will be to:
-
-- Filter rows with `filter()`
-- Arrange rows with `arrange()`
-- Select columns with `select()`
-- Add new variables with `mutate()`
-- Combining multiple operations with the pipe `%>%`
-
-## **nycflights13**
-
-`nycflights13::flights` contains all 336,776 flights that departed from New York City in 2013. The data comes from the US Bureau of Transportation Statistics, and is documented in `?flights`.
-
-```{r load_data, eval=T, message=FALSE, cache=T}
-library(nycflights13)
-library(tidyverse)
-```
-
-
-
-## **nycflights13**
-
-```{r display_data, eval=F, message=FALSE, cache=T}
-flights
-```
-
-- **int** stands for integers.
-- **dbl** stands for doubles, or real numbers.
-- **chr** stands for character vectors, or strings.
-- **dttm** stands for date-times (a date + a time).
-- **lgl** stands for logical, vectors that contain only TRUE or FALSE.
-- **fctr** stands for factors, which R uses to represent categorical variables with fixed possible values.
-- **date** stands for dates.
-
-## Filter rows with `filter()`
-
-`filter()` allows you to subset observations based on their values. 
-
-```{r filter_month_day, eval=T, message=T, cache=T}
-filter(flights, month == 1, day == 1)
-```
-
-## Filter rows with `filter()`
-
-`dplyr` functions never modify their inputs, so if you want to save the result, you’ll need to use the assignment operator, `<-`
-
-```{r filter_month_day_sav, eval=T, message=F, cache=T}
-jan1 <- filter(flights, month == 1, day == 1)
-```
-
-R either prints out the results, or saves them to a variable.
-
-```{r filter_month_day_sav_display, eval=T, message=F, cache=T}
-(dec25 <- filter(flights, month == 12, day == 25))
-```
-
-## Logical operators
-
-Multiple arguments to `filter()` are combined with “and”: every expression must be true in order for a row to be included in the output.
-
-```{r logical_operator, echo=FALSE, out.width='100%'}
-knitr::include_graphics('img/transform-logical.png')
-```
-
-## Logical operators
-
-Test the following operations:
-
-```{r filter_logical_operators, eval=T, message=F, cache=T}
-filter(flights, month == 11 | month == 12)
-filter(flights, month %in% c(11, 12))
-filter(flights, !(arr_delay > 120 | dep_delay > 120))
-filter(flights, arr_delay <= 120, dep_delay <= 120)
-```
-
-## Missing values
-
-One important feature of R that can make comparison tricky is missing values, or `NA`s (“not availables”).
-
-```{r filter_logical_operators_NA, eval=T, message=T, cache=T}
-NA > 5
-10 == NA
-NA + 10
-NA / 2
-```
-
-## Missing values
-
-```{r filter_logical_operators_test_NA, eval=T, message=T, cache=T}
-NA == NA
-is.na(NA)
-```
-
-## Filter challenges
-
-Find all flights that:
-
-- Had an arrival delay of two or more hours
-- Were operated by United, American, or Delta
-- Departed between midnight and 6am (inclusive)
-
-Another useful dplyr filtering helper is `between()`. What does it do? Can you use it to simplify the code needed to answer the previous challenges?
-
-How many flights have a missing `dep_time`? What other variables are missing? What might these rows represent?
-
-Why is `NA ^ 0` not `NA`? Why is `NA | TRUE` not `NA`? Why is `FALSE & NA` not `NA`? Can you figure out the general rule? (`NA * 0` is a tricky counter-example!)
-
-
-## Arrange rows with `arrange()`
-
-`arrange()` works similarly to `filter()` except that instead of selecting rows, it changes their order.
-
-```{r arrange_ymd, eval=F, message=F, cache=T}
-arrange(flights, year, month, day)
-```
-
-Use `desc()` to re-order by a column in descending order:
-
-```{r arrange_desc, eval=F, message=F, cache=T}
-arrange(flights, desc(dep_delay))
-```
-
-Missing values are always sorted at the end:
-
-```{r arrange_NA, eval=F, message=F, cache=T}
-arrange(tibble(x = c(5, 2, NA)), x)
-arrange(tibble(x = c(5, 2, NA)), desc(x))
-```
-
-## Arrange challenges
-
-- Sort flights to find the most delayed flights. Find the flights that left earliest.
-- Sort flights to find the fastest flights.
-- Which flights traveled the longest? Which traveled the shortest?
-
-## Select columns with `select()`
-
-`select()` allows you to rapidly zoom in on a useful subset using operations based on the names of the variables.
-
-```{r select_ymd, eval=F, message=F, cache=T}
-select(flights, year, month, day)
-select(flights, year:day)
-select(flights, -(year:day))
-```
-
-## Select columns with `select()`
-
-There are a number of helper functions you can use within `select()`:
-
-- `starts_with("abc")`: matches names that begin with “abc”.
-- `ends_with("xyz")`: matches names that end with “xyz”.
-- `contains("ijk")`: matches names that contain “ijk”.
-- `matches("(.)\\1")`: selects variables that match a regular expression. This one matches any variables that contain repeated characters. You’ll learn more about regular expressions in strings.
-- `num_range("x", 1:3)`: matches `x1`, `x2` and `x3`.
-
-See `?select` for more details.
-
-## Select challenges
-
-- Brainstorm as many ways as possible to select `dep_time`, `dep_delay`, `arr_time`, and `arr_delay` from `flights`.
-- What does the `one_of()` function do? Why might it be helpful in conjunction with this vector?
-```{r select_one_of, eval=F, message=F, cache=T}
-vars <- c("year", "month", "day", "dep_delay", "arr_delay")
-```
-- Does the result of running the following code surprise you? How do the select helpers deal with case by default? How can you change that default?
-```{r select_contains, eval=F, message=F, cache=T}
-select(flights, contains("TIME"))
-```
-
-## Add new variables with `mutate()`
-
-It’s often useful to add new columns that are functions of existing columns. That’s the job of `mutate()`.
-
-```{r mutate, eval=F, message=F, cache=T}
-flights_sml <- select(flights, 
-  year:day, 
-  ends_with("delay"), 
-  distance, 
-  air_time
-)
-mutate(flights_sml,
-  gain = dep_delay - arr_delay,
-  speed = distance / air_time * 60
-)
-```
-
-**4_a**
-
-## Add new variables with `mutate()`
-
-You can refer to columns that you’ve just created:
-
-```{r mutate_reuse, eval=F, message=F, cache=T}
-mutate(flights,
-  gain = dep_delay - arr_delay,
-  hours = air_time / 60,
-  gain_per_hour = gain / hours
-)
-```
-
-## Useful creation functions
-
-- Offsets: `lead()` and `lag()` allow you to refer to leading or lagging values. This allows you to compute running differences (e.g. `x - lag(x)`) or find when values change (`x != lag(x)`).
-- Cumulative and rolling aggregates: R provides functions for running sums, products, mins and maxes: `cumsum()`, `cumprod()`, `cummin()`, `cummax()`; and dplyr provides `cummean()` for cumulative means. 
-- Logical comparisons, `<`, `<=`, `>`, `>=`, `!=`, and `==`
-- Ranking: there are a number of ranking functions, but you should start with `min_rank()`. There is also `row_number()`, `dense_rank()`, `percent_rank()`, `cume_dist()`, `ntile()`
-
-## Mutate challenges
-
-- Currently `dep_time` and `sched_dep_time` are convenient to look at, but hard to compute with because they’re not really continuous numbers. Convert them to a more convenient representation of number of minutes since midnight.
-
-\pause
-
-```{r mutate_challenges_a, eval=F, message=F, cache=T}
-mutate(
-  flights,
-  dep_time = (dep_time %/% 100) * 60 +
-    dep_time %% 100,
-  sched_dep_time = (sched_dep_time %/% 100) * 60 +
-    sched_dep_time %% 100
-)
-```
-
-**4_b**
-
-## Mutate challenges
-
-- Compare `dep_time`, `sched_dep_time`, and `dep_delay`. How would you expect those three numbers to be related?
-
-\pause
-
-```{r mutate_challenge_b, eval=F, message=F, cache=T}
-mutate(
-  flights,
-  dep_time = (dep_time %/% 100) * 60 + 
-    dep_time %% 100,
-  sched_dep_time = (sched_dep_time %/% 100) * 60 +
-    sched_dep_time %% 100
-)
-```
-
-**4_c**
-
-## Mutate challenges
-
-- Find the 10 most delayed flights using a ranking function. How do you want to handle ties? Carefully read the documentation for `min_rank()`
-
-\pause
-
-```{r mutate_challenge_c, eval=F, message=F, cache=T}
-flights_md <- mutate(flights, most_delay = min_rank(desc(dep_delay)))
-filter(flights_md, most_delay < 10)
-```
-
-**4_d**
-
-## Combining multiple operations with the pipe
-
-We don't want to create useless intermediate variables so we can use the pipe operator: `%>%`
-(`ctrl + shift + M`).
-
-```{r pipe_example_a, eval=F, message=F, cache=T}
-flights_md <- mutate(flights,
-                     most_delay = min_rank(desc(dep_delay)))
-flights_md <- filter(flights_md, most_delay < 10)
-flights_md <- arrange(flights_md, most_delay)
-```
-
-## Combining multiple operations with the pipe
-
-We don't want to create useless intermediate variables so we can use the pipe operator: `%>%`
-(`ctrl + shift + M`).
-
-```{r pipe_example_b, eval=F, message=F, cache=T}
-flights %>%
-  mutate(most_delay = min_rank(desc(dep_delay))) %>% 
-  filter(most_delay < 10) %>% 
-  arrange(most_delay)
-```
-
-## Combining multiple operations with the pipe
-
-Behind the scenes, `x %>% f(y)` turns into `f(x, y)`, and `x %>% f(y) %>% g(z)` turns into `g(f(x, y), z)` and so on. You can use the pipe to rewrite multiple operations in a way that you can read left-to-right, top-to-bottom. 
-
-You can access the transmitted variables with `.`
-
-```{r pipe_example_c, eval=F, message=F, cache=T}
-flights %>%
-  mutate(most_delay = min_rank(desc(dep_delay))) %>% 
-  filter(., most_delay < 10) %>% 
-  arrange(., most_delay)
-```
-
-Working with the pipe is one of the key criteria for belonging to the `tidyverse`. The only exception is `ggplot2`: it was written before the pipe was discovered. Unfortunately, the next iteration of `ggplot2`, `ggvis`, which does use the pipe, isn’t quite ready for prime time yet.
\ No newline at end of file
-- 
GitLab