diff --git a/session_5/slides.Rmd b/session_5/slides.Rmd
index 020085a9d39bacb77152ec07e300c463e5b81925..369cda9f284554f2639f749d7ef79dd84687a57f 100644
--- a/session_5/slides.Rmd
+++ b/session_5/slides.Rmd
@@ -3,6 +3,8 @@ title: "R#5: data transformation"
 author: "Laurent Modolo [laurent.modolo@ens-lyon.fr](mailto:laurent.modolo@ens-lyon.fr)"
 date: "28 Nov 2019"
 output:
+  slidy_presentation:
+    highlight: tango
   beamer_presentation:
     theme: metropolis
     slide_level: 3
@@ -10,8 +12,6 @@ output:
     df_print: tibble
     highlight: tango
     latex_engine: xelatex
-  slidy_presentation:
-    highlight: tango
 ---
 
 ```{r setup, include=FALSE, cache=TRUE}
@@ -183,12 +183,13 @@ flights %>%
   mutate(
     canceled = is.na(dep_time) | is.na(arr_time)
   ) %>% 
+  filter(canceled) %>% 
   mutate(wday = strftime(time_hour,'%A')) %>% 
   group_by(wday) %>% 
   summarise(
     cancel_day = n()
   ) %>%
-  ggplot(mapping = aes(x = wday, y = cancel_day)) +
+  ggplot(mapping = aes(x = fct_reorder(wday, cancel_day), y = cancel_day)) +
   geom_col()
 ```
 
@@ -264,29 +265,6 @@ flights %>%
 
 - What time of day should you fly if you want to avoid delays as much as possible?
 
-```{r group_filter_a, eval=F}
-flights %>% 
-  group_by(hour) %>% 
-  summarise(
-    mean_delay = mean(arr_delay, na.rm = T),
-    sd_delay = sd(arr_delay, na.rm = T),
-  ) %>% 
-  ggplot() +
-  geom_errorbar(mapping = aes(
-    x = hour,
-    ymax = mean_delay + sd_delay,
-    ymin = mean_delay - sd_delay)) +
-  geom_point(mapping = aes(
-    x = hour,
-    y = mean_delay,
-  ))
-```
-**5_d**
-
-## Goup by challenges
-
-- What time of day should you fly if you want to avoid delays as much as possible?
-
 ```{r group_filter_b, eval=T, echo = F, warning=F, message=FALSE, cache=T, fig.width=8, fig.height=3.5}
 flights %>% 
   group_by(hour) %>% 
@@ -303,4 +281,5 @@ flights %>%
     x = hour,
     y = mean_delay,
   ))
-```
\ No newline at end of file
+```
+ **5_d**
\ No newline at end of file
diff --git a/session_6/img/join-venn.png b/session_6/img/join-venn.png
new file mode 100644
index 0000000000000000000000000000000000000000..023cd2f26aab5b5b5b1acdbe183a6ef39dc2b133
Binary files /dev/null and b/session_6/img/join-venn.png differ
diff --git a/session_6/img/relational-nycflights.png b/session_6/img/relational-nycflights.png
new file mode 100644
index 0000000000000000000000000000000000000000..10b04ce0fa0609794c8c3780674d0d8bcb4209fe
Binary files /dev/null and b/session_6/img/relational-nycflights.png differ
diff --git a/session_6/slides.Rmd b/session_6/slides.Rmd
new file mode 100644
index 0000000000000000000000000000000000000000..72a183f798b30275dc66b75238ce6243571dc3c9
--- /dev/null
+++ b/session_6/slides.Rmd
@@ -0,0 +1,177 @@
+---
+title: "R#6: tidydata"
+author: "Laurent Modolo [laurent.modolo@ens-lyon.fr](mailto:laurent.modolo@ens-lyon.fr)"
+date: "19 Dec 2019"
+output:
+  slidy_presentation:
+    highlight: tango
+  beamer_presentation:
+    theme: metropolis
+    slide_level: 3
+    fig_caption: no
+    df_print: tibble
+    highlight: tango
+    latex_engine: xelatex
+---
+
+```{r setup, include = FALSE, echo = FALSE}
+library(tidyverse)
+library(nycflights13)
+flights2 <- flights %>% # slimmed-down flights table reused by the join slides
+  select(year:day, hour, origin, dest, tailnum, carrier)
+```
+
+## Tidydata
+
+There are three interrelated rules which make a dataset tidy:
+
+- Each variable must have its own column.
+- Each observation must have its own row.
+- Each value must have its own cell.
+
+```{r load_data, eval = TRUE, message = TRUE}
+library(tidyverse) # includes tidyr, home of the pivot_*() functions
+```
+
+## pivot longer
+
+```{r table4a, eval = TRUE, message = TRUE}
+table4a # number of TB cases
+```
+
+## pivot longer
+
+```{r pivot_longer, eval = TRUE, message = TRUE}
+table4a %>%
+  pivot_longer(-country, # pivot every column except `country`
+               names_to = "year",
+               values_to = "case")
+```
+
+## pivot wider 
+
+```{r table2, eval = TRUE, message = TRUE}
+table2 # `type` holds the variable names, `count` their values
+```
+
+## pivot wider 
+
+```{r pivot_wider, eval = TRUE, message = TRUE}
+table2 %>%
+  pivot_wider(names_from = type, # new column names come from `type`
+              values_from = count) # cell values come from `count`
+```
+
+## Relational data
+
+Sometimes the information is split across several tables.
+
+```{r airlines, eval = FALSE, echo = TRUE}
+library(nycflights13)
+flights
+airlines
+airports
+weather
+flights2 <- flights %>% # narrower table used in the join examples
+  select(year:day, hour, origin, dest, tailnum, carrier)
+```
+
+## Relational data
+
+```{r airlines_dag, echo = FALSE, out.width = "100%"}
+knitr::include_graphics("img/relational-nycflights.png") # pre-rendered figure
+```
+
+## Joins
+
+```{r joints, echo = FALSE, out.width = "100%"}
+knitr::include_graphics("img/join-venn.png") # pre-rendered figure
+```
+
+## `inner_join()`
+
+Matches pairs of observations whenever their keys are equal
+
+```{r inner_joint, eval = TRUE}
+flights2 %>%
+  inner_join(airlines) # no `by`: keys are the columns shared by both tables
+```
+## `left_join()`
+
+Keeps all observations in `x`
+
+```{r left_joint, eval = TRUE}
+flights2 %>%
+  left_join(airlines) # every row of flights2 (`x`) is kept
+```
+
+## `right_join()`
+
+Keeps all observations in `y`
+
+```{r right_joint, eval = TRUE}
+flights2 %>%
+  right_join(airlines) # every row of airlines (`y`) is kept
+```
+
+## `full_join()`
+
+Keeps all observations in `x` and `y`
+
+```{r full_joint, eval = TRUE}
+flights2 %>%
+  full_join(airlines) # rows of both flights2 and airlines are kept
+```
+
+## Defining the key columns
+
+The default, `by = NULL`, uses all variables that appear in both tables, the so-called natural join.
+
+```{r left_join_weather, eval = TRUE}
+flights2 %>%
+  left_join(weather) # natural join on every column the tables share
+```
+
+## Defining the key columns
+
+A character vector, `by = "x"`. This is like a natural join, but uses only some of the common variables.
+
+```{r left_join_tailnum, eval = TRUE, echo = TRUE}
+flights2 %>%
+  left_join(planes, by = "tailnum") # match on `tailnum` only
+```
+
+## Defining the key columns
+
+A named character vector: `by = c("a" = "b")`. This will match variable `a` in table `x` to variable `b` in table `y`.
+
+```{r left_join_airport, eval = TRUE, echo = TRUE}
+flights2 %>%
+  left_join(airports, by = c("dest" = "faa")) # `dest` in x matches `faa` in y
+```
+
+## Filtering joins
+
+Filtering joins match observations in the same way as mutating joins, but affect the observations, not the variables. There are two types:
+
+- `semi_join(x, y)` keeps all observations in `x` that have a match in `y`.
+- `anti_join(x, y)` drops all observations in `x` that have a match in `y`.
+
+
+## Filtering joins
+
+```{r top_dest, eval = TRUE, echo = TRUE}
+top_dest <- flights %>% # the ten most frequent destinations
+  count(dest, sort = TRUE) %>%
+  head(10)
+flights %>%
+  semi_join(top_dest, by = "dest") # keep only flights to those destinations
+```
+
+## Set operations
+
+These expect the `x` and `y` inputs to have the same variables, and treat the observations like sets:
+
+- `intersect(x, y)`: return only observations in both `x` and `y`.
+- `union(x, y)`: return unique observations in `x` and `y`.
+- `setdiff(x, y)`: return observations in `x`, but not in `y`.