From 68a83da0181899a2cb0f2aa97aa9a088d550dd72 Mon Sep 17 00:00:00 2001 From: Laurent Modolo <laurent.modolo@ens-lyon.fr> Date: Fri, 3 Sep 2021 11:57:05 +0200 Subject: [PATCH] README.md: update program --- README.md | 4 + session_3/{HTML_tuto_s3.Rmd => session_3.Rmd} | 131 ++++++------------ 2 files changed, 48 insertions(+), 87 deletions(-) rename session_3/{HTML_tuto_s3.Rmd => session_3.Rmd} (77%) diff --git a/README.md b/README.md index 9bb00ae..c2d7cf6 100644 --- a/README.md +++ b/README.md @@ -28,3 +28,7 @@ Groups: - Anissa Guillemin # program + +- Session 1: first step with R and RStudio +- Session 2: my first plot with `ggplot2` +- Session 3: more fun with `ggplot2` diff --git a/session_3/HTML_tuto_s3.Rmd b/session_3/session_3.Rmd similarity index 77% rename from session_3/HTML_tuto_s3.Rmd rename to session_3/session_3.Rmd index 4612e49..5295b4f 100644 --- a/session_3/HTML_tuto_s3.Rmd +++ b/session_3/session_3.Rmd @@ -1,47 +1,32 @@ --- -title: 'R#3: Transformations with ggplot2' +title: 'R.3: Transformations with ggplot2' author: "Laurent Modolo [laurent.modolo@ens-lyon.fr](mailto:laurent.modolo@ens-lyon.fr), Hélène Polvèche [hpolveche@istem.fr](mailto:hpolveche@istem.fr)" -date: "Mars 2020" +date: "2021" output: - html_document: default - pdf_document: default + rmdformats::downcute: + self_contain: false + use_bookdown: true + default_style: "dark" + lightbox: true + css: "../src/style.css" --- -<style type="text/css"> -h3 { /* Header 3 */ - position: relative ; - color: #729FCF ; - left: 5%; -} -h2 { /* Header 2 */ - color: darkblue ; - left: 10%; -} -h1 { /* Header 1 */ - color: #034b6f ; -} -#pencadre{ - border:1px; - border-style:solid; - border-color: #034b6f; - background-color: #EEF3F9; - padding: 1em; - text-align: center ; - border-radius : 5px 4px 3px 2px; -} -legend{ - color: #034b6f ; -} -#pquestion { - color: darkgreen; - font-weight: bold; -} -</style> - -```{r setup, include=FALSE, cache=TRUE} + +```{r setup, include=FALSE} +rm(list=ls()) 
knitr::opts_chunk$set(echo = TRUE) +knitr::opts_chunk$set(comment = NA) +``` +```{r klippy, echo=FALSE, include=TRUE} +klippy::klippy( + position = c('top', 'right'), + color = "white", + tooltip_message = 'Click to copy', + tooltip_success = 'Copied !') ``` -The goal of this practical is to practices advanced features of `ggplot2`. +# Introduction + +In the last session, we have seen how to use `ggplot2` and [The Grammar of Graphics](https://www.amazon.com/Grammar-Graphics-Statistics-Computing/dp/0387245448/ref=as_li_ss_tl). The goal of this practical is to practice more advanced features of `ggplot2`. The objectives of this session will be to: @@ -49,43 +34,31 @@ The objectives of this session will be to: - practices position adjustments - change the coordinate systems - \ - -# `ggplot2` statistical transformations - \ - +The first step is to load the `tidyverse`. + +<details><summary>Solution</summary> +<p> ```{r packageloaded, include=TRUE, message=FALSE} library("tidyverse") ``` +</p> +</details> - \ +Like in the previous sessions, it's good practice to create a new **.R** file to write your code instead of directly using the R terminal. +# `ggplot2` statistical transformations + + We are going to use the `diamonds` data set included in `tidyverse`. - Use the `help` and `view` command to explore this data set. - Try the `str` command, which information are displayed ? -```R +```{r str_diamon} str(diamonds) ``` -``` -## Classes 'tbl_df', 'tbl' and 'data.frame': 53940 obs. of 10 variables: -## $ carat : num 0.23 0.21 0.23 0.29 0.31 0.24 0.24 0.26 0.22 0.23 ... -## $ cut : Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 2 4 2 3 3 3 1 3 ... -## $ color : Ord.factor w/ 7 levels "D"<"E"<"F"<"G"<..: 2 2 2 6 7 7 6 5 2 5 ... -## $ clarity: Ord.factor w/ 8 levels "I1"<"SI2"<"SI1"<..: 2 3 5 4 2 6 7 3 4 5 ... -## $ depth : num 61.5 59.8 56.9 62.4 63.3 62.8 62.3 61.9 65.1 59.4 ... -## $ table : num 55 61 65 58 58 57 57 55 61 61 ... 
-## $ price : int 326 326 327 334 335 336 336 337 337 338 ... -## $ x : num 3.95 3.89 4.05 4.2 4.34 3.94 3.95 4.07 3.87 4 ... -## $ y : num 3.98 3.84 4.07 4.23 4.35 3.96 3.98 4.11 3.78 4.05 ... -## $ z : num 2.43 2.31 2.31 2.63 2.75 2.48 2.47 2.53 2.49 2.39 ... -``` - - \ - We saw scatterplot (`geom_point()`), smoothplot (`geom_smooth()`). Now barplot with `geom_bar()` : ```{r diamonds_barplot, cache = TRUE, fig.width=8, fig.height=4.5, message=FALSE} @@ -109,8 +82,6 @@ ggplot(data = diamonds, mapping = aes(x = cut)) + stat_count() ``` - \ - Every geom has a default stat; and every stat has a default geom. This means that you can typically use geoms without worrying about the underlying statistical transformation. There are three reasons you might need to use a stat explicitly: - You might want to override the default stat. @@ -124,14 +95,15 @@ demo <- tribble( "Premium", 13791, "Ideal", 21551 ) +``` -# (Don't worry that you haven't seen <- or tribble() before. You might be able -# to guess at their meaning from the context, and you will learn exactly what -# they do soon!) +(Don't worry that you haven't seen `tribble()` before. You might be able +to guess at its meaning from the context, and you will learn exactly what +it does soon!) +```{r 3_ab, include=TRUE, fig.width=8, fig.height=4.5} ggplot(data = demo, mapping = aes(x = cut, y = freq)) + geom_bar(stat = "identity") - ``` - You might want to override the default mapping from transformed variables to aesthetics ( e.g. proportion). @@ -149,13 +121,11 @@ ggplot(data = diamonds, mapping = aes(x = cut, y = ..prop..)) + If group is not used, the proportion is calculated with respect to the data that contains that field and is ultimately going to be 100% in any case. For instance, The proportion of an ideal cut in the ideal cut specific data will be 1. - \ - - You might want to draw greater attention to the statistical transformation in your code. 
+You might use `stat_summary()`, which summarises the y values for each unique x +value, to draw attention to the summary that you are computing: ```{r 3_c, include=TRUE, fig.width=8, fig.height=4.5, message=FALSE} -# you might use stat_summary(), which summarises the y values for each unique x -# value, to draw attention to the summary that you are computing: ggplot(data = diamonds, mapping = aes(x = cut, y = depth)) + stat_summary() @@ -172,8 +142,6 @@ ggplot(data = diamonds, mapping = aes(x = cut, y = depth)) + # Position adjustments - \ - You can colour a bar chart using either the `color` aesthetic, ```{r diamonds_barplot_color, cache = TRUE, fig.width=8, fig.height=4.5, message=FALSE} @@ -181,8 +149,6 @@ ggplot(data = diamonds, mapping = aes(x = cut, color = cut)) + geom_bar() ``` - \ - or, more usefully, `fill`: ```{r diamonds_barplot_fill, cache = TRUE, fig.width=8, fig.height=4.5, message=FALSE} @@ -190,8 +156,6 @@ ggplot(data = diamonds, mapping = aes(x = cut, fill = cut)) + geom_bar() ``` - - You can also use `fill` with another variable: ```{r diamonds_barplot_fill_clarity, cache = TRUE, fig.width=8, fig.height=4.5, message=FALSE} @@ -199,33 +163,29 @@ ggplot(data = diamonds, mapping = aes(x = cut, fill = clarity)) + geom_bar() ``` - - The stacking is performed by the position adjustment `position` -### fill +## fill ```{r diamonds_barplot_pos_fill, cache = TRUE, fig.width=8, fig.height=4.5, message=FALSE} ggplot(data = diamonds, mapping = aes(x = cut, fill = clarity)) + geom_bar( position = "fill") ``` -### dodge +## dodge ```{r diamonds_barplot_pos_dodge, cache = TRUE, fig.width=8, fig.height=4.5, message=FALSE} ggplot(data = diamonds, mapping = aes(x = cut, fill = clarity)) + geom_bar( position = "dodge") ``` -### jitter +## jitter ```{r diamonds_barplot_pos_jitter, cache = TRUE, fig.width=8, fig.height=4.5, message=FALSE} ggplot(data = diamonds, mapping = aes(x = cut, fill = clarity)) + geom_bar( position = "jitter") ``` - - ```{r dia_jitter2, cache = 
TRUE, fig.width=8, fig.height=4.5, message=FALSE} ggplot(data = diamonds, mapping = aes(x = cut, y = depth, color = clarity)) + geom_point() @@ -236,14 +196,13 @@ ggplot(data = diamonds, mapping = aes(x = cut, y = depth, color = clarity)) + geom_jitter() ``` -### violin +## violin ```{r dia_violon, cache = TRUE, fig.width=8, fig.height=4.5, message=FALSE} ggplot(data = diamonds, mapping = aes(x = cut, y = depth, color = clarity)) + geom_violin() ``` - # Coordinate systems Cartesian coordinate system where the x and y positions act independently to determine the location of each point. There are a number of other coordinate systems that are occasionally helpful. @@ -294,5 +253,3 @@ bar bar + coord_polar() ``` - -##See you to Session#4 : "data transformation" \ No newline at end of file -- GitLab