Skip to content
Snippets Groups Projects
Verified Commit 38d0849f authored by Laurent Modolo's avatar Laurent Modolo
Browse files

Practical_a: code improvement

parent e3bfebb9
No related branches found
No related tags found
No related merge requests found
...@@ -7,7 +7,7 @@ output: ...@@ -7,7 +7,7 @@ output:
use_bookdown: true use_bookdown: true
default_style: "light" default_style: "light"
lightbox: true lightbox: true
css: "../www/style_Rmd.css" css: "./www/style_Rmd.css"
--- ---
...@@ -96,7 +96,6 @@ The data is tidy: ...@@ -96,7 +96,6 @@ The data is tidy:
Meeting these 3 criteria for your data will simplify most of your data processing and analysis. Meeting these 3 criteria for your data will simplify most of your data processing and analysis.
```{r} ```{r}
dim(penguins)
summary(penguins) summary(penguins)
``` ```
...@@ -255,7 +254,7 @@ With the `mutate()` function create a `diy_pca` tibble with the scaled version o ...@@ -255,7 +254,7 @@ With the `mutate()` function create a `diy_pca` tibble with the scaled version o
<details><summary>Solution</summary> <details><summary>Solution</summary>
<p> <p>
```{r} ```{r}
diy_pca <- data_f %>% diy_data_f <- data_f %>%
mutate( mutate(
bill_length_mm = (bill_length_mm - mean(bill_length_mm)) / sd(bill_length_mm), bill_length_mm = (bill_length_mm - mean(bill_length_mm)) / sd(bill_length_mm),
bill_depth_mm = (bill_depth_mm - mean(bill_depth_mm)) / sd(bill_depth_mm), bill_depth_mm = (bill_depth_mm - mean(bill_depth_mm)) / sd(bill_depth_mm),
...@@ -284,7 +283,7 @@ point_projection <- function(line_slope, x, y){ ...@@ -284,7 +283,7 @@ point_projection <- function(line_slope, x, y){
return(list(x = results[1], y = results[2])) return(list(x = results[1], y = results[2]))
} }
diy_pca %>% diy_pca <- diy_data_f %>%
rowwise() %>% # perform the subsequent operation row by row rowwise() %>% # perform the subsequent operation row by row
mutate( mutate(
projection_x = point_projection( projection_x = point_projection(
...@@ -295,7 +294,9 @@ diy_pca %>% ...@@ -295,7 +294,9 @@ diy_pca %>%
line_slope = line_slope, line_slope = line_slope,
x = bill_length_mm, x = bill_length_mm,
y = bill_depth_mm)$y y = bill_depth_mm)$y
) %>% )
diy_pca %>%
ggplot() + ggplot() +
geom_point(aes(x = bill_length_mm, y = bill_depth_mm)) + geom_point(aes(x = bill_length_mm, y = bill_depth_mm)) +
geom_abline(slope = line_slope, color = "red") + geom_abline(slope = line_slope, color = "red") +
...@@ -355,7 +356,7 @@ Write the formula to compute the `S_dist` and `Residuals` variables in the `muta ...@@ -355,7 +356,7 @@ Write the formula to compute the `S_dist` and `Residuals` variables in the `muta
```{r, eval=F} ```{r, eval=F}
line_slope <- 0.2 line_slope <- 0.2
diy_pca <- diy_pca %>% diy_pca <- diy_data_f %>%
rowwise() %>% # perform the subsequent operation row by row rowwise() %>% # perform the subsequent operation row by row
mutate( mutate(
projection_x = point_projection( projection_x = point_projection(
...@@ -405,7 +406,7 @@ You can use the `cov()` function to perform this computation ...@@ -405,7 +406,7 @@ You can use the `cov()` function to perform this computation
<details><summary>Solution</summary> <details><summary>Solution</summary>
<p> <p>
```{r} ```{r}
diy_cov <- diy_pca %>% as.matrix() %>% cov() diy_cov <- diy_data_f %>% as.matrix() %>% cov()
diy_cov diy_cov
``` ```
</p> </p>
...@@ -433,13 +434,12 @@ Then you will need to compute the `slope` value for the `geom_abline` function f ...@@ -433,13 +434,12 @@ Then you will need to compute the `slope` value for the `geom_abline` function f
```{r include=FALSE} ```{r include=FALSE}
point_projection <- function(diy_cov, x, y){ point_projection <- function(diy_cov, x, y){
a <- c(x, y) a <- c(x, y)
b <- eigen(diy_cov)$values[1] * eigen(diy_cov)$vector[, 1] b <- eigen(diy_cov)$vector[, 1]
scaled_b <- b / c(sqrt(sum(b^2))) results <- c(a %*% b) * b
results <- c(a %*% scaled_b) * scaled_b
list(x = results[1], y = results[2]) list(x = results[1], y = results[2])
} }
diy_pca <- diy_pca %>% diy_pca <- diy_data_f %>%
rowwise() %>% # perform the subsequent operation row by row rowwise() %>% # perform the subsequent operation row by row
mutate( mutate(
pc1_x = point_projection( pc1_x = point_projection(
...@@ -475,7 +475,7 @@ point_projection <- function(diy_cov, x, y){ ...@@ -475,7 +475,7 @@ point_projection <- function(diy_cov, x, y){
list(x = results[1], y = results[2]) list(x = results[1], y = results[2])
} }
diy_pca <- diy_pca %>% diy_pca <- diy_data_f %>%
rowwise() %>% # perform the subsequent operation row by row rowwise() %>% # perform the subsequent operation row by row
mutate( mutate(
pc1_x = point_projection( pc1_x = point_projection(
...@@ -494,7 +494,7 @@ diy_pca %>% ...@@ -494,7 +494,7 @@ diy_pca %>%
ggplot() + ggplot() +
geom_point(aes(x = bill_length_mm, y = bill_depth_mm)) + geom_point(aes(x = bill_length_mm, y = bill_depth_mm)) +
geom_abline(slope = , color = "red") + # missing slope value here geom_abline(slope = , color = "red") + # missing slope value here
geom_point(aes(x = projection_x, y = projection_y), color = "red") + geom_point(aes(x = pc1_x, y = pc1_y), color = "red") +
geom_segment( geom_segment(
aes(x = bill_length_mm, aes(x = bill_length_mm,
y = bill_depth_mm, y = bill_depth_mm,
...@@ -514,9 +514,8 @@ For the projection function: ...@@ -514,9 +514,8 @@ For the projection function:
```{r, eval=F} ```{r, eval=F}
point_projection <- function(diy_cov, x, y){ point_projection <- function(diy_cov, x, y){
a <- c(x, y) a <- c(x, y)
b <- eigen(diy_cov)$values[1] * eigen(diy_cov)$vector[, 1] b <- eigen(diy_cov)$vector[, 1]
scaled_b <- b / c(sqrt(sum(b^2))) results <- c(a %*% b) * b
results <- c(a %*% scaled_b) * scaled_b
list(x = results[1], y = results[2]) list(x = results[1], y = results[2])
} }
``` ```
...@@ -542,13 +541,12 @@ Adapt your previous code to perform the computation on the PC2 ...@@ -542,13 +541,12 @@ Adapt your previous code to perform the computation on the PC2
```{r include=FALSE} ```{r include=FALSE}
point_projection <- function(diy_cov, x, y){ point_projection <- function(diy_cov, x, y){
a <- c(x, y) a <- c(x, y)
b <- eigen(diy_cov)$values[2] * eigen(diy_cov)$vector[, 2] b <- eigen(diy_cov)$vector[, 2]
scaled_b <- b / c(sqrt(sum(b^2))) results <- c(a %*% b) * b
results <- c(a %*% scaled_b) * scaled_b
return(list(x = results[1], y = results[2])) return(list(x = results[1], y = results[2]))
} }
diy_pca <- diy_pca %>% diy_pca <- diy_data_f %>%
rowwise() %>% # perform the subsequent operation row by row rowwise() %>% # perform the subsequent operation row by row
mutate( mutate(
pc2_x = point_projection( pc2_x = point_projection(
...@@ -605,7 +603,7 @@ diy_pca %>% ...@@ -605,7 +603,7 @@ diy_pca %>%
geom_point(aes(x = bill_length_mm, y = bill_depth_mm)) + geom_point(aes(x = bill_length_mm, y = bill_depth_mm)) +
geom_abline(slope = , color = "red") + # slope of the PC1 geom_abline(slope = , color = "red") + # slope of the PC1
geom_abline(slope = , color = "blue") + # slope of the PC2 geom_abline(slope = , color = "blue") + # slope of the PC2
geom_point(aes(x = projection_x, y = projection_y), color = "blue") + geom_point(aes(x = pc2_x, y = pc2_y), color = "blue") +
geom_segment( geom_segment(
aes(x = bill_length_mm, aes(x = bill_length_mm,
y = bill_depth_mm, y = bill_depth_mm,
...@@ -623,9 +621,8 @@ For the projection function: ...@@ -623,9 +621,8 @@ For the projection function:
```{r, eval=F} ```{r, eval=F}
point_projection <- function(diy_cov, x, y){ point_projection <- function(diy_cov, x, y){
a <- c(x, y) a <- c(x, y)
b <- eigen(diy_cov)$values[2] * eigen(diy_cov)$vector[, 2] b <- eigen(diy_cov)$vector[, 2]
scaled_b <- b / c(sqrt(sum(b^2))) results <- c(a %*% b) * b
results <- c(a %*% scaled_b) * scaled_b
return(list(x = results[1], y = results[2])) return(list(x = results[1], y = results[2]))
} }
``` ```
...@@ -654,13 +651,12 @@ You can merge your previous computation to plot the projection on the 2 first PC ...@@ -654,13 +651,12 @@ You can merge your previous computation to plot the projection on the 2 first PC
```{r include=FALSE} ```{r include=FALSE}
point_projection <- function(diy_cov, x, y, PC){ point_projection <- function(diy_cov, x, y, PC){
a <- c(x, y) a <- c(x, y)
b <- eigen(diy_cov)$values[PC] * eigen(diy_cov)$vector[, PC] b <- eigen(diy_cov)$vector[, PC]
scaled_b <- b / c(sqrt(sum(b^2))) results <- c(a %*% b) * b
results <- c(a %*% scaled_b) * scaled_b
return(list(x = results[1], y = results[2])) return(list(x = results[1], y = results[2]))
} }
diy_pca <- diy_pca %>% diy_pca <- diy_data_f %>%
rowwise() %>% # perform the subsequent operation row by row rowwise() %>% # perform the subsequent operation row by row
mutate( mutate(
pc1_x = point_projection( pc1_x = point_projection(
...@@ -714,7 +710,7 @@ point_projection <- function(diy_cov, x, y, PC){ ...@@ -714,7 +710,7 @@ point_projection <- function(diy_cov, x, y, PC){
# your code # your code
} }
diy_pca <- diy_pca %>% diy_pca <- diy_data_f %>%
rowwise() %>% # perform the subsequent operation row by row rowwise() %>% # perform the subsequent operation row by row
mutate( mutate(
pc1_x = point_projection( pc1_x = point_projection(
...@@ -740,12 +736,11 @@ diy_pca %>% ...@@ -740,12 +736,11 @@ diy_pca %>%
```{r, echo = F} ```{r, echo = F}
point_projection <- function(diy_cov, x, y, PC){ point_projection <- function(diy_cov, x, y, PC){
a <- c(x, y) a <- c(x, y)
b <- eigen(diy_cov)$value[PC] * eigen(diy_cov)$vector[, PC] b <- eigen(diy_cov)$vector[, PC]
scaled_b <- b / c(sqrt(sum(b^2))) a %*% b
a %*% scaled_b
} }
diy_pca <- diy_pca %>% diy_pca <- diy_data_f %>%
rowwise() %>% # perform the subsequent operation row by row rowwise() %>% # perform the subsequent operation row by row
mutate( mutate(
pc1_x = point_projection( pc1_x = point_projection(
...@@ -773,9 +768,8 @@ diy_pca %>% ...@@ -773,9 +768,8 @@ diy_pca %>%
```{r eval=F} ```{r eval=F}
point_projection <- function(diy_cov, x, y, PC){ point_projection <- function(diy_cov, x, y, PC){
a <- c(x, y) a <- c(x, y)
b <- eigen(diy_cov)$value[PC] * eigen(diy_cov)$vector[, PC] b <- eigen(diy_cov)$vector[, PC]
scaled_b <- b / c(sqrt(sum(b^2))) a %*% b
a %*% scaled_b
} }
``` ```
</p> </p>
...@@ -786,14 +780,25 @@ point_projection <- function(diy_cov, x, y, PC){ ...@@ -786,14 +780,25 @@ point_projection <- function(diy_cov, x, y, PC){
In the `prcomp` output you can directly get the coordinates in PCs space from the `$x` slot. In the `prcomp` output you can directly get the coordinates in PCs space from the `$x` slot.
```{r} ```{r}
diy_pca <- diy_pca %>% diy_data_f %>%
rowwise() %>% # perform the subsequent operation row by row
mutate(
pc1_x = point_projection(
diy_cov = diy_cov,
x = bill_length_mm,
y = bill_depth_mm,
PC = 1),
pc2_y = point_projection(
diy_cov = diy_cov,
x = bill_length_mm,
y = bill_depth_mm,
PC = 2),
) %>%
ungroup() %>% ungroup() %>%
mutate( mutate(
pc1_x_ref = data_f_pca$x[,1], pc1_x_ref = data_f_pca$x[,1],
pc2_y_ref = data_f_pca$x[,2] pc2_y_ref = data_f_pca$x[,2]
) ) %>%
diy_pca %>%
bind_cols( bind_cols(
data %>% select(-colnames(diy_pca)[1:2]) %>% filter(sex == "female") data %>% select(-colnames(diy_pca)[1:2]) %>% filter(sex == "female")
) %>% ) %>%
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment