#| fig.alt: "Scatterplot of bill length versus bill depth for the three penguin species, showing a positive linear relationship within species. If species is omitted as a variable, the relationship switches to a negative trend, another example of Simpson’s paradox in the data"
# Simpson's paradox, part 1: base plot of bill depth against bill length
# with species left out of the aesthetics.
simpson_nospecies_base <- penguins %>%
  mutate(
    # Hold species as plain text and spell "Adelie" with its accent; this
    # is done so ggiraph recognizes species across plots.
    species = as.character(species),
    species = case_when(
      species == "Adelie" ~ "Adélie",
      TRUE ~ species
    )
  ) %>%
  ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
  labs(x = "Bill length (mm)", y = "Bill depth (mm)") +
  theme(panel.border = element_rect(fill = NA, color = "gray70"))
# Bill dimensions, including species:
simpson_wspecies_base <-
penguins %>%
mutate(species = as.character(species)) %>%
mutate(species = case_when(
species == "Adelie" ~ "Adélie",
TRUE ~ species)
) %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm, group = species)) +