diff --git a/src/0_1_Clone_size.R b/src/0_1_Clone_size.R index 0c37918e48e882e176479f6f465ca4c98eccc8c8..9b646550dc6a0f96a99db3b558c7dd93d57634b4 100644 --- a/src/0_1_Clone_size.R +++ b/src/0_1_Clone_size.R @@ -482,9 +482,9 @@ clone %>% alpha = pbmcapply::pbmclapply( sample, function(sample, data){ data %>% - pull(n) %>% - sample(round(min_time_size), replace = T) %>% - fisher.alpha() + pull(n) %>% + sample(round(min_time_size), replace = T) %>% + fisher.alpha() }, data = data, mc.cores = 10, @@ -575,32 +575,34 @@ ggsave(filename = "results/2020_11_18_alpha_diversity_bootstrap_boxplot.pdf", wi ###################### fisher alpha subsampling experiment ################### # boostrap -data <- clone %>% +data <- clone %>% mutate(day = fct_reorder(day, as.numeric(as.vector(day)))) %>% filter( !(donor %in% "Donor C" & antigen %in% "A2" & clone == 1) ) %>% - group_by(donor, day, antigen) %>% + group_by(donor, antigen, clone, day) %>% + mutate(n = n()) %>% + group_by(donor, antigen, day) %>% select(-percent) %>% mutate(day_size = n()) %>% group_by(donor, antigen) %>% mutate(days_size = max(day_size)) %>% - group_by(donor, day, antigen) %>% + group_by(donor, antigen, day) %>% nest() %>% mutate(alpha = lapply(data, function(data){ n_sample <- 1000 tibble( n_cell = seq( - from = 10, - to = max(500, (data %>% pull(days_size) %>% max()) + 10), + from = 20, + to = max(600, (data %>% pull(days_size) %>% max()) * 1.1), by = 1) %>% rep(n_sample), sample = rep( 1:n_sample, each = ( seq( - from = 10, - to = max(500, (data %>% pull(days_size) %>% max()) + 10), + from = 20, + to = max(600, (data %>% pull(days_size) %>% max()) * 1.1), by = 1) %>% length() )), @@ -612,8 +614,7 @@ data <- clone %>% data %>% pull(n) %>% sample(round(n_cell), replace = T) %>% - fisherfit(.) %>% - .$estimate + fisher.alpha() }, data = data, mc.cores = 10, @@ -629,7 +630,10 @@ data <- clone %>% unnest(alpha) %>% mutate( sample = as.factor(sample), - ) %>% + ) + +data <- data %>% + select(-c(pval)) %>% group_by(donor, antigen, n_cell) %>% nest() %>% mutate(pval = pbmcapply::pbmclapply(data, function(data){ @@ -663,9 +667,18 @@ data <- clone %>% ) # p-value computation between early and late +#data <- + data <- data %>% + select(-c(pval_early_late)) %>% + group_by(donor, antigen) %>% mutate( - time = ifelse(as.vector(day) > 16, "late", "early"), + time = as.numeric(as.vector(day)), + time = ifelse(time == min(time), + "early", + ifelse(time == max(time), + "late", + NA)), time = as.factor(time) ) %>% group_by(donor, antigen, n_cell) %>% @@ -673,60 +686,115 @@ data <- data %>% mutate(pval_early_late = pbmcapply::pbmclapply( data, function(data){ data %>% - group_by(time) %>% - mutate( - ecdf = ecdf(alpha)(alpha) - ) %>% - filter(!duplicated(alpha)) %>% - group_by(alpha) %>% - mutate( - ecdf = ecdf / length(levels(time)), - s_ecdf = sum(ecdf)) %>% - group_by(day) %>% - mutate(s_ecdf = s_ecdf - ecdf) %>% - group_by(alpha) %>% - mutate(pval = max(sum(s_ecdf))) %>% - pull(pval) %>% - max() + filter(!is.na(time)) %>% + group_by(time) %>% + mutate( + ecdf = ecdf(alpha)(alpha) + ) %>% + filter(!duplicated(alpha)) %>% + group_by(alpha) %>% + mutate( + ecdf = ecdf / length(levels(time)), + s_ecdf = sum(ecdf)) %>% + group_by(time) %>% + mutate(s_ecdf = s_ecdf - ecdf) %>% + group_by(alpha) %>% + mutate(pval = max(sum(s_ecdf))) %>% + pull(pval) %>% + max() }, - mc.cores = 11, + mc.cores = 10, ignore.interactive = T)) %>% unnest(c(data, pval_early_late)) %>% group_by(donor, antigen) %>% - mutate(pval_early_late_signif = max(n_cell[pval_early_late > 1.05])) + mutate(pval_early_late_signif = max(n_cell[pval_early_late > 0.05])) + +data %>% + group_by(donor, antigen, day, n_cell) %>% + filter(!(donor == "Donor B" & antigen == "A2" & alpha == max(alpha))) %>% + filter(donor == "Donor B", antigen == "A2") %>% + ggplot( + aes( + x = n_cell, + y = alpha, + ) + ) + + geom_point() + + facet_wrap(~day) -save(data, file = "results/2021_11_13_fisher_diversity_bootstrap.Rdata") -load(file = "results/2020_11_13_fisher_diversity_bootstrap.Rdata") +data <- data %>% + filter(alpha < 1e8) %>% + group_by(donor, antigen, day, n_cell) %>% + filter(!(donor == "Donor B" & antigen == "A2" & alpha == max(alpha))) %>% + group_by(donor, antigen, day, n_cell) %>% + mutate( + alpha_min = quantile(alpha, 0.05), + alpha_max = quantile(alpha, 0.95) + ) + + +save(data, file = "results/2021_11_20_fisher_diversity_bootstrap.Rdata") +load(file = "results/2021_11_20_fisher_diversity_bootstrap.Rdata") # plot -p <- ggplot(data %>% +p <- ggplot()+ + geom_vline(data = data %>% ungroup() %>% - filter(n_cell < day_size + 10) %>% - filter(n_cell > 20) %>% - filter(alpha < 1e9) %>% - slice_sample(n = 10000))+ - geom_vline( + filter(pval_signif < days_size * 1.1) %>% + slice_sample(n = 10000), aes( xintercept = pval_signif ), color = "gray50", linetype = 1, - size = 1.5 + size = 2 ) + - geom_vline( + geom_label( + data = data %>% + ungroup() %>% + filter(pval_signif < days_size * 1.1) %>% + slice_sample(n = 10000) %>% + group_by(antigen, donor) %>% + select(antigen, donor, pval_signif) %>% + distinct(), + aes( + x = pval_signif, + y = 30, + label = pval_signif, + ), + fill = "gray50", + check_overlap = T + ) + + geom_vline(data = data %>% + ungroup() %>% + filter(pval_early_late_signif < days_size * 1.1) %>% + slice_sample(n = 10000), aes( xintercept = pval_early_late_signif ), color = "black", linetype = 1, - size = 0.5 + size = 1 + ) + + geom_label( + data = data %>% + ungroup() %>% + filter(pval_early_late_signif < days_size * 1.1) %>% + slice_sample(n = 10000) %>% + group_by(antigen, donor) %>% + select(antigen, donor, pval_early_late_signif) %>% + distinct(), + aes( + x = pval_early_late_signif, + y = 0, + label = pval_early_late_signif + ), + check_overlap = T ) + geom_smooth(data = data %>% ungroup() %>% - filter(n_cell < day_size + 10) %>% - filter(n_cell > 20) %>% - filter(alpha < 1e9) %>% + filter(n_cell < days_size * 1.1) %>% slice_sample(n = 10000), aes( x = n_cell, @@ -742,9 +810,7 @@ p <- ggplot(data %>% geom_ribbon(data = data %>% ungroup() %>% filter(n_cell < day_size) %>% - filter(n_cell > 20) %>% - filter(alpha < 1e9) %>% - slice_sample(n = 10000), + slice_sample(n = 10000), aes( x = n_cell, ymin = alpha_min, @@ -756,9 +822,7 @@ p <- ggplot(data %>% ) + geom_smooth(data = data %>% ungroup() %>% - filter(n_cell < day_size + 10) %>% - filter(n_cell > 20) %>% - filter(alpha < 1e9) %>% + filter(n_cell < days_size * 1.1) %>% slice_sample(n = 10000), aes( x = n_cell, @@ -777,12 +841,12 @@ p <- ggplot(data %>% colour = guide_legend(override.aes = list(alpha = 1)), fill = guide_legend(override.aes = list(alpha = 1)) ) + - facet_wrap(~ antigen + donor, ncol = 4) + + facet_wrap(~ antigen + donor, ncol = 4, scales = "free") + theme_classic() print(p) -ggsave(plot = p, filename = "results/2020_11_13_alpha_diversity_bootstrap.png", width = 30, height = 15, units = "cm") -ggsave(plot = p, filename = "results/2020_11_13_alpha_diversity_bootstrap.pdf", width = 30, height = 15, units = "cm") +ggsave(plot = p, filename = "results/2020_11_20_alpha_diversity_bootstrap.png", width = 30, height = 15, units = "cm") +ggsave(plot = p, filename = "results/2020_11_20_alpha_diversity_bootstrap.pdf", width = 30, height = 15, units = "cm") ###################### clone number subsampling experiment ################### @@ -804,16 +868,16 @@ data <- clone %>% n_sample <- 1000 tibble( n_cell = seq( - from = 10, - to = max(500, (data %>% pull(days_size) %>% max()) + 10), + from = 20, + to = max(1000, (data %>% pull(days_size) %>% max()) + 10), by = 1) %>% rep(n_sample), sample = rep( 1:n_sample, each = ( seq( - from = 10, - to = max(500, (data %>% pull(days_size) %>% max()) + 10), + from = 20, + to = max(1000, (data %>% pull(days_size) %>% max()) + 10), by = 1) %>% length() )), @@ -877,8 +941,15 @@ data <- clone %>% # p-value computation between early and late data <- data %>% + select(-c(pval_early_late)) %>% + group_by(donor, antigen) %>% mutate( - time = ifelse(as.vector(day) > 15, "late", "early"), + time = as.numeric(as.vector(day)), + time = ifelse(time == min(time), + "early", + ifelse(time == max(time), + "late", + NA)), time = as.factor(time) ) %>% group_by(donor, antigen, n_cell) %>% @@ -886,21 +957,22 @@ data <- data %>% mutate(pval_early_late = pbmcapply::pbmclapply( data, function(data){ data %>% - group_by(time) %>% - mutate( - ecdf = ecdf(detected_clone)(detected_clone) - ) %>% - filter(!duplicated(detected_clone)) %>% - group_by(detected_clone) %>% - mutate( - ecdf = ecdf / length(levels(time)), - s_ecdf = sum(ecdf)) %>% - group_by(day) %>% - mutate(s_ecdf = s_ecdf - ecdf) %>% - group_by(detected_clone) %>% - mutate(pval = max(sum(s_ecdf))) %>% - pull(pval) %>% - max() + filter(!is.na(time)) %>% + group_by(time) %>% + mutate( + ecdf = ecdf(detected_clone)(detected_clone) + ) %>% + filter(!duplicated(detected_clone)) %>% + group_by(detected_clone) %>% + mutate( + ecdf = ecdf / length(levels(time)), + s_ecdf = sum(ecdf)) %>% + group_by(time) %>% + mutate(s_ecdf = s_ecdf - ecdf) %>% + group_by(detected_clone) %>% + mutate(pval = max(sum(s_ecdf))) %>% + pull(pval) %>% + max() }, mc.cores = 10, ignore.interactive = T)) %>% @@ -908,34 +980,67 @@ data <- data %>% group_by(donor, antigen) %>% mutate(pval_early_late_signif = max(n_cell[pval_early_late > 0.05])) -save(data, file = "results/2020_11_13_clone_diversity_bootstrap.Rdata") -load(file = "results/2020_11_13_clone_diversity_bootstrap.Rdata") +save(data, file = "results/2020_11_20_clone_diversity_bootstrap.Rdata") +load(file = "results/2020_11_20_clone_diversity_bootstrap.Rdata") # plot -p <- ggplot(data %>% - ungroup() %>% - filter(n_cell < max(pval_signif, days_size)) %>% - slice_sample(n = 100000)) + - geom_vline( +p <- ggplot() + + geom_vline(data = data %>% + filter(pval_signif < days_size * 1.1) %>% + slice_sample(n = 100000), aes( xintercept = pval_signif ), color = "gray50", linetype = 1, - size = 1.5 + size = 2 ) + - geom_vline( + geom_label( + data = data %>% + ungroup() %>% + filter(pval_signif < days_size * 1.1) %>% + slice_sample(n = 10000) %>% + group_by(antigen, donor) %>% + select(antigen, donor, pval_signif) %>% + distinct(), + aes( + x = pval_signif, + y = 30, + label = pval_signif, + ), + fill = "gray50", + check_overlap = T + ) + + geom_vline(data = data %>% + ungroup() %>% + filter(pval_early_late_signif < days_size * 1.1) %>% + slice_sample(n = 100000), aes( xintercept = pval_early_late_signif ), color = "black", linetype = 1, - size = 0.5 + size = 1 + ) + + geom_label( + data = data %>% + ungroup() %>% + filter(pval_early_late_signif < days_size * 1.1) %>% + slice_sample(n = 10000) %>% + group_by(antigen, donor) %>% + select(antigen, donor, pval_early_late_signif) %>% + distinct(), + aes( + x = pval_early_late_signif, + y = 0, + label = pval_early_late_signif + ), + check_overlap = T ) + geom_smooth(data = data %>% ungroup() %>% - filter(n_cell < max(pval_signif, day_size) + 10) %>% + filter(n_cell < days_size * 1.1) %>% slice_sample(n = 100000), aes( x = n_cell, @@ -951,7 +1056,7 @@ p <- ggplot(data %>% geom_ribbon(data = data %>% ungroup() %>% filter(n_cell < day_size) %>% - slice_sample(n = 100000), + slice_sample(n = 1000000), aes( x = n_cell, ymin = detected_clone_min, @@ -963,9 +1068,9 @@ p <- ggplot(data %>% ) + geom_smooth(data = data %>% ungroup() %>% - filter(n_cell < max(pval_signif, day_size) + 10) %>% + filter(n_cell < days_size * 1.1) %>% slice_sample(n = 100000), - aes( + aes( x = n_cell, y = detected_clone, color = day, @@ -980,12 +1085,12 @@ p <- ggplot(data %>% y = "number of clone detected") + guides(colour = guide_legend(override.aes = list(alpha = 1)), fill = guide_legend(override.aes = list(alpha = 1))) + - facet_wrap(~ antigen + donor, scales = "free_y", ncol = 4) + + facet_wrap(~ antigen + donor, scales = "free", ncol = 4) + theme_classic() print(p) -ggsave(plot = p, filename = "results/2020_11_13_clone_diversity_bootstrap.pdf", width = 30, height = 15, units = "cm") -ggsave(plot = p, filename = "results/2020_11_13_clone_diversity_bootstrap.png", width = 30, height = 15, units = "cm") +ggsave(plot = p, filename = "results/2020_11_20_clone_diversity_bootstrap.pdf", width = 30, height = 15, units = "cm") +ggsave(plot = p, filename = "results/2020_11_20_clone_diversity_bootstrap.png", width = 30, height = 15, units = "cm") ############################### FIg 1 boostraped ##############################