diff --git a/src/01_get_condig_gene.py b/src/01_get_condig_gene.py index e3f0ba6ea25db30e907fa7f56a0e8c3d2dcdf592..98871e30eac6f7d2cb16d8e3186eaadfd562c469 100644 --- a/src/01_get_condig_gene.py +++ b/src/01_get_condig_gene.py @@ -15,7 +15,7 @@ import pandas as pd ################## GTF = Path(__file__).parents[1] / "data" / \ - "GCF_000001405.40_GRCh38.p14_genomic.gtf" + "GCF_000001405.39_GRCh38.p13_genomic.gtf_coding_annotation.gtf" OUTPUT = Path(__file__).parents[1] / "results" / "coding_genes" diff --git a/src/06_compute_tdd_index.R b/src/06_compute_tdd_index.R index 2c00edd6299c112e476662d726cbbbb0b187c4ca..b77bc2d937fd94916b481552c2a10aad04154c55 100644 --- a/src/06_compute_tdd_index.R +++ b/src/06_compute_tdd_index.R @@ -36,19 +36,19 @@ compute_tdd <- function(treatment = "BRAF", mean_gene_count = 10, ercc = F) { ) if (mean_gene_count > 0) { norm_df$rmean <- norm_df %>% - select(-gene) %>% + dplyr::select(-gene) %>% rowMeans() norm_df <- norm_df %>% filter(rmean >= mean_gene_count) # nolint - norm_df <- norm_df %>% select(-rmean) # nolint + norm_df <- norm_df %>% dplyr::select(-rmean) # nolint } norm_df$rmin <- norm_df %>% - select(-gene) %>% + dplyr::select(-gene) %>% apply(1, min) norm_df <- norm_df %>% filter(rmin > 0) # nolint - norm_df <- norm_df %>% select(-rmin) # nolint + norm_df <- norm_df %>% dplyr::select(-rmin) # nolint rep <- norm_df %>% - select(-gene) %>% + dplyr::select(-gene) %>% colnames() %>% str_extract("X...") %>% unique() @@ -111,7 +111,7 @@ create_density_figs <- function(tdd_tibble, treatment = "BRAF", ercc = F) { dir.create(output_folder, showWarnings = F) rep <- tdd_tibble %>% - select(-gene) %>% + dplyr::select(-gene) %>% colnames() %>% str_extract("X...") %>% unique() @@ -387,10 +387,10 @@ get_cor_value <- function(tdd_full, my_group = "BRAF_DOWN") { tdd <- tdd_full %>% filter(group == my_group) ## nolint c <- cor( tdd %>% filter(group == my_group) %>% # nolint - select(mean_DMSO_TDD) %>% unlist(), # nolint + dplyr::select(mean_DMSO_TDD) %>% unlist(), # nolint tdd %>% filter(group == my_group) %>% # nolint - select(mean_BRAF_TDD) %>% + dplyr::select(mean_BRAF_TDD) %>% unlist() # nolint ) res <- coef(lm(mean_BRAF_TDD ~ mean_DMSO_TDD, tdd %>% @@ -415,12 +415,13 @@ prepare_df_for_cor <- function(mean_gene_count = 10, ercc = F, ] tdd_full <- tdd_full %>% mutate(p_value = apply( - tdd_full %>% select(starts_with("X")), 1, + tdd_full %>% dplyr::select(starts_with("X")), 1, function(x) { t_test_func(x) } )) # nolint tdd_full <- tdd_full %>% arrange(-p_value) # nolint + write_tsv(tdd_full, paste0(output_folder, "/full_TDD_correlation_table.txt")) dir <- "./data/gene_lists/" down_braf <- read.table(paste0(dir, "BRAF_DOWN_1.5_name.txt"))$V1 up_braf <- read.table(paste0(dir, "BRAF_UP_1.5_name.txt"))$V1 @@ -436,11 +437,11 @@ prepare_df_for_cor <- function(mean_gene_count = 10, ercc = F, write_tsv(tdd_full, paste0(output_folder, "/TDD_correlation_table.txt")) # nolint tdd_full %>% filter(kind == "BRAF_DOWN_sig", mean_BRAF_TDD > mean_DMSO_TDD) %>% - select(gene) %>% + dplyr::select(gene) %>% write_tsv(paste0(output_folder, "/TDD_BRAF_BRAF_DOWN.txt")) tdd_full %>% filter(kind == "BRAF_UP_sig", mean_DMSO_TDD > mean_BRAF_TDD) %>% - select(gene) %>% + dplyr::select(gene) %>% write_tsv(paste0(output_folder, "/TDD_DMSO_BRAF_UP.txt")) return(tdd_full) } @@ -609,7 +610,7 @@ create_peaks_barplots <- function(tdd_filt, pval_glm_nb, output_fig, ifelse(!!as.symbol(data_col) > 0, 1, 0) ) tdd_fig <- tdd_filt %>% - select(!!as.symbol(data_col), !!as.symbol(has_col), group) %>% + dplyr::select(!!as.symbol(data_col), !!as.symbol(has_col), group) %>% group_by(group) %>% summarise( !!as.symbol(res_col) := mean(!!as.symbol(data_col)), @@ -772,7 +773,7 @@ get_peak_df <- function(peak_file, kind = "peak") { ) peak_table <- peak_table %>% mutate(size = stop - start) %>% - select(c(gene, size)) %>% + dplyr::select(c(gene, size)) %>% group_by(gene) %>% summarise(peak_surface = sum(size)) } @@ -808,7 +809,7 @@ get_tdd_pic_table <- function(peak_file, mean_gene_count = 10, ercc = F, TDD_BRAF = gene %in% tdd_braf_braf_down$gene, TDD_DMSO = gene %in% tdd_dmso_braf_up$gene ) %>% - select(gene, TDD_BRAF, TDD_DMSO) # nolint + dplyr::select(gene, TDD_BRAF, TDD_DMSO) # nolint tdd_full <- tdd_full %>% left_join(peak_table) # nolint tdd_full <- tdd_full %>% replace_na(list(peak = 0, peak_surface = 0)) # nolint return(tdd_full)