Skip to content
Snippets Groups Projects
Commit d424a0c0 authored by nfontrod's avatar nfontrod
Browse files

src/01_get_condig_gene.py: fix the gtf used

parent 35b8d955
Branches
No related tags found
No related merge requests found
......@@ -15,7 +15,7 @@ import pandas as pd
##################
GTF = Path(__file__).parents[1] / "data" / \
"GCF_000001405.40_GRCh38.p14_genomic.gtf"
"GCF_000001405.39_GRCh38.p13_genomic.gtf_coding_annotation.gtf"
OUTPUT = Path(__file__).parents[1] / "results" / "coding_genes"
......
......@@ -36,19 +36,19 @@ compute_tdd <- function(treatment = "BRAF", mean_gene_count = 10, ercc = F) {
)
if (mean_gene_count > 0) {
norm_df$rmean <- norm_df %>%
select(-gene) %>%
dplyr::select(-gene) %>%
rowMeans()
norm_df <- norm_df %>% filter(rmean >= mean_gene_count) # nolint
norm_df <- norm_df %>% select(-rmean) # nolint
norm_df <- norm_df %>% dplyr::select(-rmean) # nolint
}
norm_df$rmin <- norm_df %>%
select(-gene) %>%
dplyr::select(-gene) %>%
apply(1, min)
norm_df <- norm_df %>% filter(rmin > 0) # nolint
norm_df <- norm_df %>% select(-rmin) # nolint
norm_df <- norm_df %>% dplyr::select(-rmin) # nolint
rep <- norm_df %>%
select(-gene) %>%
dplyr::select(-gene) %>%
colnames() %>%
str_extract("X...") %>%
unique()
......@@ -111,7 +111,7 @@ create_density_figs <- function(tdd_tibble, treatment = "BRAF",
ercc = F) {
dir.create(output_folder, showWarnings = F)
rep <- tdd_tibble %>%
select(-gene) %>%
dplyr::select(-gene) %>%
colnames() %>%
str_extract("X...") %>%
unique()
......@@ -387,10 +387,10 @@ get_cor_value <- function(tdd_full, my_group = "BRAF_DOWN") {
tdd <- tdd_full %>% filter(group == my_group) ## nolint
c <- cor(
tdd %>% filter(group == my_group) %>% # nolint
select(mean_DMSO_TDD) %>% unlist(), # nolint
dplyr::select(mean_DMSO_TDD) %>% unlist(), # nolint
tdd %>%
filter(group == my_group) %>% # nolint
select(mean_BRAF_TDD) %>%
dplyr::select(mean_BRAF_TDD) %>%
unlist() # nolint
)
res <- coef(lm(mean_BRAF_TDD ~ mean_DMSO_TDD, tdd %>%
......@@ -415,12 +415,13 @@ prepare_df_for_cor <- function(mean_gene_count = 10, ercc = F,
]
tdd_full <- tdd_full %>%
mutate(p_value = apply(
tdd_full %>% select(starts_with("X")), 1,
tdd_full %>% dplyr::select(starts_with("X")), 1,
function(x) {
t_test_func(x)
}
)) # nolint
tdd_full <- tdd_full %>% arrange(-p_value) # nolint
write_tsv(tdd_full, paste0(output_folder, "/full_TDD_correlation_table.txt"))
dir <- "./data/gene_lists/"
down_braf <- read.table(paste0(dir, "BRAF_DOWN_1.5_name.txt"))$V1
up_braf <- read.table(paste0(dir, "BRAF_UP_1.5_name.txt"))$V1
......@@ -436,11 +437,11 @@ prepare_df_for_cor <- function(mean_gene_count = 10, ercc = F,
write_tsv(tdd_full, paste0(output_folder, "/TDD_correlation_table.txt")) # nolint
tdd_full %>%
filter(kind == "BRAF_DOWN_sig", mean_BRAF_TDD > mean_DMSO_TDD) %>%
select(gene) %>%
dplyr::select(gene) %>%
write_tsv(paste0(output_folder, "/TDD_BRAF_BRAF_DOWN.txt"))
tdd_full %>%
filter(kind == "BRAF_UP_sig", mean_DMSO_TDD > mean_BRAF_TDD) %>%
select(gene) %>%
dplyr::select(gene) %>%
write_tsv(paste0(output_folder, "/TDD_DMSO_BRAF_UP.txt"))
return(tdd_full)
}
......@@ -609,7 +610,7 @@ create_peaks_barplots <- function(tdd_filt, pval_glm_nb, output_fig,
ifelse(!!as.symbol(data_col) > 0, 1, 0)
)
tdd_fig <- tdd_filt %>%
select(!!as.symbol(data_col), !!as.symbol(has_col), group) %>%
dplyr::select(!!as.symbol(data_col), !!as.symbol(has_col), group) %>%
group_by(group) %>%
summarise(
!!as.symbol(res_col) := mean(!!as.symbol(data_col)),
......@@ -772,7 +773,7 @@ get_peak_df <- function(peak_file, kind = "peak") {
)
peak_table <- peak_table %>%
mutate(size = stop - start) %>%
select(c(gene, size)) %>%
dplyr::select(c(gene, size)) %>%
group_by(gene) %>%
summarise(peak_surface = sum(size))
}
......@@ -808,7 +809,7 @@ get_tdd_pic_table <- function(peak_file, mean_gene_count = 10, ercc = F,
TDD_BRAF = gene %in% tdd_braf_braf_down$gene,
TDD_DMSO = gene %in% tdd_dmso_braf_up$gene
) %>%
select(gene, TDD_BRAF, TDD_DMSO) # nolint
dplyr::select(gene, TDD_BRAF, TDD_DMSO) # nolint
tdd_full <- tdd_full %>% left_join(peak_table) # nolint
tdd_full <- tdd_full %>% replace_na(list(peak = 0, peak_surface = 0)) # nolint
return(tdd_full)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment