#!/bin/bash
#
# Figure-generation pipeline.
#
# The script chains three kinds of steps:
#   * python3 -m src.bed_handler[.<tool>]  -> builds / filters / resizes bed files
#     (outputs given as bare names are later read back from results/bed_file/),
#   * python3 -m src.visu                  -> draws a metagene figure,
#   * mv / rm on results/figures/*.pdf     -> renames the figure that was just
#     drawn (presumably written by src.visu under a name derived from its
#     options) or discards it when only the normalisation table is wanted.
#
# Several src.visu calls are run with `--norm 0` on a "gene-dup" bed and their
# PDF is removed right away; the following call then passes a file from
# results/figures/coef_table/ to --norm. Presumably the first run produces that
# table -- confirm against src.visu.
#
# Must be launched from the project root (all paths are relative).

python3 -m src.bed_handler

# Filter data/bed/gene.bed on the 'id' column with each gene list.
python3 -m src.bed_handler.filter_bed \
  --bed_file data/bed/gene.bed \
  --filter_file data/gene_5-3_loop_tot.txt \
  --col_name 'id' \
  --outfile gene_with_5-3p_loop_tot.txt

python3 -m src.bed_handler.filter_bed \
  --bed_file data/bed/gene.bed \
  --filter_file data/gene_5-3_loop_ctrl.txt \
  --col_name 'id' \
  --outfile gene_with_5-3p_loop_ctrl.txt

python3 -m src.bed_handler.filter_bed \
  --bed_file data/bed/gene.bed \
  --filter_file data/gene_5-3_loop_siPPdown.txt \
  --col_name 'id' \
  --outfile gene_with_5-3p_loop_siPPdown.txt

python3 -m src.bed_handler.filter_bed \
  --bed_file data/bed/gene.bed \
  --filter_file data/gene_without_5-3_loop_siPPdown.txt \
  --col_name 'id' \
  --outfile gene_without_5-3p_loop_siPPdown.txt

python3 -m src.bed_handler.filter_bed \
  --bed_file data/bed/gene.bed \
  --filter_file data/gene_without_loops_siPPdown.txt \
  --col_name 'id' \
  --outfile gene_without_loops_siPPdown.txt

# -p: do not fail when the script is re-run and the folder already exists.
mkdir -p results/figures

# Metagene on every filtered gene, for the pooled design and each replicate,
# without normalisation and with bin-0 normalisation.
array=(all_replicates rep1 rep2 rep3)
for myrep in "${array[@]}"; do
  python3 -m src.visu \
    --design "data/design_exp_${myrep}.txt" \
    --bw_folder data/bigwig/ \
    --region_bed results/bed_file/filtered_gene.bed \
    --region_name gene \
    --output results/figures/ \
    --border_name TSS TTS \
    --environment 10000 25 \
    --show_replicate n \
    --figure_type metagene \
    --nb_bin 100 \
    --norm 'None'
  python3 -m src.visu \
    --design "data/design_exp_${myrep}.txt" \
    --bw_folder data/bigwig/ \
    --region_bed results/bed_file/filtered_gene.bed \
    --region_name gene \
    --output results/figures/ \
    --border_name TSS TTS \
    --environment 10000 25 \
    --show_replicate n \
    --figure_type metagene \
    --nb_bin 100 \
    --norm '0'
  mv results/figures/metagene_gene_100bin_10000_nt-around-25-bin_b0_norm.pdf \
    "results/figures/metagene_gene_100bin_10000_nt-around-25-bin_b0_norm_${myrep}.pdf"
  mv results/figures/metagene_gene_100bin_10000_nt-around-25-bin.pdf \
    "results/figures/metagene_gene_100bin_10000_nt-around-25-bin_${myrep}.pdf"
done

# Same figures restricted to the genes of gene_with_5-3p_loop_siPPdown.txt.
array=(all_replicates rep1 rep2 rep3)
for myrep in "${array[@]}"; do
  python3 -m src.visu \
    --design "data/design_exp_${myrep}.txt" \
    --bw_folder data/bigwig/ \
    --region_bed results/bed_file/gene_with_5-3p_loop_siPPdown.txt \
    --region_name gene \
    --output results/figures/ \
    --border_name TSS TTS \
    --environment 10000 25 \
    --show_replicate n \
    --figure_type metagene \
    --nb_bin 100 \
    --norm 'None'
  python3 -m src.visu \
    --design "data/design_exp_${myrep}.txt" \
    --bw_folder data/bigwig/ \
    --region_bed results/bed_file/gene_with_5-3p_loop_siPPdown.txt \
    --region_name gene \
    --output results/figures/ \
    --border_name TSS TTS \
    --environment 10000 25 \
    --show_replicate n \
    --figure_type metagene \
    --nb_bin 100 \
    --norm '0'
  mv results/figures/metagene_gene_100bin_10000_nt-around-25-bin_b0_norm.pdf \
    "results/figures/metagene_gene_100bin_10000_nt-around-25-bin_b0_norm_${myrep}_5-3-loop.pdf"
  mv results/figures/metagene_gene_100bin_10000_nt-around-25-bin.pdf \
    "results/figures/metagene_gene_100bin_10000_nt-around-25-bin_${myrep}_5-3-loop.pdf"
done

# For every results/bed_file/CTCF*gene.bed: exon metagene (raw), then exon
# metagene normalised with the table named after the matching gene-dup bed.
for gene_bed in results/bed_file/CTCF*gene.bed; do
  # An unmatched glob stays literal: skip it instead of running on a bogus path.
  [[ -e "${gene_bed}" ]] || continue
  exon_bed="${gene_bed%gene.bed}exon.bed"   # .../X_gene.bed -> .../X_exon.bed
  file_name="${exon_bed##*/}"               # basename of the exon bed ...
  file_name="${file_name%.bed}"             # ... without its extension
  full_name="${file_name%exon}gene-dup"     # X_exon -> X_gene-dup
  gene_bed="${gene_bed%.bed}-dup.bed"       # .../X_gene.bed -> .../X_gene-dup.bed
  python3 -m src.visu \
    --design data/design_exp_all_replicates.txt \
    --bw_folder data/bigwig/ \
    --region_bed "${exon_bed}" \
    --region_name exon \
    --output results/figures/ \
    --border_name start_exon end_exon \
    --environment 10000 25 \
    --show_replicate n \
    --figure_type metagene \
    --nb_bin 100 \
    --norm 'None'
  mv results/figures/metagene_exon_100bin_10000_nt-around-25-bin.pdf \
    "results/figures/${file_name}_metagene_exon_100bin_10000_nt-around-25-bin.pdf"
  python3 -m src.visu \
    --design data/design_exp_all_replicates.txt \
    --bw_folder data/bigwig/ \
    --region_bed "${gene_bed}" \
    --region_name gene \
    --output results/figures/ \
    --border_name start_exon end_exon \
    --environment 10000 25 \
    --show_replicate n \
    --figure_type metagene \
    --nb_bin 100 \
    --norm '0'
  rm results/figures/metagene_gene_100bin_10000_nt-around-25-bin_b0_norm.pdf
  python3 -m src.visu \
    --design data/design_exp_all_replicates.txt \
    --bw_folder data/bigwig/ \
    --region_bed "${exon_bed}" \
    --region_name exon \
    --output results/figures/ \
    --border_name start_exon end_exon \
    --environment 10000 25 \
    --show_replicate n \
    --figure_type metagene \
    --nb_bin 100 \
    --norm "results/figures/coef_table/tmp_cov_table_design_exp_all_replicates_${full_name}_100bin_10000_nt-around-25-bin_bin0_norm.txt"
  mv results/figures/metagene_exon_100bin_10000_nt-around-25-bin_file_norm.pdf \
    "results/figures/${file_name}_metagene_exon_100bin_10000_nt-around-25-bin_file_norm.pdf"
done

################################################################
# Condition siPP/siCTRL - exon ddx_down vs exon ddx_down_ctcf  #
################################################################
# exon ddx_down_ctcf corresponds to exons down-regulated by ddx5/17 near a
# ctcf site and exon ddx_down corresponds to exons down-regulated by ddx5/17
# far from ctcf sites
exps=(siCTRL siDDX)
for exp in "${exps[@]}"; do
  python3 -m src.visu \
    --design "data/design_exp_${exp}.txt" \
    --bw_folder data/bigwig/ \
    --region_bed results/bed_file/CTCF_2000_both_ddx_down_with0_gene.bed results/bed_file/Far_CTCF_2000_both_ddx_down_with0_gene.bed \
    --region_name ddx_down_ctcf ddx_down \
    --output results/figures/ \
    --border_name start_gene end_gene \
    --environment 10000 25 \
    --show_replicate n \
    --figure_type metagene \
    --nb_bin 100 \
    --norm "None"
  mv results/figures/metagene_ddx_down_ctcf-ddx_down_100bin_10000_nt-around-25-bin.pdf \
    "results/figures/exp_${exp}_metagene_ddx_down_ctcf-ddx_down_100bin_10000_nt-around-25-bin_gene.pdf"
  python3 -m src.visu \
    --design "data/design_exp_${exp}.txt" \
    --bw_folder data/bigwig/ \
    --region_bed results/bed_file/CTCF_2000_both_ddx_down_with0_gene.bed results/bed_file/Far_CTCF_2000_both_ddx_down_with0_gene.bed \
    --region_name ddx_down_ctcf ddx_down \
    --output results/figures/ \
    --border_name start_gene end_gene \
    --environment 10000 25 \
    --show_replicate n \
    --figure_type metagene \
    --nb_bin 100 \
    --norm "0"
  mv results/figures/metagene_ddx_down_ctcf-ddx_down_100bin_10000_nt-around-25-bin_b0_norm.pdf \
    "results/figures/exp_${exp}_metagene_ddx_down_ctcf-ddx_down_100bin_10000_nt-around-25-bin_b0_norm_gene.pdf"
  python3 -m src.visu \
    --design "data/design_exp_${exp}.txt" \
    --bw_folder data/bigwig/ \
    --region_bed results/bed_file/CTCF_2000_both_ddx_down_with0_gene-dup.bed results/bed_file/Far_CTCF_2000_both_ddx_down_with0_gene-dup.bed \
    --region_name ddx_down_ctcf ddx_down \
    --output results/figures/ \
    --border_name start_gene end_gene \
    --environment 10000 25 \
    --show_replicate n \
    --figure_type metagene \
    --nb_bin 100 \
    --norm "0"
  rm results/figures/metagene_ddx_down_ctcf-ddx_down_100bin_10000_nt-around-25-bin_b0_norm.pdf
  python3 -m src.visu \
    --design "data/design_exp_${exp}.txt" \
    --bw_folder data/bigwig/ \
    --region_bed results/bed_file/CTCF_2000_both_ddx_down_with0_exon.bed results/bed_file/Far_CTCF_2000_both_ddx_down_with0_exon.bed \
    --region_name ddx_down_ctcf ddx_down \
    --output results/figures/ \
    --border_name start_exon end_exon \
    --environment 10000 25 \
    --show_replicate n \
    --figure_type metagene \
    --nb_bin 100 \
    --norm "results/figures/coef_table/tmp_cov_table_design_exp_${exp}_CTCF_2000_both_ddx_down_with0_gene-dup-Far_CTCF_2000_both_ddx_down_with0_gene-dup_100bin_10000_nt-around-25-bin_bin0_norm.txt"
  mv results/figures/metagene_ddx_down_ctcf-ddx_down_100bin_10000_nt-around-25-bin_file_norm.pdf \
    "results/figures/exp_${exp}_metagene_ddx_down_ctcf-ddx_down_100bin_10000_nt-around-25-bin_file_norm_exon.pdf"
done

########################################################
# Condition siPP and siCTRL - ddx_down_5-3             #
########################################################
python3 -m src.visu \
  --design data/design_exp_all_replicates.txt \
  --bw_folder data/bigwig/ \
  --region_bed results/bed_file/gene_with_5-3p_loop_siPPdown.txt \
  --region_name ddx_down_5-3 \
  --output results/figures/ \
  --border_name start_gene end_gene \
  --environment 10000 25 \
  --show_replicate n \
  --figure_type metagene \
  --nb_bin 100 \
  --norm "0"
mv results/figures/metagene_ddx_down_5-3_100bin_10000_nt-around-25-bin_b0_norm.pdf \
  results/figures/exp_all_metagene_ddx_down_5-3_100bin_10000_nt-around-25-bin_b0_norm_gene.pdf

########################################################
# Condition siPP - ddx_down_5-3 vs exon ddx_down       #
########################################################
# gene ddx_down_5-3 corresponds to genes containing at least one exon
# down-regulated by ddx5/17 and having a 5'-3' loop and
# gene ddx_down corresponds to genes containing exons down-regulated by ddx
# but without a 5'-3' loop
python3 -m src.visu \
  --design data/design_exp_siDDX.txt \
  --bw_folder data/bigwig/ \
  --region_bed results/bed_file/gene_with_5-3p_loop_siPPdown.txt results/bed_file/gene_without_loops_siPPdown.txt \
  --region_name ddx_down_5-3 ddx_down \
  --output results/figures/ \
  --border_name start_gene end_gene \
  --environment 10000 25 \
  --show_replicate n \
  --figure_type metagene \
  --nb_bin 100 \
  --norm "0"
mv results/figures/metagene_ddx_down_5-3-ddx_down_100bin_10000_nt-around-25-bin_b0_norm.pdf \
  results/figures/exp_siPP_metagene_ddx_down_5-3-ddx_down_100bin_10000_nt-around-25-bin_b0_norm_gene.pdf

python3 -m src.visu \
  --design data/design_exp_siDDX.txt \
  --bw_folder data/bigwig/ \
  --region_bed results/bed_file/gene_with_5-3p_loop_siPPdown.txt results/bed_file/gene_without_loops_siPPdown.txt \
  --region_name ddx_down_5-3 ddx_down \
  --output results/figures/ \
  --border_name start_gene end_gene \
  --environment 10000 25 \
  --show_replicate n \
  --figure_type metagene \
  --nb_bin 100 \
  --norm "None"
mv results/figures/metagene_ddx_down_5-3-ddx_down_100bin_10000_nt-around-25-bin.pdf \
  results/figures/exp_siPP_metagene_ddx_down_5-3-ddx_down_100bin_10000_nt-around-25-bin_gene.pdf

# NOTE(review): the destination below ends in "_b0_normgene.pdf" whereas the
# siCTRL replicate loop uses "_b0_norm_gene.pdf"; looks like a missing
# underscore. Name kept as-is so existing consumers are not broken -- confirm.
rep=(1 2 3)
for i in "${rep[@]}"; do
  python3 -m src.visu \
    --design "data/design_exp_siDDX_rep${i}.txt" \
    --bw_folder data/bigwig/ \
    --region_bed results/bed_file/gene_with_5-3p_loop_siPPdown.txt results/bed_file/gene_without_loops_siPPdown.txt \
    --region_name ddx_down_5-3 ddx_down \
    --output results/figures/ \
    --border_name start_gene end_gene \
    --environment 10000 25 \
    --show_replicate n \
    --figure_type metagene \
    --nb_bin 100 \
    --norm "0"
  mv results/figures/metagene_ddx_down_5-3-ddx_down_100bin_10000_nt-around-25-bin_b0_norm.pdf \
    "results/figures/exp_siPP_rep${i}_metagene_ddx_down_5-3-ddx_down_100bin_10000_nt-around-25-bin_b0_normgene.pdf"
done

########################################################
# Condition siCTRL - ddx_down_5-3 vs exon ddx_down     #
########################################################
# gene ddx_down_5-3 corresponds to genes containing at least one exon
# down-regulated by ddx5/17 and having a 5'-3' loop and
# gene ddx_down corresponds to genes containing exons down-regulated by ddx
# but without a 5'-3' loop
python3 -m src.visu \
  --design data/design_exp_siCTRL.txt \
  --bw_folder data/bigwig/ \
  --region_bed results/bed_file/gene_with_5-3p_loop_siPPdown.txt results/bed_file/gene_without_loops_siPPdown.txt \
  --region_name ddx_down_5-3 ddx_down \
  --output results/figures/ \
  --border_name start_gene end_gene \
  --environment 10000 25 \
  --show_replicate n \
  --figure_type metagene \
  --nb_bin 100 \
  --norm "0"
mv results/figures/metagene_ddx_down_5-3-ddx_down_100bin_10000_nt-around-25-bin_b0_norm.pdf \
  results/figures/exp_siCTRL_metagene_ddx_down_5-3-ddx_down_100bin_10000_nt-around-25-bin_b0_norm_gene.pdf

python3 -m src.visu \
  --design data/design_exp_siCTRL.txt \
  --bw_folder data/bigwig/ \
  --region_bed results/bed_file/gene_with_5-3p_loop_siPPdown.txt results/bed_file/gene_without_loops_siPPdown.txt \
  --region_name ddx_down_5-3 ddx_down \
  --output results/figures/ \
  --border_name start_gene end_gene \
  --environment 10000 25 \
  --show_replicate n \
  --figure_type metagene \
  --nb_bin 100 \
  --norm "None"
mv results/figures/metagene_ddx_down_5-3-ddx_down_100bin_10000_nt-around-25-bin.pdf \
  results/figures/exp_siCTRL_metagene_ddx_down_5-3-ddx_down_100bin_10000_nt-around-25-bin_gene.pdf

rep=(1 2 3)
for i in "${rep[@]}"; do
  python3 -m src.visu \
    --design "data/design_exp_siCTRL_rep${i}.txt" \
    --bw_folder data/bigwig/ \
    --region_bed results/bed_file/gene_with_5-3p_loop_siPPdown.txt results/bed_file/gene_without_loops_siPPdown.txt \
    --region_name ddx_down_5-3 ddx_down \
    --output results/figures/ \
    --border_name start_gene end_gene \
    --environment 10000 25 \
    --show_replicate n \
    --figure_type metagene \
    --nb_bin 100 \
    --norm "0"
  mv results/figures/metagene_ddx_down_5-3-ddx_down_100bin_10000_nt-around-25-bin_b0_norm.pdf \
    "results/figures/exp_siCTRL_rep${i}_metagene_ddx_down_5-3-ddx_down_100bin_10000_nt-around-25-bin_b0_norm_gene.pdf"
done

############################################################
# GC content exon ddx_down vs exon ddx_down_ctcf           #
############################################################
python3 -m src.gc_content \
  -B results/bed_file/Far_CTCF_2000_both_ddx_down_with0_exon.bed results/bed_file/CTCF_2000_both_ddx_down_with0_exon.bed \
  -b ddx_down ddx_down_ctcf \
  -g data/Homo_sapiens.GRCh37.dna.primary_assembly.fa \
  -f "exons" \
  -e 2000

############################################################################
# Other exons in genes containing an exon regulated by DDX
############################################################################

################################################################
# Condition siCTRL - exon ddx_down vs exon ddx_down_ctcf       #
################################################################
python3 -m src.bed_handler.get_other_exon_in_same_gene \
  -b results/bed_file/CTCF_2000_both_ddx_down_with0_exon.bed \
  -d 2000 \
  -o oexon_2000_CTCF_2000_both_ddx_down_with0_exon.bed
python3 -m src.bed_handler.get_other_exon_in_same_gene \
  -b results/bed_file/Far_CTCF_2000_both_ddx_down_with0_exon.bed \
  -d 2000 \
  -o oexon_2000_Far_CTCF_2000_both_ddx_down_with0_exon.bed

# NOTE(review): the --norm table name used by the last src.visu call of this
# loop contains "..._gene-gene-dup-..._exon-gene-dup", which does not follow
# the "<bed1>-<bed2>" pattern (built from the gene-dup bed names) used by every
# other table in this script. Possibly a search/replace slip; left untouched
# because the real file name cannot be checked from here -- confirm.
exps=(siCTRL siDDX)
for exp in "${exps[@]}"; do
  python3 -m src.visu \
    --design "data/design_exp_${exp}.txt" \
    --bw_folder data/bigwig/ \
    --region_bed results/bed_file/oexon_2000_CTCF_2000_both_ddx_down_with0_gene.bed results/bed_file/oexon_2000_Far_CTCF_2000_both_ddx_down_with0_gene.bed \
    --region_name other_ddx_down_ctcf other_ddx_down \
    --output results/figures/ \
    --border_name start_gene end_gene \
    --environment 10000 25 \
    --show_replicate n \
    --figure_type metagene \
    --nb_bin 100 \
    --norm "None"
  mv results/figures/metagene_other_ddx_down_ctcf-other_ddx_down_100bin_10000_nt-around-25-bin.pdf \
    "results/figures/exp_${exp}_metagene_other_ddx_down_ctcf-other_ddx_down_100bin_10000_nt-around-25-bin_gene.pdf"
  python3 -m src.visu \
    --design "data/design_exp_${exp}.txt" \
    --bw_folder data/bigwig/ \
    --region_bed results/bed_file/oexon_2000_CTCF_2000_both_ddx_down_with0_gene.bed results/bed_file/oexon_2000_Far_CTCF_2000_both_ddx_down_with0_gene.bed \
    --region_name other_ddx_down_ctcf other_ddx_down \
    --output results/figures/ \
    --border_name start_gene end_gene \
    --environment 10000 25 \
    --show_replicate n \
    --figure_type metagene \
    --nb_bin 100 \
    --norm "0"
  mv results/figures/metagene_other_ddx_down_ctcf-other_ddx_down_100bin_10000_nt-around-25-bin_b0_norm.pdf \
    "results/figures/exp_${exp}_metagene_other_ddx_down_ctcf-other_ddx_down_100bin_10000_nt-around-25-bin_gene_b0_norm.pdf"
  python3 -m src.visu \
    --design "data/design_exp_${exp}.txt" \
    --bw_folder data/bigwig/ \
    --region_bed results/bed_file/oexon_2000_CTCF_2000_both_ddx_down_with0_gene-dup.bed results/bed_file/oexon_2000_Far_CTCF_2000_both_ddx_down_with0_gene-dup.bed \
    --region_name other_ddx_down_ctcf other_ddx_down \
    --output results/figures/ \
    --border_name start_gene end_gene \
    --environment 10000 25 \
    --show_replicate n \
    --figure_type metagene \
    --nb_bin 100 \
    --norm "0"
  rm results/figures/metagene_other_ddx_down_ctcf-other_ddx_down_100bin_10000_nt-around-25-bin_b0_norm.pdf
  python3 -m src.visu \
    --design "data/design_exp_${exp}.txt" \
    --bw_folder data/bigwig/ \
    --region_bed results/bed_file/oexon_2000_CTCF_2000_both_ddx_down_with0_exon.bed results/bed_file/oexon_2000_Far_CTCF_2000_both_ddx_down_with0_exon.bed \
    --region_name other_ddx_down_ctcf other_ddx_down \
    --output results/figures/ \
    --border_name start_exon end_exon \
    --environment 10000 25 \
    --show_replicate n \
    --figure_type metagene \
    --nb_bin 100 \
    --norm "results/figures/coef_table/tmp_cov_table_design_exp_${exp}_oexon_2000_CTCF_2000_both_ddx_down_with0_gene-gene-dup-oexon_2000_Far_CTCF_2000_both_ddx_down_with0_exon-gene-dup_100bin_10000_nt-around-25-bin_bin0_norm.txt"
  mv results/figures/metagene_other_ddx_down_ctcf-other_ddx_down_100bin_10000_nt-around-25-bin_file_norm.pdf \
    "results/figures/exp_${exp}_metagene_other_ddx_down_ctcf-other_ddx_down_100bin_10000_nt-around-25-bin_file_norm_exon.pdf"
done

###########################################################
# Figures siPP vs siCTRL for ddx_down_ctcf,
# other_ddx_down_ctcf, ddx_down
###########################################################
list_names=(ddx_down_ctcf other_ddx_down_ctcf ddx_down)
bed_names=(
  CTCF_2000_both_ddx_down_with0_exon.bed
  oexon_2000_CTCF_2000_both_ddx_down_with0_exon.bed
  Far_CTCF_2000_both_ddx_down_with0_exon.bed
)
for i in "${!list_names[@]}"; do
  cname="${list_names[i]}"
  bed="${bed_names[i]}"
  gbed="${bed%exon.bed}gene-dup.bed"   # X_exon.bed -> X_gene-dup.bed
  nbed="${gbed%.bed}"                  # X_gene-dup
  python3 -m src.visu \
    --design data/design_exp_all_replicates.txt \
    --bw_folder data/bigwig/ \
    --region_bed "results/bed_file/${gbed}" \
    --region_name "${cname}" \
    --output results/figures/ \
    --border_name TSS TTS \
    --environment 10000 25 \
    --show_replicate n \
    --figure_type metagene \
    --nb_bin 100 \
    --norm '0'
  rm "results/figures/metagene_${cname}_100bin_10000_nt-around-25-bin_b0_norm.pdf"
  python3 -m src.visu \
    --design data/design_exp_all_replicates.txt \
    --bw_folder data/bigwig/ \
    --region_bed "results/bed_file/${bed}" \
    --region_name "${cname}" \
    --output results/figures/ \
    --border_name " " " " \
    --environment 10000 25 \
    --show_replicate n \
    --figure_type metagene \
    --nb_bin 30 \
    -y 0.15 0.4 \
    --norm "results/figures/coef_table/tmp_cov_table_design_exp_all_replicates_${nbed}_100bin_10000_nt-around-25-bin_bin0_norm.txt"
  mv "results/figures/metagene_${cname}_30bin_10000_nt-around-25-bin_file_norm.pdf" \
    "results/figures/all_replicates_metagene_${cname}_100bin_10000_nt-around-25-bin_file_norm.pdf"
done

## Recap
python3 -m src.visu \
  --design data/design_exp_all_replicates.txt \
  --bw_folder data/bigwig/ \
  --region_bed results/bed_file/CTCF_2000_both_ddx_down_with0_gene-dup.bed results/bed_file/oexon_2000_CTCF_2000_both_ddx_down_with0_gene-dup.bed results/bed_file/Far_CTCF_2000_both_ddx_down_with0_gene-dup.bed \
  --region_name ddx_down_ctcf other_ddx_down_ctcf ddx_down \
  --output results/figures/ \
  --border_name TSS TTS \
  --environment 10000 25 \
  --show_replicate n \
  --figure_type metagene \
  --nb_bin 100 \
  --norm '0'
rm results/figures/metagene_ddx_down_ctcf-other_ddx_down_ctcf-ddx_down_100bin_10000_nt-around-25-bin_b0_norm.pdf

python3 -m src.visu \
  --design data/design_exp_all_replicates.txt \
  --bw_folder data/bigwig/ \
  --region_bed results/bed_file/CTCF_2000_both_ddx_down_with0_exon.bed results/bed_file/oexon_2000_CTCF_2000_both_ddx_down_with0_exon.bed results/bed_file/Far_CTCF_2000_both_ddx_down_with0_exon.bed \
  --region_name ddx_down_ctcf other_ddx_down_ctcf ddx_down \
  --output results/figures/ \
  --border_name exon_start exon_stop \
  --environment 10000 25 \
  --show_replicate n \
  --figure_type metagene \
  --nb_bin 100 \
  --norm 'results/figures/coef_table/tmp_cov_table_design_exp_all_replicates_CTCF_2000_both_ddx_down_with0_gene-dup-oexon_2000_CTCF_2000_both_ddx_down_with0_gene-dup-Far_CTCF_2000_both_ddx_down_with0_gene-dup_100bin_10000_nt-around-25-bin_bin0_norm.txt'
mv results/figures/metagene_ddx_down_ctcf-other_ddx_down_ctcf-ddx_down_100bin_10000_nt-around-25-bin_file_norm.pdf \
  results/figures/all_replicates_metagene_ddx_down_ctcf-other_ddx_down_ctcf-ddx_down_100bin_10000_nt-around-25-bin_file_norm.pdf

###############################################################################
# Bigwig from MCF7 rnaseq
###############################################################################

# Create a bed file containing every gene that holds a DDX-down exon:
# header line first, then the de-duplicated union of the near-CTCF and
# far-from-CTCF gene beds (their own header lines are filtered out).
printf '#ref\tstart\tend\tid\tscore\tstrand\n' > results/bed_file/ddx_down_gene.bed
cat results/bed_file/CTCF_2000_both_ddx_down_with0_gene.bed \
  results/bed_file/Far_CTCF_2000_both_ddx_down_with0_gene.bed \
  | sort -u \
  | grep -v "#ref" >> results/bed_file/ddx_down_gene.bed

# Resize those genes with -s 1500 (presumably the first 1500 bp after the TSS,
# given the "TSS TSS_1.5kb" borders used below -- confirm against bed_resize).
python3 -m src.bed_handler.bed_resize \
  -b results/bed_file/ddx_down_gene.bed \
  -s 1500 \
  -o ddx_down_gene_size1500.bed

python3 -m src.visu \
  --design data/bam_mcf7_rnaseq/design_exp_all_replicates.txt \
  --bw_folder data/bam_mcf7_rnaseq \
  --region_bed results/bed_file/ddx_down_gene.bed \
  --region_name all_ddx_down \
  --output results/figures/ \
  --border_name TSS TTS \
  --environment 10000 25 \
  --show_replicate n \
  --figure_type metagene \
  --nb_bin 100 \
  --norm 'None'
mv results/figures/metagene_all_ddx_down_100bin_10000_nt-around-25-bin.pdf \
  results/figures/metagene_MCF_rnaseq_all_ddx_down.pdf

python3 -m src.visu \
  --design data/bam_mcf7_rnaseq/design_exp_all_replicates.txt \
  --bw_folder data/bam_mcf7_rnaseq \
  --region_bed results/bed_file/ddx_down_gene_size1500.bed \
  --region_name all_ddx_down \
  --output results/figures/ \
  --border_name TSS TSS_1.5kb \
  --environment 10000 25 \
  --show_replicate n \
  --figure_type metagene \
  --nb_bin 100 \
  --norm 'None'
mv results/figures/metagene_all_ddx_down_100bin_10000_nt-around-25-bin.pdf \
  results/figures/metagene_MCF_rnaseq_all_ddx_down_tss1.5kb.pdf

#### Readthrough

# Create readthrough bed files (the second call adds -k 'n' to build the
# "no readthrough" counterpart from the same gene list).
python3 -m src.bed_handler.filter_bed \
  -b data/bed/gene.bed \
  -f data/readthrough_gene.txt \
  -c score \
  -o readthrough_gene.bed
python3 -m src.bed_handler.filter_bed \
  -b data/bed/gene.bed \
  -f data/readthrough_gene.txt \
  -c score \
  -o no_readthrough_gene.bed \
  -k 'n'

# Filtering only expressed genes with basemean > 5.
python3 -m src.bed_handler.filter_bed \
  -b results/bed_file/readthrough_gene.bed \
  -f 'data/5y_expressed_genes_basemean>5.txt' \
  -c id \
  -o readthrough_expressed_gene.bed
python3 -m src.bed_handler.filter_bed \
  -b results/bed_file/no_readthrough_gene.bed \
  -f 'data/5y_expressed_genes_basemean>5.txt' \
  -c id \
  -o no_readthrough_expressed_gene.bed

# Create bed files corresponding to the 10kb downstream regions of the
# previous beds
python3 -m src.bed_handler.bed_resize \
  -b results/bed_file/no_readthrough_expressed_gene.bed \
  -s 10000 \
  -r "end" \
  -t "outer" \
  -o no_readthrough_expressed_gene_10kb.bed
python3 -m src.bed_handler.bed_resize \
  -b results/bed_file/readthrough_expressed_gene.bed \
  -s 10000 \
  -r "end" \
  -t "outer" \
  -o readthrough_expressed_gene_10kb.bed

# Gene-body metagenes, normalised on bin 0 and on bin 99.
exps=(all_replicates siCTRL siDDX)
bins=(0 99)
for exp in "${exps[@]}"; do
  for bin in "${bins[@]}"; do
    python3 -m src.visu \
      --design "data/design_exp_${exp}.txt" \
      --bw_folder data/bigwig/ \
      --region_bed results/bed_file/readthrough_expressed_gene.bed results/bed_file/no_readthrough_expressed_gene.bed \
      --region_name readthrough no_readthrough \
      --output results/figures/ \
      --border_name TSS TTS \
      --environment 10000 25 \
      --show_replicate n \
      --figure_type metagene \
      --nb_bin 100 \
      --norm "${bin}"
    mv "results/figures/metagene_readthrough-no_readthrough_100bin_10000_nt-around-25-bin_b${bin}_norm.pdf" \
      "results/figures/${exp}_metagene_readthrough-no_readthrough_100bin_10000_nt-around-25-bin_b${bin}_norm.pdf"
  done
done

# 10kb-downstream metagenes, normalised with the bin-0 / bin-99 tables of the
# gene-body runs above; loc[i] is the label that goes with bins[i].
exps=(all_replicates siCTRL siDDX)
bins=(0 99)
loc=(TSS TTS)
for exp in "${exps[@]}"; do
  for i in "${!bins[@]}"; do
    python3 -m src.visu \
      --design "data/design_exp_${exp}.txt" \
      --bw_folder data/bigwig/ \
      --region_bed results/bed_file/readthrough_expressed_gene_10kb.bed results/bed_file/no_readthrough_expressed_gene_10kb.bed \
      --region_name readthrough no_readthrough \
      --output results/figures/ \
      --border_name TTS '' \
      --environment 0 0 \
      --show_replicate n \
      --figure_type metagene \
      --nb_bin 25 \
      --norm "results/figures/coef_table/tmp_cov_table_design_exp_${exp}_readthrough_expressed_gene-no_readthrough_expressed_gene_100bin_10000_nt-around-25-bin_bin${bins[$i]}_norm.txt"
    mv results/figures/metagene_readthrough-no_readthrough_25bin_0_nt-around-0-bin_file_norm.pdf \
      "results/figures/${exp}_TTS10kb_readthrough-no_readthrough_${loc[$i]}_norm.pdf"
  done
done

# Figure 1C
names=(IP)
designs=(data/designnew_exp_all_replicates_IP.txt)
bins=(99) # 0)
beds=(readthrough) # no_readthrough)
for bed in "${beds[@]}"; do
  for bin in "${bins[@]}"; do
    for i in "${!designs[@]}"; do
      python3 -m src.visu \
        --design "${designs[i]}" \
        --bw_folder data/bigwig_newnorm/ \
        --region_bed "results/bed_file/${bed}_expressed_gene.bed" \
        --region_name "${bed}" \
        --output results/figures/ \
        --border_name TSS TTS \
        --environment 10000 25 \
        --show_replicate n \
        --figure_type metagene \
        --nb_bin 100 \
        --norm "${bin}" \
        --stat True
      mv "results/figures/metagene_${bed}_100bin_10000_nt-around-25-bin_b${bin}_norm.pdf" \
        "results/figures/Fig1C_${bed}_b${bin}_norm_${names[$i]}.pdf"
    done
  done
done

names=(IP)
designs=(data/designnew_exp_all_replicates_IP.txt)
beds=(readthrough) # no_readthrough)
bins=(99) # 0)
loc=(TTS) # TSS)
for bed in "${beds[@]}"; do
  for i in "${!bins[@]}"; do
    for j in "${!designs[@]}"; do
      # i indexes bins/loc, j indexes designs/names: the design must be picked
      # with j (it was picked with i, which only worked because both arrays
      # currently hold a single element).
      python3 -m src.visu \
        --design "${designs[j]}" \
        --bw_folder data/bigwig_newnorm/ \
        --region_bed "results/bed_file/${bed}_expressed_gene_10kb.bed" \
        --region_name "${bed}" \
        --output results/figures/ \
        --border_name TTS '' \
        --environment 0 0 \
        --show_replicate n \
        --figure_type metagene \
        --nb_bin 25 \
        --norm "results/figures/coef_table/tmp_cov_table_designnew_exp_all_replicates_${names[$j]}_${bed}_expressed_gene_100bin_10000_nt-around-25-bin_bin${bins[$i]}_norm.txt" \
        --stat True
      mv "results/figures/metagene_${bed}_25bin_0_nt-around-0-bin_file_norm.pdf" \
        "results/figures/Fig_1C_TTS10kb_${bed}_${loc[$i]}-bin_norm_${names[$j]}.pdf"
    done
  done
done

# Graphics
##############################
#           SHY5Y            #
##############################

# Creating a bed file only containing expressed genes in 5y cells
python3 -m src.bed_handler.filter_bed \
  -b data/bed/gene.bed \
  -f 'data/5y_expressed_genes_basemean>5.txt' \
  -c id \
  -o 5y_expressed_gene.bed

# Metagene of the expressed genes without normalisation, then normalised on
# bin 0 and bin 99; bin_names[i] is the file-name suffix that goes with bins[i].
bins=('None' 0 99)
bin_names=('' '_b0_norm' '_b99_norm')
for i in "${!bins[@]}"; do
  python3 -m src.visu \
    --design data/bigwig_SHY5Y/design_exp_all_replicates.txt \
    --bw_folder data/bigwig_SHY5Y/ \
    --region_bed results/bed_file/5y_expressed_gene.bed \
    --region_name all_expressed_gene \
    --output results/figures/ \
    --border_name TSS TTS \
    --environment 10000 25 \
    --show_replicate n \
    --figure_type metagene \
    --nb_bin 100 \
    --norm "${bins[$i]}"
  mv "results/figures/metagene_all_expressed_gene_100bin_10000_nt-around-25-bin${bin_names[$i]}.pdf" \
    "results/figures/metagene_5y_rnaseq_all_expressed_gene${bin_names[$i]}.pdf"
done

tts_sizes=(10000 50000)
bins=(25 25)
for i in "${!tts_sizes[@]}"; do
  size="${tts_sizes[$i]}"
  bin="${bins[$i]}"
  # Size in kb: printed as an integer when ${size} is a multiple of 1000,
  # as a float otherwise. python3 is used like everywhere else in this script
  # (the one-liner relies on print() being a function).
  kb_size=$(python3 -c "print(int(${size}/1000)) if ${size}/1000 == int(${size}/1000) else print(${size}/1000)")
  # Resize the expressed-gene bed with the same bed_resize options as the
  # "10kb downstream" beds of the readthrough section, using ${size}.
  python3 -m src.bed_handler.bed_resize \
    -b results/bed_file/5y_expressed_gene.bed \
    -s "${size}" \
    -r "end" \
    -t "outer" \
    -o "all_expressed_gene_end${size}.bed"
  python3 -m src.visu \
    --design data/bigwig_SHY5Y/design_exp_all_replicates.txt \
    --bw_folder data/bigwig_SHY5Y/ \
    --region_bed "results/bed_file/all_expressed_gene_end${size}.bed" \
    --region_name all_expressed_gene \
    --output results/figures/ \
    --border_name TTS '' \
    --environment 0 0 \
    --show_replicate n \
    --figure_type metagene \
    --nb_bin "${bin}" \
    --norm "results/figures/coef_table/tmp_cov_table_design_exp_all_replicates_5y_expressed_gene_100bin_10000_nt-around-25-bin_bin99_norm.txt"
  mv "results/figures/metagene_all_expressed_gene_${bin}bin_0_nt-around-0-bin_file_norm.pdf" \
    "results/figures/metagene_5y_rnaseq_TTS2${kb_size}kb_expressed_gene_${bin}bin_bin_TTS_norm.pdf"
done

# 2kb region after TSS
python3 -m src.bed_handler.bed_resize \
  -b data/bed/gene.bed \
  -s 2000 \
  -r "start" \
  -o all_gene_TSS-2kb.bed

python3 -m src.visu \
  --design data/bigwig_SHY5Y/design_exp_all_replicates.txt \
  --bw_folder data/bigwig_SHY5Y/ \
  --region_bed results/bed_file/all_gene_TSS-2kb.bed \
  --region_name all_gene \
  --output results/figures/ \
  --border_name TSS '' \
  --environment 0 0 \
  --show_replicate n \
  --figure_type metagene \
  --nb_bin 100 \
  --norm "None" \
  --stat True
mv results/figures/metagene_all_gene_100bin_0_nt-around-0-bin.pdf \
  results/figures/metagene_5y_rnaseq_TSS-2kb_all_gene.pdf

#####################################################
# Metagene figure of last exons in readthrough genes near (<= 2000 nt) or far
# (> 2000 nt) from a CTCF site and last exons from non readthrough genes
# figures 5C
#####################################################

# Bed file containing the last exons of expressed genes with readthrough
python3 -m src.bed_handler.get_last_exons \
  -g results/bed_file/readthrough_expressed_gene.bed \
  -o results/bed_file/readthrough_expressed_last_exon.bed

# Bed file containing the last exons of expressed genes without readthrough
python3 -m src.bed_handler.get_last_exons \
  -g results/bed_file/no_readthrough_expressed_gene.bed \
  -o results/bed_file/no_readthrough_expressed_last_exon.bed

# Bed file containing the last exon in expressed genes with readthrough
# near CTCF (<= 2000 nt)
python3 -m src.bed_handler.select_regulated_near_ctcf_exons \
  -e results/bed_file/readthrough_expressed_last_exon.bed \
  -t 2000 \
  -l both \
  -i True \
  -N True \
  -n readthrough_last_exon

# Bed file containing the last exon in expressed genes with readthrough
# far from CTCF (> 2000 nt)
python3 -m src.bed_handler.select_regulated_near_ctcf_exons \
  -e results/bed_file/readthrough_expressed_last_exon.bed \
  -t 2000 \
  -l both \
  -i True \
  -N False \
  -n readthrough_last_exon

# The loop below derives "<X>_gene-dup.bed" from "<X>_exon.bed"; give the
# no-readthrough gene bed the name that derivation expects.
cp results/bed_file/no_readthrough_expressed_gene.bed \
  results/bed_file/no_readthrough_expressed_last_gene-dup.bed

# NOTE(review): the second src.visu call of this loop passes --stat True
# twice; presumably redundant, kept as-is -- confirm against src.visu.
list_names=(readthrough_ctcf readthrough no_readthrough)
bed_names=(
  readthrough_last_exon_near_CTCF_2000_both_ddx_with0_exon.bed
  readthrough_last_exon_far_CTCF_2000_both_ddx_with0_exon.bed
  no_readthrough_expressed_last_exon.bed
)
for i in "${!list_names[@]}"; do
  cname="${list_names[i]}"
  bed="${bed_names[i]}"
  gbed="${bed%exon.bed}gene-dup.bed"   # X_exon.bed -> X_gene-dup.bed
  nbed="${gbed%.bed}"                  # X_gene-dup
  python3 -m src.visu \
    --design data/designnew_exp_all_replicates_IP.txt \
    --bw_folder data/bigwig_newnorm \
    --region_bed "results/bed_file/${gbed}" \
    --region_name "${cname}" \
    --output results/figures/ \
    --border_name TSS TTS \
    --environment 10000 25 \
    --show_replicate n \
    --figure_type metagene \
    --nb_bin 100 \
    --norm '0'
  rm "results/figures/metagene_${cname}_100bin_10000_nt-around-25-bin_b0_norm.pdf"
  python3 -m src.visu \
    --design data/designnew_exp_all_replicates_IP.txt \
    --bw_folder data/bigwig_newnorm \
    --region_bed "results/bed_file/${bed}" \
    --region_name "${cname}" \
    --output results/figures/ \
    --border_name " " " " \
    --environment 10000 25 \
    --show_replicate n \
    --figure_type metagene \
    --nb_bin 30 \
    --stat True \
    -y 0.15 0.4 \
    --norm "results/figures/coef_table/tmp_cov_table_designnew_exp_all_replicates_IP_${nbed}_100bin_10000_nt-around-25-bin_bin0_norm.txt" \
    --stat True
  mv "results/figures/metagene_${cname}_30bin_10000_nt-around-25-bin_file_norm.pdf" \
    "results/figures/all_replicates_metaexon_${cname}_30bin_10000_nt-around-25-bin_file_norm.pdf"
done

###########################################################
# Figures siPP vs siCTRL for ddx_down_ctcf, ddx_down
# figure 5B
###########################################################
# NOTE(review): as in the 5C loop, --stat True is passed twice to the second
# src.visu call; kept as-is -- confirm against src.visu.
list_names=(ddx_down_ctcf ddx_down)
bed_names=(
  CTCF_2000_both_ddx_down_with0_exon.bed
  Far_CTCF_2000_both_ddx_down_with0_exon.bed
)
for i in "${!list_names[@]}"; do
  cname="${list_names[i]}"
  bed="${bed_names[i]}"
  gbed="${bed%exon.bed}gene-dup.bed"   # X_exon.bed -> X_gene-dup.bed
  nbed="${gbed%.bed}"                  # X_gene-dup
  python3 -m src.visu \
    --design data/designnew_exp_all_replicates_IP.txt \
    --bw_folder data/bigwig_newnorm/ \
    --region_bed "results/bed_file/${gbed}" \
    --region_name "${cname}" \
    --output results/figures/ \
    --border_name TSS TTS \
    --environment 10000 25 \
    --show_replicate n \
    --figure_type metagene \
    --nb_bin 100 \
    --norm '0'
  rm "results/figures/metagene_${cname}_100bin_10000_nt-around-25-bin_b0_norm.pdf"
  python3 -m src.visu \
    --design data/designnew_exp_all_replicates_IP.txt \
    --bw_folder data/bigwig_newnorm/ \
    --region_bed "results/bed_file/${bed}" \
    --region_name "${cname}" \
    --output results/figures/ \
    --border_name " " " " \
    --environment 10000 25 \
    --show_replicate n \
    --figure_type metagene \
    --nb_bin 30 \
    -y 0.15 0.4 \
    --stat True \
    --norm "results/figures/coef_table/tmp_cov_table_designnew_exp_all_replicates_IP_${nbed}_100bin_10000_nt-around-25-bin_bin0_norm.txt" \
    --stat True
  mv "results/figures/metagene_${cname}_30bin_10000_nt-around-25-bin_file_norm.pdf" \
    "results/figures/all_replicates_metagene_${cname}_100bin_10000_nt-around-25-bin_file_norm.pdf"
done

python3 -m src.gc_content \
  -B results/bed_file/readthrough_last_exon_near_CTCF_2000_both_ddx_with0_exon.bed results/bed_file/readthrough_last_exon_far_CTCF_2000_both_ddx_with0_exon.bed results/bed_file/no_readthrough_expressed_last_exon.bed \
  -b readthrough_ctcf readthrough no_readthrough \
  -g data/Homo_sapiens.GRCh37.dna.primary_assembly.fa \
  -f "exons" \
  -e 2000