#!/bin/bash
# commands.sh

# Run the src.bed_handler package entry point, then call filter_bed on
# data/bed/gene.bed once per list in data/, matching on the 'id' column.
python3 -m src.bed_handler

# Each entry is "<filter file in data/>:<output file name>".
filter_jobs=(
  "gene_5-3_loop_tot.txt:gene_with_5-3p_loop_tot.txt"
  "gene_5-3_loop_ctrl.txt:gene_with_5-3p_loop_ctrl.txt"
  "gene_5-3_loop_siPPdown.txt:gene_with_5-3p_loop_siPPdown.txt"
  "gene_without_5-3_loop_siPPdown.txt:gene_without_5-3p_loop_siPPdown.txt"
  "gene_without_loops_siPPdown.txt:gene_without_loops_siPPdown.txt"
)
for job in "${filter_jobs[@]}"; do
  python3 -m src.bed_handler.filter_bed \
    --bed_file data/bed/gene.bed \
    --filter_file "data/${job%%:*}" \
    --col_name 'id' \
    --outfile "${job#*:}"
done

# Create the figure output directory. -p also creates the parent "results"
# directory when needed and does not fail if the directory already exists,
# so the script can be re-run.
mkdir -p results/figures

# Metagene figures over results/bed_file/filtered_gene.bed, one pass per design
# file (all_replicates, rep1, rep2, rep3). Each pass runs src.visu twice:
# with --norm 'None' and with --norm '0'.
array=(all_replicates rep1 rep2 rep3)
for myrep in "${array[@]}"; do
  # Options shared by both runs; only --norm differs.
  shared_opts=(
    --design "data/design_exp_${myrep}.txt"
    --bw_folder data/bigwig/
    --region_bed results/bed_file/filtered_gene.bed
    --region_name gene
    --output results/figures/
    --border_name TSS TTS
    --environment 10000 25
    --show_replicate n
    --figure_type metagene
    --nb_bin 100
  )
  python3 -m src.visu "${shared_opts[@]}" --norm 'None'
  python3 -m src.visu "${shared_opts[@]}" --norm '0'

  # Rename both PDFs so that their names carry the design suffix.
  pdf_stem=results/figures/metagene_gene_100bin_10000_nt-around-25-bin
  mv "${pdf_stem}_b0_norm.pdf" "${pdf_stem}_b0_norm_${myrep}.pdf"
  mv "${pdf_stem}.pdf" "${pdf_stem}_${myrep}.pdf"
done



# Same two src.visu runs (--norm 'None', then --norm '0') for every design
# file, this time on results/bed_file/gene_with_5-3p_loop_siPPdown.txt.
array=(all_replicates rep1 rep2 rep3)
for myrep in "${array[@]}"; do
  for norm_value in 'None' '0'; do
    python3 -m src.visu \
      --design "data/design_exp_${myrep}.txt" \
      --bw_folder data/bigwig/ \
      --region_bed results/bed_file/gene_with_5-3p_loop_siPPdown.txt \
      --region_name gene \
      --output results/figures/ \
      --border_name TSS TTS \
      --environment 10000 25 \
      --show_replicate n \
      --figure_type metagene \
      --nb_bin 100 \
      --norm "${norm_value}"
  done

  # Rename both PDFs with the design name and a "5-3-loop" suffix.
  loop_stem=results/figures/metagene_gene_100bin_10000_nt-around-25-bin
  mv "${loop_stem}_b0_norm.pdf" "${loop_stem}_b0_norm_${myrep}_5-3-loop.pdf"
  mv "${loop_stem}.pdf" "${loop_stem}_${myrep}_5-3-loop.pdf"
done


# For every results/bed_file/CTCF*gene.bed file, run three src.visu passes:
#   1. the matching *exon.bed file with --norm 'None' (figure kept),
#   2. the matching *gene-dup.bed file with --norm '0' (figure removed),
#   3. the *exon.bed file again with --norm pointing at a coef_table file
#      whose name is built from the gene-dup bed name (figure kept).
# The glob is expanded by the shell itself instead of parsing `ls` output.
for gene_bed in results/bed_file/CTCF*gene.bed; do
  # An unmatched glob stays literal; skip it rather than run on a bogus path.
  [[ -e "${gene_bed}" ]] || continue

  exon_bed=${gene_bed/gene/exon}
  # Base name of the exon bed without its ".bed" extension.
  file_name=${exon_bed##*/}
  file_name=${file_name/.bed/}
  # Same name with "exon" swapped for "gene-dup".
  full_name=${file_name/exon/gene-dup}
  # From here on gene_bed designates the "-dup.bed" sibling of the globbed file.
  gene_bed=${gene_bed/.bed/-dup.bed}

  head_opts=(
    --design data/design_exp_all_replicates.txt
    --bw_folder data/bigwig/
  )
  tail_opts=(
    --output results/figures/
    --border_name start_exon end_exon
    --environment 10000 25
    --show_replicate n
    --figure_type metagene
    --nb_bin 100
  )

  python3 -m src.visu "${head_opts[@]}" \
    --region_bed "${exon_bed}" \
    --region_name exon \
    "${tail_opts[@]}" \
    --norm 'None'
  mv results/figures/metagene_exon_100bin_10000_nt-around-25-bin.pdf \
    "results/figures/${file_name}_metagene_exon_100bin_10000_nt-around-25-bin.pdf"

  python3 -m src.visu "${head_opts[@]}" \
    --region_bed "${gene_bed}" \
    --region_name gene \
    "${tail_opts[@]}" \
    --norm '0'
  # The figure of this pass is discarded.
  rm results/figures/metagene_gene_100bin_10000_nt-around-25-bin_b0_norm.pdf

  python3 -m src.visu "${head_opts[@]}" \
    --region_bed "${exon_bed}" \
    --region_name exon \
    "${tail_opts[@]}" \
    --norm "results/figures/coef_table/tmp_cov_table_design_exp_all_replicates_${full_name}_100bin_10000_nt-around-25-bin_bin0_norm.txt"
  mv results/figures/metagene_exon_100bin_10000_nt-around-25-bin_file_norm.pdf \
    "results/figures/${file_name}_metagene_exon_100bin_10000_nt-around-25-bin_file_norm.pdf"
done

########################################################
# Condition siPP/siCTRL - exon ddx_down vs exon ddx_down_ctcf #
########################################################
# exon ddx_down_ctcf corresponds to exons down-regulated by ddx5/17 near a ctcf site and
# exon ddx_down corresponds to exons down-regulated by ddx5/17 far from ctcf sites

# For the siCTRL and siDDX designs, compare the CTCF_2000 and Far_CTCF_2000
# bed files (labelled ddx_down_ctcf and ddx_down) in four src.visu passes.
exps=(siCTRL siDDX)
near=results/bed_file/CTCF_2000_both_ddx_down_with0
far=results/bed_file/Far_CTCF_2000_both_ddx_down_with0
fig=metagene_ddx_down_ctcf-ddx_down_100bin_10000_nt-around-25-bin
for exp in "${exps[@]}"; do
  head_opts=(--design "data/design_exp_${exp}.txt" --bw_folder data/bigwig/)
  name_opts=(--region_name ddx_down_ctcf ddx_down --output results/figures/)
  tail_opts=(
    --environment 10000 25
    --show_replicate n
    --figure_type metagene
    --nb_bin 100
  )

  # Gene beds, --norm "None".
  python3 -m src.visu "${head_opts[@]}" \
    --region_bed "${near}_gene.bed" "${far}_gene.bed" \
    "${name_opts[@]}" \
    --border_name start_gene end_gene \
    "${tail_opts[@]}" \
    --norm "None"
  mv "results/figures/${fig}.pdf" "results/figures/exp_${exp}_${fig}_gene.pdf"

  # Gene beds, --norm "0".
  python3 -m src.visu "${head_opts[@]}" \
    --region_bed "${near}_gene.bed" "${far}_gene.bed" \
    "${name_opts[@]}" \
    --border_name start_gene end_gene \
    "${tail_opts[@]}" \
    --norm "0"
  mv "results/figures/${fig}_b0_norm.pdf" \
    "results/figures/exp_${exp}_${fig}_b0_norm_gene.pdf"

  # gene-dup beds, --norm "0"; the figure of this pass is removed.
  python3 -m src.visu "${head_opts[@]}" \
    --region_bed "${near}_gene-dup.bed" "${far}_gene-dup.bed" \
    "${name_opts[@]}" \
    --border_name start_gene end_gene \
    "${tail_opts[@]}" \
    --norm "0"
  rm "results/figures/${fig}_b0_norm.pdf"

  # Exon beds, --norm given as a coef_table file named after the gene-dup beds.
  python3 -m src.visu "${head_opts[@]}" \
    --region_bed "${near}_exon.bed" "${far}_exon.bed" \
    "${name_opts[@]}" \
    --border_name start_exon end_exon \
    "${tail_opts[@]}" \
    --norm "results/figures/coef_table/tmp_cov_table_design_exp_${exp}_CTCF_2000_both_ddx_down_with0_gene-dup-Far_CTCF_2000_both_ddx_down_with0_gene-dup_100bin_10000_nt-around-25-bin_bin0_norm.txt"
  mv "results/figures/${fig}_file_norm.pdf" \
    "results/figures/exp_${exp}_${fig}_file_norm_exon.pdf"
done

########################################################
# Condition siPP and siCTRL - ddx_down_5-3              #
########################################################

# Single src.visu run (--norm "0") on gene_with_5-3p_loop_siPPdown.txt with the
# all_replicates design, followed by a rename of the resulting PDF.
ddx53_opts=(
  --design data/design_exp_all_replicates.txt
  --bw_folder data/bigwig/
  --region_bed results/bed_file/gene_with_5-3p_loop_siPPdown.txt
  --region_name ddx_down_5-3
  --output results/figures/
  --border_name start_gene end_gene
  --environment 10000 25
  --show_replicate n
  --figure_type metagene
  --nb_bin 100
  --norm "0"
)
python3 -m src.visu "${ddx53_opts[@]}"

ddx53_fig=metagene_ddx_down_5-3_100bin_10000_nt-around-25-bin_b0_norm
mv "results/figures/${ddx53_fig}.pdf" \
  "results/figures/exp_all_${ddx53_fig}_gene.pdf"

########################################################
# Condition siPP - ddx_down_5-3 vs exon ddx_down #
########################################################
# gene ddx_down_5-3 corresponds to genes containing at least one exons down-regulated by ddx5/17 and having a 5'-3' loop and
# gene ddx_down corresponds to genes containing exons down-regulated by ddx but without a 5'-3' loop


# siDDX designs: compare gene_with_5-3p_loop_siPPdown.txt (ddx_down_5-3) with
# gene_without_loops_siPPdown.txt (ddx_down).
#   - pooled design: --norm "0", then --norm "None"
#   - designs rep1..rep3: --norm "0" only
# Every output PDF is renamed with an "exp_siPP" prefix and a "_gene" suffix.
sipp_fig=metagene_ddx_down_5-3-ddx_down_100bin_10000_nt-around-25-bin
sipp_opts=(
  --bw_folder data/bigwig/
  --region_bed results/bed_file/gene_with_5-3p_loop_siPPdown.txt results/bed_file/gene_without_loops_siPPdown.txt
  --region_name ddx_down_5-3 ddx_down
  --output results/figures/
  --border_name start_gene end_gene
  --environment 10000 25
  --show_replicate n
  --figure_type metagene
  --nb_bin 100
)

python3 -m src.visu --design data/design_exp_siDDX.txt "${sipp_opts[@]}" --norm "0"
mv "results/figures/${sipp_fig}_b0_norm.pdf" \
  "results/figures/exp_siPP_${sipp_fig}_b0_norm_gene.pdf"

python3 -m src.visu --design data/design_exp_siDDX.txt "${sipp_opts[@]}" --norm "None"
mv "results/figures/${sipp_fig}.pdf" \
  "results/figures/exp_siPP_${sipp_fig}_gene.pdf"

rep=(1 2 3)
for i in "${rep[@]}"; do
  python3 -m src.visu \
    --design "data/design_exp_siDDX_rep${i}.txt" \
    "${sipp_opts[@]}" \
    --norm "0"

  # The suffix is "_b0_norm_gene.pdf", as for the pooled figure above and for
  # the siCTRL replicate figures (it used to be "_b0_normgene.pdf" here).
  mv "results/figures/${sipp_fig}_b0_norm.pdf" \
    "results/figures/exp_siPP_rep${i}_${sipp_fig}_b0_norm_gene.pdf"
done

########################################################
# Condition siCTRL - ddx_down_5-3 vs exon ddx_down #
########################################################
# gene ddx_down_5-3 corresponds to genes containing at least one exons down-regulated by ddx5/17 and having a 5'-3' loop and
# gene ddx_down corresponds to genes containing exons down-regulated by ddx but without a 5'-3' loop

# siCTRL designs: compare gene_with_5-3p_loop_siPPdown.txt (ddx_down_5-3) with
# gene_without_loops_siPPdown.txt (ddx_down).
#   - pooled design: --norm "0", then --norm "None"
#   - designs rep1..rep3: --norm "0" only
# Every output PDF is renamed with an "exp_siCTRL" prefix and a "_gene" suffix.
ctrl_fig=metagene_ddx_down_5-3-ddx_down_100bin_10000_nt-around-25-bin
ctrl_opts=(
  --bw_folder data/bigwig/
  --region_bed results/bed_file/gene_with_5-3p_loop_siPPdown.txt results/bed_file/gene_without_loops_siPPdown.txt
  --region_name ddx_down_5-3 ddx_down
  --output results/figures/
  --border_name start_gene end_gene
  --environment 10000 25
  --show_replicate n
  --figure_type metagene
  --nb_bin 100
)

python3 -m src.visu --design data/design_exp_siCTRL.txt "${ctrl_opts[@]}" --norm "0"
mv "results/figures/${ctrl_fig}_b0_norm.pdf" \
  "results/figures/exp_siCTRL_${ctrl_fig}_b0_norm_gene.pdf"

python3 -m src.visu --design data/design_exp_siCTRL.txt "${ctrl_opts[@]}" --norm "None"
mv "results/figures/${ctrl_fig}.pdf" \
  "results/figures/exp_siCTRL_${ctrl_fig}_gene.pdf"

rep=(1 2 3)
for i in "${rep[@]}"; do
  python3 -m src.visu \
    --design "data/design_exp_siCTRL_rep${i}.txt" \
    "${ctrl_opts[@]}" \
    --norm "0"

  mv "results/figures/${ctrl_fig}_b0_norm.pdf" \
    "results/figures/exp_siCTRL_rep${i}_${ctrl_fig}_b0_norm_gene.pdf"
done



############################################################
#     GC content exon ddx_down vs exon ddx_down_ctcf       #
############################################################


# src.gc_content on the Far_CTCF (labelled ddx_down) and CTCF (labelled
# ddx_down_ctcf) exon beds against the GRCh37 primary assembly fasta.
gc_beds=(
  results/bed_file/Far_CTCF_2000_both_ddx_down_with0_exon.bed
  results/bed_file/CTCF_2000_both_ddx_down_with0_exon.bed
)
python3 -m src.gc_content \
  -B "${gc_beds[@]}" \
  -b ddx_down ddx_down_ctcf \
  -g data/Homo_sapiens.GRCh37.dna.primary_assembly.fa \
  -f "exons" \
  -e 2000



############################################################################
# other Exon in genes containing an exon regulated by DDX
############################################################################

  ########################################################
  # Condition siCTRL - exon ddx_down vs exon ddx_down_ctcf #
  ########################################################



# get_other_exon_in_same_gene (-d 2000) on the CTCF exon bed, then on the
# Far_CTCF one; outputs are named "oexon_2000_<input name>".
for prefix in '' Far_; do
  python3 -m src.bed_handler.get_other_exon_in_same_gene \
    -b "results/bed_file/${prefix}CTCF_2000_both_ddx_down_with0_exon.bed" \
    -d 2000 \
    -o "oexon_2000_${prefix}CTCF_2000_both_ddx_down_with0_exon.bed"
done

# For the siCTRL and siDDX designs, compare the oexon_2000_CTCF and
# oexon_2000_Far_CTCF bed files (labelled other_ddx_down_ctcf and
# other_ddx_down) in four src.visu passes.
exps=(siCTRL siDDX)
o_near=results/bed_file/oexon_2000_CTCF_2000_both_ddx_down_with0
o_far=results/bed_file/oexon_2000_Far_CTCF_2000_both_ddx_down_with0
o_fig=metagene_other_ddx_down_ctcf-other_ddx_down_100bin_10000_nt-around-25-bin
for exp in "${exps[@]}"; do
  head_opts=(--design "data/design_exp_${exp}.txt" --bw_folder data/bigwig/)
  name_opts=(
    --region_name other_ddx_down_ctcf other_ddx_down
    --output results/figures/
  )
  tail_opts=(
    --environment 10000 25
    --show_replicate n
    --figure_type metagene
    --nb_bin 100
  )

  # Gene beds, --norm "None".
  python3 -m src.visu "${head_opts[@]}" \
    --region_bed "${o_near}_gene.bed" "${o_far}_gene.bed" \
    "${name_opts[@]}" \
    --border_name start_gene end_gene \
    "${tail_opts[@]}" \
    --norm "None"
  mv "results/figures/${o_fig}.pdf" \
    "results/figures/exp_${exp}_${o_fig}_gene.pdf"

  # Gene beds, --norm "0".
  python3 -m src.visu "${head_opts[@]}" \
    --region_bed "${o_near}_gene.bed" "${o_far}_gene.bed" \
    "${name_opts[@]}" \
    --border_name start_gene end_gene \
    "${tail_opts[@]}" \
    --norm "0"
  mv "results/figures/${o_fig}_b0_norm.pdf" \
    "results/figures/exp_${exp}_${o_fig}_gene_b0_norm.pdf"

  # gene-dup beds, --norm "0"; the figure of this pass is removed.
  python3 -m src.visu "${head_opts[@]}" \
    --region_bed "${o_near}_gene-dup.bed" "${o_far}_gene-dup.bed" \
    "${name_opts[@]}" \
    --border_name start_gene end_gene \
    "${tail_opts[@]}" \
    --norm "0"
  rm "results/figures/${o_fig}_b0_norm.pdf"

  # Exon beds, --norm given as a coef_table file.
  # NOTE(review): this table name contains "with0_gene-gene-dup" and
  # "with0_exon-gene-dup", whereas the gene-dup beds used in the previous pass
  # end in "with0_gene-dup" - confirm that this file actually exists.
  python3 -m src.visu "${head_opts[@]}" \
    --region_bed "${o_near}_exon.bed" "${o_far}_exon.bed" \
    "${name_opts[@]}" \
    --border_name start_exon end_exon \
    "${tail_opts[@]}" \
    --norm "results/figures/coef_table/tmp_cov_table_design_exp_${exp}_oexon_2000_CTCF_2000_both_ddx_down_with0_gene-gene-dup-oexon_2000_Far_CTCF_2000_both_ddx_down_with0_exon-gene-dup_100bin_10000_nt-around-25-bin_bin0_norm.txt"
  mv "results/figures/${o_fig}_file_norm.pdf" \
    "results/figures/exp_${exp}_${o_fig}_file_norm_exon.pdf"
done


###########################################################
# Figures siPP vs siCTRL for ddx_down_ctcf,
# other_ddx_down_ctcf and ddx_down
###########################################################

# One figure per group with the all_replicates design. For each group:
#   1. run src.visu on the "gene-dup" bed with --norm '0' and drop its PDF,
#   2. run src.visu on the exon bed with --norm set to the coef_table file
#      named after that gene-dup bed, then rename the PDF.
list_names=(ddx_down_ctcf other_ddx_down_ctcf ddx_down)
bed_names=(CTCF_2000_both_ddx_down_with0_exon.bed oexon_2000_CTCF_2000_both_ddx_down_with0_exon.bed Far_CTCF_2000_both_ddx_down_with0_exon.bed)
for i in "${!list_names[@]}"; do
  cname=${list_names[i]}
  bed=${bed_names[i]}
  gbed=${bed/exon.bed/gene-dup.bed} # exon bed name -> gene-dup bed name
  nbed=${gbed/.bed/}                # gene-dup bed name without ".bed"
  group_opts=(
    --design data/design_exp_all_replicates.txt
    --bw_folder data/bigwig/
  )

  python3 -m src.visu "${group_opts[@]}" \
    --region_bed "results/bed_file/${gbed}" \
    --region_name "${cname}" \
    --output results/figures/ \
    --border_name TSS TTS \
    --environment 10000 25 \
    --show_replicate n \
    --figure_type metagene \
    --nb_bin 100 \
    --norm '0'

  rm "results/figures/metagene_${cname}_100bin_10000_nt-around-25-bin_b0_norm.pdf"

  # NOTE(review): no --nb_bin / --border_name is passed here, yet the mv below
  # expects a "30bin" file - presumably 30 is the src.visu default; confirm.
  python3 -m src.visu "${group_opts[@]}" \
    --region_bed "results/bed_file/${bed}" \
    --region_name "${cname}" \
    --output results/figures/ \
    --environment 10000 25 \
    --show_replicate n \
    --figure_type metagene \
    --norm "results/figures/coef_table/tmp_cov_table_design_exp_all_replicates_${nbed}_100bin_10000_nt-around-25-bin_bin0_norm.txt"

  # The "30bin" source is renamed to a "100bin" target name.
  mv "results/figures/metagene_${cname}_30bin_10000_nt-around-25-bin_file_norm.pdf" \
    "results/figures/all_replicates_metagene_${cname}_100bin_10000_nt-around-25-bin_file_norm.pdf"
done

## Recap

# Recap figure with the three groups on one plot (all_replicates design):
# a --norm '0' run on the gene-dup beds (PDF removed), then a run on the exon
# beds with --norm set to the matching coef_table file (PDF renamed).
recap_gene_beds=()
recap_exon_beds=()
for stem in CTCF_2000_both_ddx_down_with0 oexon_2000_CTCF_2000_both_ddx_down_with0 Far_CTCF_2000_both_ddx_down_with0; do
  recap_gene_beds+=("results/bed_file/${stem}_gene-dup.bed")
  recap_exon_beds+=("results/bed_file/${stem}_exon.bed")
done
recap_fig=metagene_ddx_down_ctcf-other_ddx_down_ctcf-ddx_down_100bin_10000_nt-around-25-bin
recap_head=(--design data/design_exp_all_replicates.txt --bw_folder data/bigwig/)
recap_names=(
  --region_name ddx_down_ctcf other_ddx_down_ctcf ddx_down
  --output results/figures/
)
recap_tail=(
  --environment 10000 25
  --show_replicate n
  --figure_type metagene
  --nb_bin 100
)

python3 -m src.visu "${recap_head[@]}" \
  --region_bed "${recap_gene_beds[@]}" \
  "${recap_names[@]}" \
  --border_name TSS TTS \
  "${recap_tail[@]}" \
  --norm '0'

rm "results/figures/${recap_fig}_b0_norm.pdf"

python3 -m src.visu "${recap_head[@]}" \
  --region_bed "${recap_exon_beds[@]}" \
  "${recap_names[@]}" \
  --border_name exon_start exon_stop \
  "${recap_tail[@]}" \
  --norm 'results/figures/coef_table/tmp_cov_table_design_exp_all_replicates_CTCF_2000_both_ddx_down_with0_gene-dup-oexon_2000_CTCF_2000_both_ddx_down_with0_gene-dup-Far_CTCF_2000_both_ddx_down_with0_gene-dup_100bin_10000_nt-around-25-bin_bin0_norm.txt'

mv "results/figures/${recap_fig}_file_norm.pdf" \
  "results/figures/all_replicates_${recap_fig}_file_norm.pdf"


###############################################################################
#             Bigwig from MCF7 rnaseq
###############################################################################

# Create a bed file containing all gene containing one DDX-down exons
#######################################
# Write <dir>/ddx_down_gene.bed: a header line followed by the sorted, unique
# lines of the CTCF and Far_CTCF gene beds, with any line containing "#ref"
# removed. The header is written with printf because `echo -e` is not
# portable: a shell whose echo lacks -e would print "-e" into the header.
# Arguments: $1 - directory holding the bed files (default: results/bed_file)
# Outputs:   writes <dir>/ddx_down_gene.bed
#######################################
build_ddx_down_gene_bed() {
  local bed_dir=${1:-results/bed_file}
  {
    printf '#ref\tstart\tend\tid\tscore\tstrand\n'
    cat "${bed_dir}/CTCF_2000_both_ddx_down_with0_gene.bed" \
      "${bed_dir}/Far_CTCF_2000_both_ddx_down_with0_gene.bed" |
      sort -u |
      grep -v "#ref"
  } > "${bed_dir}/ddx_down_gene.bed"
}
build_ddx_down_gene_bed

# Zoom on the 1500 pb at the beginning of the exons.
# bed_resize ddx_down_gene.bed with -s 1500, then draw two un-normalised
# metagenes from the data/bam_mcf7_rnaseq design: one on the full gene bed
# (TSS..TTS) and one on the resized bed (TSS..TSS_1.5kb).
python3 -m src.bed_handler.bed_resize \
  -b results/bed_file/ddx_down_gene.bed \
  -s 1500 \
  -o ddx_down_gene_size1500.bed

# Each entry is "<bed name>:<second border name>:<final PDF name>".
mcf7_jobs=(
  "ddx_down_gene.bed:TTS:metagene_MCF_rnaseq_all_ddx_down.pdf"
  "ddx_down_gene_size1500.bed:TSS_1.5kb:metagene_MCF_rnaseq_all_ddx_down_tss1.5kb.pdf"
)
for job in "${mcf7_jobs[@]}"; do
  IFS=: read -r mcf7_bed mcf7_border mcf7_pdf <<< "${job}"
  python3 -m src.visu \
    --design data/bam_mcf7_rnaseq/design_exp_all_replicates.txt \
    --bw_folder data/bam_mcf7_rnaseq \
    --region_bed "results/bed_file/${mcf7_bed}" \
    --region_name all_ddx_down \
    --output results/figures/ \
    --border_name TSS "${mcf7_border}" \
    --environment 10000 25 \
    --show_replicate n \
    --figure_type metagene \
    --nb_bin 100 \
    --norm 'None'

  # Both runs write the same file name, so it is renamed right away.
  mv results/figures/metagene_all_ddx_down_100bin_10000_nt-around-25-bin.pdf \
    "results/figures/${mcf7_pdf}"
done


#### Readthrough

# Create readthrough bed files
# filter_bed gene.bed against data/readthrough_gene.txt on the "score" column,
# once as-is and once with -k 'n'.
rt_common=(-b data/bed/gene.bed -f data/readthrough_gene.txt -c score)
python3 -m src.bed_handler.filter_bed "${rt_common[@]}" -o readthrough_gene.bed
python3 -m src.bed_handler.filter_bed "${rt_common[@]}" -o no_readthrough_gene.bed -k 'n'

# Filter both results on the "id" column with the basemean>5 gene list.
expressed_ids='data/5y_expressed_genes_basemean>5.txt'
for rt_kind in readthrough no_readthrough; do
  python3 -m src.bed_handler.filter_bed \
    -b "results/bed_file/${rt_kind}_gene.bed" \
    -f "${expressed_ids}" \
    -c id \
    -o "${rt_kind}_expressed_gene.bed"
done


# Create bed file corresponding to the 10kb downstream regions of the previous beds
# bed_resize (-s 10000 -r "end" -t "outer") both expressed-gene beds.
# The readthrough call previously ended on a dangling "\" with no -o option;
# it now names its output readthrough_expressed_gene_10kb.bed, matching the
# no_readthrough call and the file read by the later TTS-10kb figures.
for rt_kind in no_readthrough readthrough; do
  python3 -m src.bed_handler.bed_resize \
    -b "results/bed_file/${rt_kind}_expressed_gene.bed" \
    -s 10000 \
    -r "end" \
    -t "outer" \
    -o "${rt_kind}_expressed_gene_10kb.bed"
done


# readthrough vs no_readthrough expressed genes: one src.visu run per design
# (all_replicates, siCTRL, siDDX) and per --norm value (0 and 99). Each PDF is
# renamed with the design name as prefix.
exps=(all_replicates siCTRL siDDX)
bins=(0 99)
rt_fig=metagene_readthrough-no_readthrough_100bin_10000_nt-around-25-bin
for exp in "${exps[@]}"; do
  for bin in "${bins[@]}"; do
    rt_opts=(
      --design "data/design_exp_${exp}.txt"
      --bw_folder data/bigwig/
      --region_bed results/bed_file/readthrough_expressed_gene.bed results/bed_file/no_readthrough_expressed_gene.bed
      --region_name readthrough no_readthrough
      --output results/figures/
      --border_name TSS TTS
      --environment 10000 25
      --show_replicate n
      --figure_type metagene
      --nb_bin 100
      --norm "${bin}"
    )
    python3 -m src.visu "${rt_opts[@]}"

    mv "results/figures/${rt_fig}_b${bin}_norm.pdf" \
      "results/figures/${exp}_${rt_fig}_b${bin}_norm.pdf"
  done
done


# Three groups of figures. The first two loop nests were missing their closing
# `done` keywords (two each), which made the rest of the script unparsable;
# they are restored here following the original indentation.

# 1. 10 kb region after the TTS, readthrough vs no_readthrough, for each design
#    and each coef_table file (bin 0 -> "TSS" label, bin 99 -> "TTS" label).
exps=(all_replicates siCTRL siDDX)
bins=(0 99)
loc=(TSS TTS)
for exp in "${exps[@]}"; do
  for i in "${!bins[@]}"; do
    python3 -m src.visu \
        --design "data/design_exp_${exp}.txt" \
        --bw_folder data/bigwig/ \
        --region_bed results/bed_file/readthrough_expressed_gene_10kb.bed results/bed_file/no_readthrough_expressed_gene_10kb.bed \
        --region_name readthrough no_readthrough \
        --output results/figures/ \
        --border_name TTS '' \
        --environment 0 0 \
        --show_replicate n \
        --figure_type metagene \
        --nb_bin 25 \
        --norm "results/figures/coef_table/tmp_cov_table_design_exp_${exp}_readthrough_expressed_gene-no_readthrough_expressed_gene_100bin_10000_nt-around-25-bin_bin${bins[$i]}_norm.txt"
    mv results/figures/metagene_readthrough-no_readthrough_25bin_0_nt-around-0-bin_file_norm.pdf \
      "results/figures/${exp}_TTS10kb_readthrough-no_readthrough_${loc[$i]}_norm.pdf"
  done
done

# 2. Figure 1C: gene-body metagene for each bed / --norm bin / design.
names=(IP)
designs=(data/designnew_exp_all_replicates_IP.txt)
bins=(99) # 0)
beds=(readthrough) # no_readthrough)
for bed in "${beds[@]}"; do
  for bin in "${bins[@]}"; do
    for i in "${!designs[@]}"; do
      python3 -m src.visu \
          --design "${designs[i]}" \
          --bw_folder data/bigwig_newnorm/ \
          --region_bed "results/bed_file/${bed}_expressed_gene.bed" \
          --region_name "${bed}" \
          --output results/figures/ \
          --border_name TSS TTS \
          --environment 10000 25 \
          --show_replicate n \
          --figure_type metagene \
          --nb_bin 100 \
          --norm "${bin}" \
          --stat True
      mv "results/figures/metagene_${bed}_100bin_10000_nt-around-25-bin_b${bin}_norm.pdf" \
        "results/figures/Fig1C_${bed}_b${bin}_norm_${names[$i]}.pdf"
    done
  done
done

# 3. Figure 1C, 10 kb region after the TTS, using the coef_table file named
#    after each design / bed / bin.
names=(IP)
designs=(data/designnew_exp_all_replicates_IP.txt)
beds=(readthrough) # no_readthrough)
bins=(99) # 0)
loc=(TTS) # TSS)
for bed in "${beds[@]}"; do
  for i in "${!bins[@]}"; do
    for j in "${!designs[@]}"; do
      # The design is selected with j, the designs index (it used to be i, the
      # bins index, which only worked because both arrays hold one element).
      python3 -m src.visu \
          --design "${designs[j]}" \
          --bw_folder data/bigwig_newnorm/ \
          --region_bed "results/bed_file/${bed}_expressed_gene_10kb.bed" \
          --region_name "${bed}" \
          --output results/figures/ \
          --border_name TTS '' \
          --environment 0 0 \
          --show_replicate n \
          --figure_type metagene \
          --nb_bin 25 \
          --norm "results/figures/coef_table/tmp_cov_table_designnew_exp_all_replicates_${names[$j]}_${bed}_expressed_gene_100bin_10000_nt-around-25-bin_bin${bins[$i]}_norm.txt" \
          --stat True

      mv "results/figures/metagene_${bed}_25bin_0_nt-around-0-bin_file_norm.pdf" \
        "results/figures/Fig_1C_TTS10kb_${bed}_${loc[$i]}-bin_norm_${names[$j]}.pdf"
    done
  done
done

# Graphics

##############################
#        SHY5Y               #
##############################

# Creating a bed file only containing expressed gene in 5y cells

# filter_bed gene.bed on the "id" column with the basemean>5 gene list.
filter_5y_opts=(
  -b data/bed/gene.bed
  -f 'data/5y_expressed_genes_basemean>5.txt'
  -c id
  -o 5y_expressed_gene.bed
)
python3 -m src.bed_handler.filter_bed "${filter_5y_opts[@]}"

# Create a bed file containing
# SHY5Y metagenes on 5y_expressed_gene.bed, one per --norm value.
# bin_names[i] is the file-name suffix that goes with bins[i]; it appears in
# both the source and the target of the rename.
bins=('None' 0 99)
bin_names=('' '_b0_norm' '_b99_norm')
for i in "${!bins[@]}"; do
  shy5y_opts=(
    --design data/bigwig_SHY5Y/design_exp_all_replicates.txt
    --bw_folder data/bigwig_SHY5Y/
    --region_bed results/bed_file/5y_expressed_gene.bed
    --region_name all_expressed_gene
    --output results/figures/
    --border_name TSS TTS
    --environment 10000 25
    --show_replicate n
    --figure_type metagene
    --nb_bin 100
    --norm "${bins[$i]}"
  )
  python3 -m src.visu "${shy5y_opts[@]}"

  mv "results/figures/metagene_all_expressed_gene_100bin_10000_nt-around-25-bin${bin_names[$i]}.pdf" \
    "results/figures/metagene_5y_rnaseq_all_expressed_gene${bin_names[$i]}.pdf"
done

# For each window size, bed_resize 5y_expressed_gene.bed (-r "end" -t "outer"),
# then draw the SHY5Y metagene of that window using the bin99 coef_table file.
tts_sizes=(10000 50000)
for i in "${!tts_sizes[@]}"; do
  size=${tts_sizes[$i]}
  # NOTE(review): `bins` is not assigned in this section; it still holds
  # whatever was set earlier in the script (('None' 0 99) at the SHY5Y
  # metagene step), which would give --nb_bin None and --nb_bin 0 here.
  # Confirm the intended bin counts.
  bin=${bins[$i]}
  # Window size in kb: an integer when size is a multiple of 1000, a float
  # otherwise. Uses python3 like the rest of the script (it used to call
  # `python`, which may be missing or may be Python 2).
  kb_size=$(python3 -c "print(int(${size}/1000)) if ${size}/1000 == int(${size}/1000) else print(${size}/1000)")

  python3 -m src.bed_handler.bed_resize \
    -b results/bed_file/5y_expressed_gene.bed \
    -s "${size}" \
    -r "end" \
    -t "outer" \
    -o "all_expressed_gene_end${size}.bed"

  python3 -m src.visu \
      --design data/bigwig_SHY5Y/design_exp_all_replicates.txt \
      --bw_folder data/bigwig_SHY5Y/ \
      --region_bed "results/bed_file/all_expressed_gene_end${size}.bed" \
      --region_name all_expressed_gene \
      --output results/figures/ \
      --border_name TTS '' \
      --environment 0 0 \
      --show_replicate n \
      --figure_type metagene \
      --nb_bin "${bin}" \
      --norm "results/figures/coef_table/tmp_cov_table_design_exp_all_replicates_5y_expressed_gene_100bin_10000_nt-around-25-bin_bin99_norm.txt"
  mv "results/figures/metagene_all_expressed_gene_${bin}bin_0_nt-around-0-bin_file_norm.pdf" \
    "results/figures/metagene_5y_rnaseq_TTS2${kb_size}kb_expressed_gene_${bin}bin_bin_TTS_norm.pdf"
done


# 2kb region after TSS

# bed_resize gene.bed with -s 2000 -r "start", then draw an un-normalised
# SHY5Y metagene of the resized regions and rename the PDF.
tss_resize_opts=(-b data/bed/gene.bed -s 2000 -r "start" -o all_gene_TSS-2kb.bed)
python3 -m src.bed_handler.bed_resize "${tss_resize_opts[@]}"

tss_visu_opts=(
  --design data/bigwig_SHY5Y/design_exp_all_replicates.txt
  --bw_folder data/bigwig_SHY5Y/
  --region_bed results/bed_file/all_gene_TSS-2kb.bed
  --region_name all_gene
  --output results/figures/
  --border_name TSS ''
  --environment 0 0
  --show_replicate n
  --figure_type metagene
  --nb_bin 100
  --norm "None"
  --stat True
)
python3 -m src.visu "${tss_visu_opts[@]}"

mv results/figures/metagene_all_gene_100bin_0_nt-around-0-bin.pdf \
  results/figures/metagene_5y_rnaseq_TSS-2kb_all_gene.pdf


#####################################################
# Metagene figure of last exons in readthrough genes near (<= 2000 nt) or far (> 2000 nt) from a CTCF site, and of last exons from non-readthrough genes
# (figure 5C)
#####################################################

# Bed file containing the last exons of expressed gene with readthrough
# get_last_exons on the readthrough bed, then on the no_readthrough bed.
for rt_kind in readthrough no_readthrough; do
  python3 -m src.bed_handler.get_last_exons \
    -g "results/bed_file/${rt_kind}_expressed_gene.bed" \
    -o "results/bed_file/${rt_kind}_expressed_last_exon.bed"
done

# select_regulated_near_ctcf_exons on the readthrough last exons with a 2000
# threshold, first with -N True and then with -N False.
for near_flag in True False; do
  python3 -m src.bed_handler.select_regulated_near_ctcf_exons \
    -e results/bed_file/readthrough_expressed_last_exon.bed \
    -t 2000 \
    -l both \
    -i True \
    -N "${near_flag}" \
    -n readthrough_last_exon
done

# Copy the no_readthrough gene bed under a "gene-dup" name.
cp results/bed_file/no_readthrough_expressed_gene.bed \
  results/bed_file/no_readthrough_expressed_last_gene-dup.bed

# One figure per last-exon group with the designnew IP design. For each group:
#   1. run src.visu on the "gene-dup" bed with --norm '0' and drop its PDF,
#   2. run src.visu on the exon bed (30 bins, --stat True) with --norm set to
#      the coef_table file named after that gene-dup bed, then rename the PDF.
list_names=(readthrough_ctcf readthrough no_readthrough)
bed_names=(readthrough_last_exon_near_CTCF_2000_both_ddx_with0_exon.bed readthrough_last_exon_far_CTCF_2000_both_ddx_with0_exon.bed no_readthrough_expressed_last_exon.bed)
for i in "${!list_names[@]}"; do
  cname=${list_names[i]}
  bed=${bed_names[i]}
  gbed=${bed/exon.bed/gene-dup.bed} # exon bed name -> gene-dup bed name
  nbed=${gbed/.bed/}                # gene-dup bed name without ".bed"
  ip_opts=(
    --design data/designnew_exp_all_replicates_IP.txt
    --bw_folder data/bigwig_newnorm
  )
  plot_opts=(--environment 10000 25 --show_replicate n --figure_type metagene)

  python3 -m src.visu "${ip_opts[@]}" \
    --region_bed "results/bed_file/${gbed}" \
    --region_name "${cname}" \
    --output results/figures/ \
    --border_name TSS TTS \
    "${plot_opts[@]}" \
    --nb_bin 100 \
    --norm '0'

  rm "results/figures/metagene_${cname}_100bin_10000_nt-around-25-bin_b0_norm.pdf"

  python3 -m src.visu "${ip_opts[@]}" \
    --region_bed "results/bed_file/${bed}" \
    --region_name "${cname}" \
    --output results/figures/ \
    --border_name " " " " \
    "${plot_opts[@]}" \
    --nb_bin 30 \
    --norm "results/figures/coef_table/tmp_cov_table_designnew_exp_all_replicates_IP_${nbed}_100bin_10000_nt-around-25-bin_bin0_norm.txt" \
    --stat True

  mv "results/figures/metagene_${cname}_30bin_10000_nt-around-25-bin_file_norm.pdf" \
    "results/figures/all_replicates_metaexon_${cname}_30bin_10000_nt-around-25-bin_file_norm.pdf"
done


###########################################################
# Figures siPP vs siCTRL for ddx_down_ctcf and ddx_down
# (figure 5B)
###########################################################


# One figure per group (ddx_down_ctcf, ddx_down) with the designnew IP design.
# For each group:
#   1. run src.visu on the "gene-dup" bed with --norm '0' and drop its PDF,
#   2. run src.visu on the exon bed (30 bins, -y 0.15 0.4, --stat True) with
#      --norm set to the coef_table file named after that gene-dup bed, then
#      rename the PDF.
# Stray non-shell lines that had been pasted in the middle of the two
# backslash-continued commands and before the final mv were removed: they cut
# the commands short and left an unbalanced quote.
list_names=(ddx_down_ctcf ddx_down)
bed_names=(CTCF_2000_both_ddx_down_with0_exon.bed Far_CTCF_2000_both_ddx_down_with0_exon.bed)
for i in "${!list_names[@]}"; do
  cname=${list_names[i]}
  bed=${bed_names[i]}
  gbed=${bed/exon.bed/gene-dup.bed} # exon bed name -> gene-dup bed name
  nbed=${gbed/.bed/}                # gene-dup bed name without ".bed"
  python3 -m src.visu \
      --design data/designnew_exp_all_replicates_IP.txt \
      --bw_folder data/bigwig_newnorm/ \
      --region_bed "results/bed_file/${gbed}" \
      --region_name "${cname}" \
      --output results/figures/ \
      --border_name TSS TTS \
      --environment 10000 25 \
      --show_replicate n \
      --figure_type metagene \
      --nb_bin 100 \
      --norm '0'

  rm "results/figures/metagene_${cname}_100bin_10000_nt-around-25-bin_b0_norm.pdf"

  python3 -m src.visu \
      --design data/designnew_exp_all_replicates_IP.txt \
      --bw_folder data/bigwig_newnorm/ \
      --region_bed "results/bed_file/${bed}" \
      --region_name "${cname}" \
      --output results/figures/ \
      --border_name " " " " \
      --environment 10000 25 \
      --show_replicate n \
      --figure_type metagene \
      --nb_bin 30 \
      -y 0.15 0.4 \
      --norm "results/figures/coef_table/tmp_cov_table_designnew_exp_all_replicates_IP_${nbed}_100bin_10000_nt-around-25-bin_bin0_norm.txt" \
      --stat True

  # The "30bin" source is renamed to a "100bin" target name.
  mv "results/figures/metagene_${cname}_30bin_10000_nt-around-25-bin_file_norm.pdf" \
    "results/figures/all_replicates_metagene_${cname}_100bin_10000_nt-around-25-bin_file_norm.pdf"
done

# src.gc_content on the three last-exon beds (labelled readthrough_ctcf,
# readthrough, no_readthrough) against the GRCh37 primary assembly fasta.
last_exon_beds=(
  results/bed_file/readthrough_last_exon_near_CTCF_2000_both_ddx_with0_exon.bed
  results/bed_file/readthrough_last_exon_far_CTCF_2000_both_ddx_with0_exon.bed
  results/bed_file/no_readthrough_expressed_last_exon.bed
)
python3 -m src.gc_content \
  -B "${last_exon_beds[@]}" \
  -b readthrough_ctcf readthrough no_readthrough \
  -g data/Homo_sapiens.GRCh37.dna.primary_assembly.fa \
  -f "exons" \
  -e 2000