From cc6497621281f177e2e9f1a09b521b0bfeb4fe8c Mon Sep 17 00:00:00 2001
From: jplantad <julie.plantade@ens-lyon.fr>
Date: Tue, 25 May 2021 15:07:01 +0200
Subject: [PATCH] compress, index and merge

---
 compress_vcf_to_gz.sh | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 generate_index.sh     | 22 ++++++++++++++++++++++
 merge_fastafiles.sh   | 22 ++++++++++++++++++++++
 3 files changed, 91 insertions(+)
 create mode 100755 compress_vcf_to_gz.sh
 create mode 100755 generate_index.sh
 create mode 100755 merge_fastafiles.sh

diff --git a/compress_vcf_to_gz.sh b/compress_vcf_to_gz.sh
new file mode 100755
index 0000000..1462df2
--- /dev/null
+++ b/compress_vcf_to_gz.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+#
+# Strip the GL FORMAT header line from every file in data_fixed_999/ and
+# write a compressed copy (<name>.gz) into data_999_gz/. Requires bcftools.
+
+# stop at the first failing command instead of compressing a broken file
+set -euo pipefail
+# an empty input folder yields zero iterations instead of a literal '*'
+shopt -s nullglob
+
+# set home directory
+home="/home/stagiaire/Bureau/phylogenetics/"
+cd "${home}"
+
+# folder holding the VCF files that need to be compressed
+data_999_folder="/home/stagiaire/Bureau/gitlab/data_fixed_999/"
+# check the list
+ls -- "${data_999_folder}"
+
+# create a new folder that will contain the gz files, and get into it
+mkdir -p data_999_gz
+cd data_999_gz/
+# check working directory
+pwd
+
+# glob instead of parsing ls, so names with spaces stay in one piece
+for vcf_path in "${data_999_folder}"*
+do
+  # skip sub-directories and other non-regular entries
+  [[ -f "${vcf_path}" ]] || continue
+  # file name without the folder part
+  vcf="${vcf_path##*/}"
+  printf '#####\nProcessing %s\n' "${vcf}"
+  # suppress the GL line in the VCF file header and save the output into a temporary VCF file
+  sed '/^##FORMAT=<ID=GL/d' "${vcf_path}" > "${vcf}_tmp.vcf"
+  echo "temporary VCF file created"
+  # compress the temporary VCF file
+  bcftools view -Oz -o "${vcf}.gz" "${vcf}_tmp.vcf"
+  echo "compressed file computed"
+  # remove the temporary VCF file
+  rm -- "${vcf}_tmp.vcf"
+  echo "temporary VCF file deleted"
+  printf '%s processed.\n#####\n' "${vcf}"
+done
+
+# check that the compressed files are in the folder
+ls -l
diff --git a/generate_index.sh b/generate_index.sh
new file mode 100755
index 0000000..6284763
--- /dev/null
+++ b/generate_index.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+#
+# generate an index for every compressed VCF file in data_999_gz/
+# Requires bcftools.
+
+# stop at the first failing command
+set -euo pipefail
+# no compressed file yields zero iterations instead of a literal '*.gz'
+shopt -s nullglob
+
+# set home directory
+home="/home/stagiaire/Bureau/phylogenetics/data_999_gz/"
+cd "${home}"
+
+# loop over the compressed VCF files only: listing the whole folder would
+# also pick up the .csi indexes written by a previous run
+for vcf in *.gz
+do
+  # index the VCF file
+  bcftools index -f -o "${vcf}.csi" "${vcf}"
+  echo "##### ${vcf} done."
+done
diff --git a/merge_fastafiles.sh b/merge_fastafiles.sh
new file mode 100755
index 0000000..993eb1a
--- /dev/null
+++ b/merge_fastafiles.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+#
+# merging fasta files of all individuals for each gene
+
+# stop at the first failing command
+set -euo pipefail
+# no input file yields zero iterations instead of a literal pattern
+shopt -s nullglob
+
+# set home directory
+home="/home/stagiaire/Bureau/phylogenetics/data_sequences/pon/"
+cd "${home}"
+
+merged="BST2_ponAbe2_all.fa"
+# start from an empty file so that a second run does not append every
+# sequence a second time
+: > "${merged}"
+
+for indiv in *renamed_AB.fa
+do
+  cat -- "${indiv}" >> "${merged}"
+done
-- 
GitLab