# This listing holds three standalone Bash scripts (originally added as three
# new executable files). Each section below is meant to be saved under the
# file name given in its banner.

###############################################################################
# File: compress_vcf_to_gz.sh
###############################################################################
#!/bin/bash
#
# For every regular file found in the data_fixed_999 folder:
#   1. drop the header line(s) starting with "##FORMAT=<ID=GL",
#   2. write the result as a compressed VCF (bcftools -Oz) named <file>.gz
#      inside <home>/data_999_gz/.
# Requires: bcftools, sed.

set -euo pipefail
# An unmatched glob expands to nothing instead of the literal pattern.
shopt -s nullglob

# set home directory
home="/home/stagiaire/Bureau/phylogenetics/"
cd "${home}"

# list the vcf that need to be compressed
# (a glob is used instead of parsing `ls`, so names containing spaces or
# glob characters survive; sub-directories are skipped because sed cannot
# read them)
data_999_folder="/home/stagiaire/Bureau/gitlab/data_fixed_999/"
vcf_list=()
for vcf_path in "${data_999_folder}"*; do
  if [[ -f "${vcf_path}" ]]; then
    vcf_list+=("${vcf_path##*/}")
  fi
done

if (( ${#vcf_list[@]} == 0 )); then
  echo "No input file found in ${data_999_folder}" >&2
  exit 1
fi

# check the list
echo "${vcf_list[*]}"

# create a new folder that will contain the gz files, and get into it
mkdir -p data_999_gz
cd data_999_gz/
# check working directory
pwd

# Make sure a half-written temporary file never outlives a failed run.
tmp_vcf=""
trap 'if [[ -n "${tmp_vcf}" ]]; then rm -f -- "${tmp_vcf}"; fi' EXIT

for vcf in "${vcf_list[@]}"; do
  printf '#####\nProcessing %s\n' "${vcf}"
  tmp_vcf="${vcf}_tmp.vcf"

  # suppress the GL line in the VCF file header and save the output into a
  # temporary VCF file
  sed '/^##FORMAT=<ID=GL/d' "${data_999_folder}${vcf}" > "${tmp_vcf}"
  echo "temporary VCF file created"

  # compress the temporary VCF file
  # (set -e aborts here on failure, so the success message below is only
  # printed when bcftools actually succeeded)
  bcftools view -Oz -o "${vcf}.gz" "${tmp_vcf}"
  echo "compressed file computed"

  # remove the temporary VCF file
  rm -- "${tmp_vcf}"
  tmp_vcf=""
  echo "temporary VCF file deleted"
  printf '%s processed.\n#####\n' "${vcf}"
done

trap - EXIT

# check that the compressed files are in the folder
ls -l

###############################################################################
# File: generate_index.sh
###############################################################################
#!/bin/bash
#
# generate an index (<file>.csi) for every compressed VCF file found in the
# data_999_gz folder.
# Requires: bcftools.

set -euo pipefail
shopt -s nullglob

# set home directory
home="/home/stagiaire/Bureau/phylogenetics/data_999_gz/"
cd "${home}"

# list VCF files
# Only *.gz is matched: listing the whole folder would also pick up the
# .csi files written by a previous run and try to index them.
indexed=0
for vcf in *.gz; do
  # index the VCF file (-f overwrites an existing index)
  bcftools index -f -o "${vcf}.csi" "${vcf}"
  echo "##### ${vcf} done."
  indexed=$(( indexed + 1 ))
done

if (( indexed == 0 )); then
  echo "No .gz file found in ${home}" >&2
  exit 1
fi

###############################################################################
# File: merge_fastafiles.sh
###############################################################################
#!/bin/bash
#
# merging fasta files of all individuals for each gene: every
# *renamed_AB.fa file of the folder is concatenated into
# BST2_ponAbe2_all.fa.

set -euo pipefail
shopt -s nullglob

# set home directory
home="/home/stagiaire/Bureau/phylogenetics/data_sequences/pon/"
cd "${home}"

merged_fasta="BST2_ponAbe2_all.fa"
indiv_list=(*renamed_AB.fa)

if (( ${#indiv_list[@]} == 0 )); then
  echo "No *renamed_AB.fa file found in ${home}" >&2
  exit 1
fi

# The output is rebuilt from scratch in a single cat. Appending with >>
# duplicated every sequence each time the script was run again.
cat -- "${indiv_list[@]}" > "${merged_fasta}"