compress, index and merge

cc649762 · jplantad · 61d48924 · cc649762 · cc649762 · cc649762
Commit cc649762 authored May 25, 2021 by jplantad
--- a/compress_vcf_to_gz.sh
+++ b/compress_vcf_to_gz.sh
+#!/bin/bash
+# set home directory
+home="/home/stagiaire/Bureau/phylogenetics/"
+cd ${home}
+# list the vcf that need to be compressed
+data_999_folder="/home/stagiaire/Bureau/gitlab/data_fixed_999/"
+vcf_list=$(ls ${data_999_folder})
+# check the list
+echo ${vcf_list}
+# create a new folder that will contain the gz files, and get into it
+mkdir -p data_999_gz
+cd data_999_gz/
+# check working directory
+pwd
+for vcf in ${vcf_list}
+do
+	echo -e "#####\nProcessing "${vcf}
+	# suppress the GL line in the VCF file header and save the output into a temporary VCF file
+	sed '/^##FORMAT=<ID=GL/d' ${data_999_folder}${vcf} > ${vcf}_tmp.vcf
+	echo "temporary VCF file created"
+	# compress the temporary VCF file
+	bcftools view ${vcf}_tmp.vcf -Oz -o ${vcf}.gz
+	echo "compressed file computed"
+	# remove the temporary VCF file
+	rm ${vcf}_tmp.vcf
+	echo "temporary VCF file deleted"
+	echo -e ${vcf}" processed.\n#####"
+done
+# check that the compressed files are in the folder
+ls -l
--- a/generate_index.sh
+++ b/generate_index.sh
+#!/bin/bash
+# generate an index for a VCF file
+# set home directory
+home="/home/stagiaire/Bureau/phylogenetics/data_999_gz/"
+cd ${home}
+# list VCF files
+vcf_list=$(ls )
+for vcf in ${vcf_list}
+do
+	# index the VCF file
+	bcftools index -f ${vcf} -o ${vcf}.csi
+	echo "##### "${vcf}" done."
+done
--- a/merge_fastafiles.sh
+++ b/merge_fastafiles.sh
+#!/bin/bash
+# merging fasta files of all individuals for each gene
+# set home directory
+home="/home/stagiaire/Bureau/phylogenetics/data_sequences/pon/"
+cd ${home}
+indiv_list=$(ls *renamed_AB.fa)
+for indiv in ${indiv_list}
+do
+	cat ${indiv} >> "BST2_ponAbe2_all.fa"
+done