Commit cc649762 authored by jplantad's avatar jplantad
Browse files

compress, index and merge

parent 61d48924
#!/bin/bash
# Compress the VCF files of the input folder into bgzip-compressed VCF files.
#
# For every regular file found in ${data_999_folder}:
#   1. the "##FORMAT=<ID=GL" header line is removed with sed and the result
#      is saved as a temporary <name>_tmp.vcf file;
#   2. the temporary file is compressed into <name>.gz with `bcftools view -Oz`;
#   3. the temporary file is deleted.
# The outputs are written to ${home}data_999_gz/.
#
# Requires bcftools on the PATH.
# Exits with status 1 when a directory cannot be created/entered or when at
# least one file could not be processed.

# set home directory
home="/home/stagiaire/Bureau/phylogenetics/"
# folder holding the vcf that need to be compressed
data_999_folder="/home/stagiaire/Bureau/gitlab/data_fixed_999/"
# folder (relative to ${home}) that will contain the gz files
out_folder="data_999_gz"

# stop if the home directory is missing: going on would create the output
# folder in whatever directory the script was started from
cd -- "${home}" || { echo "ERROR: cannot enter ${home}" >&2; exit 1; }

# list the vcf that need to be compressed; a glob is used instead of parsing
# the output of ls so that file names containing spaces stay in one piece
vcf_list=()
for vcf_path in "${data_999_folder}"*; do
  # skip sub-folders, and the literal pattern left when nothing matches
  [[ -f "${vcf_path}" ]] || continue
  vcf_list+=("${vcf_path##*/}")
done
# check the list
echo "${vcf_list[*]}"

# create a new folder that will contain the gz files, and get into it
mkdir -p -- "${out_folder}" \
  || { echo "ERROR: cannot create ${home}${out_folder}" >&2; exit 1; }
cd -- "${out_folder}" \
  || { echo "ERROR: cannot enter ${home}${out_folder}" >&2; exit 1; }
# check working directory
pwd

# number of files that could not be processed
failed=0
for vcf in "${vcf_list[@]}"; do
  # printf rather than echo -e: backslashes in a file name stay literal
  printf '#####\nProcessing %s\n' "${vcf}"
  tmp_vcf="${vcf}_tmp.vcf"

  # suppress the GL line in the VCF file header and save the output into a
  # temporary VCF file
  if ! sed '/^##FORMAT=<ID=GL/d' "${data_999_folder}${vcf}" > "${tmp_vcf}"; then
    echo "ERROR: could not filter the header of ${vcf}" >&2
    rm -f -- "${tmp_vcf}"
    failed=$((failed + 1))
    continue
  fi
  echo "temporary VCF file created"

  # compress the temporary VCF file
  if ! bcftools view -Oz -o "${vcf}.gz" "${tmp_vcf}"; then
    echo "ERROR: bcftools could not compress ${vcf}" >&2
    # do not leave a truncated archive that could be mistaken for a valid one
    rm -f -- "${vcf}.gz" "${tmp_vcf}"
    failed=$((failed + 1))
    continue
  fi
  echo "compressed file computed"

  # remove the temporary VCF file
  rm -f -- "${tmp_vcf}"
  echo "temporary VCF file deleted"
  printf '%s processed.\n#####\n' "${vcf}"
done

# check that the compressed files are in the folder
ls -l

# report failures through the exit status so that callers can detect them
if (( failed > 0 )); then
  echo "ERROR: ${failed} file(s) could not be processed" >&2
  exit 1
fi
#!/bin/bash
# Generate a CSI index (<name>.csi) with `bcftools index` for every file of
# the ${home} folder.
#
# Requires bcftools on the PATH.
# Exits with status 1 when ${home} cannot be entered or when at least one
# file could not be indexed.

# set home directory
home="/home/stagiaire/Bureau/phylogenetics/data_999_gz/"
# stop if the folder is missing: going on would index the files of whatever
# directory the script was started from
cd -- "${home}" || { echo "ERROR: cannot enter ${home}" >&2; exit 1; }

# number of files that could not be indexed
failed=0
# list VCF files; a glob is used instead of parsing the output of ls so that
# file names containing spaces stay in one piece
for vcf in *; do
  # skip sub-folders, and the literal "*" left when the folder is empty
  [[ -f "${vcf}" ]] || continue
  # skip the index files written by a previous run: they are not VCF files
  # and indexing them would only produce errors
  case "${vcf}" in
    *.csi | *.tbi) continue ;;
  esac

  # index the VCF file (-f overwrites an index that already exists)
  if bcftools index -f -o "${vcf}.csi" "${vcf}"; then
    echo "##### ${vcf} done."
  else
    echo "ERROR: bcftools could not index ${vcf}" >&2
    failed=$((failed + 1))
  fi
done

# report failures through the exit status so that callers can detect them
if (( failed > 0 )); then
  echo "ERROR: ${failed} file(s) could not be indexed" >&2
  exit 1
fi
#!/bin/bash
# Merge the fasta files of all individuals (*renamed_AB.fa) found in ${home}
# into a single multi-fasta file, BST2_ponAbe2_all.fa.
#
# NOTE: the sequences are appended to the merged file. Running the script
# twice therefore duplicates them; delete the merged file first to rebuild it
# from scratch.
#
# Exits with status 1 when ${home} cannot be entered or when a file cannot be
# appended.

# set home directory
home="/home/stagiaire/Bureau/phylogenetics/data_sequences/pon/"
# stop if the folder is missing: going on would merge the files of whatever
# directory the script was started from
cd -- "${home}" || { echo "ERROR: cannot enter ${home}" >&2; exit 1; }

merged_file="BST2_ponAbe2_all.fa"

# a glob is used instead of parsing the output of ls so that file names
# containing spaces stay in one piece
for indiv in *renamed_AB.fa; do
  # skip the literal pattern left when no file matches
  [[ -f "${indiv}" ]] || continue
  cat -- "${indiv}" >> "${merged_file}" \
    || { echo "ERROR: cannot append ${indiv} to ${merged_file}" >&2; exit 1; }
done
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment