Skip to content
Snippets Groups Projects
Commit cc649762 authored by jplantad's avatar jplantad
Browse files

compress, index and merge

parent 61d48924
No related branches found
No related tags found
No related merge requests found
#!/bin/bash
# Compress every VCF file found in the input folder.
#
# For each input file the "##FORMAT=<ID=GL" header line is removed (sed),
# the result is written to a temporary VCF in the output folder, and that
# temporary file is then compressed with `bcftools view -Oz` into
# "<input name>.gz". The temporary file is deleted afterwards, also when a
# step fails.
#
# Requires: bcftools available in PATH.

# stop at the first failing command instead of reporting false successes
set -euo pipefail
# a glob that matches nothing expands to nothing (not to the literal pattern)
shopt -s nullglob

# set home directory
home="/home/stagiaire/Bureau/phylogenetics/"
cd -- "${home}" || { printf 'cannot cd to %s\n' "${home}" >&2; exit 1; }

# list the vcf that need to be compressed
data_999_folder="/home/stagiaire/Bureau/gitlab/data_fixed_999/"
vcf_paths=()
vcf_names=()
for candidate in "${data_999_folder}"*; do
  # only regular files can be fed to sed; skip sub-directories
  if [[ -f "${candidate}" ]]; then
    vcf_paths+=("${candidate}")
    vcf_names+=("${candidate##*/}")
  fi
done

if (( ${#vcf_paths[@]} == 0 )); then
  printf 'no file to compress in %s\n' "${data_999_folder}" >&2
  exit 0
fi

# check the list (names separated by spaces, on a single line)
printf '%s\n' "${vcf_names[*]}"

# create a new folder that will contain the gz files, and get into it
mkdir -p data_999_gz
cd data_999_gz/ || { printf 'cannot cd to %s\n' "${home}data_999_gz/" >&2; exit 1; }

# check working directory
pwd

# make sure a temporary VCF never survives an aborted run
tmp_vcf=""
cleanup() {
  if [[ -n "${tmp_vcf}" ]]; then
    rm -f -- "${tmp_vcf}"
  fi
}
trap cleanup EXIT

for vcf_path in "${vcf_paths[@]}"; do
  vcf="${vcf_path##*/}"
  printf '#####\nProcessing %s\n' "${vcf}"

  # suppress the GL line in the VCF file header and save the output into a
  # temporary VCF file; the "./" prefix protects names starting with "-"
  tmp_vcf="./${vcf}_tmp.vcf"
  sed '/^##FORMAT=<ID=GL/d' -- "${vcf_path}" > "${tmp_vcf}"
  echo "temporary VCF file created"

  # compress the temporary VCF file
  bcftools view -Oz -o "./${vcf}.gz" "${tmp_vcf}"
  echo "compressed file computed"

  # remove the temporary VCF file
  rm -- "${tmp_vcf}"
  tmp_vcf=""
  echo "temporary VCF file deleted"

  printf '%s processed.\n#####\n' "${vcf}"
done

# check that the compressed files are in the folder
ls -l
#!/bin/bash
# Generate a CSI index for every compressed VCF file (*.gz) of the folder.
#
# Only "*.gz" files are indexed: listing the whole directory would also pick
# up the "*.csi" index files written by a previous run and hand them to
# bcftools as if they were VCF files.
#
# Requires: bcftools available in PATH.

# stop at the first failing command
set -euo pipefail
# a glob that matches nothing expands to nothing (not to the literal pattern)
shopt -s nullglob

# set home directory
home="/home/stagiaire/Bureau/phylogenetics/data_999_gz/"
cd -- "${home}" || { printf 'cannot cd to %s\n' "${home}" >&2; exit 1; }

# list compressed VCF files
vcf_files=(*.gz)
if (( ${#vcf_files[@]} == 0 )); then
  printf 'no compressed VCF file (*.gz) found in %s\n' "${home}" >&2
  exit 0
fi

for vcf in "${vcf_files[@]}"; do
  # index the VCF file (-f overwrites an existing index); the "./" prefix
  # protects names starting with "-"
  bcftools index -f -o "./${vcf}.csi" "./${vcf}"
  echo "##### ${vcf} done."
done
#!/bin/bash
# Merge the fasta files of all individuals (*renamed_AB.fa) of the folder
# into a single file, BST2_ponAbe2_all.fa.
#
# The merged file is rebuilt from scratch on every run: appending to the
# file left by a previous run would duplicate every sequence.

# stop at the first failing command
set -euo pipefail
# a glob that matches nothing expands to nothing (not to the literal pattern)
shopt -s nullglob

# set home directory
home="/home/stagiaire/Bureau/phylogenetics/data_sequences/pon/"
cd -- "${home}" || { printf 'cannot cd to %s\n' "${home}" >&2; exit 1; }

merged_fasta="BST2_ponAbe2_all.fa"

# per-individual fasta files, in the shell's sorted glob order
indiv_files=(*renamed_AB.fa)
if (( ${#indiv_files[@]} == 0 )); then
  # leave any existing merged file untouched when there is nothing to merge
  printf 'no *renamed_AB.fa file found in %s\n' "${home}" >&2
  exit 0
fi

# a single cat call concatenates all inputs; ">" replaces the previous output
cat -- "${indiv_files[@]}" > "${merged_fasta}"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment