Skip to content
Snippets Groups Projects
Commit f9363a1f authored by jplantad's avatar jplantad
Browse files

General script for population genetics + data CD4 chimp

parent f56a9705
No related branches found
No related tags found
No related merge requests found
source diff could not be displayed: it is too large. Options to address this: view the blob.
v1_0126.R 0 → 100644
############################
## Population genetics : general script
## J. Plantade
## Jan 2021
############################
# packages
library(ade4)
library(pegas)
library(adegenet)
library(PopGenome)
library(hierfstat)
library(ggplot2)
library(poppr)
################
### setting data
# data import into a loci object
loci <- read.vcf("data/CD4_panTro3.vcf_fixed.vcf", from = 1, to = 572)
nloci <- length(loci)
# convert loci object into a genind object
genind <- loci2genind(loci)
# add population factor
pop(genind)<-rep(c("paniscus","troglodytes_ellioti",
"troglodytes_schweinfurthii","troglodytes_troglodytes","troglodytes_verus"),
c(11,10,6,4, 5))
# convert genind object into a hierfstat object
hierf <- genind2hierfstat(genind)
##################
### heterozygosity
div <- summary(genind)
locinfo<-VCFloci("data/CD4_panTro3.vcf_fixed.vcf", what = "all", chunk.size = 1e9, quiet = FALSE)
# observed heterozygosity VS loci position
plot(locinfo$POS, div$Hobs, xlab="Loci position", ylab="Observed Heterozygosity",
main="Observed heterozygosity per locus", pch=20)
# observed heterozygosity VS expected heterozygosity
plot(div$Hexp, div$Hobs, xlab="Hexp", ylab="Hobs",
main="Expected heterozygosity as a function of observed heterozygosity per locus")
abline(0, 1, col="blue")
# Hobs - Hexp VS position
plot(locinfo$POS, div$Hobs-div$Hexp, xlab = "loci position", ylab = "Hobs-Hexp", cex=0.3)
# Bartlett test (homogeneity of variance)
# H0 : Hobs = Hexp
bartlett.test(list(div$Hexp, div$Hobs))
# ggplot2 try
g1 <- ggplot() + geom_point(aes(x=locinfo$POS, y=div$Hobs)) + xlab("Position") + ylab("Observed heterozygosity") + ggtitle("Observed heterozygosity per locus") + theme_light()
g1
# splitting genind objet according to population
subspecies <- c("paniscus","troglodytes_ellioti","troglodytes_schweinfurthii","troglodytes_troglodytes","troglodytes_verus")
pop <- c("pan", "ellioti", "schwein", "troglo", "verus")
npop <- length(pop)
for (i in 1:length(subspecies)){
tmp <- popsub(genind, sublist = c(subspecies[i]))
assign(pop[i], tmp)
}
# Bartlett test on all pop except verus because geographically distant
quatre_pop <- popsub(genind, sublist = c("paniscus","troglodytes_ellioti","troglodytes_schweinfurthii","troglodytes_troglodytes"))
div_quatre <- summary(quatre_pop)
bartlett.test(list(div_quatre$Hexp, div_quatre$Hobs)) # S
# hist of Hobs and Hexp distribution for paniscus
par(mfrow=c(1, 2))
hist(pan_div$Hobs)
hist(pan_div$Hexp)
# Bartlett test on each pop
bartlett.test(list(pan_div$Hexp, pan_div$Hobs)) # S
bartlett.test(list(ellioti_div$Hexp, ellioti_div$Hobs)) # S
bartlett.test(list(schwein_div$Hexp, schwein_div$Hobs)) # S
bartlett.test(list(troglo_div$Hexp, troglo_div$Hobs)) # S
bartlett.test(list(verus_div$Hexp, verus_div$Hobs)) # S
# ? all test are significant ?
# extracting heterozygosity for each population
list_genind <- c(pan, ellioti, schwein, troglo, verus)
for (i in 1:length(list_genind)){
tmp <- summary(list_genind[[i]])
assign(paste0(pop[i], "_div"), tmp)
}
par(mfrow =c(2, 2))
plot(locinfo$POS, pan_div$Hobs, cex=0.3)
plot(locinfo$POS, ellioti_div$Hobs, cex=0.3)
plot(locinfo$POS, schwein_div$Hobs, cex=0.3)
plot(locinfo$POS, troglo_div$Hobs, cex=0.3)
# plot(locinfo$POS, verus_div$Hobs, cex=0.3)
#######
### Fst
duo <- c("pan_ellioti", "pan_schwein", "pan_troglo", "pan_verus", "ellioti_schwein", "ellioti_troglo", "ellioti_verus", "schwein_troglo", "schwein_verus", "troglo_verus")
# Matrix X with colnames and rownames = pop, create the name of pairs
M <- matrix(NA, nrow = (npop+1), ncol = (npop+1))
M[1, 2:6] <- pop
M[2:6, 1] <- pop
for (i in 2:(npop+1)){
start <- M[1, i]
for (j in 2:(npop+1)){
end <- M[j, 1]
M[j, i] <- paste0(start, "_", end)
}
}
# need a loop that creates a list of pair names
# genind subset -> couple of population.
pair_list <- NULL
for (i in 2:(npop)){ # in matrix M we go from col 2 to 5
for (j in (i+1):(npop+1)){ # in matrix M we go from row 3 to 6
tmp <- popsub(genind, sublist = c(subspecies[i-1], subspecies[j-1]))
tmp2 <- assign(M[j, i], tmp)
pair_list <- c(pair_list, tmp2)
}
}
# calculation of F statistics and extraction of Fst values
fst <- matrix(NA, nrow = nloci, ncol = length(pair_list))
for (i in 1:length(pair_list)){
tmp <- Fst(as.loci(pair_list[[i]]), na.alleles = "")
fst[,i] <- tmp[,2]
}
fst <- as.data.frame(fst)
colnames(fst) <- duo
# Fst values VS loci position
plot(locinfo$POS, fst[,1], cex=0.2, xlab="Position", ylab="Fst value", main="Fst value per locus")
abline(0.15, 0)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment