Skip to content
Snippets Groups Projects
Commit 60e00802 authored by jplantad's avatar jplantad
Browse files

Script general

parent 33806038
No related branches found
No related tags found
No related merge requests found
#######################################
## Population genetics : general script
## J. Plantade
## Jan 2021
#######################################
# packages
library(ade4)
library(pegas)
library(adegenet)
library(PopGenome)
library(hierfstat)
library(ggplot2)
library(poppr)
################
### setting data
# data import into a loci object
loci <- read.vcf("data/CD4/CD4_panTro3.vcf_fixed.vcf", from = 1, to = 572)
nloci <- length(loci)
# convert loci object into a genind object
genind <- loci2genind(loci)
# add population factor
pop(genind)<-rep(c("paniscus","troglodytes_ellioti",
"troglodytes_schweinfurthii","troglodytes_troglodytes","troglodytes_verus"),
c(11,10,6,4, 5))
# convert genind object into a hierfstat object
# hierf <- genind2hierfstat(genind)
##################
### heterozygosity
div <- summary(genind)
locinfo<-VCFloci("data/CD4_panTro3.vcf_fixed.vcf", what = "all", chunk.size = 1e9, quiet = FALSE)
# ggplot2 : Hobs-Hexp VS position
g1 <- ggplot() + geom_point(aes(x=locinfo$POS, y=div$Hobs-div$Hexp), size=0.3) + xlab("Position") + ylab("Hobs - Hexp") + ggtitle("Difference between Hobs and Hexp for each position") + theme_light()
g1
# Bartlett test (homogeneity of variance)
# H0 : Hobs = Hexp
bartlett.test(list(div$Hexp, div$Hobs)) # S : population is structured
# create one genind object for each population
subspecies <- c("paniscus","troglodytes_ellioti","troglodytes_schweinfurthii","troglodytes_troglodytes","troglodytes_verus")
pop <- c("pan", "ellioti", "schwein", "troglo", "verus")
npop <- length(pop)
for (i in 1:length(pop)){
tmp <- popsub(genind, sublist = c(subspecies[i]))
assign(pop[i], tmp)
}
# extracting heterozygosity for each population
list_genind <- c(pan, ellioti, schwein, troglo, verus)
for (i in 1:length(list_genind)){
tmp <- summary(list_genind[[i]])
assign(paste0(pop[i], "_div"), tmp)
}
# -> possibility to to Bartlett test and plot Hobs for each pop
#######
### Fst
# Matrix X with colnames and rownames = pop, create the name of pairs
npop <- 5
M <- matrix(NA, nrow = npop, ncol = npop)
M <- as.data.frame(M)
rownames(M) <- pop
colnames(M) <- pop
for (i in 1:npop){
start <- colnames(M)[i]
for (j in i+1:npop-i){
end <- rownames(M)[j]
M[j, i] <- paste0(start, "_", end)
}
}
# creates a list of pair names
pair_name <- c(rep("NA", (npop^2-npop)/2))
k <- 1
for (i in 1:(npop-1)){
for (j in i+1:(npop-i)){
pair_name[k] <- M[j, i]
k <- k+1
}
}
# genind subset -> couple of population.
pair_list <- NULL
for (i in 1:(npop-1)){ # in matrix M we go from col 2 to 5
for (j in (i+1):(npop)){ # in matrix M we go from row 3 to 6
tmp <- popsub(genind, sublist = c(subspecies[i], subspecies[j]))
tmp2 <- assign(M[j, i], tmp)
pair_list <- c(pair_list, tmp2) # list of all pop pairs
}
}
# calculation of F statistics and extraction of Fst values
fst <- matrix(NA, nrow = nloci, ncol = length(pair_list))
for (i in 1:length(pair_list)){
tmp <- Fst(as.loci(pair_list[[i]]), na.alleles = "")
fst[,i] <- tmp[,2]
}
fst <- as.data.frame(fst)
colnames(fst) <- pair_name
# Fst values VS loci position
plot(locinfo$POS, fst[,1], cex=0.2, xlab="Position", ylab="Fst value", main=colnames(fst)[1])
abline(0.15, 0)
#############
## Clustering with DAPC
# Identify the number of cluster
grp <- find.clusters(genind, max.n.clust = 10) #PCs to retain = 35; nb of clusters = 4
# Kstat : BIC for each value of k, here k=4 has the lower BIC
grp$Kstat
# stat : gives the selected nb of clusters (selected k) and its BIC value
grp$stat
# grp : gives the id of the group to which each individual is assigned
head(grp$grp)
# size : number of individuals assigned to each group
grp$size
# Compare inferred groups (inf) with actual groups (ori)
# Find actual groups with function pop on the genind object
pop(genind)
tab <- table(pop(genind), grp$grp)
# Display
table.value(tab, col.labels = colnames(tab), row.labels = rownames(tab))
# describe clusters using dapc1
dapc1 <- dapc(genind, grp$grp) # PCs=10, Linear discriminant=5
# visualization of clusters
myCol <- c("darkblue", "purple", "green", "orange")
scatter(dapc1, scree.da = FALSE, bg="white", pch=20, cell=0, cstar=0, col=myCol, solid=0.4, cex=1, clab=0, leg=TRUE, txt.leg = (paste("Cluster", 1:4)))
##########
## Tajimas'D with popGenome
# list of populations
populations <- list("paniscus","troglodytes_ellioti","troglodytes_schweinfurthii","troglodytes_troglodytes","troglodytes_verus")
# create a GENOME class object with vcf file
genome <- readData(path = "data/CD4/", format = "VCF")
# function that create a list, which element is the ensemble of individual for each pop
listing_ind <- function(npan, nellio, nschwein, ntroglo, nverus){
pan_vec <- c(rep("NA", npan))
ellio_vec <- c(rep("NA", nellio))
schwein_vec <- c(rep("NA", nschwein))
troglo_vec <- c(rep("NA", ntroglo))
verus_vec <- c(rep("NA", nverus))
for (i in 1:npan){
pan_vec[i] <- paste0("pan_", i)
}
for (i in 1:nellio){
ellio_vec[i] <- paste0("ellioti_", i)
}
for (i in 1:nschwein){
schwein_vec[i] <- paste0("schwein_", i)
}
for (i in 1:ntroglo){
troglo_vec[i] <- paste0("troglo_", i)
}
for (i in 1:nverus){
verus_vec[i] <- paste0("verus_", i)
}
populations <- list(pan_vec, ellio_vec, schwein_vec, troglo_vec, verus_vec)
}
# apply listing_ind with the rignt number of individuals for each pop
# and save it in the object 'populations'
populations <- listing_ind(11, 10, 6, 4, 5)
# set populations in the genome object
genome <- set.populations(genome, new.populations = populations, diploid = TRUE)
# check
genome@populations
# neutrality tests
genome <- neutrality.stats(genome) # error
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment