Skip to content
Snippets Groups Projects
Commit ba52f707 authored by jplantad's avatar jplantad
Browse files

Delete popg_general_01262021.R

parent 4fb66391
No related branches found
No related tags found
No related merge requests found
#######################################
## Population genetics : general script
## J. Plantade
## Jan 2021
#######################################
# packages
library(ade4)
library(pegas)
library(adegenet)
library(PopGenome)
library(hierfstat)
library(ggplot2)
library(poppr)
################
### setting data
# data import into a loci object
loci <- read.vcf("data/CD4/CD4_panTro3.vcf_fixed.vcf", from = 1, to = 572)
nloci <- length(loci)
# convert loci object into a genind object
genind <- loci2genind(loci)
# add population factor
pop(genind)<-rep(c("paniscus","troglodytes_ellioti",
"troglodytes_schweinfurthii","troglodytes_troglodytes","troglodytes_verus"),
c(11,10,6,4, 5))
# convert genind object into a hierfstat object
# hierf <- genind2hierfstat(genind)
##################
### heterozygosity
div <- summary(genind)
locinfo<-VCFloci("data/CD4_panTro3.vcf_fixed.vcf", what = "all", chunk.size = 1e9, quiet = FALSE)
# ggplot2 : Hobs-Hexp VS position
g1 <- ggplot() + geom_point(aes(x=locinfo$POS, y=div$Hobs-div$Hexp), size=0.3) + xlab("Position") + ylab("Hobs - Hexp") + ggtitle("Difference between Hobs and Hexp for each position") + theme_light()
g1
# Bartlett test (homogeneity of variance)
# H0 : Hobs = Hexp
bartlett.test(list(div$Hexp, div$Hobs)) # S : population is structured
# create one genind object for each population
subspecies <- c("paniscus","troglodytes_ellioti","troglodytes_schweinfurthii","troglodytes_troglodytes","troglodytes_verus")
pop <- c("pan", "ellioti", "schwein", "troglo", "verus")
npop <- length(pop)
for (i in 1:length(pop)){
tmp <- popsub(genind, sublist = c(subspecies[i]))
assign(pop[i], tmp)
}
# extracting heterozygosity for each population
list_genind <- c(pan, ellioti, schwein, troglo, verus)
for (i in 1:length(list_genind)){
tmp <- summary(list_genind[[i]])
assign(paste0(pop[i], "_div"), tmp)
}
# -> possibility to to Bartlett test and plot Hobs for each pop
#######
### Fst
# Matrix X with colnames and rownames = pop, create the name of pairs
npop <- 5
M <- matrix(NA, nrow = npop, ncol = npop)
M <- as.data.frame(M)
rownames(M) <- pop
colnames(M) <- pop
for (i in 1:npop){
start <- colnames(M)[i]
for (j in i+1:npop-i){
end <- rownames(M)[j]
M[j, i] <- paste0(start, "_", end)
}
}
# creates a list of pair names
pair_name <- c(rep("NA", (npop^2-npop)/2))
k <- 1
for (i in 1:(npop-1)){
for (j in i+1:(npop-i)){
pair_name[k] <- M[j, i]
k <- k+1
}
}
# genind subset -> couple of population.
pair_list <- NULL
for (i in 1:(npop-1)){ # in matrix M we go from col 2 to 5
for (j in (i+1):(npop)){ # in matrix M we go from row 3 to 6
tmp <- popsub(genind, sublist = c(subspecies[i], subspecies[j]))
tmp2 <- assign(M[j, i], tmp)
pair_list <- c(pair_list, tmp2) # list of all pop pairs
}
}
# calculation of F statistics and extraction of Fst values
fst <- matrix(NA, nrow = nloci, ncol = length(pair_list))
for (i in 1:length(pair_list)){
tmp <- Fst(as.loci(pair_list[[i]]), na.alleles = "")
fst[,i] <- tmp[,2]
}
fst <- as.data.frame(fst)
colnames(fst) <- pair_name
# Fst values VS loci position
plot(locinfo$POS, fst[,1], cex=0.2, xlab="Position", ylab="Fst value", main=colnames(fst)[1])
abline(0.15, 0)
#############
## Clustering with DAPC
# Identify the number of cluster
grp <- find.clusters(genind, max.n.clust = 10) #PCs to retain = 35; nb of clusters = 4
# Kstat : BIC for each value of k, here k=4 has the lower BIC
grp$Kstat
# stat : gives the selected nb of clusters (selected k) and its BIC value
grp$stat
# grp : gives the id of the group to which each individual is assigned
head(grp$grp)
# size : number of individuals assigned to each group
grp$size
# Compare inferred groups (inf) with actual groups (ori)
# Find actual groups with function pop on the genind object
pop(genind)
tab <- table(pop(genind), grp$grp)
# Display
table.value(tab, col.labels = colnames(tab), row.labels = rownames(tab))
# describe clusters using dapc1
dapc1 <- dapc(genind, grp$grp) # PCs=10, Linear discriminant=5
# visualization of clusters
myCol <- c("darkblue", "purple", "green", "orange")
scatter(dapc1, scree.da = FALSE, bg="white", pch=20, cell=0, cstar=0, col=myCol, solid=0.4, cex=1, clab=0, leg=TRUE, txt.leg = (paste("Cluster", 1:4)))
##########
## Tajimas'D with popGenome
# list of populations
populations <- list("paniscus","troglodytes_ellioti","troglodytes_schweinfurthii","troglodytes_troglodytes","troglodytes_verus")
# create a GENOME class object with vcf file
genome <- readData(path = "data/CD4/", format = "VCF")
# function that create a list, which element is the ensemble of individual for each pop
listing_ind <- function(npan, nellio, nschwein, ntroglo, nverus){
pan_vec <- c(rep("NA", npan))
ellio_vec <- c(rep("NA", nellio))
schwein_vec <- c(rep("NA", nschwein))
troglo_vec <- c(rep("NA", ntroglo))
verus_vec <- c(rep("NA", nverus))
for (i in 1:npan){
pan_vec[i] <- paste0("pan_", i)
}
for (i in 1:nellio){
ellio_vec[i] <- paste0("ellioti_", i)
}
for (i in 1:nschwein){
schwein_vec[i] <- paste0("schwein_", i)
}
for (i in 1:ntroglo){
troglo_vec[i] <- paste0("troglo_", i)
}
for (i in 1:nverus){
verus_vec[i] <- paste0("verus_", i)
}
populations <- list(pan_vec, ellio_vec, schwein_vec, troglo_vec, verus_vec)
}
# apply listing_ind with the rignt number of individuals for each pop
# and save it in the object 'populations'
populations <- listing_ind(11, 10, 6, 4, 5)
# set populations in the genome object
genome <- set.populations(genome, new.populations = populations, diploid = TRUE)
# check
genome@populations
# neutrality tests
genome <- neutrality.stats(genome) # error
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment