# Scatter plot of (Hobs - Hexp) against position; one small point per row of
# locinfo / div. The vectors are passed straight into aes(), so no data frame
# is attached to the plot.
g1 <- ggplot() +
  geom_point(
    aes(x = locinfo$POS, y = div$Hobs - div$Hexp),
    size = 0.3
  ) +
  labs(
    x = "Position",
    y = "Hobs - Hexp",
    title = "Difference between Hobs and Hexp for each position"
  ) +
  theme_light()
g1
# Test Hobs and Hexp normality : Kolmogorov-Smirnov test for samples > 50.
# Each sample is compared with a normal distribution that has the sample's own
# mean and standard deviation; ks.test(x, "pnorm") without parameters would
# compare against the standard normal N(0, 1) instead.
# NOTE : because mean/sd are estimated from the tested sample, the KS p-value
# is conservative (see the ks.test documentation).
hist(div$Hobs)
ks.test(
  x = div$Hobs, y = "pnorm",
  mean = mean(div$Hobs, na.rm = TRUE), sd = sd(div$Hobs, na.rm = TRUE)
)
hist(div$Hexp)
# Test Hexp here (the previous version tested Hobs a second time).
ks.test(
  x = div$Hexp, y = "pnorm",
  mean = mean(div$Hexp, na.rm = TRUE), sd = sd(div$Hexp, na.rm = TRUE)
)
# BARTLETT TEST : homogeneity of variance between two samples (obs VS exp).
#   H0 : Var(Hobs) = Var(Hexp)
# WARNING : the Bartlett test is not robust to non-normal distributions; if
# the samples are not normal, use the Levene test instead.
# Theory : https://eric.univ-lyon2.fr/~ricco/cours/cours/Comp_Pop_Tests_Parametriques.pdf
# Author's recorded result : S -> population is structured.
bartlett.test(list(div$Hexp, div$Hobs))
# Pull the two H vectors out of div as standalone variables, then bind them
# column-wise into a data frame H with columns "Hobs" and "Hexp".
Hobs <- div$Hobs
Hexp <- div$Hexp
H <- as.data.frame(cbind(Hobs, Hexp))
# tidy the data : df -> one column with values, one column with the origin of values (observed or expected)
# Point plot built from taj : d_value on the x axis, pop on the y axis.
gg <- ggplot(taj, aes(x = d_value, y = pop)) +
  geom_point() +
  labs(
    title = "Tajima's D value for each P. troglodytes subspecies",
    x = "D value",
    y = "populations"
  )