do.pca<-function(gmat){ ### ---- function to calc pca from genetic covariances ### inds in columns, loci in rows gmn<-apply(gmat,1,mean, na.rm=T) gmnmat<-matrix(gmn,nrow=nrow(gmat),ncol=ncol(gmat)) gprime<-gmat-gmnmat ## remove mean gcovarmat<-matrix(NA,nrow=ncol(gmat),ncol=ncol(gmat)) for(i in 1:ncol(gmat)){ for(j in i:ncol(gmat)){ if (i==j){ gcovarmat[i,j]<-cov(gprime[,i],gprime[,j], use="pairwise.complete.obs") } else{ gcovarmat[i,j]<-cov(gprime[,i],gprime[,j], use="pairwise.complete.obs") gcovarmat[j,i]<-gcovarmat[i,j] } } } prcomp(x=gcovarmat,center=TRUE,scale=FALSE) } ## random matrix for 15 inds and five genetic loci (scores, measures) random.genotypes<-matrix(sample(c(0,1,2), 15*5, replace=T), ncol=15, nrow=5) ## pca of genetic covariances random.pca<-do.pca(random.genotypes) pcSummary.random<-summary(random.pca) par(mfrow=c(1,3), pty='s') plot(random.pca$x[,'PC1'], random.pca$x[,'PC2'], pch=19, cex=1.5, xlab=paste("PC1 (", round(pcSummary.random$importance[2,1]*100, 1), "%)", sep=""), ylab=paste("PC2 (", round(pcSummary.random$importance[2,2]*100, 1), "%)", sep=""), main="PCA") plot(random.pca$x[,'PC2'], random.pca$x[,'PC3'], pch=19, cex=1.5, xlab=paste("PC2 (", round(pcSummary.random$importance[2,2]*100, 1), "%)", sep=""), ylab=paste("PC3 (", round(pcSummary.random$importance[2,3]*100, 1), "%)", sep=""), main="PCA") ## For comparison, here is a pca of inds and loci, rather than genetic ## covariances between individuals. random.pca.x<-prcomp(x=t(random.genotypes),center=TRUE,scale=FALSE) pcSummary.random.x<-summary(random.pca.x) plot(random.pca.x$x[,'PC1'], random.pca.x$x[,'PC2'], pch=19, cex=1.5, xlab=paste("PC1 (", round(pcSummary.random.x$importance[2,1]*100, 1), "%)", sep=""), ylab=paste("PC2 (", round(pcSummary.random.x$importance[2,2]*100, 1), "%)", sep=""), main="PCA") random.pca.x2<-prcomp(x=random.genotypes,center=TRUE,scale=FALSE) pcSummary.random.x2<-summary(random.pca.x2) plot(random.pca.x2$x[,'PC1'], random.pca.x2$x[,'PC2'], pch=19, cex=1.5, xlab=paste("PC1 (", round(pcSummary.random.x2$importance[2,1]*100, 1), "%)", sep=""), ylab=paste("PC2 (", round(pcSummary.random.x2$importance[2,2]*100, 1), "%)", sep=""), main="PCA") ## check out pca return objects ## can we recapitulate the matrix algebra using the matrix, the eigenvector and the eigenvalue? # --------------- more structured simulation set.seed(15151) nloci <- 250 nind <- 25 simFst<-0.3 npop<-5 ## five populations ### can experiment here to see how ancestral polymorphism affects PCA and ### interacts with Fst sim.pi <- rbeta(nloci, 15, 15) ## simulate allele frequencies in npop derived (Fst=simFst) populations sim.p <- matrix(nrow=nloci, ncol=npop) for(j in 1:npop){ sim.p[,j] <- rbeta(nloci, sim.pi * (-1 + 1/simFst), (1-sim.pi) * (-1 + 1/simFst)) } ## simulate genotypes in each population sim.x <- array(0, dim=c(nloci, npop*nind)) for(j in (1:npop) - 1){ for(k in 1:nind){ sim.x[,j*nind + k] <- rbinom(nloci, 2, prob=sim.p[,j+1]) } } colors<-c("#8DD3C7","#FFFFB3","#BEBADA","#FB8072","#80B1D3","#FDB462", "#B3DE69","#FCCDE5","#D9D9D9","#BC80BD","#CCEBC5","#FFED6F") ## pca of genetic covariances sim.pca<-do.pca(sim.x) pcSummary.sim<-summary(sim.pca) par(mfrow=c(1,3), pty='s') ### note: colors are set up for max of 12 populations plot(sim.pca$x[,'PC1'], sim.pca$x[,'PC2'], col=rep(colors[1:npop], each=nind), pch=19, cex=1.5, xlab=paste("PC1 (", round(pcSummary.sim$importance[2,1]*100, 1), "%)", sep=""), ylab=paste("PC2 (", round(pcSummary.sim$importance[2,2]*100, 1), "%)", sep=""), main="PCA") mtext(paste("simulated Fst", simFst), 3,0.5) plot(sim.pca$x[,'PC2'], sim.pca$x[,'PC3'], col=rep(colors[1:npop], each=nind), pch=19, cex=1.5, xlab=paste("PC2 (", round(pcSummary.sim$importance[2,2]*100, 1), "%)", sep=""), ylab=paste("PC3 (", round(pcSummary.sim$importance[2,3]*100, 1), "%)", sep=""), main="PCA") #plot(sim.pca$x[,'PC4'], sim.pca$x[,'PC5'], col=rep(colors[1:npop], each=nind), pch=19, cex=1.5, # xlab=paste("PC3 (", round(pcSummary.sim$importance[2,3]*100, 1), "%)", sep=""), # ylab=paste("PC4 (", round(pcSummary.sim$importance[2,4]*100, 1), "%)", sep=""), main="PCA") ## For comparison, here is a pca of inds and loci, rather than genetic ## covariances between individuals. sim.pca.x<-prcomp(x=t(sim.x),center=TRUE,scale=FALSE) pcSummary.sim.x<-summary(sim.pca.x) plot(sim.pca.x$x[,'PC1'], sim.pca.x$x[,'PC2'], col=rep(colors[1:npop], each=nind), pch=19, cex=1.5, xlab=paste("PC1 (", round(pcSummary.sim.x$importance[2,1]*100, 1), "%)", sep=""), ylab=paste("PC2 (", round(pcSummary.sim.x$importance[2,2]*100, 1), "%)", sep=""), main="PCA")