## random matrix for 15 inds and five genetic loci (scores, measures) random.genotypes<-matrix(sample(c(0,1,2), 15*5, replace=T), ncol=15, nrow=5) random.genotypes.c<-random.genotypes - rowMeans(random.genotypes) ## center each genotype (measure) ## we want to consider the PCA of the following covariance matrix, which is 15 x 15, and represents the ## genetic similarity of individuals cov(random.genotypes.c) ## this should be the same as t(random.genotypes.c) %*% random.genotypes.c / 4, ## but there is something that causes them to differ in this example. ## See (first columns): ## plot(((t(random.genotypes.c) %*% random.genotypes.c) / (4))[,1], cov(random.genotypes.c)[,1]) ## abline(0,1) ## And (for the full matrix): ## plot(((t(random.genotypes.c) %*% random.genotypes.c) / (4)), cov(random.genotypes.c)) ## abline(0,1) ## Compare this to a different matrix ## Xcars<-as.matrix(mtcars) ## Xcars.centered<-scale(Xcars, center=T, scale=F) ##plot(((t(Xcars.centered) %*% Xcars.centered) / (nrow(Xcars.centered)-1))[,1], cov(Xcars.centered)[,1]) ## abline(0,1) ## plot(((t(Xcars.centered) %*% Xcars.centered) / (nrow(Xcars.centered)-1)), cov(Xcars.centered)) ## pca of genetic covariances random.pca<-prcomp(cov(random.genotypes.c)) pcSummary.random<-summary(random.pca) ## the first rotation from the SVD (the first eigenvector) can be applied to the matrix, ## to obtain the projection into PC1. We were failing with this on 6 March 2020 because we ## forgot to normalize the t(X) %*% X calculation by nrow(X) - 1. ## It is n-1 for the sample covariance, rather than the theoretical covariance. all.equal(as.numeric(cov(random.genotypes.c) %*% random.pca$rotation[,1]), random.pca$x[,'PC1']) par(mfrow=c(1,3), pty='s') plot(random.pca$x[,'PC1'], random.pca$x[,'PC2'], pch=19, cex=1.5, xlab=paste("PC1 (", round(pcSummary.random$importance[2,1]*100, 1), "%)", sep=""), ylab=paste("PC2 (", round(pcSummary.random$importance[2,2]*100, 1), "%)", sep=""), main="PCA") plot(random.pca$x[,'PC2'], random.pca$x[,'PC3'], pch=19, cex=1.5, xlab=paste("PC2 (", round(pcSummary.random$importance[2,2]*100, 1), "%)", sep=""), ylab=paste("PC3 (", round(pcSummary.random$importance[2,3]*100, 1), "%)", sep=""), main="PCA") ## For comparison, here is a pca of inds and loci, rather than genetic ## covariances between individuals. random.pca.x<-prcomp(x=t(random.genotypes),center=TRUE,scale=FALSE) pcSummary.random.x<-summary(random.pca.x) plot(random.pca.x$x[,'PC1'], -1*random.pca.x$x[,'PC2'], pch=19, cex=1.5, xlab=paste("PC1 (", round(pcSummary.random.x$importance[2,1]*100, 1), "%)", sep=""), ylab=paste("PC2 (", round(pcSummary.random.x$importance[2,2]*100, 1), "%)", sep=""), main="PCA", col="red")