##------------------------------SIMULATE DATA---------------------------## ## The design of the study is that there are two plant genotypes that were ## planted into two treatments (crowded and uncrowded planting). We are ## interested in the extent to which treatment or genotype affected phenotype. ## This is akin to a two-way analysis of variance, but is more flexible and ## robust. simdata<-data.frame(genotype=sample(c(0,1), 200, replace=T), treatment=sample(c(0,1), 200, replace=T), phenotype=rnorm(200, mean=25, sd=5)) category<-numeric(200) for(j in 1:200){ if(simdata[j,1]==0 & simdata[j,2]==0) category[j] <- 1 else if (simdata[j,1]==1 & simdata[j,2]==0) category[j] <- 2 else if (simdata[j,1]==0 & simdata[j,2]==1) category[j] <- 3 else if (simdata[j,1]==1 & simdata[j,2]==1) category[j] <- 4 } simdata.noeffect<-cbind(simdata,category) simdata.witheffect <- simdata.noeffect ## genotype affects phenotype by increasing the mean by 7.5 ## treatment (crowded versus uncrowded planting) affects phenotype by increasing the mean by 5 ## the residual variance in each group has a standard deviation of 5 ## the overall intercept is 15 simdata.witheffect$phenotype <- simdata.witheffect$genotype * 7.5 + simdata.witheffect$treatment * 5 + rnorm(200, 0, 5) + 15 ##-------------------------------FUNCTIONS------------------------------## ## calculates the metropolis hasting ratio for sigma parameters mhsigma<-function(sig=NULL, m=NULL, step=NULL, whichsig=NULL, simd=NULL){ ## calculate likelihood for old and new for the relevant sigma, mu and data liknew<-sum(dnorm(simdata[which(simd[,4]==whichsig),3],m[whichsig,step-1], sigma[whichsig,step]), log=T) likold<-sum(dnorm(simdata[which(simd[,4]==whichsig),3],m[whichsig,step-1], sigma[whichsig,step-1]), log=T) ## calc prior sigprnew <- dgamma(sigma[whichsig, i],shape=0.0001,scale=0.0001, log=T) if (sigprnew==-Inf){ sigprnew<-log(.Machine$double.xmin) } sigprold<-dgamma(sigma[whichsig, i-1],shape=0.0001,scale=0.0001, log=T) if (sigprold==-Inf){ sigprold<-log(.Machine$double.xmin) } ## sum the above pnew<- liknew + sigprnew pold<- likold + sigprold ## calculate probability of proposing new and old sigmas p_propnew <- dgamma(sigma[whichsig, i],shape=2,scale=3,log=T) p_propold <- dgamma(sigma[whichsig, i-1],shape=2,scale=3, log=T) ## mhratio mh = (pnew - p_propnew) - (pold - p_propold) ## accept or reject proposed value of mu ifelse(log(runif(1)) > mh, return(0), return(1)) } ## calculates the metropolis ratio for the beta parameters mhbeta<-function(m=NULL, sig=NULL, b0=NULL, b1=NULL, b2=NULL, step=NULL, simd=NULL){ ## calculate likelihood for old and new liknew<-0 for(j in 1:4){ liknew<-liknew + sum(dnorm(simd[which(simd[,4]==j),3], mu[j,step], sigma[j,step-1], log=T)) } likold<-0 for(j in 1:4){ likold<-likold + sum(dnorm(simd[which(simd[,4]==j),3], mu[j,step-1], sigma[j,step-1], log=T)) } ## calc prior for betas betapriornew <- log(dnorm(b0[step],0,1000)) + log(dnorm(b1[step],0,1000)) + log(dnorm(b2[step],0,1000)) betapriorold <- log(dnorm(b0[step-1],0,1000)) + log(dnorm(b1[step-1],0,1000)) + log(dnorm(b2[step-1],0,1000)) ## sum the above pnew<- liknew + betapriornew pold<- likold + betapriorold ## mhratio, proposal is uniform so this reduces to metropolis, hence no p_propnew, p_propold mh = pnew - pold ## accept or reject proposed value of mu ifelse(log(runif(1)) > mh, return(0), return(1)) } ##-------------------------------MCMC procedure------------------------------## ## choose which data set to analyze simdata<- simdata.noeffect simdata <- simdata.witheffect ## Number of steps for MCMC mcmcL<-5100 ## Vectors and arrays to save our estimates of mu, sigma, beta coefficients, and P(M|D) mu<-matrix(0, nrow=4, ncol=mcmcL) ## mu[1:4,] are parameters sigma<-matrix(0, nrow=4, ncol=mcmcL) beta0<-numeric(mcmcL) ## intercept beta1<-numeric(mcmcL) ## effect of level 1 for treatment 1 beta2<-numeric(mcmcL) ## effect of level 1 for treatment 2 prMD<-numeric(mcmcL) ## random seed for sigmas, and betas to intilize the mcmc sigma[,1]<-runif(4,0.000001,25) mndata<-mean(simdata[,3]) sddata<-sd(simdata[,3]) ## adjust mult if mixing of betas is poor: 1 does not work great for the simulated data mult<-0.25 move<-sddata * mult beta0[1]<-runif(1,mndata - sddata, mndata + sddata) beta1[1]<-runif(1,-sddata,sddata) beta2[1]<-runif(1,-sddata,sddata) ## calculate intitial mus mu[1,1]<-beta0[1] mu[2,1]<-beta0[1] + beta1[1] mu[3,1]<-beta0[1] + beta2[1] mu[4,1]<-beta0[1] + beta1[1] + beta2[1] ## calculate intital P(M|D) ## first calc likelihood, normal distributions with mu and sigma ## for appropriate combination of treatments lik<-0 for(j in 1:4){ lik<-lik + sum(dnorm(simdata[which(simdata[,4]==j),3],mu[j,1], sigma[j,1], log=T)) } ## prior on each sigma, these are gamma priors sigprior<-0 for(j in 1:4){ onesp<-log(dgamma(sigma[j,1],shape=0.0001,scale=0.0001)) if (onesp==-Inf){ onesp<-log(.Machine$double.xmin) } sigprior = sigprior + onesp } ## prior on each beta coefficient, these are normal priors betaprior <- log(dnorm(beta0[1],0,1000)) + log(dnorm(beta1[1],0,1000)) + log(dnorm(beta2[1],0,1000)) ## combind the likelihood and priors from above to get P(M|D) prMD[1]<-lik + sigprior + betaprior ## for printing ctr<-1 ## enter the MCMC for(i in 2:mcmcL){ ctr<-ctr + 1 if(ctr==100){ ctr<-0 cat("mcmc step: ",i,", P(M|D): ",prMD[i-1], fill=T) } ## proposal of candidate values for sigmas and betas ## sigma uses an independence chain sigma[,i]<-rgamma(4, shape = 2, scale = 3) ## the betas use random walk chains beta0[i]<-runif(1,beta0[i-1] - move, beta0[i-1] + move) beta1[i]<-runif(1,beta1[i-1] - move, beta1[i-1] + move) beta2[i]<-runif(1,beta2[i-1] - move, beta2[i-1] + move) ## calculate the new mus from betas mu[1,i]<-beta0[i] mu[2,i]<-beta0[i] + beta1[i] mu[3,i]<-beta0[i] + beta2[i] mu[4,i]<-beta0[i] + beta1[i] + beta2[i] ## determine whether to accept or reject proposed values of sigma based on mh ratio ## this is done one at a time for(j in 1:4){ keep <- mhsigma(sigma, mu, i, j, simdata) ## if we didn't accept the proposed value if (keep==0) sigma[j,i]<-sigma[j,i-1] } ## determine whether to accept or reject proposed values of betas based on mh ratio ## all at once because of likely correlated nature of betas keep <- mhbeta(mu, sigma, beta0, beta1, beta2, i, simdata) ## if we didn't accept the proposed values if (keep==0) { beta0[i]<-beta0[i-1] beta1[i]<-beta1[i-1] beta2[i]<-beta2[i-1] for (j in 1:4){ mu[j,i]<-mu[j,i-1] } } ## calculate new P(M|D) ## first calc likelihood, normal distributions with mu and sigma ## for appropriate combination of treatments lik<-0 for(j in 1:4){ lik<-lik + sum(log(dnorm(simdata[which(simdata[,4]==j),3],mu[j,i], sigma[j,i]))) } ## prior on each sigma, these are gamma priors sigprior<-0 for(j in 1:4){ onesp<-log(dgamma(sigma[j,i],shape=0.0001,scale=0.0001)) if (onesp==-Inf){ onesp<-log(.Machine$double.xmin) } sigprior = sigprior + onesp } ## prior on each beta coefficient, these are normal priors betaprior <- log(dnorm(beta0[i],0,1000)) + log(dnorm(beta1[i],0,1000)) + log(dnorm(beta2[i],0,1000)) ## combind the likelihood and priors from above to get P(M|D) prMD[i]<-lik + sigprior + betaprior } ##-----------------------A FEW PLOTS and STATS on chains ---------------------------## ## chain history plots ## p(M|D) plot(prMD,type="l", ylab="log P(M|D)", xlab="iteration") ## plot mixing for mu and sigma estimates par(mfrow=c(4,2), mar=c(4,4,1,1)) for(i in 1:4){ plot(mu[i,],type="l", ylab=substitute(mu[i], list(i=i)), xlab="iteration") plot(sigma[i,],type="l", ylab=substitute(sigma[i], list(i=i)), xlab="iteration") } ## the three betas par(mfrow=c(3,1), mar=c(4,4,1,1)) plot(beta0,type="l", ylab=expression(beta[0]), xlab="iteration") plot(beta1,type="l", ylab=expression(beta[1]), xlab="iteration") plot(beta2,type="l", ylab=expression(beta[2]), xlab="iteration") ## density for b1 and b2, suggests that genotype had a greater effect ## than the experimental treatment (with simdata.witheffect) par(mfrow=c(1,1)) plot(density(beta1[101:5100],adjust=2), xlab=expression(beta), xlim=c(0, 10)) lines(density(beta2[101:5100],adjust=2),col="darkblue", lty=2) summary(beta1[101:5100]) summary(beta2[101:5100]) ## boxplots of the posterior probability distributions for the four mus par(mfrow=c(2,1)) ## plot of parameter estimates (mean) for each group, variation reflects uncertainty about the mean (the parameter mu) boxplot(x=list(mu[1,101:5100],mu[2,101:5100],mu[3,101:5100],mu[4,101:5100]), names=c("6739/CR","913/CR","6739/UN","913/UN")) # plot of empirical distributions of data points boxplot(phenotype ~ genotype + treatment, data=simdata, names=c("6739/CR","913/CR","6739/UN","913/UN")) summary(beta1[101:5100]) summary(lm(dayflwr ~ genotype + treatmnt))