##------------------------------SIMULATE DATA---------------------------## ## The design of the study is that there are two plant genotypes that were ## planted into two treatments (crowded and uncrowded planting). We are ## interested in the extent to which treatment or genotype affected phenotype. ## This is akin to a two-way analysis of variance, but is more flexible and ## robust. simdata<-data.frame(genotype=sample(c(0,1), 200, replace=T), treatment=sample(c(0,1), 200, replace=T), phenotype=rnorm(200, mean=25, sd=5)) category<-numeric(200) for(j in 1:200){ if(simdata[j,1]==0 & simdata[j,2]==0) category[j] <- 1 else if (simdata[j,1]==1 & simdata[j,2]==0) category[j] <- 2 else if (simdata[j,1]==0 & simdata[j,2]==1) category[j] <- 3 else if (simdata[j,1]==1 & simdata[j,2]==1) category[j] <- 4 } simdata.noeffect<-cbind(simdata,category) simdata.witheffect <- simdata.noeffect ## genotype affects phenotype by increasing the mean by 7.5 ## treatment (crowded versus uncrowded planting) affects phenotype by increasing the mean by 5 ## the residual variance in each group has a standard deviation of 5 ## the overall intercept is 15 simdata.witheffect$phenotype <- simdata.witheffect$genotype * 7.5 + simdata.witheffect$treatment * 5 + rnorm(200, 0, 5) + 15 ##-------------------------------FUNCTIONS------------------------------## ## calculates the metropolis hasting ratio for a proposed sigma parameter and ## the current sigma parameter mhsigma<-function(Data,Mu,SigmaNew,SigmaOld){ ## calculate likelihood for old and new for the relevant sigma, mu and data liknew<-sum(dnorm(Data, Mu, SigmaNew, log=T)) likold<-sum(dnorm(Data, Mu, SigmaOld, log=T)) ## calc prior sigprnew <- dgamma(SigmaNew, shape=0.0001, scale=10000, log=T) # #scale is 1/rate ## could also use a uniform prior # sigprnew <- dunif(SigmaNew, 0.001, 100, log=T) if (sigprnew==-Inf){ sigprnew<-log(.Machine$double.xmin) } sigprold<-dgamma(SigmaOld, shape=0.0001, scale=10000, log=T) #sigprold <- dunif(SigmaOld, 0.001, 100, log=T) if (sigprold==-Inf){ sigprold <- log(.Machine$double.xmin) } ## sum the above pnew<- liknew + sigprnew pold<- likold + sigprold ## calculate probability of proposing new and old sigmas p_propnew <- dgamma(SigmaNew, shape=2, scale=3, log=T) p_propold <- dgamma(SigmaOld, shape=2, scale=3, log=T) ## mhratio mh <- (pnew - p_propnew) - (pold - p_propold) ## reject or accept proposed value of mu, with probability mh # cat("MHSIGMA: mh=", mh, " = (", liknew, " + ", sigprnew, " - ", p_propnew, ") / (", # likold, " + ", sigprold, " - ", p_propold, ")", # " (sigmas:", SigmaNew, " -- ", SigmaOld, ")", fill=T) ifelse(log(runif(1)) < mh, return(SigmaNew), return(SigmaOld)) } ## calculates the metropolis ratio for the beta parameters mhbeta<-function(Data,MuNew,MuOld,Sigma,BetasNew,BetasOld){ ## calculate likelihood for old and new liknew<-0 for(j in 1:4){ liknew<-liknew + sum(dnorm(Data[Data[,4]==j,3], MuNew[j], Sigma[j], log=T)) } likold<-0 for(j in 1:4){ likold<-likold + sum(dnorm(Data[Data[,4]==j,3], MuOld[j], Sigma[j], log=T)) } ## calc prior for betas betapriornew <- sum(dnorm(BetasNew, 0, 1000, log=T) ) betapriorold <- sum(dnorm(BetasOld, 0, 1000, log=T) ) ## sum the above pnew <- liknew + betapriornew pold <- likold + betapriorold if (pnew==-Inf){ pnew<-log(.Machine$double.xmin) } if (pold==-Inf){ pold<-log(.Machine$double.xmin) } ## mhratio, proposal is uniform so this reduces to metropolis, hence no p_propnew, p_propold mh <- pnew - pold ## accept or reject proposed value of mu ifelse(log(runif(1)) < mh, return(list(mu=MuNew,beta=BetasNew)), return(list(mu=MuOld,beta=BetasOld))) } init.chain<-function(Data, sddata){ ## Vectors and arrays to save our estimates of mu, sigma, beta coefficients, and P(M|D) mu<-matrix(0, nrow=4, ncol=mcmcL) ## mu[1:4,] are parameters sigma<-matrix(0, nrow=4, ncol=mcmcL) beta0<-numeric(mcmcL) ## intercept beta1<-numeric(mcmcL) ## effect of level 1 for treatment 1 beta2<-numeric(mcmcL) ## effect of level 1 for treatment 2 prMD<-numeric(mcmcL) ## random seed for sigmas, and betas to intilize the mcmc sigma[,1]<-runif(4,2,10) beta0[1]<-runif(1, mean(Data[,3]) - sddata, mean(Data[,3]) + sddata) beta1[1]<-runif(1,-sddata, sddata) beta2[1]<-runif(1,-sddata, sddata) ## calculate intitial mus mu[1,1]<-beta0[1] mu[2,1]<-beta0[1] + beta1[1] mu[3,1]<-beta0[1] + beta2[1] mu[4,1]<-beta0[1] + beta1[1] + beta2[1] ## calculate intital P(M|D) ## first calc likelihood, normal distributions with mu and sigma ## for appropriate combination of treatments lik<-0 for(j in 1:4){ lik<-lik + sum(dnorm(Data[Data[,4]==j,3], mu[j,1], sigma[j,1], log=T)) } ## prior on each sigma, these are gamma priors sigprior<-0 for(j in 1:4){ onesp<-dgamma(sigma[j,1], shape=0.0001, scale=10000, log=T) #onesp <- dunif(sigma[j,1], 0.001, 100, log=T) if (onesp==-Inf){ onesp<-log(.Machine$double.xmin) } sigprior <- sigprior + onesp } ## prior on each beta coefficient, these are normal priors betaprior <- sum(dnorm(c(beta0[1],beta1[1],beta2[1]), 0, 1000,log=T)) ## combind the likelihood and priors from above to get P(M|D) prMD[1]<-lik + sigprior + betaprior return(list(mu=mu, sigma=sigma, beta0=beta0, beta1=beta1, beta2=beta2, prMD=prMD)) } mcmcStep<-function(Data,MuOld,BetasOld,SigmaOld,move){ ## proposal of candidate values for sigmas and betas ## sigma uses an independence chain sigma<-rgamma(4, shape = 2, scale = 3) ## the betas use random walk chains beta0<-runif(1,BetasOld[1] - move, BetasOld[1] + move) beta1<-runif(1,BetasOld[2] - move, BetasOld[2] + move) beta2<-runif(1,BetasOld[3] - move, BetasOld[3] + move) ## calculate the new mus from betas mu<-numeric(4) mu[1]<-beta0 mu[2]<-beta0 + beta1 mu[3]<-beta0 + beta2 mu[4]<-beta0 + beta1 + beta2 ## determine whether to accept or reject proposed values of sigma based on mh ratio ## this is done one at a time for(j in 1:4){ sigma[j]<-mhsigma(Data=Data[Data[,4]==j,3], Mu=MuOld[j], SigmaNew=sigma[j], SigmaOld=SigmaOld[j] ) } ## determine whether to accept or reject proposed values of betas based on mh ratio ## all at once because of likely correlated nature of betas update<-mhbeta(Data=Data, MuNew=mu, MuOld=MuOld, Sigma=sigma, BetasNew=c(beta0,beta1,beta2), BetasOld=BetasOld) beta0<-update$beta[1] beta1<-update$beta[2] beta2<-update$beta[3] mu<-update$mu ## calculate new P(M|D) ## first calc likelihood, normal distributions with mu and sigma ## for appropriate combination of treatments lik<-0 for(j in 1:4){ lik<-lik + sum(dnorm(Data[Data[,4]==j,3], mu[j], sigma[j], log=T)) } ## prior on each sigma, these are gamma priors sigprior<-0 for(j in 1:4){ onesp<-dgamma(sigma[j],shape=0.0001,scale=10000, log=T) #onesp <- dunif(sigma[j], 0.001, 100, log=T) if (onesp==-Inf){ onesp<-log(.Machine$double.xmin) } sigprior <- sigprior + onesp } ## prior on each beta coefficient, these are normal priors betaprior <- sum(dnorm(c(beta0, beta1, beta2), 0,1000, log=T)) ## combind the likelihood and priors from above to get P(M|D) prMD<-lik + sigprior + betaprior return(list(mu=mu,beta0=beta0,beta1=beta1,beta2=beta2,sigma=sigma,prMD=prMD)) } ##-------------------------------MCMC procedure------------------------------## ## choose which data set to analyze simdata<- simdata.noeffect simdata <- simdata.witheffect ## Number of steps for MCMC mcmcL<-5100 sddata<-sd(simdata[,3]) ## adjust mult if mixing of betas is poor: 1 does not work great for the simulated data mult<-0.125 move<- sddata * mult mcmc.params<-init.chain(simdata,sddata) ## enter the MCMC for(i in 2:mcmcL){ if(! i%%100 ){ cat("mcmc step: ",i,", P(M|D): ", mcmc.params$prMD[i-1], fill=T) } StepResult<-mcmcStep(Data=simdata, MuOld=mcmc.params$mu[,i-1], BetasOld=c(mcmc.params$beta0[i-1], mcmc.params$beta1[i-1], mcmc.params$beta2[i-1]), SigmaOld=mcmc.params$sigma[,i-1], move=move) mcmc.params$mu[,i]<-StepResult$mu mcmc.params$sigma[,i]<-StepResult$sigma mcmc.params$beta0[i]<-StepResult$beta0 mcmc.params$beta1[i]<-StepResult$beta1 mcmc.params$beta2[i]<-StepResult$beta2 mcmc.params$prMD[i]<-StepResult$prMD } ##-----------------------A FEW PLOTS and STATS on chains ---------------------------## dev.off() ## chain history plots ## p(M|D) plot(mcmc.params$prMD,type="l", ylab="log P(M|D)", xlab="iteration") ## plot mixing for mu and sigma estimates par(mfrow=c(4,2), mar=c(4,4,1,1)) for(i in 1:4){ plot(mcmc.params$mu[i,],type="l", ylab=substitute(mu[i], list(i=i)), xlab="iteration") plot(mcmc.params$sigma[i,],type="l", ylab=substitute(sigma[i], list(i=i)), xlab="iteration") } ## the three betas par(mfrow=c(3,1), mar=c(4,4,1,1)) plot(mcmc.params$beta0,type="l", ylab=expression(beta[0]), xlab="iteration") plot(mcmc.params$beta1,type="l", ylab=expression(beta[1]), xlab="iteration") plot(mcmc.params$beta2,type="l", ylab=expression(beta[2]), xlab="iteration") ## compare beta estimates to what we simulated data.frame(param=c("beta0", "beta1", "beta2"), sim=c(15,7.5,5), mcmc=c(mean(mcmc.params$beta0[101:5100]), mean(mcmc.params$beta1[101:5100]), mean(mcmc.params$beta2[101:5100])), lm=coef(lm(phenotype ~ genotype + treatment, data=simdata)) ) ## compare mu estimates to what we simulated cbind(aggregate(phenotype ~ genotype + treatment, data=simdata, FUN=mean), mcmc.mean=c(mean(mcmc.params$mu[1,101:5100]),mean(mcmc.params$mu[2,101:5100]), mean(mcmc.params$mu[3,101:5100]),mean(mcmc.params$mu[4,101:5100])), mcmc.median=c(median(mcmc.params$mu[1,101:5100]),median(mcmc.params$mu[2,101:5100]), median(mcmc.params$mu[3,101:5100]),median(mcmc.params$mu[4,101:5100]))) ## density for b1 and b2, shows that genotype had a greater effect ## than the experimental treatment (with simdata.witheffect) par(mfrow=c(1,1)) plot(density(mcmc.params$beta1[101:5100],adjust=2), xlab=expression(beta), xlim=c(0, 10)) lines(density(mcmc.params$beta2[101:5100],adjust=2),col="darkblue", lty=2) ## boxplots of the posterior probability distributions for the four mus par(mfrow=c(2,1)) # plot of parameter estimates (mean) for each group, variation reflects # uncertainty about the mean (the parameter mu) boxplot(x=list(mcmc.params$mu[1,101:5100],mcmc.params$mu[2,101:5100], mcmc.params$mu[3,101:5100],mcmc.params$mu[4,101:5100]), names=c("6739/CR","913/CR","6739/UN","913/UN")) # plot of empirical distributions of data points boxplot(phenotype ~ genotype + treatment, data=simdata, names=c("6739/CR","913/CR","6739/UN","913/UN")) summary(mcmc.params$beta1[101:5100]) summary(lm(phenotype ~ genotype + treatment, data=simdata))