# SIMlogit01.R 

#
# Christopher J. Fariss (cjf0006@gmail.com), Keith E. Schnakenberg (keschnak@wustl.edu)
# Please contact us if you have any questions
#
# One variable simulation of a binary dependent variable y and one independent variable x
# using a Bayesian model passed to JAGS using the rjags package and then analysed using the coda package.
# make sure that the file SIMlogit01.bug is in the R working directory
#

# install Bayesian packages if necessary
# **NOTE** install JAGS to system before installing the rjags or R2jags packages in R
#install.packages("rjags")
#install.packages("R2jags")
#install.packages("coda")
#install.packages("graphics")

# Load packages
library(rjags)
library(R2jags)
library(coda)
library(graphics)


# simulate x1 and set the "true" population values alpha and beta 
n <- 100 
x1 <- rnorm(n,0,1) 
alpha <- 1.250000 
beta <- 2.500000

# define k as the number of betas to estimate (just 1 in this simulation) 
k <- 1

# systematic component of the model 
xb <- alpha + beta * x1

# transform the linear term xb using the logit function 
# so that theta is bound from 0 to 1 
theta <- 1 / (1 + exp(-xb))

# generate the dependent variable y with theta and measurment error 
error <- runif(n,0,1) 
y <- ifelse(error < theta,1,0)

# create a function to generate initial values drawn from the prior distributions of the parameters
# the values generated from this function are passed to JAGS
inits.function <- function(chain){
  return(switch(chain,
         "1"=list(alpha=rnorm(1), beta=rnorm(k)),
         "2"=list(alpha=rnorm(1), beta=rnorm(k)),
         "3"=list(alpha=rnorm(1), beta=rnorm(k)),
         "4"=list(alpha=rnorm(1), beta=rnorm(k))
                )
         )
}

# generate variables to pass to JAGS
CHAINS <- 4
ADAPT <- 1000
BURNIN <- 5000
DRAWS <- 2000
THIN <- 10


# rjags version of call to JAGS
m <- jags.model(file="SIMlogit01.bug",  data=list("y"=y, "x"=x1, "n"=n), inits=inits.function, n.chains=CHAINS, n.adapt=ADAPT)
update(m, BURNIN)
draws <- coda.samples(m, DRAWS, variable.names=c("alpha", "beta"), THIN)

# check for convergence
gelman.diag(draws)
gelman.plot(draws)
plot(draws)

# other convergence diagnostics 
geweke.diag(draws)
heidel.diag(draws)

# compute parameter estimates from the mcmc chains
mat1 <- as.matrix(draws[[1]])
mat2 <- as.matrix(draws[[2]])
mat3 <- as.matrix(draws[[3]])
mat4 <- as.matrix(draws[[4]])
posterior.estimates <- rbind(mat1, mat2, mat3, mat4)
vars <- t(as.matrix(posterior.estimates))
parameter.mean <- apply(vars, 1, mean)
parameter.sd <- apply(vars, 1, sd)
parameter.lower.ci <- apply(vars, 1, quantile, c(0.025))
parameter.upper.ci <- apply(vars, 1, quantile, c(0.975))

# print the parameter estimates to the screen
cbind(parameter.mean, parameter.sd, parameter.lower.ci, parameter.upper.ci)

# compare to MLE estimates
model <- glm(y ~ x1, binomial(link = "logit"))
summary(model)$coefficients