# SIMlogit.R 

#
# by Chris Fariss
#

# One variable simulation of a binary dependent variable y and one independent variable x
# using (1) a user defined numerical function, (2) a user defined function optimized with the
# optim function in R, (3) the generalized linear model function glm in R

#install.packages("graphics")
library(graphics)

# simulate x1 and set the "true" population values alpha and beta
n <- 100
x1 <- rnorm(n,0,1)
alpha <- 1.250000
beta <- 2.500000

# systematic component of the model
xb <-  alpha + beta * x1

# transform the linear term xb using the logit function so that theta is bound from 0 to 1
theta <-  1 / (1 + exp(-xb))

# generate the dependent variable y with theta and measurment error
error <- runif(n,0,1)
y <- ifelse(error < theta,1,0)

# column bind all x variables (there is only one x in this version of the simulation)
x <- cbind(x1)

# generate the number of k variables in the regression
k <- ncol(x) 

# column bind the x variables to a vector of 1s to form the X-variable matrix
X <- cbind(1, x)

# create a likelihood-matrix of possible values for beta0 and beta1 as the corrdinates within that matrix
b0 <- seq(-5, 5, 0.1)  # these sequences should technically range from -Inf to Inf
b1 <- seq(-5, 5, 0.1)  # but we know that the parameter values that maximize the likelihood lie within their range
likelihood <- matrix(NA, nrow=length(b0), ncol=length(b1))

# fill in the values of the matrix using the coordiates in b0 and b1 as possible parameter values that will maximize the likelihood
for(i in 1:length(b0)){
  for(j in 1:length(b1)){ 
    betas <- rbind(b0[i], b1[j])
    mu <- X %*% betas
    likelihood[i,j] <- -sum(log(1 + (exp((1-(2*y))*mu))))
  }
}

# print the maximum value from the matrix, which should approximate the maximum likelihood estimate
max(likelihood)

# determine the coordinates of the maximum value within the matrix
# these coordiates are also the parameters estimates for the values of alpha and beta that maximize the likelihood function
coordinates <- NA
for(i in 1:length(b0)){
  for(j in 1:length(b1)){
    if(max(likelihood)==likelihood[i,j]){
      coordinates <- c(i,j)
    }
  }
}

# find the coordinates from the matrix where the likelihood resides
max(likelihood) == likelihood[coordinates[1], coordinates[2]]

# the coordinates of the likelihood matrix are the parameters alpha and beta that maximize the likelihood function
par <- c(b0[coordinates[1]], b1[coordinates[2]])

# print the maximum likelihood estimates
par

# a faster alternative for finding the maximum value within the matrix
coordinates <- which(max(likelihood)==likelihood, arr.ind = TRUE)
par <- c(b0[coordinates[1]], b1[coordinates[2]])
par


# use the optim function R for faster and more precise results

# create a function that is passed to the optim function below
# par is the initial parameter values that optim starts with to maximize the function
# X is the matrix of covariates with a column of 1s, y is the observed dependent variable
# out the value of the log likelihood function that is returned by the function
logit.ll <- function(par, X, y){
 betas <- par[1:ncol(X)]
 mu <- X %*% betas
 out <- -sum(log(1 + (exp((1-(2*y))*mu))))
 return(out)
}

# pass the user defind function to optim with 0s as starting values for each parameter estimate
optim.out <- optim(par = rep(0,ncol(X)), logit.ll, y = y, X = X, method="BFGS", control=list(fnscale = -1), hessian = TRUE)

se <- sqrt(diag(solve(-optim.out$hessian))) #calculate standard errors
VCV <- solve(-optim.out$hessian) #compute variance-covariance matrix

# print the maximum likelihood, parameter estimates and standard errors of the parameter estimates
optim.out$value
optim.out$par
se

# check the values using the glm function
model <- glm(y ~ x1, binomial(link = "logit"))
logLik(model)
summary(model)$coefficients

# plot the likelihood function in space 
# 3D plot
persp(b0, b1,  likelihood, theta = 45, phi = 30, shade=NA, col=5)

# 2D plots
par(mfrow=c(1,2))
plot(b0, likelihood[,coordinates[2]], type="l", ylim=c(min(likelihood), max(likelihood)), lwd=3, col=4, xlab="alpha", ylab="Likelihood Function")
points(b0[coordinates[1]], likelihood[coordinates[1],coordinates[2]], col=2, pch=19)

plot(b1, likelihood[coordinates[1],], type="l", ylim=c(min(likelihood), max(likelihood)), lwd=3, col=4, xlab="beta", ylab="Likelihood Function")
points(b1[coordinates[2]], likelihood[coordinates[1],coordinates[2]], col=2, pch=19)

# reset the plot window
par(mfrow=c(1,1))