# SIMlogit.R # # by Chris Fariss # # One variable simulation of a binary dependent variable y and one independent variable x # using (1) a user defined numerical function, (2) a user defined function optimized with the # optim function in R, (3) the generalized linear model function glm in R #install.packages("graphics") library(graphics) # simulate x1 and set the "true" population values alpha and beta n <- 100 x1 <- rnorm(n,0,1) alpha <- 1.250000 beta <- 2.500000 # systematic component of the model xb <- alpha + beta * x1 # transform the linear term xb using the logit function so that theta is bound from 0 to 1 theta <- 1 / (1 + exp(-xb)) # generate the dependent variable y with theta and measurment error error <- runif(n,0,1) y <- ifelse(error < theta,1,0) # column bind all x variables (there is only one x in this version of the simulation) x <- cbind(x1) # generate the number of k variables in the regression k <- ncol(x) # column bind the x variables to a vector of 1s to form the X-variable matrix X <- cbind(1, x) # create a likelihood-matrix of possible values for beta0 and beta1 as the corrdinates within that matrix b0 <- seq(-5, 5, 0.1) # these sequences should technically range from -Inf to Inf b1 <- seq(-5, 5, 0.1) # but we know that the parameter values that maximize the likelihood lie within their range likelihood <- matrix(NA, nrow=length(b0), ncol=length(b1)) # fill in the values of the matrix using the coordiates in b0 and b1 as possible parameter values that will maximize the likelihood for(i in 1:length(b0)){ for(j in 1:length(b1)){ betas <- rbind(b0[i], b1[j]) mu <- X %*% betas likelihood[i,j] <- -sum(log(1 + (exp((1-(2*y))*mu)))) } } # print the maximum value from the matrix, which should approximate the maximum likelihood estimate max(likelihood) # determine the coordinates of the maximum value within the matrix # these coordiates are also the parameters estimates for the values of alpha and beta that maximize the likelihood function coordinates <- NA for(i in 1:length(b0)){ for(j in 1:length(b1)){ if(max(likelihood)==likelihood[i,j]){ coordinates <- c(i,j) } } } # find the coordinates from the matrix where the likelihood resides max(likelihood) == likelihood[coordinates[1], coordinates[2]] # the coordinates of the likelihood matrix are the parameters alpha and beta that maximize the likelihood function par <- c(b0[coordinates[1]], b1[coordinates[2]]) # print the maximum likelihood estimates par # a faster alternative for finding the maximum value within the matrix coordinates <- which(max(likelihood)==likelihood, arr.ind = TRUE) par <- c(b0[coordinates[1]], b1[coordinates[2]]) par # use the optim function R for faster and more precise results # create a function that is passed to the optim function below # par is the initial parameter values that optim starts with to maximize the function # X is the matrix of covariates with a column of 1s, y is the observed dependent variable # out the value of the log likelihood function that is returned by the function logit.ll <- function(par, X, y){ betas <- par[1:ncol(X)] mu <- X %*% betas out <- -sum(log(1 + (exp((1-(2*y))*mu)))) return(out) } # pass the user defind function to optim with 0s as starting values for each parameter estimate optim.out <- optim(par = rep(0,ncol(X)), logit.ll, y = y, X = X, method="BFGS", control=list(fnscale = -1), hessian = TRUE) se <- sqrt(diag(solve(-optim.out$hessian))) #calculate standard errors VCV <- solve(-optim.out$hessian) #compute variance-covariance matrix # print the maximum likelihood, parameter estimates and standard errors of the parameter estimates optim.out$value optim.out$par se # check the values using the glm function model <- glm(y ~ x1, binomial(link = "logit")) logLik(model) summary(model)$coefficients # plot the likelihood function in space # 3D plot persp(b0, b1, likelihood, theta = 45, phi = 30, shade=NA, col=5) # 2D plots par(mfrow=c(1,2)) plot(b0, likelihood[,coordinates[2]], type="l", ylim=c(min(likelihood), max(likelihood)), lwd=3, col=4, xlab="alpha", ylab="Likelihood Function") points(b0[coordinates[1]], likelihood[coordinates[1],coordinates[2]], col=2, pch=19) plot(b1, likelihood[coordinates[1],], type="l", ylim=c(min(likelihood), max(likelihood)), lwd=3, col=4, xlab="beta", ylab="Likelihood Function") points(b1[coordinates[2]], likelihood[coordinates[1],coordinates[2]], col=2, pch=19) # reset the plot window par(mfrow=c(1,1))