hierarchical.hw.part2.R

Jim — Oct 28, 2013, 8:04 PM

# Hierarchical Modeling Homework

# The dataset proficiency.txt contains results of the 2007  
# Ohio Graduation Test for 609 school districts.  

data=read.table("http://bayes.bgsu.edu/m6480/homework/proficiency.txt",
                header=TRUE)

# Variables

#  School -- the name of the school district
#  Advanced -- the number of students who scored at an advanced level in the math component of the OGT
#  N -- the sample size

# Choose a random sample of 10 schools and put the data into sample.data:

sample.data <- data[sample(609,10), ]

# Display your data.

sample.data
                            School Advanced   N
503                  LAKE_LOCAL_SD      173 318
484               FOSTORIA_CITY_SD       25 131
404        WEST_MUSKINGUM_LOCAL_SD       58 175
255         LOGAN-HOCKING_LOCAL_SD      104 336
186       PIKE-DELTA-YORK_LOCAL_SD       36 129
530              TWINSBURG_CITY_SD      111 326
551         WEATHERSFIELD_LOCAL_SD       27  70
425                WAVERLY_CITY_SD       33 168
80         CLINTON-MASSIE_LOCAL_SD       54 151
165 WASHINGTON_COURT_HOUSE_CITY_SD       43 121

# Using the function betabinexch in the LearnBayes package, find the 
# approximate mode and variance-covariance matrix of (logit eta, log K).

library(LearnBayes)
yn <- sample.data[, -1]
fit <- laplace(betabinexch, c(0, 2), yn)

fit$mode; fit$var
          [,1]      [,2]
[1,]  0.023202 -0.008169
[2,] -0.008169  0.280368

# Use a random walk Metropolis algorithm to sample 5000 values 
# from the posterior of (logit eta, log K).  

rwfit <- rwmetrop(betabinexch,
                  list(var=fit$var, scale=2),
                  c(0, 2), 5000, yn)

# acceptance rate is about 30%

# Find the posterior median and 90% interval estimate for eta and K.

dimnames(rwfit$par)[[2]] <- c("logit eta", "log K")

apply(rwfit$par, 2, quantile, c(0.05, 0.50, 0.95))
    logit eta log K
5%    -0.9606 1.885
50%   -0.6839 2.890
95%   -0.3881 3.717

# Find the posterior mean of the shrinkage K/(ni + K) for all schools.

shrink.j <- function(j){
  K <- exp(rwfit$par[, 2])
  mean(K / (K + yn[j, 2]))
}

round(sapply(1:10, shrink.j), 3)
 [1] 0.058 0.128 0.100 0.055 0.130 0.057 0.212 0.104 0.114 0.137

# Simulate 5000 draws from the posterior of each success 
# probability pi.  Construct 90% interval estimates for each 
# probability.

post.prob <- function(j){
  log.K <- rwfit$par[, 2]
  logit.eta <- rwfit$par[, 1]
  y <- yn[, 1]
  n <- yn[, 2]
  K <- exp(log.K)
  eta <- exp(logit.eta) / (1 + exp(logit.eta))
  pj <- rbeta(5000, y[j] + K * eta, n[j] - y[j] + K * (1 - eta) )
  quantile(pj, c(0.05, 0.95))
}

t(sapply(1:10, post.prob))
          5%    95%
 [1,] 0.4857 0.5778
 [2,] 0.1541 0.2663
 [3,] 0.2774 0.3892
 [4,] 0.2715 0.3513
 [5,] 0.2270 0.3516
 [6,] 0.2991 0.3829
 [7,] 0.2925 0.4614
 [8,] 0.1621 0.2624
 [9,] 0.2947 0.4168
[10,] 0.2876 0.4198

###########################################################