Jim — Oct 28, 2013, 8:04 PM
# Hierarchical Modeling Homework
# The dataset proficiency.txt contains results of the 2007
# Ohio Graduation Test for 609 school districts.
data=read.table("http://bayes.bgsu.edu/m6480/homework/proficiency.txt",
header=TRUE)
# Variables
# School -- the name of the school district
# Advanced -- the number of students who scored at an advanced level in the math component of the OGT
# N -- the sample size
# Choose a random sample of 10 schools and put the data into sample.data:
sample.data <- data[sample(609,10), ]
# Display your data.
sample.data
School Advanced N
503 LAKE_LOCAL_SD 173 318
484 FOSTORIA_CITY_SD 25 131
404 WEST_MUSKINGUM_LOCAL_SD 58 175
255 LOGAN-HOCKING_LOCAL_SD 104 336
186 PIKE-DELTA-YORK_LOCAL_SD 36 129
530 TWINSBURG_CITY_SD 111 326
551 WEATHERSFIELD_LOCAL_SD 27 70
425 WAVERLY_CITY_SD 33 168
80 CLINTON-MASSIE_LOCAL_SD 54 151
165 WASHINGTON_COURT_HOUSE_CITY_SD 43 121
# Using the function betabinexch in the LearnBayes package, find the
# approximate mode and variance-covariance matrix of (logit eta, log K).
library(LearnBayes)
yn <- sample.data[, -1]
fit <- laplace(betabinexch, c(0, 2), yn)
fit$mode; fit$var
[,1] [,2]
[1,] 0.023202 -0.008169
[2,] -0.008169 0.280368
# Use a random walk Metropolis algorithm to sample 5000 values
# from the posterior of (logit eta, log K).
rwfit <- rwmetrop(betabinexch,
list(var=fit$var, scale=2),
c(0, 2), 5000, yn)
# acceptance rate is about 30%
# Find the posterior median and 90% interval estimate for eta and K.
dimnames(rwfit$par)[[2]] <- c("logit eta", "log K")
apply(rwfit$par, 2, quantile, c(0.05, 0.50, 0.95))
logit eta log K
5% -0.9606 1.885
50% -0.6839 2.890
95% -0.3881 3.717
# Find the posterior mean of the shrinkage K/(ni + K) for all schools.
shrink.j <- function(j){
K <- exp(rwfit$par[, 2])
mean(K / (K + yn[j, 2]))
}
round(sapply(1:10, shrink.j), 3)
[1] 0.058 0.128 0.100 0.055 0.130 0.057 0.212 0.104 0.114 0.137
# Simulate 5000 draws from the posterior of each success
# probability pi. Construct 90% interval estimates for each
# probability.
post.prob <- function(j){
log.K <- rwfit$par[, 2]
logit.eta <- rwfit$par[, 1]
y <- yn[, 1]
n <- yn[, 2]
K <- exp(log.K)
eta <- exp(logit.eta) / (1 + exp(logit.eta))
pj <- rbeta(5000, y[j] + K * eta, n[j] - y[j] + K * (1 - eta) )
quantile(pj, c(0.05, 0.95))
}
t(sapply(1:10, post.prob))
5% 95%
[1,] 0.4857 0.5778
[2,] 0.1541 0.2663
[3,] 0.2774 0.3892
[4,] 0.2715 0.3513
[5,] 0.2270 0.3516
[6,] 0.2991 0.3829
[7,] 0.2925 0.4614
[8,] 0.1621 0.2624
[9,] 0.2947 0.4168
[10,] 0.2876 0.4198
###########################################################