# Jim — Nov 19, 2013, 4:06 PM
# regression example
# use Prestige example from Fox and Weisberg's R book
library(car)
# Consider dataset Prestige of Canadian Occupations
###################################################################
# The Prestige data frame has 102 rows and 6 columns. The observations
# are occupations.
# This data frame contains the following columns:
# education:
# Average education of occupational incumbents, years, in 1971.
# income:
# Average income of incumbents, dollars, in 1971.
# women:
# Percentage of incumbents who are women.
####################################################################
# standard multiple regression fit
# (response: occupational prestige; log2 of income used because the
# prestige/income relationship is multiplicative on the raw scale)
fit <- lm(prestige ~ education + log2(income) + women,
          data = Prestige)
fit
#> Call:
#> lm(formula = prestige ~ education + log2(income) + women, data = Prestige)
#> Coefficients:
#>  (Intercept)     education  log2(income)         women
#>    -110.9658        3.7305        9.3147        0.0469
# show the standard lm diagnostic graphs (residuals, QQ, leverage, ...)
plot(fit)
# get data in right format for blinreg
# define response vector y and design matrix X
# (first column of 1s is the intercept, matching the lm fit above)
y <- Prestige$prestige
X <- with(Prestige, cbind(1, education, log2(income), women))
# idiomatic column rename (was: dimnames(X)[[2]][3] <- ...)
colnames(X)[3] <- "log2.income"
library(LearnBayes)
# simulate 50000 draws from the posterior distribution,
# assuming a noninformative prior on (beta, sigma^2)
bfit <- blinreg(y, X, 50000)
# look at plots of the marginal posteriors of beta2, beta3, beta4
# (columns 2:4 of bfit$beta -- column 1 is the intercept)
# save the current graphics settings and restore them afterwards,
# rather than hard-resetting to mfrow = c(1, 1)
old.par <- par(mfrow = c(3, 1))
for (j in 1:3) {
  plot(density(bfit$beta[, j + 1]), main = paste("Beta", j + 1))
}
par(old.par)
# compute posterior means and sds of the regression coefficients
# colMeans is the idiomatic (and faster) form of apply(..., 2, mean)
colMeans(bfit$beta)
#>            X   Xeducation Xlog2.income       Xwomen
#>   -110.93108      3.73298      9.31025      0.04679
apply(bfit$beta, 2, sd)
#>            X   Xeducation Xlog2.income       Xwomen
#>     14.97011      0.35772      1.33790      0.03009
# posterior estimate of the error sd sigma
mean(bfit$sigma)
#> [1] 7.145
sd(bfit$sigma)
#> [1] 0.5147
# illustrate estimation and prediction of a linear combination.
# suppose we are interested in the prestige for an occupation
# where the average education is 10 years, the average income
# (in log2 dollars) is 13, and the percentage of women is 20;
# the leading 1 matches the intercept column of X
covariates <- matrix(c(1, 10, 13, 20), 1, 4)
# blinregexpected simulates draws of the expected prestige E(y)
# at these covariate values
expected.lp <- blinregexpected(covariates, bfit)
library(coda)
#> Loading required package: lattice
# summarize the posterior draws of E(y) as an mcmc object
summary(mcmc(expected.lp))
#> Iterations = 1:50000
#> Thinning interval = 1
#> Number of chains = 1
#> Sample size per chain = 50000
#> 1. Empirical mean and standard deviation for each variable,
#>    plus standard error of the mean:
#>     Mean      SD  Naive SE  Time-series SE
#> 48.36790 1.01001   0.00452         0.00452
#> 2. Quantiles for each variable:
#>  2.5%  25%  50%  75% 97.5%
#>  46.4 47.7 48.4 49.0  50.3
# suppose we are interested in predicting the prestige for a NEW
# occupation with the same set of covariates (prediction, not estimation)
predicted.lp <- blinregpred(covariates, bfit)
# coda is already attached above; this call is a harmless no-op
library(coda)
summary(mcmc(predicted.lp))
#> Iterations = 1:50000
#> Thinning interval = 1
#> Number of chains = 1
#> Sample size per chain = 50000
#> 1. Empirical mean and standard deviation for each variable,
#>    plus standard error of the mean:
#>    Mean     SD Naive SE Time-series SE
#> 48.3286 7.2099   0.0322         0.0320
#> 2. Quantiles for each variable:
#>  2.5%  25%  50%  75% 97.5%
#>  34.2 43.5 48.3 53.1  62.4
# note the differences between the 95% interval estimate for E(y)
# and the (much wider) 95% prediction interval for a future y:
# prediction adds the sampling variance sigma^2 on top of the
# posterior uncertainty in beta
# model selection
# suppose we are interested in selecting the best model.
# bayes.model.selection computes the estimate of log f (log marginal
# likelihood) and the posterior model probability for each of the
# 2^3 = 8 possible subsets of the three predictors
bayes.model.selection(y, X, 100)
#> $mod.prob
#>   education log2.income women  log.m   Prob
#> 1     FALSE       FALSE FALSE -442.1 0.0000
#> 2      TRUE       FALSE FALSE -388.5 0.0000
#> 3     FALSE        TRUE FALSE -408.5 0.0000
#> 4      TRUE        TRUE FALSE -372.7 0.8163
#> 5     FALSE       FALSE  TRUE -443.8 0.0000
#> 6      TRUE       FALSE  TRUE -386.5 0.0000
#> 7     FALSE        TRUE  TRUE -400.3 0.0000
#> 8      TRUE        TRUE  TRUE -374.2 0.1837
#> $converge
#> [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# this indicates that the best model is (education, log2 income),
# with posterior probability 0.8163