Chapter 9 Regression Models
9.1 An Example of Bayesian Regression
library(LearnBayes)
<- log(birdextinct$time)
logtime plot(birdextinct$nesting, logtime)
<- (logtime > 3)
out text(birdextinct$nesting[out], logtime[out],
label=birdextinct$species[out], pos = 2)
plot(jitter(birdextinct$size), logtime,
xaxp=c(0, 1, 1))
plot(jitter(birdextinct$status), logtime,
xaxp=c(0, 1, 1))
Least-squares fit:
<- lm(logtime ~ nesting + size + status,
fit data=birdextinct, x=TRUE, y=TRUE)
summary(fit)
##
## Call:
## lm(formula = logtime ~ nesting + size + status, data = birdextinct,
## x = TRUE, y = TRUE)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8410 -0.2932 -0.0709 0.2165 2.5167
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.43087 0.20706 2.081 0.041870 *
## nesting 0.26501 0.03679 7.203 1.33e-09 ***
## size -0.65220 0.16667 -3.913 0.000242 ***
## status 0.50417 0.18263 2.761 0.007712 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6524 on 58 degrees of freedom
## Multiple R-squared: 0.5982, Adjusted R-squared: 0.5775
## F-statistic: 28.79 on 3 and 58 DF, p-value: 1.577e-11
Sampling from posterior using vague priors for parameters.
<- blinreg(fit$y, fit$x, 5000) theta.sample
par(mfrow=c(2,2))
hist(theta.sample$beta[,2], main="NESTING",
xlab=expression(beta[1]))
hist(theta.sample$beta[,3], main="SIZE",
xlab=expression(beta[2]))
hist(theta.sample$beta[,4], main="STATUS",
xlab=expression(beta[3]))
hist(theta.sample$sigma, main="ERROR SD",
xlab=expression(sigma))
apply(theta.sample$beta, 2, quantile,
c(.05, .5, .95))
## X(Intercept) Xnesting Xsize Xstatus
## 5% 0.08301736 0.2031766 -0.9385281 0.1871552
## 50% 0.42537546 0.2639260 -0.6541445 0.5055329
## 95% 0.79519902 0.3258697 -0.3713111 0.8134480
quantile(theta.sample$sigma, c(.05, .5, .95))
## 5% 50% 95%
## 0.5667969 0.6573449 0.7703726
Estimating mean extinction times:
<- c(1, 4, 0, 0)
cov1 <- c(1, 4, 1, 0)
cov2 <- c(1, 4, 0, 1)
cov3 <- c(1, 4, 1, 1)
cov4 <- rbind(cov1, cov2, cov3, cov4)
X1 <- blinregexpected(X1, theta.sample) mean.draws
<- c("A", "B", "C", "D")
c.labels par(mfrow=c(2, 2))
for (j in 1:4){
hist(mean.draws[, j],
main=paste("Covariate set",
c.labels[j]),xlab="log TIME")
}
Predicting future extinction times:
<- c(1, 4, 0, 0)
cov1 <- c(1, 4, 1, 0)
cov2 <- c(1, 4, 0, 1)
cov3 <- c(1, 4, 1, 1)
cov4 <- rbind(cov1, cov2, cov3, cov4)
X1 <- blinregpred(X1, theta.sample) pred.draws
<- c("A", "B", "C", "D")
c.labels par(mfrow=c(2,2))
for (j in 1:4){
hist(pred.draws[, j],
main=paste("Covariate set",
c.labels[j]),xlab="log TIME")
}
Model checking: posterior predictive distribution distributions of each future observation, showing actual observation as solid dot.
<- blinregpred(fit$x, theta.sample)
pred.draws <- apply(pred.draws, 2,
pred.sum c(.05,.95))
quantile, par(mfrow=c(1, 1))
<- 1:length(logtime)
ind matplot(rbind(ind, ind), pred.sum,
type="l", lty=1, col=1,
xlab="INDEX", ylab="log TIME")
points(ind, logtime, pch=19)
<- (logtime > pred.sum[2, ])
out text(ind[out], logtime[out],
label=birdextinct$species[out], pos = 4)
Model checking via bayes residuals \(y_i - x_i \beta\). Graph of absolute values of residuals that exceeds a particular constant.
<- bayesresiduals(fit, theta.sample, 2)
prob.out par(mfrow=c(1, 1))
plot(birdextinct$nesting, prob.out)
= (prob.out > 0.35)
out text(birdextinct$nesting[out], prob.out[out],
label=birdextinct$species[out], pos = 4)
9.2 Modeling Using Zellner’s g Prior
Illustrating the role of the parameter c:
<- cbind(1, puffin$Distance -
X mean(puffin$Distance))
<- c(0.1, 0.5, 5, 2)
c.prior <- vector("list", 4)
fit for (j in 1:4){
<- list(b0=c(8, 0), c0=c.prior[j])
prior <- blinreg(puffin$Nest, X, 1000, prior)
fit[[j]]
}<- NULL
BETA for (j in 1:4){
=data.frame(Prior=paste("c =",
sas.character(c.prior[j])),
beta0=fit[[j]]$beta[, 1],
beta1=fit[[j]]$beta[, 2])
<- rbind(BETA, s)
BETA
}library(lattice)
with(BETA,
xyplot(beta1 ~ beta0 | Prior,
type=c("p","g"),
col="black"))
Model selection of all regression models using g priors:
<- list(y=puffin$Nest,
data X=cbind(1, puffin$Grass, puffin$Soil))
<- list(b0=c(0, 0, 0), c0=100)
prior <- with(puffin,
beta.start lm(Nest ~ Grass + Soil)$coef)
laplace(reg.gprior.post,
c(beta.start, 0),
list(data=data, prior=prior))$int
## [1] -136.3957
<- puffin[, -1]
X <- puffin$Nest
y <- 100
c bayes.model.selection(y, X, c, constant=FALSE)
## $mod.prob
## Grass Soil Angle Distance log.m Prob
## 1 FALSE FALSE FALSE FALSE -132.18 0.00000
## 2 TRUE FALSE FALSE FALSE -134.05 0.00000
## 3 FALSE TRUE FALSE FALSE -134.51 0.00000
## 4 TRUE TRUE FALSE FALSE -136.40 0.00000
## 5 FALSE FALSE TRUE FALSE -112.67 0.00000
## 6 TRUE FALSE TRUE FALSE -113.18 0.00000
## 7 FALSE TRUE TRUE FALSE -114.96 0.00000
## 8 TRUE TRUE TRUE FALSE -115.40 0.00000
## 9 FALSE FALSE FALSE TRUE -103.30 0.03500
## 10 TRUE FALSE FALSE TRUE -105.57 0.00360
## 11 FALSE TRUE FALSE TRUE -100.37 0.65065
## 12 TRUE TRUE FALSE TRUE -102.35 0.08992
## 13 FALSE FALSE TRUE TRUE -102.81 0.05682
## 14 TRUE FALSE TRUE TRUE -105.09 0.00581
## 15 FALSE TRUE TRUE TRUE -101.88 0.14386
## 16 TRUE TRUE TRUE TRUE -104.19 0.01434
##
## $converge
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE
9.3 Survival Modeling
Traditional fit using a Weibull model:
library(survival)
survreg(Surv(time, status) ~ factor(treat) + age,
dist="weibull",
data = chemotherapy)
## Call:
## survreg(formula = Surv(time, status) ~ factor(treat) + age, data = chemotherapy,
## dist = "weibull")
##
## Coefficients:
## (Intercept) factor(treat)2 age
## 10.98683919 0.56145663 -0.07897718
##
## Scale= 0.5489202
##
## Loglik(model)= -88.7 Loglik(intercept only)= -98
## Chisq= 18.41 on 2 degrees of freedom, p= 0.000101
## n= 26
Bayesian fit:
<- c(-.5, 9, .5, -.05)
start <- with(chemotherapy,
d cbind(time, status, treat - 1, age))
<- laplace(weibullregpost, start, d)
fit fit
## $mode
## [1] -0.59986796 10.98663371 0.56151088 -0.07897316
##
## $var
## [,1] [,2] [,3] [,4]
## [1,] 0.057298875 0.13530436 0.004541435 -0.0020828431
## [2,] 0.135304360 1.67428176 -0.156631948 -0.0255278352
## [3,] 0.004541435 -0.15663195 0.115450201 0.0017880712
## [4,] -0.002082843 -0.02552784 0.001788071 0.0003995202
##
## $int
## [1] -25.31207
##
## $converge
## [1] TRUE
<- list(var=fit$var, scale=1.5)
proposal <- rwmetrop(weibullregpost,
bayesfit
proposal,$mode,
fit10000, d)
$accept bayesfit
## [1] 0.2849
par(mfrow=c(2, 2))
<- exp(bayesfit$par[, 1])
sigma <- bayesfit$par[, 2]
mu <- bayesfit$par[, 3]
beta1 <- bayesfit$par[, 4]
beta2 hist(beta1, xlab="treatment", main="")
hist(beta2, xlab="age", main="")
hist(sigma, xlab="sigma", main="")