R Scripts for Chapter 7
# Script 7.1 - First example
source("C:/R/functions.txt")
library(car)
# Reading the data
chap7.ex1 <- read.table("c:/rbook/chap6.ex1.txt", header = TRUE)
attach(chap7.ex1)
# Drawing Figure 7.5
plot(jitter(wkdaded, factor = 1), jitter(wkmomed, factor = 1),
pch = 20, xlim = c(0, 10), ylim = c(0, 10))
# Drawing Figure 7.6
plot(jitter(wkdaded,factor=1),jitter(wkmomed,factor=1),pch=20,xlim=c(0,10),ylim=c(0,10))
abline(3.0,.5)
abline(0,1)
text(2,7, "Line 1")
text(8,4, "Line 2")
arrows(1.9, 6.2, 1.9, 4, code = 2, length = .15, angle = 20)
arrows(8, 4.5, 8, 6.7, code = 2, length = .15, angle = 20)
# Finding the regression equation
reg <- lm(wkmomed~wkdaded)
reg
# Adding the predicted values and residuals to the data frame
chap7.ex1$pred <- fitted(reg)
chap7.ex1$res <- resid(reg)
names(chap7.ex1)
attach(chap7.ex1)
pred
res
# Looking at the intercorrelations
cor.mat <- cov2cor(cov(chap7.ex1))
print(cor.mat, digits = 3)
# Reversing the IV and DV
reg.rev <- lm(wkdaded~wkmomed)
reg.rev
# Plotting the confidence and prediction intervals: Figure 7.10
interval.frame <- data.frame(wkdaded = 0:10)
pp <- predict(reg, int = "p", newdata = interval.frame)
pc <- predict(reg, int = "c", newdata = interval.frame)
plot(jitter(wkdaded), jitter(wkmomed), ylim = c(0, 10),
xlim = c(0, 10), pch = 16)
pred.wkmomed <- interval.frame$wkdaded
matlines(pred.wkmomed, pc, lty = c(1, 2, 2), col = "black")
matlines(pred.wkmomed, pp, lty = c(1, 3, 3), col = "black")
chap7.ex1$z.wkmomed <- scale(wkmomed)
chap7.ex1$z.wkdaded <- scale(wkdaded)
attach(chap7.ex1)
z.reg <- lm(z.wkmomed~z.wkdaded)
z.reg
# Script 7.2 - Plotting Figure 7.7, the SSE as a function of intercept and slope
# Clean out workspace
rm(list = ls())
library(MASS)
library(lattice)
errors <- read.table("c:/rbook/sse.txt", header = TRUE)
names(errors)
attach(errors)
wireframe(sse ~ b0*b1, scales = list(arrows=FALSE,
cex = .45,
font = 3, tck = 1),
screen = list(z = -245, x = -50, y = 0),shade = FALSE, aspect = c(87/87, 1.0))
# Script 7.3 - Complete example with Males and Females
# Clear out workspace
rm(list = ls())
library(car)
source("C:/R/functions.txt")
# Read in the data
ecls200 <- read.table("c:/rbook/ecls200.txt", header = TRUE)
attach(ecls200)
names(ecls200)
# Create factor variable with labels
ecls200$f.gender <- factor(gender, levels = 1:2, labels =
c("Male", "Female"), ordered = TRUE)
# Create separate data frames by gender
ecls200.female <- subset(ecls200, f.gender == "Female")
ecls200.male <- subset(ecls200, f.gender == "Male")
attach(ecls200.male)
attach(ecls200.female)
attach(ecls200)
# Looking at General Knowledge measured Fall-K
tapply(c1rgscal, f.gender, length)
tapply(c1rgscal, f.gender, table)
tapply(c1rgscal, f.gender, summary)
tapply(c1rgscal, f.gender, sd)
tapply(c1rgscal, f.gender, skewness)
tapply(c1rgscal, f.gender, SEsk)
tapply(c1rgscal, f.gender, kurtosis)
tapply(c1rgscal, f.gender, SEku)
boxplot(c1rgscal ~ f.gender, ylab = "Fall-K General Knowledge Scores")
# Looking at Reading Scores measured Spring-1
tapply(c4rrscal, f.gender, length)
tapply(c4rrscal, f.gender, table)
tapply(c4rrscal, f.gender, summary)
tapply(c4rrscal, f.gender, sd)
tapply(c4rrscal, f.gender, skewness)
tapply(c4rrscal, f.gender, SEsk)
tapply(c4rrscal, f.gender, kurtosis)
tapply(c4rrscal, f.gender, SEku)
boxplot(c4rrscal ~ f.gender, ylab = "Spring-1 Reading Scores")
# Produce a scatterplot with subgroups
# Set plotting character to 1 for all cases
pchval <- rep(1, length = length(f.gender))
# Set plotting character to 18 for Males
pchval[f.gender == "Male"] <- 18
plot(jitter(c1rgscal, factor = 1), jitter(c4rrscal, factor = 1),
pch = pchval)
legend(32, 28, c("Male", "Female"), pch = c(18, 1))
legend(32, 40, c("Male", "Female"), lty = c(1, 2))
with(ecls200.male,abline(lm(c4rrscal ~ c1rgscal), lty = 1))
with(ecls200.female,abline(lm(c4rrscal~c1rgscal), lty = 2))
attach(ecls200.female)
mod.female <- lm (c4rrscal ~ c1rgscal)
mod.female
ecls200.female$res <- resid(mod.female)
attach(ecls200.female)
ecls200.female$z.res <- scale(res)
attach(ecls200.female)
range(z.res)
# Look at residuals)
plot(z.res, ylim = c(-5, 5))
abline(h=2.5, lty = 2)
abline(h= - 2.5, lty = 2)
which(z.res < -2)
which(z.res > 2)
# Look at Cook's D
# cook.d() function updated to cooks.distance()
denom <- length(res) - 2
plot(cooks.distance(mod.female))
abline(h = 4/denom, lty = 2)
which(cooks.distance(mod.female) > (4/denom))
detach(ecls200.female)
attach(ecls200.male)
mod.male <- lm (c4rrscal ~ c1rgscal)
mod.male
ecls200.male$res <- resid(mod.male)
attach(ecls200.male)
ecls200.male$z.res <- scale(res)
attach(ecls200.male)
range(z.res)
# Look at residuals)
plot(z.res, ylim = c(-5, 5))
abline(h = 2.5, lty = 2)
abline(h = - 2.5, lty = 2)
which(z.res < -2)
which(z.res > 2)
# Look at Cook's D
denom <- length(res) - 2
plot(cooks.distance(mod.male))
abline(h=4/denom, lty = 2)
which(cooks.distance(mod.male) > (4/denom))
detach(ecls200.male)
# Looking at the two groups as one group
attach(ecls200)
mod.ecls200 <- lm (c4rrscal ~ c1rgscal)
mod.ecls200
ecls200$res <- resid(mod.ecls200)
attach(ecls200)
ecls200$z.res <- scale(res)
attach(ecls200)
range(z.res)
# Look at residuals
plot(z.res, ylim = c(-5,5))
abline(h = 2.5, lty = 2)
abline(h= - 2.5, lty = 2)
which(z.res < -2)
which(z.res > 2)
# Look at Cook's D
denom <- length(res) - 2
plot(cooks.distance(mod.ecls200))
abline(h = 4/denom, lty = 2)
which(cooks.distance(mod.ecls200) > (4/denom))
# Script 7.4 - Solution to Exercises
rm(list = ls())
library(car)
source("C:/R/functions.txt")
# Read the data from Exercise 6.3
ex7.3 <- read.table("c:/rbook/chap6.ex3.txt", header = TRUE)
attach(ex7.3)
# Calculate predicted values
ex7.3$yhat <- -5.69 + .0677*iq
attach(ex7.3)
# Estimate errors
ex7.3$error <- gpa - yhat
attach(ex7.3)
# Find squared errors
ex7.3$error.sq <- error^2
attach(ex7.3)
# Examining the results
ex7.3
# Finding the sums
sum(gpa)
sum(yhat)
sum(error)
sum(error.sq)
# Using lm function in R to do analysis
model <- lm(gpa ~ iq)
summary(model)
anova(model)
Share with your friends: |