#R-help to exercise 5 in BSS
# QUESTION a)
# Read the data into a dataframe, give names to the variables, and inspect the data:
# Read the data from the course web page and name the four columns.
salinity <- read.table("http://www.math.uio.no/avdc/kurs/STK4900/data/exer5.dat")
names(salinity) <- c("salt", "saltprev", "trend", "discharge")
salinity
# Check that the data correspond to those given in the exercise.
# Get an overview of the data:
summary(salinity)
plot(salinity)
# Make sure that you understand what the summary measures tell you!
# What do you see from the scatter plots?
# Attach the data frame. attach() is kept so that commands referring to the
# columns by their bare names keep working, but note that a workspace variable
# with the same name (e.g. "trend") would silently mask the attached column.
attach(salinity)
# Do linear regression with all three covariates and inspect the results.
# Passing data = salinity makes the fit take the variables from the data frame
# itself, independently of what is attached or defined in the workspace:
lmfull <- lm(salt ~ saltprev + trend + discharge, data = salinity)
summary(lmfull)
# How important is each of the covariates? How does this agree with the scatter plots?
# QUESTION b)
# Compute fitted values and residuals:
# fitted() extracts the fitted values from the model object; using the
# extractor avoids relying on partial matching of component names with "$".
saltfit <- fitted(lmfull)
# Residual = observed response minus fitted value. The response is taken
# from the data frame explicitly so that it cannot be masked by another
# object called "salt".
saltres <- salinity$salt - saltfit
summary(saltres)
# You may get the residuals directly as "residuals(lmfull)". Check that this is the case:
summary(residuals(lmfull))
#(The sum of the residuals is zero, so small differences in their means are due to rounding.)
# QUESTION c)
# We will make various plots of the residuals
# Histogram and Q-Q plot (make one plot at a time)
# Residuals and fitted values are extracted with residuals() and fitted(),
# and covariates are taken from the data frame explicitly, so the plots do
# not depend on partial name matching or on the data frame being attached.
hist(residuals(lmfull), main = "Histogram of residuals", xlab = "Residuals")
qqnorm(residuals(lmfull))
# What do the plots tell you?
# Residuals versus fitted values:
plot(fitted(lmfull), residuals(lmfull),
     xlab = "Fitted values", ylab = "Residuals")
# What does the plot tell you?
# Residuals versus each of the covariates (make one at a time):
plot(salinity$saltprev, residuals(lmfull), xlab = "saltprev", ylab = "Residuals")
plot(salinity$trend, residuals(lmfull), xlab = "trend", ylab = "Residuals")
plot(salinity$discharge, residuals(lmfull), xlab = "discharge", ylab = "Residuals")
# What do the plots tell you? Are there indications of deviation from linearity?
# Residuals versus observation number (we do not know whether the data are given
# in the order in which they were recorded, as they would be in a real study):
plot(residuals(lmfull), xlab = "obs.number", ylab = "residuals")
# What does the plot tell you?
# Can you see any signs of autocorrelation?
# There are a number of other useful plots for the residuals.
# You get a number of plots (some of those just given and some others) by the command
# (the command may give six different plots - we show the first four of these):
# par() returns the previous settings, which are restored afterwards instead
# of assuming that the layout was a single panel.
old_par <- par(mfrow = c(2, 2))
plot(lmfull, which = 1:4)
par(old_par)
# Try to understand what each of the plots tells you!
# QUESTION d)
# Fit a model without trend (why?):
# Reduced model: the same response, with trend dropped from the covariates.
# data = salinity makes the fit use the data frame columns directly instead
# of whatever objects with these names are found on the search path.
lmred1 <- lm(salt ~ saltprev + discharge, data = salinity)
summary(lmred1)
# How does this model compare with the one that also includes trend?
# Try yourself other models (e.g. with second order term for discharge)
# Which model would you suggest to use for predicting salinity?