#Exercise 15
# Load the car insurance data set (comma-separated, header row, "." as the
# decimal mark). TRUE is spelled out because T is an ordinary variable that
# can be reassigned.
car <- read.table(
  "http://www.uio.no/studier/emner/matnat/math/STK3100/h14/data/car.txt",
  header = TRUE,
  sep = ",",
  dec = "."
)

# Keep only the rows with a strictly positive claim cost.
# NOTE(review): if claimcst0 contains NA, this logical subset yields all-NA
# rows; confirm the column has no missing values.
car0 <- car[car$claimcst0 > 0, ]

# The explanatory variables are categorical, so store them as factors.
car0$agecat <- as.factor(car0$agecat)
car0$gender <- as.factor(car0$gender)
car0$area <- as.factor(car0$area)


### e)
# Fit a GLM with
#   1) an inverse Gaussian response distribution,
#   2) a log link,
#   3) driver's age, gender and area as explanatory variables.

fit <- glm(
  formula = claimcst0 ~ agecat + gender + area,
  family = inverse.gaussian(link = "log"),
  data = car0
)
summary(fit)
# From the printed summary:
#   Null deviance:     6.4422  on 4623  degrees of freedom  (intercept only)
#   Residual deviance: 6.3765  on 4612  degrees of freedom  (our model)
# Let l_sat, l_0 and l_hat be the log-likelihoods of the saturated model, the
# intercept-only model and our model, respectively. The null deviance then
# corresponds to 2(l_sat - l_0) and the residual deviance to 2(l_sat - l_hat).
# R prints the unscaled deviances, i.e. these quantities multiplied by the
# dispersion parameter.


#### f) Test whether gender is significant.
# Wald test: read the estimate and its standard error from the coefficient
# table of summary(fit) instead of copying rounded numbers by hand, so the
# test stays in sync with the fitted model.
# (Values printed when this was written: estimate 0.15283, s.e. 0.05119.)
coef_table <- coef(summary(fit))
gender_row <- grep("^gender", rownames(coef_table))
# gender is expected to contribute exactly one coefficient (two-level factor).
stopifnot(length(gender_row) == 1)

beta_hat <- coef_table[gender_row, "Estimate"]
sd_beta_hat <- coef_table[gender_row, "Std. Error"]

# Under H0: beta_gender = 0, (beta_hat / se)^2 is compared with chi-square(1).
test_statistic <- (beta_hat / sd_beta_hat)^2
pValue <- pchisq(test_statistic, df = 1, lower.tail = FALSE)
# With the printed values the statistic is about 8.91 (p approx. 0.003):
# reject H0, gender is significant.

# Likelihood-ratio test: refit the model without gender and compare the
# log-likelihoods of the two nested fits.
fit0.restricted <- glm(
  formula = claimcst0 ~ agecat + area,
  family = inverse.gaussian(link = "log"),
  data = car0
)

loglik_full <- logLik(fit)
loglik_reduced <- logLik(fit0.restricted)
LRT <- 2 * (loglik_full - loglik_reduced)

# Dropping gender removes one parameter, hence one degree of freedom.
pValue <- pchisq(as.numeric(LRT), df = 1, lower.tail = FALSE)
# Reject H0: gender is significant.


#### g) Test whether driver's age is significant.
# Age (agecat) is a factor with 6 levels, so it enters the model through 5
# dummy coefficients. A Wald test of all of them at once needs more than the
# summary(fit) table: it requires their joint covariance matrix (the inverse
# Fisher information).

# Likelihood-ratio test: refit without agecat and compare log-likelihoods.
fit0.restricted2 <- glm(
  claimcst0 ~ gender + area,
  data = car0,
  family = inverse.gaussian(link = "log")
)

LRT <- 2 * (logLik(fit) - logLik(fit0.restricted2))

# The reference chi-square distribution has as many degrees of freedom as
# parameters were dropped (5 for a 6-level factor), not 1. Deriving it from
# the residual degrees of freedom keeps it correct if the factor changes.
df_age <- df.residual(fit0.restricted2) - df.residual(fit)
pValue <- pchisq(as.numeric(LRT), df = df_age, lower.tail = FALSE)
# Reject H0 (age is significant) if pValue is below the chosen level.
# NOTE(review): the earlier "reject H0" conclusion was based on df = 1;
# re-check it against the p-value computed with the correct df.