# Exercise 15
#
# Fits an inverse Gaussian GLM with log link to the positive claim sizes in
# the car insurance data, then tests whether gender (part f) and driver's age
# (part g) are significant.

# Load the data: comma-separated, header row, "." as decimal mark.
car <- read.table(
  "http://www.uio.no/studier/emner/matnat/math/STK3100/h14/data/car.txt",
  header = TRUE, sep = ",", dec = "."
)

# Pick out only the positive claims.
car0 <- car[car$claimcst0 > 0, ]

# Treat the explanatory variables as categorical.
car0$agecat <- as.factor(car0$agecat)
car0$gender <- as.factor(car0$gender)
car0$area <- as.factor(car0$area)

### e)
# Fit the GLM model:
#   1) response distribution = inverse Gaussian,
#   2) link function = a log link,
#   3) explanatory variables = driver's age, gender and area.
fit <- glm(claimcst0 ~ agecat + gender + area,
           data = car0, family = inverse.gaussian(link = "log"))
summary(fit)
# Output reported by the original author:
#   Null deviance:     6.4422 on 4623 degrees of freedom  ~ 2(l_sat - l_0)
#   Residual deviance: 6.3765 on 4612 degrees of freedom  ~ 2(l_sat - l_hat)
# where l_sat, l_0 and l_hat are the log-likelihoods of the saturated model,
# the intercept-only model and our model respectively (the deviances printed
# by R are unscaled, i.e. these identities hold up to the dispersion factor).

### f) Test whether gender is significant.

# Wald test: read the estimate and its standard error from the coefficient
# table instead of copying rounded numbers by hand (the original used
# beta_hat = 0.15283, sd_beta_hat = 0.05119).
coef_table <- coef(summary(fit))
gender_row <- grep("^gender", rownames(coef_table))
# The Wald statistic below is for a single coefficient, so gender must
# contribute exactly one row (i.e. be a two-level factor).
stopifnot(length(gender_row) == 1L)
beta_hat <- coef_table[gender_row, "Estimate"]
sd_beta_hat <- coef_table[gender_row, "Std. Error"]
test_statistic <- (beta_hat / sd_beta_hat)^2
pValue <- pchisq(test_statistic, df = 1, lower.tail = FALSE)
# Original author's conclusion: reject H0, gender is significant.

# Likelihood-ratio test: compare against the model without gender.
fit0.restricted <- glm(claimcst0 ~ agecat + area,
                       data = car0, family = inverse.gaussian(link = "log"))
LRT <- 2 * (logLik(fit) - logLik(fit0.restricted))
# Degrees of freedom = number of parameters removed by the restriction.
df_gender <- df.residual(fit0.restricted) - df.residual(fit)
pValue <- pchisq(as.numeric(LRT), df = df_gender, lower.tail = FALSE)
# Original author's conclusion: reject H0, gender is significant.

### g) Test whether driver's age is significant.
# Since age is a factor with 6 levels, a Wald test needs more than what is
# printed by summary(fit): it needs the covariance matrix of the estimates
# (the inverse Fisher information). Here we only do the LRT.
fit0.restricted2 <- glm(claimcst0 ~ gender + area,
                        data = car0, family = inverse.gaussian(link = "log"))
LRT <- 2 * (logLik(fit) - logLik(fit0.restricted2))
# Dropping agecat removes (number of levels - 1) parameters, so the reference
# distribution is chi-square with that many degrees of freedom, not 1.
df_age <- df.residual(fit0.restricted2) - df.residual(fit)
pValue <- pchisq(as.numeric(LRT), df = df_age, lower.tail = FALSE)
# NOTE(review): the original concluded "reject H0, age is significant" using
# df = 1; re-check that conclusion against the corrected p-value.