#Oppgave 23, model selection with random and fixed effects! library(MASS); library(nlme); data(petrol) plot(petrol, col= petrol$No) #V10+SG+VP is const within groups plot(petrol$EP, petrol$Y, col=petrol$No) #random intersept, maybe random slope ############################################################################################## #################################### find the best model: ############################################################################################## #Analyse the different random effects by nesten models: (Start with as many explanatory variables as possible in the fixed components) fit1 = gls(Y~1+EP+V10+SG+VP ,method="REML",data=petrol) fit2 = lme(Y~1+EP+V10+SG+VP,random=~1 |No,method="REML",data=petrol) fit3 = lme(Y~1+EP+V10+SG+VP,random=~1+EP|No,method="REML",data=petrol) anova(fit1, fit2, fit3) #the AIC, BIC sais that fit2 is the best model! #the likelihood ratio test for fit1 vs fit2: H0: d_1^2 = 0 (on the boundary) # P-value in anova() assume \chi^2_1 which is incorrect due to parameters on the boundary. # should use (1/2)\chi^2_1, thus the correct p-values in the table is 0.5*(1-pchisq(3.358266, 1)) = 0.03343422 #the likelihood ratio test for fit2 vs fit3: H0: d_2^2 = 0 (on the boundary) # P-value in anova() assume \chi^2_1 which is incorrect due to parameters on the boundary. # should use (1/2)\chi^2_1+(1/2)\chi^2_2, thus the correct p-values in the table is 0.5*((1-pchisq(0.007151,1))+(1-pchisq(0.007151,2))) = 0.9645196 #CONC: Only Random intercept is the best model ############################################################################################## #Analyse fixed effects: summary(fit2) #Can do the Wald test for each parameter separately #or wi can make nested models based on the fixed effects: fit2.1 = lme(Y~1+EP,random=~1 |No,method="ML",data=petrol) fit2.2 = lme(Y~1+EP+V10,random=~1 |No,method="ML",data=petrol) fit2.3 = lme(Y~1+EP+V10+SG,random=~1 |No,method="ML",data=petrol) fit2.4 = lme(Y~1+EP+V10+SG+VP,random=~1 |No,method="ML",data=petrol) summary(fit2.4) anova(fit2.1, fit2.2, fit2.3, fit2.4) #big p-verdi -> accept H0 -> put the coefficient to zero. #CONC: fixed effect with 1+EP+V10 is best # The Final best model: fit.final = lme(Y~1+EP+V10,random=~1|No,method="REML",data=petrol) ############################################################################################## ##################################### plot residuals against fitted values and each covarite: E <- resid(fit.final, type = "normalized") par(mfrow=c(1,3)) plot(fitted(fit.final), E, ylab="normalized res", xlab="fitted"); abline(h=0, col="red") plot(petrol$EP, E, ylab="normalized res", xlab="EP"); abline(h=0, col="red") plot(petrol$V10, E, ylab="normalized res", xlab="V10"); abline(h=0, col="red") #homogeneity! #Histogram of the residual, identify normality. par(mfrow=c(1,1)) hist(E)