#Oppgave 23, model selection with random and fixed effects! 

library(MASS); library(nlme); data(petrol)

plot(petrol, col= petrol$No)                    #V10+SG+VP is const within groups
plot(petrol$EP, petrol$Y, col=petrol$No)        #random intersept, maybe random slope


##############################################################################################
#################################### find the best model:

##############################################################################################
#Analyse the different random effects by nesten models: (Start with as many explanatory variables as possible in the fixed components)
fit1 = gls(Y~1+EP+V10+SG+VP                ,method="REML",data=petrol)
fit2 = lme(Y~1+EP+V10+SG+VP,random=~1   |No,method="REML",data=petrol)
fit3 = lme(Y~1+EP+V10+SG+VP,random=~1+EP|No,method="REML",data=petrol)

anova(fit1, fit2, fit3)
#the AIC, BIC sais that fit2 is the best model!
#the likelihood ratio test for fit1 vs fit2:    H0: d_1^2 = 0 (on the boundary)
    # P-value in anova() assume \chi^2_1 which is incorrect due to parameters on the boundary.
    # should use (1/2)\chi^2_1, thus the correct p-values in the table is 0.5*(1-pchisq(3.358266, 1)) = 0.03343422
#the likelihood ratio test for fit2 vs fit3:    H0: d_2^2 = 0 (on the boundary)
    # P-value in anova() assume \chi^2_1 which is incorrect due to parameters on the boundary.
    # should use (1/2)\chi^2_1+(1/2)\chi^2_2, thus the correct p-values in the table is 0.5*((1-pchisq(0.007151,1))+(1-pchisq(0.007151,2))) = 0.9645196

#CONC: Only Random intercept is the best model


##############################################################################################
#Analyse fixed effects:
summary(fit2)   #Can do the Wald test for each parameter separately

#or wi can make nested models based on the fixed effects:
fit2.1 = lme(Y~1+EP,random=~1   |No,method="ML",data=petrol)
fit2.2 = lme(Y~1+EP+V10,random=~1   |No,method="ML",data=petrol)
fit2.3 = lme(Y~1+EP+V10+SG,random=~1   |No,method="ML",data=petrol)
fit2.4 = lme(Y~1+EP+V10+SG+VP,random=~1   |No,method="ML",data=petrol)

summary(fit2.4)
anova(fit2.1, fit2.2, fit2.3, fit2.4)

#big p-verdi -> accept H0 -> put the coefficient to zero. 
#CONC: fixed effect with 1+EP+V10 is best


#    The Final best model:
fit.final = lme(Y~1+EP+V10,random=~1|No,method="REML",data=petrol)


##############################################################################################
##################################### plot residuals against fitted values and each covarite:
E <- resid(fit.final, type = "normalized")
par(mfrow=c(1,3))
    plot(fitted(fit.final), E, ylab="normalized res", xlab="fitted");  abline(h=0, col="red")
    plot(petrol$EP,         E, ylab="normalized res", xlab="EP");      abline(h=0, col="red")
    plot(petrol$V10,        E, ylab="normalized res", xlab="V10");     abline(h=0, col="red")
#homogeneity!

#Histogram of the residual, identify normality.
par(mfrow=c(1,1))
hist(E)