#Exercise 26, Consider the AR(1) model library(nlme); #a) notes on the black board. #write the correlation as a exponential correlation function in terms of d (the range parameter). #The advantage of this formulation is that we are able to generalise the model to situations where the time between time points may differ. ####################################################################### #b Load the dataset: Hawaii = read.table("http://www.uio.no/studier/emner/matnat/math/STK3100/data/Hawaii.txt", header=T) Hawaii$Birds=sqrt(Hawaii$Moorhen.Kauai) #Birds (the response Y) are measured almost each year, some NA. #Year (the time variable) #Covariates: Rainfall, Stilt.Oahu, Stilt.Maui, Coot.Oahu, Coot.Maui #Generalized least squares fit by REML: (independent error terms) M0 = gls(Birds~Rainfall+Year,na.action=na.omit,data=Hawaii) # Model1(M1) and Model2(M2) use the exponential correlation function with different parameterisations: M1 = gls(Birds~Rainfall+Year,na.action=na.omit,correlation=corAR1(form=~Year),data=Hawaii) summary(M1) #phi_hat for model 1 is 0.7734303 M2 = gls(Birds~Rainfall+Year,na.action=na.omit,correlation=corExp(form=~Year),data=Hawaii) summary(M2) #estimated d=range for model 2 is 3.892266 -1/log(0.7734303) #=3.892266 ####################################################################### ####################################################################### ####################################################################### #We will now look at the dataset spruce, which shows growth of different trees. #Response (Y) = logSize #Covariates: days(numerical), plot(factor - const within trees) spruce = read.table("http://www.uio.no/studier/emner/matnat/math/STK3100/data/Spruce.txt", header=T) #c #select a specific tree in the spruce data Spruce1=Spruce[Spruce$Tree=="O1T18",] # plot "logSize" against "days": plot(Spruce1$days,Spruce1$logSize) ####################################################################### #d: Try out a model with days as a linear fixed effect #Model with independent errorterm M0.1=gls(logSize~days,data=Spruce1) summary(M0.1) #(Intercept) 3.0034390 0.18888528 15.900864 0 #days 0.0029654 0.00040416 7.337056 0 # Model with exponential correlation function: M1.1=gls(logSize~days,data=Spruce1,correlation=corExp(form=~days)) summary(M1.1) #(Intercept) 2.3456068 70.30834 0.0333617 0.9740 #days 0.0040421 0.00184 2.2002955 0.0501 # d=range=5611265 , rho=exp(-1/d)= 0.9999998 ### we can not trust the p-values from M0.1, because the independent assumption is violated. ### Positive dependencies give too little uncertainties and too small p values #Which model seems to be best suited in this case? AIC(M0.1,M1.1) #residuals are much smaller in M1.1 ####################################################################### #e #Select all trees in the spruce data #It is then possible to include plot as a covariate. #Model with indep error term M0=gls(logSize~days+factor(plot),data=Spruce) summary(M0) #(Intercept) 4.023500 0.06015360 66.88710 0.0000 #days 0.003322 0.00011248 29.53293 0.0000 #factor(plot)2 0.016011 0.05097519 0.31410 0.7535 #factor(plot)3 0.231617 0.06498087 3.56438 0.0004 #factor(plot)4 0.376849 0.06322694 5.96025 0.0000 # Model with exponential correlationfunction: cexp=corExp(form=~days|Tree,fixed=FALSE) #exponential correlation function will be used on every single tree M1=gls(logSize~days+factor(plot),Spruce,correlation=cexp) summary(M1) #(Intercept) 3.534956 0.13444253 26.293437 0.0000 #days 0.004066 0.00016719 24.319393 0.0000 #factor(plot)2 -0.001693 0.16313073 -0.010376 0.9917 #factor(plot)3 0.181998 0.20795170 0.875194 0.3817 #factor(plot)4 0.323940 0.20233877 1.600977 0.1097 # d=range=596.0755 , rho=exp(-1/d)= 0.9983238 #Compare models AIC(M0,M1) ##################################################################### #Find the optimal model by selecting the significant fixed effects (specify to use ML): #indep error term M0.0 =gls(logSize~1 ,data=Spruce, method="ML") M0.d =gls(logSize~days ,data=Spruce, method="ML") M0.dp=gls(logSize~days+factor(plot),data=Spruce, method="ML") anova(M0.0,M0.d,M0.dp) #low p-values, sign different, include all fixed effects. ### BUT: we can not trust these p-values #exp cor-function M1.0 =gls(logSize~1 ,data=Spruce, correlation=cexp, method="ML") M1.d =gls(logSize~days ,data=Spruce, correlation=cexp, method="ML") M1.dp=gls(logSize~days+factor(plot),data=Spruce, correlation=cexp, method="ML") anova(M1.0,M1.d,M1.dp) #"factor(plot)" is not significant when we take the correlation into account. # The two models for correlation structure have a big influence on the model selection for the fixed effects!