e ############################################################################# #Exercise 12 ############################################################################# #Try out different link-functions. biller = read.table("http://www.uio.no/studier/emner/matnat/math/STK3100/h14/data/beetle.dat", header=F) colnames(biller) = c("dose", "antall", "dode") mod1 <- glm(cbind(dode,antall-dode)~dose,family=binomial,data=biller) #default: (link=logit) mod2 <- glm(cbind(dode,antall-dode)~dose,family=binomial(link=probit),data=biller) mod3 <- glm(cbind(dode,antall-dode)~dose,family=binomial(link=cloglog),data=biller) logLik(mod1) logLik(mod2) logLik(mod3) # plot plot(biller$dose,biller$dode/biller$antall,xlab="dose",ylab="observed probability") points(biller$dose,fitted.values(mod1),type="l",col="blue") # logit points(biller$dose,fitted.values(mod2),type="l",col="red") # probit points(biller$dose,fitted.values(mod3),type="l",col="green") # cloglog legend("topleft",legend=c("logit","probit","cloglog"),col=c("blue","red","green"),lty=1) #CONCLUSION: cloglog looks like it has the best fit to the data points, and the highest log likelihood. ############################################################################# #Exercise 13 ############################################################################# #Repeat the previous exercise but now with Poisson regression. #New data : number of previous children related to age of pregnant mother. birth<-read.table(file="http://www.uio.no/studier/emner/matnat/math/STK3100/data/Birth.txt", header=T) mod1 <- glm(children~age,family=poisson,data=birth) #default: (link = "log") mod2 <- glm(children~age,family=poisson(link=identity),data=birth, start=c(1,1)) mod3 <- glm(children~age,family=poisson(link=sqrt),data=birth) logLik(mod1) logLik(mod2) logLik(mod3) # plot plot(birth$age,birth$children,ylab="children",xlab="age") points(birth$age, fitted.values(mod1),col="blue") # log points(birth$age, fitted.values(mod2),col="red") # identity points(birth$age, fitted.values(mod3),col="green") # sqrt legend("topleft",legend=c("log","identity","sqrt"),col=c("blue","red","green"),lty=1) #CONCLUSION: sqrt has the best fit to the data points and the highest log likelihood. # NB: we get warnings when using link=identity, the reason for this is because the "algorithm stopped at boundary values". # In poisson y need to be larger than zero, if the fitted values of y becomes less than 0, we are outside the boundary for legal values of y. # Identity link: my=theta, my has no restrictions and for some estimates this value can be negative. # Another link, for example the log link: log(my)=theta, my=exp(theta), my will for all estimates of theta be positive. with this link we do not run into any trouble.