e
#############################################################################
#Exercise 12
#############################################################################
#Try out different link-functions.

biller = read.table("http://www.uio.no/studier/emner/matnat/math/STK3100/h14/data/beetle.dat", header=F)
colnames(biller) = c("dose", "antall", "dode")

mod1 <-  glm(cbind(dode,antall-dode)~dose,family=binomial,data=biller)    		#default: (link=logit)
mod2 <-  glm(cbind(dode,antall-dode)~dose,family=binomial(link=probit),data=biller)
mod3 <-  glm(cbind(dode,antall-dode)~dose,family=binomial(link=cloglog),data=biller)

logLik(mod1)
logLik(mod2)
logLik(mod3)

# plot
plot(biller$dose,biller$dode/biller$antall,xlab="dose",ylab="observed probability")
points(biller$dose,fitted.values(mod1),type="l",col="blue")             # logit
points(biller$dose,fitted.values(mod2),type="l",col="red")              # probit
points(biller$dose,fitted.values(mod3),type="l",col="green")            # cloglog
legend("topleft",legend=c("logit","probit","cloglog"),col=c("blue","red","green"),lty=1)

#CONCLUSION: cloglog looks like it has the best fit to the data points, and the highest log likelihood.


#############################################################################
#Exercise 13
#############################################################################

#Repeat the previous exercise but now with Poisson regression.
#New data : number of previous children related to age of pregnant mother.

birth<-read.table(file="http://www.uio.no/studier/emner/matnat/math/STK3100/data/Birth.txt", header=T)

mod1 <-  glm(children~age,family=poisson,data=birth)        #default: (link = "log")
mod2 <-  glm(children~age,family=poisson(link=identity),data=birth, start=c(1,1))
mod3 <-  glm(children~age,family=poisson(link=sqrt),data=birth)

logLik(mod1)
logLik(mod2)
logLik(mod3)

# plot
plot(birth$age,birth$children,ylab="children",xlab="age")
points(birth$age, fitted.values(mod1),col="blue")             # log
points(birth$age, fitted.values(mod2),col="red")              # identity
points(birth$age, fitted.values(mod3),col="green")            # sqrt
legend("topleft",legend=c("log","identity","sqrt"),col=c("blue","red","green"),lty=1)

#CONCLUSION: sqrt has the best fit to the data points and the highest log likelihood.

# NB: we get warnings when using link=identity, the reason for this is because the "algorithm stopped at boundary values". 
# In poisson y need to be larger than zero, if the fitted values of y becomes less than 0, we are outside the boundary for legal values of y. 
# Identity link: my=theta, my has no restrictions and for some estimates this value can be negative.
# Another link, for example the log link: log(my)=theta, my=exp(theta), my will for all estimates of theta be positive. with this link we do not run into any trouble.