##################################
### Exercise 19 from Exercises in STK3100/4100.
##################################

# Wald confidence interval for the odds ratio of a 2x2 table of counts.
#
# tab: 2x2 matrix of strictly positive counts.
# z:   normal quantile for the interval (1.96 gives an approximate 95% CI).
#
# Returns a list with
#   or         - the estimated odds ratio tab[1,1]*tab[2,2] / (tab[1,2]*tab[2,1]),
#   var_log_or - the estimated variance of log(OR), i.e. the sum of the
#                reciprocal cell counts,
#   ci         - c(lower, upper), obtained by exponentiating the interval
#                built on the log scale.
or_wald_ci <- function(tab, z = 1.96) {
  stopifnot(
    is.matrix(tab),
    identical(dim(tab), c(2L, 2L)),
    all(tab > 0)
  )
  or_hat <- tab[1, 1] * tab[2, 2] / (tab[1, 2] * tab[2, 1])
  var_log_or <- sum(1 / tab)
  list(
    or = or_hat,
    var_log_or = var_log_or,
    ci = or_hat * exp(c(-1, 1) * z * sqrt(var_log_or))
  )
}

## Number of people with and without diabetes by gender.
## Rows: with / without diabetes. Columns: the two gender groups.
n <- matrix(c(377, 17869, 336, 20099), nrow = 2) # observed counts

# 19d) Chi-squared test (section 13.3 in Devore & Berk).
# Expected counts under H0 (no difference between the genders) are computed
# from the observed margins, so they always add up to the observed totals
# instead of relying on hand-rounded numbers.
e <- outer(rowSums(n), colSums(n)) / sum(n)
chi_test <- sum((n - e)^2 / e)
pchisq(chi_test, df = 1, lower.tail = FALSE)
# Conclusion: p is approximately 0.002 < 0.05. Reject H0.
# There is a difference between the genders.

# 19e) Confidence interval for the odds ratio.
or_gender <- or_wald_ci(n)
OR_est <- or_gender$or
varOR_est <- or_gender$var_log_or # variance of log(OR), not of OR itself
conf.int <- or_gender$ci
# Conclusion: CI = (1.087970, 1.463981), which does not contain 1. Reject H0.
# There is a difference between the genders.

# 19g) GLM: logistic regression of diabetes status on gender.
# (The spelling `sich` is kept so that existing references keep working.)
sich <- n[1, ]     # with diabetes, per gender group:    377,   336
not_sick <- n[2, ] # without diabetes, per gender group: 17869, 20099
gender <- c(1, 0)  # indicator: 1 = first column of the table, 0 = second
mod1 <- glm(cbind(sich, not_sick) ~ 1 + gender, family = binomial)
summary(mod1)

# Wald test for the gender effect: estimate^2 / variance. The estimate and its
# standard error are read from the fitted model rather than typed in by hand.
gender_coef <- coef(summary(mod1))["gender", ]
wald_test <- (gender_coef[["Estimate"]] / gender_coef[["Std. Error"]])^2
pchisq(wald_test, df = 1, lower.tail = FALSE)
# Conclusion: p is approximately 0.002 < 0.05. Reject H0.
# There is a difference between the genders.

# 19h) Number of people with and without diabetes against BMI over and
# under 25. Note that `n` is reused for this second table.
n <- matrix(c(90, 21689, 623, 16274), nrow = 2) # observed counts

# Confidence interval for the odds ratio.
or_bmi <- or_wald_ci(n)
OR_est <- or_bmi$or
varOR_est <- or_bmi$var_log_or # variance of log(OR), not of OR itself
conf.int <- or_bmi$ci
# Conclusion: CI = (0.08681915, 0.13533277), which does not contain 1.
# Reject H0. There is a difference between people with BMI over and under 25.