#Commands to the trial project
# This only gives the R commands for the trial project.
# It is not a draft solution of the project.
# Some comments on how to write a project report was given at the lectures
# Read the data into R and attach the survival library
cirrhosis=read.table("http://www.uio.no/studier/emner/matnat/math/STK4080/h08/computing/cirrhosis.txt", header=T)
library(survival)
# Define treatment, sex and acites as factors:
cirrhosis$treat=factor(cirrhosis$treat)
cirrhosis$sex=factor(cirrhosis$sex)
cirrhosis$asc=factor(cirrhosis$asc)
# Use years as time scale
cirrhosis$time=cirrhosis$time/365.25
# For ease of presentation, we give the commands for the questions a-c for one covariate at a time.
# Treatment
# a) Kaplan-Meier plots:
fit.a.treat=survfit(Surv(time,status)~treat,data=cirrhosis,conf.type="plain")
plot(fit.a.treat, mark.time=F, lty=1:2, xlab="Years since randomization", main="Treatment")
legend(9,0.97,legend=c("Prednisone","Placebo"),lty=1:2)
# b) Estimates of five years survival probabilities and median survival times
# with "plain" confidence intervals (alternatively we could have used "log-log")
summary(fit.a.treat)
print(fit.a.treat)
# We may read the five years survival probability with 95% confidence intervals from the output ?of the summery command
# (look at thesurvival estimate and confidence intervals for the largest time that is at most 5 years).
# We get the median survival time with confidence intervals from the print commend
# (but you should make sure that you also know how to get it from the summary command, since
# that is what you would have to do to find estimates of the quartiles with confidence intervals).
# c) Log-rank test:
survdiff(Surv(time,status)~treat,data=cirrhosis)
# Sex
# a)
fit.a.sex=survfit(Surv(time,status)~sex,data=cirrhosis,conf.type="plain")
plot(fit.a.sex, mark.time=F, lty=1:2, xlab="Years since randomization", main="Sex")
legend(9.5,0.97,legend=c("Females","Males"),lty=1:2)
#b)
summary(fit.a.sex)
print(fit.a.sex)
#c)
survdiff(Surv(time,status)~sex,data=cirrhosis)
# Ascites
#a)
fit.a.asc=survfit(Surv(time,status)~asc,data=cirrhosis,conf.type="plain")
plot(fit.a.asc, mark.time=F, lty=1:3, xlab="Years since randomization", main="Ascites")
legend(9.7,0.97,legend=c("None","Slight","Marked"),lty=1:3)
#b)
summary(fit.a.asc)
print(fit.a.asc)
#c)
survdiff(Surv(time,status)~asc,data=cirrhosis)
# Age.
# First inspect the age distribution for all patients and for those who die, and use this to define "sensible" age groups.
quantile(cirrhosis$age)
quantile(cirrhosis$age[cirrhosis$status==1])
cirrhosis$ageg=cut(cirrhosis$age,breaks=c(0,50,60,70,100),right=F,labels=1:4)
#a)
fit.a.age=survfit(Surv(time,status)~ageg,data=cirrhosis,conf.type="plain")
plot(fit.a.age, mark.time=F, lty=1:4, xlab="Years since randomization", main="Age")
legend(8.5,0.97,legend=c("Below 50","50-59","60-69","70 and above"),lty=1:4)
#b)
summary(fit.a.age)
print(fit.a.age)
#c)
survdiff(Surv(time,status)~ageg,data=cirrhosis)
# Prothrombin
# First inspect the distribution of the prothrombin index for all patients and for those who die, and use this to define "sensible" prothrombin groups.
quantile(cirrhosis$prot)
quantile(cirrhosis$prot[cirrhosis$status==1])
cirrhosis$protg=cut(cirrhosis$prot,breaks=c(0,50,70,90,140),right=F,labels=1:4)
#a)
fit.a.prot=survfit(Surv(time,status)~protg,data=cirrhosis,conf.type="plain")
plot(fit.a.prot, mark.time=F, lty=1:4, xlab="Years since randomization", main="Prothrombin")
legend(8.5,0.97,legend=c("Below 50","50-69","70-89","90 and above"),lty=1:4)
#b)
summary(fit.a.prot)
print(fit.a.prot)
#c)
survdiff(Surv(time,status)~protg,data=cirrhosis)
# d) Univariate Cox regressions
# Treatment
cox.d.treat=coxph(Surv(time,status)~treat,data=cirrhosis)
summary(cox.d.treat)
# Sex
cox.d.sex=coxph(Surv(time,status)~sex,data=cirrhosis)
summary(cox.d.sex)
# Ascites
cox.d.asc=coxph(Surv(time,status)~asc,data=cirrhosis)
summary(cox.d.asc)
# Age
# First we investigate how age should be coded
cox.d.ageg=coxph(Surv(time,status)~ageg,data=cirrhosis)
summary(cox.d.ageg)
ag=rep(0,4)
for (i in 1:4) ag[i]=mean(cirrhosis$age[cirrhosis$ageg==i])
plot(ag,c(0,cox.d.ageg$coef[1:3]),pch=1)
cox.d.psage=coxph(Surv(time,status)~pspline(age),data=cirrhosis)
print(cox.d.psage)
termplot(cox.d.psage)
# There seems to be a log-linear effect of age, so we may use this as it is coded on the data file:
cox.d.age=coxph(Surv(time,status)~age,data=cirrhosis)
summary(cox.d.age)
# Prothrombin
# First we investigate how prothrombin should be coded
cox.d.protg=coxph(Surv(time,status)~protg,data=cirrhosis)
summary(cox.d.protg)
pr=rep(0,4)
for (i in 1:4) pr[i]=mean(cirrhosis$prot[cirrhosis$protg==i])
plot(pr,c(0,cox.d.protg$coef[1:3]),pch=1)
cox.d.psprot=coxph(Surv(time,status)~pspline(prot),data=cirrhosis)
print(cox.d.psprot)
termplot(cox.d.psprot)
# There seems to be a fairly log-linear effect of prothrombin, so we may use this as it is coded on the data file:
cox.d.prot=coxph(Surv(time,status)~prot,data=cirrhosis)
summary(cox.d.prot)
# e) Multivariate Cox regression
# Fit model with all covariates
cox.e.all=coxph(Surv(time,status)~treat+sex+asc+age+prot,data=cirrhosis)
summary(cox.e.all)
# All covariates, except treatment, are significant, so no can be removed
# Check for first order interactions with treatment
cox.e.treat.sex=coxph(Surv(time,status)~treat+sex+asc+age+prot+treat:sex,data=cirrhosis)
summary(cox.e.treat.sex)
anova(cox.e.all,cox.e.treat.sex,test="Chisq")
cox.e.treat.asc=coxph(Surv(time,status)~treat+sex+asc+age+prot+treat:asc,data=cirrhosis)
summary(cox.e.treat.asc)
anova(cox.e.all,cox.e.treat.asc,test="Chisq")
cox.e.treat.age=coxph(Surv(time,status)~treat+sex+asc+age+prot+treat:age,data=cirrhosis)
summary(cox.e.treat.age)
anova(cox.e.all,cox.e.treat.age,test="Chisq")
cox.e.treat.prot=coxph(Surv(time,status)~treat+sex+asc+age+prot+treat:prot,data=cirrhosis)
summary(cox.e.treat.prot)
anova(cox.e.all,cox.e.treat.prot,test="Chisq")
# There is a significant interaction between treatment and ascites (p-vlaue 0.8 %), but not between treatment and the other covariates
# Check for other first order interactions (assuming interaction between treatment and ascites). For example:
cox.e.sex.age=coxph(Surv(time,status)~treat+sex+asc+age+prot+treat:asc+sex:age,data=cirrhosis)
summary(cox.e.sex.age)
anova(cox.e.treat.asc, cox.e.sex.age,test="Chisq")
# No interactions are significant at the 1 % level (but sex:age and sex:prot are significant at the 5% level)
# Check proportional hazards
cox.f.test=cox.zph(cox.e.treat.asc,transform='log')
print(cox.f.test)
par(mfrow=c(2,4))
plot(cox.f.test)
# There are no indication of violation of the assumption of proportional hazard
# "Final model":
summary(cox.e.treat.asc)
# f) Estimated survival curves for given values of covariates.
# It looks as if there are problems with ascites, which is a factor with three levels, so fit the final model with explicitely defined covariates:
cox.final=coxph(Surv(time,status)~treat+sex+I(asc==1)+I(asc==2)+age+prot+I((treat==1)&(asc==1))+I((treat==1)&(asc==2)),data=cirrhosis)
summary(cox.final)
# Plot estimated survival curves for all combinations of treatment and ascites for females, aged 60 years with prothrombin index 70 (the latter two are close to the average values):
par(mfrow=c(1,3))
plot(survfit(cox.final,newdata= data.frame(treat=0,sex=0,asc=0,age=60,prot=70),conf.type="none"),lty=1, main="None")
lines(survfit(cox.final,newdata= data.frame(treat=1,sex=0,asc=0,age=60,prot=70),conf.type="none"),lty=2)
plot(survfit(cox.final,newdata= data.frame(treat=0,sex=0,asc=1,age=60,prot=70),conf.type="none"),lty=1, main="Slight")
lines(survfit(cox.final,newdata= data.frame(treat=1,sex=0,asc=1,age=60,prot=70),conf.type="none"),lty=2)
plot(survfit(cox.final,newdata= data.frame(treat=0,sex=0,asc=2,age=60,prot=70),conf.type="none"),lty=1,main="Marked")
lines(survfit(cox.final,newdata= data.frame(treat=1,sex=0,asc=2,age=60,prot=70),conf.type="none"),lty=2)