#Commands to the trial project
# This only gives the R commands for the trial project.
# It is not a draft solution of the project.
# Read the data into R and attach the survival library
cirrhosis=read.table("http://www.uio.no/studier/emner/matnat/math/STK4080/h12/cirrhosis.txt", header=T)
library(survival)
# Define treatment, sex and acites as factors (the reference group is then coded as 0)
cirrhosis$treat=factor(cirrhosis$treat)
cirrhosis$sex=factor(cirrhosis$sex)
cirrhosis$asc=factor(cirrhosis$asc)
# Use years as time scale
cirrhosis$time=cirrhosis$time/365.25
# a)
# Simple univariate analyses for one covariate at a time
# Treatment
# Nelson-Aalen plots:
fit.naa=coxph(Surv(time,status)~strata(treat),data=cirrhosis)
surv.naa=survfit(fit.naa)
plot(surv.naa,fun="cumhaz", mark.time=F ,lty=1:2, xlab="Years since randomization",ylab="Cumulative hazard", main="Treatment")
legend("topleft",legend=c("Prednisone","Placebo"),lty=1:2)
# Kaplan-Meier plots:
fit.km=survfit(Surv(time,status)~treat,data=cirrhosis, conf.type="plain")
plot(fit.km, mark.time=F, lty=1:2, xlab="Years since randomization", ylab="Survival",main="Treatment")
legend("topright",legend=c("Prednisone","Placebo"),lty=1:2)
# Estimates of five years survival probabilities with "plain" confidence intervals (alternatively we could have used the option "log-log")
summary(fit.km,time=5)
# Estimates of median survival time
print(fit.km)
# Log-rank test:
survdiff(Surv(time,status)~treat,data=cirrhosis)
# The commands for sex and ascites are similar to the commands for treatment
# For numeric covariates age and prothrombin index, we have create a categorical covariate (factor) by a "reasonable" grouping
# For age
cirrhosis$agegroup=cut(cirrhosis$age,breaks=c(0,49,59,69,100), labels=1:4)
# For prothrombin index (approximately according to quartiles)
cirrhosis$protgroup=cut(cirrhosis$prot,breaks=c(0,49,69,89,150), labels=1:4)
# After we have obtained categorical covariates for age and prothrombin index, the commands are similar to the ones given above for treatment
# d) Univariate Cox regressions
# Cox regression for treatment
cox.treat=coxph(Surv(time,status)~treat,data=cirrhosis)
summary(cox.treat)
# The commands for sex and ascites are similar to the commands for treatment
# For the numeric covariates age and prothrombin index, we need to decide how thay should be coded
# (as given on the data file, or suitably transformed, or grouped).
# To see how age should be coded, we fit a model using a spline for age:
cox.psage=coxph(Surv(time,status)~pspline(age),data=cirrhosis)
print(cox.psage)
termplot(cox.psage,se=T)
# Both the plot (from the templot-command) and the test (from the print-command) show that it is reasonable to assume a log-linear effect of age
# We will therefore fit a Cox model using age as a numbeic covariate.
# It may be sensible to report the effect of age per 10 years, so we define a new covariate
# where age is given per 10 years and fit a Cox model with this covariate
cirrhosis$age10= cirrhosis$age/10
cox.age10=coxph(Surv(time,status)~age10,data=cirrhosis)
summary(cox.age10)
# Then we use a spline fit to see how prothrombin index should be coded
cox.psprot=coxph(Surv(time,status)~pspline(prot),data=cirrhosis)
print(cox.psprot)
termplot(cox.psprot,se=T)
# The plot indicates that we do not have a log-linear effect for low prothrombin values.
# As there is not easy to find a suitable transformation, we chose to work with grouped prothrombin index.
# (The non-linear part of prothrombin index is not significant, so could also be acceptable to use prothrombin index as a numeric covariate)
# When using grouped prothrombin index as a categorical covariate, it is natural to use group with highest values as reference:
cirrhosis$protgroup=relevel(cirrhosis$protgroup,ref=4)
cox.protgroup=coxph(Surv(time,status)~protgroup,data=cirrhosis)
summary(cox.protgroup)
# c) Multivariate Cox regression
# We then fit a Cox model with all the covariates
cox.all=coxph(Surv(time,status)~treat+sex+asc+age10+ protgroup, data=cirrhosis)
summary(cox.all)
# [In principle it may be the case that the coding of a numeric covariate that is appropriate for a univariate analysis,
# is not appropriate for a multivariate analysis and vice versa. But this does not seem to be the case here (commands not shown)]
# We then check for first order interactions between any pair of two covariates:
# We may e.g. check for interaction between treatment and sex by the commands:
cox.int=coxph(Surv(time,status)~treat+sex+asc+age10+ protgroup+treat:sex, data=cirrhosis)
anova(cox.all,cox.int)
# Similar commands may be used to check other first order interactions
# [Note, however, that for checking interaction between ascites and prothrombin group,
# we need to merge the two highest prothrombin groups (since there is only one person with severe ascites in the highest prothrombin group)]
# We find an interaction between treatment and ascites and between sex and age.
# To ease the interpretation of the interaction between sex and the numeric covariate age, it is useful to
# center age by subtracting 60 years (which is close to the mean age)
cirrhosis$cage10=(cirrhosis$age-60)/10
cox.final=coxph(Surv(time,status)~treat+sex+asc+cage10+protgroup +treat:asc+sex:cage10, data=cirrhosis)
summary(cox.final)
# Log-linearity of the numeric covariates has been checked along the way using splines
# [both for the unvariate and mulivariate Cox models (the latter commands not given here)]
# We also need to check for proportional hazards:
cox.test=cox.zph(cox.final,transform='log')
print(cox.test)
par(mfrow=c(2,6))
plot(cox.test)
# There are no indication of violation of the assumption of proportional hazard
# We will then check the global fit of the model
surv.final=survfit(cox.final)
# Make 4 groups according to the values of the prognostic index
break.pi=quantile(cox.final$linear.predictor)
cirrhosis$pred.group=cut(cox.final$linear.predictor,break.pi, include.lowest=T,label=1:4)
# Make Kaplan-Meier plots for the four groups:
par(mfrow=c(1,1))
plot(survfit(Surv(time,status)~pred.group,data=cirrhosis),mark.time=F,lty=1:4)
# Add lines for the survival function estimates based on the fitted Cox model
# for the mean value of the prognositc index within each group
for (i in 1:4)
{
mean.pi=mean(cox.final$linear.predictor[cirrhosis$pred.group==i])
lines(surv.final$time,surv.final$surv^exp(mean.pi),type="s",lty=i,col="red")
}
# For illustration we plot the estimated survival function for the following combinations of the covariates
# 1) prednisone, female, no ascites , age 60, prothrombin above 90
# 2) prednisone, female, slight ascites , age 60, prothrombin 50-69
# 3) prednisone, female, slight ascites , age 60, prothrombin below 50
# 4) prednisone, female, marked ascites , age 60, prothrombin below 50
# 5) prednisone, female, no ascites , age 60, prothrombin above 90
# 6) prednisone, female, slight ascites , age 60, prothrombin 50-69
# 7) prednisone, female, slight ascites , age 60, prothrombin below 50
# 8) prednisone, female, marked ascites , age 60, prothrombin below 50
cox.final=coxph(Surv(time,status)~treat+sex+asc+cage10+protgroup +treat:asc+sex:cage10, data=cirrhosis)
par(mfrow=c(1,2))
new.covariates=data.frame(treat=c("0","0","0","0"),sex=c("0","0","0","0"),asc=c("0","1","1","2"),cage10=c(0,0,0,0),protgroup=c("4","2","1","1"))
surv.final=survfit(cox.final,newdata=new.covariates)
plot(surv.final,mark.time=F, xlab="Years since randomization", ylab="Survival",lty=1:4, main="Prednisone")
legend("topright",c("1","2","3","4"),lty=1:4)
new.covariates=data.frame(treat=c("1","1","1","1"),sex=c("0","0","0","0"),asc=c("0","1","1","2"),cage10=c(0,0,0,0),protgroup=c("4","2","1","1"))
surv.final=survfit(cox.final,newdata=new.covariates)
plot(surv.final,mark.time=F, xlab="Years since randomization", ylab="Survival",lty=1:4, main="Placebo")
legend("topright",c("5","6","7","8"),lty=1:4)