#Commands to the trial project

 

# This only gives the R commands for the trial project.

# It is not a draft solution of the project.

# Some comments on how to write a project report was given at the lectures

 

# Read the data into R and attach the survival library

cirrhosis=read.table("http://www.uio.no/studier/emner/matnat/math/STK4080/h08/computing/cirrhosis.txt", header=T)

library(survival)

 

# Define treatment, sex and acites as factors:

cirrhosis$treat=factor(cirrhosis$treat)

cirrhosis$sex=factor(cirrhosis$sex)

cirrhosis$asc=factor(cirrhosis$asc)

 

# Use years as time scale

cirrhosis$time=cirrhosis$time/365.25

 

 

# For ease of presentation, we give the commands for the questions a-c for one covariate at a time.

 

 

# Treatment

 

# a) Kaplan-Meier plots:

fit.a.treat=survfit(Surv(time,status)~treat,data=cirrhosis,conf.type="plain")

plot(fit.a.treat, mark.time=F, lty=1:2, xlab="Years since randomization", main="Treatment")

legend(9,0.97,legend=c("Prednisone","Placebo"),lty=1:2)

 

# b) Estimates of five years survival probabilities and median survival times

# with "plain" confidence intervals (alternatively we could have used "log-log")

summary(fit.a.treat)

print(fit.a.treat)

# We may read the five years survival probability with 95% confidence intervals from the output ?of the summery command

# (look at thesurvival estimate and confidence intervals for the largest time that is at most 5 years).

# We get the median survival time with confidence intervals from the print commend

# (but you should make sure that you also know how to get it from the summary command, since

# that is what you would have to do to find estimates of the quartiles with confidence intervals).

 

# c) Log-rank test:

survdiff(Surv(time,status)~treat,data=cirrhosis)

 

 

# Sex

# a)

fit.a.sex=survfit(Surv(time,status)~sex,data=cirrhosis,conf.type="plain")

plot(fit.a.sex, mark.time=F, lty=1:2, xlab="Years since randomization", main="Sex")

legend(9.5,0.97,legend=c("Females","Males"),lty=1:2)

#b)

summary(fit.a.sex)

print(fit.a.sex)

#c)

survdiff(Surv(time,status)~sex,data=cirrhosis)

 

 

# Ascites

#a)

fit.a.asc=survfit(Surv(time,status)~asc,data=cirrhosis,conf.type="plain")

plot(fit.a.asc, mark.time=F, lty=1:3, xlab="Years since randomization", main="Ascites")

legend(9.7,0.97,legend=c("None","Slight","Marked"),lty=1:3)

#b)

summary(fit.a.asc)

print(fit.a.asc)

#c)

survdiff(Surv(time,status)~asc,data=cirrhosis)

 

 

# Age.

# First inspect the age distribution for all patients and for those who die, and use this to define "sensible" age groups.

quantile(cirrhosis$age)

quantile(cirrhosis$age[cirrhosis$status==1])

cirrhosis$ageg=cut(cirrhosis$age,breaks=c(0,50,60,70,100),right=F,labels=1:4)

#a)

fit.a.age=survfit(Surv(time,status)~ageg,data=cirrhosis,conf.type="plain")

plot(fit.a.age, mark.time=F, lty=1:4, xlab="Years since randomization", main="Age")

legend(8.5,0.97,legend=c("Below 50","50-59","60-69","70 and above"),lty=1:4)

#b)

summary(fit.a.age)

print(fit.a.age)

#c)

survdiff(Surv(time,status)~ageg,data=cirrhosis)

 

 

# Prothrombin

# First inspect the distribution of the prothrombin index for all patients and for those who die, and use this to define "sensible" prothrombin groups.

quantile(cirrhosis$prot)

quantile(cirrhosis$prot[cirrhosis$status==1])

cirrhosis$protg=cut(cirrhosis$prot,breaks=c(0,50,70,90,140),right=F,labels=1:4)

#a)

fit.a.prot=survfit(Surv(time,status)~protg,data=cirrhosis,conf.type="plain")

plot(fit.a.prot, mark.time=F, lty=1:4, xlab="Years since randomization", main="Prothrombin")

legend(8.5,0.97,legend=c("Below 50","50-69","70-89","90 and above"),lty=1:4)

#b)

summary(fit.a.prot)

print(fit.a.prot)

#c)

survdiff(Surv(time,status)~protg,data=cirrhosis)

 

 

# d) Univariate Cox regressions

 

# Treatment

cox.d.treat=coxph(Surv(time,status)~treat,data=cirrhosis)

summary(cox.d.treat)

 

# Sex

cox.d.sex=coxph(Surv(time,status)~sex,data=cirrhosis)

summary(cox.d.sex)

 

# Ascites

cox.d.asc=coxph(Surv(time,status)~asc,data=cirrhosis)

summary(cox.d.asc)

 

# Age

# First we investigate how age should be coded

cox.d.ageg=coxph(Surv(time,status)~ageg,data=cirrhosis)

summary(cox.d.ageg)

ag=rep(0,4)

for (i in 1:4) ag[i]=mean(cirrhosis$age[cirrhosis$ageg==i])

plot(ag,c(0,cox.d.ageg$coef[1:3]),pch=1)

 

cox.d.psage=coxph(Surv(time,status)~pspline(age),data=cirrhosis)

print(cox.d.psage)

termplot(cox.d.psage)

 

# There seems to be a log-linear effect of age, so we may use this as it is coded on the data file:

cox.d.age=coxph(Surv(time,status)~age,data=cirrhosis)

summary(cox.d.age)

 

# Prothrombin

# First we investigate how prothrombin should be coded

cox.d.protg=coxph(Surv(time,status)~protg,data=cirrhosis)

summary(cox.d.protg)

pr=rep(0,4)

for (i in 1:4) pr[i]=mean(cirrhosis$prot[cirrhosis$protg==i])

plot(pr,c(0,cox.d.protg$coef[1:3]),pch=1)

 

cox.d.psprot=coxph(Surv(time,status)~pspline(prot),data=cirrhosis)

print(cox.d.psprot)

termplot(cox.d.psprot)

 

# There seems to be a fairly log-linear effect of prothrombin, so we may use this as it is coded on the data file:

cox.d.prot=coxph(Surv(time,status)~prot,data=cirrhosis)

summary(cox.d.prot)

 

 

 

# e) Multivariate Cox regression

 

# Fit model with all covariates

cox.e.all=coxph(Surv(time,status)~treat+sex+asc+age+prot,data=cirrhosis)

summary(cox.e.all)

 

# All covariates, except treatment, are significant, so no can be removed

 

# Check for first order interactions with treatment

cox.e.treat.sex=coxph(Surv(time,status)~treat+sex+asc+age+prot+treat:sex,data=cirrhosis)

summary(cox.e.treat.sex)

anova(cox.e.all,cox.e.treat.sex,test="Chisq")

 

cox.e.treat.asc=coxph(Surv(time,status)~treat+sex+asc+age+prot+treat:asc,data=cirrhosis)

summary(cox.e.treat.asc)

anova(cox.e.all,cox.e.treat.asc,test="Chisq")

 

cox.e.treat.age=coxph(Surv(time,status)~treat+sex+asc+age+prot+treat:age,data=cirrhosis)

summary(cox.e.treat.age)

anova(cox.e.all,cox.e.treat.age,test="Chisq")

 

cox.e.treat.prot=coxph(Surv(time,status)~treat+sex+asc+age+prot+treat:prot,data=cirrhosis)

summary(cox.e.treat.prot)

anova(cox.e.all,cox.e.treat.prot,test="Chisq")

 

# There is a significant interaction between treatment and ascites (p-vlaue 0.8 %), but not between treatment and the other covariates

 

# Check for other first order interactions (assuming interaction between treatment and ascites). For example:

cox.e.sex.age=coxph(Surv(time,status)~treat+sex+asc+age+prot+treat:asc+sex:age,data=cirrhosis)

summary(cox.e.sex.age)

anova(cox.e.treat.asc, cox.e.sex.age,test="Chisq")

 

# No interactions are significant at the 1 % level (but sex:age and sex:prot are significant at the 5% level)

 

# Check proportional hazards

cox.f.test=cox.zph(cox.e.treat.asc,transform='log')

print(cox.f.test)

par(mfrow=c(2,4))

plot(cox.f.test)

 

# There are no indication of violation of the assumption of proportional hazard

 

# "Final model":

summary(cox.e.treat.asc)

 

 

 

# f) Estimated survival curves for given values of covariates.

# It looks as if there are problems with ascites, which is a factor with three levels, so fit the final model with explicitely defined covariates:

cox.final=coxph(Surv(time,status)~treat+sex+I(asc==1)+I(asc==2)+age+prot+I((treat==1)&(asc==1))+I((treat==1)&(asc==2)),data=cirrhosis)

summary(cox.final)

 

# Plot estimated survival curves for all combinations of treatment and ascites for females, aged 60 years with prothrombin index 70 (the latter two are close to the average values):

par(mfrow=c(1,3))

plot(survfit(cox.final,newdata= data.frame(treat=0,sex=0,asc=0,age=60,prot=70),conf.type="none"),lty=1, main="None")

lines(survfit(cox.final,newdata= data.frame(treat=1,sex=0,asc=0,age=60,prot=70),conf.type="none"),lty=2)

 

plot(survfit(cox.final,newdata= data.frame(treat=0,sex=0,asc=1,age=60,prot=70),conf.type="none"),lty=1, main="Slight")

 

lines(survfit(cox.final,newdata= data.frame(treat=1,sex=0,asc=1,age=60,prot=70),conf.type="none"),lty=2)

 

plot(survfit(cox.final,newdata= data.frame(treat=0,sex=0,asc=2,age=60,prot=70),conf.type="none"),lty=1,main="Marked")

lines(survfit(cox.final,newdata= data.frame(treat=1,sex=0,asc=2,age=60,prot=70),conf.type="none"),lty=2)