//==================// // EXERCISE 131 // REVISED FEB 2023 //==================// // Load the Melanoma data, keep those with localized stage use melanoma, clear keep if stage == 1 gen female = sex == 2 stset surv_mm, failure(status==1) exit(time 120.5) scale(12) // (a) Kaplan-Meier curve sts graph // (b) Weibull model (using stpm3) stpm3, scale(lncumhazard) df(1) predict s1, surv predict h1, hazard sts graph, addplot(line s1 _t, sort) name(km1, replace) ylabel(0.6(0.1)1) // (c) Plot hazard function sts graph, hazard kernel(epan2) addplot(line h1 _t, sort) name(hazard1, replace) // (d) using the timevar() option. // The predictions are at the values of _t by default. // It is useful to learn how to use the timevar() option. // Rather than predict at the observed values of time for each subject. // We can define our own time variable to predict at // This is useful in large datasets and/or predicting // for specific covariate patterns // We usually store predicted results in a separate frame (dataset) // to separate the analysis data from the prediction data. // The following specifies times between 0 and 10 in steps of 0.1 // Results will be saved to a frame called df1 predict s1, surv timevar(0 10, step(0.1)) frame(df1, replace) // shows predictions frame df1 frame df1: list in 1/10 // We could save the hazard to a separate frame // As we want to predict at the same values of time // it is better to use the same frame // Using the frame(df1, merge) option will do this // it will use the timevar already created in frame df1. predict h1, hazard frame(df1, merge) // use the user written addplot command to add data to a plot // that is stored in a different frame sts graph, name(km1, replace) ylabel(0.6(0.1)1) legend(on) frame df1: addplot: line s1 tt, sort sts graph, hazard kernel(epan2) name(haz1, replace) legend(on) frame df1: addplot: line h1 tt, sort // (e) try stpm2 with 4 df stpm3, scale(lncumhazard) df(4) predict s4, surv timevar(0 10, step(0.1)) frame(df4, replace) predict h4, hazard frame(df4, merge) sts graph, ylabel(0.6(0.1)1) legend(on) name(km4, replace) frame df4: addplot: line s4 tt, sort sts graph, hazard kernel(epan2) name(hazard4, replace) legend(off) frame df4: addplot: line h4 tt, sort // (f) Fit a Cox Model with year8594 as the only covariate stcox i.year8594 /* (g) Flexible parametric model */ stpm3 i.year8594, scale(lncumhazard) df(4) eform /* (h) predicted values */ predict S7584 S8594, surv timevar(0 10, step(0.1)) ci /// frame(f2, replace) /// at1(year8594 0) at2(year8594 1) predict h7584 h8594, hazard per(1000) ci frame(f2, merge) /// at1(year8594 0) at2(year8594 1) frame f2 { twoway (line S7584 tt, sort) /// (line S8594 tt, sort), /// legend(order(1 "1975-1984" 2 "1985-1994") /// ring(0) pos(1) col(1)) /// xtitle("Time since diagnosis (years)") /// ytitle("Survival") /// name(surv, replace) twoway (line h7584 tt, sort) /// (line h8594 tt, sort), /// legend(order(1 "1975-1984" 2 "1985-1994") /// ring(0) pos(1) col(1)) /// xtitle("Time since diagnosis (years)") /// ytitle("Cause specific mortality rate (per 1000 py's)") /// name(haz, replace) } /* (i) hazard on log scale */ frame f2 { twoway (line h7584 tt, sort) /// (line h8594 tt, sort) /// , legend(order(1 "1975-1984" 2 "1985-1994") /// ring(0) pos(1) col(1)) /// xtitle("Time since diagnosis (years)") /// ytitle("Cause specific mortality rate (per 1000 py's)") /// yscale(log) /// name(haz_log, replace) } /* (j) sensitivity to knots */ forvalues i = 1/6 { stpm3 i.year8594, scale(lncumhazard) df(`i') eform estimates store df`i' predict h_df`i', hazard per(1000) timevar(0 10,step(0.1)) zeros frame(f1,mergecreate) predict s_df`i', survival at1(year8594 0) frame(f1, merge) } // Compare the log hazard ratios, standard errors AIC and BIC from the different models. estimates table df*, keep(1.year8594) se stat(AIC BIC) // an alternative way to do this if you do not like mergecreate // is to create a frame first // the model variables must exist in the data, so create these in the new frame // I use the merge option for each predict command, // as we run predict from the results frame. frame create f4 frame f4 { set obs 101 range tt 0 10 gen year8594 = . } forvalues i = 1/6 { stpm3 i.year8594, scale(lncumhazard) df(`i') eform estimates store df`i' frame f4 { predict h_df`i', hazard per(1000) timevar(tt) merge zeros predict s_df`i', survival timevar(tt) merge zeros } } /* (k) compare hazard and survival */ frame f4 { line s_df* tt, sort /// legend(ring(0) cols(1) pos(1)) /// xtitle("Time since diagnosis (years)") /// ytitle("Survival") /// name(S_df, replace) line h_df* tt, sort /// legend(ring(0) cols(1) pos(1)) /// xtitle("Time since diagnosis (years)") /// ytitle("Cause specific mortality rate (per 1000 py's)") /// name(h_df, replace) } /* (l) knot locations */ // run the following code to fit 10 models with 5df (6 knots) where // the 4 internal knots are selected at random centiles of the // distribution of event times. // // As there are many ties in this data we add a small // random number to the survival times // (otherwise we risk having knots in the same location) replace _t = _t + runiform()*0.001 gen lnt = ln(_t) set seed 4231 global legorder forvalues i = 1/10 { local clist forvalues j = 1/4 { local z`j': display %3.1f runiform()*100 local clist `clist' `z`j'' } numlist "`clist'", sort local clist `r(numlist)' di "`clist'" _pctile lnt if _d, p(`clist') local klist `r(r1)' `r(r2)' `r(r3)' `r(r4)' summ lnt if _d == 1 local klist `r(min)' `klist' `r(max)' stpm3 i.year8594, scale(lncumhazard) knots(`klist') initmod(exp) predict sp`i', surv at1(year8594 0) timevar(0 10,step(0.1)) frame(f5,mergecreate) predict hp`i', hazard per(1000) at1(year8594 0) frame(f5,merge) estimates store mp`i' global legorder ${legorder} `i' `""`clist'""' } // compare log hazard ratios and standard errors estimates table mp*, keep(1.year8594) se(%5.4f) b(%5.4f) // compare baseline hazard and survival curves frame f5 { twoway (line hp* tt, sort), /// legend(order(${legorder}) ring(0) pos(1) cols(1)) /// name(hp,replace) twoway (line sp* tt, sort), /// legend(order(${legorder}) ring(0) pos(1) cols(1)) /// name(sp,replace) } /* (m) Include effect of age group and sex */ // use stset again as we have altered _t stset surv_mm, failure(status==1) exit(time 120.5) scale(12) stcox i.female i.year8594 i.agegrp estimate store cox stpm3 i.female i.year8594 i.agegrp, df(4) scale(lncumhazard) eform estimates store stpm3_ph // compare to Cox model estimates table cox stpm3_ph, equation(1) keep(#1:) se /* (n) obtaining predictions */ /* predict using at option*/ estimates restore stpm3_ph predict S0, survival zeros timevar(0 10,step(0.1)) frame(f6, replace) frame f6: line S0 tt, sort predict S_F_8594_age75, survival frame(f6, merge) ci /// at1(female 1 year8594 1 agegrp 3) frame f6 { twoway (rarea S_F_8594_age75_lci S_F_8594_age75_uci tt, pstyle(ci)) /// (line S_F_8594_age75 tt) /// , legend(off) /// xtitle("Time since diagnosis (years)") /// ytitle("S(t)") /// title("Female age 75+ diagnosed 1985-1994") } /* Some comments (b) - The Weibull model clearly does not fit well as there is disagreement with the KM curve. - It is hard to work out the shape of the hazard function just by looking at the survival function. (c) - The hazard clearly has a turning point. - The hazard function for a Weibull model is montonic, i.e. increasing, decreasing or constant. (d) - We advise using timevar and saving to a new frame unless you have a good reason not to. (e) - Hard to tell the difference between KM and model fitted values. (g) - Hazard ratios are very similar, as are standard errors and confidence intervals. (i) - The difference is constant as a multiplicative effect is additive on the log scale. (j) - The hazard ratios and their standard errors are very similar. - The lowest AIC is for 5df and the lowest BIC for 2 df. (k) - The estrimated curves are very similar excet for 1 df. - There is less variation on the survival scale. (l) - The fitted curves show very little difference, even when knots are close togther. (n) - Estimates are similar as we are trying to estimate the same thing. - The Cox model does not estimate the baseline hazard, so it can take any shape. - The splines modles are flexible and an approximate many comples shapes. (o) - (i) A male, in the youngest age group in the earlist time period. */