%SIMULATED EXAMPLE FROM MATLAB
%http://www.mathworks.com/help/stats/readmission-times.html
oldFolder = pwd;
%Navigate to a folder containing sample data.
cd(matlabroot)
cd('help/toolbox/stats/examples')
%Load the sample data.
load readmissiontimes
% Change the current folder to the previous location:
cd(oldFolder)
%The response variable is Readmission Time,
%which shows the readmission times for 100 patients.
%The predictor variables are Age, Sex, Weight, and the
% smoking status of each patient, Smoker = 1 indicates the
% patient is a smoker, and 0 indicates that the patient does
% not smoke. The column vector Censored has the censorship
% information for each patient, where 1 indicates censored
% data, and 0 indicates the exact readmission times are
% observed. This is simulated data.
%
% Step 2. Fit Cox proportional hazards function.
%Fit a Cox proportional hazard function with the variable Sex
%as the predictor variable, taking the censoring into account.
X = Sex;
[b,logl,H,stats] = coxphfit(X,ReadmissionTime,...
'censoring',Censored);
%Assess the statistical significance of the term Sex.
stats
% stats =
%
% covb: 0.1016
% beta: -1.7642
% se: 0.3188
% z: -5.5335
% p: 3.1392e-08
%
%The p-value 3.1392e-08 indicates that the term Sex is statistically significant.
%Save the loglikelihood value with a different name. You will use this to assess
%the significance of the extended models.
loglSex = logl
% loglSex =
%
% -262.1365
% Step 3. Add Age and Weight to the model.
%
% Fit a Cox proportional hazards model with the variables Sex, Age, and Weight.
X = [Sex Age Weight];
[b,logl,H,stats] = coxphfit(X,ReadmissionTime,...
'censoring',Censored);
% Assess the significance of the terms.
%
stats.beta
% ans =
%
% -0.5441
% 0.0143
% 0.0250
stats.p
% ans =
%
% 0.4953
% 0.3842
% 0.0960
% None of the terms, adjusted for others, is statistically significant.
%
% Assess the significance of the terms using the log likelihood ratio.
%You can assess the significance of the new model using the likelihood
%ratio statistic. First find the difference between the log-likelihood
%statistic of the model without the terms Age and Weight and the
%log-likelihood of the model with Sex, Age, and Weight.
-2*[loglSex - logl]
% ans =
%
% 3.6705
% Now, compute the p-value for the likelihood ratio statistic. The likelihood ratio statistic has a Chi-square distribution with a degrees of freedom equal to the number of predictor variables being assessed. In this case, the degrees of freedom is 2.
p = 1 - cdf('chi2',3.6705,2)
% p =
%
% 0.1596
% The p-value of 0.1596 indicates that the terms Age and Weight are not
%statistically significant, given the term Sex in the model.
%
% Step 4. Add Smoker to the model.
%
% Fit a Cox proportional hazards model with the variables Sex and Smoker.
%
X = [Sex Smoker];
[b,logl,H,stats] = coxphfit(X,ReadmissionTime,...
'censoring',Censored);
% Assess the significance of the terms in the model.
%
stats.p
% ans =
%
% 0.0000
% 0.0148
% Compare this model to the first model where Sex is the only term.
-2*[loglSex - logl]
% ans =
%
% 5.5789
% Compute the p-value for the likelihood ratio statistic. The likelihood ratio statistic has a Chi-square distribution with a degree of freedom of 1.
p = 1 - cdf('chi2',5.5789,1)
% p =
%
% 0.0182
% The p-value of 0.0182 indicates that Sex and Smoker are statistically significant given the other is in the model. The model with Sex and Smoker is a better fit compared to the model with only Sex.
%
% Request the coefficient estimates.
%
stats.beta
% ans =
%
% -1.7165
% 0.6338
%Fit a Cox ph model with a baseline of 0.
X = [Sex Smoker];
[b,logl,H,stats] = coxphfit(X,ReadmissionTime,...
'censoring',Censored,'baseline',0);
stats.beta
% ans =
%
% -1.7165
% 0.6338
% The coefficients are not affected, but the hazard rate differs from when the baseline is the mean of X.