def __init__(self, p_value, test_statistic, name=None, test_name=None, **kwargs):
    """
    Store the outcome of one or more statistical tests.

    Parameters
    ----------
    p_value:
        the p-value(s). Kept unchanged on ``p_value``; a copy converted with
        ``utils._to_1d_array`` is kept on ``_p_value``.
    test_statistic:
        the test statistic(s). Kept unchanged on ``test_statistic``; a copy converted
        with ``utils._to_1d_array`` is kept on ``_test_statistic``. Must have the same
        length as the p-values.
    name: optional
        label(s) for the results, converted with ``utils._to_list``. When given, must
        have one entry per test statistic.
    test_name: optional
        name of the test; stored on ``test_name`` and inside ``_kwargs``.
    kwargs:
        every item is set as an attribute on the instance, and the dict itself
        (with ``test_name`` added) is kept on ``_kwargs``.
    """
    # Raw inputs, exactly as supplied by the caller.
    self.p_value = p_value
    self.test_statistic = test_statistic
    self.test_name = test_name

    # Array views of the same inputs; the two must line up element by element.
    self._p_value = utils._to_1d_array(p_value)
    self._test_statistic = utils._to_1d_array(test_statistic)
    assert len(self._p_value) == len(self._test_statistic)

    if name is None:
        self.name = None
    else:
        self.name = utils._to_list(name)
        assert len(self.name) == len(self._test_statistic)

    # Extra keyword arguments become attributes (set last, so they win on a name clash).
    for attribute, attribute_value in kwargs.items():
        setattr(self, attribute, attribute_value)

    # test_name is recorded in the kwargs dict only after the attributes are set.
    kwargs["test_name"] = test_name
    self._kwargs = kwargs
def survival_function_at_times(self, times, label=None) -> pd.Series:
    """
    Return a Pandas series of the predicted survival value at specific times

    Parameters
    -----------
    times: iterable or float
        the times to evaluate ``self.predict`` at; also used (via ``_to_1d_array``)
        as the index of the result.
    label: str
        name of the returned Series, resolved with ``coalesce(label, self._label)``.

    Returns
    --------
    pd.Series
    """
    series_name = coalesce(label, self._label)
    predicted = self.predict(times)
    time_index = _to_1d_array(times)
    return pd.Series(predicted, index=time_index, name=series_name)
def cumulative_density_at_times(self, times, label=None) -> pd.Series:
    """
    Return a Pandas series of the predicted cumulative density at specific times

    The values are ``1 - self.predict(times)``.

    Parameters
    -----------
    times: iterable or float
        the times to evaluate ``self.predict`` at; also used (via ``_to_1d_array``)
        as the index of the result.
    label: str
        name of the returned Series, resolved with ``coalesce(label, self._label)``.

    Returns
    --------
    pd.Series
    """
    series_name = coalesce(label, self._label)
    complement = 1 - self.predict(times)
    time_index = _to_1d_array(times)
    return pd.Series(complement, index=time_index, name=series_name)
def proportional_hazard_test(
    fitted_cox_model, training_df, time_transform="rank", precomputed_residuals=None, **kwargs
) -> StatisticalResult:
    """
    Test whether any variable in a Cox model breaks the proportional hazard assumption. This method uses an
    approximation that R's ``survival`` used to use, but changed in late 2019, hence there will be differences
    here between lifelines and R.

    Parameters
    ----------
    fitted_cox_model: CoxPHFitter
        the fitted Cox model, fitted with `training_df`, you wish to test. Currently only the CoxPHFitter is
        supported, but later CoxTimeVaryingFitter, too.
    training_df: DataFrame
        the DataFrame used in the call to the Cox model's ``fit``.
    time_transform: vectorized function, list, or string, optional (default='rank')
        {'all', 'km', 'rank', 'identity', 'log'}
        One of the strings above, a list of strings, or a function to transform the time. The resolved
        transform is called as ``transform(durations, events, weights)``. 'all' will present all the
        transforms.
    precomputed_residuals: DataFrame, optional
        specify the scaled Schoenfeld residuals, if already computed. When given they are used as-is;
        otherwise they are computed from ``training_df``.
    kwargs:
        additional parameters to add to the StatisticalResult

    Returns
    -------
    StatisticalResult

    Notes
    ------
    R uses the default `km`, we use `rank`, as this performs well versus other transforms. See
    http://eprints.lse.ac.uk/84988/1/06_ParkHendry2015-ReassessingSchoenfeldTests_Final.pdf

    References
    -----------
     - http://eprints.lse.ac.uk/84988/1/06_ParkHendry2015-ReassessingSchoenfeldTests_Final.pdf
     - "Extending the Cox Model"
     - https://github.com/therneau/survival/commit/5da455de4f16fbed7f867b1fc5b15f2157a132cd#diff-c784cc3eeb38f0a6227988a30f9c0730R36
    """
    if "transform" in kwargs:
        warnings.warn("Found 'transform' keyword being set. Did you mean to set 'time_transform' instead?", UserWarning)

    events, durations, weights = fitted_cox_model.event_observed, fitted_cox_model.durations, fitted_cox_model.weights
    n_deaths = events.sum()

    # Previously the residuals were recomputed unconditionally after this choice, which discarded any
    # caller-supplied ``precomputed_residuals``. Compute them only when none were supplied, using the
    # same expression that used to produce the effective value so default results are unchanged.
    if precomputed_residuals is None:
        scaled_resids = (
            fitted_cox_model.compute_residuals(training_df, kind="schoenfeld").dot(fitted_cox_model.variance_matrix_)
            * n_deaths
        )
    else:
        scaled_resids = precomputed_residuals

    def compute_statistic(times, resids, n_deaths):
        demeaned_times = times - times.mean()
        T = (demeaned_times.values[:, None] * resids.values).sum(0) ** 2 / (
            n_deaths * (fitted_cox_model.standard_errors_ ** 2) * (demeaned_times ** 2).sum()
        )
        return T

    def statistic_and_p_values(transform):
        # Only rows where ``events`` is true are kept before computing the statistic.
        times = transform(durations, events, weights)[events.values]
        T = compute_statistic(times, scaled_resids, n_deaths)
        p_values = utils._to_1d_array([_chisq_test_p_value(t, 1) for t in T])
        return T, p_values

    if time_transform == "all":
        time_transform = list(TimeTransformers.TIME_TRANSFOMERS.keys())

    if isinstance(time_transform, list):
        # Start from an empty result and append one block of rows per transform.
        result = StatisticalResult([], [], [])

        for transform_name in time_transform:
            transform = TimeTransformers().get(transform_name)
            T, p_values = statistic_and_p_values(transform)
            result += StatisticalResult(
                p_values,
                T,
                name=[(c, transform_name) for c in fitted_cox_model.params_.index],
                test_name="proportional_hazard_test",
                null_distribution="chi squared",
                degrees_of_freedom=1,
                model=str(fitted_cox_model),
                **kwargs
            )

    else:
        time_transformer = TimeTransformers().get(time_transform)
        assert callable(
            time_transformer
        ), "time_transform must be a callable function, or a string: {'rank', 'km', 'identity', 'log'}."

        T, p_values = statistic_and_p_values(time_transformer)
        result = StatisticalResult(
            p_values,
            T,
            name=fitted_cox_model.params_.index.tolist(),
            test_name="proportional_hazard_test",
            time_transform=time_transform,
            null_distribution="chi squared",
            degrees_of_freedom=1,
            model=str(fitted_cox_model),
            **kwargs
        )
    return result
def proportional_hazard_test(fitted_cox_model, training_df, time_transform="rank", precomputed_residuals=None, **kwargs):
    """
    Test whether any variable in a Cox model breaks the proportional hazard assumption.

    Parameters
    ----------
    fitted_cox_model: CoxPHFitter
        the fitted Cox model, fitted with `training_df`, you wish to test. Currently only the CoxPHFitter is
        supported, but later CoxTimeVaryingFitter, too.
    training_df: DataFrame
        the DataFrame used in the call to the Cox model's ``fit``.
    time_transform: vectorized function, list, or string, optional (default='rank')
        {'all', 'km', 'rank', 'identity', 'log'}
        One of the strings above, a list of strings, or a function to transform the time. The resolved
        transform is called as ``transform(durations, events, weights)``. 'all' will present all the
        transforms.
    precomputed_residuals: DataFrame, optional
        specify the scaled schoenfeld residuals, if already computed.
    kwargs:
        additional parameters to add to the StatisticalResult

    Returns
    -------
    StatisticalResult

    Notes
    ------
    R uses the default `km`, we use `rank`, as this performs well versus other transforms. See
    http://eprints.lse.ac.uk/84988/1/06_ParkHendry2015-ReassessingSchoenfeldTests_Final.pdf
    """
    events, durations, weights = fitted_cox_model.event_observed, fitted_cox_model.durations, fitted_cox_model.weights
    deaths = events.sum()

    if precomputed_residuals is None:
        scaled_resids = fitted_cox_model.compute_residuals(training_df, kind="scaled_schoenfeld")
    else:
        scaled_resids = precomputed_residuals

    def compute_statistic(times, resids):
        # Demean into a new Series; the earlier ``times -= times.mean()`` modified the
        # caller's object in place.
        demeaned_times = times - times.mean()
        T = (demeaned_times.values[:, None] * resids.values).sum(0) ** 2 / (
            deaths * np.diag(fitted_cox_model.variance_matrix_) * (demeaned_times ** 2).sum()
        )
        return T

    def statistic_and_p_values(transform):
        # Only rows where ``events`` is true are kept before computing the statistic.
        times = transform(durations, events, weights)[events.values]
        T = compute_statistic(times, scaled_resids)
        p_values = _to_1d_array([chisq_test(t, 1) for t in T])
        return T, p_values

    if time_transform == "all":
        time_transform = list(TimeTransformers.TIME_TRANSFOMERS.keys())

    if isinstance(time_transform, list):
        # Start from an empty result and append one block of rows per transform.
        result = StatisticalResult([], [], [])

        for transform_name in time_transform:
            transform = TimeTransformers().get(transform_name)
            T, p_values = statistic_and_p_values(transform)
            result += StatisticalResult(
                p_values,
                T,
                name=[(c, transform_name) for c in fitted_cox_model.params_.index],
                test_name="proportional_hazard_test",
                null_distribution="chi squared",
                degrees_of_freedom=1,
                **kwargs
            )

    else:
        time_transformer = TimeTransformers().get(time_transform)
        assert callable(
            time_transformer
        ), "time_transform must be a callable function, or a string: {'rank', 'km', 'identity', 'log'}."

        T, p_values = statistic_and_p_values(time_transformer)
        result = StatisticalResult(
            p_values,
            T,
            name=fitted_cox_model.params_.index.tolist(),
            test_name="proportional_hazard_test",
            time_transform=time_transform,
            null_distribution="chi squared",
            degrees_of_freedom=1,
            **kwargs
        )
    return result