def predict(self, times):
    """
    Predict the {0} at certain point in time. Uses a linear interpolation if
    points in time are not in the index.

    Parameters
    ----------
    times: scalar or array
        the time(s) at which the value of {0} is wanted.

    Returns
    -------
    predictions:
        the predicted {0} at ``times``. When the estimation method is a
        callable, the result is squeezed, so a single requested time gives
        a scalar.
    """
    method = self._estimation_method
    if not callable(method):
        # ``method`` is the name of the attribute that holds the estimate.
        # non-linear interpolations can push the survival curves above 1 and below 0.
        return dataframe_interpolate_at_times(getattr(self, method), times)

    # Evaluate the callable on the array form of ``times``, then select the
    # requested rows by label.
    evaluated = pd.DataFrame(method(_to_array(times)), index=_to_array(times))
    return evaluated.loc[times].squeeze()
def __init__(self, p_value, test_statistic, name=None, **kwargs):
    """
    Store the p-value(s) and test statistic(s) of one or more tests.

    Parameters
    ----------
    p_value: scalar or iterable
        kept as given on ``p_value`` and in array form on ``_p_value``.
    test_statistic: scalar or iterable
        kept as given on ``test_statistic`` and in array form on
        ``_test_statistic``; must have as many entries as ``p_value``.
    name: optional
        label(s) for the tests, stored in list form; when given there must be
        one per test statistic.
    kwargs:
        every extra keyword is set as an attribute on the instance, and the
        whole mapping is kept on ``_kwargs``.
    """
    self.p_value = p_value
    self.test_statistic = test_statistic

    # Array views used for the length checks below.
    self._p_value = _to_array(p_value)
    self._test_statistic = _to_array(test_statistic)
    assert len(self._p_value) == len(self._test_statistic)

    if name is None:
        self.name = None
    else:
        self.name = _to_list(name)
        assert len(self.name) == len(self._test_statistic)

    for attr_name, attr_value in kwargs.items():
        setattr(self, attr_name, attr_value)
    self._kwargs = kwargs
def survival_function_at_times(self, times):
    """
    Return a Pandas series of the predicted survival value at specific times.

    Parameters
    -----------
    times: iterable or float
        values to return the survival function at.

    Returns
    --------
    pd.Series
        ``exp(-lambda_ * t)`` for every requested time ``t``, indexed by the
        requested times.
    """
    # Do the arithmetic on the array form of ``times``. Computing on the raw
    # input breaks for a plain list when ``lambda_`` is a Python float, and for
    # a pd.Series input the result would be a Series that the constructor
    # below re-aligns by label against the new index instead of by position.
    times = _to_array(times)
    return pd.Series(np.exp(-self.lambda_ * times), index=times)
def cumulative_density_at_times(self, times, label=None):
    """
    Return a Pandas series of the predicted cumulative density at specific times.

    The cumulative density is computed as ``1 - self.predict(times)``.

    Parameters
    -----------
    times: iterable or float
        values to return the cumulative density at.
    label: string, optional
        name for the returned series; combined with ``self._label`` through
        ``coalesce``.

    Returns
    --------
    pd.Series
    """
    series_name = coalesce(label, self._label)
    density = 1 - self.predict(times)
    return pd.Series(density, index=_to_array(times), name=series_name)
def survival_function_at_times(self, times, label=None):
    """
    Return a Pandas series of the predicted survival value at specific times.

    The values come from ``self.predict(times)``.

    Parameters
    -----------
    times: iterable or float
        values to return the survival function at.
    label: string, optional
        name for the returned series; combined with ``self._label`` through
        ``coalesce``.

    Returns
    --------
    pd.Series
    """
    series_name = coalesce(label, self._label)
    survival = self.predict(times)
    return pd.Series(survival, index=_to_array(times), name=series_name)
def cumulative_density_at_times(self, times, label=None):
    """
    Return a Pandas series of the predicted cumulative density at specific times.

    Parameters
    -----------
    times: iterable or float
        values to return the cumulative density at.
    label: string, optional
        name for the returned series; combined with ``self._label`` through
        ``coalesce``.

    Returns
    --------
    pd.Series
        ``1 - self.predict(times)``, indexed by the requested times.
    """
    chosen_label = coalesce(label, self._label)
    one_minus_prediction = 1 - self.predict(times)
    time_index = _to_array(times)
    return pd.Series(one_minus_prediction, index=time_index, name=chosen_label)
def survival_function_at_times(self, times):
    """
    Return a Pandas series of the predicted survival value at specific times.

    The survival value is ``1 - norm.cdf((log(t) - mu_) / sigma_)``.

    Parameters
    -----------
    times: iterable or float
        values to return the survival function at.

    Returns
    --------
    pd.Series
    """
    # Standardise log-time with the fitted location and scale.
    z_scores = (log(times) - self.mu_) / self.sigma_
    # NOTE(review): ``1 - cdf`` loses precision far in the upper tail; a
    # survival-function routine would be more accurate if one is available.
    survival = 1 - norm.cdf(z_scores)
    return pd.Series(survival, index=_to_array(times))
def survival_function_at_times(self, times, label=None):
    """
    Return a Pandas series of the predicted survival value at specific times.

    Parameters
    -----------
    times: iterable or float
        values to return the survival function at.
    label: string, optional
        name for the returned series; combined with ``self._label`` through
        ``coalesce``.

    Returns
    --------
    pd.Series
        ``self.predict(times)``, indexed by the requested times.
    """
    chosen_label = coalesce(label, self._label)
    predicted = self.predict(times)
    time_index = _to_array(times)
    return pd.Series(predicted, index=time_index, name=chosen_label)
def hazard_at_times(self, times, label=None):
    """
    Return a Pandas series of the predicted hazard at specific times.

    The hazard is evaluated by ``self._hazard`` using the fitted parameters.

    Parameters
    -----------
    times: iterable or float
        values to return the hazard at.
    label: string, optional
        Rename the series returned. Useful for plotting.

    Returns
    --------
    pd.Series
    """
    series_name = coalesce(label, self._label)
    hazard_values = self._hazard(self._fitted_parameters_, times)
    return pd.Series(hazard_values, index=_to_array(times), name=series_name)
def cumulative_hazard_at_times(self, times):
    """
    Return a Pandas series of the predicted cumulative hazard at specific times.

    Parameters
    -----------
    times: iterable or float
        values to return the cumulative hazard at.

    Returns
    --------
    pd.Series
        ``(lambda_ * t) ** rho_`` for every requested time ``t``, indexed by
        the requested times.
    """
    # Do the arithmetic on the array form of ``times``. On the raw input a
    # plain list can fail to multiply, and a pd.Series input would produce a
    # Series that the constructor below re-aligns by label against the new
    # index instead of by position.
    times = _to_array(times)
    return pd.Series((self.lambda_ * times) ** self.rho_, index=times)
def hazard_at_times(self, times):
    """
    Return a Pandas series of the predicted hazard at specific times.

    Parameters
    -----------
    times: iterable or float
        values to return the hazard at.

    Returns
    --------
    pd.Series
        ``lambda_ * rho_ * (lambda_ * t) ** (rho_ - 1)`` for every requested
        time ``t``, indexed by the requested times.
    """
    # Do the arithmetic on the array form of ``times``. On the raw input a
    # plain list can fail to multiply, and a pd.Series input would produce a
    # Series that the constructor below re-aligns by label against the new
    # index instead of by position.
    times = _to_array(times)
    hazard = self.lambda_ * self.rho_ * (self.lambda_ * times) ** (self.rho_ - 1)
    return pd.Series(hazard, index=times)
def survival_function_at_times(self, times):
    """
    Return a Pandas series of the predicted survival value at specific times.

    The survival value is ``exp(-H(t))``, where ``H`` is what
    ``self.cumulative_hazard_at_times`` returns.

    Parameters
    -----------
    times: iterable or float
        values to return the survival function at.

    Returns
    --------
    pd.Series
    """
    cumulative_hazard = self.cumulative_hazard_at_times(times)
    survival = np.exp(-cumulative_hazard)
    return pd.Series(survival, index=_to_array(times))
def proportional_hazard_test(fitted_cox_model, training_df, time_transform="rank", precomputed_residuals=None, **kwargs):
    """
    Test whether any variable in a Cox model breaks the proportional hazard assumption.

    Parameters
    ----------
    fitted_cox_model: CoxPHFitter
        the fitted Cox model, fitted with `training_df`, you wish to test. Currently only
        the CoxPHFitter is supported, but later CoxTimeVaryingFitter, too.
    training_df: DataFrame
        the DataFrame used in the call to the Cox model's ``fit``.
    time_transform: vectorized function, list, or string, optional (default='rank')
        {'all', 'km', 'rank', 'identity', 'log'}
        One of the strings above, a list of strings, or a function to transform the time.
        The resolved transform is invoked as ``transform(durations, events, weights)``.
        'all' will present all the transforms.
    precomputed_residuals: DataFrame, optional
        specify the residuals, if already computed. When omitted, the scaled Schoenfeld
        residuals are computed from ``fitted_cox_model`` and ``training_df``.
    kwargs:
        additional parameters to add to the StatisticalResult

    Returns
    -------
    StatisticalResult

    Notes
    ------
    R uses the default `km`, we use `rank`, as this performs well versus other transforms.
    See http://eprints.lse.ac.uk/84988/1/06_ParkHendry2015-ReassessingSchoenfeldTests_Final.pdf
    """
    events = fitted_cox_model.event_observed
    durations = fitted_cox_model.durations
    weights = fitted_cox_model.weights
    deaths = events.sum()

    if precomputed_residuals is not None:
        scaled_resids = precomputed_residuals
    else:
        scaled_resids = fitted_cox_model.compute_residuals(training_df, kind="scaled_schoenfeld")

    def compute_statistic(times, resids):
        # Centre the transformed times (in place), then form one statistic per
        # residual column.
        times -= times.mean()
        numerator = (times.values[:, None] * resids.values).sum(0) ** 2
        denominator = deaths * np.diag(fitted_cox_model.variance_matrix_) * (times ** 2).sum()
        return numerator / denominator

    if time_transform == "all":
        time_transform = list(TimeTransformers.TIME_TRANSFOMERS.keys())

    if not isinstance(time_transform, list):
        # Single transform: one result whose names are the covariates.
        time_transformer = TimeTransformers().get(time_transform)
        assert callable(time_transformer), "time_transform must be a callable function, or a string: {'rank', 'km', 'identity', 'log'}."

        # Keep only the rows where the event was observed.
        times = time_transformer(durations, events, weights)[events.values]
        statistics = compute_statistic(times, scaled_resids)
        p_values = _to_array([chisq_test(stat, 1) for stat in statistics])
        return StatisticalResult(
            p_values,
            statistics,
            name=fitted_cox_model.hazards_.index.tolist(),
            test_name="proportional_hazard_test",
            time_transform=time_transform,
            null_distribution="chi squared",
            degrees_of_freedom=1,
            **kwargs
        )

    # Several transforms: accumulate one result per transform, naming each
    # entry with a (covariate, transform name) pair.
    result = StatisticalResult([], [], [])  # yuck
    for transform_name in time_transform:
        transform = TimeTransformers().get(transform_name)
        times = transform(durations, events, weights)[events.values]
        statistics = compute_statistic(times, scaled_resids)
        p_values = _to_array([chisq_test(stat, 1) for stat in statistics])
        result += StatisticalResult(
            p_values,
            statistics,
            name=[(c, transform_name) for c in fitted_cox_model.hazards_.index],
            test_name="proportional_hazard_test",
            null_distribution="chi squared",
            degrees_of_freedom=1,
            **kwargs
        )
    return result
def hazard_at_times(self, times):
    """
    Return a Pandas series of the predicted hazard at specific times.

    The hazard is the standard normal density at the standardised log-time,
    divided by ``sigma_ * t * S(t)``, where ``S`` is what
    ``self.survival_function_at_times`` returns.

    Parameters
    -----------
    times: iterable or float
        values to return the hazard at.

    Returns
    --------
    pd.Series
    """
    z_scores = (log(times) - self.mu_) / self.sigma_
    density = norm.pdf(z_scores)
    scale = self.sigma_ * times * self.survival_function_at_times(times)
    return pd.Series(density / scale, index=_to_array(times))
def hazard_at_times(self, times):
    """
    Return a Pandas series of the predicted hazard at specific times.

    The hazard does not depend on time: every entry equals ``self.lambda_``.

    Parameters
    -----------
    times: iterable or float
        values to return the hazard at; used only as the index.

    Returns
    --------
    pd.Series
    """
    constant_hazard = self.lambda_
    return pd.Series(constant_hazard, index=_to_array(times))
def cumulative_hazard_at_times(self, times):
    """
    Return a Pandas series of the predicted cumulative hazard at specific times.

    The cumulative hazard is ``-log(1 - norm.cdf((log(t) - mu_) / sigma_))``.

    Parameters
    -----------
    times: iterable or float
        values to return the cumulative hazard at.

    Returns
    --------
    pd.Series
    """
    z_scores = (log(times) - self.mu_) / self.sigma_
    # NOTE(review): ``1 - cdf`` rounds to 0 far in the upper tail, which makes
    # the logarithm below infinite; confirm whether such times can occur.
    survival = 1 - norm.cdf(z_scores)
    return pd.Series(-log(survival), index=_to_array(times))
def plot_covariate_groups(self, covariates, values, plot_baseline=True, **kwargs):
    """
    Produces a visual representation comparing the baseline survival curve of the model versus
    what happens when a covariate(s) is varied over values in a group. This is useful to compare
    subjects' survival as we vary covariate(s), all else being held equal. The baseline survival
    curve is equal to the predicted survival curve at all average values in the original dataset.

    Parameters
    ----------
    covariates: string or list
        a string (or list of strings) of the covariate in the original dataset that we wish to vary.
    values: 1d or 2d iterable
        an iterable of the values we wish the covariate to take on. A 1d input is treated as
        a single column, i.e. one covariate.
    plot_baseline: bool
        also display the baseline survival, defined as the survival at the mean of the original dataset.
    kwargs:
        pass in additional plotting commands. An ``ax`` entry, if present, is used as the axis
        to draw on and is not forwarded to the plotting call.

    Returns
    -------
    ax: matplotlib axis, or list of axis'
        the matplotlib axis that be edited.

    Raises
    ------
    ValueError
        if the number of covariates differs from the second dimension of ``values``.
    KeyError
        if a covariate is not found in the second level of ``self.params_``'s index.

    Examples
    ---------

    >>> from lifelines import datasets, WeibullAFTFitter
    >>> rossi = datasets.load_rossi()
    >>> wf = WeibullAFTFitter().fit(rossi, 'week', 'arrest')
    >>> wf.plot_covariate_groups('prio', values=np.arange(0, 15), cmap='coolwarm')

    >>> # multiple variables at once
    >>> wf.plot_covariate_groups(['prio', 'paro'], values=[[0, 0], [5, 0], [10, 0], [0, 1], [5, 1], [10, 1]], cmap='coolwarm')

    >>> # if you have categorical variables, you can simply things:
    >>> wf.plot_covariate_groups(['dummy1', 'dummy2', 'dummy3'], values=np.eye(3))
    """
    from matplotlib import pyplot as plt

    covariates = _to_list(covariates)
    values = _to_array(values)
    if values.ndim == 1:
        # A flat input means one covariate: turn it into a single column.
        values = values[None, :].T

    if len(covariates) != values.shape[1]:
        raise ValueError("The number of covariates must equal to second dimension of the values array.")

    original_columns = self.params_.index.get_level_values(1)
    for covariate in covariates:
        if covariate not in original_columns:
            raise KeyError("covariate `%s` is not present in the original dataset" % covariate)

    ax = kwargs.pop("ax", None) or plt.figure().add_subplot(111)

    n_groups = values.shape[0]

    # model X: one copy of the mean row per group, labelled for the legend
    x_bar = self._norm_mean.to_frame().T
    X = pd.concat([x_bar] * n_groups)
    if np.array_equal(np.eye(len(covariates)), values):
        # Identity matrix: each row switches on exactly one covariate.
        row_labels = ["%s=1" % c for c in covariates]
    else:
        row_labels = [", ".join("%s=%g" % (c, v) for (c, v) in zip(covariates, row)) for row in values]
    X.index = row_labels

    # model ancillary X
    x_bar_anc = self._norm_mean_ancillary.to_frame().T
    ancillary_X = pd.concat([x_bar_anc] * n_groups)

    # Overwrite the varied covariates in both design matrices.
    for covariate, column in zip(covariates, values.T):
        X[covariate] = column
        ancillary_X[covariate] = column

    if self.fit_intercept:
        X["_intercept"] = 1.0
        ancillary_X["_intercept"] = 1.0

    self.predict_survival_function(X, ancillary_X=ancillary_X).plot(ax=ax, **kwargs)

    if plot_baseline:
        baseline = self.predict_survival_function(x_bar, ancillary_X=x_bar_anc)
        baseline = baseline.rename(columns={0: "baseline survival"})
        baseline.plot(ax=ax, ls=":", color="k")

    return ax