def add_km_censor(self, steps='post'):
    """Add censoring estimates obtained by Kaplan-Meier on the test set.

    The censoring distribution is fitted with `utils.kaplan_meier` on
    `(self.durations, 1 - self.events)`, i.e. with the event indicator
    flipped. The fitted values are then copied into one column per entry of
    `self.durations`, keeping the Kaplan-Meier index as the row index.

    Arguments:
        steps: Forwarded unchanged to `self.add_censor_est`.

    Returns:
        Whatever `self.add_censor_est` returns.
    """
    censor_km = utils.kaplan_meier(self.durations, 1 - self.events)
    n_columns = len(self.durations)
    # Column vector of the fitted values, repeated once per column.
    km_column = censor_km.values.reshape(-1, 1)
    censor_surv = pd.DataFrame(
        np.repeat(km_column, n_columns, axis=1),
        index=censor_km.index,
    )
    return self.add_censor_est(censor_surv, steps)
def test_kaplan_meier_vs_lifelines(n, p_cens):
    """Compare `utils.kaplan_meier` with lifelines' `KaplanMeierFitter`.

    Draws `n` uniform durations on [0, 100) and binomial event indicators
    with success probability `1 - p_cens` (seeded for reproducibility), then
    checks that both estimators agree in shape, values (to 1e-14) and index.
    """
    np.random.seed(0)
    durations = np.random.uniform(0, 100, n)
    events = np.random.binomial(1, 1 - p_cens, n).astype('float')

    ours = utils.kaplan_meier(durations, events)
    fitted = KaplanMeierFitter().fit(durations, events)
    reference = fitted.survival_function_['KM_estimate']

    assert ours.shape == reference.shape
    largest_gap = (ours - reference).abs().max()
    assert largest_gap < 1e-14
    assert (ours.index == reference.index).all()
def cuts_quantiles(durations, events, num, min_=0., dtype='float64'):
    """Pick cut points at evenly spaced levels of the Kaplan-Meier estimate.

    `num` levels are spread linearly between the smallest and largest value
    of `utils.kaplan_meier(durations, events)`; each level is mapped to a
    duration from the estimate's index, and duplicates are removed.

    Arguments:
        durations: Durations passed to `utils.kaplan_meier`; must support
            `.min()` and `.max()`.
        events: Event indicators passed to `utils.kaplan_meier`.
        num: Number of levels requested. Fewer cuts may be returned (with a
            warning) when several levels map to the same duration.
        min_: Value written to the first cut. If min_ = None, we will use
            durations.min() for the first cut.
        dtype: dtype the returned array is cast to.

    Returns:
        Sorted array of unique cut points, cast to `dtype`.
    """
    km = utils.kaplan_meier(durations, events)
    surv_values = km.values
    surv_times = km.index.values

    levels = np.linspace(surv_values.min(), surv_values.max(), num)
    # The estimate is searched in reversed order; this presumably relies on
    # it being non-increasing, so the reversed array is sorted ascending for
    # `searchsorted` (to be confirmed against `utils.kaplan_meier`).
    positions = np.searchsorted(surv_values[::-1], levels)[::-1]
    cuts = np.unique(surv_times[::-1][positions])

    if len(cuts) != num:
        warnings.warn(f"cuts are not unique, continue with {len(cuts)} cuts instead of {num}")

    if min_ is None:
        first_cut = durations.min()
    else:
        first_cut = min_
    cuts[0] = first_cut

    assert cuts[-1] == durations.max(), 'something wrong...'
    return cuts.astype(dtype)
def test_kaplan_meier():
    """Check `utils.kaplan_meier` on a small hand-computed example.

    With durations [1, 1, 2, 3] and events [1, 1, 1, 0] the estimate is
    expected on the index [0, 1, 2, 3] with values [1, 0.5, 0.25, 0.25].
    """
    durations = np.array([1., 1., 2., 3.])
    events = np.array([1, 1, 1, 0])
    expected_index = np.arange(4, dtype=float)
    expected_values = np.array([1., 0.5, 0.25, 0.25])

    surv = utils.kaplan_meier(durations, events)

    index_matches = surv.index.values == expected_index
    assert index_matches.all()
    values_match = surv.values == expected_values
    assert values_match.all()