def fit(self, durations, event_observed=None, timeline=None, entry=None,
        label='NA-estimate', alpha=None, ci_labels=None):
    """
    Compute the cumulative hazard estimate from the observed durations and
    attach the estimate, its confidence interval and the prediction/plotting
    helpers to self.

    Parameters:
      durations: an array, or pd.Series, of length n -- duration subject was observed for
      event_observed: an array, or pd.Series, of length n -- True if the death was observed,
         False if the event was lost (right-censored). Defaults all True if event_observed==None
      timeline: return the best estimate at the values in timelines (positively increasing)
      entry: an array, or pd.Series, of length n -- relative time when a subject entered the study.
         This is useful for left-truncated observations, i.e the birth event was not observed.
         If None, defaults to all 0 (all birth events observed.)
      label: a string to name the column of the estimate.
      alpha: the alpha value in the confidence intervals. Overrides the initializing
         alpha for this call to fit only (a falsy value falls back to self.alpha).
      ci_labels: add custom column names to the generated confidence intervals
            as a length-2 list: [<lower-bound name>, <upper-bound name>]. Default: <label>_lower_<alpha>

    Returns:
      self, with new properties like 'cumulative_hazard_'.
    """
    estimate_name = "cumulative_hazard_"

    (self.durations,
     self.event_observed,
     self.timeline,
     self.entry,
     self.event_table) = preprocess_inputs(durations, event_observed, timeline, entry)

    hazard_values, cum_sq = _additive_estimate(
        self.event_table,
        self.timeline,
        self._additive_f,
        self._variance_f,
        False)

    # estimates
    self.cumulative_hazard_ = pd.DataFrame(hazard_values, columns=[label])
    # the per-call alpha wins when truthy; self.alpha itself is left untouched
    ci_alpha = alpha or self.alpha
    self.confidence_interval_ = self._bounds(cum_sq[:, None], ci_alpha, ci_labels)
    self._cumulative_sq = cum_sq

    # estimation functions
    self.predict = _predict(self, estimate_name, label)
    self.subtract = _subtract(self, estimate_name)
    self.divide = _divide(self, estimate_name)

    # plotting: plot_cumulative_hazard is an alias of plot
    self.plot = plot_estimate(self, estimate_name)
    self.plot_cumulative_hazard = self.plot
    self.plot_hazard = plot_estimate(self, 'hazard_')
    return self
def fit(self, durations, event_observed=None, timeline=None, entry=None, label='KM-estimate',
        alpha=None, left_censorship=False, ci_labels=None):
    """
    Compute the survival function (or, when left_censorship is True, the
    cumulative density) and attach the estimate, its confidence interval,
    the median and the prediction/plotting helpers to self.

    Parameters:
      durations: an array, or pd.Series, of length n -- duration subject was observed for
      event_observed: an array, or pd.Series, of length n -- True if the death was observed,
         False if the event was lost (right-censored). Defaults all True if event_observed==None
      timeline: return the best estimate at the values in timelines (positively increasing)
      entry: an array, or pd.Series, of length n -- relative time when a subject entered the study.
         This is useful for left-truncated observations, i.e the birth event was not observed.
         If None, defaults to all 0 (all birth events observed.)
      label: a string to name the column of the estimate.
      alpha: the alpha value in the confidence intervals. When truthy it replaces
         self.alpha; note that the new value is stored on the instance and therefore
         persists after this call.
      left_censorship: True if durations and event_observed refer to left censorship events. Default False
      ci_labels: add custom column names to the generated confidence intervals
            as a length-2 list: [<lower-bound name>, <upper-bound name>]. Default: <label>_lower_<alpha>

    Returns:
      self, with new properties like 'survival_function_'.

    Raises:
      StatError: if `entry` is given and the running net population
         (cumulative entrances minus removals) reaches zero within the first
         half of the event table rows.
    """
    # if the user is interested in left-censorship, we return the cumulative_density_, not the survival_function_
    estimate_name = 'survival_function_' if not left_censorship else 'cumulative_density_'
    v = preprocess_inputs(durations, event_observed, timeline, entry)
    self.durations, self.event_observed, self.timeline, self.entry, self.event_table = v
    self._label = label
    self.alpha = alpha if alpha else self.alpha
    log_survival_function, cumulative_sq_ = _additive_estimate(self.event_table, self.timeline,
                                                               self._additive_f, self._additive_var,
                                                               left_censorship)

    if entry is not None:
        # a serious problem with KM is that when the sample size is small and there are too few early
        # truncation times, it may happen that the number of patients at risk and the number of deaths
        # is the same. We detect that here and point the user at the Breslow-Fleming-Harrington estimator.
        n = self.event_table.shape[0]
        net_population = (self.event_table['entrance'] - self.event_table['removed']).cumsum()
        early_population = net_population.iloc[:int(n / 2)]
        if early_population.min() == 0:
            # idxmin() gives the index *label* (the time) of the first zero; argmin() returns an
            # integer position on current pandas, which would put the wrong number in the message.
            ix = early_population.idxmin()
            raise StatError("""There are too few early truncation times and too many events. S(t)==0 for all t>%.1f. Recommend BreslowFlemingHarringtonFitter.""" % ix)

    # estimation
    setattr(self, estimate_name, pd.DataFrame(np.exp(log_survival_function), columns=[self._label]))
    self.__estimate = getattr(self, estimate_name)
    # alpha is not passed explicitly here; presumably _bounds reads self.alpha (set above) -- confirm
    self.confidence_interval_ = self._bounds(cumulative_sq_[:, None], ci_labels)
    self.median_ = median_survival_times(self.__estimate)

    # estimation methods
    self.predict = _predict(self, estimate_name, label)
    self.subtract = _subtract(self, estimate_name)
    self.divide = _divide(self, estimate_name)

    # plotting functions: also exposed as plot_<estimate_name>
    self.plot = plot_estimate(self, estimate_name)
    setattr(self, "plot_" + estimate_name, self.plot)
    return self
def fit(self, durations, event_observed=None, timeline=None, entry=None, label='KM-estimate',
        alpha=None, left_censorship=False, ci_labels=None):
    """
    Compute the survival function (or, when left_censorship is True, the
    cumulative density) and attach the estimate, its confidence interval,
    the median and the prediction/plotting helpers to self.

    Parameters:
      durations: an array, or pd.Series, of length n -- duration subject was observed for
      event_observed: an array, or pd.Series, of length n -- True if the death was observed,
         False if the event was lost (right-censored). Defaults all True if event_observed==None
      timeline: return the best estimate at the values in timelines (positively increasing)
      entry: an array, or pd.Series, of length n -- relative time when a subject entered the study.
         This is useful for left-truncated observations, i.e the birth event was not observed.
         If None, defaults to all 0 (all birth events observed.)
      label: a string to name the column of the estimate.
      alpha: the alpha value in the confidence intervals. Overrides the initializing
         alpha for this call to fit only (a falsy value falls back to self.alpha).
      left_censorship: True if durations and event_observed refer to left censorship events. Default False
      ci_labels: add custom column names to the generated confidence intervals
            as a length-2 list: [<lower-bound name>, <upper-bound name>]. Default: <label>_lower_<alpha>

    Returns:
      self, with new properties like 'survival_function_'.

    Raises:
      StatError: if `entry` is given and the running net population
         (cumulative entrances minus removals) reaches zero within the first
         half of the event table rows.
    """
    # if the user is interested in left-censorship, we return the cumulative_density_, not the survival_function_
    estimate_name = 'survival_function_' if not left_censorship else 'cumulative_density_'
    v = preprocess_inputs(durations, event_observed, timeline, entry)
    self.durations, self.event_observed, self.timeline, self.entry, self.event_table = v
    log_survival_function, cumulative_sq_ = _additive_estimate(self.event_table, self.timeline,
                                                               self._additive_f, self._additive_var,
                                                               left_censorship)

    if entry is not None:
        # a serious problem with KM is that when the sample size is small and there are too few early
        # truncation times, it may happen that the number of patients at risk and the number of deaths
        # is the same. We detect that here and point the user at the Breslow-Fleming-Harrington estimator.
        n = self.event_table.shape[0]
        net_population = (self.event_table['entrance'] - self.event_table['removed']).cumsum()
        early_population = net_population.iloc[:int(n / 2)]
        if early_population.min() == 0:
            # idxmin() gives the index *label* (the time) of the first zero; argmin() returns an
            # integer position on current pandas, which would put the wrong number in the message.
            ix = early_population.idxmin()
            raise StatError("""There are too few early truncation times and too many events. S(t)==0 for all t>%.1f. Recommend BFH estimator.""" % ix)

    # estimation
    setattr(self, estimate_name, pd.DataFrame(np.exp(log_survival_function), columns=[label]))
    self.__estimate = getattr(self, estimate_name)
    self.confidence_interval_ = self._bounds(cumulative_sq_[:, None], alpha if alpha else self.alpha, ci_labels)
    self.median_ = median_survival_times(self.__estimate)

    # estimation methods
    self.predict = _predict(self, estimate_name, label)
    self.subtract = _subtract(self, estimate_name)
    self.divide = _divide(self, estimate_name)

    # plotting functions: also exposed as plot_<estimate_name>
    self.plot = plot_estimate(self, estimate_name)
    setattr(self, "plot_" + estimate_name, self.plot)
    return self
def fit(self, durations, event_observed=None, timeline=None, entry=None,
        label='NA-estimate', alpha=None, ci_labels=None):
    """
    Compute the cumulative hazard estimate from the observed durations and
    attach the estimate, its confidence interval and the prediction/plotting
    helpers to self. The column label is also remembered as self._label.

    Parameters:
      durations: an array, or pd.Series, of length n -- duration subject was observed for
      event_observed: an array, or pd.Series, of length n -- True if the death was observed,
         False if the event was lost (right-censored). Defaults all True if event_observed==None
      timeline: return the best estimate at the values in timelines (positively increasing)
      entry: an array, or pd.Series, of length n -- relative time when a subject entered the study.
         This is useful for left-truncated observations, i.e the birth event was not observed.
         If None, defaults to all 0 (all birth events observed.)
      label: a string to name the column of the estimate.
      alpha: the alpha value in the confidence intervals. Overrides the initializing
         alpha for this call to fit only (a falsy value falls back to self.alpha).
      ci_labels: add custom column names to the generated confidence intervals
            as a length-2 list: [<lower-bound name>, <upper-bound name>]. Default: <label>_lower_<alpha>

    Returns:
      self, with new properties like 'cumulative_hazard_'.
    """
    hazard_attr = "cumulative_hazard_"

    prepared = preprocess_inputs(durations, event_observed, timeline, entry)
    (self.durations, self.event_observed, self.timeline,
     self.entry, self.event_table) = prepared

    hazard_values, cum_sq = _additive_estimate(
        self.event_table, self.timeline, self._additive_f, self._variance_f, False)

    # estimates
    self._label = label
    self.cumulative_hazard_ = pd.DataFrame(hazard_values, columns=[self._label])
    # the per-call alpha wins when truthy; self.alpha itself is left untouched
    interval_alpha = alpha or self.alpha
    self.confidence_interval_ = self._bounds(cum_sq[:, None], interval_alpha, ci_labels)
    self._cumulative_sq = cum_sq

    # estimation functions
    self.predict = _predict(self, hazard_attr, self._label)
    self.subtract = _subtract(self, hazard_attr)
    self.divide = _divide(self, hazard_attr)

    # plotting: plot_cumulative_hazard is an alias of plot
    self.plot = plot_estimate(self, hazard_attr)
    self.plot_cumulative_hazard = self.plot
    self.plot_hazard = plot_estimate(self, 'hazard_')
    return self
def fit(self, durations, censorship=None, timeline=None, entry=None, label='KM-estimate', alpha=None):
    """
    Compute the survival function and attach the estimate, its confidence
    interval, the median and the prediction/plotting helpers to self.

    Parameters:
      durations: an array, or pd.Series, of length n -- duration subject was observed for
      censorship: an array, or pd.Series, of length n -- True if the death was observed,
         False if the event was lost (right-censored). Defaults all True if censorship==None
      timeline: return the best estimate at the values in timelines (positively increasing)
      entry: an array, or pd.Series, of length n -- relative time when a subject entered the study.
         This is useful for left-truncated observations, i.e the birth event was not observed.
         If None, defaults to all 0 (all birth events observed.)
      label: a string to name the column of the estimate.
      alpha: the alpha value in the confidence intervals. Overrides the initializing
         alpha for this call to fit only (a falsy value falls back to self.alpha).

    Returns:
      self, with new properties like 'survival_function_'.

    Raises:
      StatError: if `entry` is given and the running net population
         (cumulative entrances minus removals) reaches zero within the first
         half of the event table rows.
    """
    v = preprocess_inputs(durations, censorship, timeline, entry)
    self.durations, self.censorship, self.timeline, self.entry, self.event_table = v
    log_survival_function, cumulative_sq_ = _additive_estimate(
        self.event_table, self.timeline, self._additive_f, self._additive_var)

    if entry is not None:
        # a serious problem with KM is that when the sample size is small and there are too few early
        # truncation times, it may happen that the number of patients at risk and the number of deaths
        # is the same. We detect that here and point the user at the Breslow-Fleming-Harrington estimator.
        n = self.event_table.shape[0]
        net_population = (
            self.event_table['entrance'] - self.event_table['removed']).cumsum()
        early_population = net_population.iloc[:int(n / 2)]
        if early_population.min() == 0:
            # idxmin() gives the index *label* (the time) of the first zero; argmin() returns an
            # integer position on current pandas, which would put the wrong number in the message.
            ix = early_population.idxmin()
            # the message keeps the line break of the original literal, written as an explicit \n
            raise StatError(
                "There are too few early truncation times and too many events. "
                "S(t)==0 for all t>%.1f. \nRecommend BFH estimator." % ix)

    # estimation
    self.survival_function_ = pd.DataFrame(
        np.exp(log_survival_function), columns=[label])
    self.confidence_interval_ = self._bounds(
        cumulative_sq_[:, None], alpha if alpha else self.alpha)
    self.median_ = median_survival_times(self.survival_function_)

    # estimation methods
    self.predict = _predict(self, "survival_function_", label)
    self.subtract = _subtract(self, "survival_function_")
    self.divide = _divide(self, "survival_function_")

    # plotting functions: plot_survival_function is an alias of plot
    self.plot = plot_estimate(self, "survival_function_")
    self.plot_survival_function = self.plot
    return self
def fit(self, durations, event_observed=None, timeline=None, entry=None,
        label='BFH-estimate', alpha=None, ci_labels=None):
    """
    Compute the survival function as exp(-cumulative hazard), using a
    NelsonAalenFitter fitted on the same inputs, and attach the estimate,
    its confidence interval, the median and the prediction/plotting helpers
    to self.

    Parameters:
      durations: an array, or pd.Series, of length n -- duration subject was observed for
      event_observed: an array, or pd.Series, of length n -- True if the death was observed,
         False if the event was lost (right-censored). Defaults all True if event_observed==None
      timeline: return the best estimate at the values in timelines (positively increasing)
      entry: an array, or pd.Series, of length n -- relative time when a subject entered the study.
         This is useful for left-truncated observations, i.e the birth event was not observed.
         If None, defaults to all 0 (all birth events observed.)
      label: a string to name the column of the estimate.
      alpha: the alpha value in the confidence intervals. Overrides the initializing
         alpha for this call to fit only (a falsy value falls back to self.alpha).
      ci_labels: add custom column names to the generated confidence intervals
            as a length-2 list: [<lower-bound name>, <upper-bound name>]. Default: <label>_lower_<alpha>

    Returns:
      self, with new properties like 'survival_function_'.
    """
    # Previously the alpha argument was accepted but never used. Build the inner
    # fitter with the resolved value so the override actually reaches the
    # confidence interval; self.alpha itself is not modified.
    ci_alpha = alpha if alpha else self.alpha
    naf = NelsonAalenFitter(ci_alpha)
    naf.fit(durations, event_observed=event_observed, timeline=timeline,
            label=label, entry=entry, ci_labels=ci_labels)

    # mirror the preprocessed inputs of the inner fitter on this object
    self.durations = naf.durations
    self.event_observed = naf.event_observed
    self.timeline = naf.timeline
    self.entry = naf.entry
    self.event_table = naf.event_table

    # estimation
    self.survival_function_ = np.exp(-naf.cumulative_hazard_)
    self.confidence_interval_ = np.exp(-naf.confidence_interval_)
    self.median_ = median_survival_times(self.survival_function_)

    # estimation methods
    self.predict = _predict(self, "survival_function_", label)
    self.subtract = _subtract(self, "survival_function_")
    self.divide = _divide(self, "survival_function_")

    # plotting functions: plot_survival_function is an alias of plot
    self.plot = plot_estimate(self, "survival_function_")
    self.plot_survival_function = self.plot
    return self
def fit(self, durations, censorship=None, timeline=None, entry=None, label="BFH-estimate", alpha=None):
    """
    Compute the survival function as exp(-cumulative hazard), using a
    NelsonAalenFitter fitted on the same inputs, and attach the estimate,
    its confidence interval, the median and the prediction/plotting helpers
    to self.

    Parameters:
      durations: an array, or pd.Series, of length n -- duration subject was observed for
      censorship: an array, or pd.Series, of length n -- True if the death was observed,
         False if the event was lost (right-censored). Defaults all True if censorship==None
      timeline: return the best estimate at the values in timelines (positively increasing)
      entry: an array, or pd.Series, of length n -- relative time when a subject entered the study.
         This is useful for left-truncated observations, i.e the birth event was not observed.
         If None, defaults to all 0 (all birth events observed.)
      label: a string to name the column of the estimate.
      alpha: the alpha value in the confidence intervals. Overrides the initializing
         alpha for this call to fit only (a falsy value falls back to self.alpha).

    Returns:
      self, with new properties like 'survival_function_'.
    """
    # Previously the alpha argument was accepted but never used. Build the inner
    # fitter with the resolved value so the override actually reaches the
    # confidence interval; self.alpha itself is not modified.
    ci_alpha = alpha if alpha else self.alpha
    naf = NelsonAalenFitter(ci_alpha)
    naf.fit(durations, censorship=censorship, timeline=timeline, label=label, entry=entry)

    # mirror the preprocessed inputs of the inner fitter on this object
    self.durations = naf.durations
    self.censorship = naf.censorship
    self.timeline = naf.timeline
    self.entry = naf.entry
    self.event_table = naf.event_table

    # estimation
    self.survival_function_ = np.exp(-naf.cumulative_hazard_)
    self.confidence_interval_ = np.exp(-naf.confidence_interval_)
    self.median_ = median_survival_times(self.survival_function_)

    # estimation methods
    self.predict = _predict(self, "survival_function_", label)
    self.subtract = _subtract(self, "survival_function_")
    self.divide = _divide(self, "survival_function_")

    # plotting functions: plot_survival_function is an alias of plot
    self.plot = plot_estimate(self, "survival_function_")
    self.plot_survival_function = self.plot
    return self
def _plot_estimate(self, *args):
    """
    Call the external ``plot_estimate`` with this object as the first
    positional argument followed by ``*args``, and return its result.
    """
    forwarded = (self,) + args
    return plot_estimate(*forwarded)
def plot(self, *args, **kwargs):
    """
    Call the external ``plot_estimate`` with this object as the first
    positional argument, forwarding any extra positional and keyword
    arguments unchanged, and return its result.
    """
    positional = (self,) + args
    return plot_estimate(*positional, **kwargs)