def _ll_null(self):
    """
    Return the log-likelihood of an intercept-only version of this model.

    The value is cached on ``self._ll_null_`` and returned directly on later calls.

    A new instance of the same class is created (with all but the last entry of
    ``self.breakpoints`` and the same ``penalizer``) and fitted on a frame that
    holds only the observed times, event flags, entry times and a constant
    ``_intercept`` column, with every fitted parameter regressed on
    ``_intercept`` alone.

    Raises
    ------
    NotImplementedError
        if the model is left censored.
    """
    try:
        return self._ll_null_
    except AttributeError:
        pass

    start = np.zeros(len(self._fitted_parameter_names))

    # NOTE(review): the last breakpoint is dropped before re-instantiating -
    # presumably the constructor appends a terminal breakpoint itself; confirm.
    null_model = self.__class__(breakpoints=self.breakpoints[:-1], penalizer=self.penalizer)

    intercept_only = {}
    for name in self._fitted_parameter_names:
        intercept_only[name] = ["_intercept"]

    if CensoringType.is_right_censoring(self):
        columns = {"T": self.durations, "E": self.event_observed, "entry": self.entry, "_intercept": 1.0}
        null_model.fit_right_censoring(
            pd.DataFrame(columns),
            "T",
            "E",
            initial_point=start,
            entry_col="entry",
            regressors=intercept_only,
        )
    elif CensoringType.is_interval_censoring(self):
        columns = {
            "lb": self.lower_bound,
            "ub": self.upper_bound,
            "E": self.event_observed,
            "entry": self.entry,
            "_intercept": 1.0,
        }
        null_model.fit_interval_censoring(
            pd.DataFrame(columns),
            "lb",
            "ub",
            "E",
            initial_point=start,
            entry_col="entry",
            regressors=intercept_only,
        )

    if CensoringType.is_left_censoring(self):
        raise NotImplementedError()

    self._ll_null_ = null_model.log_likelihood_
    return self._ll_null_
Пример #2
0
def cdf_plot(model, timeline=None, **plot_kwargs):
    """
    Plot the Kaplan-Meier cumulative density next to the fitted model's CDF.

    Parameters
    ----------
    model:
        a fitted lifelines univariate parametric model. ``durations`` and
        ``event_observed`` are read from it, and ``timeline`` too when the
        ``timeline`` argument is not given.
    timeline: iterable, optional
        the times at which both curves are evaluated. Defaults to ``model.timeline``.
    plot_kwargs:
        forwarded to both plotting calls. The ``ax`` entry is removed first and
        used as the target axes (``set_kwargs_ax`` is called beforehand -
        presumably it fills in a default ``ax``; confirm against its definition).

    Returns
    -------
    ax:
        the axes that were drawn on.

    Raises
    ------
    NotImplementedError
        if the model is interval censored.
    """
    from lifelines import KaplanMeierFitter

    set_kwargs_ax(plot_kwargs)
    ax = plot_kwargs.pop("ax")

    if timeline is None:
        timeline = model.timeline

    # The curve drawn below is a cumulative density, so the legend entry must say
    # so ("empirical quantiles" belongs to the QQ plot, not to this one).
    COL_EMP = "empirical CDF"

    if CensoringType.is_left_censoring(model):
        kmf = KaplanMeierFitter().fit_left_censoring(
            model.durations, model.event_observed, label=COL_EMP, timeline=timeline
        )
    elif CensoringType.is_right_censoring(model):
        kmf = KaplanMeierFitter().fit_right_censoring(
            model.durations, model.event_observed, label=COL_EMP, timeline=timeline
        )
    elif CensoringType.is_interval_censoring(model):
        raise NotImplementedError("lifelines does not have a non-parametric interval model yet.")

    kmf.plot_cumulative_density(ax=ax, **plot_kwargs)

    dist = get_distribution_name_of_lifelines_model(model)
    dist_object = create_scipy_stats_model_from_lifelines_model(model)
    ax.plot(timeline, dist_object.cdf(timeline), label="fitted %s" % dist, **plot_kwargs)
    ax.legend()
    return ax
Пример #3
0
def cdf_plot(model, timeline=None, ax=None, **plot_kwargs):
    """
    Overlay the Kaplan-Meier cumulative density and the fitted model's CDF.

    Parameters
    ----------
    model:
        a fitted lifelines univariate parametric model.
    timeline: iterable, optional
        times at which both curves are evaluated; ``model.timeline`` when omitted.
    ax: matplotlib axes, optional
        axes to draw on; ``plt.gca()`` when omitted.
    plot_kwargs:
        forwarded to both plotting calls.

    Returns
    -------
    ax:
        the axes that were drawn on.

    Raises
    ------
    NotImplementedError
        if the model is interval censored.
    """
    from lifelines import KaplanMeierFitter

    ax = plt.gca() if ax is None else ax
    timeline = model.timeline if timeline is None else timeline

    empirical_label = "empirical CDF"

    # Pick the Kaplan-Meier fitting method that matches the model's censoring type.
    if CensoringType.is_left_censoring(model):
        fit_name = "fit_left_censoring"
    elif CensoringType.is_right_censoring(model):
        fit_name = "fit_right_censoring"
    elif CensoringType.is_interval_censoring(model):
        raise NotImplementedError("lifelines does not have a non-parametric interval model yet.")

    fit = getattr(KaplanMeierFitter(), fit_name)
    km_estimate = fit(model.durations, model.event_observed, label=empirical_label, timeline=timeline)
    km_estimate.plot_cumulative_density(ax=ax, **plot_kwargs)

    dist_name = get_distribution_name_of_lifelines_model(model)
    fitted_dist = create_scipy_stats_model_from_lifelines_model(model)
    fitted_cdf = fitted_dist.cdf(timeline)
    ax.plot(timeline, fitted_cdf, label="fitted %s" % dist_name, **plot_kwargs)
    ax.legend()
    return ax
Пример #4
0
 def plot_survival_function(self, **kwargs):
     """Alias of ``plot``.

     For models that are not interval censored the call is delegated to
     ``_plot_estimate`` with ``estimate="survival_function_"``. For interval
     censored models ``self.survival_function_`` is plotted directly with the
     ``"steps-pre"`` draw style.

     Parameters
     ----------
     kwargs:
         plotting options. In the interval-censoring branch ``c`` and ``color``
         are taken out of ``kwargs`` and combined through ``coalesce`` with
         ``"k"`` as the final fallback; everything else is forwarded unchanged.

     Returns
     -------
     ax:
         whatever the underlying plotting call returns.
     """
     if not CensoringType.is_interval_censoring(self):
         return _plot_estimate(self, estimate="survival_function_", **kwargs)

     # hack for now.
     # Both spellings must be *removed* from kwargs: leaving them in and passing
     # ``color=`` explicitly raises "got multiple values for keyword argument".
     color = coalesce(kwargs.pop("c", None), kwargs.pop("color", None), "k")
     return self.survival_function_.plot(drawstyle="steps-pre", color=color, **kwargs)
Пример #5
0
 def _create_initial_point(self, Ts, E, *args):
     """
     Build the two-element starting point ``[median(T), 1.0]`` for the optimizer.

     ``T`` is ``Ts[0]`` under right censoring, ``Ts[1]`` under left censoring and
     ``Ts[1] - Ts[0]`` under interval censoring. ``E`` and ``args`` are unused.
     """
     if CensoringType.is_right_censoring(self):
         observed = Ts[0]
     elif CensoringType.is_left_censoring(self):
         observed = Ts[1]
     elif CensoringType.is_interval_censoring(self):
         upper, lower = Ts[1], Ts[0]
         observed = upper - lower

     center = np.median(observed)
     return np.array([center, 1.0])
 def _create_initial_point(self, Ts, E, *args):
     """
     Build the three-element starting point from the logged data.

     The data is ``Ts[0]`` (right censoring), ``Ts[1]`` (left censoring) or
     ``Ts[1] - Ts[0]`` (interval censoring). The result is
     ``[mean(log data), log(std(log data)), 0.1]``. ``E`` and ``args`` are unused.
     """
     if CensoringType.is_right_censoring(self):
         raw = Ts[0]
     elif CensoringType.is_left_censoring(self):
         raw = Ts[1]
     elif CensoringType.is_interval_censoring(self):
         raw = Ts[1] - Ts[0]

     logged = log(raw)
     location = logged.mean()
     log_spread = log(logged.std())
     return np.array([location, log_spread, 0.1])
Пример #7
0
 def _get_initial_values(self, Ts, E, *args):
     """
     Return the starting values ``[mean(log data), log(std(log data)), 1.0]``.

     The data is ``Ts[0]`` (right censoring), ``Ts[1]`` (left censoring) or
     ``Ts[1] - Ts[0]`` (interval censoring). ``E`` and ``args`` are unused.
     """
     if CensoringType.is_right_censoring(self):
         raw = Ts[0]
     elif CensoringType.is_left_censoring(self):
         raw = Ts[1]
     elif CensoringType.is_interval_censoring(self):
         raw = Ts[1] - Ts[0]

     logged = np.log(raw)
     location = logged.mean()
     log_spread = np.log(logged.std())
     return np.array([location, log_spread, 1.0])
Пример #8
0
 def _create_initial_point(self, Ts, E, *args):
     """
     Build the two-element starting point ``[median(log T), 1.0]``.

     ``T`` is ``Ts[0]`` under right censoring and ``Ts[1]`` under both left and
     interval censoring. ``E`` and ``args`` are unused.
     """
     if CensoringType.is_right_censoring(self):
         times = Ts[0]
     elif CensoringType.is_left_censoring(self) or CensoringType.is_interval_censoring(self):
         # left and interval censoring both start from the second array
         times = Ts[1]

     center = np.median(np.log(times))
     return np.array([center, 1.0])
Пример #9
0
 def _create_initial_point(self, Ts, E, *args):
     """
     Build the three-element starting point from the logged data.

     The data is ``Ts[0]`` (right censoring), ``Ts[1]`` (left censoring) or
     ``Ts[1] - Ts[0] + 0.01`` (interval censoring). The result is
     ``[mean(log data), log(std(log data) + 0.01), 0.1]``. ``E`` and ``args``
     are unused.
     """
     if CensoringType.is_right_censoring(self):
         raw = Ts[0]
     elif CensoringType.is_left_censoring(self):
         raw = Ts[1]
     elif CensoringType.is_interval_censoring(self):
         # Ts[1] == Ts[0] would give log(0), hence the small fudge factor.
         raw = Ts[1] - Ts[0] + 0.01

     logged = log(raw)
     location = logged.mean()
     # the same fudge factor protects against a zero standard deviation
     log_spread = log(logged.std() + 0.01)
     return np.array([location, log_spread, 0.1])
Пример #10
0
def cdf_plot(model, timeline=None, ax=None, **plot_kwargs):
    """
    Compare the empirical CDF (from a Kaplan-Meier fit) with the model's CDF.

    Parameters
    ------------
    model: lifelines univariate model
        its observed data, ``weights`` and ``entry`` are re-used for the
        Kaplan-Meier fit.
    timeline: iterable
        times at which both curves are evaluated; ``model.timeline`` when omitted.
    ax: matplotlib axis
        axes to draw on; ``plt.gca()`` when omitted.
    plot_kwargs:
        forwarded to both plotting calls.

    Returns
    --------
    ax:
        the axes that were drawn on.
    """
    from lifelines import KaplanMeierFitter
    from matplotlib import pyplot as plt

    ax = plt.gca() if ax is None else ax
    timeline = model.timeline if timeline is None else timeline

    empirical_label = "empirical CDF"

    # Each censoring type has its own fitting method and its own pair of data
    # attributes on the model; everything else in the call is shared.
    if CensoringType.is_left_censoring(model):
        fit_name, data_attrs = "fit_left_censoring", ("durations", "event_observed")
    elif CensoringType.is_right_censoring(model):
        fit_name, data_attrs = "fit_right_censoring", ("durations", "event_observed")
    elif CensoringType.is_interval_censoring(model):
        fit_name, data_attrs = "fit_interval_censoring", ("lower_bound", "upper_bound")

    fit = getattr(KaplanMeierFitter(), fit_name)
    km_estimate = fit(
        *[getattr(model, attr) for attr in data_attrs],
        label=empirical_label,
        timeline=timeline,
        weights=model.weights,
        entry=model.entry
    )
    km_estimate.plot_cumulative_density(ax=ax, **plot_kwargs)

    dist_name = get_distribution_name_of_lifelines_model(model)
    fitted_dist = create_scipy_stats_model_from_lifelines_model(model)
    fitted_cdf = fitted_dist.cdf(timeline)
    ax.plot(timeline, fitted_cdf, label="fitted %s" % dist_name, **plot_kwargs)
    ax.legend()
    return ax
Пример #11
0
    def plot_cumulative_density(self, **kwargs):
        """
        Plots a pretty figure of the cumulative density function.

        Matplotlib plot arguments can be passed in inside the kwargs.

        Parameters
        -----------
        show_censors: bool
            place markers at censorship events. Default: False
        censor_styles: dict
            If show_censors, this dictionary will be passed into the plot call.
        ci_alpha: float
            the transparency level of the confidence interval. Default: 0.3
        ci_force_lines: bool
            force the confidence intervals to be line plots (versus default shaded areas). Default: False
        ci_show: bool
            show confidence intervals. Default: True
        ci_legend: bool
            if ci_force_lines is True, this is a boolean flag to add the lines' labels to the legend. Default: False
        at_risk_counts: bool
            show group sizes at time points. See function ``add_at_risk_counts`` for details. Default: False
        loc: slice
            specify a time-based subsection of the curves to plot, ex:

            >>> model.plot(loc=slice(0.,10.))

            will plot the time values between t=0. and t=10.
        iloc: slice
            specify a location-based subsection of the curves to plot, ex:

            >>> model.plot(iloc=slice(0,10))

            will plot the first 10 time points.

        Notes
        -----
        For interval censored models the kwargs are not sent through
        ``_plot_estimate``; they go straight to ``self.cumulative_density_.plot``
        (drawn with ``drawstyle="steps"``), after ``c`` / ``color`` have been
        taken out and combined through ``coalesce`` with ``"k"`` as the final
        fallback. NOTE(review): the lifelines-specific options above are
        presumably not understood in that branch - confirm.

        Returns
        -------
        ax:
            a pyplot axis object
        """
        if not CensoringType.is_interval_censoring(self):
            return _plot_estimate(self, estimate="cumulative_density_", **kwargs)

        # hack for now.
        # Both spellings must be *removed* from kwargs: leaving them in and passing
        # ``color=`` explicitly raises "got multiple values for keyword argument".
        color = coalesce(kwargs.pop("c", None), kwargs.pop("color", None), "k")
        # Return the plot result so this branch honours the documented return too.
        return self.cumulative_density_.plot(drawstyle="steps", color=color, **kwargs)
Пример #12
0
    def plot_survival_function(self, **kwargs):
        """Alias of ``plot``.

        For models that are not interval censored the call is delegated to
        ``_plot_estimate`` with ``estimate="survival_function_"``. For interval
        censored models ``self.survival_function_`` is plotted directly with the
        ``"steps-pre"`` draw style.

        Parameters
        ----------
        kwargs:
            plotting options. In the interval-censoring branch ``c`` and
            ``color`` are removed from ``kwargs`` and combined through
            ``coalesce`` with ``"k"`` as the final fallback; everything else is
            forwarded unchanged.

        Returns
        -------
        ax:
            whatever the underlying plotting call returns.
        """
        if not CensoringType.is_interval_censoring(self):
            return _plot_estimate(self, estimate="survival_function_", **kwargs)

        # hack for now.
        # ``dict.pop`` with a default removes the key when present and yields None
        # otherwise, so the explicit ``color=`` below can never collide with kwargs.
        color = coalesce(kwargs.pop("c", None), kwargs.pop("color", None), "k")
        # Return the plot result so both branches hand something back to the caller.
        return self.survival_function_.plot(drawstyle="steps-pre", color=color, **kwargs)
Пример #13
0
 def _create_initial_point(self, Ts, E, *args):
     """
     Build the two-element starting point ``[median(T), 1.0]`` for the optimizer.

     How ``T`` is chosen depends on the censoring type:

     * right censoring: ``Ts[0]``;
     * left censoring: ``Ts[1]`` with values below ``0.0001`` raised to ``0.0001``;
     * interval censoring: the entries of ``Ts[1]`` that are below ``1e10`` when
       ``E`` has a positive sum, otherwise the single value ``1.0``.

     ``args`` is unused.
     """
     if CensoringType.is_right_censoring(self):
         T = Ts[0]
     elif CensoringType.is_left_censoring(self):
         # np.clip takes (a, a_min, a_max): the data comes first. With the
         # arguments in the old order the call handed Ts[1] back unclipped.
         T = np.clip(Ts[1], 0.0001, np.inf)
     elif CensoringType.is_interval_censoring(self):
         if E.sum() > 0:
             # Ts[1] can contain infs, so ignore this data
             okay_data = Ts[1] < 1e10
             T = Ts[1][okay_data]
         else:
             T = np.array([1.0])
     return np.array([np.median(T), 1.0])
Пример #14
0
    def _ll_null(self):
        """
        Return the log-likelihood of an intercept-only version of this model.

        The value is cached on ``self._ll_null_`` and returned directly on later
        calls.

        A default-constructed instance of the same class is fitted, with all
        warnings suppressed, on a frame that holds only the observed times, event
        flags, entry times and a constant ``intercept`` column; every fitted
        parameter is regressed on ``intercept`` alone.

        Raises
        ------
        NotImplementedError
            if the model is left censored.
        """
        try:
            return self._ll_null_
        except AttributeError:
            pass

        start = np.zeros(len(self._fitted_parameter_names))
        intercept_only = {}
        for param in self._fitted_parameter_names:
            intercept_only[param] = ["intercept"]

        null_model = self.__class__()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")

            if CensoringType.is_right_censoring(self):
                columns = {
                    "T": self.durations,
                    "E": self.event_observed,
                    "entry": self.entry,
                    "intercept": 1,
                }
                null_model.fit_right_censoring(
                    pd.DataFrame(columns),
                    "T",
                    "E",
                    initial_point=start,
                    entry_col="entry",
                    regressors=intercept_only,
                )
            elif CensoringType.is_interval_censoring(self):
                columns = {
                    "lb": self.lower_bound,
                    "ub": self.upper_bound,
                    "E": self.event_observed,
                    "entry": self.entry,
                    "intercept": 1,
                }
                null_model.fit_interval_censoring(
                    pd.DataFrame(columns),
                    "lb",
                    "ub",
                    "E",
                    initial_point=start,
                    entry_col="entry",
                    regressors=intercept_only,
                )

            if CensoringType.is_left_censoring(self):
                raise NotImplementedError()

        self._ll_null_ = null_model._log_likelihood
        return self._ll_null_
Пример #15
0
def cdf_plot(model, timeline=None, ax=None, **plot_kwargs):
    """
    Fit a Kaplan-Meier estimate labelled "empirical CDF" to the model's data.

    Each censoring type is checked independently (left, right, interval); for
    every one that applies, a new ``KaplanMeierFitter`` is fitted with the
    model's data, ``timeline``, ``weights`` and ``entry``.

    ``ax`` defaults to ``plt.gca()`` and ``timeline`` to ``model.timeline``.

    NOTE(review): as written, the function stops after fitting - nothing is
    drawn on ``ax``, ``plot_kwargs`` is unused and nothing is returned.
    Presumably plotting code was meant to follow; confirm.
    """
    from lifelines import KaplanMeierFitter

    ax = plt.gca() if ax is None else ax
    timeline = model.timeline if timeline is None else timeline

    empirical_label = "empirical CDF"

    # (applies-to-model check, fitting method, data attributes read from the model)
    fit_plans = (
        (CensoringType.is_left_censoring, "fit_left_censoring", ("durations", "event_observed")),
        (CensoringType.is_right_censoring, "fit_right_censoring", ("durations", "event_observed")),
        (CensoringType.is_interval_censoring, "fit_interval_censoring", ("lower_bound", "upper_bound")),
    )
    for applies, fit_name, data_attrs in fit_plans:
        if applies(model):
            fit = getattr(KaplanMeierFitter(), fit_name)
            emp_kmf = fit(
                *[getattr(model, attr) for attr in data_attrs],
                label=empirical_label,
                timeline=timeline,
                weights=model.weights,
                entry=model.entry
            )
Пример #16
0
def survival_probability_calibration(model: RegressionFitter,
                                     training_df: pd.DataFrame,
                                     t0: float,
                                     ax=None):
    r"""
    Draw a smoothed calibration curve for a time-to-event regression model.

    This is the survival-analysis analogue of a calibration curve for a
    classifier, extended to handle survival probabilities and censoring: the
    model's predicted :math:`P(T < \text{t0})` is compared with the observed
    frequencies, using a flexible spline model as the smoother. Produces a
    matplotlib figure and some metrics (which are also printed).

    Parameters
    -------------

    model:
        a fitted lifelines regression model to be evaluated
    training_df: DataFrame
        the DataFrame used to train the model
    t0: float
        the time at which the probability of the event having occurred is
        evaluated. Labels and the derived column name format it with ``%d``.
    ax:
        matplotlib axes to draw on; ``plt.gca()`` is used when omitted.

    Returns
    ----------
    ax:
        mpl axes
    ICI:
        mean absolute difference between predicted and observed
    E50:
        median absolute difference between predicted and observed

    https://onlinelibrary.wiley.com/doi/full/10.1002/sim.8570

    """
    def ccl(p):
        # complementary log-log transform
        return np.log(-np.log(1 - p))

    if ax is None:
        ax = plt.gca()

    T = model.duration_col
    E = model.event_col
    ccl_column = "ccl_at_%d" % t0

    # predicted probability of the event by t0, kept strictly inside (0, 1)
    survival_at_t0 = model.predict_survival_function(training_df, times=[t0]).T.squeeze()
    predictions_at_t0 = np.clip(1 - survival_at_t0, 1e-10, 1 - 1e-10)

    # create new dataset with the predictions
    prediction_df = pd.DataFrame({
        ccl_column: ccl(predictions_at_t0),
        "constant": 1,
        T: model.durations,
        E: model.event_observed,
    })

    # fit new dataset to flexible spline model
    # this new model connects prediction probabilities and actual survival. It
    # should be very flexible, almost to the point of overfitting. Its goal is
    # just to smooth out the data!
    n_knots = 3
    regressors = {"beta_": [ccl_column]}
    for gamma_name in ("gamma0_", "gamma1_", "gamma2_"):
        regressors[gamma_name] = ["constant"]

    # this model is from examples/royson_crowther_clements_splines.py
    crc = CRCSplineFitter(n_knots, penalizer=0)
    if CensoringType.is_right_censoring(model):
        crc.fit_right_censoring(prediction_df, T, E, regressors=regressors)
    elif CensoringType.is_left_censoring(model):
        crc.fit_left_censoring(prediction_df, T, E, regressors=regressors)
    elif CensoringType.is_interval_censoring(model):
        # NOTE(review): T and E are passed where an interval fit would presumably
        # expect lower/upper bound columns - confirm this branch is exercised.
        crc.fit_interval_censoring(prediction_df, T, E, regressors=regressors)

    # predict new model at values 0 to 1, but remember to ccl it!
    grid_start = np.clip(predictions_at_t0.min() - 0.01, 0, 1)
    grid_stop = np.clip(predictions_at_t0.max() + 0.01, 0, 1)
    x = np.linspace(grid_start, grid_stop, 100)
    grid_df = pd.DataFrame({ccl_column: ccl(x), "constant": 1})
    y = 1 - crc.predict_survival_function(grid_df, times=[t0]).T.squeeze()

    # plot our results
    ax.set_title(
        "Smoothed calibration curve of \npredicted vs observed probabilities of t ≤ %d mortality" % t0
    )

    curve_color = "tab:red"
    ax.plot(x, y, label="smoothed calibration curve", color=curve_color)
    ax.set_xlabel("Predicted probability of \nt ≤ %d mortality" % t0)
    ax.set_ylabel("Observed probability of \nt ≤ %d mortality" % t0, color=curve_color)
    ax.tick_params(axis="y", labelcolor=curve_color)

    # plot x=y line
    ax.plot(x, x, c="k", ls="--")
    ax.legend()

    # plot histogram of our original predictions
    hist_color = "tab:blue"
    twin_ax = ax.twinx()
    # the x-label was already handled on ``ax``
    twin_ax.set_ylabel("Count of \npredicted probabilities", color=hist_color)
    twin_ax.tick_params(axis="y", labelcolor=hist_color)
    twin_ax.hist(predictions_at_t0, alpha=0.3, bins="sqrt", color=hist_color)

    plt.tight_layout()

    # absolute gap between the smoothed ("observed") and the predicted probabilities
    smoothed_at_t0 = (1 - crc.predict_survival_function(prediction_df, times=[t0])).T.squeeze()
    deltas = (smoothed_at_t0 - predictions_at_t0).abs()
    ICI = deltas.mean()
    E50 = np.percentile(deltas, 50)
    print("ICI = ", ICI)
    print("E50 = ", E50)

    return ax, ICI, E50
Пример #17
0
def qq_plot(model, ax=None, **plot_kwargs):
    """
    Draw a quantile-quantile plot of the empirical CDF against the fitted
    parametric CDF. Large deviations from the line y=x can invalidate a model
    (though some natural deviation is expected in the tails).

    Parameters
    -----------
    model: obj
        A fitted lifelines univariate parametric model, like ``WeibullFitter``
    ax:
        matplotlib axes to draw on; ``plt.gca()`` is used when omitted.
    plot_kwargs:
        kwargs for the plot. NOTE(review): currently accepted but not used.

    Returns
    --------
    ax:
        The axes which was used.

    Raises
    -------
    NotImplementedError
        if the model is interval censored.

    Examples
    ---------

    >>> from lifelines import *
    >>> from lifelines.plotting import qq_plot
    >>> from lifelines.datasets import load_rossi
    >>> df = load_rossi()
    >>> wf = WeibullFitter().fit(df['week'], df['arrest'])
    >>> qq_plot(wf)


    """
    from lifelines import KaplanMeierFitter
    from lifelines.utils import qth_survival_times

    ax = plt.gca() if ax is None else ax

    dist = get_distribution_name_of_lifelines_model(model)
    dist_object = create_scipy_stats_model_from_lifelines_model(model)

    COL_EMP = "empirical quantiles"
    COL_THEO = "fitted %s quantiles" % dist

    # Pick the Kaplan-Meier fitting method that matches the model's censoring type.
    if CensoringType.is_left_censoring(model):
        fit_name = "fit_left_censoring"
    elif CensoringType.is_right_censoring(model):
        fit_name = "fit_right_censoring"
    elif CensoringType.is_interval_censoring(model):
        raise NotImplementedError("lifelines does not have a non-parametric interval model yet.")

    kmf = getattr(KaplanMeierFitter(), fit_name)(model.durations, model.event_observed, label=COL_EMP)

    q = np.unique(kmf.cumulative_density_.values[:, 0])
    # this is equivalent to the old code `qth_survival_times(q, kmf.cumulative_density, cdf=True)`
    quantiles = qth_survival_times(1 - q, kmf.survival_function_)
    quantiles[COL_THEO] = dist_object.ppf(q)
    # rows holding 0 or +/-inf are dropped before plotting
    quantiles = quantiles.replace([-np.inf, 0, np.inf], np.nan).dropna()

    empirical = quantiles[COL_EMP]
    max_, min_ = empirical.max(), empirical.min()

    quantiles.plot.scatter(COL_THEO, COL_EMP, c="none", edgecolor="k", lw=0.5, ax=ax)
    # reference line y = x across the empirical range
    diagonal = [min_, max_]
    ax.plot(diagonal, diagonal, c="k", ls=":", lw=1.0)
    ax.set_ylim(min_, max_)
    ax.set_xlim(min_, max_)

    return ax
Пример #18
0
def qq_plot(model, ax=None, **plot_kwargs):
    """
    Draw a quantile-quantile plot of the empirical CDF against the fitted
    parametric CDF. Large deviations from the line y=x can invalidate a model
    (though some natural deviation is expected in the tails).

    Parameters
    -----------
    model: obj
        A fitted lifelines univariate parametric model, like ``WeibullFitter``
    ax:
        matplotlib axes to draw on; ``plt.gca()`` is used when omitted.
    plot_kwargs:
        kwargs for the plot. NOTE(review): currently accepted but not used.

    Returns
    --------
    ax:
        The axes which was used.

    Examples
    ---------
    .. code:: python

        from lifelines import *
        from lifelines.plotting import qq_plot
        from lifelines.datasets import load_rossi
        df = load_rossi()
        wf = WeibullFitter().fit(df['week'], df['arrest'])
        qq_plot(wf)

    Notes
    ------
    The interval censoring case uses the mean between the upper and lower bounds
    for the survival function, and the ``"_lower"`` column of the cumulative
    density for the probability levels.

    """
    from lifelines import KaplanMeierFitter
    from lifelines.utils import qth_survival_times

    ax = plt.gca() if ax is None else ax

    dist = get_distribution_name_of_lifelines_model(model)
    dist_object = create_scipy_stats_model_from_lifelines_model(model)

    COL_EMP = "empirical quantiles"
    COL_THEO = "fitted %s quantiles" % dist

    # Each censoring type has its own fitting method and its own pair of data
    # attributes on the model; the remaining arguments are shared.
    if CensoringType.is_left_censoring(model):
        fit_name, data_attrs, is_interval = "fit_left_censoring", ("durations", "event_observed"), False
    elif CensoringType.is_right_censoring(model):
        fit_name, data_attrs, is_interval = "fit_right_censoring", ("durations", "event_observed"), False
    elif CensoringType.is_interval_censoring(model):
        fit_name, data_attrs, is_interval = "fit_interval_censoring", ("lower_bound", "upper_bound"), True

    fit = getattr(KaplanMeierFitter(), fit_name)
    kmf = fit(
        *[getattr(model, attr) for attr in data_attrs],
        label=COL_EMP,
        weights=model.weights,
        entry=model.entry
    )

    if is_interval:
        sf = kmf.survival_function_.mean(1)
        cdf = kmf.cumulative_density_[COL_EMP + "_lower"]
    else:
        sf = kmf.survival_function_[COL_EMP]
        cdf = kmf.cumulative_density_[COL_EMP]

    q = np.unique(cdf.values)

    quantiles = qth_survival_times(1 - q, sf)
    quantiles[COL_THEO] = dist_object.ppf(q)
    # rows holding 0 or +/-inf are dropped before plotting
    quantiles = quantiles.replace([-np.inf, 0, np.inf], np.nan).dropna()

    empirical = quantiles[COL_EMP]
    max_, min_ = empirical.max(), empirical.min()

    quantiles.plot.scatter(COL_THEO, COL_EMP, c="none", edgecolor="k", lw=0.5, ax=ax)
    # reference line y = x across the empirical range
    diagonal = [min_, max_]
    ax.plot(diagonal, diagonal, c="k", ls=":", lw=1.0)
    ax.set_ylim(min_, max_)
    ax.set_xlim(min_, max_)

    return ax