示例#1
0
    def __init__(self, p_value, test_statistic, name=None, test_name=None, **kwargs):
        """
        Store the outcome of one or more statistical tests.

        Parameters
        ----------
        p_value: scalar or iterable
            the p-value(s). Kept unchanged on ``p_value`` and, converted with
            ``utils._to_1d_array``, on ``_p_value``.
        test_statistic: scalar or iterable
            the test statistic(s). Kept unchanged on ``test_statistic`` and, converted
            with ``utils._to_1d_array``, on ``_test_statistic``. Must have as many
            entries as ``p_value``.
        name: optional
            label(s) for the results, converted with ``utils._to_list``. When given,
            there must be one label per test statistic.
        test_name: optional
            stored on ``test_name`` and added to ``_kwargs``.
        kwargs:
            every extra keyword is set as an attribute of the instance and kept,
            together with ``test_name``, in ``_kwargs``.
        """
        # Raw inputs are kept exactly as the caller supplied them.
        self.p_value = p_value
        self.test_statistic = test_statistic
        self.test_name = test_name

        # Array views of the same inputs; both must describe the same number of tests.
        self._p_value = utils._to_1d_array(p_value)
        self._test_statistic = utils._to_1d_array(test_statistic)
        assert len(self._p_value) == len(self._test_statistic)

        if name is None:
            self.name = None
        else:
            self.name = utils._to_list(name)
            assert len(self.name) == len(self._test_statistic)

        # Expose every extra keyword as an attribute (set after the ones above).
        for attribute in kwargs:
            setattr(self, attribute, kwargs[attribute])

        # ``test_name`` joins the stored keywords only after the attributes are set.
        kwargs["test_name"] = test_name
        self._kwargs = kwargs
    def survival_function_at_times(self, times, label=None) -> pd.Series:
        """
        Evaluate the predicted survival value at the requested times.

        Parameters
        -----------
        times: iterable or float
            the time(s) handed to ``self.predict``; also used, via ``_to_1d_array``,
            as the index of the result.
        label: str, optional
            name of the returned Series, resolved with ``coalesce(label, self._label)``.

        Returns
        --------
        pd.Series

        """
        series_name = coalesce(label, self._label)
        predicted = self.predict(times)
        time_index = _to_1d_array(times)
        return pd.Series(predicted, index=time_index, name=series_name)
示例#3
0
    def cumulative_density_at_times(self, times, label=None) -> pd.Series:
        """
        Evaluate the predicted cumulative density at the requested times.

        The values are ``1 - self.predict(times)``.

        Parameters
        -----------
        times: iterable or float
            the time(s) handed to ``self.predict``; also used, via ``_to_1d_array``,
            as the index of the result.
        label: str, optional
            name of the returned Series, resolved with ``coalesce(label, self._label)``.

        Returns
        --------
        pd.Series

        """
        series_name = coalesce(label, self._label)
        density = 1 - self.predict(times)
        time_index = _to_1d_array(times)
        return pd.Series(density, index=time_index, name=series_name)
示例#4
0
def proportional_hazard_test(fitted_cox_model,
                             training_df,
                             time_transform="rank",
                             precomputed_residuals=None,
                             **kwargs) -> StatisticalResult:
    """
    Test whether any variable in a Cox model breaks the proportional hazard assumption. This method uses an approximation
    that R's ``survival`` used to use, but R changed it in late 2019, hence there will be differences here between lifelines and R.

    Parameters
    ----------
    fitted_cox_model: CoxPHFitter
        the fitted Cox model, fitted with `training_df`, you wish to test. Currently only the CoxPHFitter is supported,
        but later CoxTimeVaryingFitter, too.
    training_df: DataFrame
        the DataFrame used in the call to the Cox model's ``fit``.
    time_transform: vectorized function, list, or string, optional (default='rank')
        {'all', 'km', 'rank', 'identity', 'log'}
        One of the strings above, a list of strings, or a function to transform the time. The transform is
        called as ``transform(durations, events, weights)``. 'all' will present all the transforms.
    precomputed_residuals: DataFrame, optional
        specify the scaled Schoenfeld residuals, if already computed. When given they are used as-is and no
        residuals are computed from ``training_df``.
    kwargs:
        additional parameters to add to the StatisticalResult

    Returns
    -------
    StatisticalResult
        one entry per element of ``fitted_cox_model.params_.index``. When ``time_transform`` is a list (or 'all'),
        the entries of every transform are accumulated and named ``(variable, transform_name)``.

    Notes
    ------
    R uses the default `km`, we use `rank`, as this performs well versus other transforms. See
    http://eprints.lse.ac.uk/84988/1/06_ParkHendry2015-ReassessingSchoenfeldTests_Final.pdf

    References
    -----------
     - http://eprints.lse.ac.uk/84988/1/06_ParkHendry2015-ReassessingSchoenfeldTests_Final.pdf
     - "Extending the Cox Model"
     - https://github.com/therneau/survival/commit/5da455de4f16fbed7f867b1fc5b15f2157a132cd#diff-c784cc3eeb38f0a6227988a30f9c0730R36

    """
    if "transform" in kwargs:
        warnings.warn(
            "Found 'transform' keyword being set. Did you mean to set 'time_transform' instead?",
            UserWarning)

    events = fitted_cox_model.event_observed
    durations = fitted_cox_model.durations
    weights = fitted_cox_model.weights
    n_deaths = events.sum()

    if precomputed_residuals is None:
        # Schoenfeld residuals scaled by the variance matrix and the number of
        # deaths. This is the exact expression the function previously always
        # evaluated, so results on this path are unchanged.
        scaled_resids = (
            fitted_cox_model.compute_residuals(training_df, kind="schoenfeld").dot(
                fitted_cox_model.variance_matrix_) * n_deaths)
    else:
        # Honour the caller's residuals. They used to be silently overwritten
        # by a fresh computation, which made this argument a no-op.
        scaled_resids = precomputed_residuals

    def compute_statistic(times, resids, n_deaths):
        # Per variable: (sum over events of centered_time * residual)^2, divided by
        # n_deaths * squared standard error * sum of squared centered times.
        demeaned_times = times - times.mean()
        T = (demeaned_times.values[:, None] * resids.values).sum(0)**2 / (
            n_deaths * (fitted_cox_model.standard_errors_**2) *
            (demeaned_times**2).sum())
        return T

    if time_transform == "all":
        time_transform = list(TimeTransformers.TIME_TRANSFOMERS.keys())

    if isinstance(time_transform, list):

        # Start from an empty result and append one batch per transform.
        result = StatisticalResult([], [], [])

        for transform_name in time_transform:
            transform = TimeTransformers().get(transform_name)
            # Keep only the transformed times of rows where the event was observed.
            times = transform(durations, events, weights)[events.values]
            T = compute_statistic(times, scaled_resids, n_deaths)
            p_values = utils._to_1d_array(
                [_chisq_test_p_value(t, 1) for t in T])
            result += StatisticalResult(
                p_values,
                T,
                name=[(c, transform_name)
                      for c in fitted_cox_model.params_.index],
                test_name="proportional_hazard_test",
                null_distribution="chi squared",
                degrees_of_freedom=1,
                model=str(fitted_cox_model),
                **kwargs)

    else:
        time_transformer = TimeTransformers().get(time_transform)
        assert callable(
            time_transformer
        ), "time_transform must be a callable function, or a string: {'rank', 'km', 'identity', 'log'}."

        # Keep only the transformed times of rows where the event was observed.
        times = time_transformer(durations, events, weights)[events.values]

        T = compute_statistic(times, scaled_resids, n_deaths)

        p_values = utils._to_1d_array([_chisq_test_p_value(t, 1) for t in T])
        result = StatisticalResult(
            p_values,
            T,
            name=fitted_cox_model.params_.index.tolist(),
            test_name="proportional_hazard_test",
            time_transform=time_transform,
            null_distribution="chi squared",
            degrees_of_freedom=1,
            model=str(fitted_cox_model),
            **kwargs)
    return result
示例#5
0
def proportional_hazard_test(fitted_cox_model,
                             training_df,
                             time_transform="rank",
                             precomputed_residuals=None,
                             **kwargs):
    """
    Test whether any variable in a Cox model breaks the proportional hazard assumption.

    Parameters
    ----------
    fitted_cox_model: CoxPHFitter
        the fitted Cox model, fitted with `training_df`, you wish to test. Currently only the CoxPHFitter is supported,
        but later CoxTimeVaryingFitter, too.
    training_df: DataFrame
        the DataFrame used in the call to the Cox model's ``fit``.
    time_transform: vectorized function, list, or string, optional (default='rank')
        {'all', 'km', 'rank', 'identity', 'log'}
        One of the strings above, a list of strings, or a function to transform the time. The transform is
        called as ``transform(durations, events, weights)``. 'all' will present all the transforms.
    precomputed_residuals: DataFrame, optional
        specify the scaled Schoenfeld residuals, if already computed. When omitted they are computed with
        ``fitted_cox_model.compute_residuals(training_df, kind="scaled_schoenfeld")``.
    kwargs:
        additional parameters to add to the StatisticalResult

    Returns
    -------
    StatisticalResult
        one entry per element of ``fitted_cox_model.params_.index``. When ``time_transform`` is a list (or 'all'),
        the entries of every transform are accumulated and named ``(variable, transform_name)``.

    Notes
    ------
    R uses the default `km`, we use `rank`, as this performs well versus other transforms. See
    http://eprints.lse.ac.uk/84988/1/06_ParkHendry2015-ReassessingSchoenfeldTests_Final.pdf

    """

    events = fitted_cox_model.event_observed
    durations = fitted_cox_model.durations
    weights = fitted_cox_model.weights
    deaths = events.sum()

    if precomputed_residuals is None:
        scaled_resids = fitted_cox_model.compute_residuals(
            training_df, kind="scaled_schoenfeld")
    else:
        scaled_resids = precomputed_residuals

    def compute_statistic(times, resids):
        # Center onto a new object rather than ``times -= ...`` so the values
        # passed in by the caller are never modified.
        centered_times = times - times.mean()
        # Per variable: (sum over events of centered_time * residual)^2, divided by
        # deaths * diagonal of the variance matrix * sum of squared centered times.
        T = (centered_times.values[:, None] * resids.values).sum(0)**2 / (
            deaths * np.diag(fitted_cox_model.variance_matrix_) *
            (centered_times**2).sum())
        return T

    if time_transform == "all":
        time_transform = list(TimeTransformers.TIME_TRANSFOMERS.keys())

    if isinstance(time_transform, list):

        # Start from an empty result and append one batch per transform.
        result = StatisticalResult([], [], [])

        for transform_name in time_transform:
            transform = TimeTransformers().get(transform_name)
            # Keep only the transformed times of rows where the event was observed.
            times = transform(durations, events, weights)[events.values]
            T = compute_statistic(times, scaled_resids)
            p_values = _to_1d_array([chisq_test(t, 1) for t in T])
            result += StatisticalResult(
                p_values,
                T,
                name=[(c, transform_name)
                      for c in fitted_cox_model.params_.index],
                test_name="proportional_hazard_test",
                null_distribution="chi squared",
                degrees_of_freedom=1,
                **kwargs)

    else:
        time_transformer = TimeTransformers().get(time_transform)
        assert callable(
            time_transformer
        ), "time_transform must be a callable function, or a string: {'rank', 'km', 'identity', 'log'}."

        # Keep only the transformed times of rows where the event was observed.
        times = time_transformer(durations, events, weights)[events.values]

        T = compute_statistic(times, scaled_resids)

        p_values = _to_1d_array([chisq_test(t, 1) for t in T])
        result = StatisticalResult(
            p_values,
            T,
            name=fitted_cox_model.params_.index.tolist(),
            test_name="proportional_hazard_test",
            time_transform=time_transform,
            null_distribution="chi squared",
            degrees_of_freedom=1,
            **kwargs)
    return result