Exemplo n.º 1
0
    def test_predict_log_hazard_relative_to_mean_with_normalization(self, rossi):
        """Log hazard relative to the mean must equal the log partial hazard when normalizing.

        With normalized data the mean of every covariate is 0, so
        exp(beta * 0) == 1 and exp(beta * X) / exp(beta * 0) == exp(beta * X).
        """
        fitter = CoxPHFitter(normalize=True)
        fitter.fit(rossi, 'week', 'arrest')

        relative_log_hazard = fitter.predict_log_hazard_relative_to_mean(rossi)
        log_partial_hazard = np.log(fitter.predict_partial_hazard(rossi))
        assert_frame_equal(relative_log_hazard, log_partial_hazard)
Exemplo n.º 2
0
 def test_coxph_plotting_normalized(self, block):
     """Fit a Cox model on the regression dataset, call ``plot(True)`` and show the figure."""
     dataset = load_regression_dataset()
     fitter = CoxPHFitter()
     fitter.fit(dataset, "T", "E")
     fitter.plot(True)

     pyplot = self.plt
     pyplot.title('test_coxph_plotting')
     pyplot.show(block=block)
Exemplo n.º 3
0
    def test_print_summary(self, rossi):

        import sys
        saved_stdout = sys.stdout
        try:
            out = StringIO()
            sys.stdout = out

            cp = CoxPHFitter()
            cp.fit(rossi, duration_col='week', event_col='arrest')
            cp.print_summary()
            output = out.getvalue().strip().split()
            expected = """n=432, number of events=114

           coef  exp(coef)  se(coef)          z         p  lower 0.95  upper 0.95
fin  -1.897e-01  8.272e-01 9.579e-02 -1.981e+00 4.763e-02  -3.775e-01  -1.938e-03   *
age  -3.500e-01  7.047e-01 1.344e-01 -2.604e+00 9.210e-03  -6.134e-01  -8.651e-02  **
race  1.032e-01  1.109e+00 1.012e-01  1.020e+00 3.078e-01  -9.516e-02   3.015e-01
wexp -7.486e-02  9.279e-01 1.051e-01 -7.124e-01 4.762e-01  -2.809e-01   1.311e-01
mar  -1.421e-01  8.675e-01 1.254e-01 -1.134e+00 2.570e-01  -3.880e-01   1.037e-01
paro -4.134e-02  9.595e-01 9.522e-02 -4.341e-01 6.642e-01  -2.280e-01   1.453e-01
prio  2.639e-01  1.302e+00 8.291e-02  3.182e+00 1.460e-03   1.013e-01   4.264e-01  **
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Concordance = 0.640""".strip().split()
            for i in [0, 1, 2, -2, -1]:
                assert output[i] == expected[i]
        finally:
            sys.stdout = saved_stdout
Exemplo n.º 4
0
 def test_strata_works_if_only_a_single_element_is_in_the_strata(self):
     """Fitting with ``strata=['Stratum']`` on the holly/molly/polly data must not raise."""
     df = load_holly_molly_polly()
     # Drop the columns that are not used by this fit.
     for unused_column in ('Start(days)', 'Stop(days)', 'ID'):
         del df[unused_column]

     fitter = CoxPHFitter()
     fitter.fit(df, 'T', 'Status', strata=['Stratum'])
     # Reaching this line without an exception is the whole check.
     assert True
Exemplo n.º 5
0
    def test_p_value_against_Survival_Analysis_by_John_Klein_and_Melvin_Moeschberger(self):
        """Compare p-values on the larynx data with table 8.1 of
        Survival Analysis by John P. Klein and Melvin L. Moeschberger, Second Edition.
        """
        larynx = load_larynx()
        fitter = CoxPHFitter()
        fitter.fit(larynx, duration_col='time', event_col='death')

        computed_p = fitter._compute_p_values()
        reference_p = np.array([0.1847, 0.7644,  0.0730, 0.00])
        npt.assert_array_almost_equal(computed_p, reference_p, decimal=2)
Exemplo n.º 6
0
    def test_fit_methods_require_duration_col(self):
        """Both regression fitters must raise TypeError when ``fit`` gets only the data."""
        dataset = load_regression_dataset()

        # Both fitters are built up front, then checked in the same order.
        for fitter in (AalenAdditiveFitter(), CoxPHFitter()):
            with pytest.raises(TypeError):
                fitter.fit(dataset)
Exemplo n.º 7
0
    def test_se_against_Survival_Analysis_by_John_Klein_and_Melvin_Moeschberger(self):
        """Compare standard errors on the larynx data with table 8.1 of
        Survival Analysis by John P. Klein and Melvin L. Moeschberger, Second Edition.
        """
        larynx = load_larynx()
        fitter = CoxPHFitter(normalize=False)
        fitter.fit(larynx, duration_col='time', event_col='death')

        computed_se = fitter._compute_standard_errors().values
        reference_se = np.array([[0.0143,  0.4623,  0.3561,  0.4222]])
        npt.assert_array_almost_equal(computed_se, reference_se, decimal=2)
Exemplo n.º 8
0
 def test_output_with_strata_against_R(self, rossi):
     """
     Stratified coefficients must match R's ``coxph`` output to three decimals.

     R code used for the reference values:

     rossi <- read.csv('.../lifelines/datasets/rossi.csv')
     r = coxph(formula = Surv(week, arrest) ~ fin + age + strata(race,
                 paro, mar, wexp) + prio, data = rossi)
     """
     strata_columns = ['race', 'paro', 'mar', 'wexp']
     r_coefficients = np.array([[-0.335, -0.059, 0.100]])

     fitter = CoxPHFitter(normalize=False)
     fitter.fit(rossi, duration_col='week', event_col='arrest', strata=strata_columns)
     npt.assert_array_almost_equal(fitter.hazards_.values, r_coefficients, decimal=3)
Exemplo n.º 9
0
 def test_penalized_output_against_R(self, rossi):
     """Coefficients fitted with ``penalizer=1.0`` must match R's ridge ``coxph`` to three decimals.

     R code used for the reference values:

     rossi <- read.csv('.../lifelines/datasets/rossi.csv')
     mod.allison <- coxph(Surv(week, arrest) ~ ridge(fin, age, race, wexp, mar, paro, prio,
                                                     theta=1.0, scale=FALSE), data=rossi)
     cat(round(mod.allison$coefficients, 4), sep=", ")
     """
     r_coefficients = np.array([[-0.3641, -0.0580, 0.2894, -0.1496, -0.3837, -0.0822, 0.0913]])

     fitter = CoxPHFitter(normalize=False, penalizer=1.0)
     fitter.fit(rossi, duration_col='week', event_col='arrest')

     fitted_coefficients = fitter.hazards_.values
     npt.assert_array_almost_equal(fitted_coefficients, r_coefficients, decimal=3)
Exemplo n.º 10
0
    def test_predict_methods_in_regression_return_same_types(self):
        """Each shared predict method must return, for Aalen, an instance of the type Cox returns."""
        dataset = load_regression_dataset()
        shared_predict_methods = [
            'predict_percentile',
            'predict_median',
            'predict_expectation',
            'predict_survival_function',
            'predict_cumulative_hazard',
        ]

        aalen = AalenAdditiveFitter()
        cox = CoxPHFitter()
        aalen.fit(dataset, duration_col='T', event_col='E')
        cox.fit(dataset, duration_col='T', event_col='E')

        for method_name in shared_predict_methods:
            aalen_prediction = getattr(aalen, method_name)(dataset)
            cox_prediction = getattr(cox, method_name)(dataset)
            assert isinstance(aalen_prediction, type(cox_prediction))
Exemplo n.º 11
0
 def test_output_against_R(self, rossi):
     """Unpenalized coefficients on rossi must match R's ``coxph`` to three decimals.

     From http://cran.r-project.org/doc/contrib/Fox-Companion/appendix-cox-regression.pdf
     (the link is now broken, but this is the code):

     rossi <- read.csv('.../lifelines/datasets/rossi.csv')
     mod.allison <- coxph(Surv(week, arrest) ~ fin + age + race + wexp + mar + paro + prio,
         data=rossi)
     cat(round(mod.allison$coefficients, 4), sep=", ")
     """
     r_coefficients = np.array([[-0.3794, -0.0574, 0.3139, -0.1498, -0.4337, -0.0849,  0.0915]])

     fitter = CoxPHFitter(normalize=False)
     fitter.fit(rossi, duration_col='week', event_col='arrest')

     fitted_coefficients = fitter.hazards_.values
     npt.assert_array_almost_equal(fitted_coefficients, r_coefficients, decimal=3)
Exemplo n.º 12
0
    def test_coef_output_against_Survival_Analysis_by_John_Klein_and_Melvin_Moeschberger(self):
        """Compare coefficients on the kidney transplant data with example 8.3 of
        Survival Analysis by John P. Klein and Melvin L. Moeschberger, Second Edition.
        """
        wanted_columns = ['time', 'death',
                          'black_male', 'white_male',
                          'black_female']
        kidney = load_kidney_transplant(usecols=wanted_columns)

        fitter = CoxPHFitter(normalize=False)
        fitter.fit(kidney, duration_col='time', event_col='death')

        reference_coefs = np.array([[0.1596, 0.2484, 0.6567]])
        npt.assert_array_almost_equal(fitter.hazards_.values, reference_coefs, decimal=4)
Exemplo n.º 13
0
 def test_hazard_works_as_intended_with_strata_against_R_output(self, rossi):
     """
     Per-stratum baseline cumulative hazards must match R's ``basehaz`` to two decimals.

     Rows are selected by label with ``.loc``; the removed ``.ix`` indexer is
     no longer used.

     > library(survival)
     > ross = read.csv('rossi.csv')
     > r = coxph(formula = Surv(week, arrest) ~ fin + age + strata(race,
         paro, mar, wexp) + prio, data = rossi)
     > basehaz(r, centered=FALSE)
     """
     cp = CoxPHFitter(normalize=False)
     cp.fit(rossi, 'week', 'arrest', strata=['race', 'paro', 'mar', 'wexp'])
     baseline = cp.baseline_cumulative_hazard_

     npt.assert_almost_equal(
         baseline[(0, 0, 0, 0)].loc[[14, 35, 37, 43, 52]].values,
         [0.28665890, 0.63524149, 1.01822603, 1.48403930, 1.48403930],
         decimal=2)
     npt.assert_almost_equal(
         baseline[(0, 0, 0, 1)].loc[[27, 43, 48, 52]].values,
         [0.35738173, 0.76415714, 1.26635373, 1.26635373],
         decimal=2)
Exemplo n.º 14
0
    def test_strata_against_r_output(self, rossi):
        """
        Stratified coefficients and log-likelihood must agree with R's ``coxph``.

        > r = coxph(formula = Surv(week, arrest) ~ fin + age + strata(race,
            paro, mar, wexp) + prio, data = rossi)
        > r
        > r$loglik
        """
        strata_columns = ['race', 'paro', 'mar', 'wexp']

        fitter = CoxPHFitter(normalize=False)
        fitter.fit(rossi, 'week', 'arrest', strata=strata_columns, include_likelihood=True)

        fitted_coefs = fitter.summary['coef'].values
        npt.assert_almost_equal(fitted_coefs, [-0.335, -0.059, 0.100], decimal=3)

        # The log-likelihood only has to be within 1% of R's value.
        relative_error = abs(fitter._log_likelihood - -436.9339) / 436.9339
        assert relative_error < 0.01
Exemplo n.º 15
0
    def test_summary(self, rossi):
        """The summary frame of a fitted Cox model must contain every expected column."""
        fitter = CoxPHFitter()
        fitter.fit(rossi, duration_col='week', event_col='arrest')

        required_columns = [
            'coef',
            'exp(coef)',
            'se(coef)',
            'z',
            'p',
            'lower 0.95',
            'upper 0.95',
        ]
        summary_frame = fitter.summary
        assert all(name in summary_frame.columns for name in required_columns)
Exemplo n.º 16
0
    def test_warning_is_raised_if_df_has_a_near_constant_column(self, rossi):
        """Fitting on data with a constant column must emit exactly one RuntimeWarning.

        The warning message is expected to mention "variance". Any ordinary
        exception raised by the fit itself is ignored because only the
        warning is under test.
        """
        cox = CoxPHFitter()
        # Work on a copy so the injected column never leaks into the caller's frame.
        rossi = rossi.copy()
        rossi['constant'] = 1.0

        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            try:
                cox.fit(rossi, 'week', 'arrest')
            except Exception:
                # Deliberately best-effort: the fit may fail on the degenerate
                # column, but KeyboardInterrupt/SystemExit must still propagate.
                pass
            assert len(w) == 1
            assert issubclass(w[-1].category, RuntimeWarning)
            assert "variance" in str(w[-1].message)
Exemplo n.º 17
0
    def test_fit_methods_can_accept_optional_event_col_param(self):
        """``event_col`` is optional for both fitters.

        With ``event_col='E'`` the stored ``event_observed`` must equal the
        boolean 'E' column; without it every row must be marked as observed.
        """
        dataset = load_regression_dataset()
        row_count = dataset.shape[0]

        aalen = AalenAdditiveFitter()
        aalen.fit(dataset, 'T', event_col='E')
        assert_series_equal(aalen.event_observed.sort_index(), dataset['E'].astype(bool), check_names=False)

        aalen.fit(dataset, 'T')
        npt.assert_array_equal(aalen.event_observed.values, np.ones(row_count))

        cox = CoxPHFitter()
        cox.fit(dataset, 'T', event_col='E')
        assert_series_equal(cox.event_observed.sort_index(), dataset['E'].astype(bool), check_names=False)

        cox.fit(dataset, 'T')
        npt.assert_array_equal(cox.event_observed.values, np.ones(row_count))
Exemplo n.º 18
0
    def test_data_normalization(self, data_pred2):
        """Predictions on the stored training data and on the raw input must rank identically.

        During fit, CoxPH copies the training data and normalizes it. Later
        calls should be normalized the same way, and the internal training
        set should not be kept in a normalized state.
        """
        fitter = CoxPHFitter(normalize=True)
        fitter.fit(data_pred2, duration_col='t', event_col='E')

        # Concordance computed from the fitter's own copy of the training set.
        internal_scores = -fitter.predict_partial_hazard(fitter.data).values
        ci_internal = concordance_index(fitter.durations, internal_scores, fitter.event_observed)

        # Concordance computed from the original frame, which must normalize identically.
        raw_scores = -fitter.predict_partial_hazard(data_pred2[['x1', 'x2']]).values
        ci_raw = concordance_index(data_pred2['t'], raw_scores, data_pred2['E'])

        assert ci_raw == ci_internal
Exemplo n.º 19
0
    def test_efron_computed_by_hand_examples(self, data_nus):
        """Check ``_get_efron_values`` against hand-computed iterations.

        Starting from beta = 0, three successive update steps
        ``beta = beta + u / l`` are compared with the numbers worked out in
        the NUS handout referenced below.
        """
        cox = CoxPHFitter()

        # Add a trailing axis so the single covariate becomes a 2-D column.
        X = data_nus['x'][:, None]
        T = data_nus['t']
        E = data_nus['E']

        # Enforce numpy arrays
        X = np.array(X)
        T = np.array(T)
        E = np.array(E)

        # Want as bools
        E = E.astype(bool)

        # tests from http://courses.nus.edu.sg/course/stacar/internet/st3242/handouts/notes3.pdf
        beta = np.array([[0]])

        # NOTE(review): l and u are presumably the Hessian and the gradient
        # (score) of the log-likelihood - confirm against _get_efron_values.
        # The sign of l is flipped before it is compared and used.
        l, u = cox._get_efron_values(X, beta, T, E)
        l = -l

        # First iteration.
        assert np.abs(l[0][0] - 77.13) < 0.05
        assert np.abs(u[0] - -2.51) < 0.05
        beta = beta + u / l
        assert np.abs(beta - -0.0326) < 0.05

        l, u = cox._get_efron_values(X, beta, T, E)
        l = -l

        # Second iteration, evaluated at the updated beta.
        assert np.abs(l[0][0] - 72.83) < 0.05
        assert np.abs(u[0] - -0.069) < 0.05
        beta = beta + u / l
        assert np.abs(beta - -0.0325) < 0.01

        l, u = cox._get_efron_values(X, beta, T, E)
        l = -l

        # Third iteration: u is expected to be close to zero here.
        assert np.abs(l[0][0] - 72.70) < 0.01
        assert np.abs(u[0] - -0.000061) < 0.01
        beta = beta + u / l
        assert np.abs(beta - -0.0335) < 0.01
Exemplo n.º 20
0
    def test_using_dataframes_vs_numpy_arrays(self, data_pred2):
        """Partial hazards must be identical for DataFrame and numpy-array input.

        Checked first without normalization, then with it; the covariate
        frame selected after the first fit is reused for both checks.
        """
        plain = CoxPHFitter(normalize=False)
        plain.fit(data_pred2, 't', 'E')

        covariates = data_pred2[plain.data.columns]
        from_frame = plain.predict_partial_hazard(covariates)
        from_array = plain.predict_partial_hazard(np.array(covariates))
        assert np.all(from_frame == from_array)

        normalized = CoxPHFitter(normalize=True)
        normalized.fit(data_pred2, 't', 'E')

        from_frame = normalized.predict_partial_hazard(covariates)
        from_array = normalized.predict_partial_hazard(np.array(covariates))
        assert np.all(from_frame == from_array)
Exemplo n.º 21
0
 def test_predict_log_hazard_relative_to_mean_without_normalization(self, rossi):
     """Without normalization, the log relative hazard must equal
     log(partial hazard / partial hazard at the column means).
     """
     fitter = CoxPHFitter(normalize=False)
     fitter.fit(rossi, 'week', 'arrest')

     relative_log_hazard = fitter.predict_log_hazard_relative_to_mean(rossi)
     column_means = rossi.mean(0).to_frame().T

     # The hazard at the means must not be 1, otherwise the division below checks nothing.
     assert fitter.predict_partial_hazard(column_means).values[0][0] != 1.0

     hazard_ratio = fitter.predict_partial_hazard(rossi) / fitter.predict_partial_hazard(column_means).squeeze()
     assert_frame_equal(relative_log_hazard, np.log(hazard_ratio))
Exemplo n.º 22
0
def test_concordance_index_fast_is_same_as_slow():
    """The fast and slow concordance-index implementations must agree exactly.

    Checked on random data (including two degenerate prediction vectors) and
    on the partial hazards of a Cox model fitted to the rossi dataset.
    """
    n_samples = 100
    times = np.random.normal(size=n_samples)
    predictions = np.random.normal(size=n_samples)
    censoring = np.random.choice([0, 1], size=n_samples)
    zeros = np.zeros_like(times)

    # The first two cases (all-zero and perfect predictions) are hard to get
    # wrong; the random predictions are the real test.
    for predicted in (zeros, times, predictions):
        assert slow_cindex(times, predicted, censoring) == fast_cindex(times, predicted, censoring)

    fitter = CoxPHFitter()
    rossi = load_rossi()
    fitter.fit(rossi, duration_col='week', event_col='arrest')

    durations = fitter.durations.values.ravel()
    scores = -fitter.predict_partial_hazard(fitter.data).values.ravel()
    observed = fitter.event_observed.values.ravel()

    assert slow_cindex(durations, scores, observed) == fast_cindex(durations, scores, observed)
Exemplo n.º 23
0
    def test_prediction_methods_respect_index(self, data_pred2):
        """Predictions must carry the index of the input frame, in the input's order.

        The first four rows are taken by label with ``.loc`` (replacing the
        removed ``.ix`` indexer) and reversed, so the expected index is
        3, 2, 1, 0.
        """
        # .loc slicing is label-based and inclusive, giving rows 0 through 3.
        x = data_pred2[['x1', 'x2']].loc[:3].sort_index(ascending=False)
        expected_index = pd.Index(np.array([3, 2, 1, 0]))

        cph = CoxPHFitter()
        cph.fit(data_pred2, duration_col='t', event_col='E')
        npt.assert_array_equal(cph.predict_partial_hazard(x).index, expected_index)
        npt.assert_array_equal(cph.predict_percentile(x).index, expected_index)
        npt.assert_array_equal(cph.predict_expectation(x).index, expected_index)

        aaf = AalenAdditiveFitter()
        aaf.fit(data_pred2, duration_col='t', event_col='E')
        npt.assert_array_equal(aaf.predict_percentile(x).index, expected_index)
        npt.assert_array_equal(aaf.predict_expectation(x).index, expected_index)
Exemplo n.º 24
0
def test_cross_validator_with_specific_loss_function():
    """Scores from a custom evaluation measure must differ from the default scores."""
    def square_loss(y_actual, y_pred):
        # Mean of the squared differences between actual and predicted values.
        squared_error = (y_actual - y_pred)**2
        return squared_error.mean()

    fitter = CoxPHFitter()

    custom_scores = utils.k_fold_cross_validation(
        fitter,
        load_regression_dataset(),
        evaluation_measure=square_loss,
        duration_col="T",
        event_col="E",
    )
    default_scores = utils.k_fold_cross_validation(
        fitter,
        load_regression_dataset(),
        duration_col="T",
        event_col="E",
    )
    assert list(custom_scores) != list(default_scores)
Exemplo n.º 25
0
    def test_prediction_methods_respect_index(self, data_pred2):
        """Predictions must carry the index of the input frame, in the input's order.

        The first four rows are taken by label with ``.loc`` (replacing the
        removed ``.ix`` indexer) and reversed, so the expected index is
        3, 2, 1, 0. Both fitters' ``predict`` method is checked as well.
        """
        # .loc slicing is label-based and inclusive, giving rows 0 through 3.
        x = data_pred2[['x1', 'x2']].loc[:3].sort_index(ascending=False)
        expected_index = pd.Index(np.array([3, 2, 1, 0]))

        cph = CoxPHFitter()
        cph.fit(data_pred2, duration_col='t', event_col='E')
        npt.assert_array_equal(cph.predict_partial_hazard(x).index, expected_index)
        npt.assert_array_equal(cph.predict_percentile(x).index, expected_index)
        npt.assert_array_equal(cph.predict(x).index, expected_index)
        npt.assert_array_equal(cph.predict_expectation(x).index, expected_index)

        aaf = AalenAdditiveFitter()
        aaf.fit(data_pred2, duration_col='t', event_col='E')
        npt.assert_array_equal(aaf.predict_percentile(x).index, expected_index)
        npt.assert_array_equal(aaf.predict(x).index, expected_index)
        npt.assert_array_equal(aaf.predict_expectation(x).index, expected_index)
Exemplo n.º 26
0
def test_cross_validator_returns_fitters_k_results():
    """Cross-validating a list of two fitters must return two result lists of length k."""
    fitter = CoxPHFitter()
    fitters = [fitter, fitter]

    for n_folds in (3, 5):
        results = utils.k_fold_cross_validation(
            fitters,
            load_regression_dataset(),
            duration_col='T',
            event_col='E',
            k=n_folds,
        )
        assert len(results) == 2
        assert len(results[0]) == len(results[1]) == n_folds
Exemplo n.º 27
0
    def test_crossval_for_cox_ph(self, data_pred2, data_pred1):
        """3-fold cross-validation must give a mean c-index above 0.9 on both datasets."""
        fitter = CoxPHFitter()
        threshold = 0.9
        failure_template = "Expected min-mean c-index {:.2f} < {:.2f}"

        for dataset in (data_pred1, data_pred2):
            fold_scores = k_fold_cross_validation(
                fitter,
                dataset,
                duration_col='t',
                event_col='E',
                k=3,
                predictor='predict_partial_hazard')

            # Flipped because the scores come from predict_partial_hazard.
            mean_score = 1 - np.mean(fold_scores)

            assert mean_score > threshold, failure_template.format(threshold, mean_score)
Exemplo n.º 28
0
    def test_cox_ph_prediction_monotonicity(self, data_pred2):
        """Median and expectation predictions must give the same concordance as partial hazards.

        Concordance-wise, all prediction methods should be monotonic
        transformations of one another, barring numerical problems.
        """
        durations = data_pred2['t']
        events = data_pred2['E']
        covariates = data_pred2[['x1', 'x2']]

        fitter = CoxPHFitter()
        fitter.fit(data_pred2, duration_col='t', event_col='E')

        # Reference concordance, from the negated partial hazards.
        partial_hazard_scores = -fitter.predict_partial_hazard(covariates).values
        ci_ph = concordance_index(durations, partial_hazard_scores, events)

        median_scores = fitter.predict_median(covariates).ravel()
        ci_med = concordance_index(durations, median_scores, events)
        assert ci_ph == ci_med

        expectation_scores = fitter.predict_expectation(covariates).ravel()
        ci_exp = concordance_index(durations, expectation_scores, events)
        assert ci_ph == ci_exp
Exemplo n.º 29
0
    def test_fit_methods_can_accept_optional_event_col_param(self):
        """Both fitters accept ``fit`` with and without ``event_col``.

        When 'E' is given, ``event_observed`` must match that column cast to
        bool; when it is omitted, ``event_observed`` must be all ones.
        """
        frame = load_regression_dataset()

        aalen_fitter = AalenAdditiveFitter()
        aalen_fitter.fit(frame, 'T', event_col='E')
        stored_events = aalen_fitter.event_observed.sort_index()
        assert_series_equal(stored_events, frame['E'].astype(bool), check_names=False)

        aalen_fitter.fit(frame, 'T')
        all_observed = np.ones(frame.shape[0])
        npt.assert_array_equal(aalen_fitter.event_observed.values, all_observed)

        cox_fitter = CoxPHFitter()
        cox_fitter.fit(frame, 'T', event_col='E')
        stored_events = cox_fitter.event_observed.sort_index()
        assert_series_equal(stored_events, frame['E'].astype(bool), check_names=False)

        cox_fitter.fit(frame, 'T')
        all_observed = np.ones(frame.shape[0])
        npt.assert_array_equal(cox_fitter.event_observed.values, all_observed)
Exemplo n.º 30
0
    def test_cox_ph_prediction_monotonicity(self, data_pred2):
        """Every predict method must give the same concordance, with and without normalization.

        Concordance-wise, all prediction methods should be monotonic
        transformations of one another, barring numerical problems.
        """
        durations = data_pred2['t']
        events = data_pred2['E']
        covariates = data_pred2[['x1', 'x2']]

        for normalize in (True, False):
            qualifier = '' if normalize else 'not '
            msg = "Predict methods should get the same concordance" + " when {}normalizing".format(qualifier)

            fitter = CoxPHFitter(normalize=normalize)
            fitter.fit(data_pred2, duration_col='t', event_col='E')

            # Reference concordance, from the negated partial hazards.
            partial_hazard_scores = -fitter.predict_partial_hazard(covariates).values
            ci_ph = concordance_index(durations, partial_hazard_scores, events)

            median_scores = fitter.predict_median(covariates).ravel()
            ci_med = concordance_index(durations, median_scores, events)
            assert ci_ph == ci_med, msg

            expectation_scores = fitter.predict_expectation(covariates).ravel()
            ci_exp = concordance_index(durations, expectation_scores, events)
            assert ci_ph == ci_exp, msg
Exemplo n.º 31
0
    def test_crossval_for_cox_ph_with_normalizing_times(self, data_pred2, data_pred1):
        """3-fold cross-validation must still give a mean c-index above 0.9
        after the duration column has been standardized.
        """
        cf = CoxPHFitter()

        for data_pred in [data_pred1, data_pred2]:

            # Work on a copy so the standardized durations are written to
            # data_norm rather than to the fixture frame.
            data_norm = data_pred.copy()
            times = data_norm['t']
            # Normalize to mean = 0 and standard deviation = 1
            times -= np.mean(times)
            times /= np.std(times)
            data_norm['t'] = times

            scores = k_fold_cross_validation(cf, data_norm,
                                             duration_col='t',
                                             event_col='E', k=3,
                                             predictor='predict_partial_hazard')

            # NOTE(review): flipped with 1 - mean, presumably because the scores
            # come from predict_partial_hazard - confirm.
            mean_score = 1 - np.mean(scores)

            expected = 0.9
            msg = "Expected min-mean c-index {:.2f} < {:.2f}"
            assert mean_score > expected, msg.format(expected, mean_score)
Exemplo n.º 32
0
    def test_data_normalization(self, data_pred2):
        """Predictions on the stored training data and on the raw input must rank identically.

        During fit, CoxPH copies the training data and normalizes it. Later
        calls should be normalized the same way, and the internal training
        set should not be kept in a normalized state.
        """
        fitter = CoxPHFitter()
        fitter.fit(data_pred2, duration_col='t', event_col='E')

        # Concordance computed from the fitter's own copy of the training set.
        stored_hazards = fitter.predict_partial_hazard(fitter.data)
        ci_trn = concordance_index(fitter.durations, -stored_hazards.values, fitter.event_observed)

        # Concordance computed from the original frame, which must normalize identically.
        raw_hazards = fitter.predict_partial_hazard(data_pred2[['x1', 'x2']])
        ci_org = concordance_index(data_pred2['t'], -raw_hazards.values, data_pred2['E'])

        assert ci_org == ci_trn
Exemplo n.º 33
0
    def test_cox_ph_prediction_monotonicity(self, data_pred2):
        """Every predict method must give the same concordance, with and without normalization.

        Concordance-wise, all prediction methods should be monotonic
        transformations of one another, barring numerical problems.
        """
        observed_times = data_pred2['t']
        event_flags = data_pred2['E']
        features = data_pred2[['x1', 'x2']]

        for normalize in (True, False):
            prefix = '' if normalize else 'not '
            msg = "Predict methods should get the same concordance" + " when {}normalizing".format(prefix)

            model = CoxPHFitter(normalize=normalize)
            model.fit(data_pred2, duration_col='t', event_col='E')

            # Partial hazards are the baseline every other method is compared with.
            baseline_ci = concordance_index(observed_times, -model.predict_partial_hazard(features).values, event_flags)

            median_ci = concordance_index(observed_times, model.predict_median(features).ravel(), event_flags)
            assert baseline_ci == median_ci, msg

            expectation_ci = concordance_index(observed_times, model.predict_expectation(features).ravel(), event_flags)
            assert baseline_ci == expectation_ci, msg
Exemplo n.º 34
0
    def test_print_summary(self):

        import sys
        try:
            from StringIO import StringIO
        except:
            from io import StringIO

        saved_stdout = sys.stdout
        try:
            out = StringIO()
            sys.stdout = out

            cp = CoxPHFitter()
            df = load_rossi()
            cp.fit(df, duration_col='week', event_col='arrest')
            cp.print_summary()
            output = out.getvalue().strip().split()
            expected = """n=432, number of events=114

           coef  exp(coef)  se(coef)          z         p  lower 0.95  upper 0.95
fin  -1.897e-01  8.272e-01 9.579e-02 -1.981e+00 4.763e-02  -3.775e-01  -1.938e-03   *
age  -3.500e-01  7.047e-01 1.344e-01 -2.604e+00 9.210e-03  -6.134e-01  -8.651e-02  **
race  1.032e-01  1.109e+00 1.012e-01  1.020e+00 3.078e-01  -9.516e-02   3.015e-01
wexp -7.486e-02  9.279e-01 1.051e-01 -7.124e-01 4.762e-01  -2.809e-01   1.311e-01
mar  -1.421e-01  8.675e-01 1.254e-01 -1.134e+00 2.570e-01  -3.880e-01   1.037e-01
paro -4.134e-02  9.595e-01 9.522e-02 -4.341e-01 6.642e-01  -2.280e-01   1.453e-01
prio  2.639e-01  1.302e+00 8.291e-02  3.182e+00 1.460e-03   1.013e-01   4.264e-01  **
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Concordance = 0.640""".strip().split()
            for i in [0, 1, 2, -2, -1]:
                assert output[i] == expected[i]
        finally:
            sys.stdout = saved_stdout
Exemplo n.º 35
0
def test_cross_validator_with_stratified_cox_model():
    """k-fold cross-validation must run with a Cox model stratified on 'race'."""
    stratified_fitter = CoxPHFitter(strata=["race"])
    rossi = load_rossi()
    utils.k_fold_cross_validation(
        stratified_fitter,
        rossi,
        duration_col="week",
        event_col="arrest",
    )
Exemplo n.º 36
0
 def test_predict_log_hazard_relative_to_mean(self, rossi):
     """The log relative hazard must equal log(partial hazard / partial hazard at the column means)."""
     fitter = CoxPHFitter()
     fitter.fit(rossi, 'week', 'arrest')

     relative_log_hazard = fitter.predict_log_hazard_relative_to_mean(rossi)
     column_means = rossi.mean(0).to_frame().T

     hazard_ratio = fitter.predict_partial_hazard(rossi) / fitter.predict_partial_hazard(column_means).squeeze()
     assert_frame_equal(relative_log_hazard, np.log(hazard_ratio))
Exemplo n.º 37
0
 def test_log_likelihood_is_available_in_output(self, data_nus):
     """Fitting with ``include_likelihood=True`` must store the expected log-likelihood."""
     fitter = CoxPHFitter()
     fitter.fit(data_nus, duration_col='t', event_col='E', include_likelihood=True)

     deviation = abs(fitter._log_likelihood - -12.7601409152)
     assert deviation < 0.001
Exemplo n.º 38
0
 def test_input_column_order_is_equal_to_output_hazards_order(self, rossi):
     """The columns of ``hazards_`` must keep the covariate order of the input frame.

     'week' is passed as the duration column and 'arrest' as the event
     column; the two were previously swapped.
     """
     cp = CoxPHFitter()
     expected = ['fin', 'age', 'race', 'wexp', 'mar', 'paro', 'prio']
     cp.fit(rossi, duration_col='week', event_col='arrest')
     assert list(cp.hazards_.columns) == expected
Exemplo n.º 39
0
 def test_strata_removes_variable_from_summary_output(self, rossi):
     """A column used as a stratum must not appear in the summary index."""
     fitter = CoxPHFitter()
     fitter.fit(rossi, 'week', 'arrest', strata=['race'])

     summary_rows = fitter.summary.index
     assert 'race' not in summary_rows
Exemplo n.º 40
0
def test_cross_validator_with_predictor():
    """3-fold cross-validation with ``predict_expectation`` must return three scores."""
    fitter = CoxPHFitter()
    fold_scores = utils.k_fold_cross_validation(
        fitter,
        load_regression_dataset(),
        duration_col='T',
        event_col='E',
        k=3,
        predictor="predict_expectation",
    )
    assert len(fold_scores) == 3
Exemplo n.º 41
0
 def test_fit_method(self, data_nus):
     """The fitted coefficient on the NUS data must be -0.0335 (to 1e-4) without normalization.

     The first row, first column of ``hazards_`` is read positionally with
     ``.iloc`` instead of the removed ``.ix`` indexer.
     """
     cf = CoxPHFitter(normalize=False)
     cf.fit(data_nus, duration_col='t', event_col='E')
     assert np.abs(cf.hazards_.iloc[0, 0] - -0.0335) < 0.0001
Exemplo n.º 42
0
 def test_fit_method(self, data_nus):
     """The fitted coefficient on the NUS data must be -0.0335 (to 1e-4).

     The first row, first column of ``hazards_`` is read positionally with
     ``.iloc`` instead of the removed ``.ix`` indexer.
     """
     cf = CoxPHFitter()
     cf.fit(data_nus, duration_col='t', event_col='E')
     assert np.abs(cf.hazards_.iloc[0, 0] - -0.0335) < 0.0001
Exemplo n.º 43
0
 def test_strata_removes_variable_from_summary_output(self):
     """A column used as a stratum must not appear in the summary index."""
     rossi = load_rossi()
     fitter = CoxPHFitter()
     fitter.fit(rossi, 'week', 'arrest', strata=['race'])

     summary_rows = fitter.summary.index
     assert 'race' not in summary_rows
Exemplo n.º 44
0
 def test_log_likelihood_is_available_in_output(self, data_nus):
     """Fitting with ``include_likelihood=True`` must store the expected log-likelihood."""
     model = CoxPHFitter()
     model.fit(data_nus, duration_col='t', event_col='E', include_likelihood=True)

     distance_from_reference = abs(model._log_likelihood - -12.7601409152)
     assert distance_from_reference < 0.001
Exemplo n.º 45
0
 def test_efron_newtons_method(self, data_nus):
     """``_newton_rhaphson`` on the NUS data must return -0.0335 (to 1e-4) at position [0][0]."""
     fitter = CoxPHFitter()
     solve = fitter._newton_rhaphson

     covariates = data_nus[['x']]
     durations = data_nus['t']
     events = data_nus['E']

     estimate = solve(covariates, durations, events)[0][0]
     assert np.abs(estimate - -0.0335) < 0.0001
Exemplo n.º 46
0
 def regression_models(self):
     """Return a list of three fitters: a Cox model, an Aalen additive model and a stratified Cox model."""
     strata_columns = ['race', 'paro', 'mar', 'wexp']
     plain_cox = CoxPHFitter()
     aalen = AalenAdditiveFitter()
     stratified_cox = CoxPHFitter(strata=strata_columns)
     return [plain_cox, aalen, stratified_cox]
# NOTE(review): this fragment starts mid-script; kaplen_meier, df, time_of_event,
# time, event and plt are defined before the visible part.

# Fit and plot the Kaplan-Meier estimate for all patients.
kaplen_meier.fit(time_of_event, timeline=time, event_observed=event, label='All patients')
kaplen_meier.plot()
plt.show()

# Stratify by congestive heart complications (rows where 'chf' equals 1).
history = df['chf'] == 1;

# Fit both groups in turn and draw them on the same axes.
kaplen_meier = KaplanMeierFitter()
kaplen_meier.fit(time_of_event[history], timeline=time, event_observed=event[history], label='Congestive heart complications')
ax = kaplen_meier.plot()

kaplen_meier.fit(time_of_event[~history], timeline=time, event_observed=event[~history], label='No congestive heart complications')
kaplen_meier.plot(ax=ax, c="b")

plt.show()

# Cox proportional hazards model on the selected columns:
# 'lenfol' is the duration column and 'fstat' the event column.
ph_data = df[["fstat", "lenfol", "bmi", "age"]]

ph = CoxPHFitter()
ph.fit(ph_data, 'lenfol', event_col='fstat')
ph.print_summary()

print(ph.baseline_hazard_.head())

# Use predict_survival_function to get survival curves for a few rows,
# passing only the covariate columns.
# NOTE(review): .ix was removed from pandas; whether .loc or .iloc is the
# right replacement depends on df's index type - confirm before changing.
x = ph_data[ph_data.columns.difference(['lenfol', 'fstat'])].ix[23:25]
print(x)
ph.predict_survival_function(x).plot()
plt.show()
Exemplo n.º 48
0
def test_cross_validator_with_predictor_and_kwargs():
    """3-fold cross-validation must forward ``predictor_kwargs`` and return three scores."""
    fitter = CoxPHFitter()
    percentile_options = {'p': 0.6}

    results_06 = utils.k_fold_cross_validation(
        fitter,
        load_regression_dataset(),
        duration_col='T',
        k=3,
        predictor="predict_percentile",
        predictor_kwargs=percentile_options,
    )
    assert len(results_06) == 3
Exemplo n.º 49
0
def test_cross_validator_with_stratified_cox_model():
    """k-fold cross-validation must run with a Cox model stratified on 'race'."""
    stratified_fitter = CoxPHFitter(strata=['race'])
    rossi = load_rossi()
    utils.k_fold_cross_validation(stratified_fitter, rossi, duration_col='week', event_col='arrest')
Exemplo n.º 50
0
 def test_input_column_order_is_equal_to_output_hazards_order(self):
     """The columns of ``hazards_`` must keep the covariate order of the rossi frame.

     'week' is passed as the duration column and 'arrest' as the event
     column; the two were previously swapped.
     """
     rossi = load_rossi()
     cp = CoxPHFitter()
     expected = ['fin', 'age', 'race', 'wexp', 'mar', 'paro', 'prio']
     cp.fit(rossi, duration_col='week', event_col='arrest')
     assert list(cp.hazards_.columns) == expected
Exemplo n.º 51
0
#cox regression

if __name__ == "__main__":
    # Timing script: fit a Cox model on the rossi dataset stacked 20 times
    # and print how long the fit took.
    import pandas as pd
    import time

    from lifelines.estimation import CoxPHFitter
    from lifelines.datasets import load_rossi

    stacked = pd.concat([load_rossi()] * 20)
    fitter = CoxPHFitter()

    started_at = time.time()
    fitter.fit(stacked, duration_col='week', event_col="arrest")
    elapsed = time.time() - started_at
    print("--- %s seconds ---" % (elapsed))
Exemplo n.º 52
0
    def test_using_dataframes_vs_numpy_arrays(self, data_pred2):
        """Partial hazards must be identical for DataFrame and numpy-array input.

        Checked first without normalization, then with it; the covariate
        frame selected after the first fit is reused for both checks.
        """
        # Without normalization.
        model = CoxPHFitter(normalize=False)
        model.fit(data_pred2, 't', 'E')

        features = data_pred2[model.data.columns]
        frame_hazards = model.predict_partial_hazard(features)
        array_hazards = model.predict_partial_hazard(np.array(features))
        assert np.all(frame_hazards == array_hazards)

        # With normalization.
        model = CoxPHFitter(normalize=True)
        model.fit(data_pred2, 't', 'E')

        frame_hazards = model.predict_partial_hazard(features)
        array_hazards = model.predict_partial_hazard(np.array(features))
        assert np.all(frame_hazards == array_hazards)