Exemplo n.º 1
0
    def test_fit_subsample(make_whas500):
        """Fit gradient boosting with ``subsample=0.6`` and check its outputs.

        Checks the resolved ``max_features_``, that out-of-bag improvements are
        recorded with one entry per boosting iteration, the concordance
        statistics on a subset of the rows, and the error raised when
        predicting on input with only two columns.
        """
        data = make_whas500(with_std=False, to_numeric=True)
        n_rounds = 50

        gbm = GradientBoostingSurvivalAnalysis(
            n_estimators=n_rounds,
            max_features=8,
            subsample=0.6,
            random_state=0,
        )
        gbm.fit(data.x, data.y)

        assert gbm.max_features_ == 8
        assert hasattr(gbm, "oob_improvement_")

        # Evaluate on every row except a fixed handful of excluded indices.
        excluded_rows = [35, 111, 174, 206, 236, 268, 497]
        keep = numpy.ones(data.x.shape[0], dtype=bool)
        keep[excluded_rows] = False
        x_subset = data.x[keep]
        y_subset = data.y[keep]

        risk = gbm.predict(x_subset)

        expected_stats = (0.8330510326740247, 60985, 12221, 2, 14)
        assert_cindex_almost_equal(y_subset['fstat'], y_subset['lenfol'], risk,
                                   expected_stats)

        assert gbm.train_score_.shape == (n_rounds,)
        assert gbm.oob_improvement_.shape == (n_rounds,)

        expected_msg = ("Number of features of the model must match the input. "
                        "Model n_features is 14 and input n_features is 2 ")
        with pytest.raises(ValueError, match=expected_msg):
            gbm.predict(data.x[:, :2])
Exemplo n.º 2
0
    def test_fit(make_whas500):
        """Fit component-wise gradient boosting and verify scores and coefficients.

        Compares the concordance statistics on the training data, the learned
        coefficient vector, the length of ``train_score_``, and the error
        raised when predicting on input with only two columns.
        """
        data = make_whas500(with_std=False, to_numeric=True)
        n_rounds = 100

        booster = ComponentwiseGradientBoostingSurvivalAnalysis(n_estimators=n_rounds)
        booster.fit(data.x, data.y)
        risk = booster.predict(data.x)

        expected_stats = (0.7755659, 58283, 16866, 0, 14)
        assert_cindex_almost_equal(data.y['fstat'], data.y['lenfol'], risk,
                                   expected_stats)

        # All coefficients are expected to be zero except the ones set below.
        nonzero_coef = {
            'age': 0.040919,
            'hr': 0.004977,
            'diasbp': -0.003407,
            'bmi': -0.017938,
            'sho': 0.429904,
            'chf': 0.508211,
        }
        expected_coef = pandas.Series(numpy.zeros(15, dtype=float), index=data.names)
        for label, value in nonzero_coef.items():
            expected_coef[label] = value

        assert_array_almost_equal(expected_coef.values, booster.coef_)

        assert booster.train_score_.shape == (n_rounds,)

        expected_msg = ('Dimensions of X are inconsistent with training data: '
                        'expected 14 features, but got 2')
        with pytest.raises(ValueError, match=expected_msg):
            booster.predict(data.x[:, :2])
Exemplo n.º 3
0
    def test_fit_subsample(make_whas500):
        """Fit component-wise gradient boosting with ``subsample=0.6``.

        Verifies the concordance statistics on the training data, the learned
        coefficients, that ``train_score_`` and ``oob_improvement_`` hold one
        entry per iteration, and the error raised for two-column input.
        """
        data = make_whas500(with_std=False, to_numeric=True)
        n_rounds = 100

        booster = ComponentwiseGradientBoostingSurvivalAnalysis(
            n_estimators=n_rounds, subsample=0.6, random_state=0)
        booster.fit(data.x, data.y)
        risk = booster.predict(data.x)

        expected_stats = (0.7750602, 58245, 16904, 0, 14)
        assert_cindex_almost_equal(data.y['fstat'], data.y['lenfol'], risk,
                                   expected_stats)

        # All coefficients are expected to be zero except the ones set below.
        nonzero_coef = {
            'age': 0.041299,
            'hr': 0.00487,
            'diasbp': -0.003381,
            'bmi': -0.017018,
            'sho': 0.433685,
            'chf': 0.510277,
        }
        expected_coef = pandas.Series(numpy.zeros(15, dtype=float), index=data.names)
        for label, value in nonzero_coef.items():
            expected_coef[label] = value

        assert_array_almost_equal(expected_coef.values, booster.coef_)

        assert booster.train_score_.shape == (n_rounds,)
        assert booster.oob_improvement_.shape == (n_rounds,)

        expected_msg = ('Dimensions of X are inconsistent with training data: '
                        'expected 14 features, but got 2')
        with pytest.raises(ValueError, match=expected_msg):
            booster.predict(data.x[:, :2])
Exemplo n.º 4
0
    def test_compare_clinical_kernel(make_whas500):
        """Compare a kernel SVM against a linear SVM trained on KernelPCA output.

        Both use the clinical kernel from ``ClinicalKernelTransform``; the
        concordance statistics of the kernel model's predictions must match
        those computed from the linear model's predictions.
        """
        data = make_whas500(to_numeric=True)

        clinical = ClinicalKernelTransform()
        clinical.fit(data.x_data_frame)

        # Solver settings shared by both survival SVMs.
        svm_settings = dict(optimizer='rbtree', tol=1e-8, max_iter=500, random_state=0)

        kernel_pca = KernelPCA(kernel=clinical.pairwise_kernel, copy_X=True)
        embedded = kernel_pca.fit_transform(data.x)

        linear_svm = FastSurvivalSVM(**svm_settings)
        linear_svm.fit(embedded, data.y)

        kernel_svm = FastKernelSurvivalSVM(kernel=clinical.pairwise_kernel, **svm_settings)
        kernel_svm.fit(data.x, data.y)

        pred_linear = linear_svm.predict(kernel_pca.transform(data.x))
        pred_kernel = kernel_svm.predict(data.x)

        assert len(pred_linear) == len(pred_kernel)

        event = data.y['fstat']
        time = data.y['lenfol']
        reference = concordance_index_censored(event, time, pred_linear)
        assert_cindex_almost_equal(data.y['fstat'], data.y['lenfol'],
                                   pred_kernel, reference)
Exemplo n.º 5
0
    def test_survival_squared_hinge_loss(whas500_without_ties):
        """Check that the naive and fast survival SVMs agree with each other.

        Coefficients are compared to three decimals, and the concordance
        statistics of the fast model must match those computed from the naive
        model's predictions.
        """
        features, target = whas500_without_ties

        # Solver settings shared by both models.
        shared = dict(tol=8e-7, max_iter=1000, random_state=0)

        naive_svm = NaiveSurvivalSVM(loss='squared_hinge', dual=False, **shared)
        naive_svm.fit(features, target)

        fast_svm = FastSurvivalSVM(optimizer='avltree', **shared)
        fast_svm.fit(features, target)

        assert_array_almost_equal(naive_svm.coef_.ravel(), fast_svm.coef_, decimal=3)

        pred_naive = naive_svm.predict(features)
        pred_fast = fast_svm.predict(features)

        assert len(pred_naive) == len(pred_fast)

        reference = concordance_index_censored(target['fstat'], target['lenfol'],
                                               pred_naive)
        assert_cindex_almost_equal(target['fstat'], target['lenfol'], pred_fast,
                                   reference)
Exemplo n.º 6
0
    def test_compare_builtin_kernel(make_whas500):
        """Compare a polynomial-kernel SVM with a linear SVM on KernelPCA output.

        The kernel model and the KernelPCA transform use the same polynomial
        kernel parameters; the concordance statistics of the kernel model must
        match those computed from the linear model's predictions.
        """
        data = make_whas500(to_numeric=True)
        features = normalize(data.x)

        seed = 0xf38
        kernel_settings = dict(kernel="polynomial", gamma=0.5, degree=2)
        solver_settings = dict(optimizer='rbtree', tol=1e-8, max_iter=100,
                               random_state=seed)

        kernel_svm = FastKernelSurvivalSVM(**kernel_settings, **solver_settings)
        kernel_svm.fit(features, data.y)
        pred_kernel = kernel_svm.predict(features)

        kernel_pca = KernelPCA(copy_X=True, random_state=seed, **kernel_settings)
        embedded = kernel_pca.fit_transform(features)

        linear_svm = FastSurvivalSVM(**solver_settings)
        linear_svm.fit(embedded, data.y)
        pred_linear = linear_svm.predict(embedded)

        assert len(pred_linear) == len(pred_kernel)

        reference = concordance_index_censored(data.y['fstat'],
                                               data.y['lenfol'],
                                               pred_linear)
        assert_cindex_almost_equal(data.y['fstat'], data.y['lenfol'],
                                   pred_kernel, reference)
    def test_fit_custom_kernel(make_whas500):
        """Fit an ``EnsembleSelection`` mixing linear and custom-kernel SVMs.

        One linear and one kernel survival SVM is created per ``alpha`` value
        in the grid; the fitted ensemble is expected to hold ten members and
        to reproduce the reference concordance statistics.
        """
        data = make_whas500(with_mean=False, with_std=False, to_numeric=True)

        # Five alpha values, evenly spaced on a log scale between 0.001 and 0.5.
        log_alphas = numpy.linspace(numpy.log(0.001), numpy.log(0.5), 5)
        alphas = numpy.exp(log_alphas)
        svm_grid = ParameterGrid({"alpha": alphas})

        transform = ClinicalKernelTransform(fit_once=True)
        transform.prepare(data.x_data_frame)

        linear_members = [
            ("svm_linear_%d" % idx,
             FastSurvivalSVM(max_iter=100, random_state=0, **grid_point))
            for idx, grid_point in enumerate(svm_grid)
        ]
        kernel_members = [
            ("svm_kernel_%d" % idx,
             FastKernelSurvivalSVM(kernel=transform.pairwise_kernel, max_iter=45, tol=1e-5,
                                   random_state=0, **grid_point))
            for idx, grid_point in enumerate(svm_grid)
        ]
        base_estimators = linear_members + kernel_members

        folds = KFold(n_splits=3, shuffle=True, random_state=0)
        ensemble = EnsembleSelection(base_estimators, n_estimators=0.4,
                                     scorer=score_cindex, cv=folds, n_jobs=4)

        ensemble.fit(data.x, data.y)
        assert len(ensemble) == 10
        assert ensemble.scores_.shape == (10,)

        risk = ensemble.predict(data.x)

        expected_stats = (0.7978084, 59938, 15178, 33, 14)
        assert_cindex_almost_equal(data.y['fstat'], data.y['lenfol'], risk,
                                   expected_stats)
Exemplo n.º 8
0
    def test_toy_minlip_predict_1_osqp(self, toy_data):
        """Fit ``self.minlip_model`` on the toy data and check its concordance."""
        features, target = toy_data
        estimator = self.minlip_model
        estimator.fit(features, target)

        risk = estimator.predict(features)
        expected_stats = (1.0, 11, 0, 0, 0)
        assert_cindex_almost_equal(target['status'], target['time'], risk,
                                   expected_stats)
Exemplo n.º 9
0
    def test_toy_minlip_predict_1_cvxopt(self, toy_data):
        """Fit ``self.minlip_model`` on the toy data, expecting a deprecation warning.

        The fit call must emit a deprecation warning; afterwards the
        predictions are checked against the reference concordance statistics.
        """
        features, target = toy_data
        estimator = self.minlip_model
        with pytest.deprecated_call():
            estimator.fit(features, target)

        risk = estimator.predict(features)
        expected_stats = (1.0, 11, 0, 0, 0)
        assert_cindex_almost_equal(target['status'], target['time'], risk,
                                   expected_stats)
Exemplo n.º 10
0
    def test_breast_cancer_cvxpy(gbsg2):
        """Fit ``MinlipSurvivalAnalysis`` with the cvxpy solver on ``gbsg2``.

        Checks that ``coef_`` has one row with one entry per training sample
        and that the predictions reproduce the reference concordance statistics.
        """
        features, target = gbsg2
        estimator = MinlipSurvivalAnalysis(solver="cvxpy", alpha=1, pairs="next")
        estimator.fit(features, target)

        n_samples = features.shape[0]
        assert estimator.coef_.shape == (1, n_samples)

        risk = estimator.predict(features)
        expected_stats = (0.59576770470121443, 79280, 53792, 0, 32)
        assert_cindex_almost_equal(target['cens'], target['time'], risk,
                                   expected_stats)
    def test_breast_cancer_cvxopt(self, gbsg2):
        """Fit ``self.model`` on ``gbsg2`` and verify coefficient shape and concordance."""
        features, target = gbsg2
        estimator = self.model
        estimator.fit(features, target)

        n_samples = features.shape[0]
        assert estimator.coef_.shape == (1, n_samples)

        risk = estimator.predict(features)
        expected_stats = (0.59570007214139709, 79271, 53801, 0, 42)
        assert_cindex_almost_equal(target['cens'], target['time'], risk,
                                   expected_stats)
Exemplo n.º 12
0
    def test_toy_hinge_fit(self, toy_data):
        """Fit ``self.svm_model`` on the toy data and verify shape and concordance."""
        features, target = toy_data
        estimator = self.svm_model
        estimator.fit(features, target)

        n_samples = features.shape[0]
        assert estimator.coef_.shape == (1, n_samples)

        risk = estimator.predict(features)
        expected_stats = (1.0, 11, 0, 0, 0)
        assert_cindex_almost_equal(target['status'], target['time'], risk,
                                   expected_stats)
Exemplo n.º 13
0
def test_fit_no_bootstrap(make_whas500):
    """Fit a ten-tree ``RandomSurvivalForest`` with ``bootstrap=False``.

    The predictions on the training data must reproduce the reference
    concordance statistics.
    """
    data = make_whas500(to_numeric=True)

    rsf = RandomSurvivalForest(n_estimators=10, bootstrap=False, random_state=2)
    rsf.fit(data.x, data.y)

    risk = rsf.predict(data.x)

    expected_stats = (0.931881994437717, 70030, 5119, 0, 14)
    event, time = data.y["fstat"], data.y["lenfol"]
    assert_cindex_almost_equal(event, time, risk, expected_stats)
Exemplo n.º 14
0
    def test_breast_cancer_ecos(gbsg2):
        """Fit ``MinlipSurvivalAnalysis`` with the ecos solver on scaled ``gbsg2``.

        Checks that ``coef_`` has one row with one entry per training sample
        and that the predictions reproduce the reference concordance statistics.
        """
        raw_features, target = gbsg2
        features = scale(raw_features)
        estimator = MinlipSurvivalAnalysis(solver="ecos", alpha=1, pairs="next")
        estimator.fit(features, target)

        n_samples = features.shape[0]
        assert estimator.coef_.shape == (1, n_samples)

        risk = estimator.predict(features)
        expected_stats = (0.5990741854033906, 79720, 53352, 0, 42)
        assert_cindex_almost_equal(target['cens'], target['time'], risk,
                                   expected_stats)
    def test_fit_spearman_correlation(make_whas500):
        """Fit the survival ensemble created with ``correlation="spearman"``.

        The ensemble must be empty before fitting, and its predictions must
        reproduce the reference concordance statistics afterwards.
        """
        data = make_whas500(with_mean=False, with_std=False, to_numeric=True)
        ensemble = _create_survival_ensemble(correlation="spearman")
        assert len(ensemble) == 0

        ensemble.fit(data.x, data.y)

        risk = ensemble.predict(data.x)

        expected_stats = (0.7863312, 59088, 16053, 8, 119)
        assert_cindex_almost_equal(data.y['fstat'], data.y['lenfol'], risk,
                                   expected_stats)
    def test_fit_kendall_correlation(make_whas500):
        """Fit the survival ensemble created with ``correlation="kendall"``.

        The ensemble must be empty before fitting, and its predictions must
        reproduce the reference concordance statistics afterwards.
        """
        data = make_whas500(with_mean=False, with_std=False, to_numeric=True)
        ensemble = _create_survival_ensemble(correlation="kendall")
        assert len(ensemble) == 0

        ensemble.fit(data.x, data.y)

        risk = ensemble.predict(data.x)

        expected_stats = (0.7663043, 57570, 17545, 34, 119)
        assert_cindex_almost_equal(data.y['fstat'], data.y['lenfol'], risk,
                                   expected_stats)
Exemplo n.º 17
0
    def test_breast_cancer_osqp(gbsg2):
        """Fit ``MinlipSurvivalAnalysis`` with the osqp solver on scaled ``gbsg2``.

        Checks that ``coef_`` has one row with one entry per training sample
        and that the predictions reproduce the reference concordance statistics.
        """
        raw_features, target = gbsg2
        features = scale(raw_features)
        estimator = MinlipSurvivalAnalysis(solver="osqp", alpha=1, pairs="next")
        estimator.fit(features, target)

        n_samples = features.shape[0]
        assert estimator.coef_.shape == (1, n_samples)

        risk = estimator.predict(features)
        expected_stats = (0.599066670674522, 79719, 53353, 0, 42)
        assert_cindex_almost_equal(target['cens'], target['time'], risk,
                                   expected_stats)
    def test_breast_cancer_rbf_cvxpy(gbsg2):
        """Fit an RBF-kernel ``MinlipSurvivalAnalysis`` with the cvxpy solver.

        Uses scaled ``gbsg2`` features; checks the shape of ``coef_`` and the
        reference concordance statistics of the predictions.
        """
        raw_features, target = gbsg2
        features = scale(raw_features)
        estimator = MinlipSurvivalAnalysis(
            solver="cvxpy",
            alpha=1,
            kernel="rbf",
            gamma=1. / 8,
            pairs="next",
            max_iter=1000,
        )
        estimator.fit(features, target)

        n_samples = features.shape[0]
        assert estimator.coef_.shape == (1, n_samples)

        risk = estimator.predict(features)
        expected_stats = (0.6105867500300589, 81252, 51820, 0, 42)
        assert_cindex_almost_equal(target['cens'], target['time'], risk,
                                   expected_stats)
Exemplo n.º 19
0
    def test_predict(make_whas500):
        """Train ``IPCRidge`` on the first 400 samples and evaluate on the rest.

        The negated predictions must reproduce the reference concordance
        statistics, and ``model.score`` must equal one minus the reference
        concordance index (within floating-point tolerance).
        """
        whas500 = make_whas500()
        model = IPCRidge()
        model.fit(whas500.x[:400], whas500.y[:400])

        x_test = whas500.x[400:]
        y_test = whas500.y[400:]
        p = model.predict(x_test)

        expected_cindex = 0.66925817946226107
        assert_cindex_almost_equal(y_test['fstat'], y_test['lenfol'], -p,
                                   (expected_cindex, 2066, 1021, 0, 1))

        # The score is a computed float; comparing it with ``==`` is brittle
        # across platforms and library versions, so allow a small tolerance.
        score = model.score(x_test, y_test)
        assert abs(score - (1.0 - expected_cindex)) < 1e-7
    def test_breast_cancer_rbf_cvxopt(self, gbsg2):
        """Fit ``self.model`` with an RBF kernel (``gamma=1/8``) on scaled ``gbsg2``.

        Checks the shape of ``coef_`` and the reference concordance statistics
        of the predictions.
        """
        raw_features, target = gbsg2
        features = scale(raw_features)
        estimator = self.model
        estimator.set_params(kernel="rbf", gamma=1. / 8)
        estimator.fit(features, target)

        n_samples = features.shape[0]
        assert estimator.coef_.shape == (1, n_samples)

        risk = estimator.predict(features)
        expected_stats = (0.6106092942166647, 81255, 51817, 0, 42)
        assert_cindex_almost_equal(target['cens'], target['time'], risk,
                                   expected_stats)
Exemplo n.º 21
0
    def test_breast_cancer_rbf_cvxopt(self, gbsg2):
        """Fit ``self.model`` with an RBF kernel (``gamma=32``) on scaled ``gbsg2``.

        Checks the shape of ``coef_`` and the reference concordance statistics
        of the predictions.
        """
        raw_features, target = gbsg2
        features = scale(raw_features)
        estimator = self.model
        estimator.set_params(kernel="rbf", gamma=32)
        estimator.fit(features, target)

        n_samples = features.shape[0]
        assert estimator.coef_.shape == (1, n_samples)

        risk = estimator.predict(features)
        expected_stats = (0.6487427858602861, 85974, 46387, 711, 32)
        assert_cindex_almost_equal(target['cens'], target['time'], risk,
                                   expected_stats)
    def test_fit(make_whas500):
        """Fit the default survival ensemble and verify its size and predictions.

        The ensemble must be empty before fitting and hold eleven members with
        eleven scores afterwards; predictions must reproduce the reference
        concordance statistics.
        """
        data = make_whas500(with_mean=False, with_std=False, to_numeric=True)
        ensemble = _create_survival_ensemble()
        assert len(ensemble) == 0

        ensemble.fit(data.x, data.y)
        expected_size = 11
        assert len(ensemble) == expected_size
        assert ensemble.scores_.shape == (expected_size,)

        risk = ensemble.predict(data.x)

        expected_stats = (0.7863312, 59088, 16053, 8, 119)
        assert_cindex_almost_equal(data.y['fstat'], data.y['lenfol'], risk,
                                   expected_stats)
Exemplo n.º 23
0
    def test_predict(make_whas500):
        """Fit a ``Stacking`` model over CoxPH and a survival SVM, then predict.

        ``MeanEstimator`` is the meta estimator and ``probabilities=False`` is
        passed; predictions must reproduce the reference concordance statistics.
        """
        data = make_whas500(with_mean=False, with_std=False, to_numeric=True)

        base_estimators = [
            ('coxph', CoxPHSurvivalAnalysis()),
            ('svm', FastSurvivalSVM(random_state=0)),
        ]
        stacked = Stacking(MeanEstimator(), base_estimators, probabilities=False)

        stacked.fit(data.x, data.y)

        # result is different if randomForestSRC has not been compiled with OpenMP support
        # NOTE(review): no randomForestSRC-based estimator is used in this test;
        # the remark above may be stale - confirm before relying on it.
        risk = stacked.predict(data.x)
        expected_stats = (0.7848807, 58983, 16166, 0, 14)
        assert_cindex_almost_equal(data.y['fstat'], data.y['lenfol'], risk,
                                   expected_stats)
Exemplo n.º 24
0
def test_fit_predict(make_whas500, name, expected_c):
    """Fit the forest registered under ``name`` in ``FORESTS`` and check predictions.

    The fitted forest must contain 100 estimators, produce finite and
    non-negative predictions, and reproduce the concordance statistics given
    by ``expected_c``.
    """
    data = make_whas500(to_numeric=True)

    forest_cls = FORESTS[name]
    forest = forest_cls(random_state=2)
    forest.fit(data.x, data.y)

    assert len(forest.estimators_) == 100

    risk = forest.predict(data.x)
    assert numpy.isfinite(risk).all()
    assert numpy.all(risk >= 0)

    event, time = data.y["fstat"], data.y["lenfol"]
    assert_cindex_almost_equal(event, time, risk, expected_c)
Exemplo n.º 25
0
    def test_fit_dropout(make_whas500):
        """Fit gradient boosting with ``dropout_rate=0.03`` and check predictions.

        No ``oob_improvement_`` attribute may be present after fitting,
        ``max_features_`` must be 8, and predictions must reproduce the
        reference concordance statistics.
        """
        data = make_whas500(with_std=False, to_numeric=True)

        gbm = GradientBoostingSurvivalAnalysis(
            n_estimators=100,
            max_features=8,
            learning_rate=1.0,
            dropout_rate=0.03,
            random_state=0,
        )
        gbm.fit(data.x, data.y)

        assert not hasattr(gbm, "oob_improvement_")
        assert gbm.max_features_ == 8

        risk = gbm.predict(data.x)

        expected_stats = (0.9094333, 68343, 6806, 0, 119)
        assert_cindex_almost_equal(data.y['fstat'], data.y['lenfol'], risk,
                                   expected_stats)
Exemplo n.º 26
0
def test_fit_predict(make_whas500):
    """Fit a ``RandomSurvivalForest`` with default size and check predictions.

    The fitted forest must contain 100 estimators, produce finite and
    non-negative predictions, and reproduce the reference concordance
    statistics.
    """
    data = make_whas500(to_numeric=True)

    rsf = RandomSurvivalForest(random_state=2)
    rsf.fit(data.x, data.y)

    assert len(rsf.estimators_) == 100

    risk = rsf.predict(data.x)
    assert numpy.isfinite(risk).all()
    assert numpy.all(risk >= 0)

    event, time = data.y["fstat"], data.y["lenfol"]
    assert_cindex_almost_equal(event, time, risk,
                               (0.9026201280123488, 67831, 7318, 0, 14))
Exemplo n.º 27
0
    def test_toy_hinge_fit(self, toy_data):
        """Fit ``self.svm_model`` on toy data divided by its per-column std.

        Checks the shape and values of ``coef_``, that ``coef0`` equals 1, and
        the reference concordance statistics of the predictions.
        """
        features, target = toy_data
        estimator = self.svm_model
        column_std = numpy.std(features, axis=0)
        estimator.fit(features / column_std, target)

        n_samples = features.shape[0]
        assert estimator.coef_.shape == (1, n_samples)
        assert estimator.coef0 == 1

        reference_coef = numpy.array(
            [[-1.893832101337, 1.083653895940, 0.810178205398, -2., 2., 0.]])
        assert_array_almost_equal(estimator.coef_, reference_coef)

        risk = estimator.predict(features / column_std)
        expected_stats = (1.0, 11, 0, 0, 0)
        assert_cindex_almost_equal(target['status'], target['time'], risk,
                                   expected_stats)
Exemplo n.º 28
0
    def test_breast_cancer_rbf_osqp(gbsg2):
        """Fit an RBF-kernel ``MinlipSurvivalAnalysis`` with the osqp solver.

        Uses scaled ``gbsg2`` features; checks the shape of ``coef_`` and the
        reference concordance statistics of the predictions.
        """
        raw_features, target = gbsg2
        features = scale(raw_features)

        settings = dict(solver="osqp", alpha=1, kernel="rbf", gamma=1. / 8,
                        pairs="next", max_iter=1000)
        estimator = MinlipSurvivalAnalysis(**settings)
        estimator.fit(features, target)

        n_samples = features.shape[0]
        assert estimator.coef_.shape == (1, n_samples)

        risk = estimator.predict(features)
        expected_stats = (0.6106168089455333, 81256, 51816, 0, 42)
        assert_cindex_almost_equal(target['cens'], target['time'], risk,
                                   expected_stats)
Exemplo n.º 29
0
    def test_toy_hinge_nearest_fit(self, toy_data):
        """Fit ``self.svm_model`` with ``pairs="nearest"`` on std-scaled toy data.

        The fit call must emit a deprecation warning. Afterwards the shape and
        values of ``coef_`` (to five decimals), ``coef0``, and the reference
        concordance statistics are checked.
        """
        features, target = toy_data
        estimator = self.svm_model
        estimator.set_params(pairs="nearest")
        column_std = numpy.std(features, axis=0)
        with pytest.deprecated_call():
            estimator.fit(features / column_std, target)

        n_samples = features.shape[0]
        assert estimator.coef_.shape == (1, n_samples)
        assert estimator.coef0 == 1

        reference_coef = numpy.array(
            [[-1.893832101337, 1.083653895940, 0.810178205398, -2., 2., 0.]])
        assert_array_almost_equal(estimator.coef_, reference_coef, decimal=5)

        risk = estimator.predict(features / column_std)
        expected_stats = (1.0, 11, 0, 0, 0)
        assert_cindex_almost_equal(target['status'], target['time'], risk,
                                   expected_stats)
Exemplo n.º 30
0
    def test_fit_int_param_as_float(make_whas500):
        """Pass integer-valued hyper-parameters as floats and fit the model.

        The values reported by ``get_params`` must compare equal to the
        corresponding integers, both for constructor arguments and for a value
        set through ``set_params``; the fitted model must reproduce the
        reference concordance statistics.
        """
        data = make_whas500(with_std=False, to_numeric=True)

        gbm = GradientBoostingSurvivalAnalysis(
            n_estimators=100.0,
            max_depth=3.0,
            min_samples_split=10.0,
            random_state=0,
        )
        reported = gbm.get_params()
        assert reported["n_estimators"] == 100
        assert reported["max_depth"] == 3
        assert reported["min_samples_split"] == 10

        gbm.set_params(max_leaf_nodes=15.0)
        assert gbm.get_params()["max_leaf_nodes"] == 15

        gbm.fit(data.x, data.y)
        risk = gbm.predict(data.x)

        expected_stats = (0.90256690042449006, 67826, 7321, 2, 119)
        assert_cindex_almost_equal(data.y['fstat'], data.y['lenfol'], risk,
                                   expected_stats)