def test_default_optimizer(make_whas500):
    """Check that an estimator built without ``optimizer`` reports 'rbtree' after fitting."""
    data = make_whas500(to_numeric=True)
    model = FastKernelSurvivalSVM(tol=1e-4, max_iter=25)

    # Convergence warnings are silenced for this fit; only the optimizer
    # attribute is inspected afterwards.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=ConvergenceWarning)
        model.fit(data.x, data.y)

    assert model.optimizer == 'rbtree'
Пример #2
0
    def test_compare_clinical_kernel(self):
        """Compare a linear SVM on KernelPCA features with the kernel SVM.

        Both models rely on the pairwise kernel of a fitted
        ``ClinicalKernelTransform``; their predictions must yield matching
        concordance results.
        """
        x_full, y = load_whas500()

        transform = ClinicalKernelTransform()
        transform.fit(x_full)

        kpca = KernelPCA(kernel=transform.pairwise_kernel, copy_X=True)
        features = kpca.fit_transform(self.x)

        # Solver settings shared by both estimators.
        solver_args = dict(optimizer='rbtree', tol=1e-8, max_iter=500, random_state=0)

        linear_svm = FastSurvivalSVM(**solver_args)
        linear_svm.fit(features, y)

        kernel_svm = FastKernelSurvivalSVM(kernel=transform.pairwise_kernel, **solver_args)
        kernel_svm.fit(self.x.values, y)

        linear_pred = linear_svm.predict(kpca.transform(self.x))
        kernel_pred = kernel_svm.predict(self.x.values)

        self.assertEqual(len(linear_pred), len(kernel_pred))

        linear_cindex = concordance_index_censored(y['fstat'], y['lenfol'], linear_pred)
        kernel_cindex = concordance_index_censored(y['fstat'], y['lenfol'], kernel_pred)

        # First entry is compared approximately, the remaining entries exactly.
        self.assertAlmostEqual(linear_cindex[0], kernel_cindex[0])
        self.assertTupleEqual(linear_cindex[1:], kernel_cindex[1:])
Пример #3
0
    def test_compare_clinical_kernel(make_whas500):
        """Compare a linear SVM on KernelPCA features with the kernel SVM.

        Both models use the pairwise kernel of a ``ClinicalKernelTransform``
        fitted on the data frame; the kernel model's predictions are checked
        against the concordance result of the linear model.
        """
        data = make_whas500(to_numeric=True)

        transform = ClinicalKernelTransform()
        transform.fit(data.x_data_frame)

        kpca = KernelPCA(kernel=transform.pairwise_kernel, copy_X=True)
        features = kpca.fit_transform(data.x)

        # Solver settings shared by both estimators.
        solver_args = dict(optimizer='rbtree', tol=1e-8, max_iter=500, random_state=0)

        linear_svm = FastSurvivalSVM(**solver_args)
        linear_svm.fit(features, data.y)

        kernel_svm = FastKernelSurvivalSVM(kernel=transform.pairwise_kernel, **solver_args)
        kernel_svm.fit(data.x, data.y)

        linear_pred = linear_svm.predict(kpca.transform(data.x))
        kernel_pred = kernel_svm.predict(data.x)

        assert len(linear_pred) == len(kernel_pred)

        reference = concordance_index_censored(
            data.y['fstat'], data.y['lenfol'], linear_pred)
        assert_cindex_almost_equal(
            data.y['fstat'], data.y['lenfol'], kernel_pred, reference)
Пример #4
0
    def test_compare_builtin_kernel(make_whas500):
        """Compare the kernel SVM with a linear SVM on KernelPCA features.

        Both use a polynomial kernel (gamma=0.5, degree=2) on normalized
        inputs; the kernel model's predictions are checked against the
        concordance result of the linear model.
        """
        data = make_whas500(to_numeric=True)
        x_norm = normalize(data.x)

        seed = 0xf38
        kernel_args = dict(kernel="polynomial", gamma=0.5, degree=2)
        solver_args = dict(optimizer='rbtree', tol=1e-8, max_iter=100, random_state=seed)

        kernel_svm = FastKernelSurvivalSVM(**kernel_args, **solver_args)
        kernel_svm.fit(x_norm, data.y)
        kernel_pred = kernel_svm.predict(x_norm)

        kpca = KernelPCA(copy_X=True, random_state=seed, **kernel_args)
        features = kpca.fit_transform(x_norm)

        linear_svm = FastSurvivalSVM(**solver_args)
        linear_svm.fit(features, data.y)
        linear_pred = linear_svm.predict(features)

        assert len(linear_pred) == len(kernel_pred)

        reference = concordance_index_censored(
            data.y['fstat'], data.y['lenfol'], linear_pred)
        assert_cindex_almost_equal(
            data.y['fstat'], data.y['lenfol'], kernel_pred, reference)
    def test_unknown_optimizer(fake_data):
        """Fitting with an unrecognised optimizer name must raise ``ValueError``."""
        features, target = fake_data
        model = FastKernelSurvivalSVM(optimizer='random stuff')

        expected_message = "unknown optimizer: random stuff"
        with pytest.raises(ValueError, match=expected_message):
            model.fit(features, target)
Пример #6
0
    def test_compare_rbf(self):
        """Compare a linear SVM on RBF KernelPCA features with the RBF kernel SVM.

        Predictions of both models must yield matching concordance results.
        """
        raw_x, y = load_whas500()
        x = encode_categorical(standardize(raw_x))

        kpca = KernelPCA(kernel="rbf")
        features = kpca.fit_transform(x)

        # Solver settings shared by both estimators.
        solver_args = dict(optimizer='rbtree', tol=1e-8, max_iter=1000, random_state=0)

        linear_svm = FastSurvivalSVM(**solver_args)
        linear_svm.fit(features, y)

        kernel_svm = FastKernelSurvivalSVM(kernel="rbf", **solver_args)
        kernel_svm.fit(x, y)

        linear_pred = linear_svm.predict(kpca.transform(x))
        kernel_pred = kernel_svm.predict(x)

        self.assertEqual(len(linear_pred), len(kernel_pred))

        linear_cindex = concordance_index_censored(y['fstat'], y['lenfol'], linear_pred)
        kernel_cindex = concordance_index_censored(y['fstat'], y['lenfol'], kernel_pred)

        # First entry is compared approximately, the remaining entries exactly.
        self.assertAlmostEqual(linear_cindex[0], kernel_cindex[0])
        self.assertTupleEqual(linear_cindex[1:], kernel_cindex[1:])
Пример #7
0
    def test_compare_builtin_kernel(self):
        """Compare the kernel SVM with a linear SVM on KernelPCA features.

        Both use a polynomial kernel (gamma=0.5, degree=2) on normalized
        inputs; their predictions must yield matching concordance results.
        """
        x_norm = normalize(self.x)
        y = self.y

        seed = 0xf38
        kernel_args = dict(kernel="polynomial", gamma=0.5, degree=2)
        solver_args = dict(optimizer='rbtree', tol=1e-8, max_iter=100, random_state=seed)

        kernel_svm = FastKernelSurvivalSVM(**kernel_args, **solver_args)
        kernel_svm.fit(x_norm, y)
        kernel_pred = kernel_svm.predict(x_norm)

        kpca = KernelPCA(copy_X=True, random_state=seed, **kernel_args)
        features = kpca.fit_transform(x_norm)

        linear_svm = FastSurvivalSVM(**solver_args)
        linear_svm.fit(features, y)
        linear_pred = linear_svm.predict(features)

        self.assertEqual(len(linear_pred), len(kernel_pred))

        linear_cindex = concordance_index_censored(y['fstat'], y['lenfol'], linear_pred)
        kernel_cindex = concordance_index_censored(y['fstat'], y['lenfol'], kernel_pred)

        # First entry is compared approximately, the remaining entries exactly.
        self.assertAlmostEqual(linear_cindex[0], kernel_cindex[0])
        self.assertTupleEqual(linear_cindex[1:], kernel_cindex[1:])
    def test_fit_precomputed_kernel_invalid_shape(fake_data):
        """Fitting a precomputed-kernel model on ``fake_data`` must raise ``ValueError``."""
        features, target = fake_data
        model = FastKernelSurvivalSVM(optimizer="rbtree", kernel='precomputed', random_state=0)

        expected_message = (
            r"Precomputed metric requires shape \(n_queries, n_indexed\)\. "
            r"Got \(100, 11\) for 100 indexed\."
        )
        with pytest.raises(ValueError, match=expected_message):
            model.fit(features, target)
    def test_predict_precomputed_kernel_invalid_shape(self):
        """Predicting with a (100, 14) input after a precomputed fit must raise ``ValueError``."""
        model = FastKernelSurvivalSVM(optimizer="rbtree", kernel='precomputed', random_state=0)
        # Matrix of inner products between all rows of the training data.
        gram = numpy.dot(self.x.values, self.x.values.T)
        model.fit(gram, self.y)

        bad_input = numpy.random.randn(100, 14)
        expected_message = (
            r"Precomputed metric requires shape \(n_queries, n_indexed\)\. "
            r"Got \(100, 14\) for 500 indexed\."
        )
        with self.assertRaisesRegex(ValueError, expected_message):
            model.predict(bad_input)
    def test_fit_and_predict_rbf_avltree(self):
        """Fit an RBF-kernel model with the 'avltree' optimizer and check its score.

        The estimator must not be flagged as pairwise, ``coef_`` must have as
        many rows as the training data, and the score on the training data
        must be at least 0.965.
        """
        ssvm = FastKernelSurvivalSVM(optimizer='avltree', kernel='rbf',
                                     tol=2e-6, max_iter=75, random_state=0)
        ssvm.fit(self.x.values, self.y)

        self.assertFalse(ssvm._pairwise)
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(self.x.shape[0], ssvm.coef_.shape[0])

        c = ssvm.score(self.x.values, self.y)
        self.assertGreaterEqual(c, 0.965)
    def test_fit_and_predict_linear_regression_no_intercept(self):
        """Fit with ``rank_ratio=0.0`` and ``fit_intercept=False``.

        The fitted model must have no ``intercept_`` attribute, and the RMSE
        of its predictions against ``lenfol`` must match the reference value
        to 4 places.
        """
        model = FastKernelSurvivalSVM(
            optimizer="rbtree",
            rank_ratio=0.0,
            kernel="linear",
            max_iter=50,
            fit_intercept=False,
            random_state=0,
        )
        model.fit(self.x.values, self.y)

        self.assertFalse(hasattr(model, "intercept_"))

        predicted = model.predict(self.x.values)
        mse = mean_squared_error(self.y['lenfol'], predicted)
        rmse = numpy.sqrt(mse)
        self.assertAlmostEqual(rmse, 15837.658418546907, places=4)
    def test_fit_precomputed_kernel_not_symmetric():
        """Fitting on a non-symmetric precomputed kernel matrix must raise ``ValueError``."""
        model = FastKernelSurvivalSVM(optimizer="rbtree", kernel='precomputed', random_state=0)

        kernel_matrix = numpy.random.randn(100, 100)
        # Make one mirrored pair of entries explicitly different.
        kernel_matrix[10, 12] = -1
        kernel_matrix[12, 10] = 9

        event = numpy.ones(100).astype(bool)
        time = numpy.ones(100)
        target = Surv.from_arrays(event, time)

        expected_message = "kernel matrix is not symmetric"
        with pytest.raises(ValueError, match=expected_message):
            model.fit(kernel_matrix, target)
Пример #13
0
    def test_fit_and_predict_rbf_avltree(self):
        """Fit an RBF-kernel model with the 'avltree' optimizer and check its score.

        The estimator must not be flagged as pairwise, ``coef_`` must have as
        many rows as the training data, and the score on the training data
        must lie within 1e-3 of the reference value.
        """
        ssvm = FastKernelSurvivalSVM(optimizer="avltree",
                                     kernel='rbf',
                                     random_state=0)
        ssvm.fit(self.x.values, self.y)

        self.assertFalse(ssvm._pairwise)
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(self.x.shape[0], ssvm.coef_.shape[0])

        c = ssvm.score(self.x.values, self.y)
        self.assertLessEqual(abs(0.92460312179802795 - c), 1e-3)
    def test_fit_and_predict_hybrid_rbf(self):
        """Fit an RBF-kernel model with ``rank_ratio=0.5`` and an intercept.

        The intercept must lie within 0.04 and the RMSE within 75 of their
        reference values.
        """
        model = FastKernelSurvivalSVM(
            optimizer="rbtree",
            rank_ratio=0.5,
            kernel="rbf",
            max_iter=50,
            fit_intercept=True,
            random_state=0,
        )
        model.fit(self.x.values, self.y)

        self.assertFalse(model._pairwise)
        intercept_error = abs(5.0289145697617164 - model.intercept_)
        self.assertLessEqual(intercept_error, 0.04)

        predicted = model.predict(self.x.values)
        rmse = numpy.sqrt(mean_squared_error(self.y['lenfol'], predicted))
        rmse_error = abs(880.20361811281487 - rmse)
        self.assertLessEqual(rmse_error, 75)
    def test_fit_and_predict_regression_rbf(self):
        """Fit an RBF-kernel model with ``rank_ratio=0.0`` and an intercept.

        The intercept and the RMSE of the predictions against ``lenfol`` are
        compared with reference values.
        """
        model = FastKernelSurvivalSVM(
            optimizer="rbtree",
            rank_ratio=0.0,
            kernel="rbf",
            tol=1e-6,
            max_iter=50,
            fit_intercept=True,
            random_state=0,
        )
        model.fit(self.x.values, self.y)

        self.assertFalse(model._pairwise)
        self.assertAlmostEqual(model.intercept_, 4.9267218894089533)

        predicted = model.predict(self.x.values)
        mse = mean_squared_error(self.y['lenfol'], predicted)
        rmse = numpy.sqrt(mse)
        self.assertAlmostEqual(rmse, 783.525277, places=6)
    def test_fit_and_predict_linear(self):
        """Fit a linear-kernel model and score it on a shuffled subset.

        The estimator must not be flagged as pairwise, ``coef_`` must have as
        many rows as the training data, and the score on the shuffled indices
        0..249 must match the reference value to 6 places.
        """
        ssvm = FastKernelSurvivalSVM(optimizer="rbtree", kernel='linear', random_state=0)
        ssvm.fit(self.x.values, self.y)

        self.assertFalse(ssvm._pairwise)
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(self.x.shape[0], ssvm.coef_.shape[0])

        # Seeded shuffle keeps the evaluation order reproducible.
        i = numpy.arange(250)
        numpy.random.RandomState(0).shuffle(i)
        c = ssvm.score(self.x.values[i], self.y[i])
        self.assertAlmostEqual(c, 0.76923445664157997, 6)
    def test_predict_precomputed_kernel_invalid_shape(make_whas500):
        """Predicting with a (100, 14) input after a precomputed fit must raise ``ValueError``."""
        data = make_whas500(to_numeric=True)
        model = FastKernelSurvivalSVM(optimizer="rbtree", kernel='precomputed', random_state=0)
        # Matrix of inner products between all rows of the training data.
        gram = numpy.dot(data.x, data.x.T)
        model.fit(gram, data.y)

        bad_input = numpy.random.randn(100, 14)
        expected_message = (
            r"Precomputed metric requires shape \(n_queries, n_indexed\)\. "
            r"Got \(100, 14\) for 500 indexed\."
        )
        with pytest.raises(ValueError, match=expected_message):
            model.predict(bad_input)
    def test_fit_and_predict_linear_regression_no_intercept(make_whas500):
        """Fit with ``rank_ratio=0.0`` and ``fit_intercept=False``.

        The fitted model must have no ``intercept_`` attribute, and the RMSE
        of its predictions against ``lenfol`` must match the reference value
        when rounded to 4 places.
        """
        data = make_whas500(to_numeric=True)
        model = FastKernelSurvivalSVM(
            optimizer="rbtree",
            rank_ratio=0.0,
            kernel="linear",
            max_iter=50,
            fit_intercept=False,
            random_state=0,
        )
        model.fit(data.x, data.y)

        assert not hasattr(model, "intercept_")

        predicted = model.predict(data.x)
        mse = mean_squared_error(data.y['lenfol'], predicted)
        rmse = numpy.sqrt(mse)
        assert round(abs(rmse - 15837.658418546907), 4) == 0
Пример #19
0
    def test_fit_and_predict_rbf_rbtree(self):
        """Fit an RBF-kernel model with the 'rbtree' optimizer and check its score.

        The estimator must not be flagged as pairwise, ``coef_`` must have as
        many rows as the training data, and the score on the training data
        must match the reference value to 3 places.
        """
        ssvm = FastKernelSurvivalSVM(optimizer="rbtree",
                                     kernel='rbf',
                                     random_state=0)
        ssvm.fit(self.x.values, self.y)

        self.assertFalse(ssvm._pairwise)
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(self.x.shape[0], ssvm.coef_.shape[0])

        c = ssvm.score(self.x.values, self.y)
        self.assertAlmostEqual(0.92230102862313534, c, 3)
    def test_fit_and_predict_rbf(make_whas500, optimizer):
        """Fit an RBF-kernel model with the given optimizer and check its score.

        The estimator must not be flagged as pairwise, ``coef_`` must have as
        many rows as the training data, and the score on the training data
        must be at least 0.965.
        """
        data = make_whas500(to_numeric=True)
        model = FastKernelSurvivalSVM(
            optimizer=optimizer,
            kernel='rbf',
            tol=2e-6,
            max_iter=75,
            random_state=0,
        )
        model.fit(data.x, data.y)

        assert not model._pairwise
        n_samples = data.x.shape[0]
        assert n_samples == model.coef_.shape[0]

        cindex = model.score(data.x, data.y)
        assert cindex >= 0.965
    def test_fit_and_predict_hybrid_rbf(make_whas500):
        """Fit an RBF-kernel model with ``rank_ratio=0.5`` and an intercept.

        The intercept must lie within 0.04 and the RMSE within 75 of their
        reference values.
        """
        data = make_whas500(to_numeric=True)
        model = FastKernelSurvivalSVM(
            optimizer="rbtree",
            rank_ratio=0.5,
            kernel="rbf",
            max_iter=50,
            fit_intercept=True,
            random_state=0,
        )
        model.fit(data.x, data.y)

        assert not model._pairwise
        intercept_error = abs(5.0289145697617164 - model.intercept_)
        assert intercept_error <= 0.04

        predicted = model.predict(data.x)
        rmse = numpy.sqrt(mean_squared_error(data.y['lenfol'], predicted))
        rmse_error = abs(880.20361811281487 - rmse)
        assert rmse_error <= 75
    def test_fit_and_predict_regression_rbf(make_whas500):
        """Fit an RBF-kernel model with ``rank_ratio=0.0`` and an intercept.

        The intercept and the RMSE of the predictions against ``lenfol`` are
        compared with reference values after rounding.
        """
        data = make_whas500(to_numeric=True)
        model = FastKernelSurvivalSVM(
            optimizer="rbtree",
            rank_ratio=0.0,
            kernel="rbf",
            tol=1e-6,
            max_iter=50,
            fit_intercept=True,
            random_state=0,
        )
        model.fit(data.x, data.y)

        assert not model._pairwise
        assert round(abs(model.intercept_ - 4.9267218894089533), 7) == 0

        predicted = model.predict(data.x)
        mse = mean_squared_error(data.y['lenfol'], predicted)
        rmse = numpy.sqrt(mse)
        assert round(abs(rmse - 783.525277), 6) == 0
    def test_fit_and_predict_linear(make_whas500):
        """Fit a linear-kernel model and score it on a shuffled subset.

        The estimator must not be flagged as pairwise, ``coef_`` must have as
        many rows as the training data, and the score on the shuffled indices
        0..249 must match the reference value when rounded to 6 places.
        """
        data = make_whas500(to_numeric=True)
        model = FastKernelSurvivalSVM(optimizer="rbtree", kernel='linear', random_state=0)
        model.fit(data.x, data.y)

        assert not model._pairwise
        n_samples = data.x.shape[0]
        assert n_samples == model.coef_.shape[0]

        # Seeded shuffle keeps the evaluation order reproducible.
        subset = numpy.arange(250)
        rng = numpy.random.RandomState(0)
        rng.shuffle(subset)

        cindex = model.score(data.x[subset], data.y[subset])
        assert round(abs(cindex - 0.76923445664157997), 6) == 0
    def test_fit_and_predict_linear_regression_precomputed(self):
        """Fit with ``rank_ratio=0.0`` on a precomputed inner-product matrix.

        The estimator must be flagged as pairwise, the intercept must match
        the reference value to 5 places, and the RMSE on a shuffled subset
        must lie within 0.25 of its reference value.
        """
        model = FastKernelSurvivalSVM(
            optimizer="rbtree",
            rank_ratio=0.0,
            kernel="precomputed",
            max_iter=50,
            fit_intercept=True,
            random_state=0,
        )
        gram = numpy.dot(self.x.values, self.x.values.T)
        model.fit(gram, self.y)

        self.assertTrue(model._pairwise)
        self.assertAlmostEqual(model.intercept_, 6.3979746625712295, places=5)

        # Seeded shuffle keeps the evaluation order reproducible.
        subset = numpy.arange(250)
        rng = numpy.random.RandomState(0)
        rng.shuffle(subset)

        predicted = model.predict(gram[subset])
        rmse = numpy.sqrt(mean_squared_error(self.y['lenfol'][subset], predicted))
        rmse_error = abs(1339.3006854574726 - rmse)
        self.assertLessEqual(rmse_error, 0.25)
    def test_fit_and_predict_clinical_kernel(make_whas500):
        """Fit with the pairwise kernel of a ``ClinicalKernelTransform``.

        The estimator must not be flagged as pairwise, ``coef_`` must have as
        many rows as the training data, and the score on the training data
        must be at least 0.854.
        """
        data = make_whas500(to_numeric=True)

        transform = ClinicalKernelTransform()
        transform.fit(data.x_data_frame)

        model = FastKernelSurvivalSVM(
            optimizer="rbtree",
            kernel=transform.pairwise_kernel,
            tol=7e-7,
            max_iter=100,
            random_state=0,
        )
        model.fit(data.x, data.y)

        assert not model._pairwise
        n_samples = data.x.shape[0]
        assert n_samples == model.coef_.shape[0]

        cindex = model.score(data.x, data.y)
        assert cindex >= 0.854
    def test_fit_and_predict_linear_regression_precomputed(make_whas500):
        """Fit with ``rank_ratio=0.0`` on a precomputed inner-product matrix.

        The estimator must be flagged as pairwise, the intercept must match
        the reference value when rounded to 5 places, and the RMSE on a
        shuffled subset must not exceed the reference value plus 0.293.
        """
        data = make_whas500(to_numeric=True)
        model = FastKernelSurvivalSVM(
            optimizer="rbtree",
            rank_ratio=0.0,
            kernel="precomputed",
            max_iter=50,
            fit_intercept=True,
            random_state=0,
        )
        gram = numpy.dot(data.x, data.x.T)
        model.fit(gram, data.y)

        assert model._pairwise
        assert round(abs(model.intercept_ - 6.3979746625712295), 5) == 0

        # Seeded shuffle keeps the evaluation order reproducible.
        subset = numpy.arange(250)
        rng = numpy.random.RandomState(0)
        rng.shuffle(subset)

        predicted = model.predict(gram[subset])
        rmse = numpy.sqrt(mean_squared_error(data.y['lenfol'][subset], predicted))
        assert rmse <= 1339.3006854574726 + 0.293
    def test_fit_and_predict_clinical_kernel(self):
        """Fit with the pairwise kernel of a ``ClinicalKernelTransform``.

        The transform is fitted on the data returned by ``load_whas500``; the
        model is fitted on ``self.x``. The estimator must not be flagged as
        pairwise, ``coef_`` must have as many rows as the training data, and
        the score on the training data must be at least 0.854.
        """
        x_full, y = load_whas500()

        trans = ClinicalKernelTransform()
        trans.fit(x_full)
        x = self.x

        ssvm = FastKernelSurvivalSVM(optimizer="rbtree", kernel=trans.pairwise_kernel,
                                     tol=7e-7, max_iter=100, random_state=0)
        ssvm.fit(x.values, y)

        self.assertFalse(ssvm._pairwise)
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(x.shape[0], ssvm.coef_.shape[0])

        c = ssvm.score(x.values, y)
        self.assertGreaterEqual(c, 0.854)
Пример #28
0
    def test_fit_and_predict_linear_regression(make_whas500):
        """Fit a linear-kernel model with ``rank_ratio=0.0`` and an intercept.

        Checks the intercept, the RMSE on a shuffled subset (upper bound:
        reference value plus 0.293) and the score on the same subset.
        """
        data = make_whas500(to_numeric=True)
        model = FastKernelSurvivalSVM(
            optimizer="rbtree",
            rank_ratio=0.0,
            kernel="linear",
            max_iter=50,
            tol=1e-8,
            fit_intercept=True,
            random_state=0,
        )
        model.fit(data.x, data.y)

        assert not model._pairwise
        assert round(abs(model.intercept_ - 6.416017539824949), 5) == 0

        # Seeded shuffle keeps the evaluation order reproducible.
        subset = numpy.arange(250)
        rng = numpy.random.RandomState(0)
        rng.shuffle(subset)

        predicted = model.predict(data.x[subset])
        rmse = numpy.sqrt(mean_squared_error(data.y['lenfol'][subset], predicted))
        assert rmse <= 1342.274550652291 + 0.293

        cindex = model.score(data.x[subset], data.y[subset])
        assert round(abs(cindex - 0.7630027323714108), 6) == 0
Пример #29
0
    def test_fit_and_predict_clinical_kernel(self):
        """Fit with the pairwise kernel of a ``ClinicalKernelTransform``.

        The transform is fitted on the raw data from ``load_whas500``; the
        model is fitted on the standardized, categorically encoded version.
        The estimator must not be flagged as pairwise, ``coef_`` must have as
        many rows as the training data, and the score on the training data
        must lie within 1e-3 of the reference value.
        """
        x_full, y = load_whas500()

        trans = ClinicalKernelTransform()
        trans.fit(x_full)

        x = encode_categorical(standardize(x_full))

        ssvm = FastKernelSurvivalSVM(optimizer="rbtree",
                                     kernel=trans.pairwise_kernel,
                                     max_iter=100,
                                     random_state=0)
        ssvm.fit(x.values, y)

        self.assertFalse(ssvm._pairwise)
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(x.shape[0], ssvm.coef_.shape[0])

        c = ssvm.score(x.values, y)
        self.assertLessEqual(abs(0.83699051218246412 - c), 1e-3)
Пример #30
0
    def test_fit_and_predict_linear_regression_precomputed(make_whas500):
        """Fit with ``rank_ratio=0.0`` on a precomputed inner-product matrix.

        The estimator's "pairwise" tag must be truthy, the intercept must
        match the reference value when rounded to 5 places, and the RMSE on a
        shuffled subset must not exceed the reference value plus 0.293.
        """
        data = make_whas500(to_numeric=True)
        model = FastKernelSurvivalSVM(
            optimizer="rbtree",
            rank_ratio=0.0,
            kernel="precomputed",
            max_iter=50,
            tol=1e-8,
            fit_intercept=True,
            random_state=0,
        )
        gram = numpy.dot(data.x, data.x.T)
        model.fit(gram, data.y)

        tags = model._get_tags()
        assert tags["pairwise"]
        assert round(abs(model.intercept_ - 6.416017539824949), 5) == 0

        # Seeded shuffle keeps the evaluation order reproducible.
        subset = numpy.arange(250)
        rng = numpy.random.RandomState(0)
        rng.shuffle(subset)

        predicted = model.predict(gram[subset])
        rmse = numpy.sqrt(mean_squared_error(data.y['lenfol'][subset], predicted))
        assert rmse <= 1342.274550652291 + 0.293