Exemplo n.º 1
0
    def test_fit_and_predict_regression(make_whas500, optimizer_regression):
        """Fit with ``rank_ratio=0.0`` and an intercept, then compare to references.

        The fitted intercept, the coefficient vector and the RMSE of the
        predictions on the training data are each checked against hard-coded
        reference values.
        """
        data = make_whas500(to_numeric=True)

        model = FastSurvivalSVM(
            optimizer=optimizer_regression,
            rank_ratio=0.0,
            max_iter=50,
            fit_intercept=True,
            random_state=0,
        )
        model.fit(data.x, data.y)

        # Scalar comparisons are done to 7 decimal places.
        intercept_error = abs(6.4160179606675278 - model.intercept_)
        assert round(intercept_error, 7) == 0

        reference_coef = numpy.array([
            -0.0730891368237, -0.536630355029, -0.497411603275, 0.269039958377,
            -0.730559850692, -0.0148443526234, 0.285916578892, 0.165960302339,
            -0.301749910087, 0.334855938531, 0.0886214732161, 0.0554890272028,
            -2.12680470014, 0.0421466831393
        ])
        assert_array_almost_equal(reference_coef, model.coef_)

        predicted = model.predict(data.x)
        rmse = numpy.sqrt(mean_squared_error(data.y['lenfol'], predicted))
        rmse_error = abs(1206.6556186869332 - rmse)
        assert round(rmse_error, 7) == 0
Exemplo n.º 2
0
    def test_fit_and_predict_hybrid_no_intercept(make_whas500,
                                                 optimizer_regression):
        """Fit with ``rank_ratio=0.5`` and ``fit_intercept=False``.

        Verifies that no ``intercept_`` attribute is set and that the
        coefficients and training RMSE match hard-coded reference values.
        """
        data = make_whas500(to_numeric=True)

        model = FastSurvivalSVM(
            optimizer=optimizer_regression,
            rank_ratio=0.5,
            max_iter=50,
            fit_intercept=False,
            random_state=0,
        )
        model.fit(data.x, data.y)

        assert not hasattr(model, "intercept_")

        reference_coef = numpy.array([
            0.00669121, -0.2754864, -0.14124808, 0.0748376, -0.2812598,
            0.07543884, 0.09845683, 0.08398258, -0.12182314, 0.02637739,
            0.03060149, 0.11870598, -0.52688224, -0.01762842
        ])
        assert_array_almost_equal(reference_coef, model.coef_)

        predicted = model.predict(data.x)
        rmse = numpy.sqrt(mean_squared_error(data.y['lenfol'], predicted))
        # Scalar comparison is done to 7 decimal places.
        rmse_error = abs(1128.4460587629746 - rmse)
        assert round(rmse_error, 7) == 0
Exemplo n.º 3
0
    def test_fit_and_predict_regression_no_intercept(make_whas500,
                                                     optimizer_regression):
        """Fit with ``rank_ratio=0.0`` and ``fit_intercept=False``.

        Verifies that no ``intercept_`` attribute is set and that the
        coefficients and training RMSE match hard-coded reference values.
        """
        data = make_whas500(to_numeric=True)

        model = FastSurvivalSVM(
            optimizer=optimizer_regression,
            rank_ratio=0.0,
            max_iter=50,
            fit_intercept=False,
            random_state=0,
        )
        model.fit(data.x, data.y)

        assert not hasattr(model, "intercept_")

        reference_coef = numpy.array([
            1.39989875, -1.16903161, -0.40195857, -0.05848903, -0.08421557,
            4.11924729, 0.25135451, 1.89067276, -0.25751401, -0.10213143,
            1.56333622, 3.10136873, -2.23644848, -0.11620715
        ])
        assert_array_almost_equal(reference_coef, model.coef_)

        predicted = model.predict(data.x)
        rmse = numpy.sqrt(mean_squared_error(data.y['lenfol'], predicted))
        # Scalar comparison is done to 7 decimal places.
        rmse_error = abs(15838.510668936022 - rmse)
        assert round(rmse_error, 7) == 0
Exemplo n.º 4
0
    def test_compare_builtin_kernel(make_whas500):
        """Compare the kernel model with a linear model fitted on KernelPCA output.

        Both pipelines use the same polynomial kernel settings. The prediction
        vectors must have equal length, and the concordance index of the kernel
        model's predictions is checked against the one computed from the linear
        model's predictions.
        """
        data = make_whas500(to_numeric=True)
        features = normalize(data.x)
        seed = 0xf38

        # Kernel model working directly on the normalized features.
        kernel_model = FastKernelSurvivalSVM(
            optimizer='rbtree', kernel="polynomial",
            gamma=0.5, degree=2, coef0=0,
            tol=2.5e-8, max_iter=100, random_state=seed,
        )
        kernel_model.fit(features, data.y)
        pred_kernel = kernel_model.predict(features)

        # Linear model on the KernelPCA transform of the same features.
        kernel_pca = KernelPCA(
            kernel="polynomial", copy_X=True,
            gamma=0.5, degree=2, coef0=0,
            random_state=seed,
        )
        embedded = kernel_pca.fit_transform(features)
        linear_model = FastSurvivalSVM(
            optimizer='rbtree', tol=2.5e-8, max_iter=100, random_state=seed,
        )
        linear_model.fit(embedded, data.y)
        pred_linear = linear_model.predict(embedded)

        assert len(pred_linear) == len(pred_kernel)

        reference_cindex = concordance_index_censored(
            data.y['fstat'], data.y['lenfol'], pred_linear)
        assert_cindex_almost_equal(
            data.y['fstat'], data.y['lenfol'], pred_kernel, reference_cindex)
Exemplo n.º 5
0
    def test_fit_and_predict_hybrid(make_whas500, optimizer_regression):
        """Fit with ``rank_ratio=0.5`` and an intercept, then compare to references.

        The fitted intercept, the coefficient vector and the RMSE of the
        predictions on the training data are each checked against hard-coded
        reference values.
        """
        data = make_whas500(to_numeric=True)

        model = FastSurvivalSVM(
            optimizer=optimizer_regression,
            rank_ratio=0.5,
            max_iter=50,
            fit_intercept=True,
            random_state=0,
        )
        model.fit(data.x, data.y)

        # Scalar comparisons are done to 7 decimal places.
        intercept_error = abs(6.1409367385513729 - model.intercept_)
        assert round(intercept_error, 7) == 0

        reference_coef = numpy.array([
            -0.0209254120718, -0.265768317208, -0.154254689136,
            0.0800600947891, -0.290121131022, -0.0288851785213,
            0.0998004550073, 0.0454100937492, -0.125863947621, 0.0343588337797,
            -0.000710219364914, 0.0546969104996, -0.5375338235,
            -0.0137995110308
        ])
        assert_array_almost_equal(reference_coef, model.coef_)

        predicted = model.predict(data.x)
        rmse = numpy.sqrt(mean_squared_error(data.y['lenfol'], predicted))
        rmse_error = abs(780.52617631863893 - rmse)
        assert round(rmse_error, 7) == 0
    def test_compare_rbf(self):
        """Compare the RBF kernel model with a linear model on KernelPCA output.

        The prediction vectors must have equal length, the first element of
        both concordance results must be almost equal, and the remaining
        elements must be identical.
        """
        features = normalize(self.x)
        target = self.y

        # Kernel model working directly on the normalized features.
        kernel_model = FastKernelSurvivalSVM(
            optimizer='rbtree', kernel="rbf",
            tol=1e-6, max_iter=65, random_state=0,
        )
        kernel_model.fit(features, target)

        # Linear model on the KernelPCA transform of the same features.
        kernel_pca = KernelPCA(kernel="rbf", copy_X=True)
        embedded = kernel_pca.fit_transform(features)
        linear_model = FastSurvivalSVM(
            optimizer='rbtree', tol=1e-6, max_iter=30, random_state=0,
        )
        linear_model.fit(embedded, target)

        pred_linear = linear_model.predict(embedded)
        pred_kernel = kernel_model.predict(features)

        self.assertEqual(len(pred_linear), len(pred_kernel))

        cindex_linear = concordance_index_censored(
            target['fstat'], target['lenfol'], pred_linear)
        cindex_kernel = concordance_index_censored(
            target['fstat'], target['lenfol'], pred_kernel)

        self.assertAlmostEqual(cindex_linear[0], cindex_kernel[0])
        self.assertTupleEqual(cindex_linear[1:], cindex_kernel[1:])
Exemplo n.º 7
0
    def test_fit_and_predict_hybrid_no_intercept(self):
        """Fit with ``rank_ratio=0.5`` and ``fit_intercept=False``.

        Skipped for the 'simple' and 'PRSVM' optimizers. Otherwise verifies
        that no ``intercept_`` attribute is set and that the coefficients and
        training RMSE match hard-coded reference values.
        """
        without_regression = {'simple', 'PRSVM'}
        if self.OPTIMIZER in without_regression:
            reason = "regression not implemented for " + self.OPTIMIZER
            raise unittest.SkipTest(reason)

        features = self.x.values
        model = FastSurvivalSVM(
            optimizer=self.OPTIMIZER,
            rank_ratio=0.5,
            max_iter=50,
            fit_intercept=False,
            random_state=0,
        )
        model.fit(features, self.y)

        self.assertFalse(hasattr(model, "intercept_"))

        reference_coef = numpy.array([
            0.00669121, -0.2754864, -0.14124808, 0.0748376, -0.2812598,
            0.07543884, 0.09845683, 0.08398258, -0.12182314, 0.02637739,
            0.03060149, 0.11870598, -0.52688224, -0.01762842
        ])
        assert_array_almost_equal(reference_coef, model.coef_)

        predicted = model.predict(self.x.values)
        mse = mean_squared_error(self.y['lenfol'], predicted)
        self.assertAlmostEqual(1128.4460587629746, numpy.sqrt(mse))
Exemplo n.º 8
0
    def test_fit_and_predict_regression_no_intercept(self):
        """Fit with ``rank_ratio=0.0`` and ``fit_intercept=False``.

        Skipped for the 'simple' and 'PRSVM' optimizers. Otherwise verifies
        that no ``intercept_`` attribute is set and that the coefficients and
        training RMSE match hard-coded reference values.
        """
        without_regression = {'simple', 'PRSVM'}
        if self.OPTIMIZER in without_regression:
            reason = "regression not implemented for " + self.OPTIMIZER
            raise unittest.SkipTest(reason)

        features = self.x.values
        model = FastSurvivalSVM(
            optimizer=self.OPTIMIZER,
            rank_ratio=0.0,
            max_iter=50,
            fit_intercept=False,
            random_state=0,
        )
        model.fit(features, self.y)

        self.assertFalse(hasattr(model, "intercept_"))

        reference_coef = numpy.array([
            1.39989875, -1.16903161, -0.40195857, -0.05848903, -0.08421557,
            4.11924729, 0.25135451, 1.89067276, -0.25751401, -0.10213143,
            1.56333622, 3.10136873, -2.23644848, -0.11620715
        ])
        assert_array_almost_equal(reference_coef, model.coef_)

        predicted = model.predict(self.x.values)
        mse = mean_squared_error(self.y['lenfol'], predicted)
        self.assertAlmostEqual(15838.510668936022, numpy.sqrt(mse))
    def test_survival_constraints_with_ties():
        """Check the tie-resolving sort order and the constraint matrix built from it.

        First the order returned by ``FastSurvivalSVM._argsort_and_resolve_ties``
        (seeded with ``RandomState(0)``) is compared to a fixed permutation.
        Then the dense form of ``survival_constraints_simple`` applied to the
        reordered event indicator is compared to a fixed 37 x 12 matrix.
        """
        event = numpy.array([True, True, False, False, True, False, True, True, False, False, False, True])
        time = numpy.array([20, 33, 33, 40, 50, 66, 66, 66, 89, 110, 110, 111])

        rng = numpy.random.RandomState(0)
        order = FastSurvivalSVM._argsort_and_resolve_ties(time, rng)
        reference_order = numpy.array([0, 2, 1, 3, 4, 7, 5, 6, 8, 9, 10, 11])
        numpy.testing.assert_array_equal(reference_order, order)

        reference_matrix = numpy.array([
            # rows with -1 in column 0
            [-1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [-1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [-1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
            [-1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
            [-1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
            [-1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
            [-1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
            [-1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
            [-1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
            [-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
            [-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
            # rows with -1 in column 2
            [0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, -1, 0, 1, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, -1, 0, 0, 1, 0, 0, 0, 0, 0, 0],
            [0, 0, -1, 0, 0, 0, 1, 0, 0, 0, 0, 0],
            [0, 0, -1, 0, 0, 0, 0, 1, 0, 0, 0, 0],
            [0, 0, -1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
            [0, 0, -1, 0, 0, 0, 0, 0, 0, 1, 0, 0],
            [0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 1, 0],
            [0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 1],
            # rows with -1 in column 4
            [0, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, -1, 0, 1, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, -1, 0, 0, 1, 0, 0, 0, 0],
            [0, 0, 0, 0, -1, 0, 0, 0, 1, 0, 0, 0],
            [0, 0, 0, 0, -1, 0, 0, 0, 0, 1, 0, 0],
            [0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 1, 0],
            [0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 1],
            # rows with -1 in column 5
            [0, 0, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, -1, 0, 1, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, -1, 0, 0, 1, 0, 0, 0],
            [0, 0, 0, 0, 0, -1, 0, 0, 0, 1, 0, 0],
            [0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 1, 0],
            [0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 1],
            # rows with -1 in column 7
            [0, 0, 0, 0, 0, 0, 0, -1, 1, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, -1, 0, 1, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 1, 0],
            [0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 1],
        ], dtype=numpy.int8)

        # The event indicator is reordered and passed as uint8.
        ordered_event = numpy.asarray(event[order], dtype=numpy.uint8)
        constraints = survival_constraints_simple(ordered_event)
        assert_array_equal(reference_matrix, constraints.todense())
Exemplo n.º 10
0
    def test_compare_clinical_kernel(make_whas500):
        """Compare kernel and linear models that share a clinical kernel.

        ``ClinicalKernelTransform.pairwise_kernel`` is used both as the
        KernelPCA kernel feeding the linear model and as the kernel of the
        kernel model. The prediction vectors must have equal length, and the
        concordance index of the kernel model's predictions is checked against
        the one computed from the linear model's predictions.
        """
        data = make_whas500(to_numeric=True)

        clinical = ClinicalKernelTransform()
        clinical.fit(data.x_data_frame)
        kernel_fn = clinical.pairwise_kernel

        # Linear model on the KernelPCA transform of the features.
        kernel_pca = KernelPCA(kernel=kernel_fn, copy_X=True)
        embedded = kernel_pca.fit_transform(data.x)
        linear_model = FastSurvivalSVM(
            optimizer='rbtree', tol=1e-8, max_iter=500, random_state=0,
        )
        linear_model.fit(embedded, data.y)

        # Kernel model working directly on the features.
        kernel_model = FastKernelSurvivalSVM(
            optimizer='rbtree', kernel=kernel_fn,
            tol=1e-8, max_iter=500, random_state=0,
        )
        kernel_model.fit(data.x, data.y)

        pred_linear = linear_model.predict(kernel_pca.transform(data.x))
        pred_kernel = kernel_model.predict(data.x)

        assert len(pred_linear) == len(pred_kernel)

        reference_cindex = concordance_index_censored(
            data.y['fstat'], data.y['lenfol'], pred_linear)
        assert_cindex_almost_equal(
            data.y['fstat'], data.y['lenfol'], pred_kernel, reference_cindex)
Exemplo n.º 11
0
    def test_fit_and_predict_regression(self):
        """Fit with ``rank_ratio=0.0`` and an intercept, then compare to references.

        Skipped for the 'simple' and 'PRSVM' optimizers. Otherwise the fitted
        intercept, the coefficient vector and the training RMSE are checked
        against hard-coded reference values.
        """
        without_regression = {'simple', 'PRSVM'}
        if self.OPTIMIZER in without_regression:
            reason = "regression not implemented for " + self.OPTIMIZER
            raise unittest.SkipTest(reason)

        features = self.x.values
        model = FastSurvivalSVM(
            optimizer=self.OPTIMIZER,
            rank_ratio=0.0,
            max_iter=50,
            fit_intercept=True,
            random_state=0,
        )
        model.fit(features, self.y)

        self.assertAlmostEqual(6.4160179606675278, model.intercept_)

        reference_coef = numpy.array([
            -0.0730891368237, -0.536630355029, -0.497411603275, 0.269039958377,
            -0.730559850692, -0.0148443526234, 0.285916578892, 0.165960302339,
            -0.301749910087, 0.334855938531, 0.0886214732161, 0.0554890272028,
            -2.12680470014, 0.0421466831393
        ])
        assert_array_almost_equal(reference_coef, model.coef_)

        predicted = model.predict(self.x.values)
        mse = mean_squared_error(self.y['lenfol'], predicted)
        self.assertAlmostEqual(1206.6556186869332, numpy.sqrt(mse))
Exemplo n.º 12
0
    def test_rank_ratio_out_of_bounds(self):
        """``fit`` raises ``ValueError`` for ``rank_ratio`` of -1, 1.2, NaN and inf.

        The first value is passed to the constructor; the remaining ones are
        applied to the same estimator through ``set_params``.
        """
        n_samples = 100
        x = numpy.zeros((n_samples, 10))
        y = Surv.from_arrays(numpy.ones(n_samples, dtype=bool),
                             numpy.arange(n_samples, dtype=float))
        message = r"rank_ratio must be in \[0; 1\]"

        ssvm = FastSurvivalSVM(rank_ratio=-1)
        with self.assertRaisesRegex(ValueError, message):
            ssvm.fit(x, y)

        for invalid_ratio in (1.2, numpy.nan, numpy.inf):
            ssvm.set_params(rank_ratio=invalid_ratio)
            with self.assertRaisesRegex(ValueError, message):
                ssvm.fit(x, y)
Exemplo n.º 13
0
    def test_rank_ratio_out_of_bounds(self):
        """``fit`` raises ``ValueError`` for ``rank_ratio`` of -1, 1.2, NaN and inf.

        The target is a structured array with ``event`` and ``time`` fields.
        The first value is passed to the constructor; the remaining ones are
        applied to the same estimator through ``set_params``.
        """
        n_samples = 100
        x = numpy.zeros((n_samples, 10))
        y = numpy.empty(dtype=[('event', bool), ('time', float)], shape=n_samples)
        y['event'] = numpy.ones(n_samples, dtype=bool)
        y['time'] = numpy.arange(n_samples, dtype=float)
        message = r"rank_ratio must be in \[0; 1\]"

        ssvm = FastSurvivalSVM(rank_ratio=-1)
        with self.assertRaisesRegex(ValueError, message):
            ssvm.fit(x, y)

        for invalid_ratio in (1.2, numpy.nan, numpy.inf):
            ssvm.set_params(rank_ratio=invalid_ratio)
            with self.assertRaisesRegex(ValueError, message):
                ssvm.fit(x, y)
Exemplo n.º 14
0
    def test_rank_ratio_out_of_bounds(fake_data, value):
        """``fit`` raises ``ValueError`` when ``rank_ratio`` is set to ``value``."""
        features, target = fake_data
        message = r"rank_ratio must be in \[0; 1\]"

        model = FastSurvivalSVM(rank_ratio=value)
        with pytest.raises(ValueError, match=message):
            model.fit(features, target)
Exemplo n.º 15
0
    def test_alpha_negative(fake_data):
        """``fit`` raises ``ValueError`` when the model is built with ``alpha=-1``."""
        features, target = fake_data
        message = "alpha must be positive"

        model = FastSurvivalSVM(alpha=-1)
        with pytest.raises(ValueError, match=message):
            model.fit(features, target)
Exemplo n.º 16
0
 def test_fit_uncomparable(whas500_uncomparable, optimizer):
     """``fit`` raises ``NoComparablePairException`` on the uncomparable fixture."""
     features = whas500_uncomparable.x
     target = whas500_uncomparable.y

     model = FastSurvivalSVM(optimizer=optimizer)
     with pytest.raises(NoComparablePairException):
         model.fit(features, target)
Exemplo n.º 17
0
 def test_default_optimizer(self):
     """A default-constructed model reports ``'avltree'`` as optimizer after ``fit``."""
     fitted = FastSurvivalSVM().fit(self.x.values, self.y)
     self.assertEqual('avltree', fitted.optimizer)