def test_call_function(make_data):
    """Calling a prepared transform on its own fit data gives the expected matrix.

    ``make_data`` is called without arguments and unpacked as
    ``(data, expected)``; the result is compared to 4 decimals.
    """
    frame, reference = make_data()
    transform = ClinicalKernelTransform(fit_once=True)
    transform.prepare(frame)

    kernel = transform(transform.X_fit_, transform.X_fit_)
    assert_array_almost_equal(reference, kernel, 4)
Exemplo n.º 2
0
    def test_compare_clinical_kernel(self):
        """Compare a kernel survival SVM against a linear SVM on KernelPCA features.

        Both models use the clinical kernel fitted on the full WHAS500 frame.
        Their predictions must have the same length and produce the same
        concordance statistics.
        """
        full_frame, outcome = load_whas500()

        clinical = ClinicalKernelTransform()
        clinical.fit(full_frame)

        # Settings shared by both SVM variants.
        svm_options = {
            "optimizer": 'rbtree',
            "tol": 1e-8,
            "max_iter": 500,
            "random_state": 0,
        }

        # Linear model trained on the explicit KernelPCA embedding.
        embedder = KernelPCA(kernel=clinical.pairwise_kernel, copy_X=True)
        embedded = embedder.fit_transform(self.x)
        linear_svm = FastSurvivalSVM(**svm_options)
        linear_svm.fit(embedded, outcome)

        # Kernel model trained directly on the raw feature values.
        kernel_svm = FastKernelSurvivalSVM(kernel=clinical.pairwise_kernel, **svm_options)
        kernel_svm.fit(self.x.values, outcome)

        linear_pred = linear_svm.predict(embedder.transform(self.x))
        kernel_pred = kernel_svm.predict(self.x.values)

        self.assertEqual(len(linear_pred), len(kernel_pred))

        event, time = outcome['fstat'], outcome['lenfol']
        linear_cindex = concordance_index_censored(event, time, linear_pred)
        kernel_cindex = concordance_index_censored(event, time, kernel_pred)

        self.assertAlmostEqual(linear_cindex[0], kernel_cindex[0])
        self.assertTupleEqual(linear_cindex[1:], kernel_cindex[1:])
    def test_fit_custom_kernel(make_whas500):
        """Fit an EnsembleSelection of linear and clinical-kernel survival SVMs.

        Five alpha values give five linear and five kernel base estimators.
        The fitted ensemble must report 10 members and 10 scores, and its
        predictions must reproduce the pinned concordance statistics.
        """
        whas500 = make_whas500(with_mean=False, with_std=False, to_numeric=True)
        alphas = numpy.exp(numpy.linspace(numpy.log(0.001), numpy.log(0.5), 5))
        grid = ParameterGrid({"alpha": alphas})

        clinical = ClinicalKernelTransform(fit_once=True)
        clinical.prepare(whas500.x_data_frame)

        # Linear models come first, then kernel models, each indexed by grid position.
        linear_models = [
            ("svm_linear_%d" % idx, FastSurvivalSVM(max_iter=100, random_state=0, **params))
            for idx, params in enumerate(grid)
        ]
        kernel_models = [
            ("svm_kernel_%d" % idx,
             FastKernelSurvivalSVM(kernel=clinical.pairwise_kernel, max_iter=45, tol=1e-5,
                                   random_state=0, **params))
            for idx, params in enumerate(grid)
        ]
        base_estimators = linear_models + kernel_models

        folds = KFold(n_splits=3, shuffle=True, random_state=0)
        ensemble = EnsembleSelection(base_estimators, n_estimators=0.4, scorer=score_cindex,
                                     cv=folds, n_jobs=4)

        ensemble.fit(whas500.x, whas500.y)
        assert len(ensemble) == 10
        assert ensemble.scores_.shape == (10,)

        prediction = ensemble.predict(whas500.x)

        assert_cindex_almost_equal(whas500.y['fstat'], whas500.y['lenfol'], prediction,
                                   (0.7978084, 59938, 15178, 33, 119))
Exemplo n.º 4
0
    def test_compare_clinical_kernel(make_whas500):
        """Compare a kernel survival SVM against a linear SVM on KernelPCA features.

        The concordance statistics of the linear model (trained on the
        KernelPCA embedding) serve as the expected value for the kernel model.
        """
        whas500 = make_whas500(to_numeric=True)

        clinical = ClinicalKernelTransform()
        clinical.fit(whas500.x_data_frame)

        # Settings shared by both SVM variants.
        svm_options = {
            "optimizer": 'rbtree',
            "tol": 1e-8,
            "max_iter": 500,
            "random_state": 0,
        }

        # Linear model on the explicit KernelPCA embedding.
        embedder = KernelPCA(kernel=clinical.pairwise_kernel, copy_X=True)
        embedded = embedder.fit_transform(whas500.x)
        linear_svm = FastSurvivalSVM(**svm_options)
        linear_svm.fit(embedded, whas500.y)

        # Kernel model on the raw features.
        kernel_svm = FastKernelSurvivalSVM(kernel=clinical.pairwise_kernel, **svm_options)
        kernel_svm.fit(whas500.x, whas500.y)

        linear_pred = linear_svm.predict(embedder.transform(whas500.x))
        kernel_pred = kernel_svm.predict(whas500.x)

        assert len(linear_pred) == len(kernel_pred)

        reference_cindex = concordance_index_censored(
            whas500.y['fstat'], whas500.y['lenfol'], linear_pred)
        assert_cindex_almost_equal(
            whas500.y['fstat'], whas500.y['lenfol'], kernel_pred, reference_cindex)
Exemplo n.º 5
0
    def test_kernel_transform_feature_mismatch(self):
        """``transform`` rejects input whose feature count differs from the fit data."""
        transform = ClinicalKernelTransform()
        transform.fit(self.data)

        # Build the 17-column input before entering the assertion context.
        wrong_width = numpy.zeros((2, 17), dtype=float)
        with self.assertRaisesRegex(ValueError,
                                    'expected array with 4 features, but got 17'):
            transform.transform(wrong_width)
    def test_fit_custom_kernel(self):
        """Fit an EnsembleSelection of linear and clinical-kernel survival SVMs.

        Five alpha values give five linear and five kernel base estimators.
        The fitted ensemble must report 10 members and 10 scores, and its
        predictions must reproduce the pinned concordance statistics.
        """
        alphas = numpy.exp(numpy.linspace(numpy.log(0.001), numpy.log(0.5), 5))
        grid = ParameterGrid({"alpha": alphas})

        clinical = ClinicalKernelTransform(fit_once=True)
        clinical.prepare(self.x)

        # Linear models come first, then kernel models, each indexed by grid position.
        linear_models = [
            ("svm_linear_%d" % idx, FastSurvivalSVM(max_iter=100, random_state=0, **params))
            for idx, params in enumerate(grid)
        ]
        kernel_models = [
            ("svm_kernel_%d" % idx,
             FastKernelSurvivalSVM(kernel=clinical.pairwise_kernel, max_iter=45, tol=1e-5,
                                   random_state=0, **params))
            for idx, params in enumerate(grid)
        ]
        base_estimators = linear_models + kernel_models

        folds = KFold(n_splits=3, shuffle=True, random_state=0)
        ensemble = EnsembleSelection(base_estimators, n_estimators=0.4, scorer=score_cindex,
                                     cv=folds, n_jobs=4)

        ensemble.fit(self.x.values, self.y)
        self.assertEqual(len(ensemble), 10)
        self.assertTupleEqual(ensemble.scores_.shape, (10,))

        prediction = ensemble.predict(self.x.values)

        observed = concordance_index_censored(self.y['fstat'], self.y['lenfol'], prediction)
        pinned = numpy.array([0.7978084, 59938, 15178, 33, 119])
        assert_array_almost_equal(observed, pinned)
Exemplo n.º 7
0
    def test_call_function(self):
        """Calling a prepared transform on its own fit data gives the expected matrix."""
        transform = ClinicalKernelTransform(fit_once=True)
        transform.prepare(self.data)

        kernel = transform(transform.X_fit_, transform.X_fit_)

        # Compare against the reference matrix to 4 decimals.
        assert_array_almost_equal(_get_expected_matrix(), kernel, 4)
    def test_kernel_transform(make_data):
        """``transform`` applied to the fit data reproduces the expected matrix.

        ``make_data`` is called without arguments and unpacked as
        ``(data, expected)``; the comparison uses 4 decimals.
        """
        frame, reference = make_data()

        transform = ClinicalKernelTransform()
        transform.fit(frame)

        kernel = transform.transform(transform.X_fit_)
        assert_array_almost_equal(reference, kernel, 4)
    def test_pairwise_feature_mismatch(self):
        """``pairwise_kernels`` raises when X has 4 columns and Y has 17."""
        transform = ClinicalKernelTransform()
        transform.fit(self.data)

        message = (r'Incompatible dimension for X and Y matrices: '
                   r'X.shape\[1\] == 4 while Y.shape\[1\] == 17')
        # Build both inputs before entering the assertion context.
        fitted = transform.X_fit_
        wrong_width = numpy.zeros((2, 17), dtype=float)

        with self.assertRaisesRegex(ValueError, message):
            pairwise_kernels(fitted, wrong_width,
                             metric=transform.pairwise_kernel, n_jobs=1)
Exemplo n.º 10
0
    def test_pairwise_x_and_y_error_shape(self):
        """``pairwise_kernel`` rejects two rows with different feature counts."""
        transform = ClinicalKernelTransform()
        transform.fit(self.data)

        # A full first row versus a second row cut down to its first two columns.
        full_row = self.data.iloc[0, :]
        short_row = self.data.iloc[1, :2]

        with self.assertRaisesRegex(ValueError,
                                    "X and Y have different number of features"):
            transform.pairwise_kernel(full_row, short_row)
Exemplo n.º 11
0
    def test_kernel_transform(self):
        """``transform`` applied to the fit data reproduces the expected matrix."""
        transform = ClinicalKernelTransform()
        transform.fit(self.data)

        kernel = transform.transform(transform.X_fit_)

        # Compare against the reference matrix to 4 decimals.
        reference = _get_expected_matrix()
        assert_array_almost_equal(reference, kernel, 4)
    def test_pairwise_no_nominal(make_data):
        """Check the pairwise kernel on data requested with ``with_nominal=False``.

        Rows 0-2 of the fit data are compared against the remaining rows; the
        result must match the corresponding block of the expected matrix.
        """
        frame, reference = make_data(with_nominal=False)

        transform = ClinicalKernelTransform()
        transform.fit(frame)

        head = transform.X_fit_[:3, :]
        tail = transform.X_fit_[3:, :]
        kernel = pairwise_kernels(head, tail, metric=transform.pairwise_kernel, n_jobs=1)

        assert_array_almost_equal(reference[:3, 3:], kernel, 4)
Exemplo n.º 13
0
    def test_prepare(self):
        """A clone of a prepared transform, refit on ``X_fit_``, gives the expected rows.

        The clone transforms the first four rows of the fit data, and the
        result is compared to the first four rows of the reference matrix.
        """
        prepared = ClinicalKernelTransform(fit_once=True)
        prepared.prepare(self.data)

        refit = clone(prepared).fit(prepared.X_fit_)
        kernel = refit.transform(prepared.X_fit_[:4, :])

        reference = _get_expected_matrix()
        assert_array_almost_equal(reference[:4, :], kernel, 4)
    def test_prepare_error_dtype():
        """``prepare`` raises ``TypeError`` for a frame containing a datetime64 column."""
        transform = ClinicalKernelTransform(fit_once=True)

        ages = [12, 61, 18, 21, 57, 17]
        dates = numpy.array(
            ["2016-01-01", "1954-06-30", "1999-03-01", "2005-02-25", "2112-12-31",
             "1731-09-16"], dtype='datetime64')
        frame = pandas.DataFrame({"age": ages, "date": dates})

        with pytest.raises(TypeError, match=r'unsupported dtype: dtype\(.+\)'):
            transform.prepare(frame)
    def test_pairwise_feature_mismatch(make_data):
        """``pairwise_kernels`` raises when X has 4 columns and Y has 17."""
        frame, _ = make_data()

        transform = ClinicalKernelTransform()
        transform.fit(frame)

        message = (r'Incompatible dimension for X and Y matrices: '
                   r'X.shape\[1\] == 4 while Y.shape\[1\] == 17')
        with pytest.raises(ValueError, match=message):
            pairwise_kernels(transform.X_fit_, numpy.zeros((2, 17), dtype=float),
                             metric=transform.pairwise_kernel, n_jobs=1)
    def test_pairwise_no_nominal(self):
        """Check the pairwise kernel after dropping the ``metastasis`` column.

        Rows 0-2 of the fit data are compared against the remaining rows; the
        result must match the corresponding block of the reference matrix
        built with ``with_nominal=False``.
        """
        reduced = self.data.drop('metastasis', axis=1)

        transform = ClinicalKernelTransform()
        transform.fit(reduced)

        head = transform.X_fit_[:3, :]
        tail = transform.X_fit_[3:, :]
        kernel = pairwise_kernels(head, tail, metric=transform.pairwise_kernel, n_jobs=1)

        reference = _get_expected_matrix(with_nominal=False)
        assert_array_almost_equal(reference[:3, 3:], kernel, 4)
    def test_pairwise(make_data):
        """``pairwise_kernels`` with the clinical kernel reproduces the expected matrix.

        The fit data is compared against itself and checked to 4 decimals.
        """
        frame, reference = make_data()

        transform = ClinicalKernelTransform()
        transform.fit(frame)

        kernel = pairwise_kernels(transform.X_fit_, transform.X_fit_,
                                  metric=transform.pairwise_kernel, n_jobs=1)
        assert_array_almost_equal(reference, kernel, 4)
Exemplo n.º 18
0
    def test_call_function_x_and_y(self):
        """Calling the transform with distinct X and Y gives the matching matrix block.

        X is the first three fit rows and Y the remaining rows; the result is
        compared to rows 0-2, columns 3+ of the reference matrix.
        """
        transform = ClinicalKernelTransform(fit_once=True)
        transform.prepare(self.data)

        head = transform.X_fit_[:3, :]
        tail = transform.X_fit_[3:, :]
        kernel = transform(head, tail)

        reference_block = _get_expected_matrix()[:3, 3:]
        assert_array_almost_equal(reference_block, kernel, 4)
    def test_call_function_x_and_y(make_data):
        """Calling the transform with distinct X and Y gives the matching matrix block.

        X is the first three fit rows and Y the remaining rows; the result is
        compared to rows 0-2, columns 3+ of the expected matrix.
        """
        frame, full_reference = make_data()

        transform = ClinicalKernelTransform(fit_once=True)
        transform.prepare(frame)

        head = transform.X_fit_[:3, :]
        tail = transform.X_fit_[3:, :]
        kernel = transform(head, tail)

        assert_array_almost_equal(full_reference[:3, 3:], kernel, 4)
    def test_prepare(make_data):
        """A clone of a prepared transform, refit on ``X_fit_``, gives the expected rows.

        The clone transforms the first four rows of the fit data, and the
        result is compared to the first four rows of the expected matrix.
        """
        frame, reference = make_data()

        prepared = ClinicalKernelTransform(fit_once=True)
        prepared.prepare(frame)

        refit = clone(prepared).fit(prepared.X_fit_)
        kernel = refit.transform(prepared.X_fit_[:4, :])

        assert_array_almost_equal(reference[:4, :], kernel, 4)
    def test_pairwise(self):
        """``pairwise_kernels`` with the clinical kernel reproduces the reference matrix.

        The fit data is compared against itself and checked to 4 decimals.
        """
        transform = ClinicalKernelTransform()
        transform.fit(self.data)

        kernel = pairwise_kernels(transform.X_fit_, transform.X_fit_,
                                  metric=transform.pairwise_kernel, n_jobs=1)

        reference = _get_expected_matrix()
        assert_array_almost_equal(reference, kernel, 4)
    def test_prepare_error_fit_once(make_data):
        """``prepare`` raises ``ValueError`` when ``fit_once`` is False.

        ``make_data()`` is unpacked as ``(data, expected)`` like the sibling
        tests, so ``prepare`` receives the data itself rather than the whole
        return value. The test then no longer depends on the ``fit_once`` check
        running before any validation of the input.
        """
        data, _ = make_data()
        t = ClinicalKernelTransform(fit_once=False)

        with pytest.raises(
                ValueError,
                match="prepare can only be used if fit_once parameter is set to True"):
            t.prepare(data)
    def test_pairwise_x_and_y(make_data):
        """``pairwise_kernels`` with distinct X and Y gives the matching matrix block.

        X is the first three fit rows and Y the remaining rows; the result is
        compared to rows 0-2, columns 3+ of the expected matrix.
        """
        frame, full_reference = make_data()

        transform = ClinicalKernelTransform()
        transform.fit(frame)

        head = transform.X_fit_[:3, :]
        tail = transform.X_fit_[3:, :]
        kernel = pairwise_kernels(head, tail, metric=transform.pairwise_kernel, n_jobs=1)

        assert_array_almost_equal(full_reference[:3, 3:], kernel, 4)
    def test_pairwise_x_and_y(self):
        """``pairwise_kernels`` with distinct X and Y gives the matching matrix block.

        X is the first three fit rows and Y the remaining rows; the result is
        compared to rows 0-2, columns 3+ of the reference matrix.
        """
        transform = ClinicalKernelTransform()
        transform.fit(self.data)

        head = transform.X_fit_[:3, :]
        tail = transform.X_fit_[3:, :]
        kernel = pairwise_kernels(head, tail, metric=transform.pairwise_kernel, n_jobs=1)

        reference_block = _get_expected_matrix()[:3, 3:]
        assert_array_almost_equal(reference_block, kernel, 4)
Exemplo n.º 25
0
    def test_fit_and_predict_clinical_kernel(make_whas500):
        """Fit a kernel survival SVM with the clinical kernel and check its score.

        The model must not be flagged as pairwise, must have one coefficient
        per training sample, and must score at least 0.854 on the training data.
        """
        whas500 = make_whas500(to_numeric=True)

        clinical = ClinicalKernelTransform()
        clinical.fit(whas500.x_data_frame)

        model = FastKernelSurvivalSVM(optimizer="rbtree", kernel=clinical.pairwise_kernel,
                                      tol=7e-7, max_iter=100, random_state=0)
        model.fit(whas500.x, whas500.y)

        assert not model._pairwise
        assert whas500.x.shape[0] == model.coef_.shape[0]

        concordance = model.score(whas500.x, whas500.y)
        assert concordance >= 0.854
Exemplo n.º 26
0
    def test_prepare_error_fit_once(self):
        """``prepare`` raises ``ValueError`` when ``fit_once`` is False."""
        transform = ClinicalKernelTransform(fit_once=False)

        with self.assertRaisesRegex(
                ValueError,
                "prepare can only be used if fit_once parameter is set to True"):
            transform.prepare(self.data)
    def test_fit_and_predict_clinical_kernel(self):
        """Fit a kernel survival SVM with the clinical kernel and check its score.

        The clinical kernel is fitted on the full WHAS500 frame, and the SVM is
        trained on ``self.x.values``. The model must not be flagged as
        pairwise, must have one coefficient per training sample, and must score
        at least 0.854 on the training data.
        """
        x_full, y = load_whas500()

        trans = ClinicalKernelTransform()
        trans.fit(x_full)
        x = self.x

        ssvm = FastKernelSurvivalSVM(optimizer="rbtree", kernel=trans.pairwise_kernel,
                                     tol=7e-7, max_iter=100, random_state=0)
        ssvm.fit(x.values, y)

        self.assertFalse(ssvm._pairwise)
        # assertEquals is a deprecated alias that was removed in Python 3.12.
        self.assertEqual(x.shape[0], ssvm.coef_.shape[0])

        c = ssvm.score(x.values, y)
        self.assertGreaterEqual(c, 0.854)
Exemplo n.º 28
0
    def test_fit_error_ndim(self):
        """``fit`` raises ``ValueError`` for 1-d and 3-d input arrays."""
        transform = ClinicalKernelTransform()

        # Each case pairs the expected message with the shape of the random input.
        cases = (
            ("expected 2d array, but got 1", (31,)),
            ("expected 2d array, but got 3", (31, 20, 2)),
        )
        for message, shape in cases:
            sample = numpy.random.randn(*shape)
            with self.assertRaisesRegex(ValueError, message):
                transform.fit(sample)
    def test_prepare_error_dtype(self):
        """``prepare`` raises ``TypeError`` for a frame containing a datetime64 column."""
        transform = ClinicalKernelTransform(fit_once=True)

        ages = [12, 61, 18, 21, 57, 17]
        dates = numpy.array(
            ["2016-01-01", "1954-06-30", "1999-03-01", "2005-02-25", "2112-12-31",
             "1731-09-16"], dtype='datetime64')
        frame = pandas.DataFrame({"age": ages, "date": dates})

        with self.assertRaisesRegex(TypeError, r'unsupported dtype: dtype\(.+\)'):
            transform.prepare(frame)
Exemplo n.º 30
0
    def test_fit_and_predict_clinical_kernel(self):
        """Fit a kernel survival SVM with the clinical kernel and check its score.

        The clinical kernel is fitted on the full WHAS500 frame, and the SVM is
        trained on the standardized, categorically encoded features. The model
        must not be flagged as pairwise, must have one coefficient per training
        sample, and must score within 1e-3 of the pinned concordance value.
        """
        x_full, y = load_whas500()

        trans = ClinicalKernelTransform()
        trans.fit(x_full)

        x = encode_categorical(standardize(x_full))

        ssvm = FastKernelSurvivalSVM(optimizer="rbtree",
                                     kernel=trans.pairwise_kernel,
                                     max_iter=100,
                                     random_state=0)
        ssvm.fit(x.values, y)

        self.assertFalse(ssvm._pairwise)
        # assertEquals is a deprecated alias that was removed in Python 3.12.
        self.assertEqual(x.shape[0], ssvm.coef_.shape[0])

        c = ssvm.score(x.values, y)
        # Same tolerance as before (|expected - c| <= 1e-3), with a clearer failure message.
        self.assertAlmostEqual(0.83699051218246412, c, delta=1e-3)