def test_feature_mismatch(make_data):
    """Check that ``clinical_kernel`` rejects inputs with mismatching features.

    Two cases are exercised, each expecting a ``ValueError``:

    * two slices of the fixture data holding disjoint column ranges
      (message must match ``'columns do not match'``);
    * the first slice paired with a ``(10, 17)`` array of zeros
      (message must match ``'x and y have different number of features'``).
    """
    frame, _ = make_data()
    first_two = frame.iloc[:, :2]
    remainder = frame.iloc[:, 2:]

    # Disjoint column ranges taken from the same fixture frame.
    with pytest.raises(ValueError, match='columns do not match'):
        clinical_kernel(first_two, remainder)

    # Plain array with 17 columns instead of the 2 columns in ``first_two``.
    zeros = numpy.zeros((10, 17))
    with pytest.raises(ValueError, match='x and y have different number of features'):
        clinical_kernel(first_two, zeros)
Пример #2
0
 def test_clinical_kernel_only_nominal(self):
     """Compare the kernel of the reduced data set with the reference matrix.

     The columns 'age', 'lymph node size' and 'lymph node spread' are
     removed before the kernel is computed; the reference is built with
     ``with_continuous=False`` and ``with_ordinal=False``. Agreement is
     required to 4 decimals.
     """
     excluded = ['age', 'lymph node size', 'lymph node spread']
     reduced = self.data.drop(excluded, axis=1)
     kernel = clinical_kernel(reduced)
     reference = _get_expected_matrix(with_continuous=False, with_ordinal=False)
     assert_array_almost_equal(reference, kernel, 4)
Пример #3
0
 def test_clinical_kernel_no_continuous(self):
     """Kernel of the data without the 'age' column must equal the
     reference matrix built with ``with_continuous=False`` (4 decimals)."""
     without_age = self.data.drop('age', axis=1)
     kernel = clinical_kernel(without_age)
     reference = _get_expected_matrix(with_continuous=False)
     assert_array_almost_equal(reference, kernel, 4)
Пример #4
0
 def test_clinical_kernel_no_nominal(self):
     """Kernel of the data without the 'metastasis' column must equal the
     reference matrix built with ``with_nominal=False`` (4 decimals)."""
     without_metastasis = self.data.drop('metastasis', axis=1)
     kernel = clinical_kernel(without_metastasis)
     reference = _get_expected_matrix(with_nominal=False)
     assert_array_almost_equal(reference, kernel, 4)
Пример #5
0
    def test_clinical_kernel_1(self):
        """Kernel of the complete data set must equal the default
        reference matrix to 4 decimals."""
        kernel = clinical_kernel(self.data)
        reference = _get_expected_matrix()
        assert_array_almost_equal(reference, kernel, 4)
Пример #6
0
    def test_clinical_kernel_x_and_y(self):
        """Kernel between the first three rows and the remaining rows must
        equal the matching off-diagonal block of the reference matrix."""
        head = self.data.iloc[:3, :]
        tail = self.data.iloc[3:, :]
        cross_kernel = clinical_kernel(head, tail)

        # Rows 0..2 against columns 3.. of the full reference matrix.
        reference_block = _get_expected_matrix()[:3, 3:]
        assert_array_almost_equal(reference_block, cross_kernel, 4)
 def test_clinical_kernel_no_ordinal(make_data):
     """Kernel of the fixture data created with ``with_ordinal=False`` must
     equal the expected matrix returned by the fixture (4 decimals)."""
     frame, reference = make_data(with_ordinal=False)
     kernel = clinical_kernel(frame)
     assert_array_almost_equal(reference, kernel, 4)
    def test_clinical_kernel_1(make_data):
        """Kernel of the default fixture data must equal the expected
        matrix returned by the fixture (4 decimals)."""
        frame, reference = make_data()
        kernel = clinical_kernel(frame)
        assert_array_almost_equal(reference, kernel, 4)
    def test_clinical_kernel_x_and_y(make_data):
        """Kernel between the first three rows and the remaining rows must
        equal the matching off-diagonal block of the fixture's matrix."""
        frame, full_reference = make_data()
        head = frame.iloc[:3, :]
        tail = frame.iloc[3:, :]
        cross_kernel = clinical_kernel(head, tail)

        # Rows 0..2 against columns 3.. of the full expected matrix.
        reference_block = full_reference[:3, 3:]
        assert_array_almost_equal(reference_block, cross_kernel, 4)
 def test_clinical_kernel_no_continuous(make_data):
     """Kernel of the fixture data created with ``with_continuous=False``
     must equal the expected matrix returned by the fixture (4 decimals)."""
     frame, reference = make_data(with_continuous=False)
     kernel = clinical_kernel(frame)
     assert_array_almost_equal(reference, kernel, 4)
## Correct follow-up days less than 0 to 0.
# One vectorized clip replaces the element-by-element loop. The result is
# assigned back to the field, so nothing is written through a chained index
# (``data_y[...][idx] = 0``) using a positional counter.
# NOTE(review): assumes data_y['time_to_event'] is a NumPy array field or a
# pandas Series (both provide ``.clip``) -- confirm where data_y is built.
data_y['time_to_event'] = data_y['time_to_event'].clip(0)
# data_y
# df.groupby('status').count()

# Part 2: FastKernelSurvivalSVM

from sklearn.model_selection import ShuffleSplit, GridSearchCV
from sksurv.metrics import concordance_index_censored
from sksurv.svm import FastKernelSurvivalSVM
from sksurv.kernels import clinical_kernel

# Clinical kernel computed once over the covariates in data_x.
kernel_matrix = clinical_kernel(data_x)

# The estimator is configured with kernel="precomputed".
# NOTE(review): presumably it is fitted on kernel_matrix rather than on
# data_x -- confirm in the code that follows.
kssvm = FastKernelSurvivalSVM(
    optimizer="rbtree",
    kernel="precomputed",
    random_state=0,
)


def score_survival_model(model, X, y):
    """Evaluate a model during grid search using Harrell's concordance index.

    Parameters
    ----------
    model
        Fitted estimator exposing ``predict``.
    X
        Input passed unchanged to ``model.predict``.
    y
        Object indexable by ``'status'`` and ``'time_to_event'``.

    Returns
    -------
    The first element of the result of ``concordance_index_censored``.
    """
    estimates = model.predict(X)
    return concordance_index_censored(
        y['status'], y['time_to_event'], estimates
    )[0]


# Candidate values for the 'alpha' hyper-parameter.
# NOTE(review): presumably consumed by GridSearchCV further down -- confirm.
param_grid = {
    'alpha': [0.001, 0.01, 0.1, 0.5, 1, 10, 100, 1000],
}

# 200 random splits, each holding out 30% of the samples; seeded so the
# splits are reproducible.
cv = ShuffleSplit(n_splits=200, test_size=0.3, random_state=0)