def test_compare_clinical_kernel(self):
    """Compare FastSurvivalSVM on KernelPCA features with FastKernelSurvivalSVM.

    Both estimators use the pairwise kernel of a fitted
    ClinicalKernelTransform. Their predictions must have the same length
    and the results of concordance_index_censored must agree.
    """
    data, y, _, _ = load_arff_file(
        WHAS500_FILE, ['fstat', 'lenfol'], '1',
        standardize_numeric=False, to_numeric=False)

    # The transform is fitted on the data as loaded; the models are
    # trained on the standardized and encoded version of it.
    clinical = ClinicalKernelTransform()
    clinical.fit(data)
    x = encode_categorical(standardize(data))

    # Settings shared by both estimators.
    shared = dict(optimizer='rbtree', tol=1e-8, max_iter=1000, random_state=0)

    # KernelPCA output is fed to FastSurvivalSVM.
    kpca = KernelPCA(kernel=clinical.pairwise_kernel)
    embedded = kpca.fit_transform(x)
    plain_model = FastSurvivalSVM(**shared)
    plain_model.fit(embedded, y)

    # FastKernelSurvivalSVM is trained directly on the encoded features.
    kernel_model = FastKernelSurvivalSVM(
        kernel=clinical.pairwise_kernel, **shared)
    kernel_model.fit(x, y)

    pred_plain = plain_model.predict(kpca.transform(x))
    pred_kernel = kernel_model.predict(x)
    self.assertEqual(len(pred_plain), len(pred_kernel))

    fstat, lenfol = y['fstat'], y['lenfol']
    result_plain = concordance_index_censored(fstat, lenfol, pred_plain)
    result_kernel = concordance_index_censored(fstat, lenfol, pred_kernel)

    # The first entry is compared approximately, the remaining ones exactly.
    self.assertAlmostEqual(result_plain[0], result_kernel[0])
    self.assertTupleEqual(result_plain[1:], result_kernel[1:])
def test_compare_rbf(self):
    """Compare FastSurvivalSVM on RBF KernelPCA features with the kernel SVM.

    FastKernelSurvivalSVM is fitted with kernel="rbf" on the loaded data.
    Predictions of both estimators must have the same length and the
    results of concordance_index_censored must agree.
    """
    x, y, _, _ = load_arff_file(WHAS500_FILE, ['fstat', 'lenfol'], '1')

    # Settings shared by both estimators.
    shared = dict(optimizer='rbtree', tol=1e-8, max_iter=1000, random_state=0)

    # KernelPCA output is fed to FastSurvivalSVM.
    kpca = KernelPCA(kernel="rbf")
    embedded = kpca.fit_transform(x)
    plain_model = FastSurvivalSVM(**shared)
    plain_model.fit(embedded, y)

    # FastKernelSurvivalSVM is trained directly on the loaded features.
    kernel_model = FastKernelSurvivalSVM(kernel="rbf", **shared)
    kernel_model.fit(x, y)

    pred_plain = plain_model.predict(kpca.transform(x))
    pred_kernel = kernel_model.predict(x)
    self.assertEqual(len(pred_plain), len(pred_kernel))

    fstat, lenfol = y['fstat'], y['lenfol']
    result_plain = concordance_index_censored(fstat, lenfol, pred_plain)
    result_kernel = concordance_index_censored(fstat, lenfol, pred_kernel)

    # The first entry is compared approximately, the remaining ones exactly.
    self.assertAlmostEqual(result_plain[0], result_kernel[0])
    self.assertTupleEqual(result_plain[1:], result_kernel[1:])
def test_predict_precomputed_kernel_invalid_shape(self):
    """predict() must reject input whose shape does not fit the fitted kernel.

    The model is fitted on the matrix of inner products of ``self.x`` and
    then asked to predict on a (100, 14) array, which must raise ValueError.
    """
    # NOTE(review): a method with this exact name is defined more than once
    # in this file; if both live in the same class, the later definition
    # replaces the earlier one -- confirm and remove the redundant copy.
    features = self.x.values
    gram = numpy.dot(features, features.T)

    model = FastKernelSurvivalSVM(
        optimizer="rbtree", kernel='precomputed', random_state=0)
    model.fit(gram, self.y)

    wrong_shape = numpy.empty((100, 14))
    expected_message = (
        r"Precomputed metric requires shape \(n_queries, n_indexed\)\. "
        r"Got \(100, 14\) for 500 indexed\.")
    with self.assertRaisesRegex(ValueError, expected_message):
        model.predict(wrong_shape)
def test_fit_and_predict_rbf_rbtree(self):
    """Fit with kernel='rbf' and optimizer='rbtree', then check the score.

    Verifies that the model is not flagged as pairwise, that it holds one
    coefficient per row of ``self.x`` and that ``score`` on the training
    data matches the reference value to 3 decimal places.
    """
    ssvm = FastKernelSurvivalSVM(optimizer="rbtree", kernel='rbf', random_state=0)
    ssvm.fit(self.x.values, self.y)

    self.assertFalse(ssvm._pairwise)
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(self.x.shape[0], ssvm.coef_.shape[0])

    c = ssvm.score(self.x.values, self.y)
    self.assertAlmostEqual(0.92230102862313534, c, 3)
def test_fit_and_predict_rbf_avltree(self):
    """Fit with kernel='rbf' and optimizer='avltree', then check the score.

    Verifies that the model is not flagged as pairwise, that it holds one
    coefficient per row of ``self.x`` and that ``score`` on the training
    data lies within 1e-3 of the reference value.
    """
    ssvm = FastKernelSurvivalSVM(optimizer="avltree", kernel='rbf', random_state=0)
    ssvm.fit(self.x.values, self.y)

    self.assertFalse(ssvm._pairwise)
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(self.x.shape[0], ssvm.coef_.shape[0])

    c = ssvm.score(self.x.values, self.y)
    self.assertLessEqual(abs(0.92460312179802795 - c), 1e-3)
def test_fit_and_predict_linear_regression_no_intercept(self):
    """Fit with rank_ratio=0.0, kernel="linear" and fit_intercept=False.

    The fitted model must not expose an ``intercept_`` attribute, and the
    root mean squared error of its predictions against ``self.y['lenfol']``
    must match the reference value to 4 decimal places.
    """
    features = self.x.values
    model = FastKernelSurvivalSVM(
        kernel="linear", optimizer="rbtree", rank_ratio=0.0,
        fit_intercept=False, max_iter=50, random_state=0)
    model.fit(features, self.y)

    self.assertFalse(hasattr(model, "intercept_"))

    predicted = model.predict(features)
    mse = mean_squared_error(self.y['lenfol'], predicted)
    self.assertAlmostEqual(15837.658418546907, numpy.sqrt(mse), 4)
def test_fit_and_predict_linear(self):
    """Fit with kernel='linear' and score on a shuffled subset of rows.

    Verifies that the model is not flagged as pairwise, that it holds one
    coefficient per row of ``self.x`` and that ``score`` on the selected
    rows matches the reference value to 6 decimal places.
    """
    ssvm = FastKernelSurvivalSVM(optimizer="rbtree", kernel='linear', random_state=0)
    ssvm.fit(self.x.values, self.y)

    self.assertFalse(ssvm._pairwise)
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(self.x.shape[0], ssvm.coef_.shape[0])

    # Indices 0..249 in an order fixed by the seeded generator.
    i = numpy.arange(250)
    numpy.random.RandomState(0).shuffle(i)

    c = ssvm.score(self.x.values[i], self.y[i])
    self.assertAlmostEqual(0.76923445664157997, c, 6)
def test_fit_and_predict_hybrid_rbf(self):
    """Fit with rank_ratio=0.5 and kernel="rbf", then check intercept and RMSE.

    The intercept must lie within 0.04 of its reference value and the root
    mean squared error against ``self.y['lenfol']`` within 75 of its
    reference value.
    """
    features = self.x.values
    model = FastKernelSurvivalSVM(
        kernel="rbf", optimizer="rbtree", rank_ratio=0.5,
        fit_intercept=True, max_iter=50, random_state=0)
    model.fit(features, self.y)

    self.assertFalse(model._pairwise)

    intercept_deviation = abs(5.0289145697617164 - model.intercept_)
    self.assertLessEqual(intercept_deviation, 0.04)

    predicted = model.predict(features)
    rmse = numpy.sqrt(mean_squared_error(self.y['lenfol'], predicted))
    rmse_deviation = abs(880.20361811281487 - rmse)
    self.assertLessEqual(rmse_deviation, 75)
def test_fit_and_predict_regression_rbf(self):
    """Fit with rank_ratio=0.0 and kernel="rbf", then check intercept and RMSE.

    The intercept is compared with assertAlmostEqual's default precision;
    the root mean squared error against ``self.y['lenfol']`` is compared
    to 6 decimal places.
    """
    features = self.x.values
    model = FastKernelSurvivalSVM(
        kernel="rbf", optimizer="rbtree", rank_ratio=0.0,
        fit_intercept=True, max_iter=50, random_state=0)
    model.fit(features, self.y)

    self.assertFalse(model._pairwise)
    self.assertAlmostEqual(4.9267218894089533, model.intercept_)

    predicted = model.predict(features)
    mse = mean_squared_error(self.y['lenfol'], predicted)
    self.assertAlmostEqual(783.525277, numpy.sqrt(mse), 6)
def test_predict_precomputed_kernel_invalid_shape(self):
    """predict() must reject input whose shape does not fit the fitted kernel.

    The model is fitted on the matrix of inner products of ``self.x`` and
    then asked to predict on a (100, 14) array, which must raise ValueError.
    """
    # NOTE(review): a method with this exact name is defined more than once
    # in this file; if both live in the same class, only the later
    # definition is kept -- confirm and remove the redundant copy.
    samples = self.x.values
    kernel_matrix = numpy.dot(samples, samples.T)

    estimator = FastKernelSurvivalSVM(
        optimizer="rbtree", kernel='precomputed', random_state=0)
    estimator.fit(kernel_matrix, self.y)

    invalid = numpy.empty((100, 14))
    pattern = (
        r"Precomputed metric requires shape \(n_queries, n_indexed\)\. "
        r"Got \(100, 14\) for 500 indexed\.")
    with self.assertRaisesRegex(ValueError, pattern):
        estimator.predict(invalid)
def test_fit_and_predict_linear_regression_precomputed(self):
    """Fit with rank_ratio=0.0 on a precomputed inner-product matrix.

    The model must be flagged as pairwise, its intercept must match the
    reference value to 5 decimal places, and the root mean squared error
    on a shuffled subset of rows must lie within 0.25 of its reference.
    """
    samples = self.x.values
    gram = numpy.dot(samples, samples.T)

    model = FastKernelSurvivalSVM(
        kernel="precomputed", optimizer="rbtree", rank_ratio=0.0,
        fit_intercept=True, max_iter=50, random_state=0)
    model.fit(gram, self.y)

    self.assertTrue(model._pairwise)
    self.assertAlmostEqual(6.3979746625712295, model.intercept_, 5)

    # Indices 0..249 in an order fixed by the seeded generator.
    subset = numpy.arange(250)
    numpy.random.RandomState(0).shuffle(subset)

    predicted = model.predict(gram[subset])
    observed = self.y['lenfol'][subset]
    rmse = numpy.sqrt(mean_squared_error(observed, predicted))
    self.assertLessEqual(abs(1339.3006854574726 - rmse), 0.25)
def test_fit_and_predict_clinical_kernel(self):
    """Fit with the pairwise kernel of a ClinicalKernelTransform and score.

    The transform is fitted on the data as loaded, while the model is
    trained on the standardized and encoded data. Verifies that the model
    is not flagged as pairwise, that it holds one coefficient per row and
    that ``score`` lies within 1e-3 of the reference value.
    """
    x_full, y, _, _ = load_arff_file(
        WHAS500_FILE, ['fstat', 'lenfol'], '1',
        standardize_numeric=False, to_numeric=False)

    trans = ClinicalKernelTransform()
    trans.fit(x_full)

    x = encode_categorical(standardize(x_full))

    ssvm = FastKernelSurvivalSVM(optimizer="rbtree", kernel=trans.pairwise_kernel,
                                 max_iter=100, random_state=0)
    ssvm.fit(x.values, y)

    self.assertFalse(ssvm._pairwise)
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(x.shape[0], ssvm.coef_.shape[0])

    c = ssvm.score(x.values, y)
    self.assertLessEqual(abs(0.83699051218246412 - c), 1e-3)
def test_unknown_optimizer(self):
    """fit() must raise ValueError when given an unrecognized optimizer name."""
    n_samples = 100
    features = numpy.zeros((n_samples, 10))

    # Structured target: every 'event' is True, 'time' runs from 1 to 100.
    target = numpy.empty(n_samples, dtype=[('event', bool), ('time', float)])
    target['event'] = numpy.ones(n_samples, dtype=bool)
    target['time'] = numpy.arange(1, n_samples + 1, dtype=float)

    model = FastKernelSurvivalSVM(optimizer='random stuff')
    with self.assertRaisesRegex(ValueError, "unknown optimizer: random stuff"):
        model.fit(features, target)
def test_fit_precomputed_kernel_invalid_shape(self):
    """fit() must reject a (100, 14) input when kernel='precomputed'."""
    model = FastKernelSurvivalSVM(
        optimizer="rbtree", kernel='precomputed', random_state=0)

    non_square = numpy.empty((100, 14))

    # Structured target built from pairs of ones.
    ones = numpy.ones(100)
    target = numpy.fromiter(
        zip(ones, ones), dtype=[('event', bool), ('time', float)])

    expected_message = (
        r"Precomputed metric requires shape \(n_queries, n_indexed\)\. "
        r"Got \(100, 14\) for 100 indexed\.")
    with self.assertRaisesRegex(ValueError, expected_message):
        model.fit(non_square, target)
def test_fit_precomputed_kernel_not_symmetric(self):
    """fit() must reject a precomputed kernel matrix that is not symmetric.

    A 100x100 matrix of normally distributed values is used, with entries
    [10, 12] and [12, 10] explicitly set to different values; fitting on
    it must raise ValueError.
    """
    ssvm = FastKernelSurvivalSVM(optimizer="rbtree", kernel='precomputed', random_state=0)

    # Use a local, seeded generator so the test input is reproducible and
    # the global numpy random state is left untouched.
    rnd = numpy.random.RandomState(0)
    x = rnd.randn(100, 100)
    x[10, 12] = -1
    x[12, 10] = 9

    # Structured target built from pairs of ones.
    y = numpy.fromiter(zip(numpy.ones(100), numpy.ones(100)),
                       dtype=[('event', bool), ('time', float)])

    self.assertRaisesRegex(ValueError, "kernel matrix is not symmetric",
                           ssvm.fit, x, y)
def test_default_optimizer(self):
    """A model built without arguments must report 'rbtree' after fitting."""
    fitted = FastKernelSurvivalSVM().fit(self.x.values, self.y)
    self.assertEqual('rbtree', fitted.optimizer)