def test_survival_squared_hinge_loss(self):
    """Naive and fast survival SVM must agree under the squared hinge loss.

    Both models are fit on ``self.x`` / ``self.y`` with the same tolerance,
    iteration limit and seed. Their coefficients have to match to three
    decimals and the concordance statistics of their in-sample predictions
    have to be equal.
    """
    solver_args = dict(tol=1e-8, max_iter=1000, random_state=0)

    naive_model = NaiveSurvivalSVM(loss='squared_hinge', dual=False, **solver_args)
    naive_model.fit(self.x, self.y)

    fast_model = FastSurvivalSVM(optimizer='avltree', **solver_args)
    fast_model.fit(self.x, self.y)

    assert_array_almost_equal(naive_model.coef_.ravel(), fast_model.coef_, 3)

    naive_pred = naive_model.predict(self.x)
    fast_pred = fast_model.predict(self.x)
    self.assertEqual(len(naive_pred), len(fast_pred))

    naive_stats = concordance_index_censored(
        self.y['fstat'], self.y['lenfol'], naive_pred)
    fast_stats = concordance_index_censored(
        self.y['fstat'], self.y['lenfol'], fast_pred)

    # First entry is the c-index itself, the remainder are the pair counts.
    self.assertAlmostEqual(naive_stats[0], fast_stats[0])
    self.assertTupleEqual(naive_stats[1:], fast_stats[1:])
def test_compare_rbf(self):
    """Linear SVM on RBF KernelPCA features must match the RBF kernel SVM.

    A ``FastSurvivalSVM`` is trained on the KernelPCA-transformed WHAS500
    features, a ``FastKernelSurvivalSVM`` with ``kernel="rbf"`` on the raw
    features. The concordance statistics of both sets of in-sample
    predictions are compared.
    """
    x, y, _, _ = load_arff_file(WHAS500_FILE, ['fstat', 'lenfol'], '1')
    solver_args = dict(optimizer='rbtree', tol=1e-8, max_iter=1000, random_state=0)

    kpca = KernelPCA(kernel="rbf")
    x_embedded = kpca.fit_transform(x)

    linear_model = FastSurvivalSVM(**solver_args)
    linear_model.fit(x_embedded, y)

    kernel_model = FastKernelSurvivalSVM(kernel="rbf", **solver_args)
    kernel_model.fit(x, y)

    linear_pred = linear_model.predict(kpca.transform(x))
    kernel_pred = kernel_model.predict(x)
    self.assertEqual(len(linear_pred), len(kernel_pred))

    linear_stats = concordance_index_censored(y['fstat'], y['lenfol'], linear_pred)
    kernel_stats = concordance_index_censored(y['fstat'], y['lenfol'], kernel_pred)

    self.assertAlmostEqual(linear_stats[0], kernel_stats[0])
    self.assertTupleEqual(linear_stats[1:], kernel_stats[1:])
def test_compare_clinical_kernel(self):
    """Linear SVM on clinical-kernel PCA features must match the kernel SVM.

    The clinical kernel transform is fit on the raw (non-standardized,
    non-numeric) WHAS500 data, while both models consume the standardized and
    categorically encoded features. The concordance statistics of both sets
    of in-sample predictions are compared.
    """
    x_full, y, _, _ = load_arff_file(
        WHAS500_FILE, ['fstat', 'lenfol'], '1',
        standardize_numeric=False, to_numeric=False)

    clinical = ClinicalKernelTransform()
    clinical.fit(x_full)

    x = encode_categorical(standardize(x_full))
    solver_args = dict(optimizer='rbtree', tol=1e-8, max_iter=1000, random_state=0)

    kpca = KernelPCA(kernel=clinical.pairwise_kernel)
    x_embedded = kpca.fit_transform(x)

    linear_model = FastSurvivalSVM(**solver_args)
    linear_model.fit(x_embedded, y)

    kernel_model = FastKernelSurvivalSVM(kernel=clinical.pairwise_kernel, **solver_args)
    kernel_model.fit(x, y)

    linear_pred = linear_model.predict(kpca.transform(x))
    kernel_pred = kernel_model.predict(x)
    self.assertEqual(len(linear_pred), len(kernel_pred))

    linear_stats = concordance_index_censored(y['fstat'], y['lenfol'], linear_pred)
    kernel_stats = concordance_index_censored(y['fstat'], y['lenfol'], kernel_pred)

    self.assertAlmostEqual(linear_stats[0], kernel_stats[0])
    self.assertTupleEqual(linear_stats[1:], kernel_stats[1:])
def train_test_model(data):
    """Fit and score one parameter setting on one train/test split.

    ``data`` is a 4-tuple ``(train_index, test_index, params, fold)``.
    ``estimator``, ``x`` and ``y`` are free variables taken from the
    surrounding scope.

    Returns a copy of ``params`` extended with the keys ``'c-index'``,
    ``'error'`` (RMSE on the exponentiated scale, uncensored test samples
    only), ``'n_events'`` and ``'fold'``. If fitting or scoring raises, the
    exception is logged and the three metrics are set to NaN.
    """
    train_index, test_index, params, fold = data

    # Training
    model = clone(estimator)
    model.set_params(**params)

    result = params.copy()
    try:
        model.fit(x[train_index, :], y[train_index])

        # Testing
        pred = model.predict(x[test_index, :])
        y_test = y[test_index]
        is_event = y_test['event']

        result['c-index'] = concordance_index_censored(is_event, y_test['time'], pred)[0]

        # for c-index, the sign of the predictions is flipped, flip it again for regression
        pred_regression = -pred[is_event]
        # convert from log-scale back to original scale and compute RMSE
        observed = numpy.exp(y_test['time'][is_event])
        predicted = numpy.exp(pred_regression)
        result['error'] = numpy.sqrt(mean_squared_error(observed, predicted))
        result['n_events'] = numpy.sum(is_event)
    except Exception as e:
        # log errors to IPython profile's log files
        Application.instance().log.exception(e)
        nan = float('nan')
        result.update({'c-index': nan, 'error': nan, 'n_events': nan})

    result['fold'] = fold
    return result
def _score_cindex(est, X_test, y_test, **kwargs):
    """Return the concordance index of ``est`` on ``(X_test, y_test)``.

    ``y_test`` must be a structured array with exactly two fields; the first
    is used as the event indicator and the second as the observed time.
    Extra keyword arguments are accepted and ignored.
    """
    event_field, time_field = y_test.dtype.names
    prediction = est.predict(X_test)
    cindex = concordance_index_censored(
        y_test[event_field], y_test[time_field], prediction)[0]
    return cindex
def train_test_model(data):
    """Train a clone of ``estimator`` on one fold and report its test metrics.

    Parameters
    ----------
    data : tuple
        ``(train_index, test_index, params, fold)``.

    Returns
    -------
    dict
        Copy of ``params`` plus ``'c-index'``, ``'error'``, ``'n_events'``
        and ``'fold'``. The three metrics are NaN when an exception occurred
        during fitting or evaluation (the exception is logged).

    ``estimator``, ``x`` and ``y`` are looked up in the surrounding scope.
    """
    train_index, test_index, params, fold = data

    # Training
    fitted = clone(estimator)
    fitted.set_params(**params)

    scores = params.copy()
    try:
        fitted.fit(x[train_index, :], y[train_index])

        # Testing
        p = fitted.predict(x[test_index, :])
        held_out = y[test_index]

        cindex_result = concordance_index_censored(
            held_out['event'], held_out['time'], p)
        scores['c-index'] = cindex_result[0]

        # for c-index, the sign of the predictions is flipped, flip it again for regression
        p_regression = -p[held_out['event']]

        # convert from log-scale back to original scale and compute RMSE
        mse = mean_squared_error(
            numpy.exp(held_out['time'][held_out['event']]),
            numpy.exp(p_regression))
        scores['error'] = numpy.sqrt(mse)
        scores['n_events'] = numpy.sum(held_out['event'])
    except Exception as e:
        # log errors to IPython profile's log files
        Application.instance().log.exception(e)
        for metric in ('c-index', 'error', 'n_events'):
            scores[metric] = float('nan')

    scores['fold'] = fold
    return scores
def test_breast_cancer_cvxpy(self):
    """Regression test for the cvxpy solver with ``pairs="next"``.

    Checks the shape of ``coef_`` (one row, one column per training sample)
    and the full concordance result of the in-sample predictions against
    stored reference values.
    """
    model = MinlipSurvivalAnalysis(solver="cvxpy", alpha=1, pairs="next")
    model.fit(self.x.values, self.y)

    n_samples = self.x.shape[0]
    self.assertTupleEqual((1, n_samples), model.coef_.shape)

    prediction = model.predict(self.x.values)
    actual = concordance_index_censored(self.y['cens'], self.y['time'], prediction)

    expected = numpy.array([0.59576770470121443, 79280, 53792, 0, 32])
    assert_array_almost_equal(expected, actual)
def test_concordance_index_no_censoring_all_correct(self):
    """Without censoring and with perfectly ordered estimates the c-index is 1.

    Times are strictly increasing and estimates strictly decreasing, so all
    28 pairs of the 8 samples are expected to be concordant.
    """
    time = [1, 5, 6, 11, 34, 45, 46, 50]
    n_samples = len(time)
    event = numpy.repeat(True, n_samples)
    estimate = numpy.arange(n_samples)[::-1]

    result = concordance_index_censored(event, time, estimate)
    cindex, concordant, discordant, tied_risk, tied_time = result

    self.assertEqual(28, concordant)
    self.assertEqual(0, discordant)
    self.assertEqual(0, tied_risk)
    self.assertEqual(0, tied_time)
    self.assertEqual(1.0, cindex)
def test_concordance_index_no_censoring_all_correct(self):
    """All events observed, estimates in exactly reversed time order.

    Expects 28 concordant pairs, no discordant or tied pairs, and a
    concordance index of exactly 1.0.
    """
    observed_time = [1, 5, 6, 11, 34, 45, 46, 50]
    all_events = numpy.repeat(True, len(observed_time))
    # highest estimate for the earliest time
    descending_estimate = numpy.arange(len(observed_time))[::-1]

    c, n_concordant, n_discordant, n_tied_risk, n_tied_time = \
        concordance_index_censored(all_events, observed_time, descending_estimate)

    self.assertEqual(28, n_concordant)
    self.assertEqual(0, n_discordant)
    self.assertEqual(0, n_tied_risk)
    self.assertEqual(0, n_tied_time)
    self.assertEqual(1.0, c)
def test_breast_cancer_rbf_cvxopt(self):
    """Regression test for the cvxopt solver with an RBF kernel.

    Checks the shape of ``coef_`` and compares the full concordance result
    of the in-sample predictions against stored reference values.
    """
    model = MinlipSurvivalAnalysis(
        solver="cvxopt", alpha=1, kernel="rbf", pairs="next")
    model.fit(self.x.values, self.y)

    expected_shape = (1, self.x.shape[0])
    self.assertTupleEqual(expected_shape, model.coef_.shape)

    prediction = model.predict(self.x.values)
    actual = concordance_index_censored(self.y['cens'], self.y['time'], prediction)

    expected = numpy.array([0.63261242034387399, 84182, 48888, 2, 32])
    assert_array_almost_equal(expected, actual)
def test_toy_minlip_predict_1_cvxpy(self):
    """The cvxpy-based model must rank the toy data perfectly.

    Expects a concordance index of 1.0 with 11 concordant pairs and no
    discordant or tied pairs on the training data.
    """
    model = MinlipSurvivalAnalysis(solver="cvxpy", alpha=1, pairs="next")
    model.fit(self.x, self.y)

    prediction = model.predict(self.x)
    stats = concordance_index_censored(self.y['status'], self.y['time'], prediction)

    self.assertEqual(1.0, stats[0])
    self.assertEqual(11, stats[1])
    self.assertEqual(0, stats[2])
    self.assertEqual(0, stats[3])
    self.assertEqual(0, stats[4])
def test_concordance_index_with_tied_event_and_time(self):
    """Concordance index on data with censoring, tied times and tied estimates.

    Expects 12 concordant, 9 discordant, 1 tied-estimate and 1 tied-time
    pair, giving a c-index of about 0.5681818.
    """
    event = [True, False, False, False, True, False,
             True, True, False, False, False, True]
    time = [34, 11, 11, 5, 1, 89, 13, 45, 7, 13, 9, 13]
    estimate = [1, 19, 13, 13, 15, 14, 19, 23, 11, 10, 11, 1]

    result = concordance_index_censored(event, time, estimate)
    cindex, concordant, discordant, tied_risk, tied_time = result

    self.assertEqual(12, concordant)
    self.assertEqual(9, discordant)
    self.assertEqual(1, tied_risk)
    self.assertEqual(1, tied_time)
    self.assertAlmostEqual(0.5681818, cindex, 6)
def test_concordance_index_with_tied_event(self):
    """Concordance index when two events share the same time.

    The three input lists are passed in reversed order. Expects 9
    concordant, 8 discordant, no tied-estimate and 1 tied-time pair, giving
    a c-index of about 0.5294118.
    """
    event = [False, True, False, True, True, False, True, False, False]
    time = [1, 5, 6, 11, 11, 34, 45, 45, 50]
    estimate = [5, 8, 11, 19, 34, 12, 3, 9, 12]

    # Feed the samples back to front.
    event_rev = list(reversed(event))
    time_rev = list(reversed(time))
    estimate_rev = list(reversed(estimate))

    result = concordance_index_censored(event_rev, time_rev, estimate_rev)
    cindex, concordant, discordant, tied_risk, tied_time = result

    self.assertEqual(9, concordant)
    self.assertEqual(8, discordant)
    self.assertEqual(0, tied_risk)
    self.assertEqual(1, tied_time)
    self.assertAlmostEqual(0.5294118, cindex, 6)
def test_concordance_index_with_tied_time2(self):
    """Concordance index when the repeated times belong to censored samples.

    Expects 3 concordant and 12 discordant pairs with no ties counted,
    giving a c-index of 0.2.
    """
    event = [False, True, True, False, False, False, True, False, False]
    time = [1, 5, 6, 11, 11, 34, 45, 45, 50]
    estimate = [5, 8, 11, 19, 34, 12, 3, 9, 12]

    result = concordance_index_censored(event, time, estimate)
    cindex, concordant, discordant, tied_risk, tied_time = result

    self.assertEqual(3, concordant)
    self.assertEqual(12, discordant)
    self.assertEqual(0, tied_risk)
    self.assertEqual(0, tied_time)
    self.assertAlmostEqual(0.2, cindex, 6)
def test_concordance_index_no_censoring_all_wrong(self):
    """Without censoring and with fully inverted estimates the c-index is 0.

    Times and estimates both increase, so all 28 pairs of the 8 samples are
    expected to be discordant.
    """
    time = [1, 5, 6, 11, 34, 45, 46, 50]
    n_samples = len(time)
    event = numpy.repeat(True, n_samples)
    # order is exactly reversed
    estimate = numpy.arange(n_samples)

    result = concordance_index_censored(event, time, estimate)
    cindex, concordant, discordant, tied_risk, tied_time = result

    self.assertEqual(0, concordant)
    self.assertEqual(28, discordant)
    self.assertEqual(0, tied_risk)
    self.assertEqual(0, tied_time)
    self.assertEqual(0.0, cindex)
def test_concordance_index(self):
    """Concordance index on the stored WHAS500 predictions.

    The comma-separated data file is read with column 0 as the event flag
    (1 means event), column 1 as the time and column 2 as the risk score.
    The result is compared against stored reference counts.
    """
    table = numpy.loadtxt(WHAS500_DATA_FILE, delimiter=",")
    event = table[:, 0] == 1
    time = table[:, 1]
    risk = table[:, 2]

    result = concordance_index_censored(event, time, risk)
    cindex, concordant, discordant, tied_risk, tied_time = result

    self.assertEqual(57849, concordant)
    self.assertEqual(17300, discordant)
    self.assertEqual(0, tied_risk)
    self.assertEqual(119, tied_time)
    self.assertAlmostEqual(0.7697907, cindex, 6)
def test_concordance_index_no_censoring_all_wrong(self):
    """All events observed, estimates ordered the same way as the times.

    Expects 28 discordant pairs, no concordant or tied pairs, and a
    concordance index of exactly 0.0.
    """
    observed_time = [1, 5, 6, 11, 34, 45, 46, 50]
    all_events = numpy.repeat(True, len(observed_time))
    # order is exactly reversed
    ascending_estimate = numpy.arange(len(observed_time))

    c, n_concordant, n_discordant, n_tied_risk, n_tied_time = \
        concordance_index_censored(all_events, observed_time, ascending_estimate)

    self.assertEqual(0, n_concordant)
    self.assertEqual(28, n_discordant)
    self.assertEqual(0, n_tied_risk)
    self.assertEqual(0, n_tied_time)
    self.assertEqual(0.0, c)
def test_concordance_index(self):
    """Check the concordance result for the WHAS500 reference file.

    Columns of the loaded array are used as: 0 = event flag (compared to 1),
    1 = time, 2 = risk score.
    """
    dat = numpy.loadtxt(WHAS500_DATA_FILE, delimiter=",")
    is_event, observed_time, risk_score = dat[:, 0] == 1, dat[:, 1], dat[:, 2]

    c, n_concordant, n_discordant, n_tied_risk, n_tied_time = \
        concordance_index_censored(is_event, observed_time, risk_score)

    self.assertEqual(57849, n_concordant)
    self.assertEqual(17300, n_discordant)
    self.assertEqual(0, n_tied_risk)
    self.assertEqual(119, n_tied_time)
    self.assertAlmostEqual(0.7697907, c, 6)
def test_concordance_index_with_tied_event(self):
    """Two events at time 11; inputs are handed over in reversed order.

    Expects 9 concordant and 8 discordant pairs, no tied estimates, one
    tied time, and a c-index of about 0.5294118.
    """
    is_event = [False, True, False, True, True, False, True, False, False]
    observed_time = [1, 5, 6, 11, 11, 34, 45, 45, 50]
    risk_score = [5, 8, 11, 19, 34, 12, 3, 9, 12]

    c, n_concordant, n_discordant, n_tied_risk, n_tied_time = \
        concordance_index_censored(
            is_event[::-1], observed_time[::-1], risk_score[::-1])

    self.assertEqual(9, n_concordant)
    self.assertEqual(8, n_discordant)
    self.assertEqual(0, n_tied_risk)
    self.assertEqual(1, n_tied_time)
    self.assertAlmostEqual(0.5294118, c, 6)
def test_concordance_index_with_tied_time2(self):
    """Repeated times occur only among censored samples.

    Expects 3 concordant and 12 discordant pairs, no ties, and a c-index
    of 0.2.
    """
    is_event = [False, True, True, False, False, False, True, False, False]
    observed_time = [1, 5, 6, 11, 11, 34, 45, 45, 50]
    risk_score = [5, 8, 11, 19, 34, 12, 3, 9, 12]

    c, n_concordant, n_discordant, n_tied_risk, n_tied_time = \
        concordance_index_censored(is_event, observed_time, risk_score)

    self.assertEqual(3, n_concordant)
    self.assertEqual(12, n_discordant)
    self.assertEqual(0, n_tied_risk)
    self.assertEqual(0, n_tied_time)
    self.assertAlmostEqual(0.2, c, 6)
def test_breast_cancer_rbf_cvxpy(self):
    """Regression test for the cvxpy solver with an RBF kernel.

    Checks the shape of ``coef_`` and the individual entries of the
    concordance result of the in-sample predictions against stored
    reference values; the c-index itself is compared to three decimals.
    """
    model = MinlipSurvivalAnalysis(
        solver="cvxpy", alpha=1, kernel="rbf", pairs="next")
    model.fit(self.x.values, self.y)

    expected_shape = (1, self.x.shape[0])
    self.assertTupleEqual(expected_shape, model.coef_.shape)

    prediction = model.predict(self.x.values)
    stats = concordance_index_censored(self.y['cens'], self.y['time'], prediction)

    self.assertAlmostEqual(0.6286334, stats[0], 3)
    self.assertEqual(83653, stats[1])
    self.assertEqual(49418, stats[2])
    self.assertEqual(1, stats[3])
    self.assertEqual(32, stats[4])
def test_toy_hinge_nearest_fit(self):
    """Hinge-loss survival SVM with ``pairs="nearest"`` on the toy data.

    Checks the shape of ``coef_`` and expects perfect ranking of the
    training data: c-index 1.0, 11 concordant pairs, nothing else.
    """
    model = HingeLossSurvivalSVM(alpha=1, pairs="nearest")
    model.fit(self.x, self.y)

    expected_shape = (1, self.x.shape[0])
    self.assertTupleEqual(expected_shape, model.coef_.shape)

    prediction = model.predict(self.x)
    stats = concordance_index_censored(self.y['status'], self.y['time'], prediction)

    self.assertEqual(1.0, stats[0])
    self.assertEqual(11, stats[1])
    self.assertEqual(0, stats[2])
    self.assertEqual(0, stats[3])
    self.assertEqual(0, stats[4])
def test_concordance_index_with_tied_event_and_time(self):
    """Mixed case: censored samples, repeated times and repeated estimates.

    Expects 12 concordant, 9 discordant, 1 tied-estimate and 1 tied-time
    pair, with a c-index of about 0.5681818.
    """
    is_event = [
        True, False, False, False,
        True, False, True, True,
        False, False, False, True,
    ]
    observed_time = [34, 11, 11, 5, 1, 89, 13, 45, 7, 13, 9, 13]
    risk_score = [1, 19, 13, 13, 15, 14, 19, 23, 11, 10, 11, 1]

    c, n_concordant, n_discordant, n_tied_risk, n_tied_time = \
        concordance_index_censored(is_event, observed_time, risk_score)

    self.assertEqual(12, n_concordant)
    self.assertEqual(9, n_discordant)
    self.assertEqual(1, n_tied_risk)
    self.assertEqual(1, n_tied_time)
    self.assertAlmostEqual(0.5681818, c, 6)
def test_kernel_precomputed(self):
    """Fit and predict with a precomputed RBF kernel matrix.

    The full kernel matrix of ``self.x`` is computed once; samples from
    index 50 onwards are used for training and the first 50 for testing.
    The concordance result of the test predictions is compared against
    stored reference values.
    """
    from sklearn.metrics.pairwise import pairwise_kernels
    # ``sklearn.cross_validation`` was removed in scikit-learn 0.20; prefer the
    # current location of the helper and only fall back for legacy installs.
    try:
        from sklearn.utils.metaestimators import _safe_split
    except ImportError:
        from sklearn.cross_validation import _safe_split

    m = MinlipSurvivalAnalysis(kernel="precomputed", solver="cvxpy")
    K = pairwise_kernels(self.x, metric="rbf")

    train_idx = numpy.arange(50, self.x.shape[0])
    test_idx = numpy.arange(50)

    # The test split is additionally given the training indices.
    X_fit, y_fit = _safe_split(m, K, self.y, train_idx)
    X_test, y_test = _safe_split(m, K, self.y, test_idx, train_idx)

    m.fit(X_fit, y_fit)
    p = m.predict(X_test)
    v = concordance_index_censored(y_test['cens'], y_test['time'], p)

    expected = numpy.array([0.508748, 378, 365, 0, 0])
    assert_array_almost_equal(expected, v)
def score_concordance_index(estimator, X, y, **predict_params):
    """Return the concordance index of ``estimator`` on ``(X, y)``.

    ``y`` must provide the fields ``'event'`` and ``'time'``. Any extra
    keyword arguments are forwarded to ``estimator.predict``.
    """
    prediction = estimator.predict(X, **predict_params)
    cindex = concordance_index_censored(y['event'], y['time'], prediction)[0]
    return cindex