def refit_and_predict(cut_points_estimates, X_train, X_test, Y_train,
                      delta_train, Y_test, delta_test):
    """Refit an unpenalized Cox model on features binarized at given cuts.

    The features are binarized with the supplied cut points (first level of
    each feature dropped), a Cox regression with ``penalty='none'`` is fitted
    on the binarized training set, and the fitted coefficients are used to
    score both sets.

    Parameters
    ----------
    cut_points_estimates :
        Bin boundaries forwarded to ``FeaturesBinarizer`` as
        ``bins_boundaries`` (``method='given'``).
    X_train, X_test :
        Train / test features; they are concatenated with ``pd.concat`` to
        fit the binarizer, then transformed separately.
    Y_train, delta_train :
        Training targets passed to ``CoxRegression.fit`` (presumably
        observed times and censoring indicators -- confirm with callers).
    Y_test, delta_test :
        Test targets passed to ``concordance_index`` together with the
        test marker.

    Returns
    -------
    tuple
        ``(c_index, marker, lp_train)`` where ``c_index`` is
        ``max(c, 1 - c)`` of the concordance index on the test set,
        ``marker`` is the binarized test matrix times the coefficients and
        ``lp_train`` is the same product for the training matrix.
    """
    # Binarize with the provided boundaries; the binarizer is fitted on the
    # union of train and test rows.
    encoder = FeaturesBinarizer(
        method='given',
        bins_boundaries=cut_points_estimates,
        remove_first=True,
    )
    encoder.fit(pd.concat([X_train, X_test]))
    train_design = encoder.transform(X_train)
    test_design = encoder.transform(X_test)

    # Unpenalized Cox regression, with line search turned off on the
    # underlying solver object before fitting.
    cox = CoxRegression(
        penalty='none',
        tol=1e-5,
        solver='agd',
        verbose=False,
        max_iter=100,
        step=0.3,
        warm_start=True,
    )
    cox._solver_obj.linesearch = False
    cox.fit(train_design, Y_train, delta_train)

    # Linear predictors for both sets from the fitted coefficients.
    beta = cox.coeffs
    marker = test_design.dot(beta)
    lp_train = train_design.dot(beta)

    # Report the concordance irrespective of the marker's orientation.
    raw_concordance = concordance_index(Y_test, marker, delta_test)
    oriented_concordance = max(raw_concordance, 1 - raw_concordance)
    return oriented_concordance, marker, lp_train
def test_binarizer_fit(self):
    """...Test binarizer fit

    Checks that quantile binarization (``n_cuts=3``, ``remove_first=False``)
    yields a CSR matrix equal to the one-hot encoding of
    ``self.default_expected_intervals`` for a pandas dataframe input, a
    numpy array input, and through ``fit_transform``.
    """
    n_cuts = 3
    # The encoder's default output is already sparse. The explicit
    # ``sparse=True`` keyword is not passed because recent scikit-learn
    # releases removed it (renamed to ``sparse_output``), which made the
    # constructor raise TypeError.
    enc = OneHotEncoder()
    expected_binarization = enc.fit_transform(
        self.default_expected_intervals)
    expected_dense = expected_binarization.toarray()

    binarizer = FeaturesBinarizer(method='quantile', n_cuts=n_cuts,
                                  detect_column_type="auto",
                                  remove_first=False)

    def check_binarized(binarized):
        # The output must be exactly a CSR matrix with the expected content
        self.assertEqual(binarized.__class__, csr.csr_matrix)
        np.testing.assert_array_equal(expected_dense, binarized.toarray())

    # for pandas dataframe
    binarizer.fit(self.df_features)
    binarized_df = binarizer.transform(self.df_features)
    check_binarized(binarized_df)

    # for numpy array
    binarizer.fit(self.features)
    binarized_array = binarizer.transform(self.features)
    check_binarized(binarized_array)

    # test fit_transform
    binarized_array = binarizer.fit_transform(self.features)
    check_binarized(binarized_array)
def test_binarizer_remove_first(self):
    """...Test binarizer fit when remove_first=True

    The expected result is the one-hot encoding of
    ``self.default_expected_intervals`` with columns 0, 4, 8 and 10
    deleted, compared against the binarizer output on ``self.features``.
    """
    n_cuts = 3
    # The encoder's default output is already sparse. The explicit
    # ``sparse=True`` keyword is not passed because recent scikit-learn
    # releases removed it (renamed to ``sparse_output``), which made the
    # constructor raise TypeError.
    one_hot_encoder = OneHotEncoder()
    expected_binarization = one_hot_encoder.fit_transform(
        self.default_expected_intervals)

    binarizer = FeaturesBinarizer(method='quantile', n_cuts=n_cuts,
                                  detect_column_type="auto",
                                  remove_first=True)

    binarizer.fit(self.features)
    binarized_array = binarizer.transform(self.features)
    self.assertEqual(binarized_array.__class__, csr.csr_matrix)

    # Drop the columns at the hard-coded indices below from the full
    # encoding (presumably the first level of each feature -- the index
    # list depends on the fixture's layout).
    expected_binarization_without_first = \
        np.delete(expected_binarization.toarray(), [0, 4, 8, 10], 1)

    np.testing.assert_array_equal(expected_binarization_without_first,
                                  binarized_array.toarray())
predictions = estim_proba(marker_cox, lp_train, Y_train, delta_train) ibs_cox = integrated_brier_score(predictions['values'], predictions['times'], Y_test, delta_test, Y_train, delta_train) # Binacox print("Train Binacox screening_cox_topP...") X_train_ = X_train[screening_cox_topP] X_test_ = X_test[screening_cox_topP] # binarize feature n_cuts = 50 binarizer = FeaturesBinarizer(n_cuts=n_cuts) binarizer.fit(pd.concat([X_train_, X_test_])) X_bin_train = binarizer.transform(X_train_) blocks_start = binarizer.blocks_start blocks_length = binarizer.blocks_length boundaries = binarizer.boundaries # fit binacox learner = CoxRegression(penalty='binarsity', tol=1e-5, solver='agd', verbose=False, max_iter=100, step=0.3, blocks_start=blocks_start, blocks_length=blocks_length, C=C_chosen[cancer],