# Split the per-example cost matrix at index `ratio` into train/test parts.
# NOTE(review): presumably `ratio` is the same index used to split x/y into
# train and test sets -- confirm against the code above this section.
cost_mat_train, cost_mat_test = cost_mat[:ratio], cost_mat[ratio:]

# Reduce each label row to the index of its largest entry along axis 1.
# NOTE(review): assumes y_train / y_test are 2-D (e.g. one-hot) -- confirm.
y_train, y_test = np.argmax(y_train, axis=1), np.argmax(y_test, axis=1)
# A single pre-formatted argument keeps the output identical on Python 2 and
# Python 3 (a bare two-argument print() would print a tuple on Python 2).
print('{} {}'.format(y_train.shape, y_test.shape))

# Each section below fits one model on the training split, predicts on the
# test split and prints whatever `evaluate` returns for those predictions.

# random forest
rfc = RandomForestClassifier(random_state=0).fit(x_train, y_train)
y_pred_test_rf = rfc.predict(x_test)
print(evaluate(y_pred_test_rf, y_test, cost_mat_test))

# logistic regression
lr = LogisticRegression(random_state=0).fit(x_train, y_train)
y_pred_test_lr = lr.predict(x_test)
print(evaluate(y_pred_test_lr, y_test, cost_mat_test))

# cost-sensitive decision trees (the training cost matrix is passed to fit)
CSDT = CostSensitiveDecisionTreeClassifier().fit(
    x_train, y_train, cost_mat_train)
y_pred_test_csdt = CSDT.predict(x_test)
print(evaluate(y_pred_test_csdt, y_test, cost_mat_test))

# cost-sensitive logistic regression (the training cost matrix is passed to fit)
CSLR = CostSensitiveLogisticRegression()
CSLR.fit(x_train, y_train, cost_mat_train)
y_pred_test_cslr = CSLR.predict(x_test)
print(evaluate(y_pred_test_cslr, y_test, cost_mat_test))
# Report the ROC AUC of three cost-sensitive approaches on the test split.
# Each AUC is computed from the model's predict() output, not from scores.

# 1) Random forest probabilities post-processed by Bayes minimum risk.
y_prob_test = RandomForestClassifier(random_state=0).fit(
    X_train, y_train).predict_proba(X_test)
f_bmr = BayesMinimumRiskClassifier(calibration=True)
# NOTE(review): the classifier is fitted on the test labels/probabilities,
# i.e. the same data that is scored below -- confirm this is intended.
f_bmr.fit(y_test, y_prob_test)
y_pred_test_bmr = f_bmr.predict(y_prob_test, cost_mat_test)
fpr, tpr, threshold = metrics.roc_curve(y_test, y_pred_test_bmr)
print(
    'The auc_score of using RandomForest and BayesMinimumRiskClassifier '
    'is {:.2f}'.format(metrics.auc(fpr, tpr)))
print('*' * 90)

# 2) Cost-sensitive logistic regression using the 'ga' solver.
f = CostSensitiveLogisticRegression(solver='ga')
f.fit(X_train, y_train, cost_mat_train)
y_pred_test_cslr = f.predict(X_test)
# Score this model's own predictions; the previous code passed
# y_pred_test_lr here, so the printed AUC did not belong to this model.
fpr, tpr, threshold = metrics.roc_curve(y_test, y_pred_test_cslr)
print('The auc_score of CostSensitiveLogisticRegression is {:.2f}'.format(
    metrics.auc(fpr, tpr)))
print('*' * 90)

# 3) Cost-sensitive decision tree; fitted once (it used to be fitted twice
# in a row on identical data).
f = CostSensitiveDecisionTreeClassifier()
f.fit(X_train, y_train, cost_mat_train)
y_pred_test_csdt = f.predict(X_test)
fpr, tpr, threshold = metrics.roc_curve(y_test, y_pred_test_csdt)
print('The auc_score of using CostSensitiveDecisionTreeClassifier is {:.2f}'.
      format(metrics.auc(fpr, tpr)))
print('*' * 90)