def one_out(scaled_X, y, As_Gunshot, y_A):
    """Leave-one-out evaluation over the Africa samples.

    For each row ``i`` of ``As_Gunshot`` the remaining Africa rows are
    stacked onto ``scaled_X``/``y`` and passed through ``data_split``. An
    ``AdaCostClassifier`` is fitted on the returned training part minus its
    last 10 rows, then tested on the left-out Africa row together with
    those 10 withheld rows. The share of correct predictions over all
    rounds is printed; nothing is returned.

    Args:
        scaled_X: 2-D feature array of the other (non-Africa) data.
        y: Labels matching ``scaled_X``.
        As_Gunshot: 2-D feature array of the Africa samples; one row is
            left out per round.
        y_A: Labels matching ``As_Gunshot``.

    Raises:
        ZeroDivisionError: If ``As_Gunshot`` is empty, because the overall
            score is divided by ``len(As_Gunshot)``.
    """
    # Training rows withheld from fitting and reused as extra test rows.
    # The final score divides by ``n_holdout + 1`` (withheld rows plus the
    # left-out Africa row), so both must stay in sync.
    n_holdout = 10
    n_correct = 0

    for left_out in range(len(As_Gunshot)):
        # The candidate to leave out, kept 2-D so it can be stacked below.
        test_x = As_Gunshot[left_out][np.newaxis, :]
        test_y = y_A[left_out]

        # Combine the rest of the Africa data with the other data.
        rest_x = np.delete(As_Gunshot, left_out, 0)
        rest_y = np.delete(y_A, left_out)
        X = np.vstack((scaled_X, rest_x)).astype(np.float64)
        Y = np.hstack((y, rest_y)).astype(np.float64)

        # Only the training part of the split is used in this routine.
        X_train, _, y_train, _ = data_split(X, Y)

        model = AdaCostClassifier(n_estimators=100)
        model.fit(X_train[:-n_holdout], y_train[:-n_holdout])

        # Test on the chosen Africa row plus the rows that were not trained on.
        test_x = np.vstack((test_x, X_train[-n_holdout:]))
        test_y = np.hstack((test_y, y_train[-n_holdout:]))

        # Predict once per round and reuse the result for printing and scoring.
        predictions = model.predict(test_x)
        print('Test Prediction of Adacost = ', predictions)
        n_correct += int(np.sum(np.asarray(predictions) == test_y))

    # Overall score: correct predictions over (rounds * rows tested per round).
    print('The out put of one_out is',
          round(n_correct / len(As_Gunshot) / (n_holdout + 1), 4))
def Adacost(X_train, X_test, y_train, y_test):
    """Fit an AdaCost classifier and print its evaluation on the test split.

    Prints, in order: the fit duration, the value of ``score`` on the test
    split (labelled as test accuracy), the prediction duration, a
    classification report and a confusion matrix for the labels -1 and 1.
    The predictions are finally handed to ``acu_curve``.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: Test labels.
    """
    model = AdaCostClassifier(n_estimators=100)

    # Time the fit.
    fit_start = time.time()
    model.fit(X_train, y_train)
    fit_end = time.time()
    print(round(fit_end - fit_start, 2), 'Seconds to train Adacost...')

    # Check the score of the Adacost
    test_score = model.score(X_test, y_test)
    print('Test Accuracy of Adacost = ', round(test_score, 4))

    # Time the prediction pass separately.
    predict_start = timer()
    y_pred = model.predict(X_test)
    print("done in %0.3fs" % (timer() - predict_start))

    class_labels = [-1, 1]
    target_names = ['not_gunshot', 'gunshot']
    report = classification_report(
        y_test, y_pred, labels=class_labels, target_names=target_names)
    print(report)
    print(confusion_matrix(y_test, y_pred, labels=class_labels))

    acu_curve(y_pred, y_test)
print(df.shape) print(df.Class.value_counts()) return df.drop('Class', axis=1), df['Class'] if __name__ == '__main__': # X_train, y_train = load_creditcard_data()#输出预测数据时用 X, y= load_creditcard_data() X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0) #分割数据集,将部分训练集用作测试集 # X_test= pd.read_csv('creditcard_test.csv' #输出预测数据时用 # ZZ=load_creditcardtest_data() clf = AdaCostClassifier(n_estimators=100) clf.fit(X_train, y_train) y_pred = clf.predict(X_test) print(pd.Series(y_pred).value_counts()) print('recall=',recall_score(y_test, y_pred, pos_label=-1),' ' 'precision=',precision_score(y_test, y_pred, pos_label=-1),' ' 'f1_score=',f1_score(y_test, y_pred, pos_label=-1), ) # answer = pd.read_csv(open('sample_Submission.csv')) # 输出预测csv # for i in range(y_pred.shape[0]): # predict = y_pred[i] # if predict==1: # answer.loc[i, "class"] = "1" # else: # answer.loc[i, "class"] = "0" # answer.to_csv('submission1.csv', index=False)