def cross_validation(scaled_X,y,classifier,K_num=2):
    '''
    Run cross-validation with 2 to 9 folds and report which fold count
    gives the best mean score for the chosen classifier.

    For every fold count the mean of the cross_val_score results is recorded.
    For 'knn' and 'adacost' the mean scores are also plotted against the
    fold count; for 'svm' no plot is drawn.

    Parameters:
        scaled_X: feature matrix handed to cross_val_score.
        y: labels handed to cross_val_score.
        classifier: one of 'knn', 'adacost' or 'svm'.
        K_num: n_neighbors for the KNN classifier (only used for 'knn').

    Returns:
        The position of the best mean score in the recorded list, NOT the
        fold count itself: 0 stands for 2 folds, 7 for 9 folds
        (fold count = returned value + 2). On ties the smallest wins.

    Raises:
        ValueError: if classifier is not one of the supported names.
    '''
    if classifier == 'knn':
        estimator = KNeighborsClassifier(n_neighbors=K_num)
        show_plot = True
    elif classifier == 'adacost':
        estimator = AdaCostClassifier(n_estimators=100)
        show_plot = True
    elif classifier == 'svm':
        estimator = LinearSVC()
        show_plot = False
    else:
        # Previously this fell through to max([]) and failed with an
        # unhelpful "empty sequence" ValueError; keep the type, fix the text.
        raise ValueError(
            "Unknown classifier %r; expected 'knn', 'adacost' or 'svm'"
            % (classifier,))

    fold_counts = range(2, 10)
    # Record the mean cross-validation score for every fold count
    cross_scores = [
        cross_val_score(estimator, scaled_X, y, cv=folds).mean()
        for folds in fold_counts
    ]
    if show_plot:
        plt.plot(fold_counts, cross_scores)
        plt.show()
    return cross_scores.index(max(cross_scores))
# Example #2
0
def one_out(scaled_X, y, As_Gunshot, y_A):
    '''
    Leave one Africa data for testing and rest of data for training.

    For every row of As_Gunshot:
      * the remaining Africa rows are stacked onto scaled_X / y,
      * the combined data goes through data_split,
      * an AdaCost model is fitted on all but the last 10 training rows,
      * the model is tested on the left-out Africa row plus those 10
        withheld rows.

    The overall fraction of correct predictions (rounded to 4 digits) is
    printed; nothing is returned. When As_Gunshot is empty there is nothing
    to test and nan is printed instead of failing on a division by zero.

    Parameters:
        scaled_X: feature rows of the non-Africa data.
        y: labels matching scaled_X.
        As_Gunshot: Africa feature rows; each one is left out in turn.
        y_A: labels matching As_Gunshot.
    '''
    correct = 0
    tested = 0
    for left_out in range(len(As_Gunshot)):
        # The candidate to leave out
        held_x = As_Gunshot[left_out][np.newaxis, :]
        held_y = y_A[left_out]
        rest_x = np.delete(As_Gunshot, left_out, 0)
        rest_y = np.delete(y_A, left_out)
        # Combine the rest Africa data with other data
        X = np.vstack((scaled_X, rest_x)).astype(np.float64)
        Y = np.hstack((y, rest_y)).astype(np.float64)
        #split = cross_validation(X,Y,classifier = 'svm')
        # Only the training part of the split is used below.
        X_train, _, y_train, _ = data_split(X, Y)
        model = AdaCostClassifier(n_estimators=100)
        model.fit(X_train[:-10], y_train[:-10])
        # Test on the chosen Africa data and 10 other data without trained
        test_x = np.vstack((held_x, X_train[-10:]))
        test_y = np.hstack((held_y, y_train[-10:]))
        # Predict once and reuse the result for both the log line and the
        # score instead of calling predict again for every single row.
        predictions = np.asarray(model.predict(test_x))
        print('Test Prediction of Adacost = ', predictions)
        correct += int(np.count_nonzero(predictions == test_y))
        tested += len(test_y)
    if tested == 0:
        print('The out put of one_out is', float('nan'))
        return
    # Get the overall score over every tested row (11 per left-out sample)
    print('The out put of one_out is', round(correct / tested, 4))
def Adacost(X_train,X_test,y_train,y_test):
    '''
    Fit an AdaCost model with 100 estimators and print an evaluation report.

    Printed, in order: the training time in seconds, the accuracy on the
    test data, the prediction time, a classification report and a confusion
    matrix (label -1 is reported as 'not_gunshot', label 1 as 'gunshot').
    Finally the predictions and true labels are handed to acu_curve.

    Parameters:
        X_train, y_train: data the model is fitted on.
        X_test, y_test: data the model is scored and evaluated on.
    '''
    model = AdaCostClassifier(n_estimators=100)

    # Time the fit
    fit_started = time.time()
    model.fit(X_train, y_train)
    fit_finished = time.time()
    print(round(fit_finished - fit_started, 2), 'Seconds to train Adacost...')

    # Check the score of the Adacost
    accuracy = round(model.score(X_test, y_test), 4)
    print('Test Accuracy of Adacost = ', accuracy)

    # Time the prediction on the test data
    predict_started = timer()
    y_pred = model.predict(X_test)
    print("done in %0.3fs" % (timer() - predict_started))

    class_names = ['not_gunshot', 'gunshot']
    report = classification_report(
        y_test, y_pred, labels=[-1, 1], target_names=class_names)
    print(report)
    matrix = confusion_matrix(y_test, y_pred, labels=[-1, 1])
    print(matrix)
    acu_curve(y_pred, y_test)
# Example #4
0
    df.loc[df.Class == 0, 'Class'] = 1
    print(df.shape)
    print(df.Class.value_counts())
    return df.drop('Class', axis=1), df['Class']



if __name__ == '__main__':
    # Script entry point: load the data, hold out 20% for testing, fit an
    # AdaCost model and print recall / precision / F1 with label -1 treated
    # as the positive class.

    # X_train, y_train = load_creditcard_data()  # used when writing out predictions
    X, y = load_creditcard_data()
    # Split the data set so that part of the training data serves as the test set
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=0)

    # X_test= pd.read_csv('creditcard_test.csv'  # used when writing out predictions
    # ZZ=load_creditcardtest_data()

    clf = AdaCostClassifier(n_estimators=100)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    # Show how many samples were predicted for each class
    print(pd.Series(y_pred).value_counts())

    recall = recall_score(y_test, y_pred, pos_label=-1)
    precision = precision_score(y_test, y_pred, pos_label=-1)
    f1 = f1_score(y_test, y_pred, pos_label=-1)
    print('recall=', recall, '  precision=', precision, '  f1_score=', f1)

    # Write the predictions to a csv file
    # answer = pd.read_csv(open('sample_Submission.csv'))
    # for i in range(y_pred.shape[0]):
    #     predict = y_pred[i]
    #     if predict==1:
    #         answer.loc[i, "class"] = "1"
    #     else:
    #         answer.loc[i, "class"] = "0"
    # answer.to_csv('submission1.csv', index=False)