Пример #1
0
def knnTuning():
    """Grid-search k-nearest-neighbours hyperparameters on the training data.

    Loads ``train.csv`` through ``getTrainingData``, uses the ``Exited``
    column as the target and holds out 20% of the rows as a test set. A
    3-fold cross-validated grid search, scored on accuracy, is run over
    ``n_neighbors`` (odd values from 5 to 129) and ``weights``. The fitted
    search object is then handed to ``print_score`` and ``ROC`` for both the
    training and the held-out split, and the top-ranked rows of
    ``cv_results_`` are printed with ``printFullRow``.

    Returns:
        None. Everything is reported through the helper calls.
    """
    train = getTrainingData('train.csv', visualize=False)
    X = train.drop(['Exited'], axis=1)
    y = train.Exited

    # 80/20 train/test split, seeded so repeated runs are comparable.
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=0)

    # Fit the scaler on the training split only and reuse its statistics for
    # the test split; fitting on all rows before splitting would leak
    # test-set information into training.
    # NOTE(review): the CV folds inside GridSearchCV still share this scaler;
    # wrapping scaler + model in a Pipeline would remove that leak as well.
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    params = {
        "n_neighbors": list(range(5, 131, 2)),
        "weights": ['uniform', 'distance']
    }
    model = neighbors.KNeighborsClassifier()
    grid_search_cv = GridSearchCV(model,
                                  params,
                                  verbose=1,
                                  n_jobs=-1,
                                  cv=3,
                                  scoring='accuracy')
    grid_search_cv.fit(X_train, y_train)

    print_score(grid_search_cv, X_train, y_train, X_test, y_test, train=True)
    print_score(grid_search_cv, X_train, y_train, X_test, y_test, train=False)
    ROC(grid_search_cv, X_train, y_train, X_test, y_test, train=True)
    ROC(grid_search_cv, X_train, y_train, X_test, y_test, train=False)

    # Show every parameter combination tied for the best mean CV score.
    results = pd.DataFrame(grid_search_cv.cv_results_)
    printFullRow(results[results['rank_test_score'] == 1])
    # NOTE: an earlier run (scaler fitted before the split) reported
    # n_neighbors 11/13 with weights='distance' as best, p left at its
    # default; re-verify after the scaling change.
Пример #2
0
def useAdaboost():
    """Run the ``adaboost`` helper on a standardized 70/30 split of the data.

    Loads ``train.csv`` through ``getTrainingData``, uses the ``Exited``
    column as the target, splits the rows 70/30 with a fixed seed, scales the
    features, and passes the four resulting pieces to ``adaboost``.

    Returns:
        None.
    """
    train = getTrainingData('train.csv', visualize=False)
    X = train.drop(['Exited'], axis=1)
    y = train.Exited
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

    # Fit the scaler on the training split only, then apply the same
    # transform to the test split, so no test-set statistics leak into
    # training.
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    adaboost(X_train, X_test, y_train, y_test)
Пример #3
0
def useGnnb():
    """Run the ``gnnb`` helper on an 80/20 split of the training data.

    Loads ``train.csv`` through ``getTrainingData``, separates the ``Exited``
    target from the remaining columns, splits the rows 80/20 with a fixed
    seed, passes both feature splits through ``standard`` (presumably feature
    scaling -- confirm against its definition), and hands everything to
    ``gnnb``.

    Returns:
        None.
    """
    data = getTrainingData('train.csv', visualize=False)
    target = data.Exited
    features = data.drop(['Exited'], axis=1)

    # Split before any preprocessing so the test rows stay untouched.
    split = train_test_split(features, target, test_size=0.2, random_state=0)
    feat_train, feat_test, target_train, target_test = split

    feat_train, feat_test = standard(feat_train, feat_test)
    gnnb(feat_train, feat_test, target_train, target_test)