# Example #1
0
def modelfit(train,
             labels,
             test,
             features,
             useTrainCV=True,
             cv_folds=5,
             early_stopping_rounds=50):
    """Grid-search an XGBoost classifier and score ``test`` with the winner.

    A fixed grid over ``max_depth`` and ``min_child_weight`` is searched with
    cross-validated ROC AUC.  20% of ``train``/``labels`` is held out (fixed
    random state) to report an out-of-sample ROC AUC; the search itself is
    fitted on the remaining 80%.  A bar chart of the best model's feature
    importances is drawn on the current matplotlib figure but not shown;
    the caller decides whether to call ``plt.show()`` or save it.

    Args:
        train: Feature matrix used for the grid search and the hold-out split.
        labels: Target values aligned with ``train``.
        test: Feature matrix to produce final predictions for.
        features: Currently unused; kept for interface compatibility.
        useTrainCV: Currently unused; kept for interface compatibility.
        cv_folds: Number of cross-validation folds used by the grid search.
        early_stopping_rounds: Currently unused; kept for interface
            compatibility.

    Returns:
        The second column of ``predict_proba(test)`` from the refitted best
        estimator (the positive-class probability for a binary problem).
    """
    param_test1 = {
        'max_depth': range(3, 10, 2),
        'min_child_weight': range(1, 6, 2)
    }
    estimator = XGBClassifier(learning_rate=0.1,
                              n_estimators=140,
                              max_depth=5,
                              min_child_weight=1,
                              gamma=0,
                              subsample=0.8,
                              colsample_bytree=0.8,
                              objective='binary:logistic',
                              nthread=4,
                              scale_pos_weight=1,
                              seed=27)
    search_kwargs = {
        'param_grid': param_test1,
        'scoring': 'roc_auc',
        'n_jobs': 4,
        'cv': cv_folds,
    }
    # ``iid`` was removed from GridSearchCV in scikit-learn 0.24.  Keep the
    # original ``iid=False`` where the keyword still exists and fall back
    # cleanly where the constructor rejects it.
    try:
        model = GridSearchCV(estimator=estimator, iid=False, **search_kwargs)
    except TypeError:
        model = GridSearchCV(estimator=estimator, **search_kwargs)

    test_percent = 0.2
    X_train, X_test, y_train, y_test = train_test_split(train,
                                                        labels,
                                                        test_size=test_percent,
                                                        random_state=23)

    # Fit the grid search on the training split only.
    model.fit(X_train, y_train)

    # ``grid_scores_`` was replaced by ``cv_results_`` in scikit-learn 0.20;
    # report whichever one this installation provides.
    cv_summary = getattr(model, 'grid_scores_', None)
    if cv_summary is None:
        cv_summary = model.cv_results_
    print(cv_summary)
    print(model.best_params_)
    print(model.best_score_)

    # Hold-out predictions (not training predictions): score the refitted
    # best estimator on the 20% split it has never seen.
    proba = model.predict_proba(X_test)
    preds = proba[:, 1]
    score = roc_auc_score(y_test, preds)
    print("Area under ROC {0}".format(score))

    # The booster lives on the refitted best estimator, not on the
    # GridSearchCV wrapper.  Newer XGBoost exposes it as ``get_booster()``;
    # older releases used ``booster()``.
    best_estimator = model.best_estimator_
    booster_getter = getattr(best_estimator, 'get_booster', None)
    if booster_getter is None:
        booster_getter = best_estimator.booster
    feat_imp = pd.Series(
        booster_getter().get_fscore()).sort_values(ascending=False)
    feat_imp.plot(kind='bar', title='Feature Importances')
    plt.ylabel('Feature Importance Score')

    # Final predictions for the caller-supplied test set.
    test_proba = model.predict_proba(test)
    test_preds = test_proba[:, 1]

    return test_preds