def modelfit(train, labels, test, features, useTrainCV=True, cv_folds=5, early_stopping_rounds=50):
    """Grid-search an XGBoost classifier on a max_depth/min_child_weight grid,
    report hold-out ROC AUC and feature importances, and return test-set
    positive-class probabilities.

    Parameters
    ----------
    train : array-like or DataFrame
        Training feature matrix.
    labels : array-like
        Binary target labels aligned with ``train``.
    test : array-like or DataFrame
        Feature matrix to produce final predictions for.
    features : unused
        Kept for backward compatibility with existing callers.
    useTrainCV : bool, unused
        Kept for backward compatibility with existing callers.
    cv_folds : int
        Number of cross-validation folds for the grid search.
    early_stopping_rounds : int, unused
        Kept for backward compatibility with existing callers.

    Returns
    -------
    numpy.ndarray
        Predicted probability of the positive class for each row of ``test``.
    """
    param_test1 = {
        'max_depth': range(3, 10, 2),
        'min_child_weight': range(1, 6, 2),
    }
    # NOTE: `iid` was removed from GridSearchCV in scikit-learn 0.24, so it is
    # no longer passed.  `cv` now honors the `cv_folds` argument instead of a
    # hard-coded 5.
    model = GridSearchCV(
        estimator=XGBClassifier(
            learning_rate=0.1,
            n_estimators=140,
            max_depth=5,
            min_child_weight=1,
            gamma=0,
            subsample=0.8,
            colsample_bytree=0.8,
            objective='binary:logistic',
            nthread=4,
            scale_pos_weight=1,
            seed=27,
        ),
        param_grid=param_test1,
        scoring='roc_auc',
        n_jobs=4,
        cv=cv_folds,
    )

    # Hold out a validation split so AUC is measured on unseen data.
    test_percent = 0.2
    X_train, X_test, y_train, y_test = train_test_split(
        train, labels, test_size=test_percent, random_state=23)

    # Fit the algorithm on the data.
    model.fit(X_train, y_train)

    # `grid_scores_` was removed in scikit-learn 0.20; `cv_results_` is the
    # supported replacement.
    print(model.cv_results_)
    print(model.best_params_)
    print(model.best_score_)

    # Hold-out predictions: column 1 is the positive-class probability.
    proba = model.predict_proba(X_test)
    preds = proba[:, 1]
    score = roc_auc_score(y_test, preds)
    print("Area under ROC {0}".format(score))

    # Feature importances must come from the refit best estimator's booster;
    # GridSearchCV itself has no `booster()` method (the original call raised
    # AttributeError), and xgboost renamed `booster()` to `get_booster()`.
    feat_imp = pd.Series(
        model.best_estimator_.get_booster().get_fscore()).sort_values(ascending=False)
    feat_imp.plot(kind='bar', title='Feature Importances')
    plt.ylabel('Feature Importance Score')
    # plt.show()

    # Final test-set predictions (positive-class probabilities).
    test_proba = model.predict_proba(test)
    test_preds = test_proba[:, 1]
    return test_preds