max_f1 = 0.  # initializing with 0
best_params = None

for max_depth, min_child_weight in gridsearch_params:
    print("CV with max_depth={}, min_child_weight={}".format(
        max_depth, min_child_weight))

    # Update our parameters
    params['max_depth'] = max_depth
    params['min_child_weight'] = min_child_weight

    # Cross-validation
    cv_results = xgb.cv(params,
                        dtrain,
                        feval=custom_eval,
                        num_boost_round=200,
                        maximize=True,
                        seed=16,
                        nfold=5,
                        early_stopping_rounds=10)

    # Finding best F1 Score
    mean_f1 = cv_results['test-f1_score-mean'].max()
    boost_rounds = cv_results['test-f1_score-mean'].argmax()
    print("\tF1 Score {} for {} rounds".format(mean_f1, boost_rounds))

    if mean_f1 > max_f1:
        max_f1 = mean_f1
        best_params = (max_depth, min_child_weight)

print("Best params: {}, {}, F1 Score: {}".format(
    best_params[0], best_params[1], max_f1))
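This loop relies on two objects defined earlier in the pipeline: gridsearch_params, the list of (max_depth, min_child_weight) pairs to try, and custom_eval, the F1 feval whose name produces the 'test-f1_score-mean' column read above. If you are reproducing this step in isolation, a minimal sketch of both could look like the following; the search ranges and the 0.5 decision threshold are assumptions, not values taken from the run above.

from sklearn.metrics import f1_score

# Candidate (max_depth, min_child_weight) pairs; the ranges are illustrative.
gridsearch_params = [
    (max_depth, min_child_weight)
    for max_depth in range(3, 10)
    for min_child_weight in range(1, 7)
]

# Custom eval: XGBoost passes the predictions and the DMatrix; returning
# ('f1_score', value) is what yields the 'test-f1_score-mean' column.
def custom_eval(preds, dtrain):
    labels = dtrain.get_label()
    preds_binary = (preds > 0.5).astype(int)  # 0.5 threshold is an assumption
    return 'f1_score', f1_score(labels, preds_binary)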
d_train = xgb.DMatrix(Xtrain_hashed, ytrain)
d_valid = xgb.DMatrix(Xtest_hashed, ytest)

# XGBoost parameters 1
params = {
    'n_estimators': 1024,
    'n_jobs': -1,
    'learning_rate': 1,
    'eval_metric': 'auc'
}

# Without early stopping, the model overfits
bst_cv = xgb.cv(params,
                d_train,
                num_boost_round=10000,
                nfold=10,
                verbose_eval=100,
                early_stopping_rounds=None,
                as_pandas=True)

bst_cv.tail()
bst_cv[['train-auc-mean', 'test-auc-mean']].plot()

# Note: xgb.cv returns a DataFrame, so `bst` must be a Booster trained
# separately (e.g. with xgb.train) before it can be evaluated here.
print(bst.eval(d_valid))

# With early stopping. Use CV to find the best number of trees
bst_cv = xgb.cv(
    params,
    d_train,
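The second xgb.cv call is truncated here. As a rough sketch of how the early-stopping run and the read-out of the best number of trees could be wired together, reusing params, d_train and d_valid from above: the 100-round patience and the variable names below are assumptions, not the author's settings.

import xgboost as xgb

# Early-stopping CV run; a patience of 100 rounds is an assumed value.
cv_with_es = xgb.cv(params,
                    d_train,
                    num_boost_round=10000,
                    nfold=10,
                    verbose_eval=100,
                    early_stopping_rounds=100,
                    as_pandas=True)

# With early stopping, xgb.cv trims the result history at the best iteration,
# so the DataFrame length gives the number of trees to use for the final fit.
best_num_trees = len(cv_with_es)
print("Best number of trees:", best_num_trees)

# Retrain a single booster on the full training set with that many rounds
# and evaluate it on the held-out validation DMatrix.
bst = xgb.train(params, d_train, num_boost_round=best_num_trees)
print(bst.eval(d_valid))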