# Train an XGBoost random-forest classifier on the pipeline-transformed splits,
# report the validation ROC AUC, then rank features by permutation importance.

# The pipeline is fitted on the training split only; the validation and test
# splits are transformed with the already-fitted pipeline.
X_train = pipeline.fit_transform(train)
X_val = pipeline.transform(val)
X_test = pipeline.transform(test)

# Both splits are evaluated during fit so their AUC values can be compared.
eval_set = [(X_train, y_train), (X_val, y_val)]

# NOTE: early stopping is deliberately not configured. Per the xgboost
# random-forest documentation, XGBRFClassifier builds all `n_estimators` trees
# in a single boosting round, so there is nothing to stop early, and xgboost
# rejects `early_stopping_rounds` for its random-forest estimators.
# `eval_metric` is set on the estimator (not in `fit`) because recent xgboost
# releases no longer accept it as a `fit()` argument.
model = XGBRFClassifier(
    n_jobs=-1,
    n_estimators=5000,
    random_state=42,
    scale_pos_weight=15,
    learning_rate=.005,
    reg_lambda=.01,
    verbosity=1,
    eval_metric='auc',
)

print('fitting...')
model.fit(X_train, y_train, eval_set=eval_set, verbose=True)

# Column 1 of predict_proba is used as the positive-class score.
y_pred_proba = model.predict_proba(X_val)[:, 1]
print(f'Validation ROC AUC score: {roc_auc_score(y_val, y_pred_proba)}')

print('permuting...')
# The already-fitted model is handed over as-is (cv='prefit') and the
# importances are computed on the validation split.
permuter = PermutationImportance(
    model,
    cv='prefit',
    n_iter=5,
    scoring='roc_auc',
    random_state=42,
)
permuter.fit(X_val, y_val)

# NOTE(review): the labels come from the untransformed `val` frame; this
# assumes the pipeline preserves the number and order of columns - confirm.
features_of_import = pd.Series(
    permuter.feature_importances_,
    index=val.columns,
).sort_values(ascending=True)
print('importance', features_of_import)
'colsample_bytree': 0.07, 'gamma': 0.005, 'max_depth': 3, 'min_child_weight': 3, 'n_estimators': 500, 'objective': 'binary:logistic', 'random_state': 10, 'reg_alpha': 9, 'reg_lambda': 0, 'subsample': 0.6, 'verbosity': 0 } model_L = XGBRFClassifier(**model_L_params) model_L.fit(X_train_scl_L, y_train_L, eval_set=eval_set, eval_metric='auc', early_stopping_rounds=20, verbose=False) pred_L = model_L.predict(X_clouds_scl_L) clouds_R, _ = get_clouds(roiR) X_clouds_R = clouds_R.reshape((clouds_R.shape[0], -1)) X_clouds_scl_R = X_clouds_R / scale_rnd_R model_R_params = { 'colsample_bytree': 0.07, 'gamma': 0.005, 'max_depth': 3, 'min_child_weight': 3, 'n_estimators': 500, 'objective': 'binary:logistic', 'random_state': 10,
# Builds an XGBRFClassifier (learning_rate=0.1, n_estimators=1000, max_depth=7)
# with min_child_weight=5 in one branch and min_child_weight=3 in the `else`
# branch; the condition selecting the branch lies above this chunk and is not
# visible here.
# The model is then fitted on trainDataset.X against the first column of
# trainDataset.Y, and accuracy is printed for both trainDataset and
# validDataset.
# Afterwards the arg.img directory is created when it does not exist, and a
# bar plot of feature_importances_ (x labels taken from `features`, rotated
# vertically) is saved to os.path.join(arg.img, modelName).
# NOTE(review): accuracy_score is called as (predictions, labels); if this is
# sklearn's accuracy_score, the documented order is (y_true, y_pred). Plain
# accuracy is symmetric so the printed value is unaffected - confirm before
# swapping in an asymmetric metric.
# NOTE(review): os.mkdir creates only the last path component and can race
# with the preceding isdir check; os.makedirs(arg.img, exist_ok=True) may be
# what is intended - confirm.
# NOTE(review): the statements appear collapsed onto one physical line, so the
# trailing `# save model` marker swallows the `pickl = ...` text that follows
# it; presumably these were separate lines - restore the line breaks.
xgbrf_classifier = XGBRFClassifier( learning_rate=0.1, n_estimators=1000, max_depth=7, min_child_weight=5 ) else: xgbrf_classifier = XGBRFClassifier( learning_rate=0.1, n_estimators=1000, max_depth=7, min_child_weight=3 ) print('[LOG] Fitting model...') xgbrf_classifier.fit(trainDataset.X, trainDataset.Y[:,0]) print('[LOG] Fitting done!') print('-- Model Report --') print('XGBoost train Accuracy: '+str(accuracy_score(xgbrf_classifier.predict(trainDataset.X), trainDataset.Y[:,0]))) print('XGBoost valid Accuracy: '+str(accuracy_score(xgbrf_classifier.predict(validDataset.X), validDataset.Y[:,0]))) if not os.path.isdir(arg.img): os.mkdir(arg.img) f, ax = plt.subplots(figsize=(10,5)) plot = sns.barplot(x=features, y=xgbrf_classifier.feature_importances_) ax.set_title('Feature Importance') plot.set_xticklabels(plot.get_xticklabels(),rotation='vertical') plt.savefig(os.path.join(arg.img, modelName)) # save model pickl = {'model': xgbrf_classifier}