# Fit the preprocessing pipeline on the training split only, then apply the
# already-fitted transform to the validation and test splits.
X_train = pipeline.fit_transform(train)
X_val, X_test = pipeline.transform(val), pipeline.transform(test)

# Both splits are handed to fit() so the metric is reported for each of them.
eval_set = [(X_train, y_train), (X_val, y_val)]

# NOTE(review): recent xgboost releases appear to reject early stopping for the
# random-forest wrappers and to no longer accept `eval_metric` in fit() --
# confirm against the pinned xgboost version.
model_params = dict(
    n_jobs=-1,
    n_estimators=5000,
    early_stopping_rounds=100,
    random_state=42,
    scale_pos_weight=15,
    learning_rate=.005,
    reg_lambda=.01,
    verbosity=1,
)
model = XGBRFClassifier(**model_params)
print('fitting...')
model.fit(X_train, y_train, eval_set=eval_set, eval_metric='auc', verbose=True)

# Column 1 of predict_proba is used as the score for the ROC AUC computation.
val_proba = model.predict_proba(X_val)
y_pred_proba = val_proba[:, 1]
val_auc = roc_auc_score(y_val, y_pred_proba)
print(f'Validation ROC AUC score: {val_auc}')

# Permutation importance of the already-fitted model (cv='prefit'), scored by
# ROC AUC over 5 shuffles on the validation split.
print('permuting...')
permuter = PermutationImportance(
    model,
    cv='prefit',
    n_iter=5,
    scoring='roc_auc',
    random_state=42,
)
permuter.fit(X_val, y_val)

# NOTE(review): importances are labelled with `val.columns`; this presumes the
# pipeline keeps the number and order of columns unchanged -- confirm.
importances = pd.Series(permuter.feature_importances_, val.columns)
features_of_import = importances.sort_values(ascending=True)
print('importance', features_of_import)
    'colsample_bytree': 0.07,
    'gamma': 0.005,
    'max_depth': 3,
    'min_child_weight': 3,
    'n_estimators': 500,
    'objective': 'binary:logistic',
    'random_state': 10,
    'reg_alpha': 9,
    'reg_lambda': 0,
    'subsample': 0.6,
    'verbosity': 0
}
# Build the left-side classifier from its parameter dict and fit it, reporting
# AUC on `eval_set`.
# NOTE(review): `early_stopping_rounds` / `eval_metric` passed to fit() may be
# rejected by recent xgboost releases for XGBRFClassifier -- confirm against
# the pinned version.
model_L = XGBRFClassifier(**model_L_params)
fit_options_L = {
    'eval_set': eval_set,
    'eval_metric': 'auc',
    'early_stopping_rounds': 20,
    'verbose': False,
}
model_L.fit(X_train_scl_L, y_train_L, **fit_options_L)
pred_L = model_L.predict(X_clouds_scl_L)

# Right side: fetch the clouds for roiR (second return value is discarded),
# flatten every cloud into one feature row, then divide by `scale_rnd_R`.
clouds_R, _ = get_clouds(roiR)
n_clouds_R = clouds_R.shape[0]
X_clouds_R = clouds_R.reshape((n_clouds_R, -1))
X_clouds_scl_R = X_clouds_R / scale_rnd_R
model_R_params = {
    'colsample_bytree': 0.07,
    'gamma': 0.005,
    'max_depth': 3,
    'min_child_weight': 3,
    'n_estimators': 500,
    'objective': 'binary:logistic',
    'random_state': 10,
Пример #3
0
                xgbrf_classifier = XGBRFClassifier(
                                    learning_rate=0.1,
                                    n_estimators=1000,
                                    max_depth=7,
                                    min_child_weight=5
                                )
            else:
                xgbrf_classifier = XGBRFClassifier(
                                    learning_rate=0.1,
                                    n_estimators=1000,
                                    max_depth=7,
                                    min_child_weight=3
                                )

            # Train on the first target column only (Y[:, 0]).
            print('[LOG] Fitting model...')
            xgbrf_classifier.fit(trainDataset.X, trainDataset.Y[:,0])
            print('[LOG] Fitting done!')

            # Report accuracy on the training and validation splits.
            # Arguments are passed as (y_true, y_pred); the score is the same
            # either way, assuming this is scikit-learn's accuracy_score --
            # TODO confirm.
            print('-- Model Report --')
            train_accuracy = accuracy_score(trainDataset.Y[:,0], xgbrf_classifier.predict(trainDataset.X))
            print('XGBoost train Accuracy: '+str(train_accuracy))
            valid_accuracy = accuracy_score(validDataset.Y[:,0], xgbrf_classifier.predict(validDataset.X))
            print('XGBoost valid Accuracy: '+str(valid_accuracy))

            # Create the output directory (including missing parents) without
            # the check-then-create race of isdir() followed by mkdir().
            os.makedirs(arg.img, exist_ok=True)

            # Bar chart of per-feature importances, drawn on the axes created
            # here and saved under arg.img using the model name as file name.
            f, ax = plt.subplots(figsize=(10,5))
            plot = sns.barplot(x=features, y=xgbrf_classifier.feature_importances_, ax=ax)
            ax.set_title('Feature Importance')
            plot.set_xticklabels(plot.get_xticklabels(),rotation='vertical')
            # NOTE(review): the figure is never closed; if this runs once per
            # model in a loop, consider plt.close(f) after saving -- confirm
            # that no later code still needs the figure.
            f.savefig(os.path.join(arg.img, modelName))

            # save model
            pickl = {'model': xgbrf_classifier}