Пример #1
0
                y_train = y[train_idx]
                X_holdout = X[test_idx]
                clf.fit(X_train, y_train)
                y_pred = clf.predict_proba(X_holdout)[:, 1]
                S_train[test_idx, i] = y_pred
                S_test_i[:, j] = clf.predict_proba(T)[:, 1]

            S_test[:, i] = S_test_i.mean(1)
        
        self.stacker.fit(S_train, y)
        y_pred = self.stacker.predict_proba(S_test)[:, 1]
        return y_pred

# Level-0 learners for the stacked ensemble. Each one is configured from its
# own best_params_* mapping; every estimator except the gradient-boosting one
# is additionally given the shared N_JOBS setting.
base_models = [
    ExtraTreesClassifier(n_jobs=N_JOBS, **best_params_etc),
    RandomForestClassifier(n_jobs=N_JOBS, **best_params_rfc),
    GradientBoostingClassifier(**best_params_gbc),
    xgb.XGBClassifier(n_jobs=N_JOBS, **best_params_xgb),
]

# Level-1 learner: an XGBoost classifier with only n_jobs overridden.
stacker = xgb.XGBClassifier(n_jobs=N_JOBS)

# NOTE(review): the leading 5 is presumably the number of CV folds -- confirm
# against Ensemble.__init__, which is not visible from here.
model = Ensemble(5, stacker, base_models)
predictions = model.fit_predict(train_data, train_label, test_data)

# Write one row per test entity: its id, the hard 0/1 label obtained by
# thresholding the predicted score at 0.5, and the raw score itself.
submission = pd.DataFrame(
    {
        "EID": ds_test["EID"],
        "FORTARGET": (predictions > 0.5).astype(np.int32),
        "PROB": predictions,
    }
)
submission.to_csv("submission.csv", index=False)

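# NOTE(review): `model` is the Ensemble wrapper rather than an XGBoost model,
# so the call below would presumably fail if re-enabled as written; the
# XGBoost classifier in this script is `stacker` -- confirm before uncommenting.
# xgb.plot_importance(model)
# plt.show()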