def train_it(train, holdout_df, filename):
    """Train a GoldenLgb model on ``train`` and validate it on ``holdout_df``.

    The feature columns come from the module-level ``predict_col``; the
    target is the ``"is_attributed"`` column of ``train``.

    Args:
        train: Table indexable by ``"is_attributed"`` and by ``predict_col``.
        holdout_df: Held-out data handed to ``HoldoutValidator``.
        filename: Passed to ``validator.output_prediction``.

    Returns:
        The model object returned by ``GoldenLgb.do_train_sk``.
    """
    labels = train["is_attributed"]
    features = train[predict_col]
    # 80/20 split with a fixed seed so repeated runs use the same partition.
    fit_x, eval_x, fit_y, eval_y = model_selection.train_test_split(
        features,
        labels,
        test_size=0.2,
        random_state=99,
    )
    timer.time("prepare train in ")

    trainer = pocket_lgb.GoldenLgb()
    model = trainer.do_train_sk(fit_x, eval_x, fit_y, eval_y)
    trainer.show_feature_importance(model)

    # Drop the local references to the training data before validation.
    del train
    del fit_x, eval_x
    del fit_y, eval_y
    gc.collect()
    timer.time("end train in ")

    checker = holdout_validator2.HoldoutValidator(
        model, holdout_df, predict_col)
    checker.validate()
    checker.output_prediction(filename)
    timer.time("done validation in ")

    return model
Пример #2
0
# First round: fit a model on `train`, evaluate it on `holdout_df`, and keep
# its holdout predictions for use as pseudo labels in the second round.
predict_col = column_selector.get_predict_col()
train_y = train["is_attributed"]
train_x = train[predict_col]
# 20% of the rows go to the validation split; the seed is fixed at 99.
# NOTE(review): presumably sklearn.model_selection -- confirm at the imports.
X_train, X_valid, y_train, y_valid = model_selection.train_test_split(
    train_x, train_y, test_size=0.2, random_state=99)

# NOTE(review): timer.time(...) looks like an elapsed-time checkpoint -- confirm.
timer.time("prepare train in ")
lgb = pocket_lgb.GoldenLgb()
model = lgb.do_train_sk(X_train, X_valid, y_train, y_valid)
lgb.show_feature_importance(model)

# Predict on the holdout set now: `model` is deleted further down, and these
# predictions are reused as the "pseudo_label" column in the second round.
y_pred = model.predict(holdout_df[predict_col])

timer.time("end train in ")
validator = holdout_validator2.HoldoutValidator(model, holdout_df, predict_col)
validator.validate()
validator.validate_rmse(ERROR_ANALYSIS)
# Writing the prediction output is currently disabled.
#validator.output_prediction(PREDICTION)
timer.time("done validation in ")

# Drop the first-round validator and model before starting the second round.
del validator
del model
gc.collect()

####################
# second round
####################

# Copy the holdout feature columns and attach the first-round predictions
# as a new "pseudo_label" column; the copy leaves `holdout_df` unmodified.
pl_data = holdout_df[predict_col].copy()
pl_data["pseudo_label"] = y_pred