Пример #1
0
    df = transform(df)

    os.makedirs(output_dir, exist_ok=True)
    params = {
        'objective': 'binary',
        'num_leaves': 32,
        'min_data_in_leaf': 15,  # 42,
        'max_depth': -1,
        'learning_rate': 0.1,
        'boosting': 'gbdt',
        'bagging_fraction': 0.7,  # 0.5,
        'feature_fraction': 0.5,
        'bagging_seed': 0,
        'reg_alpha': 0.1,  # 1.728910519108444,
        'reg_lambda': 1,
        'random_state': 0,
        'metric': 'auc',
        'verbosity': -1,
        "n_estimators": 10000,
        "early_stopping_rounds": 100
    }

    df = df.drop(["user_answer"], axis=1)

    train_lgbm_cv(df,
                  params=params,
                  output_dir=output_dir,
                  model_id=model_id,
                  exp_name="exp003")
    break
Пример #2
0
        'verbosity': -1,
        "n_estimators": 10000,
        "early_stopping_rounds": 50
    }
    # Write the last 1000 rows to exp028.csv (without the index column)
    # before any columns or rows are removed.
    df.tail(1000).to_csv("exp028.csv", index=False)

    # Remove three columns, then keep only rows whose answered_correctly
    # value is present.
    df = df.drop(columns=["user_answer", "tags", "type_of"])
    df = df[df["answered_correctly"].notna()]
    print(df.columns)
    print(df.shape)

    print(model_id)
    # model_id doubles as the experiment name for this run.
    train_lgbm_cv(
        df,
        params=params,
        output_dir=output_dir,
        model_id=model_id,
        exp_name=model_id,
        is_debug=is_debug,
        drop_user_id=True,
    )

    params = {
        'n_estimators': 12000,
        'learning_rate': 0.1,
        'eval_metric': 'AUC',
        'loss_function': 'Logloss',
        'random_seed': 0,
        'metric_period': 50,
        'od_wait': 400,
        'task_type': 'GPU',
        'max_depth': 8,
        "verbose": 100
Пример #3
0
    # df.to_pickle("../input/riiid-test-answer-prediction/train_merged.pickle")

    # print(len(df))
    # df = pd.read_pickle("../input/riiid-test-answer-prediction/train_merged.pickle")

    df = transform(df)

    os.makedirs(output_dir, exist_ok=True)
    params = {
        'objective': 'binary',
        'num_leaves': 32,
        'min_data_in_leaf': 15,  # 42,
        'max_depth': -1,
        'learning_rate': 0.1,
        'boosting': 'gbdt',
        'bagging_fraction': 0.7,  # 0.5,
        'feature_fraction': 0.5,
        'bagging_seed': 0,
        'reg_alpha': 0.1,  # 1.728910519108444,
        'reg_lambda': 1,
        'random_state': 0,
        'metric': 'auc',
        'verbosity': -1,
        "n_estimators": 10000,
        "early_stopping_rounds": 100
    }

    df = df.drop(["user_answer", "row_id"], axis=1)

    train_lgbm_cv(df, params=params, output_dir=output_dir, model_id=model_id)
    break
Пример #4
0
        'objective': 'binary',
        'num_leaves': 32,
        'min_data_in_leaf': 15,  # 42,
        'max_depth': -1,
        'learning_rate': 0.1,
        'boosting': 'gbdt',
        'bagging_fraction': 0.7,  # 0.5,
        'feature_fraction': 0.5,
        'bagging_seed': 0,
        'reg_alpha': 0.1,  # 1.728910519108444,
        'reg_lambda': 1,
        'random_state': 0,
        'metric': 'auc',
        'verbosity': -1,
        "n_estimators": 10000,
        "early_stopping_rounds": 100
    }

    df = df.drop(["user_answer"], axis=1)
    print(df.columns)

    model_id = os.path.basename(fname).replace(".pickle", "")
    print(model_id)
    train_lgbm_cv(df,
                  params=params,
                  output_dir=output_dir,
                  model_id=model_id,
                  exp_name=f"exp008_{model_id}",
                  drop_user_id=True)
    break
Пример #5
0
    print(model_id)
    for _ in range(10000):
        params = {
            'objective': 'binary',
            'num_leaves': random.choice([8, 16, 32, 64, 128]),
            'max_depth': -1,
            'learning_rate': 0.3,
            'boosting': random.choice(['gbdt', 'gbdt', 'gbdt', 'goss']),
            'bagging_fraction': random.choice([0.1, 0.5, 0.7, 0.9]),
            'feature_fraction': random.choice([0.1, 0.3, 0.5, 0.7, 0.9]),
            'bagging_seed': 0,
            'reg_alpha': random.choice([0, 0.1, 1, 5]),
            'reg_lambda': random.choice([0, 0.1, 1, 5]),
            'random_state': 0,
            'metric': 'auc',
            'verbosity': -1,
            "n_estimators": 10000,
            "early_stopping_rounds": 100
        }

        model_id = os.path.basename(fname).replace(".pickle", "")
        train_lgbm_cv(df,
                      params=params,
                      output_dir=output_dir,
                      model_id=model_id,
                      exp_name=model_id,
                      drop_user_id=True,
                      experiment_id=2)
        del params
    break
Пример #6
0
# Parameter dict handed unchanged to train_lgbm_cv below.
# NOTE(review): the keys look like LightGBM options - confirm how
# train_lgbm_cv consumes "n_estimators" / "early_stopping_rounds".
params = {
    "objective": "binary",
    "num_leaves": 32,
    "min_data_in_leaf": 15,  # alternative value left by the author: 42
    "max_depth": -1,
    "learning_rate": 0.3,
    "boosting": "gbdt",
    "bagging_fraction": 0.7,  # alternative value left by the author: 0.5
    "feature_fraction": 0.9,
    "bagging_seed": 0,
    "reg_alpha": 5,  # alternative value left by the author: 1.728910519108444
    "reg_lambda": 5,
    "random_state": 0,
    "metric": "auc",
    "verbosity": -1,
    "n_estimators": 10000,
    "early_stopping_rounds": 100,
}

# Remove three columns, then keep only rows whose answered_correctly value
# is present.
df = df.drop(columns=["user_answer", "tags", "type_of"])
df = df[df["answered_correctly"].notna()]
print(df.columns)
print(df.shape)

# This run uses a fixed model id (0) and experiment name ("exp11").
train_lgbm_cv(
    df,
    params=params,
    output_dir=output_dir,
    model_id=0,
    exp_name="exp11",
    drop_user_id=True,
)
Пример #7
0
        "n_estimators": 10000,
        "early_stopping_rounds": 50
    }
    # Write the last 1000 rows to exp028.csv (without the index column)
    # before any rows are filtered out.
    df.tail(1000).to_csv("exp028.csv", index=False)

    # Keep only rows whose answered_correctly value is present.
    df = df[df["answered_correctly"].notna()]
    print("lgbm")
    print(df.columns)
    print(df.shape)

    # Model id = file name of fname with every ".pickle" occurrence removed;
    # it is also used as the experiment name.
    pickle_name = os.path.basename(fname)
    model_id = pickle_name.replace(".pickle", "")
    print(model_id)

    # Training receives a copy without user_answer / tags / type_of; df itself
    # is not rebound here, so it still carries those columns afterwards.
    train_lgbm_cv(
        df.drop(columns=["user_answer", "tags", "type_of"]),
        params=params,
        output_dir=output_dir,
        model_id=model_id,
        exp_name=model_id,
        is_debug=is_debug,
        drop_user_id=True,
    )

    # Column names collected for later use (the consumer is outside this excerpt).
    useful_cols = [
        "target_enc_content_id",
        "target_enc_['content_id', 'prior_question_had_explanation']",
        "user_rate_mean_content_id",
        "user_rate_sum_content_id",
        "shiftdiff_timestamp_by_user_id",
        "previous_answer_['user_id', 'content_id']",
        "target_enc_['content_id', 'user_count_bin']",
        "target_enc_['user_id', 'part']",
        "diff_user_level_target_enc_content_id",
        "user_id",
        "answered_correctly",
    ]