Exemplo n.º 1
0
def main(debug=False, use_pkl=False):
    """Build (or load) the merged feature frame, then run k-fold XGBoost.

    Args:
        debug: When truthy, ``num_rows=10000`` is handed to every loader
            (otherwise ``None``); the flag is also forwarded to
            ``kfold_xgboost``.
        use_pkl: When truthy, the merged frame is read from
            ``../output/df.pkl`` and no loader is called. Otherwise the
            frame is rebuilt from the individual sources and written back
            to that same path.
    """
    num_rows = None
    if debug:
        num_rows = 10000

    if not use_pkl:
        with timer("train & test"):
            df = train_test(num_rows)

        # One entry per auxiliary source, in merge order:
        # (timer label, loader function, join key passed as ``on=``).
        sources = (
            ("nightley", nightley, ['datetime', 'park']),
            ("hotlink", hotlink, 'datetime'),
            ("colopl", colopl, ['park', 'year', 'month']),
            ("weather", weather, ['datetime', 'park']),
            ("nied_oyama", nied_oyama, ['datetime', 'park']),
            ("agoop", agoop, ['park', 'year', 'month']),
            ("jorudan", jorudan, ['datetime', 'park']),
        )
        for label, loader, join_key in sources:
            with timer(label):
                df = pd.merge(df, loader(num_rows), on=join_key, how='outer')

        with timer("save pkl"):
            save2pkl('../output/df.pkl', df)
    else:
        df = loadpkl('../output/df.pkl')

    with timer("Run XGBoost with kfold"):
        print("df shape:", df.shape)
        feat_importance = kfold_xgboost(
            df, num_folds=NUM_FOLDS, stratified=True, debug=debug
        )
        display_importances(
            feat_importance,
            '../output/xgb_importances.png',
            '../output/feature_importance_xgb.csv',
        )
Exemplo n.º 2
0
################################################################################
# Hyper-parameter optimization with optuna
# Reference: https://github.com/pfnet/optuna/blob/master/examples/lightgbm_simple.py
################################################################################

# Row-count argument forwarded to every loader in the rebuild branch below.
NUM_ROWS = None
# True: load the cached merged frame; False: rebuild it from the raw sources.
USE_PKL = True

if USE_PKL:
    DF = loadpkl('../output/df.pkl')
else:
    # Rebuild the merged frame with the same sources, order and join keys
    # that main() uses when it writes '../output/df.pkl'.
    DF = train_test(NUM_ROWS)
    DF = pd.merge(DF, nightley(NUM_ROWS), on=['datetime', 'park'], how='outer')
    DF = pd.merge(DF, hotlink(NUM_ROWS), on='datetime', how='outer')
    # NOTE(review): the key previously omitted 'park', unlike main(); aligned
    # here so both code paths yield the same frame -- confirm colopl() still
    # returns a 'park' column.
    DF = pd.merge(DF, colopl(NUM_ROWS), on=['park', 'year', 'month'], how='outer')
    DF = pd.merge(DF, weather(NUM_ROWS), on=['datetime', 'park'], how='outer')
    DF = pd.merge(DF,
                  nied_oyama(NUM_ROWS),
                  on=['datetime', 'park'],
                  how='outer')
    # The next two calls used the undefined lowercase name `num_rows`, which
    # raised NameError at module scope; they now use the NUM_ROWS constant.
    DF = pd.merge(DF,
                  agoop(NUM_ROWS),
                  on=['park', 'year', 'month'],
                  how='outer')
    DF = pd.merge(DF, jorudan(NUM_ROWS), on=['datetime', 'park'], how='outer')

# Keep only the rows whose 'visitors' value is present as the training set.
TRAIN_DF = DF[DF['visitors'].notnull()]
# Feature columns: every column of TRAIN_DF not listed in FEATS_EXCLUDED.
FEATS = [f for f in TRAIN_DF.columns if f not in FEATS_EXCLUDED]