Пример #1
0
def lgb_predict(lgb_model, X_test, submission_file):
    """Predict with a saved LightGBM model and write a submission CSV.

    Args:
        lgb_model: Path to the LightGBM model file to load.
        X_test: Feature matrix passed straight to ``Booster.predict``.
        submission_file: Destination path of the CSV (columns ``Id``, ``Sales``).
    """
    booster = lgb.Booster(model_file=lgb_model)
    # NOTE(review): inverse_transform_y presumably undoes the target
    # transform used at training time -- confirm against its definition.
    sales = inverse_transform_y(booster.predict(X_test))

    # Ids are 1-based row numbers in prediction order.
    submission = pd.DataFrame({'Id': range(1, len(sales) + 1), 'Sales': sales})
    submission.to_csv(submission_file, index=False)
Пример #2
0
def xgb_predict(model_file, submission_file):
    """Predict with a saved XGBoost model, write the CSV and submit it.

    Args:
        model_file: Path to the XGBoost model file to load.
        submission_file: Destination path of the CSV (columns ``Id``, ``Sales``);
            the same path is then handed to ``submit_to_kaggle``.
    """
    booster = xgb.Booster(model_file=model_file)

    # Only the test split is needed here; the training split is discarded.
    _, test = load_data()
    test_matrix = xgb.DMatrix(extract_X(test))

    # NOTE(review): inverse_transform_y presumably undoes the target
    # transform used at training time -- confirm against its definition.
    sales = inverse_transform_y(booster.predict(test_matrix))

    # Ids are 1-based row numbers in prediction order.
    submission = pd.DataFrame({'Id': range(1, len(sales) + 1), 'Sales': sales})
    submission.to_csv(submission_file, index=False)
    submit_to_kaggle(submission_file)
Пример #3
0
def xgb_train():
    """Train an XGBoost regressor on the embedding features and save it.

    Features, targets and feature names come from ``get_embed_features`` and
    ``get_embed_fea_names``.  The trained booster is always saved to
    ``xgb_1.model``.

    Behaviour depends on the module-level ``DO_EVAL`` flag:

    * ``DO_EVAL`` true: the last 4.8% of the rows (no shuffling) are held out
      and added to the watchlist as ``'eval'``; no predictions are written.
    * ``DO_EVAL`` false: all rows are used for training, test predictions are
      written to ``./output/xgb_pred1.csv`` and passed to ``submit_to_kaggle``.
    """
    train_embed, train_sales, test_embed = get_embed_features()
    train_sales = train_sales.reshape(-1)  # flatten targets to 1-D
    feature_names = get_embed_fea_names()
    assert len(feature_names) == train_embed.shape[1]

    if DO_EVAL:
        # shuffle=False keeps row order, so the validation set is the tail
        # of the data rather than a random sample.
        X_train, X_val, y_train, y_val = train_test_split(
            train_embed, train_sales, test_size=0.048, shuffle=False)
    else:
        X_train = train_embed
        y_train = train_sales
    X_test = test_embed
    print('X_train:', X_train.shape)
    print('y_train:', y_train.shape)

    dtrain = xgb.DMatrix(X_train, y_train, feature_names=feature_names)
    watchlist = [(dtrain, 'train')]
    if DO_EVAL:
        dval = xgb.DMatrix(X_val, y_val, feature_names=feature_names)
        watchlist.append((dval, 'eval'))

    param = {
        'objective': 'reg:linear',
        'eval_metric': 'mae',
        'max_depth': 10,
        'gamma': 0,
        'min_child_weight': 1,
        'eta': 0.02,
        'subsample': 0.8,
        'colsample_bytree': 0.95,
        'tree_method': 'hist'
    }
    nrounds = 5000
    # Early stopping monitors the last entry of the watchlist: 'eval' when
    # DO_EVAL is set, otherwise the training set itself.
    bst = xgb.train(param, dtrain, nrounds, watchlist,
                    early_stopping_rounds=100,
                    feval=rmspe_xgb)
    bst.save_model('xgb_1.model')

    if not DO_EVAL:
        # The booster was trained with explicit feature names, and
        # Booster.predict validates them; the test matrix must carry the same
        # names or prediction fails with a feature-name mismatch.
        dtest = xgb.DMatrix(X_test, feature_names=feature_names)
        y_pred = bst.predict(dtest)
        # NOTE(review): inverse_transform_y presumably undoes the target
        # transform used at training time -- confirm against its definition.
        y_pred = inverse_transform_y(y_pred)
        # Ids are 1-based row numbers in prediction order.
        df = pd.DataFrame({'Id': range(1, len(y_pred) + 1), 'Sales': y_pred})
        df.to_csv('./output/xgb_pred1.csv', index=False)
        submit_to_kaggle('./output/xgb_pred1.csv')
Пример #4
0
 def evaluate(self, X, y):
     """Return the ``rmspe`` error of this model's predictions on ``X``.

     ``y`` is passed through ``inverse_transform_y`` before being compared
     with the output of ``self.predict(X)``.
     """
     predicted = self.predict(X)
     actual = inverse_transform_y(y)
     return rmspe(actual, predicted)
Пример #5
0
 def predict(self, X):
     """Return ``self.predict_raw(X)`` passed through ``inverse_transform_y``."""
     raw_output = self.predict_raw(X)
     return inverse_transform_y(raw_output)  # (len(X),)