def lgb_predict(lgb_model, X_test, submission_file):
    """Score ``X_test`` with a saved LightGBM booster and write a submission CSV.

    Args:
        lgb_model: Value handed to ``lgb.Booster(model_file=...)``.
        X_test: Feature matrix passed to the booster's ``predict``.
        submission_file: Destination path of the CSV.

    The CSV has two columns and no index column: ``Id`` (1-based row number)
    and ``Sales`` (the predictions after ``inverse_transform_y``).
    """
    booster = lgb.Booster(model_file=lgb_model)
    # Predictions are passed through inverse_transform_y before being written.
    sales = inverse_transform_y(booster.predict(X_test))
    row_ids = range(1, len(sales) + 1)
    submission = pd.DataFrame({'Id': row_ids, 'Sales': sales})
    submission.to_csv(submission_file, index=False)
def xgb_predict(model_file, submission_file):
    """Predict the test set with a saved XGBoost booster and submit the result.

    Args:
        model_file: Value handed to ``xgb.Booster(model_file=...)``.
        submission_file: Path of the CSV to write; the same path is then
            passed to ``submit_to_kaggle``.

    The test features come from ``extract_X`` applied to the second element
    returned by ``load_data()``.  The CSV has an ``Id`` column (1-based row
    number) and a ``Sales`` column (predictions after ``inverse_transform_y``).
    """
    booster = xgb.Booster(model_file=model_file)

    # Only the test portion of the loaded data is needed here.
    _unused_train, test_frame = load_data()
    test_matrix = xgb.DMatrix(extract_X(test_frame))

    sales = inverse_transform_y(booster.predict(test_matrix))
    submission = pd.DataFrame({
        'Id': range(1, len(sales) + 1),
        'Sales': sales,
    })
    submission.to_csv(submission_file, index=False)
    submit_to_kaggle(submission_file)
def xgb_train():
    """Train an XGBoost regressor on the embedding features and save it.

    Features, targets and test features come from ``get_embed_features()``;
    column names come from ``get_embed_fea_names()`` and must match the
    number of training columns.

    When the module-level ``DO_EVAL`` flag is truthy, 4.8% of the rows are
    held out without shuffling and added to the watchlist as ``'eval'``.
    Otherwise every row is used for training and, once training finishes,
    the test features are scored, written to ``./output/xgb_pred1.csv``
    (``Id`` is the 1-based row number, ``Sales`` the predictions after
    ``inverse_transform_y``) and passed to ``submit_to_kaggle``.

    The trained booster is always saved to ``xgb_1.model``.
    """
    train_embed, train_sales, test_embed = get_embed_features()
    train_sales = train_sales.reshape(-1)  # flatten targets to 1-D
    feature_names = get_embed_fea_names()
    assert len(feature_names) == train_embed.shape[1]

    if DO_EVAL:
        X_train, X_val, y_train, y_val = train_test_split(
            train_embed, train_sales, test_size=0.048, shuffle=False)
    else:
        X_train = train_embed
        y_train = train_sales

    print('X_train:', X_train.shape)
    print('y_train:', y_train.shape)

    dtrain = xgb.DMatrix(X_train, y_train, feature_names=feature_names)
    watchlist = [(dtrain, 'train')]
    if DO_EVAL:
        dval = xgb.DMatrix(X_val, y_val, feature_names=feature_names)
        watchlist.append((dval, 'eval'))

    param = {
        'objective': 'reg:linear',
        'eval_metric': 'mae',
        'max_depth': 10,
        'gamma': 0,
        'min_child_weight': 1,
        'eta': 0.02,
        'subsample': 0.8,
        'colsample_bytree': 0.95,
        'tree_method': 'hist'
    }
    nrounds = 5000
    bst = xgb.train(param, dtrain, nrounds, watchlist,
                    early_stopping_rounds=100,
                    feval=rmspe_xgb)
    bst.save_model('xgb_1.model')

    if not DO_EVAL:
        # The booster was trained with explicit feature names, and predict()
        # validates them by default, so the test matrix must carry the same
        # names or the prediction is rejected as a feature mismatch.
        dtest = xgb.DMatrix(test_embed, feature_names=feature_names)
        y_pred = bst.predict(dtest)
        y_pred = inverse_transform_y(y_pred)
        df = pd.DataFrame({'Id': range(1, len(y_pred) + 1), 'Sales': y_pred})
        df.to_csv('./output/xgb_pred1.csv', index=False)
        submit_to_kaggle('./output/xgb_pred1.csv')
def evaluate(self, X, y):
    """Return ``rmspe`` of this model's predictions on ``X`` against ``y``.

    ``y`` is passed through ``inverse_transform_y`` before the comparison,
    while the output of ``self.predict`` is used as returned.

    Args:
        X: Inputs forwarded to ``self.predict``.
        y: Targets, inverse-transformed before scoring.

    Returns:
        Whatever ``rmspe(y_true, y_pred)`` returns
        (presumably a root-mean-squared-percentage error; confirm).
    """
    predicted = self.predict(X)
    actual = inverse_transform_y(y)
    return rmspe(actual, predicted)
def predict(self, X):
    """Return predictions for ``X`` after ``inverse_transform_y``.

    The raw output of ``self.predict_raw(X)`` is passed through
    ``inverse_transform_y`` and returned.  The original author noted the
    result shape as ``(len(X),)``.
    """
    return inverse_transform_y(self.predict_raw(X))