# Show the dimensions of the train/test arrays prepared earlier in the script.
print(f"X_train shape {X_train.shape}, y_train shape {y_train.shape}")
print(f"X_test shape {X_test.shape}, y_test shape {y_test.shape}")

#%% evaluate performance (fit on the training arrays, score on the test arrays)
# This model is only used for evaluation; the final model is fitted in the
# last cell.
eval_reg = HistGradientBoostingRegressor(random_state=1129)
eval_reg.fit(X_train, y_train)

banner_rule = "-" * 10

print(banner_rule, "regression report", banner_rule)
y_test_pred = eval_reg.predict(X_test)
# Second dimension of X_test; presumably the feature count the report helper
# needs (e.g. for an adjusted metric) -- confirm against regression_report.
n_columns = X_test.shape[1]
report = regression_report(y_test, y_test_pred, n_columns)
print(report)

print(banner_rule, "evaluation of label", banner_rule)
true_label_columns = ["adr", "revenue", "is_canceled", "label"]
label_df = data.get_true_label(columns=true_label_columns)
pred_label_df = data.predict_label(eval_reg, X_test_df)

# Compare predicted and true frames on the "label" column.
print("[ label evaluation ]")
report_label = evaluate_by_label(pred_label_df, label_df, target="label")
print(report_label)

# Same comparison, this time on the "revenue" column.
print("[ revenue_per_day evaluation ]")
report_revenue = evaluate_by_label(pred_label_df, label_df, target="revenue")
print(report_revenue)

#%% training with all data
# Refit a fresh regressor (same seed) on everything data.processing returns
# for the "revenue" target.
X_df, y_df = data.processing(["revenue"])
reg = HistGradientBoostingRegressor(random_state=1129)
X_all = X_df.to_numpy()
y_all = y_df["revenue"].to_numpy()
reg.fit(X_all, y_all)
# Show the dimensions of the train/test arrays prepared earlier in the script.
print(f"X_train shape {X_train.shape}, y_train shape {y_train.shape}")
print(f"X_test shape {X_test.shape}, y_test shape {y_test.shape}")

#%% evaluate performance (fit on the training arrays, score on the test arrays)
# This model is only used for evaluation; the final model is fitted in the
# last cell.
eval_reg = HistGradientBoostingRegressor(random_state=1129)
eval_reg.fit(X_train, y_train)

banner_rule = "-" * 10

print(banner_rule, "regression report", banner_rule)
y_test_pred = eval_reg.predict(X_test)
# Second dimension of X_test; presumably the feature count the report helper
# needs (e.g. for an adjusted metric) -- confirm against regression_report.
n_columns = X_test.shape[1]
report = regression_report(y_test, y_test_pred, n_columns)
print(report)

print(banner_rule, "evaluation of label", banner_rule)
true_label_columns = ["adr", "revenue", "is_canceled", "label"]
label_df = data.get_true_label(columns=true_label_columns)
# reg_out="adr" is forwarded as-is; presumably it tells predict_label that the
# regressor's output is an ADR value -- confirm against data.predict_label.
pred_label_df = data.predict_label(eval_reg, X_test_df, reg_out="adr")

#%%
# Compare predicted and true frames on the "label" column.
print("[ label evaluation ]")
report_label = evaluate_by_label(pred_label_df, label_df, target="label")
print(report_label)

# Same comparison, this time on the "revenue" column.
print("[ revenue_per_day evaluation ]")
report_revenue = evaluate_by_label(pred_label_df, label_df, target="revenue")
print(report_revenue)

#%% training with all data
# Refit a fresh regressor (same seed) on everything data.processing returns
# for the "actual_adr" target.
X_df, y_df = data.processing(["actual_adr"])
reg = HistGradientBoostingRegressor(random_state=1129)
X_all = X_df.to_numpy()
y_all = y_df["actual_adr"].to_numpy()
reg.fit(X_all, y_all)