def predict_mlp(df, save_name):
    """Predict on ``df`` with the Keras model stored at ``save_name``.

    The model is loaded with ``keras.models.load_model`` and fed the first
    element returned by ``get_inputs(df)``, using a batch size of 1024.
    Returns whatever ``model.predict`` returns.
    """
    network = keras.models.load_model(save_name)
    features = get_inputs(df)[0]
    return network.predict(features, batch_size=1024)


if __name__ == "__main__":
    """
    python scripts/04_predict_mlp_meter.py --normalize_target
    python scripts/04_predict_mlp_meter.py
    """

    args = parser.parse_args()

    with timer("Loading data"):
        if args.normalize_target:
            test = load_data("test_nn_target_normalized_meter")
            test_square_feet = load_data("test_clean")["square_feet"].values
        else:
            test = load_data("test_nn_meter")
        # NOTE(review): the original indentation was lost; this assignment is
        # placed after the if/else so both branches get the placeholder
        # column -- confirm against the script.
        test["target"] = -1

    with timer("Predicting"):
        test_preds = np.zeros(len(test))
        for m in range(4):
            print(m)

            # get base file name
            model_name = f"mlp-split_meter"
            make_dir(f"{MODEL_PATH}/{model_name}")

            # create sub model path
"month_x", "month_y", "building_month", #"month", "gte_meter_building_id_month" ] if __name__ == "__main__": """ python scripts/04_predict_cb_site.py --normalize_target python scripts/04_predict_cb_site.py """ args = parser.parse_args() with timer("Loading data"): test = load_data("test_clean") test.drop(DROP_COLS, axis=1, inplace=True) with timer("Preprocesing"): for x in CAT_COLS: test[x] = test[x].astype("category") if args.normalize_target: target_encode_cols = [x for x in test.columns if "gte" in x] test[target_encode_cols] = test[target_encode_cols] / np.log1p( test[["square_feet"]].values) # get base file name test_preds = np.zeros(len(test)) for s in range(16):
import glob
from functools import partial

import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error

from ashrae.blenders import load_preds, GeneralizedMeanBlender
from ashrae.utils import OUTPUT_PATH, load_data, rmsle, timer

if __name__ == "__main__":
    """
    python scripts/05_blend_predictions.py
    """

    # load test and leak
    with timer("load test and leak"):
        test = load_data("test_clean")
        leak = load_data("is_leak")
        target = leak["meter_reading"].values

    # load predictions
    with timer("load predictions"):
        # glob returns paths in arbitrary order; sorting keeps the column
        # order of preds_matrix identical from run to run.
        npy_paths = sorted(glob.glob(f"{OUTPUT_PATH}/*.npy"))
        csv_paths = sorted(glob.glob(f"{OUTPUT_PATH}/*.csv"))

        # One prediction vector per file: .npy files first, then the
        # meter_reading column of every .csv file.
        preds_matrix = [np.load(path) for path in npy_paths]
        preds_matrix += [
            pd.read_csv(path).meter_reading.values for path in csv_paths
        ]
        if not preds_matrix:
            # np.vstack would raise the same exception type with a far less
            # helpful message.
            raise ValueError(
                f"no prediction files (*.npy / *.csv) found in {OUTPUT_PATH}"
            )

        # Stack so that each column holds the predictions of one file.
        preds_matrix = np.vstack(preds_matrix).T
        # Clip negative predictions to zero.
        preds_matrix[preds_matrix < 0] = 0

    # initialize data
df["building_month"] = bm_ + df.month.astype(str) df["building_hour"] = bm_ + df.hour.astype(str) df["building_meter"] = bm_ # get holidays dates_range = pd.date_range(start="2015-12-31", end="2019-01-01") us_holidays = calendar().holidays(start=dates_range.min(), end=dates_range.max()) df["is_holiday"] = ( df.ts.dt.date.astype("datetime64").isin(us_holidays)).astype(np.int8) if __name__ == "__main__": print("Loading data") train, test = load_data("input") building_meta = load_data("meta") train_weather, test_weather = load_data("weather") print("Process timestamp") train["ts"] = pd.to_datetime(train.timestamp) test["ts"] = pd.to_datetime(test.timestamp) process_timestamp(train) process_timestamp(test) process_timestamp(train_weather) process_timestamp(test_weather) print("Process weather") process_weather(train_weather, "train") process_weather(test_weather, "test")
"month_x", "month_y", "building_month", #"month", "gte_meter_building_id_month" ] if __name__ == "__main__": """ python scripts/03_train_lgb_site.py --normalize_target python scripts/03_train_lgb_site.py """ args = parser.parse_args() with timer("Loading data"): train = load_data("train_clean") train.drop(DROP_COLS, axis=1, inplace=True) train = train.loc[train.is_bad_meter_reading == 0].reset_index( drop=True) with timer("Preprocesing"): for x in CAT_COLS: train[x] = train[x].astype("category") if args.normalize_target: target_encode_cols = [x for x in train.columns if "gte" in x] train[target_encode_cols] = train[target_encode_cols] / np.log1p( train[["square_feet"]].values) train["target"] = np.log1p(train["meter_reading"]) / np.log1p( train["square_feet"]) else:
# for col in CAT_COLS: # x = np.concatenate([train.loc[train_indices, col], test.loc[test_indices, col]]) # encoder = FastLabelEncoder() # encoder.fit(x) # train.loc[train_indices, col] = encoder.transform(train.loc[train_indices, col]) # test.loc[test_indices, col] = encoder.transform(test.loc[test_indices, col]) # with timer("Save Data"): # train.to_pickle(f"{DATA_PATH}/preprocessed/train_nn_target_normalized_site.pkl") # test.to_pickle(f"{DATA_PATH}/preprocessed/test_nn_target_normalized_site.pkl") # meter site no normalization with timer("Loading data"): train, test = load_data("clean") with timer("Standardize Numeric Features"): for s in range(16): train_indices = train.site_id == s test_indices = test.site_id == s X = np.concatenate([ train.loc[train_indices, NUM_COLS].values, test.loc[test_indices, NUM_COLS].values ]) mu = X.mean(0) sig = X.std(0) train.loc[train_indices, NUM_COLS] = (train.loc[train_indices, NUM_COLS] -
monitor='val_loss', mode='min') ]) return if __name__ == "__main__": """ python scripts/03_train_mlp_meter.py --normalize_target python scripts/03_train_mlp_meter.py """ args = parser.parse_args() with timer("Loading data"): train = load_data("train_nn_meter") train = train.loc[train.is_bad_meter_reading == 0].reset_index( drop=True) with timer("Preprocesing"): meter_cat_counts = train.groupby( ["meter"])[CAT_COLS].agg(lambda x: len(np.unique(x))) # get base file name model_name = f"mlp-split_meter" make_dir(f"{MODEL_PATH}/{model_name}") for seed in range(3): for n_months in [1, 2, 3, 4, 5, 6]: validation_months_list = get_validation_months(6)
def predict_mlp(df, save_name): model = keras.models.load_model(save_name) return model.predict(get_inputs(df)[0], batch_size=1024) if __name__ == "__main__": """ python scripts/04_predict_mlp_meter.py --normalize_target python scripts/04_predict_mlp_meter.py """ args = parser.parse_args() with timer("Loading data"): if args.normalize_target: test = load_data("test_nn_tareget_normalized_meter") else: test = load_data("test_nn_meter") test_preds = np.zeros(len(test)) for m in range(4): print(m) # get base file name model_name = f"mlp-split_meter" make_dir(f"{MODEL_PATH}/{model_name}") # create sub model path if args.normalize_target: sub_model_path = f"{MODEL_PATH}/mlp-split_meter/target_normalization/meter_{m}" else:
mode='min' ) ] ) return if __name__ == "__main__": """ python scripts/03_train_mlp_meter.py --normalize_target python scripts/03_train_mlp_meter.py """ args = parser.parse_args() with timer("Loading data"): train = load_data("train_nn_meter") train = train.loc[train.is_bad_meter_reading==0].reset_index(drop=True) train.loc[(train.meter == 0) & (train.site_id == 0), "meter_reading"] *= 0.2931 if args.normalize_target: square_feet = load_data("train_clean")["square_feet"] train["target"] = np.log1p(train["target"]/square_feet) else: train["target"] = np.log1p(train["target"]) with timer("Preprocesing"): meter_cat_counts = train.groupby(["meter"])[CAT_COLS].agg(lambda x: len(np.unique(x))) # get base file name model_name = f"mlp-split_meter" make_dir(f"{MODEL_PATH}/{model_name}")