# Load the preprocessed test set for the meter-split MLP models and set up
# per-meter prediction. (Chunk is truncated: the actual predict step that
# consumes `model_list` continues beyond this excerpt.)
with timer("Loading data"):
    if args.normalize_target:
        # Target-normalized variant: labels were divided by log1p(square_feet)
        # at training time, so keep raw square footage for later rescaling.
        test = load_data("test_nn_target_normalized_meter")
        test_square_feet = load_data("test_clean")["square_feet"].values
    else:
        test = load_data("test_nn_meter")
    # Placeholder label column so the test frame matches the training schema
    # (no real targets exist at inference time).
    # NOTE(review): source indentation was mangled — assumes this line runs in
    # both branches, not only in the else; confirm against the original file.
    test["target"] = -1

with timer("Predicting"):
    # One prediction slot per test row, to be filled meter-by-meter below.
    test_preds = np.zeros(len(test))
    # Four meter types in this dataset split. NOTE(review): meter-id semantics
    # are not visible here — confirm the 0..3 mapping against the data docs.
    for m in range(4):
        print(m)
        # get base file name
        model_name = f"mlp-split_meter"
        make_dir(f"{MODEL_PATH}/{model_name}")
        # create sub model path
        if args.normalize_target:
            sub_model_path = f"{MODEL_PATH}/mlp-split_meter/target_normalization/meter_{m}"
        else:
            sub_model_path = f"{MODEL_PATH}/mlp-split_meter/no_normalization/meter_{m}"
        # remove indices not in this meter
        X = test.loc[test.meter == m, FEATURES + ["target"]]
        print(f"split meter {m}: test size {len(X)}")
        # load models — every checkpoint saved under this meter's directory
        model_list = glob.glob(f"{sub_model_path}/*")
        # predict
        # (prediction code continues past this chunk)
# Preprocess the training frame and lay out the per-site LGB model directory
# tree. (Chunk is truncated: the training step inside the innermost loop
# continues beyond this excerpt.)
# NOTE(review): "Preprocesing" is misspelled, but it is a runtime string
# (timer label) — left byte-identical here.
with timer("Preprocesing"):
    # Mark categorical columns explicitly; LightGBM consumes pandas
    # "category" dtype natively.
    for x in CAT_COLS:
        train[x] = train[x].astype("category")
    if args.normalize_target:
        # Scale the "gte" (target-encoded) features and the label itself by
        # log1p(square_feet), so the model learns per-square-foot intensity.
        target_encode_cols = [x for x in train.columns if "gte" in x]
        train[target_encode_cols] = train[target_encode_cols] / np.log1p(
            train[["square_feet"]].values)
        train["target"] = np.log1p(train["meter_reading"]) / np.log1p(
            train["square_feet"])
    else:
        # Standard log1p transform of the raw meter reading.
        train["target"] = np.log1p(train["meter_reading"])

# get base file name
model_name = f"lgb-split_site"
make_dir(f"{MODEL_PATH}/{model_name}")

# One model per (seed, n_months, fold, site) combination.
# NOTE(review): `seed` and `n_months` are not used in the visible portion —
# presumably consumed by the training code that follows this chunk.
for seed in range(3):
    for n_months in [1, 2, 3, 4, 5, 6]:
        # NOTE(review): argument is the constant 6, not `n_months` — this
        # recomputes the same validation split on every iteration of the
        # n_months loop, which looks suspicious; confirm intent upstream.
        validation_months_list = get_validation_months(6)
        for fold_, validation_months in enumerate(validation_months_list):
            # Sites 0..15 — assumes 16 sites in the dataset; TODO confirm.
            for s in range(16):
                # create sub model path
                if args.normalize_target:
                    sub_model_path = f"{MODEL_PATH}/{model_name}/target_normalization/site_{s}"
                    make_dir(sub_model_path)
                else:
                    sub_model_path = f"{MODEL_PATH}/{model_name}/no_normalization/site_{s}"
                    make_dir(sub_model_path)
                # (training code continues past this chunk)