Exemplo n.º 1
0
    # Load the test frame; which dataset is read depends on args.normalize_target.
    with timer("Loading data"):
        if not args.normalize_target:
            test = load_data("test_nn_meter")
        else:
            test = load_data("test_nn_target_normalized_meter")
            # square_feet values come from the separate "test_clean" dataset
            test_square_feet = load_data("test_clean")["square_feet"].values

        # fill the "target" column with the constant -1
        test["target"] = -1

    # Per-meter prediction pass: allocates one zero-initialised slot per test row,
    # then, for each meter id 0..3, resolves that meter's model directory, slices
    # the matching test rows and lists the files found in the directory.
    # The actual prediction step is not visible in this excerpt.
    with timer("Predicting"):
        # one entry per row of `test`, all zeros to start with
        test_preds = np.zeros(len(test))

        for m in range(4):
            print(m)
            # get base file name
            # NOTE(review): the f-string has no placeholders, and neither the name
            # nor the make_dir call depends on m -- both look loop-invariant.
            model_name = f"mlp-split_meter"
            make_dir(f"{MODEL_PATH}/{model_name}")

            # create sub model path
            # The sub directory is chosen by args.normalize_target.
            # NOTE(review): the paths repeat the literal "mlp-split_meter" instead
            # of reusing model_name -- keep the two in sync if either changes.
            if args.normalize_target:
                sub_model_path = f"{MODEL_PATH}/mlp-split_meter/target_normalization/meter_{m}"
            else:
                sub_model_path = f"{MODEL_PATH}/mlp-split_meter/no_normalization/meter_{m}"

            # remove indices not in this meter
            # (keeps rows where test.meter == m, restricted to the FEATURES
            # columns plus "target")
            X = test.loc[test.meter == m, FEATURES + ["target"]]
            print(f"split meter {m}: test size {len(X)}")

            # load models
            # glob returns every path directly under sub_model_path; presumably one
            # entry per saved model -- TODO confirm against the training script.
            model_list = glob.glob(f"{sub_model_path}/*")

            # predict
    # Cast the categorical columns and derive the "target" column from the
    # log1p-transformed meter reading.
    with timer("Preprocesing"):
        # every column listed in CAT_COLS becomes a pandas "category" column
        for x in CAT_COLS:
            train[x] = train[x].astype("category")

        if not args.normalize_target:
            # plain log1p of the raw reading
            train["target"] = np.log1p(train["meter_reading"])
        else:
            # columns whose name contains "gte" are scaled by log1p(square_feet);
            # the double-bracket selection gives a column-shaped array, so the
            # division broadcasts across all selected columns
            target_encode_cols = [name for name in train.columns if "gte" in name]
            train[target_encode_cols] = (
                train[target_encode_cols] / np.log1p(train[["square_feet"]].values)
            )

            # the target itself is the log1p reading divided by log1p(square_feet)
            train["target"] = (
                np.log1p(train["meter_reading"]) / np.log1p(train["square_feet"])
            )

    # get base file name
    # NOTE(review): the f prefix is redundant here (the string has no placeholders).
    model_name = f"lgb-split_site"
    make_dir(f"{MODEL_PATH}/{model_name}")

    # Nested sweep: 3 seeds x 6 n_months values x validation folds x 16 site ids
    # (0..15). The innermost body continues past the end of this excerpt.
    for seed in range(3):
        for n_months in [1, 2, 3, 4, 5, 6]:
            # NOTE(review): the argument is the constant 6 rather than n_months,
            # so the identical call is made on every n_months iteration --
            # confirm whether get_validation_months(n_months) was intended.
            validation_months_list = get_validation_months(6)

            for fold_, validation_months in enumerate(validation_months_list):
                for s in range(16):

                    # create sub model path
                    # Both branches build a per-site directory under model_name and
                    # create it; only the middle path segment differs.
                    if args.normalize_target:
                        sub_model_path = f"{MODEL_PATH}/{model_name}/target_normalization/site_{s}"
                        make_dir(sub_model_path)
                    else:
                        sub_model_path = f"{MODEL_PATH}/{model_name}/no_normalization/site_{s}"
                        make_dir(sub_model_path)