示例#1
0
def dow_analyze(test_df, model, col, period_from, period_to):
    """Print and return the validation RMSE for each day of the week.

    Rows of ``test_df`` whose ``visit_date`` lies in the inclusive range
    ``[period_from, period_to]`` are kept, restricted to the columns in
    ``col``, and then scored separately for each ``dow`` value 0..6.

    Args:
        test_df: DataFrame with a ``visit_date`` column and the columns
            listed in ``col``.
        model: Fitted estimator exposing ``predict``.
        col: Column labels to keep. Must include ``"dow"`` and
            ``"visitors"``; every other column is fed to ``model.predict``.
        period_from: Inclusive lower bound compared against ``visit_date``.
        period_to: Inclusive upper bound compared against ``visit_date``.

    Returns:
        A list of seven scores, where index ``i`` is the RMSE for
        ``dow == i``. Days without any rows in the window are reported
        as NaN instead of being passed to the model.
    """
    in_period = ((test_df['visit_date'] >= period_from)
                 & (test_df["visit_date"] <= period_to))
    validate_data = test_df[in_period][col]

    score_list = []
    for dow_num in range(7):
        print("-" * 40)
        print("dow= ", dow_num)
        dowed_data = validate_data[validate_data["dow"] == dow_num]
        if dowed_data.empty:
            # Nothing to predict on; keep the list aligned with the dow
            # index by recording NaN rather than calling predict on an
            # empty frame.
            print("no rows for dow", dow_num)
            score_list.append(float("nan"))
            continue
        print(dowed_data["visitors"].describe())

        x_valid = dowed_data.drop('visitors', axis=1)
        y_valid = model.predict(x_valid)
        print(pd.DataFrame(y_valid).describe())
        validation_score = custom_metrics.rmse(dowed_data["visitors"], y_valid)
        score_list.append(validation_score)
    print(score_list)
    return score_list
def split_train_skt(df, col, period_list, model):
    """Fit ``model`` once per period and return a snapshot of each fit.

    For every entry of ``period_list`` the model is fitted on the rows
    selected by ``cut_input(df, period[0], period[1])`` and scored (RMSE)
    on the rows selected by ``cut_input(df, period[2], period[3])``.
    Each score is printed, followed by the average over all periods.

    Args:
        df: Input DataFrame containing the columns in ``col`` and a
            ``visitors`` target column.
        col: Feature column labels passed to ``fit`` / ``predict``.
        period_list: Sized iterable of 4-item sequences
            ``(train_from, train_to, test_from, test_to)``.
        model: Estimator exposing ``fit`` and ``predict``. It is fitted
            in place, so after the call it holds the last period's fit.

    Returns:
        A list with one independently fitted copy of the model per
        period, in the order of ``period_list``. Empty when
        ``period_list`` is empty.
    """
    # Function-scope import: the file's import block is outside this view.
    import copy

    models = []
    score = 0.0
    for period in period_list:
        train = cut_input(df, period[0], period[1])
        test = cut_input(df, period[2], period[3])

        model.fit(train[col], train['visitors'])
        y_pred = model.predict(test[col])
        this_score = custom_metrics.rmse(test["visitors"], y_pred)
        print(this_score)
        score = score + this_score
        # Snapshot the fitted state. Appending ``model`` itself would make
        # every list entry alias the same object, which the next ``fit``
        # call overwrites.
        models.append(copy.deepcopy(model))

    if not models:
        # No periods were evaluated, so there is no average to report.
        return models

    score = score / len(models)
    print("Average score:")
    print(score)

    return models
示例#3
0
def validate(test_df,
             model,
             col,
             period_from,
             period_to,
             verbose=False,
             save_model=False,
             save_name=None):
    """Score ``model`` on one date window of ``test_df`` and print the RMSE.

    Args:
        test_df: DataFrame with ``visit_date`` and ``visitors`` columns
            plus the columns listed in ``col``.
        model: Fitted estimator exposing ``predict``.
        col: Column labels to select; must include ``"visitors"``, which
            is dropped again before prediction.
        period_from: Inclusive lower bound compared against ``visit_date``.
        period_to: Inclusive upper bound compared against ``visit_date``.
        verbose: When true, also print the window bounds and summary
            statistics of the actual and predicted values.
        save_model: When true, hand the window rows and predictions to
            ``save_prediction`` together with ``save_name``.
        save_name: Forwarded unchanged to ``save_prediction``.
    """
    not_too_early = test_df['visit_date'] >= period_from
    not_too_late = test_df["visit_date"] <= period_to
    window = test_df[not_too_early & not_too_late]

    if verbose:
        print("-" * 40)
        print(period_from, " to ", period_to)
        print(window["visitors"].describe())

    features = window[col].drop('visitors', axis=1)
    predicted = model.predict(features)

    if verbose:
        print(pd.DataFrame(predicted).describe())

    print(custom_metrics.rmse(window["visitors"], predicted))

    if not save_model:
        return
    save_prediction(window, predicted, save_name)
示例#4
0
def store_analyze(test_df, model, col, period_from, period_to,
                  store_count=812):
    """Compute the validation RMSE separately for every store.

    Rows of ``test_df`` whose ``visit_date`` lies in the inclusive range
    ``[period_from, period_to]`` are restricted to the columns in ``col``,
    predicted in one pass, and then scored per ``air_store_num``.

    Args:
        test_df: DataFrame with a ``visit_date`` column and the columns
            listed in ``col``.
        model: Fitted estimator exposing ``predict``.
        col: Column labels to keep. Must include ``"air_store_num"`` and
            ``"visitors"``; everything except ``"visitors"`` is fed to
            ``model.predict``.
        period_from: Inclusive lower bound compared against ``visit_date``.
        period_to: Inclusive upper bound compared against ``visit_date``.
        store_count: Store numbers ``0 .. store_count - 1`` are examined.
            Defaults to 812, the value that was previously hard-coded.

    Returns:
        DataFrame with columns ``air_store_num`` and ``score``, one row
        per store that has at least one row in the window. Stores with no
        rows are reported on stdout and left out.
    """
    in_period = ((test_df['visit_date'] >= period_from)
                 & (test_df["visit_date"] <= period_to))
    # Explicit copy: a column is added below, and assigning into a frame
    # obtained by chained indexing triggers pandas' SettingWithCopyWarning.
    validate_data = test_df[in_period][col].copy()
    x_valid = validate_data.drop("visitors", axis=1)
    y_valid = model.predict(x_valid)
    validate_data["predicted"] = y_valid

    ret_list = []
    for air_store_num in range(store_count):
        single_store = validate_data[validate_data["air_store_num"] ==
                                     air_store_num]
        if single_store.empty:
            print("whoops:", air_store_num)
            continue
        validate_score = custom_metrics.rmse(single_store["visitors"],
                                             single_store["predicted"])
        ret_list.append([air_store_num, validate_score])

    ret_df = pd.DataFrame(ret_list, columns=["air_store_num", "score"])
    return ret_df
def validate(df,
             actual_col_name,
             validate_col_name,
             period_list=None,
             verbose=False):
    """Print and return the mean RMSE of one column over several periods.

    For each ``(period_from, period_to)`` pair, the rows of ``df`` whose
    ``visit_date`` lies in that inclusive range are scored by comparing
    ``actual_col_name`` against ``validate_col_name``.

    Args:
        df: DataFrame with a ``visit_date`` column and both value columns.
        actual_col_name: Label of the column holding the reference values.
        validate_col_name: Label of the column holding the values to score.
        period_list: Iterable of ``(period_from, period_to)`` pairs. When
            ``None``, ``pocket_periods.get_six_week_period_list()`` is used.
        verbose: When true, print the score of every individual period.

    Returns:
        The arithmetic mean of the per-period scores, or NaN when
        ``period_list`` is empty.
    """
    if period_list is None:
        period_list = pocket_periods.get_six_week_period_list()

    score_list = []
    for period_from, period_to in period_list:
        validate_data = df[(df['visit_date'] >= period_from)
                           & (df["visit_date"] <= period_to)]
        validation_score = custom_metrics.rmse(
            validate_data[actual_col_name], validate_data[validate_col_name])
        score_list.append(validation_score)
        if verbose:
            print("score from ", period_from, " to ", period_to, " is:")
            print(validation_score)

    print("average score for", validate_col_name, " is:")
    if score_list:
        average_score = sum(score_list) / float(len(score_list))
    else:
        # No periods to average over; report NaN instead of dividing by
        # zero.
        average_score = float("nan")
    print(average_score)
    print("-" * 40)
    return average_score
示例#6
0
# Load one joined prediction frame per file group. `good_filenames`,
# `prefix_w2`, `get_filenames` and `get_joined_model` are defined outside
# this excerpt.
w2_good = get_joined_model(good_filenames)
short_filenames = get_filenames(prefix_w2, "_short.csv")
w2_short = get_joined_model(short_filenames)
out_filenames = get_filenames(prefix_w2, "_out.csv")
w2_out = get_joined_model(out_filenames)
nores_filenames = get_filenames(prefix_w2, "_no_res.csv")
w2_nores = get_joined_model(nores_filenames)
print("Loaded data")

# Disabled debugging output: non-null prediction counts per frame.
# print(w2_normal["prediction"].count())
# print(w2_good["prediction"].count())
# print(w2_short["prediction"].count())
# print(w2_out["prediction"].count())
# print(w2_nores["prediction"].count())
print("-" * 40)
# RMSE of each frame's "prediction" column against its own "actual_log"
# column. `w2_normal` is defined outside this excerpt; no score is computed
# for `w2_good` in these lines.
normal_score = custom_metrics.rmse(w2_normal["actual_log"],
                                   w2_normal["prediction"])
short_score = custom_metrics.rmse(w2_short["actual_log"],
                                  w2_short["prediction"])
out_score = custom_metrics.rmse(w2_out["actual_log"], w2_out["prediction"])
nores_score = custom_metrics.rmse(w2_nores["actual_log"],
                                  w2_nores["prediction"])
print("normal=", normal_score)
print("short=", short_score)
print("out=", out_score)
print("nores=", nores_score)
print("-" * 40)

df = pd.merge(w2_normal,
              w2_short,
              how="left",
              on=["air_store_id", "visit_date"],
示例#7
0
# `all_model`, `w2_model` and `w2_good` are loaded outside this excerpt.
w2_short = pd.read_csv('../output/p1_w2_0416_0422_short_train.csv')
print("Loaded data")

# Non-null "actual" counts, printed to compare how many rows each frame has.
print(all_model["actual"].count())
print(w2_model["actual"].count())
print(w2_good["actual"].count())
print(w2_short["actual"].count())
print("-" * 40)

# Join the two models on (store, date) with pandas' default join type;
# columns present in both frames get the "_all" / "_w2" suffix.
df = pd.merge(all_model,
              w2_model,
              on=["air_store_id", "visit_date"],
              suffixes=["_all", "_w2"])
# print(df.describe())

# Both models are scored against the target column taken from `all_model`.
all_score = custom_metrics.rmse(df["actual_log_all"], df["prediction_all"])
w2_score = custom_metrics.rmse(df["actual_log_all"], df["prediction_w2"])
print("all=", all_score)
print("w2=", w2_score)

# Fixed-weight blend: 10% "all" prediction, 90% "w2" prediction.
df["averaged"] = df["prediction_all"] * 0.1 + df["prediction_w2"] * 0.9
avg_score = custom_metrics.rmse(df["actual_log_all"], df["averaged"])
print("all+w2=", avg_score)

# Left join keeps every row of `df`; rows without a match in `w2_good` get
# NaN in the columns that come from it.
df = pd.merge(df,
              w2_good,
              on=["air_store_id", "visit_date"],
              how="left",
              suffixes=["", "_good"])
# Use the "prediction" column where it is present and fall back to the w2
# prediction where it is null.
# NOTE(review): "prediction" presumably comes from `w2_good` (the left frame
# only has suffixed prediction columns at this point) - confirm.
df["with_good"] = np.where(df["prediction"].isnull(), df["prediction_w2"],
                           df["prediction"])