예제 #1
0
import pandas as pd
import util
from statsmodels import api as sm
from matplotlib import pyplot as plt
import datetime as dt

# ------------------------------- settings ----------------------------------
# `pid` selects which "input/<pid>.csv" file is loaded further down.
pid = 5
test_week_num = 6
alpha = 0.7

# ------------------------------ time index ---------------------------------
# Both period bounds are parsed with the same ISO-style date format.
_DATE_FORMAT = "%Y-%m-%d"
start_date = dt.datetime.strptime("2017-06-04", _DATE_FORMAT)
end_date = dt.datetime.strptime("2017-12-31", _DATE_FORMAT)
df_date = util.generate_time_index_df(start_date, end_date)

# ------------------------------ load input ---------------------------------
_input_path = "".join(("input/", str(pid), ".csv"))
f = pd.read_csv(_input_path)
df_day = util.handle_original_df_day_sn(f, df_date)
df_week = util.handle_original_df_week_sn(df_day)

# Extend the day-level frame with the special-date columns selected below.
df = util.add_special_date(df_day, ahead_effect=7, behind_effect=5)

# ----------------------------- feature matrix ------------------------------
# Columns are extracted in exactly this order. The log-transformed sales
# count and average price are used; the untransformed "sale_cnt" /
# "average_price" columns were the disabled alternative in this script.
_model_columns = [
    "log_sale_cnt",
    "log_average_price",
    "special_date",
    "ahead_special_date",
    "behind_special_date",
]
data = df[_model_columns].values

# Number of rows in the extracted array.
sample_num = len(data)
예제 #2
0
        str(i) for i in f[f.l3_gds_group_cd == category_id]["product_id"]
    ]

    # for test
    # category = [str(i) for i in [85, 83, 84, 285]]
    # category = [str(i) for i in [8, 9, 10, 11, 252]]

    # ============================== processing ==============================
    # time index
    start_date = dt.datetime.strptime("2017-06-04", "%Y-%m-%d")
    end_date = dt.datetime.strptime("2017-12-31", "%Y-%m-%d")
    df_date = util.generate_time_index_df(start_date, end_date)

    df_all = [pd.read_csv("input/" + x + ".csv") for x in category]

    df_day_all = [util.handle_original_df_day_sn(x, df_date) for x in df_all]
    df_week_all = [util.handle_original_df_week_sn(x) for x in df_day_all]

    # =============================== plot ===================================
    num_products = len(category)
    line_color = ["b", "g", "r", "y", "c", "k", "m"]
    line_marker = [".", "*", "o", "x", "^", "v", "<", ">"]
    line_kind = ["-", ":"]
    line = [[
        x[0] + x[1] + k for x in itertools.product(line_marker, line_color)
    ] for k in line_kind]
    fig = plt.figure()
    ax1 = fig.add_subplot(211)
    line1 = []
    for i in range(num_products):
        line1.append(