def function1(df, kunag, matnr):
    """Compare a plain ARIMA fit with a seasonality-adjusted one and save the plot.

    The series for (``kunag``, ``matnr``) is built with ``individual_series``
    and split with ``splitter_2``.  When ``ljung_box_test(df, matnr)`` returns
    a falsy value nothing is fitted or plotted.  Otherwise both
    ``arima_seasonality_added`` (fed the output of
    ``product_seasonal_comp_7_point``) and ``arima`` are run, their
    ``prediction`` columns are drawn together with the ``quantity`` column,
    and the figure is written to a hard-coded directory as
    ``<kunag>_<matnr>.png``.

    The first line of the title is ``normal`` when ``score2 < score1`` and
    ``seasonality`` otherwise (ties therefore read ``seasonality``); the
    second line lists both orders and the scores rounded to 3 decimals.

    Args:
        df: Data object handed unchanged to the helper functions.
        kunag: Identifier forwarded to ``individual_series``; also used in
            the output file name.
        matnr: Identifier forwarded to the helpers; also used in the output
            file name.

    Returns:
        None in every case; the only output is the saved PNG file.
    """
    # NOTE(review): another `function1` is defined later in this file and
    # rebinds the name, so this variant is unreachable by name after import.
    # NOTE(review): other call sites in this file index the result of
    # ljung_box_test (e.g. `[0]`); if it returns a non-empty tuple here the
    # guard below never fires -- confirm which signature is current.
    df_series = individual_series(df, kunag, matnr)
    train, validation, test = splitter_2(df_series)
    seas_pres = ljung_box_test(df, matnr)
    if not seas_pres:
        return None

    seasonality_product = product_seasonal_comp_7_point(df, matnr)
    score1, output1, output1_val, order1, mse_val_1 = arima_seasonality_added(
        train, validation, test, seasonality_product)
    score2, output2, output2_val, order2, mse_val_2 = arima(
        train, validation, test)

    input_df1 = output1.set_index("dt_week")
    input_df2 = output2.set_index("dt_week")
    output1_val = output1_val.set_index("dt_week")
    output2_val = output2_val.set_index("dt_week")

    fig = plt.figure(figsize=(16, 8))
    try:
        # Validation predictions followed by the last 16 rows of each model's
        # output, so the "test" curves continue from the validation curves.
        test_norm = pd.concat([output2_val, input_df2.iloc[-16:]])
        test_seas = pd.concat([output1_val, input_df1.iloc[-16:]])

        plt.plot(test_seas["prediction"], marker=".", color='red',
                 label='test_seasonality')
        plt.plot(test_norm["prediction"], marker=".", color='blue',
                 label='test_normal')
        plt.plot(output1_val["prediction"], marker=".", color='brown',
                 label='val_seasonality')
        plt.plot(output2_val["prediction"], marker=".", label='val_normal')
        plt.plot(input_df1["quantity"], marker=".", color='orange',
                 label='actual')
        plt.legend(('test_seasonality', 'test_normal', 'val_seasonality',
                    'val_normal', 'actual'), loc='upper left')
        plt.xlabel('time in weeks')
        plt.ylabel('quantities')

        # Only the first word of the title depends on which model scored lower.
        winner = 'normal' if score2 < score1 else 'seasonality'
        plt.title(winner + '\nseasonality=' + str(order1)
                  + ' test_mse_seasonality=' + str(round(score1, 3))
                  + ' val_mse_seasonality=' + str(round(mse_val_1, 3))
                  + ' normal=' + str(order2)
                  + ' test_mse_normal=' + str(round(score2, 3))
                  + ' val_mse_normal=' + str(round(mse_val_2, 3)))
        # plt.text("05-04-2018", 0.1, 'seasonality = ' + str(order1) + '\n'
        #          'test_mse_seasonality = ' + str(score1) + '\n'
        #          'validation_mse_seasonality = ' + str(mse_val_1) + '\n'
        #          'normal = ' + str(order2) + '\n'
        #          'test_mse_normal = ' + str(score2) + '\n'
        #          'validation_mse_normal = ' + str(mse_val_2) + '\n')
        plt.savefig(
            "/home/aman/PycharmProjects/seasonality_hypothesis/stl_plots_seasonality_108_7_point_thresh_0.01/"
            + str(kunag) + "_" + str(matnr) + ".png")
    finally:
        # Release the figure; otherwise one figure stays open per call when
        # this function is run over many series.
        plt.close(fig)
def function1(df, kunag, matnr):
    """Plot plain vs. seasonality-adjusted ARIMA predictions and save the figure.

    The series for (``kunag``, ``matnr``) is built with ``individual_series``
    and split with ``splitter_2``.  When ``ljung_box_test(df, matnr)`` returns
    a falsy value nothing is fitted or plotted.  Otherwise
    ``arima_seasonality_added`` (fed the output of
    ``product_seasonal_comp_5_point``) and ``arima`` are both run, their
    ``prediction`` columns (full output and validation output) are drawn
    together with the ``quantity`` column, and the figure is written to a
    hard-coded directory as ``<kunag>_<matnr>.png``.

    The title is ``normal`` when ``score2 < score1`` and ``seasonality``
    otherwise; a text box lists both orders and the unrounded scores.

    Args:
        df: Data object handed unchanged to the helper functions.
        kunag: Identifier forwarded to ``individual_series``; also used in
            the output file name.
        matnr: Identifier forwarded to the helpers; also used in the output
            file name.

    Returns:
        None in every case; the only output is the saved PNG file.
    """
    # NOTE(review): this definition rebinds `function1`, which is also
    # defined earlier in this file -- confirm which variant is intended.
    # NOTE(review): other call sites in this file index the result of
    # ljung_box_test (e.g. `[0]`); if it returns a non-empty tuple here the
    # guard below never fires -- confirm which signature is current.
    df_series = individual_series(df, kunag, matnr)
    train, validation, test = splitter_2(df_series)
    seas_pres = ljung_box_test(df, matnr)
    if not seas_pres:
        return None

    seasonality_product = product_seasonal_comp_5_point(df, matnr)
    score1, output1, output1_val, order1, mse_val_1 = arima_seasonality_added(
        train, validation, test, seasonality_product)
    score2, output2, output2_val, order2, mse_val_2 = arima(
        train, validation, test)

    input_df1 = output1.set_index("dt_week")
    input_df2 = output2.set_index("dt_week")
    output1_val = output1_val.set_index("dt_week")
    output2_val = output2_val.set_index("dt_week")

    fig = plt.figure(figsize=(16, 8))
    try:
        plt.plot(input_df1["prediction"], marker=".", color='red',
                 label='arima_with_seasonality')
        plt.plot(input_df2["prediction"], marker=".", color='blue',
                 label='arima')
        plt.plot(output1_val["prediction"], marker=".", color='brown',
                 label='arima_seasonality_validation')
        plt.plot(output2_val["prediction"], marker=".",
                 label='arima_validation')
        plt.plot(input_df1["quantity"], marker=".", color='orange',
                 label='actual')
        # The labels above are only rendered once a legend is requested.
        plt.legend(loc='upper left')
        plt.xlabel('time in weeks')
        plt.ylabel('quantities')

        plt.title('normal' if score2 < score1 else 'seasonality')

        summary = (
            'seasonality = ' + str(order1) + '\n'
            'test_mse_seasonality = ' + str(score1) + '\n'
            'validation_mse_seasonality = ' + str(mse_val_1) + '\n'
            'normal = ' + str(order2) + '\n'
            'test_mse_normal = ' + str(score2) + '\n'
            'validation_mse_normal = ' + str(mse_val_2) + '\n'
        )
        # NOTE(review): the x position is a hard-coded date string; it
        # presumably has to match the format of the "dt_week" index -- confirm.
        plt.text("05-04-2018", 0.1, summary)

        plt.savefig(
            "/home/aman/PycharmProjects/seasonality_hypothesis/plots_seasonality_108/"
            + str(kunag) + "_" + str(matnr) + ".png")
    finally:
        # Release the figure; otherwise one figure stays open per call when
        # this function is run over many series.
        plt.close(fig)
# bucket_1_sample = frequency_cleaveland[(frequency_cleaveland["frequency"] > 26) & (frequency_cleaveland["days"] > 730)].sample(400, random_state=1)
# bucket_1_sample.to_csv(
#     "/home/aman/PycharmProjects/seasonality_hypothesis/data_generated/bucket_1_sample.csv", index=False)

# Select the materials whose row in `frequency_cleaveland` has a frequency
# of at least 26 and between 93 and 548 (365 + 183) days, then run
# ljung_box_test on each one, save a plot of its aggregated series and
# checkpoint the results to `file_address`.
# `frequency_cleaveland`, `df`, `folder_address` and `file_address` are not
# defined in this section and must be provided by the surrounding code.
sample = frequency_cleaveland[
    (frequency_cleaveland["frequency"] >= 26)
    & (frequency_cleaveland["days"] > 92)
    & (frequency_cleaveland["days"] <= 365 + 183)
]
sample = sample["matnr"].unique()
# sample.to_csv(folder_address+"/sample.csv")
# sample = pd.read_csv(folder_address+"/sample.csv")

report = pd.DataFrame()
report_rows = []  # one [matnr, test_flag, test_p_value] entry per processed material
count = 0
# start = time.time()
for matnr in sample:
    # `count` advances by 2 per material on both paths, so this skips the
    # first 161 materials -- presumably a manual resume of an interrupted run.
    if count < 321:
        count += 2
        continue

    test1 = ljung_box_test(df, matnr)
    test_flag = test1[0]
    test_p_value = test1[1]
    aggregated_data = test1[4]

    fig = plt.figure(figsize=(16, 8))
    plt.plot(aggregated_data.set_index("dt_week")["quantity"], marker=".",
             markerfacecolor="red", label="aggregated_data")
    plt.xlabel("Date", fontsize=14)
    plt.ylabel("Quantity", fontsize=14)
    plt.legend()
    plt.savefig(folder_address + "/" + str(test_flag) + "_"
                + str(test_p_value) + str(matnr) + ".png")
    # Close the figure so that figures do not pile up across iterations.
    plt.close(fig)

    count += 1
    print("count;", count)

    # DataFrame.append no longer exists in pandas >= 2.0: collect plain rows
    # and rebuild the frame.  The CSV is rewritten after every material so a
    # crash loses at most the current one; index=False means the different
    # row index does not change the file contents.
    report_rows.append([matnr, test_flag, test_p_value])
    report = pd.DataFrame(report_rows)
    report.to_csv(file_address, index=False)
    count += 1
from seasonality_detection import ljung_box_test_without_aggregation
from dtw_check import dtw_check
from hypothesis_2 import arima_seasonality_added_rolling
from hypothesis_2 import arima_rolling
from selection import individual_series
from stl_decompose import product_seasonal_comp_7_point
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# For every material in the bucket-1 sample: run ljung_box_test, save a plot
# of element [3] of its result titled with the Ljung-Box and Dickey-Fuller
# p-values (elements [1] and [2]), and finally write all p-values to
# result.csv in the same folder.
# NOTE(review): `pd`, `load_data` and `ljung_box_test` are not imported in the
# lines visible here -- they must come from elsewhere in the file.
output_dir = "/home/aman/PycharmProjects/seasonality_hypothesis/seas_detection_compare_2018_02_08_ver_1/"

df = load_data()
sample = pd.read_csv("/home/aman/PycharmProjects/seasonality_hypothesis/data_generated/bucket_1_sample.csv")
result_rows = []  # one [matnr, ljung p-value, dickey p-value] entry per material
count = 1
for matnr in sample["matnr"].unique():
    seas_pres_1 = ljung_box_test(df, matnr)
    # seas_pres_2 = ljung_box_test_without_aggregation(df, matnr)

    fig = plt.figure(figsize=(16, 8))
    plt.plot(seas_pres_1[3], marker=".", label="aggregated")
    plt.legend()
    plt.title("ljung p_value: " + str(seas_pres_1[1]) + " dickey p_value: " + str(seas_pres_1[2]))
    plt.savefig(output_dir + str(matnr) + ".png")
    # Close the figure so that figures do not pile up across iterations.
    plt.close(fig)

    # DataFrame.append no longer exists in pandas >= 2.0: collect plain rows
    # and build the frame once after the loop.
    result_rows.append([matnr, seas_pres_1[1][0], seas_pres_1[2]])
    print(count)
    count += 1

# Passing the column names at construction also works for an empty sample,
# where assigning three names to a zero-column frame would raise.
result = pd.DataFrame(result_rows, columns=["matnr", "ljung_p__value", "dickey_p__value"])
result.to_csv(output_dir + "result.csv", index=False)
# if not seas_pres:
#     continue
# else:
#     dtw_flag = dtw_check(df, kunag, matnr)
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
import matplotlib.pyplot as plt
import time

# Script section: walk the (kunag, matnr) pairs of a sample CSV, skip pairs
# whose ljung_box_test flag (element [0]) is falsy, and run the rolling ARIMA
# helpers on the first pair that passes.
# NOTE(review): `pd`, `load_data`, `ljung_box_test`, `dtw_check`,
# `product_seasonal_comp_7_point`, `individual_series`,
# `arima_seasonality_added_rolling_011` and `arima_rolling_011` are not
# imported in the lines visible here -- they must come from elsewhere.
# NOTE(review): the original indentation of this section was lost. The
# statements after `break` are laid out below in the order they appear,
# inside the `else` branch, where they can never run. If they were meant to
# execute after the loop they must be dedented -- confirm against the
# original file before relying on either reading.
df = load_data()
sample = pd.read_csv(
    "/home/aman/PycharmProjects/seasonality_hypothesis/data_generated/bucket_12_20_400_sample.csv"
)
report = pd.DataFrame()  # not used in the visible part of the script
count = 0  # number of pairs skipped because the flag was falsy
for index, row in sample.iterrows():
    start = time.time()  # not read in the visible part of the script
    kunag = int(row["kunag"])
    matnr = int(row["matnr"])
    seas_pres = ljung_box_test(df, int(row["matnr"]))
    if not seas_pres[0]:
        # Flag is falsy: count the pair, print the running total, move on.
        count += 1
        print(count)
        continue
    else:
        dtw_flag = dtw_check(df, kunag, matnr)  # not read in the visible part
        # Keep rows -55 up to (not including) -3 of the seasonal component.
        seasonality_product = product_seasonal_comp_7_point(df, matnr).iloc[-55:-3]
        df_series = individual_series(df, kunag, matnr)
        result_1 = arima_seasonality_added_rolling_011(df_series, seasonality_product)
        # Stops the loop at the first pair that reaches this point.
        break
        result_1 = result_1.set_index("dt_week")
        result_2 = arima_rolling_011(df_series)
        result_2 = result_2.set_index("dt_week")