from datetime import datetime
import pandas as pd
import numpy as np
import matplotlib
from lib.evaluation import mape
# The Agg backend has to be selected before pyplot is imported below.
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from definitions import ROOT_DIR
from global_sources.file_operation import read_feature_full, union_file

# Labels imposed, by position, on the unioned prediction frame.
_PREDICT_COLUMNS = [
    'rundate', 'storeid', 'goodscode', 'sales', 'pred_sale', 'pred_stock',
]
# Columns kept once the feature table has been joined on.
_REPORT_COLUMNS = [
    'goodscode', 'rundate', 'result', 'pred_sale', 'pred_stock',
]

df_feature_full = read_feature_full()

# Gather everything under <ROOT_DIR>/sources/output/predict/ into one frame.
df_predict = union_file('{}/{}'.format(ROOT_DIR, 'sources/output/predict/'))
# The cast runs before the relabel, so the incoming frame must already carry
# a column literally named 'rundate'.
df_predict['rundate'] = df_predict['rundate'].astype('datetime64[ns]')
df_predict.columns = _PREDICT_COLUMNS

# Left join: every prediction row survives, matched or not.
# NOTE(review): 'result' is not one of the prediction columns, so it
# presumably comes from the feature table -- confirm.
# NOTE(review): 'storeid' is not a join key; if the feature table holds one
# row per store this join multiplies prediction rows -- verify.
df_predict = df_predict.merge(
    df_feature_full, how='left', on=['goodscode', 'rundate'])
df_predict = df_predict[_REPORT_COLUMNS]

# Disabled diagnostics carried over unchanged (they reference `df_result`,
# which is not defined in the visible code):
# print(len(df_result['rundate']))
# print(np.count_nonzero(df_result.isnull()))
# df_result['mape'] = (df_result['result'] - df_result['pred'])/df_result['result']
# df_result = df_result[['goodscode', 'mape']].abs()
df_feature_full['max_15'] = pd.qcut(df_feature_full['max_15'], min(10, len(df_feature_full['max_15'].unique())), duplicates='drop').cat.codes df_feature_full['sum_15'] = pd.qcut(df_feature_full['sum_15'], min(10, len(df_feature_full['sum_15'].unique())), duplicates='drop').cat.codes df_feature_full['min_7'] = pd.qcut(df_feature_full['min_7'], min(10, len(df_feature_full['min_7'].unique())), duplicates='drop').cat.codes df_feature_full['std_7'] = pd.qcut(df_feature_full['std_7'], min(10, len(df_feature_full['std_7'].unique())), duplicates='drop').cat.codes df_feature_full['var_7'] = pd.qcut(df_feature_full['var_7'], min(10, len(df_feature_full['var_7'].unique())), duplicates='drop').cat.codes df_feature_full['max_7'] = pd.qcut(df_feature_full['max_7'], min(10, len(df_feature_full['max_7'].unique())), duplicates='drop').cat.codes df_feature_full['sum_7'] = pd.qcut(df_feature_full['sum_7'], min(10, len(df_feature_full['sum_7'].unique())), duplicates='drop').cat.codes df_feature_full['min_3'] = pd.qcut(df_feature_full['min_3'], min(10, len(df_feature_full['min_3'].unique())), duplicates='drop').cat.codes df_feature_full['std_3'] = pd.qcut(df_feature_full['std_3'], min(10, len(df_feature_full['std_3'].unique())), duplicates='drop').cat.codes df_feature_full['var_3'] = pd.qcut(df_feature_full['var_3'], min(10, len(df_feature_full['var_3'].unique())), duplicates='drop').cat.codes df_feature_full['max_3'] = pd.qcut(df_feature_full['max_3'], min(10, len(df_feature_full['max_3'].unique())), duplicates='drop').cat.codes df_feature_full['sum_3'] = pd.qcut(df_feature_full['sum_3'], min(10, len(df_feature_full['sum_3'].unique())), duplicates='drop').cat.codes # print(df_feature_full.head) return df_feature_full if __name__ == '__main__': df_feature_full = file_operation.read_feature_full() df_feature_full = binner_engine(df_feature_full) file_operation.write_feature_full(df_feature_full)
s1_result = pd.Series([]) s2_result = pd.Series([]) s3_result = pd.Series([]) for goodsn in goodsn_list: for store_id in store_ids: df_goods = df_feature_full[ (df_feature_full['storeid'] == store_id) & (df_feature_full['goodscode'] == goodsn)] if len(df_goods.index) < 2 * 7: continue y = [v for i, v in df_goods['saleqty'].items()] s1 = holt_winters_first_order_ewma(y, 0.3) s2 = holt_winters_second_order_ewma(y, 0.3, 0.3) s3 = triple_exponential_smoothing(y, 7, 0.3, 0.3, 0.1) s1_result = pd.concat( [s1_result, pd.Series(s1, index=df_goods.index)]) s2_result = pd.concat( [s2_result, pd.Series(s2, index=df_goods.index)]) s3_result = pd.concat( [s3_result, pd.Series(s3, index=df_goods.index)]) df_feature_full['smoothing1'] = s1_result df_feature_full['smoothing2'] = s2_result df_feature_full['smoothing3'] = s3_result return df_feature_full if __name__ == '__main__': df_sale_feature = file_operation.read_feature_full() df_sale_feature = smoothing(df_sale_feature) file_operation.write_feature_full(df_sale_feature)