def main(argv):
    """CLI entry point: compute per-ensemble forecast performance detail.

    For each candidate ensemble regressor, build an ensemble forecast from the
    stored per-language forecasts, score it against actuals, and save/load the
    combined performance detail.

    argv: [prog, ts_name] | [prog, ts_name, cutoff_date] | [prog, ts_name, cutoff_date, to_table]
      - cutoff_date defaults to today; to_table defaults to False.
    Exits the process on invalid arguments, DB-load failure, or no results.
    """
    # ###########################
    # parameters
    # ###########################
    time_scale = 'W'  # forecasting time scale reset for daily ticket data
    init_date = pd.to_datetime('2016-01-01')
    upr = 12  # upper bound of the performance window, in time_scale units
    lwr = 8   # lower bound of the performance window, in time_scale units
    evals = 50  # hyper-parameter evaluation budget passed to ep.make_fcast
    by_lang = False
    # ###########################
    # ###########################
    # ###########################
    print(argv)
    # ----- argument parsing: 1, 2 or 3 user args -----
    if len(argv[1:]) == 1:
        ts_name = argv[-1]
        cutoff_date = pd.to_datetime('today')
        to_table = False
    elif len(argv[1:]) == 2:
        ts_name, cutoff_date = argv[1:]
        try:
            cutoff_date = pd.to_datetime(cutoff_date)
            to_table = False
        except ValueError:
            s_ut.my_print(
                'ERROR: invalid arguments (ts_name, cutoff_date, to_table): ' + str(argv))
            sys.exit()
    elif len(argv[1:]) == 3:
        ts_name, cutoff_date, to_table = argv[1:]
        try:
            cutoff_date = pd.to_datetime(cutoff_date)
            to_table = bool(int(to_table))  # "0"/"1" on the command line
        except ValueError:
            s_ut.my_print(
                'ERROR: invalid arguments (ts_name, cutoff_date, to_table): ' + str(argv))
            sys.exit()
    else:
        s_ut.my_print(
            'ERROR: invalid arguments (ts_name, cutoff_date, to_table): ' + str(argv))
        sys.exit()

    ts_cfg, cols = dp.ts_setup(ts_name, cutoff_date, init_date, time_scale)

    # actuals
    actuals_df = dp.ts_actuals(ts_name, ts_cfg, cols)
    actuals_df.rename(columns={ts_cfg['ycol']: 'y'}, inplace=True)

    # forecasts
    f_df = fp.get_lang_fcast(ts_cfg, cutoff_date)
    fcast_date = cutoff_date + pd.to_timedelta(upr, unit=time_scale)

    # score each candidate ensemble method independently
    perf_list = list()
    for xens in ['XGBRegressor', 'AdaBoostRegressor', 'BaggingRegressor',
                 'GradientBoostingRegressor', 'RandomForestRegressor',
                 'ExtraTreesRegressor', 'lasso']:
        fcast_df = ep.make_fcast(ts_name, f_df, actuals_df, cutoff_date, fcast_date,
                                 xens, evals, by_lang, (lwr, upr), lwr=lwr, upr=upr)
        perf_df = perf.fcast_perf(fcast_df, actuals_df, cutoff_date, lwr, upr, time_scale, xens)
        if perf_df is None:
            # non-fatal: skip this ensemble and keep going
            s_ut.my_print('pid: ' + str(os.getpid()) +
                          ' WARNING: forecast performance detail failed for ' + ts_name +
                          ' ,cutoff date ' + str(cutoff_date.date()) +
                          ' and ensemble: ' + str(xens))
        else:
            perf_df['ts_name'] = ts_name
            perf_list.append(perf_df)

    if len(perf_list) > 0:
        pf = pd.concat(perf_list, axis=0)
        p_ut.save_df(pf, '~/my_tmp/perf/fcast_perf_detail_' + ts_name + '_' + str(cutoff_date.date()))
        if to_table is True:
            tab_cols = ['language', 'y', 'yhat', 'err', 'lwr', 'upr', 'ens']
            partition = {'cutoff': str(cutoff_date.date()), 'ts_name': ts_name}
            ret = hql.to_tble(pf, tab_cols, 'sup.cx_language_forecast_performance_detail', partition)
            if ret != 0:
                s_ut.my_print(
                    'pid: ' + str(os.getpid()) +
                    ' ERROR: forecast performance detail failed for ' + ts_name +
                    ' and cutoff date ' + str(cutoff_date.date()))
                sys.exit()
        print('DONE')
    else:
        s_ut.my_print(
            'pid: ' + str(os.getpid()) +
            ' ERROR: no data for forecast performance detail failed for ' + ts_name +
            ' and cutoff date ' + str(cutoff_date.date()))
        sys.exit()
def main(argv):
    """CLI entry point: run the per-language forecasts for one time series.

    Parses arguments, aligns dates to the weekly (or daily) time scale, builds
    the forecast-config list, runs the forecasts (multiprocessed unless the
    module-level ``is_test`` flag is set), and saves/loads the results.

    argv: [prog, ts_name] | [prog, ts_name, run_date] | [prog, ts_name, run_date, to_table]
      - run_date defaults to today; to_table defaults to False.
    Relies on module globals: FCAST_DICT, CFG_COLS, is_test, fcast_lang, get_f_cfg, set_cfg.
    Exits the process on invalid arguments, bad config, or DB-load failure.
    """
    # ###########################
    # parameters
    # ###########################
    time_scale = 'W'  # forecasting time scale reset for daily ticket data
    init_date = pd.to_datetime('2016-01-01')
    froot = '~/my_tmp/fbp/'
    # ###########################
    # ###########################
    print(argv)
    # ----- argument parsing -----
    if len(argv) == 2:
        ts_name = argv[-1]
        to_table = False
        run_date = pd.to_datetime('today')
    elif len(argv) == 3:
        ts_name, run_date = argv[-2:]
        try:
            run_date = pd.to_datetime(run_date)
            to_table = False
        except ValueError:
            s_ut.my_print('ERROR: invalid arguments (ts_name, run_date, to_table): ' + str(argv))
            sys.exit()
    elif len(argv) == 4:
        ts_name, run_date, to_table = argv[1:]
        try:
            run_date = pd.to_datetime(run_date)
            to_table = bool(int(to_table))  # "0"/"1" on the command line
        except ValueError:
            s_ut.my_print('ERROR: invalid arguments (ts_name, run_date, to_table): ' + str(argv))
            sys.exit()
    else:
        s_ut.my_print('ERROR: invalid arguments (ts_name, run_date, to_table): ' + str(argv))
        sys.exit()

    # data cfg
    cutoff_date = tm_ut.get_last_sat(run_date)  # set to last saturday before run_date or the run_date if a saturday
    ts_cfg, cols = dp.ts_setup(ts_name, cutoff_date, init_date, time_scale)
    FCAST_DICT['outlier_coef'] = ts_cfg.get('outlier_coef', [3.0])
    fcast_days = ts_cfg.get('fcast_days', None)
    if fcast_days is None:
        # fixed log typo: was 'ERROR" fcast_days ...'
        s_ut.my_print('pid: ' + str(os.getpid()) + ' ERROR: fcast_days must be specified in data_cfg')
        sys.exit()
    else:
        fcast_date = cutoff_date + pd.to_timedelta(fcast_days, unit='D')

    # align horizon to the time scale
    if time_scale == 'W':
        fcast_date = fcast_date - pd.to_timedelta(1 + fcast_date.weekday(), unit='D')  # set to week starting Sunday
        cu = cutoff_date - pd.to_timedelta(1 + cutoff_date.weekday(), unit='D')        # set to week starting Sunday
        fcast_days = (fcast_date - cu).days  # multiple of 7
        upr_horizon = int(fcast_days / 7)    # in time scale units
    elif time_scale == 'D':
        upr_horizon = int(fcast_days)        # in time scale units
    else:
        s_ut.my_print('pid: ' + str(os.getpid()) + ' invalid time scale: ' + str(time_scale))
        sys.exit()

    s_ut.my_print('pid: ' + str(os.getpid()) +
                  ' ------------------------ start language forecast for ' + str(ts_name) +
                  ' from cutoff date ' + str(cutoff_date.date()) +
                  ' (excluded) to forecast date ' + str(fcast_date.date()) +
                  ' (included) -----------------------')

    # get actuals
    actuals_df = dp.ts_actuals(ts_name, ts_cfg, cols)  # may have data past cutoff for accuracy checking
    if actuals_df['ds'].max() < cutoff_date:
        s_ut.my_print('ERROR: no actuals available for forecast from cutoff date: ' + str(cutoff_date.date()))
        sys.exit()
    f_actuals_df = actuals_df[actuals_df['ds'] <= cutoff_date].copy()  # actuals for forecast: only use up to cutoff date

    # adjust FCAST_DICT
    if len(FCAST_DICT['do_res']) == 2:  # True, False
        FCAST_DICT['do_res'] = [True]   # MUST overwrite: the False case is always included and otherwise we double count.
    if len(ts_cfg.get('regressors', list())) == 0:
        FCAST_DICT['r_mode'] = [None]
        reg_dict = dict()
    else:
        reg_dict = regs.ens_fcast(ts_name, ts_cfg['regressors'], cutoff_date, time_scale,
                                  fcast_days, init_date, f_actuals_df)  # stored by cutoff date on last Sat of the month

    # update init_date: latest start across actuals and all regressors
    init_date = max([f_actuals_df['ds'].min()] + [f['ds'].min() for f in reg_dict.values()])
    f_actuals_df = f_actuals_df[f_actuals_df['ds'] >= init_date].copy()
    reg_dict = {lx: f[f['ds'] >= init_date].copy() for lx, f in reg_dict.items()}
    ts_cfg['init_date'] = init_date

    # set the list of fcast cfgs
    tlist = get_f_cfg(FCAST_DICT, cutoff_date, init_date, time_scale)  # list of fcast cfg's
    fix_pars = [f_actuals_df, ts_name, reg_dict, fcast_date, cutoff_date, ts_cfg, time_scale, upr_horizon]
    arg_list = [fix_pars + [tlist[ix]] for ix in range(len(tlist))]
    # 2 fcasts are done per input cfg (do_res = true and do_res = false)
    n_fcfg = 2 * len(arg_list)
    s_ut.my_print('pid: ' + str(os.getpid()) + ' ++++++++ there are ' + str(n_fcfg) +
                  ' fcast configs per language **********')

    # ###############################################################################
    # run the forecasts (serially when testing, multiprocessed otherwise)
    # ###############################################################################
    if is_test:
        df_list_ = s_ut.do_mp(fcast_lang, arg_list, is_mp=False, cpus=None, do_sigkill=True)
    else:
        df_list_ = s_ut.do_mp(fcast_lang, arg_list, is_mp=True, cpus=None, do_sigkill=True)

    # join all the fcasted data into a flat list
    df_list = [f for f in df_list_ if f is not None]
    if len(df_list) > 0:
        ylist, alist = list(), list()
        for fl in df_list:
            if fl is not None:
                fl = set_cfg(fl.copy(), CFG_COLS)
                ylist.append(fl[['ds', 'language', 'yhat', 'ts_name', 'cutoff', 'dim_cfg', 'fcast_date']].copy())
                alist.append(fl)

        # save basic fcast data
        fcast_df = pd.concat(ylist, axis=0)  # now all the list elements have the same columns
        fcast_df.reset_index(inplace=True, drop=True)
        ok_cfg = fcast_df['dim_cfg'].unique()
        # fixed log: was missing the separator between the pid and the cfg count
        s_ut.my_print('pid: ' + str(os.getpid()) + ' ' + str(len(ok_cfg)) +
                      ' forecasts cfgs available for ' + str(ts_name) +
                      ' from cutoff date ' + str(cutoff_date.date()) +
                      ' (excluded) to forecast date ' + str(fcast_date.date()) +
                      ' (included) -----------------------')
        fname = froot + 'lang_fcast_'
        p_ut.save_df(fcast_df, fname + ts_name + '_' + str(cutoff_date.date()))
        if to_table is True:
            tab_cols = ['ds', 'language', 'dim_cfg', 'yhat']
            partition = {'cutoff': str(cutoff_date.date()), 'ts_name': ts_cfg['ts_key']}
            ret = hql.to_tble(fcast_df, tab_cols, 'sup.cx_language_forecast', partition)
            if ret != 0:
                s_ut.my_print('pid: ' + str(os.getpid()) +
                              ' ERROR: no forecasts loaded to table for ' + str(ts_cfg['ts_key']) +
                              ' and cutoff date ' + str(cutoff_date.date()))
                sys.exit()

        # save all fcast data (y_upr, y_lwr, ...)
        all_df = pd.concat(alist, axis=0)  # now all the list elements have the same columns
        all_cols = list(set([c for c in all_df.columns if c not in CFG_COLS]))
        all_df.reset_index(inplace=True, drop=True)
        all_df = all_df[all_cols].copy()
        all_df = all_df[all_df['dim_cfg'].isin(ok_cfg)].copy()
        fname = froot + 'fcast_all_'
        p_ut.save_df(all_df, fname + ts_name + '_' + str(cutoff_date.date()))
        if to_table is True:
            all_df.drop(['cutoff', 'ts_name'], axis=1, inplace=True)
            # long format: one row per (ds, language, dim_cfg, key)
            mf = pd.melt(all_df, id_vars=['ds', 'language', 'dim_cfg'], var_name='key', value_name='value')
            mf.dropna(subset=['value'], inplace=True)
            mf = mf[mf['value'] != 0.0].copy()
            partition = {'cutoff': str(cutoff_date.date()), 'ts_name': ts_cfg['ts_key']}
            ret = hql.to_tble(mf, list(mf.columns), 'sup.cx_language_forecast_detail', partition)
            if ret != 0:
                s_ut.my_print('pid: ' + str(os.getpid()) +
                              ' ERROR: no forecasts loaded to table for ' + str(ts_cfg['ts_key']) +
                              ' and cutoff date ' + str(cutoff_date.date()))
                sys.exit()
        print('DONE')
    else:
        s_ut.my_print('pid: ' + str(os.getpid()) +
                      ' ERROR: no forecasts available for ' + str(ts_cfg['ts_key']) +
                      ' from cutoff date ' + str(cutoff_date.date()) +
                      ' (excluded) to forecast date ' + str(fcast_date.date()) +
                      ' (included) -----------------------')
def main(argv):
    """CLI entry point: build ratio-based service-level forecasts for one TS.

    Reads the ratio-forecast JSON config, produces coherent language-level
    forecasts, applies channel/service-tier ratios per bottom time series,
    aggregates to the top series, saves results, and optionally loads them
    to the weekly forecasts table.

    argv: [prog, ts_name] | [prog, ts_name, to_table]
    Exits the process on invalid arguments, missing config, or DB-load failure.
    """
    print(argv)
    # ----- argument parsing -----
    if len(argv) == 2:
        ts_name = argv[-1]
        to_table = False
    elif len(argv) == 3:
        ts_name, to_table = argv[1:]
        try:
            to_table = bool(int(to_table))  # "0"/"1" on the command line
        except ValueError:
            s_ut.my_print('ERROR: invalid arguments (ts_name, to_table): ' + str(argv))
            sys.exit()
    else:
        s_ut.my_print('ERROR: invalid arguments (ts_name, to_table): ' + str(argv))
        sys.exit()
    if any([bu in ts_name for bu in ['Homes', 'Experiences', 'China']]):
        s_ut.my_print('ERROR: time series cannot be a BU time series: ' + str(ts_name))
        sys.exit()

    # ----- load the ratio forecast config -----
    data_cfg = os.path.expanduser('~/my_repos/capacity_planning/forecast/config/ratio_forecast_cfg.json')
    if os.path.isfile(data_cfg):
        with open(data_cfg, 'r') as fptr:
            rf_dict = json.load(fptr)
    else:
        s_ut.my_print('ERROR: ' + data_cfg + ' file not found')
        sys.exit()
    d_date = rf_dict.get('data_date', None)
    if d_date is None:
        s_ut.my_print('ERROR: data_date cannot be null')
        sys.exit()
    data_date = pd.to_datetime(d_date)  # this is the cutoff date we get data from tables
    a_date = rf_dict.get('adjust_date', None)  # if None, nothing to adjust and adj_date = data_date
    adjust_date = data_date if a_date is None else pd.to_datetime(a_date)  # this is the actual cutoff date
    window = rf_dict.get('ratio_windows', dict())
    if len(window) == 0:  # not set: default to the 6 weeks up to adjust_date
        window = {'default': {'start': adjust_date - pd.to_timedelta(6, unit='W'), 'end': adjust_date}}
    else:
        for k, v in window.items():
            for kk, vv in v.items():
                v[kk] = pd.to_datetime(vv)
    s_ut.my_print('************************* read table date: ' + str(data_date.date()) +
                  ' ********************************************')
    s_ut.my_print('************************* write table date: ' + str(adjust_date.date()) +
                  ' *******************************************')

    # ###############################
    # parameters
    # ###############################
    time_scale = 'W'
    init_date = pd.to_datetime('2016-01-01')
    # ###############################
    # ###############################

    # coherent forecasts at language level + language level adjustments
    df_tilde, bottom_ts = hts.main(ts_name, data_date, do_cov=True)
    f_df = adj.main(df_tilde, 'language', bottom_ts, ts_name, adjust_date)  # must adjust at language level before service level ratios

    fr_list, fr_cols = list(), list()
    a_list = list()
    ts_list = bottom_ts  # ratios only on bottom_ts then aggregate to top TS
    for ts in ts_list:
        s_ut.my_print('============= starting ' + str(ts))
        ts_cfg, _ = dp.ts_setup(ts, data_date, init_date, time_scale)
        a_df = dp.ts_actuals(ts, ts_cfg, ['language', 'business_unit', 'channel', 'service_tier'], drop_cols=False)
        b_df = filter_actuals(a_df, window)
        fr = ts_ratio(ts, b_df.copy(), f_df[['ds', 'language', ts + '_tilde']].copy(), window, data_date)
        fr_list.append(fr)
        fr_cols.append(fr.columns)
        a_list.append(b_df)
        check_ratios(ts, b_df, fr, True, 'service_tier')
        check_ratios(ts, b_df, fr, False, 'service_tier')
        check_ratios(ts, b_df, fr, True, 'channel')
        check_ratios(ts, b_df, fr, False, 'channel')

    # must adjust together to ensure coherence
    fr = reduce(lambda x, y: x.merge(y, on=['ds', 'language', 'channel', 'service_tier'], how='outer'),
                fr_list) if len(fr_list) > 0 else None
    if fr is None:
        # fixed: original crashed with AttributeError on fr.fillna when no bottom TS produced ratios
        s_ut.my_print('ERROR: no ratio data available for ' + str(ts_name))
        sys.exit()
    fr.fillna(0, inplace=True)
    for k_col in ['channel', 'service_tier']:  # language adj must be done before ratios
        fr = adj.main(fr, k_col, bottom_ts, ts_name, adjust_date)

    # save data
    f_list = list()
    for idx in range(len(ts_list)):
        ts = ts_list[idx]
        fx = fr[fr_cols[idx]].copy()
        fx.rename(columns={ts + '_tilde': 'yhat'}, inplace=True)
        fx['yhat'] = np.round(fx['yhat'].values, 0)  # this makes input totals and output totals to be a bit off
        fx = fx[fx['yhat'] > 0]
        fx['ts_name'] = ts
        fx['cutoff'] = adjust_date
        f_list.append(fx)

    # get the aggregate series
    fall = pd.concat(f_list, axis=0)
    gall = fall.groupby(['ds', 'language', 'channel', 'service_tier']).sum(numeric_only=True).reset_index()
    gall['cutoff'] = adjust_date
    gall['ts_name'] = ts_name

    # align cols (ap gets confused otherwise?)
    tcols = ['ds', 'language', 'channel', 'service_tier', 'yhat', 'ts_name', 'cutoff']
    fall = fall[tcols].copy()
    gall = gall[tcols].copy()

    # final DF to save
    fout = pd.concat([gall, fall], axis=0)
    p_ut.save_df(fout, '~/my_tmp/fbp/ratios_fcast_' + ts_name + '_' + str(adjust_date.date()))
    ts_cfg, _ = dp.ts_setup('ticket_count', data_date, init_date, time_scale)
    a_df = dp.ts_actuals('ticket_count', ts_cfg, ['language', 'business_unit', 'channel', 'service_tier'], drop_cols=False)
    p_ut.save_df(a_df, '~/my_tmp/a_df_ticket_count_' + str(adjust_date.date()))

    # data summary
    s_ut.my_print('**************** Data Summary *******************')
    for c in ['language', 'channel', 'service_tier']:
        s_ut.my_print('unique ' + c + ': ' + str(fout[c].unique()))

    # save to DB
    if to_table is True:
        tcols.remove('cutoff')
        tcols.remove('ts_name')
        tcols.insert(1, 'ds_week_starting')
        tcols.insert(2, 'fcst_date_inv_ending')  # ds_week_ending
        fout = fout[fout['ds'] > adjust_date.date()].copy()  # only save forecasted values
        fout['ds_week_starting'] = fout['ds']
        fout['fcst_date_inv_ending'] = fout['ds'] + pd.to_timedelta(6, unit='D')
        for ts in fout['ts_name'].unique():
            partition = {'cutoff': str(adjust_date.date()), 'ts_name': ts}
            tb_df = fout[fout['ts_name'] == ts].copy()
            ret = hql.to_tble(tb_df, tcols, 'sup.cx_weekly_forecasts', partition)
            if ret != 0:
                s_ut.my_print('pid: ' + str(os.getpid()) +
                              ' ERROR: no forecasts loaded to table for ' + ts_name +
                              ' and cutoff date ' + str(adjust_date.date()))
                sys.exit()
            else:
                s_ut.my_print('>>>>>>>>>>>>>>> SUCCESS: data saved to table <<<<<<<<<<<<<<<<<<<')
    else:
        s_ut.my_print('>>>>>>>>>>>>>>> WARNING: no data saved to table <<<<<<<<<<<<<<<<<<<')
def main(argv):
    """CLI entry point: build the ensemble forecast for one time series.

    Combines the stored per-language forecast configs into a single ensemble
    forecast using the ensemble method selected by get_ens, saves it, and
    optionally loads it to the ens forecast table.

    argv: [prog, ts_name, cutoff_date] | [prog, ts_name, cutoff_date, to_table]
    Exits the process on invalid arguments, bad config, or DB-write failure.
    """
    # ###########################
    # parameters
    # ###########################
    time_scale = 'W'  # forecasting time scale reset for daily ticket data
    init_date = pd.to_datetime('2016-01-01')
    froot = os.path.expanduser('~/my_tmp/fbp/')
    evals = 250  # hyper-parameter evaluation budget passed to ep.make_fcast
    by_lang = False
    lwr, upr = 9, 12  # performance window bounds in time_scale units
    # ###########################
    # ###########################
    print(argv)
    # ----- argument parsing -----
    if len(argv) == 3:
        ts_name, cutoff_date = argv[-2:]
        try:
            cutoff_date = pd.to_datetime(cutoff_date)
            to_table = False
        except ValueError:
            s_ut.my_print('ERROR: invalid arguments (ts_name, cutoff_date, to_table): ' + str(argv))
            sys.exit()
    elif len(argv) == 4:
        ts_name, cutoff_date, to_table = argv[1:]
        try:
            cutoff_date = pd.to_datetime(cutoff_date)
            to_table = bool(int(to_table))  # "0"/"1" on the command line
        except ValueError:
            s_ut.my_print('ERROR: invalid arguments (ts_name, cutoff_date, to_table): ' + str(argv))
            sys.exit()
    else:
        s_ut.my_print('ERROR: invalid arguments (ts_name, cutoff_date, to_table): ' + str(argv))
        sys.exit()

    # data cfg
    ts_cfg, cols = dp.ts_setup(ts_name, cutoff_date, init_date, time_scale)
    fcast_days = ts_cfg.get('fcast_days', None)
    if fcast_days is None:
        # fixed log typo: was 'ERROR" fcast_days ...'
        s_ut.my_print('pid: ' + str(os.getpid()) + ' ERROR: fcast_days must be specified in data_cfg')
        sys.exit()
    else:
        fcast_date = cutoff_date + pd.to_timedelta(fcast_days, unit='D')
        if time_scale == 'W' and fcast_date.weekday() != 6:
            # set fcast date to week starting Sunday unless it is a Sunday already
            fcast_date = fcast_date - pd.to_timedelta(1 + fcast_date.weekday(), unit='D')

    s_ut.my_print('pid: ' + str(os.getpid()) +
                  ' ------------------------ start ens forecast for ' + str(ts_name) +
                  ' from cutoff date ' + str(cutoff_date.date()) +
                  ' (excluded) to forecast date ' + str(fcast_date.date()) +
                  ' (included) -----------------------')

    a_df = dp.ts_actuals(ts_name, ts_cfg, cols)  # get actuals
    a_df.rename(columns={ts_cfg['ycol']: 'y'}, inplace=True)
    fcast_df = fp.get_lang_fcast(ts_cfg, cutoff_date)  # get fcasts
    if fcast_df is not None and a_df is not None:
        s_ut.my_print(ts_name + ': combining ' + str(fcast_df['dim_cfg'].nunique()) + ' forecast configs')
        xens_ = get_ens(ts_name, cutoff_date)  # ts_cfg['ens'].get(str(cutoff_date.month), ens_dict['default'])
        s_ut.my_print('aggregation for ' + ts_name + ' done with ' + xens_)
        ts_fcast = ep.make_fcast(ts_name, fcast_df, a_df, cutoff_date, fcast_date,
                                 xens_, evals, by_lang, (lwr, upr), lwr=lwr, upr=upr)
        ts_fcast['fcast_date'] = fcast_date
        p_ut.save_df(ts_fcast, froot + 'ens_fcast_' + ts_name + '_' + str(cutoff_date.date()))
        if to_table is True:
            cols = ['ds', 'language', 'ens', 'yhat']
            partition = {'cutoff': str(cutoff_date.date()), 'ts_name': ts_cfg['ts_key']}
            ret = hql.to_tble(ts_fcast, cols, 'sup.cx_ens_forecast', partition)
            if ret != 0:
                s_ut.my_print('ERROR: DB write for ' + ts_name + ' ens forecast ' +
                              ' at ' + str(cutoff_date.date()) + ' failed')
                sys.exit()
        print('DONE')
    else:
        s_ut.my_print('ERROR: no actuals or no data for errors of ' + ts_name +
                      ' at ' + str(cutoff_date.date()))
def main(argv):
    """CLI entry point: cross-validated forecast performance for one TS.

    Runs fp.cross_validation, then aggregates per-language performance over
    the selected forecast configs and saves/loads the results.

    argv: [prog, ts_name] | [prog, ts_name, cutoff_date]
          | [prog, ts_name, cutoff_date, n_features]
          | [prog, ts_name, cutoff_date, n_features, to_table]
      - cutoff_date defaults to today; n_features defaults to 25; to_table defaults to False.
    Exits the process on invalid arguments, DB-load failure, or no results.
    """
    # ###########################
    # parameters
    # ###########################
    time_scale = 'W'  # forecasting time scale reset for daily ticket data
    init_date = pd.to_datetime('2016-01-01')
    upr = 12  # upper bound of the performance window, in time_scale units
    lwr = 9   # lower bound of the performance window, in time_scale units
    # ###########################
    # ###########################
    # ###########################
    print(argv)
    # ----- argument parsing: 1 to 4 user args -----
    if len(argv[1:]) == 1:
        ts_name = argv[-1]
        cutoff_date = pd.to_datetime('today')
        to_table = False
        n_features = 25
    elif len(argv[1:]) == 2:
        ts_name, cutoff_date = argv[1:]
        try:
            cutoff_date = pd.to_datetime(cutoff_date)
            to_table = False
            n_features = 25
        except ValueError:
            s_ut.my_print(
                'ERROR: invalid arguments (ts_name, cutoff_date, to_table): ' + str(argv))
            sys.exit()
    elif len(argv[1:]) == 3:
        ts_name, cutoff_date, n_features = argv[1:]
        try:
            cutoff_date = pd.to_datetime(cutoff_date)
            to_table = False
            n_features = int(n_features)
        except ValueError:
            s_ut.my_print(
                'ERROR: invalid arguments (ts_name, cutoff_date, to_table): ' + str(argv))
            sys.exit()
    elif len(argv[1:]) == 4:
        ts_name, cutoff_date, n_features, to_table = argv[1:]
        try:
            cutoff_date = pd.to_datetime(cutoff_date)
            n_features = int(n_features)
            to_table = bool(int(to_table))  # "0"/"1" on the command line
        except ValueError:
            s_ut.my_print(
                'ERROR: invalid arguments (ts_name, cutoff_date, to_table): ' + str(argv))
            sys.exit()
    else:
        s_ut.my_print(
            'ERROR: invalid arguments (ts_name, cutoff_date, to_table): ' + str(argv))
        sys.exit()

    # set dates
    sun_date = cutoff_date - pd.to_timedelta(6, unit='D')  # cutoff date in week starting Sunday
    upr_date = sun_date + pd.to_timedelta(upr, unit=time_scale)  # horizon for the perf testing at cutoff_date
    lwr_date = sun_date + pd.to_timedelta(lwr, unit=time_scale)  # lwr date for perf testing window

    a_df, d_ff, cfg_dict = fp.cross_validation(ts_name, cutoff_date, upr, lwr, n_features,
                                               init_date=init_date, time_scale=time_scale)
    p_ut.save_df(d_ff, '~/my_tmp/d_ff')

    # per-language aggregation of the cross-validation results
    f_list = list()
    for lg, flf_ in d_ff.groupby('language'):
        flf_.drop('language', axis=1, inplace=True)
        flf_['y'] = flf_['y'].astype(float)
        flf_['yhat'] = flf_['yhat'].astype(float)
        a_perf = flf_[['dim_cfg', 'a_err']].drop_duplicates()
        # keep only this language's selected forecast configs
        flf = flf_[flf_['dim_cfg'].isin(cfg_dict[lg][0])].copy()
        flf.dropna(axis=1, inplace=True, how='all')  # drop all-null cols (fcast cfgs for other languages)
        # wide format: one column of yhat per dim_cfg, indexed by ds
        p_flf = pd.pivot_table(flf[['ds', 'dim_cfg', 'yhat']].copy(),
                               index=['ds'], columns=['dim_cfg'], values=['yhat']).reset_index()
        # flatten the ('yhat', cfg) MultiIndex columns to plain cfg names
        cols = [str(c[1]) if c[0] == 'yhat' else c[0] for c in p_flf.columns]
        p_flf.columns = cols
        p_flf = p_flf.merge(flf[['ds', 'y', 'y_shifted', 'adj_y_shifted']].drop_duplicates(),
                            on='ds', how='left')
        s_ut.my_print('\n\n+++++++++++++++++++++++++ starting aggregation for ' + lg +
                      ' ++++++++++++++++++++++++++++++++++++++++++++++++++++\n')
        d_list = ep.lang_perf(lg, p_flf, a_perf, cutoff_date, upr, lwr)
        pl = pd.DataFrame(d_list)
        pl['language'] = lg
        # NOTE(review): 'n_forecasts' is populated from n_features here, and both
        # columns are loaded to the table below — confirm this is intentional.
        pl['n_forecasts'] = n_features
        f_list.append(pl)

    if len(f_list) > 0:
        pf = pd.concat(f_list, axis=0)
        pf['ts_name'] = ts_name
        pf['upr'] = upr_date
        pf['lwr'] = lwr_date
        pf['cutoff'] = cutoff_date
        print(pf)
        print('overall: ' + str(pf['avg_err'].mean()))
        p_ut.save_df(pf, '~/my_tmp/perf/fcast_perf_' + ts_name + '_' + str(cutoff_date.date()))
        if to_table is True:
            tab_cols = ['language', 'err', 'df', 'lwr', 'upr', 'train_cutoff', 'ens',
                        'n_features', 'n_forecasts']
            partition = {'cutoff': str(cutoff_date.date()), 'ts_name': ts_name}
            ret = hql.to_tble(pf, tab_cols, 'sup.cx_language_forecast_performance', partition)
            if ret != 0:
                s_ut.my_print(
                    'pid: ' + str(os.getpid()) +
                    ' ERROR: forecast performance detail failed for ' + ts_name +
                    ' and cutoff date ' + str(cutoff_date.date()))
                sys.exit()
        print('DONE')
    else:
        s_ut.my_print(
            'pid: ' + str(os.getpid()) +
            ' ERROR: no data for forecast performance detail failed for ' + ts_name +
            ' and cutoff date ' + str(cutoff_date.date()))
        sys.exit()
def main(argv):
    """CLI entry point: score the stored ensemble forecast against actuals.

    Loads the ensemble forecast and actuals for one time series, filters the
    forecast to the performance window, computes performance, saves it, and
    optionally loads it to the performance table.

    argv: [prog, ts_name] | [prog, ts_name, cutoff_date] | [prog, ts_name, cutoff_date, to_table]
      - cutoff_date defaults to today; to_table defaults to False.
    Returns early (None) when no performance data is produced; exits the
    process on invalid arguments or DB-load failure.
    """
    # ###########################
    # parameters
    # ###########################
    time_scale = 'W'  # forecasting time scale reset for daily ticket data
    init_date = pd.to_datetime('2016-01-01')
    upr = 12  # upper bound of the performance window, in time_scale units
    lwr = 8   # lower bound of the performance window, in time_scale units
    # ###########################
    # ###########################
    # ###########################
    print(argv)
    # ----- argument parsing -----
    if len(argv) == 2:
        # fixed: was `ts_name = argv[-2:]`, which bound a 2-element list slice
        # instead of the time-series name string (siblings use argv[-1])
        ts_name = argv[-1]
        cutoff_date = pd.to_datetime('today')
        to_table = False
    elif len(argv) == 3:
        ts_name, cutoff_date = argv[-2:]
        try:
            cutoff_date = pd.to_datetime(cutoff_date)
            to_table = False
        except ValueError:
            s_ut.my_print(
                'ERROR: invalid arguments (ts_name, cutoff_date, to_table): ' + str(argv))
            sys.exit()
    elif len(argv) == 4:
        ts_name, cutoff_date, to_table = argv[1:]
        try:
            cutoff_date = pd.to_datetime(cutoff_date)
            to_table = bool(int(to_table))  # "0"/"1" on the command line
        except ValueError:
            s_ut.my_print(
                'ERROR: invalid arguments (ts_name, cutoff_date, to_table): ' + str(argv))
            sys.exit()
    else:
        s_ut.my_print(
            'ERROR: invalid arguments (ts_name, cutoff_date, to_table): ' + str(argv))
        sys.exit()

    ts_cfg, cols = dp.ts_setup(ts_name, cutoff_date, init_date, time_scale)

    # actuals
    actuals_df = dp.ts_actuals(ts_name, ts_cfg, cols)
    actuals_df.rename(columns={ts_cfg['ycol']: 'y'}, inplace=True)

    # stored ensemble forecast and its ensemble label
    fcast_df = fp.get_ens_fcast(ts_name, ts_cfg, cutoff_date)
    ens = fcast_df.loc[fcast_df.index[0], 'ens']
    f_df = ep.fcast_filter(fcast_df, actuals_df, ts_name,
                           cutoff_date + pd.to_timedelta(upr, unit=time_scale),
                           cutoff_date, time_scale)

    pf = fcast_perf(f_df, actuals_df, cutoff_date, lwr, upr, time_scale, ens)
    if pf is None:
        return  # nothing to save
    else:
        pf['ts_name'] = ts_name
        p_ut.save_df(pf, '~/my_tmp/perf/fcast_perf_' + ts_name + '_' + str(cutoff_date.date()))
        if to_table is True:
            tab_cols = ['language', 'y', 'yhat', 'err', 'lwr', 'upr', 'ens']
            partition = {'cutoff': str(cutoff_date.date()), 'ts_name': ts_name}
            ret = hql.to_tble(pf, tab_cols, 'sup.cx_language_forecast_performance', partition)
            if ret != 0:
                s_ut.my_print('pid: ' + str(os.getpid()) +
                              ' ERROR: forecast performance failed for ' + ts_name +
                              ' and cutoff date ' + str(cutoff_date.date()))
                sys.exit()
        print('DONE')