Exemplo n.º 1
0
def get_actuals(ts_dict, gcols, use_cache=None):
    """Return the actuals of a time series summed by ``ds`` and ``gcols``.

    Queries ``sup.cx_weekly_actuals`` for ``ts_dict['ycol']`` at the run date
    returned by ``hql.get_rmax``, renames the ``y`` column to the series'
    ``ycol`` (a unique name is needed because the frame may be mixed with
    regressors later), drops rows older than ``ts_dict['init_date']`` and
    aggregates what is left.

    :param ts_dict: series configuration; the keys ``cutoff_date``,
        ``init_date``, ``name`` and ``ycol`` are read.
    :param gcols: extra columns to select and to group by (besides ``ds``).
    :param use_cache: when not None, overrides the module-level ``USE_CACHE``.
    :return: the aggregated DataFrame, or None when no rows remain after the
        ``init_date`` filter or when the latest ``ds`` is earlier than the
        cutoff date.
    :raises SystemExit: when the query fails.
        NOTE(review): ``sys.exit()`` without an argument exits with status 0.
    """
    cutoff_date = ts_dict['cutoff_date']
    init_date = ts_dict['init_date']
    ts_name = ts_dict['name']
    ycol = ts_dict['ycol']
    # Resolve the cache flag once so that every hql call honors the override
    # (get_rmax used to ignore the ``use_cache`` argument).
    uc = USE_CACHE if use_cache is None else use_cache

    s_ut.my_print('getting ' + ts_name + ' actuals from table')
    r_date = hql.get_rmax(ycol, use_cache=uc)

    # Order-preserving de-duplication: a set() yields a column order that can
    # change from one process to the next, which makes the query text
    # unstable for anything keyed on it.
    qcols = list()
    for col in ['ds', 'language', 'y'] + list(gcols):
        if col not in qcols:
            qcols.append(col)
    col_str = ','.join(qcols)
    print('rmax: ' + str(r_date))
    qry = 'select ' + col_str + ' from sup.cx_weekly_actuals where ts_name=\'' + ycol + '\' and run_date=\'' + r_date + '\';'
    try:
        df = hql.from_tble(qry, ['ds'], use_cache=uc, renew=RENEW)
        s_ut.my_print(qry + ' completed. Got ' + str(len(df)) + ' rows')
    except Exception:  # not a bare except: let KeyboardInterrupt/SystemExit through
        s_ut.my_print('pid: ' + str(os.getpid()) + ' ERROR: query: ' + qry + ' failed. No data for ts ' + ts_name)
        sys.exit()
    s_ut.my_print('pid: ' + str(os.getpid()) + ' got actuals for ' + ts_name + ' from table ' + 'sup.cx_weekly_actuals')

    df.rename(columns={'y': ycol}, inplace=True)    # unique name needed (may mix with regressors later)
    df = df[df['ds'] >= init_date].copy()
    if len(df) == 0:
        # Nothing left after the init_date filter; checked first so the
        # max-date comparison below never runs on an empty frame (NaT).
        return None
    if df['ds'].max() < cutoff_date:
        s_ut.my_print('ERROR: no actuals up to cutoff date for ' + ts_name)
        return None

    # 'ds' must appear only once in the grouping keys, even if it is in gcols.
    gkeys = ['ds']
    for col in gcols:
        if col not in gkeys:
            gkeys.append(col)
    return df.groupby(gkeys).sum().reset_index()
Exemplo n.º 2
0
def ens_fcast(ts_name, regs, cutoff_date, time_scale, fcast_days, init_date,
              a_df):
    """Build the regressor frames for ``ts_name`` and run ``selector`` per language.

    For each regressor name in ``regs``:

    * if its configuration has ``do_fcast`` set to True, the stored ensemble
      forecast (rows after ``cutoff_date``) is appended to the regressor's
      actuals (rows up to ``cutoff_date``);
    * otherwise the regressor is treated as static and built by calling the
      function of the same name defined in this module.

    The value column of every regressor frame is renamed to the regressor
    name. The frames are merged with ``merge_regressors_`` and ``selector``
    is called once per key of the merged result, together with the rows of
    ``a_df`` whose ``language`` equals that key.

    :param ts_name: name of the time series, passed through to ``selector``.
    :param regs: iterable of regressor names.
    :param cutoff_date: cutoff timestamp (``.date()`` is called on it).
    :param time_scale: passed to ``dp.ts_setup`` and to static regressors.
    :param fcast_days: passed to static regressor functions.
    :param init_date: passed to ``dp.ts_setup``, to static regressor
        functions and to ``merge_regressors_``.
    :param a_df: DataFrame with a ``language`` column.
    :return: dict mapping each language key to the non-None ``selector``
        result; an empty dict when no regressor data was collected.
    :raises SystemExit: on an invalid regressor name, a missing regressor
        forecast or missing regressor actuals.
    """
    r_list = list()
    for rname in regs:
        r_cfg, _ = dp.ts_setup(rname, cutoff_date, init_date, time_scale)
        if r_cfg is None:
            s_ut.my_print('ERROR: invalid regressor name: ' + rname)
            sys.exit()

        if r_cfg['do_fcast'] is True:
            qry = 'select * from sup.cx_ens_forecast where cutoff = \'' + str(
                cutoff_date.date()) + '\' and ts_name = \'' + rname + '\';'
            rdf = hql.from_tble(qry, ['ds'], use_cache=USE_CACHE, renew=RENEW)
            if rdf is None:  # no ens fcast file found
                s_ut.my_print('ERROR: no forecast for regressor: ' + rname)
                sys.exit()

            cols = ['ds', 'language', 'yhat'
                    ] if 'language' in rdf.columns else ['ds', 'yhat']
            rdf = rdf[rdf['ds'] > cutoff_date][cols].copy()
            adf = get_actuals(r_cfg, init_date='2016-01-01')
            if adf is None:
                # get_actuals signals "no usable actuals" with None; fail
                # with a clear message instead of a TypeError on the
                # indexing below.
                s_ut.my_print('ERROR: no actuals for regressor: ' + rname)
                sys.exit()
            adf = adf[adf['ds'] <= cutoff_date].copy()
            # Give actuals the same value column name as the forecast so
            # both parts stack into one series.
            adf.rename(columns={r_cfg['ycol']: 'yhat'}, inplace=True)
            rdf = pd.concat([adf[cols].copy(), rdf], axis=0)
        else:  # static regressors
            s_ut.my_print(rname + ' is a static regressor')
            try:
                reg_func = getattr(
                    sys.modules[__name__],
                    rname)  # function to set up the static regressor
                args = [cutoff_date, init_date, fcast_days, time_scale]
                rdf = reg_func(*args)
            except AttributeError as e:
                s_ut.my_print('pid: ' + str(os.getpid()) +
                              ' No static regressor with name ' + rname +
                              ': ' + str(e))
                rdf = None

            if rdf is not None:
                s_ut.my_print('pid: ' + str(os.getpid()) +
                              ' found static regressor: ' + str(rname))
            else:
                s_ut.my_print('pid: ' + str(os.getpid()) +
                              ' WARNING: regressor ' + rname +
                              ' not found <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')

        if rdf is not None and len(rdf) > 0:
            rdf.rename(columns={'yhat': rname}, inplace=True)
            r_list.append(rdf)

    if len(r_list) == 0:
        return dict()

    merged = merge_regressors_(r_list, init_date)
    selected = {
        lx: selector(ts_name, lx, a_df[a_df['language'] == lx].copy(), rl,
                     cutoff_date)
        for lx, rl in merged.items()
    }
    # Keep only the languages for which selector produced a result.
    return {lx: fl for lx, fl in selected.items() if fl is not None}
Exemplo n.º 3
0
def get_ens_fcast(ts_name, ts_cfg, cutoff_date):
    """Read the ensemble forecast of a time series for one cutoff date.

    Queries ``sup.cx_ens_forecast`` for rows whose ``cutoff`` equals
    ``cutoff_date.date()`` and whose ``ts_name`` equals ``ts_cfg['ts_key']``.

    :param ts_name: series name, used only in the "no data" log message.
    :param ts_cfg: series configuration; only ``ts_key`` is read.
    :param cutoff_date: cutoff timestamp (``.date()`` is called on it).
    :return: whatever ``hql.from_tble`` returns; None is logged as an error
        and returned to the caller.
    :raises SystemExit: when the query raises.
        NOTE(review): ``sys.exit()`` without an argument exits with status 0.
    """
    tble = 'sup.cx_ens_forecast'
    ts_key = ts_cfg['ts_key']
    cutoff_str = str(cutoff_date.date())
    s_ut.my_print('getting ' + ts_key + ' ens forecast from table ' + tble + ' and cutoff date: ' + cutoff_str)
    qry = 'select * from ' + tble + ' where cutoff = \'' + cutoff_str + '\' and ts_name = \'' + ts_key + '\';'
    try:
        fcast_df = hql.from_tble(qry, ['ds', 'cutoff'], use_cache=USE_CACHE, renew=RENEW)
    except Exception:  # not a bare except: let KeyboardInterrupt/SystemExit through
        s_ut.my_print('ERROR: ' + qry + ' failed')
        sys.exit()
    if fcast_df is None:
        s_ut.my_print('pid: ' + str(os.getpid()) + ' ERROR: no data for ' + ts_name + ' and cutoff date ' + cutoff_str)
    return fcast_df
Exemplo n.º 4
0
def get_ratio_fcast(ts_name, ts_cfg,  cutoff_date, use_cache=None):  # >>>>>>>>>>>>> used in to_excel <<<<<<<<<<<<<<<<<
    """Read the ratio forecast of a time series for one cutoff date.

    Queries ``sup.cx_weekly_forecasts`` for rows whose ``cutoff`` equals
    ``cutoff_date.date()`` and whose ``ts_name`` equals ``ts_cfg['ts_key']``.

    :param ts_name: series name, used only in the "no data" log message.
    :param ts_cfg: series configuration; only ``ts_key`` is read.
    :param cutoff_date: cutoff timestamp (``.date()`` is called on it).
    :param use_cache: when not None, overrides the module-level ``USE_CACHE``.
    :return: whatever ``hql.from_tble`` returns; None is logged as an error
        and returned to the caller.
    :raises SystemExit: when the query raises.
        NOTE(review): ``sys.exit()`` without an argument exits with status 0.
    """
    tble = 'sup.cx_weekly_forecasts'
    ts_key = ts_cfg['ts_key']
    cutoff_str = str(cutoff_date.date())
    s_ut.my_print('get_ratio_forecast: getting ' + ts_key + ' ratio forecast from ' + tble + ' and cutoff date ' + cutoff_str)
    qry = 'select * from ' + tble + ' where cutoff = \'' + cutoff_str + '\' and ts_name = \'' + ts_key + '\';'
    uc = USE_CACHE if use_cache is None else use_cache
    try:
        fcast_df = hql.from_tble(qry, ['ds', 'cutoff'], use_cache=uc, renew=RENEW)
    except Exception:  # not a bare except: let KeyboardInterrupt/SystemExit through
        s_ut.my_print('ERROR: ' + qry + ' failed')
        sys.exit()
    if fcast_df is None:
        s_ut.my_print('pid: ' + str(os.getpid()) + ' ERROR: no data for ' + ts_name + ' and cutoff date ' + cutoff_str)
    return fcast_df
Exemplo n.º 5
0
def get_ens(ts_name, cutoff_date, exclude=('2019-11-30', '2019-12-28', '2020-01-25', '2020-02-29', '2020-03-28', '2020-04-25', '2020-05-30', '2020-06-27')):
    """Pick the ensemble with the smallest mean past error for ``ts_name``.

    Reads ``sup.cx_language_forecast_performance_detail`` for all cutoffs up
    to ``cutoff_date.date()``, keeps the rows with ``language == 'All'``,
    drops the cutoffs listed in ``exclude`` and averages ``err`` per ``ens``.

    :param ts_name: time series name used in the query.
    :param cutoff_date: latest cutoff to consider (``.date()`` is called on it).
    :param exclude: cutoff values to ignore; None or an empty sequence
        disables the exclusion.
        NOTE(review): the values are strings matched with ``isin`` against
        the ``cutoff`` column -- confirm the column dtype makes them match.
    :return: the ``ens`` value with the lowest mean ``err``, or ``'lasso'``
        when there is no performance data to rank.
    """
    qry = ('select * from sup.cx_language_forecast_performance_detail where ts_name = \''
           + ts_name + '\' and cutoff <= \'' + str(cutoff_date.date()) + '\';')
    df = hql.from_tble(qry, ['cutoff', 'upr', 'lwr'])
    if df is None:
        # from_tble may come back with None; fall back to the default
        # ensemble exactly as for an empty result.
        return 'lasso'

    df = df[df['language'] == 'All'].copy()
    if exclude is not None and len(exclude) != 0:
        df = df[~df['cutoff'].isin(list(exclude))].copy()
    if len(df) == 0:
        return 'lasso'

    # Mean error per ensemble; the value column is named 0 so the printed
    # table keeps the layout produced by the previous groupby/apply form.
    gf = df.groupby('ens')['err'].mean().reset_index(name=0)
    if len(gf) == 0:  # e.g. every 'ens' value is missing
        return 'lasso'
    s_ut.my_print('+++++++++++++++++ Past performance ++++++++++++++')
    print(gf)
    z = gf.nsmallest(1, columns=[0])
    return z.loc[z.index[0], 'ens']
Exemplo n.º 6
0
def get_lang_fcast(ts_cfg, cutoff_date, eq=True):
    """Read the language-level forecast of a time series.

    Queries ``sup.cx_language_forecast`` for ``ts_cfg['ts_key']``, either at
    exactly ``cutoff_date.date()`` (``eq`` is True) or at every cutoff up to
    and including it (otherwise).

    :param ts_cfg: series configuration; ``name`` and ``ts_key`` are read.
    :param cutoff_date: cutoff timestamp (``.date()`` is called on it).
    :param eq: True selects ``cutoff = date``; anything else selects
        ``cutoff <= date``.
    :return: whatever ``hql.from_tble`` returns; None is logged as an error
        and returned to the caller.
    :raises SystemExit: when the query raises.
        NOTE(review): ``sys.exit()`` without an argument exits with status 0.
    """
    ts_name = ts_cfg['name']
    ts = ts_cfg['ts_key']
    tble = 'sup.cx_language_forecast'
    cutoff_str = str(cutoff_date.date())
    s_ut.my_print('getting ' + ts + ' language forecast from table ' + tble + ' and cutoff date: ' + cutoff_str)

    # The two query variants differ only in the comparison operator.
    op = '=' if eq is True else '<='
    qry = 'select ds, language, yhat, dim_cfg, cutoff from ' + tble + ' where cutoff ' + op + ' \'' + cutoff_str + '\' and ts_name = \'' + ts + '\';'
    try:
        fcast_df = hql.from_tble(qry, ['ds', 'cutoff'], use_cache=USE_CACHE, renew=RENEW)
    except Exception:  # not a bare except: let KeyboardInterrupt/SystemExit through
        s_ut.my_print('ERROR: ' + qry + ' failed')
        sys.exit()

    # The row-count log lives outside the try block: len(None) used to raise
    # inside it, so a None result was reported as a failed query and the
    # process exited before the None branch below could ever run.
    if fcast_df is None:
        s_ut.my_print('pid: ' + str(os.getpid()) + ' ERROR: no data for eq: ' + str(eq) + ', ts: ' + ts_name + ' and cutoff date ' + cutoff_str)
    else:
        s_ut.my_print(qry + ' completed. Got ' + str(len(fcast_df)) + ' rows')
    return fcast_df
Exemplo n.º 7
0
def get_actuals(rcfg, init_date='2016-01-01'):
    """Return the actuals of a regressor from ``sup.cx_weekly_actuals``.

    Reads ``ds``, ``language`` and ``y`` for ``rcfg['ycol']`` at the run date
    returned by ``hql.get_rmax``, renames ``y`` to ``rcfg['ycol']`` and keeps
    the rows from the initial date onwards.

    :param rcfg: regressor configuration; ``cutoff_date``, ``name`` and
        ``ycol`` are required, ``init_date`` and ``interaction_type`` are
        optional.
    :param init_date: initial date used when ``rcfg`` has no ``init_date``.
    :return: DataFrame with a fresh 0..n-1 index, or None when the query
        returns nothing, no rows remain after the date filter, or the latest
        ``ds`` is earlier than the cutoff date.
    :raises SystemExit: when the query raises.
        NOTE(review): ``sys.exit()`` without an argument exits with status 0.
    """
    # Coerce both dates once so comparisons and log messages use the same
    # Timestamp values (the cutoff may be given as a string).
    cutoff_ts = pd.to_datetime(rcfg['cutoff_date'])
    init_ts = pd.to_datetime(rcfg.get('init_date', init_date))
    r_name = rcfg['name']
    int_type = rcfg.get('interaction_type', None)
    s_ut.my_print('getting ' + r_name + ' regressor actuals from table')
    ts_name = rcfg['ycol']
    r_date = hql.get_rmax(ts_name, use_cache=USE_CACHE)
    qry = 'select ds, language, y from sup.cx_weekly_actuals where ts_name=\'' + ts_name + '\' and run_date=\'' + r_date + '\';'
    try:
        rdf = hql.from_tble(qry, ['ds'], use_cache=USE_CACHE, renew=RENEW)
    except Exception:  # not a bare except: let KeyboardInterrupt/SystemExit through
        s_ut.my_print('pid: ' + str(os.getpid()) + ' ERROR: query: ' + qry +
                      ' failed. No data for regressor ' + r_name)
        sys.exit()

    # Check for None before touching the frame: the check used to sit after
    # len(rdf) and rdf.rename(), so it could never be reached.
    if rdf is None:
        s_ut.my_print('pid: ' + str(os.getpid()) +
                      ' WARNING: no data for regressor  ' + r_name)
        return None

    s_ut.my_print(qry + ' completed. Got ' + str(len(rdf)) + ' rows')
    rdf.rename(columns={'y': ts_name}, inplace=True)
    s_ut.my_print('pid: ' + str(os.getpid()) + ' got ts_name ' + ts_name +
                  ' from table ' + 'sup.cx_weekly_actuals')

    rdf = rdf[rdf['ds'] >= init_ts].copy()
    if len(rdf) == 0:
        s_ut.my_print('pid: ' + str(os.getpid()) +
                      ' WARNING: no data for regressor  ' + r_name)
        return None

    max_ds = rdf['ds'].max()
    if max_ds < cutoff_ts:
        s_ut.my_print('pid: ' + str(os.getpid()) + ' WARNING: ' + r_name +
                      ' max date (' + str(max_ds.date()) +
                      ') is smaller than cutoff date (' +
                      str(cutoff_ts.date()) + ')')
        return None

    # Only applies if the frame carries an 'interaction_type' column; the
    # query above does not select one.
    if 'interaction_type' in rdf.columns and int_type is not None:
        rdf = rdf[rdf['interaction_type'] == int_type].copy()
    rdf.reset_index(inplace=True, drop=True)
    return rdf