Пример #1
0
def load_data(target, mode):
    """Load, preprocess and split the dataset for one target/mode pair.

    Args:
        target: ``'wu'`` or ``'chi'``; selects the loader and the feature
            columns requested from it.
        mode: ``'day'`` or ``'week'``; selects the trailing positional
            arguments handed to ``preprocess``.

    Returns:
        The ``(data, time)`` pair returned by ``split`` when called with
        ratios 0.8 / 0.1 / 0.1 for train / valid / test.

    Raises:
        ValueError: If ``target`` or ``mode`` is not a supported value.
    """
    # Trailing positional arguments for ``preprocess``, keyed by mode.
    # Both targets use the same values.
    preprocess_args = {
        'day': (1, 1, 11, 3),
        'week': (7, 7, 8, 2),
    }

    # Validate before loading anything so a bad argument fails fast with a
    # clear message instead of an UnboundLocalError further down.
    if target not in ('wu', 'chi'):
        raise ValueError(
            f"Unknown target {target!r}; expected 'wu' or 'chi'")
    if mode not in preprocess_args:
        raise ValueError(
            f"Unknown mode {mode!r}; expected 'day' or 'week'")

    if 'wu' == target:
        input_data, ans, date = load_wu([
            'AED', 'pf_price', 'temp', 'humidity', 'wind_speed',
            'wu_ex_weight', 'wu_ex_price', 'wu_month_price'
        ])
    else:
        input_data, ans, date = load_chi(
            ['wind_max', 'chi_day_price', 'chi_month_amount'])

    processed_data = preprocess(input_data, ans, date,
                                *preprocess_args[mode])

    data, time = split(processed_data,
                       tr_ratio=0.8,
                       va_ratio=0.1,
                       te_ratio=0.1)
    return data, time
Пример #2
0
def exp1():  # {{{
    """Compare training on the full date range with training from 2014 on.

    Loads every CSV listed in ``data_common`` and ``data_wu`` into a
    ``RowDataHandler``, builds one dataset over the handler's full
    start/end range and one starting at ``'2014-01-01'``, then trains and
    evaluates the ``'large'`` model ten times on each and prints both
    result lists.

    NOTE(review): ``path``, ``data_common`` and ``data_wu`` are not defined
    in this function; they must be provided by the enclosing module.
    """
    # data before 2013 to drop or not

    d = RowDataHandler()
    for filename, inpu_method in data_common.items():
        d.add(pd.read_csv(f'{path}/{filename}.csv'), inpu_method)
    for filename, inpu_method in data_wu.items():
        d.add(pd.read_csv(f'{path}/wu/{filename}.csv'), inpu_method)

    def build_dataset(merged_data):
        # Split the merged frame into the three inputs ``preprocess`` takes,
        # so both date ranges go through exactly the same call.
        data = merged_data.iloc[:, 1:]
        ans = merged_data['wu_day_price']
        date = merged_data['date']
        dataset, _ = preprocess(data, ans, date, 1, 1, 7, 1)
        return dataset

    start, end = d.get_start_end_tick()
    data_retain = build_dataset(d.get_merged_data(start, end))
    data_drop = build_dataset(d.get_merged_data('2014-01-01', end))

    retain = []
    drop = []
    for _ in range(10):
        res_retain = train_and_eval_model('test',
                                          'large',
                                          **data_retain,
                                          drop_model=True)
        res_drop = train_and_eval_model('test',
                                        'large',
                                        **data_drop,
                                        drop_model=True)
        retain.append(res_retain)
        drop.append(res_drop)

    print('retain data before 2013:', retain)
    print('drop data before 2013:', drop)
Пример #3
0
def exp6():  # {{{
    """Sweep ``input_size`` from 1 to 14 and report the mean result of 5 runs.

    For each ``input_size`` the data is preprocessed and split, the three
    ``*_x`` arrays are zero-padded, and the ``'base'`` model is trained and
    evaluated five times. The mean over those five results is printed.
    """

    input_data, ans, date = load_wu([
        'SAR', 'yb_price', 'pa', 'humidity_low', 'wind_max_dir',
        'wu_day_price', 'wu_day_amount'
    ])

    # Append three zero entries at the end of axis 1; axes 0 and 2 are left
    # untouched. NOTE(review): presumably needed to match the model's
    # expected input width - confirm.
    npad = ((0, 0), (0, 3), (0, 0))

    for input_size in range(1, 15):
        mses = []
        for _ in range(5):
            processed_data = preprocess(input_data, ans, date, 30, 30,
                                        input_size, 1)
            data, _ = split(processed_data,
                            tr_ratio=0.8,
                            va_ratio=0.1,
                            te_ratio=0.1)

            for key in ('train_x', 'valid_x', 'test_x'):
                data[key] = np.pad(data[key],
                                   npad,
                                   'constant',
                                   constant_values=0)

            mse = train_and_eval_model('test', 'base', **data, drop_model=True)
            mses.append(mse)
        # Average over all five runs, not just the last one.
        print(f'inupt_size={input_size}\nmse={np.mean(mses)}')
Пример #4
0
def test():
    """Run one train/evaluate pass of the 'base' model and print the result.

    Loads a fixed set of 'wu' feature columns, preprocesses and splits them
    (0.8 / 0.1 / 0.1), then trains and evaluates under the name ``'test'``.
    """
    feature_columns = [
        'AED', 'pf_price', 'temp', 'humidity', 'wind_speed', 'wu_ex_weight',
        'wu_ex_price', 'wu_month_price'
    ]
    features, targets, dates = load_wu(feature_columns)

    windows = preprocess(features, targets, dates, 1, 1, 7, 1)
    dataset, _ = split(windows, tr_ratio=0.8, va_ratio=0.1, te_ratio=0.1)

    print(train_and_eval_model('test', 'base', **dataset))
Пример #5
0
def exp3():  # {{{
    """Evaluate the column sets recorded in ``logs/selec_res_<target>``.

    The target (``'wu'`` or ``'chi'``) is read from ``argv[2]``. Each line
    of the selection log yields one list of column names. For every list
    the ``'base'`` and ``'large'`` models are trained and evaluated five
    times, and the results plus their means are appended to
    ``logs/exp3_eval_selection_<target>``.

    Raises:
        Exception: If ``argv[2]`` is neither ``'wu'`` nor ``'chi'``.
    """
    # testing for results of exp2, results of selection

    target = argv[2]

    if 'wu' == target:
        input_data, ans, date = load_wu()
    elif 'chi' == target:
        input_data, ans, date = load_chi()
    else:
        raise Exception(f'Error target: {target}')

    with open(f'logs/selec_res_{target}', 'r') as f:
        # Drop the first 11 characters and the last one of each stripped
        # line, remove single quotes, and split the rest on spaces.
        cols_set = [
            line.strip()[11:-1].replace('\'', '').split(' ')
            for line in f
        ]

    for use_cols in cols_set:
        # Select from the full frame every time. Rebinding ``input_data``
        # here would make later column sets depend on earlier ones.
        selected_data = input_data[use_cols]
        processed_data = preprocess(selected_data, ans, date, 1, 1, 7, 1)
        data, _ = split(processed_data,
                        tr_ratio=0.8,
                        va_ratio=0.1,
                        te_ratio=0.1)
        res_base = []
        res_large = []

        for _ in range(5):
            res1 = train_and_eval_model('test',
                                        'base',
                                        **data,
                                        drop_model=True)
            res2 = train_and_eval_model('test',
                                        'large',
                                        **data,
                                        drop_model=True)
            res_base.append(res1)
            res_large.append(res2)

        with open(f'logs/exp3_eval_selection_{target}', 'a+') as f:
            f.write('1.\n')
            f.write(f'Use columns: {use_cols}\n')
            f.write(f'base model: {res_base}\n')
            f.write(f'base mean: {np.mean(res_base)}\n')
            f.write(f'large model: {res_large}\n')
            f.write(f'large mean: {np.mean(res_large)}\n\n')
Пример #6
0
def exp2():  # {{{
    """Greedy backward elimination over the input columns.

    The target (``'wu'`` or ``'chi'``) is read from ``argv[2]``; any other
    value exits. In every round each remaining column is left out once, the
    ``'base'`` model is trained and evaluated without it, and the column
    whose removal gave the smallest result is dropped. Rounds continue
    until two columns remain, then the history is printed.
    """
    # which features to use

    target = argv[2]
    if target == 'wu':
        input_data, ans, date = load_wu()
    elif target == 'chi':
        input_data, ans, date = load_chi()
    else:
        exit()

    remaining = input_data.columns.to_numpy()
    initial_columns = remaining.copy()

    kept_per_round = []
    scores_per_round = []

    # The round count is fixed up front from the initial number of columns.
    for _ in range(len(remaining) - 2):
        round_scores = []
        for position in range(len(remaining)):
            # Every remaining column except the one at ``position``.
            candidate = np.delete(remaining, position)
            windows = preprocess(input_data[candidate], ans, date, 1, 1, 7,
                                 1)
            dataset, _ = split(windows,
                               tr_ratio=0.8,
                               va_ratio=0.1,
                               te_ratio=0.1)
            score = train_and_eval_model('test',
                                         'base',
                                         **dataset,
                                         drop_model=True)
            round_scores.append(score)

        # Drop the column whose absence produced the lowest score.
        remaining = np.delete(remaining, np.argmin(round_scores))
        kept_per_round.append(remaining)
        scores_per_round.append(round_scores)

    print('The origin columns: ')
    print(initial_columns)
    print('The final columns: ')
    print(remaining)
    for round_scores, kept in zip(scores_per_round, kept_per_round):
        print('res: ', round_scores)
        print('columns: ', kept)
        print('')
Пример #7
0
def main():  # {{{
    """Run a linear-regression baseline and four 'large' model trainings.

    Loads the common and 'wu' CSV files from ``./data/use`` into a
    ``RowDataHandler``. A linear regression is run on the test split built
    from the ``date`` / ``wu_day_price`` columns only. The ``'large'`` model
    is then trained and evaluated once per preprocessing configuration, on
    a fixed subset of columns with incomplete rows dropped.
    """

    path = './data/use'

    # value is None for daily data
    data_common = {
        'currency': None,
        'powder_feed': ['same', 'same'],
        'yellow_bean': ['same'],
        'weather': None
    }
    data_wu = {
        'wu_export': ['divide', 'same'],
        'wu_price_perDate': None,
        # 'wu_price_perMonth': ['same', 'divide'],
    }
    # Not loaded by this function; kept as the counterpart of ``data_wu``.
    data_chi = {
        'chi_export': ['divide', 'same'],
        'chi_price_perDate': None,
        'chi_price_perMonth': ['same', 'divide'],
        'chi_small_fish': ['divide', 'same']
    }

    d = RowDataHandler()
    for filename, inpu_method in data_common.items():
        d.add(pd.read_csv(f'{path}/{filename}.csv'), inpu_method)
    for filename, inpu_method in data_wu.items():
        d.add(pd.read_csv(f'{path}/wu/{filename}.csv'), inpu_method)

    # ['date', 'USD', 'CAD', 'SAR', 'AED', 'pf_weight', 'pf_price', 'yb_price', 'temp', 'temp_high', 'temp_low', 'point_temp', 'wu_ex_weight', 'wu_ex_price', 'wu_day_price', 'wu_day_amount']
    merged_data = d.get_merged_data(*d.get_start_end_tick())[[
        'date', 'wu_day_price'
    ]]
    data, _ = preprocess(merged_data, 1, 1, 7, 1)
    linear_regression(data['test_x'], data['test_y'])

    weather_columns = [
        'date', 'temp', 'temp_high', 'temp_low', 'wu_ex_weight', 'wu_ex_price',
        'wu_day_price', 'wu_day_amount'
    ]
    # (model name, positional arguments passed to ``preprocess``)
    runs = [
        ('weather_7dl', (1, 1, 7, 1)),
        ('weather_14dl', (1, 1, 14, 1)),
        ('weather_4wl', (7, 7, 4, 1)),
        ('weather_4ml', (30, 30, 4, 1)),
    ]
    for model_name, preprocess_args in runs:
        # Re-fetch the merged frame for every run, as the original did, so
        # no run can be affected by what a previous one did to the data.
        merged_data = d.get_merged_data(*d.get_start_end_tick())
        merged_data = merged_data[weather_columns]
        merged_data = merged_data.dropna().reset_index(drop=True)
        data, _ = preprocess(merged_data, *preprocess_args)
        train_and_eval_model(model_name, 'large', **data)