Пример #1
0
def main(tag,
         config: DefaultConfig,
         target,
         mode,
         eval_mode,
         model_name='gbrt',
         csv_result_list=None,
         features=None,
         with_spatial=False):
    """Run or summarise the sliding-window experiment for one target.

    Modes starting with ``'run'`` build one ``DataGenerator`` per entry of
    ``config.obs_data_path_list``. For every window index from
    ``TESTING_SLIDING_WINDOW`` up to ``len(MONTH_LIST) - 1`` they reload and
    prepare the data, extract the train/val/test splits and pass them to
    ``batch_run``. Modes starting with ``'reduce'`` call ``reduce`` on the
    result CSV files instead. Any other mode only sets up the log paths.

    Args:
        tag: Sub-directory of ``DIR_LOG`` used for this experiment.
        config: Settings object; the attributes read are ``period``,
            ``window``, ``obs_data_path_list``, ``target_size``,
            ``train_step``, ``test_step``, ``single_step``, ``n_runs`` and
            ``station_name_list``.
        target: Name of the predicted attribute; also used as a directory
            name and as the single ``y_attributes`` entry.
        mode: Selects the ``'run...'`` or ``'reduce...'`` branch by prefix.
        eval_mode: Forwarded to ``get_month_list``.
        model_name: Directory name under ``tag`` and model identifier passed
            to the runner.
        csv_result_list: CSV file names for the reduce step; defaults to
            ``['metrics_model.csv', 'metrics_nwp.csv']``.
        features: Input attribute names; defaults to
            ``[target, 'NEXT_NWP_<target>']``.
        with_spatial: If true, ``run_spatial`` is used instead of ``run``.
    """
    dir_log_mode = make_dir(os.path.join(DIR_LOG, tag, model_name))
    dir_log_target = make_dir(os.path.join(dir_log_mode, target))
    if features is None:
        features = [target, 'NEXT_NWP_{}'.format(target)]

    if mode.startswith('run'):
        generators = [
            DataGenerator(config.period, config.window, path=obs_path)
            for obs_path in config.obs_data_path_list
        ]
        # The runner does not depend on the window, so pick it once.
        run_func = run_spatial if with_spatial else run

        for wid in range(TESTING_SLIDING_WINDOW, len(MONTH_LIST)):
            dir_log_exp = make_dir(
                os.path.join(dir_log_target, str(MONTH_LIST[wid])))
            months = get_month_list(eval_mode, wid)
            for generator in generators:
                generator.set_data(months)
                generator.prepare_data(config.target_size,
                                       train_step=config.train_step,
                                       test_step=config.test_step,
                                       single_step=config.single_step)

            # One entry per station, in the same order as ``generators``.
            x_train_list, y_train_list = [], []
            x_val_list, y_val_list = [], []
            x_test_list, y_test_list = [], []
            for generator in generators:
                train, val, test = generator.extract_training_data(
                    x_attributes=features, y_attributes=[target])
                x_train, y_train = train
                x_val, y_val = val
                x_test, y_test = test
                x_train_list.append(x_train)
                y_train_list.append(y_train)
                x_val_list.append(x_val)
                y_val_list.append(y_val)
                x_test_list.append(x_test)
                y_test_list.append(y_test)

            def run_once(dir_log_curr):
                return run_func(generators, model_name, dir_log_curr, target,
                                x_train_list, x_val_list, x_test_list,
                                y_train_list, y_val_list, y_test_list)

            batch_run(config.n_runs, dir_log_exp, run_once)

    elif mode.startswith('reduce'):
        if csv_result_list is None:
            csv_result_list = ['metrics_model.csv', 'metrics_nwp.csv']
        reduce(csv_result_list, target, dir_log_target, config.n_runs,
               config.station_name_list)
Пример #2
0
def main(mode, config: DefaultConfig, eval_mode, file_exp_in, tag_file_list):
    """Derive or summarise wind-direction ('DIR') results of an experiment.

    Modes starting with ``'run'`` prepare one ``DataGenerator`` per
    observation file for each window index and call ``run_dir_from_vxy`` once
    per entry of ``tag_file_list``, pointing it at the ``VX`` and ``VY``
    sub-directories of the input experiment. Modes starting with ``'reduce'``
    build the list of metric CSV names for every tag and call ``reduce``.

    Args:
        mode: Selects the ``'run...'`` or ``'reduce...'`` branch by prefix.
        config: Settings object; reads ``period``, ``window``,
            ``obs_data_path_list``, ``target_size``, ``train_step``,
            ``test_step``, ``single_step``, ``n_runs`` and
            ``station_name_list``.
        eval_mode: Forwarded to ``get_month_list``.
        file_exp_in: Experiment directory name under ``DIR_LOG``.
        tag_file_list: File tags to process; a ``None`` entry selects the
            un-suffixed file names.
    """
    target = 'DIR'
    dir_in = os.path.join(DIR_LOG, file_exp_in)
    dir_log_target = os.path.join(dir_in, target)
    make_dir(dir_log_target)

    if mode.startswith('run'):
        generators = [
            DataGenerator(config.period, config.window, path=obs_path)
            for obs_path in config.obs_data_path_list
        ]

        for wid in range(TESTING_SLIDING_WINDOW, len(MONTH_LIST)):
            month_tag = str(MONTH_LIST[wid])
            dir_log_exp = os.path.join(dir_log_target, month_tag)
            months = get_month_list(eval_mode, wid)
            for generator in generators:
                generator.set_data(months)
                generator.prepare_data(config.target_size,
                                       train_step=config.train_step,
                                       test_step=config.test_step,
                                       single_step=config.single_step)

            # Component predictions live next to the DIR output directory.
            dir_vx = os.path.join(dir_in, 'VX', month_tag)
            dir_vy = os.path.join(dir_in, 'VY', month_tag)
            for tag_file in tag_file_list:
                run_dir_from_vxy(generators, dir_vx, dir_vy, dir_log_exp,
                                 tag_file, n_runs=config.n_runs)

    elif mode.startswith('reduce'):
        csv_result_list = []
        for tag_file in tag_file_list:
            # Model metrics first, then NWP metrics, for every tag.
            if tag_file is None:
                csv_result_list.extend(
                    ['metrics_model.csv', 'metrics_nwp.csv'])
            else:
                csv_result_list.extend([
                    'metrics_model_{}.csv'.format(tag_file),
                    'metrics_nwp_{}.csv'.format(tag_file),
                ])
        reduce(csv_result_list, target, dir_log_target, config.n_runs,
               config.station_name_list)
Пример #3
0
def main(tag, mode):
    """Run one correlation analysis over all observation stations.

    The mode is validated before any directory is created or any data is
    prepared, so a misspelled mode fails immediately instead of after the
    data preparation for every station.

    Args:
        tag: Sub-directory of ``DIR_LOG`` for this experiment.
        mode: One of ``'corr_with_nwp'``, ``'corr_variables'``,
            ``'corr_spatial'`` or ``'corr_auto'``. It is also used as the
            name of the log sub-directory under ``tag``.

    Raises:
        ValueError: If ``mode`` is not one of the supported names.
    """
    # Checked against plain strings first so the failure path touches
    # neither the file system nor the data files.
    known_modes = ('corr_with_nwp', 'corr_variables', 'corr_spatial',
                   'corr_auto')
    if mode not in known_modes:
        raise ValueError('The mode = {} can not be found!'.format(mode))

    dir_log = os.path.join(DIR_LOG, tag, mode)
    make_dir(dir_log)

    data_generator_list = []
    for obs_data_path in DefaultConfig.obs_data_path_list:
        data_generator = DataGenerator(DefaultConfig.period,
                                       DefaultConfig.window,
                                       path=obs_data_path)
        data_generator.prepare_data(DefaultConfig.target_size,
                                    train_step=DefaultConfig.train_step,
                                    test_step=DefaultConfig.test_step,
                                    single_step=DefaultConfig.single_step)
        data_generator_list.append(data_generator)

    handlers = {
        'corr_with_nwp': corr_with_nwp,
        'corr_variables': corr_variables,
        'corr_spatial': corr_spatial,
        'corr_auto': corr_auto,
    }
    handlers[mode](data_generator_list, dir_log)
Пример #4
0
def run_dir_from_vxy(data_generator_list, dir_vx, dir_vy, dir_log, tag_file=None, n_runs=10, target='DIR'):
    """Combine saved VX/VY predictions into direction predictions per run.

    For each run index below ``n_runs`` and each station, the function loads
    ``y_pred_<station><suffix>.txt`` from the matching run folder of
    ``dir_vx`` and ``dir_vy``, converts the pair with ``vxy_to_dir_vec``,
    saves a comparison plot and the converted prediction into the run folder
    under ``dir_log``, and appends the results to two ``EvaluatorDir``
    objects (one for the model, one for the NWP values).

    Args:
        data_generator_list: Prepared generators, one per station.
        dir_vx: Directory holding per-run VX predictions.
        dir_vy: Directory holding per-run VY predictions.
        dir_log: Output directory; one sub-directory per run index is used.
        tag_file: Optional tag; when given, ``'_' + tag_file`` is appended
            to station names in file names and to the evaluator names.
        n_runs: Number of run indices to process.
        target: Attribute name passed to ``extract_evaluation_data``.
    """
    if tag_file is None:
        file_suffix = ""
    else:
        file_suffix = '_' + tag_file

    for run_idx in range(n_runs):
        run_name = str(run_idx)
        dir_log_curr = os.path.join(dir_log, run_name)
        make_dir(dir_log_curr)
        evaluator_model = EvaluatorDir(dir_log_curr, 'model' + file_suffix)
        evaluator_nwp = EvaluatorDir(dir_log_curr, 'nwp' + file_suffix)

        for generator in data_generator_list:
            station_name = generator.station_name
            # The first returned value is not needed here.
            _, nwp, obs, filter_big_wind = generator.extract_evaluation_data(target)

            # Input and output prediction files share the same file name.
            station_tag = station_name + file_suffix
            pred_file = 'y_pred_{}.txt'.format(station_tag)
            y_pred_vx = np.loadtxt(os.path.join(dir_vx, run_name, pred_file))
            y_pred_vy = np.loadtxt(os.path.join(dir_vy, run_name, pred_file))
            y_pred_dir = vxy_to_dir_vec(y_pred_vx, y_pred_vy)

            plot_and_save_comparison(obs, y_pred_dir, dir_log_curr,
                                     filename='compare_{}.png'.format(station_tag))
            evaluator_model.append(obs, y_pred_dir, filter_big_wind, key=station_name)
            evaluator_nwp.append(obs, nwp, filter_big_wind, key=station_name)
            np.savetxt(os.path.join(dir_log_curr, pred_file), y_pred_dir)
Пример #5
0
def main(tag):
    """Prepare every station's data with the default settings and plot it.

    All settings are read from the ``DefaultConfig`` class. One
    ``DataGenerator`` is built and prepared per observation file, and the
    resulting list is passed to ``visualize_couple`` together with the log
    directory ``DIR_LOG/<tag>``.

    Args:
        tag: Sub-directory of ``DIR_LOG`` that receives the output.
    """
    cfg = DefaultConfig
    target_size = cfg.target_size
    period, window = cfg.period, cfg.window
    prepare_kwargs = {
        'train_step': cfg.train_step,
        'test_step': cfg.test_step,
        'single_step': cfg.single_step,
    }
    obs_paths = cfg.obs_data_path_list

    dir_log = os.path.join(DIR_LOG, tag)
    make_dir(dir_log)

    generators = []
    for obs_path in obs_paths:
        generator = DataGenerator(period, window, path=obs_path)
        generator.prepare_data(target_size, **prepare_kwargs)
        generators.append(generator)

    # visualize(generators, dir_log)
    visualize_couple(generators, dir_log)
Пример #6
0
def main(target, mode, eval_mode, config, tag, model_name, features_history,
         features_future, adjacency_norm):
    """Run or summarise the spatial-model experiment for one target.

    With ``mode == 'run'`` a single ``DataGeneratorSpatial`` is created and,
    for each window index from ``TESTING_SLIDING_WINDOW`` up to
    ``len(MONTH_LIST) - 1``, reloaded, prepared and passed to ``batch_run``.
    With ``mode == 'reduce'`` the two model-specific metric CSV files are
    passed to ``reduce``. Any other mode only creates the target directory.

    Args:
        target: Predicted attribute; also the log sub-directory name.
        mode: ``'run'`` or ``'reduce'``.
        eval_mode: Forwarded to ``get_month_list``.
        config: Settings object supplying the data, training and station
            settings read at the top of the function.
        tag: Sub-directory of ``DIR_LOG`` for this experiment.
        model_name: Passed to ``run`` and embedded in the metric file names.
        features_history: Forwarded to ``run``.
        features_future: Forwarded to ``run``.
        adjacency_norm: Forwarded to ``run``.
    """
    # Settings are read eagerly, before any directory or data work.
    target_size = config.target_size
    period, window = config.period, config.window
    prepare_kwargs = dict(train_step=config.train_step,
                          test_step=config.test_step,
                          single_step=config.single_step)
    norm, x_divide_std = config.norm, config.x_divide_std
    n_epochs, n_runs = config.n_epochs, config.n_runs
    _ = config.obs_data_path_list  # read with the other settings; unused below
    station_name_list = config.station_name_list

    dir_log_target = os.path.join(DIR_LOG, tag, target)
    make_dir(dir_log_target)

    if mode == 'run':
        spatial_generator = DataGeneratorSpatial(
            period, window, norm=norm, x_divide_std=x_divide_std)

        def run_once(dir_log_curr):
            return run(station_name_list, dir_log_curr, spatial_generator,
                       target, n_epochs, features_history, features_future,
                       adjacency_norm, model_name)

        for wid in range(TESTING_SLIDING_WINDOW, len(MONTH_LIST)):
            dir_log_exp = os.path.join(dir_log_target, str(MONTH_LIST[wid]))
            months = get_month_list(eval_mode, wid)
            spatial_generator.set_data(months)
            spatial_generator.prepare_data(target_size, **prepare_kwargs)
            batch_run(n_runs, dir_log_exp, run_once)
    elif mode == 'reduce':
        csv_result_list = [
            'metrics_model_{}.csv'.format(model_name),
            'metrics_nwp_{}.csv'.format(model_name),
        ]
        reduce(csv_result_list, target, dir_log_target, n_runs,
               station_name_list)
Пример #7
0
def plot_dir(tag, config, target, mhstn_root):
    """Plot observed, NWP and MHSTN series plus their errors per station.

    Uses run index 0 and the last entry of ``MONTH_LIST``. For each station
    the figure has two panels: the three series on top and, below, a zero
    baseline with the two error series returned by ``cal_delta``. Each
    figure is saved twice into ``DIR_LOG/<tag>/<target>/0``: once through
    ``plt.savefig`` under the bare station name (no extension is given, so
    Matplotlib chooses the format) and once as ``<station>.pdf``.

    Args:
        tag: Sub-directory of ``DIR_LOG`` that receives the plots.
        config: Settings object; reads ``obs_data_path_list``, ``period``,
            ``window``, ``target_size``, ``train_step``, ``test_step`` and
            ``single_step``.
        target: Attribute passed to ``extract_evaluation_data``; also part
            of the input and output paths.
        mhstn_root: Directory under ``DIR_LOG`` holding the MHSTN
            predictions (``y_pred_<station>_combine_module_conv.txt``).
    """
    i_run = 0
    month = str(MONTH_LIST[-1])

    dir_log = make_dir(os.path.join(DIR_LOG, tag, target, str(i_run)))

    data_generator_list = []
    for obs_data_path in config.obs_data_path_list:
        data_generator = DataGenerator(config.period,
                                       config.window,
                                       path=obs_data_path)
        data_generator.prepare_data(config.target_size,
                                    train_step=config.train_step,
                                    test_step=config.test_step,
                                    single_step=config.single_step)
        data_generator_list.append(data_generator)

    for data_generator in data_generator_list:
        station_name = data_generator.station_name

        _, nwp, obs, _ = data_generator.extract_evaluation_data(target)
        mhstn_path = '{}/{}/{}/{}/y_pred_{}_combine_module_conv.txt'.format(
            mhstn_root, target, month, str(i_run), station_name)
        mhstn_pred = np.loadtxt(os.path.join(DIR_LOG, mhstn_path))

        delta_nwp = cal_delta(obs, nwp)
        delta_cnn = cal_delta(obs, mhstn_pred)

        # Close the current figure even if plotting or saving fails, so a
        # half-drawn figure never stays current in pyplot's global state.
        try:
            grid = plt.GridSpec(3, 1)

            # Top two thirds: the series themselves.
            ax_main = plt.subplot(grid[0:2, 0])
            plt.plot(obs, label='TRUTH')
            plt.plot(nwp, label='NWP')
            plt.plot(mhstn_pred, label='MHSTN')
            plt.legend(loc='best')
            plt.ylabel('Value (degree)')

            # Bottom third: errors, sharing the x axis with the top panel.
            plt.subplot(grid[2, 0], sharex=ax_main)
            plt.plot(np.zeros(len(delta_nwp)))
            plt.plot(delta_nwp)
            plt.plot(delta_cnn)
            plt.ylabel('Error (degree)')
            plt.xlabel('Time (hours)')

            plt.savefig(os.path.join(dir_log, "{}".format(station_name)),
                        dpi=750,
                        bbox_inches='tight')

            # The context manager closes the PDF file even if savefig raises.
            pdf_path = os.path.join(dir_log, "{}.pdf".format(station_name))
            with PdfPages(pdf_path) as pdf:
                pdf.savefig()
        finally:
            plt.close()
Пример #8
0
def main(tag, config: DefaultConfig, dir_log, eval_mode):
    """Run the sliding-window experiment for target 'V' and reduce results.

    One ``DataGenerator`` is created per observation file. For each window
    index from ``TESTING_SLIDING_WINDOW`` up to ``len(MONTH_LIST) - 1`` the
    generators are reloaded and prepared, then ``run`` is called with that
    window's log directory. Afterwards the per-split metric CSV files are
    combined by ``reduce_multiple_splits`` (or by
    ``reduce_multiple_splits_dir`` when the target is ``'DIR'``).

    Args:
        tag: Sub-directory of ``dir_log`` for this experiment.
        config: Settings object; reads ``obs_data_path_list``, ``period``,
            ``window``, ``target_size``, ``train_step``, ``test_step`` and
            ``single_step``.
        dir_log: Root log directory.
        eval_mode: Forwarded to ``get_month_list``.
    """
    target = 'V'
    dir_log_target = os.path.join(dir_log, tag, target)
    make_dir(dir_log_target)

    generators = [
        DataGenerator(config.period, config.window, path=obs_path)
        for obs_path in config.obs_data_path_list
    ]

    for wid in range(TESTING_SLIDING_WINDOW, len(MONTH_LIST)):
        dir_log_exp = os.path.join(dir_log_target, str(MONTH_LIST[wid]))
        months = get_month_list(eval_mode, wid)
        for generator in generators:
            generator.set_data(months)
            generator.prepare_data(config.target_size,
                                   train_step=config.train_step,
                                   test_step=config.test_step,
                                   single_step=config.single_step)
        run(generators, dir_log_exp, target)

    csv_list = ['metrics_model.csv', 'metrics_nwp.csv']
    # Direction results use a dedicated reducer.
    reducer = (reduce_multiple_splits_dir
               if target == 'DIR' else reduce_multiple_splits)
    reducer(dir_log_target, csv_list)
Пример #9
0
def plot(tag, config, target, mhstn_root, lstm_root=None):
    """Plot observed, NWP, optional LSTM and MHSTN series per station.

    Uses run index 0 and the last entry of ``MONTH_LIST``. Each station's
    figure is saved twice into ``DIR_LOG/<tag>/<target>/0``: once through
    ``plt.savefig`` under the bare station name (no extension is given, so
    Matplotlib chooses the format) and once as ``<station>.pdf``.

    Args:
        tag: Sub-directory of ``DIR_LOG`` that receives the plots.
        config: Settings object; reads ``obs_data_path_list``, ``period``,
            ``window``, ``target_size``, ``train_step``, ``test_step`` and
            ``single_step``.
        target: Attribute passed to ``extract_evaluation_data``; also part
            of the input and output paths.
        mhstn_root: Directory under ``DIR_LOG`` holding the MHSTN
            predictions (``y_pred_<station>_combine_module_conv.txt``).
        lstm_root: Optional directory under ``DIR_LOG`` whose ``history``
            sub-tree holds LSTM predictions; when ``None`` the LSTM curve
            is omitted.
    """
    i_run = 0
    month = str(MONTH_LIST[-1])
    dir_log = make_dir(os.path.join(DIR_LOG, tag, target, str(i_run)))

    data_generator_list = []
    for obs_data_path in config.obs_data_path_list:
        data_generator = DataGenerator(config.period,
                                       config.window,
                                       path=obs_data_path)
        data_generator.prepare_data(config.target_size,
                                    train_step=config.train_step,
                                    test_step=config.test_step,
                                    single_step=config.single_step)
        data_generator_list.append(data_generator)

    for data_generator in data_generator_list:
        station_name = data_generator.station_name

        _, nwp, obs, _ = data_generator.extract_evaluation_data(target)

        # Drawing starts before the prediction files are read. Close the
        # current figure even on failure (e.g. a missing prediction file) so
        # a half-drawn figure never stays current in pyplot's global state.
        try:
            plt.plot(obs, label='TRUTH')
            plt.plot(nwp, label='NWP')

            if lstm_root is not None:
                lstm_h_path = '{}/history/{}/{}/{}/y_pred_{}.txt'.format(
                    lstm_root, target, month, str(i_run), station_name)
                lstm_h_pred = np.loadtxt(os.path.join(DIR_LOG, lstm_h_path))
                plt.plot(lstm_h_pred, label='LSTM(h)')

            mhstn_path = '{}/{}/{}/{}/y_pred_{}_combine_module_conv.txt'.format(
                mhstn_root, target, month, str(i_run), station_name)
            mhstn_pred = np.loadtxt(os.path.join(DIR_LOG, mhstn_path))
            plt.plot(mhstn_pred, label='MHSTN')

            plt.legend(loc='best')
            plt.ylabel('Value (meter/second)')
            plt.xlabel('Time (hours)')
            plt.tight_layout()

            plt.savefig(os.path.join(dir_log, "{}".format(station_name)),
                        dpi=750,
                        bbox_inches='tight')

            # The context manager closes the PDF file even if savefig raises.
            pdf_path = os.path.join(dir_log, "{}.pdf".format(station_name))
            with PdfPages(pdf_path) as pdf:
                pdf.savefig()
        finally:
            plt.close()
Пример #10
0
        features_selected_dic[station_name] = features_selected
    with open(
            os.path.join(dir_log, '{}_selected_features.json'.format(method)),
            'w') as f:
        json.dump(features_selected_dic, f)


# Script entry point: set up the configuration and log directory, prepare one
# data generator per observation file, then fetch the covariate list.
if __name__ == '__main__':
    tag = tag_path(os.path.abspath(__file__), 2)
    config = DefaultConfig()
    # Override the configured single-step setting for this script.
    config.single_step = False

    target = 'V'
    method = 'ridge'
    # Output goes to DIR_LOG/<tag>/<target>.
    dir_log = os.path.join(DIR_LOG, tag, target)
    make_dir(dir_log)

    # One prepared generator per path in config.obs_data_path_list.
    data_generator_list = []
    for obs_data_path in config.obs_data_path_list:
        data_generator = DataGenerator(config.period,
                                       config.window,
                                       path=obs_data_path,
                                       norm=config.norm,
                                       x_divide_std=config.x_divide_std)
        data_generator.prepare_data(config.target_size,
                                    train_step=config.train_step,
                                    test_step=config.test_step,
                                    single_step=config.single_step)
        data_generator_list.append(data_generator)

    features = get_covariates_future_all()
Пример #11
0

def load_data(path):
    """Load a CSV file with a ``DateTime`` column and numeric data columns.

    The ``DateTime`` column is parsed with ``pd.to_datetime`` and every other
    column is cast to float. The builtin ``float`` is used for the cast
    because the ``np.float`` alias no longer exists in current NumPy
    releases; both denote 64-bit floats.

    Args:
        path: Path of the CSV file to read.

    Returns:
        The loaded ``pandas.DataFrame``.
    """
    df = pd.read_csv(path)
    df['DateTime'] = pd.to_datetime(df['DateTime'])
    value_columns = [name for name in df.columns if name != 'DateTime']
    df[value_columns] = df[value_columns].astype(float)
    print("Finish to load data from path={}:".format(path), df.shape)
    return df


# Script entry point: load every station's observation file and the NWP file,
# then run data checks on them.
if __name__ == '__main__':
    tag = tag_path(os.path.abspath(__file__), 2)
    config = DefaultConfig()
    dir_log = make_dir(os.path.join(DIR_LOG, tag))

    # Despite the name, this is a dict keyed by station name.
    df_obs_list = {}
    for path in config.obs_data_path_list:
        station_name = get_station_name(path)
        df = load_data(path)
        df_obs_list[station_name] = df
    df_nwp = load_data(config.nwp_path)

    # Missing-value / sanity checks.
    print("**** check data")
    print("** station")
    TAG_MISSING = get_missing_tag()

    def check_spd(df_spd):
        # NOTE(review): `(values < 0) & (values > 15)` can never be true for
        # any value, so this selects nothing. `|` (below 0 OR above 15) was
        # probably intended -- confirm and fix.
        spd_outliers = np.where((df_spd.values < 0) & (df_spd.values > 15))[0]
Пример #12
0
def main(tag,
         config: DefaultConfig,
         target,
         mode,
         eval_mode,
         cls_model,
         csv_result_list=None):
    """Run or summarise the sliding-window experiment for one target.

    The part of ``mode`` after the last ``'-'`` names the log sub-directory.
    Run modes choose the input features:

    * ``'run-history'``: ``[target]``
    * ``'run-future'``: ``['NEXT_NWP_<target>']``
    * ``'run-history_future'``: both of the above

    The feature list does not depend on the window, so it is resolved (and
    the mode validated) once, before any data is loaded. Modes starting with
    ``'reduce'`` call ``reduce`` on the result CSV files instead.

    Args:
        tag: Sub-directory of ``DIR_LOG`` for this experiment.
        config: Settings object; reads ``period``, ``window``,
            ``obs_data_path_list``, ``target_size``, ``train_step``,
            ``test_step``, ``single_step``, ``n_runs``, ``n_epochs`` and
            ``station_name_list``.
        target: Predicted attribute; also a directory name and the single
            ``y_attributes`` entry.
        mode: One of the run modes above, or a mode starting with
            ``'reduce'``.
        eval_mode: Forwarded to ``get_month_list``.
        cls_model: Forwarded to ``run``.
        csv_result_list: CSV file names for the reduce step; defaults to
            ``['metrics_model.csv', 'metrics_nwp.csv']``.

    Raises:
        ValueError: If ``mode`` starts with ``'run'`` but is not one of the
            three supported run modes.
    """
    dir_log_mode = os.path.join(DIR_LOG, tag, mode.split('-')[-1])
    dir_log_target = os.path.join(dir_log_mode, target)
    make_dir(dir_log_target)

    if mode.startswith('run'):
        # Resolve the features first so an unsupported run mode fails
        # before any station data is loaded or prepared.
        nwp_feature = 'NEXT_NWP_{}'.format(target)
        features_by_mode = {
            'run-history': [target],
            'run-future': [nwp_feature],
            'run-history_future': [target, nwp_feature],
        }
        if mode not in features_by_mode:
            raise ValueError('mode={} can not be found!'.format(mode))
        features = features_by_mode[mode]

        data_generator_list = []
        for obs_data_path in config.obs_data_path_list:
            data_generator = DataGenerator(config.period,
                                           config.window,
                                           path=obs_data_path)
            data_generator_list.append(data_generator)

        for wid in range(TESTING_SLIDING_WINDOW, len(MONTH_LIST)):
            dir_log_exp = os.path.join(dir_log_target, str(MONTH_LIST[wid]))
            months = get_month_list(eval_mode, wid)
            for data_generator in data_generator_list:
                data_generator.set_data(months)
                data_generator.prepare_data(config.target_size,
                                            train_step=config.train_step,
                                            test_step=config.test_step,
                                            single_step=config.single_step)

            # One entry per station, in generator order.
            x_train_list, x_val_list, x_test_list = [], [], []
            y_train_list, y_val_list, y_test_list = [], [], []
            for data_generator in data_generator_list:
                (x_train, y_train), (x_val, y_val), (x_test, y_test) = \
                    data_generator.extract_training_data(x_attributes=features, y_attributes=[target])
                x_train_list.append(x_train)
                x_val_list.append(x_val)
                x_test_list.append(x_test)
                y_train_list.append(y_train)
                y_val_list.append(y_val)
                y_test_list.append(y_test)
            # Shape of the first station's training inputs without the
            # leading axis.
            input_shape = x_train_list[0].shape[1:]

            batch_run(
                config.n_runs, dir_log_exp, lambda dir_log_curr: run(
                    data_generator_list, cls_model, dir_log_curr, target,
                    config.n_epochs, x_train_list, x_val_list, x_test_list,
                    y_train_list, y_val_list, y_test_list, input_shape))

    elif mode.startswith('reduce'):
        if csv_result_list is None:
            csv_result_list = ['metrics_model.csv', 'metrics_nwp.csv']
        reduce(csv_result_list, target, dir_log_target, config.n_runs,
               config.station_name_list)