def main(tag, config: DefaultConfig, target, mode, eval_mode, model_name='gbrt',
         csv_result_list=None, features=None, with_spatial=False):
    """Run the per-window experiments for one target, or reduce their results.

    Logs go to ``DIR_LOG/<tag>/<model_name>/<target>``.

    Args:
        tag: Experiment tag used as a sub-directory of ``DIR_LOG``.
        config: Settings object (period, window, steps, paths, ``n_runs``,
            ``station_name_list``).
        target: Name of the predicted variable; also used as ``y_attributes``.
        mode: A string starting with ``'run'`` launches the experiments, one
            starting with ``'reduce'`` aggregates CSV results. Any other value
            only creates the log directories.
        eval_mode: Forwarded to ``get_month_list``.
        model_name: Forwarded to the run function and used in the log path.
        csv_result_list: CSV names to reduce; defaults to the model/NWP
            metrics files.
        features: Input attributes; defaults to the target and its
            ``NEXT_NWP_`` counterpart.
        with_spatial: Use ``run_spatial`` instead of ``run`` when true.
    """
    # NOTE: make_dir is used for its return value here, i.e. it is expected
    # to hand back the path it was given.
    model_dir = make_dir(os.path.join(DIR_LOG, tag, model_name))
    target_dir = make_dir(os.path.join(model_dir, target))

    if features is None:
        features = [target, 'NEXT_NWP_{}'.format(target)]

    if mode.startswith('reduce'):
        if csv_result_list is None:
            csv_result_list = ['metrics_model.csv', 'metrics_nwp.csv']
        reduce(csv_result_list, target, target_dir, config.n_runs,
               config.station_name_list)
        return
    if not mode.startswith('run'):
        return

    generators = [
        DataGenerator(config.period, config.window, path=obs_path)
        for obs_path in config.obs_data_path_list
    ]

    for wid in range(TESTING_SLIDING_WINDOW, len(MONTH_LIST)):
        window_dir = make_dir(os.path.join(target_dir, str(MONTH_LIST[wid])))
        months = get_month_list(eval_mode, wid)

        for generator in generators:
            generator.set_data(months)
            generator.prepare_data(config.target_size,
                                   train_step=config.train_step,
                                   test_step=config.test_step,
                                   single_step=config.single_step)

        # One ((x_train, y_train), (x_val, y_val), (x_test, y_test)) triple
        # per station, in station order.
        splits = [
            generator.extract_training_data(x_attributes=features,
                                            y_attributes=[target])
            for generator in generators
        ]
        x_train_list = [train[0] for train, _, _ in splits]
        y_train_list = [train[1] for train, _, _ in splits]
        x_val_list = [val[0] for _, val, _ in splits]
        y_val_list = [val[1] for _, val, _ in splits]
        x_test_list = [test[0] for _, _, test in splits]
        y_test_list = [test[1] for _, _, test in splits]

        run_func = run_spatial if with_spatial else run

        def run_once(dir_log_curr):
            # Single repetition; batch_run supplies the per-run directory.
            return run_func(generators, model_name, dir_log_curr, target,
                            x_train_list, x_val_list, x_test_list,
                            y_train_list, y_val_list, y_test_list)

        batch_run(config.n_runs, window_dir, run_once)
def main(mode, config: DefaultConfig, eval_mode, file_exp_in, tag_file_list):
    """Derive ``DIR`` predictions from stored ``VX``/``VY`` runs, or reduce them.

    Args:
        mode: A string starting with ``'run'`` converts the stored component
            predictions for every window; one starting with ``'reduce'``
            aggregates the resulting metric CSV files.
        config: Settings object (period, window, steps, paths, ``n_runs``,
            ``station_name_list``).
        eval_mode: Forwarded to ``get_month_list``.
        file_exp_in: Experiment directory (relative to ``DIR_LOG``) holding the
            ``VX`` and ``VY`` sub-directories; ``DIR`` output goes next to them.
        tag_file_list: File tags to process; a ``None`` entry means
            "no suffix".
    """
    target = 'DIR'
    experiment_dir = os.path.join(DIR_LOG, file_exp_in)
    target_dir = os.path.join(experiment_dir, target)
    make_dir(target_dir)

    if mode.startswith('run'):
        generators = [
            DataGenerator(config.period, config.window, path=obs_path)
            for obs_path in config.obs_data_path_list
        ]
        for wid in range(TESTING_SLIDING_WINDOW, len(MONTH_LIST)):
            month = str(MONTH_LIST[wid])
            window_dir = os.path.join(target_dir, month)
            months = get_month_list(eval_mode, wid)
            for generator in generators:
                generator.set_data(months)
                generator.prepare_data(config.target_size,
                                       train_step=config.train_step,
                                       test_step=config.test_step,
                                       single_step=config.single_step)

            # Component predictions live in sibling folders of the DIR output.
            dir_vx = os.path.join(experiment_dir, 'VX', month)
            dir_vy = os.path.join(experiment_dir, 'VY', month)
            for tag_file in tag_file_list:
                run_dir_from_vxy(generators, dir_vx, dir_vy, window_dir,
                                 tag_file, n_runs=config.n_runs)
    elif mode.startswith('reduce'):
        csv_names = []
        for tag_file in tag_file_list:
            if tag_file is None:
                csv_names += ['metrics_model.csv', 'metrics_nwp.csv']
            else:
                csv_names += ['metrics_model_{}.csv'.format(tag_file),
                              'metrics_nwp_{}.csv'.format(tag_file)]
        reduce(csv_names, target, target_dir, config.n_runs,
               config.station_name_list)
def main(tag, mode):
    """Run one of the correlation analyses over all stations.

    The mode is validated before any directory is created or any station
    data is prepared, so a misspelled mode fails immediately.

    Args:
        tag: Experiment tag; results go to ``DIR_LOG/<tag>/<mode>``.
        mode: One of ``'corr_with_nwp'``, ``'corr_variables'``,
            ``'corr_spatial'`` or ``'corr_auto'``.

    Raises:
        ValueError: If ``mode`` is not one of the supported analyses.
    """
    analyses = {
        'corr_with_nwp': corr_with_nwp,
        'corr_variables': corr_variables,
        'corr_spatial': corr_spatial,
        'corr_auto': corr_auto,
    }
    if mode not in analyses:
        raise ValueError('The mode = {} can not be found!'.format(mode))

    dir_log = os.path.join(DIR_LOG, tag, mode)
    make_dir(dir_log)

    data_generator_list = []
    for obs_data_path in DefaultConfig.obs_data_path_list:
        data_generator = DataGenerator(DefaultConfig.period,
                                       DefaultConfig.window,
                                       path=obs_data_path)
        data_generator.prepare_data(DefaultConfig.target_size,
                                    train_step=DefaultConfig.train_step,
                                    test_step=DefaultConfig.test_step,
                                    single_step=DefaultConfig.single_step)
        data_generator_list.append(data_generator)

    analyses[mode](data_generator_list, dir_log)
def run_dir_from_vxy(data_generator_list, dir_vx, dir_vy, dir_log,
                     tag_file=None, n_runs=10, target='DIR'):
    """Combine stored VX/VY predictions into direction predictions per run.

    For every run index and station, the two component prediction files are
    loaded, converted with ``vxy_to_dir_vec``, plotted against the
    observations, fed to the model/NWP evaluators and written back as text.

    Args:
        data_generator_list: Prepared per-station data generators.
        dir_vx: Directory holding one sub-directory per run with VX predictions.
        dir_vy: Same layout as ``dir_vx`` for VY predictions.
        dir_log: Output directory; one sub-directory is created per run.
        tag_file: Optional tag appended (with ``_``) to file names.
        n_runs: Number of run indices to process.
        target: Variable passed to ``extract_evaluation_data``.
    """
    file_suffix = '_' + tag_file if tag_file is not None else ""

    for i_run in range(n_runs):
        run_id = str(i_run)
        dir_log_curr = os.path.join(dir_log, run_id)
        make_dir(dir_log_curr)

        evaluator_model = EvaluatorDir(dir_log_curr, 'model' + file_suffix)
        evaluator_nwp = EvaluatorDir(dir_log_curr, 'nwp' + file_suffix)

        for generator in data_generator_list:
            station_name = generator.station_name
            _speed, nwp, obs, filter_big_wind = \
                generator.extract_evaluation_data(target)

            # Input and output prediction files share the same name.
            pred_name = 'y_pred_{}.txt'.format(station_name + file_suffix)
            y_pred_vx = np.loadtxt(os.path.join(dir_vx, run_id, pred_name))
            y_pred_vy = np.loadtxt(os.path.join(dir_vy, run_id, pred_name))
            y_pred_dir = vxy_to_dir_vec(y_pred_vx, y_pred_vy)

            plot_and_save_comparison(
                obs, y_pred_dir, dir_log_curr,
                filename='compare_{}.png'.format(station_name + file_suffix))
            evaluator_model.append(obs, y_pred_dir, filter_big_wind,
                                   key=station_name)
            evaluator_nwp.append(obs, nwp, filter_big_wind, key=station_name)
            np.savetxt(os.path.join(dir_log_curr, pred_name), y_pred_dir)
def main(tag):
    """Prepare every station's data and plot it with ``visualize_couple``.

    Args:
        tag: Experiment tag; output goes to ``DIR_LOG/<tag>``.
    """
    dir_log = os.path.join(DIR_LOG, tag)
    make_dir(dir_log)

    def prepared_generator(obs_data_path):
        # Build a generator for one station and prepare its data right away.
        generator = DataGenerator(DefaultConfig.period, DefaultConfig.window,
                                  path=obs_data_path)
        generator.prepare_data(DefaultConfig.target_size,
                               train_step=DefaultConfig.train_step,
                               test_step=DefaultConfig.test_step,
                               single_step=DefaultConfig.single_step)
        return generator

    data_generator_list = [
        prepared_generator(path) for path in DefaultConfig.obs_data_path_list
    ]

    # Alternative entry point kept for reference:
    # visualize(data_generator_list, dir_log)
    visualize_couple(data_generator_list, dir_log)
def main(target, mode, eval_mode, config, tag, model_name, features_history,
         features_future, adjacency_norm):
    """Run the spatial model over all sliding windows, or reduce its results.

    Args:
        target: Predicted variable; also the last component of the log path
            ``DIR_LOG/<tag>/<target>``.
        mode: ``'run'`` trains/evaluates, ``'reduce'`` aggregates the metric
            CSV files. Any other value only creates the log directory.
        eval_mode: Forwarded to ``get_month_list``.
        config: Settings object (period, window, steps, normalisation flags,
            ``n_epochs``, ``n_runs``, ``station_name_list``).
        tag: Experiment tag.
        model_name: Forwarded to ``run`` and used in the metric file names.
        features_history: Forwarded to ``run``.
        features_future: Forwarded to ``run``.
        adjacency_norm: Forwarded to ``run``.
    """
    n_runs = config.n_runs
    n_epochs = config.n_epochs
    station_name_list = config.station_name_list

    dir_log_target = os.path.join(DIR_LOG, tag, target)
    make_dir(dir_log_target)

    if mode == 'reduce':
        metric_files = ['metrics_model_{}.csv'.format(model_name),
                        'metrics_nwp_{}.csv'.format(model_name)]
        reduce(metric_files, target, dir_log_target, n_runs, station_name_list)
        return
    if mode != 'run':
        return

    spatial_generator = DataGeneratorSpatial(
        config.period, config.window,
        norm=config.norm, x_divide_std=config.x_divide_std)

    for wid in range(TESTING_SLIDING_WINDOW, len(MONTH_LIST)):
        window_dir = os.path.join(dir_log_target, str(MONTH_LIST[wid]))
        spatial_generator.set_data(get_month_list(eval_mode, wid))
        spatial_generator.prepare_data(config.target_size,
                                       train_step=config.train_step,
                                       test_step=config.test_step,
                                       single_step=config.single_step)

        def run_once(dir_log_curr):
            # Single repetition; batch_run supplies the per-run directory.
            return run(station_name_list, dir_log_curr, spatial_generator,
                       target, n_epochs, features_history, features_future,
                       adjacency_norm, model_name)

        batch_run(n_runs, window_dir, run_once)
def plot_dir(tag, config, target, mhstn_root):
    """Plot truth, NWP and MHSTN predictions plus their errors per station.

    Uses run ``0`` of the last month in ``MONTH_LIST``. For each station a
    two-panel figure (values on top, errors below) is saved as an image and
    as a single-page PDF in ``DIR_LOG/<tag>/<target>/0``.

    Args:
        tag: Experiment tag for the output directory.
        config: Settings object (period, window, steps, ``obs_data_path_list``).
        target: Variable passed to ``extract_evaluation_data``.
        mhstn_root: Directory (relative to ``DIR_LOG``) holding the MHSTN
            prediction files.
    """
    i_run = 0
    month = str(MONTH_LIST[-1])
    # NOTE: make_dir is used for its return value, i.e. the created path.
    dir_log = make_dir(os.path.join(DIR_LOG, tag, target, str(i_run)))

    data_generator_list = []
    for obs_data_path in config.obs_data_path_list:
        data_generator = DataGenerator(config.period, config.window,
                                       path=obs_data_path)
        data_generator.prepare_data(config.target_size,
                                    train_step=config.train_step,
                                    test_step=config.test_step,
                                    single_step=config.single_step)
        data_generator_list.append(data_generator)

    for data_generator in data_generator_list:
        station_name = data_generator.station_name
        _, nwp, obs, _ = data_generator.extract_evaluation_data(target)

        mhstn_path = '{}/{}/{}/{}/y_pred_{}_combine_module_conv.txt'.format(
            mhstn_root, target, month, str(i_run), station_name)
        mhstn_pred = np.loadtxt(os.path.join(DIR_LOG, mhstn_path))

        delta_nwp = cal_delta(obs, nwp)
        delta_cnn = cal_delta(obs, mhstn_pred)

        try:
            grid = plt.GridSpec(3, 1)

            # Upper two thirds: the series themselves.
            ax_main = plt.subplot(grid[0:2, 0])
            plt.plot(obs, label='TRUTH')
            plt.plot(nwp, label='NWP')
            plt.plot(mhstn_pred, label='MHSTN')
            plt.legend(loc='best')
            plt.ylabel('Value (degree)')

            # Lower third: errors against a zero baseline, sharing the x axis.
            plt.subplot(grid[2, 0], sharex=ax_main)
            plt.plot(np.zeros(len(delta_nwp)))
            plt.plot(delta_nwp)
            plt.plot(delta_cnn)
            plt.ylabel('Error (degree)')
            plt.xlabel('Time (hours)')

            plt.savefig(os.path.join(dir_log, "{}".format(station_name)),
                        dpi=750, bbox_inches='tight')
            # The context manager closes the PDF even if savefig raises.
            with PdfPages(os.path.join(dir_log,
                                       "{}.pdf".format(station_name))) as pdf:
                pdf.savefig()
        finally:
            # Always release the figure so a failure on one station does not
            # leave a stale current figure behind.
            plt.close()
def main(tag, config: DefaultConfig, dir_log, eval_mode):
    """Run the experiment for target ``'V'`` on every window, then reduce.

    Args:
        tag: Experiment tag; results go to ``<dir_log>/<tag>/V``.
        config: Settings object (period, window, steps, ``obs_data_path_list``).
        dir_log: Root log directory.
        eval_mode: Forwarded to ``get_month_list``.
    """
    target = 'V'
    dir_log_target = os.path.join(dir_log, tag, target)
    make_dir(dir_log_target)

    generators = [
        DataGenerator(config.period, config.window, path=obs_path)
        for obs_path in config.obs_data_path_list
    ]

    for wid in range(TESTING_SLIDING_WINDOW, len(MONTH_LIST)):
        window_dir = os.path.join(dir_log_target, str(MONTH_LIST[wid]))
        months = get_month_list(eval_mode, wid)
        for generator in generators:
            generator.set_data(months)
            generator.prepare_data(config.target_size,
                                   train_step=config.train_step,
                                   test_step=config.test_step,
                                   single_step=config.single_step)
        run(generators, window_dir, target)

    # Aggregate across all windows once every split has been run.
    csv_list = ['metrics_model.csv', 'metrics_nwp.csv']
    reducer = (reduce_multiple_splits_dir if target == 'DIR'
               else reduce_multiple_splits)
    reducer(dir_log_target, csv_list)
def plot(tag, config, target, mhstn_root, lstm_root=None):
    """Plot truth, NWP, MHSTN and optionally LSTM(h) predictions per station.

    Uses run ``0`` of the last month in ``MONTH_LIST``. Each station's figure
    is saved as an image and as a single-page PDF in
    ``DIR_LOG/<tag>/<target>/0``.

    Args:
        tag: Experiment tag for the output directory.
        config: Settings object (period, window, steps, ``obs_data_path_list``).
        target: Variable passed to ``extract_evaluation_data``.
        mhstn_root: Directory (relative to ``DIR_LOG``) holding the MHSTN
            prediction files.
        lstm_root: Optional directory (relative to ``DIR_LOG``) holding the
            LSTM predictions under ``history/``; skipped when ``None``.
    """
    i_run = 0
    month = str(MONTH_LIST[-1])
    # NOTE: make_dir is used for its return value, i.e. the created path.
    dir_log = make_dir(os.path.join(DIR_LOG, tag, target, str(i_run)))

    data_generator_list = []
    for obs_data_path in config.obs_data_path_list:
        data_generator = DataGenerator(config.period, config.window,
                                       path=obs_data_path)
        data_generator.prepare_data(config.target_size,
                                    train_step=config.train_step,
                                    test_step=config.test_step,
                                    single_step=config.single_step)
        data_generator_list.append(data_generator)

    for data_generator in data_generator_list:
        station_name = data_generator.station_name
        _, nwp, obs, _ = data_generator.extract_evaluation_data(target)

        try:
            plt.plot(obs, label='TRUTH')
            plt.plot(nwp, label='NWP')

            if lstm_root is not None:
                lstm_h_path = '{}/history/{}/{}/{}/y_pred_{}.txt'.format(
                    lstm_root, target, month, str(i_run), station_name)
                lstm_h_pred = np.loadtxt(os.path.join(DIR_LOG, lstm_h_path))
                plt.plot(lstm_h_pred, label='LSTM(h)')

            mhstn_path = '{}/{}/{}/{}/y_pred_{}_combine_module_conv.txt'.format(
                mhstn_root, target, month, str(i_run), station_name)
            mhstn_pred = np.loadtxt(os.path.join(DIR_LOG, mhstn_path))
            plt.plot(mhstn_pred, label='MHSTN')

            plt.legend(loc='best')
            plt.ylabel('Value (meter/second)')
            plt.xlabel('Time (hours)')
            plt.tight_layout()

            plt.savefig(os.path.join(dir_log, "{}".format(station_name)),
                        dpi=750, bbox_inches='tight')
            # The context manager closes the PDF even if savefig raises.
            with PdfPages(os.path.join(dir_log,
                                       "{}.pdf".format(station_name))) as pdf:
                pdf.savefig()
        finally:
            # Always release the figure so a failed station (e.g. a missing
            # prediction file) does not leak its curves into the next plot.
            plt.close()
features_selected_dic[station_name] = features_selected with open( os.path.join(dir_log, '{}_selected_features.json'.format(method)), 'w') as f: json.dump(features_selected_dic, f) if __name__ == '__main__': tag = tag_path(os.path.abspath(__file__), 2) config = DefaultConfig() config.single_step = False target = 'V' method = 'ridge' dir_log = os.path.join(DIR_LOG, tag, target) make_dir(dir_log) data_generator_list = [] for obs_data_path in config.obs_data_path_list: data_generator = DataGenerator(config.period, config.window, path=obs_data_path, norm=config.norm, x_divide_std=config.x_divide_std) data_generator.prepare_data(config.target_size, train_step=config.train_step, test_step=config.test_step, single_step=config.single_step) data_generator_list.append(data_generator) features = get_covariates_future_all()
def load_data(path):
    """Load a CSV file, parsing ``DateTime`` and casting the rest to float.

    Args:
        path: Path of a CSV file that contains a ``DateTime`` column.

    Returns:
        The loaded ``DataFrame`` with ``DateTime`` converted by
        ``pd.to_datetime`` and every other column cast to ``float``.
    """
    df = pd.read_csv(path)
    df['DateTime'] = pd.to_datetime(df['DateTime'])
    columns = list(df.columns)
    columns.remove('DateTime')
    # The ``np.float`` alias was removed from NumPy; the builtin ``float`` is
    # what it pointed to, so the resulting dtype is unchanged.
    df[columns] = df[columns].astype(float)
    print("Finish to load data from path={}:".format(path), df.shape)
    return df


if __name__ == '__main__':
    tag = tag_path(os.path.abspath(__file__), 2)
    config = DefaultConfig()
    dir_log = make_dir(os.path.join(DIR_LOG, tag))

    # Observations keyed by station name.
    df_obs_list = {}
    for path in config.obs_data_path_list:
        station_name = get_station_name(path)
        df = load_data(path)
        df_obs_list[station_name] = df
    df_nwp = load_data(config.nwp_path)

    # missing value
    print("**** check data")
    print("** station")
    TAG_MISSING = get_missing_tag()

    def check_spd(df_spd):
        # A value cannot be below 0 and above 15 at once, so the two range
        # checks must be OR-ed to select anything.
        spd_outliers = np.where((df_spd.values < 0) | (df_spd.values > 15))[0]
def main(tag, config: DefaultConfig, target, mode, eval_mode, cls_model,
         csv_result_list=None):
    """Run a model per sliding window for one feature set, or reduce results.

    Logs go to ``DIR_LOG/<tag>/<mode suffix>/<target>``, where the suffix is
    the part of ``mode`` after the last ``'-'``.

    The feature set depends only on ``mode``, so it is resolved (and the mode
    validated) once, before any directory is created or data is loaded.

    Args:
        tag: Experiment tag.
        config: Settings object (period, window, steps, paths, ``n_epochs``,
            ``n_runs``, ``station_name_list``).
        target: Predicted variable; also used as ``y_attributes``.
        mode: ``'run-history'``, ``'run-future'`` or ``'run-history_future'``
            to run experiments; a string starting with ``'reduce'`` to
            aggregate CSV results. Other values that do not start with
            ``'run'`` only create the log directory.
        eval_mode: Forwarded to ``get_month_list``.
        cls_model: Forwarded to ``run``.
        csv_result_list: CSV names to reduce; defaults to the model/NWP
            metrics files.

    Raises:
        ValueError: If ``mode`` starts with ``'run'`` but is not one of the
            three supported run modes.
    """
    is_run = mode.startswith('run')
    features = None
    if is_run:
        nwp_feature = 'NEXT_NWP_{}'.format(target)
        features_by_mode = {
            'run-history': [target],
            'run-future': [nwp_feature],
            'run-history_future': [target, nwp_feature],
        }
        if mode not in features_by_mode:
            raise ValueError('mode={} can not be found!'.format(mode))
        features = features_by_mode[mode]

    dir_log_mode = os.path.join(DIR_LOG, tag, mode.split('-')[-1])
    dir_log_target = os.path.join(dir_log_mode, target)
    make_dir(dir_log_target)

    if is_run:
        data_generator_list = [
            DataGenerator(config.period, config.window, path=obs_data_path)
            for obs_data_path in config.obs_data_path_list
        ]

        for wid in range(TESTING_SLIDING_WINDOW, len(MONTH_LIST)):
            dir_log_exp = os.path.join(dir_log_target, str(MONTH_LIST[wid]))
            months = get_month_list(eval_mode, wid)
            for data_generator in data_generator_list:
                data_generator.set_data(months)
                data_generator.prepare_data(config.target_size,
                                            train_step=config.train_step,
                                            test_step=config.test_step,
                                            single_step=config.single_step)

            x_train_list, x_val_list, x_test_list = [], [], []
            y_train_list, y_val_list, y_test_list = [], [], []
            for data_generator in data_generator_list:
                (x_train, y_train), (x_val, y_val), (x_test, y_test) = \
                    data_generator.extract_training_data(
                        x_attributes=features, y_attributes=[target])
                x_train_list.append(x_train)
                x_val_list.append(x_val)
                x_test_list.append(x_test)
                y_train_list.append(y_train)
                y_val_list.append(y_val)
                y_test_list.append(y_test)

            # Per-sample input shape, taken from the first station's
            # training array (leading axis dropped).
            input_shape = x_train_list[0].shape[1:]

            batch_run(
                config.n_runs, dir_log_exp,
                lambda dir_log_curr: run(
                    data_generator_list, cls_model, dir_log_curr, target,
                    config.n_epochs, x_train_list, x_val_list, x_test_list,
                    y_train_list, y_val_list, y_test_list, input_shape))
    elif mode.startswith('reduce'):
        if csv_result_list is None:
            csv_result_list = ['metrics_model.csv', 'metrics_nwp.csv']
        reduce(csv_result_list, target, dir_log_target, config.n_runs,
               config.station_name_list)