def test_da_test(self):
    """Run the trained DA-LSTM on the test set and persist its results.

    Loads the serialized test data model from the Temp directory, runs
    inference on GPU 2, converts normalized flow back to physical units
    using basin area and mean precipitation, then serializes the pred/obs
    arrays next to the input data.
    """
    with torch.cuda.device(2):
        test_model = GagesModel.load_datamodel(
            self.config_data.data_path["Temp"],
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        da_model = GagesDaDataModel(test_model)
        pred, obs = test_lstm_da(da_model, epoch=300)

        inner_model = da_model.data_model
        site_ids = inner_model.t_s_dict["sites_id"]
        basin_area = inner_model.data_source.read_attr(
            site_ids, ['DRAIN_SQKM'], is_return_dict=False)
        mean_prep = inner_model.data_source.read_attr(
            site_ids, ['PPTAVG_BASIN'], is_return_dict=False)
        # PPTAVG_BASIN appears to be cm/year; /365*10 converts it to
        # mm/day -- TODO confirm units against the GAGES-II attribute docs
        mean_prep = mean_prep / 365 * 10
        pred = _basin_norm(pred, basin_area, mean_prep, to_norm=False)
        obs = _basin_norm(obs, basin_area, mean_prep, to_norm=False)

        temp_dir = inner_model.data_source.data_config.data_path['Temp']
        serialize_numpy(pred, os.path.join(temp_dir, 'flow_pred'))
        serialize_numpy(obs, os.path.join(temp_dir, 'flow_obs'))
def test_test_gages_iter(self):
    """Test each basin's model one-by-one on GPU 1 and save stacked results.

    For every site in the loaded test data model, a single-site data model
    is carved out, tested, de-normalized to physical flow units, and the
    flattened series are stacked into pred/obs arrays that are serialized
    under the Temp directory.

    NOTE(review): another method later in this file carries this same name;
    if both sit in one class, the later definition shadows this one --
    confirm which is intended.
    """
    full_model = GagesModel.load_datamodel(
        self.config_data.data_path["Temp"],
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    with torch.cuda.device(1):
        all_obs = []
        all_pred = []
        for site_idx in range(0, full_model.data_flow.shape[0]):
            print("\n", "Testing model", str(site_idx + 1), ":\n")
            site_model = GagesModel.which_data_model(full_model, site_idx)
            pred, obs = master_test_1by1(site_model)
            site_ids = site_model.t_s_dict["sites_id"]
            basin_area = site_model.data_source.read_attr(
                site_ids, ['DRAIN_SQKM'], is_return_dict=False)
            mean_prep = site_model.data_source.read_attr(
                site_ids, ['PPTAVG_BASIN'], is_return_dict=False)
            # convert precipitation to mm/day (cm/year assumed -- TODO confirm)
            mean_prep = mean_prep / 365 * 10
            pred = _basin_norm(pred, basin_area, mean_prep, to_norm=False)
            obs = _basin_norm(obs, basin_area, mean_prep, to_norm=False)
            all_obs.append(obs.flatten())
            all_pred.append(pred.flatten())
        temp_dir = full_model.data_source.data_config.data_path['Temp']
        serialize_numpy(np.array(all_pred), os.path.join(temp_dir, 'flow_pred'))
        serialize_numpy(np.array(all_obs), os.path.join(temp_dir, 'flow_obs'))
def test_test_gages_iter(self):
    """Test basins one-by-one for every configured region and save results.

    For each region ``i`` the test data model is loaded from ``exp<i>``
    next to the configured Temp directory; each of its sites is tested
    individually, de-normalized, and stacked. Region-level stacks are then
    concatenated row-wise and serialized to ``self.flow_pred_file`` /
    ``self.flow_obs_file``.

    NOTE(review): an earlier method in this file shares this name; if both
    sit in one class, this definition shadows the earlier one -- confirm
    which is intended.
    """
    data_config = self.config_data.read_data_config()
    regions = data_config["regions"]
    data_model_test_lst = []
    with torch.cuda.device(1):
        region_obs = []
        region_pred = []
        for region_idx in range(1, len(regions) + 1):
            parent_dir = '/'.join(
                self.config_data.data_path['Temp'].split('/')[:-1])
            region_dir = os.path.join(parent_dir, "exp" + str(region_idx))
            region_model = GagesModel.load_datamodel(
                region_dir,
                data_source_file_name='test_data_source.txt',
                stat_file_name='test_Statistics.json',
                flow_file_name='test_flow.npy',
                forcing_file_name='test_forcing.npy',
                attr_file_name='test_attr.npy',
                f_dict_file_name='test_dictFactorize.json',
                var_dict_file_name='test_dictAttribute.json',
                t_s_dict_file_name='test_dictTimeSpace.json')
            data_model_test_lst.append(region_model)
            site_obs = []
            site_pred = []
            for site_idx in range(0, region_model.data_flow.shape[0]):
                print("\n", "Testing model", str(site_idx + 1), "of",
                      regions[region_idx - 1], "region", ":\n")
                site_model = GagesModel.which_data_model(
                    region_model, site_idx)
                pred, obs = master_test_1by1(site_model)
                site_ids = site_model.t_s_dict["sites_id"]
                basin_area = site_model.data_source.read_attr(
                    site_ids, ['DRAIN_SQKM'], is_return_dict=False)
                mean_prep = site_model.data_source.read_attr(
                    site_ids, ['PPTAVG_BASIN'], is_return_dict=False)
                # convert precipitation to mm/day (cm/year assumed -- TODO confirm)
                mean_prep = mean_prep / 365 * 10
                pred = _basin_norm(pred, basin_area, mean_prep, to_norm=False)
                obs = _basin_norm(obs, basin_area, mean_prep, to_norm=False)
                site_obs.append(obs.flatten())
                site_pred.append(pred.flatten())
            region_obs.append(np.array(site_obs))
            region_pred.append(np.array(site_pred))
        # row-stack all regions' results into single 2-D arrays
        obs_final = np.vstack(region_obs)
        pred_final = np.vstack(region_pred)
        serialize_numpy(pred_final, self.flow_pred_file)
        serialize_numpy(obs_final, self.flow_obs_file)
def save_result(save_dir, epoch, pred, obs, pred_name='flow_pred', obs_name='flow_obs'):
    """Serialize testing-period predictions and observations.

    Files are written under ``save_dir`` as ``epoch<epoch><pred_name>`` and
    ``epoch<epoch><obs_name>`` (e.g. ``epoch300flow_pred``).
    """
    prefix = 'epoch' + str(epoch)
    serialize_numpy(pred, os.path.join(save_dir, prefix + pred_name))
    serialize_numpy(obs, os.path.join(save_dir, prefix + obs_name))
def test_basin_avg_netcdf(self):
    """Compute basin-averaged values of a netcdf variable over one basin.

    Reads a netcdf file, averages its variable over the region covered by
    the basin 01013500 shapefile (using a cached mask file), and serializes
    the averaged series.

    Optimizations noted by the original author (translated):
    1. region membership could first reject most points via the shapefile
       bounding box, only testing points inside the bounds;
    2. the remaining optimizations mirror those under shp_trans_coord.
    """
    nc_path = self.netcdf_file
    basin_shp = os.path.join(self.shpfile_folder, "01013500.shp")
    basin_mask = os.path.join(self.shpfile_folder, "mask_01013500")
    avgs = basin_avg_netcdf(nc_path, basin_shp, basin_mask)
    out_file = os.path.join(self.netcdf_dir,
                            "daymet_01013500_mean_2000_myself")
    serialize_numpy(np.array(avgs), out_file)
def test_test_gages_sim(self):
    """Test the simulated-natural-flow LSTM pair and persist its output.

    Loads the two serialized test data models (sub-dirs "1" and "2") on
    GPU 2, bumps model 1's training epochs to 300, runs the natural-flow
    master test at ``self.test_epoch``, de-normalizes flow with basin area
    and mean precipitation, and serializes epoch-tagged pred/obs arrays
    into model 2's Temp directory.
    """
    with torch.cuda.device(2):
        sim_model = GagesModel.load_datamodel(
            self.config_data_lstm.data_path["Temp"], "1",
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        sim_model.update_model_param('train', nEpoch=300)
        lstm_model = GagesModel.load_datamodel(
            self.config_data_lstm.data_path["Temp"], "2",
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        model_input = GagesSimDataModel(sim_model, lstm_model)
        pred, obs = master_test_natural_flow(model_input,
                                             epoch=self.test_epoch)

        second_model = model_input.data_model2
        site_ids = second_model.t_s_dict["sites_id"]
        basin_area = second_model.data_source.read_attr(
            site_ids, ['DRAIN_SQKM'], is_return_dict=False)
        mean_prep = second_model.data_source.read_attr(
            site_ids, ['PPTAVG_BASIN'], is_return_dict=False)
        # convert precipitation to mm/day (cm/year assumed -- TODO confirm)
        mean_prep = mean_prep / 365 * 10
        pred = _basin_norm(pred, basin_area, mean_prep, to_norm=False)
        obs = _basin_norm(obs, basin_area, mean_prep, to_norm=False)

        temp_dir = second_model.data_source.data_config.data_path['Temp']
        epoch_tag = 'epoch' + str(self.test_epoch)
        serialize_numpy(pred, os.path.join(temp_dir, epoch_tag + 'flow_pred'))
        serialize_numpy(obs, os.path.join(temp_dir, epoch_tag + 'flow_obs'))
def test_forecast(self):
    """Rebuild the test DataModel from its serialized parts and run it.

    The data model is stored piecewise because serializing it whole is
    slow: dicts go to JSON files, arrays to numpy files, and the data
    source to a pickle. After testing, the pred/obs arrays are serialized.
    """
    source_data = unserialize_pickle(self.data_source_test_file)
    stat_dict = unserialize_json(self.stat_file)
    data_flow = unserialize_numpy(self.flow_npy_file)
    data_forcing = unserialize_numpy(self.forcing_npy_file)
    data_attr = unserialize_numpy(self.attr_npy_file)
    # dictFactorize.json explains the values of categorical variables
    var_dict = unserialize_json(self.var_dict_file)
    f_dict = unserialize_json(self.f_dict_file)
    t_s_dict = unserialize_json(self.t_s_dict_file)
    data_model_test = DataModel(source_data, data_flow, data_forcing,
                                data_attr, var_dict, f_dict, stat_dict,
                                t_s_dict)
    pred, obs = hydroDL.master_test(data_model_test)
    print(pred)
    print(obs)
    serialize_numpy(pred, self.flow_pred_file)
    serialize_numpy(obs, self.flow_obs_file)
def save_datamodel(data_model, num_str=None, **kwargs):
    """Serialize every part of ``data_model`` into its Temp directory.

    When ``num_str`` is given, files go to a ``<Temp>/<num_str>`` sub-dir
    (created if absent). Required kwargs (a KeyError is raised before any
    file is written if one is missing): data_source_file_name,
    stat_file_name, flow_file_name, forcing_file_name, attr_file_name,
    f_dict_file_name, var_dict_file_name, t_s_dict_file_name.
    """
    dir_temp = data_model.data_source.data_config.data_path["Temp"]
    if num_str:
        dir_temp = os.path.join(dir_temp, num_str)
    if not os.path.isdir(dir_temp):
        os.makedirs(dir_temp)
    # resolve every target path first so a missing kwarg fails fast,
    # before anything has been written to disk
    paths = {key: os.path.join(dir_temp, kwargs[key + '_file_name'])
             for key in ('data_source', 'stat', 'flow', 'forcing', 'attr',
                         'f_dict', 'var_dict', 't_s_dict')}
    serialize_pickle(data_model.data_source, paths['data_source'])
    serialize_json(data_model.stat_dict, paths['stat'])
    serialize_numpy(data_model.data_flow, paths['flow'])
    serialize_numpy(data_model.data_forcing, paths['forcing'])
    serialize_numpy(data_model.data_attr, paths['attr'])
    # dictFactorize.json explains the values of categorical variables
    serialize_json(data_model.f_dict, paths['f_dict'])
    serialize_json(data_model.var_dict, paths['var_dict'])
    serialize_json(data_model.t_s_dict, paths['t_s_dict'])
def test_Susquehanna(self):
    """Build the Susquehanna data model, test it on GPU 1, and plot.

    Constructs the data source for the test time range, runs the master
    test at epoch 300, serializes pred/obs into the Temp directory, and
    renders the standard result plots.
    """
    t_test = self.config_data.model_dict["data"]["tRangeTest"]
    source_data = SusquehannaSource(self.config_data, t_test)
    # build the input data-model object from the source
    data_model = SusquehannaModel(source_data)
    with torch.cuda.device(1):
        pred, obs = master_test(data_model, epoch=300)
        temp_dir = data_model.data_source.data_config.data_path['Temp']
        serialize_numpy(pred, os.path.join(temp_dir, 'flow_pred'))
        serialize_numpy(obs, os.path.join(temp_dir, 'flow_obs'))
        plot_we_need(data_model, obs, pred,
                     id_col="id", lon_col="lon", lat_col="lat")
def test_test_camels(self):
    """Test the CAMELS model, de-normalize its flow, save and plot results.

    Loads the serialized CAMELS test data model, runs inference on GPU 1
    at epoch 300, converts normalized flow back to physical units using
    basin area (``area_gages2``) and mean precipitation (``p_mean``),
    serializes pred/obs under the Temp directory, and plots them.
    """
    camels_model = CamelsModel.load_datamodel(
        self.config_data.data_path["Temp"],
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    with torch.cuda.device(1):
        pred, obs = master_test(camels_model, epoch=300)
        site_ids = camels_model.t_s_dict["sites_id"]
        basin_area = camels_model.data_source.read_attr(
            site_ids, ['area_gages2'], is_return_dict=False)
        # p_mean is presumably already mm/day, so no unit conversion here
        # (unlike the GAGES PPTAVG_BASIN paths) -- TODO confirm
        mean_prep = camels_model.data_source.read_attr(
            site_ids, ['p_mean'], is_return_dict=False)
        pred = _basin_norm(pred, basin_area, mean_prep, to_norm=False)
        obs = _basin_norm(obs, basin_area, mean_prep, to_norm=False)
        temp_dir = camels_model.data_source.data_config.data_path['Temp']
        serialize_numpy(pred, os.path.join(temp_dir, 'flow_pred'))
        serialize_numpy(obs, os.path.join(temp_dir, 'flow_obs'))
        plot_we_need(camels_model, obs, pred,
                     id_col="id", lon_col="lon", lat_col="lat")
def test_data_model_test(self):
    """Build the test DataModel and serialize it piecewise.

    Serializing the data model whole is slow, so each part is stored
    separately: dicts as JSON files and arrays as numpy files.
    """
    source_data = unserialize_pickle(self.data_source_test_file)
    data_model = DataModel(source_data)
    serialize_json(data_model.stat_dict, self.stat_file)
    serialize_numpy(data_model.data_flow, self.flow_file)
    serialize_numpy(data_model.data_forcing, self.forcing_file)
    serialize_numpy(data_model.data_attr, self.attr_file)
    # dictFactorize.json explains the values of categorical variables
    serialize_json(data_model.f_dict, self.f_dict_file)
    serialize_json(data_model.var_dict, self.var_dict_file)
    serialize_json(data_model.t_s_dict, self.t_s_dict_file)