def test_plot_map(self):
    """Plot each basin's NSE on a map and save the figure.

    Loads the test-stage data model, keeps only the sites whose NSE lies in
    ``nse_range`` and draws them with ``plot_gages_map``.
    """
    data_model = GagesModel.load_datamodel(
        self.dir_temp,
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    # NOTE: removed unused local `gauge_dict` (data_model.data_source.gage_dict
    # was read but never used in this test)
    t_s_dict = unserialize_json(self.t_s_dict_file)
    sites = np.array(t_s_dict["sites_id"])
    keys = ["NSE"]
    inds_test = subset_of_dict(self.inds, keys)
    sites_df = pd.DataFrame({"sites": sites, keys[0]: inds_test[keys[0]]})
    # only show sites whose NSE lies in [0, 1]; strongly negative NSE values
    # would distort the colorbar
    nse_range = [0, 1]
    idx_lst_nse = sites_df[(sites_df[keys[0]] >= nse_range[0]) & (
        sites_df[keys[0]] <= nse_range[1])].index.tolist()
    # [left, bottom, width, height] of the colorbar axes in figure coords
    colorbar_size = [0.91, 0.323, 0.02, 0.346]
    # colorbar_size = None
    plot_gages_map(data_model,
                   sites_df,
                   keys[0],
                   idx_lst_nse,
                   colorbar_size=colorbar_size,
                   cbar_font_size=14)
    plt.savefig(os.path.join(self.dir_out, 'map_NSE.png'),
                dpi=500,
                bbox_inches="tight")
    plt.show()
def plot_we_need(data_model_test, obs, pred, show_me_num=5, point_file=None, **kwargs):
    """Draw the standard evaluation figures for a tested model.

    Produces a metrics box plot, observed-vs-predicted time series, an NSE
    ECDF, and a site map (``plot_map`` with the gauge dict, or
    ``plot_ind_map`` when ``point_file`` is given).

    :param data_model_test: data model of the test period; its "Out" path
        is where the figures are saved
    :param obs: observed streamflow array; assumed (n_sites, n_time[, 1]) —
        the reshape drops a trailing singleton dim (TODO confirm with callers)
    :param pred: predicted streamflow array, same assumed layout as obs
    :param show_me_num: number of sites whose time series are shown
    :param point_file: optional point/shape file for plot_ind_map
    :param kwargs: forwarded to plot_map
    """
    pred = pred.reshape(pred.shape[0], pred.shape[1])
    # BUG FIX: obs was previously reshaped with pred's shape; use obs's own
    # shape so a size mismatch between obs and pred raises instead of
    # silently rearranging the observations.
    obs = obs.reshape(obs.shape[0], obs.shape[1])
    inds = statError(obs, pred)
    # plot box
    keys = ["Bias", "RMSE", "NSE"]
    inds_test = subset_of_dict(inds, keys)
    box_fig = plot_diff_boxes(inds_test)
    box_fig.savefig(
        os.path.join(data_model_test.data_source.data_config.data_path["Out"],
                     "box_fig.png"))
    # plot ts
    t_s_dict = data_model_test.t_s_dict
    sites = np.array(t_s_dict["sites_id"])
    t_range = np.array(t_s_dict["t_final_range"])
    ts_fig = plot_ts_obs_pred(obs, pred, sites, t_range, show_me_num)
    ts_fig.savefig(
        os.path.join(data_model_test.data_source.data_config.data_path["Out"],
                     "ts_fig.png"))
    # plot nse ecdf
    sites_df_nse = pd.DataFrame({"sites": sites, keys[2]: inds_test[keys[2]]})
    plot_ecdf(sites_df_nse, keys[2])
    # plot map
    if point_file is None:
        gauge_dict = data_model_test.data_source.gage_dict
        plot_map(gauge_dict, sites_df_nse, **kwargs)
    else:
        plot_ind_map(point_file, sites_df_nse, percentile=25)
def test_regions_stat(self):
    """Box-plot the NSE distribution of every GAGES-II region."""
    gages_data_model = GagesModel.load_datamodel(
        self.config_data.data_path["Temp"],
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    id_regions_idx, id_regions_sites_ids = ids_of_regions(gages_data_model)
    preds, obss, inds_dfs = split_results_to_regions(
        gages_data_model, self.test_epoch, id_regions_idx,
        id_regions_sites_ids)
    regions_name = [
        "allref", "cntplain", "esthgnlnd", "mxwdshld", "northest",
        "secstplain", "seplains", "wstmnts", "wstplains", "wstxeric"
    ]
    x_name = "regions"
    y_name = "NSE"
    # one DataFrame per region: a column of the region label and a column
    # of that region's NSE values
    region_frames = []
    for region_idx in range(len(id_regions_idx)):
        nse_values = subset_of_dict(inds_dfs[region_idx],
                                    ["NSE"])["NSE"].values
        region_label = regions_name[region_idx]
        region_frames.append(
            pd.DataFrame({
                x_name: np.full([nse_values.size], region_label),
                y_name: nse_values
            }))
    result = pd.concat(region_frames)
    plot_boxs(result, x_name, y_name)
def test_ensemble_results_plot_box(self):
    """Average predictions over several experiments and box-plot the metrics.

    Loads the saved test results of each listed experiment, averages the
    prediction and observation arrays element-wise, then plots Bias/RMSE/NSE
    box plots of the ensemble mean.
    """
    preds = []
    obss = []
    cases_exps = [
        "basic_exp12", "basic_exp13", "basic_exp14", "basic_exp15",
        "basic_exp16", "basic_exp18"
    ]
    for case_exp in cases_exps:
        config_data_i = load_dataconfig_case_exp(case_exp)
        pred_i, obs_i = load_result(config_data_i.data_path['Temp'],
                                    self.test_epoch)
        pred_i = pred_i.reshape(pred_i.shape[0], pred_i.shape[1])
        obs_i = obs_i.reshape(obs_i.shape[0], obs_i.shape[1])
        # NOTE: removed leftover debug `print(obs_i)`
        preds.append(pred_i)
        obss.append(obs_i)
    preds_np = np.array(preds)
    obss_np = np.array(obss)
    # ensemble mean across experiments (axis 0 is the experiment axis)
    pred_mean = np.mean(preds_np, axis=0)
    obs_mean = np.mean(obss_np, axis=0)
    inds = statError(obs_mean, pred_mean)
    keys = ["Bias", "RMSE", "NSE"]
    inds_test = subset_of_dict(inds, keys)
    # figure is created for interactive inspection; not saved here
    box_fig = plot_diff_boxes(inds_test)
def test_inv_plot(self):
    """Load saved inverse-model test results, save metrics CSV, and plot.

    Produces: a CSV of all metrics per site, a Bias/RMSE/NSE box plot, an
    observed-vs-predicted time-series figure, an NSE ECDF, and an NSE map.
    """
    data_model = GagesModel.load_datamodel(
        self.config_data_2.data_path["Temp"], "2",
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    # saved result files are named "epoch<N>flow_pred.npy" / "...flow_obs.npy"
    flow_pred_file = os.path.join(
        data_model.data_source.data_config.data_path['Temp'],
        'epoch' + str(self.test_epoch) + 'flow_pred.npy')
    flow_obs_file = os.path.join(
        data_model.data_source.data_config.data_path['Temp'],
        'epoch' + str(self.test_epoch) + 'flow_obs.npy')
    pred = unserialize_numpy(flow_pred_file)
    obs = unserialize_numpy(flow_obs_file)
    # flatten to 2-D (sites, time); assumes a trailing singleton dim — TODO confirm
    pred = pred.reshape(pred.shape[0], pred.shape[1])
    obs = obs.reshape(obs.shape[0], obs.shape[1])
    inds = statError(obs, pred)
    inds['STAID'] = data_model.t_s_dict["sites_id"]
    inds_df = pd.DataFrame(inds)
    inds_df.to_csv(
        os.path.join(self.config_data_2.data_path["Out"], 'data_df.csv'))
    # plot box (uses the seaborn library)
    keys = ["Bias", "RMSE", "NSE"]
    inds_test = subset_of_dict(inds, keys)
    box_fig = plot_diff_boxes(inds_test)
    box_fig.savefig(
        os.path.join(self.config_data_2.data_path["Out"], "box_fig.png"))
    # plot ts
    show_me_num = 5
    t_s_dict = data_model.t_s_dict
    sites = np.array(t_s_dict["sites_id"])
    t_range = np.array(t_s_dict["t_final_range"])
    time_seq_length = self.config_data_1.model_dict['model']['seqLength']
    # shift the plotted start date forward by seqLength-1 days — presumably
    # the first seqLength-1 days serve as model warm-up input; confirm
    time_start = np.datetime64(t_range[0]) + np.timedelta64(
        time_seq_length - 1, 'D')
    t_range[0] = np.datetime_as_string(time_start, unit='D')
    ts_fig = plot_ts_obs_pred(obs, pred, sites, t_range, show_me_num)
    ts_fig.savefig(
        os.path.join(self.config_data_2.data_path["Out"], "ts_fig.png"))
    # plot nse ecdf
    sites_df_nse = pd.DataFrame({
        "sites": sites,
        keys[2]: inds_test[keys[2]]
    })
    plot_ecdf(sites_df_nse, keys[2])
    # plot map
    gauge_dict = data_model.data_source.gage_dict
    plot_map(gauge_dict, sites_df_nse,
             id_col="STAID", lon_col="LNG_GAGE", lat_col="LAT_GAGE")
def test_plot_ind_map(self):
    """Plot the NSE value of every site on a map."""
    time_space = unserialize_json(self.t_s_dict_file)
    site_ids = np.array(time_space["sites_id"])
    metric = "NSE"
    metric_subset = subset_of_dict(self.inds, [metric])
    # one row per site: its id and its NSE
    nse_df = pd.DataFrame({"sites": site_ids, metric: metric_subset[metric]})
    plot_ind_map(self.gage_point_file, nse_df)
def test_plot_kuai_cdf(self):
    """Draw the CDF of the NSE metric with plotCDF."""
    time_space = unserialize_json(self.t_s_dict_file)
    site_ids = np.array(time_space["sites_id"])
    metric = "NSE"
    metric_subset = subset_of_dict(self.inds, [metric])
    plotCDF([metric_subset[metric]],
            ref=None,
            legendLst=["LSTM"],
            linespec=['-', '-', ':', ':', ':'])
def test_plot_pdf_cdf(self):
    """Draw PDF and CDF figures of the NSE metric."""
    time_space = unserialize_json(self.t_s_dict_file)
    site_ids = np.array(time_space["sites_id"])
    metric = "NSE"
    metric_frame = pd.DataFrame(subset_of_dict(self.inds, [metric]))
    plot_pdf_cdf(metric_frame, metric)
def test_subset_of_dict(self):
    """Print the subset of a price dict restricted to the tech-name keys."""
    stock_prices = {
        'ACME': 45.23,
        'AAPL': 612.78,
        'IBM': 205.55,
        'HPQ': 37.20,
        'FB': 10.75
    }
    # note 'MSFT' is requested but absent from the dict
    wanted_names = ['AAPL', 'IBM', 'HPQ', 'MSFT']
    print(subset_of_dict(stock_prices, wanted_names))
def test_plot_box(self):
    """Box-plot several streamflow metrics and save the figure."""
    # uses the seaborn library under plot_diff_boxes
    metric_keys = ["Bias", "NSE", "FLV", "FHV"]
    metric_values = subset_of_dict(self.inds, metric_keys)
    plot_diff_boxes(metric_values,
                    title_str="Metrics of streamflow prediction")
    plt.savefig(os.path.join(self.dir_out, 'boxes.png'),
                dpi=500,
                bbox_inches="tight")
    plt.show()
def test_forecast_test(self):
    """Test the forecast LSTM and save time-series, box and map figures.

    Combines the simulated-flow data model ("1") with the observed data
    model ("2") into a GagesForecastDataModel, runs the test, and plots.
    """
    sim_df = GagesModel.load_datamodel(
        self.sim_config_data.data_path["Temp"], "1",
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    df = GagesModel.load_datamodel(
        self.config_data.data_path["Temp"], "2",
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    data_input = GagesForecastDataModel(sim_df, df)
    pred, obs = test_lstm_forecast(data_input)
    # flatten to 2-D (sites, time); assumes a trailing singleton dim — TODO confirm
    pred = pred.reshape(pred.shape[0], pred.shape[1])
    obs = obs.reshape(obs.shape[0], obs.shape[1])
    inds = statError(obs, pred)
    show_me_num = 5
    t_s_dict = data_input.model_data.t_s_dict
    sites = np.array(t_s_dict["sites_id"])
    t_range = np.array(t_s_dict["t_final_range"])
    time_seq_length = data_input.model_data.data_source.data_config.model_dict[
        'model']['seqLength']
    # shift the plotted start date by seqLength days — presumably the first
    # seqLength days are consumed as model input; confirm
    time_start = np.datetime64(t_range[0]) + np.timedelta64(
        time_seq_length, 'D')
    t_range[0] = np.datetime_as_string(time_start, unit='D')
    ts_fig = plot_ts_obs_pred(obs, pred, sites, t_range, show_me_num)
    ts_fig.savefig(
        os.path.join(self.config_data.data_path["Out"], "ts_fig.png"))
    # plot box (uses the seaborn library)
    keys = ["Bias", "RMSE", "NSE"]
    inds_test = subset_of_dict(inds, keys)
    box_fig = plot_diff_boxes(inds_test)
    box_fig.savefig(
        os.path.join(self.config_data.data_path["Out"], "box_fig.png"))
    # plot map
    sites_df = pd.DataFrame({"sites": sites, keys[2]: inds_test[keys[2]]})
    plot_ind_map(df.data_source.all_configs['gage_point_file'], sites_df)
def test_explore_test(self):
    """Test every trained model under the Temp dir and save its figures.

    Each sub-directory of Temp is assumed to hold one trained model saved
    under an integer name ("0", "1", ...) — TODO confirm naming convention.
    """
    temp_dir = self.config_data.data_path["Temp"]
    # count model sub-directories idiomatically instead of a manual counter
    models_num = sum(
        os.path.isdir(os.path.join(temp_dir, dir_temp))
        for dir_temp in os.listdir(temp_dir))
    for count in range(models_num):
        print("\n", "testing model", str(count + 1), ":\n")
        data_model = GagesModel.load_datamodel(
            self.config_data.data_path["Temp"], str(count),
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        pred, obs = master_test(data_model)
        # flatten to 2-D (sites, time)
        pred = pred.reshape(pred.shape[0], pred.shape[1])
        obs = obs.reshape(obs.shape[0], obs.shape[1])
        inds = statError(obs, pred)
        # plot ts
        show_me_num = 5
        t_s_dict = data_model.t_s_dict
        sites = np.array(t_s_dict["sites_id"])
        t_range = np.array(t_s_dict["t_final_range"])
        ts_fig = plot_ts_obs_pred(obs, pred, sites, t_range, show_me_num)
        ts_fig.savefig(
            os.path.join(
                data_model.data_source.data_config.data_path["Out"],
                "ts_fig.png"))
        # plot box (uses the seaborn library)
        keys = ["Bias", "RMSE", "NSE"]
        inds_test = subset_of_dict(inds, keys)
        box_fig = plot_diff_boxes(inds_test)
        box_fig.savefig(
            os.path.join(
                data_model.data_source.data_config.data_path["Out"],
                "box_fig.png"))
        # plot map
        sites_df = pd.DataFrame({
            "sites": sites,
            keys[2]: inds_test[keys[2]]
        })
        plot_ind_map(data_model.data_source.all_configs['gage_point_file'],
                     sites_df)
def setUp(self):
    """Load saved predictions/observations for the test epoch and pre-compute stats."""
    self.test_epoch = 20
    flow_pred_file = os.path.join(
        self.dir_temp, "epoch" + str(self.test_epoch) + 'flow_pred.npy')
    flow_obs_file = os.path.join(
        self.dir_temp, "epoch" + str(self.test_epoch) + 'flow_obs.npy')
    pred = unserialize_numpy(flow_pred_file)
    obs = unserialize_numpy(flow_obs_file)
    # flatten to 2-D (sites, time). BUG FIX: obs was previously reshaped with
    # pred's shape; use each array's own shape so a pred/obs size mismatch
    # raises instead of silently rearranging the observations.
    self.pred = pred.reshape(pred.shape[0], pred.shape[1])
    self.obs = obs.reshape(obs.shape[0], obs.shape[1])
    # performance metrics shared by the tests of this case
    self.inds = statError(self.obs, self.pred)
    self.keys = ["NSE"]
    self.inds_test = subset_of_dict(self.inds, self.keys)
def test_plot_pretrained_model_test(self):
    """Plot the test results of a pretrained model on this dataset.

    Names suggest a model pretrained on no-major-dam basins evaluated on the
    major-dam dataset — confirm against the training scripts.
    """
    data_model_test = GagesModel.load_datamodel(
        self.majordam_config_data.data_path["Temp"],
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    # results live in a sub-dir named after the no-major-dam experiment
    pretrained_model_name = "nodam" + self.nomajordam_subdir.split(
        "/")[1] + "_pretrained_model"
    save_dir = os.path.join(
        data_model_test.data_source.data_config.data_path['Out'],
        pretrained_model_name)
    pred, obs = load_result(save_dir, self.test_epoch)
    pred = pred.reshape(pred.shape[0], pred.shape[1])
    # NOTE(review): obs is reshaped with pred's shape — only safe when both
    # arrays have identical sizes; confirm intentional
    obs = obs.reshape(pred.shape[0], pred.shape[1])
    inds = statError(obs, pred)
    # plot box (uses the seaborn library)
    keys = ["Bias", "RMSE", "NSE"]
    inds_test = subset_of_dict(inds, keys)
    box_fig = plot_diff_boxes(inds_test)
    box_fig.savefig(os.path.join(save_dir, "box_fig.png"))
    # plot ts
    show_me_num = 5
    t_s_dict = data_model_test.t_s_dict
    sites = np.array(t_s_dict["sites_id"])
    t_range = np.array(t_s_dict["t_final_range"])
    ts_fig = plot_ts_obs_pred(obs, pred, sites, t_range, show_me_num)
    ts_fig.savefig(os.path.join(save_dir, "ts_fig.png"))
    # plot nse ecdf
    sites_df_nse = pd.DataFrame({
        "sites": sites,
        keys[2]: inds_test[keys[2]]
    })
    plot_ecdf(sites_df_nse, keys[2])
    # plot map
    gauge_dict = data_model_test.data_source.gage_dict
    plot_map(gauge_dict,
             sites_df_nse,
             id_col="STAID",
             lon_col="LNG_GAGE",
             lat_col="LAT_GAGE")
def plot_region_seperately(gages_data_model, epoch, id_regions_idx, preds,
                           obss, inds_dfs):
    """For each region, save a box plot, time series, NSE ECDF and NSE map.

    :param gages_data_model: data model giving site ids, time range, gauge
        dict and the "Out" output path
    :param epoch: epoch number, used only in the output file names
    :param id_regions_idx: per-region index arrays into the model's sites_id
    :param preds: per-region prediction arrays
    :param obss: per-region observation arrays
    :param inds_dfs: per-region metric collections (indexable by metric name)
    """
    df_id_region = np.array(gages_data_model.t_s_dict["sites_id"])
    regions_name = gages_data_model.data_source.all_configs.get("regions")
    for i in range(len(id_regions_idx)):
        # plot box
        keys = ["Bias", "RMSE", "NSE"]
        inds_test = subset_of_dict(inds_dfs[i], keys)
        box_fig = plot_diff_boxes(inds_test)
        box_fig.savefig(
            os.path.join(
                gages_data_model.data_source.data_config.data_path["Out"],
                regions_name[i] + "epoch" + str(epoch) + "box_fig.png"))
        # plot ts
        sites = np.array(df_id_region[id_regions_idx[i]])
        t_range = np.array(gages_data_model.t_s_dict["t_final_range"])
        show_me_num = 5
        ts_fig = plot_ts_obs_pred(obss[i], preds[i], sites, t_range,
                                  show_me_num)
        ts_fig.savefig(
            os.path.join(
                gages_data_model.data_source.data_config.data_path["Out"],
                regions_name[i] + "epoch" + str(epoch) + "ts_fig.png"))
        # plot nse ecdf
        sites_df_nse = pd.DataFrame({
            "sites": sites,
            keys[2]: inds_test[keys[2]]
        })
        plot_ecdf(
            sites_df_nse, keys[2],
            os.path.join(
                gages_data_model.data_source.data_config.data_path["Out"],
                regions_name[i] + "epoch" + str(epoch) + "ecdf_fig.png"))
        # plot map
        gauge_dict = gages_data_model.data_source.gage_dict
        save_map_file = os.path.join(
            gages_data_model.data_source.data_config.data_path["Out"],
            regions_name[i] + "epoch" + str(epoch) + "map_fig.png")
        plot_map(gauge_dict,
                 sites_df_nse,
                 save_file=save_map_file,
                 id_col="STAID",
                 lon_col="LNG_GAGE",
                 lat_col="LAT_GAGE")
def test_plot_pretrained_model_test(self):
    """Load saved test results and draw ECDF, map, box and time-series plots."""
    data_model_test = GagesModel.load_datamodel(
        self.config_data.data_path["Temp"],
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    save_dir = data_model_test.data_source.data_config.data_path['Temp']
    pred, obs = load_result(save_dir,
                            self.config_data.config_file.TEST_EPOCH)
    pred = pred.reshape(pred.shape[0], pred.shape[1])
    # NOTE(review): obs is reshaped with pred's shape — only safe when both
    # arrays have identical sizes; confirm intentional
    obs = obs.reshape(pred.shape[0], pred.shape[1])
    inds = statError(obs, pred)
    keys = ["Bias", "RMSE", "NSE"]
    t_s_dict = data_model_test.t_s_dict
    sites = np.array(t_s_dict["sites_id"])
    t_range = np.array(t_s_dict["t_final_range"])
    # plot nse ecdf
    sites_df_nse = pd.DataFrame({"sites": sites, keys[2]: inds[keys[2]]})
    plot_ecdf(sites_df_nse, keys[2])
    # plot map
    gauge_dict = data_model_test.data_source.gage_dict
    plot_map(gauge_dict,
             sites_df_nse,
             id_col="STAID",
             lon_col="LNG_GAGE",
             lat_col="LAT_GAGE")
    # plot box (uses the seaborn library)
    inds_test = subset_of_dict(inds, keys)
    box_fig = plot_diff_boxes(inds_test)
    # plot ts
    show_me_num = 5
    ts_fig = plot_ts_obs_pred(obs, pred, sites, t_range, show_me_num)
    plt.show()
def test_3factors(self):
    """Box-plot NSE grouped by dam purpose, DOR class and diversion flag.

    The three factors are: main dam purpose (x axis / column facet), degree
    of regulation (DOR, hue), and whether the basin has reported diversions.
    """
    data_model = self.data_model
    config_data = self.config_data
    test_epoch = self.test_epoch
    # plot three factors
    attr_lst = ["RUNAVE7100", "STOR_NOR_2009"]
    usgs_id = data_model.t_s_dict["sites_id"]
    attrs_runavg_stor = data_model.data_source.read_attr(
        usgs_id, attr_lst, is_return_dict=False)
    run_avg = attrs_runavg_stor[:, 0] * (10**(-3)) * (10**6
                                                      )  # m^3 per year
    nor_storage = attrs_runavg_stor[:, 1] * 1000  # m^3
    # DOR = normal storage / mean annual runoff volume; split at 0.02
    dors_value = nor_storage / run_avg
    dors = np.full(len(usgs_id), "dor<0.02")
    for i in range(len(usgs_id)):
        if dors_value[i] >= 0.02:
            dors[i] = "dor≥0.02"
    # flag basins whose report/screening comments mention diversions
    diversions = np.full(len(usgs_id), "no ")
    diversion_strs = ["diversion", "divert"]
    attr_lst = ["WR_REPORT_REMARKS", "SCREENING_COMMENTS"]
    data_attr = data_model.data_source.read_attr_origin(usgs_id, attr_lst)
    diversion_strs_lower = [elem.lower() for elem in diversion_strs]
    data_attr0_lower = np.array([
        elem.lower() if type(elem) == str else elem
        for elem in data_attr[0]
    ])
    data_attr1_lower = np.array([
        elem.lower() if type(elem) == str else elem
        for elem in data_attr[1]
    ])
    data_attr_lower = np.vstack((data_attr0_lower, data_attr1_lower)).T
    for i in range(len(usgs_id)):
        if is_any_elem_in_a_lst(diversion_strs_lower,
                                data_attr_lower[i],
                                include=True):
            diversions[i] = "yes"
    nid_dir = os.path.join(
        "/".join(config_data.data_path["DB"].split("/")[:-1]), "nid",
        "test")
    gage_main_dam_purpose = unserialize_json(
        os.path.join(nid_dir, "dam_main_purpose_dict.json"))
    gage_main_dam_purpose_lst = list(gage_main_dam_purpose.values())
    # values appear to be strings of single-character purpose codes: they are
    # joined and split into individual characters here — confirm format
    gage_main_dam_purpose_lst_merge = "".join(gage_main_dam_purpose_lst)
    gage_main_dam_purpose_unique = np.unique(
        list(gage_main_dam_purpose_lst_merge))
    # gage_main_dam_purpose_unique = np.unique(gage_main_dam_purpose_lst)
    # group site ids by purpose code
    purpose_regions = {}
    for i in range(gage_main_dam_purpose_unique.size):
        sites_id = []
        for key, value in gage_main_dam_purpose.items():
            if gage_main_dam_purpose_unique[i] in value:
                sites_id.append(key)
        # dict iteration order must already be sorted by site id
        assert (all(x < y for x, y in zip(sites_id, sites_id[1:])))
        purpose_regions[gage_main_dam_purpose_unique[i]] = sites_id
    id_regions_idx = []
    id_regions_sites_ids = []
    regions_name = []
    # purposes with fewer sites than this are skipped
    show_min_num = 10
    df_id_region = np.array(data_model.t_s_dict["sites_id"])
    for key, value in purpose_regions.items():
        gages_id = value
        c, ind1, ind2 = np.intersect1d(df_id_region,
                                       gages_id,
                                       return_indices=True)
        if c.size < show_min_num:
            continue
        assert (all(x < y for x, y in zip(ind1, ind1[1:])))
        assert (all(x < y for x, y in zip(c, c[1:])))
        id_regions_idx.append(ind1)
        id_regions_sites_ids.append(c)
        regions_name.append(key)
    preds, obss, inds_dfs = split_results_to_regions(
        data_model, test_epoch, id_regions_idx, id_regions_sites_ids)
    frames = []
    x_name = "purposes"
    y_name = "NSE"
    hue_name = "DOR"
    col_name = "diversion"
    for i in range(len(id_regions_idx)):
        # plot box (uses the seaborn library)
        keys = ["NSE"]
        inds_test = subset_of_dict(inds_dfs[i], keys)
        inds_test = inds_test[keys[0]].values
        df_dict_i = {}
        str_i = regions_name[i]
        df_dict_i[x_name] = np.full([inds_test.size], str_i)
        df_dict_i[y_name] = inds_test
        df_dict_i[hue_name] = dors[id_regions_idx[i]]
        df_dict_i[col_name] = diversions[id_regions_idx[i]]
        # df_dict_i[hue_name] = nor_storage[id_regions_idx[i]]
        df_i = pd.DataFrame(df_dict_i)
        frames.append(df_i)
    result = pd.concat(frames)
    plot_boxs(result, x_name, y_name, ylim=[0, 1.0])
    plt.savefig(os.path.join(config_data.data_path["Out"],
                             'purpose_distribution.png'),
                dpi=500,
                bbox_inches="tight")
    # g = sns.catplot(x=x_name, y=y_name, hue=hue_name, col=col_name,
    #                 data=result, kind="swarm",
    #                 height=4, aspect=.7)
    sns.set(font_scale=1.5)
    fig, ax = plt.subplots()
    fig.set_size_inches(11.7, 8.27)
    g = sns.catplot(ax=ax,
                    x=x_name,
                    y=y_name,
                    hue=hue_name,
                    col=col_name,
                    data=result,
                    palette="Set1",
                    kind="box",
                    dodge=True,
                    showfliers=False)
    # g.set(ylim=(-1, 1))
    plt.savefig(os.path.join(config_data.data_path["Out"],
                             '3factors_distribution.png'),
                dpi=500,
                bbox_inches="tight")
    plt.show()
# NOTE(review): fragment of a larger function — data_model, inds_df,
# show_ind_NSE, nse_range, config_data and FIGURE_DPI are defined earlier
# (not visible in this chunk), and the final call continues past it.
idx_lstl_nse = inds_df[(inds_df[show_ind_NSE] >= nse_range[0]) & (
    inds_df[show_ind_NSE] <= nse_range[1])].index.tolist()
plot_gages_map(data_model,
               inds_df,
               show_ind_NSE,
               idx_lstl_nse,
               cbar_font_size=14)
plt.savefig(os.path.join(config_data.data_path["Out"], 'map_NSE.png'),
            dpi=FIGURE_DPI,
            bbox_inches="tight")
# plt.figure()
# plot box (uses the seaborn library)
keys = ["Bias", "NSE", "FHV", "FLV"]
inds_test = subset_of_dict(inds_df, keys)
plot_diff_boxes(inds_test)
plt.savefig(os.path.join(config_data.data_path["Out"], 'boxes.png'),
            dpi=FIGURE_DPI,
            bbox_inches="tight")
# plt.figure()
############################ plot map box ###########################
# plot NSE
# keep only sites whose NSE lies in the chosen range
nse_range = [0, 1]
# nse_range = [-10000, 1]
# nse_range = [-10000, 0]
idx_lstl_nse = inds_df[(inds_df[show_ind_NSE] >= nse_range[0]) & (
    inds_df[show_ind_NSE] <= nse_range[1])].index.tolist()
plot_gages_map_and_box(data_model, inds_df,
def test_purposes_seperate(self):
    """Split saved test results by main dam purpose and plot each group.

    For every purpose group it saves: a metrics CSV, a Bias/RMSE/NSE box
    plot, a time-series figure, an NSE ECDF, and an NSE map.
    """
    quick_data_dir = os.path.join(self.config_data.data_path["DB"],
                                  "quickdata")
    data_dir = os.path.join(quick_data_dir,
                            "allnonref-dam_95-05_nan-0.1_00-1.0")
    data_model_test = GagesModel.load_datamodel(
        data_dir,
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    data_model = GagesModel.update_data_model(self.config_data,
                                              data_model_test)
    nid_dir = os.path.join(
        "/".join(self.config_data.data_path["DB"].split("/")[:-1]), "nid",
        "quickdata")
    gage_main_dam_purpose = unserialize_json(
        os.path.join(nid_dir, "dam_main_purpose_dict.json"))
    gage_main_dam_purpose_lst = list(gage_main_dam_purpose.values())
    gage_main_dam_purpose_unique = np.unique(gage_main_dam_purpose_lst)
    # group site ids by their main dam purpose
    purpose_regions = {}
    for i in range(gage_main_dam_purpose_unique.size):
        sites_id = []
        for key, value in gage_main_dam_purpose.items():
            if value == gage_main_dam_purpose_unique[i]:
                sites_id.append(key)
        # dict iteration order must already be sorted by site id
        assert (all(x < y for x, y in zip(sites_id, sites_id[1:])))
        purpose_regions[gage_main_dam_purpose_unique[i]] = sites_id
    id_regions_idx = []
    id_regions_sites_ids = []
    df_id_region = np.array(data_model.t_s_dict["sites_id"])
    for key, value in purpose_regions.items():
        gages_id = value
        c, ind1, ind2 = np.intersect1d(df_id_region,
                                       gages_id,
                                       return_indices=True)
        assert (all(x < y for x, y in zip(ind1, ind1[1:])))
        assert (all(x < y for x, y in zip(c, c[1:])))
        id_regions_idx.append(ind1)
        id_regions_sites_ids.append(c)
    pred_all, obs_all = load_result(self.config_data.data_path["Temp"],
                                    self.test_epoch)
    pred_all = pred_all.reshape(pred_all.shape[0], pred_all.shape[1])
    obs_all = obs_all.reshape(obs_all.shape[0], obs_all.shape[1])
    # NOTE(review): loop starts at index 9, skipping the first nine purposes
    # — presumably already processed in an earlier run; confirm
    for i in range(9, len(gage_main_dam_purpose_unique)):
        pred = pred_all[id_regions_idx[i], :]
        obs = obs_all[id_regions_idx[i], :]
        inds = statError(obs, pred)
        inds['STAID'] = id_regions_sites_ids[i]
        inds_df = pd.DataFrame(inds)
        inds_df.to_csv(
            os.path.join(
                self.config_data.data_path["Out"],
                gage_main_dam_purpose_unique[i] + "epoch" +
                str(self.test_epoch) + 'data_df.csv'))
        # plot box (uses the seaborn library)
        keys = ["Bias", "RMSE", "NSE"]
        inds_test = subset_of_dict(inds, keys)
        box_fig = plot_diff_boxes(inds_test)
        box_fig.savefig(
            os.path.join(
                self.config_data.data_path["Out"],
                gage_main_dam_purpose_unique[i] + "epoch" +
                str(self.test_epoch) + "box_fig.png"))
        # plot ts
        sites = np.array(df_id_region[id_regions_idx[i]])
        t_range = np.array(data_model.t_s_dict["t_final_range"])
        show_me_num = 1
        ts_fig = plot_ts_obs_pred(obs, pred, sites, t_range, show_me_num)
        ts_fig.savefig(
            os.path.join(
                self.config_data.data_path["Out"],
                gage_main_dam_purpose_unique[i] + "epoch" +
                str(self.test_epoch) + "ts_fig.png"))
        # plot nse ecdf
        sites_df_nse = pd.DataFrame({
            "sites": sites,
            keys[2]: inds_test[keys[2]]
        })
        plot_ecdf(
            sites_df_nse, keys[2],
            os.path.join(
                self.config_data.data_path["Out"],
                gage_main_dam_purpose_unique[i] + "epoch" +
                str(self.test_epoch) + "ecdf_fig.png"))
        # plot map
        gauge_dict = data_model.data_source.gage_dict
        save_map_file = os.path.join(
            self.config_data.data_path["Out"],
            gage_main_dam_purpose_unique[i] + "epoch" +
            str(self.test_epoch) + "map_fig.png")
        plot_map(gauge_dict,
                 sites_df_nse,
                 save_file=save_map_file,
                 id_col="STAID",
                 lon_col="LNG_GAGE",
                 lat_col="LAT_GAGE")
def test_scatter_dam_purpose(self):
    """Box- and swarm-plot NSE grouped by main dam purpose, colored by DOR."""
    attr_lst = ["RUNAVE7100", "STOR_NOR_2009"]
    sites_nonref = self.data_model.t_s_dict["sites_id"]
    attrs_runavg_stor = self.data_model.data_source.read_attr(
        sites_nonref, attr_lst, is_return_dict=False)
    run_avg = attrs_runavg_stor[:, 0] * (10**(-3)) * (10**6
                                                      )  # m^3 per year
    nor_storage = attrs_runavg_stor[:, 1] * 1000  # m^3
    # DOR = normal storage / mean annual runoff volume
    dors = nor_storage / run_avg
    nid_dir = os.path.join(self.config_data.data_path["DB"], "nid", "test")
    gage_main_dam_purpose = unserialize_json(
        os.path.join(nid_dir, "dam_main_purpose_dict.json"))
    gage_main_dam_purpose_lst = list(gage_main_dam_purpose.values())
    gage_main_dam_purpose_unique = np.unique(gage_main_dam_purpose_lst)
    # group site ids by their main dam purpose
    purpose_regions = {}
    for i in range(gage_main_dam_purpose_unique.size):
        sites_id = []
        for key, value in gage_main_dam_purpose.items():
            if value == gage_main_dam_purpose_unique[i]:
                sites_id.append(key)
        # dict iteration order must already be sorted by site id
        assert (all(x < y for x, y in zip(sites_id, sites_id[1:])))
        purpose_regions[gage_main_dam_purpose_unique[i]] = sites_id
    id_regions_idx = []
    id_regions_sites_ids = []
    regions_name = []
    # purposes with fewer sites than this are skipped
    show_min_num = 10
    df_id_region = np.array(self.data_model.t_s_dict["sites_id"])
    for key, value in purpose_regions.items():
        gages_id = value
        c, ind1, ind2 = np.intersect1d(df_id_region,
                                       gages_id,
                                       return_indices=True)
        if c.size < show_min_num:
            continue
        assert (all(x < y for x, y in zip(ind1, ind1[1:])))
        assert (all(x < y for x, y in zip(c, c[1:])))
        id_regions_idx.append(ind1)
        id_regions_sites_ids.append(c)
        regions_name.append(key)
    preds, obss, inds_dfs = split_results_to_regions(
        self.data_model, self.test_epoch, id_regions_idx,
        id_regions_sites_ids)
    frames = []
    x_name = "purposes"
    y_name = "NSE"
    hue_name = "DOR"
    # hue_name = "STOR"
    for i in range(len(id_regions_idx)):
        # plot box (uses the seaborn library)
        keys = ["NSE"]
        inds_test = subset_of_dict(inds_dfs[i], keys)
        inds_test = inds_test[keys[0]].values
        df_dict_i = {}
        str_i = regions_name[i]
        df_dict_i[x_name] = np.full([inds_test.size], str_i)
        df_dict_i[y_name] = inds_test
        df_dict_i[hue_name] = dors[id_regions_idx[i]]
        # df_dict_i[hue_name] = nor_storage[id_regions_idx[i]]
        df_i = pd.DataFrame(df_dict_i)
        frames.append(df_i)
    result = pd.concat(frames)
    # can remove high hue value to keep a good map
    plot_boxs(result, x_name, y_name, ylim=[-1.0, 1.0])
    plt.savefig(os.path.join(self.config_data.data_path["Out"],
                             'purpose_distribution_test.png'),
                dpi=500,
                bbox_inches="tight")
    plt.show()
    # plot_boxs(result, x_name, y_name, uniform_color="skyblue", swarm_plot=True, hue=hue_name, colormap=True,
    #           ylim=[-1.0, 1.0])
    cmap_str = 'viridis'
    # cmap = plt.get_cmap('Spectral')
    cbar_label = hue_name
    plt.title('Distribution of different purposes')
    swarmplot_with_cbar(cmap_str,
                        cbar_label, [-1, 1.0],
                        x=x_name,
                        y=y_name,
                        hue=hue_name,
                        palette=cmap_str,
                        data=result)
def test_scatter_diversion(self):
    """Compare NSE of diverted vs non-diverted basins with box/swarm plots."""
    attr_lst = ["RUNAVE7100", "STOR_NOR_2009"]
    sites_nonref = self.data_model.t_s_dict["sites_id"]
    attrs_runavg_stor = self.data_model.data_source.read_attr(
        sites_nonref, attr_lst, is_return_dict=False)
    run_avg = attrs_runavg_stor[:, 0] * (10**(-3)) * (10**6
                                                      )  # m^3 per year
    nor_storage = attrs_runavg_stor[:, 1] * 1000  # m^3
    # DOR = normal storage / mean annual runoff volume
    dors = nor_storage / run_avg
    # pick the basins with / without diversion via the data source screens
    diversion_yes = True
    diversion_no = False
    source_data_diversion = GagesSource.choose_some_basins(
        self.config_data,
        self.config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        diversion=diversion_yes)
    source_data_nodivert = GagesSource.choose_some_basins(
        self.config_data,
        self.config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        diversion=diversion_no)
    sites_id_diversion = source_data_diversion.all_configs[
        'flow_screen_gage_id']
    sites_id_nodivert = source_data_nodivert.all_configs[
        'flow_screen_gage_id']
    divert_regions = {}
    divert_regions["diversion"] = sites_id_diversion
    divert_regions["not_diverted"] = sites_id_nodivert
    id_regions_idx = []
    id_regions_sites_ids = []
    regions_name = []
    df_id_region = np.array(self.data_model.t_s_dict["sites_id"])
    for key, value in divert_regions.items():
        gages_id = value
        c, ind1, ind2 = np.intersect1d(df_id_region,
                                       gages_id,
                                       return_indices=True)
        # both intersections must come back sorted by site id
        assert (all(x < y for x, y in zip(ind1, ind1[1:])))
        assert (all(x < y for x, y in zip(c, c[1:])))
        id_regions_idx.append(ind1)
        id_regions_sites_ids.append(c)
        regions_name.append(key)
    preds, obss, inds_dfs = split_results_to_regions(
        self.data_model, self.test_epoch, id_regions_idx,
        id_regions_sites_ids)
    frames = []
    x_name = "is_diverted"
    y_name = "NSE"
    hue_name = "DOR"
    # hue_name = "STOR"
    for i in range(len(id_regions_idx)):
        # plot box (uses the seaborn library)
        keys = ["NSE"]
        inds_test = subset_of_dict(inds_dfs[i], keys)
        inds_test = inds_test[keys[0]].values
        df_dict_i = {}
        str_i = regions_name[i]
        df_dict_i[x_name] = np.full([inds_test.size], str_i)
        df_dict_i[y_name] = inds_test
        df_dict_i[hue_name] = dors[id_regions_idx[i]]
        # df_dict_i[hue_name] = nor_storage[id_regions_idx[i]]
        df_i = pd.DataFrame(df_dict_i)
        frames.append(df_i)
    result = pd.concat(frames)
    # can remove high hue value to keep a good map
    plot_boxs(result, x_name, y_name, ylim=[-1.0, 1.0])
    # plot_boxs(result, x_name, y_name, uniform_color="skyblue", swarm_plot=True, hue=hue_name, colormap=True,
    #           ylim=[-1.0, 1.0])
    cmap_str = 'viridis'
    # cmap = plt.get_cmap('Spectral')
    cbar_label = hue_name
    plt.title('Distribution of w/wo diversion')
    swarmplot_with_cbar(cmap_str,
                        cbar_label, [-1, 1.0],
                        x=x_name,
                        y=y_name,
                        hue=hue_name,
                        palette=cmap_str,
                        data=result)