Exemplo n.º 1
0
 def test_show_multi_exps_results(self):
     """Log the median ensemble-test NSE for each train/test decade pair.

     The four decades 1980-2020 yield 4P2 = 12 ordered (train, test)
     period pairs, aligned one-to-one with the 12 experiment directories
     in ``sub_lst`` / ``exp_lst``.
     """
     periods = [["1980-01-01", "1990-01-01"], ["1990-01-01", "2000-01-01"],
                ["2000-01-01", "2010-01-01"], ["2010-01-01", "2020-01-01"]]
     # every ordered pair of distinct decades: (train period, test period)
     train_test_period_pairs = list(itertools.permutations(periods, 2))
     sub_lst = [
         "basic/exp6", "basic/exp46", "basic/exp47", "basic/exp7",
         "basic/exp37", "basic/exp1", "basic/exp48", "basic/exp8",
         "basic/exp5", "basic/exp49", "basic/exp50", "basic/exp9"
     ]
     exp_lst = [["basic_exp6"], ["basic_exp46"], ["basic_exp47"],
                ["basic_exp7"], ["basic_exp37"], ["basic_exp1"],
                ["basic_exp48"], ["basic_exp8"], ["basic_exp5"],
                ["basic_exp49"], ["basic_exp50"], ["basic_exp9"]]
     # iterate the three parallel lists together instead of indexing by i
     for sub, exps, (train_period, test_period) in zip(
             sub_lst, exp_lst, train_test_period_pairs):
         config_file = copy.deepcopy(cfg)
         args = cmd(
             sub=sub,
             train_period=train_period,
             train_mode=0,
             test_period=test_period,
             quick_data=0,
             cache_state=1,
             flow_screen={
                 'missing_data_ratio': 1,
                 'zero_value_ratio': 1
             },
             te=300,
             gage_id_file=
             "/mnt/data/owen411/code/hydro-anthropogenic-lstm/example/output/gages/basic/exp37/3557basins_ID_NSE_DOR.csv"
         )
         update_cfg(config_file, args)
         config_data = GagesConfig(config_file)
         # evaluate the ensemble at the configured test epoch
         test_epoch = config_data.config_file.TEST_EPOCH
         inds_df, pred_mean, obs_mean = load_ensemble_result(
             config_file, exps, test_epoch, return_value=True)
         hydro_logger.info("the median NSE of %s is %s", sub,
                           inds_df["NSE"].median())
Exemplo n.º 2
0
                   var_dict_file_name='test_dictAttribute.json',
                   t_s_dict_file_name='test_dictTimeSpace.json')
    hydro_logger.info("read and save camels 531 data model")
# plot
data_model = GagesModel.load_datamodel(
    config_data.data_path["Temp"],
    data_source_file_name='test_data_source.txt',
    stat_file_name='test_Statistics.json',
    flow_file_name='test_flow.npy',
    forcing_file_name='test_forcing.npy',
    attr_file_name='test_attr.npy',
    f_dict_file_name='test_dictFactorize.json',
    var_dict_file_name='test_dictAttribute.json',
    t_s_dict_file_name='test_dictTimeSpace.json')
inds_df_camels, pred_mean, obs_mean = load_ensemble_result(cfg,
                                                           exp_lst,
                                                           test_epoch,
                                                           return_value=True)

data_model_conus = GagesModel.load_datamodel(
    all_config_Data.data_path["Temp"],
    data_source_file_name='test_data_source.txt',
    stat_file_name='test_Statistics.json',
    flow_file_name='test_flow.npy',
    forcing_file_name='test_forcing.npy',
    attr_file_name='test_attr.npy',
    f_dict_file_name='test_dictFactorize.json',
    var_dict_file_name='test_dictAttribute.json',
    t_s_dict_file_name='test_dictTimeSpace.json')
all_sites = data_model_conus.t_s_dict["sites_id"]
idx_lst_camels = [
    i for i in range(len(all_sites))
Exemplo n.º 3
0
    def test_plot_map_cartopy_multi_vars(self):
        """Map + scatter plot of NSE for zero-dor vs small-dor CONUS basins.

        Splits the CONUS test basins into basins without dams ("zero-dor")
        and dammed basins with low degree of regulation ("small-dor"),
        keeps those with NSE in [0, 1), and plots them on a map together
        with an elevation/NSE scatter.
        """
        conus_exps = ["basic_exp37"]
        config_data = load_dataconfig_case_exp(cfg, conus_exps[0])

        # NOTE(review): negative DOR appears to mean "DOR below |value|" in
        # choose_some_basins — confirm against GagesSource
        dor_1 = -0.02
        source_data_dor1 = GagesSource.choose_some_basins(
            config_data,
            config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            DOR=dor_1)
        # basins with dams
        source_data_withdams = GagesSource.choose_some_basins(
            config_data,
            config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            dam_num=[1, 10000])
        # basins without dams
        source_data_withoutdams = GagesSource.choose_some_basins(
            config_data,
            config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            dam_num=0)

        sites_id_dor1 = source_data_dor1.all_configs['flow_screen_gage_id']
        sites_id_withdams = source_data_withdams.all_configs[
            'flow_screen_gage_id']
        sites_id_nodam = source_data_withoutdams.all_configs[
            'flow_screen_gage_id']
        # small-dor = low-DOR basins that nevertheless have dams
        sites_id_smalldam = np.intersect1d(
            np.array(sites_id_dor1), np.array(sites_id_withdams)).tolist()

        data_model = GagesModel.load_datamodel(
            config_data.data_path["Temp"],
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        all_lat = data_model.data_source.gage_dict["LAT_GAGE"]
        all_lon = data_model.data_source.gage_dict["LNG_GAGE"]
        conus_sites = data_model.t_s_dict["sites_id"]

        def _member_idx(chosen_ids):
            # positions in conus_sites whose id is in chosen_ids
            # (set gives O(1) membership instead of O(n) list scans)
            chosen = set(chosen_ids)
            return [i for i, site in enumerate(conus_sites) if site in chosen]

        idx_lst_nodam_in_conus = _member_idx(sites_id_nodam)
        idx_lst_smalldam_in_conus = _member_idx(sites_id_smalldam)

        attr_lst = ["SLOPE_PCT", "ELEV_MEAN_M_BASIN"]
        attrs = data_model.data_source.read_attr(conus_sites,
                                                 attr_lst,
                                                 is_return_dict=False)

        test_epoch = 300
        inds_df, pred, obs = load_ensemble_result(cfg,
                                                  conus_exps,
                                                  test_epoch,
                                                  return_value=True)
        show_ind_key = "NSE"
        # keep only basins whose NSE lies in [0, 1)
        nse_range = [0, 1]
        idx_lst_nse = inds_df[(inds_df[show_ind_key] >= nse_range[0]) & (
            inds_df[show_ind_key] < nse_range[1])].index.tolist()

        type_1_index_lst = np.intersect1d(idx_lst_nodam_in_conus,
                                          idx_lst_nse).tolist()
        type_2_index_lst = np.intersect1d(idx_lst_smalldam_in_conus,
                                          idx_lst_nse).tolist()

        def _type_df(index_lst, type_label):
            # one row per basin: group tag, NSE, location and terrain attrs
            return pd.DataFrame({
                "type": np.full(len(index_lst), type_label),
                show_ind_key: inds_df[show_ind_key].values[index_lst],
                "lat": all_lat[index_lst],
                "lon": all_lon[index_lst],
                "slope": attrs[index_lst, 0],
                "elevation": attrs[index_lst, 1]
            })

        data_df = pd.concat([
            _type_df(type_1_index_lst, "zero-dor"),
            _type_df(type_2_index_lst, "small-dor")
        ])
        # row ranges of the two groups inside the concatenated frame
        idx_lst = [
            np.arange(len(type_1_index_lst)),
            np.arange(len(type_1_index_lst),
                      len(type_1_index_lst) + len(type_2_index_lst))
        ]
        plot_gages_map_and_scatter(data_df,
                                   [show_ind_key, "lat", "lon", "elevation"],
                                   idx_lst,
                                   cmap_strs=["Reds", "Blues"],
                                   labels=["zero-dor", "small-dor"],
                                   scatter_label=[attr_lst[1], show_ind_key])
        # matplotlib.rcParams.update({'font.size': 12})
        plt.tight_layout()
        plt.show()
# indices (into each site list) of the basins classified as large-dor
# NOTE(review): pair3_sites / conus_sites / sites_id_largedam are defined
# earlier in this script — verify names against the full file
idx_lst_largedam_in_pair3 = [
    i for i in range(len(pair3_sites)) if pair3_sites[i] in sites_id_largedam
]
idx_lst_largedam_in_conus = [
    i for i in range(len(conus_sites)) if conus_sites[i] in sites_id_largedam
]

# choose which comparison to run (branches for 1-4 presumably follow below)
compare_item = 2
# 0: plot the map for comparing NSE of small-dor and zero-dor
# 1: ecdf plots of different cases
# 2: box plots of different cases
# 3: show the median NSE values of different cases
# 4: see if the differences between different cases are significant
if compare_item == 0:
    # ensemble test results for the CONUS experiments
    inds_df, pred, obs = load_ensemble_result(cfg,
                                              conus_exps,
                                              test_epoch,
                                              return_value=True)

    # gauge coordinates for the map plot
    all_lat = conus_data_model.data_source.gage_dict["LAT_GAGE"]
    all_lon = conus_data_model.data_source.gage_dict["LNG_GAGE"]
    show_ind_key = "NSE"
    attr_lst = ["SLOPE_PCT", "ELEV_MEAN_M_BASIN"]
    attrs = conus_data_model.data_source.read_attr(conus_sites,
                                                   attr_lst,
                                                   is_return_dict=False)

    # keep only gauges west of 100°W
    western_lon_idx = [i for i in range(all_lon.size) if all_lon[i] < -100]

    # keep only basins whose NSE lies in [0, 1)
    nse_range = [0, 1]
    idx_lst_nse = inds_df[(inds_df[show_ind_key] >= nse_range[0]) & (
        inds_df[show_ind_key] < nse_range[1])].index.tolist()
Exemplo n.º 5
0
    def test_zero_small_dor_basins_locations(self):
        """Export western zero-dor / small-dor basin ids and plot map + scatter.

        Restricts the CONUS test basins to those west of 100°W with NSE in
        [0, 1), splits them into basins without dams ("zero-dor") and dammed
        low-DOR basins ("small-dor"), writes each group's gauge ids to a CSV
        and draws a map plus a slope/NSE scatter.
        """
        conus_exps = self.exp_lst
        test_epoch = self.test_epoch
        inds_df, pred, obs = load_ensemble_result(self.config_file,
                                                  conus_exps,
                                                  test_epoch,
                                                  return_value=True)
        conus_config_data = load_dataconfig_case_exp(self.config_file,
                                                     conus_exps[0])
        conus_data_model = GagesModel.load_datamodel(
            conus_config_data.data_path["Temp"],
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        conus_sites = conus_data_model.t_s_dict["sites_id"]

        all_lat = conus_data_model.data_source.gage_dict["LAT_GAGE"]
        all_lon = conus_data_model.data_source.gage_dict["LNG_GAGE"]
        show_ind_key = "NSE"
        attr_lst = ["SLOPE_PCT", "ELEV_MEAN_M_BASIN"]
        attrs = conus_data_model.data_source.read_attr(conus_sites,
                                                       attr_lst,
                                                       is_return_dict=False)

        # keep only gauges west of 100°W
        western_lon_idx = [i for i in range(all_lon.size) if all_lon[i] < -100]

        # keep only basins whose NSE lies in [0, 1), intersected with the west
        nse_range = [0, 1]
        idx_lst_nse = inds_df[(inds_df[show_ind_key] >= nse_range[0]) & (
            inds_df[show_ind_key] < nse_range[1])].index.tolist()
        idx_lst_nse = np.intersect1d(western_lon_idx, idx_lst_nse)

        def _screened_sites(**screen_kwargs):
            # gauge ids passing one screening criterion on the training range
            source = GagesSource.choose_some_basins(
                conus_config_data,
                conus_config_data.model_dict["data"]["tRangeTrain"],
                screen_basin_area_huc4=False,
                **screen_kwargs)
            return source.all_configs['flow_screen_gage_id']

        # small dor — NOTE(review): negative DOR appears to mean
        # "DOR below the threshold" in choose_some_basins; confirm
        sites_id_dor1 = _screened_sites(DOR=-self.dor)
        # basins with dams / without dams
        sites_id_withdams = _screened_sites(dam_num=[1, 10000])
        sites_id_nodam = _screened_sites(dam_num=0)
        # small-dor = low-DOR basins that nevertheless have dams
        sites_id_smalldam = np.intersect1d(
            np.array(sites_id_dor1), np.array(sites_id_withdams)).tolist()

        def _member_idx(chosen_ids):
            # positions in conus_sites whose id is in chosen_ids
            # (set gives O(1) membership instead of O(n) list scans)
            chosen = set(chosen_ids)
            return [i for i, site in enumerate(conus_sites) if site in chosen]

        type_1_index_lst = np.intersect1d(_member_idx(sites_id_nodam),
                                          idx_lst_nse).tolist()
        type_2_index_lst = np.intersect1d(_member_idx(sites_id_smalldam),
                                          idx_lst_nse).tolist()

        def _save_sites(index_lst, csv_name):
            # persist the selected gauge ids under the experiment's Out dir
            pd.DataFrame({
                "GAGE_ID": np.array(conus_sites)[index_lst]
            }).to_csv(
                os.path.join(conus_config_data.data_path["Out"], csv_name))

        _save_sites(type_1_index_lst, "western-zero-dor-sites.csv")
        _save_sites(type_2_index_lst, "western-small-dor-sites.csv")

        def _type_df(index_lst, type_label):
            # one row per basin: group tag, NSE, location and terrain attrs
            return pd.DataFrame({
                "type": np.full(len(index_lst), type_label),
                show_ind_key: inds_df[show_ind_key].values[index_lst],
                "lat": all_lat[index_lst],
                "lon": all_lon[index_lst],
                "slope": attrs[index_lst, 0],
                "elevation": attrs[index_lst, 1]
            })

        data_df = pd.concat([
            _type_df(type_1_index_lst, "zero-dor"),
            _type_df(type_2_index_lst, "small-dor")
        ])
        # row ranges of the two groups inside the concatenated frame
        idx_lst = [
            np.arange(len(type_1_index_lst)),
            np.arange(len(type_1_index_lst),
                      len(type_1_index_lst) + len(type_2_index_lst))
        ]
        plot_gages_map_and_scatter(data_df,
                                   [show_ind_key, "lat", "lon", "slope"],
                                   idx_lst,
                                   cmap_strs=["Reds", "Blues"],
                                   labels=["zero-dor", "small-dor"],
                                   scatter_label=[attr_lst[0], show_ind_key],
                                   wspace=2,
                                   hspace=1.5,
                                   legend_y=.8,
                                   sub_fig_ratio=[6, 4, 1])
        plt.tight_layout()
        plt.show()
Exemplo n.º 6
0
    def test_diff_dor(self):
        """Box plots comparing NSE of zero/small/large-dor basins across cases.

        For each dor class the same basins are evaluated under several
        training configurations (the class alone, pairwise-combined cases
        and the full CONUS model); the NSE distributions are drawn side by
        side, one subplot per class, with each box's median annotated.
        """
        dor_1 = -self.dor
        dor_2 = self.dor
        test_epoch = self.test_epoch
        config_file = self.config_file

        conus_exps = ["basic_exp37"]
        pair1_exps = ["dam_exp1"]
        pair2_exps = ["nodam_exp7"]
        pair3_exps = ["dam_exp27"]
        nodam_exp_lst = ["nodam_exp1"]
        # alternatives: -0.003["dam_exp11"] -0.08["dam_exp17"] -1["dam_exp32"]
        smalldam_exp_lst = ["dam_exp17"]
        # alternatives: 0.003["dam_exp12"] 0.08["dam_exp18"] 1["dam_exp33"]
        largedam_exp_lst = ["dam_exp4"]
        pair1_config_data = load_dataconfig_case_exp(config_file,
                                                     pair1_exps[0])
        pair2_config_data = load_dataconfig_case_exp(config_file,
                                                     pair2_exps[0])
        pair3_config_data = load_dataconfig_case_exp(config_file,
                                                     pair3_exps[0])
        conus_config_data = load_dataconfig_case_exp(config_file,
                                                     conus_exps[0])

        def _load_test_datamodel(case_config_data):
            # every case serializes its test set under the same file names
            return GagesModel.load_datamodel(
                case_config_data.data_path["Temp"],
                data_source_file_name='test_data_source.txt',
                stat_file_name='test_Statistics.json',
                flow_file_name='test_flow.npy',
                forcing_file_name='test_forcing.npy',
                attr_file_name='test_attr.npy',
                f_dict_file_name='test_dictFactorize.json',
                var_dict_file_name='test_dictAttribute.json',
                t_s_dict_file_name='test_dictTimeSpace.json')

        conus_sites = _load_test_datamodel(conus_config_data).t_s_dict[
            "sites_id"]

        def _screened_sites(**screen_kwargs):
            # gauge ids passing one screening criterion on the training range
            source = GagesSource.choose_some_basins(
                conus_config_data,
                conus_config_data.model_dict["data"]["tRangeTrain"],
                screen_basin_area_huc4=False,
                **screen_kwargs)
            return source.all_configs['flow_screen_gage_id']

        # NOTE(review): negative DOR appears to mean "DOR below |value|",
        # positive "DOR above value" — confirm in GagesSource
        sites_id_dor1 = _screened_sites(DOR=dor_1)
        sites_id_withdams = _screened_sites(dam_num=[1, 10000])
        sites_id_nodam = _screened_sites(dam_num=0)
        # small-dor = low-DOR basins that nevertheless have dams
        sites_id_smalldam = np.intersect1d(
            np.array(sites_id_dor1), np.array(sites_id_withdams)).tolist()
        sites_id_largedam = _screened_sites(DOR=dor_2)

        pair1_sites = _load_test_datamodel(pair1_config_data).t_s_dict[
            "sites_id"]
        pair2_sites = _load_test_datamodel(pair2_config_data).t_s_dict[
            "sites_id"]
        pair3_sites = _load_test_datamodel(pair3_config_data).t_s_dict[
            "sites_id"]

        def _member_idx(sites, chosen_ids):
            # positions in `sites` whose id is in `chosen_ids`
            # (set gives O(1) membership instead of O(n) list scans)
            chosen = set(chosen_ids)
            return [i for i, site in enumerate(sites) if site in chosen]

        # only the index lists actually plotted below are computed
        # (the original also built nodam-in-pair3, smalldam-in-pair2 and
        # largedam-in-pair1/conus-unused variants that were never used)
        idx_lst_nodam_in_pair1 = _member_idx(pair1_sites, sites_id_nodam)
        idx_lst_nodam_in_pair2 = _member_idx(pair2_sites, sites_id_nodam)
        idx_lst_nodam_in_conus = _member_idx(conus_sites, sites_id_nodam)
        idx_lst_smalldam_in_pair1 = _member_idx(pair1_sites,
                                                sites_id_smalldam)
        idx_lst_smalldam_in_pair3 = _member_idx(pair3_sites,
                                                sites_id_smalldam)
        idx_lst_smalldam_in_conus = _member_idx(conus_sites,
                                                sites_id_smalldam)
        idx_lst_largedam_in_pair2 = _member_idx(pair2_sites,
                                                sites_id_largedam)
        idx_lst_largedam_in_pair3 = _member_idx(pair3_sites,
                                                sites_id_largedam)
        idx_lst_largedam_in_conus = _member_idx(conus_sites,
                                                sites_id_largedam)

        print("multi box")
        inds_df_pair1 = load_ensemble_result(config_file, pair1_exps,
                                             test_epoch)
        inds_df_pair2 = load_ensemble_result(config_file, pair2_exps,
                                             test_epoch)
        inds_df_pair3 = load_ensemble_result(config_file, pair3_exps,
                                             test_epoch)
        inds_df_conus = load_ensemble_result(config_file, conus_exps,
                                             test_epoch)

        fig = plt.figure(figsize=(15, 8))
        gs = gridspec.GridSpec(1, 3)
        keys_nse = "NSE"
        color_chosen = ["Greens", "Blues", "Reds"]
        median_loc = 0.015
        decimal_places = 2
        sns.despine()
        sns.set(font_scale=1.5)

        def _case_df(attr_name, legend, nse_series):
            # one column tagging the case, one column with its NSE values
            return pd.DataFrame({
                attr_name: np.full([nse_series.shape[0]], legend),
                keys_nse: nse_series
            })

        def _draw_box(ax, attr_name, frames, palette, keep_ylabel):
            # draw one class's boxes and annotate each with its median NSE
            result = pd.concat(frames)
            ax.set_xticklabels(ax.get_xticklabels(), rotation=30)
            ax.set_ylim([0, 1])
            if not keep_ylabel:
                ax.set(ylabel=None)
            sns.boxplot(ax=ax,
                        x=attr_name,
                        y=keys_nse,
                        data=result,
                        showfliers=False,
                        palette=palette)
            medians = result.groupby([attr_name],
                                     sort=False)[keys_nse].median().values
            labels = [str(np.round(m, decimal_places)) for m in medians]
            for pos in range(len(medians)):
                ax.text(pos,
                        medians[pos] + median_loc,
                        labels[pos],
                        horizontalalignment='center',
                        size='x-small',
                        weight='semibold')

        # subplot (a): basins without dams
        attr_nodam = "zero_dor"
        legends_nodam = ["LSTM-Z", "LSTM-ZS", "LSTM-ZL", "LSTM-CONUS"]
        inds_df_nodam = load_ensemble_result(config_file, nodam_exp_lst,
                                             test_epoch)
        frames_nodam = [
            _case_df(attr_nodam, legends_nodam[0], inds_df_nodam[keys_nse]),
            _case_df(attr_nodam, legends_nodam[1],
                     inds_df_pair1[keys_nse].iloc[idx_lst_nodam_in_pair1]),
            _case_df(attr_nodam, legends_nodam[2],
                     inds_df_pair2[keys_nse].iloc[idx_lst_nodam_in_pair2]),
            _case_df(attr_nodam, legends_nodam[3],
                     inds_df_conus[keys_nse].iloc[idx_lst_nodam_in_conus])
        ]
        _draw_box(plt.subplot(gs[0]), attr_nodam, frames_nodam,
                  color_chosen[0], keep_ylabel=True)

        # subplot (b): small-dor basins
        attr_smalldam = "small_dor"
        legends_smalldam = ["LSTM-S", "LSTM-ZS", "LSTM-SL", "LSTM-CONUS"]
        inds_df_smalldam = load_ensemble_result(config_file, smalldam_exp_lst,
                                                test_epoch)
        frames_smalldam = [
            _case_df(attr_smalldam, legends_smalldam[0],
                     inds_df_smalldam[keys_nse]),
            _case_df(attr_smalldam, legends_smalldam[1],
                     inds_df_pair1[keys_nse].iloc[idx_lst_smalldam_in_pair1]),
            _case_df(attr_smalldam, legends_smalldam[2],
                     inds_df_pair3[keys_nse].iloc[idx_lst_smalldam_in_pair3]),
            _case_df(attr_smalldam, legends_smalldam[3],
                     inds_df_conus[keys_nse].iloc[idx_lst_smalldam_in_conus])
        ]
        _draw_box(plt.subplot(gs[1]), attr_smalldam, frames_smalldam,
                  color_chosen[1], keep_ylabel=False)

        # subplot (c): large-dor basins
        attr_largedam = "large_dor"
        legends_largedam = ["LSTM-L", "LSTM-ZL", "LSTM-SL", "LSTM-CONUS"]
        inds_df_largedam = load_ensemble_result(config_file, largedam_exp_lst,
                                                test_epoch)
        frames_largedam = [
            _case_df(attr_largedam, legends_largedam[0],
                     inds_df_largedam[keys_nse]),
            _case_df(attr_largedam, legends_largedam[1],
                     inds_df_pair2[keys_nse].iloc[idx_lst_largedam_in_pair2]),
            _case_df(attr_largedam, legends_largedam[2],
                     inds_df_pair3[keys_nse].iloc[idx_lst_largedam_in_pair3]),
            _case_df(attr_largedam, legends_largedam[3],
                     inds_df_conus[keys_nse].iloc[idx_lst_largedam_in_conus])
        ]
        _draw_box(plt.subplot(gs[2]), attr_largedam, frames_largedam,
                  color_chosen[2], keep_ylabel=False)

        plt.tight_layout()
        plt.show()
Exemplo n.º 7
0
    def test_diff_dor_fig2_in_the_paper(self):
        """Plot NSE ECDF curves for four diversion/dor basin groups plus CONUS.

        Basins are split by (no-diversion vs diversion) x (small-dor vs
        large-dor); the five curves (four groups + all CONUS basins) are drawn
        in one figure and saved as 'new_dor_divert_comp_matplotlib.png'.
        """
        data_model = GagesModel.load_datamodel(
            self.config_data.data_path["Temp"],
            data_source_file_name='data_source.txt',
            stat_file_name='Statistics.json',
            flow_file_name='flow.npy',
            forcing_file_name='forcing.npy',
            attr_file_name='attr.npy',
            f_dict_file_name='dictFactorize.json',
            var_dict_file_name='dictAttribute.json',
            t_s_dict_file_name='dictTimeSpace.json')
        config_data = self.config_data
        inds_df, pred_mean, obs_mean = load_ensemble_result(
            self.config_file,
            self.exp_lst,
            self.test_epoch,
            return_value=True)
        train_range = config_data.model_dict["data"]["tRangeTrain"]
        # basins with and without inter-basin diversions
        source_data_diversion = GagesSource.choose_some_basins(
            config_data,
            train_range,
            screen_basin_area_huc4=False,
            diversion=True)
        source_data_nodivert = GagesSource.choose_some_basins(
            config_data,
            train_range,
            screen_basin_area_huc4=False,
            diversion=False)
        sites_id_nodivert = source_data_nodivert.all_configs[
            'flow_screen_gage_id']
        sites_id_diversion = source_data_diversion.all_configs[
            'flow_screen_gage_id']

        # negative threshold selects dor below |self.dor|, positive above it
        # — presumably; confirm against GagesSource.choose_some_basins
        source_data_dor1 = GagesSource.choose_some_basins(
            config_data,
            train_range,
            screen_basin_area_huc4=False,
            DOR=-self.dor)
        source_data_dor2 = GagesSource.choose_some_basins(
            config_data,
            train_range,
            screen_basin_area_huc4=False,
            DOR=self.dor)
        sites_id_dor1 = source_data_dor1.all_configs['flow_screen_gage_id']
        sites_id_dor2 = source_data_dor2.all_configs['flow_screen_gage_id']

        # restrict the small-dor group to basins that actually have dams
        source_data_withdams = GagesSource.choose_some_basins(
            config_data,
            train_range,
            screen_basin_area_huc4=False,
            dam_num=[1, 100000])
        sites_id_withdams = source_data_withdams.all_configs[
            'flow_screen_gage_id']
        sites_id_dor1 = np.intersect1d(np.array(sites_id_dor1),
                                       np.array(sites_id_withdams)).tolist()

        # the four diversion x dor combinations, in legend order
        group_sites = [
            np.intersect1d(sites_id_nodivert, sites_id_dor1),
            np.intersect1d(sites_id_nodivert, sites_id_dor2),
            np.intersect1d(sites_id_diversion, sites_id_dor1),
            np.intersect1d(sites_id_diversion, sites_id_dor2),
        ]
        all_sites = data_model.t_s_dict["sites_id"]
        group_idx_lsts = [[
            idx for idx, site in enumerate(all_sites) if site in one_group
        ] for one_group in group_sites]

        keys_nse = "NSE"
        cases_exps_legends_together = [
            "not_diverted_small_dor", "not_diverted_large_dor",
            "diversion_small_dor", "diversion_large_dor", "CONUS"
        ]
        xs = []
        ys = []
        # ECDF of NSE for each group, then for all CONUS basins
        for idx_lst in group_idx_lsts:
            x_tmp, y_tmp = ecdf(inds_df[keys_nse].iloc[idx_lst])
            xs.append(x_tmp)
            ys.append(y_tmp)
        x_conus, y_conus = ecdf(inds_df[keys_nse])
        xs.append(x_conus)
        ys.append(y_conus)
        hydro_logger.info(
            "The median NSEs of all five curves (%s) are \n %.2f, %.2f, %.2f, %.2f, %.2f",
            cases_exps_legends_together, np.median(xs[0]), np.median(xs[1]),
            np.median(xs[2]), np.median(xs[3]), np.median(x_conus))

        # color encodes small/large dor, linestyle encodes diversion status
        colors = ["#1f77b4", "#d62728"]
        linestyles = ['-', "--"]
        markers = ["", "."]

        fig = plt.figure(figsize=(8, 6))
        axes = fig.add_axes([0.1, 0.1, 0.8, 0.8])
        for i, linestyle in enumerate(linestyles):
            for j, color in enumerate(colors):
                curve_idx = i * 2 + j
                plt.plot(xs[curve_idx],
                         ys[curve_idx],
                         color=color,
                         ls=linestyle,
                         label=cases_exps_legends_together[curve_idx])
        # CONUS baseline in grey with a custom dash pattern
        conus_line, = axes.plot(x_conus,
                                y_conus,
                                color="grey",
                                label=cases_exps_legends_together[4])
        conus_line.set_dashes([2, 2, 10, 2])

        # axis labels, limits, tick fonts, grid and clean spines
        plt.xlabel("NSE", fontsize=18)
        plt.ylabel("CDF", fontsize=18)
        axes.set_xlim(0, 1)
        axes.set_ylim(0, 1)
        plt.xticks(np.arange(0, 1 + 1 / 100, 0.1), fontsize=16)
        plt.yticks(np.arange(0, 1 + 1 / 100, 0.1), fontsize=16)
        plt.grid()
        axes.spines['right'].set_visible(False)
        axes.spines['top'].set_visible(False)
        axes.legend()
        plt.legend(prop={'size': 16})
        plt.savefig(os.path.join(config_data.data_path["Out"],
                                 'new_dor_divert_comp_matplotlib.png'),
                    dpi=self.FIGURE_DPI,
                    bbox_inches="tight")
        plt.show()
Exemplo n.º 8
0
    def test_gages_nse_dam_attr(self):
        """Relate ensemble NSE to dam/reservoir attributes and save the figures.

        Builds a per-basin DataFrame of DOR (degree of regulation), dam count
        and NSE, then produces boxplots (NSE vs DOR range / dam-count range),
        regression plots (NSE vs DOR, dam-storage std, dam-gage dispersion) and
        hex jointplots, saving each under config_data.data_path["Out"].
        """
        figure_dpi = 600
        config_data = self.config_data
        data_dir = config_data.data_path["Temp"]
        data_model = GagesModel.load_datamodel(
            data_dir,
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        gages_id = data_model.t_s_dict["sites_id"]

        # six repeated experiments averaged into one ensemble result;
        # stored on self because several later sections reread self.inds_df
        exp_lst = [
            "basic_exp37", "basic_exp39", "basic_exp40", "basic_exp41",
            "basic_exp42", "basic_exp43"
        ]
        self.inds_df, pred_mean, obs_mean = load_ensemble_result(
            config_data.config_file,
            exp_lst,
            config_data.config_file.TEST_EPOCH,
            return_value=True)
        show_ind_key = 'NSE'

        # use a serif (Times New Roman) font family for all figures
        plt.rcParams['font.family'] = 'serif'
        plt.rcParams['font.serif'] = ['Times New Roman'
                                      ] + plt.rcParams['font.serif']
        # plot NSE-DOR
        # RUNAVE7100: mean annual runoff; STOR_NOR_2009: normal reservoir
        # storage — presumably GAGES-II attribute codes; confirm units below
        attr_lst = ["RUNAVE7100", "STOR_NOR_2009"]
        attrs_runavg_stor = data_model.data_source.read_attr(
            gages_id, attr_lst, is_return_dict=False)
        run_avg = attrs_runavg_stor[:, 0] * (10**(-3)) * (10**6
                                                          )  # m^3 per year
        nor_storage = attrs_runavg_stor[:, 1] * 1000  # m^3
        # DOR = normal storage / mean annual runoff volume
        dors = nor_storage / run_avg
        # dor = 0 is not totally same with dam_num=0 (some dammed basins' dor is about 0.00),
        # here for zero-dor we mainly rely on dam_num = 0
        attr_dam_num = ["NDAMS_2009"]
        attrs_dam_num = data_model.data_source.read_attr(gages_id,
                                                         attr_dam_num,
                                                         is_return_dict=False)
        df = pd.DataFrame({
            "DOR": dors,
            "DAM_NUM": attrs_dam_num[:, 0],
            show_ind_key: self.inds_df[show_ind_key].values
        })
        hydro_logger.info("statistics of dors:\n %s", df.describe())
        # NOTE(review): this logs only the 95th percentile, not a list of them
        hydro_logger.info("percentiles of dors:\n %s", df.quantile(q=0.95))
        hydro_logger.info("ecdf of dors:\n %s", ecdf(dors))

        # boxplot
        # add a column to represent the dor range for the df
        # bins are (lower, upper]; the first [0, 0] bin is the label "0"
        dor_value_range_lst = [[0, 0], [0, 0.02], [0.02, 0.05], [0.05, 0.1],
                               [0.1, 0.2], [0.2, 0.4], [0.4, 0.8],
                               [0.8, 10000]]
        dor_range_lst = ["0"] + [
            str(dor_value_range_lst[i][0]) + "-" +
            str(dor_value_range_lst[i][1])
            for i in range(1,
                           len(dor_value_range_lst) - 1)
        ] + [">" + str(dor_value_range_lst[-1][0])]

        # add a column to represent the dam_num range for the df
        dam_num_value_range_lst = [[0, 0], [0, 1], [1, 3], [3, 5], [5, 10],
                                   [10, 20], [20, 50], [50, 10000]]
        dam_num_range_lst = ["0", "1"] + [
            str(dam_num_value_range_lst[i][0]) + "-" +
            str(dam_num_value_range_lst[i][1])
            for i in range(2,
                           len(dam_num_value_range_lst) - 1)
        ] + [">" + str(dam_num_value_range_lst[-1][0])]

        def in_which_range(value_temp):
            """Map a dor value to its bin label ("0", "a-b" or ">a").

            NOTE(review): a value above the last bin's upper bound (10000)
            would leave the_range empty and raise IndexError.
            """
            if value_temp == 0:
                return "0"
            the_range = [
                a_range for a_range in dor_value_range_lst
                if a_range[0] < value_temp <= a_range[1]
            ]
            if the_range[0][0] == dor_value_range_lst[-1][0]:
                the_range_str = ">" + str(the_range[0][0])
            else:
                the_range_str = str(the_range[0][0]) + "-" + str(
                    the_range[0][1])
            return the_range_str

        def in_which_dam_num_range(value_tmp):
            """Map a dam count to its bin label ("0", "1", "a-b" or ">a")."""
            if value_tmp == 0:
                return "0"
            if value_tmp == 1:
                return "1"
            the_ran = [
                a_ran for a_ran in dam_num_value_range_lst
                if a_ran[0] < value_tmp <= a_ran[1]
            ]
            if the_ran[0][0] == dam_num_value_range_lst[-1][0]:
                the_ran_str = ">" + str(the_ran[0][0])
            else:
                the_ran_str = str(the_ran[0][0]) + "-" + str(the_ran[0][1])
            return the_ran_str

        df["DOR_RANGE"] = df["DOR"].apply(in_which_range)
        df["DAM_NUM_RANGE"] = df["DAM_NUM"].apply(in_which_dam_num_range)
        # dammed basins whose dor rounded to 0 are reassigned to the first
        # non-zero dor bin (see the zero-dor note above)
        df.loc[(df["DAM_NUM"] > 0) & (df["DOR_RANGE"] == "0"),
               "DOR_RANGE"] = dor_range_lst[1]
        shown_nse_range_boxplots = [-0.5, 1.0]
        sns.set(font="serif", font_scale=1.5, color_codes=True)
        plot_boxs(df,
                  "DOR_RANGE",
                  show_ind_key,
                  ylim=shown_nse_range_boxplots,
                  order=dor_range_lst)
        plt.savefig(os.path.join(
            config_data.data_path["Out"],
            'NSE~DOR-boxplots-' + str(shown_nse_range_boxplots) + '.png'),
                    dpi=figure_dpi,
                    bbox_inches="tight")
        plt.figure()
        shown_nse_range_boxplots = [0, 1.0]
        sns.set(font="serif", font_scale=1.5, color_codes=True)
        plot_boxs(df,
                  "DAM_NUM_RANGE",
                  show_ind_key,
                  ylim=shown_nse_range_boxplots,
                  order=dam_num_range_lst)
        plt.savefig(os.path.join(
            config_data.data_path["Out"],
            'NSE~DAM_NUM-boxplots-' + str(shown_nse_range_boxplots) + '.png'),
                    dpi=figure_dpi,
                    bbox_inches="tight")
        # log basin counts and fractions per dor / dam-count bin
        nums_in_dor_range = [
            df[df["DOR_RANGE"] == a_range_rmp].shape[0]
            for a_range_rmp in dor_range_lst
        ]
        ratios_in_dor_range = [
            a_num / df.shape[0] for a_num in nums_in_dor_range
        ]
        hydro_logger.info(
            "the number and ratio of basins in each dor range\n: %s \n %s",
            nums_in_dor_range, ratios_in_dor_range)

        nums_in_dam_num_range = [
            df[df["DAM_NUM_RANGE"] == a_range_rmp].shape[0]
            for a_range_rmp in dam_num_range_lst
        ]
        ratios_in_dam_num_range = [
            a_num / df.shape[0] for a_num in nums_in_dam_num_range
        ]
        hydro_logger.info(
            "the number and ratio of basins in each dam_num range\n: %s \n %s",
            nums_in_dam_num_range, ratios_in_dam_num_range)

        # regplot
        # NSE vs DOR regression, only for non-negative NSE basins
        plt.figure()
        sns.set(font="serif", font_scale=1.5, color_codes=True)
        sr = sns.regplot(x="DOR",
                         y=show_ind_key,
                         data=df[df[show_ind_key] >= 0],
                         scatter_kws={'s': 10})
        # clip the x axis at the 95th DOR percentile to keep outliers out
        show_dor_max = df.quantile(
            q=0.95)["DOR"]  # 30  # max(dors)  # 0.8  # 10
        show_dor_min = min(dors)
        plt.ylim(0, 1)
        plt.xlim(show_dor_min, show_dor_max)
        plt.savefig(os.path.join(
            config_data.data_path["Out"],
            'NSE~DOR-shown-max-' + str(show_dor_max) + '.png'),
                    dpi=figure_dpi,
                    bbox_inches="tight")

        # jointplot
        # dor_range = [0.2, 0.9]
        # hex-bin jointplot restricted to a mid-DOR window
        dor_range = [0.002, 0.2]
        # plt.figure()
        sns.set(font="serif", font_scale=1.5, color_codes=True)
        # g = sns.jointplot(x="DOR", y=show_ind_key, data=df[(df["DOR"] < 1) & (df[show_ind_key] >= 0)], kind="reg",
        #                   marginal_kws=dict(bins=25))
        # g = sns.jointplot(x="DOR", y=show_ind_key, data=df[(df["DOR"] < 1) & (df[show_ind_key] >= 0)], kind="hex",
        #                   color="b", marginal_kws=dict(bins=50))
        g = sns.jointplot(
            x="DOR",
            y=show_ind_key,
            data=df[(df["DOR"] < dor_range[1]) & (df["DOR"] > dor_range[0]) &
                    (df[show_ind_key] >= 0)],
            kind="hex",
            color="b")
        g.ax_marg_x.set_xlim(dor_range[0], dor_range[1])
        # g.ax_marg_y.set_ylim(-0.5, 1)
        plt.savefig(os.path.join(
            config_data.data_path["Out"],
            'NSE~DOR(range-)' + str(dor_range) + '-jointplot.png'),
                    dpi=figure_dpi,
                    bbox_inches="tight")

        # load NID (National Inventory of Dams) test data from a sibling dir
        nid_dir = os.path.join(
            "/".join(self.config_data.data_path["DB"].split("/")[:-1]), "nid",
            "test")
        nid_input = NidModel.load_nidmodel(
            nid_dir,
            nid_source_file_name='nid_source.txt',
            nid_data_file_name='nid_data.shp')
        gage_main_dam_purpose = unserialize_json(
            os.path.join(nid_dir, "dam_main_purpose_dict.json"))
        # NOTE(review): data_input is never used after this point
        data_input = GagesDamDataModel(data_model, nid_input,
                                       gage_main_dam_purpose)
        dam_coords = unserialize_json_ordered(
            os.path.join(nid_dir, "dam_points_dict.json"))
        dam_storages = unserialize_json_ordered(
            os.path.join(nid_dir, "dam_storages_dict.json"))
        dam_ids_1 = list(gage_main_dam_purpose.keys())
        dam_ids_2 = list(dam_coords.keys())
        dam_ids_3 = list(dam_storages.keys())
        # all three dicts must be strictly sorted by site id so that index
        # positions from np.intersect1d line up across them
        assert (all(x < y for x, y in zip(dam_ids_1, dam_ids_1[1:])))
        assert (all(x < y for x, y in zip(dam_ids_2, dam_ids_2[1:])))
        assert (all(x < y for x, y in zip(dam_ids_3, dam_ids_3[1:])))

        sites = list(dam_coords.keys())
        # idx_lst_nse_range: positions in gages_id of basins that have NID dams
        c, ind1, idx_lst_nse_range = np.intersect1d(sites,
                                                    gages_id,
                                                    return_indices=True)

        # log(1 + std of dam storages) per basin vs NSE
        std_storage_in_a_basin = list(map(np.std, dam_storages.values()))
        log_std_storage_in_a_basin = list(
            map(np.log,
                np.array(std_storage_in_a_basin) + 1))
        nse_values = self.inds_df["NSE"].values[idx_lst_nse_range]
        df = pd.DataFrame({
            "DAM_STORAGE_STD": log_std_storage_in_a_basin,
            show_ind_key: nse_values
        })
        plt.figure()
        sns.set(font="serif", font_scale=1.5, color_codes=True)
        g = sns.regplot(x="DAM_STORAGE_STD",
                        y=show_ind_key,
                        data=df[df[show_ind_key] >= 0],
                        scatter_kws={'s': 10})
        show_max = max(log_std_storage_in_a_basin)
        show_min = min(log_std_storage_in_a_basin)
        if show_min < 0:
            show_min = 0
        # g.ax_marg_x.set_xlim(show_min, show_max)
        # g.ax_marg_y.set_ylim(0, 1)
        plt.ylim(0, 1)
        plt.xlim(show_min, show_max)
        plt.savefig(os.path.join(config_data.data_path["Out"],
                                 'NSE~' + "DAM_STORAGE_STD" + '.png'),
                    dpi=figure_dpi,
                    bbox_inches="tight")

        gages_loc_lat = data_model.data_source.gage_dict["LAT_GAGE"]
        gages_loc_lon = data_model.data_source.gage_dict["LNG_GAGE"]
        gages_loc = [[gages_loc_lat[i], gages_loc_lon[i]]
                     for i in range(len(gages_id))]
        # calculate index of dispersion, then plot the NSE-dispersion scatterplot
        # Geo coord system of gages_loc and dam_coords are both NAD83
        coefficient_of_var = list(
            map(coefficient_of_variation, gages_loc, dam_coords.values()))
        coefficient_of_var_min = min(coefficient_of_var)
        coefficient_of_var_max = max(coefficient_of_var)
        dispersion_var = "DAM_GAGE_DIS_VAR"
        nse_values = self.inds_df["NSE"].values[idx_lst_nse_range]
        df = pd.DataFrame({
            dispersion_var: coefficient_of_var,
            show_ind_key: nse_values
        })
        plt.figure()
        sns.set(font="serif", font_scale=1.5, color_codes=True)
        g = sns.regplot(x=dispersion_var,
                        y=show_ind_key,
                        data=df[df[show_ind_key] >= 0],
                        scatter_kws={'s': 10})
        show_max = coefficient_of_var_max
        show_min = coefficient_of_var_min
        if show_min < 0:
            show_min = 0
        # g.ax_marg_x.set_xlim(show_min, show_max)
        # g.ax_marg_y.set_ylim(0, 1)
        plt.ylim(0, 1)
        plt.xlim(show_min, show_max)
        plt.savefig(os.path.join(config_data.data_path["Out"],
                                 'NSE~' + dispersion_var + '.png'),
                    dpi=figure_dpi,
                    bbox_inches="tight")

        # index of dispersion of dam locations within a basin vs NSE
        idx_dispersions = list(
            map(ind_of_dispersion, gages_loc, dam_coords.values()))
        idx_dispersion_min = min(idx_dispersions)
        idx_dispersion_max = max(idx_dispersions)
        dispersion_var = "DAM_DISPERSION_BASIN"
        # nse_range = [0, 1]
        # idx_lst_nse_range = inds_df_now[(inds_df_now[show_ind_key] >= nse_range[0]) & (inds_df_now[show_ind_key] < nse_range[1])].index.tolist()
        nse_values = self.inds_df["NSE"].values[idx_lst_nse_range]
        df = pd.DataFrame({
            dispersion_var: idx_dispersions,
            show_ind_key: nse_values
        })
        # g = sns.regplot(x=dispersion_var, y=show_ind_key, data=df[df[show_ind_key] >= 0], scatter_kws={'s': 10})
        if idx_dispersion_min < 0:
            idx_dispersion_min = 0
        plt.ylim(0, 1)
        plt.xlim(idx_dispersion_min, idx_dispersion_max)
        # plt.figure()
        sns.set(font="serif", font_scale=1.5, color_codes=True)
        g = sns.jointplot(x=dispersion_var,
                          y=show_ind_key,
                          data=df[df[show_ind_key] >= 0],
                          kind="reg")
        g.ax_marg_x.set_xlim(idx_dispersion_min, idx_dispersion_max)
        g.ax_marg_y.set_ylim(0, 1)
        plt.show()
Exemplo n.º 9
0
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    gages_model_train = GagesModel.update_data_model(
        config_data,
        data_model_train,
        data_attr_update=True,
        screen_basin_area_huc4=False)
    data_model = GagesModel.update_data_model(
        config_data,
        data_model_test,
        data_attr_update=True,
        train_stat_dict=gages_model_train.stat_dict,
        screen_basin_area_huc4=False)

    inds_df, pred_mean, obs_mean = load_ensemble_result(cfg,
                                                        exp_lst,
                                                        test_epoch,
                                                        return_value=True)
    plt.rcParams['font.family'] = 'serif'
    plt.rcParams['font.serif'] = ['Times New Roman'
                                  ] + plt.rcParams['font.serif']
    ########################### plot diversion dor ecdf  ###########################
    diversion_yes = True
    diversion_no = False
    source_data_diversion = GagesSource.choose_some_basins(
        config_data,
        config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        diversion=diversion_yes)
    source_data_nodivert = GagesSource.choose_some_basins(
        config_data,
        config_data.model_dict["data"]["tRangeTrain"],
Exemplo n.º 10
0
    paper_dor_tmp = (capacity_reservoir * 1000000 / (watershed_area * 1000000)) * 1000 / mean_runoff
    paper_dors.append(paper_dor_tmp)
# collect the paper's per-basin dor values (computed in the loop above) and
# log how they compare against the 0.02 / 0.1 / 1 dor thresholds
paper_dors = np.array(paper_dors)
hydro_logger.info("The dor values of those basins: %s", paper_dors)
hydro_logger.info("Are dor values of those basins bigger than 0.02: %s", paper_dors > 0.02)
hydro_logger.info("Are dor values of those basins bigger than 0.1: %s", paper_dors > 0.1)
hydro_logger.info("Are dor values of those basins bigger than 1: %s", paper_dors > 1)
# epoch of the trained models whose test results are loaded below
test_epoch = 300

# twelve single-run experiments: two groups of six runs that — presumably —
# differ only in random seed (seeds aligned index-by-index below); verify
all_exps_lst = ["basic_exp39", "basic_exp37", "basic_exp40", "basic_exp41", "basic_exp42", "basic_exp43",
                "basic_exp32", "basic_exp31", "basic_exp33", "basic_exp34", "basic_exp35", "basic_exp36"]
all_exps_random_seeds = ["123", "1234", "12345", "111", "1111", "11111", "123", "1234", "12345", "111", "1111", "11111"]
idx_tmp_now = 0
# report the median NSE of every single-run experiment together with its seed
for exp_tmp in all_exps_lst:
    exp_tmp_lst = [exp_tmp]
    inds_df_tmp = load_ensemble_result(cfg, exp_tmp_lst, test_epoch)
    hydro_logger.info("The median NSE value of %s is %.2f (random seed: %s)", exp_tmp, inds_df_tmp.median()["NSE"],
                      all_exps_random_seeds[idx_tmp_now])
    idx_tmp_now = idx_tmp_now + 1

# load the test-period data model of basic_exp37 for the analysis that follows
exp_lst = ["basic_exp37"]
config_data = load_dataconfig_case_exp(cfg, exp_lst[0])
data_model = GagesModel.load_datamodel(config_data.data_path["Temp"],
                                       data_source_file_name='test_data_source.txt',
                                       stat_file_name='test_Statistics.json', flow_file_name='test_flow.npy',
                                       forcing_file_name='test_forcing.npy', attr_file_name='test_attr.npy',
                                       f_dict_file_name='test_dictFactorize.json',
                                       var_dict_file_name='test_dictAttribute.json',
                                       t_s_dict_file_name='test_dictTimeSpace.json')
# CAMELS v2.0 basin-name table — presumably used to pick the CAMELS subset
# later in the script (beyond this chunk); confirm
camels_gageid_file = os.path.join(config_data.data_path["DB"], "camels_attributes_v2.0", "camels_attributes_v2.0",
                                  "camels_name.txt")