Example #1
    def test_screen_some_gauge_and_save(self):
        config_dir = definitions.CONFIG_DIR
        config_file = os.path.join(config_dir, "transdata/config_exp12.ini")
        subdir = r"transdata/exp12"
        config_data = GagesConfig.set_subdir(config_file, subdir)

        ref_source_data = GagesSource.choose_some_basins(
            self.config_data,
            self.config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            ref="Ref")
        ref_sites_id = ref_source_data.all_configs['flow_screen_gage_id']
        ref_sites_id_df = pd.DataFrame({"STAID": ref_sites_id})
        dapeng_dir = os.path.join(self.config_data.data_path["DB"], "dapeng")
        if not os.path.isdir(dapeng_dir):
            os.makedirs(dapeng_dir)
        dapeng_v2_gageid_file = os.path.join(dapeng_dir, "v2.csv")
        ref_sites_id_df.to_csv(dapeng_v2_gageid_file, index=False)

        gages_model = GagesModels(config_data,
                                  screen_basin_area_huc4=False,
                                  major_dam_num=0)
        sites_id_df = pd.DataFrame(
            {"STAID": gages_model.data_model_train.t_s_dict["sites_id"]})
        dapeng_v1_gageid_file = os.path.join(dapeng_dir, "v1.csv")
        sites_id_df.to_csv(dapeng_v1_gageid_file, index=False)

        print("read and save data screen")
Example #2
 def test_nonref_interscet_camels(self):
     t_train = self.config_data.model_dict["data"]["tRangeTrain"]
     camels_source_data = CamelsSource(self.camels_config_data, t_train)
     source_data = GagesSource.choose_some_basins(self.config_data, t_train, ref="Non-ref")
     camels_ids = np.array(camels_source_data.gage_dict["id"])
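     # CAMELS gage ids are expected to be strictly increasing (sorted, no duplicates)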
     assert (all(x < y for x, y in zip(camels_ids, camels_ids[1:])))
     gages_id = np.array(source_data.all_configs["flow_screen_gage_id"])
     intersect_ids = np.intersect1d(camels_ids, gages_id)
     print(intersect_ids)
Example #3
 def test_explore_damcls_datamodel(self):
     config_data = self.config_data
     sites_id_dict = unserialize_json(
         "/mnt/data/owen411/code/hydro-anthropogenic-lstm/example/data/gages/nid/test/dam_main_purpose_dict.json")
     sites_id = list(sites_id_dict.keys())
     source_data_dor1 = GagesSource.choose_some_basins(config_data,
                                                       config_data.model_dict["data"]["tRangeTrain"],
                                                       screen_basin_area_huc4=False,
                                                       sites_id=sites_id)
     norsto = source_data_dor1.read_attr(sites_id, ["STOR_NOR_2009"], is_return_dict=False)
     df = pd.DataFrame({"GAGE_ID": sites_id, "STOR_NOR": norsto.flatten()})
     # df.to_csv(os.path.join(source_data_dor1.all_configs["out_dir"], '3557basins_NORSTOR.csv'),
     #           quoting=csv.QUOTE_NONNUMERIC, index=None)
     df.to_csv(os.path.join(source_data_dor1.all_configs["out_dir"], '2909basins_NORSTOR.csv'),
               quoting=csv.QUOTE_NONNUMERIC, index=None)
Example #4
 def test_major_dam_interscet_camels(self):
     # screen basins by major-dam count (major_dam_num=0, i.e. basins without major dams)
     t_train = self.config_data.model_dict["data"]["tRangeTrain"]
     camels_source_data = CamelsSource(self.camels_config_data, t_train)
     conus_source_data = GagesSource.choose_some_basins(self.conus_config_data, t_train,
                                                        screen_basin_area_huc4=False, major_dam_num=0)
     camels_ids = np.array(camels_source_data.gage_dict["id"])
     assert (all(x < y for x, y in zip(camels_ids, camels_ids[1:])))
     gages_id = np.array(conus_source_data.all_configs["flow_screen_gage_id"])
     intersect_ids = np.intersect1d(camels_ids, gages_id)
     print(intersect_ids.size)
     print(intersect_ids)
     source_data_ref = GagesSource.choose_some_basins(self.conus_config_data, t_train, screen_basin_area_huc4=False,
                                                      ref='Ref')
     gages_id_ref = np.array(source_data_ref.all_configs["flow_screen_gage_id"])
     intersect_ids_ref = np.intersect1d(gages_id, gages_id_ref)
     print(intersect_ids_ref.size)
     print(intersect_ids_ref)
     source_data_nonref = GagesSource.choose_some_basins(self.conus_config_data, t_train,
                                                         screen_basin_area_huc4=False, ref='Non-ref')
     gages_id_nonref = np.array(source_data_nonref.all_configs["flow_screen_gage_id"])
     intersect_ids_nonref = np.intersect1d(gages_id, gages_id_nonref)
     print(intersect_ids_nonref.size)
     print(intersect_ids_nonref)
Example #5
 def test_explore_dor_dam_num(self):
     config_data = self.config_data
     dor_2 = 0.1
     source_data_dor2 = GagesSource.choose_some_basins(config_data,
                                                       config_data.model_dict["data"]["tRangeTrain"],
                                                       screen_basin_area_huc4=False,
                                                       DOR=dor_2)
     sites_id_largedam = source_data_dor2.all_configs['flow_screen_gage_id']
     sites_id = np.intersect1d(np.array(self.sites_id), np.array(sites_id_largedam)).tolist()
     norsto = source_data_dor2.read_attr(sites_id, ["STOR_NOR_2009"], is_return_dict=False)
     dam_num = source_data_dor2.read_attr(sites_id, ["NDAMS_2009"], is_return_dict=False)
     df = pd.DataFrame({"GAGE_ID": sites_id, "STOR_NOR": norsto.flatten(), "DAM_NUM": dam_num.flatten()})
     # df.to_csv(os.path.join(source_data_dor1.all_configs["out_dir"], '3557basins_NORSTOR.csv'),
     #           quoting=csv.QUOTE_NONNUMERIC, index=None)
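     # note: sns.distplot is deprecated in recent seaborn releases; sns.histplot is the modern replacement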
     sns.distplot(df["DAM_NUM"], bins=50)
     plt.show()
     df.to_csv(os.path.join(source_data_dor2.all_configs["out_dir"], '1185largedor_basins_NORSTOR_DAMNUM.csv'),
               quoting=csv.QUOTE_NONNUMERIC, index=None)
Example #6
 def test_explore_(self):
     config_data = self.config_data
     sites_id_dict = unserialize_json(
         "/mnt/data/owen411/code/hydro-anthropogenic-lstm/example/data/gages/nid/test/dam_main_purpose_dict.json")
     sites_id = list(sites_id_dict.keys())
     source_data_dor1 = GagesSource.choose_some_basins(config_data,
                                                       config_data.model_dict["data"]["tRangeTrain"],
                                                       screen_basin_area_huc4=False,
                                                       sites_id=sites_id)
     nse_all = pd.read_csv(
         "/mnt/data/owen411/code/hydro-anthropogenic-lstm/example/output/gages/basic/exp37/3557basins_ID_NSE_DOR.csv",
         dtype={0: str})
     sites_ids = nse_all["GAUGE ID"].values
     idx = [i for i in range(len(sites_ids)) if sites_ids[i] in sites_id]
     df = pd.DataFrame({"GAGE_ID": sites_id, "NSE": nse_all["NSE"].values[idx]})
     # df.to_csv(os.path.join(source_data_dor1.all_configs["out_dir"], '3557basins_NORSTOR.csv'),
     #           quoting=csv.QUOTE_NONNUMERIC, index=None)
     df.to_csv(os.path.join(source_data_dor1.all_configs["out_dir"], '2909basins_NSE.csv'),
               quoting=csv.QUOTE_NONNUMERIC, index=None)
Example #7
 def test_read_sites_id_see_dor(self):
     exp_lst = ["exp18", "exp19", "exp20", "exp21", "exp22", "exp23"]
     sub_lst = ["0", "1"]
     diff_lst = [
         "dictTimeSpace.json", "test_dictTimeSpace.json",
         "test_dictTimeSpace_2.json"
     ]
     for exp_str in exp_lst:
         for sub_str in sub_lst:
             comp_sites = []
             for item in diff_lst:
                 gage_id_file = os.path.join(
                     self.config_data.config_file["ROOT_DIR"], "temp",
                     "gages", "ecoregion", exp_str, sub_str, item)
                 usgs_id = unserialize_json(gage_id_file)["sites_id"]
                 assert (all(x < y for x, y in zip(usgs_id, usgs_id[1:])))
                 comp_sites.append(usgs_id)
                  # RUNAVE7100: mean annual runoff, mm/year on a 1-km grid; STOR_NOR_2009: normal dam storage, megaliters per sq km (1 megaliter = 1,000,000 liters = 1,000 cubic meters)
                 # attr_lst = ["RUNAVE7100", "STOR_NID_2009"]
                 attr_lst = ["RUNAVE7100", "STOR_NOR_2009"]
                 source_data = GagesSource.choose_some_basins(
                     self.config_data,
                     self.config_data.model_dict["data"]["tRangeTrain"],
                     screen_basin_area_huc4=False,
                     sites_id=usgs_id)
                 data_attr, var_dict, f_dict = source_data.read_attr(
                     usgs_id, attr_lst)
                  run_avg = data_attr[:, 0] * (10 ** (-3)) * (10 ** 6)  # m^3 per year
                 nor_storage = data_attr[:, 1] * 1000  # m^3
                 dors = nor_storage / run_avg
                 results = [round(i, 3) for i in dors]
                 hydro_logger.info(
                     exp_str + "-" + sub_str + "-" + item + " DOR: %s",
                     results)
             hydro_logger.info(
                 "the intersection of each pair of sites: %s, %s, %s",
                 np.intersect1d(comp_sites[0], comp_sites[1]),
                 np.intersect1d(comp_sites[0], comp_sites[2]),
                 np.intersect1d(comp_sites[1], comp_sites[2]))
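
The unit conversions above are the heart of the DOR screen. Below is a minimal self-contained sketch of the same arithmetic; the function name and sample values are illustrative, not from the source, and the units are assumed per the inline comment:

import numpy as np

def degree_of_regulation(runave_mm_per_yr, stor_nor_megaliters_per_km2):
    # mm/year over one km^2: mm -> m is 1e-3, km^2 -> m^2 is 1e6, giving m^3 per year
    run_avg_m3 = runave_mm_per_yr * (10 ** (-3)) * (10 ** 6)
    # 1 megaliter = 1,000 m^3
    storage_m3 = stor_nor_megaliters_per_km2 * 1000
    return storage_m3 / run_avg_m3  # dimensionless DOR

# e.g. 500 mm/yr of runoff against 20 ML/km^2 of storage gives DOR = 0.04
print(degree_of_regulation(np.array([500.0]), np.array([20.0])))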
Example #8
    def test_some_reservoirs(self):
        """choose some small reservoirs to train and test"""
        # read the model configuration file
        config_data = self.config_data
        source_data = GagesSource.choose_some_basins(config_data, config_data.model_dict["data"]["tRangeTrain"],
                                                     major_dam=1)
        sites_id = source_data.all_configs['flow_screen_gage_id']
        quick_data_dir = os.path.join(self.config_data.data_path["DB"], "quickdata")
        data_dir = os.path.join(quick_data_dir, "allnonref_85-05_nan-0.1_00-1.0")
        data_model_train = GagesModel.load_datamodel(data_dir,
                                                     data_source_file_name='data_source.txt',
                                                     stat_file_name='Statistics.json', flow_file_name='flow.npy',
                                                     forcing_file_name='forcing.npy', attr_file_name='attr.npy',
                                                     f_dict_file_name='dictFactorize.json',
                                                     var_dict_file_name='dictAttribute.json',
                                                     t_s_dict_file_name='dictTimeSpace.json')
        data_model_test = GagesModel.load_datamodel(data_dir,
                                                    data_source_file_name='test_data_source.txt',
                                                    stat_file_name='test_Statistics.json',
                                                    flow_file_name='test_flow.npy',
                                                    forcing_file_name='test_forcing.npy',
                                                    attr_file_name='test_attr.npy',
                                                    f_dict_file_name='test_dictFactorize.json',
                                                    var_dict_file_name='test_dictAttribute.json',
                                                    t_s_dict_file_name='test_dictTimeSpace.json')

        gages_model_train = GagesModel.update_data_model(self.config_data, data_model_train, sites_id_update=sites_id)
        gages_model_test = GagesModel.update_data_model(self.config_data, data_model_test, sites_id_update=sites_id,
                                                        train_stat_dict=gages_model_train.stat_dict)
        save_datamodel(gages_model_train, data_source_file_name='data_source.txt',
                       stat_file_name='Statistics.json', flow_file_name='flow', forcing_file_name='forcing',
                       attr_file_name='attr', f_dict_file_name='dictFactorize.json',
                       var_dict_file_name='dictAttribute.json', t_s_dict_file_name='dictTimeSpace.json')
        save_datamodel(gages_model_test, data_source_file_name='test_data_source.txt',
                       stat_file_name='test_Statistics.json', flow_file_name='test_flow',
                       forcing_file_name='test_forcing', attr_file_name='test_attr',
                       f_dict_file_name='test_dictFactorize.json', var_dict_file_name='test_dictAttribute.json',
                       t_s_dict_file_name='test_dictTimeSpace.json')
        print("read and save data model")
Example #9
    def test_gages_dam_stor_hist_basin(self):
        nid_dir = os.path.join(
            "/".join(self.config_data.data_path["DB"].split("/")[:-1]), "nid",
            "test")
        dam_storages = unserialize_json(
            os.path.join(nid_dir, "dam_storages_dict.json"))

        sites = np.array(list(dam_storages.keys()))

        dor_2 = 0.02
        source_data_dor2 = GagesSource.choose_some_basins(
            self.config_data,
            self.config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            DOR=dor_2)
        sites_id_largedam = source_data_dor2.all_configs['flow_screen_gage_id']
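        # keep only sites with dam-storage records that also pass the large-DOR screen; return_indices gives each match's position in both inputs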
        c, ind1, idx_lst_nse_range = np.intersect1d(sites,
                                                    sites_id_largedam,
                                                    return_indices=True)

        num = 4
        num_lst = np.sort(np.random.choice(len(c), num, replace=False))
        chosen_sites = c[num_lst]
        hist_bins = 20

        fig = plt.figure(figsize=(8, 9))
        gs = gridspec.GridSpec(2, 2)

        for i in range(num):
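            # map flat index i onto the 2x2 GridSpec: row i // 2, column i % 2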
            ax_k = plt.subplot(gs[int(i / 2), i % 2])
            ax_k.hist(dam_storages[chosen_sites[i]],
                      hist_bins,
                      orientation='vertical',
                      color='red',
                      alpha=0.5)
        plt.show()
Example #10
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')

    camels531_gageid_file = os.path.join(zerodor_config_data.data_path["DB"],
                                         "camels531", "camels531.txt")
    gauge_df = pd.read_csv(camels531_gageid_file, dtype={"GaugeID": str})
    gauge_list = gauge_df["GaugeID"].values
    all_sites_camels_531 = np.sort(
        [str(gauge).zfill(8) for gauge in gauge_list])

    # basins without dams
    source_data_withoutdams = GagesSource.choose_some_basins(
        zerodor_config_data,
        zerodor_config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        dam_num=0)
    sites_id_zerodor = source_data_withoutdams.all_configs[
        'flow_screen_gage_id']
    sites_zero_dor_not_in_camels = [
        sites_id_zerodor[i] for i in range(len(sites_id_zerodor))
        if sites_id_zerodor[i] not in all_sites_camels_531
    ]

    smalldor_config_data = load_dataconfig_case_exp(
        cfg, camels_pub_on_diff_dor_exp_lst[1])
    source_data_dor1 = GagesSource.choose_some_basins(
        smalldor_config_data,
        smalldor_config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
Example #11
    def test_plot_map_cartopy_multi_vars(self):
        conus_exps = ["basic_exp37"]
        config_data = load_dataconfig_case_exp(cfg, conus_exps[0])

        dor_1 = -0.02
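        # the negative sign means "DOR < 0.02" (the convention is spelled out as "meaning dor < 0.02" in a later example)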
        source_data_dor1 = GagesSource.choose_some_basins(
            config_data,
            config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            DOR=dor_1)
        # basins with dams
        source_data_withdams = GagesSource.choose_some_basins(
            config_data,
            config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            dam_num=[1, 10000])
        # basins without dams
        source_data_withoutdams = GagesSource.choose_some_basins(
            config_data,
            config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            dam_num=0)

        sites_id_dor1 = source_data_dor1.all_configs['flow_screen_gage_id']
        sites_id_withdams = source_data_withdams.all_configs[
            'flow_screen_gage_id']

        sites_id_nodam = source_data_withoutdams.all_configs[
            'flow_screen_gage_id']
        sites_id_smalldam = np.intersect1d(
            np.array(sites_id_dor1), np.array(sites_id_withdams)).tolist()

        data_model = GagesModel.load_datamodel(
            config_data.data_path["Temp"],
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        all_lat = data_model.data_source.gage_dict["LAT_GAGE"]
        all_lon = data_model.data_source.gage_dict["LNG_GAGE"]

        conus_sites = data_model.t_s_dict["sites_id"]
        idx_lst_nodam_in_conus = [
            i for i in range(len(conus_sites))
            if conus_sites[i] in sites_id_nodam
        ]
        idx_lst_smalldam_in_conus = [
            i for i in range(len(conus_sites))
            if conus_sites[i] in sites_id_smalldam
        ]

        attr_lst = ["SLOPE_PCT", "ELEV_MEAN_M_BASIN"]
        attrs = data_model.data_source.read_attr(conus_sites,
                                                 attr_lst,
                                                 is_return_dict=False)

        test_epoch = 300
        inds_df, pred, obs = load_ensemble_result(cfg,
                                                  conus_exps,
                                                  test_epoch,
                                                  return_value=True)
        show_ind_key = "NSE"
        nse_range = [0, 1]
        idx_lst_nse = inds_df[(inds_df[show_ind_key] >= nse_range[0]) & (
            inds_df[show_ind_key] < nse_range[1])].index.tolist()

        type_1_index_lst = np.intersect1d(idx_lst_nodam_in_conus,
                                          idx_lst_nse).tolist()
        type_2_index_lst = np.intersect1d(idx_lst_smalldam_in_conus,
                                          idx_lst_nse).tolist()
        frame = []
        df_type1 = pd.DataFrame({
            "type": np.full(len(type_1_index_lst), "zero-dor"),
            show_ind_key: inds_df[show_ind_key].values[type_1_index_lst],
            "lat": all_lat[type_1_index_lst],
            "lon": all_lon[type_1_index_lst],
            "slope": attrs[type_1_index_lst, 0],
            "elevation": attrs[type_1_index_lst, 1]
        })
        frame.append(df_type1)
        df_type2 = pd.DataFrame({
            "type": np.full(len(type_2_index_lst), "small-dor"),
            show_ind_key: inds_df[show_ind_key].values[type_2_index_lst],
            "lat": all_lat[type_2_index_lst],
            "lon": all_lon[type_2_index_lst],
            "slope": attrs[type_2_index_lst, 0],
            "elevation": attrs[type_2_index_lst, 1]
        })
        frame.append(df_type2)
        data_df = pd.concat(frame)
        idx_lst = [
            np.arange(len(type_1_index_lst)),
            np.arange(len(type_1_index_lst),
                      len(type_1_index_lst) + len(type_2_index_lst))
        ]
        plot_gages_map_and_scatter(data_df,
                                   [show_ind_key, "lat", "lon", "elevation"],
                                   idx_lst,
                                   cmap_strs=["Reds", "Blues"],
                                   labels=["zero-dor", "small-dor"],
                                   scatter_label=[attr_lst[1], show_ind_key])
        # matplotlib.rcParams.update({'font.size': 12})
        plt.tight_layout()
        plt.show()
Example #12
conus_data_model = GagesModel.load_datamodel(
    conus_config_data.data_path["Temp"],
    data_source_file_name='test_data_source.txt',
    stat_file_name='test_Statistics.json',
    flow_file_name='test_flow.npy',
    forcing_file_name='test_forcing.npy',
    attr_file_name='test_attr.npy',
    f_dict_file_name='test_dictFactorize.json',
    var_dict_file_name='test_dictAttribute.json',
    t_s_dict_file_name='test_dictTimeSpace.json')
conus_sites = conus_data_model.t_s_dict["sites_id"]

dor_1 = -dor_cutoff
dor_2 = dor_cutoff
source_data_dor1 = GagesSource.choose_some_basins(
    conus_config_data,
    conus_config_data.model_dict["data"]["tRangeTrain"],
    screen_basin_area_huc4=False,
    DOR=dor_1)
source_data_dor2 = GagesSource.choose_some_basins(
    conus_config_data,
    conus_config_data.model_dict["data"]["tRangeTrain"],
    screen_basin_area_huc4=False,
    DOR=dor_2)
# basins with dams
source_data_withdams = GagesSource.choose_some_basins(
    conus_config_data,
    conus_config_data.model_dict["data"]["tRangeTrain"],
    screen_basin_area_huc4=False,
    dam_num=[1, 10000])
# basins without dams
source_data_withoutdams = GagesSource.choose_some_basins(
Example #13
    def test_some_reservoirs(self):
        """choose some small reservoirs randomly to train and test"""
        # read the model configuration file
        config_data = self.config_data_1
        # according to paper "High-resolution mapping of the world's reservoirs and dams for sustainable river-flow management"
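        # negative dor again means "DOR < 0.02"; see the sign-convention note in a later example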
        dor = -0.02
        source_data = GagesSource.choose_some_basins(
            config_data,
            config_data.model_dict["data"]["tRangeTrain"],
            DOR=dor)
        sites_id = source_data.all_configs['flow_screen_gage_id']

        # data1 is the historical data fed to LSTM-Inv, whose output serves as a kernel for the second LSTM
        quick_data_dir = os.path.join(self.config_data_1.data_path["DB"],
                                      "quickdata")
        data_dir = os.path.join(quick_data_dir,
                                "allnonref_85-05_nan-0.1_00-1.0")
        # for the inv model, the train and test data models are the same
        data_model_8595 = GagesModel.load_datamodel(
            data_dir,
            data_source_file_name='data_source.txt',
            stat_file_name='Statistics.json',
            flow_file_name='flow.npy',
            forcing_file_name='forcing.npy',
            attr_file_name='attr.npy',
            f_dict_file_name='dictFactorize.json',
            var_dict_file_name='dictAttribute.json',
            t_s_dict_file_name='dictTimeSpace.json')
        # for the 2nd model, the train and test data models both lie within the test period
        data_model_9505 = GagesModel.load_datamodel(
            data_dir,
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')

        t_range1_train = self.config_data_1.model_dict["data"]["tRangeTrain"]
        t_range1_test = self.config_data_1.model_dict["data"]["tRangeTest"]
        gages_model1_train = GagesModel.update_data_model(
            self.config_data_1,
            data_model_8595,
            sites_id_update=sites_id,
            t_range_update=t_range1_train,
            data_attr_update=True)
        # Because the data for the "90-95" period are known, its statistics can be computed over that period
        gages_model1_test = GagesModel.update_data_model(
            self.config_data_1,
            data_model_8595,
            sites_id_update=sites_id,
            t_range_update=t_range1_test,
            data_attr_update=True)
        t_range2_train = self.config_data_2.model_dict["data"]["tRangeTrain"]
        t_range2_test = self.config_data_2.model_dict["data"]["tRangeTest"]
        gages_model2_train = GagesModel.update_data_model(
            self.config_data_2,
            data_model_8595,
            sites_id_update=sites_id,
            t_range_update=t_range2_train,
            data_attr_update=True)
        gages_model2_test = GagesModel.update_data_model(
            self.config_data_2,
            data_model_9505,
            sites_id_update=sites_id,
            t_range_update=t_range2_test,
            data_attr_update=True,
            train_stat_dict=gages_model2_train.stat_dict)
        save_datamodel(gages_model1_train,
                       "1",
                       data_source_file_name='data_source.txt',
                       stat_file_name='Statistics.json',
                       flow_file_name='flow',
                       forcing_file_name='forcing',
                       attr_file_name='attr',
                       f_dict_file_name='dictFactorize.json',
                       var_dict_file_name='dictAttribute.json',
                       t_s_dict_file_name='dictTimeSpace.json')
        save_datamodel(gages_model1_test,
                       "1",
                       data_source_file_name='test_data_source.txt',
                       stat_file_name='test_Statistics.json',
                       flow_file_name='test_flow',
                       forcing_file_name='test_forcing',
                       attr_file_name='test_attr',
                       f_dict_file_name='test_dictFactorize.json',
                       var_dict_file_name='test_dictAttribute.json',
                       t_s_dict_file_name='test_dictTimeSpace.json')
        save_datamodel(gages_model2_train,
                       "2",
                       data_source_file_name='data_source.txt',
                       stat_file_name='Statistics.json',
                       flow_file_name='flow',
                       forcing_file_name='forcing',
                       attr_file_name='attr',
                       f_dict_file_name='dictFactorize.json',
                       var_dict_file_name='dictAttribute.json',
                       t_s_dict_file_name='dictTimeSpace.json')
        save_datamodel(gages_model2_test,
                       "2",
                       data_source_file_name='test_data_source.txt',
                       stat_file_name='test_Statistics.json',
                       flow_file_name='test_flow',
                       forcing_file_name='test_forcing',
                       attr_file_name='test_attr',
                       f_dict_file_name='test_dictFactorize.json',
                       var_dict_file_name='test_dictAttribute.json',
                       t_s_dict_file_name='test_dictTimeSpace.json')
        print("read and save data model")
Example #14
def pub_lstm(args):
    update_cfg(cfg, args)
    random_seed = cfg.RANDOM_SEED
    test_epoch = cfg.TEST_EPOCH
    gpu_num = cfg.CTX
    train_mode = cfg.TRAIN_MODE
    cache = cfg.CACHE.STATE
    pub_plan = cfg.PUB_PLAN
    plus = cfg.PLUS
    dor = cfg.GAGES.attrScreenParams.DOR
    split_num = cfg.SPLIT_NUM
    print("train and test for PUB: \n")
    config_data = GagesConfig(cfg)
    if cache:
        eco_names = [
            ("ECO2_CODE", 5.2), ("ECO2_CODE", 5.3), ("ECO2_CODE", 6.2),
            ("ECO2_CODE", 7.1), ("ECO2_CODE", 8.1), ("ECO2_CODE", 8.2),
            ("ECO2_CODE", 8.3), ("ECO2_CODE", 8.4), ("ECO2_CODE", 8.5),
            ("ECO2_CODE", 9.2), ("ECO2_CODE", 9.3), ("ECO2_CODE", 9.4),
            ("ECO2_CODE", 9.5), ("ECO2_CODE", 9.6), ("ECO2_CODE", 10.1),
            ("ECO2_CODE", 10.2), ("ECO2_CODE", 10.4), ("ECO2_CODE", 11.1),
            ("ECO2_CODE", 12.1), ("ECO2_CODE", 13.1)
        ]
        quick_data_dir = os.path.join(config_data.data_path["DB"], "quickdata")
        data_dir = os.path.join(quick_data_dir,
                                "conus-all_90-10_nan-0.0_00-1.0")
        data_model_train = GagesModel.load_datamodel(
            data_dir,
            data_source_file_name='data_source.txt',
            stat_file_name='Statistics.json',
            flow_file_name='flow.npy',
            forcing_file_name='forcing.npy',
            attr_file_name='attr.npy',
            f_dict_file_name='dictFactorize.json',
            var_dict_file_name='dictAttribute.json',
            t_s_dict_file_name='dictTimeSpace.json')
        data_model_test = GagesModel.load_datamodel(
            data_dir,
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        conus_sites_id = data_model_train.t_s_dict["sites_id"]
        if pub_plan == 0:
            """do a pub test like freddy's"""
            camels531_gageid_file = os.path.join(config_data.data_path["DB"],
                                                 "camels531", "camels531.txt")
            gauge_df = pd.read_csv(camels531_gageid_file,
                                   dtype={"GaugeID": str})
            gauge_list = gauge_df["GaugeID"].values
            all_sites_camels_531 = np.sort(
                [str(gauge).zfill(8) for gauge in gauge_list])
            sites_id_train = np.intersect1d(conus_sites_id,
                                            all_sites_camels_531)
            # basins not in CAMELS
            sites_id_test = [
                a_temp_site for a_temp_site in conus_sites_id
                if a_temp_site not in all_sites_camels_531
            ]
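            # conus_sites_id appears to be sorted, so the filtered test list should remain strictly increasing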
            assert (all(x < y
                        for x, y in zip(sites_id_test, sites_id_test[1:])))
        elif pub_plan == 1 or pub_plan == 4:
            source_data_dor1 = GagesSource.choose_some_basins(
                config_data,
                config_data.model_dict["data"]["tRangeTrain"],
                screen_basin_area_huc4=False,
                DOR=-dor)
            # basins with dams
            source_data_withdams = GagesSource.choose_some_basins(
                config_data,
                config_data.model_dict["data"]["tRangeTrain"],
                screen_basin_area_huc4=False,
                dam_num=[1, 100000])
            # basins without dams
            source_data_withoutdams = GagesSource.choose_some_basins(
                config_data,
                config_data.model_dict["data"]["tRangeTrain"],
                screen_basin_area_huc4=False,
                dam_num=0)

            sites_id_dor1 = source_data_dor1.all_configs['flow_screen_gage_id']
            sites_id_withdams = source_data_withdams.all_configs[
                'flow_screen_gage_id']

            if pub_plan == 1:
                sites_id_train = source_data_withoutdams.all_configs[
                    'flow_screen_gage_id']
                sites_id_test = np.intersect1d(
                    np.array(sites_id_dor1),
                    np.array(sites_id_withdams)).tolist()
            else:
                sites_id_train = np.intersect1d(
                    np.array(sites_id_dor1),
                    np.array(sites_id_withdams)).tolist()
                sites_id_test = source_data_withoutdams.all_configs[
                    'flow_screen_gage_id']

        elif pub_plan == 2 or pub_plan == 5:
            source_data_dor1 = GagesSource.choose_some_basins(
                config_data,
                config_data.model_dict["data"]["tRangeTrain"],
                screen_basin_area_huc4=False,
                DOR=dor)
            # basins without dams
            source_data_withoutdams = GagesSource.choose_some_basins(
                config_data,
                config_data.model_dict["data"]["tRangeTrain"],
                screen_basin_area_huc4=False,
                dam_num=0)

            if pub_plan == 2:
                sites_id_train = source_data_withoutdams.all_configs[
                    'flow_screen_gage_id']
                sites_id_test = source_data_dor1.all_configs[
                    'flow_screen_gage_id']
            else:
                sites_id_train = source_data_dor1.all_configs[
                    'flow_screen_gage_id']
                sites_id_test = source_data_withoutdams.all_configs[
                    'flow_screen_gage_id']

        elif pub_plan == 3 or pub_plan == 6:
            dor_1 = -dor
            dor_2 = dor
            source_data_dor1 = GagesSource.choose_some_basins(
                config_data,
                config_data.model_dict["data"]["tRangeTrain"],
                screen_basin_area_huc4=False,
                DOR=dor_1)
            # basins with dams
            source_data_withdams = GagesSource.choose_some_basins(
                config_data,
                config_data.model_dict["data"]["tRangeTrain"],
                screen_basin_area_huc4=False,
                dam_num=[1, 100000])
            sites_id_dor1 = source_data_dor1.all_configs['flow_screen_gage_id']
            sites_id_withdams = source_data_withdams.all_configs[
                'flow_screen_gage_id']

            source_data_dor2 = GagesSource.choose_some_basins(
                config_data,
                config_data.model_dict["data"]["tRangeTrain"],
                screen_basin_area_huc4=False,
                DOR=dor_2)

            if pub_plan == 3:
                sites_id_train = np.intersect1d(
                    np.array(sites_id_dor1),
                    np.array(sites_id_withdams)).tolist()
                sites_id_test = source_data_dor2.all_configs[
                    'flow_screen_gage_id']
            else:
                sites_id_train = source_data_dor2.all_configs[
                    'flow_screen_gage_id']
                sites_id_test = np.intersect1d(
                    np.array(sites_id_dor1),
                    np.array(sites_id_withdams)).tolist()

        else:
            print("wrong plan")
            sites_id_train = None
            sites_id_test = None

        train_sites_in_conus = np.intersect1d(conus_sites_id, sites_id_train)
        test_sites_in_conus = np.intersect1d(conus_sites_id, sites_id_test)

        if plus == 0:
            all_index_lst_train_1 = []
            # all sites come from train1 dataset
            sites_lst_train = []
            all_index_lst_test_1 = []
            sites_lst_test_1 = []
            all_index_lst_test_2 = []
            sites_lst_test_2 = []
            np.random.seed(random_seed)
            kf = KFold(n_splits=split_num,
                       shuffle=True,
                       random_state=random_seed)
            eco_name_chosen = []
            for eco_name in eco_names:
                eco_source_data = GagesSource.choose_some_basins(
                    config_data,
                    config_data.model_dict["data"]["tRangeTrain"],
                    screen_basin_area_huc4=False,
                    ecoregion=eco_name)
                eco_sites_id = eco_source_data.all_configs[
                    'flow_screen_gage_id']
                train_sites_id_inter = np.intersect1d(train_sites_in_conus,
                                                      eco_sites_id)
                test_sites_id_inter = np.intersect1d(test_sites_in_conus,
                                                     eco_sites_id)
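                # skip ecoregions with too few training basins to form split_num folds or with no test basins at all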
                if train_sites_id_inter.size < split_num or test_sites_id_inter.size < 1:
                    continue
                for train, test in kf.split(train_sites_id_inter):
                    all_index_lst_train_1.append(train)
                    sites_lst_train.append(train_sites_id_inter[train])
                    all_index_lst_test_1.append(test)
                    sites_lst_test_1.append(train_sites_id_inter[test])
                    if test_sites_id_inter.size < test.size:
                        all_index_lst_test_2.append(
                            np.arange(test_sites_id_inter.size))
                        sites_lst_test_2.append(test_sites_id_inter)
                    else:
                        test2_chosen_idx = np.random.choice(
                            test_sites_id_inter.size, test.size, replace=False)
                        all_index_lst_test_2.append(test2_chosen_idx)
                        sites_lst_test_2.append(
                            test_sites_id_inter[test2_chosen_idx])
                eco_name_chosen.append(eco_name)
        elif plus == -1:
            print("camels pub, only do pub on the camels basins")
            all_index_lst_train_1 = []
            # all sites come from train1 dataset
            sites_lst_train = []
            all_index_lst_test_1 = []
            sites_lst_test_1 = []
            np.random.seed(random_seed)
            kf = KFold(n_splits=split_num,
                       shuffle=True,
                       random_state=random_seed)
            eco_name_chosen = []
            for eco_name in eco_names:
                eco_source_data = GagesSource.choose_some_basins(
                    config_data,
                    config_data.model_dict["data"]["tRangeTrain"],
                    screen_basin_area_huc4=False,
                    ecoregion=eco_name)
                eco_sites_id = eco_source_data.all_configs[
                    'flow_screen_gage_id']
                train_sites_id_inter = np.intersect1d(train_sites_in_conus,
                                                      eco_sites_id)
                if train_sites_id_inter.size < split_num:
                    continue
                for train, test in kf.split(train_sites_id_inter):
                    all_index_lst_train_1.append(train)
                    sites_lst_train.append(train_sites_id_inter[train])
                    all_index_lst_test_1.append(test)
                    sites_lst_test_1.append(train_sites_id_inter[test])
                eco_name_chosen.append(eco_name)
        elif plus == -2:
            print(
                "camels pub, only do pub on the camels basins, same with freddy's split method"
            )
            all_index_lst_train_1 = []
            # all sites come from train1 dataset
            sites_lst_train = []
            all_index_lst_test_1 = []
            sites_lst_test_1 = []
            np.random.seed(random_seed)
            kf = KFold(n_splits=split_num,
                       shuffle=True,
                       random_state=random_seed)

            for train, test in kf.split(train_sites_in_conus):
                all_index_lst_train_1.append(train)
                sites_lst_train.append(train_sites_in_conus[train])
                all_index_lst_test_1.append(test)
                sites_lst_test_1.append(train_sites_in_conus[test])
        else:
            sites_lst_train = []
            sites_lst_test_1 = []
            sites_lst_test_2 = []

            np.random.seed(random_seed)
            kf = KFold(n_splits=split_num,
                       shuffle=True,
                       random_state=random_seed)
            eco_name_chosen = []
            for eco_name in eco_names:
                eco_source_data = GagesSource.choose_some_basins(
                    config_data,
                    config_data.model_dict["data"]["tRangeTrain"],
                    screen_basin_area_huc4=False,
                    ecoregion=eco_name)
                eco_sites_id = eco_source_data.all_configs[
                    'flow_screen_gage_id']
                sites_id_inter_1 = np.intersect1d(train_sites_in_conus,
                                                  eco_sites_id)
                sites_id_inter_2 = np.intersect1d(test_sites_in_conus,
                                                  eco_sites_id)

                if sites_id_inter_1.size < sites_id_inter_2.size:
                    if sites_id_inter_1.size < split_num:
                        continue
                    for train, test in kf.split(sites_id_inter_1):
                        sites_lst_train_1 = sites_id_inter_1[train]
                        sites_lst_test_1.append(sites_id_inter_1[test])

                        chosen_lst_2 = random_choice_no_return(
                            sites_id_inter_2, [train.size, test.size])
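                        # chosen_lst_2 appears to hold two disjoint random batches of sizes train.size and test.size drawn from the other group's sites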
                        sites_lst_train_2 = chosen_lst_2[0]
                        sites_lst_test_2.append(chosen_lst_2[1])

                        sites_lst_train.append(
                            np.sort(
                                np.append(sites_lst_train_1,
                                          sites_lst_train_2)))

                else:
                    if sites_id_inter_2.size < split_num:
                        continue
                    for train, test in kf.split(sites_id_inter_2):
                        sites_lst_train_2 = sites_id_inter_2[train]
                        sites_lst_test_2.append(sites_id_inter_2[test])

                        chosen_lst_1 = random_choice_no_return(
                            sites_id_inter_1, [train.size, test.size])
                        sites_lst_train_1 = chosen_lst_1[0]
                        sites_lst_test_1.append(chosen_lst_1[1])

                        sites_lst_train.append(
                            np.sort(
                                np.append(sites_lst_train_1,
                                          sites_lst_train_2)))

                eco_name_chosen.append(eco_name)
        for i in range(split_num):
            sites_ids_train_ilst = [
                sites_lst_train[j] for j in range(len(sites_lst_train))
                if j % split_num == i
            ]
            sites_ids_train_i = np.sort(
                reduce(lambda x, y: np.hstack((x, y)), sites_ids_train_ilst))
            sites_ids_test_ilst_1 = [
                sites_lst_test_1[j] for j in range(len(sites_lst_test_1))
                if j % split_num == i
            ]
            sites_ids_test_i_1 = np.sort(
                reduce(lambda x, y: np.hstack((x, y)), sites_ids_test_ilst_1))

            if plus >= 0:
                sites_ids_test_ilst_2 = [
                    sites_lst_test_2[j] for j in range(len(sites_lst_test_2))
                    if j % split_num == i
                ]
                sites_ids_test_i_2 = np.sort(
                    reduce(lambda x, y: np.hstack((x, y)),
                           sites_ids_test_ilst_2))
            config_data_i = GagesConfig.set_subdir(cfg, str(i))

            gages_model_train_i = GagesModel.update_data_model(
                config_data_i,
                data_model_train,
                sites_id_update=sites_ids_train_i,
                data_attr_update=True,
                screen_basin_area_huc4=False)
            gages_model_test_baseline_i = GagesModel.update_data_model(
                config_data_i,
                data_model_test,
                sites_id_update=sites_ids_train_i,
                data_attr_update=True,
                train_stat_dict=gages_model_train_i.stat_dict,
                screen_basin_area_huc4=False)
            gages_model_test_i_1 = GagesModel.update_data_model(
                config_data_i,
                data_model_test,
                sites_id_update=sites_ids_test_i_1,
                data_attr_update=True,
                train_stat_dict=gages_model_train_i.stat_dict,
                screen_basin_area_huc4=False)
            if plus >= 0:
                gages_model_test_i_2 = GagesModel.update_data_model(
                    config_data_i,
                    data_model_test,
                    sites_id_update=sites_ids_test_i_2,
                    data_attr_update=True,
                    train_stat_dict=gages_model_train_i.stat_dict,
                    screen_basin_area_huc4=False)
            save_datamodel(gages_model_train_i,
                           data_source_file_name='data_source.txt',
                           stat_file_name='Statistics.json',
                           flow_file_name='flow',
                           forcing_file_name='forcing',
                           attr_file_name='attr',
                           f_dict_file_name='dictFactorize.json',
                           var_dict_file_name='dictAttribute.json',
                           t_s_dict_file_name='dictTimeSpace.json')
            save_datamodel(gages_model_test_baseline_i,
                           data_source_file_name='test_data_source_base.txt',
                           stat_file_name='test_Statistics_base.json',
                           flow_file_name='test_flow_base',
                           forcing_file_name='test_forcing_base',
                           attr_file_name='test_attr_base',
                           f_dict_file_name='test_dictFactorize_base.json',
                           var_dict_file_name='test_dictAttribute_base.json',
                           t_s_dict_file_name='test_dictTimeSpace_base.json')
            save_datamodel(gages_model_test_i_1,
                           data_source_file_name='test_data_source.txt',
                           stat_file_name='test_Statistics.json',
                           flow_file_name='test_flow',
                           forcing_file_name='test_forcing',
                           attr_file_name='test_attr',
                           f_dict_file_name='test_dictFactorize.json',
                           var_dict_file_name='test_dictAttribute.json',
                           t_s_dict_file_name='test_dictTimeSpace.json')
            if plus >= 0:
                save_datamodel(gages_model_test_i_2,
                               data_source_file_name='test_data_source_2.txt',
                               stat_file_name='test_Statistics_2.json',
                               flow_file_name='test_flow_2',
                               forcing_file_name='test_forcing_2',
                               attr_file_name='test_attr_2',
                               f_dict_file_name='test_dictFactorize_2.json',
                               var_dict_file_name='test_dictAttribute_2.json',
                               t_s_dict_file_name='test_dictTimeSpace_2.json')
            print("save ecoregion " + str(i) + " data model")
    with torch.cuda.device(gpu_num):
        if train_mode:
            for i in range(split_num):
                data_model = GagesModel.load_datamodel(
                    config_data.data_path["Temp"],
                    str(i),
                    data_source_file_name='data_source.txt',
                    stat_file_name='Statistics.json',
                    flow_file_name='flow.npy',
                    forcing_file_name='forcing.npy',
                    attr_file_name='attr.npy',
                    f_dict_file_name='dictFactorize.json',
                    var_dict_file_name='dictAttribute.json',
                    t_s_dict_file_name='dictTimeSpace.json')
                master_train(data_model, random_seed=random_seed)
        for i in range(split_num):
            data_model_baseline = GagesModel.load_datamodel(
                config_data.data_path["Temp"],
                str(i),
                data_source_file_name='test_data_source_base.txt',
                stat_file_name='test_Statistics_base.json',
                flow_file_name='test_flow_base.npy',
                forcing_file_name='test_forcing_base.npy',
                attr_file_name='test_attr_base.npy',
                f_dict_file_name='test_dictFactorize_base.json',
                var_dict_file_name='test_dictAttribute_base.json',
                t_s_dict_file_name='test_dictTimeSpace_base.json')
            data_model = GagesModel.load_datamodel(
                config_data.data_path["Temp"],
                str(i),
                data_source_file_name='test_data_source.txt',
                stat_file_name='test_Statistics.json',
                flow_file_name='test_flow.npy',
                forcing_file_name='test_forcing.npy',
                attr_file_name='test_attr.npy',
                f_dict_file_name='test_dictFactorize.json',
                var_dict_file_name='test_dictAttribute.json',
                t_s_dict_file_name='test_dictTimeSpace.json')
            if plus >= 0:
                data_model_2 = GagesModel.load_datamodel(
                    config_data.data_path["Temp"],
                    str(i),
                    data_source_file_name='test_data_source_2.txt',
                    stat_file_name='test_Statistics_2.json',
                    flow_file_name='test_flow_2.npy',
                    forcing_file_name='test_forcing_2.npy',
                    attr_file_name='test_attr_2.npy',
                    f_dict_file_name='test_dictFactorize_2.json',
                    var_dict_file_name='test_dictAttribute_2.json',
                    t_s_dict_file_name='test_dictTimeSpace_2.json')
            pred_baseline, obs_baseline = master_test(data_model_baseline,
                                                      epoch=test_epoch,
                                                      save_file_suffix="base")
            basin_area_baseline = data_model_baseline.data_source.read_attr(
                data_model_baseline.t_s_dict["sites_id"], ['DRAIN_SQKM'],
                is_return_dict=False)
            mean_prep_baseline = data_model_baseline.data_source.read_attr(
                data_model_baseline.t_s_dict["sites_id"], ['PPTAVG_BASIN'],
                is_return_dict=False)
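            # PPTAVG_BASIN is an annual precipitation average (cm in GAGES-II); /365 then *10 presumably converts it to mm/day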
            mean_prep_baseline = mean_prep_baseline / 365 * 10
            pred_baseline = _basin_norm(pred_baseline,
                                        basin_area_baseline,
                                        mean_prep_baseline,
                                        to_norm=False)
            obs_baseline = _basin_norm(obs_baseline,
                                       basin_area_baseline,
                                       mean_prep_baseline,
                                       to_norm=False)
            save_result(
                data_model_baseline.data_source.data_config.data_path['Temp'],
                test_epoch,
                pred_baseline,
                obs_baseline,
                pred_name='flow_pred_base',
                obs_name='flow_obs_base')

            pred, obs = master_test(data_model, epoch=test_epoch)
            basin_area = data_model.data_source.read_attr(
                data_model.t_s_dict["sites_id"], ['DRAIN_SQKM'],
                is_return_dict=False)
            mean_prep = data_model.data_source.read_attr(
                data_model.t_s_dict["sites_id"], ['PPTAVG_BASIN'],
                is_return_dict=False)
            mean_prep = mean_prep / 365 * 10
            pred = _basin_norm(pred, basin_area, mean_prep, to_norm=False)
            obs = _basin_norm(obs, basin_area, mean_prep, to_norm=False)
            save_result(data_model.data_source.data_config.data_path['Temp'],
                        test_epoch, pred, obs)
            if plus >= 0:
                pred_2, obs_2 = master_test(data_model_2,
                                            epoch=test_epoch,
                                            save_file_suffix="2")
                basin_area_2 = data_model_2.data_source.read_attr(
                    data_model_2.t_s_dict["sites_id"], ['DRAIN_SQKM'],
                    is_return_dict=False)
                mean_prep_2 = data_model_2.data_source.read_attr(
                    data_model_2.t_s_dict["sites_id"], ['PPTAVG_BASIN'],
                    is_return_dict=False)
                mean_prep_2 = mean_prep_2 / 365 * 10
                pred_2 = _basin_norm(pred_2,
                                     basin_area_2,
                                     mean_prep_2,
                                     to_norm=False)
                obs_2 = _basin_norm(obs_2,
                                    basin_area_2,
                                    mean_prep_2,
                                    to_norm=False)
                save_result(
                    data_model_2.data_source.data_config.data_path['Temp'],
                    test_epoch,
                    pred_2,
                    obs_2,
                    pred_name='flow_pred_2',
                    obs_name='flow_obs_2')
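
The fold bookkeeping in the eco-region loops above is easy to misread: each ecoregion appends its split_num (train, test) splits to the same flat lists, so fold i is later re-assembled by taking every entry with j % split_num == i. A small runnable sketch of that pattern (the toy site ids and group sizes are hypothetical):

import numpy as np
from functools import reduce
from sklearn.model_selection import KFold

split_num = 3
rng = np.random.default_rng(1234)
# three toy "ecoregions" with 9, 12 and 7 basins each
eco_groups = [np.sort(rng.choice(100000, size=n, replace=False)) for n in (9, 12, 7)]

kf = KFold(n_splits=split_num, shuffle=True, random_state=1234)
sites_lst_train = []
for eco_sites in eco_groups:
    # each ecoregion contributes its split_num train splits in order
    for train, _test in kf.split(eco_sites):
        sites_lst_train.append(eco_sites[train])

for i in range(split_num):
    # entries j with j % split_num == i are the i-th fold of every ecoregion
    fold_parts = [sites_lst_train[j] for j in range(len(sites_lst_train)) if j % split_num == i]
    sites_ids_train_i = np.sort(reduce(lambda x, y: np.hstack((x, y)), fold_parts))
    print("fold", i, "->", sites_ids_train_i.size, "training sites")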
Example #15
 def test_gages_data_model(self):
     config_data = self.config_data
     dam_num = 0
     source_data = GagesSource.choose_some_basins(
         config_data,
         config_data.model_dict["data"]["tRangeTrain"],
         screen_basin_area_huc4=False,
         dam_num=dam_num)
     sites_id = source_data.all_configs['flow_screen_gage_id']
     quick_data_dir = os.path.join(self.config_data.data_path["DB"],
                                   "quickdata")
     data_dir = os.path.join(quick_data_dir,
                             "conus-all_85-05_nan-0.1_00-1.0")
     data_model_train = GagesModel.load_datamodel(
         data_dir,
         data_source_file_name='data_source.txt',
         stat_file_name='Statistics.json',
         flow_file_name='flow.npy',
         forcing_file_name='forcing.npy',
         attr_file_name='attr.npy',
         f_dict_file_name='dictFactorize.json',
         var_dict_file_name='dictAttribute.json',
         t_s_dict_file_name='dictTimeSpace.json')
     data_model_test = GagesModel.load_datamodel(
         data_dir,
         data_source_file_name='test_data_source.txt',
         stat_file_name='test_Statistics.json',
         flow_file_name='test_flow.npy',
         forcing_file_name='test_forcing.npy',
         attr_file_name='test_attr.npy',
         f_dict_file_name='test_dictFactorize.json',
         var_dict_file_name='test_dictAttribute.json',
         t_s_dict_file_name='test_dictTimeSpace.json')
     gages_model_train = GagesModel.update_data_model(
         self.config_data,
         data_model_train,
         sites_id_update=sites_id,
         screen_basin_area_huc4=False)
     gages_model_test = GagesModel.update_data_model(
         self.config_data,
         data_model_test,
         sites_id_update=sites_id,
         train_stat_dict=gages_model_train.stat_dict,
         screen_basin_area_huc4=False)
     save_datamodel(gages_model_train,
                    data_source_file_name='data_source.txt',
                    stat_file_name='Statistics.json',
                    flow_file_name='flow',
                    forcing_file_name='forcing',
                    attr_file_name='attr',
                    f_dict_file_name='dictFactorize.json',
                    var_dict_file_name='dictAttribute.json',
                    t_s_dict_file_name='dictTimeSpace.json')
     save_datamodel(gages_model_test,
                    data_source_file_name='test_data_source.txt',
                    stat_file_name='test_Statistics.json',
                    flow_file_name='test_flow',
                    forcing_file_name='test_forcing',
                    attr_file_name='test_attr',
                    f_dict_file_name='test_dictFactorize.json',
                    var_dict_file_name='test_dictAttribute.json',
                    t_s_dict_file_name='test_dictTimeSpace.json')
     print("read and save data model")
Example #16
    def test_some_reservoirs(self):
        config_data = self.config_data

        dam_num = 0
        dor = 0.02
        source_data_dor1 = GagesSource.choose_some_basins(
            config_data,
            config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            DOR=dor)
        # basins with dams
        source_data_withoutdams = GagesSource.choose_some_basins(
            config_data,
            config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            dam_num=dam_num)

        sites_id_dor1 = source_data_dor1.all_configs['flow_screen_gage_id']
        sites_id_withoutdams = source_data_withoutdams.all_configs[
            'flow_screen_gage_id']
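        # final site set: the union of the DOR-screened basins and the dam-free basins, sorted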
        sites_id = np.sort(
            np.union1d(np.array(sites_id_dor1),
                       np.array(sites_id_withoutdams))).tolist()

        quick_data_dir = os.path.join(self.config_data.data_path["DB"],
                                      "quickdata")
        data_dir = os.path.join(quick_data_dir,
                                "conus-all_90-10_nan-0.0_00-1.0")
        data_model_train = GagesModel.load_datamodel(
            data_dir,
            data_source_file_name='data_source.txt',
            stat_file_name='Statistics.json',
            flow_file_name='flow.npy',
            forcing_file_name='forcing.npy',
            attr_file_name='attr.npy',
            f_dict_file_name='dictFactorize.json',
            var_dict_file_name='dictAttribute.json',
            t_s_dict_file_name='dictTimeSpace.json')
        data_model_test = GagesModel.load_datamodel(
            data_dir,
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')

        gages_model_train = GagesModel.update_data_model(
            self.config_data,
            data_model_train,
            sites_id_update=sites_id,
            data_attr_update=True,
            screen_basin_area_huc4=False)
        gages_model_test = GagesModel.update_data_model(
            self.config_data,
            data_model_test,
            sites_id_update=sites_id,
            data_attr_update=True,
            train_stat_dict=gages_model_train.stat_dict,
            screen_basin_area_huc4=False)
        save_datamodel(gages_model_train,
                       data_source_file_name='data_source.txt',
                       stat_file_name='Statistics.json',
                       flow_file_name='flow',
                       forcing_file_name='forcing',
                       attr_file_name='attr',
                       f_dict_file_name='dictFactorize.json',
                       var_dict_file_name='dictAttribute.json',
                       t_s_dict_file_name='dictTimeSpace.json')
        save_datamodel(gages_model_test,
                       data_source_file_name='test_data_source.txt',
                       stat_file_name='test_Statistics.json',
                       flow_file_name='test_flow',
                       forcing_file_name='test_forcing',
                       attr_file_name='test_attr',
                       f_dict_file_name='test_dictFactorize.json',
                       var_dict_file_name='test_dictAttribute.json',
                       t_s_dict_file_name='test_dictTimeSpace.json')
        print("read and save data model")
Exemplo n.º 17
    def test_some_reservoirs(self):
        """choose some small reservoirs to train and test"""
        # 读取模型配置文件
        config_data = self.config_data
        # according to paper "High-resolution mapping of the world's reservoirs and dams for sustainable river-flow management"
        dor = -0.02  # meaning dor < 0.02
        source_data = GagesSource.choose_some_basins(
            config_data,
            config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            DOR=dor)
        sites_id = source_data.all_configs['flow_screen_gage_id']
        quick_data_dir = os.path.join(self.config_data.data_path["DB"],
                                      "quickdata")
        data_dir = os.path.join(quick_data_dir,
                                "conus-all_90-10_nan-0.0_00-1.0")
        data_model_train = GagesModel.load_datamodel(
            data_dir,
            data_source_file_name='data_source.txt',
            stat_file_name='Statistics.json',
            flow_file_name='flow.npy',
            forcing_file_name='forcing.npy',
            attr_file_name='attr.npy',
            f_dict_file_name='dictFactorize.json',
            var_dict_file_name='dictAttribute.json',
            t_s_dict_file_name='dictTimeSpace.json')
        data_model_test = GagesModel.load_datamodel(
            data_dir,
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')

        gages_model_train = GagesModel.update_data_model(
            self.config_data,
            data_model_train,
            sites_id_update=sites_id,
            data_attr_update=True,
            screen_basin_area_huc4=False)
        gages_model_test = GagesModel.update_data_model(
            self.config_data,
            data_model_test,
            sites_id_update=sites_id,
            data_attr_update=True,
            train_stat_dict=gages_model_train.stat_dict,
            screen_basin_area_huc4=False)
        save_datamodel(gages_model_train,
                       data_source_file_name='data_source.txt',
                       stat_file_name='Statistics.json',
                       flow_file_name='flow',
                       forcing_file_name='forcing',
                       attr_file_name='attr',
                       f_dict_file_name='dictFactorize.json',
                       var_dict_file_name='dictAttribute.json',
                       t_s_dict_file_name='dictTimeSpace.json')
        save_datamodel(gages_model_test,
                       data_source_file_name='test_data_source.txt',
                       stat_file_name='test_Statistics.json',
                       flow_file_name='test_flow',
                       forcing_file_name='test_forcing',
                       attr_file_name='test_attr',
                       f_dict_file_name='test_dictFactorize.json',
                       var_dict_file_name='test_dictAttribute.json',
                       t_s_dict_file_name='test_dictTimeSpace.json')
        print("read and save data model")
Exemplo n.º 18
        train_stat_dict=gages_model_train.stat_dict,
        screen_basin_area_huc4=False)

    inds_df, pred_mean, obs_mean = load_ensemble_result(cfg,
                                                        exp_lst,
                                                        test_epoch,
                                                        return_value=True)
    plt.rcParams['font.family'] = 'serif'
    plt.rcParams['font.serif'] = ['Times New Roman'] + plt.rcParams['font.serif']
    ########################### plot diversion dor ecdf  ###########################
    diversion_yes = True
    diversion_no = False
    source_data_diversion = GagesSource.choose_some_basins(
        config_data,
        config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        diversion=diversion_yes)
    source_data_nodivert = GagesSource.choose_some_basins(
        config_data,
        config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        diversion=diversion_no)
    sites_id_nodivert = source_data_nodivert.all_configs['flow_screen_gage_id']
    sites_id_diversion = source_data_diversion.all_configs[
        'flow_screen_gage_id']

    dor_1 = -dor_cutoff
    dor_2 = dor_cutoff
    source_data_dor1 = GagesSource.choose_some_basins(
        config_data,
Exemplo n.º 19
    def test_some_reservoirs(self):
        # a control group for simulate/exp3
        dor = -0.02  # meaning dor < 0.02
        source_data = GagesSource.choose_some_basins(
            self.config_data,
            self.config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            DOR=dor)
        sites_id_dor = source_data.all_configs['flow_screen_gage_id']

        quick_data_dir = os.path.join(self.config_data.data_path["DB"],
                                      "quickdata")
        data_dir = os.path.join(quick_data_dir,
                                "conus-all_90-10_nan-0.0_00-1.0")
        data_model_9000 = GagesModel.load_datamodel(
            data_dir,
            data_source_file_name='data_source.txt',
            stat_file_name='Statistics.json',
            flow_file_name='flow.npy',
            forcing_file_name='forcing.npy',
            attr_file_name='attr.npy',
            f_dict_file_name='dictFactorize.json',
            var_dict_file_name='dictAttribute.json',
            t_s_dict_file_name='dictTimeSpace.json')
        data_model_0010 = GagesModel.load_datamodel(
            data_dir,
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        conus_sites_id_all = data_model_9000.t_s_dict["sites_id"]
        nomajordam_source_data = GagesSource.choose_some_basins(
            self.config_data,
            self.config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            major_dam_num=0)
        nomajordam_sites_id = nomajordam_source_data.all_configs[
            'flow_screen_gage_id']
        # In the no-major-dam case, all sites are used as natural-flow generators
        nomajordam_in_conus = np.intersect1d(conus_sites_id_all,
                                             nomajordam_sites_id)

        conus_sites_id_dor = np.intersect1d(conus_sites_id_all, sites_id_dor)
        majordam_source_data = GagesSource.choose_some_basins(
            self.config_data,
            self.config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            major_dam_num=[1, 2000])
        majordam_sites_id = majordam_source_data.all_configs[
            'flow_screen_gage_id']
        majordam_in_conus = np.intersect1d(conus_sites_id_dor,
                                           majordam_sites_id)

        chosen_sites_id = np.sort(
            np.append(nomajordam_in_conus, majordam_in_conus))

        gages_model_train_lstm = GagesModel.update_data_model(
            self.config_data,
            data_model_9000,
            sites_id_update=chosen_sites_id,
            data_attr_update=True,
            screen_basin_area_huc4=False)

        gages_model_test_lstm = GagesModel.update_data_model(
            self.config_data,
            data_model_0010,
            sites_id_update=chosen_sites_id,
            data_attr_update=True,
            train_stat_dict=gages_model_train_lstm.stat_dict,
            screen_basin_area_huc4=False)

        save_datamodel(gages_model_train_lstm,
                       data_source_file_name='data_source.txt',
                       stat_file_name='Statistics.json',
                       flow_file_name='flow',
                       forcing_file_name='forcing',
                       attr_file_name='attr',
                       f_dict_file_name='dictFactorize.json',
                       var_dict_file_name='dictAttribute.json',
                       t_s_dict_file_name='dictTimeSpace.json')
        save_datamodel(gages_model_test_lstm,
                       data_source_file_name='test_data_source.txt',
                       stat_file_name='test_Statistics.json',
                       flow_file_name='test_flow',
                       forcing_file_name='test_forcing',
                       attr_file_name='test_attr',
                       f_dict_file_name='test_dictFactorize.json',
                       var_dict_file_name='test_dictAttribute.json',
                       t_s_dict_file_name='test_dictTimeSpace.json')
        print("read and save data model")
Exemplo n.º 20
    def test_stor_seperate(self):
        config_dir = definitions.CONFIG_DIR
        config_file = os.path.join(config_dir, "basic/config_exp18.ini")
        subdir = r"basic/exp18"
        config_data = GagesConfig.set_subdir(config_file, subdir)
        data_model = GagesModel.load_datamodel(
            config_data.data_path["Temp"],
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        all_sites = data_model.t_s_dict["sites_id"]
        storage_nor_1 = [0, 50]
        storage_nor_2 = [50, 15000]  # max is 14348.6581036888
        source_data_nor1 = GagesSource.choose_some_basins(
            config_data,
            config_data.model_dict["data"]["tRangeTrain"],
            STORAGE=storage_nor_1)
        source_data_nor2 = GagesSource.choose_some_basins(
            config_data,
            config_data.model_dict["data"]["tRangeTrain"],
            STORAGE=storage_nor_2)
        sites_id_nor1 = source_data_nor1.all_configs['flow_screen_gage_id']
        sites_id_nor2 = source_data_nor2.all_configs['flow_screen_gage_id']
        idx_lst_nor1 = [
            i for i in range(len(all_sites)) if all_sites[i] in sites_id_nor1
        ]
        idx_lst_nor2 = [
            i for i in range(len(all_sites)) if all_sites[i] in sites_id_nor2
        ]

        pred, obs = load_result(
            data_model.data_source.data_config.data_path['Temp'],
            self.test_epoch)
        pred = pred.reshape(pred.shape[0], pred.shape[1])
        obs = obs.reshape(obs.shape[0], obs.shape[1])
        inds = statError(obs, pred)
        inds_df = pd.DataFrame(inds)

        keys_nse = "NSE"
        xs = []
        ys = []
        cases_exps_legends_together = ["small_stor", "large_stor"]

        x1, y1 = ecdf(inds_df[keys_nse].iloc[idx_lst_nor1])
        xs.append(x1)
        ys.append(y1)

        x2, y2 = ecdf(inds_df[keys_nse].iloc[idx_lst_nor2])
        xs.append(x2)
        ys.append(y2)

        cases_exps = ["dam_exp12", "dam_exp11"]
        cases_exps_legends_separate = ["small_stor", "large_stor"]
        # cases_exps = ["dam_exp4", "dam_exp5", "dam_exp6"]
        # cases_exps = ["dam_exp1", "dam_exp2", "dam_exp3"]
        # cases_exps_legends = ["dam-lstm", "dam-with-natural-flow", "dam-with-kernel"]
        for case_exp in cases_exps:
            config_data_i = load_dataconfig_case_exp(case_exp)
            pred_i, obs_i = load_result(config_data_i.data_path['Temp'],
                                        self.test_epoch)
            pred_i = pred_i.reshape(pred_i.shape[0], pred_i.shape[1])
            obs_i = obs_i.reshape(obs_i.shape[0], obs_i.shape[1])
            inds_i = statError(obs_i, pred_i)
            x, y = ecdf(inds_i[keys_nse])
            xs.append(x)
            ys.append(y)

        plot_ecdfs(xs,
                   ys,
                   cases_exps_legends_together + cases_exps_legends_separate,
                   style=["together", "together", "separate", "separate"])
Exemplo n.º 21
    def test_zero_small_dor_basins_locations(self):
        conus_exps = self.exp_lst
        test_epoch = self.test_epoch
        inds_df, pred, obs = load_ensemble_result(self.config_file,
                                                  conus_exps,
                                                  test_epoch,
                                                  return_value=True)
        conus_config_data = load_dataconfig_case_exp(self.config_file,
                                                     conus_exps[0])
        conus_data_model = GagesModel.load_datamodel(
            conus_config_data.data_path["Temp"],
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        conus_sites = conus_data_model.t_s_dict["sites_id"]

        all_lat = conus_data_model.data_source.gage_dict["LAT_GAGE"]
        all_lon = conus_data_model.data_source.gage_dict["LNG_GAGE"]
        show_ind_key = "NSE"
        attr_lst = ["SLOPE_PCT", "ELEV_MEAN_M_BASIN"]
        attrs = conus_data_model.data_source.read_attr(conus_sites,
                                                       attr_lst,
                                                       is_return_dict=False)

        # basins west of 100°W
        western_lon_idx = [i for i in range(all_lon.size) if all_lon[i] < -100]

        nse_range = [0, 1]
        idx_lst_nse = inds_df[(inds_df[show_ind_key] >= nse_range[0]) & (
            inds_df[show_ind_key] < nse_range[1])].index.tolist()
        idx_lst_nse = np.intersect1d(western_lon_idx, idx_lst_nse)

        # small dor
        source_data_dor1 = GagesSource.choose_some_basins(
            conus_config_data,
            conus_config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            DOR=-self.dor)

        # basins with dams
        source_data_withdams = GagesSource.choose_some_basins(
            conus_config_data,
            conus_config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            dam_num=[1, 10000])
        # basins without dams
        source_data_withoutdams = GagesSource.choose_some_basins(
            conus_config_data,
            conus_config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            dam_num=0)

        sites_id_dor1 = source_data_dor1.all_configs['flow_screen_gage_id']
        sites_id_withdams = source_data_withdams.all_configs[
            'flow_screen_gage_id']

        sites_id_nodam = source_data_withoutdams.all_configs[
            'flow_screen_gage_id']
        sites_id_smalldam = np.intersect1d(
            np.array(sites_id_dor1), np.array(sites_id_withdams)).tolist()

        idx_lst_nodam_in_conus = [
            i for i in range(len(conus_sites))
            if conus_sites[i] in sites_id_nodam
        ]
        idx_lst_smalldam_in_conus = [
            i for i in range(len(conus_sites))
            if conus_sites[i] in sites_id_smalldam
        ]

        type_1_index_lst = np.intersect1d(idx_lst_nodam_in_conus,
                                          idx_lst_nse).tolist()
        type_2_index_lst = np.intersect1d(idx_lst_smalldam_in_conus,
                                          idx_lst_nse).tolist()
        pd.DataFrame({
            "GAGE_ID": np.array(conus_sites)[type_1_index_lst]
        }).to_csv(
            os.path.join(conus_config_data.data_path["Out"],
                         "western-zero-dor-sites.csv"))
        pd.DataFrame({
            "GAGE_ID": np.array(conus_sites)[type_2_index_lst]
        }).to_csv(
            os.path.join(conus_config_data.data_path["Out"],
                         "western-small-dor-sites.csv"))
        frame = []
        df_type1 = pd.DataFrame({
            "type":
            np.full(len(type_1_index_lst), "zero-dor"),
            show_ind_key:
            inds_df[show_ind_key].values[type_1_index_lst],
            "lat":
            all_lat[type_1_index_lst],
            "lon":
            all_lon[type_1_index_lst],
            "slope":
            attrs[type_1_index_lst, 0],
            "elevation":
            attrs[type_1_index_lst, 1]
        })
        frame.append(df_type1)
        df_type2 = pd.DataFrame({
            "type":
            np.full(len(type_2_index_lst), "small-dor"),
            show_ind_key:
            inds_df[show_ind_key].values[type_2_index_lst],
            "lat":
            all_lat[type_2_index_lst],
            "lon":
            all_lon[type_2_index_lst],
            "slope":
            attrs[type_2_index_lst, 0],
            "elevation":
            attrs[type_2_index_lst, 1]
        })
        frame.append(df_type2)
        data_df = pd.concat(frame)
        idx_lst = [
            np.arange(len(type_1_index_lst)),
            np.arange(len(type_1_index_lst),
                      len(type_1_index_lst) + len(type_2_index_lst))
        ]
        plot_gages_map_and_scatter(data_df,
                                   [show_ind_key, "lat", "lon", "slope"],
                                   idx_lst,
                                   cmap_strs=["Reds", "Blues"],
                                   labels=["zero-dor", "small-dor"],
                                   scatter_label=[attr_lst[0], show_ind_key],
                                   wspace=2,
                                   hspace=1.5,
                                   legend_y=.8,
                                   sub_fig_ratio=[6, 4, 1])
        plt.tight_layout()
        plt.show()
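
The membership scans above (one Python-level pass over all the sites per chosen list) can be expressed with NumPy set routines; the same np.in1d idiom appears inside update_data_model below. A sketch of the equivalent lookup:

import numpy as np

def indices_of(all_sites, chosen_sites):
    """Indices of all_sites entries that occur in chosen_sites, in order."""
    return np.where(np.in1d(all_sites, chosen_sites))[0].tolist()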
Exemplo n.º 22
    def test_diff_dor(self):
        dor_1 = -self.dor
        dor_2 = self.dor
        test_epoch = self.test_epoch
        config_file = self.config_file

        conus_exps = ["basic_exp37"]
        pair1_exps = ["dam_exp1"]
        pair2_exps = ["nodam_exp7"]
        pair3_exps = ["dam_exp27"]
        nodam_exp_lst = ["nodam_exp1"]
        # dor thresholds for alternatives: -0.003 -> dam_exp11, -0.08 -> dam_exp17, -1 -> dam_exp32
        smalldam_exp_lst = ["dam_exp17"]
        # dor thresholds for alternatives: 0.003 -> dam_exp12, 0.08 -> dam_exp18, 1 -> dam_exp33
        largedam_exp_lst = ["dam_exp4"]
        pair1_config_data = load_dataconfig_case_exp(config_file,
                                                     pair1_exps[0])
        pair2_config_data = load_dataconfig_case_exp(config_file,
                                                     pair2_exps[0])
        pair3_config_data = load_dataconfig_case_exp(config_file,
                                                     pair3_exps[0])
        conus_config_data = load_dataconfig_case_exp(config_file,
                                                     conus_exps[0])

        conus_data_model = GagesModel.load_datamodel(
            conus_config_data.data_path["Temp"],
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        conus_sites = conus_data_model.t_s_dict["sites_id"]

        source_data_dor1 = GagesSource.choose_some_basins(
            conus_config_data,
            conus_config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            DOR=dor_1)
        source_data_dor2 = GagesSource.choose_some_basins(
            conus_config_data,
            conus_config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            DOR=dor_2)
        # basins with dams
        source_data_withdams = GagesSource.choose_some_basins(
            conus_config_data,
            conus_config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            dam_num=[1, 10000])
        # basins without dams
        source_data_withoutdams = GagesSource.choose_some_basins(
            conus_config_data,
            conus_config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            dam_num=0)

        sites_id_dor1 = source_data_dor1.all_configs['flow_screen_gage_id']
        sites_id_withdams = source_data_withdams.all_configs[
            'flow_screen_gage_id']

        sites_id_nodam = source_data_withoutdams.all_configs[
            'flow_screen_gage_id']
        sites_id_smalldam = np.intersect1d(
            np.array(sites_id_dor1), np.array(sites_id_withdams)).tolist()
        sites_id_largedam = source_data_dor2.all_configs['flow_screen_gage_id']

        # sites_id_nolargedam = np.sort(np.union1d(np.array(sites_id_nodam), np.array(sites_id_largedam))).tolist()
        # pair1_sites = np.sort(np.intersect1d(np.array(sites_id_dor1), np.array(conus_sites))).tolist()
        # pair2_sites = np.sort(np.intersect1d(np.array(sites_id_nolargedam), np.array(conus_sites))).tolist()
        # pair3_sites = np.sort(np.intersect1d(np.array(sites_id_withdams), np.array(conus_sites))).tolist()

        pair1_data_model = GagesModel.load_datamodel(
            pair1_config_data.data_path["Temp"],
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        pair1_sites = pair1_data_model.t_s_dict["sites_id"]
        pair2_data_model = GagesModel.load_datamodel(
            pair2_config_data.data_path["Temp"],
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        pair2_sites = pair2_data_model.t_s_dict["sites_id"]
        pair3_data_model = GagesModel.load_datamodel(
            pair3_config_data.data_path["Temp"],
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        pair3_sites = pair3_data_model.t_s_dict["sites_id"]

        idx_lst_nodam_in_pair1 = [
            i for i in range(len(pair1_sites))
            if pair1_sites[i] in sites_id_nodam
        ]
        idx_lst_nodam_in_pair2 = [
            i for i in range(len(pair2_sites))
            if pair2_sites[i] in sites_id_nodam
        ]
        idx_lst_nodam_in_pair3 = [
            i for i in range(len(pair3_sites))
            if pair3_sites[i] in sites_id_nodam
        ]
        idx_lst_nodam_in_conus = [
            i for i in range(len(conus_sites))
            if conus_sites[i] in sites_id_nodam
        ]

        idx_lst_smalldam_in_pair1 = [
            i for i in range(len(pair1_sites))
            if pair1_sites[i] in sites_id_smalldam
        ]
        idx_lst_smalldam_in_pair2 = [
            i for i in range(len(pair2_sites))
            if pair2_sites[i] in sites_id_smalldam
        ]
        idx_lst_smalldam_in_pair3 = [
            i for i in range(len(pair3_sites))
            if pair3_sites[i] in sites_id_smalldam
        ]
        idx_lst_smalldam_in_conus = [
            i for i in range(len(conus_sites))
            if conus_sites[i] in sites_id_smalldam
        ]

        idx_lst_largedam_in_pair1 = [
            i for i in range(len(pair1_sites))
            if pair1_sites[i] in sites_id_largedam
        ]
        idx_lst_largedam_in_pair2 = [
            i for i in range(len(pair2_sites))
            if pair2_sites[i] in sites_id_largedam
        ]
        idx_lst_largedam_in_pair3 = [
            i for i in range(len(pair3_sites))
            if pair3_sites[i] in sites_id_largedam
        ]
        idx_lst_largedam_in_conus = [
            i for i in range(len(conus_sites))
            if conus_sites[i] in sites_id_largedam
        ]

        print("multi box")
        inds_df_pair1 = load_ensemble_result(config_file, pair1_exps,
                                             test_epoch)
        inds_df_pair2 = load_ensemble_result(config_file, pair2_exps,
                                             test_epoch)
        inds_df_pair3 = load_ensemble_result(config_file, pair3_exps,
                                             test_epoch)
        inds_df_conus = load_ensemble_result(config_file, conus_exps,
                                             test_epoch)

        fig = plt.figure(figsize=(15, 8))
        gs = gridspec.GridSpec(1, 3)
        keys_nse = "NSE"
        color_chosen = ["Greens", "Blues", "Reds"]
        median_loc = 0.015
        decimal_places = 2
        sns.despine()
        sns.set(font_scale=1.5)

        attr_nodam = "zero_dor"
        cases_exps_legends_nodam = [
            "LSTM-Z", "LSTM-ZS", "LSTM-ZL", "LSTM-CONUS"
        ]
        frames_nodam = []
        inds_df_nodam = load_ensemble_result(config_file, nodam_exp_lst,
                                             test_epoch)
        df_nodam_alone = pd.DataFrame({
            attr_nodam:
            np.full([inds_df_nodam.shape[0]], cases_exps_legends_nodam[0]),
            keys_nse:
            inds_df_nodam[keys_nse]
        })
        frames_nodam.append(df_nodam_alone)

        df_nodam_in_pair1 = pd.DataFrame({
            attr_nodam:
            np.full([
                inds_df_pair1[keys_nse].iloc[idx_lst_nodam_in_pair1].shape[0]
            ], cases_exps_legends_nodam[1]),
            keys_nse:
            inds_df_pair1[keys_nse].iloc[idx_lst_nodam_in_pair1]
        })
        frames_nodam.append(df_nodam_in_pair1)

        df_nodam_in_pair2 = pd.DataFrame({
            attr_nodam:
            np.full([
                inds_df_pair2[keys_nse].iloc[idx_lst_nodam_in_pair2].shape[0]
            ], cases_exps_legends_nodam[2]),
            keys_nse:
            inds_df_pair2[keys_nse].iloc[idx_lst_nodam_in_pair2]
        })
        frames_nodam.append(df_nodam_in_pair2)

        df_nodam_in_conus = pd.DataFrame({
            attr_nodam:
            np.full([
                inds_df_conus[keys_nse].iloc[idx_lst_nodam_in_conus].shape[0]
            ], cases_exps_legends_nodam[3]),
            keys_nse:
            inds_df_conus[keys_nse].iloc[idx_lst_nodam_in_conus]
        })
        frames_nodam.append(df_nodam_in_conus)
        result_nodam = pd.concat(frames_nodam)
        ax1 = plt.subplot(gs[0])
        # ax1.set_title("(a)")
        ax1.set_xticklabels(ax1.get_xticklabels(), rotation=30)
        ax1.set_ylim([0, 1])
        sns.boxplot(ax=ax1,
                    x=attr_nodam,
                    y=keys_nse,
                    data=result_nodam,
                    showfliers=False,
                    palette=color_chosen[0])
        medians_nodam = result_nodam.groupby(
            [attr_nodam], sort=False)[keys_nse].median().values
        median_labels_nodam = [
            str(np.round(s, decimal_places)) for s in medians_nodam
        ]
        pos1 = range(len(medians_nodam))
        for tick, label in zip(pos1, ax1.get_xticklabels()):
            ax1.text(pos1[tick],
                     medians_nodam[tick] + median_loc,
                     median_labels_nodam[tick],
                     horizontalalignment='center',
                     size='x-small',
                     weight='semibold')

        attr_smalldam = "small_dor"
        cases_exps_legends_smalldam = [
            "LSTM-S", "LSTM-ZS", "LSTM-SL", "LSTM-CONUS"
        ]
        frames_smalldam = []
        inds_df_smalldam = load_ensemble_result(config_file, smalldam_exp_lst,
                                                test_epoch)
        df_smalldam_alone = pd.DataFrame({
            attr_smalldam:
            np.full([inds_df_smalldam.shape[0]],
                    cases_exps_legends_smalldam[0]),
            keys_nse:
            inds_df_smalldam[keys_nse]
        })
        frames_smalldam.append(df_smalldam_alone)

        df_smalldam_in_pair1 = pd.DataFrame({
            attr_smalldam:
            np.full([
                inds_df_pair1[keys_nse].iloc[idx_lst_smalldam_in_pair1].
                shape[0]
            ], cases_exps_legends_smalldam[1]),
            keys_nse:
            inds_df_pair1[keys_nse].iloc[idx_lst_smalldam_in_pair1]
        })
        frames_smalldam.append(df_smalldam_in_pair1)

        df_smalldam_in_pair3 = pd.DataFrame({
            attr_smalldam:
            np.full([
                inds_df_pair3[keys_nse].iloc[idx_lst_smalldam_in_pair3].
                shape[0]
            ], cases_exps_legends_smalldam[2]),
            keys_nse:
            inds_df_pair3[keys_nse].iloc[idx_lst_smalldam_in_pair3]
        })
        frames_smalldam.append(df_smalldam_in_pair3)

        df_smalldam_in_conus = pd.DataFrame({
            attr_smalldam:
            np.full([
                inds_df_conus[keys_nse].iloc[idx_lst_smalldam_in_conus].
                shape[0]
            ], cases_exps_legends_smalldam[3]),
            keys_nse:
            inds_df_conus[keys_nse].iloc[idx_lst_smalldam_in_conus]
        })
        frames_smalldam.append(df_smalldam_in_conus)
        result_smalldam = pd.concat(frames_smalldam)
        ax2 = plt.subplot(gs[1])
        # ax2.set_title("(b)")
        ax2.set_xticklabels(ax2.get_xticklabels(), rotation=30)
        ax2.set_ylim([0, 1])
        ax2.set(ylabel=None)
        sns.boxplot(ax=ax2,
                    x=attr_smalldam,
                    y=keys_nse,
                    data=result_smalldam,
                    showfliers=False,
                    palette=color_chosen[1])
        medians_smalldam = result_smalldam.groupby(
            [attr_smalldam], sort=False)[keys_nse].median().values
        median_labels_smalldam = [
            str(np.round(s, decimal_places)) for s in medians_smalldam
        ]
        pos2 = range(len(medians_smalldam))
        for tick, label in zip(pos2, ax2.get_xticklabels()):
            ax2.text(pos2[tick],
                     medians_smalldam[tick] + median_loc,
                     median_labels_smalldam[tick],
                     horizontalalignment='center',
                     size='x-small',
                     weight='semibold')

        attr_largedam = "large_dor"
        cases_exps_legends_largedam = [
            "LSTM-L", "LSTM-ZL", "LSTM-SL", "LSTM-CONUS"
        ]
        frames_largedam = []
        inds_df_largedam = load_ensemble_result(config_file, largedam_exp_lst,
                                                test_epoch)
        df_largedam_alone = pd.DataFrame({
            attr_largedam:
            np.full([inds_df_largedam.shape[0]],
                    cases_exps_legends_largedam[0]),
            keys_nse:
            inds_df_largedam[keys_nse]
        })
        frames_largedam.append(df_largedam_alone)

        df_largedam_in_pair2 = pd.DataFrame({
            attr_largedam:
            np.full([
                inds_df_pair2[keys_nse].iloc[idx_lst_largedam_in_pair2].
                shape[0]
            ], cases_exps_legends_largedam[1]),
            keys_nse:
            inds_df_pair2[keys_nse].iloc[idx_lst_largedam_in_pair2]
        })
        frames_largedam.append(df_largedam_in_pair2)

        df_largedam_in_pair3 = pd.DataFrame({
            attr_largedam:
            np.full([
                inds_df_pair3[keys_nse].iloc[idx_lst_largedam_in_pair3].
                shape[0]
            ], cases_exps_legends_largedam[2]),
            keys_nse:
            inds_df_pair3[keys_nse].iloc[idx_lst_largedam_in_pair3]
        })
        frames_largedam.append(df_largedam_in_pair3)

        df_largedam_in_conus = pd.DataFrame({
            attr_largedam:
            np.full([
                inds_df_conus[keys_nse].iloc[idx_lst_largedam_in_conus].
                shape[0]
            ], cases_exps_legends_largedam[3]),
            keys_nse:
            inds_df_conus[keys_nse].iloc[idx_lst_largedam_in_conus]
        })
        frames_largedam.append(df_largedam_in_conus)
        result_largedam = pd.concat(frames_largedam)
        ax3 = plt.subplot(gs[2])
        # ax3.set_title("(c)")
        ax3.set_xticklabels(ax3.get_xticklabels(), rotation=30)
        ax3.set_ylim([0, 1])
        ax3.set(ylabel=None)
        sns.boxplot(ax=ax3,
                    x=attr_largedam,
                    y=keys_nse,
                    data=result_largedam,
                    showfliers=False,
                    palette=color_chosen[2])
        medians_largedam = result_largedam.groupby(
            [attr_largedam], sort=False)[keys_nse].median().values
        median_labels_largedam = [
            str(np.round(s, decimal_places)) for s in medians_largedam
        ]
        pos3 = range(len(medians_largedam))
        for tick, label in zip(pos3, ax3.get_xticklabels()):
            ax3.text(pos3[tick],
                     medians_largedam[tick] + median_loc,
                     median_labels_largedam[tick],
                     horizontalalignment='center',
                     size='x-small',
                     weight='semibold')
        # sns.despine()
        plt.tight_layout()
        plt.show()
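
The three boxplot panels above repeat the same median-labeling block; a hypothetical helper sketch that factors it out:

import numpy as np

def annotate_medians(ax, data, group_col, value_col, offset=0.015, ndigits=2):
    """Write each group's median above its box, mirroring the blocks above."""
    medians = data.groupby([group_col], sort=False)[value_col].median().values
    for pos, median in enumerate(medians):
        ax.text(pos, median + offset, str(np.round(median, ndigits)),
                horizontalalignment='center', size='x-small', weight='semibold')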
Exemplo n.º 23
    def test_diff_dor_fig2_in_the_paper(self):
        data_model = GagesModel.load_datamodel(
            self.config_data.data_path["Temp"],
            data_source_file_name='data_source.txt',
            stat_file_name='Statistics.json',
            flow_file_name='flow.npy',
            forcing_file_name='forcing.npy',
            attr_file_name='attr.npy',
            f_dict_file_name='dictFactorize.json',
            var_dict_file_name='dictAttribute.json',
            t_s_dict_file_name='dictTimeSpace.json')
        config_data = self.config_data
        config_file = self.config_file
        test_epoch = self.test_epoch
        exp_lst = self.exp_lst
        figure_dpi = self.FIGURE_DPI
        inds_df, pred_mean, obs_mean = load_ensemble_result(config_file,
                                                            exp_lst,
                                                            test_epoch,
                                                            return_value=True)
        diversion_yes = True
        diversion_no = False
        source_data_diversion = GagesSource.choose_some_basins(
            config_data,
            config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            diversion=diversion_yes)
        source_data_nodivert = GagesSource.choose_some_basins(
            config_data,
            config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            diversion=diversion_no)
        sites_id_nodivert = source_data_nodivert.all_configs[
            'flow_screen_gage_id']
        sites_id_diversion = source_data_diversion.all_configs[
            'flow_screen_gage_id']

        dor_1 = -self.dor
        dor_2 = self.dor
        source_data_dor1 = GagesSource.choose_some_basins(
            config_data,
            config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            DOR=dor_1)
        source_data_dor2 = GagesSource.choose_some_basins(
            config_data,
            config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            DOR=dor_2)
        sites_id_dor1 = source_data_dor1.all_configs['flow_screen_gage_id']
        sites_id_dor2 = source_data_dor2.all_configs['flow_screen_gage_id']

        # basins with dams
        source_data_withdams = GagesSource.choose_some_basins(
            config_data,
            config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            dam_num=[1, 100000])
        sites_id_withdams = source_data_withdams.all_configs[
            'flow_screen_gage_id']
        sites_id_dor1 = np.intersect1d(np.array(sites_id_dor1),
                                       np.array(sites_id_withdams)).tolist()

        no_divert_small_dor = np.intersect1d(sites_id_nodivert, sites_id_dor1)
        no_divert_large_dor = np.intersect1d(sites_id_nodivert, sites_id_dor2)
        diversion_small_dor = np.intersect1d(sites_id_diversion, sites_id_dor1)
        diversion_large_dor = np.intersect1d(sites_id_diversion, sites_id_dor2)

        all_sites = data_model.t_s_dict["sites_id"]
        idx_lst_nodivert_smalldor = [
            i for i in range(len(all_sites))
            if all_sites[i] in no_divert_small_dor
        ]
        idx_lst_nodivert_largedor = [
            i for i in range(len(all_sites))
            if all_sites[i] in no_divert_large_dor
        ]
        idx_lst_diversion_smalldor = [
            i for i in range(len(all_sites))
            if all_sites[i] in diversion_small_dor
        ]
        idx_lst_diversion_largedor = [
            i for i in range(len(all_sites))
            if all_sites[i] in diversion_large_dor
        ]

        keys_nse = "NSE"
        xs = []
        ys = []
        cases_exps_legends_together = [
            "not_diverted_small_dor", "not_diverted_large_dor",
            "diversion_small_dor", "diversion_large_dor", "CONUS"
        ]

        x1, y1 = ecdf(inds_df[keys_nse].iloc[idx_lst_nodivert_smalldor])
        xs.append(x1)
        ys.append(y1)

        x2, y2 = ecdf(inds_df[keys_nse].iloc[idx_lst_nodivert_largedor])
        xs.append(x2)
        ys.append(y2)

        x3, y3 = ecdf(inds_df[keys_nse].iloc[idx_lst_diversion_smalldor])
        xs.append(x3)
        ys.append(y3)

        x4, y4 = ecdf(inds_df[keys_nse].iloc[idx_lst_diversion_largedor])
        xs.append(x4)
        ys.append(y4)

        x_conus, y_conus = ecdf(inds_df[keys_nse])
        xs.append(x_conus)
        ys.append(y_conus)
        hydro_logger.info(
            "The median NSEs of all five curves (%s) are \n %.2f, %.2f, %.2f, %.2f, %.2f",
            cases_exps_legends_together, np.median(x1), np.median(x2),
            np.median(x3), np.median(x4), np.median(x_conus))
        # plot_ecdfs_matplot(xs, ys, cases_exps_legends_together,
        #                    colors=["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "grey"],
        #                    dash_lines=[False, False, False, False, True], x_str="NSE", y_str="CDF")
        # plot using two linestyles and two colors for dor and diversion.
        # plot_ecdfs(xs, ys, cases_exps_legends_together, x_str="NSE", y_str="CDF")
        # define color scheme and line style
        colors = ["#1f77b4", "#d62728"]
        linestyles = ['-', "--"]
        markers = ["", "."]

        fig = plt.figure(figsize=(8, 6))
        axes = fig.add_axes([0.1, 0.1, 0.8, 0.8])
        # for i, marker in enumerate(markers):
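        # i indexes the linestyle (solid = not diverted, dashed = diversion) and
        # j the color (blue = small dor, red = large dor), so xs[i * 2 + j]
        # follows the order of cases_exps_legends_together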
        for i, linestyle in enumerate(linestyles):
            for j, color in enumerate(colors):
                plt.plot(
                    xs[i * 2 + j],
                    ys[i * 2 + j],
                    color=color,
                    ls=linestyle,  # marker=marker,
                    label=cases_exps_legends_together[i * 2 + j])
        line_i, = axes.plot(x_conus,
                            y_conus,
                            color="grey",
                            label=cases_exps_legends_together[4])
        line_i.set_dashes([2, 2, 10, 2])

        x_str = "NSE"
        y_str = "CDF"
        x_lim = (0, 1)
        y_lim = (0, 1)
        x_interval = 0.1
        y_interval = 0.1
        plt.xlabel(x_str, fontsize=18)
        plt.ylabel(y_str, fontsize=18)
        axes.set_xlim(x_lim[0], x_lim[1])
        axes.set_ylim(y_lim[0], y_lim[1])
        # set x y number font size
        plt.xticks(np.arange(x_lim[0], x_lim[1] + x_lim[1] / 100, x_interval),
                   fontsize=16)
        plt.yticks(np.arange(y_lim[0], y_lim[1] + y_lim[1] / 100, y_interval),
                   fontsize=16)
        plt.grid()
        # Hide the right and top spines
        axes.spines['right'].set_visible(False)
        axes.spines['top'].set_visible(False)
        axes.legend()
        plt.legend(prop={'size': 16})
        plt.savefig(os.path.join(config_data.data_path["Out"],
                                 'new_dor_divert_comp_matplotlib.png'),
                    dpi=figure_dpi,
                    bbox_inches="tight")
        plt.show()
Exemplo n.º 24
    def update_data_model(cls,
                          config_data,
                          data_model_origin,
                          sites_id_update=None,
                          t_range_update=None,
                          data_attr_update=False,
                          train_stat_dict=None,
                          screen_basin_area_huc4=False):
        t_s_dict_origin = data_model_origin.t_s_dict
        data_flow_origin = data_model_origin.data_flow
        data_forcing_origin = data_model_origin.data_forcing
        data_attr_origin = data_model_origin.data_attr
        var_dict_origin = data_model_origin.var_dict
        f_dict_origin = data_model_origin.f_dict
        stat_dict_origin = data_model_origin.stat_dict
        if sites_id_update is not None:
            t_s_dict = {}
            t_range_origin_cpy = t_s_dict_origin["t_final_range"].copy()
            sites_id_origin_cpy = t_s_dict_origin["sites_id"].copy()
            sites_id_new = sites_id_update
            assert (all(
                x < y
                for x, y in zip(sites_id_origin_cpy, sites_id_origin_cpy[1:])))
            assert (all(x < y for x, y in zip(sites_id_new, sites_id_new[1:])))
            sites_id = np.intersect1d(sites_id_origin_cpy, sites_id_new)
            assert sites_id.size > 0
            new_source_data = GagesSource.choose_some_basins(
                config_data,
                t_range_origin_cpy,
                screen_basin_area_huc4=screen_basin_area_huc4,
                sites_id=sites_id.tolist())
            t_s_dict["t_final_range"] = t_range_origin_cpy
            t_s_dict["sites_id"] = sites_id.tolist()
            chosen_idx = [
                i for i in range(len(sites_id_origin_cpy))
                if sites_id_origin_cpy[i] in sites_id
            ]
            data_flow = data_flow_origin[chosen_idx, :]
            data_forcing = data_forcing_origin[chosen_idx, :, :]
            data_attr = data_attr_origin[chosen_idx, :]
        else:
            t_range_origin_cpy = t_s_dict_origin["t_final_range"].copy()
            t_s_dict = copy.deepcopy(t_s_dict_origin)
            new_source_data = GagesSource.choose_some_basins(
                config_data,
                t_range_origin_cpy,
                screen_basin_area_huc4=screen_basin_area_huc4)
            data_flow = data_flow_origin.copy()
            data_forcing = data_forcing_origin.copy()
            data_attr = data_attr_origin.copy()
        if data_attr_update:
            attr_lst = new_source_data.all_configs.get("attr_chosen")
            data_attr, var_dict, f_dict = new_source_data.read_attr(
                t_s_dict["sites_id"], attr_lst)
        else:
            var_dict = var_dict_origin.copy()
            f_dict = f_dict_origin.copy()
        data_model = cls(new_source_data, data_flow, data_forcing, data_attr,
                         var_dict, f_dict, stat_dict_origin, t_s_dict)
        if t_range_update is not None:
            sites_id_temp = data_model.t_s_dict['sites_id'].copy()
            t_range = t_range_update.copy()
            stat_dict_temp = {}
            t_s_dict_temp = {}
            start_index = int(
                (np.datetime64(t_range[0]) -
                 np.datetime64(data_model.t_s_dict["t_final_range"][0])) /
                np.timedelta64(1, 'D'))
            assert start_index >= 0
            t_lst_temp = hydro_time.t_range_days(t_range)
            end_index = start_index + t_lst_temp.size
            data_flow = data_model.data_flow[:, start_index:end_index]
            data_forcing = data_model.data_forcing[:, start_index:end_index, :]

            data_model = cls(new_source_data, data_flow, data_forcing,
                             data_attr, var_dict, f_dict, stat_dict_temp,
                             t_s_dict_temp)
            t_s_dict_temp['sites_id'] = sites_id_temp
            t_s_dict_temp['t_final_range'] = t_range
            data_model.t_s_dict = t_s_dict_temp
            data_model.data_source.t_range = t_range
        if (data_model.data_source.gage_dict["STAID"].tolist() !=
                data_model.t_s_dict['sites_id']):
            gage_dict_new = dict()
            usgs_all_sites = data_model.data_source.gage_dict["STAID"]
            sites_chosen = np.zeros(usgs_all_sites.shape[0])
            usgs_ids = data_model.t_s_dict['sites_id']
            sites_index = np.where(np.in1d(usgs_all_sites, usgs_ids))[0]
            sites_chosen[sites_index] = 1
            for key, value in data_model.data_source.gage_dict.items():
                value_new = np.array([
                    value[i] for i in range(len(sites_chosen))
                    if sites_chosen[i] > 0
                ])
                gage_dict_new[key] = value_new
            data_model.data_source.gage_dict = gage_dict_new
            assert (np.array(usgs_ids) == gage_dict_new["STAID"]).all()
        if train_stat_dict is None:
            stat_dict_temp = data_model.cal_stat_all()
        else:
            stat_dict_temp = train_stat_dict
        data_model.stat_dict = stat_dict_temp

        return data_model
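
A usage sketch for update_data_model (the gauge ids and dates are hypothetical; data_model_origin is assumed to be a GagesModel loaded as in the examples above). Both site-id lists must be sorted ascending, as the asserts require, and t_range_update must lie inside the original range:

updated_model = GagesModel.update_data_model(
    config_data,
    data_model_origin,
    sites_id_update=["01013500", "01030500"],     # hypothetical sorted gauge ids
    t_range_update=["1995-10-01", "2000-10-01"],  # hypothetical sub-range
    data_attr_update=True,
    screen_basin_area_huc4=False)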
Exemplo n.º 25
def synergy_ecoregion(args):
    update_cfg(cfg, args)
    cache = cfg.CACHE.STATE
    train_mode = cfg.TRAIN_MODE
    test_epoch = cfg.TEST_EPOCH
    config_data = GagesConfig(cfg)
    eco_names = [("ECO2_CODE", 5.2), ("ECO2_CODE", 5.3), ("ECO2_CODE", 6.2),
                 ("ECO2_CODE", 7.1), ("ECO2_CODE", 8.1), ("ECO2_CODE", 8.2),
                 ("ECO2_CODE", 8.3), ("ECO2_CODE", 8.4), ("ECO2_CODE", 8.5),
                 ("ECO2_CODE", 9.2), ("ECO2_CODE", 9.3), ("ECO2_CODE", 9.4),
                 ("ECO2_CODE", 9.5), ("ECO2_CODE", 9.6), ("ECO2_CODE", 10.1),
                 ("ECO2_CODE", 10.2), ("ECO2_CODE", 10.4), ("ECO2_CODE", 11.1),
                 ("ECO2_CODE", 12.1), ("ECO2_CODE", 13.1)]

    quick_data_dir = os.path.join(config_data.data_path["DB"], "quickdata")
    data_dir = os.path.join(quick_data_dir, "conus-all_90-10_nan-0.0_00-1.0")
    data_model_train = GagesModel.load_datamodel(
        data_dir,
        data_source_file_name='data_source.txt',
        stat_file_name='Statistics.json',
        flow_file_name='flow.npy',
        forcing_file_name='forcing.npy',
        attr_file_name='attr.npy',
        f_dict_file_name='dictFactorize.json',
        var_dict_file_name='dictAttribute.json',
        t_s_dict_file_name='dictTimeSpace.json')
    data_model_test = GagesModel.load_datamodel(
        data_dir,
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')

    for eco_name in eco_names:
        source_data = GagesSource.choose_some_basins(
            config_data,
            config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            ecoregion=eco_name)
        sites_id = source_data.all_configs['flow_screen_gage_id']
        sites_id_inter = np.intersect1d(data_model_train.t_s_dict["sites_id"],
                                        sites_id)
        if sites_id_inter.size < 1:
            continue
        config_data = GagesConfig.set_subdir(cfg, str(eco_name[1]))
        gages_model_train = GagesModel.update_data_model(
            config_data,
            data_model_train,
            sites_id_update=sites_id,
            data_attr_update=True,
            screen_basin_area_huc4=False)
        gages_model_test = GagesModel.update_data_model(
            config_data,
            data_model_test,
            sites_id_update=sites_id,
            data_attr_update=True,
            train_stat_dict=gages_model_train.stat_dict,
            screen_basin_area_huc4=False)
        if cache:
            save_datamodel(gages_model_train,
                           data_source_file_name='data_source.txt',
                           stat_file_name='Statistics.json',
                           flow_file_name='flow',
                           forcing_file_name='forcing',
                           attr_file_name='attr',
                           f_dict_file_name='dictFactorize.json',
                           var_dict_file_name='dictAttribute.json',
                           t_s_dict_file_name='dictTimeSpace.json')
            save_datamodel(gages_model_test,
                           data_source_file_name='test_data_source.txt',
                           stat_file_name='test_Statistics.json',
                           flow_file_name='test_flow',
                           forcing_file_name='test_forcing',
                           attr_file_name='test_attr',
                           f_dict_file_name='test_dictFactorize.json',
                           var_dict_file_name='test_dictAttribute.json',
                           t_s_dict_file_name='test_dictTimeSpace.json')
            print("save ecoregion " + str(eco_name[1]) + " data model")

        with torch.cuda.device(0):
            if train_mode:
                master_train(gages_model_train)
            pred, obs = master_test(gages_model_test, epoch=test_epoch)
            basin_area = gages_model_test.data_source.read_attr(
                gages_model_test.t_s_dict["sites_id"], ['DRAIN_SQKM'],
                is_return_dict=False)
            mean_prep = gages_model_test.data_source.read_attr(
                gages_model_test.t_s_dict["sites_id"], ['PPTAVG_BASIN'],
                is_return_dict=False)
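            # PPTAVG_BASIN is assumed to be mean annual precipitation in cm
            # (its GAGES-II unit), so dividing by 365 days and multiplying by
            # 10 converts it to mm/day before the basin normalization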
            mean_prep = mean_prep / 365 * 10
            pred = _basin_norm(pred, basin_area, mean_prep, to_norm=False)
            obs = _basin_norm(obs, basin_area, mean_prep, to_norm=False)
            save_result(
                gages_model_test.data_source.data_config.data_path['Temp'],
                test_epoch, pred, obs)
Exemplo n.º 26
    def test_siminv_data_temp(self):
        quick_data_dir = os.path.join(self.config_data_natflow.data_path["DB"],
                                      "quickdata")
        # data_dir = os.path.join(quick_data_dir, "conus-all_85-05_nan-0.1_00-1.0")
        data_dir = os.path.join(quick_data_dir,
                                "conus-all_90-10_nan-0.0_00-1.0")
        data_model_8595 = GagesModel.load_datamodel(
            data_dir,
            data_source_file_name='data_source.txt',
            stat_file_name='Statistics.json',
            flow_file_name='flow.npy',
            forcing_file_name='forcing.npy',
            attr_file_name='attr.npy',
            f_dict_file_name='dictFactorize.json',
            var_dict_file_name='dictAttribute.json',
            t_s_dict_file_name='dictTimeSpace.json')
        data_model_9505 = GagesModel.load_datamodel(
            data_dir,
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        conus_sites_id = data_model_8595.t_s_dict["sites_id"]
        nomajordam_source_data = GagesSource.choose_some_basins(
            self.config_data_natflow,
            self.config_data_natflow.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            major_dam_num=0)
        nomajordam_sites_id = nomajordam_source_data.all_configs[
            'flow_screen_gage_id']
        nomajordam_in_conus = np.intersect1d(conus_sites_id,
                                             nomajordam_sites_id)
        majordam_source_data = GagesSource.choose_some_basins(
            self.config_data_natflow,
            self.config_data_natflow.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            major_dam_num=[1, 2000])
        majordam_sites_id = majordam_source_data.all_configs[
            'flow_screen_gage_id']
        majordam_in_conus = np.intersect1d(conus_sites_id, majordam_sites_id)

        gages_model_train_natflow = GagesModel.update_data_model(
            self.config_data_natflow,
            data_model_8595,
            sites_id_update=nomajordam_in_conus,
            data_attr_update=True,
            screen_basin_area_huc4=False)
        gages_model_test_natflow = GagesModel.update_data_model(
            self.config_data_natflow,
            data_model_9505,
            sites_id_update=nomajordam_in_conus,
            data_attr_update=True,
            train_stat_dict=gages_model_train_natflow.stat_dict,
            screen_basin_area_huc4=False)

        gages_model_train_lstm = GagesModel.update_data_model(
            self.config_data_lstm,
            data_model_8595,
            sites_id_update=majordam_in_conus,
            data_attr_update=True,
            screen_basin_area_huc4=False)

        gages_model_test_lstm = GagesModel.update_data_model(
            self.config_data_lstm,
            data_model_9505,
            sites_id_update=majordam_in_conus,
            data_attr_update=True,
            train_stat_dict=gages_model_train_lstm.stat_dict,
            screen_basin_area_huc4=False)

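        # the "1"/"2" arguments below appear to be cache sub-directory indices:
        # "1" holds the natural-flow model (no-major-dam basins), "2" the 2nd
        # LSTM (major-dam basins)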
        save_datamodel(gages_model_train_natflow,
                       "1",
                       data_source_file_name='data_source.txt',
                       stat_file_name='Statistics.json',
                       flow_file_name='flow',
                       forcing_file_name='forcing',
                       attr_file_name='attr',
                       f_dict_file_name='dictFactorize.json',
                       var_dict_file_name='dictAttribute.json',
                       t_s_dict_file_name='dictTimeSpace.json')
        save_datamodel(gages_model_test_natflow,
                       "1",
                       data_source_file_name='test_data_source.txt',
                       stat_file_name='test_Statistics.json',
                       flow_file_name='test_flow',
                       forcing_file_name='test_forcing',
                       attr_file_name='test_attr',
                       f_dict_file_name='test_dictFactorize.json',
                       var_dict_file_name='test_dictAttribute.json',
                       t_s_dict_file_name='test_dictTimeSpace.json')
        save_datamodel(gages_model_train_lstm,
                       "2",
                       data_source_file_name='data_source.txt',
                       stat_file_name='Statistics.json',
                       flow_file_name='flow',
                       forcing_file_name='forcing',
                       attr_file_name='attr',
                       f_dict_file_name='dictFactorize.json',
                       var_dict_file_name='dictAttribute.json',
                       t_s_dict_file_name='dictTimeSpace.json')
        save_datamodel(gages_model_test_lstm,
                       "2",
                       data_source_file_name='test_data_source.txt',
                       stat_file_name='test_Statistics.json',
                       flow_file_name='test_flow',
                       forcing_file_name='test_forcing',
                       attr_file_name='test_attr',
                       f_dict_file_name='test_dictFactorize.json',
                       var_dict_file_name='test_dictAttribute.json',
                       t_s_dict_file_name='test_dictTimeSpace.json')
        print("read and save data model")
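The point of the two screens above is that major_dam_num=0 and major_dam_num=[1, 2000] should split the CONUS sites into two disjoint groups. A sanity check one could append to the test (a sketch; it assumes the two screens are exactly complementary over CONUS, and reuses the snippet's numpy import):

assert np.intersect1d(nomajordam_in_conus, majordam_in_conus).size == 0
assert len(nomajordam_in_conus) + len(majordam_in_conus) == len(conus_sites_id)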
Example No. 27
def dam_lstm(args):
    update_cfg(cfg, args)
    random_seed = cfg.RANDOM_SEED
    test_epoch = cfg.TEST_EPOCH
    gpu_num = cfg.CTX
    train_mode = cfg.TRAIN_MODE
    dor = cfg.GAGES.attrScreenParams.DOR
    cache = cfg.CACHE.STATE
    print("train and test in basins with dams: \n")
    config_data = GagesConfig(cfg)

    source_data_dor1 = GagesSource.choose_some_basins(
        config_data,
        config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        DOR=dor)
    # basins with dams
    source_data_withdams = GagesSource.choose_some_basins(
        config_data,
        config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        dam_num=[1, 100000])

    sites_id_dor1 = source_data_dor1.all_configs['flow_screen_gage_id']
    sites_id_withdams = source_data_withdams.all_configs['flow_screen_gage_id']
    sites_id_chosen = np.intersect1d(np.array(sites_id_dor1),
                                     np.array(sites_id_withdams)).tolist()

    gages_model = GagesModels(config_data,
                              screen_basin_area_huc4=False,
                              sites_id=sites_id_chosen)
    gages_model_train = gages_model.data_model_train
    gages_model_test = gages_model.data_model_test
    if cache:
        save_datamodel(gages_model_train,
                       data_source_file_name='data_source.txt',
                       stat_file_name='Statistics.json',
                       flow_file_name='flow',
                       forcing_file_name='forcing',
                       attr_file_name='attr',
                       f_dict_file_name='dictFactorize.json',
                       var_dict_file_name='dictAttribute.json',
                       t_s_dict_file_name='dictTimeSpace.json')
        save_datamodel(gages_model_test,
                       data_source_file_name='test_data_source.txt',
                       stat_file_name='test_Statistics.json',
                       flow_file_name='test_flow',
                       forcing_file_name='test_forcing',
                       attr_file_name='test_attr',
                       f_dict_file_name='test_dictFactorize.json',
                       var_dict_file_name='test_dictAttribute.json',
                       t_s_dict_file_name='test_dictTimeSpace.json')
    with torch.cuda.device(gpu_num):
        if train_mode:
            master_train(gages_model_train, random_seed=random_seed)
        pred, obs = master_test(gages_model_test, epoch=test_epoch)
        basin_area = gages_model_test.data_source.read_attr(
            gages_model_test.t_s_dict["sites_id"], ['DRAIN_SQKM'],
            is_return_dict=False)
        mean_prep = gages_model_test.data_source.read_attr(
            gages_model_test.t_s_dict["sites_id"], ['PPTAVG_BASIN'],
            is_return_dict=False)
        mean_prep = mean_prep / 365 * 10
        pred = _basin_norm(pred, basin_area, mean_prep, to_norm=False)
        obs = _basin_norm(obs, basin_area, mean_prep, to_norm=False)
        save_result(gages_model_test.data_source.data_config.data_path['Temp'],
                    test_epoch, pred, obs)
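dam_lstm takes parsed command-line arguments and merges them into the global cfg via update_cfg before building the models. A hypothetical driver is sketched below; the flag names are assumptions for illustration, not the repo's actual CLI:

import argparse

if __name__ == "__main__":
    # hypothetical flags; the real script's argument names may differ
    parser = argparse.ArgumentParser(description="train/test an LSTM on basins with dams")
    parser.add_argument("--train_mode", type=int, default=1)
    parser.add_argument("--test_epoch", type=int, default=300)
    args = parser.parse_args()
    dam_lstm(args)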
Example No. 28
    def test_some_reservoirs(self):
        print("train and test in basins with different dam combinations: \n")
        dam_plan = self.dam_plan
        config_data = self.config_data
        test_epoch = self.test_epoch
        if dam_plan == 2:
            dam_num = 0
            dor = self.config_file.GAGES.attrScreenParams.DOR
            source_data_dor1 = GagesSource.choose_some_basins(
                config_data,
                config_data.model_dict["data"]["tRangeTrain"],
                screen_basin_area_huc4=False,
                DOR=dor)
            # basins without dams
            source_data_withoutdams = GagesSource.choose_some_basins(
                config_data,
                config_data.model_dict["data"]["tRangeTrain"],
                screen_basin_area_huc4=False,
                dam_num=dam_num)

            sites_id_dor1 = source_data_dor1.all_configs['flow_screen_gage_id']
            sites_id_withoutdams = source_data_withoutdams.all_configs[
                'flow_screen_gage_id']
            sites_id_chosen = np.sort(
                np.union1d(np.array(sites_id_dor1),
                           np.array(sites_id_withoutdams))).tolist()
        elif dam_plan == 3:
            dam_num = [1, 100000]
            # basins with dams
            source_data_withdams = GagesSource.choose_some_basins(
                config_data,
                config_data.model_dict["data"]["tRangeTrain"],
                screen_basin_area_huc4=False,
                dam_num=dam_num)
            sites_id_chosen = source_data_withdams.all_configs[
                'flow_screen_gage_id']
        else:
            raise ValueError("unknown dam_plan: " + str(dam_plan))
        gages_model = GagesModels(config_data,
                                  screen_basin_area_huc4=False,
                                  sites_id=sites_id_chosen)
        gages_model_train = gages_model.data_model_train
        gages_model_test = gages_model.data_model_test
        if self.cache:
            save_datamodel(gages_model_train,
                           data_source_file_name='data_source.txt',
                           stat_file_name='Statistics.json',
                           flow_file_name='flow',
                           forcing_file_name='forcing',
                           attr_file_name='attr',
                           f_dict_file_name='dictFactorize.json',
                           var_dict_file_name='dictAttribute.json',
                           t_s_dict_file_name='dictTimeSpace.json')
            save_datamodel(gages_model_test,
                           data_source_file_name='test_data_source.txt',
                           stat_file_name='test_Statistics.json',
                           flow_file_name='test_flow',
                           forcing_file_name='test_forcing',
                           attr_file_name='test_attr',
                           f_dict_file_name='test_dictFactorize.json',
                           var_dict_file_name='test_dictAttribute.json',
                           t_s_dict_file_name='test_dictTimeSpace.json')
        with torch.cuda.device(self.gpu_num):
            if self.train_mode:
                master_train(gages_model_train, random_seed=self.random_seed)
            pred, obs = master_test(gages_model_test, epoch=test_epoch)
            basin_area = gages_model_test.data_source.read_attr(
                gages_model_test.t_s_dict["sites_id"], ['DRAIN_SQKM'],
                is_return_dict=False)
            mean_prep = gages_model_test.data_source.read_attr(
                gages_model_test.t_s_dict["sites_id"], ['PPTAVG_BASIN'],
                is_return_dict=False)
            mean_prep = mean_prep / 365 * 10
            pred = _basin_norm(pred, basin_area, mean_prep, to_norm=False)
            obs = _basin_norm(obs, basin_area, mean_prep, to_norm=False)
            save_result(
                gages_model_test.data_source.data_config.data_path['Temp'],
                test_epoch, pred, obs)
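The only difference between the two plans is the set algebra on site IDs: plan 2 unions zero-dam basins with low-DOR dammed basins, while plan 3 keeps every dammed basin. Incidentally, np.union1d already returns a sorted array of unique values, so the outer np.sort above is redundant but harmless. A toy illustration with made-up gage IDs:

import numpy as np

sites_id_dor1 = np.array(["01013500", "01030500"])         # low-DOR dammed basins (made up)
sites_id_withoutdams = np.array(["01030500", "01047000"])  # zero-dam basins (made up)
plan2 = np.union1d(sites_id_dor1, sites_id_withoutdams).tolist()
print(plan2)  # ['01013500', '01030500', '01047000'] -- sorted, duplicates merged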
Example No. 29
    def test_gages_data_model(self):
        config_data = self.config_data
        major_dam_num = [1, 200]  # the max major-dam count in any basin is 155
        if cfg.CACHE.QUICK_DATA:
            source_data = GagesSource.choose_some_basins(
                config_data,
                config_data.model_dict["data"]["tRangeTrain"],
                screen_basin_area_huc4=False,
                major_dam_num=major_dam_num)
            sites_id = source_data.all_configs['flow_screen_gage_id']
            print("The binary data already exists")
            quick_data_dir = os.path.join(self.config_data.data_path["DB"],
                                          "quickdata")
            # data_dir = os.path.join(quick_data_dir, "conus-all_85-05_nan-0.1_00-1.0")
            data_dir = os.path.join(quick_data_dir,
                                    "conus-all_90-10_nan-0.0_00-1.0")
            data_model_train = GagesModel.load_datamodel(
                data_dir,
                data_source_file_name='data_source.txt',
                stat_file_name='Statistics.json',
                flow_file_name='flow.npy',
                forcing_file_name='forcing.npy',
                attr_file_name='attr.npy',
                f_dict_file_name='dictFactorize.json',
                var_dict_file_name='dictAttribute.json',
                t_s_dict_file_name='dictTimeSpace.json')
            data_model_test = GagesModel.load_datamodel(
                data_dir,
                data_source_file_name='test_data_source.txt',
                stat_file_name='test_Statistics.json',
                flow_file_name='test_flow.npy',
                forcing_file_name='test_forcing.npy',
                attr_file_name='test_attr.npy',
                f_dict_file_name='test_dictFactorize.json',
                var_dict_file_name='test_dictAttribute.json',
                t_s_dict_file_name='test_dictTimeSpace.json')
            gages_model_train = GagesModel.update_data_model(
                self.config_data,
                data_model_train,
                sites_id_update=sites_id,
                screen_basin_area_huc4=False)
            gages_model_test = GagesModel.update_data_model(
                self.config_data,
                data_model_test,
                sites_id_update=sites_id,
                train_stat_dict=gages_model_train.stat_dict,
                screen_basin_area_huc4=False)
        else:
            gages_model = GagesModels(config_data,
                                      screen_basin_area_huc4=False,
                                      major_dam_num=major_dam_num)
            gages_model_train = gages_model.data_model_train
            gages_model_test = gages_model.data_model_test
        if cfg.CACHE.STATE:
            save_datamodel(gages_model_train,
                           data_source_file_name='data_source.txt',
                           stat_file_name='Statistics.json',
                           flow_file_name='flow',
                           forcing_file_name='forcing',
                           attr_file_name='attr',
                           f_dict_file_name='dictFactorize.json',
                           var_dict_file_name='dictAttribute.json',
                           t_s_dict_file_name='dictTimeSpace.json')
            save_datamodel(gages_model_test,
                           data_source_file_name='test_data_source.txt',
                           stat_file_name='test_Statistics.json',
                           flow_file_name='test_flow',
                           forcing_file_name='test_forcing',
                           attr_file_name='test_attr',
                           f_dict_file_name='test_dictFactorize.json',
                           var_dict_file_name='test_dictAttribute.json',
                           t_s_dict_file_name='test_dictTimeSpace.json')
            print("read and save data model")
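The [1, 200] upper bound is loose because, per the inline comment, no basin has more than 155 major dams. One way to verify that bound (a sketch; it assumes the GAGES-II attribute name MAJ_NDAMS_2009 holds the major-dam counts, and it mirrors the read_attr calls used elsewhere in these snippets):

maj_dam_counts = gages_model_train.data_source.read_attr(
    gages_model_train.t_s_dict["sites_id"], ["MAJ_NDAMS_2009"],
    is_return_dict=False)
print(int(maj_dam_counts.max()))  # expected to be <= 155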
Example No. 30
    def test_some_reservoirs(self):
        """choose basins with small reservoirs for the 2nd LSTM, not for simulation"""
        # read the model config file
        config_data = self.config_data_lstm
        # dor threshold from the paper "High-resolution mapping of the world's reservoirs and dams for sustainable river-flow management"
        dor = 0.02
        source_data = GagesSource.choose_some_basins(config_data, config_data.model_dict["data"]["tRangeTrain"],
                                                     screen_basin_area_huc4=False, DOR=dor)
        sites_id_dor = source_data.all_configs['flow_screen_gage_id']

        quick_data_dir = os.path.join(self.config_data_lstm.data_path["DB"], "quickdata")
        data_dir = os.path.join(quick_data_dir, "conus-all_90-10_nan-0.0_00-1.0")
        data_model_8595 = GagesModel.load_datamodel(data_dir,
                                                    data_source_file_name='data_source.txt',
                                                    stat_file_name='Statistics.json', flow_file_name='flow.npy',
                                                    forcing_file_name='forcing.npy', attr_file_name='attr.npy',
                                                    f_dict_file_name='dictFactorize.json',
                                                    var_dict_file_name='dictAttribute.json',
                                                    t_s_dict_file_name='dictTimeSpace.json')
        data_model_9505 = GagesModel.load_datamodel(data_dir,
                                                    data_source_file_name='test_data_source.txt',
                                                    stat_file_name='test_Statistics.json',
                                                    flow_file_name='test_flow.npy',
                                                    forcing_file_name='test_forcing.npy',
                                                    attr_file_name='test_attr.npy',
                                                    f_dict_file_name='test_dictFactorize.json',
                                                    var_dict_file_name='test_dictAttribute.json',
                                                    t_s_dict_file_name='test_dictTimeSpace.json')
        conus_sites_id_all = data_model_8595.t_s_dict["sites_id"]
        nomajordam_source_data = GagesSource.choose_some_basins(self.config_data_natflow,
                                                                self.config_data_natflow.model_dict["data"][
                                                                    "tRangeTrain"],
                                                                screen_basin_area_huc4=False, major_dam_num=0)
        nomajordam_sites_id = nomajordam_source_data.all_configs['flow_screen_gage_id']
        # basins with no major dam are the ones used as the natural-flow generator
        nomajordam_in_conus = np.intersect1d(conus_sites_id_all, nomajordam_sites_id)

        conus_sites_id_dor = np.intersect1d(conus_sites_id_all, sites_id_dor)
        majordam_source_data = GagesSource.choose_some_basins(self.config_data_natflow,
                                                              self.config_data_natflow.model_dict["data"][
                                                                  "tRangeTrain"],
                                                              screen_basin_area_huc4=False, major_dam_num=[1, 2000])
        majordam_sites_id = majordam_source_data.all_configs['flow_screen_gage_id']
        majordam_in_conus = np.intersect1d(conus_sites_id_dor, majordam_sites_id)
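        # note the asymmetry: the natural-flow set is all no-major-dam CONUS basins,
        # while the 2nd-LSTM set is the low-DOR basins that do have a major dam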

        gages_model_train_natflow = GagesModel.update_data_model(self.config_data_natflow, data_model_8595,
                                                                 sites_id_update=nomajordam_in_conus,
                                                                 data_attr_update=True, screen_basin_area_huc4=False)
        gages_model_test_natflow = GagesModel.update_data_model(self.config_data_natflow, data_model_9505,
                                                                sites_id_update=nomajordam_in_conus,
                                                                data_attr_update=True,
                                                                train_stat_dict=gages_model_train_natflow.stat_dict,
                                                                screen_basin_area_huc4=False)

        gages_model_train_lstm = GagesModel.update_data_model(self.config_data_lstm, data_model_8595,
                                                              sites_id_update=majordam_in_conus, data_attr_update=True,
                                                              screen_basin_area_huc4=False)

        gages_model_test_lstm = GagesModel.update_data_model(self.config_data_lstm, data_model_9505,
                                                             sites_id_update=majordam_in_conus, data_attr_update=True,
                                                             train_stat_dict=gages_model_train_lstm.stat_dict,
                                                             screen_basin_area_huc4=False)

        save_datamodel(gages_model_train_natflow, "1", data_source_file_name='data_source.txt',
                       stat_file_name='Statistics.json', flow_file_name='flow', forcing_file_name='forcing',
                       attr_file_name='attr', f_dict_file_name='dictFactorize.json',
                       var_dict_file_name='dictAttribute.json', t_s_dict_file_name='dictTimeSpace.json')
        save_datamodel(gages_model_test_natflow, "1", data_source_file_name='test_data_source.txt',
                       stat_file_name='test_Statistics.json', flow_file_name='test_flow',
                       forcing_file_name='test_forcing', attr_file_name='test_attr',
                       f_dict_file_name='test_dictFactorize.json', var_dict_file_name='test_dictAttribute.json',
                       t_s_dict_file_name='test_dictTimeSpace.json')
        save_datamodel(gages_model_train_lstm, "2", data_source_file_name='data_source.txt',
                       stat_file_name='Statistics.json', flow_file_name='flow', forcing_file_name='forcing',
                       attr_file_name='attr', f_dict_file_name='dictFactorize.json',
                       var_dict_file_name='dictAttribute.json', t_s_dict_file_name='dictTimeSpace.json')
        save_datamodel(gages_model_test_lstm, "2", data_source_file_name='test_data_source.txt',
                       stat_file_name='test_Statistics.json', flow_file_name='test_flow',
                       forcing_file_name='test_forcing', attr_file_name='test_attr',
                       f_dict_file_name='test_dictFactorize.json', var_dict_file_name='test_dictAttribute.json',
                       t_s_dict_file_name='test_dictTimeSpace.json')
        print("read and save data model")
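The dor = 0.02 cutoff traces to the cited paper's degree-of-regulation (DOR) metric: total upstream reservoir storage expressed as a fraction of mean annual flow volume, with values of a few percent marking effectively unregulated rivers. A worked illustration of the quantity being thresholded (all numbers made up):

storage_m3 = 1.2e7           # total upstream reservoir storage (made up)
annual_flow_m3 = 8.0e8       # mean annual streamflow volume (made up)
dor = storage_m3 / annual_flow_m3
print(dor, dor < 0.02)       # 0.015 True -> a weakly regulated basin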