Пример #1
0
 def setUp(self) -> None:
     """Load the two configurations of the ``storage/exp1`` experiment.

     Prerequisite (per the original note): the natural-flow model must already
     have been generated from the gages dataset's config.ini and moved to the
     right directory by hand.
     """
     cfg_root = definitions.CONFIG_DIR
     natflow_ini = os.path.join(cfg_root, "storage/config1_exp1.ini")
     lstm_ini = os.path.join(cfg_root, "storage/config2_exp1.ini")
     self.config_file_natflow = natflow_ini
     self.config_file_lstm = lstm_ini
     self.subdir = "storage/exp1"
     self.config_data_natflow = GagesConfig.set_subdir(natflow_ini,
                                                       self.subdir)
     self.config_data_lstm = GagesConfig.set_subdir(lstm_ini, self.subdir)
     add_model_param(self.config_data_lstm, "model", seqLength=1)
     # Candidate test epochs; change the index to evaluate another one.
     candidate_epochs = (100, 200, 220, 250, 270, 280, 290, 300, 310, 320,
                         400)
     self.test_epoch = candidate_epochs[10]
Пример #2
0
 def setUp(self) -> None:
     """Load both configurations of ``dam/exp6`` (basins with small DOR)."""
     cfg_root = definitions.CONFIG_DIR
     first_ini = os.path.join(cfg_root, "dam/config1_exp6.ini")
     second_ini = os.path.join(cfg_root, "dam/config2_exp6.ini")
     self.config_file_1 = first_ini
     self.config_file_2 = second_ini
     self.subdir = "dam/exp6"
     self.config_data_1 = GagesConfig.set_subdir(first_ini, self.subdir)
     self.config_data_2 = GagesConfig.set_subdir(second_ini, self.subdir)
     add_model_param(self.config_data_1, "model", seqLength=1)
     # A basin-area screen (self.basin_area_screen = 100, unit: SQKM) used to
     # be set here and is currently disabled.
     # Candidate test epochs; change the index to evaluate another one.
     candidate_epochs = (100, 200, 220, 250, 280, 290, 295, 300, 305, 310,
                         320, 400, 500)
     self.test_epoch = candidate_epochs[10]
Пример #3
0
 def setUp(self) -> None:
     """Load the two configurations of the ``simulate/exp3`` experiment.

     The original note reads "choose basins with small DOR". The ``dam/exp2``
     configuration pair (dam/config1_exp2.ini, dam/config2_exp2.ini) was an
     earlier alternative for this fixture.
     """
     cfg_root = definitions.CONFIG_DIR
     sim_ini = os.path.join(cfg_root, "simulate/config1_exp3.ini")
     main_ini = os.path.join(cfg_root, "simulate/config2_exp3.ini")
     self.sim_config_file = sim_ini
     self.config_file = main_ini
     self.subdir = "simulate/exp3"
     self.config_data_natflow = GagesConfig.set_subdir(sim_ini, self.subdir)
     self.config_data_lstm = GagesConfig.set_subdir(main_ini, self.subdir)
     add_model_param(self.config_data_lstm, "model", seqLength=1)
     # A basin-area screen (self.basin_area_screen = 100, unit: SQKM) used to
     # be set here and is currently disabled.
     # Candidate test epochs; change the index to evaluate another one.
     candidate_epochs = (100, 200, 220, 250, 280, 290, 295, 300, 305, 310,
                         320)
     self.test_epoch = candidate_epochs[7]
Пример #4
0
    def setUp(self) -> None:
        """Load the two configurations of the ``inv/exp2`` experiment.

        Prerequisite (per the original note): the natural-flow model must
        already have been generated from the gages dataset's config.ini and
        moved to the right directory by hand.

        Earlier variants of this fixture pointed at other ``inv`` experiments:
        exp3-exp8 (``inv/config_inv_{1,2}_ex<N>.ini``), exp10 and exp1
        (``inv/config{1,2}_exp<N>.ini``; exp1 used ``seqLength=7``).
        """
        cfg_root = definitions.CONFIG_DIR
        first_ini = os.path.join(cfg_root, "inv/config1_exp2.ini")
        second_ini = os.path.join(cfg_root, "inv/config2_exp2.ini")
        self.config_file_1 = first_ini
        self.config_file_2 = second_ini
        self.subdir = "inv/exp2"
        self.config_data_1 = GagesConfig.set_subdir(first_ini, self.subdir)
        self.config_data_2 = GagesConfig.set_subdir(second_ini, self.subdir)
        add_model_param(self.config_data_1, "model", seqLength=1)
        # A basin-area screen (self.basin_area_screen = 100, unit: SQKM) used
        # to be set here and is currently disabled.
        # Candidate test epochs; change the index to evaluate another one.
        candidate_epochs = (100, 200, 220, 250, 280, 290, 295, 300, 305, 310,
                            320, 400, 500)
        self.test_epoch = candidate_epochs[7]
Пример #5
0
 def setUp(self) -> None:
     """Load the CAMELS, dam and CONUS configurations (basins with major dams).

     Three independent configurations are prepared: ``camels/exp2`` (through
     ``CamelsConfig``), ``dam/exp13`` and ``basic/exp25`` (both through
     ``GagesConfig``). Results are read at epoch 300.
     """
     cfg_root = definitions.CONFIG_DIR

     # CAMELS experiment
     self.camels_config_file = os.path.join(cfg_root,
                                            "camels/config_exp2.ini")
     self.camels_subdir = "camels/exp2"
     self.camels_config_data = CamelsConfig.set_subdir(
         self.camels_config_file, self.camels_subdir)

     # dam experiment
     self.config_file = os.path.join(cfg_root, "dam/config_exp13.ini")
     self.subdir = "dam/exp13"
     self.config_data = GagesConfig.set_subdir(self.config_file,
                                               self.subdir)

     # CONUS-wide experiment
     self.conus_config_file = os.path.join(cfg_root,
                                           "basic/config_exp25.ini")
     self.conus_subdir = "basic/exp25"
     self.conus_config_data = GagesConfig.set_subdir(
         self.conus_config_file, self.conus_subdir)

     self.test_epoch = 300
Пример #6
0
    def setUp(self) -> None:
        """Load the three configurations of the ``siminv/exp1`` experiment.

        Prerequisite (per the original note): the natural-flow model must
        already have been generated from the gages dataset's config.ini and
        moved to the right directory by hand.

        Earlier variants of this fixture pointed at siminv exp2-exp4
        (``siminv/config_siminv_{1,2,3}_exp<N>.ini``) and exp10
        (``siminv/config{1,2,3}_exp10.ini``).
        """
        cfg_root = definitions.CONFIG_DIR
        sim_ini = os.path.join(cfg_root, "siminv/config1_exp1.ini")
        inv_ini = os.path.join(cfg_root, "siminv/config2_exp1.ini")
        main_ini = os.path.join(cfg_root, "siminv/config3_exp1.ini")
        self.config_file_1 = sim_ini
        self.config_file_2 = inv_ini
        self.config_file_3 = main_ini
        self.subdir = "siminv/exp1"
        self.config_data_sim = GagesConfig.set_subdir(sim_ini, self.subdir)
        self.config_data_inv = GagesConfig.set_subdir(inv_ini, self.subdir)
        self.config_data = GagesConfig.set_subdir(main_ini, self.subdir)
        add_model_param(self.config_data_inv, "model", seqLength=1)
        # Candidate test epochs; change the index to evaluate another one.
        candidate_epochs = (100, 200, 220, 250, 280, 290, 295, 300, 305, 310,
                            320, 400, 500)
        self.test_epoch = candidate_epochs[12]
Пример #7
0
 def setUp(self) -> None:
     """Load the two configurations of ``dam/exp14`` and pick the NID file.

     Prerequisite (per the original note): the natural-flow model must already
     have been generated from the gages dataset's config.ini and moved to the
     right directory by hand.
     """
     cfg_root = definitions.CONFIG_DIR
     sim_ini = os.path.join(cfg_root, "dam/config1_exp14.ini")
     main_ini = os.path.join(cfg_root, "dam/config2_exp14.ini")
     self.sim_config_file = sim_ini
     self.config_file = main_ini
     self.subdir = "dam/exp14"
     self.config_data = GagesConfig.set_subdir(main_ini, self.subdir)
     self.sim_config_data = GagesConfig.set_subdir(sim_ini, self.subdir)
     add_model_param(self.config_data, "model", seqLength=1)
     # 'PA_U.xlsx' and 'OH_U.xlsx' were previously used as alternatives here.
     self.nid_file = 'NID2018_U.xlsx'
     self.test_epoch = 300
Пример #8
0
 def setUp(self):
     """Load the ``transdata/exp11`` configuration into ``self.config_data``.

     The experiment number is the only thing that varies between runs; exp1
     through exp10 (``transdata/config_exp<N>.ini`` with sub-directory
     ``transdata/exp<N>``) were selected here in earlier runs.
     """
     ini_path = os.path.join(definitions.CONFIG_DIR,
                             "transdata/config_exp11.ini")
     self.config_data = GagesConfig.set_subdir(ini_path, "transdata/exp11")
Пример #9
0
    def test_trans_all_forcing_file_to_camels(self):
        """Convert Daymet forcing data of every region into CAMELS-style files.

        The pickled data source stored in the "Temp" path of
        ``self.config_data`` supplies the region list, the time range and the
        forcing directory. For the i-th region a ``GagesSource`` is built from
        ``transdata/config_exp<i+1>.ini`` and its gauges are handed to
        ``trans_daymet_to_camels`` once per year. Output goes to
        ``<DB>/basin_mean_forcing/daymet``.
        """
        data_source_dump = os.path.join(self.config_data.data_path["Temp"],
                                        'data_source.txt')
        source_data = unserialize_pickle(data_source_dump)
        output_dir = os.path.join(self.config_data.data_path["DB"],
                                  "basin_mean_forcing", "daymet")
        # The target is two levels below the DB directory: os.mkdir would fail
        # when "basin_mean_forcing" does not exist yet, so create the whole
        # chain and tolerate an already existing directory.
        os.makedirs(output_dir, exist_ok=True)
        region_names = [
            region_temp.split("_")[-1]
            for region_temp in source_data.all_configs['regions']
        ]
        # forcing data file generated is named as "allref", so rename the "all"
        region_names = ["allref" if r == "all" else r for r in region_names]
        year_start = int(source_data.t_range[0].split("-")[0])
        year_end = int(source_data.t_range[1].split("-")[0])
        # np.arange excludes year_end itself.
        years = np.arange(year_start, year_end)
        # The gauge ids are required to be strictly increasing.
        station_ids = source_data.gage_dict['STAID']
        assert all(x < y for x, y in zip(station_ids, station_ids[1:]))

        config_dir = definitions.CONFIG_DIR
        # Experiment numbers are 1-based and follow the order of the regions.
        for exp_no, region_name in enumerate(region_names, start=1):
            config_file_i = os.path.join(
                config_dir, "transdata/config_exp" + str(exp_no) + ".ini")
            subdir_i = "transdata/exp" + str(exp_no)
            config_data_i = GagesConfig.set_subdir(config_file_i, subdir_i)
            source_data_i = GagesSource(
                config_data_i,
                config_data_i.model_dict["data"]["tRangeTrain"],
                screen_basin_area_huc4=False)
            for year in years:
                trans_daymet_to_camels(source_data.all_configs["forcing_dir"],
                                       output_dir, source_data_i.gage_dict,
                                       region_name, year)
Пример #10
0
 def setUp(self) -> None:
     """Load the ``gages1by1/exp6`` configuration.

     Prerequisite (per the original note): the natural-flow model must already
     have been generated from the gages dataset's config.ini and moved to the
     right directory by hand.
     """
     self.config_file = os.path.join(definitions.CONFIG_DIR,
                                     "gages1by1/config_exp6.ini")
     self.subdir = "gages1by1/exp6"
     self.config_data = GagesConfig.set_subdir(self.config_file,
                                               self.subdir)
Пример #11
0
    def test_screen_some_gauge_and_save(self):
        """Save the gauge ids of two basin selections as CSV files.

        ``v2.csv`` receives the gauges returned by
        ``GagesSource.choose_some_basins(..., ref="Ref")`` for
        ``self.config_data``; ``v1.csv`` receives the training sites of a
        ``GagesModels`` built from ``transdata/config_exp12.ini`` with
        ``major_dam_num=0``. Both files are written to the "dapeng" folder
        below the "DB" path of ``self.config_data``.
        """
        exp12_config = GagesConfig.set_subdir(
            os.path.join(definitions.CONFIG_DIR,
                         "transdata/config_exp12.ini"),
            "transdata/exp12")

        # --- selection 2: reference gauges of the fixture configuration ---
        train_range = self.config_data.model_dict["data"]["tRangeTrain"]
        ref_source = GagesSource.choose_some_basins(
            self.config_data,
            train_range,
            screen_basin_area_huc4=False,
            ref="Ref")
        ref_frame = pd.DataFrame(
            {"STAID": ref_source.all_configs['flow_screen_gage_id']})
        out_dir = os.path.join(self.config_data.data_path["DB"], "dapeng")
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)
        ref_frame.to_csv(os.path.join(out_dir, "v2.csv"), index=False)

        # --- selection 1: training sites of the exp12 model ---
        exp12_model = GagesModels(exp12_config,
                                  screen_basin_area_huc4=False,
                                  major_dam_num=0)
        train_sites = exp12_model.data_model_train.t_s_dict["sites_id"]
        site_frame = pd.DataFrame({"STAID": train_sites})
        site_frame.to_csv(os.path.join(out_dir, "v1.csv"), index=False)

        print("read and save data screen")
Пример #12
0
 def test_test_gages_wo_attr(self):
     """Evaluate the ``susquehanna/exp2`` model on its saved test data.

     The test data model is reloaded from the "Temp" directory, evaluated at
     ``self.test_epoch`` on CUDA device 2, and the predictions/observations
     are saved and plotted.
     """
     exp_config = GagesConfig.set_subdir(
         os.path.join(definitions.CONFIG_DIR, "susquehanna/config_exp2.ini"),
         "susquehanna/exp2")
     # File names under which the test data model was stored.
     test_files = dict(
         data_source_file_name='test_data_source.txt',
         stat_file_name='test_Statistics.json',
         flow_file_name='test_flow.npy',
         forcing_file_name='test_forcing.npy',
         attr_file_name='test_attr.npy',
         f_dict_file_name='test_dictFactorize.json',
         var_dict_file_name='test_dictAttribute.json',
         t_s_dict_file_name='test_dictTimeSpace.json')
     data_model = GagesModelWoBasinNorm.load_datamodel(
         exp_config.data_path["Temp"], **test_files)
     with torch.cuda.device(2):
         pred, obs = master_test(data_model, epoch=self.test_epoch)
         result_dir = data_model.data_source.data_config.data_path['Temp']
         save_result(result_dir, self.test_epoch, pred, obs)
         plot_we_need(data_model,
                      obs,
                      pred,
                      id_col="STAID",
                      lon_col="LNG_GAGE",
                      lat_col="LAT_GAGE")
Пример #13
0
 def test_train_gages_wo_attr(self):
     """Build, save and train the ``susquehanna/exp2`` data models.

     Basins are selected with ``DOR=-0.02``. The train and test data models
     are both written to disk, then training is started on CUDA device 2 with
     ``pre_trained_model_epoch=400``.
     """
     exp_config = GagesConfig.set_subdir(
         os.path.join(definitions.CONFIG_DIR, "susquehanna/config_exp2.ini"),
         "susquehanna/exp2")
     gages_model = GagesModelsWoBasinNorm(exp_config,
                                          screen_basin_area_huc4=False,
                                          DOR=-0.02)
     train_files = dict(
         data_source_file_name='data_source.txt',
         stat_file_name='Statistics.json',
         flow_file_name='flow',
         forcing_file_name='forcing',
         attr_file_name='attr',
         f_dict_file_name='dictFactorize.json',
         var_dict_file_name='dictAttribute.json',
         t_s_dict_file_name='dictTimeSpace.json')
     test_files = dict(
         data_source_file_name='test_data_source.txt',
         stat_file_name='test_Statistics.json',
         flow_file_name='test_flow',
         forcing_file_name='test_forcing',
         attr_file_name='test_attr',
         f_dict_file_name='test_dictFactorize.json',
         var_dict_file_name='test_dictAttribute.json',
         t_s_dict_file_name='test_dictTimeSpace.json')
     save_datamodel(gages_model.data_model_train, **train_files)
     save_datamodel(gages_model.data_model_test, **test_files)
     with torch.cuda.device(2):
         # Calling master_test without pre_trained_model_epoch was the earlier
         # variant of this step.
         master_train(gages_model.data_model_train,
                      pre_trained_model_epoch=400)
     print("read and train data model")
Пример #14
0
 def setUp(self) -> None:
     """Load the ``dam/exp11`` configuration (basins with small storage)."""
     ini_path = os.path.join(definitions.CONFIG_DIR, "dam/config_exp11.ini")
     self.config_file = ini_path
     self.subdir = "dam/exp11"
     self.config_data = GagesConfig.set_subdir(ini_path, self.subdir)
     self.test_epoch = 300
Пример #15
0
 def setUp(self) -> None:
     """Load the ``nodam/exp7`` configuration (basins with 0 DOR and large DOR)."""
     ini_path = os.path.join(definitions.CONFIG_DIR, "nodam/config_exp7.ini")
     self.config_file = ini_path
     self.subdir = "nodam/exp7"
     self.random_seed = 1234
     self.config_data = GagesConfig.set_subdir(ini_path, self.subdir)
     self.test_epoch = 300
Пример #16
0
    def setUp(self) -> None:
        """Load the two configurations of the ``forecast/exp2`` experiment.

        Prerequisite (per the original note): the natural-flow model must
        already have been generated from the gages dataset's config.ini and
        moved to the right directory by hand.

        ``forecast/exp1`` (``forecast/config_forecast_{1,2}_exp1.ini``) was the
        earlier variant of this fixture.
        """
        cfg_root = definitions.CONFIG_DIR
        sim_ini = os.path.join(cfg_root, "forecast/config_forecast_1_exp2.ini")
        main_ini = os.path.join(cfg_root,
                                "forecast/config_forecast_2_exp2.ini")
        self.sim_config_file = sim_ini
        self.config_file = main_ini
        self.subdir = "forecast/exp2"

        self.sim_config_data = GagesConfig.set_subdir(sim_ini, self.subdir)
        self.config_data = GagesConfig.set_subdir(main_ini, self.subdir)
        add_model_param(self.config_data, "model", seqLength=7, fcstLength=2)
Пример #17
0
 def setUp(self) -> None:
     """Load the ``dam/exp9`` configuration (basins with small DOR).

     Earlier variants used ``dam/exp3`` (with random seed 12345) and
     ``dam/exp17``.
     """
     ini_path = os.path.join(definitions.CONFIG_DIR, "dam/config_exp9.ini")
     self.config_file = ini_path
     self.subdir = "dam/exp9"
     self.random_seed = 11111
     self.config_data = GagesConfig.set_subdir(ini_path, self.subdir)
     self.test_epoch = 300
Пример #18
0
 def setUp(self) -> None:
     """Load the two configurations of the ``storage/exp3`` experiment.

     Prerequisite (per the original note): the natural-flow model must already
     have been generated from the gages dataset's config.ini and moved to the
     right directory by hand.

     ``storage/exp2`` (``storage/config{1,2}_exp2.ini``) was the earlier
     variant of this fixture.
     """
     cfg_root = definitions.CONFIG_DIR
     natflow_ini = os.path.join(cfg_root, "storage/config1_exp3.ini")
     storage_ini = os.path.join(cfg_root, "storage/config2_exp3.ini")
     self.config_file_natflow = natflow_ini
     self.config_file_storage = storage_ini
     self.subdir = "storage/exp3"
     self.config_data_natflow = GagesConfig.set_subdir(natflow_ini,
                                                       self.subdir)
     self.config_data_storage = GagesConfig.set_subdir(storage_ini,
                                                       self.subdir)
     # To simulate storage, get info from previous T time-seq natflow
     # (set several months).
     add_model_param(self.config_data_storage,
                     "model",
                     storageLength=100,
                     seqLength=1)
     # Candidate test epochs; change the index to evaluate another one.
     candidate_epochs = (100, 200, 220, 250, 280, 290, 295, 300, 305, 310,
                         320, 400, 500)
     self.test_epoch = candidate_epochs[7]
Пример #19
0
    def setUp(self) -> None:
        """Load the ``cluster/exp3`` configuration and set the cluster count.

        Prerequisite (per the original note): the natural-flow model must
        already have been generated from the gages dataset's config.ini and
        moved to the right directory by hand.

        Earlier variants used ``cluster/config_explore_exp1.ini`` and
        ``cluster/config_explore_exp2.ini`` with sub-directories
        ``explore/exp1`` and ``explore/exp2``.
        """
        self.num_cluster = 2

        ini_path = os.path.join(definitions.CONFIG_DIR,
                                "cluster/config_explore_exp3.ini")
        self.config_file = ini_path
        self.subdir = "cluster/exp3"
        self.config_data = GagesConfig.set_subdir(ini_path, self.subdir)
Пример #20
0
 def setUp(self) -> None:
     """Load the ``da/exp10`` configuration with a sequence length of 7.

     Prerequisite (per the original note): the natural-flow model must already
     have been generated from the gages dataset's config.ini and moved to the
     right directory by hand.

     Earlier variants used ``da/config_da_exp1.ini`` and
     ``da/config_da_exp2.ini``.
     """
     ini_path = os.path.join(definitions.CONFIG_DIR, "da/config_exp10.ini")
     self.config_file = ini_path
     self.subdir = "da/exp10"
     self.config_data = GagesConfig.set_subdir(ini_path, self.subdir)
     add_model_param(self.config_data, "model", seqLength=7)
     # choose some small basins, unit: SQKM
     self.basin_area_screen = 100
Пример #21
0
 def test_test_gages4susquehanna(self):
     """Evaluate the ``susquehanna/exp4`` model and plot de-normalised results.

     Basins are selected with ``DOR=-0.02`` and the test data model is saved.
     On CUDA device 2 the model is evaluated at ``self.test_epoch``;
     predictions and observations are passed through ``_basin_norm`` with
     ``to_norm=False`` (using DRAIN_SQKM and the rescaled PPTAVG_BASIN
     attributes), then saved and plotted.
     """
     exp_config = GagesConfig.set_subdir(
         os.path.join(definitions.CONFIG_DIR, "susquehanna/config_exp4.ini"),
         "susquehanna/exp4")
     gages_model = GagesModels(exp_config,
                               screen_basin_area_huc4=False,
                               DOR=-0.02)
     test_files = dict(
         data_source_file_name='test_data_source.txt',
         stat_file_name='test_Statistics.json',
         flow_file_name='test_flow',
         forcing_file_name='test_forcing',
         attr_file_name='test_attr',
         f_dict_file_name='test_dictFactorize.json',
         var_dict_file_name='test_dictAttribute.json',
         t_s_dict_file_name='test_dictTimeSpace.json')
     save_datamodel(gages_model.data_model_test, **test_files)
     data_model = gages_model.data_model_test
     with torch.cuda.device(2):
         pred, obs = master_test(data_model, epoch=self.test_epoch)
         source = data_model.data_source
         site_ids = data_model.t_s_dict["sites_id"]
         basin_area = source.read_attr(site_ids, ['DRAIN_SQKM'],
                                       is_return_dict=False)
         mean_prep = source.read_attr(site_ids, ['PPTAVG_BASIN'],
                                      is_return_dict=False)
         # Same rescaling as before: divide by 365, then multiply by 10.
         mean_prep = mean_prep / 365 * 10
         pred = _basin_norm(pred, basin_area, mean_prep, to_norm=False)
         obs = _basin_norm(obs, basin_area, mean_prep, to_norm=False)
         save_result(source.data_config.data_path['Temp'], self.test_epoch,
                     pred, obs)
         plot_we_need(data_model,
                      obs,
                      pred,
                      id_col="STAID",
                      lon_col="LNG_GAGE",
                      lat_col="LAT_GAGE")
Пример #22
0
    def test_stor_seperate(self):
        """Compare NSE ECDFs of small- and large-storage basins.

        Four curves are plotted:

        * "together": the saved ``basic/exp18`` results at ``self.test_epoch``,
          split into basins whose STORAGE screen is ``[0, 50]`` and
          ``[50, 15000]``;
        * "separate": the saved results of the ``dam_exp12`` and ``dam_exp11``
          experiments.
        """
        config_dir = definitions.CONFIG_DIR
        config_file = os.path.join(config_dir, "basic/config_exp18.ini")
        subdir = r"basic/exp18"
        config_data = GagesConfig.set_subdir(config_file, subdir)
        data_model = GagesModel.load_datamodel(
            config_data.data_path["Temp"],
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        all_sites = data_model.t_s_dict["sites_id"]
        storage_nor_1 = [0, 50]
        storage_nor_2 = [50, 15000]  # max is 14348.6581036888
        source_data_nor1 = GagesSource.choose_some_basins(
            config_data,
            config_data.model_dict["data"]["tRangeTrain"],
            STORAGE=storage_nor_1)
        source_data_nor2 = GagesSource.choose_some_basins(
            config_data,
            config_data.model_dict["data"]["tRangeTrain"],
            STORAGE=storage_nor_2)
        # Build the id sets once so each membership test is O(1) instead of a
        # scan over the whole id sequence for every site.
        sites_id_nor1 = set(source_data_nor1.all_configs['flow_screen_gage_id'])
        sites_id_nor2 = set(source_data_nor2.all_configs['flow_screen_gage_id'])
        # Row positions (in the order of all_sites) of each storage group.
        idx_lst_nor1 = [
            i for i, site in enumerate(all_sites) if site in sites_id_nor1
        ]
        idx_lst_nor2 = [
            i for i, site in enumerate(all_sites) if site in sites_id_nor2
        ]

        pred, obs = load_result(
            data_model.data_source.data_config.data_path['Temp'],
            self.test_epoch)
        # Keep only the first two dimensions; obs is reshaped to pred's shape.
        pred = pred.reshape(pred.shape[0], pred.shape[1])
        obs = obs.reshape(pred.shape[0], pred.shape[1])
        inds = statError(obs, pred)
        inds_df = pd.DataFrame(inds)

        keys_nse = "NSE"
        xs = []
        ys = []
        cases_exps_legends_together = ["small_stor", "large_stor"]

        # Curves 1-2: one model over all basins, split by storage group.
        for idx_lst in (idx_lst_nor1, idx_lst_nor2):
            x, y = ecdf(inds_df[keys_nse].iloc[idx_lst])
            xs.append(x)
            ys.append(y)

        # Curves 3-4: the separately run experiments. Earlier comparisons
        # used dam_exp4/5/6 and dam_exp1/2/3 instead.
        cases_exps = ["dam_exp12", "dam_exp11"]
        cases_exps_legends_separate = ["small_stor", "large_stor"]
        for case_exp in cases_exps:
            config_data_i = load_dataconfig_case_exp(case_exp)
            pred_i, obs_i = load_result(config_data_i.data_path['Temp'],
                                        self.test_epoch)
            pred_i = pred_i.reshape(pred_i.shape[0], pred_i.shape[1])
            obs_i = obs_i.reshape(obs_i.shape[0], obs_i.shape[1])
            inds_i = statError(obs_i, pred_i)
            x, y = ecdf(inds_i[keys_nse])
            xs.append(x)
            ys.append(y)

        plot_ecdfs(xs,
                   ys,
                   cases_exps_legends_together + cases_exps_legends_separate,
                   style=["together", "together", "separate", "separate"])
Пример #23
0
def synergy_ecoregion(args):
    """Train and/or test one model per ecoregion from a stored data model.

    ``args`` is merged into the global ``cfg`` with ``update_cfg``. The train
    and test data models saved under
    ``<DB>/quickdata/conus-all_90-10_nan-0.0_00-1.0`` are loaded once. Then,
    for every ``("ECO2_CODE", code)`` pair:

    * the basins of that ecoregion are selected with
      ``GagesSource.choose_some_basins``; the ecoregion is skipped when none
      of them occurs in the loaded training data model;
    * train/test data models restricted to those basins are built under a
      sub-directory named after the code, and saved when ``cfg.CACHE.STATE``
      is truthy;
    * on CUDA device 0 the model is trained when ``cfg.TRAIN_MODE`` is
      truthy, tested at epoch ``cfg.TEST_EPOCH``, and predictions and
      observations are passed through ``_basin_norm(..., to_norm=False)``
      before being written with ``save_result``.
    """
    update_cfg(cfg, args)
    use_cache = cfg.CACHE.STATE
    do_train = cfg.TRAIN_MODE
    epoch = cfg.TEST_EPOCH
    config_data = GagesConfig(cfg)

    eco_codes = (5.2, 5.3, 6.2, 7.1, 8.1, 8.2, 8.3, 8.4, 8.5, 9.2, 9.3, 9.4,
                 9.5, 9.6, 10.1, 10.2, 10.4, 11.1, 12.1, 13.1)
    eco_names = [("ECO2_CODE", code) for code in eco_codes]

    # File names used when loading; arrays carry the ".npy" extension here.
    train_load_names = {
        "data_source_file_name": 'data_source.txt',
        "stat_file_name": 'Statistics.json',
        "flow_file_name": 'flow.npy',
        "forcing_file_name": 'forcing.npy',
        "attr_file_name": 'attr.npy',
        "f_dict_file_name": 'dictFactorize.json',
        "var_dict_file_name": 'dictAttribute.json',
        "t_s_dict_file_name": 'dictTimeSpace.json',
    }
    test_load_names = {
        "data_source_file_name": 'test_data_source.txt',
        "stat_file_name": 'test_Statistics.json',
        "flow_file_name": 'test_flow.npy',
        "forcing_file_name": 'test_forcing.npy',
        "attr_file_name": 'test_attr.npy',
        "f_dict_file_name": 'test_dictFactorize.json',
        "var_dict_file_name": 'test_dictAttribute.json',
        "t_s_dict_file_name": 'test_dictTimeSpace.json',
    }
    # When saving, the array names are given without the extension.
    train_save_names = dict(train_load_names,
                            flow_file_name='flow',
                            forcing_file_name='forcing',
                            attr_file_name='attr')
    test_save_names = dict(test_load_names,
                           flow_file_name='test_flow',
                           forcing_file_name='test_forcing',
                           attr_file_name='test_attr')

    conus_dir = os.path.join(config_data.data_path["DB"], "quickdata",
                             "conus-all_90-10_nan-0.0_00-1.0")
    data_model_train = GagesModel.load_datamodel(conus_dir,
                                                 **train_load_names)
    data_model_test = GagesModel.load_datamodel(conus_dir, **test_load_names)

    for eco_name in eco_names:
        eco_source = GagesSource.choose_some_basins(
            config_data,
            config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            ecoregion=eco_name)
        eco_sites = eco_source.all_configs['flow_screen_gage_id']
        shared_sites = np.intersect1d(data_model_train.t_s_dict["sites_id"],
                                      eco_sites)
        if shared_sites.size < 1:
            # No basin of this ecoregion exists in the loaded data model.
            continue

        eco_tag = str(eco_name[1])
        # config_data is rebound here, so the basin selection of every later
        # iteration receives the sub-directory config of the previous
        # processed ecoregion (behaviour kept as-is).
        config_data = GagesConfig.set_subdir(cfg, eco_tag)
        gages_model_train = GagesModel.update_data_model(
            config_data,
            data_model_train,
            sites_id_update=eco_sites,
            data_attr_update=True,
            screen_basin_area_huc4=False)
        # The test model reuses the statistics of the training model.
        gages_model_test = GagesModel.update_data_model(
            config_data,
            data_model_test,
            sites_id_update=eco_sites,
            data_attr_update=True,
            train_stat_dict=gages_model_train.stat_dict,
            screen_basin_area_huc4=False)

        if use_cache:
            save_datamodel(gages_model_train, **train_save_names)
            save_datamodel(gages_model_test, **test_save_names)
            print("save ecoregion " + eco_tag + " data model")

        with torch.cuda.device(0):
            if do_train:
                master_train(gages_model_train)
            pred, obs = master_test(gages_model_test, epoch=epoch)

            test_source = gages_model_test.data_source
            basin_area = test_source.read_attr(
                gages_model_test.t_s_dict["sites_id"], ['DRAIN_SQKM'],
                is_return_dict=False)
            mean_prep = test_source.read_attr(
                gages_model_test.t_s_dict["sites_id"], ['PPTAVG_BASIN'],
                is_return_dict=False)
            mean_prep = mean_prep / 365 * 10
            # Undo the basin normalisation for predictions, then observations.
            pred, obs = [
                _basin_norm(series, basin_area, mean_prep, to_norm=False)
                for series in (pred, obs)
            ]
            save_result(test_source.data_config.data_path['Temp'], epoch,
                        pred, obs)
Пример #24
0
 def setUp(self):
     """Create ``self.config_data`` for the ``transdata/exp2`` experiment.

     The ini file ``transdata/config_exp2.ini`` is resolved against
     ``definitions.CONFIG_DIR`` and handed to ``GagesConfig.set_subdir``
     together with the experiment sub-directory.
     """
     ini_path = os.path.join(definitions.CONFIG_DIR,
                             "transdata/config_exp2.ini")
     self.config_data = GagesConfig.set_subdir(ini_path, r"transdata/exp2")
Пример #25
0
def pub_lstm(args):
    """Prepare k-fold data models for a PUB experiment, then train and test.

    ``args`` is merged into the global ``cfg`` with ``update_cfg``; all
    settings are then read from ``cfg``.

    Stage 1 (only when ``cfg.CACHE.STATE`` is truthy) builds and saves one
    set of data models per fold ``i`` under the sub-directory ``str(i)``:

    * ``cfg.PUB_PLAN`` chooses the train and test basin pools:
      0 -> ids listed in ``camels531/camels531.txt`` vs. all other loaded
      sites; 1/4 -> basins without dams vs. (``DOR=-dor`` and with dams),
      with 4 swapping the roles; 2/5 -> basins without dams vs. ``DOR=dor``,
      with 5 swapping; 3/6 -> (``DOR=-dor`` and with dams) vs. ``DOR=dor``,
      with 6 swapping.
    * ``cfg.PLUS`` chooses the splitting scheme: 0 -> KFold of the train
      pool per ecoregion plus a random sample of the test pool; -1 -> KFold
      of the train pool per ecoregion only; -2 -> one KFold over the whole
      train pool; anything else -> per ecoregion, KFold over the smaller
      pool and a random draw from the other, training on both.

    Stage 2 (always) runs on CUDA device ``cfg.CTX``: when
    ``cfg.TRAIN_MODE`` is truthy each fold is trained, then each fold is
    tested on the "base" data model (the training basins), on test set 1
    and, when ``plus >= 0``, on test set 2. Results are passed through
    ``_basin_norm(..., to_norm=False)`` and written with ``save_result``.

    NOTE(review): the meaning of a negative ``DOR`` threshold is defined by
    ``GagesSource.choose_some_basins`` and is not visible here - confirm.
    """
    update_cfg(cfg, args)
    random_seed = cfg.RANDOM_SEED
    test_epoch = cfg.TEST_EPOCH
    gpu_num = cfg.CTX
    train_mode = cfg.TRAIN_MODE
    cache = cfg.CACHE.STATE
    pub_plan = cfg.PUB_PLAN
    plus = cfg.PLUS
    dor = cfg.GAGES.attrScreenParams.DOR
    split_num = cfg.SPLIT_NUM
    print("train and test for PUB: \n")
    config_data = GagesConfig(cfg)
    # Stage 1: build and save the per-fold data models. When cache is falsy
    # this is skipped and stage 2 loads whatever was saved previously.
    if cache:
        # (attribute name, code) pairs passed as ecoregion=... below.
        eco_names = [
            ("ECO2_CODE", 5.2), ("ECO2_CODE", 5.3), ("ECO2_CODE", 6.2),
            ("ECO2_CODE", 7.1), ("ECO2_CODE", 8.1), ("ECO2_CODE", 8.2),
            ("ECO2_CODE", 8.3), ("ECO2_CODE", 8.4), ("ECO2_CODE", 8.5),
            ("ECO2_CODE", 9.2), ("ECO2_CODE", 9.3), ("ECO2_CODE", 9.4),
            ("ECO2_CODE", 9.5), ("ECO2_CODE", 9.6), ("ECO2_CODE", 10.1),
            ("ECO2_CODE", 10.2), ("ECO2_CODE", 10.4), ("ECO2_CODE", 11.1),
            ("ECO2_CODE", 12.1), ("ECO2_CODE", 13.1)
        ]
        # Load the previously stored train and test data models that all
        # folds are derived from.
        quick_data_dir = os.path.join(config_data.data_path["DB"], "quickdata")
        data_dir = os.path.join(quick_data_dir,
                                "conus-all_90-10_nan-0.0_00-1.0")
        data_model_train = GagesModel.load_datamodel(
            data_dir,
            data_source_file_name='data_source.txt',
            stat_file_name='Statistics.json',
            flow_file_name='flow.npy',
            forcing_file_name='forcing.npy',
            attr_file_name='attr.npy',
            f_dict_file_name='dictFactorize.json',
            var_dict_file_name='dictAttribute.json',
            t_s_dict_file_name='dictTimeSpace.json')
        data_model_test = GagesModel.load_datamodel(
            data_dir,
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        # Every selection below is intersected with these site ids.
        conus_sites_id = data_model_train.t_s_dict["sites_id"]
        # --- choose the train / test basin pools according to pub_plan ---
        if pub_plan == 0:
            """do a pub test like freddy's"""
            camels531_gageid_file = os.path.join(config_data.data_path["DB"],
                                                 "camels531", "camels531.txt")
            gauge_df = pd.read_csv(camels531_gageid_file,
                                   dtype={"GaugeID": str})
            gauge_list = gauge_df["GaugeID"].values
            # Left-pad the ids to 8 characters so they compare equal to the
            # site ids of the data model.
            all_sites_camels_531 = np.sort(
                [str(gauge).zfill(8) for gauge in gauge_list])
            sites_id_train = np.intersect1d(conus_sites_id,
                                            all_sites_camels_531)
            # basins not in CAMELS
            sites_id_test = [
                a_temp_site for a_temp_site in conus_sites_id
                if a_temp_site not in all_sites_camels_531
            ]
            # Sanity check: the test ids must be strictly increasing.
            assert (all(x < y
                        for x, y in zip(sites_id_test, sites_id_test[1:])))
        elif pub_plan == 1 or pub_plan == 4:
            # Pools: basins without dams vs. basins that satisfy DOR=-dor
            # and have at least one dam.
            source_data_dor1 = GagesSource.choose_some_basins(
                config_data,
                config_data.model_dict["data"]["tRangeTrain"],
                screen_basin_area_huc4=False,
                DOR=-dor)
            # basins with dams
            source_data_withdams = GagesSource.choose_some_basins(
                config_data,
                config_data.model_dict["data"]["tRangeTrain"],
                screen_basin_area_huc4=False,
                dam_num=[1, 100000])
            # basins without dams
            source_data_withoutdams = GagesSource.choose_some_basins(
                config_data,
                config_data.model_dict["data"]["tRangeTrain"],
                screen_basin_area_huc4=False,
                dam_num=0)

            sites_id_dor1 = source_data_dor1.all_configs['flow_screen_gage_id']
            sites_id_withdams = source_data_withdams.all_configs[
                'flow_screen_gage_id']

            # Plan 1 trains on the dam-free basins; plan 4 swaps the roles.
            if pub_plan == 1:
                sites_id_train = source_data_withoutdams.all_configs[
                    'flow_screen_gage_id']
                sites_id_test = np.intersect1d(
                    np.array(sites_id_dor1),
                    np.array(sites_id_withdams)).tolist()
            else:
                sites_id_train = np.intersect1d(
                    np.array(sites_id_dor1),
                    np.array(sites_id_withdams)).tolist()
                sites_id_test = source_data_withoutdams.all_configs[
                    'flow_screen_gage_id']

        elif pub_plan == 2 or pub_plan == 5:
            # Pools: basins without dams vs. basins that satisfy DOR=dor.
            source_data_dor1 = GagesSource.choose_some_basins(
                config_data,
                config_data.model_dict["data"]["tRangeTrain"],
                screen_basin_area_huc4=False,
                DOR=dor)
            # basins without dams
            source_data_withoutdams = GagesSource.choose_some_basins(
                config_data,
                config_data.model_dict["data"]["tRangeTrain"],
                screen_basin_area_huc4=False,
                dam_num=0)

            # Plan 2 trains on the dam-free basins; plan 5 swaps the roles.
            if pub_plan == 2:
                sites_id_train = source_data_withoutdams.all_configs[
                    'flow_screen_gage_id']
                sites_id_test = source_data_dor1.all_configs[
                    'flow_screen_gage_id']
            else:
                sites_id_train = source_data_dor1.all_configs[
                    'flow_screen_gage_id']
                sites_id_test = source_data_withoutdams.all_configs[
                    'flow_screen_gage_id']

        elif pub_plan == 3 or pub_plan == 6:
            # Pools: (DOR=-dor and with dams) vs. DOR=dor.
            dor_1 = -dor
            dor_2 = dor
            source_data_dor1 = GagesSource.choose_some_basins(
                config_data,
                config_data.model_dict["data"]["tRangeTrain"],
                screen_basin_area_huc4=False,
                DOR=dor_1)
            # basins with dams
            source_data_withdams = GagesSource.choose_some_basins(
                config_data,
                config_data.model_dict["data"]["tRangeTrain"],
                screen_basin_area_huc4=False,
                dam_num=[1, 100000])
            sites_id_dor1 = source_data_dor1.all_configs['flow_screen_gage_id']
            sites_id_withdams = source_data_withdams.all_configs[
                'flow_screen_gage_id']

            source_data_dor2 = GagesSource.choose_some_basins(
                config_data,
                config_data.model_dict["data"]["tRangeTrain"],
                screen_basin_area_huc4=False,
                DOR=dor_2)

            # Plan 3 trains on (DOR=-dor and with dams); plan 6 swaps the
            # roles.
            if pub_plan == 3:
                sites_id_train = np.intersect1d(
                    np.array(sites_id_dor1),
                    np.array(sites_id_withdams)).tolist()
                sites_id_test = source_data_dor2.all_configs[
                    'flow_screen_gage_id']
            else:
                sites_id_train = source_data_dor2.all_configs[
                    'flow_screen_gage_id']
                sites_id_test = np.intersect1d(
                    np.array(sites_id_dor1),
                    np.array(sites_id_withdams)).tolist()

        else:
            # NOTE(review): execution continues after this message and the
            # None values are passed to np.intersect1d just below - confirm
            # whether an unknown plan should raise instead.
            print("wrong plan")
            sites_id_train = None
            sites_id_test = None

        # Restrict both pools to the sites present in the loaded data model.
        train_sites_in_conus = np.intersect1d(conus_sites_id, sites_id_train)
        test_sites_in_conus = np.intersect1d(conus_sites_id, sites_id_test)

        # --- split the pools into folds according to plus ---
        # Each processed ecoregion appends exactly split_num consecutive
        # entries to the sites_lst_* lists (one per KFold split), which is
        # what the "j % split_num == i" regrouping further down relies on.
        # The all_index_lst_* lists and eco_name_chosen are filled but not
        # read again in the code visible here.
        if plus == 0:
            # Per ecoregion: KFold over the train pool; test set 1 is the
            # held-out fold, test set 2 is drawn from the test pool.
            all_index_lst_train_1 = []
            # all sites come from train1 dataset
            sites_lst_train = []
            all_index_lst_test_1 = []
            sites_lst_test_1 = []
            all_index_lst_test_2 = []
            sites_lst_test_2 = []
            np.random.seed(random_seed)
            kf = KFold(n_splits=split_num,
                       shuffle=True,
                       random_state=random_seed)
            eco_name_chosen = []
            for eco_name in eco_names:
                eco_source_data = GagesSource.choose_some_basins(
                    config_data,
                    config_data.model_dict["data"]["tRangeTrain"],
                    screen_basin_area_huc4=False,
                    ecoregion=eco_name)
                eco_sites_id = eco_source_data.all_configs[
                    'flow_screen_gage_id']
                train_sites_id_inter = np.intersect1d(train_sites_in_conus,
                                                      eco_sites_id)
                test_sites_id_inter = np.intersect1d(test_sites_in_conus,
                                                     eco_sites_id)
                # Skip ecoregions with fewer train sites than folds or with
                # no test site at all.
                if train_sites_id_inter.size < split_num or test_sites_id_inter.size < 1:
                    continue
                for train, test in kf.split(train_sites_id_inter):
                    all_index_lst_train_1.append(train)
                    sites_lst_train.append(train_sites_id_inter[train])
                    all_index_lst_test_1.append(test)
                    sites_lst_test_1.append(train_sites_id_inter[test])
                    # Test set 2 gets as many sites as the held-out fold,
                    # or every available site when there are fewer.
                    if test_sites_id_inter.size < test.size:
                        all_index_lst_test_2.append(
                            np.arange(test_sites_id_inter.size))
                        sites_lst_test_2.append(test_sites_id_inter)
                    else:
                        test2_chosen_idx = np.random.choice(
                            test_sites_id_inter.size, test.size, replace=False)
                        all_index_lst_test_2.append(test2_chosen_idx)
                        sites_lst_test_2.append(
                            test_sites_id_inter[test2_chosen_idx])
                eco_name_chosen.append(eco_name)
        elif plus == -1:
            # Per ecoregion: KFold over the train pool only; no test set 2.
            print("camels pub, only do pub on the camels basins")
            all_index_lst_train_1 = []
            # all sites come from train1 dataset
            sites_lst_train = []
            all_index_lst_test_1 = []
            sites_lst_test_1 = []
            np.random.seed(random_seed)
            kf = KFold(n_splits=split_num,
                       shuffle=True,
                       random_state=random_seed)
            eco_name_chosen = []
            for eco_name in eco_names:
                eco_source_data = GagesSource.choose_some_basins(
                    config_data,
                    config_data.model_dict["data"]["tRangeTrain"],
                    screen_basin_area_huc4=False,
                    ecoregion=eco_name)
                eco_sites_id = eco_source_data.all_configs[
                    'flow_screen_gage_id']
                train_sites_id_inter = np.intersect1d(train_sites_in_conus,
                                                      eco_sites_id)
                if train_sites_id_inter.size < split_num:
                    continue
                for train, test in kf.split(train_sites_id_inter):
                    all_index_lst_train_1.append(train)
                    sites_lst_train.append(train_sites_id_inter[train])
                    all_index_lst_test_1.append(test)
                    sites_lst_test_1.append(train_sites_id_inter[test])
                eco_name_chosen.append(eco_name)
        elif plus == -2:
            # One KFold over the whole train pool, ignoring ecoregions.
            print(
                "camels pub, only do pub on the camels basins, same with freddy's split method"
            )
            all_index_lst_train_1 = []
            # all sites come from train1 dataset
            sites_lst_train = []
            all_index_lst_test_1 = []
            sites_lst_test_1 = []
            np.random.seed(random_seed)
            kf = KFold(n_splits=split_num,
                       shuffle=True,
                       random_state=random_seed)

            for train, test in kf.split(train_sites_in_conus):
                all_index_lst_train_1.append(train)
                sites_lst_train.append(train_sites_in_conus[train])
                all_index_lst_test_1.append(test)
                sites_lst_test_1.append(train_sites_in_conus[test])
        else:
            # Any other plus value: the training set of each fold mixes
            # sites from both pools, and each pool also yields a test set.
            sites_lst_train = []
            sites_lst_test_1 = []
            sites_lst_test_2 = []

            np.random.seed(random_seed)
            kf = KFold(n_splits=split_num,
                       shuffle=True,
                       random_state=random_seed)
            eco_name_chosen = []
            for eco_name in eco_names:
                eco_source_data = GagesSource.choose_some_basins(
                    config_data,
                    config_data.model_dict["data"]["tRangeTrain"],
                    screen_basin_area_huc4=False,
                    ecoregion=eco_name)
                eco_sites_id = eco_source_data.all_configs[
                    'flow_screen_gage_id']
                sites_id_inter_1 = np.intersect1d(train_sites_in_conus,
                                                  eco_sites_id)
                sites_id_inter_2 = np.intersect1d(test_sites_in_conus,
                                                  eco_sites_id)

                # KFold is applied to the smaller pool; matching train/test
                # sizes are then drawn from the other pool with
                # random_choice_no_return.
                if sites_id_inter_1.size < sites_id_inter_2.size:
                    if sites_id_inter_1.size < split_num:
                        continue
                    for train, test in kf.split(sites_id_inter_1):
                        sites_lst_train_1 = sites_id_inter_1[train]
                        sites_lst_test_1.append(sites_id_inter_1[test])

                        chosen_lst_2 = random_choice_no_return(
                            sites_id_inter_2, [train.size, test.size])
                        sites_lst_train_2 = chosen_lst_2[0]
                        sites_lst_test_2.append(chosen_lst_2[1])

                        sites_lst_train.append(
                            np.sort(
                                np.append(sites_lst_train_1,
                                          sites_lst_train_2)))

                else:
                    if sites_id_inter_2.size < split_num:
                        continue
                    for train, test in kf.split(sites_id_inter_2):
                        sites_lst_train_2 = sites_id_inter_2[train]
                        sites_lst_test_2.append(sites_id_inter_2[test])

                        chosen_lst_1 = random_choice_no_return(
                            sites_id_inter_1, [train.size, test.size])
                        sites_lst_train_1 = chosen_lst_1[0]
                        sites_lst_test_1.append(chosen_lst_1[1])

                        sites_lst_train.append(
                            np.sort(
                                np.append(sites_lst_train_1,
                                          sites_lst_train_2)))

                eco_name_chosen.append(eco_name)
        # --- assemble and save the data models of each fold ---
        for i in range(split_num):
            # Collect the i-th split of every processed ecoregion and merge
            # them into one sorted id array.
            sites_ids_train_ilst = [
                sites_lst_train[j] for j in range(len(sites_lst_train))
                if j % split_num == i
            ]
            sites_ids_train_i = np.sort(
                reduce(lambda x, y: np.hstack((x, y)), sites_ids_train_ilst))
            sites_ids_test_ilst_1 = [
                sites_lst_test_1[j] for j in range(len(sites_lst_test_1))
                if j % split_num == i
            ]
            sites_ids_test_i_1 = np.sort(
                reduce(lambda x, y: np.hstack((x, y)), sites_ids_test_ilst_1))

            # Test set 2 is only assembled for non-negative plus.
            if plus >= 0:
                sites_ids_test_ilst_2 = [
                    sites_lst_test_2[j] for j in range(len(sites_lst_test_2))
                    if j % split_num == i
                ]
                sites_ids_test_i_2 = np.sort(
                    reduce(lambda x, y: np.hstack((x, y)),
                           sites_ids_test_ilst_2))
            # Each fold gets its own sub-directory named after its index.
            config_data_i = GagesConfig.set_subdir(cfg, str(i))

            gages_model_train_i = GagesModel.update_data_model(
                config_data_i,
                data_model_train,
                sites_id_update=sites_ids_train_i,
                data_attr_update=True,
                screen_basin_area_huc4=False)
            # Baseline: the training basins taken from the test data model.
            # All test models reuse the statistics of the training model.
            gages_model_test_baseline_i = GagesModel.update_data_model(
                config_data_i,
                data_model_test,
                sites_id_update=sites_ids_train_i,
                data_attr_update=True,
                train_stat_dict=gages_model_train_i.stat_dict,
                screen_basin_area_huc4=False)
            gages_model_test_i_1 = GagesModel.update_data_model(
                config_data_i,
                data_model_test,
                sites_id_update=sites_ids_test_i_1,
                data_attr_update=True,
                train_stat_dict=gages_model_train_i.stat_dict,
                screen_basin_area_huc4=False)
            if plus >= 0:
                gages_model_test_i_2 = GagesModel.update_data_model(
                    config_data_i,
                    data_model_test,
                    sites_id_update=sites_ids_test_i_2,
                    data_attr_update=True,
                    train_stat_dict=gages_model_train_i.stat_dict,
                    screen_basin_area_huc4=False)
            # The file names used here are the ones stage 2 loads again
            # (arrays are loaded with an added ".npy" extension).
            save_datamodel(gages_model_train_i,
                           data_source_file_name='data_source.txt',
                           stat_file_name='Statistics.json',
                           flow_file_name='flow',
                           forcing_file_name='forcing',
                           attr_file_name='attr',
                           f_dict_file_name='dictFactorize.json',
                           var_dict_file_name='dictAttribute.json',
                           t_s_dict_file_name='dictTimeSpace.json')
            save_datamodel(gages_model_test_baseline_i,
                           data_source_file_name='test_data_source_base.txt',
                           stat_file_name='test_Statistics_base.json',
                           flow_file_name='test_flow_base',
                           forcing_file_name='test_forcing_base',
                           attr_file_name='test_attr_base',
                           f_dict_file_name='test_dictFactorize_base.json',
                           var_dict_file_name='test_dictAttribute_base.json',
                           t_s_dict_file_name='test_dictTimeSpace_base.json')
            save_datamodel(gages_model_test_i_1,
                           data_source_file_name='test_data_source.txt',
                           stat_file_name='test_Statistics.json',
                           flow_file_name='test_flow',
                           forcing_file_name='test_forcing',
                           attr_file_name='test_attr',
                           f_dict_file_name='test_dictFactorize.json',
                           var_dict_file_name='test_dictAttribute.json',
                           t_s_dict_file_name='test_dictTimeSpace.json')
            if plus >= 0:
                save_datamodel(gages_model_test_i_2,
                               data_source_file_name='test_data_source_2.txt',
                               stat_file_name='test_Statistics_2.json',
                               flow_file_name='test_flow_2',
                               forcing_file_name='test_forcing_2',
                               attr_file_name='test_attr_2',
                               f_dict_file_name='test_dictFactorize_2.json',
                               var_dict_file_name='test_dictAttribute_2.json',
                               t_s_dict_file_name='test_dictTimeSpace_2.json')
            print("save ecoregion " + str(i) + " data model")
    # Stage 2: train (optional) and test every fold on the configured device.
    with torch.cuda.device(gpu_num):
        if train_mode:
            for i in range(split_num):
                data_model = GagesModel.load_datamodel(
                    config_data.data_path["Temp"],
                    str(i),
                    data_source_file_name='data_source.txt',
                    stat_file_name='Statistics.json',
                    flow_file_name='flow.npy',
                    forcing_file_name='forcing.npy',
                    attr_file_name='attr.npy',
                    f_dict_file_name='dictFactorize.json',
                    var_dict_file_name='dictAttribute.json',
                    t_s_dict_file_name='dictTimeSpace.json')
                master_train(data_model, random_seed=random_seed)
        for i in range(split_num):
            # Load the saved test data models of fold i.
            data_model_baseline = GagesModel.load_datamodel(
                config_data.data_path["Temp"],
                str(i),
                data_source_file_name='test_data_source_base.txt',
                stat_file_name='test_Statistics_base.json',
                flow_file_name='test_flow_base.npy',
                forcing_file_name='test_forcing_base.npy',
                attr_file_name='test_attr_base.npy',
                f_dict_file_name='test_dictFactorize_base.json',
                var_dict_file_name='test_dictAttribute_base.json',
                t_s_dict_file_name='test_dictTimeSpace_base.json')
            data_model = GagesModel.load_datamodel(
                config_data.data_path["Temp"],
                str(i),
                data_source_file_name='test_data_source.txt',
                stat_file_name='test_Statistics.json',
                flow_file_name='test_flow.npy',
                forcing_file_name='test_forcing.npy',
                attr_file_name='test_attr.npy',
                f_dict_file_name='test_dictFactorize.json',
                var_dict_file_name='test_dictAttribute.json',
                t_s_dict_file_name='test_dictTimeSpace.json')
            if plus >= 0:
                data_model_2 = GagesModel.load_datamodel(
                    config_data.data_path["Temp"],
                    str(i),
                    data_source_file_name='test_data_source_2.txt',
                    stat_file_name='test_Statistics_2.json',
                    flow_file_name='test_flow_2.npy',
                    forcing_file_name='test_forcing_2.npy',
                    attr_file_name='test_attr_2.npy',
                    f_dict_file_name='test_dictFactorize_2.json',
                    var_dict_file_name='test_dictAttribute_2.json',
                    t_s_dict_file_name='test_dictTimeSpace_2.json')
            # Baseline test on the training basins; results are saved under
            # the 'flow_pred_base' / 'flow_obs_base' names.
            pred_baseline, obs_baseline = master_test(data_model_baseline,
                                                      epoch=test_epoch,
                                                      save_file_suffix="base")
            basin_area_baseline = data_model_baseline.data_source.read_attr(
                data_model_baseline.t_s_dict["sites_id"], ['DRAIN_SQKM'],
                is_return_dict=False)
            mean_prep_baseline = data_model_baseline.data_source.read_attr(
                data_model_baseline.t_s_dict["sites_id"], ['PPTAVG_BASIN'],
                is_return_dict=False)
            # NOTE(review): presumably a unit conversion of the precipitation
            # attribute before de-normalising - confirm the intended units.
            mean_prep_baseline = mean_prep_baseline / 365 * 10
            pred_baseline = _basin_norm(pred_baseline,
                                        basin_area_baseline,
                                        mean_prep_baseline,
                                        to_norm=False)
            obs_baseline = _basin_norm(obs_baseline,
                                       basin_area_baseline,
                                       mean_prep_baseline,
                                       to_norm=False)
            save_result(
                data_model_baseline.data_source.data_config.data_path['Temp'],
                test_epoch,
                pred_baseline,
                obs_baseline,
                pred_name='flow_pred_base',
                obs_name='flow_obs_base')

            # Test set 1, saved under save_result's default names.
            pred, obs = master_test(data_model, epoch=test_epoch)
            basin_area = data_model.data_source.read_attr(
                data_model.t_s_dict["sites_id"], ['DRAIN_SQKM'],
                is_return_dict=False)
            mean_prep = data_model.data_source.read_attr(
                data_model.t_s_dict["sites_id"], ['PPTAVG_BASIN'],
                is_return_dict=False)
            mean_prep = mean_prep / 365 * 10
            pred = _basin_norm(pred, basin_area, mean_prep, to_norm=False)
            obs = _basin_norm(obs, basin_area, mean_prep, to_norm=False)
            save_result(data_model.data_source.data_config.data_path['Temp'],
                        test_epoch, pred, obs)
            # Test set 2 exists only for non-negative plus; results are
            # saved under the 'flow_pred_2' / 'flow_obs_2' names.
            if plus >= 0:
                pred_2, obs_2 = master_test(data_model_2,
                                            epoch=test_epoch,
                                            save_file_suffix="2")
                basin_area_2 = data_model_2.data_source.read_attr(
                    data_model_2.t_s_dict["sites_id"], ['DRAIN_SQKM'],
                    is_return_dict=False)
                mean_prep_2 = data_model_2.data_source.read_attr(
                    data_model_2.t_s_dict["sites_id"], ['PPTAVG_BASIN'],
                    is_return_dict=False)
                mean_prep_2 = mean_prep_2 / 365 * 10
                pred_2 = _basin_norm(pred_2,
                                     basin_area_2,
                                     mean_prep_2,
                                     to_norm=False)
                obs_2 = _basin_norm(obs_2,
                                    basin_area_2,
                                    mean_prep_2,
                                    to_norm=False)
                save_result(
                    data_model_2.data_source.data_config.data_path['Temp'],
                    test_epoch,
                    pred_2,
                    obs_2,
                    pred_name='flow_pred_2',
                    obs_name='flow_obs_2')