Exemplo n.º 1
0
def create_usgs(meta_data_dir: str, precip_path: str, start: int, end: int) -> None:
    """Join USGS flow data with precipitation data for a range of gages.

    The metadata files in ``meta_data_dir`` are listed in sorted order and
    the entries at indices ``start`` (inclusive) to ``end`` (exclusive) are
    processed. For each file the function:

    1. loads the metadata JSON and derives the gage id from the part of the
       file name that precedes ``"stations"``;
    2. fetches flow data with ``make_usgs_data`` for 2014-01-01 to
       2019-01-01 and post-processes it with ``process_intermediate_csv``;
    3. reads ``<precip_path>/<station_id>.csv`` for the first station listed
       in the metadata and merges it with the flow data via ``combine_data``;
    4. writes the joined CSV to the current working directory, rewrites the
       metadata JSON in place with the extra fields, and uploads both files
       with ``upload_file``.

    Any exception raised while processing a gage is recorded in
    ``exceptions.json`` (written to the current working directory and
    uploaded after every failure) and processing continues with the next
    index.

    Args:
        meta_data_dir: Directory containing the per-gage metadata JSON files.
        precip_path: Directory containing ``<station_id>.csv`` files.
        start: First index into the sorted file listing (inclusive).
        end: Last index into the sorted file listing (exclusive).
    """
    gage_list = sorted(os.listdir(meta_data_dir))
    exceptions = {}
    client = get_storage_client()
    for i in range(start, end):
        # Reset the key for this iteration. Without this, a failure that
        # happens before the gage id is parsed (e.g. an out-of-range index)
        # would raise NameError on the first iteration, or be recorded under
        # the previous gage's id on later ones.
        gage_id = "index_" + str(i)
        try:
            file_name = gage_list[i]
            gage_id = file_name.split("stations")[0]
            meta_path = os.path.join(meta_data_dir, file_name)
            with open(meta_path) as f:
                print(meta_path)
                data = json.load(f)
            # Seven-character ids get a leading "0" -- presumably the leading
            # zero of an eight-digit USGS site number was dropped from the
            # file name; confirm against the metadata producer.
            if len(gage_id) == 7:
                gage_id = "0" + gage_id
            raw_df = make_usgs_data(
                datetime(2014, 1, 1), datetime(2019, 1, 1), gage_id
            )
            df, max_flow, min_flow = process_intermediate_csv(raw_df)
            data["time_zone_code"] = df["tz_cd"].iloc[0]
            data["max_flow"] = max_flow
            data["min_flow"] = min_flow
            station_id = data["stations"][0]["station_id"]
            precip_df = pd.read_csv(os.path.join(precip_path, station_id + ".csv"))
            fixed_df, nan_flow, nan_precip = combine_data(df, precip_df)
            data["nan_flow"] = nan_flow
            data["nan_precip"] = nan_precip
            joined_name = str(gage_id) + station_id + "_flow.csv"
            joined_upload = "joined/" + joined_name
            data["files"] = [joined_name]
            # The joined CSV is written relative to the current working
            # directory, then uploaded from there.
            fixed_df.to_csv(joined_name)
            with open(meta_path, "w") as f:
                json.dump(data, f)
            upload_file("predict_cfs", "meta2/" + file_name, meta_path, client)
            upload_file("predict_cfs", joined_upload, joined_name, client)
        except Exception as e:
            # Best-effort batch job: record the failure and move on to the
            # next gage instead of aborting the whole run.
            exceptions[str(gage_id)] = str(e)
            with open("exceptions.json", "w") as a:
                json.dump(exceptions, a)
            print("exception")
            upload_file(
                "predict_cfs",
                "meta2/" + "exceptions.json",
                "exceptions.json",
                client,
            )
Exemplo n.º 2
0
 def __init__(self, model_base: str, training_data: str,
              validation_data: str, test_data: str, params: Dict):
     """Set up the model, the three data loaders and the optional clients.

     Args:
         model_base: Forwarded to ``self.load_model`` as the model to build.
         training_data: Forwarded to ``self.make_data_load`` with the
             ``"train"`` split label.
         validation_data: Forwarded to ``self.make_data_load`` with the
             ``"valid"`` split label.
         test_data: Forwarded to ``self.make_data_load`` with the
             ``"test"`` split label.
         params: Configuration mapping. ``"model_params"`` and
             ``"dataset_params"`` are required. ``"weight_path"`` is
             optional and, when present, is passed to ``self.load_model``.
             ``"GCS"`` is optional; a truthy value makes the constructor
             create a storage client.

     Raises:
         KeyError: If ``"model_params"`` or ``"dataset_params"`` is missing
             from ``params``.
     """
     self.params = params
     if "weight_path" in params:
         self.model = self.load_model(model_base, params["model_params"],
                                      params["weight_path"])
     else:
         self.model = self.load_model(model_base, params["model_params"])
     self.training = self.make_data_load(training_data,
                                         params["dataset_params"], "train")
     self.validation = self.make_data_load(validation_data,
                                           params["dataset_params"],
                                           "valid")
     self.test_data = self.make_data_load(test_data,
                                          params["dataset_params"], "test")
     # Truthiness check instead of `!= False`: falsy settings such as None
     # (JSON null) or an empty string must leave GCS disabled, not enable it.
     if self.params.get("GCS"):
         self.gcs_client = get_storage_client()
     else:
         self.gcs_client = None
     self.wandb = self.wandb_init()
Exemplo n.º 3
0
 def __init__(
         self,
         model_base: str,
         training_data: str,
         validation_data: str,
         test_data: str,
         params: Dict):
     """Set up the model, the three data loaders and the optional clients.

     Note that ``params`` is modified in place: ``params["weight_path"]``
     (when present) is replaced by the result of ``get_data`` and
     ``params["dataset_params"]["forecast_test_len"]`` is set from
     ``params["inference_params"]["hours_to_forecast"]``.

     Args:
         model_base: Forwarded to ``self.load_model`` as the model to build.
         training_data: Forwarded to ``self.make_data_load`` with the
             ``"train"`` split label.
         validation_data: Forwarded to ``self.make_data_load`` with the
             ``"valid"`` split label.
         test_data: Forwarded to ``self.make_data_load`` with the
             ``"test"`` split label.
         params: Configuration mapping. ``"model_params"``,
             ``"dataset_params"`` and ``"inference_params"`` are required;
             ``"weight_path"`` and ``"GCS"`` are optional.
     """
     self.params = params

     # The weight path is only forwarded when configured, so load_model's
     # own default applies otherwise.
     if "weight_path" in params:
         params["weight_path"] = get_data(params["weight_path"])
         optional_args = (params["weight_path"],)
     else:
         optional_args = ()
     self.model = self.load_model(model_base, params["model_params"], *optional_args)

     # The test loader's forecast length mirrors the inference horizon.
     horizon = params["inference_params"]["hours_to_forecast"]
     params["dataset_params"]["forecast_test_len"] = horizon

     # Build the loaders in train / valid / test order.
     loader_specs = (
         ("training", training_data, "train"),
         ("validation", validation_data, "valid"),
         ("test_data", test_data, "test"),
     )
     for attr_name, data_path, split in loader_specs:
         setattr(self, attr_name, self.make_data_load(data_path, params["dataset_params"], split))

     # A storage client is only created when GCS is switched on.
     self.gcs_client = get_storage_client() if self.params.get("GCS") else None
     self.wandb = self.wandb_init()