def infer_now(self, some_date, csv_path=None, save_buck=None, save_name=None):
    """Run inference starting at ``some_date`` and optionally upload results to GCS.

    :param some_date: The datetime at which inference should begin.
    :param csv_path: Optional path to a CSV to run inference on; when given it
        overrides both ``test_csv_path`` and the dataset ``file_path``.
    :param save_buck: Optional GCS bucket name; when set the prediction frame is
        written to ``temp3.csv`` and uploaded.
    :param save_name: Object name to use for the uploaded CSV.
    :return: Tuple of (df, tensor, history, forecast_start, test, samples) as
        produced by ``infer_on_torch_model``.
    """
    history_len = self.inference_params["dataset_params"]["forecast_history"]
    self.inference_params["datetime_start"] = some_date
    if csv_path:
        self.inference_params["test_csv_path"] = csv_path
        self.inference_params["dataset_params"]["file_path"] = csv_path
    df, tensor, history, forecast_start, test, samples = infer_on_torch_model(
        self.model, **self.inference_params)
    if test.scale:
        # Undo target scaling on the flattened prediction tensor, then write the
        # rescaled values back over the forecast portion of the frame.
        rescaled = test.inverse_scale(tensor.numpy().reshape(-1, 1))
        df["preds"][history_len:] = rescaled.numpy()[:, 0]
        if len(samples.columns) > 1:
            idx = samples.index
            if hasattr(test, "targ_scaler"):
                samples = test.inverse_scale(samples)
                samples = pd.DataFrame(samples.numpy(), index=idx)
            # History rows carry no sampled predictions; zero them out.
            samples[:history_len] = 0
    if save_buck:
        df.to_csv("temp3.csv")
        upload_file(save_buck, save_name, "temp3.csv", self.model.gcs_client)
    return df, tensor, history, forecast_start, test, samples
def upload_gcs(self, save_path: str, name: str, file_type: str, epoch=0, bucket_name=None):
    """Upload a model checkpoint file to Google Cloud Storage.

    :param save_path: Local path of the file to upload.
    :param name: Object name; the file lands under ``experiments/<name>``.
    :param file_type: Suffix used to build the wandb config key.
    :param epoch: Epoch number embedded in the wandb config key, defaults to 0.
    :param bucket_name: Target bucket; falls back to ``MODEL_BUCKET`` env var.
    """
    # No GCS client configured: silently skip the upload (original behavior).
    if not self.gcs_client:
        return
    if bucket_name is None:
        bucket_name = os.environ["MODEL_BUCKET"]
    print("Data saved to: ")
    print(name)
    upload_file(bucket_name, os.path.join("experiments", name), save_path, self.gcs_client)
    online_path = os.path.join("gs://", bucket_name, "experiments", name)
    if self.wandb:
        # Record the remote checkpoint path per-epoch in the wandb run config.
        wandb.config.update({"gcs_m_path_" + str(epoch) + file_type: online_path})
def infer_now(self, some_date: datetime, csv_path=None, save_buck=None, save_name=None, use_torch_script=False):
    """Performs inference on a CSV file at a specified datatime

    :param some_date: The date you want inference to begin on.
    :param csv_path: A path to a CSV you want to perform inference on, defaults to None
    :type csv_path: str, optional
    :param save_buck: The GCP bucket where you want to save predictions, defaults to None
    :type save_buck: str, optional
    :param save_name: The name of the file to save the Pandas data-frame to GCP as, defaults to None
    :type save_name: str, optional
    :param use_torch_script: Optional parameter which allows you to use a saved torch script version of your model.
    :return: Returns a tuple consisting of the Pandas dataframe with predictions + history,
        the prediction tensor, a tensor of the historical values, the forecast start index, the test loader,
        and the a dataframe of the prediction samples (e.g. the confidence interval preds)
    :rtype: tuple(pd.DataFrame, torch.Tensor, int, CSVTestLoader, pd.DataFrame)
    """
    forecast_history = self.inference_params["dataset_params"]["forecast_history"]
    self.inference_params["datetime_start"] = some_date
    if csv_path:
        self.inference_params["test_csv_path"] = csv_path
        self.inference_params["dataset_params"]["file_path"] = csv_path
    df, tensor, history, forecast_start, test, samples = infer_on_torch_model(
        self.model, **self.inference_params)
    if test.scale and self.n_targets:
        # FIX: inverse_scale of the full prediction tensor is loop-invariant —
        # hoist it (and the debug print) so it runs once instead of once per target.
        unscaled = test.inverse_scale(tensor.numpy())
        print("Shape of unscaled is: ")
        print(unscaled.shape)
        for i in range(self.n_targets):
            col = "pred_" + self.targ_cols[i]
            # Pre-fill the column so history rows read 0, then overwrite the
            # forecast portion with the rescaled predictions for target i.
            df[col] = 0
            df[col][forecast_history:] = unscaled[0, :, i].numpy()
    elif test.scale:
        # Single-target path: flatten, rescale, and write the forecast slice.
        unscaled = test.inverse_scale(tensor.numpy().reshape(-1, 1))
        df["preds"][forecast_history:] = unscaled.numpy()[:, 0]
    if len(samples) > 0:
        # History rows carry no sampled predictions; zero them in every sample frame.
        for i in range(len(samples)):
            samples[i][:forecast_history] = 0
    if save_buck:
        df.to_csv("temp3.csv")
        upload_file(save_buck, save_name, "temp3.csv", self.model.gcs_client)
    return df, tensor, history, forecast_start, test, samples
def infer_now(self, some_date, csv_path=None, save_buck=None, save_name=None):
    """Run inference beginning at ``some_date``, rescaling via the model's test data.

    :param some_date: The datetime at which inference should begin.
    :param csv_path: Optional CSV path; overrides the inference params' paths.
    :param save_buck: Optional GCS bucket to upload the prediction frame to.
    :param save_name: Object name used for the uploaded CSV.
    :return: Tuple of (df, tensor, history, forecast_start, test, samples).
    """
    self.inference_params["datetime_start"] = some_date
    if csv_path:
        self.inference_params["test_csv_path"] = csv_path
        self.inference_params["dataset_params"]["file_path"] = csv_path
    results = infer_on_torch_model(self.model, **self.inference_params)
    df, tensor, history, forecast_start, test, samples = results
    loader = self.model.test_data
    if loader.scale:
        # Rescale the prediction column in place using the test loader's scaler.
        scaled_preds = df["preds"].values.reshape(-1, 1).astype('float64')
        df["preds"] = loader.inverse_scale(scaled_preds)[:, 0]
        if len(samples.columns) > 1:
            # Rescale every sample column, preserving the original index.
            samples = pd.DataFrame(loader.inverse_scale(samples), index=samples.index)
    if save_buck:
        df.to_csv("temp3.csv")
        upload_file(save_buck, save_name, "temp3.csv", self.model.gcs_client)
    return df, tensor, history, forecast_start, test, samples
def infer_now(self, some_date, csv_path=None, save_buck=None, save_name=None):
    """Run inference beginning at ``some_date`` and optionally upload the frame to GCS.

    :param some_date: The datetime at which inference should begin.
    :param csv_path: Optional CSV path; overrides the inference params' paths.
    :param save_buck: Optional GCS bucket to upload predictions to.
    :param save_name: Object name used for the uploaded CSV.
    :return: Tuple of (df, tensor, history, forecast_start, test, samples).
    """
    self.inference_params["datetime_start"] = some_date
    if csv_path:
        # Point both the test CSV and the dataset file path at the new CSV.
        self.inference_params["test_csv_path"] = csv_path
        self.inference_params["dataset_params"]["file_path"] = csv_path
    results = infer_on_torch_model(self.model, **self.inference_params)
    df, tensor, history, forecast_start, test, samples = results
    if save_buck:
        df.to_csv("temp3.csv")
        upload_file(save_buck, save_name, "temp3.csv", self.model.gcs_client)
    return df, tensor, history, forecast_start, test, samples
def infer_now(self, some_date: datetime, csv_path=None, save_buck=None, save_name=None, use_torch_script=False):
    """Performs inference at a specified datatime

    :param some_date: The date you want inference to begin on.
    :param csv_path: Path to a CSV to perform inference on, defaults to None
    :type csv_path: str, optional
    :param save_buck: GCS bucket to upload the prediction frame to, defaults to None
    :type save_buck: str, optional
    :param save_name: The name of the file to save the Pandas data-frame to GCP as, defaults to None
    :type save_name: str, optional
    :param use_torch_script: Present for interface compatibility; not used here.
    :return: Returns a tuple consisting of the Pandas dataframe with predictions + history,
        the prediction tensor, a tensor of the historical values, the forecast start
        index, the test loader, and the prediction samples
    :rtype: tuple
    """
    history_len = self.inference_params["dataset_params"]["forecast_history"]
    self.inference_params["datetime_start"] = some_date
    if csv_path:
        self.inference_params["test_csv_path"] = csv_path
        self.inference_params["dataset_params"]["file_path"] = csv_path
    df, tensor, history, forecast_start, test, samples = infer_on_torch_model(
        self.model, **self.inference_params)
    if test.scale:
        # Undo target scaling and write the rescaled forecast back to the frame.
        rescaled = test.inverse_scale(tensor.numpy().reshape(-1, 1))
        df["preds"][history_len:] = rescaled.numpy()[:, 0]
        if len(samples) > 1:
            # History rows carry no sampled predictions; zero them out.
            samples[:history_len] = 0
    if save_buck:
        df.to_csv("temp3.csv")
        upload_file(save_buck, save_name, "temp3.csv", self.model.gcs_client)
    return df, tensor, history, forecast_start, test, samples
def create_usgs(meta_data_dir: str, precip_path: str, start: int, end: int):
    """Build joined USGS flow + precipitation CSVs for a slice of gage metadata files.

    For each metadata JSON in ``meta_data_dir[start:end]``: fetch the USGS flow
    record for 2014-01-01..2019-01-01, join it with the station's precipitation
    CSV, enrich the metadata with flow/NaN statistics, then upload both the
    updated metadata and the joined CSV to the ``predict_cfs`` bucket. Failures
    are collected per-gage and uploaded as ``exceptions.json``.

    :param meta_data_dir: Directory containing per-gage ``*stations*`` JSON files.
    :param precip_path: Directory of per-station precipitation CSVs.
    :param start: Index of the first (sorted) metadata file to process.
    :param end: Index one past the last metadata file to process.
    """
    gage_list = sorted(os.listdir(meta_data_dir))
    exceptions = {}
    client = get_storage_client()
    for i in range(start, end):
        # FIX: initialize so the except handler never hits a NameError when the
        # failure occurs before gage_id is assigned (e.g. a bad index).
        gage_id = "unknown"
        try:
            file_name = gage_list[i]
            gage_id = file_name.split("stations")[0]
            meta_path = os.path.join(meta_data_dir, file_name)
            with open(meta_path) as f:
                print(meta_path)
                data = json.load(f)
            # USGS gage ids are zero-padded; a 7-char id lost its leading zero.
            # FIX: the original duplicated an identical make_usgs_data call in
            # both branches — pad first, then fetch once.
            if len(gage_id) == 7:
                gage_id = "0" + gage_id
            raw_df = make_usgs_data(
                datetime(2014, 1, 1), datetime(2019, 1, 1), gage_id
            )
            df, max_flow, min_flow = process_intermediate_csv(raw_df)
            data["time_zone_code"] = df["tz_cd"].iloc[0]
            data["max_flow"] = max_flow
            data["min_flow"] = min_flow
            precip_df = pd.read_csv(
                os.path.join(
                    precip_path, data["stations"][0]["station_id"] + ".csv"
                )
            )
            fixed_df, nan_flow, nan_precip = combine_data(df, precip_df)
            data["nan_flow"] = nan_flow
            data["nan_precip"] = nan_precip
            joined_name = (
                str(gage_id) + data["stations"][0]["station_id"] + "_flow.csv"
            )
            joined_upload = "joined/" + joined_name
            data["files"] = [joined_name]
            fixed_df.to_csv(joined_name)
            with open(meta_path, "w") as f:
                json.dump(data, f)
            upload_file("predict_cfs", "meta2/" + file_name, meta_path, client)
            upload_file("predict_cfs", joined_upload, joined_name, client)
        except Exception as e:
            # Best-effort batch job: record the failure and keep going. The
            # exceptions file is re-dumped and re-uploaded on every failure so
            # partial progress is visible even if the run dies.
            exceptions[str(gage_id)] = str(e)
            with open("exceptions.json", "w+") as a:
                json.dump(exceptions, a)
            print("exception")
            upload_file(
                "predict_cfs",
                "meta2/" + "exceptions.json",
                "exceptions.json",
                client,
            )