def __init__(self, hours_to_forecast: int, num_prediction_samples: int,
             model_params, csv_path: str, weight_path,
             wandb_proj: str = None):
    """Set up a model for inference.

    Loads the model via ``load_model``, stores the inference parameters found
    under ``model_params["inference_params"]`` and optionally starts a
    Weights & Biases run.

    :param hours_to_forecast: Stored on the instance and written into the
        inference parameters under ``"hours_to_forecast"``.
    :type hours_to_forecast: int
    :param num_prediction_samples: Written into the inference parameters
        under ``"num_prediction_samples"``.
    :type num_prediction_samples: int
    :param model_params: Model configuration; must contain
        ``"inference_params"`` which in turn contains ``"dataset_params"``.
    :param csv_path: Path handed to ``load_model`` and kept on the instance.
    :type csv_path: str
    :param weight_path: Path handed to ``load_model``.
    :param wandb_proj: W&B project name; when falsy nothing is logged to
        W&B, defaults to None
    :type wandb_proj: str, optional
    """
    self.hours_to_forecast = hours_to_forecast
    self.csv_path = csv_path
    # load_model only receives a *shallow* copy of the configuration.
    self.model = load_model(model_params.copy(), csv_path, weight_path)

    # NOTE: this is the caller's own nested dict, so every write below is
    # visible through ``model_params`` as well.
    self.inference_params = model_params["inference_params"]
    inference_dataset_params = self.inference_params["dataset_params"]
    if "scaling" in inference_dataset_params:
        # Swap the configured "scaling" entry for whatever scaling_function
        # produces from the inference dataset parameters.
        scaler = scaling_function({}, inference_dataset_params)["scaling"]
        inference_dataset_params["scaling"] = scaler
    self.inference_params["hours_to_forecast"] = hours_to_forecast
    self.inference_params["num_prediction_samples"] = num_prediction_samples

    if not wandb_proj:
        return
    run_name = datetime.now().strftime("%H-%M-%D-%Y") + "_prod"
    wandb.init(name=run_name, project=wandb_proj)
    wandb.config.update(model_params)
def __init__(self, forecast_steps: int, num_prediction_samples: int,
             model_params, csv_path: str, weight_path,
             wandb_proj: str = None, torch_script=False):
    """Class to handle inference for models.

    :param forecast_steps: Number of time-steps to forecast (doesn't have to
        be hours). Stored as ``self.hours_to_forecast``.
    :type forecast_steps: int
    :param num_prediction_samples: Number of prediction samples
    :type num_prediction_samples: int
    :param model_params: A dictionary of model parameters (ideally this
        should come from a saved JSON config file). Must contain
        ``"dataset_params"`` (with ``"target_col"``) and
        ``"inference_params"`` (with ``"dataset_params"``).
    :type model_params: Dict
    :param csv_path: Path to the CSV test file to use for inference.
    :type csv_path: str
    :param weight_path: Path to the model weights
    :type weight_path: str
    :param wandb_proj: The name of the W&B project; leave blank if you don't
        want to log to Wandb, defaults to None
    :type wandb_proj: str, optional
    :param torch_script: Not read anywhere in this method, defaults to False
    """
    self.hours_to_forecast = forecast_steps
    self.csv_path = csv_path
    self.n_targets = model_params.get("n_targets")
    self.targ_cols = model_params["dataset_params"]["target_col"]
    # load_model only receives a *shallow* copy of the configuration.
    self.model = load_model(model_params.copy(), csv_path, weight_path)

    # NOTE: same nested dict object as in ``model_params``; the writes below
    # are therefore also visible to the caller.
    inference_params = model_params["inference_params"]
    self.inference_params = inference_params
    inference_dataset_params = inference_params["dataset_params"]
    if "scaling" in inference_dataset_params:
        # Replace the configured "scaling" entry with the one produced by
        # scaling_function from the inference dataset parameters.
        inference_dataset_params["scaling"] = scaling_function(
            {}, inference_dataset_params)["scaling"]
    inference_params["hours_to_forecast"] = forecast_steps
    inference_params["num_prediction_samples"] = num_prediction_samples

    if wandb_proj:
        started_at = datetime.now()
        run_name = started_at.strftime("%H-%M-%D-%Y") + "_prod"
        wandb.init(name=run_name, project=wandb_proj)
        wandb.config.update(model_params, allow_val_change=True)
def __init__(self, hours_to_forecast: int, num_prediction_samples: int,
             model_params, csv_path: str, weight_path,
             wandb_proj: str = None, torch_script=False):
    """Class to handle inference for models.

    :param hours_to_forecast: Forecast horizon; kept on the instance and
        copied into the inference parameters.
    :type hours_to_forecast: int
    :param num_prediction_samples: Copied into the inference parameters
        under ``"num_prediction_samples"``.
    :type num_prediction_samples: int
    :param model_params: Model configuration; ``"inference_params"`` (with a
        nested ``"dataset_params"``) is required.
    :param csv_path: Path passed to ``load_model`` and kept on the instance.
    :type csv_path: str
    :param weight_path: Path passed to ``load_model``.
    :param wandb_proj: W&B project to log to; no W&B calls are made when
        falsy, defaults to None
    :type wandb_proj: str, optional
    :param torch_script: Not read anywhere in this method, defaults to False
    """
    self.hours_to_forecast = hours_to_forecast
    self.csv_path = csv_path
    # Only a shallow copy is handed to load_model.
    self.model = load_model(model_params.copy(), csv_path, weight_path)

    # NOTE: aliases the caller's nested dict, so the caller sees the updates.
    self.inference_params = model_params["inference_params"]
    dataset_section = self.inference_params["dataset_params"]
    has_scaling = "scaling" in dataset_section
    if has_scaling:
        # Overwrite the configured value with scaling_function's result.
        resolved = scaling_function({}, dataset_section)
        dataset_section["scaling"] = resolved["scaling"]
    self.inference_params["hours_to_forecast"] = hours_to_forecast
    self.inference_params["num_prediction_samples"] = num_prediction_samples

    if wandb_proj:
        timestamp = datetime.now().strftime("%H-%M-%D-%Y")
        wandb.init(name=timestamp + "_prod", project=wandb_proj)
        wandb.config.update(model_params)
def train_function(model_type: str, params: Dict):
    """Train a ``da_rnn`` or generic PyTorch model and return it.

    For ``model_type == "PyTorch"`` the trained model is additionally
    evaluated and the accuracy, an "average prediction sharpe" value and two
    plots are logged to Weights & Biases.

    :param model_type: ``"da_rnn"`` or ``"PyTorch"``.
    :type model_type: str
    :param params: Dictionary containing all the parameters needed to run
        the model. ``"dataset_params"`` is always read; the PyTorch path also
        reads ``"model_name"``, ``"training_params"``, ``"forward_params"``,
        ``"metrics"`` and ``"inference_params"``.
    :type params: Dict
    :return: The trained model.
    :raises Exception: If ``model_type`` is neither supported value.
    """
    dataset_params = params["dataset_params"]

    if model_type == "da_rnn":
        # Imported lazily so the da_rnn modules are only needed on this path.
        from flood_forecast.da_rnn.train_da import da_rnn, train
        from flood_forecast.preprocessing.preprocess_da_rnn import make_data
        preprocessed_data = make_data(
            dataset_params["training_path"],
            dataset_params["target_col"],
            dataset_params["forecast_length"])
        n_targets = len(dataset_params["target_col"])
        config, model = da_rnn(preprocessed_data, n_targets)
        return train(model, preprocessed_data, config)

    if model_type != "PyTorch":
        raise Exception("Please supply valid model type for forecasting")

    trained_model = PyTorchForecast(
        params["model_name"],
        dataset_params["training_path"],
        dataset_params["validation_path"],
        dataset_params["test_path"],
        params)
    train_transformer_style(
        trained_model, params["training_params"], params["forward_params"])

    # To do delete
    # Both the "scaler_params" and the no-"scaler_params" case resolve the
    # scaler in exactly the same way, so a single assignment covers them.
    if "scaler" in dataset_params:
        resolved_scaler = scaling_function({}, dataset_params)["scaling"]
        params["inference_params"]["dataset_params"]["scaling"] = resolved_scaler

    test_acc = evaluate_model(
        trained_model,
        model_type,
        params["dataset_params"]["target_col"],
        params["metrics"],
        params["inference_params"],
        {})
    wandb.run.summary["test_accuracy"] = test_acc[0]
    df_train_and_test = test_acc[1]
    forecast_start_idx = test_acc[2]
    df_prediction_samples = test_acc[3]

    # Absolute error of the first target column over the forecast rows.
    predicted = df_train_and_test.loc[forecast_start_idx:, "preds"]
    observed = df_train_and_test.loc[
        forecast_start_idx:, params["dataset_params"]["target_col"][0]]
    abs_error = (predicted - observed).abs()
    sample_spread = (1 / abs_error) / df_prediction_samples.std(axis=1)
    wandb.log({'average_prediction_sharpe': sample_spread.mean()})

    # Log plots
    if "probabilistic" not in params["inference_params"]:
        test_plot = plot_df_test_with_confidence_interval(
            df_train_and_test,
            df_prediction_samples,
            forecast_start_idx,
            params,
            ci=95,
            alpha=0.25)
    else:
        test_plot = plot_df_test_with_probabilistic_confidence_interval(
            df_train_and_test,
            forecast_start_idx,
            params)
    wandb.log({"test_plot": test_plot})

    # One trace per relevant column, all in a single figure.
    all_columns_figure = go.Figure()
    for column in params["dataset_params"]["relevant_cols"]:
        trace = go.Scatter(
            x=df_train_and_test.index,
            y=df_train_and_test[column],
            name=column)
        all_columns_figure.add_trace(trace)
    wandb.log({"test_plot_all": all_columns_figure})
    return trained_model
def train_function(model_type: str, params: Dict):
    """Function to train a Model(TimeSeriesModel) or da_rnn.

    For ``model_type == "PyTorch"`` the trained model is also evaluated, and
    the accuracy, one "average prediction sharpe" value per prediction-sample
    frame and several plots are logged to Weights & Biases. As side effects
    this path writes ``temp_preds.csv`` and, when prediction samples exist,
    one ``<target>.csv`` per target column into the working directory.

    :param model_type: Type of the model. In almost all cases this will be
        'PyTorch'; ``"da_rnn"`` is the only other accepted value.
    :type model_type: str
    :param params: Dictionary containing all the parameters needed to run
        the model. ``"dataset_params"`` is always read; the PyTorch path also
        reads ``"model_name"``, ``"training_params"``, ``"forward_params"``,
        ``"metrics"`` and ``"inference_params"``.
    :type params: Dict
    :return: The trained model.
    :raises Exception: If ``model_type`` is neither supported value.
    """
    dataset_params = params["dataset_params"]
    if model_type == "da_rnn":
        # Imported lazily so the da_rnn modules are only needed on this path.
        from flood_forecast.da_rnn.train_da import da_rnn, train
        from flood_forecast.preprocessing.preprocess_da_rnn import make_data
        preprocessed_data = make_data(
            params["dataset_params"]["training_path"],
            params["dataset_params"]["target_col"],
            params["dataset_params"]["forecast_length"])
        config, model = da_rnn(preprocessed_data, len(dataset_params["target_col"]))
        # All train functions return trained_model
        trained_model = train(model, preprocessed_data, config)
    elif model_type == "PyTorch":
        trained_model = PyTorchForecast(
            params["model_name"],
            dataset_params["training_path"],
            dataset_params["validation_path"],
            dataset_params["test_path"],
            params)
        takes_target = trained_model.params.get("takes_target", False)
        train_transformer_style(
            model=trained_model,
            training_params=params["training_params"],
            takes_target=takes_target,
            forward_params=params["forward_params"])

        # To do delete
        if "scaler" in dataset_params:
            # The scaler is resolved identically whether or not
            # "scaler_params" is configured, so no branching is needed.
            resolved_scaler = scaling_function({}, dataset_params)["scaling"]
            params["inference_params"]["dataset_params"]["scaling"] = resolved_scaler
            params["inference_params"]["dataset_params"].pop('scaler_params', None)

        test_acc = evaluate_model(
            trained_model,
            model_type,
            params["dataset_params"]["target_col"],
            params["metrics"],
            params["inference_params"],
            {})
        wandb.run.summary["test_accuracy"] = test_acc[0]
        df_train_and_test = test_acc[1]
        forecast_start_idx = test_acc[2]
        df_prediction_samples = test_acc[3]

        # Absolute error of the first target column over the forecast rows.
        mae = (df_train_and_test.loc[forecast_start_idx:, "preds"] -
               df_train_and_test.loc[forecast_start_idx:,
                                     params["dataset_params"]["target_col"][0]]).abs()
        inverse_mae = 1 / mae
        # NOTE(review): inverse_mae comes from the first target column only
        # but is reused for every sample frame - confirm this is intended
        # for multi-target models.
        for sample_idx, samples in enumerate(df_prediction_samples):
            pred_std = samples.std(axis=1)
            average_prediction_sharpe = (inverse_mae / pred_std).mean()
            wandb.log({
                'average_prediction_sharpe' + str(sample_idx): average_prediction_sharpe
            })
        df_train_and_test.to_csv("temp_preds.csv")

        # Log plots now
        if "probabilistic" in params["inference_params"]:
            test_plot = plot_df_test_with_probabilistic_confidence_interval(
                df_train_and_test,
                forecast_start_idx,
                params)
            # Fix: this figure used to be built and then silently discarded;
            # log it so probabilistic runs get a test plot as well.
            wandb.log({"test_plot": test_plot})
        elif len(df_prediction_samples) > 0:
            target_cols = params["dataset_params"]["target_col"]
            for samples, target in zip(df_prediction_samples, target_cols):
                samples.to_csv(target + ".csv")
                test_plot = plot_df_test_with_confidence_interval(
                    df_train_and_test,
                    samples,
                    forecast_start_idx,
                    params,
                    targ_col=target,
                    ci=95,
                    alpha=0.25)
                wandb.log({"test_plot_" + target: test_plot})
        else:
            # No prediction samples: fall back to a plain line plot. This
            # switches the pandas plotting backend for the whole process.
            pd.options.plotting.backend = "plotly"
            t = params["dataset_params"]["target_col"][0]
            test_plot = df_train_and_test[[t, "preds"]].plot()
            wandb.log({"test_plot_" + t: test_plot})

        print("Now plotting final plots")
        test_plot_all = go.Figure()
        for relevant_col in params["dataset_params"]["relevant_cols"]:
            test_plot_all.add_trace(
                go.Scatter(
                    x=df_train_and_test.index,
                    y=df_train_and_test[relevant_col],
                    name=relevant_col))
        wandb.log({"test_plot_all": test_plot_all})
    else:
        raise Exception("Please supply valid model type for forecasting")
    return trained_model
def train_function(model_type: str, params: Dict) -> PyTorchForecast:
    """Function to train a Model(TimeSeriesModel) or da_rnn. Will return the trained model

    :param model_type: Type of the model. In almost all cases this will be 'PyTorch'
    :type model_type: str
    :param params: Dictionary containing all the parameters needed to run the model
    :type params: Dict
    :return: A trained model
    :raises Exception: If ``model_type`` is neither ``"da_rnn"`` nor ``"PyTorch"``.

    .. code-block:: python

        with open("model_config.json") as f:
            params_dict = json.load(f)
        train_function("PyTorch", params_dict)

    ...
    For information on what this params_dict should include see `Confluence pages
    <https://flow-forecast.atlassian.net/wiki/spaces/FF/pages/92864513/Getting+Started>`_
    on training models.
    """
    dataset_params = params["dataset_params"]
    if model_type == "da_rnn":
        # Imported lazily so the da_rnn modules are only needed on this path.
        from flood_forecast.da_rnn.train_da import da_rnn, train
        from flood_forecast.preprocessing.preprocess_da_rnn import make_data
        preprocessed_data = make_data(
            params["dataset_params"]["training_path"],
            params["dataset_params"]["target_col"],
            params["dataset_params"]["forecast_length"])
        config, model = da_rnn(preprocessed_data, len(dataset_params["target_col"]))
        # All train functions return trained_model
        trained_model = train(model, preprocessed_data, config)
    elif model_type == "PyTorch":
        dataset_params["batch_size"] = params["training_params"]["batch_size"]
        trained_model = PyTorchForecast(
            params["model_name"],
            dataset_params["training_path"],
            dataset_params["validation_path"],
            dataset_params["test_path"],
            params)
        model_params = trained_model.params
        class2 = model_params["dataset_params"]["class"] == "GeneralClassificationLoader"
        takes_target = model_params.get("takes_target", False)

        if "inference_params" in model_params:
            inference_params = model_params["inference_params"]
            if "dataset_params" not in inference_params:
                # No explicit inference dataset config: derive one from the
                # training dataset config by renaming and dropping keys.
                print("Using generic dataset params")
                inference_dataset_params = model_params["dataset_params"].copy()
                inference_params["dataset_params"] = inference_dataset_params
                del inference_dataset_params["class"]
                # NOTE(review): "interpolate" is still required here; nothing
                # visible shows it being optional, so a missing key keeps
                # raising KeyError.
                inference_dataset_params["interpolate_param"] = (
                    inference_dataset_params.pop("interpolate"))
                # Fix: "scaler" is optional (see the membership test after
                # training), so only rename it when it is configured instead
                # of raising KeyError on scaler-less configs.
                if "scaler" in inference_dataset_params:
                    inference_dataset_params["scaling"] = (
                        inference_dataset_params.pop("scaler"))
                if "feature_param" in inference_dataset_params:
                    inference_dataset_params["feature_params"] = (
                        inference_dataset_params.pop("feature_param"))
                # Keys removed from the derived inference config.
                delete_params = [
                    "num_workers", "pin_memory", "train_start", "train_end",
                    "valid_start", "valid_end", "test_start", "test_end",
                    "training_path", "validation_path", "test_path",
                    "batch_size"
                ]
                for param in delete_params:
                    inference_dataset_params.pop(param, None)

        train_transformer_style(
            model=trained_model,
            training_params=params["training_params"],
            takes_target=takes_target,
            forward_params={},
            class2=class2)

        if "scaler" in dataset_params and "inference_params" in params:
            # The scaler is resolved identically whether or not
            # "scaler_params" is configured, so no branching is needed.
            resolved_scaler = scaling_function({}, dataset_params)["scaling"]
            params["inference_params"]["dataset_params"]["scaling"] = resolved_scaler
            params["inference_params"]["dataset_params"].pop('scaler_params', None)

        # TODO Move to other func
        # Evaluation is skipped for the classification loader.
        if params["dataset_params"]["class"] != "GeneralClassificationLoader":
            handle_model_evaluation1(trained_model, params, model_type)
    else:
        raise Exception(
            "Please supply valid model type for forecasting or classification")
    return trained_model