Example #1
 def __init__(self,
              df_path: str,
              forecast_total: int,
              use_real_precip=True,
              use_real_temp=True,
              target_supplied=True,
              interpolate=False,
              sort_column_clone=None,
              **kwargs):
     """
     :param str df_path:
     A data loader for the test data.
     """
     super().__init__(**kwargs)
     df_path = get_data(df_path)
     self.original_df = pd.read_csv(df_path)
     # `interpolate`, when supplied, is a dict with "method" and "params" keys
     if interpolate:
         self.original_df = interpolate_dict[interpolate["method"]](
             self.original_df, **interpolate["params"])
     if sort_column_clone:
         self.original_df = self.original_df.sort_values(
             by=sort_column_clone)
     print("CSV Path below")
     print(df_path)
     self.forecast_total = forecast_total
     self.use_real_temp = use_real_temp
     self.use_real_precip = use_real_precip
     self.target_supplied = target_supplied
     # Convert back to datetime and save index
     self.original_df["datetime"] = self.original_df["datetime"].astype(
         "datetime64[ns]")
     self.original_df["original_index"] = self.original_df.index
     # relevant_cols3 and df are set by the parent class's __init__ (via **kwargs)
     if len(self.relevant_cols3) > 0:
         self.original_df[self.relevant_cols3] = self.df[
             self.relevant_cols3]
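
Both this loader and Example #3 dispatch interpolation through an interpolate_dict registry keyed by method name. A minimal self-contained sketch of that pattern (the "back_forward" method name and its fill logic here are assumptions for illustration, not the library's actual registry):

import pandas as pd

def back_forward_interpolate(df: pd.DataFrame, **params) -> pd.DataFrame:
    # Fill NaNs backward, then forward, so gaps at either end are covered too.
    return df.bfill().ffill()

# Registry mapping a method name to its interpolation function (assumed shape).
interpolate_dict = {"back_forward": back_forward_interpolate}

# The loaders above expect the `interpolate` argument to look like this:
interpolate = {"method": "back_forward", "params": {}}
df = pd.DataFrame({"cfs": [1.0, None, 3.0]})
df = interpolate_dict[interpolate["method"]](df, **interpolate["params"])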
Example #2
 def __init__(
         self,
         model_base: str,
         training_data: str,
         validation_data: str,
         test_data: str,
         params: Dict):
     """Set up the model, the train/validation/test data loaders, and logging."""
     self.params = params
     if "weight_path" in params:
         # Fetch pretrained weights (e.g. from remote storage) before loading the model
         params["weight_path"] = get_data(params["weight_path"])
         self.model = self.load_model(model_base, params["model_params"], params["weight_path"])
     else:
         self.model = self.load_model(model_base, params["model_params"])
     # The test loader must cover the full horizon we intend to forecast
     params["dataset_params"]["forecast_test_len"] = params["inference_params"]["hours_to_forecast"]
     self.training = self.make_data_load(training_data, params["dataset_params"], "train")
     self.validation = self.make_data_load(validation_data, params["dataset_params"], "valid")
     self.test_data = self.make_data_load(test_data, params["dataset_params"], "test")
     if "GCS" in self.params and self.params["GCS"]:
         self.gcs_client = get_storage_client()
     else:
         self.gcs_client = None
     self.wandb = self.wandb_init()
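
The constructor above reads a nested params dict. A sketch of the shape it expects, inferred only from the keys accessed in the code (all values below are placeholders):

# Shape inferred from the keys this __init__ reads; values are placeholders.
params = {
    "model_params": {},                   # forwarded to load_model
    # "weight_path": "weights.pth",       # optional; fetched via get_data first
    "dataset_params": {},                 # forwarded to make_data_load per split
    "inference_params": {"hours_to_forecast": 336},  # copied into dataset_params
    "GCS": False,                         # when truthy, a storage client is created
}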
Example #3
    def __init__(self,
                 file_path: str,
                 forecast_history: int,
                 forecast_length: int,
                 target_col: List,
                 relevant_cols: List,
                 scaling=None,
                 start_stamp: int = 0,
                 end_stamp: Optional[int] = None,
                 gcp_service_key: Optional[str] = None,
                 interpolate_param: Union[Dict, bool] = False,
                 sort_column=None,
                 feature_params=None):
        """
        A data loader that takes a CSV file and properly batches for use in training/eval a PyTorch model
        :param file_path: The path to the CSV file you wish to use.
        :param forecast_history: This is the length of the historical time series data you wish to
                                utilize for forecasting
        :param forecast_length: The number of time steps to forecast ahead (for transformer this must
                                equal history_length)
        :param relevant_cols: Supply column names you wish to predict in the forecast (others will not be used)
        :param target_col: The target column or columns you to predict. If you only have one still use a list ['cfs']
        :param scaling: (highly reccomended) If provided should be a subclass of sklearn.base.BaseEstimator
        and sklearn.base.TransformerMixin) i.e StandardScaler,  MaxAbsScaler, MinMaxScaler, etc) Note without
        a scaler the loss is likely to explode and cause infinite loss which will corrupt weights
        :param start_stamp int: Optional if you want to only use part of a CSV for training, validation
                                or testing supply these
        :param end_stamp int: Optional if you want to only use part of a CSV for training, validation,
                            or testing supply these
        :param sort_column str: The column to sort the time series on prior to forecast.
        """
        super().__init__()
        interpolate = interpolate_param
        self.forecast_history = forecast_history
        self.forecast_length = forecast_length
        print("Interpolation parameter:", interpolate)
        self.local_file_path = get_data(file_path, gcp_service_key)
        df = pd.read_csv(self.local_file_path)
        relevant_cols3 = []
        if sort_column:
            df[sort_column] = pd.to_datetime(df[sort_column])
            df = df.sort_values(by=sort_column)
            if feature_params:
                # Optionally generate engineered feature columns from the sort column
                df, relevant_cols3 = feature_fix(feature_params, sort_column,
                                                 df)
        print("Engineered feature columns:", relevant_cols3)
        self.relevant_cols3 = relevant_cols3
        if interpolate:
            interpolated_df = interpolate_dict[interpolate["method"]](
                df, **interpolate["params"])
            self.df = interpolated_df[relevant_cols + relevant_cols3]
        else:
            self.df = df[relevant_cols + relevant_cols3]
        print("Now loading" + file_path)
        self.original_df = df
        self.scale = None
        # Optionally restrict the DataFrame to the [start_stamp, end_stamp) window
        if start_stamp != 0 and end_stamp is not None:
            self.df = self.df[start_stamp:end_stamp]
        elif start_stamp != 0:
            self.df = self.df[start_stamp:]
        elif end_stamp is not None:
            self.df = self.df[:end_stamp]
        if scaling is not None:
            print("scaling now")
            self.scale = scaling
            temp_df = self.scale.fit_transform(self.df[relevant_cols])
            # We define a second scaler to scale the end output
            # back to normal as models might not necessarily predict
            # other present time series values.
            targ_scale_class = self.scale.__class__
            self.targ_scaler = targ_scale_class()
            print("Number of target columns:", len(target_col))
            if len(target_col) == 1:
                # The fit_transform result was discarded, so fitting alone suffices
                self.targ_scaler.fit(
                    self.df[target_col[0]].values.reshape(-1, 1))
            else:
                self.targ_scaler.fit(self.df[target_col])

            self.df[relevant_cols] = temp_df
        if (len(self.df) - self.df.count()).max() != 0:
            print(
                "Error: NaN values detected in the data. Please interpolate or run ffill/bfill on the data."
            )
        self.targ_col = target_col
        # Persist the processed DataFrame to disk (e.g. for later inspection)
        self.df.to_csv("temp_df.csv")