def from_geo_events(
    cls,
    df: pandas.DataFrame,
    lat_col: str,
    lon_col: str,
    nodes: Tuple,
    levels: Tuple[int, int] = (6, 7),
    resample_freq: str = "1H",
    min_count: Union[float, int] = 0.2,
    root_name: str = "total",
    fillna: bool = False,
):
    """
    Build a hierarchy from a dataframe of geo-located events.

    The events are passed through ``hexify``, a root series is computed with
    ``resample_count``, and the remaining nodes are attached by ``groupify``.

    Parameters
    ----------
    df : pandas.DataFrame
        Events to build the hierarchy from; handed to ``hexify``.
    lat_col : str
        Column where the latitude coordinates can be found
    lon_col : str
        Column where the longitude coordinates can be found
    nodes : Tuple
        Forwarded to ``groupify`` as ``nodes``.
    levels : Tuple[int, int]
        Forwarded to ``hexify`` as ``levels``.
    resample_freq : str
        Frequency given to ``resample_count`` for the root series and to
        ``groupify`` as ``freq``.
    min_count : float or int
        Forwarded to ``groupify`` as ``min_count``.
    root_name : str
        Key of the root node; also passed to ``resample_count``.
    fillna : bool
        When True, the tree is exported with ``to_pandas``, forward-filled,
        rows that still contain missing values are dropped, and every node's
        ``item`` is replaced by its own single-column slice of the result.

    Returns
    -------
    HierarchyTree
        The object returned by ``groupify``.
    """
    hexified = hexify(df, lat_col, lon_col, levels=levels)
    total = resample_count(hexified, resample_freq, root_name)
    hierarchy = cls(key=root_name, item=total)
    grouped = groupify(
        hierarchy,
        df=hexified,
        nodes=nodes,
        freq=resample_freq,
        min_count=min_count,
        total=total,
    )
    # TODO: more flexible strategy
    if fillna:
        # DataFrame.ffill() replaces the deprecated fillna(method="ffill").
        filled = grouped.to_pandas().ffill().dropna()
        for node in make_iterable(grouped, prop=None):
            # Double brackets keep each node's item a one-column DataFrame.
            node.item = filled[[node.key]]
    return grouped
def _revise(self, steps_ahead=1):
    """
    Run the configured revision method and wrap its output in a DataFrame.

    Parameters
    ----------
    steps_ahead : int
        Passed to ``self._get_predict_index`` to build the row index.

    Returns
    -------
    pandas.DataFrame
        The revised values, with one column per entry yielded by
        ``make_iterable(self.nodes)`` and the prediction index as rows.
    """
    logger.info(f'Reconciling forecasts using {self.revision_method}')
    reconciled = self.revision_method.revise(
        forecasts=self.hts_result.forecasts,
        mse=self.hts_result.errors,
        nodes=self.nodes,
    )
    # Columns are resolved before the index, as in the original ordering.
    return pandas.DataFrame(
        reconciled,
        columns=list(make_iterable(self.nodes)),
        index=self._get_predict_index(steps_ahead=steps_ahead),
    )
def _model_mapping_to_iterable(
    model_mapping: Dict[str, ModelFitResultT], nodes: NAryTreeT
) -> List[Tuple[str, ModelFitResultT, NAryTreeT]]:
    """
    Pair every node with the model stored under its key.

    Parameters
    ----------
    model_mapping
        Mapping from node key to either a model or a tuple whose second
        element is the model.
    nodes
        Tree whose nodes are visited via ``make_iterable(nodes, prop=None)``.

    Returns
    -------
    list of (key, model, node) tuples, in iteration order.
    """

    def _unwrap(entry):
        # Tuple entries carry the model at index 1.
        return entry[1] if isinstance(entry, tuple) else entry

    return [
        (node.key, _unwrap(model_mapping[node.key]), node)
        for node in make_iterable(nodes, prop=None)
    ]
def revise(self, forecasts=None, mse=None, nodes=None):
    """
    Reconcile forecasts using the revision method named by ``self.name``.

    Parameters
    ----------
    forecasts
        Base forecasts; handed to whichever helper the method dispatches to.
        The bottom-up branch calls ``forecasts.keys()``, so a mapping is
        expected there.
    mse
        Forwarded to ``optimal_combination`` for the OLS / WLSS / WLSV methods.
    nodes
        Tree used by the AHP / PHA and FP methods. For AHP / PHA, when
        ``self.transformer`` is set, each node's ``item[node.key]`` is
        overwritten in place with its inverse-transformed values.

    Returns
    -------
    The result of the selected revision helper.

    Raises
    ------
    InvalidArgumentException
        If ``self.name`` matches none of the known methods.
    """
    import logging

    if self.name == MethodsT.NONE.name:
        return y_hat_matrix(forecasts=forecasts)

    if self.name in (
        MethodsT.OLS.name,
        MethodsT.WLSS.name,
        MethodsT.WLSV.name,
    ):
        # Forward the configured method: this used to be hard-coded to OLS,
        # so WLSS and WLSV were silently reconciled as OLS.
        # NOTE(review): assumes optimal_combination accepts all three method
        # names - confirm against its implementation.
        return optimal_combination(
            forecasts=forecasts,
            sum_mat=self.sum_mat,
            method=self.name,
            mse=mse,
        )

    if self.name == MethodsT.BU.name:
        # Diagnostics go to the logger instead of being printed to stdout.
        logging.getLogger(__name__).debug(
            "Revising with method %s; forecast keys: %s",
            self.name,
            list(forecasts.keys()),
        )
        y_hat = self._y_hat_matrix(forecasts)
        return self._new_mat(y_hat)

    if self.name in (MethodsT.AHP.name, MethodsT.PHA.name):
        if self.transformer:
            for node in make_iterable(nodes, prop=None):
                node.item[node.key] = self.transformer.inverse_transform(
                    node.item[node.key]
                )
        y_hat = proportions(
            nodes=nodes,
            forecasts=forecasts,
            sum_mat=self.sum_mat,
            method=self.name,
        )
        return self._new_mat(y_hat)

    if self.name == MethodsT.FP.name:
        return forecast_proportions(forecasts, nodes)

    raise InvalidArgumentException('Revision model name is invalid')
def __validate_steps_ahead(self, exogenous_df: pandas.DataFrame, steps_ahead: int) -> int:
    """
    Work out how many steps to predict and check the exogenous columns.

    Parameters
    ----------
    exogenous_df : pandas.DataFrame
        Exogenous data for the prediction window, or None.
    steps_ahead : int
        Requested horizon; only used when ``exogenous_df`` is None.

    Returns
    -------
    int
        ``len(exogenous_df)`` when a dataframe is given; otherwise
        ``steps_ahead``, or 1 when ``steps_ahead`` is falsy.

    Raises
    ------
    MissingRegressorException
        If selecting a node's ``exogenous`` columns from ``exogenous_df``
        raises ``KeyError``.
    """
    if exogenous_df is None:
        if steps_ahead:
            return steps_ahead
        logger.info(
            "No arguments passed for 'steps_ahead', defaulting to predicting 1-step-ahead"
        )
        return 1

    horizon = len(exogenous_df)
    for node in make_iterable(self.nodes, prop=None):
        wanted = node.exogenous
        try:
            # The selection is only a presence check; its result is discarded.
            exogenous_df[wanted]
        except KeyError:
            raise MissingRegressorException(
                f"Node {node.key} has as exogenous variables {node.exogenous} but "
                f"these columns were not found in 'exogenous_df'")
    return horizon
def __init_predict_step(self, exogenous_df: pandas.DataFrame, steps_ahead: int):
    """
    Prepare the nodes for prediction and resolve the forecast horizon.

    Parameters
    ----------
    exogenous_df : pandas.DataFrame
        Exogenous data for the prediction window, or None.
    steps_ahead : int
        Requested horizon; only used when ``exogenous_df`` is None.

    Returns
    -------
    int
        ``len(exogenous_df)`` when a dataframe is given; otherwise
        ``steps_ahead``, or 1 when ``steps_ahead`` is falsy.

    Raises
    ------
    MissingRegressorException
        If ``self.exogenous`` is set but no ``exogenous_df`` is given, or if
        selecting a node's ``exogenous`` columns from ``exogenous_df`` raises
        ``KeyError``.
    """
    # Compare against None explicitly: taking the truth value of a DataFrame
    # raises ValueError, so `not exogenous_df` failed for any dataframe.
    if self.exogenous and exogenous_df is None:
        raise MissingRegressorException(
            "Exogenous variables were provided at fit step, hence are required at "
            "predict step. Please pass the 'exogenous_df' variable to predict "
            "function")

    if exogenous_df is None and not steps_ahead:
        logger.info(
            "No arguments passed for 'steps_ahead', defaulting to predicting 1-step-ahead"
        )
        steps_ahead = 1
    elif exogenous_df is not None:
        steps_ahead = len(exogenous_df)
        for node in make_iterable(self.nodes, prop=None):
            exog_cols = node.exogenous
            try:
                # Each node's item becomes its slice of the exogenous data.
                node.item = exogenous_df[exog_cols]
            except KeyError as err:
                raise MissingRegressorException(
                    f"Node {node.key} has as exogenous variables {node.exogenous} but "
                    f"these columns were not found in 'exogenous_df'") from err
    return steps_ahead
def fit(
    self,
    df: Optional[pandas.DataFrame] = None,
    nodes: Optional[NodesT] = None,
    tree: Optional[HierarchyTree] = None,
    exogenous: Optional[ExogT] = None,
    root: str = "total",
    distributor: Optional[DistributorBaseClass] = None,
    disable_progressbar=defaults.DISABLE_PROGRESSBAR,
    show_warnings=defaults.SHOW_WARNINGS,
    **fit_kwargs: Any,
) -> "HTSRegressor":
    """
    Fit one model per node of the hierarchy.

    The hierarchy comes either from ``df`` together with ``nodes``, or from a
    pre-built ``tree``. Exogenous variables can be given as a dict of
    (string, list): the string is a node key and the list names the columns
    to use as exogenous variables for that node.
    See more at :class:`hts.hierarchy.HierarchyTree`

    Parameters
    ----------
    df : pandas.DataFrame
        A Dataframe of time series with a DateTimeIndex. Each column
        represents a node in the hierarchy. Ignored if tree argument is passed
    nodes : Dict[str, List[str]]
        The hierarchy defined as a dict of (string, list), as specified in
        :py:func:`HierarchyTree.from_nodes <hts.hierarchy.HierarchyTree.from_nodes>`
    tree : HierarchyTree
        A pre-built HierarchyTree. Ignored if df and nodes are passed, as the
        tree will be built from these
    distributor : Optional[DistributorBaseClass]
        A distributor, for parallel/distributed processing
    exogenous : Dict[str, List[str]] or None
        Node key mapping to columns that contain the exogenous variable for
        that node
    root : str
        The name of the root node
    disable_progressbar : Bool
        Disable or enable progressbar
    show_warnings : Bool
        Disable warnings
    fit_kwargs : Any
        Any arguments to be passed to the underlying forecasting model's fit
        function

    Returns
    -------
    HTSRegressor
        The fitted HTSRegressor instance
    """
    self.__init_hts(nodes=nodes, df=df, tree=tree, root=root, exogenous=exogenous)
    node_iter = make_iterable(self.nodes, prop=None)

    # Settings shared by every per-node fit call.
    shared_kwargs = dict(
        fit_kwargs=fit_kwargs,
        low_memory=self.low_memory,
        tmp_dir=self.tmp_dir,
        model_instance=self.model_instance,
        model_args=self.model_args,
        transform=self.transform,
    )

    results = _do_fit(
        nodes=node_iter,
        function_kwargs=shared_kwargs,
        n_jobs=self.n_jobs,
        disable_progressbar=disable_progressbar,
        show_warnings=show_warnings,
        distributor=distributor,
    )

    for fitted in results:
        # Results that are not already tuples are paired with their node key.
        entry = fitted if isinstance(fitted, tuple) else (fitted.node.key, fitted)
        self.hts_result.models = entry
    return self