def n_tree():
    """Build a fixed four-level test hierarchy rooted at ``t``.

    Layout of the tree (node count per level on the right)::

        t                                                   1
        a   b   c                                           3
        aa  ab  ba  bb  ca  cb                              6
        aaa aab aba abb baa bab bba bbb caa cab cba cbb    12

    Resulting in the summing matrix ``y_t = S * b_t`` (columns are the
    twelve bottom-level nodes, in the order listed above)::

        t    1 1 1 1 1 1 1 1 1 1 1 1
        a    1 1 1 1 0 0 0 0 0 0 0 0
        b    0 0 0 0 1 1 1 1 0 0 0 0
        c    0 0 0 0 0 0 0 0 1 1 1 1
        aa   1 1 0 0 0 0 0 0 0 0 0 0
        ab   0 0 1 1 0 0 0 0 0 0 0 0
        ba   0 0 0 0 1 1 0 0 0 0 0 0
        bb   0 0 0 0 0 0 1 1 0 0 0 0
        ca   0 0 0 0 0 0 0 0 1 1 0 0
        cb   0 0 0 0 0 0 0 0 0 0 1 1
        aaa  1 0 0 0 0 0 0 0 0 0 0 0
        aab  0 1 0 0 0 0 0 0 0 0 0 0
        aba  0 0 1 0 0 0 0 0 0 0 0 0
        abb  0 0 0 1 0 0 0 0 0 0 0 0
        baa  0 0 0 0 1 0 0 0 0 0 0 0
        bab  0 0 0 0 0 1 0 0 0 0 0 0
        bba  0 0 0 0 0 0 1 0 0 0 0 0
        bbb  0 0 0 0 0 0 0 1 0 0 0 0
        caa  0 0 0 0 0 0 0 0 1 0 0 0
        cab  0 0 0 0 0 0 0 0 0 1 0 0
        cba  0 0 0 0 0 0 0 0 0 0 1 0
        cbb  0 0 0 0 0 0 0 0 0 0 0 1

    Returns:
        The root ``HierarchyTree`` node with all descendants attached.
    """
    root_key, root_item = "t", 1
    first_level = (("a", 2), ("b", 2), ("c", 3))
    second_level = (
        ("aa", 4), ("ab", 5),
        ("ba", 6), ("bb", 4),
        ("ca", 5), ("cb", 6),
    )
    third_level = (
        ("aaa", 4), ("aab", 5), ("aba", 6), ("abb", 4),
        ("baa", 5), ("bab", 6), ("bba", 5), ("bbb", 6),
        ("caa", 5), ("cab", 6), ("cba", 5), ("cbb", 6),
    )

    def _attach(parents, entries):
        # A node belongs under a parent when its key extends the parent's key.
        for parent in parents:
            for node_key, node_item in entries:
                if node_key.startswith(parent.key):
                    parent.add_child(key=node_key, item=node_item)

    tree = HierarchyTree(key=root_key, item=root_item)
    for node_key, node_item in first_level:
        tree.add_child(key=node_key, item=node_item)

    # The second level is completed for every branch before any third-level
    # node is attached, so the tree is filled strictly level by level.
    _attach(tree.children, second_level)
    _attach(
        (mid_node for branch in tree.children for mid_node in branch.children),
        third_level,
    )
    return tree
def test_create_mv_tree(hierarchical_mv_data):
    """Check that a multivariate tree with exogenous columns is built as declared.

    Every column of ``hierarchical_mv_data`` other than ``precipitation`` and
    ``temp`` is mapped to those two columns as its exogenous variables.
    """
    regressors = ('precipitation', 'temp')
    hierarchy = dict(
        total=['CH', 'SLU', 'BT', 'OTHER'],
        CH=['CH-07', 'CH-02', 'CH-08', 'CH-05', 'CH-01'],
        SLU=['SLU-15', 'SLU-01', 'SLU-19', 'SLU-07', 'SLU-02'],
        BT=['BT-01', 'BT-03'],
        OTHER=['WF-01', 'CBD-13'],
    )

    exogenous = {}
    for column in hierarchical_mv_data.columns:
        if column in regressors:
            continue
        # Each series gets its own list of regressor names.
        exogenous[column] = list(regressors)

    tree = HierarchyTree.from_nodes(hierarchy, hierarchical_mv_data, exogenous=exogenous)

    frame = tree.to_pandas()
    assert isinstance(frame, pandas.DataFrame)
    assert tree.key == 'total'
    assert len(tree.children) == 4

    expected_heights = (('CH', 1), ('BT-03', 0), ('CBD-13', 0), ('SLU', 1))
    for node_key, height in expected_heights:
        assert tree.get_node_height(node_key) == height
def __init_hts(
    self,
    nodes: Optional[NodesT] = None,
    df: Optional[pandas.DataFrame] = None,
    tree: Optional[HierarchyTree] = None,
    root: str = "root",
    exogenous: Optional[List[str]] = None,
):
    """Set up the hierarchy tree and the state derived from it.

    When ``nodes`` or ``df`` is supplied, the tree is built with
    ``HierarchyTree.from_nodes``; otherwise the pre-built ``tree`` is used.

    Args:
        nodes: Hierarchy definition forwarded to ``HierarchyTree.from_nodes``.
        df: Data forwarded to ``HierarchyTree.from_nodes``.
        tree: Pre-built hierarchy tree, used when neither ``nodes`` nor
            ``df`` is given.
        root: Root key forwarded to ``HierarchyTree.from_nodes``.
        exogenous: Exogenous variable names; stored on the instance and
            forwarded to ``HierarchyTree.from_nodes``.

    Raises:
        InvalidArgumentException: If ``nodes``, ``df`` and ``tree`` are all
            missing.
    """
    # A DataFrame has no unambiguous truth value (``not df`` raises
    # ValueError), so its absence must be tested with an identity check.
    if not nodes and df is None:
        if not tree:
            raise InvalidArgumentException(
                "Either nodes and df must be passed, or a pre-built hierarchy tree"
            )
        self.nodes = tree
    else:
        self.nodes = HierarchyTree.from_nodes(
            nodes=nodes, df=df, exogenous=exogenous, root=root
        )

    self.exogenous = exogenous
    # Only the matrix is kept; the labels returned alongside it are unused here.
    self.sum_mat, _ = to_sum_mat(self.nodes)
    self._set_model_instance()
    self._init_revision()
def test_create_mv_tree(hierarchical_mv_data):
    """Verify root key, child count and node heights of a multivariate tree.

    The exogenous mapping assigns ``precipitation`` and ``temp`` to every
    other column of ``hierarchical_mv_data``.
    """
    exog_names = ["precipitation", "temp"]
    series_columns = [
        name for name in hierarchical_mv_data.columns if name not in exog_names
    ]
    # A fresh list per series so no two entries share the same object.
    exogenous = {name: list(exog_names) for name in series_columns}

    hier = {}
    hier["total"] = ["CH", "SLU", "BT", "OTHER"]
    hier["CH"] = ["CH-07", "CH-02", "CH-08", "CH-05", "CH-01"]
    hier["SLU"] = ["SLU-15", "SLU-01", "SLU-19", "SLU-07", "SLU-02"]
    hier["BT"] = ["BT-01", "BT-03"]
    hier["OTHER"] = ["WF-01", "CBD-13"]

    ht = HierarchyTree.from_nodes(hier, hierarchical_mv_data, exogenous=exogenous)

    as_frame = ht.to_pandas()
    assert isinstance(as_frame, pandas.DataFrame)
    assert ht.key == "total"
    assert len(ht.children) == 4

    # Intermediate nodes sit at height 1, bottom-level nodes at height 0.
    heights = {"CH": 1, "BT-03": 0, "CBD-13": 0, "SLU": 1}
    for label, expected in heights.items():
        assert ht.get_node_height(label) == expected
def test_to_pandas(events):
    """Check that a tree built from geo events converts to a pandas DataFrame."""
    build_options = dict(
        df=events,
        lat_col='start_latitude',
        lon_col='start_longitude',
        nodes=('city', 'hex_index_6', 'hex_index_7', 'hex_index_8'),
        levels=(6, 8),
        resample_freq='1H',
        min_count=0.5,
    )
    tree = HierarchyTree.from_geo_events(**build_options)

    frame = tree.to_pandas()
    assert isinstance(frame, pandas.DataFrame)
def mv_tree(hierarchical_mv_data, mv_tree_empty):
    """Return a ``HierarchyTree`` built from ``mv_tree_empty`` and the given data.

    Every column other than ``precipitation`` and ``temp`` is mapped to those
    two columns as its exogenous variables.
    """
    regressors = ("precipitation", "temp")

    exogenous = {}
    for column in hierarchical_mv_data.columns:
        if column not in regressors:
            # One independent list per series.
            exogenous[column] = list(regressors)

    tree = HierarchyTree.from_nodes(
        mv_tree_empty,
        hierarchical_mv_data,
        exogenous=exogenous,
    )
    return tree
def test_to_pandas(events):
    """Ensure ``to_pandas`` on a geo-event tree yields a pandas DataFrame."""
    hex_nodes = ("city", "hex_index_6", "hex_index_7", "hex_index_8")
    resolution_range = (6, 8)

    geo_tree = HierarchyTree.from_geo_events(
        df=events,
        lat_col="start_latitude",
        lon_col="start_longitude",
        nodes=hex_nodes,
        levels=resolution_range,
        resample_freq="1H",
        min_count=0.5,
    )

    converted = geo_tree.to_pandas()
    assert isinstance(converted, pandas.DataFrame)
def test_create_from_events(events):
    """Check the type and the top-level fan-out of a tree built from geo events."""
    build_options = dict(
        df=events,
        lat_col='start_latitude',
        lon_col='start_longitude',
        nodes=('city', 'hex_index_6', 'hex_index_7', 'hex_index_8'),
        levels=(6, 8),
        resample_freq='1H',
        min_count=0.5,
    )
    tree = HierarchyTree.from_geo_events(**build_options)

    assert isinstance(tree, NAryTreeT)

    # The root is expected to have one child per distinct city in the events.
    child_count = len(tree.children)
    city_count = events['city'].nunique()
    assert child_count == city_count
def test_create_from_events(events):
    """Verify a geo-event tree is an ``NAryTreeT`` with one child per city."""
    hex_nodes = ("city", "hex_index_6", "hex_index_7", "hex_index_8")
    resolution_range = (6, 8)

    geo_tree = HierarchyTree.from_geo_events(
        df=events,
        lat_col="start_latitude",
        lon_col="start_longitude",
        nodes=hex_nodes,
        levels=resolution_range,
        resample_freq="1H",
        min_count=0.5,
    )

    assert isinstance(geo_tree, NAryTreeT)

    top_level = geo_tree.children
    assert len(top_level) == events["city"].nunique()
def mv_tree(hierarchical_mv_data):
    """Return a multivariate ``HierarchyTree`` for the CH/SLU/BT/OTHER hierarchy.

    Every column other than ``precipitation`` and ``temp`` is mapped to those
    two columns as its exogenous variables.
    """
    exog_names = ["precipitation", "temp"]
    series_columns = [
        name for name in hierarchical_mv_data.columns if name not in exog_names
    ]
    # A fresh list per series so no two entries share the same object.
    exogenous = {name: list(exog_names) for name in series_columns}

    hierarchy = dict(
        total=["CH", "SLU", "BT", "OTHER"],
        CH=["CH-07", "CH-02", "CH-08", "CH-05", "CH-01"],
        SLU=["SLU-15", "SLU-01", "SLU-19", "SLU-07", "SLU-02"],
        BT=["BT-01", "BT-03"],
        OTHER=["WF-01", "CBD-13"],
    )

    tree = HierarchyTree.from_nodes(hierarchy, hierarchical_mv_data, exogenous=exogenous)
    return tree
def mv_tree(hierarchical_mv_data):
    """Build the CH/SLU/BT/OTHER multivariate tree with exogenous regressors.

    ``precipitation`` and ``temp`` are attached as exogenous variables to
    every other column of ``hierarchical_mv_data``.
    """
    regressors = ('precipitation', 'temp')

    exogenous = {}
    for column in hierarchical_mv_data.columns:
        if column in regressors:
            continue
        # One independent list per series.
        exogenous[column] = list(regressors)

    hier = {}
    hier['total'] = ['CH', 'SLU', 'BT', 'OTHER']
    hier['CH'] = ['CH-07', 'CH-02', 'CH-08', 'CH-05', 'CH-01']
    hier['SLU'] = ['SLU-15', 'SLU-01', 'SLU-19', 'SLU-07', 'SLU-02']
    hier['BT'] = ['BT-01', 'BT-03']
    hier['OTHER'] = ['WF-01', 'CBD-13']

    return HierarchyTree.from_nodes(
        hier,
        hierarchical_mv_data,
        exogenous=exogenous,
    )
def predict(
    self,
    node: HierarchyTree,
    freq: str = "D",
    steps_ahead: int = 1,
    exogenous_df: pandas.DataFrame = None,
):
    """Forecast ``steps_ahead`` periods for ``node`` and store the results.

    The forecast is written to ``self.forecast``, in-sample residuals to
    ``self.residual`` and their mean squared value to ``self.mse``.

    Args:
        node: Tree node whose ``item`` is pre-processed into the history
            frame, and whose exogenous columns are read when
            ``exogenous_df`` is given.
        freq: Frequency passed to ``self.model.make_future_dataframe``.
        steps_ahead: Number of future periods to generate.
        exogenous_df: Future values of the exogenous variables; when given
            they are appended to the node's historical exogenous values.

    Returns:
        ``self``, so that calls can be chained.
    """
    history = self._pre_process(node.item)
    horizon = self.model.make_future_dataframe(
        periods=steps_ahead, freq=freq, include_history=True
    )

    if exogenous_df is not None:
        # Stack past and future regressor values, then align them
        # positionally with the rows of the future frame.
        past_regressors = node.to_pandas()[node.exogenous].reset_index(drop=True)
        all_regressors = pandas.concat(
            [past_regressors, exogenous_df]
        ).reset_index(drop=True)
        horizon = pandas.concat(
            [horizon, all_regressors.reindex(horizon.index)], axis=1
        )

    if self.cap:
        horizon["cap"] = self.cap
    if self.floor:
        horizon["floor"] = self.floor

    self.forecast = self.model.predict(horizon)

    # Residuals are computed on the transformed scale, before any inversion.
    joined = pandas.merge(history, self.forecast, on="ds")
    self.residual = (joined["yhat"] - joined["y"]).values
    squared_errors = numpy.array(self.residual) ** 2
    self.mse = numpy.mean(squared_errors)

    if self.cap is not None:
        self.forecast.yhat = numpy.exp(self.forecast.yhat)

    undo_transform = self.transform_function.inverse_transform
    self.forecast.yhat = undo_transform(self.forecast.yhat)
    self.forecast.trend = undo_transform(self.forecast.trend)

    # Only components actually present in the forecast are mapped back.
    for component in ("seasonal", "daily", "weekly", "yearly", "holidays"):
        if component not in self.forecast.columns.tolist():
            continue
        restored = undo_transform(getattr(self.forecast, component))
        setattr(self.forecast, component, restored)

    return self
def test_create_hierarchical_sine_data_tree(hierarchical_sine_data):
    """Check root key and per-branch child counts of the sine-data tree."""
    hierarchy = dict(
        total=["a", "b", "c"],
        a=["aa", "ab"],
        aa=["aaa", "aab"],
        b=["ba", "bb"],
        c=["ca", "cb", "cc", "cd"],
    )
    tree = HierarchyTree.from_nodes(hierarchy, hierarchical_sine_data)

    frame = tree.to_pandas()
    assert isinstance(frame, pandas.DataFrame)
    assert tree.key == "total"
    assert len(tree.children) == 3

    for branch in tree.children:
        fan_out = branch.children
        if branch.key in ("a", "b"):
            assert len(fan_out) == 2
        if branch.key == "c":
            assert len(fan_out) == 4
def test_create_hierarchical_sine_data_tree(hierarchical_sine_data):
    """Verify the sine-data tree has three branches with the declared fan-out."""
    hier = {}
    hier['total'] = ['a', 'b', 'c']
    hier['a'] = ['aa', 'ab']
    hier['aa'] = ['aaa', 'aab']
    hier['b'] = ['ba', 'bb']
    hier['c'] = ['ca', 'cb', 'cc', 'cd']

    ht = HierarchyTree.from_nodes(hier, hierarchical_sine_data)

    as_frame = ht.to_pandas()
    assert isinstance(as_frame, pandas.DataFrame)
    assert ht.key == 'total'
    assert len(ht.children) == 3

    two_child_branches = ('a', 'b')
    for node in ht.children:
        if node.key in two_child_branches:
            assert len(node.children) == 2
        if node.key == 'c':
            assert len(node.children) == 4
def test_create_hierarchical_sine_data_tree(hierarchical_sine_data):
    """Check the three-level sine-data tree level by level.

    The hierarchy declares three branches under ``total``, two nodes under
    each branch, and two bottom-level nodes under each of those.
    """
    hier = {
        "total": ["a", "b", "c"],
        "a": ["a_x", "a_y"],
        "b": ["b_x", "b_y"],
        "c": ["c_x", "c_y"],
        "a_x": ["a_x_1", "a_x_2"],
        "a_y": ["a_y_1", "a_y_2"],
        "b_x": ["b_x_1", "b_x_2"],
        "b_y": ["b_y_1", "b_y_2"],
        "c_x": ["c_x_1", "c_x_2"],
        "c_y": ["c_y_1", "c_y_2"],
    }
    first_level_keys = ("a", "b", "c")
    second_level_keys = ("a_x", "b_x", "c_x", "a_y", "b_y", "c_y")

    ht = HierarchyTree.from_nodes(hier, hierarchical_sine_data)
    assert isinstance(ht.to_pandas(), pandas.DataFrame)
    assert ht.key == "total"
    assert len(ht.children) == 3

    for c in ht.children:
        if c.key in first_level_keys:
            assert len(c.children) == 2
        # The second-level nodes are children of the branches, not of the
        # root, so they have to be reached through ``c.children``. Each one
        # is declared with two children in ``hier``.
        for grandchild in c.children:
            if grandchild.key in second_level_keys:
                assert len(grandchild.children) == 2
def uv_tree(sine_hier, hierarchical_sine_data):
    """Return a ``HierarchyTree`` over the first 400 hourly-resampled rows.

    The data is resampled to ``'1H'`` bins, each bin is reduced with the
    built-in ``sum``, and the first 400 resulting rows are used to build the
    tree from ``sine_hier``.
    """
    hourly_bins = hierarchical_sine_data.resample('1H')
    hourly_totals = hourly_bins.apply(sum)
    truncated = hourly_totals.head(400)
    return HierarchyTree.from_nodes(sine_hier, truncated)