Пример #1
0
def n_tree():
    """
    Build a four-level test hierarchy: 1 root, 3 children, 6 grandchildren
    and 12 leaves. Nodes are attached to a parent when their key starts with
    the parent's key.

    Layout of the tree (right-hand column: number of nodes on that level)

                                t                                1

               a                b                   c            3

         aa   ab             ba   bb             ca   cb         6

    aaa aab   aba abb   baa bab   bba bbb   caa cab   cba cbb    12

    The corresponding summing matrix S in y_t = S * b_t, where b_t is the
    vector of bottom-level nodes (aaa, aab, aba, abb, baa, bab, bba, bbb,
    caa, cab, cba, cbb), one column per bottom-level node:

    t        1 1 1 1 1 1 1 1 1 1 1 1
    a        1 1 1 1 0 0 0 0 0 0 0 0
    b        0 0 0 0 1 1 1 1 0 0 0 0
    c        0 0 0 0 0 0 0 0 1 1 1 1
    aa       1 1 0 0 0 0 0 0 0 0 0 0
    ab       0 0 1 1 0 0 0 0 0 0 0 0
    ba       0 0 0 0 1 1 0 0 0 0 0 0
    bb       0 0 0 0 0 0 1 1 0 0 0 0
    ca       0 0 0 0 0 0 0 0 1 1 0 0
    cb       0 0 0 0 0 0 0 0 0 0 1 1
    aaa      1 0 0 0 0 0 0 0 0 0 0 0
    aab      0 1 0 0 0 0 0 0 0 0 0 0
    aba      0 0 1 0 0 0 0 0 0 0 0 0
    abb      0 0 0 1 0 0 0 0 0 0 0 0
    baa      0 0 0 0 1 0 0 0 0 0 0 0
    bab      0 0 0 0 0 1 0 0 0 0 0 0
    bba      0 0 0 0 0 0 1 0 0 0 0 0
    bbb      0 0 0 0 0 0 0 1 0 0 0 0
    caa      0 0 0 0 0 0 0 0 1 0 0 0
    cab      0 0 0 0 0 0 0 0 0 1 0 0
    cba      0 0 0 0 0 0 0 0 0 0 1 0
    cbb      0 0 0 0 0 0 0 0 0 0 0 1

    :return: the root ``HierarchyTree`` node with all descendants attached
    """

    def _attach_matching(parent, candidates):
        # Add every (key, item) pair whose key is prefixed by the parent's key.
        for child_key, child_item in candidates:
            if child_key.startswith(parent.key):
                parent.add_child(key=child_key, item=child_item)

    root_key, root_item = 't', 1
    first_level = [('a', 2), ('b', 2), ('c', 3)]
    second_level = [
        ('aa', 4), ('ab', 5),
        ('ba', 6), ('bb', 4),
        ('ca', 5), ('cb', 6),
    ]
    third_level = [
        ('aaa', 4), ('aab', 5), ('aba', 6), ('abb', 4),
        ('baa', 5), ('bab', 6), ('bba', 5), ('bbb', 6),
        ('caa', 5), ('cab', 6), ('cba', 5), ('cbb', 6),
    ]

    root = HierarchyTree(key=root_key, item=root_item)
    for child_key, child_item in first_level:
        root.add_child(key=child_key, item=child_item)

    # Second level: hang each two-letter node under its one-letter parent.
    for child in root.children:
        _attach_matching(child, second_level)

    # Third level: hang each three-letter leaf under its two-letter parent.
    for child in root.children:
        for grandchild in child.children:
            _attach_matching(grandchild, third_level)

    return root
Пример #2
0
def test_create_mv_tree(hierarchical_mv_data):
    """Build a tree with exogenous columns and check its basic structure.

    Every column of the fixture other than 'precipitation' and 'temp' is
    mapped to those two columns as its exogenous variables.
    """
    regressor_names = ('precipitation', 'temp')

    structure = {
        'total': ['CH', 'SLU', 'BT', 'OTHER'],
        'CH': ['CH-07', 'CH-02', 'CH-08', 'CH-05', 'CH-01'],
        'SLU': ['SLU-15', 'SLU-01', 'SLU-19', 'SLU-07', 'SLU-02'],
        'BT': ['BT-01', 'BT-03'],
        'OTHER': ['WF-01', 'CBD-13'],
    }

    # One fresh list per column so no two entries share the same object.
    regressors = {}
    for column in hierarchical_mv_data.columns:
        if column not in regressor_names:
            regressors[column] = list(regressor_names)

    tree = HierarchyTree.from_nodes(structure,
                                    hierarchical_mv_data,
                                    exogenous=regressors)

    assert isinstance(tree.to_pandas(), pandas.DataFrame)
    assert tree.key == 'total'
    assert len(tree.children) == 4

    expected_heights = (('CH', 1), ('BT-03', 0), ('CBD-13', 0), ('SLU', 1))
    for node_key, height in expected_heights:
        assert tree.get_node_height(node_key) == height
Пример #3
0
    def __init_hts(
        self,
        nodes: Optional[NodesT] = None,
        df: Optional[pandas.DataFrame] = None,
        tree: Optional[HierarchyTree] = None,
        root: str = "root",
        exogenous: Optional[List[str]] = None,
    ):
        """Initialise the hierarchy, the summing matrix and the model state.

        The hierarchy is taken from ``tree`` when neither ``nodes`` nor ``df``
        is supplied; otherwise it is built with ``HierarchyTree.from_nodes``.

        :param nodes: node mapping forwarded to ``HierarchyTree.from_nodes``
        :param df: data frame forwarded to ``HierarchyTree.from_nodes``
        :param tree: pre-built hierarchy, used only when ``nodes`` and ``df``
            are both absent
        :param root: key of the root node, forwarded to ``from_nodes``
        :param exogenous: exogenous variables; stored on the instance and
            forwarded to ``from_nodes``
        :raises InvalidArgumentException: if ``nodes``, ``df`` and ``tree``
            are all missing
        """
        # ``not df`` raises ValueError for a DataFrame (its truth value is
        # ambiguous), so the presence of ``df`` is tested against None.
        if not nodes and df is None:
            if not tree:
                raise InvalidArgumentException(
                    "Either nodes and df must be passed, or a pre-built hierarchy tree"
                )
            self.nodes = tree
        else:
            self.nodes = HierarchyTree.from_nodes(nodes=nodes,
                                                  df=df,
                                                  exogenous=exogenous,
                                                  root=root)
        self.exogenous = exogenous
        # to_sum_mat returns (matrix, labels); only the matrix is kept here.
        self.sum_mat, _ = to_sum_mat(self.nodes)
        self._set_model_instance()
        self._init_revision()
Пример #4
0
def test_create_mv_tree(hierarchical_mv_data):
    """Check that a tree built with exogenous columns has the expected shape.

    The exogenous mapping assigns "precipitation" and "temp" to every other
    column of the fixture.
    """
    hierarchy = dict(
        [
            ("total", ["CH", "SLU", "BT", "OTHER"]),
            ("CH", ["CH-07", "CH-02", "CH-08", "CH-05", "CH-01"]),
            ("SLU", ["SLU-15", "SLU-01", "SLU-19", "SLU-07", "SLU-02"]),
            ("BT", ["BT-01", "BT-03"]),
            ("OTHER", ["WF-01", "CBD-13"]),
        ]
    )

    target_columns = [
        name
        for name in hierarchical_mv_data.columns
        if name not in ("precipitation", "temp")
    ]
    exog_by_column = {name: ["precipitation", "temp"] for name in target_columns}

    built = HierarchyTree.from_nodes(
        hierarchy, hierarchical_mv_data, exogenous=exog_by_column
    )

    as_frame = built.to_pandas()
    assert isinstance(as_frame, pandas.DataFrame)
    assert built.key == "total"
    assert len(built.children) == 4

    # Intermediate nodes sit one level above the leaves; leaves have height 0.
    assert built.get_node_height("CH") == 1
    assert built.get_node_height("BT-03") == 0
    assert built.get_node_height("CBD-13") == 0
    assert built.get_node_height("SLU") == 1
Пример #5
0
def test_to_pandas(events):
    """A tree built from geo events converts to a pandas DataFrame."""
    build_options = dict(
        df=events,
        lat_col='start_latitude',
        lon_col='start_longitude',
        nodes=('city', 'hex_index_6', 'hex_index_7', 'hex_index_8'),
        levels=(6, 8),
        resample_freq='1H',
        min_count=0.5,
    )
    tree = HierarchyTree.from_geo_events(**build_options)

    frame = tree.to_pandas()
    assert isinstance(frame, pandas.DataFrame)
Пример #6
0
def mv_tree(hierarchical_mv_data, mv_tree_empty):
    """Return a tree built from ``mv_tree_empty`` and the multivariate data.

    Each column other than "precipitation" and "temp" gets those two columns
    as its exogenous variables.
    """
    excluded = ("precipitation", "temp")

    exog_map = {}
    for column in hierarchical_mv_data.columns:
        if column in excluded:
            continue
        exog_map[column] = ["precipitation", "temp"]

    tree = HierarchyTree.from_nodes(
        mv_tree_empty, hierarchical_mv_data, exogenous=exog_map
    )
    return tree
Пример #7
0
def test_to_pandas(events):
    """``to_pandas`` on a geo-event tree must yield a pandas DataFrame."""
    node_columns = ("city", "hex_index_6", "hex_index_7", "hex_index_8")
    resolution_range = (6, 8)

    geo_tree = HierarchyTree.from_geo_events(
        df=events,
        lat_col="start_latitude",
        lon_col="start_longitude",
        nodes=node_columns,
        levels=resolution_range,
        resample_freq="1H",
        min_count=0.5,
    )

    result = geo_tree.to_pandas()
    assert isinstance(result, pandas.DataFrame)
Пример #8
0
def test_create_from_events(events):
    """A geo-event tree is an ``NAryTreeT`` with one child per distinct city."""
    build_options = {
        'df': events,
        'lat_col': 'start_latitude',
        'lon_col': 'start_longitude',
        'nodes': ('city', 'hex_index_6', 'hex_index_7', 'hex_index_8'),
        'levels': (6, 8),
        'resample_freq': '1H',
        'min_count': 0.5,
    }
    tree = HierarchyTree.from_geo_events(**build_options)

    assert isinstance(tree, NAryTreeT)

    child_count = len(tree.children)
    distinct_cities = events['city'].nunique()
    assert child_count == distinct_cities
Пример #9
0
def test_create_from_events(events):
    """Build a tree from geo events and check its type and root fan-out.

    The root is expected to have as many children as there are distinct
    values in the "city" column of the fixture.
    """
    node_columns = ("city", "hex_index_6", "hex_index_7", "hex_index_8")

    geo_tree = HierarchyTree.from_geo_events(
        df=events,
        lat_col="start_latitude",
        lon_col="start_longitude",
        nodes=node_columns,
        levels=(6, 8),
        resample_freq="1H",
        min_count=0.5,
    )

    assert isinstance(geo_tree, NAryTreeT)
    root_children = geo_tree.children
    assert len(root_children) == events["city"].nunique()
Пример #10
0
def mv_tree(hierarchical_mv_data):
    """Return a two-level tree over the multivariate data with exogenous columns.

    Every column except "precipitation" and "temp" is given those two columns
    as exogenous variables.
    """
    structure = {
        "total": ["CH", "SLU", "BT", "OTHER"],
        "CH": ["CH-07", "CH-02", "CH-08", "CH-05", "CH-01"],
        "SLU": ["SLU-15", "SLU-01", "SLU-19", "SLU-07", "SLU-02"],
        "BT": ["BT-01", "BT-03"],
        "OTHER": ["WF-01", "CBD-13"],
    }

    regressors = {}
    for column in hierarchical_mv_data.columns:
        if column not in ("precipitation", "temp"):
            regressors[column] = ["precipitation", "temp"]

    return HierarchyTree.from_nodes(
        structure, hierarchical_mv_data, exogenous=regressors
    )
Пример #11
0
def mv_tree(hierarchical_mv_data):
    """Build the multivariate hierarchy tree used by the tests.

    'precipitation' and 'temp' are attached as exogenous variables to every
    other column of ``hierarchical_mv_data``.
    """
    exog_names = ('precipitation', 'temp')

    series_columns = [
        name for name in hierarchical_mv_data.columns if name not in exog_names
    ]
    # A separate list per column, so entries never alias each other.
    exog_by_series = {name: list(exog_names) for name in series_columns}

    node_map = {
        'total': ['CH', 'SLU', 'BT', 'OTHER'],
        'CH': ['CH-07', 'CH-02', 'CH-08', 'CH-05', 'CH-01'],
        'SLU': ['SLU-15', 'SLU-01', 'SLU-19', 'SLU-07', 'SLU-02'],
        'BT': ['BT-01', 'BT-03'],
        'OTHER': ['WF-01', 'CBD-13'],
    }

    tree = HierarchyTree.from_nodes(node_map,
                                    hierarchical_mv_data,
                                    exogenous=exog_by_series)
    return tree
Пример #12
0
    def predict(
        self,
        node: HierarchyTree,
        freq: str = "D",
        steps_ahead: int = 1,
        exogenous_df: pandas.DataFrame = None,
    ):
        """Forecast ``steps_ahead`` periods for ``node`` and store the results.

        The fitted ``self.model`` is asked for a future frame that includes the
        history. Exogenous values, ``cap`` and ``floor`` are added to that frame
        when available, and the prediction is stored in ``self.forecast``.
        ``self.residual`` and ``self.mse`` are computed on the rows that the
        forecast shares with the pre-processed history, before any inverse
        transformation is applied.

        :param node: node whose ``item`` is pre-processed into a frame with
            ``ds`` and ``y`` columns
        :param freq: frequency passed to ``make_future_dataframe``
        :param steps_ahead: number of future periods to forecast
        :param exogenous_df: future values of the node's exogenous columns;
            they are appended after the node's historical exogenous values
        :return: ``self``, with ``forecast``, ``residual`` and ``mse`` set
        """
        history = self._pre_process(node.item)

        future = self.model.make_future_dataframe(
            periods=steps_ahead, freq=freq, include_history=True
        )
        if exogenous_df is not None:
            # Stack historical exogenous values on top of the supplied future
            # ones, then align them positionally with the future frame.
            previous_exogenous_values = node.to_pandas()[node.exogenous].reset_index(
                drop=True
            )
            future_exogenous = pandas.concat(
                [previous_exogenous_values, exogenous_df]
            ).reset_index(drop=True)
            future = pandas.concat(
                [future, future_exogenous.reindex(future.index)], axis=1
            )
        # Compare against None rather than truthiness so that a legitimate
        # bound of 0 is not silently dropped from the future frame.
        if self.cap is not None:
            future["cap"] = self.cap
        if self.floor is not None:
            future["floor"] = self.floor

        self.forecast = self.model.predict(future)
        merged = pandas.merge(history, self.forecast, on="ds")
        self.residual = (merged["yhat"] - merged["y"]).values
        self.mse = numpy.mean(numpy.array(self.residual) ** 2)

        # NOTE(review): yhat is exponentiated whenever a cap is set, presumably
        # because capped series are modelled on a log scale -- confirm.
        if self.cap is not None:
            self.forecast["yhat"] = numpy.exp(self.forecast["yhat"])

        # Map the forecast and its components back through the transform.
        invert = self.transform_function.inverse_transform
        self.forecast["yhat"] = invert(self.forecast["yhat"])
        self.forecast["trend"] = invert(self.forecast["trend"])
        for component in ["seasonal", "daily", "weekly", "yearly", "holidays"]:
            if component in self.forecast.columns:
                self.forecast[component] = invert(self.forecast[component])
        return self
Пример #13
0
def test_create_hierarchical_sine_data_tree(hierarchical_sine_data):
    """Build the sine-data tree and check the fan-out of the root's children."""
    structure = {
        "total": ["a", "b", "c"],
        "a": ["aa", "ab"],
        "aa": ["aaa", "aab"],
        "b": ["ba", "bb"],
        "c": ["ca", "cb", "cc", "cd"],
    }
    tree = HierarchyTree.from_nodes(structure, hierarchical_sine_data)

    assert isinstance(tree.to_pandas(), pandas.DataFrame)
    assert tree.key == "total"
    assert len(tree.children) == 3

    # Expected number of direct children for each first-level node.
    expected_fan_out = {"a": 2, "b": 2, "c": 4}
    for child in tree.children:
        if child.key in expected_fan_out:
            assert len(child.children) == expected_fan_out[child.key]
Пример #14
0
def test_create_hierarchical_sine_data_tree(hierarchical_sine_data):
    """Check root key, root fan-out and first-level fan-out of the sine tree."""
    node_map = dict(
        [
            ('total', ['a', 'b', 'c']),
            ('a', ['aa', 'ab']),
            ('aa', ['aaa', 'aab']),
            ('b', ['ba', 'bb']),
            ('c', ['ca', 'cb', 'cc', 'cd']),
        ]
    )
    sine_tree = HierarchyTree.from_nodes(node_map, hierarchical_sine_data)

    frame = sine_tree.to_pandas()
    assert isinstance(frame, pandas.DataFrame)
    assert sine_tree.key == 'total'
    assert len(sine_tree.children) == 3

    for node in sine_tree.children:
        # 'a' and 'b' each have two children, 'c' has four.
        if node.key in ('a', 'b'):
            assert len(node.children) == 2
        elif node.key == 'c':
            assert len(node.children) == 4
Пример #15
0
def test_create_hierarchical_sine_data_tree(hierarchical_sine_data):
    """Build the three-level sine tree and check the fan-out on two levels.

    The expected child counts are derived from ``hier`` itself. The second
    level ("a_x", "a_y", ...) is reached by descending into each first-level
    node: those keys never appear among the root's direct children, so a
    check for them in the top-level loop would never run.
    """
    hier = {
        "total": ["a", "b", "c"],
        "a": ["a_x", "a_y"],
        "b": ["b_x", "b_y"],
        "c": ["c_x", "c_y"],
        "a_x": ["a_x_1", "a_x_2"],
        "a_y": ["a_y_1", "a_y_2"],
        "b_x": ["b_x_1", "b_x_2"],
        "b_y": ["b_y_1", "b_y_2"],
        "c_x": ["c_x_1", "c_x_2"],
        "c_y": ["c_y_1", "c_y_2"],
    }
    ht = HierarchyTree.from_nodes(hier, hierarchical_sine_data)
    assert isinstance(ht.to_pandas(), pandas.DataFrame)
    assert ht.key == "total"
    assert len(ht.children) == 3

    for child in ht.children:
        # First level: "a", "b" and "c" each declare two children.
        if child.key in hier:
            assert len(child.children) == len(hier[child.key])
        # Second level: "a_x" ... "c_y" each declare two children as well.
        for grandchild in child.children:
            if grandchild.key in hier:
                assert len(grandchild.children) == len(hier[grandchild.key])
Пример #16
0
def uv_tree(sine_hier, hierarchical_sine_data):
    """Return a tree over the first 400 rows of the hourly-summed sine data."""
    hourly = hierarchical_sine_data.resample('1H')
    hourly_totals = hourly.apply(sum)
    truncated = hourly_totals.head(400)
    return HierarchyTree.from_nodes(sine_hier, truncated)