def build_closure(g, terminals, debug=False, verbose=False):
    terminals = list(terminals)
    # build closure
    gc = Graph(directed=False)
    gc.add_vertex(g.num_vertices())

    edges_with_weight = set()
    r2pred = {}

    for r in terminals:
        if debug:
            print('root {}'.format(r))
        vis = init_visitor(g, r)
        pbfs_search(g, source=r, terminals=terminals, visitor=vis)
        new_edges = set(get_edges(vis.dist, r, terminals))
        if debug:
            print('new edges {}'.format(new_edges))
        edges_with_weight |= new_edges
        r2pred[r] = vis.pred

    for u, v, c in edges_with_weight:
        gc.add_edge(u, v)

    eweight = gc.new_edge_property('int')
    weights = np.array([c for _, _, c in edges_with_weight])
    eweight.set_2d_array(weights)

    vfilt = gc.new_vertex_property('bool')
    vfilt.a = False
    for v in terminals:
        vfilt[v] = True
    gc.set_vertex_filter(vfilt)
    return gc, eweight, r2pred
def is_order_respected(tree, root, obs_nodes, infection_times):
    tree = GraphView(tree)
    obs_set = set(obs_nodes)
    vfilt = tree.new_vertex_property('bool')
    vfilt.a = True
    tree.set_vertex_filter(vfilt)

    leaves = [o for o in obs_nodes if tree.vertex(o).out_degree() == 0]
    vis = init_visitor(tree, root)
    pbfs_search(tree, root, terminals=leaves, visitor=vis, count_threshold=-1)
    for l in leaves:
        edges = extract_edges_from_pred(tree, root, l, vis.pred)
        edges = edges[::-1]
        path = list(edges[0]) + [u for _, u in edges[1:]]
        useful_nodes_on_path = [v for v in path if v in obs_set]

        for i in range(len(useful_nodes_on_path)-1):
            u, v = useful_nodes_on_path[i: i+2]
            if infection_times[u] > infection_times[v]:
                return False
    return True
def build_closure(g, terminals,
                  debug=False,
                  verbose=False):
    terminals = list(terminals)
    # build closure
    gc = Graph(directed=False)

    for _ in range(g.num_vertices()):
        gc.add_vertex()

    edges_with_weight = set()
    r2pred = {}

    for r in terminals:
        if debug:
            print('root {}'.format(r))
        vis = init_visitor(g, r)
        pbfs_search(g, source=r, terminals=terminals, visitor=vis)
        new_edges = set(get_edges(vis.dist, r, terminals))
        if debug:
            print('new edges {}'.format(new_edges))
        edges_with_weight |= new_edges
        r2pred[r] = vis.pred
    
    for u, v, c in edges_with_weight:
        gc.add_edge(u, v)
        
    eweight = gc.new_edge_property('int')
    weights = np.array([c for _, _, c in edges_with_weight])
    eweight.set_2d_array(weights)

    vfilt = gc.new_vertex_property('bool')
    vfilt.a = False
    for v in terminals:
        vfilt[v] = True
    gc.set_vertex_filter(vfilt)
    return gc, eweight, r2pred
def build_closure(g, cand_source, terminals, infection_times, k=-1,
                  strictly_smaller=True,
                  debug=False,
                  verbose=False):
    """
    build a clojure graph in which cand_source + terminals are all connected to each other.
    the number of neighbors of each node is determined by k

    the larger the k, the denser the graph"""
    r2pred = {}
    edges = {}
    terminals = list(terminals)

    # from cand_source to terminals
    vis = init_visitor(g, cand_source)
    cpbfs_search(g, source=cand_source, visitor=vis, terminals=terminals,
                 forbidden_nodes=terminals,
                 count_threshold=k)
    r2pred[cand_source] = vis.pred
    for u, v, c in get_edges(vis.dist, cand_source, terminals):
        edges[(u, v)] = c

    if debug:
        print('cand_source: {}'.format(cand_source))
        print('#terminals: {}'.format(len(terminals)))
        print('edges from cand_source: {}'.format(edges))

    if verbose:
        terminals_iter = tqdm(terminals)
        print('building closure graph')
    else:
        terminals_iter = terminals

    # from terminal to other terminals
    for root in terminals_iter:

        if strictly_smaller:
            late_terminals = [t for t in terminals
                              if infection_times[t] > infection_times[root]]
        else:
            # respect what the paper presents
            late_terminals = [t for t in terminals
                              if infection_times[t] >= infection_times[root]]

        late_terminals = set(late_terminals) - {cand_source}  # no one can connect to cand_source
        if debug:
            print('root: {}'.format(root))
            print('late_terminals: {}'.format(late_terminals))
        vis = init_visitor(g, root)
        cpbfs_search(g, source=root, visitor=vis, terminals=list(late_terminals),
                     forbidden_nodes=list(set(terminals) - set(late_terminals)),
                     count_threshold=k)
        r2pred[root] = vis.pred
        for u, v, c in get_edges(vis.dist, root, late_terminals):
            if debug:
                print('edge ({}, {})'.format(u, v))
            edges[(u, v)] = c

    if verbose:
        print('returning closure graph')

    gc = Graph(directed=True)

    for _ in range(g.num_vertices()):
        gc.add_vertex()

    for (u, v) in edges:
        gc.add_edge(u, v)

    eweight = gc.new_edge_property('int')
    eweight.set_2d_array(np.array(list(edges.values())))
    # for e, c in edges.items():
    #     eweight[e] = c
    return gc, eweight, r2pred
def steiner_tree_mst(g, root, infection_times, source, terminals,
                     closure_builder=build_closure,
                     strictly_smaller=True,
                     return_closure=False,
                     k=-1,
                     debug=False,
                     verbose=True):
    gc, eweight, r2pred = closure_builder(g, root, terminals,
                                          infection_times,
                                          strictly_smaller=strictly_smaller,
                                          k=k,
                                          debug=debug,
                                          verbose=verbose)

    # get the minimum spanning arborescence
    # graph_tool does not provide minimum_spanning_arborescence
    if verbose:
        print('getting mst')
    gx = gt2nx(gc, root, terminals, edge_attrs={'weight': eweight})
    try:
        nx_tree = nx.minimum_spanning_arborescence(gx, 'weight')
    except nx.exception.NetworkXException:
        if debug:
            print('fail to find mst')
        if return_closure:
            return None, gc, None
        else:
            return None

    if verbose:
        print('returning tree')

    mst_tree = Graph(directed=True)
    for _ in range(g.num_vertices()):
        mst_tree.add_vertex()

    for u, v in nx_tree.edges():
        mst_tree.add_edge(u, v)

    if verbose:
        print('extract edges from original graph')

    # extract the edges from the original graph

    # sort observations by time
    # and also topological order
    topological_index = {}
    for i, e in enumerate(bfs_iterator(mst_tree, source=root)):
        topological_index[int(e.target())] = i
    sorted_obs = sorted(
        set(terminals) - {root},
        key=lambda o: (infection_times[o], topological_index[o]))

    tree_nodes = {root}
    tree_edges = set()
    # print('root', root)
    for u in sorted_obs:
        if u in tree_nodes:
            if debug:
                print('{} covered already'.format(u))
            continue
        # print(u)
        v, u = map(int, next(mst_tree.vertex(u).in_edges()))  # v is ancestor
        tree_nodes.add(v)

        late_nodes = [n for n in terminals if infection_times[n] > infection_times[u]]
        vis = init_visitor(g, u)
        # from child to any tree node, including v

        cpbfs_search(g, source=u, terminals=list(tree_nodes),
                     forbidden_nodes=late_nodes,
                     visitor=vis,
                     count_threshold=1)
        # dist, pred = shortest_distance(g, source=u, pred_map=True)
        node_set = {v for v, d in vis.dist.items() if d > 0}
        reachable_tree_nodes = node_set.intersection(tree_nodes)
        ancestor = min(reachable_tree_nodes, key=vis.dist.__getitem__)

        edges = extract_edges_from_pred(g, u, ancestor, vis.pred)
        edges = {(j, i) for i, j in edges}  # need to reverse it
        if debug:
            print('tree_nodes', tree_nodes)
            print('connecting {} to {}'.format(v, u))
            print('using ancestor {}'.format(ancestor))
            print('adding edges {}'.format(edges))
        tree_nodes |= {u for e in edges for u in e}

        tree_edges |= edges

    t = Graph(directed=True)
    for _ in range(g.num_vertices()):
        t.add_vertex()

    for u, v in tree_edges:
        t.add_edge(t.vertex(u), t.vertex(v))

    tree_nodes = {u for e in tree_edges for u in e}
    vfilt = t.new_vertex_property('bool')
    vfilt.a = False
    for v in tree_nodes:
        vfilt[t.vertex(v)] = True

    t.set_vertex_filter(vfilt)

    if return_closure:
        return t, gc, mst_tree
    else:
        return t
def find_tree_greedy(g,
                     root,
                     infection_times,
                     source,
                     obs_nodes,
                     debug=False,
                     verbose=True):
    # root = min(obs_nodes, key=infection_times.__getitem__)
    sorted_obs = list(sorted(obs_nodes, key=infection_times.__getitem__))[1:]
    tree_nodes = {root}
    tree_edges = set()
    for u in sorted_obs:
        if u in tree_nodes:
            continue

        # connect u to the tree
        vis = init_visitor(g, u)
        if debug:
            print('connect {} to tree'.format(u))
            print('nodes connectable: {}'.format(tree_nodes))
        forbidden_nodes = list(set(obs_nodes) - tree_nodes)
        cpbfs_search(g,
                     u,
                     visitor=vis,
                     terminals=list(tree_nodes),
                     forbidden_nodes=forbidden_nodes,
                     count_threshold=1)

        # add edge
        reachable_nodes = set(filter(lambda k: vis.dist[k] > 0,
                                     vis.dist)).intersection(tree_nodes)

        if debug:
            print('reachable_nodes: {}'.format(reachable_nodes))

        assert len(reachable_nodes) > 0
        sorted_ancestors = sorted(reachable_nodes, key=vis.dist.__getitem__)
        ancestor = sorted_ancestors[0]

        if debug:
            print('ancestor: {}'.format(ancestor))
            print('dist to reachable: {}'.format(vis.dist[sorted_ancestors]))

        new_edges = extract_edges_from_pred(g, u, ancestor, vis.pred)
        new_edges = {(v, u)
                     for u, v in new_edges}  # needs to reverse the order

        if debug:
            print('new_edges: {}'.format(new_edges))

        tree_edges |= set(new_edges)
        tree_nodes |= {v for e in new_edges for v in e}

    t = Graph(directed=True)
    t.add_vertex(g.num_vertices())

    vfilt = t.new_vertex_property('bool')
    vfilt.a = False
    for v in tree_nodes:
        vfilt[t.vertex(v)] = True

    for u, v in tree_edges:
        t.add_edge(t.vertex(u), t.vertex(v))

    t.set_vertex_filter(vfilt)

    return t
def build_closure_with_order(g,
                             cand_source,
                             terminals,
                             infection_times,
                             k=-1,
                             strictly_smaller=True,
                             return_r2pred=False,
                             debug=False,
                             verbose=False):
    """
    build transitive closure with infection order constraint

    g: gt.Graph(directed=False)
    cand_source: int
    terminals: list of int
    infection_times: dict int -> float

    build a clojure graph in which cand_source + terminals are all connected to each other.
    the number of neighbors of each node is determined by k

    the larger the k, the denser the graph

    note that vertex ids are preserved (without re-mapping to consecutive integers)

    return:

    gt.Graph(directed=True)
    """
    if return_r2pred:
        r2pred = {}
    edges = {}
    terminals = list(terminals)

    # from cand_source to terminals
    vis = init_visitor(g, cand_source)
    cpbfs_search(g,
                 source=cand_source,
                 visitor=vis,
                 terminals=terminals,
                 forbidden_nodes=terminals,
                 count_threshold=k)
    if return_r2pred:
        r2pred[cand_source] = vis.pred
    for u, v, c in get_edges(vis.dist, cand_source, terminals):
        edges[(u, v)] = c

    if debug:
        print('cand_source: {}'.format(cand_source))
        print('#terminals: {}'.format(len(terminals)))
        print('edges from cand_source: {}'.format(edges))

    if verbose:
        terminals_iter = tqdm(terminals)
        print('building closure graph')
    else:
        terminals_iter = terminals

    # from terminal to other terminals
    for root in terminals_iter:

        if strictly_smaller:
            late_terminals = [
                t for t in terminals
                if infection_times[t] > infection_times[root]
            ]
        else:
            # respect what the paper presents
            late_terminals = [
                t for t in terminals
                if infection_times[t] >= infection_times[root]
            ]

        late_terminals = set(late_terminals) - {
            cand_source
        }  # no one can connect to cand_source
        if debug:
            print('root: {}'.format(root))
            print('late_terminals: {}'.format(late_terminals))
        vis = init_visitor(g, root)
        cpbfs_search(
            g,
            source=root,
            visitor=vis,
            terminals=list(late_terminals),
            forbidden_nodes=list(set(terminals) - set(late_terminals)),
            count_threshold=k)

        if return_r2pred:
            r2pred[root] = vis.pred

        for u, v, c in get_edges(vis.dist, root, late_terminals):
            if debug:
                print('edge ({}, {})'.format(u, v))
            edges[(u, v)] = c

    if verbose:
        print('returning closure graph')

    gc = Graph(directed=True)

    gc.add_vertex(g.num_vertices())

    vfilt = gc.new_vertex_property('bool')
    vfilt.a = False

    for (u, v) in edges:
        gc.add_edge(u, v)
        vfilt[u] = vfilt[v] = True

    eweight = gc.new_edge_property('int')
    eweight.set_2d_array(np.array(list(edges.values())))
    gc.set_vertex_filter(vfilt)

    rets = (gc, eweight)
    if return_r2pred:
        rets += (r2pred, )
    return rets
def find_tree_by_closure(g,
                         root,
                         infection_times,
                         terminals,
                         closure_builder=build_closure_with_order,
                         strictly_smaller=True,
                         return_closure=False,
                         k=-1,
                         debug=False,
                         verbose=True):
    """find the steiner tree by trainsitive closure
    
    """
    gc, eweight = closure_builder(g,
                                  root,
                                  terminals,
                                  infection_times,
                                  strictly_smaller=strictly_smaller,
                                  k=k,
                                  return_r2pred=False,
                                  debug=debug,
                                  verbose=verbose)

    # get the minimum spanning arborescence
    # graph_tool does not provide minimum_spanning_arborescence
    if verbose:
        print('getting mst')
    tree_edges = find_minimum_branching(gc, [root], weights=eweight)

    efilt = gc.new_edge_property('bool')
    efilt.a = False
    for u, v in tree_edges:
        efilt[gc.edge(u, v)] = True

    mst_tree = GraphView(gc, efilt=efilt)

    if verbose:
        print('extract edges from original graph')

    # extract the edges from the original graph

    # sort observations by time
    # and also topological order
    # why doing this: we want to start collecting the edges
    # for nodes with higher order
    topological_index = {}
    for i, e in enumerate(bfs_iterator(mst_tree, source=root)):
        topological_index[int(e.target())] = i

    try:
        sorted_obs = sorted(set(terminals) - {root},
                            key=lambda o:
                            (infection_times[o], topological_index[o]))
    except KeyError:
        raise TreeNotFound(
            "it's likely that the input cannot produce a feasible solution, " +
            "because the topological sort on terminals does not visit all terminals"
        )

    # next, we start reconstructing the minimum steiner arborescence
    tree_nodes = {root}
    tree_edges = set()
    # print('root', root)
    for u in sorted_obs:
        if u in tree_nodes:
            if debug:
                print('{} covered already'.format(u))
            continue
        # print(u)
        v, u = map(int, next(mst_tree.vertex(u).in_edges()))  # v is ancestor
        tree_nodes.add(v)

        late_nodes = [
            n for n in terminals if infection_times[n] > infection_times[u]
        ]
        vis = init_visitor(g, u)
        # from child to any tree node, including v

        cpbfs_search(g,
                     source=u,
                     terminals=list(tree_nodes),
                     forbidden_nodes=late_nodes,
                     visitor=vis,
                     count_threshold=1)
        # dist, pred = shortest_distance(g, source=u, pred_map=True)
        node_set = {v for v, d in vis.dist.items() if d > 0}
        reachable_tree_nodes = node_set.intersection(tree_nodes)
        ancestor = min(reachable_tree_nodes, key=vis.dist.__getitem__)

        edges = extract_edges_from_pred(g, u, ancestor, vis.pred)
        edges = {(j, i) for i, j in edges}  # need to reverse it
        if debug:
            print('tree_nodes', tree_nodes)
            print('connecting {} to {}'.format(v, u))
            print('using ancestor {}'.format(ancestor))
            print('adding edges {}'.format(edges))
        tree_nodes |= {u for e in edges for u in e}

        tree_edges |= edges

    t = Graph(directed=True)
    t.add_vertex(g.num_vertices())

    for u, v in tree_edges:
        t.add_edge(t.vertex(u), t.vertex(v))

    tree_nodes = {u for e in tree_edges for u in e}
    vfilt = t.new_vertex_property('bool')
    vfilt.a = False
    for v in tree_nodes:
        vfilt[t.vertex(v)] = True

    t.set_vertex_filter(vfilt)

    if return_closure:
        return t, gc, mst_tree
    else:
        return t
def build_closure(g,
                  cand_source,
                  terminals,
                  infection_times,
                  k=-1,
                  strictly_smaller=True,
                  debug=False,
                  verbose=False):
    """
    build a clojure graph in which cand_source + terminals are all connected to each other.
    the number of neighbors of each node is determined by k

    the larger the k, the denser the graph"""
    r2pred = {}
    edges = {}
    terminals = list(terminals)

    # from cand_source to terminals
    vis = init_visitor(g, cand_source)
    cpbfs_search(g,
                 source=cand_source,
                 visitor=vis,
                 terminals=terminals,
                 forbidden_nodes=terminals,
                 count_threshold=k)
    r2pred[cand_source] = vis.pred
    for u, v, c in get_edges(vis.dist, cand_source, terminals):
        edges[(u, v)] = c

    if debug:
        print('cand_source: {}'.format(cand_source))
        print('#terminals: {}'.format(len(terminals)))
        print('edges from cand_source: {}'.format(edges))

    if verbose:
        terminals_iter = tqdm(terminals)
        print('building closure graph')
    else:
        terminals_iter = terminals

    # from terminal to other terminals
    for root in terminals_iter:

        if strictly_smaller:
            late_terminals = [
                t for t in terminals
                if infection_times[t] > infection_times[root]
            ]
        else:
            # respect what the paper presents
            late_terminals = [
                t for t in terminals
                if infection_times[t] >= infection_times[root]
            ]

        late_terminals = set(late_terminals) - {
            cand_source
        }  # no one can connect to cand_source
        if debug:
            print('root: {}'.format(root))
            print('late_terminals: {}'.format(late_terminals))
        vis = init_visitor(g, root)
        cpbfs_search(
            g,
            source=root,
            visitor=vis,
            terminals=list(late_terminals),
            forbidden_nodes=list(set(terminals) - set(late_terminals)),
            count_threshold=k)
        r2pred[root] = vis.pred
        for u, v, c in get_edges(vis.dist, root, late_terminals):
            if debug:
                print('edge ({}, {})'.format(u, v))
            edges[(u, v)] = c

    if verbose:
        print('returning closure graph')

    gc = Graph(directed=True)

    for _ in range(g.num_vertices()):
        gc.add_vertex()

    for (u, v) in edges:
        gc.add_edge(u, v)

    eweight = gc.new_edge_property('int')
    eweight.set_2d_array(np.array(list(edges.values())))
    # for e, c in edges.items():
    #     eweight[e] = c
    return gc, eweight, r2pred
def steiner_tree_mst(g,
                     root,
                     infection_times,
                     source,
                     terminals,
                     closure_builder=build_closure,
                     strictly_smaller=True,
                     return_closure=False,
                     k=-1,
                     debug=False,
                     verbose=True):
    gc, eweight, r2pred = closure_builder(g,
                                          root,
                                          terminals,
                                          infection_times,
                                          strictly_smaller=strictly_smaller,
                                          k=k,
                                          debug=debug,
                                          verbose=verbose)

    # get the minimum spanning arborescence
    # graph_tool does not provide minimum_spanning_arborescence
    if verbose:
        print('getting mst')
    gx = gt2nx(gc, root, terminals, edge_attrs={'weight': eweight})
    try:
        nx_tree = nx.minimum_spanning_arborescence(gx, 'weight')
    except nx.exception.NetworkXException:
        if debug:
            print('fail to find mst')
        if return_closure:
            return None, gc, None
        else:
            return None

    if verbose:
        print('returning tree')

    mst_tree = Graph(directed=True)
    for _ in range(g.num_vertices()):
        mst_tree.add_vertex()

    for u, v in nx_tree.edges():
        mst_tree.add_edge(u, v)

    if verbose:
        print('extract edges from original graph')

    # extract the edges from the original graph

    # sort observations by time
    # and also topological order
    topological_index = {}
    for i, e in enumerate(bfs_iterator(mst_tree, source=root)):
        topological_index[int(e.target())] = i
    sorted_obs = sorted(set(terminals) - {root},
                        key=lambda o:
                        (infection_times[o], topological_index[o]))

    tree_nodes = {root}
    tree_edges = set()
    # print('root', root)
    for u in sorted_obs:
        if u in tree_nodes:
            if debug:
                print('{} covered already'.format(u))
            continue
        # print(u)
        v, u = map(int, next(mst_tree.vertex(u).in_edges()))  # v is ancestor
        tree_nodes.add(v)

        late_nodes = [
            n for n in terminals if infection_times[n] > infection_times[u]
        ]
        vis = init_visitor(g, u)
        # from child to any tree node, including v

        cpbfs_search(g,
                     source=u,
                     terminals=list(tree_nodes),
                     forbidden_nodes=late_nodes,
                     visitor=vis,
                     count_threshold=1)
        # dist, pred = shortest_distance(g, source=u, pred_map=True)
        node_set = {v for v, d in vis.dist.items() if d > 0}
        reachable_tree_nodes = node_set.intersection(tree_nodes)
        ancestor = min(reachable_tree_nodes, key=vis.dist.__getitem__)

        edges = extract_edges_from_pred(g, u, ancestor, vis.pred)
        edges = {(j, i) for i, j in edges}  # need to reverse it
        if debug:
            print('tree_nodes', tree_nodes)
            print('connecting {} to {}'.format(v, u))
            print('using ancestor {}'.format(ancestor))
            print('adding edges {}'.format(edges))
        tree_nodes |= {u for e in edges for u in e}

        tree_edges |= edges

    t = Graph(directed=True)
    for _ in range(g.num_vertices()):
        t.add_vertex()

    for u, v in tree_edges:
        t.add_edge(t.vertex(u), t.vertex(v))

    tree_nodes = {u for e in tree_edges for u in e}
    vfilt = t.new_vertex_property('bool')
    vfilt.a = False
    for v in tree_nodes:
        vfilt[t.vertex(v)] = True

    t.set_vertex_filter(vfilt)

    if return_closure:
        return t, gc, mst_tree
    else:
        return t