Пример #1
0
def test_component_functions():
    """
    Tests whether UnionFind component/connected_component methods
    work as expected

    """

    # demand nodes are 0-3 plus a 'fake node' at 4
    demand_components = set(range(5))
    # existing grid nodes
    external_components = set(['grid-0', 'grid-1', 'grid-2'])

    subgraphs = UnionFind()
    for i in demand_components:
        subgraphs.add_component(i)
    for g in external_components:
        subgraphs.add_component(g)

    # assign weights to eventual connected component roots
    subgraphs.weights[4] = np.inf
    subgraphs.weights[2] = np.inf
    subgraphs.weights['grid-1'] = np.inf

    # first connect the grid nodes and the fake node
    grid_union_pairs = [(4, 'grid-0'), ('grid-1', 'grid-2')]
    for g1, g2 in grid_union_pairs:
        subgraphs.union(g1, g2, 1)

    # should be 3 components at this point (2 within demand set)
    eq_(subgraphs.connected_components(component_subset=demand_components),
        set(range(5)))
    eq_(subgraphs.connected_components(), set(range(5) + ['grid-1']))

    # connect others (including a connection to the grid via fake node 4)
    union_pairs = [(0, 1), (2, 3), (0, 4)]
    for u1, u2 in union_pairs:
        subgraphs.union(u1, u2, 1)

    # test component sets
    eq_(set(subgraphs.component_set(0)), set([0, 1, 4, 'grid-0']))
    eq_(set(subgraphs.component_set(2)), set([2, 3]))

    # test connected components
    eq_(subgraphs.connected_components(), set([4, 2, 'grid-1']))
    # connected component with ('grid-1', 'grid-2') should be filtered out
    eq_(subgraphs.connected_components(component_subset=demand_components),
        set([4, 2]))
Пример #2
0
def test_component_functions():
    """
    Tests whether UnionFind component/connected_component methods
    work as expected

    """

    # demand nodes are 0-3 plus a 'fake node' at 4
    demand_components = set(range(5))
    # existing grid nodes
    external_components = set(['grid-0', 'grid-1', 'grid-2'])

    subgraphs = UnionFind()
    for i in demand_components:  subgraphs.add_component(i)
    for g in external_components:  subgraphs.add_component(g)

    # assign weights to eventual connected component roots
    subgraphs.weights[4] = np.inf
    subgraphs.weights[2] = np.inf
    subgraphs.weights['grid-1'] = np.inf

    # first connect the grid nodes and the fake node
    grid_union_pairs = [(4, 'grid-0'), ('grid-1', 'grid-2')]
    for g1, g2 in grid_union_pairs:
        subgraphs.union(g1, g2, 1)

    # should be 3 components at this point (2 within demand set)
    eq_(subgraphs.connected_components(component_subset=demand_components),
        set(range(5)))
    eq_(subgraphs.connected_components(), set(range(5) + ['grid-1']))

    # connect others (including a connection to the grid via fake node 4)
    union_pairs = [(0, 1), (2, 3), (0, 4)]
    for u1, u2 in union_pairs:
        subgraphs.union(u1, u2, 1)

    # test component sets
    eq_(set(subgraphs.component_set(0)), set([0, 1, 4, 'grid-0']))
    eq_(set(subgraphs.component_set(2)), set([2, 3]))

    # test connected components
    eq_(subgraphs.connected_components(), set([4, 2, 'grid-1']))
    # connected component with ('grid-1', 'grid-2') should be filtered out
    eq_(subgraphs.connected_components(component_subset=demand_components),
        set([4, 2]))
Пример #3
0
def mod_boruvka(G, subgraphs=None, rtree=None):

    """
    algorithm to calculate the minimum spanning forest of nodes in GeoGraph G
    with 'budget' based restrictions on edges.

    Uses a modified version of Boruvka's algorithm

    NOTE:  subgraphs is modified as a side-effect...may remove in future
        (useful for testing right now)

    Args:
        G:  GeoGraph of nodes to be connected if appropriate
            Nodes should have 'budget' attribute
        subgraphs:  UnionFind data structure representing existing network's
            connected components AND the 'fake' nodes projected onto it.  This
            is the basis for the agglomerative nearest neighbor approach in
            this algorithm.
        rtree:  RTree based index of existing network

    Returns:
        GeoGraph: representing minimum spanning forest of G subject to the
            budget based restrictions
    """

    # special case (already MST)
    if G.number_of_nodes() < 2:
        return G

    V = set(G.nodes())
    coords = np.row_stack(G.coords.values())
    projcoords = ang_to_vec_coords(coords) if G.is_geographic() else coords
    kdtree = KDTree(projcoords)

    # Handle "dead" components
    D = set()

    if subgraphs is None:
        if rtree is not None: raise ValueError('RTree passed without UnionFind')

        rtree = Rtree()
        # modified to handle queues, children, mv
        subgraphs = UnionFind()

    # <helper_functions>
    def candidate_components(C):
        """
        return the set of candidate nearest components for the connected
        component containing C.  Do not consider those in C's connected
        component or those that are 'dead'.
        """
        component_set = subgraphs.component_set(C)
        return list((V - set(component_set)) - D)

    def update_nn_component(C, candidates):
        """
        find the nearest neighbor pair for the connected component
        represented by c.  candidates represents the list of
        components from which to select.
        """

        (v, vm) = subgraphs.queues[C].top()

        # vm ∈ C {not a foreign nearest neighbor}
        # go through the queue until an edge is found between this node
        # and the set of candidates, updating the neighbors in the connected
        # components queue in the process.
        while vm not in candidates:
            subgraphs.queues[C].pop()
            um, _ = kdtree.query_subset(projcoords[v], candidates)
            dm = square_distance(projcoords[v], projcoords[um])
            subgraphs.push(subgraphs.queues[C], (v, um), dm)
            # Note:  v will always be a vertex in this connected component
            #        vm *may* be external
            (v, vm) = subgraphs.queues[C].top()

        return (v, vm)

    # Tests whether the node is a projection on the existing grid, using its MV
    is_fake = lambda n: subgraphs.budget[n] == np.inf

    # Test whether the component is dead
    # i.e. can never connect to another node
    def is_dead(c, nn_dist):
        return not is_fake(c) and subgraphs.budget[c] < nn_dist

    # "true" distance between components
    def component_dist(c1, c2):
        dist = 0
        if G.is_geographic():
            dist = spherical_distance([coords[c1], coords[c2]])
        else:
            dist = euclidean_distance([coords[c1], coords[c2]])
        return dist

    # </helper_functions>

    # Initialize the connected components holding a single node
    # and push the nearest neighbor into its queue
    for v in V:
        vm, _ = kdtree.query_subset(projcoords[v], list(V - {v}))
        dm = square_distance(projcoords[v], projcoords[vm])
        subgraphs.add_component(v, budget=G.node[v]['budget'])
        subgraphs.push(subgraphs.queues[v], (v, vm), dm)

        # Add to dead set if warranted
        nn_dist = component_dist(v, vm)

        if is_dead(v, nn_dist):
            # here components are single nodes
            # so no need to worry about adding children to dead set
            if v not in D: D.add(v)

    Et = []  # Initialize MST edges to empty list
    last_state = None

    # MST is complete when no progress was made in the prior iteration
    while Et != last_state:

        # This is a candidate list of edges that might be added to the MST
        Ep = PriorityQueue()

        # ∀ C of G; where c <- connected component
        for C in subgraphs.connected_components(component_subset=V):

            candidates = candidate_components(C)

            # Skip if no valid candidates
            if not candidates:
                continue

            (v, vm) = update_nn_component(C, candidates)

            # Add to dead set if warranted
            nn_dist = component_dist(v, vm)

            if is_dead(C, nn_dist):
                # add all dead components to the dead set D
                # (note that fake nodes can never be dead)
                for c in subgraphs.component_set(C):
                    if c not in D and not is_fake(c):  D.add(c)

        # One more round to root out connections to dead components
        # found in above iteration.
        # Need to do this BEFORE pushing candidate edges into Ep.
        # Otherwise we might be testing only 'dead' candidates
        # and therefore mistakenly think we were done (since
        # no new edges would have been added)
        for C in subgraphs.connected_components(component_subset=V):

            candidates = candidate_components(C)

            # Skip if no valid candidates
            if not candidates:
                continue

            (v, vm) = update_nn_component(C, candidates)

            # Calculate nn_dist for comparison to mv later
            nn_dist = component_dist(v, vm)

            # Append the top priority edge from the subgraph to the candidate
            # edge set
            Ep.push((v, vm, nn_dist), nn_dist)

        last_state = deepcopy(Et)

        # Candidate Test
        # At this point we have all of our nearest neighbor component edge
        # candidates defined for this "round"
        #
        # Now test all candidate edges in Ep for cycles and satisfaction of
        # custom criteria
        while Ep._queue:
            (um, vm, dm) = Ep.pop()

            # if doesn't create cycle 
            # and subgraphs have enough MV
            # and we're not connecting 2 fake nodes 
            # then allow the connection
            if subgraphs[um] != subgraphs[vm] and \
                (subgraphs.budget[subgraphs[um]] >= dm or is_fake(um)) and \
                (subgraphs.budget[subgraphs[vm]] >= dm or is_fake(vm)) and \
                not (is_fake(um) and is_fake(vm)):

                # doesn't create cycles from line segment intersection
                invalid_edge, intersections = \
                    line_subgraph_intersection(subgraphs, rtree,
                        coords[um], coords[vm])

                if not invalid_edge:
                    # edges should not intersect a subgraph more than once
                    assert(filter(lambda n: n > 1,
                        intersections.values()) == [])

                    # merge the subgraphs
                    subgraphs.union(um, vm, dm)

                    # For all intersected subgraphs update the mv to that
                    # created by the edge intersecting them,
                    # TODO: This should be updated in not such a naive method
                    map(lambda (n, _): subgraphs.union(um, n, 0),
                            filter(lambda (n, i): i == 1 and
                                    subgraphs[n] != subgraphs[um],
                                intersections.iteritems()))

                    # index the newly added edge
                    box = make_bounding_box(coords[um], coords[vm])

                    # Object is (u.label, v.label), (u.coord, v.coord)
                    rtree.insert(hash((um, vm)), box,
                        obj=((um, vm), (coords[um], coords[vm])))
                    Et += [(um, vm, {'weight': dm})]

    # create new GeoGraph with results
    result = G.copy()
    result.coords = G.coords
    result.remove_edges_from(result.edges())
    result.add_edges_from(Et)
    return result
Пример #4
0
def mod_boruvka(G, subgraphs=None, rtree=None):

    """
    algorithm to calculate the minimum spanning forest of nodes in GeoGraph G
    with 'budget' based restrictions on edges.

    Uses a modified version of Boruvka's algorithm

    NOTE:  subgraphs is modified as a side-effect...may remove in future
        (useful for testing right now)

    Args:
        G:  GeoGraph of nodes to be connected if appropriate
            Nodes should have 'budget' attribute
        subgraphs:  UnionFind data structure representing existing network's
            connected components AND the 'fake' nodes projected onto it.  This
            is the basis for the agglomerative nearest neighbor approach in
            this algorithm.
        rtree:  RTree based index of existing network

    Returns:
        GeoGraph: representing minimum spanning forest of G subject to the
            budget based restrictions
    """

    # special case (already MST)
    if G.number_of_nodes() < 2:
        return G

    V = set(G.nodes())

    # GeoGraph coords may be ndarray or dict
    if isinstance(G.coords, np.ndarray):
        coords = G.coords
    else:
        coords = np.row_stack(G.coords.values())

    projcoords = ang_to_vec_coords(coords) if G.is_geographic() else coords
    kdtree = KDTree(projcoords)

    # Handle "dead" components
    D = set()

    if subgraphs is None:
        if rtree is not None:
            raise ValueError('RTree passed without UnionFind')

        rtree = Rtree()
        # modified to handle queues, children, mv
        subgraphs = UnionFind()

    # <helper_functions>
    def candidate_components(C):
        """
        return the set of candidate nearest components for the connected
        component containing C.  Do not consider those in C's connected
        component or those that are 'dead'.
        """
        component_set = subgraphs.component_set(C)
        return list((V - set(component_set)) - D)

    def update_nn_component(C, candidates):
        """
        find the nearest neighbor pair for the connected component
        represented by c.  candidates represents the list of
        components from which to select.
        """

        (v, vm) = subgraphs.queues[C].top()

        # vm ∈ C {not a foreign nearest neighbor}
        # go through the queue until an edge is found between this node
        # and the set of candidates, updating the neighbors in the connected
        # components queue in the process.
        while vm not in candidates:
            subgraphs.queues[C].pop()
            um, _ = kdtree.query_subset(projcoords[v], candidates)
            dm = square_distance(projcoords[v], projcoords[um])
            subgraphs.push(subgraphs.queues[C], (v, um), dm)
            # Note:  v will always be a vertex in this connected component
            #        vm *may* be external
            (v, vm) = subgraphs.queues[C].top()

        return (v, vm)

    def is_fake(node):
        """
        Tests whether the node is a projection on the existing grid
        """
        return subgraphs.budget[node] == np.inf

    # Test whether the component is dead
    # i.e. can never connect to another node
    def is_dead(c, nn_dist):
        return not is_fake(c) and subgraphs.budget[c] < nn_dist

    # "true" distance between components
    def component_dist(c1, c2):
        dist = 0
        if G.is_geographic():
            dist = spherical_distance([coords[c1], coords[c2]])
        else:
            dist = euclidean_distance([coords[c1], coords[c2]])
        return dist

    # </helper_functions>

    # Initialize the connected components holding a single node
    # and push the nearest neighbor into its queue
    for v in V:
        vm, _ = kdtree.query_subset(projcoords[v], list(V - {v}))
        dm = square_distance(projcoords[v], projcoords[vm])
        subgraphs.add_component(v, budget=G.node[v]['budget'])
        subgraphs.push(subgraphs.queues[v], (v, vm), dm)

        # Add to dead set if warranted
        nn_dist = component_dist(v, vm)

        if is_dead(v, nn_dist):
            # here components are single nodes
            # so no need to worry about adding children to dead set
            if v not in D:
                D.add(v)

    Et = []  # Initialize MST edges to empty list
    last_state = None

    # MST is complete when no progress was made in the prior iteration
    while Et != last_state:

        # This is a candidate list of edges that might be added to the MST
        Ep = PriorityQueue()

        # ∀ C of G; where c <- connected component
        for C in subgraphs.connected_components(component_subset=V):

            candidates = candidate_components(C)

            # Skip if no valid candidates
            if not candidates:
                continue

            (v, vm) = update_nn_component(C, candidates)

            # Add to dead set if warranted
            nn_dist = component_dist(v, vm)

            if is_dead(C, nn_dist):
                # add all dead components to the dead set D
                # (note that fake nodes can never be dead)
                for c in subgraphs.component_set(C):
                    if c not in D and not is_fake(c):
                        D.add(c)

        # One more round to root out connections to dead components
        # found in above iteration.
        # Need to do this BEFORE pushing candidate edges into Ep.
        # Otherwise we might be testing only 'dead' candidates
        # and therefore mistakenly think we were done (since
        # no new edges would have been added)
        for C in subgraphs.connected_components(component_subset=V):

            candidates = candidate_components(C)

            # Skip if no valid candidates
            if not candidates:
                continue

            (v, vm) = update_nn_component(C, candidates)

            # Calculate nn_dist for comparison to mv later
            nn_dist = component_dist(v, vm)

            # Append the top priority edge from the subgraph to the candidate
            # edge set
            Ep.push((v, vm, nn_dist), nn_dist)

        last_state = deepcopy(Et)

        # Candidate Test
        # At this point we have all of our nearest neighbor component edge
        # candidates defined for this "round"
        #
        # Now test all candidate edges in Ep for cycles and satisfaction of
        # custom criteria
        while Ep._queue:
            (um, vm, dm) = Ep.pop()

            # if doesn't create cycle
            # and subgraphs have enough MV
            # and we're not connecting 2 fake nodes
            # then allow the connection
            if subgraphs[um] != subgraphs[vm] and \
               (subgraphs.budget[subgraphs[um]] >= dm or is_fake(um)) and \
               (subgraphs.budget[subgraphs[vm]] >= dm or is_fake(vm)) and \
               not (is_fake(um) and is_fake(vm)):

                # doesn't create cycles from line segment intersection
                invalid_edge, intersections = \
                    line_subgraph_intersection(subgraphs, rtree,
                                               coords[um], coords[vm])

                if not invalid_edge:
                    # edges should not intersect a subgraph more than once
                    assert(filter(lambda n: n > 1,
                                  intersections.values()) == [])

                    # merge the subgraphs
                    subgraphs.union(um, vm, dm)

                    # For all intersected subgraphs update the mv to that
                    # created by the edge intersecting them,
                    # TODO: This should be updated in not such a naive method
                    map(lambda (n, _): subgraphs.union(um, n, 0),
                        filter(lambda (n, i): i == 1 and
                        subgraphs[n] != subgraphs[um],
                        intersections.iteritems()))

                    # index the newly added edge
                    box = make_bounding_box(coords[um], coords[vm])

                    # Object is (u.label, v.label), (u.coord, v.coord)
                    rtree.insert(hash((um, vm)), box,
                                 obj=((um, vm), (coords[um], coords[vm])))
                    Et += [(um, vm, {'weight': dm})]

    # create new GeoGraph with results
    result = G.copy()
    result.coords = G.coords
    result.remove_edges_from(result.edges())
    result.add_edges_from(Et)
    return result