Example #1
def test_decomposed_local_centrality():
    # centralities on the original nodes within the decomposed network should equal those from the non-decomposed workflow
    betas = np.array([-0.02, -0.01, -0.005, -0.0008, -0.0])
    distances = networks.distance_from_beta(betas)
    measure_keys = ('node_density',
                    'node_farness',
                    'node_cycles',
                    'node_harmonic',
                    'node_beta',
                    'segment_density',
                    'segment_harmonic',
                    'segment_beta',
                    'node_betweenness',
                    'node_betweenness_beta',
                    'segment_betweenness')
    # test a decomposed graph
    G = mock.mock_graph()
    G = graphs.nX_simple_geoms(G)
    node_uids, node_data, edge_data, node_edge_map = graphs.graph_maps_from_nX(G)  # generate node and edge maps
    measures_data = centrality.local_centrality(node_data,
                                                edge_data,
                                                node_edge_map,
                                                distances,
                                                betas,
                                                measure_keys,
                                                angular=False)
    G_decomposed = graphs.nX_decompose(G, 20)
    # generate node and edge maps
    node_uids, node_data, edge_data, node_edge_map = graphs.graph_maps_from_nX(G_decomposed)
    checks.check_network_maps(node_data, edge_data, node_edge_map)
    measures_data_decomposed = centrality.local_centrality(node_data,
                                                           edge_data,
                                                           node_edge_map,
                                                           distances,
                                                           betas,
                                                           measure_keys,
                                                           angular=False)
    # compare measures on the original nodes between the non-decomposed and decomposed networks
    d_range = len(distances)
    m_range = len(measure_keys)
    assert measures_data.shape == (m_range, d_range, len(G))
    assert measures_data_decomposed.shape == (m_range, d_range, len(G_decomposed))
    original_node_idx = np.where(node_data[:, 3] == 0)
    # with increasing decomposition:
    # - node-based measures will not match
    # - node-based segment measures will match - these measure to the cut endpoints per thresholds
    # - segment-based betweenness won't match - it doesn't measure to cut endpoints
    for m_idx in range(m_range):
        print(m_idx)
        for d_idx in range(d_range):
            match = np.allclose(measures_data[m_idx][d_idx], measures_data_decomposed[m_idx][d_idx][original_node_idx],
                                atol=0.1, rtol=0)  # relax precision
            if not match:
                print('key', measure_keys[m_idx], 'dist:', distances[d_idx], 'match:', match)
            if m_idx in [5, 6, 7]:
                assert match
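
# For context, a minimal sketch of the distance <-> beta mapping assumed by
# distance_from_beta above: betas are negative in this API version, and the
# default min_threshold_wt is assumed here to be exp(-4) ~= 0.01831.
import numpy as np

sketch_min_threshold_wt = 0.01831563888873418  # assumed default, i.e. exp(-4)
sketch_betas = np.array([-0.02, -0.01, -0.005, -0.0008])
# solve w_min = exp(beta * d_max) for d_max
sketch_d_max = np.log(sketch_min_threshold_wt) / sketch_betas
assert np.allclose(sketch_d_max, [200.0, 400.0, 800.0, 5000.0])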
Example #2
    def __init__(self,
                 node_uids: list | tuple,
                 node_data: np.ndarray,
                 edge_data: np.ndarray,
                 node_edge_map: Dict,
                 distances: list | tuple | np.ndarray | None = None,
                 betas: list | tuple | np.ndarray | None = None,
                 min_threshold_wt: float = checks.def_min_thresh_wt):
        """
        Parameters
        ----------
        node_uids
            A `list` or `tuple` of node identifiers corresponding to each node. This list must be in the same order and
            of the same length as the `node_data`.
        node_data
            A 2d `numpy` array representing the graph's nodes. The indices of the second dimension correspond as
            follows:

            | idx | property |
            |-----|:---------|
            | 0 | `x` coordinate |
            | 1 | `y` coordinate |
            | 2 | `bool` describing whether the node is `live`. Metrics are only computed for `live` nodes. |

            The `x` and `y` node attributes determine the spatial coordinates of the node, and should be in a suitable
            projected (flat) coordinate reference system in metres. [`nX_wgs_to_utm`](/tools/graphs/#nx_wgs_to_utm)
            can be used for converting a `networkX` graph from WGS84 `lng`, `lat` geographic coordinates to the local
            UTM `x`, `y` projected coordinate system.

            When calculating local network centralities or land-use accessibilities, it is best-practice to buffer the
            network by a distance equal to the maximum distance threshold to be considered. This prevents problematic
            results arising due to boundary roll-off effects.
            
            The `live` node attribute identifies nodes falling within the areal boundary of interest as opposed to those
            that fall within the surrounding buffered area. Calculations are only performed for `live=True` nodes, thus
            reducing frivolous computation while also cleanly identifying which nodes are in the buffered roll-off area.
            If some other process will be used for filtering the nodes, or if boundary roll-off is not being considered,
            then set all nodes to `live=True`.
        edge_data
            A 2d `numpy` array representing the graph's edges. Each edge will be described separately for each direction
            of travel. The indices of the second dimension correspond as follows:

            | idx | property |
            |-----|:---------|
            | 0 | start node `idx` |
            | 1 | end node `idx` |
            | 2 | the segment length in metres |
            | 3 | the sum of segment's angular change |
            | 4 | an 'impedance factor' which can be applied to magnify or reduce the effect of the edge's impedance on shortest-path calculations, e.g. for gradients or other such considerations. Use with caution. |
            | 5 | the edge's entry angular bearing |
            | 6 | the edge's exit angular bearing |

            The start and end edge `idx` attributes point to the corresponding node indices in the `node_data` array.

            The `length` edge attribute (index 2) should always correspond to the edge lengths in metres. This is used
            when calculating the distances traversed by the shortest-path algorithm so that the respective $d_{max}$
            maximum distance thresholds can be enforced: these distance thresholds are based on the actual network-paths
            traversed by the algorithm as opposed to crow-flies distances.

            The `angle_sum` edge bearing (index 3) should correspond to the total angular change along the length of
            the segment. This is used when calculating angular impedances for simplest-path measures. The
            `start_bearing` (index 5) and `end_bearing` (index 6) attributes respectively represent the starting and
            ending bearing of the segment. This is also used when calculating simplest-path measures when the algorithm
            steps from one edge to another.

            The `imp_factor` edge attribute (index 4) represents an impedance multiplier for increasing or diminishing
            the impedance of an edge. This is ordinarily set to 1, therefore not impacting calculations. By setting
            this to greater or less than 1, the edge will have a correspondingly higher or lower impedance. This can
            be used to take considerations such as street gradients into account, but should be used with caution.
        node_edge_map
            A `numba` `Dict` with `node_data` indices as keys and `numba` `List` types as values containing the out-edge
            indices for each node.
        distances
            A distance, or `list`, `tuple`, or `numpy` array of distances corresponding to the local $d_{max}$
            thresholds to be used for centrality (and land-use) calculations. The $\beta$ parameters (for
            distance-weighted metrics) will be determined implicitly. If the `distances` parameter is not provided, then
            the `betas` parameter must be provided instead. Use a distance of `np.inf` where no distance threshold should
            be enforced.
        betas
            A $\beta$, or `list`, `tuple`, or `numpy` array of $\beta$ values to be used for the exponential decay
            function for weighted metrics. The `distances` parameter for unweighted metrics will be determined
            implicitly. If the `betas` parameter is not provided, then the `distances` parameter must be provided
            instead.
        min_threshold_wt
            The default `min_threshold_wt` parameter can be overridden to generate custom mappings between the
            `distance` and `beta` parameters. See [`distance_from_beta`](#distance_from_beta) for more information.

        Returns
        -------
        NetworkLayer
            A `NetworkLayer`.

        Notes
        -----
        :::tip Comment
        It is possible to represent unlimited $d_{max}$ distance thresholds by setting one of the specified `distance`
        parameter values to `np.inf`. Note that this may substantially increase the computational time required for the
        completion of the algorithms on large networks.
        :::

        Properties
        ----------
        """
        self._uids = node_uids
        self._node_data = node_data
        self._edge_data = edge_data
        self._node_edge_map = node_edge_map
        self._distances = distances
        self._betas = betas
        self._min_threshold_wt = min_threshold_wt
        self.metrics = {
            'centrality': {},
            'mixed_uses': {},
            'accessibility': {
                'non_weighted': {},
                'weighted': {}
            },
            'stats': {},
            'models': {}
        }
        # for storing originating networkX graph
        self._networkX_multigraph = None
        # check the data structures
        if len(self._uids) != len(self._node_data):
            raise ValueError(
                'The number of indices does not match the number of nodes.')
        # check network maps
        checks.check_network_maps(self._node_data, self._edge_data,
                                  self._node_edge_map)
        # if distances, check the types and generate the betas
        if self._distances is not None and self._betas is None:
            if isinstance(self._distances, (int, float)):
                self._distances = [self._distances]
            if isinstance(self._distances, (list, tuple, np.ndarray)):
                if len(self._distances) == 0:
                    raise ValueError('Please provide at least one distance.')
            else:
                raise TypeError(
                    'Please provide a distance, or a list, tuple, or numpy.ndarray of distances.'
                )
            # generate the betas
            self._betas = beta_from_distance(
                self._distances, min_threshold_wt=self._min_threshold_wt)
        # if betas, generate the distances
        elif self._betas is not None and self._distances is None:
            if isinstance(self._betas, (int, float)):
                self._betas = [self._betas]
            if isinstance(self._betas, (list, tuple, np.ndarray)):
                if len(self._betas) == 0:
                    raise ValueError('Please provide at least one beta.')
            else:
                raise TypeError(
                    'Please provide a beta, or a list, tuple, or numpy.ndarray of betas.'
                )
            self._distances = distance_from_beta(
                self._betas, min_threshold_wt=self._min_threshold_wt)
        else:
            raise ValueError(
                'Please provide either distances or betas, but not both.')
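
# A minimal usage sketch for the constructor documented above: build the map
# structures from a mock graph and pass only distances so that betas are
# derived implicitly. The module paths and the NetworkLayer class name are
# assumptions based on the version of cityseer these snippets come from.
from cityseer.metrics import networks
from cityseer.util import graphs, mock

G = mock.mock_graph()
G = graphs.nX_simple_geoms(G)
node_uids, node_data, edge_data, node_edge_map = graphs.graph_maps_from_nX(G)
layer = networks.NetworkLayer(node_uids,
                              node_data,
                              edge_data,
                              node_edge_map,
                              distances=[400, 800])
# the betas implied by the 400m and 800m thresholds are generated internally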
Example #3
def test_check_network_maps():
    # network maps
    G = mock.mock_graph()
    G = graphs.nX_simple_geoms(G)
    N = networks.Network_Layer_From_nX(G, distances=[500])
    # from cityseer.util import plot
    # plot.plot_networkX_primal_or_dual(primal=G)
    # plot.plot_graph_maps(N.uids, N._node_data, N._edge_data)
    # catch zero length node and edge arrays
    empty_node_arr = np.full((0, 5), np.nan)
    with pytest.raises(ValueError):
        checks.check_network_maps(empty_node_arr, N._edge_data,
                                  N._node_edge_map)
    empty_edge_arr = np.full((0, 4), np.nan)
    with pytest.raises(ValueError):
        checks.check_network_maps(N._node_data, empty_edge_arr,
                                  N._node_edge_map)
    # check that malformed node and data maps throw errors
    with pytest.raises(ValueError):
        checks.check_network_maps(N._node_data[:, :-1], N._edge_data,
                                  N._node_edge_map)
    with pytest.raises(ValueError):
        checks.check_network_maps(N._node_data, N._edge_data[:, :-1],
                                  N._node_edge_map)
    # catch problematic edge map values
    for x in [np.nan, -1]:
        # missing start node
        corrupted_edges = N._edge_data.copy()
        corrupted_edges[0, 0] = x
        with pytest.raises(AssertionError):
            checks.check_network_maps(N._node_data, corrupted_edges,
                                      N._node_edge_map)
        # missing end node
        corrupted_edges = N._edge_data.copy()
        corrupted_edges[0, 1] = x
        with pytest.raises(KeyError):
            checks.check_network_maps(N._node_data, corrupted_edges,
                                      N._node_edge_map)
        # invalid length
        corrupted_edges = N._edge_data.copy()
        corrupted_edges[0, 2] = x
        with pytest.raises(ValueError):
            checks.check_network_maps(N._node_data, corrupted_edges,
                                      N._node_edge_map)
        # invalid angle_sum
        corrupted_edges = N._edge_data.copy()
        corrupted_edges[0, 3] = x
        with pytest.raises(ValueError):
            checks.check_network_maps(N._node_data, corrupted_edges,
                                      N._node_edge_map)
        # invalid imp_factor
        corrupted_edges = N._edge_data.copy()
        corrupted_edges[0, 4] = x
        with pytest.raises(ValueError):
            checks.check_network_maps(N._node_data, corrupted_edges,
                                      N._node_edge_map)
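
# A hedged sketch of minimal well-formed map structures implied by the checks
# above, using the 4-column NODE MAP and 7-column EDGE MAP layouts documented
# elsewhere in these examples; the exact validation rules may differ.
import numpy as np

# two nodes: x, y, live, ghosted
sketch_node_data = np.array([[0.0, 0.0, 1.0, 0.0],
                             [100.0, 0.0, 1.0, 0.0]])
# one edge per direction of travel: start, end, length, angle_sum,
# imp_factor, start_bearing, end_bearing
sketch_edge_data = np.array([[0.0, 1.0, 100.0, 0.0, 1.0, 0.0, 0.0],
                             [1.0, 0.0, 100.0, 0.0, 1.0, 180.0, 180.0]])
# the accompanying node_edge_map would map node 0 -> out-edges [0] and
# node 1 -> out-edges [1] (a numba typed Dict of Lists in the real code)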
Example #4
    def __init__(self,
                 node_uids: Union[list, tuple],
                 node_data: np.ndarray,
                 edge_data: np.ndarray,
                 node_edge_map: Dict,
                 distances: Union[list, tuple, np.ndarray] = None,
                 betas: Union[list, tuple, np.ndarray] = None,
                 min_threshold_wt: float = checks.def_min_thresh_wt):
        '''
        NODE MAP:
        0 - x
        1 - y
        2 - live
        3 - ghosted

        EDGE MAP:
        0 - start node
        1 - end node
        2 - length in metres
        3 - sum of angular travel along length
        4 - impedance factor
        5 - in bearing
        6 - out bearing
        '''
        self._uids = node_uids
        self._node_data = node_data
        self._edge_data = edge_data
        self._node_edge_map = node_edge_map
        self._distances = distances
        self._betas = betas
        self._min_threshold_wt = min_threshold_wt
        self.metrics = {
            'centrality': {},
            'mixed_uses': {},
            'accessibility': {
                'non_weighted': {},
                'weighted': {}
            },
            'stats': {},
            'models': {}
        }
        # for storing originating networkX graph
        self._networkX = None
        # check the data structures
        if len(self._uids) != len(self._node_data):
            raise ValueError(
                'The number of indices does not match the number of nodes.')
        # check network maps
        checks.check_network_maps(self._node_data, self._edge_data,
                                  self._node_edge_map)
        # if distances, check the types and generate the betas
        if self._distances is not None and self._betas is None:
            if isinstance(self._distances, (int, float)):
                self._distances = [self._distances]
            if isinstance(self._distances, (list, tuple, np.ndarray)):
                if len(self._distances) == 0:
                    raise ValueError('Please provide at least one distance.')
            else:
                raise TypeError(
                    'Please provide a distance, or a list, tuple, or numpy.ndarray of distances.'
                )
            # generate the betas
            self._betas = beta_from_distance(
                self._distances, min_threshold_wt=self._min_threshold_wt)
        # if betas, generate the distances
        elif self._betas is not None and self._distances is None:
            if isinstance(self._betas, (int, float)):
                self._betas = [self._betas]
            if isinstance(self._betas, (list, tuple, np.ndarray)):
                if len(self._betas) == 0:
                    raise ValueError('Please provide at least one beta.')
            else:
                raise TypeError(
                    'Please provide a beta, or a list, tuple, or numpy.ndarray of betas.'
                )
            self._distances = distance_from_beta(
                self._betas, min_threshold_wt=self._min_threshold_wt)
        else:
            raise ValueError(
                'Please provide either distances or betas, but not both.')
Example #5
def test_graph_maps_from_nX(diamond_graph):
    # test maps vs. networkX
    G_test = diamond_graph.copy()
    G_test_dual = graphs.nX_to_dual(G_test)
    for G, is_dual in zip((G_test, G_test_dual), (False, True)):
        # set some random 'live' statuses
        for n in G.nodes():
            G.nodes[n]['live'] = bool(np.random.randint(0, 2))

        # generate test maps
        node_uids, node_data, edge_data, node_edge_map = graphs.graph_maps_from_nX(G)
        # debug plot
        # plot.plot_graphs(primal=G)
        # plot.plot_graph_maps(node_uids, node_data, edge_data)

        # run check (this checks node to edge maps internally)
        checks.check_network_maps(node_data, edge_data, node_edge_map)

        # check lengths
        assert len(node_uids) == len(node_data) == G.number_of_nodes()
        # edges = x2
        assert len(edge_data) == G.number_of_edges() * 2

        # check node maps (idx and label match in this case...)
        for n_label in node_uids:
            n_idx = node_uids.index(n_label)
            assert node_data[n_idx][0] == G.nodes[n_label]['x']
            assert node_data[n_idx][1] == G.nodes[n_label]['y']
            assert node_data[n_idx][2] == G.nodes[n_label]['live']

        # check edge maps (idx and label match in this case...)
        for start, end, length, angle, imp_fact, start_bear, end_bear in edge_data:
            # print(f'elif (start, end) == ({start}, {end}):')
            # print(f'assert (length, angle, imp_fact, start_bear, end_bear) == ({length}, {angle}, {imp_fact}, {start_bear}, {end_bear})')
            if not is_dual:
                if (start, end) == (0.0, 1.0):
                    assert (length, angle, imp_fact, start_bear, end_bear) == (100.0, 0.0, 1.0, 120.0, 120.0)
                elif (start, end) == (0.0, 2.0):
                    assert (length, angle, imp_fact, start_bear, end_bear) == (100.0, 0.0, 1.0, 60.0, 60.0)
                elif (start, end) == (1.0, 0.0):
                    assert (length, angle, imp_fact, start_bear, end_bear) == (100.0, 0.0, 1.0, -60.0, -60.0)
                elif (start, end) == (1.0, 2.0):
                    assert (length, angle, imp_fact, start_bear, end_bear) == (100.0, 0.0, 1.0, 0.0, 0.0)
                elif (start, end) == (1.0, 3.0):
                    assert (length, angle, imp_fact, start_bear, end_bear) == (100.0, 0.0, 1.0, 60.0, 60.0)
                elif (start, end) == (2.0, 0.0):
                    assert (length, angle, imp_fact, start_bear, end_bear) == (100.0, 0.0, 1.0, -120.0, -120.0)
                elif (start, end) == (2.0, 1.0):
                    assert (length, angle, imp_fact, start_bear, end_bear) == (100.0, 0.0, 1.0, 180.0, 180.0)
                elif (start, end) == (2.0, 3.0):
                    assert (length, angle, imp_fact, start_bear, end_bear) == (100.0, 0.0, 1.0, 120.0, 120.0)
                elif (start, end) == (3.0, 1.0):
                    assert (length, angle, imp_fact, start_bear, end_bear) == (100.0, 0.0, 1.0, -120.0, -120.0)
                elif (start, end) == (3.0, 2.0):
                    assert (length, angle, imp_fact, start_bear, end_bear) == (100.0, 0.0, 1.0, -60.0, -60.0)
                else:
                    raise KeyError('Unmatched edge.')
            else:
                s_idx = node_uids[int(start)]
                e_idx = node_uids[int(end)]
                print(s_idx, e_idx)
                if (start, end) == (0.0, 1.0):  # 0_1 0_2
                    assert (length, angle, imp_fact, start_bear, end_bear) == (100.0, 120.0, 1.0, -60.0, 60.0)
                elif (start, end) == (0.0, 2.0):  # 0_1 1_2
                    assert (length, angle, imp_fact, start_bear, end_bear) == (100.0, 120.0, 1.0, 120.0, 0.0)
                elif (start, end) == (0.0, 3.0):  # 0_1 1_3
                    assert (length, angle, imp_fact, start_bear, end_bear) == (100.0, 60.0, 1.0, 120.0, 60.0)
                elif (start, end) == (1.0, 0.0):  # 0_2 0_1
                    assert (length, angle, imp_fact, start_bear, end_bear) == (100.0, 120.0, 1.0, -120.0, 120.0)
                elif (start, end) == (1.0, 2.0):  # 0_2 1_2
                    assert (length, angle, imp_fact, start_bear, end_bear) == (100.0, 120.0, 1.0, 60.0, 180.0)
                elif (start, end) == (1.0, 4.0):  # 0_2 2_3
                    assert (length, angle, imp_fact, start_bear, end_bear) == (100.0, 60.0, 1.0, 60.0, 120.0)
                elif (start, end) == (2.0, 0.0):  # 1_2 0_1
                    assert (length, angle, imp_fact, start_bear, end_bear) == (100.0, 120.0, 1.0, 180.0, -60.0)
                elif (start, end) == (2.0, 1.0):  # 1_2 0_2
                    assert (length, angle, imp_fact, start_bear, end_bear) == (100.0, 120.0, 1.0, 0.0, -120.0)
                elif (start, end) == (2.0, 3.0):  # 1_2 1_3
                    assert (length, angle, imp_fact, start_bear, end_bear) == (100.0, 120.0, 1.0, 180.0, 60.0)
                elif (start, end) == (2.0, 4.0):  # 1_2 2_3
                    assert (length, angle, imp_fact, start_bear, end_bear) == (100.0, 120.0, 1.0, 0.0, 120.0)
                elif (start, end) == (3.0, 0.0):  # 1_3 0_1
                    assert (length, angle, imp_fact, start_bear, end_bear) == (100.0, 60.0, 1.0, -120.0, -60.0)
                elif (start, end) == (3.0, 2.0):  # 1_3 1_2
                    assert (length, angle, imp_fact, start_bear, end_bear) == (100.0, 120.0, 1.0, -120.0, 0.0)
                elif (start, end) == (3.0, 4.0):  # 1_3 2_3
                    assert (length, angle, imp_fact, start_bear, end_bear) == (100.0, 120.0, 1.0, 60.0, -60.0)
                elif (start, end) == (4.0, 1.0):  # 2_3 0_2
                    assert (length, angle, imp_fact, start_bear, end_bear) == (100.0, 60.0, 1.0, -60.0, -120.0)
                elif (start, end) == (4.0, 2.0):  # 2_3 1_2
                    assert (length, angle, imp_fact, start_bear, end_bear) == (100.0, 120.0, 1.0, -60.0, 180.0)
                elif (start, end) == (4.0, 3.0):  # 2_3 1_3
                    assert (length, angle, imp_fact, start_bear, end_bear) == (100.0, 120.0, 1.0, 120.0, -120.0)
                else:
                    raise KeyError('Unmatched edge.')
    # check that missing geoms throw an error
    G_test = diamond_graph.copy()
    for s, e, k in G_test.edges(keys=True):
        # delete geom from the first edge and break
        del G_test[s][e][k]['geom']
        break
    with pytest.raises(KeyError):
        graphs.graph_maps_from_nX(G_test)

    # check that non-LineString geoms throw an error
    G_test = diamond_graph.copy()
    for s, e, k in G_test.edges(keys=True):
        G_test[s][e][k]['geom'] = geometry.Point([G_test.nodes[s]['x'], G_test.nodes[s]['y']])
    with pytest.raises(TypeError):
        graphs.graph_maps_from_nX(G_test)

    # check that missing node keys throw an error
    G_test = diamond_graph.copy()
    for k in ['x', 'y']:
        for n in G_test.nodes():
            # delete key from first node and break
            del G_test.nodes[n][k]
            break
        with pytest.raises(KeyError):
            graphs.graph_maps_from_nX(G_test)

    # check that invalid imp_factors are caught
    G_test = diamond_graph.copy()
    # corrupt imp_factor value and break
    for corrupt_val in [-1, -np.inf, np.nan]:
        for s, e, k in G_test.edges(keys=True):
            G_test[s][e][k]['imp_factor'] = corrupt_val
            break
        with pytest.raises(ValueError):
            graphs.graph_maps_from_nX(G_test)
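
# A hedged sketch of a 'diamond_graph' fixture consistent with the assertions
# above: four nodes and five 100m edges with bearings at multiples of 60
# degrees. Coordinates are illustrative; the real fixture may offset these
# into a projected CRS.
import networkx as nx
import numpy as np
import pytest
from shapely import geometry


@pytest.fixture
def diamond_graph():
    G = nx.MultiGraph()
    y = 100 * np.sin(np.deg2rad(60))  # ~86.6, so all edges are 100m
    G.add_node(0, x=0.0, y=0.0)      # bottom
    G.add_node(1, x=-50.0, y=y)      # left: bearing 120 from node 0
    G.add_node(2, x=50.0, y=y)       # right: bearing 60 from node 0
    G.add_node(3, x=0.0, y=2 * y)    # top
    for s, e in [(0, 1), (0, 2), (1, 2), (1, 3), (2, 3)]:
        G.add_edge(s, e, geom=geometry.LineString([
            (G.nodes[s]['x'], G.nodes[s]['y']),
            (G.nodes[e]['x'], G.nodes[e]['y'])]))
    return G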
Example #6
def singly_constrained(
        node_data: np.ndarray,
        edge_data: np.ndarray,
        node_edge_map: Dict,
        distances: np.ndarray,
        betas: np.ndarray,
        i_data_map: np.ndarray,
        j_data_map: np.ndarray,
        i_weights: np.ndarray,
        j_weights: np.ndarray,
        angular: bool = False,
        suppress_progress: bool = False) -> Tuple[np.ndarray, np.ndarray]:
    '''
    - Calculates trips from i to j and returns the assigned trips and the network-assigned flows for the j nodes
    #TODO: consider enhanced numerical checks for single vs. multi dimensional numerical data

    - Kept separate from the local aggregator because singly-constrained origin / destination models are computed separately
    - Requires two iterations: one to aggregate all competing k nodes per i node, then another to get the ratio of j to k attractiveness
    - Assigns the i -> j trips over the network as part of the second iteration
    NODE MAP:
    0 - x
    1 - y
    2 - live
    3 - ghosted
    EDGE MAP:
    0 - start node
    1 - end node
    2 - length in metres
    3 - sum of angular travel along length
    4 - impedance factor
    5 - entry bearing
    6 - exit bearing
    DATA MAP:
    0 - x
    1 - y
    2 - assigned network index - nearest
    3 - assigned network index - next-nearest
    '''
    checks.check_network_maps(node_data, edge_data, node_edge_map)
    checks.check_distances_and_betas(distances, betas)
    checks.check_data_map(i_data_map, check_assigned=True)
    checks.check_data_map(j_data_map, check_assigned=True)

    if len(i_weights) != len(i_data_map):
        raise ValueError(
            'The i_weights array must be the same length as the i_data_map.')

    if len(j_weights) != len(j_data_map):
        raise ValueError(
            'The j_weights array must be the same length as the j_data_map.')

    # establish variables
    netw_n = len(node_data)
    d_n = len(distances)
    global_max_dist = np.max(distances)
    netw_flows = np.full((d_n, netw_n), 0.0)

    i_n = len(i_data_map)
    k_agg = np.full((d_n, i_n), 0.0)

    j_n = len(j_data_map)
    j_assigned = np.full((d_n, j_n), 0.0)

    # iterate all i nodes
    # filter all reachable nodes k and aggregate k attractiveness * negative exponential of distance
    steps = int(i_n / 10000)
    for i_idx in range(i_n):
        if not suppress_progress:
            checks.progress_bar(i_idx, i_n, steps)
        # get the nearest node
        i_assigned_netw_idx = int(i_data_map[i_idx, 2])
        # calculate the base distance from the data point to the nearest assigned node
        i_x, i_y = i_data_map[i_idx, :2]
        n_x, n_y = node_data[i_assigned_netw_idx, :2]
        i_door_dist = np.hypot(i_x - n_x, i_y - n_y)

        # find the reachable j data points and their respective distances from the closest node
        reachable_j, reachable_j_dist, tree_preds = aggregate_to_src_idx(
            i_assigned_netw_idx, node_data, edge_data, node_edge_map,
            j_data_map, global_max_dist, angular)

        # aggregate the weighted j (all k) nodes
        # iterate the reachable indices and related distances
        for j_idx, (j_reachable,
                    j_dist) in enumerate(zip(reachable_j, reachable_j_dist)):
            if not j_reachable:
                continue
            # iterate the distance dimensions
            for d_idx, (d, b) in enumerate(zip(distances, betas)):
                total_dist = j_dist + i_door_dist
                # increment weighted k aggregations at respective distances if the distance is less than current d
                if total_dist <= d:
                    k_agg[d_idx,
                          i_idx] += j_weights[j_idx] * np.exp(total_dist * b)

    # this is the second step
    # this time, filter all reachable j vertices and aggregate the proportion of flow from i to j
    # this is done by dividing i-j flow through i-k_agg flow from previous step
    steps = int(i_n / 10000)
    for i_idx in range(i_n):
        if not suppress_progress:
            checks.progress_bar(i_idx, i_n, steps)

        # get the nearest node
        i_assigned_netw_idx = int(i_data_map[i_idx, 2])
        # calculate the base distance from the data point to the nearest assigned node
        i_x, i_y = i_data_map[i_idx, :2]
        n_x, n_y = node_data[i_assigned_netw_idx, :2]
        i_door_dist = np.hypot(i_x - n_x, i_y - n_y)

        # find the reachable j data points and their respective distances from the closest node
        reachable_j, reachable_j_dist, tree_preds = aggregate_to_src_idx(
            i_assigned_netw_idx, node_data, edge_data, node_edge_map,
            j_data_map, global_max_dist, angular)

        # aggregate j divided through all k nodes
        # iterate the reachable indices and related distances
        for j_idx, (j_reachable,
                    j_dist) in enumerate(zip(reachable_j, reachable_j_dist)):
            if not j_reachable:
                continue
            # iterate the distance dimensions
            for d_idx, (d, b) in enumerate(zip(distances, betas)):
                total_dist = j_dist + i_door_dist
                # if the distance is less than current d
                if total_dist <= d:
                    # aggregate all flows from reachable j's to i_idx
                    # divide through respective i-k_agg sums
                    # catch division by zero:
                    if k_agg[d_idx, i_idx] == 0:
                        assigned = 0
                    else:
                        assigned = i_weights[i_idx] * j_weights[j_idx] * np.exp(
                            total_dist * b) / k_agg[d_idx, i_idx]
                    j_assigned[d_idx, j_idx] += assigned
                    # assign trips to network
                    if assigned != 0:
                        # get the j assigned node
                        j_assigned_netw_idx = int(j_data_map[j_idx, 2])
                        # in this case start and end nodes are counted...!
                        netw_flows[d_idx, j_assigned_netw_idx] += assigned
                        # skip if same start / end node
                        if j_assigned_netw_idx == i_assigned_netw_idx:
                            continue
                        # aggregate to the network
                        inter_idx = int(tree_preds[j_assigned_netw_idx])
                        while True:
                            # end nodes counted, so place above break
                            netw_flows[d_idx, inter_idx] += assigned
                            # break out of while loop if the intermediary has reached the source node
                            if inter_idx == i_assigned_netw_idx:
                                break
                            # follow the chain
                            inter_idx = int(tree_preds[inter_idx])

    return j_assigned, netw_flows
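
# A hedged numeric sketch of the two-pass split computed above: one origin i
# of weight 1 and two destinations j of weights (2, 1) at network distances
# (100, 200), with beta = -0.01. Values are illustrative only.
import numpy as np

sketch_beta = -0.01
sketch_j_weights = np.array([2.0, 1.0])
sketch_j_dists = np.array([100.0, 200.0])
# first pass: aggregate attractiveness over all competing k (here k == j)
sketch_k_agg = np.sum(sketch_j_weights * np.exp(sketch_j_dists * sketch_beta))
# second pass: each j receives its proportional share of the origin's weight
sketch_assigned = sketch_j_weights * np.exp(sketch_j_dists * sketch_beta) / sketch_k_agg
# singly-constrained: assigned trips sum to the origin's weight of 1
assert np.isclose(sketch_assigned.sum(), 1.0)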
Example #7
def assign_to_network(data_map: np.ndarray,
                      node_data: np.ndarray,
                      edge_data: np.ndarray,
                      node_edge_map: Dict,
                      max_dist: float,
                      suppress_progress: bool = False) -> np.ndarray:
    '''
    To save unnecessary computation - this is done once and written to the data map.

    1 - find the closest network node from each data point
    2A - wind clockwise along the network to preferably find a block cycle surrounding the node
    2B - in event of topological traps, try anti-clockwise as well
    3A - select the closest block cycle node
    3B - if no enclosing cycle - simply use the closest node
    4 - find the neighbouring node that minimises the distance between the data point and the 'street-front'
    NODE MAP:
    0 - x
    1 - y
    2 - live
    3 - ghosted
    EDGE MAP:
    0 - start node
    1 - end node
    2 - length in metres
    3 - sum of angular travel along length
    4 - impedance factor
    5 - entry bearing
    6 - exit bearing
    DATA MAP:
    0 - x
    1 - y
    2 - assigned network index - nearest
    3 - assigned network index - next-nearest
    '''
    checks.check_network_maps(node_data, edge_data, node_edge_map)

    def calculate_rotation(point_a, point_b):
        # https://stackoverflow.com/questions/37459121/calculating-angle-between-three-points-but-only-anticlockwise-in-python
        # these two points / angles are relative to the origin - so pass in difference between the points and origin as vectors
        ang_a = np.arctan2(point_a[1], point_a[0])  # arctan is in y/x order
        ang_b = np.arctan2(point_b[1], point_b[0])
        return np.rad2deg((ang_a - ang_b) % (2 * np.pi))

    def calculate_rotation_smallest(point_a, point_b):
        # smallest difference angle
        ang_a = np.rad2deg(np.arctan2(point_a[1], point_a[0]))
        ang_b = np.rad2deg(np.arctan2(point_b[1], point_b[0]))
        return np.abs((ang_b - ang_a + 180) % 360 - 180)

    def road_distance(d_coords, netw_idx_a, netw_idx_b):
        a_coords = node_data[netw_idx_a, :2]
        b_coords = node_data[netw_idx_b, :2]
        # get the angles from either intersection node to the data point
        ang_a = calculate_rotation_smallest(d_coords - a_coords,
                                            b_coords - a_coords)
        ang_b = calculate_rotation_smallest(d_coords - b_coords,
                                            a_coords - b_coords)
        # assume an offset street segment if either angle is significantly greater than 90 (in which case the data point is offset sideways from the road)
        if ang_a > 110 or ang_b > 110:
            return np.inf, np.nan, np.nan
        # calculate height from two sides and included angle
        side_a = np.hypot(d_coords[0] - a_coords[0], d_coords[1] - a_coords[1])
        side_b = np.hypot(d_coords[0] - b_coords[0], d_coords[1] - b_coords[1])
        base = np.hypot(a_coords[0] - b_coords[0], a_coords[1] - b_coords[1])
        # forestall potential division by zero
        if base == 0:
            return np.inf, np.nan, np.nan
        # heron's formula
        s = (side_a + side_b + base) / 2  # perimeter / 2
        a = np.sqrt(s * (s - side_a) * (s - side_b) * (s - base))
        # area is 1/2 base * h, so h = area / (0.5 * base)
        h = a / (0.5 * base)
        # NOTE - the height of the triangle may be less than the distance to the nodes
        # happens due to offset segments: can cause wrong assignment where adjacent segments have same triangle height
        # in this case, set to length of closest node so that h (minimum distance) is still meaningful
        # return indices in order of nearest then next nearest
        if side_a < side_b:
            if ang_a > 90:
                h = side_a
            return h, netw_idx_a, netw_idx_b
        else:
            if ang_b > 90:
                h = side_b
            return h, netw_idx_b, netw_idx_a

    def closest_intersections(d_coords, pr_map, end_node):
        if len(pr_map) == 1:
            return np.inf, end_node, np.nan
        current_idx = end_node
        next_idx = int(pr_map[int(end_node)])
        if len(pr_map) == 2:
            return road_distance(d_coords, current_idx, next_idx)
        nearest_idx = np.nan
        next_nearest_idx = np.nan
        min_d = np.inf
        first_pred = next_idx  # for finding end of loop
        while True:
            h, n_idx, n_n_idx = road_distance(d_coords, current_idx, next_idx)
            if h < min_d:
                min_d = h
                nearest_idx = n_idx
                next_nearest_idx = n_n_idx
            # if the next in the chain is nan, then break
            if np.isnan(pr_map[next_idx]):
                break
            current_idx = next_idx
            next_idx = int(pr_map[next_idx])
            if next_idx == first_pred:
                break
        return min_d, nearest_idx, next_nearest_idx

    pred_map = np.full(len(node_data), np.nan)
    netw_coords = node_data[:, :2]
    netw_x_arr = node_data[:, 0]
    netw_y_arr = node_data[:, 1]
    data_coords = data_map[:, :2]
    data_x_arr = data_map[:, 0]
    data_y_arr = data_map[:, 1]
    total_count = len(data_map)
    # setup progress bar params
    steps = int(total_count / 10000)
    for data_idx in range(total_count):
        if not suppress_progress:
            checks.progress_bar(data_idx, total_count, steps)
        # find the nearest network node
        min_idx, min_dist = find_nearest(data_x_arr[data_idx],
                                         data_y_arr[data_idx], netw_x_arr,
                                         netw_y_arr, max_dist)
        # in some cases no network node will be within max_dist... so accept NaN
        if np.isnan(min_idx):
            continue
        # nearest is initially set to this nearest node, but if a nearer street-edge is found, it will be overridden
        nearest = min_idx
        next_nearest = np.nan
        # set start node to nearest network node
        node_idx = int(min_idx)
        # keep track of visited nodes
        pred_map.fill(np.nan)
        # state
        reversing = False
        # keep track of previous indices
        prev_idx = np.nan
        # iterate neighbours
        while True:
            # reset neighbour rotation and index counters
            rotation = np.nan
            nb_idx = np.nan
            # iterate the edges
            for edge_idx in node_edge_map[node_idx]:
                # get the edge's start and end node indices
                start, end = edge_data[edge_idx, :2]
                # cast to int for indexing
                new_idx = int(end)
                # don't follow self-loops
                if new_idx == node_idx:
                    continue
                # check that this isn't the previous node (already visited as neighbour from other direction)
                if np.isfinite(prev_idx) and new_idx == prev_idx:
                    continue
                # look for the new neighbour with the smallest rightwards (anti-clockwise arctan2) angle
                # measure the angle relative to the data point for the first node
                if np.isnan(prev_idx):
                    r = calculate_rotation(
                        netw_coords[int(new_idx)] - netw_coords[node_idx],
                        data_coords[data_idx] - netw_coords[node_idx])
                # else relative to the previous node
                else:
                    r = calculate_rotation(
                        netw_coords[int(new_idx)] - netw_coords[node_idx],
                        netw_coords[int(prev_idx)] - netw_coords[node_idx])
                if reversing:
                    r = 360 - r
                # if least angle, update
                if np.isnan(rotation) or r < rotation:
                    rotation = r
                    nb_idx = new_idx
            # allow backtracking if no neighbour is found - i.e. dead-ends
            if np.isnan(nb_idx):
                if np.isnan(pred_map[node_idx]):
                    # for isolated nodes: nb_idx == np.nan, pred_map[node_idx] == np.nan, and prev_idx == np.nan
                    if np.isnan(prev_idx):
                        break
                    # for isolated edges, the algorithm gets turned around back to the starting node with nowhere to go
                    # nb_idx == np.nan, pred_map[node_idx] == np.nan
                    # in these cases, pass closest_intersections the prev idx so that it has a predecessor to follow
                    d, n, n_n = closest_intersections(data_coords[data_idx],
                                                      pred_map, int(prev_idx))
                    if d < min_dist:
                        nearest = n
                        next_nearest = n_n
                    break
                # otherwise, go ahead and backtrack
                nb_idx = pred_map[node_idx]
            # if the distance is exceeded, reset and attempt in the other direction
            dist = np.hypot(netw_x_arr[int(nb_idx)] - data_x_arr[data_idx],
                            netw_y_arr[int(nb_idx)] - data_y_arr[data_idx])
            if dist > max_dist:
                pred_map[int(nb_idx)] = node_idx
                d, n, n_n = closest_intersections(data_coords[data_idx],
                                                  pred_map, int(nb_idx))
                # if the distance to the street edge is less than the nearest node, or than the prior closest edge
                if d < min_dist:
                    min_dist = d
                    nearest = n
                    next_nearest = n_n
                # reverse and try in opposite direction
                if not reversing:
                    reversing = True
                    pred_map.fill(np.nan)
                    node_idx = int(min_idx)
                    prev_idx = np.nan
                    continue
                break
            # ignore the following conditions while backtracking
            # (if backtracking, the current node's predecessor will be equal to the new neighbour)
            if nb_idx != pred_map[node_idx]:
                # if the new nb node has already been visited then terminate; this prevents infinite loops
                # or, if the algorithm has circled the block back to the original starting node
                if not np.isnan(pred_map[int(nb_idx)]) or nb_idx == min_idx:
                    # set the final predecessor, BUT ONLY if the original node is re-encountered
                    # this would otherwise occlude routes (e.g. backtracks) that have passed the same node twice
                    # (such routes are still able to recover the closest edge)
                    if nb_idx == min_idx:
                        pred_map[int(nb_idx)] = node_idx
                    d, n, n_n = closest_intersections(data_coords[data_idx],
                                                      pred_map, int(nb_idx))
                    if d < min_dist:
                        nearest = n
                        next_nearest = n_n
                    break
                # set predecessor (only if not backtracking)
                pred_map[int(nb_idx)] = node_idx
            # otherwise, keep going
            prev_idx = node_idx
            node_idx = int(nb_idx)
        # print(f'[{data_idx}, {nearest}, {next_nearest}],')
        # set in the data map
        data_map[data_idx, 2] = nearest  # adj_idx
        # in some cases next nearest will be NaN
        # this is mostly in situations where it works to leave as NaN - e.g. access off dead-ends...
        data_map[data_idx, 3] = next_nearest  # next_adj_idx

    return data_map
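
# A hedged check of the Heron's-formula height used by road_distance above:
# for a data point 30m above the midpoint of a 100m street segment, the
# computed height h should equal the 30m perpendicular distance.
import numpy as np

a_coords = np.array([0.0, 0.0])
b_coords = np.array([100.0, 0.0])
d_coords = np.array([50.0, 30.0])
side_a = np.hypot(*(d_coords - a_coords))
side_b = np.hypot(*(d_coords - b_coords))
base = np.hypot(*(b_coords - a_coords))
s = (side_a + side_b + base) / 2  # semi-perimeter
area = np.sqrt(s * (s - side_a) * (s - side_b) * (s - base))
h = area / (0.5 * base)  # since area = 1/2 * base * h
assert np.isclose(h, 30.0)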
Example #8
def local_aggregator(
    node_data: np.ndarray,
    edge_data: np.ndarray,
    node_edge_map: Dict,
    data_map: np.ndarray,
    distances: np.ndarray,
    betas: np.ndarray,
    landuse_encodings: np.ndarray = np.array([]),
    qs: np.ndarray = np.array([]),
    mixed_use_hill_keys: np.ndarray = np.array([]),
    mixed_use_other_keys: np.ndarray = np.array([]),
    accessibility_keys: np.ndarray = np.array([]),
    cl_disparity_wt_matrix: np.ndarray = np.array(np.full((0, 0), np.nan)),
    numerical_arrays: np.ndarray = np.array(np.full((0, 0), np.nan)),
    angular: bool = False,
    suppress_progress: bool = False
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray,
           np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray,
           np.ndarray, np.ndarray]:
    '''
    NODE MAP:
    0 - x
    1 - y
    2 - live
    3 - ghosted
    EDGE MAP:
    0 - start node
    1 - end node
    2 - length in metres
    3 - sum of angular travel along length
    4 - impedance factor
    5 - in bearing
    6 - out bearing
    DATA MAP:
    0 - x
    1 - y
    2 - assigned network index - nearest
    3 - assigned network index - next-nearest
    '''
    checks.check_network_maps(node_data, edge_data, node_edge_map)
    checks.check_data_map(
        data_map, check_assigned=True
    )  # raises a ValueError if data points are not assigned to a network
    checks.check_distances_and_betas(distances, betas)

    # check landuse encodings
    compute_landuses = False
    if len(landuse_encodings) == 0:
        if len(mixed_use_hill_keys) != 0 or len(
                mixed_use_other_keys) != 0 or len(accessibility_keys) != 0:
            raise ValueError(
                'Mixed use metrics or land-use accessibilities require an array of landuse labels.'
            )
    elif len(landuse_encodings) != len(data_map):
        raise ValueError(
            'The number of landuse encodings does not match the number of data points.'
        )
    else:
        checks.check_categorical_data(landuse_encodings)

    # catch completely missing metrics
    if len(mixed_use_hill_keys) == 0 and len(
            mixed_use_other_keys) == 0 and len(accessibility_keys) == 0:
        if len(numerical_arrays) == 0:
            raise ValueError(
                'No metrics specified, please specify at least one metric to compute.'
            )
    else:
        compute_landuses = True

    # catch missing qs
    if len(mixed_use_hill_keys) != 0 and len(qs) == 0:
        raise ValueError(
            'Hill diversity measures require that at least one value of q is specified.'
        )

    # negative qs caught by hill diversity methods

    # check various problematic key combinations
    if len(mixed_use_hill_keys) != 0:
        if (mixed_use_hill_keys.min() < 0 or mixed_use_hill_keys.max() > 3):
            raise ValueError('Mixed-use "hill" keys out of range of 0:4.')

    if len(mixed_use_other_keys) != 0:
        if (mixed_use_other_keys.min() < 0 or mixed_use_other_keys.max() > 2):
            raise ValueError('Mixed-use "other" keys out of range of 0:3.')

    if len(accessibility_keys) != 0:
        max_ac_key = landuse_encodings.max()
        if (accessibility_keys.min() < 0
                or accessibility_keys.max() > max_ac_key):
            raise ValueError(
                'Negative or out of range accessibility key encountered. Keys must match class encodings.'
            )

    for i in range(len(mixed_use_hill_keys)):
        for j in range(len(mixed_use_hill_keys)):
            if j > i:
                i_key = mixed_use_hill_keys[i]
                j_key = mixed_use_hill_keys[j]
                if i_key == j_key:
                    raise ValueError('Duplicate mixed-use "hill" key.')

    for i in range(len(mixed_use_other_keys)):
        for j in range(len(mixed_use_other_keys)):
            if j > i:
                i_key = mixed_use_other_keys[i]
                j_key = mixed_use_other_keys[j]
                if i_key == j_key:
                    raise ValueError('Duplicate mixed-use "other" key.')

    for i in range(len(accessibility_keys)):
        for j in range(len(accessibility_keys)):
            if j > i:
                i_key = accessibility_keys[i]
                j_key = accessibility_keys[j]
                if i_key == j_key:
                    raise ValueError('Duplicate accessibility key.')

    def disp_check(disp_matrix):
        # the length of the disparity matrix vis-a-vis unique landuses is tested in underlying diversity functions
        if disp_matrix.ndim != 2 or disp_matrix.shape[0] != disp_matrix.shape[
                1]:
            raise ValueError(
                'The disparity matrix must be a square NxN matrix.')
        if len(disp_matrix) == 0:
            raise ValueError(
                'Hill disparity and Rao pairwise measures requires a class disparity weights matrix.'
            )

    # check that missing or malformed disparity weights matrices are caught
    for k in mixed_use_hill_keys:
        if k == 3:  # hill disparity
            disp_check(cl_disparity_wt_matrix)
    for k in mixed_use_other_keys:
        if k == 2:  # raos pairwise
            disp_check(cl_disparity_wt_matrix)

    compute_numerical = False
    # when passing an empty 2d array to numba, use: np.array(np.full((0, 0), np.nan))
    if len(numerical_arrays) != 0:
        compute_numerical = True
        if numerical_arrays.shape[1] != len(data_map):
            raise ValueError(
                'The length of the numerical data arrays does not match the length of the data map.'
            )
        checks.check_numerical_data(numerical_arrays)

    # establish variables
    netw_n = len(node_data)
    d_n = len(distances)
    q_n = len(qs)
    n_n = len(numerical_arrays)
    global_max_dist = distances.max()
    netw_nodes_live = node_data[:, 2]

    # setup data structures
    # hill mixed uses are structured separately to take values of q into account
    mixed_use_hill_data = np.full((4, q_n, d_n, netw_n), np.nan)  # 4 dim
    mixed_use_other_data = np.full((3, d_n, netw_n), np.nan)  # 3 dim

    accessibility_data = np.full((len(accessibility_keys), d_n, netw_n), 0.0)
    accessibility_data_wt = np.full((len(accessibility_keys), d_n, netw_n),
                                    0.0)

    # stats
    stats_sum = np.full((n_n, d_n, netw_n), np.nan)
    stats_sum_wt = np.full((n_n, d_n, netw_n), np.nan)

    stats_mean = np.full((n_n, d_n, netw_n), np.nan)
    stats_mean_wt = np.full((n_n, d_n, netw_n), np.nan)

    stats_count = np.full(
        (n_n, d_n, netw_n),
        np.nan)  # use np.nan instead of 0 to avoid division by zero issues
    stats_count_wt = np.full((n_n, d_n, netw_n), np.nan)

    stats_variance = np.full((n_n, d_n, netw_n), np.nan)
    stats_variance_wt = np.full((n_n, d_n, netw_n), np.nan)

    stats_max = np.full((n_n, d_n, netw_n), np.nan)
    stats_min = np.full((n_n, d_n, netw_n), np.nan)

    # iterate through each vert and aggregate
    steps = int(netw_n / 10000)
    for netw_src_idx in range(netw_n):
        if not suppress_progress:
            checks.progress_bar(netw_src_idx, netw_n, steps)
        # only compute for live nodes
        if not netw_nodes_live[netw_src_idx]:
            continue
        # generate the reachable classes and their respective distances
        # these are non-unique - i.e. simply the class of each data point within the maximum distance
        # the aggregate_to_src_idx method will choose the closer direction of approach to a data point
        # from the nearest or next-nearest network node (calculated once globally, prior to local_landuses method)
        reachable_data, reachable_data_dist, tree_preds = aggregate_to_src_idx(
            netw_src_idx, node_data, edge_data, node_edge_map, data_map,
            global_max_dist, angular)
        # LANDUSES
        if compute_landuses:
            mu_max_unique_cl = int(landuse_encodings.max() + 1)
            # counts of each class type (array length per max unique classes - not just those within max distance)
            classes_counts = np.full((d_n, mu_max_unique_cl), 0)
            # nearest of each class type (likewise)
            classes_nearest = np.full((d_n, mu_max_unique_cl), np.inf)
            # iterate the reachable indices and related distances
            for data_idx, (reachable, data_dist) in enumerate(
                    zip(reachable_data, reachable_data_dist)):
                if not reachable:
                    continue
                # get the class category in integer form
                # all class codes were encoded to sequential integers - these correspond to the array indices
                cl_code = int(landuse_encodings[int(data_idx)])
                # iterate the distance dimensions
                for d_idx, (d, b) in enumerate(zip(distances, betas)):
                    # increment class counts at respective distances if the distance is less than current d
                    if data_dist <= d:
                        classes_counts[d_idx, cl_code] += 1
                        # if distance is nearer, update the nearest distance array too
                        if data_dist < classes_nearest[d_idx, cl_code]:
                            classes_nearest[d_idx, cl_code] = data_dist
                        # if within distance, and if in accessibility keys, then aggregate accessibility too
                        for ac_idx, ac_code in enumerate(accessibility_keys):
                            if ac_code == cl_code:
                                accessibility_data[ac_idx, d_idx,
                                                   netw_src_idx] += 1
                                accessibility_data_wt[ac_idx, d_idx,
                                                      netw_src_idx] += np.exp(
                                                          b * data_dist)
                                # if a match was found, then no need to check others
                                break
            # mixed uses can be calculated now that the local class counts are aggregated
            # iterate the distances and betas
            for d_idx, b in enumerate(betas):
                cl_counts = classes_counts[d_idx]
                cl_nearest = classes_nearest[d_idx]
                # mu keys determine which metrics to compute
                # don't confuse with indices
                # previously used dynamic indices in data structures - but obtuse if irregularly ordered keys
                for mu_hill_key in mixed_use_hill_keys:
                    for q_idx, q_key in enumerate(qs):
                        if mu_hill_key == 0:
                            mixed_use_hill_data[0, q_idx, d_idx, netw_src_idx] = \
                                diversity.hill_diversity(cl_counts, q_key)
                        elif mu_hill_key == 1:
                            mixed_use_hill_data[1, q_idx, d_idx, netw_src_idx] = \
                                diversity.hill_diversity_branch_distance_wt(cl_counts, cl_nearest, q=q_key, beta=b)
                        elif mu_hill_key == 2:
                            mixed_use_hill_data[2, q_idx, d_idx, netw_src_idx] = \
                                diversity.hill_diversity_pairwise_distance_wt(cl_counts, cl_nearest, q=q_key, beta=b)
                        # land-use classification disparity hill diversity
                        # the wt matrix can be used without mapping because cl_counts is based on all classes
                        # regardless of whether they are reachable
                        elif mu_hill_key == 3:
                            mixed_use_hill_data[3, q_idx, d_idx, netw_src_idx] = \
                                diversity.hill_diversity_pairwise_matrix_wt(cl_counts,
                                                                            wt_matrix=cl_disparity_wt_matrix,
                                                                            q=q_key)
                for mu_other_key in mixed_use_other_keys:
                    if mu_other_key == 0:
                        mixed_use_other_data[0, d_idx, netw_src_idx] = \
                            diversity.shannon_diversity(cl_counts)
                    elif mu_other_key == 1:
                        mixed_use_other_data[1, d_idx, netw_src_idx] = \
                            diversity.gini_simpson_diversity(cl_counts)
                    elif mu_other_key == 2:
                        mixed_use_other_data[2, d_idx, netw_src_idx] = \
                            diversity.raos_quadratic_diversity(cl_counts, wt_matrix=cl_disparity_wt_matrix)
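        # for reference: Hill diversity of order q over class proportions p_i
        # is D_q = (sum_i p_i^q) ** (1 / (1 - q)), with the limit
        # exp(-sum_i p_i * ln(p_i)) as q -> 1 (a standard definition); the
        # branch- and pairwise-weighted variants computed above additionally
        # discount classes by exp(beta * d)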
        # IDW
        # the order of the loops matters because the nested aggregations happen per distance per numerical array
        if compute_numerical:
            # iterate the reachable indices and related distances
            for data_idx, (reachable, data_dist) in enumerate(
                    zip(reachable_data, reachable_data_dist)):
                # data points beyond the max threshold distance are flagged as unreachable - skip accordingly
                # this happens when within radial max distance, but beyond network max distance
                if not reachable:
                    continue
                # iterate the numerical arrays dimension
                for num_idx in range(n_n):
                    # some values will be NaN
                    num = numerical_arrays[num_idx, int(data_idx)]
                    if np.isnan(num):
                        continue
                    # iterate the distance dimensions
                    for d_idx, (d, b) in enumerate(zip(distances, betas)):
                        # increment mean aggregations at respective distances if within the current threshold d
                        if data_dist <= d:
                            # aggregate
                            if np.isnan(stats_sum[num_idx, d_idx,
                                                  netw_src_idx]):
                                stats_sum[num_idx, d_idx, netw_src_idx] = num
                                stats_count[num_idx, d_idx, netw_src_idx] = 1
                                stats_sum_wt[num_idx, d_idx,
                                             netw_src_idx] = num * np.exp(
                                                 data_dist * b)
                                stats_count_wt[num_idx, d_idx,
                                               netw_src_idx] = np.exp(
                                                   data_dist * b)
                            else:
                                stats_sum[num_idx, d_idx, netw_src_idx] += num
                                stats_count[num_idx, d_idx, netw_src_idx] += 1
                                stats_sum_wt[num_idx, d_idx,
                                             netw_src_idx] += num * np.exp(
                                                 data_dist * b)
                                stats_count_wt[num_idx, d_idx,
                                               netw_src_idx] += np.exp(
                                                   data_dist * b)

                            if np.isnan(stats_max[num_idx, d_idx,
                                                  netw_src_idx]):
                                stats_max[num_idx, d_idx, netw_src_idx] = num
                            elif num > stats_max[num_idx, d_idx, netw_src_idx]:
                                stats_max[num_idx, d_idx, netw_src_idx] = num

                            if np.isnan(stats_min[num_idx, d_idx,
                                                  netw_src_idx]):
                                stats_min[num_idx, d_idx, netw_src_idx] = num
                            elif num < stats_min[num_idx, d_idx, netw_src_idx]:
                                stats_min[num_idx, d_idx, netw_src_idx] = num
            # finalise mean calculations - this is happening for a single netw_src_idx, so fairly fast
            for num_idx in range(n_n):
                for d_idx in range(d_n):
                    stats_mean[num_idx, d_idx, netw_src_idx] = \
                        stats_sum[num_idx, d_idx, netw_src_idx] / stats_count[num_idx, d_idx, netw_src_idx]
                    stats_mean_wt[num_idx, d_idx, netw_src_idx] = \
                        stats_sum_wt[num_idx, d_idx, netw_src_idx] / stats_count_wt[num_idx, d_idx, netw_src_idx]
            # calculate variances - counts are already computed per above
            # weighted version is IDW by division through equivalently weighted counts above
            # iterate the reachable indices and related distances
            for data_idx, (reachable, data_dist) in enumerate(
                    zip(reachable_data, reachable_data_dist)):
                # data points beyond the max threshold distance are flagged as unreachable - skip accordingly
                # this happens when within radial max distance, but beyond network max distance
                if not reachable:
                    continue
                # iterate the numerical arrays dimension
                for num_idx in range(n_n):
                    # some values will be NaN
                    num = numerical_arrays[num_idx, int(data_idx)]
                    if np.isnan(num):
                        continue
                    # iterate the distance dimensions
                    for d_idx, (d, b) in enumerate(zip(distances, betas)):
                        # increment variance aggregations at respective distances if within the current threshold d
                        if data_dist <= d:
                            # aggregate
                            if np.isnan(stats_variance[num_idx, d_idx,
                                                       netw_src_idx]):
                                stats_variance[num_idx, d_idx, netw_src_idx] = \
                                    np.square(num - stats_mean[num_idx, d_idx, netw_src_idx])
                                stats_variance_wt[num_idx, d_idx, netw_src_idx] = \
                                    np.square(num - stats_mean_wt[num_idx, d_idx, netw_src_idx]) * np.exp(data_dist * b)
                            else:
                                stats_variance[num_idx, d_idx, netw_src_idx] += \
                                    np.square(num - stats_mean[num_idx, d_idx, netw_src_idx])
                                stats_variance_wt[num_idx, d_idx, netw_src_idx] += \
                                    np.square(num - stats_mean_wt[num_idx, d_idx, netw_src_idx]) * np.exp(data_dist * b)
            # finalise variance calculations
            for num_idx in range(n_n):
                for d_idx in range(d_n):
                    stats_variance[num_idx, d_idx, netw_src_idx] = \
                        stats_variance[num_idx, d_idx, netw_src_idx] / stats_count[num_idx, d_idx, netw_src_idx]
                    stats_variance_wt[num_idx, d_idx, netw_src_idx] = \
                        stats_variance_wt[num_idx, d_idx, netw_src_idx] / stats_count_wt[num_idx, d_idx, netw_src_idx]
    # send the data back in the same types and same order as the original keys - convert to int for indexing
    mu_hill_k_int = np.full(len(mixed_use_hill_keys), 0)
    for i, k in enumerate(mixed_use_hill_keys):
        mu_hill_k_int[i] = k
    mu_other_k_int = np.full(len(mixed_use_other_keys), 0)
    for i, k in enumerate(mixed_use_other_keys):
        mu_other_k_int[i] = k

    return mixed_use_hill_data[mu_hill_k_int], \
           mixed_use_other_data[mu_other_k_int], \
           accessibility_data, accessibility_data_wt, \
           stats_sum, stats_sum_wt, \
           stats_mean, stats_mean_wt, \
           stats_variance, stats_variance_wt, \
           stats_max, stats_min
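# --- illustrative sketch (not part of the source above) ---
# The mixed-use dispatch above delegates to diversity.hill_diversity and
# related functions. A minimal, hypothetical re-implementation of plain Hill
# diversity is sketched here to clarify the role of q: q=0 gives richness,
# q->1 converges on exp(Shannon), q=2 gives the inverse Simpson index.
# The function name and logic below are assumptions for illustration only.
import numpy as np

def hill_diversity_sketch(class_counts: np.ndarray, q: float) -> float:
    counts = class_counts[class_counts > 0]  # drop empty classes
    if counts.sum() == 0:
        return 0.0
    probs = counts / counts.sum()  # convert counts to proportions
    if np.isclose(q, 1.0):
        # limit case for q=1: exponential of Shannon entropy
        return float(np.exp(-np.sum(probs * np.log(probs))))
    return float(np.sum(probs ** q) ** (1.0 / (1.0 - q)))

# e.g. three equally abundant classes give a diversity of 3 for any q:
# hill_diversity_sketch(np.array([5, 5, 5]), 0) -> 3.0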
Example #9
0
def test_graph_maps_from_nX():
    # template graph
    G_template = mock.mock_graph()
    G_template = graphs.nX_simple_geoms(G_template)

    # test maps vs. networkX
    G_test = G_template.copy()
    # set some random 'live' statuses
    for n in G_test.nodes():
        G_test.nodes[n]['live'] = bool(np.random.randint(0, 2))
    # randomise the imp_factors
    for s, e in G_test.edges():
        G_test[s][e]['imp_factor'] = np.random.random() * 2
    # generate geom with angular change for edge 50-51 - turns should sum to 270
    angle_geom = geometry.LineString([
        [700700, 5719900],
        [700700, 5720000],
        [700750, 5720050],
        [700700, 5720050],
        [700700, 5720100]
    ])
    G_test[50][51]['geom'] = angle_geom

    # generate test maps
    node_uids, node_data, edge_data, node_edge_map = graphs.graph_maps_from_nX(G_test)
    # debug plot
    # plot.plot_graphs(primal=G_test)
    # plot.plot_graph_maps(node_uids, node_data, edge_data)

    # run check
    checks.check_network_maps(node_data, edge_data, node_edge_map)

    # check lengths
    assert len(node_uids) == len(node_data) == G_test.number_of_nodes()
    # no ghosted edges, so the edge map holds two directed edges per undirected edge
    assert len(edge_data) == G_test.number_of_edges() * 2

    # check node maps (idx and label match in this case...)
    for n_label in node_uids:
        assert node_data[n_label][0] == G_test.nodes[n_label]['x']
        assert node_data[n_label][1] == G_test.nodes[n_label]['y']
        assert node_data[n_label][2] == G_test.nodes[n_label]['live']
        assert node_data[n_label][3] == 0  # ghosted is False by default

    # check edge maps (idx and label match in this case...)
    for start, end, length, angle_sum, imp_factor, start_bearing, end_bearing in edge_data:
        assert np.allclose(length, G_test[start][end]['geom'].length, atol=0.001, rtol=0)
        if (start == 50 and end == 51) or (start == 51 and end == 50):
            # check that the angle is measured along the line of change
            # i.e. 45 + 135 + 90 (not 45 + 45 + 90)
            # angles are transformed per: 1 + (angle_sum / 180)
            assert angle_sum == 270
        else:
            assert angle_sum == 0
        assert np.allclose(imp_factor, G_test[start][end]['imp_factor'], atol=0.001, rtol=0)
        s_x, s_y = node_data[int(start)][:2]
        e_x, e_y = node_data[int(end)][:2]
        assert np.allclose(start_bearing, np.rad2deg(np.arctan2(e_y - s_y, e_x - s_x)), atol=0.001, rtol=0)
        assert np.allclose(end_bearing, np.rad2deg(np.arctan2(e_y - s_y, e_x - s_x)), atol=0.001, rtol=0)

    # check that missing geoms throw an error
    G_test = G_template.copy()
    for s, e in G_test.edges():
        # delete the geom attribute from the first edge and break
        del G_test[s][e]['geom']
        break
    with pytest.raises(KeyError):
        graphs.graph_maps_from_nX(G_test)

    # check that non-LineString geoms throw an error
    G_test = G_template.copy()
    for s, e in G_test.edges():
        G_test[s][e]['geom'] = geometry.Point([G_test.nodes[s]['x'], G_test.nodes[s]['y']])
    with pytest.raises(TypeError):
        graphs.graph_maps_from_nX(G_test)

    # check that missing node keys throw an error
    G_test = G_template.copy()
    for k in ['x', 'y']:
        for n in G_test.nodes():
            # delete key from first node and break
            del G_test.nodes[n][k]
            break
        with pytest.raises(KeyError):
            graphs.graph_maps_from_nX(G_test)

    # check that invalid imp_factors are caught
    G_test = G_template.copy()
    # corrupt imp_factor value and break
    for corrupt_val in [-1, -np.inf, np.nan]:
        for s, e in G_test.edges():
            G_test[s][e]['imp_factor'] = corrupt_val
            break
        with pytest.raises(ValueError):
            graphs.graph_maps_from_nX(G_test)
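# --- illustrative sketch (not part of the source above) ---
# The angle_sum assertion above (45 + 135 + 90 == 270) can be reproduced by
# summing the absolute bearing changes between successive segments of the
# LineString. This helper is hypothetical - it is not the library's own
# implementation - but demonstrates the calculation on the test geometry.
import numpy as np
from shapely import geometry

def angular_change_sketch(geom: geometry.LineString) -> float:
    coords = np.array(geom.coords)
    deltas = np.diff(coords, axis=0)
    # bearing of each constituent segment in degrees
    bearings = np.rad2deg(np.arctan2(deltas[:, 1], deltas[:, 0]))
    # absolute turn between successive segments, wrapped to the 0-180 range
    turns = np.abs((np.diff(bearings) + 180) % 360 - 180)
    return float(turns.sum())

# angular_change_sketch(angle_geom) -> 270.0 for the geometry tested above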
Example #10
0
def local_segment_centrality(node_data: np.ndarray,
                             edge_data: np.ndarray,
                             node_edge_map: Dict,
                             distances: np.ndarray,
                             betas: np.ndarray,
                             measure_keys: tuple,
                             jitter_scale: float = 0.0,
                             angular: bool = False,
                             progress_proxy=None) -> np.ndarray:
    # integrity checks
    checks.check_distances_and_betas(distances, betas)
    checks.check_network_maps(node_data, edge_data, node_edge_map)
    # gather functions
    close_funcs = List.empty_list(segment_func_proto)
    close_idxs = []
    betw_idxs = []
    for m_idx, m_key in enumerate(measure_keys):
        if not angular:
            # segment keys
            if m_key == 'segment_density':
                close_funcs.append(segment_density)
                close_idxs.append(m_idx)
            elif m_key == 'segment_harmonic':
                close_funcs.append(segment_harmonic)
                close_idxs.append(m_idx)
            elif m_key == 'segment_beta':
                close_funcs.append(segment_beta)
                close_idxs.append(m_idx)
            elif m_key == 'segment_betweenness':
                # only one version of shortest path betweenness - no need for func
                betw_idxs.append(m_idx)
            else:
                raise ValueError('''
                    Unable to match requested centrality measure key against available options.
                    Shortest-path measures can't be mixed with simplest-path measures.
                    Set angular=True if using simplest-path measures. 
                ''')
        else:
            # segment keys
            if m_key == 'segment_harmonic_hybrid':
                # only one version of simplest path closeness - no need for func
                close_idxs.append(m_idx)
            elif m_key == 'segment_betweeness_hybrid':
                # only one version of simplest path betweenness - no need for func
                betw_idxs.append(m_idx)
            else:
                raise ValueError('''
                    Unable to match requested centrality measure key against available options.
                    Shortest-path measures can't be mixed with simplest-path measures.
                    Set angular=False if using shortest-path measures. 
                ''')
    # prepare variables
    n = len(node_data)
    d_n = len(distances)
    k_n = len(measure_keys)
    measures_data = np.full((k_n, d_n, n), 0.0, dtype=np.float32)
    global_max_dist = float(np.nanmax(distances))
    nodes_live = node_data[:, 2]
    # iterate through each vert and calculate the shortest path tree
    for src_idx in prange(n):
        shadow_arr = np.full((k_n, d_n, n), 0.0, dtype=np.float32)
        # numba no object mode can only handle basic printing
        # note that progress bar adds a performance penalty
        if progress_proxy is not None:
            progress_proxy.update(1)
        # only compute for live nodes
        if not nodes_live[src_idx]:
            continue
        '''
        Shortest tree dijkstra        
        Predecessor map is based on impedance heuristic - i.e. angular vs not
        Shortest path distances in metres used for defining max distances regardless
        RETURNS A SHORTEST PATH TREE MAP:
        0 - processed nodes
        1 - predecessors
        2 - shortest path distance
        3 - simplest path angular distance
        4 - cycles
        5 - origin segments
        6 - last segments
        '''
        tree_map, tree_edges = shortest_path_tree(edge_data,
                                                  node_edge_map,
                                                  src_idx,
                                                  max_dist=global_max_dist,
                                                  jitter_scale=jitter_scale,
                                                  angular=angular)
        tree_nodes = np.where(tree_map[:, 0])[0]
        tree_preds = tree_map[:, 1]
        tree_short_dists = tree_map[:, 2]
        tree_simpl_dists = tree_map[:, 3]
        tree_origin_seg = tree_map[:, 5]
        tree_last_seg = tree_map[:, 6]
        '''
        can't do edge processing as part of shortest tree because all shortest paths have to be resolved first
        hence visiting all processed edges and extrapolating information
        NOTES:
        1. the above shortest tree algorithm only tracks edges in one direction - i.e. no duplication
        2. dijkstra sorts all active nodes by distance: explores from near to far: edges discovered accordingly
        '''
        # only build edge data if necessary
        if close_idxs:
            for edge_idx in np.where(tree_edges)[0]:
                # unpack the edge data
                seg_n_nd, seg_m_nd, seg_len, seg_ang, seg_imp_fact, seg_in_bear, seg_out_bear = edge_data[
                    edge_idx]
                n_nd_idx = int(seg_n_nd)
                m_nd_idx = int(seg_m_nd)
                n_simpl_dist = tree_simpl_dists[n_nd_idx]
                m_simpl_dist = tree_simpl_dists[m_nd_idx]
                n_short_dist = tree_short_dists[n_nd_idx]
                m_short_dist = tree_short_dists[m_nd_idx]
                # don't process unreachable segments
                if np.isinf(n_short_dist) and np.isinf(m_short_dist):
                    continue
                '''
                shortest path (non-angular) uses a split segment workflow
                the split workflow allows for non-shortest-path edges to be approached from either direction
                i.e. the shortest path to node "b" isn't necessarily via node "a"
                the edge is then split at the farthest point from either direction and apportioned either way
                if the segment is on the shortest path then the second segment will squash down to naught
                '''
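                '''
                e.g. (illustrative numbers) for a segment of length 100 with a = 50 and b = 130:
                c = d = (100 + 50 + 130) / 2 = 140
                so 90m is apportioned from the a side (50 -> 140)
                and 10m from the b side (130 -> 140), summing to the full segment length
                '''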
                if not angular:
                    '''
                    dijkstra discovers edges from near to far (sorts before popping next node)
                    i.e. this sort may be unnecessary?
                    '''
                    # sort where a < b
                    if n_short_dist <= m_short_dist:
                        a = tree_short_dists[n_nd_idx]
                        a_imp = tree_short_dists[n_nd_idx]
                        b = tree_short_dists[m_nd_idx]
                        b_imp = tree_short_dists[m_nd_idx]
                    else:
                        a = tree_short_dists[m_nd_idx]
                        a_imp = tree_short_dists[m_nd_idx]
                        b = tree_short_dists[n_nd_idx]
                        b_imp = tree_short_dists[n_nd_idx]
                    # get the max distance along the segment: c (== d) solves seg_len = (c - a) + (d - b)
                    # c and d variables can diverge per beneath
                    c = d = (seg_len + a + b) / 2
                    # c | d impedance should technically be the same if computed from either side
                    c_imp = d_imp = a_imp + (c - a) * seg_imp_fact
                    # iterate the distance and beta thresholds - from large to small for threshold snipping
                    for d_idx in range(len(distances) - 1, -1, -1):
                        dist_cutoff = distances[d_idx]
                        beta = betas[d_idx]
                        '''
                        if c or d are greater than the distance threshold, then the segments are "snipped"
                        '''
                        # a to c segment
                        if a <= dist_cutoff:
                            if c > dist_cutoff:
                                c = dist_cutoff
                                c_imp = a_imp + (dist_cutoff -
                                                 a) * seg_imp_fact
                            for m_idx, close_func in zip(
                                    close_idxs, close_funcs):
                                shadow_arr[m_idx, d_idx,
                                           src_idx] += close_func(
                                               a, c, a_imp, c_imp, beta)
                        # b to d segment - if the segment is on the shortest path then b == d, in which case continue
                        if b == d:
                            continue
                        if b <= dist_cutoff:
                            if d > dist_cutoff:
                                d = dist_cutoff
                                d_imp = b_imp + (dist_cutoff -
                                                 b) * seg_imp_fact
                            for m_idx, close_func in zip(
                                    close_idxs, close_funcs):
                                shadow_arr[m_idx, d_idx,
                                           src_idx] += close_func(
                                               b, d, b_imp, d_imp, beta)
                else:
                    '''
                    there is a different workflow for angular - uses single segment (no segment splitting)
                    this is because the simplest path onto the entire length of segment is from the lower impedance end
                    this assumes segments are relatively straight; subdividing them for splitting would be overly complex...
                    '''
                    # only a single case existing for angular version so no need for abstracted functions
                    # there are three scenarios:
                    # 1) n is the src node or the predecessor for m
                    if n_nd_idx == src_idx or tree_preds[m_nd_idx] == n_nd_idx:
                        e = tree_short_dists[n_nd_idx]
                        f = tree_short_dists[m_nd_idx]
                        # if travelling via n, then m = n_imp + seg_ang
                        # calculations are based on segment length / angle
                        # i.e. need to decide whether to base angular change on entry vs exit impedance
                        # else take midpoint of segment as ballpark for average, which is the course taken here
                        # i.e. exit impedance minus half segment impedance
                        ang = m_simpl_dist - seg_ang / 2
                    # 2) m is the src node or the predecessor for n
                    elif m_nd_idx == src_idx or tree_preds[
                            n_nd_idx] == m_nd_idx:
                        e = tree_short_dists[m_nd_idx]
                        f = tree_short_dists[n_nd_idx]
                        ang = n_simpl_dist - seg_ang / 2  # per above
                    # 3) neither of the above
                    # get the approach angles for either side and compare to find the least inwards impedance
                    # this involves impedance up to entrypoint either side plus respective turns onto the segment
                    else:
                        # get the out bearing from the predecessor and calculate the turn onto current seg's in bearing
                        # find n's predecessor
                        n_pred_idx = int(tree_preds[n_nd_idx])
                        # find the edge from n's predecessor to n
                        e_i = _find_edge_idx(node_edge_map, edge_data,
                                             n_pred_idx, n_nd_idx)
                        # get the predecessor edge's outwards bearing at index 6
                        n_pred_out_bear = edge_data[int(e_i), 6]
                        # calculating the turn into this segment from the predecessor's out bearing
                        n_turn_in = np.abs(
                            (seg_in_bear - n_pred_out_bear + 180) % 360 - 180)
                        # then add the turn-in to the aggregated impedance at n
                        # i.e. total angular impedance onto this segment
                        # as above two scenarios, adding half of angular impedance for segment as avg between in / out
                        n_ang = n_simpl_dist + n_turn_in + seg_ang / 2
                        # repeat for the other side
                        # per original n -> m edge destructuring: m is the node in the outwards bound direction
                        # i.e. need to first find the corresponding edge in the opposite m -> n direction of travel
                        # this gives the correct inwards bearing as if m were the entry point
                        opp_i = _find_edge_idx(node_edge_map, edge_data,
                                               m_nd_idx, n_nd_idx)
                        # now that the opposing edge is known, we can fetch the inwards bearing at index 5 (not 6)
                        opp_in_bear = edge_data[int(opp_i), 5]
                        # find m's predecessor
                        m_pred_idx = int(tree_preds[m_nd_idx])
                        # we can now go ahead and find m's predecessor edge
                        e_i = _find_edge_idx(node_edge_map, edge_data,
                                             m_pred_idx, m_nd_idx)
                        # get the predecessor edge's outwards bearing at index 6
                        m_pred_out_bear = edge_data[int(e_i), 6]
                        # and calculate the turn-in from m's predecessor onto the m inwards bearing
                        m_turn_in = np.abs(
                            (opp_in_bear - m_pred_out_bear + 180) % 360 - 180)
                        # then add to aggregated impedance at m
                        m_ang = m_simpl_dist + m_turn_in + seg_ang / 2
                        # the distance and angle are based on the smallest angular impedance onto the segment
                        # select by shortest distance in event angular impedances are identical from either direction
                        if n_ang == m_ang:
                            if n_short_dist <= m_short_dist:
                                e = tree_short_dists[n_nd_idx]
                                ang = n_ang
                            else:
                                e = tree_short_dists[m_nd_idx]
                                ang = m_ang
                        elif n_ang < m_ang:
                            e = tree_short_dists[n_nd_idx]
                            ang = n_ang
                        else:
                            e = tree_short_dists[m_nd_idx]
                            ang = m_ang
                        # f is the entry distance plus segment length
                        f = e + seg_len
                    # iterate the distance thresholds - from large to small for threshold snipping
                    for d_idx in range(len(distances) - 1, -1, -1):
                        dist_cutoff = distances[d_idx]
                        if e <= dist_cutoff:
                            if f > dist_cutoff:
                                f = dist_cutoff
                            # uses segment length as base (in this sense hybrid)
                            # intentionally not using the integral form - it would conflate harmonic shortest-path with simplest-path
                            # there is only one case for angular - no need to abstract to func
                            for m_idx in close_idxs:
                                # transform - prevents division by zero
                                agg_ang = 1 + (ang / 180)
                                # then aggregate - angular uses distances explicitly
                                shadow_arr[m_idx, d_idx,
                                           src_idx] += (f - e) / agg_ang
        if betw_idxs:
            # prepare a list of neighbouring nodes
            nb_nodes = List.empty_list(types.int64)
            for edge_idx in node_edge_map[src_idx]:
                out_nd_idx = int(edge_data[edge_idx][1])  # to node is index 1
                nb_nodes.append(out_nd_idx)
            # betweenness keys computed per to_idx
            for to_idx in tree_nodes:
                # only process in one direction
                if to_idx < src_idx:
                    continue
                # skip self node
                if to_idx == src_idx:
                    continue
                # skip direct neighbours (no nodes between)
                if to_idx in nb_nodes:
                    continue
                # distance - do not proceed if no route available
                to_dist = tree_short_dists[to_idx]
                if np.isinf(to_dist):
                    continue
                '''
                BETWEENNESS
                segment versions only agg first and last segments
                the distance decay is based on the distance between the src segment and to segment
                i.e. willingness of people to walk between src and to segments

                betweenness is aggregated to intervening nodes based on above distances and decays
                other sections (in between current first and last) are respectively processed from other to nodes

                distance thresholds are computed using the inner as opposed to outer edges of the segments
                '''
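                # e.g. (illustrative numbers) to_dist = 300 with a 40m origin segment and 60m last segment:
                # min_span = 300 - 40 - 60 = 200
                # the origin segment then spans 200 -> 240 and the last segment spans 200 -> 260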
                o_seg_len = edge_data[int(tree_origin_seg[to_idx])][2]
                l_seg_len = edge_data[int(tree_last_seg[to_idx])][2]
                min_span = to_dist - o_seg_len - l_seg_len
                # calculate traversal distances from opposing segments
                o_1 = min_span
                o_2 = min_span + o_seg_len
                l_1 = min_span
                l_2 = min_span + l_seg_len
                # betweenness - only counting truly between vertices, not starting and ending verts
                inter_idx = int(tree_preds[to_idx])
                while True:
                    # break out of while loop if the intermediary has reached the source node
                    if inter_idx == src_idx:
                        break
                    # iterate the distance thresholds - from large to small for threshold snipping
                    for d_idx in range(len(distances) - 1, -1, -1):
                        dist_cutoff = distances[d_idx]
                        beta = betas[d_idx]
                        if min_span <= dist_cutoff:
                            # prune if necessary
                            if o_2 > dist_cutoff:
                                o_2 = dist_cutoff
                            if l_2 > dist_cutoff:
                                l_2 = dist_cutoff
                            # only one version for betweenness for respective angular / non angular
                            # i.e. no need to abstract to function
                            for m_idx in betw_idxs:
                                if not angular:
                                    # catch division by zero
                                    if beta == 0.0:
                                        auc = o_2 - o_1 + l_2 - l_1
                                    else:
                                        auc = (np.exp(-beta * o_2) -
                                               np.exp(-beta * o_1)) / -beta + \
                                              (np.exp(-beta * l_2) -
                                               np.exp(-beta * l_1)) / -beta
                                    shadow_arr[m_idx, d_idx, inter_idx] += auc
                                else:
                                    bt_ang = 1 + tree_simpl_dists[to_idx] / 180
                                    pt_a = o_2 - o_1
                                    pt_b = l_2 - l_1
                                    shadow_arr[m_idx, d_idx,
                                               inter_idx] += (pt_a +
                                                              pt_b) / bt_ang
                    # follow the chain
                    inter_idx = int(tree_preds[inter_idx])

        # reduction
        measures_data += shadow_arr

    return measures_data
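# --- illustrative sketch (not part of the source above) ---
# The betweenness "auc" aggregated above is the definite integral of the
# distance decay exp(-beta * x) over a segment span, which collapses to the
# plain span length as beta approaches zero. A standalone restatement of
# that identity (names here are illustrative):
import numpy as np

def segment_decay_integral(x1: float, x2: float, beta: float) -> float:
    # integral of exp(-beta * x) from x1 to x2
    if beta == 0.0:
        return x2 - x1  # no decay: the integral is simply the span length
    return (np.exp(-beta * x2) - np.exp(-beta * x1)) / -beta

# e.g. a 100m span starting at the source with beta = 0.02:
# segment_decay_integral(0, 100, 0.02) -> approx. 43.23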
Example #11
0
def local_node_centrality(node_data: np.ndarray,
                          edge_data: np.ndarray,
                          node_edge_map: Dict,
                          distances: np.ndarray,
                          betas: np.ndarray,
                          measure_keys: tuple,
                          jitter_scale: float = 0.0,
                          angular: bool = False,
                          progress_proxy=None) -> np.ndarray:
    # integrity checks
    checks.check_distances_and_betas(distances, betas)
    checks.check_network_maps(node_data, edge_data, node_edge_map)
    # gather functions
    close_funcs = List.empty_list(node_close_func_proto)
    close_idxs = []
    betw_funcs = List.empty_list(node_betw_func_proto)
    betw_idxs = []
    for m_idx, m_key in enumerate(measure_keys):
        if not angular:
            # closeness keys
            if m_key == 'node_density':
                close_funcs.append(node_density)
                close_idxs.append(m_idx)
            elif m_key == 'node_farness':
                close_funcs.append(node_farness)
                close_idxs.append(m_idx)
            elif m_key == 'node_cycles':
                close_funcs.append(node_cycles)
                close_idxs.append(m_idx)
            elif m_key == 'node_harmonic':
                close_funcs.append(node_harmonic)
                close_idxs.append(m_idx)
            elif m_key == 'node_beta':
                close_funcs.append(node_beta)
                close_idxs.append(m_idx)
            # betweenness keys
            elif m_key == 'node_betweenness':
                betw_funcs.append(node_betweenness)
                betw_idxs.append(m_idx)
            elif m_key == 'node_betweenness_beta':
                betw_funcs.append(node_betweenness_beta)
                betw_idxs.append(m_idx)
            else:
                raise ValueError('''
                Unable to match requested centrality measure key against available options.
                Shortest-path measures can't be mixed with simplest-path measures.
                Set angular=True if using simplest-path measures.''')
        else:
            # aggregative keys
            if m_key == 'node_harmonic_angular':
                close_funcs.append(node_harmonic_angular)
                close_idxs.append(m_idx)
            # betweenness keys
            elif m_key == 'node_betweenness_angular':
                betw_funcs.append(node_betweenness)
                betw_idxs.append(m_idx)
            else:
                raise ValueError('''
                Unable to match requested centrality measure key against available options.
                Shortest-path measures can't be mixed with simplest-path measures.
                Set angular=False if using shortest-path measures.''')
    # prepare variables
    n = len(node_data)
    d_n = len(distances)
    k_n = len(measure_keys)
    measures_data = np.full((k_n, d_n, n), 0.0, dtype=np.float32)
    global_max_dist = float(np.nanmax(distances))
    nodes_live = node_data[:, 2]
    # iterate through each vert and calculate the shortest path tree
    for src_idx in prange(n):
        shadow_arr = np.full((k_n, d_n, n), 0.0, dtype=np.float32)
        # numba no object mode can only handle basic printing
        # note that progress bar adds a performance penalty
        if progress_proxy is not None:
            progress_proxy.update(1)
        # only compute for live nodes
        if not nodes_live[src_idx]:
            continue
        '''
        Shortest tree dijkstra        
        Predecessor map is based on impedance heuristic - which can be different from metres
        Distance map in metres still necessary for defining max distances and computing equivalent distance measures
        RETURNS A SHORTEST PATH TREE MAP:
        0 - processed nodes
        1 - predecessors
        2 - shortest path distance
        3 - simplest path angular distance
        4 - cycles
        5 - origin segments
        6 - last segments
        '''
        tree_map, tree_edges = shortest_path_tree(edge_data,
                                                  node_edge_map,
                                                  src_idx,
                                                  max_dist=global_max_dist,
                                                  jitter_scale=jitter_scale,
                                                  angular=angular)
        tree_nodes = np.where(tree_map[:, 0])[0]
        tree_preds = tree_map[:, 1]
        tree_short_dists = tree_map[:, 2]
        tree_simpl_dists = tree_map[:, 3]
        tree_cycles = tree_map[:, 4]
        # process each reachable node
        for to_idx in tree_nodes:
            # skip self node
            if to_idx == src_idx:
                continue
            # unpack impedance and distance for to index
            to_short_dist = tree_short_dists[to_idx]
            to_simpl_dist = tree_simpl_dists[to_idx]
            cycles = tree_cycles[to_idx]
            # do not proceed if no route available
            if np.isinf(to_short_dist):
                continue
            # calculate closeness centralities
            if close_funcs:
                for d_idx in range(len(distances)):
                    dist_cutoff = distances[d_idx]
                    beta = betas[d_idx]
                    if to_short_dist <= dist_cutoff:
                        for m_idx, close_func in zip(close_idxs, close_funcs):
                            shadow_arr[m_idx, d_idx, src_idx] += close_func(
                                to_short_dist, to_simpl_dist, beta, cycles)
            # only process in one direction
            if to_idx < src_idx:
                continue
            # calculate betweenness centralities
            if betw_funcs:
                # only counting truly between vertices, not starting and ending verts
                inter_idx = int(tree_preds[to_idx])
                while True:
                    # break out of while loop if the intermediary has reached the source node
                    if inter_idx == src_idx:
                        break
                    # iterate the distance thresholds
                    for d_idx in range(len(distances)):
                        dist_cutoff = distances[d_idx]
                        beta = betas[d_idx]
                        # check threshold
                        if to_short_dist <= dist_cutoff:
                            # iterate betweenness functions
                            for m_idx, betw_func in zip(betw_idxs, betw_funcs):
                                shadow_arr[m_idx, d_idx,
                                           inter_idx] += betw_func(
                                               to_short_dist, beta)
                    # follow the chain
                    inter_idx = int(tree_preds[inter_idx])
        # reduce
        measures_data += shadow_arr

    return measures_data
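# --- illustrative sketch (not part of the source above) ---
# The close_funcs gathered above (node_density, node_harmonic, node_beta,
# etc.) are defined elsewhere and not shown in this excerpt. Plausible
# minimal forms matching the (to_short_dist, to_simpl_dist, beta, cycles)
# call signature used above might look as follows - these are assumptions
# for illustration, not the library's definitions:
import numpy as np

def node_density_sketch(to_short_dist, to_simpl_dist, beta, cycles):
    return 1.0  # a simple count of reachable nodes within the threshold

def node_harmonic_sketch(to_short_dist, to_simpl_dist, beta, cycles):
    return 1.0 / to_short_dist  # harmonic closeness contribution

def node_beta_sketch(to_short_dist, to_simpl_dist, beta, cycles):
    return np.exp(-beta * to_short_dist)  # gravity-style decayed contribution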
Example #12
0
def aggregate_stats(
    node_data: np.ndarray,
    edge_data: np.ndarray,
    node_edge_map: Dict,
    data_map: np.ndarray,
    distances: np.ndarray,
    betas: np.ndarray,
    numerical_arrays: np.ndarray = np.array(np.full((0, 0), np.nan)),
    jitter_scale: float = 0.0,
    angular: bool = False,
    progress_proxy=None
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray,
           np.ndarray, np.ndarray, np.ndarray]:
    """
    NODE MAP:
    0 - x
    1 - y
    2 - live
    EDGE MAP:
    0 - start node
    1 - end node
    2 - length in metres
    3 - sum of angular travel along length
    4 - impedance factor
    5 - in bearing
    6 - out bearing
    DATA MAP:
    0 - x
    1 - y
    2 - assigned network index - nearest
    3 - assigned network index - next-nearest
    """
    checks.check_network_maps(node_data, edge_data, node_edge_map)
    checks.check_data_map(
        data_map, check_assigned=True
    )  # raises ValueError if data points are not assigned to a network
    checks.check_distances_and_betas(distances, betas)
    # when passing an empty 2d array to numba, use: np.array(np.full((0, 0), np.nan))
    if numerical_arrays.shape[1] != len(data_map):
        raise ValueError(
            'The length of the numerical data arrays does not match the length of the data map.'
        )
    checks.check_numerical_data(numerical_arrays)
    # establish variables
    netw_n = len(node_data)
    d_n = len(distances)
    n_n = len(numerical_arrays)
    global_max_dist = float(np.nanmax(distances))
    netw_nodes_live = node_data[:, 2]
    # setup data structures
    stats_sum = np.full((n_n, d_n, netw_n), 0.0)
    stats_sum_wt = np.full((n_n, d_n, netw_n), 0.0)
    stats_mean = np.full((n_n, d_n, netw_n), np.nan)
    stats_mean_wt = np.full((n_n, d_n, netw_n), np.nan)
    stats_count = np.full((n_n, d_n, netw_n), 0.0)
    stats_count_wt = np.full((n_n, d_n, netw_n), 0.0)
    stats_variance = np.full((n_n, d_n, netw_n), np.nan)
    stats_variance_wt = np.full((n_n, d_n, netw_n), np.nan)
    stats_max = np.full((n_n, d_n, netw_n), np.nan)
    stats_min = np.full((n_n, d_n, netw_n), np.nan)
    # iterate through each vert and aggregate
    # parallelise over n nodes:
    # each distance or stat array index is therefore only touched by one thread at a time
    # i.e. no need to use inner array deductions as with centralities
    for netw_src_idx in prange(netw_n):
        if progress_proxy is not None:
            progress_proxy.update(1)
        # only compute for live nodes
        if not netw_nodes_live[netw_src_idx]:
            continue
        # generate the reachable classes and their respective distances
        # these are non-unique - i.e. simply the class of each data point within the maximum distance
        # the aggregate_to_src_idx method will choose the closer direction of approach to a data point
        # from the nearest or next-nearest network node (calculated once globally, prior to local_landuses method)
        reachable_data, reachable_data_dist, tree_preds = aggregate_to_src_idx(
            netw_src_idx,
            node_data,
            edge_data,
            node_edge_map,
            data_map,
            global_max_dist,
            jitter_scale=jitter_scale,
            angular=angular)
        # IDW
        # the order of the loops matters because the nested aggregations happen per distance per numerical array
        # iterate the reachable indices and related distances
        for data_idx, (reachable, data_dist) in enumerate(
                zip(reachable_data, reachable_data_dist)):
            # data points beyond the max threshold distance are flagged as unreachable - skip accordingly
            # this happens when within radial max distance, but beyond network max distance
            if not reachable:
                continue
            # iterate the numerical arrays dimension
            for num_idx in range(n_n):
                # some values will be NaN
                num = numerical_arrays[num_idx, int(data_idx)]
                if np.isnan(num):
                    continue
                # iterate the distance dimensions
                for d_idx, (d, b) in enumerate(zip(distances, betas)):
                    # increment mean aggregations at respective distances if within the current threshold d
                    if data_dist <= d:
                        # aggregate
                        stats_sum[num_idx, d_idx, netw_src_idx] += num
                        stats_count[num_idx, d_idx, netw_src_idx] += 1
                        stats_sum_wt[num_idx, d_idx,
                                     netw_src_idx] += num * np.exp(
                                         -b * data_dist)
                        stats_count_wt[num_idx, d_idx,
                                       netw_src_idx] += np.exp(-b * data_dist)
                        # max
                        if np.isnan(stats_max[num_idx, d_idx, netw_src_idx]):
                            stats_max[num_idx, d_idx, netw_src_idx] = num
                        elif num > stats_max[num_idx, d_idx, netw_src_idx]:
                            stats_max[num_idx, d_idx, netw_src_idx] = num
                        # min
                        if np.isnan(stats_min[num_idx, d_idx, netw_src_idx]):
                            stats_min[num_idx, d_idx, netw_src_idx] = num
                        elif num < stats_min[num_idx, d_idx, netw_src_idx]:
                            stats_min[num_idx, d_idx, netw_src_idx] = num
        # finalise mean calculations - this is happening for a single netw_src_idx, so fairly fast
        for num_idx in range(n_n):
            for d_idx in range(d_n):
                # use np.divide so that a zero count doesn't raise a division error
                stats_mean[num_idx, d_idx, netw_src_idx] = np.divide(
                    stats_sum[num_idx, d_idx, netw_src_idx],
                    stats_count[num_idx, d_idx, netw_src_idx])
                stats_mean_wt[num_idx, d_idx, netw_src_idx] = np.divide(
                    stats_sum_wt[num_idx, d_idx, netw_src_idx],
                    stats_count_wt[num_idx, d_idx, netw_src_idx])
        # calculate variances - counts are already computed per above
        # weighted version is IDW by division through equivalently weighted counts above
        # iterate the reachable indices and related distances
        for data_idx, (reachable, data_dist) in enumerate(
                zip(reachable_data, reachable_data_dist)):
            # data points beyond the max threshold distance are flagged as unreachable - skip accordingly
            # this happens when within radial max distance, but beyond network max distance
            if not reachable:
                continue
            # iterate the numerical arrays dimension
            for num_idx in range(n_n):
                # some values will be NaN
                num = numerical_arrays[num_idx, int(data_idx)]
                if np.isnan(num):
                    continue
                # iterate the distance dimensions
                for d_idx, (d, b) in enumerate(zip(distances, betas)):
                    # increment variance aggregations at respective distances if within the current threshold d
                    if data_dist <= d:
                        # aggregate
                        if np.isnan(stats_variance[num_idx, d_idx,
                                                   netw_src_idx]):
                            stats_variance[num_idx, d_idx, netw_src_idx] = \
                                np.square(num - stats_mean[num_idx, d_idx, netw_src_idx])
                            stats_variance_wt[num_idx, d_idx, netw_src_idx] = \
                                np.square(num - stats_mean_wt[num_idx, d_idx, netw_src_idx]) * np.exp(-b * data_dist)
                        else:
                            stats_variance[num_idx, d_idx, netw_src_idx] += \
                                np.square(num - stats_mean[num_idx, d_idx, netw_src_idx])
                            stats_variance_wt[num_idx, d_idx, netw_src_idx] += \
                                np.square(num - stats_mean_wt[num_idx, d_idx, netw_src_idx]) * np.exp(-b * data_dist)
        # finalise variance calculations
        for num_idx in range(n_n):
            for d_idx in range(d_n):
                # use np.divide so that a zero count doesn't raise a division error
                stats_variance[num_idx, d_idx, netw_src_idx] = np.divide(
                    stats_variance[num_idx, d_idx, netw_src_idx],
                    stats_count[num_idx, d_idx, netw_src_idx])
                stats_variance_wt[num_idx, d_idx, netw_src_idx] = np.divide(
                    stats_variance_wt[num_idx, d_idx, netw_src_idx],
                    stats_count_wt[num_idx, d_idx, netw_src_idx])

    return stats_sum, stats_sum_wt, stats_mean, stats_mean_wt, stats_variance, stats_variance_wt, stats_max, stats_min
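# --- illustrative sketch (not part of the source above) ---
# The two-pass aggregation above first accumulates (weighted) sums and
# counts, divides through for the means, then accumulates squared deviations
# for the variances. For a single (num_idx, d_idx, netw_src_idx) cell the
# same quantities can be cross-checked directly with numpy - the values
# below are illustrative:
import numpy as np

vals = np.array([2.0, 4.0, 6.0])  # reachable numerical values
dists = np.array([100.0, 200.0, 300.0])  # network distances to each value
beta = 0.01
wts = np.exp(-beta * dists)  # distance-decay weights

mean_wt = np.sum(vals * wts) / np.sum(wts)  # cf. stats_mean_wt
var_wt = np.sum(np.square(vals - mean_wt) * wts) / np.sum(wts)  # cf. stats_variance_wt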
Example #13
0
def aggregate_landuses(
    node_data: np.ndarray,
    edge_data: np.ndarray,
    node_edge_map: Dict,
    data_map: np.ndarray,
    distances: np.ndarray,
    betas: np.ndarray,
    landuse_encodings: np.ndarray = np.array([]),
    qs: np.ndarray = np.array([]),
    mixed_use_hill_keys: np.ndarray = np.array([]),
    mixed_use_other_keys: np.ndarray = np.array([]),
    accessibility_keys: np.ndarray = np.array([]),
    cl_disparity_wt_matrix: np.ndarray = np.array(np.full((0, 0), np.nan)),
    jitter_scale: float = 0.0,
    angular: bool = False,
    progress_proxy=None
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """
    NODE MAP:
    0 - x
    1 - y
    2 - live
    EDGE MAP:
    0 - start node
    1 - end node
    2 - length in metres
    3 - sum of angular travel along length
    4 - impedance factor
    5 - in bearing
    6 - out bearing
    DATA MAP:
    0 - x
    1 - y
    2 - assigned network index - nearest
    3 - assigned network index - next-nearest
    """
    checks.check_network_maps(node_data, edge_data, node_edge_map)
    checks.check_data_map(
        data_map, check_assigned=True
    )  # raises ValueError if data points are not assigned to a network
    checks.check_distances_and_betas(distances, betas)
    # check landuse encodings
    if len(landuse_encodings) == 0:
        raise ValueError(
            'Mixed use metrics or land-use accessibilities require an array of landuse labels.'
        )
    elif len(landuse_encodings) != len(data_map):
        raise ValueError(
            'The number of landuse encodings does not match the number of data points.'
        )
    else:
        checks.check_categorical_data(landuse_encodings)
    # catch completely missing metrics
    if len(mixed_use_hill_keys) == 0 and len(
            mixed_use_other_keys) == 0 and len(accessibility_keys) == 0:
        raise ValueError(
            'No metrics specified, please specify at least one metric to compute.'
        )
    # catch missing qs
    if len(mixed_use_hill_keys) != 0 and len(qs) == 0:
        raise ValueError(
            'Hill diversity measures require that at least one value of q is specified.'
        )
    # negative qs caught by hill diversity methods
    # check various problematic key combinations
    if len(mixed_use_hill_keys) != 0:
        if np.nanmin(mixed_use_hill_keys) < 0 or np.max(
                mixed_use_hill_keys) > 3:
            raise ValueError('Mixed-use "hill" keys out of range of 0:4.')
    if len(mixed_use_other_keys) != 0:
        if np.nanmin(mixed_use_other_keys) < 0 or np.max(
                mixed_use_other_keys) > 2:
            raise ValueError('Mixed-use "other" keys out of range of 0:3.')
    if len(accessibility_keys) != 0:
        max_ac_key = np.nanmax(landuse_encodings)
        if np.nanmin(accessibility_keys) < 0 or np.max(
                accessibility_keys) > max_ac_key:
            raise ValueError(
                'Negative or out of range accessibility key encountered. Keys must match class encodings.'
            )
    for i in range(len(mixed_use_hill_keys)):
        for j in range(len(mixed_use_hill_keys)):
            if j > i:
                i_key = mixed_use_hill_keys[i]
                j_key = mixed_use_hill_keys[j]
                if i_key == j_key:
                    raise ValueError('Duplicate mixed-use "hill" key.')
    for i in range(len(mixed_use_other_keys)):
        for j in range(len(mixed_use_other_keys)):
            if j > i:
                i_key = mixed_use_other_keys[i]
                j_key = mixed_use_other_keys[j]
                if i_key == j_key:
                    raise ValueError('Duplicate mixed-use "other" key.')
    for i in range(len(accessibility_keys)):
        for j in range(len(accessibility_keys)):
            if j > i:
                i_key = accessibility_keys[i]
                j_key = accessibility_keys[j]
                if i_key == j_key:
                    raise ValueError('Duplicate accessibility key.')

    def disp_check(disp_matrix):
        # the length of the disparity matrix vis-a-vis unique landuses is tested in underlying diversity functions
        if disp_matrix.ndim != 2 or disp_matrix.shape[0] != disp_matrix.shape[
                1]:
            raise ValueError(
                'The disparity matrix must be a square NxN matrix.')
        if len(disp_matrix) == 0:
            raise ValueError(
                'Hill disparity and Rao pairwise measures require a class disparity weights matrix.'
            )

    # check that missing or malformed disparity weights matrices are caught
    for k in mixed_use_hill_keys:
        if k == 3:  # hill disparity
            disp_check(cl_disparity_wt_matrix)
    for k in mixed_use_other_keys:
        if k == 2:  # raos pairwise
            disp_check(cl_disparity_wt_matrix)
    # establish variables
    netw_n = len(node_data)
    d_n = len(distances)
    q_n = len(qs)
    global_max_dist = float(np.nanmax(distances))
    netw_nodes_live = node_data[:, 2]
    # setup data structures
    # hill mixed uses are structured separately to take values of q into account
    mixed_use_hill_data = np.full((4, q_n, d_n, netw_n), 0.0)  # 4 dim
    mixed_use_other_data = np.full((3, d_n, netw_n), 0.0)  # 3 dim
    accessibility_data = np.full((len(accessibility_keys), d_n, netw_n), 0.0)
    accessibility_data_wt = np.full((len(accessibility_keys), d_n, netw_n),
                                    0.0)
    # iterate through each vert and aggregate
    # parallelise over n nodes:
    # each distance or stat array index is therefore only touched by one thread at a time
    # i.e. no need to use inner array deductions as with centralities
    for netw_src_idx in prange(netw_n):
        if progress_proxy is not None:
            progress_proxy.update(1)
        # only compute for live nodes
        if not netw_nodes_live[netw_src_idx]:
            continue
        # generate the reachable classes and their respective distances
        # these are non-unique - i.e. simply the class of each data point within the maximum distance
        # the aggregate_to_src_idx method will choose the closer direction of approach to a data point
        # from the nearest or next-nearest network node (calculated once globally, prior to local_landuses method)
        reachable_data, reachable_data_dist, tree_preds = aggregate_to_src_idx(
            netw_src_idx,
            node_data,
            edge_data,
            node_edge_map,
            data_map,
            global_max_dist,
            jitter_scale=jitter_scale,
            angular=angular)
        # LANDUSES
        mu_max_unique_cl = int(landuse_encodings.max() + 1)
        # counts of each class type (array length per max unique classes - not just those within max distance)
        classes_counts = np.full((d_n, mu_max_unique_cl), 0)
        # nearest of each class type (likewise)
        classes_nearest = np.full((d_n, mu_max_unique_cl), np.inf)
        # iterate the reachable indices and related distances
        for data_idx, (reachable, data_dist) in enumerate(
                zip(reachable_data, reachable_data_dist)):
            if not reachable:
                continue
            # get the class category in integer form
            # all class codes were encoded to sequential integers - these correspond to the array indices
            cl_code = int(landuse_encodings[int(data_idx)])
            # iterate the distance dimensions
            for d_idx, (d, b) in enumerate(zip(distances, betas)):
                # increment class counts at respective distances if within the current threshold d
                if data_dist <= d:
                    classes_counts[d_idx, cl_code] += 1
                    # if distance is nearer, update the nearest distance array too
                    if data_dist < classes_nearest[d_idx, cl_code]:
                        classes_nearest[d_idx, cl_code] = data_dist
                    # if within distance, and if in accessibility keys, then aggregate accessibility too
                    for ac_idx, ac_code in enumerate(accessibility_keys):
                        if ac_code == cl_code:
                            accessibility_data[ac_idx, d_idx,
                                               netw_src_idx] += 1
                            accessibility_data_wt[ac_idx, d_idx,
                                                  netw_src_idx] += np.exp(
                                                      -b * data_dist)
                            # if a match was found, then no need to check others
                            break
        # mixed uses can be calculated now that the local class counts are aggregated
        # iterate the distances and betas
        for d_idx, b in enumerate(betas):
            cl_counts = classes_counts[d_idx]
            cl_nearest = classes_nearest[d_idx]
            # mu keys determine which metrics to compute
            # don't confuse with indices
            # previously used dynamic indices in data structures - but obtuse if irregularly ordered keys
            for mu_hill_key in mixed_use_hill_keys:
                for q_idx, q_key in enumerate(qs):
                    if mu_hill_key == 0:
                        mixed_use_hill_data[0, q_idx, d_idx, netw_src_idx] = \
                            diversity.hill_diversity(cl_counts, q_key)
                    elif mu_hill_key == 1:
                        mixed_use_hill_data[1, q_idx, d_idx, netw_src_idx] = \
                            diversity.hill_diversity_branch_distance_wt(cl_counts, cl_nearest, q=q_key, beta=b)
                    elif mu_hill_key == 2:
                        mixed_use_hill_data[2, q_idx, d_idx, netw_src_idx] = \
                            diversity.hill_diversity_pairwise_distance_wt(cl_counts, cl_nearest, q=q_key, beta=b)
                    # land-use classification disparity hill diversity
                    # the wt matrix can be used without mapping because cl_counts is based on all classes
                    # regardless of whether they are reachable
                    elif mu_hill_key == 3:
                        mixed_use_hill_data[3, q_idx, d_idx, netw_src_idx] = \
                            diversity.hill_diversity_pairwise_matrix_wt(cl_counts,
                                                                        wt_matrix=cl_disparity_wt_matrix,
                                                                        q=q_key)
            for mu_other_key in mixed_use_other_keys:
                if mu_other_key == 0:
                    mixed_use_other_data[0, d_idx, netw_src_idx] = \
                        diversity.shannon_diversity(cl_counts)
                elif mu_other_key == 1:
                    mixed_use_other_data[1, d_idx, netw_src_idx] = \
                        diversity.gini_simpson_diversity(cl_counts)
                elif mu_other_key == 2:
                    mixed_use_other_data[2, d_idx, netw_src_idx] = \
                        diversity.raos_quadratic_diversity(cl_counts, wt_matrix=cl_disparity_wt_matrix)
    # send the data back in the same types and same order as the original keys - convert to int for indexing
    mu_hill_k_int = np.full(len(mixed_use_hill_keys), 0)
    for i, k in enumerate(mixed_use_hill_keys):
        mu_hill_k_int[i] = k
    mu_other_k_int = np.full(len(mixed_use_other_keys), 0)
    for i, k in enumerate(mixed_use_other_keys):
        mu_other_k_int[i] = k

    return mixed_use_hill_data[mu_hill_k_int], \
           mixed_use_other_data[mu_other_k_int], \
           accessibility_data, \
           accessibility_data_wt
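
# --- illustrative sketch (not from the library) ---
# A minimal numpy re-derivation of unweighted Hill diversity as used above, assuming the
# standard definition D_q = (sum_i p_i^q)^(1 / (1 - q)), with q == 1 taken as the limit
# exp(Shannon entropy). The function name is hypothetical - a sanity check against
# diversity.hill_diversity, not a drop-in replacement.
import numpy as np

def hill_diversity_sketch(class_counts: np.ndarray, q: float) -> float:
    total = class_counts.sum()
    if total == 0:
        return 0.0
    # drop zero counts to avoid log(0); probabilities sum to 1
    probs = class_counts[class_counts > 0] / total
    if np.isclose(q, 1.0):
        # limit case: exp of Shannon entropy
        return float(np.exp(-np.sum(probs * np.log(probs))))
    return float(np.sum(probs ** q) ** (1 / (1 - q)))

# e.g. counts of 4, 4, 2 across three classes: q=0 returns richness (3.0),
# whereas larger q increasingly discounts rarer classes
print(hill_diversity_sketch(np.array([4, 4, 2]), q=0.0))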
Example #14
0
def assign_to_network(data_map: np.ndarray,
                      node_data: np.ndarray,
                      edge_data: np.ndarray,
                      node_edge_map: Dict,
                      max_dist: float,
                      progress_proxy=None) -> np.ndarray:
    """
    To avoid unnecessary computation, this assignment is done once and written to the data map.

    1 - find the closest network node to each data point
    2A - wind clockwise along the network to preferably find a block cycle surrounding the node
    2B - in the event of topological traps, try anti-clockwise as well
    3A - select the closest block cycle node
    3B - if there is no enclosing cycle, simply use the closest node
    4 - find the neighbouring node that minimises the distance from the data point to the "street-front"
    NODE MAP:
    0 - x
    1 - y
    2 - live
    EDGE MAP:
    0 - start node
    1 - end node
    2 - length in metres
    3 - sum of angular travel along length
    4 - impedance factor
    5 - entry bearing
    6 - exit bearing
    DATA MAP:
    0 - x
    1 - y
    2 - assigned network index - nearest
    3 - assigned network index - next-nearest
    """
    checks.check_network_maps(node_data, edge_data, node_edge_map)

    netw_coords = node_data[:, :2]
    netw_x_arr = node_data[:, 0]
    netw_y_arr = node_data[:, 1]
    data_coords = data_map[:, :2]
    data_x_arr = data_map[:, 0]
    data_y_arr = data_map[:, 1]
    total_count = len(data_map)
    for data_idx in prange(total_count):
        if progress_proxy is not None:
            progress_proxy.update(1)
        # find the nearest network node
        min_idx, min_dist = find_nearest(data_x_arr[data_idx],
                                         data_y_arr[data_idx], netw_x_arr,
                                         netw_y_arr, max_dist)
        # in some cases no network node will be within max_dist... so accept NaN
        if np.isnan(min_idx):
            continue
        # nearest is initially set to this closest node, but will be overridden if a nearer street-edge is found
        nearest = min_idx
        next_nearest = np.nan
        # set start node to nearest network node
        node_idx = int(min_idx)
        # keep track of visited nodes
        pred_map = np.full(len(node_data), np.nan)
        # state
        reversing = False
        # keep track of previous indices
        prev_idx = np.nan
        # iterate neighbours
        while True:
            # reset neighbour rotation and index counters
            rotation = np.nan
            nb_idx = np.nan
            # iterate the edges
            for edge_idx in node_edge_map[node_idx]:
                # get the edge's start and end node indices
                start, end = edge_data[edge_idx, :2]
                # cast to int for indexing
                new_idx = int(end)
                # don't follow self-loops
                if new_idx == node_idx:
                    continue
                # check that this isn't the previous node (already visited as neighbour from other direction)
                if np.isfinite(prev_idx) and new_idx == prev_idx:
                    continue
                # look for the new neighbour with the smallest rightwards (anti-clockwise arctan2) angle
                # measure the angle relative to the data point for the first node
                if np.isnan(prev_idx):
                    r = _calculate_rotation(
                        netw_coords[int(new_idx)] - netw_coords[node_idx],
                        data_coords[data_idx] - netw_coords[node_idx])
                # else relative to the previous node
                else:
                    r = _calculate_rotation(
                        netw_coords[int(new_idx)] - netw_coords[node_idx],
                        netw_coords[int(prev_idx)] - netw_coords[node_idx])
                if reversing:
                    r = 360 - r
                # if least angle, update
                if np.isnan(rotation) or r < rotation:
                    rotation = r
                    nb_idx = new_idx
            # allow backtracking if no neighbour is found - i.e. dead-ends
            if np.isnan(nb_idx):
                if np.isnan(pred_map[node_idx]):
                    # for isolated nodes: nb_idx == np.nan, pred_map[node_idx] == np.nan, and prev_idx == np.nan
                    if np.isnan(prev_idx):
                        break
                    # for isolated edges, the algorithm gets turned-around back to the starting node with nowhere to go
                    # nb_idx == np.nan, pred_map[node_idx] == np.nan
                    # in these cases, pass _closest_intersections the prev idx so that it has a predecessor to follow
                    d, n, n_n = _closest_intersections(node_data,
                                                       data_coords[data_idx],
                                                       pred_map, int(prev_idx))
                    if d < min_dist:
                        nearest = n
                        next_nearest = n_n
                    break
                # otherwise, go ahead and backtrack
                nb_idx = pred_map[node_idx]
            # if the distance is exceeded, reset and attempt in the other direction
            dist = np.hypot(netw_x_arr[int(nb_idx)] - data_x_arr[data_idx],
                            netw_y_arr[int(nb_idx)] - data_y_arr[data_idx])
            if dist > max_dist:
                pred_map[int(nb_idx)] = node_idx
                d, n, n_n = _closest_intersections(node_data,
                                                   data_coords[data_idx],
                                                   pred_map, int(nb_idx))
                # if the distance to the street edge is less than that to the nearest node, or to the prior closest edge
                if d < min_dist:
                    min_dist = d
                    nearest = n
                    next_nearest = n_n
                # reverse and try in opposite direction
                if not reversing:
                    reversing = True
                    pred_map.fill(np.nan)
                    node_idx = int(min_idx)
                    prev_idx = np.nan
                    continue
                break
            # ignore the following conditions while backtracking
            # (if backtracking, the current node's predecessor will be equal to the new neighbour)
            if nb_idx != pred_map[node_idx]:
                # if the new nb node has already been visited then terminate; this prevents infinite loops
                # or, if the algorithm has circled the block back to the original starting node
                if not np.isnan(pred_map[int(nb_idx)]) or nb_idx == min_idx:
                    # set the final predecessor, BUT ONLY if the original node has been re-encountered
                    # otherwise routes (e.g. backtracks) that have passed the same node twice would be occluded
                    # (such routes are still able to recover the closest edge)
                    if nb_idx == min_idx:
                        pred_map[int(nb_idx)] = node_idx
                    d, n, n_n = _closest_intersections(node_data,
                                                       data_coords[data_idx],
                                                       pred_map, int(nb_idx))
                    if d < min_dist:
                        nearest = n
                        next_nearest = n_n
                    break
                # set predecessor (only if not backtracking)
                pred_map[int(nb_idx)] = node_idx
            # otherwise, keep going
            prev_idx = node_idx
            node_idx = int(nb_idx)
        # set in the data map
        # no race condition in spite of direct indexing: each data_idx is written exactly once under prange
        data_map[data_idx, 2] = nearest  # adj_idx
        # in some cases next nearest will be NaN
        # this is mostly in situations where it works to leave as NaN
        # e.g. access off dead-ends...
        data_map[data_idx, 3] = next_nearest  # next_adj_idx

    return data_map
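
# --- illustrative sketch (not from the library) ---
# find_nearest is referenced above but not shown in this example; this is a minimal
# stand-in under the assumption that it returns the (index, distance) of the closest
# network node within max_dist, or (NaN, NaN) when no node qualifies.
import numpy as np

def find_nearest_sketch(x: float, y: float,
                        netw_x_arr: np.ndarray, netw_y_arr: np.ndarray,
                        max_dist: float):
    # euclidean distances from the data point to all network nodes
    dists = np.hypot(netw_x_arr - x, netw_y_arr - y)
    min_idx = int(np.argmin(dists))
    # accept NaN when nothing is within range, mirroring the caller's check
    if dists[min_idx] > max_dist:
        return np.nan, np.nan
    return min_idx, float(dists[min_idx])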
Example #15
0
def local_centrality(node_data: np.ndarray,
                     edge_data: np.ndarray,
                     node_edge_map: Dict,
                     distances: np.ndarray,
                     betas: np.ndarray,
                     measure_keys: tuple,
                     angular: bool = False,
                     suppress_progress: bool = False) -> np.ndarray:
    '''
    Called from "compute_centrality", which handles high-level checks on keys and the heuristic flag
    NODE MAP:
    0 - x
    1 - y
    2 - live
    3 - ghosted
    EDGE MAP:
    0 - start node
    1 - end node
    2 - length in metres
    3 - sum of angular travel along length
    4 - impedance factor
    5 - in bearing
    6 - out bearing
    '''
    checks.check_distances_and_betas(distances, betas)
    checks.check_network_maps(node_data, edge_data, node_edge_map)
    # string comparisons will substantially slow down nested loops
    # hence the out-of-loop strategy to map strings to indices corresponding to respective measures
    # keep name and index relationships explicit
    agg_keys = []
    agg_targets = []
    seg_keys = []
    seg_targets = []
    betw_keys = []
    betw_targets = []
    for m_idx, measure_name in enumerate(measure_keys):
        if not angular:
            # aggregating keys
            if measure_name == 'node_density':
                agg_keys.append(0)
                agg_targets.append(m_idx)
            elif measure_name == 'node_farness':
                agg_keys.append(1)
                agg_targets.append(m_idx)
            elif measure_name == 'node_cycles':
                agg_keys.append(2)
                agg_targets.append(m_idx)
            elif measure_name == 'node_harmonic':
                agg_keys.append(3)
                agg_targets.append(m_idx)
            elif measure_name == 'node_beta':
                agg_keys.append(4)
                agg_targets.append(m_idx)
            # segment keys (betweenness segments can be built during betweenness iters)
            elif measure_name == 'segment_density':
                seg_keys.append(0)
                seg_targets.append(m_idx)
            elif measure_name == 'segment_harmonic':
                seg_keys.append(1)
                seg_targets.append(m_idx)
            elif measure_name == 'segment_beta':
                seg_keys.append(2)
                seg_targets.append(m_idx)
            # betweenness keys
            elif measure_name == 'node_betweenness':
                betw_keys.append(0)
                betw_targets.append(m_idx)
            elif measure_name == 'node_betweenness_beta':
                betw_keys.append(1)
                betw_targets.append(m_idx)
            elif measure_name == 'segment_betweenness':
                betw_keys.append(2)
                betw_targets.append(m_idx)
            else:
                raise ValueError('''
                    Unable to match requested centrality measure key against available options.
                    Shortest-path measures can't be mixed with simplest-path measures.
                    Set angular=True if using simplest-path measures. 
                ''')
        else:
            # aggregating keys
            if measure_name == 'node_harmonic_angular':
                agg_keys.append(5)
                agg_targets.append(m_idx)
            # segment keys
            elif measure_name == 'segment_harmonic_hybrid':
                seg_keys.append(3)
                seg_targets.append(m_idx)
            # betweenness keys
            elif measure_name == 'node_betweenness_angular':
                betw_keys.append(3)
                betw_targets.append(m_idx)
            elif measure_name == 'segment_betweenness_hybrid':
                betw_keys.append(4)
                betw_targets.append(m_idx)
            else:
                raise ValueError('''
                    Unable to match requested centrality measure key against available options.
                    Shortest-path measures can't be mixed with simplest-path measures.
                    Set angular=False if using shortest-path measures. 
                ''')
    if len(agg_keys) != len(set(agg_keys)) or \
            len(seg_keys) != len(set(seg_keys)) or \
            len(betw_keys) != len(set(betw_keys)):
        raise ValueError('Please remove duplicate measure key.')
    # flags
    betw_nodes = (0 in betw_keys or 1 in betw_keys or 3 in betw_keys)
    betw_segs = (2 in betw_keys or 4 in betw_keys)
    # prepare data arrays
    # establish variables
    n = len(node_data)
    d_n = len(distances)
    k_n = len(measure_keys)
    global_max_dist = np.nanmax(distances)
    nodes_live = node_data[:, 2]
    nodes_ghosted = node_data[:, 3]
    # the shortest path is based on impedances - be cognisant of cases where impedances are not based on true distance:
    # in such cases, the distance map traces the impedance-heuristic shortest path, not the shortest metre distance
    measures_data = np.full((k_n, d_n, n), 0.0, dtype=np.float32)
    steps = int(n / 10000)
    # iterate through each vert and calculate the shortest path tree
    for src_idx in range(n):
        # numba no object mode can only handle basic printing
        # note that progress bar adds a performance penalty
        if not suppress_progress:
            checks.progress_bar(src_idx, n, steps)
        # only compute for live nodes
        if not nodes_live[src_idx]:
            continue
        '''
        run the shortest tree dijkstra
        keep in mind that predecessor map is based on impedance heuristic - which can be different from metres
        distance map in metres still necessary for defining max distances and computing equivalent distance measures
        RETURNS A SHORTEST PATH TREE MAP:
        0 - processed nodes
        1 - predecessors
        2 - distances
        3 - impedances
        4 - cycles
        5 - origin segments
        6 - last segments
        '''
        tree_map, tree_edges = shortest_path_tree(edge_data,
                                                  node_edge_map,
                                                  src_idx,
                                                  max_dist=global_max_dist,
                                                  angular=angular)
        tree_nodes = np.where(tree_map[:, 0])[0]
        tree_preds = tree_map[:, 1]
        tree_dists = tree_map[:, 2]
        tree_imps = tree_map[:, 3]
        tree_cycles = tree_map[:, 4]
        tree_origin_seg = tree_map[:, 5]
        tree_last_seg = tree_map[:, 6]
        # only build edge data if necessary
        if len(seg_keys) > 0:
            # can't do edge processing as part of shortest tree because all shortest paths have to be resolved first
            # visit all processed edges
            for edge_idx in np.where(tree_edges)[0]:
                # unpack
                seg_in_nd, seg_out_nd, seg_len, seg_ang, seg_imp_fact, seg_in_bear, seg_out_bear = edge_data[
                    edge_idx]
                in_nd_idx = int(seg_in_nd)
                out_nd_idx = int(seg_out_nd)
                in_imp = tree_imps[in_nd_idx]
                out_imp = tree_imps[out_nd_idx]
                in_dist = tree_dists[in_nd_idx]
                out_dist = tree_dists[out_nd_idx]
                # don't process unreachable segments
                if np.isinf(in_dist) and np.isinf(out_dist):
                    continue
                # for conceptual simplicity, separate the angular and non-angular workflows
                # the non-angular workflow uses a split-segment approach
                # if the segment is on the shortest path, the second segment collapses to zero length
                if not angular:
                    # sort where a < b
                    if in_imp <= out_imp:
                        a = tree_dists[in_nd_idx]
                        a_imp = tree_imps[in_nd_idx]
                        b = tree_dists[out_nd_idx]
                        b_imp = tree_imps[out_nd_idx]
                    else:
                        a = tree_dists[out_nd_idx]
                        a_imp = tree_imps[out_nd_idx]
                        b = tree_dists[in_nd_idx]
                        b_imp = tree_imps[in_nd_idx]
                    # the point of max distance along the segment satisfies: seg_len = (c - a) + (c - b)
                    # hence c = d = (seg_len + a + b) / 2; c and d can diverge below when snipped to thresholds
                    c = d = (seg_len + a + b) / 2
                    # c / d impedance should technically be the same if computed from either side
                    c_imp = d_imp = a_imp + (c - a) * seg_imp_fact
                    # iterate the distance and beta thresholds - from large to small for threshold snipping
                    for d_idx in range(len(distances) - 1, -1, -1):
                        dist_cutoff = distances[d_idx]
                        beta = betas[d_idx]
                        # a-c segment
                        if a <= dist_cutoff:
                            if c > dist_cutoff:
                                c = dist_cutoff
                                c_imp = a_imp + (dist_cutoff -
                                                 a) * seg_imp_fact
                            for seg_idx, seg_key in enumerate(seg_keys):
                                m_idx = seg_targets[seg_idx]
                                if seg_key == 0:
                                    measures_data[m_idx, d_idx,
                                                  src_idx] += c - a
                                elif seg_key == 1:
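                                    # harmonic segments: integral of 1 / x over [a_imp, c_imp] = ln(c_imp) - ln(a_imp)
                                    # a_imp is effectively clamped below 1 to avoid negative log contributions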
                                    if a_imp < 1:
                                        measures_data[m_idx, d_idx,
                                                      src_idx] += np.log(c_imp)
                                    else:
                                        measures_data[
                                            m_idx, d_idx, src_idx] += np.log(
                                                c_imp) - np.log(a_imp)
                                elif seg_key == 2:
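                                    # catch division by zero
                                    # in the limit beta -> 0 the area-under-curve reduces to c_imp - a_imp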
                                    if beta == -0.0:
                                        auc = c_imp - a_imp
                                    else:
                                        auc = (np.exp(beta * c_imp) -
                                               np.exp(beta * a_imp)) / beta
                                    measures_data[m_idx, d_idx, src_idx] += auc
                        # b-d segment - if on the shortest path then d == b - in which case, continue
                        if b == d:
                            continue
                        if b <= dist_cutoff:
                            if d > dist_cutoff:
                                d = dist_cutoff
                                d_imp = b_imp + (dist_cutoff -
                                                 b) * seg_imp_fact
                            for seg_idx, seg_key in enumerate(seg_keys):
                                m_idx = seg_targets[seg_idx]
                                if seg_key == 0:
                                    measures_data[m_idx, d_idx,
                                                  src_idx] += d - b
                                elif seg_key == 1:
                                    if b_imp < 1:
                                        measures_data[m_idx, d_idx,
                                                      src_idx] += np.log(d_imp)
                                    else:
                                        measures_data[
                                            m_idx, d_idx, src_idx] += np.log(
                                                d_imp) - np.log(b_imp)
                                elif seg_key == 2:
                                    # catch division by zero
                                    # as beta approaches 0 the distance is weighted by 1 instead of < 1
                                    if beta == -0.0:
                                        auc = d_imp - b_imp
                                    else:
                                        auc = (np.exp(beta * d_imp) -
                                               np.exp(beta * b_imp)) / beta
                                    measures_data[m_idx, d_idx, src_idx] += auc
                # the angular workflow differs - it uses a single segment
                # splitting segments would otherwise require many assumptions about angular vs. distance shortest-paths
                else:
                    # get the approach angles for either side
                    # this involves impedance up to that point plus the turn onto the segment
                    # also add half of the segment's length-wise angular impedance
                    in_ang = in_imp + seg_ang / 2
                    # the source node won't have a predecessor
                    if in_nd_idx != src_idx:
                        # get the out bearing from the predecessor and calculate the turn onto current seg's in bearing
                        in_pred_idx = int(tree_preds[in_nd_idx])
                        e_i = _find_edge_idx(node_edge_map, edge_data,
                                             in_pred_idx, in_nd_idx)
                        in_pred_out_bear = edge_data[int(e_i), 6]
                        in_ang += np.abs(
                            (seg_in_bear - in_pred_out_bear + 180) % 360 - 180)
                    # same for other side
                    out_ang = out_imp + seg_ang / 2
                    if out_nd_idx != src_idx:
                        out_pred_idx = int(tree_preds[out_nd_idx])
                        e_i = _find_edge_idx(node_edge_map, edge_data,
                                             out_pred_idx, out_nd_idx)
                        out_pred_out_bear = edge_data[int(e_i), 6]
                        out_ang += np.abs(
                            (seg_out_bear - out_pred_out_bear + 180) % 360 -
                            180)
                    # the distance and angle are based on the smallest angular impedance onto the segment
                    # shortest-path segments will have exit bearings equal to the entry bearings
                    # in this case, select the closest by shortest distance
                    if in_ang == out_ang:
                        if in_dist < out_dist:
                            e = tree_dists[in_nd_idx]
                            ang = in_ang
                        else:
                            e = tree_dists[out_nd_idx]
                            ang = out_ang
                    elif in_ang < out_ang:
                        e = tree_dists[in_nd_idx]
                        ang = in_ang
                    else:
                        e = tree_dists[out_nd_idx]
                        ang = out_ang
                    # f is the entry distance plus segment length
                    f = e + seg_len
                    # iterate the distance thresholds - from large to small for threshold snipping
                    for d_idx in range(len(distances) - 1, -1, -1):
                        dist_cutoff = distances[d_idx]
                        if e <= dist_cutoff:
                            if f > dist_cutoff:
                                f = dist_cutoff
                            # 3 - harmonic segments hybrid
                            # uses the integral of segment distances as a base, then weights by angular impedance
                            for seg_idx, seg_key in enumerate(seg_keys):
                                if seg_key == 3:
                                    m_idx = seg_targets[seg_idx]
                                    # transform - prevents division by zero
                                    agg_ang = 1 + (ang / 180)
                                    # then aggregate - angular uses distances explicitly
                                    measures_data[m_idx, d_idx,
                                                  src_idx] += (f - e) / agg_ang
        # aggregative and betweenness keys can be computed per to_idx
        for to_idx in tree_nodes:
            # skip self node
            if to_idx == src_idx:
                continue
            # unpack impedance and distance for to index
            to_imp = tree_imps[to_idx]
            to_dist = tree_dists[to_idx]
            # do not proceed if no route available
            if np.isinf(to_dist):
                continue
            # node weights removed since v0.10
            # switched to edge impedance factors
            # calculate centralities
            for d_idx in range(len(distances)):
                dist_cutoff = distances[d_idx]
                beta = betas[d_idx]
                if to_dist <= dist_cutoff:
                    # iterate aggregation functions
                    for agg_idx, agg_key in enumerate(agg_keys):
                        # fetch target index for writing data
                        # stored at equivalent index in agg_targets
                        m_idx = agg_targets[agg_idx]
                        # go through keys and write data
                        # 0 - simple node counts
                        if agg_key == 0:
                            measures_data[m_idx, d_idx, src_idx] += 1
                        # 1 - farness
                        elif agg_key == 1:
                            measures_data[m_idx, d_idx, src_idx] += to_dist
                        # 2 - cycles
                        elif agg_key == 2:
                            if tree_cycles[to_idx]:
                                measures_data[m_idx, d_idx, src_idx] += 1
                        # 3 - harmonic node
                        elif agg_key == 3:
                            measures_data[m_idx, d_idx, src_idx] += 1 / to_imp
                        # 4 - beta weighted node
                        elif agg_key == 4:
                            measures_data[m_idx, d_idx,
                                          src_idx] += np.exp(beta * to_dist)
                        # 5 - harmonic node - angular
                        elif agg_key == 5:
                            a = 1 + (to_imp / 180)  # transform angles
                            measures_data[m_idx, d_idx, src_idx] += 1 / a
            # check whether betweenness keys are present prior to proceeding
            if not betw_nodes and not betw_segs:
                continue
            # only process in one direction
            if to_idx < src_idx:
                continue
            # NODE WORKFLOW
            if betw_nodes:
                # betweenness - only counting truly between vertices, not starting and ending verts
                inter_idx = int(tree_preds[to_idx])
                while True:
                    # break out of while loop if the intermediary has reached the source node
                    if inter_idx == src_idx:
                        break
                    # iterate the distance thresholds
                    for d_idx in range(len(distances)):
                        dist_cutoff = distances[d_idx]
                        beta = betas[d_idx]
                        # check threshold
                        if tree_dists[to_idx] <= dist_cutoff:
                            # iterate betweenness functions
                            for betw_idx, betw_key in enumerate(betw_keys):
                                # fetch target index for writing data
                                # stored at equivalent index in betw_targets
                                m_idx = betw_targets[betw_idx]
                                # go through keys and write data
                                # simple count of nodes for betweenness
                                if betw_key == 0:
                                    measures_data[m_idx, d_idx, inter_idx] += 1
                                # 1 - beta weighted betweenness
                                # the distance is that between the from and to vertices
                                # i.e. the spatial impedance of the route via the intermediary vertex
                                elif betw_key == 1:
                                    measures_data[m_idx, d_idx,
                                                  inter_idx] += np.exp(beta * to_dist)
                                # 3 - betweenness node count - angular heuristic version
                                elif betw_key == 3:
                                    measures_data[m_idx, d_idx, inter_idx] += 1
                    # follow the chain
                    inter_idx = int(tree_preds[inter_idx])
            if betw_segs:
                # segment versions only aggregate the first and last segments - intervening segments are processed from other to-nodes
                o_seg_len = edge_data[int(tree_origin_seg[to_idx])][2]
                l_seg_len = edge_data[int(tree_last_seg[to_idx])][2]
                min_seg_span = tree_dists[to_idx] - o_seg_len - l_seg_len
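                # o_1 / o_2 and l_1 / l_2 bound the origin and last segments as distance spans from the source
                # these spans are snipped against each distance threshold below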
                o_1 = min_seg_span
                o_2 = min_seg_span + o_seg_len
                l_1 = min_seg_span
                l_2 = min_seg_span + l_seg_len
                # betweenness - only counting truly between vertices, not starting and ending verts
                inter_idx = int(tree_preds[to_idx])
                while True:
                    # break out of while loop if the intermediary has reached the source node
                    if inter_idx == src_idx:
                        break
                    # iterate the distance thresholds - from large to small for threshold snipping
                    for d_idx in range(len(distances) - 1, -1, -1):
                        dist_cutoff = distances[d_idx]
                        beta = betas[d_idx]
                        if min_seg_span <= dist_cutoff:
                            # prune if necessary
                            if o_2 > dist_cutoff:
                                o_2 = dist_cutoff
                            if l_2 > dist_cutoff:
                                l_2 = dist_cutoff
                            for betw_idx, betw_key in enumerate(betw_keys):
                                m_idx = betw_targets[betw_idx]
                                # 2 - segment version of betweenness
                                if betw_key == 2:
                                    # catch division by zero
                                    if beta == -0.0:
                                        auc = o_2 - o_1 + l_2 - l_1
                                    else:
                                        auc = (np.exp(beta * o_2) -
                                               np.exp(beta * o_1)) / beta + \
                                              (np.exp(beta * l_2) -
                                               np.exp(beta * l_1)) / beta
                                    measures_data[m_idx, d_idx,
                                                  inter_idx] += auc
                                # 4 - betweenness segment hybrid version
                                elif betw_key == 4:
                                    bt_ang = 1 + tree_imps[to_idx] / 180
                                    pt_a = o_2 - o_1
                                    pt_b = l_2 - l_1
                                    measures_data[m_idx, d_idx,
                                                  inter_idx] += (pt_a +
                                                                 pt_b) / bt_ang
                    # follow the chain
                    inter_idx = int(tree_preds[inter_idx])

    return measures_data
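
# --- illustrative sketch (not from the library) ---
# A minimal numpy illustration of the node-based aggregations above, assuming a
# precomputed array of shortest-path distances (in metres, used here as impedances for
# simplicity) from a single source node. Harmonic closeness sums 1 / d; the beta-weighted
# ("gravity") variant sums exp(beta * d), with negative betas per this module's convention.
import numpy as np

to_dists = np.array([50.0, 120.0, 260.0, 700.0])  # hypothetical reachable distances
dist_cutoff = 400.0
beta = -0.01

within = to_dists[to_dists <= dist_cutoff]  # snip to the distance threshold
node_harmonic = np.sum(1.0 / within)
node_beta = np.sum(np.exp(beta * within))
print(node_harmonic, node_beta)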
Example #16
0
def nX_from_graph_maps(node_uids: Union[tuple, list],
                       node_data: np.ndarray,
                       edge_data: np.ndarray,
                       node_edge_map: Dict,
                       networkX_graph: nx.Graph = None,
                       metrics_dict: dict = None) -> nx.Graph:
    logger.info('Populating node and edge map data to a networkX graph.')

    if networkX_graph is not None:
        logger.info('Reusing existing graph as backbone.')
        if networkX_graph.number_of_nodes() != len(node_data):
            raise ValueError(
                'The number of nodes in the graph does not match the number of nodes in the node map.'
            )
        g_copy = networkX_graph.copy()
        for uid in node_uids:
            if uid not in g_copy:
                raise KeyError(
                    f'Node uid {uid} not found in graph. '
                    f'If passing a graph as backbone, the uids must match those supplied with the node and edge maps.'
                )
    else:
        logger.info('No existing graph found, creating new.')
        g_copy = nx.Graph()
        for uid in node_uids:
            g_copy.add_node(uid)

    # checked after the above so that uid mismatches are caught first
    checks.check_network_maps(node_data, edge_data, node_edge_map)

    logger.info('Unpacking node data.')
    for uid, node in tqdm(zip(node_uids, node_data),
                          disable=checks.quiet_mode):
        x, y, live, ghosted = node
        g_copy.nodes[uid]['x'] = x
        g_copy.nodes[uid]['y'] = y
        g_copy.nodes[uid]['live'] = bool(live)
        g_copy.nodes[uid]['ghosted'] = bool(ghosted)

    logger.info('Unpacking edge data.')
    for edge in tqdm(edge_data, disable=checks.quiet_mode):
        start, end, length, angle_sum, imp_factor, start_bearing, end_bearing = edge
        start_uid = node_uids[int(start)]
        end_uid = node_uids[int(end)]
        # networkX will silently add new edges / data over existing edges
        g_copy.add_edge(start_uid,
                        end_uid,
                        length=length,
                        angle_sum=angle_sum,
                        imp_factor=imp_factor,
                        start_bearing=start_bearing,
                        end_bearing=end_bearing)

    if metrics_dict is not None:
        logger.info('Unpacking metrics to nodes.')
        for uid, metrics in tqdm(metrics_dict.items(),
                                 disable=checks.quiet_mode):
            if uid not in g_copy:
                raise KeyError(
                    f'Node uid {uid} not found in graph. '
                    f'Data dictionary uids must match those supplied with the node and edge maps.'
                )
            g_copy.nodes[uid]['metrics'] = metrics

    return g_copy
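
# --- illustrative usage sketch (not from the library) ---
# Round-trip: build node and edge maps from a mock graph, then rebuild a networkX graph
# from those maps via nX_from_graph_maps. The import path is assumed; adjust to wherever
# the mock and graphs modules live in your installation.
from cityseer.util import mock, graphs  # assumed import path

G = mock.mock_graph()
G = graphs.nX_simple_geoms(G)
node_uids, node_data, edge_data, node_edge_map = graphs.graph_maps_from_nX(G)
G_round_trip = nX_from_graph_maps(node_uids, node_data, edge_data, node_edge_map)
assert G_round_trip.number_of_nodes() == len(node_data)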