def test_mock_numerical_data():
    """The mocked numerical generator returns a (num_arrs, length) array of floats in [0, 100000]."""
    for n_samples in (50, 100):
        for n_arrays in (1, 2):
            generated = mock.mock_numerical_data(length=n_samples, num_arrs=n_arrays)
            # one row per requested array, one column per requested sample
            assert generated.shape[0] == n_arrays
            assert generated.shape[1] == n_samples
            # every entry is a float within the expected value range
            for row in generated:
                for value in row:
                    assert isinstance(value, float)
                    assert 0 <= value <= 100000
def test_check_numerical_data():
    """check_numerical_data must reject malformed numerical arrays.

    Setup for each failure case is prepared OUTSIDE the `pytest.raises` context so
    that an unexpected exception raised during setup cannot accidentally satisfy
    the expectation (the original placed setup inside the context manager).
    """
    mock_numerical = mock.mock_numerical_data(50)
    # check for malformed data
    # difficult to catch int arrays without running into numba type checking errors
    # single dimension
    corrupt_numerical = mock_numerical[0]
    assert corrupt_numerical.ndim == 1
    with pytest.raises(ValueError):
        checks.check_numerical_data(corrupt_numerical)
    # catch infinites — mutate before entering the raises context
    mock_numerical[0][0] = np.inf
    with pytest.raises(ValueError):
        checks.check_numerical_data(mock_numerical)
def test_metrics_to_dict(primal_graph):
    """metrics_to_dict must mirror the layer's metrics at each stage of computation."""
    # create a network layer and run some metrics
    N = networks.NetworkLayerFromNX(primal_graph, distances=[500, 1000])
    # check with no metrics
    metrics_dict = N.metrics_to_dict()
    dict_check(metrics_dict, N)
    # check with centrality metrics
    N.node_centrality(measures=['node_harmonic'])
    metrics_dict = N.metrics_to_dict()
    dict_check(metrics_dict, N)
    # check with data metrics
    # NOTE: previously the mock data was generated but never computed against the
    # layer, so this branch exercised nothing beyond the centrality check above —
    # stats are now actually computed before the round-trip is verified
    data_dict = mock.mock_data_dict(primal_graph)
    numerical_data = mock.mock_numerical_data(len(data_dict))
    D = layers.DataLayerFromDict(data_dict)
    D.assign_to_network(N, max_dist=500)
    D.compute_stats(stats_keys='boo', stats_data_arrs=numerical_data[0])
    # TODO(review): landuse metrics are still not exercised here — confirm intended coverage
    metrics_dict = N.metrics_to_dict()
    dict_check(metrics_dict, N)
def test_local_agg_time(primal_graph):
    """
    Timing tests for landuse and stats aggregations.
    Skipped on CI, where timings are unreliable.
    """
    if 'GITHUB_ACTIONS' in os.environ:
        return
    # enable quiet mode for the duration of this test only: previously the flag
    # was left set in os.environ, leaking state into any test run afterwards
    prior_quiet = os.environ.get('CITYSEER_QUIET_MODE')
    os.environ['CITYSEER_QUIET_MODE'] = '1'
    try:
        # generate node and edge maps
        node_uids, node_data, edge_data, node_edge_map = graphs.graph_maps_from_nX(primal_graph)
        # setup data
        data_dict = mock.mock_data_dict(primal_graph, random_seed=13)
        data_uids, data_map = layers.data_map_from_dict(data_dict)
        data_map = data.assign_to_network(data_map, node_data, edge_data, node_edge_map, 500)
        # needs a large enough beta so that distance thresholds aren't encountered
        distances = np.array([np.inf])
        betas = networks.beta_from_distance(distances)
        qs = np.array([0, 1, 2])
        mock_categorical = mock.mock_categorical_data(len(data_map))
        landuse_classes, landuse_encodings = layers.encode_categorical(mock_categorical)
        mock_numerical = mock.mock_numerical_data(len(data_dict), num_arrs=2, random_seed=0)

        def assign_wrapper():
            data.assign_to_network(data_map, node_data, edge_data, node_edge_map, 500)

        # prime the function (numba JIT compilation happens on first call)
        assign_wrapper()
        iters = 20000
        # time and report - roughly 5.675
        func_time = timeit.timeit(assign_wrapper, number=iters)
        # label fixed: previously printed as 'node_cent_wrapper' (copy-paste error)
        print(f'assign_wrapper: {func_time} for {iters} iterations')
        assert func_time < 10

        def landuse_agg_wrapper():
            mu_data_hill, mu_data_other, ac_data, ac_data_wt = data.aggregate_landuses(node_data,
                                                                                       edge_data,
                                                                                       node_edge_map,
                                                                                       data_map,
                                                                                       distances,
                                                                                       betas,
                                                                                       mixed_use_hill_keys=np.array([0, 1]),
                                                                                       landuse_encodings=landuse_encodings,
                                                                                       qs=qs,
                                                                                       angular=False)

        # prime the function
        landuse_agg_wrapper()
        iters = 20000
        # time and report - roughly 10.10
        func_time = timeit.timeit(landuse_agg_wrapper, number=iters)
        # label fixed: previously printed as 'node_cent_wrapper' (copy-paste error)
        print(f'landuse_agg_wrapper: {func_time} for {iters} iterations')
        assert func_time < 15

        def stats_agg_wrapper():
            # compute
            data.aggregate_stats(node_data,
                                 edge_data,
                                 node_edge_map,
                                 data_map,
                                 distances,
                                 betas,
                                 numerical_arrays=mock_numerical,
                                 angular=False)

        # prime the function
        stats_agg_wrapper()
        iters = 20000
        # time and report - roughly 4.96
        func_time = timeit.timeit(stats_agg_wrapper, number=iters)
        # label fixed: previously printed as 'segment_cent_wrapper' (copy-paste error)
        print(f'stats_agg_wrapper: {func_time} for {iters} iterations')
        assert func_time < 10
    finally:
        # restore the environment to its pre-test state
        if prior_quiet is None:
            del os.environ['CITYSEER_QUIET_MODE']
        else:
            os.environ['CITYSEER_QUIET_MODE'] = prior_quiet
def test_local_aggregator_numerical_components(primal_graph):
    """Validate aggregate_stats max/min/sum/mean/variance against manual aggregation per graph component.

    The unweighted stats for each node should match a direct numpy aggregation over
    the data points assigned to that node's connected component, given a distance
    threshold large enough that no data point is excluded.
    """
    # generate node and edge maps
    node_uids, node_data, edge_data, node_edge_map = graphs.graph_maps_from_nX(primal_graph)
    # setup data
    data_dict = mock.mock_data_dict(primal_graph, random_seed=13)
    data_uids, data_map = layers.data_map_from_dict(data_dict)
    data_map = data.assign_to_network(data_map, node_data, edge_data, node_edge_map, 500)
    # for debugging
    # from cityseer.tools import plot
    # plot.plot_graph_maps(node_uids, node_data, edge_data, data_map)
    # set parameters - use a large enough distance such that simple non-weighted checks can be run for max, mean, variance
    betas = np.array([0.00125])
    distances = networks.distance_from_beta(betas)
    mock_numerical = mock.mock_numerical_data(len(data_dict), num_arrs=2, random_seed=0)
    # compute
    stats_sum, stats_sum_wt, stats_mean, stats_mean_wt, stats_variance, stats_variance_wt, stats_max, stats_min = \
        data.aggregate_stats(node_data,
                             edge_data,
                             node_edge_map,
                             data_map,
                             distances,
                             betas,
                             numerical_arrays=mock_numerical,
                             angular=False)
    # non connected portions of the graph will have different stats
    # used manual data plots from test_assign_to_network() to see which nodes the data points are assigned to
    # connected graph is from 0 to 48 -> assigned data points are all except [5, 8, 9, 17, 18, 29, 33, 38, 48]
    # (the union of the data points assigned to the isolated nodes / loop listed below)
    connected_nodes_idx = list(range(49))
    # and the respective data assigned to connected portion of the graph
    connected_data_idx = [i for i in range(len(data_dict)) if i not in [5, 8, 9, 17, 18, 29, 33, 38, 48]]
    # isolated node = 49 -> assigned no data points
    # isolated nodes = 50 & 51 -> assigned data points = 17, 33
    # isolated loop = 52, 53, 54, 55 -> assigned data points = 5, 8, 9, 18, 29, 38, 48
    isolated_nodes_idx = [52, 53, 54, 55]
    isolated_data_idx = [5, 8, 9, 18, 29, 38, 48]
    # check each numerical array at each distance against each component's manual aggregate
    for stats_idx in range(len(mock_numerical)):
        for d_idx in range(len(distances)):
            # max — node 49 received no data points, so its max is NaN
            assert np.isnan(stats_max[stats_idx, d_idx, 49])
            assert np.allclose(stats_max[stats_idx, d_idx, [50, 51]],
                               mock_numerical[stats_idx, [17, 33]].max(), atol=0.001, rtol=0)
            assert np.allclose(stats_max[stats_idx, d_idx, isolated_nodes_idx],
                               mock_numerical[stats_idx, isolated_data_idx].max(), atol=0.001, rtol=0)
            assert np.allclose(stats_max[stats_idx, d_idx, connected_nodes_idx],
                               mock_numerical[stats_idx, connected_data_idx].max(), atol=0.001, rtol=0)
            # min
            assert np.isnan(stats_min[stats_idx, d_idx, 49])
            assert np.allclose(stats_min[stats_idx, d_idx, [50, 51]],
                               mock_numerical[stats_idx, [17, 33]].min(), atol=0.001, rtol=0)
            assert np.allclose(stats_min[stats_idx, d_idx, isolated_nodes_idx],
                               mock_numerical[stats_idx, isolated_data_idx].min(), atol=0.001, rtol=0)
            assert np.allclose(stats_min[stats_idx, d_idx, connected_nodes_idx],
                               mock_numerical[stats_idx, connected_data_idx].min(), atol=0.001, rtol=0)
            # sum — note: the no-data node aggregates to 0 here, not NaN
            assert stats_sum[stats_idx, d_idx, 49] == 0
            assert np.allclose(stats_sum[stats_idx, d_idx, [50, 51]],
                               mock_numerical[stats_idx, [17, 33]].sum(), atol=0.001, rtol=0)
            assert np.allclose(stats_sum[stats_idx, d_idx, isolated_nodes_idx],
                               mock_numerical[stats_idx, isolated_data_idx].sum(), atol=0.001, rtol=0)
            assert np.allclose(stats_sum[stats_idx, d_idx, connected_nodes_idx],
                               mock_numerical[stats_idx, connected_data_idx].sum(), atol=0.001, rtol=0)
            # mean
            assert np.isnan(stats_mean[stats_idx, d_idx, 49])
            assert np.allclose(stats_mean[stats_idx, d_idx, [50, 51]],
                               mock_numerical[stats_idx, [17, 33]].mean(), atol=0.001, rtol=0)
            assert np.allclose(stats_mean[stats_idx, d_idx, isolated_nodes_idx],
                               mock_numerical[stats_idx, isolated_data_idx].mean(), atol=0.001, rtol=0)
            assert np.allclose(stats_mean[stats_idx, d_idx, connected_nodes_idx],
                               mock_numerical[stats_idx, connected_data_idx].mean(), atol=0.001, rtol=0)
            # variance
            assert np.isnan(stats_variance[stats_idx, d_idx, 49])
            assert np.allclose(stats_variance[stats_idx, d_idx, [50, 51]],
                               mock_numerical[stats_idx, [17, 33]].var(), atol=0.001, rtol=0)
            assert np.allclose(stats_variance[stats_idx, d_idx, isolated_nodes_idx],
                               mock_numerical[stats_idx, isolated_data_idx].var(), atol=0.001, rtol=0)
            assert np.allclose(stats_variance[stats_idx, d_idx, connected_nodes_idx],
                               mock_numerical[stats_idx, connected_data_idx].var(), atol=0.001, rtol=0)
def test_compute_stats(primal_graph):
    """
    Test stats component
    """
    betas = np.array([0.01, 0.005])
    distances = networks.distance_from_beta(betas)
    # network layer — two identical layers: one receives stats keys one at a time,
    # the other all at once, so the two computation paths can be cross-checked
    N_single = networks.NetworkLayerFromNX(primal_graph, distances=distances)
    N_multi = networks.NetworkLayerFromNX(primal_graph, distances=distances)
    node_map = N_multi._node_data
    edge_map = N_multi._edge_data
    node_edge_map = N_multi._node_edge_map
    # data layer
    data_dict = mock.mock_data_dict(primal_graph)
    D_single = layers.DataLayerFromDict(data_dict)
    D_multi = layers.DataLayerFromDict(data_dict)
    # check single metrics independently against underlying for some use-cases, e.g. hill, non-hill, accessibility...
    D_single.assign_to_network(N_single, max_dist=500)
    D_multi.assign_to_network(N_multi, max_dist=500)
    # generate some mock landuse data
    mock_numeric = mock.mock_numerical_data(len(data_dict), num_arrs=2)
    # generate stats: per-key on the single layer, batched on the multi layer
    D_single.compute_stats(stats_keys='boo', stats_data_arrs=mock_numeric[0])
    D_single.compute_stats(stats_keys='baa', stats_data_arrs=mock_numeric[1])
    D_multi.compute_stats(stats_keys=['boo', 'baa'], stats_data_arrs=mock_numeric)
    # test against underlying method
    data_map = D_single._data
    stats_sum, stats_sum_wt, stats_mean, stats_mean_wt, stats_variance, stats_variance_wt, stats_max, stats_min = \
        data.aggregate_stats(node_map,
                             edge_map,
                             node_edge_map,
                             data_map,
                             distances,
                             betas,
                             numerical_arrays=mock_numeric)
    # pair each metrics-dict key with the corresponding raw array from aggregate_stats
    labelled_stats = list(zip([
        'max',
        'min',
        'sum',
        'sum_weighted',
        'mean',
        'mean_weighted',
        'variance',
        'variance_weighted'
    ], [
        stats_max,
        stats_min,
        stats_sum,
        stats_sum_wt,
        stats_mean,
        stats_mean_wt,
        stats_variance,
        stats_variance_wt
    ]))
    for num_idx, num_label in enumerate(['boo', 'baa']):
        for s_key, stats in labelled_stats:
            for d_idx, d_key in enumerate(distances):
                single_result = N_single.metrics['stats'][num_label][s_key][d_key]
                multi_result = N_multi.metrics['stats'][num_label][s_key][d_key]
                manual_result = stats[num_idx][d_idx]
                # check one-at-a-time computed vs multiply computed
                assert np.allclose(single_result, multi_result, atol=0.001, rtol=0, equal_nan=True)
                # check one-at-a-time against manual
                assert np.allclose(single_result, manual_result, atol=0.001, rtol=0, equal_nan=True)
                # check multiply computed against manual
                assert np.allclose(multi_result, manual_result, atol=0.001, rtol=0, equal_nan=True)
    # check that problematic keys and data arrays are caught
    bad_cases = [
        (['a'], mock_numeric, ValueError),  # mismatching lengths
        (['a', 'b'], None, TypeError),  # missing arrays
        (['a', 'b'], [], ValueError),  # missing arrays
        (None, mock_numeric, TypeError),  # missing labels
        ([], mock_numeric, ValueError),  # missing labels
    ]
    for labels, arrs, err in bad_cases:
        with pytest.raises(err):
            D_multi.compute_stats(stats_keys=labels, stats_data_arrs=arrs)