# imports assumed for the test excerpts below; exact module paths may vary by cityseer version
import numpy as np
import pytest
from scipy.stats import entropy

from cityseer.algos import data, diversity
from cityseer.metrics import layers, networks
from cityseer.tools import graphs, mock


def test_hill_diversity():
    # test hill diversity against scipy entropy
    for counts, probs in mock.mock_species_data():
        # check hill q=1 - this can be tested against scipy because hill q=1 is the exponential of entropy
        assert np.allclose(diversity.hill_diversity(counts, q=1),
                           np.exp(entropy(probs)),
                           atol=0.001,
                           rtol=0)
        # check that hill q<1 and q>1 are reasonably close to scipy entropy
        # (different internal computation)
        assert np.allclose(diversity.hill_diversity(counts, 0.99999999),
                           np.exp(entropy(probs)),
                           atol=0.001,
                           rtol=0)
        assert np.allclose(diversity.hill_diversity(counts, 1.00000001),
                           np.exp(entropy(probs)),
                           atol=0.001,
                           rtol=0)
    # check for malformed q
    with pytest.raises(ValueError):
        diversity.hill_diversity(counts, q=-1)
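
# A minimal sketch of the conventional Hill number tested above, assuming the
# standard definition D_q = (sum_i p_i ** q) ** (1 / (1 - q)), with the q -> 1
# limit given by exp(Shannon entropy). Illustrative only - hill_number_sketch
# is not the library implementation.
def hill_number_sketch(counts, q):
    if q < 0:
        raise ValueError('q must be non-negative.')
    probs = counts[counts > 0] / counts.sum()  # drop zero counts before taking logs
    if np.isclose(q, 1.0):
        # q = 1 is defined via the limit: the exponential of Shannon entropy
        return float(np.exp(-np.sum(probs * np.log(probs))))
    return float(np.sum(probs ** q) ** (1.0 / (1.0 - q)))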
def test_hill_diversity_branch_distance_wt():
    # test against hill diversity by setting all weights = 1
    for counts, probs in mock.mock_species_data():
        non_weights = np.full(len(counts), 1)
        non_beta = -0
        for q in [0, 1, 2]:
            assert np.allclose(
                diversity.hill_diversity(counts, q),
                diversity.hill_diversity_branch_distance_wt(counts, non_weights, q, non_beta),
                atol=0.001,
                rtol=0)
    # check for malformed signatures
    with pytest.raises(ValueError):
        diversity.hill_diversity_branch_distance_wt(counts[:-1], non_weights, q=1, beta=-0.005)
    with pytest.raises(ValueError):
        diversity.hill_diversity_branch_distance_wt(counts, non_weights[:-1], q=1, beta=-0.005)
    with pytest.raises(ValueError):
        diversity.hill_diversity_branch_distance_wt(counts, non_weights, q=1, beta=0.005)
    with pytest.raises(ValueError):
        diversity.hill_diversity_branch_distance_wt(counts, non_weights, q=-1, beta=-0.005)
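
# A hedged sketch of what the mock_species_data generator might yield, based on
# how it is consumed above: random integer counts plus the derived probabilities.
# The real mock module may differ; this is only to make the test inputs concrete.
def mock_species_data_sketch(n_iters=5, max_classes=20, seed=0):
    rng = np.random.default_rng(seed)
    for _ in range(n_iters):
        counts = rng.integers(1, 100, size=int(rng.integers(2, max_classes)))
        probs = counts / counts.sum()
        yield counts, probs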
# excerpt from inside a sampling loop: class_code_list and class_dist_list
# accumulate the class codes and distances of sampled data points
if len(class_code_list) > max_elements:
    continue
if len(class_code_list) % 100 == 0:
    print(f'List now at {len(class_code_list)}')
class_code_arr = np.array(class_code_list)
dist_arr = np.array(class_dist_list)
classes_unique, classes_counts, classes_nearest = deduce_unique_species(
    class_code_arr, dist_arr, max_dist=1600)
# iterate the betas and generate the mixed use metrics
for k, beta in zip(data_keys, data_betas):
    # run the calculations
    data_5[k]['mu_hill_0'].append(
        diversity.hill_diversity(classes_counts, 0))
    data_5[k]['mu_hill_1'].append(
        diversity.hill_diversity(classes_counts, 1))
    data_5[k]['mu_hill_2'].append(
        diversity.hill_diversity(classes_counts, 2))
    data_5[k]['mu_hill_branch_wt_0'].append(
        diversity.hill_diversity_branch_distance_wt(classes_counts, classes_nearest, 0, beta=beta))
    data_5[k]['mu_hill_branch_wt_1'].append(
        diversity.hill_diversity_branch_distance_wt(classes_counts, classes_nearest, 1, beta=beta))
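
# A hedged reconstruction of the deduce_unique_species helper called above,
# inferred only from its call signature and return values: filter to points
# within max_dist, then return the unique class codes, their counts, and the
# nearest distance per class. The actual helper may differ.
def deduce_unique_species_sketch(class_codes, dists, max_dist=1600):
    within = dists <= max_dist
    codes = class_codes[within]
    code_dists = dists[within]
    classes_unique, classes_counts = np.unique(codes, return_counts=True)
    classes_nearest = np.array([code_dists[codes == c].min() for c in classes_unique])
    return classes_unique, classes_counts, classes_nearest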
# imports assumed for this module excerpt; aggregate_to_src_idx is defined elsewhere in the same module
from typing import Dict, Tuple

import numpy as np

from cityseer.algos import checks, diversity  # module paths assumed


def local_aggregator(
        node_data: np.ndarray,
        edge_data: np.ndarray,
        node_edge_map: Dict,
        data_map: np.ndarray,
        distances: np.ndarray,
        betas: np.ndarray,
        landuse_encodings: np.ndarray = np.array([]),
        qs: np.ndarray = np.array([]),
        mixed_use_hill_keys: np.ndarray = np.array([]),
        mixed_use_other_keys: np.ndarray = np.array([]),
        accessibility_keys: np.ndarray = np.array([]),
        cl_disparity_wt_matrix: np.ndarray = np.array(np.full((0, 0), np.nan)),
        numerical_arrays: np.ndarray = np.array(np.full((0, 0), np.nan)),
        angular: bool = False,
        suppress_progress: bool = False
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray,
           np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray,
           np.ndarray, np.ndarray]:
    '''
    NODE MAP:
    0 - x
    1 - y
    2 - live
    3 - ghosted
    EDGE MAP:
    0 - start node
    1 - end node
    2 - length in metres
    3 - sum of angular travel along length
    4 - impedance factor
    5 - in bearing
    6 - out bearing
    DATA MAP:
    0 - x
    1 - y
    2 - assigned network index - nearest
    3 - assigned network index - next-nearest
    '''
    checks.check_network_maps(node_data, edge_data, node_edge_map)
    checks.check_data_map(
        data_map, check_assigned=True
    )  # raises ValueError if data points are not assigned to a network
    checks.check_distances_and_betas(distances, betas)
    # check landuse encodings
    compute_landuses = False
    if len(landuse_encodings) == 0:
        if len(mixed_use_hill_keys) != 0 or len(mixed_use_other_keys) != 0 or len(accessibility_keys) != 0:
            raise ValueError(
                'Mixed use metrics or land-use accessibilities require an array of landuse labels.')
    elif len(landuse_encodings) != len(data_map):
        raise ValueError(
            'The number of landuse encodings does not match the number of data points.')
    else:
        checks.check_categorical_data(landuse_encodings)
    # catch completely missing metrics
    if len(mixed_use_hill_keys) == 0 and len(mixed_use_other_keys) == 0 and len(accessibility_keys) == 0:
        if len(numerical_arrays) == 0:
            raise ValueError(
                'No metrics specified, please specify at least one metric to compute.')
    else:
        compute_landuses = True
    # catch missing qs
    if len(mixed_use_hill_keys) != 0 and len(qs) == 0:
        raise ValueError(
            'Hill diversity measures require that at least one value of q is specified.')
    # negative qs are caught by the hill diversity methods
    # check various problematic key combinations
    if len(mixed_use_hill_keys) != 0:
        if mixed_use_hill_keys.min() < 0 or mixed_use_hill_keys.max() > 3:
            raise ValueError('Mixed-use "hill" keys out of range of 0:4.')
    if len(mixed_use_other_keys) != 0:
        if mixed_use_other_keys.min() < 0 or mixed_use_other_keys.max() > 2:
            raise ValueError('Mixed-use "other" keys out of range of 0:3.')
    if len(accessibility_keys) != 0:
        max_ac_key = landuse_encodings.max()
        if accessibility_keys.min() < 0 or accessibility_keys.max() > max_ac_key:
            raise ValueError(
                'Negative or out of range accessibility key encountered. Keys must match class encodings.')
    # check for duplicate keys
    for i in range(len(mixed_use_hill_keys)):
        for j in range(len(mixed_use_hill_keys)):
            if j > i:
                i_key = mixed_use_hill_keys[i]
                j_key = mixed_use_hill_keys[j]
                if i_key == j_key:
                    raise ValueError('Duplicate mixed-use "hill" key.')
    for i in range(len(mixed_use_other_keys)):
        for j in range(len(mixed_use_other_keys)):
            if j > i:
                i_key = mixed_use_other_keys[i]
                j_key = mixed_use_other_keys[j]
                if i_key == j_key:
                    raise ValueError('Duplicate mixed-use "other" key.')
    for i in range(len(accessibility_keys)):
        for j in range(len(accessibility_keys)):
            if j > i:
                i_key = accessibility_keys[i]
                j_key = accessibility_keys[j]
                if i_key == j_key:
                    raise ValueError('Duplicate accessibility key.')

    def disp_check(disp_matrix):
        # the length of the disparity matrix vis-a-vis unique landuses is tested in the underlying diversity functions
        if disp_matrix.ndim != 2 or disp_matrix.shape[0] != disp_matrix.shape[1]:
            raise ValueError('The disparity matrix must be a square NxN matrix.')
        if len(disp_matrix) == 0:
            raise ValueError(
                'Hill disparity and Rao pairwise measures require a class disparity weights matrix.')

    # check that missing or malformed disparity weights matrices are caught
    for k in mixed_use_hill_keys:
        if k == 3:  # hill disparity
            disp_check(cl_disparity_wt_matrix)
    for k in mixed_use_other_keys:
        if k == 2:  # raos pairwise
            disp_check(cl_disparity_wt_matrix)
    compute_numerical = False
    # when passing an empty 2d array to numba, use: np.array(np.full((0, 0), np.nan))
    if len(numerical_arrays) != 0:
        compute_numerical = True
        if numerical_arrays.shape[1] != len(data_map):
            raise ValueError(
                'The length of the numerical data arrays does not match the length of the data map.')
        checks.check_numerical_data(numerical_arrays)
    # establish variables
    netw_n = len(node_data)
    d_n = len(distances)
    q_n = len(qs)
    n_n = len(numerical_arrays)
    global_max_dist = distances.max()
    netw_nodes_live = node_data[:, 2]
    # setup data structures
    # hill mixed uses are structured separately to take values of q into account
    mixed_use_hill_data = np.full((4, q_n, d_n, netw_n), np.nan)  # 4 dim
    mixed_use_other_data = np.full((3, d_n, netw_n), np.nan)  # 3 dim
    accessibility_data = np.full((len(accessibility_keys), d_n, netw_n), 0.0)
    accessibility_data_wt = np.full((len(accessibility_keys), d_n, netw_n), 0.0)
    # stats
    stats_sum = np.full((n_n, d_n, netw_n), np.nan)
    stats_sum_wt = np.full((n_n, d_n, netw_n), np.nan)
    stats_mean = np.full((n_n, d_n, netw_n), np.nan)
    stats_mean_wt = np.full((n_n, d_n, netw_n), np.nan)
    # use np.nan instead of 0 to avoid division by zero issues
    stats_count = np.full((n_n, d_n, netw_n), np.nan)
    stats_count_wt = np.full((n_n, d_n, netw_n), np.nan)
    stats_variance = np.full((n_n, d_n, netw_n), np.nan)
    stats_variance_wt = np.full((n_n, d_n, netw_n), np.nan)
    stats_max = np.full((n_n, d_n, netw_n), np.nan)
    stats_min = np.full((n_n, d_n, netw_n), np.nan)
    # iterate through each vert and aggregate
    steps = int(netw_n / 10000)
    for netw_src_idx in range(netw_n):
        if not suppress_progress:
            checks.progress_bar(netw_src_idx, netw_n, steps)
        # only compute for live nodes
        if not netw_nodes_live[netw_src_idx]:
            continue
        # generate the reachable classes and their respective distances
        # these are non-unique - i.e. simply the class of each data point within the maximum distance
        # the aggregate_to_src_idx method will choose the closer direction of approach to a data point
        # from the nearest or next-nearest network node (calculated once globally, prior to the local_landuses method)
        reachable_data, reachable_data_dist, tree_preds = aggregate_to_src_idx(
            netw_src_idx, node_data, edge_data, node_edge_map, data_map,
            global_max_dist, angular)
        # LANDUSES
        if compute_landuses:
            mu_max_unique_cl = int(landuse_encodings.max() + 1)
            # counts of each class type (array length per max unique classes - not just those within max distance)
            classes_counts = np.full((d_n, mu_max_unique_cl), 0)
            # nearest of each class type (likewise)
            classes_nearest = np.full((d_n, mu_max_unique_cl), np.inf)
            # iterate the reachable indices and related distances
            for data_idx, (reachable, data_dist) in enumerate(
                    zip(reachable_data, reachable_data_dist)):
                if not reachable:
                    continue
                # get the class category in integer form
                # all class codes were encoded to sequential integers - these correspond to the array indices
                cl_code = int(landuse_encodings[int(data_idx)])
                # iterate the distance dimensions
                for d_idx, (d, b) in enumerate(zip(distances, betas)):
                    # increment class counts at respective distances if the distance is less than the current d
                    if data_dist <= d:
                        classes_counts[d_idx, cl_code] += 1
                        # if the distance is nearer, update the nearest distance array too
                        if data_dist < classes_nearest[d_idx, cl_code]:
                            classes_nearest[d_idx, cl_code] = data_dist
                        # if within distance, and if in the accessibility keys, then aggregate accessibility too
                        for ac_idx, ac_code in enumerate(accessibility_keys):
                            if ac_code == cl_code:
                                accessibility_data[ac_idx, d_idx, netw_src_idx] += 1
                                accessibility_data_wt[ac_idx, d_idx, netw_src_idx] += np.exp(b * data_dist)
                                # if a match was found, then no need to check the others
                                break
            # mixed uses can be calculated now that the local class counts are aggregated
            # iterate the distances and betas
            for d_idx, b in enumerate(betas):
                cl_counts = classes_counts[d_idx]
                cl_nearest = classes_nearest[d_idx]
                # mu keys determine which metrics to compute - don't confuse with indices
                # previously used dynamic indices in data structures - but obtuse if irregularly ordered keys
                for mu_hill_key in mixed_use_hill_keys:
                    for q_idx, q_key in enumerate(qs):
                        if mu_hill_key == 0:
                            mixed_use_hill_data[0, q_idx, d_idx, netw_src_idx] = \
                                diversity.hill_diversity(cl_counts, q_key)
                        elif mu_hill_key == 1:
                            mixed_use_hill_data[1, q_idx, d_idx, netw_src_idx] = \
                                diversity.hill_diversity_branch_distance_wt(cl_counts, cl_nearest, q=q_key, beta=b)
                        elif mu_hill_key == 2:
                            mixed_use_hill_data[2, q_idx, d_idx, netw_src_idx] = \
                                diversity.hill_diversity_pairwise_distance_wt(cl_counts, cl_nearest, q=q_key, beta=b)
                        # land-use classification disparity hill diversity
                        # the wt matrix can be used without mapping because cl_counts is based on all classes
                        # regardless of whether they are reachable
                        elif mu_hill_key == 3:
                            mixed_use_hill_data[3, q_idx, d_idx, netw_src_idx] = \
                                diversity.hill_diversity_pairwise_matrix_wt(cl_counts,
                                                                            wt_matrix=cl_disparity_wt_matrix,
                                                                            q=q_key)
                for mu_other_key in mixed_use_other_keys:
                    if mu_other_key == 0:
                        mixed_use_other_data[0, d_idx, netw_src_idx] = \
                            diversity.shannon_diversity(cl_counts)
                    elif mu_other_key == 1:
                        mixed_use_other_data[1, d_idx, netw_src_idx] = \
                            diversity.gini_simpson_diversity(cl_counts)
                    elif mu_other_key == 2:
                        mixed_use_other_data[2, d_idx, netw_src_idx] = \
                            diversity.raos_quadratic_diversity(cl_counts, wt_matrix=cl_disparity_wt_matrix)
        # IDW
        # the order of the loops matters because the nested aggregations happen per distance per numerical array
        if compute_numerical:
            # iterate the reachable indices and related distances
            for data_idx, (reachable, data_dist) in enumerate(
                    zip(reachable_data, reachable_data_dist)):
                # some indices will be NaN if beyond the max threshold distance - so check for infinity
                # this happens when within the radial max distance, but beyond the network max distance
                if not reachable:
                    continue
                # iterate the numerical arrays dimension
                for num_idx in range(n_n):
                    # some values will be NaN
                    num = numerical_arrays[num_idx, int(data_idx)]
                    if np.isnan(num):
                        continue
                    # iterate the distance dimensions
                    for d_idx, (d, b) in enumerate(zip(distances, betas)):
                        # increment the mean aggregations at respective distances if the distance is less than the current d
                        if data_dist <= d:
                            # aggregate
                            if np.isnan(stats_sum[num_idx, d_idx, netw_src_idx]):
                                stats_sum[num_idx, d_idx, netw_src_idx] = num
                                stats_count[num_idx, d_idx, netw_src_idx] = 1
                                stats_sum_wt[num_idx, d_idx, netw_src_idx] = num * np.exp(data_dist * b)
                                stats_count_wt[num_idx, d_idx, netw_src_idx] = np.exp(data_dist * b)
                            else:
                                stats_sum[num_idx, d_idx, netw_src_idx] += num
                                stats_count[num_idx, d_idx, netw_src_idx] += 1
                                stats_sum_wt[num_idx, d_idx, netw_src_idx] += num * np.exp(data_dist * b)
                                stats_count_wt[num_idx, d_idx, netw_src_idx] += np.exp(data_dist * b)
                            if np.isnan(stats_max[num_idx, d_idx, netw_src_idx]):
                                stats_max[num_idx, d_idx, netw_src_idx] = num
                            elif num > stats_max[num_idx, d_idx, netw_src_idx]:
                                stats_max[num_idx, d_idx, netw_src_idx] = num
                            if np.isnan(stats_min[num_idx, d_idx, netw_src_idx]):
                                stats_min[num_idx, d_idx, netw_src_idx] = num
                            elif num < stats_min[num_idx, d_idx, netw_src_idx]:
                                stats_min[num_idx, d_idx, netw_src_idx] = num
            # finalise the mean calculations - this happens for a single netw_src_idx, so is fairly fast
            for num_idx in range(n_n):
                for d_idx in range(d_n):
                    stats_mean[num_idx, d_idx, netw_src_idx] = \
                        stats_sum[num_idx, d_idx, netw_src_idx] / stats_count[num_idx, d_idx, netw_src_idx]
                    stats_mean_wt[num_idx, d_idx, netw_src_idx] = \
                        stats_sum_wt[num_idx, d_idx, netw_src_idx] / stats_count_wt[num_idx, d_idx, netw_src_idx]
            # calculate the variances - the counts are already computed per above
            # the weighted version is IDW by division through the equivalently weighted counts above
            # iterate the reachable indices and related distances
            for data_idx, (reachable, data_dist) in enumerate(
                    zip(reachable_data, reachable_data_dist)):
                # some indices will be NaN if beyond the max threshold distance - so check for infinity
                # this happens when within the radial max distance, but beyond the network max distance
                if not reachable:
                    continue
                # iterate the numerical arrays dimension
                for num_idx in range(n_n):
                    # some values will be NaN
                    num = numerical_arrays[num_idx, int(data_idx)]
                    if np.isnan(num):
                        continue
                    # iterate the distance dimensions
                    for d_idx, (d, b) in enumerate(zip(distances, betas)):
                        # increment the variance aggregations at respective distances if the distance is less than the current d
                        if data_dist <= d:
                            # aggregate
                            if np.isnan(stats_variance[num_idx, d_idx, netw_src_idx]):
                                stats_variance[num_idx, d_idx, netw_src_idx] = \
                                    np.square(num - stats_mean[num_idx, d_idx, netw_src_idx])
                                stats_variance_wt[num_idx, d_idx, netw_src_idx] = \
                                    np.square(num - stats_mean_wt[num_idx, d_idx, netw_src_idx]) * np.exp(data_dist * b)
                            else:
                                stats_variance[num_idx, d_idx, netw_src_idx] += \
                                    np.square(num - stats_mean[num_idx, d_idx, netw_src_idx])
                                stats_variance_wt[num_idx, d_idx, netw_src_idx] += \
                                    np.square(num - stats_mean_wt[num_idx, d_idx, netw_src_idx]) * np.exp(data_dist * b)
            # finalise the variance calculations
            for num_idx in range(n_n):
                for d_idx in range(d_n):
                    stats_variance[num_idx, d_idx, netw_src_idx] = \
                        stats_variance[num_idx, d_idx, netw_src_idx] / stats_count[num_idx, d_idx, netw_src_idx]
                    stats_variance_wt[num_idx, d_idx, netw_src_idx] = \
                        stats_variance_wt[num_idx, d_idx, netw_src_idx] / stats_count_wt[num_idx, d_idx, netw_src_idx]
    # send the data back in the same types and same order as the original keys - convert to int for indexing
    mu_hill_k_int = np.full(len(mixed_use_hill_keys), 0)
    for i, k in enumerate(mixed_use_hill_keys):
        mu_hill_k_int[i] = k
    mu_other_k_int = np.full(len(mixed_use_other_keys), 0)
    for i, k in enumerate(mixed_use_other_keys):
        mu_other_k_int[i] = k
    return mixed_use_hill_data[mu_hill_k_int], \
        mixed_use_other_data[mu_other_k_int], \
        accessibility_data, accessibility_data_wt, \
        stats_sum, stats_sum_wt, \
        stats_mean, stats_mean_wt, \
        stats_variance, stats_variance_wt, \
        stats_max, stats_min
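
# A worked miniature of the distance-weighted stats logic above, assuming this
# module's negative-beta convention (weights w = exp(beta * d) with beta < 0).
# The arrays are illustrative.
beta = -0.005
nums = np.array([10.0, 20.0, 30.0])      # numerical values at reachable points
dists = np.array([100.0, 400.0, 800.0])  # network distances to those points
wts = np.exp(beta * dists)               # spatial decay weights
mean_wt = np.sum(nums * wts) / np.sum(wts)                      # weighted mean
var_wt = np.sum(np.square(nums - mean_wt) * wts) / np.sum(wts)  # weighted variance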
def test_aggregate_landuses_categorical_components(primal_graph):
    # generate node and edge maps
    node_uids, node_data, edge_data, node_edge_map = graphs.graph_maps_from_nX(primal_graph)
    # setup data
    data_dict = mock.mock_data_dict(primal_graph, random_seed=13)
    data_uids, data_map = layers.data_map_from_dict(data_dict)
    data_map = data.assign_to_network(data_map, node_data, edge_data, node_edge_map, 500)
    # set parameters
    betas = np.array([0.02, 0.01, 0.005, 0.0025])
    distances = networks.distance_from_beta(betas)
    qs = np.array([0, 1, 2])
    mock_categorical = mock.mock_categorical_data(len(data_map))
    landuse_classes, landuse_encodings = layers.encode_categorical(mock_categorical)
    mock_matrix = np.full((len(landuse_classes), len(landuse_classes)), 1)
    # set the keys - add shuffling to be sure various orders work
    hill_keys = np.arange(4)
    np.random.shuffle(hill_keys)
    non_hill_keys = np.arange(3)
    np.random.shuffle(non_hill_keys)
    ac_keys = np.array([1, 2, 5])
    np.random.shuffle(ac_keys)
    # generate
    mu_data_hill, mu_data_other, ac_data, ac_data_wt = data.aggregate_landuses(
        node_data, edge_data, node_edge_map, data_map,
        distances, betas,
        landuse_encodings=landuse_encodings,
        qs=qs,
        mixed_use_hill_keys=hill_keys,
        mixed_use_other_keys=non_hill_keys,
        accessibility_keys=ac_keys,
        cl_disparity_wt_matrix=mock_matrix,
        angular=False)
    # hill
    hill = mu_data_hill[np.where(hill_keys == 0)][0]
    hill_branch_wt = mu_data_hill[np.where(hill_keys == 1)][0]
    hill_pw_wt = mu_data_hill[np.where(hill_keys == 2)][0]
    hill_disp_wt = mu_data_hill[np.where(hill_keys == 3)][0]
    # non-hill
    shannon = mu_data_other[np.where(non_hill_keys == 0)][0]
    gini = mu_data_other[np.where(non_hill_keys == 1)][0]
    raos = mu_data_other[np.where(non_hill_keys == 2)][0]
    # access non-weighted
    ac_1_nw = ac_data[np.where(ac_keys == 1)][0]
    ac_2_nw = ac_data[np.where(ac_keys == 2)][0]
    ac_5_nw = ac_data[np.where(ac_keys == 5)][0]
    # access weighted
    ac_1_w = ac_data_wt[np.where(ac_keys == 1)][0]
    ac_2_w = ac_data_wt[np.where(ac_keys == 2)][0]
    ac_5_w = ac_data_wt[np.where(ac_keys == 5)][0]
    # test manual metrics against all nodes
    mu_max_unique = len(landuse_classes)
    # test against various distances
    for d_idx in range(len(distances)):
        dist_cutoff = distances[d_idx]
        beta = betas[d_idx]
        for src_idx in range(len(primal_graph)):
            reachable_data, reachable_data_dist, tree_preds = data.aggregate_to_src_idx(
                src_idx, node_data, edge_data, node_edge_map, data_map, dist_cutoff)
            # counts of each class type (array length per max unique classes - not just those within max distance)
            cl_counts = np.full(mu_max_unique, 0)
            # nearest of each class type (likewise)
            cl_nearest = np.full(mu_max_unique, np.inf)
            # aggregate
            a_1_nw = 0
            a_2_nw = 0
            a_5_nw = 0
            a_1_w = 0
            a_2_w = 0
            a_5_w = 0
            # iterate reachable
            for data_idx, (reachable, data_dist) in enumerate(zip(reachable_data, reachable_data_dist)):
                if not reachable:
                    continue
                cl = landuse_encodings[data_idx]
                # double check that the distance is within the threshold
                assert data_dist <= dist_cutoff
                # update the class counts
                cl_counts[cl] += 1
                # if the distance is nearer, update the nearest distance array too
                if data_dist < cl_nearest[cl]:
                    cl_nearest[cl] = data_dist
                # aggregate the accessibility codes
                if cl == 1:
                    a_1_nw += 1
                    a_1_w += np.exp(-beta * data_dist)
                elif cl == 2:
                    a_2_nw += 1
                    a_2_w += np.exp(-beta * data_dist)
                elif cl == 5:
                    a_5_nw += 1
                    a_5_w += np.exp(-beta * data_dist)
            # assertions
            assert ac_1_nw[d_idx, src_idx] == a_1_nw
            assert ac_2_nw[d_idx, src_idx] == a_2_nw
            assert ac_5_nw[d_idx, src_idx] == a_5_nw
            assert ac_1_w[d_idx, src_idx] == a_1_w
            assert ac_2_w[d_idx, src_idx] == a_2_w
            assert ac_5_w[d_idx, src_idx] == a_5_w
            assert hill[0, d_idx, src_idx] == diversity.hill_diversity(cl_counts, 0)
            assert hill[1, d_idx, src_idx] == diversity.hill_diversity(cl_counts, 1)
            assert hill[2, d_idx, src_idx] == diversity.hill_diversity(cl_counts, 2)
            assert hill_branch_wt[0, d_idx, src_idx] == \
                   diversity.hill_diversity_branch_distance_wt(cl_counts, cl_nearest, 0, beta)
            assert hill_branch_wt[1, d_idx, src_idx] == \
                   diversity.hill_diversity_branch_distance_wt(cl_counts, cl_nearest, 1, beta)
            assert hill_branch_wt[2, d_idx, src_idx] == \
                   diversity.hill_diversity_branch_distance_wt(cl_counts, cl_nearest, 2, beta)
            assert hill_pw_wt[0, d_idx, src_idx] == \
                   diversity.hill_diversity_pairwise_distance_wt(cl_counts, cl_nearest, 0, beta)
            assert hill_pw_wt[1, d_idx, src_idx] == \
                   diversity.hill_diversity_pairwise_distance_wt(cl_counts, cl_nearest, 1, beta)
            assert hill_pw_wt[2, d_idx, src_idx] == \
                   diversity.hill_diversity_pairwise_distance_wt(cl_counts, cl_nearest, 2, beta)
            assert hill_disp_wt[0, d_idx, src_idx] == \
                   diversity.hill_diversity_pairwise_matrix_wt(cl_counts, mock_matrix, 0)
            assert hill_disp_wt[1, d_idx, src_idx] == \
                   diversity.hill_diversity_pairwise_matrix_wt(cl_counts, mock_matrix, 1)
            assert hill_disp_wt[2, d_idx, src_idx] == \
                   diversity.hill_diversity_pairwise_matrix_wt(cl_counts, mock_matrix, 2)
            assert shannon[d_idx, src_idx] == diversity.shannon_diversity(cl_counts)
            assert gini[d_idx, src_idx] == diversity.gini_simpson_diversity(cl_counts)
            assert raos[d_idx, src_idx] == diversity.raos_quadratic_diversity(cl_counts, mock_matrix)
    # check that angular is passed-through
    # actual angular tests happen in test_shortest_path_tree()
    # here the emphasis is simply on checking that the angular instruction gets chained through
    # setup dual data
    G_dual = graphs.nX_to_dual(primal_graph)
    node_labels_dual, node_data_dual, edge_data_dual, node_edge_map_dual = graphs.graph_maps_from_nX(G_dual)
    data_dict_dual = mock.mock_data_dict(G_dual, random_seed=13)
    data_uids_dual, data_map_dual = layers.data_map_from_dict(data_dict_dual)
    data_map_dual = data.assign_to_network(data_map_dual, node_data_dual, edge_data_dual, node_edge_map_dual, 500)
    mock_categorical = mock.mock_categorical_data(len(data_map_dual))
    landuse_classes_dual, landuse_encodings_dual = layers.encode_categorical(mock_categorical)
    mock_matrix = np.full((len(landuse_classes_dual), len(landuse_classes_dual)), 1)
    mu_hill_dual, mu_other_dual, ac_dual, ac_wt_dual = data.aggregate_landuses(
        node_data_dual, edge_data_dual, node_edge_map_dual, data_map_dual,
        distances, betas,
        landuse_encodings_dual,
        qs=qs,
        mixed_use_hill_keys=hill_keys,
        mixed_use_other_keys=non_hill_keys,
        accessibility_keys=ac_keys,
        cl_disparity_wt_matrix=mock_matrix,
        angular=True)
    mu_hill_dual_sidestep, mu_other_dual_sidestep, ac_dual_sidestep, ac_wt_dual_sidestep = \
        data.aggregate_landuses(
            node_data_dual, edge_data_dual, node_edge_map_dual, data_map_dual,
            distances, betas,
            landuse_encodings_dual,
            qs=qs,
            mixed_use_hill_keys=hill_keys,
            mixed_use_other_keys=non_hill_keys,
            accessibility_keys=ac_keys,
            cl_disparity_wt_matrix=mock_matrix,
            angular=False)
    assert not np.allclose(mu_hill_dual, mu_hill_dual_sidestep, atol=0.001, rtol=0)
    assert not np.allclose(mu_other_dual, mu_other_dual_sidestep, atol=0.001, rtol=0)
    assert not np.allclose(ac_dual, ac_dual_sidestep, atol=0.001, rtol=0)
    assert not np.allclose(ac_wt_dual, ac_wt_dual_sidestep, atol=0.001, rtol=0)
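
# Minimal reference formulas for the two simpler mixed-use measures asserted
# above, assuming the standard definitions (Shannon entropy H = -sum(p * ln p);
# Gini-Simpson = 1 - sum(p ** 2)). Illustrative sketches, not the library versions.
def shannon_diversity_sketch(counts):
    probs = counts[counts > 0] / counts.sum()
    return float(-np.sum(probs * np.log(probs)))


def gini_simpson_diversity_sketch(counts):
    probs = counts / counts.sum()
    return float(1.0 - np.sum(probs ** 2))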
# imports assumed for this module excerpt; aggregate_to_src_idx is defined elsewhere in the same module
from typing import Dict, Tuple

import numpy as np
from numba import prange

from cityseer.algos import checks, diversity  # module paths assumed


def aggregate_landuses(
        node_data: np.ndarray,
        edge_data: np.ndarray,
        node_edge_map: Dict,
        data_map: np.ndarray,
        distances: np.ndarray,
        betas: np.ndarray,
        landuse_encodings: np.ndarray = np.array([]),
        qs: np.ndarray = np.array([]),
        mixed_use_hill_keys: np.ndarray = np.array([]),
        mixed_use_other_keys: np.ndarray = np.array([]),
        accessibility_keys: np.ndarray = np.array([]),
        cl_disparity_wt_matrix: np.ndarray = np.array(np.full((0, 0), np.nan)),
        jitter_scale: float = 0.0,
        angular: bool = False,
        progress_proxy=None
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """
    NODE MAP:
    0 - x
    1 - y
    2 - live
    EDGE MAP:
    0 - start node
    1 - end node
    2 - length in metres
    3 - sum of angular travel along length
    4 - impedance factor
    5 - in bearing
    6 - out bearing
    DATA MAP:
    0 - x
    1 - y
    2 - assigned network index - nearest
    3 - assigned network index - next-nearest
    """
    checks.check_network_maps(node_data, edge_data, node_edge_map)
    checks.check_data_map(
        data_map, check_assigned=True
    )  # raises ValueError if data points are not assigned to a network
    checks.check_distances_and_betas(distances, betas)
    # check landuse encodings
    if len(landuse_encodings) == 0:
        raise ValueError(
            'Mixed use metrics or land-use accessibilities require an array of landuse labels.')
    elif len(landuse_encodings) != len(data_map):
        raise ValueError(
            'The number of landuse encodings does not match the number of data points.')
    else:
        checks.check_categorical_data(landuse_encodings)
    # catch completely missing metrics
    if len(mixed_use_hill_keys) == 0 and len(mixed_use_other_keys) == 0 and len(accessibility_keys) == 0:
        raise ValueError(
            'No metrics specified, please specify at least one metric to compute.')
    # catch missing qs
    if len(mixed_use_hill_keys) != 0 and len(qs) == 0:
        raise ValueError(
            'Hill diversity measures require that at least one value of q is specified.')
    # negative qs are caught by the hill diversity methods
    # check various problematic key combinations
    if len(mixed_use_hill_keys) != 0:
        if np.nanmin(mixed_use_hill_keys) < 0 or np.max(mixed_use_hill_keys) > 3:
            raise ValueError('Mixed-use "hill" keys out of range of 0:4.')
    if len(mixed_use_other_keys) != 0:
        if np.nanmin(mixed_use_other_keys) < 0 or np.max(mixed_use_other_keys) > 2:
            raise ValueError('Mixed-use "other" keys out of range of 0:3.')
    if len(accessibility_keys) != 0:
        max_ac_key = np.nanmax(landuse_encodings)
        if np.nanmin(accessibility_keys) < 0 or np.max(accessibility_keys) > max_ac_key:
            raise ValueError(
                'Negative or out of range accessibility key encountered. Keys must match class encodings.')
    # check for duplicate keys
    for i in range(len(mixed_use_hill_keys)):
        for j in range(len(mixed_use_hill_keys)):
            if j > i:
                i_key = mixed_use_hill_keys[i]
                j_key = mixed_use_hill_keys[j]
                if i_key == j_key:
                    raise ValueError('Duplicate mixed-use "hill" key.')
    for i in range(len(mixed_use_other_keys)):
        for j in range(len(mixed_use_other_keys)):
            if j > i:
                i_key = mixed_use_other_keys[i]
                j_key = mixed_use_other_keys[j]
                if i_key == j_key:
                    raise ValueError('Duplicate mixed-use "other" key.')
    for i in range(len(accessibility_keys)):
        for j in range(len(accessibility_keys)):
            if j > i:
                i_key = accessibility_keys[i]
                j_key = accessibility_keys[j]
                if i_key == j_key:
                    raise ValueError('Duplicate accessibility key.')

    def disp_check(disp_matrix):
        # the length of the disparity matrix vis-a-vis unique landuses is tested in the underlying diversity functions
        if disp_matrix.ndim != 2 or disp_matrix.shape[0] != disp_matrix.shape[1]:
            raise ValueError('The disparity matrix must be a square NxN matrix.')
        if len(disp_matrix) == 0:
            raise ValueError(
                'Hill disparity and Rao pairwise measures require a class disparity weights matrix.')

    # check that missing or malformed disparity weights matrices are caught
    for k in mixed_use_hill_keys:
        if k == 3:  # hill disparity
            disp_check(cl_disparity_wt_matrix)
    for k in mixed_use_other_keys:
        if k == 2:  # raos pairwise
            disp_check(cl_disparity_wt_matrix)
    # establish variables
    netw_n = len(node_data)
    d_n = len(distances)
    q_n = len(qs)
    global_max_dist = float(np.nanmax(distances))
    netw_nodes_live = node_data[:, 2]
    # setup data structures
    # hill mixed uses are structured separately to take values of q into account
    mixed_use_hill_data = np.full((4, q_n, d_n, netw_n), 0.0)  # 4 dim
    mixed_use_other_data = np.full((3, d_n, netw_n), 0.0)  # 3 dim
    accessibility_data = np.full((len(accessibility_keys), d_n, netw_n), 0.0)
    accessibility_data_wt = np.full((len(accessibility_keys), d_n, netw_n), 0.0)
    # iterate through each vert and aggregate
    # parallelise over n nodes:
    # each distance or stat array index is therefore only touched by one thread at a time
    # i.e. no need to use the inner array deductions as with the centralities
    for netw_src_idx in prange(netw_n):
        if progress_proxy is not None:
            progress_proxy.update(1)
        # only compute for live nodes
        if not netw_nodes_live[netw_src_idx]:
            continue
        # generate the reachable classes and their respective distances
        # these are non-unique - i.e. simply the class of each data point within the maximum distance
        # the aggregate_to_src_idx method will choose the closer direction of approach to a data point
        # from the nearest or next-nearest network node (calculated once globally, prior to the local_landuses method)
        reachable_data, reachable_data_dist, tree_preds = aggregate_to_src_idx(
            netw_src_idx, node_data, edge_data, node_edge_map, data_map,
            global_max_dist, jitter_scale=jitter_scale, angular=angular)
        # LANDUSES
        mu_max_unique_cl = int(landuse_encodings.max() + 1)
        # counts of each class type (array length per max unique classes - not just those within max distance)
        classes_counts = np.full((d_n, mu_max_unique_cl), 0)
        # nearest of each class type (likewise)
        classes_nearest = np.full((d_n, mu_max_unique_cl), np.inf)
        # iterate the reachable indices and related distances
        for data_idx, (reachable, data_dist) in enumerate(
                zip(reachable_data, reachable_data_dist)):
            if not reachable:
                continue
            # get the class category in integer form
            # all class codes were encoded to sequential integers - these correspond to the array indices
            cl_code = int(landuse_encodings[int(data_idx)])
            # iterate the distance dimensions
            for d_idx, (d, b) in enumerate(zip(distances, betas)):
                # increment class counts at respective distances if the distance is less than the current d
                if data_dist <= d:
                    classes_counts[d_idx, cl_code] += 1
                    # if the distance is nearer, update the nearest distance array too
                    if data_dist < classes_nearest[d_idx, cl_code]:
                        classes_nearest[d_idx, cl_code] = data_dist
                    # if within distance, and if in the accessibility keys, then aggregate accessibility too
                    for ac_idx, ac_code in enumerate(accessibility_keys):
                        if ac_code == cl_code:
                            accessibility_data[ac_idx, d_idx, netw_src_idx] += 1
                            accessibility_data_wt[ac_idx, d_idx, netw_src_idx] += np.exp(-b * data_dist)
                            # if a match was found, then no need to check the others
                            break
        # mixed uses can be calculated now that the local class counts are aggregated
        # iterate the distances and betas
        for d_idx, b in enumerate(betas):
            cl_counts = classes_counts[d_idx]
            cl_nearest = classes_nearest[d_idx]
            # mu keys determine which metrics to compute - don't confuse with indices
            # previously used dynamic indices in data structures - but obtuse if irregularly ordered keys
            for mu_hill_key in mixed_use_hill_keys:
                for q_idx, q_key in enumerate(qs):
                    if mu_hill_key == 0:
                        mixed_use_hill_data[0, q_idx, d_idx, netw_src_idx] = \
                            diversity.hill_diversity(cl_counts, q_key)
                    elif mu_hill_key == 1:
                        mixed_use_hill_data[1, q_idx, d_idx, netw_src_idx] = \
                            diversity.hill_diversity_branch_distance_wt(cl_counts, cl_nearest, q=q_key, beta=b)
                    elif mu_hill_key == 2:
                        mixed_use_hill_data[2, q_idx, d_idx, netw_src_idx] = \
                            diversity.hill_diversity_pairwise_distance_wt(cl_counts, cl_nearest, q=q_key, beta=b)
                    # land-use classification disparity hill diversity
                    # the wt matrix can be used without mapping because cl_counts is based on all classes
                    # regardless of whether they are reachable
                    elif mu_hill_key == 3:
                        mixed_use_hill_data[3, q_idx, d_idx, netw_src_idx] = \
                            diversity.hill_diversity_pairwise_matrix_wt(cl_counts,
                                                                        wt_matrix=cl_disparity_wt_matrix,
                                                                        q=q_key)
            for mu_other_key in mixed_use_other_keys:
                if mu_other_key == 0:
                    mixed_use_other_data[0, d_idx, netw_src_idx] = \
                        diversity.shannon_diversity(cl_counts)
                elif mu_other_key == 1:
                    mixed_use_other_data[1, d_idx, netw_src_idx] = \
                        diversity.gini_simpson_diversity(cl_counts)
                elif mu_other_key == 2:
                    mixed_use_other_data[2, d_idx, netw_src_idx] = \
                        diversity.raos_quadratic_diversity(cl_counts, wt_matrix=cl_disparity_wt_matrix)
    # send the data back in the same types and same order as the original keys - convert to int for indexing
    mu_hill_k_int = np.full(len(mixed_use_hill_keys), 0)
    for i, k in enumerate(mixed_use_hill_keys):
        mu_hill_k_int[i] = k
    mu_other_k_int = np.full(len(mixed_use_other_keys), 0)
    for i, k in enumerate(mixed_use_other_keys):
        mu_other_k_int[i] = k
    return mixed_use_hill_data[mu_hill_k_int], \
        mixed_use_other_data[mu_other_k_int], \
        accessibility_data, \
        accessibility_data_wt
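
# A hedged usage sketch for aggregate_landuses, mirroring the test above and
# reusing its imports and preparation steps (graph maps, assigned data map,
# encoded landuses). All key values are illustrative.
betas = np.array([0.02, 0.01])                  # positive betas; decay is exp(-beta * d)
distances = networks.distance_from_beta(betas)  # matching distance thresholds
mu_hill, mu_other, ac, ac_wt = data.aggregate_landuses(
    node_data, edge_data, node_edge_map, data_map,
    distances, betas,
    landuse_encodings=landuse_encodings,
    qs=np.array([0, 1, 2]),
    mixed_use_hill_keys=np.array([0, 1]),   # hill + branch-distance-weighted hill
    mixed_use_other_keys=np.array([0, 1]),  # shannon + gini-simpson
    accessibility_keys=np.array([1, 2]))    # counts for landuse classes 1 and 2
# outputs unpack in the same order as the requested keys:
hill = mu_hill[0]      # plain hill, indexed [q_idx, d_idx, node_idx]
shannon = mu_other[0]  # shannon, indexed [d_idx, node_idx]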