def _calculate(self, threshold, is_regression=False):
    num_nodes = len(self._gnx)
    directed_dists = dict(
        weighted.all_pairs_dijkstra_path_length(self._gnx, num_nodes, weight='weight'))
    undirected_dists = dict(
        weighted.all_pairs_dijkstra_path_length(self._gnx.to_undirected(), num_nodes, weight='weight'))

    # calculate the number of nodes reachable to/from node 'n'
    b_u = {node: len(set(nx.ancestors(self._gnx, node)).union(nx.descendants(self._gnx, node)))
           for node in self._gnx}
    max_b_u = float(max(b_u.values()))

    for node in self._gnx:
        # the delta determines whether this node is to be considered
        if (b_u[node] / max_b_u) <= self._threshold:
            self._features[node] = 0
            continue

        udists = undirected_dists[node]
        dists = directed_dists[node]

        # getting coordinated values from two dictionaries with the same keys,
        # saving the data as np.array type
        num, denom = map(np.array, zip(*((udists[n], dists[n]) for n in dists)))
        num = num[denom != 0]
        denom = denom[denom != 0]
        self._features[node] = np.sum(num / denom) / float(b_u[node])
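# Usage sketch for the flow measure above: a self-contained version of the same
# computation on a toy DiGraph, assuming NetworkX's weighted module, where
# all_pairs_dijkstra_path_length(G, cutoff, weight=...) takes the cutoff as its
# second positional argument. The class plumbing (self._gnx, self._features,
# self._threshold) is replaced with local variables for illustration.
import networkx as nx
import numpy as np
from networkx.algorithms.shortest_paths import weighted

gnx = nx.DiGraph()
gnx.add_weighted_edges_from([(0, 1, 1.0), (1, 2, 2.0), (2, 0, 4.0), (1, 3, 1.0)])

directed = dict(weighted.all_pairs_dijkstra_path_length(gnx, len(gnx), weight='weight'))
undirected = dict(weighted.all_pairs_dijkstra_path_length(gnx.to_undirected(), len(gnx), weight='weight'))

b_u = {n: len(set(nx.ancestors(gnx, n)) | set(nx.descendants(gnx, n))) for n in gnx}
max_b_u = float(max(b_u.values()))

threshold = 0.5
features = {}
for node in gnx:
    if b_u[node] / max_b_u <= threshold:
        features[node] = 0
        continue
    udists, dists = undirected[node], directed[node]
    num, denom = map(np.array, zip(*((udists[n], dists[n]) for n in dists)))
    num, denom = num[denom != 0], denom[denom != 0]
    features[node] = np.sum(num / denom) / float(b_u[node])

print(features)  # per node: undirected/directed distance ratios, averaged over reachable nodes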
import networkx as nx
from networkx.algorithms.shortest_paths import weighted


def calculate_flow_index(gnx, threshold):
    flow_list = {}
    nodes = gnx.nodes()
    gnx_without_direction = gnx.to_undirected()

    # size of the largest undirected basin, used to normalize the threshold test
    max_b_v = max(len(nx.ancestors(gnx_without_direction, n)) for n in nodes)

    for n in nodes:
        b_u = len(nx.ancestors(gnx_without_direction, n))
        # nodes below the relative-basin threshold get a flow index of 0
        if float(b_u) / max_b_v <= threshold:
            flow_list[n] = 0.0
            continue
        # all-pairs Dijkstra with cutoff b_u; dict() materializes the generator
        frac_but = dict(weighted.all_pairs_dijkstra_path_length(gnx, b_u, weight='weight'))
        frac_top = dict(weighted.all_pairs_dijkstra_path_length(gnx_without_direction, b_u, weight='weight'))
        vet_sum = 0
        for k in nodes:
            if k in frac_but[n] and frac_but[n][k] != 0:
                vet_sum += frac_top[n][k] / frac_but[n][k]
        flow_list[n] = float(vet_sum) / b_u
    return flow_list
def _initialize_attraction_basin_dist(self):
    ab_in_dist = {}
    ab_out_dist = {}
    # for each node, count the out- and in-distances to the other nodes in the graph
    dists = dict(
        weighted.all_pairs_dijkstra_path_length(self._gnx, len(self._gnx), weight='weight'))
    for node in self._gnx:
        if node not in dists:
            continue
        node_dists = dists[node]
        count_out_dist = Counter(
            node_dists.get(d) for d in nx.descendants(self._gnx, node))
        count_in_dist = Counter(
            dists.get(d, {}).get(node) for d in nx.ancestors(self._gnx, node))
        # unreachable nodes show up as None distances; drop them
        count_out_dist.pop(None, None)
        count_in_dist.pop(None, None)
        ab_out_dist[node] = count_out_dist
        ab_in_dist[node] = count_in_dist
    return ab_out_dist, ab_in_dist
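# A runnable sketch of the same attraction-basin histograms on a toy DiGraph;
# the class context is omitted and the graph is purely illustrative. Each node
# gets a Counter mapping distance -> number of descendants (or ancestors) at
# that distance.
from collections import Counter
import networkx as nx
from networkx.algorithms.shortest_paths import weighted

gnx = nx.DiGraph([(0, 1), (1, 2), (0, 2), (2, 3)])
dists = dict(weighted.all_pairs_dijkstra_path_length(gnx, len(gnx), weight='weight'))

ab_out, ab_in = {}, {}
for node in gnx:
    node_dists = dists.get(node, {})
    out_hist = Counter(node_dists.get(d) for d in nx.descendants(gnx, node))
    in_hist = Counter(dists.get(d, {}).get(node) for d in nx.ancestors(gnx, node))
    out_hist.pop(None, None)   # drop unreachable nodes
    in_hist.pop(None, None)
    ab_out[node], ab_in[node] = out_hist, in_hist

print(ab_out[0])  # Counter({1: 2, 2: 1}): two descendants at distance 1, one at 2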
import pickle
import numpy as np
import networkx as nx
from networkx.algorithms.shortest_paths import weighted as spw


def gdist(replist, path_method='dijkstra', filename='pctot_raw.p'):
    """
    Parameters
    ----------
    replist (rep): LIST. List of representatives in each sub-network.
    path_method : CATEGORICAL ('dijkstra', 'bellman_ford'), default = 'dijkstra'.
    filename : STRING, default = 'pctot_raw.p'. Filename of raw corr coeff matrix.

    Returns
    -------
    gr_dist (inter_group_dist): DICT. Gives group-to-group distances.
    """
    # LOAD RAW CORR MATRIX
    pctot_raw = pickle.load(open(filename, "rb"))

    # FILTER GROUP REPRESENTATIVES
    pc_group = pctot_raw[replist, :][:, replist]
    pc_group = np.absolute(pc_group)

    # FORM INTERGROUP DISTANCE
    nx_group = nx.from_numpy_array(pc_group)
    if path_method == 'dijkstra':
        gr_dist = dict(spw.all_pairs_dijkstra_path_length(nx_group))
    elif path_method == 'bellman_ford':
        gr_dist = dict(spw.all_pairs_bellman_ford_path_length(nx_group))
    else:
        raise ValueError('Invalid shortest path algorithm.')
    return gr_dist
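# Hypothetical usage sketch for gdist(): synthesize a small correlation matrix,
# pickle it under the default filename, and query group-to-group distances for
# three representatives. All values here are synthetic and the representative
# indices are made up for illustration.
import pickle
import numpy as np

rng = np.random.default_rng(0)
data = rng.normal(size=(100, 6))                 # 100 samples, 6 variables
pctot = np.corrcoef(data, rowvar=False)          # 6x6 raw correlation matrix
pickle.dump(pctot, open('pctot_raw.p', 'wb'))

gr_dist = gdist([0, 2, 4], path_method='dijkstra')
print(gr_dist[0])                                 # distances from representative 0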
def __init__(self, network: UndirectedStreetNetwork, **kwargs):
    """See class documentation."""
    assert type(network) is UndirectedStreetNetwork
    self.street_network = network
    # one state per undirected edge, with endpoints in canonical (sorted) order
    self.states = list(set(map(lambda x: tuple(sorted(x)), network.graph.edges.keys())))
    self.shortest_paths = dict(
        all_pairs_dijkstra_path(self.street_network.graph, weight="length"))
    self.shortest_path_dictionary = dict(
        all_pairs_dijkstra_path_length(self.street_network.graph, weight="length"))
    self.gamma = kwargs.get("gamma", 0.01)
    self.sigma = kwargs.get("sigma", 1)
def __init__(self, network, **kwargs):
    assert type(network) is DirectedStreetNetwork
    self.street_network = network
    self.create_states()
    self.shortest_path_dictionary = dict(
        all_pairs_dijkstra_path_length(network.graph, weight="length"))
    self.shortest_paths = dict(
        all_pairs_dijkstra_path(self.street_network.graph, weight="length"))
    self.compute_legal_transitions()
    self.gamma = kwargs.get("gamma", 0.01)
    self.sigma = kwargs.get("sigma", 1)
import networkx as nx
from networkx.algorithms.shortest_paths import weighted


def calculate_flow_index(gnx):
    flow_list = {}
    nodes = gnx.nodes()
    gnx_without_direction = gnx.to_undirected()
    for n in nodes:
        # b_u: number of nodes connected to n in the undirected graph
        b_u = len(nx.ancestors(gnx_without_direction, n))
        # all-pairs Dijkstra with cutoff b_u; dict() materializes the generator
        frac_but = dict(weighted.all_pairs_dijkstra_path_length(gnx, b_u, weight='weight'))
        frac_top = dict(weighted.all_pairs_dijkstra_path_length(gnx_without_direction, b_u, weight='weight'))
        vet_sum = 0
        for k in nodes:
            if k in frac_but[n] and frac_but[n][k] != 0:
                vet_sum += frac_top[n][k] / frac_but[n][k]
        flow_list[n] = vet_sum / b_u
    return flow_list
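# Quick demo on a toy weighted DiGraph, assuming the dict-wrapped version of
# calculate_flow_index above. The toy graph is connected, since an isolated
# node (b_u == 0) would divide by zero.
import networkx as nx

G = nx.DiGraph()
G.add_weighted_edges_from([(0, 1, 1.0), (1, 2, 2.0), (0, 2, 5.0)])
print(calculate_flow_index(G))  # per-node ratio of undirected to directed distances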
def model_network_with_linreg(n) -> tuple:
    """
    Model every node in the water network with an sklearn LinearRegression()
    model based on readings from the n closest sensors

    Arguments:
        n - number of closest sensors to read data from. Uses n of each kind,
            e.g. if n=1, readings from the closest flowmeter AND the closest
            pressure meter are used (int or str 'all')

    Returns:
        list of dicts describing each node in the network
        {'node', 'regression', 'msq', 'r2', 'sensors'} where:
            node - node name (str)
            regression - sklearn regression object
            msq - mean squared error (float)
            r2 - R2 score (float)
            sensors - list of strings, names of sensors used
        dict of normalization parameters {'mean', 'std'}
    """
    print("Modeling the network with linreg (linear_regression.py - model_network_with_linreg())")

    # collect data
    G = ng.create_graph()                                      # get NetworkX graph
    amount_of_junctions = len(G.nodes())                       # for printing progress
    shortest_paths = list(all_pairs_dijkstra_path_length(G))   # shortest distances between all node pairs
    sim_results = wntr_WSN.get_sim_results()                   # no-leak simulation results
    all_pressure_readings, all_flowrate_readings = get_all_sensor_readings(sim_results)

    # preprocess X data
    data_X = pd.concat([all_pressure_readings, all_flowrate_readings], axis=1, join='inner')
    data_X = data_X.loc[:, ~data_X.columns.duplicated()]       # remove duplicate columns
    normalization_params = {'mean': data_X.mean(), 'std': data_X.std()}
    data_X = (data_X - normalization_params['mean']) / normalization_params['std']  # normalize X set

    # for each node in the graph create a separate sklearn LinearRegression() model
    linreg_models = []
    for i, node in enumerate(G.nodes()):
        print(f'\tModeling junction {node}: ({i + 1} out of {amount_of_junctions})')
        sensors = ng.get_closest_sensors(G=G, central_node=node, n=n,
                                         shortest_paths=shortest_paths)
        sensors_names = [s[0] for s in sensors]                # drop the distances
        data_X_scope = data_X[sensors_names]                   # data for the closest sensors only
        data_y = sim_results.node['pressure'][node]            # y data for supervised learning
        train_X, test_X, train_y, test_y = train_test_split(data_X_scope, data_y,
                                                            test_size=0.1)
        linreg_models.append(create_linreg_model(node, train_X, test_X, train_y, test_y))

    return linreg_models, normalization_params
def floyd_warshall_highway():
    """Generate Floyd-Warshall results with MA highway data."""
    from ch07.tmg_load import tmg_load, highway_map
    from ch07.dependencies import plt_error

    if not plt_error:
        (G, positions, _) = tmg_load(highway_map())

        from networkx.algorithms.shortest_paths.dense import floyd_warshall
        print('This might take a while')
        start_fw_time = time.time()
        dist_to = floyd_warshall(G, weight='weight')
        longest_so_far = 0
        start = -1
        end = -1
        for i in range(G.number_of_nodes()):
            for j in range(i + 1, G.number_of_nodes()):
                if dist_to[i][j] > longest_so_far:
                    longest_so_far = dist_to[i][j]
                    start = i
                    end = j
        end_fw_time = time.time()
        print('start {} to end {} in longest shortest distance {} in time {:.3f} seconds'
              .format(positions[start], positions[end], longest_so_far,
                      end_fw_time - start_fw_time))

        # so much faster since the graph is sparse
        from networkx.algorithms.shortest_paths.weighted import all_pairs_dijkstra_path_length
        start_time = time.time()
        dist_to = dict(all_pairs_dijkstra_path_length(G))
        longest_so_far = 0
        start = -1
        end = -1
        for i in range(G.number_of_nodes()):
            for j in range(i + 1, G.number_of_nodes()):
                if dist_to[i][j] > longest_so_far:
                    longest_so_far = dist_to[i][j]
                    start = i
                    end = j
        end_time = time.time()
        print('start {} to end {} in longest shortest distance {} in time {:.3f} seconds'
              .format(positions[start], positions[end], longest_so_far,
                      end_time - start_time))
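# A self-contained timing comparison in the same spirit, on a synthetic sparse
# graph rather than the ch07 highway data (assumed unavailable here). On sparse
# graphs, repeated Dijkstra typically beats Floyd-Warshall's O(V^3) sweep.
import time
import networkx as nx
from networkx.algorithms.shortest_paths.dense import floyd_warshall
from networkx.algorithms.shortest_paths.weighted import all_pairs_dijkstra_path_length

G = nx.gnm_random_graph(200, 400, seed=1)
for u, v in G.edges():
    G[u][v]['weight'] = 1.0

t0 = time.time()
fw = floyd_warshall(G, weight='weight')
t1 = time.time()
dj = dict(all_pairs_dijkstra_path_length(G))
t2 = time.time()
print(f'Floyd-Warshall: {t1 - t0:.3f}s, all-pairs Dijkstra: {t2 - t1:.3f}s')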
import networkx as nx
from networkx.algorithms.shortest_paths import weighted


def initialize_attraction_basin_dist(gnx):
    attractor_basin_out_dist, attractor_basin_in_dist, avg_in, avg_out = initialize_variables(gnx)

    # for each node, count the out- and in-distances to the other nodes in the graph
    dists = dict(weighted.all_pairs_dijkstra_path_length(gnx, len(gnx.nodes()), weight='weight'))
    for n in gnx.nodes():
        try:
            count_out_dist = {}
            count_in_dist = {}
            out_nodes = nx.descendants(gnx, n)
            in_nodes = nx.ancestors(gnx, n)
            for d in out_nodes:
                count_out_dist[dists[n][d]] = count_out_dist.get(dists[n][d], 0) + 1
            for d in in_nodes:
                count_in_dist[dists[d][n]] = count_in_dist.get(dists[d][n], 0) + 1
            attractor_basin_out_dist.append(count_out_dist)
            attractor_basin_in_dist.append(count_in_dist)
        except KeyError:
            # node missing from the distance dict (unreachable): record empty histograms
            attractor_basin_out_dist.append({})
            attractor_basin_in_dist.append({})

    # calculate "avg_out" and "avg_in" for each distance from the details of all the nodes
    avg_out = calc_avg_for_dist(len(gnx.nodes()), attractor_basin_out_dist)
    avg_in = calc_avg_for_dist(len(gnx.nodes()), attractor_basin_in_dist)

    attractor_basin_details = [attractor_basin_out_dist, avg_out,
                               attractor_basin_in_dist, avg_in]
    return attractor_basin_details
def _active_learning(self, eps=0.05, batch_size=5, epochs=200, out_prog=True,
                     clear_model=False, out_interval=25, **params):
    if self._opt not in Available_Options:
        print("option {} is not supported".format(self._opt))
        return

    # preliminary calculation for exploitation
    neighbors_matrix = None
    if self._opt in {'region_entropy'} or params.get('representation_region', False):
        gn = GraphNeighbors(self._graph)
        neighbors_matrix = gn.neighbors(second_order=params.get('region_second_order', False),
                                        self_node=params.get('region_include_self', False),
                                        with_orders=False,
                                        only_out=params.get('region_only_out', False))
        if params.get('region_weights', None):
            if params['region_weights'] == 'out':
                neighbors_degree = np.array(self._graph.out_degree(self._nodes_order))[:, 1]
            elif params['region_weights'] == 'in':
                neighbors_degree = np.array(self._graph.in_degree(self._nodes_order))[:, 1]
            else:
                neighbors_degree = np.array(self._graph.degree(self._nodes_order))[:, 1]
            neighbors_degree = neighbors_degree.astype(float)
            # drop neighbors with zero degree (iterating backwards so deletion is safe)
            for x in neighbors_matrix:
                for j in range(len(x) - 1, -1, -1):
                    if neighbors_degree[x[j]] == 0:
                        del x[j]
            if params.get('region_opposite_weights', False):
                weights = neighbors_degree
            else:
                weights = 1 / neighbors_degree
                weights[weights == np.inf] = 0
        else:
            weights = None

    if self._opt in {'centrality', 'Chang', 'geo_cent', 'APR'}:
        page_rank_feature = {"page_rank": FeatureMeta(PageRankCalculator, {"pr"})}
        page_rank_matrix = self._gl.get_features(page_rank_feature)
        norm_page_rank = normalize(page_rank_matrix, axis=0, norm='max').reshape(-1)
        if self._opt == 'APR':
            adj_matrix = nx.adjacency_matrix(self._graph, nodelist=self._nodes_order).todense()
            np.fill_diagonal(adj_matrix, 0)
            adj_matrix = adj_matrix / adj_matrix.sum(axis=1)
            adj_matrix = np.nan_to_num(adj_matrix)
            adj_matrix = adj_matrix.T
            if torch.cuda.is_available():
                page_rank_matrix = torch.Tensor(page_rank_matrix).to(self._device)
                adj_matrix = torch.Tensor(adj_matrix).to(self._device)

    if self._opt in {'rep_dist', 'Chang'}:
        inv_cov = None
        features_matrix = None
        if params.get('representation_vectors', 'model') in {'topology', 'both'}:
            rep_features = CHOSEN_FEATURES.copy()
            if not params.get('representation_motif4', False):
                rep_features.pop('motif4')
            features_matrix = self._gl.get_features(rep_features, print_time=True)
            # replace all nan values of attractor basin with 100
            features_matrix[np.isnan(features_matrix)] = 100
            if params.get('representation_measure', 'mahalanobis') == 'mahalanobis' and \
                    params['representation_vectors'] == 'topology':
                inv_cov = np.linalg.pinv(np.cov(features_matrix.T))

    if self._opt in {'geo_dist', 'geo_cent'}:
        graph_dists = dict(weighted.all_pairs_dijkstra_path_length(self._graph, self._num_nodes,
                                                                   weight='weight'))
        ancestors = [nx.ancestors(self._graph, node) for node in sorted(self._graph)]
        descendants = [nx.descendants(self._graph, node) for node in sorted(self._graph)]

    if self._opt == 'k_truss':
        k_truss_score = k_truss(self._graph)

    if self._opt == 'feature':
        if params.get('feature', 'attractor_basin') == 'attractor_basin':
            chosen_feature = {"attractor_basin": FeatureMeta(AttractorBasinCalculator, {"ab"})}
            feature_score = self._gl.get_features(chosen_feature)

    # initializing parameters for plots
    if out_prog:
        results = {0: self._init(n_nodes=1)}
        prog_intervals = out_interval
        prog_iter = 1
        cur_interval = 1
    else:
        self._init()
        results = {}

    while self._num_revealed < self._stop_cond:
        # making output for plot
        if out_prog and self._num_revealed >= cur_interval / prog_intervals * self._stop_cond:
            res = self._train_and_eval(epochs, iterations=prog_iter, clear_model=clear_model)
            percent_revealed = round(self._num_revealed / self._num_nodes * 100, 2)
            results[percent_revealed] = res
            val = list(res.values())[0]
            self._model_runner.data_logger.info(self.eval_method, percent_revealed,
                                                val["loss"], val["acc"],
                                                val["mic_f1"], val["mac_f1"])
            cur_interval = np.floor(self._num_revealed / self._stop_cond * prog_intervals) + 1
        elif self._opt in {'entropy', 'region_entropy', 'rep_dist', 'Chang'}:
            self._train_and_eval(epochs, clear_model=clear_model, verbose=0)

        # eps-greedy algorithm choosing nodes to reveal
        rand = np.random.uniform(0, 1)
        # Explore
        if rand < eps or self._opt == 'random':
            idx = self._explore(batch_size=batch_size, balance=params.get('balance', False))
        # Exploit
        else:
            # evaluate the samples
            if self._opt in {'rep_dist', 'Chang'}:
                outlier_score = self._representation_distance(
                    vectors_type=params.get('representation_vectors', 'model'),
                    dist_measure=params.get('representation_measure', 'mahalanobis'),
                    features_matrix=features_matrix,
                    inv_cov=inv_cov,
                    average_distance=params.get('representation_average', False),
                    region=params.get('representation_region', False),
                    neighbors_matrix=neighbors_matrix)
            if self._opt in {'entropy', 'Chang'}:
                test_probs = self._model_runner.predict_proba(self._models).cpu()
                if params.get('margin', False):
                    test_probs.sort()
                    entropies = test_probs[:, -1] - test_probs[:, -2]
                else:
                    entropies = entropy(test_probs.t())
            if self._opt in {'region_entropy'}:
                region_entropies = self._region_entropies(
                    neighbors_matrix=neighbors_matrix,
                    weights=weights,
                    average_entropy=params.get('region_average_entropy', False),
                    prefer_large_regions=params.get('region_prefer_large', False),
                    margin=params.get('margin', False))
            if self._opt in {'geo_dist', 'geo_cent'}:
                dist_to_train = self._geodesic_distance(graph_dists, ancestors, descendants,
                                                        in_out=params.get('geo_in_out', 'both'))
            if self._opt == 'APR':
                relative_pr = self._relative_page_rank(page_rank_matrix, adj_matrix,
                                                       only_known=params.get('apr_only_known', False))

            # build the scores vector according to the evaluation method
            if self._opt == 'entropy':
                scores = entropies
            elif self._opt == 'region_entropy':
                scores = region_entropies
            elif self._opt == 'rep_dist':
                scores = outlier_score
            elif self._opt == 'geo_dist':
                scores = dist_to_train
            elif self._opt == 'centrality':
                scores = np.asarray([page_rank_matrix[x] for x in self._pool]).reshape(-1)
            elif self._opt == 'k_truss':
                scores = np.asarray([k_truss_score[x] for x in self._pool]).reshape(-1)
            elif self._opt == 'feature':
                scores = np.asarray([feature_score[x] for x in self._pool]).reshape(-1)
            elif self._opt == 'APR':
                scores = relative_pr.flat
            elif self._opt == 'Chang':
                c3 = max(0.7 * (1 - self._num_revealed / self._stop_cond), 0)
                c1 = c2 = (1 - c3) / 2
                density = normalize(np.nan_to_num(outlier_score).reshape(1, -1)).reshape(-1)
                norm_entropy = normalize(entropies.reshape(1, -1)).reshape(-1)
                centrality = np.asarray([norm_page_rank[x] for x in self._pool]).reshape(-1)
                scores = c1 * norm_entropy + c2 * density + c3 * centrality
            elif self._opt == 'geo_cent':
                c1 = 0.7
                c2 = 0.3
                g_dists = normalize(dist_to_train.reshape(1, -1), norm='max').reshape(-1)
                # a list, not a generator: np.vstack requires a sequence
                centrality = np.vstack([norm_page_rank[x] for x in self._pool]).reshape(-1)
                scores = c1 * g_dists + c2 * centrality

            if params.get('margin', False):
                # choose the lowest-scoring nodes
                idx = np.argpartition(scores, batch_size)[:batch_size]
            else:
                # choose the best (highest-scoring) nodes
                idx = np.argpartition(scores, -batch_size)[-batch_size:]

        self._reveal(idx)

    # evaluate the model
    res = self._train_and_eval(epochs, clear_model=clear_model)
    percent_revealed = round(self._num_revealed / self._num_nodes * 100, 2)
    results[percent_revealed] = res
    val = list(res.values())[0]
    self._model_runner.data_logger.info(self.eval_method, percent_revealed,
                                        val["loss"], val["acc"],
                                        val["mic_f1"], val["mac_f1"])
    return results
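# Minimal sketch of the eps-greedy selection at the heart of _active_learning():
# with probability eps explore at random, otherwise exploit by taking the
# batch_size highest scores with np.argpartition. The score vector here is
# random, standing in for the entropy/distance/centrality scores above.
import numpy as np

rng = np.random.default_rng(0)
eps, batch_size = 0.05, 5
scores = rng.random(100)

if rng.uniform() < eps:
    idx = rng.choice(len(scores), size=batch_size, replace=False)   # explore
else:
    idx = np.argpartition(scores, -batch_size)[-batch_size:]        # exploit: top-k
print(sorted(idx.tolist()))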
#!/usr/bin/env python
"""dijkstra.py: all_pairs_dijkstra_path_length demonstration"""

import networkx as nx
from networkx.algorithms.shortest_paths.weighted import all_pairs_dijkstra_path_length

# Make a graph.
G = nx.DiGraph()

# Define edges with weights.
G.add_weighted_edges_from(
    ((0, 1, 10.0), (0, 2, 14.0), (0, 3, 12.0),
     (1, 2, 8.0), (1, 4, 19.0),
     (2, 3, 7.0), (2, 5, 22.0),
     (3, 5, 21.0), (4, 5, 11.0)))

# Compute the shortest path lengths between all nodes in graph G.
all_pairs = all_pairs_dijkstra_path_length(G)

for source, mapping in all_pairs:
    for target in mapping.keys():
        if source != target:
            dist = mapping[target]
            print(f"({source}, {target}): {dist:4.1f}")
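# Note that all_pairs_dijkstra_path_length returns a lazy generator that can
# only be iterated once; materialize it as a dict when the distances are needed
# more than once.
lengths = dict(all_pairs_dijkstra_path_length(G))
print(lengths[0][5])  # 33.0, along 0 -> 3 -> 5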
    pressure_sensors_sorted_by_distance = sorted(pressure_sensors_with_distance,
                                                 key=lambda sensor: sensor[1])
    flow_sensors_sorted_by_distance = sorted(flow_sensors_with_distance,
                                             key=lambda sensor: sensor[1])
    sensors_sorted_by_distance = sorted(sensors, key=lambda sensor: sensor[1])

    if n == 'all':
        return sensors_sorted_by_distance        # return all sensors
    else:
        return sensors_sorted_by_distance[:n]    # return only the n closest sensors


def get_sensor_names() -> dict:
    """
    Returns names of available sensors (dict)
    Purpose is to expose the value to another .py file
    """
    return available_measurements


if __name__ == '__main__':
    G = create_graph()
    shortest_paths = list(all_pairs_dijkstra_path_length(G))
    a, b = get_closest_sensors(G, 'J63', 'all', shortest_paths)
    # print(b)
    # nx.draw(G, nx.get_node_attributes(G, 'pos'), node_size=20)
    # length = wn.query_link_attribute('length')
    # G = wn.get_graph(wn, link_weight=length)
    # plt.show()