Example #1
    def _calculate(self, threshold, is_regression=False):
        num_nodes = len(self._gnx)
        directed_dists = dict(weighted.all_pairs_dijkstra_path_length(self._gnx, num_nodes, weight='weight'))
        undirected_dists = dict(
            weighted.all_pairs_dijkstra_path_length(self._gnx.to_undirected(), num_nodes, weight='weight'))

        # calculate the number of nodes reachable to/from node 'n'
        b_u = {node: len(set(nx.ancestors(self._gnx, node)).union(nx.descendants(self._gnx, node)))
               for node in self._gnx}
        max_b_u = float(max(b_u.values()))

        for node in self._gnx:
            # the threshold ratio determines whether this node is considered
            if (b_u[node] / max_b_u) <= self._threshold:
                self._features[node] = 0
                continue

            udists = undirected_dists[node]
            dists = directed_dists[node]

            # getting coordinated values from two dictionaries with the same keys
            # saving the data as np.array type
            num, denom = map(np.array, zip(*((udists[n], dists[n]) for n in dists)))

            num = num[denom != 0]
            denom = denom[denom != 0]

            self._features[node] = np.sum(num / denom) / float(b_u[node])
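Note that several of these snippets pass the node count as the second positional argument, which NetworkX binds to `cutoff` (a bound on weighted path length, not hop count). A minimal sketch of the equivalent keyword form, assuming NetworkX 2.x:

import networkx as nx
from networkx.algorithms.shortest_paths import weighted

G = nx.DiGraph()
G.add_weighted_edges_from([(0, 1, 1.0), (1, 2, 2.0), (2, 0, 4.0)])

# cutoff bounds the *weighted* distance, so len(G) is only a safe stand-in
# for "no limit" when every shortest path is shorter than the node count
dists = dict(weighted.all_pairs_dijkstra_path_length(G, cutoff=len(G), weight='weight'))
print(dists[0])  # {0: 0, 1: 1.0, 2: 3.0}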
Example #2
def calculate_flow_index(gnx, threshold):
    flow_list = {}
    nodes = gnx.nodes()
    gnx_without_direction = gnx.to_undirected()
    # the largest reachability count over all nodes
    max_b_v = max((len(nx.ancestors(gnx_without_direction, n)) for n in nodes), default=0)
    for n in nodes:
        b_u = len(nx.ancestors(gnx_without_direction, n))
        # materialize the generators so they can be indexed below
        frac_but = dict(weight.all_pairs_dijkstra_path_length(gnx, b_u,
                                                              weight='weight'))
        frac_top = dict(weight.all_pairs_dijkstra_path_length(gnx_without_direction, b_u,
                                                              weight='weight'))
        vet_sum = 0
        for k in nodes:
            if k in frac_but[n] and frac_but[n][k] != 0 and float(b_u) / max_b_v > threshold:
                vet_sum += frac_top[n][k] / frac_but[n][k]
        flow_node = float(vet_sum) / b_u
        flow_list[n] = flow_node

    return flow_list
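In NetworkX 2.x, `all_pairs_dijkstra_path_length` returns a generator of `(source, dict)` pairs rather than a dict, which is why the lookups above only work once the result is materialized. A minimal sketch, assuming `weight` aliases `networkx.algorithms.shortest_paths.weighted` as in the snippet:

import networkx as nx
from networkx.algorithms.shortest_paths import weighted as weight

g = nx.DiGraph([(1, 2), (2, 3)])

# without dict(...) this is a one-shot generator and g_dists[1] would fail
g_dists = dict(weight.all_pairs_dijkstra_path_length(g, weight='weight'))
print(g_dists[1])  # {1: 0, 2: 1, 3: 2}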
Example #3
    def _initialize_attraction_basin_dist(self):
        ab_in_dist = {}
        ab_out_dist = {}

        # for each node, calculate the in- and out-distances to the other nodes in the graph
        dists = dict(
            weighted.all_pairs_dijkstra_path_length(self._gnx,
                                                    len(self._gnx),
                                                    weight='weight'))
        for node in self._gnx:
            if node not in dists:
                continue

            node_dists = dists[node]
            count_out_dist = Counter(
                [node_dists.get(d) for d in nx.descendants(self._gnx, node)])
            count_in_dist = Counter([
                dists.get(d, {}).get(node)
                for d in nx.ancestors(self._gnx, node)
            ])
            count_out_dist.pop(None, None)
            count_in_dist.pop(None, None)

            ab_out_dist[node] = count_out_dist
            ab_in_dist[node] = count_in_dist

        return ab_out_dist, ab_in_dist
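A self-contained sketch of the Counter histogram built above: for one node, count how many descendants lie at each shortest-path distance (`weighted` is the same NetworkX module used in the method):

from collections import Counter

import networkx as nx
from networkx.algorithms.shortest_paths import weighted

g = nx.DiGraph([(0, 1), (0, 2), (1, 3), (2, 3)])
dists = dict(weighted.all_pairs_dijkstra_path_length(g, weight='weight'))

# distance histogram over the descendants of node 0
hist = Counter(dists[0].get(d) for d in nx.descendants(g, 0))
hist.pop(None, None)  # drop unreachable descendants, as the method does
print(hist)  # Counter({1: 2, 2: 1})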
Example #4
def gdist(replist, path_method='dijkstra', filename='pctot_raw.p'):
    """
    Parameters
    ----------
    replist (rep): LIST. List of representatives in each sub-network.
    path_method : CATEGORICAL ('dijkstra', 'bellman_ford'), default = 'dijkstra'.
    filename : STRING, default = 'pctot_raw.p'. Filename of raw corr coeff matrix.

    Returns
    -------
    gr_dist (inter_group_dist): DICT. Gives group-to-group distances.
    """

    # LOAD RAW CORR MATRIX
    with open(filename, "rb") as f:
        pctot_raw = pickle.load(f)
    # FILTER GROUP REPRESENTATIVES
    pc_group  = pctot_raw[replist, :][:, replist]
    pc_group  = np.absolute(pc_group)
    # FORM INTERGROUP DISTANCE
    nx_group  = nx.from_numpy_array(pc_group)
    if path_method == 'dijkstra':
        gr_dist = dict(spw.all_pairs_dijkstra_path_length(nx_group))
    elif path_method == 'bellman_ford':
        gr_dist = dict(spw.all_pairs_bellman_ford_path_length(nx_group))
    else:
        raise ValueError('Invalid shortest path algorithm.')
    return gr_dist
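The pickled matrix `pctot_raw.p` is specific to the original project, so a usage sketch has to fabricate its input; the matrix below is a made-up absolute-correlation matrix, and only the `from_numpy_array` → all-pairs pipeline mirrors `gdist`:

import numpy as np
import networkx as nx
from networkx.algorithms.shortest_paths import weighted as spw

# hypothetical stand-in for the filtered |corr| matrix pc_group
pc_group = np.array([[0.0, 0.8, 0.1],
                     [0.8, 0.0, 0.5],
                     [0.1, 0.5, 0.0]])

nx_group = nx.from_numpy_array(pc_group)  # nonzero entries become weighted edges
gr_dist = dict(spw.all_pairs_dijkstra_path_length(nx_group))
print(gr_dist[0])  # e.g. {0: 0, 2: 0.1, 1: 0.6}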
Example #5
    def __init__(self, network: UndirectedStreetNetwork, **kwargs):
        """See class documentation."""
        assert type(network) is UndirectedStreetNetwork
        self.street_network = network
        self.states = list(set(map(lambda x: tuple(sorted(x)), network.graph.edges.keys())))
        self.shortest_paths = dict(
            all_pairs_dijkstra_path(self.street_network.graph, weight="length")
        )
        self.shortest_path_dictionary = dict(
            all_pairs_dijkstra_path_length(self.street_network.graph, weight="length")
        )
        self.gamma = kwargs.get("gamma", 0.01)
        self.sigma = kwargs.get("sigma", 1)
Example #6
    def __init__(self, network, **kwargs):
        assert type(network) is DirectedStreetNetwork
        self.street_network = network
        self.create_states()
        self.shortest_path_dictionary = dict(
            all_pairs_dijkstra_path_length(network.graph, weight="length")
        )
        self.shortest_paths = dict(
            all_pairs_dijkstra_path(self.street_network.graph, weight="length")
        )
        self.compute_legal_transitions()
        self.gamma = kwargs.get("gamma", 0.01)
        self.sigma = kwargs.get("sigma", 1)
Example #7
def calculate_flow_index(gnx):
    flow_list = {}
    nodes = gnx.nodes()
    gnx_without_direction = gnx.to_undirected()
    for n in nodes:
        b_u = len(nx.ancestors(gnx_without_direction, n))
        # materialize the generators so they can be indexed below
        frac_but = dict(weight.all_pairs_dijkstra_path_length(gnx, b_u, weight='weight'))
        frac_top = dict(weight.all_pairs_dijkstra_path_length(gnx_without_direction, b_u, weight='weight'))
        vet_sum = 0
        for k in nodes:
            if k in frac_but[n] and frac_but[n][k] != 0:
                vet_sum += frac_top[n][k] / frac_but[n][k]
        flow_node = vet_sum / b_u
        flow_list[n] = flow_node

    return flow_list
Example #8
def model_network_with_linreg(n) -> tuple:
	"""
	Model every node in the water network with an sklearn.linear_regression() model based on readings from 
	n closest sensors
	Arguments:	n - number of closest sensors to read data from. Uses n of each, e.g. if n=1, readings from the closest
				flowmeter AND the closest pressure meter are used (int or str 'all')
	Returns:	tuple (list of dicts describing each node in the network {'node', 'regression', 'msq', 'r2', 'sensors'}, normalization_params dict)
															where:	node - node name (str)
																	regression - sklearn regression object
																	msq - mean squared error (float)
																	r2 - R2 score (float)
																	sensors - list of strings, names of sensors used
	"""
	print("Modeling the network with linreg (linear_regression.py - model_network_with_linregression())")

	# collect data
	G = ng.create_graph()																							# get NetworkX graphs
	amount_of_junctions = len(G.nodes())																			# for printing progress
	amount_of_pipes = len(G.edges())																				# for printing progress
	shortest_paths = list(all_pairs_dijkstra_path_length(G))														# get shortest distances between all node pairs
	sim_results = wntr_WSN.get_sim_results()																		# no leak simulation results
	all_pressure_readings, all_flowrate_readings = get_all_sensor_readings(sim_results)								# extract readings for sensors we have

	# preprocess X data
	data_X = pd.concat([all_pressure_readings, all_flowrate_readings], axis=1, join='inner')						# join readings into a single pandas DF
	data_X = data_X.loc[:, ~data_X.columns.duplicated()]															# remove duplicate columns
	normalization_params = {'mean': data_X.mean(), 'std': data_X.std()}												# collect normalization params
	data_X = (data_X - normalization_params['mean']) / normalization_params['std']									# normalise X set

	# for each node in graph create a separate sklearn linear_regression() object
	linreg_models = []
	for i, node in enumerate(G.nodes()):
		print(f'\tModeling junction {node}: ({i+1} out of {amount_of_junctions})')									# printing progress
		data_X_scope = data_X  # reference to data_X; the slicing below creates a new frame
		sensors = ng.get_closest_sensors(G=G, 																		# retrieve sensors closest to node
										 central_node=node,
										 n=n,																		# nr of sensors 
										 shortest_paths=shortest_paths)
		sensors_names = [i[0] for i in sensors]  # keep the sensor names, drop the distances

		data_X_scope = data_X_scope[sensors_names]																	# extract data for the closest sensors

		data_y = sim_results.node['pressure'][node]																	# get y data for supervised learning
		train_X, test_X, train_y, test_y = train_test_split(data_X_scope,											# split and shuffle dataset
												 		  	data_y,
												 		  	test_size=0.1)

		linreg_models.append(create_linreg_model(node, train_X, test_X, train_y, test_y))							# model the node and add it to the list

	return linreg_models, normalization_params
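`ng`, `wntr_WSN`, and `create_linreg_model` are project-local helpers that are not shown here; the sketch below is a hypothetical reconstruction of the per-node modeling step, matching the dict fields named in the docstring ('node', 'regression', 'msq', 'r2') on synthetic data:

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# synthetic stand-in for the pressure/flowrate readings of 3 nearby sensors
rng = np.random.default_rng(0)
X = rng.normal(size=(200, 3))
y = X @ np.array([1.5, -0.7, 0.2]) + rng.normal(scale=0.1, size=200)
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.1)

reg = LinearRegression().fit(train_X, train_y)
pred = reg.predict(test_X)
model = {'node': 'J63',  # hypothetical junction name
         'regression': reg,
         'msq': mean_squared_error(test_y, pred),
         'r2': r2_score(test_y, pred)}
print(model['msq'], model['r2'])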
Example #9
def floyd_warshall_highway():
    """Generate Floyd-Warshall results with MA highway data."""
    from ch07.tmg_load import tmg_load, highway_map
    from ch07.dependencies import plt_error

    if not plt_error:
        (G, positions, _) = tmg_load(highway_map())
        from networkx.algorithms.shortest_paths.dense import floyd_warshall
        print('This might take a while')
        start_fw_time = time.time()
        dist_to = floyd_warshall(G, weight='weight')
        longest_so_far = 0
        start = -1
        end = -1
        for i in range(G.number_of_nodes()):
            for j in range(i + 1, G.number_of_nodes()):
                if dist_to[i][j] > longest_so_far:
                    longest_so_far = dist_to[i][j]
                    start = i
                    end = j
        end_fw_time = time.time()
        print(
            'start {} to end {} in longest shortest distance {} in time {:.3f} seconds'
            .format(positions[start], positions[end], longest_so_far,
                    end_fw_time - start_fw_time))

        # so much faster since graph is sparse
        from networkx.algorithms.shortest_paths.weighted import all_pairs_dijkstra_path_length
        start_time = time.time()
        dist_to = dict(all_pairs_dijkstra_path_length(G))

        longest_so_far = 0
        start = -1
        end = -1
        for i in range(G.number_of_nodes()):
            for j in range(i + 1, G.number_of_nodes()):
                if dist_to[i][j] > longest_so_far:
                    longest_so_far = dist_to[i][j]
                    start = i
                    end = j
        end_time = time.time()
        print(
            'start {} to end {} in longest shortest distance {} in time {:.3f} seconds'
            .format(positions[start], positions[end], longest_so_far,
                    end_time - start_time))
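The MA highway data (`tmg_load`, `highway_map`) isn't bundled here; the sketch below reruns the same comparison on a synthetic sparse graph, and typically reproduces the ordering the comment claims (Dijkstra all-pairs beating Floyd-Warshall on sparse inputs):

import time

import networkx as nx
from networkx.algorithms.shortest_paths.dense import floyd_warshall
from networkx.algorithms.shortest_paths.weighted import all_pairs_dijkstra_path_length

G = nx.gnm_random_graph(200, 600, seed=42)
nx.set_edge_attributes(G, 1.0, 'weight')

t0 = time.time()
fw = floyd_warshall(G, weight='weight')
t1 = time.time()
dj = dict(all_pairs_dijkstra_path_length(G, weight='weight'))
t2 = time.time()
print('floyd_warshall {:.3f}s, all_pairs_dijkstra {:.3f}s'.format(t1 - t0, t2 - t1))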
Example #10
def initialize_attraction_basin_dist(gnx):
    attractor_basin_out_dist, attractor_basin_in_dist, avg_in, avg_out = initialize_variables(
        gnx)
    # for each node, calculate the in- and out-distances to the other nodes in the graph
    dists = dict(weight.all_pairs_dijkstra_path_length(gnx,
                                                       len(gnx.nodes()),
                                                       weight='weight'))
    for n in gnx.nodes():
        try:
            count_out_dist = {}
            count_in_dist = {}
            out_nodes = nx.descendants(gnx, n)
            in_nodes = nx.ancestors(gnx, n)
            for d in out_nodes:
                if dists[n][d] not in count_out_dist.keys():
                    count_out_dist[dists[n][d]] = 1
                else:
                    count_out_dist[dists[n][d]] += 1
            for d in in_nodes:
                if dists[d][n] not in count_in_dist.keys():
                    count_in_dist[dists[d][n]] = 1
                else:
                    count_in_dist[dists[d][n]] += 1
            attractor_basin_out_dist.append(count_out_dist)
            attractor_basin_in_dist.append(count_in_dist)
        except Exception:
            attractor_basin_out_dist.append({})
            attractor_basin_in_dist.append({})
    # calculate "avg_out" and "avg_in" for each distance from the details of all the nodes
    avg_out = calc_avg_for_dist(len(gnx.nodes()), attractor_basin_out_dist)
    avg_in = calc_avg_for_dist(len(gnx.nodes()), attractor_basin_in_dist)
    attractor_basin_details = [
        attractor_basin_out_dist, avg_out, attractor_basin_in_dist, avg_in
    ]
    return attractor_basin_details
Example #11
    def _active_learning(self, eps=0.05, batch_size=5, epochs=200, out_prog=True, clear_model=False, out_interval=25,
                         **params):

        if self._opt not in Available_Options:
            print("option {} is not supported".format(self._opt))
            return

        # preliminary calculation for exploitation
        neighbors_matrix = None
        if self._opt in {'region_entropy'} or params.get('representation_region', False):
            gn = GraphNeighbors(self._graph)
            neighbors_matrix = gn.neighbors(second_order=params.get('region_second_order', False),
                                            self_node=params.get('region_include_self', False),
                                            with_orders=False, only_out=params.get('region_only_out', False))

            if params.get('region_weights', None):
                if params['region_weights'] == 'out':
                    neighbors_degree = np.array(self._graph.out_degree(self._nodes_order))[:, 1]
                elif params['region_weights'] == 'in':
                    neighbors_degree = np.array(self._graph.in_degree(self._nodes_order))[:, 1]
                else:
                    neighbors_degree = np.array(self._graph.degree(self._nodes_order))[:, 1]

                neighbors_degree = neighbors_degree.astype(float)
                for x in neighbors_matrix:
                    for j in range(len(x) - 1, -1, -1):
                        if neighbors_degree[x[j]] == 0:
                            del x[j]
                if params.get('region_opposite_weights', False):
                    weights = neighbors_degree
                else:
                    weights = 1 / neighbors_degree
                    weights[weights == np.inf] = 0
            else:
                weights = None

        if self._opt in {'centrality', 'Chang', 'geo_cent', 'APR'}:
            page_rank_feature = {"page_rank": FeatureMeta(PageRankCalculator, {"pr"})}
            page_rank_matrix = self._gl.get_features(page_rank_feature)
            norm_page_rank = normalize(page_rank_matrix, axis=0, norm='max').reshape(-1)
            if self._opt == 'APR':
                adj_matrix = nx.adjacency_matrix(self._graph, nodelist=self._nodes_order).todense()
                np.fill_diagonal(adj_matrix, 0)
                adj_matrix = adj_matrix / adj_matrix.sum(axis=1)
                adj_matrix = np.nan_to_num(adj_matrix)
                adj_matrix = adj_matrix.T
                if torch.cuda.is_available():
                    page_rank_matrix = torch.Tensor(page_rank_matrix).to(self._device)
                    adj_matrix = torch.Tensor(adj_matrix).to(self._device)

        if self._opt in {'rep_dist', 'Chang'}:
            inv_cov = None
            features_matrix = None
            if params.get('representation_vectors', 'model') in {'topology', 'both'}:
                rep_features = CHOSEN_FEATURES.copy()
                if not params.get('representation_motif4', False):
                    rep_features.pop('motif4')
                features_matrix = self._gl.get_features(rep_features, print_time=True)
                # replace all NaN values of attractor basin with 100
                features_matrix[np.isnan(features_matrix)] = 100
                if params.get('representation_measure', 'mahalanobis') == 'mahalanobis' and \
                        params['representation_vectors'] == 'topology':
                    inv_cov = np.linalg.pinv(np.cov(features_matrix.T))

        if self._opt in {'geo_dist', 'geo_cent'}:
            graph_dists = dict(weighted.all_pairs_dijkstra_path_length(self._graph, self._num_nodes, weight='weight'))
            ancestors = [nx.ancestors(self._graph, node) for node in sorted(self._graph)]
            descendants = [nx.descendants(self._graph, node) for node in sorted(self._graph)]

        if self._opt == 'k_truss':
            k_truss_score = k_truss(self._graph)

        if self._opt == 'feature':
            if params.get('feature', 'attractor_basin') == 'attractor_basin':
                chosen_feature = {"attractor_basin": FeatureMeta(AttractorBasinCalculator, {"ab"})}
            feature_score = self._gl.get_features(chosen_feature)

        # initializing parameters for plots
        if out_prog:
            results = {0: self._init(n_nodes=1)}
            prog_intervals = out_interval
            prog_iter = 1
            cur_interval = 1
        else:
            self._init()
            results = {}

        while self._num_revealed < self._stop_cond:
            # making output for plot
            if out_prog and self._num_revealed >= cur_interval/prog_intervals*self._stop_cond:
                res = self._train_and_eval(epochs, iterations=prog_iter, clear_model=clear_model)
                percent_revealed = round(self._num_revealed / self._num_nodes * 100, 2)
                results[percent_revealed] = res
                val = list(res.values())[0]
                self._model_runner.data_logger.info(self.eval_method, percent_revealed,
                                                    val["loss"], val["acc"], val["mic_f1"], val["mac_f1"])
                cur_interval = np.floor(self._num_revealed/self._stop_cond*prog_intervals) + 1
            elif self._opt in {'entropy', 'region_entropy', 'rep_dist', 'Chang'}:
                self._train_and_eval(epochs, clear_model=clear_model, verbose=0)

            # eps greedy algorithm choosing nodes to reveal
            rand = np.random.uniform(0, 1)
            # Explore
            if rand < eps or self._opt == 'random':
                idx = self._explore(batch_size=batch_size, balance=params.get('balance', False))
            # Exploit
            else:
                # evaluate the samples
                if self._opt in {'rep_dist', 'Chang'}:
                    outlier_score = self._representation_distance(vectors_type=params.get('representation_vectors',
                                                                                          'model'),
                                                                  dist_measure=params.get('representation_measure',
                                                                                          'mahalanobis'),
                                                                  features_matrix=features_matrix, inv_cov=inv_cov,
                                                                  average_distance=params.get('representation_average',
                                                                                              False),
                                                                  region=params.get('representation_region', False),
                                                                  neighbors_matrix=neighbors_matrix)

                if self._opt in {'entropy', 'Chang'}:
                    test_probs = self._model_runner.predict_proba(self._models).cpu()
                    if params.get('margin', False):
                        test_probs.sort()
                        entropies = test_probs[:, -1] - test_probs[:, -2]
                    else:
                        entropies = entropy(test_probs.t())

                if self._opt in {'region_entropy'}:
                    region_entropies = self._region_entropies(neighbors_matrix=neighbors_matrix, weights=weights,
                                                              average_entropy=params.get('region_average_entropy',
                                                                                         False),
                                                              prefer_large_regions=params.get('region_prefer_large',
                                                                                              False),
                                                              margin=params.get('margin', False))

                if self._opt in {'geo_dist', 'geo_cent'}:
                    dist_to_train = self._geodesic_distance(graph_dists, ancestors, descendants,
                                                            in_out=params.get('geo_in_out', 'both'))

                if self._opt == 'APR':
                    relative_pr = self._relative_page_rank(page_rank_matrix, adj_matrix,
                                                           only_known=params.get('apr_only_known', False))

                # build scores vector according to the evaluation method
                if self._opt == 'entropy':
                    scores = entropies
                elif self._opt == 'region_entropy':
                    scores = region_entropies
                elif self._opt == 'rep_dist':
                    scores = outlier_score
                elif self._opt == 'geo_dist':
                    scores = dist_to_train
                elif self._opt == 'centrality':
                    scores = np.asarray([page_rank_matrix[x] for x in self._pool]).reshape(-1)
                elif self._opt == 'k_truss':
                    scores = np.asarray([k_truss_score[x] for x in self._pool]).reshape(-1)
                elif self._opt == 'feature':
                    scores = np.asarray([feature_score[x] for x in self._pool]).reshape(-1)
                elif self._opt == 'APR':
                    scores = relative_pr.flat
                elif self._opt == 'Chang':
                    c3 = max(0.7 * (1 - self._num_revealed/self._stop_cond), 0)
                    c1 = c2 = (1 - c3) / 2

                    density = normalize(np.nan_to_num(outlier_score).reshape(1, -1)).reshape(-1)
                    norm_entropy = normalize(entropies.reshape(1, -1)).reshape(-1)
                    centrality = np.asarray([norm_page_rank[x] for x in self._pool]).reshape(-1)

                    scores = c1 * norm_entropy + c2 * density + c3 * centrality
                elif self._opt == 'geo_cent':
                    c1 = 0.7
                    c2 = 0.3

                    g_dists = normalize(dist_to_train.reshape(1, -1), norm='max').reshape(-1)
                    centrality = np.vstack([norm_page_rank[x] for x in self._pool]).reshape(-1)

                    scores = c1 * g_dists + c2 * centrality

                if params.get('margin', False):
                    # choose the lowest scores nodes
                    idx = np.argpartition(scores, batch_size)[:batch_size]
                else:
                    # choose the best nodes
                    idx = np.argpartition(scores, -batch_size)[-batch_size:]

            self._reveal(idx)

        # evaluate the model
        res = self._train_and_eval(epochs, clear_model=clear_model)
        percent_revealed = round(self._num_revealed / self._num_nodes * 100, 2)
        results[percent_revealed] = res
        val = list(res.values())[0]
        self._model_runner.data_logger.info(self.eval_method, percent_revealed,
                                            val["loss"], val["acc"], val["mic_f1"], val["mac_f1"])

        return results
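Both exploit branches above reduce to `np.argpartition` top-k/bottom-k selection; a minimal sketch:

import numpy as np

scores = np.array([0.3, 0.9, 0.1, 0.7, 0.5])
batch_size = 2

best = np.argpartition(scores, -batch_size)[-batch_size:]  # highest scores
worst = np.argpartition(scores, batch_size)[:batch_size]   # lowest scores ('margin' mode)
print(sorted(best.tolist()), sorted(worst.tolist()))  # [1, 3] [0, 2] (order within a batch is arbitrary)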
Example #12
#!/usr/bin/env python
"""dijkstra.py: all_pairs_dijkstra_path demonstration
"""

import networkx as nx
from networkx.algorithms.shortest_paths.weighted import all_pairs_dijkstra_path_length

# Make a graph.
G = nx.DiGraph()

# Define edges with weights.
G.add_weighted_edges_from(
    ((0, 1, 10.0),
     (0, 2, 14.0),
     (0, 3, 12.0),
     (1, 2, 8.0),
     (1, 4, 19.0),
     (2, 3, 7.0),
     (2, 5, 22.0),
     (3, 5, 21.0),
     (4, 5, 11.0),))

# Compute the shortest path lengths between all nodes in graph G.
all_pairs = all_pairs_dijkstra_path_length(G)
for source, mapping in all_pairs:
    for target in mapping.keys():
        if source != target:
            dist = mapping[target]
            print(f"({source}, {target}): {dist:4.1f}")
Example #13
    pressure_sensors_sorted_by_distance = sorted(
        pressure_sensors_with_distance, key=lambda sensor: sensor[1])
    flow_sensors_sorted_by_distance = sorted(flow_sensors_with_distance,
                                             key=lambda sensor: sensor[1])
    sensors_sorted_by_distance = sorted(sensors, key=lambda sensor: sensor[1])

    if n == 'all':
        return sensors_sorted_by_distance  # return all sensors
    else:
        return sensors_sorted_by_distance[:n]  # return only n closest sensors


def get_sensor_names() -> dict:
    """
	Returns names of available sensors names (dict)
	Purpose is to get the value to another .py file
	"""
    return available_measurements


if __name__ == '__main__':

    G = create_graph()
    shortest_paths = list(all_pairs_dijkstra_path_length(G))
    a, b = get_closest_sensors(G, 'J63', 'all', shortest_paths)
    #print(b)
    #nx.draw(G, nx.get_node_attributes(G, 'pos'), node_size=20)
    #length = wn.query_link_attribute('length')
    #G = wn.get_graph(wn, link_weight=length)
    #plt.show()