def test_directed(self):
    """Tests that each pair of nodes in the directed graph is counted
    once when computing the Wiener index.

    """
    G = complete_graph(3)
    H = DiGraph(G)
    eq_(2 * wiener_index(G), wiener_index(H))
def ustrezna_drevesa(sez):
    # From a list of trees, collect those whose Wiener index changes by
    # exactly one when a leaf is detached and reattached to another vertex.
    novi = []
    for T in sez:
        w = nx.wiener_index(T, weight=None)
        for lis in listi(T):
            for vozlisce in T:
                G = T.copy()
                G.remove_node(lis)
                G.add_node(lis)
                G.add_edge(lis, vozlisce)
                v = nx.wiener_index(G, weight=None)
                if abs(w - v) == 1:
                    novi.append(G)
    return novi
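# The snippet above depends on a helper `listi` that is not shown here; a
# minimal sketch, assuming it simply returns the leaves (degree-1 nodes)
# of a tree:
import networkx as nx

def listi(T):
    # hypothetical reconstruction: leaves are the nodes of degree one
    return [v for v in T if T.degree(v) == 1]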
def networkx_to_outp_format(graph, index, type):
    return {
        "index": index,
        "graph_type": type,
        "wiener_index": nx.wiener_index(graph, weight='weight'),
        "edges": list(graph.edges())
    }
def wiener_impact_e_removal(g: nx.Graph) -> List[float]:
    """
    Computes the Wiener impacts for every possible edge removal in a graph.
    """
    M = g.number_of_edges()
    return wiener_indices_e_removal(g) - nx.wiener_index(g) * np.ones(M)
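# `wiener_indices_e_removal` is an external helper not shown here; a minimal
# sketch, assuming it returns one Wiener index per single-edge-deleted copy
# of the graph (nx.wiener_index yields inf when the deletion disconnects it):
import networkx as nx
import numpy as np

def wiener_indices_e_removal(g: nx.Graph) -> np.ndarray:
    indices = []
    for e in g.edges():
        h = g.copy()
        h.remove_edge(*e)  # delete a single edge at a time
        indices.append(nx.wiener_index(h))
    return np.asarray(indices)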
def test_path_graph(self):
    """Tests that the Wiener index of the path graph is correctly
    computed.

    """
    # In P_n, there are n - 1 pairs of vertices at distance one, n -
    # 2 pairs at distance two, n - 3 at distance three, ..., 1 at
    # distance n - 1, so the Wiener index should be
    #
    #     1 * (n - 1) + 2 * (n - 2) + ... + (n - 2) * 2 + (n - 1) * 1
    #
    # For example, in P_5,
    #
    #     1 * 4 + 2 * 3 + 3 * 2 + 4 * 1 = 2 (1 * 4 + 2 * 3)
    #
    # and in P_6,
    #
    #     1 * 5 + 2 * 4 + 3 * 3 + 4 * 2 + 5 * 1 = 2 (1 * 5 + 2 * 4) + 3 * 3
    #
    # assuming n is *odd*, this gives the formula
    #
    #     2 \sum_{i = 1}^{(n - 1) / 2} [i * (n - i)]
    #
    # assuming n is *even*, this gives the formula
    #
    #     2 \sum_{i = 1}^{n / 2} [i * (n - i)] - (n / 2) ** 2
    #
    n = 9
    G = path_graph(n)
    expected = 2 * sum(i * (n - i) for i in range(1, (n // 2) + 1))
    actual = wiener_index(G)
    eq_(expected, actual)
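# An independent brute-force cross-check of the closed form used in the test
# above: W(P_n) = sum_{d=1}^{n-1} d * (n - d), since P_n has n - d pairs of
# vertices at distance d.  (A sanity check, not part of the original suite.)
import networkx as nx

for n in range(2, 12):
    G = nx.path_graph(n)
    assert nx.wiener_index(G) == sum(d * (n - d) for d in range(1, n))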
def test_complete_graph(self):
    """Tests that the Wiener index of the complete graph is simply the
    number of edges.

    """
    n = 10
    G = complete_graph(n)
    assert wiener_index(G) == (n * (n - 1) / 2)
def wiener_indices_v_removal(g: nx.Graph) -> List[int]:
    """
    Computes the Wiener indices for every possible vertex removal in a graph.
    """
    deck = deck_of_graphs(g)
    wieners = np.squeeze(np.asarray([nx.wiener_index(x) for x in deck]))
    return wieners
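# `deck_of_graphs` is an external helper not shown here; a minimal sketch,
# assuming it builds the usual "deck" of vertex-deleted subgraphs G - v:
import networkx as nx

def deck_of_graphs(g: nx.Graph):
    deck = []
    for v in list(g.nodes()):
        h = g.copy()
        h.remove_node(v)  # one card of the deck per vertex
        deck.append(h)
    return deck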
def test_complete_graph(self):
    """Tests that the Wiener index of the complete graph is simply the
    number of edges.

    """
    n = 10
    G = complete_graph(n)
    eq_(wiener_index(G), n * (n - 1) / 2)
def compute_wiener_index(G):
    '''Returns -1 if the graph is not connected.'''
    # Bind the result to a separate local name so the `projected_graph`
    # module is not shadowed (rebinding it would raise UnboundLocalError).
    projected = projected_graph.project_oxygen_role_based_graph(G)
    try:
        wiener = nx.wiener_index(projected)
        return wiener
    except Exception:
        return -1
def wiener_impact_v_removal(g: nx.Graph) -> List[float]:
    """
    Computes the Wiener impacts for every possible vertex removal in a graph.
    """
    n = g.number_of_nodes()
    transmissions = np.squeeze(
        np.asarray(np.dot(distance_matrix([g]), np.ones(n))))
    return wiener_indices_v_removal(
        g) + transmissions - nx.wiener_index(g) * np.ones(n)
def projected_oxygen_graph_metrics(projected_graph):
    communities = community.greedy_modularity_communities(projected_graph)
    mod_score = modularity(projected_graph, communities)
    try:
        aspl = nx.average_shortest_path_length(projected_graph)
        wiener = nx.wiener_index(projected_graph)
    except Exception:
        aspl = -1
        wiener = -1
    return aspl, wiener, len(communities), communities, mod_score
def get_structural_virality(self):
    """
    Returns a measure of structural virality.

    Returns
    -------
    Structural virality: float
    """
    import networkx as nx

    n = self.diffusion_tree.number_of_nodes()
    return nx.wiener_index(self.diffusion_tree) / (n * (n - 1))
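# A standalone check of the quantity returned above, on a star (an undirected
# tree): nx.wiener_index counts each unordered pair once, so W / (n * (n - 1))
# is half the mean pairwise distance.
import networkx as nx

n = 6
tree = nx.star_graph(n - 1)  # star on n nodes: a center plus n - 1 leaves
W = nx.wiener_index(tree)    # (n-1) pairs at distance 1, C(n-1, 2) at distance 2
assert W == (n - 1) + 2 * (n - 1) * (n - 2) / 2
print(W / (n * (n - 1)))     # the structural-virality value computed above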
def moc_mnozice_novih_indeksov(graf):
    # Size of the set of new Wiener indices obtained by attaching a pendant
    # vertex at each node in turn: W_new = W + T_v + n, where T_v is the sum
    # of distances from v (its transmission).
    n = len(graf)
    osnovni_index = nx.wiener_index(graf)
    najkrajse_poti = nx.all_pairs_shortest_path(graf)
    I = set()
    for i in range(n):
        vozlisce, slovar_poti = next(najkrajse_poti)
        # len(path) counts nodes, so summing and subtracting n yields T_v
        vsota_poti_iz_vozlisca = sum(
            len(slovar_poti[kljuc]) for kljuc in slovar_poti) - n
        nov_index = osnovni_index + vsota_poti_iz_vozlisca + n
        I.add(int(nov_index))
    return len(I)
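# The update rule used above (attach a pendant vertex at v, and the Wiener
# index becomes W + T_v + n, with T_v the sum of distances from v) can be
# verified directly:
import networkx as nx

G = nx.path_graph(5)
n = G.number_of_nodes()
W = nx.wiener_index(G)
for v in G.nodes():
    T_v = sum(nx.single_source_shortest_path_length(G, v).values())
    H = G.copy()
    H.add_edge(v, 'leaf')  # pendant vertex attached at v
    assert nx.wiener_index(H) == W + T_v + n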
def nodal_wiener_impact(self) -> Tuple[List[float], float]:
    """
    Computes the nodal Wiener impact for the graph.
    """
    impacts: List[float] = []
    for i in range(self.graph.number_of_nodes()):
        T_v = 0.0
        graph_copy = deepcopy(self.graph)
        for j in range(self.graph.number_of_nodes()):
            T_v += self.distances[i][j]
        graph_copy.remove_node(i)
        W_v = nx.wiener_index(graph_copy)
        I_v = W_v + T_v - self.wiener
        impacts.append(I_v)
    return (impacts, sum(impacts))
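# The impact formula above, I_v = W(G - v) + T_v - W(G), checked on a cycle
# (2-connected, so G - v stays connected; by symmetry every impact is equal):
import networkx as nx

G = nx.cycle_graph(7)
W = nx.wiener_index(G)
impacts = []
for v in G.nodes():
    T_v = sum(nx.single_source_shortest_path_length(G, v).values())
    H = G.copy()
    H.remove_node(v)
    impacts.append(nx.wiener_index(H) + T_v - W)
assert len(set(impacts)) == 1  # vertex-transitive graph: identical impacts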
def _graph(self, graph):
    """Generate graph-based attributes."""
    graph_attr = pd.DataFrame()
    graph_attr['number_of_nodes'] = [nx.number_of_nodes(graph)]
    graph_attr['number_of_edges'] = [nx.number_of_edges(graph)]
    graph_attr['number_of_selfloops'] = [nx.number_of_selfloops(graph)]
    graph_attr['graph_number_of_cliques'] = [
        nx.graph_number_of_cliques(graph)
    ]
    graph_attr['graph_clique_number'] = [nx.graph_clique_number(graph)]
    graph_attr['density'] = [nx.density(graph)]
    graph_attr['transitivity'] = [nx.transitivity(graph)]
    graph_attr['average_clustering'] = [nx.average_clustering(graph)]
    graph_attr['radius'] = [nx.radius(graph)]
    graph_attr['is_tree'] = [1 if nx.is_tree(graph) else 0]
    graph_attr['wiener_index'] = [nx.wiener_index(graph)]
    return graph_attr
def compute_features(self):
    wiener_index = lambda graph: nx.wiener_index(graph)

    self.add_feature(
        "wiener index",
        wiener_index,
        "The wiener index is defined as the sum of the lengths of the shortest paths between all pairs of vertices",
        InterpretabilityScore(4),
    )

    estrada_index = lambda graph: nx.estrada_index(graph)

    self.add_feature(
        "estrada_index",
        estrada_index,
        "The Estrada Index is a topological index of protein folding or 3D compactness",
        InterpretabilityScore(4),
    )
def add_features(G):
    L = nx.laplacian_matrix(G).todense()
    eig = LA.eigvals(L)
    avg_deg = 2 * (G.number_of_edges() / G.number_of_nodes())
    lap_energy = sum([abs(i - avg_deg) for i in eig])
    dist_matrix = np.array(
        nx.floyd_warshall_numpy(G, nodelist=sorted(G.nodes())))
    # Keep, per row, only the distances equal to that node's eccentricity
    # (the row maximum); everything else is zeroed out.
    eccentricity = dist_matrix * (dist_matrix >= np.sort(
        dist_matrix, axis=1)[:, [-1]]).astype(int)
    e_vals = LA.eigvals(eccentricity)
    largest_eig = np.real(max(e_vals))
    energy = np.real(sum([abs(x) for x in e_vals]))
    wiener_index = nx.wiener_index(G)
    trace_DS = trace_deq_seq(G)
    return {
        'lap_energy': lap_energy,
        'ecc_spectrum': largest_eig,
        'ecc_energy': energy,
        'wiener_index': wiener_index,
        'trace_deg_seq': trace_DS
    }
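# A small demo of the row-max masking trick used above to extract the
# eccentricity entries from a distance matrix:
import numpy as np

D = np.array([[0, 1, 2],
              [1, 0, 1],
              [2, 1, 0]])
row_max = np.sort(D, axis=1)[:, [-1]]   # each row's maximum, kept 2-D
print(D * (D >= row_max).astype(int))   # only entries equal to the row max survive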
def get_subgraph_features(self, atoms_idx, name_space):
    '''
    :param atoms_idx: subgraph node indices
    :param name_space: name space
    :return: features dict
    '''
    res = {}
    res[f'{name_space}#atoms_num'] = len(atoms_idx)
    res[f'{name_space}#electronegativity_sum'] = np.sum(
        self.electron[atoms_idx])
    atoms_num_dict = {'C': 0, 'H': 0, 'O': 0, 'N': 0, 'F': 0}
    for i in atoms_idx:
        atoms_num_dict[self.atoms[i]] += 1
    for atom_type, num in atoms_num_dict.items():
        res[f'{name_space}#{atom_type}_num'] = num
    s = np.linalg.eigvalsh(np.cov(self.coordinates[atoms_idx, :].T))[::-1]
    eigen_ratio = np.cumsum(s) / np.sum(s)
    res[f'{name_space}#eigen_ratio_1d'] = eigen_ratio[0]
    res[f'{name_space}#eigen_ratio_2d'] = eigen_ratio[1]
    sub_dist_matrix = self.dist_matrix[atoms_idx][:, atoms_idx]
    # Note: this value is overwritten below by the per-bond maximum.
    res[f'{name_space}#bond_length_max'] = np.max(sub_dist_matrix)
    subgraph = nx.Graph(self.graph_edges[atoms_idx][:, atoms_idx])
    res[f'{name_space}#edges_num'] = len(subgraph.edges)
    cycle_basis = nx.cycle_basis(subgraph)
    res[f'{name_space}#cycle_basis_num'] = len(cycle_basis)
    res[f'{name_space}#triangle_num'] = sum(
        len(cycle) == 3 for cycle in cycle_basis)
    res[f'{name_space}#wiener_index'] = nx.wiener_index(subgraph)
    res[f'{name_space}#algebraic_connectivity'] = \
        np.linalg.eigvalsh(nx.laplacian_matrix(subgraph).toarray())[1]
    res[f'{name_space}#algebraic_connectivity_normalized'] = \
        np.linalg.eigvalsh(nx.normalized_laplacian_matrix(subgraph).toarray())[1]
    bond_length = [sub_dist_matrix[i1, i2] for i1, i2 in subgraph.edges]
    res[f'{name_space}#bond_length_mean'] = np.mean(bond_length)
    res[f'{name_space}#bond_length_max'] = np.max(bond_length)
    res[f'{name_space}#bond_length_min'] = np.min(bond_length)
    res[f'{name_space}#bond_length_std'] = np.std(bond_length)
    return res
def calculate_features(filename):
    graph = open(filename, "r")
    G = nx.Graph()
    nbColors = 0
    nbPrecolor = 0
    for line in graph:
        split = line.split(' ')
        if split[0] == 'p':
            nbColors = int(split[4])
            for i in range(int(split[2])):
                G.add_node(i)
        elif split[0] == 'e':
            n1 = int(split[1]) - 1
            n2 = int(split[2]) - 1
            G.add_edge(n1, n2)
        elif split[0] == 'n':
            nbPrecolor += 1
    graph.close()
    degrees = np.array(G.degree)
    # Betweenness values lie in [0, 1]; an int dtype would truncate them all
    # to zero, so read them as floats.
    centrality = np.fromiter(nx.betweenness_centrality(G).values(),
                             dtype=float)
    eigenvalues = np.linalg.eigvals(nx.to_numpy_matrix(G))
    try:
        eigenvector_centrality = np.fromiter(
            nx.eigenvector_centrality(G).values(), dtype=float)
    except nx.PowerIterationFailedConvergence:
        eigenvector_centrality = np.array([0.])
    cycles = list(map(lambda x: len(x), nx.cycle_basis(G)))
    components = [nx.subgraph(G, comp) for comp in nx.connected_components(G)]
    return G.number_of_nodes(), G.number_of_edges(), nx.density(G), \
        np.mean(degrees, axis=0)[1], np.std(degrees, axis=0)[1], \
        sum([nx.average_shortest_path_length(g) for g in components]), \
        sum([nx.diameter(g) for g in components]), \
        "inf" if len(cycles) == 0 else min(cycles), \
        np.mean(centrality), np.std(centrality), \
        nx.average_clustering(G), nx.wiener_index(G), \
        np.mean(np.fromiter(map(lambda x: abs(x), eigenvalues), dtype=float)), \
        np.std(eigenvalues), -1, \
        np.mean(eigenvector_centrality), np.std(eigenvector_centrality), \
        nbColors, nbPrecolor / float(G.number_of_nodes())
def _graph(self):
    """Generate graph-based attributes."""
    self.graph_attr['number_of_nodes'] = [nx.number_of_nodes(self.graph)]
    self.graph_attr['number_of_edges'] = [nx.number_of_edges(self.graph)]
    self.graph_attr['number_of_selfloops'] = [
        nx.number_of_selfloops(self.graph)
    ]
    self.graph_attr['graph_number_of_cliques'] = [
        nx.graph_number_of_cliques(self.graph)
    ]
    self.graph_attr['graph_clique_number'] = [
        nx.graph_clique_number(self.graph)
    ]
    self.graph_attr['density'] = [nx.density(self.graph)]
    self.graph_attr['transitivity'] = [nx.transitivity(self.graph)]
    self.graph_attr['average_clustering'] = [
        nx.average_clustering(self.graph)
    ]
    self.graph_attr['radius'] = [nx.radius(self.graph)]
    self.graph_attr['is_tree'] = [1 if nx.is_tree(self.graph) else 0]
    self.graph_attr['wiener_index'] = [nx.wiener_index(self.graph)]
    return self.graph_attr
def cpip_stats(filepath, core):
    # Read in the network data for the full network
    W = __pd__.read_csv(filepath)
    # Rename columns to match pulp formatting
    cc = list(W.columns)
    for c in range(len(cc)):
        cc[c] = __pulp_names__(cc[c])
    W.columns = cc
    # Create the core and periphery of the network
    c_ids = [list(W.columns).index(c) for c in core]
    p_ids = [i for i in range(len(W.columns)) if W.columns[i] not in core]
    C = W
    P = W
    for c in [c_ids[len(c_ids) - 1 - i] for i in range(len(c_ids))]:
        P = P.drop(P.columns[c], axis=1).drop(c, axis=0)
    for p in [p_ids[len(p_ids) - 1 - i] for i in range(len(p_ids))]:
        C = C.drop(C.columns[p], axis=1).drop(p, axis=0)
    # Preparing some arrays of binary adjacency matrices for generating
    # function outputs
    M = W.values
    MC = C.values
    MP = P.values
    for row in range(len(M)):
        for col in range(len(M)):
            if W.values[row][col] > 0:
                M[row][col] = 1
    for row in range(len(C)):
        for col in range(len(C)):
            if C.values[row][col] > 0:
                MC[row][col] = 1
    for row in range(len(P)):
        for col in range(len(P)):
            if P.values[row][col] > 0:
                MP[row][col] = 1
    # Creating the output object
    output = __network_object__()
    # Number of vertices
    output.order = __network_object__()
    output.order.network = len(M)
    output.order.core = len(c_ids)
    output.order.periphery = len(p_ids)
    # Number of edges
    output.size = __network_object__()
    output.size.network = sum(sum(M)) / 2
    output.size.core = sum(sum(MC)) / 2
    output.size.periphery = sum(sum(MP)) / 2
    output.size.between = output.size.network - output.size.core - output.size.periphery
    # Ratios of order and size
    output.ratio_v = __network_object__()
    output.ratio_v.network = 1
    output.ratio_v.core = output.order.core / output.order.network
    output.ratio_v.periphery = 1 - output.ratio_v.core
    output.ratio_e = __network_object__()
    output.ratio_e.network = 1
    output.ratio_e.core = output.size.core / output.size.network
    output.ratio_e.periphery = output.size.periphery / output.size.network
    output.ratio_e.between = 1 - output.ratio_e.core - output.ratio_e.periphery
    # Densities
    output.density = __network_object__()
    output.density.network = output.size.network / ((len(M) * (len(M) - 1)) / 2)
    output.density.core = output.size.core / ((len(MC) * (len(MC) - 1)) / 2)
    output.density.periphery = output.size.periphery / ((len(MP) * (len(MP) - 1)) / 2)
    # Degree statistics (mean, min, max)
    output.within_degrees = __network_object__()
    output.total_degrees = __network_object__()
    output.within_degrees.average = __network_object__()
    output.within_degrees.average.network = sum(sum(M)) / len(M)
    output.within_degrees.average.core = sum(sum(MC)) / len(MC)
    output.within_degrees.average.periphery = sum(sum(MP)) / len(MP)
    output.within_degrees.min = __network_object__()
    output.within_degrees.min.network = min(sum(M))
    output.within_degrees.min.core = min(sum(MC))
    output.within_degrees.min.periphery = min(sum(MP))
    output.within_degrees.max = __network_object__()
    output.within_degrees.max.network = max(sum(M))
    output.within_degrees.max.core = max(sum(MC))
    output.within_degrees.max.periphery = max(sum(MP))
    output.total_degrees.average = __network_object__()
    output.total_degrees.average.network = output.within_degrees.average.network
    output.total_degrees.average.core = sum([sum(M)[c] for c in c_ids]) / len(c_ids)
    output.total_degrees.average.periphery = sum([sum(M)[p] for p in p_ids]) / len(p_ids)
    output.total_degrees.min = __network_object__()
    output.total_degrees.min.network = output.within_degrees.min.network
    output.total_degrees.min.core = min([sum(M)[c] for c in c_ids])
    output.total_degrees.min.periphery = min([sum(M)[p] for p in p_ids])
    output.total_degrees.max = __network_object__()
    output.total_degrees.max.network = output.within_degrees.max.network
    output.total_degrees.max.core = max([sum(M)[c] for c in c_ids])
    output.total_degrees.max.periphery = max([sum(M)[p] for p in p_ids])
    # Number of connected components
    output.components = __network_object__()
    output.components.network = __nx__.number_connected_components(
        __nx__.Graph(M))
    output.components.core = __nx__.number_connected_components(
        __nx__.Graph(MC))
    output.components.periphery = __nx__.number_connected_components(
        __nx__.Graph(MP))
    # Radius and diameter
    output.radius = __network_object__()
    output.diameter = __network_object__()
    if output.components.network != 1:
        output.radius.network = 'inf'
        output.diameter.network = 'inf'
    else:
        output.radius.network = __nx__.radius(__nx__.Graph(M))
        output.diameter.network = __nx__.diameter(__nx__.Graph(M))
    if output.components.core != 1:
        output.radius.core = 'inf'
        output.diameter.core = 'inf'
    else:
        output.radius.core = __nx__.radius(__nx__.Graph(MC))
        output.diameter.core = __nx__.diameter(__nx__.Graph(MC))
    if output.components.periphery != 1:
        output.radius.periphery = 'inf'
        output.diameter.periphery = 'inf'
    else:
        output.radius.periphery = __nx__.radius(__nx__.Graph(MP))
        output.diameter.periphery = __nx__.diameter(__nx__.Graph(MP))
    # Maximum clique size (max needs key=len; without it, cliques are
    # compared lexicographically rather than by size)
    output.clique = __network_object__()
    output.clique.network = len(
        max(__nx__.find_cliques(__nx__.Graph(M)), key=len))
    output.clique.core = len(
        max(__nx__.find_cliques(__nx__.Graph(MC)), key=len))
    output.clique.periphery = len(
        max(__nx__.find_cliques(__nx__.Graph(MP)), key=len))
    # Global clustering
    output.clustering = __network_object__()
    output.clustering.network = __nx__.average_clustering(__nx__.Graph(M))
    output.clustering.core = __nx__.average_clustering(__nx__.Graph(MC))
    output.clustering.periphery = __nx__.average_clustering(__nx__.Graph(MP))
    # Connectivity statistics
    output.connectivity = __network_object__()
    output.edge_connectivity = __network_object__()
    output.algebraic_connectivity = __network_object__()
    if output.components.network > 1:
        output.connectivity.network = 0
        output.edge_connectivity.network = 0
    else:
        output.connectivity.network = __nx__.node_connectivity(__nx__.Graph(M))
        output.edge_connectivity.network = __nx__.edge_connectivity(
            __nx__.Graph(M))
    if output.components.core > 1:
        output.connectivity.core = 0
        output.edge_connectivity.core = 0
    else:
        output.connectivity.core = __nx__.node_connectivity(__nx__.Graph(MC))
        output.edge_connectivity.core = __nx__.edge_connectivity(
            __nx__.Graph(MC))
    if output.components.periphery > 1:
        output.connectivity.periphery = 0
        output.edge_connectivity.periphery = 0
    else:
        output.connectivity.periphery = __nx__.node_connectivity(
            __nx__.Graph(MP))
        output.edge_connectivity.periphery = __nx__.edge_connectivity(
            __nx__.Graph(MP))
    output.algebraic_connectivity.network = __nx__.algebraic_connectivity(
        __nx__.Graph(M))
    output.algebraic_connectivity.core = __nx__.algebraic_connectivity(
        __nx__.Graph(MC))
    output.algebraic_connectivity.periphery = __nx__.algebraic_connectivity(
        __nx__.Graph(MP))
    # Energies and Laplacian energies
    output.energy = __network_object__()
    output.laplacian_energy = __network_object__()
    output.energy.network = sum(abs(__nx__.adjacency_spectrum(
        __nx__.Graph(M))))
    output.energy.core = sum(abs(__nx__.adjacency_spectrum(__nx__.Graph(MC))))
    output.energy.periphery = sum(
        abs(__nx__.adjacency_spectrum(__nx__.Graph(MP))))
    output.laplacian_energy.network = sum(
        abs(__nx__.laplacian_spectrum(__nx__.Graph(M))))
    output.laplacian_energy.core = sum(
        abs(__nx__.laplacian_spectrum(__nx__.Graph(MC))))
    output.laplacian_energy.periphery = sum(
        abs(__nx__.laplacian_spectrum(__nx__.Graph(MP))))
    # Transitivity
    output.transitivity = __network_object__()
    output.transitivity.network = __nx__.transitivity(__nx__.Graph(M))
    output.transitivity.core = __nx__.transitivity(__nx__.Graph(MC))
    output.transitivity.periphery = __nx__.transitivity(__nx__.Graph(MP))
    # Wiener index
    output.wiener = __network_object__()
    output.wiener.network = __nx__.wiener_index(__nx__.Graph(M))
    output.wiener.core = __nx__.wiener_index(__nx__.Graph(MC))
    output.wiener.periphery = __nx__.wiener_index(__nx__.Graph(MP))
    # Check if the core is a dominating set
    output.dom_set = __network_object__()
    output.dom_set.core = __nx__.is_dominating_set(__nx__.Graph(M), c_ids)
    output.dom_set.periphery = __nx__.is_dominating_set(__nx__.Graph(M), p_ids)
    return output
def test_disconnected_graph(self):
    """Tests that the Wiener index of a disconnected graph is positive
    infinity.

    """
    eq_(wiener_index(empty_graph(2)), float('inf'))
import networkx as nx

n = 10
G = nx.complete_graph(n)
print(nx.wiener_index(G) == n * (n - 1) / 2)
def compute_summaries(G):
    """ Compute network features, computational times and their nature.

    Evaluate 54 summary statistics of a network G, plus 4 noise variables,
    store the computational time to evaluate each summary statistic, and keep
    track of their nature (discrete or not).

        Args:
            G (networkx.classes.graph.Graph): an undirected networkx graph.

        Returns:
            resDicts (tuple): a tuple containing the elements:
                - dictSums (dict): a dictionary with the name of the summaries
                  as keys and the summary statistic values as values;
                - dictTimes (dict): a dictionary with the name of the summaries
                  as keys and the time to compute each one as values;
                - dictIsDisc (dict): a dictionary indicating if the summary is
                  discrete (True) or not (False).
    """
    dictSums = dict()    # Will store the summary statistic values
    dictTimes = dict()   # Will store the evaluation times
    dictIsDisc = dict()  # Will store the summary statistic nature

    # Extract the largest connected component
    Gcc = sorted(nx.connected_components(G), key=len, reverse=True)
    G_lcc = G.subgraph(Gcc[0])

    # Number of edges
    start = time.time()
    dictSums["num_edges"] = G.number_of_edges()
    dictTimes["num_edges"] = time.time() - start
    dictIsDisc["num_edges"] = True

    # Number of connected components
    start = time.time()
    dictSums["num_of_CC"] = nx.number_connected_components(G)
    dictTimes["num_of_CC"] = time.time() - start
    dictIsDisc["num_of_CC"] = True

    # Number of nodes in the largest connected component
    start = time.time()
    dictSums["num_nodes_LCC"] = nx.number_of_nodes(G_lcc)
    dictTimes["num_nodes_LCC"] = time.time() - start
    dictIsDisc["num_nodes_LCC"] = True

    # Number of edges in the largest connected component
    start = time.time()
    dictSums["num_edges_LCC"] = G_lcc.number_of_edges()
    dictTimes["num_edges_LCC"] = time.time() - start
    dictIsDisc["num_edges_LCC"] = True

    # Diameter of the largest connected component
    start = time.time()
    dictSums["diameter_LCC"] = nx.diameter(G_lcc)
    dictTimes["diameter_LCC"] = time.time() - start
    dictIsDisc["diameter_LCC"] = True

    # Average geodesic distance (shortest path length in the LCC)
    start = time.time()
    dictSums["avg_geodesic_dist_LCC"] = nx.average_shortest_path_length(G_lcc)
    dictTimes["avg_geodesic_dist_LCC"] = time.time() - start
    dictIsDisc["avg_geodesic_dist_LCC"] = False

    # Average degree of the neighborhood of each node
    start = time.time()
    dictSums["avg_deg_connectivity"] = np.mean(
        list(nx.average_degree_connectivity(G).values()))
    dictTimes["avg_deg_connectivity"] = time.time() - start
    dictIsDisc["avg_deg_connectivity"] = False

    # Average degree of the neighbors of each node in the LCC
    start = time.time()
    dictSums["avg_deg_connectivity_LCC"] = np.mean(
        list(nx.average_degree_connectivity(G_lcc).values()))
    dictTimes["avg_deg_connectivity_LCC"] = time.time() - start
    dictIsDisc["avg_deg_connectivity_LCC"] = False

    # Recover the degree distribution
    start_degree_extract = time.time()
    degree_vals = list(dict(G.degree()).values())
    degree_extract_time = time.time() - start_degree_extract

    # Entropy of the degree distribution
    start = time.time()
    dictSums["degree_entropy"] = ss.entropy(degree_vals)
    dictTimes["degree_entropy"] = time.time() - start + degree_extract_time
    dictIsDisc["degree_entropy"] = False

    # Maximum degree
    start = time.time()
    dictSums["degree_max"] = max(degree_vals)
    dictTimes["degree_max"] = time.time() - start + degree_extract_time
    dictIsDisc["degree_max"] = True

    # Average degree
    start = time.time()
    dictSums["degree_mean"] = np.mean(degree_vals)
    dictTimes["degree_mean"] = time.time() - start + degree_extract_time
dictIsDisc["degree_mean"] = False # Median degree start = time.time() dictSums["degree_median"] = np.median(degree_vals) dictTimes["degree_median"] = time.time() - start + degree_extract_time dictIsDisc["degree_median"] = False # Standard deviation of the degree distribution start = time.time() dictSums["degree_std"] = np.std(degree_vals) dictTimes["degree_std"] = time.time() - start + degree_extract_time dictIsDisc["degree_std"] = False # Quantile 25% start = time.time() dictSums["degree_q025"] = np.quantile(degree_vals, 0.25) dictTimes["degree_q025"] = time.time() - start + degree_extract_time dictIsDisc["degree_q025"] = False # Quantile 75% start = time.time() dictSums["degree_q075"] = np.quantile(degree_vals, 0.75) dictTimes["degree_q075"] = time.time() - start + degree_extract_time dictIsDisc["degree_q075"] = False # Average geodesic distance start = time.time() dictSums["avg_shortest_path_length_LCC"] = nx.average_shortest_path_length( G_lcc) dictTimes["avg_shortest_path_length_LCC"] = time.time() - start dictIsDisc["avg_shortest_path_length_LCC"] = False # Average global efficiency: # The efficiency of a pair of nodes in a graph is the multiplicative # inverse of the shortest path distance between the nodes. # The average global efficiency of a graph is the average efficiency of # all pairs of nodes. start = time.time() dictSums["avg_global_efficiency"] = nx.global_efficiency(G) dictTimes["avg_global_efficiency"] = time.time() - start dictIsDisc["avg_global_efficiency"] = False # Harmonic mean which is 1/avg_global_efficiency start = time.time() dictSums["harmonic_mean"] = nx.global_efficiency(G) dictTimes["harmonic_mean"] = time.time() - start dictIsDisc["harmonic_mean"] = False # Average local efficiency # The local efficiency of a node in the graph is the average global # efficiency of the subgraph induced by the neighbors of the node. # The average local efficiency is the average of the # local efficiencies of each node. start = time.time() dictSums["avg_local_efficiency_LCC"] = nx.local_efficiency(G_lcc) dictTimes["avg_local_efficiency_LCC"] = time.time() - start dictIsDisc["avg_local_efficiency_LCC"] = False # Node connectivity # The node connectivity is equal to the minimum number of nodes that # must be removed to disconnect G or render it trivial. # Only on the largest connected component here. start = time.time() dictSums["node_connectivity_LCC"] = nx.node_connectivity(G_lcc) dictTimes["node_connectivity_LCC"] = time.time() - start dictIsDisc["node_connectivity_LCC"] = True # Edge connectivity # The edge connectivity is equal to the minimum number of edges that # must be removed to disconnect G or render it trivial. # Only on the largest connected component here. 
    start = time.time()
    dictSums["edge_connectivity_LCC"] = nx.edge_connectivity(G_lcc)
    dictTimes["edge_connectivity_LCC"] = time.time() - start
    dictIsDisc["edge_connectivity_LCC"] = True

    # Graph transitivity
    # 3 times the number of triangles divided by the number of triads
    start = time.time()
    dictSums["transitivity"] = nx.transitivity(G)
    dictTimes["transitivity"] = time.time() - start
    dictIsDisc["transitivity"] = False

    # Number of triangles
    start = time.time()
    dictSums["num_triangles"] = np.sum(list(nx.triangles(G).values())) / 3
    dictTimes["num_triangles"] = time.time() - start
    dictIsDisc["num_triangles"] = True

    # Estimate of the average clustering coefficient of G:
    # Average local clustering coefficient, with local clustering coefficient
    # defined as C_i = (nbr of pairs of neighbors of i that are connected)
    #                  / (nbr of pairs of neighbors of i)
    start = time.time()
    dictSums["avg_clustering_coef"] = nx.average_clustering(G)
    dictTimes["avg_clustering_coef"] = time.time() - start
    dictIsDisc["avg_clustering_coef"] = False

    # Square clustering (averaged over nodes):
    # the fraction of possible squares that exist at the node.
    # We average it over nodes
    start = time.time()
    dictSums["square_clustering_mean"] = np.mean(
        list(nx.square_clustering(G).values()))
    dictTimes["square_clustering_mean"] = time.time() - start
    dictIsDisc["square_clustering_mean"] = False

    # We compute the median
    start = time.time()
    dictSums["square_clustering_median"] = np.median(
        list(nx.square_clustering(G).values()))
    dictTimes["square_clustering_median"] = time.time() - start
    dictIsDisc["square_clustering_median"] = False

    # We compute the standard deviation
    start = time.time()
    dictSums["square_clustering_std"] = np.std(
        list(nx.square_clustering(G).values()))
    dictTimes["square_clustering_std"] = time.time() - start
    dictIsDisc["square_clustering_std"] = False

    # Number of 2-cores
    start = time.time()
    dictSums["num_2cores"] = len(nx.k_core(G, k=2))
    dictTimes["num_2cores"] = time.time() - start
    dictIsDisc["num_2cores"] = True

    # Number of 3-cores
    start = time.time()
    dictSums["num_3cores"] = len(nx.k_core(G, k=3))
    dictTimes["num_3cores"] = time.time() - start
    dictIsDisc["num_3cores"] = True

    # Number of 4-cores
    start = time.time()
    dictSums["num_4cores"] = len(nx.k_core(G, k=4))
    dictTimes["num_4cores"] = time.time() - start
    dictIsDisc["num_4cores"] = True

    # Number of 5-cores
    start = time.time()
    dictSums["num_5cores"] = len(nx.k_core(G, k=5))
    dictTimes["num_5cores"] = time.time() - start
    dictIsDisc["num_5cores"] = True

    # Number of 6-cores
    start = time.time()
    dictSums["num_6cores"] = len(nx.k_core(G, k=6))
    dictTimes["num_6cores"] = time.time() - start
    dictIsDisc["num_6cores"] = True

    # Number of k-shells
    # The k-shell is the subgraph induced by nodes with core number k.
    # That is, nodes in the k-core that are not in the (k+1)-core

    # Number of 2-shells
    start = time.time()
    dictSums["num_2shells"] = len(nx.k_shell(G, 2))
    dictTimes["num_2shells"] = time.time() - start
    dictIsDisc["num_2shells"] = True

    # Number of 3-shells
    start = time.time()
    dictSums["num_3shells"] = len(nx.k_shell(G, 3))
    dictTimes["num_3shells"] = time.time() - start
    dictIsDisc["num_3shells"] = True

    # Number of 4-shells
    start = time.time()
    dictSums["num_4shells"] = len(nx.k_shell(G, 4))
    dictTimes["num_4shells"] = time.time() - start
    dictIsDisc["num_4shells"] = True

    # Number of 5-shells
    start = time.time()
    dictSums["num_5shells"] = len(nx.k_shell(G, 5))
    dictTimes["num_5shells"] = time.time() - start
    dictIsDisc["num_5shells"] = True

    # Number of 6-shells
    start = time.time()
    dictSums["num_6shells"] = len(nx.k_shell(G, 6))
    dictTimes["num_6shells"] = time.time() - start
    dictIsDisc["num_6shells"] = True

    start = time.time()
    listOfCliques = list(nx.enumerate_all_cliques(G))
    enum_all_cliques_time = time.time() - start

    # Number of 4-cliques
    start = time.time()
    n4Clique = 0
    for li in listOfCliques:
        if len(li) == 4:
            n4Clique += 1
    dictSums["num_4cliques"] = n4Clique
    dictTimes["num_4cliques"] = time.time() - start + enum_all_cliques_time
    dictIsDisc["num_4cliques"] = True

    # Number of 5-cliques
    start = time.time()
    n5Clique = 0
    for li in listOfCliques:
        if len(li) == 5:
            n5Clique += 1
    dictSums["num_5cliques"] = n5Clique
    dictTimes["num_5cliques"] = time.time() - start + enum_all_cliques_time
    dictIsDisc["num_5cliques"] = True

    # Maximal size of a clique in the graph
    start = time.time()
    dictSums["max_clique_size"] = len(approximation.clique.max_clique(G))
    dictTimes["max_clique_size"] = time.time() - start
    dictIsDisc["max_clique_size"] = True

    # Approximated size of a large clique in the graph
    start = time.time()
    dictSums["large_clique_size"] = approximation.large_clique_size(G)
    dictTimes["large_clique_size"] = time.time() - start
    dictIsDisc["large_clique_size"] = True

    # Number of shortest paths of length k
    start = time.time()
    listOfPLength = list(nx.shortest_path_length(G))
    path_length_time = time.time() - start

    # when k = 3
    start = time.time()
    n3Paths = 0
    for node in G.nodes():
        tmp = list(listOfPLength[node][1].values())
        n3Paths += tmp.count(3)
    dictSums["num_shortest_3paths"] = n3Paths / 2
    dictTimes["num_shortest_3paths"] = time.time() - start + path_length_time
    dictIsDisc["num_shortest_3paths"] = True

    # when k = 4
    start = time.time()
    n4Paths = 0
    for node in G.nodes():
        tmp = list(listOfPLength[node][1].values())
        n4Paths += tmp.count(4)
    dictSums["num_shortest_4paths"] = n4Paths / 2
    dictTimes["num_shortest_4paths"] = time.time() - start + path_length_time
    dictIsDisc["num_shortest_4paths"] = True

    # when k = 5
    start = time.time()
    n5Paths = 0
    for node in G.nodes():
        tmp = list(listOfPLength[node][1].values())
        n5Paths += tmp.count(5)
    dictSums["num_shortest_5paths"] = n5Paths / 2
    dictTimes["num_shortest_5paths"] = time.time() - start + path_length_time
    dictIsDisc["num_shortest_5paths"] = True

    # when k = 6
    start = time.time()
    n6Paths = 0
    for node in G.nodes():
        tmp = list(listOfPLength[node][1].values())
        n6Paths += tmp.count(6)
    dictSums["num_shortest_6paths"] = n6Paths / 2
    dictTimes["num_shortest_6paths"] = time.time() - start + path_length_time
    dictIsDisc["num_shortest_6paths"] = True

    # Size of the minimum (weight) node dominating set:
    # A subset of nodes where each node not in the subset has a direct
    # neighbor in the dominating set.
    start = time.time()
    T = approximation.min_weighted_dominating_set(G)
    dictSums["size_min_node_dom_set"] = len(T)
    dictTimes["size_min_node_dom_set"] = time.time() - start
    dictIsDisc["size_min_node_dom_set"] = True

    # Idem, but with the edge dominating set
    start = time.time()
    T = approximation.min_edge_dominating_set(G)
    dictSums["size_min_edge_dom_set"] = 2 * len(
        T)  # times 2 to have a number of nodes
    dictTimes["size_min_edge_dom_set"] = time.time() - start
    dictIsDisc["size_min_edge_dom_set"] = True

    # The Wiener index of a graph is the sum of the shortest-path distances
    # between each pair of reachable nodes. For pairs of nodes in undirected
    # graphs, only one orientation of the pair is counted.
    # (On the LCC, otherwise inf)
    start = time.time()
    dictSums["wiener_index_LCC"] = nx.wiener_index(G_lcc)
    dictTimes["wiener_index_LCC"] = time.time() - start
    dictIsDisc["wiener_index_LCC"] = True

    # Betweenness node centrality (averaged over nodes):
    # at node u it is defined as B_u = sum_{i,j} sigma(i,u,j)/sigma(i,j),
    # where sigma(i,j) is the number of shortest paths between i and j and
    # sigma(i,u,j) counts those passing through u
    start = time.time()
    betweenness = list(nx.betweenness_centrality(G).values())
    time_betweenness = time.time() - start

    # Averaged across nodes
    start = time.time()
    dictSums["betweenness_centrality_mean"] = np.mean(betweenness)
    dictTimes["betweenness_centrality_mean"] = time.time(
    ) - start + time_betweenness
    dictIsDisc["betweenness_centrality_mean"] = False

    # Maximum across nodes
    start = time.time()
    dictSums["betweenness_centrality_max"] = max(betweenness)
    dictTimes["betweenness_centrality_max"] = time.time(
    ) - start + time_betweenness
    dictIsDisc["betweenness_centrality_max"] = False

    # Central point dominance
    # CPD = sum_u (B_max - B_u) / (N - 1)
    start = time.time()
    dictSums["central_point_dominance"] = sum(
        max(betweenness) - np.array(betweenness)) / (len(betweenness) - 1)
    dictTimes["central_point_dominance"] = time.time(
    ) - start + time_betweenness
    dictIsDisc["central_point_dominance"] = False

    # Estrada index: sum_i^n exp(lambda_i),
    # with n the number of nodes and lambda_i the i-th eigenvalue of the
    # adjacency matrix of G
    start = time.time()
    dictSums["Estrata_index"] = nx.estrada_index(G)
    dictTimes["Estrata_index"] = time.time() - start
    dictIsDisc["Estrata_index"] = False

    # Eigenvector centrality
    # For each node, it is the average eigenvector centrality of its
    # neighbors, where the centrality of node i is taken as the i-th
    # coordinate of x such that Ax = lambda*x (for the maximal eigenvalue)

    # Averaged
    start = time.time()
    dictSums["avg_eigenvec_centrality"] = np.mean(
        list(nx.eigenvector_centrality_numpy(G).values()))
    dictTimes["avg_eigenvec_centrality"] = time.time() - start
    dictIsDisc["avg_eigenvec_centrality"] = False

    # Maximum
    start = time.time()
    dictSums["max_eigenvec_centrality"] = max(
        list(nx.eigenvector_centrality_numpy(G).values()))
    dictTimes["max_eigenvec_centrality"] = time.time() - start
    dictIsDisc["max_eigenvec_centrality"] = False

    ### Noise generation ###

    # Noise simulated from a Normal(0, 1) distribution
    start = time.time()
    dictSums["noise_Gauss"] = ss.norm.rvs(0, 1)
    dictTimes["noise_Gauss"] = time.time() - start
    dictIsDisc["noise_Gauss"] = False

    # Noise simulated from a Uniform distribution on [0, 50]
    start = time.time()
    dictSums["noise_Unif"] = ss.uniform.rvs(0, 50)
    dictTimes["noise_Unif"] = time.time() - start
    dictIsDisc["noise_Unif"] = False

    # Noise simulated from a Bernoulli B(0.5) distribution
    start = time.time()
    dictSums["noise_Bern"] = ss.bernoulli.rvs(0.5)
    dictTimes["noise_Bern"] = time.time() - start
dictIsDisc["noise_Bern"] = True # Noise simulated from a discrete uniform distribution [0,50[ start = time.time() dictSums["noise_disc_Unif"] = ss.randint.rvs(0, 50) dictTimes["noise_disc_Unif"] = time.time() - start dictIsDisc["noise_disc_Unif"] = True resDicts = (dictSums, dictTimes, dictIsDisc) return resDicts
def closeness_vitality(G, node=None, weight=None, wiener_index=None):
    """Returns the closeness vitality for nodes in the graph.

    The *closeness vitality* of a node, defined in Section 3.6.2 of [1],
    is the change in the sum of distances between all node pairs when
    excluding that node.

    Parameters
    ----------
    G : NetworkX graph
        A strongly-connected graph.

    node : object
        If specified, only the closeness vitality for this node will be
        returned. Otherwise, a dictionary mapping each node to its
        closeness vitality will be returned.

    weight : string
        The name of the edge attribute used as weight. This is passed
        directly to the :func:`~networkx.wiener_index` function.

    Other parameters
    ----------------
    wiener_index : number
        If you have already computed the Wiener index of the graph ``G``,
        you can provide that value here. Otherwise, it will be computed
        for you.

    Returns
    -------
    dictionary or float
        If ``node`` is ``None``, this function returns a dictionary
        with nodes as keys and closeness vitality as the value. Otherwise,
        it returns only the closeness vitality for the specified ``node``.

        The closeness vitality of a node may be negative infinity if
        removing that node would disconnect the graph.

    Examples
    --------
    >>> G = nx.cycle_graph(3)
    >>> nx.closeness_vitality(G)
    {0: 2.0, 1: 2.0, 2: 2.0}

    See Also
    --------
    closeness_centrality

    References
    ----------
    .. [1] Ulrik Brandes, Thomas Erlebach (eds.).
           *Network Analysis: Methodological Foundations*.
           Springer, 2005.
           <http://books.google.com/books?id=TTNhSm7HYrIC>

    """
    if wiener_index is None:
        wiener_index = nx.wiener_index(G, weight=weight)
    if node is not None:
        after = nx.wiener_index(G.subgraph(set(G) - {node}), weight=weight)
        return wiener_index - after
    vitality = partial(closeness_vitality, G, weight=weight,
                       wiener_index=wiener_index)
    # TODO This can be trivially parallelized.
    return {v: vitality(node=v) for v in G}
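# Usage sketch for closeness_vitality above: on a path graph, removing an
# interior node disconnects the graph (vitality -inf), while an endpoint's
# vitality is the resulting drop in the Wiener index:
import networkx as nx

G = nx.path_graph(4)            # 0-1-2-3, Wiener index 10
print(nx.closeness_vitality(G))
# endpoint 0: 10 - W(P_3) = 10 - 4 = 6.0; interior nodes: -inf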
def getWienerD(self):
    """
    Returns the Wiener index of the graph.
    """
    return NX.wiener_index(self)
json_files = [pos_json for pos_json in os.listdir('./Topologias/')
              if pos_json.endswith('.json') and not pos_json.endswith('nodes.json')]

with open('./wienerIndexes.json', 'w') as wienerIndexes:
    with open('./graphDegres.json', 'w') as graphDegres:
        wienerIndexesDict = {}
        graphDegreeDict = {}
        for fileName in json_files:
            print(fileName)
            with open('./Topologias/' + fileName, 'r') as json_file:
                JSONLinks = json.load(json_file)
                G = nx.Graph()
                G.add_weighted_edges_from(
                    (elem['From'], elem['To'], 1) for elem in JSONLinks
                )
                wienerIndex = nx.wiener_index(G)
                graphDegree = sum(
                    map(lambda v: v[1], G.degree())) / len(G.nodes)
                wienerIndexesDict[fileName] = wienerIndex
                graphDegreeDict[fileName] = graphDegree
        json.dump(wienerIndexesDict, wienerIndexes, sort_keys=True)
        json.dump(graphDegreeDict, graphDegres, sort_keys=True)
def __init__(self, graph: nx.Graph):
    self.graph = graph
    self.wiener = nx.wiener_index(graph)
    self.distances = dict(nx.all_pairs_dijkstra_path_length(graph))
def calculate(graph):
    if nx.is_connected(graph):
        return Utils.approx_to_int(nx.wiener_index(graph))
    else:
        # Large sentinel for disconnected graphs, whose Wiener index is inf
        return 10**10
        ),
    )
    return edge_disjoint_shortest_paths


graph = nx.Graph()
with open("TopologiasRedesReais/scteste_nodes.csv") as nf:
    with open("TopologiasRedesReais/scteste_links.csv") as ef:
        nodes = list(map(lambda n: n["Id"], csv.DictReader(nf)))
        edges = list(map(lambda e: (e["From"], e["To"], 1), csv.DictReader(ef)))
        graph.add_nodes_from(nodes)
        graph.add_weighted_edges_from(edges)

print(nx.wiener_index(graph, weight='weight'))

# get_all_pairs_edge_disjoint_shortest_paths(graph.to_directed())

# graph = nx.Graph()
# graph.add_nodes_from(["s", "a", "b", "c", "d", "e", "f", "g"])
# graph.add_edges_from(
#     [
#         ("s", "a"),
#         ("s", "b"),
#         ("s", "d"),
#         ("a", "c"),
#         ("a", "d"),
#         ("a", "e"),
#         ("b", "e"),
#         ("c", "f"),