def find_threshold(o_samples_npy, context, c_model, min_thres=0, max_thres=50, n_iters=10):
    """Bisect for the largest probed threshold whose memory graph is connected.

    The graph built at ``min_thres`` must be connected and the graph built at
    ``max_thres`` must be disconnected; the interval is then halved
    ``n_iters`` times, keeping that invariant.

    Parameters
    ----------
    o_samples_npy, context, c_model
        Forwarded unchanged to ``build_memory_graph``.
    min_thres : number
        Lower end of the bracket (graph must be connected here).
    max_thres : number
        Upper end of the bracket (graph must be disconnected here).
    n_iters : int
        Number of bisection steps.

    Returns
    -------
    number
        The final lower bound, i.e. the largest threshold tested for which
        the graph was connected.

    Raises
    ------
    AssertionError
        If the initial bracket does not satisfy the connected/disconnected
        precondition.
    """

    def connected_at(threshold):
        graph = build_memory_graph(o_samples_npy, context, c_model, threshold)
        return is_connected(graph)

    # Explicit raises instead of `assert` so the precondition is still
    # enforced when Python runs with -O; the exception type is unchanged.
    if not connected_at(min_thres):
        raise AssertionError(
            "graph must be connected at min_thres={!r}".format(min_thres))
    if connected_at(max_thres):
        raise AssertionError(
            "graph must be disconnected at max_thres={!r}".format(max_thres))

    for _ in range(n_iters):
        threshold = (min_thres + max_thres) / 2
        if connected_at(threshold):
            min_thres = threshold
        else:
            max_thres = threshold
        # Progress trace of the current bracket.
        print(min_thres, max_thres)
    return min_thres
def make_graph_with_same_degree_dist(G):
    """Generate a connected simple graph with the same number of edges as `G`.

    A configuration-model multigraph is drawn from the degree sequence of
    `G`, collapsed to a simple graph, and then repaired (up to 10 attempts
    per draw) by pairing up, at random, nodes whose degree fell short of the
    target sequence.  Draws are repeated until the result is connected and
    has exactly as many edges as `G`.

    Parameters
    ----------
    G : networkx graph
        Template graph providing the degree sequence.

    Returns
    -------
    networkx.Graph
        The generated graph.

    Notes
    -----
    The outer loop has no retry limit; it only ends when a draw succeeds.
    """
    target_degrees = sorted(d for _, d in G.degree())
    # (degree, node) pairs ordered by degree; sort is stable on the node order.
    sorted_G_sequence = sorted(((d, n) for n, d in G.degree()),
                               key=lambda tup: tup[0])
    n_nodes = len(G.nodes())
    n_edges = len(G.edges())

    while True:
        G_prime = nx.Graph(nx.configuration_model(target_degrees))
        # Graph.selfloop_edges() was removed from NetworkX; use the function.
        G_prime.remove_edges_from(list(nx.selfloop_edges(G_prime)))

        tries = 10
        while tries > 0 and n_edges != len(G_prime.edges()):
            sorted_G_prime_sequence = sorted(
                ((d, n) for n, d in G_prime.degree()), key=lambda tup: tup[0])

            # Collect one stub per missing unit of degree, rank by rank.
            missing = []
            for i in range(n_nodes):
                while sorted_G_sequence[i][0] > sorted_G_prime_sequence[i][0]:
                    degree, node = sorted_G_prime_sequence[i]
                    missing.append(node)
                    sorted_G_prime_sequence[i] = (degree + 1, node)

            missing = np.random.permutation(missing)
            if len(missing) % 2 != 0:
                print("Sanity issue! Alert!")

            # Pair consecutive stubs into new edges.
            for i in range(len(missing) // 2):
                G_prime.add_edge(missing[2 * i], missing[2 * i + 1])
            G_prime = nx.Graph(G_prime)
            G_prime.remove_edges_from(list(nx.selfloop_edges(G_prime)))
            tries -= 1

        if is_connected(G_prime) and n_edges == len(G_prime.edges()):
            return G_prime
def generate_erdos_renyi_graph(n: int, p: float) -> Tuple[Set[Tuple[int, int]], Text]:
    """Return the edges of a connected Erdos-Renyi graph G(n, p).

    Random graphs are drawn with ``fast_gnp_random_graph`` until a connected
    one is found (rejection sampling), so the returned graph is always
    connected even though a single G(n, p) draw may not be.

    The following statements about G(n, p) hold on average::

        np < 1          has O(log(n)) clusters
        np = 1          has O(n^2/3) clusters
        np -> c > 1     will cluster to one large component,
                        sub clusters have O(log(n)) vertices
        p < ln(n)/n     the graph will be disconnected
        p > ln(n)/n     the graph will be connected

    Small ``p`` therefore means many rejected draws before success.

    Arguments:
        n: Number of vertices
        p: Probability of edge creation

    Returns:
        A tuple ``(edges, label)`` where ``edges`` is the set of edges of the
        sampled graph and ``label`` is the string ``"G(n,p)"`` with the
        actual values substituted.

    Raises:
        ValueError: if ``p <= 0`` while ``n > 1``; such a graph has no edges
            and can never be connected, so sampling would never terminate.
    """
    if n > 1 and p <= 0:
        raise ValueError(
            f"G({n},{p}) has no edges and can never be connected; p must be > 0"
        )
    while True:
        G = fast_gnp_random_graph(n, p)
        if is_connected(G):
            return set(G.edges), f"G({n},{p})"
def test_graphs(db):
    """Check that a pipeline's graph mirrors its stored nodes and links."""
    pipeline = insert_pipeline(db, 'test_graphs')

    # Before any node is added the graph has neither nodes nor edges.
    empty_graph = pipeline.to_graph()
    assert len(empty_graph.nodes) == 0
    assert len(empty_graph.edges) == 0

    # Store two nodes and a single link between them.
    source = PipelineNode(pipeline_id=pipeline.id)
    target = PipelineNode(pipeline_id=pipeline.id)
    for node in (source, target):
        node.save(db)
    link = PipelineLink(
        pipeline_id=pipeline.id,
        from_node_id=source.id,
        to_node_id=target.id,
    )
    link.save(db)
    db.commit()

    assert len(pipeline.nodes) == 2
    assert len(pipeline.links) == 1

    # The rebuilt graph is directed, acyclic, weakly connected and has the
    # same node/edge counts as the pipeline.
    graph = pipeline.to_graph()
    assert len(graph.nodes) == len(pipeline.nodes)
    assert len(graph.edges) == len(pipeline.links)
    assert graph.is_directed()
    with pytest.raises(NetworkXNoCycle):
        find_cycle(graph)
    assert is_connected(graph.to_undirected())
def validate_pipeline_structure(graph: Union[Graph, DiGraph]) -> bool:
    """Validate that a pipeline graph is connected and free of cycles.

    Args:
        graph: The pipeline graph; connectivity is checked on its undirected
            view, cycles are searched with ``find_cycle`` on the graph itself.

    Returns:
        ``True`` when both checks pass.

    Raises:
        AssertionError: if the undirected view of the graph is not connected.
        ValueError: if ``find_cycle`` finds a cycle.
    """
    # Raised explicitly rather than via `assert` so the check is not stripped
    # under `python -O`; the exception type seen by callers is unchanged.
    if not is_connected(graph.to_undirected()):
        raise AssertionError('The pipeline must be connected.')

    try:
        find_cycle(graph)
    except NetworkXNoCycle:
        return True
    raise ValueError('The pipeline must not contain a cycle.')
def resolve_duplicate_clusters(clusters, pairs):
    """Collapse clusters linked by duplicate pairs onto one representative.

    A graph is built with `clusters` as nodes and `pairs` as edges.  Within
    every connected component the smallest node is chosen as the main
    cluster and every other node of the component is mapped to it.

    Parameters
    ----------
    clusters : iterable
        Cluster identifiers (must be hashable and mutually comparable).
    pairs : iterable of 2-tuples
        Pairs of cluster identifiers considered duplicates of each other.

    Returns
    -------
    translation_dict : dict
        Maps each non-representative cluster to its component's main cluster.
    duplicate_weights : dict
        Maps "number of duplicates in a component" to the number of
        components with that many duplicates.

    Notes
    -----
    Counts are reported through the module-level ``summary`` object.  The
    input `clusters` is never modified (the previous implementation removed
    the main node from the caller's collection when the graph was connected).
    """
    if not clusters:
        summary.add('Duplicate components', 0)
        summary.add('Clusters truely duplicate', 0)
        return dict(), dict()

    G = nx.Graph()
    G.add_nodes_from(clusters)
    G.add_edges_from(pairs)

    # A connected graph simply yields one component, so a separate
    # is_connected() branch is unnecessary.  Copy each component so nothing
    # owned by the caller or by networkx is mutated below.
    components_list = [set(c) for c in components.connected_components(G)]
    summary.add('Duplicate components', len(components_list))

    translation_dict = {}
    duplicate_weights = {}
    for component in components_list:
        main_node = min(component)
        duplicates = component - {main_node}
        n_duplicates = len(duplicates)
        duplicate_weights[n_duplicates] = duplicate_weights.get(n_duplicates, 0) + 1
        for node in duplicates:
            translation_dict[node] = main_node
            # NOTE(review): assumed to be called once per duplicate cluster —
            # confirm against summary.add's single-argument semantics.
            summary.add('Clusters truely duplicate')
    return translation_dict, duplicate_weights
def get_subgraph_list(data, n):
    """
    Lists every connected set of n nodes.

    Parameters
    ----------
    data : object accepted by ``to_networkx``
        Converted to an undirected graph with self loops removed.
    n : int
        Number of nodes per candidate set.

    Returns
    -------
    subgraphs : list of tuples
        Each tuple holds n node indices (taken from ``range(number_of_nodes)``)
        whose induced subgraph is connected.
    """
    graph = to_networkx(data, to_undirected=True, remove_self_loops=True)
    node_count = graph.number_of_nodes()
    return [
        candidate
        for candidate in combinations(range(node_count), n)
        if is_connected(graph.subgraph(candidate))
    ]
def report_connectedness(G, save_img_path=None):
    """
    Reports whether the graph is connected together with its components.

    Args:
        G (nx.Graph): graph to inspect.
        save_img_path (str): optional path; when given, a PNG drawing of the
            graph with one colour per connected component is saved there.

    Returns:
        (bool, list): whether `G` is connected, and the list of node sets
        forming its connected components (a single set when connected).
    """
    graph_is_connected = components.is_connected(G)

    # Materialise the generator: it is measured, iterated and returned below.
    connected_components = list(components.connected_components(G))

    # save the components visualisation if a path is given
    if save_img_path:
        colors = np.linspace(0, 1, len(connected_components))
        com_color_map = dict()
        for idx, com in enumerate(connected_components):
            for node in com:
                com_color_map[node] = colors[idx]

        pos = nx.spring_layout(G)
        nx.draw_networkx_labels(G, pos=pos)
        # node_color must follow the node order used by nx.draw, which is the
        # iteration order of G, not the order components were discovered in.
        nx.draw(G, pos, node_color=[com_color_map[node] for node in G.nodes()])
        plt.savefig(save_img_path, format="PNG")

    return (graph_is_connected, connected_components)
def print_Measures(self, G, blnCalculateDimater=False, blnCalculateRadius=False,
                   blnCalculateExtremaBounding=False, blnCalculateCenterNodes=False,
                   fileName_to_print=None):
    """Print basic measures of `G` and optionally write them to a file.

    Node and edge counts are always printed.  Diameter, radius, extrema
    bounding and centre nodes are computed only when the matching flag is
    set and the graph is connected; otherwise an explanatory message is
    printed in place of the value.

    Args:
        G: graph exposing ``nodes()``, ``edges()`` and ``degree()``.
        blnCalculateDimater: compute and print the diameter.
        blnCalculateRadius: compute and print the radius.
        blnCalculateExtremaBounding: compute and print the extrema bounding.
        blnCalculateCenterNodes: compute and print the centre nodes with
            their degrees, sorted by decreasing degree.
        fileName_to_print: when not ``None``, path of a text file the same
            measures are written to; missing parent directories are created.
    """
    # Any failure of the connectivity test is treated as "not connected" so
    # the basic counts can still be reported.
    try:
        blnGraphConnected = is_connected(G)
    except Exception:
        blnGraphConnected = False

    no_nodes = str(len(G.nodes()))
    no_edges = str(len(G.edges()))
    print("# Nodes: " + no_nodes)
    print("# Edges: " + no_edges)

    # Calculate and print Diameter
    if blnCalculateDimater:
        if blnGraphConnected:
            diameter_value = str(distance_measures.diameter(G))
            print("Diameter: " + diameter_value)
        else:
            diameter_value = "Not possible to calculate diameter. Graph must be connected"
            print(diameter_value)

    # Calculate and print Radius
    if blnCalculateRadius:
        if blnGraphConnected:
            radius_value = str(distance_measures.radius(G))
            print("Radius: " + radius_value)
        else:
            radius_value = "Not possible to calculate radius. Graph must be connected"
            print(radius_value)

    # Calculate and print Extrema bounding
    if blnCalculateExtremaBounding:
        if blnGraphConnected:
            extrema_bounding_value = str(distance_measures.extrema_bounding(G))
            print("Extrema bounding: " + extrema_bounding_value)
        else:
            extrema_bounding_value = "Not possible to calculate Extrema bounding. Graph must be connected"
            print(extrema_bounding_value)

    # Calculate and print Centers
    if blnCalculateCenterNodes:
        str_centers_nodes = ""
        if blnGraphConnected:
            centers_nodes = distance_measures.center(G)
            str_centers_nodes = str(
                sorted(G.degree(centers_nodes), key=lambda x: x[1], reverse=True))
            print("Centers with their degree: " + str_centers_nodes)
        else:
            centers_nodes = "Not possible to calculate Centers. Graph must be connected"
            print(centers_nodes)

    # if a file name is passed in the parameters, save the measures into it
    if fileName_to_print is not None:
        # A bare file name has an empty dirname; os.makedirs('') would fail.
        directory = os.path.dirname(fileName_to_print)
        if directory:
            os.makedirs(directory, exist_ok=True)

        with open(fileName_to_print, "w") as f:
            f.write("# Nodes: " + no_nodes + "\n")
            f.write("# Edges: " + no_edges + "\n")
            if blnCalculateDimater:
                f.write("Diameter: " + diameter_value + "\n")
            if blnCalculateRadius:
                f.write("Radius: " + radius_value + "\n")
            if blnCalculateExtremaBounding:
                f.write("Extrema bounding: " + extrema_bounding_value + "\n")
            if blnCalculateCenterNodes:
                f.write("Centers with their degree: " + str_centers_nodes + "\n")
def asyn_fluidc(G, k, max_iter=100, enable_pr=True):
    """Returns communities in `G` as detected by Fluid Communities algorithm.

    The asynchronous fluid communities algorithm is described in [1]_.
    Each of the k communities starts in one vertex.  The algorithm then
    repeatedly visits all vertices in random order and moves each vertex to
    the community with the highest summed density among itself and its
    neighbours.  Every community has a total density of 1, shared equally
    among its vertices, and densities are updated as soon as a vertex moves.
    The algorithm stops when a full sweep changes nothing.  Weighted graphs
    are not supported.

    Parameters
    ----------
    G : Graph

    k : integer
        The number of communities to be found.

    max_iter : integer
        Bound on the number of sweeps; iteration stops once the sweep
        counter exceeds this value.  By default 100.

    enable_pr : bool
        When true, the k starting vertices are drawn at random from the
        ``2 * k`` vertices with the highest PageRank; otherwise they are
        chosen uniformly at random.

    Returns
    -------
    communities : iterable
        Iterable of communities given as sets of nodes.

    Raises
    ------
    NetworkXError
        If k is not a positive integer, if `G` is not connected, or if k
        exceeds the number of nodes.

    Notes
    -----
    k variable is not an optional argument.

    References
    ----------
    .. [1] Parés F., Garcia-Gasulla D. et al. "Fluid Communities: A
       Competitive and Highly Scalable Community Detection Algorithm".
       [https://arxiv.org/pdf/1703.09307.pdf].
    """
    # Initial checks
    if not isinstance(k, int):
        raise NetworkXError("k must be an integer.")
    if not k > 0:
        raise NetworkXError("k must be greater than 0.")
    if not is_connected(G):
        raise NetworkXError("Fluid Communities can only be run on connected Graphs.")
    if len(G) < k:
        raise NetworkXError("k cannot be bigger than the number of nodes.")

    # Initialization
    max_density = 1.0
    vertices = list(G)
    random.shuffle(vertices)
    if enable_pr:
        ranks = pagerank(G)
        maybe_print("PageRanks: {0}".format(ranks), 2, u'i')
        ranked = sorted(ranks, key=ranks.get, reverse=True)
        # Shuffle the candidate list itself (shuffling a slice expression
        # only shuffles a temporary copy), then keep k of the top 2k.
        candidates = ranked[:k * 2]
        random.shuffle(candidates)
        top_keys = candidates[:k]
        maybe_print("Top keys: {0}".format(top_keys), 2, u'i')
        # Put the chosen seeds at the head of `vertices`, which is what the
        # community initialisation below reads.
        seeded = set(top_keys)
        vertices = top_keys + [v for v in vertices if v not in seeded]

    communities = {n: i for i, n in enumerate(vertices[:k])}
    density = {}
    com_to_numvertices = {}
    for vertex in communities:
        com_to_numvertices[communities[vertex]] = 1
        density[communities[vertex]] = max_density

    # Set up control variables and start iterating
    iter_count = 0
    cont = True
    while cont:
        cont = False
        iter_count += 1
        # Loop over all vertices in graph in a random order
        vertices = list(G)
        random.shuffle(vertices)
        for vertex in vertices:
            # Updating rule
            com_counter = Counter()
            # Take into account self vertex community (if already assigned)
            try:
                com_counter.update({communities[vertex]: density[communities[vertex]]})
            except KeyError:
                pass
            # Gather neighbour vertex communities
            for v in G[vertex]:
                try:
                    com_counter.update({communities[v]: density[communities[v]]})
                except KeyError:
                    continue
            # Check which is the community with highest density
            new_com = -1
            if com_counter:
                max_freq = max(com_counter.values())
                best_communities = [com for com, freq in com_counter.items()
                                    if (max_freq - freq) < 0.0001]
                # If actual vertex com in best communities, it is preserved
                try:
                    if communities[vertex] in best_communities:
                        new_com = communities[vertex]
                except KeyError:
                    pass
                # If vertex community changes...
                if new_com == -1:
                    # Set flag of non-convergence
                    cont = True
                    # Randomly chose a new community from candidates
                    new_com = random.choice(best_communities)
                    # Update previous community status
                    try:
                        com_to_numvertices[communities[vertex]] -= 1
                        density[communities[vertex]] = (
                            max_density / com_to_numvertices[communities[vertex]])
                    except KeyError:
                        pass
                    # Update new community status
                    communities[vertex] = new_com
                    com_to_numvertices[communities[vertex]] += 1
                    density[communities[vertex]] = (
                        max_density / com_to_numvertices[communities[vertex]])
        # If maximum iterations reached --> output actual results
        if iter_count > max_iter:
            break
    # Return results by grouping communities as list of vertices
    return iter(groups(communities).values())
def asyn_fluidc(G, k, max_iter=100, seed=None):
    """Returns communities in `G` as detected by Fluid Communities algorithm.

    The asynchronous fluid communities algorithm is described in [1]_.
    Each of the k communities starts in one random vertex.  The algorithm
    then repeatedly visits all vertices in random order and moves each
    vertex to the community with the highest summed density among itself
    and its neighbours.  Every community has a total density of 1, shared
    equally among its vertices, and densities are updated as soon as a
    vertex moves.  The algorithm stops when a full sweep changes nothing.
    Weighted graphs are not supported.

    Parameters
    ----------
    G : Graph

    k : integer
        The number of communities to be found.

    max_iter : integer
        Bound on the number of sweeps; iteration stops once the sweep
        counter exceeds this value.  By default 100.

    seed : integer, random_state, or None (default)
        ``None`` uses the global ``random`` module, an integer seeds a new
        ``random.Random``, and any other object is used as-is and must
        provide ``shuffle`` and ``choice``.

    Returns
    -------
    communities : iterable
        Iterable of communities given as sets of nodes.

    Raises
    ------
    NetworkXError
        If k is not a positive integer, if `G` is not connected, or if k
        exceeds the number of nodes.

    Notes
    -----
    k variable is not an optional argument.

    References
    ----------
    .. [1] Parés F., Garcia-Gasulla D. et al. "Fluid Communities: A
       Competitive and Highly Scalable Community Detection Algorithm".
       [https://arxiv.org/pdf/1703.09307.pdf].
    """
    import random

    # Normalise `seed` so the documented default (None) and integer seeds
    # work; an already-constructed generator passes through untouched.
    if seed is None:
        rng = random
    elif isinstance(seed, int):
        rng = random.Random(seed)
    else:
        rng = seed

    # Initial checks
    if not isinstance(k, int):
        raise NetworkXError("k must be an integer.")
    if not k > 0:
        raise NetworkXError("k must be greater than 0.")
    if not is_connected(G):
        raise NetworkXError("Fluid Communities require connected Graphs.")
    if len(G) < k:
        raise NetworkXError("k cannot be bigger than the number of nodes.")

    # Initialization
    max_density = 1.0
    vertices = list(G)
    rng.shuffle(vertices)
    communities = {n: i for i, n in enumerate(vertices[:k])}
    density = {}
    com_to_numvertices = {}
    for vertex in communities:
        com_to_numvertices[communities[vertex]] = 1
        density[communities[vertex]] = max_density

    # Set up control variables and start iterating
    iter_count = 0
    cont = True
    while cont:
        cont = False
        iter_count += 1
        # Loop over all vertices in graph in a random order
        vertices = list(G)
        rng.shuffle(vertices)
        for vertex in vertices:
            # Updating rule
            com_counter = Counter()
            # Take into account self vertex community (if already assigned)
            try:
                com_counter.update(
                    {communities[vertex]: density[communities[vertex]]})
            except KeyError:
                pass
            # Gather neighbour vertex communities
            for v in G[vertex]:
                try:
                    com_counter.update(
                        {communities[v]: density[communities[v]]})
                except KeyError:
                    continue
            # Check which is the community with highest density
            new_com = -1
            if com_counter:
                max_freq = max(com_counter.values())
                best_communities = [
                    com for com, freq in com_counter.items()
                    if (max_freq - freq) < 0.0001
                ]
                # If actual vertex com in best communities, it is preserved
                try:
                    if communities[vertex] in best_communities:
                        new_com = communities[vertex]
                except KeyError:
                    pass
                # If vertex community changes...
                if new_com == -1:
                    # Set flag of non-convergence
                    cont = True
                    # Randomly chose a new community from candidates
                    new_com = rng.choice(best_communities)
                    # Update previous community status
                    try:
                        com_to_numvertices[communities[vertex]] -= 1
                        density[communities[vertex]] = (
                            max_density / com_to_numvertices[communities[vertex]])
                    except KeyError:
                        pass
                    # Update new community status
                    communities[vertex] = new_com
                    com_to_numvertices[communities[vertex]] += 1
                    density[communities[vertex]] = (
                        max_density / com_to_numvertices[communities[vertex]])
        # If maximum iterations reached --> output actual results
        if iter_count > max_iter:
            break
    # Return results by grouping communities as list of vertices
    return iter(groups(communities).values())
def asyn_fluidc(G, k, max_iter=100, seed=None):  # noqa, pylint: disable=too-many-locals,too-many-branches,too-many-statements
    """Detect k communities with the asynchronous Fluid Communities algorithm.

    This function is adapted from networkx directly; unlike the networkx
    version it also returns the final density of every community.

    Parameters
    ----------
    G : Graph
        Must be connected.
    k : int
        Number of communities; each starts in one random vertex.
    max_iter : int
        Bound on the number of sweeps; iteration stops once the sweep
        counter exceeds this value.
    seed : int, random generator, or None
        ``None`` uses the global ``random`` module, an integer seeds a new
        ``random.Random``, and any other object is used as-is and must
        provide ``shuffle`` and ``choice``.

    Returns
    -------
    (list, list)
        The communities as produced by ``groups(communities).values()`` and
        the list of per-community densities.

    Raises
    ------
    NetworkXError
        If k is not a positive integer, if `G` is not connected, or if k
        exceeds the number of nodes.
    """
    import random

    # Normalise `seed` so the default (None) and integer seeds work; an
    # already-constructed generator passes through untouched.
    if seed is None:
        rng = random
    elif isinstance(seed, int):
        rng = random.Random(seed)
    else:
        rng = seed

    # Initial checks
    if not isinstance(k, int):
        raise NetworkXError("k must be an integer.")
    if not k > 0:
        raise NetworkXError("k must be greater than 0.")
    if not is_connected(G):
        raise NetworkXError("Fluid Communities require connected Graphs.")
    if len(G) < k:
        raise NetworkXError("k cannot be bigger than the number of nodes.")

    # Initialization
    max_density = 1.0
    vertices = list(G)
    rng.shuffle(vertices)
    communities = {n: i for i, n in enumerate(vertices[:k])}
    density = {}
    com_to_numvertices = {}
    for vertex in communities:
        com_to_numvertices[communities[vertex]] = 1
        density[communities[vertex]] = max_density

    # Set up control variables and start iterating
    iter_count = 0
    cont = True
    while cont:
        cont = False
        iter_count += 1
        # Loop over all vertices in graph in a random order
        vertices = list(G)
        rng.shuffle(vertices)
        for vertex in vertices:
            # Updating rule
            com_counter = Counter()
            # Take into account self vertex community (if already assigned)
            try:
                com_counter.update(
                    {communities[vertex]: density[communities[vertex]]})
            except KeyError:
                pass
            # Gather neighbour vertex communities
            for v in G[vertex]:
                try:
                    com_counter.update(
                        {communities[v]: density[communities[v]]})
                except KeyError:
                    continue
            # Check which is the community with highest density
            new_com = -1
            if com_counter.keys():
                max_freq = max(com_counter.values())
                best_communities = [
                    com for com, freq in com_counter.items()
                    if (max_freq - freq) < 0.0001
                ]
                # If actual vertex com in best communities, it is preserved
                try:
                    if communities[vertex] in best_communities:
                        new_com = communities[vertex]
                except KeyError:
                    pass
                # If vertex community changes...
                if new_com == -1:
                    # Set flag of non-convergence
                    cont = True
                    # Randomly chose a new community from candidates
                    new_com = rng.choice(best_communities)
                    # Update previous community status
                    try:
                        com_to_numvertices[communities[vertex]] -= 1
                        density[communities[vertex]] = (
                            max_density / com_to_numvertices[communities[vertex]])
                    except KeyError:
                        pass
                    # Update new community status
                    communities[vertex] = new_com
                    com_to_numvertices[communities[vertex]] += 1
                    density[communities[vertex]] = (
                        max_density / com_to_numvertices[communities[vertex]])
        # If maximum iterations reached --> output actual results
        if iter_count > max_iter:
            break
    # Return results by grouping communities as list of vertices
    return list(groups(communities).values()), list(density.values())
def asyn_fluidc(G, k, max_iter=100, seed=None):
    """Returns communities in `G` as detected by Fluid Communities algorithm.

    The asynchronous fluid communities algorithm is described in [1]_.
    First each of the k communities is placed on a random vertex.  Then all
    vertices are visited in random order, and each vertex joins the
    community with the highest summed density over itself and its
    neighbours (ties within 0.0001 keep the current community, otherwise
    one of the best is chosen at random).  A community's total density is
    always 1, shared equally among its vertices, and is updated immediately
    when a vertex moves.  The algorithm has converged when a complete sweep
    moves no vertex.  Weighted graphs are not supported.

    Parameters
    ----------
    G : Graph

    k : integer
        The number of communities to be found.

    max_iter : integer
        Bound on the number of sweeps; iteration stops once the sweep
        counter exceeds this value.  By default 100.

    seed : integer, random_state, or None (default)
        ``None`` uses the global ``random`` module, an integer seeds a new
        ``random.Random``, and any other object is used as-is and must
        provide ``shuffle`` and ``choice``.

    Returns
    -------
    communities : iterable
        Iterable of communities given as sets of nodes.

    Raises
    ------
    NetworkXError
        If k is not a positive integer, if `G` is not connected, or if k
        exceeds the number of nodes.

    Notes
    -----
    k variable is not an optional argument.

    References
    ----------
    .. [1] Parés F., Garcia-Gasulla D. et al. "Fluid Communities: A
       Competitive and Highly Scalable Community Detection Algorithm".
       [https://arxiv.org/pdf/1703.09307.pdf].
    """
    import random

    # Normalise `seed`: without this the documented default (None) fails on
    # the first `.shuffle` call.  Existing generators pass through as-is.
    if seed is None:
        rng = random
    elif isinstance(seed, int):
        rng = random.Random(seed)
    else:
        rng = seed

    # Initial checks
    if not isinstance(k, int):
        raise NetworkXError("k must be an integer.")
    if not k > 0:
        raise NetworkXError("k must be greater than 0.")
    if not is_connected(G):
        raise NetworkXError("Fluid Communities require connected Graphs.")
    if len(G) < k:
        raise NetworkXError("k cannot be bigger than the number of nodes.")

    # Initialization
    max_density = 1.0
    vertices = list(G)
    rng.shuffle(vertices)
    communities = {n: i for i, n in enumerate(vertices[:k])}
    density = {}
    com_to_numvertices = {}
    for vertex in communities:
        com_to_numvertices[communities[vertex]] = 1
        density[communities[vertex]] = max_density

    # Set up control variables and start iterating
    iter_count = 0
    cont = True
    while cont:
        cont = False
        iter_count += 1
        # Loop over all vertices in graph in a random order
        vertices = list(G)
        rng.shuffle(vertices)
        for vertex in vertices:
            # Updating rule
            com_counter = Counter()
            # Take into account self vertex community (if already assigned)
            try:
                com_counter.update({communities[vertex]: density[communities[vertex]]})
            except KeyError:
                pass
            # Gather neighbour vertex communities
            for v in G[vertex]:
                try:
                    com_counter.update({communities[v]: density[communities[v]]})
                except KeyError:
                    continue
            # Check which is the community with highest density
            new_com = -1
            if com_counter:
                max_freq = max(com_counter.values())
                best_communities = [com for com, freq in com_counter.items()
                                    if (max_freq - freq) < 0.0001]
                # If actual vertex com in best communities, it is preserved
                try:
                    if communities[vertex] in best_communities:
                        new_com = communities[vertex]
                except KeyError:
                    pass
                # If vertex community changes...
                if new_com == -1:
                    # Set flag of non-convergence
                    cont = True
                    # Randomly chose a new community from candidates
                    new_com = rng.choice(best_communities)
                    # Update previous community status
                    try:
                        com_to_numvertices[communities[vertex]] -= 1
                        density[communities[vertex]] = (
                            max_density / com_to_numvertices[communities[vertex]])
                    except KeyError:
                        pass
                    # Update new community status
                    communities[vertex] = new_com
                    com_to_numvertices[communities[vertex]] += 1
                    density[communities[vertex]] = (
                        max_density / com_to_numvertices[communities[vertex]])
        # If maximum iterations reached --> output actual results
        if iter_count > max_iter:
            break
    # Return results by grouping communities as list of vertices
    return iter(groups(communities).values())
def elastic_centered_graph(self, start_node=None):
    """
    Build a copy of ``self.graph`` whose edge "delta" attributes are shifted
    so that the edges of a breadth-first tree end up with a zero delta.

    The graph is copied into a new ``nx.MultiGraph``; a BFS tree rooted at
    the first node of ``self.graph`` is walked level by level, and for each
    tree edge whose delta is not (0, 0, 0) the deltas of all edges of the
    child node are shifted by that delta.

    Args:
        start_node (): Currently ignored: the value is overwritten with the
            first node of ``self.graph`` before it is used.

    Returns:
        nx.MultiGraph: the copy with updated "delta" edge attributes.

    Raises:
        ValueError: if an edge's "start"/"end" data does not match the node
            being processed, or if more than one zero delta links a node to
            a tree neighbor.
        RuntimeError: if the edges with an all-zero delta do not form a
            connected graph over all nodes.
    """
    logging.info("In elastic centering")
    # Loop on start_nodes, sometimes some nodes cannot be elastically taken
    # inside the cell if you start from a specific node
    # NOTE(review): no such loop exists here; only the first node is tried
    # and ntest_nodes is never read.
    ntest_nodes = 0
    start_node = list(self.graph.nodes())[0]
    ntest_nodes += 1
    # Work on a copy so that self.graph itself is left untouched.
    centered_connected_subgraph = nx.MultiGraph()
    centered_connected_subgraph.add_nodes_from(self.graph.nodes())
    centered_connected_subgraph.add_edges_from(self.graph.edges(data=True))
    tree = bfs_tree(G=self.graph, source=start_node)
    current_nodes = [start_node]
    nodes_traversed = [start_node]
    # Running node counter across all levels (used in log messages only).
    inode = 0
    # Loop on "levels" in the tree
    tree_level = 0
    while True:
        tree_level += 1
        logging.debug("In tree level {:d} ({:d} nodes)".format(tree_level, len(current_nodes)))
        new_current_nodes = []
        # Loop on nodes in this level of the tree
        for node in current_nodes:
            inode += 1
            logging.debug(
                " In node #{:d}/{:d} in level {:d} ({})".format(inode, len(current_nodes), tree_level, str(node))
            )
            node_neighbors = list(tree.neighbors(n=node))
            # Edge view on the working copy: it reflects delta updates made
            # while earlier neighbors of this node were processed.
            node_edges = centered_connected_subgraph.edges(nbunch=[node], data=True, keys=True)
            # Loop on neighbors of a node (from the tree used)
            for inode_neighbor, node_neighbor in enumerate(node_neighbors):
                logging.debug(
                    " Testing neighbor #{:d}/{:d} ({}) of node #{:d} ({})".format(
                        inode_neighbor,
                        len(node_neighbors),
                        node_neighbor,
                        inode,
                        node,
                    )
                )
                already_inside = False
                # Deltas of every edge between node and node_neighbor,
                # oriented from node towards node_neighbor.
                ddeltas = []
                for n1, n2, key, edata in node_edges:
                    if (n1 == node and n2 == node_neighbor) or (n2 == node and n1 == node_neighbor):
                        if edata["delta"] == (0, 0, 0):
                            already_inside = True
                            thisdelta = edata["delta"]
                        else:
                            if edata["start"] == node.isite and edata["end"] != node.isite:
                                thisdelta = edata["delta"]
                            elif edata["end"] == node.isite:
                                # Edge stored in the opposite direction: negate.
                                thisdelta = tuple([-dd for dd in edata["delta"]])
                            else:
                                raise ValueError("Should not be here ...")
                        ddeltas.append(thisdelta)
                logging.debug(
                    " ddeltas : {}".format(
                        ", ".join(["({})".format(", ".join(str(ddd) for ddd in dd)) for dd in ddeltas])
                    )
                )
                if ddeltas.count((0, 0, 0)) > 1:
                    raise ValueError("Should not have more than one 000 delta ...")
                if already_inside:
                    logging.debug(" Edge inside the cell ... continuing to next neighbor")
                    continue
                logging.debug(" Edge outside the cell ... getting neighbor back inside")
                if (0, 0, 0) in ddeltas:
                    ddeltas.remove((0, 0, 0))
                # The first remaining delta is the shift applied to the neighbor.
                myddelta = np.array(ddeltas[0], np.int_)
                node_neighbor_edges = centered_connected_subgraph.edges(
                    nbunch=[node_neighbor], data=True, keys=True
                )
                logging.debug(
                    " Delta image from node {} to neighbor {} : "
                    "{}".format(
                        str(node),
                        str(node_neighbor),
                        "({})".format(", ".join([str(iii) for iii in myddelta])),
                    )
                )
                # Loop on the edges of this neighbor
                for n1, n2, key, edata in node_neighbor_edges:
                    # Self-loops on the neighbor are left unchanged.
                    if (n1 == node_neighbor and n2 != node_neighbor) or (
                        n2 == node_neighbor and n1 != node_neighbor
                    ):
                        if edata["start"] == node_neighbor.isite and edata["end"] != node_neighbor.isite:
                            centered_connected_subgraph[n1][n2][key]["delta"] = tuple(
                                np.array(edata["delta"], np.int_) + myddelta
                            )
                        elif edata["end"] == node_neighbor.isite:
                            centered_connected_subgraph[n1][n2][key]["delta"] = tuple(
                                np.array(edata["delta"], np.int_) - myddelta
                            )
                        else:
                            raise ValueError("DUHH")
                        logging.debug(
                            " {} to node {} now has delta "
                            "{}".format(
                                str(n1),
                                str(n2),
                                str(centered_connected_subgraph[n1][n2][key]["delta"]),
                            )
                        )
            new_current_nodes.extend(node_neighbors)
            nodes_traversed.extend(node_neighbors)
        current_nodes = new_current_nodes
        if not current_nodes:
            break
    # Check if the graph is indeed connected if "periodic" edges (i.e. whose "delta" is not 0, 0, 0) are removed
    check_centered_connected_subgraph = nx.MultiGraph()
    check_centered_connected_subgraph.add_nodes_from(centered_connected_subgraph.nodes())
    check_centered_connected_subgraph.add_edges_from(
        [e for e in centered_connected_subgraph.edges(data=True) if np.allclose(e[2]["delta"], np.zeros(3))]
    )
    if not is_connected(check_centered_connected_subgraph):
        raise RuntimeError("Could not find a centered graph.")
    return centered_connected_subgraph
# Draw Erdos-Renyi graphs of `size` nodes until a connected one is found,
# build a label-permuted copy, and time both the construction and the
# get_values() computation.
start_time = time.time()
good = False
while not good:
    # Generate first G
    using_sequence = False
    # Alternative generators: nx.configuration_model(sequence),
    # nx.watts_strogatz_graph(10, 3, 0.3), nx.barabasi_albert_graph(10, 2).
    G = nx.erdos_renyi_graph(size, 0.02)
    G = nx.Graph(G)
    # Graph.selfloop_edges() was removed from NetworkX; use the function.
    G.remove_edges_from(list(nx.selfloop_edges(G)))
    if not is_connected(G):
        # Disconnected draw: try again.
        continue
    good = True

# make_graph_with_same_degree_dist(G) is the alternative way to build G_prime.
G_prime = permute_labels_only(G)
print("Graph of size %s created in %s seconds." % (size, time.time() - start_time))

start_time = time.time()
numbers = get_values(G, G_prime)
end_time = time.time()
print("%s, %s" % (size, end_time - start_time))

for i in range(0, size):
    # `numbers` is indexed as a flattened table with `size` entries per row;
    # start both extremes from the first entry of row i.
    max_cost = numbers[i * size]
    min_cost = max_cost
def wiener_index(G, weight=None):
    """Returns the Wiener index of the given graph.

    The *Wiener index* is the sum of the shortest-path distances over all
    pairs of nodes.  In an undirected graph each unordered pair is counted
    once; in a directed graph every ordered pair is counted.

    Parameters
    ----------
    G : NetworkX graph

    weight : object
        The edge attribute to use as distance when computing shortest-path
        distances; it is passed on unchanged to the shortest-path-length
        routine.

    Returns
    -------
    float
        The Wiener index of `G`, or ``inf`` when `G` is not connected
        (undirected) or not strongly connected (directed).

    Examples
    --------
    For the complete graph on *n* nodes every pair is at distance one::

        >>> import networkx as nx
        >>> n = 10
        >>> G = nx.complete_graph(n)
        >>> nx.wiener_index(G) == n * (n - 1) / 2
        True

    A graph with unreachable pairs has infinite Wiener index::

        >>> G = nx.empty_graph(2)
        >>> nx.wiener_index(G)
        inf

    """
    directed = G.is_directed()
    all_pairs_reachable = is_strongly_connected(G) if directed else is_connected(G)
    if not all_pairs_reachable:
        return float('inf')

    # Flatten the per-source distance tables into one stream of lengths.
    path_lengths = chaini(p.values() for v, p in spl(G, weight=weight))
    total = sum(path_lengths)
    if directed:
        return total
    # Each unordered pair was seen from both of its endpoints.
    return total / 2
def asyn_fluidcWeight(G, k, max_iter=100, seed=None):
    """Returns communities in `G` as detected by a weighted variant of the
    Fluid Communities algorithm.

    The asynchronous fluid communities algorithm is described in [1]_. The
    algorithm is based on the simple idea of fluids interacting in an
    environment, expanding and pushing each other. Its initialization is
    random, so found communities may vary on different executions.

    The algorithm proceeds as follows. First each of the initial k
    communities is initialized in a random vertex in the graph. Then the
    algorithm iterates over all vertices in a random order, updating the
    community of each vertex based on its own community and the communities
    of its neighbours. This process is performed several times until
    convergence. At all times, each community has a total density of 1,
    which is equally distributed among the vertices it contains. If a vertex
    changes community, the densities of the affected communities are
    adjusted immediately. When a complete iteration over all vertices is
    done such that no vertex changes the community it belongs to, the
    algorithm has converged and returns.

    This is a modified version of the algorithm described in [1]_. For each
    vertex, every community that owns at least one neighbour reached through
    an edge carrying a ``'weight'`` attribute is scored as::

        (sum of densities contributed by the vertex itself and by its
         neighbours in that community)
        * (sum of the weights of the edges to those neighbours)

    and the vertex joins a highest-scoring community, keeping its current
    one on ties.

    Parameters
    ----------
    G : Graph
        Edges are expected to carry a ``'weight'`` attribute; an edge
        without one adds density but no weight to its community.

    k : integer
        The number of communities to be found.

    max_iter : integer
        The number of maximum iterations allowed. By default 100.

    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`. ``None`` uses the global
        :mod:`random` generator, an integer seeds a fresh
        :class:`random.Random`, and any other object is used as-is and
        must provide ``shuffle`` and ``choice``.

    Returns
    -------
    communities : iterable
        Iterable of communities given as sets of nodes.

    Raises
    ------
    NetworkXError
        If `k` is not an integer, is not greater than 0, exceeds the
        number of nodes, or if `G` is not connected.

    Notes
    -----
    k variable is not an optional argument.

    The `k` seed vertices get a ``"density"`` node attribute set to 1 on
    `G` itself.

    A vertex with no scored candidate community in a pass is left
    unchanged for that pass.

    References
    ----------
    .. [1] Parés F., Garcia-Gasulla D. et al. "Fluid Communities: A
       Competitive and Highly Scalable Community Detection Algorithm".
       [https://arxiv.org/pdf/1703.09307.pdf].
    """
    # Local import so this function does not rely on a file-level
    # `import random` being present.
    import random

    # Normalise `seed` into an object exposing shuffle()/choice(). Without
    # this, the default seed=None (or an int) crashed on `seed.shuffle`.
    # Generator objects (e.g. already converted by a decorator) pass through.
    if seed is None:
        rng = random
    elif isinstance(seed, int):
        rng = random.Random(seed)
    else:
        rng = seed

    # Initial checks
    if not isinstance(k, int):
        raise NetworkXError("k must be an integer.")
    if not k > 0:
        raise NetworkXError("k must be greater than 0.")
    if not is_connected(G):
        raise NetworkXError("Fluid Communities require connected Graphs.")
    if len(G) < k:
        raise NetworkXError("k cannot be bigger than the number of nodes.")

    # Initialization: the first k shuffled vertices seed communities 0..k-1.
    max_density = 1.0
    vertices = list(G)
    rng.shuffle(vertices)
    communities = {n: i for i, n in enumerate(vertices[:k])}
    density = {}
    com_to_numvertices = {}
    for vertex, com in communities.items():
        com_to_numvertices[com] = 1
        G.nodes[vertex]["density"] = 1
        density[com] = max_density

    # Set up control variables and start iterating
    iter_count = 0
    cont = True
    while cont:
        cont = False
        iter_count += 1
        # Loop over all vertices in graph in a random order
        vertices = list(G)
        rng.shuffle(vertices)
        for vertex in vertices:
            # Updating rule
            com_counter = Counter()
            weight_counter = Counter()

            # Take into account self vertex community (None if unassigned)
            own_com = communities.get(vertex)
            if own_com is not None:
                com_counter[own_com] += density[own_com]

            # Gather neighbour vertex communities and the edge weights
            # leading to them.
            for neighbour in G[vertex]:
                nbr_com = communities.get(neighbour)
                if nbr_com is None:
                    continue
                com_counter[nbr_com] += density[nbr_com]
                edge_weight = G.edges[neighbour, vertex].get('weight')
                if edge_weight is not None:
                    weight_counter[nbr_com] += edge_weight

            # Score = aggregated density * aggregated edge weight, only for
            # communities reached through at least one weighted edge.
            combined = {
                com: dens * weight_counter[com]
                for com, dens in com_counter.items()
                if com in weight_counter
            }
            if not combined:
                # No candidate for this vertex. Previously this path used an
                # unset or stale `best_communities`; leave the vertex as is.
                continue

            # Check which is the community with highest score
            max_combined = max(combined.values())
            best_communities = [
                com for com, score in combined.items()
                if score == max_combined
            ]
            if not best_communities:
                # Only reachable if the scores do not compare equal to their
                # own maximum (e.g. NaN weights); nothing sensible to pick.
                continue

            # If actual vertex com in best communities, it is preserved
            if own_com in best_communities:
                continue

            # Vertex community changes: set flag of non-convergence
            cont = True
            # Randomly choose a new community from candidates
            new_com = rng.choice(best_communities)

            # Update previous community status. The count is never taken
            # below 1, which avoids a division by zero when the last vertex
            # leaves a community.
            if own_com is not None and com_to_numvertices[own_com] > 1:
                com_to_numvertices[own_com] -= 1
                density[own_com] = max_density / com_to_numvertices[own_com]

            # Update new community status
            communities[vertex] = new_com
            com_to_numvertices[new_com] += 1
            density[new_com] = max_density / com_to_numvertices[new_com]

        # If maximum iterations reached --> output actual results
        if iter_count > max_iter:
            break

    # Return results by grouping communities as list of vertices
    return iter(groups(communities).values())


# if __name__ == '__main__':
# name3 = "/home/james/4F90/sg_infectious_graphs/weightededgesX_2009_05_06.out"
# name = "/home/james/4F90/sg_infectious_graphs/weightededgesX_2009_07_15.out"
# fh2 = open(name, "rb")
# fh3 = open(name3, "rb")
# my_graph2 = nx.read_weighted_edgelist(fh3)
# testg = nx.read_weighted_edgelist(fh3)
# fh2.close()
# graphs = (my_graph2.subgraph(c) for c in nx.connected_components(my_graph2))
# graphs = list(graphs)
# community = asyn_fluidcWeight(my_graph2, 10, seed=1)
# fluid = nx.algorithms.community.asyn_fluidc(my_graph2, 13, seed=10)
# defaultFLuid = algorithms.async_fluid(my_graph2, 13)
# louvain = algorithms.louvain(my_graph2, weight='weight')
# com1 = []
# com2 = []
# coms1 = [list(x) for x in community]
# fluid2 = [list(x) for x in fluid]
# coms2 = cdlib.NodeClustering(coms1, my_graph2, "FluidWeight")
# fluid3 = cdlib.NodeClustering(fluid2, my_graph2, "FluidWeight")
# pos = nx.spring_layout(testg, weight='weight',seed=5)
# pos = nx.nx_pydot.graphviz_layout(testg)
# wcom = asyn_fluidcWeight(testg, 10, seed=3)
# wcoms = [list(x) for x in wcom]
# wcoms2 = cdlib.NodeClustering(wcoms, testg, "FluidWeight")
# print(evaluation.newman_girvan_modularity(testg,
wcoms2).score) # labels = nx.get_edge_attributes(testg, 'weight') # viz.plot_network_clusters(testg, wcoms2, pos,figsize=(20,20),node_size=600,cmap='gist_rainbow', plot_labels=False) # nx.draw_networkx_edge_labels(testg,pos, edge_labels=labels, font_size=6) # nx.draw_networkx_labels(testg, pos, font_size=8) # plt.savefig("Algo2_10com.png") # plt.show() # plt.close() # resolut = {} # resolut["5"] = 4 # resolut["7"] = 2.5 # resolut["10"] = 1.41 # resolut["13"] = 1 # resolut["15"] = 0.9 # resolut["17"] = 0.72 # resolut["20"] = 0.6 # resolutions = [4,2.5,1.41,1,0.9,0.72,0.6] # louvain = algorithms.louvain(my_graph2, weight='weight', resolution=1) # count = 0 # for i in fluid3.communities: # count = count +1 # print(count) # count = 0 # for i in louvain.communities: # count = count +1 # print(count) # with open('algo2fluidcontrol20comm.txt', 'w') as f: # count = 0 # s = 0 # scores = [] # while count <30: # try: # print("seed: "+ str(s)) # print("seed: "+ str(s),file=f) # wcom = asyn_fluidcWeight(my_graph2, 20, seed=s) # wcoms = [list(x) for x in wcom] # wcoms2 = cdlib.NodeClustering(wcoms, my_graph2, "FluidWeight") # fluid = nx.algorithms.community.asyn_fluidc(my_graph2, 20, seed=s) # fluid2 = [list(x) for x in fluid] # fluid3 = cdlib.NodeClustering(fluid2, my_graph2, "FluidWeight") # print("weightedfluid") # print(wcoms2.communities) # print("Benchmark Fluid") # print(fluid3.communities) # print("weightedfluid", file=f) # print(wcoms2.communities, file=f) # print("Benchmark Fluid", file=f) # print(fluid3.communities,file=f) # scores.append(evaluation.adjusted_rand_index(wcoms2, fluid3).score) # print(evaluation.adjusted_rand_index(wcoms2, fluid3), file=f) # count+=1 # s+=1 # except: # # print("Something went wrong with seed: "+ str(s)) # # print("Something went wrong with seed: "+ str(s),file=f) # s+=1 # print("Adjusted rand indexes") # print("Adjusted rand indexes", file=f) # print(scores) # print(scores, file=f) # print("Mean") # print("Mean", file=f) # 
print(numpy.mean(scores)) # print(numpy.mean(scores),file=f) # print("Standard deviation") # print("Standard deviation", file=f) # print(numpy.std(scores)) # print(numpy.std(scores), file=f) # with open('algo2louvainnorand20comm.txt', 'w') as f: # count = 0 # s = 0 # scores = [] # while count <30: # try: # print("seed: "+ str(s)) # print("seed: "+ str(s),file=f) # wcom = asyn_fluidcWeight(my_graph2, 20, seed=s) # wcoms = [list(x) for x in wcom] # wcoms2 = cdlib.NodeClustering(wcoms, my_graph2, "FluidWeight") # louvain = algorithms.louvain(my_graph2, weight='weight', resolution=0.4) # print("weightedfluid") # print(wcoms2.communities) # print("Benchmark Fluid") # print(louvain.communities) # print("weightedfluid", file=f) # print(wcoms2.communities, file=f) # print("Benchmark Fluid", file=f) # print(louvain.communities,file=f) # print(evaluation.adjusted_rand_index(wcoms2, louvain)) # print(evaluation.adjusted_rand_index(wcoms2, louvain),file=f) # scores.append(evaluation.adjusted_rand_index(wcoms2, louvain).score) # count+=1 # s+=1 # except: # # print("Something went wrong with seed: "+ str(s)) # # print("Something went wrong with seed: "+ str(s),file=f) # s+=1 # print("Adjusted rand indexes") # print("Adjusted rand indexes", file=f) # print(scores) # print(scores, file=f) # print("Mean") # print("Mean", file=f) # print(numpy.mean(scores)) # print(numpy.mean(scores),file=f) # print("Standard deviation") # print("Standard deviation", file=f) # print(numpy.std(scores)) # print(numpy.std(scores), file=f) # with open('algo2louvainrand20comm.txt', 'w') as f: # count = 0 # s = 0 # scores = [] # while count <30: # try: # print("seed: "+ str(s)) # print("seed: "+ str(s),file=f) # wcom = asyn_fluidcWeight(my_graph2, 20, seed=s) # wcoms = [list(x) for x in wcom] # wcoms2 = cdlib.NodeClustering(wcoms, my_graph2, "FluidWeight") # #Adjust resolution to get community size [4,2.5,1.41,1,0.9,0.72,0.6] -> [5,7,10,13,15,17,20] # louvain = algorithms.louvain(my_graph2, 
weight='weight',randomize=1, resolution=0.4) # print("weightedfluid") # print(wcoms2.communities) # print("Benchmark Fluid") # print(louvain.communities) # print("weightedfluid", file=f) # print(wcoms2.communities, file=f) # print("Benchmark Fluid", file=f) # print(louvain.communities,file=f) # print(evaluation.adjusted_rand_index(wcoms2, louvain)) # print(evaluation.adjusted_rand_index(wcoms2, louvain),file=f) # scores.append(evaluation.adjusted_rand_index(wcoms2, louvain).score) # count+=1 # s+=1 # except: # # print("Something went wrong with seed: "+ str(s)) # # print("Something went wrong with seed: "+ str(s),file=f) # s+=1 # print("Adjusted rand indexes") # print("Adjusted rand indexes", file=f) # print(scores) # print(scores, file=f) # print("Mean") # print("Mean", file=f) # print(numpy.mean(scores)) # print(numpy.mean(scores),file=f) # print("Standard deviation") # print("Standard deviation", file=f) # print(numpy.std(scores)) # print(numpy.std(scores), file=f) # name2 = "/content/drive/MyDrive/4F90/sg_infectious_graphs/sg_infectious_graphs/weightededgesX_2009_06_02.out" # name = "/content/drive/MyDrive/4F90/sg_infectious_graphs/sg_infectious_graphs/weightededgesX_2009_07_15.out" # fh2 = open(name, "rb") # my_graph2 = nx.read_weighted_edgelist(fh2) # fh2.close() # graphs = (my_graph2.subgraph(c) for c in nx.connected_components(my_graph2)) # graphs = list(graphs) # com1 = [] # com2 = [] # with open('algo2fluidcontrol.txt', 'w') as f: # count = 0 # s = 0 # while count <30: # try: # wcom = asyn_fluidcWeight(my_graph2, 10, seed=s) # print("seed: "+ str(s)) # print("seed: "+ str(s),file=f) # wcoms = [list(x) for x in wcom] # wcoms2 = cdlib.NodeClustering(wcoms, my_graph2, "FluidWeight") # fluid = nx.algorithms.community.asyn_fluidc(my_graph2, 10, seed=s) # fluid2 = [list(x) for x in fluid] # fluid3 = cdlib.NodeClustering(fluid2, my_graph2, "FluidWeight") # print(evaluation.adjusted_rand_index(wcoms2, fluid3)) # print(evaluation.adjusted_rand_index(wcoms2, 
fluid3), file=f) # count+=1 # s+=1 # except: # # print("Something went wrong with seed: "+ str(s)) # # print("Something went wrong with seed: "+ str(s),file=f) # s+=1 # with open('algo2louvainnorand.txt', 'w') as f: # count = 0 # while count <30: # try: # wcom = asyn_fluidcWeight(my_graph2, 10, seed=s) # print("seed: "+ str(s)) # print("seed: "+ str(s),file=f) # wcoms = [list(x) for x in wcom] # wcoms2 = cdlib.NodeClustering(wcoms, my_graph2, "FluidWeight") # louvain = algorithms.louvain(my_graph2, weight='weight') # print(evaluation.adjusted_rand_index(wcoms2, louvain)) # print(evaluation.adjusted_rand_index(wcoms2, louvain),file=f) # count+=1 # s+=1 # except: # # print("Something went wrong with seed: "+ str(s)) # # print("Something went wrong with seed: "+ str(s),file=f) # s+=1 # with open('algo2louvainrand.txt', 'w') as f: # count = 0 # while count <30: # try: # wcom = asyn_fluidcWeight(my_graph2, 10, seed=s) # print("seed: "+ str(s)) # print("seed: "+ str(s),file=f) # wcoms = [list(x) for x in wcom] # wcoms2 = cdlib.NodeClustering(wcoms, my_graph2, "FluidWeight") # louvain = algorithms.louvain(my_graph2, weight='weight',randomize=1) # print(evaluation.adjusted_rand_index(wcoms2, louvain)) # print(evaluation.adjusted_rand_index(wcoms2, louvain),file=f) # count+=1 # s+=1 # except: # # print("Something went wrong with seed: "+ str(s)) # # print("Something went wrong with seed: "+ str(s),file=f) # s+=1 # for s in range(1, 30): # print("seed: "+ str(s)) # try: # wcom = asyn_fluidcWeight(my_graph2, 10, seed=s) # wcoms = [list(x) for x in wcom] # wcoms2 = cdlib.NodeClustering(wcoms, my_graph2, "FluidWeight") # fluid = nx.algorithms.community.asyn_fluidc(my_graph2, 10, seed=s) # fluid2 = [list(x) for x in fluid] # fluid3 = cdlib.NodeClustering(fluid2, my_graph2, "FluidWeight") # print(evaluation.adjusted_rand_index(wcoms2, fluid3)) # except: # print("Something went wrong with seed: "+ str(s))
print(type(data)) F = nx.Graph() F.add_nodes_from(data) print("I nodi sono: ", number_of_nodes(F)) # for i in range(20): # print(F.nodes[i]) # Per leggere un edgelist with open("/home/utente/Scaricati/Tesi/edgelist_3", "rb") as fp: edgelist = pickle.load(fp) print("ho caricato il grafo") # F = nx.Graph() F.add_edges_from(edgelist) print("I nodi sono: ", number_of_nodes(F)) print("Gli archi sono: ", number_of_edges(F)) if not is_connected(F): print("Proviamoci...") sys.exit() """ # Per mostrare e salvare il grafo in ingresso sotto forma di png nx.draw(test_graph, with_labels=True) plt.savefig("mygraph.png") plt.clf() """ """ # Questioni di debug sul grafo in ingresso print("Prima del clustering il grafo ha archi:\n", test_graph.edges) print('Il grafo di test ha: ', test_graph.number_of_edges(), ' archi') print('Il grafo di test ha: ', test_graph.number_of_nodes(), ' nodi') print('Il grafo di test ha: ', number_of_selfloops(test_graph), ' self-edges')