def graphAnalyzer(graph, kmeans=False):
    """Rank and cluster the nodes of a .gml graph, then report and plot them.

    The graph is read from disk, cleaned (isolated nodes and the first
    listed node are removed), scored with the project's PageRank class and
    laid out with ForceAtlas2. The layout positions are clustered, and for
    every cluster the node with the highest page rank is printed. Finally
    the graph is drawn with one colour per cluster and saved to
    ``graph_with_layout.png``.

    Args:
        graph: path to the .gml graph file.
        kmeans: if true, cluster the positions with ``Kmeans.kmeans``;
            otherwise cluster them with ``dbscan.dbscan``.

    Returns:
        None. Results are printed and the figure is written to disk.
    """
    G = nx.read_gml(graph)
    G = removeIsolatedNodes(G)
    # removing meaningless nodes (the first listed node is dropped)
    G.remove_node(list(G.nodes)[0])

    # ------------------------- PageRank Computation -------------------------
    ranker = PageRank.PageRank(G)
    ranker.constructDispersionMatrix(G)
    pr = ranker.getPageRank()

    # ------------------------ Clustering Computation ------------------------
    # constructing network layout
    forceatlas2 = fa2.ForceAtlas2(
        # Behavior alternatives
        outboundAttractionDistribution=False,  # Dissuade hubs
        linLogMode=False,  # NOT IMPLEMENTED
        adjustSizes=False,  # Prevent overlap (NOT IMPLEMENTED)
        edgeWeightInfluence=0,
        # Performance
        jitterTolerance=.01,  # Tolerance
        barnesHutOptimize=True,
        barnesHutTheta=1.2,
        multiThreaded=False,  # NOT IMPLEMENTED
        # Tuning
        scalingRatio=1,
        strongGravityMode=True,
        gravity=200,
        # Log
        verbose=True)
    pos = forceatlas2.forceatlas2_networkx_layout(G, pos=None, iterations=1000)

    if kmeans:
        # converting positions into a list of np.array
        pos_list = [np.array([elt[0], elt[1]]) for elt in pos.values()]
        # clustering the nodes according to the kmeans algorithm
        clusters = Kmeans.kmeans(pos_list, 8, 0.01, 300)
    else:
        pos = {key: np.array([elt[0], elt[1]]) for key, elt in pos.items()}
        # changing position format to be able to use it in DBSCAN
        pos_transf = dbscan.transf(pos)
        clusters = dbscan.dbscan(pos_transf, 40, 20)

    cluster_with_pr = associatingPageRankToNode(pr, clusters)

    # sorting each cluster by ascending page rank (ties broken by node index),
    # so the best-ranked entry of a cluster is its last element
    for key, value in cluster_with_pr.items():
        cluster_with_pr[key] = sorted(value,
                                      key=lambda item: (item[1], item[0]))

    # rendering the suggested pages and their page rank
    print("\nThe recommanded pages are the following :")
    # Built once: the node list does not change while the results are printed.
    node_names = list(G.nodes())
    for value in cluster_with_pr.values():
        try:
            node_index = value[-1][0]  # retrieving the node index
            score = value[-1][1]
            node_name = node_names[node_index]
        except IndexError:
            # empty cluster, or an index that does not map to a node
            continue
        # getting the title of the Wikipedia page
        title_node = re.search(r'titles=(.*?)\&', node_name)
        # A node label without a "titles=...&" part used to crash on
        # ``None.group``; fall back to the raw label instead.
        title = title_node.group(1) if title_node else node_name
        print("•", title, "- with a page rank of ", score)

    # ---------------------------- Graph Creation ----------------------------
    # each node within a cluster has the same color; unclustered nodes
    # stay black
    colors = [
        "#" + "%06x" % random.randint(0, 0xFFFFFF)
        for _ in range(len(clusters) + 1)
    ]
    node_color = ['black'] * len(node_names)
    for key, value in clusters.items():
        for elt in value:
            node_color[elt] = colors[key]

    nx.draw(G.to_undirected(),
            pos,
            node_size=2,
            width=.05,
            edge_color='grey',
            node_color=node_color)
    plt.savefig("graph_with_layout.png")
def _compute_pos(
    adjacency_solid,
    layout=None,
    random_state=0,
    init_pos=None,
    adj_tree=None,
    root=0,
    layout_kwds=None,
):
    """Compute 2D node positions for a graph given by its adjacency.

    Args:
        adjacency_solid: adjacency of the graph to lay out. It must expose
            ``.shape`` and be accepted by ``networkx.Graph`` (presumably a
            numpy or scipy sparse matrix; confirm against callers).
        layout: layout name. ``None`` means ``"fr"``. ``"fa"`` uses
            ForceAtlas2 (falling back to ``"fr"`` when ``fa2`` cannot be
            imported), ``"eq_tree"`` uses ``hierarchy_pos`` on ``adj_tree``,
            and anything else is passed to igraph.
        random_state: seed applied to both ``numpy.random`` and ``random``.
        init_pos: optional initial coordinates; a copy is used, the
            argument itself is not modified.
        adj_tree: adjacency of the tree, used by ``"eq_tree"`` and by
            igraph layouts whose name contains ``"rt"``.
        root: root node (or list of root nodes) for tree layouts.
        layout_kwds: extra keyword arguments for the layout. ``"maxiter"``
            or ``"iterations"`` set the ForceAtlas2 iteration count
            (default 500). ``None`` is treated as an empty mapping, and the
            caller's mapping is never mutated.

    Returns:
        A numpy array of positions, one row per node, in the iteration
        order of the networkx graph built from ``adjacency_solid``.

    Raises:
        ValueError: for ``"eq_tree"`` when fewer positions than nodes are
            produced (the tree adjacency describes a forest).
    """
    import networkx as nx

    # Work on a private copy: the default is None, and the igraph branch
    # below inserts a "seed" entry that must not leak back to the caller.
    layout_kwds = {} if layout_kwds is None else dict(layout_kwds)

    np.random.seed(random_state)
    random.seed(random_state)
    nx_g_solid = nx.Graph(adjacency_solid)
    if layout is None:
        layout = "fr"
    if layout == "fa":
        try:
            import fa2
        except Exception:
            logg.warn(
                "Package 'fa2' is not installed, falling back to layout 'fr'."
                "To use the faster and better ForceAtlas2 layout, "
                "install package 'fa2' (`pip install fa2`).")
            layout = "fr"
    if layout == "fa":
        init_coords = (np.random.random(
            (adjacency_solid.shape[0], 2))
                       if init_pos is None else init_pos.copy())
        forceatlas2 = fa2.ForceAtlas2(
            outboundAttractionDistribution=False,
            linLogMode=False,
            adjustSizes=False,
            edgeWeightInfluence=1.0,
            jitterTolerance=1.0,
            barnesHutOptimize=True,
            barnesHutTheta=1.2,
            multiThreaded=False,
            scalingRatio=2.0,
            strongGravityMode=False,
            gravity=1.0,
            verbose=False,
        )
        # "maxiter" takes precedence over "iterations"; 500 when neither
        # is given.
        iterations = layout_kwds.get("maxiter",
                                     layout_kwds.get("iterations", 500))
        pos_list = forceatlas2.forceatlas2(adjacency_solid,
                                           pos=init_coords,
                                           iterations=iterations)
        # the y coordinate is negated
        pos = {n: [p[0], -p[1]] for n, p in enumerate(pos_list)}
    elif layout == "eq_tree":
        nx_g_tree = nx.Graph(adj_tree)
        from scanpy.plotting._utils import hierarchy_pos
        pos = hierarchy_pos(nx_g_tree, root)
        if len(pos) < adjacency_solid.shape[0]:
            raise ValueError("This is a forest and not a single tree. "
                             "Try another `layout`, e.g., {'fr'}.")
    else:
        # igraph layouts
        g = get_igraph_from_adjacency(adjacency_solid)
        if "rt" in layout:
            g_tree = get_igraph_from_adjacency(adj_tree)
            root = root if isinstance(root, list) else [root]
            pos_list = g_tree.layout(layout, root=root).coords
        elif layout == "circle":
            pos_list = g.layout(layout).coords
        else:
            if init_pos is None:
                init_coords = np.random.random(
                    (adjacency_solid.shape[0], 2)).tolist()
            else:
                init_pos = init_pos.copy()
                init_pos[:, 1] *= -1  # to be checked
                init_coords = init_pos.tolist()
            layout_kwds["seed"] = init_coords
            try:
                pos_list = g.layout(layout, weights="weight",
                                    **layout_kwds).coords
            except AttributeError:  # hack for empty graphs...
                pos_list = g.layout(layout, **layout_kwds).coords
        # the y coordinate is negated
        pos = {n: [p[0], -p[1]] for n, p in enumerate(pos_list)}
    if len(pos) == 1:
        # a single node is placed at a fixed position
        pos[0] = (0.5, 0.5)
    pos_array = np.array([pos[n] for n in nx_g_solid])
    return pos_array
def fa2_pos(graph, gravity=40, scalingRatio=4, **kwargs):
    """Return seed positions for ``graph`` from a short ForceAtlas2 run.

    A ``fa2.ForceAtlas2`` object is built with the given ``gravity`` and
    ``scalingRatio`` (any extra keyword arguments are forwarded to its
    constructor) and its networkx layout is run for 100 iterations.
    """
    layout_engine = fa2.ForceAtlas2(
        gravity=gravity,
        scalingRatio=scalingRatio,
        **kwargs,
    )
    return layout_engine.forceatlas2_networkx_layout(graph, iterations=100)