def pagerank(self):
    # from networkx.algorithms.link_analysis import pagerank_scipy
    # from networkx.algorithms.link_analysis import pagerank_numpy
    from networkx.algorithms.link_analysis import pagerank
    from time import time
    try:
        start = time()
        # Seeding with the previous scores (nstart) converges noticeably
        # faster than a cold start on a graph of this size:
        # pagerank(graph, max_iter=1000)   # 1.7s for #nodes = 2500
        self.page_ranked = pagerank(self.graph, max_iter=1000,
                                    nstart=self.page_ranked)  # 0.2-1.5s for #nodes = 2500
        # pagerank_scipy(graph)            # 1.0s for #nodes = 2500
        # pagerank_numpy(graph)            # > 30s if #nodes > 1000
        print "Pagerank took: %f seconds" % (time() - start)
    except ZeroDivisionError:
        print "ZeroDivisionError in pagerank"
    page_ranked_sorted = sorted(self.page_ranked.items(), key=lambda x: x[1],
                                reverse=True)
    print page_ranked_sorted[:4]
    # Centrality timings on the same graphs, for comparison:
    # from networkx.algorithms.centrality import *
    # start = time()
    # degree_centrality = degree_centrality(graph)            # 0.003s for 1500 nodes
    # print "Degree centrality took: %f seconds" % (time() - start)
    #
    # start = time()
    # closeness_centrality = closeness_centrality(graph)      # 4s for 1500 nodes
    # print "Closeness centrality took: %f seconds" % (time() - start)
    #
    # start = time()
    # betweenness_centrality = betweenness_centrality(graph)  # 18s for 1500 nodes
    # print "Betweenness centrality took: %f seconds" % (time() - start)
    return self.page_ranked
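# A minimal standalone sketch of the warm-start idea above, assuming the graph
# only changes slightly between runs: pass the previous run's scores as nstart
# so the power iteration starts near the fixed point. The graph and sizes here
# are illustrative, not from the original code.
import networkx as nx
from networkx.algorithms.link_analysis import pagerank

G = nx.gnp_random_graph(2500, 0.002, directed=True, seed=42)
scores = pagerank(G, max_iter=1000)                 # cold start
G.add_edge(0, 1)                                    # small incremental update
scores = pagerank(G, max_iter=1000, nstart=scores)  # warm start from old scores
print(sorted(scores.items(), key=lambda x: x[1], reverse=True)[:4])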
import networkx as nx
from networkx.algorithms import centrality, link_analysis


def graph_2():
    G = nx.DiGraph()
    # G.add_nodes_from([2, 3, 5, 6, 7])
    G.add_edges_from([[2, 3], [5, 3], [6, 7], [7, 2], [5, 7]])
    # G.add_path([2, 3, 6, 7])
    # G.add_path([2, 4, 5])
    # print(list(G.nodes()))
    print(list(G.edges()))
    print(list(G.out_degree()))
    print(list(G.in_degree()))
    print(centrality.in_degree_centrality(G))
    # Personalization weights are normalized by their sum, so a positive
    # weight is what biases the random jump toward node 2.
    print(link_analysis.pagerank(G, personalization={2: 4}))
    # Node 3 is the only dangling node; its rank mass is redistributed
    # according to the dangling dict (everything to node 7 here).
    print(link_analysis.pagerank(G, dangling={5: 0, 7: 1}))
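# Hedged companion sketch comparing plain, personalized, and dangling-weighted
# PageRank on the same toy graph as graph_2(); the specific weights chosen
# here are illustrative assumptions.
import networkx as nx
from networkx.algorithms import link_analysis

G = nx.DiGraph([(2, 3), (5, 3), (6, 7), (7, 2), (5, 7)])
plain = link_analysis.pagerank(G)
biased = link_analysis.pagerank(G, personalization={2: 1})  # jump only to node 2
dangled = link_analysis.pagerank(G, dangling={7: 1})        # node 3's mass goes to 7
for n in sorted(G):
    print(n, round(plain[n], 3), round(biased[n], 3), round(dangled[n], 3))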
def rank_it(self):
    # Rank nodes by PageRank and print a TSV table with each node's absolute
    # and relative frequency and its centrality value.
    rank = la.pagerank(self.pg.graph, max_iter=self.simulations, alpha=self.p_restart)
    print "\t".join(["id", "pagerank", "freq-abs", "freq-rel", "centrality"])
    for k, v in sorted(rank.items(), key=lambda q: q[1], reverse=True):
        print "%s:\t%s\t%s\t%s\t%s" % (k, v, self.pg.freq[k],
                                       self.pg.freq[k] / float(self.pg.total),
                                       self.pg.central[k])
def compute_pagerank(self):
    self.kg = KG(self.train_facts, entity_num=len(self.entity_dict),
                 relation_num=len(self.relation_dict))
    graph = networkx.DiGraph(self.kg.to_networkx())
    print("Begin to compute pagerank")
    self.pagerank = pagerank(graph)
    # Entities are labeled 0..N-1, so the score dict flattens into a list
    # whose index is the entity id.
    self.pagerank = [self.pagerank[entity] for entity in range(len(self.pagerank))]
    print("Begin to save pagerank")
    with open(os.path.join(self.data_directory, "pagerank.txt"), "w") as output:
        for value in self.pagerank:
            output.write("{}\n".format(value))
    print("Finished saving pagerank")
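# Sketch of the matching loader, assuming the layout written above: line i of
# pagerank.txt holds the score of entity id i, so reading the file back into
# a list restores the id -> score mapping.
import os

def load_pagerank(data_directory):
    with open(os.path.join(data_directory, "pagerank.txt")) as f:
        return [float(line) for line in f]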
def rank_it(self, out):
    rank = la.pagerank(self.graph, personalization=self.personalize,
                       max_iter=self.simulations, alpha=self.p_restart)
    size_of_rank = len(rank)
    with open(out, "w") as f_out:
        f_out.write("\t".join(["id", "pagerank", "odds", "freq-abs", "freq-rel"]) + "\n")
        for k, v in sorted(rank.items(), key=lambda q: q[1], reverse=True):
            # "odds" is the score relative to the uniform baseline 1/N:
            # values above 1 mean the node ranks better than chance.
            f_out.write("%s:\t%s\t%s\t%s\t%s\n" % (k, v,
                                                   float(v) * float(size_of_rank),
                                                   self.freq[k],
                                                   self.freq[k] / float(self.total)))
def pagerank_worker(graph, page_ranked):
    print "Pagerank on graph with %d nodes and %d edges." \
        % (len(graph.nodes()), len(graph.edges()))
    # Seed any node that has no previous score so nstart covers the whole graph.
    for node in graph.nodes():
        page_ranked.setdefault(node, 1)
    from networkx.algorithms.link_analysis import pagerank
    from time import time
    try:
        start = time()
        page_ranked = pagerank(graph, max_iter=1000,
                               nstart=page_ranked)  # 0.2-1.5s for #nodes = 2500
        print "Pagerank took: %f seconds" % (time() - start)
    except ZeroDivisionError:
        print "ZeroDivisionError in pagerank"
    page_ranked_sorted = sorted(page_ranked.items(), key=lambda x: x[1],
                                reverse=True)
    print page_ranked_sorted[:4]
    # Return the scores: the rebinding above means the caller's dict is not
    # updated in place.
    return page_ranked
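# Hypothetical driver for pagerank_worker, showing the returned scores being
# reused as the next call's warm start; the graph here is synthetic and the
# call pattern is an assumption, not taken from the original code.
import networkx as nx

graph = nx.gnp_random_graph(2500, 0.002, directed=True, seed=7)
page_ranked = pagerank_worker(graph, {})            # cold start
graph.add_edges_from([(0, 1), (1, 2)])              # incremental growth
page_ranked = pagerank_worker(graph, page_ranked)   # warm start from last run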
import cPickle
import json
import logging
import operator

# hc and pagerank are assumed to be networkx's harmonic centrality and
# PageRank, matching how they are used below.
from networkx.algorithms.centrality import harmonic_centrality as hc
from networkx.algorithms.link_analysis import pagerank

logger = logging.getLogger(__name__)


def get_graph():
    # ... (graph construction and graph_dump_filename setup elided) ...
    with open(graph_dump_filename, "rb") as dump:
        graph = cPickle.load(dump)
    return graph


if __name__ == "__main__":
    G = get_graph()
    result = {"pagerank_ids": [], "harmonic_ids": []}
    logger.info('graph nodes %d' % G.number_of_nodes())

    logger.info('harmonic')
    harmonic = hc(G)
    sorted_h = sorted(harmonic.items(), key=operator.itemgetter(1), reverse=True)
    result['harmonic_ids'] = map(lambda x: str(x[0]), sorted_h[0:200])

    logger.info('pr')
    pr = pagerank(G, alpha=0.8507246376811566)
    sorted_pr = sorted(pr.items(), key=operator.itemgetter(1), reverse=True)
    result['pagerank_ids'] = map(lambda x: str(x[0]), sorted_pr[0:200])

    with open('lab6centralities.json', 'w') as f:
        f.write(json.dumps(result))
    logger.info('finish')
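# Companion sketch: reading the exported top-200 id lists back; the file name
# and keys match what the script above writes.
import json

with open('lab6centralities.json') as f:
    result = json.load(f)
print len(result['pagerank_ids']), len(result['harmonic_ids'])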