def nodes_centrality_page_rank():
    """Print the five nodes of the global graph G with the highest PageRank."""
    scores = snap.TIntFltH()
    snap.GetPageRank(G, scores)
    # rank node ids by their score, best first
    ranked = sorted(scores, key=lambda nid: scores[nid], reverse=True)
    for nid in ranked[:5]:  # top 5
        print(nid, scores[nid])
def get_top_packages(graph_path, n):
    """Return the names of the top-n packages of a saved TNEANet graph, ranked by PageRank.

    PageRank scores are cached next to the graph in a "<name>_pageranks" file
    and reused on subsequent calls.
    """
    abs_path = os.path.abspath(graph_path)
    graph_name = os.path.basename(abs_path).replace(".graph", "")
    graph = snap.TNEANet.Load(snap.TFIn(abs_path))

    # rebuild the node-id => package-name mapping from the node attributes
    id_pkg_dict = {node.GetId(): graph.GetStrAttrDatN(node.GetId(), "pkg")
                   for node in graph.Nodes()}

    # snap.py doesn't support absolute paths for some operations, so work
    # from the graph's own directory
    os.chdir(os.path.dirname(abs_path))

    data_file = graph_name + "_pageranks"
    prank_hashtable = snap.TIntFltH()
    if os.path.isfile(data_file):
        # reuse the cached PageRank table
        prank_hashtable.Load(snap.TFIn(data_file))
    else:
        # Damping Factor: 0.85, Convergence difference: 1e-4, MaxIter: 100
        snap.GetPageRank(graph, prank_hashtable, 0.85)
        prank_hashtable.Save(snap.TFOut(data_file))

    top_n = get_top_nodes_from_hashtable(prank_hashtable, n)
    top_n.sort(key=itemgetter(1))
    return [id_pkg_dict[node_id] for node_id, _score in top_n]
def compute_page_rank(graph):
    """Compute PageRank for every node of *graph* and return the score table.

    Prints each (node id, score) pair as a side effect.
    """
    logging.info("compute pagerank")
    PRankH = snap.TIntFltH()
    snap.GetPageRank(graph, PRankH)
    for item in PRankH:
        # converted from a py2 print statement for consistency with the
        # py3-style blocks elsewhere in this file
        print(item, PRankH[item])
    return PRankH
def pageRank(rankCommands, Graph, conn, cur):
    """Compute PageRank on Graph, report the elapsed time, and persist the
    nodes (sorted by descending score) via createTable."""
    PRankH = snap.TIntFltH()
    before_time = time.time()
    snap.GetPageRank(Graph, PRankH)
    # converted from a py2 print statement for consistency with the file
    print("Total handling time is: ", (time.time() - before_time))
    slist = sorted(PRankH, key=lambda key: PRankH[key], reverse=True)
    createTable(rankCommands, slist, PRankH, conn, cur)
def PageRank(d, e):
    """Build a directed graph from the edge-list file *d*, run PageRank, and
    return a dict mapping each directed edge (u, v) to u's share of the
    PageRank mass on that edge, scaled by *e*.

    Expected file format: first line "<num_nodes> <num_edges>", then one
    edge per line ("u v", possibly with non-digit decoration).
    """
    with open(d) as f:  # close the file deterministically (was left open)
        s1 = re.split('\n', f.read())
    G1 = snap.PNGraph.New()
    PRankH = snap.TIntFltH()
    a = re.split(' ', s1[0])
    for i in range(0, int(a[0])):
        G1.AddNode(i)
    for i in range(1, int(a[1]) + 1):
        b = re.split(' ', s1[i])
        # strip any non-digit characters from the endpoint tokens
        b0 = re.sub("\D", "", b[0])
        b1 = re.sub("\D", "", b[1])
        G1.AddEdge(int(b0), int(b1))
    snap.GetPageRank(G1, PRankH)
    EdgePara = dict()
    for i in range(1, int(a[1]) + 1):
        c = re.split(' ', s1[i])
        u, v = int(c[0]), int(c[1])
        if PRankH[u] == 0 and PRankH[v] == 0:
            # BUG FIX: these two lines used '==' (a no-op comparison) instead
            # of '=', so zero-rank edges were silently never stored
            EdgePara[(u, v)] = 0
            EdgePara[(v, u)] = 0
        else:
            total = PRankH[u] + PRankH[v]
            EdgePara[(u, v)] = e * PRankH[u] / total
            EdgePara[(v, u)] = e * PRankH[v] / total
    return EdgePara
def rank_pagerank(self, C=0.85, Eps=1e-4, MaxIter=100):
    """Return a {node id: PageRank score} dict for the wrapped graph.

    C is the damping factor, Eps the convergence difference, MaxIter the
    iteration cap; all are forwarded to snap.GetPageRank.
    """
    scores = snap.TIntFltH()
    snap.GetPageRank(self._graph, scores, C, Eps, MaxIter)
    # every node of the graph must have received a score
    assert len(scores) == self._num_nodes, 'Number of nodes in centrality result must match number of nodes in graph'
    return snap_hash_to_dict(scores)
def get_page_rank_sum(G, n1, n2, reset=False):
    """Return PRankH[n1] + PRankH[n2], computing the PageRank table lazily.

    Scores are cached in the module-level PRankH; pass reset=True to force a
    recomputation.
    """
    global PRankH
    if reset or PRankH is None:
        # converted from a py2 print statement for consistency with the file
        print('Initializing Page Rank')
        PRankH = snap.TIntFltH()
        # NOTE(review): positionally these arguments are C=1e-2, Eps=50 —
        # they look intended as Eps=1e-2, MaxIter=50; confirm against the
        # snap.GetPageRank signature before changing.
        snap.GetPageRank(G, PRankH, 1e-2, 50)
    return PRankH[n1] + PRankH[n2]
def CalculatePageRank(graph, alpha, number_iteration):
    """Run PageRank with damping *alpha* for at most *number_iteration*
    rounds (Eps fixed at 1e-4) and return a plain {node id: score} dict."""
    table = snap.TIntFltH()
    snap.GetPageRank(graph, table, alpha, 1e-4, number_iteration)
    return {node_id: table[node_id] for node_id in table}
def print_top_pr(G, K, iid_to_ingredient, Reverse=True):
    """Print the K ingredients with the highest PageRank in G.

    iid_to_ingredient maps node ids to ingredient names; Reverse=False would
    list the lowest-ranked instead.
    """
    # converted from py2 print statements for consistency with the file
    print('PageRank:')
    PRankH = snap.TIntFltH()
    snap.GetPageRank(G, PRankH)
    PageRank = sorted([(PRankH[item], item) for item in PRankH], reverse=Reverse)
    for Rank, IId in PageRank[:K]:
        print('{}: {:.5f}'.format(iid_to_ingredient[IId], Rank))
def calc_PageRank(Graph, node_to_g):
    """Map each protein (looked up through node_to_g) to its PageRank score."""
    scores = snap.TIntFltH()
    snap.GetPageRank(Graph, scores)
    return {node_to_g[nid]: scores[nid] for nid in scores}
def get_page_ranks(df, G, names_df):
    """Return (name, PageRank) pairs for every node in G, best score first.

    Names are resolved from names_df by node id; df is unused but kept for
    interface compatibility.
    """
    scores = snap.TIntFltH()
    snap.GetPageRank(G, scores)
    ranks = [
        (names_df[names_df["id"] == nid]["name"].values[0], scores[nid])
        for nid in scores
    ]
    ranks.sort(key=lambda pair: pair[1], reverse=True)
    return ranks
def quick_properties(graph, name, dic_path):
    """Print a quick structural overview of the graph *name*.

    dic_path is the path of a pickled {player: id} dict used to translate the
    top PageRank / HITS node ids back to player names.
    """
    n_edges = graph.GetEdges()
    n_nodes = graph.GetNodes()
    print("##########")
    print("Quick overview of {} Network".format(name))
    print("##########")
    # BUG FIX: this was print("...").format(...) — under py3 that calls
    # .format() on print's None return value; format before printing.
    print("{} Nodes, {} Edges".format(n_nodes, n_edges))
    print("{} Self-edges ".format(snap.CntSelfEdges(graph)))
    print("{} Directed edges, {} Undirected edges".format(
        snap.CntUniqDirEdges(graph), snap.CntUniqUndirEdges(graph)))
    print("{} Reciprocated edges".format(snap.CntUniqBiDirEdges(graph)))
    print("{} 0-out-degree nodes, {} 0-in-degree nodes".format(
        snap.CntOutDegNodes(graph, 0), snap.CntInDegNodes(graph, 0)))
    node_in = graph.GetNI(snap.GetMxInDegNId(graph))
    node_out = graph.GetNI(snap.GetMxOutDegNId(graph))
    print("Maximum node in-degree: {}, maximum node out-degree: {}".format(
        node_in.GetDeg(), node_out.GetDeg()))
    print("###")
    components = snap.TCnComV()
    snap.GetWccs(graph, components)
    max_wcc = snap.GetMxWcc(graph)
    # converted from py2 print statements for consistency with the rest of
    # the function
    print("{} Weakly connected components".format(components.Len()))
    print("Largest Wcc: {} Nodes, {} Edges".format(max_wcc.GetNodes(),
                                                   max_wcc.GetEdges()))
    prankH = snap.TIntFltH()
    snap.GetPageRank(graph, prankH)
    sorted_prankH = sorted(prankH, key=lambda key: prankH[key], reverse=True)
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(graph, NIdHubH, NIdAuthH)
    sorted_NIdHubH = sorted(NIdHubH, key=lambda key: NIdHubH[key],
                            reverse=True)
    sorted_NIdAuthH = sorted(NIdAuthH, key=lambda key: NIdAuthH[key],
                             reverse=True)
    with open(dic_path, 'rb') as dic_id:
        mydict = pickle.load(dic_id)
    # translate the top-3 node ids back to player names via reverse lookup
    # in the {player: id} dict
    print("3 most central players by PageRank scores: {}, {}, {}".format(
        list(mydict.keys())[list(mydict.values()).index(sorted_prankH[0])],
        list(mydict.keys())[list(mydict.values()).index(sorted_prankH[1])],
        list(mydict.keys())[list(mydict.values()).index(sorted_prankH[2])]))
    print("Top 3 hubs: {}, {}, {}".format(
        list(mydict.keys())[list(mydict.values()).index(sorted_NIdHubH[0])],
        list(mydict.keys())[list(mydict.values()).index(sorted_NIdHubH[1])],
        list(mydict.keys())[list(mydict.values()).index(sorted_NIdHubH[2])]))
    print("Top 3 authorities: {}, {}, {}".format(
        list(mydict.keys())[list(mydict.values()).index(sorted_NIdAuthH[0])],
        list(mydict.keys())[list(mydict.values()).index(sorted_NIdAuthH[1])],
        list(mydict.keys())[list(mydict.values()).index(sorted_NIdAuthH[2])]))
def _get_pagerank(Graph, H, output_path):
    """Compute PageRank for Graph and write a (username, PR) CSV to output_path.

    H is the string hash used to recover usernames from node ids.
    """
    scores = snap.TIntFltH()
    snap.GetPageRank(Graph, scores)
    rows = [{'username': H.GetKey(nid), 'PR': scores[nid]} for nid in scores]
    dataset = pd.DataFrame(rows)[['username', 'PR']]
    dataset.to_csv(output_path, index=False, encoding='utf-8')
def PageRank(G):
    """Return {node id: PageRank score} for every node in G."""
    table = snap.TIntFltH()
    snap.GetPageRank(G, table)
    return {nid: table[nid] for nid in table}
def compute_page_rank(self, graph, c: float = 0.85, eps: float = 10e-4, max_iter: int = 100):
    """Run snap's PageRank on *graph* and return the raw TIntFltH score table.

    c: damping factor; eps: convergence difference; max_iter: iteration cap.
    """
    scores = snap.TIntFltH()
    snap.GetPageRank(graph, scores, c, eps, max_iter)
    return scores
def getNodeAttributes(self, UGraph):
    """Build a (num_nodes x num_features) matrix of per-node attributes.

    Feature order per row: PageRank, HITS hub, HITS authority, betweenness
    centrality, closeness centrality, farness centrality, eccentricity.
    NOTE(review): rows are filled in hash-table / node-iterator order; this
    assumes the different snap iterators enumerate nodes consistently —
    confirm before relying on row/node alignment.
    """
    # one (initially empty) feature list per node; replaces the manual
    # append loop and the error-prone per-section 'counter' bookkeeping
    attriList = [[] for _ in range(UGraph.GetNodes())]

    # PageRank
    PRankH = snap.TIntFltH()
    snap.GetPageRank(UGraph, PRankH)
    for row, item in enumerate(PRankH):
        attriList[row].append(PRankH[item])

    # HITS hub and authority scores
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(UGraph, NIdHubH, NIdAuthH)
    for row, item in enumerate(NIdHubH):
        attriList[row].append(NIdHubH[item])
        attriList[row].append(NIdAuthH[item])

    # Betweenness centrality (NodeFrac = 1.0, i.e. exact)
    Nodes = snap.TIntFltH()
    Edges = snap.TIntPrFltH()
    snap.GetBetweennessCentr(UGraph, Nodes, Edges, 1.0)
    for row, node in enumerate(Nodes):
        attriList[row].append(Nodes[node])

    # Closeness, farness and eccentricity — the original used three separate
    # per-node loops; each appended exactly one value per row, so merging
    # them leaves every row's contents identical.
    for row, NI in enumerate(UGraph.Nodes()):
        nid = NI.GetId()
        attriList[row].append(snap.GetClosenessCentr(UGraph, nid))
        attriList[row].append(snap.GetFarnessCentr(UGraph, nid))
        attriList[row].append(snap.GetNodeEcc(UGraph, nid, True))

    return np.array(attriList)
def getPageRank(Graph):
    """Return PageRank scores as a list ordered by ascending node id."""
    table = snap.TIntFltH()
    snap.GetPageRank(Graph, table)
    by_id = {nid: table[nid] for nid in table}
    return [by_id[nid] for nid in sorted(by_id)]
def compute_pagerank(Graph):
    '''
    :param Graph: the graph to compute pagerank on
    :return: a list of tuple (pagerank_score, node_id) in descending order
    '''
    table = snap.TIntFltH()
    snap.GetPageRank(Graph, table)
    pairs = [(table[nid], nid) for nid in table]
    # node ids are unique, so no two tuples compare equal and a reverse
    # sort gives the same order as sorted(...)[::-1]
    pairs.sort(reverse=True)
    return pairs
def single_year_page_rank(df, id2names):
    """Build one directed graph per distinct year in df and return
    {year: {name: pagerank}} with names resolved through id2names."""
    page_ranks = {}
    for year in sorted(df['year'].unique()):
        G = snap.TNEANet.New()
        add_df_to_G(df[df['year'] == year], G, directed=True)
        table = snap.TIntFltH()
        snap.GetPageRank(G, table)
        page_ranks[year] = {id2names[nid]: table[nid] for nid in table}
    return page_ranks
def getUndirAttribute(filename, node_num, weighted=None, param=1.0):
    """Load an undirected edge list and return a DataFrame of per-node
    structural attributes.

    filename: path to a whitespace-separated edge list (src col 0, dst col 1).
    node_num: number of nodes; node ids are assumed to be 0..node_num-1 so
        they can index the attribute arrays directly — TODO confirm.
    weighted: when truthy, additionally computes the weighted-degree /
        weighted-egonet columns via the sibling helpers.
    param: forwarded to snap.GetBetweennessCentr (the NodeFrac argument).
    """
    UGraph = snap.LoadEdgeList(snap.PUNGraph, filename, 0, 1)
    attributeNames = [
        'Graph', 'Id', 'Degree', 'NodeBetweennessCentrality', 'PageRank',
        'EgonetDegree', 'AvgNeighborDeg', 'EgonetConnectivity'
    ]
    if weighted:
        attributeNames += [
            'WeightedDegree', 'EgoWeightedDegree', 'AvgWeightedNeighborDeg',
            'EgonetWeightedConnectivity'
        ]
    attributes = pd.DataFrame(np.zeros((node_num, len(attributeNames))),
                              columns=attributeNames)
    # graph label: file name without directory and extension
    attributes['Graph'] = [filename.split('/')[-1].split('.')[0]
                           ] * node_num  #node_num
    # Degree
    attributes['Id'] = range(0, node_num)
    degree = np.zeros((node_num, ))
    OutDegV = snap.TIntPrV()
    snap.GetNodeOutDegV(UGraph, OutDegV)
    # (node id, out-degree) pairs; for an undirected graph out-degree == degree
    for item in OutDegV:
        degree[item.GetVal1()] = item.GetVal2()
    attributes['Degree'] = degree
    # egonet-based columns are filled in place by the sibling helper
    getEgoAttr(UGraph, node_num, attributes, directed=False)
    if weighted:
        # weighted variants, also filled in place
        df = getWeightedDegree(filename, node_num, attributes, directed=False)
        getWeightedEgoAttr(UGraph, node_num, attributes, df, directed=False)
    # Betweenness Centrality
    betCentr = np.zeros((node_num, ))
    Nodes = snap.TIntFltH()
    Edges = snap.TIntPrFltH()
    snap.GetBetweennessCentr(UGraph, Nodes, Edges, param)
    for node in Nodes:
        betCentr[node] = Nodes[node]
    attributes['NodeBetweennessCentrality'] = betCentr
    # PageRank
    pgRank = np.zeros((node_num, ))
    PRankH = snap.TIntFltH()
    snap.GetPageRank(UGraph, PRankH)
    for item in PRankH:
        pgRank[item] = PRankH[item]
    attributes['PageRank'] = pgRank
    return attributes
def GetOverlap(filePathName, Graph, t):
    """Rank every node by a centrality measure selected by *t* and return how
    many of our top-100 nodes also appear in the top-100 listed in the file.

    t == 1: closeness centrality
    t == 2: betweenness centrality (NodeFrac = 0.8, as instructed)
    t == 3: PageRank (C = 0.8, Eps = 1e-6 as in gen_centrality.py, MaxIter = 100)
    The file is expected to hold at least 100 lines of "node_id score".
    """
    # l is the final ranking of the nodes: initially every node id, sorted
    # below by the reference vector
    l = [i for i in range(Graph.GetNodes())]
    # reference vector holding the chosen centrality score per node id
    ref_vect = [0 for i in range(Graph.GetNodes())]
    if (t == 1):
        for NI in Graph.Nodes():
            ref_vect[NI.GetId()] = snap.GetClosenessCentr(Graph, NI.GetId())
    if (t == 2):
        Nodes = snap.TIntFltH()
        Edges = snap.TIntPrFltH()
        snap.GetBetweennessCentr(Graph, Nodes, Edges, 0.8)
        for node in Nodes:
            ref_vect[node] = Nodes[node]
    if (t == 3):
        PRankH = snap.TIntFltH()
        snap.GetPageRank(Graph, PRankH, 0.8, 1e-6, 100)
        for item in PRankH:
            ref_vect[item] = PRankH[item]
    # Sort descending by score. A plain key sort replaces the cmp_to_key
    # subtraction comparator: same (stable) order, without relying on the
    # sign of a float difference.
    l.sort(key=lambda node: ref_vect[node], reverse=True)
    # top 100 nodes according to our ranking
    S1 = set(l[:100])
    # top 100 node ids from the reference file (first token of each line)
    S2 = set()
    with open(filePathName, 'r') as f:  # BUG FIX: file handle was never closed
        for _ in range(100):
            a, b = f.readline().split()
            S2.add(int(a))
    return len(S1.intersection(S2))
def pageRank(graph, userId):
    """Compute PageRank for *graph* and write (Node, PageRank) rows to
    write_DIR/pagerank_<userId>.csv."""
    scores = snap.TIntFltH()
    snap.GetPageRank(graph, scores)
    df = pd.DataFrame(columns=('Node', 'PageRank'))
    for nid in scores:
        df.loc[nid] = [nid, scores[nid]]
    df.to_csv(write_DIR + 'pagerank_{}.csv'.format(userId), sep=',', index=False)
def _pageRankOverlap(elistPath, alpha=0.85):
    """Compare our PageRank top-100 against SNAP's own implementation.

    Parameters
    ----------
    elistPath: str or pathlib.Path
        Edge list of the graph to compute centralities on
    alpha: float, default = 0.85
        Damping factor for PageRank computations

    Returns
    -------
    (calculatedNodes, SNAPNodes, overlapCount): our top-100 node set (read
    from "pagerank.txt"), SNAP's top-100 node set (snap.GetPageRank), and
    the size of their intersection.
    """
    adjGraph = AdjGraph(elistPath, separator=" ")
    graph = adjGraph.SNAPGraph
    calculatedNodes = readNodes("pagerank.txt")

    table = snap.TIntFltH()
    snap.GetPageRank(graph, table, alpha)
    scores = {nid: table[nid] for nid in table}

    # 100 best-scored node ids, highest score first
    ordered = sorted(scores.items(), key=lambda kv: kv[1], reverse=True)
    SNAPNodes = set(int(nid) for nid, _score in ordered[:100])

    overlap = SNAPNodes.intersection(calculatedNodes)
    return (calculatedNodes, SNAPNodes, len(overlap))
def page_rank(input):
    """Load a saved TNGraph from *input*, compute PageRank, and print the top
    100 users as LaTeX table rows."""
    id_to_login = get_user_id_to_login()
    print("Loading graph...")
    graph = snap.TNGraph.Load(snap.TFIn(input))
    print("Calculating page rank...")
    table = snap.TIntFltH()
    snap.GetPageRank(graph, table)
    scores = sorted(((table[nid], nid) for nid in table), reverse=True)[:100]
    for i, (score, id) in enumerate(scores):
        print(i + 1, "&", id_to_login[id], "&", score, "\\\\")
def run(snap_graph):
    """Print the ten highest distinct PageRank values in snap_graph, each with
    the screen name of a node holding that value."""
    page_rank = snap.TIntFltH()
    snap.GetPageRank(snap_graph, page_rank)
    node_page_ranks = []
    node_page_rank_name_map = {}
    for node in page_rank:
        value = page_rank[node]
        # NOTE(review): entries are keyed by score value, so nodes sharing an
        # identical score collapse to whichever node was seen first — confirm
        # this dedupe is intended.
        if value not in node_page_ranks:
            node_page_ranks.append(value)
            node_page_rank_name_map[value] = step7.get_screen_name_from_hash(node)
    top_ten_page_ranks = heapq.nlargest(10, node_page_ranks)
    # converted from py2 print statements for consistency with the file
    print("\nTop 10 page ranks from the merged graph =>")
    for top_node in top_ten_page_ranks:
        print(node_page_rank_name_map[top_node], top_node)
def computePageRank(cls, graph, args_paths):
    """Compute PageRank over the whole graph and persist the score table.

    snap.GetPageRank fills an empty (int, float) hash table in place; the
    filled table is then saved to args_paths.pagerank via snapSave.

    :param cls
    :param graph: graph to run PageRank on
    :param args_paths: holds the destination path for the PageRank table
    """
    # explicit named constants instead of unpacking *dict.values()
    damping, eps, max_iter = 0.85, 1e-4, 100
    table_rank = snap.TIntFltH()
    snap.GetPageRank(graph, table_rank, damping, eps, max_iter)
    snapSave(table_rank, args_paths.pagerank)
def page_rank_score(self, C=0.85, Eps=1e-4, MaxIter=100):
    '''
    Computes the PageRank score of every node in Graph
    :param C: Damping factor.
    :param Eps: Convergence difference.
    :param MaxIter: Maximum number of iterations.
    '''
    snap = self.snap
    table = snap.TIntFlt64H()
    snap.GetPageRank(self.graph, table, C, Eps, MaxIter)
    # (node id, score) pairs in hash-table iteration order
    return [(nid, table[nid]) for nid in table]
def BCF(x):
    """For node *x*, find the neighbor whose edge removal most lowers x's
    PageRank, and return that neighbor's id.

    NOTE(review): relies on module-level globals G (the graph, with 1-based
    node ids) and ind (presumably a degree table — only neighbors with
    ind[j + 1] > 1 are tried, apparently so removal never isolates them;
    confirm against where ind is built).
    """
    n = G.GetNodes()
    v = []
    for j in range(0, n):
        v.append(0)
    # 100 acts as an "infinity" sentinel: real PageRank scores are far below it
    val = 100
    for j in range(0, n):
        if G.IsEdge(j + 1, x) and (ind[j + 1] > 1):
            # temporarily delete the edge, re-run PageRank, record x's score,
            # then restore the edge so G is unchanged on exit
            G.DelEdge(j + 1, x)
            PRankH2 = snap.TIntFltH()
            snap.GetPageRank(G, PRankH2)
            v[j] = PRankH2[x]
            val = min(val, v[j])
            G.AddEdge(j + 1, x)
        else:
            v[j] = 100
    # return the first neighbor achieving the minimum score, with a small
    # tolerance for floating-point noise
    for j in range(0, n):
        if abs(val - v[j]) < 1e-8:
            return j + 1
def extract_top_nodes(edges_big_file, edges_extracted_file, top_n):
    """Keep only the top_n nodes of a tab-separated undirected edge list,
    ranked by PageRank, and save the reduced graph to edges_extracted_file."""
    graph = snap.LoadEdgeList(snap.PUNGraph, edges_big_file, 0, 1, '\t')
    num_remove = graph.GetNodes() - top_n
    # PageRank determines which nodes survive
    table = snap.TIntFltH()
    snap.GetPageRank(graph, table)
    ranked = sorted(([nid, table[nid]] for nid in table),
                    key=lambda pair: pair[1])  # ascending by score
    # drop the lowest-ranked nodes until only top_n remain
    for nid, _score in ranked[:num_remove]:
        graph.DelNode(nid)
    snap.SaveEdgeList(graph, edges_extracted_file)
def main():
    """Analyse the stackoverflow-Java network: weakly-connected-component
    stats, then the top-3 nodes by PageRank and by HITS hub/authority."""
    network = snap.LoadEdgeList(
        snap.PNEANet, "/Users/qingyuan/CS224W/stackoverflow-Java.txt", 0, 1)

    Components = snap.TCnComV()
    snap.GetWccs(network, Components)
    print("The number of weakly connected components is %d" % Components.Len())

    MxWcc = snap.GetMxWcc(network)
    # BUG FIX: the format arguments were (GetNodes(), GetEdges()), filling the
    # "edges" slot with the node count and vice versa; order corrected.
    print(
        "The number of edges is %d and the number of nodes is %d in the largest weakly connected component."
        % (MxWcc.GetEdges(), MxWcc.GetNodes()))

    def _print_top3(table, header):
        # print the first three entries of an already score-sorted table
        print(header)
        for shown, item in enumerate(table):
            if shown == 3:
                break
            print(item, table[item])

    PRankH = snap.TIntFltH()
    snap.GetPageRank(network, PRankH)
    PRankH.SortByDat(False)  # descending by score
    _print_top3(
        PRankH,
        "IDs of the top 3 most central nodes in the network by PagePank scores. ")

    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(network, NIdHubH, NIdAuthH)
    NIdHubH.SortByDat(False)
    _print_top3(NIdHubH, "IDs of the top 3 hubs in the network by HITS scores. ")
    NIdAuthH.SortByDat(False)
    _print_top3(NIdAuthH, "IDs of top 3 authorities in the network by HITS scores. ")