def test_google_matrix(self):
    G = self.G
    M = networkx.google_matrix(G, alpha=0.9)
    e, ev = numpy.linalg.eig(M.T)
    p = numpy.array(ev[:, 0] / ev[:, 0].sum())[:, 0]
    for (a, b) in zip(p, self.G.pagerank.values()):
        assert_almost_equal(a, b)
    personalize = dict((n, random.random()) for n in G)
    M = networkx.google_matrix(G, alpha=0.9, personalization=personalize)
    personalize.pop(1)
    assert_raises(networkx.NetworkXError, networkx.google_matrix, G,
                  personalization=personalize)
def test_google_matrix(self):
    G = self.G
    M = networkx.google_matrix(G, alpha=0.9, nodelist=sorted(G))
    e, ev = numpy.linalg.eig(M.T)
    p = numpy.array(ev[:, 0] / ev[:, 0].sum())[:, 0]
    for (a, b) in zip(p, self.G.pagerank.values()):
        assert_almost_equal(a, b)
def test_google_matrix(self):
    G = self.G
    M = nx.google_matrix(G, alpha=0.9, nodelist=sorted(G))
    e, ev = np.linalg.eig(M.T)
    p = np.array(ev[:, 0] / ev[:, 0].sum())[:, 0]
    for (a, b) in zip(p, self.G.pagerank.values()):
        assert a == pytest.approx(b, abs=1e-7)
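The tests above all verify the same property: the PageRank vector is the dominant left eigenvector of the Google matrix. A standalone sketch of that check on a small example graph of my own (no test fixtures such as self.G or self.G.pagerank are assumed):

import networkx as nx
import numpy as np

# Small example graph (not the test fixture); alpha matches the tests above.
G = nx.DiGraph([(1, 2), (2, 3), (3, 1), (3, 2)])
M = nx.google_matrix(G, alpha=0.9, nodelist=sorted(G))

# Left eigenvector of M for the eigenvalue closest to 1.
eigenvalues, eigenvectors = np.linalg.eig(M.T)
idx = eigenvalues.argsort()[-1]
p = np.real(np.asarray(eigenvectors)[:, idx])
p /= p.sum()  # normalize to a probability distribution

# Compare against networkx's power-iteration solver (loose tolerance,
# since nx.pagerank stops at its default convergence threshold).
expected = nx.pagerank(G, alpha=0.9)
for value, node in zip(p, sorted(G)):
    assert abs(value - expected[node]) < 1e-4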
def LRW_Index(matrix, graph, n):
    nodes_num = graph.number_of_nodes()  # was undefined in the original snippet
    P = nx.google_matrix(graph)
    trans = np.transpose(P)
    pi = []
    vector = np.zeros(nodes_num)
    vector[0] = 1
    new_v = copy.copy(vector)
    pi.append(new_v)
    for i in range(1, nodes_num):
        vector[i - 1] = 0
        vector[i] = 1
        new_v = copy.copy(vector)
        pi.append(new_v)
    # propagate each basis vector for n random-walk steps
    for i in range(0, n):
        for x in range(0, nodes_num):
            pi[x] = pi[x].dot(trans)
    s = (nodes_num, nodes_num)
    S_LRW = np.zeros(s)
    # laplacian + adjacency = degree matrix (assuming `matrix` is the adjacency matrix)
    degree_matrix = nx.laplacian_matrix(graph) + matrix
    for x in range(0, nodes_num):
        q_x = degree_matrix[x, x] / len(graph.edges)
        for y in range(0, nodes_num):
            q_y = degree_matrix[y, y] / len(graph.edges)
            # with networkx < 3.0 google_matrix returns np.matrix, so pi[x] is a
            # 1xN matrix here, hence the .tolist()[0][y] indexing
            S_LRW[x, y] = q_x * pi[x].tolist()[0][y] + q_y * pi[y].tolist()[0][x]
    return S_LRW
def RWR_Index(graph, nodes_num, c):
    # The Google matrix is used as the transition matrix from the paper.
    P = nx.google_matrix(graph)
    idn = np.identity(nodes_num)
    trans = np.transpose(P)
    core = (1 - c) * LAD.inv((idn - c * trans))
    vector = np.zeros(nodes_num)
    qs = []
    vector[0] = 1
    new_v = copy.copy(vector)  # `vector` acts as the current basis vector
    qs.append(core.dot(new_v))
    for i in range(1, nodes_num):
        vector[i - 1] = 0
        vector[i] = 1
        new_v = copy.copy(vector)
        qs.append(core.dot(new_v))
    s = (nodes_num, nodes_num)
    S_RWR = np.zeros(s)
    for x in range(0, nodes_num):
        for y in range(0, nodes_num):
            S_RWR[x, y] = qs[x].tolist()[0][y] + qs[y].tolist()[0][x]
    return S_RWR
def page_rank(data):
    import pandas as pd
    import numpy as np
    import networkx as nx

    df = pd.DataFrame(data)
    graph = nx.DiGraph()
    # one node per distinct source id
    graph.add_nodes_from(range(df[0].nunique()))
    for i in range(len(df[0])):
        graph.add_edge(df[0][i], df[1][i])
        graph[df[0][i]][df[1][i]]['weight'] = df[3][i]
    transition_matrix = nx.google_matrix(graph)
    n = min(transition_matrix.shape)
    p0 = np.repeat(1 / n, n)
    pi = np.matmul(p0, transition_matrix)
    eps = 0.0000025
    i = 1
    while np.sum(np.abs(pi - p0)) >= eps:
        p0 = pi
        pi = np.matmul(pi, transition_matrix)
        print(i)
        print(pi)
        i = i + 1
    print('The final rank is :', pi)
def pagerank_numpy(G, alpha=0.85, personalization=None, weight='weight',
                   dangling=None):
    """Return the PageRank of the nodes in the graph."""
    if len(G) == 0:
        return {}
    M = nx.google_matrix(G, alpha, personalization=personalization,
                         weight=weight, dangling=dangling)
    # use numpy LAPACK solver
    eigenvalues, eigenvectors = np.linalg.eig(M.T)
    ind = eigenvalues.argsort()
    # eigenvector of largest eigenvalue at ind[-1], normalized
    largest = np.array(eigenvectors[:, ind[-1]]).flatten().real
    norm = float(largest.sum())
    return dict(zip(G, map(float, largest / norm)))
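Not part of the original snippet: a quick sanity check of the eigenvector-based solver above against networkx's iterative nx.pagerank, on a small example graph of my own.

import networkx as nx
import numpy as np   # the helper above also expects these module-level names

# Example graph for the check; every node has at least one out-edge.
G = nx.DiGraph([(1, 2), (2, 3), (3, 1), (1, 3)])
eig_scores = pagerank_numpy(G, alpha=0.85)
power_scores = nx.pagerank(G, alpha=0.85)
for node in G:
    # Loose tolerance: nx.pagerank stops at its default convergence threshold.
    assert np.isclose(eig_scores[node], power_scores[node], atol=1e-4)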
def test_empty(self):
    try:
        import numpy
    except ImportError:
        raise SkipTest('numpy not available.')
    G = networkx.Graph()
    assert_equal(networkx.pagerank(G), {})
    assert_equal(networkx.pagerank_numpy(G), {})
    assert_equal(networkx.google_matrix(G).shape, (0, 0))
def test_dangling_matrix(self):
    """
    Tests that the google_matrix doesn't change except for the dangling
    nodes.
    """
    G = self.G
    dangling = self.dangling_edges
    dangling_sum = float(sum(dangling.values()))
    M1 = nx.google_matrix(G, personalization=dangling)
    M2 = nx.google_matrix(G, personalization=dangling, dangling=dangling)
    for i in range(len(G)):
        for j in range(len(G)):
            if i == self.dangling_node_index and (j + 1) in dangling:
                assert almost_equal(M2[i, j], dangling[j + 1] / dangling_sum,
                                    places=4)
            else:
                assert almost_equal(M2[i, j], M1[i, j], places=4)
def test_dangling_matrix(self):
    """
    Tests that the google_matrix doesn't change except for the dangling
    nodes.
    """
    G = self.G
    dangling = self.dangling_edges
    dangling_sum = float(sum(dangling.values()))
    M1 = networkx.google_matrix(G, personalization=dangling)
    M2 = networkx.google_matrix(G, personalization=dangling, dangling=dangling)
    for i in range(len(G)):
        for j in range(len(G)):
            if i == self.dangling_node_index and (j + 1) in dangling:
                assert_almost_equal(M2[i, j], dangling[j + 1] / dangling_sum,
                                    places=4)
            else:
                assert_almost_equal(M2[i, j], M1[i, j], places=4)
def test_google_matrix(self):
    try:
        import numpy.linalg
    except ImportError:
        raise SkipTest('numpy not available.')
    G = self.G
    M = networkx.google_matrix(G, alpha=0.9)
    e, ev = numpy.linalg.eig(M.T)
    p = numpy.array(ev[:, 0] / ev[:, 0].sum())[:, 0]
    for (a, b) in zip(p, self.G.pagerank.values()):
        assert_almost_equal(a, b)
def calc(g, negative, alpha, M, beta=1):
    epsilon = 0.000000001
    print("start calc pagetrust, epsilon =", epsilon)
    N = len(g)
    x = np.ones(N)
    x = x * 1 / N
    visualize("x", x)
    P, tildeP = initialize_P(g, negative)
    t = 0
    G = nx.google_matrix(g)
    pagerank = nx.pagerank(g, alpha=alpha)
    visualize("Google matrix", G)
    t = 0
    while True:
        t += 1
        # build the transition matrix T
        print("***")
        print("*** iteration start, time = ", t)
        print("***")
        T = build_transition_matrix(alpha, x, g, G, M)
        tildeP = np.dot(T, P)
        visualize("P", P)
        visualize("tildeP", tildeP)
        x2 = np.zeros(N)
        for i in range(N):
            p = 0
            for k in range(N):
                p += G[k, i] * x[k]
            x2[i] = (1 - tildeP[i][i]) ** beta * p
            for j in range(N):
                if (i, j) in negative:
                    P[i, j] = 1
                elif i == j:
                    P[i, j] = 0
                else:
                    P[i, j] = tildeP[i, j]
        # normalization
        tmpl = 0
        for l in range(N):
            tmpl += x2[l]
        for o in range(N):
            x2[o] = x2[o] / tmpl
        visualize("x2", x2)
        e = is_converged(x, x2)
        print("e:", e)
        if e < epsilon:
            # visualize('pagerank', pagerank)
            break
        else:
            # x <- x(t+1)
            for p in range(N):
                x[p] = x2[p]
    print(x2)
    return x2
def test_google_matrix(self):
    G = self.G
    try:
        import numpy.linalg
        M = networkx.google_matrix(G, alpha=0.9)
        e, ev = numpy.linalg.eig(M.T)
        p = numpy.array(ev[:, 0] / ev[:, 0].sum())[:, 0]
        for (a, b) in zip(p, self.G.pagerank):
            assert_almost_equal(a, b)
    except ImportError:
        print("Skipping google_matrix test")
def graph_structure(graph, content_graph, path):
    """
    Builds the interaction graph with a structure-based weight similarity
    metric and saves it at the given path.
    """
    A = scipy.sparse.csr_matrix(
        nx.google_matrix(graph, alpha=1, weight='weight'))
    W = nx.to_scipy_sparse_matrix(content_graph)
    W2 = (W.dot(A) + A.transpose().dot(W)) * 0.5
    # rescaling in range 0-1
    max_val = W2.max()
    W2 = W2.multiply(1.0 / max_val)
    save_matrix_to_edgelist(W2, path)
def exhaustive_set(G, query_nodes, target_nodes, n_edges, start_dist):
    """Exhaustively searches all the combinations of k links between a set of
    query nodes Q and a set of absorbing target nodes C such that
    Q \cap C = \emptyset.

    Parameters
    ----------
    G : Networkx graph
       The graph from which the team will be selected.
    query : list
       The set of nodes from which random walker starts.
    target : list
       The set of nodes from where the random walker ends.
    n_edges : integer
       the number of links to be added
    start_dist: list
       The starting distribution over the query set

    Returns
    -------
    links : list
       The set of links that reduce the absorbing RW centrality
    ac_scores: list
       The set of scores of adding the links
    """
    query_set_size = len(query_nodes)
    map_query_to_org = dict(zip(query_nodes, range(query_set_size)))
    P = csc_matrix(nx.google_matrix(G, alpha=1))
    P_abs = P[list(query_nodes), :][:, list(query_nodes)]
    F = compute_fundamental(P_abs)
    # absorbing RW centrality: start distribution times the row sums of F
    row_sums = start_dist.dot(F.sum(axis=1))[0, 0]
    candidates = list(product(query_nodes, target_nodes))
    eligible = [candidates[i] for i in range(len(candidates))
                if G.has_edge(candidates[i][0], candidates[i][1]) == False]
    ac_scores = [row_sums]
    exhaustive_links = []
    for L in range(1, n_edges + 1):
        print('\t Number of edges {}'.format(L))
        round_min = -1
        best_combination = []
        for subset in combinations(eligible, L):
            H = G.copy()
            F_modified = F.copy()
            for links_to_add in subset:
                F_updated = update_fundamental_mat(
                    F_modified, H, map_query_to_org, links_to_add[0])
                H.add_edge(links_to_add[0], links_to_add[1])
                F_modified = F_updated
            abs_cen = start_dist.dot(F_updated.sum(axis=1))[0, 0]
            if abs_cen < round_min or round_min == -1:
                best_combination = subset
                round_min = abs_cen
        exhaustive_links.append(best_combination)
        ac_scores.append(round_min)
    return exhaustive_links, ac_scores
def page_rank(graph, jump_probability=.15, weighted=False):
    if weighted:
        wt = 'weight'
    else:
        wt = None
    alpha = 1 - jump_probability
    M = _nx.google_matrix(graph, alpha, weight=wt)
    _, v = _eigs(M.T, k=1)
    r = v.flatten().real
    r /= r.sum()
    return _pd.Series(r, index=graph.nodes).sort_index()
def _google_matrix(graph, anchors, normalize):
    aut = {}
    node_to_num = dict((node, i) for i, node in enumerate(graph.nodes()))
    num_to_node = dict(enumerate(graph.nodes()))
    aut_mat = nx.google_matrix(graph)
    for num, node in enumerate(graph.nodes()):
        d = []
        for anchor in anchors:
            a_num = node_to_num[anchor]
            d.append(aut_mat[num, a_num])
        if normalize:
            d = normalized(d)
        aut[node] = np.array(d)
    return aut
def linkrank(G):
    c = arange(len(G.nodes()))
    goo = nx.google_matrix(G)
    goo = array(goo)
    m = nx.pagerank_numpy(G)
    m = m.items()
    m = [i[1] for i in m]
    m = array([m])
    m = m.T
    L = tile(m, [1, len(goo)]) * goo
    Q = 0
    mm = tile(m, [1, len(goo)]) * tile(m.T, [len(goo), 1])
    Qlr = L - mm
    return greedyMax(Qlr, c, 0)
def pagerank(g):
    import numpy as np
    import networkx as nx

    trm = nx.google_matrix(g)
    n = min(trm.shape)
    p0 = np.repeat(1 / n, n)
    pi = np.matmul(p0, trm)
    i = 1
    eps = 0.00015
    while np.sum(np.abs(pi - p0)) >= eps:
        p0 = pi
        pi = np.matmul(pi, trm)
        i = i + 1
        if i == 10000:
            break
    return pi
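A minimal usage sketch for the power-iteration helper above (my own example graph, not from the original code); the loose tolerance reflects the eps = 0.00015 stopping rule and networkx's own default tolerance.

import networkx as nx
import numpy as np

g = nx.DiGraph([("a", "b"), ("b", "c"), ("c", "a"), ("a", "c")])
pi = np.asarray(pagerank(g)).flatten()
reference = nx.pagerank(g)  # same default alpha=0.85 as nx.google_matrix
for value, node in zip(pi, g):
    assert abs(value - reference[node]) < 1e-2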
def random_links(G, query_nodes, target_nodes, n_edges, start_dist):
    """Selects a random set of links between a set of query nodes Q and a set
    of absorbing target nodes C such that Q \cap C = \emptyset.

    Parameters
    ----------
    G : Networkx graph
       The graph from which the team will be selected.
    query : list
       The set of nodes from which random walker starts.
    target : list
       The set of nodes from where the random walker ends.
    n_edges : integer
       the number of links to be added
    start_dist: list
       The starting distribution over the query set

    Returns
    -------
    links : list
       The set of links that reduce the absorbing RW centrality
    ac_scores: list
       The set of scores of adding the links
    """
    query_set_size = len(query_nodes)
    map_query_to_org = dict(zip(query_nodes, range(query_set_size)))
    P = csc_matrix(nx.google_matrix(G, alpha=1))
    P_abs = P[list(query_nodes), :][:, list(query_nodes)]
    F = compute_fundamental(P_abs)
    # absorbing RW centrality: start distribution times the row sums of F
    row_sums = start_dist.dot(F.sum(axis=1))[0, 0]
    candidates = list(product(query_nodes, target_nodes))
    eligible = [candidates[i] for i in range(len(candidates))
                if G.has_edge(candidates[i][0], candidates[i][1]) == False]
    links_to_add = sample(eligible, n_edges)
    ac_scores = []
    ac_scores.append(row_sums)
    i = 0
    while i < n_edges:
        F_updated = update_fundamental_mat(F, G, map_query_to_org,
                                           links_to_add[i][0])
        G.add_edge(links_to_add[i][0], links_to_add[i][1])
        abs_cen = start_dist.dot(F_updated.sum(axis=1))[0, 0]
        F = F_updated
        ac_scores.append(abs_cen)
        i += 1
    return links_to_add, ac_scores
def pagerank_numpy(G, alpha=0.85, max_iter=100, tol=1.0e-6, nodelist=None):
    """Return a NumPy array of the PageRank of G."""
    import numpy
    import networkx
    M = networkx.google_matrix(G, alpha, nodelist)
    (n, m) = M.shape  # should be square
    x = numpy.ones((n)) / n
    for i in range(max_iter):
        xlast = x
        x = numpy.dot(x, M)
        # check convergence, l1 norm
        err = numpy.abs(x - xlast).sum()
        if err < n * tol:
            return numpy.asarray(x).flatten()
    raise NetworkXError(
        "pagerank: power iteration failed to converge in %d iterations."
        % (i + 1))
def cluster(seed_nodes, graph):
    # Compute adjacency matrix
    A = nx.adjacency_matrix(graph, weight='weight').todense()
    # Compute the initial transition matrix
    M = A / A.sum(axis=1)
    M[np.isnan(M)] = 0
    # Compute the initial google matrix
    P = nx.google_matrix(graph, weight='weight')
    PF = np.copy(P)
    # Compute random walk for t steps
    t = 3
    for i in range(2, t + 1):
        PF += np.linalg.matrix_power(P, i)
    P_degree = np.diag(PF)
    P_weight = cosine_similarity(PF)
    coms = set()
    membership = {}
    # Sort the nodes array in decreasing value of weights
    sorted_P_deg = np.sort(P_degree)[::-1]
    sorted_P_deg_indices = np.argsort(P_degree)[::-1]  # descending, to line up with sorted_P_deg
    nodes_list = list(graph.nodes)
    sorted_nodes = [nodes_list[i] for i in sorted_P_deg_indices]
    # Take the P-degree of the N/4th node as the threshold
    Pt = sorted_P_deg[graph.number_of_nodes() // 4]
    # Loop over the nodes and check whether the P-degree of the node exceeds Pt.
    # If it does and the node is not already in a sub-region, keep the node as
    # a seed and record the nodes connected to it as a community; otherwise
    # skip it.
    for i, node in enumerate(sorted_nodes):
        if sorted_P_deg[i] > Pt and node not in coms:
            coms |= {node, *graph.neighbors(node)}
            membership[node] = [*graph.neighbors(node)]
def M(path, graph, a, personal):
    if path is None and graph is None:
        return
    g = nx.read_gpickle(path) if path is not None else graph
    # Row-stochastic transition matrix; note that in older networkx releases
    # google_matrix returns np.matrix, which behaves differently from ndarray.
    P = nx.google_matrix(g, alpha=1, dangling=personal)
    P_transp = np.transpose(P)
    I = np.identity(len(g.nodes()))
    m = np.subtract(I, np.dot(a, P_transp))
    m_inv = linalg.inv(m)
    p_array = np.array(P_transp)
    return (m_inv, p_array)
def sim(n=4, m=15):
    I_graph = nx.DiGraph()
    nodes = ["FB", "Amazon", "HCP", "LinkedIn"]
    # nodes = range(n)
    edges = np.random.choice(nodes, 2 * m).reshape(m, 2)
    # I_graph.add_edges_from(edges)
    I_graph.add_edge("FB", "HCP")
    I_graph.add_edge("Amazon", "HCP")
    I_graph.add_edge("LinkedIn", "HCP")

    # visualize
    graphVizWrapper.graph(I_graph.edges(), 'di')

    # google transition matrix
    G = nx.google_matrix(I_graph, alpha=1.0)
    # make indexing consistent
    G = np.array(G)

    surfer = Markov_process(I_graph.nodes(), G)
    print(surfer.sample_game(stop=10))
    print(G)
    print(I_graph.node)
    surfer.sim_distribution()
# The def line and walk-array setup of this random-walk helper were cut off in
# the excerpt; the signature and initialization below are inferred from the
# random_walk(markov_matrix, starting_point, walk_length) call further down.
def random_walk(a, i, iters):
    walk = np.zeros(iters + 1, dtype=int)
    walk[0] = i
    elements = np.arange(a.shape[0])  # for our graph [0,1,2,3]
    c_index = i  # current index for this iteration
    for k in range(iters):
        count = 0  # count of transitions
        probs = a[c_index].reshape((-1,))  # probability of transitions
        # sample from probs
        sample = np.random.choice(elements, p=probs)  # sample a target using probs
        index = sample  # go to target
        walk[k + 1] = index
        c_index = index
    return walk


# print(pd.DataFrame(nx.adj_matrix(G).todense()))
walk_length = 1000000
markov_matrix = np.array(nx.google_matrix(G, alpha=1))
nodes = G.nodes()
vocab = {f"node_{node}": node for node in nodes}
n2voc = {node: name for name, node in vocab.items()}

starting_point = np.random.choice(list(nodes))
walk = random_walk(markov_matrix, starting_point, walk_length)
walk

# %%
sliding_windows = np.vstack((walk,
                             np.roll(walk, -1),
                             np.roll(walk, -2),
                             np.roll(walk, -3),
                             np.roll(walk, -4))).astype(int)
sliding_windows

# %%
cooccurence_matrix = np.zeros_like(markov_matrix)
center_node_pos = int(sliding_windows.shape[0] / 2)
for position in range(walk_length):
def get_d_erH(self, alpha=1, gamma=1, N=100):
    p_mtx = nx.google_matrix(self.graph, alpha=alpha)
    r_mtx = -1 * np.eye(self.graph.order()) + p_mtx
    # Relies on google_matrix returning np.matrix (networkx < 3.0), so ** N is
    # a matrix power, which approximates the matrix exponential exp(gamma * R)
    # for large N.
    d_erH = (np.eye(self.graph.order()) + (float(gamma) / N) * r_mtx) ** N
    return d_erH
def get_trans_matrix(graph):
    P = nx.google_matrix(graph, alpha=1)
    return P.T
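With alpha=1 and no personalization or dangling overrides, the Google matrix reduces to the plain random-walk transition matrix D^-1 A, which is what helpers like the one above rely on. A small sketch verifying this on an example graph of my own:

import networkx as nx
import numpy as np

graph = nx.path_graph(4)  # no dangling nodes, so no special handling kicks in
P = np.asarray(nx.google_matrix(graph, alpha=1))
A = nx.to_numpy_array(graph)
walk = A / A.sum(axis=1, keepdims=True)  # D^-1 A
assert np.allclose(P, walk)
assert np.allclose(get_trans_matrix(graph), walk.T)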
def test_empty(self):
    G = networkx.Graph()
    assert_equal(networkx.pagerank(G), {})
    assert_equal(networkx.pagerank_numpy(G), {})
    assert_equal(networkx.google_matrix(G).shape, (0, 0))
def test_empty(self):
    G = networkx.Graph()
    assert networkx.pagerank(G) == {}
    assert networkx.pagerank_numpy(G) == {}
    assert networkx.google_matrix(G).shape == (0, 0)
def greedy_navigation(G, query_nodes, target_nodes, n_edges, start_dist):
    """Selects a set of links with a greedy descent algorithm that reduces the
    absorbing RW centrality between a set of query nodes Q and a set of
    absorbing target nodes C such that Q \cap C = \emptyset. The query and
    target set must be a 'viable' partition of the graph.

    Parameters
    ----------
    G : Networkx graph
       The graph from which the team will be selected.
    query : list
       The set of nodes from which random walker starts.
    target : list
       The set of nodes from where the random walker ends.
    n_edges : integer
       the number of links to be added
    start_dist: list
       The starting distribution over the query set
    P : Scipy matrix
       The transition matrix of the graph G
    F : Scipy matrix
       The fundamental matrix for the graph G with the given set of absorbing
       random walk nodes

    Returns
    -------
    links : list
       The set of links that reduce the absorbing RW centrality
    """
    H = G.copy()
    prng = RandomState()
    query_set_size = len(query_nodes)
    target_set_size = len(target_nodes)
    map_query_to_org = dict(zip(query_nodes, range(query_set_size)))
    P = csc_matrix(nx.google_matrix(H, alpha=1))
    P_abs = P[list(query_nodes), :][:, list(query_nodes)]
    F = compute_fundamental(P_abs)
    row_sums = start_dist.dot(F.sum(axis=1))[0, 0]
    best_F = zeros(F.shape)
    optimal_set = []
    ac_scores = []
    ac_scores.append(row_sums)
    while n_edges > 0:
        round_min = -1
        best_node = -1
        for i in query_nodes:
            abs_neighbours = [l for l in H.neighbors(i) if l in target_nodes]
            if len(abs_neighbours) == target_set_size:
                continue
            F_updated = update_fundamental_mat(F, H, map_query_to_org, i)
            abs_cen = start_dist.dot(F_updated.sum(axis=1))[0, 0]
            if abs_cen < round_min or round_min == -1:
                best_node = i
                round_min = abs_cen
                best_F = F_updated
        F = best_F
        ac_scores.append(round_min)
        optimal_candidate_edges = [(best_node, k, round_min)
                                   for k in target_nodes
                                   if H.has_edge(best_node, k) == False]
        try:
            edge_idx = prng.randint(0, len(optimal_candidate_edges))
        except ValueError:
            print(H.neighbors(best_node))
            print([l for l in H.neighbors(best_node) if l in target_nodes])
            print(best_node)
            print(optimal_candidate_edges)
            print(target_nodes)
        H.add_edge(optimal_candidate_edges[edge_idx][0],
                   optimal_candidate_edges[edge_idx][1])
        optimal_set.append(optimal_candidate_edges[edge_idx])
        n_edges -= 1
    return optimal_set, ac_scores
def compute_security(star_dict, edge_dict, num_seeds, num_iterations):
    # build up a nx graph
    galaxy = networkx.Graph()
    for v, vertex in star_dict.items():
        galaxy.add_node(v)
    for v, neighbors in edge_dict.items():
        for n in neighbors:
            galaxy.add_edge(v, n)

    # use the centrality measures already computed to find seeds:
    # take the top slice of vertices for each centrality measure
    betweenness_limit = int(len(star_dict) * 0.4)
    sorted_betweenness = sorted((v['betweenness'], k)
                                for k, v in star_dict.items())
    top_betweenness = {k for (value, k)
                       in sorted_betweenness[-betweenness_limit:]}

    closeness_limit = int(len(star_dict) * 0.15)
    sorted_closeness = sorted((v['closeness'], k)
                              for k, v in star_dict.items())
    top_closeness = {k for (value, k) in sorted_closeness[-closeness_limit:]}

    pagerank_limit = int(len(star_dict) * 0.4)
    sorted_pagerank = sorted((v['pagerank'], k)
                             for k, v in star_dict.items())
    top_pagerank = {k for (value, k) in sorted_pagerank[-pagerank_limit:]}

    # take the intersection of all the top measures; this will be our pool to
    # choose seeds from
    seed_pool = top_betweenness & top_closeness & top_pagerank
    print(len(seed_pool))

    seeds = set()
    # loop until we have num_seeds or the seed pool is exhausted
    while len(seed_pool) > 0 and len(seeds) < num_seeds:
        # pick a random vertex and remove it from the seed pool
        current_seed = random.choice(list(seed_pool))
        seed_pool.remove(current_seed)
        # find all vertices within 15 jumps
        close_vertices = networkx.single_source_shortest_path(
            galaxy, source=current_seed, cutoff=15)
        # if none of the current seeds were found nearby, add this as a seed
        if len(seeds.intersection(close_vertices.keys())) == 0:
            seeds.add(current_seed)
    print(len(seeds))

    # apply the random walk algorithm, aka pagerank with alpha=1
    personalization_dict = {k: v['closeness'] ** 10
                            for k, v in star_dict.items()}
    mat = networkx.google_matrix(galaxy, alpha=1,
                                 personalization=personalization_dict,
                                 nodelist=list(star_dict.keys()))
    mat = csr_matrix(mat)

    # for the initial array, set the seeds to 1 and everything else to (almost) 0
    weight_array = numpy.empty(len(star_dict), dtype=mat.dtype)
    weight_array.fill(0.0001)
    for i, k in enumerate(star_dict.keys()):
        if k in seeds:
            weight_array[i] = 1

    # run the power iteration
    for i in range(num_iterations):
        weight_array = weight_array * mat

    # create a security dict to normalize
    security_dict = {k: weight_array[i] for i, k in enumerate(star_dict.keys())}
    security_dict = normalize(security_dict)

    # apply some data transformations - the small exponent will create more
    # hisec, and the subtraction will create nullsec
    for key, value in security_dict.items():
        security_dict[key] = value ** 0.025 - 0.94
    security_dict = normalize(security_dict)

    # copy the result into the security field
    for k, v in star_dict.items():
        v['security'] = security_dict[k]
import networkx as nx
import numpy as np
import sys
sys.path.append('./src')
from dataset import createFromDataset
from stationary import statDist

G = createFromDataset()

# Specify the matrix P_ (P bar) for the PageRank model with alpha = 0.1.
P_ = nx.google_matrix(G, alpha=0.1)
print(P_)

# With k = 100, apply the power method and compare the result with the one
# obtained in item b). Are the stationary distributions obtained in b) and c)
# equal or different?

# stationary distribution from item c)
wdict = nx.pagerank(G, alpha=0.1, max_iter=100)
witemC = [0. for i in range(len(P_))]
for key in wdict:
    witemC[key - 1] = wdict[key]

# stationary distribution from item b)
witemB = statDist(G, 100)

# Compare the vectors:
isDif = False   # flag set when the vectors differ
contDif = 0     # counter of differing entries
for i in range(len(witemB)):
                    help='pagerank algorithm type. naive for pagerank(), numpy for pagerank_numpy(), scipy for pagerank_scipy(), google for google_matrix()')
arg = parser.parse_args()

print("pagerank begins!")
begintime = time.time()
G = nx.DiGraph()
G = buildGFromFile(G)
print("building Graph elapsed time: ", time.time() - begintime)

if arg.type == 'naive':
    beginTime = time.time()
    PageRankResult = nx.pagerank(G, alpha=1 - arg.rate,
                                 max_iter=int(arg.num_iter), tol=arg.eps)
    print("PageRank elapsed time: ", time.time() - beginTime)
elif arg.type == 'numpy':
    beginTime = time.time()
    PageRankResult = nx.pagerank_numpy(G, alpha=1 - arg.rate,
                                       max_iter=int(arg.num_iter), tol=arg.eps)
    print("pagerank_numpy elapsed time: ", time.time() - beginTime)
elif arg.type == 'scipy':
    beginTime = time.time()
    PageRankResult = nx.pagerank_scipy(G, alpha=1 - arg.rate)
    print("PageRank_scipy elapsed time: ", time.time() - beginTime)
elif arg.type == 'google':
    beginTime = time.time()
    PageRankResult = nx.google_matrix(G, alpha=1 - arg.rate)
    print("Google Matrix elapsed time: ", time.time() - beginTime)

pr_res = json.dumps(PageRankResult)
f = open('rank_save.tsv', 'w')
f.write(pr_res)
f.close()
def getTransMatrix(graph):
    P = nx.google_matrix(graph, alpha=1)
    # P /= P.sum(axis=1)
    P = P.T
    return P
def test_empty(self):
    G = nx.Graph()
    assert nx.pagerank(G) == {}
    assert _pagerank_python(G) == {}
    assert nx.pagerank_numpy(G) == {}
    assert nx.google_matrix(G).shape == (0, 0)
def link_prediction(G, query_nodes, target_nodes, n_edges, start_dist, alg="ra"):
    """Selects a set of links based on the scores calculated by a standard
    link-prediction algorithm from the networkx library.

    Parameters
    ----------
    G : Networkx graph
       The graph from which the team will be selected.
    query : list
       The set of nodes from which random walker starts.
    target : list
       The set of nodes from where the random walker ends.
    n_edges : integer
       the number of links to be added
    start_dist: list
       The starting distribution over the query set
    alg: string
       A string describing the link-prediction algorithm to be used

    Returns
    -------
    links : list
       The set of links that reduce the absorbing RW centrality
    ac_scores: list
       The set of scores of adding the links
    """
    assert alg in ["ra", "pa", "jaccard", "aa"], \
        "alg must be one of [\"ra\", \"pa\", \"jaccard\", \"aa\"]."
    H = G.copy()
    query_set_size = len(query_nodes)
    map_query_to_org = dict(zip(query_nodes, range(query_set_size)))
    P = csc_matrix(nx.google_matrix(H, alpha=1))
    P_abs = P[list(query_nodes), :][:, list(query_nodes)]
    F = compute_fundamental(P_abs)
    # absorbing RW centrality: start distribution times the row sums of F
    row_sums = start_dist.dot(F.sum(axis=1))[0, 0]
    candidates = list(product(query_nodes, target_nodes))
    eligible = [candidates[i] for i in range(len(candidates))
                if H.has_edge(candidates[i][0], candidates[i][1]) == False]
    links_to_add = []
    if alg == 'ra':
        preds = nx.resource_allocation_index(H, eligible)
    elif alg == 'jaccard':
        preds = nx.jaccard_coefficient(H, eligible)
    elif alg == 'aa':
        preds = nx.adamic_adar_index(H, eligible)
    elif alg == 'pa':
        preds = nx.preferential_attachment(H, eligible)
    for u, v, p in preds:
        links_to_add.append((u, v, p))
    links_to_add.sort(key=lambda x: x[2], reverse=True)

    ac_scores = []
    ac_scores.append(row_sums)
    i = 0
    while i < n_edges:
        F_updated = update_fundamental_mat(F, H, map_query_to_org,
                                           links_to_add[i][0])
        H.add_edge(links_to_add[i][0], links_to_add[i][1])
        abs_cen = start_dist.dot(F_updated.sum(axis=1))[0, 0]
        F = F_updated
        ac_scores.append(abs_cen)
        i += 1
    return links_to_add, ac_scores
def googleMatrix(self):
    fname = self.DIR + '/googleMatricx.txt'
    google_matrix = nx.google_matrix(self.graph)
    numpy.savetxt(fname, google_matrix)
    print(fname)
def get_approx_boundary(G, query_nodes, target_nodes, n_edges, start_dist):
    """
    Used to calculate an approximation guarantee for the greedy algorithm.
    """
    H = G.copy()  # GET A COPY OF THE GRAPH
    query_set_size = len(query_nodes)
    target_set_size = len(target_nodes)
    map_query_to_org = dict(zip(query_nodes, range(query_set_size)))
    candidates = list(product(query_nodes, target_nodes))
    # ALL minus existing in G
    eligible = [candidates[i] for i in range(len(candidates))
                if H.has_edge(candidates[i][0], candidates[i][1]) == False]
    # CALCULATE MARGINAL GAIN TO EMPTY SET FOR ALL NODES IN STEEPNESS FUNCTION
    P = csc_matrix(nx.google_matrix(H, alpha=1))
    P_abs = P[list(query_nodes), :][:, list(query_nodes)]
    F = compute_fundamental(P_abs)
    row_sums_empty = start_dist.dot(F.sum(axis=1))[0, 0]  # F(\emptyset)
    # candidates = list(product(query_nodes, target_nodes))
    ac_marginal_empty = []
    ac_marginal_full = []
    source_idx_empty = []
    node_processed = -1
    for out_edge in eligible:
        abs_cen = -1
        source_node = out_edge[0]
        if node_processed == source_node:
            # skip updating the matrix because this updates F in the same way
            continue
        node_processed = source_node
        F_updated = update_fundamental_mat(F, H, map_query_to_org, source_node)
        abs_cen = start_dist.dot(F_updated.sum(axis=1))[0, 0]
        ac_marginal_empty.append(abs_cen)
        source_idx_empty.append(source_node)
    sorted_indexes_empty = [i[0] for i in
                            sorted(enumerate(source_idx_empty),
                                   key=lambda x: x[1])]
    ac_marginal_empty = [ac_marginal_empty[i] for i in sorted_indexes_empty]
    # CALCULATE MARGINAL GAIN FOR FULL SET
    H.add_edges_from(eligible)
    P_all = csc_matrix(nx.google_matrix(H, alpha=1))
    P_abs_all = P_all[list(query_nodes), :][:, list(query_nodes)]
    F_all = compute_fundamental(P_abs_all)
    row_sums_all = start_dist.dot(F_all.sum(axis=1))[0, 0]
    node_processed = -1
    source_idx = []
    for out_edge in eligible:
        abs_cen = -1
        source_node = out_edge[0]
        if node_processed == source_node:
            # skip updating the matrix because this updates F in the same way
            continue
        node_processed = source_node
        F_all_updated = update_rev_fundamental_mat(F_all, H, map_query_to_org,
                                                   source_node)
        abs_cen = start_dist.dot(F_all_updated.sum(axis=1))[0, 0]
        ac_marginal_full.append(abs_cen)
        source_idx.append(source_node)
    sorted_indexes = [i[0] for i in
                      sorted(enumerate(source_idx), key=lambda x: x[1])]
    ac_marginal_full = [ac_marginal_full[i] for i in sorted_indexes]
    assert sorted_indexes == sorted_indexes_empty, \
        "Something is wrong with the way scores are appended"
    all_steepness = ((asarray(ac_marginal_full) - row_sums_all)
                     / (row_sums_empty - asarray(ac_marginal_empty)))
    s = min(all_steepness)
    node_max = argmin(all_steepness)
    return 1 - s, sorted_indexes[node_max]
def localPartitioningAttempt(alpha, beta, node):
    gamma = alpha + beta - alpha * beta
    global nodeIntDict
    nodeIntDict = getNodeIntDict(DG)
    # This returns the transition matrix, which I think is the random walk
    # matrix
    M = nx.google_matrix(DG)
    print("M")
    print(M)
    print("about to page rank")
    # Compute the two global page rank vectors
    prBeta = nx.pagerank(DG, alpha=beta)
    prGamma = nx.pagerank(DG, alpha=gamma)
    # starting vector for local page rank with all probability on node
    localDict = dict.fromkeys(DG.nodes(), 0)
    localDict[node] = 1
    localPR = nx.pagerank(DG, alpha=gamma, nstart=localDict,
                          personalization=localDict)
    # linear combination
    # this would maybe be faster if I used actual arrays instead of
    # dictionaries
    p = {}
    for key in localPR.keys():
        p[key] = (alpha / gamma) * localPR[key] + \
                 (((1 - alpha) * beta) / gamma) * prGamma[key]
        p[key] = p[key] / prBeta[key]
    # create a list of tuples sorted in non-increasing order by value
    sortedP = sorted(p.items(), key=operator.itemgetter(1), reverse=True)
    sortedLocalPR = sorted(localPR.items(), key=operator.itemgetter(1),
                           reverse=True)
    print("node is " + str(node))
    print("sortedLocalPR is ")
    print(sortedLocalPR[0:20])
    print("sortedP")
    print(sortedP[0:20])
    print("Conductance Loop")
    S = []
    notS = list(DG.nodes())
    j = 0
    S.append(sortedP[j][0])
    notS.remove(sortedP[j][0])
    minConducance = calculateConductance(S, notS)
    minJ = j
    for j in range(2, len(sortedP)):
        S.append(sortedP[j][0])
        notS.remove(sortedP[j][0])
        tempConductance = calculateConductance(S, notS)
        # print "cond " + str(tempConductance)
        if tempConductance < minConducance:
            minConducance = tempConductance
            minJ = j
    print(minJ)
    print(minConducance)
    minSet = sortedP[0:j]
    print("minset")
    print(minSet)
    print("length of minset " + str(len(minSet)))
    return minSet
"""
'''
n3graph = nx.read_edgelist('../3node2SPs1direct.csv', create_using=nx.DiGraph())

'''
Simplest Pagerank, use defaults (but print google matrix first)
'''
n3res = pagerank(n3graph, alpha=0.85, personalization=None, dangling=None,
                 max_iter=1000, nstart=None, weight=None)
print("Pagerank, NO teleport set:")
print(google_matrix(n3graph, alpha=0.85, weight=None))
print(n3res)

print("Numpy version:")
n3res = pagerank_numpy(n3graph, alpha=0.85)
print(n3res)

print("Scipy version:")
n3res = pagerank_scipy(n3graph, alpha=0.85)
print(n3res)

print("\n=============================\n")

'''
We only need to provide values for the nodes we want considered (rather than
all nodes)
Personalization values are normalized automatically
'''
pValues = {'A': 1}
n3res = pagerank(n3graph, alpha=0.85,
# NOTE: the original excerpt starts mid-function; the imports and the def line
# below are inferred, with the signature taken from the pagerank(M, 25, 0.85)
# call further down.
import time

import networkx as nx
import numpy as np


def pagerank(M, iters, d):
    N = M.shape[1]
    v = np.random.rand(N, 1)
    v = v / np.linalg.norm(v, 1)
    M_hat = (d * M + (1 - d) / N)
    start = time.process_time()
    for i in range(iters):
        v = M_hat @ v
    end = time.process_time()
    return v


# experiment parameters
N = 10000
iters = 25

# prepare pagerank matrix
print("constructing pagerank matrix...")
# use nx to create representation of a web graph
d = 0.85
G = nx.scale_free_graph(N, alpha=0.41, beta=0.49, gamma=0.1, delta_in=0)
M = nx.google_matrix(G, alpha=1).T

start = time.process_time()
v = pagerank(M, 25, 0.85)
end = time.process_time()

print("numpy:")
print(end - start)
print(v[1], v[2], v[3])
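A quick correctness check for the dense power iteration above, added here as a sketch: it uses a much smaller random graph of my own so it runs fast, and it assumes the reconstructed pagerank(M, iters, d) signature.

import networkx as nx
import numpy as np

G_small = nx.gnp_random_graph(50, 0.1, directed=True, seed=1)
M_small = np.asarray(nx.google_matrix(G_small, alpha=1)).T  # column-stochastic
v_small = pagerank(M_small, iters=100, d=0.85)
expected = np.array(list(nx.pagerank(G_small, alpha=0.85).values()))
assert np.allclose(v_small.flatten(), expected, atol=1e-3)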
def reverse_greedy(G, query_nodes, target_nodes, n_edges, start_dist):
    """Selects a set of links with a reverse greedy descent algorithm that
    reduces the absorbing RW centrality between a set of query nodes Q and a
    set of absorbing target nodes C such that Q \cap C = \emptyset. The query
    and target set must be a 'viable' partition of the graph.

    Parameters
    ----------
    G : Networkx graph
       The graph from which the team will be selected.
    query : list
       The set of nodes from which random walker starts.
    target : list
       The set of nodes from where the random walker ends.
    n_edges : integer
       the number of links to be added
    start_dist: list
       The starting distribution over the query set
    P : Scipy matrix
       The transition matrix of the graph G
    F : Scipy matrix
       The fundamental matrix for the graph G with the given set of absorbing
       random walk nodes

    Returns
    -------
    links : list
       The set of links that reduce the absorbing RW centrality
    """
    H = G.copy()
    query_set_size = len(query_nodes)
    map_query_to_org = dict(zip(query_nodes, range(query_set_size)))
    candidates = list(product(query_nodes, target_nodes))
    eligible = [candidates[i] for i in range(len(candidates))
                if H.has_edge(candidates[i][0], candidates[i][1]) == False]
    H.add_edges_from(eligible)
    P = csc_matrix(nx.google_matrix(H, alpha=1))
    P_abs = P[list(query_nodes), :][:, list(query_nodes)]
    F = compute_fundamental(P_abs)
    row_sums = start_dist.dot(F.sum(axis=1))[0, 0]
    # candidates = list(product(query_nodes, target_nodes))
    worst_F = zeros(F.shape)
    worst_set = []
    optimal_set = []
    ac_scores = []
    # ac_scores.append(row_sums)
    while len(eligible) > 0:
        round_min = -1
        worst_link = (-1, -1)
        node_processed = -1
        for out_edge in eligible:
            source_node = out_edge[0]
            if node_processed == source_node:
                # skip updating the matrix because this updates F in the same
                # way
                continue
            node_processed = source_node
            F_updated = update_rev_fundamental_mat(F, H, map_query_to_org,
                                                   source_node)
            abs_cen = start_dist.dot(F_updated.sum(axis=1))[0, 0]
            if abs_cen < round_min or round_min == -1:
                worst_link = out_edge
                round_min = abs_cen
                worst_F = F_updated
        F = worst_F
        H.remove_edge(*worst_link)
        worst_set.append(worst_link)
        eligible.remove(worst_link)
        if len(eligible) <= n_edges:
            ac_scores.append(round_min)
            optimal_set.append(worst_link)
    return list(reversed(optimal_set)), list(reversed(ac_scores))