Example No. 1
    def test_in_place(self):
        """Tests for an in-place reweighting of the edges of the graph.

        """
        G = nx.DiGraph()
        G.add_edge(0, 1, weight=1)
        G.add_edge(0, 2, weight=1)
        nx.stochastic_graph(G, copy=False)
        assert_equal(sorted(G.edges(data=True)),
                     [(0, 1, {'weight': 0.5}), (0, 2, {'weight': 0.5})])
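For reference, a minimal sketch (assuming NetworkX 2.x) of the copy semantics this test exercises: the default call returns a new right-stochastic graph, while copy=False rewrites the weights in place.

import networkx as nx

G = nx.DiGraph()
G.add_weighted_edges_from([(0, 1, 1), (0, 2, 3)])

S = nx.stochastic_graph(G)           # copy: G keeps its raw weights
assert S[0][1]['weight'] == 0.25 and S[0][2]['weight'] == 0.75

nx.stochastic_graph(G, copy=False)   # in place: G itself is normalized
assert G[0][1]['weight'] == 0.25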
Example No. 2
 def test_in_place(self):
     """Tests for an in-place reweighting of the edges of the graph."""
     G = nx.DiGraph()
     G.add_edge(0, 1, weight=1)
     G.add_edge(0, 2, weight=1)
     nx.stochastic_graph(G, copy=False)
     assert sorted(G.edges(data=True)) == [
         (0, 1, {"weight": 0.5}),
         (0, 2, {"weight": 0.5}),
     ]
Example No. 3
def add_noise(G, noise=1e-13):
    # Add noise to the greater weights in the graph
    # NOTE: this method is used to handle the eigs() RuntimeError: Factor is exactly singular
    max_weight = max(e[2]['weight'] for e in G.edges_iter(data=True)) 
    for e in G.edges_iter(data=True):
        if e[2]['weight'] == max_weight:
            e[2]['weight'] += noise
    if not gm.check_if_stochastic_matrix(nx.to_numpy_matrix(G)):
        nx.stochastic_graph(G, copy=False)
    return G
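gm.check_if_stochastic_matrix above comes from the surrounding project; a minimal stand-in consistent with how it is used here (every row of the dense matrix sums to 1) might be:

import numpy as np

def check_if_stochastic_matrix(M, tol=1e-12):
    # True if M is right-stochastic, i.e. each row sums to 1
    M = np.asarray(M)
    return bool(np.all(np.abs(M.sum(axis=1) - 1.0) < tol))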
Example No. 5
def test_stochastic():
    G=nx.DiGraph()
    G.add_edge(0,1)
    G.add_edge(0,2)
    S=nx.stochastic_graph(G)
    assert_true(nx.is_isomorphic(G,S))
    assert_equal(sorted(S.edges(data=True)),
                 [(0, 1, {'weight': 0.5}), 
                  (0, 2, {'weight': 0.5})])
    S=nx.stochastic_graph(G,copy=True)
    assert_equal(sorted(S.edges(data=True)),
                 [(0, 1, {'weight': 0.5}), 
                  (0, 2, {'weight': 0.5})])
Example No. 7
    def update_bipartite_graph(self, graph, import_export_data):
        new_graph = nx.DiGraph()

        # use nodes in original graph
        new_graph.add_nodes_from(graph.nodes(data=True))

        # add new edges
        # add country - product edges
        for country, records in import_export_data.iteritems():
            if country not in self.country_list:
                continue

            for each_record in records:
                if each_record['id'] not in self.product_list:
                    continue

                if each_record['import'] != 0:
                    new_graph.add_edge(
                        self.country_ids[country],
                        self.product_ids[each_record['id']],
                        weight=each_record['import'])  # import edge

                if each_record['export'] != 0:
                    new_graph.add_edge(
                        self.product_ids[each_record['id']],
                        self.country_ids[country],
                        weight=each_record['export'])  # export edge

        # create a copy in (right) stochastic form
        W = nx.stochastic_graph(new_graph, weight='weight')

        return W
Example No. 8
 def test_multidigraph(self):
     G = nx.MultiDiGraph()
     G.add_edges_from([(0, 1), (0, 1), (0, 2), (0, 2)])
     S = nx.stochastic_graph(G)
     d = dict(weight=0.25)
     assert (sorted(S.edges(data=True)) == [(0, 1, d), (0, 1, d), (0, 2, d),
                                            (0, 2, d)])
Example No. 9
def pagerank(G, damping=0.85, max_iterations=1000, tolerance=10e-6) -> dict:
    if len(G) == 0:
        return {}

    # A right-stochastic graph is a weighted digraph in which for each node,
    # the sum of the weights of all the out-edges of that node is 1. (From NetworkX documentation)
    stoch = nx.stochastic_graph(G)
    n = stoch.number_of_nodes()

    ranks = dict.fromkeys(stoch, 1.0 / n)
    dead_ends = [node for node in stoch if stoch.out_degree(node) == 0.0]

    for _ in range(max_iterations):
        previous_x = ranks
        ranks = dict.fromkeys(previous_x.keys(), 0)
        dead_end_sum = damping*sum(previous_x[node] for node in dead_ends)
        for node in ranks:
            for outgoing_node in stoch[node]:
                ranks[outgoing_node] += damping * previous_x[node] * stoch[node][outgoing_node]['weight']
            ranks[node] += dead_end_sum * (1.0 / n) + (1.0 - damping) * (1.0 / n)

        # Check if the difference is within tolerance
        if sum([abs(ranks[node] - previous_x[node]) for node in ranks]) < n * tolerance:
            return ranks
    raise RuntimeError("pagerank: power iteration failed to converge "
                       "in %d iterations" % max_iterations)
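Assuming the definition above, a quick sanity check against networkx's own implementation on a small strongly connected graph:

import networkx as nx

G = nx.DiGraph([(0, 1), (1, 2), (2, 0), (0, 2)])
mine = pagerank(G, damping=0.85)
ref = nx.pagerank(G, alpha=0.85)
assert all(abs(mine[n] - ref[n]) < 1e-3 for n in G)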
Example No. 10
def pagerank_edgetypes_indirect(D, edgetype_scale, indirect_nodes, max_iter=100, tol=1.0e-6, weight='weight'):
    W = nx.stochastic_graph(D, weight=weight)
    N = W.number_of_nodes()

    direct_nodes = [a for a in W if a not in indirect_nodes]
    x = dict.fromkeys(direct_nodes, 1.0 / len(direct_nodes))
    p = dict.fromkeys(direct_nodes, 1.0 / len(direct_nodes))

    for _ in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0.0)
        weight_to_distribute = sum([(xlast[n] * W[n][nbr][weight] * edgetype_scale[W[n][nbr]['type']]) for n in x for nbr in W[n]])
        undistributed_weight = 1 - weight_to_distribute
        for n in x:
            for nbr in W[n]:
                if nbr in indirect_nodes:
                    contribution = xlast[n] * W[n][nbr][weight] * edgetype_scale[W[n][nbr]['type']] / len(W[nbr])
                    for nbr_adj in W[nbr]:
                        x[nbr_adj] += contribution
                else:
                    x[nbr] += xlast[n] * W[n][nbr][weight] * edgetype_scale[W[n][nbr]['type']]
            x[n] += undistributed_weight * p.get(n, 0)

        err = sum([abs(x[n] - xlast[n]) for n in x])
        if err < N * tol:
            return x
    raise nx.PowerIterationFailedConvergence(max_iter)
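Edges here must carry both a weight and a 'type' attribute; a hypothetical invocation (edge types and scales invented for illustration):

import networkx as nx

D = nx.DiGraph()
D.add_edge('a', 'b', weight=1.0, type='cites')
D.add_edge('b', 'a', weight=1.0, type='cites')
# with no indirect nodes this reduces to a weighted, scaled pagerank step
scores = pagerank_edgetypes_indirect(D, {'cites': 1.0}, indirect_nodes=[])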
Example No. 11
def page_rank(G, weight='weight'):
    max_iter = 100
    d = 0.85
    tol = 1.0e-6

    if len(G) == 0:
        return {}

    if not G.is_directed():
        D = G.to_directed()
    else:
        D = G
    W = nx.stochastic_graph(D, weight=weight)
    N = G.number_of_nodes()
    rank = dict.fromkeys(W, 1.0 / N)
    pvector = dict.fromkeys(W, 1.0 / N)
    dweights = pvector
    dnodes = [n for n in W if W.out_degree(n, weight='weight') == 0.0]

    for _ in range(max_iter):
        last = rank
        rank = dict.fromkeys(last.keys(), 0)
        dsum = d * sum(last[n] for n in dnodes)
        for n in rank:
            for nbr in W[n]:
                rank[nbr] += d * last[n] * W[n][nbr][weight]
            rank[n] += dsum * dweights[n] + (1.0 - d) * pvector[n]
        err = sum([abs(rank[n] - last[n]) for n in rank])
        if err < N * tol:
            return rank
    raise nx.PowerIterationFailedConvergence(max_iter)
Example No. 12
    def create_transition_matrix(self):
        """
        Creates ride_count_matrix and transition_matrix
        """
        # Create a networkx graph
        count_df = (self.df[[
            'pickup_bin', 'dropoff_bin', 'weight'
        ]].groupby(by=['pickup_bin', 'dropoff_bin']).sum().reset_index())

        G = nx.from_pandas_edgelist(df=count_df,
                                    source='pickup_bin',
                                    target='dropoff_bin',
                                    edge_attr=['weight'],
                                    create_using=nx.DiGraph())

        # Create ride_count_matrix
        ride_count_matrix = nx.to_numpy_matrix(G,
                                               nodelist=self.hex_bins,
                                               weight='weight')

        self.ride_count_matrix = np.squeeze(np.asarray(ride_count_matrix))

        # Create transition matrix
        G = nx.stochastic_graph(G, weight='weight')
        transition_matrix = nx.to_numpy_matrix(G,
                                               nodelist=self.hex_bins,
                                               weight='weight')

        transition_matrix = np.squeeze(np.asarray(transition_matrix))

        # Remove 0 values
        transition_matrix[transition_matrix == 0] = 0.001
        transition_matrix = (transition_matrix /
                             transition_matrix.sum(axis=1)[:, None])
        self.transition_matrix = transition_matrix
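The zero-smoothing step above deliberately breaks row-stochasticity and then restores it; the invariant it preserves, on made-up numbers:

import numpy as np

T = np.array([[0.0, 1.0], [0.5, 0.5]])
T[T == 0] = 0.001
T = T / T.sum(axis=1)[:, None]
assert np.allclose(T.sum(axis=1), 1.0)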
Example No. 14
def rpr_matrix(graph, alpha=0.85):
    D = graph.to_directed()
    H = nx.stochastic_graph(D)
    H = nx.to_numpy_matrix(H).transpose()
    I = np.eye(H.shape[0])
    S = alpha * np.linalg.inv(I - (1 - alpha) * H)
    return S
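Under this convention (restart probability alpha), column j of S is the rooted-PageRank distribution for root j, and each column sums to 1. A hedged usage sketch, assuming a pre-3.0 NetworkX where to_numpy_matrix still exists:

import networkx as nx
import numpy as np

G = nx.path_graph(4)
S = rpr_matrix(G, alpha=0.85)
scores = np.asarray(S[:, 0]).ravel()   # rooted at node 0
assert abs(scores.sum() - 1.0) < 1e-8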
Example No. 15
def rank(G, beta=0.85, max_iter=100, tol=1.0e-6, weight='weight'):

    if len(G) == 0:
        return {}

    M = nx.stochastic_graph(G)
    N = M.number_of_nodes()

    C = nx.to_numpy_matrix(M)
    print C

    v = dict.fromkeys(M, 1.0 / N)
    t = dict.fromkeys(M, 1.0 / N)
    #print matrix
    print v

    # power iteration: make up to max_iter iteration
    iter = 0
    for _ in range(max_iter):
        iter += 1
        vlast = v

        v = dict.fromkeys(vlast.keys(), 0)
        for n in v:
            for nbr in M[n]:
                v[nbr] += beta * vlast[n] * M[n][nbr][weight]
            v[n] += (1.0 - beta) * t.get(n, 0)
    # check convergence, l1 norm

        err = sum([abs(v[n] - vlast[n]) for n in v])
        if err < N * tol:
            break

    print v
    print iter
    return v
Example No. 16
def pagerank(G, alpha=0.85, personalization=None, max_iter=100, tol=1.0e-6, nstart=None, weight='weight', dangling=None): 
    
    if len(G) == 0: 
        return {} 
  
    if not G.is_directed(): 
        D = G.to_directed() 
    else: 
        D = G 
  
    
    W = net.stochastic_graph(D, weight=weight) 
    N = W.number_of_nodes()
    
    if nstart is None: 
        x = dict.fromkeys(W, 1.0 / N)
    else: 
        s = float(sum(nstart.values())) 
        x = dict((k, v / s) for k, v in nstart.items()) 
  
    if personalization is None: 
  
        p = dict.fromkeys(W, 1.0 / N) 
    else: 
        missing = set(G) - set(personalization) 
        if missing: 
            raise NetworkXError('Personalization dictionary '
                                'must have a value for every node. '
                                'Missing nodes %s' % missing) 
        s = float(sum(personalization.values())) 
        p = dict((k, v / s) for k, v in personalization.items()) 
  
    if dangling is None:  
        dangling_weights = p 
        #print(dangling_weights)
    else: 
        missing = set(G) - set(dangling) 
        if missing: 
            raise NetworkXError('Dangling node dictionary '
                                'must have a value for every node. '
                                'Missing nodes %s' % missing) 
        s = float(sum(dangling.values())) 
        dangling_weights = dict((k, v/s) for k, v in dangling.items()) 
    dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0] 
  
    for _ in range(max_iter): 
        xlast = x 
        x = dict.fromkeys(xlast.keys(), 0) 
        danglesum = alpha * sum(xlast[n] for n in dangling_nodes) 
        for n in x: 
  
            for nbr in W[n]: 
                x[nbr] += alpha * xlast[n] * W[n][nbr][weight] 
            x[n] += danglesum * dangling_weights[n] + (1.0 - alpha) * p[n] 
  
        err = sum([abs(x[n] - xlast[n]) for n in x]) 
        if err < N*tol: 
            return x 
    raise NetworkXError('pagerank: power iteration failed to converge '
                        'in %d iterations.' % max_iter)
Example No. 17
 def test_pagerank_algo(self):
     W = nx.stochastic_graph(self.D, weight='weight')
     prs = pagerank_unrecorded(W,
                               personalization=self._personalization,
                               weight='weight')
     for key, val in prs.iteritems():
         self.assertEqual(val, self.result_pr[key])
Example No. 18
    def __init__(self, G, alpha=0.85, weight='weight', debug=False):
        if not isinstance(G, nx.classes.graph.Graph):
            raise AttributeError(
                'The Graph is not an instance of networkx Graph')
        if isinstance(G, nx.classes.multidigraph.MultiDiGraph):
            raise AttributeError('The MultiDiGraph instance is not supported')
        if not isinstance(G, nx.classes.digraph.DiGraph):
            print('gen directed graph')
            G = self.gen_directed_graph(G, weight=weight)

        self._DEBUG = debug
        self._nodes = len(G)
        self._alpha = alpha
        weight_tot = 0.0
        # Test if each edge of the graph has a 'weight' attribute
        self._weight_attribut = weight
        for u, v, edata in G.edges(data=True):
            if self._weight_attribut not in edata:
                raise AttributeError(
                    'The Graph has a missing weight attribute on edges')

        self._G = nx.stochastic_graph(G, weight=weight)

        # Compute the pageRank for the Graph
        self.init_nodes_in_community()
        self.init_communities()
        self.build_personalization_vector(G)
        self.compute_pagerank(alpha=alpha)
Example No. 19
 def test_arbitrary_weights(self):
     G = nx.DiGraph()
     G.add_edge(0, 1, weight=1)
     G.add_edge(0, 2, weight=1)
     S = nx.stochastic_graph(G)
     assert_equal(sorted(S.edges(data=True)),
                  [(0, 1, {'weight': 0.5}), (0, 2, {'weight': 0.5})])
Example No. 20
def rooted_pagerank(graph, root_node, alpha=0.85, max_iter=100):
    if not graph.is_directed():
        D = graph.to_directed()
    else:
        D = graph
    H = nx.stochastic_graph(D)
    n = len(graph.nodes())
    H = nx.to_numpy_matrix(H).transpose()
    x = np.full((n, 1), 1.0 / n)
    v = np.full((n, 1), 0.0)
    nodes = D.nodes()
    index = 0
    for node in nodes:
        if node == root_node:
            v[index][0] = 1 - alpha
            break
        index = index + 1

    for _ in range(max_iter):
        x = np.add(alpha * np.dot(H, x), v)  # x = alpha*H*x + (1-alpha)*v

    x = np.squeeze(np.asarray(x))
    x = x / np.sum(x)
    return dict(zip(graph.nodes(), x))
Example No. 21
def get_mc_attributes(G):
    G = G.to_directed()
    G = nx.stochastic_graph(G)
    tm = nx.to_numpy_matrix(G)
    tm = np.squeeze(np.asarray(tm))

    return (G, tm)
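As used above, tm is the dense right-stochastic transition matrix of the chain, so its rows sum to 1 (again assuming a pre-3.0 NetworkX that still ships to_numpy_matrix):

import networkx as nx
import numpy as np

G, tm = get_mc_attributes(nx.path_graph(3))
assert np.allclose(tm.sum(axis=1), 1.0)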
Example No. 22
def calculate_pagerank(G,
                       alpha=0.85,
                       max_iter=100,
                       tol=1.0e-6,
                       weight='weight'):
    if not G.is_directed():
        D = G.to_directed()
    else:
        D = G
    # Create a copy in (right) stochastic form
    W = nx.stochastic_graph(D, weight=weight)
    N = W.number_of_nodes()
    x = dict.fromkeys(W, 1.0 / N)
    p = dict.fromkeys(W, 1.0 / N)
    dangling_weights = p
    dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]
    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0)
        danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
        for n in x:
            for nbr in W[n]:
                x[nbr] += alpha * xlast[n] * W[n][nbr][weight]
            x[n] += danglesum * dangling_weights[n] + (1.0 - alpha) * p[n]

        # check convergence, l1 norm
        err = sum([abs(x[n] - xlast[n]) for n in x])
        if err < N * tol:
            return x
    raise nx.PowerIterationFailedConvergence(max_iter)
Example No. 23
    def page_rank(self, G):
        d = 0.85
        iterator = 100
        tol = 1.0e-6
        weight = 'weight'
        if len(G) == 0:
            return {}

        if not G.is_directed():
            D = G.to_directed()
        else:
            D = G
        W = nx.stochastic_graph(D, weight='weight')
        N = W.number_of_nodes()
        x = dict.fromkeys(W, 1.0 / N)
        p = dict.fromkeys(W, 1.0 / N)

        for _ in range(iterator):
            xlast = x
            x = dict.fromkeys(xlast.keys(), 0)
            for n in x:
                for nbr in W[n]:
                    x[nbr] += d * xlast[n] * W[n][nbr][weight]
                x[n] += (1.0 - d) * p[n]
            err = sum([abs(x[n] - xlast[n]) for n in x])
            if err < N * tol:
                return x
Example No. 24
    def getPageRank(self, D, d=0.85, max_iter=60, tol=1.0e-6, weight='weight'):
        print('BEGIN PAGE RANK CALCULATION')
        if len(D) == 0:
            return {}
        G = nx.stochastic_graph(D, weight=weight)
        N = G.number_of_nodes()
        print('Number of nodes: ' + str(N))
        x = dict.fromkeys(G, 1.0 / N)
        p = dict.fromkeys(G, 1.0 / N)
        dangling_weights = x
        dangling_nodes = [n for n in G if G.out_degree(n, weight=weight) == 0.0]
        for i in range(max_iter):
            print('Enter iteration ' + str(i))
            xlast = x
            x = dict.fromkeys(xlast.keys(), 0)
            danglesum = d * sum(xlast[n] for n in dangling_nodes)
            for n in x:
                # this matrix multiply looks odd because it is
                # doing a left multiply x^T=xlast^T*W
                for nbr in G[n]:
                    x[nbr] += d * xlast[n] * G[n][nbr][weight]
                # dangling and teleport terms belong outside the neighbour loop
                x[n] += danglesum * dangling_weights[n] + (1.0 - d) * p[n]

            # check convergence, l1 norm
            err = sum([abs(x[n] - xlast[n]) for n in x])
            if err < N * tol:
                return x
        print('pagerank: power iteration failed to converge in ' + str(max_iter) + ' iterations')
        return x
Example No. 25
def pagerank(G,
             alpha=0.85,
             personalization=None,
             max_iter=100,
             tol=1.0e-6,
             nstart=None,
             weight='weight',
             dangling=None):
    if len(G) == 0:
        return {}

    if not G.is_directed():
        D = G.to_directed()
    else:
        D = G

    # Create a copy in (right) stochastic form
    W = nx.stochastic_graph(D, weight=weight)
    N = W.number_of_nodes()

    # Choose fixed starting vector if not given
    if nstart is None:
        x = dict.fromkeys(W, 1.0 / N)
    else:
        # Normalized nstart vector
        s = float(sum(nstart.values()))
        x = dict((k, v / s) for k, v in nstart.items())

    if personalization is None:
        # Assign uniform personalization vector if not given
        p = dict.fromkeys(W, 1.0 / N)
    else:
        s = float(sum(personalization.values()))
        p = dict((k, v / s) for k, v in personalization.items())

    if dangling is None:
        # Use personalization vector if dangling vector not specified
        dangling_weights = p
    else:
        s = float(sum(dangling.values()))
        dangling_weights = dict((k, v / s) for k, v in dangling.items())
    dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]

    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0)
        danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
        for n in x:
            # this matrix multiply looks odd because it is
            # doing a left multiply x^T=xlast^T*W
            for nbr in W[n]:
                x[nbr] += alpha * xlast[n] * W[n][nbr][weight]
            x[n] += danglesum * dangling_weights.get(
                n, 0) + (1.0 - alpha) * p.get(n, 0)
        # check convergence, l1 norm
        err = sum([abs(x[n] - xlast[n]) for n in x])
        if err < N * tol:
            return x
    raise nx.PowerIterationFailedConvergence(max_iter)
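Assuming the definition above, a minimal call mirroring the standard networkx API; the total rank mass should stay at 1:

import networkx as nx

G = nx.DiGraph(nx.path_graph(4))
pr = pagerank(G, alpha=0.9)
assert abs(sum(pr.values()) - 1.0) < 1e-6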
Example No. 26
 def test_multidigraph(self):
     G = nx.MultiDiGraph()
     G.add_edges_from([(0, 1), (0, 1), (0, 2), (0, 2)])
     S = nx.stochastic_graph(G)
     d = dict(weight=0.25)
     assert_equal(sorted(S.edges(data=True)),
                  [(0, 1, d), (0, 1, d), (0, 2, d), (0, 2, d)])
Example No. 27
def test_stochastic_ints():
    G=nx.DiGraph()
    G.add_edge(0,1,weight=1)
    G.add_edge(0,2,weight=1)
    S=nx.stochastic_graph(G)
    assert_equal(sorted(S.edges(data=True)),
                 [(0, 1, {'weight': 0.5}), 
                  (0, 2, {'weight': 0.5})])
Example No. 28
def get_mc_attributes(start_time="2012-04-01 10:00:00", duration=120):
    # Create csv read iterator
    data = read_trips_file("hubway_trips_2012.csv")
    start_time = pd.to_datetime(start_time)
    end_time = start_time + timedelta(minutes=duration)

    df = data[(data['start_date'] >= start_time)
              & (data['end_date'] <= end_time)]
    stations = read_stations_file("hubway_stations.csv")
    status = read_status_file("stationstatus_2012_4.csv")
    status_df = status[status['update'] == start_time]

    # Remove trips starting or ending in the stations not present in stations dataframe
    # or stations not present in the status file

    station_ids = set(stations['id'])
    status_df = status_df[status_df['station_id'].isin(station_ids)]

    df = df[(df['strt_statn'].isin(station_ids))
            & (df['end_statn'].isin(station_ids))]
    trips_df = pd.DataFrame(
        {'weight': df.groupby(['strt_statn', 'end_statn']).size()})
    trips_df = trips_df.reset_index()

    print "Creating networkx graph"
    G = nx.from_pandas_dataframe(trips_df,
                                 'strt_statn',
                                 'end_statn',
                                 'weight',
                                 create_using=nx.DiGraph())
    G = nx.stochastic_graph(G, weight='weight')

    # Add stations that are present in status_ids but not in trips_df
    status_ids = set(status['station_id'])
    for node in status_ids - set(G.nodes()):
        G.add_node(node)

    print "Creating transition matrix"
    transition_matrix = nx.to_numpy_matrix(G, weight='weight')
    transition_matrix = np.squeeze(np.asarray(transition_matrix))

    print "Creating object assignment and distribution"
    object_assignment = {}
    object_distribution = {}

    for node in G.nodes():
        try:
            object_assignment[node] = status_df[status_df['station_id'] ==
                                                node].get('nbBikes').item()
        except Exception:
            object_assignment[node] = 0

    num_objects = sum(object_assignment.values())

    for node in G.nodes():
        object_distribution[node] = 1.0 * object_assignment[node] / num_objects

    return (num_objects, transition_matrix, G, object_distribution)
Example No. 29
 def test_arbitrary_weights(self):
     G = nx.DiGraph()
     G.add_edge(0, 1, weight=1)
     G.add_edge(0, 2, weight=1)
     S = nx.stochastic_graph(G)
     assert sorted(S.edges(data=True)) == [
         (0, 1, {"weight": 0.5}),
         (0, 2, {"weight": 0.5}),
     ]
Example No. 30
 def test_default_weights(self):
     G = nx.DiGraph()
     G.add_edge(0, 1)
     G.add_edge(0, 2)
     S = nx.stochastic_graph(G)
     assert nx.is_isomorphic(G, S)
     assert sorted(S.edges(data=True)) == [
         (0, 1, {"weight": 0.5}),
         (0, 2, {"weight": 0.5}),
     ]
Example No. 31
def pagerank(Graph, alpha=0.85, personalization = None , max_iteration = 50 , tolerance = 1.0e-8, initial_set_rank = None , weight='weight', dangling=None):

    if len(Graph) == 0:
        return {}
    #check directed of graph
    if not Graph.is_directed():
        Directed_Graph = Graph.to_directed()
    else:
        Directed_Graph = Graph
    #get graph to stochastic graph
    stochastic_graph = nx.stochastic_graph(Directed_Graph, weight=weight)
    Number_node = stochastic_graph.number_of_nodes()

    #set initial rank for each node
    if initial_set_rank is None:
        x = dict.fromkeys(stochastic_graph, 1.0 / Number_node)
    else:
        sum_initial_set_rank = float(sum(initial_set_rank.values()))
        x = dict((key, value / sum_initial_set_rank) for key, value in initial_set_rank.items())


    # create fake (teleport) links between nodes
    if personalization is None:
        personalization_vector = dict.fromkeys(stochastic_graph, 1.0 / Number_node)
    else:

        sum_personalization_vector = float(sum(personalization.values()))
        personalization_vector = dict((key, value / sum_personalization_vector) for key, value in personalization.items())


    #set stochastic value for node without outlink
    if dangling is None:
        dangling_weights = personalization_vector
    else:

        sum_dangling_weights = float(sum(dangling.values()))
        dangling_weights = dict((key, value / sum_dangling_weights) for key, value in dangling.items())
    dangling_nodes = [n for n in stochastic_graph if stochastic_graph.out_degree(n, weight=weight) == 0.0]

    #calculate the rank for matrix A = αP +(1−α)1/n*eeT
    for i in range(max_iteration):
        print("iteration {} : the rank of page is :\n".format(i))
        last_rank = x
        x = dict.fromkeys(last_rank.keys(), 0)
        danglesum = alpha * sum(last_rank[n] for n in dangling_nodes)
        for n in x:
            for nbr in stochastic_graph[n]:
                x[nbr] += alpha * last_rank[n] * stochastic_graph[n][nbr][weight]
            x[n] += danglesum * dangling_weights[n] + (1.0 - alpha) * personalization_vector[n]
        print(x)
        print("\n----------------------------------------------------------")
        err = sum([abs(x[n] - last_rank[n]) for n in x])
        if err < Number_node * tolerance:
            print("The algoritm is converge and the last rank is :\n")
            return x
Example No. 32
def pagerank(G,
             alpha=0.85,
             personalization=None,
             max_iter=100,
             tol=1.0e-6,
             nstart=None,
             weight="weight",
             dangling=None):
    # PageRank value of each node
    if len(G) == 0:
        return {}

    if not G.is_directed():
        D = G.to_directed()
    else:
        D = G

    W = nx.stochastic_graph(D, weight=weight)
    N = W.number_of_nodes()

    if nstart is None:
        x = dict.fromkeys(W, 1.0 / N)
    else:
        s = float(sum(nstart.values()))
        x = {k: v / s for k, v in nstart.items()}

    if personalization is None:
        p = dict.fromkeys(W, 1.0 / N)
    else:
        s = float(sum(personalization.values()))
        p = {k: v / s for k, v in personalization.items()}

    if dangling is None:
        dangling_weights = p
    else:
        s = float(sum(dangling.values()))
        dangling_weights = {k: v / s for k, v in dangling.items()}
    dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]

    for _ in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0)
        danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
        for n in x:
            for nbr in W[n]:
                x[nbr] += alpha * xlast[n] * W[n][nbr][weight]
            x[n] += danglesum * dangling_weights.get(
                n, 0) + (1.0 - alpha) * p.get(n, 0)
        err = sum([abs(x[n] - xlast[n]) for n in x])
        if err < N * tol:
            return x
    raise nx.PowerIterationFailedConvergence(max_iter)
Example No. 33
def create_graph(filename):

    ##    This function accepts a filename string as a parameter.
    ##    This filename contains edgelist in the format of "node1 node2"
    ##    which indicates the former node points to the latter. Using
    ##    read_edgelist function of the networkx library , we can easily
    ##    make a directed graph from the given information. The function
    ##    returns a directed graph.

    g = nx.read_edgelist(filename, create_using=nx.DiGraph())
    n = nx.number_of_nodes(g)
    g = nx.stochastic_graph(g)

    return g, n
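A hedged usage sketch for create_graph; the file name and edge list are illustrative, and read_edgelist loads node labels as strings by default:

with open('edges.txt', 'w') as f:
    f.write('1 2\n2 3\n3 1\n')

g, n = create_graph('edges.txt')
assert n == 3
assert g['1']['2']['weight'] == 1.0   # single out-edge, normalized to 1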
Example No. 34
def pagerank_iterative(G, d=0.85, max_iter=100, tol=1.0e-6, weight='weight'):
    """PageRank calculation, done iteratively."""

    # Step 1: Initiate PageRank
    N = G.number_of_nodes()  # N = 11
    node_and_pr = dict.fromkeys(G, 1.0 / N)

    # Step 2: Create a copy in (right) stochastic form
    stochastic_graph = nx.stochastic_graph(G, weight=weight)  # M = 1/L(pj)

    # Step 3: Power iteration: make up to max_iter iterations
    dangling_value = (1 - d) / N

    for _ in range(max_iter):  # for each iteration
        node_and_prev_pr = node_and_pr
        node_and_pr = dict.fromkeys(node_and_prev_pr.keys(), 0)

        for node in node_and_pr:  # for each node
            for out_node in stochastic_graph[node]:  # node --> out_node
                node_and_pr[out_node] += d * node_and_prev_pr[
                    node] * stochastic_graph[node][out_node][
                        weight]  # PR(p_i) = d * PR(p_j)}/L(p_j)

            node_and_pr[node] += dangling_value

        # Plot graph with one iteration
        '''
		out_file = 'wikipedia_pagerank_example_iteration_1.pdf'
		node_size = [pr*30000 for node, pr in node_and_pr.items()]
		node_and_labels = {node : node+'\n'+str(round(pr, 3))
							for node, pr in node_and_pr.items()}

		plotnxgraph.plot_graph(G, out_file=out_file, node_size=node_size, node_and_labels=node_and_labels)
		return
		'''

        # check convergence, l1 norm
        err = sum([
            abs(node_and_pr[node] - node_and_prev_pr[node])
            for node in node_and_pr
        ])
        if err < N * tol:
            return node_and_pr

    raise NetworkXError(
        'pagerank: power iteration failed to converge in {} iterations.'.
        format(max_iter))
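A quick check of the simplified dangling handling above (the whole (1 - d)/N teleport term is folded into dangling_value); on a 3-cycle the ranks are uniform:

import networkx as nx

G = nx.DiGraph([(0, 1), (1, 2), (2, 0)])
pr = pagerank_iterative(G)
assert abs(sum(pr.values()) - 1.0) < 1e-6
assert abs(pr[0] - 1.0 / 3) < 1e-6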
Example No. 35
def get_mc_attributes(start_time="2012-04-01 10:00:00", duration=120):
    # Create csv read iterator
    data = read_trips_file("hubway_trips_2012.csv")
    start_time = pd.to_datetime(start_time)
    end_time = start_time + timedelta(minutes=duration)

    df = data[(data['start_date'] >= start_time)
              & (data['end_date'] <= end_time)]
    stations = read_stations_file("hubway_stations.csv")
    status = read_status_file("stationstatus_2012_4.csv")
    status_df = status[status['update'] == start_time]

    # Remove trips starting or ending in the stations which are not present
    # in stations dataframe or stations not present in the status file

    station_ids = set(stations['id'])
    status_df = status_df[status_df['station_id'].isin(station_ids)]

    df = df[(df['strt_statn'].isin(station_ids))
            & (df['end_statn'].isin(station_ids))]
    trips_df = pd.DataFrame(
        {'weight': df.groupby(['strt_statn', 'end_statn']).size()})
    trips_df = trips_df.reset_index()
    print "Number of trips:{}".format(len(df))

    print "Creating networkx graph"
    G = nx.from_pandas_dataframe(trips_df,
                                 'strt_statn',
                                 'end_statn',
                                 'weight',
                                 create_using=nx.DiGraph())
    G = nx.stochastic_graph(G, weight='weight')

    # Add stations that are present in status_ids but not in trips_df
    status_ids = set(status['station_id'])
    for node in status_ids - set(G.nodes()):
        G.add_node(node)

    print "Creating item distribution"
    initial_item_distribution = {}
    for node in G.nodes():
        try:
            initial_item_distribution[node] = status_df[
                status_df['station_id'] == node].get('nbBikes').item()
        except Exception:
            initial_item_distribution[node] = 0

    return G, initial_item_distribution
Example No. 36
def pagerank(g, max_iter, alpha, tau):
    sg = nx.stochastic_graph(g)  #stochastic graph
    n_nodes = nx.number_of_nodes(g)
    nodes = g.nodes()

    PI = [1.0 / n_nodes] * n_nodes  #initialization of pagerank

    a = []  #dangling nodes vector
    for n in nodes:
        if g.out_degree(n):
            a.append(1)
        else:
            a.append(0)

    H = nx.adjacency_matrix(sg)
    for i in range(max_iter):
        pi_previous = PI

        #v1 = alpha(pi_previous^T*H)
        v1 = [0] * n_nodes
        for r in range(n_nodes):
            row = H[r, :].toarray()
            for c in range(n_nodes):
                v1[c] += pi_previous[r] * row[0][c]
        v1 = [alpha * v for v in v1]

        #v2 = alpha(pi_previous^T*a)1/n*e^T
        dang_pi = 0
        for e in range(n_nodes):
            dang_pi += pi_previous[e] * a[e]
        constant = alpha * dang_pi + 1 - alpha
        v2 = [float(constant) / n_nodes] * n_nodes

        #pi = v1 + v2
        for e in range(n_nodes):
            PI[e] = v1[e] + v2[e]

        PI = normalize(PI)

        #check convergence
        delta = 0
        for e in range(n_nodes):
            delta += abs(PI[e] - pi_previous[e])
        if delta < tau * n_nodes:
            return transform_pagerank(PI)
    return transform_pagerank(PI)
Example No. 37
def pagerank(G, alpha=0.85, personalization=None,
             max_iter=100, tol=1.0e-10, nstart=None, weight='weight',
             dangling=None):
    if len(G) == 0:
        return {}

    if not G.is_directed():
        D = G.to_directed()
    else:
        D = G

    # Create a copy in (right) stochastic form
    W = nx.stochastic_graph(D, weight=weight)
    N = W.number_of_nodes()

    # Choose fixed starting vector if not given
    x = dict.fromkeys(W, 1.0 / N)
    
    p = dict.fromkeys(W, 1.0 / N)
    
    dangling_weights = p
    
    dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]

    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0)
        danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
        for n in x:
            # this matrix multiply looks odd because it is
            # doing a left multiply x^T=xlast^T*W
            for nbr in W[n]:
                x[nbr] += alpha * xlast[n] * W[n][nbr][weight]
            x[n] += danglesum * dangling_weights[n] + (1.0 - alpha) * p[n]
        # check convergence, l1 norm
        err = sum([abs(x[n] - xlast[n]) for n in x])
        if err < N*tol:
            return x
    return x
Example No. 39
    def run_pagerank(self, G, alpha=0.85, pers=None, max_iter=1000,
                             tol=1.0e-6, nstart=None, weight='weight', node_types=None):
        """Return the PageRank of the nodes in the graph.

        PageRank computes a ranking of the nodes in the graph G based on
        the structure of the incoming links. It was originally designed as
        an algorithm to rank web pages.

        Parameters
        -----------
        G : graph
            A NetworkX graph

        alpha : float, optional
            Damping parameter for PageRank, default=0.85

        pers: dict, optional
             The "pers vector" consisting of a dictionary with a
             key for every graph node and nonzero pers value for each node.

        max_iter : integer, optional
            Maximum number of iterations in power method eigenvalue solver.

        tol : float, optional
            Error tolerance used to check convergence in power method solver.

        nstart : dictionary, optional
            Starting value of PageRank iteration for each node.

        weight : key, optional
            Edge data key to use as weight. If None weights are set to 1.

        Returns
        -------
        pagerank : dictionary
             Dictionary of nodes with PageRank as value

        Notes
        -----
        The eigenvector calculation is done by the power iteration method
        and has no guarantee of convergence.    The iteration will stop
        after max_iter iterations or an error tolerance of
        number_of_nodes(G)*tol has been reached.
        """

        if len(G) == 0:
                return {}

        # create a copy in (right) stochastic form
        W = nx.stochastic_graph(G, weight=weight)

        scale = 1.0 / W.number_of_nodes()

        # choose fixed starting vector if not given
        if nstart is None:
            x = dict.fromkeys(W, scale)
        else:
            x = nstart
            # normalize starting vector to 1
            s = 1.0/sum(x.values())
            for k in x: x[k]*=s

        # assign uniform pers vector if not given
        if pers is None:
            pers = dict.fromkeys(W, scale)
        else:
            # Normalize the sum to 1
            s = sum(pers.values())

            for k in pers.keys():
                pers[k] /= s

            if len(pers)!=len(G):
                    raise Exception('Personalization vector must have a value for every node')


        # "dangling" nodes, no links out from them
        out_degree = W.out_degree()
        dangle = [n for n in W if out_degree[n]==0.0]

        itr = 0
        while True: # power iteration: make up to max_iter iterations
            xlast = x
            x = dict.fromkeys(xlast.keys(), 0)

            # "dangling" nodes only consume energies, so we release these energies manually
            danglesum = alpha*scale*sum(xlast[n] for n in dangle)
            # danglesum = 0

            for n in x:
                # this matrix multiply looks odd because it is
                # doing a left multiply x^T=xlast^T*W
                for nbr in W[n]:
                    x[nbr] += alpha*xlast[n]*W[n][nbr][weight]

                x[n] += danglesum + (1 - alpha) * pers[n]

            # normalize vector
            s = 1.0 / sum(x.values())
            for n in x:
                x[n]*=s


            # check convergence, l1 norm
            err = sum([abs(x[n] - xlast[n]) for n in x])
            if err < tol:
                print "converged in %d iterations." % itr
                break
            if itr > max_iter:
                raise Exception('pagerank: power iteration failed to converge '
                                                        'in %d iterations.'%(itr-1))
            itr += 1



        # Returns:
        #   x: PageRank of each node
        #   itr: number of iterations to converge

        return x, itr
Example No. 40
def pagerank(G, alpha=0.85, personalization=None,
             max_iter=100, tol=1.0e-6, nstart=None, weight='weight',
             dangling=None):
    """Return the PageRank of the nodes in the graph.

    PageRank computes a ranking of the nodes in the graph G based on
    the structure of the incoming links. It was originally designed as
    an algorithm to rank web pages.

    Parameters
    -----------
    G : graph
      A NetworkX graph.  Undirected graphs will be converted to a directed
      graph with two directed edges for each undirected edge.

    alpha : float, optional
      Damping parameter for PageRank, default=0.85.

    personalization: dict, optional
      The "personalization vector" consisting of a dictionary with a
      key for every graph node and nonzero personalization value for each node.
      By default, a uniform distribution is used.

    max_iter : integer, optional
      Maximum number of iterations in power method eigenvalue solver.

    tol : float, optional
      Error tolerance used to check convergence in power method solver.

    nstart : dictionary, optional
      Starting value of PageRank iteration for each node.

    weight : key, optional
      Edge data key to use as weight.  If None weights are set to 1.

    dangling: dict, optional
      The outedges to be assigned to any "dangling" nodes, i.e., nodes without
      any outedges. The dict key is the node the outedge points to and the dict
      value is the weight of that outedge. By default, dangling nodes are given
      outedges according to the personalization vector (uniform if not
      specified). This must be selected to result in an irreducible transition
      matrix (see notes under google_matrix). It may be common to have the
      dangling dict to be the same as the personalization dict.

    Returns
    -------
    pagerank : dictionary
       Dictionary of nodes with PageRank as value

    Examples
    --------
    >>> G = nx.DiGraph(nx.path_graph(4))
    >>> pr = nx.pagerank(G, alpha=0.9)

    Notes
    -----
    The eigenvector calculation is done by the power iteration method
    and has no guarantee of convergence.  The iteration will stop
    after max_iter iterations or an error tolerance of
    number_of_nodes(G)*tol has been reached.

    The PageRank algorithm was designed for directed graphs but this
    algorithm does not check if the input graph is directed and will
    execute on undirected graphs by converting each edge in the
    directed graph to two edges.

    See Also
    --------
    pagerank_numpy, pagerank_scipy, google_matrix

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
       The PageRank citation ranking: Bringing order to the Web. 1999
       http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
    """
    if len(G) == 0:
        return {}

    if not G.is_directed():
        D = G.to_directed()
    else:
        D = G

    # Create a copy in (right) stochastic form
    W = nx.stochastic_graph(D, weight=weight)
    N = W.number_of_nodes()

    # Choose fixed starting vector if not given
    if nstart is None:
        x = dict.fromkeys(W, 1.0 / N)
    else:
        # Normalized nstart vector
        s = float(sum(nstart.values()))
        x = dict((k, v / s) for k, v in nstart.items())

    if personalization is None:
        # Assign uniform personalization vector if not given
        p = dict.fromkeys(W, 1.0 / N)
    else:
        missing = set(G) - set(personalization)
        if missing:
            raise NetworkXError('Personalization dictionary '
                                'must have a value for every node. '
                                'Missing nodes %s' % missing)
        s = float(sum(personalization.values()))
        p = dict((k, v / s) for k, v in personalization.items())

    if dangling is None:
        # Use personalization vector if dangling vector not specified
        dangling_weights = p
    else:
        missing = set(G) - set(dangling)
        if missing:
            raise NetworkXError('Dangling node dictionary '
                                'must have a value for every node. '
                                'Missing nodes %s' % missing)
        s = float(sum(dangling.values()))
        dangling_weights = dict((k, v/s) for k, v in dangling.items())
    dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]

    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0)
        danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
        for n in x:
            # this matrix multiply looks odd because it is
            # doing a left multiply x^T=xlast^T*W
            for nbr in W[n]:
                x[nbr] += alpha * xlast[n] * W[n][nbr][weight]
            x[n] += danglesum * dangling_weights[n] + (1.0 - alpha) * p[n]
        # check convergence, l1 norm
        err = sum([abs(x[n] - xlast[n]) for n in x])
        if err < N*tol:
            return x
    raise NetworkXError('pagerank: power iteration failed to converge '
                        'in %d iterations.' % max_iter)
Example No. 41
 def stochastic(klass, D, weight):
     # create a copy in (right) stochastic form
     ## a form in which each node's out-edge weights sum to 1
     H = nx.stochastic_graph(D, weight=weight)
     return H
Example No. 42
def pagerank(G,alpha=0.85,max_iter=100,tol=1.0e-8,nstart=None):
    """Return the PageRank of the nodes in the graph.

    PageRank computes the largest eigenvector of the stochastic
    adjacency matrix of G.  
    

    Parameters
    -----------
    G : graph
      A networkx graph 

    alpha : float, optional
      Parameter for PageRank, default=0.85
       
    max_iter : integer, optional
      Maximum number of iterations in power method.

    tol : float, optional
      Error tolerance used to check convergence in power method iteration.

    nstart : dictionary, optional
      Starting value of PageRank iteration for each node. 

    Returns
    -------
    nodes : dictionary
       Dictionary of nodes with value as PageRank 


    Examples
    --------
    >>> G=nx.DiGraph(nx.path_graph(4))
    >>> pr=nx.pagerank(G,alpha=0.9)

    Notes
    -----
    The eigenvector calculation is done by the power iteration method
    and has no guarantee of convergence.  The iteration will stop
    after max_iter iterations or an error tolerance of
    number_of_nodes(G)*tol has been reached.

    The PageRank algorithm was designed for directed graphs but this
    algorithm does not check if the input graph is directed and will
    execute on undirected graphs.

    For an overview see:
    A. Langville and C. Meyer, "A survey of eigenvector methods of web
    information retrieval."  http://citeseer.ist.psu.edu/713792.html

    """
    import networkx
    if type(G) == networkx.MultiGraph or type(G) == networkx.MultiDiGraph:
        raise Exception("pagerank() not defined for graphs with multiedges.")

    # create a copy in (right) stochastic form        
    W=networkx.stochastic_graph(G)        

    # choose fixed starting vector if not given
    if nstart is None:
        x=dict.fromkeys(W,1.0/W.number_of_nodes())
    else:
        x=nstart
        # normalize starting vector to 1                
        s=1.0/sum(x.values())
        for k in x: x[k]*=s

    nnodes=W.number_of_nodes()
    # "dangling" nodes, no links out from them
    out_degree=W.out_degree(with_labels=True)
#    dangle=[n for n in W if sum(W[n].values())==0.0]  
    dangle=[n for n in W if out_degree[n]==0.0]  
    # pagerank power iteration: make up to max_iter iterations        
    for i in range(max_iter):
        xlast=x
        x=dict.fromkeys(xlast.keys(),0)
        danglesum=alpha/nnodes*sum(xlast[n] for n in dangle)
        teleportsum=(1.0-alpha)/nnodes*sum(xlast.values())
        for n in x:
            # this matrix multiply looks odd because it is
            # doing a left multiply x^T=xlast^T*W
            for nbr in W[n]:
                x[nbr]+=alpha*xlast[n]*W[n][nbr]['weight']
            x[n]+=danglesum+teleportsum
        # normalize vector to 1                
        s=1.0/sum(x.values())
        for n in x: x[n]*=s
        # check convergence, l1 norm            
        err=sum([abs(x[n]-xlast[n]) for n in x])
        if err < tol:
            return x

    raise NetworkXError("pagerank: power iteration failed to converge in %d iterations."%(i+1))
Example No. 43
def pagerank(G, alpha=0.85, personalization=None,
             max_iter=100, tol=1.0e-8, nstart=None, weight='weight'):
    """Return the PageRank of the nodes in the graph.

    PageRank computes a ranking of the nodes in the graph G based on
    the structure of the incoming links. It was originally designed as
    an algorithm to rank web pages.

    Parameters
    -----------
    G : graph
      A NetworkX graph

    alpha : float, optional
      Damping parameter for PageRank, default=0.85

    personalization: dict, optional
       The "personalization vector" consisting of a dictionary with a
       key for every graph node and nonzero personalization value for each node.

    max_iter : integer, optional
      Maximum number of iterations in power method eigenvalue solver.

    tol : float, optional
      Error tolerance used to check convergence in power method solver.

    nstart : dictionary, optional
      Starting value of PageRank iteration for each node.

    weight : key, optional
      Edge data key to use as weight.  If None weights are set to 1.

    Returns
    -------
    pagerank : dictionary
       Dictionary of nodes with PageRank as value

    Examples
    --------
    >>> G=nx.DiGraph(nx.path_graph(4))
    >>> pr=nx.pagerank(G,alpha=0.9)

    Notes
    -----
    The eigenvector calculation is done by the power iteration method
    and has no guarantee of convergence.  The iteration will stop
    after max_iter iterations or an error tolerance of
    number_of_nodes(G)*tol has been reached.

    The PageRank algorithm was designed for directed graphs but this
    algorithm does not check if the input graph is directed and will
    execute on undirected graphs by converting each oriented edge in the
    directed graph to two edges.

    """
    if type(G) == nx.MultiGraph or type(G) == nx.MultiDiGraph:
        raise Exception("pagerank() not defined for graphs with multiedges.")

    if len(G) == 0:
        return {}

    if not G.is_directed():
        D = G.to_directed()
    else:
        D = G

    # create a copy in (right) stochastic form
    # each row of W sum up to be one
    W = nx.stochastic_graph(D, weight=weight)
    scale = 1.0 / W.number_of_nodes()

    # choose fixed starting vector if not given
    if nstart is None:
        x = dict.fromkeys(W, scale)
    else:
        x = nstart
        # normalize starting vector to 1
        s = 1.0 / sum(x.values())
        for k in x: x[k] *= s

    # assign uniform personalization/teleportation vector if not given
    if personalization is None:
        # teleport
        p = dict.fromkeys(W, scale)
    else:
        # teleport with bias
        p = personalization
        # normalize starting vector to 1
        s = 1.0 / sum(p.values())
        for k in p:
            p[k] *= s
        if set(p) != set(G):
            raise NetworkXError('Personalization vector '
                                'must have a value for every node')


    # "dangling" nodes, no links out from them
    out_degree = W.out_degree()
    dangle = [ n for n in W if out_degree[n]==0.0 ]
    i = 0
    # real 'tol' 
    tol = W.number_of_nodes() * tol
    #
    while True: # power iteration: make up to max_iter iterations
        xlast = x # x is the vector containing the value of page rank
        x = dict.fromkeys(xlast.keys(), 0)
        # dangle nodes have no out links, so we sum all the rank for these 
        # nodes, and then scale it and alpha it for the next step
        # just like making each dangle have pseudo edge to every link to the web
        danglesum = alpha * scale * sum(xlast[n] for n in dangle)
        for n in x:
            # this matrix multiply looks odd because it is
            # doing a left multiply x^T=xlast^T*W  # W is inlink form
            for nbr in W[n]:# linear combination of lines
                x[nbr] += alpha * xlast[n] * W[n][nbr][weight] 
            x[n] += danglesum + (1.0-alpha)*p[n]
        # normalize vector
        s = 1.0 / sum(x.values())
        for n in x:
            x[n] *= s
        # check convergence, l1 norm
        err = sum([abs(x[n]-xlast[n]) for n in x])
        if err < tol: # ok
            break
        if i > max_iter:
            raise NetworkXError('pagerank: power iteration failed to converge '
                                'in %d iterations.'%(i-1))
        i+=1
    return x
Example No. 44
def divrank(G, alpha=0.25, d=0.85, personalization=None,
            max_iter=100, tol=1.0e-6, nstart=None, weight='weight',
            dangling=None):
    '''
    Returns the DivRank (Diverse Rank) of the nodes in the graph.
    This code is based on networkx.pagerank.

    Args: (diff from pagerank)
      alpha: controls strength of self-link [0.0-1.0]
      d: the damping factor

    Reference:
      Qiaozhu Mei and Jian Guo and Dragomir Radev,
      DivRank: the Interplay of Prestige and Diversity in Information Networks,
      http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.174.7982
    '''

    if len(G) == 0:
        return {}

    if not G.is_directed():
        D = G.to_directed()
    else:
        D = G

    # Create a copy in (right) stochastic form
    W = nx.stochastic_graph(D, weight=weight)
    N = W.number_of_nodes()

    # self-link (DivRank)
    for n in W.nodes_iter():
        for n_ in W.nodes_iter():
            if n != n_ :
                if n_ in W[n]:
                    W[n][n_][weight] *= alpha
            else:
                if n_ not in W[n]:
                    W.add_edge(n, n_)
                W[n][n_][weight] = 1.0 - alpha

    # Choose fixed starting vector if not given
    if nstart is None:
        x = dict.fromkeys(W, 1.0 / N)
    else:
        # Normalized nstart vector
        s = float(sum(nstart.values()))
        x = dict((k, v / s) for k, v in nstart.items())

    if personalization is None:
        # Assign uniform personalization vector if not given
        p = dict.fromkeys(W, 1.0 / N)
    else:
        missing = set(G) - set(personalization)
        if missing:
            raise NetworkXError('Personalization dictionary '
                                'must have a value for every node. '
                                'Missing nodes %s' % missing)
        s = float(sum(personalization.values()))
        p = dict((k, v / s) for k, v in personalization.items())

    if dangling is None:
        # Use personalization vector if dangling vector not specified
        dangling_weights = p
    else:
        missing = set(G) - set(dangling)
        if missing:
            raise NetworkXError('Dangling node dictionary '
                                'must have a value for every node. '
                                'Missing nodes %s' % missing)
        s = float(sum(dangling.values()))
        dangling_weights = dict((k, v/s) for k, v in dangling.items())
    dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]

    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0)
        danglesum = d * sum(xlast[n] for n in dangling_nodes)
        for n in x:
            D_t = sum(W[n][nbr][weight] * xlast[nbr] for nbr in W[n])
            for nbr in W[n]:
                #x[nbr] += d * xlast[n] * W[n][nbr][weight]
                x[nbr] += (
                    d * (W[n][nbr][weight] * xlast[nbr] / D_t) * xlast[n]
                )
            x[n] += danglesum * dangling_weights[n] + (1.0 - d) * p[n]

        # check convergence, l1 norm
        err = sum([abs(x[n] - xlast[n]) for n in x])
        if err < N*tol:
            return x
    raise NetworkXError('divrank: power iteration failed to converge '
                        'in %d iterations.' % max_iter)
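A minimal usage sketch for divrank, assuming the networkx 1.x API that the function itself relies on (nodes_iter); the graph and edge weights are invented:

import networkx as nx

G = nx.DiGraph()
G.add_weighted_edges_from([('a', 'b', 1.0), ('b', 'c', 1.0),
                           ('c', 'a', 1.0), ('a', 'c', 2.0)])
ranks = divrank(G, alpha=0.25, d=0.85)
# alpha strengthens the self-link, so nodes that already hold rank mass
# keep more of it, which pushes diversity into the top results
print(sorted(ranks.items(), key=lambda kv: -kv[1]))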
Exemplo n.º 45
0
    def test_graph_disallowed(self):
        nx.stochastic_graph(nx.Graph())
Exemplo n.º 46
0
    def test_multigraph_disallowed(self):
        nx.stochastic_graph(nx.MultiGraph())
Exemplo n.º 47
0
def pagerank(G, alpha=0.85, personalization=None,
             max_iter=100, tol=1.0e-9, nstart=None, weight='weight',
             dangling=None):
    """Return the PageRank of the nodes in the graph.
    Parameters
    -----------
    G : graph
        A NetworkX graph. For PageRank this is a directed graph.
    alpha : float, optional
        Damping factor, default 0.85: the teleportation coefficient,
        used to deal with the spider-trap problem.
    personalization: dict, optional
      The personalization vector, which sets each node's share of the
      teleportation distribution.
      Example format for four nodes: {1: 0.25, 2: 0.25, 3: 0.25, 4: 0.25}.
      By default all nodes get equal weight; individual nodes may be given
      a larger share, as long as the weights sum to 1.
    max_iter : integer, optional
        Maximum number of iterations.
    tol : float, optional
        Error tolerance used to check convergence of the iteration.
    nstart : dictionary, optional
        Starting PageRank value for each node in the network.
    weight : key, optional
      Edge data key to use as weight.

    dangling: dict, optional
      A dict describing the dangling edges:
      key   -- the tail node of a dangling edge, i.e. a dangling node
      value -- the weight of that dangling edge
      How much PR mass is distributed to each dangling node is governed
      by the personalization vector.
      This must be selected to result in an irreducible transition
      matrix (see notes under google_matrix). It may be common to have the
      dangling dict to be the same as the personalization dict.

    Notes
    -----
    The eigenvector calculation uses power iteration, which is not guaranteed
    to converge; if the error has not dropped below the tolerance after the
    maximum number of iterations, an error is raised.

    """

    # Step 1: prepare the graph structure --------------------------------------------------------------------------------
    if len(G) == 0:
        return {}

    if not G.is_directed():
        D = G.to_directed()
    else:
        D = G

    # Create a copy in (right) stochastic form
    W = nx.stochastic_graph(D, weight=weight)
    N = W.number_of_nodes()


    # Choose the initial value of the PR vector
    if nstart is None:
        x = dict.fromkeys(W, 1.0 / N)  # sums to 1
    else:
        # Normalized nstart vector
        s = float(sum(nstart.values()))
        x = dict((k, v / s) for k, v in nstart.items())

    if personalization is None:
        # Assign uniform personalization vector if not given
        p = dict.fromkeys(W, 1.0 / N)
    else:
        missing = set(G) - set(personalization)
        if missing:
            raise NetworkXError('Personalization dictionary '
                                'must have a value for every node. '
                                'Missing nodes %s' % missing)
        s = float(sum(personalization.values()))
        p = dict((k, v / s) for k, v in personalization.items()) # normalize

    if dangling is None:
        # Use personalization vector if dangling vector not specified
        dangling_weights = p
    else:
        missing = set(G) - set(dangling)
        if missing:
            raise NetworkXError('Dangling node dictionary '
                                'must have a value for every node. '
                                'Missing nodes %s' % missing)
        s = float(sum(dangling.values()))
        dangling_weights = dict((k, v/s) for k, v in dangling.items())

    dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]

    #dangling_nodes    the dangling nodes
    #danglesum         total PR mass held by the dangling nodes

    #dangling defaults to the personalization vector
    #dangling_weights is derived from dangling and decides how the dangling
    #nodes' mass is redistributed across the whole graph


    # Power iteration --------------------------------------------------------------------

    #PR = alpha*(A*PR + dangling share) + (1-alpha)*uniform share
    #Three parts: A*PR is what the graph matrix itself distributes, the dangling
    #share redistributes the PR mass held by dangling nodes, and the (1-alpha)
    #share is split evenly among all nodes.

    #Informally, PageRank can be seen as a contest for rank mass with three
    #distribution mechanisms:
    #1. A*PR is free-market distribution: every node willing to compete and
    #   trade takes part.
    #2. The dangling share is forced redistribution: a node that refuses to
    #   join the market has its mass deliberately redistributed for it.
    #3. The uniform share hands every node an equal cut, so spider traps
    #   cannot hoard rank and cause a rank sink; in practice it also helps
    #   the dangling redistribution.

    #From the graph/matrix point of view, the matrix corresponds to a directed
    #graph. For the iteration to converge to a unique solution, the directed
    #graph of the n-by-n matrix must be strongly connected: every pair of nodes
    #mutually reachable (1 can reach 2 and 2 can reach 1).
    #A strongly connected graph is case 1 above (free competition), and then
    #convergence is assured; otherwise spider traps cause the rank-sink problem.


    for _ in range(max_iter):
        print 'iteration:', _
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0)  # reset x
        danglesum = alpha * sum(xlast[n] for n in dangling_nodes) # part 2: total PR mass of the dangling nodes
        for n in x:
            for nbr in W[n]:
                x[nbr] += alpha * xlast[n] * W[n][nbr][weight]    # part 1: distribute node n's PR mass to its neighbours
        for n in x:
            x[n] += danglesum * dangling_weights[n] + (1.0 - alpha) * p[n]   # part 3: add the dangling share and the uniform share to node n

        # check convergence, l1 norm
        err = sum([abs(x[n] - xlast[n]) for n in x])
        if err < N*tol:
            return x
    raise NetworkXError('pagerank: power iteration failed to converge '
                        'in %d iterations.' % max_iter)
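To make the three-part split concrete, here is a hand-worked single update for one node on an invented three-node graph (A -> B, B -> C, C dangling), with uniform personalization and uniform dangling weights:

alpha = 0.85
x = {'A': 1 / 3.0, 'B': 1 / 3.0, 'C': 1 / 3.0}   # uniform starting vector
danglesum = alpha * x['C']                        # part 2: C is the only dangling node
new_B = alpha * x['A'] * 1.0                      # part 1: A sends all its rank to B
new_B += danglesum * (1 / 3.0)                    # part 2: B's uniform dangling share
new_B += (1.0 - alpha) * (1 / 3.0)                # part 3: teleportation share
print(new_B)                                      # ~0.428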
Exemplo n.º 48
0
def test_stochastic_multigraph_input():
    S = nx.stochastic_graph(nx.MultiGraph())
Exemplo n.º 49
0
def pagerank(G,alpha=0.85,personalization=None,
             max_iter=100,tol=1.0e-8,nstart=None,weight='weight'):
    """Return the PageRank of the nodes in the graph.

    PageRank computes a ranking of the nodes in the graph G based on
    the structure of the incoming links. It was originally designed as
    an algorithm to rank web pages.

    Parameters
    -----------
    G : graph
      A NetworkX graph

    alpha : float, optional
      Damping parameter for PageRank, default=0.85

    personalization: dict, optional
       The "personalization vector" consisting of a dictionary with a
       key for every graph node and nonzero personalization value for each node.

    max_iter : integer, optional
      Maximum number of iterations in power method eigenvalue solver.

    tol : float, optional
      Error tolerance used to check convergence in power method solver.

    nstart : dictionary, optional
      Starting value of PageRank iteration for each node.

    weight : key, optional
      Edge data key to use as weight.  If None weights are set to 1.

    Returns
    -------
    pagerank : dictionary
       Dictionary of nodes with PageRank as value

    Examples
    --------
    >>> G=nx.DiGraph(nx.path_graph(4))
    >>> pr=nx.pagerank(G,alpha=0.9)

    Notes
    -----
    The eigenvector calculation is done by the power iteration method
    and has no guarantee of convergence.  The iteration will stop
    after max_iter iterations or an error tolerance of
    number_of_nodes(G)*tol has been reached.

    The PageRank algorithm was designed for directed graphs but this
    algorithm does not check if the input graph is directed and will
    execute on undirected graphs by converting each oriented edge in the
    directed graph to two edges.

    See Also
    --------
    pagerank_numpy, pagerank_scipy, google_matrix

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
       The PageRank citation ranking: Bringing order to the Web. 1999
       http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
    """
    if type(G) == nx.MultiGraph or type(G) == nx.MultiDiGraph:
        raise Exception("pagerank() not defined for graphs with multiedges.")

    if len(G) == 0:
        return {}

    if not G.is_directed():
        D=G.to_directed()
    else:
        D=G

    # create a copy in (right) stochastic form
    W=nx.stochastic_graph(D, weight=weight)
    scale=1.0/W.number_of_nodes()

    # choose fixed starting vector if not given
    if nstart is None:
        x=dict.fromkeys(W,scale)
    else:
        x=nstart
        # normalize starting vector to 1
        s=1.0/sum(x.values())
        for k in x: x[k]*=s

    # assign uniform personalization/teleportation vector if not given
    if personalization is None:
        p=dict.fromkeys(W,scale)
    else:
        p=personalization
        # normalize starting vector to 1
        s=1.0/sum(p.values())
        for k in p:
            p[k]*=s
        if set(p)!=set(G):
            raise NetworkXError('Personalization vector '
                                'must have a value for every node')


    # "dangling" nodes, no links out from them
    out_degree=W.out_degree()
    dangle=[n for n in W if out_degree[n]==0.0]
    i=0
    while True: # power iteration: make up to max_iter iterations
        xlast=x
        x=dict.fromkeys(xlast.keys(),0)
        danglesum=alpha*scale*sum(xlast[n] for n in dangle)
        for n in x:
            # this matrix multiply looks odd because it is
            # doing a left multiply x^T=xlast^T*W
            for nbr in W[n]:
                x[nbr]+=alpha*xlast[n]*W[n][nbr][weight]
            x[n]+=danglesum+(1.0-alpha)*p[n]
        # normalize vector
        s=1.0/sum(x.values())
        for n in x:
            x[n]*=s
        # check convergence, l1 norm
        err=sum([abs(x[n]-xlast[n]) for n in x])
        if err < tol:
            break
        if i>max_iter:
            raise NetworkXError('pagerank: power iteration failed to converge '
                                'in %d iterations.'%(i-1))
        i+=1
    return x
Exemplo n.º 50
0
def pagerank(G, alpha=0.85, personalization=None,
			 max_iter=100, tol=1.0e-6, nstart=None, weight='weight',
			 dangling=None):
	"""
	Return the PageRank of the nodes in the graph.
	Source code from http://networkx.readthedocs.io/en/stable/_modules/networkx/algorithms/link_analysis/pagerank_alg.html#pagerank
	"""
	if len(G) == 0:
		return {}

	if not G.is_directed():
		D = G.to_directed()
	else:
		D = G

	# Step 1: Create a copy in (right) stochastic form
	W = nx.stochastic_graph(D, weight=weight)
	N = W.number_of_nodes()						# N = 11

	# Plot the stochastic graph
	out_file = 'wikipedia_pagerank_example_stochastic_graph.pdf'
	edge_and_labels = {k : round(v, 2) for k, v in nx.get_edge_attributes(W, 'weight').items()}
	plot_graph(W, out_file=out_file, edge_and_labels=edge_and_labels)


	# Step 2: Choose fixed starting vector if not given
	if nstart is None:
		x = dict.fromkeys(W, 1.0 / N)
	else:
		# Normalized nstart vector
		s = float(sum(nstart.values()))
		x = dict((k, v / s) for k, v in nstart.items())

	# plot a graph with nstart: starting value of PageRank iteration for each node.
	out_file = 'wikipedia_pagerank_example_nstart.pdf'
	node_and_labels = {k : k+'\n'+str(round(v, 2)) 
							for k, v in x.items()}
	plot_graph(W, out_file=out_file, node_and_labels=node_and_labels)

	# Step 3: Assign uniform personalization vector if not given
	if personalization is None:
		p = dict.fromkeys(W, 1.0 / N)	# node and nonzero personalization value for each node
	else:
		missing = set(G) - set(personalization)
		if missing:
			raise NetworkXError('Personalization dictionary '
								'must have a value for every node. '
								'Missing nodes %s' % missing)
		s = float(sum(personalization.values()))
		p = dict((k, v / s) for k, v in personalization.items())

	# Step 4: Use personalization vector if dangling vector not specified
	if dangling is None:
		dangling_weights = p
	else:
		missing = set(G) - set(dangling)
		if missing:
			raise NetworkXError('Dangling node dictionary '
								'must have a value for every node. '
								'Missing nodes %s' % missing)
		s = float(sum(dangling.values()))
		dangling_weights = dict((k, v/s) for k, v in dangling.items())

	dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]


	# Step 5: power iteration: make up to max_iter iterations
	for _ in range(max_iter):
		xlast = x 							# pagerank for each node
		x = dict.fromkeys(xlast.keys(), 0)
		danglesum = alpha * sum(xlast[n] for n in dangling_nodes)

		for n in x:
			# this matrix multiply looks odd because it is
			# doing a left multiply x^T=xlast^T*W
			for nbr in W[n]:
				x[nbr] += alpha * xlast[n] * W[n][nbr][weight]	# PR(p_i) = d * PR(p_j)}/L(p_j)

			x[n] += danglesum * dangling_weights[n] + (1.0 - alpha) * p[n]	# danglesum/N  + (1-d)/N


		# Plot graph with one iteration
		'''
		out_file = 'wikipedia_pagerank_example_iteration_1.pdf'
		node_and_pr = x
		node_size = [pr*30000 for node, pr in node_and_pr.items()]
		node_and_labels = {node : node+'\n'+str(round(pr, 3))
							for node, pr in node_and_pr.items()}

		plot_graph(G, out_file=out_file, node_size=node_size, node_and_labels=node_and_labels)
		'''

		# check convergence, l1 norm
		err = sum([abs(x[n] - xlast[n]) for n in x])
		if err < N*tol:
			return x

	raise NetworkXError('pagerank: power iteration failed to converge in %d iterations.' % max_iter)
Exemplo n.º 51
0
def build_bipartite_graph(import_export_data, country_list=[], product_list=[]):
    """
    If country_list and product_list are not empty, then
    we only care about countries and products in the lists.
    """


    graph = nx.DiGraph()

    country_ids = {}
    product_ids = {}

    next_id = 0

    countries = set(import_export_data.keys())
    products = set([y['id'] for x in import_export_data.values() for y in x])

    filtered_countries = countries & set(country_list) if country_list else countries
    filtered_products = products & set(product_list) if product_list else products

    print "loading %s countries and %s products..." % (len(filtered_countries), len(filtered_products))


    # add country nodes
    for country in filtered_countries:
        graph.add_node(next_id,
                    type="country",
                    entity_id=country
                    )

        country_ids[country] = next_id
        next_id += 1


    # add product nodes
    for product in filtered_products:
        graph.add_node(next_id,
                    type="product",
                    entity_id=product
                    )

        product_ids[product] = next_id
        next_id += 1



    # add country - product edges
    for country, records in import_export_data.iteritems():
        if not country in filtered_countries:
            continue

        for each_record in records:
            if not each_record['id'] in filtered_products:
                continue

            if not each_record['import'] == 0:
                graph.add_edge(country_ids[country], product_ids[each_record['id']], weight=each_record['import']) # import edge

            if not each_record['export'] == 0:
                graph.add_edge(product_ids[each_record['id']], country_ids[country], weight=each_record['export']) # export edge



    # create a copy in (right) stochastic form
    W = nx.stochastic_graph(graph, weight='weight')

    return W
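A sketch of the input shape this function expects, with the field names taken from the function body but the countries, products, and figures invented; since the function uses iteritems and print statements, Python 2 is assumed:

import_export_data = {
    'USA': [{'id': 'p1', 'import': 10.0, 'export': 5.0},
            {'id': 'p2', 'import': 0,    'export': 3.0}],
    'BRA': [{'id': 'p1', 'import': 2.0,  'export': 0}],
}
W = build_bipartite_graph(import_export_data)
print W.number_of_nodes(), W.number_of_edges()   # 4 nodes, 4 weighted edges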
Exemplo n.º 52
0
def pagerank(G, alpha=0.85, personalization=None,
             max_iter=100, tol=1.0e-10, nstart=None, weight='weight',
             dangling=None):
    """Return the PageRank of the nodes in the graph.
    G : graph
      A NetworkX graph.  Undirected graphs will be converted to a directed
      graph with two directed edges for each undirected edge.
    alpha : float, optional
      Damping parameter for PageRank, default=0.85.
    personalization: dict, optional
      The "personalization vector" consisting of a dictionary with a
      key for every graph node and nonzero personalization value for each node.
      By default, a uniform distribution is used.
    max_iter : integer, optional
      Maximum number of iterations in power method eigenvalue solver.
    tol : float, optional
      Error tolerance used to check convergence in power method solver.
    nstart : dictionary, optional
      Starting value of PageRank iteration for each node.
    weight : key, optional
      Edge data key to use as weight.  If None weights are set to 1.
    dangling: dict, optional
      The outedges to be assigned to any "dangling" nodes, i.e., nodes without
      any outedges. The dict key is the node the outedge points to and the dict
      value is the weight of that outedge. By default, dangling nodes are given
      outedges according to the personalization vector (uniform if not
      specified). This must be selected to result in an irreducible transition
      matrix (see notes under google_matrix). It may be common to have the
      dangling dict to be the same as the personalization dict.
    """
    if len(G) == 0:
        return {}

    if not G.is_directed():
        D = G.to_directed()
    else:
        D = G

    # Create a copy in (right) stochastic form
    W = nx.stochastic_graph(D, weight=weight)
    N = W.number_of_nodes()

    # Choose fixed starting vector if not given
    x = dict.fromkeys(W, 1.0 / N)
    
    p = dict.fromkeys(W, 1.0 / N)
    
    dangling_weights = p
    
    dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]

    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        # print('here')
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0)
        danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
        # print(danglesum)
        for n in x:
            # this matrix multiply looks odd because it is
            # doing a left multiply x^T=xlast^T*W
            for nbr in W[n]:
                x[nbr] += alpha * xlast[n] * W[n][nbr][weight]
            x[n] += danglesum * dangling_weights[n] + (1.0 - alpha) * p[n]
        # convergence check (l1 norm) disabled: always run max_iter iterations
        # err = sum([abs(x[n] - xlast[n]) for n in x])
        # if err < N*tol:
        #     return x
    return x
Exemplo n.º 53
0
def pagerank(G, alpha=0.85, max_iter=100, tol=1e-4, x_start=None, personalization=None):
    """
    Compute and return the PageRank of a directed graph (see also the networkx documentation).
    The output is a dictionary mapping each node id to its PageRank value.
    The number of iterations needed to converge is also returned.
    """
    # some precondition checking. (we could also convert undirected to directed.)
    if not G.is_directed():
        raise Exception("pagerank() only defined for directed graphs.")

    # to be completely correct we should also remove self-loops, but we
    # ignore this for performance reasons at the moment and assume the
    # input contains no self-referential nodes.
    # G.remove_edges_from(G.selfloop_edges())

    nodes = G.nodes()
    nb_nodes = len(nodes)
    if nb_nodes == 0:
        return {}
    
    # value for nodes without backlinks
    min_value = (1.0-alpha)/nb_nodes

    # initial pagerank dict
    if x_start is None:
        # initialize the PageRank dict with 1/N for all nodes
        x = dict.fromkeys(nodes, 1.0/nb_nodes)
    else:
        x = x_start
        # normalize starting vector to 1                
        s = 1.0/sum(x.values())
        for k in x: x[k]*=s
        
    # assign uniform personalization/teleportation vector if not given
    """if personalization is None:
        p = dict.fromkeys(nodes,1.0/nb_nodes)
    else:
        p = personalization
        # normalize starting vector to 1               
        s = 1.0/sum(p.values())
        for k in p: 
            p[k]*=s
        if set(p)!=set(G):
            raise Exception('Personalization vector must have a value for every node')
    """
        
    # "dangling" nodes, no links out from them; fix them
    out_degree = G.out_degree()
    for dangling in (n for n in nodes if out_degree[n]==0.0):
        for n in nodes:
            G.add_edge(dangling, n)
    
    # create a copy in (right) stochastic form which we will use 
    # to avoid recalculating the number of outgoing links every time  
    W=nx.stochastic_graph(G)
    #W = G
    # now the iterative algorithm 
    # (which is basically a version of the power method, 
    # without using explicit matrix multiplications)
    i = 0
    while True: 
        # uncomment following 2 lines if you want to view each iteration
        #print "iteration %d:" % i
        #print "pagerank:", x 
        i += 1
        # after maximum iterations have been reached, stop
        if i > max_iter:
            print "no convergence after {0} iterations!".format(max_iter)
            break
        
        # some helper variables
        diff = 0 #total difference compared to last iteration
        x_new = dict.fromkeys(nodes, 0) # the dict where we store our new values
        
        # now the pagerank calculations
        for node in nodes:
            rank = min_value
            #print "node", node
            #print "min value", min_value
            for referring_page in W.predecessors_iter(node):
                #print "refered by ", referring_page
                #print "old value", old_pagerank[referring_page]
                #print "G out degree", G.out_degree(referring_page)
                rank += alpha * x[referring_page] * W[referring_page][node]['weight'] # or / G.out_degree(referring_page)
            # the personalization 
            # rank += min_value * p[node]
            diff += abs(rank - x[node]) #accumulate the difference
            x_new[node] = rank
        
        
        x = x_new # our new pagerank
        #print pagerank
                
        #stop if converged
        if diff < tol:
            #print "converged after {0} iterations".format(i)
            break

    #normalize PageRank
    total = sum(x.values())
    if total!=0:
        s = 1.0/total
        for n in x: 
            x[n] *= s 
   
    return x, i
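A minimal usage sketch for this variant, on an invented graph and assuming networkx 1.x (the function's predecessors_iter call requires it); note that unlike networkx's pagerank it returns a (ranks, iterations) pair and mutates G by adding out-edges from dangling nodes:

G = nx.DiGraph()
G.add_edges_from([(1, 2), (2, 1), (2, 3)])   # node 3 is dangling
ranks, iterations = pagerank(G, alpha=0.85)
print "converged after {0} iterations:".format(iterations), ranks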
Exemplo n.º 54
0
def test_stochastic_graph_input():
    S = nx.stochastic_graph(nx.Graph())
Exemplo n.º 55
0
    def test_pagerank_algo(self):
        W = nx.stochastic_graph(self.D, weight='weight')
        prs = pagerank_unrecorded(W, personalization=self._personalization, weight='weight')
        for key, val in prs.iteritems():
            self.assertEqual(val, self.result_pr[key])