示例#1
0
    def fit(self, biadjacency: Union[sparse.csr_matrix, np.ndarray],
            seeds_row: Optional[Union[dict, np.ndarray]] = None,
            seeds_col: Optional[Union[dict, np.ndarray]] = None) -> 'CoPageRank':
        """Compute PageRank scores for the rows and the columns of a biadjacency matrix.

        Rows are ranked with a co-neighbor operator built on the biadjacency matrix,
        columns with the same kind of operator built on its transpose.

        Parameters
        ----------
        biadjacency :
            Biadjacency matrix.
        seeds_row :
            Seeds of the rows, given as a dict or as a vector.
        seeds_col :
            Seeds of the columns, given as a dict or as a vector.
            When both seeds_row and seeds_col are ``None``, the uniform distribution is used.

        Returns
        -------
        self: :class:`CoPageRank`
            The fitted instance; ``scores_row_``, ``scores_col_`` and ``scores_``
            (an alias of ``scores_row_``) are set.
        """
        biadjacency = check_format(biadjacency)
        n_row, n_col = biadjacency.shape

        # Solver settings are identical for both sides of the graph.
        pagerank_params = dict(damping_factor=self.damping_factor, solver=self.solver,
                               n_iter=self.n_iter, tol=self.tol)

        # Row side.
        row_operator = CoNeighborsOperator(biadjacency, True)
        row_probs = seeds2probs(n_row, seeds_row)
        self.scores_row_ = get_pagerank(row_operator, row_probs, **pagerank_params)

        # Column side: same procedure on the transposed matrix.
        col_operator = CoNeighborsOperator(biadjacency.T.tocsr(), True)
        col_probs = seeds2probs(n_col, seeds_col)
        self.scores_col_ = get_pagerank(col_operator, col_probs, **pagerank_params)

        # The default scores are the row scores.
        self.scores_ = self.scores_row_
        return self
示例#2
0
    def test_seeds2probs(self):
        """Check that array and dict seeds agree, and that an invalid vector is rejected."""
        n_nodes = 4
        as_array = np.array([0, 1, -1, 0])
        as_dict = {0: 0, 1: 1, 3: 0}

        # Both encodings of the same seeds must produce the same probabilities.
        from_array = seeds2probs(n_nodes, as_array)
        from_dict = seeds2probs(n_nodes, as_dict)
        self.assertTrue(np.allclose(from_array, from_dict))

        # A vector with no positive entry is expected to raise ValueError.
        no_positive_entry = np.array([0, 0, -1, 0])
        self.assertRaises(ValueError, seeds2probs, n_nodes, no_positive_entry)
示例#3
0
    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray, LinearOperator],
            seeds: Optional[Union[dict, np.ndarray]] = None) -> 'PageRank':
        """Compute the PageRank scores of the nodes and store them in ``scores_``.

        Parameters
        ----------
        adjacency :
            Adjacency matrix, or a linear operator standing in for it.
        seeds :
            Restart distribution for Personalized PageRank, as a vector or a dict (node: weight).
            If ``None``, the uniform distribution is used (no personalization, default).

        Returns
        -------
        self: :class:`PageRank`
        """
        # Linear operators are used as given; any other input goes through check_format.
        if isinstance(adjacency, LinearOperator):
            matrix = adjacency
        else:
            matrix = check_format(adjacency)
        check_square(matrix)

        n_nodes = matrix.shape[0]
        restart_probs = seeds2probs(n_nodes, seeds)
        self.scores_ = get_pagerank(matrix, restart_probs, damping_factor=self.damping_factor,
                                    n_iter=self.n_iter, solver=self.solver, tol=self.tol)
        return self
示例#4
0
    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray, LinearOperator],
            seeds: Optional[Union[dict, np.ndarray]] = None) -> 'PageRank':
        """Run PageRank on a square adjacency matrix and store the result in ``scores_``.

        Parameters
        ----------
        adjacency :
            Adjacency matrix, or a linear operator standing in for it.
        seeds :
            If ``None``, the uniform distribution is used.
            Otherwise, a non-negative, non-zero vector or a dictionary must be provided.

        Returns
        -------
        self: :class:`PageRank`
        """
        # Only inputs that are not linear operators are passed through check_format.
        graph_matrix = adjacency if isinstance(adjacency, LinearOperator) else check_format(adjacency)
        check_square(graph_matrix)

        solver_options = dict(damping_factor=self.damping_factor, n_iter=self.n_iter,
                              solver=self.solver, tol=self.tol)
        probs = seeds2probs(graph_matrix.shape[0], seeds)
        self.scores_ = get_pagerank(graph_matrix, probs, **solver_options)
        return self
                       fast_format=False)
# Adjacency matrix of the graph loaded above.
adjacency = graph.adjacency
time_end = time.time()
# Elapsed loading time; time_start is presumably set just before the load call (outside this excerpt).
print("Sknetwork load graph time:", time_end - time_start, "seconds")

# Read the names of pages: a headerless, UTF-8 encoded table converted to a list of rows.

pages = pd.read_table('data/wiki_pageNum.txt', encoding='utf-8',
                      header=None).values.tolist()

# Parameters of the PageRank computation.

# Example of personalized seeds (node: weight), currently disabled:
# seeds = {2597: 1, 26634: 1, 229857: 1}
seeds = None

# Convert the seeds into a vector with one entry per node
# (with seeds=None this is presumably the uniform distribution -- confirm in seeds2probs).
seeds = seeds2probs(adjacency.shape[0], seeds)
# NOTE(review): tol and damping_factor are presumably consumed by push_pagerank below.
tol = 1e-1
damping_factor = 0.85

# Beginning of the push algorithm; the timer is restarted so that preprocessing is included.
time_start = time.time()
n = adjacency.shape[0]
# Row sums of the adjacency matrix, cast to int32
# (these are out-degrees when every edge weight is 1 -- TODO confirm the graph is unweighted).
degrees = adjacency.dot(np.ones(n)).astype(np.int32)
# Transposed adjacency in CSR format: row i lists the nodes having an edge towards i.
rev_adjacency = adjacency.transpose().tocsr()

# CSR index arrays of both matrices cast to int32, presumably the dtype expected by push_pagerank.
indptr = adjacency.indptr.astype(np.int32)
indices = adjacency.indices.astype(np.int32)
rev_indptr = rev_adjacency.indptr.astype(np.int32)
rev_indices = rev_adjacency.indices.astype(np.int32)

scores = push_pagerank(n, degrees, indptr, indices, rev_indptr, rev_indices,