Пример #1
 def __init__(
         self,
         which='LM',
         n_oversamples: int = 10,
         n_iter='auto',
         power_iteration_normalizer: Union[str, None] = 'auto',
         random_state=None,
         one_pass: bool = False,
 ):
     """Store the settings of a randomized eigenvalue solver.

     Parameters
     ----------
     which:
         Forwarded to ``EigSolver.__init__``.
     n_oversamples:
         Stored as ``self.n_oversamples``.
     n_iter:
         Stored as ``self.n_iter``.
     power_iteration_normalizer:
         Stored as ``self.power_iteration_normalizer``.
     random_state:
         Passed through ``check_random_state``; the result is stored as
         ``self.random_state``.
     one_pass:
         Stored as ``self.one_pass``.

     NOTE(review): the parameter names mirror those of ``randomized_eig``;
     presumably they are forwarded to it when the solver runs - confirm.
     """
     # The base solver only needs to know which end of the spectrum is wanted.
     EigSolver.__init__(self, which=which)

     # Settings of the randomized algorithm, kept verbatim.
     self.n_oversamples = n_oversamples
     self.n_iter = n_iter
     self.power_iteration_normalizer = power_iteration_normalizer

     # Normalise the seed / generator once, at construction time.
     self.random_state = check_random_state(random_state)
     self.one_pass = one_pass
Пример #2
    def __init__(self, engine: str = 'default', algorithm: Union[str, Optimizer] = 'default', resolution: float = 1,
                 tol: float = 1e-3, agg_tol: float = 1e-3, max_agg_iter: int = -1, shuffle_nodes: bool = False,
                 sorted_cluster: bool = True, random_state: Optional[Union[np.random.RandomState, int]] = None,
                 verbose: bool = False):
        """Configure the Louvain algorithm.

        Parameters
        ----------
        engine:
            Validated with ``check_engine`` and handed to the default optimizer.
            Only used when ``algorithm`` is ``'default'``.
        algorithm:
            ``'default'`` to build a ``GreedyModularity`` optimizer from
            ``resolution``, ``tol`` and ``engine``, or an ``Optimizer`` instance
            that is used as is.
        resolution:
            Forwarded to the default ``GreedyModularity`` optimizer.
        tol:
            Forwarded to the default ``GreedyModularity`` optimizer.
        agg_tol:
            Stored as ``self.agg_tol``.
        max_agg_iter:
            Stored as ``self.max_agg_iter``; must be a plain ``int``.
        shuffle_nodes:
            Stored as ``self.shuffle_nodes``.
        sorted_cluster:
            Stored as ``self.sorted_cluster``.
        random_state:
            Seed or generator, normalised by ``check_random_state``.
        verbose:
            Forwarded to ``VerboseMixin.__init__``.

        Raises
        ------
        TypeError
            If ``algorithm`` is neither ``'default'`` nor an ``Optimizer``, or if
            ``max_agg_iter`` is not an ``int``.
        """
        super(Louvain, self).__init__()
        VerboseMixin.__init__(self, verbose)

        self.random_state = check_random_state(random_state)
        if algorithm == 'default':
            self.algorithm = GreedyModularity(resolution, tol, engine=check_engine(engine))
        elif isinstance(algorithm, Optimizer):
            self.algorithm = algorithm
        else:
            # Only reached for unsupported values; a valid Optimizer must not raise.
            raise TypeError('Algorithm must be \'default\' or a valid algorithm.')
        if type(max_agg_iter) is not int:
            raise TypeError('The maximum number of iterations must be an integer.')
        self.agg_tol = agg_tol
        self.max_agg_iter = max_agg_iter
        self.shuffle_nodes = shuffle_nodes
        self.sorted_cluster = sorted_cluster

        # Attributes filled in later; left empty at construction.
        self.iteration_count_ = None
        self.aggregate_graph_ = None
Пример #3
def block_model(clusters: Union[np.ndarray, int], shape: Optional[Tuple[int, int]] = None, inner_prob: float = .2,
                outer_prob: float = .01, random_state: Optional[Union[np.random.RandomState, int]] = None) \
                -> Tuple[sparse.csr_matrix, np.ndarray, np.ndarray]:
    """Generate a random block model graph.

    Rows and columns are partitioned into matching clusters. An entry whose row
    and column belong to the same cluster is set with probability ``inner_prob``,
    any other entry with probability ``outer_prob``.

    Parameters
    ----------
    clusters: Union[np.ndarray, int]
         Cluster specifications (array of couples where each entry denotes the shape of a cluster
         or an int denoting the number of clusters). If an ``int`` is passed, ``shape`` must be given and
         the clusters are identical in shape, except for the last one which absorbs the remainder
         when the shape is not a multiple of the number of clusters.
    shape: Optional[Tuple[int, int]]
        The size of the adjacency to obtain (might be rectangular for a biadjacency matrix).
    inner_prob: float
        Intra-cluster connection probability.
    outer_prob: float
        Inter-cluster connection probability.
    random_state: Optional[Union[np.random.RandomState, int]]
        Random number generator or random seed, normalised by ``check_random_state``.

    Returns
    -------
    adjacency: sparse.csr_matrix
        The adjacency (or biadjacency) matrix of the graph.
    ground_truth_features: np.ndarray
        The labels associated with the features (columns).
    ground_truth_samples: np.ndarray
        The labels associated with the samples (rows).

    Raises
    ------
    ValueError
        If a number of clusters is given without ``shape``, is not positive, or exceeds
        one of the dimensions; or if explicit cluster sizes do not add up to ``shape``.
    TypeError
        If ``clusters`` is neither an integer nor a NumPy array.
    """
    random_state = check_random_state(random_state)

    if isinstance(clusters, (int, np.integer)):
        if not shape:
            raise ValueError(
                'Please specify the shape of the matrix when giving a number of clusters.')
        if clusters <= 0:
            raise ValueError('Number of clusters should be positive.')
        n_clusters = int(clusters)
        row_step, col_step = shape[0] // n_clusters, shape[1] // n_clusters
        if row_step == 0 or col_step == 0:
            raise ValueError(
                'Number of clusters is too high given the shape of the matrix.')
        # Exactly n_clusters + 1 boundaries. Stepping an arange up to the shape
        # can yield more entries than that when the step does not divide the shape.
        boundaries = np.arange(n_clusters + 1)
        clusters_cumul = np.zeros((n_clusters + 1, 2), dtype=int)
        clusters_cumul[:, 0] = boundaries * row_step
        clusters_cumul[:, 1] = boundaries * col_step
        # The last cluster takes whatever is left over.
        clusters_cumul[-1, 0] = shape[0]
        clusters_cumul[-1, 1] = shape[1]

    elif isinstance(clusters, np.ndarray):
        n_clusters = clusters.shape[0]
        # Turn per-cluster sizes into boundaries, starting at (0, 0).
        clusters_cumul = np.cumsum(clusters, axis=0)
        clusters_cumul = np.insert(clusters_cumul, 0, 0, axis=0)
        if shape and (clusters_cumul[-1, 0] != shape[0] or clusters_cumul[-1, 1] != shape[1]):
            raise ValueError('Cluster sizes do not match matrix size.')

    else:
        raise TypeError(
            'Please specify an array of sizes or a number of clusters (along with the shape of the desired matrix).')

    n_rows, n_cols = clusters_cumul[-1, 0], clusters_cumul[-1, 1]
    ground_truth_samples = np.zeros(n_rows, dtype=int)
    ground_truth_features = np.zeros(n_cols, dtype=int)
    mat = sparse.dok_matrix((n_rows, n_cols), dtype=bool)
    for label in range(n_clusters):
        row_start, row_stop = clusters_cumul[label, 0], clusters_cumul[label + 1, 0]
        col_start, col_stop = clusters_cumul[label, 1], clusters_cumul[label + 1, 1]
        ground_truth_samples[row_start:row_stop] = label
        ground_truth_features[col_start:col_stop] = label
        # Per-column connection probability for every row of this block.
        mask = np.full(n_cols, outer_prob)
        mask[col_start:col_stop] = inner_prob
        for row in range(row_start, row_stop):
            # One uniform draw per column; an edge is created where it falls below the probability.
            mat[row, (random_state.rand(n_cols) < mask)] = True

    return sparse.csr_matrix(mat), ground_truth_features, ground_truth_samples
Пример #4
def randomized_range_finder(matrix: np.ndarray, size: int, n_iter: int, power_iteration_normalizer='auto',
                            random_state=None, return_all: bool = False) \
                            -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray, np.ndarray]]:
    """Compute an orthonormal matrix :math:`Q`, whose range approximates the range of the input matrix.

    :math:`A \\approx QQ^*A`.

    Parameters
    ----------
    matrix :
        Input matrix
    size :
        Number of random vectors drawn, i.e. number of columns of the returned basis.
    n_iter :
        Number of power iterations. It can be used to deal with very noisy
        problems. Must be an integer here; callers resolve ``'auto'`` beforehand.
    power_iteration_normalizer: ``'auto'`` (default), ``'QR'``, ``'LU'``, ``'none'`` or ``None``
        Whether the power iterations are normalized with step-by-step
        QR factorization (the slowest but most accurate), not at all (``'none'`` or ``None``,
        the fastest but numerically unstable when ``n_iter`` is large, e.g.
        typically 5 or larger), or ``'LU'`` factorization (numerically stable
        but can lose slightly in accuracy). The ``'auto'`` mode applies no
        normalization if ``n_iter`` <= 2 and switches to ``'LU'`` otherwise.
    random_state: int, RandomState instance or ``None``, optional (default= ``None``)
        Seed or generator used to draw the random vectors, normalised by
        ``check_random_state``.
    return_all : if True, returns (range_matrix, random_matrix, random_proj)
                else returns range_matrix.

    Returns
    -------
    range_matrix : np.ndarray
        Matrix with orthonormal columns (economic QR factor of ``matrix`` times the
        iterated random vectors), the range of which approximates well the range of
        the input matrix.
    random_matrix : np.ndarray, optional
        The random matrix initially drawn, of shape ``(matrix.shape[1], size)``.
    projected_matrix : np.ndarray, optional
        Product between the input matrix and the random matrix.

    References
    ----------
    Follows Algorithm 4.3 of
    Finding structure with randomness: Stochastic algorithms for constructing approximate matrix decompositions
    Halko, et al., 2009 (arXiv:0909.4061)
    """
    random_state = check_random_state(random_state)

    # Generating normal random vectors with shape: (A.shape[1], size)
    random_matrix = random_state.normal(size=(matrix.shape[1], size))
    if matrix.dtype.kind == 'f':
        # Ensure f32 is preserved as f32
        random_matrix = random_matrix.astype(matrix.dtype, copy=False)
    # Work on a copy so that the initial draw can still be returned untouched.
    range_matrix = random_matrix.copy()

    # The documented ``None`` means "no normalization"; map it onto the string
    # tested below, otherwise no power iteration would be performed at all.
    if power_iteration_normalizer is None:
        power_iteration_normalizer = 'none'
    elif power_iteration_normalizer == 'auto':
        power_iteration_normalizer = 'none' if n_iter <= 2 else 'LU'

    # Perform power iterations with 'range_matrix' to further 'imprint' the top
    # singular vectors of matrix in 'range_matrix'
    for _ in range(n_iter):
        if power_iteration_normalizer == 'none':
            range_matrix = safe_sparse_dot(matrix, range_matrix)
            range_matrix = safe_sparse_dot(matrix.T, range_matrix)
        elif power_iteration_normalizer == 'LU':
            range_matrix, _ = linalg.lu(safe_sparse_dot(matrix, range_matrix), permute_l=True)
            range_matrix, _ = linalg.lu(safe_sparse_dot(matrix.T, range_matrix), permute_l=True)
        elif power_iteration_normalizer == 'QR':
            range_matrix, _ = linalg.qr(safe_sparse_dot(matrix, range_matrix), mode='economic')
            range_matrix, _ = linalg.qr(safe_sparse_dot(matrix.T, range_matrix), mode='economic')

    # Sample the range of 'matrix' by linear projection of 'range_matrix',
    # then extract an orthonormal basis
    range_matrix, _ = linalg.qr(safe_sparse_dot(matrix, range_matrix), mode='economic')
    if return_all:
        return range_matrix, random_matrix, matrix.dot(random_matrix)
    return range_matrix
Пример #5
def randomized_eig(matrix, n_components: int, which='LM', n_oversamples: int = 10, n_iter='auto',
                   power_iteration_normalizer: Union[str, None] = 'auto', random_state=None, one_pass: bool = False):
    """Randomized eigenvalue decomposition.

    Parameters
    ----------
    matrix: ndarray or sparse matrix
        Square matrix to decompose. It is not modified.
    n_components: int
        Number of eigenvalues and eigenvectors to extract.
    which: str
        which eigenvalues to compute. ``'LM'`` for Largest Magnitude and ``'SM'`` for Smallest Magnitude.
        Any other entry will result in Largest Magnitude.
    n_oversamples : int (default=10)
        Additional number of random vectors to sample the range of ``matrix`` so as
        to ensure proper conditioning. The total number of random vectors
        used to find the range of ``matrix`` is ``n_components + n_oversamples``. Smaller number can improve speed
        but can negatively impact the quality of approximation of eigenvectors and eigenvalues.
    n_iter: int or 'auto' (default is 'auto')
        See :meth:`randomized_range_finder`. With ``'auto'``, 7 iterations are used when
        ``n_components < .1 * min(matrix.shape)`` and 4 otherwise.
    power_iteration_normalizer: ``'auto'`` (default), ``'QR'``, ``'LU'``, ``None``
        See :meth:`randomized_range_finder`
    random_state: int, RandomState instance or None, optional (default=None)
        See :meth:`randomized_range_finder`
    one_pass: bool (default=False)
        whether to use algorithm 5.6 instead of 5.3. 5.6 requires less access to the original matrix,
        while 5.3 is more accurate.

    Returns
    -------
    eigenvalues: np.ndarray
        The ``n_components`` first eigenvalues of the projected problem.
    eigenvectors: np.ndarray
        The matching eigenvectors, one per column.

    Raises
    ------
    ValueError
        If ``matrix`` is not square.

    References
    ----------
    Finding structure with randomness: Stochastic algorithms for constructing
    approximate matrix decompositions
    Halko, et al., 2009
    """
    random_state = check_random_state(random_state)
    n_random = n_components + n_oversamples
    n_samples, n_features = matrix.shape
    lambda_max = 0.

    if n_samples != n_features:
        raise ValueError('The input matrix is not square.')

    if which == 'SM':
        # Shift-and-negate: the top of the spectrum of (lambda_max * I - matrix)
        # corresponds to the bottom of the spectrum of matrix.
        # The same generator is reused so that a seeded call stays reproducible.
        lambda_max = 1.1 * randomized_eig(matrix, n_components=1, random_state=random_state)[0][0]
        # Build a new object instead of ``matrix *= -1``, which would negate the
        # caller's matrix in place.
        matrix = matrix * -1
        if isinstance(matrix, SparseLR):
            matrix += SparseLR(lambda_max * sparse.identity(matrix.shape[0]), [])
        else:
            matrix += lambda_max * sparse.identity(matrix.shape[0])

    if n_iter == 'auto':
        # Checks if the number of iterations is explicitly specified
        # Adjust n_iter. 7 was found a good compromise for PCA. See #5299
        n_iter = 7 if n_components < .1 * min(matrix.shape) else 4

    range_matrix, random_matrix, random_proj = randomized_range_finder(matrix, n_random, n_iter,
                                                                       power_iteration_normalizer, random_state, True)
    if one_pass:
        # Recover the projected matrix from the random sketch only (least squares),
        # without touching the input matrix again.
        approx_matrix = np.linalg.lstsq(random_matrix.T.dot(range_matrix), random_proj.T.dot(range_matrix), None)[0].T
    else:
        # Direct projection of the matrix onto the range basis.
        approx_matrix = (matrix.dot(range_matrix)).T.dot(range_matrix)

    eigenvalues, eigenvectors = np.linalg.eig(approx_matrix)

    del approx_matrix
    # eigenvalues indices in decreasing order
    values_order = np.argsort(eigenvalues)[::-1]
    eigenvalues = eigenvalues[values_order]
    # Map the eigenvectors of the small problem back to the original space.
    eigenvectors = np.dot(range_matrix, eigenvectors)[:, values_order]

    if which == 'SM':
        # Undo the shift applied above.
        eigenvalues = lambda_max - eigenvalues

    return eigenvalues[:n_components], eigenvectors[:, :n_components]
Пример #6
def randomized_svd(matrix, n_components: int, n_oversamples: int = 10, n_iter='auto', transpose='auto',
                   power_iteration_normalizer: Union[str, None] = 'auto', flip_sign: bool = True, random_state=None):
    """Truncated randomized SVD

    Parameters
    ----------
    matrix : ndarray or sparse matrix
        Matrix to decompose
    n_components : int
        Number of singular values and vectors to extract.
    n_oversamples : int (default=10)
        Additional number of random vectors to sample the range of M so as
        to ensure proper conditioning. The total number of random vectors
        used to find the range of M is n_components + n_oversamples. Smaller
        number can improve speed but can negatively impact the quality of
        approximation of singular vectors and singular values.
    n_iter : int or 'auto' (default is 'auto')
        See :meth:`randomized_range_finder`. With ``'auto'``, 7 iterations are used when
        ``n_components < .1 * min(matrix.shape)`` and 4 otherwise.
    power_iteration_normalizer : ``'auto'`` (default), ``'QR'``, ``'LU'``, ``None``
        See :meth:`randomized_range_finder`
    transpose : True, False or 'auto' (default)
        Whether the algorithm should be applied to ``matrix.T`` instead of ``matrix``. The
        result should approximately be the same. The 'auto' mode will
        trigger the transposition if ``matrix.shape[1] > matrix.shape[0]`` since this
        implementation of randomized SVD tends to be a little faster in that case.
    flip_sign : boolean, (default=True)
        The output of a singular value decomposition is only unique up to a
        permutation of the signs of the singular vectors. If `flip_sign` is
        set to `True`, the sign ambiguity is resolved by making the largest
        loadings for each component in the left singular vectors positive.
    random_state : int, RandomState instance or None, optional (default=None)
        See :meth:`randomized_range_finder`

    Returns
    -------
    left_singular_vectors: np.ndarray
    singular_values: np.ndarray
    right_singular_vectors: np.ndarray

    Notes
    -----
    This algorithm finds a (usually very good) approximate truncated
    singular value decomposition using randomization to speed up the
    computations. It is particularly fast on large matrices on which
    you wish to extract only a small number of components. In order to
    obtain further speed up, ``n_iter`` can be set <=2 (at the cost of
    loss of precision).

    References
    ----------
    * Finding structure with randomness: Stochastic algorithms for constructing
      approximate matrix decompositions
      Halko, et al., 2009 http://arxiv.org/abs/arXiv:0909.4061
      (algorithm 5.1)
    * A randomized algorithm for the decomposition of matrices
      Per-Gunnar Martinsson, Vladimir Rokhlin and Mark Tygert
    * An implementation of a randomized algorithm for principal component
      A. Szlam et al. 2014
    """
    random_state = check_random_state(random_state)
    n_random = n_components + n_oversamples
    n_samples, n_features = matrix.shape

    if n_iter == 'auto':
        # Checks if the number of iterations is explicitly specified
        # Adjust n_iter. 7 was found a good compromise for PCA. See #5299
        n_iter = 7 if n_components < .1 * min(matrix.shape) else 4

    if transpose == 'auto':
        transpose = n_samples < n_features
    if transpose:
        # this implementation is a bit faster with smaller shape[1]
        matrix = matrix.T

    range_matrix: np.ndarray = randomized_range_finder(matrix, n_random, n_iter,
                                                       power_iteration_normalizer, random_state)

    # project M to the (k + p) dimensional space using the basis vectors
    approx_matrix = safe_sparse_dot(range_matrix.T, matrix)

    # compute the SVD on the thin matrix: (k + p) wide
    uhat, singular_values, v = linalg.svd(approx_matrix, full_matrices=False)

    del approx_matrix
    # Map the left singular vectors of the thin problem back to the original space.
    u = np.dot(range_matrix, uhat)

    if flip_sign:
        if transpose:
            # In case of transpose u_based_decision=false
            # to actually flip based on u and not v.
            u, v = svd_flip(u, v, u_based_decision=False)
        else:
            u, v = svd_flip(u, v)

    if transpose:
        # transpose back the results according to the input convention
        return v[:n_components, :].T, singular_values[:n_components], u[:, :n_components].T
    return u[:, :n_components], singular_values[:n_components], v[:n_components, :]
Пример #7
 def test_error_random_state(self):
     """``check_random_state`` is expected to raise ``TypeError`` on an unsupported seed type."""
     with self.assertRaises(TypeError):
         # NOTE(review): the statement under test was missing from this excerpt,
         # which left the ``with`` block empty. A string seed is assumed to be an
         # unsupported type - confirm against ``check_random_state``.
         # noinspection PyTypeChecker
         check_random_state('junk')
Пример #8
 def test_random_state(self):
     """A ``RandomState`` instance given to ``check_random_state`` comes back as a ``RandomState``."""
     generator = np.random.RandomState(1)
     checked = check_random_state(generator)
     # Exact type comparison, as opposed to an ``isinstance`` check.
     self.assertEqual(type(checked), np.random.RandomState)