Example #1
 def to_tuple(mx):
     if not sp.isspmatrix_coo(mx):
         mx = mx.tocoo()
     coords = np.vstack((mx.row, mx.col)).transpose()
     values = mx.data
     shape = mx.shape
     return coords, values, shape
Example #2
def sparse_matrix_to_hdf(obj, path, name):
	if (sparse.isspmatrix_csr(obj) or sparse.isspmatrix_csc(obj)):
		sparse_csx_matrix_to_hdf(obj, path, name)
	elif (sparse.isspmatrix_coo(obj)):
		sparse_coo_matrix_to_hdf(obj, path, name)
	else:
		raise ValueError('Type {} not yet supported for serialisation!'.format(type(obj)))
Example #3
def sparse_to_tuple(sparse_mx):
    if not sp.isspmatrix_coo(sparse_mx):
        sparse_mx = sparse_mx.tocoo()
    coords = np.vstack((sparse_mx.row, sparse_mx.col)).transpose()
    values = sparse_mx.data
    shape = sparse_mx.shape
    return coords, values, shape
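A minimal usage sketch for the converter above (assuming the customary np/sp aliases for NumPy and scipy.sparse; the input matrix is illustrative):

import numpy as np
import scipy.sparse as sp

# a 2x2 CSR matrix with two non-zeros; sparse_to_tuple converts it to COO first
mx = sp.csr_matrix(np.array([[0, 2], [3, 0]]))
coords, values, shape = sparse_to_tuple(mx)
# coords -> [[0, 1], [1, 0]], values -> [2, 3], shape -> (2, 2)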
Example #4
    def inverse_transform(self, X):
        """Return terms per document with nonzero entries in X.

        Parameters
        ----------
        X : {array, sparse matrix}, shape = [n_samples, n_features]

        Returns
        -------
        X_inv : list of arrays, len = n_samples
            List of arrays of terms.
        """
        if sp.isspmatrix_coo(X):  # COO matrix is not indexable
            X = X.tocsr()
        elif not sp.issparse(X):
            # We need to convert X to a matrix, so that the indexing
            # returns 2D objects
            X = np.asmatrix(X)
        n_samples = X.shape[0]

        terms = np.array(list(self.vocabulary.keys()))
        indices = np.array(list(self.vocabulary.values()))
        inverse_vocabulary = terms[np.argsort(indices)]

        return [inverse_vocabulary[X[i, :].nonzero()[1]].ravel()
                for i in range(n_samples)]
Example #5
    def fit(self, counts, lengths=None):
        """

        """
        if not sparse.isspmatrix_coo(counts):
            counts = sparse.coo_matrix(counts)

        if self.init == "MDS2":
            if self.verbose:
                print("Initializing with MDS2")
            X = mds.estimate_X(counts, alpha=self.alpha,
                               beta=self.beta,
                               ini="random",
                               bias=self.bias,
                               random_state=self.random_state,
                               maxiter=self.max_iter,
                               verbose=self.verbose)
        else:
            X = self.init

        X = estimate_X(counts,
                       alpha=self.alpha,
                       beta=self.beta,
                       ini=X,
                       bias=self.bias,
                       verbose=self.verbose,
                       random_state=self.random_state,
                       maxiter=self.max_iter)
        return X
Example #6
    def fit(self, matrix, epochs=5, no_threads=2, verbose=False):
        """
        Estimate the word embeddings.

        Parameters:
        - scipy.sparse.coo_matrix matrix: cooccurrence matrix
        - int epochs: number of training epochs
        - int no_threads: number of training threads
        - bool verbose: print progress messages if True
        """

        shape = matrix.shape

        if (len(shape) != 2 or
            shape[0] != shape[1]):
            raise Exception('Cooccurrence matrix must be square')

        if not sp.isspmatrix_coo(matrix):
            raise Exception('Cooccurrence matrix must be in the COO format')

        self.word_vectors = ((np.random.rand(shape[0],
                                             self.no_components) - 0.5)
                                             / self.no_components)
        self.word_biases = np.zeros(shape[0], 
                                    dtype=np.float64)

        self.vectors_sum_gradients = np.ones_like(self.word_vectors)
        self.biases_sum_gradients = np.ones_like(self.word_biases)

        shuffle_indices = np.arange(matrix.nnz, dtype=np.int32)

        if verbose:
            print('Performing %s training epochs '
                  'with %s threads' % (epochs, no_threads))

        for epoch in range(epochs):

            if verbose:
                print('Epoch %s' % epoch)

            # Shuffle the cooccurrence matrix
            np.random.shuffle(shuffle_indices)

            fit_vectors(self.word_vectors,
                        self.vectors_sum_gradients,
                        self.word_biases,
                        self.biases_sum_gradients,
                        matrix.row,
                        matrix.col,
                        matrix.data,
                        shuffle_indices,
                        self.learning_rate,
                        self.max_count,
                        self.alpha,
                        int(no_threads))

            if not np.isfinite(self.word_vectors).all():
                raise Exception('Non-finite values in word vectors. '
                                'Try reducing the learning rate.')
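fit() above refuses to convert formats and requires a square COO matrix; a minimal sketch of building a compliant input (the entries are illustrative):

import numpy as np
import scipy.sparse as sp

rows = np.array([0, 1, 1], dtype=np.int32)
cols = np.array([1, 0, 2], dtype=np.int32)
data = np.array([2.0, 2.0, 1.0])
cooccurrence = sp.coo_matrix((data, (rows, cols)), shape=(3, 3))
assert sp.isspmatrix_coo(cooccurrence)  # any other format would raise in fit()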
Example #7
def weighted_bipartite_matching(A, perm_type="row"):
    """
    Returns an array of row permutations that attempts to maximize
    the product of the ABS values of the diagonal elements in
    a nonsingular square CSC sparse matrix. Such a permutation is
    always possible provided that the matrix is nonsingular.

    This function looks at both the structure and ABS values of the
    underlying matrix.

    Parameters
    ----------
    A : csc_matrix
        Input matrix

    perm_type : str {'row', 'column'}
        Type of permutation to generate.

    Returns
    -------
    perm : array
        Array of row or column permutations.

    Notes
    -----
    This function uses a weighted maximum cardinality bipartite matching
    algorithm based on breadth-first search (BFS).  The columns are weighted
    according to the element of max ABS value in the associated rows and
    are traversed in descending order by weight.  When performing the BFS
    traversal, the row associated to a given column is the one with maximum
    weight. Unlike other techniques [1]_, this algorithm does not guarantee the
    product of the diagonal is maximized.  However, this limitation is offset
    by the substantially faster runtime of this method.

    References
    ----------
    .. [1] I. S. Duff and J. Koster, "The design and use of algorithms for
    permuting large entries to the diagonal of sparse matrices", SIAM J.
    Matrix Anal. and Applics. 20, no. 4, 889 (1997).

    """
    nrows = A.shape[0]
    if A.shape[0] != A.shape[1]:
        raise ValueError("weighted_bipartite_matching requires a square matrix.")

    if sp.isspmatrix_csr(A) or sp.isspmatrix_coo(A):
        A = A.tocsc()
    elif not sp.isspmatrix_csc(A):
        raise TypeError("matrix must be in CSC, CSR, or COO format.")

    if perm_type == "column":
        A = A.transpose().tocsc()

    perm = _weighted_bipartite_matching(
        np.asarray(np.abs(A.data), dtype=float), A.indices, A.indptr, nrows)

    if np.any(perm == -1):
        raise Exception("Possibly singular input matrix.")

    return perm
Example #8
def add_dummy_feature(X, value=1.0):
    """Augment dataset with an additional dummy feature.

    This is useful for fitting an intercept term with implementations which
    cannot otherwise fit it directly.

    Parameters
    ----------
    X : array or scipy.sparse matrix with shape [n_samples, n_features]
        Data.

    value : float
        Value to use for the dummy feature.

    Returns
    -------

    X : array or scipy.sparse matrix with shape [n_samples, n_features + 1]
        Same data with dummy feature added as first column.

    Examples
    --------

    >>> from sklearn.preprocessing import add_dummy_feature
    >>> add_dummy_feature([[0, 1], [1, 0]])
    array([[ 1.,  0.,  1.],
           [ 1.,  1.,  0.]])
    """
    X = safe_asarray(X)
    n_samples, n_features = X.shape
    shape = (n_samples, n_features + 1)
    if sp.issparse(X):
        if sp.isspmatrix_coo(X):
            # Shift columns to the right.
            col = X.col + 1
            # Column indices of dummy feature are 0 everywhere.
            col = np.concatenate((np.zeros(n_samples), col))
            # Row indices of dummy feature are 0, ..., n_samples-1.
            row = np.concatenate((np.arange(n_samples), X.row))
            # Prepend the dummy feature n_samples times.
            data = np.concatenate((np.ones(n_samples) * value, X.data))
            return sp.coo_matrix((data, (row, col)), shape)
        elif sp.isspmatrix_csc(X):
            # Shift index pointers since we need to add n_samples elements.
            indptr = X.indptr + n_samples
            # indptr[0] must be 0.
            indptr = np.concatenate((np.array([0]), indptr))
            # Row indices of dummy feature are 0, ..., n_samples-1.
            indices = np.concatenate((np.arange(n_samples), X.indices))
            # Prepend the dummy feature n_samples times.
            data = np.concatenate((np.ones(n_samples) * value, X.data))
            return sp.csc_matrix((data, indices, indptr), shape)
        else:
            klass = X.__class__
            # convert to COO once, add the feature, then convert back to the input class
            return klass(add_dummy_feature(X.tocoo(), value))
    else:
        return np.hstack((np.ones((n_samples, 1)) * value, X))
Example #9
def estimate_X(counts, alpha=-3., beta=1.,
               ini=None, bias=None,
               random_state=None, maxiter=10000, verbose=0):
    """
    Estimate the parameters of g

    Parameters
    ----------
    counts : sparse scipy matrix (n, n)

    alpha : float, optional, default: -3
        counts-to-distances mapping coefficient

    beta : float, optional, default: 1
        counts-to-distances scaling coefficient

    ini : ndarray (n, 3), optional, default: None
        initialization point

    bias : ndarray (n, 1), optional, default: None
        bias vector. If None, no bias will be applied to the model

    random_state : {RandomState, int, None}, optional, default: None
        random state object, or seed, or None.

    maxiter : int, optional, default: 10000
        Maximum number of iterations

    verbose : int, optional, default: 0
        verbosity

    Returns
    -------
    X : 3D structure

    """
    n = counts.shape[0]

    if not sparse.isspmatrix_coo(counts):
        counts = sparse.coo_matrix(counts)

    random_state = check_random_state(random_state)
    if ini is None:
        ini = 1 - 2 * random_state.rand(n * 3)

    data = (n, counts, alpha, beta, bias,
            False)

    results = optimize.fmin_l_bfgs_b(
        eval_f,
        ini.flatten(),
        eval_grad_f,
        (data, ),
        iprint=verbose,
        maxiter=maxiter,
        )
    results = results[0].reshape(-1, 3)
    return results
Example #10
def sparse_matrix_to_edges(data):
    if not isspmatrix_coo(data):
        data = data.tocoo()
    edges = defaultdict(list)
    for x,y in zip(data.row, data.col):
        if x != y and y not in edges[x]:
            edges[x].append(y)
            edges[y].append(x)
    return edges
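A quick sketch of the expected output (assuming isspmatrix_coo comes from scipy.sparse and defaultdict from collections, as the function requires):

import numpy as np
from scipy.sparse import coo_matrix

adj = coo_matrix(np.array([[0, 1, 0],
                           [1, 0, 1],
                           [0, 1, 0]]))
edges = sparse_matrix_to_edges(adj)
# edges -> {0: [1], 1: [0, 2], 2: [1]}: undirected, duplicates and self-loops skipped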
Example #11
def sparse_to_tuple(m):

    if not sps.isspmatrix_coo(m):
        m = m.tocoo()

    indices = np.vstack((m.row, m.col)).transpose()
    values = np.float32(m.data)
    dense_shape = m.shape

    return indices, values, dense_shape
Example #12
def prepare_graph_data1(adj):
    # adapted from preprocess_adj_bias
    num_nodes = adj.shape[0]
    adj = adj + sp.eye(num_nodes)  # self-loop
    #data =  adj.tocoo().data
    if not sp.isspmatrix_coo(adj):
        adj = adj.tocoo()
    adj = adj.astype(np.float32)
    indices = np.vstack((adj.col, adj.row)).transpose()
    return (indices, adj.data, adj.shape), adj.row, adj.col
Example #13
 def to_tuple(mx):
     if not sp.isspmatrix_coo(mx):
         mx = mx.tocoo()
         # type(mx): scipy.sparse.coo.coo_matrix; converts a csr_matrix to a coo_matrix
         # scipy.sparse.coo_matrix - A sparse matrix in COOrdinate format.
     coords = np.vstack((mx.row, mx.col)).transpose()  # stack arrays vertically (row-wise), one row after another
     values = mx.data
     shape = mx.shape
     return coords, values, shape
Example #14
def save_named_sparse(named_sparse_matrices: Dict[str, sparse.spmatrix], output_filename: str):
    assert all(sparse.issparse(matrix) for matrix in named_sparse_matrices.values())
    coo = {name: sparse_matrix if sparse.isspmatrix_coo(sparse_matrix) else sparse_matrix.tocoo()
           for name, sparse_matrix in named_sparse_matrices.items()}
    coo = {name: {"data": matrix.data,
                  "col": matrix.col,
                  "row": matrix.row,
                  "shape": matrix.shape}
           for name, matrix in coo.items()}
    np.savez(output_filename, **coo)
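np.savez pickles each per-matrix dict, so a matching loader needs allow_pickle=True and .item(); a hedged sketch (load_named_sparse is a hypothetical name, not part of the source):

from typing import Dict
import numpy as np
from scipy import sparse

def load_named_sparse(filename: str) -> Dict[str, sparse.coo_matrix]:
    # hypothetical counterpart to save_named_sparse above
    with np.load(filename, allow_pickle=True) as npz:
        return {name: sparse.coo_matrix((d["data"], (d["row"], d["col"])),
                                        shape=d["shape"])
                for name, d in ((n, npz[n].item()) for n in npz.files)}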
Example #15
def sparse_to_tuple(sparse_mx):
    if not sp.isspmatrix_coo(sparse_mx):
        sparse_mx = sparse_mx.tocoo()
    coords = np.vstack((sparse_mx.row, sparse_mx.col)).transpose()
    values = sparse_mx.data
    shape = sparse_mx.shape
    if len(coords) == 0:
        print('here')
        print(sparse_mx.shape[1])
    return coords, values, shape
Example #16
def test_sparse_dtm(docs, pass_vocab):
    if pass_vocab:
        vocab = vocabulary(docs, sort=True)
        dtm = sparse_dtm(docs, vocab)
    else:
        dtm, vocab = sparse_dtm(docs)

    assert isspmatrix_coo(dtm)
    assert dtm.shape == (len(docs), len(vocab))
    assert vocab == vocabulary(docs, sort=True)
Example #17
def sparse_to_tuple(spmx):
    """Convert sparse matrix to tuple representation."""

    if not sps.isspmatrix_coo(spmx):
        spmx = spmx.tocoo()

    indices = np.vstack((spmx.row, spmx.col)).transpose()
    values = spmx.data
    shape = spmx.shape
    return indices, values, shape
Example #18
def decompose_delta(deltak):
    '''Decomposes the k-th order trend filtering matrix into a c-compatible set
    of arrays.'''
    if not isspmatrix_coo(deltak):
        deltak = coo_matrix(deltak)
    dk_rows = deltak.shape[0]
    dk_rowbreaks = np.cumsum(deltak.getnnz(1), dtype="int32")
    dk_cols = deltak.col.astype('int32')
    dk_vals = deltak.data.astype('double')
    return dk_rows, dk_rowbreaks, dk_cols, dk_vals
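A toy call (assuming the usual numpy/scipy imports); a first-order difference matrix stands in for the order-k trend filtering penalty:

import numpy as np
from scipy.sparse import coo_matrix

n = 5
D1 = coo_matrix(np.diff(np.eye(n), axis=0))  # (n-1) x n first-order differences
dk_rows, dk_rowbreaks, dk_cols, dk_vals = decompose_delta(D1)
# dk_rows -> 4; dk_rowbreaks -> [2, 4, 6, 8] (cumulative non-zeros per row)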
Example #19
 def to_tuple(mx):
     if not sp.isspmatrix_coo(mx):
         mx = mx.tocoo()
     coords = np.vstack((mx.row, mx.col)).transpose()
     values = mx.data
     shape = mx.shape
     # print('coords:', coords)
     # print('values:', values)
     # print('shape:', shape)
     return coords, values, shape
Example #20
def preprocess_adj_bias(adj):
    num_nodes = adj.shape[0]
    adj = adj + sp.eye(num_nodes)  # self-loop
    adj[adj > 0.0] = 1.0
    if not sp.isspmatrix_coo(adj):
        adj = adj.tocoo()
    adj = adj.astype(np.float32)
    indices = np.vstack((adj.col, adj.row)).transpose()  # This is where I made a mistake, I used (adj.row, adj.col) instead
    # return tf.SparseTensor(indices=indices, values=adj.data, dense_shape=adj.shape)
    return indices, adj.data, adj.shape
Example #21
def decompose_delta(deltak):
    '''Decomposes the k-th order trend filtering matrix into a c-compatible set
    of arrays.'''
    if not isspmatrix_coo(deltak):
        deltak = coo_matrix(deltak)
    dk_rows = deltak.shape[0]
    dk_rowbreaks = np.cumsum(deltak.getnnz(1), dtype="int32")
    dk_cols = deltak.col.astype('int32')
    dk_vals = deltak.data.astype('double')
    return dk_rows, dk_rowbreaks, dk_cols, dk_vals
Example #22
def sparse_matrix_report(m):
    print(repr(m))
    print('Number of non-zeros  :', m.nnz)
    print('Sparsity             :', 1 - m.nnz / (m.shape[0] * m.shape[1]))

    if isspmatrix_csr(m) or isspmatrix_csc(m):
        print('data length          : {} ({})'.format(len(m.data),
                                                      m.data.dtype))
        print('indptr length        : {} ({})'.format(len(m.indptr),
                                                      m.indptr.dtype))
        print('indices length       : {} ({})'.format(len(m.indices),
                                                      m.indices.dtype))
        print('Size                 :',
              size(m.data.nbytes + m.indptr.nbytes + m.indices.nbytes))
        print('10 x 10 preview:')
        print(m[:10, :10].toarray())
    elif isspmatrix_bsr(m):
        print('data length          : {} ({})'.format(len(m.data),
                                                      m.data.dtype))
        print('indptr length        : {} ({})'.format(len(m.indptr),
                                                      m.indptr.dtype))
        print('indices length       : {} ({})'.format(len(m.indices),
                                                      m.indices.dtype))
        print('blocksize length     : {}'.format(m.blocksize))
        print('Size                 :',
              size(m.data.nbytes + m.indptr.nbytes + m.indices.nbytes))
        print('preview:')
        print(m)
    elif isspmatrix_coo(m):
        print('data length          : {} ({})'.format(len(m.data),
                                                      m.data.dtype))
        print('row length           : {} ({})'.format(len(m.row), m.row.dtype))
        print('col length           : {} ({})'.format(len(m.col), m.col.dtype))
        print('Size                 :',
              size(m.data.nbytes + m.row.nbytes + m.col.nbytes))
        print('preview:')
        print(m)
    elif isspmatrix_dok(m):
        print('Size                 :', size(sys.getsizeof(m)))
        print('10 x 10 preview:')
        print(m[:10, :10].toarray())
    elif isspmatrix_dia(m):
        print('data length          : {} ({})'.format(len(m.data),
                                                      m.data.dtype))
        print('Offsets              : {} ({})'.format(len(m.offsets),
                                                      m.offsets.dtype))
        print('Size                 :', size(m.data.nbytes + m.offsets.nbytes))
        print('(no preview)')
    elif isspmatrix_lil(m):
        print('data length          : {} ({})'.format(len(m.data),
                                                      m.data.dtype))
        print('rows                 : {} ({})'.format(len(m.rows),
                                                      m.rows.dtype))
        print('Size                 :', size(m.data.nbytes + m.rows.nbytes))
        print('(no preview)')
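A sketch of driving the report; `size` is whatever human-readable byte formatter the source module imports, so a trivial stand-in is defined here (an assumption, not the original helper):

import sys
from scipy.sparse import (random as sparse_random, isspmatrix_csr, isspmatrix_csc,
                          isspmatrix_bsr, isspmatrix_coo, isspmatrix_dok,
                          isspmatrix_dia, isspmatrix_lil)

def size(nbytes):
    # simplistic stand-in for the byte formatter sparse_matrix_report expects
    return '{} B'.format(nbytes)

m = sparse_random(100, 100, density=0.01, format='csr')
sparse_matrix_report(m)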
Example #23
def risk_pclassification(W, b, X, Y, P, Q, p=1):
    """
        Empirical risk of p-classification loss for multilabel classification

        Input:
            - W: current weight matrix, K by D
            - b: current bias
            - X: feature matrix, N x D
            - Y: positive label matrix, N x K
            - p: constant for p-classification push loss
            - loss_type: valid assignment is 'example' or 'label'
                - 'example': compute a loss for each example, by the #positive or #negative labels per example
                - 'label'  : compute a loss for each label, by the #positive or #negative examples per label

        Output:
            - risk: empirical risk
            - db  : gradient of bias term
            - dW  : gradients of weights
    """
    assert p > 0
    assert Y.dtype == bool
    assert isspmatrix_coo(Y)  # scipy.sparse.coo_matrix type
    N, D = X.shape
    K = Y.shape[1]
    assert W.shape == (K, D)
    # shape = (N, 1) if loss_type == 'example' else (1, K)
    assert P.shape == Q.shape
    if P.shape[0] == 1:
        assert P.shape[1] == K
    else:
        assert P.shape == (N, 1)

    T1 = np.dot(X, W.T) + b
    T1p = np.zeros((N, K), dtype=float)
    T1p[Y.row, Y.col] = T1[Y.row, Y.col]
    T1n = T1 - T1p

    T2 = np.exp(-T1p)
    T2p = np.zeros((N, K), dtype=float)
    T2p[Y.row, Y.col] = T2[Y.row, Y.col]
    T2 = T2p * P

    T3 = np.exp(p * T1n)
    T3[Y.row, Y.col] = 0
    T3 = T3 * Q

    risk = np.sum(T2 + T3 / p)
    T4 = T3 - T2
    db = np.sum(T4)
    dW = np.dot(T4.T, X)

    if np.isnan(risk) or np.isinf(risk):
        sys.stderr.write('risk_pclassification(): risk is NaN or inf!\n')
        sys.exit(0)
    return risk, db, dW
Example #24
def sparse_to_tuple(sparse_mx):
    if not sp.isspmatrix_coo(sparse_mx):
        sparse_mx = sparse_mx.tocoo()
    coords = np.vstack((sparse_mx.row, sparse_mx.col)).transpose()
    values = sparse_mx.data
    shape = sparse_mx.shape
    #print(coords.shape[0])
    #print(coords)

    #print(coords_new)
    return coords, values, shape
Example #25
def sparse_to_tuple(sparse_mx):
    """
    Copyright (c) Thomas Kipf
    Repo: https://github.com/tkipf/gae
    """
    if not sp.isspmatrix_coo(sparse_mx):
        sparse_mx = sparse_mx.tocoo()
    coords = np.vstack((sparse_mx.row, sparse_mx.col)).transpose()
    values = sparse_mx.data
    shape = sparse_mx.shape
    return coords, values, shape
Example #26
def get_genomic_distances(lengths, counts=None):
    """
    Get genomic distances
    """
    if sparse.issparse(counts):
        if not sparse.isspmatrix_coo(counts):
            counts = counts.tocoo()
        return _get_genomic_distances_sparse(lengths, counts)
    else:
        from iced import utils
        return utils.get_genomic_distances(lengths)
Example #27
def sparse_to_tuple(sparse_mx):
    """ change of format for sparse matrix. This format is used
    for the feed_dict where sparse matrices need to be linked to placeholders
    representing sparse matrices. """

    if not sp.isspmatrix_coo(sparse_mx):
        sparse_mx = sparse_mx.tocoo()
    coords = np.vstack((sparse_mx.row, sparse_mx.col)).transpose()
    values = sparse_mx.data
    shape = sparse_mx.shape
    return coords, values, shape
Example #28
    def to_tuple(mx):
        # A COO matrix stores a sparse matrix as three arrays: one holds the
        # non-zero values, and the other two hold the row and column indices
        # of those values.
        if not sp.isspmatrix_coo(mx):
            mx = mx.tocoo()
        # [row], [col] (2 x nnz) transposed into [[row_0, col_0], ...] (nnz x 2)
        coords = np.vstack((mx.row, mx.col)).transpose()
        # values is a flat array of the nnz non-zero entries
        values = mx.data
        shape = mx.shape
        return coords, values, shape
Example #29
def maximum_bipartite_matching(A, perm_type='row'):
    """
    Returns an array of row or column permutations that removes nonzero
    elements from the diagonal of a nonsingular square CSC sparse matrix. Such
    a permutation is always possible provided that the matrix is nonsingular.
    This function looks at the structure of the matrix only.

    The input matrix will be converted to CSC matrix format if
    necessary.

    Parameters
    ----------
    A : sparse matrix
        Input matrix

    perm_type : str {'row', 'column'}
        Type of permutation to generate.

    Returns
    -------
    perm : array
        Array of row or column permutations.

    Notes
    -----
    This function relies on a maximum cardinality bipartite matching algorithm
    based on a breadth-first search (BFS) of the underlying graph [1]_.

    References
    ----------
    .. [1] I. S. Duff, K. Kaya, and B. Ucar, "Design, Implementation, and
    Analysis of Maximum Transversal Algorithms", ACM Trans. Math. Softw.
    38, no. 2, (2011).
    
    """
    nrows = A.shape[0]
    if A.shape[0] != A.shape[1]:
        raise ValueError(
            'Maximum bipartite matching requires a square matrix.')

    if sp.isspmatrix_csr(A) or sp.isspmatrix_coo(A):
        A = A.tocsc()
    elif not sp.isspmatrix_csc(A):
        raise TypeError("matrix must be in CSC, CSR, or COO format.")

    if perm_type == 'column':
        A = A.transpose().tocsc()

    perm = _maximum_bipartite_matching(A.indices, A.indptr, nrows)

    if np.any(perm == -1):
        raise Exception('Possibly singular input matrix.')

    return perm
Example #30
def prepare_graph_data(adj, configs):
    # adapted from preprocess_adj_bias
    num_nodes = adj.shape[0]
    adj = adj + sp.eye(num_nodes) - sp.eye(num_nodes)  # self-loops added then removed: values unchanged, diagonal entries made explicit
    if not configs.weighted_graph:
        adj[adj > 0.0] = 1.0
    if not sp.isspmatrix_coo(adj):
        adj = adj.tocoo()
    adj = adj.astype(np.float32)
    indices = np.vstack((adj.col, adj.row)).transpose()
    return (indices, adj.data, adj.shape), adj.row, adj.col
Example #31
def maximum_bipartite_matching(A, perm_type='row'):
    """
    Returns an array of row or column permutations that removes nonzero
    elements from the diagonal of a nonsingular square CSC sparse matrix. Such
    a permutation is always possible provided that the matrix is nonsingular.
    This function looks at the structure of the matrix only.

    The input matrix will be converted to CSC matrix format if
    necessary.

    Parameters
    ----------
    A : sparse matrix
        Input matrix

    perm_type : str {'row', 'column'}
        Type of permutation to generate.

    Returns
    -------
    perm : array
        Array of row or column permutations.

    Notes
    -----
    This function relies on a maximum cardinality bipartite matching algorithm
    based on a breadth-first search (BFS) of the underlying graph [1]_.

    References
    ----------
    .. [1] I. S. Duff, K. Kaya, and B. Ucar, "Design, Implementation, and
    Analysis of Maximum Transversal Algorithms", ACM Trans. Math. Softw.
    38, no. 2, (2011).

    """
    nrows = A.shape[0]
    if A.shape[0] != A.shape[1]:
        raise ValueError(
            'Maximum bipartite matching requires a square matrix.')

    if sp.isspmatrix_csr(A) or sp.isspmatrix_coo(A):
        A = A.tocsc()
    elif not sp.isspmatrix_csc(A):
        raise TypeError("matrix must be in CSC, CSR, or COO format.")

    if perm_type == 'column':
        A = A.transpose().tocsc()

    perm = _maximum_bipartite_matching(A.indices, A.indptr, nrows)

    if np.any(perm == -1):
        raise Exception('Possibly singular input matrix.')

    return perm
Example #32
def save_sparse(sparse_matrix, output_filename):
    assert sparse.issparse(sparse_matrix)
    if sparse.isspmatrix_coo(sparse_matrix):
        coo = sparse_matrix
    else:
        coo = sparse_matrix.tocoo()
    row = coo.row
    col = coo.col
    data = coo.data
    shape = coo.shape
    np.savez(output_filename, row=row, col=col, data=data, shape=shape)
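A hedged sketch of the matching loader (load_sparse is an assumed name; the source only shows the save side):

import numpy as np
from scipy import sparse

def load_sparse(input_filename):
    # assumed counterpart to save_sparse above; np.savez appends .npz to the name
    npz = np.load(input_filename)
    return sparse.coo_matrix((npz['data'], (npz['row'], npz['col'])),
                             shape=tuple(npz['shape']))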
Example #33
 def to_tuple(mx):
     if not sp.isspmatrix_coo(mx):
         mx = mx.tocoo()
     # order entries row-major, as required by sparse tensor constructors
     shape = mx.shape
     flatten = float(shape[-1]) * mx.row + mx.col
     order_indices = np.argsort(flatten)
     coords = np.vstack(
         (mx.row[order_indices], mx.col[order_indices])).transpose()
     values = mx.data[order_indices]
     return coords.astype(np.int64), values, shape
Example #34
def scipysp_to_pytorchsp(sp_mx):
    """ converts scipy sparse matrix to pytorch sparse matrix """
    if not sp.isspmatrix_coo(sp_mx):
        sp_mx = sp_mx.tocoo()
    coords = np.vstack((sp_mx.row, sp_mx.col)).transpose()
    values = sp_mx.data
    shape = sp_mx.shape
    pyt_sp_mx = torch.sparse.FloatTensor(torch.LongTensor(coords.T),
                                         torch.FloatTensor(values),
                                         torch.Size(shape))
    return pyt_sp_mx
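A quick round-trip check (torch.sparse.FloatTensor is the legacy constructor the function above already uses; the random matrix is illustrative):

import numpy as np
import scipy.sparse as sp
import torch

adj = sp.random(4, 4, density=0.5, format='csr', random_state=0)
pyt = scipysp_to_pytorchsp(adj)
assert np.allclose(pyt.to_dense().numpy(), adj.toarray())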
Example #35
def save_sparse(sparse_matrix, output_filename):
    assert sparse.issparse(sparse_matrix)
    if sparse.isspmatrix_coo(sparse_matrix):
        coo = sparse_matrix
    else:
        coo = sparse_matrix.tocoo()
    row = coo.row
    col = coo.col
    data = coo.data
    shape = coo.shape
    np.savez(output_filename, row=row, col=col, data=data, shape=shape)
Example #36
def sparse_scipy2torch(sparse_matrix):
    if not sp.isspmatrix_coo(sparse_matrix):
        sparse_matrix = sp.coo_matrix(sparse_matrix)
    indices = np.vstack((sparse_matrix.row, sparse_matrix.col))
    values = sparse_matrix.data

    i = t.LongTensor(indices)
    v = t.FloatTensor(values)

    shape = t.Size(sparse_matrix.shape)
    return t.sparse.FloatTensor(i, v, shape)
Example #37
def preprocess_adj_bias(adj, normalize=True):
    num_nodes = adj.shape[0]
    adj = adj + sp.eye(num_nodes)  # self-loop
    if normalize:
        adj = normalize_adj(adj).toarray()
    adj = sp.csr_matrix(adj)   
    if not sp.isspmatrix_coo(adj):
        adj = adj.tocoo()
    adj = adj.astype(np.float32)
    indices = np.vstack((adj.col, adj.row)).transpose()  
    return indices, adj.data, adj.shape
Example #38
def to_torch_sparse_tensor(x, device='cpu'):
    if not sp.isspmatrix_coo(x):
        x = sp.coo_matrix(x)
    row, col = x.row, x.col

    indices = torch.from_numpy(np.asarray([row, col]).astype('int64')).long()
    values = torch.from_numpy(x.data.astype(np.float32))
    th_sparse_tensor = torch.sparse.FloatTensor(indices, values,
                                                x.shape).to(device)

    return th_sparse_tensor
Example #39
    def do_numeric_factorization(
            self,
            matrix: Union[spmatrix, BlockMatrix],
            raise_on_error: bool = True) -> LinearSolverResults:
        """
        Perform Mumps factorization. Note that do_symbolic_factorization should be called 
        before do_numeric_factorization. 

        Parameters
        ----------
        matrix: scipy.sparse.spmatrix or pyomo.contrib.pynumero.sparse.BlockMatrix
            This matrix must have the same nonzero structure as the matrix passed into
            do_symbolic_factorization. The matrix will be converted to coo format if it 
            is not already in coo format. If sym is 1 or 2, the matrix will be converted
            to lower triangular.
        """
        if self._nnz is None:
            raise RuntimeError('Call do_symbolic_factorization first.')
        if not isspmatrix_coo(matrix):
            matrix = matrix.tocoo()
        if self._sym in {1, 2}:
            matrix = tril(matrix)
        nrows, ncols = matrix.shape
        if nrows != ncols:
            raise ValueError('matrix is not square')
        if self._dim != nrows:
            raise ValueError(
                'The shape of the matrix changed between symbolic and numeric factorization'
            )
        if self._nnz != matrix.nnz:
            raise ValueError(
                'The number of nonzeros changed between symbolic and numeric factorization'
            )
        try:
            self._mumps.set_centralized_assembled_values(matrix.data)
            self._mumps.run(job=2)
        except RuntimeError as err:
            if raise_on_error:
                raise err

        stat = self.get_infog(1)
        res = LinearSolverResults()
        if stat == 0:
            res.status = LinearSolverStatus.successful
        elif stat in {-6, -10}:
            res.status = LinearSolverStatus.singular
        elif stat in {-8, -9}:
            res.status = LinearSolverStatus.not_enough_memory
        elif stat < 0:
            res.status = LinearSolverStatus.error
        else:
            res.status = LinearSolverStatus.warning
        return res
Example #40
def zero_col(A, col):
    """Sets the specified column of A to zero"""
    if sparse.issparse(A):
        if sparse.isspmatrix_coo(A) or sparse.isspmatrix_dia(A):
            A = A.tolil()
        #doesn't support slicing
        for i in range(A.shape[0]):
            A[i, col] = 0
        return A
    else:
        A[:, col] = np.zeros(A.shape[0])
        return A
Example #41
def sparse_to_tuple(matrix):

    # Cast to COO matrix
    if not sp.isspmatrix_coo(matrix):
        matrix = matrix.tocoo()

    # Get data from sparse matrix
    coords = np.vstack((matrix.row, matrix.col)).transpose()
    values = matrix.data
    shape = matrix.shape

    return coords, values, shape
Example #42
def sparse_to_tuple(sparse_mx):
    """ Expresses a sparse matrix given as a parameter (in CSR or COO form from scipy.sparse)
        as a tuple of arrays. The first is a 2D array with the (row, column) coordinates
        of the non-zero elements. The second output is an array with the non-zero values
        corresponding to the coordinates of the first array. The third output is the shape
        of the dense matrix.
    """
    if not sp.isspmatrix_coo(sparse_mx):
        sparse_mx = sparse_mx.tocoo()
    coords = np.vstack((sparse_mx.row, sparse_mx.col)).transpose()
    values = sparse_mx.data
    shape = sparse_mx.shape
    return coords, values, shape
Example #43
 def to_tuple(mx):
     if not sp.isspmatrix_coo(mx):
         mx = mx.tocoo()
     if insert_batch:
         coords = np.vstack((np.zeros(mx.row.shape[0]), mx.row, mx.col)).transpose()
         values = mx.data
         shape = (1,) + mx.shape
     else:
         coords = np.vstack((mx.row, mx.col)).transpose()
         values = mx.data
         shape = mx.shape
     return coords, values, shape
Example #44
def zero_col(A,col):
	"""Sets the specified column of A to zero"""
	if sparse.issparse(A):
		if sparse.isspmatrix_coo(A) or sparse.isspmatrix_dia(A):
			A = A.tolil()
		#doesn't support slicing
		for i in range(A.shape[0]):
			A[i,col] = 0
		return A
	else:
		A[:,col] = np.zeros(A.shape[0])
		return A
Example #45
    def fit(self, matrix, epochs=5, no_threads=2, verbose=False):
        """
        Estimate the word embeddings.

        Parameters:
        - scipy.sparse.coo_matrix matrix: cooccurrence matrix
        - int epochs: number of training epochs
        - int no_threads: number of training threads
        - bool verbose: print progress messages if True
        """

        shape = matrix.shape

        if len(shape) != 2 or shape[0] != shape[1]:
            raise Exception("Cooccurrence matrix must be square")

        if not sp.isspmatrix_coo(matrix):
            raise Exception("Cooccurrence matrix must be in the COO format")

        self.word_vectors = np.random.rand(shape[0], self.no_components) / np.sqrt(shape[0] * self.no_components)
        self.word_biases = np.zeros(shape[0], dtype=np.float64)

        self.vectors_sum_gradients = np.ones_like(self.word_vectors)
        self.biases_sum_gradients = np.ones_like(self.word_biases)

        shuffle_indices = np.arange(matrix.nnz, dtype=np.int32)

        if verbose:
            print("Performing %s training epochs "
                  "with %s threads" % (epochs, no_threads))

        for epoch in range(epochs):

            if verbose:
                print("Epoch %s" % epoch)

            # Shuffle the cooccurrence matrix
            np.random.shuffle(shuffle_indices)

            fit_vectors(
                self.word_vectors,
                self.vectors_sum_gradients,
                self.word_biases,
                self.biases_sum_gradients,
                matrix.row,
                matrix.col,
                matrix.data,
                shuffle_indices,
                self.learning_rate,
                self.max_count,
                self.alpha,
                int(no_threads),
            )
Example #46
def save_matrix_coo_mat(matrix, outfile):
    """
    Saves matrix in coo format (for matlab)
    matrix: numpy.array or any scipy.sparse matrix format.
    outfile: Filename.
    """
    if not sparse.isspmatrix(matrix):
        matrix = sparse.coo_matrix(matrix)
    if not sparse.isspmatrix_coo(matrix):
        matrix = matrix.tocoo()
    m, n = matrix.shape
    io.savemat(outfile + ".mat", mdict={'i': matrix.row + 1,
                                        'j': matrix.col + 1, 's': matrix.data, 'm': m, 'n': n})
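For example (assuming sparse and io are the scipy.sparse/scipy.io imports the function relies on):

import numpy as np

dense = np.array([[0.0, 1.5], [2.0, 0.0]])
save_matrix_coo_mat(dense, 'example')
# writes example.mat with 1-based indices i, j, values s, and dimensions m, n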
Example #47
def make_graph(A):
    if not isspmatrix_coo(A):
        try:
            A = coo_matrix(A)
            warn("Implicit conversion of A to COO", scipy.sparse.SparseEfficiencyWarning)
        except Exception:
            raise TypeError('Argument A must have type coo_matrix, '
                            'or be convertible to coo_matrix')
    G = nx.DiGraph()
    G.add_edges_from([(i, j) for (i, j) in zip(A.row, A.col) if i != j], capacity=1)
    G.add_nodes_from(range(A.shape[0]))

    return G
Example #48
    def fit(self, counts, lengths=None):
        """

        """
        if not sparse.isspmatrix_coo(counts):
            counts = sparse.coo_matrix(counts)

        X_ = estimate_X(counts,
                        alpha=self.alpha,
                        beta=self.beta,
                        ini=self.init,
                        verbose=self.verbose,
                        precompute_distances=self.precompute_distances,
                        use_zero_entries=False,
                        random_state=self.random_state,
                        bias=self.bias,
                        factr=self.factr,
                        maxiter=self.max_iter)
        return X_
Example #49
def mean(X, axis=1):

    if axis not in (0, 1):
        raise ValueError("Invalid axis.")

    if axis == 0:
        means = np.zeros(X.shape[1], dtype=np.float64)
    elif axis == 1:
        means = np.zeros(X.shape[0], dtype=np.float64)

    if sp.isspmatrix_coo(X):
        _mean_coo(X, axis, means)
    elif sp.isspmatrix_csc(X):
        _mean_csc(X, axis, means)
    else:
        X = sp.csr_matrix(X)
        _mean_csr(X, axis, means)

    return means
Example #50
    def fit(self, counts):
        """

        """

        if not sparse.isspmatrix_coo(counts):
            counts = sparse.coo_matrix(counts)
        counts.setdiag(0)
        counts.eliminate_zeros()

        if self.init == "MDS2":
            X = mds.estimate_X(counts, alpha=self.alpha,
                               beta=self.beta,
                               ini="random",
                               verbose=self.verbose,
                               bias=self.bias,
                               random_state=self.random_state,
                               maxiter=self.max_iter)
        elif self.init == "random":
            X = self.init
        else:
            raise ValueError("Unknown initialization strategy")

        self.alpha_ = self.alpha
        self.beta_ = self.beta
        for it in range(self.max_iter_outer_loop):
            self.alpha_, self.beta_ = poisson_model.estimate_alpha_beta(
                counts,
                X, bias=self.bias, ini=[self.alpha_, self.beta_],
                verbose=self.verbose,
                random_state=self.random_state)
            print(self.alpha_, self.beta_)
            X_ = estimate_X(counts,
                            alpha=self.alpha_,
                            beta=self.beta_,
                            ini=X,
                            verbose=self.verbose,
                            bias=self.bias,
                            random_state=self.random_state,
                            maxiter=self.max_iter)
        return X_
Example #51
def write_counts(filename, counts):
    """
    Write counts

    Parameters
    ----------

    filename : str

    counts: array-like
    """
    if not sparse.isspmatrix_coo(counts):
        if sparse.issparse(counts):
            counts = counts.tocoo()
        else:
            counts = sparse.coo_matrix(counts)
    # XXX this is slow and memory intensive
    data = np.concatenate([counts.row[:, np.newaxis],
                           counts.col[:, np.newaxis],
                           counts.data[:, np.newaxis]], axis=1)
    np.savetxt(filename, data, fmt="%d\t%d\t%f")
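A small sketch of the resulting on-disk format (the matrix is illustrative):

import numpy as np
from scipy import sparse

counts = sparse.coo_matrix(np.array([[0.0, 3.0], [3.0, 0.0]]))
write_counts('counts.txt', counts)
# each line is tab-separated: row, col, count (e.g. "0\t1\t3.000000")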
Example #52
def _sparse_savetxt(filename, input_array):
    zip_func = six.moves.zip
    if sp.isspmatrix_csr(input_array):
        input_array = input_array.tocoo()
    elif not sp.isspmatrix_coo(input_array):
        input_array = input_array.tocsr().tocoo()
    n_row = input_array.shape[0]
    current_sample_row = 0
    line = []
    with open(filename, 'w') as fw:
        fw.write('sparse {0:d}\n'.format(input_array.shape[-1]))
        for i, j, v in zip_func(input_array.row, input_array.col, input_array.data):
            if i == current_sample_row:
                line.append('{0}:{1}'.format(j, v))
            else:
                fw.write(' '.join(line))
                fw.write('\n' * (i - current_sample_row))
                line = ['{0}:{1}'.format(j, v)]
                current_sample_row = i
        fw.write(' '.join(line))
        fw.write('\n' * (n_row - i))
Example #53
    def fit(self, counts, lengths=None):
        """

        """
        if not sparse.isspmatrix_coo(counts):
            counts = sparse.coo_matrix(counts)

        for i in range(self.max_iter_outer):
            if i == 0:
                X = estimate_X(
                    counts,
                    alpha=self.alpha,
                    beta=self.beta,
                    ini=self.init,
                    verbose=self.verbose,
                    use_zero_entries=False,
                    random_state=self.random_state,
                    bias=self.bias,
                    factr=self.factr,
                    maxiter=self.max_iter,
                )
            else:
                ir = IsotonicRegression()
                dis = np.sqrt(((X[counts.row] - X[counts.col]) ** 2).sum(axis=1))
                wish_distances = ir.fit_transform(1.0 / counts.data, dis)
                X = estimate_X(
                    sparse.coo_matrix((wish_distances, (counts.row, counts.col))),
                    alpha=self.alpha,
                    beta=self.beta,
                    ini=X,
                    verbose=self.verbose,
                    use_zero_entries=False,
                    precompute_distances="precomputed",
                    random_state=self.random_state,
                    bias=self.bias,
                    factr=self.factr,
                    maxiter=self.max_iter,
                )
        print("writing wish distances")
        return X
Example #54
def compute_wish_distances(counts, alpha=-3., beta=1., bias=None):
    """
    Computes wish distances from a counts matrix

    Parameters
    ----------
    counts : ndarray
        Interaction counts matrix

    alpha : float, optional, default: -3
        Coefficient of the power law

    beta : float, optional, default: 1
        Scaling factor

    bias : ndarray, optional, default: None
        bias vector; if None, no bias is applied

    Returns
    -------
    wish_distances : ndarray or sparse matrix
    """
    if beta == 0:
        raise ValueError("beta cannot be equal to 0.")
    counts = counts.copy()
    if sparse.issparse(counts):
        if not sparse.isspmatrix_coo(counts):
            counts = counts.tocoo()
        if bias is not None:
            bias = bias.flatten()
            counts.data /= bias[counts.row] * bias[counts.col]
        wish_distances = counts / beta
        wish_distances.data[wish_distances.data != 0] **= 1. / alpha
        return wish_distances
    else:
        wish_distances = counts.copy() / beta
        wish_distances[wish_distances != 0] **= 1. / alpha

        return wish_distances
Example #55
    d_ctx_pair = td.Dict()
    m_ctx_pair = tm.arg_l_arg_r_asjo_matrix( d_triples._rtuple2ids, fn_ctx_pair, 
        num_triples, col_indices=d_ctx_pair, mmfile_presuffix='_pairs', reload=refresh )

    logging.info( 'loading context features for words' )
    d_ctx_word = td.Dict()
    m_ctx_w1 = tm.arg_asjo_matrix( d_triples._m2ids, d_ctx_word, fn_ctx_word, num_triples,
        transform_w2sig=lambda w2sig: sorted( list( w2sig ), key = lambda x: float( x[1] ), reverse=True )[:20],
        mmfile_presuffix='_w1', reload=refresh )
    m_ctx_w2 = tm.arg_asjo_matrix( d_triples._r2ids, d_ctx_word, fn_ctx_word, num_triples, 
        transform_w2sig = lambda w2sig: sorted( list( w2sig ), key = lambda x: float( x[1] ), reverse=True )[:20], 
        mmfile_presuffix='_w2', reload=refresh )

    # adjust ( context ) matrix dimensions, if they vary
    if m_ctx_w1.shape[1] < m_ctx_w2.shape[1]:
        if sparse.isspmatrix_coo(m_ctx_w1):
            m_ctx_w1 = m_ctx_w1.todok()
        m_ctx_w1.resize(m_ctx_w2.shape)

    if m_ctx_w2.shape[1] < m_ctx_w1.shape[1]:
        if sparse.isspmatrix_coo(m_ctx_w2):
            m_ctx_w2 = m_ctx_w2.todok()
        m_ctx_w2.resize(m_ctx_w1.shape)

    if not sparse.isspmatrix_coo(m_ctx_w1):
        m_ctx_w1 = m_ctx_w1.tocoo()
    if not sparse.isspmatrix_coo(m_ctx_w2):
        m_ctx_w2 = m_ctx_w2.tocoo()

    logging.info( "computing set operations on context matrices " )
    mb_ctx_w1               = m_ctx_w1.astype( bool )
Example #56
def write_vectors_to_disk(matrix, row_index, column_index, vectors_path, features_path='', entries_path='',
                          entry_filter=lambda x: True):
    """
    Converts a matrix and its associated row/column indices to a Byblo compatible entries/features/event files,
    possibly applying a transformation function to each entry.

    :param matrix: data matrix of size (n_entries, n_features) in scipy.sparse.coo format
    :type matrix: scipy.sparse.coo_matrix
    :param row_index: a collection of DocumentFeature-s representing entry names. `row_index[N]` should return the
     feature whose vector is stored in row N of `matrix`
    :type row_index: thesisgenerator.plugins.tokenizer.DocumentFeature
    :param column_index: sorted list of feature names
    :param features_path: str, where to write the Byblo features file. If the entry_filter removes all entries
    this file will not be written, i.e. the file will not be created at all if there's nothing to put in it
    :param entries_path: str, where to write the Byblo entries file. If the entry_filter removes all entries
    this file will not be written.
    :param vectors_path: where to write the Byblo events file
    :type vectors_path: string or file-like. If it evaluates to True progress messages will be printed
    :param entry_filter: callable, called for each entry. Takes a single DocumentFeature parameter. Returns true
    if the entry has to be written and false if the entry has to be ignored. Defaults to True.
    """
    if not any([vectors_path, features_path, entries_path]):
        raise ValueError('At least one of vectors_path, features_path or entries_path required')

    if not isspmatrix_coo(matrix):
        logging.error('Expected a scipy.sparse.coo matrix, got %s', type(matrix))
        raise ValueError('Wrong matrix type')
    if (len(row_index), len(column_index)) != matrix.shape:
        logging.error('Matrix shape is wrong, expected %dx%s, got %r', len(row_index), len(column_index), matrix.shape)
        raise ValueError('Matrix shape does not match row_index/column_index size')

    accepted_entry_counts = {}
    matrix_data = zip(matrix.row, matrix.col, matrix.data)
    accepted_rows = []

    logging.info('Writing events to %s', vectors_path)
    if isinstance(vectors_path, six.string_types):
        outfile = open(vectors_path, 'w')
    elif hasattr(vectors_path, 'write'):
        outfile = vectors_path
    else:
        raise ValueError('vectors_path: expected str or file-like, got %s' % type(vectors_path))

    for row_num, column_ids_and_values in groupby(matrix_data, itemgetter(0)):
        entry = row_index[row_num]
        if entry_filter(entry):
            if entry not in accepted_entry_counts:  # guard against duplicated vectors
                accepted_rows.append(row_num)
                features_and_counts = [(column_index[x[1]], x[2]) for x in column_ids_and_values]
                outfile.write('%s\t%s\n' % (entry,
                                            '\t'.join(map(str, chain.from_iterable(features_and_counts)))
                ))
                accepted_entry_counts[entry] = sum(x[1] for x in features_and_counts)
            if row_num % 5000 == 0 and outfile:
                logging.info('Processed %d vectors', row_num)

    outfile.close()

    if entries_path and accepted_entry_counts:
        logging.info('Writing entries to %s', entries_path)
        with open(entries_path, 'w') as outfile:
            for entry, count in accepted_entry_counts.items():
                outfile.write('%s\t%f\n' % (entry, count))

    if features_path and accepted_rows:  # guard against empty files
        logging.info('Writing features to %s', features_path)
        with open(features_path, 'w') as outfile:
            feature_sums = np.array(matrix.tocsr()[accepted_rows].sum(axis=0))[0, :]
            for feature, count in zip(column_index, feature_sums):
                if -1e-5 < count < 1e-5:
                    logging.warning('Feature %s does not occur in vector set', feature)
                else:
                    outfile.write('%s\t%f\n' % (feature, count))
Example #57
def test_add_dummy_feature_coo():
    X = sparse.coo_matrix([[1, 0], [0, 1], [0, 1]])
    X = add_dummy_feature(X)
    assert_true(sparse.isspmatrix_coo(X), X)
    assert_array_equal(X.toarray(), [[1, 1, 0], [1, 0, 1], [1, 0, 1]])
Example #58
def constrained_lsqr(A,b,C,p,q):
	"""Solves the least-squares problem min_x ||Ax-b||^2
	with the constraints p<=Cx<=q.  If there are more than one solution,
	picks the one with the lowest L-2 norm.

	The result is (x,activeSet,activeRhs) where activeSet lists the
	indices of the bounds that are met exactly and activeRhs lists
	the values.
	"""
	(x,istop,itn,normr,normar,norma,conda,normx) = sparse.linalg.lsmr(A,b,damp=1e-5)
	Cx = spdot(C,x)
	pmax,ipmax = max((pi-xi,i) for i,(pi,xi) in enumerate(zip(p,Cx)))
	qmax,iqmax = max((xi-qi,i) for i,(qi,xi) in enumerate(zip(q,Cx)))
	candidates = set(range(A.shape[1]))
	activeSet = []
	activeRhs = []
	AtA = None
	while pmax > 0 or qmax > 0:
		#add the most violated constraint to the active set
		#and re-solve
		cval = q[iqmax]
		crow = iqmax
		if pmax > qmax:
			cval = p[ipmax]
			crow = ipmax
		#print "Bound %d violation %f <= %f <= %f, active set size %d"%(crow,p[crow],Cx[crow],q[crow],len(activeSet))
		candidates.remove(crow)
		activeSet.append(crow)
		activeRhs.append(cval)
		#form active set matrices
		if sparse.isspmatrix_coo(C) or sparse.isspmatrix_dia(C):
			C = C.tocsr()
		Atemp = sparse.vstack([A]+[C[crow,:] for crow in activeSet])
		btemp = np.hstack((b,activeRhs))
		#solve for A'x ~= b' with starting point x0 that satisfies Ax=b
		#for old active set
		#Let x=y+x0
		#Solve for y that solves A'y ~= b'-A'x0
		Atempx = Atemp.dot(x)
		(y,istop,itn,normr,normar,norma,conda,normx) = sparse.linalg.lsmr(Atemp,btemp-Atempx)
		if normr > 1e-4:
			#solve (AtA)x+C^T z = At b, Cx=d
			if AtA is None:
				AtA = A.T.dot(A)
			Ca = sparse.vstack([C[crow,:] for crow in activeSet])
			kktMatrix = sparse.vstack([sparse.hstack((AtA,Ca.T)),sparse.hstack((Ca,sparse.csr_matrix((Ca.shape[0],Ca.shape[0]))))])
			(xz,istop,itn,normr,normar,norma,conda,normx) = sparse.linalg.lsmr(kktMatrix,np.hstack((np.dot(A.T,b),activeRhs)))
			if normr > 1e-4:
				print("Warning, could not solve for constraints exactly, error",normr)
			x = xz[:x.shape[0]]
		else:
			x = x+y
		#(x,istop,itn,normr,normar,norma,conda,normx) = sparse.linalg.lsmr(Atemp,btemp)
		Cx = spdot(C,x)
		pmax,ipmax = max((p[i]-Cx[i],i) for i in candidates)
		qmax,iqmax = max((Cx[i]-q[i],i) for i in candidates)
	chtol = 1e-7
	for i in range(len(x)):
		if Cx[i] < p[i]-chtol or Cx[i] > q[i]+chtol:
			print("Warning, constraint violation %d: %f <= %f <= %f"%(i,p[i],Cx[i],q[i]))
	return (x,activeSet,activeRhs)
Example #59
	def solve(self, q,dq,dt):
		"""Takes sensed q,dq, timestep dt and returns qdes and dqdes
		in joint space.
		"""

		for task in self.taskList:
			task.updateState(q,dq,dt)
		# priority 1
		if not hasattr(self,'timingStats'):
			self.timingStats = defaultdict(int)
		self.timingStats['count'] += 1
		t1 = time.time()
		J1 = self.getStackedJacobian(q,dq,1)
		v1 = self.getStackedVelocity(q,dq,dt,1)
		(A,b) = self.getMotionModel(q,dq,dt)
		if self.activeDofs is not None:
			A = select_cols(A,self.activeDofs)
		if sparse.isspmatrix_coo(A) or sparse.isspmatrix_dia(A):
			A = A.tocsr()
		t2 = time.time()
		self.timingStats['get jac/vel p1'] += t2-t1
		
		J2 = self.getStackedJacobian(q,dq,2)
		if J2 is not None:
			V2 = self.getStackedVelocity(q,dq,dt,2)
		t3 = time.time()
		self.timingStats['get jac/vel p2'] += t3-t2

		#compute velocity limits
		vmax = self.robot.getVelocityLimits()
		vmin = vectorops.mul(vmax,-1.0)
		amax = self.robot.getAccelerationLimits()
		vref = dq if self.ulast is None else self.ulast
		for i,(v,vm,am) in enumerate(zip(vref,vmin,amax)):
			if v-dt*am > vm:
				vmin[i] = v-dt*am
			elif v < vm:
				#accelerate!
				vmin[i] = min(vm,v+dt*am)
		for i,(v,vm,am) in enumerate(zip(vref,vmax,amax)):
			if v-dt*am < vm:
				vmax[i] = v+dt*am
			elif v > vm:
				#decelerate!
				vmax[i] = max(vm,v-dt*am)
		for i,(l,u) in enumerate(zip(vmin,vmax)):
			assert l <= u
			if l > 0 or u < 0:
				print("Moving link:",self.robot.getLink(i).getName(),"speed",vref[i])
		#print zip(vmin,vmax)
		Aumin = np.array(vmin) - b
		Aumax = np.array(vmax) - b
		#print zip(Aumin.tolist(),Aumax.tolist())
			
		J1A = J1.dot(A)
		J1b = J1.dot(b)
		if J2 is None:
			#just solve constrained least squares
			#J1*(A*u+b) = v1
			#vmin < A*u + b < vmax
			u1 = constrained_lsqr(J1A,v1-J1b,A,Aumin,Aumax)[0]
			u2 = [0.0]*len(u1)
			t4 = time.time()
			self.timingStats['pinv jac p1'] += t4-t3
		else:			
			#solve equality constrained least squares
			#dq = A*u + b
			#J1*dq = v1
			#J1*A*u + J1*b = v1
			#least squares solve for u1:
			#J1*A*u1 = v1 - J1*b
			#vmin < A*u1 + b < vmax
			#need u to satisfy
			#Aact*u = bact
			#we know that u1 satisfies Aact*u = bact
			#let u = u1+u2
			#=> u2 = (I - Aact^+ Aact) z = N*z
			#least squares solve for z:
			#J2*A*(u1+u2+b) = v2
			#J2*A*N z = v2 - J2*(A*u1+b)
			(u1, active, activeRhs) = constrained_lsqr(J1A,v1-J1b,A,Aumin,Aumax)
			Aact = sparse.vstack([J1A]+[A[crow,:] for crow in active]).todense()
			#bact = np.hstack((v1-J1b,activeRhs))
			J1Ainv = np.linalg.pinv(Aact)
			dq1 = A.dot(u1)+b
			if len(active)>0:
				print("Priority 1 active constraints:")
				for a in active:
					print(self.robot.getLink(a).getName(),vmin[a],dq1[a],vmax[a])

			r1 = J1.dot(dq1)-v1
			print("Op space controller solve")
			print("  Residual 1",np.linalg.norm(r1))

			# priority 2
			N = np.eye(len(dq)) - np.dot(J1Ainv, Aact)
			t4 = time.time()
			self.timingStats['pinv jac p1'] += t4-t3

			u2 = [0.0]*len(u1)
			#print "  Initial priority 2 task error",np.linalg.norm(V2-J2.dot(dq1))
			J2A = J2.dot(A)
			J2AN = J2A.dot(N)
			AN = sparse.csr_matrix(np.dot(A.todense(),N))
			#Note: N destroys sparsity
			V2_m_resid = np.ravel(V2 - J2.dot(dq1))
			(z,active,activeRhs) = constrained_lsqr(J2AN,V2_m_resid,AN,vmin-dq1,vmax-dq1)
			t5 = time.time()
			self.timingStats['ls jac p2'] += t5-t4
			u2 = np.ravel(np.dot(N, z))

			#debug, should be close to zero
			#print "  Nullspace projection error:",np.linalg.norm(J1A.dot(u2))
			#this is the error in the nullspace of the first priority tasks
			dq2 = A.dot(u2) + dq1

			#debug, should be equal to residual 2 printout above
			print("  Residual 2",np.linalg.norm(J2.dot(dq2)-V2))
			#debug should be close to zero
			#print "  Residual 2 in priority 1 frame",np.linalg.norm(J1.dot(dq2)-v1)
			if len(active)>0:
				print("Priority 2 active constraints:")
				for a in active:
					print(self.robot.getLink(a).getName(),vmin[a],dq2[a],vmax[a])

		#compose the velocities together
		u = np.ravel((u1 + u2))
		dqpred = A.dot(u)+b
		print("  Residual 1 final",np.linalg.norm(np.ravel(J1.dot(dqpred))-v1))
		if J2 is not None:
			print("  Residual 2 final",np.linalg.norm(np.ravel(J2.dot(dqpred))-V2))
		
		u = u.tolist()
		#if self.activeDofs != None:
		#	print "dqdes:",[self.dqdes[v] for v in self.activeDofs]
		self.qdes = vectorops.madd(q, u, dt)
		self.ulast = u

		t6 = time.time()
		self.timingStats['total']+=t6-t1
		if self.timingStats['count']%10==0:
			n=self.timingStats['count']
			print("OpSpace times (ms): vel/jac 1 %.2f inv 1 %.2f vel/jac 2 %.2f inv 2 %.2f total %.2f"%(self.timingStats['get jac/vel p1']/n*1000,self.timingStats['pinv jac p1']/n*1000,self.timingStats['get jac/vel p2']/n*1000,self.timingStats['ls jac p2']/n*1000,self.timingStats['total']/n*1000))
			
		return (self.qdes,u)
Example #60
    def fit(self, matrix, epochs=5, no_threads=2, verbose=False):
        """
        Estimate the word embeddings.

        Parameters:
        - scipy.sparse.coo_matrix matrix: cooccurrence matrix
        - int epochs: number of training epochs
        - int no_threads: number of training threads
        - bool verbose: print progress messages if True
        """

        shape = matrix.shape

        if (len(shape) != 2 or
            shape[0] != shape[1]):
            raise Exception('Cooccurrence matrix must be square')

        if not sp.isspmatrix_coo(matrix):
            raise Exception('Cooccurrence matrix must be in the COO format')

        self.word_vectors = ((np.random.rand(shape[0],
                                             self.no_components) - 0.5)
                                             / self.no_components)
        self.word_biases = np.zeros(shape[0], 
                                    dtype=np.float64)

        self.vectors_sum_gradients = np.ones_like(self.word_vectors)
        self.biases_sum_gradients = np.ones_like(self.word_biases)

        shuffle_indices = np.arange(matrix.nnz, dtype=np.int32)

        if verbose:
            print('Performing %s training epochs '
                  'with %s threads' % (epochs, no_threads))
                  
            # initialize lists that will hold the learning rates
            vectors_gradients = list()
            biases_gradients = list()

        for epoch in range(epochs):

            if verbose:
                starttime = dt.datetime.now()
                print('Epoch %s' % epoch)

            # Shuffle the cooccurrence matrix
            np.random.shuffle(shuffle_indices)

            fit_vectors(self.word_vectors,
                        self.vectors_sum_gradients,
                        self.word_biases,
                        self.biases_sum_gradients,
                        matrix.row,
                        matrix.col,
                        matrix.data,
                        shuffle_indices,
                        self.learning_rate,
                        self.max_count,
                        self.alpha,
                        self.max_loss,
                        int(no_threads))
                        
            if not np.isfinite(self.word_vectors).all():
                raise Exception('Non-finite values in word vectors. '
                                'Try reducing the learning rate or the '
                                'max_loss parameter.')
                                
            if verbose:
                vectors_gradients.append(np.mean([self.learning_rate/np.sqrt(a) for a in self.vectors_sum_gradients]))
                biases_gradients.append(np.mean(self.learning_rate/np.sqrt(self.biases_sum_gradients)))

                endtime = dt.datetime.now()
                print('    Epoch %s took %s minutes' % (epoch, (endtime-starttime).total_seconds() / 60))
                
        if verbose:
            # show the learning rates
            plt.plot(vectors_gradients, 'k--', biases_gradients, 'k:')
            plt.legend(('word vectors', 'word biases'))
            plt.xlabel('Epoch')
            plt.ylabel('Mean learning rate')
            plt.title('Change in mean learning rates across epochs')
            plt.show()