Example #1
def get_sample_data(n_sess, full_brain=False, subj=1):
    """
    Download the data for the current session and subject

    Parameters
    ----------
    n_sess: int
        session number, one of {0, 1, 2, 3, 4}
    full_brain: bool, optional
        if True, download the full-brain dataset
    subj: int
        subject number, one of {1, 2}
    """
    DIR = tempfile.mkdtemp()
    ds = np.DataSource(DIR)
    BASEDIR = 'http://fa.bianp.net/projects/hrf_estimation/data'
    BASEDIR_COMMON = BASEDIR + '/data_common/'
    if full_brain:
        BASEDIR += '/full_brain'
    BASEDIR_SUBJ = BASEDIR + '/data_subj%s/' % subj
    event_matrix = io.mmread(ds.open(
        BASEDIR_COMMON + 'event_matrix.mtx')).toarray()
    print('Downloading BOLD signal')
    voxels = np.load(ds.open(
        BASEDIR_SUBJ + 'voxels_%s.npy' % n_sess))
    # print('Downloading Scatting Stim')
    # scatt_stim = np.load(ds.open(
    #     BASEDIR_SUBJ + 'scatt_stim_%s.npy' % n_sess))

    em = sparse.coo_matrix(event_matrix)
    fir_matrix = utils.convolve_events(event_matrix, np.eye(HRF_LENGTH))
    events_train = sparse.block_diag([event_matrix] * 5).toarray()
    conditions_train = sparse.coo_matrix(events_train).col
    onsets_train = sparse.coo_matrix(events_train).row

    return voxels, conditions_train, onsets_train
Example #2
def test_cross_val_score_fit_params():
    clf = MockClassifier()
    n_samples = X.shape[0]
    n_classes = len(np.unique(y))

    W_sparse = coo_matrix((np.array([1]), (np.array([1]), np.array([0]))),
                          shape=(10, 1))
    P_sparse = coo_matrix(np.eye(5))

    DUMMY_INT = 42
    DUMMY_STR = '42'
    DUMMY_OBJ = object()

    def assert_fit_params(clf):
        # Function to test that the values are passed correctly to the
        # classifier arguments for non-array types

        assert_equal(clf.dummy_int, DUMMY_INT)
        assert_equal(clf.dummy_str, DUMMY_STR)
        assert_equal(clf.dummy_obj, DUMMY_OBJ)

    fit_params = {'sample_weight': np.ones(n_samples),
                  'class_prior': np.ones(n_classes) / n_classes,
                  'sparse_sample_weight': W_sparse,
                  'sparse_param': P_sparse,
                  'dummy_int': DUMMY_INT,
                  'dummy_str': DUMMY_STR,
                  'dummy_obj': DUMMY_OBJ,
                  'callback': assert_fit_params}
    cross_val_score(clf, X, y, fit_params=fit_params)
Example #3
def mesh_edges(tris):
    """Returns sparse matrix with edges as an adjacency matrix

    Parameters
    ----------
    tris : array of shape [n_triangles x 3]
        The triangles

    Returns
    -------
    edges : sparse matrix
        The adjacency matrix
    """
    npoints = np.max(tris) + 1
    ntris = len(tris)
    a, b, c = tris.T
    edges = sparse.coo_matrix((np.ones(ntris), (a, b)),
                              shape=(npoints, npoints))
    edges = edges + sparse.coo_matrix((np.ones(ntris), (b, c)),
                                      shape=(npoints, npoints))
    edges = edges + sparse.coo_matrix((np.ones(ntris), (c, a)),
                                      shape=(npoints, npoints))
    edges = edges.tocsr()
    edges = edges + edges.T
    return edges
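A minimal usage sketch (hypothetical two-triangle mesh; numpy and scipy.sparse are imported as np and sparse, as the function above expects):

import numpy as np
from scipy import sparse

tris = np.array([[0, 1, 2],
                 [1, 2, 3]])
adj = mesh_edges(tris)
print(adj.toarray())  # nonzero entries mark vertex pairs joined by a triangle edge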
Example #4
def sparseMatrix2coo(A, rowOffset=0, colOffset=0):
    """Convert SparseMatrix to scipy.coo_matrix.

    Parameters
    ----------
    A: pg.SparseMapMatrix | pg.SparseMatrix
        Matrix to convert from.

    Returns
    -------
    mat: scipy.coo_matrix
        Matrix to convert into.
    """
    from scipy.sparse import coo_matrix
    vals = pg.RVector()
    rows = pg.IndexArray([0])
    cols = pg.IndexArray([0])
    if isinstance(A, pg.SparseMatrix):
        C = pg.RSparseMapMatrix(A)
        C.fillArrays(vals=vals, rows=rows, cols=cols)
        rows += rowOffset
        cols += colOffset
        return coo_matrix((vals, (rows, cols)), shape=(A.rows(), A.cols()))
    elif isinstance(A, pg.SparseMapMatrix):
        A.fillArrays(vals, rows, cols)
        rows += rowOffset
        cols += colOffset
        return coo_matrix((vals, (rows, cols)), shape=(A.rows(), A.cols()))

    return coo_matrix(A)
Example #5
    def _assemble(self, mu=None):
        g = self.grid
        bi = self.boundary_info

        if g.dim > 2:
            raise NotImplementedError

        if bi is None or not bi.has_robin or self.robin_data is None:
            return coo_matrix((g.size(g.dim), g.size(g.dim))).tocsc()

        RI = bi.robin_boundaries(1)
        if g.dim == 1:
            robin_c = self.robin_data[0](g.centers(1)[RI], mu=mu)
            I = coo_matrix((robin_c, (RI, RI)), shape=(g.size(g.dim), g.size(g.dim)))
            return csc_matrix(I).copy()
        else:
            xref = g.quadrature_points(1, order=self.order)[RI]
            # xref(robin-index, quadraturepoint-index)
            if self.robin_data[0].shape_range == ():
                robin_c = self.robin_data[0](xref, mu=mu)
            else:
                robin_elements = g.superentities(1, 0)[RI, 0]
                robin_indices = g.superentity_indices(1, 0)[RI, 0]
                normals = g.unit_outer_normals()[robin_elements, robin_indices]
                robin_values = self.robin_data[0](xref, mu=mu)
                robin_c = np.einsum('ei,eqi->eq', normals, robin_values)

            # robin_c(robin-index, quadraturepoint-index)
            q, w = line.quadrature(order=self.order)
            SF = np.squeeze(np.array([1 - q, q]))
            SF_INTS = np.einsum('ep,pi,pj,e,p->eij', robin_c, SF, SF, g.integration_elements(1)[RI], w).ravel()
            SF_I0 = np.repeat(g.subentities(1, g.dim)[RI], 2).ravel()
            SF_I1 = np.tile(g.subentities(1, g.dim)[RI], [1, 2]).ravel()
            I = coo_matrix((SF_INTS, (SF_I0, SF_I1)), shape=(g.size(g.dim), g.size(g.dim)))
            return csc_matrix(I).copy()
Example #6
    def test_bmat(self):

        A = coo_matrix([[1,2],[3,4]])
        B = coo_matrix([[5],[6]])
        C = coo_matrix([[7]])
        D = coo_matrix((0,0))

        expected = matrix([[1, 2, 5],
                           [3, 4, 6],
                           [0, 0, 7]])
        assert_equal(construct.bmat([[A,B],[None,C]]).todense(), expected)

        expected = matrix([[1, 2, 0],
                           [3, 4, 0],
                           [0, 0, 7]])
        assert_equal(construct.bmat([[A,None],[None,C]]).todense(), expected)

        expected = matrix([[0, 5],
                           [0, 6],
                           [7, 0]])
        assert_equal(construct.bmat([[None,B],[C,None]]).todense(), expected)

        expected = matrix(np.empty((0,0)))
        assert_equal(construct.bmat([[None,None]]).todense(), expected)
        assert_equal(construct.bmat([[None,D],[D,None]]).todense(), expected)

        # test bug reported in gh-5976
        expected = matrix([[7]])
        assert_equal(construct.bmat([[None,D],[C,None]]).todense(), expected)

        # test failure cases
        assert_raises(ValueError, construct.bmat, [[A],[B]])
        assert_raises(ValueError, construct.bmat, [[A,C]])
Example #7
def lowerBidiagonalMatrix(m, n):
    # This is a simple example for testing LSMR.
    # It uses the leading m*n submatrix from
    # A = [ 1
    #       1 2
    #         2 3
    #           3 4
    #             ...
    #               n ]
    # suitably padded by zeros.
    #
    # 04 Jun 2010: First version for distribution with lsmr.py
    if m <= n:
        row = hstack((arange(m, dtype=int), \
                      arange(1, m, dtype=int)))
        col = hstack((arange(m, dtype=int), \
                      arange(m-1, dtype=int)))
        data = hstack((arange(1, m+1, dtype=float), \
                       arange(1,m, dtype=float)))
        return coo_matrix((data, (row, col)), shape=(m,n))
    else:
        row = hstack((arange(n, dtype=int), \
                      arange(1, n+1, dtype=int)))
        col = hstack((arange(n, dtype=int), \
                      arange(n, dtype=int)))
        data = hstack((arange(1, n+1, dtype=float), \
                       arange(1,n+1, dtype=float)))
        return coo_matrix((data,(row, col)), shape=(m,n))
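As a rough check of the construction above (a sketch, not part of the original test; it assumes arange, hstack and coo_matrix are imported from numpy and scipy.sparse as the function expects), the leading 4x3 matrix can be printed densely:

A = lowerBidiagonalMatrix(4, 3)
print(A.toarray())
# [[1. 0. 0.]
#  [1. 2. 0.]
#  [0. 2. 3.]
#  [0. 0. 3.]]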
Example #8
    def get_problem_for_Q(self,p,r):
        """
        Constructs second-stage quadratic problem
        in the form

        minimize(x)   (1/2)x^THx + g^Tx
        subject to    Ax = b
                      l <= x <= u,

        where x = (q,w,s,z).

        Parameters
        ----------
        p : generator powers
        r : renewable powers

        Returns
        -------
        problem : QuadProblem
        """

        # Constants
        num_p = self.num_p
        num_w = self.num_w
        num_r = self.num_r
        num_bus = self.num_bus
        num_br = self.num_br
        Ow = coo_matrix((num_w,num_w))
        Os = coo_matrix((num_r,num_r))
        Oz = coo_matrix((num_br,num_br))
        Iz = eye(num_br,format='coo')
        ow = np.zeros(num_w)
        os = np.zeros(num_r)
        oz = np.zeros(num_br)
        cost_factor = self.parameters['cost_factor']

        H1 = self.H1/cost_factor
        g1 = self.g1/cost_factor
        
        # Form QP problem
        H = bmat([[H1,None,None,None],  # q: gen power adjustments
                  [None,Ow,None,None],  # w: bus voltage angles
                  [None,None,Os,None],  # s: curtailed renewable powers
                  [None,None,None,Oz]], # z: slack variables for thermal limits
                 format='coo')
        g = np.hstack((g1,ow,os,oz))
        A = bmat([[self.G,-self.A,self.R,None],
                  [None,self.J,None,-Iz]],format='coo')
        b = np.hstack((self.b-self.G*p,oz))
        l = np.hstack((self.p_min-p,
                       self.w_min,
                       os,
                       self.z_min))
        u = np.hstack((self.p_max-p,
                       self.w_max,
                       r,
                       self.z_max))
        
        # Return
        return QuadProblem(H,g,A,b,l,u)
Example #9
def main():
    print("Solve small matrix...")
    R = array([0, 0, 1, 1, 1, 2, 2])
    C = array([0, 1, 0, 1, 2, 1, 2])
    V = array([4.0, -1.0, -1.0,  4.0, -1.0, -1.0, 4.0])
    b = array([3.0, 2.0, 3.0])
    A = coo_matrix((V, (R, C)), shape=(3, 3))
    # convert to csr format for efficiency
    x = spsolve(A.tocsr(), b)
    print("x = ", x)

    print("Solve psd matrix...")
    # skip the first row (n, nnz)
    A = numpy.genfromtxt('../data/psd.txt', skip_header=1)
    b = numpy.genfromtxt('../data/b.txt')
    coo = coo_matrix((A[:, 2], (A[:, 0], A[:, 1])))
    x = spsolve(coo.tocsr(), b)
    print('x = ', x)

    print("Solve big matrix...")
    A = numpy.genfromtxt('../data/mat_helmholtz.txt', skip_header=1)
    coo = coo_matrix((A[:, 2], (A[:, 0], A[:, 1])))
    n = coo.shape[0]
    b = numpy.ones(n)
    x = spsolve(coo.tocsr(), b)
    print('x = ', x)
Example #10
def real_case():
    data = real_data(filename)
    D,R = read_tensor(filename)
    T = np.zeros((len(D),3,3))
    for i in range(len(D)):
        T[i,:,:] = np.dot(np.dot(R[i,:,:],D[i,:,:]),R[i,:,:].T)
    plot_2d(T[:,:2,:2])
    del T,D,R
    print(np.max(data[:,0,0]), np.max(data[:,1,1]), np.max(data[:,2,2]))
    for i in range(n*n):
        if data[i,0,0] < -20 or math.isnan(data[i,0,0]):
            data[i,0,0] = np.mean(data[:,0,0])
        if data[i,1,1] < -20 or math.isnan(data[i,1,1]):
            data[i,1,1] = np.mean(data[:,1,1])
    y = sparse.coo_matrix(data[:,0,0]).transpose()    
    f = main(y)
    d = np.zeros((n*n,2,2))
    d[:,0,0] = f[:,0]
    y = sparse.coo_matrix(data[:,0,1]).transpose()    
    f = main(y)
    d[:,0,1] = f[:,0]
    d[:,1,0] = f[:,0]
    y = sparse.coo_matrix(data[:,1,1]).transpose()    
    f = main(y)
    d[:,1,1] = f[:,0]
    res = from_log_euclidian(d)
    plot_2d(res)
    return res
Example #11
def co_labelling(z, kmax=None, kmin=None):
    """
    return a sparse co-labelling matrix given the label vector z

    Parameters
    ----------
    z: array of shape(n_samples),
       the input labels
    kmax: int, optional,
          considers only the labels strictly smaller than kmax
    kmin: int, optional,
          considers only the labels strictly greater than kmin

    Returns
    -------
    colabel: a sparse coo_matrix,
             yields the co labelling of the data
             i.e. c[i,j]= 1 if z[i]==z[j], 0 otherwise
    """
    from scipy.sparse import coo_matrix
    n = z.size
    colabel = coo_matrix((n, n))

    if kmax is None:
        kmax = z.max() + 1

    if kmin is None:
        kmin = z.min() - 1

    for k in np.unique(z):
        if (k < kmax) & (k > kmin):
            i = np.array(np.nonzero(z == k))
            row = np.repeat(i, i.size)
            col = np.ravel(np.tile(i, i.size))
            data = np.ones((i.size) ** 2)
            colabel = colabel + coo_matrix((data, (row, col)), shape=(n, n))
    return colabel
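A minimal usage sketch with a hypothetical label vector (assuming numpy is imported as np, as the function above expects): samples 0/2 share label 0 and samples 1/3 share label 1, so the co-labelling matrix is a checkerboard of ones.

z = np.array([0, 1, 0, 1])
print(co_labelling(z).toarray())
# [[1. 0. 1. 0.]
#  [0. 1. 0. 1.]
#  [1. 0. 1. 0.]
#  [0. 1. 0. 1.]]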
Example #12
def function(lamb,y):    
    b = PETSc.Vec().createSeq(m*m)
    b.setValues(range(m*m), b_matrix().transpose().dot(y).toarray())
    D = np.eye(m*m)
    D[:1,:] = 0
    D = sparse.coo_matrix(D).dot(lamb)
    B = b_matrix().transpose().dot(b_matrix())
    B = D + B
    A = PETSc.Mat()
    A.create(comm)
    A.setSizes([m*m, m*m])
    A.setType('mpidense')
    A.setUp()
    A.setValues(range(m*m),range(m*m),B.toarray())
    A.assemblyBegin()
    A.assemblyEnd()
    x = PETSc.Vec().createSeq(m*m)
    ksp = PETSc.KSP().create()
    ksp.setOperators(A)
    ksp.setFromOptions()
    ksp.setType('cg')
    print('Solving with:', ksp.getType())
    ksp.solve(b, x)  
    #SS.setValues(range(m*m), range(m*m),B.toarray())
    #S = sparse.kron(S,SX)
    print('Converged in', ksp.getIterationNumber(), 'iterations.')
    x = sparse.coo_matrix(x.getArray())
    fun = b_matrix().dot(x.transpose())
    return fun
Example #13
def test_case():
    data = test_data()
    x = np.linspace(0,1,n)
    Z = np.reshape(data[:,0,0],(n,n))
    #te = np.reshape(test,(num,num))
#    fig = plt.figure()
#    ax = fig.add_subplot(111,projection ='3d')
#    X,Y = np.meshgrid(x,x)
#    ax.plot_surface(X,Y,Z, rstride =4, cstride =4, color ='b')
#    for i in xrange(n*n):
#        if data[i,0,0]<-20 or math.isnan(data[i,0,0]) == True:
#            data[i,0,0] = -13.5
            
    #print len(data[:,0,0])
   # y = sparse.coo_matrix(test_function(n)).transpose()
    y = sparse.coo_matrix(data[:,0,0]).transpose()    
    f = main(y)
    d = np.zeros((n*n,2,2))
    d[:,0,0] = f[:,0]
    y = sparse.coo_matrix(data[:,0,1]).transpose()    
    f = main(y)
    d[:,0,1] = f[:,0]
    d[:,1,0] = f[:,0]
    y = sparse.coo_matrix(data[:,1,1]).transpose()    
    f = main(y)
    d[:,1,1] = f[:,0]
    res = from_log_euclidian(d)
    plot_2d(res)
    return res
Example #14
def test_input_dtypes():

    dtypes = (np.int32,
              np.int64,
              np.float32,
              np.float64)

    no_users, no_items = (10, 100)
    no_features = 20

    for dtype in dtypes:
        train = sp.coo_matrix((no_users,
                               no_items),
                              dtype=dtype)

        user_features = sp.coo_matrix((no_users,
                                       no_features),
                                      dtype=dtype)
        item_features = sp.coo_matrix((no_items,
                                       no_features),
                                      dtype=dtype)

        model = LightFM()
        model.fit_partial(train,
                          user_features=user_features,
                          item_features=item_features)

        model.predict(np.random.randint(0, no_users, 10).astype(np.int32),
                      np.random.randint(0, no_items, 10).astype(np.int32),
                      user_features=user_features,
                      item_features=item_features)
Example #15
def setup_spgemm_scipy(size, sparsity = None, context = None, dtype = np.float32):
    WITH_SCIPY = True
    try:
        import scipy.sparse as sp
    except ImportError:
        WITH_SCIPY = False
    if not WITH_SCIPY:
        raise UnsupportedPlatformException("scipy.sparse")
    import math

    nnz = int(math.ceil((size*size)*sparsity))
    mod = nnz

    values = np.array([], dtype=dtype)
    max_size = 10**6
    while mod > 0:
        if mod < max_size:
            values = np.append(values, np.ones((mod,)).astype(dtype) * 0.6)
            mod = 0
        else:
            values = np.append(values, np.ones((max_size,)).astype(dtype) * 0.6)
            mod -= max_size
    rows = np.random.randint(0, size-1, size=nnz)
    cols = np.random.randint(0, size-1, size=nnz)

    A = sp.coo_matrix((values, (rows, cols)), shape=(size, size), dtype=dtype)
    B = sp.coo_matrix((values, (rows, cols)), shape=(size, size), dtype=dtype)

    return A, B
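Side note (a small standalone sketch): COO input with repeated (row, col) pairs sums the duplicates on conversion, which matters here because the row and column indices are drawn at random and may collide.

import numpy as np
import scipy.sparse as sp

dup = sp.coo_matrix((np.array([1.0, 2.0]),
                     (np.array([0, 0]), np.array([1, 1]))), shape=(2, 2))
print(dup.toarray())  # entry (0, 1) becomes 3.0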
Example #16
def mesh_edges(faces):
    """Get sparse matrix with edges as an adjacency matrix.

    This function is a copy from the PySurfer package. See :
    https://github.com/nipy/PySurfer/blob/master/surfer/utils.py

    Parameters
    ----------
    faces : array_like
        The mesh faces of shape (n_faces, 3).

    Returns
    -------
    edges : sparse matrix
        The adjacency matrix.
    """
    from scipy import sparse
    npoints = np.max(faces) + 1
    nfaces = len(faces)
    a, b, c = faces.T
    edges = sparse.coo_matrix((np.ones(nfaces), (a, b)),
                              shape=(npoints, npoints))
    edges = edges + sparse.coo_matrix((np.ones(nfaces), (b, c)),
                                      shape=(npoints, npoints))
    edges = edges + sparse.coo_matrix((np.ones(nfaces), (c, a)),
                                      shape=(npoints, npoints))
    edges = edges + edges.T
    edges = edges.tocoo()
    return edges
Example #17
    def allocate(self):
        
        Annz = self.A_nnz
        Gnnz = self.G_nnz
        Jnnz = self.J_nnz
        Arows = self.A_row
        Grows = self.G_row
        Jrows = self.J_rows
        Hnnz = self.H_nnz
        num_vars = self.network.num_vars

        self.set_b(np.zeros(Arows))
        self.set_A(coo_matrix((np.zeros(Annz),(Annz*[0],Annz*[0])),
                              shape=(Arows,num_vars)))

        self.set_l(np.zeros(Grows))
        self.set_u(np.zeros(Grows))
        self.set_G(coo_matrix((np.zeros(Gnnz),(Gnnz*[0],Gnnz*[0])),
                              shape=(Grows,num_vars)))

        self.set_f(np.zeros(Jrows))
        self.set_J(coo_matrix((np.zeros(Jnnz),(Jnnz*[0],Jnnz*[0])),
                              shape=(Jrows,num_vars)))
        self.allocate_H_array(Jrows)
        for i in range(Jrows):
            self.set_H_single(i,
                              coo_matrix((np.zeros(Hnnz[i]),(Hnnz[i]*[0],Hnnz[i]*[0])),
                                         shape=(num_vars,num_vars)))
Example #18
    def test_iqp_random(self):
        
        solver = opt.opt_solver.OptSolverIQP()
        solver.set_parameters({'tol': 1e-8,
                               'quiet': True})

        self.assertRaises(Exception,solver.solve,4)

        for i in range(10):

            n = 50
            m = 10
            p = 20
            A = coo_matrix(np.random.randn(m,n))
            b = np.random.randn(m)
            g = np.random.randn(n)
            B = np.matrix(np.random.randn(p,n))
            H = coo_matrix(B.T*B)
            l = np.random.randn(n)
            u = l + 10*np.random.rand()
            
            prob = opt.opt_solver.QuadProblem(H,g,A,b,l,u)

            solver.solve(prob)

            x = solver.get_primal_variables()
            lam,nu,mu,pi = solver.get_dual_variables()

            eps = 1e-10
            self.assertLess(norm(g + H*x - A.T*lam + mu - pi),eps)
            self.assertLess(norm(A*x-b),eps)
            self.assertTrue(np.all(x <= u))
            self.assertTrue(np.all(x >= l))
            self.assertLess(norm(mu*(u-x),np.inf),eps)
            self.assertLess(norm(pi*(x-l),np.inf),eps)
Example #19
def test_reshape(old_shape, new_shape, stride_only=False):
    blob_in0 = 'col'
    blob_out0 = 'col_out'

    blob_in1 = 'row'
    blob_out1 = 'row_out'

    old_shape_for_op = (-1, old_shape[1]) if stride_only else old_shape

    op = core.CreateOperator('SparseMatrixReshape',
                             [blob_in0, blob_in1],
                             [blob_out0, blob_out1],
                             old_shape=old_shape_for_op,
                             new_shape=new_shape)

    A = np.random.random_sample(old_shape)
    A[np.random.random_sample(old_shape) > .5] = 0
    A_coo = coo_matrix(A)
    old_row, old_col = A_coo.row, A_coo.col

    workspace.FeedBlob(blob_in0, old_col.astype(np.int64))
    workspace.FeedBlob(blob_in1, old_row.astype(np.int32))

    workspace.RunOperatorOnce(op)

    A_new_coo = coo_matrix(A.reshape(new_shape))
    new_row, new_col = A_new_coo.row, A_new_coo.col

    col_out = workspace.FetchBlob(blob_out0)
    row_out = workspace.FetchBlob(blob_out1)

    np.testing.assert_array_equal(col_out, new_col)
    np.testing.assert_array_equal(row_out, new_row)
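The property being exercised can be sketched with plain scipy (a hypothetical 2x3 -> 3x2 reshape, independent of the operator under test): reshaping the dense matrix and re-extracting COO coordinates gives the expected row/col arrays.

import numpy as np
from scipy.sparse import coo_matrix

A = np.array([[0., 1., 0.],
              [2., 0., 3.]])
old = coo_matrix(A)
new = coo_matrix(A.reshape(3, 2))
print(old.row, old.col)  # coordinates of the nonzeros in the 2x3 layout
print(new.row, new.col)  # coordinates of the same entries in the 3x2 layout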
Example #20
    def histogram_from_ijv(parent_ijv, child_ijv):
        """Find per pixel overlap of parent labels and child labels,
        stored in ijv format.

        parent_ijv - the parents which contain the children
        child_ijv - the children to be mapped to a parent

        Returns a 2d array of overlap between each parent and child.
        Note that the first row and column are empty, as these
        correspond to parent and child labels of 0.

        """
        parent_count = 0 if (parent_ijv.shape[0] == 0) else np.max(parent_ijv[:, 2])
        child_count = 0 if (child_ijv.shape[0] == 0) else np.max(child_ijv[:, 2])

        if parent_count == 0 or child_count == 0:
            return np.zeros((parent_count + 1, child_count + 1), int)

        dim_i = max(np.max(parent_ijv[:, 0]), np.max(child_ijv[:, 0])) + 1
        dim_j = max(np.max(parent_ijv[:, 1]), np.max(child_ijv[:, 1])) + 1
        parent_linear_ij = parent_ijv[:, 0] + dim_i * parent_ijv[:, 1]
        child_linear_ij = child_ijv[:, 0] + dim_i * child_ijv[:, 1]

        parent_matrix = coo_matrix((np.ones((parent_ijv.shape[0],)),
                                    (parent_ijv[:, 2], parent_linear_ij)),
                                   shape=(parent_count + 1, dim_i * dim_j))
        child_matrix = coo_matrix((np.ones((child_ijv.shape[0],)),
                                   (child_linear_ij, child_ijv[:, 2])),
                                  shape=(dim_i * dim_j, child_count + 1))
        # I surely do not understand the sparse code.  Converting both
        # arrays to csc gives the best performance... Why not p.csr and
        # c.csc?
        return (parent_matrix.tocsc() * child_matrix.tocsc()).toarray()
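The overlap-by-product idea can be sketched standalone with hypothetical data: an indicator matrix of parents-by-pixels times an indicator matrix of pixels-by-children counts the pixels shared by each label pair.

import numpy as np
from scipy.sparse import coo_matrix

pixels = 4
parent_labels = np.array([1, 1, 2, 2])  # parent label of each pixel
child_labels = np.array([1, 2, 2, 2])   # child label of each pixel
P = coo_matrix((np.ones(pixels), (parent_labels, np.arange(pixels))), shape=(3, pixels))
C = coo_matrix((np.ones(pixels), (np.arange(pixels), child_labels)), shape=(pixels, 3))
print((P.tocsc() * C.tocsc()).toarray())  # e.g. parent 2 and child 2 share 2 pixels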
Example #21
def loadMVLENS():
    print "Parsing 1M movielens data . . ."
    data_array = loadtxt("/home/meawoppl/datasets/movielens-1m/ratings.dat", delimiter="::")
    print "\tDone. . . "
    
    print "Converting to sparse matrix format . . ."
    users, movies, ratings, trash = data_array.T
    ratings -= ratings.mean()
    del trash

    print users.min(), movies.min()
    user_movie = c_[users, movies].T
    m = sparse.coo_matrix((ratings, user_movie))
    print "\tDone"

    tm = array(m.todense())
    tmm = 1*(tm != 0)


    # remove nonparticipatory users . . .
    tm = tm[tmm.sum(axis=1)!=0, :]
    tmm = tmm[tmm.sum(axis=1)!=0, :]
    tm = tm[:, tmm.sum(axis=0)!=0]
    tmm = tmm[:, tmm.sum(axis=0)!=0]

    row_mean = tm.sum(axis=1) / tmm.sum(axis=1)
    col_mean = tm.sum(axis=0) / tmm.sum(axis=0)

    
    m=sparse.coo_matrix(tm)

    return m, row_mean, col_mean
Example #22
def set_src_feat_vector():
    
    global X, sizeTrData, devAndTestSrcDict, TRAIN_FILE
    
    sizeTrData = get_numlines(TRAIN_FILE)
    X1 = numpy.zeros((math.ceil(sizeTrData/2), len(devAndTestSrcDict)), dtype=int)
    
    for numLine, line in enumerate(open(TRAIN_FILE, 'r')):
        
        if numLine == math.ceil(sizeTrData/2):
            break
        
        src, tgt = line.strip().split('|||')
        for word in src.split():
            if word in devAndTestSrcDict:
                X1[numLine][devAndTestSrcDict[word]] += 1
                
    X1 = coo_matrix(X1)
    X2 = numpy.zeros((sizeTrData-math.ceil(sizeTrData/2), len(devAndTestSrcDict)), dtype=int)
    
    for numLine, line in enumerate(open(TRAIN_FILE, 'r')):
        
        if numLine >= math.ceil(sizeTrData/2):
            src, tgt = line.strip().split('|||')
            for word in src.split():
                if word in devAndTestSrcDict:
                    X2[numLine-math.ceil(sizeTrData/2)][devAndTestSrcDict[word]] += 1
                
    X2 = coo_matrix(X2)
    X = vstack([X1, X2])
Example #23
    def test_synthesis_mat(self):
        """
        m1 =
        [1, 0, 2, -1]
        [0, 0, 3,  0]
        [4, 5, 6, -2]
        m2 =
        [1,-2, -5]
        [0, 3, -6]
        """
        row = np.array([0, 0, +0, 1, 2, 2, 2, 2])
        col = np.array([0, 2, +3, 2, 0, 1, 2, 3])
        dat = np.array([1, 2, -1, 3, 4, 5, 6, -2])
        m1 = coo_matrix((dat, (row, col)))
        row = np.array([+0, +0, +0, +1, +1])
        col = np.array([+0, +1, +2, +1, +2])
        dat = np.array([+1, -2, -5, +3, -6])
        m2 = coo_matrix((dat, (row, col)))

        self.assertEqual((3, 4), m1.shape)
        self.assertEqual((2, 3), m2.shape)

        m3 = synthesis_mat(m1, m2).toarray()
        self.assertEqual((6, 12), m3.shape)
        self.assertAlmostEqual(2.0, m3[0, 2])
        self.assertAlmostEqual(3.0, m3[1, 2])
        self.assertAlmostEqual(-6.0, m3[1, 4 + 2])
        self.assertAlmostEqual(+9.0, m3[3 + 1, 4 + 2])
Example #24
 def __init__(self, X_l, L_l, X_u, random_generator, ** kw):
     """
     Intializes the S3VM optimizer.
     """
     self.__random_generator = random_generator
     # This is a nuisance, but we may need to pad extra dimensions to either X_l or X_u
     # in case the highest feature indices appear only in one of the two data matrices
     if X_l.shape[1] > X_u.shape[1]:
         X_u = sparse.hstack([X_u, sparse.coo_matrix((X_u.shape[0], X_l.shape[1] - X_u.shape[1]))])
     elif X_l.shape[1] < X_u.shape[1]:
         X_l = sparse.hstack([X_l, sparse.coo_matrix((X_l.shape[0], X_u.shape[1] - X_l.shape[1]))])
     # We vertically stack the data matrices into one big matrix
     X = sparse.vstack([X_l, X_u])
     self.__size_l, self.__size_u, self.__size_n = X_l.shape[0], X_u.shape[0], X_l.shape[0]+ X_u.shape[0]
     x = arr.array('i')
     for l in L_l:
         x.append(int(l))
     self.__YL = mat(x, dtype=np.float64)
     self.__YL = self.__YL.transpose()
     self.__setParameters( ** kw)
     self.__kw = kw
     self.X_l = X_l.tocsr()
     self.X_u = X_u.tocsr()
     self.X = X.tocsr()
     # compute mean of unlabeled patterns
     self.__mean_u = self.X_u.mean(axis=0)
     self.X_u_T = X_u.tocsc().T
     self.X_l_T = X_l.tocsc().T
     self.X_T = X.tocsc().T
Example #25
def HiptmairMatrixSetup(mesh, N, M):

    path = os.path.abspath(os.path.join(inspect.getfile(inspect.currentframe()), ".."))
    gradient_code = open(os.path.join(path, 'DiscreteGradient.cpp'), 'r').read()
    compiled_gradient_module = compile_extension_module(code=gradient_code)

    column =  numpy.zeros(2*mesh.num_edges(), order="C") #, dtype="intc")
    row =  numpy.zeros(2*mesh.num_edges(), order="C") #, dtype="intc")
    data =  numpy.zeros(2*mesh.num_edges(), order="C") #, dtype="intc")

    dataX =  numpy.zeros(2*mesh.num_edges(), order="C")
    dataY =  numpy.zeros(2*mesh.num_edges(), order="C")
    dataZ =  numpy.zeros(2*mesh.num_edges(), order="C")

    tic()
    c = compiled_gradient_module.ProlongationGradsecond(mesh, dataX,dataY,dataZ, data, row, column)
    end = toc()
    print ("{:40}").format("Data for C and P created, time: "), " ==>  ",("{:4f}").format(end)
    # print row
    # print column
    # print  data
    C = coo_matrix((data,(row,column)), shape=(N, M)).tocsr()
    Px = coo_matrix((dataX,(row,column)), shape=(N, M)).tocsr()
    Py = coo_matrix((dataY,(row,column)), shape=(N, M)).tocsr()
    Pz = coo_matrix((dataZ,(row,column)), shape=(N, M)).tocsr()
    return C, [Px,Py,Pz]
Example #26
def form_prediction_matrix(y_train_pred_proba,
                           y_test_pred,
                           user_label_matrix,
                           annotated_user_ids,
                           non_annotated_user_ids):
    index = user_label_matrix[annotated_user_ids, :] > 0.0
    index = index.toarray()
    y_train_pred_proba_new = np.zeros_like(y_train_pred_proba)
    y_train_pred_proba_new[index] = y_train_pred_proba[index]
    y_train_pred_proba = y_train_pred_proba_new
    y_train_pred_proba = spsp.coo_matrix(y_train_pred_proba, shape=y_train_pred_proba.shape)

    y_test_pred = spsp.coo_matrix(y_test_pred, shape=y_test_pred.shape)

    prediction_matrix_row = np.append(annotated_user_ids[y_train_pred_proba.row], [non_annotated_user_ids[y_test_pred.row, ]])
    prediction_matrix_col = np.append(y_train_pred_proba.col, [y_test_pred.col, ])
    prediction_matrix_data = np.append(y_train_pred_proba.data, [y_test_pred.data, ])

    prediction_matrix = spsp.coo_matrix((prediction_matrix_data,
                                         (prediction_matrix_row, prediction_matrix_col)),
                                        shape=(annotated_user_ids.size + non_annotated_user_ids.size,
                                               user_label_matrix.shape[1]))

    prediction_matrix = spsp.csr_matrix(prediction_matrix)
    prediction_matrix.eliminate_zeros()

    return prediction_matrix
Example #27
def test_multilabel_representation_invariance():
    # Generate some data
    n_classes = 4
    n_samples = 50

    _, y1 = make_multilabel_classification(
        n_features=1, n_classes=n_classes, random_state=0, n_samples=n_samples, allow_unlabeled=True
    )
    _, y2 = make_multilabel_classification(
        n_features=1, n_classes=n_classes, random_state=1, n_samples=n_samples, allow_unlabeled=True
    )

    # To make sure at least one empty label is present
    y1 += [0] * n_classes
    y2 += [0] * n_classes

    y1_sparse_indicator = sp.coo_matrix(y1)
    y2_sparse_indicator = sp.coo_matrix(y2)

    for name in MULTILABELS_METRICS:
        metric = ALL_METRICS[name]

        # XXX cruel hack to work with partial functions
        if isinstance(metric, partial):
            metric.__module__ = "tmp"
            metric.__name__ = name

        measure = metric(y1, y2)

        # Check representation invariance
        assert_almost_equal(
            metric(y1_sparse_indicator, y2_sparse_indicator),
            measure,
            err_msg="%s failed representation invariance  " "between dense and sparse indicator " "formats." % name,
        )
Example #28
def to_sparse(_membership, out):
    "Return a sparse matrix object."
    n_elements, _unique, _weighted, collections, collections_id = out
    sh = n_elements, len(collections_id)
    aux_map = dict(zip(collections_id, range(sh[1])))
    if issparse(_membership):
        return _membership, (range(n_elements), collections_id)
    if _unique:
        _membership = np.array(_membership).ravel()
        matrix = np.array([aux_map[e] for e in _membership])
        matrix = matrix.astype(int)
        matrix = coo_matrix((np.ones(sh[0]), (range(sh[0]), matrix)),
                            shape=sh)
    elif not _weighted:
        indices = []
        for i in range(sh[0]):
            for j in range(len(_membership[i])):
                indices.append((i, aux_map[_membership[i][j]]))
        indices = np.array(indices)[:, 0], np.array(indices)[:, 1]
        matrix = coo_matrix((np.ones(len(indices[0])), indices), shape=sh)
    elif _weighted:
        indices, data = [], []
        for i in range(sh[0]):
            for j in _membership[i]:
                indices.append((i, aux_map[j]))
                data.append(_membership[i][j])
        indices = np.array(indices)[:, 0], np.array(indices)[:, 1]
        matrix = coo_matrix((np.array(data), indices), shape=sh)
    return matrix, (range(n_elements), collections_id)
Example #29
    def add_indices(self, indices, axis):
        """
        Add new indices to the matrix given an axis, filled with zeroes.
        If an index value is already present it is ignored.
        :param indices: List of words
        :param axis: 0 (rows) or 1 (cols)
        :return: Matrix with new index.
        """
        if axis == 0:
            new_indices = list(set(indices) - set(self.row2word))
            if len(new_indices) == 0:
                return self

            shape = (len(new_indices), self.shape[1])
            mat = self._new_instance(sp.coo_matrix(shape), row2word=new_indices)
        elif axis == 1:
            new_indices = list(set(indices) - set(self.col2word))

            if len(new_indices) == 0:
                return self

            shape = (self.shape[0], len(new_indices))
            mat = self._new_instance(sp.coo_matrix(shape), col2word=new_indices)
        else:
            raise ValueError("Axis can only be 0 or 1")

        return self.append(mat, axis)
Example #30
def main():
    lily_objs = list()
    with open('lily.txt') as f:
        for line in f:
            line = line.strip() 
            lily_obj = dict()
            label,content = line.split('\t')
            lily_obj['label'] = label
            lily_obj['words'] = content.split(' ')
            lily_objs.append(lily_obj)

    random.seed(0)
    random.shuffle(lily_objs)    # shuffle randomly

    corpus = [' '.join(lily_obj['words']) for lily_obj in lily_objs]
    tf,tfidf = cal_tf_and_tfidf(corpus)

    labels = [lily_obj['label'] for lily_obj in lily_objs]
    labels = np.array(labels)

    lily = dict()

    tf = coo_matrix(tf)
    tfidf = coo_matrix(tfidf)
    lily['tf'] = tf
    lily['tfidf'] = tfidf
    lily['labels'] = labels

    with open('lily.pickle','wb') as f:
        pickle.dump(lily,f)

    tf = np.array(tf)
    print(type(tf))
Example #31
def helm_coefficients_josep(Yseries, V0, S0, Ysh0, pq, pv, sl, pqpv, tolerance=1e-6, max_coeff=30, verbose=False):
    """
    Holomorphic Embedding LoadFlow Method as formulated by Josep Fanals Batllori in 2020
    This function just returns the coefficients for further usage in other routines
    :param Yseries: Admittance matrix of the series elements
    :param V0: vector of specified voltages
    :param S0: vector of specified power
    :param Ysh0: vector of shunt admittances (including the shunts of the branches)
    :param pq: list of pq nodes
    :param pv: list of pv nodes
    :param sl: list of slack nodes
    :param pqpv: sorted list of pq and pv nodes
    :param tolerance: target error (or tolerance)
    :param max_coeff: maximum number of coefficients
    :param verbose: print intermediate information
    :return: U, X, Q, iterations
    """

    npqpv = len(pqpv)
    npv = len(pv)
    nsl = len(sl)
    n = Yseries.shape[0]

    # --------------------------- PREPARING IMPLEMENTATION -------------------------------------------------------------
    U = np.zeros((max_coeff, npqpv), dtype=complex)  # voltages
    X = np.zeros((max_coeff, npqpv), dtype=complex)  # compute X=1/conj(U)
    Q = np.zeros((max_coeff, npqpv), dtype=complex)  # unknown reactive powers

    if n < 2:
        return U, X, Q, 0

    if verbose:
        print('Yseries')
        print(Yseries.toarray())
        df = pd.DataFrame(data=np.c_[Ysh0.imag, S0.real, S0.imag, np.abs(V0)],
                          columns=['Ysh', 'P0', 'Q0', 'V0'])
        print(df)

    # build the reduced system
    Yred = Yseries[np.ix_(pqpv, pqpv)]  # admittance matrix without slack buses
    Yslack = -Yseries[np.ix_(pqpv, sl)]  # yes, it is the negative of this
    G = np.real(Yred)  # real parts of Yij
    B = np.imag(Yred)  # imaginary parts of Yij
    vec_P = S0.real[pqpv]
    vec_Q = S0.imag[pqpv]
    Vslack = V0[sl]
    Ysh = Ysh0[pqpv]
    Vm0 = np.abs(V0[pqpv])
    vec_W = Vm0 * Vm0

    # indices 0 based in the internal scheme
    nsl_counted = np.zeros(n, dtype=int)
    compt = 0
    for i in range(n):
        if i in sl:
            compt += 1
        nsl_counted[i] = compt

    pq_ = pq - nsl_counted[pq]
    pv_ = pv - nsl_counted[pv]
    pqpv_ = np.sort(np.r_[pq_, pv_])

    # .......................CALCULATION OF TERMS [0] ------------------------------------------------------------------

    if nsl > 1:
        U[0, :] = spsolve(Yred, Yslack.sum(axis=1))
    else:
        U[0, :] = spsolve(Yred, Yslack)

    X[0, :] = 1 / np.conj(U[0, :])

    # .......................CALCULATION OF TERMS [1] ------------------------------------------------------------------
    valor = np.zeros(npqpv, dtype=complex)

    # get the current injections that appear due to the slack buses reduction
    I_inj_slack = Yslack[pqpv_, :] * Vslack

    valor[pq_] = I_inj_slack[pq_] - Yslack[pq_].sum(axis=1).A1 + (vec_P[pq_] - vec_Q[pq_] * 1j) * X[0, pq_] - U[0, pq_] * Ysh[pq_]
    valor[pv_] = I_inj_slack[pv_] - Yslack[pv_].sum(axis=1).A1 + (vec_P[pv_]) * X[0, pv_] - U[0, pv_] * Ysh[pv_]

    # compose the right-hand side vector
    RHS = np.r_[valor.real,
                valor.imag,
                vec_W[pv_] - (U[0, pv_] * U[0, pv_]).real  # vec_W[pv_] - 1.0
                ]

    # Form the system matrix (MAT)
    Upv = U[0, pv_]
    Xpv = X[0, pv_]
    VRE = coo_matrix((2 * Upv.real, (np.arange(npv), pv_)), shape=(npv, npqpv)).tocsc()
    VIM = coo_matrix((2 * Upv.imag, (np.arange(npv), pv_)), shape=(npv, npqpv)).tocsc()
    XIM = coo_matrix((-Xpv.imag, (pv_, np.arange(npv))), shape=(npqpv, npv)).tocsc()
    XRE = coo_matrix((Xpv.real, (pv_, np.arange(npv))), shape=(npqpv, npv)).tocsc()
    EMPTY = csc_matrix((npv, npv))

    MAT = vs((hs((G,  -B,   XIM)),
              hs((B,   G,   XRE)),
              hs((VRE, VIM, EMPTY))), format='csc')

    if verbose:
        print('MAT')
        print(MAT.toarray())

    # factorize (only once)
    MAT_LU = factorized(MAT.tocsc())

    # solve
    LHS = MAT_LU(RHS)

    # update coefficients
    U[1, :] = LHS[:npqpv] + 1j * LHS[npqpv:2 * npqpv]
    Q[0, pv_] = LHS[2 * npqpv:]
    X[1, :] = -X[0, :] * np.conj(U[1, :]) / np.conj(U[0, :])

    # .......................CALCULATION OF TERMS [>=2] ----------------------------------------------------------------
    iter_ = 1
    for c in range(2, max_coeff):  # c defines the current depth

        valor[pq_] = (vec_P[pq_] - vec_Q[pq_] * 1j) * X[c - 1, pq_] - U[c - 1, pq_] * Ysh[pq_]
        valor[pv_] = -1j * conv2(X, Q, c, pv_) - U[c - 1, pv_] * Ysh[pv_] + X[c - 1, pv_] * vec_P[pv_]

        RHS = np.r_[valor.real,
                    valor.imag,
                    -conv3(U, U, c, pv_).real]

        LHS = MAT_LU(RHS)

        # update voltage coefficients
        U[c, :] = LHS[:npqpv] + 1j * LHS[npqpv:2 * npqpv]

        # update reactive power
        Q[c - 1, pv_] = LHS[2 * npqpv:]

        # update voltage inverse coefficients
        X[c, :] = -conv1(U, X, c) / np.conj(U[0, :])

        iter_ += 1

    return U, X, Q, iter_
Example #32
    def compute_kron_mat_cuda(self,g1,g2,kernel_name='create_kron_mat',gpu_block=None): # pragma: no cover
        """kronecker matrix with the edges pssm

        Args:
            g1 (iScore.Graph): first graph
            g2 (iScore.Graph): second graph
            kernel_name (str): name of the kernel to use
            gpu_block (None, optional): Size of the GPU block
        """
        n1 = g1.num_edges
        n2 = g2.num_edges
        n_edges_prod = 2*n1*n2

        # get the gpu block size if specified
        if gpu_block is not None:
            block = gpu_block
        else:
            block = self.gpu_block
        dim = (n1,n2,1)
        grid = tuple([int(np.ceil(n/t)) for n,t in zip(dim,block)])

        # start timer
        t0 = time()
        driver.Context.synchronize()
        create_kron_mat_gpu = self.mod.get_function(kernel_name)

        # put the raw pssm on the GPU
        pssm1 = gpuarray.to_gpu(np.array(g1.edges_pssm).astype(np.float32))
        pssm2 = gpuarray.to_gpu(np.array(g2.edges_pssm).astype(np.float32))

        # we have to put the index on the gpu as well
        ind1 = gpuarray.to_gpu(np.array(g1.edges_index).astype(np.int32))
        ind2 = gpuarray.to_gpu(np.array(g2.edges_index).astype(np.int32))

        # create the gpu arrays only if we have to
        # i.e. in case we run the calculation once (test or tune)
        # in other cases the weigh and index are booked in self.run()
        if not hasattr(self,'weight_product'):
            self.weight_product = gpuarray.zeros(n_edges_prod, np.float32)
            self.index_product = gpuarray.zeros((n_edges_prod,2), np.int32)

        driver.Context.synchronize()
        if self.debug:
            print('GPU - Mem  : %f \t (block size:%dx%d)' %(time()-t0,block[0],block[1]))

        # use the combvec kernel
        t0 = time()
        create_kron_mat_gpu (ind1,ind2,
                             pssm1,pssm2,
                             self.index_product,self.weight_product,
                             n1,n2,g2.num_nodes,
                             block=block,grid=grid)

        # extract the data
        # restrict to the ones calculated here
        ind = self.index_product.get()
        w = self.weight_product.get()[:n_edges_prod]

        # final size
        n_nodes_prod = g1.num_nodes*g2.num_nodes

        # create the matrix
        tt = time()

        # replaced the transpose with
        # doubling of the weights and indices (with row/column swapped)
        w = np.concatenate((w,w))
        ind = np.vstack((ind,np.flip(ind,axis=1)))
        index = ( ind[:,0],ind[:,1])
        self.Wx = sp_sparse.coo_matrix( (w,index),shape=( n_nodes_prod,n_nodes_prod ) )


        #driver.Context.synchronize()
        if self.debug:
            print('GPU - Kron : %f \t (block size:%dx%d)' %(time()-t0,block[0],block[1]))
Example #33
Cx1 = np.vstack([np.mean(X[Cx == 0], 0), np.mean(X[Cx == 1], 0)])

ind = np.argmin(np.mean(Cx1[:, -num_psd_elms_high_freq:], axis=1))
active_pixels = (L[:, ind] > thresh_probability)
active_pixels = L[:, ind]
pl.imshow(np.reshape((active_pixels), (d1, d2), order='F'))

#%%
ff = np.zeros(np.shape(A_or)[-1])
cl_thr = 0.2
#ff = false(1,size(Am,2));
for i in range(np.shape(A_or)[-1]):
    a1 = A_or[:, i]
    a2 = A_or[:, i] * active_pixels
    if np.sum(a2**2) >= cl_thr**2 * np.sum(a1**2):
        ff[i] = 1

id_set = 1
cse.utilities.view_patches_bar(Yr, coo_matrix(
    A_or[:, ff == id_set]), C_or[ff == id_set, :], b2, f2, d1, d2, YrA=YrA[srt[ff == id_set], :])


# km=KMeans(n_clusters=2)
# Cx=km.fit_transform(X)
# Cx=km.fit_transform(cp)
# Cx=km.cluster_centers_
# L=km.labels_
# ind=np.argmin(np.mean(Cx[:,-49:],axis=1))
#active_pixels = (L==ind)
#centroids = Cx;
Example #34
def to_scipy_sparse_matrix(G,
                           nodelist=None,
                           dtype=None,
                           weight='weight',
                           format='csr'):
    """Returns the graph adjacency matrix as a SciPy sparse matrix.

    Parameters
    ----------
    G : graph
        The NetworkX graph used to construct the NumPy matrix.

    nodelist : list, optional
       The rows and columns are ordered according to the nodes in `nodelist`.
       If `nodelist` is None, then the ordering is produced by G.nodes().

    dtype : NumPy data-type, optional
        A valid NumPy dtype used to initialize the array. If None, then the
        NumPy default is used.

    weight : string or None, optional (default='weight')
        The edge attribute that holds the numerical value used for
        the edge weight.  If None then all edge weights are 1.

    format : str in {'bsr', 'csr', 'csc', 'coo', 'lil', 'dia', 'dok'}
        The type of the matrix to be returned (default 'csr').  For
        some algorithms different implementations of sparse matrices
        can perform better.  See [1]_ for details.

    Returns
    -------
    M : SciPy sparse matrix
       Graph adjacency matrix.

    Notes
    -----
    For directed graphs, matrix entry i,j corresponds to an edge from i to j.

    The matrix entries are populated using the edge attribute held in
    parameter weight. When an edge does not have that attribute, the
    value of the entry is 1.

    For multiple edges the matrix values are the sums of the edge weights.

    When `nodelist` does not contain every node in `G`, the matrix is built
    from the subgraph of `G` that is induced by the nodes in `nodelist`.

    Uses coo_matrix format. To convert to other formats specify the
    format= keyword.

    The convention used for self-loop edges in graphs is to assign the
    diagonal matrix entry value to the weight attribute of the edge
    (or the number 1 if the edge has no weight attribute).  If the
    alternate convention of doubling the edge weight is desired the
    resulting Scipy sparse matrix can be modified as follows:

    >>> import scipy as sp
    >>> G = nx.Graph([(1, 1)])
    >>> A = nx.to_scipy_sparse_matrix(G)
    >>> print(A.todense())
    [[1]]
    >>> A.setdiag(A.diagonal() * 2)
    >>> print(A.todense())
    [[2]]

    Examples
    --------
    >>> G = nx.MultiDiGraph()
    >>> G.add_edge(0, 1, weight=2)
    0
    >>> G.add_edge(1, 0)
    0
    >>> G.add_edge(2, 2, weight=3)
    0
    >>> G.add_edge(2, 2)
    1
    >>> S = nx.to_scipy_sparse_matrix(G, nodelist=[0, 1, 2])
    >>> print(S.todense())
    [[0 2 0]
     [1 0 0]
     [0 0 4]]

    References
    ----------
    .. [1] Scipy Dev. References, "Sparse Matrices",
       https://docs.scipy.org/doc/scipy/reference/sparse.html
    """
    from scipy import sparse
    if nodelist is None:
        nodelist = list(G)
    nlen = len(nodelist)
    if nlen == 0:
        raise nx.NetworkXError("Graph has no nodes or edges")

    if len(nodelist) != len(set(nodelist)):
        msg = "Ambiguous ordering: `nodelist` contained duplicates."
        raise nx.NetworkXError(msg)

    index = dict(zip(nodelist, range(nlen)))
    coefficients = zip(*((index[u], index[v], d.get(weight, 1))
                         for u, v, d in G.edges(nodelist, data=True)
                         if u in index and v in index))
    try:
        row, col, data = coefficients
    except ValueError:
        # there is no edge in the subgraph
        row, col, data = [], [], []

    if G.is_directed():
        M = sparse.coo_matrix((data, (row, col)),
                              shape=(nlen, nlen),
                              dtype=dtype)
    else:
        # symmetrize matrix
        d = data + data
        r = row + col
        c = col + row
        # selfloop entries get double counted when symmetrizing
        # so we subtract the data on the diagonal
        selfloops = list(nx.selfloop_edges(G, data=True))
        if selfloops:
            diag_index, diag_data = zip(*((index[u], -d.get(weight, 1))
                                          for u, v, d in selfloops
                                          if u in index and v in index))
            d += diag_data
            r += diag_index
            c += diag_index
        M = sparse.coo_matrix((d, (r, c)), shape=(nlen, nlen), dtype=dtype)
    try:
        return M.asformat(format)
    # From Scipy 1.1.0, asformat will throw a ValueError instead of an
    # AttributeError if the format is not recognized.
    except (AttributeError, ValueError):
        raise nx.NetworkXError("Unknown sparse matrix format: %s" % format)
Example #35
def _fast_kde_2d(x, y, gridsize=(128, 128), circular=False):
    """
    2D fft-based Gaussian kernel density estimate (KDE).

    The code was adapted from https://github.com/mfouesneau/faststats

    Parameters
    ----------
    x : Numpy array or list
    y : Numpy array or list
    gridsize : tuple
        Number of points used to discretize data. Use powers of 2 for fft optimization
    circular: bool
        If True, use circular boundaries. Defaults to False
    Returns
    -------
    grid: A gridded 2D KDE of the input points (x, y)
    xmin: minimum value of x
    xmax: maximum value of x
    ymin: minimum value of y
    ymax: maximum value of y
    """
    x = np.asarray(x, dtype=float)
    x = x[np.isfinite(x)]
    y = np.asarray(y, dtype=float)
    y = y[np.isfinite(y)]

    xmin, xmax = x.min(), x.max()
    ymin, ymax = y.min(), y.max()

    len_x = len(x)
    weights = np.ones(len_x)
    n_x, n_y = gridsize

    d_x = (xmax - xmin) / (n_x - 1)
    d_y = (ymax - ymin) / (n_y - 1)

    xyi = _stack(x, y).T
    xyi -= [xmin, ymin]
    xyi /= [d_x, d_y]
    xyi = np.floor(xyi, xyi).T

    scotts_factor = len_x**(-1 / 6)
    cov = _cov(xyi)
    std_devs = np.diag(cov)**0.5
    kern_nx, kern_ny = np.round(scotts_factor * 2 * np.pi * std_devs)

    inv_cov = np.linalg.inv(cov * scotts_factor**2)

    x_x = np.arange(kern_nx) - kern_nx / 2
    y_y = np.arange(kern_ny) - kern_ny / 2
    x_x, y_y = np.meshgrid(x_x, y_y)

    kernel = _stack(x_x.flatten(), y_y.flatten())
    kernel = _dot(inv_cov, kernel) * kernel
    kernel = np.exp(-kernel.sum(axis=0) / 2)
    kernel = kernel.reshape((int(kern_ny), int(kern_nx)))

    boundary = "wrap" if circular else "symm"

    grid = coo_matrix((weights, xyi), shape=(n_x, n_y)).toarray()
    grid = convolve2d(grid, kernel, mode="same", boundary=boundary)

    norm_factor = np.linalg.det(2 * np.pi * cov * scotts_factor**2)
    norm_factor = len_x * d_x * d_y * norm_factor**0.5

    grid /= norm_factor

    return grid, xmin, xmax, ymin, ymax
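The coo_matrix call above is the usual sparse-binning trick: duplicate (row, col) indices are accumulated, so weighted points that fall in the same grid cell are summed. A minimal standalone sketch of just that step:

import numpy as np
from scipy.sparse import coo_matrix

rows = np.array([0, 0, 1])   # grid index along x for each point
cols = np.array([2, 2, 0])   # grid index along y for each point
weights = np.ones(3)
grid = coo_matrix((weights, (rows, cols)), shape=(3, 3)).toarray()
# grid[0, 2] == 2.0 because two points landed in the same cell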
Example #36
train_agg = train.drop_duplicates(subset=['pid_code', 'song_code']).copy()
test_agg = test.drop_duplicates(subset=['pid_code', 'song_code']).copy()
train_agg['val'] = 1
test_agg['val'] = 1

train_agg['val_stoch'] = train_agg.groupby('pid_code').val.transform(
    lambda x: x / np.linalg.norm(x))
test_agg['val_stoch'] = test_agg.groupby('pid_code').val.transform(
    lambda x: x / np.linalg.norm(x))

test_agg_pop = test_agg.join(train.song_code.value_counts().rename('pop'),
                             on='song_code')
test_agg_pop['pop'].fillna(1, inplace=True)

sp_A = spl.coo_matrix(
    (train_agg['val_stoch'].values.T, train_agg[['pid_code',
                                                 'song_code']].values.T))
sp_A._shape = (int(playlist_meta.pid_code.max() + 1),
               int(song_meta.song_code.max() + 1))
sp_A = sp_A.tocsr()
sp_A_t = sp_A.T
sp_A_const = spl.coo_matrix(
    (train_agg['val'].values.T, train_agg[['pid_code', 'song_code']].values.T))
sp_A_const._shape = (int(playlist_meta.pid_code.max() + 1),
                     int(song_meta.song_code.max() + 1))
sp_A_const = sp_A_const.tocsr()
sp_A_const_t = sp_A_const.T

plusadd = 0

Example #37
def ripser(X,
           maxdim=1,
           thresh=np.inf,
           coeff=2,
           metric="euclidean",
           n_perm=None):
    """Compute persistence diagrams for X data array. If X is not a distance
    matrix, it will be converted to a distance matrix using the chosen metric.

    Parameters
    ----------
    X: ndarray (n_samples, n_features)
        A numpy array of either data or distance matrix.
        Can also be a sparse distance matrix of type scipy.sparse

    maxdim: int, optional, default 1
        Maximum homology dimension computed. Will compute all dimensions
        lower than and equal to this value.
        For 1, H_0 and H_1 will be computed.

    thresh: float, default infinity
        Maximum distances considered when constructing filtration.
        If infinity, compute the entire filtration.

    coeff: int prime, default 2
        Compute homology with coefficients in the prime field Z/pZ for p=coeff.

    metric: string or callable
        The metric to use when calculating distance between instances in a
        feature array. If metric is a string, it must be one of the options
        specified in pairwise_distances, including "euclidean", "manhattan",
        or "cosine". Alternatively, if metric is a callable function, it is
        called on each pair of instances (rows) and the resulting value
        recorded. The callable should take two arrays from X as input and
        return a value indicating the distance between them.

    n_perm: int
        The number of points to subsample in a "greedy permutation,"
        or a furthest point sampling of the points.  These points
        will be used in lieu of the full point cloud for a faster
        computation, at the expense of some accuracy, which can
        be bounded as a maximum bottleneck distance to all diagrams
        on the original point set

    Returns
    -------
    A dictionary holding all of the results of the computation
    {
        'dgms': list (size maxdim) of ndarray (n_pairs, 2)
            A list of persistence diagrams, one for each dimension less
            than maxdim. Each diagram is an ndarray of size (n_pairs, 2)
            with the first column representing the birth time and the
            second column representing the death time of each pair.
        'num_edges': int
            The number of edges added during the computation
        'dperm2all': ndarray(n_samples, n_samples) or ndarray (n_perm, \
            n_samples) if n_perm
            The distance matrix used in the computation if n_perm is none.
            Otherwise, the distance from all points in the permutation to
            all points in the dataset
        'idx_perm': ndarray(n_perm) if n_perm > 0
            Index into the original point cloud of the points used
            as a subsample in the greedy permutation
        'r_cover': float
            Covering radius of the subsampled points.
            If n_perm <= 0, then the full point cloud was used and this is 0
    }

    """
    if n_perm and sparse.issparse(X):
        raise Exception(
            "Greedy permutation is not supported for sparse distance matrices")
    if n_perm and n_perm > X.shape[0]:
        raise Exception("Number of points in greedy permutation is greater"
                        " than number of points in the point cloud")
    if n_perm and n_perm < 0:
        raise Exception(
            "Should be a strictly positive number of points in the greedy "
            "permutation")

    idx_perm = np.arange(X.shape[0])
    r_cover = 0.0
    if n_perm:
        idx_perm, lambdas, dperm2all = get_greedy_perm(X,
                                                       n_perm=n_perm,
                                                       metric=metric)
        r_cover = lambdas[-1]
        dm = dperm2all[:, idx_perm]
    else:
        if metric == 'precomputed':
            dm = X
        else:
            dm = pairwise_distances(X, metric=metric)
        dperm2all = dm

    n_points = dm.shape[0]
    if not sparse.issparse(dm) and np.sum(np.abs(dm.diagonal()) > 0) > 0:
        # If any of the diagonal elements are nonzero,
        # convert to sparse format, because currently
        # that's the only format that handles nonzero
        # births
        dm = sparse.coo_matrix(dm)

    if sparse.issparse(dm):
        coo = dm.tocoo()
        res = DRFDMSparse(
            coo.row.astype(dtype=np.int32, order="C"),
            coo.col.astype(dtype=np.int32, order="C"),
            np.array(coo.data, dtype=np.float32, order="C"),
            n_points,
            maxdim,
            thresh,
            coeff,
        )
    else:
        I, J = np.meshgrid(np.arange(n_points), np.arange(n_points))
        DParam = np.array(dm[I > J], dtype=np.float32)
        res = DRFDM(DParam, maxdim, thresh, coeff)

    # Unwrap persistence diagrams
    dgms = res["births_and_deaths_by_dim"]
    for dim in range(len(dgms)):
        N = int(len(dgms[dim]) / 2)
        dgms[dim] = np.reshape(np.array(dgms[dim]), [N, 2])

    ret = {
        "dgms": dgms,
        "num_edges": res["num_edges"],
        "dperm2all": dperm2all,
        "idx_perm": idx_perm,
        "r_cover": r_cover,
    }
    return ret
Example #38
def _sparse_fruchterman_reingold(A, dim=2, k=None, pos=None, fixed=None, 
                                 iterations=50):
    # Position nodes in adjacency matrix A using Fruchterman-Reingold  
    # Entry point for NetworkX graph is fruchterman_reingold_layout()
    # Sparse version
    try:
        import numpy as np
    except ImportError:
        raise ImportError("_sparse_fruchterman_reingold() requires numpy: http://scipy.org/ ")
    try:
        nnodes,_=A.shape
    except AttributeError:
        raise nx.NetworkXError(
            "fruchterman_reingold() takes an adjacency matrix as input")
    try:
        from scipy.sparse import spdiags,coo_matrix
    except ImportError:
        raise ImportError("_sparse_fruchterman_reingold() scipy numpy: http://scipy.org/ ")
    # make sure we have a LIst of Lists representation
    try:
        A=A.tolil()
    except:
        A=(coo_matrix(A)).tolil()

    if pos is None:
        # random initial positions
        pos=np.asarray(np.random.random((nnodes,dim)),dtype=A.dtype)
    else:
        # make sure positions are of same type as matrix
        pos=pos.astype(A.dtype)

    # no fixed nodes
    if fixed is None:
        fixed=[]

    # optimal distance between nodes
    if k is None:
        k=np.sqrt(1.0/nnodes)
    # the initial "temperature"  is about .1 of domain area (=1x1)
    # this is the largest step allowed in the dynamics.
    t=0.1
    # simple cooling scheme.
    # linearly step down by dt on each iteration so last iteration is size dt.
    dt=t/float(iterations+1)

    displacement=np.zeros((dim,nnodes))
    for iteration in range(iterations):
        displacement*=0
        # loop over rows
        for i in range(A.shape[0]):
            if i in fixed:
                continue
            # difference between this row's node position and all others
            delta=(pos[i]-pos).T
            # distance between points
            distance=np.sqrt((delta**2).sum(axis=0))
            # enforce minimum distance of 0.01
            distance=np.where(distance<0.01,0.01,distance)
            # the adjacency matrix row
            Ai=np.asarray(A.getrowview(i).toarray())
            # displacement "force"
            displacement[:,i]+=\
                (delta*(k*k/distance**2-Ai*distance/k)).sum(axis=1)
        # update positions
        length=np.sqrt((displacement**2).sum(axis=0))
        length=np.where(length<0.01,0.1,length)
        pos+=(displacement*t/length).T
        # cool temperature
        t-=dt
        pos=_rescale_layout(pos)
    return pos
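A hedged usage sketch (assuming the module-level helper `_rescale_layout` used above is importable together with this function): lay out a small ring graph supplied as a SciPy COO adjacency matrix.

import numpy as np
from scipy.sparse import coo_matrix

# Illustrative 6-node ring graph with a symmetric adjacency matrix.
n = 6
rows = np.arange(n)
cols = (rows + 1) % n
A = coo_matrix((np.ones(n), (rows, cols)), shape=(n, n))
A = A + A.T                       # make the adjacency undirected
pos = _sparse_fruchterman_reingold(A, dim=2, iterations=50)
print(pos.shape)                  # expected: (6, 2) layout coordinates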
Пример #39
0
def ensemble_fit(
    X,
    estimated_n_topics=10,
    model="plsa",
    init="random",
    min_samples=3,
    min_cluster_size=4,
    n_starts=16,
    n_jobs=1,
    parallelism="dask",
    topic_combination="hellinger_umap",
    bootstrap=True,
    n_iter=100,
    n_iter_per_test=10,
    tolerance=0.001,
    e_step_thresh=1e-16,
    lift_factor=1,
    beta_loss=1,
    alpha=0.0,
    solver="mu",
    random_state=None,
):
    """Generate a set of stable topics by using an ensemble of topic models and then clustering
    the results and generating representative topics for each cluster. The generate a set of
    document vectors based on the selected stable topics.

    Parameters
    ----------
    X: array or sparse matrix of shape (n_docs, n_words)
        The bag-of-words matrix for the corpus to train on.

    estimated_n_topics: int (optional, default=10)
        The estimated number of topics. The final number of topics produced can be more or
        less than this value; it only gives the algorithm a suggestion of the approximate
        number of topics to use.

    model: string (optional, default="plsa")
        The topic modeling method to use (either "plsa" or "nmf")

    init: string or tuple (optional, default="random")
        The initialization method to use. This should be one of:
            * ``"random"``
            * ``"nndsvd"``
            * ``"nmf"``
        or a tuple of two ndarrays of shape (n_docs, n_topics) and (n_topics, n_words).

    min_samples: int (optional, default=3)
        The min_samples parameter to use for HDBSCAN clustering.

    min_cluster_size: int (optional, default=4)
        The min_cluster_size parameter to use for HDBSCAN clustering

    n_starts: int (optional, default=16)
        The number of bootstrap sampled topic models to run -- the size of the ensemble.

    n_jobs: int (optional, default=1)
        The number of parallel jobs to run at a time.

    parallelism: string (optional, default="dask")
        The parallelism model to use. Should be one of "dask" or "joblib" or "none".

    topic_combination: string (optional, default="hellinger_umap")
        The method of combining ensemble topics into a set of stable topics. Should be one of:
            * ``"hellinger_umap"``
            * ``"hellinger"``
            * ``"kl_divergence"``

    n_iter: int
        The maximum number of iterations of EM to perform.

    n_iter_per_test: int
        The number of iterations between tests for
        relative improvement in log-likelihood.

    tolerance: float
        The threshold of relative improvement in
        log-likelihood required to continue iterations.

    e_step_thresh: float (optional, default=1e-16)
        Option to promote sparsity. If the value of P(w|z)P(z|d) in the E step falls
        below threshold then write a zero for P(z|w,d).

    lift_factor: int (optional, default=1)
        Importance factor to apply to lift -- if high lift values are important to
        you then larger lift factors will be beneficial.

    beta_loss: float or string (optional, default=1, equivalent to 'kullback-leibler')
        The beta loss to use if using NMF for topic modeling.

    alpha: float (optional, default=0.0)
        The alpha parameter defining regularization if using NMF for topic modeling.

    solver: string, (optional, default="mu")
        The choice of solver if using NMF for topic modeling. Should be either "cd" or "mu".

    random_state: int, RandomState instance or None (optional, default=None)
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`. Used in initialization.

    Returns
    -------
    doc_vectors, stable_topics: arrays of shape (n_docs, M) and (M, n_words)
        The vectors giving the probability of topics for each document, and the stable topics
        produced by the ensemble.
    """

    X = check_array(X, accept_sparse="csr", dtype=np.float32)

    if issparse(X):
        X_coo = X.tocoo()
    else:
        X_coo = coo_matrix(X, dtype=np.float32)

    all_topics = ensemble_of_topics(
        X_coo,
        estimated_n_topics,
        model,
        n_jobs,
        n_starts,
        parallelism,
        init=init,
        n_iter=n_iter,
        n_iter_per_test=n_iter_per_test,
        tolerance=tolerance,
        e_step_thresh=e_step_thresh,
        bootstrap=bootstrap,
        lift_factor=1,
        beta_loss=beta_loss,
        alpha=alpha,
        solver=solver,
        random_state=random_state,
    )

    if topic_combination in _topic_combiner:
        cluster_topics = _topic_combiner[topic_combination]
    else:
        raise ValueError("topic_combination must be one of {}".format(
            tuple(_topic_combiner.keys())))

    stable_topics = cluster_topics(all_topics, min_samples, min_cluster_size)

    if lift_factor != 1:
        stable_topics **= lift_factor
        stable_topics = normalize(stable_topics, axis=1)

    if model == "plsa":
        sample_weight = _check_sample_weight(None, X, dtype=np.float32)
        doc_vectors = plsa_refit(
            X,
            stable_topics,
            sample_weight,
            e_step_thresh=e_step_thresh,
            random_state=random_state,
        )
    elif model == "nmf":
        doc_vectors, _, _ = non_negative_factorization(
            X,
            H=stable_topics,
            n_components=stable_topics.shape[0],
            update_H=False,
            beta_loss=beta_loss,
            alpha=alpha,
            solver=solver,
        )
    else:
        raise ValueError('Model must be one of "plsa" or "nmf"')

    return doc_vectors, stable_topics
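A hedged usage sketch of the function above on a toy corpus; the corpus and parameter values are illustrative only, and a real bag-of-words matrix is needed for meaningful topics.

import numpy as np
from sklearn.feature_extraction.text import CountVectorizer

docs = ["sparse matrices are useful",
        "topic models need bag of words",
        "bag of words matrices can be sparse"]
X = CountVectorizer().fit_transform(docs)      # (n_docs, n_words) sparse counts

doc_vectors, stable_topics = ensemble_fit(
    X, estimated_n_topics=2, model="nmf",
    parallelism="none", random_state=0)
print(doc_vectors.shape, stable_topics.shape)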
Пример #40
0
def load_data(path="../data/transfer/", dataset="chn", preserve_order=1):
    """Load citation network dataset (cora only for now)"""

    print('Loading {} dataset...'.format(dataset))

    idx_features_labels = np.genfromtxt("{}{}.content".format(path, dataset),
                                        dtype=np.dtype(str))
    features = sp.csr_matrix(idx_features_labels[:, 1:-1], dtype=np.float32)
    labels = encode_onehot(
        idx_features_labels[:, -1])  # labels are at the end of each line
    #f = open("{}{}.multilabel".format(path, dataset))
    #multilabels =np.genfromtxt("{}{}.multilabel".format(path, dataset),
    #                           dtype=np.dtype(str))

    # build graph
    idx = np.array(idx_features_labels[:, 0], dtype=np.int32)
    idx_map = {j: i for i, j in enumerate(idx)}
    edges_unordered = np.genfromtxt("{}{}.cites".format(path, dataset),
                                    dtype=np.int32)
    edges = np.array(list(map(idx_map.get, edges_unordered.flatten())),
                     dtype=np.int32).reshape(edges_unordered.shape)
    adj = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])),
                        shape=(labels.shape[0], labels.shape[0]),
                        dtype=np.float32)

    # build symmetric adjacency matrix
    adj = sp.coo_matrix(adj + adj.T.multiply(adj.T > adj) -
                        adj.multiply(adj.T > adj))
    # debug output: inspect the internals of the symmetric adjacency matrix
    for item in adj.__dict__.items():
        print(item)
    print(adj.col)

    edge_ret = []

    edge_weight = []

    node_weight = [0.0 for i in range(0, len(idx))]

    if preserve_order == 1:
        adj_pres = adj
    else:
        adj_pres = sp.coo_matrix(adj**2)

    # sampling weight
    for i in range(0, len(adj.data)):
        edge_ret.append((adj_pres.row[i], adj_pres.col[i]))
        edge_weight.append(float(adj_pres.data[i]))
        node_weight[adj.row[i]] += adj.data[i]

    features = normalize(features)
    adj = adj + sp.eye(adj.shape[0])
    D = sp.coo_matrix([[
        1.0 / math.sqrt(node_weight[j]) if j == i else 0
        for j in range(len(idx))
    ] for i in range(len(idx))])
    adj = D * adj * D

    idx_train = range(140)
    idx_val = range(200, 500)
    idx_test = range(500, 1500)

    features = torch.FloatTensor(np.array(features.todense()))
    labels = torch.LongTensor(np.where(labels)[1])
    adj = sparse_mx_to_torch_sparse_tensor(adj)

    idx_train = torch.LongTensor(idx_train)
    idx_val = torch.LongTensor(idx_val)
    idx_test = torch.LongTensor(idx_test)

    for i in range(0, len(node_weight)):
        node_weight[i] = math.pow(node_weight[i], 0.75)

    return adj, features, labels, idx_train, idx_val, idx_test, edge_ret, torch.tensor(
        edge_weight), torch.tensor(node_weight)  #, multilabels
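The dense list comprehension used to build `D` above allocates an n x n matrix; a hedged sketch of the same symmetric normalization D^{-1/2} (A + I) D^{-1/2} using a sparse diagonal instead (names are illustrative):

import numpy as np
import scipy.sparse as sp

def symmetric_normalize(adj, node_weight):
    """Sketch of the normalization used above: D^{-1/2} (A + I) D^{-1/2},
    where D is diagonal with the given per-node weights."""
    adj = adj + sp.eye(adj.shape[0])
    d_inv_sqrt = 1.0 / np.sqrt(np.asarray(node_weight))
    D = sp.diags(d_inv_sqrt)          # sparse diagonal, avoids the dense n x n build
    return sp.coo_matrix(D.dot(adj).dot(D))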
Пример #41
0
    print()
    print(factor.todense())


# In[155]:

A_int = second_deriv+factor

if n < 10:
    print(A_int.todense())


# In[156]:

A = sps.vstack([
    sps.coo_matrix(([1], ([0],[0])), shape=(1, n)),
    A_int,
    sps.coo_matrix(([1], ([0],[n-1])), shape=(1, n)),
    ])
A = sps.csr_matrix(A)

if n < 10:
    print(A.todense())


# In[157]:

rhs = np.zeros(n)


Пример #42
0
def generate_spectral_label(cfg, k, target_dir, n_neighbor, args):
    """Use spectral clustering to generate labels. The user can specify distance metric in args

    Parameters
    ----------
    cfg: dict, specifying data path, names we care
    k: list of int, choices of number of clusters
    target_dir: directory for storing the data
    n_neighbor: int, number of neighbors used for graph construction
    args: arguments, it provides choice of several options
        args.append means we append new number of clusters to existing problem
        args.pca means we first use PCA to perform dimensionality reduction
        args.speuclid means we use euclidean distance in y
        args.spdydx means we use dy/dx as distance metric
        args.spvio means we use constraint violation as distance metric
    """
    data = np.load(cfg['file_path'])
    x, y = data[cfg['x_name']], data[cfg['y_name']]
    query = Query(x, None, n_neighbor + 1, scale=True)
    x_scaled = query.A  # this is scaled data
    nn_ind = query.getIndex(x)  # indices of the n_neighbor + 1 nearest neighbours for each point
    n_data = x.shape[0]
    # build sparse graph based on neighboring distances, I shall use a distance function for evaluation
    if args.speuclid:
        def dst_fun(x0, y0, x1, y1):
            return np.linalg.norm(y0 - y1)
        out_fnm = os.path.join(target_dir, 'sp_euclid_label.npz')
    if args.spdydx:
        def dst_fun(x0, y0, x1, y1):
            return np.linalg.norm(y0 - y1) / np.linalg.norm(x0 - x1)
        out_fnm = os.path.join(target_dir, 'sp_dydx_label.npz')
    if args.spvio:
        out_fnm = os.path.join(target_dir, 'sp_vio_label.npz')
        sys.path.insert(0, cfg['script_path'])
        import libserver
        if args.pen:
            libserver.init(cfg['cfg_path'])
            def dst_fun(x0, y0, x1, y1):
                xmid = (x0 + x1) / 2
                ymid = (y0 + y1) / 2
                c = libserver.eval(ymid.astype(np.float64))
                return np.linalg.norm(c[1:])
        if args.car:
            solver = libserver.pysolver()
            solver.initfnm(cfg['cfg_path'])
            def dst_fun(x0, y0, x1, y1):
                xmid = (x0 + x1) / 2
                ymid = (y0 + y1) / 2
                c = solver.constrEval(ymid.astype(np.float64))
                return np.linalg.norm(c[1:])
        if args.drone:
            solver = libserver.pysolver()
            solver.initfnm(cfg['cfg_path'])
            def dst_fun(x0, y0, x1, y1):
                xmid = (x0 + x1) / 2  # xmid[3:] supplies the obstacle parameters used below
                ymid = (y0 + y1) / 2
                solver.updateObstacle(xmid[3:])
                c = solver.constrEval(ymid.astype(np.float64))
                return np.linalg.norm(c[1:])
    dist, row, col = construct_distance_graph(x, y, nn_ind, dst_fun, rm_col_one=True)
    print('distance matrix construction finished')
    aff_mat = sp.coo_matrix((dist, (row, col)), shape=(n_data, n_data))
    # prepare for output
    if args.append and os.path.exists(out_fnm):
        result = ddctParse(out_fnm)
    else:
        result = {}
    # perform spectral clustering
    for k_ in k:
        print('run spectral clustering with %d' % k_)
        sc = SpectralClustering(k_, eigen_solver='amg', affinity='precomputed', assign_labels='discretize', n_jobs=-1)
        sc.fit(aff_mat)
        label = sc.labels_
        result['%d' % k_] = label
    np.savez(out_fnm, **result)
Пример #43
0
def raster_2D(poly_xy: np.ndarray, grid_x: np.ndarray,
              grid_y: np.ndarray) -> np.ndarray:
    """Draws a polygon onto a 2D grid of pixels.

    Pixel values equal to the fraction of the pixel area covered by the polygon.
    This implementation is written for accuracy and works with double precision,
    in contrast to most other implementations which are written for speed and
    usually only allow for 256 (and often fewer) possible pixel values without
    performing (very slow) super-sampling.

    Args:
        poly_xy: `2 x N` ndarray containing x,y coordinates for each point in
            the polygon.
        grid_x: x-coordinates for the edges of each pixel specified as a 1D
            array.
        grid_y: y-coordinates for the edges of each pixel specified as a 1D
            array.

    Returns:
        2D ndarray with pixel values in the range [0, 1] containing the
        anti-aliased polygon. Note that the size of the array is
        `[grid_x.size - 1, grid_y.size - 1]`.

    Raises:
        ValueError: If `poly_xy` doesn't have exactly two rows or if `grid_x`
            or `grid_y` have a size less than 2.
    """
    if poly_xy.shape[0] != 2:
        raise ValueError(
            "Expected `poly_xy` to have 2 rows, got {} instead.".format(
                poly_xy.shape[0]))
    if grid_x.size < 2 or grid_y.size < 2:
        raise ValueError(
            "Expected both `grid_x` and `grid_y` to have atleast 2"
            " elements, got sizes of {} and {} respectively.".format(
                grid_x.size, grid_y.size))

    # Oversample the polygon by including its intersection with the grid as
    # new vertices.
    vertices = _expand_polygon_vertices(poly_xy, grid_x, grid_y)

    # If the shape fell completely outside our area, just return a blank grid.
    if vertices.size == 0:
        return np.zeros((grid_x.size - 1, grid_y.size - 1))
    # Calculate segment cover, area, and corresponding pixel's subscripts.
    poly = np.hstack((vertices, vertices[0]))
    endpoint_avg = (poly[:-1] + poly[1:]) * 0.5

    # Remove segments along the right and top edges (they correspond to outside
    # pixels, but couldn't be removed until now because poly_xy stores points,
    # not segments, and the edge points are needed when creating endpoint_avg).
    non_edge = np.logical_and(
        np.real(endpoint_avg) < grid_x[-1],
        np.imag(endpoint_avg) < grid_y[-1])

    endpoint_final = endpoint_avg[non_edge]
    x_sub = np.digitize(np.real(endpoint_final), grid_x) - 1
    y_sub = np.digitize(np.imag(endpoint_final), grid_y) - 1

    cover = np.diff(np.imag(poly), axis=0)[non_edge] / np.diff(grid_y)[y_sub]
    area = (np.real(endpoint_final) -
            grid_x[x_sub]) * cover / np.diff(grid_x)[x_sub]

    # Use coo_matrix(...).toarray() to efficiently convert from (x, y, v) pairs
    # to ndarrays. We can use v = (-area + 1j * cover) followed by calls to
    # np.real() and np.imag() to improve performance; otherwise we'd have to
    # call coo_matrix() twice, which is inefficient because it involves lots of
    # random memory access, unlike real() and imag().
    poly_grid = sparse.coo_matrix(
        (-area + 1j * cover, (x_sub, y_sub)),
        shape=(grid_x.size - 1, grid_y.size - 1)).toarray()
    result_grid = np.real(poly_grid) + np.imag(poly_grid).cumsum(axis=0)
    return np.abs(result_grid)
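A hedged usage sketch (assuming the helper `_expand_polygon_vertices` from the same module is available): rasterize a small axis-aligned square onto a 5 x 5 pixel grid.

import numpy as np

poly_xy = np.array([[0.2, 0.8, 0.8, 0.2],     # x coordinates of the square
                    [0.2, 0.2, 0.8, 0.8]])    # y coordinates of the square
grid_x = np.linspace(0.0, 1.0, 6)             # 6 edges -> 5 pixel columns
grid_y = np.linspace(0.0, 1.0, 6)             # 6 edges -> 5 pixel rows
pixels = raster_2D(poly_xy, grid_x, grid_y)
print(pixels.shape)                           # (5, 5), values in [0, 1]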
Пример #44
0
while s != '':
    data_samples.append(s.lower())
    dataset.append(s)
    s = file_tweet.readline()
file_tweet.close()  # finished reading the list of tweets

# Inverted Index to find root
print("Generating IDF to find summary...")
t0 = time()
count_vec = CountVectorizer(ngram_range=(1, 1),
                            analyzer='word',
                            stop_words="english")
X_Train_counts = count_vec.fit_transform(data_samples)
X_name = count_vec.get_feature_names()
m, n = X_Train_counts.shape
cx = coo_matrix(X_Train_counts)
freq = [0 for i in range(0, n)]
for i, j, v in zip(cx.row, cx.col, cx.data):
    freq[j] += v
words = []
for i in range(0, n):
    words.append((X_name[i], freq[i]))
words = sorted(words, key=itemgetter(1))
freq.clear()
root = words[-1][0]  # topic of the summary / root node
print("Topic of the Summary is '%s'" % root)

# Inverted Index to build dictionary of frequencies of words

count_vec = CountVectorizer(ngram_range=(1, 1))
Пример #45
0
def test_plot_connectome(tmpdir):
    node_color = ['green', 'blue', 'k', 'cyan']
    # symmetric up to 1e-3 relative tolerance
    adjacency_matrix = np.array([[1., -2., 0.3, 0.],
                                 [-2.002, 1, 0., 0.],
                                 [0.3, 0., 1., 0.],
                                 [0., 0., 0., 1.]])
    node_coords = np.arange(3 * 4).reshape(4, 3)

    args = adjacency_matrix, node_coords
    kwargs = dict(edge_threshold=0.38,
                  title='threshold=0.38',
                  node_size=10, node_color=node_color)
    plot_connectome(*args, **kwargs)
    plt.close()

    # used to speed-up tests for the next plots
    kwargs['display_mode'] = 'x'

    # node_coords not an array but a list of tuples
    plot_connectome(adjacency_matrix,
                    [tuple(each) for each in node_coords],
                    **kwargs)
    # saving to file
    filename = str(tmpdir.join('temp.png'))
    display = plot_connectome(*args, output_file=filename, **kwargs)
    assert display is None
    assert os.path.isfile(filename)
    assert os.path.getsize(filename) > 0
    plt.close()

    # with node_kwargs, edge_kwargs and edge_cmap arguments
    plot_connectome(*args,
                    edge_threshold='70%',
                    node_size=[10, 20, 30, 40],
                    node_color=np.zeros((4, 3)),
                    edge_cmap='RdBu',
                    colorbar=True,
                    node_kwargs={
                        'marker': 'v'},
                    edge_kwargs={
                        'linewidth': 4})
    plt.close()

    # masked array support
    masked_adjacency_matrix = np.ma.masked_array(
        adjacency_matrix, np.abs(adjacency_matrix) < 0.5)
    plot_connectome(masked_adjacency_matrix, node_coords,
                    **kwargs)
    plt.close()

    # sparse matrix support
    sparse_adjacency_matrix = sparse.coo_matrix(adjacency_matrix)
    plot_connectome(sparse_adjacency_matrix, node_coords,
                    **kwargs)
    plt.close()

    # NaN matrix support
    node_color = ['green', 'blue', 'k']
    # Overriding 'node_color' for 3  elements of size 3.
    kwargs['node_color'] = node_color
    nan_adjacency_matrix = np.array([[1., np.nan, 0.],
                                     [np.nan, 1., 2.],
                                     [np.nan, 2., 1.]])
    nan_node_coords = np.arange(3 * 3).reshape(3, 3)
    plot_connectome(nan_adjacency_matrix, nan_node_coords, **kwargs)
    plt.close()

    # smoke-test where there is no edge to draw, e.g. when
    # edge_threshold is too high
    plot_connectome(*args, edge_threshold=1e12)
    plt.close()

    # with colorbar=True
    plot_connectome(*args, colorbar=True)
    plt.close()

    # smoke-test with hemispheric sagittal cuts
    plot_connectome(*args, display_mode='lzry')
    plt.close()

    # test node_color as a string with display_mode='lzry'
    plot_connectome(*args, node_color='red', display_mode='lzry')
    plt.close()
    plot_connectome(*args, node_color=['red'], display_mode='lzry')
    plt.close()
Пример #46
0
    return b


# iterate over 11 values of lambda
for i in range(11):

    l = pow(2, i - 5)
    #solve the least square problem.
    ans = np.dot(np.linalg.pinv(create_A(l)), create_B(l))

    #reshape to show image
    im_recon = ans.reshape((row, col))
    #gradient for computing error
    dX, dY = np.gradient(im_recon)
    #use sparse matrix because of matrix overflow
    dX = coo_matrix(dX)
    dY = coo_matrix(dY)

    dX2 = dX.multiply(dX).todense()
    dY2 = dY.multiply(dY).todense()

    #compute error
    error = (im_recon - im_noise)**2 + l * (dX2 + dY2)
    #show the result
    print('lambda: ', l)
    p1 = plt.subplot(2, 2, 1)
    p1.set_title('original image')
    plt.imshow(im, cmap='gray')
    plt.axis('off')

    p2 = plt.subplot(2, 2, 2)
Пример #47
0
def create_bow(doc_indices, words, n_docs, vocab_size):
    return sparse.coo_matrix(([1] * len(doc_indices), (doc_indices, words)),
                             shape=(n_docs, vocab_size)).tocsr()
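For example (illustrative values), three tokens spread over two documents:

import numpy as np

doc_indices = np.array([0, 0, 1])   # token i belongs to document doc_indices[i]
words = np.array([2, 5, 2])         # vocabulary index of token i
bow = create_bow(doc_indices, words, n_docs=2, vocab_size=8)
print(bow.toarray())
# [[0 0 1 0 0 1 0 0]
#  [0 0 1 0 0 0 0 0]]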
Пример #48
0
def pairwise_distance_xy_z(data1,
                           data2,
                           rp_max,
                           pi_max,
                           period=None,
                           verbose=False,
                           num_threads=1,
                           approx_cell1_size=None,
                           approx_cell2_size=None):
    """
    Function returns pairs of points separated by
    a xy-projected distance smaller than or equal to the input ``rp_max`` and z distance ``pi_max``.

    Note that if data1 == data2, the
    `~halotools.mock_observables.pairwise_distance_xy_z` function double-counts pairs.

    Parameters
    ----------
    data1 : array_like
        N1 by 3 numpy array of 3-dimensional positions.
        Values of each dimension should be between zero and the corresponding dimension
        of the input period.

    data2 : array_like
        N2 by 3 numpy array of 3-dimensional positions.
        Values of each dimension should be between zero and the corresponding dimension
        of the input period.

    rp_max : array_like
        radius of the cylinder to search for neighbors around galaxies in ``data1``.
        If a single float is given, ``rp_max`` is assumed to be the same for each galaxy in
        ``data1``. You may optionally pass in an array of length *Npts1*, in which case
        each point in ``data1`` will have its own individual neighbor-search projected radius.

        Length units assumed to be in Mpc/h, here and throughout Halotools.

    pi_max : array_like
        Half-length of cylinder to search for neighbors around galaxies in ``data1``.
        If a single float is given, ``pi_max`` is assumed to be the same for each galaxy in
        ``data1``. You may optionally pass in an array of length *Npts1*, in which case
        each point in ``data1`` will have its own individual neighbor-search cylinder half-length.

        Length units assumed to be in Mpc/h, here and throughout Halotools.

    period : array_like, optional
        Length-3 array defining the periodic boundary conditions.
        If only one number is specified, the enclosing volume is assumed to
        be a periodic cube (by far the most common case).
        If period is set to None, the default option,
        PBCs are set to infinity.

    verbose : Boolean, optional
        If True, print out information and progress.

    num_threads : int, optional
        Number of CPU cores to use in the pair counting.
        If ``num_threads`` is set to the string 'max', use all available cores.
        Default is 1 thread for a serial calculation that
        does not open a multiprocessing pool.

    approx_cell1_size : array_like, optional
        Length-3 array serving as a guess for the optimal manner by which
        the `~halotools.mock_observables.pair_counters.RectangularDoubleMesh`
        will apportion the ``data`` points into subvolumes of the simulation box.
        The optimum choice unavoidably depends on the specs of your machine.
        Default choice is to use 1/10 of the box size in each dimension,
        which gives reasonable performance for most use-cases.
        Performance can vary sensitively with this parameter, so it is highly
        recommended that you experiment with this parameter when carrying out
        performance-critical calculations.

    approx_cell2_size : array_like, optional
        See comments for ``approx_cell1_size``.

    Returns
    -------
    perp_distance, para_distance : `~scipy.sparse.coo_matrix`
        A pair of sparse matrices in COO format containing the xy-projected and
        line-of-sight distances between the ith entry in ``data1`` and the jth
        entry in ``data2``.

    Examples
    --------
    For demonstration purposes we create randomly distributed sets of points within a
    periodic unit cube.

    >>> Npts1, Npts2, Lbox = 1000, 1000, 250.
    >>> period = [Lbox, Lbox, Lbox]
    >>> rp_max = 1.0
    >>> pi_max = 2.0

    >>> x1 = np.random.uniform(0, Lbox, Npts1)
    >>> y1 = np.random.uniform(0, Lbox, Npts1)
    >>> z1 = np.random.uniform(0, Lbox, Npts1)
    >>> x2 = np.random.uniform(0, Lbox, Npts2)
    >>> y2 = np.random.uniform(0, Lbox, Npts2)
    >>> z2 = np.random.uniform(0, Lbox, Npts2)

    We transform our *x, y, z* points into the array shape used by the pair-counter by
    taking the transpose of the result of `numpy.vstack`. This boilerplate transformation
    is used throughout the `~halotools.mock_observables` sub-package:

    >>> data1 = np.vstack([x1, y1, z1]).T
    >>> data2 = np.vstack([x2, y2, z2]).T

    >>> perp_dist_matrix, para_dist_matrix = pairwise_distance_xy_z(data1, data2, rp_max, pi_max, period = period)

    """

    # Process the inputs with the helper function
    result = _pairwise_distance_xy_z_process_args(data1, data2, rp_max, pi_max,
                                                  period, verbose, num_threads,
                                                  approx_cell1_size,
                                                  approx_cell2_size)
    x1in, y1in, z1in, x2in, y2in, z2in = result[0:6]
    rp_max, max_rp_max, pi_max, max_pi_max, period, num_threads, PBCs, approx_cell1_size, approx_cell2_size = result[
        6:]
    xperiod, yperiod, zperiod = period

    search_xlength, search_ylength, search_zlength = max_rp_max, max_rp_max, max_pi_max

    # Compute the estimates for the cell sizes
    approx_cell1_size, approx_cell2_size = (_set_approximate_cell_sizes(
        approx_cell1_size, approx_cell2_size, period))
    approx_x1cell_size, approx_y1cell_size, approx_z1cell_size = approx_cell1_size
    approx_x2cell_size, approx_y2cell_size, approx_z2cell_size = approx_cell2_size

    # Build the rectangular mesh
    double_mesh = RectangularDoubleMesh(
        x1in, y1in, z1in, x2in, y2in, z2in, approx_x1cell_size,
        approx_y1cell_size, approx_z1cell_size, approx_x2cell_size,
        approx_y2cell_size, approx_z2cell_size, search_xlength, search_ylength,
        search_zlength, xperiod, yperiod, zperiod, PBCs)

    # Create a function object that has a single argument, for parallelization purposes
    engine = partial(pairwise_distance_xy_z_engine, double_mesh, x1in, y1in,
                     z1in, x2in, y2in, z2in, rp_max, pi_max)

    # Calculate the cell1 indices that will be looped over by the engine
    num_threads, cell1_tuples = _cell1_parallelization_indices(
        double_mesh.mesh1.ncells, num_threads)

    if num_threads > 1:
        pool = multiprocessing.Pool(num_threads)
        result = pool.map(engine, cell1_tuples)
        pool.close()
    else:
        result = [engine(cell1_tuples[0])]

    # unpack result
    d_perp = np.zeros((0, ), dtype='float')
    d_para = np.zeros((0, ), dtype='float')
    i_inds = np.zeros((0, ), dtype='int')
    j_inds = np.zeros((0, ), dtype='int')

    # unpack the results
    for i in range(len(result)):
        d_perp = np.append(d_perp, result[i][0])
        d_para = np.append(d_para, result[i][1])
        i_inds = np.append(i_inds, result[i][2])
        j_inds = np.append(j_inds, result[i][3])

    return (coo_matrix((d_perp, (i_inds, j_inds)),
                       shape=(len(data1), len(data2))),
            coo_matrix((d_para, (i_inds, j_inds)),
                       shape=(len(data1), len(data2))))
Пример #49
0
def convert_labels(y, C=3):
    Y = sparse.coo_matrix((np.ones_like(y), (y, np.arange(len(y)))),
                          shape=(C, len(y))).toarray()
    return Y
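For example, three classes and labels y = [0, 2, 1, 1] give a 3 x 4 one-hot matrix whose rows are classes and columns are samples:

import numpy as np

y = np.array([0, 2, 1, 1])
Y = convert_labels(y, C=3)
print(Y)
# [[1 0 0 0]
#  [0 0 1 1]
#  [0 1 0 0]]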
Пример #50
0
def test_connectome_strength(tmpdir):
    # symmetric up to 1e-3 relative tolerance
    adjacency_matrix = np.array([[1., -2., 0.3, 0.],
                                 [-2.002, 1, 0., 0.],
                                 [0.3, 0., 1., 0.],
                                 [0., 0., 0., 1.]])
    node_coords = np.arange(3 * 4).reshape(4, 3)

    args = adjacency_matrix, node_coords
    kwargs = dict()
    plot_connectome_strength(*args, **kwargs)
    plt.close()

    # used to speed-up tests for the next plots
    kwargs['display_mode'] = 'x'

    # node_coords not an array but a list of tuples
    plot_connectome_strength(adjacency_matrix,
                             [tuple(each) for each in node_coords],
                             **kwargs)

    # saving to file
    filename = str(tmpdir.join('test.png'))
    display = plot_connectome_strength(
        *args, output_file=filename, **kwargs
    )
    assert display is None
    assert os.path.isfile(filename)
    assert os.path.getsize(filename) > 0
    plt.close()

    # passing node args
    plot_connectome_strength(*args, node_size=10, cmap='RdBu')
    plt.close()
    plot_connectome_strength(*args, node_size=10, cmap=plt.cm.RdBu)
    plt.close()

    # masked array support
    masked_adjacency_matrix = np.ma.masked_array(
        adjacency_matrix, np.abs(adjacency_matrix) < 0.5
    )
    plot_connectome_strength(
        masked_adjacency_matrix, node_coords, **kwargs
    )
    plt.close()

    # sparse matrix support
    sparse_adjacency_matrix = sparse.coo_matrix(adjacency_matrix)
    plot_connectome_strength(
        sparse_adjacency_matrix, node_coords, **kwargs
    )
    plt.close()

    # NaN matrix support
    nan_adjacency_matrix = np.array([[1., np.nan, 0.],
                                     [np.nan, 1., 2.],
                                     [np.nan, 2., 1.]])
    nan_node_coords = np.arange(3 * 3).reshape(3, 3)
    plot_connectome_strength(nan_adjacency_matrix, nan_node_coords, **kwargs)
    plt.close()

    # smoke-test with hemispheric sagittal cuts
    plot_connectome_strength(*args, display_mode='lzry')
    plt.close()
Пример #51
0
import numpy as np
import pandas as pd
import scipy.sparse as ss
import sklearn.model_selection
import matplotlib.pyplot as plt

df = pd.read_csv("data/ml-latest-small/ratings.csv")
df['timestamp'] = pd.to_datetime(df.timestamp, unit='s')

#Initialize Matrix method 1
# matrix = np.zeros((max(df.userId), max(df.movieId)))
# for i in range(df.shape[0]):
#     matrix[df.iloc[i,0]-1, df.iloc[i,1]-1] = df.iloc[i,2]
# #method 2
# mtx = ss.coo_matrix((df.rating, (df.userId, df['movieId'])), shape=(max(df.userId)+1, max(df['movieId'])+1))

# train/validation splitting
trainSet, valSet = sklearn.model_selection.train_test_split(df.iloc[:, :3])
trainSet = ss.coo_matrix(
    (trainSet['rating'], (trainSet['userId'], trainSet['movieId'])),
    shape=(df['userId'].max() + 1, df['movieId'].max() + 1))
valSet = ss.coo_matrix(
    (valSet['rating'], (valSet['userId'], valSet['movieId'])),
    shape=(df['userId'].max() + 1, df['movieId'].max() + 1))


def validation(W, H, valSet):
    error = 0
    for (row, col, data) in zip(valSet.row, valSet.col, valSet.data):
        error += abs(np.dot(W[row], H[:, col]) - data)
    return error


error = np.zeros(10)
trainloss = np.zeros(10)
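The snippet stops before the factors W and H are computed. A hedged sketch of one way to obtain them (assumed, not part of the original) is sklearn's NMF on the training matrix, scored with the `validation` helper above; the rank and solver settings are illustrative.

from sklearn.decomposition import NMF

nmf = NMF(n_components=20, init='random', max_iter=200, random_state=0)
W = nmf.fit_transform(trainSet.tocsr())   # (n_users, k)
H = nmf.components_                       # (k, n_movies)
print('validation error:', validation(W, H, valSet))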
Пример #52
0
    def _fit_transform(self,
                       graph: Graph,
                       return_dataframe: bool = True,
                       verbose: bool = True) -> EmbeddingResult:
        """Return node embedding."""
        matrix = None
        if self._metric == "Jaccard":
            edges, weights = graph.get_jaccard_coo_matrix()
        elif self._metric == "Laplacian":
            edges, weights = graph.get_laplacian_coo_matrix()
        elif self._metric == "Modularity":
            matrix = graph.get_dense_modularity_matrix()
        elif self._metric == "Left Normalized Laplacian":
            edges, weights = graph.get_left_normalized_laplacian_coo_matrix()
        elif self._metric == "Right Normalized Laplacian":
            edges, weights = graph.get_right_normalized_laplacian_coo_matrix()
        elif self._metric == "Symmetric Normalized Laplacian":
            edges, weights = graph.get_symmetric_normalized_laplacian_coo_matrix(
            )
        elif self._metric == "Neighbours Intersection size":
            edges, weights = graph.get_neighbours_intersection_size_coo_matrix(
            )
        elif self._metric == "Ancestors Jaccard":
            matrix = graph.get_shared_ancestors_jaccard_adjacency_matrix(
                graph.get_breadth_first_search_from_node_names(
                    src_node_name=self._root_node_name,
                    compute_predecessors=True),
                verbose=verbose)
        elif self._metric == "Ancestors size":
            matrix = graph.get_shared_ancestors_size_adjacency_matrix(
                graph.get_breadth_first_search_from_node_names(
                    src_node_name=self._root_node_name,
                    compute_predecessors=True),
                verbose=verbose)
        elif self._metric == "Adamic-Adar":
            edges, weights = graph.get_adamic_adar_coo_matrix()
        elif self._metric == "Adjacency":
            edges, weights = graph.get_directed_edge_node_ids(), np.ones(
                graph.get_number_of_directed_edges())
        else:
            raise NotImplementedError(f"The provided metric {self._metric} "
                                      "is not currently supported.")

        if matrix is None:
            matrix = coo_matrix((weights, (edges[:, 0], edges[:, 1])),
                                shape=(graph.get_number_of_nodes(),
                                       graph.get_number_of_nodes()),
                                dtype=np.float32)

            U, sigmas, Vt = sparse_svds(matrix,
                                        k=int(self._embedding_size / 2))
        else:
            U, sigmas, Vt = randomized_svd(matrix,
                                           n_components=int(
                                               self._embedding_size / 2))

        sigmas = np.diagflat(np.sqrt(sigmas))
        left_embedding = np.dot(U, sigmas)
        right_embedding = np.dot(Vt.T, sigmas)

        if return_dataframe:
            node_names = graph.get_node_names()
            left_embedding = pd.DataFrame(left_embedding, index=node_names)
            right_embedding = pd.DataFrame(right_embedding, index=node_names)
        return EmbeddingResult(
            embedding_method_name=self.model_name(),
            node_embeddings=[left_embedding, right_embedding])
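Splitting the square roots of the singular values between the two factors means their product recovers the truncated SVD of the metric matrix; a small hedged check (illustrative, assuming dense ndarray embeddings):

import numpy as np

# left_embedding = U * sqrt(S) and right_embedding = V * sqrt(S),
# so left @ right.T ~= U S V^T, the rank-k approximation of `matrix`.
approx = np.asarray(left_embedding) @ np.asarray(right_embedding).T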
Пример #53
0
def main(args):
	## Load in args which set parameters for runs
	epochs = args.epochs
	points_to_collect = args.points_to_collect #number of repetitions per d
	lr = args.lr
	model = args.model
	dataset = args.dataset
	opt_alg = args.opt_alg	
	ds_to_explore = [int(d_num) for d_num in args.ds_to_explore]
	nnz = args.nnz
	init_iters = args.init_iters
	block_start = args.block_start # This mainly is so the random seed is different
	use_sparse = args.use_sparse_multiply
	jit_grad = args.jit_grad


	# Hide any GPUs from TensorFlow. Otherwise TF might reserve memory and make
	# it unavailable to JAX.
	tf.config.experimental.set_visible_devices([], "GPU")

	## Logging 
	# Logger specifications
	do_log = True
	do_gitchecks = True
	do_envchecks = True

	log_dir = '../lottery-subspace-data'
	if use_sparse:
		param_str = '%s_%s_init%i_nnz%i' % (model, dataset, init_iters, nnz)
	else:
		param_str = '%s_%s_init%i' % (model, dataset, init_iters)

	logger = logging.getLogger("my logger")
	scriptname = os.path.splitext(os.path.basename(__file__))[0] # Get name of script without extension
	aname, _ = loggingSetup(logger, scriptname, log_dir, do_log=do_log, param_str = param_str)
	result_file = '%s_results' % (aname)  # Outfile name

	# Print current environment and git status to the log
	if do_gitchecks:
		gitstatus(logger)

	if do_envchecks:
		envstatus(logger, use_gpu = True)


	# Start log with experimental parameters
	logger.info('\n ---Code Output---\n')
	logger.info('\n')
	logger.info('[Burn-in Subspace] Random affine subspace at trained parameters: \n')
	logger.info('\n')
	logger.info('Dimensions to Explore: %s \n' % str(ds_to_explore))
	logger.info('Model: %s \n' % (model))
	logger.info('Dataset: %s \n' % (dataset))
	logger.info('Optimization Algorithm: %s with learning rate %.2e \n' % (opt_alg, lr))
	logger.info('Initial Training Iterations: %s Iterations \n' % str(init_iters))
	if use_sparse:
		logger.info('Sparsity: %s nonzero\n' % str(nnz))
	else:
		logger.info('No sparsity restrictions on projection matrix. \n')
	logger.info('Collect %i points for each dimension (Random seed starting at %i). \n' % (points_to_collect, block_start))
	logger.info('Run optimization for %i epochs. \n' % (epochs))
	logger.info('\n')

	## Setup data
	if (dataset == 'MNIST'):
		x_train, full_train_dict, train_ds, test_ds, classes = setupMNIST()
		input_shape = (1, 28, 28, 1)
	elif (dataset == 'fashionMNIST'):
		x_train, full_train_dict, train_ds, test_ds, classes = setupFashionMNIST()
		input_shape = (1, 28, 28, 1)
	elif (dataset == 'SVHN'):
		x_train, full_train_dict, train_ds, test_ds, classes = setupSVHN()
		input_shape = (1, 32, 32, 3)
	elif (dataset == 'cifar10'):
		x_train, full_train_dict, train_ds, test_ds, classes = setupCIFAR10()
		input_shape = (1, 32, 32, 3)
	elif (dataset == 'cifar100'):
		x_train, full_train_dict, train_ds, test_ds, classes = setupCIFAR100()
		input_shape = (1, 32, 32, 3)
	else:
		logging.error('Dataset not recognized \n')

	test_ds_normalized = dict(test_ds)

	## Initialize model
	global net
	if (model == 'TinyCNN'):
		net = SimpleCNN.partial(
			channels = [16,32],
			classes = classes,
			)
	elif (model == 'SmallCNN'):
		net = SimpleCNN.partial(
			channels = [32,64,64],
			classes = classes,
			)
	elif (model == 'MediumCNN'):
		net = SimpleCNN.partial(
			channels = [32,64,64,128],
			classes = classes,
			)
	elif (model == 'ResNet_BNotf'):
		net = KerasResNets.partial(
			num_classes = classes,
			use_batch_norm = True,
		)
	elif (model == 'WideResNet'):
		net = WideResnet.partial(
			blocks_per_group=2,
			channel_multiplier=4,
			num_outputs=100,
			dropout_rate=0.0
		)
	else:
		logger.error('Model type not recognized\n')

	out =	{
		"model": model,
		"dataset": dataset,
		"epochs": epochs,
		"points_to_collect": points_to_collect,
		"ds_to_explore": ds_to_explore,
		"init_iters": init_iters,
		"nnz": nnz,
		"full_d": '',
		"data": {
			"d": [],
			"point_id": [],
			"it": [],
			"abs_theta": [],
			"train_loss": [],
			"train_acc": [],
			"full_train_loss": [],
			"full_train_acc": [],
			"best_train_acc": [],
			"test_loss": [],
			"test_acc": [],
			"best_test_acc": [],
			"nnz": [],
			"avg_grad_time": [],
			"avg_proj_time": [],
			"epoch_times": []
		}
	}

	time_per_run = onp.zeros((len(ds_to_explore), points_to_collect, epochs))

	loss_grad_full = jax.jit(jax.grad(
		lambda model, batch: normal_loss_opt(
			model,batch
		)
	))

	# Loop over runs for each dimension
	for point_id in range(points_to_collect):

		# Initialize the net, block_start allows us to split the runs up into parts
		_, initial_params = net.init_by_shape(jax.random.PRNGKey(point_id+block_start+12574),[(input_shape, jnp.float32)])
		model = flax.nn.Model(net, initial_params)


		if init_iters == 0:
			# This is the intrinsic dimension case
			trained_params = initial_params
		else:
			# This is the burn-in subspace case
			optimizer = flax.optim.Momentum(learning_rate=lr).create(model)
			total_it = -1
			for batch in train_ds:
				total_it = total_it + 1
				if total_it  > init_iters:
					break
				optimizer = optimizer.apply_gradient(loss_grad_full(optimizer.target, batch))

			# This now are parameters that have been trained for the specified number of iterations
			trained_params = optimizer.target.params
		

		# Loop over dimension to explore
		for d_num, d in enumerate(ds_to_explore):

			params_now = trained_params

			D = jnp.sum(jnp.asarray([onp.prod(x.shape) for x in jax.tree_flatten(initial_params)[0]]))
			logger.info('\n'+'-'*95+'\n')
			logger.info("Run Number "+str(point_id)+'\n')
			logger.info("Number of params = "+str(D)+"   subspace d="+str(d)+'\n')

			# Projection plane
			if use_sparse:
				M_unit = generate_projection(d,D,nnz,enforce_no_overlap_if_possible = True)
			else:	
				M_unit = generate_projection(d,D)

			if use_sparse:
				M_unit_transpose_coo = sparse.coo_matrix(M_unit.T)
				M_unit_transpose_sparse = onp.array((M_unit_transpose_coo.row, M_unit_transpose_coo.col, M_unit_transpose_coo.data))

				bytes_string = "M_unit bytes: " + sizeof_fmt(M_unit.nbytes) + "   M_unit_sparse data bytes: "+ sizeof_fmt(M_unit_transpose_coo.data.nbytes) + "   M_unit_sparse total bytes: " + sizeof_fmt(M_unit_transpose_coo.data.nbytes + M_unit_transpose_coo.col.nbytes + M_unit_transpose_coo.row.nbytes)
				logger.info(bytes_string + '\n')
				logger.info('-'*95 + '\n')

			# Important: This now uses the trained parameters
			leaves0,treedef = jax.tree_flatten(params_now)
			vec0,shapes_list = flatten_leaves(leaves0)

			if use_sparse:
				# Gradient function of the loss (with sparse matrix-vector multiplication) 
				loss_grad_wrt_theta = jax.grad(
					lambda theta_now, batch: normal_loss(
						sparse_theta_to_paramstree(theta_now,M_unit_transpose_sparse,vec0,treedef,shapes_list), batch
					)
				)
			else:
				if jit_grad:
					loss_grad_wrt_theta = jax.jit(jax.grad(
						lambda theta_now, batch: normal_loss(
							theta_to_paramstree(theta_now,M_unit,vec0,treedef,shapes_list), batch
						)
					))
				else:
					loss_grad_wrt_theta = jax.grad(
						lambda theta_now, batch: normal_loss(
							theta_to_paramstree(theta_now,M_unit,vec0,treedef,shapes_list), batch
						)
					)


			# Start at the initial params (vec0), not the global origin
			theta = jnp.zeros((1,d)) 

			# Parameters and aux variables for Adam
			beta_1=0.9
			beta_2=0.999
			epsilon=1e-07

			mass = jnp.zeros((1, d))
			velocity = jnp.zeros((1, d))

			# Reset every loop
			total_it = -1
			best_train_acc = 0
			best_test_acc = 0
			
			# Lists to store time for computing grad and projecting theta to full parameter space
			grad_ts = []
			proj_ts = []

			## Train the model
			# Loop over training data
			for batch in train_ds:

				total_it += 1

				if total_it / (len(x_train)/128.0) > epochs:
					break

				e_float = total_it / (len(x_train)/128.0)

				# This is the gradient in the hyperplane space
				grad_t1 = time.time()
				g_theta = loss_grad_wrt_theta(theta,batch)
				grad_t2 = time.time()
				grad_ts.append(grad_t2 - grad_t1)

				# Take a step in the plane
				if (opt_alg == 'Adam'):
					# Approximation of 1st and 2nd moment via exponential averaging
					mass = beta_1 * mass + (1.0 - beta_1) * g_theta
					velocity = beta_2 * velocity + (1.0 - beta_2) * (g_theta**2.0)

					# Bias correction
					hat_mass = mass / (1.0-beta_1)
					hat_velocity = velocity / (1.0-beta_2)

					# Update
					theta = theta - lr / (jnp.sqrt(hat_velocity) + epsilon) * hat_mass
				else:
					theta = theta - lr*g_theta

				# Get updated parameters
				proj_t1 = time.time()
				if use_sparse:
					params_now = sparse_theta_to_paramstree(theta,M_unit_transpose_sparse,vec0,treedef,shapes_list)
				else:
					params_now = theta_to_paramstree(theta,M_unit,vec0,treedef,shapes_list)
				proj_t2 = time.time()
				proj_ts.append(proj_t2 - proj_t1)
	

				# Batch loss and accuracy
				loss_out = normal_loss(params_now,batch)
				accuracy_out = normal_accuracy(params_now,batch)

				# Print train accuracies once in a while
				if total_it % 50 == 0 and total_it != 0:
					logger.info('{:10}{:10}{:15}{:15}{:15}{:15}{:15}'.format(str(round(e_float, 3)),str(total_it),str(onp.linalg.norm(theta)),str(loss_out),str(accuracy_out),'-','-')+'\n')

				# Test and print stats every epoch
				if (total_it % int(len(x_train)/128.0)) in [0]:
				
					# Test verification

					test_loss_out = normal_loss(params_now,test_ds_normalized)
					test_accuracy_out = normal_accuracy(params_now,test_ds_normalized)

					# Full train accuracy
					full_loss_out = normal_loss(params_now,full_train_dict)
					full_accuracy_out = normal_accuracy(params_now,full_train_dict)

					# Check if this is the best accuracy we've seen
					if test_accuracy_out > best_test_acc:
						best_test_acc = test_accuracy_out

					if full_accuracy_out > best_train_acc:
						best_train_acc = full_accuracy_out


					if total_it > 0:
						t2 = time.time()
						time_per_run[d_num, point_id, int(total_it / int(len(x_train)/128.0))-1] = t2 - t1
					t1 = time.time()
					
					logger.info('{:10}{:10}{:15}{:15}{:15}{:15}{:15}'.format('epoch','iter','|theta|', 'train loss', 'train acc', 'test loss', 'test acc')+'\n')
					logger.info('{:10}{:10}{:15}{:15}{:15}{:15}{:15}'.format(str(round(e_float, 3)),str(total_it),str(onp.linalg.norm(theta)),str(full_loss_out),str(full_accuracy_out),str(test_loss_out),str(test_accuracy_out))+'\n')


			avg_grad_time = onp.mean(grad_ts)
			avg_proj_time = onp.mean(proj_ts)

			logger.info('\nTotal time:                     ' + str(sum(time_per_run[d_num, point_id])) +'\n')
			logger.info('Avg time to compute gradient:   ' + str(avg_grad_time)+'\n')
			logger.info('Avg time to project theta:      ' + str(avg_proj_time)+'\n')

			# Data out
			out["full_d"] = D
			out["data"]["d"].append(d)
			out["data"]["point_id"].append(point_id)
			out["data"]["it"].append(str(total_it))
			out["data"]["abs_theta"].append(str(onp.linalg.norm(theta)))
			out["data"]["train_loss"].append(str(loss_out))
			out["data"]["train_acc"].append(str(accuracy_out))
			out["data"]["full_train_loss"].append(str(full_loss_out))
			out["data"]["full_train_acc"].append(str(full_accuracy_out))
			out["data"]["best_train_acc"].append(str(best_train_acc))
			out["data"]["test_loss"].append(str(test_loss_out))
			out["data"]["test_acc"].append(str(test_accuracy_out))
			out["data"]["best_test_acc"].append(str(best_test_acc))
			out["data"]["nnz"].append(nnz)
			out["data"]["avg_grad_time"].append(avg_grad_time)
			out["data"]["avg_proj_time"].append(avg_proj_time)
			out["data"]["epoch_times"].append(time_per_run[d_num, point_id])


		# Write data to file every new dimension
		save_obj(out, result_file) 
Пример #54
0
def build_matrix(pxl_inds, ints, nrows, ncols):
    # integer division recovers the row index from the flat pixel index
    return coo_matrix((ints, (pxl_inds // ncols, pxl_inds % ncols)), shape=(nrows, ncols))
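For example (illustrative values), three intensities placed into a 2 x 3 image via flat pixel indices:

import numpy as np

pxl_inds = np.array([0, 3, 5])            # flat index = row * ncols + col
ints = np.array([10.0, 20.0, 30.0])
img = build_matrix(pxl_inds, ints, nrows=2, ncols=3)
print(img.toarray())
# [[10.  0.  0.]
#  [20.  0. 30.]]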
Пример #55
0
        if class_prior is not None:
            assert_true(class_prior.shape[0] == len(np.unique(y)),
                        'MockClassifier extra fit_param class_prior.shape[0]'
                        ' is {0}, should be {1}'.format(class_prior.shape[0],
                                                        len(np.unique(y))))
        return self

    def predict(self, T):
        return T.shape[0]

    def score(self, X=None, Y=None):
        return 1. / (1 + np.abs(self.a))


X = np.ones((10, 2))
X_sparse = coo_matrix(X)
y = np.arange(10) // 2

##############################################################################
# Tests


def check_valid_split(train, test, n_samples=None):
    # Use python sets to get more informative assertion failure messages
    train, test = set(train), set(test)

    # Train and test split should not overlap
    assert_equal(train.intersection(test), set())

    if n_samples is not None:
        # Check that the union of train an test split cover all the indices
Пример #56
0
# In[7]:

user_item_train, user_item_test, rating_train, rating_test = train_test_split(
    user_item.T, rating, test_size=2775344, random_state=42)

# 27753444

nnz_train = 24978100
nnz_test = 2775344

# In[8]:

#for test data, we need COO format to calculate test RMSE
#1-based to 0-based
R_test_coo = coo_matrix(
    (rating_test, (user_item_test[:, 0] - 1, user_item_test[:, 1] - 1)))
#scipy does not guarantee coo row-major layout expected by cuSPARSE
R_test_coo = R_test_coo.tocsr().tocoo()
assert R_test_coo.nnz == nnz_test
R_test_coo.data.astype(np.float32).tofile('R_test_coo.data.bin')
R_test_coo.row.tofile('R_test_coo.row.bin')
R_test_coo.col.tofile('R_test_coo.col.bin')

# In[9]:

print("max(R_test_coo.data)")
print(np.max(R_test_coo.data))
print("max(R_test_coo.row)")
print(np.max(R_test_coo.row))
print("max(R_test_coo.col)")
print(np.max(R_test_coo.col))
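A hedged sketch of reading the binary files back on the consumer side (assumed; the index dtype must match what SciPy used when the COO matrix was built, typically int32):

import numpy as np

data = np.fromfile('R_test_coo.data.bin', dtype=np.float32)
rows = np.fromfile('R_test_coo.row.bin', dtype=np.int32)
cols = np.fromfile('R_test_coo.col.bin', dtype=np.int32)
assert data.size == rows.size == cols.size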
Пример #57
0
             - Yshunts[pq_] * U[0, pq_] \
             + (vec_P[pq_] - vec_Q[pq_] * 1j) * X[0, pq_] \
             - prod2[pq_] \
             - np.sum(Ytapslack[pq_, :], axis=1)

valor[pv_] = - prod[pv_] \
             + np.sum(Yslack[pv_, :], axis=1) \
             - Yshunts[pv_] * U[0, pv_] \
             + vec_P[pv_] * X[0, pv_] \
             - prod2[pv_] \
             - np.sum(Ytapslack[pv_, :], axis=1)

RHS = np.r_[valor.real, valor.imag,
            W[pv_] - 1]  # with the equation for the voltage magnitude of the PV buses

VRE = coo_matrix((2 * U_re[0, pv_], (np.arange(npv), pv_)),
                 shape=(npv, npqpv)).tocsc()  # sparse COO matrix converted to compressed (CSC) form
VIM = coo_matrix((2 * U_im[0, pv_], (np.arange(npv), pv_)),
                 shape=(npv, npqpv)).tocsc()
XIM = coo_matrix((-X_im[0, pv_], (pv_, np.arange(npv))),
                 shape=(npqpv, npv)).tocsc()
XRE = coo_matrix((X_re[0, pv_], (pv_, np.arange(npv))),
                 shape=(npqpv, npv)).tocsc()
EMPTY = csc_matrix((npv, npv))  # compressed sparse matrix

MATx = vstack((hstack((G, -B, XIM)), hstack(
    (B, G, XRE)), hstack((VRE, VIM, EMPTY))),
              format='csc')

MAT_LU = factorized(
    MATx.tocsc())  # factorized matrix (only needs to be done once)
LHS = MAT_LU(RHS)  # obtain the vector of unknowns
Пример #58
0
if not os.path.isdir(process_data_dir):
    os.mkdir(process_data_dir)
adj_file = pjoin(process_data_dir, 'adj_{}.npz'.format(args.least_ratio))
node_pos_file = pjoin(process_data_dir,
                      'node_pos_{}.npy'.format(args.least_ratio))

W = sp.load_npz(adj_file).toarray()
node_pos = np.load(node_pos_file)
# num of nodes
n = node_pos.shape[0]
args.n_route = n
# Calculate graph kernel
L = scaled_laplacian(W)
# Alternative approximation method: 1st approx - first_approx(W, n).
Lk = cheb_poly_approx(L, Ks, n)
Lk_sp = sp.coo_matrix(Lk)

# Lk_spt = tf.SparseTensorValue(
#     indices=np.array([Lk_sp.row, Lk_sp.col], np.int64).T,
#     values=Lk_sp.data,
#     dense_shape=Lk_sp.shape)

tf.add_to_collection(name='graph_kernel_indices',
                     value=tf.cast(
                         tf.constant(np.array([Lk_sp.row, Lk_sp.col]).T),
                         tf.int64))
tf.add_to_collection(name='graph_kernel_value',
                     value=tf.cast(tf.constant(Lk_sp.data), tf.float32))
tf.add_to_collection(name='graph_kernel_shape',
                     value=tf.cast(tf.constant(Lk_sp.shape), tf.int64))
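A hedged sketch of how a consumer might rebuild the sparse graph kernel from these collections (TensorFlow 1.x collection API assumed, matching the calls above):

import tensorflow as tf

indices = tf.get_collection('graph_kernel_indices')[0]
values = tf.get_collection('graph_kernel_value')[0]
shape = tf.get_collection('graph_kernel_shape')[0]
graph_kernel = tf.SparseTensor(indices=indices, values=values, dense_shape=shape)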
Пример #59
0
    def create(self, system, positions=None):
        """Return the SOAP output for the given system and given positions.

        Args:
            system (:class:`ase.Atoms` | :class:`.System`): Input system.
            positions (list): Cartesian positions or atomic indices. If
                specified, the SOAP spectrum will be created for these points.
                If no positions are defined, the SOAP output will be created
                for all atoms in the system.

        Returns:
            np.ndarray | scipy.sparse.coo_matrix: The SOAP output for the
            given system and positions. The return type depends on the
            'sparse'-attribute. The first dimension is given by the number of
            positions and the second dimension is determined by the
            get_number_of_features()-function.
        """
        # Transform the input system into the internal System-object
        system = self.get_system(system)

        # Check that the system does not have elements that are not in the list
        # of atomic numbers
        zs = set(system.get_atomic_numbers())
        if not zs.issubset(self._atomic_number_set):
            raise ValueError(
                "The given system has the following atomic numbers not defined "
                "in the SOAP constructor: {}".format(
                    zs.difference(self._atomic_number_set)))

        sub_elements = np.array(list(set(system.get_atomic_numbers())))

        # Check if periodic is valid
        if self._periodic:
            cell = system.get_cell()
            if np.cross(cell[0], cell[1]).dot(cell[2]) == 0:
                raise ValueError(
                    "System doesn't have cell to justify periodicity.")

        # Positions specified, use them
        if positions is not None:

            # Check validity of position definitions and create final cartesian
            # position list
            list_positions = []
            if len(positions) == 0:
                raise ValueError(
                    "The argument 'positions' should contain a non-empty set of"
                    " atomic indices or cartesian coordinates")
            for i in positions:
                if np.issubdtype(type(i), np.integer):
                    list_positions.append(system.get_positions()[i])
                elif isinstance(i, list) or isinstance(i, tuple):
                    list_positions.append(i)
                else:
                    raise ValueError(
                        "Create method requires the argument 'positions', a "
                        "list of atom indices and/or positions")

            # Determine the SOAPLite function to call based on periodicity and
            # rbf
            if self._rbf == "gto":
                if self._periodic:
                    soap_func = soaplite.get_periodic_soap_locals
                else:
                    soap_func = soaplite.get_soap_locals
                soap_mat = soap_func(system,
                                     list_positions,
                                     self._alphas,
                                     self._betas,
                                     rCut=self._rcut,
                                     nMax=self._nmax,
                                     Lmax=self._lmax,
                                     crossOver=self._crossover,
                                     all_atomtypes=sub_elements.tolist(),
                                     eta=self._eta)
            elif self._rbf == "polynomial":
                if self._periodic:
                    soap_func = soaplite.get_periodic_soap_locals_poly
                else:
                    soap_func = soaplite.get_soap_locals_poly
                soap_mat = soap_func(system,
                                     list_positions,
                                     rCut=self._rcut,
                                     nMax=self._nmax,
                                     Lmax=self._lmax,
                                     all_atomtypes=sub_elements.tolist(),
                                     eta=self._eta)

        # No positions given, calculate SOAP for all atoms in the structure
        else:
            # Determine the SOAPLite function to call based on periodicity and
            # rbf
            if self._rbf == "gto":
                if self._periodic:
                    soap_func = soaplite.get_periodic_soap_structure
                else:
                    soap_func = soaplite.get_soap_structure
                soap_mat = soap_func(system,
                                     self._alphas,
                                     self._betas,
                                     rCut=self._rcut,
                                     nMax=self._nmax,
                                     Lmax=self._lmax,
                                     crossOver=self._crossover,
                                     all_atomtypes=sub_elements.tolist(),
                                     eta=self._eta)
            elif self._rbf == "polynomial":
                if self._periodic:
                    soap_func = soaplite.get_periodic_soap_structure_poly
                else:
                    soap_func = soaplite.get_soap_structure_poly
                soap_mat = soap_func(system,
                                     rCut=self._rcut,
                                     nMax=self._nmax,
                                     Lmax=self._lmax,
                                     all_atomtypes=sub_elements.tolist(),
                                     eta=self._eta)

        # Map the output from subspace of elements to the full space of
        # elements
        soap_mat = self.get_full_space_output(soap_mat, sub_elements,
                                              self._atomic_numbers)

        # Create the averaged SOAP output if requested.
        if self._average:
            soap_mat = soap_mat.mean(axis=0)
            soap_mat = np.expand_dims(soap_mat, 0)

        # Make into a sparse array if requested
        if self._sparse:
            soap_mat = coo_matrix(soap_mat)

        return soap_mat
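The tail end of this method is easy to exercise on its own: averaging keeps the output two-dimensional, and the optional sparse conversion only changes the container, not the values. Below is a minimal stand-alone sketch of those last two steps; the random array is a hypothetical stand-in for the per-position SOAP output, not anything produced by soaplite.

import numpy as np
from scipy.sparse import coo_matrix

# Hypothetical stand-in for the per-position SOAP output: 3 centers, 8 features.
soap_mat = np.random.rand(3, 8)

# Average over positions, then restore the 2D shape, as in the method above.
averaged = np.expand_dims(soap_mat.mean(axis=0), 0)

# The sparse branch simply wraps the result in a COO matrix.
sparse_out = coo_matrix(averaged)
print(sparse_out.shape)      # (1, 8)
print(sparse_out.toarray())  # dense view for inspection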
Example #60
0
    def __init__(self, parent, value, seg_ids=None):
        '''
        Initializes and sets the correct data.
        '''
        # We've classed this so that we can override some of the normal
        # functions and allow indexing via seg_id.
        self.__dict__ = {}
        # Is this function thread safe?
        iter_group = parent.data_reader.get_iter_group(value)
        # iter_group = parent.west['iterations/iter_{num:08d}'.format(num=value)]
        self.parent = parent
        current = {}
        current['iteration'] = value
        if seg_ids is None:
            seg_ids = range(0, iter_group['seg_index']['weight'].shape[0])
        # Just make these easier to access.
        current['weights'] = iter_group['seg_index']['weight'][seg_ids]
        current['pcoord'] = iter_group['pcoord'][...][seg_ids, :, :]
        try:
            current['auxdata'] = {}
            for key in list(iter_group['auxdata'].keys()):
                current['auxdata'][key] = iter_group['auxdata'][key][...][
                    seg_ids, :]
        except Exception:
            pass
        current['parents'] = iter_group['seg_index']['parent_id'][seg_ids]
        current['summary'] = parent.data_reader.data_manager.get_iter_summary(
            int(value))
        current['seg_id'] = np.array(
            list(range(0, iter_group['seg_index'].shape[0])))[seg_ids]
        current['walkers'] = current['summary']['n_particles']
        current['states'] = parent.assign['trajlabels'][
            value - 1, :current['walkers'], :][seg_ids]
        current['bins'] = parent.assign['assignments'][
            value - 1, :current['walkers'], :][seg_ids]
        # Calculates the bin population for this iteration.
        nbins = parent.assign['state_map'].shape[0]
        # We have to take the 'unknown' state into account
        # nstates = parent.assign['state_labels'].shape[0] + 1
        # Temporarily disabled while I sort out the fact that we shouldn't be using data from w_assign for state populations.
        # current['plot'] = Plotter(parent.direct, parent.reweight, parent.iteration, parent.assign['bin_labels'], parent.assign['state_labels'], current['populations'].states, current['populations'].bins, parent.interface)
        # Now we'll load up the results of the kinetics analysis.
        current['direct'] = KineticsIteration(parent.direct, value,
                                              parent.assign, value)
        evolution_datasets = [
            'rate_evolution',
            'conditional_flux_evolution',
            'state_pop_evolution',
            'color_prob_evolution',
            'total_fluxes',
            'target_flux_evolution',
        ]
        # Load each of the kinetics evolution datasets listed above for this iteration.
        try:
            current['reweight'] = KineticsIteration(parent.reweight, value,
                                                    parent.assign, value)
            # Densify the sparse flux matrix stored for this iteration.
            matrix = parent.reweight['iterations/iter_{:08d}'.format(value)]
            # Assume color.
            current['instant_matrix'] = sp.coo_matrix(
                (matrix['flux'][...],
                 (matrix['rows'][...], matrix['cols'][...])),
                shape=((nbins - 1) * 2, (nbins - 1) * 2)).todense()
            reweighting = True
        except Exception:
            # This analysis hasn't been enabled, so we'll simply return the default error message.
            current['reweight'] = parent.reweight['rate_evolution']
            current['instant_matrix'] = parent.reweight['bin_populations']
            current['matrix'] = parent.reweight['bin_populations']
            reweighting = False
        # If the reweighting analysis is enabled, expose both the direct and
        # reweight results; otherwise expose only the direct results.
        if reweighting:
            for key in evolution_datasets:
                current[key] = WIPIDataset(raw={
                    'direct': current['direct'][key],
                    'reweight': current['reweight'][key]
                },
                                           key='a')
        else:
            for key in evolution_datasets:
                current[key] = WIPIDataset(
                    raw={'direct': current['direct'][key]}, key='direct')

        self.raw = current
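The try block above is the part that actually touches coo_matrix: the per-iteration flux records are stored as (value, row, col) triplets and densified on the spot. Below is a small self-contained sketch of that construction; the flux/rows/cols arrays are made-up stand-ins for the corresponding HDF5 datasets, and nbins is chosen arbitrarily.

import numpy as np
import scipy.sparse as sp

# Made-up stand-ins for the 'flux', 'rows' and 'cols' HDF5 datasets.
nbins = 4                           # bin count, including the 'unknown' bin
flux = np.array([0.10, 0.25, 0.05])
rows = np.array([0, 2, 5])
cols = np.array([1, 3, 4])

# Same construction as current['instant_matrix']: a colored bin-to-bin flux
# matrix of shape ((nbins - 1) * 2, (nbins - 1) * 2), converted to dense.
instant_matrix = sp.coo_matrix(
    (flux, (rows, cols)),
    shape=((nbins - 1) * 2, (nbins - 1) * 2)).todense()
print(instant_matrix)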