Example #1
 def testProblemStiffness2(self):
     width = 4
     height = 4
     [nodes, boundary_nodes, tris] = generateRectangularMesh((width, height), (0,0), (1,1))
     p = Problem(nodes, boundary_nodes, tris)
     K = p.getStiffnessMatrix(lambda x,y: x*y)
     self.assertTrue((sparse.triu(K, 1).T.toarray() == sparse.tril(K,-1).toarray()).all())
     width = 5
     height = 5
     [nodes, boundary_nodes, tris] = generateRectangularMesh((width, height), (0,0), (1,1))
     p = Problem(nodes, boundary_nodes, tris)
     K = p.getStiffnessMatrix(lambda x,y: x*y)
     self.assertTrue((sparse.triu(K, 1).T.toarray() == sparse.tril(K,-1).toarray()).all())
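The assertion above verifies that the assembled stiffness matrix is symmetric by comparing the transposed strict upper triangle against the strict lower triangle. A minimal standalone sketch of the same idiom (toy matrix, illustrative values only):

import numpy as np
from scipy import sparse

M = sparse.csr_matrix(np.array([[2., 1.], [1., 3.]]))
# the strict upper triangle, transposed, must equal the strict lower triangle
assert (sparse.triu(M, 1).T.toarray() == sparse.tril(M, -1).toarray()).all()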
Example #2
def r_perturbR(g,R):
    '''Random perturbation with a per-pair parameter matrix R of Bernoulli success probabilities.'''
    A=nx.to_scipy_sparse_matrix(g)
    B=sparse.triu(A).toarray()
    #print B
    n=len(g)
    i = 0
    ts=0

    while i<n:
        j=i+1
        while j<n:
            if(B[i,j]==1):
                if R[i,j]<1:
                    B[i,j] = stats.bernoulli.rvs(R[i,j])  # keep the edge with success probability p = R[i,j]
                else:
                    B[i, j] = stats.bernoulli.rvs(1)  # always succeeds; could actually be removed
                ts=ts + 1
                #print("+", ts, ":", i, ",", j, ",", B[i, j])
            else:
                if R[i,j]<1:
                    B[i,j] = stats.bernoulli.rvs(R[i,j])  # add an edge with success probability q = R[i,j]
                else:
                    B[i, j] = stats.bernoulli.rvs(0)  # always fails; could actually be removed
                ts=ts + 1
                #print("-", ts, ":", i, ",", j, ",", B[i, j])
            j = j + 1
        i=i+1

    return nx.from_numpy_matrix(B,create_using=nx.Graph())  # rebuild a Graph-type object from the perturbed matrix
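A minimal call sketch for r_perturbR, assuming networkx 2.x (nx.to_scipy_sparse_matrix and nx.from_numpy_matrix were removed in 3.0); the graph and probability matrix R below are illustrative:

import numpy as np
import networkx as nx

g = nx.erdos_renyi_graph(20, 0.2, seed=0)
# R[i, j]: probability of keeping the existing edge (i, j), or of adding it if absent
R = np.full((len(g), len(g)), 0.05)
g_perturbed = r_perturbR(g, R)
print(g.number_of_edges(), g_perturbed.number_of_edges())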
Example #3
def r_perturbSa(g,p=None):
    '''Random perturbation with a fixed parameter; p is the Bernoulli success probability of keeping an existing edge.'''
    A=nx.to_scipy_sparse_matrix(g)
    B=sparse.triu(A).toarray()
    #print B
    n=len(g)
    e_num=len(g.edges())  # number of edges present in the graph

    q = e_num * (1 - p) / ((n * (n - 1)) / 2 - e_num)
    #print q
    i = 0
    ts=0
    listp=stats.bernoulli.rvs(p,size=e_num)
    listp=listp.tolist()
    listq=stats.bernoulli.rvs(q,size=(n * (n - 1)) // 2 - e_num)  # integer division: size must be an int
    listq=listq.tolist()

    while i<n:
        j=i+1  # skip the zeros on the diagonal
        while j<n:
            if(B[i,j]==1):
                B[i,j] = listp.pop()  # Bernoulli draw with success probability p
                #ts=ts + 1
                # print("+", ts, ":", i, ",", j, ",", B[i, j])
            else:
                B[i,j] = listq.pop()  # Bernoulli draw with success probability q
                #ts=ts + 1
                # print("-", ts, ":", i, ",", j, ",", B[i, j])
            j = j + 1
        i=i+1

    return nx.from_numpy_matrix(B,create_using=nx.Graph())  # rebuild a Graph-type object from the perturbed matrix
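Here q is chosen so the perturbation preserves the expected edge count: the expected number of deletions, e_num*(1-p), equals q times the number of non-edge pairs. A minimal call sketch, again assuming networkx 2.x:

import networkx as nx

g = nx.karate_club_graph()
g_perturbed = r_perturbSa(g, p=0.95)   # keep each existing edge with probability 0.95
print(g.number_of_edges(), g_perturbed.number_of_edges())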
Example #4
def rewire(Adj, p):
    """
    Rewiring takes an existing UNDIRECTED network with Adjacency matrix given by Adj and returns a matrix with the same number of
    bonds but with a scrambled connectivity.  The nodes are iterated through in order.  At each node n_i, all bonds (n_i, n_j)
    with j > i are rewired with probability p.  In rewiring, the bond to n_j is connected to a new node n_k with k selected
    uniformly from the nodes not currently connected to i.
    """
    
    # first pull the existing bonds in the network
    rows, cols = sparse.triu(Adj, k=1).nonzero()
    
    A = Adj.tolil()               # LIL matrices are cheaper to rewire

    # rewire each bond with probability p
    for i, j in zip(rows, cols):
        if np.random.rand() < p:
            # pull list of candidate nodes to be reconnected to
            A[i, i] = 1    # as a placeholder for the moment
            temp, disconnected_nodes = (A[i, :] == 0).nonzero()
            # Draw the new node
            new_node = np.random.choice(disconnected_nodes)
            A[i, i] = 0                   # remove self-link
            A[i, j] = 0                   # remove old link
            A[j, i] = 0
            A[i, new_node] = 1            # replace with new link
            A[new_node, i] = 1
    
    return A.tocsr()
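A short usage sketch for rewire (numpy and scipy.sparse assumed; the symmetric ring-lattice adjacency is illustrative):

import numpy as np
from scipy import sparse

n = 10
ring = sparse.lil_matrix((n, n))
for i in range(n):                 # ring lattice: i connected to (i + 1) mod n
    ring[i, (i + 1) % n] = 1
    ring[(i + 1) % n, i] = 1
A = rewire(ring.tocsr(), p=0.3)
print(ring.nnz, A.nnz)             # the bond count is preserved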
Example #5
def graphml2mat(ingraph, outgraph, prune=False):
	ing = Graph.Read_GraphML(ingraph)
	
	if sum(ing.es()[:]['weight']) < 500000:
		print('bad graph? ecount=', sum(ing.es()[:]['weight']))
		print('filename=', ingraph)
		return

	#currently being done in graphgen so don't need to delete vertex 0
	#ing.vs[0].delete() 
	if prune:
		#delete zero degree nodes
		#GK TODO: be smarter
		i = list()
		for n, v in enumerate(ing.vs):
			if v.degree() == 0:
				i.append(n)
		ing.vs[i].delete()
	
	outg = lil_matrix((ing.vcount(), ing.vcount()))
	#import pdb; pdb.set_trace()
	for e in ing.es:
		outg[e.source, e.target] = e['weight']
		outg[e.target, e.source] = e['weight'] #since edges are undirected add both ways

	outg = triu(outg)
	mat_dict = {"graph": outg}
	savemat(outgraph, mat_dict)
Example #6
def sor(A, b, x0=None, w=1., maxiter=200, tol=1E-6, direction='forward'):
    '''
    SOR iteration has M = L + D/w, N = (1/w-1)*D - U for forward
    and M = U + D/w, N = (1/w-1)*D - L for backward.
    '''
    L, D, U = tril(A, k=-1), diags(A.diagonal(), 0), triu(A, k=1)
    if direction == 'forward':
        M = L + D/w
        N = (1/w - 1)*D - U
    else:
        M = U + D/w
        N = (1/w - 1)*D - L

    # Start from 0 initial guess
    if x0 is None: x0 = np.zeros(A.shape[1])

    r = b - A.dot(x0)
    residuals = [np.linalg.norm(r)]

    count = 0
    while residuals[-1] > tol and count < maxiter:
        # Update
        x0 = spsolve(M, N.dot(x0) + b)
        # Error 
        r = b - A.dot(x0)
        residuals.append(np.linalg.norm(r))
        # Count
        count += 1
    
    converged = residuals[-1] < tol
    n_iters = len(residuals) - 1
    data = {'status': converged, 'iter count': n_iters, 'residuals': residuals}

    return x0, data
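A quick check of sor on a small diagonally dominant system (illustrative values; relies on the same tril/diags/triu/spsolve imports the function itself uses):

import numpy as np
from scipy.sparse import csr_matrix

A = csr_matrix(np.array([[4., 1., 0.],
                         [1., 4., 1.],
                         [0., 1., 4.]]))
b = np.ones(3)
x, info = sor(A, b, w=1.2)
print(info['status'], info['iter count'])
print(np.allclose(A.dot(x), b, atol=1e-5))   # True once converged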
Example #7
def ssor(A, b, x0=None, w=1., maxiter=200, tol=1E-6):
    '''For symmetric matrices combine forward and backward SOR.'''
    assert is_symmetric(A, tol=1E-6)

    L, D, U = tril(A, k=-1), diags(A.diagonal(), 0), triu(A, k=1)
    # Forward
    MF = L + D/w
    NF = (1/w - 1)*D - U
    # Backward
    MB = U + D/w
    NB = (1/w - 1)*D - L

    # Start from 0 initial guess
    if x0 is None: x0 = np.zeros(A.shape[1])

    r = b - A.dot(x0)
    residuals = [np.linalg.norm(r)]

    count = 0
    while residuals[-1] > tol and count < maxiter:
        # Update
        x0 = spsolve(MF, NF.dot(x0) + b)
        x0 = spsolve(MB, NB.dot(x0) + b)
        # Error 
        r = b - A.dot(x0)
        residuals.append(np.linalg.norm(r))
        # Count
        count += 1
    
    converged = residuals[-1] < tol
    n_iters = len(residuals) - 1
    data = {'status': converged, 'iter count': n_iters, 'residuals': residuals}

    return x0, data
Example #8
 def __init__(self, G, external_voltages, I_threshold, G_OFF=1, G_ON=100):
     rnets.ResistorNetwork.__init__(self, G, external_voltages)
     self.I_threshold = I_threshold
     self.G_OFF = G_OFF
     self.G_ON = G_ON
     self.rows_G, self.cols_G = sparse.triu(self.G).nonzero()
     self.currents = None
Example #9
def path_lengthsSPARSE(G):
    """Compute array of all shortest path lengths for the given graph.

    XXX - implementation using scipy.sparse.  This might be faster for very
    sparse graphs, but so far for our cases the overhead of handling the sparse
    matrices doesn't seem to be worth it.  We're leaving it in for now, in case
    we revisit this later and it proves useful.

    The length of the output array is the number of unique pairs of nodes that
    have a connecting path, so in general it is not known in advance.

    This assumes the graph is undirected, as for any pair of reachable nodes,
    once we've seen the pair we do not keep the path length value for the
    inverse path.
    
    Parameters
    ----------
    G : an undirected graph object.
    """

    assert_no_selfloops(G)
    
    length = dict(nx.all_pairs_shortest_path_length(G))  # dict() handles both the dict and iterator return types across networkx versions

    nnod = G.number_of_nodes()
    paths_mat = sparse.dok_matrix((nnod,nnod))
    
    for src,targets in length.items():
        for targ,val in targets.items():
            paths_mat[src,targ] = val

    return sparse.triu(paths_mat,1).data
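For example, on a 3-node path graph the unique pair distances are 1, 1 and 2 (assuming the module's assert_no_selfloops helper passes):

import networkx as nx

G = nx.path_graph(3)
print(sorted(path_lengthsSPARSE(G)))   # [1.0, 1.0, 2.0]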
Example #10
    def getStiffnessMatrix(self, k_function):
        K = sparse.lil_matrix((len(self.free_nodes), len(self.free_nodes)))
        for t in self.triangles:
            coords = t.getCornerCoords()
            basis_gradient = self._calcBasisGradient(coords)
#            print basis_gradient
            triarea = t.getArea()
        
            centroid = t.getCentroid()
            intensity = triarea * k_function(centroid[0], centroid[1])
#            print intensity
            for i in range(0,3):
                if t.nodes[i].isBoundary():
                    continue
                for j in range(0,i+1):
                    if t.nodes[j].isBoundary():
                        continue
                    idx1 = t.nodes[i].free_node_index
                    idx2 = t.nodes[j].free_node_index
                    if idx2 < idx1:
                        idx1, idx2 = idx2, idx1
                    K[idx1,idx2] += basis_gradient[i,j] * intensity
        K = K + sparse.triu(K,1).T
        return K.tocsr()
Example #11
 def avg_edge_length(self):
     """Average length of all edges in the surface.
     """
     adj = self.adj
     tadj = sparse.triu(adj, 1) # only entries above main diagonal, in coo format
     edgelens = np.sqrt(((self.pts[tadj.row] - self.pts[tadj.col])**2).sum(1))
     return edgelens.mean()
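The sparse.triu(adj, 1) call yields each undirected edge exactly once (and returns COO by default, hence .row/.col). A standalone sketch of the idiom with toy points and adjacency:

import numpy as np
from scipy import sparse

pts = np.array([[0., 0.], [3., 4.], [0., 4.]])
adj = sparse.csr_matrix(np.array([[0, 1, 1],
                                  [1, 0, 0],
                                  [1, 0, 0]]))
tadj = sparse.triu(adj, 1)   # unique edges: (0, 1) and (0, 2)
edgelens = np.sqrt(((pts[tadj.row] - pts[tadj.col])**2).sum(1))
print(edgelens.mean())       # (5.0 + 4.0) / 2 = 4.5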
Example #12
def load_pdata(dataset_str):
    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    objects = []
    for i in range(len(names)):
        with open("./data/ind.{}.{}".format(dataset_str, names[i]), 'rb') as f:
            objects.append(pkl.load(f, encoding='latin1'))  # files were pickled under Python 2
    x, y, tx, ty, allx, ally, graph = tuple(objects)
    test_idx_reorder = parse_index_file("./data/ind.{}.test.index".format(dataset_str))
    test_idx_range = np.sort(test_idx_reorder)
    if dataset_str == 'citeseer':
        test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder)+1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range-min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range-min(test_idx_range), :] = ty
        ty = ty_extended
    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]

    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]
    
    idx_test = test_idx_range.tolist()
    idx_train = range(len(y))

    train_mask = sample_mask(idx_train, labels.shape[0])
    test_mask = sample_mask(idx_test, labels.shape[0])

    y_train = np.zeros(labels.shape)
    y_test = np.zeros(labels.shape)
    y_train[train_mask, :] = labels[train_mask, :]
    y_test[test_mask, :] = labels[test_mask, :]

    train_out = []
    for i in idx_train:
        ll = y_train[i].tolist()
        ll = ll.index(1) + 1
        train_out.append([i, ll])
    train_out = np.array(train_out)
    np.random.shuffle(train_out)

    test_out = []
    for i in idx_test:
        ll = y_test[i].tolist()
        ll = ll.index(1) + 1
        test_out.append([i, ll])
    test_out = np.array(test_out)
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))
    adj = adj - sp.dia_matrix((adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape)
    adj.eliminate_zeros()
    # Check that diag is zero:
    assert np.diag(adj.todense()).sum() == 0

    adj_triu = sp.triu(adj)
    adj_tuple = sparse_to_tuple(adj_triu)
    edges = adj_tuple[0]
    edges_all = sparse_to_tuple(adj)[0]
    num_mask = int(np.floor(edges.shape[0] / 10.))

    return graph, features, train_out, test_out
Example #13
  def circle_tear(self, spanning_tree='mst', cycle_len_thresh=5, spt_idx=None,
                  copy=True):
    '''Circular graph tearing.

    spanning_tree: one of {'mst', 'spt'}
    cycle_len_thresh: int, length of longest allowable cycle
    spt_idx: int, start vertex for shortest_path_subtree, random if None

    From "How to project 'circular' manifolds using geodesic distances?"
      by Lee & Verleysen, ESANN 2004.

    See also: shortest_path_subtree, minimum_spanning_subtree
    '''
    # make the initial spanning tree graph
    if spanning_tree == 'mst':
      tree = self.minimum_spanning_subtree().matrix()
    elif spanning_tree == 'spt':
      if spt_idx is None:
        spt_idx = np.random.choice(self.num_vertices())
      tree = self.shortest_path_subtree(spt_idx, directed=False).matrix()

    # find edges in self but not in the tree
    potential_edges = np.argwhere(ss.triu(self.matrix() - tree))

    # remove edges that induce large cycles
    ii, jj = _find_cycle_inducers(tree, potential_edges, cycle_len_thresh)
    return self.remove_edges(ii, jj, symmetric=True, copy=copy)
Example #14
 def test_directLower_1_python(self):
     from pymatsolver import _BackwardSolver
     AUinv = _BackwardSolver(sp.triu(self.A))
     X = AUinv * self.rhsU
     x = AUinv * self.rhsU[:,0]
     self.assertLess(np.linalg.norm(self.sol-X,np.inf), TOL)
     self.assertLess(np.linalg.norm(self.sol[:,0]-x,np.inf), TOL)
Example #15
 def setUp(self):
     n = 50
     nrhs = 20
     self.A = sp.rand(n, n, 0.4) + sp.identity(n)
     self.sol = np.ones((n, nrhs))
     self.rhsU = sp.triu(self.A) * self.sol
     self.rhsL = sp.tril(self.A) * self.sol
Example #16
def sparse_power_iteration(P, x, tol=10e-16, maxiter=200):
    """Preconditioned power iteration for a sparse stochastic matrix

    Parameters
    ---------------
    P : array, shape (n, n), sparse
        transition matrix of a Markov Chain
    x : array, shape (n, )
        On entry, the initial guess. On exit, the final solution.

    """
    t = 0
    eps = tol + 1
    n = P.shape[0]
    # ILU factorization 
    LU = ilu0_factor(P)
    L = sparse.tril(LU)
    U = sparse.triu(LU)
    # New matrix Q
    Q = P.copy()
    Q.setdiag(1 - Q.diagonal())
    Q *= -1
    Q = Q.T
    info = -1
    t = -1
    for t in range(maxiter):
        ## dot() is matrix multiplication
        dx = spla.spsolve(U, spla.spsolve(L, Q.dot(x)))  # dot() replaces the removed sparse matvec()
        x -= dx
        relres = tvnorm(dx)
        if relres < tol:
            info = 0
            break
    t += 1
    return (info, t, relres)
Example #17
 def prepare_preferential_attachment(self):
     self.repeated_nodes = hstack(
         sparse.triu(self.matrix, format='coo').nonzero())
     self.repeated_nodes = append(self.repeated_nodes,
                                  fromiter(iter(self.nodes),
                                           dtype=np.int32),)
     self._initialized_preferential_attachment = True
Example #18
def view_laplacian_off_terms(non_normalized_Laplacian):
    normalized_Laplacian = Lapl_normalize(non_normalized_Laplacian)
    triag_u = lil_matrix(triu(normalized_Laplacian))
    triag_u.setdiag(0)
    pre_arr = -triag_u[triag_u.nonzero()].toarray().flatten()
    arr = np.log10(pre_arr)
    plt.hist(arr, bins=100, log=True, histtype='step')
    plt.show()
Example #19
def train_test_split(adjacency):

    n_nodes = adjacency.shape[0]
    coo_adjacency = sp.coo_matrix(adjacency)
    coo_adjacency_upper = sp.triu(coo_adjacency, k=1)
    sp_adjacency = dense_to_sparse(coo_adjacency_upper)
    edges = sp_adjacency[0]
    num_test = int(np.floor(edges.shape[0]/10.))
    num_val = int(np.floor(edges.shape[0]/10.))

    idx_all = list(range(edges.shape[0]))
    np.random.shuffle(idx_all)
    idx_test = idx_all[:num_test]
    idx_val = idx_all[num_test:(num_val + num_test)]

    test_edges_pos = edges[idx_test]
    val_edges_pos = edges[idx_val]
    train_edges = np.delete(edges, np.hstack([idx_test, idx_val]), axis=0)
    
    test_edges_neg = []
    val_edges_neg = []
    edge_to_add = [0, 0]
    
    while (len(test_edges_neg) < len(test_edges_pos)):
        n1 = np.random.randint(0, n_nodes)
        n2 = np.random.randint(0, n_nodes)
        if n1 == n2:
            continue        
        if n1 < n2:
            edge_to_add = [n1, n2]
        else:
            edge_to_add = [n2, n1]        
        if any((edges[:]==edge_to_add).all(1)):
            continue
        test_edges_neg.append(edge_to_add)
        
    while (len(val_edges_neg) < len(val_edges_pos)):
        n1 = np.random.randint(0, n_nodes)
        n2 = np.random.randint(0, n_nodes)
        if n1 == n2:
            continue        
        if n1 < n2:
            edge_to_add = [n1, n2]
        else:
            edge_to_add = [n2, n1]        
        if any((edges[:] == edge_to_add).all(1)):
            continue
        val_edges_neg.append(edge_to_add)
    row = []
    col = []
    data = []
    for edge in train_edges:
        row.extend([edge[0], edge[1]])
        col.extend([edge[1], edge[0]])
        data.extend([1, 1])
    train_adjacency = sp.coo_matrix((data, (row,col)), shape=(n_nodes,n_nodes))

    return train_adjacency, test_edges_pos, test_edges_neg, val_edges_pos, val_edges_neg
Example #20
    def _generator(self, byres, chromsizes, bin_cumnums):

        for i in range(chromsizes.size):
            for j in range(i, chromsizes.size):
                c1, c2 = chromsizes.index[i], chromsizes.index[j]
                if self.onlyIntra:
                    if c1!=c2:
                        continue
                if (c1,c2) in byres:
                    ci, cj = i, j
                else:
                    if (c2,c1) in byres:
                        c1, c2 = c2, c1
                        ci, cj = j, i
                    else:
                        continue
                
                if type(byres[(c1,c2)])==str:
                    data = np.loadtxt(byres[(c1,c2)], dtype=self._intertype)
                else:
                    # Make it compatible with TADLib and old version of runHiC
                    if c1!=c2:
                        data = byres[(c1,c2)][(c1,c2)]
                    else:
                        if c1 in byres[(c1,c2)].files:
                            data = byres[(c1,c2)][c1]
                        else:
                            data = byres[(c1,c2)][(c1,c2)]

                x, y = data['bin1'], data['bin2']
                # Fast guarantee triu matrix
                if ci > cj:
                    x, y = y, x
                    ci, cj = cj, ci
                
                xLen = x.max() + 1
                yLen = y.max() + 1
                if ci != cj:
                    tmp = sparse.csr_matrix((data['IF'], (x,y)), shape=(xLen, yLen))
                else:
                    Len = max(xLen, yLen)
                    tmp = sparse.csr_matrix((data['IF'], (x,y)), shape=(Len, Len))
                    tmp = sparse.lil_matrix(tmp)
                    tmp[y,x] = tmp[x,y]
                    tmp = sparse.triu(tmp)
                
                x, y = tmp.nonzero()
                if ci > 0:
                    x = x + bin_cumnums[ci-1]
                if cj > 0:
                    y = y + bin_cumnums[cj-1]
                
                data = tmp.data

                current = pd.DataFrame({'bin1_id':x, 'bin2_id':y, 'count':data},
                                       columns=['bin1_id', 'bin2_id', 'count'])

                yield current
Example #21
 def edge_weights(self, copy=False, directed=True):
   if not directed:
     ii, jj = ss.triu(self._adj).nonzero()
     return np.asarray(self._adj[ii, jj]).ravel()
   # XXX: assumes correct internal ordering and no explicit zeros
   w = self._adj.data.ravel()
   if copy:
     return w.copy()
   return w
Example #22
def aggregate_partitions(G,nodeCommArray,N,tau = None,connectStrayNodes=True):
    
    #generate core communities
    #N = len(G['imputation_batches'])
   
    
    neighborsMatrix = nodeCommArray*nodeCommArray.T

    #create copy of neighbors matrix for evaluating thresholds
    scoringMatrix = neighborsMatrix.copy()
    neighborsMatrix = sp.triu(neighborsMatrix,format= "csr")
    
    
    if tau is None:
        
        #compute optimal tau
        tau,connectedComponents = compute_optimal_threshold(neighborsMatrix,scoringMatrix,N)
        coreCommunities = [x for x in connectedComponents if len(x) > MIN_CORE_COMM_SIZE]
    
    else:
        
        neighborsMatrix.data[neighborsMatrix.data < tau] = 0
        neighborsMatrix.eliminate_zeros()
        connectedComponents = sp.csgraph.connected_components(neighborsMatrix,directed = False)[1]
        connectedComponents = ig.Clustering(connectedComponents)
        coreCommunities = [x for x in connectedComponents if len(x) > MIN_CORE_COMM_SIZE]
    
    #if rare case (usually degenerate) of no core communities, output CC's as final answer
    if len(coreCommunities) < 1:
        return connectedComponents
    
    #output core communities only, this will result in some of the nodes missing from final partition
    if connectStrayNodes == False:
        return coreCommunities

    #merge stray nodes with core communities    
    coreNodes = reduce(lambda x,y:x+y,coreCommunities)
    strayNodes = [v.index for v in G.vs if v.index not in coreNodes]
    finalCommunities = deepcopy(coreCommunities)
    
    #intialize array to store distances
    commDistanceMatrix = np.zeros([len(coreCommunities),G.vcount()])
    
    #compute distances of stray nodes to each core community 
    for commIndex,comm in enumerate(coreCommunities):
        commMatrix = scoringMatrix[comm]
        commMatrix = commMatrix.astype(np.float16)
        commMatrix = commMatrix.mean(axis=0)
        commDistanceMatrix[commIndex,:] = commMatrix
    maxCommIds = np.argmax(commDistanceMatrix,axis=0)
    
    #add stray nodes to the "closest" core community
    for strayNode in strayNodes:
        finalCommunities[maxCommIds[strayNode]].append(strayNode)
    
    
    return finalCommunities
Example #23
def is_tri(X):
    diag = X.diagonal().sum()
    if sparse.issparse(X):
        # a bool is now returned on both paths (the original fell through to None)
        return (not (sparse.tril(X).sum() - diag) or
                not (sparse.triu(X).sum() - diag))
    return not np.triu(X, 1).sum() or not np.tril(X, -1).sum()
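A quick demonstration on dense inputs (illustrative):

import numpy as np

print(is_tri(np.triu(np.ones((3, 3)))))        # True: strict lower triangle is empty
print(is_tri(np.array([[1., 2.], [3., 4.]])))  # False: both strict triangles populated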
Example #24
def get_current_matrix(conductivity_laplacian, node_potentials):
    """
    Recovers the current matrix based on the conductivity laplacian and voltages in each node.

    :param conductivity_laplacian:
    :param node_potentials:
    :return: matrix where M[i,j] = current intensity from i to j, together with the
     upper triangle of that asymmetric matrix. If current flows from j to i the term
     is positive, otherwise it is negative.
    :rtype: scipy.sparse.lil_matrix
    """
    # both branches of the original switch_to_splu check performed the same construction
    diag_voltages = lil_matrix(diags(node_potentials.toarray().T.tolist()[0], 0))
    corr_conductance_matrix = conductivity_laplacian - \
                              lil_matrix(diags(conductivity_laplacian.diagonal(), 0))

    # true currents
    currents = diag_voltages.dot(corr_conductance_matrix) - corr_conductance_matrix.dot(diag_voltages)

    # print type(currents)

    # we want them to be fully positive (so that the direction of flow doesn't matter)
    abs_current = sparse_abs(currents)

    # and symmetric so that the triangular upper matrix contains all the data
    currents = abs_current+abs_current.T

    # positive_current = lil_matrix(currents.shape)
    # positive_current[currents > 0.0] = currents[currents > 0.0]
    # negative_current = lil_matrix(currents.shape)
    # negative_current[currents < 0.0] = currents[currents < 0.0]
    #
    # incoming_current = np.array((positive_current + positive_current.T).sum(axis=1)).flatten()/2
    # outgoing_current = np.array((negative_current + negative_current.T).sum(axis=1)).flatten()/2

    # print incoming_current
    # print outgoing_current
    #
    # print 'flow conservation', np.allclose(incoming_current, outgoing_current)
    # print incoming_current+outgoing_current
    # print 'discordant', np.nonzero(incoming_current+outgoing_current)
    #
    # # print 'symmetric', (currents-currents.T)
    # # print 'positive', np.any(currents > 0.0)
    # # print 'negative', np.any(currents < 0.0)
    # raise Exception('debug')

    # PB: we can't really use the triu because the flow matrix is not symmetric

    return currents, triu(currents)
Example #25
def second_deg_poly_features(X):
  # Append all second-degree (pairwise product) features to the D input features.
  D = X.shape[1]
  D2 = (D**2 + D) // 2 + D   # D originals + upper triangle of the outer product
  X2 = lil_matrix((X.shape[0], D2))
  X2[:, :D] = X
  iu = np.triu_indices(D)
  for i, row in enumerate(X):
    r = row.toarray().ravel()
    X2[i, D:] = np.outer(r, r)[iu]
  return X2.tocsr()
Example #26
    def triangular_upper(self, k=0):
        """
        Returns the upper triangular portion of this matrix.
        :param k:
            - k = 0 corresponds to the main diagonal
            - k > 0 is above the main diagonal
            - k < 0 is below the main diagonal

        TODO: Add unit tests
        """
        return self._new_instance(sp.triu(self.matrix, k=k))
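A sketch of the k semantics using sp.triu directly (standing in for the missing unit tests, since _new_instance is class-internal):

import numpy as np
import scipy.sparse as sp

M = sp.csr_matrix(np.arange(1, 10).reshape(3, 3))
print(sp.triu(M, k=0).toarray())    # main diagonal and above
print(sp.triu(M, k=1).toarray())    # strictly above the diagonal
print(sp.triu(M, k=-1).toarray())   # also includes the first subdiagonal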
Example #27
    def __call__(self, x0, lagrange, obj_factor, flag, user_data = None):

        if flag:
            return (self.rind, self.cind)
        else:
            x = np.hstack([x0,lagrange,obj_factor])
            result = adolc.hessian(lID,x)
            result1 = result[:nvar,:nvar]
            result = None
            result = sps.triu(result1,format='coo')

            return result.data
Example #28
def analyze_eigvects(
    non_normalized_Laplacian, num_first_eigvals_to_analyse, index_chars, permutations_limiter=10000000, fudge=10e-10
):
    # normalize the laplacian
    print "analyzing the laplacian with %s items and %s non-zero elts" % (
        non_normalized_Laplacian.shape[0] ** 2,
        len(non_normalized_Laplacian.nonzero()[0]),
    )
    t = time()
    init = time()
    normalized_Laplacian = Lapl_normalize(non_normalized_Laplacian)
    print(time() - t)
    t = time()
    # compute the eigenvalues and store them
    true_eigenvals, true_eigenvects = eigsh(normalized_Laplacian, num_first_eigvals_to_analyse)
    print(time() - t)
    t = time()
    # permute randomly the off-diagonal terms
    triag_u = lil_matrix(triu(normalized_Laplacian))
    triag_u.setdiag(0)
    tnz = triag_u.nonzero()
    print "reassigning the indexes for %s items, with %s non-zero elts" % (triag_u.shape[0] ** 2, len(tnz[0]))
    eltsuite = zip(tnz[0].tolist(), tnz[1].tolist())
    shuffle(eltsuite)
    if eltsuite > permutations_limiter:
        # pb: we want it to affect any random number with reinsertion
        eltsuite = eltsuite[:permutations_limiter]
    print time() - t
    t = time()
    # take a nonzero pair of indexes
    for i, j in eltsuite:
        # select randomly a pair of indexes and permute it
        k = randrange(1, triag_u.shape[0] - 1)
        l = randrange(k + 1, triag_u.shape[0])
        triag_u[i, j], triag_u[k, l] = (triag_u[k, l], triag_u[i, j])
    print(time() - t)
    t = time()
    # recompute the diagonal terms
    fullmat = triag_u + triag_u.T
    diagterms = [-item for sublist in fullmat.sum(axis=0).tolist() for item in sublist]
    fullmat.setdiag(diagterms)
    print(time() - t)
    t = time()
    # recompute the normalized matrix
    normalized_rand = Lapl_normalize(fullmat)
    # recompute the eigenvalues
    rand_eigenvals, rand_eigenvects = eigsh(normalized_rand, num_first_eigvals_to_analyse)
    print(time() - t)
    t = time()
    show_eigenvals_and_eigenvects(true_eigenvals, true_eigenvects, 20, "true laplacian", index_chars)
    show_eigenvals_and_eigenvects(rand_eigenvals, rand_eigenvects, 20, "random")
    print "final", time() - t, time() - init
Example #29
 def voltage_drop_abs(self):
     """
     Return a sparse matrix in CSR form containing the voltage drop between nodes i and j.  Requires that self.solve()
     have been called to populate self.voltages
     """
     rows, cols = sparse.triu(self.G).nonzero()
     
     # fill in the entries in the voltage drop matrix
     voltage_drop = sparse.lil_matrix(self.G.shape)
     for node_i, node_j in zip(rows, cols):
         voltage_drop[node_i, node_j] = abs(self.voltages[node_j] - self.voltages[node_i])
         voltage_drop[node_j, node_i] = voltage_drop[node_i, node_j]
     return voltage_drop.tocsr()
Example #30
def test_sparse_ICE_normalization_triu():
    n = 100
    X = np.random.random((n, n))
    thres = (np.random.random((n, n)) > 0.5).astype(bool)
    X[thres] = 0
    X = X + X.T
    sparse_X = sparse.triu(X)
    true_normed_X = ICE_normalization(X, eps=1e-10, max_iter=10)
    true_normed_X = np.triu(true_normed_X)
    X = np.triu(X)
    normed_X = ICE_normalization(sparse_X, eps=1e-10, max_iter=10)
    assert_array_almost_equal(X, sparse_X.todense())
    assert_array_almost_equal(true_normed_X, np.array(normed_X.todense()))
Example #31
def mask_test_edges(adj):
    # Function to build test set with 10% positive links
    # NOTE: Splits are randomized and results might slightly deviate from reported numbers in the paper.
    # TODO: Clean up.

    # Remove diagonal elements
    adj = adj - sp.dia_matrix(
        (adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape)
    adj.eliminate_zeros()
    # Check that diag is zero:
    assert np.diag(adj.todense()).sum() == 0

    adj_triu = sp.triu(adj)
    adj_tuple = sparse_to_tuple(adj_triu)
    edges = adj_tuple[0]
    edges_all = sparse_to_tuple(adj)[0]
    num_test = int(np.floor(edges.shape[0] / 10.))
    num_val = int(np.floor(edges.shape[0] / 20.))

    all_edge_idx = list(range(edges.shape[0]))
    np.random.shuffle(all_edge_idx)
    val_edge_idx = all_edge_idx[:num_val]
    test_edge_idx = all_edge_idx[num_val:(num_val + num_test)]
    test_edges = edges[test_edge_idx]
    val_edges = edges[val_edge_idx]
    train_edges = np.delete(edges,
                            np.hstack([test_edge_idx, val_edge_idx]),
                            axis=0)

    def ismember(a, b, tol=5):
        rows_close = np.all(np.round(a - b[:, None], tol) == 0, axis=-1)
        return np.any(rows_close)

    test_edges_false = []
    while len(test_edges_false) < len(test_edges):
        idx_i = np.random.randint(0, adj.shape[0])
        idx_j = np.random.randint(0, adj.shape[0])
        if idx_i == idx_j:
            continue
        if ismember([idx_i, idx_j], edges_all):
            continue
        if test_edges_false:
            if ismember([idx_j, idx_i], np.array(test_edges_false)):
                continue
            if ismember([idx_i, idx_j], np.array(test_edges_false)):
                continue
        test_edges_false.append([idx_i, idx_j])

    val_edges_false = []
    while len(val_edges_false) < len(val_edges):
        idx_i = np.random.randint(0, adj.shape[0])
        idx_j = np.random.randint(0, adj.shape[0])
        if idx_i == idx_j:
            continue
        if ismember([idx_i, idx_j], train_edges):
            continue
        if ismember([idx_j, idx_i], train_edges):
            continue
        if ismember([idx_i, idx_j], val_edges):
            continue
        if ismember([idx_j, idx_i], val_edges):
            continue
        if val_edges_false:
            if ismember([idx_j, idx_i], np.array(val_edges_false)):
                continue
            if ismember([idx_i, idx_j], np.array(val_edges_false)):
                continue
        val_edges_false.append([idx_i, idx_j])

    assert ~ismember(test_edges_false, edges_all)
    assert ~ismember(val_edges_false, edges_all)
    assert ~ismember(val_edges, train_edges)
    assert ~ismember(test_edges, train_edges)
    assert ~ismember(val_edges, test_edges)

    data = np.ones(train_edges.shape[0])

    # Re-build adj matrix
    adj_train = sp.csr_matrix((data, (train_edges[:, 0], train_edges[:, 1])),
                              shape=adj.shape)
    adj_train = adj_train + adj_train.T

    # NOTE: these edge lists only contain single direction of edge!
    return adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false
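The ismember helper tests row membership by broadcasting a candidate pair against every stored row; a standalone illustration with a hypothetical edge list:

import numpy as np

def ismember(a, b, tol=5):
    rows_close = np.all(np.round(a - b[:, None], tol) == 0, axis=-1)
    return np.any(rows_close)

edges = np.array([[0, 1], [2, 3]])
print(ismember([0, 1], edges))   # True
print(ismember([1, 0], edges))   # False: direction matters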
Example #32
    def check_network_health(self):
        r"""
        This method checks the network's topological health by checking for:

            (1) Isolated pores
            (2) Islands or isolated clusters of pores
            (3) Duplicate throats
            (4) Bidirectional throats (i.e. a symmetric adjacency matrix)
            (5) Headless throats

        Returns
        -------
        health : dict
            A dictionary containing the offending pores or throat numbers under
            each named key.

        Notes
        -----
        It also returns a list of which pores and throats should be trimmed
        from the network to restore health.  This list is a suggestion only,
        and is based on keeping the largest cluster and trimming the others.

        - Does not yet check for duplicate pores
        - Does not yet suggest which throats to remove
        - This is just a 'check' and does not 'fix' the problems it finds
        """
        import scipy.sparse.csgraph as csg
        import scipy.sparse as sprs

        health = HealthDict()
        health['disconnected_clusters'] = []
        health['isolated_pores'] = []
        health['trim_pores'] = []
        health['duplicate_throats'] = []
        health['bidirectional_throats'] = []
        health['headless_throats'] = []
        health['looped_throats'] = []

        net = self.network

        # Check for headless throats
        hits = np.where(net['throat.conns'] > net.Np - 1)[0]
        if np.size(hits) > 0:
            health['headless_throats'] = np.unique(hits)
            return health

        # Check for throats that loop back onto the same pore
        P12 = net['throat.conns']
        hits = np.where(P12[:, 0] == P12[:, 1])[0]
        if np.size(hits) > 0:
            health['looped_throats'] = hits

        # Check for individual isolated pores
        Ps = net.num_neighbors(net.pores())
        if np.sum(Ps == 0) > 0:
            health['isolated_pores'] = np.where(Ps == 0)[0]

        # Check for separated clusters of pores
        temp = []
        am = net.create_adjacency_matrix(fmt='coo', triu=True)
        Cs = csg.connected_components(am, directed=False)[1]
        if np.unique(Cs).size > 1:
            for i in np.unique(Cs):
                temp.append(np.where(Cs == i)[0])
            b = np.array([len(item) for item in temp])
            c = np.argsort(b)[::-1]
            for i in range(0, len(c)):
                health['disconnected_clusters'].append(temp[c[i]])
                if i > 0:
                    health['trim_pores'].extend(temp[c[i]])

        # Check for duplicate throats
        am = net.create_adjacency_matrix(fmt='csr', triu=True).tocoo()
        hits = np.where(am.data > 1)[0]
        if len(hits):
            mergeTs = []
            hits = np.vstack((am.row[hits], am.col[hits])).T
            ihits = hits[:, 0] + 1j * hits[:, 1]
            conns = net['throat.conns']
            iconns = conns[:, 0] + 1j * conns[:, 1]  # Convert to imaginary
            for item in ihits:
                mergeTs.append(np.where(iconns == item)[0])
            health['duplicate_throats'] = mergeTs

        # Check for bidirectional throats
        adjmat = net.create_adjacency_matrix(fmt='coo')
        num_full = adjmat.sum()
        temp = sprs.triu(adjmat, k=1)
        num_upper = temp.sum()
        if num_full > num_upper:
            biTs = np.where(
                net['throat.conns'][:, 0] > net['throat.conns'][:, 1])[0]
            health['bidirectional_throats'] = biTs.tolist()

        return health
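The bidirectional check works because every throat is expected to sit in the upper triangle; any connection stored below the diagonal makes the full sum exceed the triu sum. A toy illustration:

import scipy.sparse as sprs

# throat (0, 1) stored correctly, throat (2, 1) stored in reversed order
adjmat = sprs.coo_matrix(([1, 1], ([0, 2], [1, 1])), shape=(3, 3))
num_full = adjmat.sum()                    # 2
num_upper = sprs.triu(adjmat, k=1).sum()   # 1
print(num_full > num_upper)                # True -> reversed throats exist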
Example #33
def main(data,
         a=1,
         b=1,
         gamma=0.4,
         stepm=25,
         rtype=1,
         maxiter=1000,
         verbose=True):

    S = data['S']
    li = data['li']
    lj = data['lj']
    w = data['w']

    setup, m, n = bmw.bipartite_setup(li, lj, w)

    S = sps.csr_matrix(S, dtype=float)
    U = sps.csr_matrix(S.shape)

    xbest = np.zeros(len(w))

    flower = 0.0
    fupper = np.inf
    next_reduction_iteration = stepm

    if verbose:
        print(
            '{:5s}   {:>4s}   {:>8s}   {:>7s} {:>7s} {:>7s}  {:>7s} {:>7s} {:>7s} {:>7s}'
            .format('best', 'iter', 'norm-u', 'lower', 'upper', 'cur', 'obj',
                    'weight', 'card', 'overlap'))

    for it in range(1, maxiter + 1):

        q, SM = maxrowmatch((b / 2) * S + U - U.T, li, lj, m, n)

        x = a * w + q

        f, matchval, card, overlap, val, mi = bmw.round_messages(
            x, S, w, a, b, setup, m, n)

        if val < fupper:
            fupper = val
            next_reduction_iteration = it + stepm
        if f > flower:
            flower = f
            itermark = '*'
            xbest = mi
        else:
            itermark = ' '

        if rtype == 1:
            pass
        elif rtype == 2:

            mw = S * x
            mw = a * w + b / 2 * mw

            f, matchval, card, overlap, _, mx = bmw.round_messages(
                mw, S, w, a, b, setup, m, n)

            if f > flower:
                flower = f
                itermark = '**'
                mi = mx
                xbest = mw

        if verbose:
            print(
                '{:5s}   {:4d}   {:8.1e}   {:7.2f} {:7.2f} {:7.2f}  {:7.2f} {:7.2f} {:7d} {:7d}'
                .format(itermark, it, np.linalg.norm(U.data, 1), flower,
                        fupper, val, f, matchval, card, overlap))

        if it == next_reduction_iteration:
            gamma = gamma * 0.5
            if verbose:
                print(f'{"":5s}   {"":4s}   reducing step to {gamma}')
            if gamma < 1e-24:
                break
            next_reduction_iteration = it + stepm

        if (fupper - flower) < 1e-2:
            break

        GM = sps.diags(gamma * mi, format="csr")
        U = U - GM * sps.triu(SM) + sps.tril(SM).T * GM
        U.data = U.data.clip(-0.5, 0.5)

    return sps.csr_matrix((xbest, (li, lj)))
Example #34
    def convert_to_obs_exp_matrix(self, maxdepth=None, zscore=False, perchr=False):
        """
        Converts a corrected counts matrix into an
        obs / expected matrix or z-scores fast.

        The caveat is that the obs/exp or z-score are only
        computed for non-zero values, although zero values that
        are not part of the sparse matrix are considered.

        For each diagonal the mean (and std when computing z-scores) are
        calculated and then each non-zero value of the sparse matrix is
        replaced by the obs/exp or z-score.

        Parameters
        ----------
        maxdepth: maximum distance from the diagonal to consider. All contacts beyond this distance will not
                         be considered.
        zscore: if a zscore wants to be returned instead of obs/exp


        Returns
        -------
        observed / expected sparse matrix


        nans occur where the standard deviation is zero
        """

        binsize = self.getBinSize()
        max_depth_in_bins = None

        if maxdepth:
            if maxdepth < binsize:
                raise Exception("Please specify a maxDepth larger than bin size ({})".format(binsize))

            max_depth_in_bins = int(float(maxdepth * 1.5) / binsize)
            # work only with the upper matrix
            # and remove all pixels that are beyond
            # max_depth_in_bins
            # (this is done by subtracting a second sparse matrix
            # that contains only the part of the upper matrix to be removed.)
            self.matrix = triu(self.matrix, k=0, format='csr') - \
                triu(self.matrix, k=max_depth_in_bins, format='csr')
        else:
            self.matrix = triu(self.matrix, k=0, format='csr')

        self.matrix.eliminate_zeros()
        depth = None
        if zscore is True:
            from scipy.sparse import diags
            m_size = self.matrix.shape[0]
            if max_depth_in_bins is not None:
                depth = max_depth_in_bins
            else:
                depth = m_size
                estimated_size_dense_matrix = m_size ** 2 * 8
                if estimated_size_dense_matrix > 100e6:
                    log.info("To compute z-scores a dense matrix is required. This will use \n"
                             "{} Mb of memory.\n To reduce memory use the maxdeph option."
                             "".format(estimated_size_dense_matrix / 1e6))

            # to compute zscore the zero values need to be accounted and the matrix
            # need to become dense. This is only practical if only up to certain distance
            # wants to be evaluated, otherwise the dense matrix is too large.
            # To make the matrix dense and keep the same computations as when
            # the matrix is sparse the following is done:
            # A sparse diagonal matrix of shape = matrix.shape is created with ones
            # (only upper triangle contains diagonals up to maxdepth)
            # This  sparse matrix is then added to self.matrix
            # then, -1 is subtracted from the self.matrix.data, thus effectively
            # adding zeros.
            diag_mat_ones = diags(np.repeat([1], m_size * depth).reshape(depth, m_size), list(range(depth)))

            self.matrix += diag_mat_ones

        from scipy.sparse import lil_matrix
        trasf_matrix = lil_matrix(self.matrix.shape)

        chr_submatrix = OrderedDict()
        cut_intervals = OrderedDict()
        chrom_sizes = OrderedDict()
        chrom_range = OrderedDict()
        if perchr:
            for chrname in self.getChrNames():
                chr_range = self.getChrBinRange(chrname)
                chr_submatrix[chrname] = self.matrix[chr_range[0]:chr_range[1], chr_range[0]:chr_range[1]].tocoo()
                cut_intervals[chrname] = [self.cut_intervals[x] for x in range(chr_range[0], chr_range[1])]
                chrom_sizes[chrname] = [chr_submatrix[chrname].shape[0]]
                chrom_range[chrname] = (chr_range[0], chr_range[1])

        else:
            chr_submatrix['all'] = self.matrix.tocoo()
            cut_intervals['all'] = self.cut_intervals
            # chrom_sizes['all'] = np.array([v[1] - v[0] for k, v in iteritems(self.chrBinBoundaries)])
            chrom_sizes['all'] = np.array([v[1] - v[0] for k, v in self.chrBinBoundaries.items()])

            chrom_range['all'] = (0, self.matrix.shape[0])

        # for chrname, submatrix in iteritems(chr_submatrix):
        for chrname, submatrix in chr_submatrix.items():

            log.info("processing chromosome {}\n".format(chrname))
            if zscore is True:
                # this step has to be done after tocoo()
                submatrix.data -= 1

            dist_list, chrom_list = self.getDistList(submatrix.row, submatrix.col,
                                                     hiCMatrix.fit_cut_intervals(cut_intervals[chrname]))

            # to get the sum of all values at a given distance I use np.bincount which
            # is quite fast. However, the input of bincount is positive integers. Moreover
            # it returns the sum for every consecutive integer, even if this is not on the list.
            # Thus, dist_list, which contains the distance in bp between any two bins is
            # converted to bin distance.

            # Because positive integers are needed we add +1 to all bin distances
            # such that the value of -1 (which means different chromosomes) can now be used

            dist_list[dist_list == -1] = -binsize
            # divide by binsize to get a list of bin distances and add +1 to remove negative values
            dist_list = (np.array(dist_list).astype(float) / binsize).astype(int) + 1

            # for each distance, return the sum of all values
            sum_counts = np.bincount(dist_list, weights=submatrix.data)
            distance_len = np.bincount(dist_list)
            # compute the average for each distance
            mat_size = submatrix.shape[0]
            mu = {}
            std = {}
            # compute mean value for each distance

            for bin_dist_plus_one, sum_value in enumerate(sum_counts):
                if maxdepth and bin_dist_plus_one == 0:  # this is for intra chromosomal counts
                    # when max depth is set, the computation
                    # of the total_intra is not accurate and is safer to
                    # output np.nan
                    mu[bin_dist_plus_one] = np.nan
                    std[bin_dist_plus_one] = np.nan
                    continue

                if bin_dist_plus_one == 0:
                    total_intra = mat_size ** 2 - sum([size ** 2 for size in chrom_sizes[chrname]])
                    diagonal_length = int(total_intra / 2)
                else:
                    # to compute the average counts per distance we take the sum_counts and divide
                    # by the number of values on the respective diagonal
                    # which is equal to the size of each chromosome - the diagonal offset (for those
                    # chromosome larger than the offset)
                    # In the following example with two chromosomes
                    # the first (main) diagonal has a size equal to the matrix (6),
                    # while the next has 1 value less for each chromosome (4) and the last one has only 2 values

                    # 0 1 2 . . .
                    # - 0 1 . . .
                    # - - 0 . . .
                    # . . . 0 1 2
                    # . . . - 0 1
                    # . . . - - 0

                    # idx - 1 because earlier the values were
                    # shifted.
                    diagonal_length = sum([size - (bin_dist_plus_one - 1) for size in chrom_sizes[chrname] if size > (bin_dist_plus_one - 1)])
                    log.debug("Type of diagonal_length {}".format(type(diagonal_length)))

                # the diagonal length should contain the number of values at a certain distance.
                # If the matrix is dense, the distance_len[bin_dist_plus_one] correctly contains the number of values
                # If the matrix is equally spaced, then, the diagonal_length as computed before is accurate.
                # But, if the matrix is both sparse and with unequal bins, then none of the above methods is
                # accurate, but the diagonal_length as computed before will be closer.
                diagonal_length = max(diagonal_length, distance_len[bin_dist_plus_one])
                log.debug("Type of diagonal_length {}".format(type(diagonal_length)))

                if diagonal_length == 0:
                    mu[bin_dist_plus_one] = np.nan
                else:
                    mu[bin_dist_plus_one] = np.float64(sum_value) / diagonal_length

                if np.isnan(sum_value):
                    log.info("nan value found for distance {}\n".format((bin_dist_plus_one - 1) * binsize))

                # if zscore is needed, compute standard deviation: std = sqrt(mean(abs(x - x.mean())**2))
                if zscore:
                    values_sqrt_diff = \
                        np.abs((submatrix.data[dist_list == bin_dist_plus_one] - mu[bin_dist_plus_one]) ** 2)
                    # the standard deviation is the sum of the differences with mu squared (value variable)
                    # plus all zeros that are not included in the sparse matrix
                    # for which the standard deviation is
                    # (0 - mu)**2 = (mu)**2
                    # The number of zeros is the diagonal length - the length of the non zero values
                    zero_values_sqrt_diff_sum = (diagonal_length - len(values_sqrt_diff)) * mu[bin_dist_plus_one] ** 2

                    _std = np.sqrt((values_sqrt_diff.sum() + zero_values_sqrt_diff_sum) / diagonal_length)
                    std[bin_dist_plus_one] = _std

            # use the expected values to compute obs/exp
            transf_ma = np.zeros(len(submatrix.data))
            for idx, value in enumerate(submatrix.data):
                if depth is not None and dist_list[idx] > depth + 1:
                    continue
                if zscore:
                    if std[dist_list[idx]] == 0:
                        transf_ma[idx] = np.nan
                    else:
                        transf_ma[idx] = (value - mu[dist_list[idx]]) / std[dist_list[idx]]
                else:
                    transf_ma[idx] = value / mu[dist_list[idx]]

            submatrix.data = transf_ma
            trasf_matrix[chrom_range[chrname][0]:chrom_range[chrname][1], chrom_range[chrname][0]:chrom_range[chrname][1]] = submatrix.tolil()

        self.matrix = trasf_matrix.tocsr()

        return self.matrix
Example #35
    def solve_via_data(self,
                       data,
                       warm_start,
                       verbose,
                       solver_opts,
                       solver_cache=None):
        import osqp
        P = data[s.P]
        q = data[s.Q]
        A = sp.vstack([data[s.A], data[s.F]]).tocsc()
        data['Ax'] = A
        uA = np.concatenate((data[s.B], data[s.G]))
        data['u'] = uA
        lA = np.concatenate([data[s.B], -np.inf * np.ones(data[s.G].shape)])
        data['l'] = lA

        # Overwrite defaults eps_abs=eps_rel=1e-3, max_iter=4000
        solver_opts['eps_abs'] = solver_opts.get('eps_abs', 1e-5)
        solver_opts['eps_rel'] = solver_opts.get('eps_rel', 1e-5)
        solver_opts['max_iter'] = solver_opts.get('max_iter', 10000)

        if solver_cache is not None and self.name() in solver_cache:
            # Use cached data.
            solver, old_data, results = solver_cache[self.name()]
            same_pattern = (P.shape == old_data[s.P].shape and
                            all(P.indptr == old_data[s.P].indptr) and
                            all(P.indices == old_data[s.P].indices)) and \
                           (A.shape == old_data['Ax'].shape and
                            all(A.indptr == old_data['Ax'].indptr) and
                            all(A.indices == old_data['Ax'].indices))
        else:
            same_pattern = False

        # If sparsity pattern differs need to do setup.
        if warm_start and same_pattern:
            new_args = {}
            for key in ['q', 'l', 'u']:
                if any(data[key] != old_data[key]):
                    new_args[key] = data[key]
            factorizing = False
            if any(P.data != old_data[s.P].data):
                P_triu = sp.triu(P).tocsc()
                new_args['Px'] = P_triu.data
                factorizing = True
            if any(A.data != old_data['Ax'].data):
                new_args['Ax'] = A.data
                factorizing = True

            if new_args:
                solver.update(**new_args)
            # Map OSQP statuses back to CVXPY statuses
            status = self.STATUS_MAP.get(results.info.status_val,
                                         s.SOLVER_ERROR)
            if status == s.OPTIMAL:
                solver.warm_start(results.x, results.y)
            # Polish if factorizing.
            solver_opts['polish'] = solver_opts.get('polish', factorizing)
            solver.update_settings(verbose=verbose, **solver_opts)
        else:
            # Initialize and solve problem
            solver_opts['polish'] = solver_opts.get('polish', True)
            solver = osqp.OSQP()
            solver.setup(P, q, A, lA, uA, verbose=verbose, **solver_opts)

        results = solver.solve()

        if solver_cache is not None:
            solver_cache[self.name()] = (solver, data, results)
        return results
Example #36
def get_edges(sparse_matrix, is_triu=True):
    coo = sp.coo_matrix(sparse_matrix)
    if is_triu:
        coo = sp.triu(coo, 1)
    return np.vstack((coo.row, coo.col)).transpose()  # .tolist()
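For example, on a small symmetric adjacency (illustrative), get_edges returns each undirected edge once:

import numpy as np
import scipy.sparse as sp

adj = sp.csr_matrix(np.array([[0, 1, 0],
                              [1, 0, 1],
                              [0, 1, 0]]))
print(get_edges(adj))   # [[0 1]
                        #  [1 2]]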
Example #37
    def check_network_health(self):
        r"""
        This method checks the network's topological health by checking for:

            (1) Isolated pores
            (2) Islands or isolated clusters of pores
            (3) Duplicate throats
            (4) Bidirectional throats (i.e. a symmetric adjacency matrix)
            (5) Headless throats

        Returns
        -------
        A dictionary containing the offending pores or throat numbers under
        each named key.

        It also returns a list of which pores and throats should be trimmed
        from the network to restore health.  This list is a suggestion only,
        and is based on keeping the largest cluster and trimming the others.

        Notes
        -----
        - Does not yet check for duplicate pores
        - Does not yet suggest which throats to remove
        - This is just a 'check' method and does not 'fix' the problems it finds
        """

        health = Tools.HealthDict()
        health['disconnected_clusters'] = []
        health['isolated_pores'] = []
        health['trim_pores'] = []
        health['duplicate_throats'] = []
        health['bidirectional_throats'] = []
        health['headless_throats'] = []
        health['looped_throats'] = []

        # Check for headless throats
        hits = sp.where(self['throat.conns'] > self.Np - 1)[0]
        if sp.size(hits) > 0:
            health['headless_throats'] = sp.unique(hits)
            logger.warning('Health check cannot complete due to connectivity '
                           'errors. Please correct existing errors & recheck.')
            return health

        # Check for throats that loop back onto the same pore
        P12 = self['throat.conns']
        hits = sp.where(P12[:, 0] == P12[:, 1])[0]
        if sp.size(hits) > 0:
            health['looped_throats'] = hits

        # Check for individual isolated pores
        Ps = self.num_neighbors(self.pores())
        if sp.sum(Ps == 0) > 0:
            logger.warning(str(sp.sum(Ps == 0)) + ' pores have no neighbors')
            health['isolated_pores'] = sp.where(Ps == 0)[0]

        # Check for separated clusters of pores
        temp = []
        Cs = self.find_clusters(self.tomask(throats=self.throats('all')))
        if sp.shape(sp.unique(Cs))[0] > 1:
            logger.warning('Isolated clusters exist in the network')
            for i in sp.unique(Cs):
                temp.append(sp.where(Cs == i)[0])
            b = sp.array([len(item) for item in temp])
            c = sp.argsort(b)[::-1]
            for i in range(0, len(c)):
                health['disconnected_clusters'].append(temp[c[i]])
                if i > 0:
                    health['trim_pores'].extend(temp[c[i]])

        # Check for duplicate throats
        i = self['throat.conns'][:, 0]
        j = self['throat.conns'][:, 1]
        v = sp.array(self['throat.all'], dtype=int)
        adjmat = sprs.coo_matrix((v, (i, j)), [self.Np, self.Np])
        temp = adjmat.tolil()  # Convert to lil to combine duplicates
        # Compile lists of which specific throats are duplicates
        # Be VERY careful here, as throats are not in order
        mergeTs = []
        for i in range(0, self.Np):
            if sp.any(sp.array(temp.data[i]) > 1):
                ind = sp.where(sp.array(temp.data[i]) > 1)[0]
                P = sp.array(temp.rows[i])[ind]
                Ts = self.find_connecting_throat(P1=i, P2=P)[0]
                mergeTs.append(Ts)
        health['duplicate_throats'] = mergeTs

        # Check for bidirectional throats
        num_full = adjmat.sum()
        temp = sprs.triu(adjmat, k=1)
        num_upper = temp.sum()
        if num_full > num_upper:
            biTs = sp.where(
                self['throat.conns'][:, 0] > self['throat.conns'][:, 1])[0]
            health['bidirectional_throats'] = biTs.tolist()

        return health
Example #38
import pandas as pd, os, sys
import numpy as np
import matplotlib.pyplot as plt
import scipy.sparse as sps

syn = pd.read_csv("../doc/synthetic.txt",names=['a','b'],sep="   ")
data = np.array(syn)

from sklearn.metrics.pairwise import euclidean_distances
X = euclidean_distances(data, data)

X2 = X.copy()
# filter out large values / distances so matrix can be sparse
X2[X > 2000] = 0.0
X3 = sps.lil_matrix(X2)
X4 = sps.triu(X3)
print('non-zero items', len(X4.nonzero()[0]))
print(X4.shape)

from scipy.io import mmwrite, mmread
mmwrite('/tmp/syndist', X4)

os.system("../felzclust/felzclust /tmp/syndist.mtx 20000 100  > /tmp/out")

df = pd.read_csv('/tmp/out',sep=';')

syn['cluster'] = df['cluster']
print(syn[:5])

import matplotlib.cm as cm
Example #39
def compute_distance_mean(hicmat, maxdepth=None, perchr=False):
    """
    Converts a corrected counts matrix into an
    obs / expected matrix or z-scores fast.

    The caveat is that the obs/exp or z-score are only
    computed for non-zero values, although zero values that
    are not part of the sparse matrix are considered.

    For each diagonal the mean (and std when computing z-scores) are
    calculated and then each non-zero value of the sparse matrix is
    replaced by the obs/exp or z-score.

    Parameters
    ----------
    hicmat: HiCMatrix object
    maxdepth: maximum distance from the diagonal to consider. All contacts
              beyond this distance will not be considered.
    perchr: bool to indicate if computations should be performed per chromosome


    Returns
    -------
    observed / expected sparse matrix

    >>> from scipy.sparse import csr_matrix, dia_matrix
    >>> row, col = np.triu_indices(5)
    >>> cut_intervals = [('a', 0, 10, 1), ('a', 10, 20, 1),
    ... ('a', 20, 30, 1), ('a', 30, 40, 1), ('b', 40, 50, 1)]
    >>> hic = HiCMatrix.hiCMatrix()
    >>> hic.nan_bins = []
    >>> matrix = np.array([
    ... [ 1,  8,  5, 3, 0],
    ... [ 0,  4, 15, 5, 1],
    ... [ 0,  0,  0, 7, 2],
    ... [ 0,  0,  0, 0, 1],
    ... [ 0,  0,  0, 0, 0]])

    >>> hic.matrix = csr_matrix(matrix)
    >>> hic.setMatrix(hic.matrix, cut_intervals)
    >>> hic.convert_to_obs_exp_matrix().todense()
    matrix([[ 1. ,  0.8,  1. ,  1. ,  0. ],
            [ 0. ,  4. ,  1.5,  1. ,  1. ],
            [ 0. ,  0. ,  0. ,  0.7,  2. ],
            [ 0. ,  0. ,  0. ,  0. ,  1. ],
            [ 0. ,  0. ,  0. ,  0. ,  0. ]])

    >>> hic.matrix = csr_matrix(matrix)
    >>> hic.convert_to_obs_exp_matrix(maxdepth=20).todense()
    matrix([[ 1. ,  0.8,  1. ,  0. ,  0. ],
            [ 0. ,  4. ,  1.5,  1. ,  0. ],
            [ 0. ,  0. ,  0. ,  0.7,  nan],
            [ 0. ,  0. ,  0. ,  0. ,  nan],
            [ 0. ,  0. ,  0. ,  0. ,  0. ]])

    >>> hic.matrix = csr_matrix(matrix)
    >>> hic.convert_to_obs_exp_matrix(zscore=True).todense()
    matrix([[ 0.        , -0.56195149,         nan,         nan, -1.41421356],
            [ 0.        ,  1.93649167,  1.40487872,         nan,  0.        ],
            [ 0.        ,  0.        , -0.64549722, -0.84292723,  1.41421356],
            [ 0.        ,  0.        ,  0.        , -0.64549722,  0.        ],
            [ 0.        ,  0.        ,  0.        ,  0.        , -0.64549722]])

    nans occur where the standard deviation is zero
    """

    binsize = hicmat.getBinSize()

    if maxdepth:
        if maxdepth < binsize:
            exit("Please specify a maxDepth larger than bin size ({})".format(
                binsize))

        max_depth_in_bins = int(float(maxdepth * 1.5) / binsize)
        # work only with the upper matrix
        # and remove all pixels that are beyond max_depth_in_bins
        # (this is done by subtracting a second sparse matrix that
        # contains only the part of the upper matrix to be removed)
        hicmat.matrix = triu(hicmat.matrix, k=0, format='csr') - \
            triu(hicmat.matrix, k=max_depth_in_bins, format='csr')
    else:
        hicmat.matrix = triu(hicmat.matrix, k=0, format='csr')

    hicmat.matrix.eliminate_zeros()
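    # As a sketch of the banding trick above: for a matrix M and
    # max_depth_in_bins = 2, triu(M, k=0) - triu(M, k=2) keeps only the main
    # diagonal and the first off-diagonal, i.e. contacts within 2 bins.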

    chr_submatrix = OrderedDict()
    cut_intervals = OrderedDict()
    chrom_sizes = OrderedDict()
    chrom_range = OrderedDict()
    if perchr:
        for chrname in hicmat.getChrNames():
            chr_range = hicmat.getChrBinRange(chrname)
            chr_submatrix[chrname] = hicmat.matrix[
                chr_range[0]:chr_range[1], chr_range[0]:chr_range[1]].tocoo()
            cut_intervals[chrname] = [
                hicmat.cut_intervals[x]
                for x in range(chr_range[0], chr_range[1])
            ]
            chrom_sizes[chrname] = [chr_submatrix[chrname].shape[0]]
            chrom_range[chrname] = (chr_range[0], chr_range[1])

    else:
        chr_submatrix['all'] = hicmat.matrix.tocoo()
        cut_intervals['all'] = hicmat.cut_intervals
        chrom_sizes['all'] = np.array(
            [v[1] - v[0] for k, v in iteritems(hicmat.chrBinBoundaries)])
        chrom_range['all'] = (0, hicmat.matrix.shape[0])

    mean_dict = {}

    for chrname, submatrix in iteritems(chr_submatrix):
        log.info("processing chromosome {}\n".format(chrname))

        dist_list, chrom_list = hicmat.getDistList(
            submatrix.row, submatrix.col,
            HiCMatrix.hiCMatrix.fit_cut_intervals(cut_intervals[chrname]))

        # to get the sum of all values at a given distance I use np.bincount which
        # is quite fast. However, the input of bincount is positive integers. Moreover
        # it returns the sum for every consecutive integer, even if this is not on the list.
        # Thus, dist_list, which contains the distance in bp between any two bins is
        # converted to bin distance.

        # Because positive integers are needed we add +1 to all bin distances
        # such that the value of -1 (which means different chromosomes) can now be used

        dist_list[dist_list == -1] = -binsize
        # divide by binsize to get a list of bin distances and add +1 to remove negative values
        dist_list = (np.array(dist_list).astype(float) /
                     binsize).astype(int) + 1
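        # A small sketch of the np.bincount step below (made-up values):
        #   np.bincount([1, 1, 2], weights=[5., 3., 7.]) -> array([0., 8., 7.])
        # i.e. entry d accumulates the counts observed at bin distance d - 1.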

        # for each distance, return the sum of all values
        sum_counts = np.bincount(dist_list, weights=submatrix.data)
        distance_len = np.bincount(dist_list)
        # compute the average for each distance
        mat_size = submatrix.shape[0]
        # compute mean value for each distance
        mu = {}
        zero_value_bins = []
        for bin_dist_plus_one, sum_value in enumerate(sum_counts):
            if maxdepth and bin_dist_plus_one == 0:  # this is for intra chromosomal counts
                # when max depth is set, the computation
                # of the total_intra is not accurate and is safer to
                # output np.nan
                mu[bin_dist_plus_one] = np.nan
                continue

            if bin_dist_plus_one == 0:
                total_intra = mat_size**2 - sum(
                    [size**2 for size in chrom_sizes[chrname]])
                diagonal_length = total_intra / 2
            else:
                # to compute the average counts per distance we take the sum_counts and divide
                # by the number of values on the respective diagonal
                # which is equal to the size of each chromosome - the diagonal offset (for those
                # chromosome larger than the offset)
                # In the following example with two chromosomes
                # the first (main) diagonal has a size equal to the matrix (6),
                # while the next has 1 value less for each chromosome (4) and the last one has only 2 values

                # 0 1 2 . . .
                # - 0 1 . . .
                # - - 0 . . .
                # . . . 0 1 2
                # . . . - 0 1
                # . . . - - 0

                # idx - 1 because earlier the values were
                # shifted.
                diagonal_length = sum([
                    size - (bin_dist_plus_one - 1)
                    for size in chrom_sizes[chrname]
                    if size > (bin_dist_plus_one - 1)
                ])

            # the diagonal length should contain the number of values at a certain distance.
            # If the matrix is dense, the distance_len[bin_dist_plus_one] correctly contains the number of values
            # If the matrix is equally spaced, then, the diagonal_length as computed before is accurate.
            # But, if the matrix is both sparse and with unequal bins, then none of the above methods is
            # accurate, but the diagonal_length as computed before will be closer.
            diagonal_length = max(diagonal_length,
                                  distance_len[bin_dist_plus_one])

            if diagonal_length == 0:
                mu[bin_dist_plus_one] = np.nan
            else:
                mu[bin_dist_plus_one] = np.float64(sum_value) / diagonal_length
                if sum_value == 0:
                    zero_value_bins.append(bin_dist_plus_one)
                    log.info("zero value for {}, diagonal len: {}\n".format(
                        bin_dist_plus_one, diagonal_length))
                if len(zero_value_bins) > 10:
                    diff = np.diff(zero_value_bins)
                    if len(diff[diff == 1]) > 10:
                        # if too many consecutive bins with zero are found that means that probably no
                        # further counts will be found
                        log.info(
                            "skipping rest of chromosome {}. Too many emtpy diagonals\n"
                            .format(chrname))
                        break
            if np.isnan(sum_value):
                log.info("nan value found for distance {}\n".format(
                    (bin_dist_plus_one - 1) * binsize))

        if maxdepth is None:
            maxdepth = np.inf
        mean_dict[chrname] = OrderedDict([
            ((k - 1) * binsize, v) for k, v in iteritems(mu)
            if k > 0 and (k - 1) * binsize <= maxdepth
        ])
        # mean_dict[chrname]['intra_chr'] = mu[0]

    return mean_dict
Example #40
        pickle.dump(gp, open('pz/dNdz_gp_%s_%s.p' % (band, depth), 'wb'))

        zs = np.arange(0.0, midz.max(), 0.01).reshape(-1, 1)
        ys, sigma = gp.predict(zs, return_std=True)

        pl.plot(zs, ys, '-', alpha=0.5, color=colors[ii])
        plt.fill(np.concatenate([zs, zs[::-1]]),
                 np.concatenate(
                     [ys - 1.9600 * sigma, (ys + 1.9600 * sigma)[::-1]]),
                 alpha=.2,
                 fc=colors[ii],
                 ec='None',
                 label='')

        ##  sL    = sparse.csr_matrix(gp.L_)
        sL = sparse.triu(gp.L_, k=-1)

        print(gp.L_)
        print('----------------------------------')
        print(sL)

        ##  plt.imshow(sL.todense())

    pl.xlim(0.0, 6.00)
    pl.ylim(0.0, 1.25)
    pl.legend(ncol=2, loc=2, frameon=False)
    pl.xlabel(r'$z$', fontsize=14)
    pl.ylabel(r'$p(z)$')
    plt.tight_layout()

    ax = pl.gca()
Example #41
def plot_delaunay(cells,
                  labels=None,
                  color=None,
                  style='-',
                  centroid_style='g+',
                  negative=None,
                  axes=None,
                  linewidth=1,
                  individual=False,
                  fallback_color='gray'):
    """
    Delaunay plot.

    Arguments:

        cells (Partition):
            full partition

        labels (numpy.ndarray):
            numerical labels for cell adjacency relationship

        color (str):
            single-character colours in a string, e.g. 'rrrbgy'

        style (str):
            line style

        centroid_style (str):
            marker style of the cell centers

        negative (any):
            if ``None``, do not plot edges corresponding to negative adjacency labels;
            if '*voronoi*', plot the corresponding Voronoi edge instead, for edges with
            negative labels

        axes (matplotlib.axes.Axes):
            axes where to plot

        linewidth (int):
            line width

        individual (bool):
            plot each edge independently; this generates a lot of handles and takes time

        fallback_color (str):
            colour for unexpected labels

    Returns:

        tuple: list of handles of the plotted edges,
            handle of the plotted centroids
    """
    if axes is None:
        import matplotlib.pyplot as plt
        axes = plt
    try:
        tessellation = cells.tessellation
    except AttributeError:
        tessellation = cells

    vertices = tessellation.cell_centers
    if negative == 'voronoi':
        voronoi = tessellation.cell_vertices

    labels, color = _graph_theme(tessellation, labels, color, negative)

    # if asymmetric, can be either triu or tril
    A = sparse.triu(tessellation.cell_adjacency, format='coo')
    I, J, K = A.row, A.col, A.data
    if not I.size:
        A = sparse.tril(tessellation.cell_adjacency, format='coo')
        I, J, K = A.row, A.col, A.data

    if not individual:
        by_color = defaultdict(list)
    edge_handles, centroid_handle = [], None  # handles

    # plot delaunay
    for i, j, k in zip(I, J, K):
        x, y = zip(vertices[i], vertices[j])
        if labels is None:
            c = 0
        else:
            label = tessellation.adjacency_label[k]
            try:
                c = labels.index(label)
            except ValueError:
                continue
            if label <= 0:
                if negative == 'voronoi':
                    try:
                        vert_ids = set(tessellation.cell_vertices.get(
                            i, [])) & set(tessellation.cell_vertices.get(
                                j, []))
                        x, y = voronoi[vert_ids].T
                    except ValueError:
                        continue
        if individual:
            h = axes.plot(x, y, style, color=color[c], linewidth=linewidth)
            assert not h[1:]
            edge_handles.append(h)
        else:
            by_color[c].append((x, y))

    if not individual:
        if not color[1:]:
            _clr = color[0]
        for c in by_color:
            xy = by_color[c]
            X = np.zeros((len(xy) * 3, ))
            Y = np.empty((len(xy) * 3, ))
            Y[:] = np.nan
            i = 0
            for x, y in xy:
                I = slice(i * 3, i * 3 + 2)
                X[I], Y[I] = x, y
                i += 1
            if color[1:]:
                try:
                    _clr = color[c]
                except IndexError:
                    import warnings
                    warnings.warn(
                        'too few specified colours; at least {:d} needed'.
                        format(c + 1), RuntimeWarning)
                    _clr = fallback_color
            h = axes.plot(X, Y, style, color=_clr, linewidth=linewidth)
            assert not h[1:]
            edge_handles.append(h[0])

    # plot cell centers
    if centroid_style:
        h = axes.plot(vertices[:, 0], vertices[:, 1], centroid_style)
        assert not h[1:]
        centroid_handle = h[0]

    # resize window
    try:
        axes.axis(cells.bounding_box[['x', 'y']].values.flatten('F'))
    except AttributeError:
        pass
    except ValueError:
        print(traceback.format_exc())

    return edge_handles, centroid_handle
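# A minimal sketch of the NaN-separated polyline trick used above, which turns
# many segments into a single plot handle (segment data here is illustrative):
import numpy as np
import matplotlib.pyplot as plt

segments = [((0, 1), (0, 1)), ((2, 3), (1, 0))]   # two (x, y) segments
X = np.zeros(len(segments) * 3)
Y = np.full(len(segments) * 3, np.nan)            # every third Y stays NaN
for i, (x, y) in enumerate(segments):
    X[3 * i:3 * i + 2], Y[3 * i:3 * i + 2] = x, y
plt.plot(X, Y, '-')   # NaNs break the line between consecutive segments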
Example #42
def visualizeLaplaceWeights(mesh, quantile=.01, weights=None, cmap='seismic', viewer=None, **kwargs):
    """Visualize Laplacian weights.

    Requires ``navis`` to be installed.

    Parameters
    ----------
    mesh :      trimesh.Trimesh
                Mesh to plot the weights for.
    quantile :  float [0-1]
                The vast majority of weights will be close to the mean while the
                interesting outliers will be very few. By default we are showing
                the top and bottom 0.01 quantile (i.e. the 1% highest and
                lowest values).
    weights :   np.ndarray, optional
                Laplacian weights. If not provided, will be computed.

    """
    mesh = make_trimesh(mesh, validate=False)

    try:
        import navis
        import vispy as vp
        import matplotlib.pyplot as plt
    except ImportError:
        raise ImportError('This function requires navis to be installed:\n'
                          '  pip3 install navis')

    if not isinstance(weights, np.ndarray):
        weights = laplacian_cotangent(mesh,
                                      #symmetric=False,
                                      normalized=True)

    if not isinstance(weights, spsp.coo_matrix):
        weights = spsp.coo_matrix(weights)

    # Get data (upper triangle only -> is supposed to be symmetrical)
    # Also removes diagonal (k=1)
    triu = spsp.triu(weights, k=1)
    row, col, data = triu.row, triu.col, triu.data

    if quantile:
        top = data >= np.quantile(data, 1-quantile)
        bottom = data <= np.quantile(data, quantile)
        row = row[top | bottom]
        col = col[top | bottom]
        data = data[top | bottom]

    # Weights are computed per edge
    co1, co2 = mesh.vertices[row], mesh.vertices[col]
    segments = np.hstack((co1, co2)).reshape(co1.shape[0] * 2, 3)

    # Generate colors
    cmap = plt.get_cmap(cmap)
    weights_norm = (data - data.min()) / (data.max() - data.min())

    colors = cmap(weights_norm)
    alpha = np.clip(np.fabs(weights_norm - .5) * 2, a_min=0.01, a_max=1)

    # We need to provide one color per vertex
    colors = np.hstack((colors, colors)).reshape(colors.shape[0] * 2, 4)
    #alpha = np.hstack((alpha, alpha)).reshape(alpha.shape[0] * 2, 1)

    # Combine color with alpha
    #colors = np.hstack((colors[:, :3], alpha))

    t = vp.scene.visuals.Line(pos=segments,
                              color=colors,
                              # Can only be used with method 'agg'
                              width=kwargs.get('linewidth', 1),
                              connect='segments',
                              antialias=kwargs.get('antialias', True),
                              method=kwargs.get('method', 'gl'))

    if not viewer:
        viewer = navis.get_viewer()
    if not viewer:
        viewer = navis.Viewer()

    viewer.add(t)

    return t
Example #43
def mask_test_edges(adj,
                    test_frac=.1,
                    val_frac=.05,
                    prevent_disconnect=True,
                    verbose=False):
    # NOTE: Splits are randomized and results might slightly deviate from reported numbers in the paper.

    # Remove diagonal elements
    adj = adj - sp.dia_matrix(
        (adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape)
    adj.eliminate_zeros()
    # Check that diag is zero:
    assert np.diag(adj.todense()).sum() == 0

    g = nx.from_scipy_sparse_matrix(adj)
    orig_num_cc = nx.number_connected_components(g)

    adj_triu = sp.triu(adj)  # upper triangular portion of adj matrix
    adj_tuple = sparse_to_tuple(
        adj_triu)  # (coords, values, shape), edges only 1 way
    edges = adj_tuple[0]  # all edges, listed only once (not 2 ways)
    # edges_all = sparse_to_tuple(adj)[0] # ALL edges (includes both ways)
    num_test = int(
        np.floor(edges.shape[0] *
                 test_frac))  # controls how large the test set should be
    num_val = int(
        np.floor(edges.shape[0] *
                 val_frac))  # controls how large the validation set should be

    # Store edges in list of ordered tuples (node1, node2) where node1 < node2
    edge_tuples = [(min(edge[0], edge[1]), max(edge[0], edge[1]))
                   for edge in edges]
    all_edge_tuples = set(edge_tuples)
    train_edges = set(edge_tuples)  # initialize train_edges to have all edges
    test_edges = set()
    val_edges = set()

    # Iterate over shuffled edges, add to train/val sets
    np.random.shuffle(edge_tuples)
    counter = 0
    for edge in edge_tuples:
        counter += 1
        if counter % 100 == 0:
            print("processed:" + str(counter))
        # print edge
        node1 = edge[0]
        node2 = edge[1]

        # If removing edge would disconnect a connected component, backtrack and move on
        g.remove_edge(node1, node2)
        if prevent_disconnect == True:
            if nx.number_connected_components(g) > orig_num_cc:
                g.add_edge(node1, node2)
                continue

        # Fill test_edges first
        if len(test_edges) < num_test:
            test_edges.add(edge)
            train_edges.remove(edge)

        # Then, fill val_edges
        elif len(val_edges) < num_val:
            val_edges.add(edge)
            train_edges.remove(edge)

        # Both edge lists full --> break loop
        elif len(test_edges) == num_test and len(val_edges) == num_val:
            g.add_edge(node1, node2)  # re-add the last removed edge; it stays in train_edges
            break

    if (len(val_edges) < num_val or len(test_edges) < num_test):
        print(
            "WARNING: not enough removable edges to perform full train-test split!"
        )
        print("Num. (test, val) edges requested: (", num_test, ", ", num_val,
              ")")
        print("Num. (test, val) edges returned: (", len(test_edges), ", ",
              len(val_edges), ")")

    if prevent_disconnect == True:
        assert nx.number_connected_components(g) == orig_num_cc

    if verbose == True:
        print('creating false test edges...')

    test_edges_false = set()
    while len(test_edges_false) < num_test:
        idx_i = np.random.randint(0, adj.shape[0])
        idx_j = np.random.randint(0, adj.shape[0])
        if idx_i == idx_j:
            continue

        false_edge = (min(idx_i, idx_j), max(idx_i, idx_j))

        # Make sure false_edge not an actual edge, and not a repeat
        if false_edge in all_edge_tuples:
            continue
        if false_edge in test_edges_false:
            continue

        test_edges_false.add(false_edge)

    if verbose == True:
        print('creating false val edges...')

    val_edges_false = set()
    while len(val_edges_false) < num_val:
        idx_i = np.random.randint(0, adj.shape[0])
        idx_j = np.random.randint(0, adj.shape[0])
        if idx_i == idx_j:
            continue

        false_edge = (min(idx_i, idx_j), max(idx_i, idx_j))

        # Make sure false_edge is not an actual edge, not in test_edges_false, not a repeat
        if false_edge in all_edge_tuples or false_edge in test_edges_false or false_edge in val_edges_false:
            continue

        val_edges_false.add(false_edge)

    if verbose == True:
        print('creating false train edges...')

    train_edges_false = set()
    while len(train_edges_false) < len(train_edges):
        idx_i = np.random.randint(0, adj.shape[0])
        idx_j = np.random.randint(0, adj.shape[0])
        if idx_i == idx_j:
            continue

        false_edge = (min(idx_i, idx_j), max(idx_i, idx_j))

        # Make sure false_edge is not an actual edge, not in test_edges_false,
        # not in val_edges_false, not a repeat
        if false_edge in all_edge_tuples or false_edge in test_edges_false or false_edge in val_edges_false or false_edge in train_edges_false:
            continue

        train_edges_false.add(false_edge)

    if verbose == True:
        print('final checks for disjointness...')

    # assert: false_edges are actually false (not in all_edge_tuples)
    assert test_edges_false.isdisjoint(all_edge_tuples)
    assert val_edges_false.isdisjoint(all_edge_tuples)
    assert train_edges_false.isdisjoint(all_edge_tuples)

    # assert: test, val, train false edges disjoint
    assert test_edges_false.isdisjoint(val_edges_false)
    assert test_edges_false.isdisjoint(train_edges_false)
    assert val_edges_false.isdisjoint(train_edges_false)

    # assert: test, val, train positive edges disjoint
    assert val_edges.isdisjoint(train_edges)
    assert test_edges.isdisjoint(train_edges)
    assert val_edges.isdisjoint(test_edges)

    if verbose == True:
        print('creating adj_train...')

    # Re-build adj matrix using remaining graph
    adj_train = nx.adjacency_matrix(g)

    # Convert edge-lists to numpy arrays
    train_edges = np.array([list(edge_tuple) for edge_tuple in train_edges])
    train_edges_false = np.array(
        [list(edge_tuple) for edge_tuple in train_edges_false])
    val_edges = np.array([list(edge_tuple) for edge_tuple in val_edges])
    val_edges_false = np.array(
        [list(edge_tuple) for edge_tuple in val_edges_false])
    test_edges = np.array([list(edge_tuple) for edge_tuple in test_edges])
    test_edges_false = np.array(
        [list(edge_tuple) for edge_tuple in test_edges_false])

    # NOTE: these edge lists only contain single direction of edge!
    return adj_train, train_edges, train_edges_false, val_edges, val_edges_false, test_edges, test_edges_false
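# A hypothetical usage sketch (assumes networkx is available and that the
# helper sparse_to_tuple used above is in scope; names are illustrative):
#   g = nx.karate_club_graph()
#   adj = nx.adjacency_matrix(g)
#   splits = mask_test_edges(adj, test_frac=0.1, val_frac=0.05, verbose=True)
#   adj_train = splits[0]   # the remaining graph, plus true/false edge sets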
Example #44
def plotMatrix(matrixinputfile,imageoutputfile, regionindex1, regionindex2, comparematrix, title, bigwig):
        if not checkExtension(matrixinputfile, '.cool'):
            msg = "input matrix must be in cooler format (.cool)"
            raise SystemExit(msg)
        if comparematrix and not checkExtension(comparematrix, ".cool"):
            msg = "if specified, compare matrix must be in cooler format (.cool)"
            raise SystemExit(msg)
        if not imageoutputfile:
            imageoutputfile = os.path.splitext(matrixinputfile)[0] + '.png'
        elif imageoutputfile and not checkExtension(imageoutputfile, ".png"):
            imageoutputfile = os.path.splitext(imageoutputfile)[0] + ".png"
       
        #get the full matrix first to extract the desired region
        ma = hm.hiCMatrix(matrixinputfile)
        cuts = ma.cut_intervals
        chromosome = cuts[0][0]
        maxIndex = len(cuts) - 1
        #check indices and get the region if ok
        if regionindex1 > maxIndex:
            msg = "invalid start region. Allowed is 0 to {0:d} (0 to {1:d})".format(maxIndex, cuts[maxIndex][1])
            raise SystemExit(msg)
        if regionindex2 < regionindex1:
            msg = "region index 2 must not be smaller than region index 1"
            raise SystemExit(msg)
        if regionindex2 > maxIndex:
            regionindex2 = maxIndex
            print("region index 2 clamped to max. value {0:d}".format(maxIndex))
        region = str(chromosome) +":"+str(cuts[regionindex1][1])+"-"+ str(cuts[regionindex2][1])
        
        #now get the data for the input matrix, restricted to the desired region
        upperHiCMatrix = hm.hiCMatrix(matrixinputfile ,pChrnameList=[region])
        upperMatrix = triu(upperHiCMatrix.matrix, k=1, format="csr")
        
        #if set, get data from the same region also for the compare matrix
        #there's no compatibility check so far
        lowerHiCMatrix = None
        lowerMatrix = None
        if comparematrix:
            lowerHiCMatrix = hm.hiCMatrix(comparematrix)
            if chromosome not in [row[0] for row in lowerHiCMatrix.cut_intervals]:
                msg = "compare matrix must contain the same chromosome as the input matrix"
                raise SystemExit(msg)
            lowerHiCMatrix = hm.hiCMatrix(comparematrix , pChrnameList=[region])
            lowerMatrix = tril(lowerHiCMatrix.matrix, k=0, format="csr") 

            if lowerMatrix.get_shape() != upperMatrix.get_shape():
                msg = "shapes of input matrix and compare matrix do not match. Check resolutions"
                raise SystemExit(msg)

        #arguments for plotting
        plotArgs = Namespace(bigwig=bigwig, 
                             chromosomeOrder=None, 
                             clearMaskedBins=False, 
                             colorMap='RdYlBu_r', 
                             disable_tight_layout=False, 
                             dpi=300, 
                             flipBigwigSign=False, 
                             log=False, log1p=True, 
                             perChromosome=False, 
                             region=region, 
                             region2=None, 
                             scaleFactorBigwig=1.0, 
                             scoreName=None, 
                             title=title, 
                             vMax=None, vMaxBigwig=None, 
                             vMin=1.0, vMinBigwig=None,
                             matrix = matrixinputfile) 
        
        #following code is largely duplicated from hicPlotMatrix
        #not exactly beautiful, but works for now
        chrom, region_start, region_end, idx1, start_pos1, chrom2, region_start2, region_end2, idx2, start_pos2 = hicPlot.getRegion(plotArgs, upperHiCMatrix)
        

        mixedMatrix = None
        if comparematrix:
            mixedMatrix = np.asarray((lowerMatrix + upperMatrix).todense().astype(float))
        else:
            mixedMatrix = np.asarray(upperHiCMatrix.matrix.todense().astype(float))
        
        #colormap for plotting
        cmap = cm.get_cmap(plotArgs.colorMap) # pylint: disable=no-member
        cmap.set_bad('black')
        
        bigwig_info = None
        if plotArgs.bigwig: # pylint: disable=no-member
            bigwig_info = {'args': plotArgs, 'axis': None, 'axis_colorbar': None, 'nan_bins': upperHiCMatrix.nan_bins}
        norm = None

        if plotArgs.log or plotArgs.log1p: # pylint: disable=no-member
            mask = mixedMatrix == 0
            try:
                mixedMatrix[mask] = np.nanmin(mixedMatrix[mask == False])
            except ValueError:
                log.info('Matrix contains only 0. Set all values to {}'.format(np.finfo(float).tiny))
                mixedMatrix[mask] = np.finfo(float).tiny
            if np.isnan(mixedMatrix).any() or np.isinf(mixedMatrix).any():
                log.debug("any nan {}".format(np.isnan(mixedMatrix).any()))
                log.debug("any inf {}".format(np.isinf(mixedMatrix).any()))
                mask_nan = np.isnan(mixedMatrix)
                mask_inf = np.isinf(mixedMatrix)
                mixedMatrix[mask_nan] = np.nanmin(mixedMatrix[mask_nan == False])
                mixedMatrix[mask_inf] = np.nanmin(mixedMatrix[mask_inf == False])

        log.debug("any nan after remove of nan: {}".format(np.isnan(mixedMatrix).any()))
        log.debug("any inf after remove of inf: {}".format(np.isinf(mixedMatrix).any()))
        if plotArgs.log1p: # pylint: disable=no-member
            mixedMatrix += 1
            norm = LogNorm()
        elif plotArgs.log: # pylint: disable=no-member 
            norm = LogNorm()

        if plotArgs.bigwig: # pylint: disable=no-member
            # increase figure height to accommodate bigwig track
            fig_height = 8.5
        else:
            fig_height = 7
        height = 4.8 / fig_height
        
        fig_width = 8
        width = 5.0 / fig_width
        left_margin = (1.0 - width) * 0.5

        fig = plt.figure(figsize=(fig_width, fig_height), dpi=plotArgs.dpi) # pylint: disable=no-member

        if plotArgs.bigwig: # pylint: disable=no-member
            gs = gridspec.GridSpec(2, 2, height_ratios=[0.90, 0.1], width_ratios=[0.97, 0.03])
            gs.update(hspace=0.05, wspace=0.05)
            ax1 = plt.subplot(gs[0, 0])
            ax2 = plt.subplot(gs[1, 0])
            ax3 = plt.subplot(gs[0, 1])
            bigwig_info['axis'] = ax2
            bigwig_info['axis_colorbar'] = ax3
        else:
            ax1 = None
        
        bottom = 1.3 / fig_height

        position = [left_margin, bottom, width, height]
        hicPlot.plotHeatmap(mixedMatrix, ma.get_chromosome_sizes(), fig, position,
                    plotArgs, cmap, xlabel=chrom, ylabel=chrom2,
                    start_pos=start_pos1, start_pos2=start_pos2, pNorm=norm, pAxis=ax1, pBigwig=bigwig_info)
        plt.savefig(imageoutputfile, dpi=plotArgs.dpi) # pylint: disable=no-member
        plt.close(fig)
Example #45
 def _symmetric_matrix(mat: dok_matrix) -> dok_matrix:
     upper = triu(mat, 1, format="dok") / 2
     # `todok` is necessary because subtraction results in another format
     return (mat + upper.transpose() - upper).todok()
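     # A worked sketch (illustrative values): if mat stores an undirected
     # weight only in the upper triangle, e.g. mat = [[0, 4], [0, 0]], then
     # upper = [[0, 2], [0, 0]] and the result is [[0, 2], [2, 0]]; the
     # weight is split evenly across both triangles.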
Example #46
def 上三角(s):  # "upper triangle"
    from scipy.sparse import triu
    return np.matrix(triu(rmat(s, s)).toarray())
Example #47
def vis_aggregate_groups(V, E2V, AggOp, mesh_type, fname='output.vtu'):
    """Coarse grid visualization of aggregate groups.

    Create .vtu files for use in Paraview or display with Matplotlib.

    Parameters
    ----------
    V : {array}
        coordinate array (N x D)
    E2V : {array}
        element index array (Nel x Nelnodes)
    AggOp : {csr_matrix}
        sparse matrix for the aggregate-vertex relationship (N x Nagg)
    mesh_type : {string}
        type of elements: vertex, tri, quad, tet, hex (all 3d)
    fname : {string, file object}
        file to be written, e.g. 'output.vtu'

    Returns
    -------
        - Writes data to .vtu file for use in paraview (xml 0.1 format) or
          displays to screen using matplotlib

    Notes
    -----
        - Works for both 2d and 3d elements.  Element groupings are colored
          with data equal to 3.0 and stringy edges in the aggregate are colored
          with 2.0

    Examples
    --------
    >>> from pyamg.aggregation import standard_aggregation
    >>> from pyamg.vis.vis_coarse import vis_aggregate_groups
    >>> from pyamg.gallery import load_example
    >>> data = load_example('unit_square')
    >>> A = data['A'].tocsr()
    >>> V = data['vertices']
    >>> E2V = data['elements']
    >>> AggOp = standard_aggregation(A)[0]
    >>> vis_aggregate_groups(V=V, E2V=E2V, AggOp=AggOp,
    ...                      mesh_type='tri', fname='output.vtu')
    >>> from pyamg.aggregation import standard_aggregation
    >>> from pyamg.vis.vis_coarse import vis_aggregate_groups
    >>> from pyamg.gallery import load_example
    >>> data = load_example('unit_cube')
    >>> A = data['A'].tocsr()
    >>> V = data['vertices']
    >>> E2V = data['elements']
    >>> AggOp = standard_aggregation(A)[0]
    >>> vis_aggregate_groups(V=V, E2V=E2V, AggOp=AggOp,
    ...                      mesh_type='tet', fname='output.vtu')

    """
    check_input(V=V, E2V=E2V, AggOp=AggOp, mesh_type=mesh_type)
    map_type_to_key = {'tri': 5, 'quad': 9, 'tet': 10, 'hex': 12}
    if mesh_type not in map_type_to_key:
        raise ValueError(f'Unknown mesh_type={mesh_type}')
    key = map_type_to_key[mesh_type]

    AggOp = csr_matrix(AggOp)

    # remove elements with dirichlet BCs
    if E2V.max() >= AggOp.shape[0]:
        E2V = E2V[E2V.max(axis=1) < AggOp.shape[0]]

    # 1 #
    # Find elements with all vertices in same aggregate

    # account for 0 rows.  Mark them as solitary aggregates
    if len(AggOp.indices) != AggOp.shape[0]:
        full_aggs = ((AggOp.indptr[1:] - AggOp.indptr[:-1]) == 0).nonzero()[0]
        new_aggs = np.array(AggOp.sum(axis=1), dtype=int).ravel()
        new_aggs[full_aggs == 1] = AggOp.indices  # keep existing aggregate IDs
        new_aggs[full_aggs == 0] = AggOp.shape[1]  # fill in singletons maxID+1
        ElementAggs = new_aggs[E2V]
    else:
        ElementAggs = AggOp.indices[E2V]

    # 2 #
    # find all aggregates encompassing full elements
    # mask[i] == True if all vertices in element i belong to the same aggregate
    mask = np.where(abs(np.diff(ElementAggs)).max(axis=1) == 0)[0]
    # mask = (ElementAggs[:,:] == ElementAggs[:,0]).all(axis=1)
    E2V_a = E2V[mask, :]  # elements where element is full
    Nel_a = E2V_a.shape[0]

    # 3 #
    # find edges of elements in the same aggregate (brute force)

    # construct vertex to vertex graph
    col = E2V.ravel()
    row = np.kron(np.arange(0, E2V.shape[0]),
                  np.ones((E2V.shape[1], ), dtype=int))
    data = np.ones((len(col), ))
    if len(row) != len(col):
        raise ValueError('Problem constructing vertex-to-vertex map')
    V2V = coo_matrix((data, (row, col)), shape=(E2V.shape[0], E2V.max() + 1))
    V2V = V2V.T * V2V
    V2V = triu(V2V, 1).tocoo()
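    # Sketch: V2V starts as an element-to-vertex incidence matrix, so
    # V2V.T * V2V counts, for each vertex pair, how many elements contain
    # both vertices; triu(..., 1) then lists each undirected edge once.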

    # get all the edges
    edges = np.vstack((V2V.row, V2V.col)).T

    # all the edges in the same aggregate
    E2V_b = edges[AggOp.indices[V2V.row] == AggOp.indices[V2V.col]]
    Nel_b = E2V_b.shape[0]

    # 3.5 #
    # single node aggregates
    sums = np.array(AggOp.sum(axis=0)).ravel()
    E2V_c = np.where(sums == 1)[0]
    Nel_c = len(E2V_c)

    # 4 #
    # now write out the elements and edges
    colors_a = 3 * np.ones((Nel_a, ))  # color triangles with threes
    colors_b = 2 * np.ones((Nel_b, ))  # color edges with twos
    colors_c = 1 * np.ones((Nel_c, ))  # color the vertices with ones

    cells = {1: E2V_c, 3: E2V_b, key: E2V_a}
    cdata = {1: colors_c, 3: colors_b, key: colors_a}
    write_vtu(V=V, cells=cells, fname=fname, cdata=cdata)
Example #48
def mask_test_edges(
    adj: sp.coo_matrix,
    seed: int = 0,
    validation_frac: float = 0.05,
    test_frac: float = 0.1,
    validation_edges_in_adj: bool = False,
):
    """
    Split edges for graph autoencoder into train/validation/test splits.

    Based on https://github.com/tkipf/gae/blob/master/gae/preprocessing.py

    Args:
        adj: scipy.sparse.coo_matrix adjacency matrix.
    """
    rng = np.random.default_rng(seed)

    def sparse_to_tuple(sparse_mx):
        if not sp.isspmatrix_coo(sparse_mx):
            sparse_mx = sparse_mx.tocoo()
        coords = np.vstack((sparse_mx.row, sparse_mx.col)).transpose()
        values = sparse_mx.data
        shape = sparse_mx.shape
        return coords, values, shape

    # Remove diagonal elements
    adj = adj - sp.dia_matrix(
        (adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape)
    adj.eliminate_zeros()
    # Check that diag is zero:
    assert np.diag(adj.todense()).sum() == 0

    adj_triu = sp.triu(adj)
    adj_tuple = sparse_to_tuple(adj_triu)
    edges = adj_tuple[0]
    edges_all = sparse_to_tuple(adj)[0]
    num_test = int(np.floor(edges.shape[0] * test_frac))
    num_val = int(np.floor(edges.shape[0] * validation_frac))

    all_edge_idx = list(range(edges.shape[0]))
    rng.shuffle(all_edge_idx)
    val_edge_idx = all_edge_idx[:num_val]
    test_edge_idx = all_edge_idx[num_val:(num_val + num_test)]
    test_edges = edges[test_edge_idx]
    val_edges = edges[val_edge_idx]

    train_edges = np.delete(edges,
                            np.hstack([test_edge_idx, val_edge_idx]),
                            axis=0)

    # TODO: use sets?
    def ismember(a, b, tol=5):
        rows_close = np.all(np.round(a - b[:, None], tol) == 0, axis=-1)
        return np.any(rows_close)

    test_edges_false = []
    while len(test_edges_false) < len(test_edges):
        idx_i = rng.integers(0, adj.shape[0])
        idx_j = rng.integers(0, adj.shape[0])
        if idx_i == idx_j:
            continue
        if ismember([idx_i, idx_j], edges_all):
            continue
        if test_edges_false:
            if ismember([idx_j, idx_i], np.array(test_edges_false)):
                continue
            if ismember([idx_i, idx_j], np.array(test_edges_false)):
                continue
        test_edges_false.append([idx_i, idx_j])

    val_edges_false = []
    while len(val_edges_false) < len(val_edges):
        idx_i = rng.integers(0, adj.shape[0])
        idx_j = rng.integers(0, adj.shape[0])
        if idx_i == idx_j:
            continue
        if ismember([idx_i, idx_j], train_edges):
            continue
        if ismember([idx_j, idx_i], train_edges):
            continue
        if ismember([idx_i, idx_j], val_edges):
            continue
        if ismember([idx_j, idx_i], val_edges):
            continue
        if val_edges_false:
            if ismember([idx_j, idx_i], np.array(val_edges_false)):
                continue
            if ismember([idx_i, idx_j], np.array(val_edges_false)):
                continue
        val_edges_false.append([idx_i, idx_j])

    assert ~ismember(test_edges_false, edges_all)
    assert ~ismember(val_edges_false, edges_all)
    assert ~ismember(val_edges, train_edges)
    assert ~ismember(test_edges, train_edges)
    assert ~ismember(val_edges, test_edges)

    if validation_edges_in_adj:
        adj_edges = np.concatenate((train_edges, val_edges), axis=0)
    else:
        adj_edges = train_edges

    data = np.ones(adj_edges.shape[0])

    # Re-build adj matrix
    adj_train = sp.coo_matrix((data, adj_edges.T), shape=adj.shape)
    adj_train = adj_train + adj_train.T

    # NOTE: these edge lists only contain single direction of edge!
    return (
        adj_train,
        val_edges,
        val_edges_false,
        test_edges,
        test_edges_false,
    )
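# A minimal usage sketch on a toy ring graph (illustrative values; the graph,
# sizes, and seed are assumptions, not from the original source):
import numpy as np
import scipy.sparse as sp

n = 24
rows = np.arange(n)
cols = (rows + 1) % n
ring = sp.coo_matrix((np.ones(n), (rows, cols)), shape=(n, n))
ring = (ring + ring.T).tocoo()          # undirected ring: 24 edges
adj_train, val_e, val_f, test_e, test_f = mask_test_edges(ring, seed=0)
print(test_e.shape, len(test_f))        # 2 held-out edges and 2 false edges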
Example #49
def lambda_test_edges(dataset, adj, l):
    # Function to build the train/test split with training proportion l
    # Remove diagonal elements
    adj = adj - sp.dia_matrix(
        (adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape)
    adj.eliminate_zeros()
    # Check that diag is zero:
    assert np.diag(adj.todense()).sum() == 0

    test_ratio = 1 - 0.05 - l
    adj_triu = sp.triu(adj)
    adj_tuple = sparse_to_tuple(adj_triu)
    edges = adj_tuple[0]
    edges_all = sparse_to_tuple(adj)[0]
    num_test = int(np.floor(edges.shape[0] * test_ratio))
    num_val = int(np.floor(edges.shape[0] * 0.05))

    all_edge_idx = list(range(edges.shape[0]))
    np.random.shuffle(all_edge_idx)
    val_edge_idx = all_edge_idx[:num_val]
    test_edge_idx = all_edge_idx[num_val:(num_val + num_test)]
    test_edges = edges[test_edge_idx]
    val_edges = edges[val_edge_idx]
    train_edges = np.delete(edges,
                            np.hstack([test_edge_idx, val_edge_idx]),
                            axis=0)

    def ismember(a, b, tol=5):
        rows_close = np.all(np.round(a - b[:, None], tol) == 0, axis=-1)
        return np.any(rows_close)

    test_edges_false = []
    while len(test_edges_false) < len(test_edges):
        idx_i = np.random.randint(0, adj.shape[0])
        idx_j = np.random.randint(0, adj.shape[0])
        if idx_i == idx_j:
            continue
        if ismember([idx_i, idx_j], edges_all):
            continue
        if test_edges_false:
            if ismember([idx_j, idx_i], np.array(test_edges_false)):
                continue
            if ismember([idx_i, idx_j], np.array(test_edges_false)):
                continue
        test_edges_false.append([idx_i, idx_j])

    val_edges_false = []
    while len(val_edges_false) < len(val_edges):
        idx_i = np.random.randint(0, adj.shape[0])
        idx_j = np.random.randint(0, adj.shape[0])
        if idx_i == idx_j:
            continue
        if ismember([idx_i, idx_j], train_edges):
            continue
        if ismember([idx_j, idx_i], train_edges):
            continue
        if ismember([idx_i, idx_j], val_edges):
            continue
        if ismember([idx_j, idx_i], val_edges):
            continue
        if val_edges_false:
            if ismember([idx_j, idx_i], np.array(val_edges_false)):
                continue
            if ismember([idx_i, idx_j], np.array(val_edges_false)):
                continue
        val_edges_false.append([idx_i, idx_j])

    assert ~ismember(test_edges_false, edges_all)
    assert ~ismember(val_edges_false, edges_all)
    assert ~ismember(val_edges, train_edges)
    assert ~ismember(test_edges, train_edges)
    assert ~ismember(val_edges, test_edges)

    data = np.ones(train_edges.shape[0])

    # Re-build adj matrix
    adj_train = sp.csr_matrix((data, (train_edges[:, 0], train_edges[:, 1])),
                              shape=adj.shape)
    adj_train = adj_train + adj_train.T

    return adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false
Example #50
    def density(self, grid, spinor=None, tol=1e-7, eta=False):
        r""" Expand the density matrix to the charge density on a grid

        This routine calculates the real-space density components on a specified grid.

        This is an *in-place* operation that *adds* to the current values in the grid.

        Note: To calculate :math:`\rho(\mathbf r)` in a unit-cell different from the
        originating geometry, simply pass a grid with a unit-cell different than the originating
        supercell.

        The real-space density is calculated as:

        .. math::
            \rho(\mathbf r) = \sum_{\nu\mu}\phi_\nu(\mathbf r)\phi_\mu(\mathbf r) D_{\nu\mu}

        While for non-collinear/spin-orbit calculations the density is determined from the
        spinor component (`spinor`) by

        .. math::
           \rho_{\boldsymbol\sigma}(\mathbf r) = \sum_{\nu\mu}\phi_\nu(\mathbf r)\phi_\mu(\mathbf r) \sum_\alpha [\boldsymbol\sigma \boldsymbol\rho_{\nu\mu}]_{\alpha\alpha}

        Here :math:`\boldsymbol\sigma` corresponds to a spinor operator to extract relevant quantities. By passing the identity matrix the total charge is added. By using the Pauli matrix :math:`\boldsymbol\sigma_x`
        only the :math:`x` component of the density is added to the grid (see `Spin.X`).

        Parameters
        ----------
        grid : Grid
           the grid on which to add the density (the density is in ``e/Ang^3``)
        spinor : (2,) or (2, 2), optional
           the spinor matrix to obtain the diagonal components of the density. For un-polarized density matrices
           this keyword has no influence. For spin-polarized it *has* to be either an integer or a vector of
           length 2 (defaults to total density).
           For non-collinear/spin-orbit density matrices it has to be a 2x2 matrix (defaults to total density).
        tol : float, optional
           DM tolerance for accepted values. All density matrix elements with absolute values below
           the tolerance are treated as strictly zero.
        eta : bool, optional
           show a progressbar on stdout
        """
        try:
            # Once unique has the axis keyword, we know we can safely
            # use it in this routine
            # Otherwise we raise an ImportError
            unique([[0, 1], [2, 3]], axis=0)
        except:
            raise NotImplementedError(
                self.__class__.__name__ +
                '.density requires numpy >= 1.13, either update '
                'numpy or do not use this function!')

        geometry = self.geometry
        # Check that the atomic coordinates, really are all within the intrinsic supercell.
        # If not, it may mean that the DM does not conform to the primary unit-cell paradigm
        # of matrix elements. It complicates things.
        fxyz = geometry.fxyz
        f_min = fxyz.min()
        f_max = fxyz.max()
        del fxyz, f_min, f_max

        # Extract sub variables used throughout the loop
        shape = _a.asarrayi(grid.shape)
        dcell = grid.dcell

        # Sparse matrix data
        csr = self._csr

        # In the following we don't care about division
        # So 1) save error state, 2) turn off divide by 0, 3) calculate, 4) turn on old error state
        old_err = np.seterr(divide='ignore', invalid='ignore')

        # Placeholder for the resulting coefficients
        DM = None
        if self.spin.kind > Spin.POLARIZED:
            if spinor is None:
                # Default to the total density
                spinor = np.identity(2, dtype=np.complex128)
            else:
                spinor = _a.arrayz(spinor)
            if spinor.size != 4 or spinor.ndim != 2:
                raise ValueError(
                    self.__class__.__name__ +
                    '.density with NC/SO spin, requires a 2x2 matrix.')

            DM = _a.emptyz([self.nnz, 2, 2])
            idx = array_arange(csr.ptr[:-1], n=csr.ncol)
            if self.spin.kind == Spin.NONCOLINEAR:
                # non-collinear
                DM[:, 0, 0] = csr._D[idx, 0]
                DM[:, 0, 1] = csr._D[idx, 2] + 1j * csr._D[idx, 3]
                DM[:, 1, 0] = np.conj(DM[:, 0, 1])
                DM[:, 1, 1] = csr._D[idx, 1]
            else:
                # spin-orbit
                DM[:, 0, 0] = csr._D[idx, 0] + 1j * csr._D[idx, 4]
                DM[:, 0, 1] = csr._D[idx, 2] + 1j * csr._D[idx, 3]
                DM[:, 1, 0] = csr._D[idx, 6] + 1j * csr._D[idx, 7]
                DM[:, 1, 1] = csr._D[idx, 1] + 1j * csr._D[idx, 5]

            # Perform dot-product with spinor, and take out the diagonal real part
            DM = dot(DM, spinor.T)[:, [0, 1], [0, 1]].sum(1).real

        elif self.spin.kind == Spin.POLARIZED:
            if spinor is None:
                spinor = _a.onesd(2)

            elif isinstance(spinor, Integral):
                # extract the provided spin-polarization
                s = _a.zerosd(2)
                s[spinor] = 1.
                spinor = s
            else:
                spinor = _a.arrayd(spinor)

            if spinor.size != 2 or spinor.ndim != 1:
                raise ValueError(
                    self.__class__.__name__ +
                    '.density with polarized spin, requires spinor '
                    'argument as an integer, or a vector of length 2')

            idx = array_arange(csr.ptr[:-1], n=csr.ncol)
            DM = csr._D[idx, 0] * spinor[0] + csr._D[idx, 1] * spinor[1]

        else:
            idx = array_arange(csr.ptr[:-1], n=csr.ncol)
            DM = csr._D[idx, 0]

        # Create the DM csr matrix.
        csrDM = csr_matrix(
            (DM, csr.col[idx], np.insert(np.cumsum(csr.ncol), 0, 0)),
            shape=(self.shape[:2]),
            dtype=DM.dtype)

        # Clean-up
        del idx, DM

        # To heavily speed up the construction of the density we can recreate
        # the sparse csrDM matrix by summing the lower and upper triangular part.
        # This means we only traverse the sparse UPPER part of the DM matrix
        # I.e.:
        #    psi_i * DM_{ij} * psi_j + psi_j * DM_{ji} * psi_i
        # is equal to:
        #    psi_i * (DM_{ij} + DM_{ji}) * psi_j
        # Secondly, to ease the loops we extract the main diagonal (on-site terms)
        # and store this for separate usage
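        # A small sketch of the fold (illustrative values):
        #   M = [[1, 2],            triu(M) + tril(M, -1).T = [[1, 5],
        #        [3, 4]]                                       [0, 4]]
        # i.e. DM_ij and DM_ji are summed into the upper-triangle entry.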
        csr_sum = [None] * geometry.n_s
        no = geometry.no
        primary_i_s = geometry.sc_index([0, 0, 0])
        for i_s in range(geometry.n_s):
            # Extract the csr matrix
            o_start, o_end = i_s * no, (i_s + 1) * no
            csr = csrDM[:, o_start:o_end]
            if i_s == primary_i_s:
                csr_sum[i_s] = triu(csr) + tril(csr, -1).transpose()
            else:
                csr_sum[i_s] = csr

        # Recreate the column-stacked csr matrix
        csrDM = ss_hstack(csr_sum, format='csr')
        del csr, csr_sum

        # Remove all zero elements (note we use the tolerance here!)
        csrDM.data = np.where(np.fabs(csrDM.data) > tol, csrDM.data, 0.)

        # Eliminate zeros and sort indices etc.
        csrDM.eliminate_zeros()
        csrDM.sort_indices()
        csrDM.prune()

        # 1. Ensure the grid has a geometry associated with it
        sc = grid.sc.copy()
        # Find the periodic directions
        pbc = [
            bc == grid.PERIODIC or geometry.nsc[i] > 1
            for i, bc in enumerate(grid.bc[:, 0])
        ]
        if grid.geometry is None:
            # Create the actual geometry that encompass the grid
            ia, xyz, _ = geometry.within_inf(sc, periodic=pbc)
            if len(ia) > 0:
                grid.set_geometry(Geometry(xyz, geometry.atoms[ia], sc=sc))

        # Instead of looping all atoms in the supercell we find the exact atoms
        # and their supercell indices.
        add_R = _a.fulld(3, geometry.maxR())
        # Calculate the required additional vectors required to increase the fictitious
        # supercell by add_R in each direction.
        # For extremely skewed lattices this will be way too much, hence we make
        # them square.
        o = sc.toCuboid(True)
        sc = SuperCell(o._v + np.diag(2 * add_R), origo=o.origo - add_R)

        # Retrieve all atoms within the grid supercell
        # (and the neighbours that connect into the cell)
        IA, XYZ, ISC = geometry.within_inf(sc, periodic=pbc)
        XYZ -= grid.sc.origo.reshape(1, 3)

        # Retrieve progressbar
        eta = tqdm_eta(len(IA), self.__class__.__name__ + '.density', 'atom',
                       eta)

        cell = geometry.cell
        atom = geometry.atom
        axyz = geometry.axyz
        a2o = geometry.a2o

        def xyz2spherical(xyz, offset):
            """ Calculate the spherical coordinates from indices """
            rx = xyz[:, 0] - offset[0]
            ry = xyz[:, 1] - offset[1]
            rz = xyz[:, 2] - offset[2]

            # Calculate radius ** 2
            xyz_to_spherical_cos_phi(rx, ry, rz)
            return rx, ry, rz

        def xyz2sphericalR(xyz, offset, R):
            """ Calculate the spherical coordinates from indices """
            rx = xyz[:, 0] - offset[0]
            idx = indices_fabs_le(rx, R)
            ry = xyz[idx, 1] - offset[1]
            ix = indices_fabs_le(ry, R)
            ry = ry[ix]
            idx = idx[ix]
            rz = xyz[idx, 2] - offset[2]
            ix = indices_fabs_le(rz, R)
            ry = ry[ix]
            rz = rz[ix]
            idx = idx[ix]
            if len(idx) == 0:
                return [], [], [], []
            rx = rx[idx]

            # Calculate radius ** 2
            ix = indices_le(rx**2 + ry**2 + rz**2, R**2)
            idx = idx[ix]
            if len(idx) == 0:
                return [], [], [], []
            rx = rx[ix]
            ry = ry[ix]
            rz = rz[ix]
            xyz_to_spherical_cos_phi(rx, ry, rz)
            return idx, rx, ry, rz

        # Looping atoms in the sparse pattern is better since we can pre-calculate
        # the radial parts and then add them.
        # First create a SparseOrbital matrix, then convert to SparseAtom
        spO = SparseOrbital(geometry, dtype=np.int16)
        spO._csr = SparseCSR(csrDM)
        spA = spO.toSparseAtom(dtype=np.int16)
        del spO
        na = geometry.na
        # Remove the diagonal part of the sparse atom matrix
        off = na * primary_i_s
        for ia in range(na):
            del spA[ia, off + ia]

        # Get pointers and delete the atomic sparse pattern
        # The below complexity is because we are not finalizing spA
        csr = spA._csr
        a_ptr = np.insert(_a.cumsumi(csr.ncol), 0, 0)
        a_col = csr.col[array_arange(csr.ptr, n=csr.ncol)]
        del spA, csr

        # Get offset in supercell in orbitals
        off = geometry.no * primary_i_s
        origo = grid.origo
        # TODO sum the non-origo atoms to the csrDM matrix
        #      this would further decrease the loops required.

        # Loop over all atoms in the grid-cell
        for ia, ia_xyz, isc in zip(IA, XYZ, ISC):
            # Get current atom
            ia_atom = atom[ia]
            IO = a2o(ia)
            IO_range = range(ia_atom.no)
            cell_offset = (cell * isc.reshape(3, 1)).sum(0) - origo

            # Extract maximum R
            R = ia_atom.maxR()
            if R <= 0.:
                warn("Atom '{}' does not have a wave-function, skipping atom.".
                     format(ia_atom))
                eta.update()
                continue

            # Retrieve indices of the grid for the atomic shape
            idx = grid.index(ia_atom.toSphere(ia_xyz))

            # Now we have the indices for the largest orbital on the atom

            # Subsequently we have to loop the orbitals and the
            # connecting orbitals
            # Then we find the indices that overlap with these indices
            # First reduce indices to inside the grid-cell
            idx[idx[:, 0] < 0, 0] = 0
            idx[shape[0] <= idx[:, 0], 0] = shape[0] - 1
            idx[idx[:, 1] < 0, 1] = 0
            idx[shape[1] <= idx[:, 1], 1] = shape[1] - 1
            idx[idx[:, 2] < 0, 2] = 0
            idx[shape[2] <= idx[:, 2], 2] = shape[2] - 1

            # Remove duplicates, requires numpy >= 1.13
            idx = unique(idx, axis=0)
            if len(idx) == 0:
                eta.update()
                continue

            # Get real-space coordinates for the current atom
            # as well as the radial parts
            grid_xyz = dot(idx, dcell)

            # Perform loop on connection atoms
            # Allocate the DM_pj arrays
            # This will have a size equal to number of elements times number of
            # orbitals on this atom
            # In this way we do not have to calculate the psi_j multiple times
            DM_io = csrDM[IO:IO + ia_atom.no, :].tolil()
            DM_pj = _a.zerosd([ia_atom.no, grid_xyz.shape[0]])

            # Now we perform the loop on the connections for this atom
            # Remark that we have removed the diagonal atom (it-self)
            # As that will be calculated in the end
            for ja in a_col[a_ptr[ia]:a_ptr[ia + 1]]:
                # Retrieve atom (which contains the orbitals)
                ja_atom = atom[ja % na]
                JO = a2o(ja)
                jR = ja_atom.maxR()
                # Get actual coordinate of the atom
                ja_xyz = axyz(ja) + cell_offset

                # Reduce the ia'th grid points to those that connects to the ja'th atom
                ja_idx, ja_r, ja_theta, ja_cos_phi = xyz2sphericalR(
                    grid_xyz, ja_xyz, jR)

                if len(ja_idx) == 0:
                    # Quick step
                    continue

                # Loop on orbitals on this atom
                for jo in range(ja_atom.no):
                    o = ja_atom.orbital[jo]
                    oR = o.R

                    # Downsize to the correct indices
                    if jR - oR < 1e-6:
                        ja_idx1 = ja_idx
                        ja_r1 = ja_r
                        ja_theta1 = ja_theta
                        ja_cos_phi1 = ja_cos_phi
                    else:
                        ja_idx1 = indices_le(ja_r, oR)
                        if len(ja_idx1) == 0:
                            # Quick step
                            continue

                        # Reduce arrays
                        ja_r1 = ja_r[ja_idx1]
                        ja_theta1 = ja_theta[ja_idx1]
                        ja_cos_phi1 = ja_cos_phi[ja_idx1]
                        ja_idx1 = ja_idx[ja_idx1]

                    # Calculate the psi_j component
                    psi = o.psi_spher(ja_r1,
                                      ja_theta1,
                                      ja_cos_phi1,
                                      cos_phi=True)

                    # Now add this orbital to all components
                    for io in IO_range:
                        DM_pj[io, ja_idx1] += DM_io[io, JO + jo] * psi

                # Temporary clean up
                del ja_idx, ja_r, ja_theta, ja_cos_phi
                del ja_idx1, ja_r1, ja_theta1, ja_cos_phi1, psi

            # Now we have all components for all orbitals connecting to all
            # orbitals on atom ia. We simply need to add the diagonal components.

            # Loop on the orbitals on this atom
            ia_r, ia_theta, ia_cos_phi = xyz2spherical(grid_xyz, ia_xyz)
            del grid_xyz
            for io in IO_range:
                # Only loop half the range.
                # This is because triu + tril(-1).transpose()
                # removes the lower half of the on-site matrix.
                for jo in range(io + 1, ia_atom.no):
                    DM = DM_io[io, off + IO + jo]

                    oj = ia_atom.orbital[jo]
                    ojR = oj.R

                    # Downsize to the correct indices
                    if R - ojR < 1e-6:
                        ja_idx1 = slice(None)
                        ja_r1 = ia_r
                        ja_theta1 = ia_theta
                        ja_cos_phi1 = ia_cos_phi
                    else:
                        ja_idx1 = indices_le(ia_r, ojR)
                        if len(ja_idx1) == 0:
                            # Quick step
                            continue

                        # Reduce arrays
                        ja_r1 = ia_r[ja_idx1]
                        ja_theta1 = ia_theta[ja_idx1]
                        ja_cos_phi1 = ia_cos_phi[ja_idx1]

                    # Calculate the psi_j component
                    DM_pj[io, ja_idx1] += DM * oj.psi_spher(
                        ja_r1, ja_theta1, ja_cos_phi1, cos_phi=True)

                # Calculate the psi_i component
                # Note that this one *also* zeroes points outside the shell,
                # i.e. this step is important because it nullifies all points
                # where orbital io is not defined.
                psi = ia_atom.orbital[io].psi_spher(ia_r,
                                                    ia_theta,
                                                    ia_cos_phi,
                                                    cos_phi=True)
                DM_pj[io, :] += DM_io[io, off + IO + io] * psi
                DM_pj[io, :] *= psi

            # Temporary clean up
            ja_idx1 = ja_r1 = ja_theta1 = ja_cos_phi1 = None
            del ia_r, ia_theta, ia_cos_phi, psi, DM_io

            # Now add the density
            grid.grid[idx[:, 0], idx[:, 1], idx[:, 2]] += DM_pj.sum(0)

            # Clean-up
            del DM_pj, idx

            eta.update()
        eta.close()

        # Reset the error code for division
        np.seterr(**old_err)
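
# The half-storage trick referenced in the on-site loop above
# (triu + tril(-1).transpose()) can be illustrated in isolation.
# A minimal, self-contained sketch on a small symmetric matrix
# (names and sizes here are illustrative only):
import numpy as np
from scipy import sparse

A = sparse.random(6, 6, density=0.4, random_state=0)
A = A + A.T  # symmetrize

# Keep only the upper triangle (including the diagonal) ...
A_half = sparse.triu(A, k=0)
# ... and rebuild the full matrix by mirroring the strictly-upper part.
A_full = A_half + sparse.triu(A_half, k=1).T

assert np.allclose(A.toarray(), A_full.toarray())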
Example #51
0
def _triu(a, sparse):
    if sparse:
        return sp.triu(a, k=1)
    return np.triu(a, k=1)
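
# A quick sanity check of the dispatch helper above, assuming `sp` is bound
# to scipy.sparse as the function body suggests (illustrative only):
import numpy as np
import scipy.sparse as sp

dense = np.arange(16).reshape(4, 4)
print(_triu(dense, sparse=False))                           # strictly upper, dense path
print(_triu(sp.csr_matrix(dense), sparse=True).toarray())   # same result, sparse path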
Example #52
0
def assemble_adjacency_matrix(transition_counts,
                              num_edges,
                              inplace=True,
                              seed=None):
    """
    Computes an adjacency matrix for a graph based on the given transition counts and the desired
    number of edges. The resulting adjacency matrix will represent a graph with no singleton nodes
    (however, possibly with multiple connected components).

    Note
    ----
    The strategy is described in *NetGAN: Generating Graphs via Random Walks* (Bojchevski, Shchur,
    Zügner, Günnemann, 2018).

    Parameters
    ----------
    transition_counts: scipy.sparse.csr_matrix [N, N]
        The transition counts (e.g. obtained from random walks) for all pairs of nodes. Must be
        symmetric.
    num_edges: int
        The number of edges the output adjacency matrix should contain.
    inplace: bool, default: True
        Whether the transition_counts matrix may be modified. Otherwise, a copy is performed.
    seed: int, default: None
        The seed to use for generating random values.

    Returns
    -------
    scipy.sparse.csr_matrix
        A binary adjacency matrix containing the desired number of edges. The function tries to
        assemble a matrix with `2 * num_edges` entries. However, if
        `num_edges < transition_counts.shape[0]`, then this cannot be guaranteed. The diagonal of
        the adjacency matrix is always zero.
    """
    # 1) Setup
    # pylint: disable=no-member
    randomizer = np.random.RandomState(seed)

    # 1.1) Copy if needed
    if not inplace:
        transition_counts = transition_counts.copy()

    # 1.2) Set diagonal to zero
    transition_counts = transition_counts.tolil()
    transition_counts.setdiag(0)

    # 2) Check if the transition matrix can be converted easily
    if len(transition_counts.nonzero()[0]) // 2 <= num_edges:
        transition_counts[transition_counts.nonzero()] = 1
        transition_counts += transition_counts.T
        transition_counts[transition_counts > 1] = 1
        return transition_counts.tocsr()

    # 3) Assemble the adjacency matrix according to paper
    N = transition_counts.shape[0]
    result = sp.dok_matrix((N, N))
    # transition probabilities
    div = transition_counts.sum(axis=0)
    div[div <= 0] = 1
    P = (transition_counts / div).T

    # 3.1) Iterate over nodes in random order to sample one neighbor
    for node in randomizer.permutation(N)[:min(num_edges, N)]:
        # 3.1.1) Skip if no neighbor for the node is present
        if P[node].sum() == 0:
            continue

        # 3.1.2) Sample neighbor according to probabilities
        neighbor = randomizer.choice(N, p=P[node].A1)
        result[node, neighbor] = result[neighbor, node] = 1

    # 3.2) Sample remaining edges
    # 3.2.1) Compute probabilities for drawing
    num_remaining_edges = int(num_edges - result.sum() / 2)
    if num_remaining_edges > 0:
        # equals size of the upper triangular matrix
        num_choices = (N * N + N) // 2
        transition_counts[result.nonzero()] = 0
        P_triu = sp.triu(transition_counts).tocsr()
        P_triu_indices = np.triu_indices_from(transition_counts)
        probabilities = (P_triu / P_triu.sum())[P_triu_indices]

        # 3.2.2) Choose edges
        edges = randomizer.choice(num_choices,
                                  replace=False,
                                  p=probabilities.A1,
                                  size=num_remaining_edges)

        # 3.2.3) Add edge choices to result
        rows = P_triu_indices[0][edges]
        cols = P_triu_indices[1][edges]
        result[rows, cols] = result[cols, rows] = 1

    return result.tocsr()
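
# A hedged usage sketch of the function above; the symmetric transition
# counts below are fabricated for illustration (in the NetGAN setting they
# would come from counting transitions over generated random walks):
import numpy as np
import scipy.sparse as sp

counts = sp.csr_matrix(np.array([
    [0, 4, 1, 0, 0],
    [4, 0, 2, 3, 0],
    [1, 2, 0, 0, 5],
    [0, 3, 0, 0, 2],
    [0, 0, 5, 2, 0],
]))

adj = assemble_adjacency_matrix(counts, num_edges=4, inplace=False, seed=0)
assert (adj != adj.T).nnz == 0    # symmetric
assert adj.diagonal().sum() == 0  # zero diagonal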
Example #53
0
def mask_test_edges2(adj):
    # Function to build test set with 10% positive links
    # NOTE: Splits are randomized and results might slightly deviate from reported numbers in the paper.
    # TODO: Clean up.

    # Remove diagonal elements
    adj = adj - sp.dia_matrix(
        (adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape)
    adj.eliminate_zeros()
    # Efficiently check that diag is zero: DmitriyFradkin
    assert np.sum(adj.diagonal()) == 0

    adj_triu = sp.triu(adj)
    adj_tuple = sparse_to_tuple(adj_triu)
    edges = adj_tuple[0]
    edges_all = sparse_to_tuple(adj)[0]
    num_test = int(np.floor(edges.shape[0] / 10.))
    num_val = int(np.floor(edges.shape[0] / 20.))

    all_edge_idx = list(range(edges.shape[0]))
    np.random.shuffle(all_edge_idx)
    val_edge_idx = all_edge_idx[:num_val]
    test_edge_idx = all_edge_idx[num_val:(num_val + num_test)]
    test_edges = edges[test_edge_idx]
    val_edges = edges[val_edge_idx]
    train_edges = np.delete(edges,
                            np.hstack([test_edge_idx, val_edge_idx]),
                            axis=0)

    data = np.ones(train_edges.shape[0])
    # Re-build adj matrix
    adj_train = sp.csr_matrix((data, (train_edges[:, 0], train_edges[:, 1])),
                              shape=adj.shape)
    adj_train = adj_train + adj_train.T

    #def ismember(a, b, tol=5):
    #    rows_close = np.all(np.round(a - b[:, None], tol) == 0, axis=-1)
    #    return np.any(rows_close)

    print('Generating test_edges_false {}'.format(datetime.now()))
    ### all edges - symmetric
    edges_all_set = set([(x[0], x[1]) for x in edges_all])
    # generate initial set randomly:
    test_edges_false = generate_random_pairs(adj.shape[0], len(test_edges))
    # make sure it doesn't have real edges:
    test_edges_false = test_edges_false - edges_all_set
    # add as many edges as needed:
    while len(test_edges_false) < len(test_edges):
        idx_i = np.random.randint(0, adj.shape[0])
        idx_j = np.random.randint(0, adj.shape[0])
        if idx_i == idx_j or (idx_i, idx_j) in edges_all_set:
            continue
        if (idx_j, idx_i) in test_edges_false or (idx_i,
                                                  idx_j) in test_edges_false:
            continue
        test_edges_false.add((idx_i, idx_j))

    print('Generating val_edges_false {}'.format(datetime.now()))
    val_edges_false = generate_random_pairs(adj.shape[0], len(val_edges))
    # remove edges already existing or in test_false:
    val_edges_false = val_edges_false - edges_all_set
    val_edges_false = val_edges_false - test_edges_false
    while len(val_edges_false) < len(val_edges):
        idx_i = np.random.randint(0, adj.shape[0])
        idx_j = np.random.randint(0, adj.shape[0])
        if idx_i == idx_j or (idx_i, idx_j) in edges_all_set:
            continue
        if (idx_i, idx_j) in test_edges_false or (idx_j,
                                                  idx_i) in test_edges_false:
            continue
        if (idx_i, idx_j) in val_edges_false or (idx_j,
                                                 idx_i) in val_edges_false:
            continue
        val_edges_false.add((idx_i, idx_j))


    # assert not ismember(test_edges_false, edges_all)
    # assert not ismember(val_edges_false, edges_all)
    # assert not ismember(val_edges, train_edges)
    # assert not ismember(test_edges, train_edges)
    # assert not ismember(val_edges, test_edges)

    # convert sets to numpy arrays:
    test_edges_false = np.array([np.array(x) for x in test_edges_false])
    val_edges_false = np.array([np.array(x) for x in val_edges_false])

    # NOTE: these edge lists only contain single direction of edge!
    return adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false
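
# `generate_random_pairs` is a project-internal helper not shown here; from
# its call sites it must return a set of random (i, j) node pairs. A
# plausible sketch under that assumption (not necessarily the project's
# actual implementation):
import numpy as np

def generate_random_pairs(n_nodes, n_pairs):
    """Return a set of up to n_pairs random (i, j) pairs with i != j."""
    idx_i = np.random.randint(0, n_nodes, size=n_pairs)
    idx_j = np.random.randint(0, n_nodes, size=n_pairs)
    return {(i, j) for i, j in zip(idx_i.tolist(), idx_j.tolist()) if i != j}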
Example #54
0
File: ham.py Project: silsgs/sisl
    def write_hamiltonian(self, ham, hermitian=True, **kwargs):
        """ Writes the Hamiltonian model to the file

        Writes a Hamiltonian model to the intrinsic Hamiltonian file format.
        The file can be constructed with Hermiticity implicitly enforced,
        or without.

        Utilizing Hermiticity reduces the file size by approximately
        50%.

        Parameters
        ----------
        ham : `Hamiltonian` model
        hermitian : boolean=True
            whether the stored data is halved using the Hermitian property
        """
        # We use the upper-triangular form of the Hamiltonian
        # and the overlap matrix for hermitian problems

        geom = ham.geometry

        # First write the geometry
        self.write_geometry(geom, **kwargs)

        # We default to the advanced layout if we have more than one
        # orbital on any one atom
        advanced = kwargs.get(
            'advanced',
            np.any(np.array([a.no for a in geom.atom.atom], np.int32) > 1))

        fmt = kwargs.get('fmt', 'g')
        if advanced:
            fmt1_str = ' {{0:d}}[{{1:d}}] {{2:d}}[{{3:d}}] {{4:{0}}}\n'.format(
                fmt)
            fmt2_str = ' {{0:d}}[{{1:d}}] {{2:d}}[{{3:d}}] {{4:{0}}} {{5:{0}}}\n'.format(
                fmt)
        else:
            fmt1_str = ' {{0:d}} {{1:d}} {{2:{0}}}\n'.format(fmt)
            fmt2_str = ' {{0:d}} {{1:d}} {{2:{0}}} {{3:{0}}}\n'.format(fmt)

        # We currently force the model to be finalized
        # before we can write it
        # This could easily be circumvented
        H = ham.tocsr(0)
        if not ham.orthogonal:
            S = ham.tocsr(ham.S_idx)

        # If the model is Hermitian we can
        # do with writing out half the entries
        if hermitian:
            herm_acc = kwargs.get('herm_acc', 1e-6)
            # We check whether it is Hermitian (not S)
            for i, isc in enumerate(geom.sc.sc_off):
                oi = i * geom.no
                oj = geom.sc_index(-isc) * geom.no
                # get the difference between the ^\dagger elements
                diff = H[:, oi:oi + geom.no] - \
                    H[:, oj:oj + geom.no].transpose()
                diff.eliminate_zeros()
                if np.any(np.abs(diff.data) > herm_acc):
                    amax = np.amax(np.abs(diff.data))
                    warn(
                        SileWarning(
                            'The model could not be asserted to be Hermitian '
                            'within the accuracy required ({0}).'.format(
                                amax)))
                    hermitian = False
                del diff

        if hermitian:
            # Remove all double stuff
            for i, isc in enumerate(geom.sc.sc_off):
                if np.any(isc < 0):
                    # We have ^\dagger element, remove it
                    o = i * geom.no
                    # Ensure that we remove all nullified quantities
                    # (setting elements to zero will add them internally,
                    # hence this actually constructs the full matrix).
                    # Therefore we do it on a row basis, to limit memory
                    # requirements.
                    for j in range(geom.no):
                        H[j, o:o + geom.no] = 0.
                        H.eliminate_zeros()
                        if not ham.orthogonal:
                            S[j, o:o + geom.no] = 0.
                            S.eliminate_zeros()
            o = geom.sc_index(np.zeros([3], np.int32))
            # Get upper-triangular matrix of the unit-cell H and S
            ut = triu(H[:, o:o + geom.no], k=0).tocsr()
            for j in range(geom.no):
                H[j, o:o + geom.no] = 0.
                H[j, o:o + geom.no] = ut[j, :]
                H.eliminate_zeros()
            if not ham.orthogonal:
                ut = triu(S[:, o:o + geom.no], k=0).tocsr()
                for j in range(geom.no):
                    S[j, o:o + geom.no] = 0.
                    S[j, o:o + geom.no] = ut[j, :]
                    S.eliminate_zeros()

                # Ensure that S and H have the same sparsity pattern
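                # (the self-assignment below touches H at every index present
                #  in S, forcing those elements into H's sparsity pattern
                #  without changing any values)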
                for jo, io in ispmatrix(S):
                    H[jo, io] = H[jo, io]

            del ut

        # Start writing of the model
        # We loop on all super-cells
        for i, isc in enumerate(geom.sc.sc_off):
            # Check that we have any contributions in this
            # sub-section
            Hsub = H[:, i * geom.no:(i + 1) * geom.no]
            if not ham.orthogonal:
                Ssub = S[:, i * geom.no:(i + 1) * geom.no]
            if Hsub.getnnz() == 0:
                continue
            # We have a contribution, write out the information
            self._write('\nbegin matrix {0:d} {1:d} {2:d}\n'.format(*isc))
            if advanced:
                for jo, io, h in ispmatrixd(Hsub):
                    o = np.array([jo, io], np.int32)
                    a = geom.o2a(o)
                    o = o - geom.a2o(a)
                    if not ham.orthogonal:
                        s = Ssub[jo, io]
                    elif jo == io:
                        s = 1.
                    else:
                        s = 0.
                    if s == 0.:
                        self._write(fmt1_str.format(a[0], o[0], a[1], o[1], h))
                    else:
                        self._write(
                            fmt2_str.format(a[0], o[0], a[1], o[1], h, s))
            else:
                for jo, io, h in ispmatrixd(Hsub):
                    if not ham.orthogonal:
                        s = Ssub[jo, io]
                    elif jo == io:
                        s = 1.
                    else:
                        s = 0.
                    if s == 0.:
                        self._write(fmt1_str.format(jo, io, h))
                    else:
                        self._write(fmt2_str.format(jo, io, h, s))
            self._write('end matrix {0:d} {1:d} {2:d}\n'.format(*isc))
Example #55
0
def _compute_global_cell_graph_features(
    centroids,
    neighbor_distances,
    neighbor_counts,
):
    """Internal support for compute_global_cell_graph_features that
    returns its result in a nested nametuple structure instead of a
    pandas DataFrame.
    """
    vor = Voronoi(centroids)
    centroids = vor.points
    vertices = vor.vertices

    regions = [r for r in vor.regions if r and -1 not in r]
    areas = np.stack([_poly_area(vertices[r]) for r in regions])
    peris = np.stack([_poly_peri(vertices[r]) for r in regions])
    max_dists = np.stack([pdist(vertices[r]).max() for r in regions])
    poly_props = PolyProps._make(map(_pop_stats, (areas, peris, max_dists)))

    de = Delaunay(centroids)
    # From the docs: "Coplanar points are input points which were not
    # included in the triangulation due to numerical precision
    # issues."  I don't know how this would affect the results if
    # present, and it doesn't appear to happen, so it's excluded here.
    assert not de.coplanar.size
    indptr, indices = de.vertex_neighbor_vertices
    bin_connectivity = sparse.csr_matrix(
        (np.ones(len(indices), dtype=bool), indices, indptr),
        (len(centroids), ) * 2)
    ridge_points = sparse.triu(bin_connectivity, format='coo')
    ridge_points = np.stack((ridge_points.row, ridge_points.col), axis=-1)

    # This isn't exactly the collection of sides, since if they should
    # be counted per-triangle then we weight border ridges wrong
    # relative to ridges that are part of two triangles.
    ridge_lengths = _dist(*np.swapaxes(centroids[ridge_points], 0, 1))
    sides = ridge_lengths
    areas = np.stack([_poly_area(centroids[t]) for t in de.simplices])
    tri_props = TriProps._make(map(_pop_stats, (sides, areas)))

    graph = sparse.coo_matrix((ridge_lengths, ridge_points.T),
                              (len(centroids), len(centroids)))
    mst = minimum_spanning_tree(graph)
    # Without looking into exactly how minimum_spanning_tree
    # constructs its output, eliminate any explicit zeros to be on the
    # safe side.
    mst_branches = _pop_stats(mst.data[mst.data != 0])

    tree = KDTree(centroids)
    neigbors_in_distance = {
        # Yes, we just throw away the actual points
        r: _pop_stats(np.stack([len(p) for p in tree.query_ball_tree(tree, r)]) - 1)
        for r in neighbor_distances
    }
    distance_for_neighbors = dict(
        zip(
            neighbor_counts,
            map(_pop_stats,
                tree.query(centroids, [c + 1 for c in neighbor_counts])[0].T),
        ))
    density_props = DensityProps(neigbors_in_distance, distance_for_neighbors)

    return Props(poly_props, tri_props, mst_branches, density_props)
Example #56
0
def test_triul(shape, k):
    s = sparse.random(shape, density=0.5)
    x = s.todense()

    assert_eq(np.triu(x, k), sparse.triu(s, k))
    assert_eq(np.tril(x, k), sparse.tril(s, k))
Example #57
0
for EI in G6.Edges():
    print(EI.GetSrcNId(), EI.GetDstNId())
    
    
import random
import networkx as nx
import numpy as np
from scipy import sparse
random.seed(10)
np.random.seed(123)
p = 5
d = 1
# G = nx.scale_free_graph(p)
S = nx.barabasi_albert_graph(p, d)  
S = nx.adjacency_matrix(S)
S = sparse.triu(S)
row_ix, col_ix = sparse.find(S)[0:2]
n_nonzero = len(sparse.find(S)[2])
S = S.todense().astype(float)
S0 = S.copy()
for i in range(n_nonzero):
    r = np.random.uniform(0, 1.)
    S[row_ix[i], col_ix[i]] = r-1. if r < 0.5 else r

vec_div = 1.5*np.sum(np.absolute(S), axis = 1) 
for i in range(p):
    if vec_div[i]:
        # standardize only when the row's absolute sum is nonzero
        S[i,:] = S[i,:]/vec_div[i]
A = (S + S.T)/2 + np.matrix(np.eye(p))
# check if A is PD
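
# The comment above announces a positive-definiteness check that the snippet
# never performs; one way to complete it (a symmetric matrix is PD iff all
# of its eigenvalues are > 0, equivalently iff Cholesky succeeds):
eigvals = np.linalg.eigvalsh(A)
print('A is PD:', bool(np.all(eigvals > 0)))
try:
    np.linalg.cholesky(A)
    print('Cholesky succeeded, so A is PD')
except np.linalg.LinAlgError:
    print('Cholesky failed, so A is not PD')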
Example #58
0
row = []
col = []
for contig in contigs:
    for spacer in d[contig]:
        row.append(contigs_id[contig])
        col.append(spacers_id[spacer])

data = np.ones(len(row))

from scipy.sparse import csr_matrix, find

contig_spacer_mat = csr_matrix((data, (row, col)),
                               shape=(len(contigs), len(spacers)))

spacer_cooccur_mat = contig_spacer_mat.T * contig_spacer_mat

i, j, v = find(spacer_cooccur_mat)
diag = spacer_cooccur_mat.diagonal()
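
# Keep a co-occurrence count v only where both the Dice coefficient,
# 2*|i & j| / (|i| + |j|) (the diagonal holds each spacer's total contig
# count), and the raw co-occurrence count pass their thresholds.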

w = np.where(
    np.logical_and(2 * v / (diag[i] + diag[j]) >= args.min_dice_coefficient,
                   v >= args.min_co_occurance), v, 0)
spacer_cooccur_mat_ = csr_matrix((w, (i, j)), shape=spacer_cooccur_mat.shape)
spacer_cooccur_mat_.setdiag(0)
spacer_cooccur_mat_.eliminate_zeros()
from scipy.sparse import triu

for i, j, v in zip(*find(triu(spacer_cooccur_mat_, k=1))):
    print(spacers[i], spacers[j], v)
Example #59
0
def mask_bipartite_perturbation_test_edges(adj):
    print('args.dataset: ', args.dataset)
    with open('data/bipartite/id2name/'+ str(args.dataset) +'u2id.pkl', 'rb') as f:
        u2id = pickle.load(f)
    with open('data/bipartite/id2name/'+ str(args.dataset) +'v2id.pkl', 'rb') as f:
        v2id = pickle.load(f)
    # Function to build test set with 10% positive links
    # NOTE: Splits are randomized and results might slightly deviate from reported numbers in the paper.
    # TODO: Clean up.

    # Remove diagonal elements
    adj = adj - sp.dia_matrix((adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape)
    adj.eliminate_zeros()
    # Check that diag is zero:
    assert np.diag(adj.todense()).sum() == 0

    adj_triu = sp.triu(adj)
    adj_tuple = sparse_to_tuple(adj_triu)
    edges = adj_tuple[0]
    edges_all = sparse_to_tuple(adj)[0]

    ''' original training/test'''
    num_test = int(np.floor(edges.shape[0] / args.num_test))
    num_val = int(np.floor(edges.shape[0] / 20.))
    all_edge_idx = list(range(edges.shape[0]))
    np.random.seed(args.edge_idx_seed)
    np.random.shuffle(all_edge_idx)
    args.edge_idx_seed += 1
    val_edge_idx = all_edge_idx[:num_val]
    test_edge_idx = all_edge_idx[num_val:(num_val + num_test)]

    test_edges = edges[test_edge_idx]
    val_edges = edges[val_edge_idx]
    train_edges = np.delete(edges, np.hstack([test_edge_idx, val_edge_idx]), axis=0)

    # Re-build adj matrix
    data = np.ones(train_edges.shape[0])
    adj_train = sp.csr_matrix((data, (train_edges[:, 0], train_edges[:, 1])), shape=adj.shape)
    # adj_train = adj_train + adj_train.T

    def ismember(a, b, tol=5):
        rows_close = np.all(np.round(a - b[:, None], tol) == 0, axis=-1)
        return np.any(rows_close)

    def isSetValidMember(a, b):
        setA = set()
        setB = set()

        for (x, y) in a:
            setA.add((x, y))
        for (x, y) in b:
            setB.add((x, y))
        return len(setA.intersection(setB)) > 0

    def isSetMember(a,b):
        setA = set()
        setB = set()

        for (x,y) in a:
            setA.add((x,y))
        for index in range(b.shape[0]):
            setB.add((b[index,0],b[index,1]))
        return len(setA.intersection(setB)) > 0

    if args.use_saved_edge_false:
        with open(str(args.dataset) +'_test_edges_false.pkl', 'rb') as f:
            test_edges_false = pickle.load(f)
        with open(str(args.dataset) +'_val_edges_false.pkl', 'rb') as f:
            val_edges_false = pickle.load(f)

        print('len(train_edges): ',len(train_edges))
        print('len(test_edges): ',len(test_edges))
        print('len(edges): ', len(edges))

        assert not isSetMember(test_edges_false, edges)
        print('not isSetMember(test_edges_false, edges) is True')
        assert not isSetMember(val_edges_false, edges)
        print('not isSetMember(val_edges_false, edges) is True')
        assert not isSetMember(val_edges, train_edges)
        print('not isSetMember(val_edges, train_edges) is True')
        assert not isSetMember(test_edges, train_edges)
        print('not isSetMember(test_edges, train_edges) is True')
        assert not isSetMember(val_edges, test_edges)
        print('not isSetMember(val_edges, test_edges) is True')

        return adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false, edges_all, None

    test_edges_false = []
    val_edges_false = []

    ''' only for large datasets '''
    # if args.dataset == 'movie1m' or args.dataset == 'movie100k' or args.dataset == 'pubmed' or args.dataset == 'nanet':

    top_right_adj = adj[:len(u2id),len(u2id):].toarray()
    indexes = np.where(top_right_adj==0.0)
    np.random.seed(args.edge_idx_seed)
    np.random.shuffle(indexes[0])
    np.random.seed(args.edge_idx_seed)
    np.random.shuffle(indexes[1])

    val_index_i = indexes[0][:num_val]
    val_index_j = np.array(indexes[1][:num_val]) + len(u2id)

    test_index_i = indexes[0][num_val:num_test+num_val]
    test_index_j =  np.array(indexes[1][num_val:num_test+num_val]) + len(u2id)

    false_edges = []
    for i in range(len(indexes[0])):
        idx_i = indexes[0][i]
        idx_j = indexes[1][i]
        false_edges.append([idx_i, idx_j])

    for i in range(len(val_edges)):
        idx_i = val_index_i[i]
        idx_j = val_index_j[i]
        val_edges_false.append([idx_i, idx_j])

    for i in range(len(test_edges)):
        idx_i = test_index_i[i]
        idx_j = test_index_j[i]
        test_edges_false.append([idx_i, idx_j])

    # print(test_edges_false)
    # print(val_edges_false)

    # print(np.hstack([val_edges_false, test_edges_false]))
    # remove the pairs already used for validation/test false edges
    # (they were drawn from the front of the shuffled index arrays)
    train_false_edges = np.delete(false_edges, np.arange(num_val + num_test), axis=0)
    train_false_edges = train_false_edges[:len(train_edges)]

    assert not isSetMember(test_edges_false, edges)
    print('not isSetMember(test_edges_false, edges) is True')
    assert not isSetMember(val_edges_false, edges)
    print('not isSetMember(val_edges_false, edges) is True')
    assert not isSetMember(val_edges, train_edges)
    print('not isSetMember(val_edges, train_edges) is True')
    assert not isSetMember(test_edges, train_edges)
    print('not isSetMember(test_edges, train_edges) is True')
    assert not isSetMember(val_edges, test_edges)
    print('not isSetMember(val_edges, test_edges) is True')
    assert not isSetValidMember(val_edges_false, test_edges_false)
    print('not isSetValidMember(val_edges_false, test_edges_false) is True')
    
    print('len(train_edges): ',len(train_edges))
    print('len(val_edges): ',len(val_edges))
    print('len(test_edges): ',len(test_edges))
    print('len(edges): ', len(edges))
    print('len(val_edges_false):', len(val_edges_false))
    print('len(test_edges_false):', len(test_edges_false))
    print('len(false_edges):', len(false_edges))
    print('len(edges_all):', len(edges_all))
    # print('train false edges!')
    return adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false, edges_all, false_edges
Example #60
0
def main(args=None):
    args = parse_arguments().parse_args(args)
    log.debug(args)

    # parse from hicpro, homer, h5 and hic to cool
    if args.inputFormat != 'hic' and args.outputFormat != 'mcool':
        if len(args.matrices) != len(args.outFileName):
            log.error(
                'Number of input matrices does not match the number of output matrices!'
            )
            exit(1)
    if args.inputFormat == 'hic' and args.outputFormat == 'cool':
        log.info('Converting with hic2cool.')
        for i, matrix in enumerate(args.matrices):
            if args.resolutions is None:
                hic2cool_convert(matrix, args.outFileName[i], 0)
            else:

                for resolution in args.resolutions:
                    out_name = args.outFileName[i].split('.')
                    out_name[-2] = out_name[-2] + '_' + str(resolution)
                    out_name = '.'.join(out_name)
                    hic2cool_convert(matrix, out_name, resolution)
        return
    elif args.inputFormat in ['hicpro', 'homer', 'h5', 'cool']:
        format_was_h5 = False
        if args.inputFormat == 'h5':
            format_was_h5 = True
        applyCorrection = True
        if args.store_applied_correction:
            applyCorrection = False
        if args.inputFormat == 'hicpro':
            if len(args.matrices) != len(args.bedFileHicpro):
                log.error(
                    'Number of matrices and associated bed files need to be the same.'
                )
                log.error('Matrices: {}; Bed files: {}'.format(
                    len(args.matrices), len(args.bedFileHicpro)))
                sys.exit(1)

        for i, matrix in enumerate(args.matrices):
            if args.inputFormat == 'hicpro':
                matrixFileHandlerInput = MatrixFileHandler(
                    pFileType=args.inputFormat,
                    pMatrixFile=matrix,
                    pBedFileHicPro=args.bedFileHicpro[i])
            else:
                correction_operator = None

                if args.correction_division:
                    correction_operator = '/'

                chromosomes_to_load = None
                if args.chromosome:
                    chromosomes_to_load = [args.chromosome]

                applyCorrectionCoolerLoad = True
                if args.load_raw_values:
                    applyCorrectionCoolerLoad = False
                matrixFileHandlerInput = MatrixFileHandler(
                    pFileType=args.inputFormat,
                    pMatrixFile=matrix,
                    pCorrectionFactorTable=args.correction_name,
                    pCorrectionOperator=correction_operator,
                    pChrnameList=chromosomes_to_load,
                    pEnforceInteger=args.enforce_integer,
                    pApplyCorrectionCoolerLoad=applyCorrectionCoolerLoad)

            _matrix, cut_intervals, nan_bins, \
                distance_counts, correction_factors = matrixFileHandlerInput.load()

            log.debug('Setting done')

            if args.outputFormat in ['cool', 'h5', 'homer', 'ginteractions']:
                if args.outputFormat in ['homer', 'ginteractions']:
                    # make it an upper triangular matrix in case it is not already
                    _matrix = triu(_matrix)
                    # make it a full symmetrical matrix
                    _matrix = _matrix.maximum(_matrix.T)
                matrixFileHandlerOutput = MatrixFileHandler(
                    pFileType=args.outputFormat,
                    pEnforceInteger=args.enforce_integer,
                    pFileWasH5=format_was_h5)

                matrixFileHandlerOutput.set_matrix_variables(
                    _matrix, cut_intervals, nan_bins, correction_factors,
                    distance_counts)
                matrixFileHandlerOutput.save(args.outFileName[i],
                                             pSymmetric=True,
                                             pApplyCorrection=applyCorrection)
            elif args.outputFormat in ['mcool']:

                log.debug('outformat is mcool')
                if args.resolutions and len(args.matrices) > 1:
                    log.error(
                        'Please provide either one matrix with multiple resolutions to create, or multiple matrices.'
                    )
                if args.resolutions:
                    log.info(
                        'Correction factors are removed. They are not valid for any newly created resolution.'
                    )
                    hic_matrix = HiCMatrix.hiCMatrix()
                    hic_matrix.setMatrix(_matrix, cut_intervals)

                    bin_size = hic_matrix.getBinSize()

                    for j, resolution in enumerate(args.resolutions):
                        hic_matrix_res = deepcopy(hic_matrix)

                        _mergeFactor = int(resolution) // bin_size

                        log.debug('bin size {}'.format(bin_size))
                        log.debug('_mergeFactor {}'.format(_mergeFactor))
                        if int(resolution) != bin_size:
                            merged_matrix = hicMergeMatrixBins.merge_bins(
                                hic_matrix_res, _mergeFactor)
                        else:
                            merged_matrix = hic_matrix_res
                        append = False
                        if j > 0:
                            append = True
                        matrixFileHandlerOutput = MatrixFileHandler(
                            pFileType='cool',
                            pEnforceInteger=args.enforce_integer,
                            pAppend=append,
                            pFileWasH5=format_was_h5)

                        matrixFileHandlerOutput.set_matrix_variables(
                            merged_matrix.matrix, merged_matrix.cut_intervals,
                            merged_matrix.nan_bins,
                            merged_matrix.correction_factors,
                            merged_matrix.distance_counts)
                        matrixFileHandlerOutput.save(
                            args.outFileName[0] + '::/resolutions/' +
                            str(resolution),
                            pSymmetric=True,
                            pApplyCorrection=applyCorrection)

                else:
                    append = False
                    if i > 0:
                        append = True
                    hic_matrix = HiCMatrix.hiCMatrix()
                    hic_matrix.setMatrix(_matrix, cut_intervals)
                    bin_size = hic_matrix.getBinSize()
                    matrixFileHandlerOutput = MatrixFileHandler(
                        pFileType='cool',
                        pAppend=append,
                        pFileWasH5=format_was_h5)

                    matrixFileHandlerOutput.set_matrix_variables(
                        _matrix, cut_intervals, nan_bins, correction_factors,
                        distance_counts)
                    matrixFileHandlerOutput.save(
                        args.outFileName[0] + '::/resolutions/' +
                        str(bin_size),
                        pSymmetric=True,
                        pApplyCorrection=applyCorrection)
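
# The function above backs a command-line converter (HiCExplorer's
# hicConvertFormat); a plausible invocation, assuming the flag spellings
# mirror the `args` attributes used above (verify against the real parser):
#
#   hicConvertFormat --matrices sample.h5 --outFileName sample.cool \
#       --inputFormat h5 --outputFormat cool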