Example #1
def get_response_content(fs):
    # check input compatibility
    if fs.nvertices < fs.naxes+1:
        msg_a = 'attempting to plot too many eigenvectors '
        msg_b = 'for the given number of vertices'
        raise ValueError(msg_a + msg_b)
    # define the requested physical size of the images (in pixels)
    physical_size = (640, 480)
    # get the points
    L = create_laplacian_matrix(fs.nvertices)
    D = Euclid.laplacian_to_edm(L)
    HSH = Euclid.edm_to_dccov(D)
    W, VT = np.linalg.eigh(HSH)
    V = VT.T.tolist()
    if fs.eigenvalue_scaling:
        vectors = [np.array(v)*w for w, v in list(reversed(sorted(zip(np.sqrt(W), V))))[:-1]]
    else:
        vectors = [np.array(v) for w, v in list(reversed(sorted(zip(np.sqrt(W), V))))[:-1]]
    X = np.array(zip(*vectors))
    # transform the points to eigenfunctions such that the first point is positive
    F = X.T[:fs.naxes]
    for i in range(fs.naxes):
        if F[i][0] < 0:
            F[i] *= -1
    # draw the image
    try:
        ext = Form.g_imageformat_to_ext[fs.imageformat]
        return create_image_string(ext, physical_size, F, fs.xaxis_length)
    except CairoUtil.CairoUtilError as e:
        raise HandlingError(e)
Example #2
def process(tree_string):
    """
    @param tree_string: a newick string
    @return: a multi-line string that summarizes the results
    """
    np.set_printoptions(linewidth=200)
    out = StringIO()
    # build the newick tree from the string
    tree = NewickIO.parse(tree_string, FelTree.NewickTree)
    # get ordered names and ids
    ordered_ids, ordered_names = get_ordered_ids_and_names(tree)
    # get the distance matrix with ordered indices including all nodes in the tree
    nvertices = len(list(tree.preorder()))
    nleaves = len(list(tree.gen_tips()))
    id_to_index = dict((myid, i) for i, myid in enumerate(ordered_ids))
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    # define mass vectors
    m_uniform_unscaled = [1] * nvertices
    m_degenerate_unscaled = [1] * nleaves + [0] * (nvertices - nleaves)
    m_uniform = np.array(m_uniform_unscaled,
                         dtype=float) / sum(m_uniform_unscaled)
    m_degenerate = np.array(m_degenerate_unscaled,
                            dtype=float) / sum(m_degenerate_unscaled)
    # show some of the distance matrices
    print >> out, 'ordered names:'
    print >> out, ordered_names
    print >> out
    print >> out, 'embedded points with mass uniformly distributed among all vertices:'
    print >> out, Euclid.edm_to_weighted_points(D, m_uniform)
    print >> out
    print >> out, 'embedded points with mass uniformly distributed among the leaves:'
    print >> out, Euclid.edm_to_weighted_points(D, m_degenerate)
    print >> out
    # return the response
    return out.getvalue().strip()
Example #3
def process():
    """
    @return: a multi-line string that summarizes the results
    """
    np.set_printoptions(linewidth=200)
    out = StringIO()
    # define a degenerate mass vector
    m_degenerate = np.array([0.25, 0.25, 0.25, 0.25, 0, 0])
    # define some distance matrices
    D_leaves = Euclid.g_D_b
    D_all = Euclid.g_D_c
    nvertices = 6
    nleaves = 4
    # get the projection and the weighted multidimensional scaling
    X = Euclid.edm_to_points(D_all)
    Y = Euclid.edm_to_weighted_points(D_all, m_degenerate)
    D_X = np.array([[np.dot(pb - pa, pb - pa) for pa in X] for pb in X])
    D_Y = np.array([[np.dot(pb - pa, pb - pa) for pa in Y] for pb in Y])
    # get the embedding using only the leaves
    print >> out, 'embedding of leaves from the leaf distance matrix:'
    print >> out, Euclid.edm_to_points(D_leaves)
    print >> out, 'projection of all vertices onto the MDS space of the leaves:'
    print >> out, do_projection(D_all, nleaves)
    print >> out, 'embedding of all vertices using uniform weights:'
    print >> out, X
    print >> out, 'corresponding distance matrix:'
    print >> out, D_X
    print >> out, 'embedding of all vertices using degenerate weights:'
    print >> out, Y
    print >> out, 'corresponding distance matrix:'
    print >> out, D_Y
    return out.getvalue().strip()
Example #4
def process(tree_string):
    """
    @param tree_string: a newick string
    @return: a multi-line string that summarizes the results
    """
    np.set_printoptions(linewidth=200)
    out = StringIO()
    # build the newick tree from the string
    tree = NewickIO.parse(tree_string, FelTree.NewickTree)
    # get ordered names and ids
    ordered_ids, ordered_names = get_ordered_ids_and_names(tree)
    # get the distance matrix with ordered indices including all nodes in the tree
    nvertices = len(list(tree.preorder()))
    nleaves = len(list(tree.gen_tips()))
    id_to_index = dict((myid, i) for i, myid in enumerate(ordered_ids))
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    # define mass vectors
    m_uniform_unscaled = [1]*nvertices
    m_degenerate_unscaled = [1]*nleaves + [0]*(nvertices-nleaves)
    m_uniform = np.array(m_uniform_unscaled, dtype=float) / sum(m_uniform_unscaled)
    m_degenerate = np.array(m_degenerate_unscaled, dtype=float) / sum(m_degenerate_unscaled)
    # show some of the distance matrices
    print >> out, 'ordered names:'
    print >> out, ordered_names
    print >> out
    print >> out, 'embedded points with mass uniformly distributed among all vertices:'
    print >> out, Euclid.edm_to_weighted_points(D, m_uniform)
    print >> out
    print >> out, 'embedded points with mass uniformly distributed among the leaves:'
    print >> out, Euclid.edm_to_weighted_points(D, m_degenerate)
    print >> out
    # return the response
    return out.getvalue().strip()
Example #5
def process():
    """
    @return: a multi-line string that summarizes the results
    """
    np.set_printoptions(linewidth=200)
    out = StringIO()
    # define a degenerate mass vector
    m_degenerate = np.array([0.25, 0.25, 0.25, 0.25, 0, 0])
    # define some distance matrices
    D_leaves = Euclid.g_D_b
    D_all = Euclid.g_D_c
    nvertices = 6
    nleaves = 4
    # get the projection and the weighted multidimensional scaling
    X = Euclid.edm_to_points(D_all)
    Y = Euclid.edm_to_weighted_points(D_all, m_degenerate)
    D_X = np.array([[np.dot(pb-pa, pb-pa) for pa in X] for pb in X])
    D_Y = np.array([[np.dot(pb-pa, pb-pa) for pa in Y] for pb in Y])
    # get the embedding using only the leaves
    print >> out, 'embedding of leaves from the leaf distance matrix:'
    print >> out, Euclid.edm_to_points(D_leaves)
    print >> out, 'projection of all vertices onto the MDS space of the leaves:'
    print >> out, do_projection(D_all, nleaves)
    print >> out, 'embedding of all vertices using uniform weights:'
    print >> out, X
    print >> out, 'corresponding distance matrix:'
    print >> out, D_X
    print >> out, 'embedding of all vertices using degenerate weights:'
    print >> out, Y
    print >> out, 'corresponding distance matrix:'
    print >> out, D_Y
    return out.getvalue().strip()
Example #6
def get_response_content(fs):
    # build the newick tree from the string
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    nvertices = len(list(tree.preorder()))
    nleaves = len(list(tree.gen_tips()))
    ninternal = nvertices - nleaves
    # get ordered ids with the internal nodes first
    ordered_ids = get_ordered_ids(tree)
    leaf_ids = [id(node) for node in tree.gen_tips()]
    # get the distance matrix and the augmented distance matrix
    D_leaf = np.array(tree.get_partial_distance_matrix(leaf_ids))
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    D_aug = get_augmented_distance(D, nleaves, fs.ndups)
    # analyze the leaf distance matrix
    X_leaf = Euclid.edm_to_points(D_leaf)
    # get the eigendecomposition of the centered augmented distance matrix
    X_aug = Euclid.edm_to_points(D_aug, nvertices-1)
    # explicitly compute the points for the given number of dups using weights
    m = [1]*ninternal + [1+fs.ndups]*nleaves
    m = np.array(m, dtype=float) / sum(m)
    X_weighted = Euclid.edm_to_weighted_points(D, m)
    # explicitly compute the points for 10x dups
    m = [1]*ninternal + [1+fs.ndups*10]*nleaves
    m = np.array(m, dtype=float) / sum(m)
    X_weighted_10x = Euclid.edm_to_weighted_points(D, m)
    # explicitly compute the limiting points as the number of dups increases
    X = Euclid.edm_to_points(D)
    X -= np.mean(X[-nleaves:], axis=0)
    XL = X[-nleaves:]
    U, s, Vt = np.linalg.svd(XL)
    Z = np.dot(X, Vt.T)
    # report the results
    np.set_printoptions(linewidth=300, threshold=10000)
    out = StringIO()
    print >> out, 'leaf distance matrix:'
    print >> out, D_leaf
    print >> out
    print >> out, 'points derived from the leaf distance matrix'
    print >> out, '(the first column is proportional to the Fiedler vector):'
    print >> out, X_leaf
    print >> out
    if fs.show_aug:
        print >> out, 'augmented distance matrix:'
        print >> out, D_aug
        print >> out
    print >> out, 'points derived from the augmented distance matrix'
    print >> out, '(the first column is proportional to the Fiedler vector):'
    print >> out, get_ugly_matrix(X_aug, ninternal, nleaves)
    print >> out
    print >> out, 'points computed using masses:'
    print >> out, X_weighted
    print >> out
    print >> out, 'points computed using masses with 10x dups:'
    print >> out, X_weighted_10x
    print >> out
    print >> out, 'limiting points:'
    print >> out, Z
    print >> out
    return out.getvalue()
Example #7
def get_response_content(fs):
    # build the newick tree from the string
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    nvertices = len(list(tree.preorder()))
    nleaves = len(list(tree.gen_tips()))
    ninternal = nvertices - nleaves
    # get ordered ids with the internal nodes first
    ordered_ids = get_ordered_ids(tree)
    leaf_ids = [id(node) for node in tree.gen_tips()]
    # get the distance matrix and the augmented distance matrix
    D_leaf = np.array(tree.get_partial_distance_matrix(leaf_ids))
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    D_aug = get_augmented_distance(D, nleaves, fs.ndups)
    # analyze the leaf distance matrix
    X_leaf = Euclid.edm_to_points(D_leaf)
    # get the eigendecomposition of the centered augmented distance matrix
    X_aug = Euclid.edm_to_points(D_aug, nvertices - 1)
    # explicitly compute the points for the given number of dups using weights
    m = [1] * ninternal + [1 + fs.ndups] * nleaves
    m = np.array(m, dtype=float) / sum(m)
    X_weighted = Euclid.edm_to_weighted_points(D, m)
    # explicitly compute the points for 10x dups
    m = [1] * ninternal + [1 + fs.ndups * 10] * nleaves
    m = np.array(m, dtype=float) / sum(m)
    X_weighted_10x = Euclid.edm_to_weighted_points(D, m)
    # explicitly compute the limiting points as the number of dups increases
    X = Euclid.edm_to_points(D)
    X -= np.mean(X[-nleaves:], axis=0)
    XL = X[-nleaves:]
    U, s, Vt = np.linalg.svd(XL)
    Z = np.dot(X, Vt.T)
    # report the results
    np.set_printoptions(linewidth=300, threshold=10000)
    out = StringIO()
    print >> out, 'leaf distance matrix:'
    print >> out, D_leaf
    print >> out
    print >> out, 'points derived from the leaf distance matrix'
    print >> out, '(the first column is proportional to the Fiedler vector):'
    print >> out, X_leaf
    print >> out
    if fs.show_aug:
        print >> out, 'augmented distance matrix:'
        print >> out, D_aug
        print >> out
    print >> out, 'points derived from the augmented distance matrix'
    print >> out, '(the first column is proportional to the Fiedler vector):'
    print >> out, get_ugly_matrix(X_aug, ninternal, nleaves)
    print >> out
    print >> out, 'points computed using masses:'
    print >> out, X_weighted
    print >> out
    print >> out, 'points computed using masses with 10x dups:'
    print >> out, X_weighted_10x
    print >> out
    print >> out, 'limiting points:'
    print >> out, Z
    print >> out
    return out.getvalue()
Example #8
def get_response_content(fs):
    locations = get_locations()
    np_locs = [np.array(p) for p in locations]
    edges = get_edges()
    npoints = len(locations)
    # start writing the response
    np.set_printoptions(linewidth=200)
    out = StringIO()
    # print the layout data
    print >> out, 'POINTS'
    for i, (x, y) in enumerate(locations):
        print >> out, i, x, y
    print >> out, 'EDGES'
    for i, j in edges:
        print >> out, i, j
    print >> out
    # show the unweighted adjacency matrix
    UA = np.zeros((npoints, npoints))
    for i, j in edges:
        UA[i, j] = 1
        UA[j, i] = 1
    print >> out, 'unweighted adjacency matrix:'
    print >> out, UA
    print >> out
    # show the unweighted laplacian matrix
    UL = Euclid.adjacency_to_laplacian(UA)
    print >> out, 'unweighted laplacian matrix:'
    print >> out, UL
    print >> out
    # show the weighted adjacency matrix
    WA = np.zeros((npoints, npoints))
    for i, j in edges:
        d = np.linalg.norm(np_locs[i] - np_locs[j]) / math.sqrt(2.0)
        w = 1.0 / d
        WA[i, j] = w
        WA[j, i] = w
    print >> out, 'weighted adjacency matrix:'
    print >> out, WA
    print >> out
    # show the weighted laplacian matrix
    WL = Euclid.adjacency_to_laplacian(WA)
    print >> out, 'weighted laplacian matrix:'
    print >> out, WL
    print >> out
    # remove the two internal nodes by schur complementation
    ntips = 4
    schur_L = SchurAlgebra.schur_helper(WL, 2)
    X = Euclid.dccov_to_points(np.linalg.pinv(schur_L))
    print >> out, 'schur graph layout:'
    print >> out, 'POINTS'
    for i, v in enumerate(X):
        print >> out, i, v[0], v[1]
    print >> out, 'EDGES'
    for i in range(ntips):
        for j in range(i+1, ntips):
            print >> out, i, j
    # return the response
    return out.getvalue()
Example #9
def get_response_content(fs):
    locations = get_locations()
    np_locs = [np.array(p) for p in locations]
    edges = get_edges()
    npoints = len(locations)
    # start writing the response
    np.set_printoptions(linewidth=200)
    out = StringIO()
    # print the layout data
    print >> out, 'POINTS'
    for i, (x, y) in enumerate(locations):
        print >> out, i, x, y
    print >> out, 'EDGES'
    for i, j in edges:
        print >> out, i, j
    print >> out
    # show the unweighted adjacency matrix
    UA = np.zeros((npoints, npoints))
    for i, j in edges:
        UA[i, j] = 1
        UA[j, i] = 1
    print >> out, 'unweighted adjacency matrix:'
    print >> out, UA
    print >> out
    # show the unweighted laplacian matrix
    UL = Euclid.adjacency_to_laplacian(UA)
    print >> out, 'unweighted laplacian matrix:'
    print >> out, UL
    print >> out
    # show the weighted adjacency matrix
    WA = np.zeros((npoints, npoints))
    for i, j in edges:
        d = np.linalg.norm(np_locs[i] - np_locs[j]) / math.sqrt(2.0)
        w = 1.0 / d
        WA[i, j] = w
        WA[j, i] = w
    print >> out, 'weighted adjacency matrix:'
    print >> out, WA
    print >> out
    # show the weighted laplacian matrix
    WL = Euclid.adjacency_to_laplacian(WA)
    print >> out, 'weighted laplacian matrix:'
    print >> out, WL
    print >> out
    # remove the two internal nodes by schur complementation
    ntips = 4
    schur_L = SchurAlgebra.schur_helper(WL, 2)
    X = Euclid.dccov_to_points(np.linalg.pinv(schur_L))
    print >> out, 'schur graph layout:'
    print >> out, 'POINTS'
    for i, v in enumerate(X):
        print >> out, i, v[0], v[1]
    print >> out, 'EDGES'
    for i in range(ntips):
        for j in range(i + 1, ntips):
            print >> out, i, j
    # return the response
    return out.getvalue()
Example #10
def get_response_content(fs):
    # get the tree
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    # get information about the tree topology
    internal = [id(node) for node in tree.gen_internal_nodes()]
    tips = [id(node) for node in tree.gen_tips()]
    vertices = internal + tips
    ntips = len(tips)
    ninternal = len(internal)
    nvertices = len(vertices)
    # get the ordered ids with the leaves first
    ordered_ids = vertices
    # get the full weighted adjacency matrix
    A = np.array(tree.get_affinity_matrix(ordered_ids))
    # compute the weighted adjacency matrix of the decorated tree
    p = ninternal
    q = ntips
    N = fs.N
    if fs.weight_n:
        weight = float(N)
    elif fs.weight_sqrt_n:
        weight = math.sqrt(N)
    A_aug = get_A_aug(A, weight, p, q, N)
    # compute the weighted Laplacian matrix of the decorated tree
    L_aug = Euclid.adjacency_to_laplacian(A_aug)
    # compute the eigendecomposition
    w, vt = np.linalg.eigh(L_aug)
    # show the output
    np.set_printoptions(linewidth=1000, threshold=10000)
    out = StringIO()
    if fs.lap:
        print >> out, 'Laplacian of the decorated tree:'
        print >> out, L_aug
        print >> out
    if fs.eigvals:
        print >> out, 'eigenvalues:'
        for x in w:
            print >> out, x
        print >> out
    if fs.eigvecs:
        print >> out, 'eigenvector matrix:'
        print >> out, vt
        print >> out
    if fs.compare:
        # get the distance matrix for only the original tips
        D_tips = np.array(tree.get_partial_distance_matrix(tips))
        X_tips = Euclid.edm_to_points(D_tips)
        # wring the approximate points out of the augmented tree
        X_approx = vt[p:p+q].T[1:1+q-1].T / np.sqrt(w[1:1+q-1])
        # do the comparison
        print >> out, 'points from tip-only MDS:'
        print >> out, X_tips
        print >> out
        print >> out, 'approximate points from decorated tree:'
        print >> out, X_approx
        print >> out
    return out.getvalue()
Example #11
def produce_CRT():
    (p,q) = produce_p_q()
    n = p*q
    Euler = (p-1)*(q-1)  # Euler's totient of n
    d = Euclid.extended_Euclid(e, Euler)  # d is the inverse of e mod Euler: e*d = 1 (mod Euler)
    dP = Euclid.extended_Euclid(e, p-1)
    dQ = Euclid.extended_Euclid(e, q-1)
    qInv = Euclid.extended_Euclid(q, p)
    return (p,q,n,d,dP,dQ,qInv)
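Editorial note (not part of the listed project): the CRT parameters (dP, dQ, qInv) returned above are the ones an RSA-CRT decryption step would consume. A minimal sketch of that recombination, assuming the values come from a generator like produce_CRT and that Euclid.extended_Euclid returns a positive modular inverse:

def decrypt_crt(c, p, q, dP, dQ, qInv):
    # Garner's recombination: recover m = c^d mod (p*q) from two half-size exponentiations
    m1 = pow(c, dP, p)          # c^dP mod p
    m2 = pow(c, dQ, q)          # c^dQ mod q
    h = (qInv * (m1 - m2)) % p  # qInv is q^{-1} mod p
    return m2 + h * q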
Example #12
def get_response_content(fs):
    # build the newick tree from the string
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    nvertices = len(list(tree.preorder()))
    nleaves = len(list(tree.gen_tips()))
    # get ordered ids with the leaves first
    ordered_ids = get_ordered_ids(tree)
    # get the adjacency matrix and the augmented adjacency matrix
    A = np.array(tree.get_affinity_matrix(ordered_ids))
    A_aug = get_augmented_adjacency(A, nleaves, fs.ndups, fs.strength)
    # get the laplacian matrices
    L = Euclid.adjacency_to_laplacian(A)
    L_aug = Euclid.adjacency_to_laplacian(A_aug)
    # get the schur complement
    R = SchurAlgebra.mschur(L, set(range(nleaves, nvertices)))
    R_pinv = np.linalg.pinv(R)
    vals, vecs = EigUtil.eigh(R_pinv)
    # get the scaled Fiedler vector for the Schur complement
    w, v = EigUtil.principal_eigh(R_pinv)
    fiedler = v * math.sqrt(w)
    # get the eigendecomposition of the augmented Laplacian
    L_aug_pinv = np.linalg.pinv(L_aug)
    vals_aug, vecs_aug = EigUtil.eigh(L_aug_pinv)
    # get the scaled Fiedler vector for the augmented Laplacian
    w_aug, v_aug = EigUtil.principal_eigh(L_aug_pinv)
    fiedler_aug = v_aug * math.sqrt(w_aug)
    # report the results
    np.set_printoptions(linewidth=300)
    out = StringIO()
    print >> out, 'Laplacian matrix:'
    print >> out, L
    print >> out
    print >> out, 'Schur complement of Laplacian matrix:'
    print >> out, R
    print >> out
    print >> out, 'scaled Fiedler vector of Schur complement:'
    print >> out, fiedler
    print >> out
    print >> out, 'eigenvalues of pinv of Schur complement:'
    print >> out, vals
    print >> out
    print >> out, 'corresponding eigenvectors of pinv of Schur complement:'
    print >> out, np.array(vecs).T
    print >> out
    print >> out
    print >> out, 'augmented Laplacian matrix:'
    print >> out, L_aug
    print >> out
    print >> out, 'scaled Fiedler vector of augmented Laplacian:'
    print >> out, fiedler_aug
    print >> out
    print >> out, 'eigenvalues of pinv of augmented Laplacian:'
    print >> out, vals_aug
    print >> out
    print >> out, 'rows are eigenvectors of pinv of augmented Laplacian:'
    print >> out, np.array(vecs_aug)
    return out.getvalue()
Example #13
def get_splits(initial_distance_matrix,
               split_function,
               update_function,
               on_label_split=None):
    """
    This is the most external of the functions in this module.
    Get the set of splits implied by the tree that would be reconstructed.
    @param initial_distance_matrix: a distance matrix
    @param split_function: takes a distance matrix and returns an index split
    @param update_function: takes a distance matrix and an index subset and returns a distance matrix
    @param on_label_split: notifies the caller of the label split induced by an index split
    @return: a set of splits
    """
    n = len(initial_distance_matrix)
    # keep a stack of (label_set_per_vertex, distance_matrix) pairs
    initial_state = ([set([i]) for i in range(n)], initial_distance_matrix)
    stack = [initial_state]
    # process the stack in a depth first manner, building the split set
    label_split_set = set()
    while stack:
        label_sets, D = stack.pop()
        # if the matrix is small then we are done
        if len(D) < 4:
            continue
        # split the indices using the specified function
        try:
            index_split = split_function(D)
            # convert the index split to a label split
            label_split = index_split_to_label_split(index_split, label_sets)
            # notify the caller if a callback is requested
            if on_label_split:
                on_label_split(label_split)
            # add the split to the master set of label splits
            label_split_set.add(label_split)
            # for large matrices create the new label sets and the new conformant distance matrices
            a, b = index_split
            for index_selection, index_complement in ((a, b), (b, a)):
                if len(index_complement) > 2:
                    next_label_sets = SchurAlgebra.vmerge(
                        label_sets, index_selection)
                    next_D = update_function(D, index_selection)
                    next_state = (next_label_sets, next_D)
                    stack.append(next_state)
        except DegenerateSplitException, e:
            # we cannot recover from a degenerate split unless there are more than four indices
            if len(D) <= 4:
                continue
            # with more than four indices we can fall back to partial splits
            index_set = set([e.index])
            # get the next label sets
            next_label_sets = SchurAlgebra.vdelete(label_sets, index_set)
            # get the next conformant distance matrix by schur complementing out the offending index
            L = Euclid.edm_to_laplacian(D)
            L_small = SchurAlgebra.mschur(L, index_set)
            next_D = Euclid.laplacian_to_edm(L_small)
            next_state = (next_label_sets, next_D)
            stack.append(next_state)
    # return the accumulated set of splits promised by the docstring
    return label_split_set
Example #14
def process():
    """
    @return: a multi-line string that summarizes the results
    """
    np.set_printoptions(linewidth=200)
    # define the adjacency matrix
    A = g_A
    n = 6
    # define some mass distributions
    m_uniform = np.ones(n) / float(n)
    m_weighted = np.array([102, 102, 102, 102, 1, 1], dtype=float) / 410
    # make the response
    out = StringIO()
    # look at the eigendecomposition of -(1/2)HDH where D is the leaf distance matrix
    HSH = Euclid.edm_to_dccov(Euclid.g_D_b)
    W_HSH, VT_HSH = np.linalg.eigh(HSH)
    print >> out, 'W for -(1/2)HDH of the leaf distance matrix:'
    print >> out, W_HSH
    print >> out, 'VT for -(1/2)HDH of the leaf distance matrix:'
    print >> out, VT_HSH
    # look at the eigendecomposition of S given a degenerate mass distribution on the full tree
    m_degenerate = np.array([.25, .25, .25, .25, 0, 0])
    S = Euclid.edm_to_weighted_cross_product(Euclid.g_D_c, m_degenerate)
    W_S, VT_S = np.linalg.eigh(S)
    print >> out, 'W for -(1/2)(Xi)D(Xi)^T of the full distance matrix with degenerate masses:'
    print >> out, W_S
    print >> out, 'VT for -(1/2)(Xi)D(Xi)^T of the full distance matrix with degenerate masses:'
    print >> out, VT_S
    # look at the effects of various mass distributions on the MDS of the full tree
    for m in (m_uniform, m_weighted):
        # the mass distribution should sum to 1
        if not np.allclose(np.sum(m), 1):
            raise ValueError('masses should sum to 1')
        # to compute the perturbed laplacian matrix first get weighted sums
        v = np.dot(m, A)
        # now divide elementwise by the masses
        v /= m
        # subtract the adjacency matrix from the diagonal formed by elements of this vector
        Lp = np.diag(v) - A
        # now get the eigendecomposition of the pseudoinverse of the perturbed laplacian
        W_Lp_pinv, VT_Lp_pinv = np.linalg.eigh(np.linalg.pinv(Lp))
        # look at the eigendecomposition of the S matrix associated with the distance matrix of this tree
        D = Euclid.g_D_c
        S = Euclid.edm_to_weighted_cross_product(D, m)
        W_S, VT_S = np.linalg.eigh(S)
        print >> out, 'perturbed laplacian:'
        print >> out, Lp
        print >> out, 'm:', m
        print >> out, 'W for the pseudoinverse of the perturbed laplacian:'
        print >> out, W_Lp_pinv
        print >> out, 'VT for the pseudoinverse of the perturbed laplacian:'
        print >> out, VT_Lp_pinv
        print >> out, 'W for the cross product matrix:'
        print >> out, W_S
        print >> out, 'VT for the cross product matrix:'
        print >> out, VT_S
    return out.getvalue().strip()
Example #15
def get_response_content(fs):
    # get the tree
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    # get information about the tree topology
    internal = [id(node) for node in tree.gen_internal_nodes()]
    tips = [id(node) for node in tree.gen_tips()]
    vertices = internal + tips
    ntips = len(tips)
    ninternal = len(internal)
    nvertices = len(vertices)
    # get the ordered ids with the leaves first
    ordered_ids = vertices
    # get the full weighted adjacency matrix
    A = np.array(tree.get_affinity_matrix(ordered_ids))
    # compute the weighted adjacency matrix of the decorated tree
    p = ninternal
    q = ntips
    N = fs.N
    if fs.weight_n:
        weight = float(N)
    elif fs.weight_sqrt_n:
        weight = math.sqrt(N)
    A_aug = get_A_aug(A, weight, p, q, N)
    # compute the weighted Laplacian matrix of the decorated tree
    L_aug = Euclid.adjacency_to_laplacian(A_aug)
    # compute the eigendecomposition
    w, vt = np.linalg.eigh(L_aug)
    # show the output
    np.set_printoptions(linewidth=1000, threshold=10000)
    out = StringIO()
    if fs.lap:
        print >> out, 'Laplacian of the decorated tree:'
        print >> out, L_aug
        print >> out
    if fs.eigvals:
        print >> out, 'eigenvalues:'
        for x in w:
            print >> out, x
        print >> out
    if fs.eigvecs:
        print >> out, 'eigenvector matrix:'
        print >> out, vt
        print >> out
    if fs.compare:
        # get the distance matrix for only the original tips
        D_tips = np.array(tree.get_partial_distance_matrix(tips))
        X_tips = Euclid.edm_to_points(D_tips)
        # wring the approximate points out of the augmented tree
        X_approx = vt[p:p + q].T[1:1 + q - 1].T / np.sqrt(w[1:1 + q - 1])
        # do the comparison
        print >> out, 'points from tip-only MDS:'
        print >> out, X_tips
        print >> out
        print >> out, 'approximate points from decorated tree:'
        print >> out, X_approx
        print >> out
    return out.getvalue()
Example #16
def process():
    """
    @return: a multi-line string that summarizes the results
    """
    np.set_printoptions(linewidth=200)
    # define the adjacency matrix
    A = g_A
    n = 6
    # define some mass distributions
    m_uniform = np.ones(n) / float(n)
    m_weighted = np.array([102, 102, 102, 102, 1, 1], dtype=float) / 410
    # make the response
    out = StringIO()
    # look at the eigendecomposition of -(1/2)HDH where D is the leaf distance matrix
    HSH = Euclid.edm_to_dccov(Euclid.g_D_b)
    W_HSH, VT_HSH = np.linalg.eigh(HSH)
    print >> out, 'W for -(1/2)HDH of the leaf distance matrix:'
    print >> out, W_HSH
    print >> out, 'VT for -(1/2)HDH of the leaf distance matrix:'
    print >> out, VT_HSH
    # look at the eigendecomposition of S given a degenerate mass distribution on the full tree
    m_degenerate = np.array([.25, .25, .25, .25, 0, 0])
    S = Euclid.edm_to_weighted_cross_product(Euclid.g_D_c, m_degenerate)
    W_S, VT_S = np.linalg.eigh(S)
    print >> out, 'W for -(1/2)(Xi)D(Xi)^T of the full distance matrix with degenerate masses:'
    print >> out, W_S
    print >> out, 'VT for -(1/2)(Xi)D(Xi)^T of the full distance matrix with degenerate masses:'
    print >> out, VT_S
    # look at the effects of various mass distributions on the MDS of the full tree
    for m in (m_uniform, m_weighted):
        # the mass distribution should sum to 1
        if not np.allclose(np.sum(m), 1):
            raise ValueError('masses should sum to 1')
        # to compute the perturbed laplacian matrix first get weighted sums
        v = np.dot(m, A)
        # now divide elementwise by the masses
        v /= m
        # subtract the adjacency matrix from the diagonal formed by elements of this vector
        Lp = np.diag(v) - A
        # now get the eigendecomposition of the pseudoinverse of the perturbed laplacian
        W_Lp_pinv, VT_Lp_pinv = np.linalg.eigh(np.linalg.pinv(Lp))
        # look at the eigendecomposition of the S matrix associated with the distance matrix of this tree
        D = Euclid.g_D_c
        S = Euclid.edm_to_weighted_cross_product(D, m)
        W_S, VT_S = np.linalg.eigh(S)
        print >> out, 'perturbed laplacian:'
        print >> out, Lp
        print >> out, 'm:', m
        print >> out, 'W for the pseudoinverse of the perturbed laplacian:'
        print >> out, W_Lp_pinv
        print >> out, 'VT for the pseudoinverse of the perturbed laplacian:'
        print >> out, VT_Lp_pinv
        print >> out, 'W for the cross product matrix:'
        print >> out, W_S
        print >> out, 'VT for the cross product matrix:'
        print >> out, VT_S
    return out.getvalue().strip()
Example #17
def get_response_content(fs):
    # build the newick tree from the string
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    nvertices = len(list(tree.preorder()))
    nleaves = len(list(tree.gen_tips()))
    # get ordered ids with the leaves first
    ordered_ids = get_ordered_ids(tree)
    # get the adjacency matrix and the augmented adjacency matrix
    A = np.array(tree.get_affinity_matrix(ordered_ids))
    A_aug = get_augmented_adjacency(A, nleaves, fs.ndups, fs.strength)
    # get the laplacian matrices
    L = Euclid.adjacency_to_laplacian(A)
    L_aug = Euclid.adjacency_to_laplacian(A_aug)
    # get the schur complement
    R = SchurAlgebra.mschur(L, set(range(nleaves, nvertices)))
    R_pinv = np.linalg.pinv(R)
    vals, vecs = EigUtil.eigh(R_pinv)
    # get the scaled Fiedler vector for the Schur complement
    w, v = EigUtil.principal_eigh(R_pinv)
    fiedler = v * math.sqrt(w)
    # get the eigendecomposition of the augmented Laplacian
    L_aug_pinv = np.linalg.pinv(L_aug)
    vals_aug, vecs_aug = EigUtil.eigh(L_aug_pinv)
    # get the scaled Fiedler vector for the augmented Laplacian
    w_aug, v_aug = EigUtil.principal_eigh(L_aug_pinv)
    fiedler_aug = v_aug * math.sqrt(w_aug)
    # report the results
    np.set_printoptions(linewidth=300)
    out = StringIO()
    print >> out, 'Laplacian matrix:'
    print >> out, L
    print >> out
    print >> out, 'Schur complement of Laplacian matrix:'
    print >> out, R
    print >> out
    print >> out, 'scaled Fiedler vector of Schur complement:'
    print >> out, fiedler
    print >> out
    print >> out, 'eigenvalues of pinv of Schur complement:'
    print >> out, vals
    print >> out
    print >> out, 'corresponding eigenvectors of pinv of Schur complement:'
    print >> out, np.array(vecs).T
    print >> out
    print >> out
    print >> out, 'augmented Laplacian matrix:'
    print >> out, L_aug
    print >> out
    print >> out, 'scaled Fiedler vector of augmented Laplacian:'
    print >> out, fiedler_aug
    print >> out
    print >> out, 'eigenvalues of pinv of augmented Laplacian:'
    print >> out, vals_aug
    print >> out
    print >> out, 'rows are eigenvectors of pinv of augmented Laplacian:'
    print >> out, np.array(vecs_aug)
    return out.getvalue()
Example #18
def get_response_content(fs):
    # read the lat-lon points from the input
    lines = Util.get_stripped_lines(fs.datalines.splitlines())
    rows = parse_lines(lines)
    latlon_points = []
    city_names = []
    for city, latd, latm, lond, lonm in rows:
        lat = math.radians(GPS.degrees_minutes_to_degrees(latd, latm))
        lon = math.radians(GPS.degrees_minutes_to_degrees(lond, lonm))
        latlon_points.append((lat, lon))
        city_names.append(city)
    npoints = len(latlon_points)
    # start writing the response
    np.set_printoptions(linewidth=200)
    out = StringIO()
    radius = GPS.g_earth_radius_miles
    for dfunc, name in (
            (GPS.get_arc_distance, 'great arc'),
            (GPS.get_euclidean_distance, 'euclidean')):
        # define the edm whose elements are squared euclidean-like distances
        edm = np.zeros((npoints, npoints))
        D = np.zeros((npoints, npoints))
        for i, pointa in enumerate(latlon_points):
            for j, pointb in enumerate(latlon_points):
                D[i, j] = dfunc(pointa, pointb, radius)
                edm[i, j] = D[i, j]**2
        print >> out, name, 'distances:'
        print >> out, D
        print >> out
        print >> out, name, 'EDM:'
        print >> out, edm
        print >> out
        G = Euclid.edm_to_dccov(edm)
        print >> out, name, 'Gower centered matrix:'
        print >> out, G
        print >> out
        spectrum = np.array(list(reversed(sorted(np.linalg.eigvals(G)))))
        print >> out, name, 'spectrum of Gower centered matrix:'
        for x in spectrum:
            print >> out, x
        print >> out
        print >> out, name, 'rounded spectrum:'
        for x in spectrum:
            print >> out, '%.1f' % x
        print >> out
        mds_points = Euclid.edm_to_points(edm)
        print >> out, '2D MDS coordinates:'
        # use a distinct loop variable to avoid shadowing the outer 'name'
        for city, mds_point in zip(city_names, mds_points):
            x = mds_point[0]
            y = mds_point[1]
            print >> out, '\t'.join(str(v) for v in [city, x, y])
        print >> out
        # break between distance methods
        print >> out
    # return the response
    return out.getvalue()
Example #19
def get_response_content(fs):
    # build the newick tree from the string
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    nvertices = len(list(tree.preorder()))
    nleaves = len(list(tree.gen_tips()))
    # get ordered ids with the leaves first
    ordered_ids = get_ordered_ids(tree)
    # get the distance matrix and the augmented distance matrix
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    D_aug = get_augmented_distance(D, nleaves, fs.ndups)
    # get the laplacian matrix
    L = Euclid.edm_to_laplacian(D)
    # get the schur complement
    R = SchurAlgebra.mschur(L, set(range(nleaves, nvertices)))
    R_pinv = np.linalg.pinv(R)
    vals, vecs = EigUtil.eigh(R_pinv)
    # get the scaled Fiedler vector for the Schur complement
    w, v = EigUtil.principal_eigh(R_pinv)
    fiedler = v * math.sqrt(w)
    # get the eigendecomposition of the centered augmented distance matrix
    L_aug_pinv = Euclid.edm_to_dccov(D_aug)
    vals_aug, vecs_aug = EigUtil.eigh(L_aug_pinv)
    # get the scaled Fiedler vector for the augmented Laplacian
    w_aug, v_aug = EigUtil.principal_eigh(L_aug_pinv)
    fiedler_aug = v_aug * math.sqrt(w_aug)
    # report the results
    np.set_printoptions(linewidth=300, threshold=10000)
    out = StringIO()
    print >> out, "Laplacian matrix:"
    print >> out, L
    print >> out
    print >> out, "Schur complement of Laplacian matrix:"
    print >> out, R
    print >> out
    print >> out, "scaled Fiedler vector of Schur complement:"
    print >> out, fiedler
    print >> out
    print >> out, "eigenvalues of pinv of Schur complement:"
    print >> out, vals
    print >> out
    print >> out, "corresponding eigenvectors of pinv of Schur complement:"
    print >> out, np.array(vecs).T
    print >> out
    print >> out
    print >> out, "augmented distance matrix:"
    print >> out, D_aug
    print >> out
    print >> out, "scaled Fiedler vector of augmented Laplacian limit:"
    print >> out, fiedler_aug
    print >> out
    print >> out, "eigenvalues of pinv of augmented Laplacian limit:"
    print >> out, vals_aug
    print >> out
    print >> out, "rows are eigenvectors of pinv of augmented Laplacian limit:"
    print >> out, np.array(vecs_aug)
    return out.getvalue()
Example #20
File: RSA.py  Project: fireboy38/RSA
def produce_e_d():
    """产生(e,d)"""
    e = 3
    while True:
        d = Euclid.extended_Euclid(e,m)#e*d=1modm
        if Euclid.gcd(m,e) == 1 and d > 0:
            break
        else:
            e += 2
    return (e,d)
Example #21
def get_response_content(fs):
    # read the lat-lon points from the input
    lines = Util.get_stripped_lines(fs.datalines.splitlines())
    rows = parse_lines(lines)
    latlon_points = []
    city_names = []
    for city, latd, latm, lond, lonm in rows:
        lat = math.radians(GPS.degrees_minutes_to_degrees(latd, latm))
        lon = math.radians(GPS.degrees_minutes_to_degrees(lond, lonm))
        latlon_points.append((lat, lon))
        city_names.append(city)
    npoints = len(latlon_points)
    # start writing the response
    np.set_printoptions(linewidth=200)
    out = StringIO()
    radius = GPS.g_earth_radius_miles
    for dfunc, name in ((GPS.get_arc_distance, 'great arc'),
                        (GPS.get_euclidean_distance, 'euclidean')):
        # define the edm whose elements are squared euclidean-like distances
        edm = np.zeros((npoints, npoints))
        D = np.zeros((npoints, npoints))
        for i, pointa in enumerate(latlon_points):
            for j, pointb in enumerate(latlon_points):
                D[i, j] = dfunc(pointa, pointb, radius)
                edm[i, j] = D[i, j]**2
        print >> out, name, 'distances:'
        print >> out, D
        print >> out
        print >> out, name, 'EDM:'
        print >> out, edm
        print >> out
        G = Euclid.edm_to_dccov(edm)
        print >> out, name, 'Gower centered matrix:'
        print >> out, G
        print >> out
        spectrum = np.array(list(reversed(sorted(np.linalg.eigvals(G)))))
        print >> out, name, 'spectrum of Gower centered matrix:'
        for x in spectrum:
            print >> out, x
        print >> out
        print >> out, name, 'rounded spectrum:'
        for x in spectrum:
            print >> out, '%.1f' % x
        print >> out
        mds_points = Euclid.edm_to_points(edm)
        print >> out, '2D MDS coordinates:'
        # use a distinct loop variable to avoid shadowing the outer 'name'
        for city, mds_point in zip(city_names, mds_points):
            x = mds_point[0]
            y = mds_point[1]
            print >> out, '\t'.join(str(v) for v in [city, x, y])
        print >> out
        # break between distance methods
        print >> out
    # return the response
    return out.getvalue()
Example #22
File: KenGen.py  Project: tutengfei/PDP
def produce_e_d():
    """产生(e, d)"""
    # Generate a number e so that gcd(e, m) = 1, start with e = 3
    e = 3
    while True:
        d = Euclid.extended_Euclid(e, m)
        if Euclid.gcd(m, e) == 1 and d > 0:
            break
        else:
            e += 2
    return (e, d)
Example #23
def update_using_laplacian(D, index_set):
    """
    Update the distance matrix by summing rows and columns of the removed indices.
    @param D: the distance matrix
    @param index_set: the set of indices that will be removed from the updated distance matrix
    @return: an updated distance matrix
    """
    L = Euclid.edm_to_laplacian(D)
    L_small = SchurAlgebra.mmerge(L, index_set)
    D_small = Euclid.laplacian_to_edm(L_small)
    return D_small
Example #24
def update_using_laplacian(D, index_set):
    """
    Update the distance matrix by summing rows and columns of the removed indices.
    @param D: the distance matrix
    @param index_set: the set of indices that will be removed from the updated distance matrix
    @return: an updated distance matrix
    """
    L = Euclid.edm_to_laplacian(D)
    L_small = SchurAlgebra.mmerge(L, index_set)
    D_small = Euclid.laplacian_to_edm(L_small)
    return D_small
Example #25
def get_splits(initial_distance_matrix, split_function, update_function, on_label_split=None):
    """
    This is the most external of the functions in this module.
    Get the set of splits implied by the tree that would be reconstructed.
    @param initial_distance_matrix: a distance matrix
    @param split_function: takes a distance matrix and returns an index split
    @param update_function: takes a distance matrix and an index subset and returns a distance matrix
    @param on_label_split: notifies the caller of the label split induced by an index split
    @return: a set of splits
    """
    n = len(initial_distance_matrix)
    # keep a stack of (label_set_per_vertex, distance_matrix) pairs
    initial_state = ([set([i]) for i in range(n)], initial_distance_matrix)
    stack = [initial_state]
    # process the stack in a depth first manner, building the split set
    label_split_set = set()
    while stack:
        label_sets, D = stack.pop()
        # if the matrix is small then we are done
        if len(D) < 4:
            continue
        # split the indices using the specified function
        try:
            index_split = split_function(D)
            # convert the index split to a label split
            label_split = index_split_to_label_split(index_split, label_sets)
            # notify the caller if a callback is requested
            if on_label_split:
                on_label_split(label_split)
            # add the split to the master set of label splits
            label_split_set.add(label_split)
            # for large matrices create the new label sets and the new conformant distance matrices
            a, b = index_split
            for index_selection, index_complement in ((a, b), (b, a)):
                if len(index_complement) > 2:
                    next_label_sets = SchurAlgebra.vmerge(label_sets, index_selection)
                    next_D = update_function(D, index_selection)
                    next_state = (next_label_sets, next_D)
                    stack.append(next_state)
        except DegenerateSplitException, e:
            # we cannot recover from a degenerate split unless there are more than four indices
            if len(D) <= 4:
                continue
            # with more than four indices we can fall back to partial splits
            index_set = set([e.index])
            # get the next label sets
            next_label_sets = SchurAlgebra.vdelete(label_sets, index_set)
            # get the next conformant distance matrix by schur complementing out the offending index
            L = Euclid.edm_to_laplacian(D)
            L_small = SchurAlgebra.mschur(L, index_set)
            next_D = Euclid.laplacian_to_edm(L_small)
            next_state = (next_label_sets, next_D)
            stack.append(next_state)
    # return the accumulated set of splits promised by the docstring
    return label_split_set
Example #26
def _do_analysis(self, use_generalized_nj):
    """
    Do some splits of the tree.
    @param use_generalized_nj: True if we use an old method of outgrouping
    """
    # define the distance matrix
    D = np.array(self.pruned_tree.get_distance_matrix(self.pruned_names))
    # get the primary split of the criterion matrix
    L = Euclid.edm_to_laplacian(D)
    v = BuildTreeTopology.laplacian_to_fiedler(L)
    eigensplit = BuildTreeTopology.eigenvector_to_split(v)
    # assert that the first split cleanly separates the bacteria from the rest
    left_indices, right_indices = eigensplit
    left_domains = self._get_domains([self.pruned_names[x] for x in left_indices])
    right_domains = self._get_domains([self.pruned_names[x] for x in right_indices])
    if ('bacteria' in left_domains) and ('bacteria' in right_domains):
        raise HandlingError('bacteria were not defined by the first split')
    # now we have enough info to define the first supplementary csv file
    self.first_split_object = SupplementarySpreadsheetObject(self.pruned_names, L, v)
    # define the bacteria indices vs the non-bacteria indices for the second split
    if 'bacteria' in left_domains:
        bacteria_indices = left_indices
        non_bacteria_indices = right_indices
    elif 'bacteria' in right_domains:
        bacteria_indices = right_indices
        non_bacteria_indices = left_indices
    # get the secondary split of interest
    if use_generalized_nj:
        D_secondary = BuildTreeTopology.update_generalized_nj(D, bacteria_indices)
        L_secondary = Euclid.edm_to_laplacian(D_secondary)
    else:
        L_secondary = SchurAlgebra.mmerge(L, bacteria_indices)
    full_label_sets = [set([i]) for i in range(len(self.pruned_names))]
    next_label_sets = SchurAlgebra.vmerge(full_label_sets, bacteria_indices)
    v_secondary = BuildTreeTopology.laplacian_to_fiedler(L_secondary)
    eigensplit_secondary = BuildTreeTopology.eigenvector_to_split(v_secondary)
    left_subindices, right_subindices = eigensplit_secondary
    pruned_names_secondary = []
    for label_set in next_label_sets:
        if len(label_set) == 1:
            label = list(label_set)[0]
            pruned_names_secondary.append(self.pruned_names[label])
        else:
            pruned_names_secondary.append('all-bacteria')
    # assert that the second split cleanly separates the eukaryota from the rest
    left_subdomains = self._get_domains([pruned_names_secondary[x] for x in left_subindices])
    right_subdomains = self._get_domains([pruned_names_secondary[x] for x in right_subindices])
    if ('eukaryota' in left_subdomains) and ('eukaryota' in right_subdomains):
        raise HandlingError('eukaryota were not defined by the second split')
    # now we have enough info to define the second supplementary csv file
    self.second_split_object = SupplementarySpreadsheetObject(pruned_names_secondary, L_secondary, v_secondary)
Example #27
def get_response_content(fs):
    # read the matrix
    D = fs.matrix
    # read the ordered labels
    ordered_labels = Util.get_stripped_lines(StringIO(fs.labels))
    if not ordered_labels:
        raise HandlingError('no ordered taxa were provided')
    if len(ordered_labels) != len(set(ordered_labels)):
        raise HandlingError('the ordered taxa should be unique')
    # get the label selection and its complement
    min_selected_labels = 2
    min_unselected_labels = 1
    selected_labels = set(Util.get_stripped_lines(StringIO(fs.selection)))
    if len(selected_labels) < min_selected_labels:
        raise HandlingError('at least %d taxa should be selected to be grouped' % min_selected_labels)
    # get the set of labels in the complement
    unselected_labels = set(ordered_labels) - selected_labels
    if len(unselected_labels) < min_unselected_labels:
        raise HandlingError('at least %d taxa should remain outside the selected group' % min_unselected_labels)
    # assert that no bizarre labels were selected
    weird_labels = selected_labels - set(ordered_labels)
    if weird_labels:
        raise HandlingError('some selected taxa are invalid: ' + str(weird_labels))
    # assert that the size of the distance matrix is compatible with the number of ordered labels
    if len(D) != len(ordered_labels):
        raise HandlingError('the number of listed taxa does not match the number of rows in the distance matrix')
    # get the set of selected indices and its complement
    n = len(D)
    index_selection = set(i for i, label in enumerate(ordered_labels) if label in selected_labels)
    index_complement = set(range(n)) - index_selection
    # begin the response
    out = StringIO()
    # get the ordered list of sets of indices to merge
    merged_indices = SchurAlgebra.vmerge([set([x]) for x in range(n)], index_selection)
    # calculate the new distance matrix
    L = Euclid.edm_to_laplacian(D)
    L_merged = SchurAlgebra.mmerge(L, index_selection)
    D_merged = Euclid.laplacian_to_edm(L_merged)
    # print the output distance matrix and the labels of its rows
    print >> out, 'new distance matrix:'
    print >> out, MatrixUtil.m_to_string(D_merged)
    print >> out
    print >> out, 'new taxon labels:'
    for merged_index_set in merged_indices:
        if len(merged_index_set) == 1:
            print >> out, ordered_labels[merged_index_set.pop()]
        else:
            print >> out, '{' + ', '.join(selected_labels) + '}'
    # write the response
    return out.getvalue()
Example #28
File: keygen.py  Project: tutengfei/PDP
def gen_e_d(m):
    """
    Generate (e, d); m is p1 * q1
    :param m:
    :return:
    """
    # Generate a number e so that gcd(e, m) = 1, start with e = 3
    e = 3
    while True:
        d = Euclid.extended_Euclid(e, m)
        if Euclid.gcd(m, e) == 1 and d > 0:
            break
        else:
            e += 2
    return e, d
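Usage sketch (editorial, not from the listed project): with n = p*q and (e, d) as returned by gen_e_d, RSA encryption and decryption are plain modular exponentiations; the plaintext is assumed to be an integer smaller than n:

def rsa_encrypt(m_plain, e, n):
    return pow(m_plain, e, n)  # c = m^e mod n

def rsa_decrypt(c, d, n):
    return pow(c, d, n)        # m = c^d mod n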
Example #29
def get_response_content(fs):
    np.set_printoptions(linewidth=200)
    n = len(fs.D)
    # create the Laplacian matrix
    L = Euclid.edm_to_laplacian(fs.D)
    # create the Laplacian matrix with the extra node added
    L_dup = get_pseudoduplicate_laplacian(L, fs.strength)
    # get the principal axis projection from the Laplacian dup matrix
    X_w, X_v = EigUtil.principal_eigh(np.linalg.pinv(L_dup))
    L_dup_x = X_v * math.sqrt(X_w)
    # get masses summing to one
    m = np.array([1] * (n - 1) + [2], dtype=float) / (n + 1)
    # get the principal axis projection using the weight formula
    M = np.diag(np.sqrt(m))
    L_pinv = np.linalg.pinv(L)
    I = np.eye(n, dtype=float)
    E = I - np.outer(np.ones(n, dtype=float), m)
    ME = np.dot(M, E)
    Q = np.dot(ME, np.dot(L_pinv, ME.T))
    Q_w, Q_v = EigUtil.principal_eigh(Q)
    Q_x = Q_v * math.sqrt(Q_w) / np.sqrt(m)
    # make the response
    out = StringIO()
    print >> out, 'Laplacian matrix with pseudo-duplicate node:'
    print >> out, L_dup
    print >> out
    print >> out, 'principal axis projection:'
    print >> out, L_dup_x
    print >> out
    print >> out, 'principal axis projection using the weight formula:'
    print >> out, Q_x
    return out.getvalue()
Example #30
def do_projection(D_full, nleaves):
    """
    Project points onto the space of the leaves.
    The resulting points are in the subspace
    whose basis vectors are the principal axes of the leaf ellipsoid.
    @param D_full: distances relating all, including internal, vertices.
    @param nleaves: the first few indices in D_full represent leaves
    @return: a numpy array where each row is a vertex of the tree
    """
    # Get the points
    # such that the n rows in X are points in n-1 dimensional space.
    X = Euclid.edm_to_points(D_full)
    # Translate all of the points
    # so that the origin is at the centroid of the leaves.
    X -= np.mean(X[:nleaves], 0)
    # Extract the subset of points that define the leaves.
    L = X[:nleaves]
    # Find the orthogonal transformation of the leaves onto their MDS axes.
    # According to the python svd documentation,
    # singular values are sorted most important to least important.
    U, s, Vt = np.linalg.svd(L)
    # Transform all of the points (including the internal vertices)
    # according to this orthogonal transformation.
    # The axes are now the principal axes
    # of the Steiner circumscribed ellipsoid of the leaf vertices.
    # I am using M.T[:k].T to get the first k columns of M.
    points = np.dot(X, Vt.T).T[:(nleaves-1)].T
    return points
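The comments in do_projection rely on the M.T[:k].T idiom for taking the first k columns of a matrix; a small standalone check (editorial sketch, assuming only numpy) shows it is equivalent to ordinary column slicing:

import numpy as np
M = np.arange(12).reshape(3, 4)
k = 2
# the first k rows of the transpose are the first k columns of M
assert np.allclose(M.T[:k].T, M[:, :k])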
Example #31
def get_response_content(fs):
    np.set_printoptions(linewidth=200)
    n = len(fs.D)
    # create the Laplacian matrix
    L = Euclid.edm_to_laplacian(fs.D)
    # create the Laplacian matrix with the extra node added
    L_dup = get_pseudoduplicate_laplacian(L, fs.strength)
    # get the principal axis projection from the Laplacian dup matrix
    X_w, X_v = EigUtil.principal_eigh(np.linalg.pinv(L_dup))
    L_dup_x = X_v * math.sqrt(X_w)
    # get masses summing to one
    m = np.array([1]*(n-1) + [2], dtype=float) / (n+1)
    # get the principal axis projection using the weight formula
    M = np.diag(np.sqrt(m))
    L_pinv = np.linalg.pinv(L)
    I = np.eye(n, dtype=float)
    E = I - np.outer(np.ones(n, dtype=float), m)
    ME = np.dot(M, E)
    Q = np.dot(ME, np.dot(L_pinv, ME.T))
    Q_w, Q_v = EigUtil.principal_eigh(Q)
    Q_x = Q_v * math.sqrt(Q_w) / np.sqrt(m)
    # make the response
    out = StringIO()
    print >> out, 'Laplacian matrix with pseudo-duplicate node:'
    print >> out, L_dup
    print >> out
    print >> out, 'principal axis projection:'
    print >> out, L_dup_x
    print >> out
    print >> out, 'principal axis projection using the weight formula:'
    print >> out, Q_x
    return out.getvalue()
Example #32
def get_response_content(fs):
    D = fs.matrix
    L = Euclid.edm_to_laplacian(D)
    S = get_sigma_matrix(D)
    P = get_precision_matrix(S)
    # begin the response
    out = StringIO()
    print >> out, 'the Laplacian matrix:'
    print >> out, MatrixUtil.m_to_string(L)
    print >> out
    print >> out, 'the sigma matrix corresponding to the Q matrix:'
    print >> out, MatrixUtil.m_to_string(S)
    print >> out
    print >> out, 'the precision matrix corresponding to the Q matrix:'
    print >> out, MatrixUtil.m_to_string(P)
    print >> out
    print >> out, 'the precision matrix minus the laplacian matrix:'
    print >> out, MatrixUtil.m_to_string(P-L)
    print >> out
    print >> out, 'the double centered precision matrix minus the laplacian matrix:'
    print >> out, MatrixUtil.m_to_string(MatrixUtil.double_centered(P)-L)
    print >> out
    print >> out, 'the pseudo-inverse of the double centered sigma matrix minus the laplacian matrix:'
    print >> out, MatrixUtil.m_to_string(np.linalg.pinv(MatrixUtil.double_centered(S))-L)
    # write the response
    return out.getvalue()
Example #33
def get_response_content(fs):
    # Collect the image format information.
    border_info = BorderInfo(fs.border_x, fs.border_y)
    axis_info = AxisInfo(fs.flip_x, fs.flip_y, fs.show_x, fs.show_y)
    # read the points and edges
    points, edges = read_points_and_edges(fs.graph_data)
    # define edge weights
    if fs.weighted:
        np_points = [np.array(p) for p in points]
        dists = [np.linalg.norm(np_points[j] - np_points[i]) for i, j in edges]
        weights = [1.0 / d for d in dists]
    else:
        weights = [1.0 for e in edges]
    # define the point colors using the graph Fiedler loadings
    L = edges_to_laplacian(edges, weights)
    G = np.linalg.pinv(L)
    X = Euclid.dccov_to_points(G)
    points = [(p[0], p[1]) for p in X]
    xs, ys = zip(*points)
    colors = valuations_to_colors(xs)
    # Get the image.
    ext = Form.g_imageformat_to_ext[fs.imageformat]
    image_info = ImageInfo(fs.width, fs.height, fs.black, fs.show_edges,
                           fs.show_labels, axis_info, border_info, ext)
    return get_image_string(xs, ys, colors, edges, image_info)
Example #34
def get_response_content(fs):
    # read the matrix
    D = fs.matrix
    # begin the response
    out = StringIO()
    # Look at the eigenvalues
    # of the associated doubly centered covariance matrix.
    HSH = Euclid.edm_to_dccov(D)
    w, V_T = np.linalg.eigh(HSH)
    V = V_T.T
    print >> out, 'eigenvalues of the associated doubly centered covariance matrix:'
    for x in reversed(sorted(w)):
        print >> out, x
    print >> out
    print >> out, 'eigenvector associated with last eigenvalue:'
    last_eigenvector = min(zip(w, V))[1]
    for x in last_eigenvector:
        print >> out, x
    print >> out
    # look at another criterion
    D_pinv = np.linalg.pinv(D)
    criterion = np.sum(D_pinv)
    if criterion > 0:
        print >> out, 'sum of elements of the pseudoinverse of the distance matrix is positive'
    else:
        print >> out, 'sum of elements of the pseudoinverse of the distance matrix is nonpositive'
    print >> out, 'A Euclidean distance matrix is spherical if and only if the sum of the elements of its pseudoinverse is positive.'
    print >> out, 'For this distance matrix, this sum is', criterion
    # write the response
    return out.getvalue()
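Since the criterion above only needs the element sum of the pseudoinverted distance matrix, it is easy to try on any candidate matrix. A small sketch using an EDM of squared distances built from random points (squared distances being the convention in these examples); no particular outcome is asserted:

import numpy as np

points = np.random.rand(5, 2)
diff = points[:, None, :] - points[None, :, :]
D = (diff ** 2).sum(axis=-1)

criterion = np.sum(np.linalg.pinv(D))
if criterion > 0:
    print('the element sum of pinv(D) is positive (spherical by the stated criterion)')
else:
    print('the element sum of pinv(D) is nonpositive')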
Example #35
def get_response_content(fs):
    """
    @param fs: a FieldStorage object containing the cgi arguments
    @return: the response text
    """
    # get the tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # read the ordered labels
    ordered_labels = Util.get_stripped_lines(StringIO(fs.labels))
    # validate the input
    observed_label_set = set(node.get_name() for node in tree.gen_tips())
    if set(ordered_labels) != observed_label_set:
        msg = 'the labels should match the labels of the leaves of the tree'
        raise HandlingError(msg)
    # get the matrix of pairwise distances among the tips
    D = np.array(tree.get_distance_matrix(ordered_labels))
    L = Euclid.edm_to_laplacian(D)
    w, v = get_eigendecomposition(L)
    C = get_contrast_matrix(w, v)
    # set elements with small absolute value to zero
    C[abs(C) < fs.epsilon] = 0
    # start to prepare the response
    out = StringIO()
    if fs.plain_format:
        print >> out, MatrixUtil.m_to_string(C)
    elif fs.matlab_format:
        print >> out, MatrixUtil.m_to_matlab_string(C)
    elif fs.r_format:
        print >> out, MatrixUtil.m_to_R_string(C)
    # write the response
    return out.getvalue()
Example #36
def get_eigendecomposition_report(D):
    """
    @param D: a distance matrix
    @return: a multi-line string
    """
    out = StringIO()
    # get some intermediate matrices and vectors
    L = Euclid.edm_to_laplacian(D)
    laplacian_fiedler = BuildTreeTopology.laplacian_to_fiedler(L)
    distance_fiedler = BuildTreeTopology.edm_to_fiedler(D)
    eigensplit = BuildTreeTopology.eigenvector_to_split(laplacian_fiedler)
    # report the two eigenvalue lists that should be the same
    HDH = MatrixUtil.double_centered(D)
    HSH = -0.5 * HDH
    w_distance, vt_distance = np.linalg.eigh(HSH)
    print >> out, 'the laplacian-derived and distance-derived eigenvalues:'
    w_laplacian, vt_laplacian = np.linalg.eigh(L)
    for a, b in zip(sorted(w_laplacian), sorted(w_distance)):
        print >> out, a, '\t', b
    print >> out
    # report the two fiedler vectors that should be the same
    print >> out, 'the laplacian-derived and distance-derived fiedler vectors:'
    for a, b in zip(laplacian_fiedler, distance_fiedler):
        print >> out, a, '\t', b
    return out.getvalue().strip()
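The report compares two routes to the Fiedler vector. A numpy-only sketch of the Laplacian route, assuming laplacian_to_fiedler returns the eigenvector for the second-smallest eigenvalue of L (the usual convention; the helper itself is not shown here):

import numpy as np

def laplacian_to_fiedler(L):
    w, V = np.linalg.eigh(L)   # ascending eigenvalues; index 0 is the constant mode
    return V[:, 1]

# path graph on four vertices with unit edge weights
L = np.array([[ 1., -1.,  0.,  0.],
              [-1.,  2., -1.,  0.],
              [ 0., -1.,  2., -1.],
              [ 0.,  0., -1.,  1.]])
fiedler = laplacian_to_fiedler(L)
print(fiedler)   # the sign pattern splits the path into its two halves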
Example #37
def get_response_content(fs):
    # read the points and edges
    points, edges = read_points_and_edges(fs.graph_data)
    # define edge weights
    if fs.weighted:
        np_points = [np.array(p) for p in points]
        dists = [np.linalg.norm(np_points[j] - np_points[i]) for i, j in edges]
        weights = [1.0 / d for d in dists]
    else:
        weights = [1.0 for e in edges]
    # get the width and height of the drawable area of the image
    width = fs.total_width - 2*fs.border
    height = fs.total_height - 2*fs.border
    if width < 1 or height < 1:
        msg = 'the image dimensions do not allow for enough drawable area'
        raise HandlingError(msg)
    # define the point colors using the unweighted graph Fiedler loadings
    L = edges_to_laplacian(edges, weights)
    G = np.linalg.pinv(L)
    X = Euclid.dccov_to_points(G)
    points = [(-p[0] if fs.flip else p[0], p[1]) for p in X]
    x_coords, y_coords = zip(*points)
    colors = valuations_to_colors(x_coords)
    # draw the image
    ext = Form.g_imageformat_to_ext[fs.imageformat]
    info = ImageInfo(fs.total_width, fs.total_height, fs.border, ext)
    try:
        return get_image_string(points, edges, colors, fs.black, info)
    except CairoUtil.CairoUtilError as e:
        raise HandlingError(e)
Example #38
def create_laplacian_matrix(lena, lenb, lenc):
    """
    @param lena: integer length of first branch.
    @param lenb: integer length of second branch.
    @param lenc: integer length of third branch.
    """
    N = 1 + lena + lenb + lenc
    A = np.zeros((N, N), dtype=float)
    # Add connections to the hub vertex.
    if lena:
        A[0, 1] = 1
        A[1, 0] = 1
    if lenb:
        A[0, lena + 1] = 1
        A[lena + 1, 0] = 1
    if lenc:
        A[0, lena + lenb + 1] = 1
        A[lena + lenb + 1, 0] = 1
    # Add tridiagonal connections on the first branch.
    for i in range(lena - 1):
        j = i + 1
        A[j, j + 1] = 1
        A[j + 1, j] = 1
    # Add tridiagonal connections on the second branch.
    for i in range(lenb - 1):
        j = lena + i + 1
        A[j, j + 1] = 1
        A[j + 1, j] = 1
    # Add tridiagonal connections on the third branch.
    for i in range(lenc - 1):
        j = lena + lenb + i + 1
        A[j, j + 1] = 1
        A[j + 1, j] = 1
    L = Euclid.adjacency_to_laplacian(A)
    return L
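A quick usage sketch for create_laplacian_matrix, run alongside the function above. It assumes Euclid.adjacency_to_laplacian computes diag(row sums of A) minus A, which is the usual definition; under that assumption the rows sum to zero and the hub has degree three when all three branches are nonempty:

import numpy as np

L = create_laplacian_matrix(2, 3, 1)   # three paths joined at hub vertex 0
assert L.shape == (7, 7)               # 1 hub + 2 + 3 + 1 branch vertices
assert np.allclose(L.sum(axis=1), 0)   # rows of a Laplacian sum to zero
assert L[0, 0] == 3                    # the hub is connected to each branch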
Example #41
def split_function(self, D):
    """
    Split the distance matrix using signs of an eigenvector of -HDH/2.
    If a degenerate split is found then a DegenerateSplitException is raised.
    @param D: the distance matrix
    @return: a set of two index sets defining a split of the indices
    """
    try:
        # get the matrix whose eigendecomposition is of interest
        HSH = Euclid.edm_to_dccov(D)
        # get the eigendecomposition
        eigenvalues, V_T = np.linalg.eigh(HSH)
        eigenvectors = V_T.T.tolist()
        # save the eigenvalues for reporting
        self.eigenvalues = eigenvalues
        # get the eigenvector of interest
        w, v = max(zip(eigenvalues, eigenvectors))
        # get the indices with positive eigenvector valuations
        n = len(D)
        positive = frozenset(i for i, x in enumerate(v) if x > 0)
        nonpositive = frozenset(set(range(n)) - positive)
        # check for a degenerate split
        for index_set in (positive, nonpositive):
            assert len(index_set) > 0
        for index_set in (positive, nonpositive):
            if len(index_set) == 1:
                index, = index_set
                raise BuildTreeTopology.DegenerateSplitException(index)
        return frozenset((positive, nonpositive))
    except BuildTreeTopology.DegenerateSplitException:
        self.eigenvalues = None
        return BuildTreeTopology.split_nj(D)
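The split rule above keys off the signs of the eigenvector with the largest eigenvalue of -HDH/2. A standalone numpy sketch of just that step, without the degenerate-split fallback, assuming squared-distance EDMs as elsewhere in these examples:

import numpy as np

def principal_sign_split(D):
    n = len(D)
    H = np.eye(n) - np.ones((n, n)) / n
    HSH = -0.5 * H.dot(D).dot(H)
    w, V = np.linalg.eigh(HSH)
    v = V[:, np.argmax(w)]   # eigenvector with the largest eigenvalue
    positive = frozenset(i for i, x in enumerate(v) if x > 0)
    nonpositive = frozenset(range(n)) - positive
    return frozenset([positive, nonpositive])

# two well separated pairs of points on a line (squared distances)
x = np.array([0.0, 1.0, 10.0, 11.0])
D = (x[:, None] - x[None, :]) ** 2
print(principal_sign_split(D))   # expected to separate {0, 1} from {2, 3}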
Example #42
def get_response_content(fs):
    # define the requested physical size of the images (in pixels)
    physical_size = (640, 480)
    # build the newick tree from the string
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    nvertices = len(list(tree.preorder()))
    nleaves = len(list(tree.gen_tips()))
    # Get ordered ids with the leaves first,
    # and get the corresponding distance matrix.
    ordered_ids = get_ordered_ids(tree)
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    # get the image extension
    ext = Form.g_imageformat_to_ext[fs.imageformat]
    # get the scaling factors and offsets
    if fs.hticks < 2:
        msg = 'expected at least two ticks on the horizontal axis'
        raise HandlingError(msg)
    width, height = physical_size
    xoffset = fs.border
    yoffset = fs.border
    yscale = float(height - 2 * fs.border)
    xscale = (width - 2 * fs.border) / float(fs.hticks - 1)
    # define the eigendecomposition function
    if fs.slow:
        fn = get_augmented_spectrum
    elif fs.fast:
        fn = get_augmented_spectrum_fast
    # define the target eigenvalues
    tip_ids = [id(node) for node in tree.gen_tips()]
    D_tips = np.array(tree.get_partial_distance_matrix(tip_ids))
    G_tips = Euclid.edm_to_dccov(D_tips)
    target_ws = scipy.linalg.eigh(G_tips, eigvals_only=True) * fs.denom
    # draw the image
    return create_image(ext, physical_size, xscale, yscale, xoffset, yoffset,
                        D, nleaves, fs.hticks, fs.denom, fn, target_ws)
Example #43
def get_response_content(fs):
    # Collect the image format information.
    border_info = BorderInfo(fs.border_x, fs.border_y)
    axis_info = AxisInfo(fs.flip_x, fs.flip_y, fs.show_x, fs.show_y)
    # read the points and edges
    points, edges = read_points_and_edges(fs.graph_data)
    # define edge weights
    if fs.weighted:
        np_points = [np.array(p) for p in points]
        dists = [np.linalg.norm(np_points[j] - np_points[i]) for i, j in edges]
        weights = [1.0 / d for d in dists]
    else:
        weights = [1.0 for e in edges]
    # define the point colors using the graph Fiedler loadings
    L = edges_to_laplacian(edges, weights)
    G = np.linalg.pinv(L)
    X = Euclid.dccov_to_points(G)
    points = [(p[0], p[1]) for p in X]
    xs, ys = zip(*points)
    colors = valuations_to_colors(xs)
    # Get the image.
    ext = Form.g_imageformat_to_ext[fs.imageformat]
    image_info = ImageInfo(fs.width, fs.height,
            fs.black, fs.show_edges, fs.show_labels,
            axis_info, border_info, ext)
    return get_image_string(xs, ys, colors, edges, image_info)
Example #44
def create_laplacian_matrix(lena, lenb, lenc):
    """
    @param lena: integer length of first branch.
    @param lenb: integer length of second branch.
    @param lenc: integer length of third branch.
    """
    N = 1 + lena + lenb + lenc
    A = np.zeros((N,N), dtype=float)
    # Add connections to the hub vertex.
    if lena:
        A[0, 1] = 1
        A[1, 0] = 1
    if lenb:
        A[0, lena+1] = 1
        A[lena+1, 0] = 1
    if lenc:
        A[0, lena+lenb+1] = 1
        A[lena+lenb+1, 0] = 1
    # Add tridiagonal connections on the first branch.
    for i in range(lena-1):
        j = i + 1
        A[j, j+1] = 1
        A[j+1, j] = 1
    # Add tridiagonal connections on the second branch.
    for i in range(lenb-1):
        j = lena + i + 1
        A[j, j+1] = 1
        A[j+1, j] = 1
    # Add tridiagonal connections on the third branch.
    for i in range(lenc-1):
        j = lena + lenb + i + 1
        A[j, j+1] = 1
        A[j+1, j] = 1
    L = Euclid.adjacency_to_laplacian(A)
    return L
Example #45
def get_full_tree_message(tree, m_to_string):
    """
    In this function we find the Fiedler split of the full tree.
    @param tree: each node in this tree must have a name
    @param m_to_string: a function that converts a matrix to a string
    @return: a message about the split of the tips of the tree induced by the fiedler vector
    """
    out = StringIO()
    # get the alphabetically ordered names
    ordered_names = list(sorted(node.get_name() for node in tree.preorder()))
    # get the corresponding ordered ids
    name_to_id = dict((node.get_name(), id(node)) for node in tree.preorder())
    ordered_ids = [name_to_id[name] for name in ordered_names]
    # get the full weighted adjacency matrix
    A = np.array(tree.get_affinity_matrix(ordered_ids))
    print >> out, 'the weighted reciprocal adjacency matrix of the full tree:'
    print >> out, m_to_string(get_reciprocal_matrix(A))
    print >> out
    # get the full Laplacian matrix
    L = Euclid.adjacency_to_laplacian(A)
    # get the fiedler split
    v = BuildTreeTopology.laplacian_to_fiedler(L)
    print >> out, 'the Fiedler split of the full tree:'
    for name, value in zip(ordered_names, v):
        print >> out, name, ':', value
    return out.getvalue().strip()
Example #46
def get_response_content(fs):
    # read the points and edges
    points, edges = read_points_and_edges(fs.graph_data)
    # define edge weights
    if fs.weighted:
        np_points = [np.array(p) for p in points]
        dists = [np.linalg.norm(np_points[j] - np_points[i]) for i, j in edges]
        weights = [1.0 / d for d in dists]
    else:
        weights = [1.0 for e in edges]
    # get the width and height of the drawable area of the image
    width = fs.total_width - 2 * fs.border
    height = fs.total_height - 2 * fs.border
    if width < 1 or height < 1:
        msg = 'the image dimensions do not allow for enough drawable area'
        raise HandlingError(msg)
    # define the point colors using the unweighted graph Fiedler loadings
    L = edges_to_laplacian(edges, weights)
    G = np.linalg.pinv(L)
    X = Euclid.dccov_to_points(G)
    points = [(-p[0] if fs.flip else p[0], p[1]) for p in X]
    x_coords, y_coords = zip(*points)
    colors = valuations_to_colors(x_coords)
    # draw the image
    ext = Form.g_imageformat_to_ext[fs.imageformat]
    info = ImageInfo(fs.total_width, fs.total_height, fs.border, ext)
    try:
        return get_image_string(points, edges, colors, fs.black, info)
    except CairoUtil.CairoUtilError as e:
        raise HandlingError(e)
Example #48
def newimg(img):
    # Deskew the input image.
    newim = Deskew.deskew(img)
    # Get all points according to the threshold.
    data = MyClust.get_all_points(newim)
    print("Shape:", data.shape[0])
    if data.shape[0] < 300:
        print("Continue")
    else:
        print("Nice try !!!!")
        return
    #############      CLUSTERING       #################
    # Cluster the point data into num_clusters landmark points.
    lm = MyClust.Get_Clusters(data, num_clusters)
    # Distance array relating all clusters.
    lm2 = np.array(Euclid.fun1(lm, num_clusters))
    # Order in which the path needs to visit the clusters.
    lm2 = Ham.Shortest_path_way(lm2, num_clusters)
    lm2 = np.array(lm2[0][:-1], dtype=np.int64)
    # After sorting the landmarks, the path is defined.
    mm = Ham.path_order(lm2, lm, num_clusters)
    # Return the path to comp2.
    return mm
Example #49
def do_projection(D_full, nleaves):
    """
    Project points onto the space of the leaves.
    The resulting points are in the subspace
    whose basis vectors are the principal axes of the leaf ellipsoid.
    @param D_full: distances relating all, including internal, vertices.
    @param nleaves: the first few indices in D_full represent leaves
    @return: a numpy array where each row is a vertex of the tree
    """
    # Get the points
    # such that the n rows in X are points in n-1 dimensional space.
    X = Euclid.edm_to_points(D_full)
    # Translate all of the points
    # so that the origin is at the centroid of the leaves.
    X -= np.mean(X[:nleaves], 0)
    # Extract the subset of points that define the leaves.
    L = X[:nleaves]
    # Find the orthogonal transformation of the leaves onto their MDS axes.
    # According to the python svd documentation,
    # singular values are sorted most important to least important.
    U, s, Vt = np.linalg.svd(L)
    # Transform all of the points (including the internal vertices)
    # according to this orthogonal transformation.
    # The axes are now the principal axes
    # of the Steiner circumscribed ellipsoid of the leaf vertices.
    # I am using M.T[:k].T to get the first k columns of M.
    points = np.dot(X, Vt.T).T[:(nleaves - 1)].T
    return points
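A self-contained numpy sketch of the same centering-plus-rotation idea, with random points standing in for the MDS embedding of a tree, to make the role of the SVD explicit:

import numpy as np

def project_onto_leaf_axes(X, nleaves):
    # center at the centroid of the leaf points only
    X = X - np.mean(X[:nleaves], axis=0)
    # principal axes of the leaf cloud, most important first
    U, s, Vt = np.linalg.svd(X[:nleaves], full_matrices=False)
    # rotate every vertex onto those axes and keep at most nleaves - 1 coordinates
    return X.dot(Vt.T)[:, :nleaves - 1]

X = np.random.rand(7, 4)   # 7 vertices embedded in 4 dimensions
P = project_onto_leaf_axes(X, nleaves=4)
assert P.shape == (7, 3)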
Example #50
def get_response_content(fs):
    D = fs.matrix
    L = Euclid.edm_to_laplacian(D)
    S = get_sigma_matrix(D)
    P = get_precision_matrix(S)
    # begin the response
    out = StringIO()
    print >> out, 'the Laplacian matrix:'
    print >> out, MatrixUtil.m_to_string(L)
    print >> out
    print >> out, 'the sigma matrix corresponding to the Q matrix:'
    print >> out, MatrixUtil.m_to_string(S)
    print >> out
    print >> out, 'the precision matrix corresponding to the Q matrix:'
    print >> out, MatrixUtil.m_to_string(P)
    print >> out
    print >> out, 'the precision matrix minus the laplacian matrix:'
    print >> out, MatrixUtil.m_to_string(P - L)
    print >> out
    print >> out, 'the double centered precision matrix minus the laplacian matrix:'
    print >> out, MatrixUtil.m_to_string(MatrixUtil.double_centered(P) - L)
    print >> out
    print >> out, 'the pseudo-inverse of the double centered sigma matrix minus the laplacian matrix:'
    print >> out, MatrixUtil.m_to_string(
        np.linalg.pinv(MatrixUtil.double_centered(S)) - L)
    # write the response
    return out.getvalue()
Example #51
def get_response_content(fs):
    out = StringIO()
    # try to make some graphs
    unconnected_count = 0
    invalid_split_count = 0
    valid_split_count = 0
    for graph_index in range(fs.ngraphs):
        G = erdos_renyi(fs.nvertices, fs.pedge)
        if is_connected(G):
            # add interesting edge weights
            add_exponential_weights(G)
            # turn the adjacency matrix into a laplacian matrix
            L = Euclid.adjacency_to_laplacian(G)
            for v in range(fs.nvertices):
                small_index_to_big_index = {}
                for i_small, i_big in enumerate([i for i in range(fs.nvertices) if i != v]):
                    small_index_to_big_index[i_small] = i_big
                # take the schur complement with respect to the given vertex
                L_reduced = get_single_element_schur_complement(L, v)
                assert len(L_reduced) == len(L) - 1
                # get the loadings of the vertices of the reduced graph
                if fs.fiedler_cut:
                    Y_reduced = BuildTreeTopology.laplacian_to_fiedler(L_reduced)
                elif fs.random_cut:
                    Y_reduced = get_random_vector(L_reduced)
                assert len(Y_reduced) == len(L_reduced)
                # expand the fiedler vector with positive and negative valuations for the removed vertex
                found_valid_split = False
                for augmented_loading in (-1.0, 1.0):
                    # get the augmented split vector for this assignment of the removed vertex
                    Y_full = [0]*len(G)
                    for i_reduced, loading in enumerate(Y_reduced):
                        i_big = small_index_to_big_index[i_reduced]
                        Y_full[i_big] = loading
                    Y_full[v] = augmented_loading
                    assert len(Y_full) == len(G)
                    # get the two graphs defined by the split
                    subgraph_a, subgraph_b = list(gen_subgraphs(G, Y_full))
                    # if the subgraphs are both connected then the split is valid
                    if is_connected(subgraph_a) and is_connected(subgraph_b):
                        found_valid_split = True
                # if a valid split was not found then show the matrix
                if found_valid_split:
                    valid_split_count += 1
                else:
                    print >> out, 'Found a matrix that was split incompatibly by a cut of its schur complement!'
                    print >> out, 'matrix:'
                    print >> out, MatrixUtil.m_to_string(G)
                    print >> out, 'index that was removed:', v
                    invalid_split_count += 1
        else:
            unconnected_count += 1
    # show the number of connected and of unconnected graphs
    print >> out, 'this many random graphs were connected:', fs.ngraphs - unconnected_count
    print >> out, 'this many random graphs were not connected:', unconnected_count
    print >> out, 'this many splits were valid:', valid_split_count
    print >> out, 'this many splits were invalid:', invalid_split_count
    # return the result
    return out.getvalue()
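get_single_element_schur_complement is not shown above; the name suggests the standard single-vertex Schur complement, which can be sketched directly with numpy (an assumption about the helper, not a quote of it):

import numpy as np

def single_element_schur_complement(L, v):
    # eliminate vertex v: the Schur complement of the 1x1 block L[v, v]
    keep = [i for i in range(len(L)) if i != v]
    A = L[np.ix_(keep, keep)]
    b = L[keep, v]
    return A - np.outer(b, b) / L[v, v]

# Laplacian of a triangle with unit weights; eliminating one vertex
# leaves a 2x2 matrix whose rows still sum to zero
L = np.array([[2., -1., -1.], [-1., 2., -1.], [-1., -1., 2.]])
R = single_element_schur_complement(L, 2)
assert np.allclose(R.sum(axis=1), 0)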
Example #52
def get_response_content(fs):
    # read the distance matrix
    D = fs.matrix
    L = Euclid.edm_to_laplacian(D)
    resistor = -1/L
    resistor -= np.diag(np.diag(resistor))
    # return the edge resistor matrix
    return MatrixUtil.m_to_string(resistor) + '\n'
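The elementwise trick works because a weighted graph Laplacian has L_ij = -w_ij = -1/r_ij off the diagonal, so -1/L_ij recovers the edge resistance r_ij. A tiny check on a dense three-vertex Laplacian (dense so that no division by zero occurs, much like Laplacians derived from distance matrices):

import numpy as np

# complete graph on three vertices with conductances 1, 2, 4
# (that is, resistances 1, 0.5, 0.25)
L = np.array([[ 3., -1., -2.],
              [-1.,  5., -4.],
              [-2., -4.,  6.]])
resistor = -1.0 / L
resistor -= np.diag(np.diag(resistor))
assert np.allclose(resistor, [[0.0, 1.0, 0.5],
                              [1.0, 0.0, 0.25],
                              [0.5, 0.25, 0.0]])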
Example #53
def get_response_content(fs):
    # build the newick tree from the string
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    nvertices = len(list(tree.preorder()))
    nleaves = len(list(tree.gen_tips()))
    # get ordered ids with the leaves first
    ordered_ids = get_ordered_ids(tree)
    # get the adjacency matrix and the augmented adjacency matrix
    A = np.array(tree.get_affinity_matrix(ordered_ids))
    A_aug = get_augmented_adjacency(A, nleaves, fs.strength)
    # get the laplacian matrices
    L = Euclid.adjacency_to_laplacian(A)
    L_aug = Euclid.adjacency_to_laplacian(A_aug)
    # get the schur complements
    R = SchurAlgebra.mschur(L, set(range(nleaves, nvertices)))
    R_aug = SchurAlgebra.mschur(L_aug, set(range(nleaves, nvertices)))
    # get the scaled Fiedler vectors
    w, v = EigUtil.principal_eigh(np.linalg.pinv(R))
    fiedler = v * math.sqrt(w)
    w_aug, v_aug = EigUtil.principal_eigh(np.linalg.pinv(R_aug))
    fiedler_aug = v_aug * math.sqrt(w_aug)
    # report the results
    np.set_printoptions(linewidth=200)
    out = StringIO()
    print >> out, 'Laplacian matrix:'
    print >> out, L
    print >> out
    print >> out, 'Schur complement of Laplacian matrix:'
    print >> out, R
    print >> out
    print >> out, 'scaled Fiedler vector:'
    print >> out, fiedler
    print >> out
    print >> out, 'augmented Laplacian matrix:'
    print >> out, L_aug
    print >> out
    print >> out, 'Schur complement of augmented Laplacian matrix:'
    print >> out, R_aug
    print >> out
    print >> out, 'scaled Fiedler vector of augmented matrix:'
    print >> out, fiedler_aug
    print >> out
    return out.getvalue()
Example #54
def do_internal_projection(D_full):
    """
    The resulting points are in the subspace whose basis vectors are the principal axes of the whole ellipsoid.
    @param D_full: the distance matrix as a numpy array relating all vertices including internal vertices
    @return: a numpy array where each row is a vertex of the tree
    """
    # Get the points such that the n rows in are points in n-1 dimensional space.
    # The first coordinate is the principal axis.
    points = Euclid.edm_to_points(D_full)
    return points
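edm_to_points is presumably the classical multidimensional scaling step: double center, multiply by -1/2, eigendecompose, and scale the eigenvectors by the square roots of the nonnegative eigenvalues, with the principal axis first. A numpy-only sketch under that assumption:

import numpy as np

def edm_to_points(D):
    n = len(D)
    H = np.eye(n) - np.ones((n, n)) / n
    G = -0.5 * H.dot(D).dot(H)
    w, V = np.linalg.eigh(G)
    order = np.argsort(w)[::-1]          # principal axis first
    w, V = w[order], V[:, order]
    return V * np.sqrt(np.maximum(w, 0))

# squared pairwise distances of random points are recovered up to rotation
P = np.random.rand(6, 3)
diff = P[:, None, :] - P[None, :, :]
D = (diff ** 2).sum(axis=-1)
X = edm_to_points(D)
diff2 = X[:, None, :] - X[None, :, :]
assert np.allclose((diff2 ** 2).sum(axis=-1), D)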
Example #55
def get_response_content(fs):
    # read the matrix
    L = fs.laplacian
    # read the ordered labels
    ordered_labels = Util.get_stripped_lines(StringIO(fs.labels))
    if not ordered_labels:
        raise HandlingError('no ordered taxa were provided')
    if len(ordered_labels) != len(set(ordered_labels)):
        raise HandlingError('the ordered taxa should be unique')
    # get the label selection and its complement
    min_selected_labels = 2
    min_unselected_labels = 1
    selected_labels = set(Util.get_stripped_lines(StringIO(fs.selection)))
    if len(selected_labels) < min_selected_labels:
        raise HandlingError(
            'at least %d taxa should be selected to be grouped' %
            min_selected_labels)
    # get the set of labels in the complement
    unselected_labels = set(ordered_labels) - selected_labels
    if len(unselected_labels) < min_unselected_labels:
        raise HandlingError(
            'at least %d taxa should remain outside the selected group' %
            min_unselected_labels)
    # assert that no bizarre labels were selected
    weird_labels = selected_labels - set(ordered_labels)
    if weird_labels:
        raise HandlingError('some selected taxa are invalid: ' +
                            str(weird_labels))
    # assert that the size of the Laplacian matrix is compatible with the number of ordered labels
    if len(L) != len(ordered_labels):
        raise HandlingError(
            'the number of listed taxa does not match the number of rows in the Laplacian matrix'
        )
    # get the set of selected indices and its complement
    n = len(L)
    index_selection = set(i for i, label in enumerate(ordered_labels)
                          if label in selected_labels)
    index_complement = set(range(n)) - index_selection
    # begin the response
    out = StringIO()
    # calculate the new laplacian matrix
    L_small = SchurAlgebra.mschur(L, index_selection)
    D_small = Euclid.laplacian_to_edm(L_small)
    # print the matrices and the labels of its rows
    print >> out, 'new laplacian matrix:'
    print >> out, MatrixUtil.m_to_string(L_small)
    print >> out
    print >> out, 'new distance matrix:'
    print >> out, MatrixUtil.m_to_string(D_small)
    print >> out
    print >> out, 'new taxon labels:'
    for index in sorted(index_complement):
        print >> out, ordered_labels[index]
    # write the response
    return out.getvalue()
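Euclid.laplacian_to_edm runs the pipeline in the other direction. Judging from the helper names and the conventions used elsewhere in these examples (an assumption, not documented behavior), it amounts to D_ij = G_ii + G_jj - 2 G_ij with G = pinv(L), which for a graph Laplacian is the matrix of effective resistances:

import numpy as np

def laplacian_to_edm(L):
    G = np.linalg.pinv(L)
    g = np.diag(G)
    return g[:, None] + g[None, :] - 2 * G

# path graph a-b-c with unit conductances: effective resistances 1, 1, 2
L = np.array([[1., -1., 0.], [-1., 2., -1.], [0., -1., 1.]])
assert np.allclose(laplacian_to_edm(L), [[0., 1., 2.],
                                         [1., 0., 1.],
                                         [2., 1., 0.]])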
Example #56
def get_response_content(fs):
    # use a fixed seed if requested
    if fs.seed:
        random.seed(fs.seed)
    # define the max number of rejection iterations
    limit = fs.npoints * 100
    # validate input
    if fs.axis < 0:
        raise ValueError('the mds axis must be nonnegative')
    # get points defining the boundary of africa
    nafrica = len(g_africa_poly)
    africa_edges = [(i, (i + 1) % nafrica) for i in range(nafrica)]
    # get some points and edges inside africa
    points = sample_with_rejection(fs.npoints, g_africa_poly, limit)
    x_list, y_list = zip(*points)
    tri = Triangulation(x_list, y_list)
    tri_edges = [(i + nafrica, j + nafrica) for i, j in tri.edge_db.tolist()]
    # get the whole list of points
    allpoints = g_africa_poly + points
    # refine the list of edges
    tri_edges = list(gen_noncrossing_edges(tri_edges, africa_edges, allpoints))
    tri_edges = get_mst(tri_edges, allpoints)
    alledges = africa_edges + tri_edges
    # make the graph laplacian
    A = np.zeros((len(points), len(points)))
    for ia, ib in tri_edges:
        xa, ya = allpoints[ia]
        xb, yb = allpoints[ib]
        d = math.hypot(xb - xa, yb - ya)
        A[ia - nafrica, ib - nafrica] = 1 / d
        A[ib - nafrica, ia - nafrica] = 1 / d
    L = Euclid.adjacency_to_laplacian(A)
    ws, vs = EigUtil.eigh(np.linalg.pinv(L))
    if fs.axis >= len(ws):
        raise ValueError('choose a smaller mds axis')
    v = vs[fs.axis]
    # get the color and sizes for the points
    v /= max(np.abs(v))
    colors = [(0, 0, 0)] * nafrica + [get_color(x) for x in v]
    radii = [2] * nafrica + [5 for p in points]
    # get the width and height of the drawable area of the image
    width = fs.total_width - 2 * fs.border
    height = fs.total_height - 2 * fs.border
    if width < 1 or height < 1:
        msg = 'the image dimensions do not allow for enough drawable area'
        raise HandlingError(msg)
    # draw the image
    ext = Form.g_imageformat_to_ext[fs.imageformat]
    try:
        helper = ImgHelper(allpoints, alledges, fs.total_width,
                           fs.total_height, fs.border)
        return helper.get_image_string(colors, radii, ext)
    except CairoUtil.CairoUtilError as e:
        raise HandlingError(e)
Example #57
def process(npoints, nseconds):
    """
    @param npoints: attempt to form each counterexample from this many points
    @param nseconds: allow this many seconds to run
    @return: a multi-line string that summarizes the results
    """
    start_time = time.time()
    best_result = None
    nchecked = 0
    while time.time() - start_time < nseconds:
        # look for a counterexample
        points = sample_points(npoints)
        D = points_to_edm(points)
        L = Euclid.edm_to_laplacian(D)
        L_small = SchurAlgebra.mmerge(L, set([0, 1]))
        w = np.linalg.eigvalsh(L_small)
        D_small = Euclid.laplacian_to_edm(L_small)
        result = Counterexample(points, D, w, D_small)
        # see if the counterexample is interesting
        if best_result is None:
            best_result = result
        elif min(result.L_eigenvalues) < min(best_result.L_eigenvalues):
            best_result = result
        nchecked += 1
    out = StringIO()
    print >> out, 'checked', nchecked, 'matrices each formed from', npoints, 'points'
    print >> out
    print >> out, 'eigenvalues of the induced matrix with lowest eigenvalue:'
    for value in reversed(sorted(best_result.L_eigenvalues)):
        print >> out, value
    print >> out
    print >> out, 'corresponding induced distance matrix:'
    print >> out, MatrixUtil.m_to_string(best_result.D_small)
    print >> out
    print >> out, 'the original distance matrix corresponding to this matrix:'
    print >> out, MatrixUtil.m_to_string(best_result.D)
    print >> out
    print >> out, 'the points that formed the original distance matrix:'
    for point in best_result.points:
        print >> out, '\t'.join(str(x) for x in point)
    return out.getvalue().strip()