Пример #1
0
def process():
    """
    Report eigendecompositions of several matrices derived from a tree.
    The report covers -(1/2)HDH of the leaf distance matrix,
    the weighted cross product matrix of the full distance matrix
    under a degenerate mass distribution, and, for a uniform and
    a non-uniform mass distribution, both the pseudoinverse of the
    perturbed laplacian and the weighted cross product matrix.
    @return: a multi-line string that summarizes the results
    @raise ValueError: if a mass distribution does not sum to 1
    """
    np.set_printoptions(linewidth=200)
    out = StringIO()
    # the adjacency matrix and the number of vertices it describes
    adjacency = g_A
    nvertices = 6
    # the mass distributions examined in the loop below: uniform, then weighted
    mass_vectors = (
            np.ones(nvertices) / float(nvertices),
            np.array([102, 102, 102, 102, 1, 1], dtype=float) / 410,
            )
    # eigendecomposition of -(1/2)HDH where D is the leaf distance matrix
    # NOTE(review): np.linalg.eigh returns eigenvectors as columns,
    # so the 'VT' captions used throughout may deserve a second look
    leaf_w, leaf_v = np.linalg.eigh(Euclid.edm_to_dccov(Euclid.g_D_b))
    print >> out, 'W for -(1/2)HDH of the leaf distance matrix:'
    print >> out, leaf_w
    print >> out, 'VT for -(1/2)HDH of the leaf distance matrix:'
    print >> out, leaf_v
    # eigendecomposition of S for the full tree when the last two masses are zero
    degenerate_masses = np.array([.25, .25, .25, .25, 0, 0])
    degenerate_w, degenerate_v = np.linalg.eigh(
            Euclid.edm_to_weighted_cross_product(Euclid.g_D_c, degenerate_masses))
    print >> out, 'W for -(1/2)(Xi)D(Xi)^T of the full distance matrix with degenerate masses:'
    print >> out, degenerate_w
    print >> out, 'VT for -(1/2)(Xi)D(Xi)^T of the full distance matrix with degenerate masses:'
    print >> out, degenerate_v
    # effects of the mass distribution on the MDS of the full tree
    for m in mass_vectors:
        # reject anything that is not a proper distribution
        if not np.allclose(m.sum(), 1):
            raise ValueError('masses should sum to 1')
        # the diagonal of the perturbed laplacian is the mass-weighted
        # sum over the adjacency matrix divided elementwise by the masses
        diagonal = np.dot(m, adjacency) / m
        perturbed = np.diag(diagonal) - adjacency
        # eigendecomposition of the pseudoinverse of the perturbed laplacian
        pinv_w, pinv_v = np.linalg.eigh(np.linalg.pinv(perturbed))
        # eigendecomposition of the S matrix of the full distance matrix
        cross_w, cross_v = np.linalg.eigh(
                Euclid.edm_to_weighted_cross_product(Euclid.g_D_c, m))
        print >> out, 'perturbed laplacian:'
        print >> out, perturbed
        print >> out, 'm:', m
        for caption, value in (
                ('W for the pseudoinverse of the perturbed laplacian:', pinv_w),
                ('VT for the pseudoinverse of the perturbed laplacian:', pinv_v),
                ('W for the cross product matrix:', cross_w),
                ('VT for the cross product matrix:', cross_v),
                ):
            print >> out, caption
            print >> out, value
    return out.getvalue().strip()
Пример #2
0
def process():
    """
    Build a text report of eigendecompositions for a small tree.
    Three things are reported in order: -(1/2)HDH of the leaf distance
    matrix, the weighted cross product matrix of the full distance
    matrix with a degenerate mass vector, and then, once per mass
    distribution (uniform and weighted), the perturbed laplacian with
    the spectrum of its pseudoinverse and of the cross product matrix.
    @return: a multi-line string that summarizes the results
    @raise ValueError: if a mass distribution does not sum to 1
    """
    np.set_printoptions(linewidth=200)
    # the adjacency matrix of the tree
    adj = g_A
    vertex_count = 6
    # one uniform and one strongly non-uniform mass distribution
    uniform = np.ones(vertex_count) / float(vertex_count)
    weighted = np.array([102, 102, 102, 102, 1, 1], dtype=float) / 410
    out = StringIO()

    def report(caption, value):
        # write the caption on its own line and the value right below it
        print >> out, caption
        print >> out, value

    # spectrum of -(1/2)HDH where D is the leaf distance matrix
    centered = Euclid.edm_to_dccov(Euclid.g_D_b)
    eigenvalues, eigenvectors = np.linalg.eigh(centered)
    report('W for -(1/2)HDH of the leaf distance matrix:', eigenvalues)
    report('VT for -(1/2)HDH of the leaf distance matrix:', eigenvectors)
    # spectrum of S on the full tree when two of the masses vanish
    degenerate = np.array([.25, .25, .25, .25, 0, 0])
    cross = Euclid.edm_to_weighted_cross_product(Euclid.g_D_c, degenerate)
    eigenvalues, eigenvectors = np.linalg.eigh(cross)
    report('W for -(1/2)(Xi)D(Xi)^T of the full distance matrix with degenerate masses:', eigenvalues)
    report('VT for -(1/2)(Xi)D(Xi)^T of the full distance matrix with degenerate masses:', eigenvectors)
    # repeat the analysis of the full tree for each mass distribution
    for m in (uniform, weighted):
        total = np.sum(m)
        if not np.allclose(total, 1):
            raise ValueError('masses should sum to 1')
        # mass-weighted sums over the adjacency matrix, rescaled by the masses,
        # form the diagonal from which the adjacency matrix is subtracted
        rescaled = np.dot(m, adj) / m
        laplacian = np.diag(rescaled) - adj
        # spectrum of the pseudoinverse of the perturbed laplacian
        laplacian_pinv = np.linalg.pinv(laplacian)
        pinv_values, pinv_vectors = np.linalg.eigh(laplacian_pinv)
        # spectrum of the S matrix associated with the full distance matrix
        cross = Euclid.edm_to_weighted_cross_product(Euclid.g_D_c, m)
        cross_values, cross_vectors = np.linalg.eigh(cross)
        report('perturbed laplacian:', laplacian)
        print >> out, 'm:', m
        report('W for the pseudoinverse of the perturbed laplacian:', pinv_values)
        report('VT for the pseudoinverse of the perturbed laplacian:', pinv_vectors)
        report('W for the cross product matrix:', cross_values)
        report('VT for the cross product matrix:', cross_vectors)
    return out.getvalue().strip()
Пример #3
0
def get_weighted_embedding_b(D, m):
    """
    Compute a mass-weighted embedding through an SVD of a rescaled cross product.
    The weighted cross product matrix is scaled on both sides by the
    diagonal matrix of square-rooted masses, decomposed by SVD, and the
    scaling is then undone with a pseudoinverse.
    This method was suggested by Eric but it has some limitations.
    In particular, it fails when some elements of the mass vector are zero.
    @param D: a distance matrix
    @param m: a mass vector
    @return: an embedding
    """
    # diagonal matrix whose entries are the square roots of the masses
    root_mass = np.diag(np.sqrt(m))
    weighted_cross = Euclid.edm_to_weighted_cross_product(D, m)
    # sandwich the cross product matrix between the root-mass matrices
    right_scaled = np.dot(weighted_cross, root_mass.T)
    sandwiched = np.dot(root_mass, right_scaled)
    left_vectors, singular_values, _ = np.linalg.svd(sandwiched, full_matrices=False)
    # the pseudoinverse is used because the root-mass matrix
    # is singular as soon as any mass is zero
    root_mass_pinv = np.linalg.pinv(root_mass)
    # scale each left singular vector by the root of its singular value
    scaled_vectors = np.dot(left_vectors, np.sqrt(np.diag(singular_values)))
    return np.dot(root_mass_pinv, scaled_vectors)
Пример #4
0
def process():
    """
    Report several embeddings of a tree and compare two weighted formulas.
    The report shows the full and leaflet-augmented distance matrices,
    the embeddings computed from them, the projection of all vertices
    onto the MDS space of the leaves, the embedding of a tree augmented
    with leaflets twenty times over, and the weighted embeddings given by
    Euclid.edm_to_weighted_points and by get_weighted_embedding_b for
    three mass vectors. It ends with intermediate matrices of the
    latter formula for the degenerate mass vector.
    @return: a multi-line string that summarizes the results
    """
    np.set_printoptions(linewidth=200)
    out = StringIO()
    # distance matrices among the leaves and among all vertices
    leaf_distances = Euclid.g_D_b
    full_distances = Euclid.g_D_c
    nvertices = 6
    nleaves = 4
    # mass vectors: zero internal masses, mixed masses, and uniform masses
    m_degenerate = np.array([0.25, 0.25, 0.25, 0.25, 0, 0])
    m_interesting = np.array([.2, .2, .2, .2, .1, .1])
    m_uniform = np.ones(nvertices) / float(nvertices)
    # one round of leaflet augmentation of the full distance matrix
    augmented_distances = add_leaflets(full_distances, nleaves)
    # the projection of points
    X_projected = do_projection(full_distances, nleaves)
    # show some of the distance matrices
    print >> out, 'pairwise distances among vertices in the original tree:'
    print >> out, full_distances
    print >> out, 'pairwise distance matrix augmented with one leaflet per leaf:'
    print >> out, augmented_distances
    # show the embeddings for the three cases
    print >> out, 'case 1: embedding of all vertices:'
    print >> out, Euclid.edm_to_points(full_distances)
    print >> out, 'case 2: embedding of leaves and leaflets from the leaflet-augmented distance matrix:'
    print >> out, Euclid.edm_to_points(augmented_distances)
    print >> out, 'case 3: projection of all vertices onto the MDS space of the leaves:'
    print >> out, X_projected
    # one more embedding, this time of the leaves alone
    print >> out, 'embedding of leaves from the leaf distance matrix:'
    print >> out, Euclid.edm_to_points(leaf_distances)
    # embed a tree that has been augmented with leaflets twenty times
    print >> out, 'first few coordinates of the original vertices of the embedded tree with lots of leaflets per leaf:'
    heavily_augmented = full_distances.copy()
    for _ in range(20):
        heavily_augmented = add_leaflets(heavily_augmented, nleaves)
    X_super = Euclid.edm_to_points(heavily_augmented)
    # keep the first six rows and the first three columns
    X_super_corner = X_super[:6, :3]
    print >> out, X_super_corner
    print >> out, 'ratio of coordinates of projected points to coordinates of this block of the embedding of the augmented tree:'
    print >> out, X_projected / X_super_corner
    # run both weighted embedding formulas on each of the mass vectors
    weighted_cases = (
            ('generalized case 1:', Euclid.edm_to_weighted_points, m_uniform),
            ('generalized case 2:', Euclid.edm_to_weighted_points, m_interesting),
            ('generalized case 3:', Euclid.edm_to_weighted_points, m_degenerate),
            ('eric formula case 1:', get_weighted_embedding_b, m_uniform),
            ('eric formula case 2:', get_weighted_embedding_b, m_interesting),
            ('eric formula case 3:', get_weighted_embedding_b, m_degenerate),
            )
    for caption, embed, masses in weighted_cases:
        Z = embed(full_distances, masses)
        print >> out, caption
        print >> out, Z
    # step through the pieces of the second formula with the degenerate masses
    print >> out, 'testing random stuff:'
    root_mass = np.diag(np.sqrt(m_degenerate))
    cross = Euclid.edm_to_weighted_cross_product(full_distances, m_degenerate)
    # only the singular values of the plain cross product matrix are reported
    _, cross_spectrum, _ = np.linalg.svd(cross, full_matrices=False)
    sandwiched = np.dot(root_mass, np.dot(cross, root_mass.T))
    left_vectors, sandwiched_spectrum, _ = np.linalg.svd(sandwiched, full_matrices=False)
    scaled_vectors = np.dot(left_vectors, np.sqrt(np.diag(sandwiched_spectrum)))
    root_mass_pinv = np.linalg.pinv(root_mass)
    # drop the last two columns of the pseudoinverse and the
    # last two rows of the scaled vectors before multiplying them
    root_mass_pinv_narrow = root_mass_pinv[:, :-2]
    scaled_vectors_short = scaled_vectors[:-2]
    print >> out, 'eigenvalues of the abdi cross product:', cross_spectrum
    print >> out, 'eigenvalues of the eric cross product:', sandwiched_spectrum
    print >> out, root_mass_pinv
    print >> out, scaled_vectors
    print >> out, root_mass_pinv_narrow
    print >> out, scaled_vectors_short
    print >> out, np.dot(root_mass_pinv_narrow, scaled_vectors_short)
    # return the response
    return out.getvalue().strip()