Exemplo n.º 1
0
def process(tree_string):
    """
    Summarize weighted embeddings of all vertices of a newick tree.
    The distance matrix over the ordered vertex ids is embedded twice with
    Euclid.edm_to_weighted_points: once with equal mass on every vertex,
    and once with equal mass on the first nleaves positions and zero mass
    on the remaining positions.
    NOTE(review): the second mass vector only matches its printed label if
    get_ordered_ids_and_names lists the leaves first -- confirm.
    This function uses the Python 2 print statement and it changes the
    global numpy print options.
    @param tree_string: a newick string
    @return: a multi-line string that summarizes the results
    """
    # allow up to 200 characters per printed line of a numpy array
    np.set_printoptions(linewidth=200)
    out = StringIO()
    # build the newick tree from the string
    tree = NewickIO.parse(tree_string, FelTree.NewickTree)
    # get ordered names and ids
    ordered_ids, ordered_names = get_ordered_ids_and_names(tree)
    # get the distance matrix with ordered indices including all nodes in the tree
    nvertices = len(list(tree.preorder()))
    nleaves = len(list(tree.gen_tips()))
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    # define mass vectors, each one scaled so that its entries sum to one
    m_uniform_unscaled = [1]*nvertices
    m_degenerate_unscaled = [1]*nleaves + [0]*(nvertices-nleaves)
    m_uniform = np.array(m_uniform_unscaled, dtype=float) / sum(m_uniform_unscaled)
    m_degenerate = np.array(m_degenerate_unscaled, dtype=float) / sum(m_degenerate_unscaled)
    # show the vertex order and then the embedding for each mass vector
    print >> out, 'ordered names:'
    print >> out, ordered_names
    print >> out
    print >> out, 'embedded points with mass uniformly distributed among all vertices:'
    print >> out, Euclid.edm_to_weighted_points(D, m_uniform)
    print >> out
    print >> out, 'embedded points with mass uniformly distributed among the leaves:'
    print >> out, Euclid.edm_to_weighted_points(D, m_degenerate)
    print >> out
    # return the response without the trailing blank line
    return out.getvalue().strip()
Exemplo n.º 2
0
def process(tree_string):
    """
    Summarize weighted embeddings of all vertices of a newick tree.
    The distance matrix over the ordered vertex ids is passed to
    Euclid.edm_to_weighted_points with two mass vectors: one with equal
    mass on every vertex, and one with equal mass on the first nleaves
    positions and zero mass everywhere else.
    NOTE(review): the second mass vector only matches its printed label if
    get_ordered_ids_and_names lists the leaves first -- confirm.
    This function uses the Python 2 print statement and it changes the
    global numpy print options.
    @param tree_string: a newick string
    @return: a multi-line string that summarizes the results
    """
    # allow up to 200 characters per printed line of a numpy array
    np.set_printoptions(linewidth=200)
    out = StringIO()
    # build the newick tree from the string
    tree = NewickIO.parse(tree_string, FelTree.NewickTree)
    # get ordered names and ids
    ordered_ids, ordered_names = get_ordered_ids_and_names(tree)
    # get the distance matrix with ordered indices including all nodes in the tree
    nvertices = len(list(tree.preorder()))
    nleaves = len(list(tree.gen_tips()))
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    # define mass vectors, each one scaled so that its entries sum to one
    m_uniform_unscaled = [1] * nvertices
    m_degenerate_unscaled = [1] * nleaves + [0] * (nvertices - nleaves)
    m_uniform = np.array(m_uniform_unscaled,
                         dtype=float) / sum(m_uniform_unscaled)
    m_degenerate = np.array(m_degenerate_unscaled,
                            dtype=float) / sum(m_degenerate_unscaled)
    # show the vertex order and then the embedding for each mass vector
    print >> out, 'ordered names:'
    print >> out, ordered_names
    print >> out
    print >> out, 'embedded points with mass uniformly distributed among all vertices:'
    print >> out, Euclid.edm_to_weighted_points(D, m_uniform)
    print >> out
    print >> out, 'embedded points with mass uniformly distributed among the leaves:'
    print >> out, Euclid.edm_to_weighted_points(D, m_degenerate)
    print >> out
    # return the response without the trailing blank line
    return out.getvalue().strip()
Exemplo n.º 3
0
def get_response_content(fs):
    """
    Build a text report that compares several embeddings of a newick tree.
    The report shows the leaf distance matrix and its points, optionally
    the augmented distance matrix from get_augmented_distance, the points
    of the augmented matrix, mass-weighted points for fs.ndups and for
    ten times fs.ndups, and finally the limiting points.
    This function uses the Python 2 print statement and it changes the
    global numpy print options.
    @param fs: an object with tree_string, ndups, and show_aug attributes
    @return: a multi-line string that summarizes the results
    """
    def normalized(weights):
        # scale the weights so that they sum to one
        return np.array(weights, dtype=float) / sum(weights)

    def emit(*lines):
        # write one paragraph of the report followed by a blank line
        for line in lines:
            print >> report, line
        print >> report

    # parse the tree and count its vertices
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    nvertices = sum(1 for _ in tree.preorder())
    nleaves = sum(1 for _ in tree.gen_tips())
    ninternal = nvertices - nleaves
    # the ordered ids have the internal nodes first
    ordered_ids = get_ordered_ids(tree)
    tip_ids = [id(tip) for tip in tree.gen_tips()]
    # distance matrices: leaves only, all vertices, and augmented
    D_leaf = np.array(tree.get_partial_distance_matrix(tip_ids))
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    D_aug = get_augmented_distance(D, nleaves, fs.ndups)
    # points from the leaf matrix and from the augmented matrix
    X_leaf = Euclid.edm_to_points(D_leaf)
    X_aug = Euclid.edm_to_points(D_aug, nvertices - 1)
    # weighted points where each leaf carries the mass of its duplicates
    masses = normalized([1] * ninternal + [1 + fs.ndups] * nleaves)
    X_weighted = Euclid.edm_to_weighted_points(D, masses)
    # the same computation with ten times as many duplicates
    masses_10x = normalized([1] * ninternal + [1 + fs.ndups * 10] * nleaves)
    X_weighted_10x = Euclid.edm_to_weighted_points(D, masses_10x)
    # limiting points as the number of dups increases:
    # center on the mean of the leaf rows, then rotate onto the
    # right singular vectors of the leaf rows
    X = Euclid.edm_to_points(D)
    X -= np.mean(X[-nleaves:], axis=0)
    _, _, Vt = np.linalg.svd(X[-nleaves:])
    Z = np.dot(X, Vt.T)
    # write the report
    np.set_printoptions(linewidth=300, threshold=10000)
    report = StringIO()
    emit('leaf distance matrix:', D_leaf)
    emit(
        'points derived from the leaf distance matrix',
        '(the first column is proportional to the Fiedler vector):',
        X_leaf)
    if fs.show_aug:
        emit('augmented distance matrix:', D_aug)
    emit(
        'points derived from the augmented distance matrix',
        '(the first column is proportional to the Fiedler vector):',
        get_ugly_matrix(X_aug, ninternal, nleaves))
    emit('points computed using masses:', X_weighted)
    emit('points computed using masses with 10x dups:', X_weighted_10x)
    emit('limiting points:', Z)
    return report.getvalue()
Exemplo n.º 4
0
def get_response_content(fs):
    """
    Report several embeddings of the vertices of a newick tree.
    The sections of the report are the leaf distance matrix, the points
    derived from it, the augmented distance matrix (only if fs.show_aug),
    the points derived from the augmented matrix, the mass-weighted points
    for fs.ndups and for ten times fs.ndups, and the limiting points.
    This function uses the Python 2 print statement and it changes the
    global numpy print options.
    @param fs: an object with tree_string, ndups, and show_aug attributes
    @return: a multi-line string that summarizes the results
    """
    def leaf_heavy_masses(ndups):
        # unit mass per internal node and 1+ndups per leaf, summing to one
        raw = [1]*ninternal + [1+ndups]*nleaves
        return np.array(raw, dtype=float) / sum(raw)

    # read the tree and count the vertices of each kind
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    nvertices = len([node for node in tree.preorder()])
    nleaves = len([tip for tip in tree.gen_tips()])
    ninternal = nvertices - nleaves
    # the ordered ids have the internal nodes first
    ordered_ids = get_ordered_ids(tree)
    leaf_ids = [id(tip) for tip in tree.gen_tips()]
    # leaf-only, full, and augmented distance matrices
    D_leaf = np.array(tree.get_partial_distance_matrix(leaf_ids))
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    D_aug = get_augmented_distance(D, nleaves, fs.ndups)
    # embeddings of the leaf matrix and of the augmented matrix
    X_leaf = Euclid.edm_to_points(D_leaf)
    X_aug = Euclid.edm_to_points(D_aug, nvertices-1)
    # weighted embeddings for the requested dups and for 10x dups
    X_weighted = Euclid.edm_to_weighted_points(
            D, leaf_heavy_masses(fs.ndups))
    X_weighted_10x = Euclid.edm_to_weighted_points(
            D, leaf_heavy_masses(fs.ndups*10))
    # limiting points as the number of dups increases
    X = Euclid.edm_to_points(D)
    leaf_centroid = np.mean(X[-nleaves:], axis=0)
    X -= leaf_centroid
    leaf_rows = X[-nleaves:]
    Vt = np.linalg.svd(leaf_rows)[2]
    Z = np.dot(X, Vt.T)
    # report the results
    np.set_printoptions(linewidth=300, threshold=10000)
    out = StringIO()
    fiedler_note = '(the first column is proportional to the Fiedler vector):'
    print >> out, 'leaf distance matrix:'
    print >> out, D_leaf
    print >> out
    print >> out, 'points derived from the leaf distance matrix'
    print >> out, fiedler_note
    print >> out, X_leaf
    print >> out
    if fs.show_aug:
        print >> out, 'augmented distance matrix:'
        print >> out, D_aug
        print >> out
    print >> out, 'points derived from the augmented distance matrix'
    print >> out, fiedler_note
    print >> out, get_ugly_matrix(X_aug, ninternal, nleaves)
    print >> out
    # the remaining sections are each one caption followed by one matrix
    for caption, points in (
            ('points computed using masses:', X_weighted),
            ('points computed using masses with 10x dups:', X_weighted_10x),
            ('limiting points:', Z)):
        print >> out, caption
        print >> out, points
        print >> out
    return out.getvalue()
Exemplo n.º 5
0
def process():
    """
    Compare embeddings of the example distance matrices in Euclid.
    Euclid.g_D_b is used as the leaf distance matrix and Euclid.g_D_c as
    the distance matrix among all vertices.
    The report shows the leaf embedding, the projection from
    do_projection, and the unweighted and degenerate-weighted embeddings
    of all vertices together with their squared distance matrices.
    This function uses the Python 2 print statement and it changes the
    global numpy print options.
    @return: a multi-line string that summarizes the results
    """
    def squared_distances(points):
        # pairwise squared Euclidean distances among the rows of points
        return np.array(
                [[np.dot(pb - pa, pb - pa) for pa in points] for pb in points])

    np.set_printoptions(linewidth=200)
    out = StringIO()
    # define a degenerate mass vector that puts no mass on the last two entries
    m_degenerate = np.array([0.25, 0.25, 0.25, 0.25, 0, 0])
    # define some distance matrices
    D_leaves = Euclid.g_D_b
    D_all = Euclid.g_D_c
    nleaves = 4
    # get the unweighted and the weighted multidimensional scaling
    X = Euclid.edm_to_points(D_all)
    Y = Euclid.edm_to_weighted_points(D_all, m_degenerate)
    # recover the distance matrix implied by each embedding
    D_X = squared_distances(X)
    D_Y = squared_distances(Y)
    # write the report
    print >> out, 'embedding of leaves from the leaf distance matrix:'
    print >> out, Euclid.edm_to_points(D_leaves)
    print >> out, 'projection of all vertices onto the MDS space of the leaves:'
    print >> out, do_projection(D_all, nleaves)
    print >> out, 'embedding of all vertices using uniform weights:'
    print >> out, X
    print >> out, 'corresponding distance matrix:'
    print >> out, D_X
    print >> out, 'embedding of all vertices using degenerate weights:'
    print >> out, Y
    print >> out, 'corresponding distance matrix:'
    print >> out, D_Y
    return out.getvalue().strip()
Exemplo n.º 6
0
def process():
    """
    Compare embeddings of the example distance matrices in Euclid.
    Euclid.g_D_b is used as the leaf distance matrix and Euclid.g_D_c as
    the distance matrix among all vertices.
    The report shows the leaf embedding, the projection from
    do_projection, and the unweighted and degenerate-weighted embeddings
    of all vertices together with their squared distance matrices.
    This function uses the Python 2 print statement and it changes the
    global numpy print options.
    @return: a multi-line string that summarizes the results
    """
    def squared_distances(points):
        # pairwise squared Euclidean distances among the rows of points
        return np.array(
                [[np.dot(pb-pa, pb-pa) for pa in points] for pb in points])

    np.set_printoptions(linewidth=200)
    out = StringIO()
    # define a degenerate mass vector that puts no mass on the last two entries
    m_degenerate = np.array([0.25, 0.25, 0.25, 0.25, 0, 0])
    # define some distance matrices
    D_leaves = Euclid.g_D_b
    D_all = Euclid.g_D_c
    nleaves = 4
    # get the unweighted and the weighted multidimensional scaling
    X = Euclid.edm_to_points(D_all)
    Y = Euclid.edm_to_weighted_points(D_all, m_degenerate)
    # recover the distance matrix implied by each embedding
    D_X = squared_distances(X)
    D_Y = squared_distances(Y)
    # write the report
    print >> out, 'embedding of leaves from the leaf distance matrix:'
    print >> out, Euclid.edm_to_points(D_leaves)
    print >> out, 'projection of all vertices onto the MDS space of the leaves:'
    print >> out, do_projection(D_all, nleaves)
    print >> out, 'embedding of all vertices using uniform weights:'
    print >> out, X
    print >> out, 'corresponding distance matrix:'
    print >> out, D_X
    print >> out, 'embedding of all vertices using degenerate weights:'
    print >> out, Y
    print >> out, 'corresponding distance matrix:'
    print >> out, D_Y
    return out.getvalue().strip()
Exemplo n.º 7
0
def get_canonical_2d_mds(D, m, reference_points):
    """
    Compute a weighted MDS projection restricted to the first two axes.
    Unlike plain MDS, the reflections across the axes are not arbitrary:
    the result is passed through reflect_to_reference together with the
    reference points.
    @param D: the full distance matrix
    @param m: the mass vector
    @param reference_points: a 2D reference projection of vertices of the tree
    @return: the weighted MDS points as a numpy matrix
    """
    embedding = Euclid.edm_to_weighted_points(D, m)
    # keep only the first two columns of the embedding
    leading_axes = embedding.T[:2]
    planar_points = leading_axes.T
    return reflect_to_reference(planar_points, reference_points)
Exemplo n.º 8
0
def get_canonical_2d_mds(D, m, reference_points):
    """
    Project the weighted MDS points onto their first two axes.
    This is like MDS, except that the axis reflections are fixed by
    handing the projected points and the reference points to
    reflect_to_reference instead of being left arbitrary.
    @param D: the full distance matrix
    @param m: the mass vector
    @param reference_points: a 2D reference projection of vertices of the tree
    @return: the weighted MDS points as a numpy matrix
    """
    weighted_points = Euclid.edm_to_weighted_points(D, m)
    # transpose, take the first two rows, and transpose back:
    # this keeps only the first two columns
    first_two_columns = weighted_points.T[:2].T
    canonical = reflect_to_reference(first_two_columns, reference_points)
    return canonical
Exemplo n.º 9
0
def get_canonical_3d_mds(D, m, reference_points):
    """
    Compute a weighted MDS projection restricted to the first three axes.
    Unlike plain MDS, the reflections across the axes are not arbitrary:
    the projected points are multiplied by the sign vector that
    MatrixUtil.get_best_reflection returns for the reference points.
    @param D: the full distance matrix
    @param m: the mass vector
    @param reference_points: a 3D reference projection of vertices of the tree
    @return: the weighted MDS points as a numpy matrix
    """
    embedding = Euclid.edm_to_weighted_points(D, m)
    # keep only the first three columns of the embedding
    leading_axes = embedding.T[:3]
    spatial_points = leading_axes.T
    signs = MatrixUtil.get_best_reflection(spatial_points, reference_points)
    # presumably one sign per axis -- confirm against MatrixUtil
    return spatial_points * signs
Exemplo n.º 10
0
def get_canonical_3d_mds(D, m, reference_points):
    """
    Project the weighted MDS points onto their first three axes.
    This is like MDS, except that the axis reflections are fixed by
    multiplying the projected points by the result of
    MatrixUtil.get_best_reflection instead of being left arbitrary.
    @param D: the full distance matrix
    @param m: the mass vector
    @param reference_points: a 3D reference projection of vertices of the tree
    @return: the weighted MDS points as a numpy matrix
    """
    weighted_points = Euclid.edm_to_weighted_points(D, m)
    # transpose, take the first three rows, and transpose back:
    # this keeps only the first three columns
    first_three_columns = weighted_points.T[:3].T
    reflection = MatrixUtil.get_best_reflection(
            first_three_columns, reference_points)
    reflected_points = first_three_columns * reflection
    return reflected_points
Exemplo n.º 11
0
def process():
    """
    Compare several embeddings of the example distance matrices in Euclid.
    Euclid.g_D_b is used as the leaf distance matrix and Euclid.g_D_c as
    the distance matrix among all vertices.
    The report shows the original and leaflet-augmented distance matrices,
    the embeddings and the projection built from them, the weighted
    embeddings from Euclid.edm_to_weighted_points and from
    get_weighted_embedding_b for three mass vectors, and a final block of
    exploratory matrix computations.
    This function uses the Python 2 print statement and it changes the
    global numpy print options.
    @return: a multi-line string that summarizes the results
    """
    np.set_printoptions(linewidth=200)
    out = StringIO()
    # define some distance matrices
    D_leaves = Euclid.g_D_b
    D_all = Euclid.g_D_c
    nvertices = 6
    nleaves = 4
    # define mass vectors
    m_degenerate = np.array([0.25, 0.25, 0.25, 0.25, 0, 0])
    m_interesting = np.array([.2, .2, .2, .2, .1, .1])
    m_uniform = np.ones(nvertices) / float(nvertices)
    # pair each mass vector with the case number used in the report labels
    mass_cases = ((1, m_uniform), (2, m_interesting), (3, m_degenerate))
    # augment a distance matrix by adding leaflets
    D_augmented = add_leaflets(D_all, nleaves)
    # create the projection of points
    X_projected = do_projection(D_all, nleaves)
    # show some of the distance matrices
    print >> out, 'pairwise distances among vertices in the original tree:'
    print >> out, D_all
    print >> out, 'pairwise distance matrix augmented with one leaflet per leaf:'
    print >> out, D_augmented
    # show the embeddings for the three numbered cases
    print >> out, 'case 1: embedding of all vertices:'
    print >> out, Euclid.edm_to_points(D_all)
    print >> out, 'case 2: embedding of leaves and leaflets from the leaflet-augmented distance matrix:'
    print >> out, Euclid.edm_to_points(D_augmented)
    print >> out, 'case 3: projection of all vertices onto the MDS space of the leaves:'
    print >> out, X_projected
    # another embedding
    print >> out, 'embedding of leaves from the leaf distance matrix:'
    print >> out, Euclid.edm_to_points(D_leaves)
    # show embeddings of a tree augmented with leaflets
    print >> out, 'first few coordinates of the original vertices of the embedded tree with lots of leaflets per leaf:'
    # apply the leaflet augmentation twenty times in a row
    D_super_augmented = D_all.copy()
    for _ in range(20):
        D_super_augmented = add_leaflets(D_super_augmented, nleaves)
    X_super = Euclid.edm_to_points(D_super_augmented)
    # keep the first nvertices rows and the first three columns
    X_super_block_small = X_super[:nvertices].T[:3].T
    print >> out, X_super_block_small
    print >> out, 'ratio of coordinates of projected points to coordinates of this block of the embedding of the augmented tree:'
    print >> out, X_projected / X_super_block_small
    # weighted embedding of all vertices for each mass vector
    for case_number, masses in mass_cases:
        Z = Euclid.edm_to_weighted_points(D_all, masses)
        print >> out, 'generalized case %d:' % case_number
        print >> out, Z
    # the same mass vectors through the alternative embedding function
    for case_number, masses in mass_cases:
        Z = get_weighted_embedding_b(D_all, masses)
        print >> out, 'eric formula case %d:' % case_number
        print >> out, Z
    # exploratory computations using the degenerate mass vector
    print >> out, 'testing random stuff:'
    D = D_all
    m = m_degenerate
    sqrtm = np.sqrt(m)
    M = np.diag(sqrtm)
    cross_product_matrix = Euclid.edm_to_weighted_cross_product(D, m)
    # only the singular values of the cross product matrix are reported
    _, S_cross, _ = np.linalg.svd(cross_product_matrix, full_matrices=False)
    Q = np.dot(M, np.dot(cross_product_matrix, M.T))
    U, B, _ = np.linalg.svd(Q, full_matrices=False)
    S = np.sqrt(np.diag(B))
    US = np.dot(U, S)
    M_pinv = np.linalg.pinv(M)
    # drop the last two columns of M_pinv and the last two rows of US;
    # m_degenerate has exactly two zero entries and they are at the end
    M_pinv_narrow = M_pinv.T[:-2].T
    US_short = US[:-2]
    print >> out, 'eigenvalues of the abdi cross product:', S_cross
    print >> out, 'eigenvalues of the eric cross product:', B
    print >> out, M_pinv
    print >> out, US
    print >> out, M_pinv_narrow
    print >> out, US_short
    Z = np.dot(M_pinv_narrow, US_short)
    print >> out, Z
    # return the response
    return out.getvalue().strip()