Exemplo n.º 1
0
def process():
    """
    Summarize several embeddings of the example distance matrices in Euclid.
    The report lists the embedding of the leaf distance matrix,
    the projection of all vertices onto the MDS space of the leaves,
    and the embeddings of all vertices under uniform and degenerate weights,
    each of the latter followed by the squared pairwise distances
    recomputed from the embedded points.
    @return: a multi-line string that summarizes the results
    """
    def pairwise_squared_distances(points):
        # entry (i, j) is the squared euclidean distance between rows i and j
        return np.array(
                [[np.dot(q - p, q - p) for p in points] for q in points])

    np.set_printoptions(linewidth=200)
    # the degenerate mass vector gives no mass to the last two entries
    masses = np.array([0.25, 0.25, 0.25, 0.25, 0, 0])
    # the example distance matrices
    leaf_distances = Euclid.g_D_b
    full_distances = Euclid.g_D_c
    leaf_count = 4
    # plain and weighted multidimensional scaling of the full matrix
    uniform_points = Euclid.edm_to_points(full_distances)
    weighted_points = Euclid.edm_to_weighted_points(full_distances, masses)
    uniform_distances = pairwise_squared_distances(uniform_points)
    weighted_distances = pairwise_squared_distances(weighted_points)
    # each entry is a heading followed by the value shown under it
    report = (
        ('embedding of leaves from the leaf distance matrix:',
            Euclid.edm_to_points(leaf_distances)),
        ('projection of all vertices onto the MDS space of the leaves:',
            do_projection(full_distances, leaf_count)),
        ('embedding of all vertices using uniform weights:',
            uniform_points),
        ('corresponding distance matrix:',
            uniform_distances),
        ('embedding of all vertices using degenerate weights:',
            weighted_points),
        ('corresponding distance matrix:',
            weighted_distances),
    )
    out = StringIO()
    for heading, value in report:
        print >> out, heading
        print >> out, value
    return out.getvalue().strip()
Exemplo n.º 2
0
def process():
    """
    Report embeddings derived from the example distance matrices in Euclid.
    Shown in order: the embedding of the leaf distance matrix,
    the projection of every vertex onto the MDS space of the leaves,
    the uniformly weighted embedding of every vertex with its squared
    pairwise distances, and the same pair for the degenerate weights.
    @return: a multi-line string that summarizes the results
    """
    np.set_printoptions(linewidth=200)
    # only the first four entries of the mass vector are nonzero
    degenerate_masses = np.array([0.25, 0.25, 0.25, 0.25, 0, 0])
    D_b = Euclid.g_D_b
    D_c = Euclid.g_D_c
    n_leaf = 4
    # embed all vertices without and with the degenerate weights
    pts_uniform = Euclid.edm_to_points(D_c)
    pts_weighted = Euclid.edm_to_weighted_points(D_c, degenerate_masses)
    # recompute squared pairwise distances from each embedding
    sq_uniform = np.array(
            [[np.dot(v - u, v - u) for u in pts_uniform] for v in pts_uniform])
    sq_weighted = np.array(
            [[np.dot(v - u, v - u) for u in pts_weighted] for v in pts_weighted])
    titles = [
        'embedding of leaves from the leaf distance matrix:',
        'projection of all vertices onto the MDS space of the leaves:',
        'embedding of all vertices using uniform weights:',
        'corresponding distance matrix:',
        'embedding of all vertices using degenerate weights:',
        'corresponding distance matrix:',
    ]
    values = [
        Euclid.edm_to_points(D_b),
        do_projection(D_c, n_leaf),
        pts_uniform,
        sq_uniform,
        pts_weighted,
        sq_weighted,
    ]
    out = StringIO()
    for title, value in zip(titles, values):
        print >> out, title
        print >> out, value
    return out.getvalue().strip()
Exemplo n.º 3
0
def get_response_content(fs):
    """
    Compare point sets derived from a tree and from its augmented version.
    The report shows the leaf distance matrix and its points,
    optionally the augmented distance matrix, the points derived from the
    augmented matrix, the points computed from explicit mass vectors
    for fs.ndups and for ten times fs.ndups, and the limiting points.
    @param fs: form fields; reads tree_string, ndups and show_aug
    @return: a multi-line string
    """
    def normalized_masses(leaf_mass):
        # unit mass on each internal vertex and leaf_mass on each leaf
        raw = [1] * ninternal + [leaf_mass] * nleaves
        return np.array(raw, dtype=float) / sum(raw)

    # build the newick tree from the string and count its vertices
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    nvertices = sum(1 for _ in tree.preorder())
    nleaves = sum(1 for _ in tree.gen_tips())
    ninternal = nvertices - nleaves
    # the ordered ids have the internal nodes first
    ordered_ids = get_ordered_ids(tree)
    leaf_ids = [id(tip) for tip in tree.gen_tips()]
    # distance matrices: leaves only, all vertices, and augmented
    D_leaf = np.array(tree.get_partial_distance_matrix(leaf_ids))
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    D_aug = get_augmented_distance(D, nleaves, fs.ndups)
    # points from the leaf matrix and from the augmented matrix
    X_leaf = Euclid.edm_to_points(D_leaf)
    X_aug = Euclid.edm_to_points(D_aug, nvertices - 1)
    # points computed with explicit masses for the given and for 10x dups
    X_weighted = Euclid.edm_to_weighted_points(
            D, normalized_masses(1 + fs.ndups))
    X_weighted_10x = Euclid.edm_to_weighted_points(
            D, normalized_masses(1 + fs.ndups * 10))
    # limiting points: center on the leaves and rotate onto the leaf axes
    X = Euclid.edm_to_points(D)
    X -= np.mean(X[-nleaves:], axis=0)
    Vt = np.linalg.svd(X[-nleaves:])[2]
    Z = np.dot(X, Vt.T)
    # each paragraph is printed line by line and followed by a blank line
    np.set_printoptions(linewidth=300, threshold=10000)
    fiedler_note = '(the first column is proportional to the Fiedler vector):'
    paragraphs = [
        ['leaf distance matrix:', D_leaf],
        ['points derived from the leaf distance matrix',
            fiedler_note, X_leaf],
    ]
    if fs.show_aug:
        paragraphs.append(['augmented distance matrix:', D_aug])
    paragraphs.extend([
        ['points derived from the augmented distance matrix',
            fiedler_note, get_ugly_matrix(X_aug, ninternal, nleaves)],
        ['points computed using masses:', X_weighted],
        ['points computed using masses with 10x dups:', X_weighted_10x],
        ['limiting points:', Z],
    ])
    out = StringIO()
    for paragraph in paragraphs:
        for line in paragraph:
            print >> out, line
        print >> out
    return out.getvalue()
Exemplo n.º 4
0
def get_response_content(fs):
    """
    Report points derived from a tree, its augmented matrix, and mass vectors.
    The sections are the leaf distance matrix, its points, optionally the
    augmented distance matrix, the points of the augmented matrix,
    the mass-weighted points for fs.ndups and for ten times fs.ndups,
    and finally the limiting points.
    @param fs: form fields; reads tree_string, ndups and show_aug
    @return: a multi-line string
    """
    out = StringIO()

    def emit(*lines):
        # write each line of a section and then a separating blank line
        for line in lines:
            print >> out, line
        print >> out

    # parse the tree and count vertices
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    vertex_count = len(list(tree.preorder()))
    leaf_count = len(list(tree.gen_tips()))
    internal_count = vertex_count - leaf_count
    # internal nodes come first in the ordered ids
    all_ids = get_ordered_ids(tree)
    tip_ids = [id(node) for node in tree.gen_tips()]
    # the leaf, full, and augmented distance matrices
    D_leaf = np.array(tree.get_partial_distance_matrix(tip_ids))
    D = np.array(tree.get_partial_distance_matrix(all_ids))
    D_aug = get_augmented_distance(D, leaf_count, fs.ndups)
    # points of the leaf matrix and of the augmented matrix
    X_leaf = Euclid.edm_to_points(D_leaf)
    X_aug = Euclid.edm_to_points(D_aug, vertex_count - 1)
    # weighted points for the requested number of dups and for 10x dups
    weighted = []
    for leaf_mass in (1 + fs.ndups, 1 + fs.ndups * 10):
        raw = [1] * internal_count + [leaf_mass] * leaf_count
        masses = np.array(raw, dtype=float) / sum(raw)
        weighted.append(Euclid.edm_to_weighted_points(D, masses))
    X_weighted, X_weighted_10x = weighted
    # limiting points as the number of dups increases
    X = Euclid.edm_to_points(D)
    X -= np.mean(X[-leaf_count:], axis=0)
    leaf_rows = X[-leaf_count:]
    Vt = np.linalg.svd(leaf_rows)[2]
    Z = np.dot(X, Vt.T)
    # report the results
    np.set_printoptions(linewidth=300, threshold=10000)
    emit('leaf distance matrix:', D_leaf)
    emit('points derived from the leaf distance matrix',
            '(the first column is proportional to the Fiedler vector):',
            X_leaf)
    if fs.show_aug:
        emit('augmented distance matrix:', D_aug)
    emit('points derived from the augmented distance matrix',
            '(the first column is proportional to the Fiedler vector):',
            get_ugly_matrix(X_aug, internal_count, leaf_count))
    emit('points computed using masses:', X_weighted)
    emit('points computed using masses with 10x dups:', X_weighted_10x)
    emit('limiting points:', Z)
    return out.getvalue()
Exemplo n.º 5
0
def do_projection(D_full, nleaves):
    """
    Project points onto the space of the leaves.
    All vertices are embedded, translated so that the leaf centroid
    is the origin, and rotated by the right singular vectors of the
    centered leaf points; only the first nleaves-1 coordinates are kept.
    @param D_full: distances relating all, including internal, vertices.
    @param nleaves: the first few indices in D_full represent leaves
    @return: a numpy array where each row is a vertex of the tree
    """
    # embed every vertex; each row of X is one point
    X = Euclid.edm_to_points(D_full)
    # move the origin to the centroid of the leaves
    leaf_centroid = np.mean(X[:nleaves], 0)
    X -= leaf_centroid
    # the svd orders the singular values from largest to smallest,
    # so the rows of Vt are the leaf axes from most to least important
    Vt = np.linalg.svd(X[:nleaves])[2]
    # rotate every point onto those axes and keep the leading columns
    rotated = np.dot(X, Vt.T)
    return rotated[:, :nleaves - 1]
Exemplo n.º 6
0
def do_projection(D_full, nleaves):
    """
    Project points onto the space of the leaves.
    The embedded vertices are centered on the leaf centroid and expressed
    in the axes given by the svd of the centered leaf points,
    truncated to the first nleaves-1 coordinates.
    @param D_full: distances relating all, including internal, vertices.
    @param nleaves: the first few indices in D_full represent leaves
    @return: a numpy array where each row is a vertex of the tree
    """
    points = Euclid.edm_to_points(D_full)
    # center in place on the mean of the leaf rows
    points -= np.mean(points[:nleaves], 0)
    leaf_points = points[:nleaves]
    # only the right singular vectors are needed for the rotation;
    # they are ordered from the largest singular value to the smallest
    _, _, leaf_axes = np.linalg.svd(leaf_points)
    ncoords = nleaves - 1
    # rotate all vertices, internal ones included, and keep ncoords columns
    return np.dot(points, leaf_axes.T)[:, :ncoords]
Exemplo n.º 7
0
def get_response_content(fs):
    """
    Report the eigendecomposition of the Laplacian of a decorated tree.
    The weighted adjacency matrix of the tree is augmented by get_A_aug,
    converted to a Laplacian, and decomposed with numpy.linalg.eigh.
    Each section of the report is controlled by a flag in fs.
    @param fs: form fields; reads tree_string, N, weight_n, weight_sqrt_n,
    lap, eigvals, eigvecs and compare
    @return: a multi-line string
    @raise ValueError: if neither weight_n nor weight_sqrt_n is selected
    """
    # get the tree
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    # get information about the tree topology
    internal = [id(node) for node in tree.gen_internal_nodes()]
    tips = [id(node) for node in tree.gen_tips()]
    # the ordered ids have the internal vertices first and then the tips
    ordered_ids = internal + tips
    p = len(internal)
    q = len(tips)
    # get the full weighted adjacency matrix
    A = np.array(tree.get_affinity_matrix(ordered_ids))
    # choose the weight used by the decoration
    N = fs.N
    if fs.weight_n:
        weight = float(N)
    elif fs.weight_sqrt_n:
        weight = math.sqrt(N)
    else:
        # without this branch the weight would be unbound below
        raise ValueError('no weight option was selected')
    # compute the weighted adjacency matrix of the decorated tree
    A_aug = get_A_aug(A, weight, p, q, N)
    # compute the weighted Laplacian matrix of the decorated tree
    L_aug = Euclid.adjacency_to_laplacian(A_aug)
    # eigh returns ascending eigenvalues and eigenvectors as columns
    w, v = np.linalg.eigh(L_aug)
    # show the output
    np.set_printoptions(linewidth=1000, threshold=10000)
    out = StringIO()
    if fs.lap:
        print >> out, 'Laplacian of the decorated tree:'
        print >> out, L_aug
        print >> out
    if fs.eigvals:
        print >> out, 'eigenvalues:'
        for x in w:
            print >> out, x
        print >> out
    if fs.eigvecs:
        print >> out, 'eigenvector matrix:'
        print >> out, v
        print >> out
    if fs.compare:
        # get the distance matrix for only the original tips
        D_tips = np.array(tree.get_partial_distance_matrix(tips))
        X_tips = Euclid.edm_to_points(D_tips)
        # Wring the approximate points out of the augmented tree:
        # take the tip rows of eigenvectors 1 through q-1
        # and scale each column by the square root of its eigenvalue.
        X_approx = v[p:p + q, 1:q] / np.sqrt(w[1:q])
        # do the comparison
        print >> out, 'points from tip-only MDS:'
        print >> out, X_tips
        print >> out
        print >> out, 'approximate points from decorated tree:'
        print >> out, X_approx
        print >> out
    return out.getvalue()
Exemplo n.º 8
0
def get_response_content(fs):
    """
    Report the spectrum of the Laplacian of a decorated tree.
    The affinity matrix of the tree is augmented by get_A_aug using a
    weight of N or sqrt(N), turned into a Laplacian, and decomposed
    with numpy.linalg.eigh; flags in fs select the report sections.
    @param fs: form fields; reads tree_string, N, weight_n, weight_sqrt_n,
    lap, eigvals, eigvecs and compare
    @return: a multi-line string
    @raise ValueError: if neither weight_n nor weight_sqrt_n is selected
    """
    # get the tree
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    # get information about the tree topology
    internal = [id(node) for node in tree.gen_internal_nodes()]
    tips = [id(node) for node in tree.gen_tips()]
    ninternal = len(internal)
    ntips = len(tips)
    # order the vertex ids with the internal vertices before the tips
    ordered_ids = internal + tips
    # get the full weighted adjacency matrix
    A = np.array(tree.get_affinity_matrix(ordered_ids))
    # pick the decoration weight, failing clearly when none is selected
    N = fs.N
    if fs.weight_n:
        weight = float(N)
    elif fs.weight_sqrt_n:
        weight = math.sqrt(N)
    else:
        raise ValueError('no weight option was selected')
    # compute the weighted adjacency matrix of the decorated tree
    A_aug = get_A_aug(A, weight, ninternal, ntips, N)
    # compute the weighted Laplacian matrix of the decorated tree
    L_aug = Euclid.adjacency_to_laplacian(A_aug)
    # eigenvalues ascend and eigenvectors are the columns of the matrix
    eigenvalues, eigenvectors = np.linalg.eigh(L_aug)
    # show the output
    np.set_printoptions(linewidth=1000, threshold=10000)
    out = StringIO()
    if fs.lap:
        print >> out, 'Laplacian of the decorated tree:'
        print >> out, L_aug
        print >> out
    if fs.eigvals:
        print >> out, 'eigenvalues:'
        for value in eigenvalues:
            print >> out, value
        print >> out
    if fs.eigvecs:
        print >> out, 'eigenvector matrix:'
        print >> out, eigenvectors
        print >> out
    if fs.compare:
        # get the distance matrix for only the original tips
        D_tips = np.array(tree.get_partial_distance_matrix(tips))
        X_tips = Euclid.edm_to_points(D_tips)
        # Use the tip rows of eigenvectors 1 through ntips-1,
        # each column divided by the square root of its eigenvalue.
        tip_rows = eigenvectors[ninternal:ninternal + ntips]
        X_approx = tip_rows[:, 1:ntips] / np.sqrt(eigenvalues[1:ntips])
        # do the comparison
        print >> out, 'points from tip-only MDS:'
        print >> out, X_tips
        print >> out
        print >> out, 'approximate points from decorated tree:'
        print >> out, X_approx
        print >> out
    return out.getvalue()
Exemplo n.º 9
0
def get_response_content(fs):
    """
    Compare great arc and euclidean distances among a list of cities.
    For each distance function the report shows the distance matrix,
    the matrix of squared distances, its Gower centered matrix,
    the spectrum of that matrix (exact and rounded),
    and the first two MDS coordinates of every city.
    @param fs: form fields; only datalines is read
    @return: a multi-line string
    """
    # read the city names and the lat-lon points (in radians)
    stripped = Util.get_stripped_lines(fs.datalines.splitlines())
    latlon_points = []
    city_names = []
    for city, latd, latm, lond, lonm in parse_lines(stripped):
        lat = math.radians(GPS.degrees_minutes_to_degrees(latd, latm))
        lon = math.radians(GPS.degrees_minutes_to_degrees(lond, lonm))
        latlon_points.append((lat, lon))
        city_names.append(city)
    npoints = len(latlon_points)
    # start writing the response
    np.set_printoptions(linewidth=200)
    out = StringIO()
    radius = GPS.g_earth_radius_miles
    methods = (
            (GPS.get_arc_distance, 'great arc'),
            (GPS.get_euclidean_distance, 'euclidean'))
    for dfunc, name in methods:
        # D holds the distances and edm holds their squares
        D = np.zeros((npoints, npoints))
        edm = np.zeros((npoints, npoints))
        for row, source in enumerate(latlon_points):
            for col, target in enumerate(latlon_points):
                D[row, col] = dfunc(source, target, radius)
                edm[row, col] = D[row, col]**2
        print >> out, name, 'distances:'
        print >> out, D
        print >> out
        print >> out, name, 'EDM:'
        print >> out, edm
        print >> out
        G = Euclid.edm_to_dccov(edm)
        print >> out, name, 'Gower centered matrix:'
        print >> out, G
        print >> out
        # eigenvalues ordered from largest to smallest
        ascending = sorted(np.linalg.eigvals(G))
        spectrum = np.array(ascending[::-1])
        print >> out, name, 'spectrum of Gower centered matrix:'
        for eigenvalue in spectrum:
            print >> out, eigenvalue
        print >> out
        print >> out, name, 'rounded spectrum:'
        for eigenvalue in spectrum:
            print >> out, '%.1f' % eigenvalue
        print >> out
        mds_points = Euclid.edm_to_points(edm)
        print >> out, '2D MDS coordinates:'
        for city, mds_point in zip(city_names, mds_points):
            fields = (city, mds_point[0], mds_point[1])
            print >> out, '\t'.join(str(field) for field in fields)
        print >> out
        # break between distance methods
        print >> out
    # return the response
    return out.getvalue()
Exemplo n.º 10
0
def do_internal_projection(D_full):
    """
    Embed all vertices using the full distance matrix.
    The resulting points are in the subspace whose basis vectors are the principal axes of the whole ellipsoid.
    @param D_full: the distance matrix as a numpy array relating all vertices including internal vertices
    @return: a numpy array where each row is a vertex of the tree
    """
    # The n rows are points in n-1 dimensional space,
    # and the first coordinate is the principal axis.
    return Euclid.edm_to_points(D_full)
Exemplo n.º 11
0
def do_internal_projection(D_full):
    """
    Return the points embedded from the distance matrix of all vertices.
    The resulting points are in the subspace whose basis vectors are the principal axes of the whole ellipsoid.
    @param D_full: the distance matrix as a numpy array relating all vertices including internal vertices
    @return: a numpy array where each row is a vertex of the tree
    """
    # Each of the n rows is a point in n-1 dimensional space
    # whose first coordinate lies along the principal axis.
    embedded = Euclid.edm_to_points(D_full)
    return embedded
Exemplo n.º 12
0
def get_response_content(fs):
    """
    Report MDS analyses of city locations under two distance functions.
    The great arc and euclidean distance functions of GPS are each used
    to build a distance matrix and a matrix of squared distances,
    which is then Gower centered, decomposed, and embedded.
    @param fs: form fields; only datalines is read
    @return: a multi-line string
    """
    # parse the input into city names and (lat, lon) pairs in radians
    rows = parse_lines(Util.get_stripped_lines(fs.datalines.splitlines()))
    latlon_points = []
    city_names = []
    for city, latd, latm, lond, lonm in rows:
        lat_degrees = GPS.degrees_minutes_to_degrees(latd, latm)
        lat = math.radians(lat_degrees)
        lon_degrees = GPS.degrees_minutes_to_degrees(lond, lonm)
        lon = math.radians(lon_degrees)
        latlon_points.append((lat, lon))
        city_names.append(city)
    npoints = len(latlon_points)
    shape = (npoints, npoints)
    # start writing the response
    np.set_printoptions(linewidth=200)
    out = StringIO()
    radius = GPS.g_earth_radius_miles
    for dfunc, name in ((GPS.get_arc_distance, 'great arc'),
                        (GPS.get_euclidean_distance, 'euclidean')):
        # fill the distances and the squared euclidean-like distances
        edm = np.zeros(shape)
        D = np.zeros(shape)
        for i, point_i in enumerate(latlon_points):
            for j, point_j in enumerate(latlon_points):
                D[i, j] = dfunc(point_i, point_j, radius)
                edm[i, j] = D[i, j]**2
        print >> out, name, 'distances:'
        print >> out, D
        print >> out
        print >> out, name, 'EDM:'
        print >> out, edm
        print >> out
        G = Euclid.edm_to_dccov(edm)
        print >> out, name, 'Gower centered matrix:'
        print >> out, G
        print >> out
        # the spectrum is listed from the largest eigenvalue down
        descending = sorted(np.linalg.eigvals(G))
        descending.reverse()
        spectrum = np.array(descending)
        print >> out, name, 'spectrum of Gower centered matrix:'
        for value in spectrum:
            print >> out, value
        print >> out
        print >> out, name, 'rounded spectrum:'
        for value in spectrum:
            print >> out, '%.1f' % value
        print >> out
        mds_points = Euclid.edm_to_points(edm)
        print >> out, '2D MDS coordinates:'
        for city_name, mds_point in zip(city_names, mds_points):
            first, second = mds_point[0], mds_point[1]
            print >> out, '\t'.join(
                    str(item) for item in [city_name, first, second])
        print >> out
        # break between distance methods
        print >> out
    # return the response
    return out.getvalue()
Exemplo n.º 13
0
def get_response_content(fs):
    """
    Estimate two Steiner points for the embedded leaves of a four-leaf tree.
    The leaves a, b, c, d are embedded from their distance matrix,
    two randomly perturbed initial guesses are refined with BFGS,
    and the report shows the estimates, the angles at each estimate,
    the objective value, and the gradient at the solution.
    @param fs: form fields; only tree_string is read
    @return: a multi-line string
    @raise HandlingError: if the leaves are not named a, b, c and d
    """
    # build the newick tree from the string
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    leaf_names = sorted(leaf.get_name() for leaf in tree.gen_tips())
    # assert that the newick tree has the correct set of leaves
    if leaf_names != ['a', 'b', 'c', 'd']:
        raise HandlingError(
                'expected the tree to have leaves named {a, b, c, d}')
    # get the distance matrix among the named leaves and embed it
    D = np.array(tree.get_distance_matrix(leaf_names))
    X = Euclid.edm_to_points(D)
    # set up the optimization; the leaf points are passed as plain lists
    a, b, c, d = X.tolist()
    objective = Objective(a, b, c, d)
    centroid = np.mean(X, 0)
    s1_initial = (centroid + X[0] + X[1])/3 + get_random_point()
    s2_initial = (centroid + X[2] + X[3])/3 + get_random_point()
    data_initial = np.hstack([s1_initial, s2_initial])
    data_final = scipy.optimize.fmin_bfgs(
            objective.get_value, data_initial,
            fprime=objective.get_gradient, gtol=1e-10)
    # the first three entries belong to s1 and the rest to s2
    s1, s2 = data_final[:3], data_final[3:]
    gradient_final = objective.get_gradient(data_final)
    s1_gradient, s2_gradient = gradient_final[:3], gradient_final[3:]
    # pairs of directions whose angles are measured at s1 and then at s2
    direction_pairs = (
            (a-s1, b-s1), (b-s1, s2-s1), (s2-s1, a-s1),
            (c-s2, d-s2), (d-s2, s1-s2), (s1-s2, c-s2))
    # collect the report lines in output order
    lines = ['distance matrix:', D, 'embedded points:', X]
    lines += ['initial random steiner point guesses:', s1_initial, s2_initial]
    lines += ['final steiner point estimates:', s1, s2]
    lines.append(
            'each of these angles should be %f radians:' % ((2*math.pi)/3))
    for first, second in direction_pairs:
        lines.append(get_angle(first, second))
    lines.append('value of the objective function at the estimated solution:')
    lines.append(objective.get_value(data_final))
    lines.append(
            'gradient of the objective function at each estimated steiner point:')
    lines += [s1_gradient, s2_gradient]
    # write the response
    out = StringIO()
    for line in lines:
        print >> out, line
    return out.getvalue()
Exemplo n.º 14
0
def process(nseconds=None):
    """
    Search random branch lengths for a tree matching the target sign patterns.
    Branch lengths are sampled repeatedly on the tree in g_tree_string.
    A sample is rejected when any MDS coordinate of the tips has magnitude
    below g_epsilon; otherwise each coordinate column is compared to its
    target sign pattern, where a match in either overall sign counts.
    A KeyboardInterrupt ends the search quietly.
    NOTE(review): the visible body ends after the search loop without a
    return statement, so the summary described below is presumably built
    by code that is not shown here -- confirm.
    @param nseconds: allow this many seconds to run or None to run forever
    @return: a multi-line string that summarizes the results
    @raise CounterexampleError: if every coordinate matches its pattern
    """
    # load the tree
    tree = NewickIO.parse(g_tree_string, FelTree.NewickTree)
    # get the alphabetically ordered tip names
    ordered_tip_names = sorted(node.get_name() for node in tree.gen_tips())
    # initialize the search
    start_time = time.time()
    nsamples_rejected = 0
    nsamples_accepted = 0
    counterexample_message = 'no counterexample was found'
    try:
        while True:
            elapsed_time = time.time() - start_time
            # a falsy nseconds (None or 0) disables the time limit
            if nseconds and elapsed_time > nseconds:
                break
            # sample some random branch lengths
            sample_branch_lengths(tree)
            # get the distance matrix
            D = np.array(tree.get_distance_matrix(ordered_tip_names))
            # get the projections onto the MDS axes of the leaves
            X = Euclid.edm_to_points(D)
            # if any coordinate is near zero then reject the sample
            if np.min(np.abs(X)) < g_epsilon:
                nsamples_rejected += 1
                continue
            # see if the sign pattern matches for each coordinate
            for v_observed, v_target in zip(X.T, g_target_sign_patterns):
                hadamard_product = v_observed * v_target
                all_positive = all(x > 0 for x in hadamard_product)
                all_negative = all(x < 0 for x in hadamard_product)
                if not (all_positive or all_negative):
                    # the target sign pattern was not met
                    break
            else:
                # the sign pattern matched for each coordinate
                # so we have a counterexample
                msg = NewickIO.get_newick_string(tree)
                raise CounterexampleError(msg)
            # increment the count of accepted samples
            nsamples_accepted += 1
    except KeyboardInterrupt:
        # an interrupt is the expected way to stop an unbounded search
        pass
Exemplo n.º 15
0
def process(nseconds=None):
    """
    Sample branch lengths until the target sign patterns are matched.
    Each iteration samples branch lengths on the tree in g_tree_string,
    embeds the tips, rejects the sample if some coordinate has magnitude
    below g_epsilon, and otherwise checks every coordinate column against
    its target sign pattern (the overall sign of a column is ignored).
    The loop stops on the time limit or on a KeyboardInterrupt.
    NOTE(review): no return statement is visible after the search loop;
    the summary described below is presumably produced by code that is
    not shown here -- confirm.
    @param nseconds: allow this many seconds to run or None to run forever
    @return: a multi-line string that summarizes the results
    @raise CounterexampleError: if every coordinate matches its pattern
    """
    # load the tree
    tree = NewickIO.parse(g_tree_string, FelTree.NewickTree)
    # get the alphabetically ordered tip names
    ordered_tip_names = sorted(node.get_name() for node in tree.gen_tips())
    # initialize the search
    start_time = time.time()
    nsamples_rejected = 0
    nsamples_accepted = 0
    counterexample_message = 'no counterexample was found'
    try:
        while True:
            elapsed_time = time.time() - start_time
            # the limit applies only when nseconds is truthy
            if nseconds and elapsed_time > nseconds:
                break
            # sample some random branch lengths
            sample_branch_lengths(tree)
            # get the distance matrix
            D = np.array(tree.get_distance_matrix(ordered_tip_names))
            # get the projections onto the MDS axes of the leaves
            X = Euclid.edm_to_points(D)
            # if any coordinate is near zero then reject the sample
            if np.min(np.abs(X)) < g_epsilon:
                nsamples_rejected += 1
                continue
            # see if the sign pattern matches for each coordinate
            for v_observed, v_target in zip(X.T, g_target_sign_patterns):
                hadamard_product = v_observed * v_target
                all_positive = all(x > 0 for x in hadamard_product)
                all_negative = all(x < 0 for x in hadamard_product)
                if not (all_positive or all_negative):
                    # the target sign pattern was not met
                    break
            else:
                # every coordinate matched so this tree is a counterexample
                msg = NewickIO.get_newick_string(tree)
                raise CounterexampleError(msg)
            # increment the count of accepted samples
            nsamples_accepted += 1
    except KeyboardInterrupt:
        # stopping by interrupt is deliberate and is not an error
        pass
Exemplo n.º 16
0
def main(args):
    """
    Render the frames of a 3D MDS animation of a tree as image files.
    Each frame uses a mass vector that depends on the animation progress,
    and is saved as frame-NNNN.<image_format> in the output directory.
    @param args: parsed arguments; reads nframes, physical_width,
    physical_height, tree, interpolation, image_format, output_directory
    @raise ValueError: if fewer than two frames are requested
    """
    # do some validation
    if args.nframes < 2:
        raise ValueError('nframes should be at least 2')
    # define the requested physical size of the images (in pixels)
    physical_size = (args.physical_width, args.physical_height)
    # build the newick tree from the string
    tree = NewickIO.parse(args.tree, FelTree.NewickTree)
    nvertices = sum(1 for _ in tree.preorder())
    nleaves = sum(1 for _ in tree.gen_tips())
    # Get ordered ids with the leaves first,
    # and get the corresponding distance matrix.
    ordered_ids = get_ordered_ids(tree)
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    index_edges = get_index_edges(tree, ordered_ids)
    # The first three MDS coordinates serve as reference points
    # so that the video frames are not reflected arbitrarily.
    reference_points = Euclid.edm_to_points(D)[:, :3]
    use_sigmoid = (args.interpolation == 'sigmoid')
    last_index = float(args.nframes - 1)
    # create the animation frames and write them as image files
    pbar = Progress.Bar(args.nframes)
    for frame_index in range(args.nframes):
        # progress runs from zero at the first frame to one at the last
        linear_progress = frame_index / last_index
        progress = sigmoid(linear_progress) if use_sigmoid else linear_progress
        mass_vector = get_mass_vector(nvertices, nleaves, progress)
        points = get_canonical_3d_mds(D, mass_vector, reference_points)
        # NOTE(review): the crossings are computed but not used below
        crossings = get_crossings(index_edges, points)
        # define the frame path name
        image_pathname = os.path.join(
                args.output_directory,
                'frame-%04d.%s' % (frame_index, args.image_format))
        # clear the old figure and render the new figure
        mlab.clf()
        add_yz_plane()
        add_zx_plane()
        add_xy_plane()
        X = points[:, 0]
        Y = points[:, 1]
        Z = points[:, 2]
        draw_3d_tree(X, Y, Z, index_edges)
        draw_crossings(X, Y, Z, index_edges)
        mlab.savefig(image_pathname, size=physical_size)
        # update the progress bar
        pbar.update(frame_index + 1)
    pbar.finish()
Exemplo n.º 17
0
def main(args):
    """
    Write one image file per frame of a 3D MDS animation of a tree.
    The mass vector of each frame is derived from the frame's progress,
    optionally passed through a sigmoid, and the embedded tree is drawn
    with mlab and saved into the output directory.
    @param args: parsed arguments; reads nframes, physical_width,
    physical_height, tree, interpolation, image_format, output_directory
    @raise ValueError: if fewer than two frames are requested
    """
    # do some validation
    frame_count = args.nframes
    if frame_count < 2:
        raise ValueError('nframes should be at least 2')
    # define the requested physical size of the images (in pixels)
    physical_size = (args.physical_width, args.physical_height)
    # build the newick tree from the string
    tree = NewickIO.parse(args.tree, FelTree.NewickTree)
    vertex_count = len(list(tree.preorder()))
    leaf_count = len(list(tree.gen_tips()))
    # Get ordered ids with the leaves first,
    # and get the corresponding distance matrix.
    ordered_ids = get_ordered_ids(tree)
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    index_edges = get_index_edges(tree, ordered_ids)
    # Keep three reference coordinates per vertex
    # so that the video frames are not reflected arbitrarily.
    embedded = Euclid.edm_to_points(D)
    reference_points = embedded[:, :3]
    # create the animation frames and write them as image files
    pbar = Progress.Bar(frame_count)
    for frame_index in range(frame_count):
        progress = frame_index / float(frame_count - 1)
        if args.interpolation == 'sigmoid':
            progress = sigmoid(progress)
        masses = get_mass_vector(vertex_count, leaf_count, progress)
        points = get_canonical_3d_mds(D, masses, reference_points)
        # NOTE(review): this result is not used by the drawing code below
        crossings = get_crossings(index_edges, points)
        # define the frame path name
        filename = 'frame-%04d.%s' % (frame_index, args.image_format)
        pathname = os.path.join(args.output_directory, filename)
        # clear the old figure and render the new figure
        mlab.clf()
        for add_plane in (add_yz_plane, add_zx_plane, add_xy_plane):
            add_plane()
        X, Y, Z = points[:, 0], points[:, 1], points[:, 2]
        draw_3d_tree(X, Y, Z, index_edges)
        draw_crossings(X, Y, Z, index_edges)
        mlab.savefig(pathname, size=physical_size)
        # update the progress bar
        pbar.update(frame_index + 1)
    pbar.finish()
Exemplo n.º 18
0
def get_response_content(fs):
    """
    Draw a single 640x480 frame of the 2D MDS animation of a tree.
    @param fs: form fields; reads tree_string, imageformat, progress, scale
    @return: the value returned by get_animation_frame
    """
    # build the newick tree from the string
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    nvertices = sum(1 for _ in tree.preorder())
    nleaves = sum(1 for _ in tree.gen_tips())
    # Get ordered ids with the leaves first,
    # and get the corresponding distance matrix.
    ordered_ids = get_ordered_ids(tree)
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    index_edges = get_index_edges(tree, ordered_ids)
    # The first two MDS coordinates are the reference points
    # so that the frame is not reflected arbitrarily.
    reference_points = Euclid.edm_to_points(D)[:, :2]
    # draw the image
    ext = Form.g_imageformat_to_ext[fs.imageformat]
    mass_vector = get_mass_vector(nvertices, nleaves, fs.progress)
    points = get_canonical_2d_mds(D, mass_vector, reference_points)
    # the requested physical size of the image (in pixels)
    physical_size = (640, 480)
    return get_animation_frame(
            ext, physical_size, fs.scale, mass_vector, index_edges, points)
Exemplo n.º 19
0
def get_response_content(fs):
    """
    Produce a single frame of the 2D MDS tree animation.
    @param fs: request object; reads tree_string, imageformat, progress and scale
    @return: the value returned by get_animation_frame for this frame
    """
    # parse the newick string, then count its vertices and its tips
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    n_all = sum(1 for _ in tree.preorder())
    n_tips = sum(1 for _ in tree.gen_tips())
    # The ids are ordered with the leaves first; the distance matrix
    # and the edge list both follow that order.
    id_order = get_ordered_ids(tree)
    dist = np.array(tree.get_partial_distance_matrix(id_order))
    edge_pairs = get_index_edges(tree, id_order)
    # Keep the first two embedding coordinates as reference points
    # so that the frame is not reflected arbitrarily.
    reference = Euclid.edm_to_points(dist)[:, :2]
    # draw the image at the requested physical size (in pixels)
    file_ext = Form.g_imageformat_to_ext[fs.imageformat]
    weights = get_mass_vector(n_all, n_tips, fs.progress)
    coords = get_canonical_2d_mds(dist, weights, reference)
    size_in_pixels = (640, 480)
    return get_animation_frame(
            file_ext, size_in_pixels, fs.scale, weights, edge_pairs, coords)
Exemplo n.º 20
0
def do_full_projection(D_full, nleaves):
    """
    Project every vertex onto the subspace defined by the leaves.
    @param D_full: numpy distance matrix relating all vertices including internal vertices
    @param nleaves: the first nleaves indices of D_full are treated as leaves
    @return: a numpy array with one row per vertex, limited to the first nleaves-1 coordinates
    """
    # Embed the vertices as the rows of a coordinate matrix.
    points = Euclid.edm_to_points(D_full)
    # Move the origin to the centroid of the leaves (in place).
    leaf_centroid = np.mean(points[:nleaves], 0)
    points -= leaf_centroid
    # Only the right singular vectors of the leaf rows are needed;
    # numpy returns them ordered by decreasing singular value.
    leaf_axes = np.linalg.svd(points[:nleaves])[2]
    # Rotate all of the vertices (internal ones included) onto those axes,
    # then keep only the leading nleaves-1 columns.
    rotated = np.dot(points, leaf_axes.T)
    return rotated[:, :nleaves - 1]
Exemplo n.º 21
0
def do_full_projection(D_full, nleaves):
    """
    Compute the projection of all vertices onto the leaf subspace.
    @param D_full: numpy distance matrix relating all vertices including internal vertices
    @param nleaves: the first nleaves indices of D_full are treated as leaves
    @return: a numpy array with one row per vertex, limited to the first nleaves-1 coordinates
    """
    # Each row of the embedding is one vertex viewed as a point.
    embedding = Euclid.edm_to_points(D_full)
    # Translate in place so that the leaves are centered on the origin.
    embedding -= np.mean(embedding[:nleaves], 0)
    leaf_rows = embedding[:nleaves]
    # The third value returned by svd holds the right singular vectors,
    # sorted from the most important axis to the least important one.
    right_vectors = np.linalg.svd(leaf_rows)[2]
    # Apply the same orthogonal transformation to every vertex.
    transformed = np.dot(embedding, right_vectors.T)
    # Slice off the first nleaves-1 columns.
    ncolumns = nleaves - 1
    return transformed[:, :ncolumns]
Exemplo n.º 22
0
def main(args):
    """
    Write every frame of the 2D MDS tree animation to its own image file.
    @param args: parsed options; reads nframes, physical_width, physical_height,
    tree, interpolation, image_format, scale and output_directory
    @raise ValueError: when fewer than two frames are requested
    """
    # with a single frame the progress fraction below would divide by zero
    if args.nframes < 2:
        raise ValueError('nframes should be at least 2')
    # the requested physical size of the images (in pixels)
    frame_size = (args.physical_width, args.physical_height)
    # parse the newick string and count the vertices and the tips
    tree = NewickIO.parse(args.tree, FelTree.NewickTree)
    vertex_count = sum(1 for _ in tree.preorder())
    leaf_count = sum(1 for _ in tree.gen_tips())
    # Order the ids with the leaves first, then build the distance matrix
    # and the edge list with respect to that order.
    ids = get_ordered_ids(tree)
    distances = np.array(tree.get_partial_distance_matrix(ids))
    edges = get_index_edges(tree, ids)
    # The first two embedding coordinates act as reference points
    # so that the video frames are not reflected arbitrarily.
    anchors = Euclid.edm_to_points(distances)[:, :2]
    # render the frames one by one and write each to its own file
    bar = Progress.Bar(args.nframes)
    final_index = float(args.nframes - 1)
    for frame_index in range(args.nframes):
        # fraction of the animation completed, from 0 through 1
        fraction = frame_index / final_index
        use_sigmoid = args.interpolation == 'sigmoid'
        progress = sigmoid(fraction) if use_sigmoid else fraction
        masses = get_mass_vector(vertex_count, leaf_count, progress)
        embedded = get_canonical_2d_mds(distances, masses, anchors)
        frame_data = get_animation_frame(
                args.image_format, frame_size, args.scale,
                masses, edges, embedded)
        target = os.path.join(
                args.output_directory,
                'frame-%04d.%s' % (frame_index, args.image_format))
        with open(target, 'wb') as fout:
            fout.write(frame_data)
        bar.update(frame_index + 1)
    bar.finish()
Exemplo n.º 23
0
def main(args):
    """
    Render the 2D MDS tree animation as a sequence of numbered image files.
    @param args: parsed options; reads nframes, physical_width, physical_height,
    tree, interpolation, image_format, scale and output_directory
    @raise ValueError: when fewer than two frames are requested
    """
    # two frames are the minimum for the progress to run from 0 to 1
    if args.nframes < 2:
        raise ValueError('nframes should be at least 2')
    # parse the newick string, then count its vertices and its tips
    tree = NewickIO.parse(args.tree, FelTree.NewickTree)
    n_all = sum(1 for _ in tree.preorder())
    n_tips = sum(1 for _ in tree.gen_tips())
    # The ids are ordered with the leaves first; the distance matrix
    # and the edge list both follow that order.
    id_order = get_ordered_ids(tree)
    dist = np.array(tree.get_partial_distance_matrix(id_order))
    edge_pairs = get_index_edges(tree, id_order)
    # Keep the first two embedding coordinates as reference points
    # so that the video frames are not reflected arbitrarily.
    reference = Euclid.edm_to_points(dist)[:, :2]
    # the requested physical size of the images (in pixels)
    size_in_pixels = (args.physical_width, args.physical_height)
    # create the animation frames and write them as image files
    progress_bar = Progress.Bar(args.nframes)
    denominator = float(args.nframes - 1)
    for frame_index in range(args.nframes):
        linear = frame_index / denominator
        if args.interpolation == 'sigmoid':
            eased = sigmoid(linear)
        else:
            eased = linear
        weights = get_mass_vector(n_all, n_tips, eased)
        coords = get_canonical_2d_mds(dist, weights, reference)
        payload = get_animation_frame(
                args.image_format, size_in_pixels, args.scale,
                weights, edge_pairs, coords)
        basename = 'frame-%04d.%s' % (frame_index, args.image_format)
        with open(os.path.join(args.output_directory, basename), 'wb') as fout:
            fout.write(payload)
        progress_bar.update(frame_index + 1)
    progress_bar.finish()
Exemplo n.º 24
0
def process(ntaxa, nseconds):
    """
    Sample random trees, looking for two topologies with the same sign pattern.
    Each sampled tree is embedded with Euclid.edm_to_points, and the tuple of
    point_to_orthant values of its embedded rows is used as its sign pattern.
    The unit-branch-length distance matrix stands in for the topology.
    NOTE(review): an earlier docstring promised a multi-line summary string,
    but this block ends after the sampling loop without a return statement,
    so the counters and counterexample_message are never reported here.
    The reporting code appears to be missing; confirm against upstream.
    @param ntaxa: passed to TreeSampler.sample_agglomerated_tree
    @param nseconds: allow this many seconds to run or None to run forever
    @raise CounterexampleError: when a sign pattern already recorded for one
    topology surrogate is seen again with a different one
    """
    start_time = time.time()
    nsamples_rejected = 0
    nsamples_accepted = 0
    pattern_to_topo_surrogate = {}
    pattern_to_tree_string = {}
    counterexample_message = 'no counterexample was found'
    try:
        while True:
            # stop once the time budget (if any) has been spent
            if nseconds and time.time() - start_time > nseconds:
                break
            # sample an xtree topology and convert it to a FelTree
            xtree = TreeSampler.sample_agglomerated_tree(ntaxa)
            tree_string = xtree.get_newick_string()
            tree = NewickIO.parse(tree_string, FelTree.NewickTree)
            ordered_ids = get_ordered_ids(tree)
            # force every branch length to be the unit length
            reset_branch_lengths(tree)
            # get the unit-length distance matrix in convenient hashable form
            D_unit = np.array(tree.get_partial_distance_matrix(ordered_ids))
            topo_surrogate = tuple(tuple(row.tolist()) for row in D_unit)
            # sample random branch lengths and remember the weighted tree
            sample_branch_lengths(tree)
            weighted_tree_string = NewickIO.get_newick_string(tree)
            # embed the weighted distance matrix
            D = np.array(tree.get_partial_distance_matrix(ordered_ids))
            X = Euclid.edm_to_points(D)
            # if any coordinate is near zero then reject the sample
            if np.min(np.abs(X)) < g_epsilon:
                nsamples_rejected += 1
                continue
            # Canonize the embedding by scaling its columns with the
            # orthant of the first point, then read off the sign pattern.
            X *= np.array(point_to_orthant(X[0]))
            sign_pattern = tuple(point_to_orthant(row) for row in X)
            # Compare against the surrogate remembered for this pattern.
            # The test is 'is None' so that a falsy surrogate that was
            # stored earlier still counts as having been seen.
            known_surrogate = pattern_to_topo_surrogate.get(sign_pattern)
            if known_surrogate is None:
                pattern_to_topo_surrogate[sign_pattern] = topo_surrogate
                pattern_to_tree_string[sign_pattern] = weighted_tree_string
            elif topo_surrogate != known_surrogate:
                remembered_tree_string = pattern_to_tree_string[sign_pattern]
                msg = 'these trees have the same sign pattern but different topologies: {%s, %s}' % (weighted_tree_string, remembered_tree_string)
                raise CounterexampleError(msg)
            # increment the count of accepted samples
            nsamples_accepted += 1
    except KeyboardInterrupt:
        # a keyboard interrupt deliberately just ends the sampling early
        pass
Exemplo n.º 25
0
def get_response_content(fs):
    """
    Report matrices relating the leaf MDS to the embedding of the whole tree.
    @param fs: request object whose tree_string attribute is a newick tree
    @return: a multi-line string with one headed section per reported matrix
    """
    # parse the newick string and count the vertices
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    nvertices = len(list(tree.preorder()))
    nleaves = len(list(tree.gen_tips()))
    ninternal = nvertices - nleaves
    # ids of all of the vertices (internal nodes first) and of the tips
    all_ids = get_ordered_ids(tree)
    tip_ids = [id(tip) for tip in tree.gen_tips()]
    # distance matrices among the tips and among all of the vertices
    D_leaf = np.array(tree.get_partial_distance_matrix(tip_ids))
    D = np.array(tree.get_partial_distance_matrix(all_ids))
    # analyze the leaf distance matrix
    X_leaf = Euclid.edm_to_points(D_leaf)  # not read again below
    w_leaf, v_leaf = EigUtil.eigh(Euclid.edm_to_dccov(D_leaf))
    V_leaf = np.array(v_leaf).T
    # Embed all of the vertices, move the origin to the centroid of the
    # last nleaves rows, and rotate onto the singular axes of those rows.
    embedding = Euclid.edm_to_points(D)
    embedding -= np.mean(embedding[-nleaves:], axis=0)
    leaf_axes = np.linalg.svd(embedding[-nleaves:])[2]
    rotated = np.dot(embedding, leaf_axes.T)
    # keep the first nleaves-1 columns, scaled by the root eigenvalues
    WY = rotated[:, :nleaves - 1] / np.sqrt(w_leaf[:-1])
    # compute a product using the first ninternal rows of WY
    W = WY[:ninternal]
    M_alpha = get_alpha_multiplier(D, nleaves)
    MW_alpha = np.dot(M_alpha, W)
    # compute a product using the leaf eigenvectors
    M_beta = get_beta_multiplier(D, nleaves)
    MY_beta = np.dot(M_beta, V_leaf)
    # report the results as (heading, value) sections
    np.set_printoptions(linewidth=300, threshold=10000)
    sections = (
        ("leaf distance matrix:", D_leaf),
        ("eigenvalues derived from the leaf distance matrix", w_leaf),
        ("corresponding eigenvectors (as columns)", V_leaf),
        ("candidates for [W' Y']':", WY),
        ("candidates for W:", W),
        ("left multiplier of W:", M_alpha),
        ("each column is a (left multiplier, W) product:", MW_alpha),
        ("left multiplier of Y:", M_beta),
        ("each column is a (left multiplier, Y) product:", MY_beta),
        ("the above matrix divided by 2*eigenvalue:",
            MY_beta / (2 * np.array(w_leaf))),
    )
    out = StringIO()
    for heading, value in sections:
        print >> out, heading
        print >> out, value
        print >> out
    return out.getvalue()
Exemplo n.º 26
0
def process(ntaxa):
    """
    Sample a random tree and report several projections of its vertices.
    @param ntaxa: passed to TreeSampler.sample_agglomerated_tree
    @return: a multi-line string that summarizes the results
    """
    np.set_printoptions(linewidth=200)
    out = StringIO()

    def report(value, *headings):
        # write the heading lines, then the value, then a blank line
        for heading in headings:
            print >> out, heading
        print >> out, value
        print >> out

    # sample an xtree topology
    xtree = TreeSampler.sample_agglomerated_tree(ntaxa)
    # give the xtree exponentially distributed branch lengths (rate 1/mu)
    mu = 2.0
    for branch in xtree.get_branches():
        branch.length = random.expovariate(1 / mu)
    # convert the xtree to a FelTree so we can use the internal vertices
    tree_string = xtree.get_newick_string()
    tree = NewickIO.parse(tree_string, FelTree.NewickTree)
    # get ordered ids and the number of leaves
    ordered_ids = get_ordered_ids(tree)
    nleaves = len(list(tree.gen_tips()))
    # get the distance matrix relating all of the points
    D_full = np.array(tree.get_full_distance_matrix(ordered_ids))
    # Now do the projection so that the resulting points are in the
    # subspace whose basis vectors are the axes of the leaf ellipsoid.
    # First get the points as the rows of X.
    X = Euclid.edm_to_points(D_full)
    report(X, 'points with centroid at origin:')
    # Translate all of the points so that the origin
    # is at the centroid of the leaves.
    X -= np.mean(X[:nleaves], 0)
    report(X, 'points with centroid of leaves at origin:')
    # Extract the subset of points that define the leaves.
    L = X[:nleaves]
    # Find the orthogonal transformation of the leaves onto their MDS axes;
    # the singular values are sorted most important to least important.
    U, s, Vt = np.linalg.svd(L)
    # Transform all of the points (including the internal vertices)
    # according to this orthogonal transformation.
    Z = np.dot(X, Vt.T)
    report(Z, 'orthogonally transformed points (call this Z):')
    Y = Z[:, :nleaves - 1]
    report(
        Y,
        'projection of the points onto the axes of the leaf ellipsoid,',
        '(these are the first columns of Z; call this projected matrix Y):')
    # Show the inner products.
    report(
        np.dot(Y.T, Y),
        "pairwise inner products of the columns of Y (that is, Y'Y)")
    # Show other inner products.
    # NOTE(review): the 5 is hard-coded and Y[:5] takes the first five rows
    # although the heading mentions columns; confirm the intent.
    report(
        np.dot(Y[:5].T, Y[:5]),
        "pairwise inner products of the first few columns of Y")
    # Extract the subset of points that define the points of articulation.
    # Note that the origin is the centroid of the leaves.
    R = X[nleaves:]
    Y_leaves = Y[:nleaves]
    W = np.dot(np.linalg.pinv(L), Y_leaves)
    report(
        np.dot(L, W),
        'leaf projection using pseudoinverse (first few rows of Y):')
    report(
        np.dot(R, W),
        'projection of points of articulation using pseudoinverse (remaining rows of Y):')
    # Get all of the points in high dimensional space,
    # and keep only the first nleaves-1 coordinates.
    X = Euclid.edm_to_points(D_full)[:, :nleaves - 1]
    assert np.allclose(np.sum(X, 0), 0)
    report(
        X,
        'all points projected onto the first principal axes of the full ellipsoid:')
    # Look at only the leaves in this space.
    # The centering must produce a new array: X[:nleaves] is a view of X,
    # so an in-place subtraction would also shift the leaf rows of X,
    # which is still needed unmodified for D_all below.
    L = X[:nleaves] - np.mean(X[:nleaves], 0)
    report(
        L,
        'leaves projected onto the first principal axes of the full ellipsoid and then centered:')
    # Re-project the leaves onto the axes of leaf ellipsoid.
    D_leaves = Euclid.dccov_to_edm(np.dot(L, L.T))
    Y = Euclid.edm_to_points(D_leaves)
    report(
        Y,
        'leaves further projected onto principal axes of their own ellipsoid:')
    # Try something else
    D_all = Euclid.dccov_to_edm(np.dot(X, X.T))
    Y = Euclid.edm_to_points(D_all)[:, :nleaves - 1]
    report(
        Y,
        'all points further projected onto their own principal axes of inertia:')
    # Try the same thing some more
    D_again = Euclid.dccov_to_edm(np.dot(Y, Y.T))
    Z = Euclid.edm_to_points(D_again)[:, :nleaves - 1]
    report(
        Z,
        'all points further projected onto their own principal axes of inertia (second iteration):')
    return out.getvalue().strip()
Exemplo n.º 27
0
def process(ntaxa):
    """
    Sample a random tree and summarize several projections of its vertices.
    @param ntaxa: passed to TreeSampler.sample_agglomerated_tree
    @return: a multi-line string that summarizes the results
    """
    np.set_printoptions(linewidth=200)
    out = StringIO()

    def report(value, *headings):
        # write the heading lines, then the value, then a blank line
        for heading in headings:
            print >> out, heading
        print >> out, value
        print >> out

    # sample an xtree topology
    xtree = TreeSampler.sample_agglomerated_tree(ntaxa)
    # give the xtree exponentially distributed branch lengths (rate 1/mu)
    mu = 2.0
    for branch in xtree.get_branches():
        branch.length = random.expovariate(1/mu)
    # convert the xtree to a FelTree so we can use the internal vertices
    tree_string = xtree.get_newick_string()
    tree = NewickIO.parse(tree_string, FelTree.NewickTree)
    # get ordered ids and the number of leaves
    ordered_ids = get_ordered_ids(tree)
    nleaves = len(list(tree.gen_tips()))
    # get the distance matrix relating all of the points
    D_full = np.array(tree.get_full_distance_matrix(ordered_ids))
    # Now do the projection so that the resulting points are in the
    # subspace whose basis vectors are the axes of the leaf ellipsoid.
    # First get the points as the rows of X.
    X = Euclid.edm_to_points(D_full)
    report(X, 'points with centroid at origin:')
    # Translate all of the points so that the origin
    # is at the centroid of the leaves.
    X -= np.mean(X[:nleaves], 0)
    report(X, 'points with centroid of leaves at origin:')
    # Extract the subset of points that define the leaves.
    L = X[:nleaves]
    # Find the orthogonal transformation of the leaves onto their MDS axes;
    # the singular values are sorted most important to least important.
    U, s, Vt = np.linalg.svd(L)
    # Transform all of the points (including the internal vertices)
    # according to this orthogonal transformation.
    Z = np.dot(X, Vt.T)
    report(Z, 'orthogonally transformed points (call this Z):')
    Y = Z[:, :nleaves-1]
    report(
        Y,
        'projection of the points onto the axes of the leaf ellipsoid,',
        '(these are the first columns of Z; call this projected matrix Y):')
    # Show the inner products.
    report(
        np.dot(Y.T, Y),
        "pairwise inner products of the columns of Y (that is, Y'Y)")
    # Show other inner products.
    # NOTE(review): the 5 is hard-coded and Y[:5] takes the first five rows
    # although the heading mentions columns; confirm the intent.
    report(
        np.dot(Y[:5].T, Y[:5]),
        "pairwise inner products of the first few columns of Y")
    # Extract the subset of points that define the points of articulation.
    # Note that the origin is the centroid of the leaves.
    R = X[nleaves:]
    Y_leaves = Y[:nleaves]
    W = np.dot(np.linalg.pinv(L), Y_leaves)
    report(
        np.dot(L, W),
        'leaf projection using pseudoinverse (first few rows of Y):')
    report(
        np.dot(R, W),
        'projection of points of articulation using pseudoinverse (remaining rows of Y):')
    # Get all of the points in high dimensional space,
    # and keep only the first nleaves-1 coordinates.
    X = Euclid.edm_to_points(D_full)[:, :nleaves-1]
    assert np.allclose(np.sum(X, 0), 0)
    report(
        X,
        'all points projected onto the first principal axes of the full ellipsoid:')
    # Look at only the leaves in this space.
    # The centering must produce a new array: X[:nleaves] is a view of X,
    # so an in-place subtraction would also shift the leaf rows of X,
    # which is still needed unmodified for D_all below.
    L = X[:nleaves] - np.mean(X[:nleaves], 0)
    report(
        L,
        'leaves projected onto the first principal axes of the full ellipsoid and then centered:')
    # Re-project the leaves onto the axes of leaf ellipsoid.
    D_leaves = Euclid.dccov_to_edm(np.dot(L, L.T))
    Y = Euclid.edm_to_points(D_leaves)
    report(
        Y,
        'leaves further projected onto principal axes of their own ellipsoid:')
    # Try something else
    D_all = Euclid.dccov_to_edm(np.dot(X, X.T))
    Y = Euclid.edm_to_points(D_all)[:, :nleaves-1]
    report(
        Y,
        'all points further projected onto their own principal axes of inertia:')
    # Try the same thing some more
    D_again = Euclid.dccov_to_edm(np.dot(Y, Y.T))
    Z = Euclid.edm_to_points(D_again)[:, :nleaves-1]
    report(
        Z,
        'all points further projected onto their own principal axes of inertia (second iteration):')
    return out.getvalue().strip()
Exemplo n.º 28
0
def process(ntaxa, nseconds):
    """
    Sample random trees, looking for two topologies with the same sign pattern.
    Each sampled tree is embedded with Euclid.edm_to_points, and the tuple of
    point_to_orthant values of its embedded rows is used as its sign pattern.
    The unit-branch-length distance matrix stands in for the topology.
    NOTE(review): an earlier docstring promised a multi-line summary string,
    but this block ends after the sampling loop without a return statement,
    so the counters and counterexample_message are never reported here.
    The reporting code appears to be missing; confirm against upstream.
    @param ntaxa: passed to TreeSampler.sample_agglomerated_tree
    @param nseconds: allow this many seconds to run or None to run forever
    @raise CounterexampleError: when a sign pattern already recorded for one
    topology surrogate is seen again with a different one
    """
    start_time = time.time()
    nsamples_rejected = 0
    nsamples_accepted = 0
    pattern_to_topo_surrogate = {}
    pattern_to_tree_string = {}
    counterexample_message = 'no counterexample was found'
    try:
        while True:
            # stop once the time budget (if any) has been spent
            if nseconds and time.time() - start_time > nseconds:
                break
            # sample an xtree topology and convert it to a FelTree
            xtree = TreeSampler.sample_agglomerated_tree(ntaxa)
            tree_string = xtree.get_newick_string()
            tree = NewickIO.parse(tree_string, FelTree.NewickTree)
            ordered_ids = get_ordered_ids(tree)
            # force every branch length to be the unit length
            reset_branch_lengths(tree)
            # get the unit-length distance matrix in convenient hashable form
            D_unit = np.array(tree.get_partial_distance_matrix(ordered_ids))
            topo_surrogate = tuple(tuple(row.tolist()) for row in D_unit)
            # sample random branch lengths and remember the weighted tree
            sample_branch_lengths(tree)
            weighted_tree_string = NewickIO.get_newick_string(tree)
            # embed the weighted distance matrix
            D = np.array(tree.get_partial_distance_matrix(ordered_ids))
            X = Euclid.edm_to_points(D)
            # if any coordinate is near zero then reject the sample
            if np.min(np.abs(X)) < g_epsilon:
                nsamples_rejected += 1
                continue
            # Canonize the embedding by scaling its columns with the
            # orthant of the first point, then read off the sign pattern.
            X *= np.array(point_to_orthant(X[0]))
            sign_pattern = tuple(point_to_orthant(row) for row in X)
            # Compare against the surrogate remembered for this pattern.
            # The test is 'is None' so that a falsy surrogate that was
            # stored earlier still counts as having been seen.
            known_surrogate = pattern_to_topo_surrogate.get(sign_pattern)
            if known_surrogate is None:
                pattern_to_topo_surrogate[sign_pattern] = topo_surrogate
                pattern_to_tree_string[sign_pattern] = weighted_tree_string
            elif topo_surrogate != known_surrogate:
                remembered_tree_string = pattern_to_tree_string[sign_pattern]
                msg = 'these trees have the same sign pattern but different topologies: {%s, %s}' % (
                    weighted_tree_string, remembered_tree_string)
                raise CounterexampleError(msg)
            # increment the count of accepted samples
            nsamples_accepted += 1
    except KeyboardInterrupt:
        # a keyboard interrupt deliberately just ends the sampling early
        pass
Exemplo n.º 29
0
def get_response_content(fs):
    """
    Summarize matrices that relate the leaf MDS to the whole-tree embedding.
    @param fs: request object whose tree_string attribute is a newick tree
    @return: a multi-line string with one headed section per reported matrix
    """
    # parse the newick string and count the vertices
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    nvertices = len(list(tree.preorder()))
    nleaves = len(list(tree.gen_tips()))
    ninternal = nvertices - nleaves
    # ids of every vertex (internal nodes first) followed by the tip ids
    vertex_ids = get_ordered_ids(tree)
    leaf_ids = [id(tip) for tip in tree.gen_tips()]
    # the leaf distance matrix and the augmented distance matrix
    D_leaf = np.array(tree.get_partial_distance_matrix(leaf_ids))
    D = np.array(tree.get_partial_distance_matrix(vertex_ids))
    # analyze the leaf distance matrix
    X_leaf = Euclid.edm_to_points(D_leaf)  # not read again below
    w_leaf, v_leaf = EigUtil.eigh(Euclid.edm_to_dccov(D_leaf))
    V_leaf = np.array(v_leaf).T
    # Embed every vertex, center on the mean of the last nleaves rows,
    # and rotate onto the right singular vectors of those rows.
    points = Euclid.edm_to_points(D)
    points -= np.mean(points[-nleaves:], axis=0)
    right_vectors = np.linalg.svd(points[-nleaves:])[2]
    Z = np.dot(points, right_vectors.T)
    # keep the first nleaves-1 columns and divide by the root eigenvalues
    ncolumns = nleaves - 1
    WY = Z[:, :ncolumns] / np.sqrt(w_leaf[:-1])
    # compute a product using the first ninternal rows of WY
    W = WY[:ninternal]
    M_alpha = get_alpha_multiplier(D, nleaves)
    MW_alpha = np.dot(M_alpha, W)
    # compute a product using the leaf eigenvectors
    M_beta = get_beta_multiplier(D, nleaves)
    MY_beta = np.dot(M_beta, V_leaf)
    # report the results, one headed section per matrix
    np.set_printoptions(linewidth=300, threshold=10000)
    headings = [
        'leaf distance matrix:',
        'eigenvalues derived from the leaf distance matrix',
        'corresponding eigenvectors (as columns)',
        "candidates for [W' Y']':",
        'candidates for W:',
        'left multiplier of W:',
        'each column is a (left multiplier, W) product:',
        'left multiplier of Y:',
        'each column is a (left multiplier, Y) product:',
        'the above matrix divided by 2*eigenvalue:',
    ]
    values = [
        D_leaf, w_leaf, V_leaf, WY, W, M_alpha, MW_alpha, M_beta, MY_beta,
        MY_beta / (2 * np.array(w_leaf)),
    ]
    out = StringIO()
    for heading, value in zip(headings, values):
        print >> out, heading
        print >> out, value
        print >> out
    return out.getvalue()
Exemplo n.º 30
0
def process():
    """
    Build a text report comparing several embeddings of a small example tree.

    The report shows the original and leaflet-augmented distance matrices,
    the embeddings labelled case 1 through case 3, the embedding of a tree
    augmented with many leaflets, weighted embeddings under three mass
    vectors from two different routines, and a final SVD-based scratch
    computation using the degenerate mass vector.
    As a side effect the global numpy print options are changed.
    @return: a multi-line string that summarizes the results
    """
    np.set_printoptions(linewidth=200)
    report = StringIO()

    def emit(caption, value):
        # write a caption line followed by the printed value
        print >> report, caption
        print >> report, value

    # the example distance matrices and their dimensions
    leaf_distances = Euclid.g_D_b
    full_distances = Euclid.g_D_c
    vertex_count = 6
    leaf_count = 4
    # each row: (caption for the Euclid routine,
    #            caption for the alternative routine, mass vector)
    mass_cases = (
        ('generalized case 1:', 'eric formula case 1:',
            np.ones(vertex_count) / float(vertex_count)),
        ('generalized case 2:', 'eric formula case 2:',
            np.array([.2, .2, .2, .2, .1, .1])),
        ('generalized case 3:', 'eric formula case 3:',
            np.array([0.25, 0.25, 0.25, 0.25, 0, 0])),
    )
    degenerate_mass = mass_cases[-1][-1]
    # one round of leaflet augmentation, and the projected points
    augmented_distances = add_leaflets(full_distances, leaf_count)
    projected_points = do_projection(full_distances, leaf_count)
    # the distance matrices themselves
    emit('pairwise distances among vertices in the original tree:',
            full_distances)
    emit('pairwise distance matrix augmented with one leaflet per leaf:',
            augmented_distances)
    # the three numbered cases
    emit('case 1: embedding of all vertices:',
            Euclid.edm_to_points(full_distances))
    emit('case 2: embedding of leaves and leaflets from the leaflet-augmented distance matrix:',
            Euclid.edm_to_points(augmented_distances))
    emit('case 3: projection of all vertices onto the MDS space of the leaves:',
            projected_points)
    # the leaves on their own
    emit('embedding of leaves from the leaf distance matrix:',
            Euclid.edm_to_points(leaf_distances))
    # apply the leaflet augmentation twenty times before embedding
    heavily_augmented = full_distances.copy()
    for _ in range(20):
        heavily_augmented = add_leaflets(heavily_augmented, leaf_count)
    heavy_points = Euclid.edm_to_points(heavily_augmented)
    # keep the first six rows and the first three columns
    heavy_corner = heavy_points[:6, :3]
    emit('first few coordinates of the original vertices of the embedded tree with lots of leaflets per leaf:',
            heavy_corner)
    emit('ratio of coordinates of projected points to coordinates of this block of the embedding of the augmented tree:',
            projected_points / heavy_corner)
    # weighted embeddings from the Euclid module for each mass vector
    for euclid_caption, _, mass in mass_cases:
        emit(euclid_caption,
                Euclid.edm_to_weighted_points(full_distances, mass))
    # weighted embeddings from the alternative routine for each mass vector
    for _, eric_caption, mass in mass_cases:
        emit(eric_caption,
                get_weighted_embedding_b(full_distances, mass))
    # scratch computation with the degenerate masses
    print >> report, 'testing random stuff:'
    sqrt_mass_diag = np.diag(np.sqrt(degenerate_mass))
    cross = Euclid.edm_to_weighted_cross_product(
            full_distances, degenerate_mass)
    _, abdi_values, _ = np.linalg.svd(cross, full_matrices=False)
    # sandwich the cross product between the sqrt-mass diagonal matrices
    sandwiched = np.dot(sqrt_mass_diag, np.dot(cross, sqrt_mass_diag.T))
    left_vectors, eric_values, _ = np.linalg.svd(
            sandwiched, full_matrices=False)
    scaled_vectors = np.dot(left_vectors, np.sqrt(np.diag(eric_values)))
    mass_pinv = np.linalg.pinv(sqrt_mass_diag)
    # drop the last two columns of the pseudoinverse
    # and the last two rows of the scaled vectors
    mass_pinv_narrow = mass_pinv[:, :-2]
    scaled_vectors_short = scaled_vectors[:-2]
    print >> report, 'eigenvalues of the abdi cross product:', abdi_values
    print >> report, 'eigenvalues of the eric cross product:', eric_values
    for block in (
            mass_pinv, scaled_vectors,
            mass_pinv_narrow, scaled_vectors_short):
        print >> report, block
    print >> report, np.dot(mass_pinv_narrow, scaled_vectors_short)
    # return the response
    return report.getvalue().strip()