Exemplo n.º 1
0
 def get_harmonically_extended_MDS(self):
     """
     Compute leaf MDS points and extend them to the internal vertices.
     The leaf points are built from the eigenpairs with indices 1 and 2
     of the leaf Schur complement, each eigenvector being scaled by the
     reciprocal square root of its eigenvalue.
     They are passed through self._reflected_to_reference before
     the points of the internal vertices are derived from them.
     @return: an array with the leaf rows stacked above the internal rows
     """
     # Laplacian blocks indexed by (internal, internal) and (internal, leaf)
     L_ii = Ftree.TB_to_L_block(
             self.T, self.B, self.internal, self.internal)
     L_il = Ftree.TB_to_L_block(
             self.T, self.B, self.internal, self.leaves)
     # leaf points from two eigenpairs of the Schur complement
     schur = Ftree.TB_to_L_schur(self.T, self.B, self.leaves)
     eigenvalues, leaf_points = scipy.linalg.eigh(schur, eigvals=(1, 2))
     scale = np.reciprocal(np.sqrt(eigenvalues))
     leaf_points = self._reflected_to_reference(leaf_points * scale)
     # points of the internal vertices given the leaf points
     extension = np.dot(np.linalg.pinv(L_ii), L_il)
     internal_points = -np.dot(extension, leaf_points)
     return np.vstack([leaf_points, internal_points])
Exemplo n.º 2
0
def get_response_content(fs):
    """
    Draw the test tree using a harmonic extension of the true tree leaf MDS.
    The 2D leaf points come from the leaf Schur complement of the true tree.
    The points of the internal vertices are computed on the test tree
    from those leaf points, after matching the leaves of the two trees
    by name.
    @param fs: an object with true_tree, test_tree, imageformat and scale
    @return: the value returned by get_animation_frame
    @raise ValueError: if a leaf is unnamed, if a leaf name is repeated
    within a tree, or if the two trees have different sets of leaf names
    """
    # read the trees
    T_true, B_true, N_true = FtreeIO.newick_to_TBN(fs.true_tree)
    T_test, B_test, N_test = FtreeIO.newick_to_TBN(fs.test_tree)
    # we are concerned about the names of the leaves of the two trees
    true_leaves = Ftree.T_to_leaves(T_true)
    test_leaves = Ftree.T_to_leaves(T_test)
    # Only named leaves go into these maps.
    # Indexing the name map with an unnamed leaf would raise a KeyError
    # before the explicit checks below get a chance to report the problem.
    true_leaf_to_n = dict(
            (v, N_true[v]) for v in true_leaves if v in N_true)
    test_leaf_to_n = dict(
            (v, N_test[v]) for v in test_leaves if v in N_test)
    # check that all leaves are named
    if len(true_leaves) != len(true_leaf_to_n):
        raise ValueError(
                'all leaves in the leaf MDS tree should be named')
    if len(test_leaves) != len(test_leaf_to_n):
        raise ValueError(
                'all leaves in the harmonic extension tree should be named')
    # check that within each tree all leaves are uniquely named
    if len(set(true_leaf_to_n.values())) != len(true_leaves):
        raise ValueError(
                'all leaf names in the leaf MDS tree should be unique')
    if len(set(test_leaf_to_n.values())) != len(test_leaves):
        raise ValueError(
                'all leaf names in the harmonic extension tree '
                'should be unique')
    # check that the leaf name sets are the same
    if set(true_leaf_to_n.values()) != set(test_leaf_to_n.values()):
        raise ValueError(
                'the two trees should have corresponding leaf names')
    # invert the leaf name maps
    true_n_to_leaf = dict((n, v) for v, n in true_leaf_to_n.items())
    test_n_to_leaf = dict((n, v) for v, n in test_leaf_to_n.items())
    # get correspondingly ordered leaf sequences
    leaf_names = true_leaf_to_n.values()
    true_leaves_reordered = [true_n_to_leaf[n] for n in leaf_names]
    test_leaves_reordered = [test_n_to_leaf[n] for n in leaf_names]
    # get the Schur complement matrix for the leaves
    L_schur_true = Ftree.TB_to_L_schur(T_true, B_true, true_leaves_reordered)
    # get the MDS points
    w, V = scipy.linalg.eigh(L_schur_true, eigvals=(1, 2))
    X = np.dot(V, np.diag(np.reciprocal(np.sqrt(w))))
    # get the linear operator that defines the harmonic extension
    test_internal = Ftree.T_to_internal_vertices(T_test)
    L22 = Ftree.TB_to_L_block(T_test, B_test,
            test_internal, test_internal)
    L21 = Ftree.TB_to_L_block(T_test, B_test,
            test_internal, test_leaves_reordered)
    M = -np.dot(np.linalg.pinv(L22), L21)
    # get the harmonic extension
    X_extension = np.dot(M, X)
    X_extended = np.vstack([X, X_extension])
    # draw the image
    # the row order of X_extended is reordered leaves then internal vertices
    v_to_index = Ftree.invseq(test_leaves_reordered + test_internal)
    physical_size = (640, 480)
    ext = Form.g_imageformat_to_ext[fs.imageformat]
    return get_animation_frame(ext, physical_size, fs.scale,
            v_to_index, T_test, X_extended)
Exemplo n.º 3
0
def get_response_content(fs):
    """
    Report the algebraic connectivity and per-component perron values.
    The tree is cut at a distinguished internal vertex.
    The leaves of each resulting component define a principal submatrix
    of the Schur complement that keeps the leaves and the distinguished
    vertex; the smallest eigenvalue of each submatrix is reported.
    @param fs: an object with tree and vertex attributes
    @return: the report as a string
    @raise ValueError: if the distinguished vertex is not internal
    """
    # read the tree
    T, B, N = FtreeIO.newick_to_TBN(fs.tree)
    leaves = Ftree.T_to_leaves(T)
    internal = Ftree.T_to_internal_vertices(T)
    # get the distinguished vertex of articulation
    r = get_unique_vertex(N, fs.vertex)
    if r not in internal:
        raise ValueError(
                'the distinguished vertex should have degree at least two')
    # Direct the tree away from the distinguished vertex.
    # Vertices that are absent from v_to_sinks are treated as leaves below.
    R = Ftree.T_to_R_specific(T, r)
    v_to_sinks = Ftree.R_to_v_to_sinks(R)
    # cut every edge that touches the distinguished vertex
    neighbors = Ftree.T_to_v_to_neighbors(T)[r]
    R_pruned = set(R)
    for nbr in neighbors:
        R_pruned.remove((r, nbr))
    T_pruned = Ftree.R_to_T(R_pruned)
    # collect one sorted leaf list per neighbor of the distinguished vertex
    leaf_lists = []
    for nbr in neighbors:
        R_subtree = Ftree.T_to_R_specific(T_pruned, nbr)
        leaf_lists.append(
                sorted(b for a, b in R_subtree if b not in v_to_sinks))
    ordered_leaves = [v for lst in leaf_lists for v in lst]
    # keep the leaves, in component order, followed by the cut vertex
    L_schur = Ftree.TB_to_L_schur(T, B, ordered_leaves + [r])
    # slice out the diagonal block that belongs to each component
    principal_matrices = []
    begin = 0
    for lst in leaf_lists:
        end = begin + len(lst)
        principal_matrices.append(L_schur[begin:end, begin:end])
        begin = end
    # write the report
    out = StringIO()
    print >> out, 'algebraic connectivity:'
    print >> out, get_algebraic_connectivity(T, B, leaves)
    print >> out
    print >> out
    print >> out, 'perron values:'
    print >> out
    for block, lst in zip(principal_matrices, leaf_lists):
        smallest = scipy.linalg.eigh(block, eigvals_only=True)[0]
        names = [N[v] for v in lst]
        print >> out, names
        print >> out, smallest
        print >> out
    return out.getvalue()
Exemplo n.º 4
0
def get_harmonically_extended_MDS(T, B, leaves, internal):
    """
    Use harmonically extended 2D MDS.
    The leaf points are built from the eigenpairs with indices 1 and 2
    of the leaf Schur complement, each eigenvector being scaled by the
    reciprocal square root of its eigenvalue.
    @param T: topology
    @param B: branch lengths
    @param leaves: ordered leaf vertices
    @param internal: ordered internal vertices
    @return: an array with the leaf rows stacked above the internal rows
    """
    # leaf points from two eigenpairs of the Schur complement
    schur = Ftree.TB_to_L_schur(T, B, leaves)
    # Laplacian blocks indexed by (internal, internal) and (internal, leaf)
    L_ii = Ftree.TB_to_L_block(T, B, internal, internal)
    L_il = Ftree.TB_to_L_block(T, B, internal, leaves)
    eigenvalues, leaf_points = scipy.linalg.eigh(schur, eigvals=(1, 2))
    leaf_points = leaf_points * np.reciprocal(np.sqrt(eigenvalues))
    # points of the internal vertices given the leaf points
    extension = np.dot(np.linalg.pinv(L_ii), L_il)
    internal_points = -np.dot(extension, leaf_points)
    return np.vstack([leaf_points, internal_points])
Exemplo n.º 5
0
def get_gap(blen, T, B, u_edge):
    """
    This function will be minimized.
    A non-positive proposed branch length gives the constant 1.
    Otherwise B is modified in place, with B[u_edge] set to blen,
    before the leaf Schur complement is computed.
    @param blen: proposed branch length
    @param T: topology
    @param B: branch lengths
    @param u_edge: undirected edge
    @return: 1 for a non-positive blen, otherwise the absolute difference
    between the Schur complement eigenvalues with indices 1 and 2
    """
    # reject non-positive branch lengths without touching the tree
    if blen <= 0:
        return 1
    leaf_vertices = Ftree.T_to_leaves(T)
    # install the proposed branch length
    B[u_edge] = blen
    schur = Ftree.TB_to_L_schur(T, B, leaf_vertices)
    lower, upper = scipy.linalg.eigh(
            schur, eigvals_only=True, eigvals=(1, 2))
    return abs(lower - upper)
Exemplo n.º 6
0
 def get_v_to_point(self):
     """
     This uses the harmonic extension.
     Also it uses the reference MDS for reflection.
     The leaf points come from the eigenpairs with indices 1 and 2
     of the leaf Schur complement.
     @return: a map from vertex to point
     """
     # Laplacian blocks indexed by (internal, internal) and (internal, leaf)
     L_ii = Ftree.TB_to_L_block(
             self.T, self.B, self.internal, self.internal)
     L_il = Ftree.TB_to_L_block(
             self.T, self.B, self.internal, self.leaves)
     schur = Ftree.TB_to_L_schur(self.T, self.B, self.leaves)
     eigenvalues, leaf_points = scipy.linalg.eigh(schur, eigvals=(1, 2))
     scale = np.reciprocal(np.sqrt(eigenvalues))
     leaf_points = self._reflected_to_reference(leaf_points * scale)
     # points of the internal vertices given the leaf points
     extension = np.dot(np.linalg.pinv(L_ii), L_il)
     internal_points = -np.dot(extension, leaf_points)
     points = np.vstack([leaf_points, internal_points])
     # the rows are ordered as the leaves followed by the internal vertices
     vertices = self.leaves + self.internal
     v_to_point = {}
     for row, point in enumerate(points):
         v_to_point[vertices[row]] = tuple(point)
     return v_to_point
Exemplo n.º 7
0
def TB_to_harmonic_valuations(T, B):
    """
    Extend every eigenvector of the leaf Schur complement to all vertices.
    @param T: topology
    @param B: branch lengths
    @return: a number of dictionaries equal to the number of leaves,
    each mapping a vertex to its entry in one extended eigenvector
    """
    leaves = Ftree.T_to_leaves(T)
    internal = Ftree.T_to_internal_vertices(T)
    # Laplacian blocks indexed by (internal, internal) and (internal, leaf)
    L_ii = Ftree.TB_to_L_block(T, B, internal, internal)
    L_il = Ftree.TB_to_L_block(T, B, internal, leaves)
    schur = Ftree.TB_to_L_schur(T, B, leaves)
    # only the eigenvectors are needed
    _eigenvalues, leaf_part = scipy.linalg.eigh(schur)
    internal_part = -np.dot(np.dot(np.linalg.pinv(L_ii), L_il), leaf_part)
    stacked = np.vstack([leaf_part, internal_part])
    # the rows are ordered as the leaves followed by the internal vertices
    vertices = leaves + internal
    return [
            dict((v, stacked[row, col]) for row, v in enumerate(vertices))
            for col in range(len(leaves))]
Exemplo n.º 8
0
def get_leaf_distn_schur(R, B):
    """
    This is a possibly equivalent formulation.
    It is based on removing all internal vertices except the root
    by Schur complement.
    @param R: rooted tree, from which the topology and root are derived
    @param B: branch lengths
    @return: a map from leaf vertex to its normalized weight
    """
    T = Ftree.R_to_T(R)
    r = Ftree.R_to_root(R)
    leaves = Ftree.T_to_leaves(T)
    # Keep the leaves followed by the root,
    # so that the root is the last row and column of the Schur complement.
    L_schur = Ftree.TB_to_L_schur(T, B, leaves + [r])
    # Take the entries of the root row that correspond to the leaves.
    w_unnormalized = L_schur[-1, :-1]
    # Normalize so that the weights sum to one.
    w = w_unnormalized / w_unnormalized.sum()
    return dict((v, w[i]) for i, v in enumerate(leaves))
Exemplo n.º 9
0
 def __call__(self, X):
     """
     Evaluate the objective function at a parameter vector.
     First few entries of X are logs of branch lengths.
     Next few entries are vr1 entries.
     Next few entries are vr2 entries.
     @param X: a 1D numpy array of floats
     @return: squared frobenius error plus squared eigenvalue error
     """
     # unpack the parameter array; only the branch lengths are used here
     branch_lengths, _Vr = self.X_to_B_Vr(X)
     L, V = self.X_to_L_V(X)
     # squared frobenius norm of the mismatch against the constant matrix
     residual = np.dot(L, V) - self.C
     frobenius_term = np.sum(residual * residual)
     # use a hack to make sure we are really using the first two eigenvalues
     schur = Ftree.TB_to_L_schur(self.T_test, branch_lengths, self.leaves)
     observed = scipy.linalg.eigvalsh(schur, eigvals=(1, 2))
     deviation = observed - self.w
     eigenvalue_term = np.sum(deviation * deviation)
     # return the total error
     return frobenius_term + eigenvalue_term
Exemplo n.º 10
0
def get_response_content(fs):
    """
    Write an html page that shows a stepwise decomposition of leaf sets.
    All leaves begin in a single supervertex.
    At every stage the active supervertices are rendered and then every
    supervertex with more than two vertices is split, using either the
    spectral or the neighbor joining strategy.
    The process stops when the set of active supervertices stops changing.
    @param fs: an object with newick, weight_delta_mu, nj_split,
    spectral_split, vis_star and vis_complete attributes
    @return: the html page as a string
    @raise ValueError: if a supervertex has fewer than two vertices
    """
    # read the user input
    weight_delta_mu = fs.weight_delta_mu
    T, B, N = FtreeIO.newick_to_TBN(fs.newick)
    # summarize the tree
    leaves = Ftree.T_to_leaves(T)
    nleaves = len(leaves)
    # init the tree reconstruction state
    v_to_name = {}
    for v in leaves:
        name = N.get(v, None)
        if name is None:
            name = 'P' + chr(ord('a') + v)
        v_to_name[v] = name
    v_to_svs = dict((v, set([0])) for v in leaves)
    sv_to_vs = {0: set(leaves)}
    # define edge weights (used only for spectral split strategy)
    # as the fully connected schur complement graph in Laplacian form
    G = Ftree.TB_to_L_schur(T, B, leaves)
    # add some random amount to each edge weight
    # The float literal avoids integer division when the mean is an int.
    rate = 1.0 / weight_delta_mu
    for i in range(nleaves):
        for j in range(i):
            x = random.expovariate(rate)
            # keep the row sums of the Laplacian unchanged
            G[i, j] -= x
            G[j, i] -= x
            G[i, i] += x
            G[j, j] += x
    edge_to_weight = {}
    for index_pair in itertools.combinations(range(nleaves), 2):
        i, j = index_pair
        leaf_pair = (leaves[i], leaves[j])
        edge_to_weight[frozenset(leaf_pair)] = -G[index_pair]
    # define pairwise distances (used only for nj split strategy)
    D = Ftree.TB_to_D(T, B, leaves)
    edge_to_distance = {}
    for index_pair in itertools.combinations(range(nleaves), 2):
        i, j = index_pair
        leaf_pair = (leaves[i], leaves[j])
        edge_to_distance[frozenset(leaf_pair)] = D[index_pair]
    # the supervertices that are still candidates for decomposition
    active_svs = set([0])
    # initialize the sources of unique vertex and supervertex identifiers
    v_gen = itertools.count(max(leaves) + 1)
    sv_gen = itertools.count(1)
    # write the output
    out = StringIO()
    print >> out, '<html>'
    print >> out, '<body>'
    for count_pos in itertools.count(1):
        # add the graph rendering before the decomposition at this stage
        if fs.nj_split:
            edge_to_branch_weight = {}
            for k, v in edge_to_distance.items():
                # The float literal avoids integer division.
                edge_to_branch_weight[k] = 1.0 / v
        elif fs.spectral_split:
            edge_to_branch_weight = edge_to_weight
        print >> out, '<div>'
        if fs.vis_star:
            print >> out, nhj.get_svg_star_components(active_svs, sv_to_vs,
                                                      v_to_name, v_to_svs,
                                                      edge_to_branch_weight)
        elif fs.vis_complete:
            print >> out, nhj.get_svg(active_svs, sv_to_vs, v_to_name,
                                      v_to_svs, edge_to_branch_weight)
        print >> out, '</div>'
        # update the splits
        next_active_svs = set()
        # svs can be decomposed independently in arbitrary order
        alpha_index_gen = itertools.count()
        for sv in active_svs:
            nstates = len(sv_to_vs[sv])
            if nstates > 2:
                v_new = next(v_gen)
                sv_new_a = next(sv_gen)
                sv_new_b = next(sv_gen)
                alpha_index = next(alpha_index_gen)
                alpha = chr(ord('a') + alpha_index)
                v_to_name[v_new] = 'R%s%s' % (count_pos, alpha)
                next_active_svs.add(sv_new_a)
                next_active_svs.add(sv_new_b)
                if fs.spectral_split:
                    if len(sv_to_vs[sv]) == 3:
                        sv_new_c = next(sv_gen)
                        nhj.delta_wye_transform(sv, v_to_svs, sv_to_vs,
                                                edge_to_weight, v_new,
                                                sv_new_a, sv_new_b, sv_new_c)
                        next_active_svs.add(sv_new_c)
                    else:
                        nhj.harmonic_split_transform(sv, v_to_svs, sv_to_vs,
                                                     edge_to_weight, v_new,
                                                     sv_new_a, sv_new_b)
                elif fs.nj_split:
                    sv_new_big = next(sv_gen)
                    nhj.nj_split_transform(sv, v_to_svs, sv_to_vs,
                                           edge_to_distance, v_new, sv_new_big,
                                           sv_new_a, sv_new_b)
                    next_active_svs.add(sv_new_big)
            elif nstates == 2:
                next_active_svs.add(sv)
            else:
                raise ValueError('supervertex has too few vertices')
        # if the set of active svs has not changed then we are done
        if active_svs == next_active_svs:
            break
        else:
            active_svs = next_active_svs
    # close the elements in the reverse of the order they were opened
    print >> out, '</body>'
    print >> out, '</html>'
    return out.getvalue()
Exemplo n.º 11
0
def get_response_content(fs):
    """
    Report one eigenvalue of the leaf Schur complement of a tree.
    @param fs: an object with a tree attribute
    @return: the string form of the eigenvalue with index 1
    """
    T, B, N = FtreeIO.newick_to_TBN(fs.tree)
    leaf_vertices = Ftree.T_to_leaves(T)
    schur = Ftree.TB_to_L_schur(T, B, leaf_vertices)
    eigenvalues = scipy.linalg.eigh(schur, eigvals_only=True)
    return str(eigenvalues[1])
Exemplo n.º 12
0
def get_response_content(fs):
    """
    Write a plain text report about Schur complements of a rooted tree.
    The report shows the Schur complement that keeps the leaves and the
    root, the Schur complement that keeps only the leaves, an analysis of
    each diagonal block of the rooted Schur complement, and Schur
    complements computed per part of the root induced vertex partition.
    @param fs: an object with a tree attribute
    @return: the report as a string
    """
    # read the tree
    R, B, N = FtreeIO.newick_to_RBN(fs.tree)
    r = Ftree.R_to_root(R)
    T = Ftree.R_to_T(R)
    leaves = Ftree.T_to_leaves(T)
    # define the lists of leaves induced by the root
    vertex_partition = sorted(Ftree.R_to_vertex_partition(R))
    vertex_lists = [sorted(p) for p in vertex_partition]
    leaf_set = set(leaves)
    leaf_lists = [sorted(s & leaf_set) for s in vertex_partition]
    # order the list of leaves in a nice block form
    leaves = [v for lst in leaf_lists for v in lst]
    # remove internal vertices by Schur complementation
    L_schur_rooted = Ftree.TB_to_L_schur(T, B, leaves + [r])
    L_schur_full = Ftree.TB_to_L_schur(T, B, leaves)
    # show the matrix
    np.set_printoptions(linewidth=132)
    out = StringIO()
    # show the rooted schur complement
    w, v = scipy.linalg.eigh(L_schur_rooted)
    print >> out, 'rooted Schur complement:'
    print >> out, L_schur_rooted
    print >> out, 'Felsenstein weights at the root:'
    # the root is the last row and column of the rooted Schur complement
    print >> out, -L_schur_rooted[-1][:-1] / L_schur_rooted[-1, -1]
    print >> out, 'rooted Schur complement eigendecomposition:'
    print >> out, w
    print >> out, v
    print >> out
    # show the full schur complement
    w, v = scipy.linalg.eigh(L_schur_full)
    print >> out, 'full Schur complement:'
    print >> out, L_schur_full
    print >> out, 'full Schur complement eigendecomposition:'
    print >> out, w
    print >> out, v
    print >> out
    # analyze perron components
    print >> out, 'perron components:'
    print >> out
    # walk along the diagonal blocks defined by the leaf lists
    start = 0
    for lst in leaf_lists:
        n = len(lst)
        C = L_schur_rooted[start:start + n, start:start + n]
        print >> out, 'C:'
        print >> out, C
        w_eff = np.sum(C)
        b_eff = 1 / w_eff
        print >> out, 'effective conductance:'
        print >> out, w_eff
        print >> out, 'effective branch length (or resistance or variance):'
        print >> out, b_eff
        S = np.linalg.pinv(C)
        print >> out, 'C^-1 (rooted covariance-like):'
        print >> out, S
        w, v = scipy.linalg.eigh(S)
        print >> out, 'rooted covariance-like eigendecomposition:'
        print >> out, w
        print >> out, v
        # eigh gives ascending eigenvalues so the last one is the largest
        print >> out, 'perron value:'
        print >> out, w[-1]
        print >> out, 'reciprocal of perron value:'
        print >> out, 1 / w[-1]
        print >> out
        start += n
    print >> out
    # analyze subtrees using all of the vertices of each part
    print >> out, 'subtree Laplacian analysis:'
    print >> out
    for lst in vertex_lists:
        C = Ftree.TB_to_L_schur(T, B, lst + [r])
        w, v = scipy.linalg.eigh(C)
        print >> out, 'subtree Laplacian:'
        print >> out, C
        print >> out, 'eigendecomposition:'
        print >> out, w
        print >> out, v
        print >> out
    # analyze subtrees using only the leaves of each part
    print >> out, 'full Schur complement subtree analysis:'
    print >> out
    for lst in leaf_lists:
        C = Ftree.TB_to_L_schur(T, B, lst + [r])
        w, v = scipy.linalg.eigh(C)
        print >> out, 'full Schur complement in subtree:'
        print >> out, C
        print >> out, 'eigendecomposition:'
        print >> out, w
        print >> out, v
        print >> out
    return out.getvalue()
Exemplo n.º 13
0
def get_algebraic_connectivity(T, B, leaves):
    """
    Get the eigenvalue with index 1 of the leaf Schur complement.
    @param T: topology
    @param B: branch lengths
    @param leaves: the vertices kept by the Schur complement
    @return: the second entry of the eigenvalues returned by eigh
    """
    schur = Ftree.TB_to_L_schur(T, B, leaves)
    eigenvalues = scipy.linalg.eigh(schur, eigvals_only=True)
    return eigenvalues[1]
Exemplo n.º 14
0
def get_response_content(fs):
    """
    Fit branch lengths on a test topology and report the search results.
    Two eigenpairs of the leaf Schur complement of the true tree define
    the target of the fit.
    The search is restarted with a doubled evaluation budget until half
    of the time limit has elapsed; the last run is the one reported.
    @param fs: an object with true_tree and test_tree attributes
    @return: the report as a string
    @raise ValueError: if the vertex sets of the two trees do not match
    """
    nseconds_limit = 5.0
    R_true, B_true = FtreeIO.newick_to_RB(fs.true_tree, int)
    R_test = FtreeIO.newick_to_R(fs.test_tree, int)
    # get the unrooted tree topology
    T_true = Ftree.R_to_T(R_true)
    T_test = Ftree.R_to_T(R_test)
    # check the trees for vertex compatibility
    compatibility_checks = (
            (Ftree.T_to_order, 'vertex sets are not equal'),
            (Ftree.T_to_leaves, 'leaf vertex sets are not equal'),
            (Ftree.T_to_internal_vertices,
                'internal vertex sets are not equal'),
            )
    for get_vertices, message in compatibility_checks:
        if set(get_vertices(T_true)) != set(get_vertices(T_test)):
            raise ValueError(message)
    # get the 2D MDS for the true tree
    leaves = Ftree.T_to_leaves(T_true)
    internal = Ftree.T_to_internal_vertices(T_true)
    vertices = leaves + internal
    schur = Ftree.TB_to_L_schur(T_true, B_true, leaves)
    w_all, Vp_all = scipy.linalg.eigh(schur)
    # keep the eigenpairs with indices 1 and 2
    w = w_all[1:3]
    Vp = Vp_all[:, 1:3]
    # make the constant matrix for Frobenius norm comparison
    # only the leaf rows are nonzero
    C = np.zeros((len(vertices), 2))
    C[:len(leaves)] = w * Vp
    # keep doing iterations until we run out of time
    budget = 256
    t_initial = time.time()
    while time.time() - t_initial < nseconds_limit / 2:
        budget *= 2
        f = Functor(T_test.copy(), Vp.copy(), C.copy(), w.copy())
        initial_guess = np.ones(len(T_test) + 2 * len(internal))
        results = scipy.optimize.fmin(
                f, initial_guess,
                ftol=1e-8, xtol=1e-8,
                full_output=True,
                maxfun=budget, maxiter=budget)
        xopt, fopt, itr, funcalls, warnflag = results
    # look at the values from the longest running iteration
    B, Vr = f.X_to_B_Vr(xopt)
    L, V = f.X_to_L_V(xopt)
    Lrr = Ftree.TB_to_L_block(T_test, B, internal, internal)
    Lrp = Ftree.TB_to_L_block(T_test, B, internal, leaves)
    H_ext = -np.dot(np.linalg.pinv(Lrr), Lrp)
    # name every vertex by the string form of its identifier
    N = dict((v, str(v)) for v in vertices)
    # start writing the response
    out = StringIO()
    print >> out, 'xopt:', xopt
    print >> out, 'fopt:', fopt
    print >> out, 'number of iterations:', itr
    print >> out, 'number of function calls:', funcalls
    print >> out, 'warning flags:', warnflag
    print >> out, 'first four eigenvalues:', w_all[:4]
    print >> out, 'Vr:'
    print >> out, Vr
    print >> out, '-Lrr^-1 Lrp Vp:'
    print >> out, np.dot(H_ext, Vp)
    print >> out, C
    print >> out, np.dot(L, V)
    print >> out, FtreeIO.RBN_to_newick(R_test, B, N)
    return out.getvalue()