def get_response_content(fs):
    """
    Draw a single frame of the branch length animation.
    The length of the requested branch is interpolated between
    its initial length and fs.final_length using a sigmoid of
    fs.frame_progress, and the vertices are projected onto the
    two requested axes of the harmonic extension.
    @param fs: request object providing tree_string, branch_name,
    x_axis, y_axis, frame_progress, final_length, imageformat and scale
    @return: whatever get_animation_frame returns for this frame
    @raise ValueError: if a requested projection axis is out of range
    """
    # define the requested physical size of the images (in pixels)
    physical_size = (640, 480)
    # get the directed edges and the branch lengths and vertex names
    R, B, N = FtreeIO.newick_to_RBN(fs.tree_string)
    # get the requested undirected edge
    edge = get_edge(R, N, fs.branch_name)
    # get the undirected tree topology
    T = Ftree.R_to_T(R)
    # get the leaves and the vertices of articulation
    leaves = Ftree.T_to_leaves(T)
    internal = Ftree.T_to_internal_vertices(T)
    vertices = leaves + internal
    nleaves = len(leaves)
    v_to_index = Ftree.invseq(vertices)
    # get the requested indices; the axes are numbered starting from one
    x_index = fs.x_axis - 1
    y_index = fs.y_axis - 1
    # A negative index would not fail by itself because numpy would
    # silently count columns from the end, so reject it explicitly.
    if x_index < 0 or y_index < 0:
        raise ValueError('projection axes are numbered starting from 1')
    if x_index >= nleaves - 1 or y_index >= nleaves - 1:
        raise ValueError(
                'projection indices must be smaller than the number of leaves')
    # adjust the branch length
    initial_length = B[edge]
    t = sigmoid(fs.frame_progress)
    B[edge] = (1 - t) * initial_length + t * fs.final_length
    # get the points
    w, v = Ftree.TB_to_harmonic_extension(T, B, leaves, internal)
    # scale each column of v by the reciprocal square root of its w entry
    X_full = np.dot(v, np.diag(np.reciprocal(np.sqrt(w))))
    X = np.vstack([X_full[:, x_index], X_full[:, y_index]]).T
    # draw the image
    ext = Form.g_imageformat_to_ext[fs.imageformat]
    return get_animation_frame(ext, physical_size, fs.scale,
            v_to_index, T, X, w)
def test_leaf_distn_a(self):
    """
    Check the leaf distribution at the root of a small example tree.
    """
    # Parse the example tree and find its root.
    newick = '(a:2, (b:1, c:1, d:1, e:1)x:1)y;'
    R, B, N = FtreeIO.newick_to_RBN(newick)
    T = Ftree.R_to_T(R)
    root = Ftree.R_to_root(R)
    # Look up the leaf distribution that belongs to the root.
    root_distn = get_internal_vertex_to_leaf_distn(T, B)[root]
    leaves = Ftree.T_to_leaves(T)
    observed = dict((N[v], root_distn[v]) for v in leaves)
    # The expected weights share a common denominator.
    n = 5.0
    denominator = 3*n - 2
    expected = {'a': n / denominator}
    for name in 'bcde':
        expected[name] = 2 / denominator
    # Compare the expectation with the observation leaf by leaf.
    for v in leaves:
        name = N[v]
        self.assertTrue(np.allclose(expected[name], observed[name]))
def get_leaf_distn_acl(R, B):
    """
    This is a possibly equivalent formulation.
    It is based on Felsenstein weights.
    @param R: directed topology of the rooted tree
    @param B: branch lengths
    @return: a map from each leaf to its weight
    """
    # Order the vertices as leaves, then non-root internal, then the root.
    T = Ftree.R_to_T(R)
    root = Ftree.R_to_root(R)
    leaves = Ftree.T_to_leaves(T)
    ordered = list(leaves)
    for vertex in Ftree.T_to_internal_vertices(T):
        if vertex != root:
            ordered.append(vertex)
    ordered.append(root)
    # The pseudoinverse of the Laplacian
    # is also the doubly centered covariance matrix.
    laplacian = Ftree.TB_to_L_principal(T, B, ordered)
    centered_cov = np.linalg.pinv(laplacian)
    # Decenter the covariance matrix using the root, which is ordered last.
    # This should give the rooted covariance matrix
    # which is M in the appendix of Weights for Data Related by a Tree
    # by Altschul, Carroll, and Lipman, 1989.
    root_row = centered_cov[-1]
    ones_vector = np.ones_like(root_row)
    ones_matrix = np.ones_like(centered_cov)
    rooted_cov = (
            centered_cov
            - np.outer(ones_vector, root_row)
            - np.outer(root_row, ones_vector)
            + centered_cov[-1, -1] * ones_matrix)
    # Restrict to the leaves, which come first in the vertex order.
    nleaves = len(leaves)
    leaf_cov = rooted_cov[:nleaves, :nleaves]
    leaf_cov_pinv = np.linalg.pinv(leaf_cov)
    # Normalized column sums of the pseudoinverse
    # give the leaf distribution.
    weights = leaf_cov_pinv.sum(axis=0) / leaf_cov_pinv.sum()
    return dict(zip(leaves, weights))
def newick_to_TN(s):
    """
    Parse a newick string into a topology and a vertex name map.
    Everything to do with branch lengths is ignored.
    @param s: newick string
    @return: undirected topology, vertex name map
    """
    parsed = NewickIO.parse_simple(s, _IO_Tree())
    # convert the directed edges of the parsed tree to an undirected topology
    T = Ftree.R_to_T(parsed.R)
    return T, parsed.v_to_name
def test_topo_b_from_newick(self):
    """
    Check the topology and the leaves parsed from a newick string.
    """
    newick = '((1:1, 2:0.5)6:1, (3:0.33333333333, 4:0.5)7:1, 5:1)8;'
    observed_T, observed_B = newick_to_TB(newick, int)
    # The expected topology is given through its directed edges.
    directed_edges = set([
        (8,7), (8,6), (8,5),
        (7,4), (7,3),
        (6,2), (6,1)])
    expected_T = Ftree.R_to_T(directed_edges)
    self.assertEqual(observed_T, expected_T)
    # The leaves are the vertices numbered one through five.
    self.assertEqual(Ftree.T_to_leaves(observed_T), [1, 2, 3, 4, 5])
def newick_to_TBN(s):
    """
    Parse a newick string, checking its branch lengths.
    @param s: newick string
    @return: undirected topology, branch lengths, vertex name map
    """
    parsed = NewickIO.parse_simple(s, _IO_Tree())
    topology = Ftree.R_to_T(parsed.R)
    # check the branch lengths against the topology before returning them
    Ftree.TB_assert_branch_lengths(topology, parsed.B)
    return topology, parsed.B, parsed.v_to_name
def get_response_content(fs):
    """
    Report the algebraic connectivity and the per-component perron values.
    The tree is cut at a distinguished internal vertex, and the smallest
    eigenvalue of each principal block of a Schur complement is reported.
    @param fs: request object providing the newick string as fs.tree
    and the name of the distinguished vertex as fs.vertex
    @return: the text of the report
    @raise ValueError: if the distinguished vertex is not internal
    """
    # read the tree
    T, B, N = FtreeIO.newick_to_TBN(fs.tree)
    leaves = Ftree.T_to_leaves(T)
    internal = Ftree.T_to_internal_vertices(T)
    # the distinguished vertex must be a vertex of articulation
    r = get_unique_vertex(N, fs.vertex)
    if r not in internal:
        raise ValueError(
                'the distinguished vertex should have degree at least two')
    # Root the tree at the distinguished vertex.
    # Vertices that are not keys of v_to_sinks are treated as leaves below.
    R = Ftree.T_to_R_specific(T, r)
    v_to_sinks = Ftree.R_to_v_to_sinks(R)
    # cut every edge that leaves the distinguished vertex
    neighbors = Ftree.T_to_v_to_neighbors(T)[r]
    R_pruned = set(R)
    for adj in neighbors:
        R_pruned.remove((r, adj))
    T_pruned = Ftree.R_to_T(R_pruned)
    # collect one sorted list of leaves per neighbor
    leaf_lists = []
    for adj in neighbors:
        R_subtree = Ftree.T_to_R_specific(T_pruned, adj)
        leaf_lists.append(
                sorted(b for a, b in R_subtree if b not in v_to_sinks))
    ordered_leaves = [leaf for block in leaf_lists for leaf in block]
    # keep the leaves and the distinguished vertex in the Schur complement
    L_schur = Ftree.TB_to_L_schur(T, B, ordered_leaves + [r])
    # each leaf list corresponds to a diagonal block of the Schur complement
    principal_matrices = []
    offset = 0
    for block in leaf_lists:
        stop = offset + len(block)
        principal_matrices.append(L_schur[offset:stop, offset:stop])
        offset = stop
    # write the report
    out = StringIO()
    print >> out, 'algebraic connectivity:'
    print >> out, get_algebraic_connectivity(T, B, leaves)
    print >> out
    print >> out
    print >> out, 'perron values:'
    print >> out
    for M, block in zip(principal_matrices, leaf_lists):
        # the first entry is the smallest eigenvalue
        smallest = scipy.linalg.eigh(M, eigvals_only=True)[0]
        print >> out, [N[v] for v in block]
        print >> out, smallest
        print >> out
    return out.getvalue()
def main(args):
    """
    Write the frames of a branch length animation as image files.
    The length of the requested branch moves from its initial length
    to args.final_length over args.nframes frames, and each frame
    is written to args.output_directory.
    @param args: parsed arguments providing nframes, physical_width,
    physical_height, tree, branch_name, x_axis, y_axis, interpolation,
    final_length, image_format, scale and output_directory
    @raise ValueError: if nframes is less than two
    or a requested projection axis is out of range
    """
    # do some validation
    if args.nframes < 2:
        raise ValueError('nframes should be at least 2')
    # define the requested physical size of the images (in pixels)
    physical_size = (args.physical_width, args.physical_height)
    # get the directed edges and the branch lengths and vertex names
    R, B, N = FtreeIO.newick_to_RBN(args.tree)
    # get the requested undirected edge
    edge = get_edge(R, N, args.branch_name)
    initial_length = B[edge]
    # get the undirected tree topology
    T = Ftree.R_to_T(R)
    # get the leaves and the vertices of articulation
    leaves = Ftree.T_to_leaves(T)
    internal = Ftree.T_to_internal_vertices(T)
    vertices = leaves + internal
    nleaves = len(leaves)
    v_to_index = Ftree.invseq(vertices)
    # get the requested indices; the axes are numbered starting from one
    x_index = args.x_axis - 1
    y_index = args.y_axis - 1
    # A negative index would not fail by itself because numpy would
    # silently count columns from the end, so reject it explicitly.
    if x_index < 0 or y_index < 0:
        raise ValueError('projection axes are numbered starting from 1')
    if x_index >= nleaves - 1 or y_index >= nleaves - 1:
        raise ValueError(
                'projection indices must be smaller than the number of leaves')
    X_prev = None
    # create the animation frames and write them as image files
    pbar = Progress.Bar(args.nframes)
    for frame_index in range(args.nframes):
        # the progress runs from zero at the first frame to one at the last
        linear_progress = frame_index / float(args.nframes - 1)
        if args.interpolation == 'sigmoid':
            t = sigmoid(linear_progress)
        else:
            t = linear_progress
        B[edge] = (1 - t) * initial_length + t * args.final_length
        w, v = Ftree.TB_to_harmonic_extension(T, B, leaves, internal)
        # scale each column of v by the reciprocal square root of its w entry
        X_full = np.dot(v, np.diag(np.reciprocal(np.sqrt(w))))
        X = np.vstack([X_full[:, x_index], X_full[:, y_index]]).T
        # NOTE(review): presumably this keeps the orientation of the
        # projection consistent between consecutive frames -- confirm
        # against reflect_to_match.
        if X_prev is not None:
            X = reflect_to_match(X, X_prev)
        X_prev = X
        image_string = get_animation_frame(
                args.image_format, physical_size, args.scale,
                v_to_index, T, X, w)
        image_filename = 'frame-%04d.%s' % (frame_index, args.image_format)
        image_pathname = os.path.join(args.output_directory, image_filename)
        with open(image_pathname, 'wb') as fout:
            fout.write(image_string)
        pbar.update(frame_index + 1)
    pbar.finish()
def test_leaf_distn_schur(self):
    """
    Compare the Schur complement leaf weights to the stored expectations.
    """
    # Parse the example tree.
    R, B, N = FtreeIO.newick_to_RBN(LeafWeights.g_acl_tree)
    T = Ftree.R_to_T(R)
    # The root is not used below,
    # but the lookup still exercises root detection on this tree.
    Ftree.R_to_root(R)
    # Map each leaf name to its observed weight.
    leaf_distn = get_leaf_distn_schur(R, B)
    observed = dict((N[v], leaf_distn[v]) for v in Ftree.T_to_leaves(T))
    # The expectations are strings, so format the observations to match.
    for name in LeafWeights.g_acl_ordered_names:
        expected_text = LeafWeights.g_acl_expected_weights[name]
        observed_text = '%.3f' % observed[name]
        self.assertEqual(expected_text, observed_text)
def get_leaf_distn_schur(R, B):
    """
    This is a possibly equivalent formulation.
    It is based on removing all internal vertices except the root
    by Schur complement.
    @param R: directed topology of the rooted tree
    @param B: branch lengths
    @return: a map from each leaf to its normalized weight
    """
    T = Ftree.R_to_T(R)
    r = Ftree.R_to_root(R)
    leaves = Ftree.T_to_leaves(T)
    # Get the combinatorial Laplacian matrix
    # and Schur complement out all of the non-root internal vertices.
    # The kept vertices are the leaves followed by the root,
    # so the root corresponds to the last row and column.
    L_schur = Ftree.TB_to_L_schur(T, B, leaves + [r])
    # Get the vector of negative weights between the root and the leaves.
    w_unnormalized = L_schur[-1, :-1]
    # Get the normalized weight vector.
    w = w_unnormalized / w_unnormalized.sum()
    return dict((v, w[i]) for i, v in enumerate(leaves))
def get_response_content(fs):
    """
    Report Schur complement analyses of a rooted tree.
    The report shows the rooted and full Schur complements with their
    eigendecompositions, followed by an analysis of each block of leaves
    induced by the root and of each corresponding subtree.
    Note that this changes the global numpy print options.
    @param fs: request object providing the newick string as fs.tree
    @return: the text of the report
    """
    # read the tree
    R, B, N = FtreeIO.newick_to_RBN(fs.tree)
    r = Ftree.R_to_root(R)
    T = Ftree.R_to_T(R)
    leaves = Ftree.T_to_leaves(T)
    # define the lists of leaves induced by the root
    vertex_partition = sorted(Ftree.R_to_vertex_partition(R))
    vertex_lists = [sorted(p) for p in vertex_partition]
    leaf_set = set(leaves)
    leaf_lists = [sorted(s & leaf_set) for s in vertex_partition]
    # order the list of leaves in a nice block form
    leaves = [v for lst in leaf_lists for v in lst]
    # remove internal vertices by Schur complementation
    L_schur_rooted = Ftree.TB_to_L_schur(T, B, leaves + [r])
    L_schur_full = Ftree.TB_to_L_schur(T, B, leaves)
    # show the matrix
    np.set_printoptions(linewidth=132)
    out = StringIO()
    # show the rooted schur complement
    w, v = scipy.linalg.eigh(L_schur_rooted)
    print >> out, 'rooted Schur complement:'
    print >> out, L_schur_rooted
    print >> out, 'Felsenstein weights at the root:'
    # the root is ordered last, so use the last row without its diagonal
    print >> out, -L_schur_rooted[-1][:-1] / L_schur_rooted[-1, -1]
    print >> out, 'rooted Schur complement eigendecomposition:'
    print >> out, w
    print >> out, v
    print >> out
    # show the full schur complement
    w, v = scipy.linalg.eigh(L_schur_full)
    print >> out, 'full Schur complement:'
    print >> out, L_schur_full
    print >> out, 'full Schur complement eigendecomposition:'
    print >> out, w
    print >> out, v
    print >> out
    # analyze perron components
    print >> out, 'perron components:'
    print >> out
    # the leaves are in block order, so each leaf list is a diagonal block
    start = 0
    for lst in leaf_lists:
        n = len(lst)
        C = L_schur_rooted[start:start + n, start:start + n]
        print >> out, 'C:'
        print >> out, C
        w_eff = np.sum(C)
        b_eff = 1 / w_eff
        print >> out, 'effective conductance:'
        print >> out, w_eff
        print >> out, 'effective branch length (or resistance or variance):'
        print >> out, b_eff
        S = np.linalg.pinv(C)
        print >> out, 'C^-1 (rooted covariance-like):'
        print >> out, S
        w, v = scipy.linalg.eigh(S)
        print >> out, 'rooted covariance-like eigendecomposition:'
        print >> out, w
        print >> out, v
        print >> out, 'perron value:'
        print >> out, w[-1]
        print >> out, 'reciprocal of perron value:'
        print >> out, 1 / w[-1]
        print >> out
        start += n
    print >> out
    # analyze subtrees
    print >> out, 'subtree Laplacian analysis:'
    print >> out
    for lst in vertex_lists:
        C = Ftree.TB_to_L_schur(T, B, lst + [r])
        w, v = scipy.linalg.eigh(C)
        print >> out, 'subtree Laplacian:'
        print >> out, C
        print >> out, 'eigendecomposition:'
        print >> out, w
        print >> out, v
        print >> out
    # analyze subtrees
    print >> out, 'full Schur complement subtree analysis:'
    print >> out
    for lst in leaf_lists:
        C = Ftree.TB_to_L_schur(T, B, lst + [r])
        w, v = scipy.linalg.eigh(C)
        print >> out, 'full Schur complement in subtree:'
        print >> out, C
        print >> out, 'eigendecomposition:'
        print >> out, w
        print >> out, v
        print >> out
    return out.getvalue()
def set_root(self, v):
    """
    Redirect the edges of the tree with respect to vertex v.
    This is slow, probably as a result of the design.
    @param v: the vertex passed to Ftree.T_to_R_specific as the root
    """
    # go through the undirected topology to get the new directed edges
    undirected = Ftree.R_to_T(self.R)
    self.R = Ftree.T_to_R_specific(undirected, v)
    # the vertex to source map depends on the directed edges
    self.v_to_source = Ftree.R_to_v_to_source(self.R)
def get_response_content(fs):
    """
    Report the result of a time limited numerical search on a test tree.
    A Functor objective is minimized with scipy.optimize.fmin repeatedly,
    doubling the evaluation budget each time, until half of the time
    limit has elapsed. The report describes the last minimization.
    @param fs: request object providing the newick strings
    fs.true_tree and fs.test_tree
    @return: the text of the report
    @raise ValueError: if the two trees are not vertex compatible
    """
    nseconds_limit = 5.0
    R_true, B_true = FtreeIO.newick_to_RB(fs.true_tree, int)
    R_test = FtreeIO.newick_to_R(fs.test_tree, int)
    # get the unrooted tree topology
    T_true = Ftree.R_to_T(R_true)
    T_test = Ftree.R_to_T(R_test)
    # check the trees for vertex compatibility
    compatibility_checks = (
            (Ftree.T_to_order, 'vertex sets are not equal'),
            (Ftree.T_to_leaves, 'leaf vertex sets are not equal'),
            (Ftree.T_to_internal_vertices,
                'internal vertex sets are not equal'))
    for extract, message in compatibility_checks:
        if set(extract(T_true)) != set(extract(T_test)):
            raise ValueError(message)
    # get the 2D MDS for the true tree
    leaves = Ftree.T_to_leaves(T_true)
    internal = Ftree.T_to_internal_vertices(T_true)
    vertices = leaves + internal
    L_schur = Ftree.TB_to_L_schur(T_true, B_true, leaves)
    w_all, Vp_all = scipy.linalg.eigh(L_schur)
    # use the second and third eigenpairs
    w = w_all[1:3]
    Vp = Vp_all[:, 1:3]
    # make the constant matrix for Frobenius norm comparison;
    # only the rows corresponding to leaves are nonzero
    C = np.zeros((len(vertices), 2))
    C[:len(leaves)] = w * Vp
    # keep doing iterations until we run out of time
    mymax = 256
    t_initial = time.time()
    while time.time() - t_initial < nseconds_limit / 2:
        mymax *= 2
        f = Functor(T_test.copy(), Vp.copy(), C.copy(), w.copy())
        initial_guess = np.ones(len(T_test) + 2 * len(internal))
        xopt, fopt, itr, funcalls, warnflag = scipy.optimize.fmin(
                f, initial_guess,
                ftol=1e-8, xtol=1e-8, full_output=True,
                maxfun=mymax, maxiter=mymax)
    # look at the values from the longest running iteration
    B, Vr = f.X_to_B_Vr(xopt)
    L, V = f.X_to_L_V(xopt)
    Lrr = Ftree.TB_to_L_block(T_test, B, internal, internal)
    Lrp = Ftree.TB_to_L_block(T_test, B, internal, leaves)
    H_ext = -np.dot(np.linalg.pinv(Lrr), Lrp)
    # name each vertex by its string representation
    N = dict((vertex, str(vertex)) for vertex in vertices)
    # start writing the response
    out = StringIO()
    print >> out, 'xopt:', xopt
    print >> out, 'fopt:', fopt
    print >> out, 'number of iterations:', itr
    print >> out, 'number of function calls:', funcalls
    print >> out, 'warning flags:', warnflag
    print >> out, 'first four eigenvalues:', w_all[:4]
    print >> out, 'Vr:'
    print >> out, Vr
    print >> out, '-Lrr^-1 Lrp Vp:'
    print >> out, np.dot(H_ext, Vp)
    print >> out, C
    print >> out, np.dot(L, V)
    print >> out, FtreeIO.RBN_to_newick(R_test, B, N)
    return out.getvalue()