def test_leaf_distn_a(self):
    """
    Check the leaf distribution at the root of a small example tree.
    """
    # Parse the example tree.
    newick = '(a:2, (b:1, c:1, d:1, e:1)x:1)y;'
    R, B, N = FtreeIO.newick_to_RBN(newick)
    T = Ftree.R_to_T(R)
    root = Ftree.R_to_root(R)
    # Pick out the leaf distribution that belongs to the root.
    root_distn = get_internal_vertex_to_leaf_distn(T, B)[root]
    leaves = Ftree.T_to_leaves(T)
    observed = dict((N[v], root_distn[v]) for v in leaves)
    # Build the expected weights keyed by leaf name.
    n = 5.0
    denominator = 3*n - 2
    expected = {'a': n / denominator}
    for name in 'bcde':
        expected[name] = 2 / denominator
    # Compare the expected and observed weights one leaf at a time.
    for v in leaves:
        name = N[v]
        self.assertTrue(np.allclose(expected[name], observed[name]))
def R_to_newick(R):
    """
    Serialize a directed topology as a newick string.
    @param R: a directed topology
    @return: a newick string
    """
    root = Ftree.R_to_root(R)
    v_to_sinks = Ftree.R_to_v_to_sinks(R)
    # the helper builds the string from the root; the terminator is added here
    return _v_to_newick(v_to_sinks, root) + ';'
def equal_daylight_layout(T, B, iteration_count):
    """
    Compute an equal daylight layout starting from an equal arc layout.
    @param T: topology
    @param B: branch lengths
    @param iteration_count: number of reroot-and-equalize passes
    @return: a map from vertex to an (x, y) pair
    """
    R = Ftree.T_to_R_canonical(T)
    root = Ftree.R_to_root(R)
    # the equal arc layout provides the starting locations
    initial_locations = equal_arc_layout(T, B)
    # mirror the tree inside the C extension object,
    # recording the extension id assigned to each vertex
    v_to_sinks = Ftree.R_to_v_to_sinks(R)
    v_to_dtree_id = {}
    dtree = day.Day()
    _build_dtree(
            dtree, root, v_to_sinks, initial_locations, v_to_dtree_id, 0)
    # reroot at and equalize each vertex that has more than two neighbors
    v_to_neighbors = Ftree.T_to_v_to_neighbors(T)
    for _ in range(iteration_count):
        for v in Ftree.T_to_inside_out(T):
            if len(v_to_neighbors[v]) <= 2:
                continue
            dtree.select_node(v_to_dtree_id[v])
            dtree.reroot()
            dtree.equalize()
    # read the final coordinates back out of the extension object
    final_locations = {}
    for v, dtree_id in v_to_dtree_id.items():
        dtree.select_node(dtree_id)
        x = dtree.get_x()
        y = dtree.get_y()
        final_locations[v] = (x, y)
    return final_locations
def get_response_content(fs):
    """
    Draw one animation frame of a tree whose branch length is changing.
    @param fs: an object providing tree_string, branch_name, x_axis, y_axis,
    frame_progress, final_length, scale and imageformat
    @return: the value returned by get_animation_frame
    """
    # the requested physical size of the image (in pixels)
    physical_size = (640, 480)
    # directed edges, branch lengths, and vertex names
    R, B, N = FtreeIO.newick_to_RBN(fs.tree_string)
    # the undirected edge whose length will be adjusted
    edge = get_edge(R, N, fs.branch_name)
    # the undirected topology and its vertex ordering
    T = Ftree.R_to_T(R)
    leaves = Ftree.T_to_leaves(T)
    internal = Ftree.T_to_internal_vertices(T)
    nleaves = len(leaves)
    v_to_index = Ftree.invseq(leaves + internal)
    # convert the one-based axis requests to zero-based indices
    x_index = fs.x_axis - 1
    y_index = fs.y_axis - 1
    if any(index >= nleaves - 1 for index in (x_index, y_index)):
        raise ValueError(
                'projection indices must be smaller than the number of leaves')
    # interpolate between the initial and the final branch length
    initial_length = B[edge]
    t = sigmoid(fs.frame_progress)
    B[edge] = (1 - t) * initial_length + t * fs.final_length
    # scale the harmonically extended vectors and keep the two requested axes
    w, v = Ftree.TB_to_harmonic_extension(T, B, leaves, internal)
    scaling = np.diag(np.reciprocal(np.sqrt(w)))
    X_full = np.dot(v, scaling)
    X = np.vstack([X_full[:, x_index], X_full[:, y_index]]).T
    # draw the image
    ext = Form.g_imageformat_to_ext[fs.imageformat]
    return get_animation_frame(
            ext, physical_size, fs.scale, v_to_index, T, X, w)
def get_leaf_distn_acl(R, B):
    """
    Compute a leaf distribution from a rooted covariance matrix.
    The original author describes this as a possibly equivalent formulation
    based on Felsenstein weights.
    @param R: directed topology
    @param B: branch lengths
    @return: a map from leaf to weight
    """
    # Order the vertices as leaves, then non-root internal vertices, then root.
    T = Ftree.R_to_T(R)
    root = Ftree.R_to_root(R)
    leaves = Ftree.T_to_leaves(T)
    other_internal = [
            v for v in Ftree.T_to_internal_vertices(T) if v != root]
    ordered = leaves + other_internal + [root]
    # The pseudoinverse of the Laplacian
    # is also the doubly centered covariance matrix.
    L = Ftree.TB_to_L_principal(T, B, ordered)
    HSH = np.linalg.pinv(L)
    # Decenter using the root, which is last in the vertex order.
    # This should give the rooted covariance matrix
    # which is M in the appendix of Weights for Data Related by a Tree
    # by Altschul, Carroll, and Lipman, 1989.
    root_row = HSH[-1]
    e = np.ones_like(root_row)
    J = np.ones_like(HSH)
    M = HSH - np.outer(e, root_row) - np.outer(root_row, e) + HSH[-1, -1]*J
    # Restrict to the leaf block and pseudoinvert it.
    nleaves = len(leaves)
    S_pinv = np.linalg.pinv(M[:nleaves, :nleaves])
    # The normalized column sums give the leaf distribution.
    weights = S_pinv.sum(axis=0) / S_pinv.sum()
    return dict(zip(leaves, weights))
def get_tikz_lines(newick, eigenvector_index, yaw, pitch):
    """
    Get the lines of a tikz drawing of a tree with vertex valuations.
    @param newick: newick string
    @param eigenvector_index: 1 is Fiedler
    @param yaw: passed through to get_v_to_xyz
    @param pitch: passed through to xyz_to_tikz_lines
    @return: the value returned by xyz_to_tikz_lines
    """
    tree = Newick.parse(newick, SpatialTree.SpatialTree)
    # give every node a name derived from its id and re-serialize the tree
    for node in tree.preorder():
        node.name = 'n' + str(id(node))
    renamed_newick = NewickIO.get_newick_string(tree)
    # lay out the tree and fit it to the requested xy scale
    tree_layout = FastDaylightLayout.StraightBranchLayout()
    tree_layout.do_layout(tree)
    tree.fit((g_xy_scale, g_xy_scale))
    name_to_location = {}
    for node in tree.preorder():
        name_to_location[node.name] = tree._layout_to_display(node.location)
    # read the renamed tree and order the vertices leaves first
    T, B, N = FtreeIO.newick_to_TBN(renamed_newick)
    leaves = Ftree.T_to_leaves(T)
    internal = Ftree.T_to_internal_vertices(T)
    vertices = leaves + internal
    # match the vertices to their layout locations through the node names
    v_to_location = dict((v, name_to_location[N[v]]) for v in vertices)
    # scale the requested column of valuations for use as the z coordinate
    w, V = Ftree.TB_to_harmonic_extension(T, B, leaves, internal)
    v_to_val = {}
    for i, val in enumerate(V[:, eigenvector_index - 1]):
        v_to_val[vertices[i]] = g_z_scale * val
    # combine locations and valuations into xyz coordinates
    v_to_xyz = get_v_to_xyz(yaw, v_to_location, v_to_val)
    # vertices added by this call are the ones not in the original order
    add_intersection_vertices(T, B, v_to_xyz)
    intersection_vertices = sorted(
            v for v in v_to_xyz if v not in vertices)
    return xyz_to_tikz_lines(
            T, B, pitch, v_to_xyz, leaves, internal, intersection_vertices)
def sample_b_to_rate(R):
    """
    Sample a rate for each branch given the rate of the branch above it.
    The b in this function name means branch.
    @param R: directed topology
    @return: a sampled map from undirected branch to rate
    """
    b_to_rate = {}
    v_to_source = Ftree.R_to_v_to_source(R)
    for v in Ftree.R_to_preorder(R):
        parent = v_to_source.get(v)
        if parent is None:
            # a vertex with no source has no branch above it
            continue
        grandparent = v_to_source.get(parent)
        if grandparent is None:
            # branches touching the top vertex use a unit expectation
            parent_rate = 1.0
        else:
            # preorder guarantees the branch above was already sampled
            parent_rate = b_to_rate[frozenset([parent, grandparent])]
        # the sampled rate is exponential with mean equal to the parent rate
        b_to_rate[frozenset([v, parent])] = random.expovariate(1/parent_rate)
    return b_to_rate
def get_response_content(fs):
    """
    Draw a tree in tikz with ticks from the Fiedler valuations.
    @param fs: a FieldStorage object containing the cgi arguments
    @return: the response
    """
    # read the tree
    T, B, N = FtreeIO.newick_to_TBN(fs.tree)
    # compute the unreflected valuations and keep the entry at index 1
    fiedler_valuations = TB_to_harmonic_valuations(T, B, False)[1]
    # lay out the tree using three equal daylight iterations
    initial_v_to_location = FtreeAux.equal_daylight_layout(T, B, 3)
    # stack the initial locations in a fixed vertex order
    vertices = Ftree.T_to_leaves(T) + Ftree.T_to_internal_vertices(T)
    X_in = np.array([tuple(initial_v_to_location[v]) for v in vertices])
    # rotate and scale the layout to fit the physical size
    physical_size = (fs.width, fs.height)
    theta = layout.get_best_angle(X_in, physical_size)
    X = layout.rotate_2d_centroid(X_in, theta)
    axis_aligned_size = layout.get_axis_aligned_size(X)
    X *= layout.get_scaling_factor(axis_aligned_size, physical_size)
    # map each vertex to its final location
    v_to_location = dict((v, tuple(row)) for v, row in zip(vertices, X))
    # draw the branches, then the ticks, then the labels
    context = TikzContext()
    draw_plain_branches_ftree(T, B, context, v_to_location)
    draw_ticks_ftree(T, B, context, fiedler_valuations, v_to_location)
    draw_labels_ftree(T, N, context, v_to_location)
    context.finish()
    # wrap the picture text in the requested response format
    return tikz.get_response(context.get_text(), fs.tikzformat)
def equal_arc_layout(T, B):
    """
    Compute an equal arc layout of the tree.
    @param T: tree topology
    @param B: branch lengths
    @return: a map from vertex to location
    """
    # root the tree canonically; the layout needs some rooting
    R = Ftree.T_to_R_canonical(T)
    root = Ftree.R_to_root(R)
    # count the tips below each vertex, visiting sinks before their sources
    v_to_sinks = Ftree.R_to_v_to_sinks(R)
    v_to_count = {}
    for v in Ftree.R_to_postorder(R):
        sinks = v_to_sinks.get(v, [])
        v_to_count[v] = sum(v_to_count[s] for s in sinks) if sinks else 1
    # assign the angles over the full circle starting from the root
    v_to_theta = {}
    _force_equal_arcs(
            v_to_sinks, v_to_count, v_to_theta,
            root, -math.pi, math.pi)
    # turn the angles into coordinates with the root at the origin
    v_to_source = Ftree.R_to_v_to_source(R)
    v_to_location = {}
    _update_locations(
            R, B,
            v_to_source, v_to_sinks, v_to_theta, v_to_location,
            root, (0, 0), 0)
    return v_to_location
def test_topo_b_from_newick(self):
    """
    Check the topology and the leaves parsed from an integer-named tree.
    """
    newick = '((1:1, 2:0.5)6:1, (3:0.33333333333, 4:0.5)7:1, 5:1)8;'
    observed_T, observed_B = newick_to_TB(newick, int)
    # the expected topology is built from the directed edges of the tree
    directed_edges = set([
        (8, 7), (8, 6), (8, 5),
        (7, 4), (7, 3),
        (6, 2), (6, 1)])
    expected_T = Ftree.R_to_T(directed_edges)
    self.assertEqual(observed_T, expected_T)
    # the leaves are expected in increasing order
    self.assertEqual(Ftree.T_to_leaves(observed_T), [1, 2, 3, 4, 5])
def get_harmonically_extended_MDS(self):
    """
    Get leaf MDS points followed by harmonically extended internal points.
    @return: an array with the leaf rows stacked above the internal rows
    """
    T, B = self.T, self.B
    # blocks of the Laplacian needed for the extension
    Lbb = Ftree.TB_to_L_block(T, B, self.internal, self.internal)
    Lba = Ftree.TB_to_L_block(T, B, self.internal, self.leaves)
    # eigenpairs 1 and 2 of the Schur complement on the leaves
    L_schur = Ftree.TB_to_L_schur(T, B, self.leaves)
    W, V = scipy.linalg.eigh(L_schur, eigvals=(1, 2))
    # scale each eigenvector by the reciprocal square root of its eigenvalue
    V = V * np.reciprocal(np.sqrt(W))
    V = self._reflected_to_reference(V)
    # extend the leaf points to the internal vertices
    extension = np.dot(np.linalg.pinv(Lbb), Lba)
    Y = -np.dot(extension, V)
    return np.vstack([V, Y])
def newick_to_TBN(s):
    """
    Parse a newick string into an undirected tree with names.
    @param s: newick string
    @return: undirected topology, branch lengths, vertex name map
    """
    parsed = NewickIO.parse_simple(s, _IO_Tree())
    T = Ftree.R_to_T(parsed.R)
    # run the branch length assertion before handing anything back
    Ftree.TB_assert_branch_lengths(T, parsed.B)
    return T, parsed.B, parsed.v_to_name
def RB_to_newick(R, B):
    """
    Serialize a directed topology with branch lengths as a newick string.
    @param R: a directed topology
    @param B: branch lengths
    @return: a newick string
    """
    root = Ftree.R_to_root(R)
    v_to_source = Ftree.R_to_v_to_source(R)
    v_to_sinks = Ftree.R_to_v_to_sinks(R)
    body = _Bv_to_newick(v_to_source, v_to_sinks, B, root)
    return body + ';'
def get_harmonically_extended_MDS(T, B, leaves, internal):
    """
    Use harmonically extended 2D MDS.
    @param T: topology
    @param B: branch lengths
    @param leaves: ordered leaf vertices
    @param internal: ordered internal vertices
    @return: an array with the leaf rows stacked above the internal rows
    """
    # blocks of the Laplacian needed for the extension
    Lbb = Ftree.TB_to_L_block(T, B, internal, internal)
    Lba = Ftree.TB_to_L_block(T, B, internal, leaves)
    # eigenpairs 1 and 2 of the Schur complement on the leaves
    L_schur = Ftree.TB_to_L_schur(T, B, leaves)
    W, V = scipy.linalg.eigh(L_schur, eigvals=(1, 2))
    # scale each eigenvector by the reciprocal square root of its eigenvalue
    leaf_points = V * np.reciprocal(np.sqrt(W))
    # extend the leaf points to the internal vertices
    extension = np.dot(np.linalg.pinv(Lbb), Lba)
    internal_points = -np.dot(extension, leaf_points)
    return np.vstack([leaf_points, internal_points])
def RBN_to_newick(R, B, N):
    """
    Serialize a named directed tree with branch lengths as a newick string.
    @param R: a directed topology
    @param B: branch lengths
    @param N: map from vertices to names
    @return: a newick string
    """
    root = Ftree.R_to_root(R)
    v_to_source = Ftree.R_to_v_to_source(R)
    v_to_sinks = Ftree.R_to_v_to_sinks(R)
    body = _BNv_to_newick(v_to_source, v_to_sinks, B, N, root)
    return body + ';'
def get_response_content(fs):
    """
    Reroot a tree at its harmonic Fiedler point and attach a new leaf.
    The new leaf hangs off the new root, and its branch length is chosen
    by minimizing get_gap with a golden section search.
    @param fs: an object providing tree, root_name and leaf_name
    @return: a newick string of the new tree rooted at the new root
    @raise ValueError: if some vertex valuation is too close to zero
    or if the sign change does not fall on exactly one edge
    """
    # read the tree
    T, B, N = FtreeIO.newick_to_TBN(fs.tree)
    leaves = Ftree.T_to_leaves(T)
    internal = Ftree.T_to_internal_vertices(T)
    # get the valuations with harmonic extensions
    w, V = Ftree.TB_to_harmonic_extension(T, B, leaves, internal)
    # get the Fiedler valuations with harmonic extensions
    h = V[:, 0]
    # Check for vertices with small valuations.
    # The comparison must be against eps; comparing abs(x) to x itself
    # can never be true, which silently disabled this check.
    eps = 1e-8
    if any(abs(x) < eps for x in h):
        raise ValueError('the tree has no clear harmonic Fiedler point')
    # find the edge containing the harmonic Fiedler point,
    # which is where the valuation changes sign
    v_to_val = dict((v, h[i]) for i, v in enumerate(leaves + internal))
    d_edges = [(a, b) for a, b in T if v_to_val[a] * v_to_val[b] < 0]
    if len(d_edges) != 1:
        raise ValueError('expected the point to fall clearly on a single edge')
    d_edge = d_edges[0]
    a, b = d_edge
    # find the proportion along the directed edge
    t = v_to_val[a] / (v_to_val[a] - v_to_val[b])
    # find the distance from the new root to each endpoint vertex
    u_edge = frozenset(d_edge)
    d = B[u_edge]
    da = t * d
    db = (1 - t) * d
    # create the new tree by splitting the edge at the new root
    r = max(Ftree.T_to_order(T)) + 1
    N[r] = fs.root_name
    T.remove(u_edge)
    del B[u_edge]
    ea = frozenset((r, a))
    eb = frozenset((r, b))
    T.add(ea)
    T.add(eb)
    B[ea] = da
    B[eb] = db
    # add a new leaf with arbitrary branch length
    leaf = r + 1
    N[leaf] = fs.leaf_name
    u_edge = frozenset((r, leaf))
    T.add(u_edge)
    B[u_edge] = 1.0
    # get the best branch length to cause eigenvalue multiplicity
    blen = scipy.optimize.golden(
            get_gap, (T, B, u_edge), full_output=False, tol=1e-12)
    # get_gap overwrites this entry while searching, so set the optimum
    B[u_edge] = blen
    # return the string representation of the new tree
    R = Ftree.T_to_R_specific(T, r)
    return FtreeIO.RBN_to_newick(R, B, N)
def _get_v_to_point(self):
    """
    Map each vertex to a 3D point from the harmonic extension.
    @return: a map from vertex to a numpy array of three coordinates
    """
    # Order the vertices with the leaves first.
    leaves = Ftree.T_to_leaves(self.T)
    internal = Ftree.T_to_internal_vertices(self.T)
    vertices = leaves + internal
    # Get the harmonic extensions of eigenvectors of schur complement.
    w, v = Ftree.TB_to_harmonic_extension(self.T, self.B, leaves, internal)
    # The first two coordinates are negated and the third is not.
    x_values = -v.T[0]
    y_values = -v.T[1]
    z_values = v.T[2]
    v_to_point = {}
    for vertex, xyz in zip(vertices, zip(x_values, y_values, z_values)):
        v_to_point[vertex] = np.array(xyz)
    return v_to_point
def _get_v_to_point(self):
    """
    Map each vertex to a 3D point from the full Laplacian eigenvectors.
    @return: a map from vertex to a numpy array of three coordinates
    """
    # build the Laplacian over all vertices of the tree
    vertices = Ftree.T_to_order(self.T)
    L = Ftree.TB_to_L_principal(self.T, self.B, vertices)
    # eigh returns the eigenvalues in increasing order
    w, vt = scipy.linalg.eigh(L)
    # use the eigenvectors at indices 1, 2 and 3 as coordinates
    x_values = vt.T[1]
    y_values = vt.T[2]
    z_values = vt.T[3]
    v_to_point = {}
    for vertex, xyz in zip(vertices, zip(x_values, y_values, z_values)):
        v_to_point[vertex] = np.array(xyz)
    return v_to_point
def __init__(self, T, B):
    """
    Store the tree and precompute the reference MDS.
    @param T: tree topology
    @param B: branch lengths
    """
    self.T = T
    self.B = B
    # fix an order for the leaves and for the internal vertices
    self.leaves = Ftree.T_to_leaves(T)
    self.internal = Ftree.T_to_internal_vertices(T)
    # the reference MDS is used for canonical reflection
    leaf_distances = Ftree.TB_to_D(T, B, self.leaves)
    self.reference_MDS = self._D_to_MDS(leaf_distances)
def T_to_edge_to_neighbor_edges(T):
    """
    Map each edge to the edges that share one of its endpoints.
    An edge with no neighboring edges gets no entry in the map.
    @param T: topology
    @return: map from undirected edge to list of undirected edges
    """
    edge_to_neighbor_edges = defaultdict(list)
    v_to_neighbors = Ftree.T_to_v_to_neighbors(T)
    for edge in Ftree.T_to_edges(T):
        for endpoint in edge:
            for neighbor in v_to_neighbors[endpoint]:
                if neighbor in edge:
                    # this neighbor is the other endpoint of the edge itself
                    continue
                adjacent_edge = frozenset((endpoint, neighbor))
                edge_to_neighbor_edges[edge].append(adjacent_edge)
    return edge_to_neighbor_edges
def main(args):
    """
    Write image files that animate a change in one branch length.
    @param args: an object providing nframes, physical_width,
    physical_height, tree, branch_name, x_axis, y_axis, interpolation,
    final_length, image_format, scale and output_directory
    @raise ValueError: if fewer than two frames are requested
    or if a projection index is too large
    """
    # two frames are needed so the progress fraction is well defined
    if args.nframes < 2:
        raise ValueError('nframes should be at least 2')
    # the requested physical size of the images (in pixels)
    physical_size = (args.physical_width, args.physical_height)
    # directed edges, branch lengths, and vertex names
    R, B, N = FtreeIO.newick_to_RBN(args.tree)
    # the undirected edge to animate and its starting length
    edge = get_edge(R, N, args.branch_name)
    initial_length = B[edge]
    # the undirected topology and its vertex ordering
    T = Ftree.R_to_T(R)
    leaves = Ftree.T_to_leaves(T)
    internal = Ftree.T_to_internal_vertices(T)
    nleaves = len(leaves)
    v_to_index = Ftree.invseq(leaves + internal)
    # convert the one-based axis requests to zero-based indices
    x_index = args.x_axis - 1
    y_index = args.y_axis - 1
    if any(index >= nleaves - 1 for index in (x_index, y_index)):
        raise ValueError(
                'projection indices must be smaller than the number of leaves')
    # create the animation frames and write them as image files
    last_frame_index = float(args.nframes - 1)
    X_prev = None
    pbar = Progress.Bar(args.nframes)
    for frame_index in range(args.nframes):
        # progress runs from zero at the first frame to one at the last
        t = frame_index / last_frame_index
        if args.interpolation == 'sigmoid':
            t = sigmoid(t)
        B[edge] = (1 - t) * initial_length + t * args.final_length
        # scale the extended vectors and keep the two requested axes
        w, v = Ftree.TB_to_harmonic_extension(T, B, leaves, internal)
        scaling = np.diag(np.reciprocal(np.sqrt(w)))
        X_full = np.dot(v, scaling)
        X = np.vstack([X_full[:, x_index], X_full[:, y_index]]).T
        # match each frame to the previous frame
        if X_prev is not None:
            X = reflect_to_match(X, X_prev)
        X_prev = X
        image_string = get_animation_frame(
                args.image_format, physical_size, args.scale,
                v_to_index, T, X, w)
        image_filename = 'frame-%04d.%s' % (frame_index, args.image_format)
        image_pathname = os.path.join(args.output_directory, image_filename)
        with open(image_pathname, 'wb') as fout:
            fout.write(image_string)
        pbar.update(frame_index + 1)
    pbar.finish()
def get_internal_vertex_to_leaf_distn_cov(T, B):
    """
    Map each internal vertex to a leaf distribution using covariances.
    The original author describes this as a possibly equivalent formulation
    based on Schur complementation in the unrooted covariance matrix.
    The original author also noted that this covariance interpolator
    seems like it should work but does not,
    whereas the Laplacian interpolator -pinv(Lbb) Lba does work.
    @param T: topology
    @param B: branch lengths
    @return: a dictionary that maps an internal vertex to a leaf distribution
    """
    leaves = Ftree.T_to_leaves(T)
    internal = Ftree.T_to_internal_vertices(T)
    # Get the full tree Laplacian matrix with the leaves ordered first.
    L = Ftree.TB_to_L_principal(T, B, leaves + internal)
    # Get the unrooted covariance matrix.
    HSH = np.linalg.pinv(L)
    # Use the conditional distribution formula
    # from the multivariate normal distribution wikipedia page.
    nleaves = len(leaves)
    Saa = HSH[:nleaves, :nleaves]
    Sba = HSH[nleaves:, :nleaves]
    interpolator = ndot(Sba, np.linalg.pinv(Saa))
    # Each row of the interpolator belongs to one internal vertex.
    d = {}
    for i, v in enumerate(internal):
        d[v] = dict(
                (leaf, interpolator[i, j]) for j, leaf in enumerate(leaves))
    return d
def RB_to_v_to_age(R, B):
    """
    Compute vertex ages by accumulating branch lengths up from the leaves.
    When a vertex has several sinks, the sink visited last determines its age.
    @param R: directed topology
    @param B: branch lengths in time units
    @return: map from vertex to age
    """
    # leaves are the vertices that appear as a sink but never as a source
    sources, sinks = zip(*R)
    leaves = set(sinks) - set(sources)
    v_to_age = dict.fromkeys(leaves, 0)
    v_to_source = Ftree.R_to_v_to_source(R)
    for v in Ftree.R_to_postorder(R):
        parent = v_to_source.get(v)
        if parent is None:
            continue
        # the parent is older than this vertex by the connecting branch length
        v_to_age[parent] = v_to_age[v] + B[frozenset([v, parent])]
    return v_to_age
def get_response_content(fs):
    """
    Report where on the tree the sum of squared deviations is smallest.
    Deviations are from the mean of the leaf samples,
    accumulated over repeated Brownian samples at vertices
    and bridge samples along edges.
    @param fs: an object providing tree
    @return: a text report
    """
    # read the tree
    T, B, N = FtreeIO.newick_to_TBN(fs.tree)
    leaves = Ftree.T_to_leaves(T)
    internal = Ftree.T_to_internal_vertices(T)
    # root arbitrarily
    R = Ftree.T_to_R_canonical(T)
    # sampling parameters
    nsamples = 1000
    npillars = 10
    # Accumulate the sum of differences and the sum of squared differences.
    # Keys are (va, vb, index) triples, where vertex entries use (v, -1, -1).
    dsum = defaultdict(float)
    dsumsq = defaultdict(float)
    for sample_index in range(nsamples):
        # Sample using Brownian motion at vertices on the tree.
        v_to_sample = sample_brownian_motion(R, B)
        # Compute the mean at the leaves.
        mu = sum(v_to_sample[v] for v in leaves) / len(leaves)
        # Accumulate difference moments at vertices of the tree.
        for v, x in v_to_sample.items():
            key = (v, -1, -1)
            dsum[key] += x - mu
            dsumsq[key] += (x - mu)**2
        # Accumulate difference moments along each edge using a bridge.
        for d_edge in R:
            va, vb = d_edge
            bridge_samples = bridge(
                    v_to_sample[va], v_to_sample[vb],
                    npillars, B[frozenset(d_edge)])
            for pillar_index, x in enumerate(bridge_samples):
                key = (va, vb, pillar_index)
                dsum[key] += x - mu
                dsumsq[key] += (x - mu)**2
    # find the location with the smallest sum of squares
    val, va, vb, i = min(
            (total, a, b, k) for (a, b, k), total in dsumsq.items())
    # write the report
    out = StringIO()
    if i < 0:
        # a negative index marks a vertex entry
        print >> out, 'min sum of squares was at vertex', N[va]
    else:
        print >> out, 'min sum of squares was at edge',
        print >> out, N[va], '--[', i, ']-->', N[vb]
    print >> out
    print >> out, 'the min sum of squares value was', val
    return out.getvalue()
def get_response_content(fs):
    """
    Build a latex figure with four tikz subfigures of a tree.
    @param fs: a FieldStorage object containing the cgi arguments
    @return: the response
    """
    T, B, N = FtreeIO.newick_to_TBN(fs.tree_string)
    # sanitize taxon labels if requested
    if fs.sanitization:
        for v in N:
            N[v] = latexutil.sanitize(N[v])
    # scale the branch lengths so the largest leaf distance is 1
    leaf_distances = Ftree.TB_to_D(T, B, Ftree.T_to_leaves(T))
    diameter = np.max(leaf_distances)
    for u_edge in T:
        B[u_edge] /= diameter
    info = FigureInfo(T, B, N, fs.label_mode)
    # get the tikz text of the tree and of the three MDS variants
    tikz_bodies = [
            info.get_tikz_tree(fs.tree_layout),
            info.get_tikz_MDS_full(),
            info.get_tikz_MDS_partial(),
            info.get_tikz_MDS_harmonic(),
            ]
    tikz_pictures = [
            tikz.get_picture(body, 'auto', scale=fs.scaling_factor)
            for body in tikz_bodies]
    # arrange the pictures as two rows of two subfigures
    figure_lines = [
            '\\subfloat[]{', tikz_pictures[0], '}',
            '\\subfloat[]{', tikz_pictures[1], '} \\\\',
            '\\subfloat[]{', tikz_pictures[2], '}',
            '\\subfloat[]{', tikz_pictures[3], '}',
            ]
    figure_body = '\n'.join(figure_lines)
    packages = ['tikz', 'subfig']
    preamble = ''
    # the figure has no caption and no label
    return latexutil.get_centered_figure_response(
            figure_body, fs.latexformat, None, None, packages, preamble)
def get_gap(blen, T, B, u_edge):
    """
    Compute the gap between two eigenvalues of the leaf Schur complement.
    This function will be minimized.
    Note that it overwrites the entry of B for the given edge.
    @param blen: proposed branch length
    @param T: topology
    @param B: branch lengths
    @param u_edge: undirected edge
    @return: the absolute difference of the eigenvalues at indices 1 and 2,
    or 1 if the proposed branch length is not positive
    """
    # return a fixed value for branch lengths that are not positive
    if blen <= 0:
        return 1
    leaves = Ftree.T_to_leaves(T)
    B[u_edge] = blen
    L_schur = Ftree.TB_to_L_schur(T, B, leaves)
    ev1, ev2 = scipy.linalg.eigh(
            L_schur, eigvals_only=True, eigvals=(1, 2))
    return abs(ev1 - ev2)
def get_response_content(fs):
    """
    Draw one animation frame of a tree whose branch length is changing.
    @param fs: an object providing tree_string, branch_name, x_axis, y_axis,
    frame_progress, final_length, scale and imageformat
    @return: the value returned by get_animation_frame
    """
    # the requested physical size of the image (in pixels)
    physical_size = (640, 480)
    # directed edges, branch lengths, and vertex names
    R, B, N = FtreeIO.newick_to_RBN(fs.tree_string)
    # the undirected edge whose length will be adjusted
    edge = get_edge(R, N, fs.branch_name)
    # the undirected topology and its vertex ordering
    T = Ftree.R_to_T(R)
    leaves = Ftree.T_to_leaves(T)
    internal = Ftree.T_to_internal_vertices(T)
    nleaves = len(leaves)
    v_to_index = Ftree.invseq(leaves + internal)
    # convert the one-based axis requests to zero-based indices
    x_index = fs.x_axis - 1
    y_index = fs.y_axis - 1
    if any(index >= nleaves-1 for index in (x_index, y_index)):
        raise ValueError(
                'projection indices must be smaller than the number of leaves')
    # interpolate between the initial and the final branch length
    initial_length = B[edge]
    t = sigmoid(fs.frame_progress)
    B[edge] = (1-t)*initial_length + t*fs.final_length
    # scale the harmonically extended vectors and keep the two requested axes
    w, v = Ftree.TB_to_harmonic_extension(T, B, leaves, internal)
    scaling = np.diag(np.reciprocal(np.sqrt(w)))
    X_full = np.dot(v, scaling)
    X = np.vstack([X_full[:, x_index], X_full[:, y_index]]).T
    # draw the image
    ext = Form.g_imageformat_to_ext[fs.imageformat]
    return get_animation_frame(
            ext, physical_size, fs.scale, v_to_index, T, X, w)
def T_to_newick(T):
    """
    Get a newick string from an unweighted topology.
    The topology is rooted canonically before it is serialized.
    @param T: topology
    @return: newick string
    """
    canonical_R = Ftree.T_to_R_canonical(T)
    return R_to_newick(canonical_R)
def get_v_to_point(self):
    """
    Map each vertex to its harmonically extended MDS point.
    This uses the harmonic extension.
    Also it uses the reference MDS for reflection.
    @return: a map from vertex to point
    """
    T, B = self.T, self.B
    # blocks of the Laplacian needed for the extension
    Lbb = Ftree.TB_to_L_block(T, B, self.internal, self.internal)
    Lba = Ftree.TB_to_L_block(T, B, self.internal, self.leaves)
    # eigenpairs 1 and 2 of the Schur complement on the leaves
    L_schur = Ftree.TB_to_L_schur(T, B, self.leaves)
    W, V = scipy.linalg.eigh(L_schur, eigvals=(1, 2))
    # scale each eigenvector by the reciprocal square root of its eigenvalue
    V = V * np.reciprocal(np.sqrt(W))
    V = self._reflected_to_reference(V)
    # extend the leaf points to the internal vertices
    extension = np.dot(np.linalg.pinv(Lbb), Lba)
    Y = -np.dot(extension, V)
    MDS = np.vstack([V, Y])
    # the rows follow the leaves and then the internal vertices
    vertices = self.leaves + self.internal
    v_to_point = {}
    for i, pt in enumerate(MDS):
        v_to_point[vertices[i]] = tuple(pt)
    return v_to_point
def get_response_content(fs):
    """
    Draw MDS points of a distance matrix harmonically extended over a tree.
    @param fs: an object providing names, D, test_tree, scale and imageformat
    @return: the value returned by get_animation_frame
    @raise HandlingError: if the number of names differs
    from the number of rows of the distance matrix
    @raise ValueError: if a leaf of the tree is unnamed,
    if names are not unique, or if the two name sets differ
    """
    # read the ordered leaf names for the distance matrix
    D_names = Util.get_stripped_lines(fs.names.splitlines())
    # read the tree
    T_test, B_test, N_test = FtreeIO.newick_to_TBN(fs.test_tree)
    # Collect the names of the leaves of the tree.
    # Leaves missing from the name map are skipped here so that the check
    # below can report them; an unconditional lookup would raise KeyError
    # first and leave that check unreachable.
    test_leaves = Ftree.T_to_leaves(T_test)
    test_leaf_to_n = dict(
            (v, N_test[v]) for v in test_leaves if v in N_test)
    # check that the names match the rows of the distance matrix
    if len(D_names) != len(fs.D):
        raise HandlingError('the number of ordered leaf names '
                'should be the same as the number of rows '
                'in the distance matrix')
    # check that all leaves are named
    if len(test_leaves) != len(test_leaf_to_n):
        raise ValueError('all leaves in the harmonic extension tree '
                'should be named')
    # check that leaves are uniquely named
    if len(set(D_names)) != len(D_names):
        raise ValueError('all ordered leaf names in the distance matrix '
                'should be unique')
    # Duplicate names on the tree would silently drop leaves
    # when the name map is inverted below.
    if len(set(test_leaf_to_n.values())) != len(test_leaves):
        raise ValueError('all leaf names in the harmonic extension tree '
                'should be unique')
    # check that the leaf name sets are the same
    if set(D_names) != set(test_leaf_to_n.values()):
        raise ValueError('the set of leaf names on the tree '
                'should be the same as '
                'the set of leaf names for the distance matrix')
    # invert the leaf name map
    test_n_to_leaf = dict((n, v) for v, n in test_leaf_to_n.items())
    # order the leaves of the tree like the rows of the distance matrix
    test_leaves_reordered = [test_n_to_leaf[n] for n in D_names]
    # get the MDS points
    X = MDS_v4(fs.D)
    # get the linear operator that defines the harmonic extension
    test_internal = Ftree.T_to_internal_vertices(T_test)
    L22 = Ftree.TB_to_L_block(
            T_test, B_test, test_internal, test_internal)
    L21 = Ftree.TB_to_L_block(
            T_test, B_test, test_internal, test_leaves_reordered)
    M = -np.dot(np.linalg.pinv(L22), L21)
    # get the harmonic extension
    X_extension = np.dot(M, X)
    X_extended = np.vstack([X, X_extension])
    # draw the image
    v_to_index = Ftree.invseq(test_leaves_reordered + test_internal)
    physical_size = (640, 480)
    ext = Form.g_imageformat_to_ext[fs.imageformat]
    return get_animation_frame(ext, physical_size, fs.scale,
            v_to_index, T_test, X_extended)
def sample_brownian_motion(R, B):
    """
    Sample brownian motion on a tree.
    The root gets the value zero and each sink is drawn from a normal
    distribution centered at its source with variance equal to
    the length of the connecting branch.
    @param R: directed tree
    @param B: branch lengths
    @return: map from vertex to sample
    """
    root = Ftree.R_to_root(R)
    v_to_sample = {root: 0}
    v_to_sinks = Ftree.R_to_v_to_sinks(R)
    # preorder guarantees each source is sampled before its sinks
    for source in Ftree.R_to_preorder(R):
        for sink in v_to_sinks[source]:
            variance = B[frozenset((source, sink))]
            v_to_sample[sink] = random.gauss(
                    v_to_sample[source], math.sqrt(variance))
    return v_to_sample
def get_response_content(fs):
    """
    Draw MDS points of a distance matrix harmonically extended over a tree.
    @param fs: an object providing names, D, test_tree, scale and imageformat
    @return: the value returned by get_animation_frame
    @raise HandlingError: if the number of names differs
    from the number of rows of the distance matrix
    @raise ValueError: if a leaf of the tree is unnamed,
    if names are not unique, or if the two name sets differ
    """
    # read the ordered leaf names for the distance matrix
    D_names = Util.get_stripped_lines(fs.names.splitlines())
    # read the tree
    T_test, B_test, N_test = FtreeIO.newick_to_TBN(fs.test_tree)
    # Collect the names of the leaves of the tree.
    # Leaves missing from the name map are skipped here so that the check
    # below can report them; an unconditional lookup would raise KeyError
    # first and leave that check unreachable.
    test_leaves = Ftree.T_to_leaves(T_test)
    test_leaf_to_n = dict(
            (v, N_test[v]) for v in test_leaves if v in N_test)
    # check that the names match the rows of the distance matrix
    if len(D_names) != len(fs.D):
        raise HandlingError(
                'the number of ordered leaf names '
                'should be the same as the number of rows '
                'in the distance matrix')
    # check that all leaves are named
    if len(test_leaves) != len(test_leaf_to_n):
        raise ValueError(
                'all leaves in the harmonic extension tree '
                'should be named')
    # check that leaves are uniquely named
    if len(set(D_names)) != len(D_names):
        raise ValueError(
                'all ordered leaf names in the distance matrix '
                'should be unique')
    # Duplicate names on the tree would silently drop leaves
    # when the name map is inverted below.
    if len(set(test_leaf_to_n.values())) != len(test_leaves):
        raise ValueError(
                'all leaf names in the harmonic extension tree '
                'should be unique')
    # check that the leaf name sets are the same
    if set(D_names) != set(test_leaf_to_n.values()):
        raise ValueError(
                'the set of leaf names on the tree '
                'should be the same as '
                'the set of leaf names for the distance matrix')
    # invert the leaf name map
    test_n_to_leaf = dict((n, v) for v, n in test_leaf_to_n.items())
    # order the leaves of the tree like the rows of the distance matrix
    test_leaves_reordered = [test_n_to_leaf[n] for n in D_names]
    # get the MDS points
    X = MDS_v4(fs.D)
    # get the linear operator that defines the harmonic extension
    test_internal = Ftree.T_to_internal_vertices(T_test)
    L22 = Ftree.TB_to_L_block(
            T_test, B_test, test_internal, test_internal)
    L21 = Ftree.TB_to_L_block(
            T_test, B_test, test_internal, test_leaves_reordered)
    M = -np.dot(np.linalg.pinv(L22), L21)
    # get the harmonic extension
    X_extension = np.dot(M, X)
    X_extended = np.vstack([X, X_extension])
    # draw the image
    v_to_index = Ftree.invseq(test_leaves_reordered + test_internal)
    physical_size = (640, 480)
    ext = Form.g_imageformat_to_ext[fs.imageformat]
    return get_animation_frame(
            ext, physical_size, fs.scale,
            v_to_index, T_test, X_extended)
def main(args):
    """
    Write image files that animate a change in one branch length.
    @param args: an object providing nframes, physical_width,
    physical_height, tree, branch_name, x_axis, y_axis, interpolation,
    final_length, image_format, scale and output_directory
    @raise ValueError: if fewer than two frames are requested
    or if a projection index is too large
    """
    # two frames are needed so the progress fraction is well defined
    if args.nframes < 2:
        raise ValueError('nframes should be at least 2')
    # the requested physical size of the images (in pixels)
    physical_size = (args.physical_width, args.physical_height)
    # directed edges, branch lengths, and vertex names
    R, B, N = FtreeIO.newick_to_RBN(args.tree)
    # the undirected edge to animate and its starting length
    edge = get_edge(R, N, args.branch_name)
    initial_length = B[edge]
    # the undirected topology and its vertex ordering
    T = Ftree.R_to_T(R)
    leaves = Ftree.T_to_leaves(T)
    internal = Ftree.T_to_internal_vertices(T)
    nleaves = len(leaves)
    v_to_index = Ftree.invseq(leaves + internal)
    # convert the one-based axis requests to zero-based indices
    x_index = args.x_axis - 1
    y_index = args.y_axis - 1
    if any(index >= nleaves-1 for index in (x_index, y_index)):
        raise ValueError(
                'projection indices must be smaller than the number of leaves')
    # create the animation frames and write them as image files
    last_frame_index = float(args.nframes - 1)
    X_prev = None
    pbar = Progress.Bar(args.nframes)
    for frame_index in range(args.nframes):
        # progress runs from zero at the first frame to one at the last
        t = frame_index / last_frame_index
        if args.interpolation == 'sigmoid':
            t = sigmoid(t)
        B[edge] = (1-t)*initial_length + t*args.final_length
        # scale the extended vectors and keep the two requested axes
        w, v = Ftree.TB_to_harmonic_extension(T, B, leaves, internal)
        scaling = np.diag(np.reciprocal(np.sqrt(w)))
        X_full = np.dot(v, scaling)
        X = np.vstack([X_full[:, x_index], X_full[:, y_index]]).T
        # match each frame to the previous frame
        if X_prev is not None:
            X = reflect_to_match(X, X_prev)
        X_prev = X
        image_string = get_animation_frame(
                args.image_format, physical_size, args.scale,
                v_to_index, T, X, w)
        image_filename = 'frame-%04d.%s' % (frame_index, args.image_format)
        image_pathname = os.path.join(args.output_directory, image_filename)
        with open(image_pathname, 'wb') as fout:
            fout.write(image_string)
        pbar.update(frame_index+1)
    pbar.finish()
def get_response_content(fs):
    """
    Draw leaf MDS points of one tree harmonically extended over another.
    @param fs: an object providing true_tree, test_tree, scale
    and imageformat
    @return: the value returned by get_animation_frame
    @raise ValueError: if a leaf of either tree is unnamed,
    if leaf names are not unique within a tree,
    or if the two trees do not share the same leaf names
    """
    # read the trees
    T_true, B_true, N_true = FtreeIO.newick_to_TBN(fs.true_tree)
    T_test, B_test, N_test = FtreeIO.newick_to_TBN(fs.test_tree)
    # Collect the names of the leaves of the two trees.
    # Leaves missing from a name map are skipped here so that the checks
    # below can report them; an unconditional lookup would raise KeyError
    # first and leave those checks unreachable.
    true_leaves = Ftree.T_to_leaves(T_true)
    test_leaves = Ftree.T_to_leaves(T_test)
    true_leaf_to_n = dict(
            (v, N_true[v]) for v in true_leaves if v in N_true)
    test_leaf_to_n = dict(
            (v, N_test[v]) for v in test_leaves if v in N_test)
    # check that all leaves are named
    if len(true_leaves) != len(true_leaf_to_n):
        raise ValueError("all leaves in the leaf MDS tree should be named")
    if len(test_leaves) != len(test_leaf_to_n):
        raise ValueError(
                "all leaves in the harmonic extension tree should be named")
    # check that within each tree all leaves are uniquely named
    if len(set(true_leaf_to_n.values())) != len(true_leaves):
        raise ValueError(
                "all leaf names in the leaf MDS tree should be unique")
    if len(set(test_leaf_to_n.values())) != len(test_leaves):
        raise ValueError("all leaf names in the harmonic extension tree "
                "should be unique")
    # check that the leaf name sets are the same
    if set(true_leaf_to_n.values()) != set(test_leaf_to_n.values()):
        raise ValueError(
                "the two trees should have corresponding leaf names")
    # invert the leaf name maps
    true_n_to_leaf = dict((n, v) for v, n in true_leaf_to_n.items())
    test_n_to_leaf = dict((n, v) for v, n in test_leaf_to_n.items())
    # order the leaves of both trees by the same sequence of names
    leaf_names = true_leaf_to_n.values()
    true_leaves_reordered = [true_n_to_leaf[n] for n in leaf_names]
    test_leaves_reordered = [test_n_to_leaf[n] for n in leaf_names]
    # get the Schur complement matrix for the leaves
    L_schur_true = Ftree.TB_to_L_schur(
            T_true, B_true, true_leaves_reordered)
    # get the MDS points
    w, V = scipy.linalg.eigh(L_schur_true, eigvals=(1, 2))
    X = np.dot(V, np.diag(np.reciprocal(np.sqrt(w))))
    # get the linear operator that defines the harmonic extension
    test_internal = Ftree.T_to_internal_vertices(T_test)
    L22 = Ftree.TB_to_L_block(
            T_test, B_test, test_internal, test_internal)
    L21 = Ftree.TB_to_L_block(
            T_test, B_test, test_internal, test_leaves_reordered)
    M = -np.dot(np.linalg.pinv(L22), L21)
    # get the harmonic extension
    X_extension = np.dot(M, X)
    X_extended = np.vstack([X, X_extension])
    # draw the image
    v_to_index = Ftree.invseq(test_leaves_reordered + test_internal)
    physical_size = (640, 480)
    ext = Form.g_imageformat_to_ext[fs.imageformat]
    return get_animation_frame(ext, physical_size, fs.scale,
            v_to_index, T_test, X_extended)