def process(): """ @return: a multi-line string that summarizes the results """ np.set_printoptions(linewidth=200) out = StringIO() # define a degenerate mass vector m_degenerate = np.array([0.25, 0.25, 0.25, 0.25, 0, 0]) # define some distance matrices D_leaves = Euclid.g_D_b D_all = Euclid.g_D_c nvertices = 6 nleaves = 4 # get the projection and the weighted multidimensional scaling X = Euclid.edm_to_points(D_all) Y = Euclid.edm_to_weighted_points(D_all, m_degenerate) D_X = np.array([[np.dot(pb-pa, pb-pa) for pa in X] for pb in X]) D_Y = np.array([[np.dot(pb-pa, pb-pa) for pa in Y] for pb in Y]) # get the embedding using only the leaves print >> out, 'embedding of leaves from the leaf distance matrix:' print >> out, Euclid.edm_to_points(D_leaves) print >> out, 'projection of all vertices onto the MDS space of the leaves:' print >> out, do_projection(D_all, nleaves) print >> out, 'embedding of all vertices using uniform weights:' print >> out, X print >> out, 'corresponding distance matrix:' print >> out, D_X print >> out, 'embedding of all vertices using degenerate weights:' print >> out, Y print >> out, 'corresponding distance matrix:' print >> out, D_Y return out.getvalue().strip()
def process(): """ @return: a multi-line string that summarizes the results """ np.set_printoptions(linewidth=200) out = StringIO() # define a degenerate mass vector m_degenerate = np.array([0.25, 0.25, 0.25, 0.25, 0, 0]) # define some distance matrices D_leaves = Euclid.g_D_b D_all = Euclid.g_D_c nvertices = 6 nleaves = 4 # get the projection and the weighted multidimensional scaling X = Euclid.edm_to_points(D_all) Y = Euclid.edm_to_weighted_points(D_all, m_degenerate) D_X = np.array([[np.dot(pb - pa, pb - pa) for pa in X] for pb in X]) D_Y = np.array([[np.dot(pb - pa, pb - pa) for pa in Y] for pb in Y]) # get the embedding using only the leaves print >> out, 'embedding of leaves from the leaf distance matrix:' print >> out, Euclid.edm_to_points(D_leaves) print >> out, 'projection of all vertices onto the MDS space of the leaves:' print >> out, do_projection(D_all, nleaves) print >> out, 'embedding of all vertices using uniform weights:' print >> out, X print >> out, 'corresponding distance matrix:' print >> out, D_X print >> out, 'embedding of all vertices using degenerate weights:' print >> out, Y print >> out, 'corresponding distance matrix:' print >> out, D_Y return out.getvalue().strip()
def get_response_content(fs): # build the newick tree from the string tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree) nvertices = len(list(tree.preorder())) nleaves = len(list(tree.gen_tips())) ninternal = nvertices - nleaves # get ordered ids with the internal nodes first ordered_ids = get_ordered_ids(tree) leaf_ids = [id(node) for node in tree.gen_tips()] # get the distance matrix and the augmented distance matrix D_leaf = np.array(tree.get_partial_distance_matrix(leaf_ids)) D = np.array(tree.get_partial_distance_matrix(ordered_ids)) D_aug = get_augmented_distance(D, nleaves, fs.ndups) # analyze the leaf distance matrix X_leaf = Euclid.edm_to_points(D_leaf) # get the eigendecomposition of the centered augmented distance matrix X_aug = Euclid.edm_to_points(D_aug, nvertices - 1) # explicitly compute the points for the given number of dups using weights m = [1] * ninternal + [1 + fs.ndups] * nleaves m = np.array(m, dtype=float) / sum(m) X_weighted = Euclid.edm_to_weighted_points(D, m) # explicitly compute the points for 10x dups m = [1] * ninternal + [1 + fs.ndups * 10] * nleaves m = np.array(m, dtype=float) / sum(m) X_weighted_10x = Euclid.edm_to_weighted_points(D, m) # explicitly compute the limiting points as the number of dups increases X = Euclid.edm_to_points(D) X -= np.mean(X[-nleaves:], axis=0) XL = X[-nleaves:] U, s, Vt = np.linalg.svd(XL) Z = np.dot(X, Vt.T) # report the results np.set_printoptions(linewidth=300, threshold=10000) out = StringIO() print >> out, 'leaf distance matrix:' print >> out, D_leaf print >> out print >> out, 'points derived from the leaf distance matrix' print >> out, '(the first column is proportional to the Fiedler vector):' print >> out, X_leaf print >> out if fs.show_aug: print >> out, 'augmented distance matrix:' print >> out, D_aug print >> out print >> out, 'points derived from the augmented distance matrix' print >> out, '(the first column is proportional to the Fiedler vector):' print >> out, get_ugly_matrix(X_aug, ninternal, nleaves) print >> out print >> out, 'points computed using masses:' print >> out, X_weighted print >> out print >> out, 'points computed using masses with 10x dups:' print >> out, X_weighted_10x print >> out print >> out, 'limiting points:' print >> out, Z print >> out return out.getvalue()
def get_response_content(fs): # build the newick tree from the string tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree) nvertices = len(list(tree.preorder())) nleaves = len(list(tree.gen_tips())) ninternal = nvertices - nleaves # get ordered ids with the internal nodes first ordered_ids = get_ordered_ids(tree) leaf_ids = [id(node) for node in tree.gen_tips()] # get the distance matrix and the augmented distance matrix D_leaf = np.array(tree.get_partial_distance_matrix(leaf_ids)) D = np.array(tree.get_partial_distance_matrix(ordered_ids)) D_aug = get_augmented_distance(D, nleaves, fs.ndups) # analyze the leaf distance matrix X_leaf = Euclid.edm_to_points(D_leaf) # get the eigendecomposition of the centered augmented distance matrix X_aug = Euclid.edm_to_points(D_aug, nvertices-1) # explicitly compute the points for the given number of dups using weights m = [1]*ninternal + [1+fs.ndups]*nleaves m = np.array(m, dtype=float) / sum(m) X_weighted = Euclid.edm_to_weighted_points(D, m) # explicitly compute the points for 10x dups m = [1]*ninternal + [1+fs.ndups*10]*nleaves m = np.array(m, dtype=float) / sum(m) X_weighted_10x = Euclid.edm_to_weighted_points(D, m) # explicitly compute the limiting points as the number of dups increases X = Euclid.edm_to_points(D) X -= np.mean(X[-nleaves:], axis=0) XL = X[-nleaves:] U, s, Vt = np.linalg.svd(XL) Z = np.dot(X, Vt.T) # report the results np.set_printoptions(linewidth=300, threshold=10000) out = StringIO() print >> out, 'leaf distance matrix:' print >> out, D_leaf print >> out print >> out, 'points derived from the leaf distance matrix' print >> out, '(the first column is proportional to the Fiedler vector):' print >> out, X_leaf print >> out if fs.show_aug: print >> out, 'augmented distance matrix:' print >> out, D_aug print >> out print >> out, 'points derived from the augmented distance matrix' print >> out, '(the first column is proportional to the Fiedler vector):' print >> out, get_ugly_matrix(X_aug, ninternal, nleaves) print >> out print >> out, 'points computed using masses:' print >> out, X_weighted print >> out print >> out, 'points computed using masses with 10x dups:' print >> out, X_weighted_10x print >> out print >> out, 'limiting points:' print >> out, Z print >> out return out.getvalue()
def do_projection(D_full, nleaves): """ Project points onto the space of the leaves. The resulting points are in the subspace whose basis vectors are the principal axes of the leaf ellipsoid. @param D_full: distances relating all, including internal, vertices. @param nleaves: the first few indices in D_full represent leaves @return: a numpy array where each row is a vertex of the tree """ # Get the points # such that the n rows in X are points in n-1 dimensional space. X = Euclid.edm_to_points(D_full) # Translate all of the points # so that the origin is at the centroid of the leaves. X -= np.mean(X[:nleaves], 0) # Extract the subset of points that define the leaves. L = X[:nleaves] # Find the orthogonal transformation of the leaves onto their MDS axes. # According to the python svd documentation, # singular values are sorted most important to least important. U, s, Vt = np.linalg.svd(L) # Transform all of the points (including the internal vertices) # according to this orthogonal transformation. # The axes are now the principal axes # of the Steiner circumscribed ellipsoid of the leaf vertices. # I am using M.T[:k].T to get the first k columns of M. points = np.dot(X, Vt.T).T[:(nleaves-1)].T return points
def do_projection(D_full, nleaves): """ Project points onto the space of the leaves. The resulting points are in the subspace whose basis vectors are the principal axes of the leaf ellipsoid. @param D_full: distances relating all, including internal, vertices. @param nleaves: the first few indices in D_full represent leaves @return: a numpy array where each row is a vertex of the tree """ # Get the points # such that the n rows in X are points in n-1 dimensional space. X = Euclid.edm_to_points(D_full) # Translate all of the points # so that the origin is at the centroid of the leaves. X -= np.mean(X[:nleaves], 0) # Extract the subset of points that define the leaves. L = X[:nleaves] # Find the orthogonal transformation of the leaves onto their MDS axes. # According to the python svd documentation, # singular values are sorted most important to least important. U, s, Vt = np.linalg.svd(L) # Transform all of the points (including the internal vertices) # according to this orthogonal transformation. # The axes are now the principal axes # of the Steiner circumscribed ellipsoid of the leaf vertices. # I am using M.T[:k].T to get the first k columns of M. points = np.dot(X, Vt.T).T[:(nleaves - 1)].T return points
def get_response_content(fs): # get the tree tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree) # get information about the tree topology internal = [id(node) for node in tree.gen_internal_nodes()] tips = [id(node) for node in tree.gen_tips()] vertices = internal + tips ntips = len(tips) ninternal = len(internal) nvertices = len(vertices) # get the ordered ids with the leaves first ordered_ids = vertices # get the full weighted adjacency matrix A = np.array(tree.get_affinity_matrix(ordered_ids)) # compute the weighted adjacency matrix of the decorated tree p = ninternal q = ntips N = fs.N if fs.weight_n: weight = float(N) elif fs.weight_sqrt_n: weight = math.sqrt(N) A_aug = get_A_aug(A, weight, p, q, N) # compute the weighted Laplacian matrix of the decorated tree L_aug = Euclid.adjacency_to_laplacian(A_aug) # compute the eigendecomposition w, vt = np.linalg.eigh(L_aug) # show the output np.set_printoptions(linewidth=1000, threshold=10000) out = StringIO() if fs.lap: print >> out, 'Laplacian of the decorated tree:' print >> out, L_aug print >> out if fs.eigvals: print >> out, 'eigenvalues:' for x in w: print >> out, x print >> out if fs.eigvecs: print >> out, 'eigenvector matrix:' print >> out, vt print >> out if fs.compare: # get the distance matrix for only the original tips D_tips = np.array(tree.get_partial_distance_matrix(tips)) X_tips = Euclid.edm_to_points(D_tips) # wring the approximate points out of the augmented tree X_approx = vt[p:p+q].T[1:1+q-1].T / np.sqrt(w[1:1+q-1]) # do the comparison print >> out, 'points from tip-only MDS:' print >> out, X_tips print >> out print >> out, 'approximate points from decorated tree:' print >> out, X_approx print >> out return out.getvalue()
def get_response_content(fs): # get the tree tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree) # get information about the tree topology internal = [id(node) for node in tree.gen_internal_nodes()] tips = [id(node) for node in tree.gen_tips()] vertices = internal + tips ntips = len(tips) ninternal = len(internal) nvertices = len(vertices) # get the ordered ids with the leaves first ordered_ids = vertices # get the full weighted adjacency matrix A = np.array(tree.get_affinity_matrix(ordered_ids)) # compute the weighted adjacency matrix of the decorated tree p = ninternal q = ntips N = fs.N if fs.weight_n: weight = float(N) elif fs.weight_sqrt_n: weight = math.sqrt(N) A_aug = get_A_aug(A, weight, p, q, N) # compute the weighted Laplacian matrix of the decorated tree L_aug = Euclid.adjacency_to_laplacian(A_aug) # compute the eigendecomposition w, vt = np.linalg.eigh(L_aug) # show the output np.set_printoptions(linewidth=1000, threshold=10000) out = StringIO() if fs.lap: print >> out, 'Laplacian of the decorated tree:' print >> out, L_aug print >> out if fs.eigvals: print >> out, 'eigenvalues:' for x in w: print >> out, x print >> out if fs.eigvecs: print >> out, 'eigenvector matrix:' print >> out, vt print >> out if fs.compare: # get the distance matrix for only the original tips D_tips = np.array(tree.get_partial_distance_matrix(tips)) X_tips = Euclid.edm_to_points(D_tips) # wring the approximate points out of the augmented tree X_approx = vt[p:p + q].T[1:1 + q - 1].T / np.sqrt(w[1:1 + q - 1]) # do the comparison print >> out, 'points from tip-only MDS:' print >> out, X_tips print >> out print >> out, 'approximate points from decorated tree:' print >> out, X_approx print >> out return out.getvalue()
def get_response_content(fs): # read the lat-lon points from the input lines = Util.get_stripped_lines(fs.datalines.splitlines()) rows = parse_lines(lines) latlon_points = [] city_names = [] for city, latd, latm, lond, lonm in rows: lat = math.radians(GPS.degrees_minutes_to_degrees(latd, latm)) lon = math.radians(GPS.degrees_minutes_to_degrees(lond, lonm)) latlon_points.append((lat, lon)) city_names.append(city) npoints = len(latlon_points) # start writing the response np.set_printoptions(linewidth=200) out = StringIO() radius = GPS.g_earth_radius_miles for dfunc, name in ( (GPS.get_arc_distance, 'great arc'), (GPS.get_euclidean_distance, 'euclidean')): # define the edm whose elements are squared euclidean-like distances edm = np.zeros((npoints, npoints)) D = np.zeros((npoints, npoints)) for i, pointa in enumerate(latlon_points): for j, pointb in enumerate(latlon_points): D[i, j] = dfunc(pointa, pointb, radius) edm[i, j] = D[i, j]**2 print >> out, name, 'distances:' print >> out, D print >> out print >> out, name, 'EDM:' print >> out, edm print >> out G = Euclid.edm_to_dccov(edm) print >> out, name, 'Gower centered matrix:' print >> out, G print >> out spectrum = np.array(list(reversed(sorted(np.linalg.eigvals(G))))) print >> out, name, 'spectrum of Gower centered matrix:' for x in spectrum: print >> out, x print >> out print >> out, name, 'rounded spectrum:' for x in spectrum: print >> out, '%.1f' % x print >> out mds_points = Euclid.edm_to_points(edm) print >> out, '2D MDS coordinates:' for name, mds_point in zip(city_names, mds_points): x = mds_point[0] y = mds_point[1] print >> out, '\t'.join(str(x) for x in [name, x, y]) print >> out # break between distance methods print >> out # return the response return out.getvalue()
def do_internal_projection(D_full): """ The resulting points are in the subspace whose basis vectors are the principal axes of the whole ellipsoid. @param D_full: the distance matrix as a numpy array relating all vertices including internal vertices @return: a numpy array where each row is a vertex of the tree """ # Get the points such that the n rows in are points in n-1 dimensional space. # The first coordinate is the principal axis. points = Euclid.edm_to_points(D_full) return points
def get_response_content(fs): # read the lat-lon points from the input lines = Util.get_stripped_lines(fs.datalines.splitlines()) rows = parse_lines(lines) latlon_points = [] city_names = [] for city, latd, latm, lond, lonm in rows: lat = math.radians(GPS.degrees_minutes_to_degrees(latd, latm)) lon = math.radians(GPS.degrees_minutes_to_degrees(lond, lonm)) latlon_points.append((lat, lon)) city_names.append(city) npoints = len(latlon_points) # start writing the response np.set_printoptions(linewidth=200) out = StringIO() radius = GPS.g_earth_radius_miles for dfunc, name in ((GPS.get_arc_distance, 'great arc'), (GPS.get_euclidean_distance, 'euclidean')): # define the edm whose elements are squared euclidean-like distances edm = np.zeros((npoints, npoints)) D = np.zeros((npoints, npoints)) for i, pointa in enumerate(latlon_points): for j, pointb in enumerate(latlon_points): D[i, j] = dfunc(pointa, pointb, radius) edm[i, j] = D[i, j]**2 print >> out, name, 'distances:' print >> out, D print >> out print >> out, name, 'EDM:' print >> out, edm print >> out G = Euclid.edm_to_dccov(edm) print >> out, name, 'Gower centered matrix:' print >> out, G print >> out spectrum = np.array(list(reversed(sorted(np.linalg.eigvals(G))))) print >> out, name, 'spectrum of Gower centered matrix:' for x in spectrum: print >> out, x print >> out print >> out, name, 'rounded spectrum:' for x in spectrum: print >> out, '%.1f' % x print >> out mds_points = Euclid.edm_to_points(edm) print >> out, '2D MDS coordinates:' for name, mds_point in zip(city_names, mds_points): x = mds_point[0] y = mds_point[1] print >> out, '\t'.join(str(x) for x in [name, x, y]) print >> out # break between distance methods print >> out # return the response return out.getvalue()
def get_response_content(fs): # build the newick tree from the string tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree) leaf_names = list(sorted(leaf.get_name() for leaf in tree.gen_tips())) # assert that the newick tree has the correct set of leaves if leaf_names != ['a', 'b', 'c', 'd']: msg = 'expected the tree to have leaves named {a, b, c, d}' raise HandlingError(msg) # start writing the response out = StringIO() # Get the distance matrix with ordered indices # including all nodes in the tree. D = np.array(tree.get_distance_matrix(leaf_names)) # get the embedded points X = Euclid.edm_to_points(D) print >> out, 'distance matrix:' print >> out, D print >> out, 'embedded points:' print >> out, X # set up the optimization a, b, c, d = X.tolist() objective = Objective(a, b, c, d) s1_initial = (np.mean(X, 0) + X[0] + X[1])/3 + get_random_point() s2_initial = (np.mean(X, 0) + X[2] + X[3])/3 + get_random_point() data_initial = np.hstack([s1_initial, s2_initial]) data_final = scipy.optimize.fmin_bfgs(objective.get_value, data_initial, fprime=objective.get_gradient, gtol=1e-10) s1 = data_final[:3] s2 = data_final[3:] gradient_final = objective.get_gradient(data_final) s1_gradient = gradient_final[:3] s2_gradient = gradient_final[3:] print >> out, 'initial random steiner point guesses:' print >> out, s1_initial print >> out, s2_initial print >> out, 'final steiner point estimates:' print >> out, s1 print >> out, s2 print >> out, 'each of these angles should be %f radians:' % ((2*math.pi)/3) print >> out, get_angle(a-s1, b-s1) print >> out, get_angle(b-s1, s2-s1) print >> out, get_angle(s2-s1, a-s1) print >> out, get_angle(c-s2, d-s2) print >> out, get_angle(d-s2, s1-s2) print >> out, get_angle(s1-s2, c-s2) print >> out, 'value of the objective function at the estimated solution:' print >> out, objective.get_value(data_final) print >> out, 'gradient of the objective function at each estimated steiner point:' print >> out, s1_gradient print >> out, s2_gradient # return the response return out.getvalue()
def process(nseconds=None): """ @param nseconds: allow this many seconds to run or None to run forever @return: a multi-line string that summarizes the results """ # load the tree tree = NewickIO.parse(g_tree_string, FelTree.NewickTree) # get the alphabetically ordered tip names ordered_tip_names = list( sorted(node.get_name() for node in tree.gen_tips())) # initialize the search start_time = time.time() nsamples_rejected = 0 nsamples_accepted = 0 counterexample_message = 'no counterexample was found' try: while True: elapsed_time = time.time() - start_time if nseconds and elapsed_time > nseconds: break # sample some random branch lengths sample_branch_lengths(tree) # get the distance matrix D = np.array(tree.get_distance_matrix(ordered_tip_names)) # get the projections onto the MDS axes of the leaves X = Euclid.edm_to_points(D) # if any coordinate is near zero then reject the sample if np.min(np.abs(X)) < g_epsilon: nsamples_rejected += 1 continue # see if the sign pattern matches for each coordinate for v_observed, v_target in zip(X.T, g_target_sign_patterns): hadamard_product = v_observed * v_target all_positive = all(x > 0 for x in hadamard_product) all_negative = all(x < 0 for x in hadamard_product) if not (all_positive or all_negative): # the target sign pattern was not met break else: # the sign pattern matched for each coordinate so we have a counterexample msg = NewickIO.get_newick_string(tree) raise CounterexampleError(msg) # increment the count of accepted samples nsamples_accepted += 1 except KeyboardInterrupt, e: pass
def process(nseconds=None): """ @param nseconds: allow this many seconds to run or None to run forever @return: a multi-line string that summarizes the results """ # load the tree tree = NewickIO.parse(g_tree_string, FelTree.NewickTree) # get the alphabetically ordered tip names ordered_tip_names = list(sorted(node.get_name() for node in tree.gen_tips())) # initialize the search start_time = time.time() nsamples_rejected = 0 nsamples_accepted = 0 counterexample_message = 'no counterexample was found' try: while True: elapsed_time = time.time() - start_time if nseconds and elapsed_time > nseconds: break # sample some random branch lengths sample_branch_lengths(tree) # get the distance matrix D = np.array(tree.get_distance_matrix(ordered_tip_names)) # get the projections onto the MDS axes of the leaves X = Euclid.edm_to_points(D) # if any coordinate is near zero then reject the sample if np.min(np.abs(X)) < g_epsilon: nsamples_rejected += 1 continue # see if the sign pattern matches for each coordinate for v_observed, v_target in zip(X.T, g_target_sign_patterns): hadamard_product = v_observed * v_target all_positive = all(x>0 for x in hadamard_product) all_negative = all(x<0 for x in hadamard_product) if not (all_positive or all_negative): # the target sign pattern was not met break else: # the sign pattern matched for each coordinate so we have a counterexample msg = NewickIO.get_newick_string(tree) raise CounterexampleError(msg) # increment the count of accepted samples nsamples_accepted += 1 except KeyboardInterrupt, e: pass
def main(args): # do some validation if args.nframes < 2: raise ValueError('nframes should be at least 2') # define the requested physical size of the images (in pixels) physical_size = (args.physical_width, args.physical_height) # build the newick tree from the string tree = NewickIO.parse(args.tree, FelTree.NewickTree) nvertices = len(list(tree.preorder())) nleaves = len(list(tree.gen_tips())) # Get ordered ids with the leaves first, # and get the corresponding distance matrix. ordered_ids = get_ordered_ids(tree) D = np.array(tree.get_partial_distance_matrix(ordered_ids)) index_edges = get_index_edges(tree, ordered_ids) # Create the reference points # so that the video frames are not reflected arbitrarily. reference_points = Euclid.edm_to_points(D).T[:3].T # create the animation frames and write them as image files pbar = Progress.Bar(args.nframes) for frame_index in range(args.nframes): linear_progress = frame_index / float(args.nframes - 1) if args.interpolation == 'sigmoid': progress = sigmoid(linear_progress) else: progress = linear_progress mass_vector = get_mass_vector(nvertices, nleaves, progress) points = get_canonical_3d_mds(D, mass_vector, reference_points) crossings = get_crossings(index_edges, points) # define the frame path name image_filename = 'frame-%04d.%s' % (frame_index, args.image_format) image_pathname = os.path.join(args.output_directory, image_filename) # clear the old figure and render the new figure mlab.clf() add_yz_plane() add_zx_plane() add_xy_plane() X, Y, Z = points.T[0], points.T[1], points.T[2] draw_3d_tree(X, Y, Z, index_edges) draw_crossings(X, Y, Z, index_edges) mlab.savefig(image_pathname, size=physical_size) # update the progress bar pbar.update(frame_index + 1) pbar.finish()
def main(args): # do some validation if args.nframes < 2: raise ValueError('nframes should be at least 2') # define the requested physical size of the images (in pixels) physical_size = (args.physical_width, args.physical_height) # build the newick tree from the string tree = NewickIO.parse(args.tree, FelTree.NewickTree) nvertices = len(list(tree.preorder())) nleaves = len(list(tree.gen_tips())) # Get ordered ids with the leaves first, # and get the corresponding distance matrix. ordered_ids = get_ordered_ids(tree) D = np.array(tree.get_partial_distance_matrix(ordered_ids)) index_edges = get_index_edges(tree, ordered_ids) # Create the reference points # so that the video frames are not reflected arbitrarily. reference_points = Euclid.edm_to_points(D).T[:3].T # create the animation frames and write them as image files pbar = Progress.Bar(args.nframes) for frame_index in range(args.nframes): linear_progress = frame_index / float(args.nframes - 1) if args.interpolation == 'sigmoid': progress = sigmoid(linear_progress) else: progress = linear_progress mass_vector = get_mass_vector(nvertices, nleaves, progress) points = get_canonical_3d_mds(D, mass_vector, reference_points) crossings = get_crossings(index_edges, points) # define the frame path name image_filename = 'frame-%04d.%s' % (frame_index, args.image_format) image_pathname = os.path.join(args.output_directory, image_filename) # clear the old figure and render the new figure mlab.clf() add_yz_plane() add_zx_plane() add_xy_plane() X, Y, Z = points.T[0], points.T[1], points.T[2] draw_3d_tree(X, Y, Z, index_edges) draw_crossings(X, Y, Z, index_edges) mlab.savefig(image_pathname, size=physical_size) # update the progress bar pbar.update(frame_index+1) pbar.finish()
def get_response_content(fs): # define the requested physical size of the images (in pixels) physical_size = (640, 480) # build the newick tree from the string tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree) nvertices = len(list(tree.preorder())) nleaves = len(list(tree.gen_tips())) # Get ordered ids with the leaves first, # and get the corresponding distance matrix. ordered_ids = get_ordered_ids(tree) D = np.array(tree.get_partial_distance_matrix(ordered_ids)) index_edges = get_index_edges(tree, ordered_ids) # Create the reference points so that the video frames # are not reflected arbitrarily. reference_points = Euclid.edm_to_points(D).T[:2].T # draw the image ext = Form.g_imageformat_to_ext[fs.imageformat] mass_vector = get_mass_vector(nvertices, nleaves, fs.progress) points = get_canonical_2d_mds(D, mass_vector, reference_points) return get_animation_frame(ext, physical_size, fs.scale, mass_vector, index_edges, points)
def do_full_projection(D_full, nleaves): """ Compute all projected points onto the subspace defined by the leaves. @param D_full: the distance matrix as a numpy array relating all vertices including internal vertices @param nleaves: the first few indices in D_full represent leaves @return: a numpy array where each row is a vertex of the tree viewed as a point """ # Get the points such that the n rows in X are points in n-1 dimensional space. X = Euclid.edm_to_points(D_full) # Translate all of the points so that the origin is at the centroid of the leaves. X -= np.mean(X[:nleaves], 0) # Extract the subset of points that define the leaves. L = X[:nleaves] # Find the orthogonal transformation of the leaves onto their MDS axes. # According to the python svd documentation, singular values are sorted most important to least important. U, s, Vt = np.linalg.svd(L) # Transform all of the points (including the internal vertices) according to this orthogonal transformation. # The axes are now the principal axes of the Steiner circumscribed ellipsoid of the leaf vertices. # I am using M.T[:k].T to get the first k columns of M. vertices_on_plane = np.dot(X, Vt.T).T[:(nleaves - 1)].T return vertices_on_plane
def do_full_projection(D_full, nleaves): """ Compute all projected points onto the subspace defined by the leaves. @param D_full: the distance matrix as a numpy array relating all vertices including internal vertices @param nleaves: the first few indices in D_full represent leaves @return: a numpy array where each row is a vertex of the tree viewed as a point """ # Get the points such that the n rows in X are points in n-1 dimensional space. X = Euclid.edm_to_points(D_full) # Translate all of the points so that the origin is at the centroid of the leaves. X -= np.mean(X[:nleaves], 0) # Extract the subset of points that define the leaves. L = X[:nleaves] # Find the orthogonal transformation of the leaves onto their MDS axes. # According to the python svd documentation, singular values are sorted most important to least important. U, s, Vt = np.linalg.svd(L) # Transform all of the points (including the internal vertices) according to this orthogonal transformation. # The axes are now the principal axes of the Steiner circumscribed ellipsoid of the leaf vertices. # I am using M.T[:k].T to get the first k columns of M. vertices_on_plane = np.dot(X, Vt.T).T[:(nleaves-1)].T return vertices_on_plane
def main(args): # do some validation if args.nframes < 2: raise ValueError('nframes should be at least 2') # define the requested physical size of the images (in pixels) physical_size = (args.physical_width, args.physical_height) # build the newick tree from the string tree = NewickIO.parse(args.tree, FelTree.NewickTree) nvertices = len(list(tree.preorder())) nleaves = len(list(tree.gen_tips())) # Get ordered ids with the leaves first, # and get the corresponding distance matrix. ordered_ids = get_ordered_ids(tree) D = np.array(tree.get_partial_distance_matrix(ordered_ids)) index_edges = get_index_edges(tree, ordered_ids) # Create the reference points # so that the video frames are not reflected arbitrarily. reference_points = Euclid.edm_to_points(D).T[:2].T # create the animation frames and write them as image files pbar = Progress.Bar(args.nframes) for frame_index in range(args.nframes): linear_progress = frame_index / float(args.nframes - 1) if args.interpolation == 'sigmoid': progress = sigmoid(linear_progress) else: progress = linear_progress mass_vector = get_mass_vector(nvertices, nleaves, progress) points = get_canonical_2d_mds(D, mass_vector, reference_points) image_string = get_animation_frame( args.image_format, physical_size, args.scale, mass_vector, index_edges, points) image_filename = 'frame-%04d.%s' % (frame_index, args.image_format) image_pathname = os.path.join(args.output_directory, image_filename) with open(image_pathname, 'wb') as fout: fout.write(image_string) pbar.update(frame_index+1) pbar.finish()
def main(args): # do some validation if args.nframes < 2: raise ValueError('nframes should be at least 2') # define the requested physical size of the images (in pixels) physical_size = (args.physical_width, args.physical_height) # build the newick tree from the string tree = NewickIO.parse(args.tree, FelTree.NewickTree) nvertices = len(list(tree.preorder())) nleaves = len(list(tree.gen_tips())) # Get ordered ids with the leaves first, # and get the corresponding distance matrix. ordered_ids = get_ordered_ids(tree) D = np.array(tree.get_partial_distance_matrix(ordered_ids)) index_edges = get_index_edges(tree, ordered_ids) # Create the reference points # so that the video frames are not reflected arbitrarily. reference_points = Euclid.edm_to_points(D).T[:2].T # create the animation frames and write them as image files pbar = Progress.Bar(args.nframes) for frame_index in range(args.nframes): linear_progress = frame_index / float(args.nframes - 1) if args.interpolation == 'sigmoid': progress = sigmoid(linear_progress) else: progress = linear_progress mass_vector = get_mass_vector(nvertices, nleaves, progress) points = get_canonical_2d_mds(D, mass_vector, reference_points) image_string = get_animation_frame(args.image_format, physical_size, args.scale, mass_vector, index_edges, points) image_filename = 'frame-%04d.%s' % (frame_index, args.image_format) image_pathname = os.path.join(args.output_directory, image_filename) with open(image_pathname, 'wb') as fout: fout.write(image_string) pbar.update(frame_index + 1) pbar.finish()
def process(ntaxa, nseconds): """ @param nseconds: allow this many seconds to run or None to run forever @return: a multi-line string that summarizes the results """ start_time = time.time() nsamples_rejected = 0 nsamples_accepted = 0 pattern_to_topo_surrogate = {} pattern_to_tree_string = {} counterexample_message = 'no counterexample was found' try: while True: elapsed_time = time.time() - start_time if nseconds and elapsed_time > nseconds: break # sample an xtree topology xtree = TreeSampler.sample_agglomerated_tree(ntaxa) # convert the xtree to a FelTree, although I guess this might not be necessary tree_string = xtree.get_newick_string() tree = NewickIO.parse(tree_string, FelTree.NewickTree) # get ordered ids and the number of leaves and some auxiliary variables ordered_ids = get_ordered_ids(tree) nleaves = len(list(tree.gen_tips())) id_to_index = dict((myid, i) for i, myid in enumerate(ordered_ids)) # force every branch length to be the unit length reset_branch_lengths(tree) # get the unweighted distance matrix among tips in convenient hashable form D_unit = np.array(tree.get_partial_distance_matrix(ordered_ids)) topo_surrogate = tuple(tuple(row.tolist()) for row in D_unit) # sample random branch lengths sample_branch_lengths(tree) # get the weighted tree string weighted_tree_string = NewickIO.get_newick_string(tree) # get the distance matrix relating the leaves D = np.array(tree.get_partial_distance_matrix(ordered_ids)) # get the projections onto the MDS axes of the leaves X = Euclid.edm_to_points(D) # if any coordinate is near zero then reject the sample if np.min(np.abs(X)) < g_epsilon: nsamples_rejected += 1 continue # do an orthogonal transformation that puts the first point in the positive orthant canonizing_vector = np.array(point_to_orthant(X[0])) X *= canonizing_vector # get the canonical sign pattern sign_pattern = tuple(point_to_orthant(row) for row in X) # compare the topo surrogate of this sign pattern to the one in memory expected_topo_surrogate = pattern_to_topo_surrogate.get(sign_pattern, None) if expected_topo_surrogate: if topo_surrogate != expected_topo_surrogate: remembered_tree_string = pattern_to_tree_string[sign_pattern] msg = 'these trees have the same sign pattern but different topologies: {%s, %s}' % (weighted_tree_string, remembered_tree_string) raise CounterexampleError(msg) else: pattern_to_topo_surrogate[sign_pattern] = topo_surrogate pattern_to_tree_string[sign_pattern] = weighted_tree_string # increment the count of accepted samples nsamples_accepted += 1 except KeyboardInterrupt, e: pass
def get_response_content(fs): # build the newick tree from the string tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree) nvertices = len(list(tree.preorder())) nleaves = len(list(tree.gen_tips())) ninternal = nvertices - nleaves # get ordered ids with the internal nodes first ordered_ids = get_ordered_ids(tree) leaf_ids = [id(node) for node in tree.gen_tips()] # get the distance matrix and the augmented distance matrix D_leaf = np.array(tree.get_partial_distance_matrix(leaf_ids)) D = np.array(tree.get_partial_distance_matrix(ordered_ids)) # analyze the leaf distance matrix X_leaf = Euclid.edm_to_points(D_leaf) w_leaf, v_leaf = EigUtil.eigh(Euclid.edm_to_dccov(D_leaf)) V_leaf = np.array(v_leaf).T # explicitly compute the limiting points as the number of dups increases X = Euclid.edm_to_points(D) X -= np.mean(X[-nleaves:], axis=0) XL = X[-nleaves:] U, s, Vt = np.linalg.svd(XL) Z = np.dot(X, Vt.T) # hack the Z matrix to show the leaf-related eigenvectors Z = Z.T[: nleaves - 1].T WY = Z / np.sqrt(w_leaf[:-1]) # compute a product using the first few rows of WY W = WY[:ninternal] M_alpha = get_alpha_multiplier(D, nleaves) MW_alpha = np.dot(M_alpha, W) # compute a product using the first few rows of WY M_beta = get_beta_multiplier(D, nleaves) MY_beta = np.dot(M_beta, V_leaf) # report the results np.set_printoptions(linewidth=300, threshold=10000) out = StringIO() print >> out, "leaf distance matrix:" print >> out, D_leaf print >> out print >> out, "eigenvalues derived from the leaf distance matrix" print >> out, w_leaf print >> out print >> out, "corresponding eigenvectors (as columns)" print >> out, V_leaf print >> out print >> out, "candidates for [W' Y']':" print >> out, WY print >> out print >> out, "candidates for W:" print >> out, W print >> out print >> out, "left multiplier of W:" print >> out, M_alpha print >> out print >> out, "each column is a (left multiplier, W) product:" print >> out, MW_alpha print >> out print >> out, "left multiplier of Y:" print >> out, M_beta print >> out print >> out, "each column is a (left multiplier, Y) product:" print >> out, MY_beta print >> out print >> out, "the above matrix divided by 2*eigenvalue:" print >> out, MY_beta / (2 * np.array(w_leaf)) print >> out return out.getvalue()
def process(ntaxa): np.set_printoptions(linewidth=200) out = StringIO() # sample an xtree topology xtree = TreeSampler.sample_agglomerated_tree(ntaxa) # sample an xtree with exponentially distributed branch lengths mu = 2.0 for branch in xtree.get_branches(): branch.length = random.expovariate(1 / mu) # convert the xtree to a FelTree so we can use the internal vertices tree_string = xtree.get_newick_string() tree = NewickIO.parse(tree_string, FelTree.NewickTree) # get ordered ids and the number of leaves and some auxiliary variables ordered_ids = get_ordered_ids(tree) nleaves = len(list(tree.gen_tips())) id_to_index = dict((myid, i) for i, myid in enumerate(ordered_ids)) # get the distance matrix relating all of the points D_full = np.array(tree.get_full_distance_matrix(ordered_ids)) # Now do the projection so that # the resulting points are in the subspace whose basis vectors are the axes of the leaf ellipsoid. # First get the points such that the n rows in X are points in n-1 dimensional space. X = Euclid.edm_to_points(D_full) print >> out, 'points with centroid at origin:' print >> out, X print >> out # Translate all of the points so that the origin is at the centroid of the leaves. X -= np.mean(X[:nleaves], 0) print >> out, 'points with centroid of leaves at origin:' print >> out, X print >> out # Extract the subset of points that define the leaves. L = X[:nleaves] # Find the orthogonal transformation of the leaves onto their MDS axes. # According to the python svd documentation, singular values are sorted most important to least important. U, s, Vt = np.linalg.svd(L) # Transform all of the points (including the internal vertices) according to this orthogonal transformation. # The axes are now the axes of the Steiner circumscribed ellipsoid of the leaf vertices. # I am using M.T[:k].T to get the first k columns of M. Z = np.dot(X, Vt.T) print >> out, 'orthogonally transformed points (call this Z):' print >> out, Z print >> out Y = Z.T[:(nleaves - 1)].T print >> out, 'projection of the points onto the axes of the leaf ellipsoid,' print >> out, '(these are the first columns of Z; call this projected matrix Y):' print >> out, Y print >> out # Show the inner products. inner_products_of_columns = np.dot(Y.T, Y) print >> out, "pairwise inner products of the columns of Y (that is, Y'Y)" print >> out, inner_products_of_columns print >> out # Show other inner products. inner_products_of_columns = np.dot(Y[:5].T, Y[:5]) print >> out, "pairwise inner products of the first few columns of Y" print >> out, inner_products_of_columns print >> out # Extract the subset of points that define the points of articulation. # Note that the origin is the centroid of the leaves. R = X[nleaves:] Y_leaves = Y[:nleaves] W = np.dot(np.linalg.pinv(L), Y_leaves) print >> out, 'leaf projection using pseudoinverse (first few rows of Y):' print >> out, np.dot(L, W) print >> out print >> out, 'projection of points of articulation using pseudoinverse (remaining rows of Y):' print >> out, np.dot(R, W) print >> out # Get all of the points in high dimensional space. X = Euclid.edm_to_points(D_full) # Get the MDS onto the lower dimensional space. X = X.T[:(nleaves - 1)].T assert np.allclose(sum(X, 0), 0) print >> out, 'all points projected onto the first principal axes of the full ellipsoid:' print >> out, X print >> out # Look at only the leaves in this space. L = X[:nleaves] L -= np.mean(L, 0) print >> out, 'leaves projected onto the first principal axes of the full ellipsoid and then centered:' print >> out, L print >> out # Re-project the leaves onto the axes of leaf ellipsoid. D_leaves = Euclid.dccov_to_edm(np.dot(L, L.T)) Y = Euclid.edm_to_points(D_leaves) print >> out, 'leaves further projected onto principal axes of their own ellipsoid:' print >> out, Y print >> out # Try something else D_all = Euclid.dccov_to_edm(np.dot(X, X.T)) Y = Euclid.edm_to_points(D_all).T[:(nleaves - 1)].T print >> out, 'all points further projected onto their own principal axes of inertia:' print >> out, Y print >> out # Try the same thing some more D_again = Euclid.dccov_to_edm(np.dot(Y, Y.T)) Z = Euclid.edm_to_points(D_again).T[:(nleaves - 1)].T print >> out, 'all points further projected onto their own principal axes of inertia (second iteration):' print >> out, Z print >> out return out.getvalue().strip()
def process(ntaxa): np.set_printoptions(linewidth=200) out = StringIO() # sample an xtree topology xtree = TreeSampler.sample_agglomerated_tree(ntaxa) # sample an xtree with exponentially distributed branch lengths mu = 2.0 for branch in xtree.get_branches(): branch.length = random.expovariate(1/mu) # convert the xtree to a FelTree so we can use the internal vertices tree_string = xtree.get_newick_string() tree = NewickIO.parse(tree_string, FelTree.NewickTree) # get ordered ids and the number of leaves and some auxiliary variables ordered_ids = get_ordered_ids(tree) nleaves = len(list(tree.gen_tips())) id_to_index = dict((myid, i) for i, myid in enumerate(ordered_ids)) # get the distance matrix relating all of the points D_full = np.array(tree.get_full_distance_matrix(ordered_ids)) # Now do the projection so that # the resulting points are in the subspace whose basis vectors are the axes of the leaf ellipsoid. # First get the points such that the n rows in X are points in n-1 dimensional space. X = Euclid.edm_to_points(D_full) print >> out, 'points with centroid at origin:' print >> out, X print >> out # Translate all of the points so that the origin is at the centroid of the leaves. X -= np.mean(X[:nleaves], 0) print >> out, 'points with centroid of leaves at origin:' print >> out, X print >> out # Extract the subset of points that define the leaves. L = X[:nleaves] # Find the orthogonal transformation of the leaves onto their MDS axes. # According to the python svd documentation, singular values are sorted most important to least important. U, s, Vt = np.linalg.svd(L) # Transform all of the points (including the internal vertices) according to this orthogonal transformation. # The axes are now the axes of the Steiner circumscribed ellipsoid of the leaf vertices. # I am using M.T[:k].T to get the first k columns of M. Z = np.dot(X, Vt.T) print >> out, 'orthogonally transformed points (call this Z):' print >> out, Z print >> out Y = Z.T[:(nleaves-1)].T print >> out, 'projection of the points onto the axes of the leaf ellipsoid,' print >> out, '(these are the first columns of Z; call this projected matrix Y):' print >> out, Y print >> out # Show the inner products. inner_products_of_columns = np.dot(Y.T, Y) print >> out, "pairwise inner products of the columns of Y (that is, Y'Y)" print >> out, inner_products_of_columns print >> out # Show other inner products. inner_products_of_columns = np.dot(Y[:5].T, Y[:5]) print >> out, "pairwise inner products of the first few columns of Y" print >> out, inner_products_of_columns print >> out # Extract the subset of points that define the points of articulation. # Note that the origin is the centroid of the leaves. R = X[nleaves:] Y_leaves = Y[:nleaves] W = np.dot(np.linalg.pinv(L), Y_leaves) print >> out, 'leaf projection using pseudoinverse (first few rows of Y):' print >> out, np.dot(L, W) print >> out print >> out, 'projection of points of articulation using pseudoinverse (remaining rows of Y):' print >> out, np.dot(R, W) print >> out # Get all of the points in high dimensional space. X = Euclid.edm_to_points(D_full) # Get the MDS onto the lower dimensional space. X = X.T[:(nleaves-1)].T assert np.allclose(sum(X, 0), 0) print >> out, 'all points projected onto the first principal axes of the full ellipsoid:' print >> out, X print >> out # Look at only the leaves in this space. L = X[:nleaves] L -= np.mean(L, 0) print >> out, 'leaves projected onto the first principal axes of the full ellipsoid and then centered:' print >> out, L print >> out # Re-project the leaves onto the axes of leaf ellipsoid. D_leaves = Euclid.dccov_to_edm(np.dot(L, L.T)) Y = Euclid.edm_to_points(D_leaves) print >> out, 'leaves further projected onto principal axes of their own ellipsoid:' print >> out, Y print >> out # Try something else D_all = Euclid.dccov_to_edm(np.dot(X, X.T)) Y = Euclid.edm_to_points(D_all).T[:(nleaves-1)].T print >> out, 'all points further projected onto their own principal axes of inertia:' print >> out, Y print >> out # Try the same thing some more D_again = Euclid.dccov_to_edm(np.dot(Y, Y.T)) Z = Euclid.edm_to_points(D_again).T[:(nleaves-1)].T print >> out, 'all points further projected onto their own principal axes of inertia (second iteration):' print >> out, Z print >> out return out.getvalue().strip()
def process(ntaxa, nseconds): """ @param nseconds: allow this many seconds to run or None to run forever @return: a multi-line string that summarizes the results """ start_time = time.time() nsamples_rejected = 0 nsamples_accepted = 0 pattern_to_topo_surrogate = {} pattern_to_tree_string = {} counterexample_message = 'no counterexample was found' try: while True: elapsed_time = time.time() - start_time if nseconds and elapsed_time > nseconds: break # sample an xtree topology xtree = TreeSampler.sample_agglomerated_tree(ntaxa) # convert the xtree to a FelTree, although I guess this might not be necessary tree_string = xtree.get_newick_string() tree = NewickIO.parse(tree_string, FelTree.NewickTree) # get ordered ids and the number of leaves and some auxiliary variables ordered_ids = get_ordered_ids(tree) nleaves = len(list(tree.gen_tips())) id_to_index = dict((myid, i) for i, myid in enumerate(ordered_ids)) # force every branch length to be the unit length reset_branch_lengths(tree) # get the unweighted distance matrix among tips in convenient hashable form D_unit = np.array(tree.get_partial_distance_matrix(ordered_ids)) topo_surrogate = tuple(tuple(row.tolist()) for row in D_unit) # sample random branch lengths sample_branch_lengths(tree) # get the weighted tree string weighted_tree_string = NewickIO.get_newick_string(tree) # get the distance matrix relating the leaves D = np.array(tree.get_partial_distance_matrix(ordered_ids)) # get the projections onto the MDS axes of the leaves X = Euclid.edm_to_points(D) # if any coordinate is near zero then reject the sample if np.min(np.abs(X)) < g_epsilon: nsamples_rejected += 1 continue # do an orthogonal transformation that puts the first point in the positive orthant canonizing_vector = np.array(point_to_orthant(X[0])) X *= canonizing_vector # get the canonical sign pattern sign_pattern = tuple(point_to_orthant(row) for row in X) # compare the topo surrogate of this sign pattern to the one in memory expected_topo_surrogate = pattern_to_topo_surrogate.get( sign_pattern, None) if expected_topo_surrogate: if topo_surrogate != expected_topo_surrogate: remembered_tree_string = pattern_to_tree_string[ sign_pattern] msg = 'these trees have the same sign pattern but different topologies: {%s, %s}' % ( weighted_tree_string, remembered_tree_string) raise CounterexampleError(msg) else: pattern_to_topo_surrogate[sign_pattern] = topo_surrogate pattern_to_tree_string[sign_pattern] = weighted_tree_string # increment the count of accepted samples nsamples_accepted += 1 except KeyboardInterrupt, e: pass
def get_response_content(fs): # build the newick tree from the string tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree) nvertices = len(list(tree.preorder())) nleaves = len(list(tree.gen_tips())) ninternal = nvertices - nleaves # get ordered ids with the internal nodes first ordered_ids = get_ordered_ids(tree) leaf_ids = [id(node) for node in tree.gen_tips()] # get the distance matrix and the augmented distance matrix D_leaf = np.array(tree.get_partial_distance_matrix(leaf_ids)) D = np.array(tree.get_partial_distance_matrix(ordered_ids)) # analyze the leaf distance matrix X_leaf = Euclid.edm_to_points(D_leaf) w_leaf, v_leaf = EigUtil.eigh(Euclid.edm_to_dccov(D_leaf)) V_leaf = np.array(v_leaf).T # explicitly compute the limiting points as the number of dups increases X = Euclid.edm_to_points(D) X -= np.mean(X[-nleaves:], axis=0) XL = X[-nleaves:] U, s, Vt = np.linalg.svd(XL) Z = np.dot(X, Vt.T) # hack the Z matrix to show the leaf-related eigenvectors Z = Z.T[:nleaves - 1].T WY = Z / np.sqrt(w_leaf[:-1]) # compute a product using the first few rows of WY W = WY[:ninternal] M_alpha = get_alpha_multiplier(D, nleaves) MW_alpha = np.dot(M_alpha, W) # compute a product using the first few rows of WY M_beta = get_beta_multiplier(D, nleaves) MY_beta = np.dot(M_beta, V_leaf) # report the results np.set_printoptions(linewidth=300, threshold=10000) out = StringIO() print >> out, 'leaf distance matrix:' print >> out, D_leaf print >> out print >> out, 'eigenvalues derived from the leaf distance matrix' print >> out, w_leaf print >> out print >> out, 'corresponding eigenvectors (as columns)' print >> out, V_leaf print >> out print >> out, "candidates for [W' Y']':" print >> out, WY print >> out print >> out, 'candidates for W:' print >> out, W print >> out print >> out, 'left multiplier of W:' print >> out, M_alpha print >> out print >> out, 'each column is a (left multiplier, W) product:' print >> out, MW_alpha print >> out print >> out, 'left multiplier of Y:' print >> out, M_beta print >> out print >> out, 'each column is a (left multiplier, Y) product:' print >> out, MY_beta print >> out print >> out, 'the above matrix divided by 2*eigenvalue:' print >> out, MY_beta / (2 * np.array(w_leaf)) print >> out return out.getvalue()
def process(): """ @return: a multi-line string that summarizes the results """ np.set_printoptions(linewidth=200) out = StringIO() # define some distance matrices D_leaves = Euclid.g_D_b D_all = Euclid.g_D_c nvertices = 6 nleaves = 4 # define mass vectors m_degenerate = np.array([0.25, 0.25, 0.25, 0.25, 0, 0]) m_interesting = np.array([.2, .2, .2, .2, .1, .1]) m_uniform = np.ones(nvertices) / float(nvertices) # augment a distance matrix by adding leaflets D_augmented = add_leaflets(D_all, nleaves) # create the projection of points X_projected = do_projection(D_all, nleaves) # show some of the distance matrices print >> out, 'pairwise distances among vertices in the original tree:' print >> out, D_all print >> out, 'pairwise distance matrix augmented with one leaflet per leaf:' print >> out, D_augmented # get the distance matrices corresponding to the cases in the docstring print >> out, 'case 1: embedding of all vertices:' print >> out, Euclid.edm_to_points(D_all) print >> out, 'case 2: embedding of leaves and leaflets from the leaflet-augmented distance matrix:' print >> out, Euclid.edm_to_points(D_augmented) print >> out, 'case 3: projection of all vertices onto the MDS space of the leaves:' print >> out, X_projected # another embedding print >> out, 'embedding of leaves from the leaf distance matrix:' print >> out, Euclid.edm_to_points(D_leaves) # show embeddings of a tree augmented with leaflets print >> out, 'first few coordinates of the original vertices of the embedded tree with lots of leaflets per leaf:' D_super_augmented = D_all.copy() for i in range(20): D_super_augmented = add_leaflets(D_super_augmented, nleaves) X_super = Euclid.edm_to_points(D_super_augmented) X_super_block_small = X_super[:6].T[:3].T print >> out, X_super_block_small print >> out, 'ratio of coordinates of projected points to coordinates of this block of the embedding of the augmented tree:' print >> out, X_projected / X_super_block_small # test Z = Euclid.edm_to_weighted_points(D_all, m_uniform) print >> out, 'generalized case 1:' print >> out, Z # test Z = Euclid.edm_to_weighted_points(D_all, m_interesting) print >> out, 'generalized case 2:' print >> out, Z # test Z = Euclid.edm_to_weighted_points(D_all, m_degenerate) print >> out, 'generalized case 3:' print >> out, Z # test Z = get_weighted_embedding_b(D_all, m_uniform) print >> out, 'eric formula case 1:' print >> out, Z # test Z = get_weighted_embedding_b(D_all, m_interesting) print >> out, 'eric formula case 2:' print >> out, Z # test Z = get_weighted_embedding_b(D_all, m_degenerate) print >> out, 'eric formula case 3:' print >> out, Z # test stuff print >> out, 'testing random stuff:' D = D_all m = m_degenerate nvertices = len(m) sqrtm = np.sqrt(m) M = np.diag(sqrtm) cross_product_matrix = Euclid.edm_to_weighted_cross_product(D, m) U_cross, S_cross, VT_cross = np.linalg.svd(cross_product_matrix, full_matrices=False) Q = np.dot(M, np.dot(cross_product_matrix, M.T)) U, B, VT = np.linalg.svd(Q, full_matrices=False) S = np.sqrt(np.diag(B)) US = np.dot(U, S) M_pinv = np.linalg.pinv(M) M_pinv_narrow = M_pinv.T[:-2].T US_short = US[:-2] print >> out, 'eigenvalues of the abdi cross product:', S_cross print >> out, 'eigenvalues of the eric cross product:', B print >> out, M_pinv print >> out, US print >> out, M_pinv_narrow print >> out, US_short Z = np.dot(M_pinv_narrow, US_short) print >> out, Z # return the response return out.getvalue().strip()