def get_response_content(fs):
    """
    Draw the requested number of eigenvector axes as an image.
    @param fs: form data providing nvertices, naxes, eigenvalue_scaling,
        imageformat and xaxis_length
    @return: the value returned by create_image_string
    @raise ValueError: if nvertices is smaller than naxes + 1
    @raise HandlingError: if a CairoUtil.CairoUtilError is raised while drawing
    """
    # check input compatibility
    if fs.naxes + 1 > fs.nvertices:
        raise ValueError(
                'attempting to plot too many eigenvectors '
                'for the given number of vertices')
    # requested physical size of the image (in pixels)
    width_height = (640, 480)
    # eigendecompose the centered matrix derived from the laplacian
    laplacian = create_laplacian_matrix(fs.nvertices)
    edm = Euclid.laplacian_to_edm(laplacian)
    eigvals, eigvecs = np.linalg.eigh(Euclid.edm_to_dccov(edm))
    columns = eigvecs.T.tolist()
    # pair each vector with the square root of its eigenvalue, order the pairs
    # from largest to smallest, and leave out the pair that sorts lowest
    ranked = sorted(zip(np.sqrt(eigvals), columns))[:0:-1]
    if fs.eigenvalue_scaling:
        vectors = [np.array(vec) * scale for scale, vec in ranked]
    else:
        vectors = [np.array(vec) for _scale, vec in ranked]
    X = np.array(zip(*vectors))
    # flip the sign of each eigenfunction whose first entry is negative
    F = X.T[:fs.naxes]
    for axis in range(fs.naxes):
        if F[axis][0] < 0:
            F[axis] *= -1
    # draw the image
    try:
        ext = Form.g_imageformat_to_ext[fs.imageformat]
        return create_image_string(ext, width_height, F, fs.xaxis_length)
    except CairoUtil.CairoUtilError as e:
        raise HandlingError(e)
def process(tree_string):
    """
    Summarize embeddings of the tree vertices under two mass distributions.
    @param tree_string: a newick string
    @return: a multi-line string that summarizes the results
    """
    np.set_printoptions(linewidth=200)
    # build the newick tree from the string
    tree = NewickIO.parse(tree_string, FelTree.NewickTree)
    ordered_ids, ordered_names = get_ordered_ids_and_names(tree)
    # count all of the vertices and the tips
    nvertices = len(list(tree.preorder()))
    nleaves = len(list(tree.gen_tips()))
    # NOTE(review): this index map is built but never read below
    id_to_index = dict((myid, i) for i, myid in enumerate(ordered_ids))
    # distance matrix over all of the ordered ids
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    # unit mass on every vertex, normalized to sum to one
    all_flags = [1] * nvertices
    m_uniform = np.array(all_flags, dtype=float) / sum(all_flags)
    # unit mass on the first nleaves entries only, normalized to sum to one
    leaf_flags = [1] * nleaves + [0] * (nvertices - nleaves)
    m_degenerate = np.array(leaf_flags, dtype=float) / sum(leaf_flags)
    # write the report
    out = StringIO()
    print >> out, 'ordered names:'
    print >> out, ordered_names
    print >> out
    reports = (
            ('embedded points with mass uniformly distributed among all vertices:', m_uniform),
            ('embedded points with mass uniformly distributed among the leaves:', m_degenerate),
            )
    for caption, masses in reports:
        print >> out, caption
        print >> out, Euclid.edm_to_weighted_points(D, masses)
        print >> out
    # return the response
    return out.getvalue().strip()
def process():
    """
    Compare embeddings of a six vertex example using its leaves only,
    uniform weights, and degenerate weights.
    @return: a multi-line string that summarizes the results
    """
    np.set_printoptions(linewidth=200)

    def squared_distances(points):
        # matrix of squared euclidean distances between all pairs of points
        return np.array(
                [[np.dot(b - a, b - a) for a in points] for b in points])

    # mass only on the first four of the six vertices
    m_degenerate = np.array([0.25, 0.25, 0.25, 0.25, 0, 0])
    # example distance matrices provided by the Euclid module
    D_leaves = Euclid.g_D_b
    D_all = Euclid.g_D_c
    nleaves = 4
    # unweighted and weighted embeddings of all vertices
    X = Euclid.edm_to_points(D_all)
    Y = Euclid.edm_to_weighted_points(D_all, m_degenerate)
    D_X = squared_distances(X)
    D_Y = squared_distances(Y)
    # each caption is followed directly by its value
    sections = (
            ('embedding of leaves from the leaf distance matrix:',
                Euclid.edm_to_points(D_leaves)),
            ('projection of all vertices onto the MDS space of the leaves:',
                do_projection(D_all, nleaves)),
            ('embedding of all vertices using uniform weights:', X),
            ('corresponding distance matrix:', D_X),
            ('embedding of all vertices using degenerate weights:', Y),
            ('corresponding distance matrix:', D_Y),
            )
    out = StringIO()
    for caption, payload in sections:
        print >> out, caption
        print >> out, payload
    return out.getvalue().strip()
def process(tree_string):
    """
    Summarize embeddings of the tree vertices under two mass distributions.
    @param tree_string: a newick string
    @return: a multi-line string that summarizes the results
    """
    np.set_printoptions(linewidth=200)
    # build the newick tree from the string
    tree = NewickIO.parse(tree_string, FelTree.NewickTree)
    ordered_ids, ordered_names = get_ordered_ids_and_names(tree)
    # count all of the vertices and the tips
    nvertices = len(list(tree.preorder()))
    nleaves = len(list(tree.gen_tips()))
    # NOTE(review): this index map is built but never read below
    id_to_index = dict((myid, i) for i, myid in enumerate(ordered_ids))
    # distance matrix over all of the ordered ids
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    # unit mass on every vertex, normalized to sum to one
    all_flags = [1] * nvertices
    m_uniform = np.array(all_flags, dtype=float) / sum(all_flags)
    # unit mass on the first nleaves entries only, normalized to sum to one
    leaf_flags = [1] * nleaves + [0] * (nvertices - nleaves)
    m_degenerate = np.array(leaf_flags, dtype=float) / sum(leaf_flags)
    # write the report
    out = StringIO()
    print >> out, 'ordered names:'
    print >> out, ordered_names
    print >> out
    reports = (
            ('embedded points with mass uniformly distributed among all vertices:', m_uniform),
            ('embedded points with mass uniformly distributed among the leaves:', m_degenerate),
            )
    for caption, masses in reports:
        print >> out, caption
        print >> out, Euclid.edm_to_weighted_points(D, masses)
        print >> out
    # return the response
    return out.getvalue().strip()
def process():
    """
    Compare embeddings of a six vertex example using its leaves only,
    uniform weights, and degenerate weights.
    @return: a multi-line string that summarizes the results
    """
    np.set_printoptions(linewidth=200)

    def squared_distances(points):
        # matrix of squared euclidean distances between all pairs of points
        return np.array(
                [[np.dot(b - a, b - a) for a in points] for b in points])

    # mass only on the first four of the six vertices
    m_degenerate = np.array([0.25, 0.25, 0.25, 0.25, 0, 0])
    # example distance matrices provided by the Euclid module
    D_leaves = Euclid.g_D_b
    D_all = Euclid.g_D_c
    nleaves = 4
    # unweighted and weighted embeddings of all vertices
    X = Euclid.edm_to_points(D_all)
    Y = Euclid.edm_to_weighted_points(D_all, m_degenerate)
    D_X = squared_distances(X)
    D_Y = squared_distances(Y)
    # each caption is followed directly by its value
    sections = (
            ('embedding of leaves from the leaf distance matrix:',
                Euclid.edm_to_points(D_leaves)),
            ('projection of all vertices onto the MDS space of the leaves:',
                do_projection(D_all, nleaves)),
            ('embedding of all vertices using uniform weights:', X),
            ('corresponding distance matrix:', D_X),
            ('embedding of all vertices using degenerate weights:', Y),
            ('corresponding distance matrix:', D_Y),
            )
    out = StringIO()
    for caption, payload in sections:
        print >> out, caption
        print >> out, payload
    return out.getvalue().strip()
def get_response_content(fs):
    """
    Report several point configurations derived from the distances of a tree.
    @param fs: form data providing tree_string, ndups and show_aug
    @return: a multi-line string
    """
    # build the newick tree from the string
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    nvertices = len(list(tree.preorder()))
    nleaves = len(list(tree.gen_tips()))
    ninternal = nvertices - nleaves
    # ordered ids; the mass vectors below assume internal nodes come first
    ordered_ids = get_ordered_ids(tree)
    leaf_ids = [id(tip) for tip in tree.gen_tips()]
    # leaf-only, full, and augmented distance matrices
    D_leaf = np.array(tree.get_partial_distance_matrix(leaf_ids))
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    D_aug = get_augmented_distance(D, nleaves, fs.ndups)
    # points from the leaf distance matrix and from the augmented matrix
    X_leaf = Euclid.edm_to_points(D_leaf)
    X_aug = Euclid.edm_to_points(D_aug, nvertices - 1)

    def weighted_points(leaf_mass):
        # unit mass on each internal vertex, leaf_mass on each leaf,
        # normalized so that the masses sum to one
        masses = [1] * ninternal + [leaf_mass] * nleaves
        masses = np.array(masses, dtype=float) / sum(masses)
        return Euclid.edm_to_weighted_points(D, masses)

    # explicitly weighted points for the given number of dups and for 10x dups
    X_weighted = weighted_points(1 + fs.ndups)
    X_weighted_10x = weighted_points(1 + fs.ndups * 10)
    # limiting points: center on the mean of the last nleaves rows,
    # then rotate using the right singular vectors of those rows
    X = Euclid.edm_to_points(D)
    X -= np.mean(X[-nleaves:], axis=0)
    Vt = np.linalg.svd(X[-nleaves:])[2]
    Z = np.dot(X, Vt.T)
    # report the results
    np.set_printoptions(linewidth=300, threshold=10000)
    out = StringIO()
    fiedler_note = '(the first column is proportional to the Fiedler vector):'
    print >> out, 'leaf distance matrix:'
    print >> out, D_leaf
    print >> out
    print >> out, 'points derived from the leaf distance matrix'
    print >> out, fiedler_note
    print >> out, X_leaf
    print >> out
    if fs.show_aug:
        print >> out, 'augmented distance matrix:'
        print >> out, D_aug
        print >> out
    print >> out, 'points derived from the augmented distance matrix'
    print >> out, fiedler_note
    print >> out, get_ugly_matrix(X_aug, ninternal, nleaves)
    print >> out
    trailing_sections = (
            ('points computed using masses:', X_weighted),
            ('points computed using masses with 10x dups:', X_weighted_10x),
            ('limiting points:', Z),
            )
    for caption, payload in trailing_sections:
        print >> out, caption
        print >> out, payload
        print >> out
    return out.getvalue()
def get_response_content(fs):
    """
    Report several point configurations derived from the distances of a tree.
    @param fs: form data providing tree_string, ndups and show_aug
    @return: a multi-line string
    """
    # build the newick tree from the string
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    nvertices = len(list(tree.preorder()))
    nleaves = len(list(tree.gen_tips()))
    ninternal = nvertices - nleaves
    # ordered ids; the mass vectors below assume internal nodes come first
    ordered_ids = get_ordered_ids(tree)
    leaf_ids = [id(tip) for tip in tree.gen_tips()]
    # leaf-only, full, and augmented distance matrices
    D_leaf = np.array(tree.get_partial_distance_matrix(leaf_ids))
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    D_aug = get_augmented_distance(D, nleaves, fs.ndups)
    # points from the leaf distance matrix and from the augmented matrix
    X_leaf = Euclid.edm_to_points(D_leaf)
    X_aug = Euclid.edm_to_points(D_aug, nvertices - 1)

    def weighted_points(leaf_mass):
        # unit mass on each internal vertex, leaf_mass on each leaf,
        # normalized so that the masses sum to one
        masses = [1] * ninternal + [leaf_mass] * nleaves
        masses = np.array(masses, dtype=float) / sum(masses)
        return Euclid.edm_to_weighted_points(D, masses)

    # explicitly weighted points for the given number of dups and for 10x dups
    X_weighted = weighted_points(1 + fs.ndups)
    X_weighted_10x = weighted_points(1 + fs.ndups * 10)
    # limiting points: center on the mean of the last nleaves rows,
    # then rotate using the right singular vectors of those rows
    X = Euclid.edm_to_points(D)
    X -= np.mean(X[-nleaves:], axis=0)
    Vt = np.linalg.svd(X[-nleaves:])[2]
    Z = np.dot(X, Vt.T)
    # report the results
    np.set_printoptions(linewidth=300, threshold=10000)
    out = StringIO()
    fiedler_note = '(the first column is proportional to the Fiedler vector):'
    print >> out, 'leaf distance matrix:'
    print >> out, D_leaf
    print >> out
    print >> out, 'points derived from the leaf distance matrix'
    print >> out, fiedler_note
    print >> out, X_leaf
    print >> out
    if fs.show_aug:
        print >> out, 'augmented distance matrix:'
        print >> out, D_aug
        print >> out
    print >> out, 'points derived from the augmented distance matrix'
    print >> out, fiedler_note
    print >> out, get_ugly_matrix(X_aug, ninternal, nleaves)
    print >> out
    trailing_sections = (
            ('points computed using masses:', X_weighted),
            ('points computed using masses with 10x dups:', X_weighted_10x),
            ('limiting points:', Z),
            )
    for caption, payload in trailing_sections:
        print >> out, caption
        print >> out, payload
        print >> out
    return out.getvalue()
def get_response_content(fs):
    """
    Report a graph layout, its adjacency and laplacian matrices,
    and the layout obtained from a schur complement of the weighted laplacian.
    @param fs: form data (not read by this function)
    @return: a multi-line string
    """
    points = get_locations()
    point_arrays = [np.array(p) for p in points]
    edges = get_edges()
    npoints = len(points)
    # start writing the response
    np.set_printoptions(linewidth=200)
    out = StringIO()

    def emit_matrix(caption, matrix):
        # write a captioned matrix followed by a blank line
        print >> out, caption
        print >> out, matrix
        print >> out

    # print the layout data
    print >> out, 'POINTS'
    for index, (x, y) in enumerate(points):
        print >> out, index, x, y
    print >> out, 'EDGES'
    for a, b in edges:
        print >> out, a, b
    print >> out
    # unweighted adjacency matrix and its laplacian
    UA = np.zeros((npoints, npoints))
    for a, b in edges:
        UA[a, b] = UA[b, a] = 1
    emit_matrix('unweighted adjacency matrix:', UA)
    emit_matrix(
            'unweighted laplacian matrix:',
            Euclid.adjacency_to_laplacian(UA))
    # weighted adjacency matrix: each edge weight is the reciprocal of
    # the edge length divided by sqrt(2)
    WA = np.zeros((npoints, npoints))
    for a, b in edges:
        length = np.linalg.norm(point_arrays[a] - point_arrays[b])
        length = length / math.sqrt(2.0)
        WA[a, b] = WA[b, a] = 1.0 / length
    emit_matrix('weighted adjacency matrix:', WA)
    WL = Euclid.adjacency_to_laplacian(WA)
    emit_matrix('weighted laplacian matrix:', WL)
    # remove the two internal nodes by schur complementation
    ntips = 4
    reduced_laplacian = SchurAlgebra.schur_helper(WL, 2)
    X = Euclid.dccov_to_points(np.linalg.pinv(reduced_laplacian))
    print >> out, 'schur graph layout:'
    print >> out, 'POINTS'
    for index, coords in enumerate(X):
        print >> out, index, coords[0], coords[1]
    # every pair of tips is listed as an edge
    print >> out, 'EDGES'
    for first in range(ntips):
        for second in range(first + 1, ntips):
            print >> out, first, second
    # return the response
    return out.getvalue()
def get_response_content(fs):
    """
    Report a graph layout, its adjacency and laplacian matrices,
    and the layout obtained from a schur complement of the weighted laplacian.
    @param fs: form data (not read by this function)
    @return: a multi-line string
    """
    points = get_locations()
    point_arrays = [np.array(p) for p in points]
    edges = get_edges()
    npoints = len(points)
    # start writing the response
    np.set_printoptions(linewidth=200)
    out = StringIO()

    def emit_matrix(caption, matrix):
        # write a captioned matrix followed by a blank line
        print >> out, caption
        print >> out, matrix
        print >> out

    # print the layout data
    print >> out, 'POINTS'
    for index, (x, y) in enumerate(points):
        print >> out, index, x, y
    print >> out, 'EDGES'
    for a, b in edges:
        print >> out, a, b
    print >> out
    # unweighted adjacency matrix and its laplacian
    UA = np.zeros((npoints, npoints))
    for a, b in edges:
        UA[a, b] = UA[b, a] = 1
    emit_matrix('unweighted adjacency matrix:', UA)
    emit_matrix(
            'unweighted laplacian matrix:',
            Euclid.adjacency_to_laplacian(UA))
    # weighted adjacency matrix: each edge weight is the reciprocal of
    # the edge length divided by sqrt(2)
    WA = np.zeros((npoints, npoints))
    for a, b in edges:
        length = np.linalg.norm(point_arrays[a] - point_arrays[b])
        length = length / math.sqrt(2.0)
        WA[a, b] = WA[b, a] = 1.0 / length
    emit_matrix('weighted adjacency matrix:', WA)
    WL = Euclid.adjacency_to_laplacian(WA)
    emit_matrix('weighted laplacian matrix:', WL)
    # remove the two internal nodes by schur complementation
    ntips = 4
    reduced_laplacian = SchurAlgebra.schur_helper(WL, 2)
    X = Euclid.dccov_to_points(np.linalg.pinv(reduced_laplacian))
    print >> out, 'schur graph layout:'
    print >> out, 'POINTS'
    for index, coords in enumerate(X):
        print >> out, index, coords[0], coords[1]
    # every pair of tips is listed as an edge
    print >> out, 'EDGES'
    for first in range(ntips):
        for second in range(first + 1, ntips):
            print >> out, first, second
    # return the response
    return out.getvalue()
def get_response_content(fs):
    """
    Report spectral properties of the Laplacian of a decorated tree.
    @param fs: form data providing tree_string, N, the weight_n and
        weight_sqrt_n options, and the lap, eigvals, eigvecs and compare flags
    @return: a multi-line string
    @raise ValueError: if neither weight_n nor weight_sqrt_n is selected
    """
    # get the tree
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    # order the vertex ids with the internal nodes first and the tips last
    internal = [id(node) for node in tree.gen_internal_nodes()]
    tips = [id(node) for node in tree.gen_tips()]
    ordered_ids = internal + tips
    p = len(internal)
    q = len(tips)
    # get the full weighted adjacency matrix
    A = np.array(tree.get_affinity_matrix(ordered_ids))
    # choose the weight used for the decoration
    N = fs.N
    if fs.weight_n:
        weight = float(N)
    elif fs.weight_sqrt_n:
        weight = math.sqrt(N)
    else:
        # without this branch the call below fails on an unbound local
        raise ValueError('either weight_n or weight_sqrt_n must be selected')
    # compute the weighted adjacency matrix of the decorated tree
    A_aug = get_A_aug(A, weight, p, q, N)
    # compute the weighted Laplacian matrix of the decorated tree
    L_aug = Euclid.adjacency_to_laplacian(A_aug)
    # eigh returns ascending eigenvalues and the eigenvectors as columns
    w, eigvecs = np.linalg.eigh(L_aug)
    # show the output
    np.set_printoptions(linewidth=1000, threshold=10000)
    out = StringIO()
    if fs.lap:
        print >> out, 'Laplacian of the decorated tree:'
        print >> out, L_aug
        print >> out
    if fs.eigvals:
        print >> out, 'eigenvalues:'
        for x in w:
            print >> out, x
        print >> out
    if fs.eigvecs:
        print >> out, 'eigenvector matrix:'
        print >> out, eigvecs
        print >> out
    if fs.compare:
        # get the points from the distance matrix of only the original tips
        D_tips = np.array(tree.get_partial_distance_matrix(tips))
        X_tips = Euclid.edm_to_points(D_tips)
        # wring the approximate points out of the augmented tree:
        # rows are the tip vertices, columns are eigenvectors 1 through q-1,
        # and each column is divided by the square root of its eigenvalue
        X_approx = eigvecs[p:p+q, 1:q] / np.sqrt(w[1:q])
        # do the comparison
        print >> out, 'points from tip-only MDS:'
        print >> out, X_tips
        print >> out
        print >> out, 'approximate points from decorated tree:'
        print >> out, X_approx
        print >> out
    return out.getvalue()
def produce_CRT():
    """
    Produce the tuple (p, q, n, d, dP, dQ, qInv) from a fresh (p, q) pair.
    The value e is not defined here; it is read from the enclosing scope.
    NOTE(review): the names suggest RSA private key values in CRT form,
    confirm against the caller.
    @return: the tuple (p, q, n, d, dP, dQ, qInv)
    """
    p, q = produce_p_q()
    # Euler function of n = p*q
    totient = (p - 1) * (q - 1)
    # inverse d of e modulo the Euler function, e*d = 1 mod(Euler)
    d = Euclid.extended_Euclid(e, totient)
    # the same helper applied with the moduli p-1 and q-1
    dP = Euclid.extended_Euclid(e, p - 1)
    dQ = Euclid.extended_Euclid(e, q - 1)
    # the same helper applied to q with modulus p
    qInv = Euclid.extended_Euclid(q, p)
    return (p, q, p * q, d, dP, dQ, qInv)
def get_response_content(fs):
    """
    Compare the Schur complement of a tree Laplacian
    with the Laplacian of the augmented tree.
    @param fs: form data providing tree_string, ndups and strength
    @return: a multi-line string
    """
    # build the newick tree from the string
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    nvertices = len(list(tree.preorder()))
    nleaves = len(list(tree.gen_tips()))
    # ordered ids; the index ranges below assume the leaves come first
    ordered_ids = get_ordered_ids(tree)
    # adjacency matrix, augmented adjacency matrix, and their laplacians
    A = np.array(tree.get_affinity_matrix(ordered_ids))
    A_aug = get_augmented_adjacency(A, nleaves, fs.ndups, fs.strength)
    L = Euclid.adjacency_to_laplacian(A)
    L_aug = Euclid.adjacency_to_laplacian(A_aug)

    def spectral_summary(M):
        # full eigendecomposition plus the principal eigenvector scaled
        # by the square root of its eigenvalue
        vals, vecs = EigUtil.eigh(M)
        w, v = EigUtil.principal_eigh(M)
        return vals, vecs, v * math.sqrt(w)

    # schur complement with respect to the indices nleaves .. nvertices-1
    R = SchurAlgebra.mschur(L, set(range(nleaves, nvertices)))
    vals, vecs, fiedler = spectral_summary(np.linalg.pinv(R))
    # the same summary for the pseudoinverse of the augmented Laplacian
    vals_aug, vecs_aug, fiedler_aug = spectral_summary(np.linalg.pinv(L_aug))
    # report the results
    np.set_printoptions(linewidth=300)
    out = StringIO()
    schur_sections = (
            ('Laplacian matrix:', L),
            ('Schur complement of Laplacian matrix:', R),
            ('scaled Fiedler vector of Schur complement:', fiedler),
            ('eigenvalues of pinv of Schur complement:', vals),
            ('corresponding eigenvectors of pinv of Schur complement:',
                np.array(vecs).T),
            )
    for caption, payload in schur_sections:
        print >> out, caption
        print >> out, payload
        print >> out
    # an extra blank line separates the two halves of the report
    print >> out
    augmented_sections = (
            ('augmented Laplacian matrix:', L_aug),
            ('scaled Fiedler vector of augmented Laplacian:', fiedler_aug),
            ('eigenvalues of pinv of augmented Laplacian:', vals_aug),
            )
    for caption, payload in augmented_sections:
        print >> out, caption
        print >> out, payload
        print >> out
    print >> out, 'rows are eigenvectors of pinv of augmented Laplacian:'
    print >> out, np.array(vecs_aug)
    return out.getvalue()
def get_splits(initial_distance_matrix, split_function, update_function, on_label_split=None):
    """
    This is the most external of the functions in this module.
    Get the set of splits implied by the tree that would be reconstructed.
    @param initial_distance_matrix: a distance matrix
    @param split_function: takes a distance matrix and returns an index split
    @param update_function: takes a distance matrix and an index subset and returns a distance matrix
    @param on_label_split: notifies the caller of the label split induced by an index split
    @return: a set of splits
    """
    n = len(initial_distance_matrix)
    # keep a stack of (label_set_per_vertex, distance_matrix) pairs
    initial_state = ([set([i]) for i in range(n)], initial_distance_matrix)
    stack = [initial_state]
    # process the stack in a depth first manner, building the split set
    label_split_set = set()
    while stack:
        label_sets, D = stack.pop()
        # if the matrix is small then we are done
        if len(D) < 4:
            continue
        # split the indices using the specified function
        try:
            index_split = split_function(D)
            # convert the index split to a label split
            label_split = index_split_to_label_split(index_split, label_sets)
            # notify the caller if a callback is requested
            if on_label_split:
                on_label_split(label_split)
            # add the split to the master set of label splits
            label_split_set.add(label_split)
            # for each side of the split whose complement has more than
            # two indices, push the state in which that side is merged
            a, b = index_split
            for index_selection, index_complement in ((a, b), (b, a)):
                if len(index_complement) > 2:
                    next_label_sets = SchurAlgebra.vmerge(
                            label_sets, index_selection)
                    next_D = update_function(D, index_selection)
                    stack.append((next_label_sets, next_D))
        except DegenerateSplitException as e:
            # we cannot recover from a degenerate split
            # unless there are more than four indices
            if len(D) <= 4:
                continue
            # with more than four indices we can fall back to partial splits
            index_set = set([e.index])
            # get the next label sets
            next_label_sets = SchurAlgebra.vdelete(label_sets, index_set)
            # get the next conformant distance matrix
            # by schur complementing out the offending index
            L = Euclid.edm_to_laplacian(D)
            L_small = SchurAlgebra.mschur(L, index_set)
            next_D = Euclid.laplacian_to_edm(L_small)
            stack.append((next_label_sets, next_D))
    # the accumulated splits are the documented return value
    return label_split_set
def process():
    """
    Report eigendecompositions related to a six vertex example
    under several mass distributions.
    @return: a multi-line string that summarizes the results
    @raise ValueError: if a mass distribution does not sum to 1
    """
    np.set_printoptions(linewidth=200)
    # the adjacency matrix is a module level constant
    A = g_A
    n = 6
    # define some mass distributions
    m_uniform = np.ones(n) / float(n)
    m_weighted = np.array([102, 102, 102, 102, 1, 1], dtype=float) / 410
    # make the response
    out = StringIO()

    def emit(caption, payload):
        # write a caption directly followed by its value
        print >> out, caption
        print >> out, payload

    # eigendecomposition of -(1/2)HDH where D is the leaf distance matrix
    # NOTE(review): np.linalg.eigh returns eigenvectors as columns;
    # the 'VT' captions are reproduced verbatim
    W_HSH, VT_HSH = np.linalg.eigh(Euclid.edm_to_dccov(Euclid.g_D_b))
    emit('W for -(1/2)HDH of the leaf distance matrix:', W_HSH)
    emit('VT for -(1/2)HDH of the leaf distance matrix:', VT_HSH)
    # eigendecomposition of S given degenerate masses on the full tree
    m_degenerate = np.array([.25, .25, .25, .25, 0, 0])
    S = Euclid.edm_to_weighted_cross_product(Euclid.g_D_c, m_degenerate)
    W_S, VT_S = np.linalg.eigh(S)
    emit('W for -(1/2)(Xi)D(Xi)^T of the full distance matrix with degenerate masses:', W_S)
    emit('VT for -(1/2)(Xi)D(Xi)^T of the full distance matrix with degenerate masses:', VT_S)
    # effects of various mass distributions on the MDS of the full tree
    for m in (m_uniform, m_weighted):
        # the mass distribution should sum to 1
        if not np.allclose(np.sum(m), 1):
            raise ValueError('masses should sum to 1')
        # perturbed laplacian: mass weighted sums divided elementwise by
        # the masses form the diagonal, then the adjacency is subtracted
        v = np.dot(m, A)
        v /= m
        Lp = np.diag(v) - A
        # eigendecomposition of the pseudoinverse of the perturbed laplacian
        W_Lp_pinv, VT_Lp_pinv = np.linalg.eigh(np.linalg.pinv(Lp))
        # eigendecomposition of the S matrix of the full distance matrix
        S = Euclid.edm_to_weighted_cross_product(Euclid.g_D_c, m)
        W_S, VT_S = np.linalg.eigh(S)
        emit('perturbed laplacian:', Lp)
        print >> out, 'm:', m
        emit('W for the pseudoinverse of the perturbed laplacian:', W_Lp_pinv)
        emit('VT for the pseudoinverse of the perturbed laplacian:', VT_Lp_pinv)
        emit('W for the cross product matrix:', W_S)
        emit('VT for the cross product matrix:', VT_S)
    return out.getvalue().strip()
def get_response_content(fs):
    """
    Report spectral properties of the Laplacian of a decorated tree.
    @param fs: form data providing tree_string, N, the weight_n and
        weight_sqrt_n options, and the lap, eigvals, eigvecs and compare flags
    @return: a multi-line string
    @raise ValueError: if neither weight_n nor weight_sqrt_n is selected
    """
    # get the tree
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    # order the vertex ids with the internal nodes first and the tips last
    internal = [id(node) for node in tree.gen_internal_nodes()]
    tips = [id(node) for node in tree.gen_tips()]
    ordered_ids = internal + tips
    p = len(internal)
    q = len(tips)
    # get the full weighted adjacency matrix
    A = np.array(tree.get_affinity_matrix(ordered_ids))
    # choose the weight used for the decoration
    N = fs.N
    if fs.weight_n:
        weight = float(N)
    elif fs.weight_sqrt_n:
        weight = math.sqrt(N)
    else:
        # without this branch the call below fails on an unbound local
        raise ValueError('either weight_n or weight_sqrt_n must be selected')
    # compute the weighted adjacency matrix of the decorated tree
    A_aug = get_A_aug(A, weight, p, q, N)
    # compute the weighted Laplacian matrix of the decorated tree
    L_aug = Euclid.adjacency_to_laplacian(A_aug)
    # eigh returns ascending eigenvalues and the eigenvectors as columns
    w, eigvecs = np.linalg.eigh(L_aug)
    # show the output
    np.set_printoptions(linewidth=1000, threshold=10000)
    out = StringIO()
    if fs.lap:
        print >> out, 'Laplacian of the decorated tree:'
        print >> out, L_aug
        print >> out
    if fs.eigvals:
        print >> out, 'eigenvalues:'
        for x in w:
            print >> out, x
        print >> out
    if fs.eigvecs:
        print >> out, 'eigenvector matrix:'
        print >> out, eigvecs
        print >> out
    if fs.compare:
        # get the points from the distance matrix of only the original tips
        D_tips = np.array(tree.get_partial_distance_matrix(tips))
        X_tips = Euclid.edm_to_points(D_tips)
        # wring the approximate points out of the augmented tree:
        # rows are the tip vertices, columns are eigenvectors 1 through q-1,
        # and each column is divided by the square root of its eigenvalue
        X_approx = eigvecs[p:p+q, 1:q] / np.sqrt(w[1:q])
        # do the comparison
        print >> out, 'points from tip-only MDS:'
        print >> out, X_tips
        print >> out
        print >> out, 'approximate points from decorated tree:'
        print >> out, X_approx
        print >> out
    return out.getvalue()
def get_response_content(fs):
    """
    Report distance matrices, spectra and MDS coordinates for a list of
    cities, once per distance function.
    @param fs: form data providing datalines
    @return: a multi-line string
    """
    # read the lat-lon points from the input
    stripped = Util.get_stripped_lines(fs.datalines.splitlines())
    latlon_points = []
    city_names = []
    for city, latd, latm, lond, lonm in parse_lines(stripped):
        # convert degrees and minutes to radians
        latitude = math.radians(GPS.degrees_minutes_to_degrees(latd, latm))
        longitude = math.radians(GPS.degrees_minutes_to_degrees(lond, lonm))
        latlon_points.append((latitude, longitude))
        city_names.append(city)
    npoints = len(latlon_points)
    # start writing the response
    np.set_printoptions(linewidth=200)
    out = StringIO()
    radius = GPS.g_earth_radius_miles
    distance_methods = (
            (GPS.get_arc_distance, 'great arc'),
            (GPS.get_euclidean_distance, 'euclidean'),
            )
    for dfunc, name in distance_methods:
        # D holds the distances and edm holds their squares
        D = np.zeros((npoints, npoints))
        edm = np.zeros((npoints, npoints))
        for i, pointa in enumerate(latlon_points):
            for j, pointb in enumerate(latlon_points):
                D[i, j] = dfunc(pointa, pointb, radius)
                edm[i, j] = D[i, j]**2
        print >> out, name, 'distances:'
        print >> out, D
        print >> out
        print >> out, name, 'EDM:'
        print >> out, edm
        print >> out
        G = Euclid.edm_to_dccov(edm)
        print >> out, name, 'Gower centered matrix:'
        print >> out, G
        print >> out
        # eigenvalues from largest to smallest
        spectrum = np.array(sorted(np.linalg.eigvals(G))[::-1])
        print >> out, name, 'spectrum of Gower centered matrix:'
        for eigenvalue in spectrum:
            print >> out, eigenvalue
        print >> out
        print >> out, name, 'rounded spectrum:'
        for eigenvalue in spectrum:
            print >> out, '%.1f' % eigenvalue
        print >> out
        # tab separated city name and first two MDS coordinates
        mds_points = Euclid.edm_to_points(edm)
        print >> out, '2D MDS coordinates:'
        for city, mds_point in zip(city_names, mds_points):
            fields = [str(city), str(mds_point[0]), str(mds_point[1])]
            print >> out, '\t'.join(fields)
        print >> out
        # break between distance methods
        print >> out
    # return the response
    return out.getvalue()
def get_response_content(fs):
    """
    Compare the Schur complement of a tree Laplacian
    with the centered augmented distance matrix.
    @param fs: form data providing tree_string and ndups
    @return: a multi-line string
    """
    # build the newick tree from the string
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    nvertices = len(list(tree.preorder()))
    nleaves = len(list(tree.gen_tips()))
    # ordered ids; the index ranges below assume the leaves come first
    ordered_ids = get_ordered_ids(tree)
    # distance matrix, augmented distance matrix, and laplacian
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    D_aug = get_augmented_distance(D, nleaves, fs.ndups)
    L = Euclid.edm_to_laplacian(D)

    def spectral_summary(M):
        # full eigendecomposition plus the principal eigenvector scaled
        # by the square root of its eigenvalue
        vals, vecs = EigUtil.eigh(M)
        w, v = EigUtil.principal_eigh(M)
        return vals, vecs, v * math.sqrt(w)

    # schur complement with respect to the indices nleaves .. nvertices-1
    R = SchurAlgebra.mschur(L, set(range(nleaves, nvertices)))
    vals, vecs, fiedler = spectral_summary(np.linalg.pinv(R))
    # the same summary for the centered augmented distance matrix
    centered_aug = Euclid.edm_to_dccov(D_aug)
    vals_aug, vecs_aug, fiedler_aug = spectral_summary(centered_aug)
    # report the results
    np.set_printoptions(linewidth=300, threshold=10000)
    out = StringIO()
    schur_sections = (
            ('Laplacian matrix:', L),
            ('Schur complement of Laplacian matrix:', R),
            ('scaled Fiedler vector of Schur complement:', fiedler),
            ('eigenvalues of pinv of Schur complement:', vals),
            ('corresponding eigenvectors of pinv of Schur complement:',
                np.array(vecs).T),
            )
    for caption, payload in schur_sections:
        print >> out, caption
        print >> out, payload
        print >> out
    # an extra blank line separates the two halves of the report
    print >> out
    augmented_sections = (
            ('augmented distance matrix:', D_aug),
            ('scaled Fiedler vector of augmented Laplacian limit:',
                fiedler_aug),
            ('eigenvalues of pinv of augmented Laplacian limit:', vals_aug),
            )
    for caption, payload in augmented_sections:
        print >> out, caption
        print >> out, payload
        print >> out
    print >> out, 'rows are eigenvectors of pinv of augmented Laplacian limit:'
    print >> out, np.array(vecs_aug)
    return out.getvalue()
def produce_e_d():
    """
    Produce (e, d).
    The modulus m is not defined here; it is read from the enclosing scope.
    @return: the first pair (e, d), trying e = 3, 5, 7, ..., for which
        gcd(m, e) == 1 and d > 0
    """
    e = 3
    while True:
        # candidate d with e*d = 1 mod m
        d = Euclid.extended_Euclid(e, m)
        if Euclid.gcd(m, e) == 1 and d > 0:
            return (e, d)
        # move on to the next odd candidate
        e += 2
def get_response_content(fs):
    """
    Report distance matrices, spectra and MDS coordinates for a list of
    cities, once per distance function.
    @param fs: form data providing datalines
    @return: a multi-line string
    """
    # read the lat-lon points from the input
    stripped = Util.get_stripped_lines(fs.datalines.splitlines())
    latlon_points = []
    city_names = []
    for city, latd, latm, lond, lonm in parse_lines(stripped):
        # convert degrees and minutes to radians
        latitude = math.radians(GPS.degrees_minutes_to_degrees(latd, latm))
        longitude = math.radians(GPS.degrees_minutes_to_degrees(lond, lonm))
        latlon_points.append((latitude, longitude))
        city_names.append(city)
    npoints = len(latlon_points)
    # start writing the response
    np.set_printoptions(linewidth=200)
    out = StringIO()
    radius = GPS.g_earth_radius_miles
    distance_methods = (
            (GPS.get_arc_distance, 'great arc'),
            (GPS.get_euclidean_distance, 'euclidean'),
            )
    for dfunc, name in distance_methods:
        # D holds the distances and edm holds their squares
        D = np.zeros((npoints, npoints))
        edm = np.zeros((npoints, npoints))
        for i, pointa in enumerate(latlon_points):
            for j, pointb in enumerate(latlon_points):
                D[i, j] = dfunc(pointa, pointb, radius)
                edm[i, j] = D[i, j]**2
        print >> out, name, 'distances:'
        print >> out, D
        print >> out
        print >> out, name, 'EDM:'
        print >> out, edm
        print >> out
        G = Euclid.edm_to_dccov(edm)
        print >> out, name, 'Gower centered matrix:'
        print >> out, G
        print >> out
        # eigenvalues from largest to smallest
        spectrum = np.array(sorted(np.linalg.eigvals(G))[::-1])
        print >> out, name, 'spectrum of Gower centered matrix:'
        for eigenvalue in spectrum:
            print >> out, eigenvalue
        print >> out
        print >> out, name, 'rounded spectrum:'
        for eigenvalue in spectrum:
            print >> out, '%.1f' % eigenvalue
        print >> out
        # tab separated city name and first two MDS coordinates
        mds_points = Euclid.edm_to_points(edm)
        print >> out, '2D MDS coordinates:'
        for city, mds_point in zip(city_names, mds_points):
            fields = [str(city), str(mds_point[0]), str(mds_point[1])]
            print >> out, '\t'.join(fields)
        print >> out
        # break between distance methods
        print >> out
    # return the response
    return out.getvalue()
def produce_e_d():
    """
    Produce (e, d).
    The modulus m is not defined here; it is read from the enclosing scope.
    @return: the first pair (e, d), trying e = 3, 5, 7, ..., for which
        gcd(m, e) == 1 and d > 0
    """
    # generate a number e so that gcd(e, m) = 1, starting with e = 3
    e = 3
    while True:
        d = Euclid.extended_Euclid(e, m)
        if Euclid.gcd(m, e) == 1 and d > 0:
            return (e, d)
        # move on to the next odd candidate
        e += 2
def update_using_laplacian(D, index_set):
    """
    Update the distance matrix by merging the given indices
    in the corresponding laplacian matrix.
    @param D: the distance matrix
    @param index_set: the set of indices that will be removed from the updated distance matrix
    @return: an updated distance matrix
    """
    # convert to a laplacian, merge the indices, and convert back
    merged_laplacian = SchurAlgebra.mmerge(
            Euclid.edm_to_laplacian(D), index_set)
    return Euclid.laplacian_to_edm(merged_laplacian)
def get_splits(initial_distance_matrix, split_function, update_function,
        on_label_split=None):
    """
    This is the most external of the functions in this module.
    Get the set of splits implied by the tree that would be reconstructed.
    Matrices with fewer than four rows are not split any further.
    @param initial_distance_matrix: a distance matrix
    @param split_function: takes a distance matrix and returns an index split
    @param update_function: takes a distance matrix and an index subset
    and returns a distance matrix
    @param on_label_split: notifies the caller of the label split
    induced by an index split
    @return: a set of splits
    """
    n = len(initial_distance_matrix)
    # keep a stack of (label_set_per_vertex, distance_matrix) pairs
    initial_state = ([set([i]) for i in range(n)], initial_distance_matrix)
    stack = [initial_state]
    # process the stack in a depth first manner, building the split set
    label_split_set = set()
    while stack:
        label_sets, D = stack.pop()
        # if the matrix is small then we are done
        if len(D) < 4:
            continue
        # split the indices using the specified function
        try:
            index_split = split_function(D)
            # convert the index split to a label split
            label_split = index_split_to_label_split(index_split, label_sets)
            # notify the caller if a callback is requested
            if on_label_split:
                on_label_split(label_split)
            # add the split to the master set of label splits
            label_split_set.add(label_split)
            # for large matrices create the new label sets
            # and the new conformant distance matrices
            a, b = index_split
            for index_selection, index_complement in ((a, b), (b, a)):
                if len(index_complement) > 2:
                    next_label_sets = SchurAlgebra.vmerge(
                            label_sets, index_selection)
                    next_D = update_function(D, index_selection)
                    stack.append((next_label_sets, next_D))
        except DegenerateSplitException as e:
            # we cannot recover from a degenerate split
            # unless there are more than four indices
            if len(D) <= 4:
                continue
            # with more than four indices we can fall back to partial splits
            index_set = set([e.index])
            # get the next label sets
            next_label_sets = SchurAlgebra.vdelete(label_sets, index_set)
            # get the next conformant distance matrix
            # by schur complementing out the offending index
            L = Euclid.edm_to_laplacian(D)
            L_small = SchurAlgebra.mschur(L, index_set)
            next_D = Euclid.laplacian_to_edm(L_small)
            stack.append((next_label_sets, next_D))
    # the accumulated splits are the documented result of this function
    return label_split_set
def _do_analysis(self, use_generalized_nj):
    """
    Do some splits of the tree.
    The first split is a Fiedler split of the pruned tree,
    and it must not place bacteria on both sides.
    The bacteria are then merged and a second Fiedler split is made,
    which must not place eukaryota on both sides.
    The results are stored in the attributes
    first_split_object and second_split_object.
    @param use_generalized_nj: True if we use an old method of outgrouping
    @raise HandlingError: when a split is not clean
    or when no bacteria are found on either side of the first split
    """
    # define the distance matrix
    D = np.array(self.pruned_tree.get_distance_matrix(self.pruned_names))
    # get the primary split of the criterion matrix
    L = Euclid.edm_to_laplacian(D)
    v = BuildTreeTopology.laplacian_to_fiedler(L)
    left_indices, right_indices = BuildTreeTopology.eigenvector_to_split(v)
    # require that the first split cleanly separates the bacteria from the rest
    left_domains = self._get_domains(
            [self.pruned_names[x] for x in left_indices])
    right_domains = self._get_domains(
            [self.pruned_names[x] for x in right_indices])
    bacteria_on_left = 'bacteria' in left_domains
    bacteria_on_right = 'bacteria' in right_domains
    if bacteria_on_left and bacteria_on_right:
        raise HandlingError('bacteria were not defined by the first split')
    # now we have enough info to define the first supplementary csv file
    self.first_split_object = SupplementarySpreadsheetObject(
            self.pruned_names, L, v)
    # define the bacteria indices for the second split
    if bacteria_on_left:
        bacteria_indices = left_indices
    elif bacteria_on_right:
        bacteria_indices = right_indices
    else:
        # Without this branch the name bacteria_indices would be unbound
        # and the code below would fail with an UnboundLocalError.
        raise HandlingError(
                'no bacteria were found on either side of the first split')
    # get the secondary split of interest
    if use_generalized_nj:
        D_secondary = BuildTreeTopology.update_generalized_nj(
                D, bacteria_indices)
        L_secondary = Euclid.edm_to_laplacian(D_secondary)
    else:
        L_secondary = SchurAlgebra.mmerge(L, bacteria_indices)
    full_label_sets = [set([i]) for i in range(len(self.pruned_names))]
    next_label_sets = SchurAlgebra.vmerge(full_label_sets, bacteria_indices)
    v_secondary = BuildTreeTopology.laplacian_to_fiedler(L_secondary)
    left_subindices, right_subindices = (
            BuildTreeTopology.eigenvector_to_split(v_secondary))
    # name each merged vertex; the merged group gets a placeholder name
    pruned_names_secondary = []
    for label_set in next_label_sets:
        if len(label_set) == 1:
            label = list(label_set)[0]
            pruned_names_secondary.append(self.pruned_names[label])
        else:
            pruned_names_secondary.append('all-bacteria')
    # require that the second split cleanly separates the eukaryota
    left_subdomains = self._get_domains(
            [pruned_names_secondary[x] for x in left_subindices])
    right_subdomains = self._get_domains(
            [pruned_names_secondary[x] for x in right_subindices])
    if ('eukaryota' in left_subdomains) and ('eukaryota' in right_subdomains):
        raise HandlingError('eukaryota were not defined by the second split')
    # now we have enough info to define the second supplementary csv file
    self.second_split_object = SupplementarySpreadsheetObject(
            pruned_names_secondary, L_secondary, v_secondary)
def get_response_content(fs):
    """
    Merge a selection of taxa in a distance matrix and report the result.
    @param fs: an object with matrix, labels, and selection attributes
    @return: a multi-line string showing the new distance matrix
    followed by the new taxon labels
    @raise HandlingError: when the labels or the selection are unusable
    """
    distance_matrix = fs.matrix
    # read and validate the ordered labels
    ordered_labels = Util.get_stripped_lines(StringIO(fs.labels))
    if not ordered_labels:
        raise HandlingError('no ordered taxa were provided')
    if len(set(ordered_labels)) != len(ordered_labels):
        raise HandlingError('the ordered taxa should be unique')
    # read and validate the selected labels
    min_selected_labels = 2
    min_unselected_labels = 1
    selected_labels = set(Util.get_stripped_lines(StringIO(fs.selection)))
    if len(selected_labels) < min_selected_labels:
        raise HandlingError(
                'at least %d taxa should be selected to be grouped'
                % min_selected_labels)
    unselected_labels = set(ordered_labels) - selected_labels
    if len(unselected_labels) < min_unselected_labels:
        raise HandlingError(
                'at least %d taxa should remain outside the selected group'
                % min_unselected_labels)
    # every selected label must be one of the ordered labels
    weird_labels = selected_labels - set(ordered_labels)
    if weird_labels:
        raise HandlingError(
                'some selected taxa are invalid: ' + str(weird_labels))
    # the matrix must have one row per ordered label
    if len(distance_matrix) != len(ordered_labels):
        raise HandlingError(
                'the number of listed taxa does not match '
                'the number of rows in the distance matrix')
    n = len(distance_matrix)
    index_selection = set(
            index for index, label in enumerate(ordered_labels)
            if label in selected_labels)
    out = StringIO()
    # get the ordered list of sets of indices after merging
    singletons = [set([index]) for index in range(n)]
    merged_indices = SchurAlgebra.vmerge(singletons, index_selection)
    # merge within the Laplacian and convert back to distances
    merged_laplacian = SchurAlgebra.mmerge(
            Euclid.edm_to_laplacian(distance_matrix), index_selection)
    merged_distances = Euclid.laplacian_to_edm(merged_laplacian)
    print >> out, 'new distance matrix:'
    print >> out, MatrixUtil.m_to_string(merged_distances)
    print >> out
    print >> out, 'new taxon labels:'
    for merged_index_set in merged_indices:
        if len(merged_index_set) != 1:
            print >> out, '{' + ', '.join(selected_labels) + '}'
        else:
            only_index, = merged_index_set
            print >> out, ordered_labels[only_index]
    return out.getvalue()
def gen_e_d(m):
    """
    Produce the pair (e, d); m is p1 * q1.
    Odd candidates for e are tried in increasing order starting at 3.
    The search stops at the first e for which gcd(m, e) is 1
    and for which Euclid.extended_Euclid(e, m) gives a positive d.
    :param m: the value against which e is tested
    :return: the pair (e, d)
    """
    candidate = 3
    inverse = Euclid.extended_Euclid(candidate, m)
    while not (Euclid.gcd(m, candidate) == 1 and inverse > 0):
        # Move to the next odd candidate and recompute its partner value.
        candidate += 2
        inverse = Euclid.extended_Euclid(candidate, m)
    return candidate, inverse
def get_response_content(fs):
    """
    Compare two ways of computing a principal axis projection.
    The first uses the Laplacian returned by get_pseudoduplicate_laplacian.
    The second uses a weight formula on the original Laplacian,
    where the last vertex has twice the mass of each other vertex.
    @param fs: an object with D and strength attributes
    @return: a multi-line string
    """
    np.set_printoptions(linewidth=200)
    n = len(fs.D)
    laplacian = Euclid.edm_to_laplacian(fs.D)
    # projection from the Laplacian with the pseudo-duplicate node
    laplacian_dup = get_pseudoduplicate_laplacian(laplacian, fs.strength)
    dup_w, dup_v = EigUtil.principal_eigh(np.linalg.pinv(laplacian_dup))
    dup_projection = dup_v * math.sqrt(dup_w)
    # masses summing to one
    masses = np.array([1]*(n-1) + [2], dtype=float) / (n+1)
    # projection using the weight formula
    sqrt_mass_matrix = np.diag(np.sqrt(masses))
    laplacian_pinv = np.linalg.pinv(laplacian)
    identity = np.eye(n, dtype=float)
    centering = identity - np.outer(np.ones(n, dtype=float), masses)
    weighted_centering = np.dot(sqrt_mass_matrix, centering)
    Q = np.dot(
            weighted_centering,
            np.dot(laplacian_pinv, weighted_centering.T))
    Q_w, Q_v = EigUtil.principal_eigh(Q)
    weighted_projection = Q_v * math.sqrt(Q_w) / np.sqrt(masses)
    # write the report with a blank line between sections
    sections = (
            ('Laplacian matrix with pseudo-duplicate node:', laplacian_dup),
            ('principal axis projection:', dup_projection),
            ('principal axis projection using the weight formula:',
                weighted_projection))
    out = StringIO()
    for position, (title, value) in enumerate(sections):
        if position:
            print >> out
        print >> out, title
        print >> out, value
    return out.getvalue()
def do_projection(D_full, nleaves):
    """
    Project points onto the space of the leaves.
    All points are translated so that the leaf centroid is the origin,
    and are then rotated by the right singular vectors of the leaf points.
    Only the first nleaves-1 coordinates are returned.
    @param D_full: distances relating all, including internal, vertices.
    @param nleaves: the first few indices in D_full represent leaves
    @return: a numpy array where each row is a vertex of the tree
    """
    all_points = Euclid.edm_to_points(D_full)
    # Move the origin to the centroid of the leaves.
    all_points -= np.mean(all_points[:nleaves], axis=0)
    leaf_points = all_points[:nleaves]
    # Only the right singular vectors of the leaf points are needed.
    _, _, Vt = np.linalg.svd(leaf_points)
    # Rotate every point, including the internal vertices.
    rotated = np.dot(all_points, Vt.T)
    return rotated[:, :nleaves-1]
def get_response_content(fs):
    """
    Compare two ways of computing a principal axis projection.
    The first uses the Laplacian returned by get_pseudoduplicate_laplacian.
    The second uses a weight formula on the original Laplacian,
    where the last vertex has twice the mass of each other vertex.
    @param fs: an object with D and strength attributes
    @return: a multi-line string
    """
    np.set_printoptions(linewidth=200)
    n = len(fs.D)
    laplacian = Euclid.edm_to_laplacian(fs.D)
    # projection from the Laplacian with the pseudo-duplicate node
    laplacian_dup = get_pseudoduplicate_laplacian(laplacian, fs.strength)
    dup_w, dup_v = EigUtil.principal_eigh(np.linalg.pinv(laplacian_dup))
    dup_projection = dup_v * math.sqrt(dup_w)
    # masses summing to one
    masses = np.array([1]*(n-1) + [2], dtype=float) / (n+1)
    # projection using the weight formula
    sqrt_mass_matrix = np.diag(np.sqrt(masses))
    laplacian_pinv = np.linalg.pinv(laplacian)
    identity = np.eye(n, dtype=float)
    centering = identity - np.outer(np.ones(n, dtype=float), masses)
    weighted_centering = np.dot(sqrt_mass_matrix, centering)
    Q = np.dot(
            weighted_centering,
            np.dot(laplacian_pinv, weighted_centering.T))
    Q_w, Q_v = EigUtil.principal_eigh(Q)
    weighted_projection = Q_v * math.sqrt(Q_w) / np.sqrt(masses)
    # write the report with a blank line between sections
    sections = (
            ('Laplacian matrix with pseudo-duplicate node:', laplacian_dup),
            ('principal axis projection:', dup_projection),
            ('principal axis projection using the weight formula:',
                weighted_projection))
    out = StringIO()
    for position, (title, value) in enumerate(sections):
        if position:
            print >> out
        print >> out, title
        print >> out, value
    return out.getvalue()
def get_response_content(fs):
    """
    Report several matrices derived from a distance matrix.
    The Laplacian, sigma and precision matrices are shown,
    followed by three differences from the Laplacian matrix.
    @param fs: an object whose matrix attribute is the distance matrix
    @return: a multi-line string
    """
    distance_matrix = fs.matrix
    laplacian = Euclid.edm_to_laplacian(distance_matrix)
    sigma = get_sigma_matrix(distance_matrix)
    precision = get_precision_matrix(sigma)
    # each section is a title followed by the matrix it describes
    sections = (
            ('the Laplacian matrix:',
                laplacian),
            ('the sigma matrix corresponding to the Q matrix:',
                sigma),
            ('the precision matrix corresponding to the Q matrix:',
                precision),
            ('the precision matrix minus the laplacian matrix:',
                precision - laplacian),
            ('the double centered precision matrix '
                'minus the laplacian matrix:',
                MatrixUtil.double_centered(precision) - laplacian),
            ('the pseudo-inverse of the double centered sigma matrix '
                'minus the laplacian matrix:',
                np.linalg.pinv(MatrixUtil.double_centered(sigma)) - laplacian))
    out = StringIO()
    for position, (title, matrix) in enumerate(sections):
        # sections are separated by a blank line
        if position:
            print >> out
        print >> out, title
        print >> out, MatrixUtil.m_to_string(matrix)
    return out.getvalue()
def get_response_content(fs):
    """
    Draw a graph using coordinates derived from its Laplacian matrix.
    The vertex positions are the first two coordinates of the points
    computed from the pseudoinverse of the Laplacian,
    and the vertex colors are defined by the first coordinate.
    @param fs: an object holding the graph data and the image options
    @return: the value returned by get_image_string
    """
    # collect the border and axis options
    border_info = BorderInfo(fs.border_x, fs.border_y)
    axis_info = AxisInfo(fs.flip_x, fs.flip_y, fs.show_x, fs.show_y)
    input_points, edges = read_points_and_edges(fs.graph_data)
    # weighted edges use the reciprocal of the distance between endpoints
    if not fs.weighted:
        weights = [1.0 for _ in edges]
    else:
        arrays = [np.array(point) for point in input_points]
        lengths = [
                np.linalg.norm(arrays[sink] - arrays[source])
                for source, sink in edges]
        weights = [1.0 / length for length in lengths]
    # embed the vertices using the pseudoinverse of the Laplacian
    laplacian = edges_to_laplacian(edges, weights)
    embedded = Euclid.dccov_to_points(np.linalg.pinv(laplacian))
    xs, ys = zip(*[(row[0], row[1]) for row in embedded])
    colors = valuations_to_colors(xs)
    # draw the image
    ext = Form.g_imageformat_to_ext[fs.imageformat]
    image_info = ImageInfo(
            fs.width, fs.height, fs.black, fs.show_edges, fs.show_labels,
            axis_info, border_info, ext)
    return get_image_string(xs, ys, colors, edges, image_info)
def get_response_content(fs):
    """
    Report whether a distance matrix looks spherical.
    The report shows the eigenvalues of the doubly centered covariance
    matrix, the eigenvector of its smallest eigenvalue,
    and the sum of the elements of the pseudoinverse of the distance matrix.
    @param fs: an object whose matrix attribute is the distance matrix
    @return: a multi-line string
    """
    # read the matrix
    D = fs.matrix
    # begin the response
    out = StringIO()
    # Look at the eigenvalues
    # of the associated doubly centered covariance matrix.
    HSH = Euclid.edm_to_dccov(D)
    w, V_T = np.linalg.eigh(HSH)
    # transpose so that each row of V is an eigenvector
    V = V_T.T
    print >> out, 'eigenvalues of the associated doubly centered covariance matrix:'
    for x in reversed(sorted(w)):
        print >> out, x
    print >> out
    print >> out, 'eigenvector associated with last eigenvalue:'
    # Select the row by index instead of taking the min of (value, vector)
    # pairs; tied eigenvalues would otherwise force a comparison of arrays,
    # which raises an error.
    last_eigenvector = V[np.argmin(w)]
    for x in last_eigenvector:
        print >> out, x
    print >> out
    # look at another criterion
    D_pinv = np.linalg.pinv(D)
    criterion = np.sum(D_pinv)
    if criterion > 0:
        print >> out, 'sum of elements of the pseudoinverse of the distance matrix is positive'
    else:
        print >> out, 'sum of elements of the pseudoinverse of the distance matrix is nonpositive'
    print >> out, 'A Euclidean distance matrix is spherical if and only if the sum of the elements of its pseudoinverse is positive.'
    print >> out, 'For this distance matrix, this sum is', criterion
    # write the response
    return out.getvalue()
def get_response_content(fs):
    """
    Write the contrast matrix of a tree in the requested text format.
    @param fs: a FieldStorage object containing the cgi arguments
    @return: the response text
    @raise HandlingError: when the labels do not match the tree tips
    """
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    ordered_labels = Util.get_stripped_lines(StringIO(fs.labels))
    # the provided labels must be exactly the names of the tips
    tip_names = set(tip.get_name() for tip in tree.gen_tips())
    if set(ordered_labels) != tip_names:
        msg = 'the labels should match the labels of the leaves of the tree'
        raise HandlingError(msg)
    # go from pairwise tip distances to the contrast matrix
    distances = np.array(tree.get_distance_matrix(ordered_labels))
    eigenvalues, eigenvectors = get_eigendecomposition(
            Euclid.edm_to_laplacian(distances))
    contrasts = get_contrast_matrix(eigenvalues, eigenvectors)
    # set elements with small absolute value to zero
    contrasts[abs(contrasts) < fs.epsilon] = 0
    # pick the formatter matching the first format flag that is set
    if fs.plain_format:
        formatter = MatrixUtil.m_to_string
    elif fs.matlab_format:
        formatter = MatrixUtil.m_to_matlab_string
    elif fs.r_format:
        formatter = MatrixUtil.m_to_R_string
    else:
        formatter = None
    out = StringIO()
    if formatter is not None:
        print >> out, formatter(contrasts)
    return out.getvalue()
def get_eigendecomposition_report(D):
    """
    Show Laplacian-derived and distance-derived quantities side by side.
    The sorted eigenvalues are listed first,
    and the two Fiedler vectors are listed second.
    @param D: a distance matrix
    @return: a multi-line string
    """
    out = StringIO()
    laplacian = Euclid.edm_to_laplacian(D)
    fiedler_from_laplacian = BuildTreeTopology.laplacian_to_fiedler(laplacian)
    fiedler_from_distances = BuildTreeTopology.edm_to_fiedler(D)
    # The split itself is not reported; the call is kept as in the original.
    BuildTreeTopology.eigenvector_to_split(fiedler_from_laplacian)
    # eigenvalues of -HDH/2 where HDH is the double centered distance matrix
    centered = -0.5 * MatrixUtil.double_centered(D)
    distance_eigenvalues, _ = np.linalg.eigh(centered)
    print >> out, 'the laplacian-derived and distance-derived eigenvalues:'
    laplacian_eigenvalues, _ = np.linalg.eigh(laplacian)
    eigenvalue_pairs = zip(
            sorted(laplacian_eigenvalues), sorted(distance_eigenvalues))
    for from_laplacian, from_distances in eigenvalue_pairs:
        print >> out, from_laplacian, '\t', from_distances
    print >> out
    print >> out, 'the laplacian-derived and distance-derived fiedler vectors:'
    fiedler_pairs = zip(fiedler_from_laplacian, fiedler_from_distances)
    for from_laplacian, from_distances in fiedler_pairs:
        print >> out, from_laplacian, '\t', from_distances
    return out.getvalue().strip()
def get_response_content(fs):
    """
    Draw a graph using coordinates derived from its Laplacian matrix.
    The vertex positions are the first two coordinates of the points
    computed from the pseudoinverse of the Laplacian,
    and the vertex colors are defined by the first coordinate.
    @param fs: an object holding the graph data and the image options
    @return: the value returned by get_image_string
    @raise HandlingError: when the drawable area is empty
    or when the drawing code reports an error
    """
    input_points, edges = read_points_and_edges(fs.graph_data)
    # weighted edges use the reciprocal of the distance between endpoints
    if not fs.weighted:
        weights = [1.0 for _ in edges]
    else:
        arrays = [np.array(point) for point in input_points]
        lengths = [
                np.linalg.norm(arrays[sink] - arrays[source])
                for source, sink in edges]
        weights = [1.0 / length for length in lengths]
    # the drawable area excludes the border on each side
    drawable_width = fs.total_width - 2*fs.border
    drawable_height = fs.total_height - 2*fs.border
    if drawable_width < 1 or drawable_height < 1:
        msg = 'the image dimensions do not allow for enough drawable area'
        raise HandlingError(msg)
    # embed the vertices using the pseudoinverse of the Laplacian
    laplacian = edges_to_laplacian(edges, weights)
    embedded = Euclid.dccov_to_points(np.linalg.pinv(laplacian))
    if fs.flip:
        points = [(-row[0], row[1]) for row in embedded]
    else:
        points = [(row[0], row[1]) for row in embedded]
    x_coords, _ = zip(*points)
    colors = valuations_to_colors(x_coords)
    # draw the image
    ext = Form.g_imageformat_to_ext[fs.imageformat]
    info = ImageInfo(fs.total_width, fs.total_height, fs.border, ext)
    try:
        return get_image_string(points, edges, colors, fs.black, info)
    except CairoUtil.CairoUtilError as e:
        raise HandlingError(e)
def create_laplacian_matrix(lena, lenb, lenc):
    """
    Build the Laplacian matrix of a hub vertex with three path branches.
    Vertex 0 is the hub, and the vertices of each branch are numbered
    consecutively, with the first branch starting at vertex 1.
    @param lena: integer length of first branch.
    @param lenb: integer length of second branch.
    @param lenc: integer length of third branch.
    @return: the result of Euclid.adjacency_to_laplacian
    """
    branch_lengths = (lena, lenb, lenc)
    nvertices = 1 + lena + lenb + lenc
    adjacency = np.zeros((nvertices, nvertices), dtype=float)
    first_vertex = 1
    for length in branch_lengths:
        # Connect the hub to the first vertex of a nonempty branch.
        if length:
            adjacency[0, first_vertex] = 1
            adjacency[first_vertex, 0] = 1
        # Connect consecutive vertices along the branch.
        for vertex in range(first_vertex, first_vertex + length - 1):
            adjacency[vertex, vertex + 1] = 1
            adjacency[vertex + 1, vertex] = 1
        first_vertex += length
    return Euclid.adjacency_to_laplacian(adjacency)
def split_function(self, D):
    """
    Split the distance matrix using signs of an eigenvector of -HDH/2.
    The eigenvector used is the one paired with the largest eigenvalue.
    If the spectral split is degenerate, meaning that one side has
    a single index, then the split is delegated to
    BuildTreeTopology.split_nj and the saved eigenvalues are cleared.
    The eigenvalues are saved in self.eigenvalues for reporting.
    @param D: the distance matrix
    @return: a set of two index sets defining a split of the indices
    """
    try:
        # get the matrix whose eigendecomposition is of interest
        HSH = Euclid.edm_to_dccov(D)
        # get the eigendecomposition
        eigenvalues, V_T = np.linalg.eigh(HSH)
        eigenvectors = V_T.T.tolist()
        # save the eigenvalues for reporting
        self.eigenvalues = eigenvalues
        # get the eigenvector of interest
        w, v = max(zip(eigenvalues, eigenvectors))
        # get the indices with positive eigenvector valuations
        n = len(D)
        positive = frozenset(i for i, x in enumerate(v) if x > 0)
        nonpositive = frozenset(set(range(n)) - positive)
        # neither side of the split should be empty
        for index_set in (positive, nonpositive):
            assert len(index_set) > 0
        # check for a degenerate split
        for index_set in (positive, nonpositive):
            if len(index_set) == 1:
                index, = index_set
                raise BuildTreeTopology.DegenerateSplitException(index)
        return frozenset((positive, nonpositive))
    except BuildTreeTopology.DegenerateSplitException:
        # The exception object itself is not needed for the fallback.
        self.eigenvalues = None
        return BuildTreeTopology.split_nj(D)
def get_response_content(fs):
    """
    Draw an image relating augmented spectra to the spectrum of the tips.
    @param fs: an object holding the tree string and the image options
    @return: the value returned by create_image
    @raise HandlingError: when fewer than two horizontal ticks are requested
    """
    # the requested physical size of the image in pixels
    physical_size = (640, 480)
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    # The vertex count is not used below; the traversal is kept as written.
    nvertices = len(list(tree.preorder()))
    nleaves = len(list(tree.gen_tips()))
    # distances among all vertices, with the leaves ordered first
    ordered_ids = get_ordered_ids(tree)
    full_distances = np.array(tree.get_partial_distance_matrix(ordered_ids))
    ext = Form.g_imageformat_to_ext[fs.imageformat]
    if fs.hticks < 2:
        msg = 'expected at least two ticks on the horizontal axis'
        raise HandlingError(msg)
    # scaling factors and offsets of the drawing area
    width, height = physical_size
    xoffset = yoffset = fs.border
    yscale = float(height - 2 * fs.border)
    xscale = (width - 2 * fs.border) / float(fs.hticks - 1)
    # choose the eigendecomposition function
    if fs.slow:
        fn = get_augmented_spectrum
    elif fs.fast:
        fn = get_augmented_spectrum_fast
    # the target eigenvalues come from the distances among the tips
    tip_ids = [id(tip) for tip in tree.gen_tips()]
    tip_distances = np.array(tree.get_partial_distance_matrix(tip_ids))
    tip_covariance = Euclid.edm_to_dccov(tip_distances)
    target_ws = scipy.linalg.eigh(tip_covariance, eigvals_only=True) * fs.denom
    return create_image(
            ext, physical_size, xscale, yscale, xoffset, yoffset,
            full_distances, nleaves, fs.hticks, fs.denom, fn, target_ws)
def create_laplacian_matrix(lena, lenb, lenc):
    """
    Build the Laplacian matrix of a hub vertex with three path branches.
    Vertex 0 is the hub, and the vertices of each branch are numbered
    consecutively, with the first branch starting at vertex 1.
    @param lena: integer length of first branch.
    @param lenb: integer length of second branch.
    @param lenc: integer length of third branch.
    @return: the result of Euclid.adjacency_to_laplacian
    """
    branch_lengths = (lena, lenb, lenc)
    nvertices = 1 + lena + lenb + lenc
    adjacency = np.zeros((nvertices, nvertices), dtype=float)
    first_vertex = 1
    for length in branch_lengths:
        # Connect the hub to the first vertex of a nonempty branch.
        if length:
            adjacency[0, first_vertex] = 1
            adjacency[first_vertex, 0] = 1
        # Connect consecutive vertices along the branch.
        for vertex in range(first_vertex, first_vertex + length - 1):
            adjacency[vertex, vertex + 1] = 1
            adjacency[vertex + 1, vertex] = 1
        first_vertex += length
    return Euclid.adjacency_to_laplacian(adjacency)
def get_full_tree_message(tree, m_to_string):
    """
    Describe the Fiedler vector of the full tree.
    The report shows the matrix returned by get_reciprocal_matrix for the
    affinity matrix of the tree, followed by one Fiedler vector value
    per vertex name in sorted order.
    @param tree: each node in this tree must have a name
    @param m_to_string: a function that converts a matrix to a string
    @return: a multi-line string
    """
    out = StringIO()
    # map the sorted vertex names to the ids of their nodes
    ordered_names = sorted(node.get_name() for node in tree.preorder())
    name_to_id = {}
    for node in tree.preorder():
        name_to_id[node.get_name()] = id(node)
    ordered_ids = [name_to_id[name] for name in ordered_names]
    # show the reciprocal of the full affinity matrix
    affinity = np.array(tree.get_affinity_matrix(ordered_ids))
    print >> out, 'the weighted reciprocal adjacency matrix of the full tree:'
    print >> out, m_to_string(get_reciprocal_matrix(affinity))
    print >> out
    # show the Fiedler vector of the full Laplacian matrix
    laplacian = Euclid.adjacency_to_laplacian(affinity)
    fiedler = BuildTreeTopology.laplacian_to_fiedler(laplacian)
    print >> out, 'the Fiedler split of the full tree:'
    for vertex_name, valuation in zip(ordered_names, fiedler):
        print >> out, vertex_name, ':', valuation
    return out.getvalue().strip()
def get_response_content(fs):
    """
    Draw a graph using coordinates derived from its Laplacian matrix.
    The vertex positions are the first two coordinates of the points
    computed from the pseudoinverse of the Laplacian,
    and the vertex colors are defined by the first coordinate.
    @param fs: an object holding the graph data and the image options
    @return: the value returned by get_image_string
    @raise HandlingError: when the drawable area is empty
    or when the drawing code reports an error
    """
    input_points, edges = read_points_and_edges(fs.graph_data)
    # weighted edges use the reciprocal of the distance between endpoints
    if not fs.weighted:
        weights = [1.0 for _ in edges]
    else:
        arrays = [np.array(point) for point in input_points]
        lengths = [
                np.linalg.norm(arrays[sink] - arrays[source])
                for source, sink in edges]
        weights = [1.0 / length for length in lengths]
    # the drawable area excludes the border on each side
    drawable_width = fs.total_width - 2 * fs.border
    drawable_height = fs.total_height - 2 * fs.border
    if drawable_width < 1 or drawable_height < 1:
        msg = 'the image dimensions do not allow for enough drawable area'
        raise HandlingError(msg)
    # embed the vertices using the pseudoinverse of the Laplacian
    laplacian = edges_to_laplacian(edges, weights)
    embedded = Euclid.dccov_to_points(np.linalg.pinv(laplacian))
    if fs.flip:
        points = [(-row[0], row[1]) for row in embedded]
    else:
        points = [(row[0], row[1]) for row in embedded]
    x_coords, _ = zip(*points)
    colors = valuations_to_colors(x_coords)
    # draw the image
    ext = Form.g_imageformat_to_ext[fs.imageformat]
    info = ImageInfo(fs.total_width, fs.total_height, fs.border, ext)
    try:
        return get_image_string(points, edges, colors, fs.black, info)
    except CairoUtil.CairoUtilError as e:
        raise HandlingError(e)
def newimg(img):
    """
    Turn an image into an ordered path through its cluster landmarks.
    The image is deskewed, its points are collected and clustered,
    and the clusters are ordered along the path given by
    Ham.Shortest_path_way.
    The cluster count is read from the out-of-block name num_clusters.
    Nothing is computed when 300 or more points are found.
    """
    deskewed = Deskew.deskew(img)
    # All points selected by MyClust from the deskewed image.
    data = MyClust.get_all_points(deskewed)
    print("Shape:", data.shape[0])
    if not (data.shape[0] < 300):
        # Too many points; give up and return None.
        print("Nice try !!!!")
        return
    print("Continue")
    # Cluster the points into landmarks.
    landmarks = MyClust.Get_Clusters(data, num_clusters)
    # Array built from the result of Euclid.fun1 for the landmarks.
    cluster_distances = np.array(Euclid.fun1(landmarks, num_clusters))
    # Order in which the landmarks are to be visited.
    route = Ham.Shortest_path_way(cluster_distances, num_clusters)
    # Keep the first entry of the result without its final element.
    visit_order = np.array(route[0][:-1], dtype=np.int64)
    # Arrange the landmarks according to the visiting order.
    ordered_path = Ham.path_order(visit_order, landmarks, num_clusters)
    return ordered_path
def do_projection(D_full, nleaves):
    """
    Project points onto the space of the leaves.
    All points are translated so that the leaf centroid is the origin,
    and are then rotated by the right singular vectors of the leaf points.
    Only the first nleaves-1 coordinates are returned.
    @param D_full: distances relating all, including internal, vertices.
    @param nleaves: the first few indices in D_full represent leaves
    @return: a numpy array where each row is a vertex of the tree
    """
    all_points = Euclid.edm_to_points(D_full)
    # Move the origin to the centroid of the leaves.
    all_points -= np.mean(all_points[:nleaves], axis=0)
    leaf_points = all_points[:nleaves]
    # Only the right singular vectors of the leaf points are needed.
    _, _, Vt = np.linalg.svd(leaf_points)
    # Rotate every point, including the internal vertices.
    rotated = np.dot(all_points, Vt.T)
    return rotated[:, :nleaves - 1]
def get_response_content(fs):
    """
    Report several matrices derived from a distance matrix.
    The Laplacian, sigma and precision matrices are shown,
    followed by three differences from the Laplacian matrix.
    @param fs: an object whose matrix attribute is the distance matrix
    @return: a multi-line string
    """
    distance_matrix = fs.matrix
    laplacian = Euclid.edm_to_laplacian(distance_matrix)
    sigma = get_sigma_matrix(distance_matrix)
    precision = get_precision_matrix(sigma)
    # each section is a title followed by the matrix it describes
    sections = (
            ('the Laplacian matrix:',
                laplacian),
            ('the sigma matrix corresponding to the Q matrix:',
                sigma),
            ('the precision matrix corresponding to the Q matrix:',
                precision),
            ('the precision matrix minus the laplacian matrix:',
                precision - laplacian),
            ('the double centered precision matrix '
                'minus the laplacian matrix:',
                MatrixUtil.double_centered(precision) - laplacian),
            ('the pseudo-inverse of the double centered sigma matrix '
                'minus the laplacian matrix:',
                np.linalg.pinv(MatrixUtil.double_centered(sigma)) - laplacian))
    out = StringIO()
    for position, (title, matrix) in enumerate(sections):
        # sections are separated by a blank line
        if position:
            print >> out
        print >> out, title
        print >> out, MatrixUtil.m_to_string(matrix)
    return out.getvalue()
def get_response_content(fs):
    """
    Look for graphs that are split badly by a cut of a Schur complement.
    For each connected random graph and each vertex, the vertex is removed
    by a Schur complement and the reduced graph is cut.
    The removed vertex is then tried on each side of the cut,
    and the split is valid when some choice leaves both sides connected.
    @param fs: an object holding the graph and cut options
    @return: a multi-line string listing invalid splits and summary counts
    """
    out = StringIO()
    unconnected_count = 0
    invalid_split_count = 0
    valid_split_count = 0
    for _ in range(fs.ngraphs):
        G = erdos_renyi(fs.nvertices, fs.pedge)
        if not is_connected(G):
            unconnected_count += 1
            continue
        # add interesting edge weights and get the Laplacian matrix
        add_exponential_weights(G)
        L = Euclid.adjacency_to_laplacian(G)
        for removed in range(fs.nvertices):
            # map each index of the reduced graph to its original index
            kept = [i for i in range(fs.nvertices) if i != removed]
            reduced_to_full = dict(enumerate(kept))
            # take the schur complement with respect to the removed vertex
            L_reduced = get_single_element_schur_complement(L, removed)
            assert len(L_reduced) == len(L) - 1
            # get the loadings of the vertices of the reduced graph
            if fs.fiedler_cut:
                Y_reduced = BuildTreeTopology.laplacian_to_fiedler(L_reduced)
            elif fs.random_cut:
                Y_reduced = get_random_vector(L_reduced)
            assert len(Y_reduced) == len(L_reduced)
            # try the removed vertex on each side of the cut
            split_is_valid = False
            for removed_loading in (-1.0, 1.0):
                Y_full = [0]*len(G)
                for i_reduced, loading in enumerate(Y_reduced):
                    Y_full[reduced_to_full[i_reduced]] = loading
                Y_full[removed] = removed_loading
                assert len(Y_full) == len(G)
                # the split is valid if both subgraphs are connected
                subgraph_a, subgraph_b = list(gen_subgraphs(G, Y_full))
                if is_connected(subgraph_a) and is_connected(subgraph_b):
                    split_is_valid = True
            if split_is_valid:
                valid_split_count += 1
                continue
            # no valid split was found so show the matrix
            print >> out, 'Found a matrix that was split incompatibly by a cut of its schur complement!'
            print >> out, 'matrix:'
            print >> out, MatrixUtil.m_to_string(G)
            print >> out, 'index that was removed:', removed
            invalid_split_count += 1
    # show the summary counts
    print >> out, 'this many random graphs were connected:', fs.ngraphs - unconnected_count
    print >> out, 'this many random graphs were not connected:', unconnected_count
    print >> out, 'this many splits were valid:', valid_split_count
    print >> out, 'this many splits were invalid:', invalid_split_count
    return out.getvalue()
def get_response_content(fs):
    """
    Convert a distance matrix to a matrix of edge resistors.
    Each entry is the negative reciprocal of the corresponding entry
    of the Laplacian matrix, and the diagonal is then subtracted out.
    @param fs: an object whose matrix attribute is the distance matrix
    @return: the resistor matrix as a string ending with a newline
    """
    laplacian = Euclid.edm_to_laplacian(fs.matrix)
    edge_resistors = -1/laplacian
    # Remove the diagonal by subtracting a matrix holding only the diagonal.
    diagonal_only = np.diag(np.diag(edge_resistors))
    edge_resistors -= diagonal_only
    return MatrixUtil.m_to_string(edge_resistors) + '\n'
def get_response_content(fs):
    """
    Compare the Laplacian of a tree with an augmented Laplacian.
    For each of the two, the report shows the Laplacian matrix,
    its Schur complement with the internal vertices removed,
    and the scaled Fiedler vector of that Schur complement.
    @param fs: an object with tree_string and strength attributes
    @return: a multi-line string
    """
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    nvertices = len(list(tree.preorder()))
    nleaves = len(list(tree.gen_tips()))
    # the affinity matrix has the leaves ordered first
    ordered_ids = get_ordered_ids(tree)
    affinity = np.array(tree.get_affinity_matrix(ordered_ids))
    affinity_aug = get_augmented_adjacency(affinity, nleaves, fs.strength)
    laplacian = Euclid.adjacency_to_laplacian(affinity)
    laplacian_aug = Euclid.adjacency_to_laplacian(affinity_aug)
    # remove the internal vertices by schur complementation
    schur = SchurAlgebra.mschur(
            laplacian, set(range(nleaves, nvertices)))
    schur_aug = SchurAlgebra.mschur(
            laplacian_aug, set(range(nleaves, nvertices)))
    # scale each principal eigenvector by the root of its eigenvalue
    w, v = EigUtil.principal_eigh(np.linalg.pinv(schur))
    fiedler = v * math.sqrt(w)
    w_aug, v_aug = EigUtil.principal_eigh(np.linalg.pinv(schur_aug))
    fiedler_aug = v_aug * math.sqrt(w_aug)
    # every section is followed by a blank line
    sections = (
            ('Laplacian matrix:', laplacian),
            ('Schur complement of Laplacian matrix:', schur),
            ('scaled Fiedler vector:', fiedler),
            ('augmented Laplacian matrix:', laplacian_aug),
            ('Schur complement of augmented Laplacian matrix:', schur_aug),
            ('scaled Fiedler vector of augmented matrix:', fiedler_aug))
    np.set_printoptions(linewidth=200)
    out = StringIO()
    for title, value in sections:
        print >> out, title
        print >> out, value
        print >> out
    return out.getvalue()
def do_internal_projection(D_full):
    """
    Get points for all vertices directly from the full distance matrix.
    This is a thin wrapper around Euclid.edm_to_points.
    According to the original notes the n rows are points
    in n-1 dimensional space and the first coordinate is the principal axis.
    @param D_full: the distance matrix as a numpy array
    relating all vertices including internal vertices
    @return: a numpy array where each row is a vertex of the tree
    """
    return Euclid.edm_to_points(D_full)
def get_response_content(fs):
    """
    Take the Schur complement of a laplacian matrix over selected taxa.
    The report shows the new laplacian matrix, the distance matrix
    derived from it, and the labels of the taxa that were not selected.
    @param fs: form data; laplacian, labels, and selection are read
    @return: a multi-line string that summarizes the results
    @raise HandlingError: if the labels, the selection,
    or the matrix size are not compatible with each other
    """
    # read the matrix
    L = fs.laplacian
    # read the ordered labels
    ordered_labels = Util.get_stripped_lines(StringIO(fs.labels))
    if not ordered_labels:
        raise HandlingError('no ordered taxa were provided')
    # build the label set once; it is reused by the checks below
    label_set = set(ordered_labels)
    if len(ordered_labels) != len(label_set):
        raise HandlingError('the ordered taxa should be unique')
    # get the label selection and its complement
    min_selected_labels = 2
    min_unselected_labels = 1
    selected_labels = set(Util.get_stripped_lines(StringIO(fs.selection)))
    if len(selected_labels) < min_selected_labels:
        raise HandlingError(
                'at least %d taxa should be selected to be grouped'
                % min_selected_labels)
    # get the set of labels in the complement
    unselected_labels = label_set - selected_labels
    if len(unselected_labels) < min_unselected_labels:
        raise HandlingError(
                'at least %d taxa should remain outside the selected group'
                % min_unselected_labels)
    # assert that no bizarre labels were selected
    weird_labels = selected_labels - label_set
    if weird_labels:
        raise HandlingError(
                'some selected taxa are invalid: ' + str(weird_labels))
    # assert that the size of the laplacian matrix is compatible
    # with the number of ordered labels;
    # the message names the laplacian because that is the matrix read here
    if len(L) != len(ordered_labels):
        raise HandlingError(
                'the number of listed taxa does not match '
                'the number of rows in the laplacian matrix')
    # get the set of selected indices and its complement
    n = len(L)
    index_selection = set(
            i for i, label in enumerate(ordered_labels)
            if label in selected_labels)
    index_complement = set(range(n)) - index_selection
    # begin the response
    out = StringIO()
    # calculate the new laplacian matrix and its distance matrix
    L_small = SchurAlgebra.mschur(L, index_selection)
    D_small = Euclid.laplacian_to_edm(L_small)
    # print the matrices and the labels of its rows
    print >> out, 'new laplacian matrix:'
    print >> out, MatrixUtil.m_to_string(L_small)
    print >> out
    print >> out, 'new distance matrix:'
    print >> out, MatrixUtil.m_to_string(D_small)
    print >> out
    print >> out, 'new taxon labels:'
    # the remaining labels are listed in their original order
    for index in sorted(index_complement):
        print >> out, ordered_labels[index]
    # write the response
    return out.getvalue()
def get_response_content(fs):
    """
    Draw points sampled inside the Africa polygon, colored by a spectral axis.
    Points are connected by a refined set of triangulation edges,
    and the colors come from one eigenvector of the pseudoinverse
    of the resulting weighted graph laplacian.
    @param fs: form data; seed, npoints, axis, total_width, total_height,
    border, and imageformat are read
    @return: the image string produced by the image helper
    @raise ValueError: if the requested axis is negative or too large
    @raise HandlingError: if the drawable area is empty or if drawing fails
    """
    # seed the random number generator only when a truthy seed was given
    if fs.seed:
        random.seed(fs.seed)
    # cap on the number of rejection sampling iterations
    rejection_limit = fs.npoints * 100
    # validate input
    if fs.axis < 0:
        raise ValueError('the mds axis must be nonnegative')
    # the boundary is a closed polygon so the last vertex joins the first
    boundary_size = len(g_africa_poly)
    africa_edges = [
            (k, (k + 1) % boundary_size) for k in range(boundary_size)]
    # sample interior points and triangulate them
    points = sample_with_rejection(fs.npoints, g_africa_poly, rejection_limit)
    x_list, y_list = zip(*points)
    triangulation = Triangulation(x_list, y_list)
    # shift the edge indices so that they refer to the combined point list
    interior_edges = [
            (a + boundary_size, b + boundary_size)
            for a, b in triangulation.edge_db.tolist()]
    # boundary points come first in the combined point list
    allpoints = g_africa_poly + points
    # refine the list of interior edges
    interior_edges = list(
            gen_noncrossing_edges(interior_edges, africa_edges, allpoints))
    interior_edges = get_mst(interior_edges, allpoints)
    alledges = africa_edges + interior_edges
    # weight each remaining edge by the reciprocal of its length
    point_count = len(points)
    adjacency = np.zeros((point_count, point_count))
    for source, sink in interior_edges:
        xa, ya = allpoints[source]
        xb, yb = allpoints[sink]
        weight = 1 / math.hypot(xb - xa, yb - ya)
        row = source - boundary_size
        col = sink - boundary_size
        adjacency[row, col] = weight
        adjacency[col, row] = weight
    laplacian = Euclid.adjacency_to_laplacian(adjacency)
    eigenvalues, eigenvectors = EigUtil.eigh(np.linalg.pinv(laplacian))
    if fs.axis >= len(eigenvalues):
        raise ValueError('choose a smaller mds axis')
    axis_vector = eigenvectors[fs.axis]
    # normalize in place so that the largest absolute entry is one
    axis_vector /= max(np.abs(axis_vector))
    # boundary points are small and black; sampled points are larger
    colors = [(0, 0, 0)] * boundary_size
    colors = colors + [get_color(value) for value in axis_vector]
    radii = [2] * boundary_size + [5] * len(points)
    # make sure that the border leaves some drawable area
    drawable_width = fs.total_width - 2 * fs.border
    drawable_height = fs.total_height - 2 * fs.border
    if drawable_width < 1 or drawable_height < 1:
        msg = 'the image dimensions do not allow for enough drawable area'
        raise HandlingError(msg)
    # draw the image
    ext = Form.g_imageformat_to_ext[fs.imageformat]
    try:
        helper = ImgHelper(
                allpoints, alledges,
                fs.total_width, fs.total_height, fs.border)
        return helper.get_image_string(colors, radii, ext)
    except CairoUtil.CairoUtilError as e:
        raise HandlingError(e)
def process(npoints, nseconds):
    """
    Search randomly for an induced matrix with a low smallest eigenvalue.
    Each candidate is built from sampled points by merging indices 0 and 1
    of the laplacian derived from the distance matrix of the points.
    At least one candidate is always evaluated, even when nseconds is zero
    or negative, so that there is always a result to report.
    @param npoints: attempt to form each counterexample from this many points
    @param nseconds: allow this many seconds to run
    @return: a multi-line string that summarizes the results
    """
    start_time = time.time()
    best_result = None
    best_min = None
    nchecked = 0
    # The first clause forces one pass even if the time budget is exhausted
    # immediately; otherwise the report below would have nothing to show.
    while best_result is None or time.time() - start_time < nseconds:
        # look for a counterexample
        points = sample_points(npoints)
        D = points_to_edm(points)
        L = Euclid.edm_to_laplacian(D)
        L_small = SchurAlgebra.mmerge(L, set([0, 1]))
        w = np.linalg.eigvalsh(L_small)
        D_small = Euclid.laplacian_to_edm(L_small)
        result = Counterexample(points, D, w, D_small)
        # keep the candidate whose smallest eigenvalue is the lowest so far;
        # the best minimum is cached instead of being recomputed every pass
        result_min = min(result.L_eigenvalues)
        if best_result is None or result_min < best_min:
            best_result = result
            best_min = result_min
        nchecked += 1
    # report the best candidate
    out = StringIO()
    print >> out, 'checked', nchecked, 'matrices each formed from', npoints, 'points'
    print >> out
    print >> out, 'eigenvalues of the induced matrix with lowest eigenvalue:'
    for value in reversed(sorted(best_result.L_eigenvalues)):
        print >> out, value
    print >> out
    print >> out, 'corresponding induced distance matrix:'
    print >> out, MatrixUtil.m_to_string(best_result.D_small)
    print >> out
    print >> out, 'the original distance matrix corresponding to this matrix:'
    print >> out, MatrixUtil.m_to_string(best_result.D)
    print >> out
    print >> out, 'the points that formed the original distance matrix:'
    for point in best_result.points:
        print >> out, '\t'.join(str(x) for x in point)
    return out.getvalue().strip()