Exemplo n.º 1
0
def get_response_content(fs):
    """
    Report two spectral diagnostics of a distance matrix as plain text.
    The report lists the eigenvalues of the doubly centered matrix derived
    from the distance matrix, the eigenvector paired with the smallest
    eigenvalue, and the sign of the sum of the entries of the pseudoinverse
    of the distance matrix.
    @param fs: an object whose matrix attribute holds the distance matrix
    @return: the text of the report
    """
    # read the matrix
    D = fs.matrix
    # begin the response
    out = StringIO()
    # Look at the eigenvalues
    # of the associated doubly centered covariance matrix.
    HSH = Euclid.edm_to_dccov(D)
    # eigh returns the eigenvectors as columns, so transpose to get rows
    w, V_T = np.linalg.eigh(HSH)
    V = V_T.T
    print >> out, 'eigenvalues of the associated doubly centered covariance matrix:'
    for x in sorted(w, reverse=True):
        print >> out, x
    print >> out
    print >> out, 'eigenvector associated with last eigenvalue:'
    # Select the row by index instead of taking the min of
    # (eigenvalue, row) pairs: when the smallest eigenvalue is repeated
    # the pair comparison falls through to comparing the numpy rows,
    # which raises a ValueError.
    last_eigenvector = V[np.argmin(w)]
    for x in last_eigenvector:
        print >> out, x
    print >> out
    # look at another criterion
    D_pinv = np.linalg.pinv(D)
    criterion = np.sum(D_pinv)
    if criterion > 0:
        print >> out, 'sum of elements of the pseudoinverse of the distance matrix is positive'
    else:
        print >> out, 'sum of elements of the pseudoinverse of the distance matrix is nonpositive'
    print >> out, 'A Euclidean distance matrix is spherical if and only if the sum of the elements of its pseudoinverse is positive.'
    print >> out, 'For this distance matrix, this sum is', criterion
    # write the response
    return out.getvalue()
Exemplo n.º 2
0
 def split_function(self, D):
     """
     Split the distance matrix using signs of an eigenvector of -HDH/2.
     The eigenvector paired with the largest eigenvalue is used.
     If the split is degenerate (one side has a single index)
     then self.eigenvalues is reset to None and the result of
     BuildTreeTopology.split_nj(D) is returned instead.
     Otherwise self.eigenvalues holds the eigenvalues that were computed.
     @param D: the distance matrix
     @return: a set of two index sets defining a split of the indices
     """
     try:
         # get the matrix whose eigendecomposition is of interest
         HSH = Euclid.edm_to_dccov(D)
         # get the eigendecomposition;
         # eigh returns eigenvectors as columns so transpose to get rows
         eigenvalues, V_T = np.linalg.eigh(HSH)
         eigenvectors = V_T.T.tolist()
         # save the eigenvalues for reporting
         self.eigenvalues = eigenvalues
         # get the eigenvector of interest (the eigenvalue itself is unused)
         _, v = max(zip(eigenvalues, eigenvectors))
         # get the indices with positive eigenvector valuations
         n = len(D)
         positive = frozenset(i for i, x in enumerate(v) if x > 0)
         nonpositive = frozenset(set(range(n)) - positive)
         # sanity check: both sides of the split should be nonempty
         for index_set in (positive, nonpositive):
             assert len(index_set) > 0
         # check for a degenerate split
         for index_set in (positive, nonpositive):
             if len(index_set) == 1:
                 index, = index_set
                 raise BuildTreeTopology.DegenerateSplitException(index)
         return frozenset((positive, nonpositive))
     except BuildTreeTopology.DegenerateSplitException:
         # fall back to a neighbor joining split;
         # the saved eigenvalues do not describe the returned split
         self.eigenvalues = None
         return BuildTreeTopology.split_nj(D)
Exemplo n.º 3
0
def get_response_content(fs):
    """
    Build the inputs for the spectrum plot of a tree and draw the image.
    @param fs: an object with the attributes tree_string, imageformat,
    hticks, border, slow, fast and denom
    @return: the value returned by create_image
    @raise HandlingError: if fewer than two horizontal ticks are requested
    or if neither the slow nor the fast method is selected
    """
    # define the requested physical size of the images (in pixels)
    physical_size = (640, 480)
    # build the newick tree from the string
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    nleaves = len(list(tree.gen_tips()))
    # Get ordered ids with the leaves first,
    # and get the corresponding distance matrix.
    ordered_ids = get_ordered_ids(tree)
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    # get the image extension
    ext = Form.g_imageformat_to_ext[fs.imageformat]
    # get the scaling factors and offsets
    if fs.hticks < 2:
        msg = 'expected at least two ticks on the horizontal axis'
        raise HandlingError(msg)
    width, height = physical_size
    xoffset = fs.border
    yoffset = fs.border
    yscale = float(height - 2 * fs.border)
    xscale = (width - 2 * fs.border) / float(fs.hticks - 1)
    # define the eigendecomposition function
    if fs.slow:
        fn = get_augmented_spectrum
    elif fs.fast:
        fn = get_augmented_spectrum_fast
    else:
        # without this branch fn would be unbound when it is used below
        msg = 'expected either the slow or the fast method to be selected'
        raise HandlingError(msg)
    # define the target eigenvalues
    tip_ids = [id(node) for node in tree.gen_tips()]
    D_tips = np.array(tree.get_partial_distance_matrix(tip_ids))
    G_tips = Euclid.edm_to_dccov(D_tips)
    target_ws = scipy.linalg.eigh(G_tips, eigvals_only=True) * fs.denom
    # draw the image
    return create_image(ext, physical_size, xscale, yscale, xoffset, yoffset,
                        D, nleaves, fs.hticks, fs.denom, fn, target_ws)
Exemplo n.º 4
0
def get_response_content(fs):
    """
    Plot the leading eigenvectors of a doubly centered distance matrix.
    The distance matrix is derived from the Laplacian matrix built by
    create_laplacian_matrix for the requested number of vertices.
    @param fs: an object with the attributes nvertices, naxes,
    eigenvalue_scaling, imageformat and xaxis_length
    @return: the value returned by create_image_string
    @raise ValueError: if more axes are requested than can be plotted
    @raise HandlingError: if drawing fails with a CairoUtil.CairoUtilError
    """
    # check input compatibility
    if fs.nvertices < fs.naxes+1:
        msg_a = 'attempting to plot too many eigenvectors '
        msg_b = 'for the given number of vertices'
        raise ValueError(msg_a + msg_b)
    # define the requested physical size of the images (in pixels)
    physical_size = (640, 480)
    # get the points
    L = create_laplacian_matrix(fs.nvertices)
    D = Euclid.laplacian_to_edm(L)
    HSH = Euclid.edm_to_dccov(D)
    # eigh returns eigenvectors as columns so transpose to get rows
    W, VT = np.linalg.eigh(HSH)
    V = VT.T.tolist()
    # Order the eigenpairs by decreasing eigenvalue and drop the smallest.
    # The raw eigenvalues are used as the sort key because the square root
    # of a slightly negative eigenvalue is NaN, which cannot be ordered.
    pairs = sorted(zip(W, V), reverse=True)[:-1]
    if fs.eigenvalue_scaling:
        # clamp at zero so that rounding noise cannot produce NaN scales
        vectors = [np.array(v) * np.sqrt(max(w, 0.0)) for w, v in pairs]
    else:
        vectors = [np.array(v) for w, v in pairs]
    # each row is one eigenvector; keep only the requested axes
    F = np.array(vectors)[:fs.naxes]
    # transform the points to eigenfunctions such that the first point is positive
    for i in range(fs.naxes):
        if F[i][0] < 0:
            F[i] *= -1
    # draw the image
    try:
        ext = Form.g_imageformat_to_ext[fs.imageformat]
        return create_image_string(ext, physical_size, F, fs.xaxis_length)
    except CairoUtil.CairoUtilError as e:
        raise HandlingError(e)
Exemplo n.º 5
0
def process():
    """
    Print several eigendecompositions related to a small tree.
    The report covers the doubly centered leaf distance matrix,
    the weighted cross product matrix for a degenerate mass distribution,
    and for two further mass distributions the pseudoinverse of a
    perturbed laplacian next to the weighted cross product matrix.
    @return: a multi-line string that summarizes the results
    """
    def emit(stream, heading, value):
        # write a heading line followed by the value on the next line(s)
        print >> stream, heading
        print >> stream, value

    np.set_printoptions(linewidth=200)
    # the adjacency matrix and the number of masses per distribution
    adjacency = g_A
    nmasses = 6
    # the mass distributions examined in the loop below
    uniform_masses = np.ones(nmasses) / float(nmasses)
    weighted_masses = np.array([102, 102, 102, 102, 1, 1], dtype=float) / 410
    out = StringIO()
    # eigendecomposition of -(1/2)HDH where D is the leaf distance matrix
    leaf_w, leaf_vt = np.linalg.eigh(Euclid.edm_to_dccov(Euclid.g_D_b))
    emit(out, 'W for -(1/2)HDH of the leaf distance matrix:', leaf_w)
    emit(out, 'VT for -(1/2)HDH of the leaf distance matrix:', leaf_vt)
    # eigendecomposition of S for a degenerate mass distribution on the full tree
    degenerate_masses = np.array([.25, .25, .25, .25, 0, 0])
    cross = Euclid.edm_to_weighted_cross_product(
            Euclid.g_D_c, degenerate_masses)
    cross_w, cross_vt = np.linalg.eigh(cross)
    emit(out, 'W for -(1/2)(Xi)D(Xi)^T of the full distance matrix with degenerate masses:', cross_w)
    emit(out, 'VT for -(1/2)(Xi)D(Xi)^T of the full distance matrix with degenerate masses:', cross_vt)
    # effects of various mass distributions on the MDS of the full tree
    for masses in (uniform_masses, weighted_masses):
        # the mass distribution should sum to 1
        if not np.allclose(masses.sum(), 1):
            raise ValueError('masses should sum to 1')
        # weighted sums of the adjacency matrix, divided elementwise by the masses
        scaled_sums = np.dot(masses, adjacency) / masses
        # the perturbed laplacian is this diagonal minus the adjacency matrix
        perturbed = np.diag(scaled_sums) - adjacency
        pinv_w, pinv_vt = np.linalg.eigh(np.linalg.pinv(perturbed))
        # the S matrix associated with the distance matrix of this tree
        cross = Euclid.edm_to_weighted_cross_product(Euclid.g_D_c, masses)
        cross_w, cross_vt = np.linalg.eigh(cross)
        emit(out, 'perturbed laplacian:', perturbed)
        print >> out, 'm:', masses
        emit(out, 'W for the pseudoinverse of the perturbed laplacian:', pinv_w)
        emit(out, 'VT for the pseudoinverse of the perturbed laplacian:', pinv_vt)
        emit(out, 'W for the cross product matrix:', cross_w)
        emit(out, 'VT for the cross product matrix:', cross_vt)
    report = out.getvalue()
    return report.strip()
Exemplo n.º 6
0
def process():
    """
    Summarize eigendecompositions associated with a small tree.
    Three groups of results are written: the doubly centered leaf
    distance matrix, the weighted cross product matrix under a degenerate
    mass distribution, and per mass distribution the pseudoinverse of a
    perturbed laplacian together with the weighted cross product matrix.
    @return: a multi-line string that summarizes the results
    """
    np.set_printoptions(linewidth=200)
    adj = g_A
    count = 6
    # mass distributions used in the final comparison
    m_flat = np.ones(count) / float(count)
    m_skewed = np.array([102, 102, 102, 102, 1, 1], dtype=float) / 410
    out = StringIO()
    # -(1/2)HDH where D is the leaf distance matrix
    dccov = Euclid.edm_to_dccov(Euclid.g_D_b)
    w_dccov, vt_dccov = np.linalg.eigh(dccov)
    # S for a degenerate mass distribution on the full tree
    m_zeroed = np.array([.25, .25, .25, .25, 0, 0])
    S_zeroed = Euclid.edm_to_weighted_cross_product(Euclid.g_D_c, m_zeroed)
    w_zeroed, vt_zeroed = np.linalg.eigh(S_zeroed)
    preamble = [
        ('W for -(1/2)HDH of the leaf distance matrix:', w_dccov),
        ('VT for -(1/2)HDH of the leaf distance matrix:', vt_dccov),
        ('W for -(1/2)(Xi)D(Xi)^T of the full distance matrix with degenerate masses:', w_zeroed),
        ('VT for -(1/2)(Xi)D(Xi)^T of the full distance matrix with degenerate masses:', vt_zeroed),
    ]
    for title, value in preamble:
        print >> out, title
        print >> out, value
    # effects of various mass distributions on the MDS of the full tree
    for m in (m_flat, m_skewed):
        # the mass distribution should sum to 1
        total = np.sum(m)
        if not np.allclose(total, 1):
            raise ValueError('masses should sum to 1')
        # weighted sums of adjacency entries, divided elementwise by the masses
        ratios = np.dot(m, adj)
        ratios = ratios / m
        # perturbed laplacian: diagonal of the ratios minus the adjacency matrix
        Lp = np.diag(ratios) - adj
        Lp_pinv = np.linalg.pinv(Lp)
        w_pinv, vt_pinv = np.linalg.eigh(Lp_pinv)
        # S matrix associated with the distance matrix of this tree
        S = Euclid.edm_to_weighted_cross_product(Euclid.g_D_c, m)
        w_S, vt_S = np.linalg.eigh(S)
        print >> out, 'perturbed laplacian:'
        print >> out, Lp
        print >> out, 'm:', m
        per_mass = [
            ('W for the pseudoinverse of the perturbed laplacian:', w_pinv),
            ('VT for the pseudoinverse of the perturbed laplacian:', vt_pinv),
            ('W for the cross product matrix:', w_S),
            ('VT for the cross product matrix:', vt_S),
        ]
        for title, value in per_mass:
            print >> out, title
            print >> out, value
    return out.getvalue().strip()
Exemplo n.º 7
0
def get_response_content(fs):
    """
    Compare great arc and euclidean distances among cities in a text report.
    For each distance function the report shows the pairwise distances,
    their elementwise squares, the Gower centered matrix, its spectrum,
    and the first two MDS coordinates of each city.
    @param fs: an object whose datalines attribute is the multi-line input
    @return: the text of the report
    """
    # read the lat-lon points from the input
    lines = Util.get_stripped_lines(fs.datalines.splitlines())
    rows = parse_lines(lines)
    latlon_points = []
    city_names = []
    for city, latd, latm, lond, lonm in rows:
        lat = math.radians(GPS.degrees_minutes_to_degrees(latd, latm))
        lon = math.radians(GPS.degrees_minutes_to_degrees(lond, lonm))
        latlon_points.append((lat, lon))
        city_names.append(city)
    npoints = len(latlon_points)
    # start writing the response
    np.set_printoptions(linewidth=200)
    out = StringIO()
    radius = GPS.g_earth_radius_miles
    for dfunc, name in (
            (GPS.get_arc_distance, 'great arc'),
            (GPS.get_euclidean_distance, 'euclidean')):
        # define the edm whose elements are squared euclidean-like distances
        edm = np.zeros((npoints, npoints))
        D = np.zeros((npoints, npoints))
        for i, pointa in enumerate(latlon_points):
            for j, pointb in enumerate(latlon_points):
                D[i, j] = dfunc(pointa, pointb, radius)
                edm[i, j] = D[i, j]**2
        print >> out, name, 'distances:'
        print >> out, D
        print >> out
        print >> out, name, 'EDM:'
        print >> out, edm
        print >> out
        G = Euclid.edm_to_dccov(edm)
        print >> out, name, 'Gower centered matrix:'
        print >> out, G
        print >> out
        # Use the symmetric solver: it always returns real eigenvalues
        # in ascending order, whereas the general solver may return
        # complex values that can be neither sorted nor formatted below.
        # NOTE(review): this assumes G is meant to be symmetric -- confirm.
        spectrum = np.linalg.eigvalsh(G)[::-1]
        print >> out, name, 'spectrum of Gower centered matrix:'
        for x in spectrum:
            print >> out, x
        print >> out
        print >> out, name, 'rounded spectrum:'
        for x in spectrum:
            print >> out, '%.1f' % x
        print >> out
        mds_points = Euclid.edm_to_points(edm)
        print >> out, '2D MDS coordinates:'
        # use distinct names here so that the name of the distance
        # function is not clobbered by the city names
        for city_name, mds_point in zip(city_names, mds_points):
            fields = (city_name, mds_point[0], mds_point[1])
            print >> out, '\t'.join(str(field) for field in fields)
        print >> out
        # break between distance methods
        print >> out
    # return the response
    return out.getvalue()
Exemplo n.º 8
0
def get_response_content(fs):
    """
    Report a Schur complement analysis next to an augmented distance analysis.
    The first part of the report concerns the pseudoinverse of the Schur
    complement of the Laplacian matrix derived from the tree distances.
    The second part concerns the doubly centered augmented distance matrix.
    @param fs: an object with the attributes tree_string and ndups
    @return: the text of the report
    """
    # build the newick tree from the string
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    vertex_count = len(list(tree.preorder()))
    leaf_count = len(list(tree.gen_tips()))
    # ordered ids with the leaves first
    ids = get_ordered_ids(tree)
    # the distance matrix and its augmented counterpart
    D = np.array(tree.get_partial_distance_matrix(ids))
    D_aug = get_augmented_distance(D, leaf_count, fs.ndups)
    # the laplacian matrix and its schur complement
    L = Euclid.edm_to_laplacian(D)
    removed = set(range(leaf_count, vertex_count))
    R = SchurAlgebra.mschur(L, removed)
    R_pinv = np.linalg.pinv(R)
    vals, vecs = EigUtil.eigh(R_pinv)
    # the scaled Fiedler vector for the Schur complement
    top_val, top_vec = EigUtil.principal_eigh(R_pinv)
    fiedler = top_vec * math.sqrt(top_val)
    # the eigendecomposition of the centered augmented distance matrix
    L_aug_pinv = Euclid.edm_to_dccov(D_aug)
    vals_aug, vecs_aug = EigUtil.eigh(L_aug_pinv)
    # the scaled Fiedler vector for the augmented Laplacian
    top_val_aug, top_vec_aug = EigUtil.principal_eigh(L_aug_pinv)
    fiedler_aug = top_vec_aug * math.sqrt(top_val_aug)
    # report the results
    np.set_printoptions(linewidth=300, threshold=10000)
    out = StringIO()
    # each entry is (heading, value, number of blank lines that follow)
    sections = (
        ('Laplacian matrix:', L, 1),
        ('Schur complement of Laplacian matrix:', R, 1),
        ('scaled Fiedler vector of Schur complement:', fiedler, 1),
        ('eigenvalues of pinv of Schur complement:', vals, 1),
        ('corresponding eigenvectors of pinv of Schur complement:',
            np.array(vecs).T, 2),
        ('augmented distance matrix:', D_aug, 1),
        ('scaled Fiedler vector of augmented Laplacian limit:',
            fiedler_aug, 1),
        ('eigenvalues of pinv of augmented Laplacian limit:', vals_aug, 1),
        ('rows are eigenvectors of pinv of augmented Laplacian limit:',
            np.array(vecs_aug), 0),
    )
    for heading, value, nblank in sections:
        print >> out, heading
        print >> out, value
        for _ in range(nblank):
            print >> out
    return out.getvalue()
Exemplo n.º 9
0
def get_response_content(fs):
    """
    Write a text report with two spectral analyses of a tree.
    One analysis uses the pseudoinverse of the Schur complement of the
    Laplacian matrix; the other uses the doubly centered augmented
    distance matrix.
    @param fs: an object with the attributes tree_string and ndups
    @return: the text of the report
    """
    def section(stream, heading, value, gaps=1):
        # write a heading, the value, and then the given number of blank lines
        print >> stream, heading
        print >> stream, value
        for _ in range(gaps):
            print >> stream

    # build the newick tree from the string
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    n_all = len(list(tree.preorder()))
    n_tips = len(list(tree.gen_tips()))
    # ordered ids with the leaves first
    id_order = get_ordered_ids(tree)
    # plain and augmented distance matrices
    D = np.array(tree.get_partial_distance_matrix(id_order))
    D_aug = get_augmented_distance(D, n_tips, fs.ndups)
    # laplacian matrix followed by its schur complement
    L = Euclid.edm_to_laplacian(D)
    R = SchurAlgebra.mschur(L, set(range(n_tips, n_all)))
    R_pinv = np.linalg.pinv(R)
    vals, vecs = EigUtil.eigh(R_pinv)
    # scaled Fiedler vector for the Schur complement
    w, v = EigUtil.principal_eigh(R_pinv)
    scale = math.sqrt(w)
    fiedler = v * scale
    # eigendecomposition of the centered augmented distance matrix
    L_aug_pinv = Euclid.edm_to_dccov(D_aug)
    vals_aug, vecs_aug = EigUtil.eigh(L_aug_pinv)
    # scaled Fiedler vector for the augmented Laplacian
    w_aug, v_aug = EigUtil.principal_eigh(L_aug_pinv)
    scale_aug = math.sqrt(w_aug)
    fiedler_aug = v_aug * scale_aug
    # report the results
    np.set_printoptions(linewidth=300, threshold=10000)
    out = StringIO()
    section(out, "Laplacian matrix:", L)
    section(out, "Schur complement of Laplacian matrix:", R)
    section(out, "scaled Fiedler vector of Schur complement:", fiedler)
    section(out, "eigenvalues of pinv of Schur complement:", vals)
    # two blank lines separate the two halves of the report
    section(out, "corresponding eigenvectors of pinv of Schur complement:",
            np.array(vecs).T, gaps=2)
    section(out, "augmented distance matrix:", D_aug)
    section(out, "scaled Fiedler vector of augmented Laplacian limit:",
            fiedler_aug)
    section(out, "eigenvalues of pinv of augmented Laplacian limit:",
            vals_aug)
    section(out, "rows are eigenvectors of pinv of augmented Laplacian limit:",
            np.array(vecs_aug), gaps=0)
    return out.getvalue()
Exemplo n.º 10
0
def get_response_content(fs):
    """
    Report MDS related matrices for cities under two distance functions.
    The great arc and the euclidean distance functions are each used to
    build a distance matrix whose elementwise square is then centered,
    decomposed, and embedded; the first two coordinates of each city are
    listed at the end of each part.
    @param fs: an object whose datalines attribute is the multi-line input
    @return: the text of the report
    """
    # read the lat-lon points from the input
    lines = Util.get_stripped_lines(fs.datalines.splitlines())
    rows = parse_lines(lines)
    latlon_points = []
    city_names = []
    for city, latd, latm, lond, lonm in rows:
        lat = math.radians(GPS.degrees_minutes_to_degrees(latd, latm))
        lon = math.radians(GPS.degrees_minutes_to_degrees(lond, lonm))
        latlon_points.append((lat, lon))
        city_names.append(city)
    npoints = len(latlon_points)
    # start writing the response
    np.set_printoptions(linewidth=200)
    out = StringIO()
    radius = GPS.g_earth_radius_miles
    for dfunc, name in ((GPS.get_arc_distance, 'great arc'),
                        (GPS.get_euclidean_distance, 'euclidean')):
        # define the edm whose elements are squared euclidean-like distances
        edm = np.zeros((npoints, npoints))
        D = np.zeros((npoints, npoints))
        for i, pointa in enumerate(latlon_points):
            for j, pointb in enumerate(latlon_points):
                D[i, j] = dfunc(pointa, pointb, radius)
                edm[i, j] = D[i, j]**2
        print >> out, name, 'distances:'
        print >> out, D
        print >> out
        print >> out, name, 'EDM:'
        print >> out, edm
        print >> out
        G = Euclid.edm_to_dccov(edm)
        print >> out, name, 'Gower centered matrix:'
        print >> out, G
        print >> out
        # The symmetric solver returns real eigenvalues in ascending order.
        # The general solver may instead return complex values, which
        # cannot be sorted or formatted with '%.1f'.
        # NOTE(review): this assumes G is meant to be symmetric -- confirm.
        spectrum = np.linalg.eigvalsh(G)[::-1]
        print >> out, name, 'spectrum of Gower centered matrix:'
        for eigenvalue in spectrum:
            print >> out, eigenvalue
        print >> out
        print >> out, name, 'rounded spectrum:'
        for eigenvalue in spectrum:
            print >> out, '%.1f' % eigenvalue
        print >> out
        mds_points = Euclid.edm_to_points(edm)
        print >> out, '2D MDS coordinates:'
        # the loop variables are named so that they do not shadow
        # the name of the distance function
        for city_name, mds_point in zip(city_names, mds_points):
            row = [city_name, mds_point[0], mds_point[1]]
            print >> out, '\t'.join(str(field) for field in row)
        print >> out
        # break between distance methods
        print >> out
    # return the response
    return out.getvalue()
Exemplo n.º 11
0
def show_split(D):
    """
    Print the spectrum of the doubly centered matrix derived from D
    and the eigenvector paired with its largest eigenvalue.
    @param D: the distance matrix
    """
    dccov = Euclid.edm_to_dccov(D)
    # eigh returns the eigenvectors as columns, so transpose to get rows
    evals, evecs_as_columns = np.linalg.eigh(dccov)
    evec_rows = evecs_as_columns.T.tolist()
    # the pair with the largest eigenvalue is the one of interest
    _, principal = max(zip(evals, evec_rows))
    # show the results
    print('the maximum of these eigenvalues is interesting:')
    print('\t'.join(map(str, sorted(evals))))
    print('the interesting eigenvector:')
    print('\t'.join(map(str, principal)))
Exemplo n.º 12
0
def show_split(D):
    """
    Show the eigenvalues of the doubly centered matrix derived from D
    together with the eigenvector that belongs to the largest of them.
    @param D: the distance matrix
    """
    # eigendecomposition of the doubly centered matrix;
    # the transpose turns the eigenvector columns into rows
    spectrum, column_vectors = np.linalg.eigh(Euclid.edm_to_dccov(D))
    row_vectors = column_vectors.T.tolist()
    # pick the (eigenvalue, eigenvector) pair with the largest eigenvalue
    best_pair = max(zip(spectrum, row_vectors))
    best_vector = best_pair[1]
    # show the results
    spectrum_line = '\t'.join(str(value) for value in sorted(spectrum))
    vector_line = '\t'.join(str(value) for value in best_vector)
    print('the maximum of these eigenvalues is interesting:')
    print(spectrum_line)
    print('the interesting eigenvector:')
    print(vector_line)
Exemplo n.º 13
0
def get_response_content(fs):
    """
    Prepare the plot parameters for a tree and draw the spectrum image.
    @param fs: an object with the attributes tree_string, imageformat,
    hticks, border, slow, fast and denom
    @return: the value returned by create_image
    @raise HandlingError: if fewer than two horizontal ticks are requested
    or if neither the slow nor the fast method is selected
    """
    # define the requested physical size of the images (in pixels)
    physical_size = (640, 480)
    # build the newick tree from the string
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    nleaves = len(list(tree.gen_tips()))
    # Get ordered ids with the leaves first,
    # and get the corresponding distance matrix.
    ordered_ids = get_ordered_ids(tree)
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    # get the image extension
    ext = Form.g_imageformat_to_ext[fs.imageformat]
    # get the scaling factors and offsets
    if fs.hticks < 2:
        msg = 'expected at least two ticks on the horizontal axis'
        raise HandlingError(msg)
    width, height = physical_size
    xoffset = fs.border
    yoffset = fs.border
    yscale = float(height - 2*fs.border)
    xscale = (width - 2*fs.border) / float(fs.hticks - 1)
    # define the eigendecomposition function
    if fs.slow:
        fn = get_augmented_spectrum
    elif fs.fast:
        fn = get_augmented_spectrum_fast
    else:
        # fail with a clear message instead of leaving fn unbound
        msg = 'expected either the slow or the fast method to be selected'
        raise HandlingError(msg)
    # define the target eigenvalues
    tip_ids = [id(node) for node in tree.gen_tips()]
    D_tips = np.array(tree.get_partial_distance_matrix(tip_ids))
    G_tips = Euclid.edm_to_dccov(D_tips)
    target_ws = scipy.linalg.eigh(G_tips, eigvals_only=True) * fs.denom
    # draw the image
    return create_image(ext, physical_size,
            xscale, yscale, xoffset, yoffset,
            D, nleaves, fs.hticks, fs.denom, fn,
            target_ws)
Exemplo n.º 14
0
def get_response_content(fs):
    """
    Report matrices relating leaf eigenvectors to points for all vertices.
    The leaf distance matrix is doubly centered and decomposed.
    Points for all vertices are centered on the mean of the last nleaves
    points and rotated using the right singular vectors of those points.
    Products with the multipliers from get_alpha_multiplier and
    get_beta_multiplier are then shown.
    @param fs: an object with the attribute tree_string
    @return: the text of the report
    """
    # build the newick tree from the string
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    nvertices = len(list(tree.preorder()))
    nleaves = len(list(tree.gen_tips()))
    ninternal = nvertices - nleaves
    # get ordered ids with the internal nodes first
    # NOTE(review): the slicing below relies on this ordering -- confirm
    ordered_ids = get_ordered_ids(tree)
    leaf_ids = [id(node) for node in tree.gen_tips()]
    # get the leaf distance matrix and the full distance matrix
    D_leaf = np.array(tree.get_partial_distance_matrix(leaf_ids))
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    # analyze the leaf distance matrix
    w_leaf, v_leaf = EigUtil.eigh(Euclid.edm_to_dccov(D_leaf))
    V_leaf = np.array(v_leaf).T
    # explicitly compute the limiting points as the number of dups increases
    X = Euclid.edm_to_points(D)
    X -= np.mean(X[-nleaves:], axis=0)
    XL = X[-nleaves:]
    # only the right singular vectors are needed
    Vt = np.linalg.svd(XL)[2]
    Z = np.dot(X, Vt.T)
    # hack the Z matrix to show the leaf-related eigenvectors
    Z = Z[:, :nleaves - 1]
    WY = Z / np.sqrt(w_leaf[:-1])
    # compute a product using the first few rows of WY
    W = WY[:ninternal]
    M_alpha = get_alpha_multiplier(D, nleaves)
    MW_alpha = np.dot(M_alpha, W)
    # compute a product using the leaf eigenvectors
    M_beta = get_beta_multiplier(D, nleaves)
    MY_beta = np.dot(M_beta, V_leaf)
    # report the results
    np.set_printoptions(linewidth=300, threshold=10000)
    out = StringIO()
    # each section is a heading, a value, and a blank line
    sections = (
        ('leaf distance matrix:', D_leaf),
        ('eigenvalues derived from the leaf distance matrix', w_leaf),
        ('corresponding eigenvectors (as columns)', V_leaf),
        ("candidates for [W' Y']':", WY),
        ('candidates for W:', W),
        ('left multiplier of W:', M_alpha),
        ('each column is a (left multiplier, W) product:', MW_alpha),
        ('left multiplier of Y:', M_beta),
        ('each column is a (left multiplier, Y) product:', MY_beta),
        ('the above matrix divided by 2*eigenvalue:',
            MY_beta / (2 * np.array(w_leaf))),
    )
    for heading, value in sections:
        print >> out, heading
        print >> out, value
        print >> out
    return out.getvalue()
Exemplo n.º 15
0
def get_response_content(fs):
    """
    Write a report that relates leaf eigenvectors to points for all vertices.
    The doubly centered leaf distance matrix is decomposed, the points
    for all vertices are centered on the mean of the last nleaves points
    and rotated by the right singular vectors of those points, and the
    products with the multipliers from get_alpha_multiplier and
    get_beta_multiplier are shown.
    @param fs: an object with the attribute tree_string
    @return: the text of the report
    """
    def section(stream, heading, value):
        # write a heading, the value, and a trailing blank line
        print >> stream, heading
        print >> stream, value
        print >> stream

    # build the newick tree from the string
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    nvertices = len(list(tree.preorder()))
    nleaves = len(list(tree.gen_tips()))
    ninternal = nvertices - nleaves
    # get ordered ids with the internal nodes first
    # NOTE(review): the slicing below relies on this ordering -- confirm
    ordered_ids = get_ordered_ids(tree)
    leaf_ids = [id(node) for node in tree.gen_tips()]
    # get the leaf distance matrix and the full distance matrix
    D_leaf = np.array(tree.get_partial_distance_matrix(leaf_ids))
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    # analyze the leaf distance matrix
    w_leaf, v_leaf = EigUtil.eigh(Euclid.edm_to_dccov(D_leaf))
    V_leaf = np.array(v_leaf).T
    # explicitly compute the limiting points as the number of dups increases
    X = Euclid.edm_to_points(D)
    X -= np.mean(X[-nleaves:], axis=0)
    XL = X[-nleaves:]
    # only the right singular vectors are needed
    Vt = np.linalg.svd(XL)[2]
    Z = np.dot(X, Vt.T)
    # hack the Z matrix to show the leaf-related eigenvectors
    Z = Z[:, : nleaves - 1]
    WY = Z / np.sqrt(w_leaf[:-1])
    # compute a product using the first few rows of WY
    W = WY[:ninternal]
    M_alpha = get_alpha_multiplier(D, nleaves)
    MW_alpha = np.dot(M_alpha, W)
    # compute a product using the leaf eigenvectors
    M_beta = get_beta_multiplier(D, nleaves)
    MY_beta = np.dot(M_beta, V_leaf)
    # report the results
    np.set_printoptions(linewidth=300, threshold=10000)
    out = StringIO()
    section(out, "leaf distance matrix:", D_leaf)
    section(out, "eigenvalues derived from the leaf distance matrix", w_leaf)
    section(out, "corresponding eigenvectors (as columns)", V_leaf)
    section(out, "candidates for [W' Y']':", WY)
    section(out, "candidates for W:", W)
    section(out, "left multiplier of W:", M_alpha)
    section(out, "each column is a (left multiplier, W) product:", MW_alpha)
    section(out, "left multiplier of Y:", M_beta)
    section(out, "each column is a (left multiplier, Y) product:", MY_beta)
    section(out, "the above matrix divided by 2*eigenvalue:",
            MY_beta / (2 * np.array(w_leaf)))
    return out.getvalue()
Exemplo n.º 16
0
def edm_to_fiedler(D):
    """
    Convert the distance matrix with Euclid.edm_to_dccov
    and pass the result to dccov_to_fiedler.
    @param D: the distance matrix
    @return: the Fiedler vector of a related graph
    """
    dccov = Euclid.edm_to_dccov(D)
    return dccov_to_fiedler(dccov)
Exemplo n.º 17
0
def edm_to_fiedler(D):
    """
    Get a Fiedler vector from a distance matrix in two steps:
    Euclid.edm_to_dccov followed by dccov_to_fiedler.
    @param D: the distance matrix
    @return: the Fiedler vector of a related graph
    """
    centered = Euclid.edm_to_dccov(D)
    fiedler = dccov_to_fiedler(centered)
    return fiedler
Exemplo n.º 18
0
def get_response_content(fs):
    """
    Report matrices derived from the leaf distances of a tree.
    The report shows the full and leaf distance matrices, the Gower
    matrix, and then spectral summaries of three matrices: the
    pseudoinverse of the Gower matrix (reported as the combinatorial
    Laplacian), its normalized form, and a matrix related to weighted MDS.
    @param fs: an object with the attribute tree_string
    @return: the text of the report
    """
    # set up print options
    np.set_printoptions(linewidth=1000000, threshold=1000000)
    out = StringIO()

    def report_spectrum(heading, matrix, show_diag):
        # Write the matrix followed by its spectral summary.
        # The pinv spectrum comes from get_spectral_info,
        # while the spectrum and eigenvectors come from eigh.
        _, pinv_spectrum = get_spectral_info(matrix)
        spectrum, eigenvectors = scipy.linalg.eigh(matrix)
        print >> out, heading
        print >> out, matrix
        if show_diag:
            print >> out, 'diag:', np.diag(matrix)
        print >> out, 'spectrum:', spectrum
        print >> out, 'pinv spectrum:', pinv_spectrum
        print >> out, 'eigenvectors:'
        print >> out, eigenvectors
        print >> out

    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)

    # Get ordered ids with the leaves first.
    nvertices = len(list(tree.preorder()))
    leaf_count = len(list(tree.gen_tips()))
    ids = get_ordered_ids(tree)

    # Report the full distance matrix.
    D_full = np.array(tree.get_partial_distance_matrix(ids))
    print >> out, 'full distance matrix:'
    print >> out, D_full
    print >> out

    # Extract the part of the distance matrix that relates only leaves.
    D = D_full[:leaf_count, :leaf_count]
    print >> out, 'leaf distance matrix:'
    print >> out, D
    print >> out

    # Report the Gower matrix.
    G = Euclid.edm_to_dccov(D)
    print >> out, 'gower matrix:'
    print >> out, G
    print >> out, 'diag:', np.diag(G)
    print >> out

    # Compute the corresponding Laplacian matrix.
    L_comb = scipy.linalg.pinvh(G)
    report_spectrum('leaf combinatorial Laplacian matrix:', L_comb, True)

    # Compute the normalized Laplacian matrix.
    out_degrees = np.diag(L_comb)
    inv_sqrt_degrees = np.reciprocal(np.sqrt(out_degrees))
    L_norm = L_comb * np.outer(inv_sqrt_degrees, inv_sqrt_degrees)
    report_spectrum('leaf normalized Laplacian matrix:', L_norm, False)

    # Attempt to compute something related to weighted MDS.
    masses = out_degrees
    sqrt_mass_diag = np.diag(np.sqrt(masses))
    ones = np.ones(leaf_count)
    centering = np.identity(leaf_count) - (
            np.outer(ones, masses) / np.inner(masses, ones))
    ME = np.dot(sqrt_mass_diag, centering)
    Q = -0.5 * np.dot(np.dot(ME, D), ME.T)
    report_spectrum('a matrix related to weighted MDS:', Q, False)

    # show the result
    return out.getvalue()