示例#1
0
def get_leaf_distn_acl(R, B):
    """
    Compute a distribution over the leaves of a rooted tree.

    This is a possibly equivalent formulation
    which is based on Felsenstein weights.
    @param R: rooted tree description, passed to Ftree.R_to_T and
    Ftree.R_to_root
    @param B: passed with the tree to Ftree.TB_to_L_principal
    (presumably the branch lengths -- confirm against Ftree)
    @return: a dictionary that maps each leaf to its weight
    """
    # Order the vertices so that the leaves come first and the root is last.
    T = Ftree.R_to_T(R)
    root = Ftree.R_to_root(R)
    leaves = Ftree.T_to_leaves(T)
    other_internal = [
            v for v in Ftree.T_to_internal_vertices(T) if v != root]
    ordered = leaves + other_internal + [root]
    # The pseudoinverse of the Laplacian
    # is also the doubly centered covariance matrix.
    HSH = np.linalg.pinv(Ftree.TB_to_L_principal(T, B, ordered))
    # Decenter the covariance matrix using the root, which is the last row.
    # This should give the rooted covariance matrix
    # which is M in the appendix of Weights for Data Related by a Tree
    # by Altschul, Carroll, and Lipman, 1989.
    root_row = HSH[-1]
    ones_vec = np.ones_like(root_row)
    ones_mat = np.ones_like(HSH)
    M = (
            HSH
            - np.outer(ones_vec, root_row)
            - np.outer(root_row, ones_vec)
            + HSH[-1, -1] * ones_mat)
    # Keep only the leaf-by-leaf corner and pseudoinvert it.
    k = len(leaves)
    leaf_block_pinv = np.linalg.pinv(M[:k, :k])
    # The column sums of that pseudoinverse, scaled by its grand total,
    # give the leaf distribution.
    weights = leaf_block_pinv.sum(axis=0) / leaf_block_pinv.sum()
    return dict(zip(leaves, weights))
示例#2
0
 def X_to_L_V(self, X):
     """
     Unpack X into a Laplacian matrix and an augmented array.

     The unpacking relies on state stored on the instance:
     self.T_test, self.vertices and self.Vp.
     @param X: the packed value accepted by self.X_to_B_Vr
     @return: the pair (L, V)
     """
     B_part, Vr_part = self.X_to_B_Vr(X)
     # Build the Laplacian from the stored tree and vertex order.
     laplacian = Ftree.TB_to_L_principal(
             self.T_test, B_part, self.vertices)
     # Stack the stored rows on top of the rows unpacked from X.
     augmented = np.vstack((self.Vp, Vr_part))
     return laplacian, augmented
示例#3
0
 def _get_v_to_point(self):
     """
     Map each vertex of the tree to a three dimensional point.

     The coordinates come from eigenvectors of the full tree Laplacian
     built from self.T and self.B.
     @return: a dictionary that maps a vertex to a numpy array of length 3
     """
     order = Ftree.T_to_order(self.T)
     laplacian = Ftree.TB_to_L_principal(self.T, self.B, order)
     # The eigenvectors are the columns, ordered by increasing eigenvalue.
     # The eigenvalues themselves are not needed.
     _eigenvalues, eigenvectors = scipy.linalg.eigh(laplacian)
     # Use columns 1, 2 and 3 as the x, y and z valuations.
     # Column 0 is skipped
     # (presumably the constant eigenvector of the Laplacian -- confirm).
     axes = [eigenvectors[:, k] for k in (1, 2, 3)]
     points = [np.array(xyz) for xyz in zip(*axes)]
     return dict(zip(order, points))
示例#4
0
def get_internal_vertex_to_leaf_distn_cov(T, B):
    """
    Map each internal vertex to a leaf distribution using covariances.

    This is a possibly equivalent formulation.
    It is based on Schur complementation in the unrooted covariance matrix,
    following the conditional distribution formulas
    on the multivariate normal distribution wikipedia page.
    NOTE: earlier notes kept in this function recorded that
    a Laplacian based interpolator, -ndot(pinv(Lbb), Lba), works,
    whereas the covariance based interpolator computed here
    "seems like it should work but it does not".
    An attempted hack that added a matrix of ones to the covariance
    matrix before slicing was also left disabled.
    @param T: tree topology as accepted by the Ftree functions
    @param B: passed with T to Ftree.TB_to_L_principal
    (presumably the branch lengths -- confirm against Ftree)
    @return: a dictionary that maps an internal vertex to a leaf distribution
    """
    leaves = Ftree.T_to_leaves(T)
    internal = Ftree.T_to_internal_vertices(T)
    nleaves = len(leaves)
    # Get the full tree Laplacian matrix with the leaves ordered first.
    L = Ftree.TB_to_L_principal(T, B, leaves + internal)
    # Its pseudoinverse is the unrooted covariance matrix.
    cov = np.linalg.pinv(L)
    # Slice out the leaf-by-leaf and the internal-by-leaf blocks.
    cov_leaf_leaf = cov[:nleaves, :nleaves]
    cov_internal_leaf = cov[nleaves:, :nleaves]
    interpolator = ndot(cov_internal_leaf, np.linalg.pinv(cov_leaf_leaf))
    # Row i of the interpolator belongs to the i-th internal vertex
    # and column j belongs to the j-th leaf.
    return dict(
            (v, dict(
                (leaf, interpolator[i, j])
                for j, leaf in enumerate(leaves)))
            for i, v in enumerate(internal))
示例#5
0
def get_response_content(fs):
    """
    Report a quadratic form evaluated at points along every tree edge.

    A new vertex is placed at evenly spaced positions along each
    directed edge of the arbitrarily rooted tree.
    For each placement the pseudoinverse of the Laplacian is computed
    and a quadratic form is evaluated with a vector that has
    coefficient 1 for the new vertex, -1/nleaves for each leaf
    and 0 for each internal vertex.
    @param fs: an object whose tree attribute is given to
    FtreeIO.newick_to_TBN
    @return: the report text, with entries sorted by quadratic form value
    """
    # read the tree
    T, B, N = FtreeIO.newick_to_TBN(fs.tree)
    leaves = Ftree.T_to_leaves(T)
    internal = Ftree.T_to_internal_vertices(T)
    # root arbitrarily
    R = Ftree.T_to_R_canonical(T)
    # proportions of the branch at which the new vertex is placed;
    # these are strictly between 0 and 1
    npillars = 9
    fractions = [(k + 1) / float(npillars + 1) for k in range(npillars)]
    # init the contrast over the vertex order: internal, new vertex, leaves
    nleaves = len(leaves)
    r = get_new_vertex(T)
    vertices = internal + [r] + leaves
    combo = np.array([0] * len(internal) + [1] + [-1.0 / nleaves] * nleaves)
    # Map (source, target, distance along the edge)
    # to the quadratic form value.
    qform = {}
    for d_edge in R:
        source, target = d_edge
        edge_length = B[frozenset(d_edge)]
        for t in fractions:
            T_new, B_new = add_vertex(T, B, d_edge, r, t)
            # the pseudoinverse of the Laplacian of the augmented tree
            # is the new centered covariance matrix
            S = np.linalg.pinv(
                    Ftree.TB_to_L_principal(T_new, B_new, vertices))
            qform[(source, target, t * edge_length)] = quadratic_form(
                    S, combo)
    # order the entries by value, with ties broken by the key fields
    rows = [(val, va, vb, d) for (va, vb, d), val in qform.items()]
    rows.sort()
    # write the report with a blank line after each entry
    out = StringIO()
    for val, va, vb, d in rows:
        print >> out, N[va], '--[', d, ']-->', N[vb], ':', val
        print >> out
    return out.getvalue()