示例#1
0
def anc_recon(tree, chars, Q, p=None, pi="Fitzjohn", ars=None):
    """
    Reconstruct ancestral states on a tree under an Mk model.

    The per-branch transition matrices are computed from Q, the working
    down-pass array is reset from its read-only template, and the compiled
    routine ``cyexpokit.cy_anc_recon`` performs the down-pass and up-pass.

    Args:
        tree (Node): Root node of a tree. All branch lengths must be
          greater than 0 (except root). Only used when ``ars`` is None.
        chars (list): List of character states corresponding to leaf nodes in
          preorder sequence. Character states must be numbered 0,1,2,...
          Only used when ``ars`` is None.
        Q (np.array): Instantaneous rate matrix
        p (np.array): 3-D array of dimensions branch_number * nchar * nchar.
            Optional. Pre-allocated space for efficient calculations. It is
            overwritten in place.
        pi (str or np.array): Root weighting option. NOTE: this argument is
           not read anywhere in this function; the root vector handed to the
           compiled routine is always ``ivy.chars.mk.qsd(Q)``.
        ars (dict): Dict of pre-allocated arrays to improve
          speed by avoiding creating and destroying new arrays. Can be
          created with create_ancrecon_ars function. Its arrays are
          modified in place.
    Returns:
        np.array: ``ars["marginal_nl"]`` (the same array object stored in
          ``ars``, not a copy). Per the original documentation: nodes in
          preorder sequence containing marginal likelihoods.
    """
    nchar = Q.shape[0]

    # Build the scratch arrays on demand when the caller did not supply them
    if ars is None:
        ars = create_ancrecon_ars(tree, chars)
    branch_lengths = ars["t"]

    # One nchar x ncol transition matrix per branch
    if p is None:
        p = np.empty([len(branch_lengths), nchar, Q.shape[1]],
                     dtype=np.double, order="C")
    # Fills p in place from Q and the branch lengths
    cyexpokit.dexpm_tree_preallocated_p(Q, branch_lengths, p)

    # Restore the writable down-pass array from its pristine template and
    # rewind the per-parent child counters before the compiled pass
    np.copyto(ars["down_nl_w"], ars["down_nl_r"])
    ars["child_inds"].fill(0)

    root_equil = ivy.chars.mk.qsd(Q)
    marginal = ars["marginal_nl"]

    cyexpokit.cy_anc_recon(
        p,
        ars["down_nl_w"],
        ars["charlist"],
        ars["childlist"],
        ars["up_nl"],
        marginal,
        ars["partial_parent_nl"],
        ars["partial_nl"],
        ars["child_inds"],
        root_equil,
        ars["temp_dotprod"],
    )

    return marginal
示例#2
0
def anc_recon(tree, chars, Q, p=None, pi="Fitzjohn", ars=None):
    """
    Given a tree, tip character states, and a rate matrix, run the compiled
    ancestral-state reconstruction (down-pass followed by up-pass).

    Args:
        tree (Node): Root node of a tree. All branch lengths must be
          greater than 0 (except root). Consulted only to build ``ars``
          when none is supplied.
        chars (list): List of character states corresponding to leaf nodes in
          preorder sequence. Character states must be numbered 0,1,2,...
          Consulted only to build ``ars`` when none is supplied.
        Q (np.array): Instantaneous rate matrix
        p (np.array): 3-D array of dimensions branch_number * nchar * nchar.
            Optional. Pre-allocated space for efficient calculations;
            its contents are replaced in place.
        pi (str or np.array): Root weighting option. NOTE: the body never
           reads this argument; ``ivy.chars.mk.qsd(Q)`` is always what is
           passed to the compiled routine as the root vector.
        ars (dict): Dict of pre-allocated arrays to improve
          speed by avoiding creating and destroying new arrays. Can be
          created with create_ancrecon_ars function. Mutated in place.
    Returns:
        np.array: The ``"marginal_nl"`` entry of ``ars`` (no copy is made).
          Per the original documentation: nodes in preorder sequence
          containing marginal likelihoods.
    """
    nchar = Q.shape[0]
    ars = create_ancrecon_ars(tree, chars) if ars is None else ars

    if p is None:
        # Uninitialised storage; every entry is written by the call below
        shape = [len(ars["t"]), nchar, Q.shape[1]]
        p = np.empty(shape, dtype=np.double, order="C")

    # Transition-probability matrices for each branch, written into p
    cyexpokit.dexpm_tree_preallocated_p(Q, ars["t"], p)

    # A previous run may have changed the working copy; reset it and the
    # child counters so this run starts from a clean state
    down_work = ars["down_nl_w"]
    np.copyto(down_work, ars["down_nl_r"])
    child_counters = ars["child_inds"]
    child_counters.fill(0)

    root_equil = ivy.chars.mk.qsd(Q)

    cyexpokit.cy_anc_recon(p, down_work, ars["charlist"], ars["childlist"],
                           ars["up_nl"], ars["marginal_nl"],
                           ars["partial_parent_nl"], ars["partial_nl"],
                           child_counters, root_equil, ars["temp_dotprod"])

    return ars["marginal_nl"]
示例#3
0
文件: hrm.py 项目: ChriZiegler/ivy
def hrm_back_mk(tree, chars, Q, nregime, p=None, pi="Fitzjohn",returnPi=False,
                preallocated_arrays=None, tip_states=None, returnnodes=False):
    """
    Calculate probability vector at root given tree, characters, and Q matrix,
    then reconstruct probability vectors for tips and use those in another
    up-pass to calculate probability vector at root.

    Args:
        tree (Node): Root node of a tree. All branch lengths must be
          greater than 0 (except root)
        chars (list): List of character states corresponding to leaf nodes in
          preorder sequence. Character states must be numbered 0,1,2,...
        Q (np.array): Instantaneous rate matrix. It is not modified.
        nregime (int): Number of hidden regimes. The number of observed
          states is taken to be ``Q.shape[0] // nregime``.
        p (np.array): Optional pre-allocated p matrix (overwritten in place)
        pi (str or np.array): Option to weight the root node by given values.
           Either a string containing the method or an array
           of weights. Weights should be given in order.

           Accepted methods of weighting root:

           Equal: flat prior
           Equilibrium: Prior equal to stationary distribution
             of Q matrix
           Fitzjohn: Root states weighted by how well they
             explain the data at the tips.
        returnPi (bool): Accepted for signature compatibility; not read by
          this function.
        preallocated_arrays (dict): Dict of pre-allocated arrays to improve
          speed by avoiding creating and destroying new arrays. When it holds
          exactly two keys ("charlist" and "t") the remaining work arrays
          are created and stored in it.
        tip_states (np.array): Optional 2-D array with one row per leaf (rows
          matched to leaves in postorder). Its last column is ignored; the
          rest is multiplied into the leaf rows of the node list and each
          row is renormalised to sum to 1.
        returnnodes (bool): If True, append the full up-pass node array to
          the returned list.
    Returns:
        list: ``[leaf_rows, logli]`` where ``leaf_rows`` are the rows of the
          up-pass node array selected by each leaf's ``pi`` attribute and
          ``logli`` is the log-likelihood at the root; the up-pass node
          array is appended when ``returnnodes`` is True.
    Raises:
        AssertionError: If an array ``pi`` has the wrong length, is not a
          numpy array, or does not sum to 1.
        ValueError: If ``pi`` is a string other than the accepted methods.
    """
    nchar = Q.shape[0]
    # Floor division: nobschar is used to build integer column indices
    nobschar = nchar // nregime
    if preallocated_arrays is None:
        # Creating arrays to be used later
        preallocated_arrays = {}
        t = [node.length for node in tree.postiter() if not node.isroot]
        t = np.array(t, dtype=np.double)
        preallocated_arrays["charlist"] = list(range(nchar))
        preallocated_arrays["t"] = t

    if p is None: # Instantiating empty array
        p = np.empty([len(preallocated_arrays["t"]), Q.shape[0], Q.shape[1]], dtype = np.double, order="C")
    # Creating probability matrices from Q matrix and branch lengths
    cyexpokit.dexpm_tree_preallocated_p(Q, preallocated_arrays["t"], p) # This changes p in place

    if len(preallocated_arrays.keys())==2:
        # Creating more arrays
        nnode = len(tree.descendants())+1
        preallocated_arrays["nodelist"] = np.zeros((nnode, nchar+1))
        preallocated_arrays["childlist"] = np.zeros(nnode, dtype=object)
        postnodes = list(tree.postiter())
        leafind = [ nd.isleaf for nd in postnodes ]
        # Reordering character states to be in postorder sequence
        preleaves = [ nd for nd in tree.preiter() if nd.isleaf ]
        postleaves = [ nd for nd in postnodes if nd.isleaf ]
        postChars = [ chars[preleaves.index(leaf)] for leaf in postleaves ]
        # Filling in the node list. It contains all of the information needed
        # to calculate the likelihoods at each node

        # Row views of the leaves, built once; writing to a view writes
        # through to the node list
        leaf_rows = [ row for rownum, row in enumerate(preallocated_arrays["nodelist"])
                      if leafind[rownum] ]
        # Q matrix is in the form of "0S, 1S, 0F, 1F" etc. Every hidden
        # version of the observed state gets an equal share of the tip weight.
        for k,ch in enumerate(postChars):
            # Indices of hidden rates of observed state
            hiddenChs = [ regime * nobschar + ch for regime in range(nregime) ]
            leaf_rows[k][hiddenChs] = 1.0/nregime
        # Last column of each non-root row holds the postorder index of the
        # parent; childlist holds the children's postorder indices
        for rownum, row in enumerate(preallocated_arrays["nodelist"][:-1]):
            row[nchar] = postnodes.index(postnodes[rownum].parent)
            preallocated_arrays["childlist"][rownum] = [ nod.pi for nod in postnodes[rownum].children ]
        # The root is the last node in postorder
        preallocated_arrays["childlist"][-1] = [ nod.pi for nod in postnodes[-1].children ]

        # Setting initial node likelihoods to 1.0 for calculations
        preallocated_arrays["nodelist"][[ rownum for rownum, isleaf in enumerate(leafind) if not isleaf],:-1] = 1.0

        # Empty array to store root priors
        preallocated_arrays["root_priors"] = np.empty([nchar], dtype=np.double)
        preallocated_arrays["nodelist-up"] = preallocated_arrays["nodelist"].copy()
        # Must be a copy: the transposed buffer is edited in place below, and
        # aliasing Q here would overwrite the caller's rate matrix
        preallocated_arrays["t_Q"] = Q.copy()
        preallocated_arrays["p_up"] = p.copy()
        preallocated_arrays["v"] = np.zeros([nchar])
        preallocated_arrays["tmp"] = np.zeros([nchar+1])
        preallocated_arrays["motherRow"] = np.zeros([nchar+1])

    nodelist = preallocated_arrays["nodelist"]
    root_priors = preallocated_arrays["root_priors"]

    if tip_states is not None:
        leaf_rownums = [ rownum for rownum, nd in enumerate(tree.postiter()) if nd.isleaf ]
        tip_states = nodelist[leaf_rownums][:,:-1] * tip_states[:,:-1]
        tip_states = tip_states/np.sum(tip_states,1)[:,None]

        nodelist[leaf_rownums,:-1] = tip_states

    # Calculating the likelihoods for each node in post-order sequence
    cyexpokit.cy_mk(nodelist, p, preallocated_arrays["charlist"])
    # The last row of nodelist contains the likelihood values at the root
    # (its final column is the parent-index slot, not a likelihood)
    root_liks = nodelist[-1,:-1]

    # Applying the correct root prior
    if not isinstance(pi, str):
        assert len(pi) == nchar, "length of given pi does not match Q dimensions"
        assert isinstance(pi, np.ndarray), "pi must be str or numpy array"
        assert np.isclose(sum(pi), 1), "values of given pi must sum to 1"

        np.copyto(root_priors, pi)

        li = sum([ lik*root_priors[state] for state,lik in enumerate(root_liks) ])
        logli = math.log(li)

    elif pi == "Equal":
        root_priors.fill(1.0/nchar)
        li = sum([ float(lik)/nchar for lik in root_liks ])

        logli = math.log(li)

    elif pi == "Fitzjohn":
        root_total = sum(root_liks)
        np.copyto(root_priors, root_liks/root_total)

        # NOTE(review): this sums over the observed state numbers in `chars`
        # only, not over all nchar (hidden) states - confirm this is intended.
        li = sum([ root_liks[charstate] * root_priors[charstate]
                   for charstate in set(chars) ])
        logli = math.log(li)
    elif pi == "Equilibrium":
        # Equilibrium pi from the stationary distribution of Q
        np.copyto(root_priors,qsd(Q))
        li = sum([ lik*root_priors[state] for state,lik in enumerate(root_liks) ])
        logli = math.log(li)
    else:
        raise ValueError("unrecognized pi option: %r" % (pi,))

    # Transposal of Q for up-pass now that down-pass is completed
    np.copyto(preallocated_arrays["t_Q"], Q)
    t_Q = np.transpose(preallocated_arrays["t_Q"])
    # Rebuild the diagonal so each row of the transposed matrix sums to zero
    t_Q[np.diag_indices(nchar)] = 0
    t_Q[np.diag_indices(nchar)] = -np.sum(t_Q, 1)
    t_Q = np.ascontiguousarray(t_Q)
    preallocated_arrays["t_Q"] = t_Q
    cyexpokit.dexpm_tree_preallocated_p(t_Q, preallocated_arrays["t"], preallocated_arrays["p_up"])

    nodelist_up = preallocated_arrays["nodelist-up"]
    nodelist_up[:,:-1] = 1.0
    nodelist_up[-1] = nodelist[-1]

    childlist = preallocated_arrays["childlist"]
    p_up = preallocated_arrays["p_up"]
    motherRow = preallocated_arrays["motherRow"]
    v = preallocated_arrays["v"]
    tmp = preallocated_arrays["tmp"]

    # Postorder index of the row currently being filled; rows are visited
    # from the one just before the root back to the first
    ni = len(nodelist_up) - 2

    root_marginal =  ivy.chars.mk.qsd(Q) # Change to Fitzjohn Q?

    for row in nodelist_up[::-1][1:]:
        motherRowNum = int(row[nchar])
        np.copyto(motherRow, nodelist_up[motherRowNum])
        sisterInds = [ child for child in childlist[motherRowNum] if child != ni ]

        # If the mother is the root (its parent-index slot was never set)...
        if motherRow[nchar] == 0.0:
            # The marginal of the root
            np.copyto(v, root_marginal)
        else:
            # If the mother is not the root, calculate prob. of being in any state
            # Use transposed matrix
            np.dot(p_up[motherRowNum], nodelist_up[motherRowNum][:nchar], out=v)
        for sister in sisterInds:
            # Use non-transposed matrix
            np.copyto(tmp, nodelist[sister])
            tmp[:nchar] = tmp[:-1]/sum(tmp[:nchar])
            v *= np.dot(p[sister], tmp[:nchar])
        nodelist_up[ni][:nchar] = v
        ni -= 1
    out = [nodelist_up[[ leaf.pi for leaf in tree.leaves() ]], logli]
    if returnnodes:
        out.append(nodelist_up)
    return out
示例#4
0
文件: hrm.py 项目: ChriZiegler/ivy
def hrm_mk(tree, chars, Q, nregime, p=None, pi="Fitzjohn",returnPi=False,
          preallocated_arrays=None):
    """
    Note: this version calculates likelihoods at each node.
    Other version calculates probabilities at each node to match
    corHMM
    Return log-likelihood of hidden-rates-model mk as described in
    Beaulieu et al. 2013

    Args:
        tree (Node): Root node of a tree. All branch lengths must be
          greater than 0 (except root)
        chars (list): List of character states corresponding to leaf nodes in
          preorder sequence. Character states must be numbered 0,1,2,...
        Q (np.array): Instantaneous rate matrix
        nregime (int): Number of hidden regimes. The number of observed
          states is taken to be ``Q.shape[0] // nregime``.
        p (np.array): Optional pre-allocated p matrix (overwritten in place)
        pi (str or np.array): Option to weight the root node by given values.
           Either a string containing the method or an array
           of weights. Weights should be given in order.

           Accepted methods of weighting root:

           Equal: flat prior
           Equilibrium: Prior equal to stationary distribution
             of Q matrix
           Fitzjohn: Root states weighted by how well they
             explain the data at the tips.
        returnPi (bool): Whether or not to return the final values of root
          node weighting
        preallocated_arrays (dict): Dict of pre-allocated arrays to improve
          speed by avoiding creating and destroying new arrays. When it holds
          exactly two keys ("charlist" and "t") the node list and root-prior
          arrays are created and stored in it.
    Returns:
        float, or (float, dict) when ``returnPi`` is True: the log-likelihood
          and, optionally, a dict mapping state index to root prior.
    Raises:
        AssertionError: If an array ``pi`` has the wrong length, is not a
          numpy array, or does not sum to 1.
        ValueError: If ``pi`` is a string other than the accepted methods.
    """
    nchar = Q.shape[0]
    # Floor division: nobschar is used to build integer column indices
    nobschar = nchar // nregime
    if preallocated_arrays is None:
        # Creating arrays to be used later
        preallocated_arrays = {}
        preallocated_arrays["charlist"] = list(range(nchar))
        preallocated_arrays["t"] = np.array([node.length for node in tree.postiter() if not node.isroot], dtype=np.double)

    if p is None: # Instantiating empty array
        p = np.empty([len(preallocated_arrays["t"]), Q.shape[0], Q.shape[1]], dtype = np.double, order="C")
    # Creating probability matrices from Q matrix and branch lengths
    cyexpokit.dexpm_tree_preallocated_p(Q, preallocated_arrays["t"], p) # This changes p in place

    if len(preallocated_arrays.keys())==2:
        # Creating more arrays
        nnode = len(tree.descendants())+1
        preallocated_arrays["nodelist"] = np.zeros((nnode, nchar+1))
        postnodes = list(tree.postiter())
        leafind = [ nd.isleaf for nd in postnodes ]
        # Reordering character states to be in postorder sequence
        preleaves = [ nd for nd in tree.preiter() if nd.isleaf ]
        postleaves = [ nd for nd in postnodes if nd.isleaf ]
        postChars = [ chars[preleaves.index(leaf)] for leaf in postleaves ]
        # Filling in the node list. It contains all of the information needed
        # to calculate the likelihoods at each node

        # Row views of the leaves, built once; writing to a view writes
        # through to the node list
        leaf_rows = [ row for rownum, row in enumerate(preallocated_arrays["nodelist"])
                      if leafind[rownum] ]
        # Q matrix is in the form of "0S, 1S, 0F, 1F" etc. Probabilities
        # set to 1 for all hidden states of the observed state.
        for k,ch in enumerate(postChars):
            # Indices of hidden rates of observed state. These will all be set to 1
            hiddenChs = [ regime * nobschar + ch for regime in range(nregime) ]
            leaf_rows[k][hiddenChs] = 1.0
        # Last column of each non-root row holds the parent's postorder index
        for rownum, row in enumerate(preallocated_arrays["nodelist"][:-1]):
            row[nchar] = postnodes.index(postnodes[rownum].parent)

        # Setting initial node likelihoods to 1.0 for calculations
        preallocated_arrays["nodelist"][[ rownum for rownum, isleaf in enumerate(leafind) if not isleaf],:-1] = 1.0

        # Empty array to store root priors
        preallocated_arrays["root_priors"] = np.empty([nchar], dtype=np.double)

    nodelist = preallocated_arrays["nodelist"]
    root_priors = preallocated_arrays["root_priors"]

    # Calculating the likelihoods for each node in post-order sequence
    cyexpokit.cy_mk(nodelist, p, preallocated_arrays["charlist"])
    # The last row of nodelist contains the likelihood values at the root
    # (its final column is the parent-index slot, not a likelihood)
    root_liks = nodelist[-1,:-1]

    # Applying the correct root prior
    if not isinstance(pi, str):
        assert len(pi) == nchar, "length of given pi does not match Q dimensions"
        assert isinstance(pi, np.ndarray), "pi must be str or numpy array"
        assert np.isclose(sum(pi), 1), "values of given pi must sum to 1"

        np.copyto(root_priors, pi)

        li = sum([ lik*root_priors[state] for state,lik in enumerate(root_liks) ])
        logli = math.log(li)

    elif pi == "Equal":
        root_priors.fill(1.0/nchar)
        li = sum([ float(lik)/nchar for lik in root_liks ])

        logli = math.log(li)

    elif pi == "Fitzjohn":
        root_total = sum(root_liks)
        np.copyto(root_priors, root_liks/root_total)

        # NOTE(review): this sums over the observed state numbers in `chars`
        # only, not over all nchar (hidden) states - confirm this is intended.
        li = sum([ root_liks[charstate] * root_priors[charstate]
                   for charstate in set(chars) ])
        logli = math.log(li)
    elif pi == "Equilibrium":
        # Equilibrium pi from the stationary distribution of Q
        np.copyto(root_priors,qsd(Q))
        li = sum([ lik*root_priors[state] for state,lik in enumerate(root_liks) ])
        logli = math.log(li)
    else:
        raise ValueError("unrecognized pi option: %r" % (pi,))

    if returnPi:
        return (logli, dict(enumerate(root_priors)))
    return logli
示例#5
0
def anc_recon_py(tree, chars, Q, p=None, pi="Fitzjohn"):
    """
    - Pure python version of anc recon code

    Given tree, character states at tips, and transition matrix perform
    ancestor reconstruction.

    Perform downpass, then perform uppass.

    Return reconstructed states - including tips (tips can be switched
    to their true values in post-processing)

    Args:
        tree (Node): Root node of a tree. It is copied; the copy is annotated.
        chars (list): Character states of the tips, in the order in which
          ``tree.leaves()`` yields them. States are used as indices into
          the likelihood vectors.
        Q (np.array): Instantaneous rate matrix
        p (np.array): Optional pre-allocated array of shape
          (number of descendants, nchar, nchar); overwritten in place.
        pi: Not read by this function.
    Returns:
        Node: Copy of ``tree``. Every node has a ``downpass_likelihood``
          dict and every non-root node a ``marginal_likelihood`` dict, both
          keyed by state index. The values are not normalised here.

    Note:
        The likelihood contributed by a node's siblings is the product over
        siblings of the per-sibling sum over states, which is the same rule
        the down-pass uses. For a node with exactly one sibling this equals
        the earlier sum-of-products formulation.
    """
    chartree = tree.copy()
    chartree.char = None
    chartree.downpass_likelihood = {}
    t = [node.length for node in chartree.descendants()]
    t = np.array(t, dtype=np.double)
    nchar = Q.shape[0]
    states = range(nchar)

    # Generating probability matrix for each branch
    if p is None:
        p = np.empty([len(t), Q.shape[0], Q.shape[1]], dtype = np.double, order="C")
    cyexpokit.dexpm_tree_preallocated_p(Q, t, p) # This changes p in place

    for i, nd in enumerate(chartree.descendants()):
        nd.pmat = p[i] # Assigning probability matrices for each branch
        nd.downpass_likelihood = {}
        nd.char = None

    for i, lf in enumerate(chartree.leaves()):
        lf.char = chars[i] # Assigning character states to tips

    def branch_likelihood(child, start_state):
        # Likelihood of everything below `child` given that the top of its
        # branch is in `start_state`: sum over end states of
        # p(start, end) * likelihood(end)
        return sum([child.pmat[start_state, end_state] *
                    child.downpass_likelihood[end_state]
                    for end_state in states])

    # Performing the downpass
    for node in chartree.postiter():
        if node.char is not None:
            # For tip nodes, likelihoods are 1 for observed state and 0 for the rest
            for state in states:
                node.downpass_likelihood[state] = 0.0
            node.downpass_likelihood[node.char] = 1.0
        else:
            for state in states:
                per_child = [branch_likelihood(ch, state) for ch in node.children]
                node.downpass_likelihood[state] = np.prod(per_child)

    # Performing the uppass (skipping the root)
    # Iterate over nodes in pre-order sequence
    for node in chartree.descendants():
        # Marginal is equivalent to information coming UP from the root
        # * information coming DOWN from the tips
        node.marginal_likelihood = {}
        parent = node.parent

        ## Partial downpass likelihood of the parent: the downpass
        ## likelihood it would have if this node were pruned away
        parent.partial_down_likelihood = {}
        sibs = node.get_siblings()
        for state in states:
            partial = 1.0
            for sib in sibs:
                partial *= branch_likelihood(sib, state)
            parent.partial_down_likelihood[state] = partial

        ## Partial uppass likelihood of the parent
        parent.partial_up_likelihood = {}
        if parent.isroot:
            # Nothing lies "upwards" of the root; use 1 as the identity
            for state in states:
                parent.partial_up_likelihood[state] = 1.0
        else:
            # NOTE(review): only the parent's siblings contribute here;
            # nothing from above the grandparent is propagated - confirm
            # this is intended.
            aunts = parent.get_siblings()
            from_aunts = [1.0] * nchar  # indexed by grandparent state
            for pstate in states:
                for aunt in aunts:
                    from_aunts[pstate] *= branch_likelihood(aunt, pstate)
            for state in states:
                parent.partial_up_likelihood[state] = 0.0
                for pstate in states:
                    parent.partial_up_likelihood[state] += (
                        from_aunts[pstate] * parent.pmat[pstate, state])

        ### Putting together the uppass information and the downpass information
        uppass_information = {}
        for state in states:
            uppass_information[state] = (parent.partial_down_likelihood[state] *
                                         parent.partial_up_likelihood[state])
        downpass_information = node.downpass_likelihood

        for state in states:
            node.marginal_likelihood[state] = 0
            for pstate in states:
                node.marginal_likelihood[state] += uppass_information[pstate] * node.pmat[pstate, state]
            node.marginal_likelihood[state] *= downpass_information[state]
    return chartree
示例#6
0
def anc_recon_purepy(tree, chars, Q, p=None, pi="Fitzjohn", ars=None):
    """
    Given tree, character states at tips, and transition matrix perform
    ancestor reconstruction.

    Perform the downpass and uppass in Python/numpy on the pre-allocated
    arrays in ``ars``.

    Return reconstructed states - including tips

    Args:
        tree (Node): Root node of a tree. Only used to build ``ars`` when
          none is supplied.
        chars (list): Character states at the tips. Only used to build
          ``ars`` when none is supplied.
        Q (np.array): Instantaneous rate matrix
        p (np.array): Optional pre-allocated array of per-branch transition
          matrices; overwritten in place.
        pi: Not read by this function; the root up-pass vector is always
          ``ivy.chars.mk.qsd(Q)``.
        ars (dict): Pre-allocated work arrays (see create_ancrecon_ars).
          Modified in place.
    Returns:
        np.array: ``ars["marginal_nl"]`` (the array stored in ``ars``, not
          a copy).
    """
    nchar = Q.shape[0]
    if ars is None:
        # Creating arrays to be used later
        ars = create_ancrecon_ars(tree, chars)
    # Calculating the likelihoods for each node in post-order sequence
    if p is None: # Instantiating empty array
        p = np.empty([len(ars["t"]), Q.shape[0],
                     Q.shape[1]], dtype = np.double, order="C")
    # Creating probability matrices from Q matrix and branch lengths
    cyexpokit.dexpm_tree_preallocated_p(Q, ars["t"], p) # This changes p in place
    np.copyto(ars["down_nl_w"], ars["down_nl_r"]) # Copy original values if they have been changed
    ars["child_inds"].fill(0)
    root_equil = ivy.chars.mk.qsd(Q)
    # ------------------- Performing the down-pass -----------------------------
    # Column nchar of every non-root row holds a parent index, so the sorted
    # set of those values enumerates the internal nodes in ascending order
    for intnode in map(int, sorted(set(ars["down_nl_w"][:-1,nchar]))):
        nextli = ars["down_nl_w"][intnode]
        for chi, child in enumerate(ars["childlist"][intnode]):
            li = ars["down_nl_w"][child]
            p_li = ars["partial_nl"][intnode][chi]
            for ch in ars["charlist"]:
                # list * ndarray multiplies elementwise, so this is the dot
                # product of row ch of the child's p matrix with its likelihoods
                p_li[ch] = sum([ p[child][ch,st] for st in ars["charlist"] ]
                               * li[:nchar])
                nextli[ch] *= p_li[ch]

    # Now that the downpass has been performed, we must perform the up-pass
    # ------------------- Performing the up-pass -------------------------------
    # The up-pass likelihood at each node is equivalent to information coming
    # up from the root * information coming down from the tips

    # Each node has the following:
    # Uppass_likelihood (set to the equilibrium frequency for the root)
    # Marginal_likelihood (product of uppass_likelihood and downpass likelihood)
    # Partial likelihood for each child node
    # The final two columns of up_nl point to the
    # postorder index numbers of the parent and self node, respectively

    # child_inds holds, per parent, the position of the child currently being
    # processed; that child's own partial likelihood is left out when the
    # parent's partial likelihood is formed. Every parent starts at child 0.

    root_posti = ars["up_nl"].shape[0] - 1
    for i,l in enumerate(ars["up_nl"]):
        # Uppass information for node
        if i == 0:
            # Set root node uppass to be equivalent to the root equilibrium

            # Set the marginal to be equivalent to the root equilibrium times
            # the root downpass
            l[:nchar] = root_equil
            ars["marginal_nl"][i][:nchar] = (l[:nchar] *
                                               ars["down_nl_w"][-1][:nchar])
        else:
            spi = int(l[nchar+1]) #self's postorder index
            ppi = int(l[nchar]) # the parent's postorder index

            # Positions of this node's siblings among the parent's children
            # (every slot except the current one). Built from lists so the
            # concatenation works on both Python 2 and Python 3.
            cur = int(ars["child_inds"][ppi])
            nslots = ars["partial_nl"][ppi].shape[0]
            sib_slots = list(range(cur)) + list(range(cur + 1, nslots))
            # NOTE(review): the assignments below store a (nsiblings, ...)
            # result into a single row; this presumably relies on there
            # being exactly one sibling - confirm for polytomies.
            sib_partials = ars["partial_nl"][ppi].take(sib_slots, 0)

            if ppi == root_posti:
                # If parent is the root, the parent's partial likelihood is
                # the partial downpass (siblings only) times the equilibrium
                # frequency of the Q matrix.
                ars["partial_parent_nl"][spi] = sib_partials * root_equil
            else:
                # If parent is not the root, the parent's partial likelihood is
                # the partial downpass * the partial uppass, which is calculated
                # as the parent of the parent's partial likelihood times
                # the transition probability
                np.dot(p[ppi].T, ars["partial_parent_nl"][ppi], out=ars["temp_dotprod"])
                ars["partial_parent_nl"][spi] = sib_partials * ars["temp_dotprod"]
            # The up-pass likelihood is equivalent to the parent's partial
            # likelihood times the transition probability
            np.dot(p[spi].T, ars["partial_parent_nl"][spi], out = l[:nchar])
            # Advance the counter so that the next child of this parent
            # excludes itself rather than this node
            ars["child_inds"][ppi]  += 1

            # Marginal = Uppass * downpass (index with the integer spi; the
            # raw array value is a float and cannot be used as an index)
            ars["marginal_nl"][i][:nchar] = l[:nchar] * ars["down_nl_w"][spi][:nchar]

    return ars["marginal_nl"]
示例#7
0
def anc_recon_py(tree, chars, Q, p=None, pi="Fitzjohn"):
    """
    - Pure python version of anc recon code

    Given tree, character states at tips, and transition matrix perform
    ancestor reconstruction.

    Perform downpass, then perform uppass.

    Return reconstructed states - including tips (tips can be switched
    to their true values in post-processing)

    Args:
        tree (Node): Root node of a tree. It is copied; the copy is annotated.
        chars (list): Character states of the tips, in the order in which
          ``tree.leaves()`` yields them. States are used as indices into
          the likelihood vectors.
        Q (np.array): Instantaneous rate matrix
        p (np.array): Optional pre-allocated array of shape
          (number of descendants, nchar, nchar); overwritten in place.
        pi: Not read by this function.
    Returns:
        Node: Copy of ``tree``. Every node has a ``downpass_likelihood``
          dict and every non-root node a ``marginal_likelihood`` dict, both
          keyed by state index. The values are not normalised here.

    Note:
        The likelihood contributed by a node's siblings is the product over
        siblings of the per-sibling sum over states, which is the same rule
        the down-pass uses. For a node with exactly one sibling this equals
        the earlier sum-of-products formulation.
    """
    chartree = tree.copy()
    chartree.char = None
    chartree.downpass_likelihood = {}
    t = [node.length for node in chartree.descendants()]
    t = np.array(t, dtype=np.double)
    nchar = Q.shape[0]
    states = range(nchar)

    # Generating probability matrix for each branch
    if p is None:
        p = np.empty([len(t), Q.shape[0], Q.shape[1]],
                     dtype=np.double,
                     order="C")
    cyexpokit.dexpm_tree_preallocated_p(Q, t, p)  # This changes p in place

    for i, nd in enumerate(chartree.descendants()):
        nd.pmat = p[i]  # Assigning probability matrices for each branch
        nd.downpass_likelihood = {}
        nd.char = None

    for i, lf in enumerate(chartree.leaves()):
        lf.char = chars[i]  # Assigning character states to tips

    def branch_likelihood(child, start_state):
        # Likelihood of everything below `child` given that the top of its
        # branch is in `start_state`: sum over end states of
        # p(start, end) * likelihood(end)
        return sum([
            child.pmat[start_state, end_state] *
            child.downpass_likelihood[end_state] for end_state in states
        ])

    # Performing the downpass
    for node in chartree.postiter():
        if node.char is not None:
            # For tip nodes, likelihoods are 1 for observed state and 0 for the rest
            for state in states:
                node.downpass_likelihood[state] = 0.0
            node.downpass_likelihood[node.char] = 1.0
        else:
            for state in states:
                per_child = [
                    branch_likelihood(ch, state) for ch in node.children
                ]
                node.downpass_likelihood[state] = np.prod(per_child)

    # Performing the uppass (skipping the root)
    # Iterate over nodes in pre-order sequence
    for node in chartree.descendants():
        # Marginal is equivalent to information coming UP from the root
        # * information coming DOWN from the tips
        node.marginal_likelihood = {}
        parent = node.parent

        ## Partial downpass likelihood of the parent: the downpass
        ## likelihood it would have if this node were pruned away
        parent.partial_down_likelihood = {}
        sibs = node.get_siblings()
        for state in states:
            partial = 1.0
            for sib in sibs:
                partial *= branch_likelihood(sib, state)
            parent.partial_down_likelihood[state] = partial

        ## Partial uppass likelihood of the parent
        parent.partial_up_likelihood = {}
        if parent.isroot:
            # Nothing lies "upwards" of the root; use 1 as the identity
            for state in states:
                parent.partial_up_likelihood[state] = 1.0
        else:
            # NOTE(review): only the parent's siblings contribute here;
            # nothing from above the grandparent is propagated - confirm
            # this is intended.
            aunts = parent.get_siblings()
            from_aunts = [1.0] * nchar  # indexed by grandparent state
            for pstate in states:
                for aunt in aunts:
                    from_aunts[pstate] *= branch_likelihood(aunt, pstate)
            for state in states:
                parent.partial_up_likelihood[state] = 0.0
                for pstate in states:
                    parent.partial_up_likelihood[state] += (
                        from_aunts[pstate] * parent.pmat[pstate, state])

        ### Putting together the uppass information and the downpass information
        uppass_information = {}
        for state in states:
            uppass_information[state] = (
                parent.partial_down_likelihood[state] *
                parent.partial_up_likelihood[state])
        downpass_information = node.downpass_likelihood

        for state in states:
            node.marginal_likelihood[state] = 0
            for pstate in states:
                node.marginal_likelihood[state] += (
                    uppass_information[pstate] * node.pmat[pstate, state])
            node.marginal_likelihood[state] *= downpass_information[state]
    return chartree
示例#8
0
def anc_recon_purepy(tree, chars, Q, p=None, pi="Fitzjohn", ars=None):
    """
    Given tree, character states at tips, and transition matrix perform
    ancestor reconstruction.

    The downpass is performed with an explicit Python loop over the
    pre-allocated arrays in ``ars``; the uppass is then performed and the
    two are combined into marginal likelihoods.

    Args:
        tree (Node): Root node of a tree. Only used to build ``ars`` when
          ``ars`` is not supplied.
        chars (list): Character states at the tips. Only used to build
          ``ars`` when ``ars`` is not supplied.
        Q (np.array): Instantaneous rate matrix. ``Q.shape[0]`` is taken as
          the number of character states.
        p (np.array): Optional pre-allocated 3-D array of dimensions
          len(ars["t"]) * nchar * nchar. It is overwritten in place with
          the per-branch transition probability matrices.
        pi (str or np.array): Present in the signature (same as
          ``anc_recon``) but currently ignored by this function: the root
          is always weighted by ``ivy.chars.mk.qsd(Q)``.
        ars (dict): Dict of pre-allocated arrays. If None it is created
          with ``create_ancrecon_ars(tree, chars)``. The keys read or
          written here are "t", "down_nl_r", "down_nl_w", "child_inds",
          "childlist", "charlist", "partial_nl", "up_nl",
          "partial_parent_nl", "temp_dotprod" and "marginal_nl".
    Returns:
        np.array: ``ars["marginal_nl"]``, whose first ``nchar`` columns of
          each row are filled with the marginal likelihoods (rows follow
          the order of ``ars["up_nl"]``).
    """
    nchar = Q.shape[0]
    if ars is None:
        # Creating arrays to be used later
        ars = create_ancrecon_ars(tree, chars)
    if p is None:  # Instantiating empty array
        p = np.empty([len(ars["t"]), Q.shape[0], Q.shape[1]],
                     dtype=np.double,
                     order="C")
    # Creating probability matrices from Q matrix and branch lengths
    cyexpokit.dexpm_tree_preallocated_p(Q, ars["t"],
                                        p)  # This changes p in place
    # Reset the working downpass array from the read-only copy, in case a
    # previous call modified it
    np.copyto(ars["down_nl_w"], ars["down_nl_r"])
    ars["child_inds"].fill(0)
    # Root weighting (presumably the stationary distribution of Q -- the
    # `pi` argument is not consulted)
    root_equil = ivy.chars.mk.qsd(Q)

    # ------------------- Performing the down-pass -----------------------------
    # Column `nchar` of down_nl_w (all rows but the last) supplies the set of
    # internal-node row indices to visit, in ascending order.
    for intnode in map(int, sorted(set(ars["down_nl_w"][:-1, nchar]))):
        nextli = ars["down_nl_w"][intnode]
        for chi, child in enumerate(ars["childlist"][intnode]):
            li = ars["down_nl_w"][child]
            p_li = ars["partial_nl"][intnode][chi]
            for ch in ars["charlist"]:
                # Partial likelihood contributed by this child for parent
                # state `ch`: sum over child states of P[ch, st] * L_child[st]
                p_li[ch] = sum([p[child][ch, st]
                                for st in ars["charlist"]] * li[:nchar])
                # The node's downpass likelihood accumulates the product of
                # its children's partial likelihoods
                nextli[ch] *= p_li[ch]

    # ------------------- Performing the up-pass -------------------------------
    # The up-pass likelihood at each node is equivalent to information coming
    # up from the root * information coming down from the tips

    # For every row of up_nl, the first nchar columns hold the uppass
    # likelihood; columns nchar and nchar + 1 hold the row indices (into the
    # downpass arrays) of the parent and of the node itself, respectively.

    # child_inds[parent] is the position, among the parent's children, of the
    # child currently being processed; that child's own partial likelihood is
    # left out when computing what the parent passes down to it.
    root_posti = ars["up_nl"].shape[0] - 1
    for i, l in enumerate(ars["up_nl"]):
        if i == 0:
            # Root: the uppass is the root weighting, and the marginal is
            # that weighting times the root downpass (last downpass row)
            l[:nchar] = root_equil
            ars["marginal_nl"][i][:nchar] = (l[:nchar] *
                                             ars["down_nl_w"][-1][:nchar])
            continue

        spi = int(l[nchar + 1])  # self's postorder index
        ppi = int(l[nchar])  # the parent's postorder index

        # Rows of the parent's partial likelihoods belonging to this node's
        # siblings (every child row except the current one). The ranges are
        # turned into lists before joining: range objects cannot be
        # concatenated with `+` on Python 3.
        cur = int(ars["child_inds"][ppi])
        n_children = ars["partial_nl"][ppi].shape[0]
        sib_rows = list(range(cur)) + list(range(cur + 1, n_children))
        # NOTE(review): with more than one sibling this yields several rows;
        # confirm partial_parent_nl[spi] is meant to receive that shape.
        sib_partials = ars["partial_nl"][ppi].take(sib_rows, 0)

        if ppi == root_posti:
            # If parent is the root, the parent's partial likelihood is the
            # siblings' partial downpass times the root weighting
            parent_weight = root_equil
        else:
            # Otherwise it is the siblings' partial downpass times the
            # parent's partial uppass: the grandparent-side partial
            # likelihood pushed through the parent's transition matrix
            np.dot(p[ppi].T,
                   ars["partial_parent_nl"][ppi],
                   out=ars["temp_dotprod"])
            parent_weight = ars["temp_dotprod"]
        ars["partial_parent_nl"][spi] = sib_partials * parent_weight

        # The up-pass likelihood is equivalent to the parent's partial
        # likelihood times the transition probability
        np.dot(p[spi].T, ars["partial_parent_nl"][spi], out=l[:nchar])
        # Advance so that the next child of this parent excludes itself
        ars["child_inds"][ppi] += 1

        # Marginal = Uppass * downpass. Index with the integer `spi`: the raw
        # value stored in the row may be a float, which is not a valid index.
        ars["marginal_nl"][i][:nchar] = (l[:nchar] *
                                         ars["down_nl_w"][spi][:nchar])

    return ars["marginal_nl"]