def anc_recon(tree, chars, Q, p=None, pi="Fitzjohn", ars=None):
    """
    Marginal ancestral state reconstruction (compiled implementation).

    Builds the per-branch transition probability matrices from Q, resets
    the working arrays, and hands everything to ``cyexpokit.cy_anc_recon``,
    which fills ``ars["marginal_nl"]``.

    Args:
        tree (Node): Root node of a tree. All branch lengths must be
          greater than 0 (except root)
        chars (list): List of character states corresponding to leaf nodes
          in preorder sequence. Character states must be numbered 0,1,2,...
        Q (np.array): Instantaneous rate matrix
        p (np.array): 3-D array of dimensions branch_number * nchar * nchar.
          Optional. Pre-allocated space for efficient calculations; it is
          overwritten in place.
        pi (str or np.array): Root weighting option. NOTE: this argument is
          accepted but never read in this function; the vector returned by
          ``ivy.chars.mk.qsd(Q)`` is always what is passed on for the root.
        ars (dict): Dict of pre-allocated arrays to improve speed by
          avoiding creating and destroying new arrays. Can be created with
          create_ancrecon_ars function.
    Returns:
        np.array: ``ars["marginal_nl"]`` as filled by the compiled routine
          (per the original documentation: nodes in preorder sequence
          containing marginal likelihoods).
    """
    nchar = Q.shape[0]
    # Build the scratch arrays only when the caller did not supply them
    work = create_ancrecon_ars(tree, chars) if ars is None else ars
    branch_lengths = work["t"]

    if p is None:
        # No pre-allocated space was given: one matrix per branch
        p = np.empty((len(branch_lengths), nchar, Q.shape[1]),
                     dtype=np.double, order="C")
    # Fill p (in place) with the transition matrices for every branch
    cyexpokit.dexpm_tree_preallocated_p(Q, branch_lengths, p)

    # Restore the writable down-pass array from its read-only template in
    # case a previous call modified it, and rewind the child counters
    np.copyto(work["down_nl_w"], work["down_nl_r"])
    work["child_inds"].fill(0)

    root_equil = ivy.chars.mk.qsd(Q)

    cyexpokit.cy_anc_recon(
        p,
        work["down_nl_w"],
        work["charlist"],
        work["childlist"],
        work["up_nl"],
        work["marginal_nl"],
        work["partial_parent_nl"],
        work["partial_nl"],
        work["child_inds"],
        root_equil,
        work["temp_dotprod"],
    )
    return work["marginal_nl"]
def anc_recon(tree, chars, Q, p=None, pi="Fitzjohn", ars=None):
    """
    Marginal ancestral state reconstruction (compiled implementation).

    Builds the per-branch transition probability matrices from Q, resets
    the working arrays, and hands everything to ``cyexpokit.cy_anc_recon``,
    which fills ``ars["marginal_nl"]``.

    Args:
        tree (Node): Root node of a tree. All branch lengths must be
          greater than 0 (except root)
        chars (list): List of character states corresponding to leaf nodes
          in preorder sequence. Character states must be numbered 0,1,2,...
        Q (np.array): Instantaneous rate matrix
        p (np.array): 3-D array of dimensions branch_number * nchar * nchar.
          Optional. Pre-allocated space for efficient calculations; it is
          overwritten in place.
        pi (str or np.array): Root weighting option. NOTE: this argument is
          accepted but never read in this function; the vector returned by
          ``ivy.chars.mk.qsd(Q)`` is always what is passed on for the root.
        ars (dict): Dict of pre-allocated arrays to improve speed by
          avoiding creating and destroying new arrays. Can be created with
          create_ancrecon_ars function.
    Returns:
        np.array: ``ars["marginal_nl"]`` as filled by the compiled routine
          (per the original documentation: nodes in preorder sequence
          containing marginal likelihoods).
    """
    nchar = Q.shape[0]
    # Build the scratch arrays only when the caller did not supply them
    work = create_ancrecon_ars(tree, chars) if ars is None else ars
    branch_lengths = work["t"]

    if p is None:
        # No pre-allocated space was given: one matrix per branch
        p = np.empty((len(branch_lengths), nchar, Q.shape[1]),
                     dtype=np.double, order="C")
    # Fill p (in place) with the transition matrices for every branch
    cyexpokit.dexpm_tree_preallocated_p(Q, branch_lengths, p)

    # Restore the writable down-pass array from its read-only template in
    # case a previous call modified it, and rewind the child counters
    np.copyto(work["down_nl_w"], work["down_nl_r"])
    work["child_inds"].fill(0)

    root_equil = ivy.chars.mk.qsd(Q)

    cyexpokit.cy_anc_recon(
        p,
        work["down_nl_w"],
        work["charlist"],
        work["childlist"],
        work["up_nl"],
        work["marginal_nl"],
        work["partial_parent_nl"],
        work["partial_nl"],
        work["child_inds"],
        root_equil,
        work["temp_dotprod"],
    )
    return work["marginal_nl"]
def hrm_back_mk(tree, chars, Q, nregime, p=None, pi="Fitzjohn",returnPi=False,
                preallocated_arrays=None, tip_states=None, returnnodes=False):
    """
    Calculate probability vector at root given tree, characters, and Q matrix,
    then reconstruct probability vectors for tips and use those in another
    up-pass to calculate probability vector at root.

    The caller's Q is never modified: the transposed rate matrix used for
    the up-pass is built from a copy.

    Args:
        tree (Node): Root node of a tree. All branch lengths must be
          greater than 0 (except root)
        chars (list): List of character states corresponding to leaf nodes
          in preorder sequence. Character states must be numbered 0,1,2,...
        Q (np.array): Instantaneous rate matrix
        nregime (int): Number of hidden rate regimes. The number of observed
          states is taken to be Q.shape[0] // nregime.
        p (np.array): Optional pre-allocated p matrix (overwritten in place)
        pi (str or np.array): Option to weight the root node by given values.
           Either a string containing the method or an array
           of weights. Weights should be given in order.

           Accepted methods of weighting root:

           Equal: flat prior
           Equilibrium: Prior equal to stationary distribution
             of Q matrix
           Fitzjohn: Root states weighted by how well they
             explain the data at the tips.
        returnPi (bool): Accepted for signature compatibility; not read by
          this function.
        preallocated_arrays (dict): Dict of pre-allocated arrays to improve
          speed by avoiding creating and destroying new arrays
        tip_states (np.array): Optional per-tip weights. Its columns except
          the last are multiplied into the tip rows of the node list, and
          the rows are then renormalised to sum to 1.
        returnnodes (bool): If True, the full up-pass node array is appended
          to the returned list.
    Returns:
        list: [up-pass rows for the leaves, log-likelihood] and, when
          ``returnnodes`` is True, the whole up-pass node array as a third
          element.
    Raises:
        ValueError: if ``pi`` is a string other than "Equal", "Fitzjohn" or
          "Equilibrium".
    """
    nchar = Q.shape[0]
    # Floor division keeps this an integer on Python 3 as well; it is used
    # to build array indices below
    nobschar = nchar // nregime

    if preallocated_arrays is None:
        # Creating arrays to be used later
        preallocated_arrays = {}
        t = [node.length for node in tree.postiter() if not node.isroot]
        preallocated_arrays["charlist"] = list(range(nchar))
        preallocated_arrays["t"] = np.array(t, dtype=np.double)

    if p is None:  # Instantiating empty array
        p = np.empty([len(preallocated_arrays["t"]), Q.shape[0], Q.shape[1]],
                     dtype=np.double, order="C")
    # Creating probability matrices from Q matrix and branch lengths
    cyexpokit.dexpm_tree_preallocated_p(Q, preallocated_arrays["t"], p)  # This changes p in place

    # Only "charlist" and "t" present: the remaining arrays must be built
    if len(preallocated_arrays) == 2:
        nnode = len(tree.descendants()) + 1
        nodelist = np.zeros((nnode, nchar + 1))
        childlist = np.zeros(nnode, dtype=object)
        preallocated_arrays["nodelist"] = nodelist
        preallocated_arrays["childlist"] = childlist

        postnodes = list(tree.postiter())
        leafind = [n.isleaf for n in postnodes]
        leaf_rows = [i for i, is_leaf in enumerate(leafind) if is_leaf]

        # Reordering character states to be in postorder sequence
        preleaves = [n for n in tree.preiter() if n.isleaf]
        postleaves = [n for n in postnodes if n.isleaf]
        postChars = [chars[preleaves.index(n)] for n in postleaves]

        # Filling in the node list. It contains all of the information needed
        # to calculate the likelihoods at each node.
        # Q matrix is in the form of "0S, 1S, 0F, 1F" etc. Every hidden
        # state of a tip's observed state receives weight 1/nregime.
        for k, ch in enumerate(postChars):
            hiddenChs = [regime * nobschar + ch for regime in range(nregime)]
            nodelist[leaf_rows[k], hiddenChs] = 1.0 / nregime

        # Last column holds the postorder index of the parent; the root row
        # keeps its initial 0 there
        for i, row in enumerate(nodelist[:-1]):
            row[nchar] = postnodes.index(postnodes[i].parent)
            childlist[i] = [nod.pi for nod in postnodes[i].children]
        childlist[nnode - 1] = [nod.pi for nod in postnodes[nnode - 1].children]

        # Setting initial node likelihoods to 1.0 for calculations
        nodelist[[i for i, b in enumerate(leafind) if not b], :-1] = 1.0

        # Empty array to store root priors
        preallocated_arrays["root_priors"] = np.empty([nchar], dtype=np.double)
        preallocated_arrays["nodelist-up"] = nodelist.copy()
        # A copy, so that later in-place edits can never reach the caller's Q
        preallocated_arrays["t_Q"] = np.array(Q)
        preallocated_arrays["p_up"] = p.copy()
        preallocated_arrays["v"] = np.zeros([nchar])
        preallocated_arrays["tmp"] = np.zeros([nchar + 1])
        preallocated_arrays["motherRow"] = np.zeros([nchar + 1])

    nodelist = preallocated_arrays["nodelist"]
    nodelist_up = preallocated_arrays["nodelist-up"]
    childlist = preallocated_arrays["childlist"]
    root_priors = preallocated_arrays["root_priors"]
    v = preallocated_arrays["v"]
    tmp = preallocated_arrays["tmp"]
    motherRow = preallocated_arrays["motherRow"]

    if tip_states is not None:
        leafind = [n.isleaf for n in tree.postiter()]
        leaf_rownums = [i for i, is_leaf in enumerate(leafind) if is_leaf]
        tip_states = nodelist[leaf_rownums][:, :-1] * tip_states[:, :-1]
        tip_states = tip_states / np.sum(tip_states, 1)[:, None]
        nodelist[leaf_rownums, :-1] = tip_states

    # Calculating the likelihoods for each node in post-order sequence
    cyexpokit.cy_mk(nodelist, p, preallocated_arrays["charlist"])

    # The last row of nodelist contains the likelihood values at the root
    # (its final column is the parent-index slot, not a likelihood)
    root_lik = nodelist[-1, :-1]

    # Applying the correct root prior
    if not isinstance(pi, str):
        assert len(pi) == nchar, "length of given pi does not match Q dimensions"
        assert isinstance(pi, np.ndarray), "pi must be str or numpy array"
        assert np.isclose(sum(pi), 1), "values of given pi must sum to 1"
        np.copyto(root_priors, pi)
        li = sum(lik * root_priors[n] for n, lik in enumerate(root_lik))
    elif pi == "Equal":
        root_priors.fill(1.0 / nchar)
        li = sum(float(lik) / nchar for lik in root_lik)
    elif pi == "Fitzjohn":
        np.copyto(root_priors, root_lik / sum(root_lik))
        # NOTE(review): the sum runs over the observed states in `chars`
        # only, i.e. not over every hidden state - confirm this is intended
        li = sum(root_lik[charstate] * root_priors[charstate]
                 for charstate in set(chars))
    elif pi == "Equilibrium":
        # Equilibrium pi from the stationary distribution of Q
        np.copyto(root_priors, qsd(Q))
        li = sum(lik * root_priors[n] for n, lik in enumerate(root_lik))
    else:
        raise ValueError(
            "pi must be 'Equal', 'Fitzjohn', 'Equilibrium' or a numpy array "
            "of root weights; got {!r}".format(pi))
    logli = math.log(li)

    # Transposal of Q for up-pass now that down-pass is completed.
    # np.array(Q) copies, so rewriting the diagonal leaves Q untouched.
    t_Q = np.array(Q).T
    diag = np.diag_indices(nchar)
    t_Q[diag] = 0
    t_Q[diag] = -np.sum(t_Q, 1)
    t_Q = np.ascontiguousarray(t_Q)
    preallocated_arrays["t_Q"] = t_Q
    cyexpokit.dexpm_tree_preallocated_p(t_Q, preallocated_arrays["t"],
                                        preallocated_arrays["p_up"])
    p_up = preallocated_arrays["p_up"]

    nodelist_up[:, :-1] = 1.0
    nodelist_up[-1] = nodelist[-1]

    root_marginal = ivy.chars.mk.qsd(Q)  # Change to Fitzjohn Q?

    # Walk the rows from the one just before the root back to row 0; `ni`
    # is the row index of the node currently being filled in
    ni = len(nodelist_up) - 2
    for n in nodelist_up[::-1][1:]:
        motherRowNum = int(n[nchar])
        np.copyto(motherRow, nodelist_up[motherRowNum])
        sisters = [i for i in childlist[motherRowNum] if not i == ni]

        # A parent-index of 0 marks the root row, so this tests whether
        # the mother is the root
        if motherRow[nchar] == 0.0:
            # The marginal of the root
            np.copyto(v, root_marginal)
        else:
            # If the mother is not the root, calculate prob. of being in
            # any state. Use transposed matrix
            np.dot(p_up[motherRowNum], nodelist_up[motherRowNum][:nchar],
                   out=v)
        for sis in sisters:
            # Use non-transposed matrix on the normalised down-pass row
            np.copyto(tmp, nodelist[sis])
            tmp[:nchar] = tmp[:-1] / sum(tmp[:nchar])
            v *= np.dot(p[sis], tmp[:nchar])
        nodelist_up[ni][:nchar] = v
        ni -= 1

    out = [nodelist_up[[leaf.pi for leaf in tree.leaves()]], logli]
    if returnnodes:
        out.append(nodelist_up)
    return out
def hrm_mk(tree, chars, Q, nregime, p=None, pi="Fitzjohn",returnPi=False,
           preallocated_arrays=None):
    """
    Note: this version calculates likelihoods at each node.
    Other version calculates probabilities at each node to match corHMM

    Return log-likelihood of hidden-rates-model mk as described in
    Beaulieu et al. 2013

    Args:
        tree (Node): Root node of a tree. All branch lengths must be
          greater than 0 (except root)
        chars (list): List of character states corresponding to leaf nodes
          in preorder sequence. Character states must be numbered 0,1,2,...
        Q (np.array): Instantaneous rate matrix
        nregime (int): Number of hidden rate regimes. The number of observed
          states is taken to be Q.shape[0] // nregime.
        p (np.array): Optional pre-allocated p matrix (overwritten in place)
        pi (str or np.array): Option to weight the root node by given values.
           Either a string containing the method or an array
           of weights. Weights should be given in order.

           Accepted methods of weighting root:

           Equal: flat prior
           Equilibrium: Prior equal to stationary distribution
             of Q matrix
           Fitzjohn: Root states weighted by how well they
             explain the data at the tips.
        returnPi (bool): Whether or not to return the final values of root
          node weighting
        preallocated_arrays (dict): Dict of pre-allocated arrays to improve
          speed by avoiding creating and destroying new arrays
    Returns:
        float, or (float, dict) when ``returnPi`` is True: the
          log-likelihood, optionally with a dict mapping state index to the
          root weight that was used.
    Raises:
        ValueError: if ``pi`` is a string other than "Equal", "Fitzjohn" or
          "Equilibrium".
    """
    nchar = Q.shape[0]
    # Floor division keeps this an integer on Python 3 as well; it is used
    # to build array indices below
    nobschar = nchar // nregime

    if preallocated_arrays is None:
        # Creating arrays to be used later
        preallocated_arrays = {}
        preallocated_arrays["charlist"] = list(range(nchar))
        preallocated_arrays["t"] = np.array(
            [node.length for node in tree.postiter() if not node.isroot],
            dtype=np.double)

    if p is None:  # Instantiating empty array
        p = np.empty([len(preallocated_arrays["t"]), Q.shape[0], Q.shape[1]],
                     dtype=np.double, order="C")
    # Creating probability matrices from Q matrix and branch lengths
    cyexpokit.dexpm_tree_preallocated_p(Q, preallocated_arrays["t"], p)  # This changes p in place

    # Only "charlist" and "t" present: the remaining arrays must be built
    if len(preallocated_arrays) == 2:
        nnode = len(tree.descendants()) + 1
        nodelist = np.zeros((nnode, nchar + 1))
        preallocated_arrays["nodelist"] = nodelist

        postnodes = list(tree.postiter())
        leafind = [n.isleaf for n in postnodes]
        leaf_rows = [i for i, is_leaf in enumerate(leafind) if is_leaf]

        # Reordering character states to be in postorder sequence
        preleaves = [n for n in tree.preiter() if n.isleaf]
        postleaves = [n for n in postnodes if n.isleaf]
        postChars = [chars[preleaves.index(n)] for n in postleaves]

        # Filling in the node list. It contains all of the information needed
        # to calculate the likelihoods at each node.
        # Q matrix is in the form of "0S, 1S, 0F, 1F" etc. Likelihoods are
        # set to 1 for all hidden states of the observed state.
        for k, ch in enumerate(postChars):
            hiddenChs = [regime * nobschar + ch for regime in range(nregime)]
            nodelist[leaf_rows[k], hiddenChs] = 1.0

        # Last column holds the postorder index of the parent; the root row
        # keeps its initial 0 there
        for i, row in enumerate(nodelist[:-1]):
            row[nchar] = postnodes.index(postnodes[i].parent)

        # Setting initial node likelihoods to 1.0 for calculations
        nodelist[[i for i, b in enumerate(leafind) if not b], :-1] = 1.0

        # Empty array to store root priors
        preallocated_arrays["root_priors"] = np.empty([nchar], dtype=np.double)

    nodelist = preallocated_arrays["nodelist"]
    root_priors = preallocated_arrays["root_priors"]

    # Calculating the likelihoods for each node in post-order sequence
    cyexpokit.cy_mk(nodelist, p, preallocated_arrays["charlist"])

    # The last row of nodelist contains the likelihood values at the root
    # (its final column is the parent-index slot, not a likelihood)
    root_lik = nodelist[-1, :-1]

    # Applying the correct root prior
    if not isinstance(pi, str):
        assert len(pi) == nchar, "length of given pi does not match Q dimensions"
        assert isinstance(pi, np.ndarray), "pi must be str or numpy array"
        assert np.isclose(sum(pi), 1), "values of given pi must sum to 1"
        np.copyto(root_priors, pi)
        li = sum(lik * root_priors[n] for n, lik in enumerate(root_lik))
    elif pi == "Equal":
        root_priors.fill(1.0 / nchar)
        li = sum(float(lik) / nchar for lik in root_lik)
    elif pi == "Fitzjohn":
        np.copyto(root_priors, root_lik / sum(root_lik))
        # NOTE(review): the sum runs over the observed states in `chars`
        # only, i.e. not over every hidden state - confirm this is intended
        li = sum(root_lik[charstate] * root_priors[charstate]
                 for charstate in set(chars))
    elif pi == "Equilibrium":
        # Equilibrium pi from the stationary distribution of Q
        np.copyto(root_priors, qsd(Q))
        li = sum(lik * root_priors[n] for n, lik in enumerate(root_lik))
    else:
        raise ValueError(
            "pi must be 'Equal', 'Fitzjohn', 'Equilibrium' or a numpy array "
            "of root weights; got {!r}".format(pi))
    logli = math.log(li)

    if returnPi:
        return (logli, dict(enumerate(root_priors)))
    return logli
def anc_recon_py(tree, chars, Q, p=None, pi="Fitzjohn"):
    """
    - Pure python version of anc recon code

    Given tree, character states at tips, and transition matrix perform
    ancestor reconstruction. Perform downpass using mk function,
    then perform uppass.

    Works on a copy of ``tree``. Every non-root node of the copy receives
    ``pmat``, ``downpass_likelihood`` and ``marginal_likelihood``
    attributes (dicts keyed by state index for the latter two).

    Args:
        tree (Node): Root node of a tree. It is copied, not modified.
        chars (list): Character states of the tips, in the order of
          ``tree.leaves()``.
        Q (np.array): Instantaneous rate matrix
        p (np.array): Optional pre-allocated array for the per-branch
          transition matrices (overwritten in place).
        pi (str or np.array): Accepted for signature compatibility; not
          read by this function. The parent up-likelihood at the root is
          fixed to 1 for every state.
    Returns:
        Node: the annotated copy of the tree - including tips
          (tips can be switched to their true values in post-processing)
    """
    chartree = tree.copy()
    chartree.char = None
    chartree.downpass_likelihood = {}
    t = np.array([node.length for node in chartree.descendants()],
                 dtype=np.double)
    nchar = Q.shape[0]
    states = range(nchar)

    # Generating probability matrix for each branch
    if p is None:
        p = np.empty([len(t), Q.shape[0], Q.shape[1]],
                     dtype=np.double, order="C")
    cyexpokit.dexpm_tree_preallocated_p(Q, t, p)  # This changes p in place

    for i, nd in enumerate(chartree.descendants()):
        nd.pmat = p[i]  # Assigning probability matrices for each branch
        nd.downpass_likelihood = {}
        nd.char = None
    for i, lf in enumerate(chartree.leaves()):
        lf.char = chars[i]  # Assigning character states to tips

    # Performing the downpass
    for node in chartree.postiter():
        if node.char is not None:
            # For tip nodes, likelihoods are 1 for observed state and 0
            # for the rest
            for state in states:
                node.downpass_likelihood[state] = 0.0
            node.downpass_likelihood[node.char] = 1.0
        else:
            for state in states:
                # For each child: sum over end states of
                # p(state -> end) * likelihood(end); then multiply the
                # children together
                per_child = [
                    sum(ch.pmat[state, chState] * ch.downpass_likelihood[chState]
                        for chState in states)
                    for ch in node.children
                ]
                node.downpass_likelihood[state] = np.prod(per_child)

    # Performing the uppass (skipping the root)
    for node in chartree.descendants():
        # Marginal is equivalent to information coming UP from the root *
        # information coming DOWN from the tips
        node.marginal_likelihood = {}
        parent = node.parent

        ## Partial downpass likelihood of the parent: the parent's downpass
        ## likelihood with this node's own contribution left out, i.e. the
        ## product over the siblings of each sibling's summed contribution
        parent.partial_down_likelihood = {}
        sibs = node.get_siblings()
        for state in states:
            partial = 1.0
            for sib in sibs:
                partial *= sum(
                    sib.downpass_likelihood[chState] * sib.pmat[state, chState]
                    for chState in states)
            parent.partial_down_likelihood[state] = partial

        ## Partial uppass likelihood of the parent
        parent.partial_up_likelihood = {}
        if parent.isroot:
            # Nothing is "upwards" of the root. Set all likelihoods to 1
            # for identity
            for state in states:
                parent.partial_up_likelihood[state] = 1.0
        else:
            # Contribution of the parent's siblings for each state of the
            # grandparent; it does not depend on the parent's own state,
            # so it is computed once
            parent_sibs = parent.get_siblings()
            from_above = [1.0] * nchar
            for pstate in states:
                for sib in parent_sibs:
                    from_above[pstate] *= sum(
                        sib.downpass_likelihood[sibstate] * sib.pmat[pstate, sibstate]
                        for sibstate in states)
            for state in states:
                total = 0.0
                for pstate in states:
                    total += from_above[pstate] * parent.pmat[pstate, state]
                parent.partial_up_likelihood[state] = total

        ### Putting together the uppass information and the downpass
        ### information
        uppass_information = {}
        for state in states:
            uppass_information[state] = (parent.partial_down_likelihood[state]
                                         * parent.partial_up_likelihood[state])
        downpass_information = node.downpass_likelihood
        for state in states:
            node.marginal_likelihood[state] = 0
            for pstate in states:
                node.marginal_likelihood[state] += (uppass_information[pstate]
                                                    * node.pmat[pstate, state])
            node.marginal_likelihood[state] *= downpass_information[state]
    return chartree
def anc_recon_purepy(tree, chars, Q, p=None, pi="Fitzjohn", ars=None):
    """
    Given tree, character states at tips, and transition matrix perform
    ancestor reconstruction. Perform downpass using mk function,
    then perform uppass.

    Python/NumPy counterpart of the compiled reconstruction; it operates on
    the same working arrays (see ``create_ancrecon_ars``).

    Args:
        tree (Node): Root node of a tree.
        chars (list): Character states of the tips; only used to build the
          working arrays when ``ars`` is not given.
        Q (np.array): Instantaneous rate matrix
        p (np.array): Optional pre-allocated array for the per-branch
          transition matrices (overwritten in place).
        pi (str or np.array): Accepted for signature compatibility; not
          read by this function. The root up-pass vector is always
          ``ivy.chars.mk.qsd(Q)``.
        ars (dict): Dict of pre-allocated working arrays.
    Returns:
        np.array: ``ars["marginal_nl"]``, filled with the reconstructed
          states - including tips
    """
    nchar = Q.shape[0]
    if ars is None:
        # Creating arrays to be used later
        ars = create_ancrecon_ars(tree, chars)
    # Calculating the likelihoods for each node in post-order sequence
    if p is None:  # Instantiating empty array
        p = np.empty([len(ars["t"]), Q.shape[0], Q.shape[1]],
                     dtype=np.double, order="C")
    # Creating probability matrices from Q matrix and branch lengths
    cyexpokit.dexpm_tree_preallocated_p(Q, ars["t"], p)  # This changes p in place

    # Copy original values if they have been changed
    np.copyto(ars["down_nl_w"], ars["down_nl_r"])
    ars["child_inds"].fill(0)

    root_equil = ivy.chars.mk.qsd(Q)

    down = ars["down_nl_w"]
    partial_nl = ars["partial_nl"]
    partial_parent_nl = ars["partial_parent_nl"]
    child_inds = ars["child_inds"]
    charlist = ars["charlist"]

    # ------------------- Performing the down-pass -----------------------------
    # Column `nchar` of the down-pass array holds parent indices; visiting
    # the distinct values in increasing order handles every internal node
    for intnode in map(int, sorted(set(down[:-1, nchar]))):
        nextli = down[intnode]
        for chi, child in enumerate(ars["childlist"][intnode]):
            li = down[child]
            p_li = partial_nl[intnode][chi]
            for ch in charlist:
                trans = np.array([p[child][ch, st] for st in charlist])
                p_li[ch] = sum(trans * li[:nchar])
                nextli[ch] *= p_li[ch]

    # ------------------- Performing the up-pass -------------------------------
    # The up-pass likelihood at each node is equivalent to information coming
    # up from the root * information coming down from the tips
    # Each node has the following:
    # Uppass_likelihood (set to the equilibrium frequency for the root)
    # Marginal_likelihood (product of uppass_likelihood and downpass likelihood)
    # Partial likelihood for each child node
    #
    # The final two columns of up_nl point to the postorder index numbers
    # of the parent and self node, respectively.
    # child_inds[parent] is the position (among that parent's children) of
    # the child currently being processed; it is left out when the parent's
    # partial likelihood is assembled.
    root_posti = ars["up_nl"].shape[0] - 1
    for i, l in enumerate(ars["up_nl"]):
        if i == 0:
            # Set root node uppass to be equivalent to the root equilibrium
            # Set the marginal to be equivalent to the root equilibrium times
            # the root downpass
            l[:nchar] = root_equil
            ars["marginal_nl"][i][:nchar] = l[:nchar] * down[-1][:nchar]
            continue

        spi = int(l[nchar + 1])  # self's postorder index
        ppi = int(l[nchar])      # the parent's postorder index

        # Rows of the parent's partial likelihoods for every child slot
        # except the current one (explicit lists so the concatenation also
        # works where range() is not a list)
        skip = child_inds[ppi]
        others = (list(range(skip))
                  + list(range(skip + 1, partial_nl[ppi].shape[0])))
        sibling_partials = partial_nl[ppi].take(others, 0)

        if ppi == root_posti:
            # If parent is the root, the parent's partial likelihood is
            # the partial downpass of the node's siblings times the
            # equilibrium frequency of the Q matrix.
            partial_parent_nl[spi] = sibling_partials * root_equil
        else:
            # If parent is not the root, the parent's partial likelihood is
            # the partial downpass * the partial uppass, which is calculated
            # as the parent of the parent's partial likelihood times
            # the transition probability
            np.dot(p[ppi].T, partial_parent_nl[ppi], out=ars["temp_dotprod"])
            partial_parent_nl[spi] = sibling_partials * ars["temp_dotprod"]

        # The up-pass likelihood is equivalent to the parent's partial
        # likelihood times the transition probability
        np.dot(p[spi].T, partial_parent_nl[spi], out=l[:nchar])
        # Advance so that the next child of this parent leaves itself out
        child_inds[ppi] += 1
        # Marginal = Uppass * downpass (integer index: float indices are
        # rejected by current NumPy)
        ars["marginal_nl"][i][:nchar] = l[:nchar] * down[spi][:nchar]
    return ars["marginal_nl"]
def anc_recon_py(tree, chars, Q, p=None, pi="Fitzjohn"):
    """
    - Pure python version of anc recon code

    Given tree, character states at tips, and transition matrix perform
    ancestor reconstruction. Perform downpass using mk function,
    then perform uppass.

    Works on a copy of ``tree``. Every non-root node of the copy receives
    ``pmat``, ``downpass_likelihood`` and ``marginal_likelihood``
    attributes (dicts keyed by state index for the latter two).

    Args:
        tree (Node): Root node of a tree. It is copied, not modified.
        chars (list): Character states of the tips, in the order of
          ``tree.leaves()``.
        Q (np.array): Instantaneous rate matrix
        p (np.array): Optional pre-allocated array for the per-branch
          transition matrices (overwritten in place).
        pi (str or np.array): Accepted for signature compatibility; not
          read by this function. The parent up-likelihood at the root is
          fixed to 1 for every state.
    Returns:
        Node: the annotated copy of the tree - including tips
          (tips can be switched to their true values in post-processing)
    """
    chartree = tree.copy()
    chartree.char = None
    chartree.downpass_likelihood = {}
    t = np.array([node.length for node in chartree.descendants()],
                 dtype=np.double)
    nchar = Q.shape[0]
    states = range(nchar)

    # Generating probability matrix for each branch
    if p is None:
        p = np.empty([len(t), Q.shape[0], Q.shape[1]],
                     dtype=np.double, order="C")
    cyexpokit.dexpm_tree_preallocated_p(Q, t, p)  # This changes p in place

    for i, nd in enumerate(chartree.descendants()):
        nd.pmat = p[i]  # Assigning probability matrices for each branch
        nd.downpass_likelihood = {}
        nd.char = None
    for i, lf in enumerate(chartree.leaves()):
        lf.char = chars[i]  # Assigning character states to tips

    # Performing the downpass
    for node in chartree.postiter():
        if node.char is not None:
            # For tip nodes, likelihoods are 1 for observed state and 0
            # for the rest
            for state in states:
                node.downpass_likelihood[state] = 0.0
            node.downpass_likelihood[node.char] = 1.0
        else:
            for state in states:
                # For each child: sum over end states of
                # p(state -> end) * likelihood(end); then multiply the
                # children together
                per_child = [
                    sum(ch.pmat[state, chState] * ch.downpass_likelihood[chState]
                        for chState in states)
                    for ch in node.children
                ]
                node.downpass_likelihood[state] = np.prod(per_child)

    # Performing the uppass (skipping the root)
    for node in chartree.descendants():
        # Marginal is equivalent to information coming UP from the root *
        # information coming DOWN from the tips
        node.marginal_likelihood = {}
        parent = node.parent

        ## Partial downpass likelihood of the parent: the parent's downpass
        ## likelihood with this node's own contribution left out, i.e. the
        ## product over the siblings of each sibling's summed contribution
        parent.partial_down_likelihood = {}
        sibs = node.get_siblings()
        for state in states:
            partial = 1.0
            for sib in sibs:
                partial *= sum(
                    sib.downpass_likelihood[chState] * sib.pmat[state, chState]
                    for chState in states)
            parent.partial_down_likelihood[state] = partial

        ## Partial uppass likelihood of the parent
        parent.partial_up_likelihood = {}
        if parent.isroot:
            # Nothing is "upwards" of the root. Set all likelihoods to 1
            # for identity
            for state in states:
                parent.partial_up_likelihood[state] = 1.0
        else:
            # Contribution of the parent's siblings for each state of the
            # grandparent; it does not depend on the parent's own state,
            # so it is computed once
            parent_sibs = parent.get_siblings()
            from_above = [1.0] * nchar
            for pstate in states:
                for sib in parent_sibs:
                    from_above[pstate] *= sum(
                        sib.downpass_likelihood[sibstate] * sib.pmat[pstate, sibstate]
                        for sibstate in states)
            for state in states:
                total = 0.0
                for pstate in states:
                    total += from_above[pstate] * parent.pmat[pstate, state]
                parent.partial_up_likelihood[state] = total

        ### Putting together the uppass information and the downpass
        ### information
        uppass_information = {}
        for state in states:
            uppass_information[state] = (parent.partial_down_likelihood[state]
                                         * parent.partial_up_likelihood[state])
        downpass_information = node.downpass_likelihood
        for state in states:
            node.marginal_likelihood[state] = 0
            for pstate in states:
                node.marginal_likelihood[state] += (uppass_information[pstate]
                                                    * node.pmat[pstate, state])
            node.marginal_likelihood[state] *= downpass_information[state]
    return chartree
def anc_recon_purepy(tree, chars, Q, p=None, pi="Fitzjohn", ars=None):
    """
    Given tree, character states at tips, and transition matrix perform
    ancestor reconstruction. Perform downpass using mk function,
    then perform uppass.

    Python/NumPy counterpart of the compiled reconstruction; it operates on
    the same working arrays (see ``create_ancrecon_ars``).

    Args:
        tree (Node): Root node of a tree.
        chars (list): Character states of the tips; only used to build the
          working arrays when ``ars`` is not given.
        Q (np.array): Instantaneous rate matrix
        p (np.array): Optional pre-allocated array for the per-branch
          transition matrices (overwritten in place).
        pi (str or np.array): Accepted for signature compatibility; not
          read by this function. The root up-pass vector is always
          ``ivy.chars.mk.qsd(Q)``.
        ars (dict): Dict of pre-allocated working arrays.
    Returns:
        np.array: ``ars["marginal_nl"]``, filled with the reconstructed
          states - including tips
    """
    nchar = Q.shape[0]
    if ars is None:
        # Creating arrays to be used later
        ars = create_ancrecon_ars(tree, chars)
    # Calculating the likelihoods for each node in post-order sequence
    if p is None:  # Instantiating empty array
        p = np.empty([len(ars["t"]), Q.shape[0], Q.shape[1]],
                     dtype=np.double, order="C")
    # Creating probability matrices from Q matrix and branch lengths
    cyexpokit.dexpm_tree_preallocated_p(Q, ars["t"], p)  # This changes p in place

    # Copy original values if they have been changed
    np.copyto(ars["down_nl_w"], ars["down_nl_r"])
    ars["child_inds"].fill(0)

    root_equil = ivy.chars.mk.qsd(Q)

    down = ars["down_nl_w"]
    partial_nl = ars["partial_nl"]
    partial_parent_nl = ars["partial_parent_nl"]
    child_inds = ars["child_inds"]
    charlist = ars["charlist"]

    # ------------------- Performing the down-pass -----------------------------
    # Column `nchar` of the down-pass array holds parent indices; visiting
    # the distinct values in increasing order handles every internal node
    for intnode in map(int, sorted(set(down[:-1, nchar]))):
        nextli = down[intnode]
        for chi, child in enumerate(ars["childlist"][intnode]):
            li = down[child]
            p_li = partial_nl[intnode][chi]
            for ch in charlist:
                trans = np.array([p[child][ch, st] for st in charlist])
                p_li[ch] = sum(trans * li[:nchar])
                nextli[ch] *= p_li[ch]

    # ------------------- Performing the up-pass -------------------------------
    # The up-pass likelihood at each node is equivalent to information coming
    # up from the root * information coming down from the tips
    # Each node has the following:
    # Uppass_likelihood (set to the equilibrium frequency for the root)
    # Marginal_likelihood (product of uppass_likelihood and downpass likelihood)
    # Partial likelihood for each child node
    #
    # The final two columns of up_nl point to the postorder index numbers
    # of the parent and self node, respectively.
    # child_inds[parent] is the position (among that parent's children) of
    # the child currently being processed; it is left out when the parent's
    # partial likelihood is assembled.
    root_posti = ars["up_nl"].shape[0] - 1
    for i, l in enumerate(ars["up_nl"]):
        if i == 0:
            # Set root node uppass to be equivalent to the root equilibrium
            # Set the marginal to be equivalent to the root equilibrium times
            # the root downpass
            l[:nchar] = root_equil
            ars["marginal_nl"][i][:nchar] = l[:nchar] * down[-1][:nchar]
            continue

        spi = int(l[nchar + 1])  # self's postorder index
        ppi = int(l[nchar])      # the parent's postorder index

        # Rows of the parent's partial likelihoods for every child slot
        # except the current one (explicit lists so the concatenation also
        # works where range() is not a list)
        skip = child_inds[ppi]
        others = (list(range(skip))
                  + list(range(skip + 1, partial_nl[ppi].shape[0])))
        sibling_partials = partial_nl[ppi].take(others, 0)

        if ppi == root_posti:
            # If parent is the root, the parent's partial likelihood is
            # the partial downpass of the node's siblings times the
            # equilibrium frequency of the Q matrix.
            partial_parent_nl[spi] = sibling_partials * root_equil
        else:
            # If parent is not the root, the parent's partial likelihood is
            # the partial downpass * the partial uppass, which is calculated
            # as the parent of the parent's partial likelihood times
            # the transition probability
            np.dot(p[ppi].T, partial_parent_nl[ppi], out=ars["temp_dotprod"])
            partial_parent_nl[spi] = sibling_partials * ars["temp_dotprod"]

        # The up-pass likelihood is equivalent to the parent's partial
        # likelihood times the transition probability
        np.dot(p[spi].T, partial_parent_nl[spi], out=l[:nchar])
        # Advance so that the next child of this parent leaves itself out
        child_inds[ppi] += 1
        # Marginal = Uppass * downpass (integer index: float indices are
        # rejected by current NumPy)
        ars["marginal_nl"][i][:nchar] = l[:nchar] * down[spi][:nchar]
    return ars["marginal_nl"]