def fitMk(tree, chars, Q = "Equal", pi = "Equal"):
    """
    Fit an mk model to a given tree and list of characters.

    Either estimates Q (when Q is a string naming the rate model) or
    evaluates the likelihood of a user-supplied Q matrix.

    Args:
        tree (Node): Root node of a tree. All branch lengths must be
          greater than 0 (except root)
        chars (list): List of character states corresponding to leaf nodes
          in preorder sequence. Character states must be in the form of
          0,1,2,...
        Q: Either a string specifying how to estimate values for Q or a
          numpy array of a pre-specified Q matrix.

          Valid strings for Q:

          "Equal": All rates equal
          "Sym": Forward and reverse rates equal
          "ARD": All rates different
        pi (str): Either "Equal", "Equilibrium", or "Fitzjohn". How to
          weight values at root node. Defaults to "Equal".
          Method "Fitzjohn" is not thoroughly tested, use with caution
    Returns:
        dict: Dictionary with the keys "Q" (the fitted or supplied Q
          matrix), "Log-likelihood", "pi" and "rootLiks" (the latter two
          are whatever the underlying likelihood routine reports for the
          root).
    Raises:
        AssertionError: If pi is not a recognised option, or Q is neither
          a string nor a square numpy array matching the number of
          distinct character states.
        ValueError: If Q is a string other than "Equal", "Sym" or "ARD".
    """
    # NOTE: validation is kept as assert statements so that callers which
    # catch AssertionError continue to work.
    assert pi in ["Equal", "Fitzjohn", "Equilibrium"], "Pi must be one of: 'Equal', 'Fitzjohn', 'Equilibrium'"

    if isinstance(Q, str):
        if Q == "Equal":
            q, l, piRates, rootLiks = fitMkER(tree, chars, pi=pi)
        elif Q == "Sym":
            q, l, piRates, rootLiks = fitMkSym(tree, chars, pi=pi)
        elif Q == "ARD":
            q, l, piRates, rootLiks = fitMkARD(tree, chars, pi=pi)
        else:
            raise ValueError("Q str must be one of: 'Equal', 'Sym', 'ARD'")
    else:
        # isinstance works on both Python 2 and 3; comparing against the
        # repr "<type 'numpy.ndarray'>" only ever matched on Python 2.
        assert isinstance(Q, np.ndarray), "Q must be str or numpy array"
        nchar = len(set(chars))
        # A rate matrix has one row and one column per character state.
        assert Q.ndim == 2 and Q.shape[0] == Q.shape[1] == nchar, "Supplied Q has wrong dimensions"
        l, piRates, rootLiks = mk(tree, chars, Q, pi=pi, returnPi=True)
        q = Q

    return {"Q": q, "Log-likelihood": l, "pi": piRates, "rootLiks": rootLiks}
def fitMkSym(tree, chars, pi="Equal"):
    """
    Estimate parameters of a symmetrical-rate Q matrix.

    Multi-parameter model: forward = reverse, i.e. one rate per unordered
    pair of character states.

    Args:
        tree (Node): Root node of a tree. All branch lengths must be
          greater than 0 (except root)
        chars (list): List of character states corresponding to leaf nodes
          in preorder sequence. Character states must be numbered 0,1,2,...
        pi (str): How to weight values at root node, e.g. "Equal" or
          "Fitzjohn". Passed through unchanged to the likelihood routines.
          Defaults to "Equal". Method "Fitzjohn" is currently untested
    Returns:
        tuple: (Q, log-likelihood, piRates, rootLiks) where Q is the fitted
          nchar x nchar numpy array, the log-likelihood is the negated
          optimum of the objective, and piRates/rootLiks are the second and
          third values returned by mk(..., returnPi=True).
    """
    nchar = len(set(chars))
    # One free rate per unordered pair of states: nchar choose 2. Computed
    # with integer arithmetic because list repetition needs an int and a
    # binomial helper may hand back a float.
    nparam = nchar * (nchar - 1) // 2
    # Initial values arbitrary
    x0 = [0.1] * nparam

    mk_func = create_likelihood_function_mk(tree, chars, Qtype="Sym", pi=pi)

    # Need to constrain values to be greater than 0
    bounds = tuple((1e-14, None) for _ in x0)
    optim = minimize(mk_func, x0, method="L-BFGS-B", bounds=bounds)

    # Rebuild the full matrix: fitted rates go in the upper triangle, are
    # mirrored below the diagonal, and each diagonal entry is set so its
    # row sums to zero.
    q = np.zeros([nchar, nchar], dtype=np.double)
    q[np.triu_indices(nchar, k=1)] = optim.x
    q = q + q.T
    q[np.diag_indices(nchar)] = 0 - np.sum(q, 1)

    piRates, rootLiks = mk(tree, chars, q, pi=pi, returnPi=True)[1:]

    # The objective is minimised, so its sign is flipped for reporting.
    return (q, -1 * float(optim.fun), piRates, rootLiks)
def fitMkARD(tree, chars, pi="Equal"):
    """
    Estimate parameters of an all-rates-different Q matrix.

    Multi-parameter model: every off-diagonal rate is a separate parameter.

    Args:
        tree (Node): Root node of a tree. All branch lengths must be
          greater than 0 (except root)
        chars (list): List of character states corresponding to leaf nodes
          in preorder sequence. Character states must be numbered 0,1,2,...
        pi (str): How to weight values at root node, e.g. "Equal" or
          "Fitzjohn". Passed through unchanged to the likelihood routines.
          Defaults to "Equal". Method "Fitzjohn" is currently untested
    Returns:
        tuple: (Q, log-likelihood, piRates, rootLiks) where Q is the fitted
          nchar x nchar numpy array, the log-likelihood is the negated
          optimum of the objective, and piRates/rootLiks are the second and
          third values returned by mk(..., returnPi=True).
    """
    # Number of parameters equal to k^2 - k
    nchar = len(set(chars))
    x0 = [1.0] * (nchar ** 2 - nchar)

    mk_func = create_likelihood_function_mk(tree, chars, Qtype="ARD", pi=pi)

    # Need to constrain values to be greater than 0
    bounds = tuple((1e-14, None) for _ in x0)
    optim = minimize(mk_func, x0, method="L-BFGS-B", bounds=bounds)

    # First half of the parameter vector fills the upper triangle, second
    # half the lower triangle. Floor division keeps the split point an int
    # (true division yields a float, which is not a valid slice index).
    half = len(optim.x) // 2
    q = np.zeros([nchar, nchar], dtype=np.double)
    q[np.triu_indices(nchar, k=1)] = optim.x[:half]
    q[np.tril_indices(nchar, k=-1)] = optim.x[half:]
    # Each diagonal entry is set so its row sums to zero.
    q[np.diag_indices(nchar)] = 0 - np.sum(q, 1)

    piRates, rootLiks = mk(tree, chars, q, pi=pi, returnPi=True)[1:]

    # The objective is minimised, so its sign is flipped for reporting.
    return (q, -1 * float(optim.fun), piRates, rootLiks)
def fitMkER(tree, chars, pi="Equal"):
    """
    Fit the single rate of an equal-rates Q matrix.

    One-parameter model: every off-diagonal rate shares the same value.

    Args:
        tree (Node): Root node of a tree. All branch lengths must be
          greater than 0 (except root)
        chars (list): Character states of the leaf nodes in preorder
          sequence, numbered 0,1,2,...
        pi (str): Root weighting method, e.g. "Equal" or "Fitzjohn"; it is
          forwarded unchanged to the likelihood routines. Defaults to
          "Equal"
    Returns:
        tuple: (Q, log-likelihood, piRates, rootLiks) where Q is the fitted
          square numpy array, the log-likelihood is the negated optimum of
          the objective, and piRates/rootLiks are the trailing two values
          returned by mk(..., returnPi=True).
    """
    n_states = len(set(chars))
    objective = create_likelihood_function_mk(tree, chars, Qtype="ER", pi=pi)

    # Arbitrary starting point; the rate is bounded away from zero.
    start = [0.1]
    rate_bounds = [(1e-14, None)]
    fit = minimize(objective, start, method="L-BFGS-B", bounds=rate_bounds)

    # Every cell starts at the fitted rate; the diagonal is then replaced
    # by minus the sum of the other entries in the row.
    q = np.full([n_states, n_states], fit.x[0], dtype=np.double)
    row_totals = q.sum(1)
    q[np.diag_indices(n_states)] = 0 - (row_totals - q[0, 0])

    root_info = mk(tree, chars, q, pi=pi, returnPi=True)
    piRates, rootLiks = root_info[1:]

    return (q, -1 * float(fit.fun), piRates, rootLiks)
def likelihood_function(Qparams):
    """
    Evaluate the mk likelihood for one candidate set of rate parameters.

    Relies on names from the enclosing scope: ``var`` (a dict holding the
    preallocated arrays and the "upperbound", "nullval" and "p" settings),
    ``Qtype``, ``nchar``, ``tree``, ``chars``, ``pi`` and ``min``.

    Args:
        Qparams: Sequence of rate parameters. One value for "ER", one per
          upper-triangle cell for "Sym", and upper-triangle values followed
          by lower-triangle values for "ARD".
    Returns:
        The value returned by mk, negated when ``min`` is truthy, or
        var["nullval"] when the parameters are rejected or mk raises
        ValueError.
    Raises:
        ValueError: If Qtype is not one of "ER", "Sym", "ARD".
    """
    # Enforcing upper bound on parameters. np.asarray lets the positivity
    # check work for plain lists as well as numpy arrays.
    if (sum(Qparams) > var["upperbound"]) or np.any(np.asarray(Qparams) <= 0):
        return var["nullval"]

    # Filling Q matrices:
    if Qtype == "ER":
        var["Q"].fill(Qparams[0])
        var["Q"][np.diag_indices(nchar)] = -Qparams[0] * (nchar-1)
    elif Qtype == "Sym":
        var["Q"].fill(0.0) # Re-filling with zeroes
        xs, ys = np.triu_indices(nchar, k=1)
        var["Q"][xs, ys] = Qparams
        var["Q"][ys, xs] = Qparams
        var["Q"][np.diag_indices(nchar)] = 0-np.sum(var["Q"], 1)
    elif Qtype == "ARD":
        var["Q"].fill(0.0) # Re-filling with zeroes
        # Floor division keeps the split point an int (true division gives
        # a float, which is not a valid slice index).
        half = len(Qparams) // 2
        var["Q"][np.triu_indices(nchar, k=1)] = Qparams[:half]
        var["Q"][np.tril_indices(nchar, k=-1)] = Qparams[half:]
        var["Q"][np.diag_indices(nchar)] = 0-np.sum(var["Q"], 1)
    else:
        # Call form of raise is valid on both Python 2 and Python 3.
        raise ValueError("Qtype must be one of: ER, Sym, ARD")

    # Resetting the values in these arrays
    np.copyto(var["nodelist"], var["nodelistOrig"])
    var["root_priors"].fill(1.0)

    # NOTE(review): `min` is presumably a flag from the enclosing scope
    # that shadows the builtin; confirm against the enclosing function.
    if min:
        x = -1
    else:
        x = 1
    try:
        return x * mk(tree, chars, var["Q"], p=var["p"], pi = pi, preallocated_arrays=var) # Minimizing negative log-likelihood
    except ValueError: # If likelihood returned is 0
        return var["nullval"]