Пример #1
0
def fitMk(tree, chars, Q = "Equal", pi = "Equal"):
    """
    Fit an mk model to a given tree and list of characters. Return the
    (fitted or supplied) Q matrix together with the calculated likelihood.

    Args:
        tree (Node): Root node of a tree. All branch lengths must be
          greater than 0 (except root)
        chars (list): List of character states corresponding to leaf nodes in
          preorder sequence. Character states must be in the form of 0,1,2,...
        Q: Either a string specifying how to estimate values for Q or a
          numpy array of a pre-specified Q matrix.

          Valid strings for Q:

          "Equal": All rates equal
          "Sym": Forward and reverse rates equal
          "ARD": All rates different
        pi (str): Either "Equal", "Equilibrium", or "Fitzjohn". How to weight
          values at root node. Defaults to "Equal"
          Method "Fitzjohn" is not thoroughly tested, use with caution

    Returns:
        dict: Dictionary with the keys

          "Q": the fitted Q matrix, or the supplied array when Q was given
            as a numpy array
          "Log-likelihood": the likelihood value reported by the fitting
            routine (string Q) or by ``mk`` (array Q)
          "pi": root weightings as returned by the fitting routine / ``mk``
          "rootLiks": root likelihoods as returned by the fitting routine /
            ``mk``

    Raises:
        AssertionError: If ``pi`` is not a recognised option, if ``Q`` is
          neither a string nor a numpy array, or if a supplied Q matrix does
          not have one column per distinct character state.
        ValueError: If ``Q`` is a string other than "Equal", "Sym" or "ARD".
    """
    assert pi in ["Equal", "Fitzjohn", "Equilibrium"], "Pi must be one of: 'Equal', 'Fitzjohn', 'Equilibrium'"

    if isinstance(Q, str):
        if Q == "Equal":
            q,l,piRates,rootLiks = fitMkER(tree, chars, pi=pi)
        elif Q == "Sym":
            q,l,piRates,rootLiks = fitMkSym(tree, chars, pi=pi)
        elif Q == "ARD":
            q,l,piRates,rootLiks = fitMkARD(tree, chars, pi=pi)
        else:
            raise ValueError("Q str must be one of: 'Equal', 'Sym', 'ARD'")
    else:
        # Local import keeps this block self-contained.
        import numpy as np

        # isinstance works on both Python 2 and 3; comparing against the
        # repr "<type 'numpy.ndarray'>" only ever matched on Python 2.
        assert isinstance(Q, np.ndarray), "Q must be str or numpy array"
        assert len(Q[0]) == len(set(chars)), "Supplied Q has wrong dimensions"

        l,piRates, rootLiks = mk(tree, chars, Q, pi=pi, returnPi=True)
        q = Q

    return {"Q": q, "Log-likelihood": l, "pi": piRates, "rootLiks": rootLiks}
Пример #2
0
def fitMkSym(tree, chars, pi="Equal"):
    """
    Estimate parameters of a symmetrical-rate Q matrix
    Return log-likelihood of mk equation using fitted Q

    Multi-parameter model: forward = reverse

    Args:
        tree (Node): Root node of a tree. All branch lengths must be
          greater than 0 (except root)
        chars (list): List of character states corresponding to leaf nodes in
          preorder sequence. Character states must be numbered 0,1,2,...
        pi (str): Either "Equal" or "Fitzjohn". How to weight values at root
          node. Defaults to "Equal"
          Method "Fitzjohn" is currently untested

    Returns:
        tuple: ``(q, loglik, piRates, rootLiks)`` where ``q`` is the fitted
          symmetric Q matrix (rows sum to zero), ``loglik`` is the negated
          minimum of the optimised objective, and ``piRates`` / ``rootLiks``
          are the second and third values returned by ``mk`` for the fitted
          matrix with ``returnPi=True``.
    """
    nchar = len(set(chars))
    # One free rate per unordered pair of states, i.e. binom(nchar, 2).
    # Computed with integer arithmetic so it is a valid list repeat count
    # (a float-valued binomial cannot multiply a list).
    nparam = nchar * (nchar - 1) // 2
    # Initial values arbitrary
    x0 = [0.1] * nparam
    mk_func = create_likelihood_function_mk(tree, chars, Qtype="Sym", pi = pi)

    # Need to constrain values to be greater than 0
    optim = minimize(mk_func, x0, method="L-BFGS-B",
                     bounds=[(1e-14, None)] * nparam)

    q = np.zeros([nchar,nchar], dtype=np.double)

    # Fill the upper triangle, mirror it into the lower triangle, then set
    # each diagonal entry to minus the sum of the rest of its row.
    q[np.triu_indices(nchar, k=1)] = optim.x
    q = q + q.T
    q[np.diag_indices(nchar)] = 0-np.sum(q, 1)

    piRates, rootLiks = mk(tree, chars, q, pi=pi, returnPi=True)[1:]

    return (q, -1*float(optim.fun), piRates, rootLiks)
Пример #3
0
def fitMkARD(tree, chars, pi="Equal"):
    """
    Estimate parameters of an all-rates-different Q matrix
    Return log-likelihood of mk equation using fitted Q

    Multi-parameter model: all rates different

    Args:
        tree (Node): Root node of a tree. All branch lengths must be
          greater than 0 (except root)
        chars (list): List of character states corresponding to leaf nodes in
          preorder sequence. Character states must be numbered 0,1,2,...
        pi (str): Either "Equal" or "Fitzjohn". How to weight values at root
          node. Defaults to "Equal"
          Method "Fitzjohn" is currently untested

    Returns:
        tuple: ``(q, loglik, piRates, rootLiks)`` where ``q`` is the fitted
          Q matrix (rows sum to zero), ``loglik`` is the negated minimum of
          the optimised objective, and ``piRates`` / ``rootLiks`` are the
          second and third values returned by ``mk`` for the fitted matrix
          with ``returnPi=True``.

    """
    # Number of parameters equal to k^2 - k
    nchar = len(set(chars))
    x0 = [1.0] * (nchar ** 2 - nchar)

    mk_func = create_likelihood_function_mk(tree, chars, Qtype="ARD", pi=pi)

    # Need to constrain values to be greater than 0
    optim = minimize(mk_func, x0, method="L-BFGS-B",
                     bounds=[(1e-14, None)] * len(x0))

    q = np.zeros([nchar,nchar], dtype=np.double)

    # First half of the parameters fills the upper triangle, second half the
    # lower triangle. Floor division keeps the slice index an integer on
    # Python 3 as well as Python 2.
    half = len(optim.x) // 2
    q[np.triu_indices(nchar, k=1)] = optim.x[:half]
    q[np.tril_indices(nchar, k=-1)] = optim.x[half:]
    q[np.diag_indices(nchar)] = 0-np.sum(q, 1)

    piRates, rootLiks = mk(tree, chars, q, pi=pi, returnPi=True)[1:]

    return (q, -1*float(optim.fun), piRates, rootLiks)
Пример #4
0
def fitMkER(tree, chars, pi="Equal"):
    """
    Fit the single rate of an equal-rates Q matrix and report the
    log-likelihood of the mk equation under that fitted matrix.

    One-parameter model: every off-diagonal rate is the same value.

    Args:
        tree (Node): Root node of a tree. All branch lengths must be
          greater than 0 (except root)
        chars (list): List of character states corresponding to leaf nodes in
          preorder sequence. Character states must be numbered 0,1,2,...
        pi (str): Either "Equal" or "Fitzjohn". How to weight values at root
          node. Defaults to "Equal"

    Returns:
        tuple: ``(q, loglik, piRates, rootLiks)`` where ``q`` is the fitted
          Q matrix, ``loglik`` is the negated minimum of the optimised
          objective, and ``piRates`` / ``rootLiks`` are the second and third
          values returned by ``mk`` for the fitted matrix with
          ``returnPi=True``.

    """
    n_states = len(set(chars))
    objective = create_likelihood_function_mk(tree, chars, Qtype="ER", pi=pi)

    # Arbitrary starting rate; the lower bound keeps the rate positive.
    start = [0.1]
    rate_bounds = [(1e-14,None)]
    fit = minimize(objective, start, method="L-BFGS-B", bounds=rate_bounds)

    # Start with every cell equal to the fitted rate ...
    rate_matrix = np.empty([n_states, n_states], dtype=np.double)
    rate_matrix.fill(fit.x[0])

    # ... then overwrite the diagonal with minus the sum of the other
    # entries in the row (row total less one copy of the rate).
    row_totals = rate_matrix.sum(1)
    rate_matrix[np.diag_indices(n_states)] = 0 - (row_totals - rate_matrix[0,0])

    mk_result = mk(tree, chars, rate_matrix, pi=pi, returnPi=True)
    piRates, rootLiks = mk_result[1:]

    return (rate_matrix, -1*float(fit.fun), piRates, rootLiks)
Пример #5
0
    def likelihood_function(Qparams):
        """
        Evaluate the mk likelihood for one vector of rate parameters.

        Rebuilds the preallocated matrix ``var["Q"]`` in place from
        ``Qparams`` according to ``Qtype``, resets the preallocated work
        arrays in ``var`` and calls ``mk``.

        Args:
            Qparams: Rate parameters. Must support elementwise comparison
              (``Qparams <= 0``) -- presumably the numpy array handed over
              by the optimiser; confirm against the caller.

        Returns:
            The value of ``mk`` multiplied by -1 when ``min`` is truthy (by
            1 otherwise), or ``var["nullval"]`` when the parameters are out
            of bounds or ``mk`` raises ValueError.

        Raises:
            ValueError: If ``Qtype`` is not one of "ER", "Sym", "ARD".
        """
        # Enforcing upper bound on parameters
        if (sum(Qparams) > var["upperbound"]) or any(Qparams <= 0):
            return var["nullval"]

        # Filling Q matrices:
        if Qtype == "ER":
            var["Q"].fill(Qparams[0])
            var["Q"][np.diag_indices(nchar)] = -Qparams[0] * (nchar-1)
        elif Qtype == "Sym":
            var["Q"].fill(0.0) # Re-filling with zeroes
            xs,ys = np.triu_indices(nchar,k=1)
            var["Q"][xs,ys] = Qparams
            var["Q"][ys,xs] = Qparams
            var["Q"][np.diag_indices(nchar)] = 0-np.sum(var["Q"], 1)
        elif Qtype == "ARD":
            var["Q"].fill(0.0) # Re-filling with zeroes
            # First half of the parameters goes to the upper triangle, the
            # second half to the lower triangle. Floor division keeps the
            # slice index an integer on Python 3 as well as Python 2.
            half = len(Qparams) // 2
            var["Q"][np.triu_indices(nchar, k=1)] = Qparams[:half]
            var["Q"][np.tril_indices(nchar, k=-1)] = Qparams[half:]
            var["Q"][np.diag_indices(nchar)] = 0-np.sum(var["Q"], 1)
        else:
            # Call form of raise is valid on both Python 2 and Python 3
            raise ValueError("Qtype must be one of: ER, Sym, ARD")

        # Resetting the values in these arrays
        np.copyto(var["nodelist"], var["nodelistOrig"])
        var["root_priors"].fill(1.0)

        # NOTE(review): `min` is presumably a flag from the enclosing scope
        # that shadows the builtin; if it is not, the builtin is always
        # truthy and the sign is always -1. Confirm against the enclosing
        # function.
        if min:
            x = -1
        else:
            x = 1

        try:
            return x * mk(tree, chars, var["Q"], p=var["p"], pi = pi, preallocated_arrays=var) # Minimizing negative log-likelihood
        except ValueError: # If likelihood returned is 0
            return var["nullval"]