Пример #1
0
def hrm_multipass_bayesian(tree, chars, Qtype, nregime, pi="Fitzjohn"):
    """
    Build the PyMC nodes for a Bayesian hidden-rates Mk model.

    Creates the rate-parameter stochastics required by the requested
    Q-matrix parameterization and a potential ``mklik`` that evaluates the
    likelihood function returned by
    ``discrete.create_likelihood_function_hrmmultipass_mk``.

    Args:
        tree (Node): Root node of a tree. All branch lengths must be
          greater than 0 (except root)
        chars (list): List of character states corresponding to leaf nodes in
          preorder sequence. Character states must be in the form of 0,1,2,...
        Qtype (str): How the rates of Q are parameterized. Must be one of the
          following (enforced by an assertion):
            "Simple": One rate per regime for all within-regime transitions
              and a single rate shared by all between-regime transitions.
            "STD": State Transitions Different. Within-regime rates are a
              Dirichlet vector scaled by a per-regime exponential rate;
              between-regime transitions share a single rate.
            "RTD": One rate per regime plus ``nregime - 1`` between-regime
              rates. NOTE(review): the likelihood potential has no complete
              branch for this type (see ``mklik``).
            "ARD": Within-regime rates as in "STD" plus
              ``(nregime - 1) * 2 * nobschar`` separate between-regime rates.
        nregime (int): Number of hidden rate regimes.
        pi (str): Either "Equal", "Equilibrium", or "Fitzjohn". How to weight
          values at root node; passed through unchanged to the likelihood
          constructor. Defaults to "Fitzjohn".
          Method "Fitzjohn" is not thoroughly tested, use with caution.

    Returns:
        dict: ``locals()`` of this function, i.e. every local name (the PyMC
        nodes, the likelihood function ``l``, the potential ``mklik`` and the
        intermediate helper values) keyed by its variable name.
    """
    nobschar = len(set(chars))
    nchar = nobschar * nregime

    # assert nobschar == 2, "Can currently only handle 4x4 Q matrix"
    # assert nregime == 2, "Can currently only handle 4x4 Q matrix"
    assert Qtype in ["Simple", "STD", "RTD", "ARD"], "Q type must be one of: simple, STD, RTD, ARD"

    ###########################################################################
    # Qparams:
    ###########################################################################
    # NOTE: local variable names in this function are part of the return
    # value (``locals()``), so they are intentionally left unchanged.
    #
    # For now, every rate is exponentially distributed.

    # Simplest model: all transitions between states within a regime are equal.
    # Each regime has a rate associated with it.
    # There is one rate between regimes.
    # Number of parameters = nregime+1
    if Qtype == "Simple":
        # Within-regime transitions
        WR_Qparams = np.ndarray(nregime, dtype="object")
        for i in range(nregime):
            # Starting values are staggered by regime (0.01, 0.02, ...).
            WR_Qparams[i] = pymc.Exponential(name="wr-par"+str(i), beta = 1.0, value = 1e-2+(i/100.0))
        # Between-regime transitions:
        BR_Qparams = pymc.Exponential(name="br-par", beta = 1.0, value = 1e-2)
    # State-transitions different. Transitions between states within a
    # rate regime can differ (ARD). Transitions between rates share one
    # rate parameter.
    # NOTE(review): each regime gets a Dirichlet with ``nobschar`` components,
    # which equals nobschar**2-nobschar only when nobschar == 2.
    if Qtype == "STD":
        theta = [1.0/2.0] * nobschar

        i_d = np.ndarray(nregime, dtype="object")
        c_d = np.ndarray(nregime, dtype="object")
        scale = np.ndarray(nregime, dtype="object")
        # Within-regime transitions
        WR_Qparams = np.ndarray(nregime, dtype="object")

        for i in range(nregime):
            # First, create a dirichlet distribution
            i_d[i] = pymc.Dirichlet("parInit_"+str(i), theta, value = [1.0/nobschar]*(nobschar-1))
            c_d[i] = pymc.CompletedDirichlet("parInit"+str(i), i_d[i])

            scale[i] = pymc.Exponential(name="scaling"+str(i), beta=1.0, value=1e-2+(i/100.0))

            # Then, scale dirichlet distribution by overall rate parameter for
            # that regime. The parents are bound as default arguments so each
            # deterministic keeps the nodes of its own loop iteration.
            @pymc.deterministic(plot=False,name="wr-par"+str(i))
            def d_scaled(d = c_d[i], s = scale[i]):
                # [0] presumably drops the leading axis of the completed
                # Dirichlet value -- TODO confirm against PyMC.
                return (d*s)[0]
            WR_Qparams[i] = d_scaled

        BR_Qparams = pymc.Exponential(name="br-par", beta = 1.0, value = 1e-2)
    # One rate per regime plus nregime-1 between-regime rates.
    if Qtype == "RTD":
        WR_Qparams = np.ndarray(nregime, dtype="object")
        for i in range(nregime):
            WR_Qparams[i] = pymc.Exponential(name="wr-par"+str(i), beta = 1.0, value = 1e-2+(i/100.0))

        BR_Qparams = np.ndarray(nregime-1, dtype="object")
        for i in range(nregime-1):
            BR_Qparams[i] = pymc.Exponential(name="br-par"+str(i), beta=1.0, value=1e-2)
    # Within-regime rates as in "STD"; every between-regime rate is separate.
    if Qtype == "ARD":
        theta = [1.0/2.0] * nobschar
        i_d = np.ndarray(nregime, dtype="object")
        c_d = np.ndarray(nregime, dtype="object")
        scale = np.ndarray(nregime, dtype="object")
        # Within-regime transitions
        WR_Qparams = np.ndarray(nregime, dtype="object")

        for i in range(nregime):
            # First, create a dirichlet distribution
            i_d[i] = pymc.Dirichlet("parInit_"+str(i), theta, value = [1.0/nobschar]*(nobschar-1))
            c_d[i] = pymc.CompletedDirichlet("parInit"+str(i), i_d[i])

            scale[i] = pymc.Exponential(name="scaling"+str(i), beta=1.0, value=1e-2+(i/100.0))

            # Then, scale dirichlet distribution by overall rate parameter for that regime
            @pymc.deterministic(plot=False,name="wr-par"+str(i))
            def d_scaled(d = c_d[i], s = scale[i]):
                return (d*s)[0]
            WR_Qparams[i] = d_scaled
        BR_Qparams = np.ndarray((nregime-1)*2*nobschar, dtype="object")
        br_i = 0
        # ``range(...) * 2`` only works where range() returns a list; wrapping
        # it in list() keeps the same iteration sequence on every Python
        # version (and the same final value of ``i`` in the returned locals).
        for i in list(range(nregime-1))*2:
            for n in range(nobschar):
                BR_Qparams[br_i] = pymc.Exponential(name="br-par"+str(br_i), beta=1.0, value=1e-2)
                br_i += 1




    ###########################################################################
    # Likelihood
    ###########################################################################
    # The likelihood function is always built with Qtype="ARD"; mklik below
    # expands the parameters of each model into one vector before calling it.
    l = discrete.create_likelihood_function_hrmmultipass_mk(tree=tree, chars=chars,
        nregime=nregime, Qtype="ARD", pi=pi, min=False)
    @pymc.potential
    def mklik(wr = WR_Qparams, br=BR_Qparams, name="mklik"):
        # Returns the value of ``l`` for the current parameter values, or
        # -inf when the ordering constraints of the model are violated.
        if Qtype == "Simple":
            # Getting the locations of each Q parameter to feed
            # to the likelihood function

            # Note that the likelihood function takes q parameters
            # in columnwise-order, not counting zero and negative values.
            # Within-regime shifts
            qinds = {}
            for i,q in enumerate(wr):
                qinds[i]=valid_indices(nobschar, nregime, i,i)
            rshift_pairs = zip(range(nregime)[1:], range(nregime)[:-1])
            qinds[i+1] = [] # Between-regime shifts(all share 1 rate)
            for p in rshift_pairs:
                qinds[i+1].extend(valid_indices(nobschar, nregime, p[0],p[1]))
                qinds[i+1].extend(valid_indices(nobschar, nregime, p[1],p[0]))

            # These are the indices of the values we will give to
            # the likelihood function, in order
            param_indices = sorted([ i for v in qinds.values() for i in v])
            qparam_list = list(wr)+[br] # Making a single list to get parameters from
            Qparams = [] # Empty list for values to feed to lik function
            for pidx in param_indices:
                # Key of the parameter group that owns this Q-matrix index.
                qi = [ k for k,v in qinds.items() if pidx in v ][0]
                Qparams.append(qparam_list[qi]) # Pulling out the correct param
            # Qparams now contains the parameters needed in the
            # correct order for the likelihood function.
            # Regime rates must be in ascending order and the between-regime
            # rate must be below the rate of the last regime.
            if ((sorted(list(wr)) == list(wr)) and (br < wr[nregime-1])):
                return l(np.array(Qparams))
            else:
                return -np.inf
        if Qtype == "STD":
            qinds = {}
            n=0
            for i,q in enumerate(wr):
                # NOTE(review): this walks ``nregime`` entries of the
                # within-regime index list; confirm that this is the intended
                # count when nregime differs from the number of within-regime
                # rates.
                for k in range(nregime):
                    qinds[n] = [valid_indices(nobschar, nregime, i, i)[k]]
                    n+=1
            rshift_pairs = zip(range(nregime)[1:], range(nregime)[:-1])
            qinds[n] = [] # Between-regime shifts(all share 1 rate)
            for p in rshift_pairs:
                qinds[n].extend(valid_indices(nobschar, nregime, p[0],p[1]))
                qinds[n].extend(valid_indices(nobschar, nregime, p[1],p[0]))

            param_indices = sorted([ i for v in qinds.values() for i in v])
            qparam_list = [i for s in [q for q in wr] for i in s]+[br] # Making a single list to get parameters from
            Qparams = [] # Empty list for values to feed to lik function
            for pidx in param_indices:
                qi = [ k for k,v in qinds.items() if pidx in v ][0]
                Qparams.append(qparam_list[qi]) # Pulling out the correct param

            # Conditions to be enforced:
            # All corresponding rates in each regime must be ordered (fast must be faster than medium, etc)
            # Rate for shift between regimes cannot be faster than the fastest
            # rate within regimes
            for i in range(nregime):
                n = [q[i] for q in wr]
                if not sorted(n) == n:
                    return -np.inf
            if br > max(wr[nregime-1]):
                return -np.inf

            return l(np.array(Qparams))
        if Qtype == "RTD":
            # NOTE(review): this branch only builds the within-regime index
            # map and then falls through, so mklik returns None for "RTD".
            # It looks unfinished; left as-is pending the intended rules.
            qinds = {}
            for i,q in enumerate(wr):
                qinds[i]=valid_indices(nobschar, nregime, i,i)
        if Qtype == "ARD":
            qinds = {}
            n=0
            for i,q in enumerate(wr):
                for k in range(nregime):
                    qinds[n] = [valid_indices(nobschar, nregime, i, i)[k]]
                    n+=1
            rshift_pairs = zip(range(nregime)[1:], range(nregime)[:-1])
            # Every between-regime cell gets its own parameter slot.
            for p in rshift_pairs:
                for i in  valid_indices(nobschar, nregime, p[0],p[1]):
                    qinds[n] = [i]
                    n+=1
                for i in  valid_indices(nobschar, nregime, p[1],p[0]):
                    qinds[n] = [i]
                    n+=1
            param_indices = sorted([ i for v in qinds.values() for i in v])
            qparam_list = [i for s in [q for q in wr] for i in s]+[b for b in br] # Making a single list to get parameters from
            Qparams = [] # Empty list for values to feed to lik function
            for pidx in param_indices:
                qi = [ k for k,v in qinds.items() if pidx in v ][0]
                Qparams.append(qparam_list[qi]) # Pulling out the correct param

            # Conditions to be enforced:
            # All corresponding rates in each regime must be ordered (fast must be faster than medium, etc)
            # Max rate for shift between regimes cannot be faster than the fastest
            # rate within regimes
            for i in range(nregime):
                n = [q[i] for q in wr]
                if not sorted(n) == n:
                    return -np.inf
            if max(br) > max(wr[nregime-1]):
                return -np.inf
            return l(np.array(Qparams))



    return locals()
Пример #2
0
# Scratch set-up for working through the multipass HRM likelihood code by
# hand. `tree` and `chars` are not defined in this snippet and must already
# exist in the session. The plain names below presumably mirror the
# arguments/locals of the routines being stepped through, so they are kept.
Q = ivy.chars.discrete.fill_Q_matrix(
    2, 2, [0.0, 1.0, 0.00, 1.0, 1.0, 5.5, 1.0, 5.5]
)

nregime, pi, Qtype = 2, "Fitzjohn", "ARD"
p = preallocated_arrays = tip_states = None
# NOTE: `min` shadows the builtin for the rest of this namespace.
returnPi, min = False, True
Qparams = np.array([0.0, 1.0, 0.0, 1.0, 1.0, 5.5, 1.0, 5.5])

# Likelihood callable for a 2-regime "ARD" model.
f = discrete.create_likelihood_function_hrmmultipass_mk(tree, chars, 2, "ARD")

# Re-created with the same eight values as above.
Qparams = np.array([0.0, 1.0, 0.0, 1.0, 1.0, 5.5, 1.0, 5.5])





#
# tree.ape_node_idx()
#
# for t in tree:
#     t.label = t.apeidx
#
#
# treeni = 38
Пример #3
0
# Scratch set-up for working through the multipass HRM likelihood code by
# hand. `tree` is not defined in this snippet and must already exist in the
# session. The plain names below presumably mirror the arguments/locals of
# the routines being stepped through, so they are kept.

# Twenty tip states: ten 1s, three 0s, three 1s, two 0s, two 1s.
chars = [1] * 10 + [0] * 3 + [1] * 3 + [0] * 2 + [1] * 2

Q = ivy.chars.discrete.fill_Q_matrix(
    2, 2, [0.0, 1.0, 0.00, 1.0, 1.0, 5.5, 1.0, 5.5]
)

nregime, pi, Qtype = 2, "Fitzjohn", "ARD"
p = preallocated_arrays = tip_states = None
# NOTE: `min` shadows the builtin for the rest of this namespace.
returnPi, min = False, True
Qparams = np.array([0.0, 1.0, 0.0, 1.0, 1.0, 5.5, 1.0, 5.5])

# Likelihood callable for a 2-regime "ARD" model.
f = discrete.create_likelihood_function_hrmmultipass_mk(tree, chars, 2, "ARD")

# Re-created with the same eight values as above.
Qparams = np.array([0.0, 1.0, 0.0, 1.0, 1.0, 5.5, 1.0, 5.5])

#
# tree.ape_node_idx()
#
# for t in tree:
#     t.label = t.apeidx
#
#
# treeni = 38
#
#
# node1ni = 18
# node1n = preallocated_arrays["nodelist-up"][18]
Пример #4
0
# Likelihood callable built by discrete.create_likelihood_function_hrm_mk for
# an "ARD" model; it is the workload profiled by loop1(). `tree`, `chars`,
# `nregime` and `pi` must already be defined in this namespace.
l_single = discrete.create_likelihood_function_hrm_mk(tree=tree,
                                                      chars=chars,
                                                      nregime=nregime,
                                                      Qtype="ARD",
                                                      pi=pi,
                                                      min=False)


def loop1():
    """Profiling workload: evaluate ``l_single`` five times.

    A fresh eight-element array of 0.1 rates is built for every call, so the
    array construction is part of the profiled work.
    """
    remaining = 5
    while remaining > 0:
        l_single(np.array([0.1] * 8))
        remaining -= 1


# Profile the loop1() workload. cProfile.run executes the command string in
# the __main__ namespace, so loop1 has to be defined there.
cProfile.run("loop1()")

# Likelihood callable built by the multipass constructor with the same
# keyword arguments as l_single; it is the workload profiled by loop2().
l_multipass = discrete.create_likelihood_function_hrmmultipass_mk(
    tree=tree, chars=chars, nregime=nregime, Qtype="ARD", pi=pi, min=False)


def loop2():
    """Profiling workload: evaluate ``l_multipass`` on ``Qparams`` five times."""
    n_calls = 5
    for _ in range(n_calls):
        l_multipass(Qparams)


# Profile the loop2() workload (command string is executed in __main__).
cProfile.run("loop2()")

# Work on a deep copy so that `var1` itself is not modified through `var`.
# NOTE(review): `var1` is not defined in this snippet -- presumably the
# preallocated arrays for hrm_back_mk; confirm against the session.
var = copy.deepcopy(var1)


def loop3():
    """Profiling workload: call ``discrete.hrm_back_mk`` 55 (5 * 11) times.

    Every call receives the module-level ``tree``, ``chars``, ``Q`` and
    ``nregime`` and passes ``var`` as ``preallocated_arrays``.
    """
    for i in range(5 * 11):
        discrete.hrm_back_mk(tree, chars, Q, nregime, preallocated_arrays=var)