示例#1
0
def make_forward_tree_defn(
    subst_model, tree, bin_names, with_indel_params=True, kn=True
):
    """Pairwise Fwd: build the defn graph for the forward likelihood.

    Parameters
    ----------
    subst_model
        Substitution model; supplies the fundamental parameter
        controller defns (word probs, Qd, ...).
    tree
        Tree handed to ``_recursive_defns`` to assemble the edge defns.
    bin_names
        Bin names; with more than one bin a ``bin_switch`` parameter and
        bin probabilities are added and the binned edge defn is used.
    with_indel_params
        Passed to ``make_indel_model_defn``; if True the indel rate and
        length are optimisable parameters.
    kn
        Passed to ``make_indel_model_defn``; selects the
        Knudsen-Miyamoto indel model.
    """
    indel = make_indel_model_defn(with_indel_params, kn)
    subst = subst_model.make_fundamental_param_controller_defns(bin_names)
    leaf = NonParamDefn("leaf", dimensions=("edge",))

    if len(bin_names) > 1:
        switch = ProbabilityParamDefn("bin_switch", dimensions=["locus"])
        # Uniform initial bin probabilities.  (The previous comprehension
        # shadowed the builtin `bin`; a repeat expression is equivalent.)
        bprobs = PartitionDefn(
            [1.0 / len(bin_names)] * len(bin_names),
            name="bprobs",
            dimensions=["locus"],
            dimension=("bin", bin_names),
        )
        edge_args = [switch, bprobs]
        edge_defn_constructor = EdgeSumAndAlignDefnWithBins
    else:
        edge_args = []
        edge_defn_constructor = EdgeSumAndAlignDefn

    mprobs = subst["word_probs"]
    bin_data = CalcDefn(BinData)(mprobs, indel, subst["Qd"])
    bin_data = bin_data.across_dimension("bin", bin_names)
    edge_args.extend(bin_data)

    (top, scores) = _recursive_defns(
        tree, subst, leaf, edge_defn_constructor, edge_args
    )
    defn = FwdDefn(top)
    # defn = SumDefn(*scores)
    return AnnotateFloatDefn(defn, top)
示例#2
0
 def make_motif_word_prob_defns(self):
     """Return (monomer_probs, word_probs, mprobs_matrix) defns.

     Both the word probabilities and the word weight matrix are derived
     from the single monomer motif-probability defn.
     """
     mprobs = self.make_motif_probs_defn()
     wprobs = CalcDefn(self.calc_word_probs, name="wprobs")(mprobs)
     weight_matrix = CalcDefn(
         self.calc_word_weight_matrix, name="mprobs_matrix"
     )(mprobs)
     return (mprobs, wprobs, weight_matrix)
示例#3
0
 def make_motif_word_prob_defns(self):
     """Return (monomer_probs, word_probs, mprobs_matrix) defns with a
     separate monomer distribution per word position.

     The per-position partition defns are stacked into one array defn,
     from which the word probabilities and the word weight matrix are
     derived.
     """
     per_position = PartitionDefn(
         name="psmprobs",
         default=None,
         dimensions=("locus", "position", "edge"),
         dimension=("motif", tuple(self.get_input_alphabet())),
     )
     positions = [str(i) for i in range(self.word_length)]
     # Stack the per-position defns into a single array-valued defn.
     stacked = CalcDefn(lambda *x: numpy.array(x), name="mprobs")(
         *per_position.across_dimension("position", positions)
     )
     wprobs = CalcDefn(self.calc_word_probs, name="wprobs")(stacked)
     weight_matrix = CalcDefn(
         self.calc_word_weight_matrix, name="mprobs_matrix"
     )(stacked)
     return (per_position, wprobs, weight_matrix)
示例#4
0
def make_indel_model_defn(with_indel_params=True, kn=True):
    """Return a defn for the indel model.

    ``kn`` selects the Knudsen-Miyamoto model over the simple one.
    With ``with_indel_params`` the indel rate and extension probability
    are optimisable parameters; otherwise the model is provided as a
    non-optimisable constant defn.
    """
    klass = (
        indel_model.KnudsenMiyamotoIndelModel
        if kn
        else indel_model.SimpleIndelModel
    )
    if not with_indel_params:
        # not optimisable parameter, a constant. Another example is the
        # alignment in an LikFunc
        return NonParamDefn("indel_model")
    length = IndelParameterDefn("indel_length")  # P(extend indel)
    rate = IndelParameterDefn("indel_rate")  # indels per substitution
    return CalcDefn(klass, name="indels")(rate, length)
示例#5
0
def make_partial_likelihood_defns(edge, lht, psubs, fixed_motifs):
    """Recursively build the partial-likelihood defn for ``edge``.

    Tips read their partials straight from the likelihood tree defn.
    For an internal edge, each child's partials are propagated through
    that child's substitution-probability defn (via ``numpy.inner``) and
    the results are combined with a product defn, optionally constrained
    to a fixed motif at this edge.
    """
    kw = {"edge_name": edge.name}

    if edge.istip():
        return LeafPartialLikelihoodDefn(lht, **kw)

    lht_edge = LhtEdgeLookupDefn(lht, **kw)
    child_defns = []
    for child in edge.children:
        below = make_partial_likelihood_defns(
            child, lht, psubs, fixed_motifs
        )
        psub = psubs.select_from_dimension("edge", child.name)
        child_defns.append(CalcDefn(numpy.inner)(below, psub))

    if fixed_motifs:
        fixed_motif = fixed_motifs.select_from_dimension("edge", edge.name)
        return PartialLikelihoodProductDefnFixedMotif(
            fixed_motif, lht_edge, *child_defns, **kw
        )
    return PartialLikelihoodProductDefn(lht, *child_defns, **kw)
示例#6
0
def make_total_loglikelihood_defn(tree, leaves, psubs, mprobs, bprobs,
                                  bin_names, locus_names, sites_independent):
    """Build the defn for the total log likelihood, summed over motifs,
    sites, bins and loci.

    Parameters
    ----------
    tree
        Root edge over which partial likelihoods are built recursively.
    leaves
        Leaf data used to construct the likelihood-tree defn.
    psubs
        Per-edge substitution-probability defns.
    mprobs
        Per-edge motif-probability defns; the "root" slice weights the
        root partial likelihoods.
    bprobs
        Bin-probability defn; only used when there is more than one bin.
    bin_names, locus_names
        Names along the "bin" and "locus" dimensions to combine over.
    sites_independent
        With multiple bins: if True, sites draw bins independently
        (BinnedSiteDistribution); otherwise adjacent sites share bins
        via a switch parameter (PatchSiteDistribution).
    """

    # Per-edge fixed-motif constants (non-optimisable input).
    fixed_motifs = NonParamDefn("fixed_motif", ["edge"])

    lht = LikelihoodTreeDefn(leaves, tree=tree)
    plh = make_partial_likelihood_defns(tree, lht, psubs, fixed_motifs)

    # After the root partial likelihoods have been calculated it remains to
    # sum over the motifs, local sites, other sites (ie: cpus), bins and loci.
    # The motifs are always done first, but after that it gets complicated.
    # If a bin HMM is being used then the sites from the different CPUs must
    # be interleaved first, otherwise summing over the CPUs is done last to
    # minimise inter-CPU communication.

    # Sum over motifs: weight root partials by the root motif probs.
    root_mprobs = mprobs.select_from_dimension("edge", "root")
    lh = CalcDefn(numpy.inner, name="lh")(plh, root_mprobs)
    if len(bin_names) > 1:
        if sites_independent:
            site_pattern = CalcDefn(BinnedSiteDistribution,
                                    name="bdist")(bprobs)
        else:
            # HMM over bins: adjacent sites are correlated via bin_switch.
            switch = ProbabilityParamDefn("bin_switch", dimensions=["locus"])
            site_pattern = CalcDefn(PatchSiteDistribution,
                                    name="bdist")(switch, bprobs)
        # Combine the per-bin site likelihoods under the site pattern.
        blh = CallDefn(site_pattern, lht, name="bindex")
        tll = CallDefn(blh, *lh.across_dimension("bin", bin_names),
                       **dict(name="tll"))
    else:
        # Single bin: just sum log likelihoods across sites.
        lh = lh.select_from_dimension("bin", bin_names[0])
        tll = CalcDefn(log_sum_across_sites, name="logsum")(lht, lh)

    if len(locus_names) > 1:
        # currently has no .make_likelihood_function() method.
        tll = SumDefn(*tll.across_dimension("locus", locus_names))
    else:
        tll = tll.select_from_dimension("locus", locus_names[0])

    return tll
示例#7
0
    def test_recalculation(self):
        """Walk through defn construction, scoping, assignment and
        optimisation of simple calculations.

        Fix: the expected value in the ``get_param_value("A", ...)``
        check was written ``2, 0`` (passing 0 as assertEqual's ``msg``)
        instead of the intended ``2.0``.
        """
        def add(*args):
            return sum(args)

        top = CalcDefn(add)(ParamDefn("A"), ParamDefn("B"))
        pc = top.make_likelihood_function()
        f = pc.make_calculator()

        self.assertEqual(f.get_value_array(), [1.0, 1.0])
        self.assertEqual(f([3.0, 4.25]), 7.25)
        self.assertEqual(f.change([(1, 4.5)]), 7.5)
        self.assertEqual(f.get_value_array(), [3.0, 4.5])

        # Now with scopes.  We will set up the calculation
        # result = (Ax+Bx) + (Ay+By) + (Az+Bz)

        # A and B will remain distinct parameters, but x,y and z are merely scopes - ie:
        # it may be the case that Ax = Ay = Az, and that may simplify the calculation, but
        # we will never even notice if Ax = Bx.
        # Each scope dimension (here there is just one, 'category') must be collapsed away
        # at some point towards the end of the calculation if the calculation is to produce
        # a scalar result.  Here this is done with the select_from_dimension method.

        a = ParamDefn("A", dimensions=["category"])
        b = ParamDefn("B", dimensions=["category"])
        mid = CalcDefn(add, name="mid")(a, b)
        top = CalcDefn(add)(
            mid.select_from_dimension("category", "x"),
            mid.select_from_dimension("category", "y"),
            mid.select_from_dimension("category", "z"),
        )

        # or equivalently:
        # top = CalcDefn(add, *mid.acrossDimension('category', ['x', 'y', 'z']))

        pc = top.make_likelihood_function()
        f = pc.make_calculator()

        self.assertEqual(str(f.get_value_array()), "[1.0, 1.0]")

        # There are still only 2 inputs because the default scope
        # is global, ie: Ax == Ay == Az.  If we allow A to be
        # different in the x,y and z categories and set their
        # initial values to 2.0:

        pc.assign_all("A", value=2.0, independent=True)
        f = pc.make_calculator()

        self.assertEqual(str(f.get_value_array()), "[1.0, 2.0, 2.0, 2.0]")

        # Now we have A local and B still global, so the calculation is
        # (Ax+B) + (Ay+B) + (Az+B) with the input parameters being
        # [B, Ax, Ay, Az], so:

        self.assertEqual(f([1.0, 2.0, 2.0, 2.0]), 9.0)
        self.assertEqual(f([0.25, 2.0, 2.0, 2.0]), 6.75)

        # Constants do not appear in the optimisable inputs.
        # Set one of the 3 A values to be a constant and there
        # will be one fewer optimisable parameters:

        pc.assign_all("A", scope_spec={"category": "z"}, const=True)
        f = pc.make_calculator()

        self.assertEqual(str(f.get_value_array()), "[1.0, 2.0, 2.0]")

        # The parameter controller should catch cases where the specified scope
        # does not exist:

        with self.assertRaises(InvalidScopeError):
            pc.assign_all("A", scope_spec={"category": "nosuch"})
        with self.assertRaises(InvalidDimensionError):
            pc.assign_all("A", scope_spec={"nonsuch": "nosuch"})

        # It is complicated guesswork matching the parameters you expect with positions in
        # the value array, let alone remembering whether or not they are presented to the
        # optimiser as logs, so .get_value_array(), .change() and .__call__() should only be
        # used by optimisers.  For other purposes there is an alternative, human friendly
        # interface:

        pc.update_from_calculator(f)
        self.assertEqual(pc.get_param_value("A", category="x"), 2.0)
        self.assertEqual(pc.get_param_value("B", category=["x", "y"]), 1.0)

        # Despite the name, .get_param_value can get the value from any step in the
        # calculation, so long as it has a unique name.

        self.assertEqual(pc.get_param_value("mid", category="x"), 3.0)

        # For bulk retrieval of parameter values by parameter name and scope name there is
        # the .get_param_value_dict() method:

        vals = pc.get_param_value_dict(["category"])
        self.assertEqual(vals["A"]["x"], 2.0)

        # Here is a function that is more like a likelihood function, in that it has a
        # maximum:

        def curve(x, y):
            return 0 - (x**2 + y**2)

        top = CalcDefn(curve)(ParamDefn("X"), ParamDefn("Y"))
        pc = top.make_likelihood_function()
        f = pc.make_calculator()

        # Now ask it to find the maximum.  It is a simple function with only one local
        # maximum so local optimisation should be enough:

        f.optimise(local=True, show_progress=False)
        pc.update_from_calculator(f)

        # There were two parameters, X and Y, and at the maximum they should both be 0.0:

        self.assertEqual(pc.get_param_value("Y"), 0.0)
        self.assertEqual(pc.get_param_value("X"), 0.0)

        # Because this function has a maximum it is possible to ask it for a confidence
        # interval around a parameter, ie: how far from 0.0 can we move x before f(x,y)
        # falls below f(X,Y)-dropoff:

        self.assertEqual(pc.get_param_interval("X", dropoff=4, xtol=0.0),
                         (-2.0, 0.0, 2.0))

        # We test the ability to omit xtol. Due to precision issues we convert the returned value to a string.

        self.assertTrue("-2.0, 0.0, 2.0" == "%.1f, %.1f, %.1f" %
                        pc.get_param_interval("X", dropoff=4))

        # And finally intervals can be calculated in bulk by passing a dropoff value to
        # .get_param_value_dict():

        self.assertEqual(
            pc.get_param_value_dict([], dropoff=4, xtol=0.0)["X"],
            (-2.0, 0.0, 2.0))

        # For likelihood functions it is more convenient to provide 'p' rather than
        # 'dropoff', dropoff = chdtri(1, p) / 2.0.  Also in general you won't need ultra precise answers,
        # so don't use 'xtol=0.0', that's just to make the doctest work.
        gz = pc.graphviz()