Пример #1
0
    def compute_proposal_probability(self, grammar, t1, t2, resampleProbability=lambdaOne, recurse=True):
        """Return the log probability of proposing *t2* from *t1* by regenerating one node.

        The result is the logsumexp over every (choose node, regenerate subtree)
        path considered:

        - If least_common_difference(t1, t2) reports no differing node, every
          node of *t1* is treated as a candidate for regeneration.
        - Otherwise the differing node is a candidate and, when *recurse* is
          true, so is each of its ancestors (walking .parent in both trees).

        Arguments:
            grammar: grammar used to score the regenerated subtree.
            t1, t2: the tree proposed from and the tree proposed to.
            resampleProbability: passed through to t1.sampling_log_probability.
            recurse: if False, only the differing node itself is considered.
        """
        # NOTE: Summing over trees is not strictly necessary (an auxiliary variable
        # argument would do), but it matches the other proposers and is not much slower.
        node1, node2 = least_common_difference(t1, t2)

        terms = []

        if node1 is None:
            # No specific difference: any node in the tree could have been regenerated
            for candidate in t1:
                lp_choose = t1.sampling_log_probability(candidate, resampleProbability=resampleProbability)
                with BVRuleContextManager(grammar, candidate.parent, recurse_up=True):
                    lp_generate = grammar.log_probability(candidate)
                terms.append(lp_choose + lp_generate)
            return logsumexp(terms)

        # There is a specific path up the tree, starting at the differing nodes
        while node1:
            lp_choose = t1.sampling_log_probability(node1, resampleProbability=resampleProbability)
            with BVRuleContextManager(grammar, node2.parent, recurse_up=True):
                lp_generate = grammar.log_probability(node2)
            terms.append(lp_choose + lp_generate)

            if not recurse:
                break
            node1 = node1.parent
            node2 = node2.parent

        return logsumexp(terms)
Пример #2
0
    def single_probability(self, t):
        """Return the log probability of the single expansion matching *t*.

        Computed inside a BVRuleContextManager for *t* with recurse_up=True, as
        log(p of the rule matching t) minus the log of the summed p of all
        rules returned by get_rules for t.returntype.
        """
        with BVRuleContextManager(self, t, recurse_up=True):
            # Normalizer over every rule currently available for this return type
            log_norm = log(sum(rule.p for rule in self.get_rules(t.returntype)))
            matched = self.get_matching_rule(t)
            return log(matched.p) - log_norm
Пример #3
0
    def propose_tree(self, t, resampleProbability=lambdaOne):
        """Propose a new tree from *t* by regenerating one sampled subnode.

        Returns:
            [new_tree, fb] where fb is the forward log probability minus the
            backward log probability of the proposal.

        Raises:
            ProposalFailedException: if no subnode can be sampled under
                *resampleProbability*.
        """
        proposal = copy(t)

        # Pick the subnode to regenerate; a resampleProbability that allows no
        # node at all surfaces as NodeSamplingException.
        try:
            target, lp_pick = proposal.sample_subnode(resampleProbability=resampleProbability)
        except NodeSamplingException:
            raise ProposalFailedException

        assert getattr(target, "resampleProbability", 1.0) > 0.0, \
            "*** Error in propose_tree %s ; %s" % (resampleProbability(t), t)

        # Resample the target according to the grammar, in the context of its
        # parent. recurse_up=True so all of the ancestors' rules are added too.
        with BVRuleContextManager(self.grammar, target.parent, recurse_up=True):
            target.setto(self.grammar.generate(target.returntype))

        # Forward: pick the node, then generate the new tree
        forward = lp_pick + self.grammar.log_probability(proposal)

        # Backward: pick the same node in the new tree, then generate the old tree
        lp_pick_back = log(1.0 * resampleProbability(target)) \
            - log(proposal.sample_node_normalizer(resampleProbability=resampleProbability))
        backward = lp_pick_back + self.grammar.log_probability(t)

        return [proposal, forward - backward]
Пример #4
0
    def generate(self, x=None):
        """Generate from the grammar.

        Arguments:
            x: What we start from. May be None (then Grammar.start is used), a
                nonterminal, a list of such items (generated element-wise), a
                partially built FunctionNode whose args are to be completed, or
                a terminal string (returned unchanged).

        Returns:
            A FunctionNode for a nonterminal or FunctionNode input, a list for a
            list input, and the string itself for a terminal.
        """
        # Decide what to start from based on the default if start is not specified
        if x is None:
            x = self.start
            assert self.start in self.nonterminals(), \
                "The default start symbol %s is not a defined nonterminal" % self.start

        # Dispatch different kinds of generation
        if isinstance(x, list):
            # If we get a list, just generate along it
            return [self.generate(x=xi) for xi in x]

        elif self.is_nonterminal(x):

            # sample a grammar rule
            rules = self.get_rules(x)
            assert len(rules) > 0, "*** No rules in x=%s" % x
            r = weighted_sample(rules, probs=lambda rule: rule.p, log=False)

            # Make a stub for this FunctionNode
            fn = r.make_FunctionNodeStub(self, None)

            # Define a new context that is the grammar with the rule added.
            # Then, when we exit, it's still right.
            with BVRuleContextManager(self, fn, recurse_up=False):
                # Can't recurse on None or else we would generate from self.start
                if fn.args is not None:
                    # generate below *in* this context (e.g. with the new rules added)
                    try:
                        fn.args = self.generate(fn.args)
                    except RuntimeError:
                        print("*** Runtime error in %s" % fn)
                        raise  # bare raise keeps the original traceback

                # and set the parents
                for a in fn.argFunctionNodes():
                    a.parent = fn

            return fn

        elif isinstance(x, FunctionNode):
            # This lets us finish generation of a partial tree. A node whose
            # args is None has nothing below it to complete.
            if x.args is not None:
                x.args = [self.generate(a) for a in x.args]

            for a in x.argFunctionNodes():
                a.parent = x

            return x

        else:  # must be a terminal
            # %r so that building the message cannot itself fail when x is not a string
            assert isinstance(x, str), "*** Terminal must be a string! x=%r" % (x,)
            return x
Пример #5
0
    def generate_with_counts(self, x=None, nc=None):
        """Generate from the grammar while counting which rules were sampled.

        Arguments:
            x: What we start from; same kinds of input as the nonterminal /
                list / FunctionNode / terminal dispatch below. None means
                self.start.
            nc: Array of counts, incremented in place at
                [self.sorted_rules.index(nonterminal), index of the sampled rule].
                Only allocated here (zeros shaped like self.alphas) when *x* is
                None; a call with an explicit *x* must supply it.

        Returns:
            (tree, nc) when called with x=None; otherwise just the generated
            tree / list / terminal.
        """
        if x is None:
            x = self.start

            assert self.start in self.nonterminals(), \
                "The default start symbol %s is not a defined nonterminal" % self.start

            if nc is None:
                nc = np.zeros(np.shape(self.alphas))
            x = self.generate_with_counts(x, nc)

            return x, nc

        try:
            if isinstance(x, list):
                return [self.generate_with_counts(x=xi, nc=nc) for xi in x]

            elif self.is_nonterminal(x):
                rules = self.get_rules(x)
                assert len(rules) > 0, "*** No rules in x=%s" % x

                r = weighted_sample(rules, probs=lambda rule: rule.p, log=False)

                # Record the sampled expansion: row = nonterminal, column = rule
                rule_ix = self.sorted_rules.index(x)
                sym_ix = rules.index(r)
                nc[rule_ix, sym_ix] += 1.0

                fn = r.make_FunctionNodeStub(self, None)

                # Generate the arguments with this node's rule context in place
                with BVRuleContextManager(self, fn, recurse_up=False):
                    if fn.args is not None:
                        try:
                            fn.args = self.generate_with_counts(fn.args, nc)
                        except RuntimeError:
                            print("*** Runtime error in %s" % fn)
                            raise  # bare raise keeps the original traceback

                    for a in fn.argFunctionNodes():
                        a.parent = fn

                return fn

            elif isinstance(x, FunctionNode):
                # Finish generation of a partial tree
                x.args = [self.generate_with_counts(a, nc) for a in x.args]

                for a in x.argFunctionNodes():
                    a.parent = x

                return x

            else:
                # %r so that building the message cannot itself fail when x is not a string
                assert isinstance(x, str), \
                    "*** Terminal must be a string! x=%r" % (x,)
                return x
        except AssertionError:
            print("***Assertion error in %s" % x)
            raise
Пример #6
0
 def propose_tree(self, grammar, t, resampleProbability=lambdaOne):
     """Return a copy of *t* in which one sampled subnode has been regenerated.

     Raises ProposalFailedException when no subnode can be sampled.
     """
     proposal = copy(t)

     # Choose the node to regenerate; the sampling log probability is not needed here
     try:
         target, _lp = proposal.sample_subnode(resampleProbability=resampleProbability)
     except NodeSamplingException:
         raise ProposalFailedException

     # Resample the target from the grammar in the context of its parent;
     # recurse_up=True so that all of the ancestors' rules are added as well.
     with BVRuleContextManager(grammar, target.parent, recurse_up=True):
         regenerated = grammar.generate(target.returntype)
         target.setto(regenerated)

     return proposal
Пример #7
0
    def make_children(self):
        """Build one child State per rule that can fill the first unfilled slot.

        The tree in self.value.value is copied, the first argument that is still
        a nonterminal (in iteration order) is located, and for each rule of
        that nonterminal a new State is created whose tree has that slot
        replaced by the rule's FunctionNode stub.

        Returns:
            list of the newly created child States.

        Raises:
            StatePruneException: if the tree already has at least
                self.value.maxnodes nodes.
        """
        assert self.children is None  ## Otherwise we should never call this

        root = self.value.value
        if root.count_nodes() >= self.value.maxnodes:
            raise StatePruneException

        # Work on a copy so the original tree is untouched
        newfn = copy(root)

        # Locate the first unfilled slot: argument `argi` of node `fn` in the copy
        fn, argi = None, None
        for node in newfn:
            if node.args is None:
                continue
            for i, a in enumerate(node.args):
                if self.grammar.is_nonterminal(a):
                    fn, argi = node, i
                    break
            if fn is not None:
                break
        assert fn is not None, "Cannot call make_children on a terminal. This must be avoided in State.next()"

        children = []
        with BVRuleContextManager(self.grammar, fn, recurse_up=True):
            rules = self.grammar.get_rules(fn.args[argi])
            # Not used below; computation retained from the original.
            lZ = log(sum([rule.p for rule in rules]))

            for rule in rules:
                # Fill the slot in place; the copy below snapshots this state
                fn.args[argi] = rule.make_FunctionNodeStub(self.grammar, fn)

                # New hypothesis of the same type as self.value, holding a fresh
                # copy of the tree so that different rules give different trees
                newh = self.value.__copy__(value=None)
                newh.set_value(copy(newfn), f=lambdaAssertFalse)

                # and wrap it in a State of our own type
                child = type(self)(newh,
                                   data=self.data,
                                   grammar=self.grammar,
                                   hole_penalty=self.hole_penalty,
                                   parent=self)
                children.append(child)

        return children
Пример #8
0
    def iterate_subnodes(self,
                         t,
                         d=0,
                         predicate=lambdaTrue,
                         do_bv=True,
                         yield_depth=False):
        """
                Iterate through all subnodes of node *t*, while updating the added rules (bound variables)
                so that at each subnode, the grammar is accurate to what it was.

                *t*: a FunctionNode, or a list whose elements are each iterated at the same depth.
                Anything else yields nothing.
                *d*: the depth assigned to *t*; arguments of a FunctionNode are at d+1
                *do_bv*: if False, we don't do bound variables, i.e. the grammar is left untouched
                (useful for things like counting nodes, instead of having to update the grammar).
                Previously this flag was accepted but ignored.
                *yield_depth*: if True, we yield (node, depth) instead of node
                *predicate*: filter only the ones that match this

                NOTE: if you DON'T iterate all the way through (with do_bv=True), you end up
                accumulating bv rules, so NEVER stop this iteration in the middle!
                TODO: Make this more elegant -- use BVCM
        """

        if isFunctionNode(t):

            if predicate(t):
                yield (t, d) if yield_depth else t

            # Everything below this node. Building the generator runs no code
            # yet, so it is only consumed inside the context chosen below.
            if t.args is None:
                below = ()
            else:
                below = self.iterate_subnodes(t.args,
                                              d=d + 1,
                                              do_bv=do_bv,
                                              yield_depth=yield_depth,
                                              predicate=predicate)

            if do_bv:
                # Define a new context that is the grammar with the rule added.
                # Then, when we exit, it's still right.
                with BVRuleContextManager(self, t.added_rule):
                    for g in below:  # pass up anything from below
                        yield g
            else:
                for g in below:
                    yield g

        elif isinstance(t, list):
            for a in t:
                for g in self.iterate_subnodes(a,
                                               d=d,
                                               do_bv=do_bv,
                                               yield_depth=yield_depth,
                                               predicate=predicate):
                    yield g
Пример #9
0
    def generate(self, x=None):
        """
                Generate from the PCFG -- default is to start from x - either a
                nonterminal or a list of nonterminals/terminals. If x is None we
                start from self.start.

                Returns a FunctionNode for a nonterminal, a list for a list, and
                the string itself for a terminal.

        """
        # Decide what to start from based on the default if start is not specified
        if x is None:
            x = self.start
            assert self.start in self.rules, "The default start symbol %s is not a defined nonterminal" % self.start

        # Dispatch different kinds of generation
        if isinstance(x, list):
            # If we get a list, just generate along it. We don't count lists as depth--only FunctionNodes
            return [self.generate(x=xi) for xi in x]
        elif self.is_nonterminal(x):

            # sample a grammar rule, along with the probability of that choice
            r, gp = weighted_sample(self.rules[x],
                                    probs=lambda rule: rule.p,
                                    return_probability=True,
                                    log=False)

            # Make a stub for this functionNode, passing along the sampled probability
            fn = r.make_FunctionNodeStub(self, gp)

            # Define a new context that is the grammar with the rule added. Then, when we exit, it's still right
            with BVRuleContextManager(self, fn.added_rule):
                # Can't recurse on None or else we would generate from self.start
                if fn.args is not None:
                    # generate below *in* this context (e.g. with the new rules added)
                    fn.args = self.generate(fn.args)

            return fn

        else:  # must be a terminal
            # %r so that building the message cannot itself fail when x is not a string
            assert isinstance(x, str), "*** Terminal must be a string! x=%r" % (x,)
            return x
Пример #10
0
    def unpack_ascii_rec(self, s, x, idx2rule):
        """
        Rebuild a tree from the packed list *s*. Mirrors the structure of
        Grammar.generate, except that each rule choice is read from the front
        of *s* (which is consumed in place) rather than sampled.

        *x* is the nonterminal, list, or terminal being expanded; *idx2rule*
        maps indices to rules and is temporarily extended while inside a rule
        that is a BVAddGrammarRule.
        """
        assert x is not None  # should have been given by unpack_ascii

        if isinstance(x, list):
            return [self.unpack_ascii_rec(s, xi, idx2rule) for xi in x]

        if not self.is_nonterminal(x):
            return x  # must be a terminal

        # Result is not used below; call retained from the original.
        self.get_rules(x)

        # Instead of sampling a rule, look it up from the next symbol of s
        symbol = s[0]
        i = pack_string.index(symbol)
        del s[0]  # consume it (works since s is a list)
        r = idx2rule[i]

        # Make a stub for this functionNode
        fn = r.make_FunctionNodeStub(self, None)
        adds_bv = isinstance(r, BVAddGrammarRule)

        with BVRuleContextManager(self, fn, recurse_up=False):

            # register the added rule under the next free index
            if adds_bv:
                idx2rule[max(idx2rule.keys()) + 1] = fn.added_rule

            # recurse
            if fn.args is not None:
                fn.args = self.unpack_ascii_rec(s, fn.args, idx2rule)

            # drop the highest index again (as we now do in packing, too)
            if adds_bv:
                del idx2rule[max(idx2rule.keys())]

            for child in fn.argFunctionNodes():
                child.parent = fn

        return fn
Пример #11
0
    def compute_proposal_probability(self,
                                     grammar,
                                     t1,
                                     t2,
                                     resampleProbability=lambdaOne):
        """Return the log probability of reaching *t2* from *t1* by an insertion.

        An insertion is recognized when least_common_difference yields two
        nodes and some argument of the second is roughly equal to the first.
        The result is then the sum of the log probabilities of: choosing the
        node in *t1*, choosing the rule (uniform over insertable rules of that
        return type), choosing which argument slot holds the old node (uniform
        over arguments with the same return type), and generating every
        argument that differs from the old node. Otherwise -Infinity.
        """
        node_1, node_2 = least_common_difference(t1, t2)

        looks_like_insertion = (node_1 and node_2 and any([
            nodes_are_roughly_equal(arg, node_1)
            for arg in None2Empty(node_2.args)
        ]))
        if not looks_like_insertion:
            # the trees cannot be identical if we performed an insertion
            return -Infinity

        def insertable_weight(t):
            return can_insert_FunctionNode(t, grammar) * resampleProbability(t)

        lp_choosing_node_1 = t1.sampling_log_probability(
            node_1, resampleProbability=insertable_weight)

        # Uniform choice among the rules that can be inserted at this return type
        insertable_rules = [r for r in grammar.rules[node_1.returntype]
                            if can_insert_GrammarRule(r)]
        lp_choosing_rule = -nicelog(len(insertable_rules))

        # Uniform choice among the argument slots that could hold the old node
        compatible_args = [a for a in node_2.args
                           if a.returntype == node_1.returntype]
        lp_choosing_replacement = -nicelog(len(compatible_args))

        # Every argument that is significantly different from node_1 had to be generated
        lp_generation = []
        for arg in node_2.args:
            matches_node_1 = (arg.name == node_1.name
                              and arg.returntype == node_1.returntype
                              and arg.args == node_1.args)
            if matches_node_1:
                continue
            with BVRuleContextManager(grammar, node_2, recurse_up=True):
                lp_generation.append(grammar.log_probability(arg))

        lp_copy_making_node_2 = lp_choosing_rule + lp_choosing_replacement + sum(
            lp_generation)

        return lp_choosing_node_1 + lp_copy_making_node_2
Пример #12
0
    def iterate_subnodes(self,
                         grammar,
                         t=None,
                         d=0,
                         predicate=lambdaTrue,
                         yield_depth=False,
                         recurse_up=False):
        """Walk *t* and everything below it, keeping the grammar's added rules
        (bound variables) in step so the grammar is accurate at each subnode.

        Arguments
        ---------
        grammar : LOTlib.Grammar
            The grammar whose rules are updated while iterating.
        t : FunctionNode
            The tree to iterate over; a falsy value means *self*.
        d : int
            Depth reported for *t*; its argument nodes are reported at d + 1.
        predicate : function
            Only nodes for which predicate(node) is true are yielded.
        yield_depth : bool
            If True, yield (node, depth) pairs instead of bare nodes.
        recurse_up : bool
            Passed to the context manager for *t* only; recursive calls always
            use recurse_up=False.
        """
        t = t or self

        if predicate(t):
            if yield_depth:
                yield (t, d)
            else:
                yield t

        # Define a new context that is the grammar with the rule added.
        # Then, when we exit, it's still right.
        with BVRuleContextManager(grammar, t, recurse_up=recurse_up):
            for child in t.argFunctionNodes():
                # we never have to recurse up from below
                descendants = self.iterate_subnodes(grammar,
                                                    child,
                                                    d=d + 1,
                                                    yield_depth=yield_depth,
                                                    predicate=predicate,
                                                    recurse_up=False)
                # Pass up anything from below
                for found in descendants:
                    yield found
Пример #13
0
    def propose_tree(self, grammar, tree, resampleProbability=lambdaOne):
        """Return a copy of *tree* with a new FunctionNode inserted above one node.

        A node is sampled (weighted by can_insert_FunctionNode times
        resampleProbability), a rule of the same return type is drawn from
        those passing can_insert_GrammarRule, the sampled node is placed in one
        of the rule's argument slots of that return type, and the remaining
        arguments are generated from the grammar.

        Raises ProposalFailedException if no node, rule, or slot is available.
        """
        new_t = copy(tree)

        def insertable_weight(t):
            return can_insert_FunctionNode(t, grammar) * resampleProbability(t)

        # choose a node to insert on
        try:
            ni, _lp = new_t.sample_subnode(insertable_weight)
        except NodeSamplingException:
            raise ProposalFailedException

        # is there a rule that expands from ni.returntype to some ni.returntype?
        replicating_rules = [rule for rule in grammar.rules[ni.returntype]
                             if can_insert_GrammarRule(rule)]
        if not replicating_rules:
            raise ProposalFailedException

        # sample a rule and build the FunctionNode for it
        r = sample1(replicating_rules)
        fn = r.make_FunctionNodeStub(grammar, ni.parent)

        # figure out which arg slots could hold the existing ni
        candidate_slots = [i for i, a in enumerate(fn.args)
                           if a == ni.returntype]
        if not candidate_slots:  # should never happen
            raise ProposalFailedException

        # choose the one to replace
        replace_i = sample1(candidate_slots)

        ## Now fill every arg, with the right rules in the grammar
        with BVRuleContextManager(grammar, fn, recurse_up=True):
            for i, a in enumerate(fn.args):
                if i == replace_i:
                    fn.args[i] = copy(ni)
                else:
                    fn.args[i] = grammar.generate(a)

        # perform the insertion
        ni.setto(fn)

        return new_t
Пример #14
0
    def log_probability(self, t):
        """
        Return the log probability of the tree *t*, recomputing everything.

        The probability of the expansion at *t* (its matching rule's p,
        normalized over all rules for t.returntype) is combined with the
        recursively computed log probability of each argument FunctionNode,
        the latter evaluated inside a BVRuleContextManager for *t*.

        This is overall about half as fast, but it means we don't have to store generation_probability
        """
        assert isinstance(t, FunctionNode)

        log_norm = log(sum(rule.p for rule in self.get_rules(t.returntype)))

        # Always look the rule up rather than storing it: stored rule objects are
        # hard to keep valid with multiple grammar objects across loading/saving.
        r = self.get_matching_rule(t)
        assert r is not None, "Failed to find matching rule at %s %s" % (t, r)

        total = log(r.p) - log_norm

        with BVRuleContextManager(self, t):
            for child in t.argFunctionNodes():
                total += self.log_probability(child)

        return total
Пример #15
0
    def lp_propose(self, x, y, resampleProbability=lambdaOne, xZ=None):
        """
                Returns a log probability of starting at x and ending up at y from a regeneration move.
                Any node is a candidate if the trees are identical except for what's below those nodes
                (although what's below *can* be identical!)

                x, y: the trees (or subtrees, in recursive calls) proposed from and to. Unless both
                are FunctionNodes with the same returntype, -Infinity is returned.
                resampleProbability: function giving each node's weight of being chosen
                xZ: the sampling normalizer; computed via x.sample_node_normalizer when None
                and passed on unchanged to recursive calls

                NOTE: This does NOT take into account insert/delete
                NOTE: Not so simple because we must count multiple paths


                NOTE: This is currently not correct because it will mess up with bound variables, which now have
                unique names. Also it seems to add too many rules to the grammar, probably via recurse_up
        """
        # Accumulated in log space; stays -Infinity if no path from x to y is found
        RP = -Infinity

        if isinstance(x, FunctionNode) and isinstance(
                y, FunctionNode) and x.returntype == y.returntype:

            # compute the normalizer (only on the outermost call; recursion passes it down)
            if xZ is None:
                xZ = x.sample_node_normalizer(
                    resampleProbability=resampleProbability)

            # Path 1: select x itself and regenerate it as y. y must be scored under the
            # current grammar, so x's context (recursing up) is installed first.
            with BVRuleContextManager(self.grammar, x, recurse_up=True):
                RP = logplusexp(
                    RP,
                    log(1.0 * resampleProbability(x)) - log(xZ) +
                    self.grammar.log_probability(y))

            # Further paths go through the children, which requires the same name and arity
            if x.name == y.name and x.args is not None and y.args is not None and len(
                    x.args) == len(y.args):

                # how many kids are not equal, and where was the last?
                mismatch_count, mismatch_index = 0, 0
                for i, xa, ya in zip(xrange(len(x.args)), x.args, y.args):
                    if xa != ya:  # checks whole subtree!
                        mismatch_count += 1
                        mismatch_index = i
                    if mismatch_count > 1: break  # can't win

                if mismatch_count > 1:  # More than one child differs: only regenerating x itself can give y

                    pass

                elif mismatch_count == 1:  # we could propose to x, or x.args[mismatch_index], but nothing else (nothing else will fix the mismatch)

                    with BVRuleContextManager(
                            self.grammar, x,
                            recurse_up=False):  # recurse, but keep track of bv
                        RP = logplusexp(
                            RP,
                            self.lp_propose(
                                x.args[mismatch_index],
                                y.args[mismatch_index],
                                resampleProbability=resampleProbability,
                                xZ=xZ))

                else:  # identical trees -- we could propose to any, so that's just the tree probability below convolved with the resample p

                    for xi in x.iterate_subnodes(self.grammar,
                                                 recurse_up=True):
                        if xi is not x:  # but we already counted ourself (NOTE: Must be "is", not ==)
                            # Here we use grammar.log_probability since the grammar may have changed with bv
                            RP = logplusexp(
                                RP,
                                log(resampleProbability(xi) * 1.0) - log(xZ) +
                                self.grammar.log_probability(xi))

        return RP
Пример #16
0
    def enumerate_at_depth(self, d, nt=None, leaves=True):
        """Generate trees at depth d, no deeper or shallower.

        Parameters
            d (int): the depth of trees you want to generate
            nt (str): the type of the nonterminal you want to return (None reverts to self.start)
            leaves (bool): do we put terminals in the leaves or leave nonterminal types? This is useful in
              PartitionMCMC. This returns trees of depth d-1!

        Return:
            A generator. If *nt* is not a nonterminal it yields *nt* itself and stops.
            At d == 0 it yields a FunctionNode stub per terminal rule of *nt* (or just
            *nt* when leaves is False). Otherwise it yields copies of FunctionNodes
            built from the non-terminal rules of *nt*, with at least one child
            enumerated at depth d-1.

        """
        if nt is None:
            nt = self.start

        # handle garbage that may be passed in here
        if not self.is_nonterminal(nt):
            yield nt
            # `return` ends the generator; raising StopIteration here would
            # surface as a RuntimeError under PEP 479.
            return

        if d == 0:
            if leaves:
                # Note: can NOT use filter here, or else it doesn't include added rules
                for r in self.rules[nt]:
                    if self.is_terminal_rule(r):
                        yield r.make_FunctionNodeStub(self, None)
            else:
                # If not leaves, we just put the nonterminal type in the leaves
                yield nt
        else:
            # Note: can NOT use filter here, or else it doesn't include added rules. No sorting either!
            for r in self.rules[nt]:

                # No good since it won't be deep enough
                if self.is_terminal_rule(r):
                    continue

                fn = r.make_FunctionNodeStub(self, None)

                # The possible depths for the i'th child: nonterminals vary
                # over 0..d-1, anything else is fixed at depth 0
                child_i_depths = lambda i: xrange(d) if self.is_nonterminal(fn.args[i]) else [0]

                # The depths of each kid
                for cd in lazyproduct(map(child_i_depths, xrange(len(fn.args))), child_i_depths):

                    # One must be equal to d-1
                    # TODO: can be made more efficient via permutations. Also can skip terminals in args.
                    if max(cd) < d-1:
                        continue
                    assert max(cd) == d-1

                    myiter = lazyproduct(
                        [self.enumerate_at_depth(di, nt=a, leaves=leaves) for di, a in zip(cd, fn.args)],
                        lambda i: self.enumerate_at_depth(cd[i], nt=fn.args[i], leaves=leaves))
                    try:
                        while True:
                            # Make a copy so we don't modify anything
                            yieldfn = copy(fn)

                            # BVRuleContextManager here makes us remove the rule BEFORE yielding,
                            # or else this will be incorrect. Wasteful but necessary.
                            with BVRuleContextManager(self, fn, recurse_up=False):
                                # builtin next() works on Python 2.6+ and 3
                                yieldfn.args = next(myiter)
                                for a in yieldfn.argFunctionNodes():
                                    # Update parents
                                    a.parent = yieldfn

                            yield copy(yieldfn)

                    except StopIteration:
                        # myiter is exhausted; caught here so we continue in this loop over rules
                        pass
Пример #17
0
    def propose_tree(self, t):

        newt = copy(t)

        if random() < 0.5:  # So we insert

            # Choose a node at random to insert on
            # TODO: We could precompute the nonterminals we can do this move on, if we wanted
            try:
                ni, lp = newt.sample_subnode(isNotBVAddFunctionNode)
            except NodeSamplingException:
                raise ProposalFailedException

            # Since it's an insert, see if there is a (replicating) rule that expands
            # from ni.returntype to some ni.returntype
            replicating_rules = filter(is_replicating_GrammarRule,
                                       self.grammar.rules[ni.returntype])
            if len(replicating_rules) == 0: return [newt, fb]

            # sample a rule and compute its probability (not under the predicate)
            r = sample1(replicating_rules)

            # the functionNode we are building
            fn = r.make_FunctionNodeStub(self, ni.parent)

            # figure out which arg will be the existing ni
            replicatingindices = filter(lambda i: fn.args[i] == ni.returntype,
                                        xrange(len(fn.args)))
            assert replicatingindices > 0  # since that's what a replicating rule is
            replace_i = sample1(
                replicatingindices)  # choose the one to replace
            fn.args[replace_i] = copy(ni)  # the one we replace

            ## Now expand the other args, with the right rules in the grammar
            with BVRuleContextManager(self.grammar, fn, recurse_up=True):

                # and generate the args below
                for i, a in enumerate(fn.args):
                    if i != replace_i:
                        fn.args[i] = self.grammar.generate(
                            a)  #else generate like normalized

            # we need a count of how many kids are the same afterwards
            after_same_children = sum([x == ni for x in fn.args])

            ni.setto(fn)

            with BVRuleContextManager(self.grammar, fn, recurse_up=True):

                # what is the prob mass of the new stuff?
                new_lp_below = sum([
                    self.grammar.log_probability(fn.args[i]) if
                    (i != replace_i and isFunctionNode(fn.args[i])) else 0.
                    for i in xrange(len(fn.args))
                ])
                # What is the new normalizer?
                newZ = newt.sample_node_normalizer(isNotBVAddFunctionNode)
                assert newZ > 0
                # To sample forward: choose the node ni, choose the replicating rule, choose which "to" to expand (we could have put it on any of the replicating rules that are identical), and genreate the rest of the tree
                f = lp + (-log(len(replicating_rules))) + (
                    log(after_same_children) -
                    log(len(replicatingindices))) + new_lp_below
                # To go backwards, choose the inserted rule, and any of the identical children, out of all replicators
                b = (log(1.0 * isNotBVAddFunctionNode(fn)) - log(newZ)) + (
                    log(after_same_children) - log(len(fn.args)))

        else:  # A delete move!

            # Sample a node at random
            try:
                ni, lp = newt.sample_subnode(
                    isNotBVAddFunctionNode)  # this could raise exception

                # Really, it had to be not None
                if ni.args is None:
                    raise NodeSamplingException

            except NodeSamplingException:
                raise ProposalFailedException

            # Figure out which of my children have the same type as me
            replicating_kid_indices = filter(
                lambda i: isFunctionNode(ni.args[i]) and ni.args[i].returntype
                == ni.returntype, range(len(ni.args)))
            nrk = len(replicating_kid_indices)  # how many replicating kids
            if nrk == 0:
                raise ProposalFailedException

            replicating_rules = filter(is_replicating_GrammarRule,
                                       self.grammar.rules[ni.returntype])
            assert len(replicating_rules
                       ) > 0  # better be some or where did ni come from?

            samplei = sample1(
                replicating_kid_indices
            )  # who to promote; NOTE: not done via any weighting

            # We need to be in the right grammar state to evaluate log_probability
            with BVRuleContextManager(self.grammar,
                                      ni.args[samplei],
                                      recurse_up=True):

                # Now we must count the multiple ways we could go forward or back
                # Here, we could have sampled any of them equivalent to ni.args[i]
                before_same_children = sum([
                    x == ni.args[samplei] for x in ni.args
                ])  # how many are the same after?

                # the lp of everything we'd have to create going backwards
                old_lp_below = sum([
                    self.grammar.log_probability(ni.args[i]) if
                    (i != samplei and isFunctionNode(ni.args[i])) else 0.
                    for i in xrange(len(ni.args))
                ])

                # and replace it
                ni.args[
                    samplei].parent = ni.parent  # update this first ;; TODO: IS THIS NECSESARY?
                ni.setto(ni.args[samplei])

                # And compute f/b probs
                newZ = newt.sample_node_normalizer(
                    resampleProbability=isNotBVAddFunctionNode)
                # To go forward, choose the node, and then from all equivalent children
                f = lp + (log(before_same_children) - log(nrk))
                # To go back, choose the node, choose the replicating rule, choose where to put it, and generate the rest of the tree
                b = (log(1.0 * isNotBVAddFunctionNode(ni)) -
                     log(newZ)) + -log(len(replicating_rules)) + (
                         log(before_same_children) - log(nrk)) + old_lp_below

        return [newt, f - b]
Пример #18
0
    def increment_tree_(self,
                        x=None,
                        depth=0,
                        max_depth=Infinity,
                        depthdict=None):
        """
        A lazy version of tree enumeration. Here, we generate all trees,
        starting from a rule or a nonterminal symbol and going up to max_depth.

        This is a generator and should produce each tree *once* (However: if a
        grammar has multiple derivations of the same str(tree), then you will
        see repeats!).

        TODO: CHANGE THIS TO ENUMERATE SHALLOW->DEEP

        *x*: A node in the tree -- either a FunctionNode whose nonterminal
             args get expanded, or a nonterminal symbol whose rules are
             expanded. Anything else yields nothing.
        *depth*: Depth of the tree; nothing is yielded once depth >= max_depth
        *max_depth*: Depth bound on the enumeration
        *depthdict*: Passed to depth_to_terminal as current_d so that we can
                     order rules in order to make enumeration small->large

        Yields trees. Trees yielded from the FunctionNode branch are copies
        of x, which is itself mutated in place while enumerating.
        """
        if depth >= max_depth:
            # A bare return ends the generator. "raise StopIteration" inside a
            # generator body is turned into a RuntimeError by PEP 479.
            return

        if isFunctionNode(x):
            # NOTE: We don't need to handle BV here since they are handled below when we use the rule

            original_x = copy(x)

            # go all odometer on the kids below: one sub-enumerator per
            # nonterminal arg, None for args that are not nonterminals
            iters = [
                self.increment_tree_(
                    x=y, depth=depth, max_depth=max_depth, depthdict=depthdict)
                if self.is_nonterminal(y) else None for y in x.args
            ]
            if len(iters) == 0:
                yield copy(x)
            else:
                # Fill each nonterminal slot with its first expansion. If some
                # slot has no expansion at all, there are no trees to yield.
                try:
                    for i, it in enumerate(iters):
                        if it is not None:
                            x.args[i] = next(it)
                except StopIteration:
                    return

                # the index of the last nonterminal arg (may not be
                # len(iters)-1); -1 if there is none
                last_nonterminal_idx = max(
                    i if it is not None else -1
                    for i, it in enumerate(iters))

                ## Now loop through the args, counting them up
                while True:

                    # yield the initial tree, and then each successive tree
                    yield copy(x)

                    # and then process each carry:
                    for carry_pos in range(len(iters)):
                        if iters[carry_pos] is None:
                            continue  # not a nonterminal (mixed in)

                        ## NOTE: This *MUST* go here in order to prevent adding a rule and then not removing it when you carry (thus introducing a bv of a1 into a2)
                        with BVRuleContextManager(self, x.added_rule):

                            try:
                                x.args[carry_pos] = next(iters[carry_pos])
                                break  # incremented successfully, no carry
                            except StopIteration:  # if so, then "carry"
                                if carry_pos == last_nonterminal_idx:
                                    # the most significant digit overflowed
                                    return

                                # reset the incrementer since we just carried
                                iters[carry_pos] = self.increment_tree_(
                                    x=original_x.args[carry_pos],
                                    depth=depth,
                                    max_depth=max_depth,
                                    depthdict=depthdict)
                                try:
                                    x.args[carry_pos] = next(iters[carry_pos])
                                except StopIteration:
                                    return
                                # and just continue the loop (which processes the carry)
                    else:
                        # Only reached when no arg is a nonterminal: there is
                        # nothing to increment, so stop after the single tree
                        # instead of yielding the same tree forever.
                        return

        elif self.is_nonterminal(x):  # just a single nonterminal

            ## TODO: somewhat inefficient since we do this each time:
            ## Here we change the order of rules to be terminals *first*
            terminals = []
            nonterminals = []
            for k in self.rules[x]:
                if self.is_terminal_rule(k):
                    terminals.append(k)
                else:
                    nonterminals.append(k)

            # sort each group by depth_to_terminal, so shallow expansions
            # *tend* to come first
            terminals = sorted(
                terminals,
                key=lambda r: self.depth_to_terminal(r, current_d=depthdict))
            nonterminals = sorted(
                nonterminals,
                key=lambda r: self.depth_to_terminal(r, current_d=depthdict))
            Z = logsumexp([log(r.p) for r in self.rules[x]])  # normalizer

            # yield each of the rules that lead to terminals -- always do this
            # since depth < max_depth here (checked above)
            for r in terminals:
                fn = r.make_FunctionNodeStub(self, (log(r.p) - Z))
                # Do not need to set added_rule since they can't exist here
                yield fn

            if depth < max_depth:  # if we can go deeper
                for r in nonterminals:  # expand each nonterminal rule
                    fn = r.make_FunctionNodeStub(self, (log(r.p) - Z))

                    for q in self.increment_tree_(x=fn,
                                                  depth=depth + 1,
                                                  max_depth=max_depth,
                                                  depthdict=depthdict):
                        yield q
            else:
                # NOTE(review): unreachable, since depth >= max_depth already
                # returned above. Kept as in the original -- confirm whether
                # this was meant to be the outer else (x is a terminal).
                yield x