def test_RegenerationProposal(self):
    """Check that RegenerationProposal's samples agree with its lp_propose.

    For every tree in ``self.trees``: draw ``NSAMPLES`` proposals, validate
    each one with ``self.check_tree``, then chi-square the observed counts
    over ``self.trees`` against ``exp(rp.lp_propose(tree, t)) * NSAMPLES``.
    Fails when the resulting p-value is not greater than 0.001.
    """
    from LOTlib.Inference.Proposals.RegenerationProposal import RegenerationProposal

    rp = RegenerationProposal(self.grammar)

    for tree in self.trees:
        cnt = Counter()
        for _ in xrange(NSAMPLES):
            p, _fb = rp.propose_tree(tree)
            cnt[p] += 1

            # Check the proposal
            self.check_tree(p)

        # Build each list exactly once; they feed both the chi-squared test
        # and the diagnostic printout below.
        observed = [cnt[t] for t in self.trees]
        log_probs = [rp.lp_propose(tree, t) for t in self.trees]
        expected = [exp(lp) * NSAMPLES for lp in log_probs]

        # Check that the proposals are what they should be, i.e. that
        # rp.lp_propose is correct.
        _csq, pv = chisquare(observed, expected)

        # Single-argument form: prints the same text whether ``print`` is
        # the statement or the function.
        for c, e, tt, lp in zip(observed, expected, self.trees, log_probs):
            print("%s %s %s %s" % (c, e, tt, lp))

        self.assertGreater(pv, 0.001, msg="Sampler failed chi squared!")
def test_lp_regenerate_propose_to(self):
    """Chi-square the proposal frequencies on lp_regenerate_propose_to_grammar.

    Runs ``numTests`` independent checks. Each one generates a tree from
    'START', proposes ``numTrees`` new trees from it, tallies how often each
    proposal occurs, and asks ``self.get_pvalue`` for a chi-squared p-value.
    A check passes when the p-value exceeds 0.01 / numTests.
    """
    # import the grammar
    from Grammars import lp_regenerate_propose_to_grammar
    self.G = lp_regenerate_propose_to_grammar.g

    # the RegenerationProposal class
    rp = RegenerationProposal(self.G)

    numTests = 100    # number of chi-squared tests to run
    numTrees = 1000   # proposals drawn for each test
    threshold = 0.01 / numTests

    for i in break_ctrlc(range(numTests)):
        # actual number of times each tree was proposed
        actual_counts = defaultdict(int)
        tree = self.G.generate('START')

        # Regenerate some number of trees at random
        for _ in range(numTrees):
            proposed = rp.propose_tree(tree)[0]
            actual_counts[proposed] += 1

        # see if the frequency with which each category of trees is generated
        # matches the expected counts using a chi-squared test
        chisquared, p = self.get_pvalue(tree, actual_counts, numTrees)

        self.assertTrue(
            p > threshold,
            "Trees are not being generated according to the expected log probabilities")

        # progress report at every tenth index, skipping index 0
        if i != 0 and i % 10 == 0:
            print("%s %s" % (i, "lp_regenerate_propose_to tests..."))

    print("%s %s" % (numTests, "lp_regenerate_propose_to tests..."))
def test_log_probability_proposals_FiniteWithoutBVArgs(self):
    """Compare a proposed tree's log_probability() with a manual computation.

    Repeats 100 times: generate a tree from 'START', propose a new tree from
    it, and require that ``FiniteWithoutBVArgs.log_probability`` and the
    tree's own ``log_probability()`` differ by less than 1e-8.
    """
    # import the grammar
    from Grammars import FiniteWithoutBVArgs
    self.G = FiniteWithoutBVArgs.g

    # the RegenerationProposal class
    rp = RegenerationProposal(self.G)

    tolerance = 1e-8
    for _ in range(100):
        source_tree = self.G.generate('START')
        # propose to a new tree
        proposed = rp.propose_tree(source_tree)[0]
        # probability counted manually by the grammar module
        manual_lp = FiniteWithoutBVArgs.log_probability(proposed)
        # check that it's equal to .log_probability()
        difference = math.fabs(manual_lp - proposed.log_probability())
        self.assertTrue(difference < tolerance)
class RestrictedRegenProposal(RegenerationProposal):
    """
    A standard regen proposal but with a restriction on which types are valid
    to regenerate.

    Specify *EITHER* a whitelist (of valid types) or a blacklist (of invalid
    types). If both are given and the whitelist is non-empty, the whitelist
    takes precedence and the blacklist is ignored. If neither is given (or
    both are empty), every node is considered valid.
    """

    def __init__(self, grammar, whitelist=None, blacklist=None, **kwargs):
        """Store the restriction and build the wrapped RegenerationProposal.

        Args:
            grammar: grammar handed to the wrapped RegenerationProposal.
            whitelist: container of return types that may be regenerated.
            blacklist: container of return types that may not be regenerated.
            **kwargs: forwarded unchanged to RegenerationProposal.
        """
        # Assign attributes explicitly. The previous
        # ``self.__dict__.update(locals())`` also stored ``self`` on the
        # instance (as ``self.self``), creating a needless reference cycle.
        self.grammar = grammar
        self.whitelist = whitelist
        self.blacklist = blacklist
        self.kwargs = kwargs  # kept: update(locals()) used to expose it
        self.regen_proposal = RegenerationProposal(grammar, **kwargs)

    def propose_tree(self, tree):
        """Propose from `tree` via the wrapped proposal, restricted by type.

        Returns whatever the wrapped ``propose_tree`` returns.
        """
        def isvalid(node):
            # Passed as resampleProbability: True/False per node depending on
            # whether its return type passes the whitelist/blacklist.
            if self.whitelist:
                return node.returntype in self.whitelist
            if self.blacklist:
                return node.returntype not in self.blacklist
            return True

        return self.regen_proposal.propose_tree(tree, resampleProbability=isvalid)
def __init__(self, grammar, whitelist=None, blacklist=None, **kwargs):
    """Store the type restriction and build the wrapped RegenerationProposal.

    Args:
        grammar: grammar handed to the wrapped RegenerationProposal.
        whitelist: optional container of allowed types.
        blacklist: optional container of disallowed types.
        **kwargs: forwarded unchanged to RegenerationProposal.
    """
    # Assign attributes explicitly. The previous
    # ``self.__dict__.update(locals())`` also stored ``self`` on the instance
    # (as ``self.self``), creating a needless reference cycle.
    self.grammar = grammar
    self.whitelist = whitelist
    self.blacklist = blacklist
    self.kwargs = kwargs  # kept: update(locals()) used to expose it
    self.regen_proposal = RegenerationProposal(grammar, **kwargs)
def propose_tree(self, t):
    """Propose from `t` using each node's own ``resample_p`` attribute.

    Delegates to ``RegenerationProposal.propose_tree`` with a
    ``resampleProbability`` callback that returns the node's ``resample_p``
    attribute, or 1.0 when the node has no such attribute.
    """
    def node_resample_p(node):
        return getattr(node, 'resample_p', 1.0)

    return RegenerationProposal.propose_tree(
        self, t, resampleProbability=node_resample_p)
def propose_tree(self, t):
    """Run the base RegenerationProposal proposal on `t`.

    The ``resampleProbability`` callback reads ``resample_p`` from the node
    it is given and falls back to 1.0 if the attribute is missing.
    """
    def resample_probability(node):
        # Nodes without an explicit resample_p get the default of 1.0.
        return getattr(node, 'resample_p', 1.0)

    proposal = RegenerationProposal.propose_tree(
        self, t, resampleProbability=resample_probability)
    return proposal
# Extra rules: a lambda that binds a WORD variable, and its application.
grammar.add_rule('LAMBDA_WORD', 'lambda', ['WORD'], 1.0, bv_type='WORD')
grammar.add_rule('WORD', 'apply_', ['LAMBDA_WORD', 'WORD'], 1.0)

p = InverseInlineProposer(grammar)

# Disabled debugging aid -- just look at some proposals:
#
#     for _ in xrange(200):
#         t = grammar.generate()
#         print ">>", t
#         #assert t.check_parent_refs()
#
#         for _ in xrange(10):
#             t = p.propose_tree(t)[0]
#             print "\t", t

# Run MCMC -- more informative about f-b errors
from LOTlib.Inference.Samplers.MetropolisHastings import MHSampler
from LOTlib.Inference.Proposals.MixtureProposal import MixtureProposal
from LOTlib.Inference.Proposals.RegenerationProposal import RegenerationProposal

# Start from a hypothesis whose proposal mixes inverse-inlining with regeneration.
h = make_h0(
    proposal_function=MixtureProposal(
        [InverseInlineProposer(grammar), RegenerationProposal(grammar)]
    )
)
data = generate_data(100)

# Print every sample; the loop variable deliberately rebinds ``h``.
for h in break_ctrlc(MHSampler(h, data)):
    print("%s %s %s %s %s" % (
        h.posterior_score, h.prior, h.likelihood, get_knower_pattern(h), h))