def run_one(iteration, proposal_type, proposal_param=None):
    """Run one sampler evaluation under the given proposal configuration.

    iteration      -- index of this run; only used to label the output rows.
    proposal_type  -- 'InsertDeleteMixture' or 'RegenerationProposal'.
    proposal_param -- mixture weight on the regeneration component; only
                      meaningful for 'InsertDeleteMixture'.

    Raises NotImplementedError for any other proposal_type.
    """
    # Build the proposal object for this experimental condition.
    if proposal_type == 'InsertDeleteMixture':
        proposal = MixtureProposal(
            [RegenerationProposal(grammar), InsertDeleteProposal(grammar)],
            probs=[proposal_param, 1. - proposal_param])
    elif proposal_type == 'RegenerationProposal':
        proposal = RegenerationProposal(grammar)
    else:
        raise NotImplementedError(proposal_type)

    def wrapped_make_h0():
        # Fresh initial hypothesis with the chosen proposal attached.
        h0 = make_h0()
        h0.set_proposal_function(proposal)
        return h0

    sampler = MultipleChainMCMC(wrapped_make_h0, data,
                                steps=options.SAMPLES, nchains=options.CHAINS)

    # Evaluate, tagging each output row with this run's configuration.
    row_prefix = "\t".join(
        map(str, [options.MODEL, iteration, proposal_type, proposal_param]))
    evaluate_sampler(sampler,
                     prefix=row_prefix,
                     out_hypotheses=out_hypotheses,
                     out_aggregate=out_aggregate)
def test_lp_regenerate_propose_to(self):
    """Chi-squared test that regeneration proposals hit target trees at their expected rates.

    For each of 100 sampled trees, draw 1000 proposals, tally which trees
    were proposed to, and compare the observed counts against the expected
    proposal distribution via self.get_pvalue. Uses a Bonferroni-style
    corrected threshold (0.01 / number of tests) for each p-value.
    """
    # import the grammar
    from LOTlibTest.Grammars import lp_regenerate_propose_to_grammar
    self.G = lp_regenerate_propose_to_grammar.g
    # the RegenerationProposal class
    rp = RegenerationProposal(self.G)
    numTests = 100
    # Sample trees from the grammar, and run a chi-squared test for each of them
    for i in lot_iter(range(numTests)):
        # keep track of expected and actual counts
        # expected_counts = defaultdict(int) # a dictionary whose keys are trees and values are the expected number of times we should be proposing to this tree
        actual_counts = defaultdict(int) # same as expected_counts, but stores the actual number of times we proposed to a given tree
        tree = self.G.generate('START')
        # Regenerate some number of trees at random
        numTrees = 1000
        for j in range(numTrees):
            # propose_tree returns (new_tree, ...); we only need the tree itself
            newtree = rp.propose_tree(tree)[0]
            # trees.append(newtree)
            actual_counts[newtree] += 1
        # see if the frequency with which each category of trees is generated matches the
        # expected counts using a chi-squared test
        chisquared, p = self.get_pvalue(tree, actual_counts, numTrees)
        # print chisquared, p
        # if p > 0.01/numTests (corrected for multiple comparisons), test passes
        self.assertTrue(p > 0.01/numTests, "Trees are not being generated according to the expected log probabilities")
        # Progress report every 10 source trees.
        if i % 10 == 0 and i != 0: print i, "lp_regenerate_propose_to tests..."
    print numTests, "lp_regenerate_propose_to tests..."
def test_lp_regenerate_propose_to(self): # import the grammar from LOTlibTest.Grammars import lp_regenerate_propose_to_grammar self.G = lp_regenerate_propose_to_grammar.g # the RegenerationProposal class rp = RegenerationProposal(self.G) numTests = 100 # Sample 1000 trees from the grammar, and run a chi-squared test for each of them for i in lot_iter(range(numTests)): # keep track of expected and actual counts # expected_counts = defaultdict(int) # a dictionary whose keys are trees and values are the expected number of times we should be proposing to this tree actual_counts = defaultdict( int ) # same as expected_counts, but stores the actual number of times we proposed to a given tree tree = self.G.generate('START') # Regenerate some number of trees at random numTrees = 1000 for j in range(numTrees): newtree = rp.propose_tree(tree)[0] # trees.append(newtree) actual_counts[newtree] += 1 # see if the frequency with which each category of trees is generated matches the # expected counts using a chi-squared test chisquared, p = self.get_pvalue(tree, actual_counts, numTrees) # print chisquared, p # if p > 0.01/1000, test passes self.assertTrue( p > 0.01 / numTests, "Trees are not being generated according to the expected log probabilities" ) if i % 10 == 0 and i != 0: print i, "lp_regenerate_propose_to tests..." print numTests, "lp_regenerate_propose_to tests..."
def test_log_probability_proposals_FiniteWithoutBVArgs(self):
    """Proposed trees' log_probability() must agree with the manually computed value.

    Samples 100 trees from the FiniteWithoutBVArgs grammar, proposes a
    regeneration move on each, and checks that the grammar module's own
    log_probability matches the tree's log_probability() method to within
    a tight absolute tolerance.
    """
    from LOTlibTest.Grammars import FiniteWithoutBVArgs
    self.G = FiniteWithoutBVArgs.g
    proposer = RegenerationProposal(self.G)

    for _ in range(100):
        original = self.G.generate('START')
        # propose_tree returns (new_tree, ...); keep only the tree.
        proposed = proposer.propose_tree(original)[0]
        # Manually computed log probability for the proposed tree.
        expected_lp = FiniteWithoutBVArgs.log_probability(proposed)
        # Must agree with the tree's own computation.
        self.assertTrue(math.fabs(expected_lp - proposed.log_probability()) < 0.00000001)
def __init__(self, grammar, value=None, f=None, start='START', ALPHA=0.9, maxnodes=25, args=['x'], proposal_function=None, **kwargs): """ *grammar* - The grammar for the hypothesis (specified in Grammar.py) *value* - the value for the hypothesis *f* - if specified, we don't recompile the whole function *start* - The start symbol for the grammar *ALPHA* - parameter for compute_single_likelihood that *maxnodes* - the maximum amount of nodes that the grammar can have *args* - The arguments to the function *proposal_function* - function that tells the program how to transition from one tree to another (by default, it uses the RegenerationProposal function) """ # save all of our keywords (though we don't need v) self.__dict__.update(locals()) if value is None: value = grammar.generate(self.start) FunctionHypothesis.__init__(self, value=value, f=f, args=args, **kwargs) # Save a proposal function ## TODO: How to handle this in copying? if proposal_function is None: self.proposal_function = RegenerationProposal(self.grammar) self.likelihood = 0.0
for a, b in itertools.product(objects, objects): myinput = [a, b] # opposites (n/p) interact; x interacts with nothing myoutput = (a[0] != b[0]) and (a[0] != 'x') and (b[0] != 'x') data.append(FunctionData(input=myinput, output=myoutput)) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Run mcmc # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if __name__ == "__main__": from LOTlib.Proposals.RegenerationProposal import RegenerationProposal #mp = MixtureProposal([RegenerationProposal(grammar), InsertDeleteProposal(grammar)] ) mp = RegenerationProposal(grammar) from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis h0 = LOTHypothesis( grammar, args=['x', 'y'], ALPHA=0.999, proposal_function=mp ) # alpha here trades off with the amount of data. Currently assuming no noise, but that's not necessary from LOTlib.Inference.MetropolisHastings import mh_sample for h in mh_sample(h0, data, 4000000, skip=100): print h.posterior_score, h.likelihood, h.prior, cleanFunctionNodeString( h) #print map( lambda d: h(*d.input), data) #print "\n"