def standard_sample(make_hypothesis, make_data, skip=9, show=True, N=100, save_top='top.pkl', alsoprint='None', **kwargs): """ Just a simplified interface for sampling, allowing printing (showing), returning the top, and saving. This is used by many examples, and is meant to easily allow running with a variety of parameters. NOTE: This skip is a skip *only* on printing **kwargs get passed to sampler """ if LOTlib.SIG_INTERRUPTED: return TopN() # So we don't waste time! h0 = make_hypothesis() data = make_data() best_hypotheses = TopN(N=N) f = eval(alsoprint) for i, h in enumerate(break_ctrlc(MHSampler(h0, data, **kwargs))): best_hypotheses.add(h) if show and i%(skip+1) == 0: print i, \ h.posterior_score, \ h.prior, \ h.likelihood, \ f(h) if f is not None else '', \ qq(cleanFunctionNodeString(h)) if save_top is not None: print "# Saving top hypotheses" with open(save_top, 'w') as f: pickle.dump(best_hypotheses, f) return best_hypotheses
def run(): from LOTlib import lot_iter from LOTlib.Inference.Proposals.RegenerationProposal import RegenerationProposal #mp = MixtureProposal([RegenerationProposal(grammar), InsertDeleteProposal(grammar)] ) mp = RegenerationProposal(grammar) from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis h0 = LOTHypothesis(grammar, args=['x', 'y'], ALPHA=0.999, proposal_function=mp) # alpha here trades off with the amount of data. Currently assuming no noise, but that's not necessary from LOTlib.Inference.MetropolisHastings import MHSampler for h in lot_iter(MHSampler(h0, data, skip=100)): print h.posterior_score, h.likelihood, h.prior, cleanFunctionNodeString(h)
def standard_sample(make_hypothesis, make_data, show_skip=9, show=True, N=100, save_top='top.pkl', alsoprint='None', **kwargs): """ Just a simplified interface for sampling, allowing printing (showing), returning the top, and saving. This is used by many examples, and is meant to easily allow running with a variety of parameters. NOTE: This skip is a skip *only* on printing **kwargs get passed to sampler """ if LOTlib.SIG_INTERRUPTED: return TopN() # So we don't waste time! h0 = make_hypothesis() data = make_data() best_hypotheses = TopN(N=N) f = eval(alsoprint) sampler = MHSampler(h0, data, **kwargs) # # TODO change acceptance temperature over times # sampler.acceptance_temperature = 0.5 for i, h in enumerate(break_ctrlc(sampler)): # if i % 10000 == 0 and i != 0: # sampler.acceptance_temperature = min(1.0, sampler.acceptance_temperature+0.1) # print '='*50 # print 'change acc temperature to', sampler.acceptance_temperature best_hypotheses.add(h) if show and i%(show_skip+1) == 0: print i, \ h.posterior_score, \ h.prior, \ h.likelihood, \ f(h) if f is not None else '', \ qq(cleanFunctionNodeString(h)) if save_top is not None: print "# Saving top hypotheses" with open(save_top, 'w') as f: pickle.dump(best_hypotheses, f) return best_hypotheses
# Tail of the training data list: (x, y) string input pairs with their boolean
# labels. NOTE(review): the list's opening (`data = [ ...`) lies before this
# chunk and is not visible here.
          FunctionData(input=[ "n2", "n1" ], output=False),
          FunctionData(input=[ "n2", "n2" ], output=False),
          FunctionData(input=[ "n2", "p1" ], output=True),
          FunctionData(input=[ "n2", "p2" ], output=True)]

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Standard exports
from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis

def make_ho(value=None):
    # Build the exported hypothesis over arguments x, y (optionally from an
    # existing value).
    return LOTHypothesis(grammar, value=value, args=['x', 'y'], ALPHA=0.999) # alpha here trades off with the amount of data. Currently assuming no noise, but that's not necessary

if __name__ == "__main__":

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Run mcmc
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    from LOTlib.Proposals.RegenerationProposal import *
    #mp = MixtureProposal([RegenerationProposal(grammar), InsertDeleteProposal(grammar)] )
    mp = RegenerationProposal(grammar)

    h0 = LOTHypothesis(grammar, args=['x', 'y'], ALPHA=0.999, proposal_function=mp) # alpha here trades off with the amount of data. Currently assuming no noise, but that's not necessary

    from LOTlib.Inference.MetropolisHastings import mh_sample
    # For each sample: print the score components and the hypothesis, then its
    # predictions on every training datum.
    for h in mh_sample(h0, data, 4000000, skip=100):
        print h.posterior_score, h.likelihood, h.prior, cleanFunctionNodeString(h)
        print map( lambda d: h(*d.input), data)
        print "\n"
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Standard exports from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis def make_ho(value=None): return LOTHypothesis( grammar, value=value, args=['x', 'y'], ALPHA=0.999 ) # alpha here trades off with the amount of data. Currently assuming no noise, but that's not necessary if __name__ == "__main__": # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Run mcmc # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ from LOTlib.Proposals.RegenerationProposal import * #mp = MixtureProposal([RegenerationProposal(grammar), InsertDeleteProposal(grammar)] ) mp = RegenerationProposal(grammar) h0 = LOTHypothesis( grammar, args=['x', 'y'], ALPHA=0.999, proposal_function=mp ) # alpha here trades off with the amount of data. Currently assuming no noise, but that's not necessary from LOTlib.Inference.MetropolisHastings import mh_sample for h in mh_sample(h0, data, 4000000, skip=100): print h.posterior_score, h.likelihood, h.prior, cleanFunctionNodeString( h) print map(lambda d: h(*d.input), data) print "\n"
# Log the parsed command-line options for the run.
display_option_summary(options)

# Held-out evaluation data; stays None (re-scoring disabled) when EVAL_DATA <= 0.
eval_data = None
if options.EVAL_DATA > 0:
    eval_data = make_data(options.EVAL_DATA)

# choose the appropriate map function
# One (make_hypothesis, make_data, amount) triple per chain per data amount.
args = list(itertools.product([make_hypothesis],[make_data], data_amounts * options.CHAINS) )

# set the output codec -- needed to display lambda to stdout
sys.stdout = codecs.getwriter('utf8')(sys.stdout)

seen = set()  # hypotheses already printed; de-duplicates across chains
for fs in MPI_unorderedmap(run, numpy.random.permutation(args)):
    assert is_master_process()

    for h in fs:
        if h not in seen:
            seen.add(h)

            if eval_data is not None:
                h.compute_posterior(eval_data) # evaluate on the big data

            # Likelihood normalized per evaluation datum.
            # NOTE(review): this divides by options.EVAL_DATA even when it is
            # 0 (the eval_data-disabled case) -- ZeroDivisionError; confirm
            # EVAL_DATA is always > 0 for this script.
            print h.prior, h.likelihood / options.EVAL_DATA, qq(cleanFunctionNodeString(h))

import pickle
with open(options.OUT_PATH, 'w') as f:
    pickle.dump(seen, f)
if options.EVAL_DATA > 0: eval_data = make_data(options.EVAL_DATA) # choose the appropriate map function args = list(itertools.product([make_hypothesis],[make_data], data_amounts * options.CHAINS) ) # set the output codec -- needed to display lambda to stdout sys.stdout = codecs.getwriter('utf8')(sys.stdout) seen = set() for fs in MPI_unorderedmap(run, numpy.random.permutation(args)): assert is_master_process() for h in fs: if h not in seen: seen.add(h) if eval_data is not None: h.compute_posterior(eval_data) # evaluate on the big data print h.posterior_score, h.prior, h.likelihood / options.EVAL_DATA, \ alsoprint(h) if alsoprint is not None else '',\ qq(cleanFunctionNodeString(h)) import pickle with open(options.OUT_PATH, 'w') as f: pickle.dump(seen, f)
eval_data = None if options.EVAL_DATA > 0: eval_data = make_data(options.EVAL_DATA) # choose the appropriate map function args = list( itertools.product([make_hypothesis], [make_data], data_amounts * options.CHAINS)) # set the output codec -- needed to display lambda to stdout sys.stdout = codecs.getwriter('utf8')(sys.stdout) seen = set() for fs in MPI_unorderedmap(run, numpy.random.permutation(args)): assert is_master_process() for h in fs: if h not in seen: seen.add(h) if eval_data is not None: h.compute_posterior(eval_data) # evaluate on the big data print h.posterior_score, h.prior, h.likelihood / options.EVAL_DATA, \ alsoprint(h) if alsoprint is not None else '',\ qq(cleanFunctionNodeString(h)) import pickle with open(options.OUT_PATH, 'w') as f: pickle.dump(seen, f)