def run(llt=1.0):
    h0 = CCGLexicon(make_hypothesis, words=all_words, alpha=0.9, palpha=0.9, likelihood_temperature=llt)

    fbs = FiniteBestSet(N=10)

    from LOTlib.Inference.MetropolisHastings import mh_sample
    for h in lot_iter(mh_sample(h0, data, SAMPLES)):
        fbs.add(h, h.posterior_score)

    return fbs
def run(*args):
    # starting hypothesis -- here this generates at random
    h0 = GaussianLOTHypothesis(grammar, prior_temperature=PRIOR_TEMPERATURE)

    # We store the top 100 from each run
    pq = FiniteBestSet(100, max=True, key="posterior_score")
    pq.add(mh_sample(h0, data, STEPS, skip=SKIP))

    return pq
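# A hedged driver sketch (not from the source): run() above returns a
# FiniteBestSet, so several independent chains can be merged into one top-100
# set, mirroring the merge pattern used in the concept-learning script below.
if __name__ == "__main__":
    all_top = FiniteBestSet(100, max=True, key="posterior_score")
    for chain in xrange(4):        # number of chains is illustrative
        all_top.merge(run())
    for h in all_top.get_all(sorted=True):
        print h.posterior_score, h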
def ptaboo_search(h0, data, steps, skip=0, noisy_memoize=1000, seen_penalty=1.0):
    seen_count = defaultdict(int)

    # Define a wrapper class that overwrites the prior with our penalized version
    class WrapperClass(type(h0)):
        def compute_prior(self):
            self.rawprior = type(h0).compute_prior(self)  # save the raw prior in case we want to convert back
            self.prior = self.rawprior - seen_count[self] * seen_penalty
            self.lp = self.prior + self.likelihood
            return self.prior

        def fixlp(self):
            """ Temporarily restore the unpenalized log probability """
            self.prior = self.rawprior
            self.lp = self.prior + self.likelihood

    myh0 = WrapperClass(h0.grammar, v=h0.value)  ## TODO: NOTE HERE WE ASSUME G IS TAKEN!

    # Now just run standard MCMC:
    for h in mh_sample(myh0, data, steps, skip=skip):
        if LOTlib.SIG_INTERRUPTED:
            break

        # We don't yield a copy, so one option is to fixlp, yield, and then re-compute
        # the prior to restore the penalized lp on the current sample:
        #h.fixlp()
        #yield h
        #h.compute_prior()

        # The slower but safer way: copy the value into h0 and recompute its posterior
        h0.set_value(h.value)
        h0.compute_posterior(data)
        yield h0

        seen_count[h] += 1
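# A minimal usage sketch for ptaboo_search (illustrative, not from the source):
# assumes a grammar and data defined as in the surrounding examples, e.g. via
# `from Shared import *`.
if __name__ == "__main__":
    h0 = LOTHypothesis(grammar, args=['x'])
    for h in ptaboo_search(h0, data, steps=10000, seen_penalty=10.0):
        # each yielded hypothesis has its unpenalized posterior recomputed on the data
        print h.posterior_score, h.prior, h.likelihood, h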
http://www.mit.edu/~ndg/papers/RRfinal3.pdf

This script scatters our imports around to show where each part comes from.
"""
from Shared import *

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Create an initial hypothesis. Here we use a RationalRulesLOTHypothesis, which
# is defined in LOTlib.Hypotheses and wraps LOTHypothesis with the rational rules prior

from LOTlib.Hypotheses.RationalRulesLOTHypothesis import RationalRulesLOTHypothesis

h0 = RationalRulesLOTHypothesis(grammar=DNF, rrAlpha=1.0)

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Run the MH

from LOTlib.Inference.MetropolisHastings import mh_sample

# Run the vanilla sampler. Without steps, it will run infinitely.
# This prints out the posterior (posterior_score), prior, tree grammar probability, and likelihood:
for h in mh_sample(h0, data, 10000, skip=100):
    print h.posterior_score, h.prior, h.value.log_probability(), h.likelihood, q(h)

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# This yields data like below:
#-10.1447997767 -9.93962659915 -12.2377573418 -0.20517317755 'and_(not_(is_shape_(x, 'triangle')), not_(is_color_(x, 'blue')))'
#-11.9260879461 -8.77647578935 -12.2377573418 -3.14961215672 'and_(not_(is_shape_(x, 'triangle')), not_(is_shape_(x, 'triangle')))'
""" from random import randint, sample from LOTlib.Inference.MetropolisHastings import mh_sample from Shared import * NDATA = 50 # How many total data points? NSTEPS = 10000 BEST_N = 100 # How many from each hypothesis to store OUTFILE = "hypotheses.pkl" # Where we keep track of all hypotheses (across concepts) all_hypotheses = FiniteBestSet() # Now loop over each target concept and get a set of hypotheses for i, f in enumerate(TARGET_CONCEPTS): # Set up the hypothesis h0 = LOTHypothesis(grammar, start='START', args=['x']) # Set up some data data = generate_data(NDATA, f) # Now run some MCMC fs = FiniteBestSet(N=BEST_N, key="posterior_score") fs.add(mh_sample(h0, data, steps=NSTEPS, trace=False)) all_hypotheses.merge(fs) pickle_save(all_hypotheses, OUTFILE)
""" from random import randint, sample from LOTlib.Inference.MetropolisHastings import mh_sample from Shared import * NDATA = 50 # How many total data points? NSTEPS = 10000 BEST_N = 100 # How many from each hypothesis to store OUTFILE = "hypotheses.pkl" # Where we keep track of all hypotheses (across concepts) all_hypotheses = FiniteBestSet() # Now loop over each target concept and get a set of hypotheses for i, f in enumerate(TARGET_CONCEPTS): # Set up the hypothesis h0 = LOTHypothesis(grammar, start='START', args=['x']) # Set up some data data = generate_data(NDATA, f) # Now run some MCMC fs = FiniteBestSet(N=BEST_N, key="posterior_score") fs.add(mh_sample(h0, data, steps=NSTEPS, trace=False)) all_hypotheses.merge(fs) pickle_save(all_hypotheses, OUTFILE)
        if value is None:
            value = numpy.array([0.0, 0.0])
        VectorHypothesis.__init__(self, value=value, N=2, proposal=numpy.eye(2)*0.1)

    """
    MCMC plays nicest if we have defined prior and likelihood, and just don't
    touch compute_posterior.
    """

    def compute_likelihood(self, data):
        self.likelihood = 0.0
        self.posterior_score = self.prior + self.likelihood
        return self.likelihood

    def compute_prior(self):
        x, y = self.value
        self.prior = -((1.0-x)**2.0 + 100.0*(y-x**2.0)**2.0)
        self.posterior_score = self.prior + self.likelihood
        return self.prior

    def propose(self):
        ## NOTE: Does not copy proposal
        newv = numpy.random.multivariate_normal(self.value, self.proposal)
        return RosenbrockSampler(value=newv), 0.0    # from symmetric proposals

if __name__ == "__main__":
    N = 1
    initial_hyp = RosenbrockSampler()

    for x in mh_sample(initial_hyp, [], 1000000, skip=100, trace=False):
        print x, x.posterior_score
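# A hedged check (not from the source): the negated Rosenbrock function above is
# maximized at (1, 1), where posterior_score is 0, so tracking the best sample
# should approach that point. The step counts here are illustrative.
if __name__ == "__main__":
    best, best_score = None, float("-inf")
    for x in mh_sample(RosenbrockSampler(), [], 100000, skip=100, trace=False):
        if x.posterior_score > best_score:
            best, best_score = x, x.posterior_score
    print best, best_score    # expect a value near [1.0, 1.0]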
from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis
from LOTlib.Inference.MetropolisHastings import mh_sample
from LOTlib.Examples.Quantifier.Model import *

ALPHA = 0.9
SAMPLES = 100000
DATA_SIZE = 1000

if __name__ == "__main__":

    ## sample the target data
    data = generate_data(DATA_SIZE)

    W = 'every'

    # To use this as a LOTHypothesis, each data point needs an "output" field that is
    # True/False for whether it is the target word. LOTHypothesis.compute_likelihood then
    # checks whether we match whether the word was said, ignoring the other words --
    # that's why it's a pseudolikelihood.
    for di in data:
        di.output = (di.word == W)
        #print (di.word == W)

    FBS = FiniteBestSet(max=True, N=100)

    H = LOTHypothesis(grammar, args=['A', 'B', 'S'], ALPHA=ALPHA)

    # Now just run the sampler with a LOTHypothesis
    for s in mh_sample(H, data, SAMPLES, skip=10):
        #print s.lp, "\t", s.prior, "\t", s.likelihood, "\n", s, "\n\n"
        FBS.push(s, s.lp)

    for k in reversed(FBS.get_all(sorted=True)):
        print k.lp, k.prior, k.likelihood, k
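# A hedged extension (not from the source): the relabeling trick above supports
# one run per target word; the sample count and word argument are illustrative.
def run_word(W, data, samples=10000):
    for di in data:
        di.output = (di.word == W)      # relabel: True iff this word is the target
    fbs = FiniteBestSet(max=True, N=100)
    h0 = LOTHypothesis(grammar, args=['A', 'B', 'S'], ALPHA=ALPHA)
    for s in mh_sample(h0, data, samples, skip=10):
        fbs.add(s, s.posterior_score)
    return fbs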
def run_one(r):
    if LOTlib.SIG_INTERRUPTED:
        return

    h0 = NumberExpression(G)

    #sampler = tempered_transitions_sample(copy(h0), data, TEST_SAMPLES, skip=0, temperatures=[1.0, 1.25, 1.5])
    #evaluate_sampler(target, sampler, steps=TEST_SAMPLES, name="TemperedTransitions-1.5\t"+str(r), output=output)

    #sampler = tempered_transitions_sample(copy(h0), data, TEST_SAMPLES, skip=0, temperatures=[1.0, 1.05, 1.1])
    #evaluate_sampler(target, sampler, steps=TEST_SAMPLES, name="TemperedTransitions-1.1\t"+str(r), output=output)

    #sampler = tempered_transitions_sample(copy(h0), data, TEST_SAMPLES, skip=0, temperatures=[1.0, 1.025, 1.05])
    #evaluate_sampler(target, sampler, steps=TEST_SAMPLES, name="TemperedTransitions-1.05\t"+str(r), output=output)

    #sampler = parallel_tempering_sample(copy(h0), data, TEST_SAMPLES, within_steps=10, temperatures=[1.0, 1.25, 1.5], swaps=1)
    #evaluate_sampler(target, sampler, steps=TEST_SAMPLES, name="ParallelTempering-1.5\t"+str(r), output=output)

    #sampler = parallel_tempering_sample(copy(h0), data, TEST_SAMPLES, within_steps=10, temperatures=[1.0, 1.05, 1.1], swaps=1)
    #evaluate_sampler(target, sampler, steps=TEST_SAMPLES, name="ParallelTempering-1.1\t"+str(r), output=output)

    #sampler = parallel_tempering_sample(copy(h0), data, TEST_SAMPLES, within_steps=10, temperatures=[1.0, 1.025, 1.05], swaps=1)
    #evaluate_sampler(target, sampler, steps=TEST_SAMPLES, name="ParallelTempering-1.05\t"+str(r), output=output)

    inner_steps = 10
    sampler = datawise_optimize(copy(h0), data, TEST_SAMPLES*inner_steps, inner_steps=inner_steps, data_weight=1.0)
    evaluate_sampler(target, sampler, steps=TEST_SAMPLES*inner_steps, name="DatawiseOptimize-1.0\t"+str(r), output=output)

    sampler = datawise_optimize(copy(h0), data, TEST_SAMPLES*inner_steps, inner_steps=inner_steps, data_weight=0.1)
    evaluate_sampler(target, sampler, steps=TEST_SAMPLES*inner_steps, name="DatawiseOptimize-0.1\t"+str(r), output=output)

    sampler = datawise_optimize(copy(h0), data, TEST_SAMPLES*inner_steps, inner_steps=inner_steps, data_weight=0.01)
    evaluate_sampler(target, sampler, steps=TEST_SAMPLES*inner_steps, name="DatawiseOptimize-0.01\t"+str(r), output=output)

    #sampler = ptaboo_search(copy(h0), data, steps=TEST_SAMPLES, skip=0, seen_penalty=1.0)
    #evaluate_sampler(target, sampler, steps=TEST_SAMPLES, name="PtabooSearch-1.0\t"+str(r), trace=False, output=output)

    #sampler = ptaboo_search(copy(h0), data, steps=TEST_SAMPLES, skip=0, seen_penalty=10.0)
    #evaluate_sampler(target, sampler, steps=TEST_SAMPLES, name="PtabooSearch-10.0\t"+str(r), trace=False, output=output)

    #sampler = ptaboo_search(copy(h0), data, steps=TEST_SAMPLES, skip=0, seen_penalty=100.0)
    #evaluate_sampler(target, sampler, steps=TEST_SAMPLES, name="PtabooSearch-100.0\t"+str(r), trace=False, output=output)

    #sampler = increase_temperature_mh_sample(copy(h0), data, steps=TEST_SAMPLES, skip=0, increase_amount=1.01)
    #evaluate_sampler(target, sampler, steps=TEST_SAMPLES, name="IncreaseTemperature-1.01\t"+str(r), trace=False, output=output)

    #sampler = increase_temperature_mh_sample(copy(h0), data, steps=TEST_SAMPLES, skip=0, increase_amount=1.1)
    #evaluate_sampler(target, sampler, steps=TEST_SAMPLES, name="IncreaseTemperature-1.1\t"+str(r), trace=False, output=output)

    #sampler = increase_temperature_mh_sample(copy(h0), data, steps=TEST_SAMPLES, skip=0, increase_amount=1.5)
    #evaluate_sampler(target, sampler, steps=TEST_SAMPLES, name="IncreaseTemperature-1.5\t"+str(r), trace=False, output=output)

    #sampler = increase_temperature_mh_sample(copy(h0), data, steps=TEST_SAMPLES, skip=0, increase_amount=2.0)
    #evaluate_sampler(target, sampler, steps=TEST_SAMPLES, name="IncreaseTemperature-2.0\t"+str(r), trace=False, output=output)

    sampler = mh_sample(copy(h0), data, steps=TEST_SAMPLES, skip=0)
    evaluate_sampler(target, sampler, steps=TEST_SAMPLES, name="BasicSampler\t"+str(r), trace=False, output=output)

    sampler = mh_sample(copy(h0), data, steps=TEST_SAMPLES, skip=0, temperature=1.01)
    evaluate_sampler(target, sampler, steps=TEST_SAMPLES, name="BasicSampler-T1.01\t"+str(r), trace=False, output=output)

    sampler = mh_sample(copy(h0), data, steps=TEST_SAMPLES, skip=0, temperature=1.05)
    evaluate_sampler(target, sampler, steps=TEST_SAMPLES, name="BasicSampler-T1.05\t"+str(r), trace=False, output=output)

    sampler = mh_sample(copy(h0), data, steps=TEST_SAMPLES, skip=0, temperature=1.1)
    evaluate_sampler(target, sampler, steps=TEST_SAMPLES, name="BasicSampler-T1.1\t"+str(r), trace=False, output=output)
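# A hedged driver sketch (assumes `data`, `target`, TEST_SAMPLES, and `output`
# are defined by the enclosing script): run several replications of run_one.
if __name__ == "__main__":
    for r in xrange(10):    # replication count is illustrative
        run_one(r)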
# Or we can make them as hypotheses (functions of S):
#for i in xrange(100):
#    print LOTHypothesis(grammar, args=['S'])

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Or real inference:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

from LOTlib.DataAndObjects import FunctionData, Obj           # for nicely managing data
from LOTlib.Inference.MetropolisHastings import mh_sample     # for running MCMC

# Make up some data -- here just one set containing {red, red, green} colors
data = [FunctionData(input=[{Obj(color='red'), Obj(color='red'), Obj(color='green')}],
                     output=True)]

# Create an initial hypothesis
h0 = LOTHypothesis(grammar, args=['S'])

# OR if we want to specify and use insert/delete proposals:
#from LOTlib.Proposals import *
#h0 = LOTHypothesis(grammar, proposal_function=MixtureProposal(grammar, [RegenerationProposal(grammar), InsertDeleteProposal(grammar)]))

if __name__ == "__main__":

    # MCMC! Run the sampler; hypotheses' .prior, .likelihood, and .posterior_score are set in mh_sample
    for h in mh_sample(h0, data, 4000):
    #for h in unique(mh_sample(h0, data, 4000)):    # or: get unique samples
        print h.likelihood, h.prior, h.posterior_score, h
FunctionData(input=[ "n2", "n1" ], output=False), FunctionData(input=[ "n2", "n2" ], output=False), FunctionData(input=[ "n2", "p1" ], output=True), FunctionData(input=[ "n2", "p2" ], output=True)] # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Standard exports from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis def make_ho(value=None): return LOTHypothesis(grammar, value=value, args=['x', 'y'], ALPHA=0.999) # alpha here trades off with the amount of data. Currently assuming no noise, but that's not necessary if __name__ == "__main__": # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Run mcmc # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ from LOTlib.Proposals.RegenerationProposal import * #mp = MixtureProposal([RegenerationProposal(grammar), InsertDeleteProposal(grammar)] ) mp = RegenerationProposal(grammar) h0 = LOTHypothesis(grammar, args=['x', 'y'], ALPHA=0.999, proposal_function=mp) # alpha here trades off with the amount of data. Currently assuming no noise, but that's not necessary from LOTlib.Inference.MetropolisHastings import mh_sample for h in mh_sample(h0, data, 4000000, skip=100): print h.posterior_score, h.likelihood, h.prior, cleanFunctionNodeString(h) print map( lambda d: h(*d.input), data) print "\n"
import numpy
from numpy import sin    # used by the target function; numpy's sin also handles arrays

from LOTlib.DataAndObjects import FunctionData
from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis
from LOTlib.Miscellaneous import qq
from MAPSymbolicRegressionHypothesis import MAPSymbolicRegressionHypothesis
from Data import generate_data
from Grammar import grammar, NCONSTANTS

STEPS = 500000
SKIP = 0
data_sd = 0.1     # the SD of the data
NDATA = 50
MEMOIZE = 1000    # 0 means don't memoize

## The target function for symbolic regression
target = lambda x: 3.*x + sin(4.3/x)

# # # # # # # # # # # # # # # # # # # # # # # # # # # #

data = generate_data(target, NDATA, data_sd)    # generate some data

# starting hypothesis -- here this generates at random
h0 = MAPSymbolicRegressionHypothesis(grammar)
h0.CONSTANT_VALUES = numpy.zeros(NCONSTANTS)    ## TODO: Move this to an initializer

from LOTlib.Inference.MetropolisHastings import mh_sample
for h in mh_sample(h0, data, STEPS, skip=SKIP, trace=False, debug=False, memoize=MEMOIZE):
    print h.posterior_score, h.likelihood, h.prior, h.CONSTANT_VALUES, qq(h)