def run(llt=1.0):
    h0 = CCGLexicon(make_hypothesis, words=all_words, alpha=0.9, palpha=0.9, likelihood_temperature=llt)

    # keep the top 10 hypotheses found by the sampler
    fbs = FiniteBestSet(N=10)

    from LOTlib.Inference.MetropolisHastings import mh_sample
    for h in lot_iter(mh_sample(h0, data, SAMPLES)):
        fbs.add(h, h.posterior_score)

    return fbs
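Here llt is a likelihood temperature. A minimal self-contained sketch of how such a temperature reweights a posterior score (the exact formula inside LOTlib's hypotheses may differ; the numbers are illustrative only):

def tempered_posterior_score(prior, likelihood, likelihood_temperature=1.0):
    # Dividing the log-likelihood by the temperature flattens (llt > 1) or
    # sharpens (llt < 1) the data's influence relative to the prior.
    return prior + likelihood / likelihood_temperature

# e.g. a hypothesis with log-prior -10 and log-likelihood -100:
for llt in [0.01, 0.1, 1.0]:
    print llt, tempered_posterior_score(-10.0, -100.0, llt)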
def run(*args):
    # starting hypothesis -- here, generated at random
    h0 = GaussianLOTHypothesis(grammar, prior_temperature=PRIOR_TEMPERATURE)

    # We store the top 100 from each run
    pq = FiniteBestSet(100, max=True, key="posterior_score")
    pq.add(mh_sample(h0, data, STEPS, skip=SKIP))

    return pq
def run(data_size):
    print "Running ", data_size

    # We store the top 100 from each run
    hypset = FiniteBestSet(TOP_COUNT, max=True)

    # initialize the data
    data = generate_data(data_size)

    # starting hypothesis -- here, generated at random
    learner = GriceanQuantifierLexicon(make_my_hypothesis, my_weight_function)

    # By default we generate each word's meaning from the grammar if no value is specified
    for w in target.all_words():
        learner.set_word(w)

    # populate the finite sample by running the sampler for this many steps
    for x in mh_sample(learner, data, SAMPLES, skip=0):
        hypset.push(x, x.posterior_score)

    return hypset
from LOTlib.FiniteBestSet import FiniteBestSet
from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis
from LOTlib.Inference.Samplers.MetropolisHastings import mh_sample
from LOTlib.Examples.Quantifier.Model import *

ALPHA = 0.9
SAMPLES = 100000
DATA_SIZE = 1000

if __name__ == "__main__":

    ## sample the target data
    data = generate_data(DATA_SIZE)

    W = 'every'

    # To use this as a LOTHypothesis, each data point needs an "output" field that is
    # True/False for whether it's the target word. LOTHypothesis.compute_likelihood then
    # scores whether we match whether the word was said, ignoring the other words --
    # that's why it's a pseudolikelihood.
    for di in data:
        di.output = (di.word == W)
        #print (di.word == W)

    FBS = FiniteBestSet(max=True, N=100)

    H = LOTHypothesis(grammar, args=['A', 'B', 'S'], ALPHA=ALPHA)

    # Now just run the sampler with a LOTHypothesis
    for s in mh_sample(H, data, SAMPLES, skip=10):
        #print s.lp, "\t", s.prior, "\t", s.likelihood, "\n", s, "\n\n"
        FBS.push(s, s.lp)

    for k in reversed(FBS.get_all(sorted=True)):
        print k.lp, k.prior, k.likelihood, k
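The "output" trick above turns word learning into per-datum binary prediction. A minimal self-contained sketch of a noisy-match pseudolikelihood in this spirit (the exact form inside LOTHypothesis.compute_likelihood may differ; ALPHA plays the role of the label-reliability parameter):

from math import log

def binary_pseudolikelihood(h, data, alpha=0.9):
    # h is assumed to map a data point to True/False; with probability alpha the
    # observed label matches h's prediction, otherwise it is coin-flip noise.
    ll = 0.0
    for di in data:
        p_match = alpha if h(di) == di.output else 0.0
        ll += log(p_match + (1.0 - alpha) * 0.5)
    return ll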
    learner = GriceanQuantifierLexicon(make_my_hypothesis, my_weight_function)

    # By default we generate each word's meaning from the grammar if no value is specified
    for w in target.all_words():
        learner.set_word(w)

    # populate the finite sample by running the sampler for this many steps
    for x in mh_sample(learner, data, SAMPLES, skip=0):
        hypset.push(x, x.posterior_score)

    return hypset

if __name__ == "__main__":

    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
    # MPI interface

    # Map. SimpleMPI will fall back to a normal map if we are not running under MPI
    allret = MPI_map(run, map(lambda x: [x], DATA_AMOUNTS * CHAINS))  # this many chains

    ## combine into a single hypothesis set and save
    outhyp = FiniteBestSet(max=True)
    for r in allret:
        print "# Merging ", len(r)
        outhyp.merge(r)

    import pickle
    pickle.dump(outhyp, open(OUT_PATH, 'w'))
def run(*args):
    #print "# Running data"
    global hypotheses

    data_size = args[0]

    p_representation = defaultdict(int)          # how often do you get the right representation?
    p_response = defaultdict(int)                # how often do you get the right response?
    p_representation_literal = defaultdict(int)  # ... for the literal interpretation
    p_response_literal = defaultdict(int)
    p_representation_presup = defaultdict(int)   # ... for the presupposition
    p_response_presup = defaultdict(int)

    #print "# Generating data"
    data = generate_data(data_size)

    # recompute the posterior of each hypothesis on the new data
    #print "# Computing posterior"
    #[ x.unclear_functions() for x in hypotheses ]
    [x.compute_posterior(data) for x in hypotheses]

    # normalize the posterior
    #print "# Computing normalizer"
    Z = logsumexp([x.posterior_score for x in hypotheses])

    # and output the top hypotheses
    qq = FiniteBestSet(max=True, N=25)
    for h in hypotheses:
        qq.push(h, h.posterior_score)

    # get the tops
    for i, h in enumerate(qq.get_all(sorted=True)):
        for w in h.all_words():
            fprintn(8, data_size, i, w, h.posterior_score, q(h.value[w]),
                    f=options.OUT_PATH + "-hypotheses." + str(get_rank()) + ".txt")

    # and compute the probability of being correct
    #print "# Computing correct probability"
    for h in hypotheses:
        hstr = str(h)
        #print data_size, len(data), exp(h.posterior_score), correct[ str(h)+":"+w ]
        for w in words:
            p = exp(h.posterior_score - Z)
            key = w + ":" + hstr

            # if we always agree with the target, then we count as the right representation
            p_representation[w] += p * (agree_pct[key] == 1.)
            p_representation_presup[w] += p * (agree_pct_presup[key] == 1.)
            p_representation_literal[w] += p * (agree_pct_literal[key] == 1.)

            # and just how often does the hypothesis agree?
            p_response[w] += p * agree_pct[key]
            p_response_presup[w] += p * agree_pct_presup[key]
            p_response_literal[w] += p * agree_pct_literal[key]

    #print "# Outputting"
    for w in words:
        fprintn(10, str(get_rank()), q(w), data_size,
                p_representation[w], p_representation_presup[w], p_representation_literal[w],
                p_response[w], p_response_presup[w], p_response_literal[w],
                f=options.OUT_PATH + "-stats." + str(get_rank()) + ".txt")

    return 0
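For reference, the normalization above turns log posterior scores into weights that sum to one: p = exp(score - Z) with Z their log-sum-exp. A pure-Python sketch (LOTlib supplies its own logsumexp; the scores here are made up):

from math import exp, log

def logsumexp_py(xs):
    # numerically stable log(sum(exp(x) for x in xs))
    m = max(xs)
    return m + log(sum(exp(x - m) for x in xs))

scores = [-12.3, -10.1, -15.7]   # hypothetical posterior_score values
Z = logsumexp_py(scores)
weights = [exp(s - Z) for s in scores]
print weights, sum(weights)      # the weights sum to 1.0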
def make_h0(value=None):
    return GaussianLOTHypothesis(grammar, value=value)

if __name__ == "__main__":

    # # # # # # # # # # # # # # # # # # # # # # # # # # # #
    # the running function
    def run(*args):
        # starting hypothesis -- here, generated at random
        h0 = GaussianLOTHypothesis(grammar, prior_temperature=PRIOR_TEMPERATURE)

        # We store the top 100 from each run
        pq = FiniteBestSet(100, max=True, key="posterior_score")
        pq.add(mh_sample(h0, data, STEPS, skip=SKIP))

        return pq

    finitesample = FiniteBestSet(max=True)  # the combined top hypotheses over all chains
    results = map(run, [[None]] * CHAINS)   # Run on a single core
    finitesample.merge(results)

    ## and display
    for r in finitesample.get_all(decreasing=False, sorted=True):
        print r.posterior_score, r.prior, r.likelihood, qq(str(r))
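FiniteBestSet appears throughout these scripts as a bounded top-N collector that can be merged across chains. A minimal heap-based sketch of that role (not LOTlib's implementation; the class and method names are illustrative):

import heapq

class TopN(object):
    """Keep the N highest-scoring items, roughly like FiniteBestSet(max=True, N=n)."""
    def __init__(self, n):
        self.n = n
        self.heap = []  # min-heap of (score, item); the worst kept item sits at heap[0]
    def add(self, item, score):
        if len(self.heap) < self.n:
            heapq.heappush(self.heap, (score, item))
        elif score > self.heap[0][0]:
            heapq.heapreplace(self.heap, (score, item))
    def get_all(self):
        # items in increasing score order
        return [item for score, item in sorted(self.heap)]
    def merge(self, other):
        for score, item in other.heap:
            self.add(item, score)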
        fbs.add(h, h.posterior_score)

    return fbs

## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
### MPI map
## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
from SimpleMPI.MPI_map import MPI_map, is_master_process

allret = MPI_map(run, map(lambda x: [x], [0.01, 0.1, 1.0] * 100))

if is_master_process():

    allfbs = FiniteBestSet(max=True)
    allfbs.merge(allret)

    H = allfbs.get_all()

    for h in H:
        h.likelihood_temperature = 0.01  # on what set of data we want?
        h.compute_posterior(data)

    # show the *average* ll for each hypothesis
    for h in sorted(H, key=lambda h: h.posterior_score):
        print h.posterior_score, h.prior, h.likelihood, h.likelihood_temperature
        print h

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Play around with some different inference schemes
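As a starting point for trying different inference schemes, a minimal generic Metropolis sketch of what mh_sample does (assuming a symmetric proposal; LOTlib's sampler also accounts for forward/backward proposal probabilities and temperatures):

import random
from math import exp

def mh_sample_sketch(h0, compute_score, propose, steps):
    # compute_score returns a log posterior score; propose returns a new hypothesis.
    h, score = h0, compute_score(h0)
    for _ in xrange(steps):
        h_new = propose(h)
        score_new = compute_score(h_new)
        # accept with probability min(1, exp(score_new - score))
        if score_new >= score or random.random() < exp(score_new - score):
            h, score = h_new, score_new
        yield h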