def compute_likelihood(self, data, update_post=True, **kwargs):
    """Use Bayesian model averaging with `self.hypotheses` to estimate the likelihood of generating the data.

    This is taken as a weighted sum over all hypotheses, sum_h { p(h | X) }.

    Args:
        data(list): List of FunctionData objects.

    Returns:
        float: Likelihood summed over all outputs, summed over all hypotheses & weighted for each
        hypothesis by posterior score p(h|X).

    """
    self.update()
    hypotheses = self.hypotheses
    likelihood = 0.0

    for d in data:
        posteriors = [sum(h.compute_posterior(d.input)) for h in hypotheses]
        Z = logsumexp(posteriors)
        weights = [(post - Z) for post in posteriors]

        for o in d.output.keys():
            # probability for yes on output `o` is the sum of posteriors for hypotheses that contain `o`
            p = logsumexp([w if o in h() else -Infinity for h, w in zip(hypotheses, weights)])
            p = -1e-10 if p >= 0 else p   # clamp so log1mexp(p) stays defined
            k = d.output[o][0]            # num. yes responses
            n = k + d.output[o][1]        # num. trials
            bc = gammaln(n+1) - (gammaln(k+1) + gammaln(n-k+1))  # log binomial coefficient
            likelihood += bc + (k*p) + (n-k)*log1mexp(p)         # likelihood we got human output

    if update_post:
        self.likelihood = likelihood
        self.update_posterior()
    return likelihood

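# A minimal, self-contained sketch of the binomial yes/no term used above; the helper names here
# are assumptions for illustration, not part of the module. Given log-probability p of a "yes",
# the log-likelihood of k yeses in n trials is log C(n,k) + k*p + (n-k)*log(1 - exp(p)).
import numpy as np
from scipy.special import gammaln

def log1mexp_sketch(a):
    # log(1 - exp(a)) for a < 0; a hypothetical stand-in for the library's log1mexp
    return np.log1p(-np.exp(a))

def binomial_loglik_sketch(p, k, n):
    bc = gammaln(n+1) - (gammaln(k+1) + gammaln(n-k+1))  # log binomial coefficient
    return bc + k*p + (n-k)*log1mexp_sketch(p)

# e.g. binomial_loglik_sketch(np.log(0.7), 7, 10) ~ log Binom(k=7; n=10, p=0.7)
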
def compute_kl(current_dict, next_dict):
    """KL(current || next), in nats, from two dicts mapping the same hypotheses to unnormalized log scores."""
    current_Z = logsumexp([v for h, v in current_dict.iteritems()])
    next_Z = logsumexp([v for h, v in next_dict.iteritems()])

    kl = 0.0
    for h, v in current_dict.iteritems():
        p = np.exp(v - current_Z)
        if p == 0:
            continue
        kl += p * (v - next_dict[h] + next_Z - current_Z)
    return kl

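# A toy check (hypothetical values, not from the original file): with current scores {h1: 0, h2: 0}
# the normalized distribution is (0.5, 0.5); with next scores {h1: 0, h2: log 3} it is (0.25, 0.75);
# so compute_kl returns 0.5*log(0.5/0.25) + 0.5*log(0.5/0.75) = 0.5*log(2) + 0.5*log(2/3).
# kl = compute_kl({'h1': 0.0, 'h2': 0.0}, {'h1': 0.0, 'h2': np.log(3.0)})
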
def in_concept_avg(self, domain):
    """
    p(y in C | `self.hypotheses`)

    For each hypothesis h, if y in C_h, accumulate w_h, where w_h is the weight of the hypothesis,
    determined by the hypothesis's posterior score p(h | y).

    ==> This is the weighted Bayesian model averaging described in (Murphy, 2007)

    """
    self.update()
    probs_in_c = {}
    Z = logsumexp([h.posterior_score for h in self.hypotheses])  # the normalizer is shared across the domain

    for y in domain:
        prob_in_c = 0

        for h in self.hypotheses:
            C = h()
            w = h.posterior_score - Z
            if y in C:
                prob_in_c += exp(w)

        probs_in_c[y] = prob_in_c

    return probs_in_c

def compute_single_likelihood(self, datum, llcounts, distance_factor=100.0):
    assert isinstance(datum.output, dict), "Data supplied must be a dict (function outputs to counts)"

    lo = sum(llcounts.values())  # normalizing constant

    # We are going to compute a pseudo-likelihood, counting close strings as being close
    return sum([datum.output[k] * logsumexp([log(llcounts[r]) - log(lo) - distance_factor*distance(r, k)
                                             for r in llcounts.keys()])
                for k in datum.output.keys()])

def compute_proposal_probability(self, grammar, t1, t2, resampleProbability=lambdaOne, recurse=True):
    # NOTE: This is not strictly necessary since we don't actually have to sum over trees
    # if we use an auxiliary variable argument. But this fits nicely with the other proposers
    # and is not much slower.

    chosen_node1, chosen_node2 = least_common_difference(t1, t2)

    lps = []
    if chosen_node1 is None:  # any node in the tree could have been regenerated
        for node in t1:
            lp_of_choosing_node = t1.sampling_log_probability(node, resampleProbability=resampleProbability)
            with BVRuleContextManager(grammar, node.parent, recurse_up=True):
                lp_of_generating_tree = grammar.log_probability(node)
            lps += [lp_of_choosing_node + lp_of_generating_tree]
    else:  # we have a specific path up the tree
        while chosen_node1:
            lp_of_choosing_node = t1.sampling_log_probability(chosen_node1, resampleProbability=resampleProbability)
            with BVRuleContextManager(grammar, chosen_node2.parent, recurse_up=True):
                lp_of_generating_tree = grammar.log_probability(chosen_node2)
            lps += [lp_of_choosing_node + lp_of_generating_tree]

            if recurse:
                chosen_node1 = chosen_node1.parent
                chosen_node2 = chosen_node2.parent
            else:
                chosen_node1 = None

    return logsumexp(lps)

def compute_weights(self):
    """
    Here we compute weights by default and then add an extra penalty for unfilled holes to decide
    which to use next. Returning a tuple lets these weights get sorted by each successive element.

    This also exponentiates and re-normalizes the posterior among children, keeping it within [0,1]
    """
    # Here what we call x_bar is really the mean log posterior. So we convert it out of that.
    es = [c.get_xbar() if c.nsteps > 0 else Infinity for c in self.children]
    Z = logsumexp(es)  # renormalize, for converting to logprob

    # We need to preserve -inf here as well as +inf since these mean something special
    # -inf means we should never ever visit; +inf means we can't not visit
    es = [exp(x - Z) if abs(x) < Infinity else x for x in es]

    N = sum([c.nsteps for c in self.children])

    # the weights we return
    weights = [None] * len(self.children)

    for i, c in enumerate(self.children):
        v = 0.0  # the adjustment
        if es[i] == Infinity:  # so break the ties.
            # This must prevent us from wandering off to infinity. To do that, we impose a penalty for each nonterminal
            for fn in c.value.value:
                for a in fn.argStrings():
                    if self.grammar.is_nonterminal(a):
                        v += self.hole_penalty.get(a, -1.0)  # pay this much for this hole. -1 is for those weird nonterminals that need bv introduced

        weights[i] = (es[i] + self.C * sqrt(2.0 * log(N) / float(c.nsteps + 1)) if c.nsteps > 0 else Infinity, v)

    return weights

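# A toy illustration (hypothetical numbers, standalone helper) of the UCB-style score built above:
# each child's exponentiated, renormalized mean posterior gets an exploration bonus
# C*sqrt(2*ln(N)/(n_c+1)), so rarely-visited children are tried even when their estimate is low.
from math import sqrt, log

def ucb_weight_sketch(mean_post, nsteps, N, C=1.0):
    return mean_post + C * sqrt(2.0 * log(N) / float(nsteps + 1))

# e.g. two children with equal means but different visit counts:
# ucb_weight_sketch(0.4, 2, 100) > ucb_weight_sketch(0.4, 50, 100)
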
def probe_MHsampler(h, language, options, name, size=64, data=None, init_size=None,
                    iters_per_stage=None, sampler=None, ret_sampler=False):
    get_data = language.sample_data_as_FuncData
    evaluation_data = get_data(size, max_length=options.FINITE)

    if data is None:
        if init_size is None:
            data = evaluation_data
        else:
            data = get_data(n=size, max_length=init_size)

    if sampler is None:
        sampler = MHSampler(h, data)
    else:
        sampler.data = data

    best_hypotheses = TopN(N=options.TOP_COUNT)

    iter = 0
    for h in sampler:
        if iter == options.STEPS:
            break
        if iter % 100 == 0:
            print '---->', iter

        best_hypotheses.add(h)

        if iter % options.PROBE == 0:
            for h in best_hypotheses:
                h.compute_posterior(evaluation_data)
            Z = logsumexp([h.posterior_score for h in best_hypotheses])

            pr_data = get_data(1024, max_length=options.FINITE)
            weighted_score = 0
            for h in best_hypotheses:
                precision, recall = language.estimate_precision_and_recall(h, pr_data)
                if precision + recall != 0:
                    f_score = precision * recall / (precision + recall)
                    weighted_score += np.exp(h.posterior_score - Z) * f_score
            weighted_score *= 2  # F1 = 2pr/(p+r); the 2 is factored out of the loop

            to_file([[iter, Z, weighted_score]], name)

        if init_size is not None and iter % iters_per_stage == 0:
            init_size += 2
            sampler.data = get_data(n=size, max_length=init_size)

        iter += 1

    if ret_sampler:
        return sampler

def compute_proposal_probability(self, grammar, t1, t2, resampleProbability=lambdaOne, recurse=True):
    chosen_node1, chosen_node2 = least_common_difference(t1, t2)

    lps = []
    if chosen_node1 is None:  # any node in the tree could have been copied
        for node in t1:
            could_be_source = lambda x: 1.0 * nodes_equal_except_parents(grammar, x, node) * resampleProbability(x)

            lp_of_choosing_source = (nicelog(t1.sample_node_normalizer(could_be_source) - could_be_source(node)) -
                                     nicelog(t1.sample_node_normalizer(resampleProbability)))
            # the target is the node itself, i.e. the node the copy overwrote
            lp_of_choosing_target = t1.sampling_log_probability(node, resampleProbability=resampleProbability)
            lps += [lp_of_choosing_source + lp_of_choosing_target]
    else:  # we have a specific path up the tree
        while chosen_node1:
            could_be_source = lambda x: 1.0 * nodes_equal_except_parents(grammar, x, chosen_node2) * resampleProbability(x)

            lp_of_choosing_source = (nicelog(t1.sample_node_normalizer(could_be_source)) -
                                     nicelog(t1.sample_node_normalizer(resampleProbability)))
            lp_of_choosing_target = t1.sampling_log_probability(chosen_node1, resampleProbability=resampleProbability)
            lps += [lp_of_choosing_source + lp_of_choosing_target]

            if recurse:
                chosen_node1 = chosen_node1.parent
                chosen_node2 = chosen_node2.parent
            else:
                chosen_node1 = None

    return logsumexp(lps)

def compute_single_likelihood(self, datum):
    assert isinstance(datum.output, dict)

    hp = self(*datum.input)  # output dictionary, output -> log probabilities
    assert isinstance(hp, dict)

    s = 0.0
    for k, dc in datum.output.items():
        if k in hp:
            s += dc * hp[k]
        elif len(hp.keys()) > 0:
            # probability of each string under this editing model
            s += dc * logsumexp([v + edit_likelihood(x, k, alphabet_size=self.alphabet_size, alpha=datum.alpha)
                                 for x, v in hp.items()])
        else:
            s += dc * edit_likelihood('', k, alphabet_size=self.alphabet_size, alpha=datum.alpha)

    # This is the mixing {a,b}* noise model
    # lp = log(1.0-datum.alpha) - log(self.alphabet_size+1)*(len(k)+1)  # the +1s here count the character marking the end of the string
    # if k in hp:
    #     lp = logplusexp(lp, log(datum.alpha) + hp[k])  # if non-noise possible
    # s += dc*lp

    return s

def compute_single_likelihood_MPI(self, input_args):
    d_index, d, P = input_args
    posteriors = self.L[d_index] + P
    Z = logsumexp(posteriors)
    w = np.exp(posteriors - Z)  # weights for each hypothesis
    r_i = np.transpose(self.R[d_index])
    w_times_R = w * r_i

    likelihood = 0.0

    # Compute likelihood of producing same output (yes/no) as data
    for q, r, m in d.get_queries():
        # col `m` of boolean matrix `R[i]` weighted by `w`
        query_col = w_times_R[m, :]
        exp_p = query_col.sum()
        p = log(exp_p)
        ## p = log((np.exp(w) * self.R[d_index][:, m]).sum())

        # NOTE: with really small grammars sometimes we get p > 0
        if p >= 0:
            print 'P ERROR!'

        yes, no = r
        k = yes        # num. yes responses
        n = yes + no   # num. trials
        bc = gammaln(n+1) - (gammaln(k+1) + gammaln(n-k+1))  # binomial coefficient
        l1mp = log1mexp(p)
        likelihood += bc + (k*p) + (n-k)*l1mp  # likelihood we got human output

    return likelihood

def runTest(self):
    NSAMPLES = 10000

    from LOTlib.DefaultGrammars import finiteTestGrammar as grammar
    from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis

    class MyH(LOTHypothesis):
        @attrmem('likelihood')
        def compute_likelihood(self, *args, **kwargs):
            return 0.0

        @attrmem('prior')
        def compute_prior(self):
            return grammar.log_probability(self.value)

    print "# Taking MHSampler for a test run"
    cnt = Counter()
    h0 = MyH(grammar=grammar)
    for h in break_ctrlc(MHSampler(h0, [], steps=NSAMPLES, skip=10)):  # huh, the skip here seems to be important
        cnt[h] += 1
    trees = list(cnt.keys())
    print "# Done taking MHSampler for a test run"

    ## TODO: When the MCMC methods get cleaned up for how many samples they return, we will assert that we got the right number here
    # assert sum(cnt.values()) == NSAMPLES # Just make sure we aren't using a sampler that returns fewer samples! I'm looking at you, ParallelTempering

    Z = logsumexp([grammar.log_probability(t.value) for t in trees])  # renormalize to the trees we found
    obsc = [cnt[t] for t in trees]
    expc = [exp(grammar.log_probability(t.value) - Z) * sum(obsc) for t in trees]

    # And plot here
    expc, obsc, trees = zip(*sorted(zip(expc, obsc, trees), reverse=True))

    import matplotlib.pyplot as plt
    plt.subplot(111)
    # Log here spaces things out at the high end, where we can see it!
    plt.scatter(log(range(len(trees))), expc, color="red", alpha=1.)
    plt.scatter(log(range(len(trees))), obsc, color="blue", marker="x", alpha=1.)
    plt.savefig('finite-sampler-test.pdf')
    plt.clf()

    # Do chi squared test
    csq, pv = chisquare(obsc, expc)
    self.assertAlmostEqual(sum(obsc), sum(expc))

    # And examine
    for t, c, s in zip(trees, obsc, expc):
        print c, s, t
    print (csq, pv), sum(obsc)

    self.assertGreater(pv, 0.01, msg="Sampler failed chi squared!")

def plot_sampler(self, opath, sampler):
    """
    Plot the sampler, for cases with many zeros where chi-squared won't work well
    """
    cnt = Counter()
    for h in lot_iter(sampler):
        cnt[h.value] += 1

    Z = logsumexp([t.log_probability() for t in self.trees])  # renormalize to the trees in self.trees
    obsc = [cnt[t] for t in self.trees]
    expc = [exp(t.log_probability() - Z) * sum(obsc) for t in self.trees]

    for t, c, s in zip(self.trees, obsc, expc):
        print c, "\t", s, "\t", t

    expc, obsc, trees = zip(*sorted(zip(expc, obsc, self.trees), reverse=True))

    import matplotlib.pyplot as plt
    from numpy import log
    plt.subplot(111)
    # Log here spaces things out at the high end, where we can see it!
    plt.scatter(log(range(len(trees))), expc, color="red", alpha=1.)
    plt.scatter(log(range(len(trees))), obsc, color="blue", marker="x", alpha=1.)
    plt.savefig(opath)
    plt.clf()

def probe(best_hypotheses, evaluation_data, pr_data, estimate_precision_and_recall):
    for h in best_hypotheses:
        h.compute_posterior(evaluation_data)
    Z = logsumexp([h.posterior_score for h in best_hypotheses])

    score_sum = 0
    best = 0
    s = None
    rec = []
    for h in best_hypotheses:
        precision, recall = estimate_precision_and_recall(h, pr_data)
        base = precision + recall
        if base != 0:
            p = np.exp(h.posterior_score - Z)
            weighted_score = p * (precision * recall / base)
            if weighted_score > best:
                best = weighted_score
                s = str(h)
            score_sum += weighted_score
            if p > 1e-2:
                rec.append([p, 2 * precision * recall / base])

    score_sum *= 2
    rec.sort(key=lambda x: x[0], reverse=True)
    return Z, score_sum, best*2, s, rec

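# A small worked note (toy numbers, not from the original) on the `*2` at the end of probe():
# the loop accumulates w_h * p*r/(p+r) per hypothesis, which is half the weighted F1 since
# F1 = 2*p*r/(p+r); doubling once at the end avoids multiplying inside the loop.
# e.g. one hypothesis with weight 1.0, precision 0.8, recall 0.5:
#   the loop adds 1.0 * (0.8*0.5)/(0.8+0.5) = 0.30769...; score_sum*2 = 0.61538 = F1.
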
def test_hypo_stat():
    """
    objective: inspect what the high-probability hypotheses look like

    run: mpiexec -n 12
    """
    seq = load(open('seq_' + str(rank) + ''))

    cnt = 0
    for e in seq:
        Z = logsumexp([p for h, p in e.iteritems()])

        e_list = [[h, p] for h, p in e.iteritems()]
        e_list.sort(key=lambda x: x[1], reverse=True)

        f = open('hypo_stat_' + str(rank) + suffix, 'a')
        print >> f, '=' * 40
        for iii in xrange(4):
            print >> f, 'rank: %i' % rank, 'prob', np.exp(e_list[iii][1] - Z)
            print >> f, Counter([e_list[iii][0]() for _ in xrange(512)])
            print >> f, str(e_list[iii][0])

        print cnt, 'done'
        cnt += 1
        f.close()

def test_lis_disp(names):
    ll = [load(open(name)) for name in names]
    for li in ll:
        print '=' * 50
        Z = logsumexp([h[0] for h in li])
        for i in xrange(3):
            print 'p ', np.exp(li[i][0] - Z), 'x_f-score ', li[i][1], 'axb_f-score', li[i][2]
            print li[i][4]

def run(*args):
    #print "# Running data"
    global hypotheses

    data_size = args[0]

    p_representation = defaultdict(int)          # how often do you get the right representation
    p_response = defaultdict(int)                # how often do you get the right response?
    p_representation_literal = defaultdict(int)  # how often do you get the right representation
    p_response_literal = defaultdict(int)        # how often do you get the right response?
    p_representation_presup = defaultdict(int)   # how often do you get the right representation
    p_response_presup = defaultdict(int)         # how often do you get the right response?

    #print "# Generating data"
    data = generate_data(data_size)

    # recompute these
    #print "# Computing posterior"
    #[ x.unclear_functions() for x in hypotheses ]
    [x.compute_posterior(data) for x in hypotheses]

    # normalize the posterior in fs
    #print "# Computing normalizer"
    Z = logsumexp([x.posterior_score for x in hypotheses])

    # and output the top hypotheses
    qq = FiniteBestSet(max=True, N=25)
    for h in hypotheses:
        qq.push(h, h.posterior_score)  # get the tops

    for i, h in enumerate(qq.get_all(sorted=True)):
        for w in h.all_words():
            fprintn(8, data_size, i, w, h.posterior_score, q(h.value[w]),
                    f=options.OUT_PATH + "-hypotheses." + str(get_rank()) + ".txt")

    # and compute the probability of being correct
    #print "# Computing correct probability"
    for h in hypotheses:
        hstr = str(h)
        #print data_size, len(data), exp(h.posterior_score), correct[ str(h)+":"+w ]
        for w in words:
            p = exp(h.posterior_score - Z)
            key = w + ":" + hstr

            p_representation[w] += p * (agree_pct[key] == 1.)
            p_representation_presup[w] += p * (agree_pct_presup[key] == 1.)  # if we always agree with the target, then we count as the right rep.
            p_representation_literal[w] += p * (agree_pct_literal[key] == 1.)

            # and just how often does the hypothesis agree?
            p_response[w] += p * agree_pct[key]
            p_response_presup[w] += p * agree_pct_presup[key]
            p_response_literal[w] += p * agree_pct_literal[key]

    #print "# Outputting"
    for w in words:
        fprintn(10, str(get_rank()), q(w), data_size,
                p_representation[w], p_representation_presup[w], p_representation_literal[w],
                p_response[w], p_response_presup[w], p_response_literal[w],
                f=options.OUT_PATH + "-stats." + str(get_rank()) + ".txt")

    return 0

def most_prob(suffix):
    topn = load(open('out/SimpleEnglish/hypotheses__' + suffix))
    Z = logsumexp([h.posterior_score for h in topn])

    h_set = [h for h in topn]
    h_set.sort(key=lambda x: x.posterior_score, reverse=True)

    for i in xrange(10):
        print h_set[i]
        print 'prob: ', np.exp(h_set[i].posterior_score - Z)
        print Counter([h_set[i]() for _ in xrange(512)])

def prob_correct(data_size, hypotheses, agree_pct, agree_pct_presup, agree_pct_literal):
    p_representation = defaultdict(int)          # how often do you get the right representation
    p_response = defaultdict(int)                # how often do you get the right response?
    p_representation_literal = defaultdict(int)  # how often do you get the right representation
    p_response_literal = defaultdict(int)        # how often do you get the right response?
    p_representation_presup = defaultdict(int)   # how often do you get the right representation
    p_response_presup = defaultdict(int)         # how often do you get the right response?

    weight = 1. / EVAL_NUM

    for _ in range(EVAL_NUM):
        data = generate_data(data_size)

        # recompute posterior
        print 'Compute posterior for ', str(data_size)
        [x.compute_posterior(data) for x in hypotheses]

        # normalize the posterior in fs
        Z = logsumexp([x.posterior_score for x in hypotheses])

        words = hypotheses.best().all_words()

        # and compute the probability of being correct
        for h in hypotheses:
            hstr = str(h)
            for w in words:
                p = np.exp(h.posterior_score - Z)
                key = w + ":" + hstr

                p_representation[w] += weight * p * (agree_pct[key] == 1.)
                p_representation_presup[w] += weight * p * (agree_pct_presup[key] == 1.)  # if we always agree with the target, then we count as the right rep.
                p_representation_literal[w] += weight * p * (agree_pct_literal[key] == 1.)

                # and just how often does the hypothesis agree?
                p_response[w] += weight * p * agree_pct[key]
                p_response_presup[w] += weight * p * agree_pct_presup[key]
                p_response_literal[w] += weight * p * agree_pct_literal[key]

    filename = 'results/correctness_' + GRAMMAR_TYPE + '_' + str(SAMPLE_SIZE) + '.txt'
    f = open(filename, 'a')
    for w in words:
        col = [w, str(data_size),
               str(p_representation[w]), str(p_representation_presup[w]), str(p_representation_literal[w]),
               str(p_response[w]), str(p_response_presup[w]), str(p_response_literal[w])]
        f.write(','.join(col) + '\n')
    f.close()

def compute_single_likelihood(self, datum):
    assert isinstance(datum.output, dict), "Data supplied must be a dict (function outputs to counts)"

    llcounts = self.make_ll_counts(datum.input)
    lo = sum(llcounts.values())

    ll = 0.0  # We are going to compute a pseudo-likelihood, counting close strings as being close
    for k in datum.output.keys():
        ll += datum.output[k] * logsumexp([log(llcounts[r]) - log(lo) - 100.0 * distance(r, k)
                                           for r in llcounts.keys()])
    return ll

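# A self-contained sketch of the distance-penalized pseudo-likelihood above; `toy_distance` is an
# assumed stand-in for the string `distance` used by the module, not the real edit distance.
# Each observed string k credits every generated string r with log p(r) - d*distance(r, k),
# combined via logsumexp, so near-miss strings still earn some likelihood.
from math import log
from scipy.special import logsumexp  # older scipy exposes this as scipy.misc.logsumexp

def toy_distance(a, b):
    # hypothetical mismatch count, only for illustration
    return abs(len(a) - len(b)) + sum(x != y for x, y in zip(a, b))

def pseudo_ll_sketch(llcounts, output_counts, distance_factor=100.0):
    lo = sum(llcounts.values())
    return sum(c * logsumexp([log(llcounts[r]) - log(lo) - distance_factor * toy_distance(r, k)
                              for r in llcounts])
               for k, c in output_counts.items())

# e.g. pseudo_ll_sketch({'ab': 3, 'abb': 1}, {'ab': 10, 'abb': 2})
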
def compute_likelihood(self, data, **kwargs):
    self.update()
    hypotheses = self.hypotheses
    likelihood = 0.0

    for d in data:
        posteriors = [sum(h.compute_posterior(d.input)) for h in hypotheses]  # prior + likelihood, computed once per hypothesis
        zo = logsumexp(posteriors)
        weights = [(post - zo) for post in posteriors]

        for o in d.output.keys():
            # probability for yes on output `o` is the sum of posteriors for hypotheses that contain `o`
            p = logsumexp([w if o.Y in h(o.word, o.context, set([o.Y])) else -Infinity
                           for h, w in zip(hypotheses, weights)])
            p = -1e-10 if p >= 0 else p
            k = d.output[o][0]      # num. yes responses
            n = k + d.output[o][1]  # num. trials
            bc = gammaln(n + 1) - (gammaln(k + 1) + gammaln(n - k + 1))  # binomial coefficient
            likelihood += bc + (k * p) + (n - k) * log1mexp(p)  # likelihood we got human output

    return likelihood

def compute_proposal_probability(self, grammar, t1, t2, resampleProbability=lambdaOne, **kwargs):
    """
    Sum over all possible ways of generating t2 from t1 over all proposers, adjusted for their weight.
    """
    lps = []
    for idx, proposer in enumerate(self.proposers):
        lp = proposer.compute_proposal_probability(grammar, t1, t2,
                                                   resampleProbability=resampleProbability,
                                                   **kwargs)
        lw = nicelog(self.proposer_weights[idx])
        lps += [lw + lp]

    return logsumexp(lps)

def evaluate_sampler(my_sampler, print_every=1000, out_aggregate=sys.stdout, trace=False, pthreshold=0.999, prefix=""):
    """
    Print the stats for a single sampler run

    *my_sampler* -- a generator of samples
    print_every -- display the output every this many steps
    out_aggregate -- where we put aggregate stats
    trace -- print every sample
    pthreshold -- report how many top hypotheses are needed to cover this much of the found posterior mass
    prefix -- display before lines
    """
    visited_at = defaultdict(list)

    startt = time()
    for n, s in break_ctrlc(enumerate(my_sampler)):  # each sample should have a .posterior_score defined
        if trace:
            print "#", n, s

        visited_at[s].append(n)

        if (n % print_every) == 0 and n > 0:
            post = sorted([x.posterior_score for x in visited_at.keys()], reverse=True)  # the unnormalized posteriors of everything found
            ll = sorted([x.likelihood for x in visited_at.keys()], reverse=True)
            Z = logsumexp(post)  # just compute total probability mass found -- the main measure

            # determine how many you need to get pthreshold of the posterior mass
            J = 0
            while J < len(post):
                if logsumexp(post[J:]) < Z + log(1.0 - pthreshold):
                    break
                J += 1

            out_aggregate.write('\t'.join(map(str, [prefix, n, r3(time()-startt), r5(Z), r5(post[0]), J, len(post)])) + '\n')
            out_aggregate.flush()

    return

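# A small standalone sketch (hypothetical helper, not in the original file) of the mass-coverage
# count computed above: given unnormalized log posteriors, find how many top hypotheses cover
# `pthreshold` of the total mass found; the loop stops once the tail post[J:] holds less than
# (1 - pthreshold) of the mass, i.e. logsumexp(post[J:]) < Z + log(1 - pthreshold).
from math import log
from scipy.special import logsumexp

def mass_coverage_count_sketch(post, pthreshold=0.999):
    post = sorted(post, reverse=True)
    Z = logsumexp(post)
    J = 0
    while J < len(post) and logsumexp(post[J:]) >= Z + log(1.0 - pthreshold):
        J += 1
    return J

# e.g. mass_coverage_count_sketch([0.0, -1.0, -10.0, -20.0]) returns 2 with the default threshold
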
def run(data, TOP=100, STEPS=1000):
    #if LOTlib.SIG_INTERRUPTED:
    #    return ""
    #data = [FunctionData(input=(), output={lst: len(lst)})]

    h0 = MyHypothesis()
    tn = TopN(N=TOP)

    # run the sampler
    counter = Counter()
    for h in MHSampler(h0, data, steps=STEPS, acceptance_temperature=1.0, likelihood_temperature=1.0):  # or likelihood_temperature=10.0
        # counter[h] += 1
        tn.add(h)

    z = logsumexp([h.posterior_score for h in tn])
    sort_post_probs = [(h, exp(h.posterior_score - z)) for h in tn.get_all(sorted=True)][::-1]

    return sort_post_probs

def compute_single_likelihood(self, datum):
    distance_scale = self.__dict__.get('distance', 1.0)

    assert isinstance(datum.output, dict)

    hp = self(*datum.input)  # output dictionary, output -> log probabilities
    assert isinstance(hp, dict)

    try:
        # now we have to add up every string that we could get
        return sum(dc * logsumexp([rlp - distance_scale*prefix_distance(r, k) for r, rlp in hp.items()])
                   for k, dc in datum.output.items())
    except ValueError as e:
        print "*** Math domain error", hp, str(self)
        raise e

def compute_likelihood(self, data, update_post=True, **kwargs):
    """
    Compute the likelihood of producing human data, given:  H (self.hypotheses)  &  x (self.value)
    """
    # The following must be computed for this specific GrammarHypothesis
    # ------------------------------------------------------------------
    x = self.normalized_value()  # vector of rule probabilities
    P = np.dot(self.C, x)        # prior for each hypothesis
    likelihood = 0.0

    for d_key, d in enumerate(data):
        # Initialize unfilled values for L[data] & R[data]
        if d_key not in self.L:
            self.init_L(d, d_key)
        if d_key not in self.R:
            self.init_R(d, d_key)

        posteriors = self.L[d_key] + P
        Z = logsumexp(posteriors)
        w = posteriors - Z  # weights for each hypothesis

        # Compute likelihood of producing same output (yes/no) as data
        for m, o in enumerate(d.output.keys()):
            # col `m` of boolean matrix `R[i]` weighted by `w`
            p = log((np.exp(w) * self.R[d_key][:, m]).sum())

            # NOTE: with really small grammars sometimes we get p > 0
            if p >= 0:
                print "P ERROR!"

            k = d.output[o][0]      # num. yes responses
            n = k + d.output[o][1]  # num. trials
            bc = gammaln(n + 1) - (gammaln(k + 1) + gammaln(n - k + 1))  # binomial coefficient
            likelihood += bc + (k * p) + (n - k) * log1mexp(p)  # likelihood we got human output

    if update_post:
        self.likelihood = likelihood
        self.update_posterior()
    return likelihood

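# A toy numpy sketch (names and numbers are illustrative, not from the module) of the model
# averaging step above: normalized posterior weights times a boolean hypothesis-by-query response
# matrix give the model's probability of answering "yes" to each query.
import numpy as np
from scipy.special import logsumexp

posteriors = np.array([-1.0, -2.0, -3.0])       # unnormalized log posteriors for 3 hypotheses
R = np.array([[1, 0],                           # hypothesis 0 says yes to query 0 only
              [1, 1],                           # hypothesis 1 says yes to both
              [0, 1]], dtype=float)             # hypothesis 2 says yes to query 1 only
w = np.exp(posteriors - logsumexp(posteriors))  # normalized weights, sums to 1
p_yes = w.dot(R)                                # p_yes[m] == sum_h w[h] * R[h, m]
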
def weighted_sample(self, n, strings, probs):
    length = len(probs)
    prob_sum = logsumexp(probs)
    cumu_prob = np.zeros(length, dtype=np.float64)

    mass = 0
    for i in xrange(length):
        mass += np.exp(probs[i] - prob_sum)
        cumu_prob[i] = mass

    output = []
    for _ in xrange(n):
        rand = np.random.rand()
        for i in xrange(length):
            if rand < cumu_prob[i]:
                output.append(strings[i])
                break

    return output

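# Usage sketch: a standalone version of the same idea, an assumption rather than the module's API.
# Normalize log weights with logsumexp, then sample indices in proportion to the weights; with
# log weights log([0.2, 0.3, 0.5]), roughly half the draws should be 'c'.
import numpy as np
from scipy.special import logsumexp

def weighted_sample_sketch(n, strings, log_probs, rng=np.random):
    w = np.exp(np.asarray(log_probs) - logsumexp(log_probs))  # normalized weights
    idx = rng.choice(len(strings), size=n, p=w)               # vectorized inverse-CDF sampling
    return [strings[i] for i in idx]

# e.g. weighted_sample_sketch(10, ['a', 'b', 'c'], np.log([0.2, 0.3, 0.5]))
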
def csv_compare_model_human(self, data, filename):
    """
    Save csv stuff for making the regression plot.

    Format is list of input/outputs, with human & model probabilities for each.

    Note
    ----
    This is specific to NumberGameHypothesis (because of 'o in h()')

    """
    import math
    import csv

    self.update()
    for h in self.hypotheses:
        h.compute_prior()
        h.update_posterior()

    with open(filename, "a") as f:
        writer = csv.writer(f)
        hypotheses = self.hypotheses
        writer.writerow(["input", "output", "human p", "model p"])

        i = 0
        for d in data:
            posteriors = [sum(h.compute_posterior(d.input)) for h in hypotheses]
            Z = logsumexp(posteriors)
            weights = [(post - Z) for post in posteriors]
            print i, "\t|\t", d.input
            i += 1

            for o in d.output.keys():
                # Probability for yes on output `o` is the sum of posteriors for hypotheses that contain `o`
                p_human = float(d.output[o][0]) / float(d.output[o][0] + d.output[o][1])
                p_model = sum([math.exp(w) if o in h() else 0 for h, w in zip(hypotheses, weights)])
                writer.writerow([d.input, o, p_human, p_model])

def evaluate_sampler(self, sampler):
    cnt = Counter()
    for h in lot_iter(sampler):
        cnt[h.value] += 1

    ## TODO: When the MCMC methods get cleaned up for how many samples they return, we will assert that we got the right number here
    # assert sum(cnt.values()) == NSAMPLES # Just make sure we aren't using a sampler that returns fewer samples! I'm looking at you, ParallelTempering

    Z = logsumexp([t.log_probability() for t in self.trees])  # renormalize to the trees in self.trees
    obsc = [cnt[t] for t in self.trees]
    expc = [exp(t.log_probability() - Z) * sum(obsc) for t in self.trees]

    csq, pv = chisquare(obsc, expc)
    assert abs(sum(obsc) - sum(expc)) < 0.01
    # assert min(expc) > 5 # or else chisq sux

    for t, c, s in zip(self.trees, obsc, expc):
        print c, s, t
    print (csq, pv), sum(obsc)

    self.assertGreater(pv, PVALUE, msg="Sampler failed chi squared!")

    return csq, pv

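# A minimal sketch (toy numbers, hypothetical) of the goodness-of-fit check above: scale the
# renormalized target probabilities to the observed total so scipy's chisquare sees two count
# vectors with equal sums, then test whether the sampler's visit counts match the target.
import numpy as np
from scipy.stats import chisquare

obs = np.array([520, 280, 130, 70])          # observed visit counts per tree
target_p = np.array([0.5, 0.3, 0.15, 0.05])  # renormalized target probabilities
expected = target_p * obs.sum()              # expected counts, same total as observed
csq, pv = chisquare(obs, expected)           # a large p-value means counts are consistent
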
def compute_likelihood(self, data):
    ll = 0.0
    for cl in self.concept2hypotheses.keys():  # for each concept and list
        if cl not in concept2data:
            print "# Warning, %s not in concept2data." % cl
            continue

        d = concept2data[cl]

        for si in xrange(len(d)):  # for each prefix of the data
            hypotheses = self.concept2hypotheses[cl]
            assert len(hypotheses) > 0

            # update the posteriors for this amount of data
            for h in hypotheses:
                h.compute_posterior(d[:si])  # up to but not including this set

            # get their normalizer
            Z = logsumexp([h.posterior_score for h in hypotheses])

            nxtd = d[si]
            pred = [0.0] * len(nxtd.input)  # how we respond to each

            # compute the predictive
            for h in hypotheses:
                p = exp(h.posterior_score - Z)
                for i, ri in enumerate(h.evaluate_on_set(nxtd.input)):
                    pred[i] += p * ((ri == True)*h.alpha + (1.0-h.alpha)*h.baserate)

            for ri, pi in enumerate(pred):
                key = tuple([cl, si, ri])
                # assert key in human_yes and key in human_no, "No key " + key
                # Does not have to be there because there can be zero counts
                ll += human_yes[key]*log(pi) + human_no[key]*log(1.-pi)

    return ll

def evaluate_sampler(my_sampler, print_every=1000, out_hypotheses=sys.stdout, out_aggregate=sys.stdout, trace=False, prefix=""):
    """
    Print the stats for a single sampler run

    *my_sampler* -- a generator of samples
    print_every -- display the output every this many steps
    out_hypotheses -- where we put hypothesis stats
    out_aggregate -- where we put aggregate stats
    trace -- print every sample
    prefix -- display before lines
    """
    visited_at = defaultdict(list)

    startt = time()
    for n, s in lot_iter(enumerate(my_sampler)):  # each sample should have a .posterior_score defined
        if trace:
            print "#", n, s

        visited_at[s].append(n)

        if (n % print_every) == 0 and n > 0:
            post = sorted([x.posterior_score for x in visited_at.keys()], reverse=True)  # the unnormalized posteriors of everything found
            ll = sorted([x.likelihood for x in visited_at.keys()], reverse=True)
            Z = logsumexp(post)  # just compute total probability mass found -- the main measure

            out_aggregate.write('\t'.join(map(str, [prefix, n, r3(time()-startt), r5(Z), len(post)] + mydisplay(post))) + '\n')

    # Now once we're done, output the hypothesis stats
    for k, v in visited_at.items():
        mean_diff = "NA"
        if len(v) > 1:
            mean_diff = mean(diff(v))
        out_hypotheses.write('\t'.join(map(str, [prefix, k.posterior_score, k.prior, k.likelihood,
                                                 len(v), min(v), max(v), mean_diff, sum(diff(v) == 0)])) + '\n')  # number of rejects from this

    return 0.0

def parse_cube(lang_name, finite):
    """
    reads hypotheses of lang_name, estimates the p/r and posterior score, and saves them into a cube (list of tables)

    data structure:
        stats = [cube, topn]
        cube = [[size, Z, table], [size, Z, table], ...]
        table = [[ind, score, p, r, f, strs], [ind, score, p, r, f, strs], ...]

    NOTE: topn here is a dict, you can use ind to find the h

    example script:
        mpiexec -n 12 python parse_hypothesis.py --mode=parse_cube --language=An --finite=3/10
    """
    _dir = 'out/'
    global size
    global rank
    topn = dict()
    prf_dict = {}
    language = instance(lang_name, finite)

    if rank == 0:
        truncate_flag = False if (lang_name == 'An' and finite <= 3) else True
        set_topn = set()
        print 'loading..'; fff()
        for file_name in listdir(_dir):
            if lang_name + '_' in file_name:
                _set = load(open(_dir + file_name))
                set_topn.update([h for h in _set])

        print 'getting p&r..'; fff()
        pr_data = language.sample_data_as_FuncData(2048)
        for h in set_topn:
            p, r, h_llcounts = language.estimate_precision_and_recall(h, pr_data, truncate=truncate_flag)
            prf_dict[h] = [p, r, 0 if p + r == 0 else 2 * p * r / (p + r)]
            h.fixed_ll_counts = h_llcounts

        topn = dict(enumerate(set_topn))
        print 'bcasting..'; fff()

    topn = comm.bcast(topn, root=0)
    prf_dict = comm.bcast(prf_dict, root=0)
    print rank, 'getting posterior'; fff()

    # work_list = slice_list(np.arange(0, 72, 6), size)
    work_list = slice_list(np.arange(120, 264, 12), size)

    cube = []
    for s in work_list[rank]:
        eval_data = language.sample_data_as_FuncData(s)
        for ind, h in topn.iteritems():
            h.likelihood_temperature = 100
            h.compute_posterior(eval_data)

        Z = logsumexp([h.posterior_score for ind, h in topn.iteritems()])
        table = [[ind, h.posterior_score, prf_dict[h][0], prf_dict[h][1], prf_dict[h][2], h.fixed_ll_counts]
                 for ind, h in topn.iteritems()]
        table.sort(key=lambda x: x[1], reverse=True)
        cube += [[s, Z, table]]

        print rank, s, 'done'; fff()

    if rank == 0:
        for i in xrange(1, size):
            cube += comm.recv(source=i)
    else:
        comm.send(cube, dest=0)
        print rank, 'table sent'; fff()
        sys.exit(0)

    cube.sort(key=lambda x: x[0])
    dump([cube, topn], open(lang_name + '_stats' + suffix, 'w'))

def parse_nonadjacent(_dir, temperature):
    """
    1. read raw hypos
    2. get fixed llcnts
    3. compute posterior given different data pool sizes

    NOTE: if _dir is a previously dumped topn, then load it
    """
    if 'nonadjacent_topn' not in _dir:
        topn = set()
        for filename in os.listdir(_dir):
            if 'nonadjacent' in filename and 'seq' not in filename:
                print 'load', filename
                _set = load(open(_dir + filename))
                topn.update([h for h in _set])
        topn = list(topn)

        # fix the llcnts to save time and make the curve smooth
        print 'get llcnts...'
        topn = gen_fixlen_llcnts(topn, 5)
        dump(topn, open(_dir + '_nonadjacent_topn' + suffix, 'w'))
    else:
        print 'load', _dir
        topn = load(open(_dir))

    # find all correct hypotheses
    topn = list(topn)
    correct_set = set()

    for i in xrange(len(topn)):
        flag = True
        for k, v in topn[i].fixed_ll_counts.iteritems():
            if len(k) < 2:
                continue
            elif k[0] == 'a' and k[-1] in 'b':
                continue
            elif k[0] == 'c' and k[-1] in 'bd':
                continue
            elif k[0] == 'e' and k[-1] in 'bdf':
                continue
            flag = False
            break
        if flag:
            correct_set.add(i)

    print len(correct_set), 'of', len(topn), 'are correct'

    # get posterior
    w_list = range(2, 25, 1)
    amount_list = range(24, 144, 5)
    posterior_seq = []
    for i in xrange(len(w_list)):
        pool_size = w_list[i]
        language = LongDependency(max_length=pool_size)
        eval_data = [FunctionData(input=[],
                                  output={e: float(amount_list[i]) / pool_size for e in language.str_sets})]

        for h in topn:
            h.likelihood_temperature = temperature
            h.compute_posterior(eval_data)

        Z = logsumexp([h.posterior_score for h in topn])

        prob = 0
        for i in xrange(len(topn)):
            if i in correct_set:
                prob += np.exp(topn[i].posterior_score - Z)
        print 'pool_size', pool_size, 'prob', prob
        posterior_seq.append([pool_size, prob])

        # debug
        _list = [h for h in topn]
        _list.sort(key=lambda x: x.posterior_score, reverse=True)
        for i in xrange(3):
            print 'prob: ', np.exp(_list[i].posterior_score - Z),
            print _list[i].fixed_ll_counts
            print _list[i]
            print '=' * 50
        fff()

    dump(posterior_seq, open('nonadjacent_posterior_seq' + suffix, 'w'))

def dis_pos(jump, is_plot, file_name, axb_bound, x_bound):
    """
    1. read posterior sequence
    2. set bound for axb and x hypotheses
    3. plot

    run: serial
    """
    print 'loading..'; fff()
    _set = [load(open('non_seq%i_' % i + file_name)) for i in xrange(4)]

    print 'avging..'; fff()
    avg_space_seq, avg_pr_dict = _set.pop(0)
    for space_seq, pr_dict in _set:
        for i in xrange(len(space_seq)):
            prob_dict, ada_dict = space_seq[i]
            avg_prob_dict, avg_ada_dict = avg_space_seq[i]
            for h in prob_dict:
                avg_prob_dict[h] = logsumexp([avg_prob_dict[h], prob_dict[h]])
                avg_ada_dict[h] += ada_dict[h]
        for h in pr_dict:
            avg_pr_dict[h] += pr_dict[h]

    for prob_dict, ada_dict in avg_space_seq:
        for h in prob_dict:
            prob_dict[h] -= np.log(4)  # average the 4 runs in log space
            ada_dict[h] /= 4
    for h in avg_pr_dict:
        avg_pr_dict[h] /= 4

    for axb_bound in np.arange(0.1, 1, 0.1):
        for x_bound in np.arange(0.1, 1, 0.1):
            seq = []
            seq1 = []
            seq2 = []
            for seen in avg_space_seq:
                Z = logsumexp([p for h, p in seen[0].iteritems()])

                axb_prob = -Infinity
                x_prob = -Infinity
                for h, v in seen[0].iteritems():
                    if avg_pr_dict[h] > axb_bound:
                        axb_prob = logsumexp([axb_prob, v])
                    if seen[1][h] > x_bound:
                        x_prob = logsumexp([x_prob, v])

                seq.append(np.exp(axb_prob - Z))
                seq1.append(np.exp(x_prob - Z))
                seq2.append(np.exp(axb_prob - Z) - np.exp(x_prob - Z))
            print 'done'; fff()

            flag = True
            for i in xrange(len(seq2) - 1):
                if seq2[i] - seq2[i+1] > 1e-4:
                    flag = False
                    break
            if not flag:
                continue

            print axb_bound, x_bound, '=' * 50
            print 'axb_prob: ', seq
            print 'x_prob: ', seq1
            print 'difference_prob: ', seq2
            fff()

            dump([seq, seq1, seq2], open('nonadjacent_%.2f_%.2f' % (axb_bound, x_bound) + suffix, 'w'))

            if is_plot == 'yes':
                f, axarr = plt.subplots(1, 3)
                axarr[0].plot(range(2, 65, jump), seq)
                axarr[1].plot(range(2, 65, jump), seq1)
                axarr[2].plot(range(2, 65, jump), seq2)

                # plt.legend(handles=[x])
                plt.ylabel('posterior')
                plt.xlabel('pool_size')
                plt.show()

def Z(self):
    """Normalizer of everything."""
    return logsumexp([h.posterior_score for h in self.get_all(sorted=False)])

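# For reference, a numerically stable logsumexp sketch; the code above imports its own, so this
# stand-in is only illustrative. Shifting by the max keeps exp() from overflowing.
import numpy as np

def logsumexp_sketch(xs):
    xs = np.asarray(xs, dtype=float)
    m = xs.max()
    if np.isinf(m):  # all -inf: total mass is zero
        return m
    return m + np.log(np.sum(np.exp(xs - m)))

# logsumexp_sketch([np.log(a), np.log(b)]) == log(a + b), up to floating point
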
def parse_plot(lang_name, finite, is_plot):
    """
    run: mpi supported

    example:
        mpiexec -n 12 python parse_hypothesis.py --mode=parse_plot --language=An --finite=3 --plot=yes --wfs=yes
    """
    _dir = 'out/final/'
    global size
    global rank
    topn = set()
    prf_dict = {}
    language = instance(lang_name, finite)

    if rank == 0:
        print 'loading..'; fff()
        for file_name in listdir(_dir):
            if lang_name + '_' in file_name:
                _set = load(open(_dir + file_name))
                topn.update([h for h in _set])

        print 'getting p&r..'; fff()
        pr_data = language.sample_data_as_FuncData(1024)
        for h in topn:
            p, r = language.estimate_precision_and_recall(h, pr_data)
            prf_dict[h] = [p, r, 0 if p + r == 0 else 2 * p * r / (p + r)]

        dump(prf_dict, open(lang_name + '_prf_dict' + suffix, 'w'))

    topn = comm.bcast(topn, root=0)
    prf_dict = comm.bcast(prf_dict, root=0)
    print rank, 'getting posterior'; fff()

    work_list = slice_list(np.arange(235, 300, 5), size)

    seq = []
    pnt_str = 'Weighted F-score' if options.WFS == 'yes' else 'Posterior Probability'
    for s in work_list[rank]:
        eval_data = language.sample_data_as_FuncData(s)
        for h in topn:
            h.likelihood_temperature = 100
            h.compute_posterior(eval_data)

        Z = logsumexp([h.posterior_score for h in topn])

        if options.WFS == 'yes':
            tmp = sum([prf_dict[h][2] * np.exp(h.posterior_score - Z) for h in topn])
        # TODO
        # else:
        #     tmp = sum([np.exp(h.posterior_score - Z) for h in topn if prf_dict[h][2] > 0.9])
        else:
            tmp = sum([np.exp(h.posterior_score - Z) for h in topn
                       if (prf_dict[h][0] < 0.3 and prf_dict[h][1] > 0.9)])

        if options.PROB == 'yes':
            dump([topn, Z], open(lang_name + '_prob_' + str(s) + suffix, 'w'))

        seq.append([s, tmp])
        print 'size: %.1f' % s, '%s: %.2f' % (pnt_str, tmp); fff()

        # debug
        _list = [h for h in topn]
        _list.sort(key=lambda x: x.posterior_score, reverse=True)
        for i in xrange(3):
            print 'prob: ', np.exp(_list[i].posterior_score - Z), 'p,r: ', prf_dict[_list[i]][:2],
            print Counter([_list[i]() for _ in xrange(256)])
            print _list[i]
            print '=' * 50
        fff()

    if rank == 0:
        for i in xrange(1, size):
            seq += comm.recv(source=i)
    else:
        comm.send(seq, dest=0)
        sys.exit(0)

    seq.sort(key=lambda x: x[0])
    dump(seq, open(lang_name + '_seq' + suffix, 'w'))

    if is_plot == 'yes':
        x, y = zip(*seq)
        plt.plot(x, y)

        plt.ylabel(pnt_str)
        plt.xlabel('Size of Data')
        plt.title(lang_name)
        plt.show()

def make_pos(jump, temp):
    """
    1. read raw output
    2. compute precision & recall on nonadjacent and adjacent contents
    3. evaluate posterior probability on different data sizes
    4. dump the sequence

    run: mpiexec -n 4
    """
    print 'loading..'; fff()
    rec = load_hypo('out/simulations/nonadjacent/', ['0'])

    print 'estimating pr'; fff()
    pr_dict = {}
    _set = set()
    cnt_tmp = {}
    for e in rec:
        for h in e[1]:
            if h in _set:
                continue

            cnt = Counter([h() for _ in xrange(256)])
            # cnt = Counter([h() for _ in xrange(10)])
            cnt_tmp[h] = cnt
            base = sum(cnt.values())
            num = 0
            for k, v in cnt.iteritems():
                if k is None or len(k) < 2:
                    continue
                if k[0] == 'a' and k[-1] == 'b':
                    num += v
            pr_dict[h] = float(num) / base
            _set.add(h)

    work_list = range(2, 24, jump)
    space_seq = []
    for i in work_list:
        language = LongDependency(max_length=i)

        eval_data = {}
        for e in language.str_sets:
            eval_data[e] = 144.0 / len(language.str_sets)
        eval_data = [FunctionData(input=[], output=eval_data)]

        prob_dict = {}
        ada_dict = {}
        test_list = []
        for h in _set:
            h.likelihood_temperature = temp
            prob_dict[h] = h.compute_posterior(eval_data)
            p, r = language.estimate_precision_and_recall(h, cnt_tmp[h])
            ada_dict[h] = 2 * p * r / (p + r) if p + r != 0 else 0
            test_list.append([h.posterior_score, ada_dict[h], pr_dict[h], cnt_tmp[h], str(h)])

        Z = logsumexp([h.posterior_score for h in _set])

        test_list.sort(key=lambda x: x[0], reverse=True)
        weighted_x = 0
        weighted_axb = 0
        for e in test_list:
            weighted_x += np.exp(e[0] - Z) * e[1]
            weighted_axb += np.exp(e[0] - Z) * e[2]

        f = open('non_w' + suffix, 'a')
        print >> f, weighted_x, weighted_axb
        f.close()

        # print rank, i, '='*50
        # for i_t in xrange(3):
        #     print 'prob: ', np.exp(test_list[i_t][0] - Z), 'x_f-score', test_list[i_t][1], 'axb_f-score', test_list[i_t][2]
        #     print test_list[i_t][3]
        #     print test_list[i_t][5].compute_posterior(eval_data)
        #     print language.estimate_precision_and_recall(test_list[i_t][5], cnt_tmp[test_list[i_t][5]])
        # fff()
        # dump(test_list, open('test_list_'+str(rank)+'_'+str(i)+suffix, 'w'))
        # space_seq.append([prob_dict, ada_dict])

        print 'rank', rank, i, 'done'; fff()

    dump([space_seq, pr_dict], open('non_seq' + str(rank) + suffix, 'w'))

def make_staged_posterior_seq(_dir, temperature, lang_name, dtype):
    """
    script: python parse_hypothesis.py --mode=make_staged_posterior_seq --file=file --temp=1 --language=AnBn --dtype=staged/uniform

    1. read raw file
    2. compute fixed Counter
    3. compute posterior for different amounts

    dumped posterior format:
        [topn, [z, amount, finite, [s1, s2, ....]], [], [], ....]

    NOTE: if _dir is a previously dumped posterior seq, then we use it
    """
    if not (os.path.isfile(_dir) and 'posterior_seq' in _dir):
        topn = set()
        for filename in os.listdir(_dir):
            if ('staged' in filename or 'normal' in filename) and 'seq' not in filename:
                print 'load', filename
                _set = load(open(_dir + filename))
                topn.update([h for h in _set])
        topn = list(topn)

        # fix the llcnts to save time and make the curve smooth
        print 'get llcnts...'
        for h in topn:
            llcnts = Counter([h() for _ in xrange(2048)])
            h.fixed_ll_counts = llcnts

        seq = []
        seq.append(topn)

        for amount, finite in mk_staged_wlist(0, 200, 2, [48, 96]):
            print 'posterior on', amount, finite

            if dtype == 'staged':
                language = instance(lang_name, finite)
                eval_data = language.sample_data_as_FuncData(amount)
            elif dtype == 'uniform':
                eval_data = uniform_data(amount, 12)

            for h in topn:
                h.likelihood_temperature = temperature
                h.compute_posterior(eval_data)

            Z = logsumexp([h.posterior_score for h in topn])
            seq.append([Z, amount, finite, [h.posterior_score for h in topn]])

        dump(seq, open(dtype + '_posterior_seq' + suffix, 'w'))
    else:
        seq = load(open(_dir))

    # ====================== compute KL based on seq =======================
    print 'compute kl seq...'
    kl_seq = []
    topn = seq.pop(0)
    for i in xrange(len(seq) - 1):
        kl_seq.append([seq[i][1], compute_kl2(seq[i], seq[i+1])])

    dump(kl_seq, open(dtype + '_kl_seq' + suffix, 'w'))

def make_pos2(jump, temp):
    """
    1. read raw output
    2. compute precision & recall on nonadjacent and adjacent contents
    3. evaluate posterior probability on different data sizes
    4. dump the sequence

    run: mpiexec -n 4
    """
    print 'loading..'; fff()
    rec = load_hypo('out/simulations/nonadjacent/', ['0'])  # TODO

    print 'estimating pr'; fff()
    pr_dict = {}
    _set = set()
    cnt_tmp = {}
    for e in rec:
        for h in e[1]:
            if h in _set:
                continue

            cnt = Counter([h() for _ in xrange(1024)])
            cnt_tmp[h] = cnt
            base = sum(cnt.values())
            num = 0
            for k, v in cnt.iteritems():
                if k is None or len(k) < 2:
                    continue
                if k[0] + k[-1] in ['ab', 'cd', 'ef']:
                    num += v
            pr_dict[h] = float(num) / base

            # fix the h_output
            h.h_output = cnt
            _set.add(h)

    work_list = range(2, 17, jump)
    for i in work_list:
        language = LongDependency(max_length=i)

        eval_data = {}
        for e in language.str_sets:
            eval_data[e] = 144.0 / len(language.str_sets)
        eval_data = [FunctionData(input=[], output=eval_data)]

        score = np.zeros(len(_set), dtype=np.float64)
        prec = np.zeros(len(_set), dtype=np.float64)
        # prob_dict = {}
        # test_list = []
        for ind, h in enumerate(_set):
            h.likelihood_temperature = temp
            score[ind] = h.compute_posterior(eval_data)
            prec[ind] = pr_dict[h]
            # prob_dict[h] = h.compute_posterior(eval_data)
            # test_list.append([h.posterior_score, pr_dict[h], cnt_tmp[h], str(h), h])

        # test_list.sort(key=lambda x: x[0], reverse=True)
        # Z = logsumexp([h.posterior_score for h in _set])
        # weighted_axb = sum([np.exp(e[0] - Z) * e[1] for e in test_list])
        # print i, weighted_axb
        # for i_t in xrange(3):
        #     print 'prob: ', np.exp(test_list[i_t][0] - Z), 'axb_f-score', test_list[i_t][1]
        #     print test_list[i_t][2]
        # print '='*50
        # fff()
        # f = open('non_w'+suffix, 'a')
        # print >> f, Z, weighted_axb
        # f.close()
        # print 'size: %i' % i, Z, weighted_axb; fff()

        if rank != 0:
            comm.send(score, dest=0)
            comm.send(prec, dest=0)
            sys.exit(0)
        else:
            for r in xrange(size - 1):
                score += comm.recv(source=r+1)
                prec += comm.recv(source=r+1)
            score /= size
            prec /= size

            Z = logsumexp(score)
            weighted_axb = np.sum(np.exp(score - Z) * prec)

            f = open('non_w' + suffix, 'a')
            print >> f, Z, weighted_axb
            print i, Z, weighted_axb
            fff()
            f.close()

def parse_nonadjacent(temperature):
    """
    load the hypothesis space and compute weighted F-scores of nonadjacent dependency on different pool sizes

    replaces the make_pos function

    example script:
        mpiexec -n 12 python parse_hypothesis.py --mode=nonadjacent_mk --temp=100
    """
    eval_data_size = 1024
    global size
    global rank
    pr_dict = {}
    _set = set()

    if rank == 0:
        print 'loading..'; fff()
        rec = load_hypo('out/simulations/nonadjacent/', ['_'])

        print 'estimating pr'; fff()
        for e in rec:
            for h in e[1]:
                if h in _set:
                    continue

                cnt = Counter([h() for _ in xrange(eval_data_size)])
                num = 0
                for k, v in cnt.iteritems():
                    if k is None or len(k) < 2:
                        continue
                    if k[0] + k[-1] in ['ab', 'cd', 'ef']:
                        num += v
                pr_dict[h] = float(num) / eval_data_size
                _set.add(h)

        # debug
        _list = [[h, pr] for h, pr in pr_dict.iteritems()]
        _list.sort(key=lambda x: x[1], reverse=True)
        for i in xrange(10):
            print 'p,r: ', _list[i][1],
            print Counter([_list[i][0]() for _ in xrange(256)])
            print _list[i][0]
            print '=' * 50
        fff()

    print "sync..."; fff()
    pr_dict = comm.bcast(pr_dict, root=0)
    _set = comm.bcast(_set, root=0)

    # work_list = slice_list(np.arange(2, 65, 2), size)
    work_list = slice_list(np.arange(10, 66, 5), size)
    seq = []
    for s in work_list[rank]:
        language = LongDependency(max_length=s)
        eval_data = [FunctionData(input=[],
                                  output={e: float(eval_data_size) / s for e in language.str_sets})]

        for h in _set:
            h.likelihood_temperature = temperature
            h.compute_posterior(eval_data)

        Z = logsumexp([h.posterior_score for h in _set])
        seq.append([s, sum([pr_dict[h] * np.exp(h.posterior_score - Z) for h in _set])])

        # debug
        _list = [h for h in _set]
        _list.sort(key=lambda x: x.posterior_score, reverse=True)
        print 'pool size: ', s
        for i in xrange(3):
            print 'prob: ', np.exp(_list[i].posterior_score - Z), 'p,r: ', pr_dict[_list[i]],
            print Counter([_list[i]() for _ in xrange(256)])
            print _list[i]
            print '=' * 50
        fff()

    if rank == 0:
        for i in xrange(1, size):
            seq += comm.recv(source=i)
    else:
        comm.send(seq, dest=0)
        sys.exit(0)

    seq.sort(key=lambda x: x[0])
    f = open('nonadjacent_wfs_seq' + suffix, 'w')
    for s, wfs in seq:
        print >> f, s, wfs
    f.close()

# print h.prior, h.likelihood
# print h
# print sorted(list(h.top_strings))
# print sorted(list(top_data_strings))
# print top_data_strings - h.top_strings
# print "---------------"

print "# Computed hypotheses for ", options.LANG

precision, recall = numpy.zeros(options.NDATA), numpy.zeros(options.NDATA)
for i, di in enumerate(data_range):
    posteriors = [h.prior + h.likelihood * float(di) / float(LARGE_SAMPLE) for h in hypotheses]
    # posteriors = [h.posteriors[i] for h in hypotheses]
    Z = logsumexp(posteriors)
    # print [(h.accuracy, exp(p-Z)) for h,p in zip(hypotheses, posteriors)]
    precision[i] = sum([h.precision * exp(p - Z) for h, p in zip(hypotheses, posteriors)])
    recall[i] = sum([h.recall * exp(p - Z) for h, p in zip(hypotheses, posteriors)])

print "# Computed precision and recall for ", options.LANG, precision[-1], recall[-1]

####################################################################################################################
# Plot it
####################################################################################################################

import matplotlib.pyplot as plt

fig = plt.figure(figsize=(2, 1.5))
p = fig.add_subplot(111)
p.semilogx(data_range, precision, linewidth=3)