def or_d(x, y):
    out = defaultdict(lambdaMinusInfinity)
    # x OR y is False only when both arguments are False; True takes the complement mass.
    # (Summing only the mixed True/False terms, as before, would compute XOR, not OR,
    # since it drops the case where both arguments are True.)
    out[False] = x.get(False, -Infinity) + y.get(False, -Infinity)
    out[True] = log1mexp(out[False])
    return out
def compute_likelihood(self, data, update_post=True, **kwargs):
    """Use Bayesian model averaging over `self.hypotheses` to estimate the likelihood of generating
    the data.

    The probability of each output is a posterior-weighted average over hypotheses,
    p_o = sum_h { p(h | X) * 1[o in h()] }.

    Args:
        data (list): List of FunctionData objects.

    Returns:
        float: Log likelihood summed over all outputs and all hypotheses, weighted for each
            hypothesis by its posterior score p(h | X).
    """
    self.update()
    hypotheses = self.hypotheses
    likelihood = 0.0

    for d in data:
        posteriors = [sum(h.compute_posterior(d.input)) for h in hypotheses]
        Z = logsumexp(posteriors)
        weights = [(post - Z) for post in posteriors]

        for o in d.output.keys():
            # Probability of "yes" on output `o` is the weighted sum over hypotheses that contain `o`
            p = logsumexp([w if o in h() else -Infinity for h, w in zip(hypotheses, weights)])
            p = -1e-10 if p >= 0 else p     # clamp tiny numerical overshoots above log(1)
            k = d.output[o][0]              # num. yes responses
            n = k + d.output[o][1]          # num. trials
            bc = gammaln(n+1) - (gammaln(k+1) + gammaln(n-k+1))  # log binomial coefficient
            likelihood += bc + (k*p) + (n-k)*log1mexp(p)         # log probability of the human responses

    if update_post:
        self.likelihood = likelihood
        self.update_posterior()
    return likelihood
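# For reference, the quantity accumulated above is a model-averaged binomial log likelihood:
# with normalized weights w_h = exp(post(h) - Z) and p_o = sum_{h : o in h()} w_h,
# each output o contributes
#
#     log C(n_o, k_o) + k_o * log(p_o) + (n_o - k_o) * log(1 - p_o)
#
# i.e. the log probability of observing k_o "yes" responses out of n_o trials when each
# trial says "yes" with the averaged probability p_o.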
def prob_data(grammar, input_data, output_data, num_iters=10000, alpha=0.9):
    """Compute the probability of generating the human data given our grammar & input data.

    Args:
        grammar (LOTlib.Grammar): The grammar.
        input_data (list): List of integers; the likelihood of the model is initially computed
            with these.
        output_data (dict): Maps each output to a tuple (# yes, # no) of responses in the
            human data.

    Returns:
        float: Estimated log probability of generating the human data.
    """
    model_likelihoods = likelihood_data(grammar, input_data, output_data, num_iters, alpha)
    p_output = -Infinity

    for o in output_data.keys():
        p = model_likelihoods[o]
        k = output_data[o][0]                                # num. yes responses
        n = k + output_data[o][1]                            # num. trials
        bc = factorial(n) / (factorial(k) * factorial(n-k))  # binomial coefficient
        p_o = log(bc) + (k*p) + (n-k)*log1mexp(p)            # log version
        p_output = logplusexp(p_output, p_o)
        # p_gen_human_data[o] = bc * pow(p, k) * pow(1-p, n-k)    # linear version

    return p_output
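# The factorial-based coefficient above is exact but builds enormous intermediate integers
# for large n; the gammaln form used in the other snippets here stays entirely in log space.
# A minimal sketch of the equivalence, assuming scipy is available:

from math import log, factorial
from scipy.special import gammaln

def log_binom(n, k):
    # log C(n, k) via log-gamma: ln n! - (ln k! + ln (n-k)!)
    return gammaln(n + 1) - (gammaln(k + 1) + gammaln(n - k + 1))

print(abs(log_binom(20, 7) - log(factorial(20) // (factorial(7) * factorial(13)))))  # ~0.0
print(log_binom(10000, 42))  # cheap in log space; the factorials would be astronomically large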
def compute_single_likelihood_MPI(self, input_args):
    d_index, d, P = input_args
    posteriors = self.L[d_index] + P
    Z = logsumexp(posteriors)
    w = np.exp(posteriors - Z)              # weights for each hypothesis
    r_i = np.transpose(self.R[d_index])
    w_times_R = w * r_i

    likelihood = 0.0
    # Compute likelihood of producing same output (yes/no) as data
    for q, r, m in d.get_queries():
        # Col `m` of boolean matrix `R[i]` weighted by `w`
        query_col = w_times_R[m, :]
        exp_p = query_col.sum()
        p = log(exp_p)
        ## p = log((np.exp(w) * self.R[d_index][:, m]).sum())

        # NOTE: with really small grammars sometimes we get p > 0
        if p >= 0:
            print 'P ERROR!'

        yes, no = r
        k = yes                             # num. yes responses
        n = yes + no                        # num. trials
        bc = gammaln(n+1) - (gammaln(k+1) + gammaln(n-k+1))  # log binomial coefficient
        l1mp = log1mexp(p)
        likelihood += bc + (k*p) + (n-k)*l1mp   # log probability of the human responses

    return likelihood
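# A minimal sketch of the vectorized model-averaging step above, with illustrative values
# (numpy and scipy.special.logsumexp assumed; R[h, m] marks whether hypothesis h answers
# "yes" to query m):

import numpy as np
from scipy.special import logsumexp

posteriors = np.array([-2.0, -1.0, -3.0])       # unnormalized log posteriors, one per hypothesis
w = np.exp(posteriors - logsumexp(posteriors))  # normalized hypothesis weights (sum to 1)
R = np.array([[1, 0],
              [1, 1],
              [0, 1]], dtype=float)             # R[h, m] = 1 iff hypothesis h says "yes" to query m
p_yes = np.log(w.dot(R))                        # log of model-averaged P(yes) for each query
print(p_yes)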
def __init__(self, grammar=None, value=None, f=None, gamma=-30, **kwargs):
    RecursiveLOTHypothesis.__init__(self, grammar, value=value, f=f, **kwargs)
    self.gamma = gamma
    self.lg1mgamma = log1mexp(gamma)
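# `gamma` is the log probability penalty for recursion (see the GAMMA constant further down),
# so `log1mexp(gamma)` is the log probability of the non-recursive alternative. A quick numeric
# illustration, using a stand-in for LOTlib's log1mexp helper (assumed to compute log(1 - exp(a))):

from math import exp, log, expm1

def log1mexp(a):
    # stand-in: log(1 - exp(a)), valid for a <= 0
    return log(-expm1(a))

gamma = -30.0
print(exp(gamma))       # P(recurse) ~ 9.36e-14
print(log1mexp(gamma))  # log P(no recursion) ~ -9.36e-14, essentially log(1)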
def compute_likelihood(self, data, **kwargs):
    self.update()
    hypotheses = self.hypotheses
    likelihood = 0.0

    for d in data:
        # compute_posterior returns (prior, likelihood); sum once rather than calling it twice
        posteriors = [sum(h.compute_posterior(d.input)) for h in hypotheses]
        zo = logsumexp(posteriors)
        weights = [(post - zo) for post in posteriors]

        for o in d.output.keys():
            # Probability of "yes" on output `o` is the weighted sum over hypotheses that contain `o`
            p = logsumexp([w if o.Y in h(o.word, o.context, set([o.Y])) else -Infinity
                           for h, w in zip(hypotheses, weights)])
            p = -1e-10 if p >= 0 else p     # clamp tiny numerical overshoots above log(1)
            k = d.output[o][0]              # num. yes responses
            n = k + d.output[o][1]          # num. trials
            bc = gammaln(n + 1) - (gammaln(k + 1) + gammaln(n - k + 1))  # log binomial coefficient
            likelihood += bc + (k * p) + (n - k) * log1mexp(p)           # log probability of the human responses

    return likelihood
def compute_likelihood(self, data, update_post=True, **kwargs):
    """Compute the likelihood of producing the human data, given H (self.hypotheses) & x (self.value)."""
    # The following must be computed for this specific GrammarHypothesis
    # ------------------------------------------------------------------
    x = self.normalized_value()         # vector of rule probabilities
    P = np.dot(self.C, x)               # prior for each hypothesis
    likelihood = 0.0

    for d_key, d in enumerate(data):
        # Initialize unfilled values for L[data] & R[data]
        if d_key not in self.L:
            self.init_L(d, d_key)
        if d_key not in self.R:
            self.init_R(d, d_key)

        posteriors = self.L[d_key] + P
        Z = logsumexp(posteriors)
        w = posteriors - Z              # log weights for each hypothesis

        # Compute likelihood of producing same output (yes/no) as data
        for m, o in enumerate(d.output.keys()):
            # Col `m` of boolean matrix `R[i]` weighted by `w`
            p = log((np.exp(w) * self.R[d_key][:, m]).sum())
            # NOTE: with really small grammars sometimes we get p > 0
            if p >= 0:
                print "P ERROR!"

            k = d.output[o][0]          # num. yes responses
            n = k + d.output[o][1]      # num. trials
            bc = gammaln(n + 1) - (gammaln(k + 1) + gammaln(n - k + 1))  # log binomial coefficient
            likelihood += bc + (k * p) + (n - k) * log1mexp(p)           # log probability of the human responses

    if update_post:
        self.likelihood = likelihood
        self.update_posterior()
    return likelihood
def empty_d(x):
    p = x.get("", -Infinity)
    return {True: p, False: log1mexp(p)}
def not_d(x):
    out = defaultdict(lambdaMinusInfinity)
    out[True] = x.get(False, -Infinity)
    out[False] = log1mexp(out[True])
    return out
def and_d(x, y):
    out = defaultdict(lambdaMinusInfinity)
    out[True] = x.get(True, -Infinity) + y.get(True, -Infinity)
    out[False] = log1mexp(out[True])
    return out
def equal_d(x, y):
    peq = -Infinity
    for a, v in x.items():
        peq = logplusexp(peq, v + y.get(a, -Infinity))  # P(x=a, y=a)
    return {True: peq, False: log1mexp(peq)}
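# The combinators above (or_d, and_d, not_d, equal_d) operate on distributions represented as
# dicts mapping outcomes to log probabilities. A minimal self-contained check, with stand-in
# definitions for LOTlib.Miscellaneous's helpers (the stand-ins are assumptions matching their
# documented semantics):

from collections import defaultdict
from math import log, exp, expm1

Infinity = float('inf')

def lambdaMinusInfinity():
    # default log probability for outcomes with no mass
    return -Infinity

def logplusexp(a, b):
    # log(exp(a) + exp(b)), computed stably in log space
    if a == -Infinity: return b
    if b == -Infinity: return a
    m = max(a, b)
    return m + log(exp(a - m) + exp(b - m))

def log1mexp(a):
    # log(1 - exp(a)) for a <= 0
    if a == -Infinity:
        return 0.0
    return log(-expm1(a))

x = {True: log(0.7), False: log(0.3)}
y = {True: log(0.5), False: log(0.5)}

print(exp(and_d(x, y)[True]))    # 0.35 = 0.7 * 0.5
print(exp(or_d(x, y)[True]))     # 0.85 = 1 - 0.3 * 0.5
print(exp(not_d(x)[True]))       # 0.3
print(exp(equal_d(x, y)[True]))  # 0.5  = 0.7*0.5 + 0.3*0.5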
from LOTlib.Hypotheses.RecursiveLOTHypothesis import RecursiveLOTHypothesis
from LOTlib.Miscellaneous import log, Infinity, log1mexp, attrmem
from LOTlib.Evaluation.EvaluationException import EvaluationException

# for computing knower-levels
from Data import sample_sets_of_objects, all_objects, word_to_number, ALPHA

# ============================================================================================================
# Define a class for running MH

GAMMA = -30.0                 # the log probability penalty for recursion
LG_1MGAMMA = log1mexp(GAMMA)
MAX_NODES = 50                # How many FunctionNodes are allowed in a hypothesis? If we make this 20, things may slow down.

from Grammar import grammar

def make_hypothesis(**kwargs):
    """Default hypothesis creation."""
    return NumberExpression(grammar, **kwargs)

class NumberExpression(RecursiveLOTHypothesis):

    def __init__(self, grammar=None, value=None, f=None, args=['x'], **kwargs):
        RecursiveLOTHypothesis.__init__(self, grammar, value=value, args=['x'], **kwargs)

    def __call__(self, *args):
        try:
            return RecursiveLOTHypothesis.__call__(self, *args)
        except EvaluationException:  # catch recursion and too-big expressions
            return None              # body truncated in the source; returning None on failure is an assumed completion