def sample_utterance(self, possible_utterances, context):
    """
        Sample one utterance for `context`.

        The candidates are split by `self.partition_utterances` into three
        groups (presumably: true, false-but-presupposition-met, and the
        rest -- confirm against `partition_utterances`).

        - With `flip(self.palpha)` and a non-empty union of the first two
          groups, sample from that union; within that case, with
          `flip(self.alpha)` and a non-empty first group, sample from the
          first group only.
        - Otherwise sample from all of `possible_utterances`.

        Every draw is weighted by `self.weightfunction(u, context)` and is
        passed to `weighted_sample` with `log=False`.
    """
    true_utts, false_utts, _others = self.partition_utterances(
        possible_utterances, context)
    presup_met = set(true_utts).union(false_utts)

    def weights_for(utterances):
        # One weight per utterance, in the iteration order of `utterances`.
        return [self.weightfunction(u, context) for u in utterances]

    # The flip is evaluated before the emptiness check, exactly as before,
    # so the same number of random draws is consumed.
    if not (flip(self.palpha) and (len(presup_met) > 0)):
        # sample from all utterances
        return weighted_sample(possible_utterances,
                               probs=weights_for(possible_utterances),
                               log=False)

    # From here on we sample from utterances whose presupposition is met.
    if flip(self.alpha) and (len(true_utts) > 0):
        return weighted_sample(true_utts,
                               probs=weights_for(true_utts),
                               log=False)

    return weighted_sample(presup_met,
                           probs=weights_for(presup_met),
                           log=False)
def makeZipfianLexiconData(lexicon, word, context, n=100, s=1.0, alpha=0.9, verbose=False): # TODO remove word param from Shift files data = [] true_set = lexicon.make_true_data(context) all_poss_speakers = [ t[1] for t in true_set ] p = [ zipf(t, s, context, len(context.objects)) for t in all_poss_speakers ] for i in xrange(n): if flip(alpha): speaker = weighted_sample(all_poss_speakers, probs=p) bagR = {w : lexicon(w, context, set([speaker])) for w in lexicon.all_words()} uniqR = [] for w in lexicon.all_words(): uniqR.extend(bagR[w]) p1 = [ zipf(t, s, context, len(context.objects)) for t in uniqR ] referent = weighted_sample(uniqR, probs=p1) word = sample1([w for w in lexicon.all_words() if referent in bagR[w]]) if verbose: print "True data:", i, word, speaker, referent data.append(KinshipData(word, speaker, referent, context)) else: word = sample1(lexicon.all_words()) x = sample1(context.objects) y = sample1(context.objects) if verbose: print "Noise data:", i, word, x, y data.append(KinshipData(word, x, y, context)) if verbose: print lexicon.compute_likelihood(data) return data
def makeVariableLexiconData(lexicon, word, context, n=100, s=1.0, alpha=0.9, verbose=False): data = [] true_set = lexicon.make_true_data(context) all_poss_speakers = [t[1] for t in true_set] p = [zipf(t, s, context, len(context.objects)) for t in all_poss_speakers] for i in xrange(n): if flip(alpha): speaker = weighted_sample(all_poss_speakers, probs=p) referents = lexicon(word, context, set([speaker])) p1 = [zipf(t, s, context, len(context.objects)) for t in referents] referent = weighted_sample(referents, probs=p1) if verbose: print "True data:", i, word, speaker, referent data.append(KinshipData(word, speaker, referent, context)) else: x = sample1(context.objects) y = sample1(context.objects) if verbose: print "Noise data:", i, word, x, y data.append(KinshipData(word, x, y, context)) if verbose: print lexicon.compute_likelihood(data) return data
def propose(self):
    """
        Propose to the lexicon by flipping a coin for each word and
        proposing to it.

        A ProposalFailedException raised by an individual word's propose()
        is tolerated (that word is simply left unchanged), but the sweep
        over the words is repeated until at least one word was actually
        changed.

        NOTE: as a consequence this never returns if no word can ever be
        changed (e.g. the lexicon has no words, or the coin flip never
        succeeds).

        :return: (new lexicon, summed forward-backward values of the
                 words that were changed)
    """
    # A single copy is enough: a retry only happens when no word was
    # changed, in which case `new` is still an unmodified copy of self.
    new = deepcopy(self)

    fb = 0.0
    changed_any = False
    while not changed_any:
        for w in self.all_words():
            if flip(self.propose_p):
                try:
                    xp, xfb = self.get_word(w).propose()
                except ProposalFailedException:
                    # This word could not be proposed to; leave it as is.
                    pass
                else:
                    changed_any = True
                    new.set_word(w, xp)
                    fb += xfb

    return new, fb
def makeTreeLexiconData(lexicon, context, n=100, alpha=0.9, epsilon=0.9, verbose=False): ''' L() --> {(W,S,R)} data ~ uniform( L() ) :param lexicon: the target lexicon :param context: the context :param n: the number of data points :param alpha: the reliability parameter. Noise = 1 - alpha :param epsilon: the ego-centric probability. :param verbose: print the generated data points :return: list of KinshipData objects ''' data = [] tree_truth = lexicon.make_true_data(context) ego_truth = lexicon.make_true_data(context, fixX=context.ego) for s in xrange(n): if flip(alpha): if flip(epsilon): t = sample1(ego_truth) if verbose: print "True data:", s, t[0], t[1], t[2] data.append(KinshipData(t[0], t[1], t[2], context)) else: t = sample1(tree_truth) if verbose: print "True data:", s, t[0], t[1], t[2] data.append(KinshipData(t[0], t[1], t[2], context)) else: x = sample1(context.objects) y = sample1(context.objects) word = sample1(lexicon.all_words()) if verbose: print "Noise data:", s, word, x, y data.append(KinshipData(word, x, y, context)) if verbose: print lexicon.compute_likelihood(data) return data
def propose(self):
    """
        Build a proposal from this lexicon: every word is independently
        re-proposed when flip(self.propose_p) succeeds.

        The result starts as copy(self) (a shallow copy unless the class
        customises copying) and receives the newly proposed words through
        set_word.

        :return: (new lexicon, sum of the forward-backward values of the
                 re-proposed words; 0.0 if none was re-proposed)
    """
    proposal = copy(self)
    total_fb = 0.0

    for word in self.all_words():
        if not flip(self.propose_p):
            continue  # this word is kept unchanged
        word_proposal, word_fb = self.get_word(word).propose()
        proposal.set_word(word, word_proposal)
        total_fb += word_fb

    return proposal, total_fb
def propose(self):
    """
        Build a proposal from this lexicon: every word is independently
        re-proposed when flip(self.propose_p) succeeds.

        The result starts as deepcopy(self) and receives the newly
        proposed words through set_word.

        :return: (new lexicon, sum of the forward-backward values of the
                 re-proposed words; 0.0 if none was re-proposed)
    """
    proposal = deepcopy(self)
    total_fb = 0.0

    for word in self.all_words():
        if not flip(self.propose_p):
            continue  # this word is kept unchanged
        word_proposal, word_fb = self.get_word(word).propose()
        proposal.set_word(word, word_proposal)
        total_fb += word_fb

    return proposal, total_fb
def propose(self):
    """
        Default proposal to a lexicon -- at least one word is always
        re-proposed, plus a coin flip for each of the others.

        One key of self.value is drawn with weighted_sample and proposed
        to unconditionally; every other word is additionally proposed to
        when flip(self.propose_p) succeeds.

        :return: (new lexicon, sum of the forward-backward values of all
                 re-proposed words)
    """
    proposal = copy(self)

    # Propose one for sure
    forced_word = weighted_sample(self.value.keys())
    forced_value, total_fb = self.value[forced_word].propose()
    proposal.set_word(forced_word, forced_value)

    # ... and maybe some of the remaining words
    for other_word in self.all_words():
        if forced_word != other_word:
            if flip(self.propose_p):
                other_value, other_fb = self.value[other_word].propose()
                proposal.set_word(other_word, other_value)
                total_fb += other_fb

    return proposal, total_fb
def makeLexiconData(lexicon, context, n=100, alpha=0.9, verbose=False): data = [] if context.ego is None: tree_truth = lexicon.make_true_data(context) else: tree_truth = lexicon.make_true_data(context, fixX=context.ego) for s in xrange(n): if flip(alpha): t = sample1(tree_truth) if verbose: print "True data:", s, t data.append(KinshipData(t[0], t[1], t[2], context)) else: x = sample1(context.objects) y = sample1(context.objects) word = sample1(lexicon.all_words()) if verbose: print "Noise data:", s, word, x, y data.append(KinshipData(word, x, y, context)) if verbose: print lexicon.compute_likelihood(data) return data
def makeVariableLexiconData(lexicon, word, context, n=100, s=1.0, alpha=0.9, verbose=False): data = [] true_set = lexicon.make_true_data(context) all_poss_speakers = [ t[1] for t in true_set ] p = [ zipf(t, s, context, len(context.objects)) for t in all_poss_speakers ] for i in xrange(n): if flip(alpha): speaker = weighted_sample(all_poss_speakers, probs=p) referents = lexicon(word, context, set([speaker])) p1 = [ zipf(t, s, context, len(context.objects)) for t in referents ] referent = weighted_sample(referents, probs=p1) if verbose: print "True data:", i, word, speaker, referent data.append(KinshipData(word, speaker, referent, context)) else: x = sample1(context.objects) y = sample1(context.objects) if verbose: print "Noise data:", i, word, x, y data.append(KinshipData(word, x, y, context)) if verbose: print lexicon.compute_likelihood(data) return data
def flip_(p=0.5):
    """ Thin wrapper: forward `p` (default 0.5) to flip and return its result. """
    outcome = flip(p)
    return outcome
def makeZipfianLexiconData(lexicon, context, dfreq=None, n=100, s=1.0, alpha=0.9, epsilon=0.8, verbose=False): ''' L() --> P(W) [ eps P(S|W) P(R|W) + 1-eps P(S|W) P(R|SW)] P(W) ~ dfreq or defaults to uniform P(S|W) ~ Zipf(s) domain: all speakers that can use that word P(R|W) ~ Zipf(s) domain: all people the learner has a word for P(R|SW) ~ Zipf(s) domain: all referents the speaker can use the word to refer to :param lexicon: the target lexicon :param context: the context :param dfreq: dictionary[word] = frequency weight (float) :param n: the number of data points :param s: the zipfian exponent parameter :param alpha: the reliability parameter. Noise = 1 - alpha :param epsilon: the ego-centric probability :param verbose: print the generated data points :return: list of KinshipData objects ''' assert context.distance is not None, "There are no distances in the context!" if dfreq is not None: assert set(lexicon.all_words()).issubset(set( dfreq.keys())), "Words in lexicon without frequencies" freq = lambda w: dfreq[w] else: freq = None data = [] speakers = dict() egoRef = dict() for w in lexicon.all_words(): speakers[w] = [t[1] for t in lexicon.make_word_data(w, context)] egoRef[w] = [ t[2] for t in lexicon.make_word_data(w, context, fixX=context.ego) ] for i in xrange(n): if flip(alpha): wrd = weighted_sample(lexicon.all_words(), probs=freq) speaker = weighted_sample( speakers[wrd], probs=lambda x: zipf(x, s, context, len(context.objects))) if flip(epsilon): referent = weighted_sample( egoRef[wrd], probs=lambda x: zipf(x, s, context, len(context.objects))) eps = 'Ego' else: referent = weighted_sample( lexicon(wrd, context, set([speaker])), probs=lambda x: zipf(x, s, context, len(context.objects))) eps = 'Speaker' if verbose: print "True data:", i, wrd, speaker, referent, eps data.append(KinshipData(wrd, speaker, referent, context)) else: wrd = weighted_sample(lexicon.all_words(), probs=freq) x = weighted_sample( context.objects, probs=lambda x: zipf(x, s, context, 
len(context.objects))) y = weighted_sample( context.objects, probs=lambda x: zipf(x, s, context, len(context.objects))) if verbose: print "Noise data:", i, wrd, x, y data.append(KinshipData(wrd, x, y, context)) if verbose: print lexicon.compute_likelihood(data) return data