Example No. 1
def makeVariableLexiconData(lexicon,
                            word,
                            context,
                            n=100,
                            s=1.0,
                            alpha=0.9,
                            verbose=False):
    data = []
    # Every (word, speaker, referent) triple the lexicon treats as true here.
    true_set = lexicon.make_true_data(context)
    all_poss_speakers = [t[1] for t in true_set]
    # Zipfian weight for each candidate speaker.
    p = [zipf(t, s, context, len(context.objects)) for t in all_poss_speakers]

    for i in xrange(n):
        if flip(alpha):
            # With probability alpha, emit a true datum: a Zipf-weighted
            # speaker, then a Zipf-weighted referent that `word` picks out
            # for that speaker.
            speaker = weighted_sample(all_poss_speakers, probs=p)
            referents = lexicon(word, context, set([speaker]))
            p1 = [zipf(t, s, context, len(context.objects)) for t in referents]
            referent = weighted_sample(referents, probs=p1)
            if verbose:
                print "True data:", i, word, speaker, referent
            data.append(KinshipData(word, speaker, referent, context))
        else:
            # Otherwise emit noise: a uniformly random speaker/referent pair.
            x = sample1(context.objects)
            y = sample1(context.objects)
            if verbose:
                print "Noise data:", i, word, x, y
            data.append(KinshipData(word, x, y, context))
    if verbose:
        print lexicon.compute_likelihood(data)
    return data
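
This example (and the two that follow) lean on helpers from the surrounding repository: flip, sample1, weighted_sample, and zipf, plus the lexicon and KinshipData API. Below is a minimal sketch of what the sampling helpers might look like, so the control flow above can be read standalone. The bodies are assumptions, not the repository's actual implementations; in particular, zipf's use of context.distance as a rank table is only a guess suggested by the assertion in Example No. 3 below.

import random

def flip(p):
    # True with probability p.
    return random.random() < p

def sample1(xs):
    # One element, uniformly at random.
    return random.choice(list(xs))

def weighted_sample(xs, probs=None):
    # One element drawn proportionally to probs, which may be a list of
    # weights aligned with xs or a callable mapping an element to a weight.
    xs = list(xs)
    if probs is None:
        return random.choice(xs)
    weights = [probs(x) for x in xs] if callable(probs) else list(probs)
    r = random.uniform(0.0, sum(weights))
    acc = 0.0
    for x, w in zip(xs, weights):
        acc += w
        if r <= acc:
            return x
    return xs[-1]

def zipf(obj, s, context, N):
    # Assumed Zipfian weight: rank**-s, normalized over N ranks.
    # That context.distance gives each object's 1-based rank is a guess.
    return (context.distance[obj] ** -s) / sum(r ** -s for r in xrange(1, N + 1))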
Example No. 2
def makeZipfianLexiconData(lexicon, word, context, n=100, s=1.0, alpha=0.9, verbose=False):  # TODO remove word param from Shift files
    # NOTE: the `word` argument is overwritten in both branches below, so it
    # is effectively unused (hence the TODO above).
    data = []
    true_set = lexicon.make_true_data(context)
    all_poss_speakers = [t[1] for t in true_set]
    p = [zipf(t, s, context, len(context.objects)) for t in all_poss_speakers]

    for i in xrange(n):
        if flip(alpha):
            speaker = weighted_sample(all_poss_speakers, probs=p)

            # For each word, the referents the speaker could pick out with it.
            bagR = {w: lexicon(w, context, set([speaker])) for w in lexicon.all_words()}
            uniqR = []
            for w in lexicon.all_words():
                uniqR.extend(bagR[w])
            # Note: extend() keeps duplicates, so a referent nameable by more
            # words is proportionally more likely to be sampled.

            p1 = [zipf(t, s, context, len(context.objects)) for t in uniqR]
            referent = weighted_sample(uniqR, probs=p1)

            # Sample a word uniformly among those that can name the referent.
            word = sample1([w for w in lexicon.all_words() if referent in bagR[w]])

            if verbose:
                print "True data:", i, word, speaker, referent
            data.append(KinshipData(word, speaker, referent, context))
        else:
            # Noise: a uniformly random word and speaker/referent pair.
            word = sample1(lexicon.all_words())
            x = sample1(context.objects)
            y = sample1(context.objects)
            if verbose:
                print "Noise data:", i, word, x, y
            data.append(KinshipData(word, x, y, context))
    if verbose:
        print lexicon.compute_likelihood(data)
    return data
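
The word-given-referent step above can be isolated with a plain dict standing in for lexicon(w, context, {speaker}). Everything in this sketch is made up for illustration; only the sampling logic mirrors the example:

import random

# Hypothetical per-word extensions for one fixed speaker.
bagR = {'uncle': ['bob', 'tom'], 'father': ['bob'], 'sister': ['amy']}

# Pool referents with duplicates, as the extend() loop above does:
# 'bob' appears twice, so it is twice as likely under uniform weights.
pool = [r for w in bagR for r in bagR[w]]
referent = random.choice(pool)

# Then pick uniformly among the words that can name that referent.
word = random.choice([w for w in bagR if referent in bagR[w]])
print word, referent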
Example No. 3
def makeZipfianLexiconData(lexicon,
                           context,
                           dfreq=None,
                           n=100,
                           s=1.0,
                           alpha=0.9,
                           epsilon=0.8,
                           verbose=False):
    '''

    L() --> P(W) [ eps * P(S|W) * P(R|W) + (1 - eps) * P(S|W) * P(R|SW) ]
    P(W) ~ dfreq or defaults to uniform
    P(S|W)  ~ Zipf(s) domain: all speakers that can use that word
    P(R|W)  ~ Zipf(s) domain: all people the learner has a word for
    P(R|SW) ~ Zipf(s) domain: all referents the speaker can use the word to refer to

    :param lexicon: the target lexicon
    :param context: the context
    :param dfreq: dictionary[word] = frequency weight (float)
    :param n: the number of data points
    :param s: the Zipfian exponent parameter
    :param alpha: the reliability parameter. Noise = 1 - alpha
    :param epsilon: the ego-centric probability
    :param verbose: print the generated data points
    :return: list of KinshipData objects
    '''
    assert context.distance is not None, "There are no distances in the context!"
    if dfreq is not None:
        assert set(lexicon.all_words()).issubset(set(
            dfreq.keys())), "Words in lexicon without frequencies"
        freq = lambda w: dfreq[w]
    else:
        freq = None
    data = []
    speakers = dict()
    egoRef = dict()
    for w in lexicon.all_words():
        speakers[w] = [t[1] for t in lexicon.make_word_data(w, context)]
        egoRef[w] = [
            t[2] for t in lexicon.make_word_data(w, context, fixX=context.ego)
        ]

    for i in xrange(n):
        if flip(alpha):
            wrd = weighted_sample(lexicon.all_words(), probs=freq)
            speaker = weighted_sample(
                speakers[wrd],
                probs=lambda x: zipf(x, s, context, len(context.objects)))
            if flip(epsilon):
                referent = weighted_sample(
                    egoRef[wrd],
                    probs=lambda x: zipf(x, s, context, len(context.objects)))
                eps = 'Ego'
            else:
                referent = weighted_sample(
                    lexicon(wrd, context, set([speaker])),
                    probs=lambda x: zipf(x, s, context, len(context.objects)))
                eps = 'Speaker'
            if verbose:
                print "True data:", i, wrd, speaker, referent, eps
            data.append(KinshipData(wrd, speaker, referent, context))
        else:
            wrd = weighted_sample(lexicon.all_words(), probs=freq)
            x = weighted_sample(
                context.objects,
                probs=lambda x: zipf(x, s, context, len(context.objects)))
            y = weighted_sample(
                context.objects,
                probs=lambda x: zipf(x, s, context, len(context.objects)))
            if verbose:
                print "Noise data:", i, wrd, x, y
            data.append(KinshipData(wrd, x, y, context))
    if verbose:
        print lexicon.compute_likelihood(data)
    return data
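
To make the docstring's mixture concrete, here is a standalone numeric sketch of the probability of one datum under that model. Every number is a made-up toy value, not something computed from a real lexicon or context:

# Toy probabilities for one candidate datum (all values hypothetical).
alpha, epsilon = 0.9, 0.8

p_word = 0.25      # P(W): e.g., uniform over a four-word lexicon
p_speaker = 0.5    # P(S|W): Zipf weight of this speaker among licensed speakers
p_ref_ego = 0.3    # P(R|W): ego-centric referent probability
p_ref_spk = 0.6    # P(R|SW): referent probability given this speaker
p_noise = p_word * 0.1 * 0.1  # P(W) * Zipf(x) * Zipf(y) in the noise branch

# eps mixes ego- vs. speaker-centric referents; alpha mixes true vs. noise.
p_true = p_word * p_speaker * (epsilon * p_ref_ego + (1 - epsilon) * p_ref_spk)
p_datum = alpha * p_true + (1 - alpha) * p_noise
print p_datum  # 0.04075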