Exemplo n.º 1
0
    def compute_likelihood(self, data, **kwargs):
        """Compute the log likelihood of ``data`` under this hypothesis.

        Each datum is scored as a noisy mixture: with probability ``alpha``
        the (word, X, Y) triple is drawn uniformly from the set of true
        triples, otherwise uniformly from all possible triples.  True data
        and the possibility count are cached per context.  Sets
        ``self.likelihood`` (temperature-scaled), updates the posterior and
        returns the likelihood.  A RecursionDepthException while evaluating
        rejects the hypothesis with -Infinity.
        """
        constants = dict()  # per-context cache: context -> (trueset, all_poss)
        ll = 0
        for datum in data:
            if datum.context in constants:  # `in dict` is the idiomatic membership test
                trueset, all_poss = constants[datum.context]
            else:
                try:
                    if datum.context.ego is None:
                        trueset = self.make_true_data(datum.context)
                    else:
                        # fix the speaker slot to the context's ego
                        trueset = self.make_true_data(datum.context, fixX=datum.context.ego)

                    # total number of (word, speaker, referent) triples
                    all_poss = len(self.all_words()) * len(datum.context.objects) ** 2
                    constants[datum.context] = (trueset, all_poss)
                except RecursionDepthException:
                    # ill-formed recursive hypothesis: reject outright
                    self.likelihood = -Infinity
                    self.update_posterior()
                    return self.likelihood

            if (datum.word, datum.X, datum.Y) in trueset:
                ll += log(self.alpha / len(trueset) + ((1. - self.alpha) / all_poss))
            else:
                ll += log((1. - self.alpha) / all_poss)

        self.likelihood = ll / self.likelihood_temperature

        self.update_posterior()
        return self.likelihood
def compute_Zipf_likelihood(lexicon, data, s):
    """Log likelihood of ``data`` under ``lexicon`` with Zipfian choice.

    ``s`` is the Zipf exponent.  Per-context quantities (word -> true data,
    the number of possible (speaker, referent) pairs, and the Zipf
    normalizer) are cached.  Returns -Infinity if evaluating the lexicon
    exceeds the recursion bound.
    """
    constants = dict()  # context -> (trueset, all_poss, margin)
    ll = 0
    for datum in data:
        if datum.context in constants:  # idiomatic membership test, no .keys()
            trueset, all_poss, margin = constants[datum.context]
        else:
            try:
                if datum.context.ego is None:
                    trueset = {w: lexicon.make_word_data(w, datum.context)
                               for w in lexicon.all_words()}
                else:
                    # speaker fixed to the context's ego
                    trueset = {w: lexicon.make_word_data(w, datum.context, fixX=datum.context.ego)
                               for w in lexicon.all_words()}

                all_poss = len(datum.context.objects) ** 2
                # NOTE(review): trueset is a dict, so iterating it yields the
                # word keys and t[1] is the second character of each word --
                # this looks written for a set of triples; confirm intent.
                all_poss_speakers = {t[1] for t in trueset}
                # Zipf normalizing constant (exact via Fraction, then float)
                margin = float(sum(Fraction(1, d) ** s for d in xrange(1, len(all_poss_speakers) + 1)))
                constants[datum.context] = (trueset, all_poss, margin)
            except RecursionDepthException:
                return -Infinity

        if (datum.word, datum.X, datum.Y) in trueset[datum.word]:
            # p(speaker) * p(referent | speaker, word), Zipf-weighted by distance
            pS = (datum.context.distance[datum.Y] ** -s) / margin
            pRgS = (datum.context.distance[datum.Y] ** -s) / sum(
                [(datum.context.distance[ref] ** -s) for ref in lexicon(datum.word, datum.context, set([datum.X]))])
            ll += log(lexicon.alpha * pS * pRgS + ((1. - lexicon.alpha) / all_poss))
        else:
            # noise: uniform over all possible pairs
            ll += log((1. - lexicon.alpha) / all_poss)

    return ll / lexicon.likelihood_temperature
Exemplo n.º 3
0
    def compute_likelihood(self, data, **kwargs):
        """Zipfian likelihood, assuming every datum shares one context."""
        context = data[0].context  # Hack: same context for all likelihood
        trueset = self.make_true_data(context)
        all_poss = len(self.all_words()) * len(context.objects) ** 2
        speakers = set([t[1] for t in trueset])
        # normalizer of the truncated Zipf distribution over speakers
        margin = float(sum(Fraction(1, d) ** self.s for d in xrange(1, len(speakers) + 1)))

        ll = 0
        for datum in data:
            if (datum.word, datum.X, datum.Y) in trueset:
                dY = context.distance[datum.Y] ** -self.s
                pS = dY / margin
                refs = self(self.words[0], context, set([datum.X]))
                pRgS = dY / sum([context.distance[ref] ** -self.s for ref in refs])
                ll += log(self.alpha * pS * pRgS + ((1. - self.alpha) / all_poss))
            else:
                ll += log((1. - self.alpha) / all_poss)

        self.likelihood = ll / self.likelihood_temperature

        self.update_posterior()
        return self.likelihood
Exemplo n.º 4
0
    def compute_single_likelihood(self, datum):
        """Compute the log likelihood of a single datum.

        The hypothesis is evaluated on ``datum.input``; an undefined result
        is scored against a uniform base distribution over 10 outcomes,
        otherwise the base is mixed with an alpha-weighted indicator of a
        correct answer.

            TODO: Make sure this precisely matches the number paper.
        """
        response = self(*datum.input)
        # use `is None` (identity), not `== None` (PEP 8)
        if response == "undef" or response is None:
            return log(1.0 / 10.0)  # if undefined, just sample from a base distribution
        else:
            return log((1.0 - datum.alpha) / 10.0 + datum.alpha * (response == datum.output))
Exemplo n.º 5
0
    def compute_single_likelihood(self, datum):
        """Compute the log likelihood of a single datum.

        Evaluates the hypothesis on ``datum.input``; an undefined response
        falls back to a uniform base distribution over 10 outcomes, otherwise
        the base is mixed with an alpha-weighted indicator of correctness.

            TODO: Make sure this precisely matches the number paper.
        """
        response = self(*datum.input)
        # `is None` (identity) rather than `== None` (PEP 8)
        if response == 'undef' or response is None:
            # if undefined, just sample from a base distribution
            return log(1.0 / 10.0)
        else:
            return log((1.0 - datum.alpha) / 10.0 + datum.alpha *
                       (response == datum.output))
Exemplo n.º 6
0
    def compute_L_likelihood(self, data, eval=False, **kwargs):
        """Likelihood where the noise distribution is uniform over objects.

        True data and the object count are cached per context.  On the first
        datum the hypothesis is rejected (-Infinity) if its recursion is not
        well formed (check skipped when ``eval`` is True).  Sets and returns
        ``self.likelihood`` (temperature-scaled) after updating the posterior.
        """
        constants = dict()  # context -> (trueset, all_poss)
        ll = 0
        for di, datum in enumerate(data):
            # Cache constants
            if datum.context in constants:
                # BUG FIX: cache entries have only two slots; the old read of
                # index [3] raised IndexError whenever a context repeated.
                trueset, all_poss = constants[datum.context]
            else:
                try:
                    trueset = self.make_true_data(datum.context)
                    all_poss = len(datum.context.objects)

                    constants[datum.context] = (trueset, all_poss)
                except RecursionDepthException:
                    self.likelihood = -Infinity
                    self.update_posterior()
                    return self.likelihood
            # Make sure recursion is well formed
            if di == 0:
                if not eval and not self.canIrecurse(data, trueset):
                    self.likelihood = -Infinity
                    self.update_posterior()
                    return self.likelihood
            # Calculate the single point likelihood
            p = (1. - self.alpha) / all_poss  # uniform noise floor
            if (datum.word, datum.X, datum.Y) in trueset:
                p += self.alpha * len(trueset) ** -1
            ll += log(p)

        self.likelihood = ll / self.likelihood_temperature

        self.update_posterior()
        return self.likelihood
Exemplo n.º 7
0
def compute_Zipf_likelihood(lexicon, data, s):
    """Log likelihood of ``data`` under ``lexicon`` with Zipfian (exponent
    ``s``) speaker/referent choice; returns -Infinity on runaway recursion.
    """
    constants = dict()  # context -> [trueset, all_poss, margin] cache
    ll = 0
    for datum in data:
        # Reuse per-context constants when the same context repeats.
        if datum.context in constants.keys():
            trueset = constants[datum.context][0]
            all_poss = constants[datum.context][1]
            margin = constants[datum.context][2]
        else:
            try:
                if datum.context.ego is None:
                    trueset = {
                        w: lexicon.make_word_data(w, datum.context)
                        for w in lexicon.all_words()
                    }
                else:
                    # speaker fixed to the context's ego
                    trueset = {
                        w: lexicon.make_word_data(w,
                                                  datum.context,
                                                  fixX=datum.context.ego)
                        for w in lexicon.all_words()
                    }

                # every (speaker, referent) pair
                all_poss = len(datum.context.objects)**2
                # NOTE(review): trueset is a dict, so t iterates over the
                # word keys and t[1] is a character of the word -- this looks
                # written for a set of triples; confirm intent.
                all_poss_speakers = set([t[1] for t in trueset])
                # Zipf normalizing constant (exact via Fraction, then float)
                margin = float(
                    sum(
                        Fraction(1, d)**s
                        for d in xrange(1,
                                        len(all_poss_speakers) + 1)))
                constants[datum.context] = [trueset, all_poss, margin]
            except RecursionDepthException:
                return -Infinity

        if (datum.word, datum.X, datum.Y) in trueset[datum.word]:
            # p(speaker) * p(referent | speaker, word), Zipf-weighted by
            # context distance.
            pS = (datum.context.distance[datum.Y]**-s) / margin
            pRgS = (datum.context.distance[datum.Y]**-s) / sum([
                (datum.context.distance[ref]**-s)
                for ref in lexicon(datum.word, datum.context, set([datum.X]))
            ])
            ll += log(lexicon.alpha * pS * pRgS +
                      ((1. - lexicon.alpha) / all_poss))
        else:
            # noise: uniform over all possible pairs
            ll += log((1. - lexicon.alpha) / all_poss)

    return ll / lexicon.likelihood_temperature
Exemplo n.º 8
0
def compute_word_ll(word, h, data):
    """Log likelihood of all data points labelled with `word` under hypothesis h."""
    relevant = [dp for dp in data if dp.word == word]
    if not relevant:
        return 0
    context = relevant[0].context
    # enumerate every (word, speaker, referent) triple that h makes true here
    trueset = set()
    for speaker in context.objects:
        for referent in h('', context, set([speaker])):  # speaker must be passed as a set
            trueset.add((word, speaker, referent))
    all_poss = len(context.objects)**2
    ll = 0
    for datum in relevant:
        if (datum.word, datum.X, datum.Y) in trueset:
            ll += log(options.alpha / len(trueset) +
                      ((1. - options.alpha) / all_poss))
        else:
            ll += log((1. - options.alpha) / all_poss)
    return ll
Exemplo n.º 9
0
    def compute_likelihood(self, data, **kwargs):
        """Mixture likelihood with optional ego-fixed speaker, cached per context.

        When the context has an ego, the speaker slot is fixed, so the space
        of possibilities shrinks from words*objects^2 to words*objects.  The
        first datum additionally gates on ``canIrecurse`` to reject
        ill-formed recursive lexica with -Infinity.  Sets and returns
        ``self.likelihood`` (temperature-scaled) after updating the posterior.
        """
        constants = dict()  # context -> (trueset, all_poss)
        ll = 0
        for di, datum in enumerate(data):
            if datum.context in constants:  # idiomatic membership, no .keys()
                trueset, all_poss = constants[datum.context]
            else:
                try:
                    if datum.context.ego is None:
                        trueset = self.make_true_data(datum.context)
                        all_poss = len(self.all_words()) * len(datum.context.objects) ** 2
                    else:
                        # speaker fixed to ego: only words * referents remain
                        trueset = self.make_true_data(datum.context, fixX=datum.context.ego)
                        all_poss = len(self.all_words()) * len(datum.context.objects)

                    constants[datum.context] = (trueset, all_poss)
                except RecursionDepthException:
                    self.likelihood = -Infinity
                    self.update_posterior()
                    return self.likelihood

            # Check to see if you can recurse and if that matters
            if di == 0:
                if not self.canIrecurse(data, trueset):
                    self.likelihood = -Infinity
                    self.update_posterior()
                    return self.likelihood

            if (datum.word, datum.X, datum.Y) in trueset:
                ll += log(self.alpha / len(trueset) + ((1. - self.alpha) / all_poss))
            else:
                ll += log((1. - self.alpha) / all_poss)

        self.likelihood = ll / self.likelihood_temperature

        self.update_posterior()
        return self.likelihood
def compute_likelihood(self, s, data, word):
    """Zipfian likelihood of ``data`` (all sharing one context) for ``word``.

    NOTE(review): defined at top level yet takes ``self`` -- presumably
    extracted from a class body; confirm before reusing standalone.
    """
    ll = 0
    context = data[0].context  # Hack same context for all likelihood
    trueset = self.make_true_data(context)
    # every (word, speaker, referent) triple
    all_poss = len(self.all_words()) * len(context.objects) ** 2
    all_poss_speakers = set([t[1] for t in trueset])
    # Zipf normalizer over the speakers (exact via Fraction, then float)
    margin = float(sum(Fraction(1, d) ** s for d in xrange(1, len(all_poss_speakers) + 1)))

    for datum in data:
        if (datum.word, datum.X, datum.Y) in trueset:
            # p(speaker) * p(referent | speaker, word), Zipf-weighted by distance
            pS = (context.distance[datum.Y] ** -s) / margin
            pRgS = (context.distance[datum.Y] ** -s) / sum(
                [(context.distance[ref] ** -s) for ref in self(word, context, set([datum.X]))])
            ll += log(self.alpha * pS * pRgS + ((1. - self.alpha) / all_poss))
        else:
            # noise: uniform over all possibilities
            ll += log((1. - self.alpha) / all_poss)

    self.likelihood = ll / self.likelihood_temperature

    self.update_posterior()
    return self.likelihood
Exemplo n.º 11
0
    def compute_single_likelihood(self, udi):
        """
                Compute the likelihood of a single data point, udi, an utteranceData
        """
        assert isinstance(udi, UtteranceData)

        # Types of utterances: true, false, and other (undefined) in context
        trues, falses, others = self.partition_utterances(
            udi.possible_utterances, udi.context)
        #print "T:", trues
        #print "F:", falses
        #print "U:", others
        u = udi.utterance

        # compute the weights (trues are also counted toward "met")
        all_weights = sum(
            map(lambda u: self.weightfunction(u, udi.context),
                udi.possible_utterances))
        true_weights = sum(
            map(lambda u: self.weightfunction(u, udi.context), trues))
        met_weights = sum(
            map(lambda u: self.weightfunction(u, udi.context),
                falses)) + true_weights

        w = self.weightfunction(u, udi.context)  # the current word weight
        if (u in trues):
            # mixture: truthful speaker, presupposition-met speaker, noise
            p = self.palpha * self.alpha * w / true_weights + self.palpha *  \
            (1.0 - self.alpha) * w / met_weights + (1.0 - self.palpha) * w / \
            all_weights # choose from the trues
        elif (u in falses):
            # ifelse guards the degenerate case where nothing true has weight
            p = ifelse(true_weights == 0, 1.0,
                       1.0 - self.alpha) * self.palpha * w / met_weights + (
                           1.0 - self.palpha
                       ) * w / all_weights  # choose from the trues
        else:
            p = ifelse(met_weights == 0, 1.0,
                       (1.0 - self.palpha)) * w / all_weights
        """
        TODO: WHY NOT THIS WAY, IGNORING tre_weights==0? Because if we sample, then we have 0 chance of getting a true when true_weights is like that. This causes problems in CCGLexicon
        w = self.weightfunction(u, udi.context) # the current word weight
        if   (u in trues):  p = self.palpha * (self.alpha * w / true_weights + (1.0 - self.alpha) * w / met_weights) + (1.0 - self.palpha) * w / all_weights # choose from the trues
        elif (u in falses): p = self.palpha * (1.0-self.alpha) * w / met_weights + (1.0 - self.palpha) * w / all_weights # choose from the trues
        else:               p = (1.0 - self.palpha) * w / all_weights
        """

        return log(p)
Exemplo n.º 12
0
    def compute_single_likelihood(self, udi):
        """
        Compute the likelihood of a single data point, udi, an utteranceData.
        """
        assert isinstance(udi, UtteranceData)

        # Partition the possible utterances by truth value in this context
        trues, falses, others = self.partition_utterances(udi.possible_utterances, udi.context)
        utterance = udi.utterance

        # Weight mass of each partition (trues are also counted as "met")
        weight = lambda pu: self.weightfunction(pu, udi.context)
        all_weights = sum([weight(pu) for pu in udi.possible_utterances])
        true_weights = sum([weight(pu) for pu in trues])
        met_weights = sum([weight(pu) for pu in falses]) + true_weights

        w = weight(utterance)  # weight of the observed utterance
        if utterance in trues:
            # mixture: truthful & pragmatic, truthful only, and uniform noise
            p = self.palpha * self.alpha * w / true_weights \
                + self.palpha * (1.0 - self.alpha) * w / met_weights \
                + (1.0 - self.palpha) * w / all_weights
        elif utterance in falses:
            # ifelse guards the degenerate case where no true utterance has weight
            p = ifelse(true_weights == 0, 1.0, 1.0 - self.alpha) * self.palpha * w / met_weights \
                + (1.0 - self.palpha) * w / all_weights
        else:
            p = ifelse(met_weights == 0, 1.0, (1.0 - self.palpha)) * w / all_weights

        return log(p)
Exemplo n.º 13
0
    def compute_likelihood(self, data, eval=False, **kwargs):
        """Likelihood mixing speaker-centric and ego-centric true readings.

        Per context, caches: the true data, the ego-fixed true data, a
        per-word Zipf normalizer over referents reachable from the ego, and
        the object count.  ``self.epsilon`` splits the alpha mass between
        the two readings.  Sets and returns ``self.likelihood``
        (temperature-scaled) after updating the posterior.
        """
        constants = dict()
        ll = 0
        for di, datum in enumerate(data):
            # Cache constants
            if datum.context in constants.keys():
                trueset = constants[datum.context][0]
                egoset = constants[datum.context][1]
                egoRef = constants[datum.context][2]
                all_poss = constants[datum.context][3]
            else:
                try:
                    trueset = self.make_true_data(datum.context)
                    # true data with the speaker fixed to the context's ego
                    egoset = self.make_true_data(datum.context,
                                                 fixX=datum.context.ego)
                    # per-word total Zipf mass of ego-reachable referents
                    egoRef = dict()
                    for w in self.all_words():
                        rs = [
                            t[2] for t in self.make_word_data(
                                w, datum.context, fixX=datum.context.ego)
                        ]
                        egoRef[w] = sum(
                            map(
                                lambda r: zipf(r, self.s, datum.context,
                                               len(datum.context.objects)),
                                rs))
                    all_poss = len(datum.context.objects)

                    constants[datum.context] = [
                        trueset, egoset, egoRef, all_poss
                    ]
                except RecursionDepthException:
                    # runaway recursion: reject the hypothesis outright
                    self.likelihood = -Infinity
                    self.update_posterior()
                    return self.likelihood
                    # Make sure recursion is well formed
            if di == 0:
                if not eval and not self.canIrecurse(data, trueset):
                    self.likelihood = -Infinity
                    self.update_posterior()
                    return self.likelihood
            # Calculate the single point likelihood
            p = (1. - self.alpha) / all_poss  # uniform noise floor
            if (datum.word, datum.X, datum.Y) in trueset:
                # Probability it's true and speaker centric
                pT = self.alpha * (1. - self.epsilon)
                # Probability of the speaker
                # pS = zipf(datum.X, self.s, datum.context, len(datum.context.objects))
                # Probability of the referent given the speaker and the word
                pr = zipf(datum.Y, self.s, datum.context,
                          len(datum.context.objects))
                hout = self(datum.word, datum.context, set([datum.X]))
                # NOTE(review): the speaker is removed from the normalizer Z
                # but not from the numerator pr -- confirm this asymmetry.
                hout.discard(datum.X)
                Z = sum(
                    map(
                        lambda r: zipf(r, self.s, datum.context,
                                       len(datum.context.objects)), hout))
                p += pT * (pr / Z)
            if (datum.word, datum.X, datum.Y) in egoset:
                # Probability it's true and ego-centric
                pT = self.alpha * self.epsilon
                # Probability of the speaker
                # pS = zipf(datum.X, self.s, datum.context, len(datum.context.objects))
                # Probability of the referent
                pR = zipf(datum.Y, self.s, datum.context,
                          len(datum.context.objects)) / egoRef[datum.word]
                p += pT * pR
            ll += log(p)

        self.likelihood = ll / self.likelihood_temperature

        self.update_posterior()
        return self.likelihood