def compute_single_likelihood(self, udi):
    """ Compute the likelihood of a single data point, udi, an UtteranceData """
    assert isinstance(udi, UtteranceData)

    # Types of utterances: partition the possible utterances by truth value in this context
    trues, falses, others = self.partition_utterances(udi.possible_utterances, udi.context)
    # print "T:", trues
    # print "F:", falses
    # print "U:", others

    u = udi.utterance

    # Compute the weights: total weight of all utterances, weight of the true ones,
    # and weight of all presupposition-meeting ones (trues plus falses)
    all_weights  = sum(self.weightfunction(x, udi.context) for x in udi.possible_utterances)
    true_weights = sum(self.weightfunction(x, udi.context) for x in trues)
    met_weights  = sum(self.weightfunction(x, udi.context) for x in falses) + true_weights

    w = self.weightfunction(u, udi.context)  # the current word weight

    if u in trues:
        # Mixture: speak truthfully, speak among presupposition-meeting utterances,
        # or speak at random from all possible utterances
        p = self.palpha * self.alpha * w / true_weights \
            + self.palpha * (1.0 - self.alpha) * w / met_weights \
            + (1.0 - self.palpha) * w / all_weights
    elif u in falses:
        # If there are no true utterances, all of the "truthful" mass falls here;
        # choose from the presupposition-meeting utterances
        p = ifelse(true_weights == 0, 1.0, 1.0 - self.alpha) * self.palpha * w / met_weights \
            + (1.0 - self.palpha) * w / all_weights
    else:
        # If nothing meets the presuppositions, all mass goes to choosing at random
        p = ifelse(met_weights == 0, 1.0, 1.0 - self.palpha) * w / all_weights

    """
    TODO: WHY NOT THIS WAY, IGNORING true_weights==0? Because if we sample, then we
    have 0 chance of getting a true utterance when true_weights == 0. This causes
    problems in CCGLexicon.

    w = self.weightfunction(u, udi.context)  # the current word weight
    if u in trues:
        p = self.palpha * (self.alpha * w / true_weights + (1.0 - self.alpha) * w / met_weights) \
            + (1.0 - self.palpha) * w / all_weights  # choose from the trues
    elif u in falses:
        p = self.palpha * (1.0 - self.alpha) * w / met_weights \
            + (1.0 - self.palpha) * w / all_weights  # choose from the falses
    else:
        p = (1.0 - self.palpha) * w / all_weights
    """

    return log(p)
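# A minimal standalone sketch (not part of the class above) of the three-way mixture
# that compute_single_likelihood implements: with probability palpha*alpha the speaker
# chooses (by weight) among true utterances, with probability palpha*(1-alpha) among all
# presupposition-meeting utterances, and otherwise among all possible utterances.
# All names, weights, and parameter values below are hypothetical illustration only.
def _example_mixture_probability():
    palpha, alpha = 0.9, 0.9                                  # hypothetical noise parameters
    weights = {'every': 1.0, 'some': 0.5, 'blick': 0.25}      # hypothetical utterance weights
    trues, falses = {'every'}, {'some'}                       # 'blick' fails its presuppositions

    all_w  = sum(weights.values())
    true_w = sum(weights[x] for x in trues)
    met_w  = true_w + sum(weights[x] for x in falses)

    def p(u):
        w = weights[u]
        if u in trues:
            return palpha * alpha * w / true_w \
                   + palpha * (1.0 - alpha) * w / met_w \
                   + (1.0 - palpha) * w / all_w
        elif u in falses:
            return palpha * (1.0 - alpha) * w / met_w + (1.0 - palpha) * w / all_w
        else:
            return (1.0 - palpha) * w / all_w

    # When trues and falses are both nonempty, these probabilities sum to 1.0
    return {u: p(u) for u in weights}, sum(p(u) for u in weights)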
def gricean_weight(h, testing_set, nu=1.0):
    """
    Takes a hypothesis and its function and returns the weight under a Gricean setup.
    Production probability is proportional to exp( 1.0 / (nu + proportion of time it is true) ).

    Notes:
        The max weight is 1/nu, and this should not be huge compared to 1/alpha.
        We (should) boundedly memoize this.
    """
    # Proportion of the testing set on which the hypothesis is true
    pct = float(sum(map(lambda s: ifelse(h(s), 1.0, 0.0), testing_set))) / len(testing_set)
    # pct = float(sum(map(lambda s: ifelse(f(*s) is True, 1.0, 0.0), testing_set))) / len(testing_set)           # pull out the context sets and apply f
    # pct = float(sum(map(lambda s: ifelse(collapse_undef(f(*s)), 1.0, 0.0), testing_set))) / len(testing_set)   # pull out the context sets and apply f

    return 1.0 / (nu + pct)
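# A small hedged usage sketch of gricean_weight: hypotheses that are true less often
# receive a higher production weight (up to 1/nu), favoring more informative utterances.
# The hypotheses and testing set here are hypothetical stand-ins for real lexical
# hypotheses and context sets; this also assumes ifelse (e.g., from LOTlib.Miscellaneous)
# is available at module level, as gricean_weight itself requires.
def _example_gricean_weight():
    rare   = lambda s: s > 8     # true on 1 of 10 contexts -> weight 1/(1.0 + 0.1) ~= 0.91
    common = lambda s: s >= 0    # true on all 10 contexts  -> weight 1/(1.0 + 1.0)  = 0.5
    contexts = range(10)
    return gricean_weight(rare, contexts), gricean_weight(common, contexts)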