def test_get_normalized_probabilities(self):
    """Check utils.get_normalized_probabilities on a small fixed input.

    The result must match the expected values element-wise, have the
    same length as the expected list, and sum to 1.0.
    """
    probability_list = [0.2, 0.3, 0.2]
    # Expected values are each input divided by the input total (0.7).
    expected_list = [0.28571428571428575,
                     0.4285714285714286,
                     0.28571428571428575]

    actual_list = utils.get_normalized_probabilities(probability_list)

    # Compare lengths explicitly: the pairwise loop below would otherwise
    # silently ignore missing or extra trailing elements.
    self.assertEqual(len(actual_list), len(expected_list))
    for actual, expected in zip(actual_list, expected_list):
        self.assertAlmostEqual(actual, expected)
    self.assertAlmostEqual(sum(actual_list), 1.0)
def generate_suggestions_and_posteriors(self, query, get_posterior_fn=None):
    """Return (suggestion, posterior) pairs for query.

    Build candidate queries (the query itself plus its corrected split
    variants), generate candidate suggestions for each of them, keep the
    top `self.MAX_NUM_SUGGESTIONS` distinct (query, suggestion) pairs
    ranked by `phrase.get_likelihood`, compute a posterior for each one
    and normalize the posteriors with
    `utils.get_normalized_probabilities`.

    Arguments:
    - `query`: Suggestion object.
    - `get_posterior_fn`: callable invoked as
      `get_posterior_fn(suggestion, candidate_query)`; defaults to
      `self.get_posterior_fn` when None.

    Returns:
    A list of (suggestion, normalized_posterior) tuples sorted by
    descending posterior. The list is empty when no candidate
    suggestion is available.
    """
    if get_posterior_fn is None:
        get_posterior_fn = self.get_posterior_fn

    # NOTE: run-on corrections (utils.get_corrected_run_on_queries) are
    # currently not included among the candidate queries.
    candidate_queries = [query] + utils.get_corrected_split_queries(
        query, self.lexicon)

    # Flat list of (candidate_query, suggestion) pairs. Distinct loop
    # names are used so the `query` parameter is never rebound.
    pairs = []
    for candidate_query in candidate_queries:
        term_candidates = [self.generate_candidate_terms(term)
                           for term in candidate_query]
        pairs.extend(
            (candidate_query, suggestion)
            for suggestion in self.generate_candidate_suggestions(
                term_candidates, candidate_query.suggestion_type))

    # Most likely pairs first.
    pairs.sort(key=lambda pair: phrase.get_likelihood(*pair), reverse=True)

    # Keep the top distinct pairs. The list is ordered by likelihood, not
    # by the pairs themselves, so equal pairs are not guaranteed to be
    # adjacent; membership is therefore checked against everything kept so
    # far. Scanning stops as soon as the limit is reached, which keeps the
    # membership checks cheap for a small limit.
    limit = self.MAX_NUM_SUGGESTIONS
    top_pairs = []
    for pair in pairs:
        if pair in top_pairs:
            continue
        top_pairs.append(pair)
        if len(top_pairs) == limit:
            break
    # The slice keeps the original truncation semantics for any limit
    # value the early exit above does not catch (e.g. 0).
    top_pairs = top_pairs[:limit]

    if not top_pairs:
        # Nothing to score; avoid indexing into an empty result.
        return []

    posteriors = [get_posterior_fn(suggestion, candidate_query)
                  for candidate_query, suggestion in top_pairs]
    suggestions = [suggestion for _, suggestion in top_pairs]

    # TODO: also offer the original query as a suggestion when its own
    # posterior, get_posterior_fn(query, query), exceeds
    # self.ORIGINAL_POSTERIOR_THRESHOLD.

    normalized_posteriors = utils.get_normalized_probabilities(posteriors)
    suggestion_posterior_list = list(zip(suggestions, normalized_posteriors))
    suggestion_posterior_list.sort(key=lambda pair: pair[1], reverse=True)
    return suggestion_posterior_list