Exemplo n.º 1
0
    def test_get_normalized_probabilities(self):
        """Check that get_normalized_probabilities rescales values to sum to 1."""
        probability_list = [0.2, 0.3, 0.2]
        # Each input divided by the input total (0.7).
        expected = [
            0.28571428571428575,
            0.4285714285714286,
            0.28571428571428575,
        ]

        actual_list = utils.get_normalized_probabilities(probability_list)

        # Compare lengths first: a pairwise loop on its own would not
        # notice missing or extra entries in the result.
        self.assertEqual(len(actual_list), len(expected))
        for actual, wanted in zip(actual_list, expected):
            self.assertAlmostEqual(actual, wanted)

        self.assertAlmostEqual(sum(actual_list), 1.0)
    def generate_suggestions_and_posteriors(self, query,
                                            get_posterior_fn=None):
        """Return (suggestion, posterior) pairs for query.

        Candidate suggestions are generated for `query` and for each of
        its corrected split variants, ranked by likelihood, de-duplicated
        and truncated to the top `self.MAX_NUM_SUGGESTIONS`.  A posterior
        is then computed for every remaining candidate and the posteriors
        are normalized with `utils.get_normalized_probabilities`.

        Arguments:
        - `query`: Suggestion object.
        - `get_posterior_fn`: callable invoked as
          `get_posterior_fn(suggestion, query)`; defaults to
          `self.get_posterior_fn` when None.

        Returns:
        A list of (suggestion, normalized posterior) tuples sorted by
        posterior, highest first.  Empty when there are no candidates.
        """
        if get_posterior_fn is None:
            get_posterior_fn = self.get_posterior_fn

        # TODO: run-on corrections (utils.get_corrected_run_on_queries)
        # are currently not included.
        all_queries = [query] + utils.get_corrected_split_queries(
            query, self.lexicon)

        # Collect every (candidate query, suggestion) pair.  The loop
        # variable is deliberately not called `query` so the parameter
        # is never shadowed or rebound.
        candidate_pairs = []
        for candidate_query in all_queries:
            candidate_terms = [self.generate_candidate_terms(term)
                               for term in candidate_query]
            for suggestion in self.generate_candidate_suggestions(
                    candidate_terms, candidate_query.suggestion_type):
                candidate_pairs.append((candidate_query, suggestion))

        # Rank by likelihood, best first.  The sort is stable, so pairs
        # with tied likelihoods keep their generation order.
        scored_pairs = [(phrase.get_likelihood(*pair), pair)
                        for pair in candidate_pairs]
        scored_pairs.sort(key=lambda scored: scored[0], reverse=True)

        # Remove duplicates (if any).  Equal pairs have equal likelihoods
        # and therefore sit in the same tied run, but not necessarily next
        # to each other, so each run is de-duplicated as a whole rather
        # than only collapsing adjacent repeats.
        ranked_pairs = []
        for _, tied_run in itertools.groupby(
                scored_pairs, key=lambda scored: scored[0]):
            unique_in_run = []
            for _, pair in tied_run:
                if pair not in unique_in_run:
                    unique_in_run.append(pair)
            ranked_pairs.extend(unique_in_run)

        # Take only the top few suggestions
        ranked_pairs = ranked_pairs[:self.MAX_NUM_SUGGESTIONS]
        if not ranked_pairs:
            # Nothing to score; avoid normalizing an empty distribution.
            return []

        all_posteriors = [get_posterior_fn(suggestion, candidate_query)
                          for candidate_query, suggestion in ranked_pairs]
        all_suggestions = [suggestion for _, suggestion in ranked_pairs]

        # TODO: also offer the original query itself when its posterior
        # exceeds self.ORIGINAL_POSTERIOR_THRESHOLD.

        normalized_posteriors = utils.get_normalized_probabilities(
            all_posteriors)
        suggestion_posterior_list = list(zip(all_suggestions,
                                             normalized_posteriors))
        suggestion_posterior_list.sort(key=lambda pair: pair[1],
                                       reverse=True)
        return suggestion_posterior_list