Exemplo n.º 1
0
    def test_remove_nouns_from_reviews(self):
        """Check the nouns left on reviews by ``remove_nouns_from_reviews``.

        Two reviews are passed to ``context_utils.remove_nouns_from_reviews``
        along with a list of nouns.  Two reference reviews are built from
        the same texts and have the nouns expected to disappear removed by
        hand.  The resulting noun collections are compared ignoring order.
        """
        nouns_to_remove = ['bar', 'night', 'food', 'wine']

        # Reviews handed to the function under test.
        actual_review1 = Review(review_text1)
        actual_review2 = Review(review_text2)

        # Reference reviews, trimmed manually to the expected outcome.
        expected_review1 = Review(review_text1)
        expected_review2 = Review(review_text2)
        for noun in ('night', 'food'):
            expected_review1.nouns.remove(noun)
        for noun in ('bar', 'food'):
            expected_review2.nouns.remove(noun)

        context_utils.remove_nouns_from_reviews(
            [actual_review1, actual_review2], nouns_to_remove)

        self.assertItemsEqual(actual_review1.nouns, expected_review1.nouns)
        self.assertItemsEqual(actual_review2.nouns, expected_review2.nouns)
Exemplo n.º 2
0
    def test_remove_nouns_from_reviews(self):
        """Compare reviews after noun removal against hand-trimmed references.

        For each review text, one review goes through
        ``context_utils.remove_nouns_from_reviews`` while a second review
        built from the same text has specific nouns removed directly.  The
        two noun collections must hold the same items, in any order.
        """
        nouns = ['bar', 'night', 'food', 'wine']
        texts = (review_text1, review_text2)
        # Nouns removed by hand from the reference review of each text.
        removed_per_review = (('night', 'food'), ('bar', 'food'))

        actual_reviews = [Review(text) for text in texts]
        expected_reviews = [Review(text) for text in texts]

        for expected, removed in zip(expected_reviews, removed_per_review):
            for noun in removed:
                expected.nouns.remove(noun)

        context_utils.remove_nouns_from_reviews(actual_reviews, nouns)

        for actual, expected in zip(actual_reviews, expected_reviews):
            self.assertItemsEqual(actual.nouns, expected.nouns)
Exemplo n.º 3
0
    def filter_nouns(self):
        """Prune ``self.all_nouns`` by weighted frequency, then build senses.

        A noun is discarded when its weighted frequency in either the
        generic or the specific reviews is below ``self.alpha``, or when the
        specific/generic frequency ratio is below ``self.beta``.  Discarded
        nouns are removed from ``self.all_nouns`` and afterwards passed,
        together with ``self.reviews``, to
        ``context_utils.remove_nouns_from_reviews``.  Finally
        ``context_utils.generate_senses`` is called on every review in
        ``self.reviews``, ``self.specific_reviews`` and
        ``self.generic_reviews``.  A timestamped progress line is printed
        before each stage.
        """
        print('filter_nouns', time.strftime("%H:%M:%S"))
        rejected = set()

        # Walk a snapshot so self.all_nouns can shrink during the loop.
        for candidate in list(self.all_nouns):
            specific_frq = context_utils.calculate_word_weighted_frequency(
                candidate, self.specific_reviews)
            generic_frq = context_utils.calculate_word_weighted_frequency(
                candidate, self.generic_reviews)

            # Short-circuit `or`: the ratio is only computed once both
            # frequencies have passed the alpha threshold.
            if (generic_frq < self.alpha
                    or specific_frq < self.alpha
                    or specific_frq / generic_frq < self.beta):
                self.all_nouns.remove(candidate)
                rejected.add(candidate)

        print('remove_nouns', time.strftime("%H:%M:%S"))
        context_utils.remove_nouns_from_reviews(self.reviews, rejected)

        # (progress label, attribute holding the reviews to process)
        stages = (
            ('generating_all_senses', 'reviews'),
            ('generating_specific_senses', 'specific_reviews'),
            ('generating_generic_senses', 'generic_reviews'),
        )
        for label, attribute in stages:
            print(label, time.strftime("%H:%M:%S"))
            for review in getattr(self, attribute):
                context_utils.generate_senses(review)
Exemplo n.º 4
0
    def filter_nouns(self):
        """Split ``self.all_nouns`` into kept and rejected nouns, then build senses.

        For every noun the weighted frequency is computed over
        ``self.reviews``, ``self.specific_reviews`` and
        ``self.generic_reviews``.  A noun is rejected when its overall
        frequency is below ``self.alpha``, when its specific frequency is
        zero, or when its generic frequency is non-zero and the
        specific/generic ratio is below ``self.beta``.  Every other noun is
        counted as a context noun.

        Rejected nouns are removed from ``self.all_nouns`` and afterwards
        passed, together with ``self.reviews``, to
        ``context_utils.remove_nouns_from_reviews``.  Finally
        ``context_utils.generate_senses`` is called on every review of the
        three review collections.  Progress is reported on stdout
        throughout.
        """
        print('filter_nouns', time.strftime("%H:%M:%S"))
        rejected_nouns = set()
        context_nouns = []

        num_nouns = len(self.all_nouns)
        print('num nouns %d' % num_nouns)

        # Walk a snapshot so self.all_nouns can shrink during the loop.
        for position, noun in enumerate(list(self.all_nouns), 1):
            # Carriage return rewrites the same console line on each pass;
            # the flush makes the counter show up immediately.
            sys.stdout.write('\r%d/%d' % (position, num_nouns))
            sys.stdout.flush()

            overall_frq = context_utils.calculate_word_weighted_frequency(
                noun, self.reviews)
            specific_frq = context_utils.calculate_word_weighted_frequency(
                noun, self.specific_reviews)
            generic_frq = context_utils.calculate_word_weighted_frequency(
                noun, self.generic_reviews)

            if overall_frq < self.alpha or specific_frq == 0:
                rejected = True
            elif generic_frq == 0:
                # Never seen in the generic reviews: keep it and skip the
                # ratio, which would divide by zero.
                rejected = False
            else:
                rejected = specific_frq / generic_frq < self.beta

            if rejected:
                self.all_nouns.remove(noun)
                rejected_nouns.add(noun)
            else:
                context_nouns.append(noun)

        # Terminate the in-place progress line.
        print('')
        print('num context nouns', len(context_nouns))

        print('remove_nouns', time.strftime("%H:%M:%S"))
        context_utils.remove_nouns_from_reviews(self.reviews, rejected_nouns)

        # (progress label, attribute holding the reviews to process)
        stages = (
            ('generating_all_senses', 'reviews'),
            ('generating_specific_senses', 'specific_reviews'),
            ('generating_generic_senses', 'generic_reviews'),
        )
        for label, attribute in stages:
            print(label, time.strftime("%H:%M:%S"))
            for review in getattr(self, attribute):
                context_utils.generate_senses(review)