def test_remove_nouns_from_reviews(self):
    """Verify remove_nouns_from_reviews against hand-built expectations.

    Two reviews are built from ``review_text1`` and ``review_text2`` and
    passed to ``context_utils.remove_nouns_from_reviews`` together with a
    list of nouns. Their ``nouns`` are then compared, ignoring order, with
    those of two freshly built reviews from which selected nouns were
    removed by hand.
    """
    unwanted = ['bar', 'night', 'food', 'wine']

    first_actual = Review(review_text1)
    second_actual = Review(review_text2)
    reviews_under_test = [first_actual, second_actual]

    # Expected state: same texts, with the nouns this test expects to be
    # stripped from each review removed manually.
    first_expected = Review(review_text1)
    second_expected = Review(review_text2)
    for noun in ('night', 'food'):
        first_expected.nouns.remove(noun)
    for noun in ('bar', 'food'):
        second_expected.nouns.remove(noun)

    context_utils.remove_nouns_from_reviews(reviews_under_test, unwanted)

    self.assertItemsEqual(first_actual.nouns, first_expected.nouns)
    self.assertItemsEqual(second_actual.nouns, second_expected.nouns)
def filter_nouns(self):
    """Discard rare or non-specific nouns, then generate senses.

    For every noun in ``self.all_nouns`` the weighted frequency is computed
    over ``self.specific_reviews`` and over ``self.generic_reviews``. A noun
    is discarded when either frequency is below ``self.alpha`` or when the
    specific/generic frequency ratio is below ``self.beta``.

    Discarded nouns are removed from ``self.all_nouns`` and handed, together
    with ``self.reviews``, to ``context_utils.remove_nouns_from_reviews``.
    Finally ``context_utils.generate_senses`` is called on every review in
    ``self.reviews``, ``self.specific_reviews`` and ``self.generic_reviews``.
    Timestamped progress markers are printed along the way.
    """
    print('filter_nouns', time.strftime("%H:%M:%S"))
    unwanted_nouns = set()

    # Iterate over a snapshot: self.all_nouns is mutated inside the loop.
    for noun in list(self.all_nouns):
        specific_weighted_frq =\
            context_utils.calculate_word_weighted_frequency(
                noun, self.specific_reviews)
        generic_weighted_frq =\
            context_utils.calculate_word_weighted_frequency(
                noun, self.generic_reviews)

        if (generic_weighted_frq < self.alpha or
                specific_weighted_frq < self.alpha):
            is_unwanted = True
        elif generic_weighted_frq == 0:
            # Only reachable when alpha <= 0. Dividing here used to raise
            # ZeroDivisionError. A noun absent from the generic reviews has
            # an unbounded ratio, so it is kept unless it is absent from
            # the specific reviews as well.
            is_unwanted = specific_weighted_frq == 0
        else:
            ratio = specific_weighted_frq / generic_weighted_frq
            is_unwanted = ratio < self.beta

        if is_unwanted:
            self.all_nouns.remove(noun)
            unwanted_nouns.add(noun)

    print('remove_nouns', time.strftime("%H:%M:%S"))
    context_utils.remove_nouns_from_reviews(self.reviews, unwanted_nouns)

    print('generating_all_senses', time.strftime("%H:%M:%S"))
    for review in self.reviews:
        context_utils.generate_senses(review)

    print('generating_specific_senses', time.strftime("%H:%M:%S"))
    for review in self.specific_reviews:
        context_utils.generate_senses(review)

    print('generating_generic_senses', time.strftime("%H:%M:%S"))
    for review in self.generic_reviews:
        context_utils.generate_senses(review)
def filter_nouns(self):
    """Split ``self.all_nouns`` into kept and discarded nouns.

    Three weighted frequencies are computed per noun: over ``self.reviews``,
    ``self.specific_reviews`` and ``self.generic_reviews``. A noun is
    discarded when its overall frequency is below ``self.alpha``, when its
    specific frequency is zero, or when both specific and generic
    frequencies are non-zero and their ratio is below ``self.beta``. A noun
    with a non-zero specific frequency and a zero generic frequency is
    always kept.

    Discarded nouns are removed from ``self.all_nouns`` and passed, with
    ``self.reviews``, to ``context_utils.remove_nouns_from_reviews``. Then
    ``context_utils.generate_senses`` is called on every review of the three
    collections. A running ``index/total`` counter is written to stdout
    while the nouns are processed; the kept nouns are only counted and
    printed, not stored on the instance.
    """
    print('filter_nouns', time.strftime("%H:%M:%S"))
    unwanted_nouns = set()
    context_nouns = []
    num_nouns = len(self.all_nouns)
    print('num nouns %d' % num_nouns)

    frequency_of = context_utils.calculate_word_weighted_frequency

    # Snapshot the collection: nouns are removed from it during the loop.
    for position, noun in enumerate(list(self.all_nouns), 1):
        # Carriage return keeps the progress counter on a single line.
        sys.stdout.write('\r%d/%d' % (position, num_nouns))
        sys.stdout.flush()  # important

        overall_frq = frequency_of(noun, self.reviews)
        specific_frq = frequency_of(noun, self.specific_reviews)
        generic_frq = frequency_of(noun, self.generic_reviews)

        if overall_frq < self.alpha or specific_frq == 0:
            discard = True
        elif generic_frq == 0:
            # Never seen in generic reviews: keep without computing a ratio.
            discard = False
        else:
            discard = specific_frq / generic_frq < self.beta

        if discard:
            self.all_nouns.remove(noun)
            unwanted_nouns.add(noun)
        else:
            context_nouns.append(noun)

    print('')
    print('num context nouns', len(context_nouns))

    print('remove_nouns', time.strftime("%H:%M:%S"))
    context_utils.remove_nouns_from_reviews(self.reviews, unwanted_nouns)

    sense_passes = (
        ('generating_all_senses', self.reviews),
        ('generating_specific_senses', self.specific_reviews),
        ('generating_generic_senses', self.generic_reviews),
    )
    for label, review_group in sense_passes:
        print(label, time.strftime("%H:%M:%S"))
        for review in review_group:
            context_utils.generate_senses(review)