def test_remove_keywords_dictionary_compare(self):
    """Check that removing keywords leaves the same trie as never adding them.

    For each test case, one KeywordProcessor is loaded with ``keyword_dict``
    and then has ``remove_keyword_dict`` removed from it. A second processor
    is loaded only with the keywords that are not listed for removal. The
    ``keyword_trie_dict`` of both processors must be equal.
    """
    for test_id, test_case in enumerate(self.test_cases):
        keyword_dict = test_case['keyword_dict']
        remove_keyword_dict = test_case['remove_keyword_dict']

        keyword_processor = KeywordProcessor()
        keyword_processor.add_keywords_from_dict(keyword_dict)
        keyword_processor.remove_keywords_from_dict(remove_keyword_dict)
        keyword_trie_dict = keyword_processor.keyword_trie_dict

        # Reference dictionary: every keyword that is not scheduled for removal.
        new_dictionary = defaultdict(list)
        for key, values in keyword_dict.items():
            for value in values:
                if not (key in remove_keyword_dict
                        and value in remove_keyword_dict[key]):
                    new_dictionary[key].append(value)

        keyword_processor_two = KeywordProcessor()
        keyword_processor_two.add_keywords_from_dict(new_dictionary)
        keyword_trie_dict_two = keyword_processor_two.keyword_trie_dict

        # assertEqual (rather than assertTrue on ==) reports a dict diff on
        # failure, and the message now names what is actually compared.
        self.assertEqual(
            keyword_trie_dict, keyword_trie_dict_two,
            "keyword trie doesn't match the expected trie for test case: {}"
            .format(test_id))
def test_remove_keywords_dictionary_len(self):
    """Compare processor lengths after removal against a freshly built one.

    For each test case, the first processor receives ``keyword_dict`` and
    then has ``remove_keyword_dict`` removed. The second processor is built
    only from the keywords not listed for removal. Both must report the
    same ``len()``.
    """
    for test_id, test_case in enumerate(self.test_cases):
        added = test_case['keyword_dict']

        pruned_processor = KeywordProcessor()
        pruned_processor.add_keywords_from_dict(added)
        pruned_processor.remove_keywords_from_dict(
            test_case['remove_keyword_dict'])
        kp_len = len(pruned_processor)

        # Collect the keywords that should survive the removal.
        surviving = defaultdict(list)
        for clean_name, synonyms in added.items():
            for synonym in synonyms:
                if (clean_name in test_case['remove_keyword_dict']
                        and synonym in test_case['remove_keyword_dict'][clean_name]):
                    continue
                surviving[clean_name].append(synonym)

        reference_processor = KeywordProcessor()
        reference_processor.add_keywords_from_dict(surviving)
        kp_len_two = len(reference_processor)

        failure_message = (
            "keyword processor length doesn't match for Text ID {}".format(
                test_id))
        self.assertEqual(kp_len, kp_len_two, failure_message)
def test_remove_keywords_len(self):
    """Check the processor length after adding and after removing keywords.

    For each test case, the length after ``add_keywords_from_dict`` must
    equal the total number of values in ``keyword_dict``. After
    ``remove_keywords_from_dict`` the length must have dropped by the total
    number of values in ``remove_keyword_dict``.
    """
    for test_id, test_case in enumerate(self.test_cases):
        keyword_processor = KeywordProcessor()
        keyword_processor.add_keywords_from_dict(test_case['keyword_dict'])

        # Length right after adding: one entry per listed keyword.
        kp_len = len(keyword_processor)
        kp_len_expected = sum(
            len(values) for values in test_case['keyword_dict'].values())
        # The original message had no placeholder, so the id was dropped.
        self.assertEqual(
            kp_len, kp_len_expected,
            "keyword processor length doesn't match for Text ID {}".format(
                test_id))

        keyword_processor.remove_keywords_from_dict(
            test_case['remove_keyword_dict'])

        # Length after removal: reduced by one per keyword listed for removal.
        kp_len = len(keyword_processor)
        kp_len_decreased = sum(
            len(values)
            for values in test_case['remove_keyword_dict'].values())
        self.assertEqual(
            kp_len, kp_len_expected - kp_len_decreased,
            "keyword processor length doesn't match for Text ID {}".format(
                test_id))
def test_remove_keyword_from_dictionary(self):
    """Expect AttributeError when the dict values are plain strings, not lists."""
    string_valued_dict = {
        "java": "java_2e",
        "product management": "product manager",
    }
    processor = KeywordProcessor()
    with pytest.raises(AttributeError):
        processor.remove_keywords_from_dict(string_valued_dict)
def test_remove_keywords(self):
    """Verify extraction results after a subset of keywords is removed.

    For each test case, a fresh KeywordProcessor is loaded with
    ``keyword_dict``, the entries in ``remove_keyword_dict`` are removed,
    and the keywords extracted from ``sentence`` are compared with the
    expected ``keywords`` list.
    """
    for test_id, test_case in enumerate(self.test_cases):
        processor = KeywordProcessor()
        processor.add_keywords_from_dict(test_case['keyword_dict'])
        processor.remove_keywords_from_dict(test_case['remove_keyword_dict'])

        found = processor.extract_keywords(test_case['sentence'])
        failure_message = (
            "keywords_extracted don't match the expected results for test case: {}"
            .format(test_id))
        self.assertEqual(found, test_case['keywords'], failure_message)
"java": ["java_2e", "java programing"], "product management": ["PM", "product manager"] } keyword_processor.add_keywords_from_dict(keyword_dict) print( keyword_processor.extract_keywords( 'I am a product manager for a java_2e platform')) # output ['product management', 'java'] keyword_processor.remove_keyword('java_2e') print( keyword_processor.extract_keywords( 'I am a product manager for a java_2e platform')) # ['product management'] # you can also remove keywords from a list/ dictionary keyword_processor.remove_keywords_from_dict({"product management": ["PM"]}) keyword_processor.remove_keywords_from_list(["java programing"]) keyword_processor.extract_keywords( 'I am a product manager for a java_2e platform') # output ['product management'] # 查询添加关键词的个数 keyword_processor = KeywordProcessor() # 字典格式的关键词,其对应的key为最终匹配出的词,但key不记入关键词搜索的范围 keyword_dict = { "java": ["java_2e", "java programing"], "product management": ["PM", "product manager"] } keyword_processor.add_keywords_from_dict(keyword_dict) print(len(keyword_processor)) # output 4
# Removing keywords
keyword_processor = KeywordProcessor()
keyword_dict = {
    "java": ["java_2e", "java programing"],
    "product management": ["PM", "product manager"]
}
keyword_processor.add_keywords_from_dict(keyword_dict)
print(keyword_processor.extract_keywords('I am a product manager for a java_2e platform'))
# output ['product management', 'java']
keyword_processor.remove_keyword('java_2e')
print(keyword_processor.extract_keywords('I am a product manager for a java_2e platform'))
# output ['product management']
# you can also remove keywords from a list/ dictionary
keyword_processor.remove_keywords_from_dict({"product management": ["PM"]})
keyword_processor.remove_keywords_from_list(["java programing"])
# Printed like the two calls above; previously the result was discarded,
# so the documented output was never shown.
print(keyword_processor.extract_keywords('I am a product manager for a java_2e platform'))
# output ['product management']

# Counting the keywords that have been added
keyword_processor = KeywordProcessor()
# Keywords in dictionary form: each key is the word reported for a match,
# but the key itself is not included among the searched keywords.
keyword_dict = {
    "java": ["java_2e", "java programing"],
    "product management": ["PM", "product manager"]
}
keyword_processor.add_keywords_from_dict(keyword_dict)
print(len(keyword_processor))
# output 4
def remove(self):
    """Extract keywords from ``self.text`` after removing selected keywords.

    A new KeywordProcessor is loaded from ``self.keyword_dict``, the entries
    in ``self.remove_keyword`` are removed from it, and the result of
    ``extract_keywords(self.text)`` is returned.
    """
    processor = KeywordProcessor()
    processor.add_keywords_from_dict(self.keyword_dict)
    processor.remove_keywords_from_dict(self.remove_keyword)
    return processor.extract_keywords(self.text)