def test_remove_keywords_dictionary_compare(self):
    """Check that removing keywords leaves the same trie as never adding them.

    For each test case:

    1. Build a KeywordProcessor from ``keyword_dict`` and then remove
       everything listed in ``remove_keyword_dict``.
    2. Build a second KeywordProcessor directly from the keywords that
       are not listed for removal.
    3. Assert that both processors expose equal ``keyword_trie_dict``
       structures.
    """
    for test_id, test_case in enumerate(self.test_cases):
        keyword_dict = test_case['keyword_dict']
        remove_keyword_dict = test_case['remove_keyword_dict']

        keyword_processor = KeywordProcessor()
        keyword_processor.add_keywords_from_dict(keyword_dict)
        keyword_processor.remove_keywords_from_dict(remove_keyword_dict)
        keyword_trie_dict = keyword_processor.keyword_trie_dict

        # Expected survivors: every (key, keyword) pair that is not listed
        # for removal. A key whose keywords are all removed never gets an
        # entry, because defaultdict only creates it on the first append.
        new_dictionary = defaultdict(list)
        for key, values in keyword_dict.items():
            removed = remove_keyword_dict.get(key, ())
            for value in values:
                if value not in removed:
                    new_dictionary[key].append(value)

        keyword_processor_two = KeywordProcessor()
        keyword_processor_two.add_keywords_from_dict(new_dictionary)
        keyword_trie_dict_two = keyword_processor_two.keyword_trie_dict

        # assertEqual (rather than assertTrue(a == b)) reports how the two
        # tries differ when the check fails.
        self.assertEqual(
            keyword_trie_dict, keyword_trie_dict_two,
            "keyword tries don't match for test case: {}".format(test_id))
Exemplo n.º 2
0
    def test_remove_keywords_dictionary_len(self):
        """Compare processor sizes after removal against a freshly built one.

        For every test case, one KeywordProcessor is loaded with
        ``keyword_dict`` and then has ``remove_keyword_dict`` removed from it.
        A second KeywordProcessor is loaded only with the keywords that are
        not listed for removal. Both must report the same ``len()``.
        """
        for test_id, test_case in enumerate(self.test_cases):
            pruned_processor = KeywordProcessor()
            pruned_processor.add_keywords_from_dict(test_case['keyword_dict'])
            pruned_processor.remove_keywords_from_dict(
                test_case['remove_keyword_dict'])
            pruned_len = len(pruned_processor)

            # Keep only the (key, keyword) pairs that are not scheduled
            # for removal.
            to_remove = test_case['remove_keyword_dict']
            surviving = defaultdict(list)
            for name, keywords in test_case['keyword_dict'].items():
                for keyword in keywords:
                    if name in to_remove and keyword in to_remove[name]:
                        continue
                    surviving[name].append(keyword)

            fresh_processor = KeywordProcessor()
            fresh_processor.add_keywords_from_dict(surviving)
            fresh_len = len(fresh_processor)

            failure_message = (
                "keyword processor length doesn't match for Text ID {}".format(
                    test_id))
            self.assertEqual(pruned_len, fresh_len, failure_message)
Exemplo n.º 3
0
 def test_remove_keywords_len(self):
     """Check len() of a KeywordProcessor before and after removing keywords.

     For each test case:

     1. Add ``keyword_dict`` and assert that ``len()`` equals the total
        number of keywords listed in it.
     2. Remove ``remove_keyword_dict`` and assert that ``len()`` dropped by
        the total number of keywords listed for removal.
     """
     for test_id, test_case in enumerate(self.test_cases):
         keyword_processor = KeywordProcessor()
         keyword_processor.add_keywords_from_dict(test_case['keyword_dict'])
         # check length after adding
         kp_len = len(keyword_processor)
         kp_len_expected = sum(
             len(values) for values in test_case['keyword_dict'].values())
         # The message needs a "{}" placeholder; without it .format()
         # silently drops test_id and a failure cannot be traced to its case.
         self.assertEqual(
             kp_len, kp_len_expected,
             "keyword processor length doesn't match for Text ID {}".format(
                 test_id))
         keyword_processor.remove_keywords_from_dict(
             test_case['remove_keyword_dict'])
         # check length after removing
         # NOTE(review): this arithmetic presumably relies on every keyword
         # listed for removal having been added above -- confirm against the
         # test fixtures.
         kp_len = len(keyword_processor)
         kp_len_decreased = sum(
             len(values)
             for values in test_case['remove_keyword_dict'].values())
         self.assertEqual(
             kp_len, kp_len_expected - kp_len_decreased,
             "keyword processor length doesn't match for Text ID {}".format(
                 test_id))
Exemplo n.º 4
0
 def test_remove_keyword_from_dictionary(self):
     """Expect AttributeError when the dict passed for removal maps to strings."""
     # Each key maps to a bare string here rather than a list of keywords.
     malformed_dict = {
         "java": "java_2e",
         "product management": "product manager",
     }
     processor = KeywordProcessor()
     with pytest.raises(AttributeError):
         processor.remove_keywords_from_dict(malformed_dict)
Exemplo n.º 5
0
 def test_remove_keywords(self):
     """Verify extraction results once some keywords have been removed.

     For every test case a fresh KeywordProcessor is loaded with
     ``keyword_dict``, has ``remove_keyword_dict`` removed from it, and is
     then run over ``sentence``. The extracted keywords must equal the
     case's ``keywords`` entry.
     """
     for case_number, case in enumerate(self.test_cases):
         processor = KeywordProcessor()
         processor.add_keywords_from_dict(case['keyword_dict'])
         processor.remove_keywords_from_dict(case['remove_keyword_dict'])

         actual = processor.extract_keywords(case['sentence'])
         expected = case['keywords']
         failure_message = (
             "keywords_extracted don't match the expected results for test case: {}"
             .format(case_number))
         self.assertEqual(actual, expected, failure_message)
    "java": ["java_2e", "java programing"],
    "product management": ["PM", "product manager"]
}
# Register every keyword listed in keyword_dict with the processor.
keyword_processor.add_keywords_from_dict(keyword_dict)
print(
    keyword_processor.extract_keywords(
        'I am a product manager for a java_2e platform'))
# expected output: ['product management', 'java']
# Remove a single keyword, then run the same sentence again.
keyword_processor.remove_keyword('java_2e')
print(
    keyword_processor.extract_keywords(
        'I am a product manager for a java_2e platform'))
# expected output: ['product management']

# Keywords can also be removed in bulk, from a dictionary or from a list.
keyword_processor.remove_keywords_from_dict({"product management": ["PM"]})
keyword_processor.remove_keywords_from_list(["java programing"])
# The return value of this call is discarded (it is not printed), so running
# this as a script shows nothing for it.
keyword_processor.extract_keywords(
    'I am a product manager for a java_2e platform')
# expected result: ['product management']

# Query the number of keywords that have been added
keyword_processor = KeywordProcessor()
# Keywords in dictionary form: each key is the term that is finally returned
# on a match, but the key itself is not part of the set of keywords searched for
keyword_dict = {
    "java": ["java_2e", "java programing"],
    "product management": ["PM", "product manager"]
}
keyword_processor.add_keywords_from_dict(keyword_dict)
print(len(keyword_processor))
# output 4
# Remove keywords
keyword_processor = KeywordProcessor()
keyword_dict = {
    "java": ["java_2e", "java programing"],
    "product management": ["PM", "product manager"]
}
keyword_processor.add_keywords_from_dict(keyword_dict)
print(keyword_processor.extract_keywords('I am a product manager for a java_2e platform'))
# output ['product management', 'java']
keyword_processor.remove_keyword('java_2e')
print(keyword_processor.extract_keywords('I am a product manager for a java_2e platform'))
# output ['product management']

# you can also remove keywords from a list / dictionary
keyword_processor.remove_keywords_from_dict({"product management": ["PM"]})
keyword_processor.remove_keywords_from_list(["java programing"])
# The return value of this call is discarded (it is not printed).
keyword_processor.extract_keywords('I am a product manager for a java_2e platform')
# expected result: ['product management']

# Query the number of keywords that have been added
keyword_processor = KeywordProcessor()
# Keywords in dictionary form: each key is the term that is finally returned
# on a match, but the key itself is not part of the set of keywords searched for
keyword_dict = {
    "java": ["java_2e", "java programing"],
    "product management": ["PM", "product manager"]
}
keyword_processor.add_keywords_from_dict(keyword_dict)
print(len(keyword_processor))
# output 4
Exemplo n.º 8
0
 def remove(self):
     """Extract keywords from ``self.text`` after pruning the keyword set.

     A fresh KeywordProcessor is loaded from ``self.keyword_dict``; the
     entries in ``self.remove_keyword`` are then removed via
     ``remove_keywords_from_dict`` before extraction.

     Returns:
         Whatever ``extract_keywords`` returns for ``self.text``.
     """
     processor = KeywordProcessor()
     processor.add_keywords_from_dict(self.keyword_dict)
     processor.remove_keywords_from_dict(self.remove_keyword)
     return processor.extract_keywords(self.text)