Пример #1
0
 def test_parse_dictionary_path(self):
     """Tokenize using an explicitly supplied dictionary directory.

     The checks run only when the hard-coded dictionary directory exists on
     this machine; otherwise the method returns without asserting anything.
     """
     dictionary_dir = '/usr/local/lib/mecab/dic/mecab-ipadic-neologd'
     if not os.path.exists(dictionary_dir):
         # Dictionary is not present at this location: nothing to verify.
         return
     tokenizer = MecabWrapper(dictType=None, path_dictionary=dictionary_dir)
     # The wrapper is expected to keep the path it was given.
     assert tokenizer._path_dictionary == dictionary_dir
     result = tokenizer.tokenize(sentence=self.test_senetence, return_list=False)
     assert isinstance(result, TokenizedSenetence)
Пример #2
0
    def test_init_userdict(self):
        """Check tokenization when every dictionary is enabled.

        Builds the wrapper with ``dictType='all'`` plus the user dictionary
        CSV, tokenizes the fixture sentence as a list and expects the
        target token to be one of its items.
        """
        tokenizer = MecabWrapper(dictType='all', pathUserDictCsv=self.path_user_dict)
        assert isinstance(tokenizer, MecabWrapper)

        tokens = tokenizer.tokenize(sentence=self.test_senetence, return_list=True)
        assert isinstance(tokens, list)
        assert 'さくらまな' in tokens
Пример #3
0
    def test_parse_jumandic(self):
        """Tokenize with the jumandic dictionary and check its domain tag.

        Every token whose stem matches the target word must carry the
        domain annotation in its analyzed line.
        """
        tokenizer = MecabWrapper(dictType='jumandic')
        assert isinstance(tokenizer, MecabWrapper)

        sentence_obj = tokenizer.tokenize(sentence=self.test_senetence, return_list=False)
        assert isinstance(sentence_obj, TokenizedSenetence)
        for token in sentence_obj.tokenized_objects:
            if token.word_stem != '女優':
                continue
            # Per the original author's note, this domain tag is emitted
            # only by Jumandic.
            assert 'ドメイン:文化・芸術' in token.analyzed_line
Пример #4
0
 def test_init_userdict(self):
     """Check that ipadic plus the user dictionary yields the target stem.

     The sentence is tokenized into a ``TokenizedSenetence`` and at least
     one of its tokenized objects must have the expected word stem.
     """
     tokenizer = MecabWrapper(dictType='ipadic', pathUserDictCsv=self.path_user_dict)
     assert isinstance(tokenizer, MecabWrapper)
     sentence_obj = tokenizer.tokenize(sentence=self.test_senetence, return_list=False)
     assert isinstance(sentence_obj, TokenizedSenetence)
     found = any(
         token.word_stem == 'さくらまな'
         for token in sentence_obj.tokenized_objects
     )
     assert found
Пример #5
0
    def test_neologd_parse(self):
        """Tokenize both fixture sentences with the neologd dictionary.

        For each sentence the result must be a ``TokenizedSenetence`` whose
        ``convert_list_object()`` output is a list containing only ``str``
        items.
        """
        mecab_obj = MecabWrapper(dictType='neologd')
        for sentence in (self.test_senetence, self.test_sentence2):
            parsed_obj = mecab_obj.tokenize(sentence=sentence)
            # assertTrue(obj, cls) would treat cls as the failure message and
            # only check truthiness; assertIsInstance does the type check.
            self.assertIsInstance(parsed_obj, TokenizedSenetence)
            morphs = parsed_obj.convert_list_object()
            self.assertIsInstance(morphs, list)
            self.assertTrue(all(isinstance(mrph, str) for mrph in morphs))
Пример #6
0
    def test_neologd_parse(self):
        """* Test case
        - Confirm that sentences are split correctly with the neologd dictionary.

        Each fixture sentence must tokenize into a ``TokenizedSenetence``
        whose ``convert_list_object()`` result is a list of ``str`` items.
        """
        mecab_obj = MecabWrapper(dictType='neologd')
        for sentence in (self.test_senetence, self.test_sentence2):
            parsed_obj = mecab_obj.tokenize(sentence=sentence)
            # The second argument of assertTrue is only a failure message, so
            # the type has to be verified with assertIsInstance instead.
            self.assertIsInstance(parsed_obj, TokenizedSenetence)
            morphs = parsed_obj.convert_list_object()
            self.assertIsInstance(morphs, list)
            self.assertTrue(all(isinstance(mrph, str) for mrph in morphs))
 def test_init_userdict(self):
     """Check tokenization when a user dictionary is supplied with ipadic.

     The sentence is tokenized with ``return_list=True`` and the target
     token must be one of the returned items.
     """
     mecab_obj = MecabWrapper(dictType='ipadic',
                              pathUserDictCsv=self.path_user_dict)
     assert isinstance(mecab_obj, MecabWrapper)
     parsed_obj = mecab_obj.tokenize(sentence=self.test_senetence,
                                     return_list=True)
     # A membership test replaces the manual flag loop and its no-op
     # ``for ... else: pass`` branch.
     assert u'さくらまな' in parsed_obj
Пример #8
0
    def test_default_parse(self):
        """Tokenize both fixture sentences with the ipadic dictionary.

        Each call requests list output; the result must be a list whose
        items are all ``str``.
        """
        tokenizer = MecabWrapper(dictType="ipadic")
        assert isinstance(tokenizer, MecabWrapper)

        first_tokens = tokenizer.tokenize(sentence=self.test_senetence, return_list=True)
        assert isinstance(first_tokens, list)
        assert all(isinstance(token, str) for token in first_tokens)

        second_tokens = tokenizer.tokenize(sentence=self.test_sentence2, return_list=True)
        assert isinstance(second_tokens, list)
        assert all(isinstance(token, str) for token in second_tokens)
 def test_default_parse(self):
     """* Test case
     - Check behaviour in the default (ipadic) configuration.

     The sentence is tokenized as a list; every item must be ``str`` on
     Python 3 and an instance of ``string_types`` otherwise.
     """
     tokenizer = MecabWrapper(dictType="ipadic")
     assert isinstance(tokenizer, MecabWrapper)
     tokens = tokenizer.tokenize(sentence=self.test_senetence,
                                 return_list=True)
     assert isinstance(tokens, list)
     # Pick the expected text type once, based on the running interpreter.
     expected_type = str if python_version >= (3, 0, 0) else string_types
     for token in tokens:
         assert isinstance(token, expected_type)
 def test_init_alldict(self):
     """* Test case
     - Constructing the wrapper with ``dictType='all'`` and a user
       dictionary is expected to raise an exception.
     """
     # Only the constructor call belongs inside the context: an assert placed
     # here would never run when the constructor raises, and an AssertionError
     # from it would itself satisfy assertRaises(Exception).
     with self.assertRaises(Exception):
         MecabWrapper(dictType='all',
                      pathUserDictCsv=self.path_user_dict)
 def test_parse_jumandic(self):
     """Constructing the wrapper with ``dictType='jumandic'`` must raise."""
     # Keep only the raising call inside the context; a trailing assert would
     # be dead code on success and could mask a failure via AssertionError.
     with self.assertRaises(Exception):
         MecabWrapper(dictType='jumandic')