示例#1
0
    def test_parse_args(self):
        '''Test invocation of parse with bad arguments.

        Every invalid call is expected to raise MeCabError. A fresh MeCab
        instance is created for each case.
        '''
        # (positional args, keyword args) for each invalid call made on a
        # MeCab instance created without options
        bad_calls = [
            # None text
            ((None,), {}),
            # text must be str
            ((99,), {}),
            # boundary_constraints must be re or str
            (('foo',), {'boundary_constraints': 99.99}),
            # feature_constraints must be tuple
            (('foo',), {'feature_constraints': []}),
        ]
        for args, kwargs in bad_calls:
            with mecab.MeCab() as nm, self.assertRaises(api.MeCabError):
                nm.parse(*args, **kwargs)

        # -p / --partial, text must end with a newline
        with mecab.MeCab('--partial') as nm, \
                self.assertRaises(api.MeCabError):
            nm.parse('foo')
示例#2
0
    def test_parse_tostr_default(self):
        '''Test simple default parsing.

        The stripped output of parse is compared with the result of
        self._mecab_parse('') after conversion through self._2bytes.
        '''
        with mecab.MeCab() as nm:
            parsed = nm.parse(self.text).strip()
            # Use the platform line separator before comparing; presumably
            # this matches the line endings of the helper output (confirm).
            from_parse = parsed.replace('\n', os.linesep)

            from_helper = self._2bytes(self._mecab_parse(''))

            self.assertEqual(from_parse, from_helper)
示例#3
0
 def test_sysdic(self):
     '''Test dictionary interface on system dictionary.

     Checks the charset, filepath, type and version attributes of the
     first entry in nm.dicts.
     '''
     with mecab.MeCab() as nm:
         sysdic = nm.dicts[0]
         cs = sysdic.charset.lower()
         self.assertIn(cs, self.CHARSETS)
         # The dot is escaped so that only a literal "sys.dic" suffix
         # matches; unescaped it would accept any character there.
         self.assertIsNotNone(re.search(r'sys\.dic$', sysdic.filepath))
         self.assertEqual(sysdic.type, 0)
         self.assertEqual(sysdic.version, 102)
示例#4
0
    def test_parse_tostr_partial(self):
        '''Test -p / --partial parsing to string.

        Each output line must start with the entry at the same position
        in the comma-separated expected string of the 'text10' fixture.
        '''
        with mecab.MeCab('-p') as nm:
            fixture = self.yaml.get('text10')
            text = self._u2str(fixture.get('text'))
            lines = nm.parse(text).split('\n')
            raw_prefixes = fixture.get('expected').get('str').split(',')
            prefixes = [self._u2str(p) for p in raw_prefixes]

            for idx, line in enumerate(lines):
                self.assertTrue(line.startswith(prefixes[idx]))
示例#5
0
    def test_parse_mecab_options_marginal(self):
        '''Test option-parsing: marginal.

        The short flag, the long flag and the dict form must all parse to
        the same options dictionary.
        '''
        want = {'marginal': True}
        forms = ('-m', '--marginal', {'marginal': True})
        with mecab.MeCab() as nm:
            for form in forms:
                parsed = nm._MeCab__parse_mecab_options(form)
                self.assertDictEqual(parsed, want)
示例#6
0
    def test_parse_mecab_options_allocatesentence(self):
        '''Test option-parsing: allocation-sentence.

        The short flag, the long flag and the dict form must all parse to
        the same options dictionary.
        '''
        want = {'allocate_sentence': True}
        forms = ('-C', '--allocate-sentence', {'allocate_sentence': True})
        with mecab.MeCab() as nm:
            for form in forms:
                parsed = nm._MeCab__parse_mecab_options(form)
                self.assertDictEqual(parsed, want)
示例#7
0
    def test_parse_mecab_options_allmorphs(self):
        '''Test option-parsing: all-morphs.

        The short flag, the long flag and the dict form must all parse to
        the same options dictionary.
        '''
        want = {'all_morphs': True}
        forms = ('-a', '--all-morphs', {'all_morphs': True})
        with mecab.MeCab() as nm:
            for form in forms:
                parsed = nm._MeCab__parse_mecab_options(form)
                self.assertDictEqual(parsed, want)
示例#8
0
    def test_parse_unicodeRstr(self):
        '''Test parse: unicode input (Python 2) and bytes input (Python 3).

        The literal is decoded on Python 2 and encoded on Python 3, and
        parse is expected to raise MeCabError for the resulting value.
        '''
        s = '日本語だよ、これが。'
        with mecab.MeCab() as nm:
            # Compare the numeric major version. A string comparison such
            # as sys.version < '3' is lexicographic, so a version string
            # like '10.0' would wrongly be treated as Python 2.
            if sys.version_info[0] < 3:
                b = s.decode('utf-8')
            else:
                b = s.encode('utf-8')

            with self.assertRaises(api.MeCabError):
                nm.parse(b)
示例#9
0
    def test_parse_tostr_feature(self):
        '''Test feature constraint parsing to string (output format does NOT apply).

        Inputs and expected lines come from the 'text11' YAML fixture; each
        output line is compared with the expected entry at the same index.
        '''
        with mecab.MeCab(r'-F%m,%f[0],%s\n') as nm:
            fixture = self.yaml.get('text11')
            text = self._u2str(fixture.get('text'))
            feature = self._u2str(fixture.get('feature'))
            # feature_constraints is passed as a tuple holding one tuple
            constraints = (tuple(feature.split(',')), )
            wanted = [self._u2str(item) for item in fixture.get('expected')]

            output = nm.parse(text, feature_constraints=constraints)
            lines = output.split('\n')

            for idx, line in enumerate(lines):
                self.assertEqual(line, wanted[idx])
示例#10
0
    def test_parse_override_node_format(self):
        '''Test node-format override when default is defined in rcfile.

        Features of nodes with stat == 0 are compared, in order, with the
        non-EOS tokens produced by self._mecab_parse for the same options.
        '''
        with mecab.MeCab(r'-r {} -O "" -F%m!\n'.format(self.testrc)) as nm:
            nodes = nm.parse(self.text, as_nodes=True)
            features = [node.feature for node in nodes if node.stat == 0]

            cli_args = ['-r', self.testrc, '-O', '', '-F%m!\\n']
            output = self._2bytes(self._mecab_parse(cli_args))
            tokens = [tok for tok in output.split()
                      if not tok.startswith('EOS')]

            for idx, tok in enumerate(tokens):
                self.assertEqual(tok, features[idx])
示例#11
0
 def test_parse_tonode_outputformat_errors(self):
     '''Test node parsing with output formatting errors:
        1. unknown node has no pronunciation value
        2. format missing leading [
        3. format missing ending ]

     Each format is expected to make parse raise MeCabError.
     '''
     s = '私はブルザエモンです。'
     formats = ['-F%f[8]', '-F%f1]', '-F%f[1']
     for argf in formats:
         with mecab.MeCab(argf) as nm:
             with self.assertRaises(api.MeCabError):
                 # list() forces full iteration over whatever parse
                 # returns when as_nodes=True.
                 list(nm.parse(s, as_nodes=True))
示例#12
0
    def test_parse_tostr(self):
        '''Test default parsing, across different output formats.

        For every option string, the output of parse (with newlines
        replaced by os.linesep) must equal the result of
        self._mecab_parse for the same option string.
        '''
        option_strings = (
            '', '-Owakati', '-Oyomi', '-Ochasen2', '-N2',
            r'-F%m\t%h\t%f[0]\n',
        )
        for opt in option_strings:
            with mecab.MeCab(opt) as nm:
                from_parse = nm.parse(self.text).replace('\n', os.linesep)
                from_helper = self._2bytes(self._mecab_parse(opt))
                self.assertEqual(from_parse, from_helper)
示例#13
0
    def test_init_libunset(self):
        '''Test for load error when MeCab lib is not found.

        Sets the environment variable named by mecab.MeCab.MECAB_PATH to a
        bogus path, expects MeCabError when MeCab is instantiated, and
        restores the previous environment state afterwards.
        '''
        env_key = mecab.MeCab.MECAB_PATH
        orig_env = os.getenv(env_key)
        try:
            os.environ[env_key] = '/foo/bar'

            with self.assertRaises(api.MeCabError) as cm:
                with mecab.MeCab():
                    # NOTE(review): this check sits inside the block that
                    # is expected to raise, so it does not run when
                    # MeCab() raises as intended. Consider moving it after
                    # the assertRaises block once the actual message text
                    # has been confirmed.
                    self.assertIsNotNone(
                        re.search('cannot load library /foo/bar',
                                  str(cm.exception)))
        finally:
            # os.environ only accepts strings. If the variable was not set
            # before the test, remove it again; assigning None would raise
            # TypeError and leave the bogus path in place.
            if orig_env is None:
                os.environ.pop(env_key, None)
            else:
                os.environ[env_key] = orig_env
示例#14
0
    def test_parse_mecab_options_userdic(self):
        '''Test option-parsing: userdic.

        Attached short flag, spaced short flag, long flag and dict form
        must all parse to the same options dictionary.
        '''
        want = {'userdic': '/baz/qux.dic'}
        forms = (
            '-u/baz/qux.dic',
            '-u /baz/qux.dic',
            '--userdic=/baz/qux.dic',
            {'userdic': '/baz/qux.dic'},
        )
        with mecab.MeCab() as nm:
            for form in forms:
                parsed = nm._MeCab__parse_mecab_options(form)
                self.assertDictEqual(parsed, want)
示例#15
0
    def test_parse_mecab_options_unkfeature(self):
        '''Test option-parsing: unk-feature.

        Attached short flag, spaced short flag, long flag and dict form
        must all parse to the same options dictionary.
        '''
        want = {'unk_feature': '!!!\\n'}
        forms = (
            '-x!!!\\n',
            '-x !!!\\n',
            '--unk-feature=!!!\\n',
            {'unk_feature': '!!!\\n'},
        )
        with mecab.MeCab() as nm:
            for form in forms:
                parsed = nm._MeCab__parse_mecab_options(form)
                self.assertDictEqual(parsed, want)
示例#16
0
    def test_parse_mecab_options_eonformat(self):
        '''Test option-parsing: eon-format.

        Attached short flag, spaced short flag, long flag and dict form
        must all parse to the same options dictionary.
        '''
        want = {'eon_format': '___\\n'}
        forms = (
            '-S___\\n',
            '-S ___\\n',
            '--eon-format=___\\n',
            {'eon_format': '___\\n'},
        )
        with mecab.MeCab() as nm:
            for form in forms:
                parsed = nm._MeCab__parse_mecab_options(form)
                self.assertDictEqual(parsed, want)
示例#17
0
    def test_parse_mecab_options_bosformat(self):
        '''Test option-parsing: bos-format.

        Attached short flag, spaced short flag, long flag and dict form
        must all parse to the same options dictionary.
        '''
        want = {'bos_format': '>>>\\n'}
        forms = (
            '-B>>>\\n',
            '-B >>>\\n',
            '--bos-format=>>>\\n',
            {'bos_format': '>>>\\n'},
        )
        with mecab.MeCab() as nm:
            for form in forms:
                parsed = nm._MeCab__parse_mecab_options(form)
                self.assertDictEqual(parsed, want)
示例#18
0
    def test_parse_mecab_options_unkformat(self):
        '''Test option-parsing: unk-format.

        Attached short flag, spaced short flag, long flag and dict form
        must all parse to the same options dictionary.
        '''
        want = {'unk_format': '???\\n'}
        forms = (
            '-U???\\n',
            '-U ???\\n',
            '--unk-format=???\\n',
            {'unk_format': '???\\n'},
        )
        with mecab.MeCab() as nm:
            for form in forms:
                parsed = nm._MeCab__parse_mecab_options(form)
                self.assertDictEqual(parsed, want)
示例#19
0
    def test_parse_mecab_options_nodeformat(self):
        '''Test option-parsing: node-format.

        Attached short flag, spaced short flag, long flag and dict form
        must all parse to the same options dictionary.
        '''
        want = {'node_format': '%m\\n'}
        forms = (
            '-F%m\\n',
            '-F %m\\n',
            '--node-format=%m\\n',
            {'node_format': '%m\\n'},
        )
        with mecab.MeCab() as nm:
            for form in forms:
                parsed = nm._MeCab__parse_mecab_options(form)
                self.assertDictEqual(parsed, want)
示例#20
0
    def test_parse_mecab_options_dicdir(self):
        '''Test option-parsing: dicdir.

        Attached short flag, spaced short flag, long flag and dict form
        must all parse to the same options dictionary.
        '''
        want = {'dicdir': '/foo/bar'}
        forms = (
            '-d/foo/bar',
            '-d /foo/bar',
            '--dicdir=/foo/bar',
            {'dicdir': '/foo/bar'},
        )
        with mecab.MeCab() as nm:
            for form in forms:
                parsed = nm._MeCab__parse_mecab_options(form)
                self.assertDictEqual(parsed, want)
示例#21
0
    def test_parse_tonode_default(self):
        '''Test node parsing, skipping over any BOS or EOS nodes.

        Nodes with stat == 0 are compared, in order, with the non-EOS
        lines returned by self._mecab_parse for the same option string.
        '''
        for opt in ['', '-N2']:
            with mecab.MeCab(opt) as nm:
                nodes = [node
                         for node in nm.parse(self.text, as_nodes=True)
                         if node.stat == 0]

                output = self._2bytes(self._mecab_parse(opt))
                rows = [row for row in output.split(os.linesep)
                        if row != 'EOS']

                for idx, row in enumerate(rows):
                    # Each row must split into exactly surface and feature
                    surface, feature = row.split()
                    self.assertEqual(nodes[idx].surface, surface)
                    self.assertEqual(nodes[idx].feature, feature)
示例#22
0
    def test_parse_mecab_options_outputformattype(self):
        '''Test option-parsing: output-format-type.

        Attached short flag, spaced short flag, long flag and dict form
        must all parse to the same options dictionary.
        '''
        want = {'output_format_type': 'wakati'}
        forms = (
            '-Owakati',
            '-O wakati',
            '--output-format-type=wakati',
            {'output_format_type': 'wakati'},
        )
        with mecab.MeCab() as nm:
            for form in forms:
                parsed = nm._MeCab__parse_mecab_options(form)
                self.assertDictEqual(parsed, want)
示例#23
0
    def test_parse_mecab_options_theta(self):
        '''Test option-parsing: theta.

        All four option forms must parse to the same dictionary, and a
        non-float value must raise ValueError whose text names --theta.
        '''
        want = {'theta': 0.777}
        forms = ('-t0.777', '-t 0.777', '--theta=0.777', {'theta': 0.777})
        with mecab.MeCab() as nm:
            for form in forms:
                parsed = nm._MeCab__parse_mecab_options(form)
                self.assertDictEqual(parsed, want)

            # ValueError with message if theta is not a float
            with self.assertRaises(ValueError) as ctx:
                nm._MeCab__parse_mecab_options('--theta=XXX')
            message = str(ctx.exception)
            self.assertIsNotNone(re.search('--theta', message))
示例#24
0
    def test_parse_mecab_options_nbest(self):
        '''Test option-parsing: nbest.

        All four option forms must parse to the same dictionary, and a
        non-int value must raise ValueError whose text names --nbest.
        '''
        want = {'nbest': 2}
        forms = ('-N2', '-N 2', '--nbest=2', {'nbest': 2})
        with mecab.MeCab() as nm:
            for form in forms:
                parsed = nm._MeCab__parse_mecab_options(form)
                self.assertDictEqual(parsed, want)

            # ValueError with message if nbest is not an int
            with self.assertRaises(ValueError) as ctx:
                nm._MeCab__parse_mecab_options('-N0.99')
            message = str(ctx.exception)
            self.assertIsNotNone(re.search('--nbest', message))
示例#25
0
    def test_parse_mecab_options_costfactor(self):
        '''Test option-parsing: cost-factor.

        All four option forms must parse to the same dictionary, and a
        non-int value must raise ValueError whose text names
        --cost-factor.
        '''
        want = {'cost_factor': 666}
        forms = (
            '-c666',
            '-c 666',
            '--cost-factor=666',
            {'cost_factor': 666},
        )
        with mecab.MeCab() as nm:
            for form in forms:
                parsed = nm._MeCab__parse_mecab_options(form)
                self.assertDictEqual(parsed, want)

            # ValueError with message if cost_factor is not an int
            with self.assertRaises(ValueError) as ctx:
                nm._MeCab__parse_mecab_options('-c0.99')
            message = str(ctx.exception)
            self.assertIsNotNone(re.search('--cost-factor', message))
示例#26
0
    def test_parse_mecab_options_maxgroupingsize(self):
        '''Test option-parsing: max-grouping-size.

        All four option forms must parse to the same dictionary, and a
        non-int value must raise ValueError whose text names
        --max-grouping-size.
        '''
        want = {'max_grouping_size': 99}
        forms = (
            '-M99',
            '-M 99',
            '--max-grouping-size=99',
            {'max_grouping_size': 99},
        )
        with mecab.MeCab() as nm:
            for form in forms:
                parsed = nm._MeCab__parse_mecab_options(form)
                self.assertDictEqual(parsed, want)

            # ValueError with message if max_grouping_size is not an int
            with self.assertRaises(ValueError) as ctx:
                nm._MeCab__parse_mecab_options('-M0.99')
            message = str(ctx.exception)
            self.assertIsNotNone(re.search('--max-grouping-size', message))
示例#27
0
    def test_parse_mecab_options_inputbuffersize(self):
        '''Test option-parsing: input-buffer-size.

        All four option forms must parse to the same dictionary, and a
        non-int value must raise ValueError whose text names
        --input-buffer-size.
        '''
        want = {'input_buffer_size': 8888}
        forms = (
            '-b8888',
            '-b 8888',
            '--input-buffer-size=8888',
            {'input_buffer_size': 8888},
        )
        with mecab.MeCab() as nm:
            for form in forms:
                parsed = nm._MeCab__parse_mecab_options(form)
                self.assertDictEqual(parsed, want)

            # ValueError with message if input_buffer_size is not an int
            with self.assertRaises(ValueError) as ctx:
                nm._MeCab__parse_mecab_options('-b0.99')
            message = str(ctx.exception)
            self.assertIsNotNone(re.search('--input-buffer-size', message))
示例#28
0
    def test_build_options_str(self):
        '''Test option-building logic.

        Every listed pattern must be found in the built option string,
        and the 'unknown' key must not appear in it.
        '''
        settings = {
            'dicdir': '/foo',
            'userdic': '/bar',
            'lattice_level': 444,
            'output_format_type': 'yomi',
            'all_morphs': True,
            'nbest': 555,
            'partial': True,
            'marginal': True,
            'max_grouping_size': 666,
            'node_format': 'node\\n',
            'unk_format': 'unk\\n',
            'bos_format': 'bos\\n',
            'eos_format': 'eos\\n',
            'eon_format': 'eon\\n',
            'unk_feature': 'unkf\\n',
            'input_buffer_size': 777,
            'allocate_sentence': True,
            'theta': 0.999,
            'cost_factor': 888,
            'unknown': 1000
        }
        # These are regular expressions: the four backslashes in the
        # source produce a pattern matching one literal backslash
        # followed by 'n'.
        patterns = [
            '--dicdir=/foo',
            '--userdic=/bar',
            '--lattice-level=444',
            '--output-format-type=yomi',
            '--all-morphs',
            '--nbest=555',
            '--partial',
            '--marginal',
            '--max-grouping-size=666',
            '--node-format=node\\\\n',
            '--unk-format=unk\\\\n',
            '--bos-format=bos\\\\n',
            '--eos-format=eos\\\\n',
            '--eon-format=eon\\\\n',
            '--unk-feature=unkf\\\\n',
            '--input-buffer-size=777',
            '--allocate-sentence',
            '--theta=0.999',
            '--cost-factor=888'
        ]
        with mecab.MeCab() as nm:
            built = self._2bytes(nm._MeCab__build_options_str(settings))

            for pattern in patterns:
                self.assertIsNotNone(re.search(pattern, built))
            self.assertIsNone(re.search('--unknown', built))
示例#29
0
    def test_parse_mecab_options_latticelevel(self):
        '''Test option-parsing: lattice-level warning.

        For each option form, sys.stderr is replaced by a StringIO buffer,
        the parsed options are checked, and the buffer content is searched
        for nm._WARN_LATTICE_LEVEL. The original stderr is always restored.
        '''
        forms = [
            '-l777',
            '-l 777',
            '--lattice-level=777',
            {'lattice_level': 777},
        ]
        with mecab.MeCab() as nm:
            # setting lattice-level issues warning on stderr
            saved_stderr = sys.stderr
            try:
                for form in forms:
                    # New buffer per form, so each iteration only sees
                    # what was written for that form
                    captured = StringIO()
                    sys.stderr = captured

                    parsed = nm._MeCab__parse_mecab_options(form)
                    self.assertDictEqual(parsed, {'lattice_level': 777})

                    written = captured.getvalue().strip()
                    found = re.search(nm._WARN_LATTICE_LEVEL, written)
                    self.assertIsNotNone(found)
            finally:
                sys.stderr = saved_stderr
示例#30
0
 def test_init_unknownoption(self):
     '''Test instantiation of MeCab with unrecognized option.

     Creating MeCab with '--unknown' is expected to raise MeCabError.
     '''
     with self.assertRaises(api.MeCabError) as ctx, mecab.MeCab('--unknown'):
         # NOTE(review): this check sits inside the block that is expected
         # to raise, so it does not run when MeCab('--unknown') raises as
         # intended. Consider moving it after the with statement once the
         # actual message text has been confirmed.
         found = re.search('--unknown', str(ctx.exception))
         self.assertIsNotNone(found)