示例#1
0
class TestStemmer(unittest.TestCase):
    """Tests for ``PydicStemmer`` run against the ``dict1.txt`` dictionary."""

    def setUp(self):
        """Create a stemmer, load the dictionary and build its index."""
        self.stemmer = PydicStemmer()
        self.dictionary = PyDic('dict1.txt')
        self.index = self.stemmer.build_index(self.dictionary)

    def test_build_index(self):
        """The index built from the dictionary contains the expected keys."""
        # NOTE(review): the keys look like reversed word forms (i.e. the
        # index seems to be keyed by word endings) -- confirm against
        # PydicStemmer.build_index.
        # assertIn reports the missing key on failure, unlike assertTrue.
        self.assertIn(u'tok', self.index)
        self.assertIn(u'seip', self.index)
        self.assertIn(u'ywoteloifartlu', self.index)

    def test_find_base_word(self):
        """A known ending yields a (number, word) pair; gibberish yields None."""
        # NOTE(review): the 5 is presumably the length of the ending shared
        # by the query and the matched dictionary word -- confirm.
        self.assertEqual(
            self.stemmer.find_base_word(self.index, u'mełkot'),
            (5, u'bełkot'))

        # No dictionary word matches this input, so no base word is returned.
        self.assertIsNone(
            self.stemmer.find_base_word(self.index, u'sadfasdf'))

    def test_inflect(self):
        """The forms of a known word are mapped onto a new base word."""
        self.assertEqual(
            self.stemmer.inflect(
                [u'bełkot', u'bełkotu', u'bełkotem', u'bełkotowi', ],
                u'rwałkot'),
            [u'rwałkot', u'rwałkotu', u'rwałkotem', u'rwałkotowi', ]
        )

    def test_process(self):
        """Processing a word yields the expected forms, in order."""
        # assertSequenceEqual compares element by element, so the concrete
        # sequence type returned by process() does not matter.
        self.assertSequenceEqual(
            self.stemmer.process(self.dictionary, self.index, u'rwałkot'),
            [
                u"rwałkot", u"rwałkotu", u"rwałkotowi", u"rwałkotem",
                u"rwałkocie", u"rwałkoty", u"rwałkotów",
                u"rwałkotom", u"rwałkotami", u"rwałkotach",
            ]
        )
示例#2
0
class TestStemmer(unittest.TestCase):
    """Tests for ``PydicStemmer`` run against the ``dict1.txt`` dictionary."""

    def setUp(self):
        """Create a stemmer, load the dictionary and build its index."""
        self.stemmer = PydicStemmer()
        self.dictionary = PyDic('dict1.txt')
        self.index = self.stemmer.build_index(self.dictionary)

    def test_build_index(self):
        """The index built from the dictionary contains the expected keys."""
        # NOTE(review): the keys look like reversed word forms (i.e. the
        # index seems to be keyed by word endings) -- confirm against
        # PydicStemmer.build_index.
        # assertIn reports the missing key on failure, unlike assertTrue.
        self.assertIn(u'tok', self.index)
        self.assertIn(u'seip', self.index)
        self.assertIn(u'ywoteloifartlu', self.index)

    def test_find_base_word(self):
        """A known ending yields a (number, word) pair; gibberish yields None."""
        # NOTE(review): the 5 is presumably the length of the ending shared
        # by the query and the matched dictionary word -- confirm.
        self.assertEqual(
            self.stemmer.find_base_word(self.index, u'mełkot'),
            (5, u'bełkot'))

        # No dictionary word matches this input, so no base word is returned.
        self.assertIsNone(
            self.stemmer.find_base_word(self.index, u'sadfasdf'))

    def test_inflect(self):
        """The forms of a known word are mapped onto a new base word."""
        self.assertEqual(
            self.stemmer.inflect([
                u'bełkot',
                u'bełkotu',
                u'bełkotem',
                u'bełkotowi',
            ], u'rwałkot'), [
                u'rwałkot',
                u'rwałkotu',
                u'rwałkotem',
                u'rwałkotowi',
            ])

    def test_process(self):
        """Processing a word yields the expected forms, in order."""
        # assertSequenceEqual compares element by element, so the concrete
        # sequence type returned by process() does not matter.
        self.assertSequenceEqual(
            self.stemmer.process(self.dictionary, self.index, u'rwałkot'), [
                u"rwałkot",
                u"rwałkotu",
                u"rwałkotowi",
                u"rwałkotem",
                u"rwałkocie",
                u"rwałkoty",
                u"rwałkotów",
                u"rwałkotom",
                u"rwałkotami",
                u"rwałkotach",
            ])