Пример #1
0
    def test_word_tokenize_mm(self):
        self.assertEqual(multi_cut.segment(None), [])
        self.assertEqual(multi_cut.segment(""), [])
        self.assertEqual(word_tokenize("", engine="mm"), [])
        self.assertEqual(
            word_tokenize("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย", engine="mm"),
            ["ฉัน", "รัก", "ภาษาไทย", "เพราะ", "ฉัน", "เป็น", "คนไทย"],
        )

        self.assertIsNotNone(multi_cut.find_all_segment("รถไฟฟ้ากรุงเทพมหานครBTS"))
Пример #2
0
    def test_word_tokenize_mm(self):
        self.assertEqual(multi_cut.segment(None), [])
        self.assertEqual(multi_cut.segment(""), [])
        self.assertEqual(word_tokenize("", engine="mm"), [])
        self.assertEqual(
            word_tokenize("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย", engine="mm"),
            ["ฉัน", "รัก", "ภาษาไทย", "เพราะ", "ฉัน", "เป็น", "คนไทย"],
        )

        self.assertIsNotNone(multi_cut.mmcut("ทดสอบ"))

        self.assertIsNotNone(multi_cut.find_all_segment("รถไฟฟ้ากรุงเทพมหานครBTS"))
        self.assertEqual(multi_cut.find_all_segment(None), [])
Пример #3
0
    def test_mm(self):
        self.assertEqual(multi_cut.segment(None), [])
        self.assertEqual(multi_cut.segment(""), [])
        self.assertIsNotNone(multi_cut.segment("ตัด", dict_trie([""])))

        self.assertEqual(word_tokenize("", engine="mm"), [])
        self.assertEqual(
            word_tokenize("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย", engine="mm"),
            ["ฉัน", "รัก", "ภาษาไทย", "เพราะ", "ฉัน", "เป็น", "คนไทย"],
        )
        self.assertEqual(
            word_tokenize("19...", engine="mm"),
            ['19', '...'],
        )
        self.assertEqual(
            word_tokenize("19.", engine="mm"),
            ['19', '.'],
        )
        self.assertEqual(
            word_tokenize("19.84", engine="mm"),
            ['19.84'],
        )
        self.assertEqual(
            word_tokenize("127.0.0.1", engine="mm"),
            ["127.0.0.1"],
        )
        self.assertEqual(
            word_tokenize("USD1,984.42", engine="mm"),
            ['USD', '1,984.42'],
        )

        self.assertIsNotNone(multi_cut.mmcut("ทดสอบ"))

        self.assertIsNotNone(
            multi_cut.find_all_segment("รถไฟฟ้ากรุงเทพมหานครBTS")
        )
        self.assertEqual(multi_cut.find_all_segment(None), [])