示例#1
0
 def test_normalize_for_romanian(self):
     """Check how ``NGram.normalize`` treats Romanian s/t variants.

     The cedilla letters (U+015F, U+0163) are expected to come back
     unchanged, and the comma-below letters (U+0219, U+021B) are expected
     to be mapped onto the corresponding cedilla letters.
     """
     # (input, expected) pairs, verified in the listed order.
     cases = (
         (six.u('\u015f'), six.u('\u015f')),
         (six.u('\u0163'), six.u('\u0163')),
         (six.u('\u0219'), six.u('\u015f')),
         (six.u('\u021b'), six.u('\u0163')),
     )
     for source, expected in cases:
         self.assertEqual(NGram.normalize(source), expected)
示例#2
0
 def test_normalize_with_cjk_kanji(self):
     """Check ``NGram.normalize`` on a sample of CJK ideographs.

     Most sampled code points are expected to be returned unchanged.
     The exceptions asserted here are U+4E03 (expected U+4E01) and
     U+4E24 / U+4E25 (both expected U+4E13).
     """
     # Escape strings are passed through six.u() inside the loop so the
     # literals stay identical on every interpreter the file supports.
     expectations = [
         ('\u4E00', '\u4E00'),
         ('\u4E01', '\u4E01'),
         ('\u4E02', '\u4E02'),
         ('\u4E03', '\u4E01'),
         ('\u4E04', '\u4E04'),
         ('\u4E05', '\u4E05'),
         ('\u4E06', '\u4E06'),
         ('\u4E07', '\u4E07'),
         ('\u4E08', '\u4E08'),
         ('\u4E09', '\u4E09'),
         ('\u4E10', '\u4E10'),
         ('\u4E11', '\u4E11'),
         ('\u4E12', '\u4E12'),
         ('\u4E13', '\u4E13'),
         ('\u4E14', '\u4E14'),
         ('\u4E15', '\u4E15'),
         ('\u4E1e', '\u4E1e'),
         ('\u4E1f', '\u4E1f'),
         ('\u4E20', '\u4E20'),
         ('\u4E21', '\u4E21'),
         ('\u4E22', '\u4E22'),
         ('\u4E23', '\u4E23'),
         ('\u4E24', '\u4E13'),
         ('\u4E25', '\u4E13'),
         ('\u4E30', '\u4E30'),
     ]
     for raw_in, raw_out in expectations:
         self.assertEqual(NGram.normalize(six.u(raw_in)), six.u(raw_out))
示例#3
0
 def test_normalize_with_latin(self):
     """Check ``NGram.normalize`` on Basic Latin / Latin-1 boundary values.

     The sampled non-letter code points (U+0000, U+0009, U+0020, U+0030,
     U+0040, U+005B, U+0060, U+007B, U+007F, U+00A0) are expected to
     become a single space. The ends of the A-Z and a-z ranges, plus
     U+0080 and U+00A1, are expected to be returned unchanged.
     """
     blank = ' '
     # (input, expected) pairs, verified in the listed order.
     cases = [
         (six.u('\u0000'), blank),
         (six.u('\u0009'), blank),
         (six.u('\u0020'), blank),
         (six.u('\u0030'), blank),
         (six.u('\u0040'), blank),
         (six.u('\u0041'), six.u('\u0041')),
         (six.u('\u005a'), six.u('\u005a')),
         (six.u('\u005b'), blank),
         (six.u('\u0060'), blank),
         (six.u('\u0061'), six.u('\u0061')),
         (six.u('\u007a'), six.u('\u007a')),
         (six.u('\u007b'), blank),
         (six.u('\u007f'), blank),
         (six.u('\u0080'), six.u('\u0080')),
         (six.u('\u00a0'), blank),
         (six.u('\u00a1'), six.u('\u00a1')),
     ]
     for source, expected in cases:
         self.assertEqual(NGram.normalize(source), expected)
示例#4
0
 def test_normalize_for_romanian(self):
     """Verify Romanian s/t handling in ``NGram.normalize``.

     Expects the cedilla letters to be left as they are and the
     comma-below letters to be converted to the cedilla letters.
     """
     s_cedilla = six.u('\u015f')
     t_cedilla = six.u('\u0163')
     s_comma_below = six.u('\u0219')
     t_comma_below = six.u('\u021b')

     # Cedilla forms are expected to pass through untouched.
     self.assertEqual(NGram.normalize(s_cedilla), s_cedilla)
     self.assertEqual(NGram.normalize(t_cedilla), t_cedilla)

     # Comma-below forms are expected to map to their cedilla counterparts.
     self.assertEqual(NGram.normalize(s_comma_below), s_cedilla)
     self.assertEqual(NGram.normalize(t_comma_below), t_cedilla)
示例#5
0
 def test_normalize_with_cjk_kanji(self):
     """Verify ``NGram.normalize`` for a run of CJK ideograph code points.

     Every sampled code point is expected to map to itself except the
     three listed in ``remapped`` below.
     """
     # Code points whose expected result differs from the input.
     remapped = {
         '\u4E03': '\u4E01',
         '\u4E24': '\u4E13',
         '\u4E25': '\u4E13',
     }
     # Inputs, in the order they are asserted.
     samples = [
         '\u4E00', '\u4E01', '\u4E02', '\u4E03', '\u4E04',
         '\u4E05', '\u4E06', '\u4E07', '\u4E08', '\u4E09',
         '\u4E10', '\u4E11', '\u4E12', '\u4E13', '\u4E14',
         '\u4E15', '\u4E1e', '\u4E1f', '\u4E20', '\u4E21',
         '\u4E22', '\u4E23', '\u4E24', '\u4E25', '\u4E30',
     ]
     for raw in samples:
         actual = NGram.normalize(six.u(raw))
         expected = six.u(remapped.get(raw, raw))
         self.assertEqual(actual, expected)
示例#6
0
 def test_normalize_with_latin(self):
     """Verify ``NGram.normalize`` around the ASCII letter boundaries.

     Each entry pairs an input code point with a flag: ``True`` when the
     character is expected back unchanged, ``False`` when a single space
     is expected instead.
     """
     checks = (
         ('\u0000', False),
         ('\u0009', False),
         ('\u0020', False),
         ('\u0030', False),
         ('\u0040', False),
         ('\u0041', True),   # first uppercase letter
         ('\u005a', True),   # last uppercase letter
         ('\u005b', False),
         ('\u0060', False),
         ('\u0061', True),   # first lowercase letter
         ('\u007a', True),   # last lowercase letter
         ('\u007b', False),
         ('\u007f', False),
         ('\u0080', True),
         ('\u00a0', False),
         ('\u00a1', True),
     )
     for raw, kept in checks:
         actual = NGram.normalize(six.u(raw))
         if kept:
             expected = six.u(raw)
         else:
             expected = ' '
         self.assertEqual(actual, expected)