def test_normalize_for_romanian(self):
    """Check how NGram.normalize treats Romanian s/t diacritic variants.

    The cedilla forms (U+015F, U+0163) are expected to come back
    unchanged, while the comma-below forms (U+0219, U+021B) are expected
    to be folded onto the corresponding cedilla forms.
    """
    # (input character, expected normalized character), checked in order.
    expectations = (
        (six.u('\u015f'), six.u('\u015f')),
        (six.u('\u0163'), six.u('\u0163')),
        (six.u('\u0219'), six.u('\u015f')),
        (six.u('\u021b'), six.u('\u0163')),
    )
    for char, normalized in expectations:
        self.assertEqual(NGram.normalize(char), normalized)
def test_normalize_with_cjk_kanji(self):
    """Check NGram.normalize on a sample of CJK ideographs from U+4E00 up.

    Most sampled code points are expected to map to themselves. The
    exceptions exercised here are U+4E03 (expected U+4E01) and
    U+4E24 / U+4E25 (both expected U+4E13).
    """
    # (input character, expected normalized character), checked in order.
    expectations = (
        (six.u('\u4E00'), six.u('\u4E00')),
        (six.u('\u4E01'), six.u('\u4E01')),
        (six.u('\u4E02'), six.u('\u4E02')),
        (six.u('\u4E03'), six.u('\u4E01')),  # folded onto U+4E01
        (six.u('\u4E04'), six.u('\u4E04')),
        (six.u('\u4E05'), six.u('\u4E05')),
        (six.u('\u4E06'), six.u('\u4E06')),
        (six.u('\u4E07'), six.u('\u4E07')),
        (six.u('\u4E08'), six.u('\u4E08')),
        (six.u('\u4E09'), six.u('\u4E09')),
        (six.u('\u4E10'), six.u('\u4E10')),
        (six.u('\u4E11'), six.u('\u4E11')),
        (six.u('\u4E12'), six.u('\u4E12')),
        (six.u('\u4E13'), six.u('\u4E13')),
        (six.u('\u4E14'), six.u('\u4E14')),
        (six.u('\u4E15'), six.u('\u4E15')),
        (six.u('\u4E1e'), six.u('\u4E1e')),
        (six.u('\u4E1f'), six.u('\u4E1f')),
        (six.u('\u4E20'), six.u('\u4E20')),
        (six.u('\u4E21'), six.u('\u4E21')),
        (six.u('\u4E22'), six.u('\u4E22')),
        (six.u('\u4E23'), six.u('\u4E23')),
        (six.u('\u4E24'), six.u('\u4E13')),  # folded onto U+4E13
        (six.u('\u4E25'), six.u('\u4E13')),  # folded onto U+4E13
        (six.u('\u4E30'), six.u('\u4E30')),
    )
    for char, normalized in expectations:
        self.assertEqual(NGram.normalize(char), normalized)
def test_normalize_with_latin(self):
    """Check NGram.normalize around the edges of the ASCII letter ranges.

    The boundary letters A, Z, a and z are expected to come back
    unchanged, as are U+0080 and U+00A1. The other sampled code points
    (controls, space, a digit, punctuation next to the letter ranges,
    U+007F and U+00A0) are expected to become a single space.
    """
    # (input character, expected normalized character), checked in order.
    expectations = (
        (six.u('\u0000'), ' '),
        (six.u('\u0009'), ' '),
        (six.u('\u0020'), ' '),
        (six.u('\u0030'), ' '),
        (six.u('\u0040'), ' '),
        (six.u('\u0041'), six.u('\u0041')),
        (six.u('\u005a'), six.u('\u005a')),
        (six.u('\u005b'), ' '),
        (six.u('\u0060'), ' '),
        (six.u('\u0061'), six.u('\u0061')),
        (six.u('\u007a'), six.u('\u007a')),
        (six.u('\u007b'), ' '),
        (six.u('\u007f'), ' '),
        (six.u('\u0080'), six.u('\u0080')),
        (six.u('\u00a0'), ' '),
        (six.u('\u00a1'), six.u('\u00a1')),
    )
    for char, normalized in expectations:
        self.assertEqual(NGram.normalize(char), normalized)