def test_normalize_vietnamese(self):
    """Check ``NGram.normalize_vi`` against a table of expected results.

    Two groups of assertions are made:

    * inputs without a trailing combining mark (empty string, ASCII
      letters, digits and an already precomposed U+00C0) are expected
      to come back unchanged;
    * each base letter in the table followed by one of the combining
      marks U+0300 (grave), U+0301 (acute), U+0303 (tilde),
      U+0309 (hook above) or U+0323 (dot below) is expected to come
      back as the single precomposed character in the matching column.
    """
    u = six.u
    normalize = NGram.normalize_vi

    # Inputs that carry no combining mark must pass through untouched.
    for plain in ('', 'ABC', '012'):
        self.assertEqual(normalize(u(plain)), plain)
    self.assertEqual(normalize(u('\u00c0')), u('\u00c0'))

    # Combining marks, in the same order as the result columns below.
    tone_marks = (
        '\u0300',  # grave
        '\u0301',  # acute
        '\u0303',  # tilde
        '\u0309',  # hook above
        '\u0323',  # dot below
    )

    # One row per base letter: the base itself, then the expected
    # precomposed character for each entry of ``tone_marks``.
    composed = (
        # base      grave     acute     tilde     hook      dot
        ('\u0041', '\u00C0', '\u00C1', '\u00C3', '\u1EA2', '\u1EA0'),  # A
        ('\u0045', '\u00C8', '\u00C9', '\u1EBC', '\u1EBA', '\u1EB8'),  # E
        ('\u0049', '\u00CC', '\u00CD', '\u0128', '\u1EC8', '\u1ECA'),  # I
        ('\u004F', '\u00D2', '\u00D3', '\u00D5', '\u1ECE', '\u1ECC'),  # O
        ('\u0055', '\u00D9', '\u00DA', '\u0168', '\u1EE6', '\u1EE4'),  # U
        ('\u0059', '\u1EF2', '\u00DD', '\u1EF8', '\u1EF6', '\u1EF4'),  # Y
        ('\u0061', '\u00E0', '\u00E1', '\u00E3', '\u1EA3', '\u1EA1'),  # a
        ('\u0065', '\u00E8', '\u00E9', '\u1EBD', '\u1EBB', '\u1EB9'),  # e
        ('\u0069', '\u00EC', '\u00ED', '\u0129', '\u1EC9', '\u1ECB'),  # i
        ('\u006F', '\u00F2', '\u00F3', '\u00F5', '\u1ECF', '\u1ECD'),  # o
        ('\u0075', '\u00F9', '\u00FA', '\u0169', '\u1EE7', '\u1EE5'),  # u
        ('\u0079', '\u1EF3', '\u00FD', '\u1EF9', '\u1EF7', '\u1EF5'),  # y
        ('\u00C2', '\u1EA6', '\u1EA4', '\u1EAA', '\u1EA8', '\u1EAC'),  # A circumflex
        ('\u00CA', '\u1EC0', '\u1EBE', '\u1EC4', '\u1EC2', '\u1EC6'),  # E circumflex
        ('\u00D4', '\u1ED2', '\u1ED0', '\u1ED6', '\u1ED4', '\u1ED8'),  # O circumflex
        ('\u00E2', '\u1EA7', '\u1EA5', '\u1EAB', '\u1EA9', '\u1EAD'),  # a circumflex
        ('\u00EA', '\u1EC1', '\u1EBF', '\u1EC5', '\u1EC3', '\u1EC7'),  # e circumflex
        ('\u00F4', '\u1ED3', '\u1ED1', '\u1ED7', '\u1ED5', '\u1ED9'),  # o circumflex
        ('\u0102', '\u1EB0', '\u1EAE', '\u1EB4', '\u1EB2', '\u1EB6'),  # A breve
        ('\u0103', '\u1EB1', '\u1EAF', '\u1EB5', '\u1EB3', '\u1EB7'),  # a breve
        ('\u01A0', '\u1EDC', '\u1EDA', '\u1EE0', '\u1EDE', '\u1EE2'),  # O horn
        ('\u01A1', '\u1EDD', '\u1EDB', '\u1EE1', '\u1EDF', '\u1EE3'),  # o horn
        ('\u01AF', '\u1EEA', '\u1EE8', '\u1EEE', '\u1EEC', '\u1EF0'),  # U horn
        ('\u01B0', '\u1EEB', '\u1EE9', '\u1EEF', '\u1EED', '\u1EF1'),  # u horn
    )

    # Walk the table mark by mark (outer) and letter by letter (inner).
    # The literals are joined *before* six.u() so that the escape text is
    # decoded in one go on interpreters where six.u() does the decoding.
    for column, mark in enumerate(tone_marks, 1):
        for row in composed:
            decomposed = u(row[0] + mark)
            self.assertEqual(normalize(decomposed), u(row[column]))