def setUp(self):
    """Attach a newly constructed ``UCD`` object to ``self`` as ``self.ucd``."""
    self.ucd = UCD()
class UCDTests(unittest.TestCase):
    """Tests for the character-property and normalization queries of ``UCD``.

    Every test calls one query on the ``UCD`` instance created in ``setUp``
    and checks the result for a single hand-picked character or string.
    Methods whose names start with ``ignore_`` do not match unittest's
    default ``test*`` method pattern, so they are not run unless renamed.
    """

    def setUp(self):
        # Each test gets its own UCD instance.
        self.ucd = UCD()

    def tearDown(self):
        pass

    def ignore_findit(self):
        # Exploratory scan, not a real test: prints every code point that
        # self.ucd.isformat() accepts but that ICU does not flag as
        # DEFAULT_IGNORABLE_CODE_POINT.
        from icu import Char, UProperty

        # Only the BMP is scanned; use 0x10ffff here to cover every plane.
        maxchar = 0xffff
        for usv in range(maxchar):
            char = chr(usv)
            # Alternative filter kept for reference:
            # if ((not self.ucd.is_specific_script(char)) and
            #    (not self.ucd.is_exemplar_wordbreak(char)) and
            #    (not Char.isUAlphabetic(char))):
            if self.ucd.isformat(char) and not Char.hasBinaryProperty(
                    char, UProperty.DEFAULT_IGNORABLE_CODE_POINT):
                print('%04X' % usv)
        # Always fails -- presumably so the runner displays the printed list.
        self.fail('exploratory scan finished; see printed code points')

    # marks

    def test_mark_true(self):
        self.assertTrue(self.ucd.ismark(u'\u0301'))

    def test_mark_false(self):
        self.assertFalse(self.ucd.ismark(u'e'))

    def test_nukta_true(self):
        self.assertTrue(self.ucd.isnukta(u'\u093c'))

    def test_nukta_false(self):
        self.assertFalse(self.ucd.isnukta(u'\u0915'))

    # always_combine

    def test_nukta_always_combine(self):
        self.assertTrue(self.ucd.is_always_combine(u'\u093c'))

    def test_diacritic_always_combine(self):
        self.assertFalse(self.ucd.is_always_combine(u'\u0300'))

    def test_virama_always_combine(self):
        self.assertFalse(self.ucd.is_always_combine(u'\u0ccd'))

    def test_matra_always_combine(self):
        self.assertFalse(self.ucd.is_always_combine(u'\u093e'))

    # sometimes_combine

    def test_nukta_sometimes_combine(self):
        self.assertFalse(self.ucd.is_sometimes_combine(u'\u093c'))

    def test_diacritic_sometimes_combine(self):
        self.assertTrue(self.ucd.is_sometimes_combine(u'\u0300'))

    def test_virama_sometimes_combine(self):
        self.assertFalse(self.ucd.is_sometimes_combine(u'\u0ccd'))

    def test_matra_sometimes_combine(self):
        self.assertFalse(self.ucd.is_sometimes_combine(u'\u093e'))

    # never_combine

    def test_nukta_never_combine(self):
        self.assertFalse(self.ucd.is_never_combine(u'\u093c'))

    def test_diacritic_never_combine(self):
        self.assertFalse(self.ucd.is_never_combine(u'\u0300'))

    def test_virama_never_combine(self):
        self.assertTrue(self.ucd.is_never_combine(u'\u0ccd'))

    def test_matra_never_combine(self):
        self.assertTrue(self.ucd.is_never_combine(u'\u093e'))

    # other tests

    def test_zwnj_true(self):
        self.assertTrue(self.ucd.is_zwnj(u'\u200c'))

    def test_zwnj_false(self):
        self.assertFalse(self.ucd.is_zwnj(u'\u200d'))

    def test_zwj_true(self):
        self.assertTrue(self.ucd.is_zwj(u'\u200d'))

    def test_zwj_false(self):
        self.assertFalse(self.ucd.is_zwj(u'\u200c'))

    def test_vs_true(self):
        self.assertTrue(self.ucd.is_vs(u'\ufe00'))

    def test_vs_false(self):
        self.assertFalse(self.ucd.is_vs(u'\u1000'))

    def test_number_true(self):
        self.assertTrue(self.ucd.isnumber(u'1'))

    def test_number_false(self):
        self.assertFalse(self.ucd.isnumber(u'a'))

    def test_format_true(self):
        self.assertTrue(self.ucd.isformat(u'\u2060'))

    def test_format_false(self):
        self.assertFalse(self.ucd.isformat(u'a'))

    def test_space_separator_true(self):
        self.assertTrue(self.ucd.is_space_separator(u'\u200a'))

    def test_space_separator_false(self):
        self.assertFalse(self.ucd.is_space_separator(u'a'))

    def test_pua_false_bmp(self):
        self.assertFalse(self.ucd.is_pua(u'a'))

    def test_pua_true_bmp(self):
        self.assertTrue(self.ucd.is_pua(u'\ue000'))

    def test_pua_false_nonbmp(self):
        self.assertFalse(self.ucd.is_pua(u'\U0001D510'))

    def test_pua_true_nonbmp_a(self):
        # Plane 15: Supplementary Private Use Area-A.
        self.assertTrue(self.ucd.is_pua(u'\U000fff80'))

    def test_pua_true_nonbmp_b(self):
        # Plane 16: Supplementary Private Use Area-B, so that this test
        # covers a different range from test_pua_true_nonbmp_a.
        self.assertTrue(self.ucd.is_pua(u'\U0010ff80'))

    def test_script_specific_true_latin(self):
        self.assertTrue(self.ucd.is_specific_script(u'\ua78c'))

    def test_script_specific_false_latin(self):
        self.assertFalse(self.ucd.is_specific_script(u'\u02bc'))

    def test_script_specific_false_chinese(self):
        self.assertFalse(self.ucd.is_specific_script(u'\ua700'))

    def test_script_specific_false_vedic(self):
        self.assertFalse(self.ucd.is_specific_script(u'\u1CD1'))

    def test_wordbreak_katakana(self):
        self.assertTrue(self.ucd.is_exemplar_wordbreak(u'\u309b'))

    def test_wordbreak_aletter(self):
        self.assertTrue(self.ucd.is_exemplar_wordbreak(u'\u05f3'))

    def test_wordbreak_midletter(self):
        self.assertFalse(self.ucd.is_exemplar_wordbreak(u'\u05f4'))

    def test_wordbreak_chinese(self):
        self.assertFalse(self.ucd.is_exemplar_wordbreak(u'\ua700'))

    # normalization
    # NOTE(review): the tusNN suffixes look like Unicode Standard version
    # numbers, and the ignore_ variants look disabled pending support for
    # those versions -- confirm.

    def test_nfc(self):
        text = u'e\u0301'
        self.assertEqual(u'\u00e9', self.ucd.normalize('NFC', text))

    def test_nfd(self):
        text = u'\u00e9'
        self.assertEqual(u'e\u0301', self.ucd.normalize('NFD', text))

    def test_nfc_tus10(self):
        text = u'\u0061\u035C\u0315\u0300\u1DF6\u0062'
        self.assertEqual(u'\u00E0\u0315\u1DF6\u035C\u0062',
                         self.ucd.normalize('NFC', text))

    def test_nfd_tus10(self):
        text = u'\u0061\u035C\u0315\u0300\u1DF6\u0062'
        self.assertEqual(u'\u0061\u0300\u0315\u1DF6\u035C\u0062',
                         self.ucd.normalize('NFD', text))

    def ignore_nfc_tus11(self):
        text = u'\u0061\u0315\u0300\u05AE\u09FE\u0062'
        self.assertEqual(u'\u00E0\u05AE\u09FE\u0315\u0062',
                         self.ucd.normalize('NFC', text))

    def ignore_nfd_tus11(self):
        text = u'\u0061\u0315\u0300\u05AE\u09FE\u0062'
        self.assertEqual(u'\u0061\u05AE\u0300\u09FE\u0315\u0062',
                         self.ucd.normalize('NFD', text))

    def ignore_nfc_tus12(self):
        text = u'\u0061\u0315\u0300\u05AE\U0001E136\u0062'
        self.assertEqual(u'\u00E0\u05AE\U0001E136\u0315\u0062',
                         self.ucd.normalize('NFC', text))

    def ignore_nfd_tus12(self):
        text = u'\u0061\u0315\u0300\u05AE\U0001E136\u0062'
        self.assertEqual(u'\u0061\u05AE\u0300\U0001E136\u0315\u0062',
                         self.ucd.normalize('NFD', text))

    def ignore_nfc_tus13(self):
        text = u'\u0061\u3099\u093C\U00016FF0\u09BC\u0062'
        # Goes through self.ucd like every other normalization test.
        self.assertEqual(u'\u0061\U00016FF0\u093C\u09BC\u3099\u0062',
                         self.ucd.normalize('NFC', text))

    def ignore_nfd_tus13(self):
        text = u'\u0061\u3099\u093C\U00016FF0\u09BC\u0062'
        # Goes through self.ucd like every other normalization test.
        self.assertEqual(u'\u0061\U00016FF0\u093C\u09BC\u3099\u0062',
                         self.ucd.normalize('NFD', text))