def test_on_UCS_sample_sets(Trafo, unicode_to_transformed_sequence):
    """Run positive and negative checks of a transformation on sample sets.

    One ``X`` object is built per script name.  Their ``sm`` members are
    combined into a single state machine, which is passed through
    ``transform(Trafo, ...)``.  Each ``X`` object then checks the
    transformed result, and finally ``check_negative`` is called with the
    intervals of the range ``Interval(0, 0x110000)`` that remain after
    subtracting the union of all sample ``charset`` members.

    Parameters:
        Trafo: forwarded unchanged to ``transform``.
        unicode_to_transformed_sequence: forwarded unchanged to every
            ``check`` call and to ``check_negative``.
            NOTE(review): presumably a callable mapping a Unicode code
            point to its transformed code unit sequence -- confirm.

    Prints one summary line once all positive checks have returned.
    """
    script_list = [
        "Arabic", "Armenian", "Balinese", "Bengali", "Bopomofo", "Braille",
        "Buginese", "Buhid", "Canadian_Aboriginal", "Cherokee", "Common",
        "Cuneiform", "Cypriot", "Deseret", "Gothic", "Greek", "Hanunoo",
        "Hebrew", "Hiragana", "Inherited", "Kannada", "Han", "Katakana",
        "Kharoshthi", "Khmer", "Lao", "Latin", "Limbu", "Linear_B",
        "Malayalam", "Mongolian", "Myanmar", "New_Tai_Lue", "Nko", "Osmanya",
        "Ogham", "Old_Italic", "Old_Persian", "Phoenician", "Shavian",
        "Syloti_Nagri", "Syriac", "Tagalog", "Tagbanwa", "Tai_Le", "Tamil",
        "Telugu", "Thaana", "Thai", "Tibetan", "Tifinagh", "Ugaritic", "Yi",
    ]
    samples = [X(name) for name in script_list]

    # An explicit list (rather than map()) so the callee receives a list on
    # both Python 2 and Python 3.
    orig = get_combined_state_machine([sample.sm for sample in samples])

    # The first element of the returned pair is not needed here.
    _state_n_before, result = transform(Trafo, orig)
    # Debug aid: print(result.get_graphviz_string(Option="hex"))

    # Positive checks: each sample verifies the transformed state machine.
    for sample in samples:
        sample.check(result, unicode_to_transformed_sequence)
    print("Translated %i groups without abortion on error (OK)" % len(samples))

    # Negative checks: gather everything the samples cover ...
    union = NumberSet()
    for sample in samples:
        union.unite_with(sample.charset)

    # ... and remove it from the full range.  0x110000 is one past the last
    # Unicode code point (U+10FFFF); NOTE(review): assumes Interval treats
    # its end as exclusive -- confirm.
    inverse_union = NumberSet(Interval(0, 0x110000))
    inverse_union.subtract(union)
    # Debug aid: print(inverse_union.get_string(Option="hex"))

    check_negative(result,
                   inverse_union.get_intervals(PromiseToTreatWellF=True),
                   unicode_to_transformed_sequence)
def test_on_UCS_sample_sets(Trafo, unicode_to_transformed_sequence):
    """Run positive and negative checks of a transformation on sample sets.

    One ``X`` object is built per script name.  Their ``sm`` members are
    combined via ``combination.do`` into a single state machine, which is
    passed through ``transform(Trafo, ...)``.  Each ``X`` object then
    checks the transformed result, and finally ``check_negative`` is called
    with the intervals of the range ``Interval(0, 0x110000)`` that remain
    after subtracting the union of all sample ``charset`` members.

    Parameters:
        Trafo: forwarded unchanged to ``transform``.
        unicode_to_transformed_sequence: forwarded unchanged to every
            ``check`` call and to ``check_negative``.
            NOTE(review): presumably a callable mapping a Unicode code
            point to its transformed code unit sequence -- confirm.

    Prints one summary line once all positive checks have returned.
    """
    script_list = [
        "Arabic", "Armenian", "Balinese", "Bengali", "Bopomofo", "Braille",
        "Buginese", "Buhid", "Canadian_Aboriginal", "Cherokee", "Common",
        "Cuneiform", "Cypriot", "Deseret", "Gothic", "Greek", "Hanunoo",
        "Hebrew", "Hiragana", "Inherited", "Kannada", "Han", "Katakana",
        "Kharoshthi", "Khmer", "Lao", "Latin", "Limbu", "Linear_B",
        "Malayalam", "Mongolian", "Myanmar", "New_Tai_Lue", "Nko", "Osmanya",
        "Ogham", "Old_Italic", "Old_Persian", "Phoenician", "Shavian",
        "Syloti_Nagri", "Syriac", "Tagalog", "Tagbanwa", "Tai_Le", "Tamil",
        "Telugu", "Thaana", "Thai", "Tibetan", "Tifinagh", "Ugaritic", "Yi",
    ]
    samples = [X(name) for name in script_list]

    # An explicit list (rather than map()) so the callee receives a list on
    # both Python 2 and Python 3.
    orig = combination.do([sample.sm for sample in samples])

    # The first element of the returned pair is not needed here.
    _state_n_before, result = transform(Trafo, orig)
    # Debug aid: print(result.get_graphviz_string(Option="hex"))

    # Positive checks: each sample verifies the transformed state machine.
    for sample in samples:
        sample.check(result, unicode_to_transformed_sequence)
    print("Translated %i groups without abortion on error (OK)" % len(samples))

    # Negative checks: gather everything the samples cover ...
    union = NumberSet()
    for sample in samples:
        union.unite_with(sample.charset)

    # ... and remove it from the full range.  0x110000 is one past the last
    # Unicode code point (U+10FFFF); NOTE(review): assumes Interval treats
    # its end as exclusive -- confirm.
    inverse_union = NumberSet(Interval(0, 0x110000))
    inverse_union.subtract(union)
    # Debug aid: print(inverse_union.get_string(Option="hex"))

    check_negative(result,
                   inverse_union.get_intervals(PromiseToTreatWellF=True),
                   unicode_to_transformed_sequence)
"Buhid", "Canadian_Aboriginal", "Cherokee", "Syloti_Nagri", "Syriac", "Tagalog", "Tagbanwa", "Tai_Le", "Yi", ]) orig = get_combined_state_machine(map(lambda x: x.sm, sets)) print "# Number of states in state machine:" print "# Unicode: %i" % len(orig.states) result = trafo.do(orig) print "# UTF8-Splitted: %i" % len(result.states) # print result.get_graphviz_string(Option="hex") for set in sets: set.check(result) union = NumberSet() for nset in map(lambda set: set.charset, sets): union.unite_with(nset) inverse_union = NumberSet(Interval(0, 0x110000)) inverse_union.subtract(union) # print inverse_union.get_string(Option="hex") check_negative(result, inverse_union.get_intervals(PromiseToTreatWellF=True))
for cmd in result.states[s_idx].single_entry: assert not cmd.is_acceptance() print " (OK)" sets = map(lambda name: X(name), ["Arabic", "Armenian", "Balinese", "Bengali", "Bopomofo", "Braille", "Hanunoo", "Hebrew", "Hiragana", "Inherited", "Kannada", "Katakana", "Kharoshthi", "Khmer", "Lao", "Latin", "Limbu", "Linear_B", "Malayalam", "Mongolian", "Myanmar", "New_Tai_Lue", "Nko", "Ogham", "Old_Italic", "Old_Persian", "Syriac", "Tagalog", "Tagbanwa", "Tai_Le", "Tamil", "Telugu", "Thaana", "Thai", "Tibetan", "Tifinagh", "Ugaritic", "Yi"]) orig = get_combined_state_machine(map(lambda x: x.sm, sets)) print "Number of states in state machine:" print " Unicode: %i" % len(orig.states) result = trafo.do(orig) print " UTF8-Splitted: %i" % len(result.states) for set in sets: set.check(result) union = NumberSet() for nset in map(lambda set: set.charset, sets): union.unite_with(nset) inverse_union = NumberSet(Interval(0, 0x110000)) inverse_union.subtract(union) # print inverse_union.get_string(Option="hex") check_negative(result, inverse_union.get_intervals(PromiseToTreatWellF=True))