def test_on_UCS_sample_sets(Trafo, unicode_to_transformed_sequence): script_list = [ "Arabic", "Armenian", "Balinese", "Bengali", "Bopomofo", "Braille", "Buginese", "Buhid", "Canadian_Aboriginal", "Cherokee", "Common", "Cuneiform", "Cypriot", "Deseret", "Gothic", "Greek", "Hanunoo", "Hebrew", "Hiragana", "Inherited", "Kannada", "Han", "Katakana", "Kharoshthi", "Khmer", "Lao", "Latin", "Limbu", "Linear_B", "Malayalam", "Mongolian", "Myanmar", "New_Tai_Lue", "Nko", "Osmanya", "Ogham", "Old_Italic", "Old_Persian", "Phoenician", "Shavian", "Syloti_Nagri", "Syriac", "Tagalog", "Tagbanwa", "Tai_Le", "Tamil", "Telugu", "Thaana", "Thai", "Tibetan", "Tifinagh", "Ugaritic", "Yi" ] sets = [X(name) for name in script_list] orig = combination.do(map(lambda x: x.sm, sets)) state_n_before, result = transform(Trafo, orig) # print result.get_graphviz_string(Option="hex") for set in sets: set.check(result, unicode_to_transformed_sequence) print "Translated %i groups without abortion on error (OK)" % len(sets) union = NumberSet() for nset in map(lambda set: set.charset, sets): union.unite_with(nset) inverse_union = NumberSet(Interval(0, 0x110000)) inverse_union.subtract(union) # print inverse_union.get_string(Option="hex") check_negative(result, inverse_union.get_intervals(PromiseToTreatWellF=True), unicode_to_transformed_sequence)
def do(self, Option="utf8"):
    """Prepare output in the 'dot' language, that graphviz uses."""
    assert Option in ["utf8", "hex"]

    # Main machine: the combination of all core pattern state machines.
    main_sm = combination.do(self.mode.core_sm_list)
    self.__do(main_sm, self.file_name_main, Option)

    # Pre-contexts: reverse each machine, then combine into one.
    pre_context_list = self.mode.pre_context_sm_to_be_reversed_list
    if pre_context_list:
        reversed_list = [reverse.do(candidate) for candidate in pre_context_list]
        combined = combination.do(reversed_list, FilterDominatedOriginsF=False)
        self.__do(combined, self.file_name_pre_context, Option)

    # Backward input position detection: one output file per state machine.
    bipd_db = self.mode.bipd_sm_to_be_reversed_db
    if len(bipd_db) != 0:
        for original in bipd_db.itervalues():
            target_file = self.file_name_bipd_db[original.get_id()]
            self.__do(reverse.do(original), target_file, Option)
def combine(appendix_sm_db, SmList):
    """Return the combined state machine for 'SmList', memoized by id set.

    The cache 'appendix_sm_db' is keyed on the sorted tuple of the state
    machines' ids. A single-element list is returned as-is; otherwise the
    machines are merged via 'combination.do'.
    """
    key = tuple(sorted(sm.get_id() for sm in SmList))

    cached = appendix_sm_db.get(key)
    if cached is not None:
        return cached

    if len(SmList) == 1:
        result = SmList[0]
    else:
        # NOTE(review): keyword spelling 'AlllowInitStateAcceptF' (triple 'l')
        # kept verbatim -- it must match the callee's parameter name.
        result = combination.do(SmList, AlllowInitStateAcceptF=True)

    appendix_sm_db[key] = result
    return result
#! /usr/bin/env python # -*- coding: utf8 -*- import os import sys sys.path.insert(0, os.environ["QUEX_PATH"]) import quex.input.regular_expression.engine as regex import quex.engine.state_machine.construction.combination as combination import quex.engine.analyzer.engine_supply_factory as engine from quex.constants import E_InputActions import help from operator import attrgetter if "--hwut-info" in sys.argv: print "Track Analyzis: Backward Input Position Detection;" sys.exit() # There are no 'special cases' pattern_list = [ 'ax', ] state_machine_list = map(lambda x: regex.do(x, {}).extract_sm(), pattern_list) sm = combination.do(state_machine_list, False) # May be 'True' later. sm = sm.normalized_clone() # For DEBUG purposes: specify 'DRAW' on command line (in sys.argv) help.if_DRAW_in_sys_argv(sm) help.test(sm, engine.Class_BACKWARD_INPUT_POSITION(0))
def do(SmOrSmList, EngineType=engine.FORWARD, ReloadStateExtern=None,
       OnBeforeReload=None, OnAfterReload=None, OnBeforeEntry=None,
       dial_db=None, OnReloadFailureDoorId=None, CutF=True, ReverseF=False):
    """Construct an optimized FSM analyzer from a DFA or a list of DFAs.

    Pipeline: encode in the buffer's encoding (-> treat) -- combine (if a
    list was given) -- optionally delete signal lexatoms -- build FSM --
    optimize -- prepare reload behavior -- optionally compress into
    MegaState-s.

    SmOrSmList            -- a single state machine, or a list to be combined.
    EngineType            -- engine class (default: engine.FORWARD).
    ReloadStateExtern     -- externally provided reload state, if any.
    OnBeforeReload/OnAfterReload/OnBeforeEntry
                          -- command lists hooked into the generated doors.
    dial_db               -- DoorID/address database; must not be None.
    OnReloadFailureDoorId -- door to go to when reload fails.
    CutF                  -- if True, remove signal lexatoms from the DFA.
    ReverseF              -- if True, each machine is reversed after encoding.

    RETURNS: the prepared FSM analyzer.
    """
    assert dial_db is not None

    def treat(sm, ReverseF):
        # Encode 'sm' in the buffer's encoding; warn (but do not abort) if
        # elements cannot be represented. Optionally reverse. The original
        # state machine id is restored in any case.
        backup_id = sm.get_id()
        ok_f, sm = Setup.buffer_encoding.do_state_machine(sm)
        if not ok_f:
            error.warning("Pattern contains elements not found in engine codec '%s'.\n" % Setup.buffer_encoding.name \
                          + "(Buffer element size is %s [byte])" % Setup.lexatom.size_in_byte,
                          sm.sr)
        if ReverseF:
            sm = reverse.do(sm, EnsureDFA_f=True)
        sm.set_id(backup_id)
        return sm

    if type(SmOrSmList) != list:
        SM = treat(SmOrSmList, ReverseF)
    else:
        assert SmOrSmList
        sm_list = [treat(sm, ReverseF) for sm in SmOrSmList]
        SM = combination.do(sm_list, FilterDominatedOriginsF=False)
        # Source reference of the combination: taken from the first machine.
        SM.sr = sm_list[0].sr

    if CutF:
        # Delete signal lexatoms (e.g. buffer limit code) from the DFA.
        error_name = SM.delete_named_number_list(signal_lexatoms(Setup))
        if error_name:
            error.log("Pattern is empty after deletion of signal lexatom '%s'" % error_name,
                      SM.sr)

    # Generate FSM from DFA
    analyzer = FSM.from_DFA(SM, EngineType, ReloadStateExtern, OnBeforeEntry,
                            dial_db=dial_db)

    # Optimize the FSM
    analyzer = optimizer.do(analyzer)

    # DoorID-s required by '.prepare_for_reload()'
    analyzer.prepare_DoorIDs()

    # Prepare the reload BEFORE mega state compression!
    # (Null-operation, in case no reload required.)
    #   TransitionMap:             On BufferLimitCode --> ReloadState
    #   ReloadState.door of state: OnBeforeReload
    #                              prepare goto on reload success and reload fail
    #   State.door of ReloadState: OnAfterReload (when reload was a success).
    # Sorted by state index for deterministic output.
    for si, state in sorted(analyzer.state_db.iteritems(), key=itemgetter(0)):
        # Null-operation, in case no reload required.
        state.prepare_for_reload(analyzer, OnBeforeReload, OnAfterReload,
                                 OnFailureDoorId=OnReloadFailureDoorId)

    # [Optional] Combination of states into MegaState-s.
    if len(Setup.compression_type_list) != 0:
        mega_state_analyzer.do(analyzer)

        # Prepare Reload:
        # (Null-operation, in case no reload required.)
        #   TransitionMap:                  On BufferLimitCode --> ReloadState
        #   ReloadState.door of mega state: Router to doors of implemented states.
        # NOTE(review): loop placed inside the compression branch;
        # 'mega_state_list' is only populated by 'mega_state_analyzer.do()'.
        for state in analyzer.mega_state_list:
            state.prepare_again_for_reload(analyzer)

    # FSM_State.transition_map:  Interval --> DoorID
    # MegaState.transition_map:  Interval --> TargetByStateKey
    #                         or Interval --> DoorID
    return analyzer