def do(TheAnalyzer, CompressionType, AvailableStateIndexSet):
    """Starting point of the search for single character paths in the state
    machine of 'TheAnalyzer'.

    Every state of 'TheAnalyzer.state_db' which is a member of
    'AvailableStateIndexSet' and which is not the init state is used as start
    state for 'CharacterPathList_find()'. The start states are visited in
    ascending order of their depth, i.e. their distance from the init state.
    The intention is to find the longest paths first, so that searches for
    sub paths from follower states become unnecessary.

    RETURNS: Flat list of all results delivered by the single searches.
    """
    # depth_db: state_index ---> distance from init state.
    depth_db = TheAnalyzer.get_depth_db()

    def is_start_candidate(si):
        return si in AvailableStateIndexSet and si != TheAnalyzer.init_state_index

    start_si_list = [
        si for si in TheAnalyzer.state_db.iterkeys() if is_start_candidate(si)
    ]
    # Stable sort: states of the same depth keep their iteration order.
    start_si_list.sort(key=lambda si: depth_db[si])

    return flatten_list_of_lists(
        CharacterPathList_find(TheAnalyzer, si, CompressionType,
                               AvailableStateIndexSet)
        for si in start_si_list
    )
def print_snapshot_map_scheme(info, Prefix=""):
    """Print a table of the snapshot maps of all entry recipes in 'info'.

    Columns: the sorted keys of 'info.entry_recipe_db'.
    Rows:    the sorted variable ids which appear as key in the 'snapshot_map'
             of any entry recipe.

    A cell shows what the recipe's snapshot map stores for the variable id.
    If nothing (or None) is stored, the cell is left blank. Every printed
    line starts with 'Prefix'. If there is no variable id at all, only the
    headline and an empty line are printed.
    """
    print(Prefix + "Snapshot Map Schemes:")
    variable_id_set = set(flatten_list_of_lists(
        recipe.snapshot_map.keys()
        for recipe in info.entry_recipe_db.values()
    ))
    if not variable_id_set:
        print("")
        return

    # Width of the name column = length of the longest variable id name.
    width       = max(len("%s" % repr(vid)) for vid in variable_id_set)
    column_list = sorted(info.entry_recipe_db.iterkeys())
    header      = "".join("%-10s" % si for si in column_list)
    print(Prefix + " %s %s" % (" " * width, header))

    for variable_id in sorted(variable_id_set):
        name      = "%s" % repr(variable_id)
        cell_list = []
        for si in column_list:
            value = info.entry_recipe_db[si].snapshot_map.get(variable_id)
            if value is None: cell_list.append(" ")
            else:             cell_list.append("%8s, " % value)
        padding = " " * (width - len(name))
        print(Prefix + " %s:%s %s" % (name, padding, "".join(cell_list)))
    print("")
def do_from_leaf_to_root(TheState, OpTree, LeafDoorId, done_set, GlobalEntryF=False):
    """Code the sequence of nodes from a leaf of the command tree towards its
    root.

    The nodes are delivered by 'OpTree.iterable_to_root(LeafDoorId, done_set)'
    and each one is coded by '__code()'. Coding a branch in one piece avoids
    unnecessary gotos from outer nodes to their parents. The walk is meant to
    stop at the root or at the first parent that is already implemented.

    RETURNS: list of strings
             The code of the nodes on the branch. Unless 'GlobalEntryF' is
             set, the list starts with an 'unreachable' marker.
    """
    if GlobalEntryF:
        # A global entry into the analyzer is slipped into at function begin.
        # => no 'assert unreachable'!
        txt = []
    else:
        txt = ["\n\n %s\n" % Lng.UNREACHABLE]

    branch_code = flatten_list_of_lists(
        __code(node, TheState, done_set, GlobalEntryF)
        for node in OpTree.iterable_to_root(LeafDoorId, done_set)
    )
    txt.extend(branch_code)
    return txt
def print_ip_offset_scheme(info, Prefix=""):
    """Print a table of the input pointer offsets of all entry recipes in
    'info'.

    Columns: the sorted keys of 'info.entry_recipe_db'.
    Rows:    the sorted position registers which appear as key in the
             'ip_offset_db' of any entry recipe.

    A cell shows what the recipe's 'ip_offset_db' stores for the position
    register, or '<irrelv>' if nothing (or None) is stored. Every printed line
    starts with 'Prefix'. If there is no position register at all, only the
    headline and an empty line are printed.
    """
    print(Prefix + "Input Pointer Offset Schemes:")
    register_set = set(flatten_list_of_lists(
        recipe.ip_offset_db.keys()
        for recipe in info.entry_recipe_db.values()
    ))
    if not register_set:
        print("")
        return

    # Width of the name column = length of the longest register name.
    width       = max(len("%s" % register) for register in register_set)
    column_list = sorted(info.entry_recipe_db.iterkeys())
    header      = "".join("%-10s" % si for si in column_list)
    print(Prefix + " %s %s" % (" " * width, header))

    for position_register in sorted(register_set):
        name      = "%s" % position_register
        cell_list = []
        for si in column_list:
            offset = info.entry_recipe_db[si].ip_offset_db.get(position_register)
            if offset is None: cell_list.append("<irrelv>, ")
            else:              cell_list.append("%8s, " % offset)
        padding = " " * (width - len(name))
        print(Prefix + " %s:%s %s" % (name, padding, "".join(cell_list)))
    print("")
def _branch_table_core(self, Selector, CaseList, get_case, DefaultConsequence=None):
    """Build the text fragments of a 'switch' statement.

    Selector           -- text of the expression inside 'switch( ... )'.
    CaseList           -- sequence of pairs '(item, effect)'. It may be empty;
                          then only the default case (if any) is produced.
    get_case           -- callable 'get_case(item, effect, get_content)' which
                          delivers the text fragments of one case as an
                          iterable. 'get_content' is handed over so that an
                          effect can be converted into a plain string.
    DefaultConsequence -- if not None, a final case with 'item = None' and this
                          effect is added.

    Adjacent cases with equal effects are grouped: a case whose successor has
    an equal effect is passed to 'get_case' with the empty string as effect.
    Only the last case of such a run carries the effect itself.

    RETURNS: list of text fragments; 'switch( Selector ) {', the cases, '}'.
    """
    def get_content(C):
        # An effect may be a list of fragments or something that is already
        # a string. Sub-classes of 'list' are treated as lists, too.
        if isinstance(C, list): return "".join(C)
        return C

    def iterable(CaseList, DefaultConsequence):
        # Guard: an empty case list has no first element to start from.
        if CaseList:
            item, effect = CaseList[0]
            for item_ahead, effect_ahead in CaseList[1:]:
                if effect_ahead == effect:
                    yield item, ""       # same effect follows => group
                else:
                    yield item, effect
                item   = item_ahead
                effect = effect_ahead
            yield item, effect
        if DefaultConsequence is not None:
            yield None, DefaultConsequence

    txt = [ "switch( %s ) {\n" % Selector ]
    txt.extend(
        flatten_list_of_lists(
            get_case(item, text, get_content)
            for item, text in iterable(CaseList, DefaultConsequence)
        )
    )
    txt.append("}\n")
    return txt
def do_NumberSet(self, NSet):
    """Translate a number set interval by interval.

    The intervals are taken from 'NSet.get_intervals(PromiseToTreatWellF=True)'
    and each one is passed to 'self.get_interval_sequences()'.

    RETURNS: Flat list of all interval sequences that implement the number
             set.
    """
    interval_list = NSet.get_intervals(PromiseToTreatWellF=True)
    return flatten_list_of_lists(
        self.get_interval_sequences(interval)
        for interval in interval_list
    )
def get_number_list(self):
    """RETURNS: -- List of all numbers which are contained in the number set,
                   i.e. for each interval the numbers 'begin' to 'end - 1'.
                -- None, if one border is 'sys.maxint' (or '-sys.maxint').
                   The list would be too big.
    """
    import sys

    # An unbounded interval cannot be listed. Detect it before any number is
    # produced, so that no gigantic list is allocated.
    for interval in self.__intervals:
        if interval.begin == -sys.maxint or interval.end == sys.maxint:
            return None

    return flatten_list_of_lists(
        xrange(interval.begin, interval.end)
        for interval in self.__intervals
    )
def _get_source_code(analyzer_list, terminal_list, ColumnNPerChunk, AppendixSmExistF):
    """RETURNS: List of code fragments for the 'loop', in this order:

        -- The code of all analyzers in 'analyzer_list' (the first one is
           taken as the looping analyzer).
        -- The code of the terminals in 'terminal_list'.
        -- The reload procedure of the loop analyzer, if its engine type is
           subject to reload.

    Also, it requests variable definitions as they are required:

        'reference_p'     if 'AppendixSmExistF' is set or 'ColumnNPerChunk'
                          is not None.
        'lexatom_begin_p' if the buffer codec has variable character sizes.
    """
    code = flatten_list_of_lists(
        generator.do_analyzer(analyzer) for analyzer in analyzer_list
    )
    code.extend(generator.do_terminals(terminal_list, TheAnalyzer=None))

    loop_analyzer = analyzer_list[0]
    if loop_analyzer.engine_type.subject_to_reload():
        reload_code = generator.do_reload_procedure(loop_analyzer)
        code.extend(reload_code)

    reference_p_required_f = AppendixSmExistF or ColumnNPerChunk is not None
    if reference_p_required_f:
        variable_db.require("reference_p",
                            Condition="QUEX_OPTION_COLUMN_NUMBER_COUNTING")

    if Setup.buffer_codec.variable_character_sizes_f():
        variable_db.require("lexatom_begin_p")

    return code
def do(TheAnalyzer, CompressionType, AvailableStateIndexSet):
    """Starting point of the search for single character paths in the state
    machine of 'TheAnalyzer'.

    Every state of 'TheAnalyzer.state_db' which is a member of
    'AvailableStateIndexSet' and which is not the init state is used as start
    state for 'CharacterPathList_find()'. The start states are visited in
    ascending order of their depth, i.e. their distance from the init state.
    The intention is to find the longest paths first, so that searches for
    sub paths from follower states become unnecessary.

    RETURNS: Flat list of all results delivered by the single searches.
    """
    # depth_db: state_index ---> distance from init state.
    depth_db = TheAnalyzer.get_depth_db()

    def is_start_candidate(si):
        return si in AvailableStateIndexSet and si != TheAnalyzer.init_state_index

    start_si_list = [
        si for si in TheAnalyzer.state_db.iterkeys() if is_start_candidate(si)
    ]
    # Stable sort: states of the same depth keep their iteration order.
    start_si_list.sort(key=lambda si: depth_db[si])

    return flatten_list_of_lists(
        CharacterPathList_find(TheAnalyzer, si, CompressionType,
                               AvailableStateIndexSet)
        for si in start_si_list
    )
def do_sequence(Sequence, TrafoInfo=None, fh=-1):
    """Transform a sequence element by element by means of 'do_character()'.

    TrafoInfo -- transformation to be applied; if None, 'Setup.buffer_codec'
                 is taken.
    fh        -- passed through to 'do_character()' unchanged.

    RETURNS: Flat list of the results of all elements.
    """
    trafo = Setup.buffer_codec if TrafoInfo is None else TrafoInfo
    return flatten_list_of_lists(
        do_character(element, trafo, fh) for element in Sequence
    )
def get_number_list(self):
    """RETURNS: -- List of all numbers which are contained in the number set,
                   i.e. for each interval the numbers 'begin' to 'end - 1'.
                -- None, if one border is 'sys.maxint' (or '-sys.maxint').
                   The list would be too big.
    """
    import sys

    # An unbounded interval cannot be listed. Detect it before any number is
    # produced, so that no gigantic list is allocated.
    for interval in self.__intervals:
        if interval.begin == -sys.maxint or interval.end == sys.maxint:
            return None

    return flatten_list_of_lists(
        xrange(interval.begin, interval.end)
        for interval in self.__intervals
    )
def _analyzer_functions_get(ModeDB):
    """Generate the analyzer functions for all modes of 'ModeDB'.

    For each mode 'engine_generator.do_with_counter()' is called with the
    mode and the list of all mode names. A comment on the match behavior of
    all modes is appended behind the generated code.

    RETURNS: Result of 'Lng.FRAME_IN_NAMESPACE_MAIN()' applied on the
             concatenated code.
    """
    mode_name_list = ModeDB.keys()

    fragment_list = flatten_list_of_lists(
        engine_generator.do_with_counter(mode, mode_name_list)
        for mode in ModeDB.itervalues()
    )
    match_behavior_comment = engine_generator.comment_match_behavior(ModeDB.itervalues())
    fragment_list.append(match_behavior_comment)

    # generate frame for analyser code
    analyzer_code = "".join(fragment_list)
    return Lng.FRAME_IN_NAMESPACE_MAIN(analyzer_code)
def value_list(self, Name):
    """Collect the values of all settings of the option 'Name'.

    RETURNS: -- None, if there is no setting list for 'Name'.
             -- If the option's content is a list (according to
                'mode_option_info_db'): the concatenation of the value lists
                of all settings.
             -- Else: list with the value of each setting.
    """
    setting_list = self.__get_setting_list(Name)
    if setting_list is None:
        return None

    value_iterable = (setting.value for setting in setting_list)
    if mode_option_info_db[Name].content_is_list():
        return flatten_list_of_lists(value_iterable)
    return list(value_iterable)
def __collect_files(DirList):
    """Collect the files of a list of directories.

    DirList -- directories to be considered; if None, all keys of 'dir_db'.

    Directories starting with 'quex/' are dropped unless
    'Setup.implement_lib_quex_f' is set; those starting with 'lexeme/' are
    dropped unless 'Setup.implement_lib_lexeme_f' is set. Unless
    'Setup.token_class_only_f' is set, the content of 'dir_db[""]' is added.

    RETURNS: set of files, as delivered by 'dir_db_get_files()'.
    """
    dir_list = dir_db.keys() if DirList is None else DirList

    excluded_prefix_list = []
    if not Setup.implement_lib_quex_f:   excluded_prefix_list.append("quex/")
    if not Setup.implement_lib_lexeme_f: excluded_prefix_list.append("lexeme/")

    # Only filter if something is to be excluded; else, 'dir_list' is passed
    # on untouched.
    if excluded_prefix_list:
        excluded = tuple(excluded_prefix_list)
        dir_list = [ d for d in dir_list if not d.startswith(excluded) ]

    file_set = set(flatten_list_of_lists(
        dir_db_get_files(d) for d in dir_list
    ))
    if not Setup.token_class_only_f:
        file_set.update(dir_db[""])
    return file_set
def value_list(self, Name):
    """Collect the values of all settings of the option 'Name'.

    RETURNS: -- None, if there is no setting list for 'Name'.
             -- If the option's content is a list (according to
                'mode_option_info_db'): the concatenation of the value lists
                of all settings.
             -- Else: list with the value of each setting.
    """
    setting_list = self.__get_setting_list(Name)
    if setting_list is None:
        return None

    value_iterable = (setting.value for setting in setting_list)
    if mode_option_info_db[Name].content_is_list():
        return flatten_list_of_lists(value_iterable)
    return list(value_iterable)
def get_tree_text(self, CommandAliasDb, Node=None, Depth=0):
    """Produce a textual display of the tree below 'Node' (recursive).

    Node  -- node to be displayed; if None, 'self.root'.
    Depth -- recursion depth; determines the indentation of the node's line.

    The children (sorted by door id) are displayed first, one level deeper.
    Then follows the line of the node itself: its door id and, in brackets,
    the aliases of its commands according to 'CommandAliasDb'.

    RETURNS: list of text fragments.
    """
    node = self.root if Node is None else Node

    txt = flatten_list_of_lists(
        self.get_tree_text(CommandAliasDb, self.door_db[child_door_id], Depth + 1)
        for child_door_id in sorted(node.child_set)
    )

    indentation = " " * (Depth + 1)
    door_id_str = str(node.door_id)
    alias_str   = "".join(
        "%s " % CommandAliasDb[cmd] for cmd in node.command_list
    ).strip()

    txt.extend([indentation, ".--", door_id_str, " [%s]\n" % alias_str])
    return txt
def do_leafs(TheState, OpTree, done_set):
    """Create code starting from the 'leafs' of the command tree. The leafs
    are the entry points from other states, i.e. the 'doors'.

    Each door of 'TheState.entry' which is not in 'done_set' at the time it
    is visited is coded by 'do_from_leaf_to_root()'. The membership check is
    made right before each call, since 'done_set' is handed to that function.

    RETURNS: List of strings; the branches concatenated in ascending order of
             their length.
    """
    branch_list = [
        do_from_leaf_to_root(TheState, OpTree, door_id, done_set)
        for door_id in TheState.entry.door_id_set()
        if door_id not in done_set
    ]
    # The longest list has to come last. (The sort is stable.)
    branch_list.sort(key=len)
    return flatten_list_of_lists(branch_list)
def do_leafs(TheState, OpTree, done_set):
    """Create code starting from the 'leafs' of the command tree. The leafs
    are the entry points from other states, i.e. the 'doors'.

    Each door of 'TheState.entry' which is not in 'done_set' at the time it
    is visited is coded by 'do_from_leaf_to_root()'. The membership check is
    made right before each call, since 'done_set' is handed to that function.

    RETURNS: List of strings; the branches concatenated in ascending order of
             their length.
    """
    branch_list = [
        do_from_leaf_to_root(TheState, OpTree, door_id, done_set)
        for door_id in TheState.entry.door_id_set()
        if door_id not in done_set
    ]
    # The longest list has to come last. (The sort is stable.)
    branch_list.sort(key=len)
    return flatten_list_of_lists(branch_list)
def get_tree_text(self, CommandAliasDb, Node=None, Depth=0):
    """Produce a textual display of the tree below 'Node' (recursive).

    Node  -- node to be displayed; if None, 'self.root'.
    Depth -- recursion depth; determines the indentation of the node's line.

    The children (sorted by door id) are displayed first, one level deeper.
    Then follows the line of the node itself: its door id and, in brackets,
    the aliases of its commands according to 'CommandAliasDb'.

    RETURNS: list of text fragments.
    """
    node = self.root if Node is None else Node

    txt = flatten_list_of_lists(
        self.get_tree_text(CommandAliasDb, self.door_db[child_door_id], Depth + 1)
        for child_door_id in sorted(node.child_set)
    )

    indentation = " " * (Depth + 1)
    door_id_str = str(node.door_id)
    alias_str   = "".join(
        "%s " % CommandAliasDb[cmd] for cmd in node.command_list
    ).strip()

    txt.extend([indentation, ".--", door_id_str, " [%s]\n" % alias_str])
    return txt
def argv_ufo_detections(Cl):
    """Detects unidentified command line options.

    The known options are the first elements of all entries in 'SETUP_INFO'
    which are lists. If 'Cl.unidentified_options()' reports anything for
    them, the first unknown option is handed to 'error.log_similar()'
    together with the flags of all non-empty entries of 'SETUP_INFO'.
    """
    known_option_list = [
        option
        for info in SETUP_INFO.itervalues() if type(info) == list
        for option in info[0]
    ]

    ufo_list = Cl.unidentified_options(known_option_list)
    if not ufo_list:
        return

    # Every non-empty entry is expected to unpack into two elements.
    non_empty_info_list = [ info for info in SETUP_INFO.itervalues() if info ]
    all_flag_list = flatten_list_of_lists(
        flag_list for flag_list, dummy in non_empty_info_list
    )

    first_ufo = ufo_list[0]
    error.log_similar(first_ufo, all_flag_list,
                      "Unknown command line option '%s'" % first_ufo)
def do_transition(self, from_target_map, FromSi, ToSi, BadLexatomSi):
    """Translates the transition 'FromSi' --> 'ToSi' inside the state machine
    according to the specific coding (see derived class, i.e. UTF8 or UTF16).

    'BadLexatomSi' is None => no bad lexatom detection.
                   else, transitions to 'bad lexatom state' are added
                   on invalid code units.

    'from_target_map' is adapted in place: The entry for 'ToSi' is deleted,
    if the number set contains only forbidden elements or if it has been
    replaced by new transitions.

    RETURNS: [0] True if complete, False else.
             [1] StateDb of newly generated states (None, if nothing has been
                 generated).
    """
    trigger_set = from_target_map[ToSi]

    if trigger_set.least_greater_bound() <= self.UnchangedRange:
        # 'UnchangedRange' => No change to numerical values.
        return True, None
    elif not self.cut_forbidden_range(trigger_set):
        # 'trigger_set' solely contains forbidden elements.
        del from_target_map[ToSi]
        return False, None

    # First, determine the interval sequences for each interval.
    interval_sequence_list = flatten_list_of_lists(
        self.get_interval_sequences(interval)
        for interval in trigger_set.get_intervals(PromiseToTreatWellF=True)
    )

    # Second, enter the new transitions.
    plugged = self.plug_interval_sequences(FromSi, ToSi,
                                           interval_sequence_list,
                                           BadLexatomSi)
    new_target_map, new_state_db = plugged

    # Absorb new transitions into the target map of the 'from state'.
    del from_target_map[ToSi]
    from_target_map.update(new_target_map)

    return True, new_state_db
def _get_source_code(analyzer_list, terminal_list, ColumnNPerChunk, AppendixSmExistF):
    """RETURNS: List of code fragments for the 'loop', in this order:

        -- The code of all analyzers in 'analyzer_list' (the first one is
           taken as the looping analyzer).
        -- The code of the terminals in 'terminal_list'.
        -- The reload procedure of the loop analyzer, if its engine type is
           subject to reload.

    Also, it requests variable definitions as they are required:

        'reference_p'     if 'AppendixSmExistF' is set or 'ColumnNPerChunk'
                          is not None.
        'lexatom_begin_p' if the buffer codec has variable character sizes.
    """
    code = flatten_list_of_lists(
        generator.do_analyzer(analyzer) for analyzer in analyzer_list
    )
    code.extend(generator.do_terminals(terminal_list, TheAnalyzer=None))

    loop_analyzer = analyzer_list[0]
    if loop_analyzer.engine_type.subject_to_reload():
        reload_code = generator.do_reload_procedure(loop_analyzer)
        code.extend(reload_code)

    reference_p_required_f = AppendixSmExistF or ColumnNPerChunk is not None
    if reference_p_required_f:
        variable_db.require("reference_p",
                            Condition="QUEX_OPTION_COLUMN_NUMBER_COUNTING")

    if Setup.buffer_codec.variable_character_sizes_f():
        variable_db.require("lexatom_begin_p")

    return code
def do_from_leaf_to_root(TheState, OpTree, LeafDoorId, done_set, GlobalEntryF=False):
    """Code the sequence of nodes from a leaf of the command tree towards its
    root.

    The nodes are delivered by 'OpTree.iterable_to_root(LeafDoorId, done_set)'
    and each one is coded by '__code()'. Coding a branch in one piece avoids
    unnecessary gotos from outer nodes to their parents. The walk is meant to
    stop at the root or at the first parent that is already implemented.

    RETURNS: list of strings
             The code of the nodes on the branch. Unless 'GlobalEntryF' is
             set, the list starts with an 'unreachable' marker.
    """
    if GlobalEntryF:
        # A global entry into the analyzer is slipped into at function begin.
        # => no 'assert unreachable'!
        txt = []
    else:
        txt = ["\n\n %s\n" % Lng.UNREACHABLE]

    branch_code = flatten_list_of_lists(
        __code(node, TheState, done_set, GlobalEntryF)
        for node in OpTree.iterable_to_root(LeafDoorId, done_set)
    )
    txt.extend(branch_code)
    return txt
def print_ip_offset_scheme(info, Prefix=""):
    """Print a table of the input pointer offsets of all entry recipes in
    'info'.

    Columns: the sorted keys of 'info.entry_recipe_db'.
    Rows:    the sorted position registers which appear as key in the
             'ip_offset_db' of any entry recipe.

    A cell shows what the recipe's 'ip_offset_db' stores for the position
    register, or '<irrelv>' if nothing (or None) is stored. Every printed line
    starts with 'Prefix'. If there is no position register at all, only the
    headline and an empty line are printed.
    """
    print(Prefix + "Input Pointer Offset Schemes:")
    register_set = set(flatten_list_of_lists(
        recipe.ip_offset_db.keys()
        for recipe in info.entry_recipe_db.values()
    ))
    if not register_set:
        print("")
        return

    # Width of the name column = length of the longest register name.
    width       = max(len("%s" % register) for register in register_set)
    column_list = sorted(info.entry_recipe_db.iterkeys())
    header      = "".join("%-10s" % si for si in column_list)
    print(Prefix + " %s %s" % (" " * width, header))

    for position_register in sorted(register_set):
        name      = "%s" % position_register
        cell_list = []
        for si in column_list:
            offset = info.entry_recipe_db[si].ip_offset_db.get(position_register)
            if offset is None: cell_list.append("<irrelv>, ")
            else:              cell_list.append("%8s, " % offset)
        padding = " " * (width - len(name))
        print(Prefix + " %s:%s %s" % (name, padding, "".join(cell_list)))
    print("")
def print_snapshot_map_scheme(info, Prefix=""):
    """Print a table of the snapshot maps of all entry recipes in 'info'.

    Columns: the sorted keys of 'info.entry_recipe_db'.
    Rows:    the sorted variable ids which appear as key in the 'snapshot_map'
             of any entry recipe.

    A cell shows what the recipe's snapshot map stores for the variable id.
    If nothing (or None) is stored, the cell is left blank. Every printed
    line starts with 'Prefix'. If there is no variable id at all, only the
    headline and an empty line are printed.
    """
    print(Prefix + "Snapshot Map Schemes:")
    variable_id_set = set(flatten_list_of_lists(
        recipe.snapshot_map.keys()
        for recipe in info.entry_recipe_db.values()
    ))
    if not variable_id_set:
        print("")
        return

    # Width of the name column = length of the longest variable id name.
    width       = max(len("%s" % repr(vid)) for vid in variable_id_set)
    column_list = sorted(info.entry_recipe_db.iterkeys())
    header      = "".join("%-10s" % si for si in column_list)
    print(Prefix + " %s %s" % (" " * width, header))

    for variable_id in sorted(variable_id_set):
        name      = "%s" % repr(variable_id)
        cell_list = []
        for si in column_list:
            value = info.entry_recipe_db[si].snapshot_map.get(variable_id)
            if value is None: cell_list.append(" ")
            else:             cell_list.append("%8s, " % value)
        padding = " " * (width - len(name))
        print(Prefix + " %s:%s %s" % (name, padding, "".join(cell_list)))
    print("")
def TRY_terminal_delimiter_sequence(Mode, UnicodeSequence, UnicodeEndSequencePattern, UponReloadDoneAdr):
    """Generate the code fragments which check whether the input matches a
    closing delimiter sequence.

    'UnicodeSequence' is transformed element by element by means of the
    buffer codec ('Setup.buffer_codec'). For the resulting sequence a cascade
    of 'if input == ...' checks is produced, one per element. At the innermost
    level the counting code for the delimiter is placed, followed by 'break;'.
    Then, from the innermost level outwards, each level receives a check for
    the buffer limit code which sets the lexeme start and goes to reload.
    Finally, the counting code for a 'normal' character is appended.

    RETURNS: list 'txt', consisting of integers and the code delivered by
             'Lng'.
             # presumably the integers are indentation levels for the
             # fragment that follows -- TODO confirm

    NOTE(review): For an empty sequence 'i' is undefined behind the first
                  loop -- presumably callers never pass one; TODO confirm.
    """
    UnicodeEndSequencePattern.prepare_count_info(Mode.counter_db, Setup.buffer_codec)

    # Transform letter by letter.
    sequence = flatten_list_of_lists(
        transformation.do_character(x, Setup.buffer_codec)
        for x in UnicodeSequence
    )

    EndSequenceChunkN = len(sequence)

    # Column and line number count for closing delimiter
    run_time_counting_required_f, counter_txt = \
            counter_for_pattern.get(UnicodeEndSequencePattern, ShiftF=False)
    # The Closer Delimiter must be a string. As such it has a pre-determined size.
    assert not run_time_counting_required_f

    # Column and line number count for 'normal' character.
    tm, column_counter_per_chunk = \
            counter.get_XXX_counter_map(Mode.counter_db, "me->buffer._input_p", Trafo=Setup.buffer_codec)

    dummy, character_count_txt, dummy = \
            counter.get_core_step(tm, "me->buffer._input_p")

    # (1) Open one 'if' per element of the sequence; level 'i' for the check,
    #     level 'i+1' for what happens upon match.
    txt = []
    for i, x in enumerate(sequence):
        txt.append(i)
        txt.append(Lng.IF_INPUT("==", "0x%X" % x, FirstF=True)) # Opening the 'if'
        txt.append(i+1)
        txt.append("%s\n" % Lng.INPUT_P_INCREMENT())

    # (2) Innermost level (here 'i' is the index of the last element):
    #     counting for the delimiter, then leave with 'break;'.
    Lng.INDENT(counter_txt, i+1)
    if column_counter_per_chunk:
        txt.append(i+1)
        if column_counter_per_chunk == UnicodeEndSequencePattern.count_info().column_n_increment_by_lexeme_length:
            txt += Lng.REEFERENCE_P_COLUMN_ADD("me->buffer._input_p", column_counter_per_chunk)
        else:
            txt += Lng.REEFERENCE_P_COLUMN_ADD("(me->buffer._input_p - %i)" % EndSequenceChunkN, column_counter_per_chunk)
    txt.append(i+1)
    txt.extend(counter_txt)
    txt.append(i+1)
    txt.append("break;\n")

    # (3) From the innermost level outwards: alternative branches.
    for i, x in r_enumerate(sequence):
        txt.append(i)
        txt.append("%s" % Lng.IF_INPUT("==", "0x%X" % Setup.buffer_limit_code, FirstF=False)) # Check BLC
        txt.append(i+1)
        txt.append("%s\n" % Lng.LEXEME_START_SET("me->buffer._input_p - %i" % i))
        txt.append(i+1)
        txt.append("%s\n" % Lng.GOTO_RELOAD(UponReloadDoneAdr, True, engine.FORWARD)) # Reload
        # The outermost level gets no 'else' branch; it is closed below.
        if i == 0: break
        txt.append(i)
        txt.append("%s" % Lng.ELSE)
        txt.append(i+1)
        # Step back the 'i' positions the pointer has been advanced so far.
        txt.append("%s\n" % Lng.INPUT_P_ADD(- i))
        txt.append(i)
        txt.append("%s\n" % Lng.END_IF())

    # Close the outermost 'if'.
    txt.append(i)
    txt.append("%s\n" % Lng.END_IF())

    txt.extend(character_count_txt)

    # print "##DEBUG:\n%s" % "".join(Lng.GET_PLAIN_STRINGS(txt))
    return txt
def accepting_state_index_list(self):
    """RETURNS: Flat list of the 'accepting_state_index' of every element of
                every acceptance sequence in 'self.__list'.
    """
    def state_indices(acceptance_sequence):
        return (x.accepting_state_index for x in acceptance_sequence)

    return flatten_list_of_lists(
        state_indices(acceptance_sequence)
        for acceptance_sequence in self.__list
    )
def do_core(Mode):
    """Produces main code for an analyzer function which can detect the
    patterns of 'Mode' and does the things mentioned in its terminals.

    RETURNS: [0] Code implementing the lexical analyzer (list of fragments).
             [1] Variable definitions required by that code.

    The code is not embedded in a function and required definitions are not
    provided. This happens through function 'wrap_up()'.
    """
    # Prepare the combined state machines and terminals
    terminal_db          = Mode.terminal_db
    reload_state_forward = Mode.reload_state_forward
    on_after_match_code  = Mode.incidence_db.get_CodeTerminal(E_IncidenceIDs.AFTER_MATCH)
    dial_db              = Mode.dial_db

    terminal_register_list = flatten_list_of_lists(
        terminal.required_register_set()
        for terminal in terminal_db.itervalues()
    )
    variable_db.require_registers(terminal_register_list)

    # (*) Pre Context DFA
    #     (If present: All pre-context combined in single backward analyzer.)
    Lng.debug_unit_name_set("Pre-Context:%s" % Mode.name)
    pre_context_code, pre_analyzer = generator.do_pre_context(
        Mode.pre_context_sm_to_be_reversed_list,
        Mode.pre_context_sm_id_list,
        dial_db)

    # (*) Backward input position detection
    #     (Seldomly present -- only for Pseudo-Ambiguous Post Contexts)
    Lng.debug_unit_name_set("Backward-Input-Position-Detection:%s" % Mode.name)
    bipd_code = generator.do_backward_read_position_detectors(
        Mode.bipd_sm_to_be_reversed_db, dial_db)

    # (*) Main DFA -- try to match core patterns
    #     Post-context handling is webbed into the main state machine.
    Lng.debug_unit_name_set("Core:%s" % Mode.name)
    main_code, main_analyzer = generator.do_main(
        Mode.core_sm_list, reload_state_forward, dial_db)

    # (*) Extra state machines (from 'Loopers')
    Lng.debug_unit_name_set("Extra:%s" % Mode.name)
    extra_code = generator.do_analyzer_list(Mode.extra_analyzer_list)

    # (*) Terminals
    #     (BEFORE 'Reload procedures' because some terminals may add entries
    #      to the reloader.)
    terminal_code = generator.do_terminals(terminal_db.values(),
                                           main_analyzer, dial_db)

    # (*) Reload procedures
    reload_fw_code = generator.do_reload_procedure(main_analyzer)
    reload_bw_code = generator.do_reload_procedure(pre_analyzer)

    # (*) Re-entry preparation
    Lng.debug_unit_name_set("Re-Entry-Preparation:%s" % Mode.name)
    reentry_code = generator.do_reentry_preparation(
        Mode.pre_context_sm_id_list, on_after_match_code, dial_db)

    # (*) State Router
    #     (Something that can goto a state address by an given integer value)
    state_router_code = generator.do_state_router(dial_db)

    # (*) Variable Definitions
    #     (Code that defines all required variables for the analyzer)
    variable_db.require_registers(Mode.required_register_set)
    variable_definitions = generator.do_variable_definitions()

    # (*) Putting it all together
    part_list = (
        pre_context_code,   # implementation of pre-contexts (if there are some)
        main_code,          # main pattern matcher
        extra_code,         # extra state machines (from 'Loopers')
        bipd_code,          # (seldom != empty; only for pseudo-ambiguous post contexts)
        terminal_code,
        state_router_code,  # route to state by index (only if no computed gotos)
        reload_fw_code,
        reload_bw_code,
        reentry_code,
    )
    function_body = []
    for part in part_list:
        function_body.extend(part)

    return function_body, variable_definitions
def do_sequence(self, Sequence, fh=-1):
    """Transform a sequence element by element by means of 'self.do_Number()'.

    'fh' is accepted, but not used by this function.

    RETURNS: Flat list of the results of all elements.
    """
    transform = self.do_Number
    return flatten_list_of_lists(
        transform(number) for number in Sequence
    )
def get_elementary_trigger_sets(self, StateIdxList, epsilon_closure_db):
    """Determine the elementary trigger sets of the transitions which leave
    the states in 'StateIdxList'.

    NOTE: 'epsilon_closure_db' must previously be calculated by
          self.get_epsilon_closure_db(). This has to happen once and for all
          in order to save computation time.

    Each state has a trigger dictionary

                target_state_index ---> trigger_set

    The trigger sets of different target states may intersect. An
    'elementary trigger set' is the set of all triggers that trigger at the
    same time to one particular combination of target states. All elementary
    trigger sets are disjunct, i.e. they do not intersect.

    RETURNS: Dictionary

                tuple of state indices ---> NumberSet

             The key is the sorted epsilon closure of the target states which
             are triggered together. The value is the elementary trigger set.
             There is exactly one elementary trigger set per key.

    NOTE: A general solution of this problem would have to consider the
          inspection of all possible subset combinations. The number of
          combinations for N trigger sets is 2^N.

    NOTE: Fortunately, we are dealing with one dimensional sets! Thus, it is
          sufficient to walk along the number line from border to border
          while keeping track of the currently 'open' targets. Example:

              letters of alphabet ---------------------------->

              T0      [---------)
              T1           [---------)

              => elementary sets:

              only T0 [----)
              T0, T1       [----)
              only T1           [----)
    """
    # For Documentation Purposes: The following approach has been proven to be SLOWER
    #                             then the one currently implemented. May be, some time
    #                             it can be tweaked to be faster.
    #
    #                             Also, it is not proven to be correct!
    #
    ## trigger_list = []
    ## for state_index in StateIdxList:
    ##     state = self.states[state_index]
    ##     for target_index, trigger_set in state.target_map.get_map().iteritems():
    ##         target_epsilon_closure = epsilon_closure_db[target_index]
    ##         interval_list = trigger_set.get_intervals(PromiseToTreatWellF=True)
    ##         trigger_list.extend([x, target_epsilon_closure] for x in interval_list])
    ##
    ## trigger_list.sort(key=lambda x: x[0].begin)
    ## for element in trigger_list:
    ##     # ... continue as shown below
    ##
    ## return combination_list

    ## Special Case -- Quickly Done: One State, One Target State
    ##  proposal = None
    ##  if len(StateIdxList) == 1:
    ##      state_idx = list(StateIdxList)[0]
    ##      if len(epsilon_closure_db[state_idx]) == 1:
    ##          if len(self.states[state_idx].target_map.get_map()) == 1:
    ##              target, trigger_set = self.states[state_idx].target_map.get_map().items()[0]
    ##              proposal = { (target,): NumberSet(trigger_set) }

    # (*) Accumulate the transitions for all states in the state list.
    #     transitions to the same target state are combined by union.
    history = flatten_list_of_lists(
        # -- trigger dictionary: target_idx --> trigger set that triggers to target
        self.states[state_idx].target_map.get_trigger_set_line_up()
        # NOTE: Duplicate entries in history are perfectly reasonable at this point,
        #       simply if two states trigger on the same character range to the same
        #       target state. When ranges are opened/closed via the history items
        #       this algo keeps track of duplicates (see below).
        for state_idx in StateIdxList
    )

    # (*) sort history according to position
    #     Each history item provides 'position', 'change' (BEGIN or END of an
    #     interval) and 'target_idx'.
    history.sort(key = attrgetter("position")) # lambda a, b: cmp(a.position, b.position))

    # (*) build the elementary subset list
    combinations = {}                    # use dictionary for uniqueness
    current_interval_begin = None
    current_target_indices = {}          # target_idx --> number of times 'opened'
    current_target_epsilon_closure = []
    for item in history:
        # -- add interval and target indice combination to the data
        #    (only build interval when current begin is there,
        #     when the interval size is not zero, and
        #     when the epsilon closure of target states is not empty)
        if current_interval_begin is not None and \
           current_interval_begin != item.position and \
           len(current_target_indices) != 0:

            interval = Interval(current_interval_begin, item.position)

            # current_target_epsilon_closure.sort()
            key = tuple(sorted(current_target_epsilon_closure))
            ## Caused 3 failures in unit test:
            ## if len(current_target_epsilon_closure) == 1: key = current_target_epsilon_closure[0]
            ## else: key = tuple(sorted(current_target_epsilon_closure))
            combination = combinations.get(key)
            if combination is None:
                combinations[key] = NumberSet(interval, ArgumentIsYoursF=True)
            else:
                combination.unite_with(interval)

        # -- BEGIN / END of interval:
        #    add or delete a target state to the set of currently considered target states
        #    NOTE: More than one state can trigger on the same range to the same target state.
        #          Thus, one needs to keep track of the 'opened' target states.
        if item.change == E_Border.BEGIN:
            if current_target_indices.has_key(item.target_idx):
                current_target_indices[item.target_idx] += 1
            else:
                current_target_indices[item.target_idx] = 1
        else:        # == E_Border.END
            if current_target_indices[item.target_idx] > 1:
                current_target_indices[item.target_idx] -= 1
            else:
                del current_target_indices[item.target_idx]

        # -- re-compute the epsilon closure of the target states
        current_target_epsilon_closure = \
            self.get_epsilon_closure_of_state_set(current_target_indices.iterkeys(),
                                                  epsilon_closure_db)
        # -- set the begin of interval to come
        current_interval_begin = item.position

    ## if proposal is not None:
    ##    if    len(proposal) != len(combinations) \
    ##       or proposal.keys() != combinations.keys() \
    ##       or not proposal.values()[0].is_equal(combinations.values()[0]):
    ##        print "##proposal:    ", proposal
    ##        print "##combinations:", combinations

    # (*) deliver the map: target-index-combination --> trigger_set
    return combinations
def get_elementary_trigger_sets(self, StateIdxList, epsilon_closure_db):
    """Determine the elementary trigger sets of the transitions which leave
    the states in 'StateIdxList'.

    NOTE: 'epsilon_closure_db' must previously be calculated by
          self.get_epsilon_closure_db(). This has to happen once and for all
          in order to save computation time.

    Each state has a trigger dictionary

                target_state_index ---> trigger_set

    The trigger sets of different target states may intersect. An
    'elementary trigger set' is the set of all triggers that trigger at the
    same time to one particular combination of target states. All elementary
    trigger sets are disjunct, i.e. they do not intersect.

    RETURNS: Dictionary

                tuple of state indices ---> NumberSet

             The key is the sorted epsilon closure of the target states which
             are triggered together. The value is the elementary trigger set.
             There is exactly one elementary trigger set per key.

    NOTE: A general solution of this problem would have to consider the
          inspection of all possible subset combinations. The number of
          combinations for N trigger sets is 2^N.

    NOTE: Fortunately, we are dealing with one dimensional sets! Thus, it is
          sufficient to walk along the number line from border to border
          while keeping track of the currently 'open' targets. Example:

              letters of alphabet ---------------------------->

              T0      [---------)
              T1           [---------)

              => elementary sets:

              only T0 [----)
              T0, T1       [----)
              only T1           [----)
    """
    # For Documentation Purposes: The following approach has been proven to be SLOWER
    #                             then the one currently implemented. May be, some time
    #                             it can be tweaked to be faster.
    #
    #                             Also, it is not proven to be correct!
    #
    ## trigger_list = []
    ## for state_index in StateIdxList:
    ##     state = self.states[state_index]
    ##     for target_index, trigger_set in state.target_map.get_map().iteritems():
    ##         target_epsilon_closure = epsilon_closure_db[target_index]
    ##         interval_list = trigger_set.get_intervals(PromiseToTreatWellF=True)
    ##         trigger_list.extend([x, target_epsilon_closure] for x in interval_list])
    ##
    ## trigger_list.sort(key=lambda x: x[0].begin)
    ## for element in trigger_list:
    ##     # ... continue as shown below
    ##
    ## return combination_list

    ## Special Case -- Quickly Done: One State, One Target State
    ##  proposal = None
    ##  if len(StateIdxList) == 1:
    ##      state_idx = list(StateIdxList)[0]
    ##      if len(epsilon_closure_db[state_idx]) == 1:
    ##          if len(self.states[state_idx].target_map.get_map()) == 1:
    ##              target, trigger_set = self.states[state_idx].target_map.get_map().items()[0]
    ##              proposal = { (target,): NumberSet(trigger_set) }

    # (*) Accumulate the transitions for all states in the state list.
    #     transitions to the same target state are combined by union.
    history = flatten_list_of_lists(
        # -- trigger dictionary: target_idx --> trigger set that triggers to target
        self.states[state_idx].target_map.get_trigger_set_line_up()
        # NOTE: Duplicate entries in history are perfectly reasonable at this point,
        #       simply if two states trigger on the same character range to the same
        #       target state. When ranges are opened/closed via the history items
        #       this algo keeps track of duplicates (see below).
        for state_idx in StateIdxList)

    # (*) sort history according to position
    #     Each history item provides 'position', 'change' (BEGIN or END of an
    #     interval) and 'target_idx'.
    history.sort(key=attrgetter(
        "position"))  # lambda a, b: cmp(a.position, b.position))

    # (*) build the elementary subset list
    combinations = {}  # use dictionary for uniqueness
    current_interval_begin = None
    current_target_indices = {}  # target_idx --> number of times 'opened'
    current_target_epsilon_closure = []
    for item in history:
        # -- add interval and target indice combination to the data
        #    (only build interval when current begin is there,
        #     when the interval size is not zero, and
        #     when the epsilon closure of target states is not empty)
        if current_interval_begin is not None and \
           current_interval_begin != item.position and \
           len(current_target_indices) != 0:

            interval = Interval(current_interval_begin, item.position)

            # current_target_epsilon_closure.sort()
            key = tuple(sorted(current_target_epsilon_closure))
            ## Caused 3 failures in unit test:
            ## if len(current_target_epsilon_closure) == 1: key = current_target_epsilon_closure[0]
            ## else: key = tuple(sorted(current_target_epsilon_closure))
            combination = combinations.get(key)
            if combination is None:
                combinations[key] = NumberSet(interval, ArgumentIsYoursF=True)
            else:
                combination.unite_with(interval)

        # -- BEGIN / END of interval:
        #    add or delete a target state to the set of currently considered target states
        #    NOTE: More than one state can trigger on the same range to the same target state.
        #          Thus, one needs to keep track of the 'opened' target states.
        if item.change == E_Border.BEGIN:
            if current_target_indices.has_key(item.target_idx):
                current_target_indices[item.target_idx] += 1
            else:
                current_target_indices[item.target_idx] = 1
        else:  # == E_Border.END
            if current_target_indices[item.target_idx] > 1:
                current_target_indices[item.target_idx] -= 1
            else:
                del current_target_indices[item.target_idx]

        # -- re-compute the epsilon closure of the target states
        current_target_epsilon_closure = \
            self.get_epsilon_closure_of_state_set(current_target_indices.iterkeys(),
                                                  epsilon_closure_db)
        # -- set the begin of interval to come
        current_interval_begin = item.position

    ## if proposal is not None:
    ##    if    len(proposal) != len(combinations) \
    ##       or proposal.keys() != combinations.keys() \
    ##       or not proposal.values()[0].is_equal(combinations.values()[0]):
    ##        print "##proposal:    ", proposal
    ##        print "##combinations:", combinations

    # (*) deliver the map: target-index-combination --> trigger_set
    return combinations
def TRY_terminal_delimiter_sequence(Mode, UnicodeSequence, UnicodeEndSequencePattern, UponReloadDoneAdr):
    """Generate the code which tries to match the closing delimiter sequence.

    The characters of 'UnicodeSequence' are transformed one by one according
    to 'Setup.buffer_codec' and concatenated into one flat chunk sequence.
    Two chains of code fragments are produced for it:

      (1) A chain of nested comparisons, one per chunk. Each comparison
          is followed by an increment of the input pointer. After the last
          comparison the count actions for the delimiter and a 'break' are
          placed.

      (2) A chain, iterated in reverse order, which checks for the buffer
          limit code, sets the lexeme start, and goes to reload. Except for
          position 0 an 'else' branch is added which adds '-i' to the input
          pointer.

    Finally, the count action for a 'normal' character is appended.

    RETURNS: list of text fragments interleaved with integers.
             (The integers are presumably indentation levels which are
              interpreted by the code formatter -- to be confirmed.)
    """
    input_p = "me->buffer._input_p"

    UnicodeEndSequencePattern.prepare_count_info(Mode.counter_db,
                                                 Setup.buffer_codec)

    # Transform character by character; concatenate the results.
    chunk_sequence = flatten_list_of_lists(
        transformation.do_character(character, Setup.buffer_codec)
        for character in UnicodeSequence
    )
    chunk_n = len(chunk_sequence)

    # Column and line number count for the closing delimiter.
    counting_at_run_time_f, delimiter_count_txt = counter_for_pattern.get(
        UnicodeEndSequencePattern, ShiftF=False)
    # The closing delimiter must be a string. As such it has a
    # pre-determined size and no run-time counting may be required.
    assert not counting_at_run_time_f

    # Column and line number count for a 'normal' character.
    tm, column_counter_per_chunk = counter.get_XXX_counter_map(
        Mode.counter_db, input_p, Trafo=Setup.buffer_codec)
    _unused_head, character_count_txt, _unused_tail = \
        counter.get_core_step(tm, input_p)

    code = []

    # (1) Nested comparisons: each chunk opens a new 'if'.
    for depth, chunk in enumerate(chunk_sequence):
        code.extend([
            depth,
            Lng.IF_INPUT("==", "0x%X" % chunk, FirstF=True),
            depth + 1,
            "%s\n" % Lng.INPUT_P_INCREMENT(),
        ])

    # NOTE: 'depth' is the index of the last chunk; the chunk sequence is
    #       required to be non-empty here.
    body_depth = depth + 1

    Lng.INDENT(delimiter_count_txt, body_depth)
    if column_counter_per_chunk:
        code.append(body_depth)
        lexeme_length_increment = \
            UnicodeEndSequencePattern.count_info().column_n_increment_by_lexeme_length
        if column_counter_per_chunk == lexeme_length_increment:
            reference = input_p
        else:
            reference = "(me->buffer._input_p - %i)" % chunk_n
        code += Lng.REEFERENCE_P_COLUMN_ADD(reference, column_counter_per_chunk)

    code.append(body_depth)
    code.extend(delimiter_count_txt)
    code.append(body_depth)
    code.append("break;\n")

    # (2) From the innermost position outwards: buffer limit code => reload.
    for level, chunk in r_enumerate(chunk_sequence):
        code.extend([
            level,
            "%s" % Lng.IF_INPUT("==", "0x%X" % Setup.buffer_limit_code, FirstF=False),
            level + 1,
            "%s\n" % Lng.LEXEME_START_SET("me->buffer._input_p - %i" % level),
            level + 1,
            "%s\n" % Lng.GOTO_RELOAD(UponReloadDoneAdr, True, engine.FORWARD),
        ])
        if level == 0:
            # The outermost 'if' receives no 'else' branch; it is closed
            # after the loop.
            break
        code.extend([
            level,
            "%s" % Lng.ELSE,
            level + 1,
            "%s\n" % Lng.INPUT_P_ADD(-level),
            level,
            "%s\n" % Lng.END_IF(),
        ])

    code.append(level)
    code.append("%s\n" % Lng.END_IF())

    code.extend(character_count_txt)

    return code
def do(loop_config, CaMap, SmList):
    """Separate the parallel state machines into first transitions and
    appendix state machines:

         Parallel state machine  ---->    first transition
                                       +  appendix state machine

         Appendix Sm-Id          ---->    Original Sm-Id

    The 'first transition' is mounted on the loop state machine. It triggers
    an acceptance that causes a transit to the appendix state machine.

    RETURNS: [0] list of LoopMapEntry-s
             [1] list of combined appendix state machines
             [2] appendix_cmd_list_db: appendix sm id --> CmdList
    """
    # ESSENTIAL: Delimiter state machines shall never match on a common lexeme!
    _assert_no_intersections(SmList)
    assert all(sm.get_id() is not None for sm in SmList)

    # (1) Split off the first transition.
    #     first_split:     list of [0] first transition character set
    #                              [1] appendix sm with first transition removed
    #     original_iid_db: appendix sm id --> original sm id
    first_split, original_iid_db = split_first_transition(SmList)

    #     appendix_cmd_list_db: appendix sm id --> CmdList(count action,
    #                                                      goto original terminal)
    appendix_sm_list = [appendix_sm for _character_set, appendix_sm in first_split]
    appendix_cmd_list_db = loop_config.get_appendix_terminal_cmd_list_db(
        CaMap, appendix_sm_list, original_iid_db)

    # (2) Split character sets so that all elements of a set require the
    #     same count action.
    #     count_split: list of [0] character set
    #                          [1] count action
    #                          [2] appendix sm
    count_split = split_first_character_set_for_distinct_count_actions(
        CaMap, first_split)

    # (3) A 'state transition' requires all character sets to be disjoint.
    #     Any intersection builds its own entry. So, some entries may have
    #     more than one appendix.
    #     disjoint_map: list of [0] character set (intersects with no other)
    #                           [1] count action
    #                           [2] list of (appendix sm)
    disjoint_map = combine_intersecting_character_sets(count_split)

    # (4) A transition can only enter one state machine. All appendix state
    #     machines related to the same character set must be combined.
    #     combined_map: list of [0] (disjoint) character set
    #                           [1] count action for character set
    #                           [2] state machine id of combined appendix sm
    #     combined_sm_list_raw: all generated (combined) appendix sm-s
    combined_map, combined_sm_list_raw = combine_appendix_sm_lists(disjoint_map)

    # (5) Determine the command lists.
    #     cmd_list_map: list of [0] (disjoint) character set
    #                           [1] CmdList = (count action,
    #                                          goto terminal/appendix sm)
    #     combined_appendix_sm_list: only those combined appendix state
    #                                machines that do have transitions
    cmd_list_map, combined_appendix_sm_list = determine_CmdLists(
        loop_config, combined_map, combined_sm_list_raw, original_iid_db)

    # (6) Result: list of LoopMapEntry-s, each with a new incidence id.
    loop_map = [
        LoopMapEntry(character_set,
                     IidCoupleTerminal=dial.new_incidence_id(),
                     Code=cmd_list)
        for character_set, cmd_list in cmd_list_map
    ]

    # For any acceptance in the appendix state machines, there must be a
    # command list.
    all_acceptance_id_set = flatten_list_of_lists(
        appendix_sm.acceptance_id_set()
        for appendix_sm in combined_appendix_sm_list
    )
    assert all(iid in appendix_cmd_list_db for iid in all_acceptance_id_set)

    return loop_map, combined_appendix_sm_list, appendix_cmd_list_db
def _get_plain_line_up(TargetMapList):
    """Collect the trigger set line-ups of all target maps in one sorted list.

    Each target map is asked for its line-up, where its index in
    'TargetMapList' is passed as 'Key'. The results are concatenated.

    RETURNS: list of line-up items sorted by (position, change, target_idx).
    """
    def sort_key(item):
        return (item.position, item.change, item.target_idx)

    line_up = flatten_list_of_lists(
        target_map.get_trigger_set_line_up(Key=index)
        for index, target_map in enumerate(TargetMapList)
    )
    return sorted(line_up, key=sort_key)