def test(Skeleton, *StringPaths): global filter_f sm = core.DFA() idx0 = sm.init_state_index for character_sequence in StringPaths: idx = construct_path(sm, idx0, character_sequence, Skeleton) sm = nfa_to_dfa.do(sm) # Path analyzis may not consider the init state, so mount # an init state before everything. sm.add_transition(7777L, ord('0'), sm.init_state_index) sm.init_state_index = 7777L sm = sm.normalized_clone() path_list = find_core(sm) if filter_f: path_list = paths.select(path_list) for path in sorted(path_list, key=lambda x: (-len(x.step_list), x.step_list[-1].state_index)): print "# " + path.get_string().replace("\n", "\n# ")
def snap_expression(stream, PatternDict):
    """Parse an expression according to the grammar:

           expression:  term
                        term '|' expression

    RETURNS: state machine for the parsed expression, or None if no term
             could be parsed at the current stream position.
    """
    __debug_entry("expression", stream)
    # -- term
    result = snap_term(stream, PatternDict)
    if result is None:
        return __debug_exit(None, stream)

    # -- optional '|'; without it the expression is just the single term.
    if not check(stream, '|'):
        return __debug_exit(result, stream)

    # Remember the position directly behind the consumed '|', so that a
    # failed right-hand side can be undone.
    position_1 = stream.tell()
    __debug_print("'|' (in expression)")

    # -- expression (right-hand side of the '|', parsed recursively)
    result_2 = snap_expression(stream, PatternDict)
    __debug_print("expression(in expression):", result_2)
    if result_2 is None:
        # Right-hand side failed => rewind and return only the first term.
        stream.seek(position_1)
        return __debug_exit(result, stream)

    # Combine both alternatives in parallel and determinize the result.
    result = parallelize.do([result, result_2])
    return __debug_exit(nfa_to_dfa.do(result), stream)
def do(StateMachineList, CommonTerminalStateF=True):
    """Connect state machines in parallel.

    CommonTerminalStateF tells whether the state machines shall trigger
    to a common terminal. This may help nfa-to-dfa or hopcroft
    minimization for ISOLATED patterns. A state machine that consists of
    the COMBINATION of patterns MUST set this flag to 'False'.

    RETURNS: state machine that matches any of the given state machines.
    """
    assert len(StateMachineList) != 0

    def consider(sm):
        # A machine takes part in the union only if it is non-empty and its
        # init state actually triggers on something.
        return not sm.is_Empty() and sm.get_init_state().has_transitions()

    # filter out empty state machines from the consideration
    sm_list       = [ sm for sm in StateMachineList if consider(sm) ]
    empty_sm_list = [ sm for sm in StateMachineList if not consider(sm) ]
    if len(sm_list) < 2:
        # 0 or 1 non-empty machine => no parallelization necessary.
        if len(sm_list) < 1: result = DFA()
        else:                result = sm_list[0]
        return __consider_empty_state_machines(result, empty_sm_list)

    # (*) collect all transitions from both state machines into a single one
    result     = DFA()
    init_state = result.get_init_state()

    # Connect from the new initial state to the initial states of the
    # sms via epsilon transition.
    # Connect from each success state of the sms to the new terminal
    # state via epsilon transition.
    if __nfa_to_dfa_required(sm_list):
        for sm in sm_list:
            result.states.update(sm.states)
            init_state.target_map.add_epsilon_target_state(sm.init_state_index)
        result = nfa_to_dfa.do(result)
    else:
        # Set the 'single_entry' operations.
        init_state.set_single_entry(sm_list[0].get_init_state().single_entry.clone())
        # Add transitions to the states.
        for sm in sm_list:
            init_state.target_map.update(sm.get_init_state().target_map)
            # not __nfa_to_dfa_required(...)
            # => No transition to an init state.
            # => Original init states can be taken out.
            result.states.update(
                (si, state)
                for si, state in sm.states.iteritems()
                if si != sm.init_state_index
            )

    result.assert_consistency()

    #if CommonTerminalStateF:
    #    __combine_transitionless_acceptance_states(result)

    return __consider_empty_state_machines(result, empty_sm_list)
def test(sm, post_sm):
    # Print the machine at each stage of post-context mounting:
    # append the post context, determinize, then Hopcroft-minimize.
    print "EXPRESSION = ", sm
    print "POST CONDITION = ", post_sm
    # NOTE(review): 'return_sm' is never used afterwards; the print below
    # shows 'sm', relying on 'setup_post_context.do()' modifying 'sm' in
    # place -- confirm whether the returned machine was meant to be printed.
    return_sm = setup_post_context.do(sm, post_sm, False, SourceRef_VOID)
    print "APPENDED = ", sm
    sm = nfa_to_dfa.do(sm)
    print "DFA = ", sm
    sm = hopcroft.do(sm)
    print "HOPCROFT = ", sm
def test(RE): print "-------------------------------------------------------------------------------" print "## RE:", RE result = core.do(RE, {}).sm ## print "## State Machine:", result ## result = repeat.do(result, 1) print "## RE+ (repetition):", result.get_string(NormalizeF=True) ## print result.get_graphviz_string(NormalizeF=False) result = nfa_to_dfa.do(result) print "## NFA-to-DFA:" print result.get_string(NormalizeF=True)
def do(SM):
    """Return a machine equivalent to 'SM' that is:

       -- DFA compliant, i.e. free of epsilon transitions and of multiple
          transitions to the same target.
       -- Hopcroft-minimized.
    """
    dfa = nfa_to_dfa.do(SM)
    # Minimize in place -- no fresh machine object is created.
    hopcroft.do(dfa, CreateNewStateMachineF=False)
    assert dfa.is_DFA_compliant()
    return dfa
def test(Skeleton, *StringPaths): global filter_f sm = core.StateMachine() idx0 = sm.init_state_index for character_sequence in StringPaths: idx = construct_path(sm, idx0, character_sequence, Skeleton) sm = nfa_to_dfa.do(sm) # Path analyzis may not consider the init state, so mount # an init state before everything. sm.add_transition(7777L, ord('0'), sm.init_state_index) sm.init_state_index = 7777L sm = sm.normalized_clone() path_list = find_core(sm) if filter_f: path_list = paths.select(path_list) for path in sorted(path_list, key=lambda x: (-len(x.step_list), x.step_list[-1].state_index)): print "# " + path.get_string().replace("\n", "\n# ")
def get_pattern_object(SM):
    """Wrap state machine 'SM' into a Pattern object; the machine is made
    DFA compliant and Hopcroft-minimized beforehand."""
    if SM.is_DFA_compliant(): dfa = SM
    else:                     dfa = nfa_to_dfa.do(SM)
    dfa = hopcroft.do(dfa, CreateNewStateMachineF=False)
    return Pattern(dfa, AllowStateMachineTrafoF=True)
\ | eps '- 33 ->( 3 )-- 44 ->( 4 ) The epsilon closures depend on the direction: * from 3: epsilon closure (3, 5) * from 1: epsilon closure (1, 5) => State '5' is merged into two resulting states. """ sm = StateMachine() sms.line(sm, sm.init_state_index, (0x11, 1), (0x22, 2)) sms.line(sm, sm.init_state_index, (0x33, 3), (0x44, 4)) sms.line(sm, 5, (0x66, 6)) sms.line(sm, 1, (None, 5)) sms.line(sm, 3, (None, 5)) dfa = nfa_to_dfa.do(sm) print plot_txt print dfa.get_string(NormalizeF=False, Option="hex") print "_______________________________________________________________________________" plot_txt = """ ( 1 )-- 22 --->( 2 )-- 33 --->( 3 )-- 55 --->( 5 ) n .<- 33 --' | .--' ( 0 )-- eps -->( 4 )-- 66 --->( 6 ) State '4' is be joined into epsilon closure with '0' from beginning. Later, when it is detected that '2' triggers on the same trigger set to '0' and '3', '0' joined with '3'. """
def do(SM):
    """Return the Hopcroft-minimized DFA equivalent of 'SM'."""
    dfa = nfa_to_dfa.do(SM)
    # In-place minimization; 'dfa' itself is reduced.
    hopcroft.do(dfa, CreateNewStateMachineF=False)
    return dfa
# Machine 'sm': 'u' followed by either 'y' (accept) or one-or-more 'x' (accept).
sm  = DFA()
si  = sm.init_state_index
si0 = sm.add_transition(si, ord('u'))
si1 = sm.add_transition(si0, ord('y'), AcceptanceF=True)
si2 = sm.add_transition(si0, ord('x'))
si2 = sm.add_transition(si2, ord('x'), si2, AcceptanceF=True)
print "#sm:", sm

# Machine 'smp': loop on 'x' at the init state; 'y' accepts.
smp = DFA()
si  = smp.init_state_index
si0 = smp.add_transition(si, ord('x'), si)
si1 = smp.add_transition(si, ord('y'), AcceptanceF=True)
print "#sm2:", smp

# return_sm = setup_post_context.do(sm, smp, False, False, SourceRef_VOID)
# Print the determinized concatenation 'sm' followed by 'smp'.
print "#return_sm:", nfa_to_dfa.do(sequentialize.do([sm, smp]))
sys.exit()

# NOTE(review): everything below is unreachable -- sys.exit() above raises
# SystemExit before any of the following statements execute.
print "-------------------------------------------------------------------------------"
tiny0 = DFA()
tiny0.add_transition(tiny0.init_state_index, ord('a'), AcceptanceF=True)
tiny1 = DFA()
tiny1.add_transition(tiny1.init_state_index, ord(';'), AcceptanceF=True)
test(tiny0, tiny1)
print "-------------------------------------------------------------------------------"
sm      = sm1
post_sm = sm3.clone()
def do_state_machine(self, sm):
    """Transforms a given state machine from 'Unicode Driven' to another
    character encoding type.

    RETURNS: [0] Transformation complete (True->yes, False->not all transformed)
             [1] Transformed state machine. It may be the same as it was
                 before if there was no transformation actually.

    It is ensured that the result of this function is a DFA compliant
    state machine.
    """
    assert Setup.lexatom.type_range is not None
    if sm is None: return True, None
    assert sm.is_DFA_compliant()

    all_complete_f = True

    if Setup.bad_lexatom_detection_f:
        bad_lexatom_si = state_machine_index.get()
        # Generate the 'bad lexatom accepter'.
        bad_lexatom_state = DFA_State(AcceptanceF=True)
        bad_lexatom_state.mark_acceptance_id(E_IncidenceIDs.BAD_LEXATOM)
        sm.states[bad_lexatom_si] = bad_lexatom_state
    else:
        bad_lexatom_si = None

    # NOTE: Not 'iteritems()', for some encodings intermediate states are
    #       generated. Those shall not be subject to transformation.
    for from_si, state in sm.states.items():
        if from_si == bad_lexatom_si: continue
        target_map = state.target_map.get_map()
        for to_si, trigger_set in target_map.items():
            if to_si == bad_lexatom_si: continue

            complete_f, \
            new_state_db = self.do_transition(target_map, from_si, to_si,
                                              bad_lexatom_si)
            # Assume that the 'target_map' has been adapted if changes were
            # necessary.
            if new_state_db is not None:
                sm.states.update(new_state_db)

            all_complete_f &= complete_f

        # Transition to 'bad lexatom acceptor' on first code unit is best
        # to happen here, after all transitions have been adapted.
        self._add_transition_to_bad_lexatom_detector(target_map,
                                                     bad_lexatom_si, 0)

    # If there were intermediate states being generated, the error
    # detection must have been implemented right then.
    sm.delete_transitions_beyond_interval(Setup.lexatom.type_range)
    sm.delete_orphaned_states()

    # AFTER: Whatever happened, the transitions in the state machine MUST
    #        lie in the drain_set.
    if not sm.is_DFA_compliant():
        sm = nfa_to_dfa.do(sm)
    sm = hopcroft_minimization.do(sm, CreateNewStateMachineF=False)
    return all_complete_f, sm
from quex.engine.state_machine.TEST.test_state_machines import sm3
from quex.engine.state_machine.core import *
import quex.engine.state_machine.construction.repeat as repeat
import quex.engine.state_machine.algorithm.nfa_to_dfa as nfa_to_dfa

# HWUT test harness: report the test's title and quit.
if "--hwut-info" in sys.argv:
    print "NFA: Conversion to DFA (subset construction)"
    sys.exit(0)

print "_______________________________________________________________________________"
print "Example A:"
# Single transition on 'a' to an acceptance state, repeated with a
# minimum of 1 -- presumably 'a+'; then determinized.
sm  = StateMachine()
n0  = sm.init_state_index
n1  = sm.add_transition(n0, ord('a'), AcceptanceF=True)
sm  = repeat.do(sm, 1)
dfa = nfa_to_dfa.do(sm)
print dfa

print "_______________________________________________________________________________"
print "Example B:"
# Same machine, repeated without a minimum count -- presumably 'a*'.
sm  = StateMachine()
n0  = sm.init_state_index
n1  = sm.add_transition(n0, ord('a'), AcceptanceF=True)
sm  = repeat.do(sm)
dfa = nfa_to_dfa.do(sm)
print dfa

print "_______________________________________________________________________________"
print "Example C:"
# (*) create a simple state machine:
#             ,--<------------ eps ------------------.
def ensure_dfa(sm):
    """Return 'sm' itself if it is already DFA compliant; otherwise return
    its determinized equivalent."""
    if sm.is_DFA_compliant():
        return sm
    return nfa_to_dfa.do(sm)
# ((9)) is the acceptance state. # sm = DFA() n0 = sm.init_state_index n1 = sm.add_transition(n0, ord('a')) n2 = sm.add_epsilon_transition(n1) n3 = sm.add_epsilon_transition(n2) # n4 = sm.add_epsilon_transition(n3) n5 = sm.add_transition(n4, ord('b')) # n6 = sm.add_epsilon_transition(n3) n7 = sm.add_transition(n6, ord('c')) n8 = sm.add_epsilon_transition(n7) # sm.add_epsilon_transition(n5, n8) # n9 = sm.add_epsilon_transition(n8, RaiseAcceptanceF=True) # sm.add_epsilon_transition(n2, n9) sm.add_epsilon_transition(n8, n3) if sys.argv[1] == "1": print sm.get_graphviz_string(NormalizeF=True) else: # (*) create the DFA from the specified NFA dfa = nfa_to_dfa.do(sm) print dfa.get_graphviz_string(NormalizeF=True)
def do(the_state_machine, post_context_sm, EndOfLinePostContextF, fh=-1):
    """Appends a post context to the given state machine and changes
    state infos as required.

    NOTE: In case that:  post_context_sm is not None
                         or EndOfLinePostContextF

          The function appends something to the state machine and
          it is therefore required to pass 'NFA to DFA'--better
          also Hopcroft Minimization.

    ________________________________________________________________________
    This process is very similar to sequentialization. There is a major
    difference, though:

    Given a state machine (e.g. a pattern) X with a post context Y,
    a match is only valid if X is followed by Y. Let Xn be an acceptance
    state of X and Ym an acceptance state of Y:

        ---(Xn-1)---->(Xn)---->(Y0)----> ... ---->((Ym))
                       store                       acceptance
                       input
                       position

    That is, it holds:

       -- The next input position is stored the position of Xn, even though
          it is 'officially' not an acceptance state.

       -- Ym will be an acceptance state, but it will not store
          the input position!

    The analysis of the next pattern will start at the position where
    X stopped, even though Ym is required to state acceptance.

    RETURNS: (appended core machine, inverse-pseudo-ambiguous backward
              searcher or None)
    """
    # State machines with no states are senseless here.
    assert not the_state_machine.is_empty(), \
           "empty state machine can have no post context."
    assert post_context_sm is None or not post_context_sm.is_empty(), \
           "empty state machine cannot be a post-context."

    # State machines involved with post condition building are part of a pattern,
    # but not configured out of multiple patterns. Thus there should be no origins.
    assert the_state_machine.has_origins() == False
    assert post_context_sm is None or not post_context_sm.has_origins()
    for state in the_state_machine.get_acceptance_state_list():
        for origin in state.origins():
            assert origin.pre_context_id() == E_PreContextIDs.NONE, \
                   "Post Contexts MUST be mounted BEFORE pre-contexts."

    if post_context_sm is None:
        if not EndOfLinePostContextF:
            # Neither an explicit post context nor '$' => nothing to mount.
            return the_state_machine, None
        # Generate a new post context that just contains the 'newline'
        post_context_sm = StateMachine(AcceptanceF=True)
        post_context_sm.mount_newline_to_acceptance_states(Setup.dos_carriage_return_newline_f)
    elif EndOfLinePostContextF:
        # Mount 'newline' to existing post context
        post_context_sm.mount_newline_to_acceptance_states(Setup.dos_carriage_return_newline_f)

    # A post context with an initial state that is acceptance is not really a
    # 'context' since it accepts anything. The state machine remains un-post context.
    if post_context_sm.get_init_state().is_acceptance():
        error_msg("Post context accepts anything---replaced by no post context.", fh,
                  DontExitF=True)
        return the_state_machine, None

    # (*) Two ways of handling post-contexts:
    #
    #     -- Seldom Exception:
    #        Pseudo-Ambiguous Post Conditions (x+/x) -- detecting the end of the
    #        core pattern after the end of the post context
    #        has been reached.
    #
    if ambiguous_post_context.detect_forward(the_state_machine, post_context_sm):
        if ambiguous_post_context.detect_backward(the_state_machine, post_context_sm):
            # -- for post contexts that are forward and backward ambiguous
            #    a philosophical cut is necessary.
            error_msg("Post context requires philosophical cut--handle with care!\n"
                      "Proposal: Isolate pattern and ensure results are as expected!", fh,
                      DontExitF=True)
            post_context_sm = ambiguous_post_context.philosophical_cut(the_state_machine, post_context_sm)

        # NOTE: May be, the_state_machine does contain now an epsilon transition. See
        #       comment at entry of this function.
        ipsb_sm           = ambiguous_post_context.mount(the_state_machine, post_context_sm)
        the_state_machine = beautifier.do(the_state_machine)
        ipsb_sm           = beautifier.do(ipsb_sm)
        return the_state_machine, ipsb_sm

    # -- The 'normal' way: storing the input position at the end of the core
    #    pattern.
    #
    # (*) Need to clone the state machines, i.e. provide their internal
    #     states with new ids, but the 'behavior' remains. This allows
    #     state machines to appear twice, or being used in 'larger'
    #     conglomerates.
    post_clone = post_context_sm.clone()

    # -- Once an acceptance state is reached no further analysis is necessary.
    ## NO: acceptance_pruning.do(post_clone)
    ## BECAUSE: it may have to compete with a pseudo-ambiguous post context

    # (*) collect all transitions from both state machines into a single one
    #
    #     NOTE: The start index is unique. Therefore, one can assume that each
    #           clone_list '.states' dictionary has different keys. One can simply
    #           take over all transitions of a start index into the result without
    #           considering interferences (see below)
    #
    orig_acceptance_state_id_list = the_state_machine.get_acceptance_state_index_list()

    # -- mount on every acceptance state the initial state of the following state
    #    machine via epsilon transition
    the_state_machine.mount_to_acceptance_states(post_clone.init_state_index,
                                                 CancelStartAcceptanceStateF=True)
    for start_state_index, state in post_clone.states.iteritems():
        the_state_machine.states[start_state_index] = state  # states are already cloned

    # -- raise at each old acceptance state the 'store input position flag'
    # -- set the post context flag for all acceptance states
    for state_idx in orig_acceptance_state_id_list:
        state = the_state_machine.states[state_idx]
        state.set_input_position_store_f(True)

    # -- no acceptance state shall store the input position
    # -- set the post context flag for all acceptance states
    for state in the_state_machine.get_acceptance_state_list():
        state.set_input_position_store_f(False)
        state.set_input_position_restore_f(True)

    # No input position backward search required
    the_state_machine = nfa_to_dfa.do(the_state_machine)
    hopcroft.do(the_state_machine, CreateNewStateMachineF=False)
    return the_state_machine, None