Пример #1
0
def test(Skeleton, *StringPaths):
    global filter_f

    sm = core.DFA()

    idx0 = sm.init_state_index
    for character_sequence in StringPaths:
        idx = construct_path(sm, idx0, character_sequence, Skeleton)

    sm = nfa_to_dfa.do(sm)

    # Path analyzis may not consider the init state, so mount
    # an init state before everything.
    sm.add_transition(7777L, ord('0'), sm.init_state_index)
    sm.init_state_index = 7777L

    sm = sm.normalized_clone()
    path_list = find_core(sm)

    if filter_f:
        path_list = paths.select(path_list)

    for path in sorted(path_list,
                       key=lambda x:
                       (-len(x.step_list), x.step_list[-1].state_index)):
        print "# " + path.get_string().replace("\n", "\n# ")
Пример #2
0
def snap_expression(stream, PatternDict):
    """Grammar rule:

           expression:  term
                        term | expression

    RETURNS: What '__debug_exit()' returns when called with

             -- None, if 'snap_term()' delivered None.
             -- the term's result, if no '|' follows or if nothing could be
                parsed behind the '|'.
             -- the 'nfa_to_dfa' of the parallelized term and alternative,
                otherwise.
    """
    __debug_entry("expression", stream)

    # -- term
    term_result = snap_term(stream, PatternDict)
    if term_result is None:
        return __debug_exit(None, stream)

    # -- optional '|'
    if check(stream, '|'):
        position_behind_bar = stream.tell()
        __debug_print("'|' (in expression)")

        # -- expression
        alternative = snap_expression(stream, PatternDict)
        __debug_print("expression(in expression):", alternative)
        if alternative is not None:
            combined = parallelize.do([term_result, alternative])
            return __debug_exit(nfa_to_dfa.do(combined), stream)

        # No expression behind the '|' => go back to the position right
        # behind the '|' and deliver the term alone.
        stream.seek(position_behind_bar)

    return __debug_exit(term_result, stream)
Пример #3
0
def do(StateMachineList, CommonTerminalStateF=True):
    """Connect state machines in parallel.

       StateMachineList     -- state machines to be combined; must not be
                               empty.
       CommonTerminalStateF -- meant to tell whether the state machines shall
                               trigger to a common terminal. NOTE: The flag is
                               currently without effect, since the related
                               call at the end of this function is disabled.

       State machines which are empty or whose init state has no transitions
       are not mounted. They are passed separately, together with the result,
       to '__consider_empty_state_machines()'.

       RETURNS: What '__consider_empty_state_machines()' returns.
    """
    assert len(StateMachineList) != 0

    def is_relevant(sm):
        if sm.is_Empty(): return False
        return sm.get_init_state().has_transitions()

    # Separate the state machines to be mounted from the 'empty' ones.
    sm_list       = []
    empty_sm_list = []
    for sm in StateMachineList:
        if is_relevant(sm): sm_list.append(sm)
        else:               empty_sm_list.append(sm)

    # Less than two state machines => nothing to be connected.
    if not sm_list:
        return __consider_empty_state_machines(DFA(), empty_sm_list)
    elif len(sm_list) == 1:
        return __consider_empty_state_machines(sm_list[0], empty_sm_list)

    # (*) collect all transitions of the state machines in a single one
    result     = DFA()
    init_state = result.get_init_state()

    if not __nfa_to_dfa_required(sm_list):
        # The new init state takes over the 'single_entry' operations of the
        # first state machine's init state ...
        first_init_state = sm_list[0].get_init_state()
        init_state.set_single_entry(first_init_state.single_entry.clone())
        # ... and the transitions of all init states.
        for sm in sm_list:
            init_state.target_map.update(sm.get_init_state().target_map)
            # not __nfa_to_dfa_required(...)
            # => No transition to an init state.
            # => The original init states are left out.
            skipped_si = sm.init_state_index
            result.states.update(
                (si, state) for si, state in sm.states.iteritems()
                            if si != skipped_si
            )
        result.assert_consistency()
    else:
        # All states are taken over. The new init state is connected to the
        # init state of each state machine via epsilon transition.
        for sm in sm_list:
            result.states.update(sm.states)
            init_state.target_map.add_epsilon_target_state(sm.init_state_index)
        result = nfa_to_dfa.do(result)

    # DISABLED:
    #   if CommonTerminalStateF:
    #       __combine_transitionless_acceptance_states(result)

    return __consider_empty_state_machines(result, empty_sm_list)
Пример #4
0
def test(sm, post_sm):    
    print "EXPRESSION = ", sm
    print "POST CONDITION = ", post_sm
    return_sm = setup_post_context.do(sm, post_sm, False, SourceRef_VOID)
    print "APPENDED = ", sm
    sm = nfa_to_dfa.do(sm)
    print "DFA = ", sm
    sm = hopcroft.do(sm)
    print "HOPCROFT = ", sm
Пример #5
0
def test(sm, post_sm):
    print "EXPRESSION = ", sm
    print "POST CONDITION = ", post_sm
    return_sm = setup_post_context.do(sm, post_sm, False, SourceRef_VOID)
    print "APPENDED = ", sm
    sm = nfa_to_dfa.do(sm)
    print "DFA = ", sm
    sm = hopcroft.do(sm)
    print "HOPCROFT = ", sm
Пример #6
0
def test(RE):
    print "-------------------------------------------------------------------------------"
    print "## RE:", RE
    result = core.do(RE, {}).sm
    ## print "## State Machine:", result
    ## result = repeat.do(result, 1)
    print "## RE+ (repetition):", result.get_string(NormalizeF=True)
    ## print result.get_graphviz_string(NormalizeF=False)
    result = nfa_to_dfa.do(result)
    print "## NFA-to-DFA:"
    print result.get_string(NormalizeF=True)
Пример #7
0
def do(SM):
    """Deliver a state machine equivalent to 'SM' that has passed

       -- NFA-to-DFA conversion, and
       -- Hopcroft minimization.

    RETURNS: The state machine delivered by 'nfa_to_dfa.do()', after
             'hopcroft.do()' has been applied to it. It is asserted to be
             DFA compliant.
    """
    dfa = nfa_to_dfa.do(SM)
    # CreateNewStateMachineF=False: the return value is not needed; 'dfa'
    # itself is delivered.
    hopcroft.do(dfa, CreateNewStateMachineF=False)

    assert dfa.is_DFA_compliant()
    return dfa
Пример #8
0
def do(SM):
    """Run 'SM' through NFA-to-DFA conversion and Hopcroft minimization.

    RETURNS: The converted state machine. 'hopcroft.do()' is called on it
             with 'CreateNewStateMachineF=False'; its return value is not
             used. DFA compliance of the result is asserted.
    """
    converted = nfa_to_dfa.do(SM)
    hopcroft.do(converted, CreateNewStateMachineF=False)

    assert converted.is_DFA_compliant()
    return converted
Пример #9
0
def test(Skeleton, *StringPaths):
    global filter_f

    sm = core.StateMachine()

    idx0 = sm.init_state_index
    for character_sequence in StringPaths:
        idx = construct_path(sm, idx0, character_sequence, Skeleton)

    sm = nfa_to_dfa.do(sm)

    # Path analyzis may not consider the init state, so mount 
    # an init state before everything.
    sm.add_transition(7777L, ord('0'), sm.init_state_index)
    sm.init_state_index = 7777L

    sm = sm.normalized_clone()
    path_list = find_core(sm)

    if filter_f:
        path_list = paths.select(path_list)

    for path in sorted(path_list, key=lambda x: (-len(x.step_list), x.step_list[-1].state_index)):
        print "# " + path.get_string().replace("\n", "\n# ")
Пример #10
0
 def get_pattern_object(SM):
     """Deliver a 'Pattern' built on the Hopcroft result of 'SM'.

     'SM' is converted by 'nfa_to_dfa' first, if it is not DFA compliant.
     """
     dfa = SM if SM.is_DFA_compliant() else nfa_to_dfa.do(SM)
     minimized = hopcroft.do(dfa, CreateNewStateMachineF=False)
     return Pattern(minimized, AllowStateMachineTrafoF=True)
Пример #11
0
         \          | eps 
          '- 33 ->( 3 )-- 44 ->( 4 )

The epsilon closures depend on the direction:
    * from 3: epsilon closure (3, 5)
    * from 1: epsilon closure (1, 5)
=> State '5' is merged into two resulting states.
"""
# Set up the state machine of the plot text above. Presumably 'sms.line()'
# mounts a chain of (trigger, target state index) steps on the given start
# state -- TODO confirm against its definition.
sm = StateMachine()
# Two chains leave the init state: 0x11 -> 1, 0x22 -> 2 and 0x33 -> 3, 0x44 -> 4.
sms.line(sm, sm.init_state_index, (0x11, 1), (0x22, 2))
sms.line(sm, sm.init_state_index, (0x33, 3), (0x44, 4))
# State 5 continues on 0x66 to state 6.
sms.line(sm, 5,                              (0x66, 6))
# Trigger 'None': presumably the epsilon transitions from 1 and from 3 to 5
# which the plot text refers to.
sms.line(sm, 1, (None, 5))
sms.line(sm, 3, (None, 5))

# Convert and print the plot text followed by the resulting state machine.
dfa = nfa_to_dfa.do(sm)
print plot_txt
print dfa.get_string(NormalizeF=False, Option="hex")

print "_______________________________________________________________________________"
plot_txt = """

        ( 1 )-- 22 --->( 2 )-- 33 --->( 3 )-- 55 --->( 5 )
          n    .<- 33 --'     
          | .--'           
        ( 0 )-- eps -->( 4 )-- 66 --->( 6 )

State '4' is be joined into epsilon closure with '0' from beginning.  Later,
when it is detected that '2' triggers on the same trigger set to '0' and '3',
'0' joined with '3'.
"""
Пример #12
0
def do(SM):
    """Deliver the NFA-to-DFA conversion of 'SM' after 'hopcroft.do()' has
    been applied to it with 'CreateNewStateMachineF=False'.
    """
    dfa = nfa_to_dfa.do(SM)
    # The return value of the minimization is not used; 'dfa' is delivered.
    hopcroft.do(dfa, CreateNewStateMachineF=False)
    return dfa
Пример #13
0
    sm = DFA()
    si = sm.init_state_index
    si0 = sm.add_transition(si, ord('u'))
    si1 = sm.add_transition(si0, ord('y'), AcceptanceF=True)
    si2 = sm.add_transition(si0, ord('x'))
    si2 = sm.add_transition(si2, ord('x'), si2, AcceptanceF=True)
    print "#sm:", sm

    smp = DFA()
    si = smp.init_state_index
    si0 = smp.add_transition(si, ord('x'), si)
    si1 = smp.add_transition(si, ord('y'), AcceptanceF=True)
    print "#sm2:", smp

    # return_sm = setup_post_context.do(sm, smp, False, False, SourceRef_VOID)
    print "#return_sm:", nfa_to_dfa.do(sequentialize.do([sm, smp]))
    sys.exit()

print "-------------------------------------------------------------------------------"
# Two minimal state machines: each consists of a single transition from the
# init state into an acceptance state, on 'a' and on ';' respectively.
tiny0 = DFA()
tiny0.add_transition(tiny0.init_state_index, ord('a'), AcceptanceF=True)

tiny1 = DFA()
tiny1.add_transition(tiny1.init_state_index, ord(';'), AcceptanceF=True)

# Presumably 'tiny0' acts as core pattern and 'tiny1' as post context --
# 'test()' is defined outside of this section, TODO confirm.
test(tiny0, tiny1)

print "-------------------------------------------------------------------------------"
sm = sm1
post_sm = sm3.clone()
Пример #14
0
    def do_state_machine(self, sm):
        """Transforms a given state machine from 'Unicode Driven' to another
        character encoding type.

        The transitions of 'sm' are adapted in place, one (from state, to
        state) pair at a time, by 'self.do_transition()'. If
        'Setup.bad_lexatom_detection_f' is set, an additional acceptance
        state marked with 'E_IncidenceIDs.BAD_LEXATOM' is entered into 'sm'.

        ARGUMENTS:
           sm -- state machine to be transformed; must be DFA compliant.
                 May be None.

        RETURNS: 
           [0] Transformation complete (True->yes, False->not all transformed)
           [1] Transformed state machine. It may be the same as it was 
               before if there was no transformation actually.
               None, if 'sm' was None.

        REQUIRES: 'Setup.lexatom.type_range' is not None.

        It is ensured that the result of this function is a DFA compliant
        state machine.
        """
        assert Setup.lexatom.type_range is not None

        # No state machine => nothing to transform, nothing incomplete.
        if sm is None: return True, None
        assert sm.is_DFA_compliant()

        all_complete_f = True
        if Setup.bad_lexatom_detection_f:
            bad_lexatom_si = state_machine_index.get()
            # Generate the 'bad lexatom acceptor'.
            bad_lexatom_state = DFA_State(AcceptanceF=True)
            bad_lexatom_state.mark_acceptance_id(E_IncidenceIDs.BAD_LEXATOM)
            sm.states[bad_lexatom_si] = bad_lexatom_state

        else:
            # No detection => there is no state to be skipped below.
            bad_lexatom_si = None

        # NOTE: Not 'iteritems()', for some encodings intermediate states are
        #       generated. Those shall not be subject to transformation.
        #       => Iterate over a snapshot of the states taken before the loop.
        for from_si, state in sm.states.items():
            if from_si == bad_lexatom_si: continue
            target_map = state.target_map.get_map()

            # Again a snapshot: 'do_transition()' is assumed to adapt
            # 'target_map' while it is iterated over.
            for to_si, trigger_set in target_map.items():
                if to_si == bad_lexatom_si: continue

                complete_f,  \
                new_state_db = self.do_transition(target_map, from_si, to_si,
                                                  bad_lexatom_si)
                # Assume that the 'target_map' has been adapted if changes were
                # necessary.
                if new_state_db is not None:
                    # Enter the states reported by 'do_transition()'.
                    sm.states.update(new_state_db)

                # A single incomplete transition makes the result incomplete.
                all_complete_f &= complete_f

            # Transition to 'bad lexatom acceptor' on first code unit is best
            # to happen here, after all transitions have been adapted.
            self._add_transition_to_bad_lexatom_detector(
                target_map, bad_lexatom_si, 0)

            # If there were intermediate states being generated, the
            # error detection must have been implemented right then.

        sm.delete_transitions_beyond_interval(Setup.lexatom.type_range)

        sm.delete_orphaned_states()

        # AFTER: Whatever happened, the transitions in the state machine MUST
        #        lie in the drain_set.
        # The adaptations may have broken DFA compliance => restore it, then
        # minimize.
        if not sm.is_DFA_compliant():
            sm = nfa_to_dfa.do(sm)
        sm = hopcroft_minimization.do(sm, CreateNewStateMachineF=False)
        return all_complete_f, sm
Пример #15
0
from   quex.engine.state_machine.TEST.test_state_machines import sm3
from   quex.engine.state_machine.core       import *
import quex.engine.state_machine.construction.repeat     as repeat
import quex.engine.state_machine.algorithm.nfa_to_dfa as nfa_to_dfa

# On '--hwut-info' only the title of this test is printed.
if "--hwut-info" in sys.argv:
    print "NFA: Conversion to DFA (subset construction)"
    sys.exit(0)

print "_______________________________________________________________________________"
print "Example A:"
# A single transition on 'a' from the init state into an acceptance state ...
sm = StateMachine()
n0 = sm.init_state_index     
n1 = sm.add_transition(n0, ord('a'), AcceptanceF=True)
# ... passed through 'repeat.do()' with second argument 1 (presumably the
# minimum number of repetitions -- TODO confirm), then converted and printed.
sm = repeat.do(sm, 1)
dfa = nfa_to_dfa.do(sm)
print dfa

print "_______________________________________________________________________________"
print "Example B:"
# Same state machine as in example A, but 'repeat.do()' is called without a
# second argument.
sm = StateMachine()
n0 = sm.init_state_index     
n1 = sm.add_transition(n0, ord('a'), AcceptanceF=True)
sm = repeat.do(sm)
dfa = nfa_to_dfa.do(sm)
print dfa

print "_______________________________________________________________________________"
print "Example C:"
# (*) create a simple state machine:  
#                                            ,--<------------ eps ------------------.
Пример #16
0
 def ensure_dfa(sm):
     """Deliver 'sm' itself if it is DFA compliant, else what 'nfa_to_dfa'
     makes of it.
     """
     if sm.is_DFA_compliant():
         return sm
     return nfa_to_dfa.do(sm)
Пример #17
0
#    ((9)) is the acceptance state.
#
# Construct the state machine. The values returned by 'add_transition()' and
# 'add_epsilon_transition()' are used as state indices in subsequent calls.
sm = DFA()
n0 = sm.init_state_index
# n0 -- 'a' --> n1 -- eps --> n2 -- eps --> n3
n1 = sm.add_transition(n0, ord('a'))
n2 = sm.add_epsilon_transition(n1)
n3 = sm.add_epsilon_transition(n2)
#
# First branch: n3 -- eps --> n4 -- 'b' --> n5
n4 = sm.add_epsilon_transition(n3)
n5 = sm.add_transition(n4, ord('b'))
#
# Second branch: n3 -- eps --> n6 -- 'c' --> n7 -- eps --> n8
n6 = sm.add_epsilon_transition(n3)
n7 = sm.add_transition(n6, ord('c'))
n8 = sm.add_epsilon_transition(n7)
#
# The first branch joins the second: n5 -- eps --> n8
sm.add_epsilon_transition(n5, n8)
#
# n8 -- eps --> n9, requested with 'RaiseAcceptanceF=True'
n9 = sm.add_epsilon_transition(n8, RaiseAcceptanceF=True)
#
# Shortcut n2 -- eps --> n9 and loop back n8 -- eps --> n3
sm.add_epsilon_transition(n2, n9)
sm.add_epsilon_transition(n8, n3)

# The first command line argument selects what is printed. It is read
# unconditionally, i.e. it must be present.
if sys.argv[1] == "1":
    # "1" => the state machine as constructed above
    print sm.get_graphviz_string(NormalizeF=True)

else:
    # (*) create the DFA from the specified NFA
    dfa = nfa_to_dfa.do(sm)

    print dfa.get_graphviz_string(NormalizeF=True)
Пример #18
0
 def get_pattern_object(SM):
     """Wrap 'SM' into a 'Pattern' object.

     If 'SM' is not DFA compliant it passes 'nfa_to_dfa' first. In any case
     'hopcroft.do()' is applied before the 'Pattern' is constructed.
     """
     if SM.is_DFA_compliant():
         candidate = SM
     else:
         candidate = nfa_to_dfa.do(SM)
     minimized = hopcroft.do(candidate, CreateNewStateMachineF=False)
     return Pattern(minimized, AllowStateMachineTrafoF=True)
Пример #19
0
def do(the_state_machine, post_context_sm, EndOfLinePostContextF, fh=-1):
    """Appends a post context to the given state machine and changes 
       state infos as required. 

       ARGUMENTS:

           the_state_machine     -- core pattern; must not be empty and must
                                    not have origins.
           post_context_sm       -- post context to be appended, or None.
                                    NOTE: If EndOfLinePostContextF is set,
                                    'mount_newline_to_acceptance_states()' is
                                    called directly on this object.
           EndOfLinePostContextF -- True, if 'newline' shall be mounted as
                                    (part of) the post context.
           fh                    -- passed to 'error_msg()' along with the
                                    warnings issued by this function.

       RETURNS: A tuple

           [0] The resulting state machine. It is 'the_state_machine' as it
               came in, if there is no post context to be mounted or if the
               post context's init state is an acceptance state.
           [1] None, except for the pseudo-ambiguous case. There, it is the
               'beautified' result of 'ambiguous_post_context.mount()'.

       NOTE: 

           In case that:    post_context_sm is not None 
                         or EndOfLinePostContextF  

           The function appends something to the state machine and
           it is therefore required to pass 'NFA to DFA'--better
           also Hopcroft Minimization.
       
       ________________________________________________________________________
       This process is very similar to sequentialization. 
       There is a major difference, though:
       
       Given a state machine (e.g. a pattern) X with a post context Y, 
       a match is only valid if X is followed by Y. Let Xn be an acceptance
       state of X and Ym an acceptance state of Y: 

              ---(Xn-1)---->(Xn)---->(Y0)----> ... ---->((Ym))
                            store                       acceptance
                            input
                            position
       
       That is, it holds:

          -- The next input position is stored at the position of Xn, even
             though it is 'officially' not an acceptance state.

          -- Ym will be an acceptance state, but it will not store 
             the input position!       

       The analysis of the next pattern will start at the position where
       X stopped, even though Ym is required to state acceptance.    
       
    """
    # State machines with no states are senseless here. 
    assert not the_state_machine.is_empty(), \
           "empty state machine can have no post context."
    assert post_context_sm is None or not post_context_sm.is_empty(), \
           "empty state machine cannot be a post-context."

    # State machines involved with post condition building are part of a pattern, 
    # but not configured out of multiple patterns. Thus there should be no origins.
    assert the_state_machine.has_origins() == False
    assert post_context_sm is None or not post_context_sm.has_origins()
    for state in the_state_machine.get_acceptance_state_list():
        for origin in state.origins(): 
            assert origin.pre_context_id() == E_PreContextIDs.NONE, \
                   "Post Contexts MUST be mounted BEFORE pre-contexts."

    if post_context_sm is None:
        # Neither a post context nor 'end of line' => nothing to be done.
        if not EndOfLinePostContextF:
            return the_state_machine, None
        # Generate a new post context that just contains the 'newline'
        post_context_sm = StateMachine(AcceptanceF=True)
        post_context_sm.mount_newline_to_acceptance_states(Setup.dos_carriage_return_newline_f)

    elif EndOfLinePostContextF: 
        # Mount 'newline' to existing post context
        post_context_sm.mount_newline_to_acceptance_states(Setup.dos_carriage_return_newline_f)

    # A post context with an initial state that is acceptance is not really a
    # 'context' since it accepts anything. The state machine remains un-post context.
    if post_context_sm.get_init_state().is_acceptance():
        # 'DontExitF=True' is passed and the function continues, i.e. this is
        # treated as a warning.
        error_msg("Post context accepts anything---replaced by no post context.", fh, 
                  DontExitF=True)
        return the_state_machine, None
    
    # (*) Two ways of handling post-contexts:
    #
    #     -- Seldom Exception: 
    #        Pseudo-Ambiguous Post Conditions (x+/x) -- detecting the end of the 
    #        core pattern after the end of the post context
    #        has been reached.
    #
    if ambiguous_post_context.detect_forward(the_state_machine, post_context_sm):
        if ambiguous_post_context.detect_backward(the_state_machine, post_context_sm):
            # -- for post contexts that are forward and backward ambiguous
            #    a philosophical cut is necessary.
            error_msg("Post context requires philosophical cut--handle with care!\n"
                      "Proposal: Isolate pattern and ensure results are as expected!", fh, 
                      DontExitF=True)
            post_context_sm = ambiguous_post_context.philosophical_cut(the_state_machine, post_context_sm)
        
        # NOTE: May be, the_state_machine does contain now an epsilon transition. See
        #       comment at entry of this function.
        ipsb_sm = ambiguous_post_context.mount(the_state_machine, post_context_sm)
        # Both state machines pass 'beautifier.do()' before they are returned.
        the_state_machine = beautifier.do(the_state_machine)
        ipsb_sm           = beautifier.do(ipsb_sm)
        return the_state_machine, ipsb_sm 

    # -- The 'normal' way: storing the input position at the end of the core
    #    pattern.
    #
    # (*) Need to clone the state machines, i.e. provide their internal
    #     states with new ids, but the 'behavior' remains. This allows
    #     state machines to appear twice, or being used in 'larger'
    #     conglomerates.
    post_clone = post_context_sm.clone() 

    # -- Once an acceptance state is reached no further analysis is necessary.
    ## NO: acceptance_pruning.do(post_clone)
    ## BECAUSE: it may have to compete with a pseudo-ambiguous post context

    # (*) collect all transitions from both state machines into a single one
    #
    #     NOTE: The start index is unique. Therefore, one can assume that each
    #           clone_list '.states' dictionary has different keys. One can simply
    #           take over all transitions of a start index into the result without
    #           considering interferences (see below)
    #
    # The acceptance states of the core pattern must be remembered BEFORE the
    # post context is mounted; they are needed further below.
    orig_acceptance_state_id_list = the_state_machine.get_acceptance_state_index_list()

    # -- mount on every acceptance state the initial state of the following state
    #    machine via epsilon transition
    the_state_machine.mount_to_acceptance_states(post_clone.init_state_index, 
                                                 CancelStartAcceptanceStateF=True)
    for start_state_index, state in post_clone.states.iteritems():        
        the_state_machine.states[start_state_index] = state # states are already cloned

    # -- raise at each old acceptance state the 'store input position flag'
    for state_idx in orig_acceptance_state_id_list:
        state = the_state_machine.states[state_idx]
        state.set_input_position_store_f(True)
    
    # -- no acceptance state shall store the input position
    # -- raise the 'restore input position flag' for all acceptance states
    for state in the_state_machine.get_acceptance_state_list():
        state.set_input_position_store_f(False)
        state.set_input_position_restore_f(True)

    # No input position backward search required
    # The post context has been mounted via epsilon transitions (see above)
    # => NFA-to-DFA conversion, followed by Hopcroft minimization.
    the_state_machine = nfa_to_dfa.do(the_state_machine)
    hopcroft.do(the_state_machine, CreateNewStateMachineF=False)
    return the_state_machine, None