Example #1
class X:
    def __init__(self, Name):
        sh = StringIO("[:\\P{Script=%s}:]" % Name)
        self.name = Name
        self.charset = regex.snap_set_expression(sh, {})
        self.sm = StateMachine()
        self.sm.add_transition(self.sm.init_state_index, self.charset, AcceptanceF=True)
        self.id = self.sm.get_id()

    def check(self, SM):
        """This function throws an exception as soon as one single value
           is not matched according to the expectation.
        """
        print "Name = " + self.name, 
        for interval in self.charset.get_intervals(PromiseToTreatWellF=True):
            for i in range(interval.begin, interval.end):
                utf8_seq = unicode_to_utf8(i)

                # Apply sequence to state machine
                s_idx = SM.init_state_index
                for byte in utf8_seq:
                    s_idx = SM.states[s_idx].target_map.get_resulting_target_state_index(byte)

                # All acceptance flags must belong to the original state machine
                for cmd in SM.states[s_idx].single_entry:
                    if cmd.__class__ != SeAccept: continue
                    # HERE: As soon as something is wrong --> fire an exception
                    assert cmd.acceptance_id() == self.id
        print " (OK=%i)" % self.id
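
The check() above walks each code point's UTF-8 byte sequence through the transformed state machine, one byte at a time. As a minimal, self-contained sketch of the kind of sequence a helper like unicode_to_utf8 yields (plain Python 2, not the quex helper; the code point is only an illustration):

# Illustrative sketch: the UTF-8 byte sequence that check() would feed
# into the state machine, byte by byte.
code_point = 0x20AC                                  # EURO SIGN, for example
utf8_seq   = [ord(b) for b in unichr(code_point).encode("utf-8")]
assert utf8_seq == [0xE2, 0x82, 0xAC]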
Example #2
    def __init__(self, SM_A, SM_B, StartingSM=None):
        self.original = SM_A
        self.admissible = SM_B

        if StartingSM is None:
            self.result = StateMachine(
                InitStateIndex=index.map_state_combination_to_index(
                    (SM_A.init_state_index, SM_B.init_state_index)),
                InitState=self.get_state_core(SM_A.init_state_index,
                                              SM_B.init_state_index))
        else:
            self.result = StartingSM

        # TODO: Think if 'state_db' cannot be replaced by 'result'
        self.state_db = {}

        self.path = []

        # Use 'operation_index' to get a unique index that allows to indicate
        # that 'SM_B' is no longer involved. Also, it ensures that the
        # generated state indices from (a_state_index, operation_index) are
        # unique.
        self.operation_index = index.get()

        TreeWalker.__init__(self)
Example #3
def do(sh):
    """Converts a unicode string into a state machine that parses
       its letters sequentially. Each state in the sequence corresponds
       to the successful triggering of a letter. Only the last state, though,
       is an acceptance state. Any bailing out before is 'not accepted'. 
       Example:

       "hey" is translated into the state machine:

           (0)-- 'h' -->(1)-- 'e' -->(2)-- 'y' --> ACCEPTANCE
            |            |            |
           FAIL         FAIL         FAIL
    
      Note: The state indices are globally unique. But, they are not necessarily
            0, 1, 2, ... 
    """
    assert     sh.__class__.__name__ == "StringIO" \
            or sh.__class__.__name__ == "file"

    # resulting state machine
    result    = StateMachine()
    state_idx = result.init_state_index

    # Only \" is treated as the special character '"'; any other backslashed
    # character remains as the sequence 'backslash' + character
    for char_code in get_character_code_sequence(sh):
        state_idx = result.add_transition(state_idx, char_code)

    # once the last state has triggered, the sequence ends up in 'acceptance'
    result.states[state_idx].set_acceptance()
    return result
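
A rough, self-contained analogue of what do() builds, one state per letter, only the final state accepting, and any mismatch failing, can be written with a plain dict; this is only an illustration of the docstring's "hey" example, not the quex API:

def sequence_dfa(word):
    # one state per letter; state == len(word) is the only acceptance state
    transitions = dict(((i, ch), i + 1) for i, ch in enumerate(word))
    return transitions, len(word)

def matches(dfa, text):
    transitions, acceptance = dfa
    state = 0
    for ch in text:
        state = transitions.get((state, ch))
        if state is None:
            return False              # "bailing out" == FAIL
    return state == acceptance

assert matches(sequence_dfa("hey"), "hey")
assert not matches(sequence_dfa("hey"), "he")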
Example #4
def test_on_UCS_range(Trafo, Source, Drain, CharacterBackwardTrafo):

    sm     = StateMachine()
    acc_db = {}
    for x in range(Source.begin, Source.end):
        ti = sm.add_transition(sm.init_state_index, x, AcceptanceF=True)
        acc_id    = len(acc_db)
        sm.states[ti].mark_acceptance_id(acc_id)
        acc_db[x] = acc_id

    if Setup.bad_lexatom_detection_f:
        acc_db[None] = E_IncidenceIDs.BAD_LEXATOM
    else:
        acc_db[None] = None

    state_n_before, result = transform(Trafo, sm)
    # assert state_n_before == len(result.states)

    init_state = result.get_init_state()
    count      = 0
    for y in range(Drain.begin, Drain.end):
        # Translate the drain character back into its source character
        x  = CharacterBackwardTrafo(y)
        # Transit on the drain character
        ti = init_state.target_map.get_resulting_target_state_index(y)
        # Compare resulting state with the expected state's acceptance
        assert_only_acceptance_id(sm.states, ti, acc_db, x, y)

        count += 1

    print "<terminated: %i transitions ok>" % count
Example #5
def test_on_UCS_range(Trafo, Source, Drain, CharacterBackwardTrafo):

    sm = StateMachine()
    acc_db = {}
    for x in range(Source.begin, Source.end):
        ti = sm.add_transition(sm.init_state_index, x, AcceptanceF=True)
        acc_id = len(acc_db)
        sm.states[ti].mark_acceptance_id(acc_id)
        acc_db[x] = acc_id

    if Setup.bad_lexatom_detection_f:
        acc_db[None] = E_IncidenceIDs.BAD_LEXATOM
    else:
        acc_db[None] = None

    state_n_before, result = transform(Trafo, sm)
    # assert state_n_before == len(result.states)

    init_state = result.get_init_state()
    count = 0
    for y in range(Drain.begin, Drain.end):
        # Translate the drain character back into its source character
        x = CharacterBackwardTrafo(y)
        # Transit on the drain character
        ti = init_state.target_map.get_resulting_target_state_index(y)
        # Compare resulting state with the expected state's acceptance
        assert_only_acceptance_id(sm.states, ti, acc_db, x, y)

        count += 1

    print "<terminated: %i transitions ok>" % count
Example #6
def do(sh):
    """Converts a unicode string into a state machine that parses
       its letters sequentially. Each state in the sequence corresponds
       to the successful triggering of a letter. Only the last state, though,
       is an acceptance state. Any bailing out before is 'not accepted'. 
       Example:

       "hey" is translated into the state machine:

           (0)-- 'h' -->(1)-- 'e' -->(2)-- 'y' --> ACCEPTANCE
            |            |            |
           FAIL         FAIL         FAIL
    
      Note: The state indices are globally unique. But, they are not necessarily
            0, 1, 2, ... 
    """
    assert     sh.__class__.__name__ == "StringIO" \
            or sh.__class__.__name__ == "file"

    # resulting state machine
    result = StateMachine()
    state_idx = result.init_state_index

    # Only \" is treated as the special character '"'; any other backslashed
    # character remains as the sequence 'backslash' + character
    for char_code in get_character_code_sequence(sh):
        state_idx = result.add_transition(state_idx, char_code)

    # once the last state has triggered, the sequence ends up in 'acceptance'
    result.states[state_idx].set_acceptance()
    return result
Example #7
class X:
    def __init__(self, Name):
        sh = StringIO("[:\\P{Script=%s}:]" % Name)
        self.name = Name
        self.charset = regex.snap_set_expression(sh, {})
        self.sm = StateMachine()
        self.sm.add_transition(self.sm.init_state_index,
                               self.charset,
                               AcceptanceF=True)
        self.id = self.sm.get_id()

    def check(self, SM):
        """This function throws an exception as soon as one single value
           is not matched according to the expectation.
        """
        print "Name = " + self.name,
        for interval in self.charset.get_intervals(PromiseToTreatWellF=True):
            for i in range(interval.begin, interval.end):
                utf8_seq = unicode_to_utf8(i)

                # Apply sequence to state machine
                s_idx = SM.init_state_index
                for byte in utf8_seq:
                    s_idx = SM.states[
                        s_idx].target_map.get_resulting_target_state_index(
                            byte)

                # All acceptance flags must belong to the original state machine
                for cmd in SM.states[s_idx].single_entry:
                    if cmd.__class__ != SeAccept: continue
                    # HERE: As soon as something is wrong --> fire an exception
                    assert cmd.acceptance_id() == self.id
        print " (OK=%i)" % self.id
Example #8
def test(TestString):
    print "expression    = \"" + TestString + "\""
    sm = StateMachine()
    try:
        trigger_set = character_set.do(StringIO(TestString + "]"))
        sm.add_transition(sm.init_state_index, trigger_set, AcceptanceF=True)
        print "state machine\n", sm 
    except RegularExpressionException, x:
        print repr(x)
Example #9
 def __init__(self, Name):
     sh = StringIO("[:\\P{Script=%s}:]" % Name)
     self.name = Name
     self.charset = regex.snap_set_expression(sh, {})
     self.sm = StateMachine()
     self.sm.add_transition(self.sm.init_state_index,
                            self.charset,
                            AcceptanceF=True)
     self.id = self.sm.get_id()
Example #10
def get_any():
    """RETURNS:

       A state machine that 'eats' any character, but only one. 

           (0)--- \Any --->(( 0 ))
    """
    result = StateMachine()
    result.add_transition(result.init_state_index, NumberSet(Interval(-sys.maxint, sys.maxint)), AcceptanceF=True)

    return result
Example #11
def snap_non_control_character(stream, PatternDict):
    __debug_entry("non-control characters", stream)

    # (*) read first character
    char_code = utf8.__read_one_utf8_code_from_stream(stream)
    if char_code is None:
        error_msg("Character could not be interpreted as UTF8 code or End of File reached prematurely.", 
                  stream)
    result = StateMachine()
    result.add_transition(result.init_state_index, char_code, AcceptanceF=True)
    return __debug_exit(result, stream)
Example #12
def snap_non_control_character(stream, PatternDict):
    __debug_entry("non-control characters", stream)

    # (*) read first character
    char_code = utf8.__read_one_utf8_code_from_stream(stream)
    if char_code is None:
        error_msg("Character could not be interpreted as UTF8 code or End of File reached prematurely.", 
                  stream)
    result = StateMachine()
    result.add_transition(result.init_state_index, char_code, AcceptanceF=True)
    return __debug_exit(result, stream)
Example #13
def create_ALL_BUT_NEWLINE_state_machine():
    global Setup
    result = StateMachine()
    # NOTE: Buffer control characters are supposed to be filtered out by the code
    #       generator.
    trigger_set = NumberSet(Interval(ord("\n")).inverse()) 

    if Setup.get_character_value_limit() != sys.maxint:
        trigger_set.intersect_with(Interval(0, Setup.get_character_value_limit()))

    result.add_transition(result.init_state_index, trigger_set, AcceptanceF=True) 
    return result
Example #14
def create_state_machine(SM, Result, Class_StateMachine, Class_State):
    # If all state sets are of size one, this means that there were no states
    # that could have been combined. In this case a simple copy of the original
    # state machine will do.
    if len(filter(lambda state_set: len(state_set) != 1,
                  Result.state_set_list)) == 0:
        return SM.clone()

    # Define a mapping from the state set to a new target state index
    #
    # map:  state_set_index  --->  index of the state that represents it
    #
    map_new_state_index = dict([(i, state_machine_index.get())
                                for i in xrange(len(Result.state_set_list))])

    # The state set that contains the initial state becomes the initial state of
    # the new state machine.
    state_set_containing_initial_state_i = Result.map[SM.init_state_index]
    new_init_state_index = map_new_state_index[
        state_set_containing_initial_state_i]

    result = StateMachine(new_init_state_index)

    # Ensure that each target state index has a state inside the state machine
    # Build up the state machine out of the state sets
    for state_set_idx, state_set in enumerate(Result.state_set_list):

        new_state_index = map_new_state_index[state_set_idx]

        # Merge all core information of the states inside the state set.
        # If one state set contains an acceptance state, then the result is 'acceptance'.
        # (Note: The initial split separates acceptance states from those that are not
        #  acceptance states. There can be no state set containing acceptance and
        #  non-acceptance states)
        # (Note, that the prototype's info has not been included yet, consider whole set)
        result.states[new_state_index] = Class_State.new_merged_core_state(
            SM.states[i] for i in state_set)

    for state_set_idx, state_set in enumerate(Result.state_set_list):
        # The prototype: states in one set all behave equivalently with respect to target state sets,
        # thus only one state from the state set has to be considered.
        prototype = SM.states[state_set[0]]
        representative = result.states[map_new_state_index[state_set_idx]]

        # The representative must have all transitions that the prototype has
        for target_state_index, trigger_set in prototype.target_map.get_map(
        ).iteritems():
            target_state_set_index = Result.map[target_state_index]
            target_index = map_new_state_index[target_state_set_index]
            representative.add_transition(trigger_set, target_index)

    return result
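
The bookkeeping step above, mapping each state set of the partition to one freshly allocated representative index, can be illustrated without the quex machinery; the iterator below is only a stand-in for state_machine_index.get():

# Illustrative sketch: one fresh state index per state set of the partition.
state_set_list      = [[0, 2], [1], [3, 4]]
fresh_index         = iter(xrange(100, 200))       # stand-in for state_machine_index.get()
map_new_state_index = dict((i, next(fresh_index))
                           for i in xrange(len(state_set_list)))
assert map_new_state_index == {0: 100, 1: 101, 2: 102}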
Example #15
def create_ALL_BUT_NEWLINE_state_machine(stream):
    global Setup
    result = StateMachine()
    # NOTE: Buffer control characters are supposed to be filtered out by the code
    #       generator.
    trigger_set = NumberSet(Interval(ord("\n"))).get_complement(Setup.buffer_codec.source_set)
    if trigger_set.is_empty():
        error_msg("The set of admissible characters contains only newline.\n"
                  "The '.' for 'all but newline' is an empty set.",
                  SourceRef.from_FileHandle(stream))

    result.add_transition(result.init_state_index, trigger_set, AcceptanceF=True) 
    return result
Example #16
def get_any():
    """RETURNS:

       A state machine that 'eats' any character, but only one. 

           (0)--- \Any --->(( 0 ))
    """
    result = StateMachine()
    result.add_transition(result.init_state_index,
                          NumberSet(Interval(-sys.maxint, sys.maxint)),
                          AcceptanceF=True)

    return result
Example #17
def test(ByteSequenceDB):

    L = len(ByteSequenceDB[0])

    for seq in ByteSequenceDB:
        assert len(seq) == L
        for x in seq:
            assert isinstance(x, Interval)

    first_different_byte_index = -1
    for i in range(L):
        x0 = ByteSequenceDB[0][i]
        for seq in ByteSequenceDB[1:]:
            if not seq[i].is_equal(x0): 
                first_different_byte_index = i
                break
        if first_different_byte_index != -1: 
            break
    if first_different_byte_index == -1:
        first_different_byte_index = 0

    print "# Best To be Displayed by:"
    print "#"
    print "#  > " + sys.argv[0] + " " + sys.argv[1] + " | dot -Tsvg -o tmp.svg"
    print "#"
    print "# -------------------------"
    print "# Byte Sequences:     "
    i = -1
    for seq in ByteSequenceDB:
        i += 1
        print "# (%i) " % i,
        for x in seq:
            print "    " + x.get_string(Option="hex"), 
        print
    print "#    L    = %i" % L
    print "#    DIdx = %i" % first_different_byte_index



    sm = StateMachine()
    end_index = state_machine.index.get()
    sm.states[end_index] = State()

    trafo.plug_state_sequence_for_trigger_set_sequence(sm, sm.init_state_index, end_index, 
                                                       ByteSequenceDB, L, first_different_byte_index)

    if len(sm.get_orphaned_state_index_list()) != 0:
        print "Error: Orphaned States Detected!"

    print sm.get_graphviz_string(Option="hex")
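
The loop that determines first_different_byte_index above can be restated compactly; this sketch works on plain byte tuples instead of Interval objects and is only an illustration of the loop's intent:

# Illustrative sketch: index of the first position at which the sequences
# differ (0 if they are all identical), mirroring the loop above.
seq_db     = [(0xE0, 0xA0, 0x80), (0xE0, 0xBF, 0xBF)]
L          = len(seq_db[0])
first_diff = 0
for i in range(L):
    if any(seq[i] != seq_db[0][i] for seq in seq_db[1:]):
        first_diff = i
        break
assert first_diff == 1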
Example #18
def get_setup(L0, L1, FSM0, FSM1, FSM2):
    # SPECIALITIES: -- sm0 and sm1 have an intersection between their second 
    #                  transition.
    #               -- sm1 transits further upon acceptance.
    #               -- sm2 has only one transition.
    ci_list = [
        CountInfo(dial_db.new_incidence_id(), NumberSet.from_range(L0, L1), 
                  CountAction(E_CharacterCountType.COLUMN, 0)),
    ]

    # Generate State Machine that does not have any intersection with 
    # the loop transitions.
    sm0 = StateMachine()
    si = sm0.add_transition(sm0.init_state_index, FSM0)
    si = sm0.add_transition(si, NS_A, AcceptanceF=True)
    sm0.states[si].mark_acceptance_id(dial_db.new_incidence_id())

    sm1 = StateMachine()
    si0 = sm1.add_transition(sm1.init_state_index, FSM1)
    si  = sm1.add_transition(si0, NS_A, AcceptanceF=True)
    iid1 = dial_db.new_incidence_id()
    sm1.states[si].mark_acceptance_id(iid1)
    si  = sm1.add_transition(si, NS_B, si0)
    sm1.states[si].mark_acceptance_id(iid1)

    sm2 = StateMachine()
    si = sm2.add_transition(sm2.init_state_index, FSM2, AcceptanceF=True)
    sm2.states[si].mark_acceptance_id(dial_db.new_incidence_id())

    return ci_list, [sm0, sm1, sm2]
Example #19
def test_plug_sequence(ByteSequenceDB):
    L = len(ByteSequenceDB[0])

    for seq in ByteSequenceDB:
        assert len(seq) == L
        for x in seq:
            assert isinstance(x, Interval)

    first_different_byte_index = -1
    for i in range(L):
        x0 = ByteSequenceDB[0][i]
        for seq in ByteSequenceDB[1:]:
            if not seq[i].is_equal(x0):
                first_different_byte_index = i
                break
        if first_different_byte_index != -1:
            break
    if first_different_byte_index == -1:
        first_different_byte_index = 0

    print "# Best To be Displayed by:"
    print "#"
    print "#  > " + sys.argv[0] + " " + sys.argv[1] + " | dot -Tsvg -o tmp.svg"
    print "#"
    print "# -------------------------"
    print "# Byte Sequences:     "
    i = -1
    for seq in ByteSequenceDB:
        i += 1
        print "# (%i) " % i,
        for x in seq:
            print "    " + x.get_string(Option="hex"),
        print
    print "#    L    = %i" % L
    print "#    DIdx = %i" % first_different_byte_index

    sm = StateMachine()
    end_index = state_machine.index.get()
    sm.states[end_index] = State()

    trafo = EncodingTrafoUTF8()
    Setup.buffer_codec_set(trafo, 1)
    trafo._plug_interval_sequences(sm, sm.init_state_index, end_index,
                                   ByteSequenceDB, beautifier)

    if len(sm.get_orphaned_state_index_list()) != 0:
        print "Error: Orphaned States Detected!"

    show_graphviz(sm)
Example #20
def create_ALL_BUT_NEWLINE_state_machine():
    global Setup
    result = StateMachine()
    # NOTE: Buffer control characters are supposed to be filtered out by the code
    #       generator.
    trigger_set = NumberSet(Interval(ord("\n")).inverse())

    if Setup.get_character_value_limit() != sys.maxint:
        trigger_set.intersect_with(
            Interval(0, Setup.get_character_value_limit()))

    result.add_transition(result.init_state_index,
                          trigger_set,
                          AcceptanceF=True)
    return result
Example #21
def do(stream, PatternDict):
    trigger_set = snap_set_expression(stream, PatternDict)

    if trigger_set is None: 
        raise RegularExpressionException("Regular Expression: character_set_expression called for something\n" + \
                                         "that does not start with '[:', '[' or '\\P'")
    if trigger_set.is_empty():
        raise RegularExpressionException("Regular Expression: Character set expression results in empty set.")

    # Create state machine that triggers with the trigger set to SUCCESS
    # NOTE: The default for the ELSE transition is FAIL.
    sm = StateMachine()
    sm.add_transition(sm.init_state_index, trigger_set, AcceptanceF=True)

    return __debug_exit(sm, stream)
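
A hedged usage sketch for the function above; the exact import path is omitted and the role of PatternDict is an assumption (an empty dict appears sufficient for plain sets, as in Example #1's snap_set_expression(sh, {})):

#   sh = StringIO("[a-z]")
#   sm = do(sh, {})
#   # 'sm' would then carry a single transition from its init state that
#   # accepts exactly the characters a..z; everything else is FAIL.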
Example #22
 def __init__(self, Name):
     sh = StringIO("[:\\P{Script=%s}:]" % Name)
     self.name = Name
     self.charset = regex.snap_set_expression(sh, {})
     self.sm = StateMachine()
     self.sm.add_transition(self.sm.init_state_index, self.charset, AcceptanceF=True)
     self.id = self.sm.get_id()
Example #23
def create_range_skipper_code(Language, TestStr, CloserSequence, QuexBufferSize=1024, 
                              CommentTestStrF=False, ShowPositionF=False):
    assert QuexBufferSize >= len(CloserSequence) + 2

    end_str = __prepare(Language)

    door_id_on_skip_range_open = dial_db.new_door_id()

    data = { 
        "closer_sequence":    CloserSequence, 
        "closer_pattern":     Pattern(StateMachine.from_sequence(CloserSequence), 
                                      PatternString="<skip range closer>"),
        "mode_name":          "MrUnitTest",
        "on_skip_range_open": CodeFragment([end_str]),
        "door_id_after":      DoorID.continue_without_on_after_match(),
    }

    skipper_code = range_skipper.do(data, Analyzer)
    __require_variables()

    return create_customized_analyzer_function(Language, TestStr, skipper_code,
                                               QuexBufferSize, CommentTestStrF, ShowPositionF, end_str,
                                               MarkerCharList  = [], 
                                               LocalVariableDB = deepcopy(variable_db.get()),
                                               DoorIdOnSkipRangeOpen=door_id_on_skip_range_open) 
Example #24
def do(the_state_machine_list,
       LeaveIntermediateAcceptanceStatesF=False,
       MountToFirstStateMachineF=False,
       CloneRemainingStateMachinesF=True):
    """Creates a state machine connecting all state machines in the array
    'state_machine_list'. When the flag 'LeaveIntermediateAcceptanceStatesF' is
    given as True, the connection points between the state machines will remain
    acceptance states. In any other case (e.g. the normal sequentialization)
    the connection points lose their acceptance status and only the last state
    machine in the list keeps its acceptance states.

    If MountToFirstStateMachineF is set, then the first state machine will
    contain the result of the concatenation.
    """
    assert len(the_state_machine_list) != 0

    for sm in the_state_machine_list:  # DEBUG
        sm.assert_consistency()  # DEBUG

    # state machines with no states can be deleted from the list. they do not do anything
    # and do not introduce triggers.
    state_machine_list = filter(lambda sm: not sm.is_empty(),
                                the_state_machine_list)

    if len(state_machine_list) < 2:
        if len(state_machine_list) < 1: return StateMachine()
        else: return state_machine_list[0]

    # (*) collect all transitions from both state machines into a single one
    #     (clone to ensure unique identifiers of states)
    result = state_machine_list[0]
    if not MountToFirstStateMachineF: result = result.clone()

    # (*) need to clone the state machines, i.e. provide their internal
    #     states with new ids, but the 'behavior' remains. This allows
    #     state machines to appear twice, or being used in 'larger'
    #     conglomerates.
    appended_sm_list = state_machine_list[1:]
    if CloneRemainingStateMachinesF:
        appended_sm_list = map(lambda sm: sm.clone(), appended_sm_list)

    # (*) all but last state machine enter the subsequent one, in case of SUCCESS
    #     NOTE: The start index is unique. Therefore, one can assume that each
    #           appended_sm_list's '.states' dictionary has different keys. One can simply
    #           take over all transitions of a start index into the result without
    #           considering interferences (see below)
    for appendix in appended_sm_list:
        appendix.assert_consistency()  # DEBUG
        # Mount on every acceptance state the initial state of the following state
        # machine via epsilon transition.
        result.mount_to_acceptance_states(
            appendix.init_state_index,
            CancelStartAcceptanceStateF=not LeaveIntermediateAcceptanceStatesF)
        for state_index, state in appendix.states.items():
            result.states[
                state_index] = state  # state is already cloned (if desired), so no deepcopy here

    # (*) double check for consistency (each target state is contained in state machine)
    result.assert_consistency()  # DEBUG
    return result
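
The behaviour described in the docstring, intermediate acceptance states lose their acceptance and only the last machine keeps its acceptance states, can be illustrated with a plain word-matching sketch (not the quex API):

def walk(word, text):
    # one state per letter of 'word'; only the final state accepts
    state = 0
    for ch in text:
        if state < len(word) and word[state] == ch:
            state += 1
        else:
            return False
    return state == len(word)

# concatenating "ab" and "cd" accepts only the full word "abcd"; the former
# acceptance state after "ab" no longer accepts on its own
assert walk("ab" + "cd", "abcd")
assert not walk("ab" + "cd", "ab")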
Example #25
    def __sm_newline_default(self):
        """Default newline: '(\n)|(\r\n)'
        """
        global cc_type_name_db

        newline_set = NumberSet(ord('\n'))
        retour_set  = NumberSet(ord('\r'))

        before = self.specifier_count_op_map.find_occupier(newline_set, set())
        if before is not None:
            error.warning("Trying to implement default newline: '\\n' or '\\r\\n'.\n" 
                          "The '\\n' option is not possible, since it has been occupied by '%s'.\n" \
                          "No newline can be defined by default."
                          % cc_type_name_db[before.cc_type], before.sr, 
                          SuppressCode=NotificationDB.warning_default_newline_0A_impossible)
            # In this case, no newline can be defined!
            return

        sm = StateMachine.from_character_set(newline_set)

        if Setup.dos_carriage_return_newline_f:
            before = self.specifier_count_op_map.find_occupier(retour_set, set())
            if before is not None:
                error.warning("Trying to implement default newline: '\\n' or '\\r\\n'.\n" 
                          "The '\\r\\n' option is not possible, since '\\r' has been occupied by '%s'." \
                          % cc_type_name_db[before.cc_type],
                          before.sr, 
                          SuppressCode=NotificationDB.warning_default_newline_0D_impossible)
            else:
                sm.add_transition_sequence(sm.init_state_index, [retour_set, newline_set])

        return sm
Example #26
def create_state_machine(SM, Result, Class_StateMachine, Class_State):
    # If all state sets are of size one, this means that there were no states
    # that could have been combined. In this case a simple copy of the original
    # state machine will do.
    if len(filter(lambda state_set: len(state_set) != 1, Result.state_set_list)) == 0:
        return SM.clone()
    
    # Define a mapping from the state set to a new target state index
    #
    # map:  state_set_index  --->  index of the state that represents it
    #
    map_new_state_index = dict([(i, state_machine_index.get()) for i in xrange(len(Result.state_set_list))])
                
    # The state set that contains the initial state becomes the initial state of 
    # the new state machine.   
    state_set_containing_initial_state_i = Result.map[SM.init_state_index]
    new_init_state_index                 = map_new_state_index[state_set_containing_initial_state_i]

    result = StateMachine(new_init_state_index)

    # Ensure that each target state index has a state inside the state machine
    # Build up the state machine out of the state sets
    for state_set_idx, state_set in enumerate(Result.state_set_list):

        new_state_index = map_new_state_index[state_set_idx]

        # Merge all core information of the states inside the state set.
        # If one state set contains an acceptance state, then the result is 'acceptance'.
        # (Note: The initial split separates acceptance states from those that are not
        #  acceptance states. There can be no state set containing acceptance and 
        #  non-acceptance states) 
        # (Note, that the prototype's info has not been included yet, consider whole set)
        result.states[new_state_index] = Class_State.new_merged_core_state(SM.states[i] for i in state_set)

    for state_set_idx, state_set in enumerate(Result.state_set_list):
        # The prototype: states in one set all behave equivalently with respect to target state sets,
        # thus only one state from the state set has to be considered.
        prototype    = SM.states[state_set[0]]
        representative = result.states[map_new_state_index[state_set_idx]]

        # The representative must have all transitions that the prototype has
        for target_state_index, trigger_set in prototype.target_map.get_map().iteritems():
            target_state_set_index = Result.map[target_state_index]
            target_index           = map_new_state_index[target_state_set_index]
            representative.add_transition(trigger_set, target_index)

    return result    
Example #27
def do(stream, PatternDict):
    trigger_set = snap_set_expression(stream, PatternDict)

    if trigger_set is None:
        raise RegularExpressionException("Regular Expression: character_set_expression called for something\n" + \
                                         "that does not start with '[:', '[' or '\\P'")
    if trigger_set.is_empty():
        raise RegularExpressionException(
            "Regular Expression: Character set expression results in empty set."
        )

    # Create state machine that triggers with the trigger set to SUCCESS
    # NOTE: The default for the ELSE transition is FAIL.
    sm = StateMachine()
    sm.add_transition(sm.init_state_index, trigger_set, AcceptanceF=True)

    return __debug_exit(sm, stream)
Example #28
def snap_character_set_expression(stream, PatternDict):
    # GRAMMAR:
    #
    # set_expression:
    #                 [: set_term :]
    #                 traditional character set
    #                 \P '{' property string '}'
    #                 '{' identifier '}'
    #
    # set_term:
    #                 "alnum"
    #                 "alpha"
    #                 "blank"
    #                 "cntrl"
    #                 "digit"
    #                 "graph"
    #                 "lower"
    #                 "print"
    #                 "punct"
    #                 "space"
    #                 "upper"
    #                 "xdigit"
    #                 "union"        '(' set_term [ ',' set_term ]+ ')'
    #                 "intersection" '(' set_term [ ',' set_term ]+ ')'
    #                 "difference"   '(' set_term [ ',' set_term ]+ ')'
    #                 "inverse"      '(' set_term ')'
    #                 set_expression
    #
    trigger_set = snap_set_expression(stream, PatternDict)

    if trigger_set is None:
        error.log("Regular Expression: snap_character_set_expression called for something\n" + \
                  "that does not start with '[:', '[' or '\\P'", stream)
    elif trigger_set.is_empty():
        error.warning(
            "Regular Expression: Character set expression results in empty set.",
            stream)

    # Create state machine that triggers with the trigger set to SUCCESS
    # NOTE: The default for the ELSE transition is FAIL.
    sm = StateMachine()
    sm.add_transition(sm.init_state_index, trigger_set, AcceptanceF=True)

    return __debug_exit(sm, stream)
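
For orientation, a few inputs that the grammar above admits; these are illustrative only, and the exact acceptance depends on the quex version:

#   [:alnum:]
#   [: union(alpha, digit) :]
#   [: difference([a-z], [aeiou]) :]
#   \P{Script=Greek}
#   [a-z]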
Example #29
def snap_character_set_expression(stream, PatternDict):
    # GRAMMAR:
    #
    # set_expression: 
    #                 [: set_term :]
    #                 traditional character set
    #                 \P '{' property string '}'
    #                 '{' identifier '}'
    #
    # set_term:
    #                 "alnum" 
    #                 "alpha" 
    #                 "blank" 
    #                 "cntrl" 
    #                 "digit" 
    #                 "graph" 
    #                 "lower" 
    #                 "print" 
    #                 "punct" 
    #                 "space" 
    #                 "upper" 
    #                 "xdigit"
    #                 "union"        '(' set_term [ ',' set_term ]+ ')'
    #                 "intersection" '(' set_term [ ',' set_term ]+ ')'
    #                 "difference"   '(' set_term [ ',' set_term ]+ ')'
    #                 "inverse"      '(' set_term ')'
    #                 set_expression
    # 
    trigger_set = snap_set_expression(stream, PatternDict)

    if trigger_set is None: 
        error_msg("Regular Expression: snap_character_set_expression called for something\n" + \
                  "that does not start with '[:', '[' or '\\P'", stream)
    elif trigger_set.is_empty():
        error_msg("Regular Expression: Character set expression results in empty set.", stream, DontExitF=True)

    # Create state machine that triggers with the trigger set to SUCCESS
    # NOTE: The default for the ELSE transition is FAIL.
    sm = StateMachine()
    sm.add_transition(sm.init_state_index, trigger_set, AcceptanceF=True)

    return __debug_exit(sm, stream)
Example #30
def get_all():
    """RETURNS:

       A state machine that 'eats' absolutely everything, i.e. 


                              .--- \Any ---.
                              |            |
           (0)--- \Any --->(( 0 ))<--------'
    """
    result = StateMachine()

    i      = index.get()
    state  = State(AcceptanceF=True)
    state.add_transition(NumberSet(Interval(-sys.maxint, sys.maxint)), i)
    result.states[i] = state

    result.get_init_state().add_transition(NumberSet(Interval(-sys.maxint, sys.maxint)), i)

    return result
Example #31
def get_all():
    """RETURNS:

       A state machine that 'eats' absolutely everything, i.e. 


                              .--- \Any ---.
                              |            |
           (0)--- \Any --->(( 0 ))<--------'
    """
    result = StateMachine()

    i = index.get()
    state = State(AcceptanceF=True)
    state.add_transition(NumberSet_All(), i)
    result.states[i] = state

    result.get_init_state().add_transition(NumberSet_All(), i)

    return result
Example #32
def do(SM):
    """Creates a state machine that matches the reverse of what 'SM' matches.
    """
    result = StateMachine(InitStateIndex=SM.init_state_index)
    original_acceptance_state_index_list = SM.get_acceptance_state_index_list()

    if len(original_acceptance_state_index_list) == 0:
        # If there is no acceptance state in a state machine, the state machine
        # cannot match any pattern, it is equivalent to '\None'. The reverse
        # of \None is \None.
        return special.get_none()

    # Ensure that each target state index has a state inside the state machine
    for state_index in SM.states.keys():
        result.create_new_state(StateIdx=state_index)

    for state_index, state in SM.states.items():
        for target_state_index, trigger_set in state.target_map.get_map(
        ).items():
            result.states[target_state_index].add_transition(
                trigger_set.clone(), state_index)

        for target_state_index in state.target_map.get_epsilon_target_state_index_list(
        ):
            result.states[
                target_state_index].target_map.add_epsilon_target_state(
                    state_index)

    # -- copy all origins of the original state machine
    # -- We need to cancel any acceptance, because the inverted engine now starts
    #    from a combination of the acceptance states and ends at the initial state.
    for state_index, state in SM.states.items():
        original_origin_list = [origin.clone() for origin in state.origins()]
        for origin in original_origin_list:
            origin.set_input_position_restore_f(False)
            origin.set_pre_context_id(E_PreContextIDs.NONE)
            origin.set_acceptance_f(False)
        result.states[state_index].origins().set(
            original_origin_list)  # deepcopy implicit

    # -- only the ORIGINAL initial state becomes an acceptance state (end of inverse)
    result.states[SM.init_state_index].set_acceptance(True)

    # -- set up an epsilon transition from a new init state to all previous
    #    acceptance states.
    new_init_state_index = result.create_new_init_state()
    for state_index in original_acceptance_state_index_list:
        result.add_epsilon_transition(new_init_state_index, state_index)

    # -- for uniqueness of state ids, clone the result
    return result.clone()
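
The construction above flips every transition, makes the original init state the acceptance state, and epsilon-connects a new init state to the former acceptance states. A tiny self-contained sketch of the edge-flipping idea (not the quex API):

# forward machine for "ab":   0 --a--> 1 --b--> 2 (acceptance)
forward  = {(0, 'a'): 1, (1, 'b'): 2}
# reverse every edge; start from the old acceptance state and accept on
# reaching the old init state
backward = dict(((dst, ch), src) for (src, ch), dst in forward.items())
state = 2
for ch in "ba":                        # the reversed input
    state = backward[(state, ch)]
assert state == 0                      # old init state reached => accept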
Example #33
def generate_sm_for_boarders(Boarders, Trafo):
    sm = StateMachine()
    for ucs_char in Boarders:
        target_idx = index.get()
        sms.line(sm, sm.init_state_index, (ucs_char, target_idx),
                 (ucs_char, target_idx))
        sm.states[target_idx].set_acceptance()

    Trafo.adapt_source_and_drain_range(-1)
    verdict_f, result = Trafo.do_state_machine(sm, beautifier)
    assert verdict_f
    return result
Example #34
    def __init__(self, SM_A, SM_B, result=None):
        self.original = SM_A
        self.admissible = SM_B

        if result is None:
            init_state_index = index.map_state_combination_to_index(
                (SM_A.init_state_index, SM_B.init_state_index))
            state = self.get_state_core(SM_A.init_state_index)
            self.result = StateMachine(InitStateIndex=init_state_index,
                                       InitState=state)
        else:
            self.result = result
        self.path = []

        # Use 'operation_index' to get a unique index that allows to indicate
        # that 'SM_B' is no longer involved. Also, it ensures that the
        # generated state indices from (a_state_index, operation_index) are
        # unique.
        self.operation_index = index.get()

        TreeWalker.__init__(self)
Example #35
def _get_state_machine_and_terminal(Sequence, Name, OpList):
    """Create state machine that detects the 'Sequence', names the terminal
    with 'Name', and implements the 'CmdList' in the terminal.

    RETURNS: (state machine, terminal)
    """
    sm = StateMachine.from_sequence(Sequence)
    sm.set_id(dial_db.new_incidence_id())
    terminal = Terminal(CodeTerminal(Lng.COMMAND_LIST(OpList)), Name, sm.get_id())
    terminal.set_requires_goto_loop_entry_f()  # --> Goto Loop Entry

    return sm, terminal
Example #36
    def seal(self):
        if len(self.space_db) == 0 and len(self.grid_db) == 0:
            default_space = ord(' ')
            default_tab = ord('\t')
            bad = self.bad_character_set
            if bad.get().contains(default_space) == False:
                self.specify_space("[ ]", NumberSet(default_space), 1, self.fh)
            if bad.get().contains(default_tab) == False:
                self.specify_grid("[\\t]", NumberSet(default_tab), 4, self.fh)

            if len(self.space_db) == 0 and len(self.grid_db) == 0:
                error_msg(
                    "No space or grid defined for indentation counting. Default\n"
                    "values ' ' and '\\t' could not be used since they are specified as 'bad'.",
                    bad.file_name, bad.line_n)

        if self.newline_state_machine.get() is None:
            sm = StateMachine()
            end_idx = sm.add_transition(sm.init_state_index,
                                        NumberSet(ord('\n')),
                                        AcceptanceF=True)
            mid_idx = sm.add_transition(sm.init_state_index,
                                        NumberSet(ord('\r')),
                                        AcceptanceF=False)
            sm.add_transition(mid_idx,
                              NumberSet(ord('\n')),
                              end_idx,
                              AcceptanceF=False)
            self.specify_newline("(\\r\\n)|(\\n)", sm, self.fh)
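
The default newline machine built above accepts '\n' directly and '\r\n' via the intermediate state; a self-contained sketch of exactly that three-state shape (not the quex API):

# states: 0 = init, 1 = after '\r', 2 = acceptance
transitions = {(0, '\n'): 2, (0, '\r'): 1, (1, '\n'): 2}

def accepts(text):
    state = 0
    for ch in text:
        state = transitions.get((state, ch))
        if state is None:
            return False
    return state == 2

assert accepts("\n") and accepts("\r\n") and not accepts("\r")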
Example #37
    def seal(self):
        if len(self.space_db) == 0 and len(self.grid_db) == 0:
            default_space = ord(' ')
            default_tab   = ord('\t')
            bad = self.bad_character_set
            if bad.get().contains(default_space) == False:
                self.specify_space("[ ]", NumberSet(default_space), 1, self.fh)
            if bad.get().contains(default_tab) == False:
                self.specify_grid("[\\t]", NumberSet(default_tab), 4, self.fh)

            if len(self.space_db) == 0 and len(self.grid_db) == 0:
                error_msg("No space or grid defined for indentation counting. Default\n"
                          "values ' ' and '\\t' could not be used since they are specified as 'bad'.",
                          bad.file_name, bad.line_n)


        if self.newline_state_machine.get() is None:
            sm   = StateMachine()
            end_idx = sm.add_transition(sm.init_state_index, NumberSet(ord('\n')), AcceptanceF=True)
            mid_idx = sm.add_transition(sm.init_state_index, NumberSet(ord('\r')), AcceptanceF=False)
            sm.add_transition(mid_idx, NumberSet(ord('\n')), end_idx, AcceptanceF=False)
            self.specify_newline("(\\r\\n)|(\\n)", sm, self.fh)
Example #38
def _get_state_machine_and_terminal(Sequence, Name, OpList):
    """Create state machine that detects the 'Sequence', names the terminal
    with 'Name', and implements the 'CmdList' in the terminal.

    RETURNS: (state machine, terminal)
    """
    sm = StateMachine.from_sequence(Sequence)
    sm.set_id(dial_db.new_incidence_id())
    terminal = Terminal(CodeTerminal(Lng.COMMAND_LIST(OpList)), Name,
                        sm.get_id())
    terminal.set_requires_goto_loop_entry_f()  # --> Goto Loop Entry

    return sm, terminal
Example #39
class X:
    def __init__(self, Name):
        sh = StringIO("[:\\P{Script=%s}:]" % Name)
        self.name = Name
        self.charset = regex.snap_set_expression(sh, {})
        self.sm = StateMachine()
        self.sm.add_transition(self.sm.init_state_index,
                               self.charset,
                               AcceptanceF=True)
        self.id = self.sm.get_id()

    def check(self, SM, TransformFunc):
        """This function throws an exception as soon as one single value
           is not matched according to the expectation.
        """
        print "## [%i] Name = %s" % (self.id, self.name),
        interval_list = self.charset.get_intervals(PromiseToTreatWellF=True)
        interval_count = len(interval_list)
        for interval in interval_list:
            for i in range(interval.begin, interval.end):
                lexatom_seq = TransformFunc(i)

                # Apply sequence to state machine
                state = SM.apply_sequence(lexatom_seq)
                if state is None:
                    error(self.sm, SM, lexatom_seq)

                # All acceptance flags must belong to the original state machine
                acceptance_id_list = [
                    cmd.acceptance_id()
                    for cmd in state.single_entry.get_iterable(SeAccept)
                ]
                if acceptance_id_list and self.id not in acceptance_id_list:
                    print eval("u'\U%08X'" % i)
                    print "#Seq:  ", ["%02X" % x for x in lexatom_seq]
                    print "#acceptance-ids:", acceptance_id_list
                    error(self.sm, SM, lexatom_seq)

        print " (OK=%i)" % interval_count
Example #40
class X:
    def __init__(self, Name):
        sh = StringIO("[:\\P{Script=%s}:]" % Name)
        self.name = Name
        self.charset = regex.snap_set_expression(sh, {})
        self.sm = StateMachine()
        self.sm.add_transition(self.sm.init_state_index,
                               self.charset,
                               AcceptanceF=True)
        self.id = self.sm.get_id()

    def check(self, SM):
        """This function throws an exception as soon as one single value
           is not matched according to the expectation.
        """
        print "Name = " + self.name,
        for interval in self.charset.get_intervals(PromiseToTreatWellF=True):
            for i in range(interval.begin, interval.end):
                utf16_seq = unicode_to_utf16(i)

                # Apply sequence to state machine
                s_idx = SM.init_state_index
                for word in utf16_seq:
                    s_idx = SM.states[
                        s_idx].target_map.get_resulting_target_state_index(
                            word)

                assert s_idx is not None, \
                       "No acceptance for %X in [%X,%X] --> %s" % \
                       (i, interval.begin, interval.end - 1, repr(map(lambda x: "%04X." % x, utf16_seq)))

                # All acceptance flags must belong to the original state machine
                for cmd in SM.states[s_idx].single_entry.get_iterable(
                        SeAccept):
                    # HERE: As soon as something is wrong --> fire an exception
                    assert cmd.acceptance_id() == self.id

        print " (OK=%i)" % self.id
Example #41
    def __prepare_incidence_id_map(self, IncidenceIdMap):
        def add(sm, StateIndex, TriggerSet, IncidenceId):
            if TriggerSet.is_empty(): return
            target_state_index = sm.add_transition(StateIndex, TriggerSet)
            target_state = sm.states[target_state_index]
            target_state.mark_self_as_origin(IncidenceId, target_state_index)
            target_state.set_acceptance(True)

        sm = StateMachine()
        for character_set, incidence_id in IncidenceIdMap:
            # 'cliid' = unique command list incidence id.
            add(sm, sm.init_state_index, character_set, incidence_id)

        return sm
Example #42
class X:
    def __init__(self, Name):
        sh = StringIO("[:\\P{Script=%s}:]" % Name)
        self.name = Name
        self.charset = regex.snap_set_expression(sh, {})
        self.sm = StateMachine()
        self.sm.add_transition(self.sm.init_state_index, self.charset, AcceptanceF=True)
        self.id = self.sm.get_id()

    def check(self, SM, TransformFunc):
        """This function throws an exception as soon as one single value
           is not matched according to the expectation.
        """
        print "## [%i] Name = %s" % (self.id, self.name), 
        interval_list  = self.charset.get_intervals(PromiseToTreatWellF=True)
        interval_count = len(interval_list)
        for interval in interval_list:
            for i in range(interval.begin, interval.end):
                lexatom_seq = TransformFunc(i)

                # Apply sequence to state machine
                state = SM.apply_sequence(lexatom_seq)
                if state is None:
                    error(self.sm, SM, lexatom_seq)

                # All acceptance flags must belong to the original state machine
                acceptance_id_list = [
                    cmd.acceptance_id()
                    for cmd in state.single_entry.get_iterable(SeAccept)
                ]
                if acceptance_id_list and self.id not in acceptance_id_list: 
                    print eval("u'\U%08X'" % i) 
                    print "#Seq:  ", ["%02X" % x for x in lexatom_seq]
                    print "#acceptance-ids:", acceptance_id_list
                    error(self.sm, SM, lexatom_seq)

        print " (OK=%i)" % interval_count
Example #43
def _get_loop_analyzer(LoopMap, EventHandler):
    """Construct a state machine that triggers only on one character. Actions
    according to the triggered character are implemented using terminals which
    are entered upon acceptance.

            .------.
       ---->| Loop |
            |      |----> accept A                 (normal loop terminals)
            |      |----> accept B
            |      |----> accept C
            :      :         :
            |      |----> accept CoupleIncidenceA  (couple terminals towards
            |      |----> accept CoupleIncidenceB   appendix state machines)
            |      |----> accept CoupleIncidenceC    
            :______:         :
            | else |----> accept iid_loop_exit
            '------'

    RETURNS: [0] Loop analyzer (prepared state machine)
             [1] DoorID of loop entry
    """
    # Loop StateMachine
    sm = StateMachine.from_IncidenceIdMap(
        (lei.character_set, lei.incidence_id) for lei in LoopMap)

    # Code Transformation
    verdict_f, sm = Setup.buffer_codec.do_state_machine(sm, beautifier)

    # Loop Analyzer
    analyzer = analyzer_generator.do(
        sm,
        EventHandler.engine_type,
        EventHandler.reload_state_extern,
        OnBeforeReload=EventHandler.on_before_reload,
        OnAfterReload=EventHandler.on_after_reload,
        OnBeforeEntry=EventHandler.on_loop_entry)

    # If reload state is generated
    # => All other analyzers MUST use the same generated reload state.
    if EventHandler.reload_state_extern is None:
        EventHandler.reload_state_extern = analyzer.reload_state

    # Set the 'Re-Entry' Operations.
    entry = analyzer.init_state().entry
    tid_reentry = entry.enter_OpList(analyzer.init_state_index, index.get(),
                                     EventHandler.on_loop_reentry)
    entry.categorize(analyzer.init_state_index)

    return analyzer, entry.get(tid_reentry).door_id
Example #44
File: loop.py  Project: xxyzzzq/quex
def _get_loop_analyzer(LoopMap, EventHandler):
    """Construct a state machine that triggers only on one character. Actions
    according to the triggered character are implemented using terminals which
    are entered upon acceptance.

            .------.
       ---->| Loop |
            |      |----> accept A                 (normal loop terminals)
            |      |----> accept B
            |      |----> accept C
            :      :         :
            |      |----> accept CoupleIncidenceA  (couple terminals towards
            |      |----> accept CoupleIncidenceB   appendix state machines)
            |      |----> accept CoupleIncidenceC    
            :______:         :
            | else |----> accept iid_loop_exit
            '------'

    RETURNS: [0] Loop analyzer (prepared state machine)
             [1] DoorID of loop entry
    """
    # Loop StateMachine
    sm            = StateMachine.from_IncidenceIdMap(
                        (lei.character_set, lei.incidence_id) for lei in LoopMap
                    )

    # Code Transformation
    verdict_f, sm = Setup.buffer_codec.do_state_machine(sm, beautifier)

    # Loop Analyzer
    analyzer = analyzer_generator.do(sm, 
                                     EventHandler.engine_type, 
                                     EventHandler.reload_state_extern, 
                                     OnBeforeReload = EventHandler.on_before_reload, 
                                     OnAfterReload  = EventHandler.on_after_reload,
                                     OnBeforeEntry  = EventHandler.on_loop_entry)

    # If reload state is generated 
    # => All other analyzers MUST use the same generated reload state.
    if EventHandler.reload_state_extern is None:
        EventHandler.reload_state_extern = analyzer.reload_state

    # Set the 'Re-Entry' Operations.
    entry       = analyzer.init_state().entry
    tid_reentry = entry.enter_OpList(analyzer.init_state_index, index.get(), 
                                     EventHandler.on_loop_reentry)
    entry.categorize(analyzer.init_state_index)

    return analyzer, entry.get(tid_reentry).door_id
Example #45
def _get_state_machine_vs_terminal_list(CloserSequence, CounterDb):
    """In addition to all characters, the loop shall walk along the 'closer'.
    If the closer matches, the range skipping exits. Characters need to be 
    counted properly.

    RETURNS: list(state machine, terminal)

    The list contains only one single element.
    """
    sm = StateMachine.from_sequence(CloserSequence)
    sm.set_id(dial_db.new_incidence_id())

    code = [ Lng.GOTO(DoorID.continue_without_on_after_match()) ]
    terminal = Terminal(CodeTerminal(code), "<SKIP RANGE TERMINATED>", sm.get_id())
    return [ (sm, terminal) ]
Example #46
    def _plug_interval_sequences(self, sm, BeginIndex, EndIndex, IntervalSequenceList, beautifier):
        sub_sm = StateMachine.from_interval_sequences(IntervalSequenceList)
        if Setup.bad_lexatom_detection_f: 
            self._plug_encoding_error_detectors(sub_sm)
        sub_sm = beautifier.do(sub_sm)

        # The 'End State' is the state where there are no further transitions.
        new_end_si = None
        for state_index, state in sub_sm.states.iteritems():
            if state.target_map.is_empty() and not state.accepts_incidence(): 
                new_end_si = state_index
        assert new_end_si is not None

        # Mount the states inside the state machine
        sm.mount_absorbed_states_between(BeginIndex, EndIndex, 
                                         sub_sm.states, sub_sm.init_state_index, new_end_si)
Example #47
def _get_state_machine_vs_terminal_list(CloserSequence, CounterDb):
    """In addition to all characters, the loop shall walk along the 'closer'.
    If the closer matches, the range skipping exits. Characters need to be 
    counted properly.

    RETURNS: list(state machine, terminal)

    The list contains only one single element.
    """
    sm = StateMachine.from_sequence(CloserSequence)
    sm.set_id(dial_db.new_incidence_id())

    code = [Lng.GOTO(DoorID.continue_without_on_after_match())]
    terminal = Terminal(CodeTerminal(code), "<SKIP RANGE TERMINATED>",
                        sm.get_id())
    return [(sm, terminal)]
Example #48
def get_transition_function(iid_map, Codec):
    if Codec == "UTF8": Setup.buffer_codec_set(bc_factory.do("utf8"), 1)
    else:               Setup.buffer_codec_set(bc_factory.do("unicode"), -1)

    sm        = StateMachine.from_IncidenceIdMap(iid_map)
    dummy, sm = Setup.buffer_codec.do_state_machine(sm, beautifier)
    analyzer  = analyzer_generator.do(sm, engine.CHARACTER_COUNTER)
    tm_txt    = do_analyzer(analyzer)
    tm_txt    = Lng.GET_PLAIN_STRINGS(tm_txt)
    tm_txt.append("\n")
    #label   = dial_db.get_label_by_door_id(DoorID.incidence(E_IncidenceIDs.MATCH_FAILURE))

    for character_set, iid in iid_map:
        tm_txt.append("%s return (int)%s;\n" % (Lng.LABEL(DoorID.incidence(iid)), iid))
    tm_txt.append("%s return (int)-1;\n" % Lng.LABEL(DoorID.drop_out(-1)))

    return "".join(tm_txt)
Example #49
def get_transition_function(iid_map, Codec):
    if Codec == "UTF8": Setup.buffer_codec_set(bc_factory.do("utf8"), 1)
    else: Setup.buffer_codec_set(bc_factory.do("unicode"), -1)

    sm = StateMachine.from_IncidenceIdMap(iid_map)
    dummy, sm = Setup.buffer_codec.do_state_machine(sm, beautifier)
    analyzer = analyzer_generator.do(sm, engine.CHARACTER_COUNTER)
    tm_txt = do_analyzer(analyzer)
    tm_txt = Lng.GET_PLAIN_STRINGS(tm_txt)
    tm_txt.append("\n")
    #label   = dial_db.get_label_by_door_id(DoorID.incidence(E_IncidenceIDs.MATCH_FAILURE))

    for character_set, iid in iid_map:
        tm_txt.append("%s return (int)%s;\n" %
                      (Lng.LABEL(DoorID.incidence(iid)), iid))
    tm_txt.append("%s return (int)-1;\n" % Lng.LABEL(DoorID.drop_out(-1)))

    return "".join(tm_txt)
Example #50
def do(SM):
    """Creates a state machine that matches the reverse of what 'SM' matches.
    """
    result                               = StateMachine(InitStateIndex=SM.init_state_index)
    original_acceptance_state_index_list = SM.get_acceptance_state_index_list()

    if len(original_acceptance_state_index_list) == 0:
        # If there is no acceptance state in a state machine, the state machine
        # cannot match any pattern, it is equivalent to '\None'. The reverse
        # of \None is \None.
        return special.get_none()
       
    # Ensure that each target state index has a state inside the state machine
    for state_index in SM.states.keys():
        result.create_new_state(StateIdx=state_index)

    for state_index, state in SM.states.items():
        for target_state_index, trigger_set in state.target_map.get_map().items():
            result.states[target_state_index].add_transition(trigger_set.clone(), state_index)

        for target_state_index in state.target_map.get_epsilon_target_state_index_list():
            result.states[target_state_index].target_map.add_epsilon_target_state(state_index)

    # -- copy all origins of the original state machine
    # -- We need to cancel any acceptance, because the inverted engine now starts
    #    from a combination of the acceptance states and ends at the initial state.
    for state_index, state in SM.states.items():
        original_origin_list = [origin.clone() for origin in state.origins()]
        for origin in original_origin_list:
            origin.set_input_position_restore_f(False)
            origin.set_pre_context_id(E_PreContextIDs.NONE)
            origin.set_acceptance_f(False)
        result.states[state_index].origins().set(original_origin_list) # deepcopy implicit

    # -- only the ORIGINAL initial state becomes an acceptance state (end of inverse)
    result.states[SM.init_state_index].set_acceptance(True)

    # -- set up epsilon transitions from a new init state to all previous
    #    acceptance states.
    new_init_state_index = result.create_new_init_state() 
    for state_index in original_acceptance_state_index_list:
        result.add_epsilon_transition(new_init_state_index, state_index)        

    # -- for uniqueness of state ids, clone the result
    return result.clone()    
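The reversal consists of three steps: invert every transition (including epsilon transitions), make the original init state the only acceptance state, and connect a fresh init state by epsilon transitions to all former acceptance states. A small self-contained sketch of the same construction on a dict-based NFA (for illustration only; it does not use quex's StateMachine):

# Minimal NFA: transitions[state][symbol] -> set of target states; the symbol
# None plays the role of epsilon.
def reverse_nfa(init_state, acceptance_states, transitions):
    rev = {}
    for src, edges in transitions.items():
        for symbol, targets in edges.items():
            for dst in targets:
                rev.setdefault(dst, {}).setdefault(symbol, set()).add(src)
    # Fresh init state with epsilon transitions to all former acceptance states.
    new_init = "new_init"
    rev[new_init] = {None: set(acceptance_states)}
    # The original init state becomes the only acceptance state.
    return new_init, {init_state}, rev

# 'ab' matcher: 0 --a--> 1 --b--> 2 (acceptance)
init, acceptance, rev = reverse_nfa(0, {2}, {0: {'a': {1}}, 1: {'b': {2}}})
assert rev[init] == {None: {2}}   # epsilon into the former acceptance state
assert rev[2] == {'b': {1}}       # edges inverted: 'b' is now read first
assert rev[1] == {'a': {0}}
assert acceptance == {0}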
Example No. 51
    def __init__(self, SM_A, SM_B, result=None):
        self.original    = SM_A
        self.admissible  = SM_B

        if result is None:
            init_state_index = index.map_state_combination_to_index((SM_A.init_state_index, 
                                                                     SM_B.init_state_index))
            state            = self.get_state_core(SM_A.init_state_index)
            self.result      = StateMachine(InitStateIndex = init_state_index,
                                            InitState      = state)
        else:
            self.result      = result
        self.path        = []

        # Use 'operation_index' to get a unique index that allows to indicate
        # that 'SM_B' is no longer involved. Also, it ensures that the
        # generated state indices from (a_state_index, operation_index) are
        # unique.
        self.operation_index = index.get()

        TreeWalker.__init__(self)
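The constructor relies on 'index.map_state_combination_to_index()' to hand out one unique state index per (state of SM_A, state of SM_B) combination. A plausible sketch of such a mapping, offered as an assumption about its behavior rather than quex's actual implementation, is a memoizing counter:

# Hypothetical sketch: every distinct state combination gets a fresh, stable
# index; asking for the same combination again returns the same index.
class CombinationIndex(object):
    def __init__(self):
        self._db   = {}
        self._next = 0

    def map_state_combination_to_index(self, combination):
        if combination not in self._db:
            self._db[combination] = self._next
            self._next += 1
        return self._db[combination]

idx = CombinationIndex()
i00 = idx.map_state_combination_to_index((0, 0))
i01 = idx.map_state_combination_to_index((0, 1))
assert i00 != i01
assert idx.map_state_combination_to_index((0, 0)) == i00   # stable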
Example No. 53
def StateMachine_Newline():
    """Creates a state machine matching newline according to what has been 
    specified in the setup (Setup.dos_carriage_return_newline_f). 

    That is, if DOS newline is enabled, the state machine represents '\r\n';
    if it is Unix-only, it represents '\n'. If both are required, they
    are implemented in parallel.

    RETURNS: StateMachine
    """
    UnixF = True
    DosF  = Setup.dos_carriage_return_newline_f

    NL = ord('\n')  # (pure) newline, i.e. line feed
    CR = ord('\r')  # carriage return

    sm = StateMachine()
    if UnixF:
        sm.add_transition(sm.init_state_index, NL, AcceptanceF=True)
    if DosF:
        idx = sm.add_transition(sm.init_state_index, CR, AcceptanceF=False)
        sm.add_transition(idx, NL, AcceptanceF=True)

    return beautifier.do(sm)
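With DOS newlines enabled the machine thus contains the two-step path '\r' then '\n' in addition to the direct '\n' transition; both paths start at the init state and run in parallel. The acceptance behavior can be spelled out as a tiny hand-written DFA (plain Python, purely illustrative):

# Tiny explicit DFA with the same acceptance behavior: state 0 is the init
# state, state 1 is reached after '\r' (DOS only), state 2 is acceptance.
def newline_dfa(dos_f):
    trans = {0: {'\n': 2}, 1: {'\n': 2}}
    if dos_f:
        trans[0]['\r'] = 1

    def accepts(text):
        state = 0
        for ch in text:
            state = trans.get(state, {}).get(ch)
            if state is None:
                return False
        return state == 2
    return accepts

unix_only = newline_dfa(dos_f=False)
dos_too   = newline_dfa(dos_f=True)
assert unix_only("\n") and not unix_only("\r\n")
assert dos_too("\n") and dos_too("\r\n") and not dos_too("\r")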
Example No. 54
def __parse_option(fh, new_mode):
    def get_pattern_object(SM):
        if not SM.is_DFA_compliant(): result = nfa_to_dfa.do(SM)
        else:                         result = SM
        result = hopcroft.do(result, CreateNewStateMachineF=False)
        return Pattern(result, AllowStateMachineTrafoF=True)

    identifier = read_option_start(fh)
    if identifier is None: return False

    verify_word_in_list(identifier, mode_option_info_db.keys(),
                        "mode option", fh.name, get_current_line_info_number(fh))

    if identifier == "skip":
        # A skipper 'eats' characters at the beginning of a pattern that belong
        # to a specified set of characters. The most common application is the
        # whitespace skipper '[ \t\n]'. The skipper definition allows quex to
        # skip these regions very efficiently.
        pattern_str, trigger_set = regular_expression.parse_character_set(fh, PatternStringF=True)
        skip_whitespace(fh)

        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'." % identifier, fh)

        if trigger_set.is_empty():
            error_msg("Empty trigger set for skipper '%s'." % identifier, fh)

        # TriggerSet skipping is implemented the following way: as soon as one element of the
        # trigger set appears, the state machine enters the 'trigger set skipper section'.
        # The skipper is entered as if the opener pattern were a normal pattern and the
        # 'skipper' were the action.
        # NOTE: The corresponding CodeFragment for skipping is created in 'implement_skippers(...)'.
        pattern_sm  = StateMachine()
        pattern_sm.add_transition(pattern_sm.init_state_index, trigger_set, AcceptanceF=True)

        # Skipper code is to be generated later
        action = GeneratedCode(skip_character_set.do, 
                               FileName = fh.name, 
                               LineN    = get_current_line_info_number(fh))
        action.data["character_set"] = trigger_set

        new_mode.add_match(pattern_str, action, get_pattern_object(pattern_sm), 
                           Comment=E_SpecialPatterns.SKIP)

        return True

    elif identifier in ["skip_range", "skip_nested_range"]:
        # A non-nesting skipper can use a full-fledged regular expression as opener,
        # since it only affects the trigger. Not so the nested range skipper; see below.

        # -- opener
        skip_whitespace(fh)
        if identifier == "skip_nested_range":
            # Nested range state machines only accept 'strings' not state machines
            opener_str, opener_sequence = __parse_string(fh, "Opener pattern for 'skip_nested_range'")
            opener_sm = StateMachine.from_sequence(opener_sequence)
        else:
            opener_str, opener_pattern = regular_expression.parse(fh)
            opener_sm = opener_pattern.sm
            # For 'range skipping' the opener sequence is not needed, only the opener state
            # machine is webbed into the pattern matching state machine.
            opener_sequence       = None

        skip_whitespace(fh)

        # -- closer
        closer_str, closer_sequence = __parse_string(fh, "Closing pattern for 'skip_range' or 'skip_nested_range'")
        skip_whitespace(fh)
        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'" % identifier, fh)

        # Skipper code is to be generated later
        generator_function, comment = { 
                "skip_range":        (skip_range.do,        E_SpecialPatterns.SKIP_RANGE),
                "skip_nested_range": (skip_nested_range.do, E_SpecialPatterns.SKIP_NESTED_RANGE),
        }[identifier]
        action = GeneratedCode(generator_function,
                               FileName = fh.name, 
                               LineN    = get_current_line_info_number(fh))

        action.data["opener_sequence"] = opener_sequence
        action.data["closer_sequence"] = closer_sequence
        action.data["mode_name"]       = new_mode.name

        new_mode.add_match(opener_str, action, get_pattern_object(opener_sm), Comment=comment)

        return True
        
    elif identifier == "indentation":
        value = indentation_setup.do(fh)

        # Enter 'Newline' and 'Suppressed Newline' as matches into the engine.
        # Similar to skippers, the indentation count is then triggered by the newline.
        # -- Suppressed Newline = Suppressor followed by Newline,
        #    then newline does not trigger indentation counting.
        suppressed_newline_pattern_str = ""
        if value.newline_suppressor_state_machine.get() is not None:
            suppressed_newline_pattern_str = \
                  "(" + value.newline_suppressor_state_machine.pattern_string() + ")" \
                + "(" + value.newline_state_machine.pattern_string() + ")"
                                           
            suppressed_newline_sm = \
                sequentialize.do([value.newline_suppressor_state_machine.get(),
                                  value.newline_state_machine.get()])
                 
            FileName = value.newline_suppressor_state_machine.file_name
            LineN    = value.newline_suppressor_state_machine.line_n
            # Go back to start.
            code = UserCodeFragment("goto %s;" % get_label("$start", U=True), FileName, LineN)

            new_mode.add_match(suppressed_newline_pattern_str, code, 
                               get_pattern_object(suppressed_newline_sm),
                               Comment=E_SpecialPatterns.SUPPRESSED_INDENTATION_NEWLINE)

        # When there is an empty line, there shall be no indentation count on it.
        # Here comes the trick:
        #
        #      Let               newline
        #      be defined as:    newline ([space]* newline)*
        #
        # This way empty lines are eaten away before the indentation count is
        # activated (see the sketch after this example).

        # -- 'space'
        x0 = StateMachine()
        x0.add_transition(x0.init_state_index, value.indentation_count_character_set(), 
                          AcceptanceF=True)
        # -- '[space]*'
        x1 = repeat.do(x0)
        # -- '[space]* newline'
        x2 = sequentialize.do([x1, value.newline_state_machine.get()])
        # -- '([space]* newline)*'
        x3 = repeat.do(x2)
        # -- 'newline ([space]* newline)*'
        x4 = sequentialize.do([value.newline_state_machine.get(), x3])
        # -- nfa to dfa; hopcroft optimization
        sm = beautifier.do(x4)

        FileName = value.newline_state_machine.file_name
        LineN    = value.newline_state_machine.line_n
        action   = GeneratedCode(indentation_counter.do, FileName, LineN)

        action.data["indentation_setup"] = value

        new_mode.add_match(value.newline_state_machine.pattern_string(), action, 
                           get_pattern_object(sm), 
                           Comment=E_SpecialPatterns.INDENTATION_NEWLINE)

        # Announce the mode to which the setup belongs
        value.set_containing_mode_name(new_mode.name)
    else:
        value = read_option_value(fh)

    # The 'verify_word_in_list()' call must have ensured that the following holds
    assert mode_option_info_db.has_key(identifier)

    # Is the option of the appropriate value?
    option_info = mode_option_info_db[identifier]
    if option_info.domain is not None and value not in option_info.domain:
        error_msg("Tried to set value '%s' for option '%s'. " % (value, identifier) + \
                  "However, the only possible values for this option are: %s." % repr(option_info.domain)[1:-1], fh)

    # Finally, set the option
    new_mode.add_option(identifier, value)

    return True
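The extended newline definition used in the 'indentation' branch above, newline ([space]* newline)*, can be checked directly with Python's re module. Assuming '[ \t]' as the indentation count character set (an assumption made only for this sketch), the extended pattern swallows empty lines so that indentation counting only starts in front of actual content:

import re

# Extended newline as used for indentation handling: newline ([space]* newline)*
extended_newline = re.compile(r"\n(?:[ \t]*\n)*")

m = extended_newline.match("\n\n   \n\tif x:\n")
# All empty lines are consumed in one match; indentation counting would start
# right in front of the '\t' of the first non-empty line.
assert m.group(0) == "\n\n   \n"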
Example No. 55
def do(A, B):
    """RETURNS: True  - if A is a SUPERSET of B
                False - if not
    """
    if isinstance(A, StateMachine):
        assert isinstance(B, StateMachine)
        return Checker(A, B).do()

    assert not isinstance(B, StateMachine)
    # (*) Core Pattern ________________________________________________________
    #
    #     (including the mounted post context, if there is one).
    #
    # NOTE: Post-conditions do not change anything, since they match only when
    #       the whole lexeme has matched (from beginning to end of the post condition).
    #       Post-conditions only determine the place where the analyzer returns
    #       after the match.
    superset_f = Checker(A.sm, B.sm).do()

    if not superset_f: return False

    # NOW: For the core state machines it holds: 
    #
    #                      'core(A)' matches a super set of 'core(B)'.
    #

    # (*) Pre-Condition _______________________________________________________
    #
    if not A.has_pre_context(): 
        # core(A) is a superset of core(B). 
        # A is not restricted. B may be (who cares).
        # => A can match more than B.
        return True

    # NOW: Acceptance of A is restricted by a pre-context.
    #
    if not B.has_pre_context(): 
        # A is restricted by pre-context, B is not.
        # => B can match things that A cannot. 
        return False

    # NOW: A is restricted by pre-context. 
    #      B is restricted by pre-context. 
    #
    #      For A to be a superset of B, A must be no more restricted than B.
    #
    #                 pre(B) is a superset of pre(A) 
    # 
    #
    if B.pre_context_trivial_begin_of_line_f:
        if not A.pre_context_trivial_begin_of_line_f:
            # pre(A) can never be a subset of pre(B)
            return False
        else:
            # pre(A) = pre(B) which fulfills the condition
            return True

    # NOW: B is a 'real' pre-context not only a 'begin-of-line'
    #
    # Decision about "pre(A) is subset of pre(B)" done by Checker
    if not A.pre_context_trivial_begin_of_line_f:
        A_pre_sm = A.inverse_pre_context_sm
    else:
        # A contains only 'begin-of-line'. Note, however, that
        # -- the newline definition may include '\r\n', so inversion is
        #    required.
        # -- at this point in time we are dealing with transformed
        #    machines, so this one has to be transformed as well.
        A_pre_sm = StateMachine.from_sequence("\n").get_inverse()
        A_pre_sm = transformation.try_this(A_pre_sm, fh=-1)

    return Checker(B.inverse_pre_context_sm, A_pre_sm).do()
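The decision ladder above (unrestricted A wins, restricted A against unrestricted B loses, and two trivial begin-of-line pre-contexts tie) can be paraphrased as a small boolean helper. The final comparison of two 'real' pre-context machines is left to a callback standing in for the Checker call of the original; this is an illustrative sketch, not quex's implementation:

# Sketch of the decision ladder for "A matches a superset of B", given that
# core(A) already covers core(B). 'pre_superset_check' stands in for the
# Checker-based comparison of the two pre-context machines (assumed helper).
def superset_given_core_ok(a_has_pre, b_has_pre,
                           a_trivial_bol, b_trivial_bol,
                           pre_superset_check):
    if not a_has_pre:
        return True               # A unrestricted -> it covers B in any context
    if not b_has_pre:
        return False              # A restricted, B not -> B matches more contexts
    if b_trivial_bol:
        # B demands begin-of-line; only an equally trivial pre-context of A ties.
        return a_trivial_bol
    return pre_superset_check()   # both carry 'real' pre-contexts

assert superset_given_core_ok(False, True, False, False, lambda: False)
assert not superset_given_core_ok(True, False, False, False, lambda: True)
assert superset_given_core_ok(True, True, True, True, lambda: False)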