Пример #1
0
def __templated_state_entries(txt, TheTemplate, SMD):
    """Append the entry stubs of all states that are implemented by the
       template 'TheTemplate' to 'txt'. Each stub sets the template's state
       key and then jumps to the entry of the template state, e.g.

            STATE_4711: 
               key = 0; goto TEMPLATE_STATE_111;
            STATE_3123: 
               key = 1; goto TEMPLATE_STATE_111;
            STATE_8912: 
               key = 2; goto TEMPLATE_STATE_111;

       The state key is the position of the state in the template
       combination's list of involved states.

       txt         -- list of code fragments; extended in place.
       TheTemplate -- the template whose involved states get their entries.
       SMD         -- decorated state machine providing the states.
    """
    involved_state_list = TheTemplate.template_combination().involved_state_list()

    for state_key, state_index in enumerate(involved_state_list):
        state = SMD.sm().states[state_index]

        if not TheTemplate.uniform_state_entries_f():
            # Non-uniform entries: each state needs its own input handling
            # and acceptance handling before it jumps into the template.
            txt.extend(input_block.do(state_index, False, SMD))
            txt.extend(acceptance_info.do(state, state_index, SMD,
                                          ForceSaveLastAcceptanceF=True))
        else:
            # Uniform entries: the entry handling happens once, at the entrance
            # of the template. Here, only the label of the state is required.
            if state_index != SMD.sm().init_state_index:
                txt.append("    __quex_assert_no_passage();\n")
            entry_label = get_label("$entry", state_index)
            txt.append(entry_label + ":\n")
            debug_txt = LanguageDB["$debug-state"](state_index, SMD.forward_lexing_f())
            txt.append("\n    " + debug_txt)

        # Set the state key and jump to the template.
        txt.append("    ")
        key_assignment = LanguageDB["$assignment"]("template_state_key", "%i" % state_key)
        txt.append(key_assignment.replace("\n", "\n    "))
        template_entry_label = get_label("$entry", TheTemplate.core().state_index, U=True)
        txt.append("    goto %s;" % template_entry_label)
        txt.append("\n\n")
Пример #2
0
def get_transition_to_reload(StateIdx, SMD, ReturnStateIndexStr=None):
    """Return the code (a string) that transits from state 'StateIdx' to the
       reload procedure.

       StateIdx            -- index of the state that requires the reload.
       SMD                 -- decorated state machine, or None. With None the
                              reload direction is 'FORWARD' and the generic
                              reload transition is generated.
       ReturnStateIndexStr -- if given, this string is used as the reference
                              to the state to return to; otherwise the address
                              of the entry of 'StateIdx' is used.

       RETURNS:
         "goto __RELOAD_INIT_STATE;" for the init state in forward lexing,
         ""                          during backward input position detection,
         "QUEX_GOTO_RELOAD(...);"    in all other cases.
    """
    if SMD is not None and SMD.backward_lexing_f(): direction = "BACKWARD"
    else: direction = "FORWARD"

    if SMD is not None and (StateIdx == SMD.sm().init_state_index
                            and SMD.forward_lexing_f()):
        return "goto __RELOAD_INIT_STATE;"

    if SMD is not None and SMD.backward_input_position_detection_f():
        # No reload transition is generated for this mode.
        return ""

    if ReturnStateIndexStr is not None:
        state_reference = ReturnStateIndexStr
    else:
        state_reference = "QUEX_LABEL(%i)" % get_address(
            "$entry", StateIdx, R=True)

    # Ensure that '__STATE_ROUTER' is marked as referenced
    get_label("$state-router", U=True)

    return "QUEX_GOTO_RELOAD(%s, %s, QUEX_LABEL(%i));" \
           % (get_label("$reload-%s" % direction, U=True),
              state_reference,
              get_address("$drop-out", StateIdx, U=True, R=True))
Пример #3
0
def get_transition_to_reload(StateIdx, SMD, ReturnStateIndexStr=None):
    """Return the code (a string) that transits from state 'StateIdx' to the
       reload procedure.

       StateIdx            -- index of the state that requires the reload.
       SMD                 -- decorated state machine, or None. With None the
                              reload direction is 'FORWARD' and the generic
                              reload transition is generated.
       ReturnStateIndexStr -- if given, this string is used as the reference
                              to the state to return to; otherwise the address
                              of the entry of 'StateIdx' is used.

       RETURNS:
         "goto __RELOAD_INIT_STATE;" for the init state in forward lexing,
         ""                          during backward input position detection,
         "QUEX_GOTO_RELOAD(...);"    in all other cases.
    """
    if SMD is not None and SMD.backward_lexing_f(): direction = "BACKWARD"
    else:                                           direction = "FORWARD"

    if SMD is not None and (StateIdx == SMD.sm().init_state_index and SMD.forward_lexing_f()):
        return "goto __RELOAD_INIT_STATE;" 

    if SMD is not None and SMD.backward_input_position_detection_f():
        # No reload transition is generated for this mode.
        return "" 

    if ReturnStateIndexStr is not None: 
        state_reference = ReturnStateIndexStr
    else:                           
        state_reference = "QUEX_LABEL(%i)" % get_address("$entry", StateIdx, R=True)

    # Ensure that '__STATE_ROUTER' is marked as referenced
    get_label("$state-router", U=True)

    return "QUEX_GOTO_RELOAD(%s, %s, QUEX_LABEL(%i));" \
           % (get_label("$reload-%s" % direction, U=True),
              state_reference,
              get_address("$drop-out", StateIdx, U=True, R=True)) 
Пример #4
0
def __end_state_router(PathWalker, SMD):
    """Generate the code that leaves the path after its last transition and
       enters the 'end state', i.e. the first state behind the path.

       Only applicable to pathwalkers with uniform state entries. If the
       pathwalker walks more than one path, the end state is only known at
       run time and the transition passes through the state router.

       RETURNS: list of code fragments.
    """
    assert PathWalker.uniform_state_entries_f()

    path_list = PathWalker.path_list()

    code = [
        "        ",
        LanguageDB["$input/decrement"] + "\n",
        "\n        ",
    ]

    if len(path_list) != 1:
        # Multiple paths: the end state must be determined at run time.
        # Reference the state router, so that it is implemented.
        get_label("$state-router", U=True)
        to_end_state = "QUEX_GOTO_STATE(path_end_state);"
    else:
        # A single path: there is one end state, known at compilation time.
        to_end_state = transition.get_transition_to_state(path_list[0].end_state_index())

    code.append(to_end_state)
    code.extend(["\n", "    "])
    return code
Пример #5
0
def get_transition_to_terminal(Origin):
    """Return a list with the code that jumps to the terminal of 'Origin'.

       Origin -- an accepting origin, or None if there is no unconditional
                 acceptance. With None the jump goes to the last acceptance
                 and the terminal router is marked as used.
    """
    LanguageDB = Setup.language_db

    if Origin is None:
        # No unconditional acceptance => route via the last acceptance.
        get_label("$terminal-router", U=True) # Mark __TERMINAL_ROUTER as used
        return [ LanguageDB["$goto-last_acceptance"] ]

    assert Origin.is_acceptance()

    # Origins with a post context (id != -1) enter at the 'direct' terminal
    # label; all others at the normal terminal label.
    if Origin.post_context_id() != -1: label_type = "$terminal-direct"
    else:                              label_type = "$terminal"

    terminal_label = get_label(label_type, Origin.state_machine_id, U=True)
    return [ "goto %s;" % terminal_label ]
Пример #6
0
    def __backward_analyzer(self):
        """Generate the code of the backward analyzer that is built from the
           pre-context state machine 'self.pre_context_sm'.

           RETURNS: list of code fragments: an optional comment describing the
                    state machine, the state machine's code, and the general
                    backward terminal.
        """
        language_db    = self.language_db
        pre_context_sm = self.pre_context_sm

        assert pre_context_sm.get_orphaned_state_index_list() == []

        decorated_sm = StateMachineDecorator(pre_context_sm, 
                                             self.state_machine_name, 
                                             PostContextSM_ID_List=[],
                                             BackwardLexingF=True, 
                                             BackwardInputPositionDetectionF=False)

        code = []
        if Setup.comment_state_machine_transitions_f:
            ml_comment  = language_db["$ml-comment"]
            description = "BEGIN: PRE-CONTEXT STATE MACHINE\n" \
                          + pre_context_sm.get_string(NormalizeF=False) \
                          + "END: PRE-CONTEXT STATE MACHINE"
            code.append(ml_comment(description))
            # For safety: New content may have to start in a newline, e.g. "#ifdef ..."
            code.append("\n")

        code.extend(state_machine_coder.do(decorated_sm))

        # General backward terminal: backward lexing moved the input position
        # backwards, so it is set back to the lexeme start.
        code.append(get_label("$terminal-general-bw") + ":\n")
        code.append("    QUEX_NAME(Buffer_seek_lexeme_start)(&me->buffer);\n")

        return code
Пример #7
0
    def __backward_analyzer(self):
        """Generate the code of the backward analyzer that is built from the
           pre-context state machine 'self.pre_context_sm'.

           RETURNS: list of code fragments: an optional comment describing the
                    state machine, the state machine's code, and the general
                    backward terminal.
        """
        language_db = self.language_db
        pre_context_sm = self.pre_context_sm

        assert pre_context_sm.get_orphaned_state_index_list() == []

        decorated_sm = StateMachineDecorator(pre_context_sm,
                                             self.state_machine_name,
                                             PostContextSM_ID_List=[],
                                             BackwardLexingF=True,
                                             BackwardInputPositionDetectionF=False)

        code = []
        if Setup.comment_state_machine_transitions_f:
            ml_comment = language_db["$ml-comment"]
            description = "BEGIN: PRE-CONTEXT STATE MACHINE\n" \
                          + pre_context_sm.get_string(NormalizeF=False) \
                          + "END: PRE-CONTEXT STATE MACHINE"
            code.append(ml_comment(description))
            # For safety: New content may have to start in a newline, e.g. "#ifdef ..."
            code.append("\n")

        code.extend(state_machine_coder.do(decorated_sm))

        # General backward terminal: backward lexing moved the input position
        # backwards, so it is set back to the lexeme start.
        code.append(get_label("$terminal-general-bw") + ":\n")
        code.append("    QUEX_NAME(Buffer_seek_lexeme_start)(&me->buffer);\n")

        return code
Пример #8
0
def __template_state(txt, TheTemplate, SMD):
    """Append the code of the template state, i.e. the state that 'hosts'
       the templated states, to 'txt'.

       txt         -- list of code fragments; extended in place.
       TheTemplate -- the template to be implemented.
       SMD         -- decorated state machine the template belongs to.
    """
    template_index = TheTemplate.core().state_index
    trigger_map    = TheTemplate.transitions().get_trigger_map()

    if not TheTemplate.uniform_state_entries_f():
        # Non-uniform entries: the template only gets a label of its own.
        entry_label = get_label("$entry", template_index)
        txt.append("    __quex_assert_no_passage();\n" + entry_label + ":\n")
    else:
        # Uniform entries: input and acceptance handling happen once, here.
        txt.extend(input_block.do(template_index, False, SMD))
        txt.extend(acceptance_info.do(TheTemplate, template_index, SMD, 
                                      ForceSaveLastAcceptanceF=True))

    if TheTemplate.uniform_state_entries_f():
        return_state_str = None
    else:
        # Templates that need to implement more than one state need to return to
        # dedicated state entries, if the state entries are not uniform.
        return_state_str = "template_%i_map_state_key_to_state_index[template_state_key]" % template_index

    txt.extend(transition_block.do(trigger_map, template_index, SMD, 
                                   ReturnToState_Str=return_state_str))
    txt.extend(drop_out.do(TheTemplate, template_index, SMD))
Пример #9
0
def __template_state(txt, TheTemplate, SMD):
    """Append the code of the template state, i.e. the state that 'hosts'
       the templated states, to 'txt'.

       txt         -- list of code fragments; extended in place.
       TheTemplate -- the template to be implemented.
       SMD         -- decorated state machine the template belongs to.
    """
    template_index = TheTemplate.core().state_index
    trigger_map = TheTemplate.transitions().get_trigger_map()

    if not TheTemplate.uniform_state_entries_f():
        # Non-uniform entries: the template only gets a label of its own.
        entry_label = get_label("$entry", template_index)
        txt.append("    __quex_assert_no_passage();\n" + entry_label + ":\n")
    else:
        # Uniform entries: input and acceptance handling happen once, here.
        txt.extend(input_block.do(template_index, False, SMD))
        txt.extend(acceptance_info.do(TheTemplate, template_index, SMD,
                                      ForceSaveLastAcceptanceF=True))

    if TheTemplate.uniform_state_entries_f():
        return_state_str = None
    else:
        # Templates that need to implement more than one state need to return to
        # dedicated state entries, if the state entries are not uniform.
        return_state_str = "template_%i_map_state_key_to_state_index[template_state_key]" % template_index

    txt.extend(transition_block.do(trigger_map, template_index, SMD,
                                   ReturnToState_Str=return_state_str))
    txt.extend(drop_out.do(TheTemplate, template_index, SMD))
Пример #10
0
def __dead_end_state_stub(DeadEndStateInfo, SMD):
    """Dead end states are states which are void of any transitions. They 
       all drop out to some terminal (or drop out totally). Many transitions 
       to goto states can be replaced by direct transitions to the correspondent
       terminal. Some dead end states, though, need to be replaced by 'stubs'
       where some basic handling is necessary. The implementation of such 
       stubs is handled inside this function.

       DeadEndStateInfo -- tuple (pre_context_dependency_f, winner_origin_list, state).
       SMD              -- decorated state machine; its mode selects the stub.

       RETURNS: list of code fragments. The list is empty if no stub is
                required, i.e. in forward lexing without pre-context
                dependency.
    """
    LanguageDB = Setup.language_db

    pre_context_dependency_f, \
    winner_origin_list,       \
    state                     = DeadEndStateInfo

    assert isinstance(state, State)
    assert state.is_acceptance()

    if SMD.forward_lexing_f():
        if not pre_context_dependency_f:
            assert len(winner_origin_list) == 1
            # Direct transition to terminal possible, no stub required.
            return []

        # The acceptance depends on pre-contexts => detect the winner at
        # run time and transit to its terminal.
        return acceptance_info.get_acceptance_detector(
            state.origins().get_list(), transition.get_transition_to_terminal)

    elif SMD.backward_lexing_f():
        # When checking a pre-condition no dedicated terminal exists. However, when
        # we check for pre-conditions, a pre-condition flag needs to be set.
        return acceptance_info.backward_lexing(state.origins().get_list()) + \
               [ "goto %s;" % get_label("$terminal-general-bw", U=True) ]

    elif SMD.backward_input_position_detection_f():
        # When searching backwards for the end of the core pattern, and one reaches
        # a dead end state, then no position needs to be stored extra since it was
        # stored at the entry of the state.
        return [ LanguageDB["$input/decrement"], "\n"] + \
               acceptance_info.backward_lexing_find_core_pattern(state.origins().get_list()) + \
               [ "goto %s;" % get_label("$terminal-general-bw", U=True) ]

    # NOTE: The message must not refer to names that do not exist in this
    #       scope; otherwise a NameError hides the actual assertion.
    assert False, \
           "Unknown mode in terminal stub code generation: neither forward lexing, " \
           "backward lexing, nor backward input position detection."
Пример #11
0
def __dead_end_state_stub(DeadEndStateInfo, SMD):
    """Dead end states are states which are void of any transitions. They 
       all drop out to some terminal (or drop out totally). Many transitions 
       to goto states can be replaced by direct transitions to the correspondent
       terminal. Some dead end states, though, need to be replaced by 'stubs'
       where some basic handling is necessary. The implementation of such 
       stubs is handled inside this function.

       DeadEndStateInfo -- tuple (pre_context_dependency_f, winner_origin_list, state).
       SMD              -- decorated state machine; its mode selects the stub.

       RETURNS: list of code fragments. The list is empty if no stub is
                required, i.e. in forward lexing without pre-context
                dependency.
    """
    LanguageDB = Setup.language_db

    pre_context_dependency_f, \
    winner_origin_list,       \
    state                     = DeadEndStateInfo

    assert isinstance(state, State)
    assert state.is_acceptance()

    if SMD.forward_lexing_f():
        if not pre_context_dependency_f:
            assert len(winner_origin_list) == 1
            # Direct transition to terminal possible, no stub required.
            return [] 

        # The acceptance depends on pre-contexts => detect the winner at
        # run time and transit to its terminal.
        return acceptance_info.get_acceptance_detector(state.origins().get_list(), 
                                                       transition.get_transition_to_terminal)

    elif SMD.backward_lexing_f():
        # When checking a pre-condition no dedicated terminal exists. However, when
        # we check for pre-conditions, a pre-condition flag needs to be set.
        return acceptance_info.backward_lexing(state.origins().get_list()) + \
               [ "goto %s;" % get_label("$terminal-general-bw", U=True) ] 

    elif SMD.backward_input_position_detection_f():
        # When searching backwards for the end of the core pattern, and one reaches
        # a dead end state, then no position needs to be stored extra since it was
        # stored at the entry of the state.
        return [ LanguageDB["$input/decrement"], "\n"] + \
               acceptance_info.backward_lexing_find_core_pattern(state.origins().get_list()) + \
               [ "goto %s;" % get_label("$terminal-general-bw", U=True) ] 

    # NOTE: The message must not refer to names that do not exist in this
    #       scope; otherwise a NameError hides the actual assertion.
    assert False, \
           "Unknown mode in terminal stub code generation: neither forward lexing, " \
           "backward lexing, nor backward input position detection."
Пример #12
0
    def get_code(self):
        """Template transition target states. The target state is determined at 
           run-time based on a 'state_key' for the template.
           NOTE: This handles also the recursive case.

           RETURNS: list with a single code fragment:
             -- not recursive: jump via the state router to the target that
                the template's target array holds for the state key.
             -- recursive, non-uniform state entries: jump via the state
                router to the state that the state key maps to.
             -- recursive, uniform state entries: direct 'goto' to the label
                of the template's address.
        """
        if not self.recursive():
            target = "template_%i_target_%i[template_state_key]" % (self.template_index, self.target_index)
        elif not self.uniform_state_entries_f():
            target = "template_%i_map_state_key_to_state_index[template_state_key]" % self.template_index
        else:
            return [ "goto %s;" % get_label_of_address(self.template_index, U=True) ]

        get_label("$state-router", U=True) # Ensure reference of state router
        return [ "QUEX_GOTO_STATE(%s);\n" % target ]
Пример #13
0
    def backward_detector_function_get(self, sm):
        """Generate the function that implements the backward input position
           detector for the state machine 'sm'.

           sm -- state machine; must not contain orphaned states.

           RETURNS: what 'get_plain_strings()' delivers for the list of code
                    fragments (prolog, local variable definitions, optional
                    comment, function body, terminal, state router, epilog).
        """
        assert sm.get_orphaned_state_index_list() == []

        dsm = StateMachineDecorator(sm, 
                                    "BACKWARD_DETECTOR_" + repr(sm.get_id()),
                                    PostContextSM_ID_List           = [], 
                                    BackwardLexingF                 = True, 
                                    BackwardInputPositionDetectionF = True)

        variable_db.init()
        init_address_handling(dsm.get_direct_transition_to_terminal_db())

        function_body = state_machine_coder.do(dsm)

        comment = []
        if Setup.comment_state_machine_transitions_f: 
            comment_txt = Setup.language_db["$ml-comment"]("BEGIN: BACKWARD DETECTOR STATE MACHINE\n" + \
                                                           sm.get_string(NormalizeF=False)            + \
                                                           "\nEND: BACKWARD DETECTOR STATE MACHINE")
            # The comment may come as a single fragment or as a list of
            # fragments. Calling '.append()' on a single string would fail.
            if isinstance(comment_txt, list): comment = comment_txt + ["\n"]
            else:                             comment = [comment_txt, "\n"]

        # -- input position detectors simply the next 'catch' and return
        terminal = []
        terminal.append("\n")
        terminal.append("    __quex_assert_no_passage();\n")
        terminal.append(get_label("$terminal-general-bw") + ":\n")
        terminal.append("    " + self.language_db["$input/seek_position"]("end_of_core_pattern_position") + "\n")
        terminal.append("    " + self.language_db["$input/increment"] + "\n")
        terminal.append("    return;\n")

        # NOTE: The set of routed addresses is requested after body and
        #       terminal have been generated.
        routed_address_set = get_address_set_subject_to_routing()

        state_router_txt = ""
        if len(routed_address_set) != 0:
            routed_state_info_list = state_router.get_info(routed_address_set, dsm)
            state_router_txt       = state_router.do(routed_state_info_list)
            variable_db.require("target_state_index", Condition_ComputedGoto=False)

        variable_db.require("input")
        variable_db.require("end_of_core_pattern_position")

        local_variable_definition = self.language_db["$local-variable-defs"](variable_db.get())

        # Put all things together
        txt = []
        # 'L' is removed from the id's representation (presumably the suffix
        # of Python 2 long integers -- TODO confirm).
        txt.append(bwd_prolog.replace("$$ID$$", repr(sm.get_id()).replace("L", "")))
        txt.extend(local_variable_definition)
        txt.extend(comment)
        txt.extend(function_body)
        txt.extend(terminal)
        txt.append(state_router_txt)
        txt.append(bwd_epilog.replace("$$INIT_STATE_ID$$", get_label_of_address(sm.init_state_index)))

        return get_plain_strings(txt)
Пример #14
0
def get_transition_to_terminal(Origin):
    """Return a list with the code that jumps to the terminal of 'Origin'.

       Origin -- an accepting origin, or None if there is no unconditional
                 acceptance. With None the jump goes to the last acceptance
                 and the terminal router is marked as used.
    """
    LanguageDB = Setup.language_db

    if Origin is None:
        # No unconditional acceptance => route via the last acceptance.
        get_label("$terminal-router", U=True)  # Mark __TERMINAL_ROUTER as used
        return [LanguageDB["$goto-last_acceptance"]]

    assert Origin.is_acceptance()

    # Origins with a post context (id != -1) enter at the 'direct' terminal
    # label; all others at the normal terminal label.
    if Origin.post_context_id() != -1:
        label_type = "$terminal-direct"
    else:
        label_type = "$terminal"

    terminal_label = get_label(label_type, Origin.state_machine_id, U=True)
    return ["goto %s;" % terminal_label]
Пример #15
0
def __templated_state_entries(txt, TheTemplate, SMD):
    """Append the entry stubs of all states that are implemented by the
       template 'TheTemplate' to 'txt'. Each stub sets the template's state
       key and then jumps to the entry of the template state, e.g.

            STATE_4711: 
               key = 0; goto TEMPLATE_STATE_111;
            STATE_3123: 
               key = 1; goto TEMPLATE_STATE_111;
            STATE_8912: 
               key = 2; goto TEMPLATE_STATE_111;

       The state key is the position of the state in the template
       combination's list of involved states.

       txt         -- list of code fragments; extended in place.
       TheTemplate -- the template whose involved states get their entries.
       SMD         -- decorated state machine providing the states.
    """
    involved_state_list = TheTemplate.template_combination().involved_state_list()

    for state_key, state_index in enumerate(involved_state_list):
        state = SMD.sm().states[state_index]

        if not TheTemplate.uniform_state_entries_f():
            # Non-uniform entries: each state needs its own input handling
            # and acceptance handling before it jumps into the template.
            txt.extend(input_block.do(state_index, False, SMD))
            txt.extend(acceptance_info.do(state, state_index, SMD,
                                          ForceSaveLastAcceptanceF=True))
        else:
            # Uniform entries: the entry handling happens once, at the entrance
            # of the template. Here, only the label of the state is required.
            if state_index != SMD.sm().init_state_index:
                txt.append("    __quex_assert_no_passage();\n")
            entry_label = get_label("$entry", state_index)
            txt.append(entry_label + ":\n")
            debug_txt = LanguageDB["$debug-state"](state_index,
                                                   SMD.forward_lexing_f())
            txt.append("\n    " + debug_txt)

        # Set the state key and jump to the template.
        txt.append("    ")
        key_assignment = LanguageDB["$assignment"]("template_state_key",
                                                   "%i" % state_key)
        txt.append(key_assignment.replace("\n", "\n    "))
        template_entry_label = get_label("$entry",
                                         TheTemplate.core().state_index,
                                         U=True)
        txt.append("    goto %s;" % template_entry_label)
        txt.append("\n\n")
Пример #16
0
    def get_code(self):
        """Indentation counters may count as a consequence of a 'triggering'.

           Depending on 'self.type' the generated code does the following:

             "space" -- add a number (or the content of a member variable, if
                        'self.number' is -1) to the indentation and jump to
                        the entry of state 'self.state_index'.
             "grid"  -- let the indentation jump to the next position on a
                        grid of the given width, then jump to the state entry.
             "bad"   -- jump to the bad character handler of the indentation
                        counter.

           RETURNS: string containing the code.
           RAISES:  ValueError, if a constant grid width is not positive.
        """
        LanguageDB = Setup.language_db

        # Spaces simply increment
        if self.type == "space": 
            if self.number != -1: add_str = "%i" % self.number
            else:                 add_str = "me->" + self.variable_name
            return "me->counter._indentation += %s;" % add_str + \
                   "goto %s;" % get_label("$entry", self.state_index, U=True)
        
        # Grids lie on a grid:
        elif self.type == "grid":
            if self.number != -1: 
                grid_width = int(self.number)
                if grid_width <= 0:
                    raise ValueError("Grid width must be greater than zero; found %i." % grid_width)

                # Powers of 2 are detected by an exact integer test. The 
                # quotient 'log(n)/log(2)' suffers from floating point rounding 
                # and misses some exact powers of 2 (e.g. 2**29).
                if (grid_width & (grid_width - 1)) == 0:
                    # For k = power of 2, 'x - x % k' can be written as 'x & ~(k - 1)'.
                    # Thus: x = x - x % k + k = (x & ~mask) + k, with mask = k - 1
                    mask = grid_width - 1
                    return "me->counter._indentation &= ~ ((QUEX_TYPE_INDENTATION)0x%X);\n" % mask + \
                           "me->counter._indentation += %i;\n" % grid_width + \
                           "goto %s;" % get_label("$entry", self.state_index, U=True)

                add_str = "%i" % grid_width
            else:   
                add_str = "me->" + self.variable_name

            return "me->counter._indentation = (me->counter._indentation - (me->counter._indentation %% %s)) + %s;" \
                   % (add_str, add_str) + \
                   LanguageDB["$goto"]("$entry", self.state_index)

        elif self.type == "bad":
            assert self.state_index != -1
            return "goto INDENTATION_COUNTER_%i_BAD_CHARACTER;\n" % self.state_index

        else:
            assert False, "Unreachable code has been reached."
Пример #17
0
def do(StateIdx, InitStateF, SMD):
    """Generate the code fragments that provide the 'input' character for the
       transition map that follows. In general, this means

           (i)  incrementing/decrementing the input pointer, and
           (ii) dereferencing the pointer to get a value.

       The init state in forward lexing is the exception: it gets the label
       of the init state's transition block and the input pointer is left
       where it is.

       StateIdx   -- index of the state.
       InitStateF -- True, if the state is the init state.
       SMD        -- decorated state machine; tells the lexing direction.

       RETURNS: list of code fragments.
    """
    language_db = Setup.language_db

    code = []

    if not InitStateF:
        code.append("    __quex_assert_no_passage();\n")

    if InitStateF and SMD.forward_lexing_f():
        # Forward init state: dedicated label, no pointer movement.
        code.append(get_label("$init_state_fw_transition_block") + ":\n")
        code.append("    " + language_db["$debug-init-state"])
    else:
        code.append(get_label("$entry", StateIdx) + ":\n")
        code.append("    " + language_db["$debug-state"](StateIdx, SMD.forward_lexing_f()))

        # Move the input pointer according to the lexing direction.
        if SMD.forward_lexing_f(): step_cmd = language_db["$input/increment"]
        else:                      step_cmd = language_db["$input/decrement"]
        code.extend(["    ", step_cmd, "\n"])

    code.extend(["    ", language_db["$input/get"], "\n"])

    return code
Пример #18
0
def __state_router(PathWalker, SMD):
    """Return the code that jumps to a state based on the 'path_iterator'.

       Only applicable to pathwalkers with non-uniform state entries. The
       target is read from the pathwalker's state array at the position
       'path_iterator - base' and entered via the state router.

       NOTE: Paths cannot be recursive. Also, path transitions are linear, i.e.
             target states are either subsequent path states or the path
             is left. 

       NOTE: In the case of non-uniform path state elements, the state router
             takes care of the detection of the end-state, thus it has not
             to be determined in the '*path_iterator == PTC' section.

       RETURNS: string containing the code.
    """
    assert not PathWalker.uniform_state_entries_f()

    walker_id = PathWalker.core().state_index

    # Reference the state router, so that it is implemented.
    get_label("$state-router", U=True)

    goto_template = "QUEX_GOTO_STATE(path_walker_%i_state[path_iterator - path_walker_%i_base]);\n"
    return goto_template % ((walker_id,) * 2)
Пример #19
0
    def get_code(self):
        """Template transition target states. The target state is determined at 
           run-time based on a 'state_key' for the template.
           NOTE: This handles also the recursive case.

           RETURNS: list with a single code fragment:
             -- not recursive: jump via the state router to the target that
                the template's target array holds for the state key.
             -- recursive, non-uniform state entries: jump via the state
                router to the state that the state key maps to.
             -- recursive, uniform state entries: direct 'goto' to the label
                of the template's address.
        """
        if not self.recursive():
            target = "template_%i_target_%i[template_state_key]" % (
                self.template_index, self.target_index)
        elif not self.uniform_state_entries_f():
            target = "template_%i_map_state_key_to_state_index[template_state_key]" % self.template_index
        else:
            return [
                "goto %s;" % get_label_of_address(self.template_index, U=True)
            ]

        get_label("$state-router", U=True)  # Ensure reference of state router
        return ["QUEX_GOTO_STATE(%s);\n" % target]
Пример #20
0
def get_epilog(StateIdx, InitStateF, SMD):
    """Generate the epilog of the init state in forward lexing.

       The init state does not increment the input position. Thus, the
       increment is done in a separate fragment which acts as the entry of
       the init state and then jumps to the init state's transition block.

       (The backward init state decrements the input pointer, so this is not necessary.)

       RETURNS: list of code fragments; [""] if the state is not the init
                state in forward lexing.
    """
    language_db = Setup.language_db

    if not InitStateF or not SMD.forward_lexing_f():
        return [""]

    return [
        "\n",
        Address("$entry", StateIdx),
        "\n",
        "    ", language_db["$input/increment"], "\n",
        "    goto %s;\n" % get_label("$init_state_fw_transition_block"),
    ]
Пример #21
0
def get_epilog(StateIdx, InitStateF, SMD):
    """Generate the epilog of the init state in forward lexing.

       The init state does not increment the input position. Thus, the
       increment is done in a separate fragment which acts as the entry of
       the init state and then jumps to the init state's transition block.

       (The backward init state decrements the input pointer, so this is not necessary.)

       RETURNS: list of code fragments; [""] if the state is not the init
                state in forward lexing.
    """
    language_db = Setup.language_db

    if not InitStateF or not SMD.forward_lexing_f():
        return [""]

    return [
        "\n",
        Address("$entry", StateIdx),
        "\n",
        "    ", language_db["$input/increment"], "\n",
        "    goto %s;\n" % get_label("$init_state_fw_transition_block"),
    ]
Пример #22
0
def get_info(StateIndexList, DSM):
    """Collect the information that the state router needs about each state.

       StateIndexList -- iterable of state indices (not strings). An index
                         >= 0 refers to a state entry; a negative index -N
                         refers to the 'drop-out' of N.
       DSM            -- not used; kept for the callers' sake.

       RETURNS: list of tuples (address, code), where 'code' is the 'goto'
                that enters the address. The list is empty if there are no
                indices. This supports the case of a 'dummy' state router
                which only exists so that 'goto __STATE_ROUTER;' does not
                reference a non-existing label.
    """
    result = []
    for index in StateIndexList:
        assert not isinstance(index, str)
        if index >= 0:
            # Transition to state entry
            code = "goto %s; " % get_label_of_address(index)
            result.append((index, code))
        else:
            # Transition to a templates 'drop-out'
            code = "goto " + get_label("$drop-out", -index) + "; "
            result.append((get_address("$drop-out", -index), code))
    return result
Пример #23
0
def get_info(StateIndexList, DSM):
    """Collect the information that the state router needs about each state.

       StateIndexList -- iterable of state indices (not strings). An index
                         >= 0 refers to a state entry; a negative index -N
                         refers to the 'drop-out' of N.
       DSM            -- not used; kept for the callers' sake.

       RETURNS: list of tuples (address, code), where 'code' is the 'goto'
                that enters the address. The list is empty if there are no
                indices. This supports the case of a 'dummy' state router
                which only exists so that 'goto __STATE_ROUTER;' does not
                reference a non-existing label.
    """
    result = []
    for index in StateIndexList:
        assert not isinstance(index, str)
        if index >= 0:
            # Transition to state entry
            code = "goto %s; " % get_label_of_address(index)
            result.append((index, code))
        else:
            # Transition to a templates 'drop-out'
            code = "goto " + get_label("$drop-out", - index) + "; "
            result.append((get_address("$drop-out", - index), code))
    return result
Пример #24
0
def do(state, StateIdx, SMD=False):
    """Generate the code that implements the state 'state'.

       state    -- the state to be implemented; must be free of epsilon
                   transitions.
       StateIdx -- index of the state.
       SMD      -- StateMachineDecorator of the state machine that the state
                   belongs to.

       RETURNS: list of code fragments. The list is empty for dead end states
                which do not require a stub.
    """
    assert isinstance(state, State)
    assert SMD.__class__.__name__ == "StateMachineDecorator"
    assert len(state.transitions().get_epsilon_target_state_index_list()) == 0, \
           "Epsilon transition contained target states: state machine was not made a DFA!\n" + \
           "Epsilon target states = " + repr(state.transitions().get_epsilon_target_state_index_list())
    init_state_f = (StateIdx == SMD.sm().init_state_index)

    language_db = Setup.language_db

    # (*) Dead End States
    #     i.e. states with no further transitions.
    dead_end_info = SMD.dead_end_state_db().get(StateIdx)
    if dead_end_info is not None:
        stub = __dead_end_state_stub(dead_end_info, SMD)
        # Some states transit straight to their terminal => no stub.
        if len(stub) == 0:
            return []
        stub_head = [
            get_label("$entry", StateIdx),
            ":\n",
            "    ",
            language_db["$debug-state"](StateIdx, SMD.forward_lexing_f()),
        ]
        return stub_head + stub

    # (*) Normal States
    trigger_map = state.transitions().get_trigger_map()
    # Only dead end states have empty trigger maps.
    # => Here, the trigger map cannot be empty.
    assert trigger_map != []

    code = []
    for fragment_list in (input_block.do(StateIdx, init_state_f, SMD),
                          acceptance_info.do(state, StateIdx, SMD),
                          transition_block.do(trigger_map, StateIdx, SMD),
                          drop_out.do(state, StateIdx, SMD),
                          get_epilog(StateIdx, init_state_f, SMD)):
        code.extend(fragment_list)

    return code
Пример #25
0
def do(state, StateIdx, SMD=False):
    """Produce the list of code fragments for one state of a state machine.

    state    -- 'State' object whose transitions are to be coded.
    StateIdx -- index of the state (checked against 'SMD.sm().init_state_index').
    SMD      -- 'StateMachineDecorator' object; anything else fails the
                assertion below, including the default value 'False'.

    RETURNS: [] if the state is a dead end state without a stub,
             otherwise a list of code fragments.
    """
    assert isinstance(state, State)
    assert SMD.__class__.__name__ == "StateMachineDecorator"
    assert len(state.transitions().get_epsilon_target_state_index_list()) == 0, (
        "Epsilon transition contained target states: state machine was not made a DFA!\n"
        + "Epsilon target states = "
        + repr(state.transitions().get_epsilon_target_state_index_list())
    )
    init_state_f = StateIdx == SMD.sm().init_state_index

    language_db = Setup.language_db

    dead_end_info = SMD.dead_end_state_db().get(StateIdx)

    if dead_end_info is None:
        # (*) Normal States
        transition_map = state.transitions().get_trigger_map()
        # Only dead end states have empty trigger maps.
        assert transition_map != []

        result = []
        result.extend(input_block.do(StateIdx, init_state_f, SMD))
        result.extend(acceptance_info.do(state, StateIdx, SMD))
        result.extend(transition_block.do(transition_map, StateIdx, SMD))
        result.extend(drop_out.do(state, StateIdx, SMD))
        result.extend(get_epilog(StateIdx, init_state_f, SMD))
        return result

    # (*) Dead End States
    #     Some of them need no 'stub' to the terminal, because they are
    #     straight forward transitions to it.
    state_stub = __dead_end_state_stub(dead_end_info, SMD)
    if len(state_stub) == 0:
        return []

    label = get_label("$entry", StateIdx)
    debug_info = language_db["$debug-state"](StateIdx, SMD.forward_lexing_f())
    return [label, ":\n", "    ", debug_info] + state_stub
Пример #26
0
def get_skipper(EndSequence, Mode=None, IndentationCounterTerminalID=None, OnSkipRangeOpenStr=""):
    """Generate the code of a skipper that skips until 'EndSequence' appears.

    EndSequence -- non-empty list of integers: the closing delimiter.
    Mode        -- passed to the check whether the delimiter is a subset of
                   the indentation counter's newline, and to
                   'get_on_skip_range_open()'.
    IndentationCounterTerminalID
                -- must not be None if that check is positive; the skipper
                   then continues at the given terminal instead of '$start'.
    OnSkipRangeOpenStr
                -- replacement for '$$ON_SKIP_RANGE_OPEN$$'. If empty, the
                   result of 'get_on_skip_range_open(Mode, EndSequence)' is used.

    RETURNS: (code_str, local_variable_db)
             code_str          -- 'template_str' with all place holders filled.
             local_variable_db -- dictionary of required local variables; it
                                  contains 'reference_p' only if the line and
                                  column counting replacements report the need.
    """
    assert isinstance(EndSequence, list)
    assert len(EndSequence) >= 1
    # Element wise check; it does not rely on 'map()' returning a list.
    assert all(isinstance(letter, int) for letter in EndSequence)

    local_variable_db = {}

    LanguageDB = Setup.language_db

    # Name the $$SKIPPER$$
    skipper_index = sm_index.get()

    # Determine the $$DELIMITER$$
    delimiter_str, delimiter_length_str, delimiter_comment_str = get_character_sequence(EndSequence)

    delimiter_comment_str = LanguageDB["$comment"]("                         Delimiter: " + delimiter_comment_str)

    # Determine the check for the tail of the delimiter, i.e. one comparison
    # for each letter of 'EndSequence' after the first one. The letter at
    # position 'i' is compared against the input at offset 'i - 1'.
    remainder_test = []
    for i in range(1, len(EndSequence)):
        remainder_test.append("    " + LanguageDB["$input/get-offset"](i - 1) + "\n")
        remainder_test.append("    " + LanguageDB["$if !="]("Skipper$$SKIPPER_INDEX$$[%i]" % i))
        remainder_test.append("         goto %s;" % get_label("$entry", skipper_index, U=True))
        remainder_test.append("    " + LanguageDB["$endif"])
    delimiter_remainder_test_str = "".join(remainder_test)

    if not end_delimiter_is_subset_of_indentation_counter_newline(Mode, EndSequence):
        goto_after_end_of_skipping_str = "goto %s;" % get_label("$start", U=True)
    else:
        # If there is indentation counting involved, then the counter's terminal id must
        # be determined at this place.
        assert IndentationCounterTerminalID is not None
        # If the ending delimiter is a subset of what the 'newline' pattern triggers
        # in indentation counting => move on to the indentation counter.
        goto_after_end_of_skipping_str = "goto %s;" % get_label(
            "$terminal-direct", IndentationCounterTerminalID, U=True
        )

    if OnSkipRangeOpenStr != "":
        on_skip_range_open_str = OnSkipRangeOpenStr
    else:
        on_skip_range_open_str = get_on_skip_range_open(Mode, EndSequence)

    # The main part
    code_str = blue_print(
        template_str,
        [
            ["$$DELIMITER$$", delimiter_str],
            ["$$DELIMITER_LENGTH$$", delimiter_length_str],
            ["$$DELIMITER_COMMENT$$", delimiter_comment_str],
            ["$$WHILE_1_PLUS_1_EQUAL_2$$", LanguageDB["$loop-start-endless"]],
            ["$$END_WHILE$$", LanguageDB["$loop-end"]],
            ["$$INPUT_P_INCREMENT$$", LanguageDB["$input/increment"]],
            ["$$INPUT_P_DECREMENT$$", LanguageDB["$input/decrement"]],
            ["$$INPUT_GET$$", LanguageDB["$input/get"]],
            ["$$IF_INPUT_EQUAL_DELIMITER_0$$", LanguageDB["$if =="]("Skipper$$SKIPPER_INDEX$$[0]")],
            ["$$ENDIF$$", LanguageDB["$endif"]],
            ["$$ENTRY$$", get_label("$entry", skipper_index)],
            ["$$RELOAD$$", get_label("$reload", skipper_index)],
            ["$$GOTO_ENTRY$$", get_label("$entry", skipper_index, U=True)],
            # When things were skipped, no change to acceptance flags or modes has
            # happened. One can jump immediately to the start without re-entry preparation.
            ["$$GOTO_AFTER_END_OF_SKIPPING$$", goto_after_end_of_skipping_str],
            ["$$MARK_LEXEME_START$$", LanguageDB["$mark-lexeme-start"]],
            ["$$DELIMITER_REMAINDER_TEST$$", delimiter_remainder_test_str],
            ["$$ON_SKIP_RANGE_OPEN$$", on_skip_range_open_str],
        ],
    )

    # Line and column number counting
    code_str, reference_p_f = __lc_counting_replacements(code_str, EndSequence)

    # The finishing touch: '$$SKIPPER_INDEX$$' is replaced last, because the
    # strings inserted above contain that place holder themselves.
    code_str = blue_print(
        code_str,
        [["$$SKIPPER_INDEX$$", __nice(skipper_index)], ["$$GOTO_RELOAD$$", get_label("$reload", skipper_index)]],
    )

    if reference_p_f:
        local_variable_db["QUEX_OPTION_COLUMN_NUMBER_COUNTING/reference_p"] = Variable(
            "reference_p",
            "QUEX_TYPE_CHARACTER_POSITION",
            None,
            "(QUEX_TYPE_CHARACTER_POSITION)0x0",
            "QUEX_OPTION_COLUMN_NUMBER_COUNTING",
        )

    return code_str, local_variable_db
Пример #27
0
def __path_walker(txt, PathWalker, SMD):
    """Append the code of the path walker to 'txt'.

    The generated code compares the input with the character that
    'path_iterator' points to, falls back to a transition map, and ends with
    the drop-out handling.

    txt        -- list of code fragments that is extended in place.
    PathWalker -- the path walker state to be coded.
    SMD        -- state machine decorator, passed on to the code generators.

    RETURNS: None.
    """
    # NOTE(review): neither of the following two values is used below; the
    #               accessor calls are kept as they were.
    PathList = PathWalker.path_list()
    Skeleton = PathList[0].skeleton()
    walker_id = PathWalker.core().state_index
    uniform_f = PathWalker.uniform_state_entries_f()

    if uniform_f:
        # (1) Input Block (get the new character)
        txt.extend(input_block.do(walker_id, False, SMD))
        # (2) Acceptance information/Store Input positions
        txt.extend(acceptance_info.do(PathWalker, walker_id, SMD, ForceSaveLastAcceptanceF=True))

        # Uniform entries: the walker re-enters itself; the end of the path
        # is handled by the end state router.
        on_match = ["goto %s;\n" % get_label_of_address(walker_id, U=True)]
        on_path_end = __end_state_router(PathWalker, SMD)
    else:
        txt.append("    __quex_assert_no_passage();\n")
        txt.append(get_label("$entry", walker_id) + ":\n")
        txt.append("    __quex_debug(\"path walker %i\");\n" % walker_id)

        # Non-uniform entries: both cases go through the state router.
        on_match = [__state_router(PathWalker, SMD)]
        on_path_end = ["        "] + on_match

    # (3) Transition Map

    # (3.1) The comparison with the path's current character.
    #       On the path termination code, the 'path end' code is entered.
    txt.extend([
        "    ",
        LanguageDB["$if =="]("*path_iterator"),
        "        ",
        LanguageDB["$increment"]("path_iterator"),
        "\n",
        "        ",
    ])
    txt.extend(on_match)
    txt.append("    ")
    txt.append(LanguageDB["$elseif"]
               + LanguageDB["$=="]("*path_iterator", "QUEX_SETTING_PATH_TERMINATION_CODE")
               + LanguageDB["$then"])
    txt.extend(on_path_end)
    txt.extend(["    ", LanguageDB["$endif"], "\n"])

    # (3.2) Transition map of the path walker
    transition_map = PathWalker.transitions().get_trigger_map()
    if len(transition_map) == 0:
        # (This happens, for example, if there are only keywords and no
        #  'overlaying' identifier pattern.)
        #
        # Even if the trigger map is empty there must be something that
        # catches the 'buffer limit code'.
        # => Define an 'all drop out' trigger map, i.e. a single interval
        #    over the whole range with target 'None'.
        transition_map = [(Interval(-sys.maxint, sys.maxint), None)]

    # Only with non-uniform state entries a 'return to state' expression is
    # passed to the transition block.
    if uniform_f:
        return_state_str = None
    else:
        return_state_str = "path_walker_%i_state[path_iterator - path_walker_%i_base]" % (walker_id, walker_id)

    txt.extend(transition_block.do(transition_map, walker_id, SMD, ReturnToState_Str=return_state_str))

    # (4) The drop out (nothing matched)
    #     (Path iterator has not been increased yet)
    txt.extend(drop_out.do(PathWalker, walker_id, SMD))

    return
Пример #28
0
def __state_entries(txt, PathWalker, SMD):
    """Append the entries of the states on the paths of a path walker to 'txt'.

    Each entry sets 'path_iterator' to the position of the state on its path
    and then jumps into the path walker. E.g.

            STATE_4711:
               path_iterator = path_23 + 0; goto PATHWALKER_23;
            STATE_3123:
               path_iterator = path_23 + 1; goto PATHWALKER_23;
            STATE_8912:
               path_iterator = path_23 + 2; goto PATHWALKER_23;

    txt        -- list that receives a newline and one 'Address' object per entry.
    PathWalker -- the path walker whose paths are considered.
    SMD        -- state machine decorator.

    RETURNS: True, if at least one entry assigns 'path_end_state', so that
             this variable is required; False otherwise.
    """
    state_machine = SMD.sm()
    uniform_f = PathWalker.uniform_state_entries_f()
    path_count = len(PathWalker.path_list())

    path_end_state_required_f = False
    txt.append("\n")

    for path in PathWalker.path_list():
        predecessor = None
        # The last element of the sequence is not on the path; it is the
        # first state behind it.
        for position, step in enumerate(path.sequence()[:-1]):
            state_index = step[0]

            # An entry can be omitted if
            #   (i)  the path walker is uniform, because then not even a
            #        reload requires a dedicated state entry, and
            #   (ii) the state is entered from nowhere else but from its
            #        predecessor on the path.
            # The first state of a path always gets an entry.
            if predecessor is not None:
                only_entry = state_machine.get_only_entry_to_state(state_index)
                if uniform_f and predecessor == only_entry:
                    predecessor = state_index
                    continue

            state = state_machine.states[state_index]

            if uniform_f:
                # Uniform entries: input and acceptance handling happen once
                # at the entry of the path walker. Here, only label and debug
                # output are generated.
                entry_label = get_label("$entry", state_index) + ":\n"
                if state_index != state_machine.init_state_index:
                    entry_label = "    __quex_assert_no_passage();\n" + entry_label
                code = [
                    entry_label,
                    "    ",
                    LanguageDB["$debug-state"](state_index, SMD.forward_lexing_f()),
                ]
            else:
                code = list(input_block.do(state_index, False, SMD))
                code.extend(acceptance_info.do(state, state_index, SMD, ForceSaveLastAcceptanceF=True))

            if uniform_f and path_count != 1:
                # More than one path => the walker needs to be told where
                # this particular path ends.
                path_end_state_required_f = True
                end_state_index = path.sequence()[-1][0]
                code.append("    path_end_state                 = QUEX_LABEL(%i);\n"
                            % get_address("$entry", end_state_index, U=True, R=True))

            code.append("    ")
            code.append(LanguageDB["$assignment"]("path_iterator                 ",
                                                  "path_%i + %i" % (path.index(), position)))
            code.append("goto %s;\n\n" % get_label_of_address(PathWalker.core().state_index, U=True))

            txt.append(Address("$entry", state_index, Code=code))
            predecessor = state_index

    return path_end_state_required_f
Пример #29
0
def get_transition_to_drop_out(CurrentStateIdx):
    """Return the 'goto' statement to the drop-out label of 'CurrentStateIdx'."""
    LanguageDB = Setup.language_db   # (not used for the statement below)
    drop_out_label = get_label("$drop-out", CurrentStateIdx, U=True)
    return "goto %s;" % drop_out_label
Пример #30
0
def get_skipper(EndSequence, Mode=None, IndentationCounterTerminalID=None, OnSkipRangeOpenStr=""):
    """Produce the skipper code for a range that is closed by 'EndSequence'.

    EndSequence -- list of at least one integer; the closing delimiter.
    Mode        -- handed to 'end_delimiter_is_subset_of_indentation_counter_newline()'
                   and to 'get_on_skip_range_open()'.
    IndentationCounterTerminalID
                -- terminal to continue at, if the closing delimiter is a
                   subset of the indentation counter's newline. In that case
                   it must not be None.
    OnSkipRangeOpenStr
                -- text for '$$ON_SKIP_RANGE_OPEN$$'; if it is the empty string
                   'get_on_skip_range_open(Mode, EndSequence)' provides it.

    RETURNS: (code_str, local_variable_db), i.e. the filled 'template_str'
             and a dictionary of the local variables that the code requires.
    """
    assert isinstance(EndSequence, list)
    assert len(EndSequence) >= 1
    # Check every element; comparing the result of 'map()' with a list only
    # works where 'map()' returns a list.
    assert all(isinstance(letter, int) for letter in EndSequence)

    local_variable_db = {}

    LanguageDB   = Setup.language_db

    # Name the $$SKIPPER$$
    skipper_index = sm_index.get()

    # Determine the $$DELIMITER$$
    delimiter_str,        \
    delimiter_length_str, \
    delimiter_comment_str \
                          = get_character_sequence(EndSequence)

    delimiter_comment_str  = LanguageDB["$comment"]("                         Delimiter: "
                                                    + delimiter_comment_str)

    # Determine the check for the tail of the delimiter:
    # For each delimiter position 'i' behind the first one, the input at
    # offset 'i - 1' is compared against 'Skipper...[i]'.
    fragment_list = []
    for i in range(1, len(EndSequence)):
        fragment_list.extend([
            "    " + LanguageDB["$input/get-offset"](i-1) + "\n",
            "    " + LanguageDB["$if !="]("Skipper$$SKIPPER_INDEX$$[%i]" % i),
            "         goto %s;" % get_label("$entry", skipper_index, U=True),
            "    " + LanguageDB["$endif"],
        ])
    delimiter_remainder_test_str = "".join(fragment_list)

    if not end_delimiter_is_subset_of_indentation_counter_newline(Mode, EndSequence):
        goto_after_end_of_skipping_str = "goto %s;" % get_label("$start", U=True)
    else:
        # If there is indentation counting involved, then the counter's terminal id must
        # be determined at this place.
        assert IndentationCounterTerminalID is not None
        # If the ending delimiter is a subset of what the 'newline' pattern triggers
        # in indentation counting => move on to the indentation counter.
        goto_after_end_of_skipping_str = "goto %s;" % get_label("$terminal-direct", IndentationCounterTerminalID, U=True)

    if OnSkipRangeOpenStr != "": on_skip_range_open_str = OnSkipRangeOpenStr
    else:                        on_skip_range_open_str = get_on_skip_range_open(Mode, EndSequence)

    # The main part
    code_str = blue_print(template_str,
                          [["$$DELIMITER$$",                      delimiter_str],
                           ["$$DELIMITER_LENGTH$$",               delimiter_length_str],
                           ["$$DELIMITER_COMMENT$$",              delimiter_comment_str],
                           ["$$WHILE_1_PLUS_1_EQUAL_2$$",         LanguageDB["$loop-start-endless"]],
                           ["$$END_WHILE$$",                      LanguageDB["$loop-end"]],
                           ["$$INPUT_P_INCREMENT$$",              LanguageDB["$input/increment"]],
                           ["$$INPUT_P_DECREMENT$$",              LanguageDB["$input/decrement"]],
                           ["$$INPUT_GET$$",                      LanguageDB["$input/get"]],
                           ["$$IF_INPUT_EQUAL_DELIMITER_0$$",     LanguageDB["$if =="]("Skipper$$SKIPPER_INDEX$$[0]")],
                           ["$$ENDIF$$",                          LanguageDB["$endif"]],
                           ["$$ENTRY$$",                          get_label("$entry", skipper_index)],
                           ["$$RELOAD$$",                         get_label("$reload", skipper_index)],
                           ["$$GOTO_ENTRY$$",                     get_label("$entry", skipper_index, U=True)],
                           # When things were skipped, no change to acceptance flags or modes has
                           # happened. One can jump immediately to the start without re-entry preparation.
                           ["$$GOTO_AFTER_END_OF_SKIPPING$$",     goto_after_end_of_skipping_str],
                           ["$$MARK_LEXEME_START$$",              LanguageDB["$mark-lexeme-start"]],
                           ["$$DELIMITER_REMAINDER_TEST$$",       delimiter_remainder_test_str],
                           ["$$ON_SKIP_RANGE_OPEN$$",             on_skip_range_open_str],
                          ])

    # Line and column number counting
    code_str, reference_p_f = __lc_counting_replacements(code_str, EndSequence)

    # The finishing touch: the skipper index is filled in at the end, since
    # the strings inserted above carry the '$$SKIPPER_INDEX$$' place holder.
    code_str = blue_print(code_str,
                          [["$$SKIPPER_INDEX$$", __nice(skipper_index)],
                           ["$$GOTO_RELOAD$$",   get_label("$reload", skipper_index)]])

    # 'reference_p' is only defined if the counting replacements require it.
    if reference_p_f:
        local_variable_db["QUEX_OPTION_COLUMN_NUMBER_COUNTING/reference_p"] = \
                           Variable("reference_p",
                                    "QUEX_TYPE_CHARACTER_POSITION",
                                    None,
                                    "(QUEX_TYPE_CHARACTER_POSITION)0x0",
                                    "QUEX_OPTION_COLUMN_NUMBER_COUNTING")

    return code_str, local_variable_db
Пример #31
0
def get_skipper(OpenerSequence, CloserSequence, Mode=None, IndentationCounterTerminalID=None, OnSkipRangeOpenStr=""):
    """Generate the code of a skipper for a range given by opener and closer.

    OpenerSequence -- non-empty list of integers; the opening delimiter.
    CloserSequence -- non-empty list of integers; the closing delimiter. It
                      must differ from 'OpenerSequence'.
    Mode           -- passed to the check whether the closer is a subset of
                      the indentation counter's newline, and to
                      'get_on_skip_range_open()'.
    IndentationCounterTerminalID
                   -- must not be None if that check is positive; skipping
                      then ends with a jump to this terminal.
    OnSkipRangeOpenStr
                   -- text for '$$ON_SKIP_RANGE_OPEN$$'; if empty, the result
                      of 'get_on_skip_range_open(Mode, CloserSequence)' is used.

    RETURNS: (code_str, local_variable_db)
             code_str          -- 'template_str' with the place holders filled.
             local_variable_db -- definitions of 'counter' and 'reference_p'.
    """
    assert isinstance(OpenerSequence, list)
    assert len(OpenerSequence)       >= 1
    # Element wise checks; they do not rely on 'map()' returning a list.
    assert all(isinstance(letter, int) for letter in OpenerSequence)
    assert isinstance(CloserSequence, list)
    assert len(CloserSequence)       >= 1
    assert all(isinstance(letter, int) for letter in CloserSequence)
    assert OpenerSequence != CloserSequence

    LanguageDB    = Setup.language_db

    skipper_index = sm_index.get()

    opener_str, opener_length_str, opener_comment_str = get_character_sequence(OpenerSequence)
    closer_str, closer_length_str, closer_comment_str = get_character_sequence(CloserSequence)

    if not end_delimiter_is_subset_of_indentation_counter_newline(Mode, CloserSequence):
        goto_after_end_of_skipping_str = "goto %s;" % get_label("$start", U=True)
    else:
        # If there is indentation counting involved, then the counter's terminal id must
        # be determined at this place.
        assert IndentationCounterTerminalID is not None
        # If the ending delimiter is a subset of what the 'newline' pattern triggers
        # in indentation counting => move on to the indentation counter.
        goto_after_end_of_skipping_str = "goto %s;" % get_label("$terminal-direct",
                                                                IndentationCounterTerminalID, U=True)

    if OnSkipRangeOpenStr != "": on_skip_range_open_str = OnSkipRangeOpenStr
    else:                        on_skip_range_open_str = get_on_skip_range_open(Mode, CloserSequence)

    local_variable_db = {
        "counter":     Variable("counter", "size_t", None, "0"),
        "QUEX_OPTION_COLUMN_NUMBER_COUNTING/reference_p":
                       Variable("reference_p",
                                "QUEX_TYPE_CHARACTER_POSITION",
                                None,
                                "(QUEX_TYPE_CHARACTER_POSITION)0x0",
                                "QUEX_OPTION_COLUMN_NUMBER_COUNTING")
    }

    # Column counting: 'reference_p' is set where counting starts, the
    # distance to it is added before a reload, and it is set again afterwards.
    reference_p_def = "    __QUEX_IF_COUNT_COLUMNS(reference_p = QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer));\n"
    before_reload   = "    __QUEX_IF_COUNT_COLUMNS_ADD((size_t)(QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer)\n" + \
                      "                                - reference_p));\n"
    after_reload    = "        __QUEX_IF_COUNT_COLUMNS(reference_p = QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer));\n"

    if CloserSequence[-1] == ord('\n'):
        # The closer ends with newline => one more line; column is set to 1.
        end_procedure  = "       __QUEX_IF_COUNT_LINES_ADD((size_t)1);\n"
        end_procedure += "       __QUEX_IF_COUNT_COLUMNS_SET((size_t)1);\n"
    else:
        end_procedure = "        __QUEX_IF_COUNT_COLUMNS_ADD((size_t)(QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer)\n" + \
                        "                                    - reference_p));\n"

    code_str = blue_print(template_str,
                          [
                           ["$$SKIPPER_INDEX$$",   __nice(skipper_index)],
                           #
                           ["$$OPENER$$",          opener_str],
                           ["$$OPENER_LENGTH$$",   opener_length_str],
                           ["$$OPENER_COMMENT$$",  opener_comment_str],
                           ["$$CLOSER$$",          closer_str],
                           ["$$CLOSER_LENGTH$$",   closer_length_str],
                           ["$$CLOSER_COMMENT$$",  closer_comment_str],
                           #
                           ["$$INPUT_P_INCREMENT$$",              LanguageDB["$input/increment"]],
                           ["$$INPUT_P_DECREMENT$$",              LanguageDB["$input/decrement"]],
                           ["$$INPUT_GET$$",                      LanguageDB["$input/get"]],
                           ["$$IF_INPUT_EQUAL_DELIMITER_0$$",     LanguageDB["$if =="]("Skipper$$SKIPPER_INDEX$$[0]")],
                           ["$$ENDIF$$",                          LanguageDB["$endif"]],
                           ["$$ENTRY$$",                          get_label("$entry", skipper_index)],
                           ["$$RELOAD$$",                         get_label("$reload", skipper_index)],
                           # When things were skipped, no change to acceptance flags or modes has
                           # happened. One can jump immediately to the start without re-entry preparation.
                           ["$$GOTO_AFTER_END_OF_SKIPPING$$",     goto_after_end_of_skipping_str],
                           ["$$GOTO_RELOAD$$",                    get_label("$reload", skipper_index)],
                           # NOTE(review): this label is requested without 'U=True', whereas the
                           #               labels of the 'goto' statements above are requested
                           #               with it -- confirm that this is intended.
                           ["$$GOTO_ENTRY$$",                     get_label("$entry", skipper_index)],
                           ["$$MARK_LEXEME_START$$",              LanguageDB["$mark-lexeme-start"]],
                           ["$$ON_SKIP_RANGE_OPEN$$",             on_skip_range_open_str],
                           #
                           ["$$LC_COUNT_COLUMN_N_POINTER_DEFINITION$$", reference_p_def],
                           ["$$LC_COUNT_IN_LOOP$$",                     line_column_counter_in_loop],
                           ["$$LC_COUNT_END_PROCEDURE$$",               end_procedure],
                           ["$$LC_COUNT_BEFORE_RELOAD$$",               before_reload],
                           ["$$LC_COUNT_AFTER_RELOAD$$",                after_reload],
                          ])

    return code_str, local_variable_db
Пример #32
0
def get_transition_to_drop_out(CurrentStateIdx):
    """Build the statement that jumps to the drop-out of the given state.

    RETURNS: String "goto <label>;" where <label> is the '$drop-out' label
             of 'CurrentStateIdx'.
    """
    LanguageDB = Setup.language_db   # (plays no role in the result)
    target_label = get_label("$drop-out", CurrentStateIdx, U=True)
    return "goto %s;" % target_label
Пример #33
0
def parse_mode_option(fh, new_mode):
    """Parse one mode option from 'fh' and enter it into 'new_mode'.

    fh       -- file handle positioned where an option may start.
    new_mode -- mode description that receives the matches and options.

    The options 'skip', 'skip_range' and 'skip_nested_range' are entered as
    pattern matches with generated actions; for them no option value is set.
    The option 'indentation' enters its newline patterns as matches and is
    then, as any other option, checked against the option's domain and
    entered by 'new_mode.add_option()'.

    RETURNS: False, if 'read_option_start()' found no option.
             True, if an option has been parsed.
    """
    def fit_state_machine(SM):
        # Transforms 'SM' by 'nfa_to_dfa' (if it is not DFA compliant) and
        # applies 'hopcroft'. The state machine to be used is the one that
        # is RETURNED.
        if not SM.is_DFA_compliant(): result = nfa_to_dfa.do(SM)
        else:                         result = SM
        result = hopcroft.do(result, CreateNewStateMachineF=False)
        return result

    identifier = read_option_start(fh)
    if identifier is None: return False

    verify_word_in_list(identifier, lexer_mode.mode_option_info_db.keys(),
                        "mode option", fh.name,
                        get_current_line_info_number(fh))

    if identifier == "skip":
        # A skipper 'eats' characters at the beginning of a pattern that belong
        # to a specified set of characters. A useful application is most probably
        # the whitespace skipper '[ \t\n]'. The skipper definition allows quex to
        # implement a very effective way to skip these regions.
        pattern_str, trigger_set = regular_expression.parse_character_set(
            fh, PatternStringF=True)
        skip_whitespace(fh)

        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'." % identifier,
                      fh)

        if trigger_set.is_empty():
            # (The message contains no place holder; nothing to be formatted.)
            error_msg("Empty trigger set for skipper.", fh)

        # TriggerSet skipping is implemented the following way: As soon as one element of the
        # trigger set appears, the state machine enters the 'trigger set skipper section'.
        # Enter the skipper as if the opener pattern was a normal pattern and the 'skipper' is the action.
        # NOTE: The correspondent CodeFragment for skipping is created in 'implement_skippers(...)'
        pattern_sm = StateMachine()
        pattern_sm.add_transition(pattern_sm.init_state_index,
                                  trigger_set,
                                  AcceptanceF=True)

        # Skipper code is to be generated later
        action = GeneratedCode(skip_character_set.do,
                               FileName=fh.name,
                               LineN=get_current_line_info_number(fh))
        action.data["character_set"] = trigger_set

        pattern_sm = fit_state_machine(pattern_sm)
        # For skippers line and column counting detection is not really a topic
        # It is done in the skipper itself.
        pattern_sm.side_info = SideInfo()

        new_mode.add_match(pattern_str, action, pattern_sm)

        return True

    elif identifier in ["skip_range", "skip_nested_range"]:
        # A non-nesting skipper can contain a full fledged regular expression as opener,
        # since it only effects the trigger. Not so the nested range skipper-see below.

        # -- opener
        skip_whitespace(fh)
        if identifier == "skip_nested_range":
            # Nested range state machines only accept 'strings' not state machines
            opener_str, opener_sequence = parse_string_constant(
                fh, "Opener pattern for 'skip_nested_range'")

            opener_sm = StateMachine()
            idx = opener_sm.init_state_index
            for letter in opener_sequence:
                idx = opener_sm.add_transition(idx, letter)
            opener_sm.states[idx].set_acceptance(True)
        else:
            opener_str, opener_sm = regular_expression.parse(fh)
            # For 'range skipping' the opener sequence is not needed, only the opener state
            # machine is webbed into the pattern matching state machine.
            opener_sequence = None

        skip_whitespace(fh)

        # -- closer
        closer_str, closer_sequence = parse_string_constant(
            fh, "Closing pattern for 'skip_range' or 'skip_nested_range'")
        skip_whitespace(fh)
        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'" % identifier,
                      fh)

        # Skipper code is to be generated later
        generator_function = {
            "skip_range": skip_range.do,
            "skip_nested_range": skip_nested_range.do,
        }[identifier]
        action = GeneratedCode(generator_function,
                               FileName=fh.name,
                               LineN=get_current_line_info_number(fh))

        action.data["opener_sequence"] = opener_sequence
        action.data["closer_sequence"] = closer_sequence
        action.data["mode_name"] = new_mode.name

        # The returned state machine must be taken: if a conversion was
        # necessary, it is the result of 'nfa_to_dfa.do()' that continues.
        opener_sm = fit_state_machine(opener_sm)

        # For skippers line and column counting detection is not really a topic
        # It is done in the skipper itself.
        opener_sm.side_info = SideInfo()

        new_mode.add_match(opener_str, action, opener_sm)

        return True

    elif identifier == "indentation":
        value = indentation_setup.do(fh)

        # Enter 'Newline' and 'Suppressed Newline' as matches into the engine.
        # Similar to skippers, the indentation count is then triggered by the newline.
        # -- Suppressed Newline = Suppressor followed by Newline,
        #    then newline does not trigger indentation counting.
        suppressed_newline_pattern = ""
        if value.newline_suppressor_state_machine.get() is not None:
            suppressed_newline_pattern = \
                  "(" + value.newline_suppressor_state_machine.pattern_str + ")" \
                + "(" + value.newline_state_machine.pattern_str + ")"

            suppressed_newline_sm = \
                sequentialize.do([value.newline_suppressor_state_machine.get(),
                                  value.newline_state_machine.get()])

            FileName = value.newline_suppressor_state_machine.file_name
            LineN = value.newline_suppressor_state_machine.line_n
            # Go back to start.
            code_fragment = UserCodeFragment(
                "goto %s;" % get_label("$start", U=True), FileName, LineN)

            suppressed_newline_sm = fit_state_machine(suppressed_newline_sm)

            # Analyze pattern for constant number of newlines, characters, etc.
            suppressed_newline_sm.side_info = SideInfo(
                character_counter.get_newline_n(suppressed_newline_sm),
                character_counter.get_character_n(suppressed_newline_sm))

            new_mode.add_match(suppressed_newline_pattern,
                               code_fragment,
                               suppressed_newline_sm,
                               Comment="indentation newline suppressor")

        # When there is an empty line, then there shall be no indentation count on it.
        # Here comes the trick:
        #
        #      Let               newline
        #      be defined as:    newline ([space]* newline)*
        #
        # This way empty lines are eaten away before the indentation count is activated.

        # -- 'space'
        x0 = StateMachine()
        x0.add_transition(x0.init_state_index,
                          value.indentation_count_character_set(),
                          AcceptanceF=True)
        # -- '[space]*'
        x1 = repeat.do(x0)
        # -- '[space]* newline'
        x2 = sequentialize.do([x1, value.newline_state_machine.get()])
        # -- '([space]* newline)*'
        x3 = repeat.do(x2)
        # -- 'newline ([space]* newline)*'
        x4 = sequentialize.do([value.newline_state_machine.get(), x3])
        # -- nfa to dfa; hopcroft optimization
        sm = hopcroft.do(nfa_to_dfa.do(x4), CreateNewStateMachineF=False)

        FileName = value.newline_state_machine.file_name
        LineN = value.newline_state_machine.line_n
        action = GeneratedCode(indentation_counter.do, FileName, LineN)

        action.data["indentation_setup"] = value

        sm = fit_state_machine(sm)
        sm.side_info = SideInfo(character_counter.get_newline_n(sm),
                                character_counter.get_character_n(sm))
        new_mode.add_match(value.newline_state_machine.pattern_str,
                           action,
                           sm,
                           Comment="indentation newline")

        # Announce the mode to which the setup belongs
        value.set_containing_mode_name(new_mode.name)
    else:
        value = read_option_value(fh)

    # The 'verify_word_in_list()' call must have ensured that the following holds
    assert identifier in lexer_mode.mode_option_info_db

    # Is the option of the appropriate value?
    option_info = lexer_mode.mode_option_info_db[identifier]
    if option_info.domain is not None and value not in option_info.domain:
        error_msg("Tried to set value '%s' for option '%s'. " % (value, identifier) + \
                  "Though, possible for this option are only: %s." % repr(option_info.domain)[1:-1], fh)

    # Finally, set the option
    new_mode.add_option(identifier, value)

    return True
Пример #34
0
    def backward_detector_function_get(self, sm):
        """Generate the code of a backward input position detector for 'sm'.

        sm -- state machine without orphaned states.

        The result consists of: prolog, local variable definitions, optional
        comment, state machine code, terminal, state router (if any address
        is subject to routing), and epilog.

        RETURNS: Result of 'get_plain_strings()' applied on the list of
                 code fragments.
        """
        assert sm.get_orphaned_state_index_list() == []

        decorated_sm = StateMachineDecorator(sm,
                                             "BACKWARD_DETECTOR_" + repr(sm.get_id()),
                                             PostContextSM_ID_List=[],
                                             BackwardLexingF=True,
                                             BackwardInputPositionDetectionF=True)

        # Variable and address handling are initialized BEFORE the state
        # machine is coded.
        variable_db.init()
        init_address_handling(decorated_sm.get_direct_transition_to_terminal_db())

        state_machine_code = state_machine_coder.do(decorated_sm)

        comment = []
        if Setup.comment_state_machine_transitions_f:
            description = "BEGIN: BACKWARD DETECTOR STATE MACHINE\n" \
                          + sm.get_string(NormalizeF=False)          \
                          + "\nEND: BACKWARD DETECTOR STATE MACHINE"
            comment = Setup.language_db["$ml-comment"](description)
            # NOTE(review): 'append' requires "$ml-comment" to return a list
            #               -- TODO confirm.
            comment.append("\n")

        # -- input position detectors simply seek the position, step one
        #    forward, and return.
        terminal = [
            "\n",
            "    __quex_assert_no_passage();\n",
            get_label("$terminal-general-bw") + ":\n",
            "    " + self.language_db["$input/seek_position"]("end_of_core_pattern_position") + "\n",
            "    " + self.language_db["$input/increment"] + "\n",
            "    return;\n",
        ]

        # A state router is only generated, if there are addresses that are
        # subject to routing.
        routed_address_set = get_address_set_subject_to_routing()
        state_router_txt = ""
        if len(routed_address_set) != 0:
            routed_state_info_list = state_router.get_info(routed_address_set,
                                                           decorated_sm)
            state_router_txt = state_router.do(routed_state_info_list)
            variable_db.require("target_state_index",
                                Condition_ComputedGoto=False)

        variable_db.require("input")
        variable_db.require("end_of_core_pattern_position")

        variable_definition = self.language_db["$local-variable-defs"](variable_db.get())

        # Put all things together
        # (The 'L' that 'repr()' may produce for the id is removed.)
        prolog = bwd_prolog.replace("$$ID$$", repr(sm.get_id()).replace("L", ""))
        txt = [prolog]
        txt.extend(variable_definition)
        txt.extend(comment)
        txt.extend(state_machine_code)
        txt.extend(terminal)
        txt.append(state_router_txt)
        txt.append(bwd_epilog.replace("$$INIT_STATE_ID$$",
                                      get_label_of_address(sm.init_state_index)))

        return get_plain_strings(txt)
Пример #35
0
def get_skipper(TriggerSet):
    """Generate the code of a character set skipper.

    'Skipping' means passing by all characters that belong to a given set
    of characters--the TriggerSet. TriggerSet must be a non-empty object
    of class 'NumberSet'.

    RETURNS: [0] list of code fragments: prolog, transition code, epilog.
             [1] dictionary of the local variables required by the code.
    """
    assert TriggerSet.__class__.__name__ == "NumberSet"
    assert not TriggerSet.is_empty()

    LanguageDB    = Setup.language_db
    skipper_index = sm_index.get()

    # The loop consists of a single transition: any character of the trigger
    # set leads back to the skipper itself. (No need to worry about
    # 'drop-out-ranges' etc.)
    loop_map = TransitionMap()
    loop_map.add_transition(TriggerSet, skipper_index)
    trigger_map = loop_map.get_trigger_map()

    # On buffer limit code, the skipper must transit to a dedicated reloader.
    goto_reload_str = "goto %s;" % get_label("$reload", skipper_index)
    iteration_code  = transition_block.do(trigger_map,
                                          skipper_index,
                                          DSM=None,
                                          GotoReload_Str=goto_reload_str)

    comment_str = LanguageDB["$comment"]("Skip any character in " + TriggerSet.get_utf8_string())

    # Line and column number counting is entered into both frames first;
    # the remaining placeholders are filled in afterwards.
    prolog_frame = __lc_counting_replacements(prolog_txt, TriggerSet)
    epilog_frame = __lc_counting_replacements(epilog_txt, TriggerSet)

    prolog_replacements = [
        ["$$DELIMITER_COMMENT$$", comment_str],
        ["$$SKIPPER_INDEX$$",     "%i" % skipper_index],
        ["$$INPUT_GET$$",         LanguageDB["$input/get"]],
    ]
    prolog = blue_print(prolog_frame, prolog_replacements)

    epilog_replacements = [
        ["$$INPUT_P_INCREMENT$$",             LanguageDB["$input/increment"]],
        ["$$INPUT_P_DECREMENT$$",             LanguageDB["$input/decrement"]],
        ["$$IF_INPUT_EQUAL_DELIMITER_0$$",    LanguageDB["$if =="]("SkipDelimiter$$SKIPPER_INDEX$$[0]")],
        ["$$ENDIF$$",                         LanguageDB["$endif"]],
        ["$$LOOP_REENTRANCE$$",               get_label("$entry", skipper_index)],
        ["$$INPUT_EQUAL_BUFFER_LIMIT_CODE$$", LanguageDB["$BLC"]],
        ["$$RELOAD$$",                        get_label("$reload", skipper_index)],
        ["$$DROP_OUT_DIRECT$$",               get_label("$drop-out", skipper_index, U=True)],
        ["$$SKIPPER_INDEX$$",                 "%i" % skipper_index],
        ["$$GOTO_TERMINAL_EOF$$",             get_label("$terminal-EOF", U=True)],
        # Skipping changes neither acceptance flags nor modes. So, the jump
        # goes directly to the start; no re-entry preparation is required.
        ["$$GOTO_START$$",                    get_label("$start", U=True)],
        ["$$MARK_LEXEME_START$$",             LanguageDB["$mark-lexeme-start"]],
    ]
    epilog = blue_print(epilog_frame, epilog_replacements)

    code = [prolog] + list(iteration_code) + [epilog]

    # 'reference_p' is registered under the column counting option.
    local_variable_db = {
        "QUEX_OPTION_COLUMN_NUMBER_COUNTING/reference_p":
            Variable("reference_p",
                     "QUEX_TYPE_CHARACTER_POSITION",
                     None,
                     "(QUEX_TYPE_CHARACTER_POSITION)0x0",
                     "QUEX_OPTION_COLUMN_NUMBER_COUNTING"),
    }

    return code, local_variable_db
Пример #36
0
def get_transition_to_state(TargetInfo):
    """Return the statement that transits to the entry of a target state.

    TargetInfo is passed on unchanged to 'get_label()' as the reference for
    the "$entry" label, together with 'U=True'.

    RETURNS: string of the form "goto <entry label>;".
    """
    return "goto %s;" % get_label("$entry", TargetInfo, U=True)
Пример #37
0
def do(Data):
    """Generate the code of the indentation counter.

    The generated code is very similar to the 'skipper' code. It is to be
    executed as soon as a 'real' newline arrived. Then it skips whitespace
    until the next non-whitespace (also newline may trigger a 'stop').

    Dependent on the setup the indentation is determined.

    Data -- dictionary, where Data["indentation_setup"] must be an object
            of class 'IndentationSetup'.

    RETURNS: [0] list of code fragments: prolog, transition code, a newline,
                 and the epilog.
             [1] dictionary of the required local variables ('reference_p').
    """
    IndentationSetup = Data["indentation_setup"]
    assert IndentationSetup.__class__.__name__ == "IndentationSetup"

    LanguageDB = Setup.language_db

    # The mode is only looked up, if the setup names a containing mode.
    Mode = None
    if IndentationSetup.containing_mode_name() != "":
        Mode = lexer_mode.mode_db[IndentationSetup.containing_mode_name()]

    counter_index = sm_index.get()

    # Mini trigger map:  [ trigger set ] --> loop start
    # That means: As long as characters of the trigger set appear, we go to the loop start.
    trigger_map = []
    if IndentationSetup.has_only_single_spaces():
        # The indentation consists only of spaces, so it is 'uniform':
        # Count indentation/column at end of run;
        # simply: current position - reference_p

        # 'list(...)': on Python 3 'values()' is a view that cannot be indexed.
        character_set = list(IndentationSetup.space_db.values())[0]
        for interval in character_set.get().get_intervals(PromiseToTreatWellF=True):
            trigger_map.append([interval, counter_index])

        # Reference Pointer: Define Variable, Initialize, determine how to subtract.
        end_procedure = \
        "    me->counter._indentation = (size_t)(QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer) - reference_p);\n"
    else:
        # Count the indentation/column during the 'run'

        # Add the space counters
        for count, character_set in IndentationSetup.space_db.items():
            for interval in character_set.get().get_intervals(PromiseToTreatWellF=True):
                trigger_map.append([interval, IndentationCounter("space", count, counter_index)])

        # Add the grid counters
        for count, character_set in IndentationSetup.grid_db.items():
            for interval in character_set.get().get_intervals(PromiseToTreatWellF=True):
                trigger_map.append([interval, IndentationCounter("grid", count, counter_index)])

        # Reference Pointer: Not required.
        #                    No subtraction 'current_position - reference_p'.
        #                    (however, we pass 'reference_p' to indentation handler)
        end_procedure = ""

    # Bad character detection
    if not IndentationSetup.bad_character_set.get().is_empty():
        for interval in IndentationSetup.bad_character_set.get().get_intervals(PromiseToTreatWellF=True):
            trigger_map.append([interval, IndentationCounter("bad", None, counter_index)])

    # Since we do not use a 'TransitionMap', there are some things we need
    # to do by hand.
    arrange_trigger_map(trigger_map)

    local_variable_db = { "reference_p" :
                          Variable("reference_p",
                                   "QUEX_TYPE_CHARACTER_POSITION",
                                   None,
                                   "(QUEX_TYPE_CHARACTER_POSITION)0x0")
    }
    init_reference_p  = "    reference_p = QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer);\n" + \
                        "    me->counter._indentation = (QUEX_TYPE_INDENTATION)0;\n"

    iteration_code = transition_block.do(trigger_map,
                                         counter_index,
                                         DSM=None,
                                         GotoReload_Str="goto %s;" % get_label("$reload", counter_index))

    comment_str    = LanguageDB["$comment"]("Skip whitespace at line begin; count indentation.")

    # NOTE: Line and column number counting is off
    #       -- No newline can occur
    #       -- column number = indentation at the end of the process

    end_procedure += "    __QUEX_IF_COUNT_COLUMNS_ADD(me->counter._indentation);\n"
    if Mode is None or Mode.default_indentation_handler_sufficient():
        end_procedure += "    QUEX_NAME(on_indentation)(me, me->counter._indentation, reference_p);\n"
    else:
        # Definition of '%s_on_indentation' in mode_classes.py.
        end_procedure += "    QUEX_NAME(%s_on_indentation)(me, me->counter._indentation, reference_p);\n" \
                         % Mode.name

    # The finishing touch: fill the placeholders of the prolog ...
    prolog = blue_print(prolog_txt,
                         [
                           ["$$DELIMITER_COMMENT$$",              comment_str],
                           ["$$INIT_REFERENCE_POINTER$$",         init_reference_p],
                           ["$$COUNTER_INDEX$$",                  repr(counter_index)],
                           ["$$INPUT_GET$$",                      LanguageDB["$input/get"]],
                         ])

    # ... and those of the epilog.
    epilog = blue_print(epilog_txt,
                      [
                       ["$$INPUT_P_INCREMENT$$",              LanguageDB["$input/increment"]],
                       ["$$INPUT_P_DECREMENT$$",              LanguageDB["$input/decrement"]],
                       ["$$IF_INPUT_EQUAL_DELIMITER_0$$",     LanguageDB["$if =="]("SkipDelimiter$$COUNTER_INDEX$$[0]")],
                       ["$$ENDIF$$",                          LanguageDB["$endif"]],
                       ["$$LOOP_REENTRANCE$$",                get_label("$entry",  counter_index)],
                       ["$$INPUT_EQUAL_BUFFER_LIMIT_CODE$$",  LanguageDB["$BLC"]],
                       ["$$RELOAD$$",                         get_label("$reload", counter_index)],
                       ["$$COUNTER_INDEX$$",                  repr(counter_index)],
                       ["$$GOTO_TERMINAL_EOF$$",              get_label("$terminal-EOF", U=True)],
                       # When things were skipped, no change to acceptance flags or modes has
                       # happened. One can jump immediately to the start without re-entry preparation.
                       ["$$GOTO_START$$",                     get_label("$start", U=True)],
                       ["$$END_PROCEDURE$$",                  end_procedure],
                       ["$$BAD_CHARACTER_HANDLING$$",         get_bad_character_handler(Mode, IndentationSetup, counter_index)],
                      ])

    txt = [prolog]
    txt.extend(iteration_code)
    txt.append("\n")
    txt.append(epilog)

    return txt, local_variable_db
Пример #38
0
def get_transition_to_state(TargetInfo):
    """Compose the 'goto' statement that enters a target state.

    TargetInfo is handed to 'get_label()' without modification; it
    identifies the "$entry" label which is requested with 'U=True'.

    RETURNS: string of the form "goto <entry label>;".
    """
    return "goto %s;" % get_label("$entry", TargetInfo, U=True)
Пример #39
0
def get_skipper(TriggerSet):
    """Implement simple 'skipping', i.e. passing by characters of a set.

    TriggerSet -- the set of characters to be skipped. It must be an
                  object of class 'NumberSet' and it must not be empty.

    RETURNS: [0] list that contains the prolog, the code fragments of the
                 transition block, and the epilog.
             [1] dictionary with the local variable 'reference_p'.
    """
    assert TriggerSet.__class__.__name__ == "NumberSet"
    assert not TriggerSet.is_empty()

    LanguageDB = Setup.language_db
    skipper_index = sm_index.get()

    # Mini trigger map: [ trigger set ] --> loop start. As long as characters
    # of the trigger set appear, the loop start is entered again.
    # (Don't worry about 'drop-out-ranges' etc.)
    skip_map = TransitionMap()
    skip_map.add_transition(TriggerSet, skipper_index)

    # On buffer limit code, the skipper must transit to a dedicated reloader.
    iteration_code = transition_block.do(
        skip_map.get_trigger_map(),
        skipper_index,
        DSM=None,
        GotoReload_Str="goto %s;" % get_label("$reload", skipper_index))

    skipped_set_name = TriggerSet.get_utf8_string()
    comment_str = LanguageDB["$comment"]("Skip any character in " + skipped_set_name)

    # First pass: line and column number counting.
    prolog = __lc_counting_replacements(prolog_txt, TriggerSet)
    epilog = __lc_counting_replacements(epilog_txt, TriggerSet)

    # Second pass: the remaining placeholders.
    prolog_pairs = [
        ["$$DELIMITER_COMMENT$$", comment_str],
        ["$$SKIPPER_INDEX$$", "%i" % skipper_index],
        ["$$INPUT_GET$$", LanguageDB["$input/get"]],
    ]
    prolog = blue_print(prolog, prolog_pairs)

    if_delimiter_str = LanguageDB["$if =="]("SkipDelimiter$$SKIPPER_INDEX$$[0]")
    epilog_pairs = [
        ["$$INPUT_P_INCREMENT$$", LanguageDB["$input/increment"]],
        ["$$INPUT_P_DECREMENT$$", LanguageDB["$input/decrement"]],
        ["$$IF_INPUT_EQUAL_DELIMITER_0$$", if_delimiter_str],
        ["$$ENDIF$$", LanguageDB["$endif"]],
        ["$$LOOP_REENTRANCE$$", get_label("$entry", skipper_index)],
        ["$$INPUT_EQUAL_BUFFER_LIMIT_CODE$$", LanguageDB["$BLC"]],
        ["$$RELOAD$$", get_label("$reload", skipper_index)],
        ["$$DROP_OUT_DIRECT$$", get_label("$drop-out", skipper_index, U=True)],
        ["$$SKIPPER_INDEX$$", "%i" % skipper_index],
        ["$$GOTO_TERMINAL_EOF$$", get_label("$terminal-EOF", U=True)],
        # After skipping, acceptance flags and modes are as they were before.
        # The start can be entered immediately, without re-entry preparation.
        ["$$GOTO_START$$", get_label("$start", U=True)],
        ["$$MARK_LEXEME_START$$", LanguageDB["$mark-lexeme-start"]],
    ]
    epilog = blue_print(epilog, epilog_pairs)

    code = [prolog]
    code += iteration_code
    code += [epilog]

    reference_p = Variable("reference_p",
                           "QUEX_TYPE_CHARACTER_POSITION",
                           None,
                           "(QUEX_TYPE_CHARACTER_POSITION)0x0",
                           "QUEX_OPTION_COLUMN_NUMBER_COUNTING")
    local_variable_db = {
        "QUEX_OPTION_COLUMN_NUMBER_COUNTING/reference_p": reference_p,
    }

    return code, local_variable_db
Пример #40
0
def parse_mode_option(fh, new_mode):
    """Parse a single mode option from 'fh' and enter it into 'new_mode'.

    The options "skip", "skip_range", "skip_nested_range", and "indentation"
    are translated into pattern-action pairs which are entered by means of
    'new_mode.add_match()'. "indentation" and any other option are entered
    by means of 'new_mode.add_option()'.

    fh       -- file handle from where the option is read.
    new_mode -- mode object that receives the matches and options.

    RETURNS: False -- if 'read_option_start()' did not find an option.
             True  -- if an option has been parsed and entered.
    """
    def fit_state_machine(SM):
        """Return a state machine for 'SM' that went through 'nfa_to_dfa'
        (only if 'SM' is not DFA compliant) and 'hopcroft'.

        NOTE: The result is not necessarily the object 'SM' itself. The
              caller must continue to work with the return value.
        """
        if not SM.is_DFA_compliant():
            result = nfa_to_dfa.do(SM)
        else:
            result = SM
        return hopcroft.do(result, CreateNewStateMachineF=False)

    identifier = read_option_start(fh)
    if identifier is None:
        return False

    verify_word_in_list(identifier, lexer_mode.mode_option_info_db.keys(),
                        "mode option", fh.name, get_current_line_info_number(fh))

    if identifier == "skip":
        # A skipper 'eats' characters at the beginning of a pattern that belong
        # to a specified set of characters. A useful application is most probably
        # the whitespace skipper '[ \t\n]'. The skipper definition allows quex to
        # implement a very effective way to skip these regions.
        pattern_str, trigger_set = regular_expression.parse_character_set(fh, PatternStringF=True)
        skip_whitespace(fh)

        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'." % identifier, fh)

        if trigger_set.is_empty():
            # The message contains no placeholder; it must not be subject to '%'.
            error_msg("Empty trigger set for skipper.", fh)

        # TriggerSet skipping is implemented the following way: As soon as one element of the
        # trigger set appears, the state machine enters the 'trigger set skipper section'.
        # Enter the skipper as if the opener pattern was a normal pattern and the 'skipper' is the action.
        # NOTE: The correspondent CodeFragment for skipping is created in 'implement_skippers(...)'
        pattern_sm = StateMachine()
        pattern_sm.add_transition(pattern_sm.init_state_index, trigger_set, AcceptanceF=True)

        # Skipper code is to be generated later
        action = GeneratedCode(skip_character_set.do,
                               FileName = fh.name,
                               LineN    = get_current_line_info_number(fh))
        action.data["character_set"] = trigger_set

        pattern_sm = fit_state_machine(pattern_sm)
        # For skippers line and column counting detection is not really a topic
        # It is done in the skipper itself.
        pattern_sm.side_info = SideInfo()

        new_mode.add_match(pattern_str, action, pattern_sm)

        return True

    elif identifier in ["skip_range", "skip_nested_range"]:
        # A non-nesting skipper can contain a full fledged regular expression as opener,
        # since it only effects the trigger. Not so the nested range skipper-see below.

        # -- opener
        skip_whitespace(fh)
        if identifier == "skip_nested_range":
            # Nested range state machines only accept 'strings' not state machines
            opener_str, opener_sequence = parse_string_constant(fh, "Opener pattern for 'skip_nested_range'")

            opener_sm = StateMachine()
            idx = opener_sm.init_state_index
            for letter in opener_sequence:
                idx = opener_sm.add_transition(idx, letter)
            opener_sm.states[idx].set_acceptance(True)
        else:
            opener_str, opener_sm = regular_expression.parse(fh)
            # For 'range skipping' the opener sequence is not needed, only the opener state
            # machine is webbed into the pattern matching state machine.
            opener_sequence       = None

        skip_whitespace(fh)

        # -- closer
        closer_str, closer_sequence = parse_string_constant(fh, "Closing pattern for 'skip_range' or 'skip_nested_range'")
        skip_whitespace(fh)
        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'" % identifier, fh)

        # Skipper code is to be generated later
        generator_function = {
                "skip_range":        skip_range.do,
                "skip_nested_range": skip_nested_range.do,
        }[identifier]
        action = GeneratedCode(generator_function,
                               FileName = fh.name,
                               LineN    = get_current_line_info_number(fh))

        action.data["opener_sequence"] = opener_sequence
        action.data["closer_sequence"] = closer_sequence
        action.data["mode_name"]       = new_mode.name

        # The fitted state machine may be a new object (see 'fit_state_machine').
        # Thus, the result must be taken over; otherwise the DFA would be lost.
        opener_sm = fit_state_machine(opener_sm)

        # For skippers line and column counting detection is not really a topic
        # It is done in the skipper itself.
        opener_sm.side_info = SideInfo()

        new_mode.add_match(opener_str, action, opener_sm)

        return True

    elif identifier == "indentation":
        value = indentation_setup.do(fh)

        # Enter 'Newline' and 'Suppressed Newline' as matches into the engine.
        # Similar to skippers, the indentation count is then triggered by the newline.
        # -- Suppressed Newline = Suppressor followed by Newline,
        #    then newline does not trigger indentation counting.
        if value.newline_suppressor_state_machine.get() is not None:
            suppressed_newline_pattern = \
                  "(" + value.newline_suppressor_state_machine.pattern_str + ")" \
                + "(" + value.newline_state_machine.pattern_str + ")"

            suppressed_newline_sm = \
                sequentialize.do([value.newline_suppressor_state_machine.get(),
                                  value.newline_state_machine.get()])

            FileName = value.newline_suppressor_state_machine.file_name
            LineN    = value.newline_suppressor_state_machine.line_n
            # Go back to start.
            code_fragment = UserCodeFragment("goto %s;" % get_label("$start", U=True), FileName, LineN)

            suppressed_newline_sm = fit_state_machine(suppressed_newline_sm)

            # Analyze pattern for constant number of newlines, characters, etc.
            suppressed_newline_sm.side_info = SideInfo(
                    character_counter.get_newline_n(suppressed_newline_sm),
                    character_counter.get_character_n(suppressed_newline_sm))

            new_mode.add_match(suppressed_newline_pattern, code_fragment, suppressed_newline_sm,
                               Comment="indentation newline suppressor")

        # When there is an empty line, then there shall be no indentation count on it.
        # Here comes the trick:
        #
        #      Let               newline
        #      be defined as:    newline ([space]* newline)*
        #
        # This way empty lines are eaten away before the indentation count is activated.

        # -- 'space'
        x0 = StateMachine()
        x0.add_transition(x0.init_state_index, value.indentation_count_character_set(),
                          AcceptanceF=True)
        # -- '[space]*'
        x1 = repeat.do(x0)
        # -- '[space]* newline'
        x2 = sequentialize.do([x1, value.newline_state_machine.get()])
        # -- '([space]* newline)*'
        x3 = repeat.do(x2)
        # -- 'newline ([space]* newline)*'
        x4 = sequentialize.do([value.newline_state_machine.get(), x3])
        # -- nfa to dfa; hopcroft optimization
        sm = hopcroft.do(nfa_to_dfa.do(x4), CreateNewStateMachineF=False)

        FileName = value.newline_state_machine.file_name
        LineN    = value.newline_state_machine.line_n
        action   = GeneratedCode(indentation_counter.do, FileName, LineN)

        action.data["indentation_setup"] = value

        sm = fit_state_machine(sm)
        sm.side_info = SideInfo(character_counter.get_newline_n(sm),
                                character_counter.get_character_n(sm))
        new_mode.add_match(value.newline_state_machine.pattern_str,
                           action, sm, Comment="indentation newline")

        # Announce the mode to which the setup belongs
        value.set_containing_mode_name(new_mode.name)
    else:
        value = read_option_value(fh)

    # The 'verify_word_in_list()' call must have ensured that the following holds
    assert identifier in lexer_mode.mode_option_info_db

    # Is the option of the appropriate value?
    option_info = lexer_mode.mode_option_info_db[identifier]
    if option_info.domain is not None and value not in option_info.domain:
        error_msg("Tried to set value '%s' for option '%s'. " % (value, identifier) + \
                  "Though, possible for this option are only: %s." % repr(option_info.domain)[1:-1], fh)

    # Finally, set the option
    new_mode.add_option(identifier, value)

    return True
Пример #41
0
def get_skipper(OpenerSequence,
                CloserSequence,
                Mode=None,
                IndentationCounterTerminalID=None,
                OnSkipRangeOpenStr=""):
    """Generate the code of a skipper for a range that is delimited by an
    opener and a closer sequence.

    OpenerSequence, CloserSequence
             -- non-empty lists of 'int' objects. The two sequences must
                differ from each other.
    Mode     -- passed to 'end_delimiter_is_subset_of_indentation_counter_newline()'
                and to 'get_on_skip_range_open()'.
    IndentationCounterTerminalID
             -- must not be None, if the closer is reported to be a subset
                of the indentation counter's newline. Then, the skipper
                ends with a jump to the "$terminal-direct" label of that id,
                instead of a jump to "$start".
    OnSkipRangeOpenStr
             -- if not empty, it is used in place of the result of
                'get_on_skip_range_open(Mode, CloserSequence)'.

    RETURNS: [0] the code produced by 'blue_print()' from 'template_str'.
             [1] dictionary of local variables: 'counter', 'reference_p'.
    """
    assert OpenerSequence.__class__ == list
    assert len(OpenerSequence) >= 1
    # Exact type match on purpose (no 'isinstance'): sub-types of 'int' are
    # rejected. No comparison of 'map()' with a list, because on Python 3
    # 'map()' delivers an iterator that is never equal to a list.
    assert all(type(letter) is int for letter in OpenerSequence)
    assert CloserSequence.__class__ == list
    assert len(CloserSequence) >= 1
    assert all(type(letter) is int for letter in CloserSequence)
    assert OpenerSequence != CloserSequence

    LanguageDB = Setup.language_db

    skipper_index = sm_index.get()

    opener_str, opener_length_str, opener_comment_str = get_character_sequence(
        OpenerSequence)
    closer_str, closer_length_str, closer_comment_str = get_character_sequence(
        CloserSequence)

    if not end_delimiter_is_subset_of_indentation_counter_newline(
            Mode, CloserSequence):
        goto_after_end_of_skipping_str = "goto %s;" % get_label("$start",
                                                                U=True)
    else:
        # If there is indentation counting involved, then the counter's terminal id must
        # be determined at this place.
        assert IndentationCounterTerminalID is not None
        # If the ending delimiter is a subset of what the 'newline' pattern triggers
        # in indentation counting => move on to the indentation counter.
        goto_after_end_of_skipping_str = "goto %s;" % get_label(
            "$terminal-direct", IndentationCounterTerminalID, U=True)

    if OnSkipRangeOpenStr != "":
        on_skip_range_open_str = OnSkipRangeOpenStr
    else:
        on_skip_range_open_str = get_on_skip_range_open(Mode, CloserSequence)

    local_variable_db = {
        "counter":
        Variable("counter", "size_t", None, "0"),
        "QUEX_OPTION_COLUMN_NUMBER_COUNTING/reference_p":
        Variable("reference_p", "QUEX_TYPE_CHARACTER_POSITION", None,
                 "(QUEX_TYPE_CHARACTER_POSITION)0x0",
                 "QUEX_OPTION_COLUMN_NUMBER_COUNTING")
    }

    # Column counting relies on 'reference_p': it is set at the begin and
    # after reload; the distance to it is added before reload.
    reference_p_def = "    __QUEX_IF_COUNT_COLUMNS(reference_p = QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer));\n"
    before_reload   = "    __QUEX_IF_COUNT_COLUMNS_ADD((size_t)(QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer)\n" + \
                      "                                - reference_p));\n"
    after_reload = "        __QUEX_IF_COUNT_COLUMNS(reference_p = QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer));\n"

    if CloserSequence[-1] == ord('\n'):
        # The closer ends with newline: one more line; column number is set to 1.
        end_procedure = "       __QUEX_IF_COUNT_LINES_ADD((size_t)1);\n"
        end_procedure += "       __QUEX_IF_COUNT_COLUMNS_SET((size_t)1);\n"
    else:
        end_procedure = "        __QUEX_IF_COUNT_COLUMNS_ADD((size_t)(QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer)\n" + \
                        "                                    - reference_p));\n"

    code_str = blue_print(
        template_str,
        [
            ["$$SKIPPER_INDEX$$", __nice(skipper_index)],
            #
            ["$$OPENER$$", opener_str],
            ["$$OPENER_LENGTH$$", opener_length_str],
            ["$$OPENER_COMMENT$$", opener_comment_str],
            ["$$CLOSER$$", closer_str],
            ["$$CLOSER_LENGTH$$", closer_length_str],
            ["$$CLOSER_COMMENT$$", closer_comment_str],
            #
            ["$$INPUT_P_INCREMENT$$", LanguageDB["$input/increment"]],
            ["$$INPUT_P_DECREMENT$$", LanguageDB["$input/decrement"]],
            ["$$INPUT_GET$$", LanguageDB["$input/get"]],
            [
                "$$IF_INPUT_EQUAL_DELIMITER_0$$",
                LanguageDB["$if =="]("Skipper$$SKIPPER_INDEX$$[0]")
            ],
            ["$$ENDIF$$", LanguageDB["$endif"]],
            ["$$ENTRY$$", get_label("$entry", skipper_index)],
            ["$$RELOAD$$", get_label("$reload", skipper_index)],
            ["$$GOTO_AFTER_END_OF_SKIPPING$$", goto_after_end_of_skipping_str],
            ["$$GOTO_RELOAD$$",
             get_label("$reload", skipper_index)],
            # When things were skipped, no change to acceptance flags or modes has
            # happened. One can jump immediately to the start without re-entry preparation.
            ["$$GOTO_ENTRY$$",
             get_label("$entry", skipper_index)],
            ["$$MARK_LEXEME_START$$", LanguageDB["$mark-lexeme-start"]],
            ["$$ON_SKIP_RANGE_OPEN$$", on_skip_range_open_str],
            #
            ["$$LC_COUNT_COLUMN_N_POINTER_DEFINITION$$", reference_p_def],
            ["$$LC_COUNT_IN_LOOP$$", line_column_counter_in_loop],
            ["$$LC_COUNT_END_PROCEDURE$$", end_procedure],
            ["$$LC_COUNT_BEFORE_RELOAD$$", before_reload],
            ["$$LC_COUNT_AFTER_RELOAD$$", after_reload],
        ])

    return code_str, local_variable_db