def get_character_set_skipper(TriggerSet, LanguageDB):
    """This function implements simple 'skipping' in the sense of passing by
    characters that belong to a given set of characters--the TriggerSet.

    TriggerSet -- NumberSet of characters to be skipped (must be non-empty).
    LanguageDB -- dictionary of target-language code generators.
    Returns the generated skipper code (string).
    """
    assert TriggerSet.__class__.__name__ == "NumberSet"
    assert not TriggerSet.is_empty()

    # Fresh state index reserved for the skipper loop.
    skipper_index = sm_index.get()

    # Mini trigger map: [ trigger set ] --> loop start
    # That means: As long as characters of the trigger set appear, we go to the loop start.
    transition_map = TransitionMap()
    transition_map.add_transition(TriggerSet, skipper_index)
    # Transition code that jumps back to the loop for any character in TriggerSet.
    iteration_code = transition_block.do(transition_map.get_trigger_map(),
                                         skipper_index,
                                         InitStateF=False,
                                         DSM=None)

    comment_str = LanguageDB["$comment"]("Skip any character in " + TriggerSet.get_utf8_string())

    # Line and column number counting
    code_str = __set_skipper_lc_counting_replacements(trigger_set_skipper_template, TriggerSet)

    # The finishing touch: substitute all template placeholders.
    # NOTE(review): "$$GOTO_LOOP_START$$" appears TWICE in this list (once as
    # goto "$input", once as goto "$entry"), and "$$LOOP_START$$"/"$$RESTART$$"
    # both expand to the same "$input" label definition -- confirm which
    # replacement blue_print() actually applies for a duplicated key.
    txt = blue_print(code_str, [
        ["$$DELIMITER_COMMENT$$", comment_str],
        ["$$INPUT_P_INCREMENT$$", LanguageDB["$input/increment"]],
        ["$$INPUT_P_DECREMENT$$", LanguageDB["$input/decrement"]],
        ["$$INPUT_GET$$", LanguageDB["$input/get"]],
        ["$$IF_INPUT_EQUAL_DELIMITER_0$$",
         LanguageDB["$if =="]("SkipDelimiter$$SKIPPER_INDEX$$[0]")],
        ["$$ENDIF$$", LanguageDB["$endif"]],
        ["$$LOOP_START$$", LanguageDB["$label-def"]("$input", skipper_index)],
        ["$$GOTO_LOOP_START$$", LanguageDB["$goto"]("$input", skipper_index)],
        ["$$LOOP_REENTRANCE$$", LanguageDB["$label-def"]("$entry", skipper_index)],
        ["$$RESTART$$", LanguageDB["$label-def"]("$input", skipper_index)],
        ["$$DROP_OUT$$", LanguageDB["$label-def"]("$drop-out", skipper_index)],
        ["$$DROP_OUT_DIRECT$$", LanguageDB["$label-def"]("$drop-out-direct", skipper_index)],
        ["$$GOTO_LOOP_START$$", LanguageDB["$goto"]("$entry", skipper_index)],
        ["$$SKIPPER_INDEX$$", repr(skipper_index)],
        ["$$GOTO_TERMINAL_EOF$$", LanguageDB["$goto"]("$terminal-EOF")],
        ["$$GOTO_REENTRY_PREPARATION$$", LanguageDB["$goto"]("$re-start")],
        ["$$MARK_LEXEME_START$$", LanguageDB["$mark-lexeme-start"]],
        ["$$ON_TRIGGER_SET_TO_LOOP_START$$", iteration_code],
    ])
    # Second pass -- presumably because "$$GOTO_DROP_OUT$$" may occur inside
    # text inserted by the first pass; TODO confirm.
    return blue_print(txt, [
        ["$$GOTO_DROP_OUT$$", LanguageDB["$goto"]("$drop-out", skipper_index)]
    ])
def __cpp_terminal_states(StateMachineName, sm, action_db, DefaultAction):
    """Generate the 'terminal state' section of the generated C++ analyzer:
    one labeled block per pattern (entered from acceptance states) plus a
    dispatch switch over pattern ids (entered from non-acceptance states).

    StateMachineName -- name used to build goto labels.
    sm               -- the state machine; only its initial state is inspected.
    action_db        -- maps state-machine id --> pattern action code (string).
    DefaultAction    -- code executed when no pattern matched.
    Returns the filled '__cpp_terminal_state_str' template (string).
    """
    # -- specific terminal states of patterns (entered from acceptance states)
    txt = ""
    for state_machine_id in action_db.keys():
        # Label of this pattern's terminal block.
        txt += " %s:\n" % get_label("", None, state_machine_id)
        # Re-indent the user's action code by one level.
        action_code = " " + action_db[state_machine_id].replace("\n", "\n ")
        # Seek the stream back to where the winning pattern ended.
        txt += " QUEX_STREAM_SEEK(last_acceptance_input_position);"
        txt += action_code + "\n"
        txt += " // if action code returns from the function, then the following is meaningless\n"
        # Fetch the next character only if the initial state can transit at all.
        if sm.states[sm.init_state_index].transitions().is_empty() == False:
            txt += " QUEX_STREAM_GET(input);"
        txt += " goto QUEX_LABEL_%s_ENTRY_INITIAL_STATE;\n" % StateMachineName
    specific_terminal_states_str = txt

    # -- general terminal state (entered from non-acceptance state)
    txt = ""
    for state_machine_id in action_db.keys():
        txt += " case %s: goto %s;\n" % \
               (repr(state_machine_id), get_label("", None, state_machine_id))
    jumps_to_acceptance_states_str = txt

    # -- execute default pattern action
    # -- reset character stream to last success
    # -- goto initial state
    txt = blue_print(__cpp_terminal_state_str,
                     [["$$JUMPS_TO_ACCEPTANCE_STATE$$", jumps_to_acceptance_states_str],
                      ["$$SPECIFIC_TERMINAL_STATES$$", specific_terminal_states_str],
                      ["$$DEFAULT_ACTION$$", DefaultAction.replace("\n", " \n")],
                      ["$$STATE_MACHINE_NAME$$", StateMachineName],
                      ["$$INITIAL_STATE_INDEX_LABEL$$",
                       get_label(StateMachineName, sm.init_state_index)]])
    return txt
def __get_mode_init_call(mode, LexerClassName):
    """Build the initialization call for one lexer mode by filling the
    'quex_mode_init_call_str' template with the mode's handler names.

    Handlers the mode does not define are replaced by the library's
    null functions; a mode marked '<inheritable: only>' gets an
    uncallable analyser function.
    """
    prefix = "%s_%s_" % (LexerClassName, mode.name)

    analyser_function = prefix + "analyser_function"
    on_indentation    = prefix + "on_indentation"
    on_entry          = prefix + "on_entry"
    on_exit           = prefix + "on_exit"
    has_base          = prefix + "has_base"
    has_entry_from    = prefix + "has_entry_from"
    has_exit_to       = prefix + "has_exit_to"

    # An 'inheritable only' mode can never be entered directly.
    if mode.options["inheritable"] == "only":
        analyser_function = "QuexMode_uncallable_analyser_function"
    # Fall back to no-op handlers where the mode defines no event code.
    if mode.on_entry_code_fragments() == []:
        on_entry = "QuexMode_on_entry_exit_null_function"
    if mode.on_exit_code_fragments() == []:
        on_exit = "QuexMode_on_entry_exit_null_function"
    if mode.on_indentation_code_fragments() == []:
        on_indentation = "QuexMode_on_indentation_null_function"

    return blue_print(quex_mode_init_call_str,
                      [["$$MN$$",              mode.name],
                       ["$analyser_function", analyser_function],
                       ["$on_indentation",    on_indentation],
                       ["$on_entry",          on_entry],
                       ["$on_exit",           on_exit],
                       ["$has_base",          has_base],
                       ["$has_entry_from",    has_entry_from],
                       ["$has_exit_to",       has_exit_to]])
def do(sm, LanguageDB, PrintStateMachineF):
    """Generate the 'backward input position detector' function for state
    machine 'sm': the engine lexes backwards and, on the general terminal,
    seeks to the detected end-of-core-pattern position.

    sm                 -- state machine to be coded (backward lexing).
    LanguageDB         -- dictionary of target-language code generators.
    PrintStateMachineF -- if True, embed a textual dump of 'sm' as a comment.
    Returns the generated function text (string).
    """
    decorated_state_machine = StateMachineDecorator(sm,
                                                    "BACKWARD_DETECTOR_" + repr(sm.get_id()),
                                                    PostContextSM_ID_List = [],
                                                    BackwardLexingF=True,
                                                    BackwardInputPositionDetectionF=True)
    function_body = state_machine_coder.do(decorated_state_machine)

    sm_str = " " + LanguageDB["$comment"]("state machine") + "\n"
    if PrintStateMachineF:
        # Dump the (non-normalized) state machine for debugging purposes.
        sm_str += LanguageDB["$ml-comment"](sm.get_string(NormalizeF=False)) + "\n"

    # -- input position detectors simply the next 'catch' and return
    function_body += LanguageDB["$label-def"]("$terminal-general", True) + "\n"
    function_body += LanguageDB["$input/seek_position"]("end_of_core_pattern_position") + "\n"
    function_body += LanguageDB["$input/increment"] + "\n"

    variables_txt = LanguageDB["$local-variable-defs"](
        [["QUEX_CHARACTER_TYPE", "input", "(QUEX_CHARACTER_TYPE)(0x0)"],
         # NOTE(review): the initializer casts to QUEX_CHARACTER_TYPE* although
         # the variable is declared QUEX_CHARACTER_POSITION_TYPE -- confirm intended.
         ["QUEX_CHARACTER_POSITION_TYPE", "end_of_core_pattern_position", "(QUEX_CHARACTER_TYPE*)(0x0)"]])

    # repr() of a Python 2 long ends in 'L'; strip it for the C identifier.
    return blue_print(function_str,
                      [["$$ID$$", repr(sm.get_id()).replace("L", "")],
                       ["$$FUNCTION_BODY$$", function_body],
                       ["$$LOCAL_VARIABLES$$", variables_txt],
                       ["$$STATE_MACHINE$$", sm_str],
                       ])
def __get_mode_init_call(mode):
    """Fill the 'quex_mode_init_call_str' template with the QUEX_NAME()-wrapped
    handler function names of the given mode. Handlers without user code are
    mapped to the library's null functions; an '<inheritable: only>' mode gets
    an uncallable analyzer function.
    """
    def qualified(suffix):
        # Build "QUEX_NAME(<mode>_<suffix>)".
        return "QUEX_NAME(%s_%s)" % (mode.name, suffix)

    analyzer_function = qualified("analyzer_function")
    on_indentation    = qualified("on_indentation")
    on_entry          = qualified("on_entry")
    on_exit           = qualified("on_exit")
    has_base          = qualified("has_base")
    has_entry_from    = qualified("has_entry_from")
    has_exit_to       = qualified("has_exit_to")

    if mode.options["inheritable"] == "only":
        analyzer_function = "QUEX_NAME(Mode_uncallable_analyzer_function)"
    if mode.get_code_fragment_list("on_entry") == []:
        on_entry = "QUEX_NAME(Mode_on_entry_exit_null_function)"
    if mode.get_code_fragment_list("on_exit") == []:
        on_exit = "QUEX_NAME(Mode_on_entry_exit_null_function)"
    if mode.get_code_fragment_list("on_indentation") == []:
        on_indentation = "QUEX_NAME(Mode_on_indentation_null_function)"

    replacement_list = [["$$MN$$",             mode.name],
                        ["$analyzer_function", analyzer_function],
                        ["$on_indentation",    on_indentation],
                        ["$on_entry",          on_entry],
                        ["$on_exit",           on_exit],
                        ["$has_base",          has_base],
                        ["$has_entry_from",    has_entry_from],
                        ["$has_exit_to",       has_exit_to]]
    return blue_print(quex_mode_init_call_str, replacement_list)
def __lc_counting_replacements(code_str, CharacterSet):
    """Insert the line/column counting snippets into the skipper template.

    Column counting works with a 'reference pointer': it is anchored at loop
    entry (and re-anchored after each buffer reload), and the walked distance
    is added to the column counter at loop end and right before a reload.
    Line counting inside the loop is only required when the skipped set can
    contain a newline.
    """
    variable_definition = " __QUEX_IF_COUNT_COLUMNS(reference_p = QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer));\n"

    # A newline may be skipped => lines/columns must be counted in the loop.
    if CharacterSet.contains(ord("\n")):
        in_loop = line_column_counter_in_loop
    else:
        in_loop = ""

    column_add    = " __QUEX_IF_COUNT_COLUMNS_ADD((size_t)(me->buffer._input_p - reference_p));\n"
    end_procedure = column_add
    before_reload = column_add
    after_reload  = " __QUEX_IF_COUNT_COLUMNS(reference_p = me->buffer._input_p);\n"

    replacements = [
        ["$$LC_COUNT_COLUMN_N_POINTER_DEFINITION$$", variable_definition],
        ["$$LC_COUNT_IN_LOOP$$",                     in_loop],
        ["$$LC_COUNT_END_PROCEDURE$$",               end_procedure],
        ["$$LC_COUNT_BEFORE_RELOAD$$",               before_reload],
        ["$$LC_COUNT_AFTER_RELOAD$$",                after_reload],
    ]
    return blue_print(code_str, replacements)
def get_graphviz_string(self, NormalizeF=False):
    """Render this state machine as graphviz 'dot' source.

    NormalizeF -- if True, state indices are renumbered into a compact,
                  deterministic sequence before printing.
    Returns the 'dot' digraph text (string).
    """
    # (*) normalize the state indices
    index_map, inverse_index_map, index_sequence = \
        self.__get_state_index_normalization(NormalizeF)

    # (*) Border of plot block
    frame_txt = """
digraph state_machine_%i {
rankdir=LR;
size="8,5"
node [shape = doublecircle]; $$ACCEPTANCE_STATES$$;
node [shape = circle];
$$TRANSITIONS$$
}
""" % self.get_id()

    transition_fragments = []
    acceptance_fragments = []
    for state_index in index_sequence:
        normalized_index = index_map[state_index]
        state            = self.states[state_index]
        # Acceptance states are listed for 'doublecircle' rendering.
        if state.is_acceptance():
            acceptance_fragments.append("%i; " % int(normalized_index))
        transition_fragments.append(state.get_graphviz_string(normalized_index, index_map))

    acceptance_state_str = "".join(acceptance_fragments)
    # Cut the dangling "; " separator, if any states were listed.
    if acceptance_state_str != "":
        acceptance_state_str = acceptance_state_str[:-2]
    transition_str = "".join(transition_fragments)

    return blue_print(frame_txt,
                      [["$$ACCEPTANCE_STATES$$", acceptance_state_str],
                       ["$$TRANSITIONS$$",      transition_str]])
def __lc_counting_replacements(code_str, CharacterSet):
    """Line and Column Number Counting(Range Skipper):
         -- in loop if there appears a newline, then do:
            increment line_n
            set position from where to count column_n
         -- at end of skipping do one of the following:
            if end delimiter contains newline:
               column_n = number of letters since last new line in end delimiter
               increment line_n by number of newlines in end delimiter.
               (NOTE: in this case the setting of the position from where to
                      count the column_n can be omitted.)
            else:
               column_n = current_position - position from where to count column number.

       NOTE: On reload we do count the column numbers and reset the column_p.
    """
    # Anchor the reference pointer from which the column count is derived.
    variable_definition = " __QUEX_IF_COUNT_COLUMNS(reference_p = QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer));\n"

    in_loop = ""
    # Does the end delimiter contain a newline?
    if CharacterSet.contains(ord("\n")):
        # Newlines may be skipped => count lines/columns inside the loop.
        in_loop = line_column_counter_in_loop

    # Add the distance walked since 'reference_p' to the column counter ...
    end_procedure = " __QUEX_IF_COUNT_COLUMNS_ADD((size_t)(me->buffer._input_p - reference_p));\n"
    # ... also right before a buffer reload invalidates '_input_p' ...
    before_reload = " __QUEX_IF_COUNT_COLUMNS_ADD((size_t)(me->buffer._input_p - reference_p));\n"
    # ... and re-anchor the reference pointer after the reload.
    after_reload = " __QUEX_IF_COUNT_COLUMNS(reference_p = me->buffer._input_p);\n"

    return blue_print(code_str,
                      [["$$LC_COUNT_COLUMN_N_POINTER_DEFINITION$$", variable_definition],
                       ["$$LC_COUNT_IN_LOOP$$", in_loop],
                       ["$$LC_COUNT_END_PROCEDURE$$", end_procedure],
                       ["$$LC_COUNT_BEFORE_RELOAD$$", before_reload],
                       ["$$LC_COUNT_AFTER_RELOAD$$", after_reload],
                       ])
def do_map_id_to_name_function():
    """Generate the 'token-id --> token-name' mapping function by filling
    the 'func_str' template.

    Token ids from 'standard_token_id_list' are handled by the template
    itself and skipped here. Names starting with "--" denote UCS
    codepoints and are matched by their numeric value.
    Returns the generated code (string).
    """
    # Width of the longest token name -- used to align the generated columns.
    # (Replaces 'max(map(lambda name: len(name), ...))' with the idiomatic form.)
    L = max(len(name) for name in token_id_db)

    def space(Name):
        # Padding that right-aligns the column following 'Name'.
        return " " * (L - len(Name))

    # -- define the function for token names
    switch_cases = []
    token_names  = []
    for token_name in sorted(token_id_db.keys()):
        if token_name in standard_token_id_list:
            continue

        # UCS codepoints are coded directly as pure numbers
        if len(token_name) > 2 and token_name[:2] == "--":
            token = token_id_db[token_name]
            switch_cases.append(" case 0x%06X: return token_id_str_%s;\n" % \
                                (token.number, token.name))
            token_names.append(" static const char token_id_str_%s[]%s = \"%s\";\n" % \
                               (token.name, space(token.name), token.name))
        else:
            switch_cases.append(" case %s%s:%s return token_id_str_%s;\n" % \
                                (Setup.token_id_prefix, token_name, space(token_name), token_name))
            token_names.append(" static const char token_id_str_%s[]%s = \"%s\";\n" % \
                               (token_name, space(token_name), token_name))

    return blue_print(func_str,
                      [["$$TOKEN_ID_CASES$$", "".join(switch_cases)],
                       ["$$TOKEN_NAMES$$",    "".join(token_names)],
                       ])
def write_mode_class_implementation(Modes, Setup):
    """Write the C++ implementation file for the lexer's mode objects.

    Modes -- dictionary: mode name --> mode object.
    Setup -- configuration object providing class and output file names.

    Side effect: writes 'Setup.output_code_file' to disk, with newlines
    adapted to the platform's line separator.
    """
    LexerClassName              = Setup.output_engine_name
    TokenClassName              = Setup.input_token_class_name
    OutputFilestem              = Setup.output_file_stem
    DerivedClassName            = Setup.input_derived_class_name
    DerivedClassHeaderFileName  = Setup.input_derived_class_file
    ModeClassImplementationFile = Setup.output_code_file

    # Include the user's derived-class header if given, else the generated one.
    if DerivedClassHeaderFileName != "":
        txt = "#include<" + DerivedClassHeaderFileName + ">\n"
    else:
        txt = "#include\"" + OutputFilestem + "\"\n"

    # -- mode class member function definitions (on_entry, on_exit, has_base, ...)
    mode_class_member_functions_txt = mode_classes.do(Modes.values())

    # One static mode object per mode.
    mode_objects_txt = ""
    for mode_name in Modes:
        mode_objects_txt += " QuexMode $$LEXER_CLASS_NAME$$::%s;\n" % mode_name

    txt += "namespace quex {\n"
    txt += mode_objects_txt
    txt += mode_class_member_functions_txt
    txt += "} // END: namespace quex\n"

    txt = blue_print(txt, [["$$LEXER_CLASS_NAME$$", LexerClassName],
                           ["$$TOKEN_CLASS$$", TokenClassName],
                           ["$$LEXER_DERIVED_CLASS_NAME$$", DerivedClassName]])

    if os.linesep != "\n":
        txt = txt.replace("\n", os.linesep)
    # 'with' guarantees the handle is closed even if write() raises;
    # the original leaked the file handle on error.
    with open(ModeClassImplementationFile, "wb") as fh_out:
        fh_out.write(txt)
def write_mode_class_implementation(Modes, Setup):
    """Emit the C++ implementation file that defines the static mode objects
    and their member functions, then write it to 'Setup.output_code_file'
    (newlines adapted to the host platform).
    """
    LexerClassName              = Setup.output_engine_name
    TokenClassName              = Setup.input_token_class_name
    OutputFilestem              = Setup.output_file_stem
    DerivedClassName            = Setup.input_derived_class_name
    DerivedClassHeaderFileName  = Setup.input_derived_class_file
    ModeClassImplementationFile = Setup.output_code_file

    # Pull in the derived-class header if one was specified, otherwise the
    # generated header itself.
    if DerivedClassHeaderFileName != "":
        include_line = "#include<" + DerivedClassHeaderFileName + ">\n"
    else:
        include_line = "#include\"" + OutputFilestem + "\"\n"

    # -- mode class member function definitions (on_entry, on_exit, has_base, ...)
    mode_class_member_functions_txt = mode_classes.do(Modes.values())

    # One static mode object per mode name.
    mode_objects_txt = "".join(
        " QuexMode $$LEXER_CLASS_NAME$$::%s;\n" % mode_name
        for mode_name in Modes)

    txt = "".join([include_line,
                   "namespace quex {\n",
                   mode_objects_txt,
                   mode_class_member_functions_txt,
                   "} // END: namespace quex\n"])

    txt = blue_print(txt, [["$$LEXER_CLASS_NAME$$", LexerClassName],
                           ["$$TOKEN_CLASS$$", TokenClassName],
                           ["$$LEXER_DERIVED_CLASS_NAME$$", DerivedClassName]])

    fh_out = open(ModeClassImplementationFile, "wb")
    # Adapt newlines to the host platform before writing.
    if os.linesep != "\n":
        txt = txt.replace("\n", os.linesep)
    fh_out.write(txt)
    fh_out.close()
def do(Modes):
    """Create the implementation text for all mode objects: the include
    preamble, one global QUEX_NAME(Mode) object per instantiable mode, and
    the mode member functions, wrapped into the main namespace.
    """
    LexerClassName   = Setup.analyzer_class_name
    TokenClassName   = Setup.token_class_name
    DerivedClassName = Setup.analyzer_derived_class_name
    DerivedClassHeaderFileName = Setup.analyzer_derived_class_file

    # Include the user's derived-class header if given, else the generated one.
    if DerivedClassHeaderFileName != "":
        header = "#include <" + get_file_reference(DerivedClassHeaderFileName) + ">\n"
    else:
        header = "#include \"" + get_file_reference(Setup.output_header_file) + "\"\n"
    header += "#include <quex/code_base/analyzer/C-adaptions.h>\n"

    # -- mode class member function definitions (on_entry, on_exit, has_base, ...)
    mode_class_member_functions_txt = write_member_functions(Modes.values())

    mode_objects_txt = ""
    for mode_name, mode in Modes.items():
        # '<inheritable: only>' modes exist solely as base modes; no object.
        if mode.options["inheritable"] == "only":
            continue
        mode_objects_txt += "/* Global */QUEX_NAME(Mode) QUEX_NAME(%s);\n" % mode_name

    txt = "".join([header,
                   "QUEX_NAMESPACE_MAIN_OPEN\n",
                   mode_objects_txt,
                   mode_class_member_functions_txt,
                   "QUEX_NAMESPACE_MAIN_CLOSE\n"])

    return blue_print(txt, [["$$LEXER_CLASS_NAME$$", LexerClassName],
                            ["$$LEXER_DERIVED_CLASS_NAME$$", DerivedClassName]])
def get_graphviz_string(self, NormalizeF=False):
    """Render this state machine as graphviz 'dot' source.

    NormalizeF -- if True, state indices are renumbered into a compact,
                  deterministic sequence before printing.
    Returns the 'dot' digraph text (string).
    """
    # (*) normalize the state indices
    index_map, inverse_index_map, index_sequence = self.__get_state_index_normalization(
        NormalizeF)

    # (*) Border of plot block
    frame_txt = """
digraph state_machine_%i {
rankdir=LR;
size="8,5"
node [shape = doublecircle]; $$ACCEPTANCE_STATES$$;
node [shape = circle];
$$TRANSITIONS$$
}
""" % self.get_id()

    transition_str = ""
    acceptance_state_str = ""
    for state_i in index_sequence:
        printed_state_i = index_map[state_i]
        state = self.states[state_i]
        # Acceptance states are listed for 'doublecircle' rendering.
        if state.is_acceptance():
            acceptance_state_str += "%i; " % int(printed_state_i)
        transition_str += state.get_graphviz_string(printed_state_i, index_map)

    # Strip the trailing "; " separator, if any states were listed.
    if acceptance_state_str != "":
        acceptance_state_str = acceptance_state_str[:-2]
    return blue_print(frame_txt,
                      [["$$ACCEPTANCE_STATES$$", acceptance_state_str],
                       ["$$TRANSITIONS$$", transition_str]])
def __get_mode_init_call(mode, LexerClassName):
    """Build the initialization call for one lexer mode by filling the
    'quex_mode_init_call_str' template with the mode's handler names.

    Handlers without user-defined code fragments fall back to the library's
    null functions; an '<inheritable: only>' mode gets an uncallable
    analyser function. Returns the filled template (string).
    """
    header_str = "%s_%s_" % (LexerClassName, mode.name)

    analyser_function = header_str + "analyser_function"
    on_indentation    = header_str + "on_indentation"
    on_entry          = header_str + "on_entry"
    on_exit           = header_str + "on_exit"
    has_base          = header_str + "has_base"
    has_entry_from    = header_str + "has_entry_from"
    has_exit_to       = header_str + "has_exit_to"

    # An 'inheritable only' mode can never be entered directly.
    if mode.options["inheritable"] == "only":
        analyser_function = "QuexMode_uncallable_analyser_function"
    # Fall back to no-op handlers where no event code is defined.
    if mode.on_entry_code_fragments() == []:
        on_entry = "QuexMode_on_entry_exit_null_function"
    if mode.on_exit_code_fragments() == []:
        on_exit = "QuexMode_on_entry_exit_null_function"
    if mode.on_indentation_code_fragments() == []:
        on_indentation = "QuexMode_on_indentation_null_function"

    txt = blue_print(quex_mode_init_call_str,
                     [["$$MN$$", mode.name],
                      ["$analyser_function", analyser_function],
                      ["$on_indentation", on_indentation],
                      ["$on_entry", on_entry],
                      ["$on_exit", on_exit],
                      ["$has_base", has_base],
                      ["$has_entry_from", has_entry_from],
                      ["$has_exit_to", has_exit_to]])
    return txt
def do(Modes):
    """Create the implementation text for all mode objects: include preamble,
    one global QUEX_NAME(Mode) object per instantiable mode, and the mode
    member functions, wrapped into the main namespace. Returns a string.
    """
    LexerClassName = Setup.analyzer_class_name
    # NOTE(review): 'TokenClassName' is assigned but not used below --
    # candidate for removal.
    TokenClassName = Setup.token_class_name
    DerivedClassName = Setup.analyzer_derived_class_name
    DerivedClassHeaderFileName = Setup.analyzer_derived_class_file

    # Include the user's derived-class header if given, else the generated one.
    if DerivedClassHeaderFileName != "":
        txt = "#include <" + get_file_reference(DerivedClassHeaderFileName) + ">\n"
    else:
        txt = "#include \"" + get_file_reference(Setup.output_header_file) + "\"\n"
    txt += "#include <quex/code_base/analyzer/C-adaptions.h>\n"

    # -- mode class member function definitions (on_entry, on_exit, has_base, ...)
    mode_class_member_functions_txt = write_member_functions(Modes.values())

    mode_objects_txt = ""
    for mode_name, mode in Modes.items():
        # '<inheritable: only>' modes exist solely as base modes; no object.
        if mode.options["inheritable"] == "only":
            continue
        mode_objects_txt += "/* Global */QUEX_NAME(Mode) QUEX_NAME(%s);\n" % mode_name

    txt += "QUEX_NAMESPACE_MAIN_OPEN\n"
    txt += mode_objects_txt
    txt += mode_class_member_functions_txt
    txt += "QUEX_NAMESPACE_MAIN_CLOSE\n"

    txt = blue_print(txt, [["$$LEXER_CLASS_NAME$$", LexerClassName],
                           ["$$LEXER_DERIVED_CLASS_NAME$$", DerivedClassName]])
    return txt
def get_on_indentation_handler(Mode):
    """Assemble the body of the mode's 'on_indentation' handler from the
    user-defined event fragments ('on_indent', 'on_nodent', 'on_dedent',
    'on_n_dedent', 'on_indentation_error'). Where the mode defines nothing,
    default token sends (or an error-exit) are substituted.
    Returns the filled 'on_indentation_str' template (string).
    """
    # 'on_dedent' and 'on_n_dedent cannot be defined at the same time.
    assert not (    Mode.has_code_fragment_list("on_dedent") \
                and Mode.has_code_fragment_list("on_n_dedent"))

    # A mode that deals only with the default indentation handler relies
    # on what is defined in '$QUEX_PATH/analayzer/member/on_indentation.i'
    if Mode.default_indentation_handler_sufficient():
        return " return;"

    # NOTE(review): the 'eol_f' flag returned by get_code() is never used in
    # this function -- confirm whether that is intentional.
    if Mode.has_code_fragment_list("on_indent"):
        on_indent_str, eol_f = action_code_formatter.get_code(Mode.get_code_fragment_list("on_indent"))
    else:
        on_indent_str = "self_send(__QUEX_SETTING_TOKEN_ID_INDENT);"

    if Mode.has_code_fragment_list("on_nodent"):
        on_nodent_str, eol_f = action_code_formatter.get_code(Mode.get_code_fragment_list("on_nodent"))
    else:
        on_nodent_str = "self_send(__QUEX_SETTING_TOKEN_ID_NODENT);"

    if Mode.has_code_fragment_list("on_dedent"):
        assert not Mode.has_code_fragment_list("on_n_dedent")
        on_dedent_str, eol_f = action_code_formatter.get_code(Mode.get_code_fragment_list("on_dedent"))
        on_n_dedent_str = ""

    elif Mode.has_code_fragment_list("on_n_dedent"):
        assert not Mode.has_code_fragment_list("on_dedent")
        on_n_dedent_str, eol_f = action_code_formatter.get_code(Mode.get_code_fragment_list("on_n_dedent"))
        on_dedent_str = ""

    else:
        # If no 'on_dedent' and no 'on_n_dedent' is defined ...
        # send one DEDENT token per closed indentation level (repeated token
        # support sends them in a single repeated token if available).
        on_dedent_str = ""
        on_n_dedent_str = "#if defined(QUEX_OPTION_TOKEN_REPETITION_SUPPORT)\n"
        on_n_dedent_str += " self_send_n(ClosedN, __QUEX_SETTING_TOKEN_ID_DEDENT);\n"
        on_n_dedent_str += "#else\n"
        on_n_dedent_str += " while( start-- != stack->back ) self_send(__QUEX_SETTING_TOKEN_ID_DEDENT);\n"
        on_n_dedent_str += "#endif\n"

    if not Mode.has_code_fragment_list("on_indentation_error"):
        # Default: Blow the program if there is an indentation error.
        on_indentation_error = 'QUEX_ERROR_EXIT("Lexical analyzer mode \'%s\': indentation error detected!\\n"' \
                               % Mode.name + \
                               ' "No \'on_indentation_error\' handler has been specified.\\n");'
    else:
        on_indentation_error, eol_f = action_code_formatter.get_code(Mode.get_code_fragment_list("on_indentation_error"))

    # Note: 'on_indentation_bad' is applied in code generation for
    # indentation counter in 'indentation_counter.py'.
    txt = blue_print(on_indentation_str,
                     [["$$INDENT-PROCEDURE$$", on_indent_str],
                      ["$$NODENT-PROCEDURE$$", on_nodent_str],
                      ["$$DEDENT-PROCEDURE$$", on_dedent_str],
                      ["$$N-DEDENT-PROCEDURE$$", on_n_dedent_str],
                      ["$$INDENTATION-ERROR-PROCEDURE$$", on_indentation_error]])
    return txt
def __reload_definitions(InitialStateIndex):
    """Collect the buffer-reload code fragments: the forward reload address,
    the init-state forward reload (parameterized with the initial state's
    entry label and the EOF terminal), and the backward reload address.
    Returns a list of Address objects / template strings.
    """
    init_state_label = get_label("$entry", InitialStateIndex, U=True)
    eof_label        = get_label("$terminal-EOF", U=True)

    init_state_reload_txt = blue_print(reload_init_state_forward_str,
                                       [["$$INIT_STATE$$",    init_state_label],
                                        ["$$END_OF_STREAM$$", eof_label]])

    result = [Address("$reload-FORWARD", None, reload_forward_str),
              init_state_reload_txt]
    # Append empty references to make sure that the addresses are implemented.
    result.append(Address("$reload-BACKWARD", None, reload_backward_str))
    return result
def get_character_set_skipper(TriggerSet, LanguageDB):
    """This function implements simple 'skipping' in the sense of passing by
    characters that belong to a given set of characters--the TriggerSet.

    TriggerSet -- NumberSet of characters to be skipped (must be non-empty).
    LanguageDB -- dictionary of target-language code generators.
    Returns the generated skipper code (string).
    """
    assert TriggerSet.__class__.__name__ == "NumberSet"
    assert not TriggerSet.is_empty()

    # Fresh state index reserved for the skipper loop.
    skipper_index = sm_index.get()

    # Mini trigger map: [ trigger set ] --> loop start
    # That means: As long as characters of the trigger set appear, we go to the loop start.
    transition_map = TransitionMap()
    transition_map.add_transition(TriggerSet, skipper_index)
    # Transition code that jumps back to the loop for any character in TriggerSet.
    iteration_code = transition_block.do(transition_map.get_trigger_map(), skipper_index, InitStateF=False, DSM=None)

    comment_str = LanguageDB["$comment"]("Skip any character in " + TriggerSet.get_utf8_string())

    # Line and column number counting
    code_str = __set_skipper_lc_counting_replacements(trigger_set_skipper_template, TriggerSet)

    # The finishing touch: substitute all template placeholders.
    # NOTE(review): "$$GOTO_LOOP_START$$" appears TWICE in this list (goto
    # "$input" vs. goto "$entry"), and "$$LOOP_START$$"/"$$RESTART$$" both
    # define the same "$input" label -- confirm which replacement blue_print()
    # actually applies for a duplicated key.
    txt = blue_print(code_str, [
        ["$$DELIMITER_COMMENT$$", comment_str],
        ["$$INPUT_P_INCREMENT$$", LanguageDB["$input/increment"]],
        ["$$INPUT_P_DECREMENT$$", LanguageDB["$input/decrement"]],
        ["$$INPUT_GET$$", LanguageDB["$input/get"]],
        ["$$IF_INPUT_EQUAL_DELIMITER_0$$", LanguageDB["$if =="]("SkipDelimiter$$SKIPPER_INDEX$$[0]")],
        ["$$ENDIF$$", LanguageDB["$endif"]],
        ["$$LOOP_START$$", LanguageDB["$label-def"]("$input", skipper_index)],
        ["$$GOTO_LOOP_START$$", LanguageDB["$goto"]("$input", skipper_index)],
        ["$$LOOP_REENTRANCE$$", LanguageDB["$label-def"]("$entry", skipper_index)],
        ["$$RESTART$$", LanguageDB["$label-def"]("$input", skipper_index)],
        ["$$DROP_OUT$$", LanguageDB["$label-def"]("$drop-out", skipper_index)],
        ["$$DROP_OUT_DIRECT$$", LanguageDB["$label-def"]("$drop-out-direct", skipper_index)],
        ["$$GOTO_LOOP_START$$", LanguageDB["$goto"]("$entry", skipper_index)],
        ["$$SKIPPER_INDEX$$", repr(skipper_index)],
        ["$$GOTO_TERMINAL_EOF$$", LanguageDB["$goto"]("$terminal-EOF")],
        ["$$GOTO_REENTRY_PREPARATION$$", LanguageDB["$goto"]("$re-start")],
        ["$$MARK_LEXEME_START$$", LanguageDB["$mark-lexeme-start"]],
        ["$$ON_TRIGGER_SET_TO_LOOP_START$$", iteration_code],
    ])
    # Second pass -- presumably because "$$GOTO_DROP_OUT$$" may occur inside
    # text inserted by the first pass; TODO confirm.
    return blue_print(txt, [["$$GOTO_DROP_OUT$$", LanguageDB["$goto"]("$drop-out", skipper_index)]])
def __reload_definitions(InitialStateIndex):
    """Collect the buffer-reload code fragments: the forward reload address,
    the init-state forward reload (parameterized with the initial state's
    entry label and the EOF terminal), and the backward reload address.
    Returns a list of Address objects / template strings.
    """
    txt = []
    txt.append(Address("$reload-FORWARD", None, reload_forward_str))
    txt.append(
        blue_print(reload_init_state_forward_str,
                   [["$$INIT_STATE$$",
                     get_label("$entry", InitialStateIndex, U=True)],
                    ["$$END_OF_STREAM$$", get_label("$terminal-EOF", U=True)]]))
    # Append empty references to make sure that the addresses are implemented.
    txt.append(Address("$reload-BACKWARD", None, reload_backward_str))
    return txt
def replace_keywords(program_txt, LanguageDB, NoIndentF):
    """Replaces pseudo-code keywords with keywords of the given language.

    program_txt -- pseudo-code text containing placeholder keywords.
    LanguageDB  -- mapping: placeholder --> target-language keyword.
    NoIndentF   -- if False, the result is additionally indented by four spaces.
    Returns the translated (and possibly indented) text.
    """
    txt = blue_print(program_txt, LanguageDB.items())

    if not NoIndentF:
        # delete the last newline, to prevent additional indentation
        # (guard 'txt' non-empty: the original indexed txt[-1] unconditionally,
        #  which raises IndexError on an empty string)
        if txt and txt[-1] == "\n":
            txt = txt[:-1]
        # indent by four spaces
        # (if this happens in recursively called functions nested indented blocks
        # are correctly indented, see NumberSet::get_condition_code() for example)
        txt = txt.replace("\n", "\n    ") + "\n"

    return txt
def __set_skipper_lc_counting_replacements(code_str, CharacterSet):
    """Line and Column Number Counting(Range Skipper):
         -- in loop if there appears a newline, then do:
            increment line_n
            set position from where to count column_n
         -- at end of skipping do one of the following:
            if end delimiter contains newline:
               column_n = number of letters since last new line in end delimiter
               increment line_n by number of newlines in end delimiter.
               (NOTE: in this case the setting of the position from where to
                      count the column_n can be omitted.)
            else:
               column_n = current_position - position from where to count column number.

       NOTE: On reload we do count the column numbers and reset the column_p.
    """
    # Per-skipper reference pointer from which column counts are derived.
    variable_definition = \
        "# if defined(QUEX_OPTION_LINE_NUMBER_COUNTING) || defined(QUEX_OPTION_COLUMN_NUMBER_COUNTING)\n" + \
        "# ifdef QUEX_OPTION_COLUMN_NUMBER_COUNTING\n" + \
        " QUEX_CHARACTER_POSITION_TYPE column_count_p_$$SKIPPER_INDEX$$ = QuexBuffer_tell_memory_adr(&me->buffer);\n" + \
        "# endif\n" + \
        "# endif\n"

    in_loop       = ""
    # NOTE(review): 'end_procedure' is overwritten unconditionally below and
    # 'exit_loop' is never used -- both initializations look like leftovers.
    end_procedure = ""
    exit_loop     = ""

    # Does the end delimiter contain a newline?
    if CharacterSet.contains(ord("\n")):
        # Newlines may be skipped => count lines/columns inside the loop.
        in_loop = lc_counter_in_loop

    # Add the walked distance to the column counter at loop end ...
    end_procedure = "# ifdef QUEX_OPTION_COLUMN_NUMBER_COUNTING\n" + \
                    " self.counter._column_number_at_end += QuexBuffer_tell_memory_adr(&me->buffer)\n" + \
                    " - column_count_p_$$SKIPPER_INDEX$$;\n" + \
                    "# endif\n"
    # ... also right before a buffer reload invalidates the pointer ...
    before_reload = "# ifdef QUEX_OPTION_COLUMN_NUMBER_COUNTING\n" + \
                    " self.counter._column_number_at_end += QuexBuffer_tell_memory_adr(&me->buffer)\n" + \
                    " - column_count_p_$$SKIPPER_INDEX$$;\n" + \
                    "# endif\n"
    # ... and re-anchor the reference pointer after the reload.
    after_reload = "# ifdef QUEX_OPTION_COLUMN_NUMBER_COUNTING\n" + \
                   " column_count_p_$$SKIPPER_INDEX$$ = QuexBuffer_tell_memory_adr(&me->buffer);\n" + \
                   "# endif\n"

    return blue_print(code_str, [
        ["$$LC_COUNT_COLUMN_N_POINTER_DEFINITION$$", variable_definition],
        ["$$LC_COUNT_IN_LOOP$$", in_loop],
        ["$$LC_COUNT_END_PROCEDURE$$", end_procedure],
        ["$$LC_COUNT_BEFORE_RELOAD$$", before_reload],
        ["$$LC_COUNT_AFTER_RELOAD$$", after_reload],
    ])
def __set_skipper_lc_counting_replacements(code_str, CharacterSet):
    """Line and Column Number Counting(Range Skipper):
         -- in loop if there appears a newline, then do:
            increment line_n
            set position from where to count column_n
         -- at end of skipping do one of the following:
            if end delimiter contains newline:
               column_n = number of letters since last new line in end delimiter
               increment line_n by number of newlines in end delimiter.
               (NOTE: in this case the setting of the position from where to
                      count the column_n can be omitted.)
            else:
               column_n = current_position - position from where to count column number.

       NOTE: On reload we do count the column numbers and reset the column_p.
    """
    # Per-skipper reference pointer from which column counts are derived.
    variable_definition = \
        "# if defined(QUEX_OPTION_LINE_NUMBER_COUNTING) || defined(QUEX_OPTION_COLUMN_NUMBER_COUNTING)\n" + \
        "# ifdef QUEX_OPTION_COLUMN_NUMBER_COUNTING\n" + \
        " QUEX_CHARACTER_POSITION_TYPE column_count_p_$$SKIPPER_INDEX$$ = QuexBuffer_tell_memory_adr(&me->buffer);\n" + \
        "# endif\n" + \
        "# endif\n"

    in_loop       = ""
    # NOTE(review): 'end_procedure' is overwritten unconditionally below and
    # 'exit_loop' is never used -- both initializations look like leftovers.
    end_procedure = ""
    exit_loop     = ""

    # Does the end delimiter contain a newline?
    if CharacterSet.contains(ord("\n")):
        # Newlines may be skipped => count lines/columns inside the loop.
        in_loop = lc_counter_in_loop

    # Add the walked distance to the column counter at loop end ...
    end_procedure = "# ifdef QUEX_OPTION_COLUMN_NUMBER_COUNTING\n" + \
                    " self.counter._column_number_at_end += QuexBuffer_tell_memory_adr(&me->buffer)\n" + \
                    " - column_count_p_$$SKIPPER_INDEX$$;\n" + \
                    "# endif\n"
    # ... also right before a buffer reload invalidates the pointer ...
    before_reload = "# ifdef QUEX_OPTION_COLUMN_NUMBER_COUNTING\n" + \
                    " self.counter._column_number_at_end += QuexBuffer_tell_memory_adr(&me->buffer)\n" + \
                    " - column_count_p_$$SKIPPER_INDEX$$;\n" + \
                    "# endif\n"
    # ... and re-anchor the reference pointer after the reload.
    after_reload = "# ifdef QUEX_OPTION_COLUMN_NUMBER_COUNTING\n" + \
                   " column_count_p_$$SKIPPER_INDEX$$ = QuexBuffer_tell_memory_adr(&me->buffer);\n" + \
                   "# endif\n"

    return blue_print(code_str,
                      [["$$LC_COUNT_COLUMN_N_POINTER_DEFINITION$$", variable_definition],
                       ["$$LC_COUNT_IN_LOOP$$", in_loop],
                       ["$$LC_COUNT_END_PROCEDURE$$", end_procedure],
                       ["$$LC_COUNT_BEFORE_RELOAD$$", before_reload],
                       ["$$LC_COUNT_AFTER_RELOAD$$", after_reload],
                       ])
def write_constructor_and_memento_functions(ModeDB):
    """Fill the constructor / memento template ('TXT-Cpp.i') with the
    user-supplied extension code and the mode-database initialization code.
    Returns the filled template text (string).
    """
    template_path = os.path.normpath(  QUEX_PATH
                                     + Setup.language_db["$code_base"]
                                     + "/analyzer/TXT-Cpp.i")
    template_txt = get_file_content_or_die(template_path)

    # NOTE(review): the misspelled placeholder "$$CONSTRUCTOR_EXTENSTION$$"
    # must match the template file -- do not "correct" it here alone.
    replacements = [
        ["$$CONSTRUCTOR_EXTENSTION$$",                  lexer_mode.class_constructor_extension.get_code()],
        ["$$CONVERTER_HELPER_I$$",                      get_file_reference(Setup.output_buffer_codec_header_i)],
        ["$$CONSTRUCTOR_MODE_DB_INITIALIZATION_CODE$$", get_constructor_code(ModeDB.values())],
        ["$$MEMENTO_EXTENSIONS_PACK$$",                 lexer_mode.memento_pack_extension.get_code()],
        ["$$MEMENTO_EXTENSIONS_UNPACK$$",               lexer_mode.memento_unpack_extension.get_code()],
    ]
    return blue_print(template_txt, replacements)
def delete_unused_labels(Code):
    """Comment out all labels in 'Code' that were generated but never jumped
    to (as reported by the label database).

    Code -- generated source text.
    Returns the text with unused label definitions turned into comments.
    """
    LanguageDB = Setup.language_db
    label_list = languages.label_db_get_unused_label_list()

    # Group the (label, commented-label) pairs by the label's first letter;
    # 'blue_print' receives that letter so it can skip unrelated lines fast.
    replacement_list_db = {}
    for label in label_list:
        original    = LanguageDB["$label-pure"](label)
        replacement = LanguageDB["$ml-comment"](original)
        first_letter = original[0]
        # setdefault replaces the deprecated Python-2-only
        # 'replacement_list_db.has_key(first_letter) == False' branching.
        replacement_list_db.setdefault(first_letter, []).append([original, replacement])

    code = Code
    for first_letter, replacement_list in replacement_list_db.items():
        code = blue_print(code, replacement_list, first_letter)
    return code
def _do(UnicodeTrafoInfo, CodecName):
    """
    PURPOSE: Writes converters for conversion towards UTF8/UTF16/UCS2/UCS4.

    UnicodeTrafoInfo:
       Provides the information about the relation of character codes in a
       particular coding to unicode character codes. It is provided in the
       following form:

       # Codec Values                 Unicode Values
       [ (Source0_Begin, Source0_End, TargetInterval0_Begin),
         (Source1_Begin, Source1_End, TargetInterval1_Begin),
         (Source2_Begin, Source2_End, TargetInterval2_Begin),
         ... ]

    Returns (header text, implementation text).
    """
    # The codec name becomes part of generated identifiers.
    codec_name = make_safe_identifier(CodecName).lower()

    # Generate the converter bodies for each target encoding.
    utf8_epilog,  utf8_function_body  = ConverterWriterUTF8().do(UnicodeTrafoInfo)
    utf16_prolog, utf16_function_body = ConverterWriterUTF16().do(UnicodeTrafoInfo)
    dummy,        utf32_function_body = ConverterWriterUTF32().do(UnicodeTrafoInfo)

    # Provide only the constant which are necessary
    implementation_file = os.path.normpath(  QUEX_PATH
                                           + Setup.language_db["$code_base"]
                                           + "/converter_helper/TXT-Codec.i")
    txt_i = blue_print(get_file_content_or_die(implementation_file),
                       [["$$CODEC$$",      codec_name],
                        ["$$EPILOG$$",     utf8_epilog],
                        ["$$BODY_UTF8$$",  utf8_function_body],
                        ["$$BODY_UTF16$$", utf16_function_body],
                        ["$$BODY_UTF32$$", utf32_function_body]])

    # A separate declaration header is required
    header_file = os.path.normpath(  QUEX_PATH
                                   + Setup.language_db["$code_base"]
                                   + "/converter_helper/TXT-Codec")
    txt_h = get_file_content_or_die(header_file).replace("$$CODEC$$", codec_name)

    return txt_h, txt_i
def __cpp_terminal_states(StateMachineName, sm, action_db, DefaultAction):
    """Generate the 'terminal state' section of the generated C++ analyzer:
    one labeled block per pattern (entered from acceptance states) plus a
    dispatch switch over pattern ids (entered from non-acceptance states).

    action_db -- maps state-machine id --> pattern action code (string).
    Returns the filled '__cpp_terminal_state_str' template (string).
    """
    # -- specific terminal states of patterns (entered from acceptance states)
    txt = ""
    for state_machine_id in action_db.keys():
        # Label of this pattern's terminal block.
        txt += " %s:\n" % get_label("", None, state_machine_id)
        # Re-indent the user's action code by one level.
        action_code = " " + action_db[state_machine_id].replace(
            "\n", "\n ")
        # Seek the stream back to where the winning pattern ended.
        txt += " QUEX_STREAM_SEEK(last_acceptance_input_position);"
        txt += action_code + "\n"
        txt += " // if action code returns from the function, then the following is meaningless\n"
        # Fetch the next character only if the initial state can transit at all.
        if sm.states[sm.init_state_index].transitions().is_empty() == False:
            txt += " QUEX_STREAM_GET(input);"
        txt += " goto QUEX_LABEL_%s_ENTRY_INITIAL_STATE;\n" % StateMachineName
    specific_terminal_states_str = txt

    # -- general terminal state (entered from non-acceptance state)
    txt = ""
    for state_machine_id in action_db.keys():
        txt += " case %s: goto %s;\n" % \
               (repr(state_machine_id), get_label("", None, state_machine_id))
    jumps_to_acceptance_states_str = txt

    # -- execute default pattern action
    # -- reset character stream to last success
    # -- goto initial state
    txt = blue_print(
        __cpp_terminal_state_str,
        [["$$JUMPS_TO_ACCEPTANCE_STATE$$", jumps_to_acceptance_states_str],
         ["$$SPECIFIC_TERMINAL_STATES$$", specific_terminal_states_str],
         ["$$DEFAULT_ACTION$$", DefaultAction.replace("\n", " \n")],
         ["$$STATE_MACHINE_NAME$$", StateMachineName],
         ["$$INITIAL_STATE_INDEX_LABEL$$",
          get_label(StateMachineName, sm.init_state_index)]])
    return txt
def _do(UnicodeTrafoInfo, CodecName):
    """
    PURPOSE: Writes converters for conversion towards UTF8/UTF16/UCS2/UCS4.

    UnicodeTrafoInfo:

       Provides the information about the relation of character codes in a
       particular coding to unicode character codes. It is provided in the
       following form:

       # Codec Values                 Unicode Values
       [ (Source0_Begin, Source0_End, TargetInterval0_Begin),
         (Source1_Begin, Source1_End, TargetInterval1_Begin),
         (Source2_Begin, Source2_End, TargetInterval2_Begin),
         ...
       ]

    RETURNS: (header text, implementation text) as two strings.
    """
    codec_name = make_safe_identifier(CodecName).lower()
    # Each ConverterWriter returns a (prolog/epilog, function body) pair;
    # only the UTF8 epilog is consumed by the template below.
    utf8_epilog, utf8_function_body = ConverterWriterUTF8().do(UnicodeTrafoInfo)
    utf16_prolog, utf16_function_body = ConverterWriterUTF16().do(UnicodeTrafoInfo)
    dummy, utf32_function_body = ConverterWriterUTF32().do(UnicodeTrafoInfo)

    # Provide only the constant which are necessary
    FileName = os.path.normpath(QUEX_PATH + Setup.language_db["$code_base"] +
                                "/converter_helper/TXT-Codec.i")
    txt_i = blue_print(get_file_content_or_die(FileName),
                       [["$$CODEC$$", codec_name],
                        ["$$EPILOG$$", utf8_epilog],
                        ["$$BODY_UTF8$$", utf8_function_body],
                        ["$$BODY_UTF16$$", utf16_function_body],
                        ["$$BODY_UTF32$$", utf32_function_body]])

    # A separate declaration header is required; only the codec name is
    # substituted there.
    FileName = os.path.normpath(QUEX_PATH + Setup.language_db["$code_base"] +
                                "/converter_helper/TXT-Codec")
    template_h_txt = get_file_content_or_die(FileName)
    txt_h = template_h_txt.replace("$$CODEC$$", codec_name)
    return txt_h, txt_i
def do(Data):
    """The generated code is very similar to the 'skipper' code. It is to be executed
       as soon as a 'real' newline arrived. Then it skips whitespace until the next
       non-whitepace (also newline may trigger a 'stop').

       Dependent on the setup the indentation is determined.

       Data -- dict; must contain key "indentation_setup" holding an
               'IndentationSetup' object.

       RETURNS: (txt, local_variable_db) where 'txt' is a list of code pieces
                (prolog, iteration code, epilog) and 'local_variable_db' maps
                variable names to 'Variable' objects required by the code.
    """
    IndentationSetup = Data["indentation_setup"]
    assert IndentationSetup.__class__.__name__ == "IndentationSetup"

    LanguageDB = Setup.language_db
    Mode = None
    # A non-empty containing mode name means a mode-specific indentation
    # handler may exist; look the mode up for the handler decision below.
    if IndentationSetup.containing_mode_name() != "":
        Mode = lexer_mode.mode_db[IndentationSetup.containing_mode_name()]

    counter_index = sm_index.get()

    # Mini trigger map:  [ trigger set ] --> loop start
    # That means: As long as characters of the trigger set appear, we go to the loop start.
    trigger_map = []
    # If the indentation consists only of spaces, than it is 'uniform' ...
    if IndentationSetup.has_only_single_spaces():
        # Count indentation/column at end of run;
        # simply: current position - reference_p
        character_set = IndentationSetup.space_db.values()[0]
        for interval in character_set.get().get_intervals(PromiseToTreatWellF=True):
            trigger_map.append([interval, counter_index])

        # Reference Pointer: Define Variable, Initialize, determine how to subtact.
        end_procedure = \
            "    me->counter._indentation = (size_t)(QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer) - reference_p);\n"
    else:
        # Count the indentation/column during the 'run'

        # Add the space counters
        for count, character_set in IndentationSetup.space_db.items():
            for interval in character_set.get().get_intervals(PromiseToTreatWellF=True):
                trigger_map.append([interval, IndentationCounter("space", count, counter_index)])

        # Add the grid counters
        for count, character_set in IndentationSetup.grid_db.items():
            for interval in character_set.get().get_intervals(PromiseToTreatWellF=True):
                trigger_map.append([interval, IndentationCounter("grid", count, counter_index)])

        # Reference Pointer: Not required.
        #                    No subtraction 'current_position - reference_p'.
        #                    (however, we pass 'reference_p' to indentation handler)
        end_procedure = ""

    # Bad character detection: characters that must not appear in indentation.
    if IndentationSetup.bad_character_set.get().is_empty() == False:
        for interval in IndentationSetup.bad_character_set.get().get_intervals(PromiseToTreatWellF=True):
            trigger_map.append([interval, IndentationCounter("bad", None, counter_index)])

    # Since we do not use a 'TransitionMap', there are some things we need
    # to do by hand.
    arrange_trigger_map(trigger_map)

    local_variable_db = {"reference_p":
                         Variable("reference_p", "QUEX_TYPE_CHARACTER_POSITION", None,
                                  "(QUEX_TYPE_CHARACTER_POSITION)0x0")}
    init_reference_p = "    reference_p = QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer);\n" + \
                       "    me->counter._indentation = (QUEX_TYPE_INDENTATION)0;\n"

    iteration_code = transition_block.do(trigger_map, counter_index, DSM=None,
                                         GotoReload_Str="goto %s;" % get_label("$reload", counter_index))

    comment_str = LanguageDB["$comment"]("Skip whitespace at line begin; count indentation.")

    # NOTE: Line and column number counting is off
    #       -- No newline can occur
    #       -- column number = indentation at the end of the process
    end_procedure += "    __QUEX_IF_COUNT_COLUMNS_ADD(me->counter._indentation);\n"
    if Mode == None or Mode.default_indentation_handler_sufficient():
        end_procedure += "    QUEX_NAME(on_indentation)(me, me->counter._indentation, reference_p);\n"
    else:
        # Definition of '%s_on_indentation' in mode_classes.py.
        end_procedure += "    QUEX_NAME(%s_on_indentation)(me, me->counter._indentation, reference_p);\n" \
                         % Mode.name

    # The finishing touch
    prolog = blue_print(prolog_txt,
                        [
                            ["$$DELIMITER_COMMENT$$", comment_str],
                            ["$$INIT_REFERENCE_POINTER$$", init_reference_p],
                            ["$$COUNTER_INDEX$$", repr(counter_index)],
                            ["$$INPUT_GET$$", LanguageDB["$input/get"]],
                        ])

    # The finishing touch
    epilog = blue_print(epilog_txt,
                        [
                            ["$$INPUT_P_INCREMENT$$", LanguageDB["$input/increment"]],
                            ["$$INPUT_P_DECREMENT$$", LanguageDB["$input/decrement"]],
                            ["$$IF_INPUT_EQUAL_DELIMITER_0$$", LanguageDB["$if =="]("SkipDelimiter$$COUNTER_INDEX$$[0]")],
                            ["$$ENDIF$$", LanguageDB["$endif"]],
                            ["$$LOOP_REENTRANCE$$", get_label("$entry", counter_index)],
                            ["$$INPUT_EQUAL_BUFFER_LIMIT_CODE$$", LanguageDB["$BLC"]],
                            ["$$RELOAD$$", get_label("$reload", counter_index)],
                            ["$$COUNTER_INDEX$$", repr(counter_index)],
                            ["$$GOTO_TERMINAL_EOF$$", get_label("$terminal-EOF", U=True)],
                            # When things were skipped, no change to acceptance flags or modes has
                            # happend. One can jump immediately to the start without re-entry preparation.
                            ["$$GOTO_START$$", get_label("$start", U=True)],
                            ["$$END_PROCEDURE$$", end_procedure],
                            ["$$BAD_CHARACTER_HANDLING$$", get_bad_character_handler(Mode, IndentationSetup, counter_index)],
                        ])

    txt = [prolog]
    txt.extend(iteration_code)
    # txt.append(Address("$drop-out", counter_index))
    txt.append("\n")
    txt.append(epilog)

    return txt, local_variable_db
def do(setup, IndentationSupportF):
    """Creates a file of token-ids from a given set of names.
       Creates also a function:

       const string& $$token$$::map_id_to_name().

       setup               -- global setup object (output names, prefixes, language).
       IndentationSupportF -- NOTE(review): not referenced in this body; presumably
                              kept for interface compatibility -- confirm with callers.
    """
    global file_str
    LanguageDB = Setup.language_db

    __propose_implicit_token_definitions()

    # The standard token ids (TERMINATION, UNINITIALIZED, ...) must always exist.
    for standard_token_id in standard_token_id_list:
        assert token_id_db.has_key(standard_token_id)

    assert lexer_mode.token_type_definition != None, \
        "Token type has not been defined yet, see $QUEX_PATH/quex/core.py how to\n" + \
        "handle this."

    # (*) Token ID File ________________________________________________________________
    #
    #     The token id file can either be specified as database of
    #     token-id names, or as a file that directly assigns the token-ids
    #     to variables. If the flag '--user-token-id-file' is defined, then
    #     then the token-id file is provided by the user. Otherwise, the
    #     token id file is created by the token-id maker.
    #
    #     The token id maker considers the file passed by the option '-t'
    #     as the database file and creates a C++ file with the output filestem
    #     plus the suffix "--token-ids". Note, that the token id file is a
    #     header file.
    #
    if len(token_id_db.keys()) == len(standard_token_id_list):
        token_id_str = "%sTERMINATION and %sUNINITIALIZED" % \
                       (setup.token_id_prefix_plain, setup.token_id_prefix_plain)
        # TERMINATION + UNINITIALIZED = 2 token ids. If they are the only ones nothing can be done.
        error_msg("Only token ids %s are defined.\n" % token_id_str + \
                  "Quex refuses to proceed. Please, use the 'token { ... }' section to\n" + \
                  "specify at least one other token id.")

    #______________________________________________________________________________________
    # Width of the longest token name; used to align the generated definitions.
    L = max(map(lambda name: len(name), token_id_db.keys()))

    def space(Name):
        # Padding so that all values line up in one column.
        return " " * (L - len(Name))

    # -- define values for the token ids
    def define_this(txt, token):
        if setup.language == "C":
            txt.append("#define %s%s %s((QUEX_TYPE_TOKEN_ID)%i)\n" \
                       % (setup.token_id_prefix_plain, token.name, space(token.name), token.number))
        else:
            txt.append("const QUEX_TYPE_TOKEN_ID %s%s%s = ((QUEX_TYPE_TOKEN_ID)%i);\n" \
                       % (setup.token_id_prefix_plain, token.name, space(token.name), token.number))

    if setup.token_id_foreign_definition_file != "":
        # The user provides the token ids; just include his file.
        token_id_txt = ["#include \"%s\"\n" % get_file_reference(setup.token_id_foreign_definition_file)]

    else:
        if setup.language == "C":
            prolog = ""
            epilog = ""
        else:
            prolog = LanguageDB["$namespace-open"](setup.token_id_prefix_name_space)
            epilog = LanguageDB["$namespace-close"](setup.token_id_prefix_name_space)

        token_id_txt = [prolog]

        # Assign values to tokens with no numeric identifier
        # NOTE: This has not to happen if token's are defined by the user's provided file.
        i = setup.token_id_counter_offset
        # Take the 'dummy_name' only to have the list sorted by name. The key 'dummy_name'
        # may contain '--' to indicate a unicode value, so do not use it as name.
        for dummy_name, token in sorted(token_id_db.items()):
            if token.number == None:
                # Find the next free numeric value.
                while __is_token_id_occupied(i):
                    i += 1
                token.number = i;

            define_this(token_id_txt, token)

        # Double check that no token id appears twice
        # Again, this can only happen, if quex itself produced the numeric values for the token
        token_list = token_id_db.values()
        for i, x in enumerate(token_list):
            for y in token_list[i+1:]:
                if x.number != y.number: continue
                error_msg("Token id '%s'" % x.name, x.file_name, x.line_n, DontExitF=True)
                error_msg("and token id '%s' have same numeric value '%s'." \
                          % (y.name, x.number), y.file_name, y.line_n, DontExitF=True)

        token_id_txt.append(epilog)

    tc_descr = lexer_mode.token_type_definition

    content = blue_print(file_str,
                         [["$$TOKEN_ID_DEFINITIONS$$", "".join(token_id_txt)],
                          ["$$DATE$$", time.asctime()],
                          ["$$TOKEN_CLASS_DEFINITION_FILE$$",
                           get_file_reference(lexer_mode.token_type_definition.get_file_name())],
                          ["$$INCLUDE_GUARD_EXT$$",
                           get_include_guard_extension(
                               LanguageDB["$namespace-ref"](tc_descr.name_space)
                               + "__" + tc_descr.class_name)],
                          ["$$TOKEN_PREFIX$$", setup.token_id_prefix]])

    write_safely_and_close(setup.output_token_id_file, content)
def get_implementation_of_mode_functions(mode, Modes):
    """Writes constructors and mode transition functions.

                  void quex::lexer::enter_EXAMPLE_MODE() { ... }

       where EXAMPLE_MODE is a lexer mode from the given lexer-modes, and
       'quex::lexer' is the lexical analysis class.

       mode  -- the mode object for which the functions are generated.
       Modes -- all modes of the analyzer (needed to resolve mode names).

       RETURNS: The filled 'mode_function_implementation_str' template.
    """
    def __filter_out_inheritable_only(ModeNameList):
        # Keep only those mode names that can actually be entered, i.e.
        # that are not marked with the option 'inheritable: only'.
        # (Loop variable renamed from 'mode' -- it used to shadow the
        #  enclosing function's parameter.)
        result = []
        for name in ModeNameList:
            for candidate in Modes:
                if candidate.name == name:
                    if candidate.options["inheritable"] != "only":
                        result.append(name)
                    break
        return result

    # (*) on enter
    on_entry_str = "#   ifdef QUEX_OPTION_RUNTIME_MODE_TRANSITION_CHECK\n"
    on_entry_str += "    QUEX_NAME(%s).has_entry_from(FromMode);\n" % mode.name
    on_entry_str += "#   endif\n"
    for code_info in mode.get_code_fragment_list("on_entry"):
        on_entry_str += code_info.get_code()
    if on_entry_str[-1] == "\n":
        on_entry_str = on_entry_str[:-1]

    # (*) on exit
    on_exit_str = "#   ifdef QUEX_OPTION_RUNTIME_MODE_TRANSITION_CHECK\n"
    on_exit_str += "    QUEX_NAME(%s).has_exit_to(ToMode);\n" % mode.name
    on_exit_str += "#   endif\n"
    for code_info in mode.get_code_fragment_list("on_exit"):
        on_exit_str += code_info.get_code()

    # (*) on indentation
    on_indentation_str = get_on_indentation_handler(mode)

    # (*) has base mode
    if mode.has_base_mode():
        base_mode_list = __filter_out_inheritable_only(
            mode.get_base_mode_name_list())
        has_base_mode_str = get_IsOneOfThoseCode(base_mode_list)
    else:
        has_base_mode_str = "    return false;"

    # (*) has entry from
    # BUG FIX: this used to read
    #   get_IsOneOfThoseCode(entry_list,
    #                        __filter_out_inheritable_only(ConsiderDerivedClassesF=true))
    # where lowercase 'true' raised a NameError that the former bare
    # 'except:' silently swallowed -- so the 'entry' option was never honored.
    try:
        entry_list = __filter_out_inheritable_only(mode.options["entry"])
        # check whether the mode we come from is an allowed mode
        has_entry_from_str = get_IsOneOfThoseCode(entry_list,
                                                  ConsiderDerivedClassesF=True)
    except KeyError:
        # No 'entry' option given => any mode may enter.
        has_entry_from_str = "    return true; /* default */"

    # (*) has exit to
    # BUG FIX: same lowercase 'true' defect as for the entry list above.
    try:
        exit_list = __filter_out_inheritable_only(mode.options["exit"])
        has_exit_to_str = get_IsOneOfThoseCode(exit_list,
                                               ConsiderDerivedClassesF=True)
    except KeyError:
        # No 'exit' option given => any mode may be entered from here.
        has_exit_to_str = "    return true; /* default */"

    txt = blue_print(
        mode_function_implementation_str,
        [
            ["$$ENTER-PROCEDURE$$", on_entry_str],
            ["$$EXIT-PROCEDURE$$", on_exit_str],
            #
            ["$$ON_INDENTATION-PROCEDURE$$", on_indentation_str],
            #
            ["$$HAS_BASE_MODE$$", has_base_mode_str],
            ["$$HAS_ENTRANCE_FROM$$", has_entry_from_str],
            ["$$HAS_EXIT_TO$$", has_exit_to_str],
            #
            ["$$MODE_NAME$$", mode.name],
        ])
    return txt
def get_implementation_of_mode_functions(mode, Modes):
    """Writes constructors and mode transition functions.

                  void quex::lexer::enter_EXAMPLE_MODE() { ... }

       where EXAMPLE_MODE is a lexer mode from the given lexer-modes, and
       'quex::lexer' is the lexical analysis class.

       mode  -- the mode object for which the functions are generated.
       Modes -- all modes of the analyzer (unused here; kept for interface
                compatibility with the sibling implementation).

       RETURNS: The filled 'mode_function_implementation_str' template.
    """
    # (*) on enter
    code_fragments = mode.on_entry_code_fragments()
    on_entry_str = "#ifdef __QUEX_OPTION_RUNTIME_MODE_TRANSITION_CHECK\n"
    on_entry_str += "assert(me->%s.has_entry_from(FromMode));\n" % mode.name
    on_entry_str += "#endif\n"
    for code_info in code_fragments:
        on_entry_str += code_info.get_code()
    if on_entry_str[-1] == "\n":
        on_entry_str = on_entry_str[:-1]

    # (*) on exit
    code_fragments = mode.on_exit_code_fragments()
    on_exit_str = "#ifdef __QUEX_OPTION_RUNTIME_MODE_TRANSITION_CHECK\n"
    on_exit_str += "assert(me->%s.has_exit_to(ToMode));\n" % mode.name
    on_exit_str += "#endif\n"
    for code_info in code_fragments:
        on_exit_str += code_info.get_code()

    # (*) on indentation
    code_fragments = mode.on_indentation_code_fragments()
    on_indentation_str = "assert(Indentation >= 0);"
    for code_info in code_fragments:
        on_indentation_str += code_info.get_code()

    # (*) has base mode
    if mode.get_base_modes() != []:
        has_base_mode_str = get_IsOneOfThoseCode(mode.get_base_modes())
    else:
        has_base_mode_str = "    return false;"

    # (*) has entry from
    # BUG FIX: 'ConsiderDerivedClassesF=true' used the undefined lowercase
    # name 'true'; the NameError was silently swallowed by the former bare
    # 'except:' so the 'entry' option was never honored.
    try:
        entry_list = mode.options["entry"]
        # check whether the mode we come from is an allowed mode
        has_entry_from_str = get_IsOneOfThoseCode(entry_list,
                                                  ConsiderDerivedClassesF=True)
    except KeyError:
        # No 'entry' option given => any mode may enter.
        has_entry_from_str = "    return true; // default"

    # (*) has exit to
    # BUG FIX: same lowercase 'true' defect as for the entry list above.
    try:
        exit_list = mode.options["exit"]
        has_exit_to_str = get_IsOneOfThoseCode(exit_list,
                                               ConsiderDerivedClassesF=True)
    except KeyError:
        # No 'exit' option given => any mode may be entered from here.
        has_exit_to_str = "    return true; // default"

    txt = blue_print(mode_function_implementation_str, [
        ["%%ENTER-PROCEDURE%%", on_entry_str],
        ["%%EXIT-PROCEDURE%%", on_exit_str],
        ["%%INDENTATION-PROCEDURE%%", on_indentation_str],
        ["%%HAS_BASE_MODE%%", has_base_mode_str],
        ["%%HAS_ENTRANCE_FROM%%", has_entry_from_str],
        ["%%HAS_EXIT_TO%%", has_exit_to_str],
        ["%%MODE_NAME%%", mode.name],
    ])
    return txt
def get_skipper(OpenerSequence, CloserSequence, Mode=None, IndentationCounterTerminalID=None, OnSkipRangeOpenStr=""):
    """Generate code that skips a nested range, delimited by the character
       sequences 'OpenerSequence' ... 'CloserSequence' (e.g. nested comments).

       OpenerSequence / CloserSequence -- non-empty lists of character codes.
       Mode                            -- mode object; used to decide whether the
                                          closer interacts with indentation counting.
       IndentationCounterTerminalID    -- terminal to jump to if the closer is part
                                          of the indentation counter's newline.
       OnSkipRangeOpenStr              -- user code for 'closer missing before EOF';
                                          if empty, a default handler is generated.

       RETURNS: (code string, local_variable_db)
    """
    assert OpenerSequence.__class__ == list
    assert len(OpenerSequence) >= 1
    assert map(type, OpenerSequence) == [int] * len(OpenerSequence)
    assert CloserSequence.__class__ == list
    assert len(CloserSequence) >= 1
    assert map(type, CloserSequence) == [int] * len(CloserSequence)
    assert OpenerSequence != CloserSequence

    LanguageDB = Setup.language_db

    skipper_index = sm_index.get()

    opener_str, opener_length_str, opener_comment_str = get_character_sequence(OpenerSequence)
    closer_str, closer_length_str, closer_comment_str = get_character_sequence(CloserSequence)

    if not end_delimiter_is_subset_of_indentation_counter_newline(Mode, CloserSequence):
        goto_after_end_of_skipping_str = "goto %s;" % get_label("$start", U=True)
    else:
        # If there is indentation counting involved, then the counter's terminal id must
        # be determined at this place.
        assert IndentationCounterTerminalID != None
        # If the ending delimiter is a subset of what the 'newline' pattern triggers
        # in indentation counting => move on to the indentation counter.
        goto_after_end_of_skipping_str = "goto %s;" % get_label("$terminal-direct",
                                                                IndentationCounterTerminalID, U=True)

    if OnSkipRangeOpenStr != "":
        on_skip_range_open_str = OnSkipRangeOpenStr
    else:
        on_skip_range_open_str = get_on_skip_range_open(Mode, CloserSequence)

    # 'counter' tracks the nesting level; 'reference_p' supports column counting.
    local_variable_db = {
        "counter": Variable("counter", "size_t", None, "0"),
        "QUEX_OPTION_COLUMN_NUMBER_COUNTING/reference_p":
            Variable("reference_p", "QUEX_TYPE_CHARACTER_POSITION", None,
                     "(QUEX_TYPE_CHARACTER_POSITION)0x0",
                     "QUEX_OPTION_COLUMN_NUMBER_COUNTING")
    }
    # BUG FIX: this assignment used to appear twice in a row; the duplicate
    # (dead) statement has been removed.
    reference_p_def = "    __QUEX_IF_COUNT_COLUMNS(reference_p = QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer));\n"

    # Before reloading, account for the columns passed so far; afterwards the
    # reference pointer must be re-anchored to the new buffer content.
    before_reload = "    __QUEX_IF_COUNT_COLUMNS_ADD((size_t)(QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer)\n" + \
                    "                                - reference_p));\n"
    after_reload = "        __QUEX_IF_COUNT_COLUMNS(reference_p = QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer));\n"

    if CloserSequence[-1] == ord('\n'):
        # Closer ends with newline => line count increases, column restarts at 1.
        end_procedure = "       __QUEX_IF_COUNT_LINES_ADD((size_t)1);\n"
        end_procedure += "       __QUEX_IF_COUNT_COLUMNS_SET((size_t)1);\n"
    else:
        end_procedure = "        __QUEX_IF_COUNT_COLUMNS_ADD((size_t)(QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer)\n" + \
                        "                                    - reference_p));\n"

    code_str = blue_print(template_str,
                          [
                              ["$$SKIPPER_INDEX$$", __nice(skipper_index)],
                              #
                              ["$$OPENER$$", opener_str],
                              ["$$OPENER_LENGTH$$", opener_length_str],
                              ["$$OPENER_COMMENT$$", opener_comment_str],
                              ["$$CLOSER$$", closer_str],
                              ["$$CLOSER_LENGTH$$", closer_length_str],
                              ["$$CLOSER_COMMENT$$", closer_comment_str],
                              #
                              ["$$INPUT_P_INCREMENT$$", LanguageDB["$input/increment"]],
                              ["$$INPUT_P_DECREMENT$$", LanguageDB["$input/decrement"]],
                              ["$$INPUT_GET$$", LanguageDB["$input/get"]],
                              ["$$IF_INPUT_EQUAL_DELIMITER_0$$", LanguageDB["$if =="]("Skipper$$SKIPPER_INDEX$$[0]")],
                              ["$$ENDIF$$", LanguageDB["$endif"]],
                              ["$$ENTRY$$", get_label("$entry", skipper_index)],
                              ["$$RELOAD$$", get_label("$reload", skipper_index)],
                              ["$$GOTO_AFTER_END_OF_SKIPPING$$", goto_after_end_of_skipping_str],
                              ["$$GOTO_RELOAD$$", get_label("$reload", skipper_index)],
                              # When things were skipped, no change to acceptance flags or modes has
                              # happend. One can jump immediately to the start without re-entry preparation.
                              ["$$GOTO_ENTRY$$", get_label("$entry", skipper_index)],
                              ["$$MARK_LEXEME_START$$", LanguageDB["$mark-lexeme-start"]],
                              ["$$ON_SKIP_RANGE_OPEN$$", on_skip_range_open_str],
                              #
                              ["$$LC_COUNT_COLUMN_N_POINTER_DEFINITION$$", reference_p_def],
                              ["$$LC_COUNT_IN_LOOP$$", line_column_counter_in_loop],
                              ["$$LC_COUNT_END_PROCEDURE$$", end_procedure],
                              ["$$LC_COUNT_BEFORE_RELOAD$$", before_reload],
                              ["$$LC_COUNT_AFTER_RELOAD$$", after_reload],
                          ])

    return code_str, local_variable_db
def write_engine_header(Modes, Setup):
    """Generate the lexical analyzer class header from its template and write
       it to 'Setup.output_file_stem'.

       Modes -- dict: mode name --> mode object.
       Setup -- global setup object (paths, class names, feature flags).

       Side effect: may overwrite 'Setup.input_derived_class_name' with the
       lexer class name if no derived class was specified.
    """
    QuexClassHeaderFileTemplate = (Setup.QUEX_TEMPLATE_DB_DIR
                                   + "/template/lexical_analyzer_class").replace("//", "/")
    CoreEngineDefinitionsHeader = (Setup.QUEX_TEMPLATE_DB_DIR + "/core_engine/").replace("//", "/")
    QuexClassHeaderFileOutput = Setup.output_file_stem
    LexerClassName = Setup.output_engine_name
    VersionID = Setup.input_application_version_id
    QuexVersionID = Setup.QUEX_VERSION

    # -- determine character type according to number of bytes per ucs character code point
    #    for the internal engine.
    quex_character_type_str = {1: "uint8_t ", 2: "uint16_t", 4: "uint32_t",
                               "wchar_t": "wchar_t"}[Setup.bytes_per_ucs_code_point]
    quex_lexeme_type_str = {1: "char ", 2: "int16_t", 4: "int32_t",
                            "wchar_t": "wchar_t"}[Setup.bytes_per_ucs_code_point]

    # are bytes of integers Setup 'little endian' or 'big endian' ?
    if Setup.byte_order == "little":
        quex_coding_name_str = {1: "ASCII", 2: "UCS-2LE", 4: "UCS-4LE",
                                "wchar_t": "WCHAR_T"}[Setup.bytes_per_ucs_code_point]
    else:
        quex_coding_name_str = {1: "ASCII", 2: "UCS-2BE", 4: "UCS-4BE",
                                "wchar_t": "WCHAR_T"}[Setup.bytes_per_ucs_code_point]

    # -- determine whether the lexical analyser needs indentation counting
    #    support. if one mode has an indentation handler, than indentation
    #    support must be provided.
    indentation_support_f = False
    for mode in Modes.values():
        if mode.on_indentation.get_code() != "":
            indentation_support_f = True
            break

    lex_id_definitions_str = ""
    # NOTE: First mode-id needs to be '1' for compatibility with flex generated engines
    i = 0
    for name in Modes.keys():
        i += 1
        lex_id_definitions_str += "const int LEX_ID_%s = %i;\n" % (name, i)

    include_guard_extension = get_include_guard_extension(Setup.output_file_stem)

    # -- instances of mode classes as members of the lexer
    mode_object_members_txt, \
    constructor_txt, \
    mode_specific_functions_txt, \
    friend_txt = \
        get_mode_class_related_code_fragments(Modes.values(), LexerClassName)

    # -- define a pointer that directly has the type of the derived class
    if Setup.input_derived_class_name == "":
        Setup.input_derived_class_name = LexerClassName
        derived_class_type_declaration = ""
    else:
        derived_class_type_declaration = "class %s;" % Setup.input_derived_class_name

    # -- the friends of the class
    friends_str = ""
    for friend in Setup.input_lexer_class_friends:
        friends_str += "    friend class %s;\n" % friend

    # -- the class body extension
    class_body_extension_str = lexer_mode.class_body.get_code()

    # -- the class constructor extension
    class_constructor_extension_str = lexer_mode.class_init.get_code()

    fh = open_file_or_die(QuexClassHeaderFileTemplate)
    template_code_txt = fh.read()
    fh.close()

    # -- check if exit/entry handlers have to be active
    entry_handler_active_f = False
    exit_handler_active_f = False
    for mode in Modes.values():
        if mode.on_entry_code_fragments() != []: entry_handler_active_f = True
        if mode.on_exit_code_fragments() != []: exit_handler_active_f = True

    txt = template_code_txt

    def set_switch(txt, SwitchF, Name):
        # Turn a '$$SWITCH$$ NAME' placeholder into an active '#define' or a
        # commented-out one, depending on the flag.
        if SwitchF:
            txt = txt.replace("$$SWITCH$$ %s" % Name, "#define    %s" % Name)
        else:
            txt = txt.replace("$$SWITCH$$ %s" % Name, "// #define %s" % Name)
        return txt

    txt = set_switch(txt, entry_handler_active_f, "__QUEX_OPTION_ON_ENTRY_HANDLER_PRESENT")
    txt = set_switch(txt, exit_handler_active_f, "__QUEX_OPTION_ON_EXIT_HANDLER_PRESENT")
    txt = set_switch(txt, indentation_support_f, "__QUEX_OPTION_INDENTATION_TRIGGER_SUPPORT")
    txt = set_switch(txt, True, "__QUEX_OPTION_SUPPORT_BEGIN_OF_LINE_PRE_CONDITION")
    txt = set_switch(txt, Setup.enable_iconv_f, "QUEX_OPTION_ENABLE_ICONV")
    txt = set_switch(txt, not Setup.disable_token_queue_f, "QUEX_OPTION_TOKEN_SENDING_VIA_QUEUE")
    txt = set_switch(txt, not Setup.disable_string_accumulator_f, "QUEX_OPTION_STRING_ACCUMULATOR")
    txt = set_switch(txt, Setup.post_categorizer_f, "QUEX_OPTION_POST_CATEGORIZER")
    txt = set_switch(txt, True, "QUEX_OPTION_VIRTUAL_FUNCTION_ON_ACTION_ENTRY")
    txt = set_switch(txt, True, "QUEX_OPTION_LINE_NUMBER_COUNTING")
    txt = set_switch(txt, True, "QUEX_OPTION_COLUMN_NUMBER_COUNTING")
    txt = set_switch(txt, Setup.output_debug_f, "QUEX_OPTION_DEBUG_TOKEN_SENDING")
    txt = set_switch(txt, Setup.output_debug_f, "QUEX_OPTION_DEBUG_MODE_TRANSITIONS")
    txt = set_switch(txt, Setup.output_debug_f, "QUEX_OPTION_DEBUG_QUEX_PATTERN_MATCHES")
    txt = set_switch(txt, True, "QUEX_OPTION_INCLUDE_STACK_SUPPORT")
    txt = set_switch(txt, not Setup.no_mode_transition_check_f, "QUEX_OPTION_RUNTIME_MODE_TRANSITION_CHECK")

    txt = blue_print(txt,
                     [
                         ["$$BUFFER_LIMIT_CODE$$", "0x%X" % Setup.buffer_limit_code],
                         ["$$CONSTRUCTOR_EXTENSTION$$", class_constructor_extension_str],
                         ["$$CONSTRUCTOR_MODE_DB_INITIALIZATION_CODE$$", constructor_txt],
                         ["$$CORE_ENGINE_DEFINITIONS_HEADER$$", CoreEngineDefinitionsHeader],
                         ["$$CLASS_BODY_EXTENSION$$", class_body_extension_str],
                         ["$$INCLUDE_GUARD_EXTENSION$$", include_guard_extension],
                         ["$$INITIAL_LEXER_MODE_ID$$", "LEX_ID_" + lexer_mode.initial_mode.get_code()],
                         ["$$LEXER_BUILD_DATE$$", time.asctime()],
                         ["$$LEXER_BUILD_VERSION$$", VersionID],
                         ["$$LEXER_CLASS_FRIENDS$$", friends_str],
                         ["$$LEXER_CLASS_NAME$$", LexerClassName],
                         ["$$LEXER_DERIVED_CLASS_DECL$$", derived_class_type_declaration],
                         ["$$LEXER_DERIVED_CLASS_NAME$$", Setup.input_derived_class_name],
                         ["$$LEX_ID_DEFINITIONS$$", lex_id_definitions_str],
                         ["$$MAX_MODE_CLASS_N$$", repr(len(Modes))],
                         ["$$MODE_CLASS_FRIENDS$$", friend_txt],
                         ["$$MODE_OBJECT_MEMBERS$$", mode_object_members_txt],
                         ["$$MODE_SPECIFIC_ANALYSER_FUNCTIONS$$", mode_specific_functions_txt],
                         ["$$PRETTY_INDENTATION$$", "     " + "     " * (len(LexerClassName)*2 + 2)],
                         ["$$QUEX_TEMPLATE_DIR$$", Setup.QUEX_TEMPLATE_DB_DIR],
                         ["$$QUEX_VERSION$$", QuexVersionID],
                         ["$$TOKEN_CLASS$$", Setup.input_token_class_name],
                         ["$$TOKEN_CLASS_DEFINITION_FILE$$", Setup.input_token_class_file.replace("//", "/")],
                         ["$$TOKEN_ID_DEFINITION_FILE$$", Setup.output_token_id_file.replace("//", "/")],
                         ["$$QUEX_CHARACTER_TYPE$$", quex_character_type_str],
                         ["$$QUEX_LEXEME_TYPE$$", quex_lexeme_type_str],
                         ["$$CORE_ENGINE_CHARACTER_CODING$$", quex_coding_name_str],
                         ["$$USER_DEFINED_HEADER$$", lexer_mode.header.get_code() + "\n"],
                     ])

    # Binary mode so the explicit os.linesep conversion below is not undone.
    fh_out = open(QuexClassHeaderFileOutput, "wb")
    if os.linesep != "\n": txt = txt.replace("\n", os.linesep)
    fh_out.write(txt)
    fh_out.close()
def get_skipper(EndSequence, Mode=None, IndentationCounterTerminalID=None, OnSkipRangeOpenStr=""):
    """Generate code that skips until the character sequence 'EndSequence'
       appears (range skipper, e.g. for comments 'until */').

       EndSequence                  -- non-empty list of character codes.
       Mode                         -- mode object; used to decide whether the
                                       delimiter interacts with indentation counting.
       IndentationCounterTerminalID -- terminal to jump to if the end delimiter is
                                       part of the indentation counter's newline.
       OnSkipRangeOpenStr           -- user code for 'delimiter missing before EOF';
                                       if empty, a default handler is generated.

       RETURNS: (code string, local_variable_db)
    """
    assert type(EndSequence) == list
    assert len(EndSequence) >= 1
    assert map(type, EndSequence) == [int] * len(EndSequence)

    local_variable_db = {}

    global template_str

    LanguageDB = Setup.language_db

    # Name the $$SKIPPER$$
    skipper_index = sm_index.get()

    # Determine the $$DELIMITER$$
    delimiter_str, \
    delimiter_length_str, \
    delimiter_comment_str \
        = get_character_sequence(EndSequence)

    delimiter_comment_str = LanguageDB["$comment"]("                         Delimiter: "
                                                   + delimiter_comment_str)

    # Determine the check for the tail of the delimiter:
    # the first delimiter character is matched by the template's main loop;
    # for each remaining character an explicit compare is generated that
    # returns to the entry label on mismatch.
    delimiter_remainder_test_str = ""
    if len(EndSequence) != 1:
        txt = ""
        i = 0
        for letter in EndSequence[1:]:
            i += 1
            txt += "    " + LanguageDB["$input/get-offset"](i-1) + "\n"
            txt += "    " + LanguageDB["$if !="]("Skipper$$SKIPPER_INDEX$$[%i]" % i)
            txt += "         " + "goto %s;" % get_label("$entry", skipper_index, U=True)
            txt += "    " + LanguageDB["$endif"]
        delimiter_remainder_test_str = txt

    if not end_delimiter_is_subset_of_indentation_counter_newline(Mode, EndSequence):
        goto_after_end_of_skipping_str = "goto %s;" % get_label("$start", U=True)
    else:
        # If there is indentation counting involved, then the counter's terminal id must
        # be determined at this place.
        assert IndentationCounterTerminalID != None
        # If the ending delimiter is a subset of what the 'newline' pattern triggers
        # in indentation counting => move on to the indentation counter.
        goto_after_end_of_skipping_str = "goto %s;" % get_label("$terminal-direct",
                                                                IndentationCounterTerminalID, U=True)

    if OnSkipRangeOpenStr != "":
        on_skip_range_open_str = OnSkipRangeOpenStr
    else:
        on_skip_range_open_str = get_on_skip_range_open(Mode, EndSequence)

    # The main part
    code_str = blue_print(template_str,
                          [["$$DELIMITER$$", delimiter_str],
                           ["$$DELIMITER_LENGTH$$", delimiter_length_str],
                           ["$$DELIMITER_COMMENT$$", delimiter_comment_str],
                           ["$$WHILE_1_PLUS_1_EQUAL_2$$", LanguageDB["$loop-start-endless"]],
                           ["$$END_WHILE$$", LanguageDB["$loop-end"]],
                           ["$$INPUT_P_INCREMENT$$", LanguageDB["$input/increment"]],
                           ["$$INPUT_P_DECREMENT$$", LanguageDB["$input/decrement"]],
                           ["$$INPUT_GET$$", LanguageDB["$input/get"]],
                           ["$$IF_INPUT_EQUAL_DELIMITER_0$$", LanguageDB["$if =="]("Skipper$$SKIPPER_INDEX$$[0]")],
                           ["$$ENDIF$$", LanguageDB["$endif"]],
                           ["$$ENTRY$$", get_label("$entry", skipper_index)],
                           ["$$RELOAD$$", get_label("$reload", skipper_index)],
                           ["$$GOTO_ENTRY$$", get_label("$entry", skipper_index, U=True)],
                           # When things were skipped, no change to acceptance flags or modes has
                           # happend. One can jump immediately to the start without re-entry preparation.
                           ["$$GOTO_AFTER_END_OF_SKIPPING$$", goto_after_end_of_skipping_str],
                           ["$$MARK_LEXEME_START$$", LanguageDB["$mark-lexeme-start"]],
                           ["$$DELIMITER_REMAINDER_TEST$$", delimiter_remainder_test_str],
                           ["$$ON_SKIP_RANGE_OPEN$$", on_skip_range_open_str],
                           ])

    # Line and column number counting
    code_str, reference_p_f = __lc_counting_replacements(code_str, EndSequence)

    # The finishing touch
    code_str = blue_print(code_str,
                          [["$$SKIPPER_INDEX$$", __nice(skipper_index)],
                           ["$$GOTO_RELOAD$$", get_label("$reload", skipper_index)]])

    # 'reference_p' is only needed when the counting code actually refers to it.
    if reference_p_f:
        local_variable_db["QUEX_OPTION_COLUMN_NUMBER_COUNTING/reference_p"] = \
            Variable("reference_p", "QUEX_TYPE_CHARACTER_POSITION", None,
                     "(QUEX_TYPE_CHARACTER_POSITION)0x0",
                     "QUEX_OPTION_COLUMN_NUMBER_COUNTING")

    return code_str, local_variable_db
def __range_skipper_lc_counting_replacements(code_str, EndSequence):
    """Line and Column Number Counting(Range Skipper):

         -- in loop if there appears a newline, then do:
            increment line_n
            set position from where to count column_n
         -- at end of skipping do one of the following:
            if end delimiter contains newline:
               column_n = number of letters since last new line in end delimiter
               increment line_n by number of newlines in end delimiter.
               (NOTE: in this case the setting of the position from where to
                      count the column_n can be omitted.)
            else:
               column_n = current_position - position from where to count column number.

       NOTE: On reload we do count the column numbers and reset the column_p.
    """
    variable_definition = \
        "#   if defined(QUEX_OPTION_LINE_NUMBER_COUNTING) || defined(QUEX_OPTION_COLUMN_NUMBER_COUNTING)\n" + \
        "#    ifdef QUEX_OPTION_COLUMN_NUMBER_COUNTING\n" + \
        "    QUEX_CHARACTER_POSITION_TYPE column_count_p_$$SKIPPER_INDEX$$ = QuexBuffer_tell_memory_adr(&me->buffer);\n" + \
        "#    endif\n" + \
        "#   endif\n"

    in_loop = ""
    end_procedure = ""
    exit_loop = ""

    new_line_detection_in_loop_enabled_f = True

    # Does the end delimiter contain a newline?
    try:
        index = EndSequence.index(ord("\n"))
    except:
        index = -1
    if index != -1:
        if index == 0:
            # Inside the skipped range, there cannot have been a newline
            new_line_detection_in_loop_enabled_f = False
            exit_loop = "#       ifdef QUEX_OPTION_LINE_NUMBER_COUNTING\n" + \
                        "        ++(self.counter._line_number_at_end); /* First limit character was the newline */\n" \
                        "#       endif"

        # If the first character in the delimiter is newline, then it was counted alread, see above.
        delimiter_newline_n = EndSequence[1:].count(ord("\n"))
        if delimiter_newline_n != 0:
            end_procedure += "#   ifdef QUEX_OPTION_LINE_NUMBER_COUNTING\n" + \
                             "    self.counter._line_number_at_end += %i;\n" % delimiter_newline_n + \
                             "#   endif\n"

        # If delimiter contains newline, then the column number is identical to the distance
        # of the last newline to the end of the delimiter.
        dummy = deepcopy(EndSequence)
        dummy.reverse()
        delimiter_tail_n = dummy.index(ord("\n")) + 1
        # NOTE(review): 'delimiter_tail_n' is '.index(...) + 1' and '.index()'
        # returns >= 0 here (a newline is known to be present), so this
        # condition is always true -- the guard is dead code.
        if delimiter_tail_n != 0:
            end_procedure += "#   ifdef QUEX_OPTION_COLUMN_NUMBER_COUNTING\n" + \
                             "    self.counter._column_number_at_end = %i;\n" % delimiter_tail_n + \
                             "#   endif\n"
    else:
        # No newline in the delimiter => column number is the distance from
        # the last recorded column count position.
        end_procedure = "#   ifdef QUEX_OPTION_COLUMN_NUMBER_COUNTING\n" + \
                        "    self.counter._column_number_at_end += QuexBuffer_tell_memory_adr(&me->buffer)\n" + \
                        "                                          - column_count_p_$$SKIPPER_INDEX$$;\n" + \
                        "#   endif\n"

    # Account for passed columns before the buffer content is replaced ...
    before_reload = "#   ifdef QUEX_OPTION_COLUMN_NUMBER_COUNTING\n" + \
                    "    self.counter._column_number_at_end += QuexBuffer_tell_memory_adr(&me->buffer)\n" + \
                    "                                          - column_count_p_$$SKIPPER_INDEX$$;\n" + \
                    "#   endif\n"
    # ... and re-anchor the counting position afterwards.
    after_reload = "#   ifdef QUEX_OPTION_COLUMN_NUMBER_COUNTING\n" + \
                   "    column_count_p_$$SKIPPER_INDEX$$ = QuexBuffer_tell_memory_adr(&me->buffer);\n" + \
                   "#   endif\n"

    if new_line_detection_in_loop_enabled_f:
        in_loop = lc_counter_in_loop

    return blue_print(code_str,
                      [["$$LC_COUNT_COLUMN_N_POINTER_DEFINITION$$", variable_definition],
                       ["$$LC_COUNT_IN_LOOP$$", in_loop],
                       ["$$LC_COUNT_END_PROCEDURE$$", end_procedure],
                       ["$$LC_COUNT_BEFORE_RELOAD$$", before_reload],
                       ["$$LC_COUNT_AFTER_RELOAD$$", after_reload],
                       ["$$LC_COUNT_AT_LOOP_EXIT$$", exit_loop],
                       ])
def _do(Descr):
    """Fill the token-class templates with the information in the
    'TokenTypeDescriptor' object 'Descr'.

    RETURNS: (declaration text, implementation text) -- the token class
             header and its '.i' implementation counterpart as two strings.
    """
    # The following things must be ensured before the function is called
    assert Descr != None
    assert Descr.__class__.__name__ == "TokenTypeDescriptor"
    ## ALLOW: Descr.get_member_db().keys() == []

    # Template files live under the language-specific code base directory.
    TemplateFile = QUEX_PATH \
                   + Setup.language_db["$code_base"] \
                   + Setup.language_db["$token_template_file"]

    TemplateIFile = QUEX_PATH \
                    + Setup.language_db["$code_base"] \
                    + Setup.language_db["$token_template_i_file"]

    template_str   = open_file_or_die(TemplateFile, Mode="rb").read()
    template_i_str = open_file_or_die(TemplateIFile, Mode="rb").read()

    # A class meant to be derived from needs a virtual destructor.
    virtual_destructor_str = ""
    if Descr.open_for_derivation_f:
        virtual_destructor_str = "virtual "

    if Descr.copy.get_pure_code() == "":
        # Default copy operation: Plain Copy of token memory
        copy_str = "__QUEX_STD_memcpy((void*)__this, (void*)__That, sizeof(QUEX_TYPE_TOKEN));\n"
    else:
        copy_str = Descr.copy.get_code()

    take_text_str = Descr.take_text.get_code()
    if take_text_str == "":
        take_text_str = "return true;\n"

    include_guard_extension_str = get_include_guard_extension(
        Setup.language_db["$namespace-ref"](Descr.name_space)
        + "__" + Descr.class_name)

    # In case of plain 'C' the class name must incorporate the namespace (list)
    token_class_name = Descr.class_name
    if Setup.language == "C":
        token_class_name = Setup.token_class_name_safe

    # (*) token class declaration
    txt = blue_print(template_str,
                     [["$$BODY$$",                    Descr.body.get_code()],
                      ["$$CONSTRUCTOR$$",             Descr.constructor.get_code()],
                      ["$$COPY$$",                    copy_str],
                      ["$$DESTRUCTOR$$",              Descr.destructor.get_code()],
                      ["$$DISTINCT_MEMBERS$$",        get_distinct_members(Descr)],
                      ["$$FOOTER$$",                  Descr.footer.get_code()],
                      ["$$FUNC_TAKE_TEXT$$",          take_text_str],
                      ["$$HEADER$$",                  Descr.header.get_code()],
                      ["$$INCLUDE_GUARD_EXTENSION$$", include_guard_extension_str],
                      ["$$NAMESPACE_CLOSE$$",         Setup.language_db["$namespace-close"](Descr.name_space)],
                      ["$$NAMESPACE_OPEN$$",          Setup.language_db["$namespace-open"](Descr.name_space)],
                      ["$$QUICK_SETTERS$$",           get_quick_setters(Descr)],
                      ["$$SETTERS_GETTERS$$",         get_setter_getter(Descr)],
                      ["$$TOKEN_CLASS$$",             token_class_name],
                      ["$$TOKEN_REPETITION_N_GET$$",  Descr.repetition_get.get_code()],
                      ["$$TOKEN_REPETITION_N_SET$$",  Descr.repetition_set.get_code()],
                      ["$$UNION_MEMBERS$$",           get_union_members(Descr)],
                      ["$$VIRTUAL_DESTRUCTOR$$",      virtual_destructor_str],
                      ])

    # (*) token class implementation ('.i' file)
    txt_i = blue_print(template_i_str,
                       [["$$CONSTRUCTOR$$",             Descr.constructor.get_code()],
                        ["$$COPY$$",                    copy_str],
                        ["$$DESTRUCTOR$$",              Descr.destructor.get_code()],
                        ["$$FOOTER$$",                  Descr.footer.get_code()],
                        ["$$FUNC_TAKE_TEXT$$",          take_text_str],
                        ["$$INCLUDE_GUARD_EXTENSION$$", include_guard_extension_str],
                        ["$$NAMESPACE_CLOSE$$",         Setup.language_db["$namespace-close"](Descr.name_space)],
                        ["$$NAMESPACE_OPEN$$",          Setup.language_db["$namespace-open"](Descr.name_space)],
                        ["$$TOKEN_CLASS$$",             token_class_name],
                        ["$$TOKEN_REPETITION_N_GET$$",  Descr.repetition_get.get_code()],
                        ["$$TOKEN_REPETITION_N_SET$$",  Descr.repetition_set.get_code()],
                        ])

    # Return declaration and implementation as two strings
    return txt, txt_i
def get_range_skipper(EndSequence, LanguageDB, MissingClosingDelimiterAction=""):
    """Generate code that skips input until the character sequence
    'EndSequence' (e.g. the closing delimiter of a range/comment) has been
    passed.

    EndSequence                   -- the delimiter as a list of integer
                                     character codes.
    LanguageDB                    -- dictionary of target-language constructs.
    MissingClosingDelimiterAction -- code to execute if the stream ends
                                     before the delimiter appears.

    RETURNS: generated code as a string.
    """
    assert EndSequence.__class__ == list
    assert len(EndSequence) >= 1
    # Python 2: map() returns a list, so this checks all elements are ints.
    assert map(type, EndSequence) == [int] * len(EndSequence)

    # Name the $$SKIPPER$$
    skipper_index = sm_index.get()

    # Determine the $$DELIMITER$$ (hex byte list + human-readable comment)
    delimiter_str = ""
    delimiter_comment_str = " Delimiter: "
    for letter in EndSequence:
        delimiter_comment_str += "'%s', " % utf8.map_unicode_to_utf8(letter)
        delimiter_str += "0x%X, " % letter
    delimiter_length_str = "%i" % len(EndSequence)
    delimiter_comment_str = LanguageDB["$comment"](delimiter_comment_str)

    # Determine the check for the tail of the delimiter
    # (each non-first character is compared; a mismatch re-enters the loop).
    delimiter_remainder_test_str = ""
    if len(EndSequence) != 1:
        txt = ""
        i = 0
        for letter in EndSequence[1:]:
            i += 1
            txt += " " + LanguageDB["$input/get-offset"](i-1) + "\n"
            txt += " " + LanguageDB["$if !="]("Skipper$$SKIPPER_INDEX$$[%i]" % i)
            txt += " " + LanguageDB["$goto"]("$entry", skipper_index) + "\n"
            txt += " " + LanguageDB["$endif"]
        delimiter_remainder_test_str = txt

    # The main part
    code_str = blue_print(range_skipper_template,
                          [["$$DELIMITER$$",                  delimiter_str],
                           ["$$DELIMITER_LENGTH$$",           delimiter_length_str],
                           ["$$DELIMITER_COMMENT$$",          delimiter_comment_str],
                           ["$$WHILE_1_PLUS_1_EQUAL_2$$",     LanguageDB["$loop-start-endless"]],
                           ["$$END_WHILE$$",                  LanguageDB["$loop-end"]],
                           ["$$INPUT_P_INCREMENT$$",          LanguageDB["$input/increment"]],
                           ["$$INPUT_P_DECREMENT$$",          LanguageDB["$input/decrement"]],
                           ["$$INPUT_GET$$",                  LanguageDB["$input/get"]],
                           ["$$IF_INPUT_EQUAL_DELIMITER_0$$", LanguageDB["$if =="]("Skipper$$SKIPPER_INDEX$$[0]")],
                           ["$$BREAK$$",                      LanguageDB["$break"]],
                           ["$$ENDIF$$",                      LanguageDB["$endif"]],
                           ["$$ENTRY$$",                      LanguageDB["$label-def"]("$entry", skipper_index)],
                           ["$$DROP_OUT$$",                   LanguageDB["$label-def"]("$drop-out", skipper_index)],
                           ["$$GOTO_ENTRY$$",                 LanguageDB["$goto"]("$entry", skipper_index)],
                           ["$$GOTO_REENTRY_PREPARATION$$",   LanguageDB["$goto"]("$re-start")],
                           ["$$MARK_LEXEME_START$$",          LanguageDB["$mark-lexeme-start"]],
                           ["$$DELIMITER_REMAINDER_TEST$$",   delimiter_remainder_test_str],
                           ["$$SET_INPUT_P_BEHIND_DELIMITER$$", LanguageDB["$input/add"](len(EndSequence)-1)],
                           ["$$MISSING_CLOSING_DELIMITER$$",  MissingClosingDelimiterAction],
                           ])

    # Line and column number counting
    code_str = __range_skipper_lc_counting_replacements(code_str, EndSequence)

    # The finishing touch: resolve the remaining skipper-specific placeholders
    code_str = blue_print(code_str,
                          [["$$SKIPPER_INDEX$$", __nice(skipper_index)],
                           ["$$GOTO_DROP_OUT$$", LanguageDB["$goto"]("$drop-out", skipper_index)]])

    return code_str
def get_skipper(TriggerSet):
    """This function implements simple 'skipping' in the sense of passing by
       characters that belong to a given set of characters--the TriggerSet.

       RETURNS: (code, local_variable_db) -- the list of generated code
                pieces, and the variables the caller must declare for them.
    """
    global template_str
    assert TriggerSet.__class__.__name__ == "NumberSet"
    assert not TriggerSet.is_empty()

    LanguageDB = Setup.language_db

    skipper_index = sm_index.get()

    # Mini trigger map:  [ trigger set ] --> loop start
    # That means: As long as characters of the trigger set appear, we go to the loop start.
    transition_map = TransitionMap()  # (don't worry about 'drop-out-ranges' etc.)
    transition_map.add_transition(TriggerSet, skipper_index)
    # On buffer limit code, the skipper must transit to a dedicated reloader
    iteration_code = transition_block.do(transition_map.get_trigger_map(),
                                         skipper_index,
                                         DSM=None,
                                         GotoReload_Str="goto %s;" % get_label("$reload", skipper_index))

    comment_str = LanguageDB["$comment"]("Skip any character in " + TriggerSet.get_utf8_string())

    # Line and column number counting
    # NOTE(review): '__lc_counting_replacements' as defined elsewhere in this
    # file returns a (string, flag) tuple -- confirm which definition is in
    # effect here, since 'prolog'/'epilog' are used as plain strings below.
    prolog = __lc_counting_replacements(prolog_txt, TriggerSet)
    epilog = __lc_counting_replacements(epilog_txt, TriggerSet)

    prolog = blue_print(prolog,
                        [["$$DELIMITER_COMMENT$$", comment_str],
                         ["$$SKIPPER_INDEX$$",     "%i" % skipper_index],
                         ["$$INPUT_GET$$",         LanguageDB["$input/get"]],
                         ])

    epilog = blue_print(epilog,
                        [["$$INPUT_P_INCREMENT$$",             LanguageDB["$input/increment"]],
                         ["$$INPUT_P_DECREMENT$$",             LanguageDB["$input/decrement"]],
                         ["$$IF_INPUT_EQUAL_DELIMITER_0$$",    LanguageDB["$if =="]("SkipDelimiter$$SKIPPER_INDEX$$[0]")],
                         ["$$ENDIF$$",                         LanguageDB["$endif"]],
                         ["$$LOOP_REENTRANCE$$",               get_label("$entry", skipper_index)],
                         ["$$INPUT_EQUAL_BUFFER_LIMIT_CODE$$", LanguageDB["$BLC"]],
                         ["$$RELOAD$$",                        get_label("$reload", skipper_index)],
                         ["$$DROP_OUT_DIRECT$$",               get_label("$drop-out", skipper_index, U=True)],
                         ["$$SKIPPER_INDEX$$",                 "%i" % skipper_index],
                         ["$$GOTO_TERMINAL_EOF$$",             get_label("$terminal-EOF", U=True)],
                         # When things were skipped, no change to acceptance flags or modes has
                         # happend. One can jump immediately to the start without re-entry preparation.
                         ["$$GOTO_START$$",                    get_label("$start", U=True)],
                         ["$$MARK_LEXEME_START$$",             LanguageDB["$mark-lexeme-start"]],
                         ])

    code = [prolog]
    code.extend(iteration_code)
    code.append(epilog)

    # Variable needed by the generated column counting code.
    local_variable_db = {}
    local_variable_db["QUEX_OPTION_COLUMN_NUMBER_COUNTING/reference_p"] = \
        Variable("reference_p",
                 "QUEX_TYPE_CHARACTER_POSITION",
                 None,
                 "(QUEX_TYPE_CHARACTER_POSITION)0x0",
                 "QUEX_OPTION_COLUMN_NUMBER_COUNTING")

    return code, local_variable_db
def __terminal_states(SMD, action_db, OnFailureAction, EndOfStreamAction,
                      SupportBeginOfLineF, PreConditionIDList, LanguageDB):
    """Generate the terminal states (pattern actions, failure, end-of-stream)
       and the re-entry preparation for the analyzer.

       NOTE: During backward-lexing, for a pre-condition, there is not need
             for terminal states, since only the flag 'pre-condition
             fulfilled' is raised.

       RETURNS: list of code pieces / Address objects.
    """
    assert SMD.__class__.__name__ == "StateMachineDecorator"
    sm = SMD.sm()
    # NOTE(review): the following two lists and 'sm' are computed but not
    # referenced again in this function.
    PostConditionedStateMachineID_List = SMD.post_contexted_sm_id_list()
    DirectlyReachedTerminalID_List = SMD.directly_reached_terminal_id_list()

    # (*) specific terminal states of patterns (entered from acceptance states)
    specific_terminal_states = []
    for state_machine_id, pattern_action_info in action_db.items():
        code = get_terminal_code(state_machine_id, SMD, pattern_action_info,
                                 SupportBeginOfLineF, LanguageDB)
        specific_terminal_states.extend(code)

    # If there is at least a single terminal, the 're-entry' preparation must
    # be accomplished (registers the label as 'used').
    if len(action_db) != 0:
        get_label("$re-start", U=True)

    # (*) preparation of the reentry without return:
    #     delete all pre-condition fullfilled flags
    delete_pre_context_flags = []
    for pre_context_sm_id in PreConditionIDList:
        delete_pre_context_flags.append(" ")
        delete_pre_context_flags.append(
            LanguageDB["$assignment"]("pre_context_%s_fulfilled_f" % __nice(pre_context_sm_id), 0))

    # -- execute 'on_failure' pattern action
    # -- goto initial state
    end_of_stream_code_action_str = __adorn_action_code(EndOfStreamAction, SMD,
                                                        SupportBeginOfLineF)

    # -- FAILURE ACTION: Under 'normal' circumstances the on_failure action is
    #    simply to be executed since the 'get_forward()' incremented the
    #    'current' pointer. HOWEVER, when end of file has been reached the
    #    'current' pointer has to be reset so that the initial state can drop
    #    out on the buffer limit code and then transit to the end of file
    #    action.
    # NOTE: It is possible that 'miss' happens after a chain of characters
    #       appeared. In any case the input pointer must be setup right after
    #       the lexeme start. This way, the lexer becomes a new chance as soon
    #       as possible.
    on_failure = __terminal_on_failure_prolog(LanguageDB)
    msg = __adorn_action_code(OnFailureAction, SMD, SupportBeginOfLineF)
    on_failure.append(msg)

    # NOTE(review): 'precondition_involved_f' is computed but not used below.
    if PreConditionIDList == []:
        precondition_involved_f = "0"
    else:
        precondition_involved_f = "1"

    prolog = __terminal_state_prolog

    router = Address("$terminal-router", None,
                     [blue_print(__terminal_router_prolog_str,
                                 [["$$RESTORE_LAST_ACCEPTANCE_POS$$",
                                   LanguageDB["$input/seek_position"]("last_acceptance_input_position")],
                                  ["$$TERMINAL_FAILURE-REF$$",
                                   "QUEX_LABEL(%i)" % get_address("$terminal-FAILURE")],
                                  ["$$TERMINAL_FAILURE$$", get_label("$terminal-FAILURE")],
                                  ]),
                      # DO NOT 'U=True' for the state router. This is done
                      # automatically if 'goto reload' is used.
                      get_label("$state-router"), ";",
                      __terminal_router_epilog_str,
                      ])

    epilog = blue_print(__terminal_state_epilog,
                        [["$$FAILURE_ACTION$$",             "".join(on_failure)],
                         ["$$END_OF_STREAM_ACTION$$",       end_of_stream_code_action_str],
                         ["$$TERMINAL_END_OF_STREAM-DEF$$", get_label("$terminal-EOF")],
                         ["$$TERMINAL_FAILURE-DEF$$",       get_label("$terminal-FAILURE")],
                         ["$$STATE_MACHINE_NAME$$",         SMD.name()],
                         ["$$GOTO_START_PREPARATION$$",     get_label("$re-start", U=True)],
                         ])

    reentry_preparation = blue_print(__on_continue_reentry_preparation_str,
                                     [["$$REENTRY_PREPARATION$$",                    get_label("$re-start")],
                                      ["$$DELETE_PRE_CONDITION_FULLFILLED_FLAGS$$",  "".join(delete_pre_context_flags)],
                                      ["$$GOTO_START$$",                             get_label("$start", U=True)],
                                      ["$$COMMENT_ON_POST_CONTEXT_INITIALIZATION$$", comment_on_post_context_position_init_str],
                                      ["$$TERMINAL_FAILURE-REF$$",                   "QUEX_LABEL(%i)" % get_address("$terminal-FAILURE")],
                                      ])

    txt = []
    txt.append(router)
    txt.append(prolog)
    txt.extend(specific_terminal_states)
    txt.append(epilog)
    txt.append(reentry_preparation)

    return txt
def __lc_counting_replacements(code_str, EndSequence):
    """Line and Column Number Counting (Range Skipper):

       -- in loop: if a newline appears, then increment line_n and store the
          position from where to count column_n.

       -- at end of skipping do one of the following:
          if the end delimiter contains a newline:
             column_n = number of letters since the last new line in the end
                        delimiter;
             line_n  += number of newlines in the end delimiter.
             (NOTE: in this case the setting of the position from where to
             count the column_n can be omitted.)
          else:
             column_n = current_position - position from where to count the
                        column number.

       NOTE: On reload we do count the column numbers and reset the column_p.

       RETURNS: (filled-in code string, reference_p_required_f)
    """
    LanguageDB = Setup.language_db

    def get_character_n_after_last_newline(Sequence):
        # Distance from the end of 'Sequence' to its last newline;
        # -1 when the sequence contains no newline.
        tmp = copy(Sequence)
        tmp.reverse()
        try:
            return tmp.index(ord("\n"))
        except:
            return -1

    char_n_after_last_newline = get_character_n_after_last_newline(EndSequence)

    reference_p_def = ""
    in_loop         = ""
    end_procedure   = ""
    exit_loop       = ""
    before_reload   = ""
    after_reload    = ""
    # NOTE(review): 'exit_loop' is re-assigned immediately below; the first
    # "" assignment is dead.
    exit_loop = " " + LanguageDB["$break"]
    on_first_delimiter = ""

    reference_p_required_f = False

    # Line/Column Counting:
    newline_number_in_delimiter = EndSequence.count(ord("\n"))

    if EndSequence == map(ord, "\n") or EndSequence == map(ord, "\r\n"):
        # (1) If the end-delimiter is a newline
        #     => there cannot appear a newline inside the comment
        #     => IN LOOP: no line number increment
        #                 no reference pointer required for column counting
        end_procedure += " __QUEX_IF_COUNT_COLUMNS_SET((size_t)1);\n"
        end_procedure += " __QUEX_IF_COUNT_LINES_ADD((size_t)1);\n"
    else:
        # (2) If end-delimiter is NOT newline
        #     => there can appear a newline inside the comment
        if newline_number_in_delimiter == 0:
            # -- no newlines in delimiter => line and column number
            #    must be counted.
            in_loop = line_column_counter_in_loop
            end_procedure = (
                " __QUEX_IF_COUNT_COLUMNS_ADD((size_t)(QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer)\n"
                + " - reference_p));\n"
            )
            reference_p_required_f = True
        else:
            # -- newline inside delimiter => line number must be counted
            #    column number is fixed.
            end_procedure = " __QUEX_IF_COUNT_COLUMNS_SET((size_t)%i);\n" % (char_n_after_last_newline + 1)

            if EndSequence[0] == ord("\n") or len(EndSequence) > 1 and EndSequence[0:2] == [ord("\r"), ord("\n")]:
                # If the first character in the sequence is newline, then the
                # line counting may is prevented by the loop exit. Now, we
                # need to count.
                on_first_delimiter = (
                    "/* First delimiter char was a newline */\n"
                    + " __QUEX_IF_COUNT_LINES_ADD((size_t)1);\n"
                )
                end_procedure += " __QUEX_IF_COUNT_LINES_ADD((size_t)%i);\n" % (newline_number_in_delimiter - 1)
            else:
                in_loop = line_counter_in_loop
                end_procedure += " __QUEX_IF_COUNT_LINES_ADD((size_t)%i);\n" % newline_number_in_delimiter

    if reference_p_required_f:
        # The reference pointer is only needed for column counting without
        # newlines in the delimiter; it must be maintained around reloads.
        reference_p_def = " __QUEX_IF_COUNT_COLUMNS(reference_p = QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer));\n"
        before_reload = (
            " __QUEX_IF_COUNT_COLUMNS_ADD((size_t)(QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer)\n"
            + " - reference_p));\n"
        )
        after_reload = (
            " __QUEX_IF_COUNT_COLUMNS(reference_p = QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer));\n"
        )

    if len(EndSequence) > 1:
        # The loop exit happened on the delimiter's first character; step the
        # input pointer behind the remaining tail first.
        end_procedure = LanguageDB["$input/add"](len(EndSequence) - 1) + "\n" + end_procedure

    return (
        blue_print(
            code_str,
            [
                ["$$LC_COUNT_COLUMN_N_POINTER_DEFINITION$$", reference_p_def],
                ["$$LC_COUNT_IN_LOOP$$",                     in_loop],
                ["$$LC_COUNT_END_PROCEDURE$$",               end_procedure],
                ["$$LC_COUNT_BEFORE_RELOAD$$",               before_reload],
                ["$$LC_COUNT_AFTER_RELOAD$$",                after_reload],
                ["$$LC_ON_FIRST_DELIMITER$$",                on_first_delimiter],
                ["$$LC_COUNT_LOOP_EXIT$$",                   exit_loop],
            ],
        ),
        reference_p_required_f,
    )
def __range_skipper_lc_counting_replacements(code_str, EndSequence):
    """Line and Column Number Counting (Range Skipper).

       NOTE(review): this is a byte-for-byte re-definition of the function of
       the same name appearing earlier in this file; in Python the later
       definition shadows the earlier one.

       -- in loop: if a newline appears, then increment line_n and store the
          position from where to count column_n.
       -- at end of skipping:
          if the end delimiter contains a newline:
             column_n = letters since the last newline in the end delimiter;
             line_n  += number of newlines in the end delimiter.
          else:
             column_n = current position - stored count position.

       NOTE: On reload we do count the column numbers and reset the column_p.
    """
    # Reference pointer for column counting; used by the 'no newline in
    # delimiter' end procedure and by the reload fragments below.
    variable_definition = \
        "# if defined(QUEX_OPTION_LINE_NUMBER_COUNTING) || defined(QUEX_OPTION_COLUMN_NUMBER_COUNTING)\n" + \
        "# ifdef QUEX_OPTION_COLUMN_NUMBER_COUNTING\n" + \
        " QUEX_CHARACTER_POSITION_TYPE column_count_p_$$SKIPPER_INDEX$$ = QuexBuffer_tell_memory_adr(&me->buffer);\n" + \
        "# endif\n" + \
        "# endif\n"

    in_loop       = ""
    end_procedure = ""
    exit_loop     = ""
    new_line_detection_in_loop_enabled_f = True

    # Does the end delimiter contain a newline?
    try:
        index = EndSequence.index(ord("\n"))
    except:
        index = -1

    if index != -1:
        # -- the delimiter contains a newline
        if index == 0:
            # Inside the skipped range, there cannot have been a newline
            new_line_detection_in_loop_enabled_f = False
            exit_loop = "# ifdef QUEX_OPTION_LINE_NUMBER_COUNTING\n" + \
                        " ++(self.counter._line_number_at_end); /* First limit character was the newline */\n" \
                        "# endif"

        # If the first character in the delimiter is newline, then it was counted alread, see above.
        delimiter_newline_n = EndSequence[1:].count(ord("\n"))
        if delimiter_newline_n != 0:
            end_procedure += "# ifdef QUEX_OPTION_LINE_NUMBER_COUNTING\n" + \
                             " self.counter._line_number_at_end += %i;\n" % delimiter_newline_n + \
                             "# endif\n"

        # If delimiter contains newline, then the column number is identical to the distance
        # of the last newline to the end of the delimiter.
        dummy = deepcopy(EndSequence)
        dummy.reverse()
        delimiter_tail_n = dummy.index(ord("\n")) + 1
        # NOTE(review): always >= 1 here, so this guard is always true.
        if delimiter_tail_n != 0:
            end_procedure += "# ifdef QUEX_OPTION_COLUMN_NUMBER_COUNTING\n" + \
                             " self.counter._column_number_at_end = %i;\n" % delimiter_tail_n + \
                             "# endif\n"
    else:
        # -- no newline in the delimiter: column = distance to reference ptr.
        end_procedure = "# ifdef QUEX_OPTION_COLUMN_NUMBER_COUNTING\n" + \
                        " self.counter._column_number_at_end += QuexBuffer_tell_memory_adr(&me->buffer)\n" + \
                        " - column_count_p_$$SKIPPER_INDEX$$;\n" + \
                        "# endif\n"

    before_reload = "# ifdef QUEX_OPTION_COLUMN_NUMBER_COUNTING\n" + \
                    " self.counter._column_number_at_end += QuexBuffer_tell_memory_adr(&me->buffer)\n" + \
                    " - column_count_p_$$SKIPPER_INDEX$$;\n" + \
                    "# endif\n"
    after_reload = "# ifdef QUEX_OPTION_COLUMN_NUMBER_COUNTING\n" + \
                   " column_count_p_$$SKIPPER_INDEX$$ = QuexBuffer_tell_memory_adr(&me->buffer);\n" + \
                   "# endif\n"

    if new_line_detection_in_loop_enabled_f:
        in_loop = lc_counter_in_loop

    return blue_print(code_str,
                      [["$$LC_COUNT_COLUMN_N_POINTER_DEFINITION$$", variable_definition],
                       ["$$LC_COUNT_IN_LOOP$$",                     in_loop],
                       ["$$LC_COUNT_END_PROCEDURE$$",               end_procedure],
                       ["$$LC_COUNT_BEFORE_RELOAD$$",               before_reload],
                       ["$$LC_COUNT_AFTER_RELOAD$$",                after_reload],
                       ["$$LC_COUNT_AT_LOOP_EXIT$$",                exit_loop],
                       ])
def get_implementation_of_mode_functions(mode, Modes):
    """Writes constructors and mode transition functions.

          void quex::lexer::enter_EXAMPLE_MODE() { ... }

       where EXAMPLE_MODE is a lexer mode from the given lexer-modes, and
       'quex::lexer' is the lexical analysis class.

       mode  -- the mode object whose functions are implemented.
       Modes -- all modes of the analyzer (unused here; kept for interface
                compatibility with callers).

       RETURNS: implementation code as a string.
    """
    # (*) on enter
    code_fragments = mode.on_entry_code_fragments()
    on_entry_str = "#ifdef __QUEX_OPTION_RUNTIME_MODE_TRANSITION_CHECK\n"
    on_entry_str += "assert(me->%s.has_entry_from(FromMode));\n" % mode.name
    on_entry_str += "#endif\n"
    for code_info in code_fragments:
        on_entry_str += code_info.get_code()
    # Strip a single trailing newline so the template controls line breaks.
    if on_entry_str[-1] == "\n":
        on_entry_str = on_entry_str[:-1]

    # (*) on exit
    code_fragments = mode.on_exit_code_fragments()
    on_exit_str = "#ifdef __QUEX_OPTION_RUNTIME_MODE_TRANSITION_CHECK\n"
    on_exit_str += "assert(me->%s.has_exit_to(ToMode));\n" % mode.name
    on_exit_str += "#endif\n"
    for code_info in code_fragments:
        on_exit_str += code_info.get_code()

    # (*) on indentation
    code_fragments = mode.on_indentation_code_fragments()
    on_indentation_str = "assert(Indentation >= 0);"
    for code_info in code_fragments:
        on_indentation_str += code_info.get_code()

    # (*) has base mode
    if mode.get_base_modes() != []:
        has_base_mode_str = get_IsOneOfThoseCode(mode.get_base_modes())
    else:
        has_base_mode_str = " return false;"

    # (*) has entry from
    # BUG FIX: the original code passed 'ConsiderDerivedClassesF=true'.
    # 'true' is an undefined name in Python; the resulting NameError was
    # silently swallowed by a bare 'except', so a user-defined 'entry'
    # option was always ignored and the default was emitted instead.
    # Use the proper constant 'True' and catch only the expected KeyError
    # (option not defined for this mode).
    try:
        entry_list = mode.options["entry"]
    except KeyError:
        has_entry_from_str = " return true; // default"
    else:
        # check whether the mode we come from is an allowed mode
        has_entry_from_str = get_IsOneOfThoseCode(entry_list,
                                                  ConsiderDerivedClassesF=True)

    # (*) has exit to  (same fix as for 'entry' above)
    try:
        exit_list = mode.options["exit"]
    except KeyError:
        has_exit_to_str = " return true; // default"
    else:
        has_exit_to_str = get_IsOneOfThoseCode(exit_list,
                                               ConsiderDerivedClassesF=True)

    txt = blue_print(mode_function_implementation_str,
                     [["%%ENTER-PROCEDURE%%",       on_entry_str],
                      ["%%EXIT-PROCEDURE%%",        on_exit_str],
                      ["%%INDENTATION-PROCEDURE%%", on_indentation_str],
                      ["%%HAS_BASE_MODE%%",         has_base_mode_str],
                      ["%%HAS_ENTRANCE_FROM%%",     has_entry_from_str],
                      ["%%HAS_EXIT_TO%%",           has_exit_to_str],
                      ["%%MODE_NAME%%",             mode.name],
                      ])
    return txt
def get_on_indentation_handler(Mode):
    """Build the body of the indentation-event handler for 'Mode' by filling
       the handler template with the mode's code fragments (or defaults).

       RETURNS: generated code as a string.
    """
    # 'on_dedent' and 'on_n_dedent' cannot be defined at the same time.
    assert not (Mode.has_code_fragment_list("on_dedent") \
                and Mode.has_code_fragment_list("on_n_dedent"))

    # A mode that deals only with the default indentation handler relies
    # on what is defined in '$QUEX_PATH/analayzer/member/on_indentation.i'
    if Mode.default_indentation_handler_sufficient():
        return " return;"

    if Mode.has_code_fragment_list("on_indent"):
        on_indent_str, eol_f = action_code_formatter.get_code(Mode.get_code_fragment_list("on_indent"))
    else:
        on_indent_str = "self_send(__QUEX_SETTING_TOKEN_ID_INDENT);"

    if Mode.has_code_fragment_list("on_nodent"):
        on_nodent_str, eol_f = action_code_formatter.get_code(Mode.get_code_fragment_list("on_nodent"))
    else:
        on_nodent_str = "self_send(__QUEX_SETTING_TOKEN_ID_NODENT);"

    if Mode.has_code_fragment_list("on_dedent"):
        assert not Mode.has_code_fragment_list("on_n_dedent")
        on_dedent_str, eol_f = action_code_formatter.get_code(Mode.get_code_fragment_list("on_dedent"))
        on_n_dedent_str = ""
    elif Mode.has_code_fragment_list("on_n_dedent"):
        assert not Mode.has_code_fragment_list("on_dedent")
        on_n_dedent_str, eol_f = action_code_formatter.get_code(Mode.get_code_fragment_list("on_n_dedent"))
        on_dedent_str = ""
    else:
        # If no 'on_dedent' and no 'on_n_dedent' is defined ...
        # default: send one DEDENT token per closed indentation level
        # (repetition support sends them as one repeated token).
        on_dedent_str = ""
        on_n_dedent_str = "#if defined(QUEX_OPTION_TOKEN_REPETITION_SUPPORT)\n"
        on_n_dedent_str += " self_send_n(ClosedN, __QUEX_SETTING_TOKEN_ID_DEDENT);\n"
        on_n_dedent_str += "#else\n"
        on_n_dedent_str += " while( start-- != stack->back ) self_send(__QUEX_SETTING_TOKEN_ID_DEDENT);\n"
        on_n_dedent_str += "#endif\n"

    if not Mode.has_code_fragment_list("on_indentation_error"):
        # Default: Blow the program if there is an indentation error.
        on_indentation_error = 'QUEX_ERROR_EXIT("Lexical analyzer mode \'%s\': indentation error detected!\\n"' \
                               % Mode.name + \
                               ' "No \'on_indentation_error\' handler has been specified.\\n");'
    else:
        on_indentation_error, eol_f = action_code_formatter.get_code(Mode.get_code_fragment_list("on_indentation_error"))

    # Note: 'on_indentation_bad' is applied in code generation for
    #       indentation counter in 'indentation_counter.py'.
    # NOTE(review): 'on_indentation_str' is not assigned in this function --
    # presumably a module-level template string; confirm it exists at module
    # scope.
    txt = blue_print(on_indentation_str,
                     [["$$INDENT-PROCEDURE$$",            on_indent_str],
                      ["$$NODENT-PROCEDURE$$",            on_nodent_str],
                      ["$$DEDENT-PROCEDURE$$",            on_dedent_str],
                      ["$$N-DEDENT-PROCEDURE$$",          on_n_dedent_str],
                      ["$$INDENTATION-ERROR-PROCEDURE$$", on_indentation_error]])
    return txt
def __terminal_states(SMD, action_db, DefaultAction, EndOfStreamAction,
                      SupportBeginOfLineF, PreConditionIDList, LanguageDB):
    """Generate the terminal states section (pattern actions, default action,
       end-of-stream) plus the re-entry preparation -- switch-statement based
       variant.

       NOTE: During backward-lexing, for a pre-condition, there is not need
             for terminal states, since only the flag 'pre-condition
             fulfilled' is raised.

       RETURNS: generated code as a string.
    """
    assert SMD.__class__.__name__ == "StateMachineDecorator"
    sm = SMD.sm()
    # NOTE(review): the following two lists are computed but never used here.
    PostConditionedStateMachineID_List = SMD.post_contexted_sm_id_list()
    DirectlyReachedTerminalID_List = SMD.directly_reached_terminal_id_list()

    # (*) specific terminal states of patterns (entered from acceptance states)
    txt = ""
    for state_machine_id, pattern_action_info in action_db.items():
        txt += get_terminal_code(state_machine_id, SMD, pattern_action_info,
                                 SupportBeginOfLineF, LanguageDB)
    specific_terminal_states_str = txt

    # (*) general terminal state (entered from non-acceptance state)
    #     'repr(...).replace("L", "")' strips the Python 2 long-int suffix.
    txt = ""
    for state_machine_id in action_db.keys():
        txt += " case %s: " % repr(state_machine_id).replace("L", "")
        txt += LanguageDB["$goto"]("$terminal", state_machine_id) + "\n"
    jumps_to_acceptance_states_str = txt

    # (*) preparation of the reentry without return:
    #     delete all pre-condition fullfilled flags
    txt = ""
    for pre_context_sm_id in PreConditionIDList:
        txt += " " + LanguageDB["$assignment"]("pre_context_%s_fulfilled_f" % __nice(pre_context_sm_id), 0)
    delete_pre_context_flags_str = txt

    # -- execute default pattern action
    # -- goto initial state
    end_of_stream_code_action_str = __adorn_action_code(EndOfStreamAction, SMD,
                                                        SupportBeginOfLineF,
                                                        IndentationOffset=16)

    # -- DEFAULT ACTION: Under 'normal' circumstances the default action is
    #    simply to be executed since the 'get_forward()' incremented the
    #    'current' pointer. HOWEVER, when end of file has been reached the
    #    'current' pointer has to be reset so that the initial state can drop
    #    out on the buffer limit code and then transit to the end of file
    #    action.
    # NOTE: It is possible that 'miss' happens after a chain of characters
    #       appeared. In any case the input pointer must be setup right after
    #       the lexeme start. This way, the lexer becomes a new chance as
    #       soon as possible.
    default_action_str = "me->buffer._input_p = me->buffer._lexeme_start_p;\n"
    default_action_str += LanguageDB["$if EOF"] + "\n"
    default_action_str += " " + LanguageDB["$comment"]("Next increment will stop on EOF character.") + "\n"
    default_action_str += LanguageDB["$endif"] + "\n"
    default_action_str += LanguageDB["$else"] + "\n"
    default_action_str += " " + LanguageDB["$comment"]("Step over nomatching character") + "\n"
    default_action_str += " " + LanguageDB["$input/increment"] + "\n"
    default_action_str += LanguageDB["$endif"] + "\n"
    default_action_str += __adorn_action_code(DefaultAction, SMD,
                                              SupportBeginOfLineF,
                                              IndentationOffset=16)

    # -- routing to states via switch statement
    #    (note, the gcc computed goto is implement, too)
    txt = ""
    for state_index, state in sm.states.items():
        if state.transitions().is_empty():
            continue
        txt += " "
        txt += "case %i: " % int(state_index) + LanguageDB["$goto"]("$input", state_index) + "\n"
    if sm.core().pre_context_sm() != None:
        # Pre-context (backward lexing) states are routed as well.
        for state_index, state in sm.core().pre_context_sm().states.items():
            if state.transitions().is_empty():
                continue
            txt += " "
            txt += "case %i: " % int(state_index) + LanguageDB["$goto"]("$input", state_index) + "\n"
    # NOTE(review): assigned but not referenced in any template below.
    switch_cases_drop_out_back_router_str = txt

    # NOTE(review): computed but not referenced below.
    if PreConditionIDList == []:
        precondition_involved_f = "0"
    else:
        precondition_involved_f = "1"

    txt = blue_print(__terminal_state_str,
                     [["$$JUMPS_TO_ACCEPTANCE_STATE$$",  jumps_to_acceptance_states_str],
                      ["$$SPECIFIC_TERMINAL_STATES$$",   specific_terminal_states_str],
                      ["$$DEFAULT_ACTION$$",             default_action_str],
                      ["$$END_OF_STREAM_ACTION$$",       end_of_stream_code_action_str],
                      ["$$TERMINAL_END_OF_STREAM-DEF$$", LanguageDB["$label-def"]("$terminal-EOF")],
                      ["$$TERMINAL_DEFAULT-DEF$$",       LanguageDB["$label-def"]("$terminal-DEFAULT")],
                      ["$$TERMINAL_GENERAL-DEF$$",       LanguageDB["$label-def"]("$terminal-general", False)],
                      ["$$TERMINAL_DEFAULT-GOTO$$",      LanguageDB["$goto"]("$terminal-DEFAULT")],
                      ["$$STATE_MACHINE_NAME$$",         SMD.name()],
                      ["$$GOTO_START_PREPARATION$$",     LanguageDB["$goto"]("$re-start")],
                      ])

    txt += blue_print(__on_continue_reentry_preparation_str,
                      [["$$REENTRY_PREPARATION$$",                    LanguageDB["$label-def"]("$re-start")],
                       ["$$DELETE_PRE_CONDITION_FULLFILLED_FLAGS$$",  delete_pre_context_flags_str],
                       ["$$GOTO_START$$",                             LanguageDB["$goto"]("$start")],
                       ["$$COMMENT_ON_POST_CONTEXT_INITIALIZATION$$", comment_on_post_context_position_init_str],
                       ])

    return txt
def get_implementation_of_mode_functions(mode, Modes):
    """Writes constructors and mode transition functions.

          void quex::lexer::enter_EXAMPLE_MODE() { ... }

       where EXAMPLE_MODE is a lexer mode from the given lexer-modes, and
       'quex::lexer' is the lexical analysis class.

       mode  -- the mode whose functions are implemented.
       Modes -- all modes of the analyzer (consulted to drop modes marked
                '<inheritable: only>').

       RETURNS: implementation code as a string.
    """
    def __filter_out_inheritable_only(ModeNameList):
        """Return the names from 'ModeNameList' whose mode is NOT marked
           '<inheritable: only>' (such modes never exist at run time)."""
        # NOTE: loop variable renamed from 'mode' to 'm' -- the original
        # shadowed the enclosing function's 'mode' parameter.
        result = []
        for name in ModeNameList:
            for m in Modes:
                if m.name == name:
                    if m.options["inheritable"] != "only":
                        result.append(name)
                    break
        return result

    # (*) on enter
    on_entry_str = "# ifdef QUEX_OPTION_RUNTIME_MODE_TRANSITION_CHECK\n"
    on_entry_str += " QUEX_NAME(%s).has_entry_from(FromMode);\n" % mode.name
    on_entry_str += "# endif\n"
    for code_info in mode.get_code_fragment_list("on_entry"):
        on_entry_str += code_info.get_code()
    # Strip a single trailing newline so the template controls line breaks.
    if on_entry_str[-1] == "\n":
        on_entry_str = on_entry_str[:-1]

    # (*) on exit
    on_exit_str = "# ifdef QUEX_OPTION_RUNTIME_MODE_TRANSITION_CHECK\n"
    on_exit_str += " QUEX_NAME(%s).has_exit_to(ToMode);\n" % mode.name
    on_exit_str += "# endif\n"
    for code_info in mode.get_code_fragment_list("on_exit"):
        on_exit_str += code_info.get_code()

    # (*) on indentation
    on_indentation_str = get_on_indentation_handler(mode)

    # (*) has base mode
    if mode.has_base_mode():
        base_mode_list = __filter_out_inheritable_only(mode.get_base_mode_name_list())
        has_base_mode_str = get_IsOneOfThoseCode(base_mode_list)
    else:
        has_base_mode_str = " return false;"

    # (*) has entry from
    # BUG FIX: the original read
    #   get_IsOneOfThoseCode(entry_list,
    #                        __filter_out_inheritable_only(ConsiderDerivedClassesF=true))
    # -- a misplaced call using the undefined name 'true'. The resulting
    # error was silently swallowed by a bare 'except', so a user-defined
    # 'entry' option was always ignored. The filter is already applied to
    # 'entry_list'; pass the flag directly with the proper constant 'True'
    # and catch only the expected KeyError (option not defined).
    try:
        entry_list = __filter_out_inheritable_only(mode.options["entry"])
    except KeyError:
        has_entry_from_str = " return true; /* default */"
    else:
        # check whether the mode we come from is an allowed mode
        has_entry_from_str = get_IsOneOfThoseCode(entry_list,
                                                  ConsiderDerivedClassesF=True)

    # (*) has exit to  (same fix as for 'entry' above)
    try:
        exit_list = __filter_out_inheritable_only(mode.options["exit"])
    except KeyError:
        has_exit_to_str = " return true; /* default */"
    else:
        has_exit_to_str = get_IsOneOfThoseCode(exit_list,
                                               ConsiderDerivedClassesF=True)

    txt = blue_print(mode_function_implementation_str,
                     [["$$ENTER-PROCEDURE$$",          on_entry_str],
                      ["$$EXIT-PROCEDURE$$",           on_exit_str],
                      #
                      ["$$ON_INDENTATION-PROCEDURE$$", on_indentation_str],
                      #
                      ["$$HAS_BASE_MODE$$",            has_base_mode_str],
                      ["$$HAS_ENTRANCE_FROM$$",        has_entry_from_str],
                      ["$$HAS_EXIT_TO$$",              has_exit_to_str],
                      #
                      ["$$MODE_NAME$$",                mode.name],
                      ])
    return txt
def get_range_skipper(EndSequence, LanguageDB, MissingClosingDelimiterAction=""):
    """Create code that skips input until the character sequence
       'EndSequence' (e.g. a comment's closing delimiter) has been passed.

       EndSequence                   -- delimiter as list of character codes.
       LanguageDB                    -- database of target-language constructs.
       MissingClosingDelimiterAction -- code to execute when the stream ends
                                        before the delimiter was found.

       RETURNS: the generated skipper code as a string.
    """
    assert EndSequence.__class__ == list
    assert len(EndSequence) >= 1
    assert map(type, EndSequence) == [int] * len(EndSequence)

    # Unique index that names the '$$SKIPPER$$' labels.
    my_index = sm_index.get()

    # Delimiter: hex code list plus a human readable comment.
    delimiter_str = "".join(["0x%X, " % letter for letter in EndSequence])
    readable = "".join(["'%s', " % utf8.map_unicode_to_utf8(letter) for letter in EndSequence])
    delimiter_comment_str = LanguageDB["$comment"](" Delimiter: " + readable)
    delimiter_length_str = "%i" % len(EndSequence)

    # Test of the delimiter's tail (everything behind its first character):
    # compare each offset; on mismatch, jump back to the loop entry.
    delimiter_remainder_test_str = ""
    if len(EndSequence) != 1:
        fragments = []
        for i in range(1, len(EndSequence)):
            fragments.append(" " + LanguageDB["$input/get-offset"](i - 1) + "\n")
            fragments.append(" " + LanguageDB["$if !="]("Skipper$$SKIPPER_INDEX$$[%i]" % i))
            fragments.append(" " + LanguageDB["$goto"]("$entry", my_index) + "\n")
            fragments.append(" " + LanguageDB["$endif"])
        delimiter_remainder_test_str = "".join(fragments)

    # Fill the main template.
    replacements = [
        ["$$DELIMITER$$",                    delimiter_str],
        ["$$DELIMITER_LENGTH$$",             delimiter_length_str],
        ["$$DELIMITER_COMMENT$$",            delimiter_comment_str],
        ["$$WHILE_1_PLUS_1_EQUAL_2$$",       LanguageDB["$loop-start-endless"]],
        ["$$END_WHILE$$",                    LanguageDB["$loop-end"]],
        ["$$INPUT_P_INCREMENT$$",            LanguageDB["$input/increment"]],
        ["$$INPUT_P_DECREMENT$$",            LanguageDB["$input/decrement"]],
        ["$$INPUT_GET$$",                    LanguageDB["$input/get"]],
        ["$$IF_INPUT_EQUAL_DELIMITER_0$$",   LanguageDB["$if =="]("Skipper$$SKIPPER_INDEX$$[0]")],
        ["$$BREAK$$",                        LanguageDB["$break"]],
        ["$$ENDIF$$",                        LanguageDB["$endif"]],
        ["$$ENTRY$$",                        LanguageDB["$label-def"]("$entry", my_index)],
        ["$$DROP_OUT$$",                     LanguageDB["$label-def"]("$drop-out", my_index)],
        ["$$GOTO_ENTRY$$",                   LanguageDB["$goto"]("$entry", my_index)],
        ["$$GOTO_REENTRY_PREPARATION$$",     LanguageDB["$goto"]("$re-start")],
        ["$$MARK_LEXEME_START$$",            LanguageDB["$mark-lexeme-start"]],
        ["$$DELIMITER_REMAINDER_TEST$$",     delimiter_remainder_test_str],
        ["$$SET_INPUT_P_BEHIND_DELIMITER$$", LanguageDB["$input/add"](len(EndSequence) - 1)],
        ["$$MISSING_CLOSING_DELIMITER$$",    MissingClosingDelimiterAction],
    ]
    code_str = blue_print(range_skipper_template, replacements)

    # Insert the line/column number counting fragments.
    code_str = __range_skipper_lc_counting_replacements(code_str, EndSequence)

    # Resolve the remaining skipper-specific placeholders.
    return blue_print(code_str,
                      [["$$SKIPPER_INDEX$$", __nice(my_index)],
                       ["$$GOTO_DROP_OUT$$", LanguageDB["$goto"]("$drop-out", my_index)]])
def __terminal_states(SMD, action_db, OnFailureAction, EndOfStreamAction, SupportBeginOfLineF, PreConditionIDList, LanguageDB):
    """Generate the terminal states of the analyzer: one specific terminal per
    pattern, plus the shared FAILURE / end-of-stream terminals, the terminal
    router, and the re-entry preparation code.

    NOTE: During backward-lexing, for a pre-condition, there is no need for
    terminal states, since only the flag 'pre-condition fulfilled' is raised.

    SMD                -- StateMachineDecorator of the analyzer.
    action_db          -- maps state machine id --> pattern action info.
    OnFailureAction    -- action to execute when no pattern matched.
    EndOfStreamAction  -- action to execute on end of input.
    SupportBeginOfLineF-- whether begin-of-line pre-conditions are supported.
    PreConditionIDList -- ids of pre-condition state machines whose flags
                          must be reset on re-entry.
    LanguageDB         -- dictionary of target-language code fragments.

    Returns a list of code elements (strings / Address objects).
    """
    assert SMD.__class__.__name__ == "StateMachineDecorator"
    sm = SMD.sm()
    PostConditionedStateMachineID_List = SMD.post_contexted_sm_id_list()
    DirectlyReachedTerminalID_List = SMD.directly_reached_terminal_id_list()

    # (*) specific terminal states of patterns (entered from acceptance states)
    specific_terminal_states = []
    for state_machine_id, pattern_action_info in action_db.items():
        code = get_terminal_code(state_machine_id, SMD, pattern_action_info, SupportBeginOfLineF, LanguageDB)
        specific_terminal_states.extend(code)

    # If there is at least a single terminal, then the 're-entry' preparation
    # must be accomplished.  NOTE: get_label() is called for its side effect
    # of registering the label as 'used'.
    if len(action_db) != 0: get_label("$re-start", U=True)

    # (*) preparation of the reentry without return:
    #     delete all pre-condition fulfilled flags
    delete_pre_context_flags = []
    for pre_context_sm_id in PreConditionIDList:
        delete_pre_context_flags.append(" ")
        delete_pre_context_flags.append(LanguageDB["$assignment"]("pre_context_%s_fulfilled_f" % __nice(pre_context_sm_id), 0))

    # -- execute 'on_failure' pattern action
    # -- goto initial state
    end_of_stream_code_action_str = __adorn_action_code(EndOfStreamAction, SMD, SupportBeginOfLineF)

    # -- FAILURE ACTION: Under 'normal' circumstances the on_failure action is
    #    simply to be executed since the 'get_forward()' incremented the
    #    'current' pointer.  HOWEVER, when end of file has been reached, the
    #    'current' pointer has to be reset so that the initial state can drop
    #    out on the buffer limit code and then transit to the end of file
    #    action.
    #    NOTE: It is possible that 'miss' happens after a chain of characters
    #    appeared.  In any case the input pointer must be set up right after
    #    the lexeme start.  This way, the lexer gets a new chance as soon as
    #    possible.
    on_failure = __terminal_on_failure_prolog(LanguageDB)
    msg = __adorn_action_code(OnFailureAction, SMD, SupportBeginOfLineF)
    on_failure.append(msg)

    # NOTE(review): 'precondition_involved_f' is computed but not used in the
    # visible code below — presumably a leftover; confirm before removing.
    if PreConditionIDList == []: precondition_involved_f = "0"
    else:                        precondition_involved_f = "1"

    prolog = __terminal_state_prolog

    # Terminal router: dispatches to the last accepted pattern's terminal.
    router = Address("$terminal-router", None, [
        blue_print(__terminal_router_prolog_str, [
            ["$$RESTORE_LAST_ACCEPTANCE_POS$$", LanguageDB["$input/seek_position"]("last_acceptance_input_position")],
            ["$$TERMINAL_FAILURE-REF$$", "QUEX_LABEL(%i)" % get_address("$terminal-FAILURE")],
            ["$$TERMINAL_FAILURE$$", get_label("$terminal-FAILURE")],
        ]),
        # DO NOT 'U=True' for the state router. This is done automatically if
        # 'goto reload' is used.
        get_label("$state-router"), ";",
        __terminal_router_epilog_str,
    ])

    # Shared FAILURE and end-of-stream terminals.
    epilog = blue_print(__terminal_state_epilog, [
        ["$$FAILURE_ACTION$$", "".join(on_failure)],
        ["$$END_OF_STREAM_ACTION$$", end_of_stream_code_action_str],
        ["$$TERMINAL_END_OF_STREAM-DEF$$", get_label("$terminal-EOF")],
        ["$$TERMINAL_FAILURE-DEF$$", get_label("$terminal-FAILURE")],
        ["$$STATE_MACHINE_NAME$$", SMD.name()],
        ["$$GOTO_START_PREPARATION$$", get_label("$re-start", U=True)],
    ])

    # Code run before re-entering the analyzer without returning to the caller.
    reentry_preparation = blue_print(__on_continue_reentry_preparation_str, [
        ["$$REENTRY_PREPARATION$$", get_label("$re-start")],
        ["$$DELETE_PRE_CONDITION_FULLFILLED_FLAGS$$", "".join(delete_pre_context_flags)],
        ["$$GOTO_START$$", get_label("$start", U=True)],
        ["$$COMMENT_ON_POST_CONTEXT_INITIALIZATION$$", comment_on_post_context_position_init_str],
        ["$$TERMINAL_FAILURE-REF$$", "QUEX_LABEL(%i)" % get_address("$terminal-FAILURE")],
    ])

    # Assemble in emission order: router, prolog, per-pattern terminals,
    # shared terminals, re-entry preparation.
    txt = []
    txt.append(router)
    txt.append(prolog)
    txt.extend(specific_terminal_states)
    txt.append(epilog)
    txt.append(reentry_preparation)
    return txt
def get_skipper(EndSequence, Mode=None, IndentationCounterTerminalID=None, OnSkipRangeOpenStr=""):
    """Generate code that skips until the delimiter sequence 'EndSequence'
    has been passed (range skipping, e.g. for comments).

    EndSequence                 -- non-empty list of integer code points.
    Mode                        -- mode object; used to detect interaction with
                                   indentation counting (may be None).
    IndentationCounterTerminalID-- terminal id of the indentation counter;
                                   required when the delimiter is a subset of
                                   the newline pattern used for indentation.
    OnSkipRangeOpenStr          -- override code for 'skip range still open at
                                   end of file'; default derived from Mode.

    Returns (code string, local variable db).
    """
    assert type(EndSequence) == list
    assert len(EndSequence) >= 1
    assert map(type, EndSequence) == [int] * len(EndSequence)

    local_variable_db = {}

    global template_str
    LanguageDB = Setup.language_db

    # Name the $$SKIPPER$$
    skipper_index = sm_index.get()

    # Determine the $$DELIMITER$$
    delimiter_str, delimiter_length_str, delimiter_comment_str = get_character_sequence(EndSequence)
    delimiter_comment_str = LanguageDB["$comment"](" Delimiter: " + delimiter_comment_str)

    # Determine the check for the tail of the delimiter
    delimiter_remainder_test_str = ""
    if len(EndSequence) != 1:
        txt = ""
        i = 0
        for letter in EndSequence[1:]:
            i += 1
            txt += " " + LanguageDB["$input/get-offset"](i - 1) + "\n"
            txt += " " + LanguageDB["$if !="]("Skipper$$SKIPPER_INDEX$$[%i]" % i)
            txt += " goto %s;" % get_label("$entry", skipper_index, U=True)
            txt += " " + LanguageDB["$endif"]
        delimiter_remainder_test_str = txt

    if not end_delimiter_is_subset_of_indentation_counter_newline(Mode, EndSequence):
        goto_after_end_of_skipping_str = "goto %s;" % get_label("$start", U=True)
    else:
        # If there is indentation counting involved, then the counter's terminal id must
        # be determined at this place.
        assert IndentationCounterTerminalID != None
        # If the ending delimiter is a subset of what the 'newline' pattern triggers
        # in indentation counting => move on to the indentation counter.
        goto_after_end_of_skipping_str = "goto %s;" % get_label("$terminal-direct", IndentationCounterTerminalID, U=True)

    # Caller-supplied override takes precedence over the mode's default.
    if OnSkipRangeOpenStr != "": on_skip_range_open_str = OnSkipRangeOpenStr
    else:                        on_skip_range_open_str = get_on_skip_range_open(Mode, EndSequence)

    # The main part
    code_str = blue_print(template_str, [
        ["$$DELIMITER$$", delimiter_str],
        ["$$DELIMITER_LENGTH$$", delimiter_length_str],
        ["$$DELIMITER_COMMENT$$", delimiter_comment_str],
        ["$$WHILE_1_PLUS_1_EQUAL_2$$", LanguageDB["$loop-start-endless"]],
        ["$$END_WHILE$$", LanguageDB["$loop-end"]],
        ["$$INPUT_P_INCREMENT$$", LanguageDB["$input/increment"]],
        ["$$INPUT_P_DECREMENT$$", LanguageDB["$input/decrement"]],
        ["$$INPUT_GET$$", LanguageDB["$input/get"]],
        ["$$IF_INPUT_EQUAL_DELIMITER_0$$", LanguageDB["$if =="]("Skipper$$SKIPPER_INDEX$$[0]")],
        ["$$ENDIF$$", LanguageDB["$endif"]],
        ["$$ENTRY$$", get_label("$entry", skipper_index)],
        ["$$RELOAD$$", get_label("$reload", skipper_index)],
        ["$$GOTO_ENTRY$$", get_label("$entry", skipper_index, U=True)],
        # When things were skipped, no change to acceptance flags or modes has
        # happened. One can jump immediately to the start without re-entry preparation.
        ["$$GOTO_AFTER_END_OF_SKIPPING$$", goto_after_end_of_skipping_str],
        ["$$MARK_LEXEME_START$$", LanguageDB["$mark-lexeme-start"]],
        ["$$DELIMITER_REMAINDER_TEST$$", delimiter_remainder_test_str],
        ["$$ON_SKIP_RANGE_OPEN$$", on_skip_range_open_str],
    ])

    # Line and column number counting
    code_str, reference_p_f = __lc_counting_replacements(code_str, EndSequence)

    # The finishing touch
    code_str = blue_print(code_str, [
        ["$$SKIPPER_INDEX$$", __nice(skipper_index)],
        ["$$GOTO_RELOAD$$", get_label("$reload", skipper_index)],
    ])

    # Column counting needs a reference pointer variable in the generated code.
    if reference_p_f:
        local_variable_db["QUEX_OPTION_COLUMN_NUMBER_COUNTING/reference_p"] = Variable("reference_p", "QUEX_TYPE_CHARACTER_POSITION", None, "(QUEX_TYPE_CHARACTER_POSITION)0x0", "QUEX_OPTION_COLUMN_NUMBER_COUNTING")

    return code_str, local_variable_db
def get_skipper(TriggerSet):
    """This function implements simple 'skipping' in the sense of passing by
    characters that belong to a given set of characters--the TriggerSet.

    TriggerSet -- NumberSet of characters to be skipped (must be non-empty).

    Returns (list of code elements, local variable db).
    """
    global template_str
    assert TriggerSet.__class__.__name__ == "NumberSet"
    assert not TriggerSet.is_empty()

    LanguageDB = Setup.language_db

    skipper_index = sm_index.get()

    # Mini trigger map: [ trigger set ] --> loop start
    # That means: As long as characters of the trigger set appear, we go to the loop start.
    transition_map = TransitionMap()  # (don't worry about 'drop-out-ranges' etc.)
    transition_map.add_transition(TriggerSet, skipper_index)
    # On buffer limit code, the skipper must transit to a dedicated reloader
    iteration_code = transition_block.do(transition_map.get_trigger_map(), skipper_index, DSM=None, GotoReload_Str="goto %s;" % get_label("$reload", skipper_index))

    comment_str = LanguageDB["$comment"]("Skip any character in " + TriggerSet.get_utf8_string())

    # Line and column number counting
    prolog = __lc_counting_replacements(prolog_txt, TriggerSet)
    epilog = __lc_counting_replacements(epilog_txt, TriggerSet)

    prolog = blue_print(prolog, [
        ["$$DELIMITER_COMMENT$$", comment_str],
        ["$$SKIPPER_INDEX$$", "%i" % skipper_index],
        ["$$INPUT_GET$$", LanguageDB["$input/get"]],
    ])

    epilog = blue_print(epilog, [
        ["$$INPUT_P_INCREMENT$$", LanguageDB["$input/increment"]],
        ["$$INPUT_P_DECREMENT$$", LanguageDB["$input/decrement"]],
        ["$$IF_INPUT_EQUAL_DELIMITER_0$$", LanguageDB["$if =="]("SkipDelimiter$$SKIPPER_INDEX$$[0]")],
        ["$$ENDIF$$", LanguageDB["$endif"]],
        ["$$LOOP_REENTRANCE$$", get_label("$entry", skipper_index)],
        ["$$INPUT_EQUAL_BUFFER_LIMIT_CODE$$", LanguageDB["$BLC"]],
        ["$$RELOAD$$", get_label("$reload", skipper_index)],
        ["$$DROP_OUT_DIRECT$$", get_label("$drop-out", skipper_index, U=True)],
        ["$$SKIPPER_INDEX$$", "%i" % skipper_index],
        ["$$GOTO_TERMINAL_EOF$$", get_label("$terminal-EOF", U=True)],
        # When things were skipped, no change to acceptance flags or modes has
        # happened. One can jump immediately to the start without re-entry preparation.
        ["$$GOTO_START$$", get_label("$start", U=True)],
        ["$$MARK_LEXEME_START$$", LanguageDB["$mark-lexeme-start"]],
    ])

    # Emission order: prolog, the per-character iteration code, epilog.
    code = [ prolog ]
    code.extend(iteration_code)
    code.append(epilog)

    # Column counting needs a reference pointer variable in the generated code.
    local_variable_db = {}
    local_variable_db["QUEX_OPTION_COLUMN_NUMBER_COUNTING/reference_p"] = \
        Variable("reference_p", "QUEX_TYPE_CHARACTER_POSITION", None, "(QUEX_TYPE_CHARACTER_POSITION)0x0", "QUEX_OPTION_COLUMN_NUMBER_COUNTING")

    return code, local_variable_db
def __lc_counting_replacements(code_str, EndSequence):
    """Line and Column Number Counting (Range Skipper):

    -- in loop, if there appears a newline, then do:
          increment line_n
          set position from where to count column_n
    -- at end of skipping do one of the following:
          if end delimiter contains newline:
              column_n = number of letters since last newline in end delimiter
              increment line_n by number of newlines in end delimiter.
              (NOTE: in this case the setting of the position from where to
               count the column_n can be omitted.)
          else:
              column_n = current_position - position from where to count
                         column number.

    NOTE: On reload we do count the column numbers and reset the column_p.

    code_str    -- skipper template with $$LC_COUNT_...$$ placeholders.
    EndSequence -- non-empty list of integer code points of the delimiter.

    Returns (code with placeholders substituted, reference_p_required_f).
    """
    LanguageDB = Setup.language_db

    def get_character_n_after_last_newline(Sequence):
        # Number of characters behind the last '\n'; -1 if no newline occurs.
        tmp = copy(Sequence)
        tmp.reverse()
        try:
            return tmp.index(ord('\n'))
        # FIX: was a bare 'except:' -- list.index raises ValueError when the
        # element is absent; catching everything would also hide real errors
        # (e.g. KeyboardInterrupt).
        except ValueError:
            return -1

    char_n_after_last_newline = get_character_n_after_last_newline(EndSequence)

    reference_p_def = ""
    in_loop = ""
    end_procedure = ""
    before_reload = ""
    after_reload = ""
    # FIX: removed dead 'exit_loop = ""' that was immediately overwritten.
    exit_loop = " " + LanguageDB["$break"]
    on_first_delimiter = ""
    reference_p_required_f = False

    # Line/Column Counting:
    newline_number_in_delimiter = EndSequence.count(ord('\n'))

    if EndSequence == map(ord, "\n") or EndSequence == map(ord, "\r\n"):
        # (1) If the end-delimiter is a newline
        #     => there cannot appear a newline inside the comment
        #     => IN LOOP: no line number increment
        #                 no reference pointer required for column counting
        end_procedure += " __QUEX_IF_COUNT_COLUMNS_SET((size_t)1);\n"
        end_procedure += " __QUEX_IF_COUNT_LINES_ADD((size_t)1);\n"
    else:
        # (2) If end-delimiter is NOT newline
        #     => there can appear a newline inside the comment
        if newline_number_in_delimiter == 0:
            # -- no newlines in delimiter => line and column number
            #    must be counted.
            in_loop = line_column_counter_in_loop
            end_procedure = " __QUEX_IF_COUNT_COLUMNS_ADD((size_t)(QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer)\n" + \
                            " - reference_p));\n"
            reference_p_required_f = True
        else:
            # -- newline inside delimiter => line number must be counted,
            #    column number is fixed.
            end_procedure = " __QUEX_IF_COUNT_COLUMNS_SET((size_t)%i);\n" \
                            % (char_n_after_last_newline + 1)

            if EndSequence[0] == ord('\n') \
               or len(EndSequence) > 1 and EndSequence[0:2] == [ord('\r'), ord('\n')]:
                # If the first character in the sequence is a newline, then the
                # line counting is prevented by the loop exit.  Now, we need to count.
                on_first_delimiter = "/* First delimiter char was a newline */\n" + \
                                     " __QUEX_IF_COUNT_LINES_ADD((size_t)1);\n"
                end_procedure += " __QUEX_IF_COUNT_LINES_ADD((size_t)%i);\n" % (newline_number_in_delimiter - 1)
            else:
                in_loop = line_counter_in_loop
                end_procedure += " __QUEX_IF_COUNT_LINES_ADD((size_t)%i);\n" % newline_number_in_delimiter

    if reference_p_required_f:
        # Column counting relative to a reference pointer; the pointer must be
        # re-based around buffer reloads since memory addresses change.
        reference_p_def = " __QUEX_IF_COUNT_COLUMNS(reference_p = QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer));\n"
        before_reload = " __QUEX_IF_COUNT_COLUMNS_ADD((size_t)(QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer)\n" + \
                        " - reference_p));\n"
        after_reload = " __QUEX_IF_COUNT_COLUMNS(reference_p = QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer));\n"

    if len(EndSequence) > 1:
        # The input pointer still stands on the first delimiter character;
        # advance it behind the complete delimiter before the end procedure.
        end_procedure = LanguageDB["$input/add"](len(EndSequence)-1) + \
                        "\n" + end_procedure

    return blue_print(code_str,
                      [["$$LC_COUNT_COLUMN_N_POINTER_DEFINITION$$", reference_p_def],
                       ["$$LC_COUNT_IN_LOOP$$", in_loop],
                       ["$$LC_COUNT_END_PROCEDURE$$", end_procedure],
                       ["$$LC_COUNT_BEFORE_RELOAD$$", before_reload],
                       ["$$LC_COUNT_AFTER_RELOAD$$", after_reload],
                       ["$$LC_ON_FIRST_DELIMITER$$", on_first_delimiter],
                       ["$$LC_COUNT_LOOP_EXIT$$", exit_loop],
                      ]), \
           reference_p_required_f
def output(global_setup):
    """Create the token-id file: either pass through a user-provided file or
    generate '#define' lines for every token id in lexer_mode.token_id_db.

    Writes the result to setup.output_file.  Exits the process with -1 when no
    usable token ids or no token-id database are available.
    """
    global file_str
    assert lexer_mode.token_id_db.has_key("TERMINATION"), \
           "TERMINATION token id must be defined by setup or user."
    assert lexer_mode.token_id_db.has_key("UNINITIALIZED"), \
           "UNINITIALIZED token id must be defined by setup or user."
    # (*) Token ID File ________________________________________________________________
    #
    #     The token id file can either be specified as database of
    #     token-id names, or as a file that directly assigns the token-ids
    #     to variables. If the flag '--user-token-id-file' is defined, then
    #     then the token-id file is provided by the user. Otherwise, the
    #     token id file is created by the token-id maker.
    #
    #     The token id maker considers the file passed by the option '-t'
    #     as the database file and creates a C++ file with the output filestem
    #     plus the suffix "--token-ids". Note, that the token id file is a
    #     header file.
    #
    setup = Setup(global_setup)

    if len(lexer_mode.token_id_db.keys()) == 2:
        # TERMINATION + UNINITIALIZED = 2 token ids. If they are the only ones nothing can be done.
        print "error: No token id other than %sTERMINATION and %sUNINITIALIZED are defined. " % \
              (setup.token_prefix, setup.token_prefix)
        print "error: Quex refuses to proceed. Please, use the 'token { ... }' section to "
        print "error: specify at least one other token id."
        sys.exit(-1)

    # User-provided token-id file: just record it and do not generate anything.
    if global_setup.input_user_token_id_file != "":
        ## print "(0) token ids provided by user"
        ## print " '%s'" % global_setup.input_user_token_id_file
        global_setup.output_token_id_file = global_setup.input_user_token_id_file
        return

    if global_setup.input_token_id_db == "":
        print "error: token-id database not specified"
        sys.exit(-1)

    ## print " token class file = '%s'" % global_setup.input_token_class_file
    ## print " => '%s'" % global_setup.output_token_id_file
    #______________________________________________________________________________________
    # Width of the longest token name; used to align the generated #defines.
    L = max(map(lambda name: len(name), lexer_mode.token_id_db.keys()))

    def space(Name):
        # Padding that right-aligns columns after token name 'Name'.
        return " " * (L - len(Name))

    # -- define values for the token ids
    #    NO LONGER: token_id_txt = "namespace quex {\n"
    token_id_txt = ""
    if setup.input_foreign_token_id_file != "":
        token_id_txt += "#include\"%s\"\n" % setup.input_foreign_token_id_file
    else:
        token_names = lexer_mode.token_id_db.keys()
        token_names.sort()
        i = setup.id_count_offset
        for token_name in token_names:
            token_info = lexer_mode.token_id_db[token_name]
            # Assign sequential numbers to ids the user did not number
            # explicitly (mutates the db entry in place).
            if token_info.number == None:
                token_info.number = i; i+= 1
            token_id_txt += "#define %s%s %s((QUEX_TOKEN_ID_TYPE)%i)\n" % (setup.token_prefix, token_name, space(token_name), token_info.number)
    # NO LONGER: token_id_txt += "} // namespace quex\n"

    # -- define the function for token names
    db_build_txt = ""
    for token_name in lexer_mode.token_id_db.keys():
        db_build_txt += '\n db[%s%s] %s= std::string("%s");' % (setup.token_prefix, token_name, space(token_name), token_name)

    t = time.localtime()
    date_str = "%iy%im%id_%ih%02im%02is" % (t[0], t[1], t[2], t[3], t[4], t[5])

    # NOTE(review): 'func_str' is not defined in this function — presumably a
    # module-level template fragment; confirm it is in scope at call time.
    file_str = file_str.replace("$$CONTENT$$", func_str)
    content = blue_print(file_str,
                         [["$$TOKEN_ID_DEFINITIONS$$", token_id_txt],
                          ["$$DATE$$", time.asctime()],
                          ["$$TOKEN_CLASS_DEFINITION_FILE$$", setup.token_class_file],
                          ["$$DATE_IG$$", date_str],
                          ["$$TOKEN_ID_CASES$$", db_build_txt],
                          ["$$TOKEN_PREFIX$$", setup.token_prefix],
                          ["$$TOKEN_CLASS$$", setup.token_class]])

    # Binary mode + manual linesep conversion keeps output identical across
    # platforms except for the intended newline style.
    fh = open(setup.output_file, "wb")
    if os.linesep != "\n": content = content.replace("\n", os.linesep)
    fh.write(content)
    fh.close()
def output(global_setup):
    """Create the token-id file: either pass through a user-provided file or
    generate '#define' lines for every token id in lexer_mode.token_id_db.

    Writes the result to setup.output_file.  Exits the process with -1 when no
    usable token ids or no token-id database are available.
    """
    global file_str
    assert lexer_mode.token_id_db.has_key("TERMINATION"), \
           "TERMINATION token id must be defined by setup or user."
    assert lexer_mode.token_id_db.has_key("UNINITIALIZED"), \
           "UNINITIALIZED token id must be defined by setup or user."
    # (*) Token ID File ________________________________________________________________
    #
    #     The token id file can either be specified as database of
    #     token-id names, or as a file that directly assigns the token-ids
    #     to variables. If the flag '--user-token-id-file' is defined, then
    #     then the token-id file is provided by the user. Otherwise, the
    #     token id file is created by the token-id maker.
    #
    #     The token id maker considers the file passed by the option '-t'
    #     as the database file and creates a C++ file with the output filestem
    #     plus the suffix "--token-ids". Note, that the token id file is a
    #     header file.
    #
    setup = Setup(global_setup)

    if len(lexer_mode.token_id_db.keys()) == 2:
        # TERMINATION + UNINITIALIZED = 2 token ids. If they are the only ones nothing can be done.
        print "error: No token id other than %sTERMINATION and %sUNINITIALIZED are defined. " % \
              (setup.token_prefix, setup.token_prefix)
        print "error: Quex refuses to proceed. Please, use the 'token { ... }' section to "
        print "error: specify at least one other token id."
        sys.exit(-1)

    # User-provided token-id file: just record it and do not generate anything.
    if global_setup.input_user_token_id_file != "":
        ## print "(0) token ids provided by user"
        ## print " '%s'" % global_setup.input_user_token_id_file
        global_setup.output_token_id_file = global_setup.input_user_token_id_file
        return

    if global_setup.input_token_id_db == "":
        print "error: token-id database not specified"
        sys.exit(-1)

    ## print " token class file = '%s'" % global_setup.input_token_class_file
    ## print " => '%s'" % global_setup.output_token_id_file
    #______________________________________________________________________________________
    # Width of the longest token name; used to align the generated #defines.
    L = max(map(lambda name: len(name), lexer_mode.token_id_db.keys()))

    def space(Name):
        # Padding that right-aligns columns after token name 'Name'.
        return " " * (L - len(Name))

    # -- define values for the token ids
    #    NO LONGER: token_id_txt = "namespace quex {\n"
    token_id_txt = ""
    if setup.input_foreign_token_id_file != "":
        token_id_txt += "#include\"%s\"\n" % setup.input_foreign_token_id_file
    else:
        token_names = lexer_mode.token_id_db.keys()
        token_names.sort()
        i = setup.id_count_offset
        for token_name in token_names:
            token_info = lexer_mode.token_id_db[token_name]
            # Assign sequential numbers to ids the user did not number
            # explicitly (mutates the db entry in place).
            if token_info.number == None:
                token_info.number = i
                i += 1
            token_id_txt += "#define %s%s %s((QUEX_TOKEN_ID_TYPE)%i)\n" % (
                setup.token_prefix, token_name, space(token_name), token_info.number)
    # NO LONGER: token_id_txt += "} // namespace quex\n"

    # -- define the function for token names
    db_build_txt = ""
    for token_name in lexer_mode.token_id_db.keys():
        db_build_txt += '\n db[%s%s] %s= std::string("%s");' % (
            setup.token_prefix, token_name, space(token_name), token_name)

    t = time.localtime()
    date_str = "%iy%im%id_%ih%02im%02is" % (t[0], t[1], t[2], t[3], t[4], t[5])

    # NOTE(review): 'func_str' is not defined in this function — presumably a
    # module-level template fragment; confirm it is in scope at call time.
    file_str = file_str.replace("$$CONTENT$$", func_str)
    content = blue_print(
        file_str,
        [["$$TOKEN_ID_DEFINITIONS$$", token_id_txt],
         ["$$DATE$$", time.asctime()],
         ["$$TOKEN_CLASS_DEFINITION_FILE$$", setup.token_class_file],
         ["$$DATE_IG$$", date_str],
         ["$$TOKEN_ID_CASES$$", db_build_txt],
         ["$$TOKEN_PREFIX$$", setup.token_prefix],
         ["$$TOKEN_CLASS$$", setup.token_class]])

    # Binary mode + manual linesep conversion keeps output identical across
    # platforms except for the intended newline style.
    fh = open(setup.output_file, "wb")
    if os.linesep != "\n": content = content.replace("\n", os.linesep)
    fh.write(content)
    fh.close()
def write_engine_header(Modes, Setup):
    """Generate the lexical analyzer class header from its template: resolve
    option switches, mode-id constants, mode class fragments, and all
    $$...$$ placeholders, then write the result to Setup.output_file_stem.

    Modes -- dictionary: mode name --> mode object.
    Setup -- global configuration object (NOTE: Setup.input_derived_class_name
             may be modified in place below).
    """
    QuexClassHeaderFileTemplate = (Setup.QUEX_TEMPLATE_DB_DIR + "/template/lexical_analyzer_class").replace("//", "/")
    CoreEngineDefinitionsHeader = (Setup.QUEX_TEMPLATE_DB_DIR + "/core_engine/").replace("//", "/")
    QuexClassHeaderFileOutput = Setup.output_file_stem
    LexerClassName = Setup.output_engine_name
    VersionID = Setup.input_application_version_id
    QuexVersionID = Setup.QUEX_VERSION

    # -- determine character type according to number of bytes per ucs character code point
    #    for the internal engine.
    quex_character_type_str = { 1: "uint8_t ", 2: "uint16_t", 4: "uint32_t", "wchar_t": "wchar_t" }[Setup.bytes_per_ucs_code_point]
    quex_lexeme_type_str = { 1: "char ", 2: "int16_t", 4: "int32_t", "wchar_t": "wchar_t" }[Setup.bytes_per_ucs_code_point]

    # are bytes of integers Setup 'little endian' or 'big endian' ?
    if Setup.byte_order == "little":
        quex_coding_name_str = { 1: "ASCII", 2: "UCS-2LE", 4: "UCS-4LE", "wchar_t": "WCHAR_T" }[Setup.bytes_per_ucs_code_point]
    else:
        quex_coding_name_str = { 1: "ASCII", 2: "UCS-2BE", 4: "UCS-4BE", "wchar_t": "WCHAR_T" }[Setup.bytes_per_ucs_code_point]

    # -- determine whether the lexical analyser needs indentation counting
    #    support. if one mode has an indentation handler, than indentation
    #    support must be provided.
    indentation_support_f = False
    for mode in Modes.values():
        if mode.on_indentation.get_code() != "":
            indentation_support_f = True
            break

    lex_id_definitions_str = ""
    # NOTE: First mode-id needs to be '1' for compatibility with flex generated engines
    i = 0
    for name in Modes.keys():
        i += 1
        lex_id_definitions_str += "const int LEX_ID_%s = %i;\n" % (name, i)

    include_guard_extension = get_include_guard_extension(Setup.output_file_stem)

    # -- instances of mode classes as members of the lexer
    mode_object_members_txt, \
    constructor_txt, \
    mode_specific_functions_txt, \
    friend_txt = \
        get_mode_class_related_code_fragments(Modes.values(), LexerClassName)

    # -- define a pointer that directly has the type of the derived class
    #    NOTE: mutates Setup in place when no derived class name was given.
    if Setup.input_derived_class_name == "":
        Setup.input_derived_class_name = LexerClassName
        derived_class_type_declaration = ""
    else:
        derived_class_type_declaration = "class %s;" % Setup.input_derived_class_name

    # -- the friends of the class
    friends_str = ""
    for friend in Setup.input_lexer_class_friends:
        friends_str += " friend class %s;\n" % friend

    # -- the class body extension
    class_body_extension_str = lexer_mode.class_body.get_code()

    # -- the class constructor extension
    class_constructor_extension_str = lexer_mode.class_init.get_code()

    fh = open_file_or_die(QuexClassHeaderFileTemplate)
    template_code_txt = fh.read()
    fh.close()

    # -- check if exit/entry handlers have to be active
    entry_handler_active_f = False
    exit_handler_active_f = False
    for mode in Modes.values():
        if mode.on_entry_code_fragments() != []: entry_handler_active_f = True
        if mode.on_exit_code_fragments() != []: exit_handler_active_f = True

    txt = template_code_txt

    def set_switch(txt, SwitchF, Name):
        # Turn a '$$SWITCH$$ NAME' template line into an active or
        # commented-out '#define NAME'.
        if SwitchF: txt = txt.replace("$$SWITCH$$ %s" % Name, "#define %s" % Name)
        else:       txt = txt.replace("$$SWITCH$$ %s" % Name, "// #define %s" % Name)
        return txt

    txt = set_switch(txt, entry_handler_active_f, "__QUEX_OPTION_ON_ENTRY_HANDLER_PRESENT")
    txt = set_switch(txt, exit_handler_active_f, "__QUEX_OPTION_ON_EXIT_HANDLER_PRESENT")
    txt = set_switch(txt, indentation_support_f, "__QUEX_OPTION_INDENTATION_TRIGGER_SUPPORT")
    txt = set_switch(txt, True, "__QUEX_OPTION_SUPPORT_BEGIN_OF_LINE_PRE_CONDITION")
    txt = set_switch(txt, Setup.enable_iconv_f, "QUEX_OPTION_ENABLE_ICONV")
    txt = set_switch(txt, not Setup.disable_token_queue_f, "QUEX_OPTION_TOKEN_SENDING_VIA_QUEUE")
    txt = set_switch(txt, not Setup.disable_string_accumulator_f, "QUEX_OPTION_STRING_ACCUMULATOR")
    txt = set_switch(txt, Setup.post_categorizer_f, "QUEX_OPTION_POST_CATEGORIZER")
    txt = set_switch(txt, True, "QUEX_OPTION_VIRTUAL_FUNCTION_ON_ACTION_ENTRY")
    txt = set_switch(txt, True, "QUEX_OPTION_LINE_NUMBER_COUNTING")
    txt = set_switch(txt, True, "QUEX_OPTION_COLUMN_NUMBER_COUNTING")
    txt = set_switch(txt, Setup.output_debug_f, "QUEX_OPTION_DEBUG_TOKEN_SENDING")
    txt = set_switch(txt, Setup.output_debug_f, "QUEX_OPTION_DEBUG_MODE_TRANSITIONS")
    txt = set_switch(txt, Setup.output_debug_f, "QUEX_OPTION_DEBUG_QUEX_PATTERN_MATCHES")
    txt = set_switch(txt, True, "QUEX_OPTION_INCLUDE_STACK_SUPPORT")
    txt = set_switch(txt, not Setup.no_mode_transition_check_f, "QUEX_OPTION_RUNTIME_MODE_TRANSITION_CHECK")

    # Substitute all remaining template placeholders.
    txt = blue_print(txt, [
        ["$$BUFFER_LIMIT_CODE$$", "0x%X" % Setup.buffer_limit_code],
        ["$$CONSTRUCTOR_EXTENSTION$$", class_constructor_extension_str],
        ["$$CONSTRUCTOR_MODE_DB_INITIALIZATION_CODE$$", constructor_txt],
        ["$$CORE_ENGINE_DEFINITIONS_HEADER$$", CoreEngineDefinitionsHeader],
        ["$$CLASS_BODY_EXTENSION$$", class_body_extension_str],
        ["$$INCLUDE_GUARD_EXTENSION$$", include_guard_extension],
        ["$$INITIAL_LEXER_MODE_ID$$", "LEX_ID_" + lexer_mode.initial_mode.get_code()],
        ["$$LEXER_BUILD_DATE$$", time.asctime()],
        ["$$LEXER_BUILD_VERSION$$", VersionID],
        ["$$LEXER_CLASS_FRIENDS$$", friends_str],
        ["$$LEXER_CLASS_NAME$$", LexerClassName],
        ["$$LEXER_DERIVED_CLASS_DECL$$", derived_class_type_declaration],
        ["$$LEXER_DERIVED_CLASS_NAME$$", Setup.input_derived_class_name],
        ["$$LEX_ID_DEFINITIONS$$", lex_id_definitions_str],
        ["$$MAX_MODE_CLASS_N$$", repr(len(Modes))],
        ["$$MODE_CLASS_FRIENDS$$", friend_txt],
        ["$$MODE_OBJECT_MEMBERS$$", mode_object_members_txt],
        ["$$MODE_SPECIFIC_ANALYSER_FUNCTIONS$$", mode_specific_functions_txt],
        ["$$PRETTY_INDENTATION$$", " " + " " * (len(LexerClassName) * 2 + 2)],
        ["$$QUEX_TEMPLATE_DIR$$", Setup.QUEX_TEMPLATE_DB_DIR],
        ["$$QUEX_VERSION$$", QuexVersionID],
        ["$$TOKEN_CLASS$$", Setup.input_token_class_name],
        ["$$TOKEN_CLASS_DEFINITION_FILE$$", Setup.input_token_class_file.replace("//", "/")],
        ["$$TOKEN_ID_DEFINITION_FILE$$", Setup.output_token_id_file.replace("//", "/")],
        ["$$QUEX_CHARACTER_TYPE$$", quex_character_type_str],
        ["$$QUEX_LEXEME_TYPE$$", quex_lexeme_type_str],
        ["$$CORE_ENGINE_CHARACTER_CODING$$", quex_coding_name_str],
        ["$$USER_DEFINED_HEADER$$", lexer_mode.header.get_code() + "\n"],
    ])

    # Binary mode + manual linesep conversion keeps output identical across
    # platforms except for the intended newline style.
    fh_out = open(QuexClassHeaderFileOutput, "wb")
    if os.linesep != "\n": txt = txt.replace("\n", os.linesep)
    fh_out.write(txt)
    fh_out.close()
def _do(Descr):
    """Generate the token class from its templates.

    Descr -- TokenTypeDescriptor describing class name, namespace, members,
             and user-supplied code fragments.

    Returns (header text, implementation text) as two strings.
    """
    # The following things must be ensured before the function is called
    assert Descr != None
    assert Descr.__class__.__name__ == "TokenTypeDescriptor"
    ## ALLOW: Descr.get_member_db().keys() == []

    TemplateFile = QUEX_PATH \
                   + Setup.language_db["$code_base"] \
                   + Setup.language_db["$token_template_file"]

    TemplateIFile = QUEX_PATH \
                    + Setup.language_db["$code_base"] \
                    + Setup.language_db["$token_template_i_file"]

    template_str = open_file_or_die(TemplateFile, Mode="rb").read()
    template_i_str = open_file_or_die(TemplateIFile, Mode="rb").read()

    # Derivable token classes need a virtual destructor.
    virtual_destructor_str = ""
    if Descr.open_for_derivation_f: virtual_destructor_str = "virtual "

    if Descr.copy.get_pure_code() == "":
        # Default copy operation: Plain copy of token memory
        copy_str = "__QUEX_STD_memcpy((void*)__this, (void*)__That, sizeof(QUEX_TYPE_TOKEN));\n"
    else:
        copy_str = Descr.copy.get_code()

    # Default 'take_text' simply reports success.
    take_text_str = Descr.take_text.get_code()
    if take_text_str == "": take_text_str = "return true;\n"

    include_guard_extension_str = get_include_guard_extension(Setup.language_db["$namespace-ref"](Descr.name_space) + "__" + Descr.class_name)

    # In case of plain 'C' the class name must incorporate the namespace (list)
    token_class_name = Descr.class_name
    if Setup.language == "C":
        token_class_name = Setup.token_class_name_safe

    # Header (declaration) template.
    txt = blue_print(template_str, [
        ["$$BODY$$", Descr.body.get_code()],
        ["$$CONSTRUCTOR$$", Descr.constructor.get_code()],
        ["$$COPY$$", copy_str],
        ["$$DESTRUCTOR$$", Descr.destructor.get_code()],
        ["$$DISTINCT_MEMBERS$$", get_distinct_members(Descr)],
        ["$$FOOTER$$", Descr.footer.get_code()],
        ["$$FUNC_TAKE_TEXT$$", take_text_str],
        ["$$HEADER$$", Descr.header.get_code()],
        ["$$INCLUDE_GUARD_EXTENSION$$", include_guard_extension_str],
        ["$$NAMESPACE_CLOSE$$", Setup.language_db["$namespace-close"](Descr.name_space)],
        ["$$NAMESPACE_OPEN$$", Setup.language_db["$namespace-open"](Descr.name_space)],
        ["$$QUICK_SETTERS$$", get_quick_setters(Descr)],
        ["$$SETTERS_GETTERS$$", get_setter_getter(Descr)],
        ["$$TOKEN_CLASS$$", token_class_name],
        ["$$TOKEN_REPETITION_N_GET$$", Descr.repetition_get.get_code()],
        ["$$TOKEN_REPETITION_N_SET$$", Descr.repetition_set.get_code()],
        ["$$UNION_MEMBERS$$", get_union_members(Descr)],
        ["$$VIRTUAL_DESTRUCTOR$$", virtual_destructor_str],
    ])

    # Implementation template.
    txt_i = blue_print(template_i_str, [
        ["$$CONSTRUCTOR$$", Descr.constructor.get_code()],
        ["$$COPY$$", copy_str],
        ["$$DESTRUCTOR$$", Descr.destructor.get_code()],
        ["$$FOOTER$$", Descr.footer.get_code()],
        ["$$FUNC_TAKE_TEXT$$", take_text_str],
        ["$$INCLUDE_GUARD_EXTENSION$$", include_guard_extension_str],
        ["$$NAMESPACE_CLOSE$$", Setup.language_db["$namespace-close"](Descr.name_space)],
        ["$$NAMESPACE_OPEN$$", Setup.language_db["$namespace-open"](Descr.name_space)],
        ["$$TOKEN_CLASS$$", token_class_name],
        ["$$TOKEN_REPETITION_N_GET$$", Descr.repetition_get.get_code()],
        ["$$TOKEN_REPETITION_N_SET$$", Descr.repetition_set.get_code()],
    ])

    # Return declaration and implementation as two strings
    return txt, txt_i