Example No. 1
def do_implementation(ModeDB):

    FileTemplate = os.path.normpath(QUEX_PATH +
                                    Setup.language_db["$code_base"] +
                                    "/analyzer/TXT-Cpp.i")
    func_txt = get_file_content_or_die(FileTemplate)

    func_txt = blue_print(func_txt, [
        [
            "$$CONSTRUCTOR_EXTENSTION$$",
            blackboard.class_constructor_extension.get_code()
        ],
        [
            "$$CONVERTER_HELPER_I$$",
            Setup.get_file_reference(Setup.output_buffer_codec_header_i)
        ],
        [
            "$$CONSTRUCTOR_MODE_DB_INITIALIZATION_CODE$$",
            get_constructor_code(ModeDB.values())
        ],
        [
            "$$MEMENTO_EXTENSIONS_PACK$$",
            blackboard.memento_pack_extension.get_code()
        ],
        [
            "$$MEMENTO_EXTENSIONS_UNPACK$$",
            blackboard.memento_unpack_extension.get_code()
        ],
    ])
    return func_txt
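
A note on the helper used above and in the following examples: blue_print() is
not shown anywhere in this listing. A minimal sketch of the assumed behavior, a
plain multi-placeholder substitution over a template string, is given below;
the function name and call shape are taken from the examples, the body is an
illustrative guess.

def blue_print(template_txt, replacements):
    """Replace every '$$...$$' placeholder listed in 'replacements'
    (a list of [placeholder, substitute] pairs) inside 'template_txt'."""
    result = template_txt
    for placeholder, substitute in replacements:
        result = result.replace(placeholder, substitute)
    return result

# Usage sketch:
#   blue_print("class $$NAME$$;", [["$$NAME$$", "MyLexer"]])  yields  "class MyLexer;"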
Example No. 2
def _do(UnicodeTrafoInfo):
    """
    PURPOSE: Writes converters for conversion towards UTF8/UTF16/UCS2/UCS4.

    UnicodeTrafoInfo:

       Provides the information about how character codes in a particular
       encoding relate to unicode character codes. It is provided in the following form:

       # Codec Values                 Unicode Values
       [ (Source0_Begin, Source0_End, TargetInterval0_Begin), 
         (Source1_Begin, Source1_End, TargetInterval1_Begin),
         (Source2_Begin, Source2_End, TargetInterval2_Begin), 
         ... 
       ]
    """
    codec_name = make_safe_identifier(UnicodeTrafoInfo.name).lower()
    utf8_epilog,  utf8_function_body  = ConverterWriterUTF8().do(UnicodeTrafoInfo)
    utf16_prolog, utf16_function_body = ConverterWriterUTF16().do(UnicodeTrafoInfo)
    dummy,        utf32_function_body = ConverterWriterUTF32().do(UnicodeTrafoInfo)

    # Provide only the constants that are necessary
    FileName = os.path.normpath(  QUEX_PATH
                                + Lng["$code_base"] 
                                + "/converter_helper/TXT-from-codec-buffer.i")
    codec_header = Setup.get_file_reference(Setup.output_buffer_codec_header)

    txt_i = blue_print(get_file_content_or_die(FileName), 
                       [["$$CODEC$$",        codec_name],
                        ["$$EPILOG$$",       utf8_epilog],
                        ["$$CODEC_HEADER$$", codec_header],
                        ["$$BODY_UTF8$$",    utf8_function_body],
                        ["$$BODY_UTF16$$",   utf16_function_body],
                        ["$$BODY_UTF32$$",   utf32_function_body]])

    # A separate declaration header is required
    FileName = os.path.normpath(  QUEX_PATH
                                + Lng["$code_base"] 
                                + "/converter_helper/TXT-from-codec-buffer")
    template_h_txt = get_file_content_or_die(FileName)
    txt_h = template_h_txt.replace("$$CODEC$$", codec_name)
    return txt_h, txt_i
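
The UnicodeTrafoInfo layout described in the docstring can be illustrated with
a hand-written mapping. The numbers below are purely hypothetical and only show
the structure, assuming half-open source intervals: each triple maps the codec
range [Source_Begin, Source_End) onto unicode code points starting at the given
target value.

# Hypothetical mapping for a single-byte codec, shaped as the docstring describes.
example_trafo_info = [
    (0x00, 0x80, 0x0000),   # 0x00..0x7F maps 1:1 onto U+0000..U+007F
    (0xA0, 0xC0, 0x0390),   # 0xA0..0xBF maps onto U+0390..U+03AF
    (0xC0, 0xFF, 0x0410),   # 0xC0..0xFE maps onto U+0410..U+044E
]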
Example No. 3
def _do(UnicodeTrafoInfo, CodecName):
    """
    PURPOSE: Writes converters for conversion towards UTF8/UTF16/UCS2/UCS4.

    UnicodeTrafoInfo:

       Provides the information about how character codes in a particular
       encoding relate to unicode character codes. It is provided in the following form:

       # Codec Values                 Unicode Values
       [ (Source0_Begin, Source0_End, TargetInterval0_Begin), 
         (Source1_Begin, Source1_End, TargetInterval1_Begin),
         (Source2_Begin, Source2_End, TargetInterval2_Begin), 
         ... 
       ]
    """
    codec_name = make_safe_identifier(CodecName).lower()
    utf8_epilog, utf8_function_body = ConverterWriterUTF8().do(
        UnicodeTrafoInfo)
    utf16_prolog, utf16_function_body = ConverterWriterUTF16().do(
        UnicodeTrafoInfo)
    dummy, utf32_function_body = ConverterWriterUTF32().do(UnicodeTrafoInfo)

    # Provide only the constants that are necessary
    FileName = os.path.normpath(QUEX_PATH + Setup.language_db["$code_base"] +
                                "/converter_helper/TXT-from-codec-buffer.i")
    codec_header = Setup.get_file_reference(Setup.output_buffer_codec_header)

    txt_i = blue_print(get_file_content_or_die(FileName),
                       [["$$CODEC$$", codec_name], ["$$EPILOG$$", utf8_epilog],
                        ["$$CODEC_HEADER$$", codec_header],
                        ["$$BODY_UTF8$$", utf8_function_body],
                        ["$$BODY_UTF16$$", utf16_function_body],
                        ["$$BODY_UTF32$$", utf32_function_body]])

    # A separate declaration header is required
    FileName = os.path.normpath(QUEX_PATH + Setup.language_db["$code_base"] +
                                "/converter_helper/TXT-from-codec-buffer")
    template_h_txt = get_file_content_or_die(FileName)
    txt_h = template_h_txt.replace("$$CODEC$$", codec_name)
    return txt_h, txt_i
Example No. 4
def do_implementation(ModeDB):

    FileTemplate = os.path.normpath(QUEX_PATH
                                    + Lng["$code_base"] 
                                    + "/analyzer/TXT-Cpp.i")
    func_txt = get_file_content_or_die(FileTemplate)

    func_txt = blue_print(func_txt,
            [
                ["$$CONSTRUCTOR_EXTENSTION$$",                  Lng.SOURCE_REFERENCED(blackboard.class_constructor_extension)],
                ["$$CONVERTER_HELPER_I$$",                      Setup.get_file_reference(Setup.output_buffer_codec_header_i)],
                ["$$CONSTRUCTOR_MODE_DB_INITIALIZATION_CODE$$", get_constructor_code(ModeDB.values())],
                ["$$MEMENTO_EXTENSIONS_PACK$$",                 Lng.SOURCE_REFERENCED(blackboard.memento_pack_extension)],
                ["$$MEMENTO_EXTENSIONS_UNPACK$$",               Lng.SOURCE_REFERENCED(blackboard.memento_unpack_extension)],
                ])
    return func_txt
Example No. 5
def get_supported_codec_list(IncludeAliasesF=False):
    assert type(IncludeAliasesF) == bool

    global _supported_codec_list
    if len(_supported_codec_list) != 0: 
        if IncludeAliasesF: return _supported_codec_list_plus_aliases
        else:               return _supported_codec_list

    file_name = QUEX_PATH + "/quex/engine/codec_db/database/00-SUPPORTED.txt"
    content   = get_file_content_or_die(file_name)

    _supported_codec_list = content.split()
    _supported_codec_list.sort()
    codec_db_list = parser.get_codec_list_db()
    for codec_name, aliases_list, dummy in codec_db_list:
        if codec_name in _supported_codec_list: 
            _supported_codec_list_plus_aliases.extend(filter(lambda x: x != "", aliases_list))
        
    _supported_codec_list_plus_aliases.sort()
    if IncludeAliasesF: return _supported_codec_list_plus_aliases
    else:               return _supported_codec_list
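
get_supported_codec_list() caches its result in module-level lists. The two
cache containers it relies on are assumed to be defined once at module scope,
roughly as sketched below; the names are taken from the function body, the
initial values are an assumption.

# Assumed module-level cache containers: filled on the first call,
# returned unchanged by every later call.
_supported_codec_list              = []
_supported_codec_list_plus_aliases = []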
Example No. 6
def get_supported_codec_list(IncludeAliasesF=False):
    assert type(IncludeAliasesF) == bool

    global __supported_codec_list
    if len(__supported_codec_list) != 0: 
        if IncludeAliasesF: return __supported_codec_list_plus_aliases
        else:               return __supported_codec_list

    file_name = QUEX_PATH + "/quex/engine/codec_db/database/00-SUPPORTED.txt"
    content   = get_file_content_or_die(file_name)

    __supported_codec_list = content.split()
    __supported_codec_list.sort()
    codec_db_list = get_codec_list_db()
    for codec_name, aliases_list, dummy in codec_db_list:
        if codec_name in __supported_codec_list: 
            __supported_codec_list_plus_aliases.extend(filter(lambda x: x != "", aliases_list))
        
    __supported_codec_list_plus_aliases.sort()
    if IncludeAliasesF: return __supported_codec_list_plus_aliases
    else:               return __supported_codec_list
Example No. 7
def do(ModeDB):
    assert blackboard.token_type_definition is not None
    

    QuexClassHeaderFileTemplate = os.path.normpath(  QUEX_PATH
                                                   + Lng["$code_base"] 
                                                   + Lng["$analyzer_template_file"]).replace("//","/")
    LexerClassName = Setup.analyzer_class_name

    quex_converter_coding_name_str = Setup.converter_ucs_coding_name

    mode_id_definition_str = "" 
    # NOTE: First mode-id needs to be '1' for compatibility with flex generated engines
    for i, info in enumerate(ModeDB.items()):
        name = info[0]
        mode = info[1]
        if mode.abstract_f(): continue
        mode_id_definition_str += "    QUEX_NAME(ModeID_%s) = %i,\n" % (name, i)

    if mode_id_definition_str != "":
        mode_id_definition_str = mode_id_definition_str[:-2]

    # -- instances of mode classes as members of the lexer
    mode_object_members_txt,     \
    mode_specific_functions_txt, \
    friend_txt                   = get_mode_class_related_code_fragments(ModeDB.values())

    # -- define a pointer that directly has the type of the derived class
    if Setup.analyzer_derived_class_name != "":
        analyzer_derived_class_name    = Setup.analyzer_derived_class_name
        derived_class_type_declaration = "class %s;" % Setup.analyzer_derived_class_name
    else:
        analyzer_derived_class_name    = Setup.analyzer_class_name
        derived_class_type_declaration = ""

    token_class_file_name = blackboard.token_type_definition.get_file_name()
    token_class_name      = blackboard.token_type_definition.class_name
    token_class_name_safe = blackboard.token_type_definition.class_name_safe

    template_code_txt = get_file_content_or_die(QuexClassHeaderFileTemplate)

    include_guard_ext = get_include_guard_extension(
            Lng.NAMESPACE_REFERENCE(Setup.analyzer_name_space) 
            + "__" + Setup.analyzer_class_name)

    if len(Setup.token_id_foreign_definition_file) != 0:
        token_id_definition_file = Setup.token_id_foreign_definition_file
    else:
        token_id_definition_file = Setup.output_token_id_file

    lexer_name_space_safe = get_include_guard_extension(Lng.NAMESPACE_REFERENCE(Setup.analyzer_name_space))

    txt = blue_print(template_code_txt,
            [
                ["$$___SPACE___$$",                      " " * (len(LexerClassName) + 1)],
                ["$$CLASS_BODY_EXTENSION$$",             Lng.SOURCE_REFERENCED(blackboard.class_body_extension)],
                ["$$CONVERTER_HELPER$$",                 Setup.get_file_reference(Setup.output_buffer_codec_header)],
                ["$$INCLUDE_GUARD_EXTENSION$$",          include_guard_ext],
                ["$$LEXER_CLASS_NAME$$",                 LexerClassName],
                ["$$LEXER_NAME_SPACE$$",                 lexer_name_space_safe],
                ["$$LEXER_CLASS_NAME_SAFE$$",            Setup.analyzer_name_safe],
                ["$$LEXER_CONFIG_FILE$$",                Setup.get_file_reference(Setup.output_configuration_file)],
                ["$$LEXER_DERIVED_CLASS_DECL$$",         derived_class_type_declaration],
                ["$$LEXER_DERIVED_CLASS_NAME$$",         analyzer_derived_class_name],
                ["$$QUEX_MODE_ID_DEFINITIONS$$",         mode_id_definition_str],
                ["$$MEMENTO_EXTENSIONS$$",               Lng.SOURCE_REFERENCED(blackboard.memento_class_extension)],
                ["$$MODE_CLASS_FRIENDS$$",               friend_txt],
                ["$$MODE_OBJECTS$$",                     mode_object_members_txt],
                ["$$MODE_SPECIFIC_ANALYSER_FUNCTIONS$$", mode_specific_functions_txt],
                ["$$PRETTY_INDENTATION$$",               "     " + " " * (len(LexerClassName)*2 + 2)],
                ["$$QUEX_TEMPLATE_DIR$$",                QUEX_PATH + Lng["$code_base"]],
                ["$$QUEX_VERSION$$",                     QUEX_VERSION],
                ["$$TOKEN_CLASS_DEFINITION_FILE$$",      Setup.get_file_reference(token_class_file_name)],
                ["$$TOKEN_CLASS$$",                      token_class_name],
                ["$$TOKEN_CLASS_NAME_SAFE$$",            token_class_name_safe],
                ["$$TOKEN_ID_DEFINITION_FILE$$",         Setup.get_file_reference(token_id_definition_file)],
                ["$$CORE_ENGINE_CHARACTER_CODING$$",     quex_converter_coding_name_str],
                ["$$USER_DEFINED_HEADER$$",              Lng.SOURCE_REFERENCED(blackboard.header) + "\n"],
             ])

    return txt
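
For illustration, the loop that builds mode_id_definition_str above can be
re-run in isolation. The mode names below are made up, neither mode is
abstract, and they are iterated in the order listed.

mode_names = ["PROGRAM", "COMMENT"]           # hypothetical, non-abstract modes
mode_id_definition_str = ""
for i, name in enumerate(mode_names):
    mode_id_definition_str += "    QUEX_NAME(ModeID_%s) = %i,\n" % (name, i)
if mode_id_definition_str != "":
    mode_id_definition_str = mode_id_definition_str[:-2]   # drop the trailing ",\n"

print(mode_id_definition_str)
#     QUEX_NAME(ModeID_PROGRAM) = 0,
#     QUEX_NAME(ModeID_COMMENT) = 1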
Example No. 8
def do(ModeDescriptionDB):
    IndentationSupportF = blackboard.required_support_indentation_count()
    BeginOfLineSupportF = blackboard.required_support_begin_of_line()

    

    LexerClassName = Setup.analyzer_class_name

    ConfigurationTemplateFile =(  QUEX_PATH \
                                + Lng["$code_base"] \
                                + "/analyzer/configuration/TXT").replace("//","/")

    txt = get_file_content_or_die(ConfigurationTemplateFile)

    # -- check if exit/entry handlers have to be active
    entry_handler_active_f = False
    exit_handler_active_f = False
    for mode in ModeDescriptionDB.values():
        entry_handler_active_f |= mode.incidence_db.has_key(E_IncidenceIDs.MODE_ENTRY)
        exit_handler_active_f  |= mode.incidence_db.has_key(E_IncidenceIDs.MODE_EXIT)

    # Buffer filler converter (0x0 means: no buffer filler converter)
    converter_new_str = "#   define QUEX_SETTING_BUFFER_FILLERS_CONVERTER_NEW " 
    if Setup.converter_user_new_func != "": 
        converter_new_str += Setup.converter_user_new_func + "()"
    else: 
        converter_new_str = "/* " + converter_new_str + " */"

    # Token repetition support
    token_repeat_test_txt = ""
    for token_id_str in blackboard.token_repetition_token_id_list:
        token_repeat_test_txt += "TokenID == %s || " % token_id_str
    if token_repeat_test_txt != "":
        token_repeat_test_txt = token_repeat_test_txt[:-3]
    else:
        token_repeat_test_txt = "false"

    if Setup.analyzer_derived_class_name != "":
        analyzer_derived_class_name = Setup.analyzer_derived_class_name
    else:
        analyzer_derived_class_name = Setup.analyzer_class_name

    txt = __switch(txt, "QUEX_OPTION_COLUMN_NUMBER_COUNTING",        Setup.count_column_number_f)        
    txt = __switch(txt, "QUEX_OPTION_COMPUTED_GOTOS",                False)
    txt = __switch(txt, "QUEX_OPTION_CONVERTER_ICONV",               Setup.converter_iconv_f)
    txt = __switch(txt, "QUEX_OPTION_CONVERTER_ICU",                 Setup.converter_icu_f)
    txt = __switch(txt, "QUEX_OPTION_INCLUDE_STACK",                 Setup.include_stack_support_f)
    txt = __switch(txt, "QUEX_OPTION_LINE_NUMBER_COUNTING",          Setup.count_line_number_f)      
    txt = __switch(txt, "QUEX_OPTION_POST_CATEGORIZER",              Setup.post_categorizer_f)
    txt = __switch(txt, "QUEX_OPTION_RUNTIME_MODE_TRANSITION_CHECK", Setup.mode_transition_check_f)
    txt = __switch(txt, "QUEX_OPTION_STRING_ACCUMULATOR",            Setup.string_accumulator_f)
    txt = __switch(txt, "QUEX_OPTION_TOKEN_POLICY_QUEUE",            Setup.token_policy == "queue")
    txt = __switch(txt, "QUEX_OPTION_TOKEN_POLICY_SINGLE",           Setup.token_policy == "single")
    txt = __switch(txt, "QUEX_OPTION_TOKEN_REPETITION_SUPPORT",      token_repeat_test_txt != "false")
    txt = __switch(txt, "QUEX_OPTION_USER_MANAGED_TOKEN_MEMORY",     Setup.token_memory_management_by_user_f)
    txt = __switch(txt, "__QUEX_OPTION_BIG_ENDIAN",                  Setup.buffer_byte_order == "big")
    txt = __switch(txt, "__QUEX_OPTION_CONVERTER_HELPER",            Setup.converter_helper_required_f)
    txt = __switch(txt, "__QUEX_OPTION_CONVERTER",                   Setup.converter_f)
    txt = __switch(txt, "QUEX_OPTION_INDENTATION_TRIGGER",           IndentationSupportF)     
    txt = __switch(txt, "__QUEX_OPTION_LITTLE_ENDIAN",               Setup.buffer_byte_order == "little")
    txt = __switch(txt, "__QUEX_OPTION_ON_ENTRY_HANDLER_PRESENT",    entry_handler_active_f)
    txt = __switch(txt, "__QUEX_OPTION_ON_EXIT_HANDLER_PRESENT",     exit_handler_active_f)
    txt = __switch(txt, "__QUEX_OPTION_PLAIN_C",                     Setup.language.upper() == "C")
    txt = __switch(txt, "__QUEX_OPTION_SUPPORT_BEGIN_OF_LINE_PRE_CONDITION", BeginOfLineSupportF)
    txt = __switch(txt, "__QUEX_OPTION_SYSTEM_ENDIAN",               Setup.byte_order_is_that_of_current_system_f)
    txt = __switch(txt, "QUEX_OPTION_BUFFER_BASED_ANALYZIS",         Setup.buffer_based_analyzis_f)
    txt = __switch(txt, "__QUEX_OPTION_ENGINE_RUNNING_ON_CODEC",     Setup.buffer_codec.name != "unicode")

    # -- token class related definitions
    token_descr = blackboard.token_type_definition

    # -- name of the character codec
    codec_name = make_safe_identifier(Setup.buffer_codec.name).lower()

    # Setup.buffer_element_size can be '-1'. This signals that
    # sizeof(QUEX_TYPE_CHARACTER) needs to be used. A numeric value 
    # is required here.
    character_size_str = "%i" % Setup.buffer_element_size

    def namespace(NameSpaceList):
        result = Lng.NAMESPACE_REFERENCE(NameSpaceList, TrailingDelimiterF=False)

        if len(result) == 0: return ""

        assert Setup.language.upper() != "C++" or len(result) > 2, \
               "Error while generating namespace reference '%s'" % result

        return result

    txt = blue_print(txt, 
            [
             ["$$BUFFER_LIMIT_CODE$$",          "0x%X" % Setup.buffer_limit_code],
             ["$$QUEX_SETTING_CHARACTER_CODEC$$", codec_name],
             ["$$INCLUDE_GUARD_EXTENSION$$",    get_include_guard_extension(Lng.NAMESPACE_REFERENCE(Setup.analyzer_name_space) + "__" + Setup.analyzer_class_name)],
             ["$$INITIAL_LEXER_MODE_ID$$",      "QUEX_NAME(ModeID_%s)" % blackboard.initial_mode.get_pure_text()],
             ["$$LEXER_BUILD_DATE$$",           time.asctime()],
             ["$$LEXER_CLASS_NAME$$",           LexerClassName],
             ["$$LEXER_CLASS_NAME_SAFE$$",      Setup.analyzer_name_safe],
             ["$$LEXER_DERIVED_CLASS_NAME$$",   analyzer_derived_class_name],
             ["$$MAX_MODE_CLASS_N$$",           repr(len(ModeDescriptionDB))],
             ["$$NAMESPACE_MAIN$$",             namespace(Setup.analyzer_name_space)],
             ["$$NAMESPACE_MAIN_CLOSE$$",       Lng.NAMESPACE_CLOSE(Setup.analyzer_name_space).replace("\n", "\\\n")],
             ["$$NAMESPACE_MAIN_OPEN$$",        Lng.NAMESPACE_OPEN(Setup.analyzer_name_space).replace("\n", "\\\n")],
             ["$$NAMESPACE_TOKEN$$",            namespace(token_descr.name_space)],
             ["$$NAMESPACE_TOKEN_CLOSE$$",      Lng.NAMESPACE_CLOSE(token_descr.name_space).replace("\n", "\\\n")],
             ["$$NAMESPACE_TOKEN_OPEN$$",       Lng.NAMESPACE_OPEN(token_descr.name_space).replace("\n", "\\\n")],
             ["$$PATH_TERMINATION_CODE$$",      "0x%X" % Setup.path_limit_code],
             ["$$QUEX_SETTING_BUFFER_FILLERS_CONVERTER_NEW$$", converter_new_str],
             ["$$QUEX_TYPE_CHARACTER$$",        Setup.buffer_element_type],
             ["$$QUEX_SETTING_CHARACTER_SIZE$$", character_size_str],
             ["$$QUEX_NAMESPACE_LEXEME_NULL_OPEN$$",   Lng.NAMESPACE_OPEN(Setup.lexeme_null_namespace).replace("\n", "\\\n")],
             ["$$QUEX_NAMESPACE_LEXEME_NULL_CLOSE$$",  Lng.NAMESPACE_CLOSE(Setup.lexeme_null_namespace).replace("\n", "\\\n")],
             ["$$QUEX_LEXEME_NULL$$",                  Setup.lexeme_null_full_name_cpp],
             ["$$QUEX_LEXEME_NULL_SAFE$$",             Setup.lexeme_null_name_safe],
             ["$$QUEX_LEXEME_NULL_IN_ITS_NAMESPACE$$", Setup.lexeme_null_name],
             ["$$QUEX_VERSION$$",               QUEX_VERSION],
             ["$$TOKEN_CLASS$$",                token_descr.class_name],
             ["$$TOKEN_CLASS_NAME_SAFE$$",      token_descr.class_name_safe],
             ["$$TOKEN_COLUMN_N_TYPE$$",        token_descr.column_number_type.get_pure_text()],
             ["$$TOKEN_ID_TYPE$$",              token_descr.token_id_type.get_pure_text()],
             ["$$TOKEN_LINE_N_TYPE$$",          token_descr.line_number_type.get_pure_text()],
             ["$$TOKEN_PREFIX$$",               Setup.token_id_prefix],
             ["$$TOKEN_QUEUE_SAFETY_BORDER$$",  repr(Setup.token_queue_safety_border)],
             ["$$TOKEN_QUEUE_SIZE$$",           repr(Setup.token_queue_size)],
             ["$$TOKEN_REPEAT_TEST$$",          token_repeat_test_txt],
             ["$$USER_LEXER_VERSION$$",         Setup.user_application_version_id],
             ])

    return txt
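
The helper __switch() is referenced throughout this example but not defined in
the listing. The sketch below only captures the intent visible in the calls,
turning a compile-time option on or off inside the configuration template; the
'$$SWITCH$$' marker convention is an assumption, not taken from the quex
sources.

def __switch(txt, Name, OnF):
    """Sketch: enable or disable the option 'Name' in the template text."""
    if OnF: replacement = "#define    %s" % Name
    else:   replacement = "/* #define %s */" % Name
    return txt.replace("$$SWITCH$$ %s" % Name, replacement)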
Example No. 9
def parse(ForeignTokenIdFile, CommentDelimiterList):
    """This function somehow interprets the user defined token id file--if there is
       one. It does this in order to find the names of defined token ids. It does
       some basic interpretation and include file following, but: **it is in no
       way perfect**. Since its only purpose is to avoid warnings about token ids
       that are not defined it is not essential that it may fail sometimes.

       It is more like a nice feature that quex tries to find definitions on its own.
       
       Nevertheless, it should work in the large majority of cases.
    """
    # Regular expression to find '#include <something>' and extract the 'something'
    # in a 'group'. Note that '(' ')' cause the storage of parts of the match.
    IncludeRE = "#[ \t]*include[ \t]*[\"<]([^\">]+)[\">]"

    include_re_obj = re.compile(IncludeRE)

    def get_line_n_of_include(FileName, IncludedFileName):
        fh = open_file_or_die(FileName, Mode="rb")
        line_n = 0
        for line in fh.readlines():
            line_n += 1
            if include_re_obj.search(
                    line) is not None and line.find(IncludedFileName) != -1:
                break
        else:
            # The included file should appear in the including file; if not, tolerate it for safety.
            pass

        fh.close()
        return line_n

    # validate(...) ensured that the file exists.
    work_list = [ForeignTokenIdFile]
    done_list = []
    not_found_list = []
    recursive_list = []
    found_db = {}
    while len(work_list) != 0:
        file_name = work_list.pop()
        content = __delete_comments(
            get_file_content_or_die(file_name, Mode="rb"),
            CommentDelimiterList)
        done_list.append(os.path.normpath(file_name))

        # (*) Search for TokenID definitions
        begin_i = 0
        end_i = len(content)
        if Setup.token_id_foreign_definition_file_region_begin_re is not None:
            match = Setup.token_id_foreign_definition_file_region_begin_re.search(
                content)
            if match is not None:
                begin_i = match.end()

        if Setup.token_id_foreign_definition_file_region_end_re is not None:
            match = Setup.token_id_foreign_definition_file_region_end_re.search(
                content, pos=begin_i)
            if match is not None:
                end_i = match.start()
        content = content[begin_i:end_i]

        token_id_list = __extract_token_ids(content, file_name)
        if len(token_id_list) != 0:
            found_db[file_name] = copy(token_id_list)

        token_id_foreign_set.update(token_id_list)
        for token_name in token_id_list:
            # NOTE: The line number might be wrong, because of the comment deletion
            line_n = 0
            # NOTE: The actual token value is not important, since the token's numeric
            #       identifier is defined in the user's header. We do not care.
            prefix_less_token_name = cut_token_id_prefix(token_name)
            token_id_db[prefix_less_token_name] = \
                        TokenInfo(prefix_less_token_name, None, None, SourceRef(file_name, line_n))

        # (*) find "#include" statements
        #     'set' ensures that each entry is unique
        include_file_set = set(include_re_obj.findall(content))

        #     -- ensure that included files exist and are not included twice
        for included_file in include_file_set:
            normed_included_file = os.path.normpath(included_file)
            if included_file in done_list:
                line_n = get_line_n_of_include(file_name, included_file)
                recursive_list.append((file_name, line_n, included_file))
            elif not os.access(normed_included_file, os.F_OK):
                line_n = get_line_n_of_include(file_name, included_file)
                not_found_list.append((file_name, line_n, included_file))
            elif normed_included_file not in done_list:
                work_list.append(included_file)

    if Setup.token_id_foreign_definition_file_show_f:
        if len(found_db) == 0:
            error_msg("No token ids with prefix '%s' found in" %
                      Setup.token_id_prefix + "'%s' or included files." %
                      Setup.token_id_foreign_definition_file,
                      NoteF=True)
        else:
            txt = []
            for file_name, result in found_db.iteritems():
                result = set(result)
                L = max(map(len, result))
                txt.append("Token ids found in file '%s' {\n" % file_name)
                for name in sorted(result):
                    shorty = cut_token_id_prefix(name)
                    fully = Setup.token_id_prefix + shorty
                    txt.append("     %s %s=> '%s'\n" %
                               (fully, space(L, name), shorty))
                txt.append("}")
                txt.append("\n")

            if txt: txt = txt[:-1]
            error_msg("".join(txt), NoteF=True)

    ErrorN = NotificationDB.token_id_ignored_files_report
    if ErrorN not in Setup.suppressed_notification_list:
        if len(not_found_list) != 0:
            not_found_list.sort()
            error_msg("Files not found:",
                      not_found_list[0][0],
                      LineN=not_found_list[0][1],
                      DontExitF=True)
            for file_name, line_n, included_file in not_found_list:
                error_msg("%s" % included_file,
                          file_name,
                          LineN=line_n,
                          DontExitF=True)

        if len(recursive_list) != 0:
            recursive_list.sort()
            error_msg("Files recursively included (ignored second inclusion):",
                      recursive_list[0][0],
                      LineN=recursive_list[0][1],
                      DontExitF=True)
            for file_name, line_n, included_file in recursive_list:
                error_msg("%s" % included_file,
                          file_name,
                          LineN=line_n,
                          DontExitF=True)

        if len(not_found_list) != 0 or len(recursive_list) != 0:
            # file_name and line_n will be taken from last iteration of last for loop.
            error_msg(
                "\nNote, that quex does not handle C-Preprocessor instructions.",
                file_name,
                LineN=line_n,
                DontExitF=True,
                SuppressCode=ErrorN)
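
A small, self-contained check of what the IncludeRE pattern above extracts from
'#include' directives; the sample lines are made up.

import re

include_re_obj = re.compile("#[ \t]*include[ \t]*[\"<]([^\">]+)[\">]")

sample = '#include <token_ids.h>\n#  include "my/token_defs.h"\n'
print(include_re_obj.findall(sample))
# prints: ['token_ids.h', 'my/token_defs.h']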
Example No. 10
def do(ModeDB):
    IndentationSupportF = blackboard.requires_indentation_count(ModeDB)
    BeginOfLineSupportF = blackboard.requires_begin_of_line_condition_support(
        ModeDB)

    LanguageDB = Setup.language_db

    LexerClassName = Setup.analyzer_class_name

    ConfigurationTemplateFile =(  QUEX_PATH \
                                + Setup.language_db["$code_base"] \
                                + "/analyzer/configuration/TXT").replace("//","/")

    txt = get_file_content_or_die(ConfigurationTemplateFile)

    # -- check if exit/entry handlers have to be active
    entry_handler_active_f = False
    exit_handler_active_f = False
    for mode in ModeDB.values():
        if len(mode.get_code_fragment_list("on_entry")) != 0:
            entry_handler_active_f = True
        if len(mode.get_code_fragment_list("on_exit")) != 0:
            exit_handler_active_f = True

    # Buffer filler converter (0x0 means: no buffer filler converter)
    converter_new_str = "#   define QUEX_SETTING_BUFFER_FILLERS_CONVERTER_NEW "
    if Setup.converter_user_new_func != "":
        converter_new_str += Setup.converter_user_new_func + "()"
    else:
        converter_new_str = "/* " + converter_new_str + " */"

    # Token repetition support
    token_repeat_test_txt = ""
    for token_id_str in blackboard.token_repetition_token_id_list:
        token_repeat_test_txt += "TokenID == %s || " % token_id_str
    if token_repeat_test_txt != "":
        token_repeat_test_txt = token_repeat_test_txt[:-3]
    else:
        token_repeat_test_txt = "false"

    if Setup.analyzer_derived_class_name != "":
        analyzer_derived_class_name = Setup.analyzer_derived_class_name
    else:
        analyzer_derived_class_name = Setup.analyzer_class_name

    txt = __switch(txt, "QUEX_OPTION_COLUMN_NUMBER_COUNTING",
                   Setup.count_column_number_f)
    txt = __switch(txt, "QUEX_OPTION_COMPUTED_GOTOS", False)
    txt = __switch(txt, "QUEX_OPTION_CONVERTER_ICONV", Setup.converter_iconv_f)
    txt = __switch(txt, "QUEX_OPTION_CONVERTER_ICU", Setup.converter_icu_f)
    txt = __switch(txt, "QUEX_OPTION_INCLUDE_STACK",
                   Setup.include_stack_support_f)
    txt = __switch(txt, "QUEX_OPTION_LINE_NUMBER_COUNTING",
                   Setup.count_line_number_f)
    txt = __switch(txt, "QUEX_OPTION_POST_CATEGORIZER",
                   Setup.post_categorizer_f)
    txt = __switch(txt, "QUEX_OPTION_RUNTIME_MODE_TRANSITION_CHECK",
                   Setup.mode_transition_check_f)
    txt = __switch(txt, "QUEX_OPTION_STRING_ACCUMULATOR",
                   Setup.string_accumulator_f)
    txt = __switch(txt, "QUEX_OPTION_TOKEN_POLICY_QUEUE",
                   Setup.token_policy == "queue")
    txt = __switch(txt, "QUEX_OPTION_TOKEN_POLICY_SINGLE",
                   Setup.token_policy == "single")
    txt = __switch(txt, "QUEX_OPTION_TOKEN_REPETITION_SUPPORT",
                   token_repeat_test_txt != "false")
    txt = __switch(txt, "QUEX_OPTION_USER_MANAGED_TOKEN_MEMORY",
                   Setup.token_memory_management_by_user_f)
    txt = __switch(txt, "__QUEX_OPTION_BIG_ENDIAN",
                   Setup.buffer_byte_order == "big")
    txt = __switch(txt, "__QUEX_OPTION_CONVERTER_HELPER",
                   Setup.converter_helper_required_f)
    txt = __switch(txt, "__QUEX_OPTION_CONVERTER", Setup.converter_f)
    txt = __switch(txt, "QUEX_OPTION_INDENTATION_TRIGGER", IndentationSupportF)
    txt = __switch(txt, "__QUEX_OPTION_LITTLE_ENDIAN",
                   Setup.buffer_byte_order == "little")
    txt = __switch(txt, "__QUEX_OPTION_ON_ENTRY_HANDLER_PRESENT",
                   entry_handler_active_f)
    txt = __switch(txt, "__QUEX_OPTION_ON_EXIT_HANDLER_PRESENT",
                   exit_handler_active_f)
    txt = __switch(txt, "__QUEX_OPTION_PLAIN_C", Setup.language.upper() == "C")
    txt = __switch(txt, "__QUEX_OPTION_SUPPORT_BEGIN_OF_LINE_PRE_CONDITION",
                   BeginOfLineSupportF)
    txt = __switch(txt, "__QUEX_OPTION_SYSTEM_ENDIAN",
                   Setup.byte_order_is_that_of_current_system_f)
    txt = __switch(txt, "QUEX_OPTION_BUFFER_BASED_ANALYZIS",
                   Setup.buffer_based_analyzis_f)
    txt = __switch(txt, "__QUEX_OPTION_ENGINE_RUNNING_ON_CODEC",
                   Setup.buffer_codec != "unicode")

    # -- token class related definitions
    token_descr = blackboard.token_type_definition

    # -- name of the character codec
    codec_name = "unicode"
    if Setup.buffer_codec != "unicode":
        codec_name = make_safe_identifier(Setup.buffer_codec).lower()

    # Setup.buffer_element_size can be '-1'. This signals that
    # sizeof(QUEX_TYPE_CHARACTER) needs to be used. A numeric value
    # is required here.
    character_size_str = "%i" % Setup.buffer_element_size

    def namespace(NameSpaceList):
        result = Setup.language_db.NAMESPACE_REFERENCE(NameSpaceList)

        if result == "::": return ""

        assert Setup.language.upper() != "C++" or len(result) > 2, \
               "Error while generating namespace reference '%s'" % result

        return result[:-2]

    txt = blue_print(txt, [
        ["$$BUFFER_LIMIT_CODE$$",
         "0x%X" % Setup.buffer_limit_code],
        ["$$QUEX_SETTING_CHARACTER_CODEC$$", codec_name],
        [
            "$$INCLUDE_GUARD_EXTENSION$$",
            get_include_guard_extension(
                LanguageDB.NAMESPACE_REFERENCE(Setup.analyzer_name_space) +
                "__" + Setup.analyzer_class_name)
        ],
        [
            "$$INITIAL_LEXER_MODE_ID$$",
            "QUEX_NAME(ModeID_%s)" % blackboard.initial_mode.get_pure_code()
        ],
        ["$$LEXER_BUILD_DATE$$", time.asctime()],
        ["$$LEXER_CLASS_NAME$$", LexerClassName],
        ["$$LEXER_CLASS_NAME_SAFE$$", Setup.analyzer_name_safe],
        ["$$LEXER_DERIVED_CLASS_NAME$$", analyzer_derived_class_name],
        ["$$MAX_MODE_CLASS_N$$", repr(len(ModeDB))],
        ["$$NAMESPACE_MAIN$$",
         namespace(Setup.analyzer_name_space)],
        [
            "$$NAMESPACE_MAIN_CLOSE$$",
            LanguageDB.NAMESPACE_CLOSE(Setup.analyzer_name_space).replace(
                "\n", "\\\n")
        ],
        [
            "$$NAMESPACE_MAIN_OPEN$$",
            LanguageDB.NAMESPACE_OPEN(Setup.analyzer_name_space).replace(
                "\n", "\\\n")
        ],
        ["$$NAMESPACE_TOKEN$$",
         namespace(token_descr.name_space)],
        [
            "$$NAMESPACE_TOKEN_CLOSE$$",
            LanguageDB.NAMESPACE_CLOSE(token_descr.name_space).replace(
                "\n", "\\\n")
        ],
        [
            "$$NAMESPACE_TOKEN_OPEN$$",
            LanguageDB.NAMESPACE_OPEN(token_descr.name_space).replace(
                "\n", "\\\n")
        ],
        ["$$PATH_TERMINATION_CODE$$",
         "0x%X" % Setup.path_limit_code],
        ["$$QUEX_SETTING_BUFFER_FILLERS_CONVERTER_NEW$$", converter_new_str],
        ["$$QUEX_TYPE_CHARACTER$$", Setup.buffer_element_type],
        ["$$QUEX_SETTING_CHARACTER_SIZE$$", character_size_str],
        [
            "$$QUEX_NAMESPACE_LEXEME_NULL_OPEN$$",
            LanguageDB.NAMESPACE_OPEN(Setup.lexeme_null_namespace).replace(
                "\n", "\\\n")
        ],
        [
            "$$QUEX_NAMESPACE_LEXEME_NULL_CLOSE$$",
            LanguageDB.NAMESPACE_CLOSE(Setup.lexeme_null_namespace).replace(
                "\n", "\\\n")
        ],
        ["$$QUEX_LEXEME_NULL$$", Setup.lexeme_null_full_name_cpp],
        ["$$QUEX_LEXEME_NULL_SAFE$$", Setup.lexeme_null_name_safe],
        ["$$QUEX_LEXEME_NULL_IN_ITS_NAMESPACE$$", Setup.lexeme_null_name],
        ["$$QUEX_VERSION$$", QUEX_VERSION],
        ["$$TOKEN_CLASS$$", token_descr.class_name],
        ["$$TOKEN_CLASS_NAME_SAFE$$", token_descr.class_name_safe],
        [
            "$$TOKEN_COLUMN_N_TYPE$$",
            token_descr.column_number_type.get_pure_code()
        ],
        ["$$TOKEN_ID_TYPE$$",
         token_descr.token_id_type.get_pure_code()],
        [
            "$$TOKEN_LINE_N_TYPE$$",
            token_descr.line_number_type.get_pure_code()
        ],
        ["$$TOKEN_PREFIX$$", Setup.token_id_prefix],
        [
            "$$TOKEN_QUEUE_SAFETY_BORDER$$",
            repr(Setup.token_queue_safety_border)
        ],
        ["$$TOKEN_QUEUE_SIZE$$",
         repr(Setup.token_queue_size)],
        ["$$TOKEN_REPEAT_TEST$$", token_repeat_test_txt],
        ["$$USER_LEXER_VERSION$$", Setup.user_application_version_id],
    ])

    return txt
Example No. 11
def parse(ForeignTokenIdFile, CommentDelimiterList):
    """This function somehow interprets the user defined token id file--if there is
       one. It does this in order to find the names of defined token ids. It does
       some basic interpretation and include file following, but: **it is in no
       way perfect**. Since its only purpose is to avoid warnings about token ids
       that are not defined it is not essential that it may fail sometimes.

       It is more like a nice feature that quex tries to find definitions on its own.
       
       Nevertheless, it should work in the large majority of cases.
    """
    # Regular expression to find '#include <something>' and extract the 'something'
    # in a 'group'. Note that '(' ')' cause the storage of parts of the match.
    IncludeRE = "#[ \t]*include[ \t]*[\"<]([^\">]+)[\">]"

    include_re_obj = re.compile(IncludeRE)

    def get_line_n_of_include(FileName, IncludedFileName):
        fh = open_file_or_die(FileName, Mode="rb")
        line_n = 0
        for line in fh.readlines():
            line_n += 1
            if include_re_obj.search(line) is not None and line.find(IncludedFileName) != -1:
                break
        else:
            # The included file should appear in the including file; if not, tolerate it for safety.
            pass

        fh.close()
        return line_n

    # validate(...) ensured that the file exists.
    work_list      = [ ForeignTokenIdFile ] 
    done_list      = []
    not_found_list = []
    recursive_list = []
    found_db       = {}
    while len(work_list) != 0:
        file_name = work_list.pop()
        content   = __delete_comments(get_file_content_or_die(file_name, Mode="rb"), 
                                      CommentDelimiterList)
        done_list.append(os.path.normpath(file_name))

        # (*) Search for TokenID definitions 
        begin_i = 0
        end_i   = len(content)
        if Setup.token_id_foreign_definition_file_region_begin_re is not None:
            match = Setup.token_id_foreign_definition_file_region_begin_re.search(content)
            if match is not None:
                begin_i = match.end()

        if Setup.token_id_foreign_definition_file_region_end_re is not None:
            match = Setup.token_id_foreign_definition_file_region_end_re.search(content, pos=begin_i)
            if match is not None:
                end_i = match.start()
        content = content[begin_i:end_i]

        token_id_list = __extract_token_ids(content, file_name)
        if len(token_id_list) != 0:
            found_db[file_name] = copy(token_id_list)

        token_id_foreign_set.update(token_id_list)
        for token_name in token_id_list:
            # NOTE: The line number might be wrong, because of the comment deletion
            line_n = 0
            # NOTE: The actual token value is not important, since the token's numeric
            #       identifier is defined in the user's header. We do not care.
            prefix_less_token_name = cut_token_id_prefix(token_name)
            token_id_db[prefix_less_token_name] = \
                        TokenInfo(prefix_less_token_name, None, None, SourceRef(file_name, line_n)) 
        
        # (*) find "#include" statements
        #     'set' ensures that each entry is unique
        include_file_set = set(include_re_obj.findall(content))

        #     -- ensure that included files exist and are not included twice
        for included_file in include_file_set:
            normed_included_file = os.path.normpath(included_file)
            if included_file in done_list:
                line_n = get_line_n_of_include(file_name, included_file)
                recursive_list.append((file_name, line_n, included_file))
            elif not os.access(normed_included_file, os.F_OK): 
                line_n = get_line_n_of_include(file_name, included_file)
                not_found_list.append((file_name, line_n, included_file))
            elif normed_included_file not in done_list:
                work_list.append(included_file)

    if Setup.token_id_foreign_definition_file_show_f:
        if len(found_db) == 0:
            error_msg(  "No token ids with prefix '%s' found in" % Setup.token_id_prefix
                      + "'%s' or included files." % Setup.token_id_foreign_definition_file, 
                     NoteF=True)
        else:
            txt = [] 
            for file_name, result in found_db.iteritems():
                result = set(result)
                L = max(map(len, result))
                txt.append("Token ids found in file '%s' {\n" % file_name)
                for name in sorted(result):
                    shorty = cut_token_id_prefix(name)
                    fully  = Setup.token_id_prefix + shorty
                    txt.append("     %s %s=> '%s'\n" % (fully, space(L, name), shorty))
                txt.append("}")
                txt.append("\n")

            if txt: txt = txt[:-1]
            error_msg("".join(txt), NoteF=True)
            
    ErrorN = NotificationDB.token_id_ignored_files_report
    if ErrorN not in Setup.suppressed_notification_list:
        if len(not_found_list) != 0:
            not_found_list.sort()
            error_msg("Files not found:", 
                      not_found_list[0][0], LineN=not_found_list[0][1], 
                      DontExitF=True)
            for file_name, line_n, included_file in not_found_list:
                error_msg("%s" % included_file, file_name, LineN=line_n, DontExitF=True)

        if len(recursive_list) != 0:
            recursive_list.sort()
            error_msg("Files recursively included (ignored second inclusion):", 
                      recursive_list[0][0], LineN=recursive_list[0][1], 
                      DontExitF=True)
            for file_name, line_n, included_file in recursive_list:
                error_msg("%s" % included_file, file_name, LineN=line_n, DontExitF=True)

        if len(not_found_list) != 0 or len(recursive_list) != 0:
            # file_name and line_n will be taken from last iteration of last for loop.
            error_msg("\nNote, that quex does not handle C-Preprocessor instructions.",
                      file_name, LineN=line_n, DontExitF=True, SuppressCode=ErrorN)