Exemplo n.º 1
0
def _do(UnicodeTrafoInfo):
    """
    PURPOSE: Produces the converter helper texts for a codec, i.e. the
             conversions from the codec towards UTF8, UTF16, and UTF32.

    The three converter writers (UTF8/UTF16/UTF32) are each run on
    'UnicodeTrafoInfo'. Their function bodies, together with the epilog of
    the UTF8 writer, are inserted into the implementation template
    'TXT-from-codec-buffer.i'. The declaration header is derived from the
    template 'TXT-from-codec-buffer', where only the codec name is inserted.

    UnicodeTrafoInfo:

       Must provide a '.name' attribute; the codec identifier used in the
       generated text is derived from it. The object is handed unchanged to
       the converter writers. The original documentation describes it as
       the relation of character codes in a particular coding to unicode
       character codes, given as

       # Codec Values                 Unicode Values
       [ (Source0_Begin, Source0_End, TargetInterval0_Begin),
         (Source1_Begin, Source1_End, TargetInterval1_Begin),
         ...
       ]

       NOTE(review): that layout is consumed by the converter writers, not
       by this function -- confirm there.

    RETURNS: Tuple (header text, implementation text).
    """
    codec_identifier = make_safe_identifier(UnicodeTrafoInfo.name).lower()

    # Every writer delivers a pair. Only the UTF8 writer's first element
    # (the epilog) is used; for UTF16 and UTF32 it is dropped.
    epilog_utf8,           body_utf8  = ConverterWriterUTF8().do(UnicodeTrafoInfo)
    unused_utf16_prolog_,  body_utf16 = ConverterWriterUTF16().do(UnicodeTrafoInfo)
    unused_utf32_first_,   body_utf32 = ConverterWriterUTF32().do(UnicodeTrafoInfo)

    # -- implementation file
    implementation_template = os.path.normpath(
        QUEX_PATH + Lng["$code_base"]
        + "/converter_helper/TXT-from-codec-buffer.i")
    header_reference = Setup.get_file_reference(Setup.output_buffer_codec_header)

    replacement_list = [
        ["$$CODEC$$",        codec_identifier],
        ["$$EPILOG$$",       epilog_utf8],
        ["$$CODEC_HEADER$$", header_reference],
        ["$$BODY_UTF8$$",    body_utf8],
        ["$$BODY_UTF16$$",   body_utf16],
        ["$$BODY_UTF32$$",   body_utf32],
    ]
    implementation_txt = blue_print(
        get_file_content_or_die(implementation_template), replacement_list)

    # -- declaration header (separate file; only the codec name is inserted)
    header_template = os.path.normpath(
        QUEX_PATH + Lng["$code_base"]
        + "/converter_helper/TXT-from-codec-buffer")
    header_txt = get_file_content_or_die(header_template).replace(
        "$$CODEC$$", codec_identifier)

    return header_txt, implementation_txt
Exemplo n.º 2
0
def _do(UnicodeTrafoInfo):
    """
    PURPOSE: Generates header and implementation text of the converter
             helper for one codec (conversion towards UTF8/UTF16/UTF32).

    UnicodeTrafoInfo:

       Object with a '.name' attribute (source of the codec identifier). It
       is passed as-is to 'ConverterWriterUTF8', 'ConverterWriterUTF16' and
       'ConverterWriterUTF32'. As documented originally, it carries the
       relation between the codec's character codes and unicode:

       # Codec Values                 Unicode Values
       [ (Source0_Begin, Source0_End, TargetInterval0_Begin),
         (Source1_Begin, Source1_End, TargetInterval1_Begin),
         ...
       ]

       NOTE(review): this function itself never looks at those intervals;
       verify the layout against the converter writers.

    RETURNS: (txt_h, txt_i) -- declaration header text, implementation text.
    """

    def template_path(RelativeName):
        # Normalized path of a template below the language's code base.
        return os.path.normpath(QUEX_PATH + Lng["$code_base"] + RelativeName)

    name = make_safe_identifier(UnicodeTrafoInfo.name).lower()

    # The writers return pairs. For UTF16/UTF32 the first element is
    # received but never used.
    epilog, function_utf8 = ConverterWriterUTF8().do(UnicodeTrafoInfo)
    ignored_prolog_, function_utf16 = ConverterWriterUTF16().do(UnicodeTrafoInfo)
    ignored_first_, function_utf32 = ConverterWriterUTF32().do(UnicodeTrafoInfo)

    # Implementation: fill all placeholders of the '.i' template.
    path_i = template_path("/converter_helper/TXT-from-codec-buffer.i")
    header_file_reference = Setup.get_file_reference(
        Setup.output_buffer_codec_header)
    template_i = get_file_content_or_die(path_i)
    result_i = blue_print(template_i, [
        ["$$CODEC$$", name],
        ["$$EPILOG$$", epilog],
        ["$$CODEC_HEADER$$", header_file_reference],
        ["$$BODY_UTF8$$", function_utf8],
        ["$$BODY_UTF16$$", function_utf16],
        ["$$BODY_UTF32$$", function_utf32],
    ])

    # Declaration header: a separate template that only needs the codec name.
    path_h = template_path("/converter_helper/TXT-from-codec-buffer")
    result_h = get_file_content_or_die(path_h).replace("$$CODEC$$", name)

    return result_h, result_i
Exemplo n.º 3
0
def do(ModeDescriptionDB):
    """
    PURPOSE: Produces the analyzer's configuration text.

    The template '<QUEX_PATH><code_base>/analyzer/configuration/TXT' is
    read. Each option name is passed through '__switch()' together with the
    flag that tells whether it is active. Finally, the '$$...$$'
    placeholders are filled by 'blue_print()'.

    ModeDescriptionDB:

       Container of mode descriptions. Its '.values()' are iterated; each
       mode's 'incidence_db' is asked for MODE_ENTRY and MODE_EXIT handlers.
       Its length is reported as '$$MAX_MODE_CLASS_N$$'.

    RETURNS: The configuration text with all options and placeholders set.
    """
    indentation_support_f   = blackboard.required_support_indentation_count()
    begin_of_line_support_f = blackboard.required_support_begin_of_line()

    template_file_name = (QUEX_PATH + Lng["$code_base"]
                          + "/analyzer/configuration/TXT").replace("//", "/")
    txt = get_file_content_or_die(template_file_name)

    # -- entry/exit handlers are 'present' as soon as any mode defines one
    on_entry_present_f = False
    on_exit_present_f  = False
    for mode in ModeDescriptionDB.values():
        incidence_db = mode.incidence_db
        on_entry_present_f |= incidence_db.has_key(E_IncidenceIDs.MODE_ENTRY)
        on_exit_present_f  |= incidence_db.has_key(E_IncidenceIDs.MODE_EXIT)

    # -- buffer filler converter: without a user defined 'new' function the
    #    definition is emitted commented out.
    converter_define = "#   define QUEX_SETTING_BUFFER_FILLERS_CONVERTER_NEW "
    if Setup.converter_user_new_func == "":
        converter_new_str = "/* " + converter_define + " */"
    else:
        converter_new_str = converter_define + Setup.converter_user_new_func + "()"

    # -- token repetition support: chain of 'TokenID == X' tests. Cutting
    #    the last three characters removes the dangling '|| ' and leaves a
    #    trailing space, exactly as the incremental construction did.
    repetition_chain = "".join(["TokenID == %s || " % token_id_str
                                for token_id_str in blackboard.token_repetition_token_id_list])
    if repetition_chain == "":
        token_repeat_test_txt = "false"
    else:
        token_repeat_test_txt = repetition_chain[:-3]

    # -- without an explicit derived class, the analyzer class takes its place
    derived_class_name = Setup.analyzer_derived_class_name
    if derived_class_name == "":
        derived_class_name = Setup.analyzer_class_name

    # -- options; applied strictly in the listed sequence
    option_list = [
        ("QUEX_OPTION_COLUMN_NUMBER_COUNTING",        Setup.count_column_number_f),
        ("QUEX_OPTION_COMPUTED_GOTOS",                False),
        ("QUEX_OPTION_CONVERTER_ICONV",               Setup.converter_iconv_f),
        ("QUEX_OPTION_CONVERTER_ICU",                 Setup.converter_icu_f),
        ("QUEX_OPTION_INCLUDE_STACK",                 Setup.include_stack_support_f),
        ("QUEX_OPTION_LINE_NUMBER_COUNTING",          Setup.count_line_number_f),
        ("QUEX_OPTION_POST_CATEGORIZER",              Setup.post_categorizer_f),
        ("QUEX_OPTION_RUNTIME_MODE_TRANSITION_CHECK", Setup.mode_transition_check_f),
        ("QUEX_OPTION_STRING_ACCUMULATOR",            Setup.string_accumulator_f),
        ("QUEX_OPTION_TOKEN_POLICY_QUEUE",            Setup.token_policy == "queue"),
        ("QUEX_OPTION_TOKEN_POLICY_SINGLE",           Setup.token_policy == "single"),
        ("QUEX_OPTION_TOKEN_REPETITION_SUPPORT",      token_repeat_test_txt != "false"),
        ("QUEX_OPTION_USER_MANAGED_TOKEN_MEMORY",     Setup.token_memory_management_by_user_f),
        ("__QUEX_OPTION_BIG_ENDIAN",                  Setup.buffer_byte_order == "big"),
        ("__QUEX_OPTION_CONVERTER_HELPER",            Setup.converter_helper_required_f),
        ("__QUEX_OPTION_CONVERTER",                   Setup.converter_f),
        ("QUEX_OPTION_INDENTATION_TRIGGER",           indentation_support_f),
        ("__QUEX_OPTION_LITTLE_ENDIAN",               Setup.buffer_byte_order == "little"),
        ("__QUEX_OPTION_ON_ENTRY_HANDLER_PRESENT",    on_entry_present_f),
        ("__QUEX_OPTION_ON_EXIT_HANDLER_PRESENT",     on_exit_present_f),
        ("__QUEX_OPTION_PLAIN_C",                     Setup.language.upper() == "C"),
        ("__QUEX_OPTION_SUPPORT_BEGIN_OF_LINE_PRE_CONDITION", begin_of_line_support_f),
        ("__QUEX_OPTION_SYSTEM_ENDIAN",               Setup.byte_order_is_that_of_current_system_f),
        ("QUEX_OPTION_BUFFER_BASED_ANALYZIS",         Setup.buffer_based_analyzis_f),
        ("__QUEX_OPTION_ENGINE_RUNNING_ON_CODEC",     Setup.buffer_codec.name != "unicode"),
    ]
    for option_name, active_f in option_list:
        txt = __switch(txt, option_name, active_f)

    # -- token class related definitions
    token_type = blackboard.token_type_definition

    # -- name of the character codec
    codec_identifier = make_safe_identifier(Setup.buffer_codec.name).lower()

    # Original note: 'Setup.buffer_element_size' can be '-1', signalling
    # that sizeof(QUEX_TYPE_CHARACTER) needs to be used. The value is
    # written as a plain integer here.
    element_size_str = "%i" % Setup.buffer_element_size

    def namespace_reference(NameSpaceList):
        # Reference to the namespace without trailing delimiter; an empty
        # reference is passed on as empty string.
        reference = Lng.NAMESPACE_REFERENCE(NameSpaceList, TrailingDelimiterF=False)

        if len(reference) == 0:
            return ""

        assert Setup.language.upper() != "C++" or len(reference) > 2, \
               "Error while generating namespace reference '%s'" % reference

        return reference

    def backslash_newlines(Text):
        # Puts a backslash in front of every newline of 'Text'.
        return Text.replace("\n", "\\\n")

    include_guard_extension = get_include_guard_extension(
        Lng.NAMESPACE_REFERENCE(Setup.analyzer_name_space)
        + "__" + Setup.analyzer_class_name)

    return blue_print(txt, [
        ["$$BUFFER_LIMIT_CODE$$",                         "0x%X" % Setup.buffer_limit_code],
        ["$$QUEX_SETTING_CHARACTER_CODEC$$",              codec_identifier],
        ["$$INCLUDE_GUARD_EXTENSION$$",                   include_guard_extension],
        ["$$INITIAL_LEXER_MODE_ID$$",                     "QUEX_NAME(ModeID_%s)" % blackboard.initial_mode.get_pure_text()],
        ["$$LEXER_BUILD_DATE$$",                          time.asctime()],
        ["$$LEXER_CLASS_NAME$$",                          Setup.analyzer_class_name],
        ["$$LEXER_CLASS_NAME_SAFE$$",                     Setup.analyzer_name_safe],
        ["$$LEXER_DERIVED_CLASS_NAME$$",                  derived_class_name],
        ["$$MAX_MODE_CLASS_N$$",                          repr(len(ModeDescriptionDB))],
        ["$$NAMESPACE_MAIN$$",                            namespace_reference(Setup.analyzer_name_space)],
        ["$$NAMESPACE_MAIN_CLOSE$$",                      backslash_newlines(Lng.NAMESPACE_CLOSE(Setup.analyzer_name_space))],
        ["$$NAMESPACE_MAIN_OPEN$$",                       backslash_newlines(Lng.NAMESPACE_OPEN(Setup.analyzer_name_space))],
        ["$$NAMESPACE_TOKEN$$",                           namespace_reference(token_type.name_space)],
        ["$$NAMESPACE_TOKEN_CLOSE$$",                     backslash_newlines(Lng.NAMESPACE_CLOSE(token_type.name_space))],
        ["$$NAMESPACE_TOKEN_OPEN$$",                      backslash_newlines(Lng.NAMESPACE_OPEN(token_type.name_space))],
        ["$$PATH_TERMINATION_CODE$$",                     "0x%X" % Setup.path_limit_code],
        ["$$QUEX_SETTING_BUFFER_FILLERS_CONVERTER_NEW$$", converter_new_str],
        ["$$QUEX_TYPE_CHARACTER$$",                       Setup.buffer_element_type],
        ["$$QUEX_SETTING_CHARACTER_SIZE$$",               element_size_str],
        ["$$QUEX_NAMESPACE_LEXEME_NULL_OPEN$$",           backslash_newlines(Lng.NAMESPACE_OPEN(Setup.lexeme_null_namespace))],
        ["$$QUEX_NAMESPACE_LEXEME_NULL_CLOSE$$",          backslash_newlines(Lng.NAMESPACE_CLOSE(Setup.lexeme_null_namespace))],
        ["$$QUEX_LEXEME_NULL$$",                          Setup.lexeme_null_full_name_cpp],
        ["$$QUEX_LEXEME_NULL_SAFE$$",                     Setup.lexeme_null_name_safe],
        ["$$QUEX_LEXEME_NULL_IN_ITS_NAMESPACE$$",         Setup.lexeme_null_name],
        ["$$QUEX_VERSION$$",                              QUEX_VERSION],
        ["$$TOKEN_CLASS$$",                               token_type.class_name],
        ["$$TOKEN_CLASS_NAME_SAFE$$",                     token_type.class_name_safe],
        ["$$TOKEN_COLUMN_N_TYPE$$",                       token_type.column_number_type.get_pure_text()],
        ["$$TOKEN_ID_TYPE$$",                             token_type.token_id_type.get_pure_text()],
        ["$$TOKEN_LINE_N_TYPE$$",                         token_type.line_number_type.get_pure_text()],
        ["$$TOKEN_PREFIX$$",                              Setup.token_id_prefix],
        ["$$TOKEN_QUEUE_SAFETY_BORDER$$",                 repr(Setup.token_queue_safety_border)],
        ["$$TOKEN_QUEUE_SIZE$$",                          repr(Setup.token_queue_size)],
        ["$$TOKEN_REPEAT_TEST$$",                         token_repeat_test_txt],
        ["$$USER_LEXER_VERSION$$",                        Setup.user_application_version_id],
    ])
Exemplo n.º 4
0
def do(ModeDB):
    """
    PURPOSE: Produces the analyzer's configuration text.

    Reads the template '<QUEX_PATH><code_base>/analyzer/configuration/TXT',
    passes every option name together with its on/off flag through
    '__switch()', and fills the '$$...$$' placeholders with 'blue_print()'.

    ModeDB:

       Container of modes. It is handed to the blackboard queries about
       indentation counting and begin-of-line support. Its '.values()' are
       asked for their 'on_entry' and 'on_exit' code fragments. Its length
       is reported as '$$MAX_MODE_CLASS_N$$'.

    RETURNS: The configuration text with all options and placeholders set.
    """
    indentation_f = blackboard.requires_indentation_count(ModeDB)
    begin_of_line_f = blackboard.requires_begin_of_line_condition_support(ModeDB)

    language_db = Setup.language_db

    template_file_name = (QUEX_PATH + language_db["$code_base"]
                          + "/analyzer/configuration/TXT").replace("//", "/")
    txt = get_file_content_or_die(template_file_name)

    # -- a handler counts as present as soon as one mode has code for it
    on_entry_f = False
    on_exit_f = False
    for mode in ModeDB.values():
        if len(mode.get_code_fragment_list("on_entry")) != 0:
            on_entry_f = True
        if len(mode.get_code_fragment_list("on_exit")) != 0:
            on_exit_f = True

    # -- buffer filler converter: without a user defined 'new' function the
    #    definition is emitted commented out.
    define_txt = "#   define QUEX_SETTING_BUFFER_FILLERS_CONVERTER_NEW "
    if Setup.converter_user_new_func == "":
        converter_new_str = "/* " + define_txt + " */"
    else:
        converter_new_str = define_txt + Setup.converter_user_new_func + "()"

    # -- token repetition support: chain of 'TokenID == X' tests. Cutting
    #    the last three characters drops the dangling '|| ' but keeps one
    #    trailing space, as the incremental construction did.
    test_chain = "".join(["TokenID == %s || " % token_id_str
                          for token_id_str in blackboard.token_repetition_token_id_list])
    if test_chain == "":
        token_repeat_test_txt = "false"
    else:
        token_repeat_test_txt = test_chain[:-3]

    # -- without an explicit derived class, the analyzer class takes its place
    derived_class_name = Setup.analyzer_derived_class_name
    if derived_class_name == "":
        derived_class_name = Setup.analyzer_class_name

    # -- options; applied strictly in the listed sequence
    switch_list = [
        ("QUEX_OPTION_COLUMN_NUMBER_COUNTING", Setup.count_column_number_f),
        ("QUEX_OPTION_COMPUTED_GOTOS", False),
        ("QUEX_OPTION_CONVERTER_ICONV", Setup.converter_iconv_f),
        ("QUEX_OPTION_CONVERTER_ICU", Setup.converter_icu_f),
        ("QUEX_OPTION_INCLUDE_STACK", Setup.include_stack_support_f),
        ("QUEX_OPTION_LINE_NUMBER_COUNTING", Setup.count_line_number_f),
        ("QUEX_OPTION_POST_CATEGORIZER", Setup.post_categorizer_f),
        ("QUEX_OPTION_RUNTIME_MODE_TRANSITION_CHECK", Setup.mode_transition_check_f),
        ("QUEX_OPTION_STRING_ACCUMULATOR", Setup.string_accumulator_f),
        ("QUEX_OPTION_TOKEN_POLICY_QUEUE", Setup.token_policy == "queue"),
        ("QUEX_OPTION_TOKEN_POLICY_SINGLE", Setup.token_policy == "single"),
        ("QUEX_OPTION_TOKEN_REPETITION_SUPPORT", token_repeat_test_txt != "false"),
        ("QUEX_OPTION_USER_MANAGED_TOKEN_MEMORY", Setup.token_memory_management_by_user_f),
        ("__QUEX_OPTION_BIG_ENDIAN", Setup.buffer_byte_order == "big"),
        ("__QUEX_OPTION_CONVERTER_HELPER", Setup.converter_helper_required_f),
        ("__QUEX_OPTION_CONVERTER", Setup.converter_f),
        ("QUEX_OPTION_INDENTATION_TRIGGER", indentation_f),
        ("__QUEX_OPTION_LITTLE_ENDIAN", Setup.buffer_byte_order == "little"),
        ("__QUEX_OPTION_ON_ENTRY_HANDLER_PRESENT", on_entry_f),
        ("__QUEX_OPTION_ON_EXIT_HANDLER_PRESENT", on_exit_f),
        ("__QUEX_OPTION_PLAIN_C", Setup.language.upper() == "C"),
        ("__QUEX_OPTION_SUPPORT_BEGIN_OF_LINE_PRE_CONDITION", begin_of_line_f),
        ("__QUEX_OPTION_SYSTEM_ENDIAN", Setup.byte_order_is_that_of_current_system_f),
        ("QUEX_OPTION_BUFFER_BASED_ANALYZIS", Setup.buffer_based_analyzis_f),
        ("__QUEX_OPTION_ENGINE_RUNNING_ON_CODEC", Setup.buffer_codec != "unicode"),
    ]
    for option_name, active_f in switch_list:
        txt = __switch(txt, option_name, active_f)

    # -- token class related definitions
    token_type = blackboard.token_type_definition

    # -- name of the character codec; any codec other than "unicode" is
    #    converted into a safe, lower case identifier.
    if Setup.buffer_codec != "unicode":
        codec_identifier = make_safe_identifier(Setup.buffer_codec).lower()
    else:
        codec_identifier = "unicode"

    # Original note: 'Setup.buffer_element_size' can be '-1', signalling
    # that sizeof(QUEX_TYPE_CHARACTER) needs to be used. The value is
    # written as a plain integer here.
    element_size_str = "%i" % Setup.buffer_element_size

    def namespace_reference(NameSpaceList):
        # Namespace reference with its last two characters cut off; a
        # reference that consists only of '::' becomes the empty string.
        reference = language_db.NAMESPACE_REFERENCE(NameSpaceList)

        if reference == "::":
            return ""

        assert Setup.language.upper() != "C++" or len(reference) > 2, \
               "Error while generating namespace reference '%s'" % reference

        return reference[:-2]

    def backslash_newlines(Text):
        # Puts a backslash in front of every newline of 'Text'.
        return Text.replace("\n", "\\\n")

    include_guard_extension = get_include_guard_extension(
        language_db.NAMESPACE_REFERENCE(Setup.analyzer_name_space)
        + "__" + Setup.analyzer_class_name)

    return blue_print(txt, [
        ["$$BUFFER_LIMIT_CODE$$", "0x%X" % Setup.buffer_limit_code],
        ["$$QUEX_SETTING_CHARACTER_CODEC$$", codec_identifier],
        ["$$INCLUDE_GUARD_EXTENSION$$", include_guard_extension],
        ["$$INITIAL_LEXER_MODE_ID$$", "QUEX_NAME(ModeID_%s)" % blackboard.initial_mode.get_pure_code()],
        ["$$LEXER_BUILD_DATE$$", time.asctime()],
        ["$$LEXER_CLASS_NAME$$", Setup.analyzer_class_name],
        ["$$LEXER_CLASS_NAME_SAFE$$", Setup.analyzer_name_safe],
        ["$$LEXER_DERIVED_CLASS_NAME$$", derived_class_name],
        ["$$MAX_MODE_CLASS_N$$", repr(len(ModeDB))],
        ["$$NAMESPACE_MAIN$$", namespace_reference(Setup.analyzer_name_space)],
        ["$$NAMESPACE_MAIN_CLOSE$$", backslash_newlines(language_db.NAMESPACE_CLOSE(Setup.analyzer_name_space))],
        ["$$NAMESPACE_MAIN_OPEN$$", backslash_newlines(language_db.NAMESPACE_OPEN(Setup.analyzer_name_space))],
        ["$$NAMESPACE_TOKEN$$", namespace_reference(token_type.name_space)],
        ["$$NAMESPACE_TOKEN_CLOSE$$", backslash_newlines(language_db.NAMESPACE_CLOSE(token_type.name_space))],
        ["$$NAMESPACE_TOKEN_OPEN$$", backslash_newlines(language_db.NAMESPACE_OPEN(token_type.name_space))],
        ["$$PATH_TERMINATION_CODE$$", "0x%X" % Setup.path_limit_code],
        ["$$QUEX_SETTING_BUFFER_FILLERS_CONVERTER_NEW$$", converter_new_str],
        ["$$QUEX_TYPE_CHARACTER$$", Setup.buffer_element_type],
        ["$$QUEX_SETTING_CHARACTER_SIZE$$", element_size_str],
        ["$$QUEX_NAMESPACE_LEXEME_NULL_OPEN$$", backslash_newlines(language_db.NAMESPACE_OPEN(Setup.lexeme_null_namespace))],
        ["$$QUEX_NAMESPACE_LEXEME_NULL_CLOSE$$", backslash_newlines(language_db.NAMESPACE_CLOSE(Setup.lexeme_null_namespace))],
        ["$$QUEX_LEXEME_NULL$$", Setup.lexeme_null_full_name_cpp],
        ["$$QUEX_LEXEME_NULL_SAFE$$", Setup.lexeme_null_name_safe],
        ["$$QUEX_LEXEME_NULL_IN_ITS_NAMESPACE$$", Setup.lexeme_null_name],
        ["$$QUEX_VERSION$$", QUEX_VERSION],
        ["$$TOKEN_CLASS$$", token_type.class_name],
        ["$$TOKEN_CLASS_NAME_SAFE$$", token_type.class_name_safe],
        ["$$TOKEN_COLUMN_N_TYPE$$", token_type.column_number_type.get_pure_code()],
        ["$$TOKEN_ID_TYPE$$", token_type.token_id_type.get_pure_code()],
        ["$$TOKEN_LINE_N_TYPE$$", token_type.line_number_type.get_pure_code()],
        ["$$TOKEN_PREFIX$$", Setup.token_id_prefix],
        ["$$TOKEN_QUEUE_SAFETY_BORDER$$", repr(Setup.token_queue_safety_border)],
        ["$$TOKEN_QUEUE_SIZE$$", repr(Setup.token_queue_size)],
        ["$$TOKEN_REPEAT_TEST$$", token_repeat_test_txt],
        ["$$USER_LEXER_VERSION$$", Setup.user_application_version_id],
    ])