Example #1
    def check_grid_specification(self, Value, sr):
        if   Value == 0:
            error.log("A grid count of 0 is nonsense. May be define a space count of 0.", sr)
        elif Value == 1:
            error.warning("Indentation grid counts of '1' are equivalent to a space\n" + \
                          "count of '1'. The latter is faster to compute.",
                          sr)
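
Across these excerpts, error.warning() and error.log() consistently take a message string, a source reference (a SourceRef object, a file handle, or a stream), and sometimes a SuppressCode= keyword referring to an entry of NotificationDB. The quex error module itself is not part of these excerpts; the stand-alone sketch below only mirrors that call pattern so the snippets can be read in isolation. The attribute names file_name and line_n, the suppression list, and the exit behaviour of log() are assumptions inferred from the call sites, not quex's actual implementation.

# Minimal stand-in for the 'error' interface used throughout these examples.
# This is a sketch inferred from the call sites, not quex source code.
import sys

suppressed_notification_list = []   # assumed: filled from '--suppress' command line options

def _emit(Prefix, Msg, Sr, SuppressCode):
    # Skip the message entirely if its notification code has been suppressed.
    if SuppressCode is not None and SuppressCode in suppressed_notification_list:
        return
    if Sr is not None:
        # 'file_name'/'line_n' are assumed attribute names of the source reference.
        Prefix = "%s:%s: %s" % (getattr(Sr, "file_name", "<unknown>"),
                                getattr(Sr, "line_n", "?"), Prefix)
    sys.stderr.write(Prefix + Msg + "\n")

def warning(Msg, Sr=None, SuppressCode=None):
    # Non-fatal: report and continue.
    _emit("warning: ", Msg, Sr, SuppressCode)

def log(Msg, Sr=None, DontExitF=False, NoteF=False, SuppressCode=None):
    # Fatal by default: report and abort, unless DontExitF is given
    # (as in Example #16 below, which emits a trailing note and continues).
    _emit("note: " if NoteF else "error: ", Msg, Sr, SuppressCode)
    if not DontExitF:
        sys.exit(-1)
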
Example #2
    def __sm_newline_default(self):
        """Default newline: '(\n)|(\r\n)'
        """
        global cc_type_name_db

        newline_set = NumberSet(ord('\n'))
        retour_set  = NumberSet(ord('\r'))

        before = self.specifier_count_op_map.find_occupier(newline_set, set())
        if before is not None:
            error.warning("Trying to implement default newline: '\\n' or '\\r\\n'.\n" 
                          "The '\\n' option is not possible, since it has been occupied by '%s'.\n" \
                          "No newline can be defined by default."
                          % cc_type_name_db[before.cc_type], before.sr, 
                          SuppressCode=NotificationDB.warning_default_newline_0A_impossible)
            # In this case, no newline can be defined!
            return

        sm = StateMachine.from_character_set(newline_set)

        if Setup.dos_carriage_return_newline_f:
            before = self.specifier_count_op_map.find_occupier(retour_set, set())
            if before is not None:
                error.warning("Trying to implement default newline: '\\n' or '\\r\\n'.\n" 
                          "The '\\r\\n' option is not possible, since '\\r' has been occupied by '%s'." \
                          % cc_type_name_db[before.cc_type],
                          before.sr, 
                          SuppressCode=NotificationDB.warning_default_newline_0D_impossible)
            else:
                sm.add_transition_sequence(sm.init_state_index, [retour_set, newline_set])

        return sm
Example #3
File: mode.py Project: nyulacska/gpr
def __parse_base_mode_list(fh, new_mode):
    new_mode.derived_from_list = []
    trailing_comma_f = False
    while 1 + 1 == 2:
        if check(fh, "{"):
            fh.seek(-1, 1)
            break
        elif check(fh, "<"):
            fh.seek(-1, 1)
            break

        skip_whitespace(fh)
        identifier = read_identifier(fh)
        if identifier == "": break

        new_mode.derived_from_list.append(identifier)
        trailing_comma_f = False
        if not check(fh, ","): break
        trailing_comma_f = True

    if trailing_comma_f:
        error.warning(
            "Trailing ',' after base mode '%s'." %
            new_mode.derived_from_list[-1], fh)

    elif len(new_mode.derived_from_list) != 0:
        # This check is a 'service' -- for those who follow the old convention
        pos = fh.tell()
        skip_whitespace(fh)
        dummy_identifier = read_identifier(fh)
        if dummy_identifier != "":
            error.log("Missing separating ',' between base modes '%s' and '%s'.\n" \
                      % (new_mode.derived_from_list[-1], dummy_identifier) + \
                      "(The comma separator is mandatory since quex 0.53.1)", fh)
        fh.seek(pos)
Example #4
File: mode.py Project: mplucinski/quex
def __parse_base_mode_list(fh, new_mode):
    new_mode.derived_from_list = []
    trailing_comma_f    = False
    while 1 + 1 == 2:
        if   check(fh, "{"): fh.seek(-1, 1); break
        elif check(fh, "<"): fh.seek(-1, 1); break

        skip_whitespace(fh)
        identifier = read_identifier(fh)
        if identifier == "": break

        new_mode.derived_from_list.append(identifier)
        trailing_comma_f = False
        if not check(fh, ","): break
        trailing_comma_f = True


    if trailing_comma_f:
        error.warning("Trailing ',' after base mode '%s'." % new_mode.derived_from_list[-1], fh) 
        
    elif len(new_mode.derived_from_list) != 0:
        # This check is a 'service' -- for those who follow the old convention
        pos = fh.tell()
        skip_whitespace(fh)
        dummy_identifier = read_identifier(fh)
        if dummy_identifier != "":
            error.log("Missing separating ',' between base modes '%s' and '%s'.\n" \
                      % (new_mode.derived_from_list[-1], dummy_identifier) + \
                      "(The comma separator is mandatory since quex 0.53.1)", fh)
        fh.seek(pos)
Example #5
def token_id_db_verify_or_enter_token_id(fh, TokenName):
    global Setup

    prefix_less_TokenName = cut_token_id_prefix(TokenName, fh)

    # Occasionally add token id automatically to database
    if not blackboard.token_id_db.has_key(prefix_less_TokenName):
        # DO NOT ENFORCE THE TOKEN ID TO BE DEFINED, BECAUSE WHEN THE TOKEN ID
        # IS DEFINED IN C-CODE, THE IDENTIFICATION IS NOT 100% SAFE.
        if TokenName in blackboard.token_id_db.keys():
            msg = "Token id '%s' defined implicitly.\n" % TokenName
            msg += "'%s' has been defined in a token { ... } section!\n" % \
                   (Setup.token_id_prefix + TokenName)
            msg += "Token ids in the token { ... } section are automatically prefixed."
            error.warning(msg,
                          fh,
                          SuppressCode=NotificationDB.
                          warning_usage_of_undefined_token_id_name)
        else:
            # Warning is posted later when all implicit tokens have been
            # collected. See "token_id_maker.__propose_implicit_token_definitions()"
            blackboard.token_id_implicit_list.append(
                (prefix_less_TokenName, SourceRef.from_FileHandle(fh)))

        # Enter the implicit token id definition in the database
        blackboard.token_id_db[prefix_less_TokenName] = \
                TokenInfo(prefix_less_TokenName, None, None,
                          SourceRef.from_FileHandle(fh))
Example #6
def __warn_on_double_definition():
    """Double check that no token id appears twice. Again, this can only happen,
    if quex itself produced the numeric values for the token.

    If the token ids come from outside, Quex does not know the numeric value. It 
    cannot warn about double definitions.
    """
    assert len(Setup.token_id_foreign_definition_file) == 0

    clash_db = defaultdict(list)

    token_list = token_id_db.values()
    for i, x in enumerate(token_list):
        for y in token_list[i+1:]:
            if x.number != y.number: continue
            clash_db[x.number].append(x)
            clash_db[x.number].append(y)

    def find_source_reference(TokenList):
        for token in TokenList:
            if token.sr.is_void(): continue
            return token.sr
        return None
    
    if len(clash_db) != 0:
        item_list = clash_db.items()
        item_list.sort()
        sr = find_source_reference(item_list[0][1])
        error.warning("Following token ids have the same numeric value assigned:", sr)
        for x, token_id_list in item_list:
            sr = find_source_reference(token_id_list)
            token_ids_sorted = sorted(list(set(token_id_list)), key=attrgetter("name")) # Ensure uniqueness
            error.warning("  %s: %s" % (x, "".join(["%s, " % t.name for t in token_ids_sorted])), 
                          sr)
Example #7
File: mode.py Project: smmckay/quex3
def __parse_event(new_mode, fh, word):
    pos = fh.tell()

    # Allow '<<EOF>>' and '<<FAIL>>' out of respect for classical tools like 'lex'
    if word == "<<EOF>>": word = "on_end_of_stream"
    elif word == "<<FAIL>>": word = "on_failure"
    elif word in blackboard.all_section_title_list:
        error.log("Pattern '%s' is a quex section title. Has the closing '}' of mode %s \n" % (word, new_mode.name) \
                  + "been forgotten? Else use quotes, i.e. \"%s\"." % word, fh)
    elif len(word) < 3 or word[:3] != "on_":
        return False

    comment = "Unknown event handler '%s'. \n" % word + \
              "Note, that any pattern starting with 'on_' is considered an event handler.\n" + \
              "use double quotes to bracket patterns that start with 'on_'."

    __general_validate(fh, new_mode, word, pos)
    error.verify_word_in_list(word,
                              standard_incidence_db.keys() + ["keyword_list"],
                              comment, fh)

    code = code_fragment.parse(fh,
                               "%s::%s event handler" % (new_mode.name, word))
    incidence_id = standard_incidence_db[word][0]
    if Lng.suspicious_RETURN_in_event_handler(incidence_id, code.get_text()):
        error.warning("Suspicious 'RETURN' in event handler '%s'.\n" % incidence_id \
                      + "This statement will trigger 'on_after_match' handler.\n" \
                      + "May be, use plain return instead.", code.sr)

    if word == "on_n_dedent" and not token_db.support_repetition():
        error.warning("Found 'on_n_dedent', but no single token has been specified\n" \
                      "in a 'repeated_token' section.", code.sr)
    new_mode.incidence_db[word] = code

    return True
Example #8
def token_id_db_verify_or_enter_token_id(fh, TokenName):
    global Setup

    prefix_less_TokenName = cut_token_id_prefix(TokenName, fh)

    # Occasionally add token id automatically to database
    if not blackboard.token_id_db.has_key(prefix_less_TokenName):
        # DO NOT ENFORCE THE TOKEN ID TO BE DEFINED, BECAUSE WHEN THE TOKEN ID
        # IS DEFINED IN C-CODE, THE IDENTIFICATION IS NOT 100% SAFE.
        if TokenName in blackboard.token_id_db.keys():
            msg  = "Token id '%s' defined implicitly.\n" % TokenName
            msg += "'%s' has been defined in a token { ... } section!\n" % \
                   (Setup.token_id_prefix + TokenName)
            msg += "Token ids in the token { ... } section are automatically prefixed."
            error.warning(msg, fh, 
                          SuppressCode=NotificationDB.warning_usage_of_undefined_token_id_name)
        else:
            # Warning is posted later when all implicit tokens have been
            # collected. See "token_id_maker.__propose_implicit_token_definitions()"
            blackboard.token_id_implicit_list.append((prefix_less_TokenName, 
                                                      SourceRef.from_FileHandle(fh)))

        # Enter the implicit token id definition in the database
        blackboard.token_id_db[prefix_less_TokenName] = \
                TokenInfo(prefix_less_TokenName, None, None, 
                          SourceRef.from_FileHandle(fh)) 
Example #9
    def check_grid_values_integer_multiples(self):
        """If there are no spaces and the grid is on a homogeneous scale,
           => then the grid can be transformed into 'easy-to-compute' spaces.
        """
        grid_value_list = []
        min_info        = None
        for character_set, info in self.__map:
            if info.cc_type != E_CharacterCountType.GRID: 
                if info.cc_type == E_CharacterCountType.COLUMN: 
                    return
                continue
            elif type(info.value) in (str, unicode): 
                # If there is one single 'variable' grid value, 
                # then no assumptions can be made.
                return
            grid_value_list.append(info.value)
            if min_info is None or info.value < min_info.value:
                min_info = info

        if min_info is None:
            return

        # Are all grid values a multiple of the minimum?
        if len(filter(lambda x: x % min_info.value == 0, grid_value_list)) != len(grid_value_list):
            return

        error.warning("Setup does not contain spaces, only grids (tabulators). All grid\n" \
                      "widths are multiples of %i. The grid setup %s\n" \
                      % (min_info.value, repr(sorted(grid_value_list))[1:-1]) + \
                      "is equivalent to a setup with space counts %s.\n" \
                      % repr(map(lambda x: x / min_info.value, sorted(grid_value_list)))[1:-1] + \
                      "Space counts are faster to compute.", 
                      min_info.sr)
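
A small worked example makes the docstring's claim concrete: if the counter setup defines only grid widths 4 and 8 (and no COLUMN entries), every width is a multiple of the minimum 4, so the warning proposes the equivalent space counts 1 and 2. The arithmetic mirrors the modulo filter and the division used in the message above (illustrative values only, not quex code):

# Worked example of the multiple-of-minimum check performed above.
grid_value_list = [4, 8]
minimum         = min(grid_value_list)
all_multiples_f = all(x % minimum == 0 for x in grid_value_list)
space_counts    = [x // minimum for x in sorted(grid_value_list)]
assert all_multiples_f and space_counts == [1, 2]
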
Example #10
    def check_homogenous_space_counts(self):
        common = None
        for character_set, info in self.__map:
            if info.cc_type != E_CharacterCountType.COLUMN: 
                if info.cc_type == E_CharacterCountType.GRID: 
                    return
                continue
            elif type(info.value) in (str, unicode): 
                # If there is one single 'variable' grid value, 
                # then no assumptions can be made.
                return
            elif common is None:
                common = info
            elif common.value != info.value:
                # space counts are not homogeneous
                return

        if common is None:
            return
            
        error.warning("Setup does not contain a grid but only homogeneous space counts of %i.\n" \
                  % common.value + \
                  "This setup is equivalent to a setup with space counts of 1. Space counts\n" + \
                  "of 1 are the fastest to compute.", 
                  common.sr)
Example #11
    def consistency_check(self):
        # Is 'take_text' section defined
        if self.take_text is not None: return

        error.warning(
            _warning_msg,
            self.sr,
            SuppressCode=NotificationDB.warning_on_no_token_class_take_text)
Example #12
def warn(TokenName, ExistingList, PrefixAddF=False):
    similar_i = similarity.get(TokenName, ExistingList)
    if similar_i == -1:
        similar_str = ""
    else:
        similar_str = " /* did you mean '%s%s'? */" \
                      % (Setup.token_id_prefix, ExistingList[similar_i])
    if PrefixAddF: prefix = Setup.token_id_prefix
    else: prefix = ""
    error.warning("     %s%s;%s" % (prefix, TokenName, similar_str), sr)
Example #13
File: mode.py Project: smmckay/quex3
def __parse_element(new_mode, fh):
    """Returns: False, if a closing '}' has been found.
                True, else.
    """
    position = fh.tell()
    try:
        description = "pattern or event handler"

        skip_whitespace(fh)
        # NOTE: Do not use 'read_word' since we need to continue directly after
        #       whitespace, if a regular expression is to be parsed.
        position = fh.tell()

        identifier = read_identifier(fh)
        if identifier == "keyword_list":
            return __parse_keyword_list(new_mode, fh)
        elif similarity.get(identifier, ["keyword_list", "key words"]) != -1:
            error.warning(
                "'%s' is similar to keyword 'keyword_list'.\n"
                "For clarity, use quotes." % identifier, fh)
        elif identifier == "brief":
            return __parse_brief(new_mode, fh)
        elif similarity.get(identifier,
                            ["brief", "briefing", "briefly"]) != -1:
            error.warning(
                "'%s' is similar to keyword 'brief'.\n"
                "For clarity, use quotes." % identifier, fh)

        fh.seek(position)
        word = read_until_whitespace(fh)
        if word == "}":
            return False
            # -- check for 'on_entry', 'on_exit', ...
        elif __parse_event(new_mode, fh, word):
            return True

        fh.seek(position)
        description = "start of mode element: regular expression"
        pattern = regular_expression.parse(fh)
        pattern.set_source_reference(
            SourceRef.from_FileHandle(fh, new_mode.name))

        position = fh.tell()
        description = "start of mode element: code fragment for '%s'" % pattern.pattern_string(
        )

        __parse_action(new_mode, fh, pattern.pattern_string(), pattern)

    except EndOfStreamException:
        fh.seek(position)
        error.error_eof(description, fh)

    return True
Example #14
File: core.py Project: smmckay/quex3
    def treat(sm, ReverseF):
        backup_id = sm.get_id()
        ok_f, sm = Setup.buffer_encoding.do_state_machine(sm)
        if not ok_f:
            error.warning("Pattern contains elements not found in engine codec '%s'.\n" % Setup.buffer_encoding.name \
                          + "(Buffer element size is %s [byte])" % Setup.lexatom.size_in_byte,
                          sm.sr)

        if ReverseF:
            sm = reverse.do(sm, EnsureDFA_f=True)
        sm.set_id(backup_id)
        return sm
Example #15
    def check_defined(self, SourceReference, CCT):
        """Checks whether the character counter type has been defined in the 
        map.
        
        THROWS: Error in case that it has not been defined.
        """
        for character_set, info in self.__map:
            if info.cc_type == CCT: 
                return

        error.warning("Setup does not define '%s'." % cc_type_name_db[CCT], SourceReference, 
                      SuppressCode=NotificationDB.warning_counter_setup_without_newline)
Example #16
def __error_detection(not_found_list, recursive_list):
    ErrorN = NotificationDB.token_id_ignored_files_report
    if ErrorN not in Setup.suppressed_notification_list:
        if not_found_list:
            not_found_list.sort()
            sr = SourceRef(not_found_list[0][0], LineN=not_found_list[0][1])
            error.warning("Files not found:", sr)
            for file_name, line_n, included_file in not_found_list:
                error.warning("%s" % included_file,
                              SourceRef(file_name, line_n))

        if recursive_list:
            recursive_list.sort()
            sr = SourceRef(recursive_list[0][0], LineN=recursive_list[0][1])
            error.warning(
                "Files recursively included (ignored second inclusion):", sr)
            for file_name, line_n, included_file in recursive_list:
                error.warning("%s" % included_file,
                              SourceRef(file_name, line_n))

        if not_found_list or recursive_list:
            # source reference is taken from last setting
            error.log("\nQuex does not handle C-Preprocessor instructions.",
                      sr,
                      NoteF=True,
                      DontExitF=True,
                      SuppressCode=ErrorN)
Example #17
    def assign_else_count_command(self, GlobalMin, GlobalMax, SourceReference):
        """After all count commands have been assigned to characters, the
        remaining character set can be associated with the 'else-CountOpMapEntry'.
        """
        if self.__else is None:
            else_cmd = CountOpMapEntry(E_CharacterCountType.COLUMN, 1, SourceRef_DEFAULT)
            error.warning("No '\else' defined in counter setup. Assume '\else => space 1;'", SourceReference,
                          SuppressCode=NotificationDB.warning_counter_setup_without_else)
        else:
            else_cmd = self.__else

        remaining_set = self.get_remaining_set(GlobalMin, GlobalMax)
        if not remaining_set.is_empty():
            self.__map.append((remaining_set, else_cmd))
Example #18
def __detect_empty_non_abstract_mode(mode):
    """Detects whether there is a mode that is not abstract while it is 
    completely void of patterns/event handlers.

    THROWS: Error in case.
    
    At this point in time, the matching configuration has been expressed
    in the 'pattern_list'. That is, if there are event handler's then the
    'pattern_list' is not empty.
    """
    if   mode.abstract_f:   return
    elif mode.pattern_list: return

    error.warning("Mode without pattern or pattern-related event handlers.\n" + \
                  "Option <inheritable: only> has been added automatically.", mode.sr)
Example #19
File: mode.py Project: nyulacska/gpr
    def __is_abstract(self, OriginalIncidenceDb, OriginalOptionDb):
        """If the mode has incidences and/or patterns defined it is free to be 
        abstract or not. If neither one is defined, it cannot be implemented and 
        therefore MUST be abstract.
        """
        abstract_f = (OriginalOptionDb.value("inheritable") == "only")

        if len(OriginalIncidenceDb) != 0 or len(self.pattern_list) != 0:
            return abstract_f

        elif abstract_f == False:
            error.warning("Mode without pattern and event handlers needs to be 'inheritable only'.\n" + \
                          "<inheritable: only> has been set automatically.", self.sr)
            abstract_f = True  # Change to 'inheritable: only', i.e. abstract_f == True.

        return abstract_f
Example #20
    def finalize(self, GlobalMin, GlobalMax, SourceReference):
        """After all count commands have been assigned to characters, the 
        remaining character set can be associated with the 'else-CountAction'.
        """
        if self.__else is None: 
            else_cmd = CountAction(E_CharacterCountType.COLUMN, 1, SourceRef_DEFAULT)
            error.warning("No '\else' defined in counter setup. Assume '\else => space 1;'", SourceReference, 
                          SuppressCode=NotificationDB.warning_counter_setup_without_else)
        else:                   
            else_cmd = self.__else
        
        remaining_set = self.get_remaining_set(GlobalMin, GlobalMax)
        if not remaining_set.is_empty():
            self.__map.append((remaining_set, else_cmd))

        return self.__map
Example #21
File: mode.py Project: mplucinski/quex
    def __is_abstract(self, OriginalIncidenceDb, OriginalOptionDb):
        """If the mode has incidences and/or patterns defined it is free to be 
        abstract or not. If neither one is defined, it cannot be implemented and 
        therefore MUST be abstract.
        """
        abstract_f = (OriginalOptionDb.value("inheritable") == "only")

        if len(OriginalIncidenceDb) != 0 or len(self.pattern_list) != 0:
            return abstract_f

        elif abstract_f == False:
            error.warning("Mode without pattern and event handlers needs to be 'inheritable only'.\n" + \
                          "<inheritable: only> has been set automatically.", self.sr)
            abstract_f = True # Change to 'inheritable: only', i.e. abstract_f == True.

        return abstract_f
Example #22
        def delete(MHI, Info, self, ModeName, history):
            def do(element, history):
                priority, pattern, terminal = element
                if priority.mode_hierarchy_index > MHI: return False
                elif priority.pattern_index >= Info.pattern_index: return False
                elif not superset_check.do(Info.pattern, pattern): return False
                history.append(
                    [ModeName,
                     pattern.pattern_string(), pattern.sr.mode_name])
                return True

            size = len(self)
            do_and_delete_if(self, do, history)

            if size == len(self) and Info.sr.mode_name == ModeName:
                error.warning("DELETION mark does not have any effect.",
                              Info.sr)
Example #23
def finalize_pattern_list(SortedPPT_List, CounterDb):
    """This function prepares a list of patterns and their related state
    machines for the integration into a single analyzing state machine.
    For that, the following steps need to be accomplished IN ORDER:
    
      (1) Get the list of patterns from the sorted list.

      (2) Prepare the 'count information' for line and column counting.
          MUST happen BEFORE an (optional) transformation to another codec.

      (3) (optional) transformation to a non-unicode codec.

      (4) Delete signaling characters from the transition maps.
          MUST happen AFTER an (optional) transformation to another codec.

      (5) Mounts pre- and post-contexts to the main analyzer state machines
          MUST happen AFTER an (optional) transformation to another codec.
    """
    pattern_list = [ 
        pattern for priority, pattern, terminal in SortedPPT_List 
    ]

    # (*) Counting information must be determined BEFORE transformation
    for pattern in pattern_list:
        pattern.prepare_count_info(CounterDb, 
                                   Setup.buffer_codec)

    # (*) Transform anything into the buffer's codec
    #     Skippers: What is relevant to enter the skippers is transformed.
    #               Related data (skip character set, ... ) is NOT transformed!
    for pattern in pattern_list:
        if not pattern.transform(Setup.buffer_codec):
            error.warning("Pattern contains elements not found in engine codec '%s'.\n" % Setup.buffer_codec.name \
                          + "(Buffer element size is %s [byte])" % Setup.buffer_element_size,
                          pattern.sr)

    # (*) Cut the signalling characters from any pattern or state machine
    for pattern in pattern_list:
        pattern.cut_character_list(blackboard.signal_character_list(Setup))

    # (*) Pre-contexts and BIPD can only be mounted, after the transformation.
    for pattern in pattern_list:
        pattern.mount_post_context_sm()
        pattern.mount_pre_context_sm()

    return pattern_list
Example #24
File: mode.py Project: mplucinski/quex
def __validate_required_token_policy_queue(Name, fh, pos):
    """Some handlers are better only used with token policy 'queue'."""

    if Name not in ["on_entry", "on_exit", 
                    "on_indent", "on_n_dedent", "on_dedent", "on_nodent", 
                    "on_indentation_bad", "on_indentation_error", 
                    "on_indentation"]: 
        return
    if Setup.token_policy == "queue":
        return

    pos_before = fh.tell()
    fh.seek(pos)
    error.warning("Using '%s' event handler, while the token queue is disabled.\n" % Name + \
                  "Use '--token-policy queue', so then tokens can be sent safer\n" + \
                  "from inside this event handler.", fh,
                  SuppressCode=NotificationDB.warning_on_no_token_queue) 
    fh.seek(pos_before)
Example #25
File: mode.py Project: xxyzzzq/quex
def __validate_required_token_policy_queue(Name, fh, pos):
    """Some handlers are better only used with token policy 'queue'."""

    if Name not in ["on_entry", "on_exit", 
                    "on_indent", "on_n_dedent", "on_dedent", "on_nodent", 
                    "on_indentation_bad", "on_indentation_error", 
                    "on_indentation"]: 
        return
    if Setup.token_policy == "queue":
        return

    pos_before = fh.tell()
    fh.seek(pos)
    error.warning("Using '%s' event handler, while the token queue is disabled.\n" % Name + \
                  "Use '--token-policy queue', so then tokens can be sent safer\n" + \
                  "from inside this event handler.", fh,
                  SuppressCode=NotificationDB.warning_on_no_token_queue) 
    fh.seek(pos_before)
Example #26
def configure_output_directory(setup):
    # Ensure existence of output directory
    if os.path.isfile(setup.output_directory):
        error.log(
            "The name '%s' is already a file and may not be used as output directory."
            % setup.output_directory)
    elif os.path.isdir(setup.output_directory):
        if os.access(setup.output_directory, os.W_OK) == False:
            error.log("The directory '%s' is not writeable." %
                      setup.output_directory)
    elif setup.output_directory:
        try:
            os.mkdir(setup.output_directory)
        except:
            error.warning("Cannot create directory '%s'." %
                          setup.output_directory)
    else:
        return
Example #27
    def repriorize(MHI, Info, ppt_list, ModeName, history):
        done_f = False
        for ppt in ppt_list:
            priority, pattern, terminal = ppt
            if   priority.mode_hierarchy_index > MHI:                      continue
            elif priority.pattern_index        >= Info.new_pattern_index:  continue
            elif not identity_checker.do(pattern, Info.pattern):           continue

            done_f = True
            history.append([ModeName, 
                            pattern.pattern_string(), pattern.sr.mode_name,
                            pattern.incidence_id(), Info.new_pattern_index])
            priority.mode_hierarchy_index = MHI
            priority.pattern_index        = Info.new_pattern_index

        if not done_f and Info.sr.mode_name == ModeName:
            error.warning("PRIORITY mark does not have any effect.", 
                          Info.sr)
Example #28
    def delete(MHI, Info, ppt_list, ModeName, history):
        done_f = False
        size   = len(ppt_list)
        i      = 0
        while i < size:
            priority, pattern, terminal = ppt_list[i]
            if     priority.mode_hierarchy_index <= MHI \
               and priority.pattern_index < Info.pattern_index \
               and superset_check.do(Info.pattern, pattern):
                done_f  = True
                del ppt_list[i]
                history.append([ModeName, pattern.pattern_string(), pattern.sr.mode_name])
                size   -= 1
            else:
                i += 1

        if not done_f and Info.sr.mode_name == ModeName:
            error.warning("DELETION mark does not have any effect.", Info.sr)
Example #29
def find_in_mode_hierarchy(BaseModeSequence, incidence_name):
    """Find incidence handler in the mode hierarchy. An incidence handler
    can only be defined once. If none is found 'None' is returned.
    """
    found = None  # Note on style: 'for-else' does not make sense,
                  # because multi-definitions need to be detected.
    found_mode = None
    for mode_descr in BaseModeSequence:
        code_fragment = mode_descr.incidence_db.get(incidence_name)
        if code_fragment is None:
            continue
        elif found is not None:
            error.warning("Handler '%s' in mode '%s' overwrites previous in mode '%s'." \
                          % (incidence_name, mode_descr.name, found_mode), code_fragment.sr,
                          SuppressCode=NotificationDB.warning_incidence_handler_overwrite)
        found = code_fragment
        found_mode = mode_descr.name
    return found
Example #30
File: engine.py Project: smmckay/quex3
def snap_character_set_expression(stream, PatternDict):
    # GRAMMAR:
    #
    # set_expression:
    #                 [: set_term :]
    #                 traditional character set
    #                 \P '{' property string '}'
    #                 '{' identifier '}'
    #
    # set_term:
    #                 "alnum"
    #                 "alpha"
    #                 "blank"
    #                 "cntrl"
    #                 "digit"
    #                 "graph"
    #                 "lower"
    #                 "print"
    #                 "punct"
    #                 "space"
    #                 "upper"
    #                 "xdigit"
    #                 "union"        '(' set_term [ ',' set_term ]+ ')'
    #                 "intersection" '(' set_term [ ',' set_term ]+ ')'
    #                 "difference"   '(' set_term [ ',' set_term ]+ ')'
    #                 "complement"   '(' set_term ')'
    #                 set_expression
    #
    trigger_set = snap_set_expression(stream, PatternDict)

    if trigger_set is None:
        error.log("Regular Expression: snap_character_set_expression called for something\n" + \
                  "that does not start with '[:', '[' or '\\P'", stream)
    elif trigger_set.is_empty():
        error.warning(
            "Regular Expression: Character set expression results in empty set.",
            stream)

    # Create state machine that triggers with the trigger set to SUCCESS
    # NOTE: The default for the ELSE transition is FAIL.
    sm = DFA()
    sm.add_transition(sm.init_state_index, trigger_set, AcceptanceF=True)

    return __debug_exit(sm, stream)
Example #31
def finalize_pattern_list(SortedPPT_List, CounterDb):
    """This function prepares a list of patterns and their related state
    machines for the integration into a single analyzing state machine.
    For that, the following steps need to be accomplished IN ORDER:
    
      (1) Get the list of patterns from the sorted list.

      (2) Prepare the 'count information' for line and column counting.
          MUST happen BEFORE an (optional) transformation to another codec.

      (3) (optional) transformation to a non-unicode codec.

      (4) Delete signaling characters from the transition maps.
          MUST happen AFTER an (optional) transformation to another codec.

      (5) Mounts pre- and post-contexts to the main analyzer state machines
          MUST happen AFTER an (optional) transformation to another codec.
    """
    pattern_list = [pattern for priority, pattern, terminal in SortedPPT_List]

    # (*) Counting information must be determined BEFORE transformation
    for pattern in pattern_list:
        pattern.prepare_count_info(CounterDb, Setup.buffer_codec)

    # (*) Transform anything into the buffer's codec
    #     Skippers: What is relevant to enter the skippers is transformed.
    #               Related data (skip character set, ... ) is NOT transformed!
    for pattern in pattern_list:
        if not pattern.transform(Setup.buffer_codec):
            error.warning("Pattern contains elements not found in engine codec '%s'.\n" % Setup.buffer_codec.name \
                          + "(Buffer element size is %s [byte])" % Setup.buffer_lexatom_size_in_byte,
                          pattern.sr)

    # (*) Cut the signalling characters from any pattern or state machine
    for pattern in pattern_list:
        pattern.cut_character_list(blackboard.signal_character_list(Setup))

    # (*) Pre-contexts and BIPD can only be mounted, after the transformation.
    for pattern in pattern_list:
        pattern.mount_post_context_sm()
        pattern.mount_pre_context_sm()

    return pattern_list
Example #32
    def repriorize(MHI, Info, ppt_list, ModeName, history):
        done_f = False
        for ppt in ppt_list:
            priority, pattern, terminal = ppt
            if priority.mode_hierarchy_index > MHI: continue
            elif priority.pattern_index >= Info.new_pattern_index: continue
            elif not identity_checker.do(pattern, Info.pattern): continue

            done_f = True
            history.append([
                ModeName,
                pattern.pattern_string(), pattern.sr.mode_name,
                pattern.incidence_id(), Info.new_pattern_index
            ])
            priority.mode_hierarchy_index = MHI
            priority.pattern_index = Info.new_pattern_index

        if not done_f and Info.sr.mode_name == ModeName:
            error.warning("PRIORITY mark does not have any effect.", Info.sr)
Example #33
File: engine.py Project: mplucinski/quex
def snap_character_set_expression(stream, PatternDict):
    # GRAMMAR:
    #
    # set_expression: 
    #                 [: set_term :]
    #                 traditional character set
    #                 \P '{' property string '}'
    #                 '{' identifier '}'
    #
    # set_term:
    #                 "alnum" 
    #                 "alpha" 
    #                 "blank" 
    #                 "cntrl" 
    #                 "digit" 
    #                 "graph" 
    #                 "lower" 
    #                 "print" 
    #                 "punct" 
    #                 "space" 
    #                 "upper" 
    #                 "xdigit"
    #                 "union"        '(' set_term [ ',' set_term ]+ ')'
    #                 "intersection" '(' set_term [ ',' set_term ]+ ')'
    #                 "difference"   '(' set_term [ ',' set_term ]+ ')'
    #                 "inverse"      '(' set_term ')'
    #                 set_expression
    # 
    trigger_set = snap_set_expression(stream, PatternDict)

    if trigger_set is None: 
        error.log("Regular Expression: snap_character_set_expression called for something\n" + \
                  "that does not start with '[:', '[' or '\\P'", stream)
    elif trigger_set.is_empty():
        error.warning("Regular Expression: Character set expression results in empty set.", stream)

    # Create state machine that triggers with the trigger set to SUCCESS
    # NOTE: The default for the ELSE transition is FAIL.
    sm = StateMachine()
    sm.add_transition(sm.init_state_index, trigger_set, AcceptanceF=True)

    return __debug_exit(sm, stream)
Example #34
    def delete(MHI, Info, ppt_list, ModeName, history):
        done_f = False
        size = len(ppt_list)
        i = 0
        while i < size:
            priority, pattern, terminal = ppt_list[i]
            if     priority.mode_hierarchy_index <= MHI \
               and priority.pattern_index < Info.pattern_index \
               and superset_check.do(Info.pattern, pattern):
                done_f = True
                del ppt_list[i]
                history.append(
                    [ModeName,
                     pattern.pattern_string(), pattern.sr.mode_name])
                size -= 1
            else:
                i += 1

        if not done_f and Info.sr.mode_name == ModeName:
            error.warning("DELETION mark does not have any effect.", Info.sr)
Example #35
def __warn_on_double_definition():
    """Double check that no token id appears twice. Again, this can only happen,
    if quex itself produced the numeric values for the token.

    If the token ids come from outside, Quex does not know the numeric value. It 
    cannot warn about double definitions.
    """
    assert len(Setup.token_id_foreign_definition_file) == 0

    clash_db = defaultdict(list)

    token_list = token_id_db.values()
    for i, x in enumerate(token_list):
        for y in token_list[i + 1:]:
            if x.number != y.number: continue
            clash_db[x.number].append(x)
            clash_db[x.number].append(y)

    def find_source_reference(TokenList):
        for token in TokenList:
            if token.sr.is_void(): continue
            return token.sr
        return None

    if len(clash_db) != 0:
        item_list = clash_db.items()
        item_list.sort()
        sr = find_source_reference(item_list[0][1])
        error.warning(
            "Following token ids have the same numeric value assigned:", sr)
        for x, token_id_list in item_list:
            sr = find_source_reference(token_id_list)
            token_ids_sorted = sorted(
                list(set(token_id_list)),
                key=attrgetter("name"))  # Ensure uniqueness
            error.warning(
                "  %s: %s" %
                (x, "".join(["%s, " % t.name for t in token_ids_sorted])), sr)
Example #36
def __warn_implicit_token_definitions():
    """Output a message on token_ids which have been generated automatically.
    That means, that the user may have made a typo.
    """
    if not token_db.token_id_implicit_list:
        return

    def warn(TokenName, ExistingList, PrefixAddF=False):
        similar_i = similarity.get(TokenName, ExistingList)
        if similar_i == -1:
            similar_str = ""
        else:
            similar_str = " /* did you mean '%s%s'? */" \
                          % (Setup.token_id_prefix, ExistingList[similar_i])
        if PrefixAddF: prefix = Setup.token_id_prefix
        else: prefix = ""
        error.warning("     %s%s;%s" % (prefix, TokenName, similar_str), sr)

    sr = token_db.token_id_implicit_list[0][1]
    msg = "Detected implicit token identifier definitions."
    implicit_list = [tid[0] for tid in token_db.token_id_implicit_list]
    defined_list = [
        tid.name for tid in token_db.token_id_db.values()
        if tid.name not in implicit_list
    ]

    if not Setup.extern_token_id_file:
        msg += " Proposal:\n"
        msg += "   token {"
        error.warning(msg, sr)
        for token_name, sr in token_db.token_id_implicit_list:
            warn(token_name, defined_list)
        error.warning("   }", sr)
    else:
        error.warning(msg, sr)
        for token_name, sr in token_db.token_id_implicit_list:
            warn(token_name, defined_list, PrefixAddF=True)
        error.warning("Above token ids must be defined in '%s'" \
                      % Setup.extern_token_id_file, sr)
Example #37
def __warn_on_double_definition():
    """Double check that no token id appears twice. Again, this can only happen,
    if quex itself produced the numeric values for the token.

    If the token ids come from outside, Quex does not know the numeric value. It 
    cannot warn about double definitions.
    """
    assert not Setup.extern_token_id_file

    if NotificationDB.message_on_extra_options in blackboard.setup.suppressed_notification_list:
        return

    clash_db = defaultdict(list)

    token_list = token_db.token_id_db.values()
    for i, x in enumerate(token_list):
        for y in token_list[i + 1:]:
            if x.number != y.number: continue
            clash_db[x.number].append(x)
            clash_db[x.number].append(y)

    if not clash_db: return

    sr = None
    item_list = clash_db.items()
    item_list.sort()
    for x, token_id_list in item_list:
        done = set()
        new_token_id_list = []
        for token_id in token_id_list:
            if token_id.name in done: continue
            done.add(token_id.name)
            new_token_id_list.append(token_id)

        subitem_list = sorted([(token_id.name, token_id.sr)
                               for token_id in new_token_id_list])
        if not subitem_list: continue

        dummy, sr = subitem_list[0]
        if sr is None: sr = SourceRef_VOID
        error.warning("Token ids with same numeric value %i found:" % x, sr)
        for name, sr in subitem_list:
            if sr is None: sr = SourceRef_VOID
            error.warning("  %s" % name, sr)

    if sr is not None:
        error.warning(
            "", sr, SuppressCode=NotificationDB.warning_on_duplicate_token_id)
Example #38
def do(setup, command_line, argv):
    """Does a consistency check for setup and the command line.
    """

    setup.output_directory = os.path.normpath(setup.output_directory)
    if setup.output_directory:
        # Check, if the output directory exists
        if os.access(setup.output_directory, os.F_OK) == False:
            error.log(
                "The directory %s was specified for output, but does not exists."
                % setup.output_directory)
        if os.access(setup.output_directory, os.W_OK) == False:
            error.log(
                "The directory %s was specified for output, but is not writeable."
                % setup.output_directory)

    # if the mode is '--language dot' => check character display options.
    if setup.character_display not in ["hex", "utf8"]:
        error.log(
            "Character display must be either 'hex' or 'utf8'.\nFound: '%s'" %
            setup.character_display)

    # ensure that options are not specified twice
    for parameter, info in SETUP_INFO.items():
        if type(info) != list: continue
        occurence_n = 0
        for option in info[0]:
            occurence_n += argv.count(option)
        if occurence_n > 1 and info[1] not in (SetupParTypes.LIST,
                                               SetupParTypes.INT_LIST):
            error.log("Received more than one of the following options:\n" + \
                      "%s" % repr(info[0])[1:-1])

    # (*) Check for 'Deprecated' Options ___________________________________________________
    for name, info in DEPRECATED.items():
        command_line_options = SETUP_INFO[name][0]
        comment = info[0]
        depreciated_since_version = info[1]
        for option in command_line_options:
            if command_line.search(option):
                error.log("Command line option '%s' is ignored.\n" % option + \
                          comment + "\n" + \
                          "Last version of Quex supporting this option is version %s. Please, visit\n" % \
                          depreciated_since_version + \
                          "http://quex.org for further information.")

    # (*) Check for 'Straying' Options ___________________________________________________
    options = []
    for key, info in SETUP_INFO.items():
        if type(info) != list: continue
        if key in DEPRECATED: continue
        if info[1] is not None: options.extend(info[0])
    options.sort(lambda a, b: cmp(a.replace("-", ""), b.replace("-", "")))

    ufos = command_line.unidentified_options(options)
    if len(ufos) != 0:
        error.log("Unidentified option(s) = " +  repr(ufos) + "\n" + \
                  __get_supported_command_line_option_description(options))

    if setup.analyzer_derived_class_name != "" and \
       setup.analyzer_derived_class_file == "":
        error.log("Specified derived class '%s' on command line, but it was not\n" % \
                  setup.analyzer_derived_class_name + \
                  "specified which file contains the definition of it.\n" + \
                  "use command line option '--derived-class-file'.\n")

    if setup.buffer_element_size not in [-1, 1, 2, 4]:
        error.log(
            "The setting of '--buffer-element-size' (or '-b') can only be\n"
            "1, 2, or 4 (found %s)." % repr(setup.buffer_element_size))

    if setup.buffer_byte_order not in ["<system>", "little", "big"]:
        error.log("Byte order (option --endian) must be 'little', 'big', or '<system>'.\n" + \
                  "Note, that this option is only interesting for cross plattform development.\n" + \
                  "By default, quex automatically chooses the endian type of your system.")

    # Manually written token class requires token class name to be specified
    if setup.token_class_file != "" and command_line.search(
            "--token-class", "--tc") == False:
        error.log(
            "The use of a manually written token class requires that the name of the class\n"
            "is specified on the command line via the '--token-class' option.")

    # Token queue
    if setup.token_policy != "queue" and command_line.search(
            "--token-queue-size"):
        error.log("Option --token-queue-size determines a fixed token queue size. This makes\n" + \
                  "only sense in conjunction with '--token-policy queue'.\n")
    if setup.token_queue_size <= setup.token_queue_safety_border + 1:
        if setup.token_queue_size == setup.token_queue_safety_border:
            cmp_str = "equal to"
        else:
            cmp_str = "less than"
        error.log("Token queue size is %i is %s token queue safety border %i + 1.\n" % \
                  (setup.token_queue_size, cmp_str, setup.token_queue_safety_border) +
                  "Set appropriate values with --token-queue-size and --token-queue-safety-border.")

    # Check that names are valid identifiers
    if len(setup.token_id_prefix_plain) != 0:
        __check_identifier(setup, "token_id_prefix_plain", "Token prefix")
    __check_identifier(setup, "analyzer_class_name", "Engine name")
    if setup.analyzer_derived_class_name != "":
        __check_identifier(setup, "analyzer_derived_class_name",
                           "Derived class name")

    __check_file_name(setup, "token_class_file",
                      "file containing token class definition")
    __check_file_name(setup, "analyzer_derived_class_file",
                      "file containing user derived lexer class")
    __check_file_name(
        setup,
        "token_id_foreign_definition_file",
        "file containing user token ids",
        0,
        CommandLineOption=SETUP_INFO["token_id_foreign_definition"][0])
    __check_file_name(setup, "input_mode_files", "quex source file")

    # Check that not more than one converter is specified
    converter_n = 0
    if setup.converter_iconv_f: converter_n += 1
    if setup.converter_icu_f: converter_n += 1
    if len(setup.converter_user_new_func) != 0: converter_n += 1
    if converter_n > 1:
        error.log("More than one character converter has been specified. Note, that the\n" + \
                  "options '--icu', '--iconv', and '--converter-new' (or '--cn') are\n"    + \
                  "to be used mutually exclusively.")
    if converter_n == 1 and setup.buffer_codec.name != "unicode":
        # If the buffer codec is other than unicode, then no converter shall
        # be used to fill the buffer. Instead, the engine is transformed, so
        # that it works directly on the codec.
        error.log("An engine that is to be generated for a specific codec cannot rely\n"      + \
                  "on converters. Do no use '--codec' together with '--icu', '--iconv', or\n" + \
                  "`--converter-new`.")

    # If a converter has been specified and no bytes-element-size has been specified,
    # it defaults to '1 byte' which is most likely not what is desired for unicode.
    if     converter_n == 1 \
       and setup.buffer_element_size == 1 \
       and not command_line_args_defined(command_line, "buffer_element_size") \
       and not command_line_args_defined(command_line, "buffer_element_type"):
        error.log("A converter has been specified, but the default buffer element size\n" + \
                  "is left to 1 byte. Consider %s or %s." \
                  % (command_line_args_string("buffer_element_size"),
                     command_line_args_string("buffer_element_type")))

    # If a user defined type is specified for 'engine character type' and
    # a converter, then the name of the target type must be specified explicitly.
    if         setup.buffer_element_type != "" \
       and not global_character_type_db.has_key(setup.buffer_element_type) \
       and     setup.converter_ucs_coding_name == "" \
       and     converter_n != 0:
        tc = setup.buffer_element_type
        error.log("A character code converter has been specified. It is supposed to convert\n" + \
                  "incoming data into an internal buffer of unicode characters. The size of\n" + \
                  "each character is determined by '%s' which is a user defined type.\n" % tc  + \
                  "\n" + \
                  "Quex cannot determine automatically the name that the converter requires\n" +      \
                  "to produce unicode characters for type '%s'. It must be specified by the\n" % tc + \
                  "command line option %s." \
                  % command_line_args_string("converter_ucs_coding_name"))

    # Token transmission policy
    token_policy_list = ["queue", "single", "users_token", "users_queue"]
    if setup.token_policy not in token_policy_list:
        error.log("Token policy '%s' not supported. Use one of the following:\n" % setup.token_policy + \
                  repr(token_policy_list)[1:-1])
    elif setup.token_policy == "users_token":
        error.log(
            "Token policy 'users_queue' has be deprecated since 0.49.1. Use\n"
            "equivalent policy 'single'.")
    elif setup.token_policy == "users_queue":
        error.log(
            "Token policy 'users_queue' has be deprecated since 0.49.1\n")

    # Internal engine character encoding
    def __codec_vs_buffer_element_size(CodecName, RequiredBufferElementSize):
        if setup.buffer_codec.name != CodecName: return
        elif setup.buffer_element_size == RequiredBufferElementSize: return

        if setup.buffer_element_size == -1:
            msg_str = "undetermined (found type '%s')" % setup.buffer_element_type
        else:
            msg_str = "is not %i (found %i)" % (RequiredBufferElementSize,
                                                setup.buffer_element_size)

        error.log("Using codec '%s' while buffer element size %s.\n" % (CodecName, msg_str) +
                  "Consult command line argument %s" \
                  % command_line_args_string("buffer_element_size"))

    if setup.buffer_codec.name != "unicode":
        if not setup.buffer_codec_file:
            error.verify_word_in_list(
                setup.buffer_codec_name,
                codec_db.get_supported_codec_list() + ["utf8", "utf16"],
                "Codec '%s' is not supported." % setup.buffer_codec.name)
        __codec_vs_buffer_element_size("utf8", 1)
        __codec_vs_buffer_element_size("utf16", 2)

    if setup.external_lexeme_null_object and setup.token_class_only_f:
        error.log(
            "Specifying an external lexeme null object signalizes an\n"
            "external token class implementation. The 'token class only\n"
            "flag' generates a token class considered to be externally\n"
            "shared. Both flags are mutually exclusive.")

    if setup.string_accumulator_f:
        error_n = NotificationDB.warning_on_no_token_class_take_text
        if error_n in setup.suppressed_notification_list:
            error.warning(
                "The warning upon missing 'take_text' in token type definition is de-\n"
                +
                "activated by '--suppress %i'. This is dangerous, if there is a string\n"
                % error_n +
                "accumulator. May be, use '--no-string-accumulator'.",
                -1,
                SuppressCode=NotificationDB.
                warning_on_no_warning_on_missing_take_text)
Example #39
def _do(the_state_machine, post_context_sm, EndOfLinePostContextF, SourceReference):
    """Appends a post context to the given state machine and changes 
       state infos as required. 

       NOTE: 

           In case that:    post_context_sm is not None 
                         or EndOfLinePostContextF  

           The function appends something to the state machine and
           it is therefore required to pass 'NFA to DFA'--better
           also Hopcroft Minimization.
       
       ________________________________________________________________________
       This process is very similar to sequentialization. 
       There is a major difference, though:
       
       Given a state machine (e.g. a pattern) X with a post context Y, 
       a match is only valid if X is followed by Y. Let Xn be an acceptance
       state of X and Ym an acceptance state of Y: 

              ---(Xn-1)---->(Xn)---->(Y0)----> ... ---->((Ym))
                            store                       acceptance
                            input
                            position
       
       That is, it holds:

          -- The next input position is stored at the position of Xn, even though
             it is 'officially' not an acceptance state.

          -- Ym will be an acceptance state, but it will not store 
             the input position!       

       The analysis of the next pattern will start at the position where
       X stopped, even though Ym is required to state acceptance.    
       
    """
    if post_context_sm is None and EndOfLinePostContextF == False:
        return the_state_machine, None

    # State machines with no states are senseless here. 
    assert not the_state_machine.is_empty(), \
           "empty state machine can have no post context."
    assert post_context_sm is None or not post_context_sm.is_empty(), \
           "empty state machine cannot be a post-context."

    # State machines involved with post condition building are part of a pattern, 
    # but not configured out of multiple patterns. Thus there should be no origins.
    assert the_state_machine.has_origins() == False
    assert post_context_sm is None or not post_context_sm.has_origins()

    for state in the_state_machine.get_acceptance_state_list():
        for cmd in state.single_entry.get_iterable(SeAccept): 
            assert cmd.pre_context_id() == E_PreContextIDs.NONE, \
                   "Post Contexts MUST be mounted BEFORE pre-contexts."

    if post_context_sm is None:
        assert EndOfLinePostContextF
        # Generate a new post context that just contains the 'newline'
        post_context_sm = StateMachine_Newline() 

    elif EndOfLinePostContextF: 
        # Mount 'newline' to existing post context
        post_context_sm = sequentialize.do([post_context_sm, 
                                            StateMachine_Newline()]) 

    # A post context with an initial state that is acceptance is not really a
    # 'context' since it accepts anything. The state machine remains un-post context.
    if post_context_sm.get_init_state().is_acceptance():
        error.warning("Post context accepts anything--replaced by no post context.",
                      SourceReference)
        return the_state_machine, None
    
    # (*) Two ways of handling post-contexts:
    #
    #     -- Seldom Exception: 
    #        Pseudo-Ambiguous Post Conditions (x+/x) -- detecting the end of the 
    #        core pattern after the end of the post context
    #        has been reached.
    #
    if ambiguous_post_context.detect_forward(the_state_machine, post_context_sm):
        if ambiguous_post_context.detect_backward(the_state_machine, post_context_sm):
            # -- for post contexts that are forward and backward ambiguous
            #    a philosophical cut is necessary.
            error.warning("Post context requires philosophical cut--handle with care!\n"
                      "Proposal: Isolate pattern and ensure results are as expected!", 
                      SourceReference) 
            post_context_sm = ambiguous_post_context.philosophical_cut(the_state_machine, post_context_sm)
        
    # NOTE: the_state_machine may now contain an epsilon transition. See the
    #       comment at the entry of this function.
        bipd_sm_to_be_inverted = ambiguous_post_context.mount(the_state_machine, post_context_sm)
        the_state_machine      = beautifier.do(the_state_machine)
        return the_state_machine, bipd_sm_to_be_inverted

    # -- The 'normal' way: storing the input position at the end of the core
    #    pattern.
    #
    # (*) Need to clone the state machines, i.e. provide their internal
    #     states with new ids, but the 'behavior' remains. This allows
    #     state machines to appear twice, or to be used in 'larger'
    #     conglomerates.
    post_clone = post_context_sm.clone() 

    # -- Once an acceptance state is reached no further analysis is necessary.
    ## NO: acceptance_pruning.do(post_clone)
    ## BECAUSE: it may have to compete with a pseudo-ambiguous post context

    # (*) collect all transitions from both state machines into a single one
    #
    #     NOTE: The start index is unique. Therefore, one can assume that each
    #           clone_list '.states' dictionary has different keys. One can simply
    #           take over all transitions of a start index into the result without
    #           considering interferences (see below)
    #
    orig_acceptance_state_id_list = the_state_machine.get_acceptance_state_index_list()

    # -- mount on every acceptance state the initial state of the following state
    #    machine via epsilon transition
    the_state_machine.mount_to_acceptance_states(post_clone.init_state_index, 
                                                 CancelStartAcceptanceStateF=True)
    for start_state_index, state in post_clone.states.iteritems():        
        the_state_machine.states[start_state_index] = state # states are already cloned

    # -- raise the 'store input position' flag at each old acceptance state
    #    of the core pattern
    for state_idx in orig_acceptance_state_id_list:
        state = the_state_machine.states[state_idx]
        state.set_input_position_store_f(True)
    
    # -- the new acceptance states (those of the post context) shall not store
    #    the input position; they restore the stored position instead
    for state in the_state_machine.get_acceptance_state_list():
        state.set_input_position_store_f(False)
        state.set_input_position_restore_f(True)

    # No input position backward search required
    return beautifier.do(the_state_machine), None
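The trailing-context behaviour described in the docstring above can be illustrated, outside of quex, with a plain regular-expression lookahead: the core pattern only matches when followed by the post context, yet the reported end position stays at the end of the core pattern. A minimal sketch, where the sample patterns "for" and "each" are made up and nothing here is quex API:

import re

# Core pattern "for" with post context "each": a match requires that "each"
# follows, but only "for" is consumed -- analysis continues right after it.
core_pattern = "for"
post_context = "each"
rx = re.compile(core_pattern + "(?=" + post_context + ")")

m = rx.match("foreach item")
assert m is not None
assert m.end() == len(core_pattern)   # input position stored at the end of the core pattern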
Example #40
0
def do(ModeDB):
    """Consistency check of mode database

       -- Are there applicable modes?
       -- Start mode:
          -- specified (more than one applicable mode req. explicit specification)?
          -- is defined as mode?
          -- start mode is not inheritable only?
       -- Entry/Exit transitions are allowed?
    """
    if Setup.token_class_only_f:
        if len(ModeDB) != 0:
            error.log("Modes found in input files. However, only a token class is generated.", 
                      DontExitF=True)
        return

    if len(ModeDB) == 0:
        error.log("No single mode defined - bailing out", Prefix="consistency check")

    mode_name_list             = sorted([mode.name for mode in ModeDB.itervalues()]) 
    # Applicable modes can only be determined after possible addition of "inheritable: only"
    implemented_mode_name_list = sorted([mode.name for mode in ModeDB.itervalues() if not mode.abstract_f()]) 

    if len(implemented_mode_name_list) == 0:
        error.log("There is no mode that can be implemented---all existing modes are 'inheritable only'.\n" + \
                  "modes are = " + repr(ModeDB.keys())[1:-1],
                  Prefix="consistency check")

    for mode in ModeDB.values():
        mode.check_consistency()

    # (*) If a conversion or a codec engine is specified, then the 
    #     'on_bad_lexatom' handler must be specified in every mode.
    if False and (Setup.buffer_codec.name != "unicode" or Setup.converter_f):
        for mode in ModeDB.values():
            # Later ... 
            if False and E_IncidenceIDs.BAD_LEXATOM not in mode.incidence_db:
                error.warning("Missing 'on_bad_lexatom' handler in mode '%s' (or its base modes).\n" % mode.name + \
                              "This is dangerous while using a codec engine or a converter (iconv, icu, ...).\n" + \
                              "The feature is not yet supported, but the infrastructure is currently setup for it.",
                              mode.sr, 
                              SuppressCode=NotificationDB.warning_codec_error_with_non_unicode)

    # (*) Start mode specified?
    __start_mode(implemented_mode_name_list, mode_name_list)

    # (*) Entry/Exit Transitions
    for mode in ModeDB.values():
        if mode.abstract_f(): continue
        __entry_transitions(mode, mode_name_list)
        __exit_transitions(mode, mode_name_list)

    for mode in ModeDB.values():
        # (*) [Optional] Warnings on Outrun
        if Setup.warning_on_outrun_f:
             mode.check_low_priority_outruns_high_priority_pattern()

        # (*) Special Patterns shall not match on same lexemes
        if NotificationDB.error_on_special_pattern_same not in Setup.suppressed_notification_list:
            mode.check_match_same(NotificationDB.error_on_special_pattern_same)

        # (*) Special Patterns (skip, indentation, etc.) 
        #     shall not be outrun by another pattern.
        if NotificationDB.error_on_special_pattern_outrun not in Setup.suppressed_notification_list:
            mode.check_special_incidence_outrun(NotificationDB.error_on_special_pattern_outrun)

        # (*) Special Patterns shall not have common matches with patterns
        #     of higher precedence.
        if NotificationDB.error_on_special_pattern_subset not in Setup.suppressed_notification_list:
            mode.check_higher_priority_matches_subset(NotificationDB.error_on_special_pattern_subset)

        # (*) Check for dominated patterns
        if NotificationDB.error_on_dominated_pattern not in Setup.suppressed_notification_list:
            mode.check_dominated_pattern(NotificationDB.error_on_dominated_pattern)
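The per-mode checks above all follow the same gate: a check only runs if its notification id is absent from Setup.suppressed_notification_list. A reduced sketch of that gating pattern; the names run_check, check_dominated_patterns, and the notification id are hypothetical, not quex identifiers:

ERROR_ON_DOMINATED_PATTERN = 3            # hypothetical notification id
suppressed_notifications   = set([3])     # e.g. collected from a '--suppress 3' option

def run_check(check_function, notification_id, suppressed):
    # Run the consistency check only if its notification id has not been suppressed.
    if notification_id not in suppressed:
        check_function()

def check_dominated_patterns():
    print("checking for dominated patterns")

run_check(check_dominated_patterns, ERROR_ON_DOMINATED_PATTERN, suppressed_notifications)
# -> prints nothing, because notification id 3 is suppressed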
Example #41
0
def parse(ForeignTokenIdFile, CommentDelimiterList):
    """This function somehow interprets the user defined token id file--if there is
       one. It does this in order to find the names of defined token ids. It does
       some basic interpretation and include file following, but: **it is in no
       way perfect**. Since its only purpose is to avoid warnings about token ids
       that are not defined it is not essential that it may fail sometimes.

       It is more like a nice feature that quex tries to find definitions on its own.
       
       Nevertheless, it should work in the large majority of cases.
    """
    # Regular expression to find '#include <something>' and extract the 'something'
    # in a 'group'. Note that '(' ')' cause the storage of parts of the match.
    IncludeRE = "#[ \t]*include[ \t]*[\"<]([^\">]+)[\">]"

    include_re_obj = re.compile(IncludeRE)

    def get_line_n_of_include(FileName, IncludedFileName):
        fh = open_file_or_die(FileName, Mode="rb")
        line_n = 0
        for line in fh.readlines():
            line_n += 1
            if include_re_obj.search(line) is not None and line.find(IncludedFileName) != -1:
                break
        else:
            # The included file should appear in the including file; tolerate its absence for safety.
            pass

        fh.close()
        return line_n

    # validate(...) ensured, that the file exists.
    work_list      = [ ForeignTokenIdFile ] 
    done_list      = []
    not_found_list = []
    recursive_list = []
    found_db       = {}
    while len(work_list) != 0:
        file_name = work_list.pop()
        content   = __delete_comments(get_file_content_or_die(file_name, Mode="rb"), 
                                      CommentDelimiterList)
        done_list.append(os.path.normpath(file_name))

        # (*) Search for TokenID definitions 
        begin_i = 0
        end_i   = len(content)
        if Setup.token_id_foreign_definition_file_region_begin_re is not None:
            match = Setup.token_id_foreign_definition_file_region_begin_re.search(content)
            if match is not None:
                begin_i = match.end()

        if Setup.token_id_foreign_definition_file_region_end_re is not None:
            match = Setup.token_id_foreign_definition_file_region_end_re.search(content, pos=begin_i)
            if match is not None:
                end_i = match.start()
        content = content[begin_i:end_i]

        token_id_list = __extract_token_ids(content, file_name)
        if len(token_id_list) != 0:
            found_db[file_name] = copy(token_id_list)

        token_id_foreign_set.update(token_id_list)
        for token_name in token_id_list:
            # NOTE: The line number might be wrong, because of the comment deletion
            line_n = 0
            # NOTE: The actual token value is not important, since the token's numeric
            #       identifier is defined in the user's header. We do not care.
            prefix_less_token_name = cut_token_id_prefix(token_name)
            token_id_db[prefix_less_token_name] = \
                        TokenInfo(prefix_less_token_name, None, None, SourceRef(file_name, line_n)) 
        
        # (*) find "#include" statements
        #     'set' ensures that each entry is unique
        include_file_set = set(include_re_obj.findall(content))

        #     -- ensure that included files exist and are not included twice
        for included_file in include_file_set:
            normed_included_file = os.path.normpath(included_file)
            if included_file in done_list:
                line_n = get_line_n_of_include(file_name, included_file)
                recursive_list.append((file_name, line_n, included_file))
            elif not os.access(normed_included_file, os.F_OK): 
                line_n = get_line_n_of_include(file_name, included_file)
                not_found_list.append((file_name, line_n, included_file))
            elif normed_included_file not in done_list:
                work_list.append(included_file)

    if Setup.token_id_foreign_definition_file_show_f:
        if len(found_db) == 0:
            error.log(  "No token ids with prefix '%s' found in" % Setup.token_id_prefix
                      + "'%s' or included files." % Setup.token_id_foreign_definition_file, 
                     NoteF=True)
        else:
            txt = [] 
            for file_name, result in found_db.iteritems():
                result = set(result)
                L = max(map(len, result))
                txt.append("Token ids found in file '%s' {\n" % file_name)
                for name in sorted(result):
                    shorty = cut_token_id_prefix(name)
                    fully  = Setup.token_id_prefix + shorty
                    txt.append("     %s %s=> '%s'\n" % (fully, space(L, name), shorty))
                txt.append("}")
                txt.append("\n")

            if txt: txt = txt[:-1]
            error.log("".join(txt), NoteF=True)
            
    ErrorN = NotificationDB.token_id_ignored_files_report
    if ErrorN not in Setup.suppressed_notification_list:
        if not_found_list:
            not_found_list.sort()
            sr = SourceRef(not_found_list[0][0], LineN=not_found_list[0][1]) 
            error.warning("Files not found:", sr)
            for file_name, line_n, included_file in not_found_list:
                error.warning("%s" % included_file, SourceRef(file_name, line_n))

        if recursive_list:
            recursive_list.sort()
            sr = SourceRef(recursive_list[0][0], LineN=recursive_list[0][1]) 
            error.warning("Files recursively included (ignored second inclusion):", 
                          sr)
            for file_name, line_n, included_file in recursive_list:
                error.warning("%s" % included_file, SourceRef(file_name, line_n))

        if not_found_list or recursive_list:
            # source reference is taken from last setting
            error.log("\nQuex does not handle C-Preprocessor instructions.",
                      sr, NoteF=True, DontExitF=True, SuppressCode=ErrorN)
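The body of parse(...) above is essentially a work-list traversal over included files, with the IncludeRE pulling include targets out of each file and a 'done' set preventing repeated visits. A stripped-down sketch of just that traversal, under the assumption that missing files are simply skipped; this is an illustration, not the quex implementation:

import os
import re

INCLUDE_RE = re.compile(r'#[ \t]*include[ \t]*["<]([^">]+)[">]')

def collect_included_files(start_file_name):
    # Follow '#include' directives; never visit a file twice.
    work_list = [start_file_name]
    done_set  = set()
    while work_list:
        file_name = work_list.pop()
        done_set.add(os.path.normpath(file_name))
        try:
            content = open(file_name, "r").read()
        except IOError:
            continue   # file not found: a real implementation would report it
        for included in set(INCLUDE_RE.findall(content)):
            if os.path.normpath(included) not in done_set:
                work_list.append(included)
    return done_set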
Example #42
0
File: core.py Project: xxyzzzq/quex
def parse_token_id_definitions(fh, NamesOnlyF=False):
    """NamesOnlyF == True: Allow only definition of names, no numeric values 
                           may be assigned to it.

       'NamesOnlyF' indicates that data is not written to the global 
       'token_id_db'. Then only a list of names is returned.
    """
    # NOTE: Catching of EOF happens in caller: parse_section(...)
    #
    prefix       = Setup.token_id_prefix
    prefix_plain = Setup.token_id_prefix_plain # i.e. without name space included

    if NamesOnlyF: 
        result = set()

    skip_whitespace(fh)
    if not check(fh, "{"):
        error.log("Missing opening '{' for after 'token' section identifier.", 
                  fh)

    while check(fh, "}") == False:
        skip_whitespace(fh)

        candidate = read_identifier(fh, TolerantF=True, OnMissingStr="Missing valid token identifier.")

        # -- check the name; if it starts with the token prefix, issue a warning
        suspicious_prefix = None
        if len(prefix) != 0 and candidate.find(prefix) == 0:       
            suspicious_prefix = prefix
        elif len(prefix_plain) != 0 and candidate.find(prefix_plain) == 0: 
            suspicious_prefix = prefix_plain

        if suspicious_prefix is not None:
            error.warning("Token identifier '%s' starts with token prefix '%s'.\n" \
                      % (candidate, suspicious_prefix) \
                      + "Token prefix is mounted automatically. This token id appears in the source\n" \
                      + "code as '%s%s'." \
                      % (prefix, candidate), \
                      fh, 
                      SuppressCode=NotificationDB.warning_token_id_prefix_appears_in_token_id_name)

        skip_whitespace(fh)

        if NamesOnlyF:
            result.add(prefix + candidate)
            if check(fh, ";") == False:
                error.log("Missing ';' after token identifier '%s'.\n" \
                          % candidate, fh)
            continue

        # Parse a possible numeric value after '='
        numeric_value = None
        if check(fh, "="):
            skip_whitespace(fh)
            numeric_value = read_integer(fh)
            if numeric_value is None:
                error.log("Missing number after '=' for token identifier '%s'." % candidate, 
                          fh)

        if check(fh, ";") == False:
            error.log("Missing ';' after token identifier '%s'." % candidate, 
                      fh)

        if not NamesOnlyF:
            ti = TokenInfo(candidate, numeric_value, 
                           SourceReference=SourceRef.from_FileHandle(fh))
            blackboard.token_id_db[candidate] = ti

    if NamesOnlyF:
        return sorted(list(result))
    else:
        return # Changes are applied to 'blackboard.token_id_db'
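For orientation, the section that parse_token_id_definitions(...) accepts looks roughly like the following in a quex input file; the identifiers and the numeric value are made up:

token {
    IDENTIFIER;
    NUMBER;
    KEYWORD_IF = 4711;
}

With NamesOnlyF=True the function merely returns the sorted, prefixed names; otherwise each name becomes a TokenInfo entry in blackboard.token_id_db, with the numeric value left as None when no '=' assignment is given.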
Example #43
0
def _do(the_state_machine, post_context_sm, EndOfLinePostContextF,
        SourceReference):
    """Appends a post context to the given state machine and changes 
       state infos as required. 

       NOTE: 

           In case that:    post_context_sm is not None 
                         or EndOfLinePostContextF  

           The function appends something to the state machine and
           it is therefore required to pass 'NFA to DFA'--better
           also Hopcroft Minimization.
       
       ________________________________________________________________________
       This process is very similar to sequentialization. 
       There is a major difference, though:
       
       Given a state machine (e.g. a pattern) X with a post context Y, 
       a match is only valid if X is followed by Y. Let Xn be an acceptance
       state of X and Ym an acceptance state of Y: 

              ---(Xn-1)---->(Xn)---->(Y0)----> ... ---->((Ym))
                            store                       acceptance
                            input
                            position
       
       That is, it holds:

          -- The next input position is stored at the position of Xn, even though
             it is 'officially' not an acceptance state.

          -- Ym will be an acceptance state, but it will not store 
             the input position!       

       The analysis of the next pattern will start at the position where
       X stopped, even though it is Ym that signals acceptance.
       
    """
    if post_context_sm is None and EndOfLinePostContextF == False:
        return the_state_machine, None

    # State machines with no states are senseless here.
    assert not the_state_machine.is_empty(), \
           "empty state machine can have no post context."
    assert post_context_sm is None or not post_context_sm.is_empty(), \
           "empty state machine cannot be a post-context."

    # State machines involved with post condition building are part of a pattern,
    # but not configured out of multiple patterns. Thus there should be no origins.
    assert the_state_machine.has_origins() == False
    assert post_context_sm is None or not post_context_sm.has_origins()

    for state in the_state_machine.get_acceptance_state_list():
        for cmd in state.single_entry.get_iterable(SeAccept):
            assert cmd.pre_context_id() == E_PreContextIDs.NONE, \
                   "Post Contexts MUST be mounted BEFORE pre-contexts."

    if post_context_sm is None:
        assert EndOfLinePostContextF
        # Generate a new post context that just contains the 'newline'
        post_context_sm = StateMachine_Newline()

    elif EndOfLinePostContextF:
        # Mount 'newline' to existing post context
        post_context_sm = sequentialize.do(
            [post_context_sm, StateMachine_Newline()])

    # A post context whose initial state is an acceptance state is not really a
    # 'context', since it accepts anything. The state machine remains without a post context.
    if post_context_sm.get_init_state().is_acceptance():
        error.warning(
            "Post context accepts anything--replaced by no post context.",
            SourceReference)
        return the_state_machine, None

    # (*) Two ways of handling post-contexts:
    #
    #     -- Seldom Exception:
    #        Pseudo-Ambiguous Post Conditions (x+/x) -- detecting the end of the
    #        core pattern after the end of the post context
    #        has been reached.
    #
    if ambiguous_post_context.detect_forward(the_state_machine,
                                             post_context_sm):
        if ambiguous_post_context.detect_backward(the_state_machine,
                                                  post_context_sm):
            # -- for post contexts that are forward and backward ambiguous
            #    a philosophical cut is necessary.
            error.warning(
                "Post context requires philosophical cut--handle with care!\n"
                "Proposal: Isolate pattern and ensure results are as expected!",
                SourceReference)
            post_context_sm = ambiguous_post_context.philosophical_cut(
                the_state_machine, post_context_sm)

    # NOTE: the_state_machine may now contain an epsilon transition. See the
    #       comment at the entry of this function.
        bipd_sm_to_be_inverted = ambiguous_post_context.mount(
            the_state_machine, post_context_sm)
        the_state_machine = beautifier.do(the_state_machine)
        return the_state_machine, bipd_sm_to_be_inverted

    # -- The 'normal' way: storing the input position at the end of the core
    #    pattern.
    #
    # (*) Need to clone the state machines, i.e. provide their internal
    #     states with new ids, but the 'behavior' remains. This allows
    #     state machines to appear twice, or to be used in 'larger'
    #     conglomerates.
    post_clone = post_context_sm.clone()

    # -- Once an acceptance state is reached no further analysis is necessary.
    ## NO: acceptance_pruning.do(post_clone)
    ## BECAUSE: it may have to compete with a pseudo-ambiguous post context

    # (*) collect all transitions from both state machines into a single one
    #
    #     NOTE: The start index is unique. Therefore, one can assume that each
    #           clone_list '.states' dictionary has different keys. One can simply
    #           take over all transitions of a start index into the result without
    #           considering interferences (see below)
    #
    orig_acceptance_state_id_list = the_state_machine.get_acceptance_state_index_list()

    # -- mount on every acceptance state the initial state of the following state
    #    machine via epsilon transition
    the_state_machine.mount_to_acceptance_states(
        post_clone.init_state_index, CancelStartAcceptanceStateF=True)
    for start_state_index, state in post_clone.states.iteritems():
        the_state_machine.states[
            start_state_index] = state  # states are already cloned

    # -- raise the 'store input position' flag at each old acceptance state
    #    of the core pattern
    for state_idx in orig_acceptance_state_id_list:
        state = the_state_machine.states[state_idx]
        state.set_read_position_store_f(True)

    # -- the new acceptance states (those of the post context) shall not store
    #    the input position; they restore the stored position instead
    for state in the_state_machine.get_acceptance_state_list():
        state.set_read_position_store_f(False)
        state.set_read_position_restore_f(True)

    # No input position backward search required
    return beautifier.do(the_state_machine), None
Example #44
0
def __warn_implicit_token_definitions():
    """Output a message on token_ids which have been generated automatically.
    That means, that the user may have made a typo.
    """
    if len(blackboard.token_id_implicit_list) == 0: 
        return

    sr  = blackboard.token_id_implicit_list[0][1]
    msg = "Detected implicit token identifier definitions."
    if len(Setup.token_id_foreign_definition_file) == 0:
        msg += " Proposal:\n"
        msg += "   token {"
        error.warning(msg, sr)
        for token_name, sr in blackboard.token_id_implicit_list:
            error.warning("     %s;" % token_name, sr)
        error.warning("   }", sr)
    else:
        error.warning(msg, sr)
        for token_name, sr in blackboard.token_id_implicit_list:
            error.warning("     %s;" % (Setup.token_id_prefix + token_name), sr)
        error.warning("Above token ids must be defined in '%s'" \
                      % Setup.token_id_foreign_definition_file, sr)
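As an illustration, for two hypothetical implicit token ids NUMBER and IDENTIFIER and no foreign token id file, the sequence of warnings above spells out a proposal of this shape (source references omitted):

Detected implicit token identifier definitions. Proposal:
   token {
     NUMBER;
     IDENTIFIER;
   }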
Example #45
0
File: core.py Project: nyulacska/gpr
def parse_token_id_definitions(fh, NamesOnlyF=False):
    """NamesOnlyF == True: Allow only definition of names, no numeric values 
                           may be assigned to it.

       'NamesOnlyF' indicates that data is not written to the global 
       'token_id_db'. Then only a list of names is returned.
    """
    # NOTE: Catching of EOF happens in caller: parse_section(...)
    #
    prefix = Setup.token_id_prefix
    prefix_plain = Setup.token_id_prefix_plain  # i.e. without name space included

    if NamesOnlyF:
        result = set()

    skip_whitespace(fh)
    if not check(fh, "{"):
        error.log("Missing opening '{' for after 'token' section identifier.",
                  fh)

    while check(fh, "}") == False:
        skip_whitespace(fh)

        candidate = read_identifier(
            fh, TolerantF=True, OnMissingStr="Missing valid token identifier.")

        # -- check the name; if it starts with the token prefix, issue a warning
        suspicious_prefix = None
        if len(prefix) != 0 and candidate.find(prefix) == 0:
            suspicious_prefix = prefix
        elif len(prefix_plain) != 0 and candidate.find(prefix_plain) == 0:
            suspicious_prefix = prefix_plain

        if suspicious_prefix is not None:
            error.warning("Token identifier '%s' starts with token prefix '%s'.\n" \
                      % (candidate, suspicious_prefix) \
                      + "Token prefix is mounted automatically. This token id appears in the source\n" \
                      + "code as '%s%s'." \
                      % (prefix, candidate), \
                      fh,
                      SuppressCode=NotificationDB.warning_token_id_prefix_appears_in_token_id_name)

        skip_whitespace(fh)

        if NamesOnlyF:
            result.add(prefix + candidate)
            if check(fh, ";") == False:
                error.log("Missing ';' after token identifier '%s'.\n" \
                          % candidate, fh)
            continue

        # Parse a possible numeric value after '='
        numeric_value = None
        if check(fh, "="):
            skip_whitespace(fh)
            numeric_value = read_integer(fh)
            if numeric_value is None:
                error.log(
                    "Missing number after '=' for token identifier '%s'." %
                    candidate, fh)

        if check(fh, ";") == False:
            error.log("Missing ';' after token identifier '%s'." % candidate,
                      fh)

        if not NamesOnlyF:
            ti = TokenInfo(candidate,
                           numeric_value,
                           SourceReference=SourceRef.from_FileHandle(fh))
            blackboard.token_id_db[candidate] = ti

    if NamesOnlyF:
        return sorted(list(result))
    else:
        return  # Changes are applied to 'blackboard.token_id_db'
Example #46
0
def do(setup, command_line, argv):
    """Does a consistency check for setup and the command line.
    """

    setup.output_directory = os.path.normpath(setup.output_directory)
    if setup.output_directory:
        # Check, if the output directory exists
        if os.access(setup.output_directory, os.F_OK) == False:
            error.log("The directory %s was specified for output, but does not exists." % setup.output_directory)
        if os.access(setup.output_directory, os.W_OK) == False:
            error.log("The directory %s was specified for output, but is not writeable." % setup.output_directory)

    # if the mode is '--language dot' => check character display options. 
    if setup.character_display not in ["hex", "utf8"]:
        error.log("Character display must be either 'hex' or 'utf8'.\nFound: '%s'" % 
                  setup.character_display)

    # ensure that options are not specified twice
    for parameter, info in SETUP_INFO.items():
        if type(info) != list: continue
        occurence_n = 0 
        for option in info[0]:
            occurence_n += argv.count(option)
        if occurence_n > 1 and info[1] not in (SetupParTypes.LIST, SetupParTypes.INT_LIST):
            error.log("Received more than one of the following options:\n" + \
                      "%s" % repr(info[0])[1:-1])

    # (*) Check for 'Deprecated' Options ___________________________________________________
    for name, info in DEPRECATED.items():
        command_line_options = SETUP_INFO[name][0]
        comment                   = info[0]
        depreciated_since_version = info[1]
        for option in command_line_options:
            if command_line.search(option):
                error.log("Command line option '%s' is ignored.\n" % option + \
                          comment + "\n" + \
                          "Last version of Quex supporting this option is version %s. Please, visit\n" % \
                          depreciated_since_version + \
                          "http://quex.org for further information.")
                          
    # (*) Check for 'Straying' Options ___________________________________________________
    options = []
    for key, info in SETUP_INFO.items():
        if type(info) != list: continue
        if key in DEPRECATED: continue
        if info[1] is not None: options.extend(info[0])
    options.sort(lambda a,b: cmp(a.replace("-",""), b.replace("-","")))

    ufos = command_line.unidentified_options(options)
    if len(ufos) != 0:
        error.log("Unidentified option(s) = " +  repr(ufos) + "\n" + \
                  __get_supported_command_line_option_description(options))

    if setup.analyzer_derived_class_name != "" and \
       setup.analyzer_derived_class_file == "":
            error.log("Specified derived class '%s' on command line, but it was not\n" % \
                      setup.analyzer_derived_class_name + \
                      "specified which file contains the definition of it.\n" + \
                      "use command line option '--derived-class-file'.\n")

    if setup.buffer_element_size not in [-1, 1, 2, 4]:
        error.log("The setting of '--buffer-element-size' (or '-b') can only be\n" 
                  "1, 2, or 4 (found %s)." % repr(setup.buffer_element_size))

    if setup.buffer_byte_order not in ["<system>", "little", "big"]:
        error.log("Byte order (option --endian) must be 'little', 'big', or '<system>'.\n" + \
                  "Note, that this option is only interesting for cross plattform development.\n" + \
                  "By default, quex automatically chooses the endian type of your system.")

    # Manually written token class requires token class name to be specified
    if setup.token_class_file != "" and command_line.search("--token-class", "--tc") == False:
        error.log("The use of a manually written token class requires that the name of the class\n"
                  "is specified on the command line via the '--token-class' option.")
    
    # Token queue
    if setup.token_policy != "queue" and command_line.search("--token-queue-size"):
        error.log("Option --token-queue-size determines a fixed token queue size. This makes\n" + \
                  "only sense in conjunction with '--token-policy queue'.\n")
    if setup.token_queue_size <= setup.token_queue_safety_border + 1:
        if setup.token_queue_size == setup.token_queue_safety_border: cmp_str = "equal to"
        else:                                                         cmp_str = "less than"
        error.log("Token queue size is %i is %s token queue safety border %i + 1.\n" % \
                  (setup.token_queue_size, cmp_str, setup.token_queue_safety_border) + 
                  "Set appropriate values with --token-queue-size and --token-queue-safety-border.")

    # Check that names are valid identifiers
    if len(setup.token_id_prefix_plain) != 0:
        __check_identifier(setup, "token_id_prefix_plain", "Token prefix")
    __check_identifier(setup, "analyzer_class_name", "Engine name")
    if setup.analyzer_derived_class_name != "": 
        __check_identifier(setup, "analyzer_derived_class_name", "Derived class name")
    
    __check_file_name(setup, "token_class_file",                 "file containing token class definition")
    __check_file_name(setup, "analyzer_derived_class_file",      "file containing user derived lexer class")
    __check_file_name(setup, "token_id_foreign_definition_file", "file containing user token ids", 0,
                      CommandLineOption=SETUP_INFO["token_id_foreign_definition"][0])
    __check_file_name(setup, "input_mode_files", "quex source file")

    # Check that not more than one converter is specified
    converter_n = 0
    if setup.converter_iconv_f:                 converter_n += 1
    if setup.converter_icu_f:                   converter_n += 1 
    if len(setup.converter_user_new_func) != 0: converter_n += 1
    if converter_n > 1:
        error.log("More than one character converter has been specified. Note, that the\n" + \
                  "options '--icu', '--iconv', and '--converter-new' (or '--cn') are\n"    + \
                  "to be used mutually exclusively.")
    if converter_n == 1 and setup.buffer_codec.name != "unicode":  
        # If the buffer codec is other than unicode, then no converter shall
        # be used to fill the buffer. Instead, the engine is transformed, so 
        # that it works directly on the codec.
        error.log("An engine that is to be generated for a specific codec cannot rely\n"      + \
                  "on converters. Do no use '--codec' together with '--icu', '--iconv', or\n" + \
                  "`--converter-new`.")

    # If a converter has been specified and no bytes-element-size has been specified,
    # it defaults to '1 byte' which is most likely not what is desired for unicode.
    if     converter_n == 1 \
       and setup.buffer_element_size == 1 \
       and not command_line_args_defined(command_line, "buffer_element_size") \
       and not command_line_args_defined(command_line, "buffer_element_type"):
        error.log("A converter has been specified, but the default buffer element size\n" + \
                  "is left to 1 byte. Consider %s or %s." \
                  % (command_line_args_string("buffer_element_size"),
                     command_line_args_string("buffer_element_type")))

    # If a user defined type is specified for 'engine character type' and 
    # a converter, then the name of the target type must be specified explicitly.
    if         setup.buffer_element_type != "" \
       and not global_character_type_db.has_key(setup.buffer_element_type) \
       and     setup.converter_ucs_coding_name == "" \
       and     converter_n != 0:
        tc = setup.buffer_element_type
        error.log("A character code converter has been specified. It is supposed to convert\n" + \
                  "incoming data into an internal buffer of unicode characters. The size of\n" + \
                  "each character is determined by '%s' which is a user defined type.\n" % tc  + \
                  "\n" + \
                  "Quex cannot determine automatically the name that the converter requires\n" +      \
                  "to produce unicode characters for type '%s'. It must be specified by the\n" % tc + \
                  "command line option %s." \
                  % command_line_args_string("converter_ucs_coding_name"))

    # Token transmission policy
    token_policy_list = ["queue", "single", "users_token", "users_queue"]
    if setup.token_policy not in token_policy_list:
        error.log("Token policy '%s' not supported. Use one of the following:\n" % setup.token_policy + \
                  repr(token_policy_list)[1:-1])
    elif setup.token_policy == "users_token":
        error.log("Token policy 'users_queue' has be deprecated since 0.49.1. Use\n"
                  "equivalent policy 'single'.")
    elif setup.token_policy == "users_queue":
        error.log("Token policy 'users_queue' has be deprecated since 0.49.1\n")

    # Internal engine character encoding
    def __codec_vs_buffer_element_size(CodecName, RequiredBufferElementSize):
        if   setup.buffer_codec.name   != CodecName:                 return
        elif setup.buffer_element_size == RequiredBufferElementSize: return

        if setup.buffer_element_size == -1: 
            msg_str = "undetermined (found type '%s')" % setup.buffer_element_type
        else:
            msg_str = "is not %i (found %i)" % (RequiredBufferElementSize, setup.buffer_element_size)

        error.log("Using codec '%s' while buffer element size %s.\n" % (CodecName, msg_str) + 
                  "Consult command line argument %s" \
                  % command_line_args_string("buffer_element_size"))

    if setup.buffer_codec.name != "unicode":
        if not setup.buffer_codec_file:
            error.verify_word_in_list(setup.buffer_codec_name,
                                      codec_db.get_supported_codec_list() + ["utf8", "utf16"],
                                      "Codec '%s' is not supported." % setup.buffer_codec.name)
        __codec_vs_buffer_element_size("utf8", 1)
        __codec_vs_buffer_element_size("utf16", 2)

    if setup.external_lexeme_null_object and setup.token_class_only_f:
        error.log("Specifying an external lexeme null object signalizes an\n"
                  "external token class implementation. The 'token class only\n"
                  "flag' generates a token class considered to be externally\n"
                  "shared. Both flags are mutually exclusive.")

    if setup.string_accumulator_f:
        error_n = NotificationDB.warning_on_no_token_class_take_text
        if error_n in setup.suppressed_notification_list: 
           error.warning("The warning upon missing 'take_text' in token type definition is de-\n"
                     + "activated by '--suppress %i'. This is dangerous, if there is a string\n" % error_n
                     + "accumulator. May be, use '--no-string-accumulator'.", -1,
                    SuppressCode=NotificationDB.warning_on_no_warning_on_missing_take_text)