def __parse_event(new_mode, fh, word):
    pos = fh.tell()

    # Allow '<<EOF>>' and '<<FAIL>>' out of respect for classical tools like 'lex'
    if word == "<<EOF>>":
        word = "on_end_of_stream"
    elif word == "<<FAIL>>":
        word = "on_failure"
    elif word in blackboard.all_section_title_list:
        error.log("Pattern '%s' is a quex section title. Has the closing '}' of mode %s\n" % (word, new_mode.name) \
                  + "been forgotten? Else use quotes, i.e. \"%s\"." % word, fh)
    elif len(word) < 3 or word[:3] != "on_":
        return False

    comment = "Unknown event handler '%s'.\n" % word + \
              "Note that any pattern starting with 'on_' is considered an event handler.\n" + \
              "Use double quotes to bracket patterns that start with 'on_'."

    __general_validate(fh, new_mode, word, pos)
    error.verify_word_in_list(word, standard_incidence_db.keys() + ["keyword_list"],
                              comment, fh)

    code         = code_fragment.parse(fh, "%s::%s event handler" % (new_mode.name, word))
    incidence_id = standard_incidence_db[word][0]

    if Lng.suspicious_RETURN_in_event_handler(incidence_id, code.get_text()):
        error.warning("Suspicious 'RETURN' in event handler '%s'.\n" % incidence_id \
                      + "This statement will trigger the 'on_after_match' handler.\n" \
                      + "Maybe use plain 'return' instead.", code.sr)

    if word == "on_n_dedent" and not token_db.support_repetition():
        error.warning("Found 'on_n_dedent', but no single token has been specified\n"
                      "in a 'repeated_token' section.", code.sr)

    new_mode.incidence_db[word] = code
    return True
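# --- Hedged sketch (not quex source): every function in this section funnels
#     unknown identifiers through 'error.verify_word_in_list'. Its definition
#     is not shown here; the sketch below only illustrates the contract that
#     the call sites rely on (word, candidate list, message, source position).
#     The similarity ranking and the 'ExitF' handling are assumptions.
import difflib
import sys

def verify_word_in_list_sketch(word, word_list, comment, fh=-1, ExitF=True):
    """Pass silently if 'word' is known; otherwise print the caller's message,
    suggest close matches, and optionally terminate."""
    word_list = list(word_list)
    if word in word_list:
        return True
    print(comment)
    similar = difflib.get_close_matches(word, word_list, n=5)
    if similar:
        print("Did you mean: %s?" % ", ".join(similar))
    if ExitF:
        sys.exit(1)
    return False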
def __validate_definition(TheCodeFragment, NameStr, AlreadyMentionedList, StandardMembersF):
    if StandardMembersF:
        error.verify_word_in_list(NameStr, TokenType_StandardMemberList,
                                  "Member name '%s' not allowed in token_type section 'standard'." % NameStr,
                                  TheCodeFragment.sr)

        # Standard members are all of numeric type.
        if    TheCodeFragment.contains_string(Lng.Match_string) \
           or TheCodeFragment.contains_string(Lng.Match_vector) \
           or TheCodeFragment.contains_string(Lng.Match_map):
            type_str = TheCodeFragment.get_text()
            error.log("Numeric type required.\n" + \
                      "Example: <token_id: uint16_t>, Found: '%s'\n" % type_str,
                      TheCodeFragment.sr)
    else:
        if NameStr in TokenType_StandardMemberList:
            error.log("Member '%s' only allowed in 'standard' section." % NameStr,
                      TheCodeFragment.sr)

    for candidate in AlreadyMentionedList:
        if candidate[0] != NameStr:
            continue
        error.log("Token type member name '%s' defined twice." % NameStr,
                  TheCodeFragment.sr, DontExitF=True)
        error.log("Previously defined here.", candidate[1].sr)
def get_character_set(self, PropertyName, Value=None, Fh=-1):
    """Returns the character set that corresponds to 'Property==Value'.

    'Property' can be a property name or a property alias.
    'Value'    can be a property value or a property value alias.
               For binary properties 'Value' must be None.

    RETURNS: NumberSet in case of success.
             str       in case an error occurred; the string describes the problem.
    """
    if self.db == {}:
        self.init_db()

    error.verify_word_in_list(PropertyName, self.get_property_name_list(),
                              "Unknown Unicode property '%s'" % PropertyName,
                              Fh, ExitF=True)

    property = self[PropertyName]

    if property.type == "Binary":
        if Value is not None:
            return "Binary property '%s' cannot have a value.\n" % PropertyName + \
                   "Received '%s = %s'." % (PropertyName, Value)

    elif Value is None:
        return "Non-Binary property '%s' must have a value.\n" % PropertyName + \
               "Expected something like '%s = Value'.\n" % PropertyName + \
               "Possible Values: " + \
               property.get_value_list_help()

    return property.get_character_set(Value)
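# --- Hedged usage sketch for get_character_set(): a self-contained stand-in
#     with a plain dict database and Python sets instead of quex's NumberSet.
#     The 'demo_db' contents are illustrative, not the real Unicode data.
def get_character_set_sketch(db, property_name, value=None):
    prop_type, value_db = db[property_name]
    if prop_type == "Binary":
        if value is not None:
            return "Binary property '%s' cannot have a value." % property_name
    elif value is None:
        return "Non-Binary property '%s' must have a value." % property_name
    return value_db[value]

demo_db = {
    "White_Space":      ("Binary",  {None: set([0x09, 0x0A, 0x20])}),
    "General_Category": ("Catalog", {"Nd": set(range(0x30, 0x3A))}),
}
assert get_character_set_sketch(demo_db, "General_Category", "Nd") == set(range(0x30, 0x3A))
assert isinstance(get_character_set_sketch(demo_db, "White_Space", "yes"), str)  # error => str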
def __parse_event(new_mode, fh, word):
    pos = fh.tell()

    # Allow '<<EOF>>' and '<<FAIL>>' out of respect for classical tools like 'lex'
    if word == "<<EOF>>":
        word = "on_end_of_stream"
    elif word == "<<FAIL>>":
        word = "on_failure"
    elif word in blackboard.all_section_title_list:
        error.log("Pattern '%s' is a quex section title. Has the closing '}' of mode %s\n" % (word, new_mode.name) \
                  + "been forgotten? Else use quotes, i.e. \"%s\"." % word, fh)
    elif len(word) < 3 or word[:3] != "on_":
        return False

    comment = "Unknown event handler '%s'.\n" % word + \
              "Note that any pattern starting with 'on_' is considered an event handler.\n" + \
              "Use double quotes to bracket patterns that start with 'on_'."

    __general_validate(fh, new_mode, word, pos)
    error.verify_word_in_list(word, standard_incidence_db.keys(), comment, fh)
    __validate_required_token_policy_queue(word, fh, pos)

    continue_f = True
    if word == "on_end_of_stream" or word == "on_failure":
        # -- When a termination token is sent, no other token shall follow.
        #    => Enforce a return from the analyzer! Do not allow CONTINUE!
        # -- When an 'on_failure' is received, allow immediate action of the
        #    receiver => Do not allow CONTINUE!
        continue_f = False

    new_mode.incidence_db[word] = \
        code_fragment.parse(fh, "%s::%s event handler" % (new_mode.name, word),
                            ContinueF=continue_f)
    return True
def parse(fh, new_mode):
    source_reference = SourceRef.from_FileHandle(fh)
    identifier       = read_option_start(fh)
    if identifier is None:
        return False

    error.verify_word_in_list(identifier, mode_option_info_db.keys(),
                              "mode option", fh)

    if identifier == "skip":
        value = __parse_skip_option(fh, new_mode, identifier)

    elif identifier in ["skip_range", "skip_nested_range"]:
        value = __parse_range_skipper_option(fh, identifier, new_mode)

    elif identifier == "indentation":
        value = counter.IndentationCount_Prep(fh).parse()
        blackboard.required_support_indentation_count_set()

    elif identifier == "counter":
        value = counter.LineColumnCount_Prep(fh).parse()

    elif identifier in ("entry", "exit"):
        value = read_option_value(fh, ListF=True)  # A 'list' of strings

    else:
        value = read_option_value(fh)              # A single string

    # Finally, set the option
    new_mode.option_db.enter(identifier, value, source_reference, new_mode.name)
    return True
def parse(fh, new_mode):
    source_reference = SourceRef.from_FileHandle(fh)
    identifier       = read_option_start(fh)
    if identifier is None:
        return False

    error.verify_word_in_list(identifier, mode_option_info_db.keys(),
                              "mode option", fh)

    if identifier == "skip":
        value = __parse_skip_option(fh, new_mode, identifier)

    elif identifier in ["skip_range", "skip_nested_range"]:
        value = __parse_range_skipper_option(fh, identifier, new_mode)

    elif identifier == "indentation":
        value = IndentationCount.from_FileHandle(fh)
        blackboard.required_support_indentation_count_set()

    elif identifier == "counter":
        value = LineColumnCount.from_FileHandle(fh)

    elif identifier in ("entry", "exit"):
        value = read_option_value(fh, ListF=True)  # A 'list' of strings

    else:
        value = read_option_value(fh)              # A single string

    # Finally, set the option
    new_mode.option_db.enter(identifier, value, source_reference, new_mode.name)
    return True
def snap_replacement(stream, PatternDict, StateMachineF=True):
    """Snaps a predefined pattern from the input string and returns the
    resulting state machine.
    """
    skip_whitespace(stream)
    pattern_name = read_identifier(stream)
    if pattern_name == "":
        raise RegularExpressionException("Pattern replacement expression misses identifier after '{'.")
    skip_whitespace(stream)

    if not check(stream, "}"):
        raise RegularExpressionException("Pattern replacement expression misses closing '}' after '%s'." \
                                         % pattern_name)

    error.verify_word_in_list(pattern_name, PatternDict.keys(),
                              "Specifier '%s' not found in any preceding 'define { ... }' section." % pattern_name,
                              stream)

    reference = PatternDict[pattern_name]
    assert reference.__class__ == PatternShorthand

    # The replacement may be a state machine or a number set
    if StateMachineF:
        # Get a cloned version of the state machine
        state_machine = reference.get_state_machine()
        assert isinstance(state_machine, DFA)

        # It is essential that state machines defined as patterns do not
        # have origins. Otherwise, the optimization of patterns that
        # contain pattern replacements might get confused and cannot
        # find all optimizations.
        assert not state_machine.has_specific_acceptance_id()

        # A state machine that contains pre- or post-conditions cannot be part
        # of a replacement. The addition of new post-contexts would mess up the pattern.
        ## if state_machine.has_pre_or_post_context():
        ##     error.log("Pre- or post-conditioned pattern was used in replacement.\n" + \
        ##               "Quex's regular expression grammar does not allow this.", stream)

        return state_machine

    else:
        # Get a cloned version of the character set
        character_set = reference.get_character_set()
        if character_set is None:
            error.log("Replacement in character set expression must be a character set.\n"
                      "Specifier '%s' relates to a pattern state machine." % pattern_name, stream)

        if character_set.is_empty():
            error.log("Referenced character set '%s' is empty.\nAborted." % pattern_name, stream)

        return character_set
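# --- Hedged sketch (not quex source): the core of snap_replacement() is a
#     dictionary lookup of '{NAME}' against previously 'define'd shorthands,
#     answered with a clone so the stored original stays untouched. Plain
#     strings stand in for state machines / character sets here.
import copy
import re

def expand_shorthands_sketch(pattern, pattern_dict):
    def substitute(match):
        name = match.group(1)
        if name not in pattern_dict:
            raise KeyError("Specifier '%s' not found in any preceding "
                           "'define { ... }' section." % name)
        return copy.deepcopy(pattern_dict[name])
    return re.sub(r"\{([A-Za-z_][A-Za-z_0-9]*)\}", substitute, pattern)

assert expand_shorthands_sketch("{DIGIT}+", {"DIGIT": "[0-9]"}) == "[0-9]+"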
def __access_mode(Mode, OtherModeName, ModeNameList, EntryF):
    type_str = {True: "entry from", False: "exit to"}[EntryF]

    error.verify_word_in_list(OtherModeName, ModeNameList,
                              "Mode '%s' permits the %s mode '%s'\nbut no such mode exists." % \
                              (Mode.name, type_str, OtherModeName), Mode.sr)

    result = blackboard.mode_db[OtherModeName]
    assert result is not None
    return result
def snap_set_term(stream, PatternDict):
    global special_character_set_db

    __debug_entry("set_term", stream)

    operation_list     = ["union", "intersection", "difference", "inverse"]
    character_set_list = special_character_set_db.keys()

    skip_whitespace(stream)
    position = stream.tell()

    # If there is no following '(', then enter the 'snap_expression' block below.
    word = read_identifier(stream)

    if word in operation_list:
        set_list = snap_set_list(stream, word, PatternDict)
        # If an error occurs during set_list parsing, a syntax error exception is thrown.

        L      = len(set_list)
        result = set_list[0]

        if word == "inverse":
            # The inverse of multiple sets is the inverse of the union of these sets.
            if L > 1:
                for character_set in set_list[1:]:
                    result.unite_with(character_set)
            return __debug_exit(result.get_complement(Setup.buffer_codec.source_set), stream)

        if L < 2:
            raise RegularExpressionException("Regular Expression: A %s operation needs at least\n" % word + \
                                             "two sets to operate on them.")

        if word == "union":
            for set in set_list[1:]:
                result.unite_with(set)
        elif word == "intersection":
            for set in set_list[1:]:
                result.intersect_with(set)
        elif word == "difference":
            for set in set_list[1:]:
                result.subtract(set)

    elif word in character_set_list:
        reg_expr = special_character_set_db[word]
        result   = traditional_character_set.do_string(reg_expr)

    elif word != "":
        error.verify_word_in_list(word, character_set_list + operation_list,
                                  "Unknown keyword '%s'." % word, stream)
    else:
        stream.seek(position)
        result = snap_set_expression(stream, PatternDict)

    return __debug_exit(result, stream)
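# --- Hedged sketch (not quex source): snap_set_term() folds a set operation
#     over its operand list, left to right; the inverse of several sets is the
#     inverse of their union. Python sets stand in for quex's NumberSet, and
#     'universe' stands in for Setup.buffer_codec.source_set.
def fold_set_operation_sketch(op, set_list, universe=None):
    result = set(set_list[0])
    if op == "inverse":
        for s in set_list[1:]:
            result |= s
        return universe - result
    if len(set_list) < 2:
        raise ValueError("A %s operation needs at least two sets." % op)
    for s in set_list[1:]:
        if   op == "union":        result |= s
        elif op == "intersection": result &= s
        elif op == "difference":   result -= s
    return result

assert fold_set_operation_sketch("difference", [set([1, 2, 3]), set([2]), set([3])]) == set([1])
assert fold_set_operation_sketch("inverse", [set([1])], universe=set([1, 2])) == set([2])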
def get_codecs_for_language(Language):
    result = []
    for record in parser.get_codec_list_db():
        codec = record[0]
        if codec not in get_supported_codec_list():
            continue
        if Language in record[2]:
            result.append(codec)

    if len(result) == 0:
        error.verify_word_in_list(Language, get_supported_language_list(),
                                  "No codec found for language '%s'." % Language)
    return result
def _get_distinct_codec_name_for_alias(CodecAlias, FH=-1):
    """Argument FH corresponds to the argument of error.log."""
    assert len(CodecAlias) != 0

    for record in parser.get_codec_list_db():
        if CodecAlias in record[1] or CodecAlias == record[0]:
            return record[0]

    error.verify_word_in_list(CodecAlias, get_supported_codec_list(),
                              "Character encoding '%s' unknown to current version of quex." % CodecAlias,
                              FH)
def get_file_name_for_codec_alias(CodecAlias):
    """Returns the canonical codec name together with the name of the file
    that contains the codec's character set mapping."""
    assert CodecAlias

    for record in parser.get_codec_list_db():
        if CodecAlias in record[1] or CodecAlias == record[0]:
            codec_name = record[0]
            return codec_name, "%s/%s.dat" % (QUEX_CODEC_DB_PATH, codec_name)

    error.verify_word_in_list(CodecAlias, get_supported_codec_list(),
                              "Character encoding '%s' unknown to current version of quex." % CodecAlias)
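# --- Hedged sketch (not quex source): the alias lookups above assume a codec
#     record layout of (canonical_name, alias_list, language_list). The demo
#     records below are illustrative, not the real codec database.
codec_list_db_demo = [
    ("iso8859_1", ["latin1", "iso-8859-1"], ["Western Europe"]),
    ("cp737",     ["737"],                  ["Greek"]),
]

def distinct_codec_name_sketch(alias):
    for canonical, aliases, _languages in codec_list_db_demo:
        if alias == canonical or alias in aliases:
            return canonical
    raise LookupError("Character encoding '%s' unknown." % alias)

assert distinct_codec_name_sketch("latin1") == "iso8859_1"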
def __access_mode(Mode, ModePrepList, OtherModeName, ModeNameList, EntryF):
    type_str = {True: "entry from", False: "exit to"}[EntryF]

    error.verify_word_in_list(OtherModeName, ModeNameList,
                              "Mode '%s' permits the %s mode '%s'\nbut no such mode exists." % \
                              (Mode.name, type_str, OtherModeName), Mode.sr)

    for mode in ModePrepList:
        if mode.name == OtherModeName:
            return mode

    # OtherModeName MUST be in ModePrepList at this point in time.
    assert False
def __determine_base_mode_sequence(self, ModeDescr, InheritancePath, base_mode_sequence):
    """Determine the sequence of base modes. The type of sequencing determines
    also the pattern precedence. The 'deep first' scheme is chosen here. For
    example, a mode hierarchy of

                              A
                             / \ 
                            B   C
                           / \ / \ 
                          D  E F  G

    results in a sequence: (A, B, D, E, C, F, G).reverse()

    => That is, the mode itself is base_mode_sequence[-1]
    => Patterns and event handlers of 'E' have precedence over 'C', because
       they are the children of a preceding base mode.

    This function detects circular inheritance.

    __dive -- this keyword was inserted for the sole purpose of signalling
              that here is a case of recursion, which may be solved later on
              by a TreeWalker.
    """
    if ModeDescr.name in InheritancePath:
        msg = "mode '%s'\n" % InheritancePath[0]
        for mode_name in InheritancePath[InheritancePath.index(ModeDescr.name) + 1:]:
            msg += "   inherits mode '%s'\n" % mode_name
        msg += "   inherits mode '%s'" % ModeDescr.name
        error.log("circular inheritance detected:\n" + msg, ModeDescr.sr)

    base_mode_name_list_reversed = deepcopy(ModeDescr.derived_from_list)
    #base_mode_name_list_reversed.reverse()
    for name in base_mode_name_list_reversed:
        # -- does mode exist?
        error.verify_word_in_list(name, blackboard.mode_description_db.keys(),
                                  "Mode '%s' inherits mode '%s' which does not exist." % (ModeDescr.name, name),
                                  ModeDescr.sr)

        if name in map(lambda m: m.name, base_mode_sequence):
            continue

        # -- grab the mode description
        mode_descr = blackboard.mode_description_db[name]
        self.__determine_base_mode_sequence(mode_descr,
                                            InheritancePath + [ModeDescr.name],
                                            base_mode_sequence)

    base_mode_sequence.append(ModeDescr)
    return base_mode_sequence
def _check_inheritance_relationships(self, ModePrepPrepDb):
    mode_name_set = set(ModePrepPrepDb.iterkeys())

    for mode_name in self.direct_base_mode_name_list:
        if mode_name not in mode_name_set:
            error.verify_word_in_list(mode_name, mode_name_set,
                                      "mode '%s' inherits from a mode '%s'\nbut no such mode exists." % \
                                      (self.name, mode_name), self.sr)

        if ModePrepPrepDb[mode_name].option_db.value("inheritable") == "no":
            error.log("mode '%s' inherits mode '%s' which is not inheritable." % \
                      (self.name, mode_name), self.sr)
def read_character_code(fh):
    # NOTE: This function is tested with the regression test for feature request 2251359.
    #       See directory $QUEX_PATH/TEST/2251359.
    pos   = fh.tell()
    start = fh.read(1)

    if start == "":
        fh.seek(pos)
        return -1

    elif start == "'":
        # Read a utf8 character and return its code point.
        # Example: '+'
        if check(fh, "\\"):
            # snap_backslashed_character throws an exception if the backslashed
            # character is nonsense.
            character_code = snap_backslashed_character.do(fh, ReducedSetOfBackslashedCharactersF=True)
        else:
            character_code = __read_one_utf8_code_from_stream(fh)

        if character_code is None:
            error.log("Missing utf8-character for definition of character code by character.", fh)
        elif fh.read(1) != '\'':
            error.log("Missing closing ' for definition of character code by character.", fh)

        return character_code

    if start == "U":
        if fh.read(1) != "C":
            fh.seek(pos)
            return -1
        # Read a Unicode name.
        # Example: UC MATHEMATICAL_MONOSPACE_DIGIT_FIVE
        skip_whitespace(fh)
        ucs_name = __read_token_identifier(fh)
        if ucs_name == "":
            fh.seek(pos)
            return -1

        # Get the character set related to the given name. Note, the size of
        # the set is supposed to be one.
        character_code = ucs_property_db.get_character_set("Name", ucs_name)
        if type(character_code) in [str, unicode]:
            error.verify_word_in_list(ucs_name, ucs_property_db["Name"].code_point_db,
                                      "The string %s\ndoes not identify a known unicode character." % ucs_name,
                                      fh)
        elif type(character_code) not in [int, long]:
            error.log("%s relates to more than one character in unicode database." % ucs_name, fh)
        return character_code

    fh.seek(pos)
    character_code = read_integer(fh)
    if character_code is not None:
        return character_code

    # Try to interpret it as something else ...
    fh.seek(pos)
    return -1
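# --- Hedged sketch (not quex source): read_character_code() relies on the
#     tell/seek backtracking idiom -- remember the position, try one
#     interpretation, and rewind on failure so the next alternative starts
#     from a clean slate.
import io

def read_integer_sketch(fh):
    pos    = fh.tell()
    digits = u""
    while True:
        c = fh.read(1)
        if not c.isdigit():
            fh.seek(pos + len(digits))   # push back the non-digit (or EOF)
            break
        digits += c
    if not digits:
        fh.seek(pos)                     # nothing matched: stream untouched
        return None
    return int(digits)

fh_demo = io.StringIO(u"4711;")
assert read_integer_sketch(fh_demo) == 4711
assert fh_demo.read(1) == u";"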
def snap_replacement(stream, PatternDict, StateMachineF=True):
    """Snaps a predefined pattern from the input string and returns the
    resulting state machine.
    """
    skip_whitespace(stream)
    pattern_name = read_identifier(stream)
    if pattern_name == "":
        raise RegularExpressionException("Pattern replacement expression misses identifier after '{'.")
    skip_whitespace(stream)

    if not check(stream, "}"):
        raise RegularExpressionException("Pattern replacement expression misses closing '}' after '%s'." \
                                         % pattern_name)

    error.verify_word_in_list(pattern_name, PatternDict.keys(),
                              "Specifier '%s' not found in any preceding 'define { ... }' section." % pattern_name,
                              stream)

    reference = PatternDict[pattern_name]
    assert reference.__class__ == PatternShorthand

    # The replacement may be a state machine or a number set
    if StateMachineF:
        # Get a cloned version of the state machine
        state_machine = reference.get_state_machine()
        assert isinstance(state_machine, StateMachine)

        # It is essential that state machines defined as patterns do not
        # have origins. Otherwise, the optimization of patterns that
        # contain pattern replacements might get confused and cannot
        # find all optimizations.
        assert state_machine.has_origins() == False

        # A state machine that contains pre- or post-conditions cannot be part
        # of a replacement. The addition of new post-contexts would mess up the pattern.
        ## if state_machine.has_pre_or_post_context():
        ##     error.log("Pre- or post-conditioned pattern was used in replacement.\n" + \
        ##               "Quex's regular expression grammar does not allow this.", stream)

        return state_machine

    else:
        # Get a cloned version of the character set
        character_set = reference.get_character_set()
        if character_set is None:
            error.log("Replacement in character set expression must be a character set.\n"
                      "Specifier '%s' relates to a pattern state machine." % pattern_name, stream)

        if character_set.is_empty():
            error.log("Referenced character set '%s' is empty.\nAborted." % pattern_name, stream)

        return character_set
def _determine_base_mode_name_sequence(self, ModePrepPrepDb):
    """Determine the sequence of base modes. The type of sequencing determines
    also the pattern precedence. The 'deep first' scheme is chosen here. For
    example, a mode hierarchy of

                              A
                             / \ 
                            B   C
                           / \ / \ 
                          D  E F  G

    results in a sequence: (A, B, D, E, C, F, G).reverse()

    => That is, the mode itself is result[-1]
    => Patterns and event handlers of 'E' have precedence over 'C', because
       they are the children of a preceding base mode.

    This function detects circular inheritance.
    """
    Node = namedtuple("Node", ("mode_name", "inheritance_path"))

    result   = [self.name]
    done     = set()
    worklist = [Node(self.name, [])]
    while worklist:
        node = worklist.pop(0)
        if node.mode_name in done:
            continue
        done.add(node.mode_name)

        inheritance_path = node.inheritance_path + [node.mode_name]
        mode             = ModePrepPrepDb[node.mode_name]
        i                = result.index(node.mode_name)
        for name in reversed(mode.direct_base_mode_name_list):
            error.verify_word_in_list(name, ModePrepPrepDb.keys(),
                                      "Mode '%s' inherits mode '%s' which does not exist." \
                                      % (mode.name, name), mode.sr)
            if name in inheritance_path:
                _error_circular_inheritance(inheritance_path, ModePrepPrepDb)
            elif name not in result:
                result.insert(i, name)

        worklist.extend(
            Node(name, inheritance_path)
            for name in mode.direct_base_mode_name_list
        )
    return result
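# --- Hedged sketch (not quex source): the 'deep first' linearization that
#     both variants above implement, reduced to plain dictionaries. Pre-order
#     walk (A, B, D, E, C, F, G), then reversed, so the mode itself ends up
#     last; circular inheritance raises instead of calling error.log.
def base_mode_sequence_sketch(mode_name, derived_from_db):
    def walk(name, path, out):
        if name in path:
            raise ValueError("circular inheritance: %s -> %s" % (" -> ".join(path), name))
        if name in out:
            return
        out.append(name)
        for base in derived_from_db.get(name, []):
            walk(base, path + (name,), out)
    out = []
    walk(mode_name, (), out)
    out.reverse()
    return out

db_demo = {"A": ["B", "C"], "B": ["D", "E"], "C": ["F", "G"]}
assert base_mode_sequence_sketch("A", db_demo) == ["G", "F", "C", "E", "D", "B", "A"]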
def __start_mode(implemented_mode_name_list, mode_name_list):
    """If more than one mode is defined, then an explicit definition
    'start = mode' is required.
    """
    assert len(implemented_mode_name_list) != 0
    assert blackboard.initial_mode is not None

    start_mode = blackboard.initial_mode.get_pure_text()

    # Start mode present and applicable?
    error.verify_word_in_list(start_mode, mode_name_list,
                              "Start mode '%s' is not defined." % start_mode,
                              blackboard.initial_mode.sr)
    error.verify_word_in_list(start_mode, implemented_mode_name_list,
                              "Start mode '%s' is inheritable only and cannot be instantiated." % start_mode,
                              blackboard.initial_mode.sr)
def _parse_definition_head(fh, IdentifierList):
    if check(fh, "\\default"):
        error.log("'\\default' has been replaced by keyword '\\else' since quex 0.64.9!", fh)
    elif check(fh, "\\else"):
        pattern = None
    else:
        pattern = regular_expression.parse(fh)

    skip_whitespace(fh)
    check_or_die(fh, "=>", " after character set definition.")
    skip_whitespace(fh)

    identifier = read_identifier(fh, OnMissingStr="Missing identifier following '=>'.")
    error.verify_word_in_list(identifier, IdentifierList,
                              "Unrecognized specifier '%s'." % identifier, fh)
    skip_whitespace(fh)

    return pattern, identifier, SourceRef.from_FileHandle(fh)
def __entry_exit_transitions(mode, mode_name_list):
    for mode_name in mode.exit_mode_name_list:
        # Does that mode exist?
        error.verify_word_in_list(mode_name, mode_name_list,
                                  "Mode '%s' allows exit to\nmode '%s' but no such mode exists." % \
                                  (mode.name, mode_name), mode.sr)

        that_mode = blackboard.mode_db[mode_name]

        # Other mode allows all entries => don't worry.
        if len(that_mode.entry_mode_name_list) == 0:
            continue

        # Other mode restricts the entries from other modes
        # => check if this mode or one of its base modes can enter.
        for base_mode in mode.get_base_mode_sequence():
            if base_mode.name in that_mode.entry_mode_name_list:
                break
        else:
            error.log("Mode '%s' has an exit to mode '%s' but" % (mode.name, mode_name),
                      base_mode.sr, DontExitF=True)
            error.log("mode '%s' has no entry for mode '%s'\n" % (mode_name, mode.name) + \
                      "or any of its base modes.", that_mode.sr)

    for mode_name in mode.entry_mode_name_list:
        # Does that mode exist?
        error.verify_word_in_list(mode_name, mode_name_list,
                                  "Mode '%s' allows entry from\nmode '%s' but no such mode exists." % \
                                  (mode.name, mode_name), mode.sr)

        that_mode = blackboard.mode_db[mode_name]

        # Other mode allows all exits => don't worry.
        if len(that_mode.exit_mode_name_list) == 0:
            continue

        # Other mode restricts the exits to other modes
        # => check if this mode or one of its base modes can be reached.
        for base_mode in mode.get_base_mode_sequence():
            if base_mode.name in that_mode.exit_mode_name_list:
                break
        else:
            error.log("Mode '%s' has an entry for mode '%s' but" % (mode.name, mode_name),
                      base_mode.sr, DontExitF=True)
            error.log("mode '%s' has no exit to mode '%s'\n" % (mode_name, mode.name) + \
                      "or any of its base modes.", that_mode.sr)
def __create_token_sender_by_token_name(fh, TokenName, LexemeNullOnlyF=False, LexemeOnlyF=False):
    assert type(TokenName) in (str, unicode)
    assert not (LexemeNullOnlyF and LexemeOnlyF)

    # Enter the token id into the database, if it is not yet defined.
    token_id_db_verify_or_enter_token_id(fh, TokenName)

    # Parse the token argument list
    if LexemeNullOnlyF:
        argument_list = ["LexemeNull"]
    elif LexemeOnlyF:
        argument_list = ["Lexeme"]
    else:
        argument_list = __parse_function_argument_list(fh, TokenName)

    #if cut_token_id_prefix(TokenName, fh) == "TERMINATION" and not argument_list:
    #    argument_list.append("LexemeNull")

    # Create the token sender
    assert token_db.token_type_definition is not None, \
           "A valid token_type_definition must have been parsed at this point."

    explicit_member_names_f = any(arg.find("=") != -1 for arg in argument_list)

    if not explicit_member_names_f:
        return __token_sender_with_implicit_member_names(TokenName, argument_list, fh)
    elif Setup.extern_token_class_file:
        error.log("Member assignments in brief token senders are inadmissible\n" + \
                  "with manually written token classes. User provided file '%s'.\n" % Setup.extern_token_class_file,
                  fh)

    member_value_pairs = [arg.split("=") for arg in argument_list]
    member_value_pairs = [(m.strip(), v.strip()) for m, v in member_value_pairs]

    if any(not value for member, value in member_value_pairs):
        error.log("One explicit argument name mentioned requires all arguments to\n" + \
                  "be mentioned explicitly.\n", fh)

    global lexeme_re
    if any(lexeme_re.search(value) is not None for member, value in member_value_pairs):
        error.log("Assignment of token member with 'Lexeme' directly being involved. The\n"
                  "'Lexeme' points into the text buffer and it is not owned by the token object.\n"
                  "\n"
                  "Proposals:\n\n"
                  "   (1) Use '(Lexeme)', i.e. surround 'Lexeme' by brackets to indicate\n"
                  "       that you are aware of the danger. Do this, if at the end of the\n"
                  "       process, the member can be assumed to relate to an object that\n"
                  "       is not directly dependent anymore on 'Lexeme'. This is particularly\n"
                  "       true if the member is of type 'std::string'. Its constructor\n"
                  "       creates a copy of the zero terminated string.\n\n"
                  "   (2) Use token senders without named arguments, for example\n"
                  "          \"%s(Lexeme+1, LexemeEnd-2)\"\n" % TokenName +
                  "          \"%s(Lexeme)\"\n" % TokenName +
                  "       These token senders create a copy of the lexeme and let the token\n"
                  "       own it.", fh)

    for member, value in member_value_pairs:
        error.verify_word_in_list(member, token_db.token_type_definition.get_member_db(),
                                  "No member: '%s' in token type description." % member, fh)

    txt = [
        Lng.TOKEN_SET_MEMBER(token_db.token_type_definition.get_member_access(member), value)
        for member, value in member_value_pairs
    ]

    # Box the token, stamp it with an id, and 'send' it.
    txt.append("%s\n" % Lng.TOKEN_SEND(TokenName))
    return "\n".join(txt)
def do(setup, command_line, argv):
    """Does a consistency check for setup and the command line.
    """
    setup.output_directory = os.path.normpath(setup.output_directory)
    if setup.output_directory:
        # Check whether the output directory exists and is writable.
        if not os.access(setup.output_directory, os.F_OK):
            error.log("The directory %s was specified for output, but does not exist." % setup.output_directory)
        if not os.access(setup.output_directory, os.W_OK):
            error.log("The directory %s was specified for output, but is not writable." % setup.output_directory)

    # if the mode is '--language dot' => check character display options.
    if setup.character_display not in ["hex", "utf8"]:
        error.log("Character display must be either 'hex' or 'utf8'.\nFound: '%s'" % setup.character_display)

    # Ensure that options are not specified twice.
    for parameter, info in SETUP_INFO.items():
        if type(info) != list:
            continue
        occurence_n = 0
        for option in info[0]:
            occurence_n += argv.count(option)
        if occurence_n > 1 and info[1] not in (SetupParTypes.LIST, SetupParTypes.INT_LIST):
            error.log("Received more than one of the following options:\n" + \
                      "%s" % repr(info[0])[1:-1])

    # (*) Check for 'Deprecated' Options ___________________________________________________
    for name, info in DEPRECATED.items():
        command_line_options     = SETUP_INFO[name][0]
        comment                  = info[0]
        deprecated_since_version = info[1]
        for option in command_line_options:
            if command_line.search(option):
                error.log("Command line option '%s' is ignored.\n" % option + \
                          comment + "\n" + \
                          "Last version of Quex supporting this option is version %s. Please, visit\n" % \
                          deprecated_since_version + \
                          "http://quex.org for further information.")

    # (*) Check for 'Straying' Options _____________________________________________________
    options = []
    for key, info in SETUP_INFO.items():
        if type(info) != list: continue
        if key in DEPRECATED:  continue
        if info[1] is not None:
            options.extend(info[0])
    options.sort(lambda a, b: cmp(a.replace("-", ""), b.replace("-", "")))

    ufos = command_line.unidentified_options(options)
    if len(ufos) != 0:
        error.log("Unidentified option(s) = " + repr(ufos) + "\n" + \
                  __get_supported_command_line_option_description(options))

    if setup.analyzer_derived_class_name != "" and \
       setup.analyzer_derived_class_file == "":
        error.log("Specified derived class '%s' on command line, but it was not\n" % \
                  setup.analyzer_derived_class_name + \
                  "specified which file contains the definition of it.\n" + \
                  "Use command line option '--derived-class-file'.\n")

    if setup.buffer_element_size not in [-1, 1, 2, 4]:
        error.log("The setting of '--buffer-element-size' (or '-b') can only be\n"
                  "1, 2, or 4 (found %s)." % repr(setup.buffer_element_size))

    if setup.buffer_byte_order not in ["<system>", "little", "big"]:
        error.log("Byte order (option --endian) must be 'little', 'big', or '<system>'.\n" + \
                  "Note that this option is only interesting for cross platform development.\n" + \
                  "By default, quex automatically chooses the endian type of your system.")

    # A manually written token class requires the token class name to be specified.
    if setup.token_class_file != "" and not command_line.search("--token-class", "--tc"):
        error.log("The use of a manually written token class requires that the name of the class\n"
                  "is specified on the command line via the '--token-class' option.")

    # Token queue
    if setup.token_policy != "queue" and command_line.search("--token-queue-size"):
        error.log("Option --token-queue-size determines a fixed token queue size. This makes\n" + \
                  "only sense in conjunction with '--token-policy queue'.\n")
    if setup.token_queue_size <= setup.token_queue_safety_border + 1:
        if setup.token_queue_size == setup.token_queue_safety_border:
            cmp_str = "equal to"
        else:
            cmp_str = "less than"
        error.log("Token queue size %i is %s token queue safety border %i + 1.\n" % \
                  (setup.token_queue_size, cmp_str, setup.token_queue_safety_border) +
                  "Set appropriate values with --token-queue-size and --token-queue-safety-border.")

    # Check that names are valid identifiers.
    if len(setup.token_id_prefix_plain) != 0:
        __check_identifier(setup, "token_id_prefix_plain", "Token prefix")
    __check_identifier(setup, "analyzer_class_name", "Engine name")
    if setup.analyzer_derived_class_name != "":
        __check_identifier(setup, "analyzer_derived_class_name", "Derived class name")

    __check_file_name(setup, "token_class_file",            "file containing token class definition")
    __check_file_name(setup, "analyzer_derived_class_file", "file containing user derived lexer class")
    __check_file_name(setup, "token_id_foreign_definition_file",
                      "file containing user token ids", 0,
                      CommandLineOption=SETUP_INFO["token_id_foreign_definition"][0])
    __check_file_name(setup, "input_mode_files", "quex source file")

    # Check that not more than one converter is specified.
    converter_n = 0
    if setup.converter_iconv_f:                 converter_n += 1
    if setup.converter_icu_f:                   converter_n += 1
    if len(setup.converter_user_new_func) != 0: converter_n += 1
    if converter_n > 1:
        error.log("More than one character converter has been specified. Note that the\n" + \
                  "options '--icu', '--iconv', and '--converter-new' (or '--cn') are\n" + \
                  "to be used mutually exclusively.")
    if converter_n == 1 and setup.buffer_codec.name != "unicode":
        # If the buffer codec is other than unicode, then no converter shall
        # be used to fill the buffer. Instead, the engine is transformed, so
        # that it works directly on the codec.
        error.log("An engine that is to be generated for a specific codec cannot rely\n" + \
                  "on converters. Do not use '--codec' together with '--icu', '--iconv', or\n" + \
                  "'--converter-new'.")

    # If a converter has been specified and no buffer-element-size has been specified,
    # it defaults to '1 byte' which is most likely not what is desired for unicode.
    if     converter_n == 1 \
       and setup.buffer_element_size == 1 \
       and not command_line_args_defined(command_line, "buffer_element_size") \
       and not command_line_args_defined(command_line, "buffer_element_type"):
        error.log("A converter has been specified, but the default buffer element size\n" + \
                  "is left to 1 byte. Consider %s or %s." \
                  % (command_line_args_string("buffer_element_size"),
                     command_line_args_string("buffer_element_type")))

    # If a user defined type is specified for 'engine character type' and
    # a converter, then the name of the target type must be specified explicitly.
    if     setup.buffer_element_type != "" \
       and not global_character_type_db.has_key(setup.buffer_element_type) \
       and setup.converter_ucs_coding_name == "" \
       and converter_n != 0:
        tc = setup.buffer_element_type
        error.log("A character code converter has been specified. It is supposed to convert\n" + \
                  "incoming data into an internal buffer of unicode characters. The size of\n" + \
                  "each character is determined by '%s' which is a user defined type.\n" % tc + \
                  "\n" + \
                  "Quex cannot determine automatically the name that the converter requires\n" + \
                  "to produce unicode characters for type '%s'. It must be specified by the\n" % tc + \
                  "command line option %s." \
                  % command_line_args_string("converter_ucs_coding_name"))

    # Token transmission policy
    token_policy_list = ["queue", "single", "users_token", "users_queue"]
    if setup.token_policy not in token_policy_list:
        error.log("Token policy '%s' not supported. Use one of the following:\n" % setup.token_policy + \
                  repr(token_policy_list)[1:-1])
    elif setup.token_policy == "users_token":
        error.log("Token policy 'users_token' has been deprecated since 0.49.1. Use the\n"
                  "equivalent policy 'single'.")
    elif setup.token_policy == "users_queue":
        error.log("Token policy 'users_queue' has been deprecated since 0.49.1.\n")

    # Internal engine character encoding
    def __codec_vs_buffer_element_size(CodecName, RequiredBufferElementSize):
        if   setup.buffer_codec.name   != CodecName:                 return
        elif setup.buffer_element_size == RequiredBufferElementSize: return

        if setup.buffer_element_size == -1:
            msg_str = "is undetermined (found type '%s')" % setup.buffer_element_type
        else:
            msg_str = "is not %i (found %i)" % (RequiredBufferElementSize, setup.buffer_element_size)
        error.log("Using codec '%s' while buffer element size %s.\n" % (CodecName, msg_str) +
                  "Consult command line argument %s" \
                  % command_line_args_string("buffer_element_size"))

    if setup.buffer_codec.name != "unicode":
        if not setup.buffer_codec_file:
            error.verify_word_in_list(setup.buffer_codec_name,
                                      codec_db.get_supported_codec_list() + ["utf8", "utf16"],
                                      "Codec '%s' is not supported." % setup.buffer_codec.name)
        __codec_vs_buffer_element_size("utf8", 1)
        __codec_vs_buffer_element_size("utf16", 2)

    if setup.external_lexeme_null_object and setup.token_class_only_f:
        error.log("Specifying an external lexeme null object signalizes an\n"
                  "external token class implementation. The 'token class only\n"
                  "flag' generates a token class considered to be externally\n"
                  "shared. Both flags are mutually exclusive.")

    if setup.string_accumulator_f:
        error_n = NotificationDB.warning_on_no_token_class_take_text
        if error_n in setup.suppressed_notification_list:
            error.warning("The warning upon missing 'take_text' in token type definition is de-\n" +
                          "activated by '--suppress %i'. This is dangerous, if there is a string\n" % error_n +
                          "accumulator. Maybe use '--no-string-accumulator'.", -1,
                          SuppressCode=NotificationDB.warning_on_no_warning_on_missing_take_text)
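# --- Hedged sketch (not quex source): the duplicate-option guard in do()
#     counts an option together with all of its aliases over argv; more than
#     one occurrence is only legal for list-valued parameters.
def count_option_occurrences_sketch(argv, option_aliases):
    return sum(argv.count(option) for option in option_aliases)

argv_demo = ["quex", "-b", "2", "--buffer-element-size", "4"]
assert count_option_occurrences_sketch(argv_demo, ["-b", "--buffer-element-size"]) == 2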
def do(setup, command_line, argv):
    """Does a consistency check for setup and the command line.
    """
    if setup.extern_token_id_file_show_f and not setup.extern_token_id_file:
        error.log("Option '%s' cannot be used without\n" % _example_flag("extern_token_id_file_show_f") +
                  "option '%s'." % _example_flag("extern_token_id_file"))

    # if the mode is '--language dot' => check character display options.
    if setup.character_display not in ["hex", "utf8"]:
        error.log("Character display must be either 'hex' or 'utf8'.\nFound: '%s'" % setup.character_display)

    # Ensure that options are not specified twice.
    for parameter, info in SETUP_INFO.items():
        if type(info) != list:
            continue
        occurence_n = 0
        for option in info[0]:
            occurence_n += argv.count(option)
        if occurence_n > 1 and info[1] not in (SetupParTypes.LIST, SetupParTypes.INT_LIST):
            error.log("Received more than one of the following options:\n" + \
                      "%s" % repr(info[0])[1:-1])

    # (*) Check for 'Deprecated' Options ___________________________________________________
    for name, info in DEPRECATED.items():
        command_line_options     = SETUP_INFO[name][0]
        comment                  = info[0]
        deprecated_since_version = info[1]
        for option in command_line_options:
            if command_line.search(option):
                error.log("Command line option '%s' is ignored.\n" % option + \
                          comment + "\n" + \
                          "Last version of Quex supporting this option is version %s. Please, visit\n" % \
                          deprecated_since_version + \
                          "http://quex.org for further information.")

    # (*) Check for 'Straying' Options _____________________________________________________
    options = []
    for key, info in SETUP_INFO.items():
        if type(info) != list: continue
        if key in DEPRECATED:  continue
        if info[1] is not None:
            options.extend(info[0])
    options.sort(lambda a, b: cmp(a.replace("-", ""), b.replace("-", "")))

    ufos = command_line.unidentified_options(options)
    if len(ufos) != 0:
        error.log("Unidentified option(s) = " + repr(ufos) + "\n" + \
                  __get_supported_command_line_option_description(options))

    if setup.analyzer_derived_class_name != "" and \
       setup.analyzer_derived_class_file == "":
        error.log("Specified derived class '%s' on command line, but it was not\n" % \
                  setup.analyzer_derived_class_name + \
                  "specified which file contains the definition of it.\n" + \
                  "Use command line option '--derived-class-file'.\n")

    if setup.lexatom.size_in_byte not in [-1, 1, 2, 4]:
        example_flag = SETUP_INFO["__buffer_lexatom_size_in_byte"][0][0]
        error.log("The setting of '%s' can only be\n" % example_flag +
                  "1, 2, or 4 (found %s)." % repr(setup.lexatom.size_in_byte))

    # A manually written token class requires the token class name to be specified.
    if setup.extern_token_class_file:
        if not setup.token_class:
            error.log("The use of a manually written token class requires that the name of the class\n"
                      "is specified on the command line via the '--token-class' option.")

    if setup.converter_only_f:
        if not setup.lexatom.type:
            error.log("Lexatom type must be specific for converter generation.")
        if not _find_flag("buffer_encoding_name", argv):
            error.log("Lexeme-converter-only-mode requires explicit definition of encoding.\n"
                      "Example: '%s unicode'." % _example_flag("buffer_encoding_name"))
        if not _find_flag("__buffer_lexatom_type", argv):
            error.log("Lexeme-converter-only-mode requires explicit definition of the code unit type.\n"
                      "Example: '%s uint8_t'." % _example_flag("__buffer_lexatom_type"))

    # Check that names are valid identifiers.
    if setup.token_id_prefix_plain:
        __check_identifier(setup, "token_id_prefix_plain", "Token prefix")
    __check_identifier(setup, "analyzer_class_name", "Engine name")
    if setup.analyzer_derived_class_name != "":
        __check_identifier(setup, "analyzer_derived_class_name", "Derived class name")

    __check_file_name(setup, "extern_token_class_file",     "file containing token class definition")
    __check_file_name(setup, "analyzer_derived_class_file", "file containing user derived lexer class")
    __check_file_name(setup, "extern_token_id_file",        "file containing user token ids", 0,
                      CommandLineOption=SETUP_INFO["extern_token_id_file"])
    __check_file_name(setup, "input_mode_files", "quex source file")

    # Internal engine character encoding
    if setup.buffer_encoding.name not in ("utf32", "unicode"):
        if not setup.buffer_encoding_file:
            error.verify_word_in_list(setup.buffer_encoding_name,
                                      codec_db.get_supported_codec_list() + ["utf8", "utf16", "utf32"],
                                      "Codec '%s' is not supported." % setup.buffer_encoding.name)
        # NOT:     __check_codec_vs_buffer_lexatom_size_in_byte("utf8", 1)
        # BECAUSE: The code unit size is one. No type has a size of less than one byte!
        __check_codec_vs_buffer_lexatom_size_in_byte(setup, "utf16", 2)
def prepare(command_line, argv):
    """RETURN: True,  if the process needs to be started.
               False, if the job is done.
    """
    # (*) Classes and their namespace
    __setup_analyzer_class(Setup)
    __setup_token_class(Setup)
    __setup_token_id_prefix(Setup)

    # (*) Line and column number counting
    if Setup.__no_count_line_and_column_f:
        Setup.count_line_number_f   = False
        Setup.count_column_number_f = False

    # (*) Output programming language
    Setup.language = Setup.language.upper()
    error.verify_word_in_list(Setup.language, output_language_db.keys(),
                              "Programming language '%s' is not supported." % Setup.language)
    Setup.language_db = output_language_db[Setup.language]()

    # Is the output file naming scheme provided by the extension database?
    # (Validation must happen immediately.)
    Setup.language_db.extension_db = Setup.language_db.all_extension_db.get(Setup.output_file_naming_scheme)
    if Setup.language_db.extension_db is None:
        error.log("File extension scheme '%s' is not provided for language '%s'.\n" \
                  % (Setup.output_file_naming_scheme, Setup.language) + \
                  "Available schemes are: %s." % repr(sorted(Setup.language_db.all_extension_db.keys()))[1:-1])

    if Setup.__buffer_lexatom_size_in_byte == "wchar_t":
        error.log("Since Quex version 0.53.5, 'wchar_t' can no longer be specified\n"
                  "with option '--buffer-element-size' or '-bes'. Please, specify\n"
                  "'--buffer-element-type wchar_t' or '--bet'.")

    Setup.buffer_setup(Setup.__buffer_lexatom_type,
                       Setup.__buffer_lexatom_size_in_byte,
                       Setup.buffer_encoding_name,
                       Setup.buffer_encoding_file)

    type_info = global_character_type_db.get(Setup.lexatom.type)
    if     type_info is not None and len(type_info) >= 4 \
       and type_info[3] != -1 and Setup.lexatom.size_in_byte != -1 \
       and type_info[3] != Setup.lexatom.size_in_byte:
        error.log("\nBuffer element type ('--bet' or '--buffer-element-type') was set to '%s'.\n" \
                  % Setup.lexatom.type \
                  + "It is well known to be of size %s[byte]. However, the buffer element size\n" \
                  % type_info[3] \
                  + "('-b' or '--buffer-element-type') was specified as '%s'.\n\n" \
                  % Setup.lexatom.size_in_byte \
                  + "Quex can continue, but the result is questionable.\n", \
                  DontExitF=True)

    if Setup.extern_token_id_specification:
        if len(Setup.extern_token_id_specification) > 3:
            error.log("Option '--foreign-token-id-file' received > 3 followers.\n"
                      "Found: %s" % str(Setup.extern_token_id_specification)[1:-1])
        if len(Setup.extern_token_id_specification) > 1:
            Setup.token_id_foreign_definition_file_region_begin_re = \
                __compile_regular_expression(Setup.extern_token_id_specification[1], "token id region begin")
        if len(Setup.extern_token_id_specification) > 2:
            Setup.token_id_foreign_definition_file_region_end_re = \
                __compile_regular_expression(Setup.extern_token_id_specification[2], "token id region end")
        Setup.extern_token_id_file = Setup.extern_token_id_specification[0]

        token_id_file_parse(Setup.extern_token_id_file)

    # AFTER: Setup.extern_token_id_file !!!
    Setup.prepare_output_directory()
    if Setup.language not in ["DOT"]:
        Setup.prepare_all_file_names()

    # (*) Compression types
    compression_type_list = []
    for name, ctype in [("compression_template_f",         E_Compression.TEMPLATE),
                        ("compression_template_uniform_f", E_Compression.TEMPLATE_UNIFORM),
                        ("compression_path_f",             E_Compression.PATH),
                        ("compression_path_uniform_f",     E_Compression.PATH_UNIFORM)]:
        if command_line_args_defined(command_line, name):
            compression_type_list.append((command_line_arg_position(name), ctype))
    compression_type_list.sort(key=itemgetter(0))
    Setup.compression_type_list = map(lambda x: x[1], compression_type_list)

    validation.do(Setup, command_line, argv)

    # (*) return Setup ___________________________________________________________________
    return True
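# --- Hedged sketch (not quex source): compression types take effect in the
#     order their flags appear on the command line; prepare() realizes this by
#     sorting on argv position. The flag names below are illustrative
#     stand-ins, not the real quex options.
def ordered_compression_types_sketch(argv):
    flag_db = {"--template-compression": "TEMPLATE",
               "--path-compression":     "PATH"}
    found = [(argv.index(flag), ctype) for flag, ctype in flag_db.items() if flag in argv]
    found.sort(key=lambda pair: pair[0])
    return [ctype for _position, ctype in found]

assert ordered_compression_types_sketch(["--path-compression", "--template-compression"]) \
       == ["PATH", "TEMPLATE"]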
def parse_section(fh):
    global default_token_type_definition_triggered_by_mode_definition_f

    # NOTE: End of file is supposed to be reached when trying to read a new
    #       section. Thus, the end-of-file catcher does not encompass the beginning.
    position = fh.tell()
    skip_whitespace(fh)
    word = read_identifier(fh, OnMissingStr="Missing section title")

    error.verify_word_in_list(word, blackboard.all_section_title_list,
                              "Unknown quex section '%s'" % word, fh)
    try:
        # (*) determine what is defined
        #
        #     -- 'mode { ... }'     => define a mode
        #     -- 'start = ...;'     => define the name of the initial mode
        #     -- 'header { ... }'   => define code that is to be pasted on top
        #                              of the engine (e.g. "#include<...>")
        #     -- 'body { ... }'     => define code that is to be pasted in the class' body
        #                              of the engine (e.g. "public: int my_member;")
        #     -- 'init { ... }'     => define code that is to be pasted in the class' constructors
        #                              of the engine (e.g. "my_member = -1;")
        #     -- 'define { ... }'   => define pattern shorthands such as IDENTIFIER for [a-z]+
        #     -- 'repeated_token_id = QUEX_TKN_ ...;'
        #                           => enable token repetition, define the
        #                              token id to be repeated.
        #     -- 'token { ... }'    => define token ids
        #     -- 'token_type { ... }' => define a customized token type
        #
        if word in blackboard.fragment_db.keys():
            element_name = blackboard.fragment_db[word]
            fragment     = code_fragment.parse(fh, word, AllowBriefTokenSenderF=False)
            blackboard.__dict__[element_name] = fragment
            return

        elif word == "start":
            mode_name = parse_identifier_assignment(fh)
            if mode_name == "":
                error.log("Missing mode_name after 'start ='", fh)
            elif not blackboard.initial_mode.sr.is_void():
                error.log("start mode defined more than once!", fh, DontExitF=True)
                error.log("previously defined here", blackboard.initial_mode.sr)
            blackboard.initial_mode = CodeUser(mode_name, SourceRef.from_FileHandle(fh))
            return

        elif word == "repeated_token":
            blackboard.token_repetition_token_id_list = parse_token_id_definitions(fh, NamesOnlyF=True)
            for token_name in blackboard.token_repetition_token_id_list:
                error.verify_word_in_list(token_name[len(Setup.token_id_prefix):],
                                          blackboard.token_id_db.keys(),
                                          "Token ID '%s' not yet defined." % token_name,
                                          fh, ExitF=False,
                                          SuppressCode=NotificationDB.warning_repeated_token_not_yet_defined)
            return

        elif word == "define":
            parse_pattern_name_definitions(fh)
            return

        elif word == "token":
            if Setup.token_id_foreign_definition:
                error.log("Token id file '%s' has been specified.\n" \
                          % Setup.token_id_foreign_definition_file \
                          + "All token ids must be specified there. Section 'token'\n" \
                          + "is not allowed.", fh)
            parse_token_id_definitions(fh)
            return

        elif word == "token_type":
            if Setup.token_class_file != "":
                error.log("Section 'token_type' is intended to generate a token class.\n" \
                          + "However, the manually written token class file '%s'\n" \
                          % repr(Setup.token_class_file) \
                          + "has been specified on the command line.", fh)

            if blackboard.token_type_definition is None:
                blackboard.token_type_definition = token_type.parse(fh)
                return

            # Error case:
            if default_token_type_definition_triggered_by_mode_definition_f:
                error.log("Section 'token_type' must appear before first mode definition.", fh)
            else:
                error.log("Section 'token_type' has been defined twice.", fh, DontExitF=True)
                error.log("Previously defined here.", blackboard.token_type_definition.sr)
            return

        elif word == "mode":
            # When the first mode is parsed, a token_type definition must be
            # present. If it is not, the default token type definition is used.
            if blackboard.token_type_definition is None:
                parse_default_token_definition()
                default_token_type_definition_triggered_by_mode_definition_f = True
            mode.parse(fh)
            return

        else:
            # This case should have been caught by the 'verify_word_in_list' function
            assert False

    except EndOfStreamException:
        fh.seek(position)
        error.error_eof(word, fh)
def __create_token_sender_by_token_name(fh, TokenName):
    assert type(TokenName) in [str, unicode]

    # Enter token_id into database, if it is not yet defined.
    token_id_db_verify_or_enter_token_id(fh, TokenName)

    # Parse the token argument list
    argument_list = __parse_function_argument_list(fh, TokenName)

    # Create the token sender
    explicit_member_names_f = False
    for arg in argument_list:
        if arg.find("=") != -1:
            explicit_member_names_f = True

    assert blackboard.token_type_definition is not None, \
           "A valid token_type_definition must have been parsed at this point."

    if not explicit_member_names_f:
        # There are only two allowed cases for implicit token member names:
        #     QUEX_TKN_XYZ(Lexeme)     --> call take_text(Lexeme, LexemeEnd)
        #     QUEX_TKN_XYZ(Begin, End) --> call take_text(Begin, End)
        if len(argument_list) == 2:
            return "QUEX_NAME_TOKEN(take_text)(self_write_token_p(), &self, (%s), (%s));\n" % \
                   (argument_list[0], argument_list[1]) + \
                   "self_send(%s);\n" % (TokenName)
        elif len(argument_list) == 1:
            if argument_list[0] == "Lexeme":
                return "QUEX_NAME_TOKEN(take_text)(self_write_token_p(), &self, self.buffer._lexeme_start_p, self.buffer._input_p);\n" \
                       "self_send(%s);\n" % (TokenName)
            elif argument_list[0] == "LexemeNull":
                return "QUEX_NAME_TOKEN(take_text)(self_write_token_p(), &self, LexemeNull, LexemeNull);\n" \
                       "self_send(%s);\n" % (TokenName)
            else:
                error.log("If one unnamed argument is specified it must be 'Lexeme'\n" + \
                          "or 'LexemeNull'. Found '%s'.\n" % argument_list[0] + \
                          "To cut parts of the lexeme, please, use the 2 argument sender, e.g.\n" + \
                          "QUEX_TKN_MY_ID(Lexeme + 1, LexemeEnd - 2);\n" + \
                          "Alternatively, use named parameters such as 'number=...'.", fh)
        elif len(argument_list) == 0:
            return "self_send(%s);\n" % TokenName
        else:
            error.log("Since 0.49.1, there are only the following brief token senders that can take\n"
                      "unnamed token arguments:\n"
                      "     one argument:  'Lexeme'   => token.take_text(..., LexemeBegin, LexemeEnd);\n"
                      "     two arguments: Begin, End => token.take_text(..., Begin, End);\n"
                      + "Found: " + repr(argument_list)[1:-1] + ".", fh)

    # At this point, member names are explicit; the implicit cases returned above.
    member_value_pairs = map(lambda x: x.split("="), argument_list)
    txt = ""
    for member, value in member_value_pairs:
        if value == "":
            error.log("One explicit argument name mentioned requires all arguments to\n" + \
                      "be mentioned explicitly. Value '%s' mentioned without argument.\n" \
                      % member, fh)

        if Setup.token_class_file != "":
            error.log("Member assignments in brief token senders are inadmissible\n" + \
                      "with manually written token classes. User provided file '%s'.\n" % Setup.token_class_file + \
                      "Found member assignment: '%s' = '%s'." % (member, value), fh)
        else:
            member_name = member.strip()
            error.verify_word_in_list(member_name,
                                      blackboard.token_type_definition.get_member_db(),
                                      "No member: '%s' in token type description." % member_name,
                                      fh)
            idx = value.find("Lexeme")
            if idx != -1:
                if idx != 0 and value[idx - 1] == "(":
                    pass  # 'Lexeme' is shielded by brackets, e.g. '(Lexeme)'.
                else:
                    error.log("Assignment of token member '%s' with 'Lexeme' directly being involved. The\n" % member_name +
                              "'Lexeme' points into the text buffer and it is not owned by the token object.\n"
                              "\n"
                              "Proposals:\n\n"
                              "   (1) Use '(Lexeme)', i.e. surround 'Lexeme' by brackets to indicate\n"
                              "       that you are aware of the danger. Do this, if at the end of the\n"
                              "       process, the member can be assumed to relate to an object that\n"
                              "       is not directly dependent anymore on 'Lexeme'. This is particularly\n"
                              "       true if the member is of type 'std::string'. Its constructor\n"
                              "       creates a copy of the zero terminated string.\n\n"
                              "   (2) Use token senders without named arguments, for example\n"
                              "          \"%s(Lexeme+1, LexemeEnd-2)\"\n" % TokenName +
                              "          \"%s(Lexeme)\"\n" % TokenName +
                              "       These token senders create a copy of the lexeme and let the token\n"
                              "       own it.", fh)

            access = blackboard.token_type_definition.get_member_access(member_name)
            txt += "self_write_token_p()->%s = %s;\n" % (access, value.strip())

    # Box the token, stamp it with an id and 'send' it
    txt += "self_send(%s);\n" % TokenName
    return txt
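# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): C code generated by
# __create_token_sender_by_token_name() for typical brief token senders,
# assuming a token type with a member 'number':
#
#     QUEX_TKN_ID(Lexeme)       -> QUEX_NAME_TOKEN(take_text)(..., _lexeme_start_p, _input_p);
#                                  self_send(QUEX_TKN_ID);
#     QUEX_TKN_ID(Begin, End)   -> QUEX_NAME_TOKEN(take_text)(..., (Begin), (End));
#                                  self_send(QUEX_TKN_ID);
#     QUEX_TKN_NUMBER(number=atoi((Lexeme)))
#                               -> self_write_token_p()->number = atoi((Lexeme));
#                                  self_send(QUEX_TKN_NUMBER);
#
# Note the brackets around '(Lexeme)' in the named-argument case; an
# unshielded 'Lexeme' triggers the warning issued above.
# ---------------------------------------------------------------------------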
def prepare(command_line, argv):
    """RETURN: True,  if process needs to be started.
               False, if job is done.
    """
    global Setup

    # (*) Classes and their namespace
    __setup_analyzer_class(Setup)
    __setup_token_class(Setup)
    __setup_token_id_prefix(Setup)
    __setup_lexeme_null(Setup)       # Requires 'token_class_name_space'

    # (*) Output programming language
    Setup.language = Setup.language.upper()
    error.verify_word_in_list(Setup.language, output_language_db.keys(),
                              "Programming language '%s' is not supported." % Setup.language)
    Setup.language_db  = output_language_db[Setup.language]
    Setup.extension_db = global_extension_db[Setup.language]

    # Is the output file naming scheme provided by the extension database?
    # (Validation must happen immediately.)
    if Setup.extension_db.has_key(Setup.output_file_naming_scheme) == False:
        error.log("File extension scheme '%s' is not provided for language '%s'.\n" \
                  % (Setup.output_file_naming_scheme, Setup.language) + \
                  "Available schemes are: %s." % repr(Setup.extension_db.keys())[1:-1])

    if Setup.buffer_byte_order == "<system>":
        Setup.buffer_byte_order = sys.byteorder
        Setup.byte_order_is_that_of_current_system_f = True
    else:
        Setup.byte_order_is_that_of_current_system_f = False

    Setup.buffer_element_specification_prepare()

    if   Setup.buffer_codec_name == "utf8":  module = utf8_state_split
    elif Setup.buffer_codec_name == "utf16": module = utf16_state_split
    else:                                    module = None
    Setup.buffer_codec_prepare(Setup.buffer_codec_name,
                               Setup.buffer_codec_file, module)

    # AFTER: Setup.buffer_codec_prepare() !!!
    if Setup.language not in ["DOT"]:
        prepare_file_names(Setup)

    type_info = global_character_type_db.get(Setup.buffer_element_type)
    if     type_info is not None and len(type_info) >= 4          \
       and type_info[3] != -1 and Setup.buffer_element_size != -1 \
       and type_info[3] != Setup.buffer_element_size:
        error.log("\nBuffer element type ('--bet' or '--buffer-element-type') was set to '%s'.\n" \
                  % Setup.buffer_element_type \
                  + "It is well known to be of size %s[byte]. However, the buffer element size\n" \
                  % type_info[3] \
                  + "('-b' or '--buffer-element-size') was specified as '%s'.\n\n" \
                  % Setup.buffer_element_size \
                  + "Quex can continue, but the result is questionable.\n",
                  DontExitF=True)

    Setup.converter_f = False
    if Setup.converter_iconv_f or Setup.converter_icu_f or len(Setup.converter_user_new_func) != 0:
        Setup.converter_f = True

    # The only case where no converter helper is required is where plain ASCII
    # (unicode restricted to [0, FF]) is used.
    Setup.converter_helper_required_f = True
    if Setup.converter_f == False and Setup.buffer_element_size == 1 and Setup.buffer_codec.name == "unicode":
        Setup.converter_helper_required_f = False

    validation.do(Setup, command_line, argv)

    if Setup.converter_ucs_coding_name == "":
        if global_character_type_db.has_key(Setup.buffer_element_type):
            if Setup.buffer_byte_order == "little": index = 1
            else:                                   index = 2
            Setup.converter_ucs_coding_name = global_character_type_db[Setup.buffer_element_type][index]

    if len(Setup.token_id_foreign_definition) != 0:
        if len(Setup.token_id_foreign_definition) > 3:
            error.log("Option '--foreign-token-id-file' received > 3 followers.\n"
                      "Found: %s" % str(Setup.token_id_foreign_definition)[1:-1])
        if len(Setup.token_id_foreign_definition) > 1:
            Setup.token_id_foreign_definition_file_region_begin_re = \
                __compile_regular_expression(Setup.token_id_foreign_definition[1],
                                             "token id region begin")
        if len(Setup.token_id_foreign_definition) > 2:
            Setup.token_id_foreign_definition_file_region_end_re = \
                __compile_regular_expression(Setup.token_id_foreign_definition[2],
                                             "token id region end")

        Setup.token_id_foreign_definition_file = Setup.token_id_foreign_definition[0]

        CommentDelimiterList = [["//", "\n"], ["/*", "*/"]]
        token_id_file_parse(Setup.token_id_foreign_definition_file,
                            CommentDelimiterList)

    # (*) Compression Types
    compression_type_list = []
    for name, ctype in [("compression_template_f",         E_Compression.TEMPLATE),
                        ("compression_template_uniform_f", E_Compression.TEMPLATE_UNIFORM),
                        ("compression_path_f",             E_Compression.PATH),
                        ("compression_path_uniform_f",     E_Compression.PATH_UNIFORM)]:
        if command_line_args_defined(command_line, name):
            compression_type_list.append((command_line_arg_position(name), ctype))
    compression_type_list.sort(key=itemgetter(0))
    Setup.compression_type_list = map(lambda x: x[1], compression_type_list)

    # (*) return Setup ___________________________________________________________________
    return True
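# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): the compression type
# list preserves the order in which the options appear on the command line.
# The flag spellings below are assumptions for illustration:
#
#     quex ... --path-compression --template-compression
#
# command_line_arg_position() yields each option's position, so after the
# sort by position:
#
#     Setup.compression_type_list == [E_Compression.PATH, E_Compression.TEMPLATE]
#
# i.e. compression methods are attempted in the order the user specified them.
# ---------------------------------------------------------------------------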
def parse_section(fh):
    global default_token_type_definition_triggered_by_mode_definition_f

    # NOTE: End of file is supposed to be reached when trying to read a new
    #       section. Thus, the end-of-file catcher does not encompass the beginning.
    position = fh.tell()
    skip_whitespace(fh)
    word = read_identifier(fh, OnMissingStr="Missing section title")

    error.verify_word_in_list(word, blackboard.all_section_title_list,
                              "Unknown quex section '%s'" % word, fh)
    try:
        # (*) determine what is defined
        #
        #     -- 'mode { ... }'       => define a mode
        #     -- 'start = ...;'       => define the name of the initial mode
        #     -- 'header { ... }'     => define code that is to be pasted on top
        #                                of the engine (e.g. "#include<...>")
        #     -- 'body { ... }'       => define code that is to be pasted in the class' body
        #                                of the engine (e.g. "public: int my_member;")
        #     -- 'init { ... }'       => define code that is to be pasted in the class' constructors
        #                                of the engine (e.g. "my_member = -1;")
        #     -- 'define { ... }'     => define pattern shorthands such as IDENTIFIER for [a-z]+
        #     -- 'repeated_token { ... }'
        #                             => enables token repetition; lists the token
        #                                ids that may be repeated.
        #     -- 'token { ... }'      => define token ids
        #     -- 'token_type { ... }' => define a customized token type
        #
        if word in blackboard.fragment_db.keys():
            element_name = blackboard.fragment_db[word]
            fragment     = code_fragment.parse(fh, word, AllowBriefTokenSenderF=False)
            blackboard.__dict__[element_name] = fragment
            return

        elif word == "start":
            mode_name = parse_identifier_assignment(fh)
            if mode_name == "":
                error.log("Missing mode_name after 'start ='", fh)
            elif not blackboard.initial_mode.sr.is_void():
                error.log("Start mode defined more than once!", fh, DontExitF=True)
                error.log("Previously defined here.", blackboard.initial_mode.sr)
            blackboard.initial_mode = CodeUser(mode_name, SourceRef.from_FileHandle(fh))
            return

        elif word == "repeated_token":
            blackboard.token_repetition_token_id_list = parse_token_id_definitions(fh, NamesOnlyF=True)
            for token_name in blackboard.token_repetition_token_id_list:
                error.verify_word_in_list(token_name[len(Setup.token_id_prefix):],
                                          blackboard.token_id_db.keys(),
                                          "Token ID '%s' not yet defined." % token_name,
                                          fh, ExitF=False,
                                          SuppressCode=NotificationDB.warning_repeated_token_not_yet_defined)
            return

        elif word == "define":
            parse_pattern_name_definitions(fh)
            return

        elif word == "token":
            if Setup.token_id_foreign_definition:
                error.log("Token id file '%s' has been specified.\n" \
                          % Setup.token_id_foreign_definition_file \
                          + "All token ids must be specified there. Section 'token'\n" \
                          + "is not allowed.", fh)
            parse_token_id_definitions(fh)
            return

        elif word == "token_type":
            if Setup.token_class_file != "":
                error.log("Section 'token_type' is intended to generate a token class.\n" \
                          + "However, the manually written token class file '%s'\n" \
                          % repr(Setup.token_class_file) \
                          + "has been specified on the command line.", fh)

            if blackboard.token_type_definition is None:
                blackboard.token_type_definition = token_type.parse(fh)
                return

            # Error case:
            if default_token_type_definition_triggered_by_mode_definition_f:
                error.log("Section 'token_type' must appear before first mode definition.", fh)
            else:
                error.log("Section 'token_type' has been defined twice.", fh, DontExitF=True)
                error.log("Previously defined here.", blackboard.token_type_definition.sr)
            return

        elif word == "mode":
            # When the first mode is parsed, a token_type definition must be
            # present. If it is not, the default token type definition is used.
            if blackboard.token_type_definition is None:
                parse_default_token_definition()
                default_token_type_definition_triggered_by_mode_definition_f = True
            mode.parse(fh)
            return

        else:
            # This case should have been caught by the 'verify_word_in_list' function.
            assert False

    except EndOfStreamException:
        fh.seek(position)
        error.error_eof(word, fh)
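# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): a minimal quex input
# file whose top-level keywords are dispatched by parse_section(). The exact
# pattern/rule syntax is an assumption for illustration:
#
#     header { #include <cstdlib> }        // pasted on top of the engine
#     define { IDENTIFIER [a-z]+ }         // pattern shorthand
#     token  { IDENTIFIER; NUMBER; }       // token ids
#     start  = PROGRAM;                    // initial mode
#     mode PROGRAM {
#         {IDENTIFIER} => QUEX_TKN_IDENTIFIER(Lexeme);
#     }
# ---------------------------------------------------------------------------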
def __parse_section(fh, descriptor, already_defined_list):
    global token_type_code_fragment_db
    assert type(already_defined_list) == list

    SubsectionList = ["name", "file_name", "standard", "distinct", "union",
                      "inheritable", "noid"] \
                     + token_type_code_fragment_db.keys()

    position = fh.tell()
    skip_whitespace(fh)
    word = read_identifier(fh)
    if word == "":
        fh.seek(position)
        if check(fh, "}"):
            fh.seek(position)
            return False
        error.log("Missing token_type section ('standard', 'distinct', or 'union').", fh)

    error.verify_word_in_list(word, SubsectionList,
                              "Subsection '%s' not allowed in token_type section." % word, fh)

    if word == "name":
        if not check(fh, "="):
            error.log("Missing '=' in token_type 'name' specification.", fh)
        descriptor.class_name, descriptor.name_space, descriptor.class_name_safe = \
            read_namespaced_name(fh, "token_type")
        if not check(fh, ";"):
            error.log("Missing terminating ';' in token_type 'name' specification.", fh)

    elif word == "inheritable":
        descriptor.open_for_derivation_f = True
        check_or_die(fh, ";")

    elif word == "noid":
        descriptor.token_contains_token_id_f = False
        check_or_die(fh, ";")

    elif word == "file_name":
        if not check(fh, "="):
            error.log("Missing '=' in token_type 'file_name' specification.", fh)
        descriptor.set_file_name(read_until_letter(fh, ";"))
        if not check(fh, ";"):
            error.log("Missing terminating ';' in token_type 'file_name' specification.", fh)

    elif word in ["standard", "distinct", "union"]:
        if   word == "standard": parse_standard_members(fh, word, descriptor, already_defined_list)
        elif word == "distinct": parse_distinct_members(fh, word, descriptor, already_defined_list)
        elif word == "union":    parse_union_members(fh, word, descriptor, already_defined_list)

        if not check(fh, "}"):
            fh.seek(position)
            error.log("Missing closing '}' at end of token_type section '%s'." % word, fh)

    elif word in token_type_code_fragment_db.keys():
        fragment = code_fragment.parse(fh, word, AllowBriefTokenSenderF=False)
        descriptor.__dict__[word] = fragment

    else:
        assert False, "This code section should not be reachable because 'word'\n" \
                      "was checked to fit in one of the 'elif' cases."

    return True
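# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): a token_type section
# exercising the subsections handled by __parse_section(). Member names and
# types are assumptions for illustration:
#
#     token_type {
#         name = mylex::Token;         // handled by 'name', via read_namespaced_name()
#         inheritable;                 // class open for derivation
#         standard { id : uint32_t; }  // numeric standard members
#         distinct { text : std::string; }
#         union    { number : uint32_t; }
#     }
# ---------------------------------------------------------------------------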
def do(setup, command_line, argv):
    """Does a consistency check for setup and the command line.
    """
    setup.output_directory = os.path.normpath(setup.output_directory)
    if setup.output_directory:
        # Check whether the output directory exists and is writeable.
        if os.access(setup.output_directory, os.F_OK) == False:
            error.log("The directory %s was specified for output, but does not exist." % setup.output_directory)
        if os.access(setup.output_directory, os.W_OK) == False:
            error.log("The directory %s was specified for output, but is not writeable." % setup.output_directory)

    # If the mode is '--language dot' => check character display options.
    if setup.character_display not in ["hex", "utf8"]:
        error.log("Character display must be either 'hex' or 'utf8'.\nFound: '%s'" % setup.character_display)

    # Ensure that options are not specified twice.
    for parameter, info in SETUP_INFO.items():
        if type(info) != list: continue
        occurence_n = 0
        for option in info[0]:
            occurence_n += argv.count(option)
        if occurence_n > 1 and info[1] not in (SetupParTypes.LIST, SetupParTypes.INT_LIST):
            error.log("Received more than one of the following options:\n" + \
                      "%s" % repr(info[0])[1:-1])

    # (*) Check for 'Deprecated' Options ___________________________________________________
    for name, info in DEPRECATED.items():
        command_line_options     = SETUP_INFO[name][0]
        comment                  = info[0]
        deprecated_since_version = info[1]
        for option in command_line_options:
            if command_line.search(option):
                error.log("Command line option '%s' is ignored.\n" % option + \
                          comment + "\n" + \
                          "Last version of Quex supporting this option is version %s. Please, visit\n" % \
                          deprecated_since_version + \
                          "http://quex.org for further information.")

    # (*) Check for 'Straying' Options _____________________________________________________
    options = []
    for key, info in SETUP_INFO.items():
        if type(info) != list:  continue
        if key in DEPRECATED:   continue
        if info[1] is not None: options.extend(info[0])
    options.sort(lambda a, b: cmp(a.replace("-", ""), b.replace("-", "")))

    ufos = command_line.unidentified_options(options)
    if len(ufos) != 0:
        error.log("Unidentified option(s) = " + repr(ufos) + "\n" + \
                  __get_supported_command_line_option_description(options))

    if setup.analyzer_derived_class_name != "" and \
       setup.analyzer_derived_class_file == "":
        error.log("Specified derived class '%s' on command line, but it was not\n" % \
                  setup.analyzer_derived_class_name + \
                  "specified which file contains its definition.\n" + \
                  "Use command line option '--derived-class-file'.\n")

    if setup.buffer_element_size not in [-1, 1, 2, 4]:
        error.log("The setting of '--buffer-element-size' (or '-b') can only be\n"
                  "1, 2, or 4 (found %s)." % repr(setup.buffer_element_size))

    if setup.buffer_byte_order not in ["<system>", "little", "big"]:
        error.log("Byte order (option --endian) must be 'little', 'big', or '<system>'.\n" + \
                  "Note, that this option is only interesting for cross platform development.\n" + \
                  "By default, quex automatically chooses the endian type of your system.")

    # A manually written token class requires the token class name to be specified.
    if setup.token_class_file != "" and command_line.search("--token-class", "--tc") == False:
        error.log("The use of a manually written token class requires that the name of the class\n"
                  "is specified on the command line via the '--token-class' option.")

    # Token queue
    if setup.token_policy != "queue" and command_line.search("--token-queue-size"):
        error.log("Option --token-queue-size determines a fixed token queue size. This only\n" + \
                  "makes sense in conjunction with '--token-policy queue'.\n")
    if setup.token_queue_size <= setup.token_queue_safety_border + 1:
        if setup.token_queue_size == setup.token_queue_safety_border: cmp_str = "equal to"
        else:                                                         cmp_str = "less than"
        error.log("Token queue size %i is %s token queue safety border %i + 1.\n" % \
                  (setup.token_queue_size, cmp_str, setup.token_queue_safety_border) + \
                  "Set appropriate values with --token-queue-size and --token-queue-safety-border.")

    # Check that names are valid identifiers.
    if len(setup.token_id_prefix_plain) != 0:
        __check_identifier(setup, "token_id_prefix_plain", "Token prefix")
    __check_identifier(setup, "analyzer_class_name", "Engine name")
    if setup.analyzer_derived_class_name != "":
        __check_identifier(setup, "analyzer_derived_class_name", "Derived class name")

    __check_file_name(setup, "token_class_file",            "file containing token class definition")
    __check_file_name(setup, "analyzer_derived_class_file", "file containing user derived lexer class")
    __check_file_name(setup, "token_id_foreign_definition_file",
                      "file containing user token ids", 0,
                      CommandLineOption=SETUP_INFO["token_id_foreign_definition"][0])
    __check_file_name(setup, "input_mode_files", "quex source file")

    # Check that not more than one converter is specified.
    converter_n = 0
    if setup.converter_iconv_f:                 converter_n += 1
    if setup.converter_icu_f:                   converter_n += 1
    if len(setup.converter_user_new_func) != 0: converter_n += 1
    if converter_n > 1:
        error.log("More than one character converter has been specified. Note, that the\n" + \
                  "options '--icu', '--iconv', and '--converter-new' (or '--cn') are\n" + \
                  "to be used mutually exclusively.")
    if converter_n == 1 and setup.buffer_codec.name != "unicode":
        # If the buffer codec is other than unicode, then no converter shall
        # be used to fill the buffer. Instead, the engine is transformed so
        # that it works directly on the codec.
        error.log("An engine that is to be generated for a specific codec cannot rely\n" + \
                  "on converters. Do not use '--codec' together with '--icu', '--iconv', or\n" + \
                  "'--converter-new'.")

    # If a converter has been specified and no buffer element size has been specified,
    # the size defaults to '1 byte', which is most likely not what is desired for unicode.
    if     converter_n == 1 \
       and setup.buffer_element_size == 1 \
       and not command_line_args_defined(command_line, "buffer_element_size") \
       and not command_line_args_defined(command_line, "buffer_element_type"):
        error.log("A converter has been specified, but the default buffer element size\n" + \
                  "is left to 1 byte. Consider %s or %s." \
                  % (command_line_args_string("buffer_element_size"),
                     command_line_args_string("buffer_element_type")))

    # If a user defined type is specified for 'engine character type' together with
    # a converter, then the name of the target type must be specified explicitly.
    if     setup.buffer_element_type != "" \
       and not global_character_type_db.has_key(setup.buffer_element_type) \
       and setup.converter_ucs_coding_name == "" \
       and converter_n != 0:
        tc = setup.buffer_element_type
        error.log("A character code converter has been specified. It is supposed to convert\n" + \
                  "incoming data into an internal buffer of unicode characters. The size of\n" + \
                  "each character is determined by '%s' which is a user defined type.\n" % tc + \
                  "\n" + \
                  "Quex cannot automatically determine the name that the converter requires\n" + \
                  "to produce unicode characters for type '%s'. It must be specified by the\n" % tc + \
                  "command line option %s." \
                  % command_line_args_string("converter_ucs_coding_name"))

    # Token transmission policy
    token_policy_list = ["queue", "single", "users_token", "users_queue"]
    if setup.token_policy not in token_policy_list:
        error.log("Token policy '%s' not supported. Use one of the following:\n" % setup.token_policy + \
                  repr(token_policy_list)[1:-1])
    elif setup.token_policy == "users_token":
        error.log("Token policy 'users_token' has been deprecated since 0.49.1. Use the\n"
                  "equivalent policy 'single'.")
    elif setup.token_policy == "users_queue":
        error.log("Token policy 'users_queue' has been deprecated since 0.49.1.\n")

    # Internal engine character encoding
    def __codec_vs_buffer_element_size(CodecName, RequiredBufferElementSize):
        if   setup.buffer_codec.name != CodecName:                   return
        elif setup.buffer_element_size == RequiredBufferElementSize: return
        if setup.buffer_element_size == -1:
            msg_str = "undetermined (found type '%s')" % setup.buffer_element_type
        else:
            msg_str = "is not %i (found %i)" % (RequiredBufferElementSize, setup.buffer_element_size)
        error.log("Using codec '%s' while buffer element size %s.\n" % (CodecName, msg_str) + \
                  "Consult command line argument %s" \
                  % command_line_args_string("buffer_element_size"))

    if setup.buffer_codec.name != "unicode":
        if not setup.buffer_codec_file:
            error.verify_word_in_list(setup.buffer_codec_name,
                                      codec_db.get_supported_codec_list() + ["utf8", "utf16"],
                                      "Codec '%s' is not supported." % setup.buffer_codec.name)
        __codec_vs_buffer_element_size("utf8", 1)
        __codec_vs_buffer_element_size("utf16", 2)

    if setup.external_lexeme_null_object and setup.token_class_only_f:
        error.log("Specifying an external lexeme null object signalizes an\n"
                  "external token class implementation. The 'token class only\n"
                  "flag' generates a token class considered to be externally\n"
                  "shared. Both flags are mutually exclusive.")

    if setup.string_accumulator_f:
        error_n = NotificationDB.warning_on_no_token_class_take_text
        if error_n in setup.suppressed_notification_list:
            error.warning("The warning upon missing 'take_text' in token type definition is de-\n"
                          + "activated by '--suppress %i'. This is dangerous, if there is a string\n" % error_n
                          + "accumulator. May be, use '--no-string-accumulator'.",
                          -1,
                          SuppressCode=NotificationDB.warning_on_no_warning_on_missing_take_text)
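# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): command lines that
# this consistency check rejects or flags; the codec name is an assumption
# for illustration:
#
#     quex ... --icu --iconv             # two converters          -> error
#     quex ... --codec iso8859-7 --icu   # codec engine + converter -> error
#     quex ... --iconv                   # converter while buffer element size
#                                        # defaults to 1 byte -> hint to set '-b'
# ---------------------------------------------------------------------------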