def parse(fh):
    """Parse the body of a 'token_type { ... }' definition from 'fh'.

    Returns a TokenTypeDescriptor. Errors (missing braces, empty section,
    premature end of stream) are reported through the 'error' module.
    """
    descriptor = TokenTypeDescriptorCore()
    if not check(fh, "{"):
        error.log("Missing opening '{' at begin of token_type definition", fh)

    already_defined_list = []
    position = fh.tell()                        # restore point for EOF reports
    sr_begin = SourceRef.from_FileHandle(fh)
    result   = True
    # 'parse_section' is called repeatedly; the loop ends as soon as it
    # returns something other than True (no further section found).
    while result == True:
        try:
            result = parse_section(fh, descriptor, already_defined_list)
        except EndOfStreamException:
            fh.seek(position)
            error.error_eof("token_type", fh)

    if not check(fh, "}"):
        fh.seek(position)
        error.log("Missing closing '}' at end of token_type definition.", fh);

    result = TokenTypeDescriptor(descriptor, sr_begin)
    # A 'token_type' section that defines no member, changes no standard
    # member type, and keeps the class name 'Token' accomplishes nothing
    # -- most likely a user mistake, so report it.
    if len(result.get_member_db()) == 0 \
       and result.class_name == "Token" \
       and result.token_id_type.sr.is_void() \
       and result.column_number_type.sr.is_void() \
       and result.line_number_type.sr.is_void():
        error.log("Section 'token_type' does not define any members, does not\n" + \
                  "modify any standard member types, nor does it define a class\n" + \
                  "different from 'Token'.", fh)

    result.consistency_check()
    return result
def __parse_element(new_mode, fh):
    """Parse one element of a mode body: an event handler or a
    pattern/action pair.

    Returns: False, if a closing '}' has been found.
             True, else.
    """
    position = fh.tell()
    try:
        description = "pattern or event handler"
        skip_whitespace(fh)

        # NOTE: Do not use 'read_word' since we need to continue directly
        #       after whitespace, if a regular expression is to be parsed.
        position = fh.tell()
        word = read_until_whitespace(fh)
        if word == "}":
            return False

        # -- check for event handlers: 'on_entry', 'on_exit', ...
        if __parse_event(new_mode, fh, word):
            return True

        # Not an event handler => rewind and parse as a regular expression.
        fh.seek(position)
        description = "start of mode element: regular expression"
        pattern = regular_expression.parse(fh)
        pattern.set_source_reference(SourceRef.from_FileHandle(fh, new_mode.name))

        position = fh.tell()
        description = "start of mode element: code fragment for '%s'" % pattern.pattern_string()
        __parse_action(new_mode, fh, pattern.pattern_string(), pattern)

    except EndOfStreamException:
        fh.seek(position)
        # 'description' names whatever construct was being parsed when the
        # stream ended, so the message points at the right spot.
        error.error_eof(description, fh)

    return True
def __parse_element(new_mode, fh):
    """Parse one mode-body element (event handler or pattern/action pair).

    Returns: False, if a closing '}' has been found.
             True, else.

    NOTE(review): this definition is an exact duplicate of an earlier
    '__parse_element' in this file -- one of the two should be removed.
    """
    position = fh.tell()
    try:
        description = "pattern or event handler"
        skip_whitespace(fh)

        # NOTE: Do not use 'read_word' since we need to continue directly
        #       after whitespace, if a regular expression is to be parsed.
        position = fh.tell()
        word = read_until_whitespace(fh)
        if word == "}":
            return False

        # -- check for 'on_entry', 'on_exit', ...
        if __parse_event(new_mode, fh, word):
            return True

        # Neither '}' nor an event handler => must be a regular expression.
        fh.seek(position)
        description = "start of mode element: regular expression"
        pattern = regular_expression.parse(fh)
        pattern.set_source_reference(SourceRef.from_FileHandle(fh, new_mode.name))

        position = fh.tell()
        description = "start of mode element: code fragment for '%s'" % pattern.pattern_string()
        __parse_action(new_mode, fh, pattern.pattern_string(), pattern)

    except EndOfStreamException:
        fh.seek(position)
        error.error_eof(description, fh)

    return True
def __parse_brief_token_sender(fh, ContinueF):
    # Shorthand for:
    #   { self.send(TKN_SOMETHING); QUEX_SETTING_AFTER_SEND_CONTINUE_OR_RETURN(); }
    #
    # Returns a CodeUser object with the generated sender code, or None if
    # no code resulted. 'ContinueF' appends the CONTINUE-or-RETURN macro.
    position = fh.tell()
    try:
        skip_whitespace(fh)
        position = fh.tell()
        # First, try a token id given directly as a character code.
        code = __parse_token_id_specification_by_character_code(fh)
        if code != -1:
            code = __create_token_sender_by_character_code(fh, code)
        else:
            skip_whitespace(fh)
            identifier = __read_token_identifier(fh)
            skip_whitespace(fh)
            if identifier in ["GOTO", "GOSUB", "GOUP"]:
                # Mode transition combined with token sending.
                code = __create_mode_transition_and_token_sender(fh, identifier)
            else:
                code = __create_token_sender_by_token_name(fh, identifier)
                check_or_die(fh, ";")

        if len(code) != 0:
            if ContinueF:
                code += "QUEX_SETTING_AFTER_SEND_CONTINUE_OR_RETURN();\n"
            return CodeUser(code, SourceRef.from_FileHandle(fh))
        else:
            return None

    except EndOfStreamException:
        fh.seek(position)
        error.error_eof("token", fh)
def __parse_brief_token_sender(fh, ContinueF):
    # Shorthand for:
    #   { self.send(TKN_SOMETHING); QUEX_SETTING_AFTER_SEND_CONTINUE_OR_RETURN(); }
    #
    # NOTE(review): exact duplicate of an earlier '__parse_brief_token_sender'
    # definition in this file -- one of the two should be removed.
    position = fh.tell()
    try:
        skip_whitespace(fh)
        position = fh.tell()
        # A token id may be given directly as a character code ...
        code = __parse_token_id_specification_by_character_code(fh)
        if code != -1:
            code = __create_token_sender_by_character_code(fh, code)
        else:
            # ... or by a token name / mode-transition keyword.
            skip_whitespace(fh)
            identifier = __read_token_identifier(fh)
            skip_whitespace(fh)
            if identifier in ["GOTO", "GOSUB", "GOUP"]:
                code = __create_mode_transition_and_token_sender(fh, identifier)
            else:
                code = __create_token_sender_by_token_name(fh, identifier)
                check_or_die(fh, ";")

        if len(code) != 0:
            if ContinueF:
                code += "QUEX_SETTING_AFTER_SEND_CONTINUE_OR_RETURN();\n"
            return CodeUser(code, SourceRef.from_FileHandle(fh))
        else:
            return None

    except EndOfStreamException:
        fh.seek(position)
        error.error_eof("token", fh)
def parse_section(fh, descriptor, already_defined_list):
    """Parse a single section of a 'token_type' definition.

    Thin wrapper around '__parse_section' that, upon premature end of
    stream, rewinds 'fh' to where parsing started and reports the error.
    """
    start = fh.tell()
    try:
        return __parse_section(fh, descriptor, already_defined_list)
    except EndOfStreamException:
        fh.seek(start)
        error.error_eof("token_type", fh)
def parse_standard_members(fh, section_name, descriptor, already_defined_list):
    """Parse the '{ ... }' body of a 'standard' token_type section.

    Each definition assigns a type to one of the standard members 'id',
    'column_number', or 'line_number' on 'descriptor'. Accepted
    definitions are appended to 'already_defined_list'. Returns None when
    the definition list ends.
    """
    if not check(fh, "{"):
        error.log("Missing opening '{' at begin of token_type section '%s'." % section_name, fh)

    position = fh.tell()   # restore point for EOF reports

    while True:            # idiom fix: was 'while 1 + 1 == 2'
        try:
            result = parse_variable_definition(fh)
        except EndOfStreamException:
            fh.seek(position)
            error.error_eof("standard", fh)

        if result is None:
            return

        type_code_fragment, name = result[0], result[1]

        __validate_definition(type_code_fragment, name,
                              already_defined_list, StandardMembersF=True)

        # Route the type definition to the corresponding standard member.
        if   name == "id":            descriptor.token_id_type      = type_code_fragment
        elif name == "column_number": descriptor.column_number_type = type_code_fragment
        elif name == "line_number":   descriptor.line_number_type   = type_code_fragment
        else:
            # Unknown names are rejected by the variable parser already.
            assert False

        already_defined_list.append([name, type_code_fragment])
def __parse_element(new_mode, fh):
    """Parse one mode-body element: a 'keyword_list' or 'brief' sub-section,
    an event handler, or a pattern/action pair.

    Returns: False, if a closing '}' has been found.
             True, else.
    """
    position = fh.tell()
    try:
        description = "pattern or event handler"
        skip_whitespace(fh)

        # NOTE: Do not use 'read_word' since we need to continue directly
        #       after whitespace, if a regular expression is to be parsed.
        position = fh.tell()

        identifier = read_identifier(fh)
        if identifier == "keyword_list":
            return __parse_keyword_list(new_mode, fh)
        elif similarity.get(identifier, ["keyword_list", "key words"]) != -1:
            # Likely a typo of 'keyword_list' -- warn, then fall through and
            # treat it as an ordinary pattern.
            error.warning("'%s' is similar to keyword 'keyword_list'.\n"
                          "For clarity, use quotes." % identifier, fh)
        elif identifier == "brief":
            return __parse_brief(new_mode, fh)
        elif similarity.get(identifier, ["brief", "briefing", "briefly"]) != -1:
            error.warning("'%s' is similar to keyword 'brief'.\n"
                          "For clarity, use quotes." % identifier, fh)

        # Rewind: the identifier was not a sub-section keyword.
        fh.seek(position)
        word = read_until_whitespace(fh)
        if word == "}":
            return False
        # -- check for 'on_entry', 'on_exit', ...
        elif __parse_event(new_mode, fh, word):
            return True

        # Otherwise it must be a regular expression with an action.
        fh.seek(position)
        description = "start of mode element: regular expression"
        pattern = regular_expression.parse(fh)
        pattern.set_source_reference(SourceRef.from_FileHandle(fh, new_mode.name))

        position = fh.tell()
        description = "start of mode element: code fragment for '%s'" % pattern.pattern_string()
        __parse_action(new_mode, fh, pattern.pattern_string(), pattern)

    except EndOfStreamException:
        fh.seek(position)
        # 'description' names the construct being parsed when the stream ended.
        error.error_eof(description, fh)

    return True
def __parse_option_list(new_mode, fh):
    """Parse the header options of a mode: the inherited base-mode list
    followed by any number of mode options."""
    start_pos = fh.tell()
    try:
        skip_whitespace(fh)
        # ':' => inherited modes/option_db follow
        __parse_base_mode_list(fh, new_mode)

        # Consume mode options until none can be parsed anymore.
        while True:
            if not mode_option.parse(fh, new_mode):
                break

    except EndOfStreamException:
        fh.seek(start_pos)
        error.error_eof("mode '%s'." % new_mode.name, fh)
def __parse_option_list(new_mode, fh):
    """Parse a mode's base-mode list and subsequent mode options.

    NOTE(review): exact duplicate of an earlier '__parse_option_list'
    definition in this file -- one of the two should be removed.
    """
    position = fh.tell()
    try:
        # ':' => inherited modes/option_db follow
        skip_whitespace(fh)
        __parse_base_mode_list(fh, new_mode)

        # Parse options until 'mode_option.parse' reports nothing more.
        while mode_option.parse(fh, new_mode):
            pass

    except EndOfStreamException:
        fh.seek(position)
        error.error_eof("mode '%s'." % new_mode.name, fh)
def parse_variable_definition_list(fh, SectionName, already_defined_list, GroupF=False):
    """Parse a list of member-variable definitions for 'SectionName'.

    Returns an OrderedDict mapping member name -> type descriptor.
    Accepted definitions are validated against, and appended to,
    'already_defined_list'.

    NOTE(review): the 'GroupF' parameter is never forwarded -- the call
    below always passes 'GroupF=True'. Confirm whether that is intended.
    """
    position = fh.tell()
    db = OrderedDict()  # See comment [MEMBER PACKAGING]
    while True:         # idiom fix: was 'while 1 + 1 == 2'
        try:
            result = parse_variable_definition(fh, GroupF=True,
                                               already_defined_list=already_defined_list)
        except EndOfStreamException:
            fh.seek(position)
            error.error_eof(SectionName, fh)

        if result is None:
            return db

        # The type_descriptor can be:
        #  -- a UserCodeFragment with a string of the type, or
        #  -- a dictionary that contains the combined variable definitions.
        type_descriptor = result[0]

        # If only one element was returned it was a 'struct' that requires
        # an implicit name for the member combining the variables.
        if len(result) == 1:
            name = "data_" + repr(data_name_index_counter_get())
        else:
            name = result[1]

        db[name] = type_descriptor

        if len(result) == 1:
            assert isinstance(type_descriptor, OrderedDict)
            # In case of a 'combined' definition each contained variable
            # needs to be validated separately.
            for sub_name, sub_type in type_descriptor.items():
                # BUG FIX: the member NAME must be passed as second argument;
                # previously 'sub_type' was passed twice, so combined members
                # were never checked against 'already_defined_list' by name.
                __validate_definition(sub_type, sub_name, already_defined_list,
                                      StandardMembersF=False)
                already_defined_list.append([sub_name, sub_type])
        else:
            assert type_descriptor.__class__ == CodeUser
            __validate_definition(type_descriptor, name, already_defined_list,
                                  StandardMembersF=False)
            already_defined_list.append([name, type_descriptor])
def __parse_action(new_mode, fh, pattern_str, pattern):
    """Parse the action that follows a pattern: either a code fragment,
    or one of the markers 'PRIORITY-MARK' / 'DELETION'."""
    position = fh.tell()
    try:
        skip_whitespace(fh)
        position = fh.tell()

        code = code_fragment.parse(fh, "regular expression", ErrorOnFailureF=False)
        if code is not None:
            assert isinstance(code, CodeUser), "Found: %s" % code.__class__
            new_mode.add_pattern_action_pair(pattern, code, fh)
            return

        # No code fragment => look for a marker terminated by ';'.
        fh.seek(position)
        word = read_until_letter(fh, [";"])
        if word == "PRIORITY-MARK":
            # This mark 'lowers' the priority of a pattern to the priority of the current
            # pattern index (important for inherited patterns, that have higher precedence).
            # The parser already constructed a state machine for the pattern that is to
            # be assigned a new priority. Since this machine is not used, let us just
            # use its id.
            fh.seek(-1, 1)   # step back onto the terminating ';'
            check_or_die(fh, ";", ". Since quex version 0.33.5 this is required.")
            new_mode.add_match_priority(pattern, fh)

        elif word == "DELETION":
            # This mark deletes any pattern that was inherited with the same 'name'.
            fh.seek(-1, 1)   # step back onto the terminating ';'
            check_or_die(fh, ";", ". Since quex version 0.33.5 this is required.")
            new_mode.add_match_deletion(pattern, fh)

        else:
            error.log("Missing token '{', 'PRIORITY-MARK', 'DELETION', or '=>' after '%s'.\n" % pattern_str + \
                      "found: '%s'. Note, that since quex version 0.33.5 it is required to add a ';'\n" % word + \
                      "to the commands PRIORITY-MARK and DELETION.", fh)

    except EndOfStreamException:
        fh.seek(position)
        error.error_eof("pattern action", fh)
def __parse_function_argument_list(fh, ReferenceName):
    """Read a parenthesized, comma-separated argument list from 'fh'.

    Returns the list of stripped, non-empty argument strings; an empty
    list if no '(' follows. Nested brackets and quoted strings are copied
    verbatim (commas inside them do not split arguments). 'ReferenceName'
    is used only in error messages.
    """
    argument_list = []
    position = fh.tell()
    try:
        # Read argument list
        if not check(fh, "("):          # idiom fix: was '== False'
            return []

        text = ""
        while True:                     # idiom fix: was 'while 1 + 1 == 2'
            tmp = fh.read(1)
            if tmp == ")":
                break
            elif tmp in ["(", "[", "{"]:
                # Copy a nested bracketed region verbatim.
                closing_bracket = {"(": ")", "[": "]", "{": "}"}[tmp]
                text += tmp + read_until_closing_bracket(fh, tmp, closing_bracket) + closing_bracket
            elif tmp == "\"":
                text += tmp + read_until_closing_bracket(fh, "", "\"", IgnoreRegions=[]) + "\""
            elif tmp == "'":
                text += tmp + read_until_closing_bracket(fh, "", "'", IgnoreRegions=[]) + "'"
            elif tmp == ",":
                argument_list.append(text)
                text = ""
            elif tmp == "":
                # EOF before ')': rewind and report.
                fh.seek(position)
                error.error_eof("argument list for %s" % ReferenceName, fh)
            else:
                text += tmp

        if text != "":
            argument_list.append(text)

        # Comprehensions instead of map/filter with lambdas: clearer, and
        # guaranteed to yield a list on both Python 2 and Python 3.
        stripped = [arg.strip() for arg in argument_list]
        return [arg for arg in stripped if arg != ""]

    except EndOfStreamException:
        fh.seek(position)
        error.error_eof("token", fh)
def __parse_action(new_mode, fh, pattern_str, pattern):
    """Parse the action following a pattern (code fragment, 'PRIORITY-MARK',
    or 'DELETION').

    NOTE(review): exact duplicate of an earlier '__parse_action' definition
    in this file -- one of the two should be removed.
    """
    position = fh.tell()
    try:
        skip_whitespace(fh)
        position = fh.tell()

        code = code_fragment.parse(fh, "regular expression", ErrorOnFailureF=False)
        if code is not None:
            assert isinstance(code, CodeUser), "Found: %s" % code.__class__
            new_mode.add_pattern_action_pair(pattern, code, fh)
            return

        fh.seek(position)
        word = read_until_letter(fh, [";"])
        if word == "PRIORITY-MARK":
            # This mark 'lowers' the priority of a pattern to the priority of the current
            # pattern index (important for inherited patterns, that have higher precedence).
            # The parser already constructed a state machine for the pattern that is to
            # be assigned a new priority. Since this machine is not used, let us just
            # use its id.
            fh.seek(-1, 1)
            check_or_die(fh, ";", ". Since quex version 0.33.5 this is required.")
            new_mode.add_match_priority(pattern, fh)

        elif word == "DELETION":
            # This mark deletes any pattern that was inherited with the same 'name'.
            fh.seek(-1, 1)
            check_or_die(fh, ";", ". Since quex version 0.33.5 this is required.")
            new_mode.add_match_deletion(pattern, fh)

        else:
            error.log("Missing token '{', 'PRIORITY-MARK', 'DELETION', or '=>' after '%s'.\n" % pattern_str + \
                      "found: '%s'. Note, that since quex version 0.33.5 it is required to add a ';'\n" % word + \
                      "to the commands PRIORITY-MARK and DELETION.", fh)

    except EndOfStreamException:
        fh.seek(position)
        error.error_eof("pattern action", fh)
def __parse_function_argument_list(fh, ReferenceName):
    """Read a parenthesized, comma-separated argument list from 'fh'.

    Returns stripped, non-empty argument strings; an empty list if no '('
    follows. Commas inside nested brackets or quoted strings do not split.

    NOTE(review): duplicate of an earlier '__parse_function_argument_list'
    definition in this file -- one of the two should be removed.
    """
    argument_list = []
    position = fh.tell()
    try:
        # Read argument list
        if not check(fh, "("):          # idiom fix: was '== False'
            return []

        text = ""
        while True:                     # idiom fix: was 'while 1 + 1 == 2'
            tmp = fh.read(1)
            if tmp == ")":
                break
            elif tmp in ["(", "[", "{"]:
                closing_bracket = {"(": ")", "[": "]", "{": "}"}[tmp]
                text += tmp + read_until_closing_bracket(fh, tmp, closing_bracket) + closing_bracket
            elif tmp == "\"":
                text += tmp + read_until_closing_bracket(fh, "", "\"", IgnoreRegions=[]) + "\""
            elif tmp == "'":
                text += tmp + read_until_closing_bracket(fh, "", "'", IgnoreRegions=[]) + "'"
            elif tmp == ",":
                argument_list.append(text)
                text = ""
            elif tmp == "":
                fh.seek(position)
                error.error_eof("argument list for %s" % ReferenceName, fh)
            else:
                text += tmp

        if text != "":
            argument_list.append(text)

        # Comprehensions instead of map/filter with lambdas: clearer, and a
        # real list on Python 3 as well (map/filter return iterators there).
        stripped = [arg.strip() for arg in argument_list]
        return [arg for arg in stripped if arg != ""]

    except EndOfStreamException:
        fh.seek(position)
        error.error_eof("token", fh)
def __parse_brief_token_sender(fh):
    # Shorthand for: { self.send(TKN_SOMETHING); RETURN; }
    #
    # Returns a CodeUser with sender code followed by an immediate RETURN,
    # or None if no sender code resulted.
    position = fh.tell()
    try:
        skip_whitespace(fh)
        position = fh.tell()
        # The token id may be given directly as a character code ...
        code = __parse_token_id_specification_by_character_code(fh)
        if code != -1:
            code = __create_token_sender_by_character_code(fh, code)
        else:
            # ... or by a token name / mode-transition keyword.
            skip_whitespace(fh)
            identifier = __read_token_identifier(fh)
            skip_whitespace(fh)
            if identifier in ["GOTO", "GOSUB", "GOUP"]:
                code = __create_mode_transition_and_token_sender(fh, identifier)
            else:
                code = __create_token_sender_by_token_name(fh, identifier)
                check_or_die(fh, ";")

        if code:
            # IMPORTANT: For handlers 'on_end_of_stream' and 'on_failure',
            #            => CONTINUE would be desastrous!
            # -- When a termination token is sent, no other token shall follow.
            #    Return MUST be enforced => Do not allow CONTINUE!
            # -- When an 'on_failure' is detected allow immediate action of the
            #    receiver. => Do not allow CONTINUE!
            code += "\n%s\n" % Lng.PURE_RETURN   # Immediate RETURN after token sending
            return CodeUser(code, SourceRef.from_FileHandle(fh, BeginPos=position))
        else:
            return None

    except EndOfStreamException:
        fh.seek(position)
        error.error_eof("token", fh)
def parse_section(fh):
    """Parse one top-level section of a quex input file and store the
    result on the global 'blackboard'."""
    global default_token_type_definition_triggered_by_mode_definition_f

    # NOTE: End of File is supposed to be reached when trying to read a new
    #       section. Thus, the end-of-file catcher does not encompass the
    #       beginning.
    position = fh.tell()

    skip_whitespace(fh)
    word = read_identifier(fh, OnMissingStr="Missing section title")

    error.verify_word_in_list(word, blackboard.all_section_title_list,
                              "Unknown quex section '%s'" % word, fh)
    try:
        # (*) determine what is defined
        #
        #     -- 'mode { ... }'   => define a mode
        #     -- 'start = ...;'   => define the name of the initial mode
        #     -- 'header { ... }' => define code that is to be pasted on top
        #                            of the engine (e.g. "#include<...>")
        #     -- 'body { ... }'   => define code that is to be pasted in the
        #                            class' body of the engine
        #                            (e.g. "public: int my_member;")
        #     -- 'init { ... }'   => define code that is to be pasted in the
        #                            class' constructors of the engine
        #                            (e.g. "my_member = -1;")
        #     -- 'define { ... }' => define pattern shorthands such as
        #                            IDENTIFIER for [a-z]+
        #     -- 'repeated_token_id = QUEX_TKN_ ...;' => enables token
        #                            repetition, defines the token id to be
        #                            repeated.
        #     -- 'token { ... }'  => define token ids
        #     -- 'token_type { ... }' => define a customized token type
        #
        if word in blackboard.fragment_db.keys():
            # Plain code fragment sections ('header', 'body', 'init', ...).
            element_name = blackboard.fragment_db[word]
            fragment = code_fragment.parse(fh, word, AllowBriefTokenSenderF=False)
            blackboard.__dict__[element_name] = fragment
            return

        elif word == "start":
            mode_name = parse_identifier_assignment(fh)
            if mode_name == "":
                error.log("Missing mode_name after 'start ='", fh)
            elif not blackboard.initial_mode.sr.is_void():
                error.log("start mode defined more than once!", fh, DontExitF=True)
                error.log("previously defined here", blackboard.initial_mode.sr)
            blackboard.initial_mode = CodeUser(mode_name, SourceRef.from_FileHandle(fh))
            return

        elif word == "repeated_token":
            blackboard.token_repetition_token_id_list = parse_token_id_definitions(fh, NamesOnlyF=True)
            # Every repeated token id must already be known.
            for token_name in blackboard.token_repetition_token_id_list:
                error.verify_word_in_list(token_name[len(Setup.token_id_prefix):],
                                          blackboard.token_id_db.keys(),
                                          "Token ID '%s' not yet defined." % token_name,
                                          fh, ExitF=False,
                                          SuppressCode=NotificationDB.warning_repeated_token_not_yet_defined)
            return

        elif word == "define":
            parse_pattern_name_definitions(fh)
            return

        elif word == "token":
            # A foreign token id file excludes an inline 'token' section.
            if Setup.token_id_foreign_definition:
                error.log("Token id file '%s' has been specified.\n" \
                          % Setup.token_id_foreign_definition_file \
                          + "All token ids must be specified there. Section 'token'\n" \
                          + "is not allowed.", fh)
            parse_token_id_definitions(fh)
            return

        elif word == "token_type":
            if Setup.token_class_file != "":
                error.log("Section 'token_type' is intended to generate a token class.\n" \
                          + "However, the manually written token class file '%s'" \
                          % repr(Setup.token_class_file) \
                          + "has been specified on the command line.", fh)

            if blackboard.token_type_definition is None:
                blackboard.token_type_definition = token_type.parse(fh)
                return

            # Error case:
            if default_token_type_definition_triggered_by_mode_definition_f:
                error.log("Section 'token_type' must appear before first mode definition.", fh)
            else:
                error.log("Section 'token_type' has been defined twice.", fh, DontExitF=True)
                error.log("Previously defined here.",
                          blackboard.token_type_definition.sr)
            return

        elif word == "mode":
            # When the first mode is parsed then a token_type definition must
            # be present. If not, the default token type definition is
            # considered.
            if blackboard.token_type_definition is None:
                parse_default_token_definition()
                default_token_type_definition_triggered_by_mode_definition_f = True
            mode.parse(fh)
            return

        else:
            # This case should have been caught by 'verify_word_in_list'.
            assert False

    except EndOfStreamException:
        fh.seek(position)
        error.error_eof(word, fh)
def parse_section(fh):
    """Parse one top-level section of a quex input file and store the result
    on the global 'blackboard'.

    NOTE(review): exact duplicate of an earlier 'parse_section(fh)'
    definition in this file -- one of the two should be removed.
    """
    global default_token_type_definition_triggered_by_mode_definition_f

    # NOTE: End of File is supposed to be reached when trying to read a new
    #       section. Thus, the end-of-file catcher does not encompass the
    #       beginning.
    position = fh.tell()

    skip_whitespace(fh)
    word = read_identifier(fh, OnMissingStr="Missing section title")

    error.verify_word_in_list(word, blackboard.all_section_title_list,
                              "Unknown quex section '%s'" % word, fh)
    try:
        # (*) determine what is defined
        #
        #     -- 'mode { ... }'       => define a mode
        #     -- 'start = ...;'       => define the name of the initial mode
        #     -- 'header { ... }'     => code pasted on top of the engine
        #                                (e.g. "#include<...>")
        #     -- 'body { ... }'       => code pasted in the class' body
        #                                (e.g. "public: int my_member;")
        #     -- 'init { ... }'       => code pasted in the class' constructors
        #                                (e.g. "my_member = -1;")
        #     -- 'define { ... }'     => pattern shorthands, e.g. IDENTIFIER
        #                                for [a-z]+
        #     -- 'repeated_token_id = QUEX_TKN_ ...;'
        #                             => enables token repetition, defines the
        #                                token id to be repeated.
        #     -- 'token { ... }'      => define token ids
        #     -- 'token_type { ... }' => define a customized token type
        #
        if word in blackboard.fragment_db.keys():
            element_name = blackboard.fragment_db[word]
            fragment = code_fragment.parse(fh, word, AllowBriefTokenSenderF=False)
            blackboard.__dict__[element_name] = fragment
            return

        elif word == "start":
            mode_name = parse_identifier_assignment(fh)
            if mode_name == "":
                error.log("Missing mode_name after 'start ='", fh)
            elif not blackboard.initial_mode.sr.is_void():
                error.log("start mode defined more than once!", fh, DontExitF=True)
                error.log("previously defined here", blackboard.initial_mode.sr)
            blackboard.initial_mode = CodeUser(mode_name, SourceRef.from_FileHandle(fh))
            return

        elif word == "repeated_token":
            blackboard.token_repetition_token_id_list = parse_token_id_definitions(fh, NamesOnlyF=True)
            for token_name in blackboard.token_repetition_token_id_list:
                error.verify_word_in_list(token_name[len(Setup.token_id_prefix):],
                                          blackboard.token_id_db.keys(),
                                          "Token ID '%s' not yet defined." % token_name,
                                          fh, ExitF=False,
                                          SuppressCode=NotificationDB.warning_repeated_token_not_yet_defined)
            return

        elif word == "define":
            parse_pattern_name_definitions(fh)
            return

        elif word == "token":
            if Setup.token_id_foreign_definition:
                error.log("Token id file '%s' has been specified.\n" \
                          % Setup.token_id_foreign_definition_file \
                          + "All token ids must be specified there. Section 'token'\n" \
                          + "is not allowed.", fh)
            parse_token_id_definitions(fh)
            return

        elif word == "token_type":
            if Setup.token_class_file != "":
                error.log("Section 'token_type' is intended to generate a token class.\n" \
                          + "However, the manually written token class file '%s'" \
                          % repr(Setup.token_class_file) \
                          + "has been specified on the command line.", fh)

            if blackboard.token_type_definition is None:
                blackboard.token_type_definition = token_type.parse(fh)
                return

            # Error case:
            if default_token_type_definition_triggered_by_mode_definition_f:
                error.log("Section 'token_type' must appear before first mode definition.", fh)
            else:
                error.log("Section 'token_type' has been defined twice.", fh, DontExitF=True)
                error.log("Previously defined here.",
                          blackboard.token_type_definition.sr)
            return

        elif word == "mode":
            # When the first mode is parsed then a token_type definition must
            # be present. If not, the default token type definition is
            # considered.
            if blackboard.token_type_definition is None:
                parse_default_token_definition()
                default_token_type_definition_triggered_by_mode_definition_f = True
            mode.parse(fh)
            return

        else:
            # This case should have been caught by 'verify_word_in_list'.
            assert False

    except EndOfStreamException:
        fh.seek(position)
        error.error_eof(word, fh)
sh = StringIO(Txt_or_File) # (*) Parse the pattern => A Pattern object start_position = sh.tell() try: pattern = regex.do(sh, blackboard.shorthand_db, AllowNothingIsNecessaryF = AllowNothingIsFineF, SpecialTerminator = Terminator) except RegularExpressionException, x: sh.seek(start_position) error.log("Regular expression parsing:\n" + x.message, sh) except EndOfStreamException: sh.seek(start_position) error.error_eof("regular expression", sh) # (*) Extract the object as required if ExtractFunction is not None: result = ExtractFunction(pattern.sm) if pattern.has_pre_or_post_context() or result is None: sh.seek(start_position) pattern_str = pattern.pattern_string().strip() txt = "Regular expression '%s' cannot be interpreted as plain %s." % (pattern_str, Name) if len(pattern_str) != 0 and pattern_str[-1] == Terminator: txt += "\nMissing delimiting whitespace ' ' between the regular expression and '%s'.\n" % Terminator error.log(txt, sh) else: result = None