def parse_pattern_name_definitions(fh, Setup):
    """Parse a '{ ... }' region of pattern shorthand definitions.

    Each entry has the form

        WHITESPACE   [ \\t\\n]
        IDENTIFIER   [a-zA-Z0-9]+

    i.e. 'name', whitespace, 'regular expression', newline. Comments can
    only be '//' and have to appear at the beginning of the line. One
    regular expression can carry more than one name, but one name maps to
    exactly one regular expression. Results are stored in
    lexer_mode.shorthand_db.

    NOTE: Catching of EOF happens in the caller: parse_section(...).
    """
    def _eat_closing_bracket(stream):
        # Consume a '}' if it is the next character; otherwise restore
        # the stream position and report failure.
        marker = stream.tell()
        if stream.read(1) == "}":
            return True
        stream.seek(marker)
        return False

    while True:
        skip_whitespace(fh)
        if _eat_closing_bracket(fh):
            return

        # -- the name under which the pattern shall be known
        skip_whitespace(fh)
        pattern_name = read_identifier(fh)
        if not pattern_name:
            raise RegularExpressionException("Missing identifier for pattern definition.")

        skip_whitespace(fh)
        if _eat_closing_bracket(fh):
            raise RegularExpressionException(
                "Missing regular expression for pattern definition '%s'." % pattern_name)

        # -- parse the regular expression and build its state machine
        regular_expression_obj, state_machine = \
            regular_expression.parse(fh, AllowNothingIsFineF=True)

        lexer_mode.shorthand_db[pattern_name] = \
            lexer_mode.PatternShorthand(pattern_name, state_machine, fh.name,
                                        get_current_line_info_number(fh),
                                        regular_expression_obj)
def parse_pattern_name_definitions(fh, Setup):
    """Read shorthand pattern definitions until the closing '}'.

    Entries look like

        WHITESPACE   [ \\t\\n]
        IDENTIFIER   [a-zA-Z0-9]+

    i.e. 'name', whitespace, 'regular expression', newline. Comments may
    only be '//' and must appear at the beginning of a line. Several names
    may share one regular expression, but a name maps to exactly one
    regular expression. Definitions land in lexer_mode.shorthand_db.

    NOTE: EOF is caught by the caller: parse_section(...).
    """
    def __try_consume_closer(stream):
        # True (and '}' consumed) if the next character closes the region;
        # otherwise the stream is rewound and False is returned.
        before = stream.tell()
        if stream.read(1) == "}":
            return True
        stream.seek(before)
        return False

    while True:
        skip_whitespace(fh)
        if __try_consume_closer(fh):
            return

        # The shorthand's name.
        skip_whitespace(fh)
        pattern_name = read_identifier(fh)
        if not pattern_name:
            raise RegularExpressionException("Missing identifier for pattern definition.")

        skip_whitespace(fh)
        if __try_consume_closer(fh):
            raise RegularExpressionException(
                "Missing regular expression for pattern definition '%s'." % pattern_name)

        # The regular expression and its derived state machine.
        expr_obj, sm = regular_expression.parse(fh, AllowNothingIsFineF=True)

        lexer_mode.shorthand_db[pattern_name] = \
            lexer_mode.PatternShorthand(pattern_name, sm, fh.name,
                                        get_current_line_info_number(fh),
                                        expr_obj)
def parse_pattern_name_definitions(fh):
    """Parse a 'define { NAME PATTERN ... }' region.

    Each line associates an identifier with a regular expression, e.g.

        WHITESPACE   [ \\t\\n]
        IDENTIFIER   [a-zA-Z0-9]+

    i.e. 'name', whitespace, 'regular expression', newline. Comments can
    only be '//' and have to appear at the beginning of the line. A
    regular expression may carry several names, but each name refers to
    exactly one regular expression. Results land in
    lexer_mode.shorthand_db.
    """
    skip_whitespace(fh)
    if not check(fh, "{"):
        error_msg("define region must start with opening '{'.", fh)

    while True:
        skip_whitespace(fh)
        if check(fh, "}"):
            return

        # Name under which the pattern shall be known.
        skip_whitespace(fh)
        pattern_name = read_identifier(fh)
        if len(pattern_name) == 0:
            error_msg("Missing identifier for pattern definition.", fh)

        skip_whitespace(fh)
        if check(fh, "}"):
            error_msg("Missing regular expression for pattern definition '%s'." % \
                      pattern_name, fh)

        # The pattern itself: a regular expression and its state machine.
        regular_expression_str, state_machine = \
            regular_expression.parse(fh, AllowNothingIsFineF=True)

        # Patterns with pre-/post-context cannot be substituted into other
        # expressions; report, but do not abort (DontExitF).
        if state_machine.core().has_pre_or_post_context():
            error_msg("Pattern definition with pre- and/or post-context.\n"
                      "This pattern cannot be used in replacements.", fh, DontExitF=True)

        lexer_mode.shorthand_db[pattern_name] = \
            lexer_mode.PatternShorthand(pattern_name, state_machine, fh.name,
                                        get_current_line_info_number(fh),
                                        regular_expression_str)
def parse_mode_element(new_mode, fh):
    """Parse one element inside a mode's '{ ... }' body.

    Returns False when the closing '}' of the mode has been reached,
    True otherwise.
    """
    position = fh.tell()
    try:
        description = "Pattern or event handler name.\n" \
                      "Missing closing '}' for end of mode"

        skip_whitespace(fh)
        # NOTE: 'read_word' is not used here because parsing must continue
        #       directly after the whitespace when a regular expression follows.
        position = fh.tell()
        word = read_until_whitespace(fh)
        if word == "}":
            return False

        # Event handlers ('on_entry', 'on_exit', ...) are handled completely
        # inside check_for_event_specification().
        if check_for_event_specification(word, fh, new_mode) == True:
            return True
        fh.seek(position)

        description = "start of mode element: regular expression"
        pattern, pattern_state_machine = regular_expression.parse(fh)

        # Complain about duplicate pattern definitions, pointing at both the
        # new and the first occurrence; parsing continues (DontExitF).
        if new_mode.has_pattern(pattern):
            previous = new_mode.get_match_object(pattern)
            error_msg("Pattern has been defined twice.", fh, DontExitF=True)
            error_msg("First defined here.",
                      previous.action().filename, previous.action().line_n)

        position = fh.tell()
        description = "start of mode element: code fragment for '%s'" % pattern
        parse_action_code(new_mode, fh, pattern, pattern_state_machine)

    except EndOfStreamException:
        fh.seek(position)
        error_msg("End of file reached while parsing %s." % description, fh)

    return True
def do(fh):
    """Parse an indentation setup of the form

           [ \\t]                                       => grid 4;
           [:intersection([:alpha:], [\\X064-\\X066]):] => space 1;

    i.e. a pattern, '=>', and an indentation specifier. For the specifiers
    'space', 'grid' and 'bad' the pattern must be matchable by a single
    character (a character set). Returns the sealed IndentationSetup once
    the closing '>' is found.

    NOTE: Catching of EOF happens in the caller: parse_section(...).
    """
    indentation_setup = IndentationSetup(fh)

    skip_whitespace(fh)
    while True:
        skip_whitespace(fh)
        if check(fh, ">"):
            indentation_setup.seal()
            indentation_setup.consistency_check(fh)
            return indentation_setup

        # Left hand side: a regular expression / character set.
        pattern_str, state_machine = regular_expression.parse(fh)

        skip_whitespace(fh)
        if not check(fh, "=>"):
            error_msg("Missing '=>' after character set definition.", fh)

        skip_whitespace(fh)
        identifier = read_identifier(fh)
        if identifier == "":
            error_msg("Missing identifier for indentation element definition.", fh)

        verify_word_in_list(identifier,
                            ["space", "grid", "bad", "newline", "suppressor"],
                            "Unrecognized indentation specifier '%s'." % identifier, fh)

        # 'space', 'bad' and 'grid' require a pattern matched by exactly one
        # character: extract its trigger set from the init state.
        trigger_set = None
        if identifier in ["space", "bad", "grid"]:
            if len(state_machine.states) != 2:
                # (typo fixed: 'addmissible' -> 'admissible')
                error_msg("For indentation '%s' only patterns are admissible which\n" % identifier + \
                          "can be matched by a single character, e.g. \" \" or [a-z].", fh)
            transition_map = state_machine.get_init_state().transitions().get_map()
            assert len(transition_map) == 1
            trigger_set = transition_map.values()[0]

        skip_whitespace(fh)
        if identifier == "space":
            value = read_integer(fh)
            if value is not None:
                indentation_setup.specify_space(pattern_str, trigger_set, value, fh)
            else:
                # Not a number received; is it a variable name?
                variable = read_identifier(fh)
                if variable != "":
                    indentation_setup.specify_space(pattern_str, trigger_set, variable, fh)
                else:
                    # Default: each 'space' character counts as one column.
                    indentation_setup.specify_space(pattern_str, trigger_set, 1, fh)

        elif identifier == "grid":
            value = read_integer(fh)
            if value is not None:
                indentation_setup.specify_grid(pattern_str, trigger_set, value, fh)
            else:
                # Not a number received; is it a variable name?
                skip_whitespace(fh)
                variable = read_identifier(fh)
                if variable != "":
                    indentation_setup.specify_grid(pattern_str, trigger_set, variable, fh)
                else:
                    error_msg("Missing integer or variable name after keyword 'grid'.", fh)

        elif identifier == "bad":
            indentation_setup.specify_bad(pattern_str, trigger_set, fh)

        elif identifier == "newline":
            indentation_setup.specify_newline(pattern_str, state_machine, fh)

        elif identifier == "suppressor":
            indentation_setup.specify_suppressor(pattern_str, state_machine, fh)

        else:
            assert False, "Unreachable code reached."

        if not check(fh, ";"):
            error_msg("Missing ';' after indentation '%s' specification." % identifier, fh)
def parse_mode_option(fh, new_mode):
    """Parse a single mode option '<name: ...>' and attach it to 'new_mode'.

    Handles the special options 'skip', 'skip_range', 'skip_nested_range'
    and 'indentation' by constructing the corresponding state machines and
    generated-code actions; any other option is read as a plain value and
    validated against lexer_mode.mode_option_info_db.

    Returns True if an option was parsed, False if no option starts here.
    """
    def fit_state_machine(SM):
        # Ensure DFA-compliance, then minimize (hopcroft).
        if not SM.is_DFA_compliant():
            result = nfa_to_dfa.do(SM)
        else:
            result = SM
        result = hopcroft.do(result, CreateNewStateMachineF=False)
        return result

    identifier = read_option_start(fh)
    if identifier is None:
        return False

    verify_word_in_list(identifier, lexer_mode.mode_option_info_db.keys(),
                        "mode option", fh.name, get_current_line_info_number(fh))

    if identifier == "skip":
        # A skipper 'eats' characters at the beginning of a pattern that belong
        # to a specified set of characters. A useful application is most probably
        # the whitespace skipper '[ \t\n]'. The skipper definition allows quex to
        # implement a very effective way to skip these regions.
        pattern_str, trigger_set = regular_expression.parse_character_set(fh, PatternStringF=True)

        skip_whitespace(fh)
        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'." % identifier, fh)
        if trigger_set.is_empty():
            # BUG FIX: '%' was applied to a string without a conversion
            # specifier, raising TypeError instead of reporting the error.
            error_msg("Empty trigger set for skipper '%s'." % identifier, fh)

        # TriggerSet skipping is implemented the following way: As soon as one
        # element of the trigger set appears, the state machine enters the
        # 'trigger set skipper section'. Enter the skipper as if the opener
        # pattern was a normal pattern and the 'skipper' is the action.
        # NOTE: The correspondent CodeFragment for skipping is created in
        #       'implement_skippers(...)'.
        pattern_sm = StateMachine()
        pattern_sm.add_transition(pattern_sm.init_state_index, trigger_set, AcceptanceF=True)

        # Skipper code is to be generated later.
        action = GeneratedCode(skip_character_set.do,
                               FileName=fh.name,
                               LineN=get_current_line_info_number(fh))
        action.data["character_set"] = trigger_set

        pattern_sm = fit_state_machine(pattern_sm)
        # For skippers line and column counting detection is not really a topic;
        # it is done in the skipper itself.
        pattern_sm.side_info = SideInfo()

        new_mode.add_match(pattern_str, action, pattern_sm)
        return True

    elif identifier in ["skip_range", "skip_nested_range"]:
        # A non-nesting skipper can contain a full fledged regular expression as
        # opener, since it only effects the trigger. Not so the nested range
        # skipper -- see below.

        # -- opener
        skip_whitespace(fh)
        if identifier == "skip_nested_range":
            # Nested range state machines only accept 'strings', not state machines.
            opener_str, opener_sequence = \
                parse_string_constant(fh, "Opener pattern for 'skip_nested_range'")
            opener_sm = StateMachine()
            idx = opener_sm.init_state_index
            for letter in opener_sequence:
                idx = opener_sm.add_transition(idx, letter)
            opener_sm.states[idx].set_acceptance(True)
        else:
            opener_str, opener_sm = regular_expression.parse(fh)
            # For 'range skipping' the opener sequence is not needed, only the
            # opener state machine is webbed into the pattern matching state machine.
            opener_sequence = None

        skip_whitespace(fh)

        # -- closer
        closer_str, closer_sequence = \
            parse_string_constant(fh, "Closing pattern for 'skip_range' or 'skip_nested_range'")
        skip_whitespace(fh)
        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'" % identifier, fh)

        # Skipper code is to be generated later.
        generator_function = {
            "skip_range": skip_range.do,
            "skip_nested_range": skip_nested_range.do,
        }[identifier]
        action = GeneratedCode(generator_function,
                               FileName=fh.name,
                               LineN=get_current_line_info_number(fh))
        action.data["opener_sequence"] = opener_sequence
        action.data["closer_sequence"] = closer_sequence
        action.data["mode_name"] = new_mode.name

        # NOTE(review): the return value is discarded here, so a non-DFA
        # opener_sm would stay an NFA -- confirm intent before changing.
        fit_state_machine(opener_sm)
        # For skippers line and column counting detection is not really a topic;
        # it is done in the skipper itself.
        opener_sm.side_info = SideInfo()

        new_mode.add_match(opener_str, action, opener_sm)
        return True

    elif identifier == "indentation":
        value = indentation_setup.do(fh)

        # Enter 'Newline' and 'Suppressed Newline' as matches into the engine.
        # Similar to skippers, the indentation count is then triggered by the newline.

        # -- Suppressed Newline = Suppressor followed by Newline;
        #    then newline does not trigger indentation counting.
        suppressed_newline_pattern = ""
        if value.newline_suppressor_state_machine.get() != None:
            suppressed_newline_pattern = \
                "(" + value.newline_suppressor_state_machine.pattern_str + ")" \
                + "(" + value.newline_state_machine.pattern_str + ")"
            suppressed_newline_sm = \
                sequentialize.do([value.newline_suppressor_state_machine.get(),
                                  value.newline_state_machine.get()])

            FileName = value.newline_suppressor_state_machine.file_name
            LineN = value.newline_suppressor_state_machine.line_n
            # Go back to start.
            code_fragment = UserCodeFragment(
                "goto %s;" % get_label("$start", U=True), FileName, LineN)

            suppressed_newline_sm = fit_state_machine(suppressed_newline_sm)
            # Analyze pattern for constant number of newlines, characters, etc.
            suppressed_newline_sm.side_info = SideInfo(
                character_counter.get_newline_n(suppressed_newline_sm),
                character_counter.get_character_n(suppressed_newline_sm))

            new_mode.add_match(suppressed_newline_pattern, code_fragment,
                               suppressed_newline_sm,
                               Comment="indentation newline suppressor")

        # When there is an empty line, then there shall be no indentation count on it.
        # Here comes the trick:
        #
        #    Let newline be defined as:  newline ([space]* newline)*
        #
        # This way empty lines are eaten away before the indentation count is activated.

        # -- 'space'
        x0 = StateMachine()
        x0.add_transition(x0.init_state_index,
                          value.indentation_count_character_set(),
                          AcceptanceF=True)
        # -- '[space]*'
        x1 = repeat.do(x0)
        # -- '[space]* newline'
        x2 = sequentialize.do([x1, value.newline_state_machine.get()])
        # -- '([space]* newline)*'
        x3 = repeat.do(x2)
        # -- 'newline ([space]* newline)*'
        x4 = sequentialize.do([value.newline_state_machine.get(), x3])
        # -- nfa to dfa; hopcroft optimization
        sm = hopcroft.do(nfa_to_dfa.do(x4), CreateNewStateMachineF=False)

        FileName = value.newline_state_machine.file_name
        LineN = value.newline_state_machine.line_n
        action = GeneratedCode(indentation_counter.do, FileName, LineN)
        action.data["indentation_setup"] = value

        sm = fit_state_machine(sm)
        sm.side_info = SideInfo(character_counter.get_newline_n(sm),
                                character_counter.get_character_n(sm))
        new_mode.add_match(value.newline_state_machine.pattern_str, action, sm,
                           Comment="indentation newline")

        # Announce the mode to which the setup belongs.
        value.set_containing_mode_name(new_mode.name)
    else:
        value = read_option_value(fh)

    # The 'verify_word_in_list()' call must have ensured that the following holds.
    assert lexer_mode.mode_option_info_db.has_key(identifier)

    # Is the option of the appropriate value?
    option_info = lexer_mode.mode_option_info_db[identifier]
    if option_info.domain is not None and value not in option_info.domain:
        # BUG FIX: the message referenced undefined names 'Value', 'Option'
        # and 'oi' (NameError at runtime); use the actual local variables.
        error_msg("Tried to set value '%s' for option '%s'. " % (value, identifier) + \
                  "Though, possible for this option are only: %s." % repr(option_info.domain)[1:-1], fh)

    # Finally, set the option.
    new_mode.add_option(identifier, value)
    return True
def parse_mode_option(fh, new_mode):
    """Parse a single mode option '<name: ...>' and attach it to 'new_mode'.

    Handles the special options 'skip', 'skip_range', 'skip_nested_range'
    and 'indentation' by constructing the corresponding state machines and
    generated-code actions; any other option is read as a plain value and
    validated against lexer_mode.mode_option_info_db.

    Returns True if an option was parsed, False if no option starts here.
    """
    def fit_state_machine(SM):
        # Ensure DFA-compliance, then minimize (hopcroft).
        if not SM.is_DFA_compliant():
            result = nfa_to_dfa.do(SM)
        else:
            result = SM
        result = hopcroft.do(result, CreateNewStateMachineF=False)
        return result

    identifier = read_option_start(fh)
    if identifier is None:
        return False

    verify_word_in_list(identifier, lexer_mode.mode_option_info_db.keys(),
                        "mode option", fh.name, get_current_line_info_number(fh))

    if identifier == "skip":
        # A skipper 'eats' characters at the beginning of a pattern that belong
        # to a specified set of characters. A useful application is most probably
        # the whitespace skipper '[ \t\n]'. The skipper definition allows quex to
        # implement a very effective way to skip these regions.
        pattern_str, trigger_set = regular_expression.parse_character_set(fh, PatternStringF=True)

        skip_whitespace(fh)
        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'." % identifier, fh)
        if trigger_set.is_empty():
            # BUG FIX: '%' was applied to a string without a conversion
            # specifier, raising TypeError instead of reporting the error.
            error_msg("Empty trigger set for skipper '%s'." % identifier, fh)

        # TriggerSet skipping is implemented the following way: As soon as one
        # element of the trigger set appears, the state machine enters the
        # 'trigger set skipper section'. Enter the skipper as if the opener
        # pattern was a normal pattern and the 'skipper' is the action.
        # NOTE: The correspondent CodeFragment for skipping is created in
        #       'implement_skippers(...)'.
        pattern_sm = StateMachine()
        pattern_sm.add_transition(pattern_sm.init_state_index, trigger_set, AcceptanceF=True)

        # Skipper code is to be generated later.
        action = GeneratedCode(skip_character_set.do,
                               FileName=fh.name,
                               LineN=get_current_line_info_number(fh))
        action.data["character_set"] = trigger_set

        pattern_sm = fit_state_machine(pattern_sm)
        # For skippers line and column counting detection is not really a topic;
        # it is done in the skipper itself.
        pattern_sm.side_info = SideInfo()

        new_mode.add_match(pattern_str, action, pattern_sm)
        return True

    elif identifier in ["skip_range", "skip_nested_range"]:
        # A non-nesting skipper can contain a full fledged regular expression as
        # opener, since it only effects the trigger. Not so the nested range
        # skipper -- see below.

        # -- opener
        skip_whitespace(fh)
        if identifier == "skip_nested_range":
            # Nested range state machines only accept 'strings', not state machines.
            opener_str, opener_sequence = \
                parse_string_constant(fh, "Opener pattern for 'skip_nested_range'")
            opener_sm = StateMachine()
            idx = opener_sm.init_state_index
            for letter in opener_sequence:
                idx = opener_sm.add_transition(idx, letter)
            opener_sm.states[idx].set_acceptance(True)
        else:
            opener_str, opener_sm = regular_expression.parse(fh)
            # For 'range skipping' the opener sequence is not needed, only the
            # opener state machine is webbed into the pattern matching state machine.
            opener_sequence = None

        skip_whitespace(fh)

        # -- closer
        closer_str, closer_sequence = \
            parse_string_constant(fh, "Closing pattern for 'skip_range' or 'skip_nested_range'")
        skip_whitespace(fh)
        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'" % identifier, fh)

        # Skipper code is to be generated later.
        generator_function = {
            "skip_range": skip_range.do,
            "skip_nested_range": skip_nested_range.do,
        }[identifier]
        action = GeneratedCode(generator_function,
                               FileName=fh.name,
                               LineN=get_current_line_info_number(fh))
        action.data["opener_sequence"] = opener_sequence
        action.data["closer_sequence"] = closer_sequence
        action.data["mode_name"] = new_mode.name

        # NOTE(review): the return value is discarded here, so a non-DFA
        # opener_sm would stay an NFA -- confirm intent before changing.
        fit_state_machine(opener_sm)
        # For skippers line and column counting detection is not really a topic;
        # it is done in the skipper itself.
        opener_sm.side_info = SideInfo()

        new_mode.add_match(opener_str, action, opener_sm)
        return True

    elif identifier == "indentation":
        value = indentation_setup.do(fh)

        # Enter 'Newline' and 'Suppressed Newline' as matches into the engine.
        # Similar to skippers, the indentation count is then triggered by the newline.

        # -- Suppressed Newline = Suppressor followed by Newline;
        #    then newline does not trigger indentation counting.
        suppressed_newline_pattern = ""
        if value.newline_suppressor_state_machine.get() != None:
            suppressed_newline_pattern = \
                "(" + value.newline_suppressor_state_machine.pattern_str + ")" \
                + "(" + value.newline_state_machine.pattern_str + ")"
            suppressed_newline_sm = \
                sequentialize.do([value.newline_suppressor_state_machine.get(),
                                  value.newline_state_machine.get()])

            FileName = value.newline_suppressor_state_machine.file_name
            LineN = value.newline_suppressor_state_machine.line_n
            # Go back to start.
            code_fragment = UserCodeFragment(
                "goto %s;" % get_label("$start", U=True), FileName, LineN)

            suppressed_newline_sm = fit_state_machine(suppressed_newline_sm)
            # Analyze pattern for constant number of newlines, characters, etc.
            suppressed_newline_sm.side_info = SideInfo(
                character_counter.get_newline_n(suppressed_newline_sm),
                character_counter.get_character_n(suppressed_newline_sm))

            new_mode.add_match(suppressed_newline_pattern, code_fragment,
                               suppressed_newline_sm,
                               Comment="indentation newline suppressor")

        # When there is an empty line, then there shall be no indentation count on it.
        # Here comes the trick:
        #
        #    Let newline be defined as:  newline ([space]* newline)*
        #
        # This way empty lines are eaten away before the indentation count is activated.

        # -- 'space'
        x0 = StateMachine()
        x0.add_transition(x0.init_state_index,
                          value.indentation_count_character_set(),
                          AcceptanceF=True)
        # -- '[space]*'
        x1 = repeat.do(x0)
        # -- '[space]* newline'
        x2 = sequentialize.do([x1, value.newline_state_machine.get()])
        # -- '([space]* newline)*'
        x3 = repeat.do(x2)
        # -- 'newline ([space]* newline)*'
        x4 = sequentialize.do([value.newline_state_machine.get(), x3])
        # -- nfa to dfa; hopcroft optimization
        sm = hopcroft.do(nfa_to_dfa.do(x4), CreateNewStateMachineF=False)

        FileName = value.newline_state_machine.file_name
        LineN = value.newline_state_machine.line_n
        action = GeneratedCode(indentation_counter.do, FileName, LineN)
        action.data["indentation_setup"] = value

        sm = fit_state_machine(sm)
        sm.side_info = SideInfo(character_counter.get_newline_n(sm),
                                character_counter.get_character_n(sm))
        new_mode.add_match(value.newline_state_machine.pattern_str, action, sm,
                           Comment="indentation newline")

        # Announce the mode to which the setup belongs.
        value.set_containing_mode_name(new_mode.name)
    else:
        value = read_option_value(fh)

    # The 'verify_word_in_list()' call must have ensured that the following holds.
    assert lexer_mode.mode_option_info_db.has_key(identifier)

    # Is the option of the appropriate value?
    option_info = lexer_mode.mode_option_info_db[identifier]
    if option_info.domain is not None and value not in option_info.domain:
        # BUG FIX: the message referenced undefined names 'Value', 'Option'
        # and 'oi' (NameError at runtime); use the actual local variables.
        error_msg("Tried to set value '%s' for option '%s'. " % (value, identifier) + \
                  "Though, possible for this option are only: %s." % repr(option_info.domain)[1:-1], fh)

    # Finally, set the option.
    new_mode.add_option(identifier, value)
    return True
def parse_mode_option(fh, new_mode):
    """Parse a single mode option '<name: value>' and attach it to 'new_mode'.

    Knows the skipper options 'skip' and 'skip_range'; 'skip_nesting_range'
    is recognized but not supported. Any other option is read verbatim up to
    the closing '>' and validated against lexer_mode.mode_option_info_db.

    Returns True if an option was parsed, False if no option starts here.
    """
    skip_whitespace(fh)

    # Mode options start with '<'.
    if fh.read(1) != "<":
        return False

    skip_whitespace(fh)
    identifier = read_identifier(fh).strip()
    if identifier == "":
        # (typo fixed: 'identifer' -> 'identifier')
        error_msg("missing identifier after start of mode option '<'", fh)

    skip_whitespace(fh)
    if fh.read(1) != ":":
        error_msg("missing ':' after option name '%s'" % identifier, fh)
    skip_whitespace(fh)

    if identifier == "skip":
        # A skipper 'eats' characters at the beginning of a pattern that belong
        # to a specified set of characters. A useful application is most probably
        # the whitespace skipper '[ \t\n]'. The skipper definition allows quex to
        # implement a very effective way to skip these regions.
        pattern_str, trigger_set = regular_expression.parse_character_set(fh, PatternStringF=True)

        skip_whitespace(fh)
        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'." % identifier, fh)
        if trigger_set.is_empty():
            # BUG FIX: '%' was applied to a string without a conversion
            # specifier, raising TypeError instead of reporting the error.
            error_msg("Empty trigger set for skipper '%s'." % identifier, fh)

        # TriggerSet skipping is implemented the following way: As soon as one
        # element of the trigger set appears, the state machine enters the
        # 'trigger set skipper section'.
        opener_sm = StateMachine()
        opener_sm.add_transition(opener_sm.init_state_index, trigger_set, AcceptanceF=True)
        action = CodeFragment(create_skip_code(trigger_set))

        # Enter the skipper as if the opener pattern was a normal pattern and
        # the 'skipper' is the action.
        new_mode.add_match(pattern_str, action, opener_sm)
        return True

    elif identifier == "skip_range":
        # A non-nesting skipper can contain a full fledged regular expression as
        # opener, since it only effects the trigger. Not so the nested range
        # skipper -- see below.

        # -- opener
        skip_whitespace(fh)
        opener_str, opener_sm = regular_expression.parse(fh)
        skip_whitespace(fh)

        # -- closer: must be a string constant
        if fh.read(1) != "\"":
            error_msg("closing pattern for skip_range can only be a string and must start with a quote like \".", fh)
        closer_sequence = snap_character_string.get_character_code_sequence(fh)
        skip_whitespace(fh)
        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'" % identifier, fh)

        action = CodeFragment(create_skip_range_code(closer_sequence))

        # Enter the skipper as if the opener pattern was a normal pattern and
        # the 'skipper' is the action.
        new_mode.add_match(opener_str, action, opener_sm)
        return True

    elif identifier == "skip_nesting_range":
        # error_msg() is expected to abort here; this option is unsupported.
        error_msg("skip_nesting_range is not yet supported.", fh)

    else:
        value, i = read_until_letter(fh, [">"], Verbose=1)
        if i != 0:
            error_msg("missing closing '>' for mode option '%s'" % identifier, fh)
        value = value.strip()

        # Does the specified option actually exist?
        if not lexer_mode.mode_option_info_db.has_key(identifier):
            error_msg("tried to set option '%s' which does not exist!\n" % identifier + \
                      "options are %s" % repr(lexer_mode.mode_option_info_db.keys()), fh)

        # Is the option of the appropriate value?
        option_info = lexer_mode.mode_option_info_db[identifier]
        if option_info.type != "list" and value not in option_info.domain:
            # BUG FIX: the message referenced undefined names 'Value', 'Option'
            # and 'oi' (NameError at runtime); use the actual local variables.
            error_msg("Tried to set value '%s' for option '%s'. " % (value, identifier) + \
                      "Though, possible \n" + \
                      "for this option are %s" % repr(option_info.domain), fh)

        # Finally, set the option.
        new_mode.add_option(identifier, value)
        return True
def do(fh):
    """Parse an indentation setup of the form

           [ \\t]                                       => grid 4;
           [:intersection([:alpha:], [\\X064-\\X066]):] => space 1;

    i.e. a pattern, '=>', and an indentation specifier. For the specifiers
    'space', 'grid' and 'bad' the pattern must be matchable by a single
    character (a character set). Returns the sealed IndentationSetup once
    the closing '>' is found.

    NOTE: Catching of EOF happens in the caller: parse_section(...).
    """
    indentation_setup = IndentationSetup(fh)

    skip_whitespace(fh)
    while True:
        skip_whitespace(fh)
        if check(fh, ">"):
            indentation_setup.seal()
            indentation_setup.consistency_check(fh)
            return indentation_setup

        # Left hand side: a regular expression / character set.
        pattern_str, state_machine = regular_expression.parse(fh)

        skip_whitespace(fh)
        if not check(fh, "=>"):
            error_msg("Missing '=>' after character set definition.", fh)

        skip_whitespace(fh)
        identifier = read_identifier(fh)
        if identifier == "":
            error_msg("Missing identifier for indentation element definition.", fh)

        verify_word_in_list(identifier,
                            ["space", "grid", "bad", "newline", "suppressor"],
                            "Unrecognized indentation specifier '%s'." % identifier, fh)

        # 'space', 'bad' and 'grid' require a pattern matched by exactly one
        # character: extract its trigger set from the init state.
        trigger_set = None
        if identifier in ["space", "bad", "grid"]:
            if len(state_machine.states) != 2:
                # (typo fixed: 'addmissible' -> 'admissible')
                error_msg("For indentation '%s' only patterns are admissible which\n" % identifier + \
                          'can be matched by a single character, e.g. " " or [a-z].', fh)
            transition_map = state_machine.get_init_state().transitions().get_map()
            assert len(transition_map) == 1
            trigger_set = transition_map.values()[0]

        skip_whitespace(fh)
        if identifier == "space":
            value = read_integer(fh)
            if value is not None:
                indentation_setup.specify_space(pattern_str, trigger_set, value, fh)
            else:
                # Not a number received; is it a variable name?
                variable = read_identifier(fh)
                if variable != "":
                    indentation_setup.specify_space(pattern_str, trigger_set, variable, fh)
                else:
                    # Default: each 'space' character counts as one column.
                    indentation_setup.specify_space(pattern_str, trigger_set, 1, fh)

        elif identifier == "grid":
            value = read_integer(fh)
            if value is not None:
                indentation_setup.specify_grid(pattern_str, trigger_set, value, fh)
            else:
                # Not a number received; is it a variable name?
                skip_whitespace(fh)
                variable = read_identifier(fh)
                if variable != "":
                    indentation_setup.specify_grid(pattern_str, trigger_set, variable, fh)
                else:
                    error_msg("Missing integer or variable name after keyword 'grid'.", fh)

        elif identifier == "bad":
            indentation_setup.specify_bad(pattern_str, trigger_set, fh)

        elif identifier == "newline":
            indentation_setup.specify_newline(pattern_str, state_machine, fh)

        elif identifier == "suppressor":
            indentation_setup.specify_suppressor(pattern_str, state_machine, fh)

        else:
            assert False, "Unreachable code reached."

        if not check(fh, ";"):
            error_msg("Missing ';' after indentation '%s' specification." % identifier, fh)
def parse_mode_option(fh, new_mode):
    """Parse a single mode option '<name: value>' and attach it to 'new_mode'.

    Knows the skipper options 'skip' and 'skip_range'; 'skip_nesting_range'
    is recognized but not supported. Any other option is read verbatim up to
    the closing '>' and validated against lexer_mode.mode_option_info_db.

    Returns True if an option was parsed, False if no option starts here.
    """
    skip_whitespace(fh)

    # Mode options start with '<'.
    if fh.read(1) != "<":
        return False

    skip_whitespace(fh)
    identifier = read_identifier(fh).strip()
    if identifier == "":
        # (typo fixed: 'identifer' -> 'identifier')
        error_msg("missing identifier after start of mode option '<'", fh)

    skip_whitespace(fh)
    if fh.read(1) != ":":
        error_msg("missing ':' after option name '%s'" % identifier, fh)
    skip_whitespace(fh)

    if identifier == "skip":
        # A skipper 'eats' characters at the beginning of a pattern that belong
        # to a specified set of characters. A useful application is most probably
        # the whitespace skipper '[ \t\n]'. The skipper definition allows quex to
        # implement a very effective way to skip these regions.
        pattern_str, trigger_set = regular_expression.parse_character_set(fh, PatternStringF=True)

        skip_whitespace(fh)
        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'." % identifier, fh)
        if trigger_set.is_empty():
            # BUG FIX: '%' was applied to a string without a conversion
            # specifier, raising TypeError instead of reporting the error.
            error_msg("Empty trigger set for skipper '%s'." % identifier, fh)

        # TriggerSet skipping is implemented the following way: As soon as one
        # element of the trigger set appears, the state machine enters the
        # 'trigger set skipper section'.
        opener_sm = StateMachine()
        opener_sm.add_transition(opener_sm.init_state_index, trigger_set, AcceptanceF=True)
        action = CodeFragment(create_skip_code(trigger_set))

        # Enter the skipper as if the opener pattern was a normal pattern and
        # the 'skipper' is the action.
        new_mode.add_match(pattern_str, action, opener_sm)
        return True

    elif identifier == "skip_range":
        # A non-nesting skipper can contain a full fledged regular expression as
        # opener, since it only effects the trigger. Not so the nested range
        # skipper -- see below.

        # -- opener
        skip_whitespace(fh)
        opener_str, opener_sm = regular_expression.parse(fh)
        skip_whitespace(fh)

        # -- closer: must be a string constant
        if fh.read(1) != "\"":
            error_msg("closing pattern for skip_range can only be a string and must start with a quote like \".", fh)
        closer_sequence = snap_character_string.get_character_code_sequence(fh)
        skip_whitespace(fh)
        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'" % identifier, fh)

        action = CodeFragment(create_skip_range_code(closer_sequence))

        # Enter the skipper as if the opener pattern was a normal pattern and
        # the 'skipper' is the action.
        new_mode.add_match(opener_str, action, opener_sm)
        return True

    elif identifier == "skip_nesting_range":
        # error_msg() is expected to abort here; this option is unsupported.
        error_msg("skip_nesting_range is not yet supported.", fh)

    else:
        value, i = read_until_letter(fh, [">"], Verbose=1)
        if i != 0:
            error_msg("missing closing '>' for mode option '%s'" % identifier, fh)
        value = value.strip()

        # Does the specified option actually exist?
        if not lexer_mode.mode_option_info_db.has_key(identifier):
            error_msg("tried to set option '%s' which does not exist!\n" % identifier + \
                      "options are %s" % repr(lexer_mode.mode_option_info_db.keys()), fh)

        # Is the option of the appropriate value?
        option_info = lexer_mode.mode_option_info_db[identifier]
        if option_info.type != "list" and value not in option_info.domain:
            # BUG FIX: the message referenced undefined names 'Value', 'Option'
            # and 'oi' (NameError at runtime); use the actual local variables.
            error_msg("Tried to set value '%s' for option '%s'. " % (value, identifier) + \
                      "Though, possible \n" + \
                      "for this option are %s" % repr(option_info.domain), fh)

        # Finally, set the option.
        new_mode.add_option(identifier, value)
        return True