def __get_inverse_state_machine_that_finds_end_of_core_expression(PostConditionSM):
    """In case of a pseudo-ambiguous post condition one needs to go backwards
       in order to search for the end of the core condition. This function
       creates the inverse state machine that is able to go backwards.

       NOTE: This is a special case, because one already knows that the state
       machine reaches the acceptance state sometime (this is where it actually
       started). That means that in states other than acceptance states one can
       take out the 'drop out' triggers since they CANNOT occur. This enables
       some speed-up when going backwards.
    """
    result = PostConditionSM.get_inverse()
    result = nfa_to_dfa.do(result)
    result = hopcroft.do(result)

    # -- delete 'drop-out' transitions in non-acceptance states
    #    NOTE: When going backwards one already knows that the acceptance
    #          state (the init state of the post condition) is reached, see above.
    for state in result.states.values():
        # -- acceptance states can have 'drop-out' (actually, they need to have)
        if state.is_acceptance(): continue
        state.transitions().replace_drop_out_target_states_with_adjacent_targets()

    result = nfa_to_dfa.do(result)
    result = hopcroft.do(result)

    # Acceptance states need to be marked: Store input position.
    # NOTE: When tracing backwards the match is guaranteed, but there might
    #       still be some 'trail' in case of iterations that are not directly
    #       iterated to the ambiguous post condition. Thus drop out may
    #       happen and it must be clear where to put the input pointer in this case.
    return result
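# Sketch (hypothetical dict representation, not quex's StateMachine API): what
# 'get_inverse()' conceptually does -- flip every edge, so that the acceptance
# state of the forward machine becomes the entry point of the backward run.
forward  = {0: {"a": 1}, 1: {"b": 2}}     # accepts "ab", acceptance state: 2
backward = {}
for src, edges in forward.items():
    for char, dst in edges.items():
        backward.setdefault(dst, {})[char] = src
print(backward)   # {1: {'a': 0}, 2: {'b': 1}} -- recognizes "ab" read backwards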
def philosophical_cut(core_sm, post_context_sm):
    """The 'philosophical cut' is a technique introduced by Frank-Rene Schaefer
       to produce a pair of a core- and a post-condition that otherwise would
       be forward and backward ambiguous. The philosophical ground for this
       cut is 'greed', i.e. a core pattern should eat as many characters as it
       can. This idea is followed during the whole construction of the lexical
       analyzer.

       For the case of total ambiguity 'x+/x+', this idea translates into
       leaving the iteration in the core condition and cutting the iteration
       in the post condition. Thus 'x+/x+' is transformed into 'x+/x' and
       can be solved by the technique for forward ambiguous post conditions.
    """
    core_acceptance_state_list = core_sm.get_acceptance_state_list()

    pcsm_init_state = post_context_sm.get_init_state()
    for csm_state in core_acceptance_state_list:
        __dive_to_cut_iteration(core_sm, csm_state, post_context_sm, pcsm_init_state,
                                SM1_Path=[post_context_sm.init_state_index])

    # By means of cutting, some states might have become bold. That is, they
    # have only an epsilon transition. Thus, a transformation NFA->DFA
    # and a Hopcroft optimization are required.
    new_post_sm = nfa_to_dfa.do(post_context_sm)
    new_post_sm = hopcroft.do(new_post_sm)

    return new_post_sm
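# Illustration (plain 're', not quex): after the cut, 'x+/x+' behaves like
# 'x+/x' -- a greedy core with a one-character lookahead. Any split of the
# original pair would have been valid; the cut picks the greediest one.
import re
m = re.match(r"(x+)(?=x)", "xxxx")
print(m.group(1))   # 'xxx' -- the core eats all it can, one 'x' remains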
def do(sm):
    state_list = sm.states.items()
    for state_index, state in state_list:
        # Get the 'transition_list', i.e. a list of pairs (TargetState, NumberSet)
        # which indicates what target state is reached via what number set.
        transition_list = state.transitions().get_map().items()

        # Clear the state's transitions, now. This way it can absorb new
        # transitions to intermediate states.
        state.transitions().clear()

        # Loop over all transitions
        for target_state_index, number_set in transition_list:
            # We take the intervals with 'PromiseToTreatWellF' even though they
            # are changed. This is because the intervals would be lost anyway
            # after the state split, so we use the same memory and do not
            # cause a time consuming memory copy and constructor calls.
            interval_list = number_set.get_intervals(PromiseToTreatWellF=True)

            # 1st, check whether a modification is necessary at all
            modification_required_f = False
            for interval in interval_list:
                if interval.begin >= 0x10000: modification_required_f = True; break

            if modification_required_f == False:
                sm.states[state_index].add_transition(number_set, target_state_index)
                continue

            # Now, intermediate states may be added
            for interval in interval_list:
                create_intermediate_states(sm, state_index, target_state_index, interval)

    result = hopcroft_minimization.do(nfa_to_dfa.do(sm), CreateNewStateMachineF=False)
    return result
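# Why 0x10000? That is the first code point beyond the Basic Multilingual
# Plane, which suggests a UTF-16 style split: everything below it fits into
# a single code unit and can keep its transition, while everything at or
# above it needs a surrogate pair, i.e. a detour over an intermediate state.
# A self-contained sketch of the pair computation (not quex's API):
def utf16_units(cp):
    if cp < 0x10000:
        return [cp]                                       # one unit, no split
    cp -= 0x10000
    return [0xD800 + (cp >> 10), 0xDC00 + (cp & 0x3FF)]  # surrogate pair

print([hex(u) for u in utf16_units(0x20AC)])    # ['0x20ac']
print([hex(u) for u in utf16_units(0x10348)])   # ['0xd800', '0xdf48']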
def __get_DFA_compliant_state_machine(SM):
    result = SM
    if not result.is_DFA_compliant():
        result = nfa_to_dfa.do(result)
        result = hopcroft.do(result, CreateNewStateMachineF=False)

    pre_sm = result.core().pre_context_sm()
    if pre_sm != None:
        # If the pre-context state machine is not DFA compliant,
        # then make it compliant.
        if not pre_sm.is_DFA_compliant():
            pre_sm = nfa_to_dfa.do(pre_sm)
            pre_sm = hopcroft.do(pre_sm, CreateNewStateMachineF=False)
            result.replace_pre_context_state_machine(pre_sm)

    return result
def detect_backward(CoreStateMachine, PostConditionStateMachine):
    """A 'backward ambiguity' denotes the case where it cannot be clearly
       determined how far to go back from the end of a post-condition.

       NOTE: This does not mean that the post-condition is ambiguous. Many
       cases that are backward ambiguous can be handled by quex's normal
       post-condition handling.

       Examples:  x/x+   is backward ambiguous because in a stream
                         of 'x' one cannot determine with a pure
                         state machine where to stop. This case,
                         though, can be handled by the normal post-
                         condition implementation.

                  x+/x+  is backward ambiguous and cannot be handled
                         by the normal implementation. In fact, this
                         specification does not allow any conclusions
                         about the user's intent where to reset the
                         input after the match.
    """
    __assert_state_machines(CoreStateMachine, PostConditionStateMachine)

    my_post_context_sm = PostConditionStateMachine.clone()

    # (*) Create a modified version of the post condition, where the
    #     initial state is an acceptance state, and no other. This
    #     allows the detector to trigger on 'iteration'.
    #
    #     -- delete all acceptance states in the post condition
    # for state in my_post_context_sm.states.values():
    #     state.set_acceptance(False)
    #
    #     -- set the initial state as acceptance state
    # my_post_context_sm.get_init_state().set_acceptance(True)

    my_core_sm = CoreStateMachine.get_inverse()
    my_core_sm = nfa_to_dfa.do(my_core_sm)
    my_core_sm = hopcroft.do(my_core_sm)

    tmp = deepcopy(PostConditionStateMachine)
    my_post_context_sm = tmp.get_inverse()
    my_post_context_sm = nfa_to_dfa.do(my_post_context_sm)
    my_post_context_sm = hopcroft.do(my_post_context_sm)

    return detect_forward(my_post_context_sm, my_core_sm)
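# Illustration (plain Python, no quex types): for core 'x+' and post 'x+'
# every split of "xxxx" into non-empty halves is a valid (core, post) pair,
# so no backward walk can find a unique point to reset the input pointer.
lexeme = "xxxx"
print([(lexeme[:i], lexeme[i:]) for i in range(1, len(lexeme))])
# [('x', 'xxx'), ('xx', 'xx'), ('xxx', 'x')]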
def __beautify(the_state_machine):
    ## assert the_state_machine.get_orphaned_state_index_list() == [], \
    ##        "before conversion to DFA: orphaned states " + repr(the_state_machine)
    result = nfa_to_dfa.do(the_state_machine)
    ## assert the_state_machine.get_orphaned_state_index_list() == [], \
    ##        "after conversion to DFA: orphaned states " + repr(the_state_machine)
    result = hopcroft.do(result)  # , CreateNewStateMachineF=False)
    ## assert the_state_machine.get_orphaned_state_index_list() == [], \
    ##        "after hopcroft minimization: orphaned states " + repr(the_state_machine)
    return result
def beautify(the_state_machine):
    ## assert the_state_machine.get_orphaned_state_index_list() == [], \
    ##        "before conversion to DFA: orphaned states " + repr(the_state_machine)
    result = nfa_to_dfa.do(the_state_machine)
    ## assert the_state_machine.get_orphaned_state_index_list() == [], \
    ##        "after conversion to DFA: orphaned states " + repr(the_state_machine)
    result = hopcroft.do(result, CreateNewStateMachineF=False)
    ## assert the_state_machine.get_orphaned_state_index_list() == [], \
    ##        "after hopcroft minimization: orphaned states " + repr(the_state_machine)
    return result
def do(the_state_machine, pre_context_state_machine):
    """Sets up a pre-condition to the given state machine. This process
       is entirely different from any sequentialization or parallelization
       of state machines. Here, the state machine representing the pre-
       condition is **not** webbed into the original state machine!

       Instead, the following happens:

          -- the pre-condition state machine is inverted, because
             it is to be walked through backwards.
          -- the inverted state machine is marked with the state machine id
             of the_state_machine.
          -- the original state machine will refer to the inverse
             state machine of the pre-condition.
          -- the initial state origins and the origins of the acceptance
             states are marked as 'pre-conditioned' indicating the id
             of the inverted state machine of the pre-condition.
    """
    #___________________________________________________________________________________________
    # (*) do some consistency checking
    assert the_state_machine.__class__.__name__ == "StateMachine"
    assert pre_context_state_machine.__class__.__name__ == "StateMachine"
    # -- state machines with no states are senseless here.
    assert not the_state_machine.is_empty()
    assert not pre_context_state_machine.is_empty()
    # -- trivial pre-conditions should be added last, for simplicity
    assert not the_state_machine.core().pre_context_begin_of_line_f(), \
           "This function was not designed to deal with trivially pre-conditioned state machines." + \
           "Please, make sure the trivial pre-conditioning happens *after* regular pre-conditions."
    #___________________________________________________________________________________________

    # (*) invert the state machine of the pre-condition
    inverse_pre_context = pre_context_state_machine.get_inverse()
    inverse_pre_context = nfa_to_dfa.do(inverse_pre_context)
    inverse_pre_context = hopcroft.do(inverse_pre_context)

    # (*) let the state machine refer to it
    #     [Is this necessary? Is it not enough that the acceptance origins point to it? <fschaef>]
    the_state_machine.core().set_pre_context_sm(inverse_pre_context)
    pre_context_sm_id = inverse_pre_context.get_id()

    # (*) create origin data, in case there is none yet, create a new one
    #     (do not delete, otherwise existing information gets lost)
    for state in the_state_machine.states.values():
        if not state.is_acceptance(): continue
        state.core().set_pre_context_id(pre_context_sm_id)

    return the_state_machine
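# Sketch (standard 're', hypothetical helper name): checking a pre-condition
# by walking backwards amounts to matching the *inverted* pattern against the
# reversed text in front of the current input position.
import re
def preceded_by(inverted_pattern, text, pos):
    return re.match(inverted_pattern, text[:pos][::-1]) is not None

print(preceded_by("ba", "xabyy", 3))   # True:  "ab" precedes position 3
print(preceded_by("ba", "xbayy", 3))   # False: "ba" precedes position 3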
def do(sm):
    """The UTF8 encoding causes a single unicode character code being translated
       into a sequence of bytes. A state machine triggering on unicode characters
       can be converted into a state machine triggering on UTF8 bytes.

       For this a simple transition on a character 'X':

            [ 1 ]---( X )--->[ 2 ]

       needs to be translated into a sequence of state transitions

            [ 1 ]---(x0)--->[ S0 ]---(x1)--->[ S1 ]---(x2)--->[ 2 ]

       where x0, x1, x2 are the UTF8 bytes that represent unicode 'X'.
       States S0 and S1 are intermediate states created only so that
       x0, x1, and x2 can trigger. Note, that the UTF8 sequence ends at
       the same state '2' as the previous single trigger 'X'.
    """
    state_list = sm.states.items()
    for state_index, state in state_list:
        # Get the 'transition_list', i.e. a list of pairs (TargetState, NumberSet)
        # which indicates what target state is reached via what number set.
        transition_list = state.transitions().get_map().items()

        # Clear the state's transitions, now. This way it can absorb new
        # transitions to intermediate states.
        state.transitions().clear()

        # Loop over all transitions
        for target_state_index, number_set in transition_list:
            # We take the intervals with 'PromiseToTreatWellF' even though they
            # are changed. This is because the intervals would be lost anyway
            # after the state split, so we use the same memory and do not
            # cause a time consuming memory copy and constructor calls.
            for interval in number_set.get_intervals(PromiseToTreatWellF=True):
                create_intermediate_states(sm, state_index, target_state_index, interval)

    return hopcroft_minimization.do(nfa_to_dfa.do(sm), CreateNewStateMachineF=False)
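# The byte chains that the intermediate states spell out, checked with plain
# Python (independent of quex's NumberSet/Interval machinery):
for ch in (u"A", u"\u20ac", u"\U00010348"):
    print([hex(b) for b in bytearray(ch.encode("utf-8"))])
# ['0x41']                           -- 1 byte,  no intermediate state
# ['0xe2', '0x82', '0xac']           -- 3 bytes, two intermediate states
# ['0xf0', '0x90', '0x8d', '0x88']   -- 4 bytes, three intermediate states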
def fit_state_machine(SM):
    if not SM.is_DFA_compliant():
        result = nfa_to_dfa.do(SM)
    else:
        result = SM
    result = hopcroft.do(result, CreateNewStateMachineF=False)
    return result
def get_combined_state_machine(StateMachine_List, FilterDominatedOriginsF=True):
    """Creates a DFA state machine that incorporates the parallel
       process of all patterns passed as state machines in
       the StateMachine_List. The origins of each state machine
       are kept in the final states, if they are not dominated.

       Performs: -- parallelization
                 -- translation from NFA to DFA
                 -- Frank Schaefer's adapted Hopcroft optimization.

       Again: The state machine ids of the original state machines
              are traced through the whole process.

       FilterDominatedOriginsF, if set to False, disables the filtering
              of dominated origins. This is important for pre-conditions,
              because all successful patterns need to be reported!
    """
    def __check(Place, sm):
        __check_on_orphan_states(Place, sm)
        __check_on_init_state_not_acceptance(Place, sm)

    def __check_on_orphan_states(Place, sm):
        orphan_state_list = sm.get_orphaned_state_index_list()
        if orphan_state_list == []: return
        error_msg("After '%s'" % Place + "\n" + \
                  "Orphaned state(s) detected in regular expression (optimization lack).\n" + \
                  "Please, log a defect at the project's website quex.sourceforge.net.\n"   + \
                  "Orphan state(s) = " + repr(orphan_state_list) + "\n")

    def __check_on_init_state_not_acceptance(Place, sm):
        init_state = sm.get_init_state()
        if init_state.core().is_acceptance():
            error_msg("After '%s'" % Place + "\n" + \
                      "The initial state is 'acceptance'. This should never appear.\n" + \
                      "Please, log a defect at the project's website quex.sourceforge.net.\n")

        if filter(lambda origin: origin.is_acceptance(), init_state.origins().get_list()) != []:
            error_msg("After '%s'" % Place + "\n" + \
                      "The initial state contains an origin that is 'acceptance'. This should never appear.\n" + \
                      "Please, log a defect at the project's website quex.sourceforge.net.\n")

    # (1) mark at each state machine the machine and states as 'original'.
    #
    #     This is necessary to trace in the combined state machine the
    #     pattern that actually matched. Note, that a state machine in
    #     the StateMachine_List represents one possible pattern that can
    #     match the current input.
    #
    map(lambda x: x.mark_state_origins(), StateMachine_List)
    for sm in StateMachine_List:
        assert sm.is_DFA_compliant(), repr(sm)

    # (2) setup all patterns in parallel
    sm = parallelize.do(StateMachine_List, CommonTerminalStateF=False)  # , CloneF=False)
    __check("Parallelization", sm)

    # (3) convert the state machine to a DFA (parallelization created an NFA)
    sm = nfa_to_dfa.do(sm)
    __check("NFA to DFA", sm)

    # (4) determine for each state in the DFA what is the dominating original state
    if FilterDominatedOriginsF: sm.filter_dominated_origins()
    __check("Filter Dominated Origins", sm)

    # (5) perform Hopcroft optimization
    #     Note, that the Hopcroft optimization does consider the original acceptance
    #     states when deciding whether two state sets are equivalent.
    sm = hopcroft.do(sm)
    __check("Hopcroft Minimization", sm)

    return sm
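# Illustration (standard 're', not quex): running two patterns "in parallel"
# and tracing which original pattern accepted -- the role played by the
# origins that the combined state machine above carries through.
import re
combined = re.compile(r"(?P<IDENTIFIER>[a-z]+)|(?P<NUMBER>[0-9]+)")
for lexeme in ("counter", "4711"):
    m = combined.match(lexeme)
    print("%s matched by %s" % (m.group(), m.lastgroup))
# counter matched by IDENTIFIER
# 4711 matched by NUMBER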
def parse_mode_option(fh, new_mode):
    LanguageDB = Setup.language_db

    def fit_state_machine(SM):
        if not SM.is_DFA_compliant():
            result = nfa_to_dfa.do(SM)
        else:
            result = SM
        result = hopcroft.do(result, CreateNewStateMachineF=False)
        return result

    identifier = read_option_start(fh)
    if identifier == None: return False

    verify_word_in_list(identifier, lexer_mode.mode_option_info_db.keys(),
                        "mode option", fh.name, get_current_line_info_number(fh))

    if identifier == "skip":
        # A skipper 'eats' characters at the beginning of a pattern that belong
        # to a specified set of characters. A useful application is most probably
        # the whitespace skipper '[ \t\n]'. The skipper definition allows quex to
        # implement a very effective way to skip these regions.
        pattern_str, trigger_set = regular_expression.parse_character_set(fh, PatternStringF=True)
        skip_whitespace(fh)

        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'." % identifier, fh)

        if trigger_set.is_empty():
            error_msg("Empty trigger set for skipper '%s'." % identifier, fh)

        # TriggerSet skipping is implemented the following way: As soon as one element of the
        # trigger set appears, the state machine enters the 'trigger set skipper section'.
        # Enter the skipper as if the opener pattern was a normal pattern and the 'skipper'
        # is the action.
        # NOTE: The correspondent CodeFragment for skipping is created in 'implement_skippers(...)'
        pattern_sm = StateMachine()
        pattern_sm.add_transition(pattern_sm.init_state_index, trigger_set, AcceptanceF=True)

        # Skipper code is to be generated later
        action = GeneratedCode(skip_character_set.do,
                               FileName = fh.name,
                               LineN    = get_current_line_info_number(fh))
        action.data["character_set"] = trigger_set

        pattern_sm = fit_state_machine(pattern_sm)

        # For skippers, line and column count detection is not really a topic.
        # It is done in the skipper itself.
        pattern_sm.side_info = SideInfo()

        new_mode.add_match(pattern_str, action, pattern_sm)

        return True

    elif identifier in ["skip_range", "skip_nested_range"]:
        # A non-nesting skipper can contain a full-fledged regular expression as opener,
        # since it only affects the trigger. Not so the nested range skipper -- see below.

        # -- opener
        skip_whitespace(fh)
        if identifier == "skip_nested_range":
            # Nested range state machines only accept 'strings', not state machines
            opener_str, opener_sequence = parse_string_constant(fh, "Opener pattern for 'skip_nested_range'")

            opener_sm = StateMachine()
            idx = opener_sm.init_state_index
            for letter in opener_sequence:
                idx = opener_sm.add_transition(idx, letter)
            opener_sm.states[idx].set_acceptance(True)
        else:
            opener_str, opener_sm = regular_expression.parse(fh)
            # For 'range skipping' the opener sequence is not needed, only the opener state
            # machine is webbed into the pattern matching state machine.
            opener_sequence = None

        skip_whitespace(fh)

        # -- closer
        closer_str, closer_sequence = parse_string_constant(fh, "Closing pattern for 'skip_range' or 'skip_nested_range'")
        skip_whitespace(fh)
        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'" % identifier, fh)

        # Skipper code is to be generated later
        generator_function = {
            "skip_range":        skip_range.do,
            "skip_nested_range": skip_nested_range.do,
        }[identifier]
        action = GeneratedCode(generator_function,
                               FileName = fh.name,
                               LineN    = get_current_line_info_number(fh))

        action.data["opener_sequence"] = opener_sequence
        action.data["closer_sequence"] = closer_sequence
        action.data["mode_name"]       = new_mode.name

        fit_state_machine(opener_sm)

        # For skippers, line and column count detection is not really a topic.
        # It is done in the skipper itself.
        opener_sm.side_info = SideInfo()

        new_mode.add_match(opener_str, action, opener_sm)

        return True

    elif identifier == "indentation":
        value = indentation_setup.do(fh)

        # Enter 'Newline' and 'Suppressed Newline' as matches into the engine.
        # Similar to skippers, the indentation count is then triggered by the newline.
        # -- Suppressed Newline = Suppressor followed by Newline,
        #    then the newline does not trigger indentation counting.
        suppressed_newline_pattern = ""
        if value.newline_suppressor_state_machine.get() != None:
            suppressed_newline_pattern = \
                  "(" + value.newline_suppressor_state_machine.pattern_str + ")" \
                + "(" + value.newline_state_machine.pattern_str + ")"

            suppressed_newline_sm = \
                sequentialize.do([value.newline_suppressor_state_machine.get(),
                                  value.newline_state_machine.get()])

            FileName = value.newline_suppressor_state_machine.file_name
            LineN    = value.newline_suppressor_state_machine.line_n
            # Go back to start.
            code_fragment = UserCodeFragment("goto %s;" % get_label("$start", U=True), FileName, LineN)

            suppressed_newline_sm = fit_state_machine(suppressed_newline_sm)

            # Analyze pattern for constant number of newlines, characters, etc.
            suppressed_newline_sm.side_info = SideInfo(
                    character_counter.get_newline_n(suppressed_newline_sm),
                    character_counter.get_character_n(suppressed_newline_sm))

            new_mode.add_match(suppressed_newline_pattern, code_fragment, suppressed_newline_sm,
                               Comment="indentation newline suppressor")

        # When there is an empty line, then there shall be no indentation count on it.
        # Here comes the trick:
        #
        #      Let newline
        #      be defined as:      newline ([space]* newline)*
        #
        # This way empty lines are eaten away before the indentation count is activated.

        # -- 'space'
        x0 = StateMachine()
        x0.add_transition(x0.init_state_index, value.indentation_count_character_set(),
                          AcceptanceF=True)
        # -- '[space]*'
        x1 = repeat.do(x0)
        # -- '[space]* newline'
        x2 = sequentialize.do([x1, value.newline_state_machine.get()])
        # -- '([space]* newline)*'
        x3 = repeat.do(x2)
        # -- 'newline ([space]* newline)*'
        x4 = sequentialize.do([value.newline_state_machine.get(), x3])
        # -- NFA to DFA; Hopcroft optimization
        sm = hopcroft.do(nfa_to_dfa.do(x4), CreateNewStateMachineF=False)

        FileName = value.newline_state_machine.file_name
        LineN    = value.newline_state_machine.line_n
        action   = GeneratedCode(indentation_counter.do, FileName, LineN)

        action.data["indentation_setup"] = value

        sm = fit_state_machine(sm)
        sm.side_info = SideInfo(character_counter.get_newline_n(sm),
                                character_counter.get_character_n(sm))
        new_mode.add_match(value.newline_state_machine.pattern_str, action, sm,
                           Comment="indentation newline")

        # Announce the mode to which the setup belongs
        value.set_containing_mode_name(new_mode.name)
    else:
        value = read_option_value(fh)

    # The 'verify_word_in_list()' call must have ensured that the following holds
    assert lexer_mode.mode_option_info_db.has_key(identifier)
    # Is the option's value admissible?
    option_info = lexer_mode.mode_option_info_db[identifier]
    if option_info.domain != None and value not in option_info.domain:
        error_msg("Tried to set value '%s' for option '%s'. " % (value, identifier) + \
                  "Though, possible for this option are only: %s." % repr(option_info.domain)[1:-1], fh)

    # Finally, set the option
    new_mode.add_option(identifier, value)

    return True
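# The 'newline ([space]* newline)*' trick from above, checked with a plain
# regular expression (the whitespace set '[ \t]' stands in for the configured
# indentation characters): empty lines are consumed before the indentation
# counter ever sees the input.
import re
extended_newline = re.compile(r"\n([ \t]*\n)*")
text = "\n   \n\t\n    return"
m = extended_newline.match(text)
print(repr(text[m.end():]))   # '    return' -- indentation counting starts here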