def __cut_begin_core(A, B, SearchBeginList=None):
    """RETURN: [0] Resulting DFA
               [1] True, if cut has been performed; False else.

    If no cut has been performed, then 'A' is returned as is.
    """
    A.assert_consistency()
    B.assert_consistency()

    if B.is_Empty():
        return A, False

    work_list = WorkList()
    result = DFA(InitStateIndex=work_list.get_result_si(A.init_state_index, None, None),
                 AcceptanceF=A.states[A.init_state_index].is_acceptance())

    epsilon_transition_set = __together_walk(work_list, A, B, result)

    # No cut => return original DFA
    if epsilon_transition_set is None:
        return A, False

    __tail_walk(work_list, A, result)
    result.delete_hopeless_states()

    return __implement_epsilon_transitions(result, A, epsilon_transition_set)
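# Usage sketch (illustrative, not part of the original file): '__cut_begin_core'
# realizes a 'cut begin', i.e. it removes a leading match of 'B' from the
# lexemes of 'A' (compare the law \CutBegin{Q+ Q} = Q* in 'unary_checks'
# below). The DFAs are built with 'regex.do' as seen elsewhere in this
# section; the exact call chain is an assumption.
#
#   A = regex.do("abc", {}).extract_sm()
#   B = regex.do("ab",  {}).extract_sm()
#   result, cut_f = __cut_begin_core(A, B)
#   # cut_f is True; 'result' accepts the tails that remain after cutting a
#   # beginning that matches "ab", here the lexeme "c".
#
#   result, cut_f = __cut_begin_core(A, DFA.Empty())
#   # cut_f is False; 'A' is returned unchanged (see the early exit above).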
def do(sh):
    """Converts a unicode string into a state machine that parses its
    letters sequentially. Each state in the sequence corresponds to the
    successful triggering of a letter. Only the last state, though, is an
    acceptance state. Any bailing out before is 'not accepted'.

    Example: "hey" is translated into the state machine:

        (0)-- 'h' -->(1)-- 'e' -->(2)-- 'y' --> ACCEPTANCE
         |            |            |
        FAIL         FAIL         FAIL

    Note: The state indices are globally unique. But, they are not
    necessarily 0, 1, 2, ...
    """
    assert sh.__class__.__name__ == "StringIO" \
           or sh.__class__.__name__ == "file"

    # resulting state machine
    result = DFA()
    state_idx = result.init_state_index

    # Only '\"' is a special character; any other backslashed character
    # remains as the sequence 'backslash' + character.
    for char_code in get_character_code_sequence(sh):
        state_idx = result.add_transition(state_idx, char_code)

    # When the last state has triggered, it is supposed to end up in 'acceptance'.
    result.states[state_idx].set_acceptance()
    return result
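# Usage sketch (illustrative): feeding the letter sequence "hey" through 'do'
# yields the linear acceptance chain from the docstring. 'StringIO' is assumed
# to be imported as in the other snippets of this section.
#
#   sm = do(StringIO("hey"))
#   # 'sm' walks 'h' -> 'e' -> 'y'; only the state reached after 'y' carries
#   # the acceptance flag.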
def uniqueness(A):
    """Uniqueness of complement:

           A u B = Universal and A n B = Empty
       =>  A = complement(B) and vice versa

       Involution: A = complement(complement(A))
    """
    global count

    B = difference(DFA.Universal(), A)

    # => A u B = Universal and A n B = Empty
    assert identity(union([A, B]), DFA.Universal())
    assert identity(intersection([A, B]), DFA.Empty())

    # Uniqueness of complement
    assert identity(A, complement(B))
    assert identity(B, complement(A))

    # Involution / Double Complement
    assert identity(A, complement(complement(A)))
    assert identity(B, complement(complement(B)))

    count += 1
def __clone_until_acceptance(Dfa, StartSi):
    """Make a new DFA from the graph of 'Dfa', walking from the given
    'StartSi' along all paths until an acceptance state is reached.

    RETURNS: DFA containing the graph.
    """
    correspondance_db = {si: state_index.get() for si in Dfa.states}

    result = DFA(InitStateIndex=correspondance_db[StartSi],
                 AcceptanceF=Dfa.states[StartSi].is_acceptance())

    work_set = set([StartSi])
    done_set = set([StartSi])
    while work_set:
        si = work_set.pop()
        done_set.add(si)  # mark as treated; otherwise cycles re-enter the work set
        state = Dfa.states[si]
        if si == Dfa.init_state_index:
            result_state = result.get_init_state()
            target_si_iterable = state.target_map.get_target_state_index_list()
        elif not state.is_acceptance():
            result_state = state.clone(correspondance_db)
            target_si_iterable = state.target_map.get_target_state_index_list()
        else:
            result_state = DFA_State()
            result_state.set_acceptance()
            target_si_iterable = []

        work_set.update(target_si for target_si in target_si_iterable
                        if target_si not in done_set)
        result.states[correspondance_db[si]] = result_state

    return result
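# Behavior sketch (illustrative): for a DFA matching "ab", cloning from the
# init state copies the 'a'/'b' transition chain, but the acceptance state
# reached after 'b' becomes a fresh, transitionless accepting 'DFA_State'.
# Paths beyond an acceptance state are deliberately not followed.
#
#   tail = __clone_until_acceptance(dfa, dfa.init_state_index)
#   # 'tail' is a structural copy of 'dfa' whose acceptance states have been
#   # stripped of their outgoing transitions.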
def test_on_UCS_range(Trafo, Source, Drain, CharacterBackwardTrafo):
    sm = DFA()
    acc_db = {}
    for x in range(Source.begin, Source.end):
        ti = sm.add_transition(sm.init_state_index, x, AcceptanceF=True)
        acc_id = len(acc_db)
        sm.states[ti].mark_acceptance_id(acc_id)
        acc_db[x] = acc_id

    if Setup.bad_lexatom_detection_f:
        acc_db[None] = E_IncidenceIDs.BAD_LEXATOM
    else:
        acc_db[None] = None

    state_n_before, result = transform(Trafo, sm)
    # assert state_n_before == len(result.states)

    init_state = result.get_init_state()
    count = 0
    for y in range(Drain.begin, Drain.end):
        # Translate the character back into its original
        x = CharacterBackwardTrafo(y)
        # Transit on the translated character
        ti = init_state.target_map.get_resulting_target_state_index(y)
        # Compare resulting state with the expected state's acceptance
        assert_only_acceptance_id(sm.states, ti, acc_db, x, y)
        count += 1

    print "<terminated: %i transitions ok>" % count
def test(TestString):
    print "expression = \"" + TestString + "\""
    sm = DFA()
    try:
        trigger_set = character_set.do(StringIO(TestString + "]"))
        sm.add_transition(sm.init_state_index, trigger_set, AcceptanceF=True)
        print "state machine\n", sm
    except RegularExpressionException, x:
        print repr(x)
def complement_laws(A):
    global count

    first = union([A.clone(), complement(A.clone())])
    assert identity(first, DFA.Universal())

    first = intersection([A.clone(), complement(A.clone())])
    assert identity(first, DFA.Empty())

    count += 1
def domination(A):
    global count

    first = union([A, DFA.Universal()])
    assert identity(first, DFA.Universal())

    first = intersection([A, DFA.Empty()])
    assert identity(first, DFA.Empty())

    count += 1
def identity_vs_empty_and_universal(A):
    global count
    count += 1
    # if count != 3: return

    first = union([A.clone(), DFA.Empty()])
    assert identity(first, A.clone())

    first = intersection([A.clone(), DFA.Universal()])
    assert identity(first, A)
def __init__(self, Name):
    sh = StringIO("[:\\P{Script=%s}:]" % Name)
    self.name = Name
    self.charset = regex.snap_set_expression(sh, {})
    self.sm = DFA()
    self.sm.add_transition(self.sm.init_state_index, self.charset, AcceptanceF=True)
    self.id = self.sm.get_id()
def snap_non_control_character(stream, PatternDict):
    __debug_entry("non-control characters", stream)

    # (*) read first character
    char_code = utf8.__read_one_utf8_code_from_stream(stream)
    if char_code is None:
        error.log("Character could not be interpreted as UTF8 code or End of File reached prematurely.",
                  stream)

    result = DFA()
    result.add_transition(result.init_state_index, char_code, AcceptanceF=True)
    return __debug_exit(result, stream)
def test(LoopMap, ColumnNPerCodeUnit):
    global dial_db
    Setup.buffer_encoding.source_set = NumberSet_All()

    # Generate sample state machines from what the loop map tells.
    appendix_sm_list = _get_appendix_sm_list(LoopMap)

    UserOnLoopExitDoorId = dial_db.new_door_id()
    events = loop.LoopEvents(ColumnNPerCodeUnit, None, UserOnLoopExitDoorId)
    config = loop.LoopConfig(ColumnNPerCodeUnit    = ColumnNPerCodeUnit,
                             LexemeEndCheckF       = False,
                             EngineType            = engine.FORWARD,
                             ReloadStateExtern     = None,
                             UserOnLoopExitDoorId  = UserOnLoopExitDoorId,
                             dial_db               = dial_db,
                             OnReloadFailureDoorId = None,
                             ModeName              = "M",
                             Events                = events)
    config.iid_loop_after_appendix_drop_out = dial.new_incidence_id()

    loop_sm = DFA.from_IncidenceIdMap(
        (lei.character_set, lei.iid_couple_terminal) for lei in LoopMap
    )

    analyzer_list, \
    door_id_loop   = analyzer_construction.do(loop_sm, appendix_sm_list,
                                              config, True)

    print_this(analyzer_list)
def from_character_set(CharacterSet, StateMachineId, Sr, LCCI=None,
                       PatternString="<character set>"):
    return Pattern(DFA.from_character_set(CharacterSet, StateMachineId),
                   PreContextSmToBeReversed = None,
                   BipdSmTobeReversed       = None,
                   LCCI                     = LCCI,
                   PatternString            = PatternString,
                   Sr                       = Sr)
def _get_state_machine_vs_terminal_bad_indentation(BadSpaceCharacterSet,
                                                   IncidenceDb, dial_db):
    """Generate a state machine that detects the 'bad indentation character'.
    Generate a terminal that embodies the defined 'bad indentation character'
    handler from the incidence_db.

    RETURNS: [0] state machine
             [1] terminal
    """
    sm = DFA.from_character_set(BadSpaceCharacterSet,
                                E_IncidenceIDs.INDENTATION_BAD)

    on_bad_indentation_txt = "".join([
        "%s\n" % Lng.RAISE_ERROR_FLAG_BY_INCIDENCE_ID(E_IncidenceIDs.INDENTATION_BAD),
        Lng.SOURCE_REFERENCED(IncidenceDb[E_IncidenceIDs.INDENTATION_BAD])
    ])

    code = Lng.ON_BAD_INDENTATION(on_bad_indentation_txt,
                                  E_IncidenceIDs.INDENTATION_BAD, dial_db)

    terminal = loop.MiniTerminal(code,
                                 "<INDENTATION BAD INDENTATION CHARACTER>",
                                 E_IncidenceIDs.INDENTATION_BAD)
    return sm, terminal
def create_ALL_BUT_NEWLINE_state_machine(stream):
    global Setup
    result = DFA()
    # NOTE: Buffer control characters are supposed to be filtered out by the
    #       code generator.
    trigger_set = NumberSet(Interval(ord("\n"))).get_complement(Setup.buffer_encoding.source_set)
    if trigger_set.is_empty():
        error.log("The set of admissible characters contains only newline.\n"
                  "The '.' for 'all but newline' is an empty set.",
                  SourceRef.from_FileHandle(stream))

    result.add_transition(result.init_state_index, trigger_set, AcceptanceF=True)
    return result
def snap_character_set_expression(stream, PatternDict):
    # GRAMMAR:
    #
    # set_expression:
    #                 [: set_term :]          traditional character set
    #                 \P '{' property string '}'
    #                 '{' identifier '}'
    #
    # set_term:
    #                 "alnum"
    #                 "alpha"
    #                 "blank"
    #                 "cntrl"
    #                 "digit"
    #                 "graph"
    #                 "lower"
    #                 "print"
    #                 "punct"
    #                 "space"
    #                 "upper"
    #                 "xdigit"
    #                 "union"        '(' set_term [ ',' set_term ]+ ')'
    #                 "intersection" '(' set_term [ ',' set_term ]+ ')'
    #                 "difference"   '(' set_term [ ',' set_term ]+ ')'
    #                 "complement"   '(' set_term ')'
    #                 set_expression
    #
    trigger_set = snap_set_expression(stream, PatternDict)

    if trigger_set is None:
        error.log("Regular Expression: snap_character_set_expression called for something\n"
                  "that does not start with '[:', '[' or '\\P'", stream)
    elif trigger_set.is_empty():
        error.warning("Regular Expression: Character set expression results in empty set.",
                      stream)

    # Create a state machine that triggers with the trigger set to SUCCESS.
    # NOTE: The default for the ELSE transition is FAIL.
    sm = DFA()
    sm.add_transition(sm.init_state_index, trigger_set, AcceptanceF=True)
    return __debug_exit(sm, stream)
def do(StateMachineList, CommonTerminalStateF=True):
    """Connect state machines in parallel. 'CommonTerminalStateF' tells
    whether the state machines shall trigger to a common terminal. This may
    help nfa-to-dfa or hopcroft minimization for ISOLATED patterns. A state
    machine that consists of the COMBINATION of patterns MUST set this flag
    to 'False'.
    """
    assert len(StateMachineList) != 0

    def consider(sm):
        return not sm.is_Empty() and sm.get_init_state().has_transitions()

    # Filter out empty state machines from the consideration.
    sm_list       = [ sm for sm in StateMachineList if consider(sm) ]
    empty_sm_list = [ sm for sm in StateMachineList if not consider(sm) ]

    if len(sm_list) < 2:
        if len(sm_list) < 1: result = DFA()
        else:                result = sm_list[0]
        return __consider_empty_state_machines(result, empty_sm_list)

    # (*) Collect all transitions from both state machines into a single one.
    result     = DFA()
    init_state = result.get_init_state()

    # Connect from the new initial state to the initial states of the
    # sms via epsilon transition.
    # Connect from each success state of the sms to the new terminal
    # state via epsilon transition.
    if __nfa_to_dfa_required(sm_list):
        for sm in sm_list:
            result.states.update(sm.states)
            init_state.target_map.add_epsilon_target_state(sm.init_state_index)
        result = nfa_to_dfa.do(result)
    else:
        # Set the 'single_entry' operations.
        init_state.set_single_entry(sm_list[0].get_init_state().single_entry.clone())
        # Add transitions to the states.
        for sm in sm_list:
            init_state.target_map.update(sm.get_init_state().target_map)
            # not __nfa_to_dfa_required(...)
            # => No transition to an init state.
            # => Original init states can be taken out.
            result.states.update(
                (si, state) for si, state in sm.states.iteritems()
                if si != sm.init_state_index
            )

    result.assert_consistency()

    #if CommonTerminalStateF:
    #    __combine_transitionless_acceptance_states(result)

    return __consider_empty_state_machines(result, empty_sm_list)
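# Usage sketch (illustrative): combining two patterns in parallel. The DFAs
# are built with 'regex.do' as in the test code of this section; the exact
# import paths are an assumption.
#
#   sm_a = regex.do("for",    {}).extract_sm()
#   sm_b = regex.do("[0-9]+", {}).extract_sm()
#   combined = do([sm_a, sm_b])
#   # 'combined' matches what either operand matches. If epsilon transitions
#   # were introduced, 'nfa_to_dfa.do' has already resolved them.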
def __do(SM):
    """Creates a state machine that matches the reverse of what 'SM' matches.
    """
    result = DFA(InitStateIndex=SM.init_state_index)

    original_acceptance_state_index_list = SM.get_acceptance_state_index_list()
    if len(original_acceptance_state_index_list) == 0:
        # If there is no acceptance state in a state machine, the state machine
        # cannot match any pattern; it is equivalent to '\Empty'. The reverse
        # of '\Empty' is '\Empty'.
        return DFA.Empty()

    # Ensure that each target state index has a state inside the state machine.
    for state_index in SM.states.keys():
        result.create_new_state(StateIdx=state_index)

    for state_index, state in SM.states.items():
        for target_state_index, trigger_set in state.target_map.get_map().items():
            result.states[target_state_index].add_transition(trigger_set.clone(),
                                                             state_index)
        for target_state_index in state.target_map.get_epsilon_target_state_index_list():
            result.states[target_state_index].target_map.add_epsilon_target_state(state_index)

    # -- Copy all origins of the original state machine.
    # -- We need to cancel any acceptance, because the inverted engine now starts
    #    from a combination of the acceptance states and ends at the initial state.
    for state_index, state in SM.states.items():
        result.states[state_index].single_entry.set(
            cmd.clone() for cmd in state.single_entry
            if cmd.__class__ != SeAccept)  # deepcopy implicit

    # -- Only the ORIGINAL initial state becomes an acceptance state (end of inverse).
    result.states[SM.init_state_index].set_acceptance(True)

    # -- Setup an epsilon transition from a new init state to all previous
    #    acceptance states.
    new_init_state_index = result.create_new_init_state()
    for state_index in original_acceptance_state_index_list:
        result.add_epsilon_transition(new_init_state_index, state_index)

    # -- For uniqueness of state ids, clone the result.
    return result.clone()
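# Behavior sketch (illustrative): reversal flips every transition and swaps
# the roles of init and acceptance states. 'regex.do' is assumed as in the
# test code of this section.
#
#   sm  = regex.do("ab", {}).extract_sm()   # matches "ab"
#   rev = __do(sm)                          # matches "ba"
#   # A new init state connects via epsilon transitions to the former
#   # acceptance states; hence the final 'result.clone()' for unique ids.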
def generate_sm_for_boarders(Boarders, Trafo):
    sm = DFA()
    for ucs_char in Boarders:
        target_idx = index.get()
        sms.line(sm, sm.init_state_index,
                 (ucs_char, target_idx), (ucs_char, target_idx))
        sm.states[target_idx].set_acceptance()

    Trafo.adapt_ranges_to_lexatom_type_range(Setup.lexatom.type_range)
    verdict_f, result = Trafo.do_state_machine(sm)
    assert verdict_f
    return result
def create_range_skipper_code(Language, TestStr, CloserSequence, QuexBufferSize=1024,
                              CommentTestStrF=False, ShowPositionF=False,
                              CounterPrintF=True):
    assert QuexBufferSize >= len(CloserSequence) + 2

    end_str = __prepare(Language)

    sm_close = DFA.from_sequence(CloserSequence)
    closer_pattern = Pattern_Prep(sm_close,
                                  PatternString="<skip range closer>",
                                  Sr=SourceRef_VOID)
    door_id_exit = DoorID.continue_without_on_after_match(dial_db)

    analyzer_list,         \
    terminal_list,         \
    required_register_set, \
    run_time_counter_f = range_skipper.do("MrUnitTest",
                                          CaMap         = LineColumnCount_Default(),
                                          CloserPattern = closer_pattern,
                                          DoorIdExit    = door_id_exit,
                                          ReloadState   = FSM.reload_state,
                                          dial_db       = dial_db)

    loop_code = generator.do_analyzer_list(analyzer_list)

    assert not run_time_counter_f
    __require_variables(required_register_set)

    loop_code.extend(generator.do_terminals(terminal_list, TheAnalyzer=None,
                                            dial_db=dial_db))

    result = create_customized_analyzer_function(Language, TestStr, loop_code,
                                                 QuexBufferSize, CommentTestStrF,
                                                 ShowPositionF, end_str,
                                                 SkipUntilMarkerSet=[],
                                                 LocalVariableDB=deepcopy(variable_db.get()),
                                                 DoorIdOnSkipRangeOpenF=True,
                                                 CounterPrintF=CounterPrintF)
    result = language_defines + result
    result = result.replace("$$TEST_ANALYZER_DIR$$", test_analyzer_dir(Language))
    result = result.replace("$$COMPUTED_GOTOS_CHECK$$", computed_gotos_check_str())
    return result
def unary_checks(Q, operation):
    Q_plus = beautifier.do(repeat.do(Q))
    Q_star = beautifier.do(repeat.do(Q, min_repetition_n=0))

    Q_is_Q_star = identity.do(Q, Q_star)
    Q_is_Q_plus = identity.do(Q, Q_plus)

    # \Cut{Q Q} = \Nothing
    y = operation(Q, Q)
    assert y.is_Nothing()

    # if Q != Q+: \CutBegin{Q+ Q} = Q*
    if not Q_is_Q_plus:
        y = operation(Q_plus, Q)
        assert identity.do(y, Q_star)

    # if Q != Q*: \CutBegin{Q* Q} = Q*
    if not Q_is_Q_star:
        y = operation(Q_star, Q)
        assert identity.do(y, Q_star)

    # \Cut{Q \Nothing} = Q
    y = operation(Q, DFA.Nothing())
    assert identity.do(y, Q)

    # \Cut{\Nothing Q} = \Nothing
    y = operation(DFA.Nothing(), Q)
    assert y.is_Nothing()

    # \Cut{Q \Universal} = \Nothing
    y = operation(Q, DFA.Universal())
    assert y.is_Nothing()

    # NOT: \Cut{\Universal Q} = \Universal
    if not Q_is_Q_star and not Q_is_Q_plus:
        y = operation(Q, DFA.Universal())
        assert y.is_Nothing()

    return Q_star, Q_plus
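# Usage sketch (illustrative): 'unary_checks' is meant to be driven with a
# cut-like operation. The name 'cut_begin' below is an assumed handle on the
# 'cut begin' operation of this section; the real module path may differ.
#
#   Q = regex.do("ab", {}).extract_sm()
#   Q_star, Q_plus = unary_checks(Q, cut_begin)
#   # Verifies, among others, \Cut{Q Q} = \Nothing and \CutBegin{Q+ Q} = Q*.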
def test(A_str):
    print "_____________________________________________________________________"
    if isinstance(A_str, (str, unicode)):
        print ("A = " + A_str).replace("\n", "\\n").replace("\t", "\\t")
        sm = regex.do(A_str, {}).extract_sm()
    else:
        sm = A_str
        print "A = ", sm

    ## print "##sm:", sm.get_string(NormalizeF=False)
    result_1st = complement.do(sm)
    print "complement(A):", result_1st  # .get_string(NormalizeF=False)
    result_2nd = complement.do(result_1st)
    ## print "##2nd:", result_2nd.get_string(NormalizeF=False)
    print
    print "union(A, complement(A)):        All  =", DFA.is_Universal(union.do([sm, result_1st]))
    print "intersection(A, complement(A)): None =", DFA.is_Empty(intersection.do([sm, result_1st]))
    print "identity(A, complement(complement(A)):", identity.do(sm, result_2nd)
    assert not commonality(sm, result_1st)
    assert not commonality(result_1st, result_2nd)
class X:
    def __init__(self, Name):
        sh = StringIO("[:\\P{Script=%s}:]" % Name)
        self.name = Name
        self.charset = regex.snap_set_expression(sh, {})
        self.sm = DFA()
        self.sm.add_transition(self.sm.init_state_index, self.charset, AcceptanceF=True)
        self.id = self.sm.get_id()

    def check(self, SM, TransformFunc):
        """This function throws an exception as soon as one single value is
        not matched according to the expectation.
        """
        print "## [%i] Name = %s" % (self.id, self.name),
        interval_list  = self.charset.get_intervals(PromiseToTreatWellF=True)
        interval_count = len(interval_list)
        for interval in interval_list:
            for i in range(interval.begin, interval.end):
                lexatom_seq = TransformFunc(i)

                # Apply sequence to state machine.
                state = SM.apply_sequence(lexatom_seq)
                if state is None:
                    error(self.sm, SM, lexatom_seq)

                # All acceptance flags must belong to the original state machine.
                acceptance_id_list = [
                    cmd.acceptance_id()
                    for cmd in state.single_entry.get_iterable(SeAccept)
                ]
                if acceptance_id_list and self.id not in acceptance_id_list:
                    print eval("u'\U%08X'" % i)
                    print "#Seq:           ", ["%02X" % x for x in lexatom_seq]
                    print "#acceptance-ids:", acceptance_id_list
                    error(self.sm, SM, lexatom_seq)

        print " (OK=%i)" % interval_count
def do(sm):
    """Sanitization:

       .---------------------------------------------------------.
       | A DFA that has no acceptance states *cannot be healed*  |
       | by this function.                                       |
       '---------------------------------------------------------'

    This operation tries to transform a DFA into something that is
    admissible. Two lexemes are inadmissible and therefore their matching
    DFAs are inadmissible. They are:

       (i)  The zero-length lexeme. It triggers on nothing; the lexer
            remains at the same position while permanently accepting the
            same position.

       (ii) The endless lexeme. It triggers on an infinite number of
            lexatoms.

    The first is healed by removing acceptance from the init state. The
    second is healed by considering states that accept and transit to
    themselves on any character. In that case, the transition on any
    character to itself is removed.

    Notably, the 'Empty' DFA and the 'Universal' DFA cannot be healed. The
    former case is obvious. The 'Universal' DFA has only its initial state
    on which it accepts; after the admissibility removal it has no further
    acceptance states.
    """
    init_state = sm.get_init_state()

    # (i) Acceptance in init state => remove acceptance.
    if init_state.is_acceptance():
        init_state.set_acceptance(False)

    # (ii) Infinite iteration on any input => remove transition to itself.
    for state_index, state in sm.states.iteritems():
        if DFA.is_AcceptAllState(sm, state_index):
            state.target_map.clear()

    sm.delete_hopeless_states()
    return sm
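# Behavior sketch (illustrative) for healing step (i), assuming the usual
# non-minimal construction from 'regex.do':
#
#   sm = regex.do("a*", {}).extract_sm()
#   sm = do(sm)
#   # "a*" accepts the zero-length lexeme in its init state; acceptance there
#   # is removed, leaving a machine that matches like "a+".
#
# For step (ii): a state that accepts and loops on any character would iterate
# endlessly; its self-transition is cleared, and states that thereby lose all
# paths to acceptance are dropped by 'delete_hopeless_states()'.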
def do(SM_List):
    for sm in SM_List:
        sm.assert_consistency()

    if any(sm.is_Empty() for sm in SM_List):  # If one state machine is '\Empty',
        return DFA.Empty()                    # then the intersection is '\Empty'.

    init_state_setup = tuple(sm.init_state_index for sm in SM_List)
    result = DFA(AcceptanceF=intersect_acceptance(init_state_setup, SM_List))

    # Result state setup: A result state is set up out of a state from each
    # DFA. 'state_setup[i]' is the state from DFA 'SM_List[i]'.
    worklist       = [ (result.init_state_index, init_state_setup) ]
    state_setup_db = {}
    N = len(SM_List)
    while worklist:
        state_index, state_setup = worklist.pop()

        # Generate a map that shows what lexatoms trigger to what state
        # combination.
        #
        #     NumberSet    Target DFA_State Combination
        #     [0:23]   --> [ State1, State24, State56 ]
        #     [0:23]   --> [ State5, State21, State55 ]
        #     [24:60]  --> [ State1, State23, State51 ]
        #
        # 'get_intersection_line_up()' only delivers those transitions where
        # there is a transition for each state machine's state.
        line_up = get_intersection_line_up([SM_List[i].states[si].target_map
                                            for i, si in enumerate(state_setup)])
        for target_state_setup, trigger_set in line_up.iteritems():
            assert len(target_state_setup) == N
            target_index, new_f = state_index_for_combination(state_setup_db,
                                                              target_state_setup)
            acceptance_f = intersect_acceptance(target_state_setup, SM_List)
            result.add_transition(state_index, trigger_set, target_index,
                                  AcceptanceF=acceptance_f)
            if new_f:
                worklist.append((target_index, target_state_setup))

    result.delete_hopeless_states()
    return result
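# Usage sketch (illustrative): intersecting two DFAs via the product
# construction above. 'regex.do' is assumed as in the test code of this
# section.
#
#   a    = regex.do("[0-9]+", {}).extract_sm()
#   b    = regex.do("[0-4]+", {}).extract_sm()
#   both = do([a, b])
#   # 'both' accepts exactly the lexemes accepted by all operands, here the
#   # digit strings made of '0'..'4'. Each result state corresponds to a
#   # tuple with one state per input DFA.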
def get_sm_shape_by_name(Name):
    sm = DFA(InitStateIndex=0L)
    if   Name == "linear":      sm, state_n, pic = get_linear(sm)
    elif Name == "butterfly":   sm, state_n, pic = get_butterfly(sm)
    elif Name == "long_loop":   sm, state_n, pic = get_long_loop(sm)
    elif Name == "nested_loop": sm, state_n, pic = get_nested_loop(sm)
    elif Name == "mini_loop":   sm, state_n, pic = get_mini_loop(sm)
    elif Name == "fork":        sm, state_n, pic = get_fork(sm)
    elif Name == "fork2":       sm, state_n, pic = get_fork2(sm)
    elif Name == "fork3":       sm, state_n, pic = get_fork3(sm)
    elif Name == "fork4":       sm, state_n, pic = get_fork4(sm)
    elif Name == "mini_bubble": sm, state_n, pic = get_mini_bubble(sm)
    elif Name == "bubble":      sm, state_n, pic = get_bubble(sm)
    elif Name == "bubble2":     sm, state_n, pic = get_bubble2(sm)
    elif Name == "bubble2b":    sm, state_n, pic = get_bubble2b(sm)
    elif Name == "bubble3":     sm, state_n, pic = get_bubble3(sm)
    elif Name == "bubble4":     sm, state_n, pic = get_bubble4(sm)
    elif Name == "mini_join":   sm, state_n, pic = get_mini_join(sm)
    elif Name == "DEBUG":       sm, state_n, pic = get_DEBUG(sm)
    else:                       sm, state_n, pic = get_tree(sm)
    return sm, state_n, pic
def get_transition_function(iid_map, Codec):
    global dial_db
    if Codec == "UTF8": Setup.buffer_setup("uint8_t", 1, "utf8")
    else:               Setup.buffer_setup("uint32_t", 4, "none")

    Setup.bad_lexatom_detection_f = False
    sm = DFA.from_IncidenceIdMap(iid_map)
    analyzer = analyzer_generator.do(sm, engine.CHARACTER_COUNTER,
                                     dial_db=dial_db, CutF=False)
    tm_txt = do_analyzer(analyzer)
    tm_txt = Lng.GET_PLAIN_STRINGS(tm_txt, dial_db=dial_db)
    tm_txt.append("\n")

    #label = dial_db.get_label_by_door_id(DoorID.incidence(E_IncidenceIDs.MATCH_FAILURE))
    for character_set, iid in iid_map:
        tm_txt.append("%s return (int)%s;\n"
                      % (Lng.LABEL(DoorID.incidence(iid, dial_db)), iid))
    tm_txt.append("%s return (int)-1;\n" % Lng.LABEL(DoorID.drop_out(-1, dial_db)))

    return "".join(tm_txt)
def setup(EntryN, StateOperation):
    sm = DFA()
    examiner = Examiner(sm, RecipeAcceptance)

    si = 1111L
    setup_state_operation(sm, StateOperation, si)
    operation = sm.states[si].single_entry

    examiner.linear_db[sm.init_state_index] = LinearStateInfo()

    predecessor0_recipe = RecipeAcceptance(
        [SeAccept(0)],
        {
            E_IncidenceIDs.CONTEXT_FREE_MATCH: 0,
            10L: -1,                # same for both / no restore
            11L: -2,                # unequal for both
            12L: E_Values.RESTORE,  # same for both / restore same
            13L: E_Values.RESTORE,  # same for both / restore differs
            21L: 0,                 # not present in other
        },
        {
            (E_R.PositionRegister, 12L): 0,
            (E_R.PositionRegister, 13L): 0
        })
def DFA_Newline():
    """Creates a state machine matching newline according to what has been
    specified in the setup (Setup.dos_carriage_return_newline_f). That is,
    if it is a DOS newline, then the state machine represents '\r\n'; if it
    is unix only, then it represents '\n'. If both are required, they are
    implemented in parallel.

    RETURNS: DFA
    """
    UnixF = True
    DosF  = Setup.dos_carriage_return_newline_f

    NL = ord('\n')  # (pure) newline, i.e. line feed
    CR = ord('\r')  # carriage return

    dfa = DFA()
    if UnixF:
        dfa.add_transition(dfa.init_state_index, NL, AcceptanceF=True)
    if DosF:
        idx = dfa.add_transition(dfa.init_state_index, CR, AcceptanceF=False)
        dfa.add_transition(idx, NL, AcceptanceF=True)

    return beautifier.do(dfa)
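# Behavior sketch (illustrative): with the DOS flag set, the returned DFA
# accepts both "\n" and "\r\n" in parallel. Assigning the flag directly on
# 'Setup' is an assumption for the sake of the example.
#
#   Setup.dos_carriage_return_newline_f = True
#   dfa = DFA_Newline()
#   # init --'\n'--> ACCEPT
#   # init --'\r'--> (idx) --'\n'--> ACCEPT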
def test_plug_sequence(ByteSequenceDB):
    L = len(ByteSequenceDB[0])

    for seq in ByteSequenceDB:
        assert len(seq) == L
        for x in seq:
            assert isinstance(x, Interval)

    first_different_byte_index = -1
    for i in range(L):
        x0 = ByteSequenceDB[0][i]
        for seq in ByteSequenceDB[1:]:
            if not seq[i].is_equal(x0):
                first_different_byte_index = i
                break
        if first_different_byte_index != -1:
            break
    if first_different_byte_index == -1:
        first_different_byte_index = 0

    print "# Best To be Displayed by:"
    print "#"
    print "# > " + sys.argv[0] + " " + sys.argv[1] + " | dot -Tsvg -o tmp.svg"
    print "#"
    print "# -------------------------"
    print "# Byte Sequences:"
    i = -1
    for seq in ByteSequenceDB:
        i += 1
        print "# (%i) " % i,
        for x in seq:
            print " " + x.get_string(Option="hex"),
        print
    print "# L    = %i" % L
    print "# DIdx = %i" % first_different_byte_index

    sm = DFA()
    end_index = state_machine.index.get()
    sm.states[end_index] = DFA_State()

    Setup.buffer_setup("", 1, "utf8")

    if Setup.bad_lexatom_detection_f: bad_lexatom_si = index.get()
    else:                             bad_lexatom_si = None

    trafo = Setup.buffer_encoding

    new_first_tm, \
    new_state_db  = trafo.plug_interval_sequences(sm.init_state_index, end_index,
                                                  ByteSequenceDB,
                                                  BadLexatomSi=bad_lexatom_si)

    if bad_lexatom_si is not None:
        new_first_tm[bad_lexatom_si] = trafo._error_range_by_code_unit_db[0]

        # Generate the 'bad lexatom accepter'.
        bad_lexatom_state = DFA_State(AcceptanceF=True)
        bad_lexatom_state.mark_acceptance_id(E_IncidenceIDs.BAD_LEXATOM)
        sm.states[bad_lexatom_si] = bad_lexatom_state

    first_tm = sm.get_init_state().target_map.get_map()
    if end_index in first_tm:
        del first_tm[end_index]
    first_tm.update(new_first_tm)

    sm.states.update(new_state_db)
    sm = beautifier.do(sm)

    if len(sm.get_orphaned_state_index_list()) != 0:
        print "Error: Orphaned States Detected!"

    # Double check that there are no 'circles'.
    predecessor_db = sm.get_predecessor_db()
    assert not any(si in predecessor_db[si] for si in sm.states)

    show_graphviz(sm)