def get_transition_function_and_states_and_symbols_non_reduced(functions):
    """
    Builds a non-reduced nondeterministic transition function out of the
    linear paths of the given functions.

    The linear paths are handled from the shortest to the longest. Each one
    is unrolled into its own chain of freshly numbered states leaving the
    shared "Start" state, with one transition per atom of the path. The state
    reached at the end of a path is final, and every final state gets an
    epsilon transition back to the start state.

    Parameters
    ----------
    functions :
        Forwarded unchanged to get_all_linear_paths

    Returns
    ----------
    transition_function, states, symbols, final_states :
        The transition function, the set of all created states (start state
        included), the set of symbols read, and the set of final states
    """
    initial = State("Start")
    all_states = {initial}
    alphabet = set()
    accepting = set()
    delta = NondeterministicTransitionFunction()
    next_id = 0
    for path in sorted(get_all_linear_paths(functions), key=len):
        # Every path starts again from the shared start state
        tail = initial
        for atom in path:
            label = Symbol(atom)
            alphabet.add(label)
            fresh = State(str(next_id))
            next_id += 1
            all_states.add(fresh)
            delta.add_transition(tail, label, fresh)
            tail = fresh
        # An empty path leaves tail on the start state, which becomes final
        accepting.add(tail)
    for accepting_state in accepting:
        delta.add_transition(accepting_state,
                             finite_automaton.Epsilon(),
                             initial)
    return delta, all_states, alphabet, accepting
def test_to_enfa0(self):
    """ Tests the transformation of a regex to an epsilon NFA """
    sym_a = finite_automaton.Symbol("a")
    sym_b = finite_automaton.Symbol("b")
    sym_c = finite_automaton.Symbol("c")
    eps = finite_automaton.Epsilon()
    # (regex, [(word, expected acceptance), ...]), checked in this order
    scenarios = [
        ("a|b", [
            ([sym_a], True),
            ([sym_b], True),
            ([sym_c], False),
            ([eps], False),
            ([sym_a, sym_b], False),
        ]),
        ("a b", [
            ([sym_a], False),
            ([sym_b], False),
            ([sym_a, sym_b], True),
        ]),
        ("a b c", [
            ([sym_a, sym_b], False),
            ([sym_a, sym_b, sym_c], True),
            ([sym_a, sym_b, sym_a], False),
        ]),
        ("(a b)|c", [
            ([sym_a, sym_b], True),
            ([sym_a, sym_c], False),
            ([sym_b, sym_c], False),
            ([sym_c], True),
        ]),
        ("", [
            ([sym_a], False),
            ([sym_b], False),
            ([sym_c], False),
            ([], False),
        ]),
        ("a*", [
            ([sym_a], True),
            ([], True),
            ([sym_a, sym_a], True),
            ([sym_a, sym_a, sym_a], True),
        ]),
    ]
    for pattern, expectations in scenarios:
        automaton = Regex(pattern).to_epsilon_nfa()
        for word, should_accept in expectations:
            if should_accept:
                self.assertTrue(automaton.accepts(word))
            else:
                self.assertFalse(automaton.accepts(word))
def get_folded_automaton(enfa):
    """
    Returns a copy of the given epsilon NFA extended with extra epsilon
    transitions.

    The function computes a set of state pairs (first, second) and adds one
    epsilon transition first -> second to a copy of the automaton for each
    of them. The pairs are seeded from places where a relation, a "STAR"
    symbol and the inverse of that relation follow each other, and are then
    closed with a worklist under the rules written in the comments below
    ("L -> a- L a" and "L -> L * L").

    The input automaton itself is not modified by this function: all
    transitions are added to the copy.

    NOTE(review): the per-state edge tables only contain the symbols
    returned by enfa.get_symbols(), while the code subscripts them directly
    with the "STAR" symbol; this looks like it assumes "STAR" is always part
    of the alphabet -- confirm with the callers.

    Parameters
    ----------
    enfa :
        The automaton to fold. It must provide get_final_states,
        get_start_states, get_symbols, get_states, copy and be callable as
        enfa(state, symbol) to list the next states.

    Returns
    ----------
    new_enfa :
        The copy of enfa with the computed epsilon transitions added
    """
    in_edges = dict()
    out_edges = dict()
    # Pairs (first, second) already known to need an epsilon transition
    processed = set()
    # Worklist of pairs whose consequences still have to be explored
    to_process = []
    final_states = enfa.get_final_states()
    start_states = enfa.get_start_states()
    alphabet = enfa.get_symbols()
    states = enfa.get_states()
    star = Symbol("STAR")
    # in_edges[state][symbol]: states with a symbol-transition into state
    # out_edges[state][symbol]: states reached from state by symbol
    for state in states:
        in_edges[state] = dict()
        out_edges[state] = dict()
        for a in alphabet:
            in_edges[state][a] = []
            out_edges[state][a] = []
    # Construction
    for from_state in states:
        for symbol in alphabet:
            for next_state in enfa(from_state, symbol):
                # Lists are used as ordered sets: no duplicated entry
                if from_state not in in_edges[next_state][symbol]:
                    in_edges[next_state][symbol].append(from_state)
                if next_state not in out_edges[from_state][symbol]:
                    out_edges[from_state][symbol].append(next_state)
    # Initialization
    # From final state
    for final_state in final_states:
        if not in_edges[final_state][star]:
            continue
        # Only the first STAR predecessor of the final state is considered
        first = in_edges[final_state][star][0]
        for relation in in_edges[first]:
            if relation == star:
                continue
            # presumably get_inverse_relation maps a relation name to the
            # name of its inverse -- helper defined elsewhere, TODO confirm
            inverse_relation = Symbol(get_inverse_relation(relation.value))
            for previous_state in in_edges[first][relation]:
                for start_state in start_states:
                    for next_state in out_edges[start_state].get(
                            inverse_relation, []):
                        # previous_state -- relation --> first -- STAR -->
                        # final_state -- epsilon --> start_state -- inverse -->
                        # next_state
                        to_process.append((previous_state, next_state))
                        processed.add((previous_state, next_state))
    # From other states
    for state in states:
        for relation in out_edges[state]:
            if relation == star:
                continue
            inverse = Symbol(get_inverse_relation(relation.value))
            for middle_state in out_edges[state][relation]:
                for middle_state2 in out_edges[middle_state][star]:
                    if inverse not in out_edges[middle_state2]:
                        continue
                    # state -- relation --> middle_state -- STAR -->
                    # middle_state2 -- inverse --> next_state
                    for next_state in out_edges[middle_state2][inverse]:
                        to_process.append((state, next_state))
                        processed.add((state, next_state))
    # Worklist closure: each popped pair may produce new pairs
    while to_process:
        first, second = to_process.pop()
        # first -- epsilon --> second
        if first == second:
            continue
        # Special cases!
        # A pair leaving a start state is also recorded from every final
        # state
        if first in start_states:
            for final_state in final_states:
                if (final_state, second) not in processed:
                    processed.add((final_state, second))
                    to_process.append((final_state, second))
        # A pair reaching a final state is also recorded towards every start
        # state
        if second in final_states:
            for start_state in start_states:
                if (first, start_state) not in processed:
                    processed.add((first, start_state))
                    to_process.append((first, start_state))
        # Reduction only if between two stars
        if len(out_edges[second][star]) == 0 or \
                len(in_edges[first][star]) == 0:
            continue
        # Only one possibility normally
        # From here on, first and second denote the states on the far side
        # of the surrounding STAR transitions
        first = in_edges[first][star][0]
        second = out_edges[second][star][0]
        # Apply L -> a- L a
        for a in out_edges[second]:
            if a == star:
                continue
            for state in out_edges[second][a]:
                # second -- a ---> state
                opposite = Symbol(get_inverse_relation(a.value))
                if opposite in alphabet:
                    for begin in in_edges[first][opposite]:
                        # begin -- a- ---> first
                        if (begin, state) not in processed:
                            to_process.append((begin, state))
                            processed.add((begin, state))
        # Apply L -> L * L
        # Extend to the right: (first, ...) followed by a known pair
        for next_state in out_edges[second][star]:
            for state in states:
                if (next_state, state) in processed:
                    if (first, state) not in processed:
                        processed.add((first, state))
                        to_process.append((first, state))
        # Extend to the left: a known pair followed by (..., second)
        for previous_state in in_edges[first][star]:
            for state in states:
                if (state, previous_state) in processed:
                    if (state, second) not in processed:
                        processed.add((state, second))
                        to_process.append((state, second))
    # Materialize every computed pair as an epsilon transition on a copy
    new_enfa = enfa.copy()
    epsilon = finite_automaton.Epsilon()
    for first, second in processed:
        new_enfa.add_transition(first, epsilon, second)
        # For convenience, we also transform L -> * L, when not final...
        if second not in final_states:
            for previous_star in in_edges[first][star]:
                new_enfa.add_transition(previous_star, epsilon, second)
    return new_enfa
def _add_epsilon_transition_in_enfa_between(self, state0, state1):
    """ Adds an epsilon transition going from state0 to state1 in the
    epsilon NFA stored in self._enfa

    Parameters
    ----------
    state0 :
        The source state of the transition
    state1 :
        The target state of the transition
    """
    enfa = self._enfa
    enfa.add_transition(state0, finite_automaton.Epsilon(), state1)
def intersection(self, other: Any) -> "PDA":
    """ Gets the intersection of the current PDA with something else

    Equivalent to:
      >> pda and regex

    The other operand is first turned into a deterministic finite
    automaton. The result is then built by exploring, from the pair of
    start states, every reachable pair (PDA state, automaton state).

    Parameters
    ----------
    other : any
        The other part of the intersection

    Returns
    ----------
    new_pda : :class:`~pyformlang.pda.PDA`
        The pda resulting of the intersection

    Raises
    ----------
    NotImplementedError
        When intersecting with something else than a regex or a finite
        automaton
    """
    if isinstance(other, Regex):
        dfa = other.to_epsilon_nfa().to_deterministic()
    elif isinstance(other, FiniteAutomaton):
        dfa = other if other.is_deterministic() \
            else other.to_deterministic()
    else:
        raise NotImplementedError
    dfa_starts = dfa.start_states
    if not dfa_starts:
        # Nothing can be accepted by the automaton: empty PDA
        return PDA()
    converter = _PDAStateConverter(self._states, dfa.states)
    combine = converter.to_pda_combined_state
    dfa_start = next(iter(dfa_starts))
    dfa_finals = dfa.final_states
    result = PDA(start_state=combine(self._start_state, dfa_start),
                 start_stack_symbol=self._start_stack_symbol)
    input_symbols = self._input_symbols.copy()
    input_symbols.add(Epsilon())
    origin = (self._start_state, dfa_start)
    pending = [origin]
    seen = {origin}
    while pending:
        pda_state, dfa_state = pending.pop()
        if pda_state in self._final_states and dfa_state in dfa_finals:
            result.add_final_state(combine(pda_state, dfa_state))
        for symbol in input_symbols:
            reads_nothing = symbol == Epsilon()
            dfa_symbol = finite_automaton.Epsilon() if reads_nothing \
                else finite_automaton.Symbol(symbol.value)
            # On epsilon only the PDA moves, the automaton stays in place
            dfa_targets = [dfa_state] if reads_nothing \
                else dfa(dfa_state, dfa_symbol)
            if len(dfa_targets) == 0:
                continue
            for stack_symbol in self._stack_alphabet:
                pda_moves = self._transition_function(
                    pda_state, symbol, stack_symbol)
                for pda_target, pushed in pda_moves:
                    for dfa_target in dfa_targets:
                        result.add_transition(
                            combine(pda_state, dfa_state),
                            symbol,
                            stack_symbol,
                            combine(pda_target, dfa_target),
                            pushed)
                        successor = (pda_target, dfa_target)
                        if successor not in seen:
                            pending.append(successor)
                            seen.add(successor)
    return result
def intersection(self, other: Any) -> "PDA":
    """ Gets the intersection of the language L generated by the \
    current PDA when accepting by final state with something else

    Currently, it only works for regular languages (represented as \
    regular expressions or finite automata), as the intersection \
    of two context-free languages is not context-free in general \
    (it cannot always be represented with a PDA).

    Equivalent to:
      >> pda and regex

    Parameters
    ----------
    other : any
        The other part of the intersection

    Returns
    ----------
    new_pda : :class:`~pyformlang.pda.PDA`
        The pda resulting of the intersection

    Raises
    ----------
    NotImplementedError
        When intersecting with something else than a regex or a finite
        automaton
    """
    # Normalize the other operand to a deterministic finite automaton
    if isinstance(other, regular_expression.Regex):
        automaton = other.to_epsilon_nfa().to_deterministic()
    elif not isinstance(other, FiniteAutomaton):
        raise NotImplementedError
    elif other.is_deterministic():
        automaton = other
    else:
        automaton = other.to_deterministic()

    initial_candidates = automaton.start_states
    if len(initial_candidates) == 0:
        return PDA()
    state_converter = _PDAStateConverter(self._states, automaton.states)
    automaton_start = list(initial_candidates)[0]
    automaton_finals = automaton.final_states
    combined_start = state_converter.to_pda_combined_state(
        self._start_state, automaton_start)
    product_pda = PDA(start_state=combined_start,
                      start_stack_symbol=self._start_stack_symbol)
    alphabet = self._input_symbols.copy()
    alphabet.add(Epsilon())

    # Depth-first exploration of the reachable (PDA state, DFA state) pairs
    stack = [(self._start_state, automaton_start)]
    visited = {(self._start_state, automaton_start)}
    while stack:
        current_pda, current_dfa = stack.pop()
        is_final_pair = (current_pda in self._final_states
                         and current_dfa in automaton_finals)
        if is_final_pair:
            product_pda.add_final_state(
                state_converter.to_pda_combined_state(current_pda,
                                                      current_dfa))
        for symbol in alphabet:
            if symbol == Epsilon():
                translated = finite_automaton.Epsilon()
                epsilon_move = True
            else:
                translated = finite_automaton.Symbol(symbol.value)
                epsilon_move = False
            if epsilon_move:
                # The automaton does not move on an epsilon of the PDA
                reachable_dfa = [current_dfa]
            else:
                reachable_dfa = automaton(current_dfa, translated)
            if len(reachable_dfa) == 0:
                continue
            for stack_symbol in self._stack_alphabet:
                for following_pda, following_stack in \
                        self._transition_function(current_pda,
                                                  symbol,
                                                  stack_symbol):
                    for following_dfa in reachable_dfa:
                        source = state_converter.to_pda_combined_state(
                            current_pda, current_dfa)
                        target = state_converter.to_pda_combined_state(
                            following_pda, following_dfa)
                        product_pda.add_transition(source,
                                                   symbol,
                                                   stack_symbol,
                                                   target,
                                                   following_stack)
                        if (following_pda, following_dfa) in visited:
                            continue
                        stack.append((following_pda, following_dfa))
                        visited.add((following_pda, following_dfa))
    return product_pda