def test_get_repr(self):
    """Check that a regex rebuilt from its own string form gives an equal minimized DFA."""
    original = Regex("a*.(b|c)epsilon")
    rebuilt = Regex(str(original))
    # Build both automata the same way so that only the regex source differs.
    minimal_dfas = [
        regex.to_epsilon_nfa().to_deterministic().minimize()
        for regex in (original, rebuilt)
    ]
    self.assertEqual(minimal_dfas[0], minimal_dfas[1])
def test_dfa_nfa_intersection():
    """Intersect a regex-derived minimized DFA with a hand-built NFA and probe some words."""
    nfa1 = NondeterministicFiniteAutomaton()
    q0, q1, q2, q3 = (finite_automaton.State(index) for index in range(4))
    # Transitions are added in the listed order.
    edges = (
        (q0, symb_a, q1),
        (q0, symb_c, q2),
        (q1, symb_a, q1),
        (q1, symb_b, q2),
        (q2, symb_a, q0),
        (q0, symb_c, q3),
        (q3, symb_a, q1),
    )
    for source, symbol, target in edges:
        nfa1.add_transition(source, symbol, target)
    nfa1.add_start_state(q0)
    nfa1.add_final_state(q1)

    pattern = (
        "((((b.a)|(((a|(b.c))|(c.a)).(((b.c))*."
        "(b.a)))))*.(((a|(b.c))|(c.a)).((b.c))*))"
    )
    dfa = Regex(pattern).to_epsilon_nfa().to_deterministic().minimize()
    dnfa = dfa.get_intersection(nfa1)

    accepted_words = (
        [symb_a],
        [symb_c, symb_a],
        [symb_a, symb_b, symb_a, symb_a],
    )
    rejected_words = (
        [symb_c, symb_b],
        [symb_a, symb_a],
        [symb_b],
    )
    for word in accepted_words:
        assert dnfa.accepts(word)
    for word in rejected_words:
        assert not dnfa.accepts(word)
def test_intersection(self):
    """Intersect a context-free grammar with a regex and with the regex's epsilon-NFA."""
    regex = Regex("a*b*")
    enfa = regex.to_epsilon_nfa()
    symb_a, symb_b = Symbol("a"), Symbol("b")
    self.assertTrue(enfa.accepts([symb_a, symb_a, symb_b, symb_b]))
    self.assertFalse(enfa.accepts([symb_b, symb_b, symb_a]))

    ter_a, ter_b = Terminal("a"), Terminal("b")
    var_s = Variable("S")
    cfg = CFG(
        productions={
            Production(var_s, [ter_a, var_s, ter_b]),
            Production(var_s, [ter_b, var_s, ter_a]),
            Production(var_s, []),
        },
        start_symbol=var_s,
    )
    member_word = [ter_a, ter_a, ter_b, ter_b]
    non_member_word = [ter_a, ter_a, ter_b]
    self.assertTrue(cfg.contains(member_word))
    self.assertFalse(cfg.contains(non_member_word))

    # The same expectations must hold whether we intersect with the regex
    # itself or with the automaton derived from it.
    for other in (regex, enfa):
        cfg_i = cfg.intersection(other)
        self.assertTrue(cfg_i.contains(member_word))
        self.assertFalse(cfg_i.contains(non_member_word))
        self.assertTrue(cfg_i.contains([]))
def _create_cfg_from_regex(cls, head: Variable, regex: Regex,
                           variables=None) -> CFG:
    """Build a CFG in which ``head`` derives the words described by ``regex``.

    The regex is converted to a minimized DFA. Every DFA state gets a fresh
    variable (named from the state and a class-wide counter), every
    transition ``p --x--> q`` becomes the production ``P -> x Q``, and
    ``head`` derives the variable of each start state.

    Parameters
    ----------
    head : Variable
        Head variable; also passed to ``CFG`` as the start symbol.
    regex : Regex
        Regular expression whose symbols become terminals or variables.
    variables : optional
        Container of symbol values to treat as variables. When it is empty
        or ``None``, a symbol is treated as a variable iff its value is
        upper-case.

    Returns
    -------
    CFG
        The grammar built from the DFA. If no production was generated at
        all, a grammar with the single production ``head -> epsilon`` is
        returned.
    """
    dfa = regex.to_epsilon_nfa().to_deterministic().minimize()
    transitions = dfa._transition_function._transitions
    state_to_var: Dict[State, Variable] = {}
    productions, terminals, cfg_variables = set(), set(), set()

    for state in dfa.states:
        state_to_var[state] = Variable(f'{state}:{cls.__var_state_counter}')
        cls.__var_state_counter += 1
    cfg_variables.update(state_to_var.values())

    for start_state in dfa.start_states:
        start_var = state_to_var[start_state]
        productions.add(Production(head, [start_var]))
        # A final start state may have no incoming transition, so the
        # transition loop below would never emit its epsilon production and
        # the empty word would be lost.
        if start_state in dfa.final_states:
            productions.add(Production(start_var, []))

    for state_from, outgoing in transitions.items():
        for edge_symb, state_to in outgoing.items():
            if variables:
                is_variable = edge_symb.value in variables
            else:
                is_variable = edge_symb.value.isupper()

            if is_variable:
                edge_object = Variable(edge_symb.value)
                cfg_variables.add(edge_object)
            else:
                edge_object = Terminal(edge_symb.value)
                terminals.add(edge_object)

            productions.add(Production(
                state_to_var[state_from],
                [edge_object, state_to_var[state_to]],
            ))
            if state_to in dfa.final_states:
                productions.add(Production(state_to_var[state_to], []))

    if not productions:
        return CFG(cfg_variables, terminals, head, {Production(head, [])})
    return CFG(cfg_variables, terminals, head, productions)
def from_regex_file(path: str):
    """Build an object via ``from_dfa`` from the regex on the first line of a file.

    Parameters
    ----------
    path : str
        Path of a text file whose first line holds the regular expression.

    Returns
    -------
    Whatever ``from_dfa`` returns for the minimized DFA of that regex.
    """
    # The context manager closes the file even if reading raises.
    with open(path) as file:
        first_line = file.readline()
    regex = Regex(first_line)
    dfa: DeterministicFiniteAutomaton = (
        regex.to_epsilon_nfa().to_deterministic().minimize()
    )
    return from_dfa(dfa)
def test_regex_to_epsilon_nfa():
    """Check the state count and word acceptance of the epsilon-NFA for ``(a|a a b)*``."""
    enfa = Regex("(a|a a b)*").to_epsilon_nfa()
    assert len(enfa.states) == 12

    # (word, expected acceptance), checked in this order.
    expectations = (
        ([symb_a], True),
        ([symb_a, symb_a, symb_b, epsilon], True),
        ([symb_c], False),
        ([epsilon], True),
        ([symb_b, symb_a], False),
    )
    for word, expected in expectations:
        if expected:
            assert enfa.accepts(word)
        else:
            assert not enfa.accepts(word)
def from_text(cls, text: List[str]):
    """Build an instance from grammar lines of the form ``HEAD regex-body``.

    Each line is split at its first space into a head variable and a regex
    body. The body is converted into a minimized DFA, and all DFAs are laid
    out in one graph whose vertices are numbered consecutively.

    Parameters
    ----------
    text : List[str]
        Grammar lines. The head of the first line becomes the start symbol.
        A line without a body also records an epsilon production.

    Returns
    -------
    An instance built as ``cls(rfa_graph, head_by_start_final_pair,
    eps_productions, start_symbol)``.
    """
    start_symbol = None
    eps_productions = []
    productions_with_dfa = []

    for line in text:
        raw_head, separator, body_text = line.strip().partition(' ')
        # NOTE(review): this also rewrites 'eps' inside longer symbol names
        # (including an already spelled-out 'epsilon') - confirm intended.
        regex = Regex(body_text.replace('eps', 'epsilon'))
        head = Variable(raw_head)
        if start_symbol is None:
            start_symbol = head
        if not separator:
            # No body at all: the head derives the empty word.
            eps_productions.append(Production(head, []))
        dfa: DeterministicFiniteAutomaton = (
            regex.to_epsilon_nfa().to_deterministic().minimize()
        )
        productions_with_dfa.append((head, dfa))

    import wrappers.GraphWrapper
    rfa_graph = wrappers.GraphWrapper.empty()
    rfa_graph.matrix_size = sum(
        len(dfa.states) for _, dfa in productions_with_dfa
    )
    rfa_graph.vertices = set()
    empty_matrix = Matrix.sparse(
        types.BOOL, rfa_graph.matrix_size, rfa_graph.matrix_size
    )

    head_by_start_final_pair = {}
    next_vertex = 0
    for head, dfa in productions_with_dfa:
        transitions = dfa._transition_function._transitions

        # Give this DFA's states the next block of consecutive numbers.
        num_by_state = {
            state: number
            for number, state in enumerate(dfa.states, start=next_vertex)
        }
        next_vertex += len(num_by_state)
        rfa_graph.vertices.update(num_by_state.values())

        for start_state in dfa.start_states:
            rfa_graph.start_states.add(num_by_state[start_state])

        for final_state in dfa.final_states:
            final_number = num_by_state[final_state]
            rfa_graph.final_states.add(final_number)
            # dfa.start_state is deliberately read only when a final state
            # exists.
            start_number = num_by_state[dfa.start_state]
            head_by_start_final_pair[start_number, final_number] = head.value

        for state_from, outgoing in transitions.items():
            for edge_symb, state_to in outgoing.items():
                matrix = rfa_graph.label_to_bool_matrix.setdefault(
                    edge_symb, empty_matrix.dup())
                matrix[num_by_state[state_from],
                       num_by_state[state_to]] = True

    return cls(rfa_graph, head_by_start_final_pair, eps_productions,
               start_symbol)
def from_regex(self, filename):
    """Populate this object from the regex stored in ``filename``.

    The file content (trailing whitespace stripped) is parsed as a regex and
    converted to a minimized DFA. DFA states are renumbered from 0, each
    transition sets the corresponding cell of the per-label matrix to
    ``True``, and the start and final vertices are recorded.

    Parameters
    ----------
    filename
        Path of the file holding the regular expression.
    """
    # The context manager guarantees the file is closed; the original
    # handle was never closed.
    with open(filename) as input_file:
        pattern = input_file.read().rstrip()

    dfa = Regex(pattern).to_epsilon_nfa().to_deterministic().minimize()
    self.n_vertices = len(dfa.states)

    state_renumeration = {
        state: index for index, state in enumerate(dfa.states)
    }

    for fro, label, to in dfa._transition_function.get_edges():
        self.get_by_label(str(label))[
            state_renumeration[fro], state_renumeration[to]] = True

    self.start_vertices.add(state_renumeration[dfa.start_state])
    for state in dfa.final_states:
        self.final_vertices.add(state_renumeration[state])
def str_to_graph(s):
    """Turn a regex string into integer-labelled edge labels plus start/final nodes.

    Parameters
    ----------
    s
        Regular expression source passed to ``Regex``.

    Returns
    -------
    tuple
        ``(labels, start_states, final_states)``. ``labels`` is the
        ``'label'`` edge-attribute mapping of the integer-relabelled graph.
        The two state lists hold the relabelled start and final nodes in
        sorted order.
    """
    automaton = Regex(s).to_epsilon_nfa().minimize()
    graph = automaton.to_networkx()
    relabeled = nx.convert_node_labels_to_integers(graph, ordering="sorted")

    # Pair the i-th sorted original node with the i-th sorted new node.
    # Each side is sorted once instead of once per loop iteration.
    old_to_new = dict(zip(sorted(graph.nodes), sorted(relabeled.nodes)))

    start_states = sorted(
        old_to_new[state] for state in automaton.start_states
    )
    final_states = sorted(
        old_to_new[state] for state in automaton.final_states
    )
    labels = nx.get_edge_attributes(relabeled, 'label')
    return labels, start_states, final_states
def regex_to_pda_graph(regex, first_node_number):
    """Convert a regex string into a ``nx.DiGraph`` with consecutively numbered nodes.

    Nodes whose ``'label'`` attribute is falsy are dropped. The remaining
    nodes are renumbered in sorted order starting at ``first_node_number``,
    and every edge label is wrapped in a one-element list.
    """
    nfa: EpsilonNFA = Regex(regex).to_epsilon_nfa().minimize()
    graph: nx.MultiDiGraph = nfa.to_networkx()

    # Collect first, then remove: the node view must not change mid-iteration.
    unlabeled = [
        node for node in graph.nodes if not graph.nodes[node]['label']
    ]
    for node in unlabeled:
        graph.remove_node(node)

    renumbering = {
        node: first_node_number + offset
        for offset, node in enumerate(sorted(graph.nodes))
    }
    graph = nx.relabel_nodes(graph, renumbering)

    for edge in graph.edges:
        attributes = graph.edges[edge]
        attributes['label'] = [attributes['label']]

    return nx.DiGraph(graph)
def from_regex(cls, regex: Regex, initial_label: Symbol):
    """ Build a single-box recursive automaton from a regular expression

    Parameters
    -----------
    regex : :class:`~pyformlang.regular_expression.Regex`
        The regular expression to convert
    initial_label : :class:`~pyformlang.finite_automaton.Symbol`
        The label used both for the box and as the initial label

    Returns
    -----------
    rsa : :class:`~pyformlang.rsa.RecursiveAutomaton`
        The recursive automaton whose only box holds the minimized
        automaton of the regular expression
    """
    start_label = to_symbol(initial_label)
    minimal_automaton = regex.to_epsilon_nfa().minimize()
    single_box = Box(minimal_automaton, start_label)
    return RecursiveAutomaton({start_label}, start_label, {single_box})
def parse_regex(self, file_path):
    """Reset this object and load it from the regex stored in ``file_path``.

    The file content (trailing whitespace stripped) is parsed as a regex and
    converted to a minimized DFA. States are numbered from 0, one boolean
    matrix per input symbol records the transitions, and the start and
    terminal vertices are filled in.

    Parameters
    ----------
    file_path
        Path of the file holding the regular expression.

    Returns
    -------
    ``self``, to allow chaining.
    """
    self.__init__()

    # The context manager closes the file even if reading fails.
    with open(file_path, 'r') as regex_file:
        pattern = regex_file.read().rstrip()
    regex = Regex(pattern)

    # Regex to DFA conversion and vertex count.
    dfa = regex.to_epsilon_nfa().to_deterministic().minimize()
    self.vertices_count = len(dfa.states)

    # Number the states in iteration order; a state seen twice keeps its
    # first number.
    states = {}
    for state in dfa._states:
        states.setdefault(state, len(states))

    # Fill the per-label boolean matrices, creating each on first use.
    for source in dfa._states:
        for label in dfa._input_symbols:
            for target in dfa._transition_function(source, label):
                if label not in self.label_matrices:
                    self.label_matrices[label] = Matrix.sparse(
                        BOOL, self.vertices_count, self.vertices_count)
                self.label_matrices[label][
                    states[source], states[target]] = True

    # Start and terminal vertices.
    self.start_vertices.add(states[dfa.start_state])
    for state in dfa._final_states:
        self.terminal_vertices.add(states[state])

    return self
def test_to_enfa0(self):
    """ Tests which words the epsilon-NFA built from several regexes accepts """
    symb_a = finite_automaton.Symbol("a")
    symb_b = finite_automaton.Symbol("b")
    symb_c = finite_automaton.Symbol("c")
    epsilon = finite_automaton.Epsilon()

    # Each case: regex source, then (word, expected acceptance) in check order.
    cases = [
        ("a|b", [
            ([symb_a], True),
            ([symb_b], True),
            ([symb_c], False),
            ([epsilon], False),
            ([symb_a, symb_b], False),
        ]),
        ("a b", [
            ([symb_a], False),
            ([symb_b], False),
            ([symb_a, symb_b], True),
        ]),
        ("a b c", [
            ([symb_a, symb_b], False),
            ([symb_a, symb_b, symb_c], True),
            ([symb_a, symb_b, symb_a], False),
        ]),
        ("(a b)|c", [
            ([symb_a, symb_b], True),
            ([symb_a, symb_c], False),
            ([symb_b, symb_c], False),
            ([symb_c], True),
        ]),
        ("", [
            ([symb_a], False),
            ([symb_b], False),
            ([symb_c], False),
            ([], False),
        ]),
        ("a*", [
            ([symb_a], True),
            ([], True),
            ([symb_a, symb_a], True),
            ([symb_a, symb_a, symb_a], True),
        ]),
    ]

    for pattern, expectations in cases:
        enfa = Regex(pattern).to_epsilon_nfa()
        for word, expected in expectations:
            check = self.assertTrue if expected else self.assertFalse
            check(enfa.accepts(word))
def test_to_enfa1(self):
    """ Tests which words the epsilon-NFA built from several regexes accepts """
    symb_a = finite_automaton.Symbol("a")
    symb_b = finite_automaton.Symbol("b")
    symb_c = finite_automaton.Symbol("c")

    # Each case: regex source, then (word, expected acceptance) in check order.
    cases = [
        ("a**", [
            ([symb_a], True),
            ([], True),
            ([symb_a, symb_a], True),
            ([symb_a, symb_a, symb_a], True),
        ]),
        ("a*b|c", [
            ([symb_a, symb_a, symb_b], True),
            ([symb_b], True),
            ([symb_c], True),
            ([symb_a, symb_a, symb_c], False),
        ]),
        ("a*(b|c)", [
            ([symb_a, symb_a, symb_b], True),
            ([symb_b], True),
            ([symb_c], True),
            ([symb_a, symb_a, symb_c], True),
        ]),
        ("a*.(b|c)", [
            ([symb_a, symb_a, symb_b], True),
            ([symb_b], True),
            ([symb_c], True),
            ([symb_a, symb_a, symb_c], True),
        ]),
        ("a*.(b|c)epsilon", [
            ([symb_a, symb_a, symb_b], True),
            ([symb_b], True),
            ([symb_c], True),
            ([symb_a, symb_a, symb_c], True),
        ]),
        ("$", [
            ([symb_a], False),
            ([symb_b], False),
            ([symb_c], False),
            ([], True),
        ]),
    ]

    for pattern, expectations in cases:
        enfa = Regex(pattern).to_epsilon_nfa()
        for word, expected in expectations:
            check = self.assertTrue if expected else self.assertFalse
            check(enfa.accepts(word))
def str_to_dfa(s):
    """Parse ``s`` as a regex and return its determinized, minimized automaton."""
    return Regex(s).to_epsilon_nfa().to_deterministic().minimize()
def regex_to_min_dfa(regex: Regex):
    """Return the minimized deterministic automaton of ``regex``."""
    deterministic = regex.to_epsilon_nfa().to_deterministic()
    return deterministic.minimize()
def from_regex(cls, regex: str, is_python_regex=True):
    """Wrap the minimized automaton of ``regex`` in a ``RegexGraphWrapper``.

    ``regex`` is parsed with ``Regex.from_python_regex`` when
    ``is_python_regex`` is true, and with the plain ``Regex`` constructor
    otherwise.
    """
    build_regex = Regex.from_python_regex if is_python_regex else Regex
    pyformlang_regex = build_regex(regex)
    return RegexGraphWrapper(pyformlang_regex.to_epsilon_nfa().minimize())
def regex_to_minimal_dfa(regex_str):
    """Parse ``regex_str`` as a regex and return the result of minimizing its epsilon-NFA."""
    epsilon_nfa = Regex(regex_str).to_epsilon_nfa()
    return epsilon_nfa.minimize()
def regex_to_dfa(regex):
    """Parse ``regex`` and return its determinized (not minimized) automaton."""
    return Regex(regex).to_epsilon_nfa().to_deterministic()
def min_dfa_from_regex(r: Regex):
    """Return the determinized and then minimized automaton of regex ``r``."""
    epsilon_nfa = r.to_epsilon_nfa()
    return epsilon_nfa.to_deterministic().minimize()