def _create_cfg_from_regex(cls, head: Variable, regex: Regex, variables=None) -> CFG:
    """Build a right-linear CFG rooted at ``head`` from ``regex``.

    The regex is converted to a minimized DFA.  Every DFA state gets a fresh
    variable (named from the state and a class-wide counter), ``head`` derives
    the variable of each start state, every transition ``p --x--> q`` becomes
    ``P -> x Q`` and every final state derives the empty word.

    :param head: variable used as start symbol of the resulting grammar
    :param regex: regular expression describing the production bodies
    :param variables: optional collection of symbol values to treat as
        variables; when empty or ``None`` an upper-case symbol value is
        treated as a variable instead
    :return: the grammar; if the DFA yields no production at all the result
        only contains ``head -> epsilon``
    """
    dfa = regex.to_epsilon_nfa().to_deterministic().minimize()
    transitions = dfa._transition_function._transitions
    state_to_var: Dict[State, Variable] = {}
    productions, terms, grammar_vars = set(), set(), set()

    for state in dfa.states:
        state_to_var[state] = Variable(f'{state}:{cls.__var_state_counter}')
        cls.__var_state_counter += 1
    grammar_vars.update(state_to_var.values())

    for start_state in dfa.start_states:
        productions.add(Production(head, [state_to_var[start_state]]))

    for state_from in transitions:
        for edge_symb in transitions[state_from]:
            state_to = transitions[state_from][edge_symb]
            if variables:
                is_variable = edge_symb.value in variables
            else:
                is_variable = edge_symb.value.isupper()
            if is_variable:
                symbol = Variable(edge_symb.value)
                grammar_vars.add(symbol)
            else:
                symbol = Terminal(edge_symb.value)
                terms.add(symbol)
            productions.add(
                Production(state_to_var[state_from],
                           [symbol, state_to_var[state_to]]))

    if not productions:
        return CFG(grammar_vars, terms, head, {Production(head, [])})

    # Every final state must be able to stop, including final states that no
    # transition leads to (e.g. an accepting start state); otherwise words
    # ending there, such as the empty word, would be lost.
    for final_state in dfa.final_states:
        productions.add(Production(state_to_var[final_state], []))

    return CFG(grammar_vars, terms, head, productions)
def test_generating_object(self):
    """ Checks which symbols are reported as generating """
    start = Variable("S")
    non_term_a, non_term_b = Variable("A"), Variable("B")
    letter_a, letter_b = Terminal("a"), Terminal("b")
    rule_pair = Production(start, [non_term_a, non_term_b])
    rule_letter = Production(start, [letter_a])
    rule_a = Production(non_term_a, [letter_b])

    def check_sizes(grammar, n_variables, n_terminals, n_productions):
        self.assertEqual(len(grammar.variables), n_variables)
        self.assertEqual(len(grammar.terminals), n_terminals)
        self.assertEqual(len(grammar.productions), n_productions)

    # B has no production here and is not expected among the generating
    # symbols.
    grammar = CFG({non_term_a, non_term_b, start},
                  {letter_a, letter_b},
                  start,
                  {rule_pair, rule_letter, rule_a})
    check_sizes(grammar, 3, 2, 3)
    self.assertEqual(grammar.get_generating_symbols(),
                     {non_term_a, letter_a, letter_b, start})

    # With B -> epsilon added, B is expected to be generating as well.
    rule_b = Production(non_term_b, [Epsilon()])
    grammar = CFG({non_term_a, non_term_b, start},
                  {letter_a, letter_b},
                  start,
                  {rule_pair, rule_letter, rule_a, rule_b})
    check_sizes(grammar, 3, 2, 4)
    self.assertEqual(grammar.get_generating_symbols(),
                     {non_term_a, non_term_b, letter_a, letter_b, start})
def test_cnf(self):
    """ Checks the conversion of a grammar to normal form """
    # pylint: disable=too-many-locals
    ident = Variable("I")
    factor = Variable("F")
    expr = Variable("E")
    term = Variable("C#CNF#1")
    letter_a, letter_b, digit_0, digit_1 = (
        Terminal(value) for value in ("a", "b", "0", "1"))
    open_par, close_par = Terminal("("), Terminal(")")
    times, plus = Terminal("*"), Terminal("+")

    rules = {Production(ident, [letter_a]), Production(ident, [letter_b])}
    rules.update(
        Production(ident, [ident, suffix])
        for suffix in (letter_a, letter_b, digit_0, digit_1))
    rules.update({
        Production(factor, [ident]),
        Production(factor, [open_par, expr, close_par]),
        Production(term, [factor]),
        Production(term, [term, times, factor]),
        Production(expr, [term]),
        Production(expr, [expr, plus, term]),
    })
    alphabet = {letter_a, letter_b, digit_0, digit_1,
                open_par, close_par, times, plus}

    def check_sizes(grammar, n_variables, n_terminals, n_productions):
        self.assertEqual(len(grammar.variables), n_variables)
        self.assertEqual(len(grammar.terminals), n_terminals)
        self.assertEqual(len(grammar.productions), n_productions)

    grammar = CFG({ident, factor, expr, term}, alphabet, expr, rules)
    normalized = grammar.to_normal_form()
    check_sizes(normalized, 15, 8, 41)
    self.assertFalse(grammar.is_empty())
    # A second conversion must compare equal to the first one.
    normalized_again = grammar.to_normal_form()
    self.assertEqual(normalized, normalized_again)

    # A grammar made of a single unit production.
    unit_only = CFG(start_symbol=expr,
                    productions={Production(expr, [term])})
    normalized = unit_only.to_normal_form()
    check_sizes(normalized, 1, 0, 0)
    self.assertTrue(unit_only.is_empty())
def test_intersection(self):
    """ Checks the intersection of a CFG with a regex and its automaton """
    regex = Regex("a*b*")
    enfa = regex.to_epsilon_nfa()
    sym_a, sym_b = Symbol("a"), Symbol("b")
    self.assertTrue(enfa.accepts([sym_a, sym_a, sym_b, sym_b]))
    self.assertFalse(enfa.accepts([sym_b, sym_b, sym_a]))

    letter_a, letter_b = Terminal("a"), Terminal("b")
    start = Variable("S")
    grammar = CFG(
        productions={
            Production(start, [letter_a, start, letter_b]),
            Production(start, [letter_b, start, letter_a]),
            Production(start, []),
        },
        start_symbol=start)
    self.assertTrue(grammar.contains([letter_a, letter_a, letter_b, letter_b]))
    self.assertFalse(grammar.contains([letter_a, letter_a, letter_b]))

    # The same expectations hold whether we intersect with the regex or
    # with the automaton built from it.
    for regular_language in (regex, enfa):
        intersected = grammar.intersection(regular_language)
        self.assertTrue(
            intersected.contains([letter_a, letter_a, letter_b, letter_b]))
        self.assertFalse(
            intersected.contains([letter_a, letter_a, letter_b]))
        self.assertTrue(intersected.contains([]))
def test_derivation_empty(self):
    """ Checks the rightmost derivation of the empty word """
    start = Variable("S")
    grammar = CFG(productions=[Production(start, [Epsilon()])],
                  start_symbol=start)
    tree = grammar.get_cnf_parse_tree([])
    steps = tree.get_rightmost_derivation()
    self.assertEqual([[start], []], steps)
def part2():
    """Count the words of ``in.txt`` derivable from rule ``0``.

    The file holds a rule section and a word section separated by one blank
    line.  Rules ``8: 42`` and ``11: 42 31`` are replaced by their recursive
    variants before the grammar is built.  The count is printed.
    """
    # Close the file deterministically instead of leaking the handle.
    with open("in.txt") as handle:
        rules, words = handle.read().split("\n\n")

    # Patch whole lines only: a substring replace on the full text would
    # also rewrite unrelated rules such as "18: 42 ..." or "111: 42 31 ...".
    patches = {
        "8: 42": "8: 42 | 42 8",
        "11: 42 31": "11: 42 31 | 42 11 31",
    }

    variables = set()
    productions = set()
    terminals = set()
    for line in rules.split("\n"):
        line = patches.get(line, line)
        left, right = line.split(":")
        head = Variable(left)
        variables.add(head)
        for expression in right.split("|"):
            if '"' in expression:
                # Terminal expression: drop the quotes and padding.
                terminal = Terminal(expression.strip('" '))
                terminals.add(terminal)
                body = [terminal]
            else:
                body = [Variable(token)
                        for token in expression.strip().split()]
            productions.add(Production(head, body))

    cfg = CFG(variables, terminals, Variable("0"), productions)
    count = sum(1 for word in words.split("\n") if cfg.contains(word))
    print(count)
def _to_pretty(cls, cfq: ContextFreeQuery) -> "PrettyContextFreeQuery":
    """Build a pretty-printed counterpart of ``cfq``.

    Variables lose the first two characters of their value; terminals
    ``t_newline`` and ``t_escape`` become a newline and a backslash, and
    every other terminal goes through ``cls._from_pretty_term``.  The new
    grammar always uses ``S`` as start symbol and ``_generate_eps`` is
    copied from ``cfq``.
    """
    def cfg_obj_to_pretty(cfg_obj: Union[Variable, Terminal]):
        if isinstance(cfg_obj, Variable):
            return Variable(cfg_obj.value[2:])
        if isinstance(cfg_obj, Terminal):
            if cfg_obj.value == 't_newline':
                return Terminal('\n')
            if cfg_obj.value == 't_escape':
                return Terminal('\\')
            return Terminal(cls._from_pretty_term(cfg_obj.value))
        # Any other object maps to None, exactly as before.
        return None

    def production_to_pretty(production: Production):
        # The third positional argument is passed as False, as before.
        return Production(
            cfg_obj_to_pretty(production.head),
            [cfg_obj_to_pretty(cfg_obj) for cfg_obj in production.body],
            False
        )

    # The former sorted list of stringified productions was never read and
    # has been removed.
    new_productions = {
        production_to_pretty(production)
        for production in cfq._cfg.productions
    }
    new_cfg = CFG(productions=new_productions, start_symbol=Variable('S'))
    res = cls()
    res._generate_eps = cfq._generate_eps
    res._cfg = new_cfg
    return res
def test_intersection_dfa2(self):
    """ Checks the intersection with a one-state DFA looping on a and b """
    only_state = State(0)
    sym_a, sym_b = Symbol("a"), Symbol("b")
    automaton = DeterministicFiniteAutomaton({only_state},
                                             {sym_a, sym_b},
                                             start_state=only_state,
                                             final_states={only_state})
    for symbol in (sym_a, sym_b):
        automaton.add_transition(only_state, symbol, only_state)
    self.assertTrue(automaton.accepts([sym_a, sym_a, sym_b, sym_b]))

    letter_a, letter_b = Terminal("a"), Terminal("b")
    start = Variable("S")
    inner = Variable("S1")
    empty_prefix = Variable("L")
    grammar = CFG(
        productions={
            Production(start, [empty_prefix, inner]),
            Production(empty_prefix, [Epsilon()]),
            Production(inner, [letter_a, inner, letter_b]),
            Production(inner, [letter_b, inner, letter_a]),
            Production(inner, []),
        },
        start_symbol=start)
    self.assertTrue(grammar.contains([letter_a, letter_a, letter_b, letter_b]))
    self.assertFalse(grammar.contains([letter_a, letter_a, letter_b]))

    intersected = grammar.intersection(automaton)
    self.assertFalse(intersected.is_empty())
    self.assertTrue(
        intersected.contains([letter_a, letter_a, letter_b, letter_b]))
    self.assertTrue(intersected.contains([]))
def test_intersection_with_epsilon(self):
    """ Checks intersection when the grammar has an epsilon production """
    first, second = State(0), State(1)
    sym_a = Symbol("a")
    automaton = DeterministicFiniteAutomaton({first, second},
                                             {sym_a},
                                             start_state=first,
                                             final_states={second})
    automaton.add_transition(first, sym_a, second)
    self.assertTrue(automaton.accepts([sym_a]))

    letter_a = Terminal("a")
    start, empty_prefix, tail = Variable("S"), Variable("L"), Variable("T")
    grammar = CFG(
        productions={
            Production(start, [empty_prefix, tail]),
            Production(empty_prefix, [Epsilon()]),
            Production(tail, [letter_a]),
        },
        start_symbol=start)
    self.assertFalse(grammar.is_empty())
    self.assertTrue(grammar.contains([letter_a]))

    # Round trip through a PDA.
    round_trip = grammar.to_pda().to_cfg()
    self.assertFalse(round_trip.is_empty())
    self.assertTrue(round_trip.contains([letter_a]))

    # Round trip through final-state and empty-stack conversions.
    as_final_state = grammar.to_pda().to_final_state()
    round_trip = as_final_state.to_empty_stack().to_cfg()
    self.assertFalse(round_trip.is_empty())
    self.assertTrue(round_trip.contains([letter_a]))

    intersected = grammar.intersection(automaton)
    self.assertFalse(intersected.is_empty())
def read_grammar_with_regex(cls, name):
    """Read a grammar whose production bodies are regular expressions.

    Each non-blank line of the file ``name`` is ``HEAD body...``: the first
    whitespace-separated token is the head and the remaining tokens, joined
    by single spaces, form the regex handed to
    ``CFGrammar.read_production_regex``.  The head of the first production
    becomes the start symbol.

    :param name: path of the grammar file
    :return: the CFG built from all productions
    """
    next_id = 0
    terminals, variables, productions = set(), set(), set()
    start_symb = None
    with open(name, 'r') as file:
        productions_txt = file.readlines()
    for production_txt in productions_txt:
        tokens = production_txt.strip().split()
        if not tokens:
            # Blank lines (e.g. a trailing one) carry no production.
            continue
        head = Variable(tokens[0])
        body = ' '.join(tokens[1:])
        if start_symb is None:
            start_symb = head
        new_productions, new_variables, new_terminals, next_id = \
            CFGrammar.read_production_regex(head, Regex(body), next_id)
        productions |= new_productions
        variables |= new_variables
        terminals |= new_terminals
    cfg = CFG(variables, terminals, start_symb, productions)
    return cfg
def _test_profiling_intersection(self):
    """ Profiling helper: intersection with a DFA accepting a^50 b^50 """
    size = 50
    states = [State(index) for index in range(size * 2 + 1)]
    sym_a, sym_b = Symbol("a"), Symbol("b")
    automaton = DeterministicFiniteAutomaton(states,
                                             {sym_a, sym_b},
                                             start_state=states[0],
                                             final_states={states[-1]})
    # First `size` edges read a, the following `size` edges read b.
    for index in range(size * 2):
        symbol = sym_a if index < size else sym_b
        automaton.add_transition(states[index], symbol, states[index + 1])

    letter_a, letter_b = Terminal("a"), Terminal("b")
    start = Variable("S")
    inner = Variable("S1")
    empty_prefix = Variable("L")
    rules = [
        Production(start, [empty_prefix, inner]),
        Production(empty_prefix, [Epsilon()]),
        Production(inner, [letter_a, inner, letter_b]),
        Production(inner, [letter_b, inner, letter_a]),
        Production(inner, []),
    ]
    grammar = CFG(productions=rules, start_symbol=start)
    intersected = grammar.intersection(automaton)
    self.assertFalse(intersected.is_empty())
    self.assertTrue(
        intersected.contains([letter_a] * size + [letter_b] * size))
    self.assertFalse(intersected.contains([]))
def read_grammar(cls, name):
    """Read a grammar from the file ``name``.

    Each non-blank line is ``HEAD sym1 sym2 ...`` with whitespace-separated
    tokens.  A body token for which ``str.isupper`` is true becomes a
    variable, any other token a terminal.  The head of the first production
    is the start symbol.

    :param name: path of the grammar file
    :return: the CFG built from all productions
    """
    terminals, variables, productions = set(), set(), set()
    start_symb = None
    with open(name, 'r') as file:
        productions_txt = file.readlines()
    for production_txt in productions_txt:
        tokens = production_txt.strip().split()
        if not tokens:
            # Blank lines would otherwise fail to unpack into head/body.
            continue
        head, *body = tokens
        if start_symb is None:
            start_symb = Variable(head)
        body_cfg = []
        for letter in body:
            if letter.isupper():
                variable = Variable(letter)
                variables.add(variable)
                body_cfg.append(variable)
            else:
                terminal = Terminal(letter)
                terminals.add(terminal)
                body_cfg.append(terminal)
        productions.add(Production(Variable(head), body_cfg))
    cfg = CFG(variables, terminals, start_symb, productions)
    return cfg
def __init__(self, rules: Iterable[str], patch: bool = False):
    """Build ``self.CFG`` from textual rules of the form ``"<id>: <body>"``.

    A body starting with a double quote defines a one-character terminal;
    any other body is a ``" | "``-separated list of alternatives, each a
    space-separated sequence of rule ids.  Rule ``0`` is the start symbol.

    :param rules: the rule lines
    :param patch: when true, the bodies of non-terminal rules ``8`` and
        ``11`` are replaced by their recursive variants
    :raises ValueError: if no rule ``0`` is present
    """
    start_var = None
    variables: Set[Variable] = set()
    terminals: Set[Terminal] = set()
    productions: Set[Production] = set()
    for rule in rules:
        label, body = rule.split(": ", 1)
        var = Variable(label)
        variables.add(var)
        if label == "0":
            start_var = var
        if body[0] == '"':
            # Terminal rule: the character right after the opening quote.
            ter = Terminal(body[1])
            terminals.add(ter)
            productions.add(Production(var, [ter]))
            continue
        if patch:
            if label == "8":
                body = "42 | 42 8"
            if label == "11":
                body = "42 31 | 42 11 31"
        for alternative in body.split(" | "):
            productions.add(
                Production(var,
                           [Variable(x) for x in alternative.split(" ")]))
    if start_var is None:
        # Previously surfaced as an opaque UnboundLocalError.
        raise ValueError('no rule "0" found: cannot determine start symbol')
    self.CFG = CFG(variables, terminals, start_var, productions)
def part2(file='input_test.txt'):
    """Print how many messages the patched rule set derives from rule 0.

    Rules and messages come from ``get_input(file)``; rules 8 and 11 are
    overwritten with their recursive variants before the grammar is built.
    """
    rules, messages = get_input(file)
    # 8: 42 | 42 8
    # 11: 42 31 | 42 11 31
    rules['8'] = '42 | 42 8'
    rules['11'] = '42 31 | 42 11 31'

    rule_variables = set()
    rule_products = set()
    for name in rules:
        rule_variables.add(Variable(name))
        for alternative in rules[name].split(' | '):
            if alternative in ('"a"', '"b"'):
                # Quoted letter: a terminal once the quotes are removed.
                body = [Terminal(alternative.replace('"', ''))]
            else:
                body = [Variable(token) for token in alternative.split(' ')]
            rule_products.add(Production(Variable(name), body))

    cfg = CFG(rule_variables, {Terminal('a'), Terminal('b')}, Variable('0'),
              rule_products)
    count = sum(1 for message in messages if cfg.contains(message))
    print('Part 2: Solution {}'.format(count))
def test_to_pda(self):
    """ Checks the PDA obtained from an expression grammar """
    expr = Variable("E")
    ident = Variable("I")
    letter_a, letter_b = Terminal("a"), Terminal("b")
    digit_0, digit_1 = Terminal("0"), Terminal("1")
    open_par, close_par = Terminal("("), Terminal(")")
    times, plus = Terminal("*"), Terminal("+")

    rules = {
        Production(expr, [ident]),
        Production(expr, [expr, plus, expr]),
        Production(expr, [expr, times, expr]),
        Production(expr, [open_par, expr, close_par]),
        Production(ident, [letter_a]),
        Production(ident, [letter_b]),
    }
    rules.update(
        Production(ident, [ident, suffix])
        for suffix in (letter_a, letter_b, digit_0, digit_1, Epsilon()))
    alphabet = {letter_a, letter_b, digit_0, digit_1,
                open_par, close_par, times, plus}

    grammar = CFG({expr, ident}, alphabet, expr, rules)
    automaton = grammar.to_pda()
    self.assertEqual(len(automaton.states), 1)
    self.assertEqual(len(automaton.final_states), 0)
    self.assertEqual(len(automaton.input_symbols), 8)
    self.assertEqual(len(automaton.stack_symbols), 10)
    self.assertEqual(automaton.get_number_transitions(), 19)
def test_generation_words(self):
    """ Checks the words enumerated up to a maximum length """
    letter_a, letter_b = Terminal("a"), Terminal("b")
    start = Variable("S")
    grammar = CFG(
        productions={
            Production(start, [letter_a, start, letter_b]),
            Production(start, []),
        },
        start_symbol=start)

    one_pair = [letter_a, letter_b]
    two_pairs = [letter_a, letter_a, letter_b, letter_b]
    # (max_length, words that must be present, total number of words)
    expectations = [
        (0, [[]], 1),
        (1, [[]], 1),
        (2, [[], one_pair], 2),
        (3, [[], one_pair], 2),
        (4, [[], two_pairs], 3),
    ]
    for max_length, required, total in expectations:
        words = list(grammar.get_words(max_length=max_length))
        for word in required:
            self.assertIn(word, words)
        self.assertEqual(len(words), total)
def test_derivation_does_not_exist(self):
    """ Checks that a grammar without productions yields no derivation """
    start = Variable("S")
    word = [Terminal("a"), Terminal("b")]
    grammar = CFG(productions=[], start_symbol=start)
    with self.assertRaises(DerivationDoesNotExist):
        tree = grammar.get_cnf_parse_tree(word)
        tree.get_rightmost_derivation()
def test_finite(self):
    """ Checks the finiteness test on a finite and an infinite grammar """
    letter_a, letter_b = Terminal("a"), Terminal("b")
    start = Variable("S")
    left, right = Variable("A"), Variable("B")
    rules = {
        Production(start, [left, right]),
        Production(left, [letter_a]),
        Production(right, [letter_b]),
    }
    grammar = CFG(productions=rules, start_symbol=start)
    self.assertTrue(grammar.is_finite())

    # A -> S closes a cycle S => A B => S B.
    rules.add(Production(left, [start]))
    grammar = CFG(productions=rules, start_symbol=start)
    self.assertFalse(grammar.is_finite())
def test_creation(self):
    """ Tests creation of CFG """
    head = Variable(0)
    lower_a = Terminal("a")
    rule = Production(head, [lower_a, Terminal("A"), Variable(1)])

    def check_sizes(grammar, n_variables, n_terminals, n_productions):
        self.assertEqual(len(grammar.variables), n_variables)
        self.assertEqual(len(grammar.terminals), n_terminals)
        self.assertEqual(len(grammar.productions), n_productions)

    # Symbols that only occur in the production body are expected to be
    # counted too.
    grammar = CFG({head}, {lower_a}, head, {rule})
    self.assertIsNotNone(grammar)
    check_sizes(grammar, 2, 2, 1)
    self.assertTrue(grammar.is_empty())

    # Default construction.
    grammar = CFG()
    self.assertIsNotNone(grammar)
    check_sizes(grammar, 0, 0, 0)
    self.assertTrue(grammar.is_empty())
def test_emptiness(self):
    """ Checks that a grammar deriving words is not reported empty """
    # pylint: disable=too-many-locals
    start = Variable("S")
    letter_a, letter_b = Terminal("a"), Terminal("b")
    rules = {
        Production(start, [letter_a, start, letter_b]),
        Production(start, []),
    }
    grammar = CFG({start}, {letter_a, letter_b}, start, rules)
    self.assertFalse(grammar.is_empty())
def read_grammar(cls, name):
    """Read a grammar whose alternatives may be plain bodies or regexes.

    Each non-blank line is ``HEAD <sep> alt1 | alt2 ...`` where the second
    token is ignored.  An alternative in which every token contains ``*``
    is treated as a regex: its first token is parsed with
    ``Regex.from_python_regex`` and expanded by
    ``CFGrammar.read_production_regex``.  Otherwise each token becomes
    ``Epsilon`` (token ``epsilon``), a variable (``str.isupper`` is true) or
    a terminal.  The head of the first production is the start symbol.

    :param name: path of the grammar file
    :return: the CFG built from all productions
    """
    next_id = 0
    terminals, variables, productions = set(), set(), set()
    start_symb = None
    with open(name, 'r') as file:
        productions_txt = file.readlines()
    for production_txt in productions_txt:
        tokens = production_txt.strip().split()
        if not tokens:
            # Blank lines would otherwise fail to unpack below.
            continue
        head, _, *body_full = tokens
        if start_symb is None:
            start_symb = Variable(head)
        # Split the body on "|" into its alternatives.
        bodies = [
            list(group)
            for is_separator, group in groupby(body_full,
                                               lambda x: x == "|")
            if not is_separator
        ]
        for body in bodies:
            is_regex = all('*' in value for value in body)
            if is_regex:
                new_productions, new_variables, new_terminals, next_id = \
                    CFGrammar.read_production_regex(
                        head, Regex.from_python_regex(body[0]), next_id,
                        False)
                productions |= new_productions
                variables |= new_variables
                terminals |= new_terminals
            else:
                body_cfg = []
                for letter in body:
                    if letter == "epsilon":
                        body_cfg.append(Epsilon())
                    elif letter.isupper():
                        non_terminal = Variable(letter)
                        variables.add(non_terminal)
                        body_cfg.append(non_terminal)
                    else:
                        terminal = Terminal(letter)
                        terminals.add(terminal)
                        body_cfg.append(terminal)
                productions.add(Production(Variable(head), body_cfg))
    cfg = CFG(variables, terminals, start_symb, productions)
    return cfg
def get_weak_cnf(self) -> CFG:
    """Return ``self.cnf``, extended so the empty word is kept if needed.

    When ``self.generate_epsilon`` is truthy a new start symbol ``S'`` is
    added with the productions ``S' -> <old start>`` and ``S' -> epsilon``;
    otherwise ``self.cnf`` is returned as is.
    """
    # NOTE(review): this reads generate_epsilon without calling it; if it
    # is a method rather than an attribute/property the test is always
    # truthy. Confirm against the class definition.
    base = self.cnf
    if not self.generate_epsilon:
        return base

    fresh_start = Variable("S'")
    extended_variables = set(base.variables) | {fresh_start}
    extended_productions = set(base.productions)
    extended_productions.update((
        Production(fresh_start, [base.start_symbol]),
        Production(fresh_start, []),
    ))
    return CFG(extended_variables, base.terminals, fresh_start,
               extended_productions)
def test_membership(self):
    """ Checks word membership on several grammars """
    # pylint: disable=too-many-locals
    useless = Variable("USELESS")
    start = Variable("S")
    var_b = Variable("B")
    letter_a, letter_b, letter_c = (
        Terminal(value) for value in ("a", "b", "c"))

    rule_nested = Production(start, [letter_a, start, var_b])
    rule_useless_nested = Production(useless, [letter_a, start, var_b])
    rule_to_useless = Production(start, [useless])
    rule_b = Production(var_b, [letter_b])
    rule_useless_empty = Production(useless, [])
    grammar = CFG({useless, start},
                  {letter_a, letter_b},
                  start,
                  {rule_nested, rule_useless_nested, rule_to_useless,
                   rule_b, rule_useless_empty})
    accepted = [
        [Epsilon()],
        [letter_a, letter_b],
        [letter_a, letter_a, letter_b, letter_b],
        [letter_a, letter_a, letter_a, letter_b, letter_b, letter_b],
    ]
    rejected = [
        [letter_a, letter_b, letter_b],
        [letter_a, letter_b, letter_c, letter_b],
        [letter_a, letter_a, letter_a, letter_b, letter_b],
    ]
    for word in accepted:
        self.assertTrue(grammar.contains(word))
    for word in rejected:
        self.assertFalse(grammar.contains(word))

    # Without any way to erase S the empty word is not derivable.
    rule_c = Production(start, [letter_c])
    grammar = CFG({start}, {letter_a, letter_b, letter_c}, start,
                  {rule_nested, rule_c})
    self.assertFalse(grammar.contains([Epsilon()]))

    var_a = Variable("A")
    rule_split = Production(start, [var_a, var_b])
    rule_grow = Production(var_a, [var_a, var_b])
    rule_a = Production(var_a, [letter_a])
    rule_b_again = Production(var_b, [letter_b])
    other = CFG({var_a, var_b, start}, {letter_a, letter_b}, start,
                {rule_split, rule_grow, rule_a, rule_b_again})
    self.assertTrue(other.contains([letter_a, letter_b, letter_b]))

    # Same grammar and word, given as plain strings.
    other = CFG({"A", "B", "S"}, {"a", "b"}, "S",
                {rule_split, rule_grow, rule_a, rule_b_again})
    self.assertTrue(other.contains(["a", "b", "b"]))
def to_wcnf(grammar):
    """Convert ``grammar`` to a weak normal form that keeps the empty word.

    The grammar is put in normal form.  If the original grammar generates
    the empty word, a new start symbol ``S'`` is added with the productions
    ``S' -> <old start>`` and ``S' -> epsilon``.

    :param grammar: object exposing ``to_normal_form()`` and
        ``generate_epsilon`` (a method or a boolean attribute/property)
    :return: the normal form, extended as described above when needed
    """
    wcnf = grammar.to_normal_form()
    # Testing a bound method without calling it is always truthy and would
    # add the epsilon production to every grammar; call it when it is
    # callable and keep supporting a plain attribute/property.
    generates_epsilon = grammar.generate_epsilon
    if callable(generates_epsilon):
        generates_epsilon = generates_epsilon()
    if generates_epsilon:
        new_start_symbol = Variable("S'")
        new_variables = set(wcnf.variables)
        new_variables.add(new_start_symbol)
        new_productions = set(wcnf.productions)
        new_productions.add(
            Production(new_start_symbol, [wcnf.start_symbol]))
        new_productions.add(Production(new_start_symbol, []))
        return CFG(new_variables, wcnf.terminals, new_start_symbol,
                   new_productions)
    return wcnf
def test_intersection_empty(self):
    """ Checks that intersecting with the regex "" gives a falsy CFG """
    empty_regex = Regex("")
    letter_a, letter_b = Terminal("a"), Terminal("b")
    start = Variable("S")
    grammar = CFG(
        productions={
            Production(start, [letter_a, start, letter_b]),
            Production(start, [letter_b, start, letter_a]),
            Production(start, []),
        },
        start_symbol=start)
    intersected = grammar & empty_regex
    self.assertFalse(intersected)
def test_union(self):
    """ Checks the union of a grammar with itself """
    start = Variable("S")
    letter_a, letter_b = Terminal("a"), Terminal("b")
    rules = {
        Production(start, [letter_a, start, letter_b]),
        Production(start, []),
    }
    grammar = CFG({start}, {letter_a, letter_b}, start, rules)
    united = grammar.union(grammar)
    self.assertEqual(len(united.variables), 3)
    self.assertEqual(len(united.terminals), 2)
    self.assertEqual(len(united.productions), 6)
    self.assertFalse(united.is_empty())
    self.assertTrue(
        united.contains([letter_a, letter_a, letter_b, letter_b]))
def test_reverse(self):
    """ Checks the reversal of a grammar """
    start = Variable("S")
    letter_a, letter_b = Terminal("a"), Terminal("b")
    rules = {
        Production(start, [letter_a, start, letter_b]),
        Production(start, []),
    }
    grammar = CFG({start}, {letter_a, letter_b}, start, rules)
    mirrored = grammar.reverse()
    self.assertEqual(len(mirrored.variables), 1)
    self.assertEqual(len(mirrored.terminals), 2)
    self.assertEqual(len(mirrored.productions), 2)
    self.assertFalse(mirrored.is_empty())
    # a^n b^n reversed is b^n a^n.
    self.assertTrue(
        mirrored.contains([letter_b, letter_b, letter_a, letter_a]))
def _test_profiling_conversions():
    """ Profiling helper: three CFG -> PDA -> CFG round trips in a row """
    letter_a, letter_b, letter_c = (
        Terminal(value) for value in ("a", "b", "c"))
    start = Variable("S")
    grammar = CFG(
        productions={
            Production(start, [letter_a, start, letter_b]),
            Production(start, [letter_c]),
        },
        start_symbol=start)
    # Each round feeds the result of the previous one.
    for _ in range(3):
        as_final_state = grammar.to_pda().to_final_state()
        grammar = as_final_state.to_empty_stack().to_cfg()
def from_text(text, start_symbol=Variable("S")):
    """Build a CFG from lines of the form ``HEAD -> regex body``.

    In each body ``?`` is replaced by ``|<EPS_SYM>`` before the body is
    parsed as a ``Regex`` and expanded into productions by
    ``Grammar_Wrapper.regex_to_production``.  Blank lines are ignored.

    :param text: the grammar description, one production per line
    :param start_symbol: start symbol of the resulting grammar
    :return: the CFG made of all generated productions
    """
    production_set = set()
    for line in text.splitlines():
        if not line.strip():
            # A blank line has no " -> " part and used to raise IndexError.
            continue
        production = line.split(' -> ')
        head = Variable(production[0])
        body_str = production[1].strip()
        body_str = body_str.replace('?', f'|{EPS_SYM}')
        production_set |= Grammar_Wrapper.regex_to_production(
            Regex(body_str), head)
    return CFG(start_symbol=start_symbol, productions=production_set)
def __init__(self, start_symbol=None, productions=None):
    """Initialise the grammar from the normal form of the given CFG.

    The input grammar is converted to normal form; if the input generates
    the empty word, ``start -> epsilon`` is added to the normal-form
    productions.  Productions with a two-symbol body are collected in
    ``self.pair_productions``.
    """
    source = CFG(start_symbol=start_symbol, productions=productions)
    normal_form = source.to_normal_form()
    if source.generate_epsilon():
        # needed for language preservation
        empty_rule = Production(normal_form._start_symbol, [])
        normal_form._productions.add(empty_rule)
    self.pair_productions = {
        rule for rule in normal_form._productions if len(rule.body) == 2
    }
    super(GrammarCNF, self).__init__(
        start_symbol=normal_form._start_symbol,
        productions=normal_form._productions)