Exemplo n.º 1
0
 def _create_cfg_from_regex(cls, head: Variable, regex: Regex, variables=None) -> CFG:
     """Build a right-linear CFG rooted at ``head`` from ``regex``.

     The regex is converted to a minimized DFA. Every DFA state receives a
     fresh variable, every transition ``p --x--> q`` becomes the production
     ``P -> x Q`` and every final state receives an epsilon production.

     :param head: variable that produces the variable of each start state
     :param regex: regular expression to translate
     :param variables: optional collection of symbol values that must be
         treated as variables; when empty or ``None``, a symbol is a
         variable iff its value is upper-case
     :return: the CFG with ``head`` as start symbol
     """
     dfa = regex.to_epsilon_nfa().to_deterministic().minimize()
     transitions = dfa._transition_function._transitions
     state_to_var: Dict[State, Variable] = {}
     productions, terms, cfg_vars = set(), set(), set()
     for state in dfa.states:
         # The class-wide counter keeps state variables unique across calls.
         state_to_var[state] = Variable(f'{state}:{cls.__var_state_counter}')
         cls.__var_state_counter += 1
     cfg_vars.update(state_to_var.values())
     for start_state in dfa.start_states:
         productions.add(Production(head, [state_to_var[start_state]]))
     for state_from in transitions:
         for edge_symb in transitions[state_from]:
             state_to = transitions[state_from][edge_symb]
             label = edge_symb.value
             if variables:
                 is_variable = label in variables
             else:
                 is_variable = label.isupper()
             if is_variable:
                 symbol = Variable(label)
                 cfg_vars.add(symbol)
             else:
                 symbol = Terminal(label)
                 terms.add(symbol)
             productions.add(
                 Production(state_to_var[state_from],
                            [symbol, state_to_var[state_to]]))
     # Every final state must be able to stop, including a final start state
     # that no transition leads to (otherwise the empty word is lost).
     for final_state in dfa.final_states:
         if final_state in state_to_var:
             productions.add(Production(state_to_var[final_state], []))
     if not productions:
         return CFG(cfg_vars, terms, head, {Production(head, [])})
     return CFG(cfg_vars, terms, head, productions)
Exemplo n.º 2
0
    def test_generating_object(self):
        """ Checks which symbols are reported as generating """
        start = Variable("S")
        var_a, var_b = Variable("A"), Variable("B")
        ter_a, ter_b = Terminal("a"), Terminal("b")
        s_to_ab = Production(start, [var_a, var_b])
        s_to_a = Production(start, [ter_a])
        a_to_b = Production(var_a, [ter_b])

        def build(*rules):
            # Fresh sets for every grammar, exactly like separate literals.
            return CFG({var_a, var_b, start}, {ter_a, ter_b}, start,
                       set(rules))

        # Without a rule for B, B is not generating.
        cfg = build(s_to_ab, s_to_a, a_to_b)
        self.assertEqual(len(cfg.variables), 3)
        self.assertEqual(len(cfg.terminals), 2)
        self.assertEqual(len(cfg.productions), 3)
        self.assertEqual(cfg.get_generating_symbols(),
                         {var_a, ter_a, ter_b, start})

        # Once B derives epsilon, every symbol is generating.
        b_to_eps = Production(var_b, [Epsilon()])
        cfg = build(s_to_ab, s_to_a, a_to_b, b_to_eps)
        self.assertEqual(len(cfg.variables), 3)
        self.assertEqual(len(cfg.terminals), 2)
        self.assertEqual(len(cfg.productions), 4)
        self.assertEqual(cfg.get_generating_symbols(),
                         {var_a, var_b, ter_a, ter_b, start})
Exemplo n.º 3
0
    def test_cnf(self):
        """ Checks the conversion of a grammar to Chomsky normal form """
        # pylint: disable=too-many-locals
        var_i, var_f, var_e = Variable("I"), Variable("F"), Variable("E")
        var_t = Variable("C#CNF#1")
        ter_a, ter_b = Terminal("a"), Terminal("b")
        ter_0, ter_1 = Terminal("0"), Terminal("1")
        ter_par_open, ter_par_close = Terminal("("), Terminal(")")
        ter_mult, ter_plus = Terminal("*"), Terminal("+")
        rules = [
            (var_i, [ter_a]),
            (var_i, [ter_b]),
            (var_i, [var_i, ter_a]),
            (var_i, [var_i, ter_b]),
            (var_i, [var_i, ter_0]),
            (var_i, [var_i, ter_1]),
            (var_f, [var_i]),
            (var_f, [ter_par_open, var_e, ter_par_close]),
            (var_t, [var_f]),
            (var_t, [var_t, ter_mult, var_f]),
            (var_e, [var_t]),
            (var_e, [var_e, ter_plus, var_t]),
        ]
        productions = {Production(head, body) for head, body in rules}
        all_variables = {var_i, var_f, var_e, var_t}
        all_terminals = {
            ter_a, ter_b, ter_0, ter_1, ter_par_open, ter_par_close,
            ter_mult, ter_plus
        }
        cfg = CFG(all_variables, all_terminals, var_e, productions)

        normal_form = cfg.to_normal_form()
        self.assertEqual(len(normal_form.variables), 15)
        self.assertEqual(len(normal_form.terminals), 8)
        self.assertEqual(len(normal_form.productions), 41)
        self.assertFalse(cfg.is_empty())
        # Converting twice must give an equal grammar.
        normal_form_again = cfg.to_normal_form()
        self.assertEqual(normal_form, normal_form_again)

        # A grammar whose only rule is a unit production generates nothing.
        unit_only = CFG(start_symbol=var_e,
                        productions={Production(var_e, [var_t])})
        normal_form = unit_only.to_normal_form()
        self.assertEqual(len(normal_form.variables), 1)
        self.assertEqual(len(normal_form.terminals), 0)
        self.assertEqual(len(normal_form.productions), 0)
        self.assertTrue(unit_only.is_empty())
Exemplo n.º 4
0
 def test_intersection(self):
     """ Checks the intersection of a CFG with a regex and its automaton """
     regex = Regex("a*b*")
     enfa = regex.to_epsilon_nfa()
     symb_a, symb_b = Symbol("a"), Symbol("b")
     self.assertTrue(enfa.accepts([symb_a, symb_a, symb_b, symb_b]))
     self.assertFalse(enfa.accepts([symb_b, symb_b, symb_a]))

     ter_a, ter_b = Terminal("a"), Terminal("b")
     var_s = Variable("S")
     rules = {
         Production(var_s, [ter_a, var_s, ter_b]),
         Production(var_s, [ter_b, var_s, ter_a]),
         Production(var_s, [])
     }
     cfg = CFG(productions=rules, start_symbol=var_s)
     balanced = [ter_a, ter_a, ter_b, ter_b]
     unbalanced = [ter_a, ter_a, ter_b]
     self.assertTrue(cfg.contains(balanced))
     self.assertFalse(cfg.contains(unbalanced))

     # The same expectations hold for the regex and for its automaton.
     for other in (regex, enfa):
         cfg_i = cfg.intersection(other)
         self.assertTrue(cfg_i.contains(balanced))
         self.assertFalse(cfg_i.contains(unbalanced))
         self.assertTrue(cfg_i.contains([]))
Exemplo n.º 5
0
 def test_derivation_empty(self):
     """ Checks the rightmost derivation of the empty word """
     start = Variable("S")
     cfg = CFG(productions=[Production(start, [Epsilon()])],
               start_symbol=start)
     tree = cfg.get_cnf_parse_tree([])
     steps = tree.get_rightmost_derivation()
     self.assertEqual([[start], []], steps)
Exemplo n.º 6
0
def part2():
    """Count the messages of ``in.txt`` matched by the patched grammar.

    ``in.txt`` holds the rules, a blank line, then one message per line.
    Rules 8 and 11 are replaced by their recursive versions before the
    grammar is built, and the number of messages derivable from rule 0 is
    printed.
    """
    # Context manager so the file handle is closed deterministically.
    with open("in.txt") as handle:
        rules, words = handle.read().split("\n\n")

    # Patch by rule name rather than by substring replacement on the whole
    # text, which could also rewrite unrelated rules such as "128: 42 ...".
    patched_rules = {
        "8": " 42 | 42 8",
        "11": " 42 31 | 42 11 31",
    }

    variables = set()
    productions = set()
    terminals = set()
    for line in rules.split("\n"):
        left, right = line.split(":")
        right = patched_rules.get(left.strip(), right)
        head = Variable(left)
        variables.add(head)
        for expression in right.split("|"):
            if '"' in expression:  # Terminal expression
                terminal = Terminal(expression.strip('" '))
                terminals.add(terminal)
                body = [terminal]
            else:
                body = [
                    Variable(token) for token in expression.strip().split()
                ]
            productions.add(Production(head, body))

    cfg = CFG(variables, terminals, Variable("0"), productions)
    count = sum(1 for word in words.split("\n") if cfg.contains(word))
    print(count)
Exemplo n.º 7
0
    def _to_pretty(cls, cfq: ContextFreeQuery) -> "PrettyContextFreeQuery":
        """Build the pretty counterpart of ``cfq``.

        Every production of ``cfq._cfg`` is rewritten symbol by symbol and
        the result is wrapped in a new instance of ``cls`` whose start
        symbol is ``S``.

        :param cfq: query whose ``_cfg`` and ``_generate_eps`` are read
        :return: a new ``cls`` instance holding the rewritten grammar
        """
        def cfg_obj_to_pretty(cfg_obj: Union[Variable, Terminal]):
            # Variables lose the first two characters of their value.
            if isinstance(cfg_obj, Variable):
                return Variable(cfg_obj.value[2:])
            if isinstance(cfg_obj, Terminal):
                # Two placeholder names map back to the literal characters.
                if cfg_obj.value == 't_newline':
                    return Terminal('\n')
                if cfg_obj.value == 't_escape':
                    return Terminal('\\')
                return Terminal(cls._from_pretty_term(cfg_obj.value))
            # Any other object falls through and yields None.
            return None

        def production_to_pretty(production: Production):
            return Production(
                cfg_obj_to_pretty(production.head),
                [cfg_obj_to_pretty(symbol) for symbol in production.body],
                False
            )

        new_productions = {
            production_to_pretty(production)
            for production in cfq._cfg.productions
        }
        new_cfg = CFG(productions=new_productions, start_symbol=Variable('S'))
        res = cls()
        res._generate_eps = cfq._generate_eps
        res._cfg = new_cfg
        return res
Exemplo n.º 8
0
    def test_intersection_dfa2(self):
        """ Intersects a CFG with a one-state DFA accepting every word """
        only_state = State(0)
        symb_a, symb_b = Symbol("a"), Symbol("b")
        dfa = DeterministicFiniteAutomaton({only_state}, {symb_a, symb_b},
                                           start_state=only_state,
                                           final_states={only_state})
        for symbol in (symb_a, symb_b):
            dfa.add_transition(only_state, symbol, only_state)
        self.assertTrue(dfa.accepts([symb_a, symb_a, symb_b, symb_b]))

        ter_a, ter_b = Terminal("a"), Terminal("b")
        var_s, var_s1, var_l = Variable("S"), Variable("S1"), Variable("L")
        rules = {
            Production(var_s, [var_l, var_s1]),
            Production(var_l, [Epsilon()]),
            Production(var_s1, [ter_a, var_s1, ter_b]),
            Production(var_s1, [ter_b, var_s1, ter_a]),
            Production(var_s1, [])
        }
        cfg = CFG(productions=rules, start_symbol=var_s)
        balanced = [ter_a, ter_a, ter_b, ter_b]
        self.assertTrue(cfg.contains(balanced))
        self.assertFalse(cfg.contains([ter_a, ter_a, ter_b]))

        cfg_i = cfg.intersection(dfa)
        self.assertFalse(cfg_i.is_empty())
        self.assertTrue(cfg_i.contains(balanced))
        self.assertTrue(cfg_i.contains([]))
Exemplo n.º 9
0
    def test_intersection_with_epsilon(self):
        """ Intersects a DFA with a CFG containing an epsilon rule """
        source, target = State(0), State(1)
        symb_a = Symbol("a")
        dfa = DeterministicFiniteAutomaton({source, target}, {symb_a},
                                           start_state=source,
                                           final_states={target})
        dfa.add_transition(source, symb_a, target)
        self.assertTrue(dfa.accepts([symb_a]))

        ter_a = Terminal("a")
        var_s, var_l, var_t = Variable("S"), Variable("L"), Variable("T")
        rules = {
            Production(var_s, [var_l, var_t]),
            Production(var_l, [Epsilon()]),
            Production(var_t, [ter_a])
        }
        cfg = CFG(productions=rules, start_symbol=var_s)
        self.assertFalse(cfg.is_empty())
        self.assertTrue(cfg.contains([ter_a]))

        # Round trip through a PDA.
        round_trip = cfg.to_pda().to_cfg()
        self.assertFalse(round_trip.is_empty())
        self.assertTrue(round_trip.contains([ter_a]))

        # Round trip through both acceptance modes of the PDA.
        pda = cfg.to_pda()
        round_trip = pda.to_final_state().to_empty_stack().to_cfg()
        self.assertFalse(round_trip.is_empty())
        self.assertTrue(round_trip.contains([ter_a]))

        cfg_i = cfg.intersection(dfa)
        self.assertFalse(cfg_i.is_empty())
    def read_grammar_with_regex(cls, name):
        """Read a grammar whose production bodies are regular expressions.

        Each non-blank line of the file is ``HEAD regex``: the first
        whitespace-separated token is the head and the remaining tokens,
        joined by single spaces, form the regex. The head of the first
        production becomes the start symbol.

        :param name: path of the file to read
        :return: the CFG built from all the productions of the file
        """
        # Running counter threaded through read_production_regex
        # (named next_id to avoid shadowing the builtin ``id``).
        next_id = 0

        terminals, variables, productions = set(), set(), set()
        start_symb = None

        with open(name, 'r') as file:
            for production_txt in file:
                tokens = production_txt.split()
                if not tokens:
                    # Blank lines carry no production; skip them instead of
                    # failing with an IndexError.
                    continue

                head = Variable(tokens[0])
                body = ' '.join(tokens[1:])

                if start_symb is None:
                    start_symb = head

                (new_productions, new_variables, new_terminals,
                 next_id) = CFGrammar.read_production_regex(
                     head, Regex(body), next_id)

                productions |= new_productions
                variables |= new_variables
                terminals |= new_terminals

        cfg = CFG(variables, terminals, start_symb, productions)

        return cfg
Exemplo n.º 11
0
    def _test_profiling_intersection(self):
        """ Profiles the intersection with a DFA accepting a^size b^size """
        size = 50
        states = [State(index) for index in range(2 * size + 1)]
        symb_a, symb_b = Symbol("a"), Symbol("b")
        dfa = DeterministicFiniteAutomaton(states, {symb_a, symb_b},
                                           start_state=states[0],
                                           final_states={states[-1]})
        # First half of the chain reads "a", second half reads "b".
        for index in range(size):
            dfa.add_transition(states[index], symb_a, states[index + 1])
        for index in range(size, size * 2):
            dfa.add_transition(states[index], symb_b, states[index + 1])

        ter_a, ter_b = Terminal("a"), Terminal("b")
        var_s, var_s1, var_l = Variable("S"), Variable("S1"), Variable("L")
        rules = [
            Production(var_s, [var_l, var_s1]),
            Production(var_l, [Epsilon()]),
            Production(var_s1, [ter_a, var_s1, ter_b]),
            Production(var_s1, [ter_b, var_s1, ter_a]),
            Production(var_s1, [])
        ]
        cfg = CFG(productions=rules, start_symbol=var_s)
        cfg_i = cfg.intersection(dfa)
        self.assertFalse(cfg_i.is_empty())
        longest = [ter_a] * size + [ter_b] * size
        self.assertTrue(cfg_i.contains(longest))
        self.assertFalse(cfg_i.contains([]))
    def read_grammar(cls, name):
        """Read a grammar from a file with one production per line.

        Each non-blank line is ``HEAD sym1 sym2 ...``, separated by
        whitespace. Upper-case body tokens become variables and every other
        token becomes a terminal. The head of the first production is the
        start symbol.

        :param name: path of the file to read
        :return: the CFG built from the productions of the file
        """
        terminals, variables, productions = set(), set(), set()
        start_symb = None

        with open(name, 'r') as file:
            for production_txt in file:
                tokens = production_txt.split()
                if not tokens:
                    # Blank lines carry no production; skip them instead of
                    # failing while unpacking the head.
                    continue
                head, *body = tokens
                head_var = Variable(head)

                if start_symb is None:
                    start_symb = head_var

                body_cfg = []
                for letter in body:
                    if letter.isupper():
                        variable = Variable(letter)
                        variables.add(variable)
                        body_cfg.append(variable)
                    else:
                        terminal = Terminal(letter)
                        terminals.add(terminal)
                        body_cfg.append(terminal)

                productions.add(Production(head_var, body_cfg))

        cfg = CFG(variables, terminals, start_symb, productions)

        return cfg
Exemplo n.º 13
0
    def __init__(self, rules: Iterable[str], patch: bool = False):
        """Build ``self.CFG`` from textual rules of the form ``name: body``.

        A body starting with a double quote is a terminal rule (the
        character right after the quote is the terminal). Any other body is
        a list of alternatives separated by `` | ``, each one a
        space-separated sequence of rule names.

        :param rules: the rule lines
        :param patch: when true, rules "8" and "11" are replaced by their
            recursive versions
        :raises ValueError: if no rule named "0" (the start rule) is given
        """
        patched_bodies = {"8": "42 | 42 8", "11": "42 31 | 42 11 31"}
        start_var = None
        variables: Set[Variable] = set()
        terminals: Set[Terminal] = set()
        productions: Set[Production] = set()

        for rule in rules:
            name, body = rule.split(": ")
            var = Variable(name)
            variables.add(var)
            if name == "0":
                start_var = var
            if body.startswith('"'):
                ter = Terminal(body[1])
                terminals.add(ter)
                productions.add(Production(var, [ter]))
                continue
            if patch:
                body = patched_bodies.get(name, body)
            for alternative in body.split(" | "):
                productions.add(
                    Production(var,
                               [Variable(x) for x in alternative.split(" ")]))

        if start_var is None:
            # Fail with a clear message rather than an UnboundLocalError.
            raise ValueError('rules must contain a start rule named "0"')

        self.CFG = CFG(variables, terminals, start_var, productions)
Exemplo n.º 14
0
def part2(file='input_test.txt'):
    """Print how many messages match rule 0 once rules 8 and 11 loop."""
    rules, messages = get_input(file)

    # Recursive replacements:
    #   8: 42 | 42 8
    #   11: 42 31 | 42 11 31
    rules['8'] = '42 | 42 8'
    rules['11'] = '42 31 | 42 11 31'

    variables = set()
    productions = set()

    for name in rules:
        alternatives = rules[name].split(' | ')
        variables.add(Variable(name))
        for alternative in alternatives:
            if alternative in ('"a"', '"b"'):
                body = [Terminal(alternative.replace('"', ''))]
            else:
                body = [Variable(token) for token in alternative.split(' ')]
            productions.add(Production(Variable(name), body))

    cfg = CFG(variables, {Terminal('a'), Terminal('b')}, Variable('0'),
              productions)
    count = sum(1 for message in messages if cfg.contains(message))

    print('Part 2: Solution {}'.format(count))
Exemplo n.º 15
0
 def test_to_pda(self):
     """ Checks the size of the PDA obtained from a CFG """
     var_e, var_i = Variable("E"), Variable("I")
     ter_a, ter_b = Terminal("a"), Terminal("b")
     ter_0, ter_1 = Terminal("0"), Terminal("1")
     ter_par_open, ter_par_close = Terminal("("), Terminal(")")
     ter_mult, ter_plus = Terminal("*"), Terminal("+")
     rules = [
         (var_e, [var_i]),
         (var_e, [var_e, ter_plus, var_e]),
         (var_e, [var_e, ter_mult, var_e]),
         (var_e, [ter_par_open, var_e, ter_par_close]),
         (var_i, [ter_a]),
         (var_i, [ter_b]),
         (var_i, [var_i, ter_a]),
         (var_i, [var_i, ter_b]),
         (var_i, [var_i, ter_0]),
         (var_i, [var_i, ter_1]),
         (var_i, [var_i, Epsilon()]),
     ]
     productions = {Production(head, body) for head, body in rules}
     all_terminals = {
         ter_a, ter_b, ter_0, ter_1, ter_par_open, ter_par_close, ter_mult,
         ter_plus
     }
     cfg = CFG({var_e, var_i}, all_terminals, var_e, productions)
     pda = cfg.to_pda()
     self.assertEqual(len(pda.states), 1)
     self.assertEqual(len(pda.final_states), 0)
     self.assertEqual(len(pda.input_symbols), 8)
     self.assertEqual(len(pda.stack_symbols), 10)
     self.assertEqual(pda.get_number_transitions(), 19)
Exemplo n.º 16
0
 def test_generation_words(self):
     """ Checks the words generated up to several maximum lengths """
     ter_a, ter_b = Terminal("a"), Terminal("b")
     var_s = Variable("S")
     rules = {
         Production(var_s, [ter_a, var_s, ter_b]),
         Production(var_s, [])
     }
     cfg = CFG(productions=rules, start_symbol=var_s)
     word_ab = [ter_a, ter_b]
     word_aabb = [ter_a, ter_a, ter_b, ter_b]
     # (max_length, words that must be generated, total number of words)
     expectations = [
         (0, [[]], 1),
         (1, [[]], 1),
         (2, [[], word_ab], 2),
         (3, [[], word_ab], 2),
         (4, [[], word_aabb], 3),
     ]
     for max_length, required, total in expectations:
         words = list(cfg.get_words(max_length=max_length))
         for word in required:
             self.assertIn(word, words)
         self.assertEqual(len(words), total)
Exemplo n.º 17
0
 def test_derivation_does_not_exist(self):
     """ A grammar without productions cannot derive a non-empty word """
     start = Variable("S")
     word = [Terminal("a"), Terminal("b")]
     cfg = CFG(productions=[], start_symbol=start)
     with self.assertRaises(DerivationDoesNotExist):
         tree = cfg.get_cnf_parse_tree(word)
         tree.get_rightmost_derivation()
Exemplo n.º 18
0
 def test_finite(self):
     """ Checks the detection of finite and infinite grammars """
     ter_a, ter_b = Terminal("a"), Terminal("b")
     var_s, var_a, var_b = Variable("S"), Variable("A"), Variable("B")
     rules = {
         Production(var_s, [var_a, var_b]),
         Production(var_a, [ter_a]),
         Production(var_b, [ter_b])
     }
     finite_cfg = CFG(productions=rules, start_symbol=var_s)
     self.assertTrue(finite_cfg.is_finite())
     # A -> S closes a cycle, so the grammar is no longer finite.
     rules.add(Production(var_a, [var_s]))
     cyclic_cfg = CFG(productions=rules, start_symbol=var_s)
     self.assertFalse(cyclic_cfg.is_finite())
Exemplo n.º 19
0
    def test_creation(self):
        """ Tests the creation of a CFG """
        head = Variable(0)
        ter_a = Terminal("a")
        rule = Production(head, [ter_a, Terminal("A"), Variable(1)])
        # Symbols only mentioned in the production are counted as well.
        cfg = CFG({head}, {ter_a}, head, {rule})
        self.assertIsNotNone(cfg)
        self.assertEqual(len(cfg.variables), 2)
        self.assertEqual(len(cfg.terminals), 2)
        self.assertEqual(len(cfg.productions), 1)
        self.assertTrue(cfg.is_empty())

        # A grammar built without arguments has no symbols at all.
        cfg = CFG()
        self.assertIsNotNone(cfg)
        for collection in (cfg.variables, cfg.terminals, cfg.productions):
            self.assertEqual(len(collection), 0)
        self.assertTrue(cfg.is_empty())
Exemplo n.º 20
0
 def test_emptiness(self):
     """ A grammar deriving the empty word is not empty """
     # pylint: disable=too-many-locals
     start = Variable("S")
     ter_a, ter_b = Terminal("a"), Terminal("b")
     rules = {
         Production(start, [ter_a, start, ter_b]),
         Production(start, []),
     }
     cfg = CFG({start}, {ter_a, ter_b}, start, rules)
     self.assertFalse(cfg.is_empty())
    def read_grammar(cls, name):
        """Read a grammar whose bodies are symbol lists or regexes.

        Each non-blank line is ``HEAD <sep> body | body | ...`` where the
        second token (the separator) is ignored and ``|`` separates the
        alternative bodies. A body in which every token contains ``*`` is
        treated as a regex (only its first token is used); otherwise the
        tokens are ``epsilon``, upper-case variables or terminals. The head
        of the first production is the start symbol.

        :param name: path of the file to read
        :return: the CFG built from the productions of the file
        """
        # Running counter threaded through read_production_regex
        # (named next_id to avoid shadowing the builtin ``id``).
        next_id = 0

        terminals, variables, productions = set(), set(), set()
        start_symb = None

        with open(name, 'r') as file:
            for production_txt in file:
                tokens = production_txt.split()
                if not tokens:
                    # Blank lines carry no production; skip them instead of
                    # failing while unpacking the head.
                    continue
                head, _, *body_full = tokens

                if start_symb is None:
                    start_symb = Variable(head)

                # Split the tokens on "|" and drop the separators.
                bodies = [
                    list(group)
                    for is_separator, group in groupby(body_full,
                                                       lambda x: x == "|")
                    if not is_separator
                ]

                for body in bodies:
                    # NOTE(review): a body counts as a regex only when
                    # every token contains "*" -- confirm this is intended.
                    is_regex = all('*' in value for value in body)

                    if is_regex:
                        (new_productions, new_variables, new_terminals,
                         next_id) = CFGrammar.read_production_regex(
                             head, Regex.from_python_regex(body[0]),
                             next_id, False)

                        productions |= new_productions
                        variables |= new_variables
                        terminals |= new_terminals
                    else:
                        body_cfg = []
                        for letter in body:
                            if letter == "epsilon":
                                body_cfg.append(Epsilon())
                            elif letter.isupper():
                                non_terminal = Variable(letter)
                                variables.add(non_terminal)
                                body_cfg.append(non_terminal)
                            else:
                                terminal = Terminal(letter)
                                terminals.add(terminal)
                                body_cfg.append(terminal)

                        productions.add(Production(Variable(head), body_cfg))

        cfg = CFG(variables, terminals, start_symb, productions)

        return cfg
Exemplo n.º 22
0
 def get_weak_cnf(self) -> CFG:
     """Return ``self.cnf``, extended with a fresh start symbol ``S'``
     deriving both the old start symbol and epsilon when
     ``self.generate_epsilon`` is truthy."""
     base = self.cnf
     if not self.generate_epsilon:
         return base
     fresh_start = Variable("S'")
     extended_variables = set(base.variables) | {fresh_start}
     extended_productions = set(base.productions) | {
         Production(fresh_start, [base.start_symbol]),
         Production(fresh_start, []),
     }
     return CFG(extended_variables, base.terminals, fresh_start,
                extended_productions)
Exemplo n.º 23
0
    def test_membership(self):
        """ Checks whether given words belong to several grammars """
        # pylint: disable=too-many-locals
        var_useless = Variable("USELESS")
        var_s, var_b = Variable("S"), Variable("B")
        ter_a, ter_b, ter_c = Terminal("a"), Terminal("b"), Terminal("c")
        prod0 = Production(var_s, [ter_a, var_s, var_b])
        prod1 = Production(var_useless, [ter_a, var_s, var_b])
        prod2 = Production(var_s, [var_useless])
        prod4 = Production(var_b, [ter_b])
        prod5 = Production(var_useless, [])
        cfg0 = CFG({var_useless, var_s}, {ter_a, ter_b}, var_s,
                   {prod0, prod1, prod2, prod4, prod5})
        accepted = [
            [Epsilon()],
            [ter_a, ter_b],
            [ter_a, ter_a, ter_b, ter_b],
            [ter_a, ter_a, ter_a, ter_b, ter_b, ter_b],
        ]
        rejected = [
            [ter_a, ter_b, ter_b],
            [ter_a, ter_b, ter_c, ter_b],
            [ter_a, ter_a, ter_a, ter_b, ter_b],
        ]
        for word in accepted:
            self.assertTrue(cfg0.contains(word))
        for word in rejected:
            self.assertFalse(cfg0.contains(word))

        # Without a nullable rule the empty word is rejected.
        prod3 = Production(var_s, [ter_c])
        cfg0 = CFG({var_s}, {ter_a, ter_b, ter_c}, var_s, {prod0, prod3})
        self.assertFalse(cfg0.contains([Epsilon()]))

        var_a = Variable("A")
        prod6 = Production(var_s, [var_a, var_b])
        prod7 = Production(var_a, [var_a, var_b])
        prod8 = Production(var_a, [ter_a])
        prod9 = Production(var_b, [ter_b])
        cfg1 = CFG({var_a, var_b, var_s}, {ter_a, ter_b}, var_s,
                   {prod6, prod7, prod8, prod9})
        self.assertTrue(cfg1.contains([ter_a, ter_b, ter_b]))
        # Same grammar and word, given as plain strings.
        cfg1 = CFG({"A", "B", "S"}, {"a", "b"}, "S",
                   {prod6, prod7, prod8, prod9})
        self.assertTrue(cfg1.contains(["a", "b", "b"]))
Exemplo n.º 24
0
 def to_wcnf(grammar):
     """Return the normal form of ``grammar``, keeping the empty word.

     When ``grammar`` generates epsilon, a fresh start symbol ``S'`` is
     added that derives both the start symbol of the normal form and
     epsilon; otherwise the normal form is returned unchanged.

     :param grammar: object exposing ``to_normal_form`` and
         ``generate_epsilon`` (either a method or a plain attribute)
     :return: the resulting CFG
     """
     wcnf = grammar.to_normal_form()
     generates_epsilon = grammar.generate_epsilon
     # A bound method is always truthy, so testing it directly would add
     # the epsilon production to every grammar: call it when it is callable.
     if callable(generates_epsilon):
         generates_epsilon = generates_epsilon()
     if not generates_epsilon:
         return wcnf
     new_start_symbol = Variable('S\'')
     new_variables = set(wcnf.variables)
     new_variables.add(new_start_symbol)
     new_productions = set(wcnf.productions)
     new_productions.add(
         Production(new_start_symbol, [wcnf.start_symbol]))
     new_productions.add(Production(new_start_symbol, []))
     return CFG(new_variables, wcnf.terminals, new_start_symbol,
                new_productions)
Exemplo n.º 25
0
 def test_intersection_empty(self):
     """ Intersecting with the empty regex gives a falsy grammar """
     empty_regex = Regex("")
     ter_a, ter_b = Terminal("a"), Terminal("b")
     start = Variable("S")
     rules = {
         Production(start, [ter_a, start, ter_b]),
         Production(start, [ter_b, start, ter_a]),
         Production(start, [])
     }
     cfg = CFG(productions=rules, start_symbol=start)
     intersection = cfg & empty_regex
     self.assertFalse(intersection)
Exemplo n.º 26
0
 def test_union(self):
     """ Checks the union of a grammar with itself """
     start = Variable("S")
     ter_a, ter_b = Terminal("a"), Terminal("b")
     rules = {
         Production(start, [ter_a, start, ter_b]),
         Production(start, []),
     }
     cfg = CFG({start}, {ter_a, ter_b}, start, rules)
     united = cfg.union(cfg)
     self.assertEqual(len(united.variables), 3)
     self.assertEqual(len(united.terminals), 2)
     self.assertEqual(len(united.productions), 6)
     self.assertFalse(united.is_empty())
     self.assertTrue(united.contains([ter_a, ter_a, ter_b, ter_b]))
Exemplo n.º 27
0
 def test_reverse(self):
     """ Checks the grammar obtained by reversing a CFG """
     start = Variable("S")
     ter_a, ter_b = Terminal("a"), Terminal("b")
     rules = {
         Production(start, [ter_a, start, ter_b]),
         Production(start, []),
     }
     cfg = CFG({start}, {ter_a, ter_b}, start, rules)
     reversed_cfg = cfg.reverse()
     self.assertEqual(len(reversed_cfg.variables), 1)
     self.assertEqual(len(reversed_cfg.terminals), 2)
     self.assertEqual(len(reversed_cfg.productions), 2)
     self.assertFalse(reversed_cfg.is_empty())
     self.assertTrue(reversed_cfg.contains([ter_b, ter_b, ter_a, ter_a]))
Exemplo n.º 28
0
 def _test_profiling_conversions():
     """ Runs the CFG -> PDA -> CFG round trip three times in a row """
     ter_a, ter_b, ter_c = Terminal("a"), Terminal("b"), Terminal("c")
     start = Variable("S")
     rules = {
         Production(start, [ter_a, start, ter_b]),
         Production(start, [ter_c])
     }
     cfg = CFG(productions=rules, start_symbol=start)
     # Each pass starts from the grammar produced by the previous one.
     for _ in range(3):
         pda = cfg.to_pda()
         cfg = pda.to_final_state().to_empty_stack().to_cfg()
Exemplo n.º 29
0
    def from_text(text, start_symbol=Variable("S")):
        """Build a CFG from text with one ``HEAD -> regex`` rule per line.

        Every ``?`` in a body is rewritten to ``|`` followed by ``EPS_SYM``
        before the body is parsed as a regex. Blank lines are ignored.

        :param text: the textual description of the grammar
        :param start_symbol: start symbol of the resulting grammar
        :return: the CFG holding the productions of every rule
        """
        production_set = set()

        for line in text.splitlines():
            if not line.strip():
                # Blank lines hold no rule; skip them instead of failing
                # with an IndexError on the missing body.
                continue

            production = line.split(' -> ')
            head = Variable(production[0])
            body_str = production[1].strip()

            body_str = body_str.replace('?', f'|{EPS_SYM}')

            production_set |= Grammar_Wrapper.regex_to_production(
                Regex(body_str), head)

        return CFG(start_symbol=start_symbol, productions=production_set)
Exemplo n.º 30
0
    def __init__(self, start_symbol=None, productions=None):
        """Initialise the grammar with the normal form of the given rules.

        The epsilon production is put back on the start symbol when the
        source grammar generates epsilon, and the productions with a
        two-symbol body are kept in ``self.pair_productions``.
        """
        source = CFG(start_symbol=start_symbol, productions=productions)
        normal_form = source.to_normal_form()

        if source.generate_epsilon():
            # needed for language preservation
            epsilon_rule = Production(normal_form._start_symbol, [])
            normal_form._productions.add(epsilon_rule)

        self.pair_productions = {
            rule for rule in normal_form._productions if len(rule.body) == 2
        }

        super(GrammarCNF, self).__init__(
            start_symbol=normal_form._start_symbol,
            productions=normal_form._productions)