示例#1
0
 def test_get_first_set2(self):
     """ Checks FIRST sets on a grammar whose FIRST sets all hold epsilon

     Grammar taken from:
     https://www.geeksforgeeks.org/first-set-in-syntax-analysis/
     """
     grammar_text = """
         S -> A C B | C b b | B a
         A -> d a | B C
         B -> g | Є
         C -> h | Є
     """
     parser = LLOneParser(CFG.from_text(grammar_text))
     first_set = parser.get_first_set()
     # Variable name -> terminals expected in its FIRST set (one letter each)
     expected_letters = {
         "S": "dghba",
         "A": "dgh",
         "B": "g",
         "C": "h",
     }
     for variable_name, letters in expected_letters.items():
         expected = {Terminal(letter) for letter in letters}
         expected.add(Epsilon())
         self.assertEqual(first_set[Variable(variable_name)], expected)
示例#2
0
    def test_intersection_with_epsilon(self):
        """ Intersects a CFG containing epsilon productions with a DFA

        The grammar is checked directly, after two round trips through a
        PDA, and finally after being intersected with a DFA accepting "a".
        """
        start, end = State(0), State(1)
        symb_a = Symbol("a")
        dfa = DeterministicFiniteAutomaton({start, end}, {symb_a},
                                           start_state=start,
                                           final_states={end})
        dfa.add_transition(start, symb_a, end)
        self.assertTrue(dfa.accepts([symb_a]))

        ter_a = Terminal("a")
        var_s, var_l, var_t = Variable("S"), Variable("L"), Variable("T")
        cfg = CFG(start_symbol=var_s,
                  productions={
                      Production(var_s, [var_l, var_t]),
                      Production(var_l, [Epsilon()]),
                      Production(var_t, [ter_a]),
                      Production(var_t, [Epsilon()]),
                  })
        self.assertFalse(cfg.is_empty())
        self.assertTrue(cfg.contains([ter_a]))

        # CFG -> PDA -> CFG
        via_pda = cfg.to_pda().to_cfg()
        self.assertFalse(via_pda.is_empty())
        self.assertTrue(via_pda.contains([ter_a]))

        # CFG -> PDA -> final-state PDA -> empty-stack PDA -> CFG
        pda_final_state = cfg.to_pda().to_final_state()
        via_both_modes = pda_final_state.to_empty_stack().to_cfg()
        self.assertFalse(via_both_modes.is_empty())
        self.assertTrue(via_both_modes.contains([ter_a]))

        intersection = cfg.intersection(dfa)
        self.assertFalse(intersection.is_empty())
示例#3
0
 def _get_first_set_production(production, first_set):
     first_not_containing_epsilon = 0
     first_set_temp = set()
     for body_component in production.body:
         first_set_temp = first_set_temp.union(
             first_set.get(production.body[first_not_containing_epsilon],
                           set()))
         if Epsilon() not in first_set.get(body_component, set()):
             break
         first_not_containing_epsilon += 1
     if first_not_containing_epsilon != len(production.body):
         if Epsilon() in first_set_temp:
             first_set_temp.remove(Epsilon())
     return first_set_temp
    def read_production_regex(cls, head, regex, id, case_sens=True):
        """ Translates a regular expression into productions rooted at head

        The regex is turned into a minimized automaton. One fresh variable
        is created per state, one production per transition, one production
        from ``head`` to each start state, and one empty production per
        accepting state.

        Parameters
        ----------
        head
            Head of the productions leading into the automaton start states
        regex
            Object exposing ``to_epsilon_nfa()``
        id : int
            Counter used to name the state variables (``Id<id>,<state>``);
            it is incremented once per state
        case_sens : bool
            When True, upper-case symbols become variables and lower-case
            or numeric symbols become terminals. When False, every symbol
            other than ``'eps'`` becomes a terminal.

        Returns
        -------
        productions : set
            The generated productions
        variables : set
            Variables read from the regex symbols (or ``head`` alone when
            the automaton has no state)
        terminals : set
            Terminals read from the regex symbols
        id : int
            The counter after naming the states

        Raises
        ------
        ValueError
            When ``case_sens`` is True and a symbol is neither upper case,
            lower case nor numeric
        """
        var_by_state = dict()
        terminals, variables, productions = set(), set(), set()

        enfa = regex.to_epsilon_nfa().minimize()

        if len(enfa.states) == 0:
            variables.add(head)
            productions.add(Production(head, [Epsilon()]))
            return productions, variables, terminals, id

        for state in enfa.states:
            var_by_state[state] = Variable(f'Id{id},{state}')
            id += 1

        for start_st in enfa.start_states:
            productions.add(Production(head, [var_by_state[start_st]]))

        # Every accepting state may end the derivation. This is done for all
        # of them, not only for those reached by a transition, so that an
        # accepting start state without incoming edge is not forgotten.
        for final_st in enfa.final_states:
            productions.add(Production(var_by_state[final_st], []))

        for st_from, symb, st_to in enfa._transition_function:
            value = symb.value

            if value == 'eps':
                body_symbol = Epsilon()
            elif value.isupper() and case_sens:
                body_symbol = Variable(value)
                variables.add(body_symbol)
            elif value.isdigit() or value.islower() or not case_sens:
                body_symbol = Terminal(value)
                # Terminals belong to the terminal set, not to the variables
                terminals.add(body_symbol)
            else:
                raise ValueError(
                    f'Symbol "{value}" should be either lower or upper case')

            productions.add(Production(var_by_state[st_from],
                                       [body_symbol, var_by_state[st_to]]))

        return productions, variables, terminals, id
示例#5
0
 def test_nullable_object(self):
     """ Tests the finding of nullable objects """
     start, var_a, var_b = Variable("S"), Variable("A"), Variable("B")
     ter_a, ter_b = Terminal("a"), Terminal("b")
     productions = {
         Production(start, [var_a, var_b]),
         Production(var_a, [ter_a, var_a, var_a]),
         Production(var_a, [Epsilon()]),
         Production(var_b, [ter_b, var_b, var_b]),
         Production(var_b, [Epsilon()]),
     }
     cfg = CFG({var_a, var_b, start}, {ter_a, ter_b}, start, productions)
     # A and B have an epsilon production and S is made of A and B only
     nullable = cfg.get_nullable_symbols()
     self.assertEqual(nullable, {var_a, var_b, start})
示例#6
0
    def get_parse_tree(self, word, left=True):
        """
            Builds a parse tree for the given word

            Parameters
            ----------
            word : list
                The word to parse; epsilon entries are dropped and the
                remaining ones are converted to terminals
            left
                Whether the recursion goes from the left or from the \
                right (left by default)

            Returns
            -------
            parse_tree : :class:`~pyformlang.cfg.ParseTree`
                The parse tree rooted at the start symbol

            Raises
            --------
            NotParsableException
                When the word cannot be parsed

        """
        terminals = []
        for symbol in word:
            if symbol != Epsilon():
                terminals.append(to_terminal(symbol))
        root = ParseTree(self._cfg.start_symbol)
        pending = [(self._cfg.start_symbol, root)]
        if not self._get_parse_tree_sub(terminals, pending, left):
            raise NotParsableException
        return root
示例#7
0
    def _test_profiling_intersection(self):
        """ Intersection with a long chain DFA (a^size b^size)

        The leading underscore keeps the name outside the ``test_`` prefix.
        """
        size = 50
        states = list(map(State, range(2 * size + 1)))
        symb_a, symb_b = Symbol("a"), Symbol("b")
        dfa = DeterministicFiniteAutomaton(states, {symb_a, symb_b},
                                           start_state=states[0],
                                           final_states={states[-1]})
        # First half of the chain reads "a", second half reads "b"
        for index in range(2 * size):
            symbol = symb_a if index < size else symb_b
            dfa.add_transition(states[index], symbol, states[index + 1])

        ter_a, ter_b = Terminal("a"), Terminal("b")
        var_s, var_s1, var_l = Variable("S"), Variable("S1"), Variable("L")
        cfg = CFG(start_symbol=var_s,
                  productions=[
                      Production(var_s, [var_l, var_s1]),
                      Production(var_l, [Epsilon()]),
                      Production(var_s1, [ter_a, var_s1, ter_b]),
                      Production(var_s1, [ter_b, var_s1, ter_a]),
                      Production(var_s1, []),
                  ])
        intersection = cfg.intersection(dfa)
        self.assertFalse(intersection.is_empty())
        longest_word = [ter_a] * size + [ter_b] * size
        self.assertTrue(intersection.contains(longest_word))
        self.assertFalse(intersection.contains([]))
示例#8
0
    def from_cfg(cls, cfg: CFG):
        """ Builds a recursive automaton out of a context-free grammar

        Each line of the normalized grammar text yields one box: the body
        is read as a regex, converted to a minimized automaton and labelled
        with the head of the line.

        Parameters
        -----------
        cfg : :class:`~pyformlang.cfg.CFG`
            The context-free grammar

        Returns
        -----------
        rsa : :class:`~pyformlang.rsa.RecursiveAutomaton`
            The new recursive automaton built from context-free grammar
        """
        initial_label = to_symbol(cfg.start_symbol)
        normalized_text = remove_repetition_of_nonterminals_from_productions(
            cfg.to_text())
        epsilon_text = Epsilon().to_text()

        labels = set()
        boxes = set()
        for line in normalized_text.splitlines():
            head_text, body_text = line.split(" -> ")
            labels.add(to_symbol(head_text))
            # An empty body stands for epsilon
            regex = Regex(body_text if body_text != "" else epsilon_text)
            automaton = regex.to_epsilon_nfa().minimize()
            boxes.add(Box(automaton, to_symbol(head_text)))

        return RecursiveAutomaton(labels, initial_label, boxes)
示例#9
0
    def test_intersection_dfa2(self):
        """ Intersects a CFG with a one-state DFA looping on a and b """
        state0 = State(0)
        symb_a, symb_b = Symbol("a"), Symbol("b")
        dfa = DeterministicFiniteAutomaton({state0}, {symb_a, symb_b},
                                           start_state=state0,
                                           final_states={state0})
        for symbol in (symb_a, symb_b):
            dfa.add_transition(state0, symbol, state0)
        self.assertTrue(dfa.accepts([symb_a, symb_a, symb_b, symb_b]))

        ter_a, ter_b = Terminal("a"), Terminal("b")
        var_s, var_s1, var_l = Variable("S"), Variable("S1"), Variable("L")
        cfg = CFG(start_symbol=var_s,
                  productions={
                      Production(var_s, [var_l, var_s1]),
                      Production(var_l, [Epsilon()]),
                      Production(var_s1, [ter_a, var_s1, ter_b]),
                      Production(var_s1, [ter_b, var_s1, ter_a]),
                      Production(var_s1, []),
                  })
        self.assertTrue(cfg.contains([ter_a] * 2 + [ter_b] * 2))
        self.assertFalse(cfg.contains([ter_a] * 2 + [ter_b]))

        intersection = cfg.intersection(dfa)
        self.assertFalse(intersection.is_empty())
        self.assertTrue(intersection.contains([ter_a] * 2 + [ter_b] * 2))
        self.assertTrue(intersection.contains([]))
示例#10
0
 def test_to_pda(self):
     """ Tests the conversion to PDA """
     var_e, var_i = Variable("E"), Variable("I")
     ter_a, ter_b = Terminal("a"), Terminal("b")
     ter_0, ter_1 = Terminal("0"), Terminal("1")
     ter_par_open, ter_par_close = Terminal("("), Terminal(")")
     ter_mult, ter_plus = Terminal("*"), Terminal("+")

     # Bodies grouped by head
     bodies_e = [
         [var_i],
         [var_e, ter_plus, var_e],
         [var_e, ter_mult, var_e],
         [ter_par_open, var_e, ter_par_close],
     ]
     bodies_i = [
         [ter_a],
         [ter_b],
         [var_i, ter_a],
         [var_i, ter_b],
         [var_i, ter_0],
         [var_i, ter_1],
         [var_i, Epsilon()],
     ]
     productions = {Production(var_e, body) for body in bodies_e}
     productions |= {Production(var_i, body) for body in bodies_i}
     terminals = {
         ter_a, ter_b, ter_0, ter_1,
         ter_par_open, ter_par_close, ter_mult, ter_plus,
     }
     cfg = CFG({var_e, var_i}, terminals, var_e, productions)

     pda = cfg.to_pda()
     self.assertEqual(len(pda.states), 1)
     self.assertEqual(len(pda.final_states), 0)
     self.assertEqual(len(pda.input_symbols), 8)
     self.assertEqual(len(pda.stack_symbols), 10)
     self.assertEqual(pda.get_number_transitions(), 19)
示例#11
0
    def test_generating_object(self):
        """ Test the finding of CFGObject """
        start, var_a, var_b = Variable("S"), Variable("A"), Variable("B")
        ter_a, ter_b = Terminal("a"), Terminal("b")
        prod0 = Production(start, [var_a, var_b])
        prod1 = Production(start, [ter_a])
        prod2 = Production(var_a, [ter_b])

        # Without any production for B, B is not generating
        cfg_without_b = CFG({var_a, var_b, start}, {ter_a, ter_b}, start,
                            {prod0, prod1, prod2})
        self.assertEqual(len(cfg_without_b.variables), 3)
        self.assertEqual(len(cfg_without_b.terminals), 2)
        self.assertEqual(len(cfg_without_b.productions), 3)
        self.assertEqual(cfg_without_b.get_generating_symbols(),
                         {var_a, ter_a, ter_b, start})

        # Once B can produce epsilon, it is generating as well
        prod3 = Production(var_b, [Epsilon()])
        cfg_with_b = CFG({var_a, var_b, start}, {ter_a, ter_b}, start,
                         {prod0, prod1, prod2, prod3})
        self.assertEqual(len(cfg_with_b.variables), 3)
        self.assertEqual(len(cfg_with_b.terminals), 2)
        self.assertEqual(len(cfg_with_b.productions), 4)
        self.assertEqual(cfg_with_b.get_generating_symbols(),
                         {var_a, var_b, ter_a, ter_b, start})
示例#12
0
 def test_derivation_empty(self):
     """ Rightmost derivation of the empty word with the rule S -> epsilon """
     var_s = Variable("S")
     cfg = CFG(start_symbol=var_s,
               productions=[Production(var_s, [Epsilon()])])
     derivation = cfg.get_cnf_parse_tree([]).get_rightmost_derivation()
     # One step: the start symbol, then nothing
     expected = [[var_s], []]
     self.assertEqual(expected, derivation)
示例#13
0
 def _initialize_follow_set(self, first_set):
     """ Builds the initial FOLLOW sets from the FIRST sets

     The start symbol is seeded with "$". Then, for each body component,
     the FIRST sets of the components after it are added, stopping after
     the first one whose FIRST set lacks epsilon. Epsilon itself is never
     kept in a FOLLOW set.

     Parameters
     ----------
     first_set : dict
         FIRST sets indexed by symbol

     Returns
     -------
     follow_set : dict
         The initial FOLLOW sets
     to_process : SetQueue
         The start symbol and every component with a non-empty FOLLOW set
     """
     to_process = SetQueue()
     start_symbol = self._cfg.start_symbol
     follow_set = {start_symbol: {"$"}}
     to_process.append(start_symbol)
     for production in self._cfg.productions:
         body = production.body
         for position, component in enumerate(body, start=1):
             for following in body[position:]:
                 following_first = first_set.get(following, set())
                 follow_set[component] = follow_set.get(
                     component, set()).union(following_first)
                 if Epsilon() not in following_first:
                     break
             collected = follow_set.get(component)
             if collected:
                 collected.discard(Epsilon())
             if collected:
                 to_process.append(component)
     return follow_set, to_process
    def read_grammar(cls, name):
        """ Builds a CFG from a text file holding one rule per line

        Each non-blank line is split on whitespace: the first token is the
        head, the second token (the arrow) is ignored, and the remaining
        tokens form the bodies, separated by ``|``. The head of the first
        rule becomes the start symbol.

        A body in which every token contains ``*`` is treated as a regular
        expression: its first token is handed to
        ``CFGrammar.read_production_regex``. In any other body, the token
        ``epsilon`` stands for epsilon, upper-case tokens are variables and
        all remaining tokens are terminals.

        Parameters
        ----------
        name : str
            Path of the file to read

        Returns
        -------
        cfg : CFG
            The grammar described by the file

        Raises
        ------
        ValueError
            When a non-blank line holds fewer than two tokens
        """
        next_id = 0

        terminals, variables, productions = set(), set(), set()
        start_symb = None

        with open(name, 'r') as file:
            for production_txt in file:
                tokens = production_txt.split()

                # Blank lines carry no rule; without this guard they made
                # the unpacking below fail
                if not tokens:
                    continue

                head, _, *body_full = tokens

                if start_symb is None:
                    start_symb = Variable(head)

                bodies = [
                    list(group)
                    for is_separator, group in groupby(
                        body_full, lambda token: token == "|")
                    if not is_separator
                ]

                for body in bodies:
                    if all('*' in value for value in body):
                        (new_productions, new_variables, new_terminals,
                         next_id) = CFGrammar.read_production_regex(
                             head, Regex.from_python_regex(body[0]),
                             next_id, False)

                        productions |= new_productions
                        variables |= new_variables
                        terminals |= new_terminals
                    else:
                        body_cfg = []
                        for letter in body:
                            if letter == "epsilon":
                                body_cfg.append(Epsilon())
                            elif letter.isupper():
                                non_terminal = Variable(letter)
                                variables.add(non_terminal)
                                body_cfg.append(non_terminal)
                            else:
                                terminal = Terminal(letter)
                                terminals.add(terminal)
                                body_cfg.append(terminal)

                        productions.add(Production(Variable(head), body_cfg))

        cfg = CFG(variables, terminals, start_symb, productions)

        return cfg
示例#15
0
 def test_get_first_set(self):
     """ Checks FIRST sets on the classical expression grammar

     Grammar taken from:
     https://www.geeksforgeeks.org/first-set-in-syntax-analysis/
     """
     grammar_text = """
         E  -> T E’
         E’ -> + T E’ | Є
         T  -> F T’
         T’ -> * F T’ | Є
         F  -> ( E ) | id
     """
     parser = LLOneParser(CFG.from_text(grammar_text))
     first_set = parser.get_first_set()
     expectations = [
         ("E", {Terminal("("), Terminal("id")}),
         ("E’", {Terminal("+"), Epsilon()}),
         ("T", {Terminal("("), Terminal("id")}),
         ("T’", {Terminal("*"), Epsilon()}),
         ("F", {Terminal("("), Terminal("id")}),
     ]
     for variable_name, expected in expectations:
         self.assertEqual(first_set[Variable(variable_name)], expected)
示例#16
0
    def test_membership(self):
        """ Tests the membership of a CFG """
        # pylint: disable=too-many-locals
        var_useless = Variable("USELESS")
        var_s, var_b = Variable("S"), Variable("B")
        ter_a, ter_b, ter_c = Terminal("a"), Terminal("b"), Terminal("c")
        prod0 = Production(var_s, [ter_a, var_s, var_b])
        prod1 = Production(var_useless, [ter_a, var_s, var_b])
        prod2 = Production(var_s, [var_useless])
        prod4 = Production(var_b, [ter_b])
        prod5 = Production(var_useless, [])

        # First grammar: words of the form a^n b^n, the empty word included
        cfg0 = CFG({var_useless, var_s}, {ter_a, ter_b}, var_s,
                   {prod0, prod1, prod2, prod4, prod5})
        accepted_words = [
            [Epsilon()],
            [ter_a, ter_b],
            [ter_a] * 2 + [ter_b] * 2,
            [ter_a] * 3 + [ter_b] * 3,
        ]
        for word in accepted_words:
            self.assertTrue(cfg0.contains(word))
        rejected_words = [
            [ter_a, ter_b, ter_b],
            [ter_a, ter_b, ter_c, ter_b],
            [ter_a] * 3 + [ter_b] * 2,
        ]
        for word in rejected_words:
            self.assertFalse(cfg0.contains(word))

        # Second grammar: no way to produce the empty word
        prod3 = Production(var_s, [ter_c])
        cfg0 = CFG({var_s}, {ter_a, ter_b, ter_c}, var_s, {prod0, prod3})
        self.assertFalse(cfg0.contains([Epsilon()]))

        # Third grammar, built once from objects and once from raw strings
        var_a = Variable("A")
        prod6 = Production(var_s, [var_a, var_b])
        prod7 = Production(var_a, [var_a, var_b])
        prod8 = Production(var_a, [ter_a])
        prod9 = Production(var_b, [ter_b])
        cfg1 = CFG({var_a, var_b, var_s}, {ter_a, ter_b}, var_s,
                   {prod6, prod7, prod8, prod9})
        self.assertTrue(cfg1.contains([ter_a, ter_b, ter_b]))
        cfg1 = CFG({"A", "B", "S"}, {"a", "b"}, "S",
                   {prod6, prod7, prod8, prod9})
        self.assertTrue(cfg1.contains(["a", "b", "b"]))
示例#17
0
def cyk(cfgrammar, w):
    """ Decides membership of a word with the CYK algorithm

    Parameters
    ----------
    cfgrammar
        Grammar exposing ``variables``, ``productions``, ``start_symbol``
        and ``generate_epsilon()``. Bodies of length two are looked up in
        ``variables``, so both of their symbols must be variables of the
        grammar.
    w : str
        The word, given as whitespace-separated terminal names

    Returns
    -------
    contains
        ``cfgrammar.generate_epsilon()`` when the word has no token,
        otherwise a bool telling whether the start symbol derives the word

    Raises
    ------
    KeyError
        When a symbol of a used length-two body, the head of a used
        single-symbol production or the start symbol is not in
        ``cfgrammar.variables``
    """
    tokens = w.split()
    length = len(tokens)
    if length == 0:
        return cfgrammar.generate_epsilon()

    variables = {
        variable: index
        for index, variable in enumerate(cfgrammar.variables)
    }
    # derives[v][i][j] tells whether variable number v derives tokens[i..j]
    derives = [[[False] * length for _ in range(length)] for _ in variables]

    # Materialized once so that both passes see the same production order
    productions = list(cfgrammar.productions)

    # Heads of the productions, grouped by what check_eps gives for them
    heads_by_body = defaultdict(list)
    for production in productions:
        heads_by_body[check_eps(production)].append(production.head)

    # Spans of length one: productions whose body is the token itself
    for position, token in enumerate(tokens):
        for head in heads_by_body.get(Terminal(token), ()):
            derives[variables[head]][position][position] = True

    # Only length-two bodies whose head is a known variable take part in
    # the combination step
    binary_rules = []
    for production in productions:
        if len(production.body) == 2 and production.head in variables:
            left, right = list(production.body)
            binary_rules.append((variables[production.head], left, right))

    for span in range(1, length):
        for i in range(length - span):
            j = i + span
            for head_index, left, right in binary_rules:
                if derives[head_index][i][j]:
                    # Already established by an earlier production
                    continue
                left_row = derives[variables[left]][i]
                right_table = derives[variables[right]]
                derives[head_index][i][j] = any(
                    left_row[k] and right_table[k + 1][j]
                    for k in range(i, j))

    return bool(derives[variables[cfgrammar.start_symbol]][0][length - 1])
示例#18
0
 def _get_triggers_follow_set(self, first_set):
     triggers = dict()
     for production in self._cfg.productions:
         if production.head not in triggers:
             triggers[production.head] = set()
         for i, component in enumerate(production.body):
             all_epsilon = True
             for component_next in production.body[i + 1:]:
                 if Epsilon() not in first_set.get(component_next, set()):
                     all_epsilon = False
                     break
             if all_epsilon:
                 triggers[production.head].add(component)
     return triggers
示例#19
0
 def _initialize_first_set(self, triggers):
     """ Builds the initial FIRST sets and the queue of symbols to visit

     Every terminal starts with itself as FIRST set, and every head of a
     production with an empty body starts with epsilon. Whatever these
     symbols trigger is queued for processing.

     Parameters
     ----------
     triggers : dict
         Maps a symbol to the symbols to revisit when its FIRST set changes

     Returns
     -------
     first_set : dict
         The initial FIRST sets
     to_process : SetQueue
         The symbols triggered by the initialization
     """
     to_process = SetQueue()
     first_set = dict()

     def seed(symbol, initial):
         # Record the starting FIRST set and queue what depends on it
         first_set[symbol] = initial
         for triggered in triggers.get(symbol, []):
             to_process.append(triggered)

     for terminal in self._cfg.terminals:
         seed(terminal, {terminal})
     for production in self._cfg.productions:
         if production.body:
             continue
         seed(production.head, {Epsilon()})
     return first_set, to_process
示例#20
0
def regex_to_grammar_productions(regex, head, var_dict, terminal_dict):
    """ Translates a regex into productions rooted at ``head``

    The regex is converted to a minimized automaton. Each state gets a
    fresh variable named ``<head>#REGEX#<n>``; ``head`` produces the
    variable of each start state, each transition yields a production
    ``state -> symbol next_state`` and each accepting state gets an empty
    production.

    Parameters
    ----------
    regex
        Object exposing ``to_epsilon_nfa()``
    head
        Head of the generated start productions; its ``value`` is used to
        name the state variables
    var_dict : dict
        Maps automaton symbols to the grammar variables they stand for
    terminal_dict : dict
        Maps automaton symbols to the grammar terminals they stand for

    Returns
    -------
    production_set : set
        The generated productions

    Raises
    ------
    ValueError
        When a transition symbol is in neither dictionary and is not the
        epsilon symbol
    """
    production_set = set()

    # Getting a minimized automaton from the regex
    enfa = regex.to_epsilon_nfa()
    enfa = enfa.minimize()
    transitions = enfa._transition_function._transitions

    # Producing variables from automaton states
    _var_dict = {}
    for state in enfa.states:
        _var_dict[state] = Variable(
            # Creating new CFG variable with unique name
            '%s#REGEX#%s' % (head.value, get_new_var_num()))

    # Productions from head to the start states. They do not depend on the
    # transitions, so they are emitted even when the automaton has none.
    for start_state in enfa.start_states:
        production_set.add(Production(head, [_var_dict[start_state]]))

    # Every accepting state may end the derivation, including one that no
    # transition leads to (an accepting start state)
    for final_state in enfa.final_states:
        production_set.add(Production(_var_dict[final_state], []))

    # Getting productions from automaton transitions
    for head_state in transitions:
        inner_head = _var_dict[head_state]
        for sym in list(transitions[head_state]):
            body_state = transitions[head_state][sym]

            if sym in var_dict:
                inner_symbol = var_dict[sym]
            elif sym in terminal_dict:
                inner_symbol = terminal_dict[sym]
            elif sym == EPS_SYM:
                inner_symbol = Epsilon()
            else:
                raise ValueError(f'''Symbol "{sym}" is not defined as
                                 a terminal or a variable''')

            production_set.add(
                Production(inner_head, [inner_symbol, _var_dict[body_state]]))
    return production_set
示例#21
0
    def get_llone_parse_tree(self, word):
        """
        Get LL(1) parse Tree

        The word is parsed with an explicit stack of parse tree nodes and
        the LL(1) parsing table: a node matching the next input symbol
        consumes it, any other node is expanded with the single rule the
        table gives for it.

        Parameters
        ----------
        word : list
            The word to parse

        Returns
        -------
        parse_tree : :class:`~pyformlang.cfg.ParseTree`
            The parse tree

        Raises
        --------
        NotParsableException
            When the word cannot be parsed

        """
        word = [to_terminal(x) for x in word if x != Epsilon()]
        word.append("$")
        # Reversed so that the next input symbol is always word[-1]
        word = word[::-1]
        parsing_table = self.get_llone_parsing_table()
        parse_tree = ParseTree(self._cfg.start_symbol)
        stack = ["$", parse_tree]
        while stack:
            current = stack.pop()
            if current == "$":
                if word[-1] == "$":
                    return parse_tree
                # The tree is complete but input remains. The marker is a
                # plain string, so it must not reach the ``.value`` accesses
                # below.
                raise NotParsableException
            if current.value == word[-1]:
                word.pop()
            else:
                rule_applied = list(
                    parsing_table.get(current.value, dict()).get(word[-1], []))
                if len(rule_applied) == 1:
                    # Pushed right to left so the leftmost son is popped
                    # first
                    for component in rule_applied[0].body[::-1]:
                        new_node = ParseTree(component)
                        current.sons.append(new_node)
                        stack.append(new_node)
                else:
                    raise NotParsableException
                # Sons were appended in reverse; restore the body order
                current.sons = current.sons[::-1]
        raise NotParsableException
示例#22
0
 def test_remove_epsilon(self):
     """ Tests the removal of epsilon """
     start, var_a, var_b = Variable("S"), Variable("A"), Variable("B")
     ter_a, ter_b = Terminal("a"), Terminal("b")
     productions = {
         Production(start, [var_a, var_b]),
         Production(var_a, [ter_a, var_a, var_a]),
         Production(var_a, [Epsilon()]),
         Production(var_b, [ter_b, var_b, var_b]),
         Production(var_b, []),
     }
     cfg = CFG({var_a, var_b, start}, {ter_a, ter_b}, start, productions)
     without_epsilon = cfg.remove_epsilon()
     self.assertEqual(len(without_epsilon.variables), 3)
     self.assertEqual(len(without_epsilon.terminals), 2)
     self.assertEqual(len(set(without_epsilon.productions)), 9)
     self.assertEqual(len(without_epsilon.get_nullable_symbols()), 0)
     # Emptiness is checked on the source grammar
     self.assertFalse(cfg.is_empty())
示例#23
0
    def regex_to_grammar_productions(regex, head):
        """ Translates a regex into productions rooted at ``head``

        The regex is converted to a minimized automaton. Each state gets a
        fresh variable named ``<head>#REGEX#<n>``; ``head`` produces the
        variable of each start state, each transition yields a production
        ``state -> symbol next_state`` and each accepting state gets an
        empty production. A transition symbol becomes epsilon when its
        value is the epsilon symbol, a variable when its value is upper
        case, and a terminal otherwise.

        Parameters
        ----------
        regex
            Object exposing ``to_epsilon_nfa()``
        head
            Head of the generated start productions; its ``value`` is used
            to name the state variables

        Returns
        -------
        production_set : set
            The generated productions
        """
        production_set = set()

        # Getting a minimized automaton from the regex
        enfa = regex.to_epsilon_nfa()
        enfa = enfa.minimize()
        transitions = enfa._transition_function._transitions

        _var_dict = {}
        for state in enfa.states:
            _var_dict[state] = Variable(
                # Creating new CFG variable with unique name
                '%s#REGEX#%s' % (head.value, get_new_var_num()))

        # Productions from head to the start states. They do not depend on
        # the transitions, so they are emitted even when there are none.
        for start_state in enfa.start_states:
            production_set.add(Production(head, [_var_dict[start_state]]))

        # Every accepting state may end the derivation, including one that
        # no transition leads to (an accepting start state)
        for final_state in enfa.final_states:
            production_set.add(Production(_var_dict[final_state], []))

        # Getting productions from automaton transitions
        for head_state in transitions:
            inner_head = _var_dict[head_state]
            for sym in list(transitions[head_state]):
                body_state = transitions[head_state][sym]

                if sym.value == EPS_SYM:
                    inner_symbol = Epsilon()
                elif sym.value.isupper():
                    inner_symbol = Variable(sym)
                else:
                    inner_symbol = Terminal(sym)

                production_set.add(Production(
                    inner_head, [inner_symbol, _var_dict[body_state]]))
        return production_set
示例#24
0
    def regex_to_production(regex, head):
        """ Translates a regex into productions rooted at ``head``

        The regex is converted to a minimized automaton. Each state gets a
        fresh variable named ``<head>#REGEX#<n>``; ``head`` produces the
        variable of each start state, each transition yields a production
        ``state -> symbol next_state`` and each accepting state gets an
        empty production. A transition symbol becomes epsilon when its
        value is the epsilon symbol, a variable when its value is upper
        case, and a terminal otherwise.

        Parameters
        ----------
        regex
            Object exposing ``to_epsilon_nfa()``
        head
            Head of the generated start productions; its ``value`` is used
            to name the state variables

        Returns
        -------
        production_set : set
            The generated productions
        """
        production_set = set()

        enfa = regex.to_epsilon_nfa()
        enfa = enfa.minimize()
        transitions = enfa._transition_function._transitions

        _dict = {}
        for state in enfa.states:
            _dict[state] = Variable('%s#REGEX#%s' %
                                    (head.value, get_new_var_num()))

        # Start productions do not depend on the transitions, so they are
        # emitted even when the automaton has none
        for start_state in enfa.start_states:
            production_set.add(Production(head, [_dict[start_state]]))

        # Every accepting state may end the derivation, including one that
        # no transition leads to (an accepting start state)
        for final_state in enfa.final_states:
            production_set.add(Production(_dict[final_state], []))

        for head_state in transitions:
            inner_head = _dict[head_state]
            for symbol in list(transitions[head_state]):
                body_state = transitions[head_state][symbol]

                if symbol.value == EPS_SYM:
                    inner_symbol = Epsilon()
                elif symbol.value.isupper():
                    inner_symbol = Variable(symbol)
                else:
                    inner_symbol = Terminal(symbol)

                production_set.add(Production(
                    inner_head, [inner_symbol, _dict[body_state]]))
        return production_set
示例#25
0
 def test_pda_object_creator(self):
     """ Checks that the CFG epsilon is converted to the PDA epsilon """
     creator = PDAObjectCreator([], [])
     as_symbol = creator.get_symbol_from(Epsilon())
     self.assertEqual(as_symbol, pda.Epsilon())
     as_stack_symbol = creator.get_stack_symbol_from(Epsilon())
     self.assertEqual(as_stack_symbol, pda.Epsilon())
示例#26
0
def check_eps(p):
    """ Gives the key under which a production is indexed by its body

    Parameters
    ----------
    p
        Object exposing a ``body`` sequence

    Returns
    -------
    key
        ``Epsilon()`` for an empty body, the single symbol for a body of
        length one, and ``None`` for any longer body
    """
    body = p.body
    if not body:
        return Epsilon()
    if len(body) != 1:
        return None
    return list(body)[0]