Example #1
def probe(*args):
    print "--", len(args), "-", " ".join(map(str, args))


def prompt(*args):
    print ">>> ",


grammar = from_string(rpcalc_grammar,
                      'input',
                      print_result=print_result,
                      probe=probe,
                      prompt=prompt,
                      up=lambda x: x,
                      add=lambda x, y: x + y,
                      sub=lambda x, y: x - y,
                      mul=lambda x, y: x * y,
                      div=lambda x, y: x / y,
                      mod=lambda x, y: x % y,
                      neg=lambda x: -x)

states, gotos, first = build_parsing_table(grammar, LR0(grammar.START, 0, 0))
driver = Driver(states, gotos, first)

print "Reverse polish calculator. Write expressions like "
print " >>> 2 + 3       (result in 2 + 3 = 5)  or "
print " >>> 2 + 4 * 5   (result in 2 + (4 * 5) = 22)"
print
prompt()
driver.parse(CalcLexer(sys.stdin))
Example #2

def prompt(*args):
    print ">>> ",


grammar = from_string(
    rpcalc_grammar,
    "input",
    print_result=print_result,
    probe=probe,
    prompt=prompt,
    up=lambda x: x,
    add=lambda x, y: x + y,
    sub=lambda x, y: x - y,
    mul=lambda x, y: x * y,
    div=lambda x, y: x / y,
    mod=lambda x, y: x % y,
    neg=lambda x: -x,
)

states, gotos, first = build_parsing_table(grammar, LR0(grammar.START, 0, 0))
driver = Driver(states, gotos, first)

print "Reverse polish calculator. Write expressions like "
print " >>> 2 + 3       (result in 2 + 3 = 5)  or "
print " >>> 2 + 4 * 5   (result in 2 + (4 * 5) = 22)"
print
prompt()
driver.parse(CalcLexer(sys.stdin))
Example #3
class RegressionTestUnknowGotoState(unittest.TestCase):
   class CalcLexer(Lexer):
      def __init__(self, source):
         self._source = source
         self._white_characters = (' ', '\t', '\v', '\n', '\r')
         self._number = re.compile('[+-]?\d+(\.\d+)?')
         self._var_name = re.compile('\w+')
      
      def tokenizer(self):
         '''Return an iterable or generator of all tokens.
            Each token is a tuple with at least one value, the terminal id. 
            The next values, if any, are the attributes of the token.'''
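         # As an illustration (not part of the original test), the input
         # "let (2)" used below is tokenized as:
         #   ('let', 'let'), ('(', '('), ('id', 2), (')', ')'),
         #   (grammar.Grammar.EOF, grammar.Grammar.EOF)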
      
         for line in self._source:
            i = 0
            while i < len(line):
               while i < len(line) and line[i] in self._white_characters:
                  i += 1 

               if len(line) == i:
                  continue

               match_id = self._number.match(line, i)
               match_var = self._var_name.match(line, i)
               if match_id:
                  yield ('id', int(match_id.group()))
                  i = match_id.end()
               elif match_var:
                  if match_var.group() == 'let':
                     yield ('let', 'let')
                  else:
                     yield ('var', match_var.group())
                  i = match_var.end()
               else:
                  yield (line[i], line[i])
                  i += 1

         yield (grammar.Grammar.EOF, grammar.Grammar.EOF)
         return 


   def setUp(self):
      self.arith = grammar.Grammar('S', ('(', ')', 'id', 'let'))
      self.symbol_table = [dict()]

      def push(*args): pass
      def pop(*args): pass

      self.arith.add_rule('S', ['E'])
      self.arith.add_rule('E', ['id'])
      self.arith.add_rule('E', ['let', push, '(', 'E', ')', pop, lambda *args:args])

      self.action_table, self.goto_table, self.start_state = build_parsing_table(self.arith, LR0(self.arith.START, 0, 0), disable_mapping=True)
      self.driver = Driver(self.action_table, dict(self.goto_table), self.start_state)

   def test_cannonical_collection(self):
      collection = canonical_collection(self.arith, LR0(self.arith.START, 0, 0))
      
      states = frozenset([
      frozenset([
         LR0(self.arith.START, 0, 0),
         LR0('S', 0, 0), 
         LR0('E', 0, 0), LR0('E', 1, 0),]),
      
      frozenset([
         LR0(self.arith.START, 0, 1),]),
      
      frozenset([
         LR0('E', 0, 1),]),

      frozenset([
         LR0('S', 0, 1),]),
      
      frozenset([
         LR0('E', 1, 1), 
         LR0(self.arith.ACTION_INSIDE % (1, 'push'), 0, 0),]),
      
      frozenset([
         LR0('E', 1, 2),]),
      
      frozenset([
         LR0('E', 1, 4),]),
      
      frozenset([
         LR0('E', 1, 6),]),
      
      frozenset([
         LR0('E', 1, 5), 
         LR0(self.arith.ACTION_INSIDE % (2, 'pop'), 0, 0),]),

      frozenset([
         LR0('E', 1, 3), 
         LR0('E', 0, 0), LR0('E', 1, 0),]),
      ])
      
      self.assertTrue(states == collection)


   def test_goto_table(self):
      states_gotos = [
      (frozenset([
         LR0(self.arith.START, 0, 0),
         LR0('S', 0, 0), 
         LR0('E', 0, 0), LR0('E', 1, 0),]),  (('S', 1), ('id', 2), ('E', 3), ('let', 4))),
      
      (frozenset([
         LR0(self.arith.START, 0, 1),]),  ()),
      
      (frozenset([
         LR0('E', 0, 1),]),  ()),

      (frozenset([
         LR0('S', 0, 1),]),  ()),
      
      (frozenset([
         LR0('E', 1, 1), 
         LR0(self.arith.ACTION_INSIDE % (1, 'push'), 0, 0),]),  ((self.arith.ACTION_INSIDE % (1, 'push'), 5),)),
      
      (frozenset([
         LR0('E', 1, 2),]),  (('(', 9),)),
      
      (frozenset([
         LR0('E', 1, 4),]),  ((')', 8),)),
      
      (frozenset([
         LR0('E', 1, 6),]),  ()),
      
      (frozenset([
         LR0('E', 1, 5), 
         LR0(self.arith.ACTION_INSIDE % (2, 'pop'), 0, 0),]), ((self.arith.ACTION_INSIDE % (2, 'pop'), 7),) ),

      (frozenset([
         LR0('E', 1, 3), 
         LR0('E', 0, 0), LR0('E', 1, 0),]),  (('E', 6), ('id', 2), ('let', 4))),
      ]

      checked = 0
      for state, gotos in states_gotos:
         h = hash(state)
         if not gotos:
            self.assertTrue(h not in self.goto_table)
            continue

         checked += 1
         expected_keys, expected_states_id = zip(*gotos)

         found_gotos_keys = frozenset(self.goto_table[h].keys())
         found_gotos_hashs = frozenset(self.goto_table[h].values())

         self.assertTrue(frozenset(expected_keys) == found_gotos_keys)
         self.assertTrue(frozenset([hash(states_gotos[i][0]) for i in expected_states_id]) == found_gotos_hashs)

      self.assertTrue(checked == len(self.goto_table.keys()))


   def test_action_table(self):
      states_shifts_reduce_actions = [
      (frozenset([
         LR0(self.arith.START, 0, 0),
         LR0('S', 0, 0), 
         LR0('E', 0, 0), LR0('E', 1, 0),]), (('id', 2), ('let', 4)), () ),
      
      (frozenset([
         LR0(self.arith.START, 0, 1),]), (), () ),
      
      (frozenset([
         LR0('E', 0, 1),]), (), () ),

      (frozenset([
         LR0('S', 0, 1),]), (), () ),
      
      (frozenset([
         LR0('E', 1, 1), 
         LR0(self.arith.ACTION_INSIDE % (1, 'push'), 0, 0),]), (), () ),
      
      (frozenset([
         LR0('E', 1, 2),]), (('(', 9),), () ),
      
      (frozenset([
         LR0('E', 1, 4),]), ((')', 8),), () ),
      
      (frozenset([
         LR0('E', 1, 6),]), (), () ),
      
      (frozenset([
         LR0('E', 1, 5), 
         LR0(self.arith.ACTION_INSIDE % (2, 'pop'), 0, 0),]), (), () ),

      (frozenset([
         LR0('E', 1, 3), 
         LR0('E', 0, 0), LR0('E', 1, 0),]), (('id', 2), ('let', 4)), () ),
      ]

      self.assertTrue(len(states_shifts_reduce_actions) == len(self.action_table.keys()))
      
      for state, shifts, reduces in states_shifts_reduce_actions:
         h = hash(state)

         self.assertTrue(len(shifts) == len(filter(lambda action: "Shift" in str(action), self.action_table[h].values())))

         if not shifts:
            continue
         keys, ids = zip(*shifts)
         found_ids = [i for i in range(len(states_shifts_reduce_actions)) if ("Shift %s" % hash(states_shifts_reduce_actions[i][0])) in map(lambda a: str(a), self.action_table[h].values())]
         found_keys = self.action_table[h].keys()

         self.assertTrue(frozenset(ids) == frozenset(found_ids))
         self.assertTrue(frozenset(keys) == frozenset(found_keys))


   def test_parse_complex_let_var_expression(self):
      source = StringIO("let (2)")
      lexer = RegressionTestUnknowGotoState.CalcLexer(source)
      self.driver.parse(lexer)
Example #4
class RegressionTestParser(unittest.TestCase):
    class CalcLexer(Lexer):
        def __init__(self, source):
            self._source = source
            self._white_characters = (' ', '\t', '\v', '\r')
            self._number = re.compile('\d+(\.\d+)?')

        def tokenizer(self):
            '''Return an iterable or generator of all tokens.
            Each token is a tuple with at least one value, the terminal id. 
            The next values, if any, are the attributes of the token.'''
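            # For example (illustrative, derived from the tests below), the
            # input "3 2 +\n" is tokenized as:
            #   ('NUM', 3), ('NUM', 2), ('+', None), ('NL', None),
            #   (grammar.Grammar.EOF, None)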

            line = self._source.readline()
            while line:
                i = 0

                while i < len(line):
                    # skip whitespaces
                    while i < len(line) and line[i] in self._white_characters:
                        i += 1

                    match = self._number.match(line, i)
                    if i == len(line):
                        continue
                    elif line[i] == '\n':
                        yield 'NL', None
                    elif match:
                        yield ('NUM', int(match.group()))
                        i = match.end() - 1
                    else:
                        yield line[i], None

                    i += 1

                line = self._source.readline()
            yield (grammar.Grammar.EOF, None)

    def setUp(self):
        syn = syntax.Syntax('input')
        self.result = []

        syn.terminal('NL', None)
        syn.terminal('NUM', None)
        syn.terminal('+', None)
        syn.terminal('-', None)

        syn.repeat(('line', ), 'input')
        syn.choice((('NL', ), ('expr', 'NL', lambda x: self.result.append(x))),
                   'line')
        syn.choice((('NUM', ), ('expr', 'expr', '+', lambda x, y: x + y),
                    ('expr', 'expr', '-', lambda x, y: x - y)), 'expr')

        self.grammar = syn.as_grammar()

        self.start_item = LR0(self.grammar.START, 0, 0)
        self.action_table, self.goto_table, self.start_state = build_parsing_table(
            self.grammar, self.start_item, disable_mapping=True)
        self.driver = Driver(self.action_table, self.goto_table,
                             self.start_state)

        self.kernel_states = [
            frozenset([
                LR0(self.grammar.START, 0, 0),
            ]),
            frozenset([
                LR0(self.grammar.START, 0, 1),
            ]),
            frozenset([
                LR0('input', 0, 1),
                LR0('input', 1, 1),
            ]),
            frozenset([
                LR0('line', 0, 1),
            ]),
            frozenset([
                LR0('line', 1, 1),
                LR0('expr', 1, 1),
                LR0('expr', 2, 1),
            ]),
            frozenset([
                LR0('expr', 0, 1),
            ]),
            frozenset([
                LR0('input', 0, 2),
            ]),
            frozenset([
                LR0('line', 1, 2),
            ]),
            frozenset([
                LR0('expr', 1, 1),
                LR0('expr', 1, 2),
                LR0('expr', 2, 1),
                LR0('expr', 2, 2),
            ]),
            frozenset([
                LR0('expr', 1, 3),
            ]),
            frozenset([
                LR0('expr', 2, 3),
            ])
        ]

    def test_grammar(self):
        self.assertTrue(set(self.grammar.iter_on_all_symbols()) - set(self.grammar.iter_nonterminals()) ==\
              {'NL', 'NUM', '+', '-'})

        self.assertTrue(set(self.grammar.iter_nonterminals()) == \
              {'input', 'line', 'expr', self.grammar.START})

        self.assertTrue(set(self.grammar[self.grammar.START]) == \
              {('input',)})

        self.assertTrue(set(self.grammar['input']) == \
              {('line',), ('line', 'input')})

        self.assertTrue(set(self.grammar['line']) == \
              {('NL',), ('expr', 'NL')})

        self.assertTrue(set(self.grammar['expr']) == \
              {('NUM',), ('expr', 'expr', '+'), ('expr', 'expr', '-')})

    def test_canonical(self):
        collection = kernel_collection(self.grammar, self.start_item)
        self.assertTrue(len(collection) == 11)
        self.assertTrue(frozenset(self.kernel_states) == collection)

    def test_action_table(self):
        states = [
            closure(kernel, self.grammar) for kernel in self.kernel_states
        ]

        expected_terminal_shift = [
            {'NL', 'NUM'},
            {},
            {'NL', 'NUM'},
            {},
            {'NL', 'NUM'},
            {},
            {},
            {},
            {'NUM', '+', '-'},
            {},
            {},
        ]

        for state, terminals in zip(states, expected_terminal_shift):
            keys = self.action_table[hash(state)].keys()
            keys = filter(
                lambda k: "Shift" in str(self.action_table[hash(state)][k]),
                keys)
            self.assertTrue(frozenset(keys) == frozenset(terminals))

        expected_terminal_reduce = [
            {},
            {},
            {self.grammar.EOF},  #input
            {self.grammar.EOF, 'NL', 'NUM'},  #line
            {},
            {'NL', '+', '-', 'NUM'},  #expr
            {self.grammar.EOF},  #input
            {self.grammar.EOF, 'NL', 'NUM'},  #line
            {},
            {'NL', '+', '-', 'NUM'},  #expr
            {'NL', '+', '-', 'NUM'},  #expr
        ]

        for state, terminals in zip(states, expected_terminal_reduce):
            keys = self.action_table[hash(state)].keys()
            keys = filter(
                lambda k: "Reduce" in str(self.action_table[hash(state)][k]),
                keys)
            self.assertTrue(frozenset(keys) == frozenset(terminals))

    def test_bug_missing_terminals_found_by_follow_algorithm(self):
        '''The problem is that the 'follow' algorithm assumes that if
         a rule contains the nonterminal X, the rule contains only one X.
         Obviously, this is not true in general.
         '''
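        # Illustration: the rule expr -> expr expr '+' contains 'expr' twice,
        # so follow('expr') must also include the terminals that follow the
        # second occurrence ('+' and '-'), not just 'NL' and 'NUM'.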
        found = follow(self.grammar, 'expr')
        self.assertTrue({'NL', 'NUM'} & found)  #found by the 'first' algorithm
        self.assertTrue({'+', '-'} & found)  #found by the 'follow' algorithm

        self.assertTrue(frozenset(['NUM', 'NL', '+', '-']) == found)

    def test_single_number(self):
        source = StringIO("3\n")
        lexer = RegressionTestParser.CalcLexer(source)
        self.driver.parse(lexer)
        self.assertTrue(self.result == [3])

    def test_multiple_numbers(self):
        source = StringIO("3\n\n\n2\n1\n")
        lexer = RegressionTestParser.CalcLexer(source)
        self.driver.parse(lexer)
        self.assertTrue(self.result == [3, 2, 1])

    def test_add(self):
        source = StringIO("3 2 +\n")
        lexer = RegressionTestParser.CalcLexer(source)
        self.driver.parse(lexer)
        self.assertTrue(self.result == [3 + 2])
Example #5
class IntegralTestParseCalculator(unittest.TestCase):
    class CalcLexer(Lexer):
        def __init__(self, source):
            self._source = source
            self._white_characters = (' ', '\t', '\v', '\n', '\r')
            self._number = re.compile('[+-]?\d+(\.\d+)?')

        def tokenizer(self):
            '''Return an iterable or generator of all tokens.
            Each token is a tuple with at least one value, the terminal id. 
            The next values, if any, are the attributes of the token.'''
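            # Illustrative example (not part of the original test): the input
            # "3 + 2" produces the token stream
            #   ('id', 3), ('+', None), ('id', 2), (grammar.Grammar.EOF, None)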

            for line in self._source:
                i = 0
                while i < len(line):
                    while i < len(line) and line[i] in self._white_characters:
                        i += 1

                    if len(line) == i:
                        continue

                    match = self._number.match(line, i)
                    if match:
                        yield ('id', int(match.group()))
                        i = match.end()
                    else:
                        yield (line[i], None)
                        i += 1

            yield (grammar.Grammar.EOF, None)
            return

    def setUp(self):
        self.arith = grammar.Grammar('S', ('+', '*', '(', ')', 'id'))
        self.result = None

        def get_result(v):
            self.result = v
            return v

        def add(x, y):
            t = x + y
            return t

        def mul(x, y):
            t = x * y
            return t

        self.arith.add_rule('S', ['E', get_result])
        self.arith.add_rule('E', ['E', '+', 'T', add])
        self.arith.add_rule('E', ['T', lambda v: v])
        self.arith.add_rule('T', ['T', '*', 'F', mul])
        self.arith.add_rule('T', ['F', lambda v: v])
        self.arith.add_rule('F', ['(', 'E', ')', lambda v: v])
        self.arith.add_rule('F', ['id', lambda v: v])

        self.action_table, self.goto_table, self.start_state = build_parsing_table(
            self.arith, LR0(self.arith.START, 0, 0))
        self.driver = Driver(self.action_table, self.goto_table,
                             self.start_state)

    def test_parse_simple_expresion(self):
        source = StringIO("3 + 2")
        lexer = IntegralTestParseCalculator.CalcLexer(source)
        self.driver.parse(lexer)

        self.assertTrue(self.result == 3 + 2)

    def test_parse_complex_expresion(self):
        source = StringIO("3 * 2 + 9")
        lexer = IntegralTestParseCalculator.CalcLexer(source)
        self.driver.parse(lexer)

        self.assertTrue(self.result == (3 * 2) + 9)

    def test_parse_complex_expresion_reversed(self):
        source = StringIO("9 + 3 * 2")
        lexer = IntegralTestParseCalculator.CalcLexer(source)
        self.driver.parse(lexer)

        self.assertTrue(self.result == (3 * 2) + 9)
Example #6
class IntegralTestParseCalculator(unittest.TestCase):
   class CalcLexer(Lexer):
      def __init__(self, source):
         self._source = source
         self._white_characters = (' ', '\t', '\v', '\n', '\r')
         self._number = re.compile('[+-]?\d+(\.\d+)?')
      
      def tokenizer(self):
         '''Return an iterable or generator of all tokens.
            Each token is a tuple with at least one value, the terminal id. 
            The next values, if any, are the attributes of the token.'''
      
         for line in self._source:
            i = 0
            while i < len(line):
               while i < len(line) and line[i] in self._white_characters:
                  i += 1 

               if len(line) == i:
                  continue

               match = self._number.match(line, i)
               if match:
                  yield ('id', int(match.group()))
                  i = match.end()
               else:
                  yield (line[i], None)
                  i += 1

         yield (grammar.Grammar.EOF, None)
         return 


   def setUp(self):
      self.arith = grammar.Grammar('S', ('+', '*', '(', ')', 'id'))
      self.result = None

      def get_result(v): self.result = v; return v
      def add(x, y): t = x + y; return t
      def mul(x, y): t = x * y; return t

      self.arith.add_rule('S', ['E',           get_result])
      self.arith.add_rule('E', ['E', '+', 'T', add])
      self.arith.add_rule('E', ['T',           lambda v: v])
      self.arith.add_rule('T', ['T', '*', 'F', mul])
      self.arith.add_rule('T', ['F',           lambda v: v])
      self.arith.add_rule('F', ['(', 'E', ')', lambda v: v])
      self.arith.add_rule('F', ['id',          lambda v: v])

      self.action_table, self.goto_table, self.start_state = build_parsing_table(self.arith, LR0(self.arith.START, 0, 0))
      self.driver = Driver(self.action_table, self.goto_table, self.start_state)

   
   def test_parse_simple_expresion(self):
      source = StringIO("3 + 2")
      lexer = IntegralTestParseCalculator.CalcLexer(source)
      self.driver.parse(lexer)

      self.assertTrue(self.result == 3+2)

   def test_parse_complex_expresion(self):
      source = StringIO("3 * 2 + 9")
      lexer = IntegralTestParseCalculator.CalcLexer(source)
      self.driver.parse(lexer)

      self.assertTrue(self.result == (3*2) + 9)

   def test_parse_complex_expresion_reversed(self):
      source = StringIO("9 + 3 * 2")
      lexer = IntegralTestParseCalculator.CalcLexer(source)
      self.driver.parse(lexer)

      self.assertTrue(self.result == (3*2) + 9)
Example #7
class IntegralTestParseCalculatorForLALRGrammar(unittest.TestCase):
   class CalcLexer(Lexer):
      def __init__(self, source):
         self._source = source
         self._white_characters = (' ', '\t', '\v', '\n', '\r')
         self._number = re.compile('[+-]?\d+(\.\d+)?')
         self._var_name = re.compile('\w+')
      
      def tokenizer(self):
         '''Return an iterable or generator of all tokens.
            Each token is a tuple with at least one value, the terminal id. 
            The next values, if any, are the attributes of the token.'''
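         # Illustration only: an input line "a = 2;" is tokenized as
         #   ('id', 'a'), ('=', None), ('num', 2), (';', None)
         # and (grammar.Grammar.EOF, None) is yielded once the source is exhausted.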
      
         for line in self._source:
            i = 0
            while i < len(line):
               while i < len(line) and line[i] in self._white_characters:
                  i += 1 

               if len(line) == i:
                  continue

               match_id = self._number.match(line, i)
               match_var = self._var_name.match(line, i)
               if match_id:
                  yield ('num', int(match_id.group()))
                  i = match_id.end()
               elif match_var:
                  yield ('id', match_var.group())
                  i = match_var.end()
               else:
                  yield (line[i], None)
                  i += 1

         yield (grammar.Grammar.EOF, None)
         return 

   def setUp(self):
      self.lrvalue = grammar.Grammar(None, ('=', '*', ';', 'id', 'num'))
      self.symbol_table = dict()
      self.last_value = None

      def set_var(lv, rv): 
         self.symbol_table[lv] = rv; 
         return self.symbol_table[lv]

      def get_var(rv): 
         if rv in self.symbol_table:
            return self.symbol_table[rv]
         
         raise KeyError(rv)

      def grab_last_value(v):
         self.last_value = v if isinstance(v, int) else self.symbol_table[v]
         return v

      self.lrvalue.augment('S')

      self.lrvalue.add_rule('S', ['E', ';', 'S',   lambda e, s: s])
      self.lrvalue.add_rule('S', ['E', ';',        lambda v: v])
      self.lrvalue.add_rule('E', ['L', '=', 'R',   set_var])
      self.lrvalue.add_rule('E', ['R',             grab_last_value])
      self.lrvalue.add_rule('L', ['*', 'R',        get_var])
      self.lrvalue.add_rule('L', ['id',            lambda v: v])
      self.lrvalue.add_rule('R', ['L',             lambda v: v])
      self.lrvalue.add_rule('R', ['num',           lambda v: v])

      self.action_table, self.goto_table, self.start_state = build_parsing_table_lalr(self.lrvalue, LR0(self.lrvalue.START, 0, 0), False)
      self.driver = Driver(self.action_table, self.goto_table, self.start_state)

   
   def test_parse_simple_expresion(self):
      source = StringIO("2;")
      lexer = IntegralTestParseCalculatorForLALRGrammar.CalcLexer(source)
      self.driver.parse(lexer)

      self.assertTrue(self.last_value == 2)

   def test_parse_assign_expresion(self):
      source = StringIO('''a = 2;
                           a;''')
      lexer = IntegralTestParseCalculatorForLALRGrammar.CalcLexer(source)
      self.driver.parse(lexer)

      self.assertTrue(self.last_value == 2)

   def test_parse_simple_derefered(self):
      source = StringIO('''a = 2;
                           b = *a;
                           a = 4;
                           b;''')
      lexer = IntegralTestParseCalculatorForLALRGrammar.CalcLexer(source)
      self.driver.parse(lexer)

      self.assertTrue(self.last_value == 2)

   def test_parse_complex_derefered(self):
      source = StringIO('''a = 0; b = 0; c = 0;

                           a = 2;
                           b = a;
                           c = **b;
                           
                           a = 0; b = 0;
                           c;''')
      lexer = IntegralTestParseCalculatorForLALRGrammar.CalcLexer(source)
      self.driver.parse(lexer)

      self.assertTrue(self.last_value == 2)
Example #8
class IntegralTestParseCalculatorWithContexts(unittest.TestCase):
    class CalcLexer(Lexer):
        def __init__(self, source):
            self._source = source
            self._white_characters = (' ', '\t', '\v', '\n', '\r')
            self._number = re.compile('[+-]?\d+(\.\d+)?')
            self._var_name = re.compile('\w+')

        def tokenizer(self):
            '''Return an iterable or generator of all tokens.
            Each token is a tuple with at least one value, the terminal id. 
            The next values, if any, are the attributes of the token.'''
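            # Illustration (based on test_parse_var_expression below): the input
            # "X = 3 * 2 + 9" yields
            #   ('var', 'X'), ('=', None), ('id', 3), ('*', None), ('id', 2),
            #   ('+', None), ('id', 9), (grammar.Grammar.EOF, None)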

            for line in self._source:
                i = 0
                while i < len(line):
                    while i < len(line) and line[i] in self._white_characters:
                        i += 1

                    if len(line) == i:
                        continue

                    match_id = self._number.match(line, i)
                    match_var = self._var_name.match(line, i)
                    if match_id:
                        yield ('id', int(match_id.group()))
                        i = match_id.end()
                    elif match_var:
                        if match_var.group() == 'let':
                            yield ('let', None)
                        else:
                            yield ('var', match_var.group())
                        i = match_var.end()
                    else:
                        yield (line[i], None)
                        i += 1

            yield (grammar.Grammar.EOF, None)
            return

    def setUp(self):
        self.arith = grammar.Grammar(
            'S', ('+', '*', '(', ')', 'id', 'var', '=', 'let'))
        self.symbol_table = [dict()]
        self.result = None

        def get_result(x):
            self.result = x
            return x

        def add(x, y):
            t = x + y
            return t

        def mul(x, y):
            t = x * y
            return t

        def set_var(lv, rv):
            self.symbol_table[-1][lv] = rv
            return rv

        def get_var(rv):
            for table in reversed(self.symbol_table):
                if rv in table:
                    return table[rv]

            raise KeyError(rv)

        def push():
            self.symbol_table.append(dict())

        def pop(*others):
            self.symbol_table.pop()

        self.arith.add_rule('S', ['E', get_result])
        self.arith.add_rule('E', ['E', '+', 'T', add])
        self.arith.add_rule('E', ['T', lambda v: v])
        self.arith.add_rule('T', ['T', '*', 'F', mul])
        self.arith.add_rule('T', ['F', lambda v: v])
        self.arith.add_rule('F', ['(', 'E', ')', lambda v: v])
        self.arith.add_rule('F', ['id', lambda v: v])
        self.arith.add_rule('F', ['var', '=', 'E', set_var])
        self.arith.add_rule('F', ['var', get_var])
        self.arith.add_rule('F',
                            ['let', push, '(', 'E', ')', pop, lambda v: v])

        self.action_table, self.goto_table, self.start_state = build_parsing_table(
            self.arith, LR0(self.arith.START, 0, 0))
        self.driver = Driver(self.action_table, self.goto_table,
                             self.start_state)

    def test_parse_simple_expression(self):
        source = StringIO("3 + 2")
        lexer = IntegralTestParseCalculatorWithContexts.CalcLexer(source)
        self.driver.parse(lexer)

        self.assertTrue(self.result == 3 + 2)

    def test_parse_var_expression(self):
        source = StringIO("X = 3 * 2 + 9")
        lexer = IntegralTestParseCalculatorWithContexts.CalcLexer(source)
        self.driver.parse(lexer)

        self.assertTrue(self.result == (3 * 2) + 9)
        self.assertTrue(self.symbol_table[-1]['X'] == (3 * 2) + 9)

    def test_parse_complex_var_expression(self):
        source = StringIO("Z= (X=9) + 3 * (Y=2)")
        lexer = IntegralTestParseCalculatorWithContexts.CalcLexer(source)
        self.driver.parse(lexer)

        self.assertTrue(self.result == (3 * 2) + 9)
        self.assertTrue(self.symbol_table[-1]['X'] == 9)
        self.assertTrue(self.symbol_table[-1]['Y'] == 2)

    def test_parse_complex_let_var_expression(self):
        source = StringIO("(X=5) + let ((X=2) + 3) + X")
        lexer = IntegralTestParseCalculatorWithContexts.CalcLexer(source)
        self.driver.parse(lexer)

        self.assertTrue(self.result == 5 + (2 + 3) + 5)
        self.assertTrue(self.symbol_table[-1]['X'] == 5)
Example #9
class IntegralTestParseCalculatorWithContexts(unittest.TestCase):
   class CalcLexer(Lexer):
      def __init__(self, source):
         self._source = source
         self._white_characters = (' ', '\t', '\v', '\n', '\r')
         self._number = re.compile('[+-]?\d+(\.\d+)?')
         self._var_name = re.compile('\w+')
      
      def tokenizer(self):
         '''Return an iterable or generator of all tokens.
            Each token is a tuple with at least one value, the terminal id. 
            The next values, if any, are the attributes of the token.'''
      
         for line in self._source:
            i = 0
            while i < len(line):
               while i < len(line) and line[i] in self._white_characters:
                  i += 1 

               if len(line) == i:
                  continue

               match_id = self._number.match(line, i)
               match_var = self._var_name.match(line, i)
               if match_id:
                  yield ('id', int(match_id.group()))
                  i = match_id.end()
               elif match_var:
                  if match_var.group() == 'let':
                     yield ('let', None)
                  else:
                     yield ('var', match_var.group())
                  i = match_var.end()
               else:
                  yield (line[i], None)
                  i += 1

         yield (grammar.Grammar.EOF, None)
         return 


   def setUp(self):
      self.arith = grammar.Grammar('S', ('+', '*', '(', ')', 'id', 'var', '=', 'let'))
      self.symbol_table = [dict()]
      self.result = None

      def get_result(x): self.result = x; return x
      def add(x, y): t = x + y; return t
      def mul(x, y): t = x * y; return t

      def set_var(lv, rv): self.symbol_table[-1][lv] = rv; return rv
      def get_var(rv): 
         for table in reversed(self.symbol_table):
            if rv in table:
               return table[rv]
         
         raise KeyError(rv)

      def push(): self.symbol_table.append(dict());
      def pop(*others): self.symbol_table.pop();


      self.arith.add_rule('S', ['E',                             get_result])
      self.arith.add_rule('E', ['E', '+', 'T',                   add])
      self.arith.add_rule('E', ['T',                             lambda v: v])
      self.arith.add_rule('T', ['T', '*', 'F',                   mul])
      self.arith.add_rule('T', ['F',                             lambda v: v])
      self.arith.add_rule('F', ['(', 'E', ')',                   lambda v: v])
      self.arith.add_rule('F', ['id',                            lambda v: v])
      self.arith.add_rule('F', ['var', '=', 'E',                 set_var])
      self.arith.add_rule('F', ['var',                           get_var])
      self.arith.add_rule('F', ['let', push, '(', 'E', ')', pop, lambda v: v])

      self.action_table, self.goto_table, self.start_state = build_parsing_table(self.arith, LR0(self.arith.START, 0, 0))
      self.driver = Driver(self.action_table, self.goto_table, self.start_state)

   
   def test_parse_simple_expression(self):
      source = StringIO("3 + 2")
      lexer = IntegralTestParseCalculatorWithContexts.CalcLexer(source)
      self.driver.parse(lexer)

      self.assertTrue(self.result == 3+2)

   def test_parse_var_expression(self):
      source = StringIO("X = 3 * 2 + 9")
      lexer = IntegralTestParseCalculatorWithContexts.CalcLexer(source)
      self.driver.parse(lexer)

      self.assertTrue(self.result == (3*2) + 9)
      self.assertTrue(self.symbol_table[-1]['X'] == (3*2) + 9)

   def test_parse_complex_var_expression(self):
      source = StringIO("Z= (X=9) + 3 * (Y=2)")
      lexer = IntegralTestParseCalculatorWithContexts.CalcLexer(source)
      self.driver.parse(lexer)

      self.assertTrue(self.result == (3*2) + 9)
      self.assertTrue(self.symbol_table[-1]['X'] == 9)
      self.assertTrue(self.symbol_table[-1]['Y'] == 2)

   def test_parse_complex_let_var_expression(self):
      source = StringIO("(X=5) + let ((X=2) + 3) + X")
      lexer = IntegralTestParseCalculatorWithContexts.CalcLexer(source)
      self.driver.parse(lexer)

      self.assertTrue(self.result == 5 + (2 + 3) + 5)
      self.assertTrue(self.symbol_table[-1]['X'] == 5)
Example #10
class RegressionTestParser(unittest.TestCase):
   class CalcLexer(Lexer):
      def __init__(self, source):
         self._source = source
         self._white_characters = (' ', '\t', '\v', '\r')
         self._number = re.compile('\d+(\.\d+)?')
      
      def tokenizer(self):
         '''Return an iterable or generator of all tokens.
            Each token is a tuple with at least one value, the terminal id. 
            The next values, if any, are the attributes of the token.'''
    
         line = self._source.readline()
         while line:
            i = 0

            while i < len(line):
               # skip whitespaces
               while i < len(line) and line[i] in self._white_characters:
                  i += 1

               match = self._number.match(line, i)
               if i == len(line):
                  continue
               elif line[i] == '\n':
                  yield 'NL', None
               elif match:
                  yield ('NUM', int(match.group()))
                  i = match.end() - 1
               else:
                  yield line[i], None

               i += 1
            
            line = self._source.readline()
         yield (grammar.Grammar.EOF, None)


   def setUp(self):
      syn = syntax.Syntax('input')
      self.result = []

      syn.terminal('NL', None)
      syn.terminal('NUM', None)
      syn.terminal('+', None)
      syn.terminal('-', None)

      syn.repeat(('line',), 'input')
      syn.choice((('NL',), ('expr','NL', lambda x: self.result.append(x))),'line')
      syn.choice((('NUM',), ('expr','expr', '+', lambda x, y: x+y), ('expr','expr', '-', lambda x, y: x-y)),'expr')

      self.grammar = syn.as_grammar()
      
      self.start_item = LR0(self.grammar.START, 0, 0)
      self.action_table, self.goto_table, self.start_state = build_parsing_table(self.grammar, self.start_item, disable_mapping=True)
      self.driver = Driver(self.action_table, self.goto_table, self.start_state)
      
      self.kernel_states = [
         frozenset([
            LR0(self.grammar.START, 0, 0),
            ]),
         frozenset([
            LR0(self.grammar.START, 0, 1),
            ]),
         frozenset([
            LR0('input', 0, 1),
            LR0('input', 1, 1),
            ]),
         frozenset([
            LR0('line', 0, 1),
            ]),
         frozenset([
            LR0('line', 1, 1),
            LR0('expr', 1, 1),
            LR0('expr', 2, 1),
            ]),
         frozenset([
            LR0('expr', 0, 1),
            ]),
         frozenset([
            LR0('input', 0, 2),
            ]),
         frozenset([
            LR0('line', 1, 2),
            ]),
         frozenset([
            LR0('expr', 1, 1),
            LR0('expr', 1, 2),
            LR0('expr', 2, 1),
            LR0('expr', 2, 2),
            ]),
         frozenset([
            LR0('expr', 1, 3),
            ]),
         frozenset([
            LR0('expr', 2, 3),
            ])
         ]

   
   def test_grammar(self):
      self.assertTrue(set(self.grammar.iter_on_all_symbols()) - set(self.grammar.iter_nonterminals()) ==\
            {'NL', 'NUM', '+', '-'})

      self.assertTrue(set(self.grammar.iter_nonterminals()) == \
            {'input', 'line', 'expr', self.grammar.START})

      self.assertTrue(set(self.grammar[self.grammar.START]) == \
            {('input',)})

      self.assertTrue(set(self.grammar['input']) == \
            {('line',), ('line', 'input')})
      
      self.assertTrue(set(self.grammar['line']) == \
            {('NL',), ('expr', 'NL')})
      
      self.assertTrue(set(self.grammar['expr']) == \
            {('NUM',), ('expr', 'expr', '+'), ('expr', 'expr', '-')})


   def test_canonical(self):
      collection = kernel_collection(self.grammar, self.start_item)
      self.assertTrue(len(collection) == 11)
      self.assertTrue(frozenset(self.kernel_states) == collection)

   def test_action_table(self):
      states = [closure(kernel, self.grammar) for kernel in self.kernel_states]
      
      expected_terminal_shift = [
            {'NL', 'NUM'},
            {},
            {'NL', 'NUM'},
            {},
            {'NL', 'NUM'},
            {},
            {},
            {},
            {'NUM', '+', '-'},
            {},
            {},
            ]
   
      for state, terminals in zip(states, expected_terminal_shift):
         keys = self.action_table[hash(state)].keys()
         keys = filter(lambda k: "Shift" in str(self.action_table[hash(state)][k]), keys) 
         self.assertTrue(frozenset(keys) == frozenset(terminals))

      expected_terminal_reduce = [
            {},
            {},
            {self.grammar.EOF}, #input
            {self.grammar.EOF, 'NL', 'NUM'}, #line
            {},
            {'NL', '+', '-', 'NUM'}, #expr
            {self.grammar.EOF}, #input
            {self.grammar.EOF, 'NL', 'NUM'}, #line
            {},
            {'NL', '+', '-', 'NUM'}, #expr
            {'NL', '+', '-', 'NUM'}, #expr
            ]
      
      for state, terminals in zip(states, expected_terminal_reduce):
         keys = self.action_table[hash(state)].keys()
         keys = filter(lambda k: "Reduce" in str(self.action_table[hash(state)][k]), keys) 
         self.assertTrue(frozenset(keys) == frozenset(terminals))
      

   def test_bug_missing_terminals_found_by_follow_algorithm(self):
      '''The problem is that the 'follow' algorithm assumes that if
         a rule contains the nonterminal X, the rule contains only one X.
         Obviously, this is not true in general.
         '''
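      # Concretely: expr -> expr expr '+' has two 'expr' symbols; the terminals
      # '+' and '-' follow only the second one, so a follow() computation that
      # stops at the first occurrence would miss them.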
      found = follow(self.grammar, 'expr')
      self.assertTrue({'NL', 'NUM'} & found) #found by the 'first' algorithm
      self.assertTrue({'+', '-'} & found)    #found by the 'follow' algorithm

      self.assertTrue(frozenset(['NUM', 'NL', '+', '-']) == found)


   def test_single_number(self):
      source = StringIO("3\n")
      lexer = RegressionTestParser.CalcLexer(source)
      self.driver.parse(lexer)
      self.assertTrue(self.result == [3])
   
   def test_multiple_numbers(self):
      source = StringIO("3\n\n\n2\n1\n")
      lexer = RegressionTestParser.CalcLexer(source)
      self.driver.parse(lexer)
      self.assertTrue(self.result == [3, 2, 1])

   def test_add(self):
      source = StringIO("3 2 +\n")
      lexer = RegressionTestParser.CalcLexer(source)
      self.driver.parse(lexer)
      self.assertTrue(self.result == [3+2])
Example #11
class RegressionTestUnknowGotoState(unittest.TestCase):
    class CalcLexer(Lexer):
        def __init__(self, source):
            self._source = source
            self._white_characters = (' ', '\t', '\v', '\n', '\r')
            self._number = re.compile('[+-]?\d+(\.\d+)?')
            self._var_name = re.compile('\w+')

        def tokenizer(self):
            '''Return an iterable or generator of all tokens.
            Each token is a tuple with at least one value, the terminal id. 
            The next values, if any, are the attributes of the token.'''

            for line in self._source:
                i = 0
                while i < len(line):
                    while i < len(line) and line[i] in self._white_characters:
                        i += 1

                    if len(line) == i:
                        continue

                    match_id = self._number.match(line, i)
                    match_var = self._var_name.match(line, i)
                    if match_id:
                        yield ('id', int(match_id.group()))
                        i = match_id.end()
                    elif match_var:
                        if match_var.group() == 'let':
                            yield ('let', 'let')
                        else:
                            yield ('var', match_var.group())
                        i = match_var.end()
                    else:
                        yield (line[i], line[i])
                        i += 1

            yield (grammar.Grammar.EOF, grammar.Grammar.EOF)
            return

    def setUp(self):
        self.arith = grammar.Grammar('S', ('(', ')', 'id', 'let'))
        self.symbol_table = [dict()]

        def push(*args):
            pass

        def pop(*args):
            pass

        self.arith.add_rule('S', ['E'])
        self.arith.add_rule('E', ['id'])
        self.arith.add_rule(
            'E', ['let', push, '(', 'E', ')', pop, lambda *args: args])

        self.action_table, self.goto_table, self.start_state = build_parsing_table(
            self.arith, LR0(self.arith.START, 0, 0), disable_mapping=True)
        self.driver = Driver(self.action_table, dict(self.goto_table),
                             self.start_state)

    def test_cannonical_collection(self):
        collection = canonical_collection(self.arith,
                                          LR0(self.arith.START, 0, 0))

        states = frozenset([
            frozenset([
                LR0(self.arith.START, 0, 0),
                LR0('S', 0, 0),
                LR0('E', 0, 0),
                LR0('E', 1, 0),
            ]),
            frozenset([
                LR0(self.arith.START, 0, 1),
            ]),
            frozenset([
                LR0('E', 0, 1),
            ]),
            frozenset([
                LR0('S', 0, 1),
            ]),
            frozenset([
                LR0('E', 1, 1),
                LR0(self.arith.ACTION_INSIDE % (1, 'push'), 0, 0),
            ]),
            frozenset([
                LR0('E', 1, 2),
            ]),
            frozenset([
                LR0('E', 1, 4),
            ]),
            frozenset([
                LR0('E', 1, 6),
            ]),
            frozenset([
                LR0('E', 1, 5),
                LR0(self.arith.ACTION_INSIDE % (2, 'pop'), 0, 0),
            ]),
            frozenset([
                LR0('E', 1, 3),
                LR0('E', 0, 0),
                LR0('E', 1, 0),
            ]),
        ])

        self.assertTrue(states == collection)

    def test_goto_table(self):
        states_gotos = [
            (frozenset([
                LR0(self.arith.START, 0, 0),
                LR0('S', 0, 0),
                LR0('E', 0, 0),
                LR0('E', 1, 0),
            ]), (('S', 1), ('id', 2), ('E', 3), ('let', 4))),
            (frozenset([
                LR0(self.arith.START, 0, 1),
            ]), ()),
            (frozenset([
                LR0('E', 0, 1),
            ]), ()),
            (frozenset([
                LR0('S', 0, 1),
            ]), ()),
            (frozenset([
                LR0('E', 1, 1),
                LR0(self.arith.ACTION_INSIDE % (1, 'push'), 0, 0),
            ]), ((self.arith.ACTION_INSIDE % (1, 'push'), 5), )),
            (frozenset([
                LR0('E', 1, 2),
            ]), (('(', 9), )),
            (frozenset([
                LR0('E', 1, 4),
            ]), ((')', 8), )),
            (frozenset([
                LR0('E', 1, 6),
            ]), ()),
            (frozenset([
                LR0('E', 1, 5),
                LR0(self.arith.ACTION_INSIDE % (2, 'pop'), 0, 0),
            ]), ((self.arith.ACTION_INSIDE % (2, 'pop'), 7), )),
            (frozenset([
                LR0('E', 1, 3),
                LR0('E', 0, 0),
                LR0('E', 1, 0),
            ]), (('E', 6), ('id', 2), ('let', 4))),
        ]

        checked = 0
        for state, gotos in states_gotos:
            h = hash(state)
            if not gotos:
                self.assertTrue(h not in self.goto_table)
                continue

            checked += 1
            expected_keys, expected_states_id = zip(*gotos)

            found_gotos_keys = frozenset(self.goto_table[h].keys())
            found_gotos_hashs = frozenset(self.goto_table[h].values())

            self.assertTrue(frozenset(expected_keys) == found_gotos_keys)
            self.assertTrue(
                frozenset(
                    [hash(states_gotos[i][0])
                     for i in expected_states_id]) == found_gotos_hashs)

        self.assertTrue(checked == len(self.goto_table.keys()))

    def test_action_table(self):
        states_shifts_reduce_actions = [
            (frozenset([
                LR0(self.arith.START, 0, 0),
                LR0('S', 0, 0),
                LR0('E', 0, 0),
                LR0('E', 1, 0),
            ]), (('id', 2), ('let', 4)), ()),
            (frozenset([
                LR0(self.arith.START, 0, 1),
            ]), (), ()),
            (frozenset([
                LR0('E', 0, 1),
            ]), (), ()),
            (frozenset([
                LR0('S', 0, 1),
            ]), (), ()),
            (frozenset([
                LR0('E', 1, 1),
                LR0(self.arith.ACTION_INSIDE % (1, 'push'), 0, 0),
            ]), (), ()),
            (frozenset([
                LR0('E', 1, 2),
            ]), (('(', 9), ), ()),
            (frozenset([
                LR0('E', 1, 4),
            ]), ((')', 8), ), ()),
            (frozenset([
                LR0('E', 1, 6),
            ]), (), ()),
            (frozenset([
                LR0('E', 1, 5),
                LR0(self.arith.ACTION_INSIDE % (2, 'pop'), 0, 0),
            ]), (), ()),
            (frozenset([
                LR0('E', 1, 3),
                LR0('E', 0, 0),
                LR0('E', 1, 0),
            ]), (('id', 2), ('let', 4)), ()),
        ]

        self.assertTrue(
            len(states_shifts_reduce_actions) == len(self.action_table.keys()))

        for state, shifts, reduces in states_shifts_reduce_actions:
            h = hash(state)

            self.assertTrue(
                len(shifts) == len(
                    filter(lambda action: "Shift" in str(action),
                           self.action_table[h].values())))

            if not shifts:
                continue
            keys, ids = zip(*shifts)
            found_ids = [
                i for i in range(len(states_shifts_reduce_actions))
                if ("Shift %s" % hash(states_shifts_reduce_actions[i][0])
                    ) in map(lambda a: str(a), self.action_table[h].values())
            ]
            found_keys = self.action_table[h].keys()

            self.assertTrue(frozenset(ids) == frozenset(found_ids))
            self.assertTrue(frozenset(keys) == frozenset(found_keys))

    def test_parse_complex_let_var_expression(self):
        source = StringIO("let (2)")
        lexer = RegressionTestUnknowGotoState.CalcLexer(source)
        self.driver.parse(lexer)