def expression_without_precedence(self):
    """Parse one primary expression, ignoring binary-operator precedence.

    Dispatches on the upcoming token: literals (nil, integers, strings and
    ``break``) are built here, every other construct is delegated to its own
    parsing method.

    Returns the parsed node, or None when the upcoming token does not start
    any of the constructs handled here.
    """
    if self.__accept_and_consume(KeywordToken('nil')):
        return NilValue()
    elif self.__accept_type(NumberToken):
        token = self.__next()
        return IntegerValue.from_string(token.value)
    elif self.__accept_and_consume(SymbolToken('-')):
        # Only negative integer literals are built here, so the sign must be
        # followed by a number.  Taking whatever token comes next would glue
        # '-' onto identifiers, strings or symbols.
        # NOTE(review): relies on __expect_type returning the consumed token
        # (as its other call sites use it) and rejecting a mismatch - confirm.
        token = self.__expect_type(NumberToken)
        return IntegerValue.from_string('-' + token.value)
    elif self.__accept_type(StringToken):
        token = self.__next()
        return StringValue(token.value)
    elif self.__accept(SymbolToken('(')):
        return self.sequence()
    elif self.__accept_type(IdentifierToken):
        return self.id_started()
    elif self.__accept(KeywordToken('if')):
        return self.if_then()
    elif self.__accept(KeywordToken('while')):
        return self.while_do()
    elif self.__accept(KeywordToken('for')):
        return self.for_do()
    elif self.__accept_and_consume(KeywordToken('break')):
        return Break()
    elif self.__accept(KeywordToken('let')):
        return self.let()
    elif self.__accept(KeywordToken('type')):
        return self.type_declaration()
    elif self.__accept(KeywordToken('var')):
        return self.variable_declaration()
    elif self.__accept(KeywordToken('function')):
        return self.function_declaration()
    else:
        return None
def lvalue_next(self):
    """Parse an optional continuation of an lvalue.

    A consumed '.' hands over to record_lvalue(), a consumed '[' hands over
    to array_lvalue().  Returns None when neither symbol follows.
    """
    if self.__accept_and_consume(SymbolToken('.')):
        return self.record_lvalue()
    if self.__accept_and_consume(SymbolToken('[')):
        return self.array_lvalue()
    return None
def variable_declaration(self):
    """Parse ``var <id> [: <type>] := <expression>``.

    Returns a VariableDeclaration whose type is None when no ':' annotation
    was given.
    """
    self.__expect(KeywordToken('var'))
    variable_name = self.id()
    # The type annotation is optional; it is only parsed after a ':'.
    declared_type = self.type() if self.__accept_and_consume(SymbolToken(':')) else None
    self.__expect(SymbolToken(':='))
    initial_value = self.expression()
    return VariableDeclaration(variable_name, declared_type, initial_value)
def id_started(self):
    """An identifier is upcoming; look one token past it to pick the construct.

    '{' after the identifier means record creation, '(' means a function
    call; anything else is parsed as an lvalue and passed to lvalue_started().
    """
    if self.__accept(SymbolToken('{'), self.__peek(1)):
        return self.record()
    if self.__accept(SymbolToken('('), self.__peek(1)):
        return self.function_call()
    return self.lvalue_started(self.lvalue())
def sequence(self):
    """Parse a parenthesised sequence: ``'(' [exp {';' exp}] ')'``.

    Returns a Sequence holding the parsed expressions in source order (empty
    for ``()``).
    """
    exps = []
    self.__expect(SymbolToken('('))
    upcoming = self.__peek()
    # NOTE(review): __peek() is assumed to yield None once the input is
    # exhausted.  Guard against that so an unterminated '(' falls through to
    # the __expect(')') below instead of failing on ``None.equals``.
    if upcoming is not None and not upcoming.equals(SymbolToken(')')):
        exps.append(self.expression())
        while self.__accept_and_consume(SymbolToken(';')):
            exps.append(self.expression())
    self.__expect(SymbolToken(')'))
    return Sequence(exps)
def arguments(self):
    """Parse a call argument list: ``'(' [exp {',' exp}] ')'``.

    Returns the list of parsed argument expressions (empty for ``()``).
    """
    self.__expect(SymbolToken('('))
    args = []
    token = self.__peek()
    # NOTE(review): __peek() is assumed to yield None once the input is
    # exhausted.  Guard against that so an unterminated '(' falls through to
    # the __expect(')') below instead of failing on ``None.equals``.
    if token is not None and not token.equals(SymbolToken(')')):
        args.append(self.expression())
        while self.__accept_and_consume(SymbolToken(',')):
            args.append(self.expression())
    self.__expect(SymbolToken(')'))
    return args
def function_declaration(self):
    """Parse ``function <id> '(' <parameters> ')' [: <type>] = <expression>``.

    Returns a FunctionDeclaration; its return type is None when no ':'
    annotation follows the parameter list.
    """
    self.__expect(KeywordToken('function'))
    name = self.id()
    self.__expect(SymbolToken('('))
    params = self.parameters()
    self.__expect(SymbolToken(')'))
    # The result type is optional; it is only parsed after a ':'.
    result_type = self.type() if self.__accept_and_consume(SymbolToken(':')) else None
    self.__expect(SymbolToken('='))
    body = self.expression()
    return FunctionDeclaration(name, params, result_type, body)
def type(self):
    """Parse a type: a type name, a ``{...}`` record type or ``array of <id>``.

    The first token is consumed up front and decides which form is parsed.

    Raises:
        ExpectationError: when the consumed token starts none of the forms.
    """
    first = self.__next()
    if self.__accept_type(IdentifierToken, first):
        return TypeId(first.value)
    if self.__accept(SymbolToken('{'), first):
        fields = self.type_fields()
        self.__expect(SymbolToken('}'))
        return RecordType(fields)
    if self.__accept(KeywordToken('array'), first):
        self.__expect(KeywordToken('of'))
        element_type = self.__expect_type(IdentifierToken)
        return ArrayType(element_type.value)
    raise ExpectationError('a type definition', first)
def record(self):
    """Parse a record creation: ``<type-id> '{' [id = exp {',' id = exp}] '}'``.

    The closing '}' is always consumed, including for an empty field list.
    After '{' or ',' a field is mandatory, so a trailing comma or any other
    stray token is reported instead of being left in the token stream.

    Returns a RecordCreation for the named type with its fields in source
    order.

    Raises:
        ParseError: when a field is followed by something other than ',' or
            '}'.
    """
    type_token = self.__expect_type(IdentifierToken)
    self.__expect(SymbolToken('{'))
    fields = OrderedDict()
    # An immediate '}' is an empty record; otherwise at least one field follows.
    if not self.__accept_and_consume(SymbolToken('}')):
        while True:
            # NOTE(review): id_field() is expected to reject a token that is
            # not an identifier (via self.id()) - confirm.
            field_name, exp = self.id_field()
            fields[field_name] = exp
            token = self.__next()
            if self.__accept(SymbolToken(','), token):
                continue
            if self.__accept(SymbolToken('}'), token):
                break
            raise ParseError('Expected either , or }', token)
    return RecordCreation(TypeId(type_token.value), fields)
def expressions(self):
    """Parse one or more ';'-separated expressions and return them as a list."""
    # note that though Dr. Appel's specification admits empty lists of
    # expressions, this is restricted to at least one expression to avoid
    # exception handling
    parsed = []
    while True:
        parsed.append(self.expression())
        if not self.__accept_and_consume(SymbolToken(';')):
            return parsed
def lvalue_started(self, lvalue):
    """Decide what an already-parsed lvalue turns out to be.

    ':=' makes it the target of an Assign, 'of' hands it to
    array_from_lvalue(); otherwise the lvalue itself is returned unchanged.
    """
    if self.__accept_and_consume(SymbolToken(':=')):
        return Assign(lvalue, self.expression())
    if self.__accept_and_consume(KeywordToken('of')):
        return self.array_from_lvalue(lvalue)
    return lvalue
def test_peeking(self):
    """peek(n) looks ahead without consuming; next() still yields the first token."""
    tokenizer = Tokenizer('2 + 2')
    two = NumberToken('2')
    plus = SymbolToken('+')

    self.assertEqual(tokenizer.peek(), two)
    # peek() with no argument is the same as peek(0)
    self.assertEqual(tokenizer.peek(), tokenizer.peek(0))
    self.assertEqual(tokenizer.peek(1), plus)
    self.assertEqual(tokenizer.peek(2), two)
    # looking past the last token yields None
    self.assertEqual(tokenizer.peek(3), None)
    self.assertEqual(tokenizer.next(), two)
def type_fields(self):
    """Parse zero or more ','-separated ``name : type`` fields.

    Returns an OrderedDict mapping each field name to its type id, in source
    order; it is empty when no identifier is upcoming.
    """
    fields = OrderedDict()
    if not self.__accept_type(IdentifierToken):
        return fields
    while True:
        field_name, field_type = self.type_field()
        fields[field_name] = field_type
        if not self.__accept_and_consume(SymbolToken(',')):
            return fields
def for_do(self):
    """Parse ``for <id> := <exp> to <exp> do <exp>`` and return a For node."""
    self.__expect(KeywordToken('for'))
    loop_variable = self.__expect_type(IdentifierToken)
    self.__expect(SymbolToken(':='))
    lower_bound = self.expression()
    self.__expect(KeywordToken('to'))
    upper_bound = self.expression()
    self.__expect(KeywordToken('do'))
    loop_body = self.expression()
    return For(loop_variable.value, lower_bound, upper_bound, loop_body)
def tokenize(self):
    """Retrieve the next token from the text.

    Starting at the current character, skips whitespace, line ends,
    underscores and comments until a token can be produced.

    Returns:
        A StringToken, NumberToken, KeywordToken, IdentifierToken or
        SymbolToken carrying the location where it started, or None once
        there is no current character left.

    Raises:
        TokenError: for a character that matches none of the categories.
    """
    c = self.__current_character()
    while c:
        if self.is_whitespace(c):
            pass
        elif self.is_eol(c):
            self.__newline()  # do line accounting
        elif self.is_quote(c):
            location = self.current_location()
            return StringToken(self.__string(), location)
        elif self.is_number(c):
            location = self.current_location()
            return NumberToken(self.__number(), location)
        elif self.is_underscore(c):
            pass  # read _main
        elif self.is_letter(c):
            location = self.current_location()
            value = self.__identifier()
            if self.is_keyword(value):
                return KeywordToken(value, location)
            else:
                return IdentifierToken(value, location)
        elif self.is_symbol(c):
            location = self.current_location()
            # d is the look-ahead character right after the symbol
            d = self.__advance()
            if c == '/' and d == '*':
                self.__comment()  # advance until end of comment
            elif c == '/' and d == '/':
                self.__advance_until('\n')
            # Tuple membership instead of ``d in '>='``: a substring test
            # matches an empty look-ahead ('' is in every string) and raises
            # TypeError for None, both of which can only occur at the end of
            # the input.
            elif c == '<' and d in ('>', '='):
                self.__advance()  # step past the second character
                return SymbolToken(c + d, location)
            elif c in '>:' and d == '=':
                self.__advance()  # step past the second character
                return SymbolToken(c + d, location)
            else:
                return SymbolToken(c, location)
        else:
            raise TokenError('Invalid character: ' + c, self.current_location())
        c = self.__advance()
    return None
def parameters(self):
    """Parse zero or more ','-separated ``name : type`` function parameters.

    Returns a list of FunctionParameter objects; it is empty when no
    identifier is upcoming.
    """
    params = []
    if self.__accept_type(IdentifierToken):
        while True:
            param_name, param_type = self.type_field()
            params.append(FunctionParameter(param_name, param_type))
            if not self.__accept_and_consume(SymbolToken(',')):
                break
    return params
def test_multi_line_comments(self):
    """``/* ... */`` comments produce no tokens, including nested ones."""
    cases = (
        ('a /* ... */ b', [IdentifierToken('a'), IdentifierToken('b')]),
        ('/ /* ... /* ... */ ... */ /', [SymbolToken('/'), SymbolToken('/')]),
    )
    for text, expected_tokens in cases:
        self.assertTokenizesTo(text, expected_tokens)
def test_division(self):
    """A lone '/' between identifiers is tokenized as a symbol."""
    expected_tokens = [IdentifierToken('a'), SymbolToken('/'), IdentifierToken('b')]
    self.assertTokenizesTo('a / b', expected_tokens)
def test_less_than(self):
    """'<=' is tokenized as one two-character symbol."""
    expected_tokens = [IdentifierToken('a'), SymbolToken('<='), IdentifierToken('b')]
    self.assertTokenizesTo('a <= b', expected_tokens)
def test_not_equal(self):
    """'<>' is tokenized as one two-character symbol."""
    expected_tokens = [NumberToken('2'), SymbolToken('<>'), NumberToken('1')]
    self.assertTokenizesTo('2 <> 1', expected_tokens)
def id_field(self):
    """Parse ``<id> = <expression>`` and return ``(field name, expression)``."""
    name = self.id()
    self.__expect(SymbolToken('='))
    value = self.expression()
    return name, value
def test_assign(self):
    """':=' is tokenized as one two-character symbol."""
    expected_tokens = [IdentifierToken('a'), SymbolToken(':='), IdentifierToken('b')]
    self.assertTokenizesTo('a := b', expected_tokens)
def test_multiline(self):
    """Tokens on both sides of a newline come out as one continuous stream."""
    first_line = [IdentifierToken('a'), SymbolToken('='), NumberToken('1')]
    second_line = [IdentifierToken('b'), SymbolToken('='), NumberToken('2')]
    self.assertTokenizesTo("a = 1\nb = 2", first_line + second_line)
def test_simple(self):
    """A number, a symbol and a number are tokenized in order."""
    expected_tokens = [NumberToken('2'), SymbolToken('+'), NumberToken('2')]
    self.assertTokenizesTo("2 + 2", expected_tokens)
def array_lvalue(self):
    """Parse ``<expression> ']'`` plus any further lvalue continuation.

    Returns an ArrayLValue holding the index expression and the continuation
    returned by lvalue_next() (possibly None).
    """
    index = self.expression()
    self.__expect(SymbolToken(']'))
    continuation = self.lvalue_next()
    return ArrayLValue(index, continuation)
def type_field(self):
    """Parse ``<id> : <type-id>`` and return ``(field name, type id)``."""
    name = self.id()
    self.__expect(SymbolToken(':'))
    field_type = self.type_id()
    return name, field_type
def type_declaration(self):
    """Parse ``type <id> = <type>`` and return a TypeDeclaration."""
    self.__expect(KeywordToken('type'))
    declared_name = self.id()
    self.__expect(SymbolToken('='))
    definition = self.type()
    return TypeDeclaration(declared_name, definition)