# Example #1
# 0
class Lexer(object):
    """Hand-written lexer that splits an interpreter's input into tokens.

    All reading goes through ``self.input`` (a ``Buffer``) using three calls:
    ``move(condition)``, ``accept()`` and ``reject()``.

    NOTE(review): ``Buffer`` is not defined in this block. From the way it is
    used here, ``move`` appears to consume the next character when
    ``condition`` accepts it (returning ``None`` otherwise), ``accept`` appears
    to commit and return the characters consumed since the last commit, and
    ``reject`` appears to rewind to the last commit. Confirm against
    ``Buffer`` before relying on this description.
    """

    def __init__(self, interpreter):
        """Wrap the text returned by ``interpreter.read()`` in a ``Buffer``.

        Line numbering starts at 1.
        """
        self.input = Buffer(interpreter.read())
        self.line = 1

    @staticmethod
    def _is_ascii_digit(char):
        """Return True only for the ten ASCII digits ``0``-``9``.

        ``str.isdigit`` is deliberately not used: it also accepts characters
        such as superscript two, which are outside the ``[0-9]`` grammar of
        number tokens.
        """
        return len(char) == 1 and '0' <= char <= '9'

    def get_line(self):
        """Return the current line number."""
        return self.line

    def add_line(self):
        """Advance the current line number by one."""
        self.line += 1

    def collect(self):
        """Accept the pending characters and return them joined as a string."""
        return ''.join(self.input.accept())

    def restore(self):
        """Reject the pending characters so they can be read again."""
        self.input.reject()

    def expect(self, condition=None):
        """Try to consume one character; return True if one was consumed.

        ``condition`` is forwarded to ``Buffer.move``; the predicates used in
        this class each take a single character.
        """
        return self.input.move(condition) is not None

    def try_identifier_or_keyword(self):
        """
        identifier: [a-zA-Z][a-zA-Z0-9]*

        Characters are classified with ``str.isalpha`` / ``str.isalnum``, so
        on Unicode text the accepted set is wider than the ASCII ranges shown
        above.

        Returns the token produced by ``KeywordManager.try_keyword`` when it
        recognises the name, otherwise an ``IdentifierToken``. Returns None
        (after restoring the input) when the next character cannot start an
        identifier.
        """

        # [a-zA-Z]
        if not self.expect(str.isalpha):
            self.restore()
            return None

        # [a-zA-Z0-9]*
        while self.expect(str.isalnum):
            pass

        name = self.collect()
        keyword = KeywordManager.try_keyword(name, self.get_line())
        if keyword is not None:
            return keyword
        return IdentifierToken(name, self.get_line())

    def try_number(self):
        r"""
        number: ([0-9]+(\.[0-9]+)?)|(\.[0-9]+)

        Returns a ``NumberToken`` holding the matched text, or None (after
        restoring the input) when no number starts here.

        A '.' that is not followed by a digit is not part of the number: the
        digits read before it still form a token and the '.' is left in the
        input. A lone '.' therefore yields None.
        """
        is_digit = self._is_ascii_digit

        # [0-9]*
        while self.expect(is_digit):
            pass

        # Commit the integer part now, so that rejecting a dangling '.' below
        # cannot throw these digits away as well.
        integer_part = self.collect()

        # (\.[0-9]+)?
        if self.expect(lambda c: c == '.') and self.expect(is_digit):
            while self.expect(is_digit):
                pass
            return NumberToken(integer_part + self.collect(), self.get_line())

        # No fractional part: give back a '.' that had no digit after it.
        self.restore()
        if not integer_part:
            return None

        return NumberToken(integer_part, self.get_line())

    def try_whitespaces(self):
        """Consume a run of whitespace (``str.isspace``) characters.

        Returns a ``WhitespacesToken`` stamped with the line on which the run
        began, or None when the next character is not whitespace. The line
        counter is advanced once per newline contained in the run.
        """
        begin_line = self.get_line()

        while self.expect(str.isspace):
            pass

        name = self.collect()
        if not name:
            self.restore()
            return None

        for _ in range(name.count('\n')):
            self.add_line()

        return WhitespacesToken(name, begin_line)

    def try_comment(self):
        """
        comment: #.*

        Returns a ``CommentToken`` stamped with the line on which the comment
        began, or None when the next character is not '#'. The newline that
        ends the comment, if there is one, is consumed and is part of the
        token text.
        """

        begin_line = self.get_line()

        if not self.expect(lambda c: c == '#'):
            self.restore()
            return None

        while self.expect(lambda c: c != '\n'):
            pass

        # The loop above stops at '\n' or at the end of the input, so an
        # unconditional read succeeds only when a newline is waiting.
        if self.expect():
            # eat '\n'
            self.add_line()

        return CommentToken(self.collect(), begin_line)

    def try_character_or_operator(self):
        """Consume any single character as a ``CharacterToken``.

        Returns None when no character could be read.
        """
        if not self.expect():
            self.restore()
            return None

        return CharacterToken(self.collect(), self.get_line())

    def tokenize(self):
        """Yield the tokens of the input, finishing with an ``EOFToken``.

        The matchers are tried in a fixed order: identifier/keyword, number,
        whitespace, comment, then any single character. Whitespace and
        comments are consumed but not yielded. The loop ends when not even a
        single character can be read.
        """
        while True:
            token = self.try_identifier_or_keyword()
            if token is not None:
                yield token
                continue

            token = self.try_number()
            if token is not None:
                yield token
                continue

            # Whitespace and comments are skipped rather than reported.
            if self.try_whitespaces() is not None:
                continue

            if self.try_comment() is not None:
                continue

            token = self.try_character_or_operator()
            if token is None:
                break
            yield token

        yield EOFToken(self.get_line())