Пример #1
0
class ReLexer(Lexer):
    """Line-oriented, regular-expression-driven lexer.

    Lines are pulled from a ``FileReader`` on demand and split into tokens
    that are buffered in ``self.queue``; ``read`` and ``peek`` serve tokens
    from that buffer.  Every line that is read contributes its tokens
    followed by one ``IdToken`` carrying ``Token.EOL``.
    """

    # Capture groups relied upon by ``add_token``:
    #   1 - the whole token (leading whitespace excluded)
    #   2 - a ``//`` comment running to the end of the line
    #   3 - an integer literal
    #   4 - a double-quoted string literal (no escape sequences)
    # Identifiers and operators populate group 1 only.
    # The pattern is compiled with re.X, so the ``#`` remarks inside the
    # literal are regex comments and layout whitespace is ignored.
    regex_pat = r'''
    \s*(                                        #空白
    (//.*)|                                     #注释
    (\d+)|                                      #数字类型
    ("[^"]*")|                                  #字符串类型
    [A-Z_a-z][A-Z_a-z0-9]*|                     #标识符
    ==|<=|>=|&&|\|\||                           #符号
    [\+\-\*/\{\}\=\|\&\[\]\(\)\<\>\;\%]           #符号
    )
    '''

    def __init__(self, f):
        """Set up an empty token buffer over the input ``f``.

        Args:
            f: Passed unchanged to ``FileReader``.
        """
        self.has_more = True
        self.reader = FileReader(f)
        self.line_no = 0
        self.queue = []
        self.pattern = re.compile(self.regex_pat, re.X)

    def read(self):
        """Remove and return the next token, or ``Token.EOF`` at end of input."""
        if self.fill_queue(0):
            return self.queue.pop(0)
        return Token.EOF

    def peek(self, i):
        """Return the token ``i`` positions ahead without consuming it.

        Returns ``Token.EOF`` when the input ends before that position.
        """
        if self.fill_queue(i):
            return self.queue[i]
        return Token.EOF

    def fill_queue(self, i):
        """Read lines until ``self.queue`` holds an element at index ``i``.

        Returns:
            True if index ``i`` is available, False if the input ran out first.
        """
        while i >= len(self.queue):
            if not self.has_more:
                return False
            self.read_line()
        return True

    def read_line(self):
        """Tokenize the next input line and append its tokens to the queue.

        Clears ``self.has_more`` when the reader returns an empty/falsy line.
        Characters that do not start any token are skipped; a line whose
        remainder contains no token (e.g. trailing whitespace) simply ends.
        """
        line = self.reader.read_line()
        if not line:
            self.has_more = False
            return
        # Strip only a real line terminator: blindly dropping the last
        # character would eat a token character when the final line of the
        # input is not newline-terminated.
        if line.endswith('\n'):
            line = line[:-1]

        self.line_no = self.reader.line_no
        # Every alternative of the pattern consumes at least one character,
        # so finditer() yields the same successive matches as repeatedly
        # searching from the previous match end, and stops cleanly (instead
        # of failing on a None match) when nothing more can be matched.
        for matcher in self.pattern.finditer(line):
            self.add_token(matcher)
        self.queue.append(IdToken(self.line_no, Token.EOL))

    def add_token(self, matcher):
        """Convert one regex match into a token and enqueue it.

        Comments produce no token.  Integer literals become ``NumToken``
        (value converted with ``int``), string literals become ``StrToken``
        (the matched text, surrounding double quotes included), and anything
        else becomes ``IdToken``.

        Args:
            matcher: A match object produced by ``self.pattern``.
        """
        text = matcher.group(1)
        if text is None or matcher.group(2) is not None:
            # Nothing captured, or the capture is a comment: emit no token.
            return
        if matcher.group(3) is not None:
            token = NumToken(self.line_no, int(text))
        elif matcher.group(4) is not None:
            token = StrToken(self.line_no, text)
        else:
            token = IdToken(self.line_no, text)
        self.queue.append(token)