Example #1
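
This example is a tokeniser for a Wolfram-language-style grammar: it pulls
prescanned source text from a Prescanner and hands out Token objects one at
a time. The names tokens, token_indices, filename_tokens, Token, Prescanner
and ScanError are assumed to be defined elsewhere in the same module; they
are not part of this listing.
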
class Tokeniser(object):
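    # each mode maps to a (token table, first-character index) pair:
    # 'expr' scans ordinary expressions, 'filename' scans the argument
    # of Get/Put/PutAppend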
    modes = {
        'expr': (tokens, token_indices),
        'filename': (filename_tokens, {}),
    }

    def __init__(self, feeder):
        self.pos = 0
        self.feeder = feeder
        self.prescanner = Prescanner(feeder)
        self.code = self.prescanner.scan()
        self.change_mode('expr')

    def change_mode(self, mode):
        self.mode = mode
        self.tokens, self.token_indices = self.modes[mode]

    def incomplete(self):
        'get more code from the prescanner and continue'
        self.prescanner.incomplete()
        self.code += self.prescanner.scan()

    def sntx_message(self, pos=None):
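        # report a syntax error to the feeder: 'sntxb' when the error is
        # at the very start of the input, otherwise 'sntxf' with the code
        # split into the text before and after the error position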
        if pos is None:
            pos = self.pos
        pre, post = self.code[:pos], self.code[pos:].rstrip('\n')
        if pos == 0:
            self.feeder.message('Syntax', 'sntxb', post)
        else:
            self.feeder.message('Syntax', 'sntxf', pre, post)

    def next(self):
        'return next token'
        self.skip_blank()
        if self.pos >= len(self.code):
            return Token('END', '', len(self.code))

        # look for a matching pattern
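        # token_indices maps a first character to the indices of the
        # patterns that can start with it, so usually only a few patterns
        # are tried; unknown first characters fall back to a linear scan
        # of the whole token table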
        indices = self.token_indices.get(self.code[self.pos], ())
        match = None  # stays None if no pattern matches at self.pos
        if indices:
            for index in indices:
                tag, pattern = self.tokens[index]
                match = pattern.match(self.code, self.pos)
                if match is not None:
                    break
        else:
            for tag, pattern in self.tokens:
                match = pattern.match(self.code, self.pos)
                if match is not None:
                    break

        # no matching pattern found
        if match is None:
            self.sntx_message()
            raise ScanError()

        # custom tokenisation rules are defined as t_<tag> methods
        override = getattr(self, 't_' + tag, None)
        if override is not None:
            return override(match)
        else:
            text = match.group(0)
            self.pos = match.end(0)
            return Token(tag, text, match.start(0))

    def skip_blank(self):
        'skip whitespace and comments'
        comment = []   # start positions of comments
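        # (* ... *) comments nest, hence a stack of start positions rather
        # than a flag; hitting end-of-input inside a comment asks the
        # prescanner for more source via incomplete()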
        while True:
            if self.pos >= len(self.code):
                if comment:
                    self.incomplete()
                else:
                    break
            if comment:
                if self.code.startswith('(*', self.pos):
                    comment.append(self.pos)
                    self.pos += 2
                elif self.code.startswith('*)', self.pos):
                    comment.pop()
                    self.pos += 2
                else:
                    self.pos += 1
            elif self.code.startswith('(*', self.pos):
                comment.append(self.pos)
                self.pos += 2
            elif self.code[self.pos] in ' \r\n\t':
                self.pos += 1
            else:
                break

    def t_String(self, match):
        start, end = self.pos, None
        self.pos += 1   # skip opening '"'
        newlines = []
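        # positions at which incomplete() had to fetch more source; used
        # below to assemble the token text from the accumulated code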
        while True:
            if self.pos >= len(self.code):
                if end is None:
                    # reached end while still inside string
                    self.incomplete()
                    newlines.append(self.pos)
                else:
                    break
            c = self.code[self.pos]
            if c == '"':
                self.pos += 1
                end = self.pos
                break
            elif c == '\\':
                self.pos += 2
            else:
                self.pos += 1
        indices = [start] + newlines + [end]
        result = ''.join(self.code[indices[i]:indices[i + 1]]
                         for i in range(len(indices) - 1))
        return Token('String', result, start)

    def t_Number(self, match):
        text = match.group(0)
        pos = match.end(0)
        if self.code[pos - 1:pos + 1] == '..':
            # Trailing .. should be ignored. That is, `1..` is `Repeated[1]`.
            text = text[:-1]
            self.pos = pos - 1
        else:
            self.pos = pos
        return Token('Number', text, match.start(0))

    def token_mode(self, match, tag, mode):
        'consume a token and switch mode'
        text = match.group(0)
        self.pos = match.end(0)
        self.change_mode(mode)
        return Token(tag, text, match.start(0))

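    # Get (<<), Put (>>) and PutAppend (>>>) are followed by a raw
    # filename, so matching one of them switches the tokeniser into
    # 'filename' mode; the Filename token switches it back to 'expr'.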
    def t_Get(self, match):
        return self.token_mode(match, 'Get', 'filename')

    def t_Put(self, match):
        return self.token_mode(match, 'Put', 'filename')

    def t_PutAppend(self, match):
        return self.token_mode(match, 'PutAppend', 'filename')

    def t_Filename(self, match):
        return self.token_mode(match, 'Filename', 'expr')
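
A minimal driving loop, sketched under two assumptions that the listing does
not confirm: the feeder object is whatever Prescanner expects (it is never
shown), and Token exposes its fields as .tag and .text.

    tokeniser = Tokeniser(feeder)    # 'feeder' is assumed to supply source lines
    while True:
        token = tokeniser.next()
        if token.tag == 'END':       # end-of-input sentinel returned by next()
            break
        print(token.tag, token.text)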