class Tokeniser(object):
    modes = {
        'expr': (tokens, token_indices),
        'filename': (filename_tokens, {}),
    }

    def __init__(self, feeder):
        self.pos = 0
        self.feeder = feeder
        self.prescanner = Prescanner(feeder)
        self.code = self.prescanner.scan()
        self.change_mode('expr')

    def change_mode(self, mode):
        self.mode = mode
        self.tokens, self.token_indices = self.modes[mode]

    def incomplete(self):
        'get more code from the prescanner and continue'
        self.prescanner.incomplete()
        self.code += self.prescanner.scan()

    def sntx_message(self, pos=None):
        if pos is None:
            pos = self.pos
        pre, post = self.code[:pos], self.code[pos:].rstrip('\n')
        if pos == 0:
            self.feeder.message('Syntax', 'sntxb', post)
        else:
            self.feeder.message('Syntax', 'sntxf', pre, post)

    def next(self):
        'return next token'
        self.skip_blank()
        if self.pos >= len(self.code):
            return Token('END', '', len(self.code))

        # look for a matching pattern
        indices = self.token_indices.get(self.code[self.pos], ())
        if indices:
            for index in indices:
                tag, pattern = self.tokens[index]
                match = pattern.match(self.code, self.pos)
                if match is not None:
                    break
        else:
            for tag, pattern in self.tokens:
                match = pattern.match(self.code, self.pos)
                if match is not None:
                    break

        # no matching pattern found
        if match is None:
            self.sntx_message()
            raise ScanError()

        # custom tokenisation rules defined with t_tag
        override = getattr(self, 't_' + tag, None)
        if override is not None:
            return override(match)
        else:
            text = match.group(0)
            self.pos = match.end(0)
            return Token(tag, text, match.start(0))

    def skip_blank(self):
        'skip whitespace and comments'
        comment = []    # start positions of comments
        while True:
            if self.pos >= len(self.code):
                if comment:
                    self.incomplete()
                else:
                    break
            if comment:
                if self.code.startswith('(*', self.pos):
                    comment.append(self.pos)
                    self.pos += 2
                elif self.code.startswith('*)', self.pos):
                    comment.pop()
                    self.pos += 2
                else:
                    self.pos += 1
            elif self.code.startswith('(*', self.pos):
                comment.append(self.pos)
                self.pos += 2
            elif self.code[self.pos] in ' \r\n\t':
                self.pos += 1
            else:
                break

    def t_String(self, match):
        start, end = self.pos, None
        self.pos += 1   # skip opening '"'
        newlines = []
        while True:
            if self.pos >= len(self.code):
                if end is None:
                    # reached end while still inside string
                    self.incomplete()
                    newlines.append(self.pos)
                else:
                    break
            c = self.code[self.pos]
            if c == '"':
                self.pos += 1
                end = self.pos
                break
            elif c == '\\':
                self.pos += 2
            else:
                self.pos += 1
        indices = [start] + newlines + [end]
        result = ''.join(self.code[indices[i]:indices[i + 1]]
                         for i in range(len(indices) - 1))
        return Token('String', result, start)

    def t_Number(self, match):
        text = match.group(0)
        pos = match.end(0)
        if self.code[pos - 1:pos + 1] == '..':
            # Trailing .. should be ignored. That is, `1..` is `Repeated[1]`.
            text = text[:-1]
            self.pos = pos - 1
        else:
            self.pos = pos
        return Token('Number', text, match.start(0))

    def token_mode(self, match, tag, mode):
        'consume a token and switch mode'
        text = match.group(0)
        self.pos = match.end(0)
        self.change_mode(mode)
        return Token(tag, text, match.start(0))

    def t_Get(self, match):
        return self.token_mode(match, 'Get', 'filename')

    def t_Put(self, match):
        return self.token_mode(match, 'Put', 'filename')

    def t_PutAppend(self, match):
        return self.token_mode(match, 'PutAppend', 'filename')

    def t_Filename(self, match):
        return self.token_mode(match, 'Filename', 'expr')
class Tokeniser(object): modes = { "expr": (tokens, token_indices), "filename": (filename_tokens, {}), } def __init__(self, feeder): self.pos = 0 self.feeder = feeder self.prescanner = Prescanner(feeder) self.code = self.prescanner.scan() self.change_mode("expr") def change_mode(self, mode): self.mode = mode self.tokens, self.token_indices = self.modes[mode] def incomplete(self): "get more code from the prescanner and continue" self.prescanner.incomplete() self.code += self.prescanner.scan() def sntx_message(self, pos=None): if pos is None: pos = self.pos pre, post = self.code[:pos], self.code[pos:].rstrip("\n") if pos == 0: self.feeder.message("Syntax", "sntxb", post) else: self.feeder.message("Syntax", "sntxf", pre, post) def next(self): "return next token" self.skip_blank() if self.pos >= len(self.code): return Token("END", "", len(self.code)) # look for a matching pattern indices = self.token_indices.get(self.code[self.pos], ()) if indices: for index in indices: tag, pattern = self.tokens[index] match = pattern.match(self.code, self.pos) if match is not None: break else: for tag, pattern in self.tokens: match = pattern.match(self.code, self.pos) if match is not None: break # no matching pattern found if match is None: self.sntx_message() raise ScanError() # custom tokenisation rules defined with t_tag override = getattr(self, "t_" + tag, None) if override is not None: return override(match) else: text = match.group(0) self.pos = match.end(0) return Token(tag, text, match.start(0)) def skip_blank(self): "skip whitespace and comments" comment = [] # start positions of comments while True: if self.pos >= len(self.code): if comment: self.incomplete() else: break if comment: if self.code.startswith("(*", self.pos): comment.append(self.pos) self.pos += 2 elif self.code.startswith("*)", self.pos): comment.pop() self.pos += 2 else: self.pos += 1 elif self.code.startswith("(*", self.pos): comment.append(self.pos) self.pos += 2 elif self.code[self.pos] in " \r\n\t": self.pos += 1 else: break def t_String(self, match): start, end = self.pos, None self.pos += 1 # skip opening '"' newlines = [] while True: if self.pos >= len(self.code): if end is None: # reached end while still inside string self.incomplete() newlines.append(self.pos) else: break c = self.code[self.pos] if c == '"': self.pos += 1 end = self.pos break elif c == "\\": self.pos += 2 else: self.pos += 1 indices = [start] + newlines + [end] result = "".join(self.code[indices[i]:indices[i + 1]] for i in range(len(indices) - 1)) return Token("String", result, start) def t_Number(self, match): text = match.group(0) pos = match.end(0) if self.code[pos - 1:pos + 1] == "..": # Trailing .. should be ignored. That is, `1..` is `Repeated[1]`. text = text[:-1] self.pos = pos - 1 else: self.pos = pos return Token("Number", text, match.start(0)) def token_mode(self, match, tag, mode): "consume a token and switch mode" text = match.group(0) self.pos = match.end(0) self.change_mode(mode) return Token(tag, text, match.start(0)) def t_Get(self, match): return self.token_mode(match, "Get", "filename") def t_Put(self, match): return self.token_mode(match, "Put", "filename") def t_PutAppend(self, match): return self.token_mode(match, "PutAppend", "filename") def t_Filename(self, match): return self.token_mode(match, "Filename", "expr")