import re
import token
import tokenize


def register_tokens(token_dict):
    """Register new exact token types and extend tokenize's pseudo-token regex."""

    def next_token_slot():
        # Pick the first unused token number.
        index = max(token.tok_name.keys(), default=0)
        return index + 1

    escaped_tokens = []
    for name, value in token_dict.items():
        slot = next_token_slot()
        setattr(token, name, slot)
        token.tok_name[slot] = name
        token.EXACT_TOKEN_TYPES[value] = slot
        escaped_tokens.append(re.escape(value))

    # Rebuild the pseudo-token pattern so the new literals are matched as
    # single operator tokens by the pure-Python tokenizer.
    tokenize.PseudoToken = tokenize.Whitespace + tokenize.group(
        *escaped_tokens,
        tokenize.PseudoExtras,
        tokenize.Number,
        tokenize.Funny,
        tokenize.ContStr,
        tokenize.Name,
    )
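# --- Usage sketch (not from the original source): register a hypothetical
# "=>" operator and read back its exact type.  Relies on the pure-Python
# tokenizer (pre-3.12 CPython), which re-reads the patched PseudoToken.
import io

register_tokens({"ARROW": "=>"})
_demo = io.BytesIO(b"x => y\n")
for _tok in tokenize.tokenize(_demo.readline):
    if _tok.string == "=>":
        assert token.tok_name[_tok.exact_type] == "ARROW"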
import tokenize

# `lg` is an rply LexerGenerator and `reserved` maps keyword strings to token
# names; both are assumed to be defined earlier in the original module, so
# minimal stand-ins are given here.
from rply import LexerGenerator

lg = LexerGenerator()
reserved = {}  # placeholder, e.g. {'if': 'IF', 'while': 'WHILE'}

rules = [
    ['NUMBER', tokenize.Number],
    ['STRING', tokenize.String],
    ['NAME', tokenize.Name],
    ['PLUS', r'\+'],
    ['MINUS', r'-'],
    ['MUL', r'\*'],
    ['POWER', r'\^'],
    ['DIV', r'/'],
    ['MOD', r'%'],
    ['LPAR', r'\('],
    ['RPAR', r'\)'],
    ['LSQB', r'\['],
    ['RSQB', r'\]'],
    ['LBRACE', r'\{'],
    ['RBRACE', r'\}'],
    ['EQ', r'=='],
    ['LE', r'<='],
    ['LT', r'<'],
    ['GE', r'>='],
    ['GT', r'>'],
    ['NE', r'!='],
    ['DOT', r'\.'],
    ['COMMA', r'\,'],
    ['COLON', r':'],
    ['ASSIGN', r'='],
    ['NEWLINE', '\n'],
]
for name, regex in rules:
    lg.add(name, regex)

# Skip line continuations, intra-line whitespace, and comments.
lg.ignore(tokenize.group(r'\\\r?\n', r'[ \f\t]+', tokenize.Comment))


class DragonLexer:
    def __init__(self):
        self.lexer = lg.build()

    def lex(self, code):
        # Iterate instead of calling next() inside a bare `while True` loop, so
        # the stream's StopIteration does not leak out of this generator
        # (a RuntimeError under PEP 479).
        for token in self.lexer.lex(code):
            if token.gettokentype() == 'NAME':
                token.name = reserved.get(token.getstr(), 'NAME')
            yield token
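# --- Usage sketch (illustrative input, not from the original source):
# tokenizing a tiny expression with the lexer above.
_lexer = DragonLexer()
for _tok in _lexer.lex("x = 1 + 2\n"):
    print(_tok.gettokentype(), repr(_tok.getstr()))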
import codecs
import io
import token as tokens
import tokenize
from functools import partial

# Backport the walrus operator for interpreters that predate it: register an
# exact token type for ":=" and teach the pure-Python tokenizer to match it.
tokens.COLONEQUAL = 0xFF
tokens.tok_name[0xFF] = "COLONEQUAL"
tokenize.EXACT_TOKEN_TYPES[":="] = tokens.COLONEQUAL
tokenize.PseudoToken = tokenize.Whitespace + tokenize.group(
    r":=",
    tokenize.PseudoExtras,
    tokenize.Number,
    tokenize.Funny,
    tokenize.ContStr,
    tokenize.Name,
)


def generate_walrused_source(readline):
    source_tokens = list(tokenize.tokenize(readline))
    modified_source_tokens = source_tokens.copy()

    def inc(token, by=1, page=0):
        # Shift a token's start/end positions; page 0 is the row, page 1 the column.
        start = list(token.start)
        end = list(token.end)
        start[page] += by
        end[page] += by
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import re
import tokenize


# 2016-08-13T23:28+08:00
# From Chromium depot_tools scm.py
def ValidateEmail(email):
    return re.match(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9._%-]+\.[a-zA-Z]{2,6}$",
                    email) is not None


# 2017-12-29T14:16+08:00
# Matches a single octet in the 0-255 range; tokenize.group() just wraps the
# alternatives in a (...|...) group.
int_0_255_re = tokenize.group(r'(\d{1,2})|(1\d\d)|(2[0-4]\d)|(25[0-5])')
ipv4_re = '^' + tokenize.group(int_0_255_re + r'\.') + r'{3}' + int_0_255_re + '$'


# References:
#   Sams Teach Yourself Regular Expressions in 10 Minutes
def ValidateIPv4(ip_addr):
    return re.match(ipv4_re, ip_addr) is not None


# References:
#   https://github.com/Microsoft/vcpkg/blob/master/toolsrc/src/vcpkg/base/files.cpp#L9
def ValidateFileName(filename):
    # fullmatch so that a single forbidden character anywhere rejects the name.
    return re.fullmatch(r'[^\[\/:*?"<>|\]]+', filename) is not None
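# --- Quick check of the validators above (example inputs, not from the
# original source).
if __name__ == "__main__":
    assert ValidateEmail("user@example.com")
    assert ValidateIPv4("192.168.0.1")
    assert not ValidateIPv4("256.1.1.1")
    assert ValidateFileName("notes.txt")
    assert not ValidateFileName("bad|name.txt")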
import re
import tokenize

from mython.trampoline import TokenStream

# ______________________________________________________________________
# Module data

tok_name = tokenize.tok_name.copy()
# Mython's extra token types start right after the stock ones.
BANG, MYEXPR, MYSUITE = (tokenize.N_TOKENS + count for count in range(3))
tok_name[BANG] = 'BANG'
tok_name[MYEXPR] = 'MYEXPR'
tok_name[MYSUITE] = 'MYSUITE'
N_TOKENS = tokenize.N_TOKENS + 3

# Same pseudo-token pattern as tokenize, plus "[!]" so the lexer can emit BANG.
pseudoprog = re.compile(
    tokenize.Whitespace + tokenize.group(tokenize.PseudoExtras,
                                         tokenize.Number,
                                         tokenize.Funny,
                                         tokenize.ContStr,
                                         tokenize.Name,
                                         r"[!]")
)

CLOSERS = {
    '{' : '}',
    '(' : ')',
    '<' : '>',
    '[' : ']',
}

__DEBUG__ = False

# ______________________________________________________________________
# Class definitions.

class MythonReadliner (object):
import io
import tokenize as _tokenize
from typing import Iterator

from pyasdl.__pegen.tokenizer import Tokenizer
from pyasdl.grammar import Module, Sum
from pyasdl.parser import GeneratedParser as _ASDLParser

__all__ = ["parse", "fetch_comments", "is_simple_sum"]

# Since pegen.tokenizer.Tokenizer uses .type instead of .exact_type, changing
# the default comment behaviour is not trivial.  As a workaround, sanitize the
# input before passing it to the real parser: teach tokenize to match ASDL's
# "--" comments as ordinary tokens.
COMMENT_PATTERN = _tokenize.Whitespace + r"--.*?\n"
_tokenize.PseudoToken = _tokenize.Whitespace + _tokenize.group(
    COMMENT_PATTERN,
    _tokenize.PseudoExtras,
    _tokenize.Number,
    _tokenize.Funny,
    _tokenize.ContStr,
    _tokenize.Name,
)


def tokenize(
    source: str, ignore_comments: bool = True
) -> Iterator[_tokenize.TokenInfo]:
    # A wrapper around tokenize.generate_tokens that optionally drops the
    # "--" comment tokens produced by the patched pattern above.
    source_buffer = io.StringIO(source)
    for token in _tokenize.generate_tokens(source_buffer.readline):
        if token.string.startswith("--") and ignore_comments:
            continue
        yield token
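# --- Usage sketch (illustrative ASDL input, not from the original source);
# relies on the pure-Python tokenizer re-reading PseudoToken (pre-3.12 CPython).
_demo_source = "-- a comment\nmodule Demo { }\n"
for _tok in tokenize(_demo_source, ignore_comments=False):
    print(_tokenize.tok_name[_tok.type], repr(_tok.string))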
def update_event(self, inp=-1):
    # Push tokenize.group() -- "()" when called with no choices -- to output 0.
    self.set_output_val(0, tokenize.group())
import tokenize


def group(*choices):
    return tokenize.group(*choices)
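# --- For reference (hedged example, argument values are illustrative):
# tokenize.group joins its arguments into a single (a|b|...) alternation.
assert group(r'\d+', r'\w+') == r'(\d+|\w+)'
assert group() == '()'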
import re
import tokenize

from basil.lang.python import TokenUtils
from basil.lang.mython.MyFrontExceptions import MyFrontSyntaxError

# ______________________________________________________________________
# Module data

tok_name = tokenize.tok_name.copy()
BANG, MYEXPR, MYSUITE = (tokenize.N_TOKENS + count for count in xrange(3))
tok_name[BANG] = 'BANG'
tok_name[MYEXPR] = 'MYEXPR'
tok_name[MYSUITE] = 'MYSUITE'
N_TOKENS = tokenize.N_TOKENS + 3

pseudoprog = re.compile(
    tokenize.Whitespace + tokenize.group(tokenize.PseudoExtras,
                                         tokenize.Number,
                                         tokenize.Funny,
                                         tokenize.ContStr,
                                         tokenize.Name,
                                         r"[!]")
)

CLOSERS = {
    '{' : '}',
    '(' : ')',
    '<' : '>',
    '[' : ']',
}

# ______________________________________________________________________
# Compatibility layer 2.5/2.6

if type(__builtins__) == dict:
    define_next = "next" not in __builtins__.keys()
else: