def _detect_DIY_features(self, source):
    have_docstring = False
    gen = tokenize.generate_tokens(io.StringIO(source).readline)

    def advance():
        tok = next(gen)
        return tok[0], tok[1]

    ignore = frozenset({token.NEWLINE, tokenize.NL, token.COMMENT})
    features = set()
    try:
        while True:
            tp, value = advance()
            if tp in ignore:
                continue
            # main
            elif tp == token.STRING:
                if value == "'__main__'":
                    features.add("main")
            elif tp == token.NAME and value == "main":
                features.add("main")
            # print
            elif tp == token.NAME and "print" not in features:
                if value in ("print", "input", "raw_input"):
                    features.add("print")
            else:
                continue
    except StopIteration:
        pass
    return frozenset(features)
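# Hedged usage sketch (not from the original source): assumes `checker` is an
# instance of the (unnamed) class that defines _detect_DIY_features above, and
# that io, token and tokenize are imported alongside it.
src = "if __name__ == '__main__':\n    print('hi')\n    main()\n"
print(checker._detect_DIY_features(src))  # expected: a frozenset with 'main' and 'print'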
def tokenize_source_string(s, base_byte=0):
    fobj = io.StringIO(six.text_type(s).rstrip(' \t\r\n\\'))
    lines = Lines(fobj)
    fobj.seek(0)
    for typ, tok, spos, epos, _ in tokenize.generate_tokens(fobj.readline):
        yield typ, tok, Interval(
            lines.byte_of_pos(*spos) + base_byte,
            lines.byte_of_pos(*epos) + base_byte)
def find_comments(s, base_byte=0):
    fobj = io.StringIO(six.text_type(s))
    lines = Lines(fobj)
    fobj.seek(0)
    for typ, tok, spos, epos, _ in tokenize.generate_tokens(fobj.readline):
        if typ == tokenize.COMMENT:
            yield tok, Interval(
                lines.byte_of_pos(*spos) + base_byte,
                lines.byte_of_pos(*epos) + base_byte)
def __init__(self, filename, stream=None):
    close_stream = None
    if stream is None:
        stream = open(filename)
        close_stream = stream.close
    self.filename = filename
    self.stream = stream
    self.generator = tokenize.generate_tokens(stream.readline)
    self.gettoken()  # Initialize lookahead
    self.dfas, self.startsymbol = self.parse()
    if close_stream is not None:
        close_stream()
    self.first = {}  # map from symbol name to set of tokens
    self.addfirstsets()
def adjust_comment(text):
    gen = tokenize.generate_tokens(io.StringIO(text).readline)
    result = []
    try:
        while True:
            tok = next(gen)
            # Rewrite '#' comments as '//', but leave preprocessor-style
            # '#include' / '#define' lines untouched.
            if (tok[0] == token.COMMENT
                    and not tok[1].startswith('#include')
                    and not tok[1].startswith('#define')):
                result.append((tok[0],
                               tok[1].replace('#', '//', 1),
                               tok[2], tok[3], tok[4]))
            else:
                result.append(tok)
    except StopIteration:
        pass
    # Drop the trailing ENDMARKER token before untokenizing.
    return tokenize.untokenize(result[:-1])
def adjust_multistr(text):
    gen = tokenize.generate_tokens(io.StringIO(text).readline)
    result = []
    try:
        while True:
            tok = next(gen)
            # Collapse triple-quoted strings into single-quoted strings,
            # escaping the embedded newlines as literal '\n'.
            if tok[0] == token.STRING and tok[1].startswith('"""'):
                result_str = tok[1].replace('"""', '"')
                result_str = result_str.replace('\r\n', '\\n')
                result_str = result_str.replace('\n', '\\n')
                result.append((tok[0],
                               result_str,
                               tok[2], tok[3], tok[4]))
            else:
                result.append(tok)
    except StopIteration:
        pass
    # Drop the trailing ENDMARKER token before untokenizing.
    return tokenize.untokenize(result[:-1])
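# Hedged usage sketch (not from the original source): chains the two helpers
# above; assumes io, token and tokenize are already imported alongside them.
sample = 'x = 1  # flag\nmsg = """two\nlines"""\n'
print(adjust_multistr(adjust_comment(sample)))
# The '#' comment comes back as '//', and the triple-quoted string is collapsed
# into a single-quoted one with a literal \n escape.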
def detect_future_features(infile):  # pragma: nocover
    have_docstring = False
    gen = tokenize.generate_tokens(infile.readline)

    def advance():
        tok = next(gen)
        return tok[0], tok[1]

    ignore = frozenset((token.NEWLINE, tokenize.NL, token.COMMENT))
    features = set()
    try:
        while True:
            tp, value = advance()
            if tp in ignore:
                continue
            elif tp == token.STRING:
                if have_docstring:
                    break
                have_docstring = True
            elif tp == token.NAME and value == 'from':
                tp, value = advance()
                if tp != token.NAME or value != '__future__':
                    break
                tp, value = advance()
                if tp != token.NAME or value != 'import':
                    break
                tp, value = advance()
                if tp == token.OP and value == '(':
                    tp, value = advance()
                while tp == token.NAME:
                    features.add(value)
                    tp, value = advance()
                    if tp != token.OP or value != ',':
                        break
                    tp, value = advance()
            else:
                break
    except StopIteration:
        pass
    return frozenset(features)
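# Hedged usage sketch (not part of the original module): detect_future_features
# reads from any file-like object, so an io.StringIO works for demonstration;
# assumes io, token and tokenize are imported alongside the function.
src = io.StringIO("from __future__ import (division, print_function)\nx = 1\n")
print(detect_future_features(src))  # -> frozenset with 'division' and 'print_function'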
def parse_stream_raw(self, stream, debug=False):
    """Parse a stream and return the syntax tree."""
    tokens = tokenize.generate_tokens(stream.readline)
    return self.parse_tokens(tokens, debug)
def parse_string(self, text, debug=False):
    """Parse a string and return the syntax tree."""
    tokens = tokenize.generate_tokens(generate_lines(text).next)
    return self.parse_tokens(tokens, debug)
with open(args.file) as f:
    def readline():
        # Line source for generate_tokens: once a line without a trailing
        # newline is seen (end of file), emit it with a newline appended and
        # then an empty string to signal EOF.
        while True:
            line = f.readline()
            if line.endswith('\n'):
                yield line
            else:
                yield line + '\n'
                yield ''
                break

    tokens, tokens_copy = itertools.tee(generate_tokens(readline().__next__))
    try:
        st = driver.parse_tokens(tokens)
        print(st)
    except TokenError as err:
        print([t for t in tokens_copy])
    except IndentationError as err:
        print(err.msg)
        print(err.filename)
        print(err.lineno)
        print(err.offset)
        print(err.text)