def check(paths, verbose):
    """Syntax-check every file in *paths* with parser.suite().

    Writes one "file:line:col: SyntaxError: ..." line per bad file to
    stdout and exits the process with status 1 if any file failed,
    0 otherwise.  With *verbose*, each path is echoed to stderr first.
    """
    status = 0
    for path in paths:
        if verbose:
            sys.stderr.write('%s\n' % path)
            sys.stderr.flush()
        # Context manager replaces the manual try/finally close.
        with open(path, 'r') as source_fd:
            source = source_fd.read()
        try:
            parser.suite(source)
        except SyntaxError:
            ex_type, ex, ex_traceback = sys.exc_info()
            status = 1
            # BUG FIX: ex.text can be None for some syntax errors (e.g.
            # EOF-related); splitlines() on None would raise AttributeError.
            if ex.text:
                message = ex.text.splitlines()[0].strip()
            else:
                message = str(ex)
            sys.stdout.write("%s:%d:%d: SyntaxError: %s\n"
                             % (path, ex.lineno, ex.offset, message))
            sys.stdout.flush()
    sys.exit(status)
def verify(self, src, exp, dump=False):
    """Verify a source value against an expected output value.

    Both values are lists of strings, one string per line.  An empty
    list is interpreted as an empty file; an empty string as an empty
    line.
    """
    # Join into single strings here so the caller can enumerate each
    # line without adding newlines, which keeps test data readable.
    src_text = "\n".join(src)
    if src:
        # Trailing newline only when the "file" is non-empty.
        src_text += "\n"
    exp_text = "\n".join(exp)
    if exp:
        exp_text += "\n"
    try:
        line_count = parseSource(src_text, self.buf, 0, dump)
    except AssertionError as ae:
        self.fail("Internal AssertionError Encountered: %s\n"
                  "Concrete Syntax Tree:\n"
                  "%s\n" % (ae, dumpCst(parser.suite(src_text),
                                        StringIO()).getvalue()))
    self.assertEqual(line_count, len(self.buf))
    output = "".join(self.buf)
    self.assertEqual(output, exp_text,
                     "Output not quite what was expected:\n"
                     " out: %r\n"
                     " exp: %r\n"
                     "Concrete Syntax Tree:\n"
                     "%s\n" % (output, exp_text,
                               dumpCst(parser.suite(src_text),
                                       StringIO()).getvalue()))
def try_parse(self, src):
    """Return True if *src* looks like parseable Python after light
    preprocessing, False otherwise."""
    src += '\n'  # the parser requires a trailing newline
    # Ignore consistent indentation.
    if src.lstrip('\n').startswith(' '):
        src = textwrap.dedent(src)
    # Replace "..." by a mark which is also a valid python expression
    # (the highlighter gets the original source; this is only done to
    # allow "..." in code and still highlight it as Python code).
    mark = "__highlighting__ellipsis__"
    src = src.replace("...", mark)
    # Lines beginning with "..." are probably placeholders for a suite.
    src = re.sub(r"(?m)^(\s*)" + mark + "(.)", r"\1" + mark + r"# \2", src)
    if PY2 and isinstance(src, text_type):
        # Non-ASCII chars will only occur in string literals and
        # comments; the snippet may not declare a source encoding, so
        # just replace all non-ASCII characters.
        src = src.encode('ascii', 'replace')
    if parser is None:
        return True
    try:
        parser.suite(src)
    except (SyntaxError, UnicodeEncodeError):
        return False
    return True
def install(args): if args == []: prefix = "/" else: prefix = args[0] # Make sure that there isn't a syntax error in mudur.py code = file("bin/mudur.py").read() parser.suite(code).compile() install_file("bin/mudur.py", prefix, "sbin/mudur.py") install_file("bin/muavin.py", prefix, "sbin/muavin.py") install_file("bin/update-environment.py", prefix, "sbin/update-environment") install_file("bin/update-modules.py", prefix, "sbin/update-modules") dest = os.path.join(prefix, "sbin/modules-update") if os.path.exists(dest) or os.path.islink(dest): os.unlink(dest) os.symlink("update-modules", dest) install_file("bin/service.py", prefix, "bin/service") install_file("etc/udev-mudur.rules", prefix, "etc/udev/rules.d/51-mudur.rules") install_file("etc/blacklist", prefix, "etc/hotplug/blacklist") install_file("etc/mudur.conf", prefix, "etc/conf.d/mudur") install_file("etc/aliases_modutils", prefix, "etc/modules.d/aliases_modutils") for item in os.listdir("po"): if item.endswith(".po"): lang = item[:-3] dest = "usr/share/locale/%s/LC_MESSAGES/mudur.mo" % lang try: os.makedirs(os.path.dirname(os.path.join(prefix, dest))) except: pass path = os.path.join(prefix, dest) print "compiling '%s' translation '%s'" % (lang, path) os.system("msgfmt po/%s -o %s" % (item, path))
def try_parse(self, src):
    """Return True if *src* parses as Python after light preprocessing."""
    # Make sure it ends in a newline
    src += '\n'
    # Replace "..." by a mark which is also a valid python expression
    # (Note, the highlighter gets the original source, this is only done
    # to allow "..." in code and still highlight it as Python code.)
    mark = "__highlighting__ellipsis__"
    src = src.replace("...", mark)
    # lines beginning with "..." are probably placeholders for suite
    src = re.sub(r"(?m)^(\s*)" + mark + "(.)", r"\1" + mark + r"# \2", src)
    # if we're using 2.5, use the with statement
    if sys.version_info >= (2, 5):
        src = 'from __future__ import with_statement\n' + src
    if isinstance(src, unicode):
        # Non-ASCII chars will only occur in string literals
        # and comments. If we wanted to give them to the parser
        # correctly, we'd have to find out the correct source
        # encoding. Since it may not even be given in a snippet,
        # just replace all non-ASCII characters.
        src = src.encode('ascii', 'replace')
    try:
        parser.suite(src)
    except parsing_exceptions:
        return False
    else:
        return True
def test_check_keywords(self):
    """A name declared @@keyword must be rejected as an identifier."""
    import parser
    # FIX: raw string — the grammar contains the regex escape \w, which
    # must not be interpreted as a (deprecated) Python string escape.
    grammar = r'''
        @@keyword :: A
        start = {id}+ $ ;
        @name
        id = /\w+/ ;
    '''
    model = compile(grammar, 'test')
    c = codegen(model)
    parser.suite(c)  # the generated parser must be valid Python

    ast = model.parse('hello world')
    self.assertEqual(['hello', 'world'], ast)

    try:
        ast = model.parse("hello A world")
        self.assertEqual(['hello', 'A', 'world'], ast)
        self.fail('accepted keyword as name')
    except FailedParse as e:
        self.assertTrue('"A" is a reserved word' in str(e))
def test_comparisons(self):
    """ST objects should support order and equality comparisons."""
    st1 = parser.expr('2 + 3')
    st2 = parser.suite('x = 2; y = x + 3')
    st3 = parser.expr('list(x**3 for x in range(20))')
    st1_copy = parser.expr('2 + 3')
    st2_copy = parser.suite('x = 2; y = x + 3')
    st3_copy = parser.expr('list(x**3 for x in range(20))')

    # exercise fast path for object identity
    self.assertEqual(st1 == st1, True)
    self.assertEqual(st2 == st2, True)
    self.assertEqual(st3 == st3, True)
    # slow path equality
    self.assertEqual(st1, st1_copy)
    self.assertEqual(st2, st2_copy)
    self.assertEqual(st3, st3_copy)
    self.assertEqual(st1 == st2, False)
    self.assertEqual(st1 == st3, False)
    self.assertEqual(st2 == st3, False)
    self.assertEqual(st1 != st1, False)
    self.assertEqual(st2 != st2, False)
    self.assertEqual(st3 != st3, False)
    self.assertEqual(st1 != st1_copy, False)
    self.assertEqual(st2 != st2_copy, False)
    self.assertEqual(st3 != st3_copy, False)
    self.assertEqual(st2 != st1, True)
    self.assertEqual(st1 != st3, True)
    self.assertEqual(st3 != st2, True)
    # we don't particularly care what the ordering is; just that
    # it's usable and self-consistent
    self.assertEqual(st1 < st2, not (st2 <= st1))
    self.assertEqual(st1 < st3, not (st3 <= st1))
    self.assertEqual(st2 < st3, not (st3 <= st2))
    self.assertEqual(st1 < st2, st2 > st1)
    self.assertEqual(st1 < st3, st3 > st1)
    self.assertEqual(st2 < st3, st3 > st2)
    self.assertEqual(st1 <= st2, st2 >= st1)
    self.assertEqual(st3 <= st1, st1 >= st3)
    self.assertEqual(st2 <= st3, st3 >= st2)
    # transitivity
    bottom = min(st1, st2, st3)
    top = max(st1, st2, st3)
    mid = sorted([st1, st2, st3])[1]
    self.assertTrue(bottom < mid)
    self.assertTrue(bottom < top)
    self.assertTrue(mid < top)
    self.assertTrue(bottom <= mid)
    self.assertTrue(bottom <= top)
    self.assertTrue(mid <= top)
    self.assertTrue(bottom <= bottom)
    self.assertTrue(mid <= mid)
    self.assertTrue(top <= top)
    # interaction with other types
    self.assertEqual(st1 == 1588.602459, False)
    self.assertEqual('spanish armada' != st2, True)
    self.assertRaises(TypeError, operator.ge, st3, None)
    self.assertRaises(TypeError, operator.le, False, st1)
    self.assertRaises(TypeError, operator.lt, st1, 1815)
    self.assertRaises(TypeError, operator.gt, b'waterloo', st2)
def test_sizeof(self):
    """Check sys.getsizeof() accounting for ST objects against a model
    of the C-level node allocator."""
    def XXXROUNDUP(n):
        # Mirrors the parser's internal child-array rounding: exact for
        # n<=1, round to a multiple of 4 up to 128, else next power of 2.
        if n <= 1:
            return n
        if n <= 128:
            return (n + 3) & ~3
        return 1 << (n - 1).bit_length()

    basesize = support.calcobjsize('Pii')
    nodesize = struct.calcsize('hP3iP0h')

    def sizeofchildren(node):
        # Recursively total the memory attributed to *node*'s children,
        # including the +1 for each string's NUL terminator.
        if node is None:
            return 0
        res = 0
        hasstr = len(node) > 1 and isinstance(node[-1], str)
        if hasstr:
            res += len(node[-1]) + 1
        children = node[1:-1] if hasstr else node[1:]
        if children:
            res += XXXROUNDUP(len(children)) * nodesize
            for child in children:
                res += sizeofchildren(child)
        return res

    def check_st_sizeof(st):
        self.check_sizeof(st, basesize + nodesize +
                              sizeofchildren(st.totuple()))

    check_st_sizeof(parser.expr('2 + 3'))
    check_st_sizeof(parser.expr('2 + 3 + 4'))
    check_st_sizeof(parser.suite('x = 2 + 3'))
    check_st_sizeof(parser.suite(''))
    check_st_sizeof(parser.suite('# -*- coding: utf-8 -*-'))
    check_st_sizeof(parser.expr('[' + '2,' * 1000 + ']'))
def parse_encoding(fp):
    """Deduce the encoding of a source file from its magic comment.

    Returns the declared encoding, 'utf_8' for a BOM-only file, or None.
    The file position of *fp* is restored before returning.
    """
    pos = fp.tell()
    fp.seek(0)
    try:
        line1 = fp.readline()
        has_bom = line1.startswith(codecs.BOM_UTF8)
        if has_bom:
            line1 = line1[len(codecs.BOM_UTF8):]
        m = PYTHON_MAGIC_COMMENT_re.match(line1)
        if not m:
            try:
                import parser
                parser.suite(line1)
            except (ImportError, SyntaxError):
                # Either it's a real syntax error, in which case the
                # source is not valid python, or line2 is a continuation
                # of line1, in which case we don't want to scan line2
                # for a magic comment.
                pass
            else:
                # Line 1 was complete code, so the magic comment may
                # legally appear on line 2.
                line2 = fp.readline()
                m = PYTHON_MAGIC_COMMENT_re.match(line2)
        if has_bom:
            if m:
                raise SyntaxError('python refuses to compile code with both a UTF8 byte-order-mark and a magic encoding comment')
            return 'utf_8'
        if m:
            return m.group(1)
        return
    finally:
        fp.seek(pos)
def install(args): if args == []: prefix = "/" else: prefix = args[0] # Make sure that there isn't a syntax error in mudur.py code = file("bin/mudur.py").read() parser.suite(code).compile() install_file("bin/mudur.py", prefix, "sbin/mudur.py") install_file("bin/update-environment.py", prefix, "sbin/update-environment") install_file("bin/update-fstab.py", prefix, "sbin/update-fstab") install_file("bin/compat.py", prefix, "etc/init.d/compat.py") install_file("bin/service.py", prefix, "bin/service") install_file("bin/network.py", prefix, "bin/network") install_file("etc/mudur.conf", prefix, "etc/conf.d/mudur") for item in os.listdir("po"): if item.endswith(".po"): lang = item[:-3] dest = "usr/share/locale/%s/LC_MESSAGES/mudur.mo" % lang try: os.makedirs(os.path.dirname(os.path.join(prefix, dest))) except: pass path = os.path.join(prefix, dest) print "compiling '%s' translation '%s'" % (lang, path) os.system("msgfmt po/%s -o %s" % (item, path))
def test_group_join(self):
    """A positive-closure join over a group keeps the group in the AST."""
    grammar = '''
        start = ('a' 'b')%{'x'}+ ;
    '''
    model = compile(grammar, "test")
    generated = codegen(model)
    import parser
    parser.suite(generated)  # generated parser code must be valid Python
    ast = model.parse("x a b x", nameguard=False)
    self.assertEqual(['x', ['a', 'b'], 'x'], ast)
def test_group_join(self):
    """A join over a group parses and produces the separator tokens."""
    grammar = '''
        start = ('a' 'b').{'x'} ;
    '''
    model = genmodel("test", grammar)
    c = codegen(model)
    import parser
    parser.suite(c)  # generated parser code must be valid Python
    ast = model.parse("x a b x", nameguard=False)
    # FIX: assertEquals is a deprecated unittest alias; use assertEqual.
    self.assertEqual(['x', 'x'], ast)
def assert_identical_parse_trees(self, path, patterns=()):
    """Asserts equality of recompiled writer output to the original parse tree.

    FIX: the default for *patterns* was a mutable list ([]), which is
    shared across calls; an immutable empty tuple is safe and
    backward-compatible.
    """
    with open(path, "r") as f:
        expected = parser.suite(f.read())
    try:
        actual = parser.suite(self._cat(path, patterns))
    except SyntaxError:
        self.fail("recompilation of {} failed".format(path))
    # assertTrue instead of assertEqual to avoid unreadable (huge tree)
    # failure output.
    self.assertTrue(actual == expected)
def highlight_block(self, source, lang, linenos=False):
    """Highlight *source* as *lang* with pygments; fall back to the
    unhighlighted representation when highlighting is not possible."""
    if not pygments:
        return self.unhighlighted(source)
    if lang == 'python':
        if source.startswith('>>>'):
            # interactive session
            lexer = lexers['pycon']
        else:
            # maybe Python -- try parsing it
            src = source + '\n'
            # Replace "..." by a mark which is also a valid python expression
            # (Note, the highlighter gets the original source, this is only done
            # to allow "..." in code and still highlight it as Python code.)
            mark = "__highlighting__ellipsis__"
            src = src.replace("...", mark)
            # lines beginning with "..." are probably placeholders for suite
            src = re.sub(r"(?m)^(\s*)" + mark + "(.)", r"\1"+ mark + r"# \2", src)
            # if we're using 2.5, use the with statement
            if sys.version_info >= (2, 5):
                src = 'from __future__ import with_statement\n' + src
            if isinstance(src, unicode):
                # Non-ASCII chars will only occur in string literals
                # and comments. If we wanted to give them to the parser
                # correctly, we'd have to find out the correct source
                # encoding. Since it may not even be given in a snippet,
                # just replace all non-ASCII characters.
                src = src.encode('ascii', 'replace')
            try:
                parser.suite(src)
            except parsing_exceptions:
                return self.unhighlighted(source)
            else:
                lexer = lexers['python']
    else:
        if lang in lexers:
            lexer = lexers[lang]
        else:
            # New lexer: cache it and have it raise on bad tokens so we
            # can detect a wrong language guess below.
            lexer = lexers[lang] = get_lexer_by_name(lang)
            lexer.add_filter('raiseonerror')
    try:
        if self.dest == 'html':
            return highlight(source, lexer, self.hfmter[bool(linenos)])
        else:
            hlsource = highlight(source, lexer, self.lfmter[bool(linenos)])
            return hlsource.translate(tex_hl_escape_map)
    except ErrorToken:
        # this is most probably not the selected language,
        # so let it pass unhighlighted
        return self.unhighlighted(source)
def parse_encoding(fp):
    """Deduce the encoding of a source file from magic comment.

    It does this in the same way as the `Python interpreter`__

    .. __: http://docs.python.org/ref/encodings.html

    The ``fp`` argument should be a seekable file object.

    (From Jeff Dairiki)
    """
    pos = fp.tell()
    fp.seek(0)
    try:
        line1 = fp.readline()
        # Work on bytes: the BOM and the magic-comment regex are byte
        # patterns.
        if isinstance(line1, text_type):
            line1 = line1.encode()
        has_bom = line1.startswith(codecs.BOM_UTF8)
        if has_bom:
            line1 = line1[len(codecs.BOM_UTF8) :]
        m = PYTHON_MAGIC_COMMENT_re.match(line1)
        if not m:
            try:
                import parser
                parser.suite(line1.decode())
            except (ImportError, SyntaxError):
                # Either it's a real syntax error, in which case the source is
                # not valid python source, or line2 is a continuation of line1,
                # in which case we don't want to scan line2 for a magic
                # comment.
                pass
            else:
                line2 = fp.readline()
                if isinstance(line2, text_type):
                    line2 = line2.encode()
                m = PYTHON_MAGIC_COMMENT_re.match(line2)
        if has_bom:
            if m:
                raise SyntaxError(
                    "python refuses to compile code with both a UTF8 "
                    "byte-order-mark and a magic encoding comment"
                )
            return "utf_8"
        elif m:
            return m.group(1)
        else:
            return None
    finally:
        fp.seek(pos)
def highlight_block(self, source, lang, linenos=False):
    """Highlight *source* as *lang*; fall back to a plain <pre>/Verbatim
    rendering when pygments is unavailable or highlighting fails."""
    def unhighlighted():
        # Escaped-but-unhighlighted rendering for the current backend.
        if self.dest == 'html':
            return '<pre>' + cgi.escape(source) + '</pre>\n'
        else:
            return '\\begin{Verbatim}[commandchars=@\\[\\]]\n' + \
                   escape_tex(source) + '\\end{Verbatim}\n'
    if not pygments:
        return unhighlighted()
    if lang == 'python':
        if source.startswith('>>>'):
            # interactive session
            lexer = lexers['pycon']
        else:
            # maybe Python -- try parsing it
            src = source + '\n'
            # Replace "..." by a mark which is also a valid python expression
            # (Note, the highlighter gets the original source, this is only done
            # to allow "..." in code and still highlight it as Python code.)
            mark = "__highlighting__ellipsis__"
            src = src.replace("...", mark)
            # lines beginning with "..." are probably placeholders for suite
            src = re.sub(r"(?m)^(\s*)" + mark + "(.)", r"\1"+ mark + r"# \2", src)
            # if we're using 2.5, use the with statement
            if sys.version_info >= (2, 5):
                src = 'from __future__ import with_statement\n' + src
            try:
                parser.suite(src)
            except parsing_exceptions:
                return unhighlighted()
            else:
                lexer = lexers['python']
    else:
        if lang in lexers:
            lexer = lexers[lang]
        else:
            # New lexer: cache it and make it raise on bad tokens so a
            # wrong language guess is detected below.
            lexer = lexers[lang] = get_lexer_by_name(lang)
            lexer.add_filter('raiseonerror')
    try:
        fmter = (self.dest == 'html' and self.hfmter or self.lfmter)[bool(linenos)]
        return highlight(source, lexer, fmter)
    except ErrorToken:
        # this is most probably not the selected language,
        # so let it pass unhighlighted
        return unhighlighted()
def _find_last_expr(code_lines):
    """Return the index of the first line of the trailing expression in
    *code_lines*, or None if the last statement is not an expression.

    Scans backwards for the longest valid suffix that parses as a suite,
    then checks whether that suffix is a single expression.
    """
    for start in range(len(code_lines) - 1, -1, -1):
        code = "\n".join(code_lines[start:])
        # FIX: the original used bare 'except:' clauses; narrow them to
        # the parse failures we actually expect.
        try:
            parser.suite(code)
        except (parser.ParserError, SyntaxError):
            # Suffix is not a complete suite from this line; extend it.
            continue
        try:
            parser.expr(code)
        except (parser.ParserError, SyntaxError):
            # last statement is not an expression
            return None
        return start
    return None
def isparseable(self, deindent=True):
    """ return True if source is parseable, heuristically
        deindenting it by default.
    """
    import parser
    source = str(self.deindent()) if deindent else str(self)
    try:
        parser.suite(source + '\n')
    except (parser.ParserError, SyntaxError):
        return False
    return True
def parse_encoding(fp):
    """Deduce the encoding of a source file from magic comment.

    It does this in the same way as the `Python interpreter`__

    .. __: https://docs.python.org/3.4/reference/lexical_analysis.html#encoding-declarations

    The ``fp`` argument should be a seekable file object.

    (From Jeff Dairiki)
    """
    pos = fp.tell()
    fp.seek(0)
    try:
        line1 = fp.readline()
        has_bom = line1.startswith(codecs.BOM_UTF8)
        if has_bom:
            line1 = line1[len(codecs.BOM_UTF8):]
        m = PYTHON_MAGIC_COMMENT_re.match(line1)
        if not m:
            try:
                import parser
                # latin-1 decodes any byte, so this never fails on
                # encoding grounds, only on syntax.
                parser.suite(line1.decode('latin-1'))
            except (ImportError, SyntaxError):
                # Either it's a real syntax error, in which case the source is
                # not valid python source, or line2 is a continuation of line1,
                # in which case we don't want to scan line2 for a magic
                # comment.
                pass
            else:
                line2 = fp.readline()
                m = PYTHON_MAGIC_COMMENT_re.match(line2)
        if has_bom:
            if m:
                # A BOM plus a non-utf-8 magic comment is contradictory.
                magic_comment_encoding = m.group(1).decode('latin-1')
                if magic_comment_encoding != 'utf-8':
                    raise SyntaxError(
                        'encoding problem: {0} with BOM'.format(
                            magic_comment_encoding))
            return 'utf-8'
        elif m:
            return m.group(1).decode('latin-1')
        else:
            return None
    finally:
        fp.seek(pos)
def generic_test(self, suite_string, desired):
    """Parse *suite_string*, pull out its first indexed-array subscript
    and compare the resulting slice dict against *desired*."""
    import parser
    cst_tuple = parser.suite(suite_string).totuple()
    found, data = find_first_pattern(cst_tuple, indexed_array_pattern)
    # data['subscript_list'][0] is the symbol; [1] is the subscript itself.
    subscript = data['subscript_list'][1]
    actual = slice_handler.slice_ast_to_dict(subscript)
    print_assert_equal(suite_string, actual, desired)
def _loadfile(self, fileno):
    """Parse the file behind *fileno* and record every def/lambda in
    self._funcmap keyed by (fileno, lineno).  Returns 1 when the file id
    cannot be resolved."""
    try:
        filename = self._filemap[fileno]
    except KeyError:
        print 'Could not identify fileId', fileno
        return 1
    if filename is None:
        return 1
    else:
        absname = os.path.normcase(os.path.join(self.cwd, filename))
        try:
            fp = open(absname)
        except IOError:
            return
        st = parser.suite(fp.read())
        fp.close()
        # Cache the symbol ids outside the loop.
        funcdef = symbol.funcdef
        lambdef = symbol.lambdef
        # Iterative depth-first walk over the parse tree tuples.
        stack = [st.totuple(1)]
        while stack:
            tree = stack.pop()
            try:
                sym = tree[0]
            except (IndexError, TypeError):
                # Leaf or non-sequence entry; nothing to record.
                continue
            if sym == funcdef:
                self._funcmap[fileno, tree[2][2]] = (filename, tree[2][1])
            elif sym == lambdef:
                self._funcmap[fileno, tree[1][2]] = (filename, '<lambda>')
            stack.extend(list(tree[1:]))
        return
def _loadfile(self, fileno):
    """Parse the file behind *fileno* and record every def/lambda in
    self._funcmap keyed by (fileno, lineno).  Returns 1 when the file id
    cannot be resolved."""
    try:
        filename = self._filemap[fileno]
    except KeyError:
        print "Could not identify fileId", fileno
        return 1
    if filename is None:
        return 1
    absname = os.path.normcase(os.path.join(self.cwd, filename))
    try:
        fp = open(absname)
    except IOError:
        return
    st = parser.suite(fp.read())
    fp.close()
    # Scan the tree looking for def and lambda nodes, filling in
    # self._funcmap with all the available information.
    funcdef = symbol.funcdef
    lambdef = symbol.lambdef
    stack = [st.totuple(1)]
    while stack:
        tree = stack.pop()
        try:
            sym = tree[0]
        except (IndexError, TypeError):
            # Leaf or non-sequence entry; nothing to record.
            continue
        if sym == funcdef:
            self._funcmap[(fileno, tree[2][2])] = filename, tree[2][1]
        elif sym == lambdef:
            self._funcmap[(fileno, tree[1][2])] = filename, "<lambda>"
        stack.extend(list(tree[1:]))
def get_identifiers(expr):
    """Return the identifiers (variables or functions) referenced by the
    Python expression *expr* (a string)."""
    # cleaner: parser.expr(expr).tolist() then find leaves of the form [1, name]
    compiled = parser.suite(expr).compile()
    return compiled.co_names
def test_no_asserts(self): """bzr shouldn't use the 'assert' statement.""" # assert causes too much variation between -O and not, and tends to # give bad errors to the user def search(x): # scan down through x for assert statements, report any problems # this is a bit cheesy; it may get some false positives? if x[0] == symbol.assert_stmt: return True elif x[0] == token.NAME: # can't search further down return False for sub in x[1:]: if sub and search(sub): return True return False badfiles = [] for fname, text in self.get_source_file_contents(): if not self.is_our_code(fname): continue ast = parser.ast2tuple(parser.suite(''.join(text))) if search(ast): badfiles.append(fname) if badfiles: self.fail( "these files contain an assert statement and should not:\n%s" % '\n'.join(badfiles))
def parsePythonFile(filename):
    """
    Read a python source file and print the global symbols defined in it.

    The named file is opened and read.  Newlines are canonicalized.  The
    whole file is handed to the internal python parser, which generates
    an Abstract Syntax Tree.  This is converted into a list form which
    is then scanned by lookAt() for global symbol definitions.

    @param filename: name of a python source file.
    @type filename: string
    """
    # FIX: do not shadow the builtin 'file'; use str methods instead of
    # the deprecated string-module functions.
    source_file = open(filename)
    codeString = source_file.read()
    codeString = codeString.replace("\r\n", "\n")
    codeString = codeString.replace("\r", "\n")
    if codeString and codeString[-1] != '\n':
        # The parser requires a trailing newline.
        codeString = codeString + '\n'
    # print "file: %s" % codeString
    source_file.close()
    try:
        ast = parser.suite(codeString)
    except SyntaxError:
        return
    parseTree = parser.ast2list(ast)
    if (verbose):
        printParseTree(parseTree, "")
    lookAt(parseTree)
def parse_arguments(argstr): """ Takes a string representing arguments and returns the positional and keyword argument list and dict respectively. All the entries in these are python source, except the dict keys. """ # Get the tree tree = STTree(parser.suite(argstr).totuple()) # Initialise the lists curr_kwd = None args = [] kwds = {} # Walk through, assigning things testlists = tree.find("testlist") for i, testlist in enumerate(testlists): # BTW: A testlist is to the left or right of an =. items = list(testlist.walk(recursive=False)) for j, item in enumerate(items): if item[0] == symbol.test: if curr_kwd: kwds[curr_kwd] = item[1].reform() curr_kwd = None elif j == len(items)-1 and i != len(testlists)-1: # Last item in a group must be a keyword, unless it's last overall curr_kwd = item[1].reform() else: args.append(item[1].reform()) return args, kwds
def generic_2d(self, expr, typ):
    """Benchmark *expr* through blitz against plain numpy for a range of
    2-D array sizes, printing speed-ups for a cold and a warm run.

    The complex testing is pretty lame...
    """
    ast = parser.suite(expr)
    arg_list = harvest_variables(ast.tolist())
    all_sizes = [(10, 10), (50, 50), (100, 100), (500, 500), (1000, 1000)]
    debug_print('\nExpression:', expr)
    with TempdirBlitz():
        for size in all_sizes:
            arg_dict = {}
            for arg in arg_list:
                arg_dict[arg] = random.normal(0, 1, size).astype(typ)
                # set imag part of complex values to non-zero value;
                # real dtypes raise TypeError here and are left as-is.
                try:
                    arg_dict[arg].imag = arg_dict[arg].real
                except TypeError:
                    pass
            debug_print('Run:', size, typ)
            # BUG FIX: the original passed the builtin 'type' instead of
            # the 'typ' parameter to generic_check.
            standard, compiled = self.generic_check(expr, arg_dict, typ, size)
            try:
                speed_up = standard / compiled
            except ZeroDivisionError:
                speed_up = -1.
            debug_print("1st run(numpy,compiled,speed up): %3.4f, %3.4f, "
                        "%3.4f" % (standard, compiled, speed_up))
            standard, compiled = self.generic_check(expr, arg_dict, typ, size)
            try:
                speed_up = standard / compiled
            except ZeroDivisionError:
                speed_up = -1.
            debug_print("2nd run(numpy,compiled,speed up): %3.4f, %3.4f, "
                        "%3.4f" % (standard, compiled, speed_up))
def find_multiline_statements(source): """Parses the python source and finds multiline statements. Based on counting the number of open and closed parenthesis on each line. Args: source: The source code string. Returns: A dict that maps a line index A to a line index B, where A is the end of a multiline statement and B is the start. Line indexing is 0-based. """ # Get the AST. tree = parser.suite(source) line2paren_count = [0] * (source.count('\n') + 1) _count_brackets_braces_parenthesis(tree.totuple(True), line2paren_count) line2start = {} for end in range(len(line2paren_count)): if line2paren_count[end] >= 0: # This is not the end of a multiline statement. continue cumulative_paren_count = 0 for start in range(end, -1, -1): cumulative_paren_count += line2paren_count[start] if cumulative_paren_count == 0: line2start[end] = start break return line2start
def parseSource(sourcecode, indexbuff, indexbuff_len, dump=False):
    """Parses python source code and puts the resulting index information
    into the buffer.

    Returns the updated buffer length (unchanged when the source is
    empty or fails to parse).
    """
    if len(sourcecode) == 0:
        return indexbuff_len

    # Parse the source to an Concrete Syntax Tree (cst)
    sourcecode = sourcecode.replace('\r\n', '\n')
    if sourcecode[-1] != '\n':
        # We need to make sure files are terminated by a newline.
        sourcecode += '\n'
    try:
        cst = parser.suite(sourcecode)
    except Exception as e:
        # Unparseable input is reported and skipped rather than fatal.
        print("source code %s" % sourcecode)
        #raise e
        return indexbuff_len
    if dump:
        dumpCst(cst)
    # Walk the tree (with line info) and append the results.
    ctx = Context()
    walkCst(ctx, cst.totuple(True))
    indexbuff.extend(ctx.buff)
    indexbuff_len += len(ctx.buff)
    return indexbuff_len
def find_executable_statements(self, text, exclude=None):
    """Return (statement lines, excluded lines, suite spots) for *text*,
    optionally excluding lines matching the *exclude* regex."""
    # Find lines which match an exclusion pattern.
    excluded = {}
    suite_spots = {}
    if exclude:
        reExclude = re.compile(exclude)
        lines = text.split('\n')
        for i in range(len(lines)):
            if reExclude.search(lines[i]):
                excluded[i+1] = 1

    # Parse the code and analyze the parse tree to find out which statements
    # are multiline, and where suites begin and end.
    import parser
    tree = parser.suite(text+'\n\n').totuple(1)
    self.get_suite_spots(tree, suite_spots)
    #print "Suite spots:", suite_spots

    # Use the compiler module to parse the text and find the executable
    # statements. We add newlines to be impervious to final partial lines.
    statements = {}
    ast = compiler.parse(text+'\n\n')
    visitor = StatementFindingAstVisitor(statements, excluded, suite_spots)
    compiler.walk(ast, visitor, walker=visitor)

    lines = statements.keys()
    lines.sort()
    excluded_lines = excluded.keys()
    excluded_lines.sort()
    return lines, excluded_lines, suite_spots
def test_position(self):
    # An absolutely minimal test of position information.  Better
    # tests would be a big project.
    code = "def f(x):\n    return x + 1"
    st = parser.suite(code)

    def walk(tree):
        # Yield every terminal tuple in depth-first order.
        node_type = tree[0]
        next = tree[1]
        if isinstance(next, (tuple, list)):
            for elt in tree[1:]:
                for x in walk(elt):
                    yield x
        else:
            yield tree

    # (token type, token string, line, column); -1 marks synthetic tokens.
    expected = [
        (1, 'def', 1, 0),
        (1, 'f', 1, 4),
        (7, '(', 1, 5),
        (1, 'x', 1, 6),
        (8, ')', 1, 7),
        (11, ':', 1, 8),
        (4, '', 1, 9),
        (5, '', 2, -1),
        (1, 'return', 2, 4),
        (1, 'x', 2, 11),
        (14, '+', 2, 13),
        (2, '1', 2, 15),
        (4, '', 2, 16),
        (6, '', 2, -1),
        (4, '', 2, -1),
        (0, '', 2, -1),
    ]
    self.assertEqual(list(walk(st.totuple(line_info=True, col_info=True))),
                     expected)
    self.assertEqual(list(walk(st.totuple())),
                     [(t, n) for t, n, l, c in expected])
    self.assertEqual(list(walk(st.totuple(line_info=True))),
                     [(t, n, l) for t, n, l, c in expected])
    self.assertEqual(list(walk(st.totuple(col_info=True))),
                     [(t, n, c) for t, n, l, c in expected])
    self.assertEqual(list(walk(st.tolist(line_info=True, col_info=True))),
                     [list(x) for x in expected])
    self.assertEqual(
        list(walk(parser.st2tuple(st, line_info=True, col_info=True))),
        expected)
    self.assertEqual(
        list(walk(parser.st2list(st, line_info=True, col_info=True))),
        [list(x) for x in expected])
def try_parse(self, src):
    """Heuristically decide whether *src* is parseable Python."""
    src += '\n'  # the parser requires a trailing newline
    # Strip consistent leading indentation so indented snippets parse.
    if src.lstrip('\n').startswith(' '):
        src = textwrap.dedent(src)
    # "..." becomes a marker that is itself a valid expression (the
    # highlighter still receives the original source; this only lets
    # snippets containing "..." still parse as Python).
    mark = "__highlighting__ellipsis__"
    src = src.replace("...", mark)
    # A line beginning with "..." probably stands in for a whole suite.
    src = re.sub(r"(?m)^(\s*)" + mark + "(.)", r"\1" + mark + r"# \2", src)
    # if we're using 2.5, use the with statement
    if sys.version_info >= (2, 5):
        src = 'from __future__ import with_statement\n' + src
    if sys.version_info < (3, 0) and isinstance(src, unicode):
        # Non-ASCII chars can only occur in string literals and
        # comments; the snippet may not declare its encoding, so just
        # replace all non-ASCII characters.
        src = src.encode('ascii', 'replace')
    if parser is None:
        return True
    try:
        parser.suite(src)
    except parsing_exceptions:
        return False
    return True
def test_copy_pickle(self):
    """ST objects must survive copy, deepcopy and every pickle protocol."""
    samples = [
        parser.expr('2 + 3'),
        parser.suite('x = 2; y = x + 3'),
        parser.expr('list(x**3 for x in range(20))'),
    ]
    for st in samples:
        shallow = copy.copy(st)
        self.assertEqual(shallow.totuple(), st.totuple())
        deep = copy.deepcopy(st)
        self.assertEqual(deep.totuple(), st.totuple())
        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
            unpickled = pickle.loads(pickle.dumps(st, proto))
            self.assertEqual(unpickled.totuple(), st.totuple())
def depends_fromfile(self): """ Find module dependencies from the file. """ log.debug(self.name(), "finding dependencies from source", self.source()) modules = list() file = self.source() if file == None: modules source = open(file).read() try: ast = parser.suite(source) except parser.ParserError, msg: raise SyntaxError(file + ":" + str(msg))
def get_model_tree(model):
    """Return the STTree node for *model*'s class definition, or None
    when its source cannot be read."""
    # Normalise newlines; the parser wants \n line endings plus a
    # trailing newline.
    try:
        source = inspect.getsource(model).replace("\r\n", "\n").replace(
            "\r", "\n") + "\n"
    except IOError:
        return None
    tree = STTree(parser.suite(source).totuple())
    # Scan compound statements for the classdef whose name matches.
    for candidate in tree.find("compound_stmt"):
        if candidate.value[1][0] == symbol.classdef \
                and candidate.value[1][2][1].lower() == model.__name__.lower():
            # This is the tree
            return candidate
def __init__(self, filename, reclevel=0, maxreclevel=0):
    """Parse *filename* (an open file object) and initialise the
    function-call / class / import indexes.

    Raises ValueError when the recursion level exceeds the maximum.
    """
    self.reclevel = reclevel
    self.maxreclevel = maxreclevel
    if reclevel > maxreclevel:
        raise ValueError()
    # Result sets filled in by self.parse().
    self.funcalls = set()
    self.classes = set()
    self.imports = set()
    self.file = filename
    self.name = os.path.basename(self.file.name).split('.')[0]
    source = self.file.read()
    tree = parser.suite(source)
    self.parse(tree.tolist())
def test_function():
    """Exercise blitz() on a 3-D FDTD-style update expression."""
    expr = "ex[:,1:,1:] = k + ca_x[:,1:,1:] * ex[:,1:,1:]" \
           "+ cb_y_x[:,1:,1:] * (hz[:,1:,1:] - hz[:,:-1,1:])" \
           "- cb_z_x[:,1:,1:] * (hy[:,1:,1:] - hy[:,1:,:-1])"
    #ast = parser.suite('a = (b + c) * sin(d)')
    ast = parser.suite(expr)
    k = 1.
    # Minimal 1x1x1 float32 fields; blitz() pulls them from this frame.
    shape = (1, 1, 1)
    ex = numpy.ones(shape, dtype=numpy.float32)
    ca_x = numpy.ones(shape, dtype=numpy.float32)
    cb_y_x = numpy.ones(shape, dtype=numpy.float32)
    cb_z_x = numpy.ones(shape, dtype=numpy.float32)
    hz = numpy.ones(shape, dtype=numpy.float32)
    hy = numpy.ones(shape, dtype=numpy.float32)
    blitz(expr)
def lint(self):
    """Read, parse and lint self.code; raise _LintError on any problem."""
    print('Linting... ', end='')
    self.errors = []
    if self.code is None:  # 'is None' instead of '== None'
        with open(self.filename, 'rt', encoding="utf-8") as f:
            try:
                self.code = f.read()
            except Exception as e:
                # BUG FIX: the original read 'except e:', which looks up
                # an undefined name and raises NameError the moment an
                # exception actually occurs.
                msg = 'Error when trying to read file:\n' + str(e)
                expl = ("This usually means something got corrupted in "
                        "your file\n\t\t\t and you should remove the "
                        "corrupted portions or\n\t\t\t start a new file.")
                self.oops(msg, expl)
                raise _LintError(self.errors)
    if self.code in [None, '']:
        self.oops('Could not read code from "%s"' % self.filename)
        raise _LintError(self.errors)
    self.lines = self.code.splitlines()
    self.st = parser.suite(self.code)
    self.stList = parser.st2list(self.st, line_info=True, col_info=True)
    self.astList = self.buildSimpleAST(self.stList, textOnly=False)
    self.astTextOnlyList = self.buildSimpleAST(self.stList, textOnly=True)
    # allow if...main() last line...
    if (self.astTextOnlyList[-1] in [
            ['if', ['__name__', '==', "'__main__'"], ':',
             ['main', ['(', ')']]],
            ['if', ['(', ['__name__', '==', "'__main__'"], ')'], ':',
             ['main', ['(', ')']]],
            ['if', ['__name__', '==', '"__main__"'], ':',
             ['main', ['(', ')']]],
            ['if', ['(', ['__name__', '==', '"__main__"'], ')'], ':',
             ['main', ['(', ')']]]]):
        # just remove it...
        self.astTextOnlyList.pop()
        self.astList.pop()
    # now do the actual linting...
    self.lintLineWidths()
    self.lintTopLevel()  # just import, def, class, or if...main()
    self.lintAllLevels(self.astList)
    if self.errors != []:
        raise _LintError(self.errors)
    print("Passed!")
def parseSource(sourcecode, indexbuff):
    """Parses python source code and puts the resulting index into the
    buffer.
    """
    # Parse the source to an Abstract Syntax Tree, list-ified with
    # line-number information.
    astlist = parser.ast2list(parser.suite(sourcecode), True)

    # Set these globals before each file's AST is walked
    global sourcelinehassymbol
    sourcelinehassymbol = False
    global currentlinenum
    currentlinenum = 0

    # Walk the AST to index the rest of the file
    walkAst(astlist, indexbuff)
def testGoodStreamBadPipe(self):
    """dumpCst must swallow EPIPE from its printer and yield an empty
    result rather than propagating the error."""
    import pprint
    orig_pprint = pprint.pprint

    def mockEpipe(obj, stm):
        # Simulate a consumer that closed the pipe mid-write.
        e = IOError()
        e.errno = errno.EPIPE
        raise e

    pprint.pprint = mockEpipe
    try:
        res = dumpCst(parser.suite("a = 1"), StringIO()).getvalue()
    finally:
        pprint.pprint = orig_pprint
    # FIX: assertEquals is a deprecated unittest alias; use assertEqual.
    # Also dropped the stray trailing comma in the method signature.
    self.assertEqual(res, "")
def test_check_keywords(self):
    """A name declared @@keyword must be rejected as an identifier."""
    import parser
    # FIX: raw string — the grammar contains the regex escape \w, which
    # must not be interpreted as a (deprecated) Python string escape.
    grammar = r'''
        @@keyword :: A
        start = {id}+ $ ;
        @name
        id = /\w+/ ;
    '''
    model = genmodel('test', grammar)
    c = codegen(model)
    parser.suite(c)  # the generated parser must be valid Python

    ast = model.parse('hello world')
    self.assertEqual(['hello', 'world'], ast)

    try:
        ast = model.parse("hello A world")
        self.assertEqual(['hello', 'A', 'world'], ast)
        self.fail('accepted keyword as name')
    except FailedSemantics:
        pass
def error_check(request):
    """Check the posted code for errors and return a JSON verdict."""
    payload = json.loads(request.body)
    code = payload['code']
    errors = []
    # Note: only catches first SyntaxError in code
    # when user fixes this error, then it will catch the next one, and so on
    # could use linter, but that has false positives sometimes
    # since syntax errors often affect future code
    try:
        parser.suite(code)
    except SyntaxError as e:
        errors.append({
            'type': 'syntax',
            'lineno': e.lineno,
            'code': e.text,
            'message': str(e)
        })
    # Run the secondary filter pass; a SyntaxError here is already
    # reported above, so it is deliberately ignored.
    try:
        errors.extend(filter_code(code))
    except SyntaxError:
        pass
    if errors:
        return JsonResponse({
            'is_error': True,
            'errors': errors
        })
    return JsonResponse({
        'is_error': False
    })
def testChunk(t, fileName):
    """Round-trip source text t through the parser module (Python 2).

    Builds an ST from t, converts it to a tuple and back, and increments
    the global _numFailed counter if the parser raises.
    """
    global _numFailed
    print '----', fileName,
    try:
        st = parser.suite(t)
        tup = parser.st2tuple(st)
        # this discards the first ST; a huge memory savings when running
        # against a large source file like Tkinter.py.
        st = None
        new = parser.tuple2st(tup)
    except parser.ParserError, err:
        # Report the failure and keep going so other files still run.
        print
        print 'parser module raised exception on input file', fileName + ':'
        traceback.print_exc()
        _numFailed = _numFailed + 1
def __init__(self, pyFile):
    """Parse pyFile and collect the classes defined in it.

    Populates self.classes, mapping class name -> AstClass wrapper.
    """
    # Fix: use open() in a context manager instead of the deprecated
    # file() builtin, so the handle is closed even if read() fails.
    with open(pyFile) as f:
        fContent = f.read()

    # For some unknown reason, when an UTF-8 encoding is declared, parsing
    # does not work.
    if fContent.startswith(self.utf8prologue):
        fContent = fContent[len(self.utf8prologue):]
    fContent = fContent.replace('\r', '')
    ast = parser.suite(fContent).totuple()

    # Get all the classes defined within this module.
    self.classes = {}
    classNodes = AstMatcher.match(self.classPattern, ast)
    for node in classNodes:
        astClass = AstClass(node)
        self.classes[astClass.name] = astClass
def lint(self):
    """Lint self.code, reading it from self.filename if not already set.

    Builds the concrete syntax tree and the simplified ASTs, strips a
    trailing ``if __name__ == '__main__': main()`` guard (or bare
    ``main()`` call) if present, then runs the individual lint passes.

    Raises:
        _LintError: if the file cannot be decoded, is empty, or any lint
            check records an error.
    """
    print('Linting... ', end='')
    self.errors = []
    # Fix: identity comparison with None instead of "== None".
    if self.code is None:
        with open(self.filename, 'rt') as f:
            try:
                self.code = f.read()
            except UnicodeDecodeError as e:
                self.oops('Non-Ascii Character in File:\n' + str(e))
                raise _LintError(self.errors)
    if self.code in [None, '']:
        self.oops('Could not read code from "%s"' % self.filename)
        raise _LintError(self.errors)
    self.lines = self.code.splitlines()
    self.st = parser.suite(self.code)
    self.stList = parser.st2list(self.st, line_info=True, col_info=True)
    self.astList = self.buildSimpleAST(self.stList, textOnly=False)
    self.astTextOnlyList = self.buildSimpleAST(self.stList, textOnly=True)
    # allow if...main() last line (with or without parens, either quote
    # style), or a bare main() call...
    if (self.astTextOnlyList[-1] in [
            ['if', ['__name__', '==', "'__main__'"], ':',
             ['main', ['(', ')']]],
            ['if', ['(', ['__name__', '==', "'__main__'"], ')'], ':',
             ['main', ['(', ')']]],
            ['if', ['__name__', '==', '"__main__"'], ':',
             ['main', ['(', ')']]],
            ['if', ['(', ['__name__', '==', '"__main__"'], ')'], ':',
             ['main', ['(', ')']]],
            ['main', ['(', ')']]]):
        # just remove it so the lint passes don't see the guard...
        self.astTextOnlyList.pop()
        self.astList.pop()
    # now do the actual linting...
    self.lintLineWidths()
    self.lintTopLevel()  # just import, def, class, or if...main()
    self.lintAllLevels(self.astList)
    if self.errors:
        raise _LintError(self.errors)
    print("Passed!")
def testGoodStreamIOError(self, ):
    """A non-EPIPE IOError raised inside pprint must propagate out of dumpCst."""
    import pprint
    saved_pprint = pprint.pprint

    def raise_enoent(obj, stm):
        # Simulate an arbitrary (non-broken-pipe) I/O failure.
        err = IOError()
        err.errno = errno.ENOENT
        raise err

    pprint.pprint = raise_enoent
    try:
        dumpCst(parser.suite("a = 1"), StringIO()).getvalue()
    except IOError as e:
        # The original errno must survive the propagation.
        assert e.errno == errno.ENOENT
    else:
        self.fail("Expected IOError raised")
    finally:
        # Restore the real pprint regardless of outcome.
        pprint.pprint = saved_pprint
def testIssue0009(self):
    """ Verify dumpCst works on tuples. """
    # StringIO moved between Python 2 (cStringIO) and Python 3 (io).
    try:
        from cStringIO import StringIO
    except ImportError:
        from io import StringIO
    out = StringIO()
    import parser, sys
    # Dump the CST of a small two-line module as a tuple (totuple(True)
    # keeps line numbers) and capture the pretty-printed text.
    cst = parser.suite("import sys\na = b\n")
    pycscope.dumpCst(cst.totuple(True), out)
    output = out.getvalue()
    # The expected text differs between Python 2 and 3 because the
    # grammar symbol names differ (testlist vs testlist_star_expr).
    if sys.hexversion < 0x03000000:
        expected = "['file_input',\n ['stmt',\n ['simple_stmt',\n ['small_stmt',\n ['import_stmt',\n ['import_name',\n ['NAME', 'import', 1],\n ['dotted_as_names',\n ['dotted_as_name', ['dotted_name', ['NAME', 'sys', 1]]]]]]],\n ['NEWLINE', '', 1]]],\n ['stmt',\n ['simple_stmt',\n ['small_stmt',\n ['expr_stmt',\n ['testlist',\n ['test',\n ['or_test',\n ['and_test',\n ['not_test',\n ['comparison',\n ['expr',\n ['xor_expr',\n ['and_expr',\n ['shift_expr',\n ['arith_expr',\n ['term',\n ['factor',\n ['power', ['atom', ['NAME', 'a', 2]]]]]]]]]]]]]]]],\n ['EQUAL', '=', 2],\n ['testlist',\n ['test',\n ['or_test',\n ['and_test',\n ['not_test',\n ['comparison',\n ['expr',\n ['xor_expr',\n ['and_expr',\n ['shift_expr',\n ['arith_expr',\n ['term',\n ['factor',\n ['power', ['atom', ['NAME', 'b', 2]]]]]]]]]]]]]]]]]],\n ['NEWLINE', '', 2]]],\n ['NEWLINE', '', 2],\n ['ENDMARKER', '', 2]]\n"
    else:
        expected = "['file_input',\n ['stmt',\n ['simple_stmt',\n ['small_stmt',\n ['import_stmt',\n ['import_name',\n ['NAME', 'import', 1],\n ['dotted_as_names',\n ['dotted_as_name', ['dotted_name', ['NAME', 'sys', 1]]]]]]],\n ['NEWLINE', '', 1]]],\n ['stmt',\n ['simple_stmt',\n ['small_stmt',\n ['expr_stmt',\n ['testlist_star_expr',\n ['test',\n ['or_test',\n ['and_test',\n ['not_test',\n ['comparison',\n ['expr',\n ['xor_expr',\n ['and_expr',\n ['shift_expr',\n ['arith_expr',\n ['term',\n ['factor',\n ['power', ['atom', ['NAME', 'a', 2]]]]]]]]]]]]]]]],\n ['EQUAL', '=', 2],\n ['testlist_star_expr',\n ['test',\n ['or_test',\n ['and_test',\n ['not_test',\n ['comparison',\n ['expr',\n ['xor_expr',\n ['and_expr',\n ['shift_expr',\n ['arith_expr',\n ['term',\n ['factor',\n ['power', ['atom', ['NAME', 'b', 2]]]]]]]]]]]]]]]]]],\n ['NEWLINE', '', 2]]],\n ['NEWLINE', '', 2],\n ['ENDMARKER', '', 2]]\n"
    print(repr(output))
    self.assertEqual(output, expected)
def p_statement_action(t):
    'statement : LABEL LABEL LPAREN STRING RPAREN'
    # Grammar action for: action <label> ("<python source fragment>").
    # Compiles the quoted fragment and attaches the code object to the
    # node registered under <label> in the global names table.
    if t[1] != "action":
        print("Expected keyword action")
        return
    source_string = t[4].strip('"')
    try:
        st = pythonparser.suite(source_string)
        code = st.compile()
        label = t[2]
        if label in names:
            node = names[label]
        else:
            node = Node(TSTRING, "")
            names[label] = node
        node.code = code
    # Fix: a bare "except:" also swallows KeyboardInterrupt/SystemExit;
    # catch Exception so only genuine errors in the fragment are reported.
    except Exception:
        print("error in code fragment for %s" % t[2])
def __init__(self, source):
    """Build the ST, compiled code object, and node scaffolding for source.

    Args:
        source: a Source instance; its source_type selects parser.suite
            (SourceType.SUITE) or parser.expr (SourceType.EXPR).

    Raises:
        TypeError: if source is not a Source.
        ValueError: if source.source_type is not a recognised value.
    """
    # Fix: isinstance() instead of an exact type() comparison, so Source
    # subclasses are accepted — matching what the error message promises.
    if not isinstance(source, Source):
        raise TypeError("'source' must be instance of Source class")
    self._source = source
    if source.source_type == SourceType.SUITE:
        self._st = parser.suite(source.source)
    elif source.source_type == SourceType.EXPR:
        self._st = parser.expr(source.source)
    else:
        raise ValueError("Dismantler cannot accept any undefined values")
    self._code = self._st.compile()
    self._tup = self._st.totuple()
    self._token_list = list()
    self._symbol_list = list()
    self._node_tree = self.__get_node_tree()
def find_executable_linenos(filename):
    """return a dict of the line numbers from executable statements in a file

    Works by finding all of the code-like objects in the module then searching
    the byte code for 'SET_LINENO' terms (so this won't work one -O files).
    """
    import parser

    # Fix: read via a context manager instead of leaking the handle
    # returned by the bare open(filename).read().
    with open(filename) as fp:
        prog = fp.read()
    ast = parser.suite(prog)
    code = parser.compileast(ast, filename)

    # The only way I know to find line numbers is to look for the
    # SET_LINENO instructions.  Isn't there some way to get it from
    # the AST?
    return _find_LINENO(code)
def _parse_expr(self, expr):
    # Parse expr and return the parsetree and alias.
    # If expr is just an expression, then alias will be None.
    # If expr is an assignment v=e then alias will be v, and
    # expr_parsetree will be the parsetree for e.
    # If the parse raises a syntax error, just let that be handled
    # by the regular Python compiler's error handler.
    # Raise an exception if the expression doesn't match either an expression
    # or a statement (this would happen if the expression consists of multiple
    # statements, which parses correctly so wouldn't be caught by the Python compiler).
    full_tree = parser.ast2tuple(parser.suite(expr))
    # Case 1: a plain expression -> (parsetree, no alias).
    same, vars = match(FULL_TREE_EXPRESSION, full_tree)
    if same:
        return (vars['expr'], None)
    # Case 2: an assignment "v = e" -> (parsetree of e, alias v).
    same, vars = match(FULL_TREE_ASSIGNMENT, full_tree)
    if same:
        return (vars['expr'], vars['alias'])
    # Neither pattern matched (e.g. multiple statements).  Python 2 raise syntax.
    raise ValueError, "invalid expression (perhaps multiple statements?): " + expr
def generate_dot_code(python_file):
    """Build a Graphviz dot description of the call graph of python_file."""
    # Parse the file into a concrete syntax tree, then to nested lists.
    with open(python_file) as source:
        ast_module_obj = parser.suite(source.read())
    ast_obj = parser.st2list(ast_module_obj)

    # Diagnostic output (kept from the original implementation).
    print('ast_list\n', repr(ast_module_obj))
    print('ast_iter_tuple\n', ast.iter_fields(ast_module_obj))
    print('ast_obj\n\n', repr(ast_obj))

    # Collect caller -> callees edges from the tree.
    call_graph = {}
    construct_call_graph(ast_obj, call_graph)

    lines = ["digraph G {", "rankdir=LR"]
    for caller, callees in call_graph.items():
        if not callees:
            # Isolated node: emit it so it still appears in the graph.
            lines.append('%s;' % caller)
        for callee in callees:
            # Only draw edges to functions we actually know about.
            if callee in call_graph:
                lines.append('%s -> %s;' % (caller, callee))
    lines.append("}")
    return '\n'.join(lines)
def gaussian_quadrature(function, a, b, polyorder):
    """Integrate `function` (an expression in x, given as a string) over
    [a, b] using Gauss-Legendre quadrature of order `polyorder`.

    Returns [ans, err]: err is 0 on success; on failure err is 1 and ans
    is None.
    """
    func_string = 'def temp_func(x):\n\treturn ' + function  # input validation
    print(func_string)
    compiled = parser.suite(func_string).compile()
    # Fix: exec() into an explicit namespace.  In Python 3, "exec(h)"
    # inside a function cannot create a new local binding, so the
    # subsequent temp_func(...) call raised NameError.
    namespace = {}
    exec(compiled, namespace)
    temp_func = namespace['temp_func']
    [Ws, xs, err] = GaussLegendreWeights(polyorder)
    if err == 0:
        # Standard change of interval from [-1, 1] to [a, b].
        ans = (b - a) * 0.5 * sum(Ws * temp_func((b - a) * 0.5 * xs + (b + a) * 0.5))
    else:
        err = 1
        ans = None
    return [ans, err]
def get_import_definition(manager, import_node: Node, mypy_file: MypyFile,
                          line: int, column: int, path: str) -> Optional[Node]:
    """Resolve the symbol under the cursor inside an import statement.

    Re-parses just the import statement's source text, locates the
    module/name at (line, column), and looks it up in the mypy build
    manager's module table.  Returns the definition node, the module
    (when the cursor is on the module part), or None if it cannot be
    resolved.
    """
    # lines are 1 based, cols 0 based.
    with open(path) as file:
        code_lines: List[str] = file.readlines()
    # Extract the exact source text of the import statement, which may
    # span several physical lines.
    if import_node.line == import_node.end_line:
        import_code = code_lines[import_node.line - 1][import_node.column:import_node.end_column]
    else:
        first_line = code_lines[import_node.line - 1][import_node.column:]
        intermediate_lines = ''.join(
            code_lines[import_node.line:import_node.end_line - 1])
        last_line = code_lines[import_node.end_line - 1][:import_node.end_column]
        import_code = first_line + intermediate_lines + last_line
    # Re-parse just the snippet; if it is not valid on its own, give up.
    try:
        suite = parser.suite(import_code).tolist(True, True)
    except SyntaxError:
        return None
    # Translate the cursor position into snippet-relative coordinates
    # (the column only shifts on the statement's first line).
    line_relative_to_import = line - import_node.line + 1
    column_relative_to_import = column
    if line == import_node.line:
        column_relative_to_import -= import_node.column
    module_name, name = find_import_name(import_node, line_relative_to_import,
                                         column_relative_to_import, suite,
                                         mypy_file)
    if not module_name:
        return None
    module = manager.modules.get(module_name)
    if not module:
        return None
    if name:
        symbol_node = module.names.get(name)
        if symbol_node:
            return symbol_node.node  # TODO: return file too
        else:
            return None
    else:
        # Cursor was on the module part itself.
        return module
def test_full_expression(self):
    """A fully-qualified variable must decompose into package/dataset/shortname."""
    full_expr = "urbansim.gridcell.population"
    t = parser.ast2tuple(parser.suite(full_expr))
    same1, vars1 = match(FULL_TREE_EXPRESSION, t)
    # Fix: assert_ is a deprecated alias of assertTrue.
    self.assertTrue(same1, msg="pattern did not match")
    expr_tree = vars1['expr']
    same2, vars2 = match(EXPRESSION_IS_FULLY_QUALIFIED_VARIABLE, expr_tree)
    self.assertTrue(same2, msg="pattern did not match")
    self.assertEqual(len(vars2), 3, msg="wrong number of items in dictionary")
    self.assertEqual(vars2['package'], 'urbansim', msg="bad value in dictionary")
    self.assertEqual(vars2['dataset'], 'gridcell', msg="bad value in dictionary")
    self.assertEqual(vars2['shortname'], 'population', msg="bad value in dictionary")
def test_position(self):
    """totuple(line_info=1, col_info=1) must attach (line, col) to each terminal."""
    code = 'def f(x):\n    return x + 1'
    st1 = parser.suite(code)
    st2 = st1.totuple(line_info=1, col_info=1)

    def walk(tree):
        # Yield every terminal (leaf) tuple of the nested tuple tree.
        # Fix: the original bound tree[1] to a local named "next"
        # (shadowing the builtin) and left "node_type" unused.
        if isinstance(tree[1], tuple):
            for child in tree[1:]:
                yield from walk(child)
        else:
            yield tree

    terminals = list(walk(st2))
    self.assertEqual([(1, 'def', 1, 0), (1, 'f', 1, 4), (7, '(', 1, 5),
                      (1, 'x', 1, 6), (8, ')', 1, 7), (11, ':', 1, 8),
                      (4, '', 1, 9), (5, '', 2, -1), (1, 'return', 2, 4),
                      (1, 'x', 2, 11), (14, '+', 2, 13), (2, '1', 2, 15),
                      (4, '', 2, 16), (6, '', 2, -1), (4, '', 2, -1),
                      (0, '', 2, -1)],
                     terminals)
def load_python_parse_tree(code, filter_test=False, line_info=True, col_info=True):
    """Parse Python code and return its parse tree as nested lists.

    :param code: the Python source text to parse.
    :param filter_test: when True, strip the redundant single-child chain
        nodes that the grammar introduces.
    :param line_info: include line numbers on terminals.
    :param col_info: include column offsets on terminals.
    :return: the list-tree, or None if the code cannot be parsed.
    """
    try:
        st_obj = parser.suite(code)
    except Exception:
        # Unparsable input yields no tree.
        return None
    tree = parser.st2list(st_obj, line_info=line_info, col_info=col_info)
    if filter_test:
        tree = construct_filter_list_tree(tree)
    # print_list_tree(tree)
    return tree
def _parseconf(confstr):
    """
    Parse the configuration *confstr* string and remove anything else
    than the supported constructs, which are:

    Assignments, bool, dict list, string, float, bool, and, or, xor,
    arithmetics, string expressions and if..then..else.

    The *entire* statement containing the unsupported statement is removed
    from the parser; the effect is that the whole expression is ignored
    from the 'root' down.  The modified AST object is returned to the
    Python parser for evaluation.
    """
    # Initialise the parse tree, convert to list format and get a list of
    # the symbol ID's for the unwanted statements. Might raise SyntaxError.
    ast = parser.suite(confstr)
    stmts = parser.ast2list(ast)
    rmsym = _get_forbidden_symbols()

    # copy 256: 'single_input', 257: 'file_input' or 258: 'eval_input'. The
    # parse tree must begin with one of these to compile back to an AST obj.
    # Fix: filter with a comprehension instead of an index loop carrying a
    # dead "else: pass" branch; behavior (censoring failing statements)
    # is unchanged.
    result = [stmts[0]]
    result.extend(stmt for stmt in stmts[1:] if _check_ast(stmt, rmsym))

    return parser.sequence2ast(result)
def rewrite_and_compile(code, output_func_name=None, output_func_self=None, print_func_name=None, encoding="utf8"):
    """
    Compiles the supplied text into code, while rewriting the parse tree so:

     * Print statements without a destination file are transformed into calls to
      <print_func_name>(*args), if print_func_name is not None

     * Statements which are simply expressions are transformed into calls to
       <output_func_name>(*args), if output_fnuc_name is not None
       (More than one argument is passed if the statement is in the form of a list;
        for example '1,2'.)

    At the same time, the code is scanned for possible mutations, and a list is returned.
    In the list:

      * A string indicates the mutation of a variable by assignment to a slice of it,
        or to an attribute.

      * A tuple of (variable_name, method_name) indicates the invocation of a method
        on the variable; this will sometimes be a mutation (e.g., list.append(value)),
        and sometimes not.
    """
    # Collects the rewrite configuration and the mutation list built
    # while walking the tree.
    state = _RewriteState(output_func_name=output_func_name,
                          output_func_self=output_func_self,
                          print_func_name=print_func_name)

    # Normalize unicode input to UTF-8 bytes so the parser and the
    # encoding declaration attached below agree (Python 2 "unicode" type).
    if (isinstance(code, unicode)):
        code = code.encode("utf8")
        encoding = "utf8"

    original = parser.suite(code)

    # Rewrite the concrete parse tree, re-attach the source encoding
    # declaration, and compile the modified tree back to a code object.
    rewritten = _rewrite_file_input(original.totuple(), state)
    encoded = (symbol.encoding_decl, rewritten, encoding)
    compiled = parser.sequence2ast(encoded).compile()

    return (compiled, state.mutated)
def __init__(self, tree, name, file):
    """ The code can be a string (in which case it is parsed), or it
    can be in parse tree form already.
    """
    self.name = name
    self.file = file
    self.class_info = {}
    self.function_info = {}
    self.assign_info = {}
    self.derivs = {}
    # Python 2: types.StringType / types.TupleType are str / tuple.
    if isinstance(tree, types.StringType):
        try:
            tree = parser.suite(tree + "\n").totuple()
            if (tree):
                # Capture a leading module docstring from the first
                # statement of the parsed module, if one is present.
                found, vars = self.match(DOCSTRING_STMT_PATTERN, tree[1])
                if found:
                    self.docstring = vars["docstring"]
        except:
            # NOTE(review): bare except silently degrades any failure to
            # "no parse"; consider narrowing to SyntaxError/ParserError.
            print "CAUTION --- Parse failed: " + name
    # If parsing succeeded (or a tuple was passed in), extract the info.
    if isinstance(tree, types.TupleType):
        self.extract_info(tree)