def is_async_fn(node):
    """Return True when the code object attached to *node* was compiled
    with an async-related compiler flag (per co_flags_is_async)."""
    code_node = node[0][0]
    # Prefer the first child that actually carries a code object.
    for child in node[0]:
        if hasattr(child, "attr") and iscode(child.attr):
            code_node = child
            break
    if hasattr(code_node, "attr") and iscode(code_node.attr):
        return co_flags_is_async(code_node.attr.co_flags)
    return False
def is_async_fn(node):
    """Return True when the code object attached to *node* has any async
    compiler flag set (COROUTINE, ITERABLE_COROUTINE or ASYNC_GENERATOR).

    Fix: the original returned the raw ``co_flags`` bit-mask (an int) on
    the truthy path; wrap in ``bool()`` so callers always get a boolean.
    Truthiness is unchanged, so this is backward compatible.
    """
    code_node = node[0][0]
    # Prefer the first child that actually carries a code object.
    for n in node[0]:
        if hasattr(n, "attr") and iscode(n.attr):
            code_node = n
            break
    is_code = hasattr(code_node, "attr") and iscode(code_node.attr)
    async_flags = (COMPILER_FLAG_BIT["COROUTINE"]
                   | COMPILER_FLAG_BIT["ITERABLE_COROUTINE"]
                   | COMPILER_FLAG_BIT["ASYNC_GENERATOR"])
    return bool(is_code and (code_node.attr.co_flags & async_flags))
def uncompyle(version, co, out=None, showasm=False, showast=False,
              timestamp=None, showgrammar=False, code_objects=None):
    """
    disassembles and deparses a given code block 'co'

    :param version:      bytecode version of *co*.
    :param co:           the code object to decompile.
    :param out:          stream to write decompiled source to (default sys.stdout).
    :param showasm:      dump the disassembly before deparsing.
    :param showast:      dump the parse tree.
    :param timestamp:    compile timestamp recorded in the bytecode file, if any.
    :param showgrammar:  show grammar reductions while parsing.
    :param code_objects: optional cache of already-seen code objects.
    """
    assert iscode(co)
    # Fix: the original used a mutable default ({}) which is shared across
    # calls; allocate a fresh dict per call instead.
    if code_objects is None:
        code_objects = {}
    # store final output stream for case of error
    real_out = out or sys.stdout
    print('# Python bytecode %s (decompiled from Python %s)' %
          (version, PYTHON_VERSION), file=real_out)
    if co.co_filename:
        print('# Embedded file name: %s' % co.co_filename, file=real_out)
    if timestamp:
        print('# Compiled at: %s' %
              datetime.datetime.fromtimestamp(timestamp), file=real_out)
    try:
        pysource.deparse_code(version, co, out, showasm, showast, showgrammar,
                              code_objects=code_objects)
    except pysource.SourceWalkerError as e:
        # deparsing failed
        print("\n")
        print(co.co_filename)
        if real_out != out:
            print("\n", file=real_out)
        print(e, file=real_out)
def disco_loop(disasm, queue, real_out):
    """Disassemble every code object on *queue* with *disasm*, writing the
    tokens to *real_out*; nested code objects found in token attributes
    are pushed back onto the queue for later processing."""
    while queue:
        co = queue.popleft()
        if co.co_name != '<module>':
            print('\n# %s line %d of %s' %
                  (co.co_name, co.co_firstlineno, co.co_filename),
                  file=real_out)
        tokens, customize = disasm(co)
        for tok in tokens:
            # A token may carry a nested code object in pattr or attr.
            if iscode(tok.pattr):
                queue.append(tok.pattr)
            elif iscode(tok.attr):
                queue.append(tok.attr)
            print(tok, file=real_out)
def n_function_def(node):
    """Emit the header of a function definition, writing "async def" when
    the attached code object carries the COROUTINE compiler flag."""
    code_node = node[0][0]
    for candidate in node[0]:
        if hasattr(candidate, 'attr') and iscode(candidate.attr):
            code_node = candidate
            break
    has_code = hasattr(code_node, 'attr') and iscode(code_node.attr)
    if has_code and (code_node.attr.co_flags & COMPILER_FLAG_BIT['COROUTINE']):
        self.template_engine(('\n\n%|async def %c\n', -2), node)
    else:
        self.template_engine(('\n\n%|def %c\n', -2), node)
    self.prune()
def disco_loop(opc, version, queue, real_out, dup_lines=False,
               show_bytes=False):
    """Disassembles a queue of code objects. If we discover another code
    object which will be found in co_consts, we add the new code to the
    list.

    Note that the order of code discovery is the order of first encounter,
    which is not amenable to the format used by a disassembler where code
    objects should be defined before being used in other functions.
    However this is not recursive and will overall lead to less memory
    consumption at run time.
    """
    while queue:
        code = queue.popleft()
        if code.co_name not in ("<module>", "?"):
            real_out.write("\n" + format_code_info(code, version) + "\n")
        listing = Bytecode(code, opc, dup_lines=dup_lines)
        real_out.write(listing.dis(show_bytes=show_bytes) + "\n")
        for const in code.co_consts:
            if iscode(const):
                queue.append(const)
def disco(bytecode_version, co, timestamp, out=sys.stdout, is_pypy=False,
          magic_int=None, source_size=None, header=True, asm_format=False,
          show_bytes=False, dup_lines=False):
    """
    diassembles and deparses a given code block 'co'
    """
    assert iscode(co)
    show_module_header(bytecode_version, co, timestamp, out, is_pypy,
                       magic_int, source_size, header, show_filename=False)

    # store final output stream for case of error
    real_out = out or sys.stdout
    if co.co_filename and not asm_format:
        real_out.write(format_code_info(co, bytecode_version) + "\n")

    opc = get_opcode(bytecode_version, is_pypy)
    if asm_format:
        disco_loop_asm_format(opc, bytecode_version, co, real_out,
                              {}, set([]))
    else:
        disco_loop(opc, bytecode_version, deque([co]), real_out,
                   show_bytes=show_bytes)
def python_parser(version, co, out=sys.stdout, showasm=False,
                  parser_debug=PARSER_DEFAULT_DEBUG, is_pypy=False):
    """
    Parse a code object to an abstract syntax tree representation.

    :param version:      The python version this code is from as a float, for
                         example 2.6, 2.7, 3.2, 3.3, 3.4, 3.5 etc.
    :param co:           The code object to parse.
    :param out:          File like object to write the output to.
    :param showasm:      Flag which determines whether the disassembled and
                         ingested code is written to sys.stdout or not.
    :param parser_debug: dict containing debug flags for the spark parser.

    :return: Abstract syntax tree representation of the code object.
    """
    assert iscode(co)
    from uncompyle6.scanner import get_scanner
    scanner = get_scanner(version, is_pypy)
    tokens, customize = scanner.ingest(co)
    maybe_show_asm(showasm, tokens)

    # For heavy grammar debugging:
    # parser_debug = {'rules': True, 'transition': True, 'reduce': True,
    #                 'showstack': 'full'}
    return parse(get_python_parser(version, parser_debug), tokens, customize)
def disco_loop(opc, version, queue, real_out, dup_lines=False,
               show_bytes=False):
    """Disassembles a queue of code objects. If we discover another code
    object which will be found in co_consts, we add the new code to the
    list.

    Note that the order of code discovery is the order of first encounter,
    which is not amenable to the format used by a disassembler where code
    objects should be defined before being used in other functions.
    However this is not recursive and will overall lead to less memory
    consumption at run time.
    """
    while queue:
        current = queue.popleft()
        if current.co_name not in ('<module>', '?'):
            real_out.write("\n" + format_code_info(current, version) + "\n")
        bc = Bytecode(current, opc, dup_lines=dup_lines)
        real_out.write(bc.dis(show_bytes=show_bytes) + "\n")
        # Enqueue any nested code objects for later disassembly.
        for const in current.co_consts:
            if iscode(const):
                queue.append(const)
def uncompyle(version, co, out=None, showasm=False, showast=False,
              timestamp=None, showgrammar=False, code_objects=None,
              is_pypy=False, magic_int=None):
    """
    disassembles and deparses a given code block 'co'

    :param version:      bytecode version of *co*.
    :param co:           the code object to decompile.
    :param out:          stream to write decompiled source to (default sys.stdout).
    :param showasm:      dump the disassembly before deparsing.
    :param showast:      dump the parse tree.
    :param timestamp:    compile timestamp recorded in the bytecode file, if any.
    :param showgrammar:  show grammar reductions while parsing.
    :param code_objects: optional cache of already-seen code objects.
    :param is_pypy:      True when the bytecode came from PyPy.
    :param magic_int:    magic number of the bytecode file, if known.
    """
    assert iscode(co)
    # Fix: the original used a mutable default ({}) which is shared across
    # calls; allocate a fresh dict per call instead.
    if code_objects is None:
        code_objects = {}
    # store final output stream for case of error
    real_out = out or sys.stdout
    co_pypy_str = 'PyPy ' if is_pypy else ''
    run_pypy_str = 'PyPy ' if IS_PYPY else ''
    print('# %sPython bytecode %s%s disassembled from %sPython %s' %
          (co_pypy_str, version,
           " (%d)" % magic_int if magic_int else "",
           run_pypy_str, PYTHON_VERSION),
          file=real_out)
    if co.co_filename:
        print('# Embedded file name: %s' % co.co_filename, file=real_out)
    if timestamp:
        print('# Compiled at: %s' %
              datetime.datetime.fromtimestamp(timestamp), file=real_out)
    try:
        pysource.deparse_code(version, co, out, showasm, showast, showgrammar,
                              code_objects=code_objects, is_pypy=is_pypy)
    except pysource.SourceWalkerError as e:
        # deparsing failed
        print("\n")
        print(co.co_filename)
        if real_out != out:
            print("\n", file=real_out)
        print(e, file=real_out)
def find_code_node(node, start: int):
    """Search *node* from position -start towards the front for a child of
    kind LOAD_CODE and return it; assert-fail when no such child exists."""
    for back in range(-start, len(node) + 1):
        candidate = node[-back]
        if candidate.kind == "LOAD_CODE":
            assert iscode(candidate.attr)
            return candidate
    assert False, "did not find code node starting at %d in %s" % (start, node)
def align_deparse_code(version, co, out=sys.stderr, showasm=False,
                       showast=False, showgrammar=False, code_objects=None,
                       compile_mode='exec', is_pypy=False):
    """
    ingests and deparses a given code block 'co'

    :param version:      bytecode version of *co*.
    :param co:           the code object to deparse.
    :param out:          stream to write to (default sys.stderr).
    :param showasm:      dump the disassembly before deparsing.
    :param showast:      dump the parse tree.
    :param showgrammar:  show grammar reductions while parsing.
    :param code_objects: optional cache of already-seen code objects.
    :param compile_mode: 'exec', 'eval' or 'single'.
    :param is_pypy:      True when the bytecode came from PyPy.
    :return: the AligningWalker holding the deparsed result.
    :raises SourceWalkerError: when deparsing stops on a parse error.
    """
    assert iscode(co)
    # Fix: the original used a mutable default ({}) which is shared across
    # calls; allocate a fresh dict per call instead.
    if code_objects is None:
        code_objects = {}

    # store final output stream for case of error
    scanner = get_scanner(version, is_pypy=is_pypy)

    tokens, customize = scanner.ingest(co, code_objects=code_objects)
    maybe_show_asm(showasm, tokens)

    debug_parser = dict(PARSER_DEFAULT_DEBUG)
    if showgrammar:
        debug_parser['reduce'] = showgrammar
        debug_parser['errorstack'] = True

    #  Build AST from disassembly.
    deparsed = AligningWalker(version, scanner, out, showast=showast,
                              debug_parser=debug_parser,
                              compile_mode=compile_mode,
                              is_pypy=is_pypy)

    isTopLevel = co.co_name == '<module>'
    deparsed.ast = deparsed.build_ast(tokens, customize, isTopLevel=isTopLevel)

    assert deparsed.ast == 'stmts', 'Should have parsed grammar start'

    del tokens  # save memory

    deparsed.mod_globs = find_globals(deparsed.ast, set())

    # convert leading '__doc__ = "..." into doc string
    try:
        if deparsed.ast[0][0] == ASSIGN_DOC_STRING(co.co_consts[0]):
            deparsed.print_docstring('', co.co_consts[0])
            del deparsed.ast[0]
        if deparsed.ast[-1] == RETURN_NONE:
            deparsed.ast.pop()  # remove last node
            # todo: if empty, add 'pass'
    except Exception:
        # Fix: was a bare "except:" which also swallowed KeyboardInterrupt /
        # SystemExit; keep the deliberate best-effort behavior but only for
        # ordinary exceptions.
        pass

    # What we've been waiting for: Generate source from AST!
    deparsed.gen_source(deparsed.ast, co.co_name, customize)

    for g in deparsed.mod_globs:
        deparsed.write('# global %s ## Warning: Unused global' % g)

    if deparsed.ERROR:
        raise SourceWalkerError("Deparsing stopped due to parse error")
    return deparsed
def n_function_def(node):
    """Emit the header of a function definition, writing "async def" when
    the attached code object carries the COROUTINE compiler flag.

    The position of the code-carrying child moved in Python 3.6.
    """
    code_node = node[0][0] if self.version >= 3.6 else node[0][1]
    has_code = hasattr(code_node, 'attr') and iscode(code_node.attr)
    if has_code and (code_node.attr.co_flags & COMPILER_FLAG_BIT['COROUTINE']):
        self.template_engine(('\n\n%|async def %c\n', -2), node)
    else:
        self.template_engine(('\n\n%|def %c\n', -2), node)
    self.prune()
def number_loop(queue, mappings, opc):
    """Walk *queue* (paired original/uncompiled code objects), extending
    *mappings* in place with [original_line, uncompiled_line] pairs and
    queueing matching nested code objects found in the instructions."""
    while queue:
        orig_code = queue.popleft()
        redone_code = queue.popleft()
        assert orig_code.co_name == redone_code.co_name
        linestarts_orig = findlinestarts(orig_code)
        linestarts_uncompiled = list(findlinestarts(redone_code))
        mappings.extend(
            [line, offset2line(offset, linestarts_uncompiled)]
            for offset, line in linestarts_orig)
        orig_bytecode = Bytecode(orig_code, opc)
        redone_bytecode = Bytecode(redone_code, opc)
        redone_instructions = redone_bytecode.get_instructions(redone_code)
        seen = set([orig_code.co_name])
        for instr in orig_bytecode.get_instructions(orig_code):
            if not iscode(instr.argval):
                continue
            next_orig = instr.argval
            # Advance the other instruction stream to its next code object.
            next_redone = None
            while not next_redone:
                try:
                    other = next(redone_instructions)
                    if iscode(other.argval):
                        next_redone = other.argval
                except StopIteration:
                    break
            if next_redone:
                assert next_orig.co_name == next_redone.co_name
                if next_orig.co_name not in seen:
                    seen.add(next_orig.co_name)
                    queue.append(next_orig)
                    queue.append(next_redone)
def disco(
    bytecode_version,
    co,
    timestamp,
    out=sys.stdout,
    is_pypy=False,
    magic_int=None,
    source_size=None,
    header=True,
    asm_format=False,
    show_bytes=False,
    dup_lines=False,
):
    """
    diassembles and deparses a given code block 'co'
    """
    assert iscode(co)
    show_module_header(
        bytecode_version,
        co,
        timestamp,
        out,
        is_pypy,
        magic_int,
        source_size,
        header,
        show_filename=False,
    )

    # store final output stream for case of error
    real_out = out or sys.stdout
    if co.co_filename and not asm_format:
        real_out.write(format_code_info(co, bytecode_version) + "\n")

    opc = get_opcode(bytecode_version, is_pypy)
    if asm_format:
        disco_loop_asm_format(opc, bytecode_version, co, real_out,
                              {}, set([]))
    else:
        pending = deque([co])
        disco_loop(opc, bytecode_version, pending, real_out,
                   show_bytes=show_bytes)
def n_function_def(node):
    """Emit the header of a function definition, writing "async def" when
    the attached code object carries the COROUTINE compiler flag.

    Scans node[0] backwards (skipping the last two children) for the
    child that carries the code object.
    """
    n0 = node[0]
    is_code = False
    for idx in range(len(n0) - 2, -1, -1):
        code_node = n0[idx]
        if hasattr(code_node, 'attr') and iscode(code_node.attr):
            is_code = True
            break
    if is_code and (code_node.attr.co_flags & COMPILER_FLAG_BIT['COROUTINE']):
        self.template_engine(('\n\n%|async def %c\n', -2), node)
    else:
        self.template_engine(('\n\n%|def %c\n', -2), node)
    self.prune()
def disco_loop_asm_format(opc, version, co, real_out):
    """Produces disassembly in a format more conducive to automatic
    assembly by producing inner modules before they are used by outer
    ones. Since this is recursive, we'll use more stack space at runtime.
    """
    # Emit nested code objects first so they are defined before use.
    for const in co.co_consts:
        if iscode(const):
            disco_loop_asm_format(opc, version, const, real_out)

    if co.co_name != '<module>' or co.co_filename:
        real_out.write("\n" + format_code_info(co, version) + "\n")
    listing = Bytecode(co, opc)
    real_out.write(listing.dis(asm_format=True) + "\n")
def decompile(bytecode_version, co, out=None, showasm=None, showast=False,
              timestamp=None, showgrammar=False, code_objects=None,
              source_size=None, is_pypy=False, magic_int=None):
    """
    ingests and deparses a given code block 'co'

    :param bytecode_version: bytecode version of *co*.
    :param co:           the code object to decompile.
    :param out:          stream to write to (default sys.stdout).
    :param showasm:      dump the disassembly before deparsing.
    :param showast:      dump the parse tree.
    :param timestamp:    compile timestamp recorded in the bytecode file.
    :param showgrammar:  show grammar reductions while parsing.
    :param code_objects: optional cache of already-seen code objects.
    :param source_size:  size of source mod 2**32, if known.
    :param is_pypy:      True when the bytecode came from PyPy.
    :param magic_int:    magic number of the bytecode file, if known.
    :raises pysource.SourceWalkerError: when deparsing fails.
    """
    assert iscode(co)
    # Fix: the original used a mutable default ({}) which is shared across
    # calls; allocate a fresh dict per call instead.
    if code_objects is None:
        code_objects = {}
    # store final output stream for case of error
    real_out = out or sys.stdout
    co_pypy_str = 'PyPy ' if is_pypy else ''
    run_pypy_str = 'PyPy ' if IS_PYPY else ''
    print('# uncompyle6 version %s\n'
          '# %sPython bytecode %s%s\n# Decompiled from: %sPython %s' %
          (VERSION, co_pypy_str, bytecode_version,
           " (%d)" % magic_int if magic_int else "",
           run_pypy_str, '\n# '.join(sys.version.split('\n'))),
          file=real_out)
    if co.co_filename:
        print('# Embedded file name: %s' % co.co_filename, file=real_out)
    if timestamp:
        print('# Compiled at: %s' %
              datetime.datetime.fromtimestamp(timestamp), file=real_out)
    if source_size:
        print('# Size of source mod 2**32: %d bytes' % source_size,
              file=real_out)
    try:
        pysource.deparse_code(bytecode_version, co, out, showasm, showast,
                              showgrammar, code_objects=code_objects,
                              is_pypy=is_pypy)
    except pysource.SourceWalkerError as e:
        # deparsing failed; re-raise with the original as the cause so the
        # traceback is not lost (fix: was "raise ...(str(e))" with no chain).
        raise pysource.SourceWalkerError(str(e)) from e
def disco(version, co, out=None, is_pypy=False):
    """
    diassembles and deparses a given code block 'co'
    """
    assert iscode(co)

    # store final output stream for case of error
    real_out = out or sys.stdout
    print('# Python %s' % version, file=real_out)
    if co.co_filename:
        print('# Embedded file name: %s' % co.co_filename, file=real_out)

    scanner = get_scanner(version, is_pypy=is_pypy)
    disco_loop(scanner.ingest, deque([co]), real_out)
def disco(bytecode_version, co, timestamp, out=sys.stdout, is_pypy=False,
          magic_int=None, source_size=None, header=True, asm_format=False,
          dup_lines=False):
    """
    diassembles and deparses a given code block 'co'
    """
    assert iscode(co)

    # store final output stream for case of error
    real_out = out or sys.stdout
    co_pypy_str = 'PyPy ' if is_pypy else ''
    run_pypy_str = 'PyPy ' if IS_PYPY else ''
    if header:
        real_out.write(('# pydisasm version %s\n# %sPython bytecode %s%s'
                        '\n# Disassembled from %sPython %s\n') %
                       (VERSION, co_pypy_str, bytecode_version,
                        " (%d)" % magic_int if magic_int else "",
                        run_pypy_str,
                        '\n# '.join(sys.version.split('\n'))))
    if timestamp > 0:
        value = datetime.datetime.fromtimestamp(timestamp)
        real_out.write('# Timestamp in code: %d' % timestamp)
        real_out.write(value.strftime(' (%Y-%m-%d %H:%M:%S)\n'))
    if source_size:
        real_out.write('# Source code size mod 2**32: %d bytes\n' %
                       source_size)

    if co.co_filename and not asm_format:
        real_out.write(format_code_info(co, bytecode_version) + "\n")

    opc = get_opcode(bytecode_version, is_pypy)
    if asm_format:
        disco_loop_asm_format(opc, bytecode_version, co, real_out)
    else:
        disco_loop(opc, bytecode_version, deque([co]), real_out)
def disco(version, co, out=None, is_pypy=False):
    """
    diassembles and deparses a given code block 'co'
    """
    assert iscode(co)

    # store final output stream for case of error
    real_out = out or sys.stdout
    print('# Python %s' % version, file=real_out)
    if co.co_filename:
        print('# Embedded file name: %s' % co.co_filename, file=real_out)

    scanner = get_scanner(version, is_pypy=is_pypy)
    disco_loop(scanner.disassemble, deque([co]), real_out)
def disco(bytecode_version, co, timestamp, out=sys.stdout, is_pypy=False,
          magic_int=None, source_size=None, header=True, asm_format=False):
    """
    diassembles and deparses a given code block 'co'
    """
    assert iscode(co)

    # store final output stream for case of error
    real_out = out or sys.stdout
    co_pypy_str = 'PyPy ' if is_pypy else ''
    run_pypy_str = 'PyPy ' if IS_PYPY else ''
    if header:
        real_out.write(('# pydisasm version %s\n# %sPython bytecode %s%s'
                        '\n# Disassembled from %sPython %s\n') %
                       (VERSION, co_pypy_str, bytecode_version,
                        " (%d)" % magic_int if magic_int else "",
                        run_pypy_str,
                        '\n# '.join(sys.version.split('\n'))))
    if timestamp > 0:
        value = datetime.datetime.fromtimestamp(timestamp)
        real_out.write('# Timestamp in code: %d' % timestamp)
        real_out.write(value.strftime(' (%Y-%m-%d %H:%M:%S)\n'))
    if source_size:
        real_out.write('# Source code size mod 2**32: %d bytes\n' %
                       source_size)

    if co.co_filename and not asm_format:
        real_out.write(format_code_info(co, bytecode_version) + "\n")

    opc = get_opcode(bytecode_version, is_pypy)
    if asm_format:
        disco_loop_asm_format(opc, bytecode_version, co, real_out)
    else:
        disco_loop(opc, bytecode_version, deque([co]), real_out)
def decompile(
        bytecode_version, co, out=None, showasm=None, showast=False,
        timestamp=None, showgrammar=False, code_objects=None,
        source_size=None, is_pypy=False, magic_int=None):
    """
    ingests and deparses a given code block 'co'

    :param bytecode_version: bytecode version of *co*.
    :param co:           the code object to decompile.
    :param out:          stream to write to (default sys.stdout).
    :param showasm:      dump the disassembly before deparsing.
    :param showast:      dump the parse tree.
    :param timestamp:    compile timestamp recorded in the bytecode file.
    :param showgrammar:  show grammar reductions while parsing.
    :param code_objects: optional cache of already-seen code objects.
    :param source_size:  size of source mod 2**32, if known.
    :param is_pypy:      True when the bytecode came from PyPy.
    :param magic_int:    magic number of the bytecode file, if known.
    :raises pysource.SourceWalkerError: when deparsing fails.
    """
    assert iscode(co)
    # Fix: the original used a mutable default ({}) which is shared across
    # calls; allocate a fresh dict per call instead.
    if code_objects is None:
        code_objects = {}
    # store final output stream for case of error
    real_out = out or sys.stdout
    co_pypy_str = 'PyPy ' if is_pypy else ''
    run_pypy_str = 'PyPy ' if IS_PYPY else ''
    print('# uncompyle6 version %s\n'
          '# %sPython bytecode %s%s\n# Decompiled from: %sPython %s' %
          (VERSION, co_pypy_str, bytecode_version,
           " (%d)" % magic_int if magic_int else "",
           run_pypy_str, '\n# '.join(sys.version.split('\n'))),
          file=real_out)
    if co.co_filename:
        print('# Embedded file name: %s' % co.co_filename, file=real_out)
    if timestamp:
        print('# Compiled at: %s' %
              datetime.datetime.fromtimestamp(timestamp), file=real_out)
    if source_size:
        print('# Size of source mod 2**32: %d bytes' % source_size,
              file=real_out)
    try:
        pysource.deparse_code(bytecode_version, co, out, showasm, showast,
                              showgrammar, code_objects=code_objects,
                              is_pypy=is_pypy)
    except pysource.SourceWalkerError as e:
        # deparsing failed; re-raise with the original as the cause so the
        # traceback is not lost (fix: was "raise ...(str(e))" with no chain).
        raise pysource.SourceWalkerError(str(e)) from e
def make_function3(self, node, isLambda, nested=1, codeNode=None):
    """Dump function definition, doc string, and function body in
    Python version 3.0 and above
    """
    # For Python 3.3, the evaluation stack in MAKE_FUNCTION is:
    # * default argument objects in positional order
    # * pairs of name and default argument, with the name just below
    #   the object on the stack, for keyword-only parameters
    # * parameter annotation objects
    # * a tuple listing the parameter names for the annotations
    #   (only if there are ony annotation objects)
    # * the code associated with the function (at TOS1)
    # * the qualified name of the function (at TOS)

    # For Python 3.0 .. 3.2 the evaluation stack is:
    # The function object is defined to have argc default parameters,
    # which are found below TOS.
    # * first come positional args in the order they are given in the source,
    # * next come the keyword args in the order they given in the source,
    # * finally is the code associated with the function (at TOS)
    #
    # Note: There is no qualified name at TOS

    # MAKE_CLOSURE adds an additional closure slot

    # Thank you, Python, for a such a well-thought out system that has
    # changed 4 or so times.

    def build_param(ast, name, default):
        """build parameters:
            - handle defaults
            - handle format tuple parameters
        """
        if default:
            value = self.traverse(default, indent='')
            maybe_show_ast_param_default(self.showast, name, value)
            result = '%s=%s' % (name, value)
            if result[-2:] == '= ':  # default was 'LOAD_CONST None'
                result += 'None'
            return result
        else:
            return name

    # MAKE_FUNCTION_... or MAKE_CLOSURE_...
    assert node[-1].type.startswith('MAKE_')

    # Python 3.3+ adds a qualified name at TOS (-1),
    # moving down the LOAD_LAMBDA instruction
    if 3.0 <= self.version <= 3.2:
        lambda_index = -2
    elif 3.03 <= self.version:
        lambda_index = -3
    else:
        lambda_index = None

    args_node = node[-1]
    if isinstance(args_node.attr, tuple):
        pos_args, kw_args, annotate_argc = args_node.attr
        if (self.version <= 3.3 and len(node) > 2
                and node[lambda_index] != 'LOAD_LAMBDA'):
            # args are after kwargs; kwargs are bundled as one node
            defparams = node[1:args_node.attr[0] + 1]
        else:
            # args are before kwargs; kwags as bundled as one node
            defparams = node[:args_node.attr[0]]
    else:
        if self.version < 3.6:
            defparams = node[:args_node.attr]
        else:
            default, kw, annotate, closure = args_node.attr
            # FIXME: start here for Python 3.6 and above:
            defparams = []
            # if default:
            #     defparams = node[-(2 + kw + annotate + closure)]
            # else:
            #     defparams = []
        kw_args = 0

    if lambda_index and isLambda and iscode(node[lambda_index].attr):
        assert node[lambda_index].type == 'LOAD_LAMBDA'
        code = node[lambda_index].attr
    else:
        code = codeNode.attr

    assert iscode(code)
    code = Code(code, self.scanner, self.currentclass)

    # add defaults values to parameter names
    argc = code.co_argcount
    paramnames = list(code.co_varnames[:argc])

    # defaults are for last n parameters, thus reverse
    if not 3.0 <= self.version <= 3.1:
        paramnames.reverse()
        defparams.reverse()

    try:
        ast = self.build_ast(code._tokens,
                             code._customize,
                             isLambda=isLambda,
                             noneInNames=('None' in code.co_names))
    except ParserError as p:
        self.write(str(p))
        self.ERROR = p
        return

    kw_pairs = args_node.attr[1] if self.version >= 3.0 else 0

    # build parameters
    params = [build_param(ast, name, d) for name, d in
              zip_longest(paramnames, defparams, fillvalue=None)]
    if not 3.0 <= self.version <= 3.1:
        params.reverse()  # back to correct order

    if code_has_star_arg(code):
        if self.version > 3.0:
            params.append('*%s' % code.co_varnames[argc + kw_pairs])
        else:
            params.append('*%s' % code.co_varnames[argc])
        argc += 1

    # dump parameter list (with default values)
    if isLambda:
        self.write("lambda ", ", ".join(params))
    else:
        self.write("(", ", ".join(params))
        # self.println(indent, '#flags:\t', int(code.co_flags))

    if kw_args > 0:
        if not (4 & code.co_flags):
            if argc > 0:
                self.write(", *, ")
            else:
                self.write("*, ")
        else:
            self.write(", ")

        if not 3.0 <= self.version <= 3.2:
            for n in node:
                if n == 'pos_arg':
                    continue
                elif (self.version >= 3.4
                      and not (n.type in ('kwargs', 'kwarg'))):
                    continue
                else:
                    self.preorder(n)
                break
        else:
            kwargs = node[0]
            last = len(kwargs) - 1
            i = 0
            for n in node[0]:
                if n == 'kwarg':
                    self.write('%s=' % n[0].pattr)
                    self.preorder(n[1])
                    if i < last:
                        self.write(', ')
                    i += 1

    if code_has_star_star_arg(code):
        if argc > 0:
            self.write(', ')
        self.write('**%s' % code.co_varnames[argc + kw_pairs])

    if isLambda:
        self.write(": ")
    else:
        self.println("):")

    if (len(code.co_consts) > 0 and code.co_consts[0] is not None
            and not isLambda):  # ugly
        # docstring exists, dump it
        print_docstring(self, self.indent, code.co_consts[0])

    code._tokens = None  # save memory
    assert ast == 'stmts'

    all_globals = find_all_globals(ast, set())
    for g in ((all_globals & self.mod_globs) | find_globals(ast, set())):
        self.println(self.indent, 'global ', g)
    self.mod_globs -= all_globals
    has_none = 'None' in code.co_names
    rn = has_none and not find_none(ast)
    self.gen_source(ast, code.co_name, code._customize, isLambda=isLambda,
                    returnNone=rn)
    code._tokens = None
    code._customize = None  # save memory
def make_function2(self, node, isLambda, nested=1, codeNode=None):
    """
    Dump function defintion, doc string, and function body.
    This code is specialied for Python 2.
    """
    # FIXME: call make_function3 if we are self.version >= 3.0
    # and then simplify the below.

    def build_param(ast, name, default):
        """build parameters:
            - handle defaults
            - handle format tuple parameters
        """
        # if formal parameter is a tuple, the paramater name
        # starts with a dot (eg. '.1', '.2')
        if name.startswith('.'):
            # replace the name with the tuple-string
            name = self.get_tuple_parameter(ast, name)

        if default:
            value = self.traverse(default, indent='')
            maybe_show_ast_param_default(self.showast, name, value)
            result = '%s=%s' % (name, value)
            if result[-2:] == '= ':  # default was 'LOAD_CONST None'
                result += 'None'
            return result
        else:
            return name

    # MAKE_FUNCTION_... or MAKE_CLOSURE_...
    assert node[-1].type.startswith('MAKE_')

    args_node = node[-1]
    if isinstance(args_node.attr, tuple):
        # positional args are after kwargs
        defparams = node[1:args_node.attr[0] + 1]
        pos_args, kw_args, annotate_argc = args_node.attr
    else:
        defparams = node[:args_node.attr]
        kw_args = 0
        annotate_argc = 0

    lambda_index = None

    if lambda_index and isLambda and iscode(node[lambda_index].attr):
        assert node[lambda_index].type == 'LOAD_LAMBDA'
        code = node[lambda_index].attr
    else:
        code = codeNode.attr

    assert iscode(code)
    code = Code(code, self.scanner, self.currentclass)

    # add defaults values to parameter names
    argc = code.co_argcount
    paramnames = list(code.co_varnames[:argc])

    # defaults are for last n parameters, thus reverse
    paramnames.reverse()
    defparams.reverse()

    try:
        ast = self.build_ast(code._tokens,
                             code._customize,
                             isLambda=isLambda,
                             noneInNames=('None' in code.co_names))
    except ParserError as p:
        self.write(str(p))
        self.ERROR = p
        return

    kw_pairs = args_node.attr[1] if self.version >= 3.0 else 0
    indent = self.indent

    # build parameters
    params = [build_param(ast, name, default) for name, default in
              zip_longest(paramnames, defparams, fillvalue=None)]
    params.reverse()  # back to correct order

    if code_has_star_arg(code):
        params.append('*%s' % code.co_varnames[argc])
        argc += 1

    # dump parameter list (with default values)
    if isLambda:
        self.write("lambda ", ", ".join(params))
    else:
        self.write("(", ", ".join(params))

    if kw_args > 0:
        if not (4 & code.co_flags):
            if argc > 0:
                self.write(", *, ")
            else:
                self.write("*, ")
        else:
            self.write(", ")

        for n in node:
            if n == 'pos_arg':
                continue
            else:
                self.preorder(n)
            break

    if code_has_star_star_arg(code):
        if argc > 0:
            self.write(', ')
        if argc + kw_pairs > 0:
            self.write('**%s' % code.co_varnames[argc + kw_pairs])

    if isLambda:
        self.write(": ")
    else:
        self.println("):")

    if (len(code.co_consts) > 0 and code.co_consts[0] is not None
            and not isLambda):  # ugly
        # docstring exists, dump it
        print_docstring(self, indent, code.co_consts[0])

    code._tokens = None  # save memory
    assert ast == 'stmts'

    all_globals = find_all_globals(ast, set())
    for g in ((all_globals & self.mod_globs) | find_globals(ast, set())):
        self.println(self.indent, 'global ', g)
    self.mod_globs -= all_globals
    has_none = 'None' in code.co_names
    rn = has_none and not find_none(ast)
    self.gen_source(ast, code.co_name, code._customize, isLambda=isLambda,
                    returnNone=rn)
    code._tokens = None
    code._customize = None  # save memory
def ingest(self, co, classname=None, code_objects={}, show_asm=None):
    """
    Pick out tokens from an uncompyle6 code object, and transform them,
    returning a list of uncompyle6 'Token's.

    The transformations are made to assist the deparsing grammar.
    Specificially:
       -  various types of LOAD_CONST's are categorized in terms of what
          they load
       -  COME_FROM instructions are added to assist parsing control
          structures
       -  MAKE_FUNCTION and FUNCTION_CALLS append the number of positional
          arguments

    Also, when we encounter certain tokens, we add them to a set which will
    cause custom grammar rules. Specifically, variable arg tokens like
    MAKE_FUNCTION or BUILD_LIST cause specific rules for the specific number
    of arguments they take.
    """
    show_asm = self.show_asm if not show_asm else show_asm
    # show_asm = 'after'
    if show_asm in ('both', 'before'):
        from xdis.bytecode import Bytecode
        bytecode = Bytecode(co, self.opc)
        for instr in bytecode.get_instructions(co):
            print(instr._disassemble())

    # Container for tokens
    tokens = []

    customize = {}
    if self.is_pypy:
        customize['PyPy'] = 1

    Token = self.Token  # shortcut

    n = self.setup_code(co)

    self.build_lines_data(co, n)
    self.build_prev_op(n)

    free, names, varnames = self.unmangle_code_names(co, classname)
    self.names = names

    # Scan for assertions. Later we will
    # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT'.
    # 'LOAD_ASSERT' is used in assert statements.
    self.load_asserts = set()
    for i in self.op_range(0, n):
        # We need to detect the difference between:
        #   raise AssertionError
        # and
        #   assert ...
        # Below we use the heuristic that it is preceded by a POP_JUMP.
        # however we could also use followed by RAISE_VARARGS
        # or for PyPy there may be a JUMP_IF_NOT_DEBUG before.
        # FIXME: remove uses of PJIF, and PJIT
        if self.is_pypy:
            have_pop_jump = self.code[i] in (self.opc.PJIF, self.opc.PJIT)
        else:
            have_pop_jump = self.code[i] == self.opc.PJIT

        if have_pop_jump and self.code[i + 3] == self.opc.LOAD_GLOBAL:
            if names[self.get_argument(i + 3)] == 'AssertionError':
                self.load_asserts.add(i + 3)

    jump_targets = self.find_jump_targets(show_asm)
    # contains (code, [addrRefToCode])

    last_stmt = self.next_stmt[0]
    i = self.next_stmt[last_stmt]
    replace = {}
    while i < n - 1:
        if self.lines[last_stmt].next > i:
            # Distinguish "print ..." from "print ...,"
            if self.code[last_stmt] == self.opc.PRINT_ITEM:
                if self.code[i] == self.opc.PRINT_ITEM:
                    replace[i] = 'PRINT_ITEM_CONT'
                elif self.code[i] == self.opc.PRINT_NEWLINE:
                    replace[i] = 'PRINT_NEWLINE_CONT'
        last_stmt = i
        i = self.next_stmt[i]

    extended_arg = 0
    for offset in self.op_range(0, n):
        if offset in jump_targets:
            jump_idx = 0
            # We want to process COME_FROMs to the same offset to be in
            # *descending* offset order so we have the larger range or
            # biggest instruction interval last. (I think they are sorted
            # in increasing order, but for safety we sort them). That way,
            # specific COME_FROM tags will match up properly. For example,
            # a "loop" with an "if" nested in it should have the "loop"
            # tag last so the grammar rule matches that properly.
            # last_offset = -1
            for jump_offset in sorted(jump_targets[offset], reverse=True):
                # if jump_offset == last_offset:
                #     continue
                # last_offset = jump_offset
                come_from_name = 'COME_FROM'
                op_name = self.opc.opname[self.code[jump_offset]]
                if op_name.startswith('SETUP_') and self.version == 2.7:
                    come_from_type = op_name[len('SETUP_'):]
                    if come_from_type not in ('LOOP', 'EXCEPT'):
                        come_from_name = 'COME_FROM_%s' % come_from_type
                tokens.append(Token(come_from_name,
                                    None, repr(jump_offset),
                                    offset="%s_%d" % (offset, jump_idx),
                                    has_arg=True))
                jump_idx += 1

        op = self.code[offset]
        op_name = self.opc.opname[op]

        oparg = None
        pattr = None
        has_arg = op_has_argument(op, self.opc)
        if has_arg:
            oparg = self.get_argument(offset) + extended_arg
            extended_arg = 0
            if op == self.opc.EXTENDED_ARG:
                extended_arg = oparg * scan.L65536
                continue
            if op in self.opc.hasconst:
                const = co.co_consts[oparg]
                if iscode(const):
                    oparg = const
                    if const.co_name == '<lambda>':
                        assert op_name == 'LOAD_CONST'
                        op_name = 'LOAD_LAMBDA'
                    elif const.co_name == '<genexpr>':
                        op_name = 'LOAD_GENEXPR'
                    elif const.co_name == '<dictcomp>':
                        op_name = 'LOAD_DICTCOMP'
                    elif const.co_name == '<setcomp>':
                        op_name = 'LOAD_SETCOMP'
                    # verify() uses 'pattr' for comparison, since 'attr'
                    # now holds Code(const) and thus can not be used
                    # for comparison (todo: think about changing this)
                    # pattr = 'code_object @ 0x%x %s->%s' %\
                    #     (id(const), const.co_filename, const.co_name)
                    pattr = '<code_object ' + const.co_name + '>'
                else:
                    pattr = const
            elif op in self.opc.hasname:
                pattr = names[oparg]
            elif op in self.opc.hasjrel:
                # use instead: hasattr(self, 'patch_continue'): ?
                if self.version == 2.7:
                    self.patch_continue(tokens, offset, op)
                pattr = repr(offset + 3 + oparg)
            elif op in self.opc.hasjabs:
                # use instead: hasattr(self, 'patch_continue'): ?
                if self.version == 2.7:
                    self.patch_continue(tokens, offset, op)
                pattr = repr(oparg)
            elif op in self.opc.haslocal:
                pattr = varnames[oparg]
            elif op in self.opc.hascompare:
                pattr = self.opc.cmp_op[oparg]
            elif op in self.opc.hasfree:
                pattr = free[oparg]

        if op in self.varargs_ops:
            # CE - Hack for >= 2.5
            #      Now all values loaded via LOAD_CLOSURE are packed into
            #      a tuple before calling MAKE_CLOSURE.
            if (op == self.opc.BUILD_TUPLE and
                    self.code[self.prev[offset]] == self.opc.LOAD_CLOSURE):
                continue
            else:
                if self.is_pypy and not oparg and op_name == 'BUILD_MAP':
                    op_name = 'BUILD_MAP_n'
                else:
                    op_name = '%s_%d' % (op_name, oparg)
                if op != self.opc.BUILD_SLICE:
                    customize[op_name] = oparg
        elif self.is_pypy and op_name in ('LOOKUP_METHOD',
                                          'JUMP_IF_NOT_DEBUG',
                                          'SETUP_EXCEPT',
                                          'SETUP_FINALLY'):
            # The value in the dict is in special cases in semantic actions,
            # such as CALL_FUNCTION. The value is not used in these cases,
            # so we put in arbitrary value 0.
            customize[op_name] = 0
        elif op == self.opc.JUMP_ABSOLUTE:
            # Further classify JUMP_ABSOLUTE into backward jumps
            # which are used in loops, and "CONTINUE" jumps which
            # may appear in a "continue" statement. The loop-type
            # and continue-type jumps will help us classify loop
            # boundaries The continue-type jumps help us get
            # "continue" statements with would otherwise be turned
            # into a "pass" statement because JUMPs are sometimes
            # ignored in rules as just boundary overhead. In
            # comprehensions we might sometimes classify JUMP_BACK
            # as CONTINUE, but that's okay since we add a grammar
            # rule for that.
            target = self.get_target(offset)
            if target <= offset:
                if (offset in self.stmts
                        and self.code[offset + 3] not in (self.opc.END_FINALLY,
                                                          self.opc.POP_BLOCK)
                        and offset not in self.not_continue):
                    op_name = 'CONTINUE'
                else:
                    op_name = 'JUMP_BACK'
        elif op == self.opc.LOAD_GLOBAL:
            if offset in self.load_asserts:
                op_name = 'LOAD_ASSERT'
        elif op == self.opc.RETURN_VALUE:
            if offset in self.return_end_ifs:
                op_name = 'RETURN_END_IF'

        if offset in self.linestartoffsets:
            linestart = self.linestartoffsets[offset]
        else:
            linestart = None

        if offset not in replace:
            tokens.append(Token(op_name, oparg, pattr, offset, linestart,
                                op, has_arg, self.opc))
        else:
            tokens.append(Token(replace[offset], oparg, pattr, offset,
                                linestart, op, has_arg, self.opc))

    if show_asm in ('both', 'after'):
        for t in tokens:
            print(t.format(line_prefix='L.'))
        print()
    return tokens, customize
def disassemble(self, co, classname=None, code_objects={}, show_asm=None):
    """
    Pick out tokens from an uncompyle6 code object, and transform them,
    returning a list of uncompyle6 'Token's.

    The tranformations are made to assist the deparsing grammar.
    Specificially:
       -  various types of LOAD_CONST's are categorized in terms of what they load
       -  COME_FROM instructions are added to assist parsing control structures
       -  MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments

    Also, when we encounter certain tokens, we add them to a set which will cause custom
    grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
    cause specific rules for the specific number of arguments they take.

    Returns a (tokens, customize) pair: the token list and a dict of
    grammar customizations keyed by synthesized opcode names.

    NOTE(review): 'code_objects={}' is a mutable default argument; it is not
    mutated here, but consider 'code_objects=None' — confirm against callers.
    """
    # Caller may override the scanner-wide show_asm setting per call.
    show_asm = self.show_asm if not show_asm else show_asm
    # show_asm = 'before'
    if show_asm in ('both', 'before'):
        from xdis.bytecode import Bytecode
        bytecode = Bytecode(co, self.opc)
        for instr in bytecode.get_instructions(co):
            print(instr._disassemble())

    # Container for tokens
    tokens = []

    customize = {}
    if self.is_pypy:
        customize['PyPy'] = 1;

    Token = self.Token  # shortcut

    n = self.setup_code(co)

    self.build_lines_data(co, n)
    self.build_prev_op(n)

    # self.lines contains (block,addrLastInstr)
    if classname:
        # Undo Python's private-name mangling (_ClassName__attr) so the
        # deparsed source shows the original identifiers.
        classname = '_' + classname.lstrip('_') + '__'

        def unmangle(name):
            if name.startswith(classname) and name[-2:] != '__':
                return name[len(classname) - 2:]
            return name

        free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ]
        names = [ unmangle(name) for name in co.co_names ]
        varnames = [ unmangle(name) for name in co.co_varnames ]
    else:
        free = co.co_cellvars + co.co_freevars
        names = co.co_names
        varnames = co.co_varnames
    self.names = names

    # Scan for assertions. Later we will
    # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT'.
    # 'LOAD_ASSERT' is used in assert statements.
    self.load_asserts = set()
    for i in self.op_range(0, n):
        # We need to detect the difference between:
        #   raise AssertionError
        #  and
        #   assert ...
        # Below we use the heuristic that it is preceded by a POP_JUMP.
        # however we could also use followed by RAISE_VARARGS
        # or for PyPy there may be a JUMP_IF_NOT_DEBUG before.
        # FIXME: remove uses of PJIF, and PJIT
        if self.is_pypy:
            have_pop_jump = self.code[i] in (self.opc.PJIF, self.opc.PJIT)
        else:
            have_pop_jump = self.code[i] == self.opc.PJIT

        if have_pop_jump and self.code[i+3] == self.opc.LOAD_GLOBAL:
            if names[self.get_argument(i+3)] == 'AssertionError':
                self.load_asserts.add(i+3)

    cf = self.find_jump_targets()
    # contains (code, [addrRefToCode])

    # Replace PRINT_ITEM / PRINT_NEWLINE with *_CONT variants when they
    # continue a print statement started on the same source line.
    last_stmt = self.next_stmt[0]
    i = self.next_stmt[last_stmt]
    replace = {}
    while i < n-1:
        if self.lines[last_stmt].next > i:
            if self.code[last_stmt] == self.opc.PRINT_ITEM:
                if self.code[i] == self.opc.PRINT_ITEM:
                    replace[i] = 'PRINT_ITEM_CONT'
                elif self.code[i] == self.opc.PRINT_NEWLINE:
                    replace[i] = 'PRINT_NEWLINE_CONT'
        last_stmt = i
        i = self.next_stmt[i]

    extended_arg = 0
    for offset in self.op_range(0, n):
        # Emit a pseudo COME_FROM token for each jump that targets this offset.
        if offset in cf:
            k = 0
            for j in cf[offset]:
                tokens.append(Token(
                    'COME_FROM', None, repr(j),
                    offset="%s_%d" % (offset, k),
                    has_arg = True))
                k += 1

        op = self.code[offset]
        opname = self.opc.opname[op]

        oparg = None; pattr = None
        has_arg = (op >= self.opc.HAVE_ARGUMENT)
        if has_arg:
            oparg = self.get_argument(offset) + extended_arg
            extended_arg = 0
            if op == self.opc.EXTENDED_ARG:
                # Fold EXTENDED_ARG into the next opcode's argument.
                # NOTE(review): 'scan.L65536' presumably is the 65536
                # multiplier constant from the scanner module — confirm.
                extended_arg = oparg * scan.L65536
                continue
            if op in self.opc.hasconst:
                const = co.co_consts[oparg]
                if iscode(const):
                    oparg = const
                    # Categorize code-object constants by what they build.
                    if const.co_name == '<lambda>':
                        assert opname == 'LOAD_CONST'
                        opname = 'LOAD_LAMBDA'
                    elif const.co_name == '<genexpr>':
                        opname = 'LOAD_GENEXPR'
                    elif const.co_name == '<dictcomp>':
                        opname = 'LOAD_DICTCOMP'
                    elif const.co_name == '<setcomp>':
                        opname = 'LOAD_SETCOMP'
                    # verify() uses 'pattr' for comparison, since 'attr'
                    # now holds Code(const) and thus can not be used
                    # for comparison (todo: think about changing this)
                    # pattr = 'code_object @ 0x%x %s->%s' %\
                    # (id(const), const.co_filename, const.co_name)
                    pattr = '<code_object ' + const.co_name + '>'
                else:
                    pattr = const
            elif op in self.opc.hasname:
                pattr = names[oparg]
            elif op in self.opc.hasjrel:
                #  use instead: hasattr(self, 'patch_continue'): ?
                if self.version == 2.7:
                    self.patch_continue(tokens, offset, op)
                pattr = repr(offset + 3 + oparg)
            elif op in self.opc.hasjabs:
                # use instead: hasattr(self, 'patch_continue'): ?
                if self.version == 2.7:
                    self.patch_continue(tokens, offset, op)
                pattr = repr(oparg)
            elif op in self.opc.haslocal:
                pattr = varnames[oparg]
            elif op in self.opc.hascompare:
                pattr = self.opc.cmp_op[oparg]
            elif op in self.opc.hasfree:
                pattr = free[oparg]

        if op in self.varargs_ops:
            # CE - Hack for >= 2.5
            #      Now all values loaded via LOAD_CLOSURE are packed into
            #      a tuple before calling MAKE_CLOSURE.
            if op == self.opc.BUILD_TUPLE and \
                self.code[self.prev[offset]] == self.opc.LOAD_CLOSURE:
                continue
            else:
                if self.is_pypy and not oparg and opname == 'BUILD_MAP':
                    opname = 'BUILD_MAP_n'
                else:
                    # Append the argument count so the grammar can have
                    # per-arity rules, e.g. CALL_FUNCTION_3.
                    opname = '%s_%d' % (opname, oparg)
                if op != self.opc.BUILD_SLICE:
                    customize[opname] = oparg
        elif self.is_pypy and opname in ('LOOKUP_METHOD',
                                         'JUMP_IF_NOT_DEBUG',
                                         'SETUP_EXCEPT',
                                         'SETUP_FINALLY'):
            # The value in the dict is in special cases in semantic actions, such
            # as CALL_FUNCTION. The value is not used in these cases, so we put
            # in arbitrary value 0.
            customize[opname] = 0
        elif op == self.opc.JUMP_ABSOLUTE:
            # Further classify JUMP_ABSOLUTE into backward jumps
            # which are used in loops, and "CONTINUE" jumps which
            # may appear in a "continue" statement.  The loop-type
            # and continue-type jumps will help us classify loop
            # boundaries The continue-type jumps help us get
            # "continue" statements with would otherwise be turned
            # into a "pass" statement because JUMPs are sometimes
            # ignored in rules as just boundary overhead. In
            # comprehensions we might sometimes classify JUMP_BACK
            # as CONTINUE, but that's okay since we add a grammar
            # rule for that.
            target = self.get_target(offset)
            if target <= offset:
                if (offset in self.stmts
                    and self.code[offset+3] not in (self.opc.END_FINALLY,
                                                    self.opc.POP_BLOCK)
                    and offset not in self.not_continue):
                    opname = 'CONTINUE'
                else:
                    opname = 'JUMP_BACK'
        elif op == self.opc.LOAD_GLOBAL:
            if offset in self.load_asserts:
                opname = 'LOAD_ASSERT'
        elif op == self.opc.RETURN_VALUE:
            if offset in self.return_end_ifs:
                opname = 'RETURN_END_IF'

        if offset in self.linestartoffsets:
            linestart = self.linestartoffsets[offset]
        else:
            linestart = None

        # Use any PRINT_*_CONT replacement computed above; otherwise the
        # (possibly reclassified) opname.
        if offset not in replace:
            tokens.append(Token(
                opname, oparg, pattr, offset, linestart, op,
                has_arg, self.opc))
        else:
            tokens.append(Token(
                replace[offset], oparg, pattr, offset, linestart, op,
                has_arg, self.opc))
            pass
        pass

    if show_asm in ('both', 'after'):
        for t in tokens:
            print(t)
        print()
    return tokens, customize
def disassemble(self, co, classname=None, code_objects={}, show_asm=None):
    """
    Pick out tokens from an uncompyle6 code object, and transform them,
    returning a list of uncompyle6 'Token's.

    The tranformations are made to assist the deparsing grammar.
    Specificially:
       -  various types of LOAD_CONST's are categorized in terms of what they load
       -  COME_FROM instructions are added to assist parsing control structures
       -  MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments

    Also, when we encounter certain tokens, we add them to a set which will cause custom
    grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
    cause specific rules for the specific number of arguments they take.

    Returns a (tokens, customize) pair.
    """
    # Caller may override the scanner-wide show_asm setting per call.
    show_asm = self.show_asm if not show_asm else show_asm
    # show_asm = 'both'
    if show_asm in ('both', 'before'):
        bytecode = Bytecode(co, self.opc)
        for instr in bytecode.get_instructions(co):
            print(instr._disassemble())

    # Container for tokens
    tokens = []
    customize = {}
    if self.is_pypy:
        customize['PyPy'] = 1

    self.code = array('B', co.co_code)
    self.build_lines_data(co)
    self.build_prev_op()

    bytecode = Bytecode(co, self.opc)

    # FIXME: put as its own method?
    # Scan for assertions. Later we will
    # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT'.
    # 'LOAD_ASSERT' is used in assert statements.
    self.load_asserts = set()
    bs = list(bytecode)
    n = len(bs)
    for i in range(n):
        inst = bs[i]

        # We need to detect the difference between
        # "raise AssertionError" and "assert"
        # If we have a JUMP_FORWARD after the
        # RAISE_VARARGS then we have a "raise" statement
        # else we have an "assert" statement.
        if inst.opname == 'POP_JUMP_IF_TRUE' and i+1 < n:
            next_inst = bs[i+1]
            if (next_inst.opname == 'LOAD_GLOBAL' and
                next_inst.argval == 'AssertionError'):
                for j in range(i+2, n):
                    raise_inst = bs[j]
                    if raise_inst.opname.startswith('RAISE_VARARGS'):
                        if j+1 >= n or bs[j+1].opname != 'JUMP_FORWARD':
                            self.load_asserts.add(next_inst.offset)
                            pass
                        break
                    pass
                pass
            pass

    # Get jump targets
    # Format: {target offset: [jump offsets]}
    jump_targets = self.find_jump_targets()

    for inst in bytecode:
        argval = inst.argval
        # Emit a pseudo COME_FROM token for each jump that targets this offset.
        if inst.offset in jump_targets:
            jump_idx = 0
            for jump_offset in jump_targets[inst.offset]:
                tokens.append(Token('COME_FROM', None, repr(jump_offset),
                                    offset='%s_%s' % (inst.offset, jump_idx),
                                    has_arg = True, opc=self.opc))
                jump_idx += 1
                pass
            pass

        pattr = inst.argrepr
        opname = inst.opname
        op = inst.opcode

        if opname in ['LOAD_CONST']:
            const = inst.argval
            if iscode(const):
                # Categorize code-object constants by what they build.
                if const.co_name == '<lambda>':
                    opname = 'LOAD_LAMBDA'
                elif const.co_name == '<genexpr>':
                    opname = 'LOAD_GENEXPR'
                elif const.co_name == '<dictcomp>':
                    opname = 'LOAD_DICTCOMP'
                elif const.co_name == '<setcomp>':
                    opname = 'LOAD_SETCOMP'
                elif const.co_name == '<listcomp>':
                    opname = 'LOAD_LISTCOMP'
                # verify() uses 'pattr' for comparison, since 'attr'
                # now holds Code(const) and thus can not be used
                # for comparison (todo: think about changing this)
                # pattr = 'code_object @ 0x%x %s->%s' %\
                # (id(const), const.co_filename, const.co_name)
                pattr = '<code_object ' + const.co_name + '>'
            else:
                pattr = const
                pass
        elif opname in ('MAKE_FUNCTION', 'MAKE_CLOSURE'):
            # Split the packed argument into its three counts and encode
            # them into the opcode name so the grammar gets arity-specific
            # rules, e.g. MAKE_FUNCTION_N2_A_1_3.
            pos_args, name_pair_args, annotate_args = parse_fn_counts(inst.argval)
            if name_pair_args > 0:
                opname = '%s_N%d' % (opname, name_pair_args)
                pass
            if annotate_args > 0:
                # BUG FIX: was "% [opname, annotate_args]" — a list on the
                # right of % with two conversion specifiers raises
                # TypeError; printf-style formatting needs a tuple.
                opname = '%s_A_%d' % (opname, annotate_args)
                pass
            opname = '%s_%d' % (opname, pos_args)
            pattr = ("%d positional, %d keyword pair, %d annotated" %
                     (pos_args, name_pair_args, annotate_args))
            tokens.append(
                Token(
                    type_ = opname,
                    attr = (pos_args, name_pair_args, annotate_args),
                    pattr = pattr,
                    offset = inst.offset,
                    linestart = inst.starts_line,
                    op = op,
                    has_arg = op_has_argument(op, op3),
                    opc = self.opc
                )
            )
            continue
        elif op in self.varargs_ops:
            pos_args = inst.argval
            if self.is_pypy and not pos_args and opname == 'BUILD_MAP':
                opname = 'BUILD_MAP_n'
            else:
                opname = '%s_%d' % (opname, pos_args)
        elif self.is_pypy and opname in ('CALL_METHOD', 'JUMP_IF_NOT_DEBUG'):
            # The value in the dict is in special cases in semantic actions, such
            # as CALL_FUNCTION. The value is not used in these cases, so we put
            # in arbitrary value 0.
            customize[opname] = 0
        elif opname == 'UNPACK_EX':
            # FIXME: try with scanner and parser by
            # changing inst.argval
            before_args = inst.argval & 0xFF
            after_args = (inst.argval >> 8) & 0xff
            pattr = "%d before vararg, %d after" % (before_args, after_args)
            argval = (before_args, after_args)
            opname = '%s_%d+%d' % (opname, before_args, after_args)
        elif op == self.opc.JUMP_ABSOLUTE:
            # Further classify JUMP_ABSOLUTE into backward jumps
            # which are used in loops, and "CONTINUE" jumps which
            # may appear in a "continue" statement.  The loop-type
            # and continue-type jumps will help us classify loop
            # boundaries The continue-type jumps help us get
            # "continue" statements with would otherwise be turned
            # into a "pass" statement because JUMPs are sometimes
            # ignored in rules as just boundary overhead. In
            # comprehensions we might sometimes classify JUMP_BACK
            # as CONTINUE, but that's okay since we add a grammar
            # rule for that.
            pattr = inst.argval
            target = self.get_target(inst.offset)
            if target <= inst.offset:
                next_opname = self.opname[self.code[inst.offset+3]]
                if (inst.offset in self.stmts and
                    next_opname not in ('END_FINALLY', 'POP_BLOCK') and
                    inst.offset not in self.not_continue):
                    opname = 'CONTINUE'
                else:
                    opname = 'JUMP_BACK'
                    # FIXME: this is a hack to catch stuff like:
                    #   if x: continue
                    # the "continue" is not on a new line.
                    # There are other situations were we don't catch
                    # CONTINUE as well.
                    if tokens[-1].type == 'JUMP_BACK':
                        tokens[-1].type = intern('CONTINUE')
        elif op == self.opc.RETURN_VALUE:
            if inst.offset in self.return_end_ifs:
                opname = 'RETURN_END_IF'
        elif inst.offset in self.load_asserts:
            opname = 'LOAD_ASSERT'

        tokens.append(
            Token(
                type_ = opname,
                attr = argval,
                pattr = pattr,
                offset = inst.offset,
                linestart = inst.starts_line,
                op = op,
                has_arg = (op >= op3.HAVE_ARGUMENT),
                opc = self.opc
                )
            )
        pass

    if show_asm in ('both', 'after'):
        for t in tokens:
            print(t)
        print()
    return tokens, customize
def make_function3_annotate(self, node, is_lambda, nested=1,
                            code_node=None, annotate_last=-1):
    """
    Dump function defintion, doc string, and function
    body. This code is specialized for Python 3

    Writes the annotated parameter list, optional '-> ...' return
    annotation, docstring, and body for a MAKE_FUNCTION/MAKE_CLOSURE
    node to self's output stream.
    """

    def build_param(ast, name, default):
        """build parameters:
            - handle defaults
            - handle format tuple parameters
        """
        if default:
            value = self.traverse(default, indent='')
            maybe_show_tree_param_default(self, name, value)
            result = '%s=%s' % (name, value)
            if result[-2:] == '= ':  # default was 'LOAD_CONST None'
                result += 'None'
            return result
        else:
            return name

    # MAKE_FUNCTION_... or MAKE_CLOSURE_...
    assert node[-1].kind.startswith('MAKE_')

    # Find the annotation tuple (if any), scanning from the right.
    annotate_tuple = None
    for annotate_last in range(len(node) - 1, -1, -1):
        if node[annotate_last] == 'annotate_tuple':
            annotate_tuple = node[annotate_last]
            break
    annotate_args = {}

    if (annotate_tuple == 'annotate_tuple'
        and annotate_tuple[0] in ('LOAD_CONST', 'LOAD_NAME')
        and isinstance(annotate_tuple[0].attr, tuple)):
        annotate_tup = annotate_tuple[0].attr
        # Walk names (right-to-left) in parallel with the annotate_arg
        # children preceding the tuple.
        i = -1
        j = annotate_last - 1
        l = -len(node)
        while j >= l and node[j].kind in ('annotate_arg', 'annotate_tuple'):
            annotate_args[annotate_tup[i]] = node[j][0]
            i -= 1
            j -= 1

    args_node = node[-1]
    if isinstance(args_node.attr, tuple):
        # positional args are before kwargs
        defparams = node[:args_node.attr[0]]
        pos_args, kw_args, annotate_argc = args_node.attr
        if 'return' in annotate_args.keys():
            annotate_argc = len(annotate_args) - 1
    else:
        defparams = node[:args_node.attr]
        kw_args = 0
        annotate_argc = 0
        pass

    # Pre-render each annotation expression to source text.
    annotate_dict = {}
    for name in annotate_args.keys():
        n = self.traverse(annotate_args[name], indent='')
        annotate_dict[name] = n

    if 3.0 <= self.version <= 3.2:
        lambda_index = -2
    elif 3.03 <= self.version:
        lambda_index = -3
    else:
        lambda_index = None

    if lambda_index and is_lambda and iscode(node[lambda_index].attr):
        assert node[lambda_index].kind == 'LOAD_LAMBDA'
        code = node[lambda_index].attr
    else:
        code = code_node.attr

    assert iscode(code)
    code = Code(code, self.scanner, self.currentclass)

    # add defaults values to parameter names
    argc = code.co_argcount
    kwonlyargcount = code.co_kwonlyargcount

    paramnames = list(code.co_varnames[:argc])
    if kwonlyargcount > 0:
        kwargs = list(code.co_varnames[argc:argc + kwonlyargcount])

    try:
        ast = self.build_ast(code._tokens,
                             code._customize,
                             is_lambda=is_lambda,
                             noneInNames=('None' in code.co_names))
    except (ParserError, ParserError2) as p:
        self.write(str(p))
        if not self.tolerate_errors:
            self.ERROR = p
        return

    kw_pairs = args_node.attr[1]
    indent = self.indent

    if is_lambda:
        self.write("lambda ")
    else:
        self.write("(")

    # Continuation lines are indented to the current output column.
    last_line = self.f.getvalue().split("\n")[-1]
    l = len(last_line)
    indent = ' ' * l
    line_number = self.line_number

    # i = number of positional parameters with no default
    i = len(paramnames) - len(defparams)
    # BUG FIX: 'no_paramnames' was referenced below without ever being
    # assigned, raising NameError when writing a wrapped '->' annotation.
    no_paramnames = len(paramnames[:i]) == 0
    suffix = ''

    # Parameters without defaults, with their annotations.
    for param in paramnames[:i]:
        self.write(suffix, param)
        suffix = ', '
        if param in annotate_dict:
            self.write(': %s' % annotate_dict[param])
            if (line_number != self.line_number):
                suffix = ",\n" + indent
                line_number = self.line_number
        # value, string = annotate_args[param]
        # if string:
        #     self.write(': "%s"' % value)
        # else:
        #     self.write(': %s' % value)

    suffix = ', ' if i > 0 else ''
    # Parameters with default values (pos_arg children), with annotations.
    for n in node:
        if n == 'pos_arg':
            self.write(suffix)
            param = paramnames[i]
            self.write(param)
            if param in annotate_args:
                aa = annotate_args[param]
                if isinstance(aa, tuple):
                    aa = aa[0]
                    self.write(': "%s"' % aa)
                elif isinstance(aa, SyntaxTree):
                    self.write(': ')
                    self.preorder(aa)

            self.write('=')
            i += 1
            self.preorder(n)
            if (line_number != self.line_number):
                suffix = ",\n" + indent
                line_number = self.line_number
            else:
                suffix = ', '

    if code_has_star_arg(code):
        star_arg = code.co_varnames[argc + kwonlyargcount]
        if annotate_dict and star_arg in annotate_dict:
            self.write(suffix, '*%s: %s' % (star_arg, annotate_dict[star_arg]))
        else:
            self.write(suffix, '*%s' % star_arg)
        argc += 1

    # self.println(indent, '#flags:\t', int(code.co_flags))
    ends_in_comma = False
    if kwonlyargcount > 0:
        # Keyword-only parameters follow a bare '*' (or the *args).
        if not code_has_star_arg(code):
            if argc > 0:
                self.write(", *, ")
            else:
                self.write("*, ")
            pass
            ends_in_comma = True
        else:
            if argc > 0:
                self.write(", ")
                ends_in_comma = True

        kw_args = [None] * kwonlyargcount

        # Keyword-only parameters that have defaults.
        for n in node:
            if n == 'kwargs':
                n = n[0]
            if n == 'kwarg':
                name = eval(n[0].pattr)
                idx = kwargs.index(name)
                default = self.traverse(n[1], indent='')
                if annotate_dict and name in annotate_dict:
                    kw_args[idx] = '%s: %s=%s' % (name, annotate_dict[name], default)
                else:
                    kw_args[idx] = '%s=%s' % (name, default)
                pass
            pass

        # handling other args: keyword-only parameters with no default.
        other_kw = [c == None for c in kw_args]
        for i, flag in enumerate(other_kw):
            if flag:
                n = kwargs[i]
                if n in annotate_dict:
                    kw_args[i] = "%s: %s" % (n, annotate_dict[n])
                else:
                    kw_args[i] = "%s" % n

        self.write(', '.join(kw_args))
        ends_in_comma = False
    else:
        if argc == 0:
            ends_in_comma = True

    if code_has_star_star_arg(code):
        if not ends_in_comma:
            self.write(', ')
        star_star_arg = code.co_varnames[argc + kwonlyargcount]
        if annotate_dict and star_star_arg in annotate_dict:
            self.write('**%s: %s' % (star_star_arg, annotate_dict[star_star_arg]))
        else:
            self.write('**%s' % star_star_arg)

    if is_lambda:
        self.write(": ")
    else:
        self.write(')')

    if 'return' in annotate_tuple[0].attr:
        if (line_number != self.line_number) and not no_paramnames:
            self.write("\n" + indent)
            line_number = self.line_number
        self.write(' -> ')
        # value, string = annotate_args['return']
        # if string:
        #     self.write(' -> "%s"' % value)
        # else:
        #     self.write(' -> %s' % value)
        self.preorder(node[annotate_last - 1])

    self.println(":")

    if (len(code.co_consts) > 0 and
        code.co_consts[0] is not None and not is_lambda):  # ugly
        # docstring exists, dump it
        print_docstring(self, self.indent, code.co_consts[0])

    code._tokens = None  # save memory
    assert ast == 'stmts'

    all_globals = find_all_globals(ast, set())
    globals, nonlocals = find_globals_and_nonlocals(ast, set(), set(),
                                                    code, self.version)
    for g in sorted((all_globals & self.mod_globs) | globals):
        self.println(self.indent, 'global ', g)
    for nl in sorted(nonlocals):
        self.println(self.indent, 'nonlocal ', nl)
    self.mod_globs -= all_globals
    has_none = 'None' in code.co_names
    rn = has_none and not find_none(ast)
    self.gen_source(ast, code.co_name, code._customize, is_lambda=is_lambda,
                    returnNone=rn)
    code._tokens = code._customize = None  # save memory
def n_classdef3(node):
    # class definition ('class X(A,B,C):')
    #
    # Writes a complete class statement for a classdef/classdefdeco2
    # node: header, superclass list, and (recursively) the class body.
    cclass = self.currentclass

    # Pick out various needed bits of information
    # * class_name - the name of the class
    # * subclass_info - the parameters to the class  e.g.
    #      class Foo(bar, baz)
    #             ----------
    # * subclass_code - the code for the subclass body
    subclass_info = None
    if node == 'classdefdeco2':
        if self.version >= 3.6:
            class_name = node[1][1].pattr
        elif self.version <= 3.3:
            class_name = node[2][0].pattr
        else:
            class_name = node[1][2].pattr
        build_class = node
    else:
        build_class = node[0]
        if self.version >= 3.6:
            if build_class == 'build_class_kw':
                mkfunc = build_class[1]
                assert mkfunc == 'mkfunc'
                subclass_info = build_class
                if hasattr(mkfunc[0], 'attr') and iscode(mkfunc[0].attr):
                    subclass_code = mkfunc[0].attr
                else:
                    assert mkfunc[0] == 'load_closure'
                    subclass_code = mkfunc[1].attr
                    assert iscode(subclass_code)
            if build_class[1][0] == 'load_closure':
                code_node = build_class[1][1]
            else:
                code_node = build_class[1][0]
            class_name = code_node.attr.co_name
        else:
            class_name = node[1][0].pattr
            build_class = node[0]

    assert 'mkfunc' == build_class[1]
    mkfunc = build_class[1]
    if mkfunc[0] in ('kwargs', 'no_kwargs'):
        if 3.0 <= self.version <= 3.2:
            for n in mkfunc:
                if hasattr(n, 'attr') and iscode(n.attr):
                    subclass_code = n.attr
                    break
                elif n == 'expr':
                    subclass_code = n[0].attr
                pass
            pass
        else:
            for n in mkfunc:
                if hasattr(n, 'attr') and iscode(n.attr):
                    subclass_code = n.attr
                    break
                pass
            pass
        if node == 'classdefdeco2':
            subclass_info = node
        else:
            subclass_info = node[0]
    elif build_class[1][0] == 'load_closure':
        # Python 3 with closures not functions
        load_closure = build_class[1]
        if hasattr(load_closure[-3], 'attr'):
            # Python 3.3 classes with closures work like this.
            # Note have to test before 3.2 case because
            # index -2 also has an attr.
            subclass_code = load_closure[-3].attr
        elif hasattr(load_closure[-2], 'attr'):
            # Python 3.2 works like this
            subclass_code = load_closure[-2].attr
        else:
            # BUG FIX: was "raise 'Internal Error...'" — raising a string
            # is a TypeError in Python 3; raise a real exception instead.
            raise RuntimeError('Internal Error n_classdef: cannot find class body')
        if hasattr(build_class[3], '__len__'):
            if not subclass_info:
                subclass_info = build_class[3]
        elif hasattr(build_class[2], '__len__'):
            subclass_info = build_class[2]
        else:
            # BUG FIX: same string-raise problem as above.
            raise RuntimeError('Internal Error n_classdef: cannot superclass name')
    elif self.version >= 3.6 and node == 'classdefdeco2':
        subclass_info = node
        subclass_code = build_class[1][0].attr
    elif not subclass_info:
        if mkfunc[0] in ('no_kwargs', 'kwargs'):
            subclass_code = mkfunc[1].attr
        else:
            subclass_code = mkfunc[0].attr
        if node == 'classdefdeco2':
            subclass_info = node
        else:
            subclass_info = node[0]

    if (node == 'classdefdeco2'):
        self.write('\n')
    else:
        self.write('\n\n')

    self.currentclass = str(class_name)
    self.write(self.indent, 'class ', self.currentclass)

    self.print_super_classes3(subclass_info)
    self.println(':')

    # class body
    self.indent_more()
    self.build_class(subclass_code)
    self.indent_less()

    self.currentclass = cclass
    if len(self.param_stack) > 1:
        self.write('\n\n')
    else:
        self.write('\n\n\n')

    self.prune()
def cmp_code_objects(version, is_pypy, code_obj1, code_obj2, verify, name=""):
    """
    Compare two code-objects.

    This is the main part of this module.

    Raises CmpErrorCodeLen / CmpErrorCode / CmpErrorMember on mismatch;
    returns None when the objects compare equal under the given 'verify'
    level ('verify' compares members; 'strong' also token-compares co_code).
    Recurses into nested code objects found in co_consts.
    """
    # print code_obj1, type(code_obj2)
    assert iscode(
        code_obj1
    ), "cmp_code_object first object type is %s, not code" % type(code_obj1)
    assert iscode(
        code_obj2
    ), "cmp_code_object second object type is %s, not code" % type(code_obj2)
    # print dir(code_obj1)
    if isinstance(code_obj1, object):
        # new style classes (Python 2.2)
        # assume _both_ code objects to be new stle classes
        assert dir(code_obj1) == dir(code_obj2)
    else:
        # old style classes
        assert dir(code_obj1) == code_obj1.__members__
        assert dir(code_obj2) == code_obj2.__members__
        assert code_obj1.__members__ == code_obj2.__members__

    # Build a dotted name for error reporting.
    if name == "__main__":
        name = code_obj1.co_name
    else:
        name = "%s.%s" % (name, code_obj1.co_name)
        if name == ".?":
            name = "__main__"

    if isinstance(code_obj1, object) and code_equal(code_obj1, code_obj2):
        # use the new style code-classes' __cmp__ method, which
        # should be faster and more sophisticated
        # if this compare fails, we use the old routine to
        # find out, what exactly is nor equal
        # if this compare succeds, simply return
        # return
        pass

    if isinstance(code_obj1, object):
        members = [x for x in dir(code_obj1) if x.startswith("co_")]
    else:
        members = dir(code_obj1)
    members.sort()  # ; members.reverse()

    tokens1 = None
    for member in members:
        if member in __IGNORE_CODE_MEMBERS__ or verify != "verify":
            pass
        elif member == "co_code":
            # Token-level comparison of the bytecode; only done under
            # the strongest verification level.
            if verify != "strong":
                continue
            scanner = get_scanner(version, is_pypy, show_asm=False)

            global JUMP_OPS
            JUMP_OPS = list(scan.JUMP_OPS) + ["JUMP_BACK"]

            # use changed Token class
            # We (re)set this here to save exception handling,
            # which would get confusing.
            scanner.setTokenClass(Token)
            try:
                # ingest both code-objects
                tokens1, customize = scanner.ingest(code_obj1)
                del customize  # save memory
                tokens2, customize = scanner.ingest(code_obj2)
                del customize  # save memory
            finally:
                scanner.resetTokenClass()  # restore Token class

            targets1 = dis.findlabels(code_obj1.co_code)
            tokens1 = [t for t in tokens1 if t.kind != "COME_FROM"]
            tokens2 = [t for t in tokens2 if t.kind != "COME_FROM"]

            i1 = 0
            i2 = 0
            offset_map = {}
            check_jumps = {}
            # Walk both token streams in lockstep, tolerating a known set
            # of equivalent-but-different encodings between them.
            while i1 < len(tokens1):
                if i2 >= len(tokens2):
                    # Allow a trailing "LOAD_CONST None; RETURN_VALUE"
                    # pair that some compilers append redundantly.
                    if (len(tokens1) == len(tokens2) + 2
                        and tokens1[-1].kind == "RETURN_VALUE"
                        and tokens1[-2].kind == "LOAD_CONST"
                        and tokens1[-2].pattr is None
                        and tokens1[-3].kind == "RETURN_VALUE"):
                        break
                    else:
                        raise CmpErrorCodeLen(name, tokens1, tokens2)

                offset_map[tokens1[i1].offset] = tokens2[i2].offset

                # Resolve any deferred jump-target checks aimed at this offset.
                for idx1, idx2, offset2 in check_jumps.get(
                        tokens1[i1].offset, []):
                    if offset2 != tokens2[i2].offset:
                        raise CmpErrorCode(
                            name,
                            tokens1[idx1].offset,
                            tokens1[idx1],
                            tokens2[idx2],
                            tokens1,
                            tokens2,
                        )

                if tokens1[i1].kind != tokens2[i2].kind:
                    if tokens1[i1].kind == "LOAD_CONST" == tokens2[i2].kind:
                        # A run of LOAD_CONSTs folded into one
                        # BUILD_TUPLE/BUILD_LIST constant (or vice versa).
                        i = 1
                        while tokens1[i1 + i].kind == "LOAD_CONST":
                            i += 1
                        if tokens1[i1 + i].kind.startswith(
                                ("BUILD_TUPLE", "BUILD_LIST")) and i == int(
                                    tokens1[i1 + i].kind.split("_")[-1]):
                            t = tuple(
                                [elem.pattr for elem in tokens1[i1:i1 + i]])
                            if t != tokens2[i2].pattr:
                                raise CmpErrorCode(
                                    name,
                                    tokens1[i1].offset,
                                    tokens1[i1],
                                    tokens2[i2],
                                    tokens1,
                                    tokens2,
                                )
                            i1 += i + 1
                            i2 += 1
                            continue
                        elif (i == 2 and tokens1[i1 + i].kind == "ROT_TWO"
                              and tokens2[i2 + 1].kind == "UNPACK_SEQUENCE_2"):
                            i1 += 3
                            i2 += 2
                            continue
                        elif i == 2 and tokens1[i1 + i].kind in BIN_OP_FUNCS:
                            # Constant-folded binary op: apply the op to the
                            # two constants and compare the folded result.
                            f = BIN_OP_FUNCS[tokens1[i1 + i].kind]
                            if (f(tokens1[i1].pattr,
                                  tokens1[i1 + 1].pattr) == tokens2[i2].pattr):
                                i1 += 3
                                i2 += 1
                                continue
                    elif tokens1[i1].kind == "UNARY_NOT":
                        # 'not' folded into an inverted conditional jump.
                        if tokens2[i2].kind == "POP_JUMP_IF_TRUE":
                            if tokens1[i1 + 1].kind == "POP_JUMP_IF_FALSE":
                                i1 += 2
                                i2 += 1
                                continue
                        elif tokens2[i2].kind == "POP_JUMP_IF_FALSE":
                            if tokens1[i1 + 1].kind == "POP_JUMP_IF_TRUE":
                                i1 += 2
                                i2 += 1
                                continue
                    elif (tokens1[i1].kind in ("JUMP_FORWARD", "JUMP_BACK")
                          and tokens1[i1 - 1].kind == "RETURN_VALUE"
                          and tokens2[i2 - 1].kind in ("RETURN_VALUE",
                                                       "RETURN_END_IF")
                          and int(tokens1[i1].offset) not in targets1):
                        # Dead jump after a return: skip it on side 1 only.
                        i1 += 1
                        continue
                    elif (tokens1[i1].kind == "JUMP_BACK"
                          and tokens2[i2].kind == "CONTINUE"):
                        # FIXME: should make sure that offset is inside loop, not outside of it
                        i1 += 2
                        i2 += 2
                        continue
                    elif (tokens1[i1].kind == "JUMP_FORWARD"
                          and tokens2[i2].kind == "JUMP_BACK"
                          and tokens1[i1 + 1].kind == "JUMP_BACK"
                          and tokens2[i2 + 1].kind == "JUMP_BACK"
                          and int(tokens1[i1].pattr) ==
                          int(tokens1[i1].offset) + 3):
                        if int(tokens1[i1].pattr) == int(
                                tokens1[i1 + 1].offset):
                            i1 += 2
                            i2 += 2
                            continue
                    elif (tokens1[i1].kind == "LOAD_NAME"
                          and tokens2[i2].kind == "LOAD_CONST"
                          and tokens1[i1].pattr == "None"
                          and tokens2[i2].pattr is None):
                        pass
                    elif (tokens1[i1].kind == "LOAD_GLOBAL"
                          and tokens2[i2].kind == "LOAD_NAME"
                          and tokens1[i1].pattr == tokens2[i2].pattr):
                        pass
                    elif (tokens1[i1].kind == "LOAD_ASSERT"
                          and tokens2[i2].kind == "LOAD_NAME"
                          and tokens1[i1].pattr == tokens2[i2].pattr):
                        pass
                    elif (tokens1[i1].kind == "RETURN_VALUE"
                          and tokens2[i2].kind == "RETURN_END_IF"):
                        pass
                    elif (tokens1[i1].kind == "BUILD_TUPLE_0"
                          and tokens2[i2].pattr == ()):
                        pass
                    else:
                        raise CmpErrorCode(
                            name,
                            tokens1[i1].offset,
                            tokens1[i1],
                            tokens2[i2],
                            tokens1,
                            tokens2,
                        )
                elif (tokens1[i1].kind in JUMP_OPS
                      and tokens1[i1].pattr != tokens2[i2].pattr):
                    if tokens1[i1].kind == "JUMP_BACK":
                        # Backward jump: both targets were already mapped,
                        # so we can check them immediately.
                        dest1 = int(tokens1[i1].pattr)
                        dest2 = int(tokens2[i2].pattr)
                        if offset_map[dest1] != dest2:
                            raise CmpErrorCode(
                                name,
                                tokens1[i1].offset,
                                tokens1[i1],
                                tokens2[i2],
                                tokens1,
                                tokens2,
                            )
                    else:
                        # Forward jump: defer the check until the target
                        # offset is reached.
                        # NOTE(review): 'dest2' is not assigned in this
                        # branch; the NameError is silently swallowed by
                        # the bare 'except', so these deferred checks are
                        # likely never recorded — confirm and fix upstream.
                        # import pdb; pdb.set_trace()
                        try:
                            dest1 = int(tokens1[i1].pattr)
                            if dest1 in check_jumps:
                                check_jumps[dest1].append((i1, i2, dest2))
                            else:
                                check_jumps[dest1] = [(i1, i2, dest2)]
                        except:
                            pass

                i1 += 1
                i2 += 1
            del tokens1, tokens2  # save memory
        elif member == "co_consts":
            # partial optimization can make the co_consts look different,
            #   so we'll just compare the code consts
            codes1 = (c for c in code_obj1.co_consts if hasattr(c, "co_consts"))
            codes2 = (c for c in code_obj2.co_consts if hasattr(c, "co_consts"))

            for c1, c2 in zip(codes1, codes2):
                cmp_code_objects(version, is_pypy, c1, c2, verify, name=name)
        elif member == "co_flags":
            flags1 = code_obj1.co_flags
            flags2 = code_obj2.co_flags
            if is_pypy:
                # For PYPY for now we don't care about PYPY_SOURCE_IS_UTF8:
                flags2 &= ~0x0100  # PYPY_SOURCE_IS_UTF8
            # We also don't care about COROUTINE or GENERATOR for now
            flags1 &= ~0x000000A0
            flags2 &= ~0x000000A0
            if flags1 != flags2:
                raise CmpErrorMember(name, "co_flags", pretty_flags(flags1),
                                     pretty_flags(flags2))
        else:
            # all other members must be equal
            if getattr(code_obj1, member) != getattr(code_obj2, member):
                raise CmpErrorMember(name, member, getattr(code_obj1, member),
                                     getattr(code_obj2, member))
def make_function3(self, node, isLambda, nested=1, codeNode=None):
    """Dump function definition, doc string, and function body.

    Writes the (possibly lambda) parameter list — positional parameters
    with defaults, *args, keyword-only args, **kwargs — then the
    docstring, global declarations, and the decompiled body.
    """

    # FIXME: call make_function3 if we are self.version >= 3.0
    # and then simplify the below.

    def build_param(ast, name, default):
        """build parameters:
            - handle defaults
            - handle format tuple parameters
        """
        if default:
            value = self.traverse(default, indent='')
            maybe_show_ast_param_default(self.showast, name, value)
            result = '%s=%s' % (name, value)
            if result[-2:] == '= ':  # default was 'LOAD_CONST None'
                result += 'None'
            return result
        else:
            return name

    # MAKE_FUNCTION_... or MAKE_CLOSURE_...
    assert node[-1].type.startswith('MAKE_')

    args_node = node[-1]
    if isinstance(args_node.attr, tuple):
        if self.version <= 3.3 and len(node) > 2 and node[-3] != 'LOAD_LAMBDA':
            # positional args are after kwargs
            defparams = node[1:args_node.attr[0] + 1]
        else:
            # positional args are before kwargs
            defparams = node[:args_node.attr[0]]
        pos_args, kw_args, annotate_argc = args_node.attr
    else:
        defparams = node[:args_node.attr]
        kw_args = 0
        pass

    # Index of the LOAD_LAMBDA child varies by bytecode version.
    if 3.0 <= self.version <= 3.2:
        lambda_index = -2
    elif 3.03 <= self.version:
        lambda_index = -3
    else:
        lambda_index = None

    if lambda_index and isLambda and iscode(node[lambda_index].attr):
        assert node[lambda_index].type == 'LOAD_LAMBDA'
        code = node[lambda_index].attr
    else:
        code = codeNode.attr

    assert iscode(code)
    code = Code(code, self.scanner, self.currentclass)

    # add defaults values to parameter names
    argc = code.co_argcount
    paramnames = list(code.co_varnames[:argc])

    # defaults are for last n parameters, thus reverse
    if not 3.0 <= self.version <= 3.2:
        paramnames.reverse()
        defparams.reverse()

    try:
        ast = self.build_ast(code._tokens,
                             code._customize,
                             isLambda=isLambda,
                             noneInNames=('None' in code.co_names))
    except ParserError as p:
        # Parse failure: emit the error text in place of the body.
        self.write(str(p))
        self.ERROR = p
        return

    kw_pairs = args_node.attr[1] if self.version >= 3.0 else 0
    indent = self.indent

    # build parameters
    if self.version != 3.2:
        # Pair each parameter name with its default (if any).
        params = [build_param(ast, name, default) for
                  name, default in zip_longest(paramnames, defparams,
                                               fillvalue=None)]
        params.reverse()  # back to correct order

        if code_has_star_arg(code):
            if self.version > 3.0:
                params.append('*%s' % code.co_varnames[argc + kw_pairs])
            else:
                params.append('*%s' % code.co_varnames[argc])
            argc += 1

        # dump parameter list (with default values)
        if isLambda:
            self.write("lambda ", ", ".join(params))
        else:
            self.write("(", ", ".join(params))
        # self.println(indent, '#flags:\t', int(code.co_flags))
    else:
        # 3.2 path: write parameters incrementally, tracking the output
        # column so wrapped lines align under the opening paren.
        if isLambda:
            self.write("lambda ")
        else:
            self.write("(")
            pass

        last_line = self.f.getvalue().split("\n")[-1]
        l = len(last_line)
        indent = ' ' * l
        line_number = self.line_number

        if code_has_star_arg(code):
            self.write('*%s' % code.co_varnames[argc + kw_pairs])
            argc += 1

        # i = number of positional parameters without defaults
        i = len(paramnames) - len(defparams)
        self.write(", ".join(paramnames[:i]))
        suffix = ', ' if i > 0 else ''
        for n in node:
            if n == 'pos_arg':
                self.write(suffix)
                self.write(paramnames[i] + '=')
                i += 1
                self.preorder(n)
                if (line_number != self.line_number):
                    suffix = ",\n" + indent
                    line_number = self.line_number
                else:
                    suffix = ', '

    if kw_args > 0:
        # Keyword-only parameters; co_flags bit 4 (CO_VARARGS) decides
        # whether a bare '*' separator is needed.
        if not (4 & code.co_flags):
            if argc > 0:
                self.write(", *, ")
            else:
                self.write("*, ")
            pass
        else:
            self.write(", ")

        if not 3.0 <= self.version <= 3.2:
            for n in node:
                if n == 'pos_arg':
                    continue
                elif self.version >= 3.4 and n.type != 'kwargs':
                    continue
                else:
                    self.preorder(n)
                break
        else:
            kwargs = node[0]
            last = len(kwargs) - 1
            i = 0
            for n in node[0]:
                if n == 'kwarg':
                    self.write('%s=' % n[0].pattr)
                    self.preorder(n[1])
                    if i < last:
                        self.write(', ')
                    i += 1
                    pass
                pass
            pass
        pass

    if code_has_star_star_arg(code):
        if argc > 0:
            self.write(', ')
        self.write('**%s' % code.co_varnames[argc + kw_pairs])

    if isLambda:
        self.write(": ")
    else:
        self.println("):")

    if len(code.co_consts
           ) > 0 and code.co_consts[0] is not None and not isLambda:  # ugly
        # docstring exists, dump it
        print_docstring(self, self.indent, code.co_consts[0])

    code._tokens = None  # save memory
    assert ast == 'stmts'

    all_globals = find_all_globals(ast, set())
    for g in ((all_globals & self.mod_globs) | find_globals(ast, set())):
        self.println(self.indent, 'global ', g)

    self.mod_globs -= all_globals
    has_none = 'None' in code.co_names
    rn = has_none and not find_none(ast)
    self.gen_source(ast, code.co_name, code._customize,
                    isLambda=isLambda, returnNone=rn)
    code._tokens = None
    code._customize = None  # save memory
def cmp_code_objects(version, is_pypy, code_obj1, code_obj2, name=''):
    """
    Compare two code objects; the main routine of this module.

    Walks every ``co_``-prefixed member of the two code objects.  For
    ``co_code`` both objects are disassembled with the scanner matching
    *version* and the token streams are compared, tolerating a number of
    known-equivalent bytecode rewrites (constants folded into
    BUILD_TUPLE/BUILD_LIST, UNARY_NOT + inverted jump, redundant jumps
    after RETURN_VALUE, ...).  For ``co_consts`` only nested code objects
    are compared, recursively.  All other members must compare equal.

    Raises CmpErrorCodeLen, CmpErrorCode or CmpErrorMember on mismatch.
    """
    assert iscode(code_obj1), \
        "cmp_code_object first object type is %s, not code" % type(code_obj1)
    assert iscode(code_obj2), \
        "cmp_code_object second object type is %s, not code" % type(code_obj2)

    if isinstance(code_obj1, object):
        # new style classes (Python 2.2)
        # assume _both_ code objects to be new style classes
        assert dir(code_obj1) == dir(code_obj2)
    else:
        # old style classes
        assert dir(code_obj1) == code_obj1.__members__
        assert dir(code_obj2) == code_obj2.__members__
        assert code_obj1.__members__ == code_obj2.__members__

    # Build a dotted name for error reporting.
    if name == '__main__':
        name = code_obj1.co_name
    else:
        name = '%s.%s' % (name, code_obj1.co_name)
        if name == '.?':
            name = '__main__'

    if isinstance(code_obj1, object) and code_equal(code_obj1, code_obj2):
        # use the new style code-classes' __cmp__ method, which
        # should be faster and more sophisticated
        # if this compare fails, we use the old routine to
        # find out, what exactly is not equal
        # if this compare succeeds, simply return
        # return
        pass

    if isinstance(code_obj1, object):
        members = [x for x in dir(code_obj1) if x.startswith('co_')]
    else:
        members = dir(code_obj1)
    members.sort()  # ; members.reverse()

    tokens1 = None
    for member in members:
        if member in __IGNORE_CODE_MEMBERS__:
            pass
        elif member == 'co_code':
            # Pick the scanner matching the bytecode version.
            # FIX: 2.3 previously instantiated scan.Scanner26() and 2.4
            # scan.Scanner25() out of the scanner23/scanner24 modules,
            # which do not define those classes (AttributeError at runtime).
            if version == 2.3:
                import uncompyle6.scanners.scanner23 as scan
                scanner = scan.Scanner23()
            elif version == 2.4:
                import uncompyle6.scanners.scanner24 as scan
                scanner = scan.Scanner24()
            elif version == 2.5:
                import uncompyle6.scanners.scanner25 as scan
                scanner = scan.Scanner25()
            elif version == 2.6:
                import uncompyle6.scanners.scanner26 as scan
                scanner = scan.Scanner26()
            elif version == 2.7:
                if is_pypy:
                    import uncompyle6.scanners.pypy27 as scan
                    scanner = scan.ScannerPyPy27(show_asm=False)
                else:
                    import uncompyle6.scanners.scanner27 as scan
                    scanner = scan.Scanner27()
            elif version == 3.2:
                if is_pypy:
                    import uncompyle6.scanners.pypy32 as scan
                    scanner = scan.ScannerPyPy32()
                else:
                    import uncompyle6.scanners.scanner32 as scan
                    scanner = scan.Scanner32()
            elif version == 3.3:
                import uncompyle6.scanners.scanner33 as scan
                scanner = scan.Scanner33()
            elif version == 3.4:
                import uncompyle6.scanners.scanner34 as scan
                scanner = scan.Scanner34()
            elif version == 3.5:
                import uncompyle6.scanners.scanner35 as scan
                scanner = scan.Scanner35()
            elif version == 3.6:
                import uncompyle6.scanners.scanner36 as scan
                scanner = scan.Scanner36()
            # NOTE(review): an unlisted version leaves `scanner` unbound
            # (NameError below) -- kept as-is to preserve behavior.

            global JUMP_OPs
            JUMP_OPs = list(scan.JUMP_OPs) + ['JUMP_BACK']

            # use changed Token class
            # We (re)set this here to save exception handling,
            # which would get confusing.
            scanner.setTokenClass(Token)
            try:
                # disassemble both code-objects
                tokens1, customize = scanner.disassemble(code_obj1)
                del customize  # save memory
                tokens2, customize = scanner.disassemble(code_obj2)
                del customize  # save memory
            finally:
                scanner.resetTokenClass()  # restore Token class

            targets1 = dis.findlabels(code_obj1.co_code)
            tokens1 = [t for t in tokens1 if t.type != 'COME_FROM']
            tokens2 = [t for t in tokens2 if t.type != 'COME_FROM']

            i1 = 0
            i2 = 0
            offset_map = {}
            check_jumps = {}
            while i1 < len(tokens1):
                if i2 >= len(tokens2):
                    # Allow a trailing "LOAD_CONST None; RETURN_VALUE"
                    # in tokens1 that tokens2 optimized away.
                    if len(tokens1) == len(tokens2) + 2 \
                          and tokens1[-1].type == 'RETURN_VALUE' \
                          and tokens1[-2].type == 'LOAD_CONST' \
                          and tokens1[-2].pattr is None \
                          and tokens1[-3].type == 'RETURN_VALUE':
                        break
                    else:
                        raise CmpErrorCodeLen(name, tokens1, tokens2)

                offset_map[tokens1[i1].offset] = tokens2[i2].offset

                # Verify any forward jumps previously recorded as landing here.
                for idx1, idx2, offset2 in check_jumps.get(tokens1[i1].offset, []):
                    if offset2 != tokens2[i2].offset:
                        raise CmpErrorCode(name, tokens1[idx1].offset,
                                           tokens1[idx1], tokens2[idx2],
                                           tokens1, tokens2)

                if tokens1[i1].type != tokens2[i2].type:
                    if tokens1[i1].type == 'LOAD_CONST' == tokens2[i2].type:
                        # A run of LOAD_CONSTs may have been folded into a
                        # single constant tuple/list on the other side.
                        i = 1
                        while tokens1[i1 + i].type == 'LOAD_CONST':
                            i += 1
                        if tokens1[i1 + i].type.startswith(('BUILD_TUPLE', 'BUILD_LIST')) \
                              and i == int(tokens1[i1 + i].type.split('_')[-1]):
                            t = tuple([elem.pattr for elem in tokens1[i1:i1 + i]])
                            if t != tokens2[i2].pattr:
                                raise CmpErrorCode(name, tokens1[i1].offset,
                                                   tokens1[i1], tokens2[i2],
                                                   tokens1, tokens2)
                            i1 += i + 1
                            i2 += 1
                            continue
                        elif i == 2 and tokens1[i1 + i].type == 'ROT_TWO' \
                              and tokens2[i2 + 1].type == 'UNPACK_SEQUENCE_2':
                            i1 += 3
                            i2 += 2
                            continue
                        elif i == 2 and tokens1[i1 + i].type in BIN_OP_FUNCS:
                            # constant-folded binary operation
                            f = BIN_OP_FUNCS[tokens1[i1 + i].type]
                            if f(tokens1[i1].pattr, tokens1[i1 + 1].pattr) == tokens2[i2].pattr:
                                i1 += 3
                                i2 += 1
                                continue
                    elif tokens1[i1].type == 'UNARY_NOT':
                        # "not x; jump-if-true" == "jump-if-false" (and vice versa)
                        if tokens2[i2].type == 'POP_JUMP_IF_TRUE':
                            if tokens1[i1 + 1].type == 'POP_JUMP_IF_FALSE':
                                i1 += 2
                                i2 += 1
                                continue
                        elif tokens2[i2].type == 'POP_JUMP_IF_FALSE':
                            if tokens1[i1 + 1].type == 'POP_JUMP_IF_TRUE':
                                i1 += 2
                                i2 += 1
                                continue
                    elif tokens1[i1].type in ('JUMP_FORWARD', 'JUMP_BACK') \
                          and tokens1[i1 - 1].type == 'RETURN_VALUE' \
                          and tokens2[i2 - 1].type in ('RETURN_VALUE', 'RETURN_END_IF') \
                          and int(tokens1[i1].offset) not in targets1:
                        # unreachable jump right after a return; skip it
                        i1 += 1
                        continue
                    elif tokens1[i1].type == 'JUMP_FORWARD' and tokens2[i2].type == 'JUMP_BACK' \
                          and tokens1[i1 + 1].type == 'JUMP_BACK' and tokens2[i2 + 1].type == 'JUMP_BACK' \
                          and int(tokens1[i1].pattr) == int(tokens1[i1].offset) + 3:
                        if int(tokens1[i1].pattr) == int(tokens1[i1 + 1].offset):
                            i1 += 2
                            i2 += 2
                            continue
                    raise CmpErrorCode(name, tokens1[i1].offset, tokens1[i1],
                                       tokens2[i2], tokens1, tokens2)
                elif tokens1[i1].type in JUMP_OPs and tokens1[i1].pattr != tokens2[i2].pattr:
                    dest1 = int(tokens1[i1].pattr)
                    dest2 = int(tokens2[i2].pattr)
                    if tokens1[i1].type == 'JUMP_BACK':
                        # backward target must already be in the offset map
                        if offset_map[dest1] != dest2:
                            raise CmpErrorCode(name, tokens1[i1].offset,
                                               tokens1[i1], tokens2[i2],
                                               tokens1, tokens2)
                    else:
                        # forward jump: defer the check until we reach dest1
                        if dest1 in check_jumps:
                            check_jumps[dest1].append((i1, i2, dest2))
                        else:
                            check_jumps[dest1] = [(i1, i2, dest2)]

                i1 += 1
                i2 += 1
            del tokens1, tokens2  # save memory
        elif member == 'co_consts':
            # partial optimization can make the co_consts look different,
            # so we'll just compare the code consts
            codes1 = (c for c in code_obj1.co_consts if hasattr(c, 'co_consts'))
            codes2 = (c for c in code_obj2.co_consts if hasattr(c, 'co_consts'))
            for c1, c2 in zip(codes1, codes2):
                cmp_code_objects(version, is_pypy, c1, c2, name=name)
        else:
            # all other members must be equal
            if getattr(code_obj1, member) != getattr(code_obj2, member):
                raise CmpErrorMember(name, member,
                                     getattr(code_obj1, member),
                                     getattr(code_obj2, member))
def disassemble(self, co, classname=None, code_objects={}, show_asm=None):
    """
    Disassemble a Python 2 code object, returning a list of 'Token'.

    Various tranformations are made to assist the deparsing grammar.
    For example:
     -  various types of LOAD_CONST's are categorized in terms of what they load
     -  COME_FROM instructions are added to assist parsing control structures
     -  MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments

    The main part of this procedure is modelled after dis.disassemble().
    """
    # NOTE(review): code_objects={} is a shared mutable default and is not
    # read in this body -- verify against callers before relying on it.

    # An explicit show_asm argument overrides the instance-wide setting.
    show_asm = self.show_asm if not show_asm else show_asm
    # show_asm = 'before'
    if show_asm in ('both', 'before'):
        from xdis.bytecode import Bytecode
        bytecode = Bytecode(co, self.opc)
        for instr in bytecode.get_instructions(co):
            print(instr._disassemble())

    # Container for tokens
    tokens = []
    customize = {}
    Token = self.Token  # shortcut

    n = self.setup_code(co)
    self.build_lines_data(co, n)
    self.build_prev_op(n)

    # self.lines contains (block,addrLastInstr)
    if classname:
        # Undo Python's private-name mangling for members of this class.
        classname = '_' + classname.lstrip('_') + '__'

        def unmangle(name):
            if name.startswith(classname) and name[-2:] != '__':
                return name[len(classname) - 2:]
            return name

        free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ]
        names = [ unmangle(name) for name in co.co_names ]
        varnames = [ unmangle(name) for name in co.co_varnames ]
    else:
        free = co.co_cellvars + co.co_freevars
        names = co.co_names
        varnames = co.co_varnames
    self.names = names

    # Scan for assertions. Later we will
    # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT'.
    # 'LOAD_ASSERT' is used in assert statements.
    self.load_asserts = set()
    for i in self.op_range(0, n):
        # We need to detect the difference between
        # "raise AssertionError" and
        # "assert": POP_JUMP_IF_TRUE followed by LOAD_GLOBAL AssertionError
        # is the compiled "assert" pattern.
        if self.code[i] == self.opc.PJIT and self.code[i+3] == self.opc.LOAD_GLOBAL:
            if names[self.get_argument(i+3)] == 'AssertionError':
                self.load_asserts.add(i+3)

    cf = self.find_jump_targets()
    # contains (code, [addrRefToCode])

    # Detect PRINT_ITEM/PRINT_NEWLINE opcodes that continue an earlier
    # print on the same logical line; these get renamed to *_CONT tokens.
    last_stmt = self.next_stmt[0]
    i = self.next_stmt[last_stmt]
    replace = {}
    while i < n-1:
        if self.lines[last_stmt].next > i:
            if self.code[last_stmt] == self.opc.PRINT_ITEM:
                if self.code[i] == self.opc.PRINT_ITEM:
                    replace[i] = 'PRINT_ITEM_CONT'
                elif self.code[i] == self.opc.PRINT_NEWLINE:
                    replace[i] = 'PRINT_NEWLINE_CONT'
        last_stmt = i
        i = self.next_stmt[i]

    extended_arg = 0
    for offset in self.op_range(0, n):
        if offset in cf:
            # Emit one pseudo COME_FROM token per jump that targets here.
            k = 0
            for j in cf[offset]:
                tokens.append(Token(
                    'COME_FROM', None, repr(j),
                    offset="%s_%d" % (offset, k),
                    has_arg = True))
                k += 1

        op = self.code[offset]
        opname = self.opc.opname[op]

        oparg = None; pattr = None
        has_arg = (op >= self.opc.HAVE_ARGUMENT)
        if has_arg:
            oparg = self.get_argument(offset) + extended_arg
            extended_arg = 0
            if op == self.opc.EXTENDED_ARG:
                # Fold EXTENDED_ARG into the next opcode's argument.
                # scan.L65536 is presumably 2**16 -- confirm in the scanner module.
                extended_arg = oparg * scan.L65536
                continue
            if op in self.opc.hasconst:
                const = co.co_consts[oparg]
                if iscode(const):
                    # Categorize code-object constants by what they build.
                    oparg = const
                    if const.co_name == '<lambda>':
                        assert opname == 'LOAD_CONST'
                        opname = 'LOAD_LAMBDA'
                    elif const.co_name == '<genexpr>':
                        opname = 'LOAD_GENEXPR'
                    elif const.co_name == '<dictcomp>':
                        opname = 'LOAD_DICTCOMP'
                    elif const.co_name == '<setcomp>':
                        opname = 'LOAD_SETCOMP'
                    # verify() uses 'pattr' for comparison, since 'attr'
                    # now holds Code(const) and thus can not be used
                    # for comparison (todo: think about changing this)
                    # pattr = 'code_object @ 0x%x %s->%s' %\
                    # (id(const), const.co_filename, const.co_name)
                    pattr = '<code_object ' + const.co_name + '>'
                else:
                    pattr = const
            elif op in self.opc.hasname:
                pattr = names[oparg]
            elif op in self.opc.hasjrel:
                # relative jump: record the absolute target offset
                pattr = repr(offset + 3 + oparg)
            elif op in self.opc.hasjabs:
                pattr = repr(oparg)
            elif op in self.opc.haslocal:
                pattr = varnames[oparg]
            elif op in self.opc.hascompare:
                pattr = self.opc.cmp_op[oparg]
            elif op in self.opc.hasfree:
                pattr = free[oparg]

        if op in self.varargs_ops:
            # CE - Hack for >= 2.5
            #      Now all values loaded via LOAD_CLOSURE are packed into
            #      a tuple before calling MAKE_CLOSURE.
            if op == self.opc.BUILD_TUPLE and \
                self.code[self.prev[offset]] == self.opc.LOAD_CLOSURE:
                continue
            else:
                # Encode the argument count into the token name, e.g. BUILD_LIST_3.
                opname = '%s_%d' % (opname, oparg)
                if op != self.opc.BUILD_SLICE:
                    customize[opname] = oparg
        elif op == self.opc.JUMP_ABSOLUTE:
            # A backward absolute jump is either a 'continue' or a loop's
            # JUMP_BACK, depending on statement boundaries.
            target = self.get_target(offset)
            if target < offset:
                if (offset in self.stmts
                    and self.code[offset+3] not in (self.opc.END_FINALLY,
                                                    self.opc.POP_BLOCK)
                    and offset not in self.not_continue):
                    opname = 'CONTINUE'
                else:
                    opname = 'JUMP_BACK'
        elif op == self.opc.LOAD_GLOBAL:
            if offset in self.load_asserts:
                opname = 'LOAD_ASSERT'
        elif op == self.opc.RETURN_VALUE:
            if offset in self.return_end_ifs:
                opname = 'RETURN_END_IF'

        if offset in self.linestartoffsets:
            linestart = self.linestartoffsets[offset]
        else:
            linestart = None

        if offset not in replace:
            tokens.append(Token(
                opname, oparg, pattr, offset, linestart, op, has_arg))
        else:
            tokens.append(Token(
                replace[offset], oparg, pattr, offset, linestart, op, has_arg))
            pass
        pass

    if show_asm in ('both', 'after'):
        for t in tokens:
            print(t.format())
        print()
    return tokens, customize
def make_function3_annotate(self, node, isLambda, nested=1,
                            codeNode=None, annotate_last=-1):
    """
    Dump function definition, doc string, and function
    body.  This code is specialized for annotated functions in Python 3.
    """

    def build_param(ast, name, default):
        """build parameters:
            - handle defaults
            - handle format tuple parameters
        """
        if default:
            value = self.traverse(default, indent='')
            maybe_show_ast_param_default(self.showast, name, value)
            result = '%s=%s' % (name, value)
            if result[-2:] == '= ':  # default was 'LOAD_CONST None'
                result += 'None'
            return result
        else:
            return name

    # MAKE_FUNCTION_... or MAKE_CLOSURE_...
    assert node[-1].type.startswith('MAKE_')

    # Find the (last) annotate_tuple child, scanning from the end.
    annotate_tuple = None
    for annotate_last in range(len(node) - 1, -1, -1):
        if node[annotate_last] == 'annotate_tuple':
            annotate_tuple = node[annotate_last]
            break
    annotate_args = {}

    if (annotate_tuple == 'annotate_tuple'
        and annotate_tuple[0] in ('LOAD_CONST', 'LOAD_NAME')
        and isinstance(annotate_tuple[0].attr, tuple)):
        annotate_tup = annotate_tuple[0].attr
        i = -1
        j = annotate_last - 1
        l = -len(node)
        # FIX: was ('annotate_arg' 'annotate_tuple') -- implicit string
        # concatenation made this a substring test against
        # 'annotate_argannotate_tuple' instead of tuple membership.
        while j >= l and node[j].type in ('annotate_arg', 'annotate_tuple'):
            annotate_args[annotate_tup[i]] = node[j][0]
            i -= 1
            j -= 1

    args_node = node[-1]
    if isinstance(args_node.attr, tuple):
        # positional args are before kwargs
        defparams = node[:args_node.attr[0]]
        pos_args, kw_args, annotate_argc = args_node.attr
        if 'return' in annotate_args.keys():
            # the 'return' annotation is not a parameter annotation
            annotate_argc = len(annotate_args) - 1
    else:
        defparams = node[:args_node.attr]
        kw_args = 0
        annotate_argc = 0

    # Where the LOAD_LAMBDA instruction sits depends on the Python version.
    if 3.0 <= self.version <= 3.2:
        lambda_index = -2
    elif 3.03 <= self.version:
        lambda_index = -3
    else:
        lambda_index = None

    if lambda_index and isLambda and iscode(node[lambda_index].attr):
        assert node[lambda_index].type == 'LOAD_LAMBDA'
        code = node[lambda_index].attr
    else:
        code = codeNode.attr

    assert iscode(code)
    code = Code(code, self.scanner, self.currentclass)

    # add defaults values to parameter names
    argc = code.co_argcount
    paramnames = list(code.co_varnames[:argc])

    try:
        ast = self.build_ast(code._tokens,
                             code._customize,
                             isLambda=isLambda,
                             noneInNames=('None' in code.co_names))
    except ParserError as p:
        self.write(str(p))
        self.ERROR = p
        return

    kw_pairs = args_node.attr[1]
    indent = self.indent

    if isLambda:
        self.write("lambda ")
    else:
        self.write("(")

    # Align continuation lines with the column after the opening paren.
    last_line = self.f.getvalue().split("\n")[-1]
    l = len(last_line)
    indent = ' ' * l
    line_number = self.line_number

    if code_has_star_arg(code):
        self.write('*%s' % code.co_varnames[argc + kw_pairs])
        argc += 1

    i = len(paramnames) - len(defparams)
    suffix = ''

    no_paramnames = len(paramnames[:i]) == 0

    # Parameters without default values, with their annotations.
    for param in paramnames[:i]:
        self.write(suffix, param)
        suffix = ', '
        if param in annotate_tuple[0].attr:
            p = annotate_tuple[0].attr.index(param)
            self.write(': ')
            self.preorder(node[p])
            if (line_number != self.line_number):
                suffix = ",\n" + indent
                line_number = self.line_number

    # Parameters with default values (pos_arg children), with annotations.
    suffix = ', ' if i > 0 else ''
    for n in node:
        if n == 'pos_arg':
            no_paramnames = False
            self.write(suffix)
            param = paramnames[i]
            self.write(param)
            if param in annotate_args:
                aa = annotate_args[param]
                if isinstance(aa, tuple):
                    aa = aa[0]
                    self.write(': "%s"' % aa)
                elif isinstance(aa, AST):
                    self.write(': ')
                    self.preorder(aa)
            self.write('=')
            i += 1
            self.preorder(n)
            if (line_number != self.line_number):
                suffix = ",\n" + indent
                line_number = self.line_number
            else:
                suffix = ', '

    if kw_args + annotate_argc > 0:
        if no_paramnames:
            if not code_has_star_arg(code):
                if argc > 0:
                    self.write(", *, ")
                else:
                    self.write("*, ")
            else:
                self.write(", ")

            # keyword-only parameters with their defaults
            kwargs = node[0]
            last = len(kwargs) - 1
            i = 0
            for n in node[0]:
                if n == 'kwarg':
                    if (line_number != self.line_number):
                        self.write("\n" + indent)
                        line_number = self.line_number
                    self.write('%s=' % n[0].pattr)
                    self.preorder(n[1])
                    if i < last:
                        self.write(', ')
                    i += 1

            # annotations for the keyword-only parameters
            annotate_args = []
            for n in node:
                if n == 'annotate_arg':
                    annotate_args.append(n[0])
                elif n == 'annotate_tuple':
                    t = n[0].attr
                    if t[-1] == 'return':
                        t = t[0:-1]
                        annotate_args = annotate_args[:-1]
                    last = len(annotate_args) - 1
                    for i in range(len(annotate_args)):
                        self.write("%s: " % (t[i]))
                        self.preorder(annotate_args[i])
                        if i < last:
                            self.write(', ')
                    break

    if code_has_star_star_arg(code):
        if argc > 0:
            self.write(', ')
        self.write('**%s' % code.co_varnames[argc + kw_pairs])

    if isLambda:
        self.write(": ")
    else:
        self.write(')')
        if 'return' in annotate_tuple[0].attr:
            if (line_number != self.line_number) and not no_paramnames:
                self.write("\n" + indent)
                line_number = self.line_number
            self.write(' -> ')
            self.preorder(node[annotate_last - 1])
        self.println(":")

    if (len(code.co_consts) > 0
        and code.co_consts[0] is not None and not isLambda):  # ugly
        # docstring exists, dump it
        print_docstring(self, self.indent, code.co_consts[0])

    code._tokens = None  # save memory
    assert ast == 'stmts'

    all_globals = find_all_globals(ast, set())
    for g in ((all_globals & self.mod_globs) | find_globals(ast, set())):
        self.println(self.indent, 'global ', g)
    self.mod_globs -= all_globals
    has_none = 'None' in code.co_names
    rn = has_none and not find_none(ast)
    self.gen_source(ast, code.co_name, code._customize, isLambda=isLambda,
                    returnNone=rn)
    code._tokens = code._customize = None  # save memory
def make_function2(self, node, is_lambda, nested=1, code_node=None):
    """
    Dump function definition, doc string, and function body.
    This code is specialized for Python 2.
    """

    # FIXME: call make_function3 if we are self.version >= 3.0
    # and then simplify the below.

    def build_param(ast, name, default):
        """build parameters:
            - handle defaults
            - handle format tuple parameters
        """
        # if formal parameter is a tuple, the paramater name
        # starts with a dot (eg. '.1', '.2')
        if name.startswith('.'):
            # replace the name with the tuple-string
            name = self.get_tuple_parameter(ast, name)
            pass

        if default:
            value = self.traverse(default, indent='')
            maybe_show_tree_param_default(self.showast, name, value)
            result = '%s=%s' % (name, value)
            if result[-2:] == '= ':  # default was 'LOAD_CONST None'
                result += 'None'
            return result
        else:
            return name

    # MAKE_FUNCTION_... or MAKE_CLOSURE_...
    assert node[-1].kind.startswith('MAKE_')

    args_node = node[-1]
    if isinstance(args_node.attr, tuple):
        # positional args are after kwargs
        defparams = node[1:args_node.attr[0] + 1]
        pos_args, kw_args, annotate_argc = args_node.attr
    else:
        defparams = node[:args_node.attr]
        kw_args = 0
        pass

    # lambda_index is always None here, so the branch below always takes
    # the code_node path; kept for parity with make_function3.
    lambda_index = None

    if lambda_index and is_lambda and iscode(node[lambda_index].attr):
        assert node[lambda_index].kind == 'LOAD_LAMBDA'
        code = node[lambda_index].attr
    else:
        code = code_node.attr

    assert iscode(code)
    code = Code(code, self.scanner, self.currentclass)

    # add defaults values to parameter names
    argc = code.co_argcount
    paramnames = list(code.co_varnames[:argc])

    # defaults are for last n parameters, thus reverse
    paramnames.reverse()
    defparams.reverse()

    try:
        ast = self.build_ast(code._tokens,
                             code._customize,
                             is_lambda=is_lambda,
                             noneInNames=('None' in code.co_names))
    except (ParserError, ParserError2) as p:
        self.write(str(p))
        if not self.tolerate_errors:
            self.ERROR = p
        return

    kw_pairs = 0
    indent = self.indent

    # build parameters
    params = [build_param(ast, name, default)
              for name, default in zip_longest(paramnames, defparams,
                                               fillvalue=None)]
    params.reverse()  # back to correct order

    if code_has_star_arg(code):
        params.append('*%s' % code.co_varnames[argc])
        argc += 1

    # dump parameter list (with default values)
    if is_lambda:
        self.write("lambda ", ", ".join(params))
        # If the last statement is None (which is the
        # same thing as "return None" in a lambda) and the
        # next to last statement is a "yield". Then we want to
        # drop the (return) None since that was just put there
        # to have something to after the yield finishes.
        # FIXME: this is a bit hoaky and not general
        if (len(ast) > 1 and
            self.traverse(ast[-1]) == 'None' and
            self.traverse(ast[-2]).strip().startswith('yield')):
            del ast[-1]
            # Now pick out the expr part of the last statement
            ast_expr = ast[-1]
            while ast_expr.kind != 'expr':
                ast_expr = ast_expr[0]
            ast[-1] = ast_expr
            pass
    else:
        self.write("(", ", ".join(params))

    if kw_args > 0:
        # 4 is presumably the CO_VARARGS co_flags bit -- confirm against
        # the CPython code-object flag constants.
        if not (4 & code.co_flags):
            if argc > 0:
                self.write(", *, ")
            else:
                self.write("*, ")
            pass
        else:
            self.write(", ")

        # emit the first non-pos_arg child (the keyword-only args subtree)
        for n in node:
            if n == 'pos_arg':
                continue
            else:
                self.preorder(n)
            break
        pass

    if code_has_star_star_arg(code):
        if argc > 0:
            self.write(', ')
        self.write('**%s' % code.co_varnames[argc + kw_pairs])

    if is_lambda:
        self.write(": ")
    else:
        self.println("):")

    if len(code.co_consts) > 0 and code.co_consts[0] is not None and not is_lambda:  # ugly
        # docstring exists, dump it
        print_docstring(self, indent, code.co_consts[0])

    code._tokens = None  # save memory
    if not is_lambda:
        assert ast == 'stmts'

        all_globals = find_all_globals(ast, set())

        globals, nonlocals = find_globals_and_nonlocals(ast, set(), set(),
                                                        code, self.version)

        # Python 2 doesn't support the "nonlocal" statement
        assert self.version >= 3.0 or not nonlocals

        # emit "global" declarations for names assigned here but global
        for g in sorted((all_globals & self.mod_globs) | globals):
            self.println(self.indent, 'global ', g)

        self.mod_globs -= all_globals
        has_none = 'None' in code.co_names
        rn = has_none and not find_none(ast)
        self.gen_source(ast, code.co_name, code._customize,
                        is_lambda=is_lambda, returnNone=rn)

    code._tokens = None
    code._customize = None  # save memory
def make_function3_annotate(self, node, isLambda, nested=1,
                            codeNode=None, annotate_last=-1):
    """
    Dump function definition, doc string, and function
    body.  This code is specialized for annotated functions in Python 3.
    """

    def build_param(ast, name, default):
        """build parameters:
            - handle defaults
            - handle format tuple parameters
        """
        if default:
            value = self.traverse(default, indent='')
            maybe_show_ast_param_default(self.showast, name, value)
            result = '%s=%s' % (name, value)
            if result[-2:] == '= ':  # default was 'LOAD_CONST None'
                result += 'None'
            return result
        else:
            return name

    # MAKE_FUNCTION_... or MAKE_CLOSURE_...
    assert node[-1].type.startswith('MAKE_')

    # Find the (last) annotate_tuple child, scanning from the end.
    annotate_tuple = None
    for annotate_last in range(len(node)-1, -1, -1):
        if node[annotate_last] == 'annotate_tuple':
            annotate_tuple = node[annotate_last]
            break
    annotate_args = {}

    if (annotate_tuple == 'annotate_tuple'
        and annotate_tuple[0] in ('LOAD_CONST', 'LOAD_NAME')
        and isinstance(annotate_tuple[0].attr, tuple)):
        annotate_tup = annotate_tuple[0].attr
        i = -1
        j = annotate_last - 1
        l = -len(node)
        # FIX: was ('annotate_arg' 'annotate_tuple') -- implicit string
        # concatenation made this a substring test against
        # 'annotate_argannotate_tuple' instead of tuple membership.
        while j >= l and node[j].type in ('annotate_arg', 'annotate_tuple'):
            annotate_args[annotate_tup[i]] = node[j][0]
            i -= 1
            j -= 1

    args_node = node[-1]
    if isinstance(args_node.attr, tuple):
        # positional args are before kwargs
        defparams = node[:args_node.attr[0]]
        pos_args, kw_args, annotate_argc = args_node.attr
        if 'return' in annotate_args.keys():
            # the 'return' annotation is not a parameter annotation
            annotate_argc = len(annotate_args) - 1
    else:
        defparams = node[:args_node.attr]
        kw_args = 0
        annotate_argc = 0

    # Where the LOAD_LAMBDA instruction sits depends on the Python version.
    if 3.0 <= self.version <= 3.2:
        lambda_index = -2
    elif 3.03 <= self.version:
        lambda_index = -3
    else:
        lambda_index = None

    if lambda_index and isLambda and iscode(node[lambda_index].attr):
        assert node[lambda_index].type == 'LOAD_LAMBDA'
        code = node[lambda_index].attr
    else:
        code = codeNode.attr

    assert iscode(code)
    code = Code(code, self.scanner, self.currentclass)

    # add defaults values to parameter names
    argc = code.co_argcount
    paramnames = list(code.co_varnames[:argc])

    try:
        ast = self.build_ast(code._tokens,
                             code._customize,
                             isLambda=isLambda,
                             noneInNames=('None' in code.co_names))
    except ParserError as p:
        self.write(str(p))
        self.ERROR = p
        return

    kw_pairs = args_node.attr[1]
    indent = self.indent

    if isLambda:
        self.write("lambda ")
    else:
        self.write("(")

    # Align continuation lines with the column after the opening paren.
    last_line = self.f.getvalue().split("\n")[-1]
    l = len(last_line)
    indent = ' ' * l
    line_number = self.line_number

    if code_has_star_arg(code):
        self.write('*%s' % code.co_varnames[argc + kw_pairs])
        argc += 1

    i = len(paramnames) - len(defparams)
    suffix = ''

    # Parameters without default values, with their annotations.
    for param in paramnames[:i]:
        self.write(suffix, param)
        suffix = ', '
        if param in annotate_tuple[0].attr:
            p = annotate_tuple[0].attr.index(param)
            self.write(': ')
            self.preorder(node[p])
            if (line_number != self.line_number):
                suffix = ",\n" + indent
                line_number = self.line_number

    # Parameters with default values (pos_arg children), with annotations.
    suffix = ', ' if i > 0 else ''
    for n in node:
        if n == 'pos_arg':
            self.write(suffix)
            param = paramnames[i]
            self.write(param)
            if param in annotate_args:
                aa = annotate_args[param]
                if isinstance(aa, tuple):
                    aa = aa[0]
                    self.write(': "%s"' % aa)
            self.write('=')
            i += 1
            self.preorder(n)
            if (line_number != self.line_number):
                suffix = ",\n" + indent
                line_number = self.line_number
            else:
                suffix = ', '

    if kw_args + annotate_argc > 0:
        if not code_has_star_arg(code):
            if argc > 0:
                self.write(", *, ")
            else:
                self.write("*, ")
        else:
            self.write(", ")

        # keyword-only parameters with their defaults
        kwargs = node[0]
        last = len(kwargs) - 1
        i = 0
        for n in node[0]:
            if n == 'kwarg':
                if (line_number != self.line_number):
                    self.write("\n" + indent)
                    line_number = self.line_number
                self.write('%s=' % n[0].pattr)
                self.preorder(n[1])
                if i < last:
                    self.write(', ')
                i += 1

        # annotations for the keyword-only parameters
        annotate_args = []
        for n in node:
            if n == 'annotate_arg':
                annotate_args.append(n[0])
            elif n == 'annotate_tuple':
                t = n[0].attr
                if t[-1] == 'return':
                    t = t[0:-1]
                    annotate_args = annotate_args[:-1]
                last = len(annotate_args) - 1
                for i in range(len(annotate_args)):
                    self.write("%s: " % (t[i]))
                    self.preorder(annotate_args[i])
                    if i < last:
                        self.write(', ')
                break

    if code_has_star_star_arg(code):
        if argc > 0:
            self.write(', ')
        self.write('**%s' % code.co_varnames[argc + kw_pairs])

    if isLambda:
        self.write(": ")
    else:
        self.write(')')
        if 'return' in annotate_tuple[0].attr:
            if (line_number != self.line_number):
                self.write("\n" + indent)
                line_number = self.line_number
            self.write(' -> ')
            self.preorder(node[annotate_last-1])
        self.println(":")

    if (len(code.co_consts) > 0
        and code.co_consts[0] is not None and not isLambda):  # ugly
        # docstring exists, dump it
        print_docstring(self, self.indent, code.co_consts[0])

    code._tokens = None  # save memory
    assert ast == 'stmts'

    all_globals = find_all_globals(ast, set())
    for g in ((all_globals & self.mod_globs) | find_globals(ast, set())):
        self.println(self.indent, 'global ', g)
    self.mod_globs -= all_globals
    has_none = 'None' in code.co_names
    rn = has_none and not find_none(ast)
    self.gen_source(ast, code.co_name, code._customize, isLambda=isLambda,
                    returnNone=rn)
    code._tokens = code._customize = None  # save memory
def make_function3(self, node, is_lambda, nested=1, code_node=None):
    """Dump function definition, doc string, and function body in
    Python version 3.0 and above.

    Parameters:
      node      - parse-tree node whose last child is the MAKE_FUNCTION /
                  MAKE_CLOSURE token (asserted below)
      is_lambda - True when deparsing a lambda (changes header/terminator)
      nested    - unused in the visible body; kept for interface compatibility
      code_node - tree node carrying the function's code object in .attr,
                  used when the lambda fast-path below does not apply

    Writes the reconstructed source through self.write/self.println;
    returns None.  On a parse error it records self.ERROR (unless
    self.tolerate_errors) and returns early.
    """

    # For Python 3.3, the evaluation stack in MAKE_FUNCTION is:
    # * default argument objects in positional order
    # * pairs of name and default argument, with the name just below
    #   the object on the stack, for keyword-only parameters
    # * parameter annotation objects
    # * a tuple listing the parameter names for the annotations
    #   (only if there are ony annotation objects)
    # * the code associated with the function (at TOS1)
    # * the qualified name of the function (at TOS)

    # For Python 3.0 .. 3.2 the evaluation stack is:
    # The function object is defined to have argc default parameters,
    # which are found below TOS.
    # * first come positional args in the order they are given in the source,
    # * next come the keyword args in the order they given in the source,
    # * finally is the code associated with the function (at TOS)
    #
    # Note: There is no qualified name at TOS

    # MAKE_CLOSURE adds an additional closure slot

    # In Python 3.6 stack entries change again. I understand
    # 3.7 changes some of those changes. Yes, it is hard to follow
    # and I am sure I haven't been able to keep up.

    # Thank you, Python.

    def build_param(ast, name, default, annotation=None):
        """build parameters:
            - handle defaults
            - handle format tuple parameters

        Returns a "name=value" (or "name: annotation=value") string.
        """
        # In 3.6+ the default arrives already rendered as text; earlier
        # versions hand us a tree node that must be deparsed.
        if self.version >= 3.6:
            value = default
        else:
            value = self.traverse(default, indent='')
        maybe_show_tree_param_default(self.showast, name, value)
        if annotation:
            result = '%s: %s=%s' % (name, annotation, value)
        else:
            result = '%s=%s' % (name, value)

        # The below can probably be removed. This is probably
        # a holdover from days when LOAD_CONST erroneously
        # didn't handle LOAD_CONST None properly
        if result[-2:] == '= ':  # default was 'LOAD_CONST None'
            result += 'None'

        return result

    # MAKE_FUNCTION_... or MAKE_CLOSURE_...
    assert node[-1].kind.startswith('MAKE_')

    # Python 3.3+ adds a qualified name at TOS (-1)
    # moving down the LOAD_LAMBDA instruction
    if 3.0 <= self.version <= 3.2:
        lambda_index = -2
    elif 3.03 <= self.version:
        # NOTE(review): 3.03 reads oddly but is equivalent to ">= 3.3"
        # here since 3.0-3.2 is absorbed by the branch above.
        lambda_index = -3
    else:
        lambda_index = None

    args_node = node[-1]

    # Maps parameter name -> rendered annotation text, filled below.
    annotate_dict = {}

    # Get a list of tree nodes that constitute the values for the "default
    # parameters"; these are default values that appear before any *, and are
    # not to be confused with keyword parameters which may appear after *.
    args_attr = args_node.attr
    if isinstance(args_attr, tuple) or (
            self.version >= 3.6 and isinstance(args_attr, list)):
        if len(args_attr) == 3:
            pos_args, kw_args, annotate_argc = args_attr
        else:
            pos_args, kw_args, annotate_argc, closure = args_attr

            # Walk backwards from the MAKE_* token past the (optional)
            # closure, annotation and keyword-default entries.
            i = -4
            kw_pairs = 0
            if closure:
                # FIXME: fill in
                i -= 1
            if annotate_argc:
                # Turn into subroutine and DRY with other use
                annotate_node = node[i]
                if annotate_node == 'expr':
                    annotate_node = annotate_node[0]
                annotate_name_node = annotate_node[-1]
                if annotate_node == 'dict' and annotate_name_node.kind.startswith('BUILD_CONST_KEY_MAP'):
                    types = [self.traverse(n, indent='') for n in annotate_node[:-2]]
                    names = annotate_node[-2].attr
                    l = len(types)
                    assert l == len(names)
                    for i in range(l):
                        annotate_dict[names[i]] = types[i]
                    pass
                pass
                i -= 1
            if kw_args:
                kw_node = node[i]
                if kw_node == 'expr':
                    kw_node = kw_node[0]
                if kw_node == 'dict':
                    kw_pairs = kw_node[-1].attr

        # FIXME: there is probably a better way to classify this.
        have_kwargs = node[0].kind.startswith('kwarg') or node[0] == 'no_kwargs'
        if len(node) >= 4:
            lc_index = -4
        else:
            lc_index = -3
            pass

        if (3.0 <= self.version <= 3.3 and len(node) > 2
                and node[lambda_index] != 'LOAD_LAMBDA'
                and (have_kwargs or node[lc_index].kind != 'load_closure')):

            # Find the index in "node" where the first default
            # parameter value is located. Note this is in contrast to
            # key-word arguments, pairs of (name, value), which appear after "*".
            # "default_values_start" is this location.
            default_values_start = 0
            if node[0] == 'no_kwargs':
                default_values_start += 1
            # args are after kwargs; kwargs are bundled as one node
            if node[default_values_start] == 'kwargs':
                default_values_start += 1
            defparams = node[default_values_start:default_values_start + args_node.attr[0]]
        else:
            if self.version < 3.6:
                defparams = node[:args_node.attr[0]]
                kw_args = 0
            else:
                defparams = []
                # FIXME: DRY with code below
                default, kw_args, annotate_argc = args_node.attr[0:3]
                if default:
                    expr_node = node[0]
                    if node[0] == 'pos_arg':
                        expr_node = expr_node[0]
                    assert expr_node == 'expr', "expecting mkfunc default node to be an expr"
                    if (expr_node[0] == 'LOAD_CONST' and
                            isinstance(expr_node[0].attr, tuple)):
                        defparams = [repr(a) for a in expr_node[0].attr]
                    elif expr_node[0] in frozenset(('list', 'tuple', 'dict', 'set')):
                        defparams = [self.traverse(n, indent='') for n in expr_node[0][:-1]]
                else:
                    defparams = []
                pass
    else:
        # args_node.attr is a plain count of positional defaults
        # (pre-3.6 non-tuple form) or the 3.6+ four-element form.
        if self.version < 3.6:
            defparams = node[:args_node.attr]
            kw_args = 0
        else:
            default, kw_args, annotate, closure = args_node.attr
            if default:
                expr_node = node[0]
                if node[0] == 'pos_arg':
                    expr_node = expr_node[0]
                assert expr_node == 'expr', "expecting mkfunc default node to be an expr"
                if (expr_node[0] == 'LOAD_CONST' and
                        isinstance(expr_node[0].attr, tuple)):
                    defparams = [repr(a) for a in expr_node[0].attr]
                elif expr_node[0] in frozenset(('list', 'tuple', 'dict', 'set')):
                    defparams = [self.traverse(n, indent='') for n in expr_node[0][:-1]]
            else:
                defparams = []

            i = -4
            kw_pairs = 0
            if closure:
                # FIXME: fill in
                annotate = node[i]
                i -= 1
            if annotate_argc:
                # Turn into subroutine and DRY with other use
                annotate_node = node[i]
                if annotate_node == 'expr':
                    annotate_node = annotate_node[0]
                annotate_name_node = annotate_node[-1]
                if annotate_node == 'dict' and annotate_name_node.kind.startswith('BUILD_CONST_KEY_MAP'):
                    types = [self.traverse(n, indent='') for n in annotate_node[:-2]]
                    names = annotate_node[-2].attr
                    l = len(types)
                    assert l == len(names)
                    for i in range(l):
                        annotate_dict[names[i]] = types[i]
                    pass
                pass
                i -= 1
            if kw_args:
                kw_node = node[i]
                if kw_node == 'expr':
                    kw_node = kw_node[0]
                if kw_node == 'dict':
                    kw_pairs = kw_node[-1].attr
        pass

    # Locate the function's code object: either the LOAD_LAMBDA constant
    # or the code object handed in via code_node.
    if lambda_index and is_lambda and iscode(node[lambda_index].attr):
        assert node[lambda_index].kind == 'LOAD_LAMBDA'
        code = node[lambda_index].attr
    else:
        code = code_node.attr

    assert iscode(code)
    scanner_code = Code(code, self.scanner, self.currentclass)

    # add defaults values to parameter names
    argc = code.co_argcount
    kwonlyargcount = code.co_kwonlyargcount

    paramnames = list(scanner_code.co_varnames[:argc])
    if kwonlyargcount > 0:
        kwargs = list(scanner_code.co_varnames[argc:argc + kwonlyargcount])

    # defaults are for last n parameters, thus reverse
    paramnames.reverse()
    defparams.reverse()

    try:
        ast = self.build_ast(scanner_code._tokens,
                             scanner_code._customize,
                             is_lambda=is_lambda,
                             noneInNames=('None' in code.co_names))
    except (ParserError, ParserError2) as p:
        self.write(str(p))
        if not self.tolerate_errors:
            self.ERROR = p
        return

    if self.version >= 3.0:
        if self.version < 3.6:
            kw_pairs = args_node.attr[1]
        else:
            kw_pairs = 0

    i = len(paramnames) - len(defparams)

    # build parameters
    params = []
    if defparams:
        # Parameters with defaults first (remember: lists are reversed),
        # then the remaining (annotated or bare) names.
        for i, defparam in enumerate(defparams):
            params.append(build_param(ast, paramnames[i], defparam,
                                      annotate_dict.get(paramnames[i])))

        for param in paramnames[i + 1:]:
            if param in annotate_dict:
                params.append("%s: %s" % (param, annotate_dict[param]))
            else:
                params.append(param)
    else:
        for param in paramnames:
            if param in annotate_dict:
                params.append("%s: %s" % (param, annotate_dict[param]))
            else:
                params.append(param)

    params.reverse()  # back to correct order

    if code_has_star_arg(code):
        if self.version > 3.0:
            star_arg = code.co_varnames[argc + kwonlyargcount]
            if annotate_dict and star_arg in annotate_dict:
                params.append('*%s: %s' % (star_arg, annotate_dict[star_arg]))
            else:
                params.append('*%s' % star_arg)
        else:
            params.append('*%s' % code.co_varnames[argc])
        argc += 1

    # dump parameter list (with default values)
    if is_lambda:
        self.write("lambda ", ", ".join(params))
        # If the last statement is None (which is the
        # same thing as "return None" in a lambda) and the
        # next to last statement is a "yield". Then we want to
        # drop the (return) None since that was just put there
        # to have something to after the yield finishes.
        # FIXME: this is a bit hoaky and not general
        if (len(ast) > 1 and
                self.traverse(ast[-1]) == 'None' and
                self.traverse(ast[-2]).strip().startswith('yield')):
            del ast[-1]
            # Now pick out the expr part of the last statement
            ast_expr = ast[-1]
            while ast_expr.kind != 'expr':
                ast_expr = ast_expr[0]
            ast[-1] = ast_expr
            pass
    else:
        self.write("(", ", ".join(params))
    # self.println(indent, '#flags:\t', int(code.co_flags))

    # FIXME: Could we remove ends_in_comma and its tests if we just
    # created a parameter list and at the very end did a join on that?
    # Unless careful, We might lose line breaks though.
    ends_in_comma = False
    if kwonlyargcount > 0:
        # 4 is the CO_VARARGS flag bit: no *args means we must emit a
        # bare "*" separator before keyword-only parameters.
        if not (4 & code.co_flags):
            if argc > 0:
                self.write(", *, ")
            else:
                self.write("*, ")
                pass
            ends_in_comma = True
        else:
            if argc > 0:
                self.write(", ")
                ends_in_comma = True

        if 3.0 <= self.version <= 3.5:
            kw_args = [None] * kwonlyargcount
            kw_nodes = node[0]
            if kw_nodes == "kwargs":
                for n in kw_nodes:
                    name = eval(n[0].pattr)
                    default = self.traverse(n[1], indent='')
                    idx = kwargs.index(name)
                    kw_args[idx] = "%s=%s" % (name, default)

            # Keyword-only parameters without a default.
            other_kw = [c == None for c in kw_args]

            for i, flag in enumerate(other_kw):
                if flag:
                    kw_args[i] = "%s" % kwargs[i]

            self.write(', '.join(kw_args))
            ends_in_comma = False

        elif self.version >= 3.6:
            # argc = node[-1].attr
            # co = node[-3].attr
            # argcount = co.co_argcount
            # kwonlyargcount = co.co_kwonlyargcount

            free_tup = ann_dict = kw_dict = default_tup = None
            fn_bits = node[-1].attr
            index = -4  # Skip over:
            #  MAKE_FUNCTION,
            #  LOAD_CONST qualified name,
            #  LOAD_CONST code object
            if fn_bits[-1]:
                free_tup = node[index]
                index -= 1
            if fn_bits[-2]:
                ann_dict = node[index]
                index -= 1
            if fn_bits[-3]:
                kw_dict = node[index]
                index -= 1
            if fn_bits[-4]:
                default_tup = node[index]

            if kw_dict == 'expr':
                kw_dict = kw_dict[0]

            # FIXME: handle free_tup, annotate_dict, and default_tup
            kw_args = [None] * kwonlyargcount

            if kw_dict:
                assert kw_dict == 'dict'
                defaults = [self.traverse(n, indent='') for n in kw_dict[:-2]]
                names = eval(self.traverse(kw_dict[-2]))
                assert len(defaults) == len(names)
                sep = ''
                # FIXME: possibly handle line breaks
                for i, n in enumerate(names):
                    idx = kwargs.index(n)
                    if annotate_dict and n in annotate_dict:
                        t = "%s: %s=%s" % (n, annotate_dict[n], defaults[i])
                    else:
                        t = "%s=%s" % (n, defaults[i])
                    kw_args[idx] = t
                    pass
                pass

            # handle others
            other_kw = [c == None for c in kw_args]

            for i, flag in enumerate(other_kw):
                if flag:
                    n = kwargs[i]
                    if ann_dict and n in annotate_dict:
                        kw_args[i] = "%s: %s" % (n, annotate_dict[n])
                    else:
                        kw_args[i] = "%s" % n

            self.write(', '.join(kw_args))
            ends_in_comma = False
            pass
    else:
        if argc == 0:
            ends_in_comma = True

    if code_has_star_star_arg(code):
        if not ends_in_comma:
            self.write(', ')
        star_star_arg = code.co_varnames[argc + kwonlyargcount]
        if annotate_dict and star_star_arg in annotate_dict:
            self.write('**%s: %s' % (star_star_arg, annotate_dict[star_star_arg]))
        else:
            self.write('**%s' % star_star_arg)

    if is_lambda:
        self.write(": ")
    else:
        self.write(')')
        if annotate_dict and 'return' in annotate_dict:
            self.write(' -> %s' % annotate_dict['return'])
        self.println(":")

    if len(code.co_consts) > 0 and code.co_consts[0] is not None and not is_lambda:  # ugly
        # docstring exists, dump it
        print_docstring(self, self.indent, code.co_consts[0])

    scanner_code._tokens = None  # save memory
    assert ast == 'stmts'

    all_globals = find_all_globals(ast, set())
    globals, nonlocals = find_globals_and_nonlocals(ast, set(), set(),
                                                    code, self.version)

    # Emit any needed global/nonlocal declarations before the body.
    for g in sorted((all_globals & self.mod_globs) | globals):
        self.println(self.indent, 'global ', g)

    for nl in sorted(nonlocals):
        self.println(self.indent, 'nonlocal ', nl)

    self.mod_globs -= all_globals
    has_none = 'None' in code.co_names
    rn = has_none and not find_none(ast)
    self.gen_source(ast, code.co_name, scanner_code._customize,
                    is_lambda=is_lambda, returnNone=rn)

    scanner_code._tokens = None
    scanner_code._customize = None  # save memory
def disco_loop_asm_format(opc, version, co, real_out, fn_name_map, all_fns):
    """Produces disassembly in a format more conducive to
    automatic assembly by producing inner modules before they are
    used by outer ones. Since this is recusive, we'll
    use more stack space at runtime.

    Parameters:
      opc         - opcode table for `version`
      version     - bytecode version (float); selects 2.x vs 3.x compat wrapping
      co          - code object to disassemble
      real_out    - stream the formatted disassembly is written to
      fn_name_map - mapping of uniquified code names -> original base names;
                    mutated in place
      all_fns     - set of code-object names already emitted; mutated in place
    """
    # Wrap in a version-appropriate mutable compat code object so that
    # co_consts / co_name can be rewritten below.
    if version < 3.0:
        co = code2compat(co)
    else:
        co = code3compat(co)

    co_name = co.co_name
    mapped_name = fn_name_map.get(co_name, co_name)

    # Recurse into nested code objects first so inner definitions are
    # printed before the code that references them.
    new_consts = []
    for c in co.co_consts:
        if iscode(c):
            if version < 3.0:
                c_compat = code2compat(c)
            else:
                c_compat = code3compat(c)
            disco_loop_asm_format(opc, version, c_compat, real_out,
                                  fn_name_map, all_fns)

            # repr of a code object looks like "<code object <listcomp> at ...>";
            # pull out the bracketed base name.
            m = re.match(".* object <(.+)> at", str(c))
            if m:
                basename = m.group(1)
                if basename != 'module':
                    # NOTE(review): this rebinds the enclosing `mapped_name`,
                    # which is also used for the header written at the end —
                    # presumably intentional, but verify.
                    mapped_name = code_uniquify(basename, c.co_code)
                c_compat.co_name = mapped_name
            c_compat.freeze()
            new_consts.append(c_compat)
        else:
            new_consts.append(c)
        pass
    co.co_consts = new_consts

    # Give generated names like "<listcomp>" (or names already seen) a
    # unique, assembler-friendly name.
    m = re.match("^<(.+)>$", co.co_name)
    if m or co_name in all_fns:
        if co_name in all_fns:
            basename = co_name
        else:
            basename = m.group(1)
        if basename != 'module':
            mapped_name = code_uniquify(basename, co.co_code)
            co_name = mapped_name
            assert mapped_name not in fn_name_map
            fn_name_map[mapped_name] = basename
            co.co_name = mapped_name
            pass
    elif co_name in fn_name_map:
        # FIXME: better would be a hash of the co_code
        mapped_name = code_uniquify(co_name, co.co_code)
        fn_name_map[mapped_name] = co_name
        co.co_name = mapped_name
        pass

    co = co.freeze()
    all_fns.add(co_name)

    if co.co_name != '<module>' or co.co_filename:
        real_out.write("\n" + format_code_info(co, version, mapped_name) + "\n")

    bytecode = Bytecode(co, opc, dup_lines=True)
    real_out.write(bytecode.dis(asm_format=True) + "\n")
def ingest(self, co, classname=None, code_objects={}, show_asm=None):
    """
    Pick out tokens from an uncompyle6 code object, and transform them,
    returning a list of uncompyle6 'Token's.

    The transformations are made to assist the deparsing grammar.
    Specifically:
       -  various types of LOAD_CONST's are categorized in terms of what they load
       -  COME_FROM instructions are added to assist parsing control structures
       -  MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments

    Also, when we encounter certain tokens, we add them to a set which will cause custom
    grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
    cause specific rules for the specific number of arguments they take.

    Returns (tokens, customize) where `customize` maps synthesized opcode
    names to values used by custom grammar rules.

    NOTE(review): `classname` and `code_objects` are unused in this body,
    and `code_objects={}` is a mutable default — presumably kept for
    interface compatibility with sibling scanners; verify before changing.
    """

    show_asm = self.show_asm if not show_asm else show_asm
    # show_asm = 'after'
    if show_asm in ('both', 'before'):
        bytecode = Bytecode(co, self.opc)
        for instr in bytecode.get_instructions(co):
            print(instr._disassemble())

    # Container for tokens
    tokens = []

    customize = {}
    if self.is_pypy:
        customize['PyPy'] = 1

    self.code = array('B', co.co_code)
    self.build_lines_data(co)
    self.build_prev_op()

    bytecode = Bytecode(co, self.opc)

    # FIXME: put as its own method?
    # Scan for assertions. Later we will
    # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT'.
    # 'LOAD_ASSERT' is used in assert statements.
    self.load_asserts = set()
    bs = list(bytecode)
    n = len(bs)
    for i in range(n):
        inst = bs[i]

        # We need to detect the difference between
        # "raise AssertionError" and "assert"
        # If we have a JUMP_FORWARD after the
        # RAISE_VARARGS then we have a "raise" statement
        # else we have an "assert" statement.
        if inst.opname == 'POP_JUMP_IF_TRUE' and i+1 < n:
            next_inst = bs[i+1]
            if (next_inst.opname == 'LOAD_GLOBAL' and
                    next_inst.argval == 'AssertionError'):
                for j in range(i+2, n):
                    raise_inst = bs[j]
                    if raise_inst.opname.startswith('RAISE_VARARGS'):
                        if j+1 >= n or bs[j+1].opname != 'JUMP_FORWARD':
                            self.load_asserts.add(next_inst.offset)
                            pass
                        break
                    pass
                pass

    # Get jump targets
    # Format: {target offset: [jump offsets]}
    jump_targets = self.find_jump_targets(show_asm)

    for inst in bytecode:

        argval = inst.argval
        if inst.offset in jump_targets:
            jump_idx = 0
            # We want to process COME_FROMs to the same offset to be in *descending*
            # offset order so we have the larger range or biggest instruction interval
            # last. (I think they are sorted in increasing order, but for safety
            # we sort them). That way, specific COME_FROM tags will match up
            # properly. For example, a "loop" with an "if" nested in it should have the
            # "loop" tag last so the grammar rule matches that properly.
            for jump_offset in sorted(jump_targets[inst.offset], reverse=True):
                come_from_name = 'COME_FROM'
                opname = self.opName(jump_offset)
                if opname.startswith('SETUP_'):
                    # e.g. SETUP_LOOP -> COME_FROM_LOOP
                    come_from_type = opname[len('SETUP_'):]
                    come_from_name = 'COME_FROM_%s' % come_from_type
                    pass
                tokens.append(Token(come_from_name,
                                    None, repr(jump_offset),
                                    offset='%s_%s' % (inst.offset, jump_idx),
                                    has_arg = True, opc=self.opc))
                jump_idx += 1
                pass
            pass
        elif inst.offset in self.else_start:
            end_offset = self.else_start[inst.offset]
            tokens.append(Token('ELSE',
                                None, repr(end_offset),
                                offset='%s' % (inst.offset),
                                has_arg = True, opc=self.opc))
            pass

        pattr = inst.argrepr
        opname = inst.opname
        op = inst.opcode

        if opname in ['LOAD_CONST']:
            const = inst.argval
            if iscode(const):
                # Specialize LOAD_CONST of a code object by the kind of
                # thing it compiles to.
                if const.co_name == '<lambda>':
                    opname = 'LOAD_LAMBDA'
                elif const.co_name == '<genexpr>':
                    opname = 'LOAD_GENEXPR'
                elif const.co_name == '<dictcomp>':
                    opname = 'LOAD_DICTCOMP'
                elif const.co_name == '<setcomp>':
                    opname = 'LOAD_SETCOMP'
                elif const.co_name == '<listcomp>':
                    opname = 'LOAD_LISTCOMP'
                # verify() uses 'pattr' for comparison, since 'attr'
                # now holds Code(const) and thus can not be used
                # for comparison (todo: think about changing this)
                # pattr = 'code_object @ 0x%x %s->%s' %\
                # (id(const), const.co_filename, const.co_name)
                pattr = '<code_object ' + const.co_name + '>'
            else:
                pattr = const
                pass
        elif opname in ('MAKE_FUNCTION', 'MAKE_CLOSURE'):
            # Encode the argument counts into the opcode name so the
            # grammar gets one rule per arity.
            pos_args, name_pair_args, annotate_args = parse_fn_counts(inst.argval)
            if name_pair_args > 0:
                opname = '%s_N%d' % (opname, name_pair_args)
                pass
            if annotate_args > 0:
                opname = '%s_A_%d' % (opname, annotate_args)
                pass
            opname = '%s_%d' % (opname, pos_args)
            pattr = ("%d positional, %d keyword pair, %d annotated" %
                     (pos_args, name_pair_args, annotate_args))
            tokens.append(
                Token(
                    type_ = opname,
                    attr = (pos_args, name_pair_args, annotate_args),
                    pattr = pattr,
                    offset = inst.offset,
                    linestart = inst.starts_line,
                    op = op,
                    has_arg = op_has_argument(op, op3),
                    opc = self.opc
                )
            )
            continue
        elif op in self.varargs_ops:
            pos_args = inst.argval
            if self.is_pypy and not pos_args and opname == 'BUILD_MAP':
                opname = 'BUILD_MAP_n'
            else:
                opname = '%s_%d' % (opname, pos_args)
        elif self.is_pypy and opname in ('CALL_METHOD', 'JUMP_IF_NOT_DEBUG'):
            # The value in the dict is in special cases in semantic actions, such
            # as CALL_FUNCTION. The value is not used in these cases, so we put
            # in arbitrary value 0.
            customize[opname] = 0
        elif opname == 'UNPACK_EX':
            # FIXME: try with scanner and parser by
            # changing inst.argval
            before_args = inst.argval & 0xFF
            after_args = (inst.argval >> 8) & 0xff
            pattr = "%d before vararg, %d after" % (before_args, after_args)
            argval = (before_args, after_args)
            opname = '%s_%d+%d' % (opname, before_args, after_args)
        elif op == self.opc.JUMP_ABSOLUTE:
            # Further classify JUMP_ABSOLUTE into backward jumps
            # which are used in loops, and "CONTINUE" jumps which
            # may appear in a "continue" statement. The loop-type
            # and continue-type jumps will help us classify loop
            # boundaries The continue-type jumps help us get
            # "continue" statements with would otherwise be turned
            # into a "pass" statement because JUMPs are sometimes
            # ignored in rules as just boundary overhead. In
            # comprehensions we might sometimes classify JUMP_BACK
            # as CONTINUE, but that's okay since we add a grammar
            # rule for that.
            pattr = inst.argval
            target = self.get_target(inst.offset)
            if target <= inst.offset:
                # +3 skips this instruction's opcode and 2 operand bytes.
                next_opname = self.opname[self.code[inst.offset+3]]
                if (inst.offset in self.stmts and
                        next_opname not in ('END_FINALLY', 'POP_BLOCK',
                                            # Python 3.0 only uses POP_TOP
                                            'POP_TOP')
                        and inst.offset not in self.not_continue):
                    opname = 'CONTINUE'
                else:
                    opname = 'JUMP_BACK'
                    # FIXME: this is a hack to catch stuff like:
                    #   if x: continue
                    # the "continue" is not on a new line.
                    # There are other situations where we don't catch
                    # CONTINUE as well.
                    if tokens[-1].type == 'JUMP_BACK' and tokens[-1].attr <= argval:
                        # intern is used because we are changing the *previous* token
                        tokens[-1].type = intern('CONTINUE')

        elif op == self.opc.RETURN_VALUE:
            if inst.offset in self.return_end_ifs:
                opname = 'RETURN_END_IF'
        elif inst.offset in self.load_asserts:
            opname = 'LOAD_ASSERT'

        tokens.append(
            Token(
                type_ = opname,
                attr = argval,
                pattr = pattr,
                offset = inst.offset,
                linestart = inst.starts_line,
                op = op,
                has_arg = (op >= op3.HAVE_ARGUMENT),
                opc = self.opc
            )
        )
        pass

    if show_asm in ('both', 'after'):
        for t in tokens:
            print(t)
        print()
    return tokens, customize
def make_function2(self, node, isLambda, nested=1, codeNode=None):
    """
    Dump function definition, doc string, and function
    body. This code is specialized for Python 2.

    Parameters:
      node     - parse-tree node whose last child is the MAKE_FUNCTION /
                 MAKE_CLOSURE token (asserted below)
      isLambda - True when deparsing a lambda
      nested   - unused in the visible body; kept for interface compatibility
      codeNode - tree node carrying the function's code object in .attr

    Writes the reconstructed source through self.write/self.println;
    returns None.  On a parse error it records self.ERROR and returns.
    """

    # FIXME: call make_function3 if we are self.version >= 3.0
    # and then simplify the below.

    def build_param(ast, name, default):
        """build parameters:
            - handle defaults
            - handle format tuple parameters
        """
        # if formal parameter is a tuple, the paramater name
        # starts with a dot (eg. '.1', '.2')
        if name.startswith('.'):
            # replace the name with the tuple-string
            name = self.get_tuple_parameter(ast, name)
            pass

        if default:
            value = self.traverse(default, indent='')
            maybe_show_ast_param_default(self.showast, name, value)
            result = '%s=%s' % (name, value)
            if result[-2:] == '= ':  # default was 'LOAD_CONST None'
                result += 'None'
            return result
        else:
            return name

    # MAKE_FUNCTION_... or MAKE_CLOSURE_...
    assert node[-1].type.startswith('MAKE_')

    args_node = node[-1]
    if isinstance(args_node.attr, tuple):
        # positional args are after kwargs
        defparams = node[1:args_node.attr[0]+1]
        pos_args, kw_args, annotate_argc = args_node.attr
    else:
        # args_node.attr is a plain count of positional defaults
        defparams = node[:args_node.attr]
        kw_args = 0
        annotate_argc = 0
        pass

    # NOTE(review): lambda_index is always None here, so the branch below
    # always takes the codeNode path — looks like vestigial Python-3 logic.
    lambda_index = None

    if lambda_index and isLambda and iscode(node[lambda_index].attr):
        assert node[lambda_index].type == 'LOAD_LAMBDA'
        code = node[lambda_index].attr
    else:
        code = codeNode.attr

    assert iscode(code)
    code = Code(code, self.scanner, self.currentclass)

    # add defaults values to parameter names
    argc = code.co_argcount
    paramnames = list(code.co_varnames[:argc])

    # defaults are for last n parameters, thus reverse
    paramnames.reverse(); defparams.reverse()

    try:
        ast = self.build_ast(code._tokens,
                             code._customize,
                             isLambda = isLambda,
                             noneInNames = ('None' in code.co_names))
    except ParserError as p:
        self.write(str(p))
        self.ERROR = p
        return

    kw_pairs = args_node.attr[1] if self.version >= 3.0 else 0
    indent = self.indent

    # build parameters
    params = [build_param(ast, name, default) for
              name, default in zip_longest(paramnames, defparams, fillvalue=None)]
    params.reverse()  # back to correct order

    if code_has_star_arg(code):
        params.append('*%s' % code.co_varnames[argc])
        argc += 1

    # dump parameter list (with default values)
    if isLambda:
        self.write("lambda ", ", ".join(params))
    else:
        self.write("(", ", ".join(params))

    if kw_args > 0:
        # 4 is the CO_VARARGS flag bit: without *args a bare "*"
        # separator must precede keyword-only parameters.
        if not (4 & code.co_flags):
            if argc > 0:
                self.write(", *, ")
            else:
                self.write("*, ")
            pass
        else:
            self.write(", ")

        # Emit the first non-positional child (the kwargs bundle).
        for n in node:
            if n == 'pos_arg':
                continue
            else:
                self.preorder(n)
            break
        pass

    if code_has_star_star_arg(code):
        if argc > 0:
            self.write(', ')
        self.write('**%s' % code.co_varnames[argc + kw_pairs])

    if isLambda:
        self.write(": ")
    else:
        self.println("):")

    if len(code.co_consts) > 0 and code.co_consts[0] is not None and not isLambda:  # ugly
        # docstring exists, dump it
        print_docstring(self, indent, code.co_consts[0])

    code._tokens = None  # save memory
    assert ast == 'stmts'

    all_globals = find_all_globals(ast, set())
    for g in ((all_globals & self.mod_globs) | find_globals(ast, set())):
        self.println(self.indent, 'global ', g)
    self.mod_globs -= all_globals
    has_none = 'None' in code.co_names
    rn = has_none and not find_none(ast)
    self.gen_source(ast, code.co_name, code._customize,
                    isLambda=isLambda, returnNone=rn)
    code._tokens = None; code._customize = None  # save memory
def decompile(bytecode_version, co, out=None, showasm=None, showast=False,
              timestamp=None, showgrammar=False, code_objects=None,
              source_size=None, is_pypy=None, magic_int=None,
              mapstream=None, do_fragments=False):
    """
    Ingests and deparses a given code block 'co'.

    Parameters:
      bytecode_version - float bytecode version; if None, use the current
                         Python interpreter version
      co               - code object to decompile
      out              - output stream for decompiled source (default
                         sys.stdout)
      showasm/showast/showgrammar - debug options passed to the deparser
      timestamp/source_size/magic_int - optional metadata echoed into the
                         header comments
      code_objects     - cache dict passed through to the deparser; a fresh
                         dict is created when None
      mapstream        - when given (stream or path string), a source line
                         map is also written to it
      do_fragments     - deparse into fragments rather than whole source

    Returns the deparsed object.  Raises pysource.SourceWalkerError when
    deparsing fails.

    Caller is responsible for closing `out` and `mapstream`.
    """
    if bytecode_version is None:
        bytecode_version = sysinfo2float()

    # Bug fix: the signature previously used a mutable default
    # (code_objects={}), which is shared across calls and can leak cached
    # entries between unrelated decompilations.  Use a None sentinel and
    # create a fresh dict per call; callers passing their own dict are
    # unaffected.
    if code_objects is None:
        code_objects = {}

    # store final output stream for case of error
    real_out = out or sys.stdout

    def write(s):
        """Write one header line (newline appended) to real_out."""
        s += '\n'
        real_out.write(s)

    assert iscode(co)

    co_pypy_str = 'PyPy ' if is_pypy else ''
    run_pypy_str = 'PyPy ' if IS_PYPY else ''
    sys_version_lines = sys.version.split('\n')
    # Header comment block.  NOTE: the line-map arithmetic below assumes
    # this header occupies 3 + len(sys_version_lines) lines.
    write('# decompyle3 version %s\n'
          '# %sPython bytecode %s%s\n# Decompiled from: %sPython %s' %
          (VERSION, co_pypy_str, bytecode_version,
           " (%s)" % str(magic_int) if magic_int else "",
           run_pypy_str, '\n# '.join(sys_version_lines)))
    if co.co_filename:
        write('# Embedded file name: %s' % co.co_filename)
    if timestamp:
        write('# Compiled at: %s' % datetime.datetime.fromtimestamp(timestamp))
    if source_size:
        write('# Size of source mod 2**32: %d bytes' % source_size)

    debug_opts = {
        'asm': showasm,
        'ast': showast,
        'grammar': showgrammar
    }

    try:
        if mapstream:
            if isinstance(mapstream, str):
                mapstream = _get_outstream(mapstream)

            deparsed = deparse_code_with_map(bytecode_version, co, out,
                                             showasm, showast, showgrammar,
                                             code_objects=code_objects,
                                             is_pypy=is_pypy,
                                             )
            # Offset decompiled line numbers by the header we just wrote.
            header_count = 3 + len(sys_version_lines)
            linemap = [(line_no, deparsed.source_linemap[line_no] + header_count)
                       for line_no in sorted(deparsed.source_linemap.keys())]
            mapstream.write("\n\n# %s\n" % linemap)
        else:
            if do_fragments:
                deparse_fn = code_deparse_fragments
            else:
                deparse_fn = code_deparse
            deparsed = deparse_fn(co, out, bytecode_version,
                                  debug_opts=debug_opts,
                                  is_pypy=is_pypy)
            pass
        return deparsed
    except pysource.SourceWalkerError as e:
        # deparsing failed
        raise pysource.SourceWalkerError(str(e))
def cmp_code_objects(version, is_pypy, code_obj1, code_obj2,
                     name='', ignore_code=False):
    """
    Compare two code-objects.

    This is the main part of this module.

    Walks every co_* member of the two code objects and raises
    CmpErrorCodeLen / CmpErrorCode / CmpErrorMember on the first
    mismatch; returns None when the objects are considered equivalent.
    `version` selects which scanner is used to tokenize co_code;
    recursion happens through co_consts.
    """
    # print code_obj1, type(code_obj2)
    assert iscode(code_obj1), \
        "cmp_code_object first object type is %s, not code" % type(code_obj1)
    assert iscode(code_obj2), \
        "cmp_code_object second object type is %s, not code" % type(code_obj2)
    # print dir(code_obj1)
    if isinstance(code_obj1, object):
        # new style classes (Python 2.2)
        # assume _both_ code objects to be new stle classes
        assert dir(code_obj1) == dir(code_obj2)
    else:
        # old style classes
        assert dir(code_obj1) == code_obj1.__members__
        assert dir(code_obj2) == code_obj2.__members__
        assert code_obj1.__members__ == code_obj2.__members__

    # Build a dotted name for error reporting.
    if name == '__main__':
        name = code_obj1.co_name
    else:
        name = '%s.%s' % (name, code_obj1.co_name)
        if name == '.?':
            name = '__main__'

    if isinstance(code_obj1, object) and code_equal(code_obj1, code_obj2):
        # use the new style code-classes' __cmp__ method, which
        # should be faster and more sophisticated
        # if this compare fails, we use the old routine to
        # find out, what exactly is nor equal
        # if this compare succeds, simply return
        # return
        pass

    if isinstance(code_obj1, object):
        members = [x for x in dir(code_obj1) if x.startswith('co_')]
    else:
        members = dir(code_obj1)
    members.sort()  # ; members.reverse()

    tokens1 = None
    for member in members:
        if member in __IGNORE_CODE_MEMBERS__ or ignore_code:
            pass
        elif member == 'co_code' and not ignore_code:
            # Pick the scanner matching the bytecode version.
            if version == 2.3:
                import uncompyle6.scanners.scanner23 as scan
                scanner = scan.Scanner23(show_asm=False)
            elif version == 2.4:
                import uncompyle6.scanners.scanner24 as scan
                scanner = scan.Scanner24(show_asm=False)
            elif version == 2.5:
                import uncompyle6.scanners.scanner25 as scan
                scanner = scan.Scanner25(show_asm=False)
            elif version == 2.6:
                import uncompyle6.scanners.scanner26 as scan
                scanner = scan.Scanner26(show_asm=False)
            elif version == 2.7:
                if is_pypy:
                    import uncompyle6.scanners.pypy27 as scan
                    scanner = scan.ScannerPyPy27(show_asm=False)
                else:
                    import uncompyle6.scanners.scanner27 as scan
                    scanner = scan.Scanner27()
            elif version == 3.0:
                import uncompyle6.scanners.scanner30 as scan
                scanner = scan.Scanner30()
            elif version == 3.1:
                # NOTE(review): 3.1 maps to scanner32 here — presumably
                # deliberate reuse, but worth confirming.
                import uncompyle6.scanners.scanner32 as scan
                scanner = scan.Scanner32()
            elif version == 3.2:
                if is_pypy:
                    import uncompyle6.scanners.pypy32 as scan
                    scanner = scan.ScannerPyPy32()
                else:
                    import uncompyle6.scanners.scanner32 as scan
                    scanner = scan.Scanner32()
            elif version == 3.3:
                import uncompyle6.scanners.scanner33 as scan
                scanner = scan.Scanner33()
            elif version == 3.4:
                import uncompyle6.scanners.scanner34 as scan
                scanner = scan.Scanner34()
            elif version == 3.5:
                import uncompyle6.scanners.scanner35 as scan
                scanner = scan.Scanner35()
            elif version == 3.6:
                import uncompyle6.scanners.scanner36 as scan
                scanner = scan.Scanner36()

            global JUMP_OPS
            JUMP_OPS = list(scan.JUMP_OPS) + ['JUMP_BACK']

            # use changed Token class
            # We (re)set this here to save exception handling,
            # which would get confusing.
            scanner.setTokenClass(Token)
            try:
                # ingest both code-objects
                tokens1, customize = scanner.ingest(code_obj1)
                del customize  # save memory
                tokens2, customize = scanner.ingest(code_obj2)
                del customize  # save memory
            finally:
                scanner.resetTokenClass()  # restore Token class

            targets1 = dis.findlabels(code_obj1.co_code)
            tokens1 = [t for t in tokens1 if t.kind != 'COME_FROM']
            tokens2 = [t for t in tokens2 if t.kind != 'COME_FROM']

            # Walk both token streams in lockstep, tolerating a known set
            # of compiler-optimization-induced differences.
            i1 = 0
            i2 = 0
            offset_map = {}
            check_jumps = {}
            while i1 < len(tokens1):
                if i2 >= len(tokens2):
                    # Allow a trailing "LOAD_CONST None; RETURN_VALUE"
                    # epilogue that the recompiled code may lack.
                    if len(tokens1) == len(tokens2) + 2 \
                          and tokens1[-1].kind == 'RETURN_VALUE' \
                          and tokens1[-2].kind == 'LOAD_CONST' \
                          and tokens1[-2].pattr is None \
                          and tokens1[-3].kind == 'RETURN_VALUE':
                        break
                    else:
                        raise CmpErrorCodeLen(name, tokens1, tokens2)

                offset_map[tokens1[i1].offset] = tokens2[i2].offset

                # Resolve any forward-jump checks recorded earlier that
                # land on the current offset.
                for idx1, idx2, offset2 in check_jumps.get(tokens1[i1].offset, []):
                    if offset2 != tokens2[i2].offset:
                        raise CmpErrorCode(name, tokens1[idx1].offset, tokens1[idx1],
                                           tokens2[idx2], tokens1, tokens2)

                if tokens1[i1].kind != tokens2[i2].kind:
                    if tokens1[i1].kind == 'LOAD_CONST' == tokens2[i2].kind:
                        # Constant folding: several LOAD_CONSTs followed by a
                        # BUILD_TUPLE/BUILD_LIST may fold into one constant.
                        i = 1
                        while tokens1[i1 + i].kind == 'LOAD_CONST':
                            i += 1
                        if tokens1[i1+i].kind.startswith(('BUILD_TUPLE', 'BUILD_LIST')) \
                              and i == int(tokens1[i1+i].kind.split('_')[-1]):
                            t = tuple([elem.pattr for elem in tokens1[i1:i1+i]])
                            if t != tokens2[i2].pattr:
                                raise CmpErrorCode(name, tokens1[i1].offset, tokens1[i1],
                                                   tokens2[i2], tokens1, tokens2)
                            i1 += i + 1
                            i2 += 1
                            continue
                        elif i == 2 and tokens1[i1+i].kind == 'ROT_TWO' and \
                                tokens2[i2+1].kind == 'UNPACK_SEQUENCE_2':
                            i1 += 3
                            i2 += 2
                            continue
                        elif i == 2 and tokens1[i1+i].kind in BIN_OP_FUNCS:
                            # Constant-folded binary operation.
                            f = BIN_OP_FUNCS[tokens1[i1+i].kind]
                            if f(tokens1[i1].pattr, tokens1[i1+1].pattr) == tokens2[i2].pattr:
                                i1 += 3
                                i2 += 1
                                continue
                    elif tokens1[i1].kind == 'UNARY_NOT':
                        # "not" folded into the following conditional jump.
                        if tokens2[i2].kind == 'POP_JUMP_IF_TRUE':
                            if tokens1[i1+1].kind == 'POP_JUMP_IF_FALSE':
                                i1 += 2
                                i2 += 1
                                continue
                        elif tokens2[i2].kind == 'POP_JUMP_IF_FALSE':
                            if tokens1[i1+1].kind == 'POP_JUMP_IF_TRUE':
                                i1 += 2
                                i2 += 1
                                continue
                    elif tokens1[i1].kind in ('JUMP_FORWARD', 'JUMP_BACK') \
                          and tokens1[i1-1].kind == 'RETURN_VALUE' \
                          and tokens2[i2-1].kind in ('RETURN_VALUE', 'RETURN_END_IF') \
                          and int(tokens1[i1].offset) not in targets1:
                        # Dead jump after a return.
                        i1 += 1
                        continue
                    elif tokens1[i1].kind == 'JUMP_BACK' and tokens2[i2].kind == 'CONTINUE':
                        # FIXME: should make sure that offset is inside loop, not outside of it
                        i1 += 2
                        i2 += 2
                        continue
                    elif tokens1[i1].kind == 'JUMP_FORWARD' and tokens2[i2].kind == 'JUMP_BACK' \
                          and tokens1[i1+1].kind == 'JUMP_BACK' and tokens2[i2+1].kind == 'JUMP_BACK' \
                          and int(tokens1[i1].pattr) == int(tokens1[i1].offset) + 3:
                        if int(tokens1[i1].pattr) == int(tokens1[i1+1].offset):
                            i1 += 2
                            i2 += 2
                            continue
                    elif tokens1[i1].kind == 'LOAD_NAME' and tokens2[i2].kind == 'LOAD_CONST' \
                          and tokens1[i1].pattr == 'None' and tokens2[i2].pattr is None:
                        pass
                    elif tokens1[i1].kind == 'LOAD_GLOBAL' and tokens2[i2].kind == 'LOAD_NAME' \
                          and tokens1[i1].pattr == tokens2[i2].pattr:
                        pass
                    elif tokens1[i1].kind == 'LOAD_ASSERT' and tokens2[i2].kind == 'LOAD_NAME' \
                          and tokens1[i1].pattr == tokens2[i2].pattr:
                        pass
                    elif (tokens1[i1].kind == 'RETURN_VALUE' and
                          tokens2[i2].kind == 'RETURN_END_IF'):
                        pass
                    elif (tokens1[i1].kind == 'BUILD_TUPLE_0' and
                          tokens2[i2].pattr == ()):
                        pass
                    else:
                        raise CmpErrorCode(name, tokens1[i1].offset, tokens1[i1],
                                           tokens2[i2], tokens1, tokens2)
                elif tokens1[i1].kind in JUMP_OPS and tokens1[i1].pattr != tokens2[i2].pattr:
                    if tokens1[i1].kind == 'JUMP_BACK':
                        dest1 = int(tokens1[i1].pattr)
                        dest2 = int(tokens2[i2].pattr)
                        if offset_map[dest1] != dest2:
                            raise CmpErrorCode(name, tokens1[i1].offset, tokens1[i1],
                                               tokens2[i2], tokens1, tokens2)
                    else:
                        # import pdb; pdb.set_trace()
                        # NOTE(review): `dest2` is not assigned in this
                        # branch, so the append below likely raises
                        # NameError which the bare except silently
                        # swallows — the forward-jump check is probably
                        # never recorded.  Verify intent before relying
                        # on check_jumps here.
                        try:
                            dest1 = int(tokens1[i1].pattr)
                            if dest1 in check_jumps:
                                check_jumps[dest1].append((i1, i2, dest2))
                            else:
                                check_jumps[dest1] = [(i1, i2, dest2)]
                        except:
                            pass

                i1 += 1
                i2 += 1
            del tokens1, tokens2  # save memory
        elif member == 'co_consts':
            # partial optimization can make the co_consts look different,
            #   so we'll just compare the code consts
            codes1 = (c for c in code_obj1.co_consts if hasattr(c, 'co_consts'))
            codes2 = (c for c in code_obj2.co_consts if hasattr(c, 'co_consts'))

            for c1, c2 in zip(codes1, codes2):
                cmp_code_objects(version, is_pypy, c1, c2, name=name)
        elif member == 'co_flags':
            flags1 = code_obj1.co_flags
            flags2 = code_obj2.co_flags
            if is_pypy:
                # For PYPY for now we don't care about PYPY_SOURCE_IS_UTF8:
                flags2 &= ~0x0100  # PYPY_SOURCE_IS_UTF8
            # We also don't care about COROUTINE or GENERATOR for now
            flags1 &= ~0x000000a0
            flags2 &= ~0x000000a0
            if flags1 != flags2:
                raise CmpErrorMember(name, 'co_flags',
                                     pretty_flags(flags1),
                                     pretty_flags(flags2))
        else:
            # all other members must be equal
            if getattr(code_obj1, member) != getattr(code_obj2, member):
                raise CmpErrorMember(name, member,
                                     getattr(code_obj1, member),
                                     getattr(code_obj2, member))
def n_classdef3(node):
    """Handle the "classdef" nonterminal for Python versions 3.0 <= version <= 3.5.

    Emits the ``class Name(bases):`` header line, deparses the class body
    (indented one level), restores the previous current-class context, and
    prunes further traversal of this subtree.

    Raises:
        RuntimeError: if the class-body code object or the superclass
            information cannot be located in the parse tree.
    """
    assert 3.0 <= self.version <= 3.5

    # class definition ('class X(A,B,C):')
    cclass = self.currentclass

    # Pick out various needed bits of information
    # * class_name - the name of the class
    # * subclass_info - the parameters to the class  e.g.
    #      class Foo(bar, baz)
    #               ----------
    # * subclass_code - the code for the subclass body
    subclass_info = None
    if node == "classdefdeco2":
        # Decorated class: the name lives at a version-dependent position.
        if self.version <= 3.3:
            class_name = node[2][0].attr
        else:
            class_name = node[1][2].attr
        build_class = node
    else:
        # BUG FIX: the original assigned build_class = node[0] twice;
        # the redundant second assignment is dropped.
        build_class = node[0]
        class_name = node[1][0].attr

    assert "mkfunc" == build_class[1]
    mkfunc = build_class[1]
    if mkfunc[0] in ("kwargs", "no_kwargs"):
        # Scan the mkfunc subtree for the code object of the class body.
        if 3.0 <= self.version <= 3.2:
            for n in mkfunc:
                if hasattr(n, "attr") and iscode(n.attr):
                    subclass_code = n.attr
                    break
                elif n == "expr":
                    # 3.0-3.2 may wrap the code constant in an "expr" node.
                    subclass_code = n[0].attr
                pass
            pass
        else:
            for n in mkfunc:
                if hasattr(n, "attr") and iscode(n.attr):
                    subclass_code = n.attr
                    break
                pass
            pass
        if node == "classdefdeco2":
            subclass_info = node
        else:
            subclass_info = node[0]
    elif build_class[1][0] == "load_closure":
        # Python 3 with closures not functions
        load_closure = build_class[1]
        if hasattr(load_closure[-3], "attr"):
            # Python 3.3 classes with closures work like this.
            # Note have to test before 3.2 case because
            # index -2 also has an attr.
            subclass_code = find_code_node(load_closure, -3).attr
        elif hasattr(load_closure[-2], "attr"):
            # Python 3.2 works like this
            subclass_code = find_code_node(load_closure, -2).attr
        else:
            # BUG FIX: raising a plain string is a TypeError in Python 3,
            # which hid this message entirely; raise a real exception.
            raise RuntimeError(
                "Internal Error n_classdef: cannot find class body")
        if hasattr(build_class[3], "__len__"):
            if not subclass_info:
                subclass_info = build_class[3]
        elif hasattr(build_class[2], "__len__"):
            subclass_info = build_class[2]
        else:
            # BUG FIX: same string-raise defect; message was also garbled
            # ("cannot superclass name").
            raise RuntimeError(
                "Internal Error n_classdef: cannot find superclass name")
    elif not subclass_info:
        if mkfunc[0] in ("no_kwargs", "kwargs"):
            subclass_code = mkfunc[1].attr
        else:
            subclass_code = mkfunc[0].attr
        if node == "classdefdeco2":
            subclass_info = node
        else:
            subclass_info = node[0]

    # A decorated class has the decorator line already written, so only one
    # leading newline is needed; an undecorated class gets a blank line.
    if node == "classdefdeco2":
        self.write("\n")
    else:
        self.write("\n\n")

    self.currentclass = str(class_name)
    self.write(self.indent, "class ", self.currentclass)

    self.print_super_classes3(subclass_info)
    self.println(":")

    # class body
    self.indent_more()
    self.build_class(subclass_code)
    self.indent_less()

    self.currentclass = cclass
    if len(self.param_stack) > 1:
        self.write("\n\n")
    else:
        self.write("\n\n\n")

    self.prune()
def make_function3(self, node, isLambda, nested=1, codeNode=None):
    """Dump function definition, doc string, and function body.

    Deparses a MAKE_FUNCTION/MAKE_CLOSURE subtree for Python 3.x bytecode:
    writes the parameter list (with defaults, ``*args``, keyword-only args,
    ``**kwargs``), the docstring if present, and then the function body.

    node     -- parse-tree node whose last child is the MAKE_* token
    isLambda -- True when emitting a ``lambda`` expression instead of ``def``
    nested   -- nesting flag (kept for interface compatibility; unused here)
    codeNode -- node carrying the function's code object when it cannot be
                found at the LOAD_LAMBDA position

    Side effects: writes to self's output stream; sets self.ERROR and
    returns early on a ParserError from build_ast.
    """

    # FIXME: this handles only self.version >= 3.0; the version-special
    # cases below could be simplified by splitting per-version.

    def build_param(ast, name, default):
        """build parameters:
            - handle defaults
            - handle format tuple parameters
        """
        if default:
            value = self.traverse(default, indent='')
            maybe_show_ast_param_default(self.showast, name, value)
            result = '%s=%s' % (name, value)
            if result[-2:] == '= ':  # default was 'LOAD_CONST None'
                result += 'None'
            return result
        else:
            return name

    # MAKE_FUNCTION_... or MAKE_CLOSURE_...
    assert node[-1].type.startswith('MAKE_')

    args_node = node[-1]
    if isinstance(args_node.attr, tuple):
        # attr is (pos_args, kw_args, annotate_argc)
        if self.version <= 3.3 and len(node) > 2 and node[-3] != 'LOAD_LAMBDA':
            # positional args are after kwargs
            defparams = node[1:args_node.attr[0]+1]
        else:
            # positional args are before kwargs
            defparams = node[:args_node.attr[0]]
        pos_args, kw_args, annotate_argc = args_node.attr
    else:
        # Older MAKE_* form: attr is just the positional-default count.
        defparams = node[:args_node.attr]
        kw_args = 0
        pass

    # Where LOAD_LAMBDA's code object sits relative to MAKE_* varies by
    # version; None means "not a recognized layout, use codeNode".
    if 3.0 <= self.version <= 3.2:
        lambda_index = -2
    elif 3.03 <= self.version:
        lambda_index = -3
    else:
        lambda_index = None

    if lambda_index and isLambda and iscode(node[lambda_index].attr):
        assert node[lambda_index].type == 'LOAD_LAMBDA'
        code = node[lambda_index].attr
    else:
        code = codeNode.attr

    assert iscode(code)
    code = Code(code, self.scanner, self.currentclass)

    # add defaults values to parameter names
    argc = code.co_argcount
    paramnames = list(code.co_varnames[:argc])

    # defaults are for last n parameters, thus reverse
    # (the 3.0-3.2 path below consumes them in bytecode order instead)
    if not 3.0 <= self.version <= 3.2:
        paramnames.reverse(); defparams.reverse()

    try:
        ast = self.build_ast(code._tokens,
                             code._customize,
                             isLambda = isLambda,
                             noneInNames = ('None' in code.co_names))
    except ParserError as p:
        # Deparsing the body failed: emit the error text and record it.
        self.write(str(p))
        self.ERROR = p
        return

    kw_pairs = args_node.attr[1] if self.version >= 3.0 else 0
    indent = self.indent

    # build parameters
    if self.version != 3.2:
        # General case: pair names with defaults (missing defaults -> None).
        params = [build_param(ast, name, default) for
                  name, default in zip_longest(paramnames, defparams, fillvalue=None)]
        params.reverse() # back to correct order

        if code_has_star_arg(code):
            # NOTE(review): 3.0 indexes *args without the kw_pairs offset;
            # later versions place it after keyword-only args.
            if self.version > 3.0:
                params.append('*%s' % code.co_varnames[argc + kw_pairs])
            else:
                params.append('*%s' % code.co_varnames[argc])
            argc += 1

        # dump parameter list (with default values)
        if isLambda:
            self.write("lambda ", ", ".join(params))
        else:
            self.write("(", ", ".join(params))
        # self.println(indent, '#flags:\t', int(code.co_flags))
    else:
        # Python 3.2 special case: defaults are emitted by walking the
        # tree ('pos_arg' nodes) so line breaks can be tracked.
        if isLambda:
            self.write("lambda ")
        else:
            self.write("(")
            pass

        # Continuation-line indent = current output column.
        last_line = self.f.getvalue().split("\n")[-1]
        l = len(last_line)
        indent = ' ' * l
        line_number = self.line_number

        if code_has_star_arg(code):
            self.write('*%s' % code.co_varnames[argc + kw_pairs])
            argc += 1

        # First the parameters without defaults, then each defaulted one.
        i = len(paramnames) - len(defparams)
        self.write(", ".join(paramnames[:i]))
        suffix = ', ' if i > 0 else ''
        for n in node:
            if n == 'pos_arg':
                self.write(suffix)
                self.write(paramnames[i] + '=')
                i += 1
                self.preorder(n)
                if (line_number != self.line_number):
                    # Default spilled onto a new line: align continuation.
                    suffix = ",\n" + indent
                    line_number = self.line_number
                else:
                    suffix = ', '

    if kw_args > 0:
        # Keyword-only arguments: bare '*' separator unless *args (flag
        # bit 4 = CO_VARARGS) already provided one.
        if not (4 & code.co_flags):
            if argc > 0:
                self.write(", *, ")
            else:
                self.write("*, ")
            pass
        else:
            self.write(", ")

        if not 3.0 <= self.version <= 3.2:
            # 3.3+: a single kwargs/kwarg child carries all keyword-only args.
            for n in node:
                if n == 'pos_arg':
                    continue
                elif self.version >= 3.4 and not (n.type in ('kwargs', 'kwarg')):
                    continue
                else:
                    self.preorder(n)
                break
        else:
            # 3.0-3.2: iterate the kwarg children, comma-separated.
            kwargs = node[0]
            last = len(kwargs)-1
            i = 0
            for n in node[0]:
                if n == 'kwarg':
                    self.write('%s=' % n[0].pattr)
                    self.preorder(n[1])
                    if i < last:
                        self.write(', ')
                    i += 1
                    pass
                pass
            pass
        pass

    if code_has_star_star_arg(code):
        if argc > 0:
            self.write(', ')
        self.write('**%s' % code.co_varnames[argc + kw_pairs])

    if isLambda:
        self.write(": ")
    else:
        self.println("):")

    if len(code.co_consts) > 0 and code.co_consts[0] is not None and not isLambda: # ugly
        # docstring exists, dump it
        print_docstring(self, self.indent, code.co_consts[0])

    code._tokens = None # save memory
    assert ast == 'stmts'

    # Emit 'global' declarations for names this body assigns that are
    # module globals, then generate the body source itself.
    all_globals = find_all_globals(ast, set())
    for g in ((all_globals & self.mod_globs) | find_globals(ast, set())):
        self.println(self.indent, 'global ', g)
    self.mod_globs -= all_globals
    has_none = 'None' in code.co_names
    # returnNone: suppress a trailing 'return None' only when 'None' is
    # never otherwise referenced in the body.
    rn = has_none and not find_none(ast)
    self.gen_source(ast, code.co_name, code._customize, isLambda=isLambda,
                    returnNone=rn)
    code._tokens = None; code._customize = None # save memory