def is_async_fn(node):
    """Return True if *node* carries a code object whose co_flags mark it async."""
    # Prefer the first child of node[0] holding a real code object in .attr;
    # fall back to node[0][0] when none is found.
    candidates = (n for n in node[0] if hasattr(n, "attr") and iscode(n.attr))
    code_node = next(candidates, node[0][0])
    if hasattr(code_node, "attr") and iscode(code_node.attr):
        return co_flags_is_async(code_node.attr.co_flags)
    return False
def disco_loop(disasm, queue, real_out):
    """Disassemble every code object in *queue* breadth-first, writing tokens to *real_out*."""
    while queue:
        code = queue.popleft()
        if code.co_name != "<module>":
            header = "\n# %s line %d of %s\n" % (
                code.co_name,
                code.co_firstlineno,
                code.co_filename,
            )
            real_out.write(header)
        tokens, _customize = disasm(code)
        for token in tokens:
            # Nested code objects ride along in pattr (preferred) or attr;
            # queue them so they get disassembled too.
            if iscode(token.pattr):
                queue.append(token.pattr)
            elif iscode(token.attr):
                queue.append(token.attr)
            real_out.write(token)
def python_parser(
    version: str,
    co,
    out=sys.stdout,
    showasm=False,
    parser_debug=PARSER_DEFAULT_DEBUG,
    is_pypy=False,
    is_lambda=False,
):
    """Parse code object *co* into an abstract syntax tree representation.

    :param version: Python version of the bytecode as a string/float-like
                    value, e.g. 2.6, 2.7, 3.2, 3.3, 3.4, 3.5.
    :param co: The code object to parse.
    :param out: File-like object output is written to.
    :param showasm: When true, the disassembled and ingested code is
                    written to sys.stdout.
    :param parser_debug: dict of debug flags for the spark parser, e.g.
                    {'rules': True, 'transition': True, 'reduce': True,
                     'showstack': 'full'} for heavy grammar debugging.
    :return: Abstract syntax tree representation of the code object.
    """
    assert iscode(co)
    from decompyle3.scanner import get_scanner

    tokens, customize = get_scanner(version, is_pypy).ingest(co)
    maybe_show_asm(showasm, tokens)

    grammar = get_python_parser(version, parser_debug)
    return parse(grammar, tokens, customize, is_lambda)
def find_code_node(node, start):
    """Return the first LOAD_CODE child of *node*, scanning from the end.

    *start* is a negative index; the scan walks node[start], node[start-1], ...
    toward the front of *node*.  Asserts if no LOAD_CODE node is found.
    """
    for back in range(-start, len(node) + 1):
        candidate = node[-back]
        if candidate.kind == "LOAD_CODE":
            assert iscode(candidate.attr)
            return candidate
    assert False, "did not find code node starting at %d in %s" % (start, node)
def _recursively_extract_all_code_objects(co) -> List[bytes]:
    """Return co.co_code plus the co_code of every code object nested in co.

    *co* is a code object whose co_consts may contain further code objects
    (functions, classes, comprehensions), recursively.
    """
    co_code_objects: List[bytes] = [co.co_code]
    # Track the *code objects* already visited.  The original compared each
    # code object against the list of co_code byte strings, which never
    # matched and therefore never deduplicated anything.
    seen_code_objects: List[Any] = [co]
    search_list: List[Any] = list(co.co_consts)
    co_obj: Any
    for co_obj in search_list:
        if iscode(co_obj) and co_obj not in seen_code_objects:
            seen_code_objects.append(co_obj)
            co_code_objects.append(co_obj.co_code)
            search_list.extend(co_obj.co_consts)
    return co_code_objects
def number_loop(queue, mappings, opc):
    # Pull code objects two at a time from *queue* (original, uncompiled
    # counterpart), extend *mappings* with [line, mapped_line] pairs derived
    # from their line-start tables, and enqueue matching nested code objects
    # found in the two instruction streams so they get numbered too.
    while len(queue) > 0:
        code1 = queue.popleft()
        code2 = queue.popleft()
        assert code1.co_name == code2.co_name
        linestarts_orig = findlinestarts(code1)
        linestarts_uncompiled = list(findlinestarts(code2))
        # Map each original line to the corresponding line in the
        # uncompiled code via its bytecode offset.
        mappings += [
            [line, offset2line(offset, linestarts_uncompiled)]
            for offset, line in linestarts_orig
        ]
        bytecode1 = Bytecode(code1, opc)
        bytecode2 = Bytecode(code2, opc)
        instr2s = bytecode2.get_instructions(code2)
        seen = set([code1.co_name])
        for instr in bytecode1.get_instructions(code1):
            next_code1 = None
            if iscode(instr.argval):
                next_code1 = instr.argval
            if next_code1:
                # Advance the second stream until it also yields a code
                # object (or is exhausted); the streams are assumed to
                # contain nested code objects in the same order.
                next_code2 = None
                while not next_code2:
                    try:
                        instr2 = next(instr2s)
                        if iscode(instr2.argval):
                            next_code2 = instr2.argval
                            pass
                    except StopIteration:
                        break
                    pass
                if next_code2:
                    assert next_code1.co_name == next_code2.co_name
                    # Only enqueue each nested name once per pair.
                    if next_code1.co_name not in seen:
                        seen.add(next_code1.co_name)
                        queue.append(next_code1)
                        queue.append(next_code2)
                        pass
                    pass
                pass
            pass
        pass
def disco(version, co, out=None, is_pypy=False):
    """
    diassembles and deparses a given code block 'co'
    """
    assert iscode(co)

    # store final output stream for case of error
    real_out = out if out else sys.stdout
    print("# Python %s" % version, file=real_out)
    if co.co_filename:
        print("# Embedded file name: %s" % co.co_filename, file=real_out)

    work_queue = deque([co])
    scanner = get_scanner(version, is_pypy=is_pypy)
    disco_loop(scanner.ingest, work_queue, real_out)
def _build_opcode_index(co_code_objects, HAVE_ARGUMENT=90, version: str = None) -> List[int]:
    """Build a list of opcodes contained within the list of co_code objects.

    ``co_code_objects`` is a list of ``co_code`` byte strings (a single code
    object is also accepted and wrapped).  ``HAVE_ARGUMENT`` is the opcode
    threshold above which pre-3.6 instructions carry a 2-byte argument.
    """
    # Helpful for learning about opcode + arg length:
    # https://laike9m.com/blog/demystifying-extended_arg,124/
    if iscode(co_code_objects):
        co_code_objects = [co_code_objects]

    # Parse "major.minor" numerically.  The original float(version[:3])
    # misread "3.10" and later as 3.1 and so used the wrong stride.
    wordcode = False
    if version:
        try:
            major, minor = (int(part) for part in version.split(".")[:2])
            wordcode = (major, minor) >= (3, 6)
        except ValueError:
            wordcode = False

    opcode_index: List[int] = []
    co_code: bytes
    for co_code in co_code_objects:
        i: int = 0
        while i < len(co_code):
            opcode: int = co_code[i]
            opcode_index.append(opcode)
            if wordcode:
                # From 3.6 on all opcodes are two bytes, and the second byte
                # is empty if the opcode doesn't take an argument.
                i += 2
            elif opcode >= HAVE_ARGUMENT:
                i += 3  # opcode + two argument bytes (pre-3.6)
            else:
                i += 1
    return opcode_index
def code_deparse_align(
    co,
    out=sys.stderr,
    version=None,
    is_pypy=None,
    debug_opts=DEFAULT_DEBUG_OPTS,
    code_objects=None,
    compile_mode="exec",
):
    """
    ingests and deparses a given code block 'co'

    :param co: code object to deparse.
    :param out: file-like object the deparsed source is written to.
    :param version: bytecode version; defaults to the running interpreter's.
    :param is_pypy: whether the bytecode is PyPy's; defaults to IS_PYPY.
    :param debug_opts: dict of "asm"/"grammar"/"ast" debug flags.
    :param code_objects: optional cache passed through to the scanner.
    :param compile_mode: compile mode handed to the walker ("exec", ...).
    :raises SourceWalkerError: when deparsing stops due to a parse error.
    :return: the AligningWalker holding the deparsed result.
    """
    assert iscode(co)

    if version is None:
        version = float(sys.version[0:3])
    if is_pypy is None:
        is_pypy = IS_PYPY
    if code_objects is None:
        # Fresh dict per call; the original used a mutable default argument,
        # which is shared across calls.
        code_objects = {}

    # store final output stream for case of error
    scanner = get_scanner(version, is_pypy=is_pypy)

    tokens, customize = scanner.ingest(co, code_objects=code_objects)

    show_asm = debug_opts.get("asm", None)
    maybe_show_asm(show_asm, tokens)

    debug_parser = dict(PARSER_DEFAULT_DEBUG)
    show_grammar = debug_opts.get("grammar", None)  # duplicate lookup removed
    if show_grammar:
        debug_parser["reduce"] = show_grammar
        debug_parser["errorstack"] = True

    #  Build a parse tree from tokenized and massaged disassembly.
    show_ast = debug_opts.get("ast", None)
    deparsed = AligningWalker(
        version,
        scanner,
        out,
        showast=show_ast,
        debug_parser=debug_parser,
        compile_mode=compile_mode,
        is_pypy=is_pypy,
    )

    is_top_level = co.co_name == "<module>"
    deparsed.ast = deparsed.build_ast(tokens, customize, isTopLevel=is_top_level)
    assert deparsed.ast == "stmts", "Should have parsed grammar start"

    del tokens  # save memory

    deparsed.mod_globs = find_globals(deparsed.ast, set())

    # convert leading '__doc__ = "..." into doc string
    try:
        if deparsed.ast[0][0] == ASSIGN_DOC_STRING(co.co_consts[0]):
            deparsed.print_docstring("", co.co_consts[0])
            del deparsed.ast[0]
        if deparsed.ast[-1] == RETURN_NONE:
            deparsed.ast.pop()  # remove last node
            # todo: if empty, add 'pass'
    except Exception:
        # Best-effort cleanup: absent/odd docstring or return nodes are
        # simply left alone.  (Was a bare except:, which also swallowed
        # SystemExit/KeyboardInterrupt.)
        pass

    # What we've been waiting for: Generate Python source from the parse tree!
    deparsed.gen_source(deparsed.ast, co.co_name, customize)

    for g in sorted(deparsed.mod_globs):
        deparsed.write("# global %s ## Warning: Unused global\n" % g)

    if deparsed.ERROR:
        raise SourceWalkerError("Deparsing stopped due to parse error")
    return deparsed
def diff_opcode(code_standard: CodeType, code_remapped: CodeType, version: str = None) -> Dict[int, Dict[int, int]]:
    """Calculate remapped opcodes from two Code objects of the same sourcecode.

    Parameters
    ----------
    code_standard : Code (xdis.CodeX or types.CodeType)
        The standard-opcode Code object
    code_remapped : Code (xdis.CodeX or types.CodeType)
        The remapped-opcode Code object
    version : str, optional
        The Python version that marshaled the former two arguments. Used for
        figuring out what operations push arguments to the stack.

    Returns
    -------
    Dict[int, Dict[int, int]]
        A dictionary of original_opcode to
        Dict[replacement_opcode:replacement_count].  replacement_opcode is an
        opcode that was seen in place of original_opcode, and the
        replacement_count is the amount of times it was seen replacing the
        original_opcode throughout all the bytecode that was analyzed.

    Raises
    ------
    RuntimeError
        Args aren't correct type or differ in total opcode count too much.
    """

    def _recursively_extract_all_code_objects(co) -> List[bytes]:
        """Return co.co_code plus the co_code of every nested code object."""
        co_code_objects: List[bytes] = [co.co_code]
        # Track visited *code objects*; comparing a code object against the
        # list of co_code bytes (as before) never matched, so nothing was
        # ever deduplicated.
        seen_code_objects: List[Any] = [co]
        search_list: List[Any] = list(co.co_consts)
        for co_obj in search_list:
            if iscode(co_obj) and co_obj not in seen_code_objects:
                seen_code_objects.append(co_obj)
                co_code_objects.append(co_obj.co_code)
                search_list.extend(co_obj.co_consts)
        return co_code_objects

    def _build_opcode_index(co_code_objects, HAVE_ARGUMENT=90, version: str = None) -> List[int]:
        """Build a list of opcodes contained within the list of co_code objects."""
        # Helpful for learning about opcode + arg length:
        # https://laike9m.com/blog/demystifying-extended_arg,124/
        if iscode(co_code_objects):
            co_code_objects = [co_code_objects]
        # Parse "major.minor" numerically; float(version[:3]) misread
        # "3.10"+ as 3.1.
        wordcode = False
        if version:
            try:
                major, minor = (int(part) for part in version.split(".")[:2])
                wordcode = (major, minor) >= (3, 6)
            except ValueError:
                wordcode = False
        opcode_index: List[int] = []
        for co_code in co_code_objects:
            i = 0
            while i < len(co_code):
                opcode = co_code[i]
                opcode_index.append(opcode)
                if wordcode:
                    # From 3.6 on all opcodes are two bytes; the second byte
                    # is empty if the opcode doesn't take an argument.
                    i += 2
                elif opcode >= HAVE_ARGUMENT:
                    i += 3
                else:
                    i += 1
        return opcode_index

    if not iscode(code_standard) or not iscode(code_remapped):
        raise RuntimeError(
            "diff_opcode requires two Code objects as arguments")

    HAVE_ARGUMENT: int = 90
    if version:
        try:
            xdis_opcode: ModuleType = xdis.main.get_opcode(
                version, is_pypy=("pypy" in version))
        except TypeError:
            # The original message lacked the f-prefix, so the version never
            # actually appeared in the log line.
            logger.warning(
                f"[!] Couldn't retrieve version {version}'s opcodes from xdis.")
        else:
            HAVE_ARGUMENT = xdis_opcode.HAVE_ARGUMENT

    standard_code_objects: List[bytes] = _recursively_extract_all_code_objects(
        code_standard)
    remapped_code_objects: List[bytes] = _recursively_extract_all_code_objects(
        code_remapped)

    standard_opcodes_list: List[int] = _build_opcode_index(
        standard_code_objects, HAVE_ARGUMENT, version=version)
    remapped_opcodes_list: List[int] = _build_opcode_index(
        remapped_code_objects, HAVE_ARGUMENT, version=version)

    if len(standard_opcodes_list) != len(remapped_opcodes_list):
        # This is to prevent cases where files are being compared that don't
        # share source code
        raise RuntimeError(
            "The two co_code objects differ in length and therefore cannot do a comparison of the opcodes."
        )

    # Count, for each standard opcode, how often each remapped opcode was
    # seen in its position.
    remappings: Dict[int, Dict[int, int]] = {}
    for standard_opcode, remapped_opcode in zip(standard_opcodes_list,
                                                remapped_opcodes_list):
        counts = remappings.setdefault(standard_opcode, {})
        counts[remapped_opcode] = counts.get(remapped_opcode, 0) + 1
    return remappings
def cmp_code_objects(version, is_pypy, code_obj1, code_obj2, verify, name=""):
    """
    Compare two code-objects.

    This is the main part of this module.
    """
    # print code_obj1, type(code_obj2)
    assert iscode(
        code_obj1
    ), "cmp_code_object first object type is %s, not code" % type(code_obj1)
    assert iscode(
        code_obj2
    ), "cmp_code_object second object type is %s, not code" % type(code_obj2)
    # print dir(code_obj1)
    if isinstance(code_obj1, object):
        # new style classes (Python 2.2)
        # assume _both_ code objects to be new stle classes
        assert dir(code_obj1) == dir(code_obj2)
    else:
        # old style classes
        assert dir(code_obj1) == code_obj1.__members__
        assert dir(code_obj2) == code_obj2.__members__
        assert code_obj1.__members__ == code_obj2.__members__

    # Build a dotted name for error reporting.
    if name == "__main__":
        name = code_obj1.co_name
    else:
        name = "%s.%s" % (name, code_obj1.co_name)
        if name == ".?":
            name = "__main__"

    if isinstance(code_obj1, object) and code_equal(code_obj1, code_obj2):
        # use the new style code-classes' __cmp__ method, which
        # should be faster and more sophisticated
        # if this compare fails, we use the old routine to
        # find out, what exactly is nor equal
        # if this compare succeds, simply return
        # return
        pass

    if isinstance(code_obj1, object):
        members = [x for x in dir(code_obj1) if x.startswith("co_")]
    else:
        members = dir(code_obj1)
    members.sort()  # ; members.reverse()

    tokens1 = None
    for member in members:
        if member in __IGNORE_CODE_MEMBERS__ or verify != "verify":
            pass
        elif member == "co_code":
            if verify != "strong":
                continue
            scanner = get_scanner(version, is_pypy, show_asm=False)

            global JUMP_OPS
            # NOTE(review): `scan` is not defined in this function; this
            # looks like it should be `scanner.JUMP_OPS` — confirm before
            # relying on the "strong" verify path.
            JUMP_OPS = list(scan.JUMP_OPS) + ["JUMP_BACK"]

            # use changed Token class
            #   We (re)set this here to save exception handling,
            #   which would get confusing.
            scanner.setTokenClass(Token)
            try:
                # ingest both code-objects
                tokens1, customize = scanner.ingest(code_obj1)
                del customize  # save memory
                tokens2, customize = scanner.ingest(code_obj2)
                del customize  # save memory
            finally:
                scanner.resetTokenClass()  # restore Token class

            targets1 = dis.findlabels(code_obj1.co_code)
            tokens1 = [t for t in tokens1 if t.kind != "COME_FROM"]
            tokens2 = [t for t in tokens2 if t.kind != "COME_FROM"]

            i1 = 0
            i2 = 0
            offset_map = {}
            check_jumps = {}
            # Walk both token streams in lockstep, allowing a fixed set of
            # equivalent-but-different instruction patterns.
            while i1 < len(tokens1):
                if i2 >= len(tokens2):
                    # Allow a trailing "LOAD_CONST None; RETURN_VALUE"
                    # epilogue in the first stream only.
                    if (len(tokens1) == len(tokens2) + 2
                            and tokens1[-1].kind == "RETURN_VALUE"
                            and tokens1[-2].kind == "LOAD_CONST"
                            and tokens1[-2].pattr is None
                            and tokens1[-3].kind == "RETURN_VALUE"):
                        break
                    else:
                        raise CmpErrorCodeLen(name, tokens1, tokens2)

                offset_map[tokens1[i1].offset] = tokens2[i2].offset

                # Settle any forward-jump targets previously recorded for
                # this offset.
                for idx1, idx2, offset2 in check_jumps.get(
                        tokens1[i1].offset, []):
                    if offset2 != tokens2[i2].offset:
                        raise CmpErrorCode(
                            name,
                            tokens1[idx1].offset,
                            tokens1[idx1],
                            tokens2[idx2],
                            tokens1,
                            tokens2,
                        )

                if tokens1[i1].kind != tokens2[i2].kind:
                    if tokens1[i1].kind == "LOAD_CONST" == tokens2[i2].kind:
                        # A run of LOAD_CONSTs + BUILD_TUPLE/BUILD_LIST may be
                        # constant-folded into a single LOAD_CONST.
                        i = 1
                        while tokens1[i1 + i].kind == "LOAD_CONST":
                            i += 1
                        if tokens1[i1 + i].kind.startswith(
                                ("BUILD_TUPLE", "BUILD_LIST")) and i == int(
                                    tokens1[i1 + i].kind.split("_")[-1]):
                            t = tuple(
                                [elem.pattr for elem in tokens1[i1:i1 + i]])
                            if t != tokens2[i2].pattr:
                                raise CmpErrorCode(
                                    name,
                                    tokens1[i1].offset,
                                    tokens1[i1],
                                    tokens2[i2],
                                    tokens1,
                                    tokens2,
                                )
                            i1 += i + 1
                            i2 += 1
                            continue
                        elif (i == 2 and tokens1[i1 + i].kind == "ROT_TWO"
                              and tokens2[i2 + 1].kind == "UNPACK_SEQUENCE_2"):
                            i1 += 3
                            i2 += 2
                            continue
                        elif i == 2 and tokens1[i1 + i].kind in BIN_OP_FUNCS:
                            # Constant-folded binary operation.
                            f = BIN_OP_FUNCS[tokens1[i1 + i].kind]
                            if (f(tokens1[i1].pattr, tokens1[i1 + 1].pattr)
                                    == tokens2[i2].pattr):
                                i1 += 3
                                i2 += 1
                                continue
                    elif tokens1[i1].kind == "UNARY_NOT":
                        # "not" folded into an inverted conditional jump.
                        if tokens2[i2].kind == "POP_JUMP_IF_TRUE":
                            if tokens1[i1 + 1].kind == "POP_JUMP_IF_FALSE":
                                i1 += 2
                                i2 += 1
                                continue
                        elif tokens2[i2].kind == "POP_JUMP_IF_FALSE":
                            if tokens1[i1 + 1].kind == "POP_JUMP_IF_TRUE":
                                i1 += 2
                                i2 += 1
                                continue
                    elif (tokens1[i1].kind in ("JUMP_FORWARD", "JUMP_BACK")
                          and tokens1[i1 - 1].kind == "RETURN_VALUE"
                          and tokens2[i2 - 1].kind in ("RETURN_VALUE",
                                                       "RETURN_END_IF")
                          and int(tokens1[i1].offset) not in targets1):
                        i1 += 1
                        continue
                    elif (tokens1[i1].kind == "JUMP_BACK"
                          and tokens2[i2].kind == "CONTINUE"):
                        # FIXME: should make sure that offset is inside loop, not outside of it
                        i1 += 2
                        i2 += 2
                        continue
                    elif (tokens1[i1].kind == "JUMP_FORWARD"
                          and tokens2[i2].kind == "JUMP_BACK"
                          and tokens1[i1 + 1].kind == "JUMP_BACK"
                          and tokens2[i2 + 1].kind == "JUMP_BACK"
                          and int(tokens1[i1].pattr)
                          == int(tokens1[i1].offset) + 3):
                        if int(tokens1[i1].pattr) == int(
                                tokens1[i1 + 1].offset):
                            i1 += 2
                            i2 += 2
                            continue
                    elif (tokens1[i1].kind == "LOAD_NAME"
                          and tokens2[i2].kind == "LOAD_CONST"
                          and tokens1[i1].pattr == "None"
                          and tokens2[i2].pattr is None):
                        pass
                    elif (tokens1[i1].kind == "LOAD_GLOBAL"
                          and tokens2[i2].kind == "LOAD_NAME"
                          and tokens1[i1].pattr == tokens2[i2].pattr):
                        pass
                    elif (tokens1[i1].kind == "LOAD_ASSERT"
                          and tokens2[i2].kind == "LOAD_NAME"
                          and tokens1[i1].pattr == tokens2[i2].pattr):
                        pass
                    elif (tokens1[i1].kind == "RETURN_VALUE"
                          and tokens2[i2].kind == "RETURN_END_IF"):
                        pass
                    elif (tokens1[i1].kind == "BUILD_TUPLE_0"
                          and tokens2[i2].pattr == ()):
                        pass
                    else:
                        raise CmpErrorCode(
                            name,
                            tokens1[i1].offset,
                            tokens1[i1],
                            tokens2[i2],
                            tokens1,
                            tokens2,
                        )
                elif (tokens1[i1].kind in JUMP_OPS
                      and tokens1[i1].pattr != tokens2[i2].pattr):
                    if tokens1[i1].kind == "JUMP_BACK":
                        dest1 = int(tokens1[i1].pattr)
                        dest2 = int(tokens2[i2].pattr)
                        if offset_map[dest1] != dest2:
                            raise CmpErrorCode(
                                name,
                                tokens1[i1].offset,
                                tokens1[i1],
                                tokens2[i2],
                                tokens1,
                                tokens2,
                            )
                    else:
                        # import pdb; pdb.set_trace()
                        try:
                            # NOTE(review): `dest2` is never assigned in this
                            # branch, so this raises NameError which the bare
                            # except swallows — check_jumps is likely never
                            # populated here.  Probably should compute
                            # dest2 = int(tokens2[i2].pattr) first.
                            dest1 = int(tokens1[i1].pattr)
                            if dest1 in check_jumps:
                                check_jumps[dest1].append((i1, i2, dest2))
                            else:
                                check_jumps[dest1] = [(i1, i2, dest2)]
                        except:
                            pass

                i1 += 1
                i2 += 1
            del tokens1, tokens2  # save memory
        elif member == "co_consts":
            # partial optimization can make the co_consts look different,
            #   so we'll just compare the code consts
            codes1 = (c for c in code_obj1.co_consts
                      if hasattr(c, "co_consts"))
            codes2 = (c for c in code_obj2.co_consts
                      if hasattr(c, "co_consts"))

            for c1, c2 in zip(codes1, codes2):
                cmp_code_objects(version, is_pypy, c1, c2, verify, name=name)
        elif member == "co_flags":
            flags1 = code_obj1.co_flags
            flags2 = code_obj2.co_flags
            if is_pypy:
                # For PYPY for now we don't care about PYPY_SOURCE_IS_UTF8:
                flags2 &= ~0x0100  # PYPY_SOURCE_IS_UTF8
            # We also don't care about COROUTINE or GENERATOR for now
            flags1 &= ~0x000000A0
            flags2 &= ~0x000000A0
            if flags1 != flags2:
                raise CmpErrorMember(
                    name,
                    "co_flags",
                    pretty_code_flags(flags1),
                    pretty_code_flags(flags2),
                )
        else:
            # all other members must be equal
            if getattr(code_obj1, member) != getattr(code_obj2, member):
                raise CmpErrorMember(name, member,
                                     getattr(code_obj1, member),
                                     getattr(code_obj2, member))
def make_function2(self, node, is_lambda, nested=1, code_node=None):
    """
    Dump function definition, doc string, and function body.
    This code is specialized for Python 2.
    """

    def build_param(ast, name, default):
        """build parameters:
        - handle defaults
        - handle format tuple parameters
        """
        # if formal parameter is a tuple, the paramater name
        # starts with a dot (eg. '.1', '.2')
        if name.startswith("."):
            # replace the name with the tuple-string
            name = self.get_tuple_parameter(ast, name)
            pass

        if default:
            value = self.traverse(default, indent="")
            maybe_show_tree_param_default(self.showast, name, value)
            result = "%s=%s" % (name, value)
            if result[-2:] == "= ":  # default was 'LOAD_CONST None'
                result += "None"
            return result
        else:
            return name

    # MAKE_FUNCTION_... or MAKE_CLOSURE_...
    assert node[-1].kind.startswith("MAKE_")

    args_node = node[-1]
    if isinstance(args_node.attr, tuple):
        # positional args are after kwargs
        defparams = node[1:args_node.attr[0] + 1]
        pos_args, kw_args, annotate_argc = args_node.attr
    else:
        defparams = node[:args_node.attr]
        kw_args = 0
        pass

    # NOTE(review): with lambda_index fixed at None the branch below is
    # dead and the else is always taken — likely a leftover; confirm
    # against the LOAD_LAMBDA handling in the Python-3 variants.
    lambda_index = None

    if lambda_index and is_lambda and iscode(node[lambda_index].attr):
        assert node[lambda_index].kind == "LOAD_LAMBDA"
        code = node[lambda_index].attr
    else:
        code = code_node.attr

    assert iscode(code)
    code = Code(code, self.scanner, self.currentclass)

    # add defaults values to parameter names
    argc = code.co_argcount
    paramnames = list(code.co_varnames[:argc])

    # defaults are for last n parameters, thus reverse
    paramnames.reverse()
    defparams.reverse()

    try:
        ast = self.build_ast(
            code._tokens,
            code._customize,
            code,
            is_lambda=is_lambda,
            noneInNames=("None" in code.co_names),
        )
    # Python-3 `except ... as` syntax; the original Python-2 spelling
    # `except ParserError, p:` is a SyntaxError in this Python-3 module.
    except ParserError as p:
        self.write(str(p))
        if not self.tolerate_errors:
            self.ERROR = p
        return
def make_function3(self, node, is_lambda, nested=1, code_node=None):
    """Dump function definition, doc string, and function body in
    Python version 3.0 and above
    """

    # For Python 3.3, the evaluation stack in MAKE_FUNCTION is:

    # * default argument objects in positional order
    # * pairs of name and default argument, with the name just below
    #   the object on the stack, for keyword-only parameters
    # * parameter annotation objects
    # * a tuple listing the parameter names for the annotations
    #   (only if there are ony annotation objects)
    # * the code associated with the function (at TOS1)
    # * the qualified name of the function (at TOS)

    # For Python 3.0 .. 3.2 the evaluation stack is:
    # The function object is defined to have argc default parameters,
    # which are found below TOS.
    # * first come positional args in the order they are given in the source,
    # * next come the keyword args in the order they given in the source,
    # * finally is the code associated with the function (at TOS)
    #
    # Note: There is no qualified name at TOS

    # MAKE_CLOSURE adds an additional closure slot

    # In Python 3.6 stack entries change again. I understand
    # 3.7 changes some of those changes. Yes, it is hard to follow
    # and I am sure I haven't been able to keep up.

    # Thank you, Python.

    def build_param(ast, name, default, annotation=None):
        """build parameters:
        - handle defaults
        - handle format tuple parameters
        """
        value = self.traverse(default, indent="")
        maybe_show_tree_param_default(self.showast, name, value)
        if annotation:
            result = "%s: %s=%s" % (name, annotation, value)
        else:
            result = "%s=%s" % (name, value)

        # The below can probably be removed. This is probably
        # a holdover from days when LOAD_CONST erroneously
        # didn't handle LOAD_CONST None properly
        if result[-2:] == "= ":  # default was 'LOAD_CONST None'
            result += "None"

        return result

    # MAKE_FUNCTION_... or MAKE_CLOSURE_...
    assert node[-1].kind.startswith("MAKE_")

    # Python 3.3+ adds a qualified name at TOS (-1)
    # moving down the LOAD_LAMBDA instruction
    if 3.0 <= self.version <= 3.2:
        lambda_index = -2
    elif 3.03 <= self.version:
        lambda_index = -3
    else:
        lambda_index = None

    args_node = node[-1]
    annotate_dict = {}

    # Get a list of tree nodes that constitute the values for the "default
    # parameters"; these are default values that appear before any *, and are
    # not to be confused with keyword parameters which may appear after *.
    args_attr = args_node.attr
    if isinstance(args_attr, tuple):
        if len(args_attr) == 3:
            pos_args, kw_args, annotate_argc = args_attr
        else:
            pos_args, kw_args, annotate_argc, closure = args_attr
            i = -4
            kw_pairs = 0
            if closure:
                # FIXME: fill in
                i -= 1
            if annotate_argc:
                # Turn into subroutine and DRY with other use
                annotate_node = node[i]
                if annotate_node == "expr":
                    annotate_node = annotate_node[0]
                annotate_name_node = annotate_node[-1]
                if annotate_node == "dict" and annotate_name_node.kind.startswith(
                        "BUILD_CONST_KEY_MAP"):
                    types = [
                        self.traverse(n, indent="") for n in annotate_node[:-2]
                    ]
                    names = annotate_node[-2].attr
                    l = len(types)
                    assert l == len(names)
                    for i in range(l):
                        annotate_dict[names[i]] = types[i]
                    pass
                pass
                i -= 1
            if kw_args:
                kw_node = node[i]
                if kw_node == "expr":
                    kw_node = kw_node[0]
                if kw_node == "dict":
                    kw_pairs = kw_node[-1].attr

        # FIXME: there is probably a better way to classify this.
        have_kwargs = node[0].kind.startswith(
            "kwarg") or node[0] == "no_kwargs"
        if len(node) >= 4:
            lc_index = -4
        else:
            lc_index = -3
            pass

        if len(node) > 2 and (have_kwargs
                              or node[lc_index].kind != "load_closure"):
            # Find the index in "node" where the first default
            # parameter value is located. Note this is in contrast to
            # key-word arguments, pairs of (name, value), which appear after "*".
            # "default_values_start" is this location.
            default_values_start = 0
            if node[0] == "no_kwargs":
                default_values_start += 1
            # If in a lambda named args are a sequence of kwarg, not bundled.
            # If not in a lambda, named args are after kwargs; kwargs are bundled as one node.
            if node[default_values_start] == "kwarg":
                assert node[lambda_index] == "LOAD_LAMBDA"
                i = default_values_start
                defparams = []
                while node[i] == "kwarg":
                    defparams.append(node[i][1])
                    i += 1
            else:
                if node[default_values_start] == "kwargs":
                    default_values_start += 1
                defparams = node[default_values_start:default_values_start
                                 + args_node.attr[0]]
        else:
            defparams = node[:args_node.attr[0]]
            kw_args = 0
    else:
        defparams = node[:args_node.attr]
        kw_args = 0
        pass

    if 3.0 <= self.version <= 3.2:
        lambda_index = -2
    elif 3.03 <= self.version:
        lambda_index = -3
    else:
        lambda_index = None

    if lambda_index and is_lambda and iscode(node[lambda_index].attr):
        assert node[lambda_index].kind == "LOAD_LAMBDA"
        code = node[lambda_index].attr
    else:
        code = code_node.attr

    assert iscode(code)
    scanner_code = Code(code, self.scanner, self.currentclass)

    # add defaults values to parameter names
    argc = code.co_argcount
    kwonlyargcount = code.co_kwonlyargcount

    paramnames = list(scanner_code.co_varnames[:argc])
    if kwonlyargcount > 0:
        if is_lambda:
            kwargs = []
            for i in range(kwonlyargcount):
                paramnames.append(scanner_code.co_varnames[argc + i])
                pass
        else:
            kwargs = list(
                scanner_code.co_varnames[argc:argc + kwonlyargcount])

    # defaults are for last n parameters when not in a lambda, thus reverse
    paramnames.reverse()
    defparams.reverse()

    try:
        ast = self.build_ast(
            scanner_code._tokens,
            scanner_code._customize,
            scanner_code,
            is_lambda=is_lambda,
            noneInNames=("None" in code.co_names),
        )
    # Catch the exception *classes* and bind the instance; the original
    # `except (ParserError(p), ParserError2(p)):` raised NameError (p is
    # unbound) instead of catching anything.
    except (ParserError, ParserError2) as p:
        self.write(str(p))
        if not self.tolerate_errors:
            self.ERROR = p
        return

    i = len(paramnames) - len(defparams)

    # build parameters
    params = []
    if defparams:
        for i, defparam in enumerate(defparams):
            params.append(
                build_param(ast, paramnames[i], defparam,
                            annotate_dict.get(paramnames[i])))

        for param in paramnames[i + 1:]:
            if param in annotate_dict:
                params.append("%s: %s" % (param, annotate_dict[param]))
            else:
                params.append(param)
    else:
        for param in paramnames:
            if param in annotate_dict:
                params.append("%s: %s" % (param, annotate_dict[param]))
            else:
                params.append(param)

    params.reverse()  # back to correct order

    if code_has_star_arg(code):
        star_arg = code.co_varnames[argc + kwonlyargcount]
        if annotate_dict and star_arg in annotate_dict:
            params.append("*%s: %s" % (star_arg, annotate_dict[star_arg]))
        else:
            params.append("*%s" % star_arg)
        pass

        if is_lambda:
            params.reverse()
        if not is_lambda:
            # *args consumes one more varname slot.
            argc += 1
            pass
    elif is_lambda and kwonlyargcount > 0:
        # Lambda with keyword-only args but no *args needs a bare "*".
        params.insert(0, "*")
        kwonlyargcount = 0

    # dump parameter list (with default values)
    if is_lambda:
        self.write("lambda ", ", ".join(params))
        # If the last statement is None (which is the
        # same thing as "return None" in a lambda) and the
        # next to last statement is a "yield". Then we want to
        # drop the (return) None since that was just put there
        # to have something to after the yield finishes.

        # FIXME: this is a bit hoaky and not general
        if (len(ast) > 1 and self.traverse(ast[-1]) == "None"
                and self.traverse(ast[-2]).strip().startswith("yield")):
            del ast[-1]
            # Now pick out the expr part of the last statement
            ast_expr = ast[-1]
            while ast_expr.kind != "expr":
                ast_expr = ast_expr[0]
            ast[-1] = ast_expr
            pass
    else:
        # FIXME: add annotations here
        self.write("(", ", ".join(params))
        # self.println(indent, '#flags:\t', int(code.co_flags))

    # FIXME: Could we remove ends_in_comma and its tests if we just
    # created a parameter list and at the very end did a join on that?
    # Unless careful, We might lose line breaks though.
    ends_in_comma = False
    if kwonlyargcount > 0:
        if not (4 & code.co_flags):
            if argc > 0:
                self.write(", *, ")
            else:
                self.write("*, ")
                pass
            ends_in_comma = True
        else:
            if argc > 0 and node[0] != "kwarg":
                self.write(", ")
                ends_in_comma = True

        kw_args = [None] * kwonlyargcount

        if self.version <= 3.3:
            kw_nodes = node[0]
        else:
            kw_nodes = node[args_node.attr[0]]

        if kw_nodes == "kwargs":
            for n in kw_nodes:
                name = eval(n[0].pattr)
                default = self.traverse(n[1], indent="")
                idx = kwargs.index(name)
                kw_args[idx] = "%s=%s" % (name, default)
                pass
            pass

        # FIXME: something weird is going on and the below
        # might not be right. On 3.4 kw_nodes != "kwarg"
        # because of some sort of type mismatch. I think
        # the test is for versions earlier than 3.3
        # on 3.5 if we have "kwarg" we still want to do this.
        # Perhaps we should be testing that kw_nodes is iterable?
        if kw_nodes != "kwarg" or self.version == 3.5:
            # `is None` rather than `== None` for the missing-default check.
            other_kw = [c is None for c in kw_args]
            for i, flag in enumerate(other_kw):
                if flag:
                    if i < len(kwargs):
                        kw_args[i] = "%s" % kwargs[i]
                    else:
                        del kw_args[i]
                    pass
            self.write(", ".join(kw_args))
            ends_in_comma = False
            pass
        pass
    else:
        if argc == 0:
            ends_in_comma = True

    if code_has_star_star_arg(code):
        if not ends_in_comma:
            self.write(", ")
        star_star_arg = code.co_varnames[argc + kwonlyargcount]
        if annotate_dict and star_star_arg in annotate_dict:
            self.write("**%s: %s" %
                       (star_star_arg, annotate_dict[star_star_arg]))
        else:
            self.write("**%s" % star_star_arg)

    if is_lambda:
        self.write(": ")
    else:
        self.write(")")
        if annotate_dict and "return" in annotate_dict:
            self.write(" -> %s" % annotate_dict["return"])
        self.println(":")

    if (len(code.co_consts) > 0 and code.co_consts[0] is not None
            and not is_lambda):  # ugly
        # docstring exists, dump it
        print_docstring(self, self.indent, code.co_consts[0])

    assert ast == "stmts"

    all_globals = find_all_globals(ast, set())
    globals, nonlocals = find_globals_and_nonlocals(ast, set(), set(), code,
                                                    self.version)
    for g in sorted((all_globals & self.mod_globs) | globals):
        self.println(self.indent, "global ", g)
    for nl in sorted(nonlocals):
        self.println(self.indent, "nonlocal ", nl)

    self.mod_globs -= all_globals
    has_none = "None" in code.co_names
    rn = has_none and not find_none(ast)
    self.gen_source(ast,
                    code.co_name,
                    scanner_code._customize,
                    is_lambda=is_lambda,
                    returnNone=rn)

    # In obscure cases, a function may be a generator but the "yield"
    # was optimized away. Here, we need to put in unreachable code to
    # add in "yield" just so that the compiler will mark
    # the GENERATOR bit of the function. See for example
    # Python 3.x's test_generator.py test program.
    if not is_lambda and code.co_flags & CO_GENERATOR:
        need_bogus_yield = True
        for token in scanner_code._tokens:
            if token in ("YIELD_VALUE", "YIELD_FROM"):
                need_bogus_yield = False
                break
            pass
        if need_bogus_yield:
            self.template_engine(("%|if False:\n%+%|yield None%-", ), node)

    scanner_code._tokens = None  # save memory
    scanner_code._customize = None  # save memory
def make_function36(self, node, is_lambda, nested=1, code_node=None):
    """Dump function definition, doc string, and function body in
    Python version 3.6 and above.

    :param node:      parse-tree node for the MAKE_FUNCTION construct.
    :param is_lambda: True when deparsing a lambda rather than a "def".
    :param nested:    nesting level (kept for interface compatibility).
    :param code_node: tree node whose .attr is the function's code object,
                      used when the code object is not a lambda.
    """
    # MAKE_CLOSURE adds an additional closure slot

    # In Python 3.6 and above stack change again. I understand
    # 3.7 changes some of those changes, although I don't
    # see it in this code yet. Yes, it is hard to follow
    # and I am sure I haven't been able to keep up.
    # Thank you, Python.

    def build_param(ast, name, default, annotation=None):
        """Render one "name[: annotation]=default" parameter string."""
        value = default
        maybe_show_tree_param_default(self.showast, name, value)
        if annotation:
            result = "%s: %s=%s" % (name, annotation, value)
        else:
            result = "%s=%s" % (name, value)

        # The below can probably be removed. This is probably
        # a holdover from days when LOAD_CONST erroneously
        # didn't handle LOAD_CONST None properly
        if result[-2:] == "= ":  # default was 'LOAD_CONST None'
            result += "None"

        return result

    # MAKE_FUNCTION_... or MAKE_CLOSURE_...
    assert node[-1].kind.startswith("MAKE_")

    # Python 3.3+ adds a qualified name at TOS (-1)
    # moving down the LOAD_LAMBDA instruction
    lambda_index = -3

    args_node = node[-1]

    annotate_dict = {}

    # Get a list of tree nodes that constitute the values for the "default
    # parameters"; these are default values that appear before any *, and are
    # not to be confused with keyword parameters which may appear after *.
    args_attr = args_node.attr
    if len(args_attr) == 3:
        pos_args, kw_args, annotate_argc = args_attr
        # FIX: "closure" was left unbound in this branch but is tested
        # unconditionally below ("if closure:"), which would raise
        # UnboundLocalError. Default it to None (falsy) here.
        closure = None
    else:
        pos_args, kw_args, annotate_argc, closure = args_attr

    i = -4 if node[-2] != "docstring" else -5

    kw_pairs = 0
    if annotate_argc:
        # Turn into subroutine and DRY with other use
        annotate_node = node[i]
        if annotate_node == "expr":
            annotate_node = annotate_node[0]
        annotate_name_node = annotate_node[-1]
        if annotate_node == "dict" and annotate_name_node.kind.startswith(
            "BUILD_CONST_KEY_MAP"
        ):
            types = [self.traverse(n, indent="") for n in annotate_node[:-2]]
            names = annotate_node[-2].attr
            l = len(types)
            assert l == len(names)
            # NOTE(review): this loop clobbers the outer node index "i";
            # the subsequent "i -= 1" then operates on the loop's last
            # value. Preserved as-is since downstream indexing may depend
            # on it — confirm before changing.
            for i in range(l):
                annotate_dict[names[i]] = types[i]
            pass
        pass
        i -= 1

    if closure:
        # FIXME: fill in
        # annotate = node[i]
        i -= 1

    if kw_args:
        kw_node = node[pos_args]
        if kw_node == "expr":
            kw_node = kw_node[0]
        if kw_node == "dict":
            kw_pairs = kw_node[-1].attr

    defparams = []
    # FIXME: DRY with code below
    default, kw_args, annotate_argc = args_node.attr[0:3]
    if default:
        expr_node = node[0]
        if node[0] == "pos_arg":
            expr_node = expr_node[0]
        assert expr_node == "expr", "expecting mkfunc default node to be an expr"

        if expr_node[0] == "LOAD_CONST" and isinstance(expr_node[0].attr, tuple):
            defparams = [repr(a) for a in expr_node[0].attr]
        elif expr_node[0] in frozenset(("list", "tuple", "dict", "set")):
            defparams = [self.traverse(n, indent="") for n in expr_node[0][:-1]]
    else:
        defparams = []
        pass

    if lambda_index and is_lambda and iscode(node[lambda_index].attr):
        assert node[lambda_index].kind == "LOAD_LAMBDA"
        code = node[lambda_index].attr
    else:
        code = code_node.attr

    assert iscode(code)
    scanner_code = Code(code, self.scanner, self.currentclass)

    # add defaults values to parameter names
    argc = code.co_argcount
    kwonlyargcount = code.co_kwonlyargcount

    paramnames = list(scanner_code.co_varnames[:argc])
    kwargs = list(scanner_code.co_varnames[argc : argc + kwonlyargcount])

    # Defaults apply to the *last* n positional parameters.
    paramnames.reverse()
    defparams.reverse()

    try:
        ast = self.build_ast(
            scanner_code._tokens,
            scanner_code._customize,
            is_lambda=is_lambda,
            noneInNames=("None" in code.co_names),
        )
    except (ParserError, ParserError2) as p:
        self.write(str(p))
        if not self.tolerate_errors:
            self.ERROR = p
        return

    i = len(paramnames) - len(defparams)

    # build parameters
    params = []
    if defparams:
        for i, defparam in enumerate(defparams):
            params.append(
                build_param(
                    ast, paramnames[i], defparam, annotate_dict.get(paramnames[i])
                )
            )

        for param in paramnames[i + 1 :]:
            if param in annotate_dict:
                params.append("%s: %s" % (param, annotate_dict[param]))
            else:
                params.append(param)
    else:
        for param in paramnames:
            if param in annotate_dict:
                params.append("%s: %s" % (param, annotate_dict[param]))
            else:
                params.append(param)

    params.reverse()  # back to correct order

    if code_has_star_arg(code):
        star_arg = code.co_varnames[argc + kwonlyargcount]
        if star_arg in annotate_dict:
            params.append("*%s: %s" % (star_arg, annotate_dict[star_arg]))
        else:
            params.append("*%s" % star_arg)
        argc += 1

    # dump parameter list (with default values)
    if is_lambda:
        self.write("lambda")
        if len(params):
            self.write(" ", ", ".join(params))
        elif kwonlyargcount > 0 and not (4 & code.co_flags):
            assert argc == 0
            self.write(" ")

        # If the last statement is None (which is the
        # same thing as "return None" in a lambda) and the
        # next to last statement is a "yield". Then we want to
        # drop the (return) None since that was just put there
        # to have something to after the yield finishes.
        # FIXME: this is a bit hoaky and not general
        if (
            len(ast) > 1
            and self.traverse(ast[-1]) == "None"
            and self.traverse(ast[-2]).strip().startswith("yield")
        ):
            del ast[-1]
            # Now pick out the expr part of the last statement
            ast_expr = ast[-1]
            while ast_expr.kind != "expr":
                ast_expr = ast_expr[0]
            ast[-1] = ast_expr
            pass
    else:
        self.write("(", ", ".join(params))
        # self.println(indent, '#flags:\t', int(code.co_flags))

    ends_in_comma = False
    if kwonlyargcount > 0:
        if not (4 & code.co_flags):
            if argc > 0:
                self.write(", *, ")
            else:
                self.write("*, ")
                pass
            ends_in_comma = True
        else:
            if argc > 0:
                self.write(", ")
                ends_in_comma = True

        ann_dict = kw_dict = default_tup = None
        fn_bits = node[-1].attr
        # Skip over:
        #  MAKE_FUNCTION,
        #  optional docstring
        #  LOAD_CONST qualified name,
        #  LOAD_CONST code object
        index = -5 if node[-2] == "docstring" else -4
        if fn_bits[-1]:
            index -= 1
        if fn_bits[-2]:
            ann_dict = node[index]
            index -= 1
        if fn_bits[-3]:
            kw_dict = node[index]
            index -= 1
        if fn_bits[-4]:
            default_tup = node[index]

        if kw_dict == "expr":
            kw_dict = kw_dict[0]

        kw_args = [None] * kwonlyargcount

        # FIXME: handle free_tup, ann_dict, and default_tup
        if kw_dict:
            assert kw_dict == "dict"
            defaults = [self.traverse(n, indent="") for n in kw_dict[:-2]]
            names = eval(self.traverse(kw_dict[-2]))
            assert len(defaults) == len(names)
            sep = ""
            # FIXME: possibly handle line breaks
            for i, n in enumerate(names):
                idx = kwargs.index(n)
                if annotate_dict and n in annotate_dict:
                    t = "%s: %s=%s" % (n, annotate_dict[n], defaults[i])
                else:
                    t = "%s=%s" % (n, defaults[i])
                kw_args[idx] = t
                pass
            pass

        # handle keyword-only args that carried no default value
        other_kw = [c is None for c in kw_args]

        for i, flag in enumerate(other_kw):
            if flag:
                n = kwargs[i]
                if n in annotate_dict:
                    kw_args[i] = "%s: %s" % (n, annotate_dict[n])
                else:
                    kw_args[i] = "%s" % n

        self.write(", ".join(kw_args))
        ends_in_comma = False
        pass
    else:
        if argc == 0:
            ends_in_comma = True

    if code_has_star_star_arg(code):
        if not ends_in_comma:
            self.write(", ")
        star_star_arg = code.co_varnames[argc + kwonlyargcount]
        if annotate_dict and star_star_arg in annotate_dict:
            self.write("**%s: %s" % (star_star_arg, annotate_dict[star_star_arg]))
        else:
            self.write("**%s" % star_star_arg)

    if is_lambda:
        self.write(": ")
    else:
        self.write(")")
        if annotate_dict and "return" in annotate_dict:
            self.write(" -> %s" % annotate_dict["return"])
        self.println(":")

    if (
        node[-2] == "docstring" and not is_lambda
    ):  # docstring exists, dump it
        self.println(self.traverse(node[-2]))

    assert ast in ("stmts", "lambda_start")

    all_globals = find_all_globals(ast, set())
    globals, nonlocals = find_globals_and_nonlocals(
        ast, set(), set(), code, self.version
    )

    for g in sorted((all_globals & self.mod_globs) | globals):
        self.println(self.indent, "global ", g)

    for nl in sorted(nonlocals):
        self.println(self.indent, "nonlocal ", nl)

    self.mod_globs -= all_globals
    has_none = "None" in code.co_names
    rn = has_none and not find_none(ast)
    self.gen_source(
        ast, code.co_name, scanner_code._customize, is_lambda=is_lambda, returnNone=rn
    )

    # In obscure cases, a function may be a generator but the "yield"
    # was optimized away. Here, we need to put in unreachable code to
    # add in "yield" just so that the compiler will mark
    # the GENERATOR bit of the function. See for example
    # Python 3.x's test_connection.py and test_contexlib_async test programs.
    if not is_lambda and code.co_flags & (CO_GENERATOR | CO_ASYNC_GENERATOR):
        need_bogus_yield = True
        for token in scanner_code._tokens:
            if token == "YIELD_VALUE":
                need_bogus_yield = False
                break
            pass
        if need_bogus_yield:
            self.template_engine(("%|if False:\n%+%|yield None%-",), node)

    scanner_code._tokens = None  # save memory
    scanner_code._customize = None  # save memory
def __init__(
    self,
    name,
    code,
    globs,
    argdefs,
    closure=None,
    vm=None,
    kwdefaults=None,
    annotations=None,
    doc=None,
    qualname=None,
):
    """Emulated function object.

    :param name:     function name, or None to take it from the code object.
    :param code:     a (possibly emulated) code object.
    :param globs:    the globals dict for the function.
    :param argdefs:  positional-parameter default values, or falsy for none.
    :param closure:  tuple of closure cells, or None.
    :param vm:       the owning virtual machine (required).
    :param kwdefaults:  keyword-only defaults dict (3.x only).
    :param annotations: annotations dict (3.x only).
    :param doc:      docstring override; derived from the code object if None.
    :param qualname: qualified name (3.4+ only).
    """
    # FIX: kwdefaults/annotations used to be mutable default arguments
    # ({}), shared across every call; give each call a fresh dict.
    if kwdefaults is None:
        kwdefaults = {}
    if annotations is None:
        annotations = {}

    # FIX: validate vm before reading vm.version; previously a missing vm
    # raised AttributeError and the intended TypeError below was unreachable.
    if not vm:
        raise TypeError("Function() argument 6 (vm) must be passed")

    self._vm = vm
    self.version = vm.version
    self.__doc__ = doc
    if name is not None and not isinstance(name, str):
        raise TypeError(
            "Function() argument 1 (name) must be None or string, not %s"
            % type(name))
    if not iscode(code):
        raise TypeError(
            "Function() argument 2 (code) must be code, not %s" % type(code))
    if not isinstance(globs, dict):
        # FIX: message previously mislabeled this parameter as "argdefs"
        raise TypeError(
            "Function() argument 3 (globs) must be dict, not %s" % type(globs))
    if closure is not None and not isinstance(closure, tuple):
        raise TypeError(
            "Function() argument 5 (closure) must be None or tuple, not %s"
            % type(closure))

    # Function field names below change between Python 2.7 and 3.x.
    # We create attributes for both names. Other code in this file assumes
    # 2.7ish names, while bytecode for 3.x will use 3.x names.
    # TODO: be more stringent based on vm version.
    self.func_code = self.__code__ = code
    self.func_name = self.__name__ = name or code.co_name
    self.func_defaults = self.__defaults__ = tuple(
        argdefs) if argdefs else tuple()
    self.func_closure = self.__closure__ = closure
    self.func_globals = globs
    self.func_locals = vm.frame.f_locals
    # NOTE(review): this replaces the instance __dict__ wholesale. The
    # attributes assigned above survive only if the class declares
    # __slots__ (as byterun's Function does) — confirm against the class.
    self.__dict__ = {
        "version": vm.version,
        "_vm": vm,
    }
    # FIX: an explicitly passed `doc` used to be unconditionally
    # overwritten here; only derive from the code object when doc is None.
    if doc is None:
        self.__doc__ = (code.co_consts[0]
                        if hasattr(code, "co_consts") and code.co_consts
                        else None)
    else:
        self.__doc__ = doc

    if vm.version >= 3.0:
        self.__annotations__ = annotations
        self.__kwdefaults__ = kwdefaults
        if vm.version >= 3.4:
            self.__qualname__ = qualname if qualname else self.__name__
        else:
            assert qualname is None
    else:
        assert annotations == {}
        assert kwdefaults == {}

    # In Python 3.x various generators and list comprehensions have a .0 arg
    # but inspect doesn't show that. In the various MAKE_FUNCTION routines,
    # we will detect this and store True in this field when appropriate.
    if not argdefs and self.__name__.split(
            ".")[-1] in COMPREHENSION_FN_NAMES:
        self.has_dot_zero = True
    else:
        self.has_dot_zero = False

    # From byterun.py:
    #   Sometimes, we need a real Python function. This is for that.
    #
    # An elaboration of the above pithy comment may be helpful.
    # Until this project emulates more functions, we rely heavily
    # on some built-in, or standard library
    # functions. `__build_class__` is an example of a builtin;
    # `import` is another example. Many of Python's standard
    # library inspect routines require native functions, not our
    # emulated classes and types.
    #
    # For the `inspect` module, we've started providing equivalent
    # alternatives, but overall more of this needs to be done.
    #
    # The intent in providing native functions is for use in type
    # testing, mostly. The functions should not be run, since that defeats
    # our ability to trace functions.
    kw = {
        "argdefs": self.func_defaults,
    }
    if closure:
        kw["closure"] = tuple(make_cell(0) for _ in closure)
    if not isinstance(code, types.CodeType) and hasattr(code, "to_native"):
        # Best-effort conversion of an emulated code object to a native one.
        try:
            code = code.to_native()
        except Exception:
            pass
    if isinstance(code, types.CodeType):
        try:
            self._func = types.FunctionType(code, globs, **kw)
            if vm.version >= 3.0:
                # types.FunctionType() above doesn't allow passing
                # in the following attributes, so we set them as
                # assignments below.
                self._func.__kwdefaults__ = kwdefaults
                self._func.__annotations__ = annotations
                pass
        except Exception:
            self._func = None
    else:
        # cross version interpreting... FIXME: fix this up
        self._func = None
def make_function2(self, node, is_lambda, nested=1, code_node=None):
    """
    Dump function definition, doc string, and function body.
    This code is specialized for Python 2.

    :param node:      parse-tree node for the MAKE_FUNCTION/MAKE_CLOSURE
                      construct.
    :param is_lambda: True when deparsing a lambda rather than a "def".
    :param nested:    nesting level (unused here; kept for interface parity).
    :param code_node: tree node whose .attr is the function's code object.
    """

    def build_param(ast, name, default):
        """Render one "name=default" parameter string, expanding
        Python 2 tuple parameters."""
        # if formal parameter is a tuple, the parameter name
        # starts with a dot (eg. '.1', '.2')
        if name.startswith("."):
            # replace the name with the tuple-string
            name = self.get_tuple_parameter(ast, name)
            pass

        if default:
            value = self.traverse(default, indent="")
            maybe_show_tree_param_default(self.showast, name, value)
            result = "%s=%s" % (name, value)
            if result[-2:] == "= ":  # default was 'LOAD_CONST None'
                result += "None"
            return result
        else:
            return name

    # MAKE_FUNCTION_... or MAKE_CLOSURE_...
    assert node[-1].kind.startswith("MAKE_")

    args_node = node[-1]
    if isinstance(args_node.attr, tuple):
        # positional args are after kwargs
        defparams = node[1:args_node.attr[0] + 1]
        pos_args, kw_args, annotate_argc = args_node.attr
    else:
        # attr is a plain count of default parameters
        defparams = node[:args_node.attr]
        kw_args = 0
        pass

    # Python 2 has no qualified-name/lambda shuffling, so no lambda offset.
    lambda_index = None

    if lambda_index and is_lambda and iscode(node[lambda_index].attr):
        assert node[lambda_index].kind == "LOAD_LAMBDA"
        code = node[lambda_index].attr
    else:
        code = code_node.attr

    assert iscode(code)
    code = Code(code, self.scanner, self.currentclass)

    # add defaults values to parameter names
    argc = code.co_argcount
    paramnames = list(code.co_varnames[:argc])

    # defaults are for last n parameters, thus reverse
    paramnames.reverse()
    defparams.reverse()

    try:
        ast = self.build_ast(
            code._tokens,
            code._customize,
            is_lambda=is_lambda,
            noneInNames=("None" in code.co_names),
        )
    except (ParserError, ParserError2) as p:
        # Emit the parser error text into the output instead of crashing.
        self.write(str(p))
        if not self.tolerate_errors:
            self.ERROR = p
        return

    kw_pairs = 0
    indent = self.indent

    # build parameters; zip_longest pads the (reversed) tail of
    # parameters that have no default with None
    params = [
        build_param(ast, name, default)
        for name, default in zip_longest(paramnames, defparams, fillvalue=None)
    ]
    params.reverse()  # back to correct order

    if code_has_star_arg(code):
        params.append("*%s" % code.co_varnames[argc])
        argc += 1

    # dump parameter list (with default values)
    if is_lambda:
        self.write("lambda ", ", ".join(params))
        # If the last statement is None (which is the
        # same thing as "return None" in a lambda) and the
        # next to last statement is a "yield". Then we want to
        # drop the (return) None since that was just put there
        # to have something to after the yield finishes.
        # FIXME: this is a bit hoaky and not general
        if (
            len(ast) > 1
            and self.traverse(ast[-1]) == "None"
            and self.traverse(ast[-2]).strip().startswith("yield")
        ):
            del ast[-1]
            # Now pick out the expr part of the last statement
            ast_expr = ast[-1]
            while ast_expr.kind != "expr":
                ast_expr = ast_expr[0]
            ast[-1] = ast_expr
            pass
    else:
        self.write("(", ", ".join(params))

    if kw_args > 0:
        # co_flags bit 4 (CO_VARARGS) absent means a bare "*" separator
        # must be printed before keyword-only args.
        if not (4 & code.co_flags):
            if argc > 0:
                self.write(", *, ")
            else:
                self.write("*, ")
            pass
        else:
            self.write(", ")

        # Emit the first non-positional-arg child, then stop.
        for n in node:
            if n == "pos_arg":
                continue
            else:
                self.preorder(n)
            break
        pass

    if code_has_star_star_arg(code):
        if argc > 0:
            self.write(", ")
        self.write("**%s" % code.co_varnames[argc + kw_pairs])

    if is_lambda:
        self.write(": ")
    else:
        self.println("):")

    if (
        len(code.co_consts) > 0
        and code.co_consts[0] is not None
        and not is_lambda
    ):  # ugly
        # docstring exists, dump it
        print_docstring(self, indent, code.co_consts[0])

    if not is_lambda:
        assert ast == "stmts"

    all_globals = find_all_globals(ast, set())
    globals, nonlocals = find_globals_and_nonlocals(
        ast, set(), set(), code, self.version
    )

    # Python 2 doesn't support the "nonlocal" statement
    assert self.version >= 3.0 or not nonlocals

    for g in sorted((all_globals & self.mod_globs) | globals):
        self.println(self.indent, "global ", g)

    self.mod_globs -= all_globals
    has_none = "None" in code.co_names
    rn = has_none and not find_none(ast)
    self.gen_source(
        ast, code.co_name, code._customize, is_lambda=is_lambda, returnNone=rn
    )
    code._tokens = None  # save memory
    code._customize = None  # save memory
def n_classdef3(node):
    """Handle "classdef" nonterminal for 3.0 <= version < 3.6.

    Extracts the class name, superclass info, and class-body code
    object from the parse tree, then emits the "class ...:" header
    and the deparsed class body.

    NOTE: closure over `self` (the tree walker) from the enclosing scope.
    """
    assert (3, 0) <= self.version < (3, 6)

    # class definition ('class X(A,B,C):')
    cclass = self.currentclass

    # Pick out various needed bits of information
    # * class_name - the name of the class
    # * subclass_info - the parameters to the class e.g.
    #      class Foo(bar, baz)
    #             ----------
    # * subclass_code - the code for the subclass body
    subclass_info = None
    if node == "classdefdeco2":
        if self.version < (3, 4):
            class_name = node[2][0].attr
        else:
            class_name = node[1][2].attr
        build_class = node
    else:
        build_class = node[0]
        class_name = node[1][0].attr

    assert "mkfunc" == build_class[1]
    mkfunc = build_class[1]
    if mkfunc[0] in ("kwargs", "no_kwargs"):
        if (3, 0) <= self.version < (3, 3):
            for n in mkfunc:
                if hasattr(n, "attr") and iscode(n.attr):
                    subclass_code = n.attr
                    break
                elif n == "expr":
                    subclass_code = n[0].attr
                pass
            pass
        else:
            for n in mkfunc:
                if hasattr(n, "attr") and iscode(n.attr):
                    subclass_code = n.attr
                    break
                pass
            pass
        if node == "classdefdeco2":
            subclass_info = node
        else:
            subclass_info = node[0]
    elif build_class[1][0] == "load_closure":
        # Python 3 with closures not functions
        load_closure = build_class[1]
        if hasattr(load_closure[-3], "attr"):
            # Python 3.3 classes with closures work like this.
            # Note have to test before 3.2 case because
            # index -2 also has an attr.
            subclass_code = find_code_node(load_closure, -3).attr
        elif hasattr(load_closure[-2], "attr"):
            # Python 3.2 works like this
            subclass_code = find_code_node(load_closure, -2).attr
        else:
            # FIX: was `raise "Internal Error ..."` — raising a string
            # is itself a TypeError in Python 3.
            raise RuntimeError(
                "Internal Error n_classdef: cannot find class body")
        if hasattr(build_class[3], "__len__"):
            if not subclass_info:
                subclass_info = build_class[3]
        elif hasattr(build_class[2], "__len__"):
            subclass_info = build_class[2]
        else:
            # FIX: was a string raise, as above.
            raise RuntimeError(
                "Internal Error n_classdef: cannot superclass name")
    elif not subclass_info:
        if mkfunc[0] in ("no_kwargs", "kwargs"):
            subclass_code = mkfunc[1].attr
        else:
            subclass_code = mkfunc[0].attr
        if node == "classdefdeco2":
            subclass_info = node
        else:
            subclass_info = node[0]

    if node == "classdefdeco2":
        self.write("\n")
    else:
        self.write("\n\n")

    self.currentclass = str(class_name)
    self.write(self.indent, "class ", self.currentclass)
    self.print_super_classes3(subclass_info)
    self.println(":")

    # class body
    self.indent_more()
    self.build_class(subclass_code)
    self.indent_less()

    self.currentclass = cclass
    if len(self.param_stack) > 1:
        self.write("\n\n")
    else:
        self.write("\n\n\n")

    self.prune()
def make_function36(self, node, is_lambda, nested=1, code_node=None):
    """Dump function definition, doc string, and function body in
    Python version 3.6 and above.

    NOTE(review): as present here, this copy ends right after the
    parse try/except — on a successful parse it falls through and
    returns None, producing no output. Possibly a truncated
    duplicate of the fuller implementation; confirm against the
    full file.
    """
    # MAKE_CLOSURE adds an additional closure slot

    # In Python 3.6 and above stack change again. I understand
    # 3.7 changes some of those changes, although I don't
    # see it in this code yet. Yes, it is hard to follow
    # and I am sure I haven't been able to keep up.
    # Thank you, Python.

    def build_param(ast, name, default, annotation=None):
        """Render one "name[: annotation]=default" parameter string."""
        value = default
        maybe_show_tree_param_default(self.showast, name, value)
        if annotation:
            result = "%s: %s=%s" % (name, annotation, value)
        else:
            result = "%s=%s" % (name, value)

        # The below can probably be removed. This is probably
        # a holdover from days when LOAD_CONST erroneously
        # didn't handle LOAD_CONST None properly
        if result[-2:] == "= ":  # default was 'LOAD_CONST None'
            result += "None"

        return result

    # MAKE_FUNCTION_... or MAKE_CLOSURE_...
    assert node[-1].kind.startswith("MAKE_")

    # Python 3.3+ adds a qualified name at TOS (-1)
    # moving down the LOAD_LAMBDA instruction
    lambda_index = -3

    args_node = node[-1]

    annotate_dict = {}

    # Get a list of tree nodes that constitute the values for the "default
    # parameters"; these are default values that appear before any *, and are
    # not to be confused with keyword parameters which may appear after *.
    args_attr = args_node.attr
    if len(args_attr) == 3:
        pos_args, kw_args, annotate_argc = args_attr
        # FIX: "closure" was left unbound in this branch but is tested
        # unconditionally below ("if closure:"). Default it to None.
        closure = None
    else:
        pos_args, kw_args, annotate_argc, closure = args_attr

    if node[-2] != "docstring":
        i = -4
    else:
        i = -5

    kw_pairs = 0
    if annotate_argc:
        # Turn into subroutine and DRY with other use
        annotate_node = node[i]
        if annotate_node == "expr":
            annotate_node = annotate_node[0]
        annotate_name_node = annotate_node[-1]
        if annotate_node == "dict" and annotate_name_node.kind.startswith(
            "BUILD_CONST_KEY_MAP"
        ):
            types = [self.traverse(n, indent="") for n in annotate_node[:-2]]
            names = annotate_node[-2].attr
            l = len(types)
            assert l == len(names)
            # NOTE(review): this loop clobbers the outer node index "i";
            # preserved as-is — confirm before changing.
            for i in range(l):
                annotate_dict[names[i]] = types[i]
            pass
        pass
        i -= 1

    if closure:
        # FIXME: fill in
        # annotate = node[i]
        i -= 1

    if kw_args:
        kw_node = node[pos_args]
        if kw_node == "expr":
            kw_node = kw_node[0]
        if kw_node == "dict":
            kw_pairs = kw_node[-1].attr

    defparams = []
    # FIXME: DRY with code below
    default, kw_args, annotate_argc = args_node.attr[0:3]
    if default:
        expr_node = node[0]
        if node[0] == "pos_arg":
            expr_node = expr_node[0]
        assert expr_node == "expr", "expecting mkfunc default node to be an expr"

        if expr_node[0] == "LOAD_CONST" and isinstance(expr_node[0].attr, tuple):
            defparams = [repr(a) for a in expr_node[0].attr]
        elif expr_node[0] in frozenset(("list", "tuple", "dict", "set")):
            defparams = [self.traverse(n, indent="") for n in expr_node[0][:-1]]
    else:
        defparams = []
        pass

    if lambda_index and is_lambda and iscode(node[lambda_index].attr):
        assert node[lambda_index].kind == "LOAD_LAMBDA"
        code = node[lambda_index].attr
    else:
        code = code_node.attr

    assert iscode(code)
    scanner_code = Code(code, self.scanner, self.currentclass)

    # add defaults values to parameter names
    argc = code.co_argcount
    kwonlyargcount = code.co_kwonlyargcount

    paramnames = list(scanner_code.co_varnames[:argc])
    kwargs = list(scanner_code.co_varnames[argc : argc + kwonlyargcount])

    # Defaults apply to the *last* n positional parameters.
    paramnames.reverse()
    defparams.reverse()

    try:
        ast = self.build_ast(
            scanner_code._tokens,
            scanner_code._customize,
            scanner_code,
            is_lambda=is_lambda,
            noneInNames=("None" in code.co_names),
        )
    # FIX: was Python-2-only syntax `except ParserError, p:`, a
    # SyntaxError under Python 3; also catch ParserError2 for
    # consistency with the sibling implementations in this file.
    except (ParserError, ParserError2) as p:
        self.write(str(p))
        if not self.tolerate_errors:
            self.ERROR = p
        return
def listcomp_closure3(node):
    """List comprehensions in Python 3 when handled as a closure.
    See if we can combine code.

    Walks the comprehension's own code object, finds the body, the
    store targets, the collections iterated over, and any "if"
    filters, then emits "<body> for <store> in <collection> [if ...]"
    fragments.

    NOTE: closure over `self` (the tree walker) from the enclosing scope.
    """
    p = self.prec
    self.prec = 27
    # node[1] holds the comprehension's code object
    code_obj = node[1].attr
    assert iscode(code_obj)
    code = Code(code_obj, self.scanner, self.currentclass)

    ast = self.build_ast(code._tokens, code._customize)
    self.customize(code._customize)

    # skip over: sstmt, stmt, return, ret_expr
    # and other singleton derivations
    while len(ast) == 1 or (
        ast in ("sstmt", "return")
        and ast[-1] in ("RETURN_LAST", "RETURN_VALUE")
    ):
        self.prec = 100
        ast = ast[0]

    n = ast[1]

    # collections is the name of the expression(s) we are iterating over
    collections = [node[-3]]
    list_ifs = []

    if self.version == 3.0 and n != "list_iter":
        # FIXME 3.0 is a snowflake here. We need
        # special code for this. Not sure if this is totally
        # correct.
        stores = [ast[3]]
        assert ast[4] == "comp_iter"
        n = ast[4]
        # Find the list comprehension body. It is the inner-most
        # node that is not comp_.. .
        while n == "comp_iter":
            if n[0] == "comp_for":
                n = n[0]
                stores.append(n[2])
                n = n[3]
            elif n[0] in ("comp_if", "comp_if_not"):
                n = n[0]
                # FIXME: just a guess
                if n[0].kind == "expr":
                    list_ifs.append(n)
                else:
                    list_ifs.append([1])
                n = n[2]
                pass
            else:
                break
            pass

        # Skip over n[0] which is something like: _[1]
        self.preorder(n[1])
    else:
        assert n == "list_iter"
        stores = []
        # Find the list comprehension body. It is the inner-most
        # node that is not list_.. .
        while n == "list_iter":
            # recurse one step
            n = n[0]

            if n == "list_for":
                stores.append(n[2])
                n = n[3]
                if n[0] == "list_for":
                    # Dog-paddle down largely singleton reductions
                    # to find the collection (expr)
                    c = n[0][0]
                    if c == "expr":
                        c = c[0]
                    # FIXME: grammar is wonky here? Is this really an attribute?
                    if c == "attribute":
                        c = c[0]
                    collections.append(c)
                    pass
            elif n in ("list_if", "list_if_not"):
                # FIXME: just a guess
                if n[0].kind == "expr":
                    list_ifs.append(n)
                else:
                    list_ifs.append([1])
                n = n[2]
                pass
            elif n == "list_if37":
                list_ifs.append(n)
                n = n[-1]
                pass
            elif n == "list_afor":
                # async "for" inside the comprehension
                collections.append(n[0][0])
                n = n[1]
                stores.append(n[1][0])
                n = n[3]
                pass

        assert n == "lc_body", ast
        self.preorder(n[0])

    # FIXME: add indentation around "for"'s and "in"'s
    n_colls = len(collections)
    for i, store in enumerate(stores):
        if i >= n_colls:
            break
        # LOAD_DEREF collection on an async code object marks an
        # "async for" clause
        if collections[i] == "LOAD_DEREF" and co_flags_is_async(
            code_obj.co_flags
        ):
            self.write(" async")
            pass
        self.write(" for ")
        self.preorder(store)
        self.write(" in ")
        self.preorder(collections[i])
        if i < len(list_ifs):
            self.preorder(list_ifs[i])
            pass
        pass
    self.prec = p
def decompile(
    bytecode_version: str,
    co,
    out=None,
    showasm=None,
    showast=None,
    timestamp=None,
    showgrammar=False,
    source_encoding=None,
    code_objects=None,
    source_size=None,
    is_pypy=None,
    magic_int=None,
    mapstream=None,
    do_fragments=False,
    compile_mode="exec",
) -> Any:
    """
    ingests and deparses a given code block 'co'

    if `bytecode_version` is None, use the current Python interpreter
    version.

    Caller is responsible for closing `out` and `mapstream`
    """
    # FIX: showast/code_objects used to be mutable default arguments
    # ({}), shared across every call; substitute fresh dicts instead.
    if showast is None:
        showast = {}
    if code_objects is None:
        code_objects = {}

    if bytecode_version is None:
        bytecode_version = sysinfo2float()

    # store final output stream for case of error
    real_out = out or sys.stdout

    def write(s):
        # Emit one line (with trailing newline) to the output stream.
        s += "\n"
        real_out.write(s)

    assert iscode(co)

    co_pypy_str = "PyPy " if is_pypy else ""
    run_pypy_str = "PyPy " if IS_PYPY else ""
    sys_version_lines = sys.version.split("\n")
    if source_encoding:
        write("# -*- coding: %s -*-" % source_encoding)
    write(
        "# decompyle3 version %s\n"
        "# %sPython bytecode %s%s\n# Decompiled from: %sPython %s"
        % (
            VERSION,
            co_pypy_str,
            bytecode_version,
            " (%s)" % str(magic_int) if magic_int else "",
            run_pypy_str,
            "\n# ".join(sys_version_lines),
        )
    )
    if co.co_filename:
        write("# Embedded file name: %s" % co.co_filename)
    if timestamp:
        write("# Compiled at: %s" % datetime.datetime.fromtimestamp(timestamp))
    if source_size:
        write("# Size of source mod 2**32: %d bytes" % source_size)

    debug_opts = {"asm": showasm, "ast": showast, "grammar": showgrammar}

    try:
        if mapstream:
            if isinstance(mapstream, str):
                mapstream = _get_outstream(mapstream)

            deparsed = deparse_code_with_map(
                bytecode_version,
                co,
                out,
                showasm,
                showast,
                showgrammar,
                code_objects=code_objects,
                is_pypy=is_pypy,
            )
            # Shift source line numbers past the comment header we wrote.
            header_count = 3 + len(sys_version_lines)
            linemap = [
                (line_no, deparsed.source_linemap[line_no] + header_count)
                for line_no in sorted(deparsed.source_linemap.keys())
            ]
            mapstream.write("\n\n# %s\n" % linemap)
        else:
            if do_fragments:
                deparse_fn = code_deparse_fragments
            else:
                deparse_fn = code_deparse
            deparsed = deparse_fn(
                co,
                out,
                bytecode_version,
                debug_opts=debug_opts,
                is_pypy=is_pypy,
                compile_mode=compile_mode,
            )
            pass
        return deparsed
    except pysource.SourceWalkerError as e:
        # deparsing failed; re-raise with the original as the cause so
        # the underlying traceback is preserved
        raise pysource.SourceWalkerError(str(e)) from e
def ingest(self, co, classname=None, code_objects={}, show_asm=None): """ Pick out tokens from an decompyle3 code object, and transform them, returning a list of decompyle3 Token's. The transformations are made to assist the deparsing grammar. Specificially: - various types of LOAD_CONST's are categorized in terms of what they load - COME_FROM instructions are added to assist parsing control structures - MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments - some EXTENDED_ARGS instructions are removed Also, when we encounter certain tokens, we add them to a set which will cause custom grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST cause specific rules for the specific number of arguments they take. """ def tokens_append(j, token): tokens.append(token) self.offset2tok_index[token.offset] = j j += 1 assert j == len(tokens) return j if not show_asm: show_asm = self.show_asm bytecode = self.build_instructions(co) # show_asm = 'both' if show_asm in ("both", "before"): for instr in bytecode.get_instructions(co): print(instr.disassemble()) # "customize" is in the process of going away here customize = {} if self.is_pypy: customize["PyPy"] = 0 # Scan for assertions. Later we will # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT'. # 'LOAD_ASSERT' is used in assert statements. self.load_asserts = set() # list of tokens/instructions tokens = [] self.offset2tok_index = {} n = len(self.insts) for i, inst in enumerate(self.insts): # We need to detect the difference between: # raise AssertionError # and # assert ... # If we have a JUMP_FORWARD after the # RAISE_VARARGS then we have a "raise" statement # else we have an "assert" statement. 
assert_can_follow = inst.opname == "POP_JUMP_IF_TRUE" and i + 1 < n if assert_can_follow: next_inst = self.insts[i + 1] if ( next_inst.opname == "LOAD_GLOBAL" and next_inst.argval == "AssertionError" ): raise_idx = self.offset2inst_index[self.prev_op[inst.argval]] raise_inst = self.insts[raise_idx] if raise_inst.opname.startswith("RAISE_VARARGS"): self.load_asserts.add(next_inst.offset) pass pass # Operand values in Python wordcode are small. As a result, # there are these EXTENDED_ARG instructions - way more than # before 3.6. These parsing a lot of pain. # To simplify things we want to untangle this. We also # do this loop before we compute jump targets. for i, inst in enumerate(self.insts): # One artifact of the "too-small" operand problem, is that # some backward jumps, are turned into forward jumps to another # "extended arg" backward jump to the same location. if inst.opname == "JUMP_FORWARD": jump_inst = self.insts[self.offset2inst_index[inst.argval]] if jump_inst.has_extended_arg and jump_inst.opname.startswith("JUMP"): # Create comination of the jump-to instruction and # this one. 
Keep the position information of this instruction, # but the operator and operand properties come from the other # instruction self.insts[i] = Instruction( jump_inst.opname, jump_inst.opcode, jump_inst.optype, jump_inst.inst_size, jump_inst.arg, jump_inst.argval, jump_inst.argrepr, jump_inst.has_arg, inst.offset, inst.starts_line, inst.is_jump_target, inst.has_extended_arg, ) # Get jump targets # Format: {target offset: [jump offsets]} jump_targets = self.find_jump_targets(show_asm) # print("XXX2", jump_targets) last_op_was_break = False j = 0 for i, inst in enumerate(self.insts): argval = inst.argval op = inst.opcode if inst.opname == "EXTENDED_ARG": # FIXME: The EXTENDED_ARG is used to signal annotation # parameters if i + 1 < n and self.insts[i + 1].opcode != self.opc.MAKE_FUNCTION: continue if inst.offset in jump_targets: jump_idx = 0 # We want to process COME_FROMs to the same offset to be in *descending* # offset order so we have the larger range or biggest instruction interval # last. (I think they are sorted in increasing order, but for safety # we sort them). That way, specific COME_FROM tags will match up # properly. For example, a "loop" with an "if" nested in it should have the # "loop" tag last so the grammar rule matches that properly. 
for jump_offset in sorted(jump_targets[inst.offset], reverse=True): come_from_name = "COME_FROM" opname = self.opname_for_offset(jump_offset) if opname == "EXTENDED_ARG": k = xdis.next_offset(op, self.opc, jump_offset) opname = self.opname_for_offset(k) if opname.startswith("SETUP_"): come_from_type = opname[len("SETUP_") :] come_from_name = "COME_FROM_%s" % come_from_type pass elif inst.offset in self.except_targets: come_from_name = "COME_FROM_EXCEPT_CLAUSE" j = tokens_append( j, Token( come_from_name, jump_offset, repr(jump_offset), offset="%s_%s" % (inst.offset, jump_idx), has_arg=True, opc=self.opc, has_extended_arg=False, ), ) jump_idx += 1 pass pass pattr = inst.argrepr opname = inst.opname if op in self.opc.CONST_OPS: const = argval if iscode(const): if const.co_name == "<lambda>": assert opname == "LOAD_CONST" opname = "LOAD_LAMBDA" elif const.co_name == "<genexpr>": opname = "LOAD_GENEXPR" elif const.co_name == "<dictcomp>": opname = "LOAD_DICTCOMP" elif const.co_name == "<setcomp>": opname = "LOAD_SETCOMP" elif const.co_name == "<listcomp>": opname = "LOAD_LISTCOMP" else: opname = "LOAD_CODE" # verify() uses 'pattr' for comparison, since 'attr' # now holds Code(const) and thus can not be used # for comparison (todo: think about changing this) # pattr = 'code_object @ 0x%x %s->%s' %\ # (id(const), const.co_filename, const.co_name) pattr = "<code_object " + const.co_name + ">" elif isinstance(const, str): opname = "LOAD_STR" else: if isinstance(inst.arg, int) and inst.arg < len(co.co_consts): argval, _ = _get_const_info(inst.arg, co.co_consts) # Why don't we use _ above for "pattr" rather than "const"? # This *is* a little hoaky, but we have to coordinate with # other parts like n_LOAD_CONST in pysource.py for example. pattr = const pass elif opname == "IMPORT_NAME": if "." 
in inst.argval: opname = "IMPORT_NAME_ATTR" pass elif opname in ("MAKE_FUNCTION", "MAKE_CLOSURE"): flags = argval opname = "MAKE_FUNCTION_%d" % (flags) attr = [] for flag in self.MAKE_FUNCTION_FLAGS: bit = flags & 1 attr.append(bit) flags >>= 1 attr = attr[:4] # remove last value: attr[5] == False j = tokens_append( j, Token( opname=opname, attr=attr, pattr=pattr, offset=inst.offset, linestart=inst.starts_line, op=op, has_arg=inst.has_arg, opc=self.opc, has_extended_arg=inst.has_extended_arg, ), ) continue elif op in self.varargs_ops: pos_args = argval if self.is_pypy and not pos_args and opname == "BUILD_MAP": opname = "BUILD_MAP_n" else: opname = "%s_%d" % (opname, pos_args) elif self.is_pypy and opname == "JUMP_IF_NOT_DEBUG": # The value in the dict is in special cases in semantic actions, such # as JUMP_IF_NOT_DEBUG. The value is not used in these cases, so we put # in arbitrary value 0. customize[opname] = 0 elif opname == "UNPACK_EX": # FIXME: try with scanner and parser by # changing argval before_args = argval & 0xFF after_args = (argval >> 8) & 0xFF pattr = "%d before vararg, %d after" % (before_args, after_args) argval = (before_args, after_args) opname = "%s_%d+%d" % (opname, before_args, after_args) elif op == self.opc.JUMP_ABSOLUTE: # Refine JUMP_ABSOLUTE further in into: # # * "JUMP_BACK" - which are are used in loops. This is sometimes # found at the end of a looping construct # * "BREAK_LOOP" - which are are used to break loops. # * "CONTINUE" - jumps which may appear in a "continue" statement. # It is okay to confuse this with JUMP_BACK. The # grammar should tolerate this. # * "JUMP_FORWARD - forward jumps that are not BREAK_LOOP jumps. # # The loop-type and continue-type jumps will help us # classify loop boundaries The continue-type jumps # help us get "continue" statements with would # otherwise be turned into a "pass" statement because # JUMPs are sometimes ignored in rules as just # boundary overhead. 
Again, in comprehensions we might # sometimes classify JUMP_BACK as CONTINUE, but that's # okay since grammar rules should tolerate that. pattr = argval target = self.get_target(inst.offset) if target <= inst.offset: next_opname = self.insts[i + 1].opname # 'Continue's include jumps to loops that are not # and the end of a block which follow with POP_BLOCK and COME_FROM_LOOP. # If the JUMP_ABSOLUTE is to a FOR_ITER and it is followed by another JUMP_FORWARD # then we'll take it as a "continue". is_continue = ( self.insts[self.offset2inst_index[target]].opname == "FOR_ITER" and self.insts[i + 1].opname == "JUMP_FORWARD" ) if self.version < 3.8 and ( is_continue or ( inst.offset in self.stmts and ( inst.starts_line and next_opname not in self.not_continue_follow ) ) ): opname = "CONTINUE" else: opname = "JUMP_BACK" # FIXME: this is a hack to catch stuff like: # if x: continue # the "continue" is not on a new line. # There are other situations where we don't catch # CONTINUE as well. if tokens[-1].kind == "JUMP_BACK" and tokens[-1].attr <= argval: if tokens[-2].kind == "BREAK_LOOP": del tokens[-1] else: # intern is used because we are changing the *previous* token. # A POP_TOP suggests a "break" rather than a "continue"? 
if tokens[-2] == "POP_TOP": tokens[-1].kind = sys.intern("BREAK_LOOP") else: tokens[-1].kind = sys.intern("CONTINUE") pass pass pass if last_op_was_break and opname == "CONTINUE": last_op_was_break = False continue pass else: opname = "JUMP_FORWARD" elif opname.startswith("POP_JUMP_IF_") and not inst.jumps_forward(): opname += "_BACK" elif inst.offset in self.load_asserts: opname = "LOAD_ASSERT" last_op_was_break = opname == "BREAK_LOOP" j = tokens_append( j, Token( opname=opname, attr=argval, pattr=pattr, offset=inst.offset, linestart=inst.starts_line, op=op, has_arg=inst.has_arg, opc=self.opc, has_extended_arg=inst.has_extended_arg, ), ) pass if show_asm in ("both", "after"): for t in tokens: print(t.format(line_prefix="")) print() return tokens, customize
def make_function3_annotate(self, node, is_lambda, nested=1, code_node=None, annotate_last=-1):
    """
    Dump function definition, doc string, and function body,
    including parameter annotations. This code is specialized
    for Python 3.

    node          -- parse-tree node for the function; node[-1] is the
                     MAKE_FUNCTION/MAKE_CLOSURE token.
    is_lambda     -- True when emitting a lambda rather than a "def".
    code_node     -- tree node whose .attr holds the function's code object
                     (used when the lambda short-cut below does not apply).
    annotate_last -- index of the "annotate_tuple" child; recomputed below.
    """

    def build_param(ast, name, default):
        """build parameters:
            - handle defaults
            - handle format tuple parameters
        """
        if default:
            value = self.traverse(default, indent="")
            maybe_show_tree_param_default(self, name, value)
            result = "%s=%s" % (name, value)
            if result[-2:] == "= ":  # default was 'LOAD_CONST None'
                result += "None"
            return result
        else:
            return name

    # MAKE_FUNCTION_... or MAKE_CLOSURE_...
    assert node[-1].kind.startswith("MAKE_")

    # Scan backwards for the annotation tuple node, if any.
    annotate_tuple = None
    for annotate_last in range(len(node) - 1, -1, -1):
        if node[annotate_last] == "annotate_tuple":
            annotate_tuple = node[annotate_last]
            break
    annotate_args = {}

    if (
        annotate_tuple == "annotate_tuple"
        and annotate_tuple[0] in ("LOAD_CONST", "LOAD_NAME")
        and isinstance(annotate_tuple[0].attr, tuple)
    ):
        # Walk the "annotate_arg"/"annotate_tuple" children that precede
        # the tuple, pairing each with its name from the tuple constant.
        annotate_tup = annotate_tuple[0].attr
        i = -1
        j = annotate_last - 1
        l = -len(node)
        while j >= l and node[j].kind in ("annotate_arg", "annotate_tuple"):
            annotate_args[annotate_tup[i]] = node[j][0]
            i -= 1
            j -= 1

    args_node = node[-1]
    if isinstance(args_node.attr, tuple):
        # positional args are before kwargs
        defparams = node[: args_node.attr[0]]
        pos_args, kw_args, annotate_argc = args_node.attr
        if "return" in annotate_args.keys():
            # "return" is carried in the same tuple but is not a parameter.
            annotate_argc = len(annotate_args) - 1
    else:
        defparams = node[: args_node.attr]
        kw_args = 0
        annotate_argc = 0
        pass

    # Render each annotation subtree to its source text once, up front.
    annotate_dict = {}
    for name in annotate_args.keys():
        n = self.traverse(annotate_args[name], indent="")
        annotate_dict[name] = n

    # Where the LOAD_LAMBDA sits relative to MAKE_FUNCTION differs by version.
    if 3.0 <= self.version <= 3.2:
        lambda_index = -2
    elif 3.03 <= self.version:
        lambda_index = -3
    else:
        lambda_index = None

    if lambda_index and is_lambda and iscode(node[lambda_index].attr):
        assert node[lambda_index].kind == "LOAD_LAMBDA"
        code = node[lambda_index].attr
    else:
        code = code_node.attr

    assert iscode(code)
    code = Code(code, self.scanner, self.currentclass)

    # add defaults values to parameter names
    argc = code.co_argcount
    kwonlyargcount = code.co_kwonlyargcount

    paramnames = list(code.co_varnames[:argc])
    if kwonlyargcount > 0:
        kwargs = list(code.co_varnames[argc:argc + kwonlyargcount])

    try:
        ast = self.build_ast(
            code._tokens,
            code._customize,
            is_lambda=is_lambda,
            noneInNames=("None" in code.co_names),
        )
    except (ParserError, ParserError2) as p:
        # Emit the parse error text in place of the body; abort unless
        # we are tolerating errors.
        self.write(str(p))
        if not self.tolerate_errors:
            self.ERROR = p
        return

    kw_pairs = args_node.attr[1]
    indent = self.indent

    if is_lambda:
        self.write("lambda ")
    else:
        self.write("(")

    # Continuation lines of the parameter list align under the "(".
    last_line = self.f.getvalue().split("\n")[-1]
    l = len(last_line)
    indent = " " * l
    line_number = self.line_number

    # Positional parameters without defaults come first.
    i = len(paramnames) - len(defparams)
    suffix = ""
    for param in paramnames[:i]:
        self.write(suffix, param)
        suffix = ", "
        if param in annotate_dict:
            self.write(": %s" % annotate_dict[param])
            if line_number != self.line_number:
                suffix = ",\n" + indent
                line_number = self.line_number
        # value, string = annotate_args[param]
        # if string:
        #     self.write(': "%s"' % value)
        # else:
        #     self.write(': %s' % value)

    # Then positional parameters that carry default values ("pos_arg" nodes).
    suffix = ", " if i > 0 else ""
    for n in node:
        if n == "pos_arg":
            self.write(suffix)
            param = paramnames[i]
            self.write(param)
            if param in annotate_args:
                aa = annotate_args[param]
                if isinstance(aa, tuple):
                    aa = aa[0]
                    self.write(': "%s"' % aa)
                elif isinstance(aa, SyntaxTree):
                    self.write(": ")
                    self.preorder(aa)
            self.write("=")
            i += 1
            self.preorder(n)
            if line_number != self.line_number:
                suffix = ",\n" + indent
                line_number = self.line_number
            else:
                suffix = ", "

    if code_has_star_arg(code):
        star_arg = code.co_varnames[argc + kwonlyargcount]
        if annotate_dict and star_arg in annotate_dict:
            self.write(suffix, "*%s: %s" % (star_arg, annotate_dict[star_arg]))
        else:
            self.write(suffix, "*%s" % star_arg)
        argc += 1

    # self.println(indent, '#flags:\t', int(code.co_flags))
    ends_in_comma = False
    if kwonlyargcount > 0:
        # Keyword-only parameters need a bare "*" separator when there is
        # no *args parameter.
        if not code_has_star_arg(code):
            if argc > 0:
                self.write(", *, ")
            else:
                self.write("*, ")
                pass
            ends_in_comma = True
        else:
            if argc > 0:
                self.write(", ")
                ends_in_comma = True

        # Fill slots by position in co_varnames, from "kwarg" nodes.
        kw_args = [None] * kwonlyargcount

        for n in node:
            if n == "kwargs":
                n = n[0]
            if n == "kwarg":
                name = eval(n[0].pattr)
                idx = kwargs.index(name)
                default = self.traverse(n[1], indent="")
                if annotate_dict and name in annotate_dict:
                    kw_args[idx] = "%s: %s=%s" % (name, annotate_dict[name], default)
                else:
                    kw_args[idx] = "%s=%s" % (name, default)
                pass
            pass

        # handling other args: keyword-only names with no default.
        other_kw = [c == None for c in kw_args]

        for i, flag in enumerate(other_kw):
            if flag:
                n = kwargs[i]
                if n in annotate_dict:
                    kw_args[i] = "%s: %s" % (n, annotate_dict[n])
                else:
                    kw_args[i] = "%s" % n

        self.write(", ".join(kw_args))
        ends_in_comma = False
    else:
        if argc == 0:
            ends_in_comma = True

    if code_has_star_star_arg(code):
        if not ends_in_comma:
            self.write(", ")
        star_star_arg = code.co_varnames[argc + kwonlyargcount]
        if annotate_dict and star_star_arg in annotate_dict:
            self.write("**%s: %s" % (star_star_arg, annotate_dict[star_star_arg]))
        else:
            self.write("**%s" % star_star_arg)

    if is_lambda:
        self.write(": ")
    else:
        self.write(")")

    if "return" in annotate_tuple[0].attr:
        # NOTE(review): `no_paramnames` is not defined anywhere in this
        # function — reaching this branch with differing line numbers
        # raises NameError. Looks like a leftover from an edit; confirm
        # against upstream before relying on this path.
        if (line_number != self.line_number) and not no_paramnames:
            self.write("\n" + indent)
            line_number = self.line_number
        self.write(" -> ")
        if "return" in annotate_dict:
            self.write(annotate_dict["return"])
        else:
            # value, string = annotate_args['return']
            # if string:
            #     self.write(' -> "%s"' % value)
            # else:
            #     self.write(' -> %s' % value)
            self.preorder(node[annotate_last - 1])

    self.println(":")

    if (len(code.co_consts) > 0 and code.co_consts[0] is not None and not is_lambda):  # ugly
        # docstring exists, dump it
        print_docstring(self, self.indent, code.co_consts[0])

    code._tokens = None  # save memory
    assert ast == "stmts"

    all_globals = find_all_globals(ast, set())
    globals, nonlocals = find_globals_and_nonlocals(ast, set(), set(), code, self.version)

    # Emit "global"/"nonlocal" declarations needed inside the body.
    for g in sorted((all_globals & self.mod_globs) | globals):
        self.println(self.indent, "global ", g)

    for nl in sorted(nonlocals):
        self.println(self.indent, "nonlocal ", nl)

    self.mod_globs -= all_globals
    has_none = "None" in code.co_names
    rn = has_none and not find_none(ast)
    self.gen_source(ast, code.co_name, code._customize, is_lambda=is_lambda, returnNone=rn)
    code._tokens = code._customize = None  # save memory
def n_classdef36(node):
    # class definition ('class X(A,B,C):')
    #
    # Emits the "class <name>(<supers>):" header, then the class body,
    # restoring self.currentclass afterwards. `node` is either a
    # "classdefdeco2" (decorated class) or wraps a build-class subtree.
    cclass = self.currentclass

    # Pick out various needed bits of information
    # * class_name - the name of the class
    # * subclass_info - the parameters to the class e.g.
    #      class Foo(bar, baz)
    #             ----------
    # * subclass_code - the code for the subclass body
    subclass_info = None
    if node == "classdefdeco2":
        if isinstance(node[1][1].attr, str):
            class_name = node[1][1].attr
        else:
            class_name = node[1][2].attr
        build_class = node
    else:
        build_class = node[0]
        if build_class == "build_class_kw":
            mkfunc = build_class[1]
            assert mkfunc == "mkfunc"
            subclass_info = build_class
            if hasattr(mkfunc[0], "attr") and iscode(mkfunc[0].attr):
                subclass_code = mkfunc[0].attr
            else:
                assert mkfunc[0] == "load_closure"
                subclass_code = mkfunc[1].attr
                assert iscode(subclass_code)
        if build_class[1][0] == "load_closure":
            code_node = build_class[1][1]
        else:
            code_node = build_class[1][0]
        class_name = code_node.attr.co_name

    assert "mkfunc" == build_class[1]
    mkfunc = build_class[1]
    if mkfunc[0] in ("kwargs", "no_kwargs"):
        # Class body code object is whichever child carries a code attr.
        for n in mkfunc:
            if hasattr(n, "attr") and iscode(n.attr):
                subclass_code = n.attr
                break
            pass
        if node == "classdefdeco2":
            subclass_info = node
        else:
            subclass_info = node[0]
    elif build_class[1][0] == "load_closure":
        # Python 3 with closures not functions
        load_closure = build_class[1]
        if hasattr(load_closure[-3], "attr"):
            # Python 3.3 classes with closures work like this.
            # Note have to test before 3.2 case because
            # index -2 also has an attr.
            subclass_code = load_closure[-3].attr
        elif hasattr(load_closure[-2], "attr"):
            # Python 3.2 works like this
            subclass_code = load_closure[-2].attr
        else:
            # BUG FIX: the original `raise "<string>"` is illegal in
            # Python 3 (exceptions must derive from BaseException) and
            # would surface as an unrelated TypeError, losing the message.
            raise RuntimeError("Internal Error n_classdef: cannot find class body")
        if hasattr(build_class[3], "__len__"):
            if not subclass_info:
                subclass_info = build_class[3]
        elif hasattr(build_class[2], "__len__"):
            subclass_info = build_class[2]
        else:
            # Same fix as above; message text kept as-is.
            raise RuntimeError("Internal Error n_classdef: cannot superclass name")
    elif node == "classdefdeco2":
        subclass_info = node
        subclass_code = build_class[1][0].attr
    elif not subclass_info:
        if mkfunc[0] in ("no_kwargs", "kwargs"):
            subclass_code = mkfunc[1].attr
        else:
            subclass_code = mkfunc[0].attr
        if node == "classdefdeco2":
            subclass_info = node
        else:
            subclass_info = node[0]

    # Decorated classes get one leading blank line; plain ones get two.
    if node == "classdefdeco2":
        self.write("\n")
    else:
        self.write("\n\n")

    self.currentclass = str(class_name)
    self.write(self.indent, "class ", self.currentclass)

    self.print_super_classes3(subclass_info)
    self.println(":")

    # class body
    self.indent_more()
    self.build_class(subclass_code)
    self.indent_less()

    self.currentclass = cclass
    if len(self.param_stack) > 1:
        self.write("\n\n")
    else:
        self.write("\n\n\n")

    self.prune()
def make_function3_annotate(self, node, is_lambda, nested=1, code_node=None, annotate_last=-1):
    """
    Dump function definition, doc string, and function body,
    including parameter annotations. This code is specialized
    for Python 3.

    node          -- parse-tree node for the function; node[-1] is the
                     MAKE_FUNCTION/MAKE_CLOSURE token.
    is_lambda     -- True when emitting a lambda rather than a "def".
    code_node     -- tree node whose .attr holds the function's code object.
    annotate_last -- index of the "annotate_tuple" child; recomputed below.
    """

    def build_param(ast, name, default):
        """build parameters:
            - handle defaults
            - handle format tuple parameters
        """
        if default:
            value = self.traverse(default, indent="")
            maybe_show_tree_param_default(self, name, value)
            result = "%s=%s" % (name, value)
            if result[-2:] == "= ":  # default was 'LOAD_CONST None'
                result += "None"
            return result
        else:
            return name

    # MAKE_FUNCTION_... or MAKE_CLOSURE_...
    assert node[-1].kind.startswith("MAKE_")

    # Scan backwards for the annotation tuple node, if any.
    annotate_tuple = None
    for annotate_last in range(len(node) - 1, -1, -1):
        if node[annotate_last] == "annotate_tuple":
            annotate_tuple = node[annotate_last]
            break
    annotate_args = {}

    if (
        annotate_tuple == "annotate_tuple"
        and annotate_tuple[0] in ("LOAD_CONST", "LOAD_NAME")
        and isinstance(annotate_tuple[0].attr, tuple)
    ):
        # Pair each preceding "annotate_arg"/"annotate_tuple" child with
        # its name from the tuple constant, walking both backwards.
        annotate_tup = annotate_tuple[0].attr
        i = -1
        j = annotate_last - 1
        l = -len(node)
        while j >= l and node[j].kind in ("annotate_arg", "annotate_tuple"):
            annotate_args[annotate_tup[i]] = node[j][0]
            i -= 1
            j -= 1

    args_node = node[-1]
    if isinstance(args_node.attr, tuple):
        # positional args are before kwargs
        defparams = node[: args_node.attr[0]]
        pos_args, kw_args, annotate_argc = args_node.attr
        if "return" in annotate_args.keys():
            # "return" rides in the same tuple but is not a parameter.
            annotate_argc = len(annotate_args) - 1
    else:
        defparams = node[: args_node.attr]
        kw_args = 0
        annotate_argc = 0
        pass

    # Render each annotation subtree to source text once, up front.
    annotate_dict = {}
    for name in annotate_args.keys():
        n = self.traverse(annotate_args[name], indent="")
        annotate_dict[name] = n

    # Where the LOAD_LAMBDA sits relative to MAKE_FUNCTION differs by version.
    if 3.0 <= self.version <= 3.2:
        lambda_index = -2
    elif 3.03 <= self.version:
        lambda_index = -3
    else:
        lambda_index = None

    if lambda_index and is_lambda and iscode(node[lambda_index].attr):
        assert node[lambda_index].kind == "LOAD_LAMBDA"
        code = node[lambda_index].attr
    else:
        code = code_node.attr

    assert iscode(code)
    code = Code(code, self.scanner, self.currentclass)

    # add defaults values to parameter names
    argc = code.co_argcount
    kwonlyargcount = code.co_kwonlyargcount

    paramnames = list(code.co_varnames[:argc])
    if kwonlyargcount > 0:
        kwargs = list(code.co_varnames[argc:argc + kwonlyargcount])

    try:
        ast = self.build_ast(
            code._tokens,
            code._customize,
            code,
            is_lambda=is_lambda,
            noneInNames=("None" in code.co_names),
        )
    # BUG FIX: `except ParserError, p:` is Python 2 syntax and is a
    # SyntaxError under Python 3; use the `as` form instead.
    except ParserError as p:
        # Emit the parse error text in place of the body; abort unless
        # we are tolerating errors.
        self.write(str(p))
        if not self.tolerate_errors:
            self.ERROR = p
        return
def getargs(co, version):
    """Get information about the arguments accepted by a code object.

    Three things are returned: (args, varargs, varkw), where 'args' is
    a list of argument names (possibly containing nested lists), and
    'varargs' and 'varkw' are the names of the * and ** arguments or
    None.

    :param co: code object (a native one or a cross-version proxy whose
               attributes mirror the native layout).
    :param version: bytecode version; unused here but kept for interface
                    compatibility with callers.
    :raises TypeError: if `co` is not a code object.
    """
    if not iscode(co):
        raise TypeError('{!r} is not a code object'.format(co))

    def _byte(b):
        # BUG FIX: indexing co_code yields a 1-char str on Python 2 but
        # an int on Python 3 bytes; the original unconditional ord()
        # raised TypeError on ints. Normalize both to an int.
        return b if isinstance(b, int) else ord(b)

    nargs = co.co_argcount
    names = co.co_varnames
    args = list(names[:nargs])
    step = 0

    # The following acrobatics are for anonymous (tuple) arguments,
    # whose placeholder names are empty or start with '.'.
    for i in range(nargs):
        if args[i][:1] in ('', '.'):
            stack, remain, count = [], [], []
            # Walk the bytecode that unpacks the tuple argument,
            # rebuilding the (possibly nested) name structure.
            while step < len(co.co_code):
                op = _byte(co.co_code[step])
                step = step + 1
                if op >= opc.HAVE_ARGUMENT:
                    opname = opc.opname[op]
                    # Classic 2-byte little-endian operand.
                    value = _byte(co.co_code[step]) + _byte(co.co_code[step + 1]) * 256
                    step = step + 2
                    if opname in ('UNPACK_TUPLE', 'UNPACK_SEQUENCE'):
                        remain.append(value)
                        count.append(value)
                    elif opname in ('STORE_FAST', 'STORE_DEREF'):
                        if opname == 'STORE_FAST':
                            stack.append(names[value])
                        else:
                            stack.append(co.co_cellvars[value])

                        # Special case for sublists of length 1: def foo((bar))
                        # doesn't generate the UNPACK_TUPLE bytecode, so if
                        # `remain` is empty here, we have such a sublist.
                        if not remain:
                            stack[0] = [stack[0]]
                            break
                        else:
                            remain[-1] = remain[-1] - 1
                            while remain[-1] == 0:
                                remain.pop()
                                size = count.pop()
                                stack[-size:] = [stack[-size:]]
                                if not remain:
                                    break
                                remain[-1] = remain[-1] - 1
                            if not remain:
                                break
            args[i] = stack[0]

    varargs = None
    if co.co_flags & COMPILER_FLAG_BIT["VARARGS"]:
        varargs = co.co_varnames[nargs]
        nargs = nargs + 1
    varkw = None
    if co.co_flags & COMPILER_FLAG_BIT["VARKEYWORDS"]:
        varkw = co.co_varnames[nargs]
    return Arguments(args, varargs, varkw)