def enter_code_object(self, co_obj):
    """Reset the per-code-object state before ``co_obj`` is walked.

    An empty block list is created for every label offset (the jump targets
    from ``dis.findlabels`` as post-processed by ``opcode_util.extendlabels``).
    Offset 0 always gets a block, which also becomes the current block.
    The value stack and the loop stack start out empty.
    """
    bytecode = co_obj.co_code
    block_starts = opcode_util.extendlabels(bytecode, dis.findlabels(bytecode))

    block_map = {}
    for offset in block_starts:
        block_map[offset] = []

    # The entry block is shared between ``blocks[0]`` and ``block``.
    entry_block = []
    block_map[0] = entry_block

    self.blocks = block_map
    self.stack = []
    self.loop_stack = []
    self.block = entry_block
def __init__(self, func):
    """Collect the bytecode metadata of ``func`` and pass it to the base class.

    Raises ``errors.ByteCodeSupportError`` when no code object is available
    and ``NotImplementedError`` when the code object declares cell variables.
    """
    function = get_function_object(func)
    code_obj = get_code_object(function)
    signature = utils.pysignature(function)
    if not code_obj:
        raise errors.ByteCodeSupportError(
            "%s does not provide its bytecode" % function)
    if code_obj.co_cellvars:
        raise NotImplementedError("cell vars are not supported")

    # Instruction table built from ByteCodeIter, wrapped in a SortedMap.
    inst_table = utils.SortedMap(ByteCodeIter(code_obj))

    # Jump targets reported by dis; offset 0 is always included.
    jump_targets = set(dis.findlabels(code_obj.co_code))
    jump_targets.add(0)

    # Fall back to __name__ when the function has no __qualname__.
    try:
        qualname = function.__qualname__
    except AttributeError:
        qualname = function.__name__

    self._mark_lineno(inst_table, code_obj)

    base_kwargs = dict(
        func=function,
        func_qualname=qualname,
        is_generator=inspect.isgeneratorfunction(function),
        pysig=signature,
        filename=code_obj.co_filename,
        co_names=code_obj.co_names,
        co_varnames=code_obj.co_varnames,
        co_consts=code_obj.co_consts,
        co_freevars=code_obj.co_freevars,
        table=inst_table,
        labels=sorted(jump_targets),
    )
    super(ByteCode, self).__init__(**base_kwargs)
def __init__(self, func):
    """Collect the bytecode metadata of ``func`` and pass it to the base class.

    Raises ``errors.ByteCodeSupportError`` when ``func`` has no code object.
    """
    function = get_function_object(func)
    code_obj = get_code_object(function)
    signature = utils.pysignature(function)
    if not code_obj:
        raise errors.ByteCodeSupportError(
            "%s does not provide its bytecode" % function)

    # A map of {offset: ByteCodeInst}
    inst_table = utils.SortedMap(ByteCodeIter(code_obj))

    # Jump targets reported by dis; offset 0 is always included.
    jump_targets = set(dis.findlabels(code_obj.co_code))
    jump_targets.add(0)

    # Fall back to __name__ when the function has no __qualname__.
    try:
        qualname = function.__qualname__
    except AttributeError:
        qualname = function.__name__

    self._mark_lineno(inst_table, code_obj)

    base_kwargs = dict(
        func=function,
        func_qualname=qualname,
        is_generator=inspect.isgeneratorfunction(function),
        pysig=signature,
        filename=code_obj.co_filename,
        co_names=code_obj.co_names,
        co_varnames=code_obj.co_varnames,
        co_consts=code_obj.co_consts,
        co_freevars=code_obj.co_freevars,
        table=inst_table,
        labels=sorted(jump_targets),
    )
    super(ByteCode, self).__init__(**base_kwargs)
def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
                            cells=None, linestarts=None, line_offset=0):
    """Iterate over the instructions in a bytecode string.

    Generates a sequence of ``dis.Instruction`` namedtuples giving the details
    of each opcode.  Additional information about the code's runtime
    environment (e.g. variable names, constants) can be specified using the
    optional arguments.

    ``code`` is indexed byte by byte and every opcode at or above
    ``dis.HAVE_ARGUMENT`` is followed by a two-byte little-endian argument,
    i.e. the loop decodes the 1-or-3-byte instruction layout.  When
    ``linestarts`` is given it is used as an ``{offset: line}`` mapping and
    ``line_offset`` is added to every line number found in it.
    """
    labels = dis.findlabels(code)
    extended_arg = 0
    starts_line = None
    free = None  # never read in this function
    # enumerate() is not an option, since we sometimes process
    # multiple elements on a single pass through the loop
    n = len(code)
    i = 0
    while i < n:
        op = code[i]
        offset = i
        if linestarts is not None:
            starts_line = linestarts.get(i, None)
            if starts_line is not None:
                starts_line += line_offset
        is_jump_target = i in labels
        i = i + 1
        arg = None
        argval = None
        argrepr = ''
        if op >= dis.HAVE_ARGUMENT:
            # Two-byte little-endian argument plus any pending EXTENDED_ARG
            # prefix carried over from the previous instruction.
            arg = code[i] + code[i + 1] * 256 + extended_arg
            extended_arg = 0
            i = i + 2
            if op == dis.EXTENDED_ARG:
                # Becomes the high 16 bits of the next instruction's argument.
                extended_arg = arg * 65536
            #  Set argval to the dereferenced value of the argument when
            #  available, and argrepr to the string representation of argval.
            #    _disassemble_bytes needs the string repr of the
            #    raw name index for LOAD_GLOBAL, LOAD_CONST, etc.
            argval = arg
            if op in dis.hasconst:
                argval, argrepr = dis._get_const_info(arg, constants)
            elif op in dis.hasname:
                argval, argrepr = dis._get_name_info(arg, names)
            elif op in dis.hasjrel:
                # ``i`` already points past this instruction here.
                argval = i + arg
                argrepr = "to " + repr(argval)
            elif op in dis.haslocal:
                argval, argrepr = dis._get_name_info(arg, varnames)
            elif op in dis.hascompare:
                argval = dis.cmp_op[arg]
                argrepr = argval
            elif op in dis.hasfree:
                argval, argrepr = dis._get_name_info(arg, cells)
            elif op in dis.hasnargs:
                # The two raw argument bytes are reported separately.
                argrepr = "%d positional, %d keyword pair" % (code[i - 2], code[i - 1])
        yield dis.Instruction(dis.opname[op], op,
                              arg, argval, argrepr,
                              offset, starts_line, is_jump_target)
def __init__(self, func):
    """Collect the bytecode metadata of ``func`` and pass it to the base class.

    Raises ``ByteCodeSupportError`` when ``func`` has no code object or when
    its code object declares cell variables.
    """
    function = get_function_object(func)
    code_obj = get_code_object(function)
    if not code_obj:
        raise ByteCodeSupportError(
            "%s does not provide its bytecode" % function)
    if code_obj.co_cellvars:
        raise ByteCodeSupportError("does not support cellvars")

    # Instruction table built from ByteCodeIter, wrapped in a SortedMap.
    inst_table = utils.SortedMap(ByteCodeIter(code_obj))

    # Jump targets reported by dis; offset 0 is always included.
    jump_targets = set(dis.findlabels(code_obj.co_code))
    jump_targets.add(0)

    # Fall back to __name__ when the function has no __qualname__.
    try:
        qualname = function.__qualname__
    except AttributeError:
        qualname = function.__name__

    self._mark_lineno(inst_table, code_obj)

    # NOTE(review): inspect.getargspec is deprecated and absent from recent
    # Python 3 releases; kept because callers receive its ArgSpec result.
    base_kwargs = dict(
        func=function,
        func_qualname=qualname,
        argspec=inspect.getargspec(function),
        filename=code_obj.co_filename,
        co_names=code_obj.co_names,
        co_varnames=code_obj.co_varnames,
        co_consts=code_obj.co_consts,
        co_freevars=code_obj.co_freevars,
        table=inst_table,
        labels=sorted(jump_targets),
    )
    super(ByteCode, self).__init__(**base_kwargs)
def __init__(self, method_name, code_object):
    """Create a METHOD declaration named ``method_name`` for ``code_object``."""
    Declaration.__init__(self, Declaration.METHOD, code_object)

    # Identity of the method.
    self._method_name = method_name

    # Jump-target offsets found in the method's bytecode.
    self._labels = dis.findlabels(code_object.co_code)

    # Parts that start out empty or unset.
    self._body = None
    self._formal_parameters = []
    self._nested_types = []
def disassemble(co, lasti=-1): """Disassemble a code object.""" # Taken from dis.disassemble, returns disassembled code instead of printing # it (the f**k python ?). # Also, unicodified. # Also, use % operator instead of string operations. # Also, one statement per line. out = StringIO() code = co.co_code labels = dis.findlabels(code) linestarts = dict(dis.findlinestarts(co)) n = len(code) i = 0 extended_arg = 0 free = None while i < n: c = code[i] op = ord(c) if i in linestarts: if i > 0: print(end=u'\n', file=out) print(u'%3d' % linestarts[i], end=u' ', file=out) else: print(u' ', end=u' ', file=out) if i == lasti: print(u'-->', end=u' ', file=out) else: print(u' ', end=u' ', file=out) if i in labels: print(u'>>', end=u' ', file=out) else: print(u' ', end=u' ', file=out) print(u'%4i' % i, end=u' ', file=out) print(u'%-20s' % dis.opname[op], end=u' ', file=out) i = i + 1 if op >= dis.HAVE_ARGUMENT: oparg = ord(code[i]) + ord(code[i + 1]) * 256 + extended_arg extended_arg = 0 i = i + 2 if op == dis.EXTENDED_ARG: extended_arg = oparg * 65536 print(u'%5i' % oparg, end=u' ', file=out) if op in dis.hasconst: print(u'(%r)' % co.co_consts[oparg], end=u' ', file=out) elif op in dis.hasname: print(u'(%s)' % co.co_names[oparg], end=u' ', file=out) elif op in dis.hasjrel: print(u'(to %r)' % (i + oparg), end=u' ', file=out) elif op in dis.haslocal: print(u'(%s)' % co.co_varnames[oparg], end=u' ', file=out) elif op in dis.hascompare: print(u'(%s)' % dis.cmp_op[oparg], end=u' ', file=out) elif op in dis.hasfree: if free is None: free = co.co_cellvars + co.co_freevars print(u'(%s)' % free[oparg], end=u' ', file=out) print(end=u'\n', file=out) return out.getvalue()
def build_basic_blocks(co_obj):
    """Split the bytecode of ``co_obj`` into blocks keyed by start offset.

    Block boundaries are the sorted labels returned by ``extendlabels`` (fed
    with ``dis.findlabels``), with offset 0 prepended and the bytecode length
    appended.  Each block is the list produced by ``itercode`` for its slice.
    """
    bytecode = co_obj.co_code
    boundaries = extendlabels(bytecode, dis.findlabels(bytecode))
    boundaries.sort()

    block_begins = [0] + boundaries
    block_ends = boundaries + [len(bytecode)]

    basic_blocks = {}
    for begin, end in zip(block_begins, block_ends):
        # A later pair with the same ``begin`` replaces the earlier entry.
        basic_blocks[begin] = list(itercode(bytecode[begin:end], begin))
    return basic_blocks
def enter_code_object(self, co_obj):
    """Reset the per-code-object state before ``co_obj`` is walked.

    An empty block list is created for every label offset (the jump targets
    from ``dis.findlabels`` as post-processed by ``opcode_util.extendlabels``).
    Offset 0 always gets a block, which also becomes the current block.
    The value stack and the loop stack start out empty.
    """
    bytecode = co_obj.co_code
    block_starts = opcode_util.extendlabels(bytecode, dis.findlabels(bytecode))

    block_map = {}
    for offset in block_starts:
        block_map[offset] = []

    # The entry block is shared between ``blocks[0]`` and ``block``.
    entry_block = []
    block_map[0] = entry_block

    self.blocks = block_map
    self.stack = []
    self.loop_stack = []
    self.block = entry_block
def build_basic_blocks(co_obj):
    """Split the bytecode of ``co_obj`` into blocks keyed by start offset.

    Block boundaries are the sorted labels returned by ``extendlabels`` (fed
    with ``dis.findlabels``), with offset 0 prepended and the bytecode length
    appended.  Each block is the list produced by ``itercode`` for its slice.
    """
    bytecode = co_obj.co_code
    boundaries = extendlabels(bytecode, dis.findlabels(bytecode))
    boundaries.sort()

    block_begins = [0] + boundaries
    block_ends = boundaries + [len(bytecode)]

    basic_blocks = {}
    for begin, end in zip(block_begins, block_ends):
        # A later pair with the same ``begin`` replaces the earlier entry.
        basic_blocks[begin] = list(itercode(bytecode[begin:end], begin))
    return basic_blocks
def __init__(self, *args, **kwargs):
    """Initialise the base class, then reset the per-instruction fields."""
    super(InstructionInterpreter, self).__init__(*args, **kwargs)

    # Jump-target offsets of the bytecode held in ``self._co_code``.
    self._labels = dis.findlabels(self._co_code)

    # Per-instruction details start out unset.
    self.argval = self.argrepr = self.starts_line = None
    self.is_jump_target = False
    self.current_line = -1
def disassembler(co, lasti=-1):
    """Disassemble a code object.

    :param co: code object
    :param lasti: internal
    :yields: Instructions.
    """
    jump_targets = dis.findlabels(co.co_code)
    line_for_offset = dict(dis.findlinestarts(co))
    current_line = 0
    closure_names = None  # cellvars + freevars, built on first use

    for offset, op, oparg in _walk_ops(co):
        begins_line = offset in line_for_offset
        if begins_line:
            current_line = line_for_offset[offset]

        instr = Instruction(i=offset, op=op, lineno=current_line)
        instr.linestart = begins_line
        instr.lasti = offset == lasti
        instr.label = offset in jump_targets
        instr.oparg = oparg
        # Non-zero only on an EXTENDED_ARG instruction itself.
        instr.extended_arg = oparg * 65536 if op == dis.EXTENDED_ARG else 0

        if op >= dis.HAVE_ARGUMENT:
            if op in dis.hasconst:
                instr.arg = co.co_consts[oparg]
            elif op in dis.hasname:
                instr.arg = co.co_names[oparg]
            elif op in dis.hasjrel:
                # NOTE(review): added to the offset yielded by _walk_ops; a
                # relative jump is normally based on the end of the
                # instruction - confirm what _walk_ops yields.
                instr.arg = offset + oparg
            elif op in dis.haslocal:
                instr.arg = co.co_varnames[oparg]
            elif op in dis.hascompare:
                instr.arg = dis.cmp_op[oparg]
            elif op in dis.hasfree:
                if closure_names is None:
                    closure_names = co.co_cellvars + co.co_freevars
                instr.arg = closure_names[oparg]
        yield instr
def __init__(self, func):
    """Load the bytecode of ``func`` and index its instructions and labels.

    Free variables and cell variables are not rejected here.
    """
    code_obj = get_code_object(func)
    self.code = code_obj
    self.table = SortedMap(ByteCodeIter(code_obj))

    # Labels are dis' jump targets plus find_targets(), and always offset 0.
    targets = dis.findlabels(code_obj.co_code) + find_targets(self)
    self.labels = sorted(set(targets) | set([0]))

    self._mark_lineno()
def read(func):
    """Build a ``Procedure`` for ``func``, interpret it and convert it to SSA.

    The bytecode is interpreted from offset 0 with an empty stack; the
    returned procedure has been passed through ``ssa_conversion``.
    """
    proc = Procedure(func)
    code_obj = func.func_code
    local_vars = [Local(name) for name in code_obj.co_varnames]
    tables = Tables(
        jump={},
        labels=findlabels(code_obj.co_code),
        variables=local_vars)

    entry = interpret(proc, 0, [], tables)
    # If this assertion fails, the Arguments handling in interpret() needs
    # to be fixed.
    assert len(entry.prec) == 0

    ssa_conversion(proc)
    return proc
def dissco(co): try: code = co.co_code except AttributeError: raise TypeError( 'disassembly only works on objects with code components.') ret = [] labels = findlabels(code) linestarts = dict(findlinestarts(co)) print labels, linestarts n = len(code) i = 0 cur_line = 0 oparg = None arg = None extended_arg = 0 free = None while i < n: c = code[i] op = ord(c) if i in linestarts: cur_line = linestarts[i] op_name = opname[op] i += 1 if op >= HAVE_ARGUMENT: oparg = ord(code[i]) + ord(code[i + 1]) * 256 + extended_arg extended_arg = 0 i += 2 if op == EXTENDED_ARG: extended_arg = oparg * 65536L if op in hasconst: arg = co.co_consts[oparg] elif op in hasname: arg = co.co_names[oparg] elif op in hasjrel: arg = i + oparg elif op in haslocal: arg = co.co_varnames[oparg] elif op in hascompare: arg = cmp_op[oparg] elif op in hasfree: if free is None: free = co.co_cellvars + co.co_freevars arg = free[oparg] ret.append(Instruction(op, oparg, arg, cur_line)) oparg = None arg = None return ret
def disassemble(co):
    """Disassemble a code object into a JSON array of instruction records.

    Every record holds ``raw_op``, ``line_no`` (``None`` unless a source line
    starts at the instruction), ``in_labels`` (whether it is a jump target),
    ``i`` (byte offset), ``opname`` and ``hex`` (opcode plus argument bytes).
    Instructions with an argument also carry ``raw_arg`` and, when the opcode
    belongs to a known category, ``arg`` as ``{"value": ..., "type": ...}``.

    :param co: code object to disassemble.
    :returns: the records serialised with ``json.dumps``.
    """
    lines = []
    code = co.co_code
    labels = dis.findlabels(code)
    linestarts = dict(dis.findlinestarts(co))
    n = len(code)
    i = 0
    extended_arg = 0
    free = None  # cellvars + freevars, built on first use
    while i < n:
        op = ord(code[i])
        data = {
            "raw_op": op,
            "line_no": linestarts.get(i),
            "in_labels": i in labels,
            "i": i,
            "opname": opname[op],
            "hex": "%02X" % op,
        }
        i = i + 1
        if op >= HAVE_ARGUMENT:
            # Two-byte little-endian argument plus any pending EXTENDED_ARG.
            oparg = ord(code[i]) + ord(code[i + 1]) * 256 + extended_arg
            extended_arg = 0
            data["hex"] += " %02X %02X" % (ord(code[i]), ord(code[i + 1]))
            i = i + 2
            if op == EXTENDED_ARG:
                extended_arg = oparg * 65536
            data["raw_arg"] = oparg
            if op in hasconst:
                data['arg'] = {"value": repr(co.co_consts[oparg]),
                               "type": "const"}
            elif op in hasname:
                data['arg'] = {"value": co.co_names[oparg], "type": "name"}
            elif op in hasjrel:
                # ``i`` already points past this instruction here.
                data['arg'] = {"value": repr(i + oparg),
                               "type": "jump_relative"}
            elif op in haslocal:
                data['arg'] = {"value": co.co_varnames[oparg],
                               "type": "local_name"}
            elif op in hascompare:
                data['arg'] = {"value": cmp_op[oparg], "type": "compare"}
            elif op in hasfree:
                if free is None:
                    free = co.co_cellvars + co.co_freevars
                # Look the name up in the cell/free variable table; this
                # branch previously indexed cmp_op by mistake.
                data['arg'] = {"value": free[oparg], "type": "free_variable"}
        lines.append(data)
    return json.dumps(lines)
def dissco(co): try: code = co.co_code except AttributeError: raise TypeError("disassembly only works on objects with code components.") ret = [] labels = findlabels(code) linestarts = dict(findlinestarts(co)) print labels, linestarts n = len(code) i = 0 cur_line = 0 oparg = None arg = None extended_arg = 0 free = None while i < n: c = code[i] op = ord(c) if i in linestarts: cur_line = linestarts[i] op_name = opname[op] i += 1 if op >= HAVE_ARGUMENT: oparg = ord(code[i]) + ord(code[i + 1]) * 256 + extended_arg extended_arg = 0 i += 2 if op == EXTENDED_ARG: extended_arg = oparg * 65536L if op in hasconst: arg = co.co_consts[oparg] elif op in hasname: arg = co.co_names[oparg] elif op in hasjrel: arg = i + oparg elif op in haslocal: arg = co.co_varnames[oparg] elif op in hascompare: arg = cmp_op[oparg] elif op in hasfree: if free is None: free = co.co_cellvars + co.co_freevars arg = free[oparg] ret.append(Instruction(op, oparg, arg, cur_line)) oparg = None arg = None return ret
def from_code(cls, co):
    """Disassemble a Python code object into a Code object.

    The result is built from ``(opcode, argument)`` pairs in which jump
    targets are replaced by ``Label`` objects, line starts become
    ``SetLineno`` pseudo-instructions and nested code objects loaded right
    before an opcode in ``hascode`` are converted recursively.
    """
    raw = co.co_code
    label_at = {}
    for addr in findlabels(raw):
        label_at[addr] = Label()
    line_at = dict(cls._findlinestarts(co))
    closure_names = co.co_cellvars + co.co_freevars

    ops = []
    end = len(raw)
    pos = 0
    pending_ext = 0
    while pos < end:
        op = Opcode(raw[pos])
        if pos in label_at:
            ops.append((label_at[pos], None))
        if pos in line_at:
            ops.append((SetLineno, line_at[pos]))
        pos += 1

        if op in hascode:
            # The entry just before must load the nested code object.
            prev_op, prev_arg = ops[-1]
            if prev_op != LOAD_CONST:
                raise ValueError(
                    "%s should be preceded by LOAD_CONST code" % op)
            ops[-1] = (LOAD_CONST, Code.from_code(prev_arg))

        if op not in hasarg:
            ops.append((op, None))
            continue

        # Two-byte little-endian argument plus any pending EXTENDED_ARG.
        arg = raw[pos] | raw[pos + 1] << 8 | pending_ext
        pending_ext = 0
        pos += 2
        if op == opcode.EXTENDED_ARG:
            # Folded into the next instruction; not emitted itself.
            pending_ext = arg << 16
            continue

        if op in hasconst:
            value = co.co_consts[arg]
        elif op in hasname:
            value = co.co_names[arg]
        elif op in hasjabs:
            value = label_at[arg]
        elif op in hasjrel:
            value = label_at[pos + arg]
        elif op in haslocal:
            value = co.co_varnames[arg]
        elif op in hascompare:
            value = cmp_op[arg]
        elif op in hasfree:
            value = closure_names[arg]
        else:
            value = arg
        ops.append((op, value))

    has_varargs = bool(co.co_flags & CO_VARARGS)
    has_varkwargs = bool(co.co_flags & CO_VARKEYWORDS)
    arg_count = co.co_argcount + has_varargs + has_varkwargs

    # A leading string constant is taken as the docstring.
    if co.co_consts and isinstance(co.co_consts[0], str):
        doc = co.co_consts[0]
    else:
        doc = None

    return cls(code=ops,
               freevars=co.co_freevars,
               args=co.co_varnames[:arg_count],
               varargs=has_varargs,
               varkwargs=has_varkwargs,
               newlocals=bool(co.co_flags & CO_NEWLOCALS),
               name=co.co_name,
               filename=co.co_filename,
               firstlineno=co.co_firstlineno,
               docstring=doc)
def make_junk_code(code):
    """Return a new code object with ``code``'s bytecode rearranged.

    The bytecode is split into blocks by ``make_code_block``, the block order
    is shuffled, and the blocks are re-emitted behind a leading JUMP_ABSOLUTE
    with the output of ``add_junk_code`` placed after the jump and after
    every block.  Nested code objects found in ``co_consts`` are processed
    recursively.  All other attributes of ``code`` are copied unchanged.

    NOTE(review): the helpers ``add_junk_code``, ``find_jmp``, ``patch_code``
    and ``n2b`` are defined elsewhere; what they produce is assumed from how
    their results are used here - confirm against their definitions.
    """
    # code_blocks = {block_id:[block_code,block_range]}
    code_blocks, begin, code_block_id_tab, code_block_id_xref_dict = make_code_block(
        code.co_code)
    add_junk_code_dict = {}
    add_jmp_dict = {}
    code_block_new_position_dict = {}
    random.shuffle(code_block_id_tab)
    # Per-block junk budget, capped at 9 on the next line.
    # NOTE(review): "/" is used as integer division here (Python 2), and the
    # divisor is zero if make_code_block returned no blocks.
    junk_code_max_size = (0x10000 - len(code.co_code) - len(
        dis.findlabels(code.co_code))) / len(code_block_id_tab) - 5
    junk_code_max_size = 9 if junk_code_max_size > 9 else junk_code_max_size
    junk_code = add_junk_code(
        random.randint(junk_code_max_size / 2, junk_code_max_size))
    # 3 bytes for the leading JUMP_ABSOLUTE, then the first junk run.
    begin_position = 3 + len(junk_code)
    # First pass: choose the junk for each block and compute where each
    # block will start in the new layout.
    for code_block_id in code_block_id_tab:
        add_junk_code_dict[code_block_id] = add_junk_code(
            random.randint(junk_code_max_size / 2, junk_code_max_size))
        add_jmp_dict[code_block_id] = find_jmp(code_blocks[code_block_id])
        code_block_new_position_dict[code_block_id] = begin_position
        # Block bytes + 3 + 3 per entry reported by find_jmp + junk length.
        begin_position += len(code_blocks[code_block_id][0]) + 3 + 3 * len(
            add_jmp_dict[code_block_id]) + len(
                add_junk_code_dict[code_block_id])
    # The new bytecode starts by jumping to the relocated first block.
    co_code = chr(dis.opmap['JUMP_ABSOLUTE']) + \
        n2b(code_block_new_position_dict[begin]) + junk_code
    # Second pass: emit every block, patched, followed by its junk.
    for code_block_id in code_block_id_tab:
        # 0 is passed as the successor id when the block has no xref entry.
        next_code_block_id = 0
        if code_block_id in code_block_id_xref_dict:
            next_code_block_id = code_block_id_xref_dict[code_block_id]
        co_code += patch_code(
            code_blocks, add_jmp_dict[code_block_id], code_block_id,
            next_code_block_id,
            code_block_new_position_dict) + add_junk_code_dict[code_block_id]
    # Rebuild the constants, recursing into nested code objects.
    co_consts = []
    for v in code.co_consts:
        if isinstance(v, types.CodeType):
            co_consts += [make_junk_code(v)]
        else:
            co_consts += [v]
    co_consts = tuple(co_consts)
    # Only reported; the oversized code object is still built and returned.
    if len(co_code) > 0xffff:
        print '[ERROR]code is too long.'
    return types.CodeType(code.co_argcount, code.co_nlocals, code.co_stacksize,
                          code.co_flags, co_code, co_consts, code.co_names,
                          code.co_varnames, code.co_filename, code.co_name,
                          code.co_firstlineno, code.co_lnotab)
def disassemble(co, lasti=-1):
    """Disassemble a code object, printing one row per instruction.

    Each row shows the source line number (only where a line starts), a
    ``-->`` marker for the instruction at ``lasti``, a ``>>`` marker for jump
    targets, the byte offset, the opcode name and, when present, the raw
    argument followed by its symbolic form in parentheses.
    """
    code = co.co_code
    labels = findlabels(code)
    linestarts = dict(findlinestarts(co))
    n = len(code)
    i = 0
    extended_arg = 0
    free = None  # cellvars + freevars, built on first use
    while i < n:
        c = code[i]
        op = ord(c)
        # Column 1: zero-padded source line number where a new line starts.
        if i in linestarts:
            if i > 0:
                # Blank line between source lines.
                print
            print "%03d" % linestarts[i],
        else:
            print ' ',

        # Column 2: marker for the instruction at ``lasti``.
        if i == lasti: print '-->',
        else: print ' ',
        # Column 3: marker for jump targets.
        if i in labels: print '>>',
        else: print ' ',
        # Columns 4 and 5: offset and opcode name.
        print repr(i).rjust(4),
        print opname[op].ljust(20),
        i = i+1
        if op >= HAVE_ARGUMENT:
            # Two-byte little-endian argument plus any pending EXTENDED_ARG.
            oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg
            extended_arg = 0
            i = i+2
            if op == EXTENDED_ARG:
                extended_arg = oparg*65536L
            print repr(oparg).rjust(5),
            # Symbolic form of the argument, by opcode category.
            if op in hasconst:
                print '(' + repr(co.co_consts[oparg]) + ')',
            elif op in hasname:
                print '(' + repr(co.co_names[oparg]) + ')',
            elif op in hasjrel:
                # ``i`` already points past this instruction here.
                print '(to ' + repr(i + oparg) + ')',
            elif op in haslocal:
                print '(' + co.co_varnames[oparg] + ')',
            elif op in hascompare:
                print '(' + repr(cmp_op[oparg]) + ')',
            elif op in hasfree:
                if free is None:
                    free = co.co_cellvars + co.co_freevars
                print '(' + free[oparg] + ')',
        # End of this instruction's row.
        print
def _iter_as_bytecode_as_instructions_py2(co):
    """Yield one ``_Instruction`` per opcode of ``co``.

    The bytecode is decoded as 1-byte opcodes optionally followed by a
    two-byte little-endian argument.  The instruction's argument value is
    the resolved constant, name, jump target, local, comparison or
    free-variable name; the raw number for any other opcode with an
    argument; and ``None`` for opcodes without one.
    """
    bytecode = co.co_code
    line_by_offset = dict(dis.findlinestarts(co))
    jump_targets = set(dis.findlabels(bytecode))
    end = len(bytecode)
    pos = 0
    pending_ext = 0
    closure_names = None  # cellvars + freevars, built on first use

    while pos < end:
        start = pos
        op = ord(bytecode[start])
        targeted = start in jump_targets
        name = opname[op]
        pos += 1

        # ``lookup`` is the table the raw argument indexes into, if any.
        lookup = None
        argval = None
        if op >= HAVE_ARGUMENT:
            oparg = (ord(bytecode[pos]) + ord(bytecode[pos + 1]) * 256
                     + pending_ext)
            pending_ext = 0
            pos += 2
            if op == EXTENDED_ARG:
                pending_ext = oparg * 65536

            if op in hasconst:
                lookup = co.co_consts
            elif op in hasname:
                lookup = co.co_names
            elif op in hasjrel:
                # ``pos`` already points past this instruction here.
                argval = pos + oparg
            elif op in haslocal:
                lookup = co.co_varnames
            elif op in hascompare:
                lookup = cmp_op
            elif op in hasfree:
                if closure_names is None:
                    closure_names = co.co_cellvars + co.co_freevars
                lookup = closure_names
            else:
                argval = oparg

        line = _get_line(line_by_offset, start, 0)
        if lookup is not None:
            argval = lookup[oparg]
        yield _Instruction(name, op, line, argval, targeted, start)
def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
                            cells=None, linestarts=None, line_offset=0):
    """Iterate over the instructions in a bytecode string.

    Generates a sequence of Instruction namedtuples giving the details of
    each opcode.  Additional information about the code's runtime
    environment (e.g. variable names, constants) can be specified using
    the optional arguments.  When ``linestarts`` is given, ``line_offset``
    is added to every line number found in it.
    """
    jump_targets = findlabels(code)
    for offset, op, arg in _unpack_opargs(code):
        line = None
        if linestarts is not None:
            line = linestarts.get(offset, None)
            if line is not None:
                line += line_offset

        argval, argrepr = None, ''
        if arg is not None:
            # argval is the dereferenced argument where one exists and
            # argrepr its string form; the raw argument is kept otherwise.
            if op in hasconst:
                argval, argrepr = _get_const_info(arg, constants)
            elif op in hasname:
                argval, argrepr = _get_name_info(arg, names)
            elif op in hasjrel:
                # Relative to the end of this 3-byte instruction.
                argval = offset + 3 + arg
                argrepr = "to " + repr(argval)
            elif op in haslocal:
                argval, argrepr = _get_name_info(arg, varnames)
            elif op in hascompare:
                argval = argrepr = cmp_op[arg]
            elif op in hasfree:
                argval, argrepr = _get_name_info(arg, cells)
            else:
                argval = arg

        yield Instruction(opname[op], op, arg, argval, argrepr,
                          offset, line, offset in jump_targets)
def __init__(self, func_id):
    """Index the bytecode of ``func_id.code``.

    Stores the instruction table, the sorted label offsets (always including
    offset 0) and the name/constant tuples of the code object.
    """
    code_obj = func_id.code

    # A map of {offset: ByteCodeInst}
    inst_table = OrderedDict(ByteCodeIter(code_obj))
    self._compute_lineno(inst_table, code_obj)

    jump_targets = set(dis.findlabels(code_obj.co_code))
    jump_targets.add(0)

    self.func_id = func_id
    self.co_names = code_obj.co_names
    self.co_varnames = code_obj.co_varnames
    self.co_consts = code_obj.co_consts
    self.co_freevars = code_obj.co_freevars
    self.table = inst_table
    self.labels = sorted(jump_targets)
def __init__(self, method_name, code_object):
    """Create a METHOD declaration named ``method_name`` for ``code_object``.

    All summary containers start out empty and ``returns_tainted`` is False.
    """
    Declaration.__init__(self, Declaration.METHOD, code_object)

    # Identity of the method.
    self._method_name = method_name

    # Jump-target offsets found in the method's bytecode.
    self._labels = dis.findlabels(code_object.co_code)

    # Structural parts, filled in later.
    self._body = None
    self._formal_parameters = []
    self._nested_types = []

    # List-valued summaries.
    self.pretty_summary = []
    self.one_param_summary = []
    self.vuln_summary = []
    self.inter_vuln_summary = []

    # Dict-valued "all params" summaries.
    self.all_params_summary = {}
    self.all_params_vuln_summary = {}
    self.all_params_returns_tainted = {}

    self.returns_tainted = False
def __init__(self, func):
    """Load and index the bytecode of ``func``.

    :param func: object passed to ``get_code_object`` to obtain the code.
    :raises ByteCodeSupportError: when no code object is available, or when
        the code object uses free variables or cell variables.
    """
    self.func = func
    self.code = get_code_object(func)

    # Do basic checking on support for the given bytecode.  This must come
    # before any attribute of the code object is read: a missing code object
    # would otherwise fail with AttributeError instead of the error below.
    if not self.code:
        raise ByteCodeSupportError("%s does not provide its bytecode" % func)
    if self.code.co_freevars:
        raise ByteCodeSupportError("does not support freevars")
    if self.code.co_cellvars:
        raise ByteCodeSupportError("does not support cellvars")

    self.filename = self.code.co_filename
    self.table = utils.SortedMap(ByteCodeIter(self.code))

    # Jump targets reported by dis; offset 0 is always included.
    labels = set(dis.findlabels(self.code.co_code))
    labels.add(0)
    self.labels = sorted(labels)
    self._mark_lineno()
def disassemble(co): code = co.co_code labels = dis.findlabels(code) linestarts = dict(dis.findlinestarts(co)) n = len(code) i = 0 extended_arg = 0 free = None lineno = None while i < n: c = code[i] op = ord(c) lineno = linestarts.get(i, lineno) is_label = i in labels ist = i i += 1 if op >= opcode.HAVE_ARGUMENT: oparg = ord(code[i]) + ord(code[i + 1]) * 256 + extended_arg extended_arg = 0 i += 2 if op == opcode.EXTENDED_ARG: extended_arg = oparg * 65536L if op in opcode.hasconst: arg = co.co_consts[oparg] elif op in opcode.hasname: arg = co.co_names[oparg] elif op in opcode.hasjrel: arg = i + oparg elif op in opcode.haslocal: arg = co.co_varnames[oparg] elif op in opcode.hascompare: arg = opcode.cmp_op[oparg] elif op in opcode.hasfree: if free is None: free = co.co_cellvars + co.co_freevars arg = free[oparg] else: arg = NOVAL else: arg = NOVAL yield ist, lineno, is_label, opcode.opname[op], arg
def disassemble(co, lasti=-1):
    """Yield ``(op, oparg)`` for every instruction in ``co.co_code``.

    The bytecode is decoded as 1-byte opcodes, each followed by a two-byte
    little-endian argument when the opcode is at or above
    ``dis.HAVE_ARGUMENT``.  ``oparg`` is ``None`` for opcodes without an
    argument.  An ``EXTENDED_ARG`` instruction is yielded like any other and
    its argument becomes the high 16 bits of the next instruction's argument.

    :param co: object whose ``co_code`` is a sequence of one-character
        strings (a Python 2 code object).
    :param lasti: unused; kept for signature compatibility.
    """
    code = co.co_code
    n = len(code)
    i = 0
    extended_arg = 0
    while i < n:
        op = ord(code[i])
        i += 1
        oparg = None
        if op >= dis.HAVE_ARGUMENT:
            oparg = ord(code[i]) + ord(code[i + 1]) * 256 + extended_arg
            extended_arg = 0
            i += 2
            if op == dis.EXTENDED_ARG:
                # Plain int literal: "65536L" is a syntax error on Python 3,
                # and Python 2 promotes to long automatically when needed.
                extended_arg = oparg * 65536
        yield (op, oparg)
def make_code_block(code, obfuscate=False):
    """Split the bytecode string ``code`` into randomly sized blocks.

    Every block gets a unique random id.  Returns a 4-tuple:

    * ``code_blocks`` - ``{block_id: [block_bytes, xrange(start, end)]}``
    * ``begin`` - id of the first block created
    * ``code_block_id_tab`` - block ids in creation order
    * ``code_block_id_xref_dict`` - ``{block_id: id of the block created next}``

    With ``obfuscate`` set, blocks hold at most 3 to 5 instructions, a block
    is cut before a jump target and after a jump instruction; otherwise
    blocks hold at most 8 to 10 instructions and jump targets are ignored.

    NOTE(review): ``begin = code_block_id_tab[0]`` raises IndexError when
    ``code`` is empty.
    """
    i = 0
    code_block_id_xref_dict = {}
    # Jump targets only matter in obfuscate mode.
    code_labels = dis.findlabels(code) if obfuscate else []
    code_blocks = {}
    code_block_id_tab = []
    while (i < len(code)):
        # Maximum number of instructions for this block.
        code_size = random.randint(3, 5) if obfuscate else random.randint(
            8, 10)
        code_block = ''
        tmp = i  # start offset of this block
        while (code_size and i < len(code)):
            c = ord(code[i])
            if i in code_labels:
                # A jump target may only be the first instruction of a block.
                if not code_block == '':
                    break
            if obfuscate and (c in dis.hasjabs or c in dis.hasjrel):
                # A jump instruction always ends the block.
                code_block += code[i:i + 3]
                i += 3
                break
            elif c in has_args:
                # presumably the opcodes that carry a 2-byte argument -
                # ``has_args`` is defined elsewhere; verify.
                code_block += code[i:i + 3]
                i += 3
            else:
                code_block += code[i]
                i += 1
            code_size -= 1
        # Draw random ids until one is unused.
        code_block_id = random.randint(1, 0xffffffff)
        while code_block_id in code_block_id_tab:
            code_block_id = random.randint(1, 0xffffffff)
        code_block_id_tab += [code_block_id]
        code_blocks[code_block_id] = [code_block, xrange(tmp, i)]
    # Link each block to the one created right after it (``i`` is reused as
    # the list index here).
    for i, v in enumerate(code_block_id_tab[:-1]):
        code_block_id_xref_dict[v] = code_block_id_tab[i + 1]
    begin = code_block_id_tab[0]
    return code_blocks, begin, code_block_id_tab, code_block_id_xref_dict
def pydis(code):
    """Decode ``code`` into a list of ``(op, arg)`` pairs with symbolic jumps.

    A ``(LABEL, n)`` pseudo-instruction is inserted in front of every jump
    target, where ``n`` is the target's index in ``dis.findlabels(code)``.
    Jump arguments are replaced by that same index.  ``EXTENDED_ARG``
    prefixes are folded into the following instruction and not emitted.
    Opcodes without an argument get ``None``.
    """
    # Find branch targets
    targets = dis.findlabels(code)
    result = []
    size = len(code)
    pos = 0
    ext = 0
    while pos < size:
        # Insert label pseudo-instruction
        if pos in targets:
            result.append((LABEL, targets.index(pos)))

        op = ord(code[pos])
        pos += 1
        if op < dis.HAVE_ARGUMENT:
            result.append((op, None))
            continue

        arg = dec_arg(code[pos:pos + 2]) | ext << 16
        ext = 0
        pos += 2
        if op == dis.EXTENDED_ARG:
            # Carried into the next instruction's argument.
            ext = arg
            continue

        if op in dis.hasjrel:
            # ``pos`` already points past this instruction here.
            arg = targets.index(pos + arg)
        elif op in dis.hasjabs:
            arg = targets.index(arg)
        result.append((op, arg))
    return result
def get_call_function_name(frame):
    """If f_back is looking at a call function, return the name for it.
    Otherwise return None.

    The name is read from the argument of the first instruction of the
    source line that the caller is currently executing.
    """
    caller = frame.f_back
    if not caller or 'CALL_FUNCTION' != op_at_frame(caller):
        return None

    code_obj = caller.f_code
    bytecode = code_obj.co_code
    line_offsets = dict(dis.findlinestarts(code_obj))

    # Walk back from the current instruction to the start of its line.
    for offset in range(caller.f_lasti, -1, -1):
        if offset in line_offsets:
            # Two-byte little-endian argument of that first instruction.
            name_index = (ord(bytecode[offset + 1])
                          + (ord(bytecode[offset + 2]) << 8))
            return code_obj.co_names[name_index]
    return None
def make_code_from_frame(frame):
    """Make a Code object from the given frame.

    Returns a two-tuple giving the Code object, and the offset of the
    last instruction executed by the frame.  The offset is an index into
    the Code object's instruction list, from which all ``SetLineno``
    entries have been removed.
    """
    code_obj = frame.f_code
    raw = code_obj.co_code
    last = frame.f_lasti

    # Count the instructions that precede the last executed one.
    pos = 0
    index = 0
    while pos < last:
        index += 1
        pos += 1 if ord(raw[pos]) < HAVE_ARGUMENT else 3
    assert pos == last

    # Every jump target at or before that point adds one more list entry.
    index += sum(1 for target in findlabels(raw) if target <= last)

    result = Code.from_code(code_obj)
    result.code[:] = [entry for entry in result.code
                      if entry[0] != SetLineno]
    assert result.code[index][0] == ord(raw[last])
    return (result, index)
def get_call_function_name(frame):
    """If f_back is looking at a call function, return the name for it.
    Otherwise return None.

    The name is read from the argument of the first instruction of the
    source line that the caller is currently executing.
    """
    caller = frame.f_back
    if not caller or 'CALL_FUNCTION' != op_at_frame(caller):
        return None

    code_obj = caller.f_code
    bytecode = code_obj.co_code
    line_offsets = dict(dis.findlinestarts(code_obj))

    # Walk back from the current instruction to the start of its line.
    for offset in range(caller.f_lasti, -1, -1):
        if offset in line_offsets:
            # Two-byte little-endian argument of that first instruction.
            name_index = (ord(bytecode[offset + 1])
                          + (ord(bytecode[offset + 2]) << 8))
            return code_obj.co_names[name_index]
    return None
def listbytecode(co):
    """Decode ``co.co_code`` into a list of dicts, print it and return it.

    Every dict has ``pos`` (instruction index), ``line`` (byte offset of the
    opcode), ``co`` (numeric opcode) and ``code`` (opcode name).
    Instructions with an argument also have ``arg`` (raw value) and, by
    opcode category, one of ``const``, ``name``, ``target``, ``local``,
    ``cmp`` or ``free`` holding the resolved value.

    :param co: object whose ``co_code`` is a sequence of one-character
        strings (a Python 2 code object).
    :returns: the list of instruction dicts; their reprs are also printed,
        one per line.
    """
    code = co.co_code
    n = len(code)
    i = 0
    extended_arg = 0
    free = None  # cellvars + freevars, built on first use
    codes = []
    cnt = 0
    while i < n:
        op = ord(code[i])
        script = {'pos': cnt, 'line': i, 'co': op, 'code': dis.opname[op]}
        cnt += 1
        i += 1
        codes.append(script)
        # ">=": the opcode equal to HAVE_ARGUMENT carries an argument too.
        # With ">" its two argument bytes were decoded as opcodes.
        if op >= dis.HAVE_ARGUMENT:
            oparg = ord(code[i]) + ord(code[i + 1]) * 256 + extended_arg
            extended_arg = 0
            i += 2
            if op == dis.EXTENDED_ARG:
                extended_arg = oparg * 65536
            script['arg'] = oparg
            if op in dis.hasconst:
                script['const'] = co.co_consts[oparg]
            elif op in dis.hasname:
                script['name'] = co.co_names[oparg]
            elif op in dis.hasjrel:
                # ``i`` already points past this instruction here.
                script['target'] = i + oparg
            elif op in dis.haslocal:
                script['local'] = co.co_varnames[oparg]
            elif op in dis.hascompare:
                script['cmp'] = dis.cmp_op[oparg]
            elif op in dis.hasfree:
                if free is None:
                    # Was misspelled "co_frevars", raising AttributeError.
                    free = co.co_cellvars + co.co_freevars
                script['free'] = free[oparg]
    # Single-argument call form prints the same on Python 2 and Python 3.
    print('\n'.join(map(repr, codes)))
    return codes
def from_code(cls, co):
    """Disassemble a Python code object into a Code object.

    The result is built from ``(opcode, argument)`` pairs in which jump
    targets are replaced by ``Label`` objects, line starts become
    ``SetLineno`` pseudo-instructions and nested code objects consumed by an
    opcode in ``hascode`` are converted recursively.

    :param co: code object to disassemble.
    :raises ValueError: when an opcode in ``hascode`` does not have a
        ``LOAD_CONST`` two entries before it.
    """
    co_code = co.co_code
    labels = {addr: Label() for addr in findlabels(co_code)}
    linestarts = dict(cls._findlinestarts(co))
    cellfree = co.co_cellvars + co.co_freevars
    code = []
    n = len(co_code)
    i = extended_arg = 0
    while i < n:
        op = Opcode(co_code[i])
        if i in labels:
            code.append((labels[i], None))
        if i in linestarts:
            code.append((SetLineno, linestarts[i]))
        i += 1
        if op in hascode:
            # The nested code object is loaded two entries back.
            lastop, lastarg = code[-2]
            if lastop != LOAD_CONST:
                raise ValueError("%s should be preceded by LOAD_CONST" % op)
            # Replace the entry that was read.  Writing to code[-1] would
            # overwrite the following load and leave the raw code object in
            # place.
            code[-2] = (LOAD_CONST, cls.from_code(lastarg))
        if op not in hasarg:
            code.append((op, None))
        else:
            # Two-byte little-endian argument plus any pending EXTENDED_ARG.
            arg = co_code[i] | co_code[i + 1] << 8 | extended_arg
            extended_arg = 0
            i += 2
            if op == opcode.EXTENDED_ARG:
                # Folded into the next instruction; not emitted itself.
                extended_arg = arg << 16
            else:
                if op in hasconst:
                    value = co.co_consts[arg]
                elif op in hasname:
                    value = co.co_names[arg]
                elif op in hasjabs:
                    value = labels[arg]
                elif op in hasjrel:
                    # ``i`` already points past this instruction here.
                    value = labels[i + arg]
                elif op in haslocal:
                    value = co.co_varnames[arg]
                elif op in hascompare:
                    value = cmp_op[arg]
                elif op in hasfree:
                    value = cellfree[arg]
                else:
                    value = arg
                code.append((op, value))

    varargs = not not co.co_flags & CO_VARARGS
    varkwargs = not not co.co_flags & CO_VARKEYWORDS
    # A leading string constant is taken as the docstring.
    if co.co_consts and isinstance(co.co_consts[0], str):
        docstring = co.co_consts[0]
    else:
        docstring = None
    return cls(co=co,
               code=code,
               freevars=co.co_freevars,
               args=co.co_varnames[:co.co_argcount + varargs + varkwargs],
               kwonly=co.co_kwonlyargcount,
               varargs=varargs,
               varkwargs=varkwargs,
               newlocals=not not co.co_flags & CO_NEWLOCALS,
               name=co.co_name,
               filename=co.co_filename,
               firstlineno=co.co_firstlineno,
               docstring=docstring)
def _disassemble(code_object): code_string = code_object.co_code linestarts = dict(dis.findlinestarts(code_object)) labels = list(dis.findlabels(code_string)) i = 0 while i < len(code_string): opcode = ord(code_string[i]) opname = dis.opname[opcode] i += 1 if opcode >= dis.HAVE_ARGUMENT: oparg = (ord(code_string[i]) + ord(code_string[i+1])*256) if opcode in dis.hasconst: oparg = code_object.co_consts[oparg] elif opcode in dis.hasname: oparg = code_object.co_names[oparg] yield opname, oparg if hasattr(oparg, 'co_code'): for oc, oa in _disassemble(oparg): yield oc, oa i += 2 else: yield opname, None
def get_names_at(co, lasti): """Get the names that are referenced in the active part of code""" code = co.co_code labels = dis.findlabels(code) linestarts = dict(dis.findlinestarts(co)) n = len(code) i = 0 extended_arg = 0 free = None linenames = [] found = False while i < n: c = code[i] op = ord(c) if i in linestarts: if found: return linenames linenames = [] if i == lasti: found = True i = i+1 if op >= dis.HAVE_ARGUMENT: oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg extended_arg = 0 i = i+2 if op == dis.EXTENDED_ARG: extended_arg = oparg*65536L if op in dis.hasname: linenames.append(co.co_names[oparg]) elif op in dis.haslocal: linenames.append(co.co_varnames[oparg]) elif op in dis.hasfree: if free is None: free = co.co_cellvars + co.co_freevars linenames.append(free[oparg]) if found: return linenames
def disassemble(co, lasti=-1):
    """Yield one ``Op`` per instruction in ``co``.

    Each ``Op`` is created from the opcode and its byte offset.  ``line`` is
    set when a source line starts at the instruction and ``target`` is set
    to True when it is a jump target.  Instructions with an argument get
    ``argcode`` (raw value) and, by opcode category, ``arg`` (resolved
    constant, name, jump target, local, comparison or free-variable name).

    :param co: code object to decode (Python 2 layout: 1-byte opcodes with
        optional two-byte little-endian arguments).
    :param lasti: unused; kept for signature compatibility.
    """
    code = co.co_code
    labels = dis.findlabels(code)
    linestarts = dict(findlinestarts(co))
    n = len(code)
    i = 0
    extended_arg = 0
    free = co.co_cellvars + co.co_freevars
    while i < n:
        offset = i
        op = ord(code[offset])
        o = Op(op, offset)
        # Line starts and jump targets are keyed by the instruction's own
        # offset, so test it before advancing.  Testing after "i += 1"
        # compared against the following byte.
        if offset in linestarts:
            o.line = linestarts[offset]
        if offset in labels:
            o.target = True
        i += 1
        # ">=": the opcode equal to HAVE_ARGUMENT carries an argument too.
        if op >= dis.HAVE_ARGUMENT:
            arg = ord(code[i]) + ord(code[i + 1]) * 256 + extended_arg
            extended_arg = 0
            i += 2
            if op == dis.EXTENDED_ARG:
                extended_arg = arg << 16
            o.argcode = arg
            if op in dis.hasconst:
                o.arg = co.co_consts[arg]
            elif op in dis.hasname:
                o.arg = co.co_names[arg]
            elif op in dis.hasjrel:
                # ``i`` already points past this instruction here.
                o.arg = i + arg
            elif op in dis.haslocal:
                o.arg = co.co_varnames[arg]
            elif op in dis.hascompare:
                o.arg = dis.cmp_op[arg]
            elif op in dis.hasfree:
                o.arg = free[arg]
        yield o
def disassemble(co, lasti=-1):
    """Yield one ``Op`` per instruction in ``co``.

    Each ``Op`` is created from the opcode and its byte offset.  ``line`` is
    set when a source line starts at the instruction and ``target`` is set
    to True when it is a jump target.  Instructions with an argument get
    ``argcode`` (raw value) and, by opcode category, ``arg`` (resolved
    constant, name, jump target, local, comparison or free-variable name).

    :param co: code object to decode (Python 2 layout: 1-byte opcodes with
        optional two-byte little-endian arguments).
    :param lasti: unused; kept for signature compatibility.
    """
    code = co.co_code
    labels = dis.findlabels(code)
    linestarts = dict(findlinestarts(co))
    n = len(code)
    i = 0
    extended_arg = 0
    free = co.co_cellvars + co.co_freevars
    while i < n:
        offset = i
        op = ord(code[offset])
        o = Op(op, offset)
        # Line starts and jump targets are keyed by the instruction's own
        # offset, so test it before advancing.  Testing after "i += 1"
        # compared against the following byte.
        if offset in linestarts:
            o.line = linestarts[offset]
        if offset in labels:
            o.target = True
        i += 1
        # ">=": the opcode equal to HAVE_ARGUMENT carries an argument too.
        if op >= dis.HAVE_ARGUMENT:
            arg = ord(code[i]) + ord(code[i + 1]) * 256 + extended_arg
            extended_arg = 0
            i += 2
            if op == dis.EXTENDED_ARG:
                extended_arg = arg << 16
            o.argcode = arg
            if op in dis.hasconst:
                o.arg = co.co_consts[arg]
            elif op in dis.hasname:
                o.arg = co.co_names[arg]
            elif op in dis.hasjrel:
                # ``i`` already points past this instruction here.
                o.arg = i + arg
            elif op in dis.haslocal:
                o.arg = co.co_varnames[arg]
            elif op in dis.hascompare:
                o.arg = dis.cmp_op[arg]
            elif op in dis.hasfree:
                o.arg = free[arg]
        yield o
def from_code(cls, co):
    """Disassemble a Python code object into a Code object.

    Returns ``None`` (after printing a diagnostic) when a name is both a
    cell and a free variable of ``co``, or when a nested code object could
    not be converted. Raises ``ValueError`` when an opcode listed in
    ``hascode`` is not preceded by a ``LOAD_CONST`` two entries earlier.
    """
    shared_names = set(co.co_cellvars) & set(co.co_freevars)
    if shared_names:
        print(co.co_name + ': has non-empty co.co_cellvars & co.co_freevars',
              shared_names)
        return None

    raw = co.co_code
    labels = {addr: Label() for addr in findlabels(raw)}
    linestarts = dict(cls._findlinestarts(co))
    cellfree = co.co_cellvars + co.co_freevars

    code = []
    size = len(raw)
    pos = 0
    extended_arg = 0
    is_generator = False
    if version_info >= (3, 5,):
        is_coroutine = False

    while pos < size:
        op = Opcode(raw[pos])
        # Pseudo-entries for jump targets and line starts precede the
        # instruction they belong to.
        if pos in labels:
            code.append((labels[pos], None))
        if pos in linestarts:
            code.append((SetLineno, linestarts[pos]))
        pos += 1

        if op in hascode:
            # Replace the raw code constant loaded two entries back with
            # its converted form.
            lastop, lastarg = code[-2]
            if lastop != LOAD_CONST:
                raise ValueError("%s should be preceded by LOAD_CONST" % op)
            sub_code = Code.from_code(lastarg)
            if sub_code is None:
                print(co.co_name + ': has unexpected subcode block')
                return None
            code[-2] = (LOAD_CONST, sub_code)

        if op in hasarg:
            arg = raw[pos] | (raw[pos + 1] << 8) | extended_arg
            extended_arg = 0
            pos += 2
            if op == opcode.EXTENDED_ARG:
                # Supplies the high bits of the next instruction's argument;
                # it is not emitted as an entry of its own.
                extended_arg = arg << 16
            else:
                if op in hasconst:
                    byteplay_arg = co.co_consts[arg]
                elif op in hasname:
                    byteplay_arg = co.co_names[arg]
                elif op in hasjabs:
                    byteplay_arg = labels[arg]
                elif op in hasjrel:
                    byteplay_arg = labels[pos + arg]
                elif op in haslocal:
                    byteplay_arg = co.co_varnames[arg]
                elif op in hascompare:
                    byteplay_arg = cmp_op[arg]
                elif op in hasfree:
                    byteplay_arg = cellfree[arg]
                else:
                    byteplay_arg = arg
                code.append((op, byteplay_arg))
        else:
            code.append((op, None))

        if op == YIELD_VALUE or op == YIELD_FROM:
            is_generator = True
        if version_info >= (3, 5,) and op in coroutine_opcodes:
            is_coroutine = True

    flags = co.co_flags
    varargs = bool(flags & CO_VARARGS)
    varkwargs = bool(flags & CO_VARKEYWORDS)
    # The force_* values record flags that are set on the code object even
    # though no matching opcode was seen in the body.
    force_generator = not is_generator and (flags & CO_GENERATOR)

    if version_info >= (3, 5,):
        force_coroutine = not is_coroutine and (flags & CO_COROUTINE)
        force_iterable_coroutine = flags & CO_ITERABLE_COROUTINE
        assert not (force_coroutine and force_iterable_coroutine)
        future_generator_stop = flags & CO_FUTURE_GENERATOR_STOP
    else:
        force_coroutine = None
        force_iterable_coroutine = None
        future_generator_stop = None

    if co.co_consts and isinstance(co.co_consts[0], str):
        docstring = co.co_consts[0]
    else:
        docstring = None

    arg_count = (co.co_argcount + varargs + varkwargs
                 + co.co_kwonlyargcount)
    return cls(code=code,
               freevars=co.co_freevars,
               args=co.co_varnames[:arg_count],
               kwonly=co.co_kwonlyargcount,
               varargs=varargs,
               varkwargs=varkwargs,
               newlocals=bool(flags & CO_NEWLOCALS),
               name=co.co_name,
               filename=co.co_filename,
               firstlineno=co.co_firstlineno,
               docstring=docstring,
               force_generator=force_generator,
               force_coroutine=force_coroutine,
               force_iterable_coroutine=force_iterable_coroutine,
               future_generator_stop=future_generator_stop)
def getjoinpoints(self):
    """Return the bytecode offsets that may be join points.

    Used for FlowObjSpace. As a first approximation these are simply the
    jump targets that ``dis.findlabels`` reports for ``self.co_code``.
    """
    bytecode = self.co_code
    jump_targets = dis.findlabels(bytecode)
    return jump_targets
def disassemble_bytes(orig_msg, orig_msg_nocr, code, lasti=-1, cur_line=0,
                      start_line=-1, end_line=None, relative_pos=False,
                      varnames=(), names=(), constants=(), cells=(),
                      freevars=(), linestarts={}, highlight='light',
                      start_offset=0, end_offset=None):
    """Disassemble byte string of code.

    Instructions come from ``get_instructions_bytes``; every field is
    written through ``orig_msg_nocr`` (no line ending) or ``orig_msg``
    (ends the line). Output is suppressed, by swapping in a no-op writer,
    for instructions before ``start_line`` / ``start_offset``, and the
    listing stops once ``end_line`` or ``end_offset`` is passed.

    :param orig_msg: callable that writes a string and ends the line.
    :param orig_msg_nocr: callable that writes a string without ending it.
    :param code: the bytecode to list.
    :param lasti: offset marked with the ``-->`` arrow.
    :param cur_line: line number assumed before the first line start.
    :param start_line: first line to show.
    :param end_line: last line to show; ``None`` means 10000. With
        ``relative_pos`` it is taken relative to ``start_line``.
    :param linestarts: mapping handed to ``get_instructions_bytes``; it is
        only read here, never modified.
    :param highlight: passed through to ``format_token``.
    :param start_offset: first offset to show (0 disables the check).
    :param end_offset: last offset to show (falsy disables the check).
    :returns: ``(code, offset)`` where ``offset`` is the offset of the last
        instruction examined, or 0 when no instruction was produced.
    """
    # NOTE(review): decremented below but never read in this function.
    statement_count = 10000
    if end_line is None:
        end_line = 10000
    elif relative_pos:
        end_line += start_line - 1

    labels = findlabels(code)

    null_print = lambda x: None
    if start_line > cur_line:
        msg_nocr = null_print
        msg = null_print
    else:
        msg_nocr = orig_msg_nocr
        msg = orig_msg

    # Bound up front so the final ``return`` cannot hit an unbound local
    # when the instruction iterator yields nothing (e.g. empty ``code``).
    offset = 0
    for instr in get_instructions_bytes(code, opc, varnames, names,
                                        constants, cells, linestarts):
        offset = instr.offset
        if end_offset and offset > end_offset:
            break

        if instr.starts_line:
            if offset:
                # Blank line between source lines (not before the first).
                msg("")
            cur_line = instr.starts_line
            if (start_line and ((start_line > cur_line) or
                                start_offset and start_offset > offset)):
                msg_nocr = null_print
                msg = null_print
            else:
                statement_count -= 1
                msg_nocr = orig_msg_nocr
                msg = orig_msg
            if ((cur_line > end_line) or
                    (end_offset and offset > end_offset)):
                break
            msg_nocr(format_token(Mformat.LineNumber,
                                  "%3d" % cur_line,
                                  highlight=highlight))
        else:
            if start_offset and offset and start_offset <= offset:
                # Reached the requested start offset in mid-line: unmute.
                msg_nocr = orig_msg_nocr
                msg = orig_msg
            msg_nocr(' ')

        if offset == lasti:
            msg_nocr(format_token(Mformat.Arrow, '-->',
                                  highlight=highlight))
        else:
            msg_nocr(' ')
        if offset in labels:
            msg_nocr(format_token(Mformat.Arrow, '>>',
                                  highlight=highlight))
        else:
            msg_nocr(' ')
        msg_nocr(repr(offset).rjust(4))
        msg_nocr(' ')
        msg_nocr(format_token(Mformat.Opcode,
                              instr.opname.ljust(20),
                              highlight=highlight))
        msg_nocr(repr(instr.arg).ljust(10))
        msg_nocr(' ')
        # Show argval?
        msg(format_token(Mformat.Name,
                         instr.argrepr.ljust(20),
                         highlight=highlight))

    return code, offset
def from_code(cls, co):
    """Disassemble a Python code object into a Code object.

    Raises ``ValueError`` when an opcode listed in ``hascode`` is not
    directly preceded by a ``LOAD_CONST`` entry.
    """
    raw = co.co_code
    labels = dict((addr, Label()) for addr in findlabels(raw))
    linestarts = dict(cls._findlinestarts(co))
    cellfree = co.co_cellvars + co.co_freevars

    code = CodeList()
    size = len(raw)
    pos = 0
    extended_arg = 0
    while pos < size:
        op = Opcode(ord(raw[pos]))
        # Pseudo-entries for jump targets and line starts precede the
        # instruction they belong to.
        if pos in labels:
            code.append((labels[pos], None))
        if pos in linestarts:
            code.append((SetLineno, linestarts[pos]))
        pos += 1

        if op in hascode:
            # Convert the raw code constant loaded just before this opcode.
            lastop, lastarg = code[-1]
            if lastop != LOAD_CONST:
                raise ValueError(
                    "%s should be preceded by LOAD_CONST code" % op)
            code[-1] = (LOAD_CONST, Code.from_code(lastarg))

        if op not in hasarg:
            code.append((op, None))
            continue

        arg = ord(raw[pos]) + ord(raw[pos + 1]) * 256 + extended_arg
        extended_arg = 0
        pos += 2
        if op == opcode.EXTENDED_ARG:
            # Supplies the high bits of the next argument; emits nothing.
            extended_arg = arg << 16
            continue

        if op in hasconst:
            value = co.co_consts[arg]
        elif op in hasname:
            value = co.co_names[arg]
        elif op in hasjabs:
            value = labels[arg]
        elif op in hasjrel:
            value = labels[pos + arg]
        elif op in haslocal:
            value = co.co_varnames[arg]
        elif op in hascompare:
            value = cmp_op[arg]
        elif op in hasfree:
            value = cellfree[arg]
        else:
            value = arg
        code.append((op, value))

    flags = co.co_flags
    varargs = bool(flags & CO_VARARGS)
    varkwargs = bool(flags & CO_VARKEYWORDS)
    newlocals = bool(flags & CO_NEWLOCALS)
    args = co.co_varnames[:co.co_argcount + varargs + varkwargs]

    docstring = None
    if co.co_consts and isinstance(co.co_consts[0], basestring):
        docstring = co.co_consts[0]

    return cls(code=code,
               freevars=co.co_freevars,
               args=args,
               varargs=varargs,
               varkwargs=varkwargs,
               newlocals=newlocals,
               name=co.co_name,
               filename=co.co_filename,
               firstlineno=co.co_firstlineno,
               docstring=docstring,
               )
def disassemble_bytes(orig_msg, orig_msg_nocr, code, lasti=-1, cur_line=0,
                      start_line=-1, end_line=None, relative_pos=False,
                      varnames=(), names=(), consts=(), cellvars=(),
                      freevars=(), linestarts={}, highlight='light',
                      start_offset=0, end_offset=None):
    """Disassemble byte string of code.

    The bytecode is decoded here (opcode byte plus optional two-byte
    argument) and each field is written through ``orig_msg_nocr`` (no line
    ending) or ``orig_msg`` (ends the line). A no-op writer is swapped in
    for instructions before ``start_line`` / ``start_offset``; the listing
    stops once ``end_line`` or ``end_offset`` is passed. ``end_line`` of
    ``None`` means 10000, and with ``relative_pos`` it is taken relative to
    ``start_line``. Returns ``None``.
    """
    statement_count = 10000
    if end_line is None:
        end_line = 10000
    elif relative_pos:
        end_line += start_line - 1

    labels = findlabels(code)
    size = len(code)
    pos = 0
    extended_arg = 0
    free = None  # built lazily: cellvars followed by freevars

    null_print = lambda x: None
    muted = (null_print, null_print)
    audible = (orig_msg_nocr, orig_msg)
    msg_nocr, msg = muted if start_line > cur_line else audible

    while pos < size and statement_count >= 0:
        op = ord(code[pos])
        if end_offset and pos > end_offset:
            break

        msg_nocr, msg = muted if start_offset > pos else audible

        if pos in linestarts:
            if pos > 0:
                # Blank line between source lines (not before the first).
                msg("")
            cur_line = linestarts[pos]
            if (start_line and start_line > cur_line) or start_offset > pos:
                msg_nocr, msg = muted
            else:
                statement_count -= 1
                msg_nocr, msg = audible
            if cur_line > end_line or (end_offset and pos > end_offset):
                break
            msg_nocr(format_token(Mformat.LineNumber,
                                  "%3d" % cur_line,
                                  highlight=highlight))
        else:
            msg_nocr(' ')

        msg_nocr(format_token(Mformat.Arrow, '-->', highlight=highlight)
                 if pos == lasti else ' ')
        msg_nocr(format_token(Mformat.Arrow, '>>', highlight=highlight)
                 if pos in labels else ' ')
        msg_nocr(repr(pos).rjust(4))
        msg_nocr(' ')
        msg_nocr(format_token(Mformat.Opcode,
                              opname[op].ljust(20),
                              highlight=highlight))
        pos += 1

        if op >= HAVE_ARGUMENT:
            oparg = ord(code[pos]) + ord(code[pos + 1]) * 256 + extended_arg
            pos += 2
            # Only EXTENDED_ARG carries high bits over to the next opcode.
            extended_arg = oparg * 65536 if op == EXTENDED_ARG else 0
            msg_nocr(repr(oparg).rjust(5))
            msg_nocr(' ')

            note = None
            if op in hasconst:
                note = '(' + format_token(Mformat.Const,
                                          repr(consts[oparg]),
                                          highlight=highlight) + ')'
            elif op in hasname:
                note = '(' + format_token(Mformat.Name,
                                          names[oparg],
                                          highlight=highlight) + ')'
            elif op in hasjrel:
                # Relative jumps are measured from the next instruction.
                note = format_token(Mformat.Label,
                                    '(to ' + repr(pos + oparg) + ')',
                                    highlight=highlight)
            elif op in haslocal:
                note = '(' + format_token(Mformat.Var,
                                          varnames[oparg],
                                          highlight=highlight) + ')'
            elif op in hascompare:
                note = '(' + format_token(Mformat.Compare,
                                          cmp_op[oparg],
                                          highlight=highlight) + ')'
            elif op in hasfree:
                if free is None:
                    free = cellvars + freevars
                note = '(' + free[oparg] + ')'
            if note is not None:
                msg_nocr(note)
        msg("")
    return
def disassembler(co, lasti=-1):
    """Disassemble a code object.

    :param co: code object
    :param lasti: internal; the instruction at this offset gets
        ``lasti = True``
    :yields: Instructions, each carrying its offset, opcode, line number
        and ``linestart`` / ``lasti`` / ``label`` flags, plus ``oparg``,
        ``extended_arg`` and (where resolvable) ``arg`` for opcodes that
        take an argument.
    """
    bytecode = co.co_code
    jump_targets = findlabels(bytecode)
    line_for_offset = dict(findlinestarts(co))
    size = len(bytecode)
    pos = 0
    extended_arg = 0
    lineno = 0
    free = None  # built lazily: cell variables followed by free variables
    while pos < size:
        op = co_ord(bytecode[pos])
        begins_line = pos in line_for_offset
        if begins_line:
            lineno = line_for_offset[pos]

        instr = Instruction(i=pos, op=op, lineno=lineno)
        instr.linestart = begins_line
        instr.lasti = (pos == lasti)
        instr.label = pos in jump_targets

        pos += 1
        if op >= opcode.HAVE_ARGUMENT:
            oparg = (co_ord(bytecode[pos])
                     + co_ord(bytecode[pos + 1]) * 256
                     + extended_arg)
            instr.oparg = oparg
            pos += 2
            # Only EXTENDED_ARG carries high bits over to the next opcode.
            extended_arg = oparg * 65536 if op == opcode.EXTENDED_ARG else 0
            instr.extended_arg = extended_arg

            if op in opcode.hasconst:
                instr.arg = co.co_consts[oparg]
            elif op in opcode.hasname:
                instr.arg = co.co_names[oparg]
            elif op in opcode.hasjrel:
                # Relative jumps are measured from the next instruction.
                instr.arg = pos + oparg
            elif op in opcode.haslocal:
                instr.arg = co.co_varnames[oparg]
            elif op in opcode.hascompare:
                instr.arg = opcode.cmp_op[oparg]
            elif op in opcode.hasfree:
                if free is None:
                    free = co.co_cellvars + co.co_freevars
                instr.arg = free[oparg]

        yield instr
def disassemble_bytes(orig_msg, orig_msg_nocr, code, lasti=-1, cur_line=0,
                      start_line=-1, end_line=None, relative_pos=False,
                      varnames=(), names=(), consts=(), cellvars=(),
                      freevars=(), linestarts={}, highlight='light',
                      start_offset=0, end_offset=None):
    """Disassemble byte string of code.

    The bytecode is decoded here (opcode byte plus optional two-byte
    argument) and each field is written through ``orig_msg_nocr`` (no line
    ending) or ``orig_msg`` (ends the line). A no-op writer is swapped in
    for instructions before ``start_line`` / ``start_offset``; the listing
    stops once ``end_line`` or ``end_offset`` is passed. ``end_line`` of
    ``None`` means 10000, and with ``relative_pos`` it is taken relative to
    ``start_line``. Returns ``None``.
    """
    statement_count = 10000
    if end_line is None:
        end_line = 10000
    elif relative_pos:
        end_line += start_line - 1

    labels = findlabels(code)
    size = len(code)
    pos = 0
    extended_arg = 0
    free = None  # built lazily: cellvars followed by freevars

    null_print = lambda x: None
    muted = (null_print, null_print)
    audible = (orig_msg_nocr, orig_msg)
    msg_nocr, msg = muted if start_line > cur_line else audible

    while pos < size and statement_count >= 0:
        op = ord(code[pos])
        if end_offset and pos > end_offset:
            break

        msg_nocr, msg = muted if start_offset > pos else audible

        if pos in linestarts:
            if pos > 0:
                # Blank line between source lines (not before the first).
                msg("")
            cur_line = linestarts[pos]
            if (start_line and start_line > cur_line) or start_offset > pos:
                msg_nocr, msg = muted
            else:
                statement_count -= 1
                msg_nocr, msg = audible
            if cur_line > end_line or (end_offset and pos > end_offset):
                break
            msg_nocr(format_token(Mformat.LineNumber,
                                  "%3d" % cur_line,
                                  highlight=highlight))
        else:
            msg_nocr(' ')

        msg_nocr(format_token(Mformat.Arrow, '-->', highlight=highlight)
                 if pos == lasti else ' ')
        msg_nocr(format_token(Mformat.Arrow, '>>', highlight=highlight)
                 if pos in labels else ' ')
        msg_nocr(repr(pos).rjust(4))
        msg_nocr(' ')
        msg_nocr(format_token(Mformat.Opcode,
                              opname[op].ljust(20),
                              highlight=highlight))
        pos += 1

        if op >= HAVE_ARGUMENT:
            oparg = ord(code[pos]) + ord(code[pos + 1]) * 256 + extended_arg
            pos += 2
            # Only EXTENDED_ARG carries high bits over to the next opcode.
            extended_arg = oparg * 65536 if op == EXTENDED_ARG else 0
            msg_nocr(repr(oparg).rjust(5))
            msg_nocr(' ')

            note = None
            if op in hasconst:
                note = '(' + format_token(Mformat.Const,
                                          repr(consts[oparg]),
                                          highlight=highlight) + ')'
            elif op in hasname:
                note = '(' + format_token(Mformat.Name,
                                          names[oparg],
                                          highlight=highlight) + ')'
            elif op in hasjrel:
                # Relative jumps are measured from the next instruction.
                note = format_token(Mformat.Label,
                                    '(to ' + repr(pos + oparg) + ')',
                                    highlight=highlight)
            elif op in haslocal:
                note = '(' + format_token(Mformat.Var,
                                          varnames[oparg],
                                          highlight=highlight) + ')'
            elif op in hascompare:
                note = '(' + format_token(Mformat.Compare,
                                          cmp_op[oparg],
                                          highlight=highlight) + ')'
            elif op in hasfree:
                if free is None:
                    free = cellvars + freevars
                note = '(' + free[oparg] + ')'
            if note is not None:
                msg_nocr(note)
        msg("")
    return
def from_code(cls, co):
    """Disassemble a Python code object into a Code object.

    Returns ``None`` (after printing a diagnostic) when a name is both a
    cell and a free variable of ``co``, or when a nested code object could
    not be converted. Raises ``ValueError`` when an opcode listed in
    ``hascode`` is not preceded by a ``LOAD_CONST`` two entries earlier.
    """
    shared_names = set(co.co_cellvars) & set(co.co_freevars)
    if shared_names:
        print(co.co_name + ': has non-empty co.co_cellvars & co.co_freevars',
              shared_names)
        return None

    raw = co.co_code
    labels = {addr: Label() for addr in findlabels(raw)}
    linestarts = dict(cls._findlinestarts(co))
    cellfree = co.co_cellvars + co.co_freevars

    code = []
    size = len(raw)
    pos = 0
    extended_arg = 0
    is_generator = False
    if version_info >= (3, 5,):
        is_coroutine = False

    while pos < size:
        op = Opcode(raw[pos])
        # Pseudo-entries for jump targets and line starts precede the
        # instruction they belong to.
        if pos in labels:
            code.append((labels[pos], None))
        if pos in linestarts:
            code.append((SetLineno, linestarts[pos]))
        pos += 1

        if op in hascode:
            # Replace the raw code constant loaded two entries back with
            # its converted form.
            lastop, lastarg = code[-2]
            if lastop != LOAD_CONST:
                raise ValueError("%s should be preceded by LOAD_CONST" % op)
            sub_code = Code.from_code(lastarg)
            if sub_code is None:
                print(co.co_name + ': has unexpected subcode block')
                return None
            code[-2] = (LOAD_CONST, sub_code)

        if op in hasarg:
            arg = raw[pos] | (raw[pos + 1] << 8) | extended_arg
            extended_arg = 0
            pos += 2
            if op == opcode.EXTENDED_ARG:
                # Supplies the high bits of the next instruction's argument;
                # it is not emitted as an entry of its own.
                extended_arg = arg << 16
            else:
                if op in hasconst:
                    byteplay_arg = co.co_consts[arg]
                elif op in hasname:
                    byteplay_arg = co.co_names[arg]
                elif op in hasjabs:
                    byteplay_arg = labels[arg]
                elif op in hasjrel:
                    byteplay_arg = labels[pos + arg]
                elif op in haslocal:
                    byteplay_arg = co.co_varnames[arg]
                elif op in hascompare:
                    byteplay_arg = cmp_op[arg]
                elif op in hasfree:
                    byteplay_arg = cellfree[arg]
                else:
                    byteplay_arg = arg
                code.append((op, byteplay_arg))
        else:
            code.append((op, None))

        if op == YIELD_VALUE or op == YIELD_FROM:
            is_generator = True
        if version_info >= (3, 5,) and op in coroutine_opcodes:
            is_coroutine = True

    flags = co.co_flags
    varargs = bool(flags & CO_VARARGS)
    varkwargs = bool(flags & CO_VARKEYWORDS)
    # The force_* values record flags that are set on the code object even
    # though no matching opcode was seen in the body.
    force_generator = not is_generator and (flags & CO_GENERATOR)

    if version_info >= (3, 5,):
        force_coroutine = not is_coroutine and (flags & CO_COROUTINE)
        force_iterable_coroutine = flags & CO_ITERABLE_COROUTINE
        assert not (force_coroutine and force_iterable_coroutine)
        future_generator_stop = flags & CO_FUTURE_GENERATOR_STOP
    else:
        force_coroutine = None
        force_iterable_coroutine = None
        future_generator_stop = None

    if co.co_consts and isinstance(co.co_consts[0], str):
        docstring = co.co_consts[0]
    else:
        docstring = None

    arg_count = (co.co_argcount + varargs + varkwargs
                 + co.co_kwonlyargcount)
    return cls(code=code,
               freevars=co.co_freevars,
               args=co.co_varnames[:arg_count],
               kwonly=co.co_kwonlyargcount,
               varargs=varargs,
               varkwargs=varkwargs,
               newlocals=bool(flags & CO_NEWLOCALS),
               name=co.co_name,
               filename=co.co_filename,
               firstlineno=co.co_firstlineno,
               docstring=docstring,
               force_generator=force_generator,
               force_coroutine=force_coroutine,
               force_iterable_coroutine=force_iterable_coroutine,
               future_generator_stop=future_generator_stop)
def from_code(cls, co):
    """Disassemble a Python code object into a Code object.

    Raises ``ValueError`` when an opcode listed in ``hascode`` is not
    directly preceded by a ``LOAD_CONST`` entry.
    """
    raw = co.co_code
    labels = dict((addr, Label()) for addr in findlabels(raw))
    linestarts = dict(cls._findlinestarts(co))
    cellfree = co.co_cellvars + co.co_freevars

    code = CodeList()
    size = len(raw)
    pos = 0
    extended_arg = 0
    while pos < size:
        op = Opcode(ord(raw[pos]))
        # Pseudo-entries for jump targets and line starts precede the
        # instruction they belong to.
        if pos in labels:
            code.append((labels[pos], None))
        if pos in linestarts:
            code.append((SetLineno, linestarts[pos]))
        pos += 1

        if op in hascode:
            # Convert the raw code constant loaded just before this opcode.
            lastop, lastarg = code[-1]
            if lastop != LOAD_CONST:
                raise ValueError(
                    "%s should be preceded by LOAD_CONST code" % op)
            code[-1] = (LOAD_CONST, Code.from_code(lastarg))

        if op not in hasarg:
            code.append((op, None))
            continue

        arg = ord(raw[pos]) + ord(raw[pos + 1]) * 256 + extended_arg
        extended_arg = 0
        pos += 2
        if op == opcode.EXTENDED_ARG:
            # Supplies the high bits of the next argument; emits nothing.
            extended_arg = arg << 16
            continue

        if op in hasconst:
            value = co.co_consts[arg]
        elif op in hasname:
            value = co.co_names[arg]
        elif op in hasjabs:
            value = labels[arg]
        elif op in hasjrel:
            value = labels[pos + arg]
        elif op in haslocal:
            value = co.co_varnames[arg]
        elif op in hascompare:
            value = cmp_op[arg]
        elif op in hasfree:
            value = cellfree[arg]
        else:
            value = arg
        code.append((op, value))

    flags = co.co_flags
    varargs = bool(flags & CO_VARARGS)
    varkwargs = bool(flags & CO_VARKEYWORDS)
    newlocals = bool(flags & CO_NEWLOCALS)
    args = co.co_varnames[:co.co_argcount + varargs + varkwargs]

    docstring = None
    if co.co_consts and isinstance(co.co_consts[0], basestring):
        docstring = co.co_consts[0]

    return cls(code=code,
               freevars=co.co_freevars,
               args=args,
               varargs=varargs,
               varkwargs=varkwargs,
               newlocals=newlocals,
               name=co.co_name,
               filename=co.co_filename,
               firstlineno=co.co_firstlineno,
               docstring=docstring,
               )
def cmp_code_objects(version, code_obj1, code_obj2, name=''):
    """
    Compare two code-objects.

    This is the main part of this module.

    Every ``co_*`` member of the two code objects is compared, except those
    listed in ``__IGNORE_CODE_MEMBERS__``. ``co_code`` is compared token by
    token after scanning both objects with the scanner matching ``version``
    (2.5, 2.6 or 2.7), tolerating a fixed set of known-equivalent
    instruction sequences. For ``co_consts`` only nested code objects are
    compared, recursively. All remaining members must be equal.

    Raises ``CmpErrorCodeLen``, ``CmpErrorCode`` or ``CmpErrorMember`` on
    the first difference found; returns ``None`` otherwise.
    """
    # print code_obj1, type(code_obj2)
    assert type(code_obj1) == types.CodeType
    assert type(code_obj2) == types.CodeType
    # print dir(code_obj1)
    if isinstance(code_obj1, object):
        # new style classes (Python 2.2)
        # assume _both_ code objects to be new style classes
        assert dir(code_obj1) == dir(code_obj2)
    else:
        # old style classes
        assert dir(code_obj1) == code_obj1.__members__
        assert dir(code_obj2) == code_obj2.__members__
        assert code_obj1.__members__ == code_obj2.__members__

    # Build the dotted name used in error reports.
    if name == '__main__':
        name = code_obj1.co_name
    else:
        name = '%s.%s' % (name, code_obj1.co_name)
        if name == '.?':
            name = '__main__'

    if isinstance(code_obj1, object) and cmp(code_obj1, code_obj2):
        # use the new style code-classes' __cmp__ method, which
        # should be faster and more sophisticated
        # if this compare fails, we use the old routine to
        # find out what exactly is not equal
        # if this compare succeeds, simply return
        # (the early return is disabled, so this branch is a no-op)
        # return
        pass

    if isinstance(code_obj1, object):
        members = filter(lambda x: x.startswith('co_'), dir(code_obj1))
    else:
        members = dir(code_obj1)
    members.sort()  # members.reverse()

    tokens1 = None
    for member in members:
        if member in __IGNORE_CODE_MEMBERS__:
            pass
        elif member == 'co_code':
            # Pick the scanner for the bytecode version being verified.
            if version == 2.7:
                import scanner27 as scan
                scanner = scan.Scanner27()
            elif version == 2.6:
                import scanner26 as scan
                scanner = scan.Scanner26()
            elif version == 2.5:
                import scanner25 as scan
                scanner = scan.Scanner25()
            scanner.setShowAsm(showasm=0)
            global JUMP_OPs
            JUMP_OPs = scan.JUMP_OPs + ['JUMP_BACK']

            # use changed Token class
            # we (re)set this here to save exception handling,
            # which would get hard to follow
            scanner.setTokenClass(Token)
            try:
                # disassemble both code-objects
                tokens1, customize = scanner.disassemble(code_obj1)
                del customize  # save memory
                tokens2, customize = scanner.disassemble(code_obj2)
                del customize  # save memory
            finally:
                scanner.resetTokenClass()  # restore Token class

            targets1 = dis.findlabels(code_obj1.co_code)
            tokens1 = [t for t in tokens1 if t.type != 'COME_FROM']
            tokens2 = [t for t in tokens2 if t.type != 'COME_FROM']

            i1 = 0
            i2 = 0
            # offset in code_obj1 -> offset of the token matched in code_obj2
            offset_map = {}
            # forward-jump target offset in code_obj1 ->
            # [(index1, index2, expected target offset in code_obj2), ...]
            check_jumps = {}
            while i1 < len(tokens1):
                if i2 >= len(tokens2):
                    # tokens2 ran out first: accepted only when tokens1
                    # merely has an extra trailing "LOAD_CONST None;
                    # RETURN_VALUE" after a RETURN_VALUE.
                    if len(tokens1) == len(tokens2) + 2 \
                            and tokens1[-1].type == 'RETURN_VALUE' \
                            and tokens1[-2].type == 'LOAD_CONST' \
                            and tokens1[-2].pattr == None \
                            and tokens1[-3].type == 'RETURN_VALUE':
                        break
                    else:
                        raise CmpErrorCodeLen(name, tokens1, tokens2)

                offset_map[tokens1[i1].offset] = tokens2[i2].offset

                # Resolve forward jumps recorded earlier that land here.
                for idx1, idx2, offset2 in check_jumps.get(
                        tokens1[i1].offset, []):
                    if offset2 != tokens2[i2].offset:
                        raise CmpErrorCode(name, tokens1[idx1].offset,
                                           tokens1[idx1], tokens2[idx2],
                                           tokens1, tokens2)

                if tokens1[i1] != tokens2[i2]:
                    if tokens1[i1].type == 'LOAD_CONST' == tokens2[i2].type:
                        # Count the run of consecutive LOAD_CONSTs.
                        i = 1
                        while tokens1[i1 + i].type == 'LOAD_CONST':
                            i += 1
                        if tokens1[i1+i].type.startswith(('BUILD_TUPLE', 'BUILD_LIST')) \
                                and i == int(tokens1[i1+i].type.split('_')[-1]):
                            # Constants followed by BUILD_TUPLE/LIST_n vs.
                            # a single constant holding the same values.
                            t = tuple(
                                [elem.pattr for elem in tokens1[i1:i1 + i]])
                            if t != tokens2[i2].pattr:
                                raise CmpErrorCode(name, tokens1[i1].offset,
                                                   tokens1[i1], tokens2[i2],
                                                   tokens1, tokens2)
                            i1 += i + 1
                            i2 += 1
                            continue
                        elif i == 2 and tokens1[
                                i1 + i].type == 'ROT_TWO' and tokens2[
                                    i2 + 1].type == 'UNPACK_SEQUENCE_2':
                            i1 += 3
                            i2 += 2
                            continue
                        elif i == 2 and tokens1[i1 + i].type in BIN_OP_FUNCS:
                            # Two constants plus a binary operator vs. the
                            # already-computed constant.
                            f = BIN_OP_FUNCS[tokens1[i1 + i].type]
                            if f(tokens1[i1].pattr,
                                 tokens1[i1 + 1].pattr) == tokens2[i2].pattr:
                                i1 += 3
                                i2 += 1
                                continue
                    elif tokens1[i1].type == 'UNARY_NOT':
                        # "not" followed by a conditional jump vs. the
                        # opposite conditional jump.
                        if tokens2[i2].type == 'POP_JUMP_IF_TRUE':
                            if tokens1[i1 + 1].type == 'POP_JUMP_IF_FALSE':
                                i1 += 2
                                i2 += 1
                                continue
                        elif tokens2[i2].type == 'POP_JUMP_IF_FALSE':
                            if tokens1[i1 + 1].type == 'POP_JUMP_IF_TRUE':
                                i1 += 2
                                i2 += 1
                                continue
                    elif tokens1[i1].type in ('JUMP_FORWARD', 'JUMP_BACK') \
                            and tokens1[i1-1].type == 'RETURN_VALUE' \
                            and tokens2[i2-1].type in ('RETURN_VALUE', 'RETURN_END_IF') \
                            and int(tokens1[i1].offset) not in targets1:
                        # A jump right after a return that nothing jumps
                        # to: skip it in tokens1 only.
                        i1 += 1
                        continue
                    elif tokens1[i1].type == 'JUMP_FORWARD' and tokens2[i2].type == 'JUMP_BACK' \
                            and tokens1[i1+1].type == 'JUMP_BACK' and tokens2[i2+1].type == 'JUMP_BACK' \
                            and int(tokens1[i1].pattr) == int(tokens1[i1].offset) + 3:
                        if int(tokens1[i1].pattr) == int(
                                tokens1[i1 + 1].offset):
                            i1 += 2
                            i2 += 2
                            continue

                    # No tolerated pattern matched.
                    raise CmpErrorCode(name, tokens1[i1].offset, tokens1[i1],
                                       tokens2[i2], tokens1, tokens2)
                elif tokens1[i1].type in JUMP_OPs and tokens1[
                        i1].pattr != tokens2[i2].pattr:
                    # Same jump opcode, different targets: the targets must
                    # still correspond to each other.
                    dest1 = int(tokens1[i1].pattr)
                    dest2 = int(tokens2[i2].pattr)
                    if tokens1[i1].type == 'JUMP_BACK':
                        # Backward target was already seen; check at once.
                        if offset_map[dest1] != dest2:
                            raise CmpErrorCode(name, tokens1[i1].offset,
                                               tokens1[i1], tokens2[i2],
                                               tokens1, tokens2)
                    else:
                        # import pdb; pdb.set_trace()
                        # Forward target: check once it is reached.
                        if dest1 in check_jumps:
                            check_jumps[dest1].append((i1, i2, dest2))
                        else:
                            check_jumps[dest1] = [(i1, i2, dest2)]

                i1 += 1
                i2 += 1
            del tokens1, tokens2  # save memory
        elif member == 'co_consts':
            # partial optimization can make the co_consts look different,
            # so we'll just compare the code consts
            codes1 = (c for c in code_obj1.co_consts
                      if type(c) == types.CodeType)
            codes2 = (c for c in code_obj2.co_consts
                      if type(c) == types.CodeType)

            for c1, c2 in zip(codes1, codes2):
                cmp_code_objects(version, c1, c2, name=name)
        else:
            # all other members must be equal
            if getattr(code_obj1, member) != getattr(code_obj2, member):
                raise CmpErrorMember(name, member,
                                     getattr(code_obj1, member),
                                     getattr(code_obj2, member))
def print_code(co, lasti=-1, level=0):
    """Disassemble a code object.

    Prints the constants of ``co`` and then one line per instruction, each
    prefixed with ``level`` copies of a nesting marker. After an instruction
    whose constant operand is itself a code object, that code object is
    printed recursively one level deeper.
    """
    prefix = '| |' * level
    code = co.co_code

    for constant in co.co_consts:
        print(prefix, end=' ')
        print('constant:', constant)

    labels = findlabels(code)
    linestarts = dict(findlinestarts(co))
    size = len(code)
    pos = 0
    extended_arg = 0
    free = None  # built lazily: cell variables followed by free variables
    while pos < size:
        have_inner = False
        op = co_ord(code[pos])

        begins_line = pos in linestarts
        if begins_line and pos > 0:
            # Blank line between source lines (not before the first).
            print()
        print(prefix, end=' ')
        print("%3d" % linestarts[pos] if begins_line else ' ', end=' ')

        print('-->' if pos == lasti else ' ', end=' ')
        print('>>' if pos in labels else ' ', end=' ')
        print(repr(pos).rjust(4), end=' ')
        print(opcode.opname[op].ljust(20), end=' ')
        pos += 1

        if op >= opcode.HAVE_ARGUMENT:
            oparg = (co_ord(code[pos])
                     + co_ord(code[pos + 1]) * 256
                     + extended_arg)
            pos += 2
            # Only EXTENDED_ARG carries high bits over to the next opcode.
            extended_arg = oparg * 65536 if op == opcode.EXTENDED_ARG else 0
            print(repr(oparg).rjust(5), end=' ')

            if op in opcode.hasconst:
                const = co.co_consts[oparg]
                print('(' + repr(const) + ')', end=' ')
                if type(const) == types.CodeType:
                    # Remember nested code to print after this line.
                    have_inner = const
            elif op in opcode.hasname:
                print('(' + co.co_names[oparg] + ')', end=' ')
            elif op in opcode.hasjrel:
                # Relative jumps are measured from the next instruction.
                print('(to ' + repr(pos + oparg) + ')', end=' ')
            elif op in opcode.haslocal:
                print('(' + co.co_varnames[oparg] + ')', end=' ')
            elif op in opcode.hascompare:
                print('(' + opcode.cmp_op[oparg] + ')', end=' ')
            elif op in opcode.hasfree:
                if free is None:
                    free = co.co_cellvars + co.co_freevars
                print('(' + free[oparg] + ')', end=' ')
        print()

        if have_inner is not False:
            print_code(have_inner, level=level + 1)
def cmp_code_objects(version, is_pypy, code_obj1, code_obj2, name='',
                     ignore_code=False):
    """
    Compare two code-objects.

    This is the main part of this module.

    Every ``co_*`` member of the two code objects is compared, except those
    listed in ``__IGNORE_CODE_MEMBERS__``. ``co_code`` is compared token by
    token after ingesting both objects with the scanner selected by
    ``version`` / ``is_pypy``, tolerating a fixed set of known-equivalent
    instruction sequences. For ``co_consts`` only nested code objects are
    compared, recursively. ``co_flags`` is compared after masking out bits
    that are deliberately not checked. All remaining members must be equal.

    :param version: bytecode version as a float (2.3 ... 3.6).
    :param is_pypy: selects the PyPy scanners and flag masking.
    :param code_obj1: first code object.
    :param code_obj2: second code object.
    :param name: dotted name of the enclosing scope, for error reports.
    :param ignore_code: when true, every member is skipped.
    :raises CmpErrorCodeLen, CmpErrorCode, CmpErrorMember: on the first
        difference found.
    """
    # print code_obj1, type(code_obj2)
    assert iscode(code_obj1), \
        "cmp_code_object first object type is %s, not code" % type(code_obj1)
    assert iscode(code_obj2), \
        "cmp_code_object second object type is %s, not code" % type(code_obj2)
    # print dir(code_obj1)
    if isinstance(code_obj1, object):
        # new style classes (Python 2.2)
        # assume _both_ code objects to be new style classes
        assert dir(code_obj1) == dir(code_obj2)
    else:
        # old style classes
        assert dir(code_obj1) == code_obj1.__members__
        assert dir(code_obj2) == code_obj2.__members__
        assert code_obj1.__members__ == code_obj2.__members__

    # Build the dotted name used in error reports.
    if name == '__main__':
        name = code_obj1.co_name
    else:
        name = '%s.%s' % (name, code_obj1.co_name)
        if name == '.?':
            name = '__main__'

    if isinstance(code_obj1, object) and code_equal(code_obj1, code_obj2):
        # use the new style code-classes' __cmp__ method, which
        # should be faster and more sophisticated
        # if this compare fails, we use the old routine to
        # find out what exactly is not equal
        # if this compare succeeds, simply return
        # (the early return is disabled, so this branch is a no-op)
        # return
        pass

    if isinstance(code_obj1, object):
        members = [x for x in dir(code_obj1) if x.startswith('co_')]
    else:
        members = dir(code_obj1)
    members.sort()  # ; members.reverse()

    tokens1 = None
    for member in members:
        if member in __IGNORE_CODE_MEMBERS__ or ignore_code:
            pass
        elif member == 'co_code' and not ignore_code:
            # Pick the scanner for the bytecode version being verified.
            if version == 2.3:
                import uncompyle6.scanners.scanner23 as scan
                scanner = scan.Scanner23(show_asm=False)
            elif version == 2.4:
                import uncompyle6.scanners.scanner24 as scan
                scanner = scan.Scanner24(show_asm=False)
            elif version == 2.5:
                import uncompyle6.scanners.scanner25 as scan
                scanner = scan.Scanner25(show_asm=False)
            elif version == 2.6:
                import uncompyle6.scanners.scanner26 as scan
                scanner = scan.Scanner26(show_asm=False)
            elif version == 2.7:
                if is_pypy:
                    import uncompyle6.scanners.pypy27 as scan
                    scanner = scan.ScannerPyPy27(show_asm=False)
                else:
                    import uncompyle6.scanners.scanner27 as scan
                    scanner = scan.Scanner27()
            elif version == 3.0:
                import uncompyle6.scanners.scanner30 as scan
                scanner = scan.Scanner30()
            elif version == 3.1:
                import uncompyle6.scanners.scanner32 as scan
                scanner = scan.Scanner32()
            elif version == 3.2:
                if is_pypy:
                    import uncompyle6.scanners.pypy32 as scan
                    scanner = scan.ScannerPyPy32()
                else:
                    import uncompyle6.scanners.scanner32 as scan
                    scanner = scan.Scanner32()
            elif version == 3.3:
                import uncompyle6.scanners.scanner33 as scan
                scanner = scan.Scanner33()
            elif version == 3.4:
                import uncompyle6.scanners.scanner34 as scan
                scanner = scan.Scanner34()
            elif version == 3.5:
                import uncompyle6.scanners.scanner35 as scan
                scanner = scan.Scanner35()
            elif version == 3.6:
                import uncompyle6.scanners.scanner36 as scan
                scanner = scan.Scanner36()

            global JUMP_OPS
            JUMP_OPS = list(scan.JUMP_OPS) + ['JUMP_BACK']

            # use changed Token class
            # We (re)set this here to save exception handling,
            # which would get confusing.
            scanner.setTokenClass(Token)
            try:
                # ingest both code-objects
                tokens1, customize = scanner.ingest(code_obj1)
                del customize  # save memory
                tokens2, customize = scanner.ingest(code_obj2)
                del customize  # save memory
            finally:
                scanner.resetTokenClass()  # restore Token class

            targets1 = dis.findlabels(code_obj1.co_code)
            tokens1 = [t for t in tokens1 if t.kind != 'COME_FROM']
            tokens2 = [t for t in tokens2 if t.kind != 'COME_FROM']

            i1 = 0
            i2 = 0
            # offset in code_obj1 -> offset of the token matched in code_obj2
            offset_map = {}
            # forward-jump target offset in code_obj1 ->
            # [(index1, index2, expected target offset in code_obj2), ...]
            check_jumps = {}
            while i1 < len(tokens1):
                if i2 >= len(tokens2):
                    # tokens2 ran out first: accepted only when tokens1
                    # merely has an extra trailing "LOAD_CONST None;
                    # RETURN_VALUE" after a RETURN_VALUE.
                    if len(tokens1) == len(tokens2) + 2 \
                            and tokens1[-1].kind == 'RETURN_VALUE' \
                            and tokens1[-2].kind == 'LOAD_CONST' \
                            and tokens1[-2].pattr is None \
                            and tokens1[-3].kind == 'RETURN_VALUE':
                        break
                    else:
                        raise CmpErrorCodeLen(name, tokens1, tokens2)

                offset_map[tokens1[i1].offset] = tokens2[i2].offset

                # Resolve forward jumps recorded earlier that land here.
                for idx1, idx2, offset2 in check_jumps.get(
                        tokens1[i1].offset, []):
                    if offset2 != tokens2[i2].offset:
                        raise CmpErrorCode(name, tokens1[idx1].offset,
                                           tokens1[idx1], tokens2[idx2],
                                           tokens1, tokens2)

                if tokens1[i1].kind != tokens2[i2].kind:
                    if tokens1[i1].kind == 'LOAD_CONST' == tokens2[i2].kind:
                        # Count the run of consecutive LOAD_CONSTs.
                        i = 1
                        while tokens1[i1 + i].kind == 'LOAD_CONST':
                            i += 1
                        if tokens1[i1 + i].kind.startswith(
                                ('BUILD_TUPLE', 'BUILD_LIST')) \
                                and i == int(
                                    tokens1[i1 + i].kind.split('_')[-1]):
                            # Constants followed by BUILD_TUPLE/LIST_n vs.
                            # a single constant holding the same values.
                            t = tuple(
                                [elem.pattr for elem in tokens1[i1:i1 + i]])
                            if t != tokens2[i2].pattr:
                                raise CmpErrorCode(name, tokens1[i1].offset,
                                                   tokens1[i1], tokens2[i2],
                                                   tokens1, tokens2)
                            i1 += i + 1
                            i2 += 1
                            continue
                        elif i == 2 and tokens1[i1 + i].kind == 'ROT_TWO' \
                                and tokens2[i2 + 1].kind == 'UNPACK_SEQUENCE_2':
                            i1 += 3
                            i2 += 2
                            continue
                        elif i == 2 and tokens1[i1 + i].kind in BIN_OP_FUNCS:
                            # Two constants plus a binary operator vs. the
                            # already-computed constant.
                            f = BIN_OP_FUNCS[tokens1[i1 + i].kind]
                            if f(tokens1[i1].pattr,
                                 tokens1[i1 + 1].pattr) == tokens2[i2].pattr:
                                i1 += 3
                                i2 += 1
                                continue
                    elif tokens1[i1].kind == 'UNARY_NOT':
                        # "not" followed by a conditional jump vs. the
                        # opposite conditional jump.
                        if tokens2[i2].kind == 'POP_JUMP_IF_TRUE':
                            if tokens1[i1 + 1].kind == 'POP_JUMP_IF_FALSE':
                                i1 += 2
                                i2 += 1
                                continue
                        elif tokens2[i2].kind == 'POP_JUMP_IF_FALSE':
                            if tokens1[i1 + 1].kind == 'POP_JUMP_IF_TRUE':
                                i1 += 2
                                i2 += 1
                                continue
                    elif tokens1[i1].kind in ('JUMP_FORWARD', 'JUMP_BACK') \
                            and tokens1[i1 - 1].kind == 'RETURN_VALUE' \
                            and tokens2[i2 - 1].kind in ('RETURN_VALUE',
                                                         'RETURN_END_IF') \
                            and int(tokens1[i1].offset) not in targets1:
                        # A jump right after a return that nothing jumps
                        # to: skip it in tokens1 only.
                        i1 += 1
                        continue
                    elif tokens1[i1].kind == 'JUMP_BACK' \
                            and tokens2[i2].kind == 'CONTINUE':
                        # FIXME: should make sure that offset is inside loop, not outside of it
                        i1 += 2
                        i2 += 2
                        continue
                    elif tokens1[i1].kind == 'JUMP_FORWARD' \
                            and tokens2[i2].kind == 'JUMP_BACK' \
                            and tokens1[i1 + 1].kind == 'JUMP_BACK' \
                            and tokens2[i2 + 1].kind == 'JUMP_BACK' \
                            and int(tokens1[i1].pattr) == int(
                                tokens1[i1].offset) + 3:
                        if int(tokens1[i1].pattr) == int(
                                tokens1[i1 + 1].offset):
                            i1 += 2
                            i2 += 2
                            continue
                    elif tokens1[i1].kind == 'LOAD_NAME' \
                            and tokens2[i2].kind == 'LOAD_CONST' \
                            and tokens1[i1].pattr == 'None' \
                            and tokens2[i2].pattr is None:
                        pass
                    elif tokens1[i1].kind == 'LOAD_GLOBAL' \
                            and tokens2[i2].kind == 'LOAD_NAME' \
                            and tokens1[i1].pattr == tokens2[i2].pattr:
                        pass
                    elif tokens1[i1].kind == 'LOAD_ASSERT' \
                            and tokens2[i2].kind == 'LOAD_NAME' \
                            and tokens1[i1].pattr == tokens2[i2].pattr:
                        pass
                    elif (tokens1[i1].kind == 'RETURN_VALUE'
                          and tokens2[i2].kind == 'RETURN_END_IF'):
                        pass
                    elif (tokens1[i1].kind == 'BUILD_TUPLE_0'
                          and tokens2[i2].pattr == ()):
                        pass
                    else:
                        raise CmpErrorCode(name, tokens1[i1].offset,
                                           tokens1[i1], tokens2[i2],
                                           tokens1, tokens2)
                elif tokens1[i1].kind in JUMP_OPS \
                        and tokens1[i1].pattr != tokens2[i2].pattr:
                    # Same jump opcode, different targets: the targets must
                    # still correspond to each other.
                    if tokens1[i1].kind == 'JUMP_BACK':
                        dest1 = int(tokens1[i1].pattr)
                        dest2 = int(tokens2[i2].pattr)
                        # Backward target was already seen; check at once.
                        if offset_map[dest1] != dest2:
                            raise CmpErrorCode(name, tokens1[i1].offset,
                                               tokens1[i1], tokens2[i2],
                                               tokens1, tokens2)
                    else:
                        # import pdb; pdb.set_trace()
                        # Forward target: record it so it is checked once
                        # reached. ``dest2`` used to be read here without
                        # being assigned in this branch; the bare ``except``
                        # hid the resulting NameError (or a stale value from
                        # an earlier JUMP_BACK was recorded). Both targets
                        # are now computed here, and only operands that do
                        # not convert to int are skipped.
                        try:
                            dest1 = int(tokens1[i1].pattr)
                            dest2 = int(tokens2[i2].pattr)
                        except (TypeError, ValueError):
                            pass
                        else:
                            if dest1 in check_jumps:
                                check_jumps[dest1].append((i1, i2, dest2))
                            else:
                                check_jumps[dest1] = [(i1, i2, dest2)]

                i1 += 1
                i2 += 1
            del tokens1, tokens2  # save memory
        elif member == 'co_consts':
            # partial optimization can make the co_consts look different,
            # so we'll just compare the code consts
            codes1 = (c for c in code_obj1.co_consts
                      if hasattr(c, 'co_consts'))
            codes2 = (c for c in code_obj2.co_consts
                      if hasattr(c, 'co_consts'))

            for c1, c2 in zip(codes1, codes2):
                cmp_code_objects(version, is_pypy, c1, c2, name=name)
        elif member == 'co_flags':
            flags1 = code_obj1.co_flags
            flags2 = code_obj2.co_flags
            if is_pypy:
                # For PYPY for now we don't care about PYPY_SOURCE_IS_UTF8:
                flags2 &= ~0x0100  # PYPY_SOURCE_IS_UTF8
            # We also don't care about COROUTINE or GENERATOR for now
            flags1 &= ~0x000000a0
            flags2 &= ~0x000000a0
            if flags1 != flags2:
                raise CmpErrorMember(name, 'co_flags',
                                     pretty_flags(flags1),
                                     pretty_flags(flags2))
        else:
            # all other members must be equal
            if getattr(code_obj1, member) != getattr(code_obj2, member):
                raise CmpErrorMember(name, member,
                                     getattr(code_obj1, member),
                                     getattr(code_obj2, member))