def nop_jump_junk(co, opc):
    """Strip the anti-disassembly jumps from ``co.co_code`` in place.

    Every byte that precedes the target of the first instruction is
    replaced by a NOP opcode.  When the text header marker (CR LF CR LF
    followed by ``========``) is found in the bytecode, the JUMP_FORWARD
    sitting immediately before it, and everything after it, is dropped.
    The rebuilt bytearray is stored back on ``co`` and ``co.freeze()`` is
    called.
    """
    original = co.co_code

    # The very first instruction is the leading junk jump; its argval is
    # the offset at which the real code resumes.
    leading_jump = next(get_instructions_bytes(original, opc))
    junk_end = leading_jump.argval

    # The text header is used as a landmark for the second jump.
    header_at = original.find(b'\r\n\r\n========')
    header_present = header_at != -1

    # The second jump is a JUMP_FORWARD placed right before the header.
    tail_start = header_at - instruction_size(opc.opmap['JUMP_FORWARD'], opc)

    patched = bytearray()
    for pos, byte in enumerate(original):
        if pos < junk_end:
            # Overwrite the first anti-disassembly jump with NOPs.
            patched.append(opc.opmap['NOP'])
        elif not (header_present and pos >= tail_start):
            # Ordinary code byte: keep it.  Bytes belonging to the second
            # jump, or following it, are skipped.
            patched.append(byte)

    # Install the cleaned-up code on the code object.
    co.co_code = patched
    co.freeze()
def op_range(self, start, end):
    """
    Generate the offsets of successive opcodes in ``self.code``, beginning
    at ``start`` and stopping before ``end``; operand bytes are stepped over
    by advancing with ``instruction_size``.
    """
    offset = start
    while offset < end:
        yield offset
        # Look up the opcode only after the consumer resumes us, then hop
        # over the whole instruction (opcode plus its arguments).
        opcode = self.code[offset]
        offset = offset + instruction_size(opcode, self.opc)
def get_inst(self, offset):
    """
    Return the instruction from ``self.insts`` that corresponds to
    ``offset``.

    Instructions can get moved as a result of EXTENDED_ARG removal, so an
    offset missing from ``self.offset2inst_index`` is assumed to belong to
    an instruction that was moved back by the size of one EXTENDED_ARG.
    That assumption is checked against the opcode stored in ``self.code``.
    """
    index_of = self.offset2inst_index
    if offset in index_of:
        return self.insts[index_of[offset]]

    # Not a known offset: step back over one EXTENDED_ARG and verify that
    # an EXTENDED_ARG opcode really is there.
    extended_arg = self.opc.EXTENDED_ARG
    offset = offset - instruction_size(extended_arg, self.opc)
    assert self.code[offset] == extended_arg
    return self.insts[index_of[offset]]
def build_prev_op(self):
    """
    Build the 'list-map' used to step back to a previous op: after this
    runs, ``self.prev_op`` (also reachable as ``self.prev``) holds, after a
    leading 0, one entry per byte of each instruction, each entry being the
    offset at which that instruction starts.
    """
    bytecode = self.code

    # 2.x uses prev 3.x uses prev_op. Sigh
    # Until we get this sorted out, both names refer to one shared list.
    owner_of = [0]
    self.prev = owner_of
    self.prev_op = owner_of

    for op_offset in self.op_range(0, len(bytecode)):
        width = instruction_size(bytecode[op_offset], self.opc)
        # One slot per byte occupied by this instruction.
        owner_of.extend([op_offset] * width)
def detect_control_flow(self, offset, targets, extended_arg):
    """
    Detect structures and their boundaries to fix optimized jumps
    in python2.3+

    Examines the opcode found at ``self.code[offset]`` and, depending on
    its kind, records what it learns on ``self``: new entries in
    ``self.structs``, ``self.fixed_jumps``, ``self.loops``,
    ``self.setup_loops``, ``self.ignore_if``, ``self.not_continue``,
    ``self.else_start``, ``self.except_targets`` and
    ``self.return_end_ifs``.

    offset:       index into ``self.code`` of the opcode to examine.
    targets:      container keyed by offset; ``targets[x]`` is iterated to
                  obtain the offsets that jump to ``x``.
    extended_arg: forwarded unchanged to ``self.get_target``.

    Always returns None; every result is a side effect on ``self``.
    """

    # TODO: check the struct boundaries more precisely -Dan

    code = self.code
    op = code[offset]

    # Detect parent structure
    parent = self.structs[0]
    start = parent['start']
    end = parent['end']

    # Pick inner-most parent for our offset: a struct qualifies when it
    # contains offset and lies within the best candidate found so far.
    for struct in self.structs:
        current_start = struct['start']
        current_end = struct['end']
        if ((current_start <= offset < current_end) and
            (current_start >= start and current_end <= end)):
            start = current_start
            end = current_end
            parent = struct

    if op == self.opc.SETUP_LOOP:
        # We categorize loop types: 'for', 'while', 'while 1' with
        # possibly suffixes '-loop' and '-else'
        # Try to find the jump_back instruction of the loop.
        # It could be a return instruction.

        start += instruction_size(op, self.opc)
        target = self.get_target(offset, extended_arg)
        end = self.restrict_to_parent(target, parent)
        self.setup_loops[target] = offset

        # The loop's own target was clipped by the parent: remember the
        # clipped value as the corrected jump.
        if target != end:
            self.fixed_jumps[offset] = end

        (line_no, next_line_byte) = self.lines[offset]
        jump_back = self.last_instr(start, end, self.opc.JUMP_ABSOLUTE,
                                    next_line_byte, False)

        if jump_back:
            jump_forward_offset = xdis.next_offset(code[jump_back], self.opc, jump_back)
        else:
            jump_forward_offset = None

        # Offset of the instruction two steps before ``end``.
        return_val_offset1 = self.prev[self.prev[end]]

        # Discard the candidate jump_back when it is followed by a forward
        # jump and the loop body actually finishes with RETURN_VALUE
        # (optionally followed by POP_BLOCK).
        if (jump_back and jump_back != self.prev_op[end]
            and self.is_jump_forward(jump_forward_offset)):
            if (code[self.prev_op[end]] == self.opc.RETURN_VALUE or
                (code[self.prev_op[end]] == self.opc.POP_BLOCK
                 and code[return_val_offset1] == self.opc.RETURN_VALUE)):
                jump_back = None
        if not jump_back:
            # No usable JUMP_ABSOLUTE: fall back to the last RETURN_VALUE
            # in the loop; give up if there is none either.
            jump_back = self.last_instr(start, end, self.opc.RETURN_VALUE)
            if not jump_back:
                return

            jump_back += 2 # FIXME ???
            if_offset = None
            if code[self.prev_op[next_line_byte]] not in self.pop_jump_tf:
                if_offset = self.prev[next_line_byte]
            if if_offset:
                loop_type = 'while'
                self.ignore_if.add(if_offset)
            else:
                loop_type = 'for'
            target = next_line_byte
            end = xdis.next_offset(code[jump_back], self.opc, jump_back)
        else:
            if self.get_target(jump_back, 0) >= next_line_byte:
                jump_back = self.last_instr(start, end, self.opc.JUMP_ABSOLUTE, start, False)
            # NOTE(review): the literal 4 below presumably steps past the
            # jump_back instruction to the one after it -- confirm.
            if end > jump_back+4 and self.is_jump_forward(end):
                if self.is_jump_forward(jump_back+4):
                    if self.get_target(jump_back+4, extended_arg) == self.get_target(end, extended_arg):
                        self.fixed_jumps[offset] = jump_back+4
                        end = jump_back+4
            elif target < offset:
                self.fixed_jumps[offset] = jump_back+4
                end = jump_back+4

            # I think 0 right because jump_back has been adjusted for any EXTENDED_ARG
            # it encounters
            target = self.get_target(jump_back, 0)

            if code[target] in (self.opc.FOR_ITER, self.opc.GET_ITER):
                loop_type = 'for'
            else:
                loop_type = 'while'
                test = self.prev_op[next_line_byte]

                if test == offset:
                    # Nothing sits between SETUP_LOOP and the next line:
                    # there is no loop test at all.
                    loop_type = 'while 1'
                elif self.code[test] in self.opc.JUMP_OPs:
                    self.ignore_if.add(test)
                    test_target = self.get_target(test, extended_arg)
                    if test_target > (jump_back+3):
                        jump_back = test_target
            self.not_continue.add(jump_back)
        self.loops.append(target)
        self.structs.append({'type': loop_type + '-loop',
                             'start': target,
                             'end': jump_back})
        after_jump_offset = xdis.next_offset(code[jump_back], self.opc, jump_back)
        # Anything between the end of the loop body and ``end`` is
        # recorded as the loop's else-part.
        if after_jump_offset != end:
            self.structs.append({'type': loop_type + '-else',
                                 'start': after_jump_offset,
                                 'end': end})
    elif op in self.pop_jump_tf:
        start = offset + instruction_size(op, self.opc)
        target = self.get_target(offset, extended_arg)
        rtarget = self.restrict_to_parent(target, parent)
        prev_op = self.prev_op

        # Do not let jump to go out of parent struct bounds
        if target != rtarget and parent['type'] == 'and/or':
            self.fixed_jumps[offset] = rtarget
            return

        # Does this jump to right after another conditional jump that is
        # not myself?  If so, it's part of a larger conditional.
        # rocky: if we have a conditional jump to the next instruction, then
        # possibly I am "skipping over" a "pass" or null statement.

        if ((code[prev_op[target]] in self.pop_jump_if_pop) and
            (target > offset) and prev_op[target] != offset):
            # FIXME: this is not accurate The commented out below
            # is what it should be. However grammar rules right now
            # assume the incorrect offsets.
            # self.fixed_jumps[offset] = target
            self.fixed_jumps[offset] = prev_op[target]
            self.structs.append({'type': 'and/or',
                                 'start': start,
                                 'end': prev_op[target]})
            return

        # The opcode *two* instructions before the target jump offset is important
        # in making a determination of what we have. Save that.
        pre_rtarget = prev_op[rtarget]

        # Is it an "and" inside an "if" or "while" block
        if op == self.opc.POP_JUMP_IF_FALSE and self.version < 3.6:

            # Search for another POP_JUMP_IF_FALSE targeting the same op,
            # in current statement, starting from current offset, and filter
            # everything inside inner 'or' jumps and midline ifs
            match = self.rem_or(start, self.next_stmt[offset],
                                self.opc.POP_JUMP_IF_FALSE, target)

            # If we still have any offsets in set, start working on it
            if match:
                is_jump_forward = self.is_jump_forward(pre_rtarget)
                if (is_jump_forward and pre_rtarget not in self.stmts and
                    self.restrict_to_parent(self.get_target(pre_rtarget, extended_arg), parent) == rtarget):
                    # The two ``pass`` branches below are the cases that are
                    # deliberately left alone so processing continues after
                    # this if/else; only the final ``else`` records a fix
                    # and returns.
                    if (code[prev_op[pre_rtarget]] == self.opc.JUMP_ABSOLUTE
                        and self.remove_mid_line_ifs([offset]) and
                        target == self.get_target(prev_op[pre_rtarget], extended_arg) and
                        (prev_op[pre_rtarget] not in self.stmts or
                         self.get_target(prev_op[pre_rtarget], extended_arg) > prev_op[pre_rtarget]) and
                        1 == len(self.remove_mid_line_ifs(self.rem_or(start, prev_op[pre_rtarget], self.pop_jump_tf, target)))):
                        pass
                    elif (code[prev_op[pre_rtarget]] == self.opc.RETURN_VALUE
                          and self.remove_mid_line_ifs([offset]) and
                          1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start, prev_op[pre_rtarget],
                                                                             self.pop_jump_tf, target))) |
                                    set(self.remove_mid_line_ifs(self.rem_or(start, prev_op[pre_rtarget],
                                                                             (self.opc.POP_JUMP_IF_FALSE,
                                                                              self.opc.POP_JUMP_IF_TRUE,
                                                                              self.opc.JUMP_ABSOLUTE),
                                                                             pre_rtarget, True)))))):
                        pass
                    else:
                        fix = None
                        jump_ifs = self.all_instr(start, self.next_stmt[offset],
                                                  self.opc.POP_JUMP_IF_FALSE)
                        last_jump_good = True
                        # Pick the first POP_JUMP_IF_FALSE sharing our
                        # target that ends its line, provided no earlier
                        # jump in the run went somewhere else.
                        for j in jump_ifs:
                            if target == self.get_target(j, extended_arg):
                                if self.lines[j].next == j + 3 and last_jump_good:
                                    fix = j
                                    break
                            else:
                                last_jump_good = False
                        self.fixed_jumps[offset] = fix or match[-1]
                        return
                else:
                    self.fixed_jumps[offset] = match[-1]
                    return
        # op == POP_JUMP_IF_TRUE
        else:
            # NOTE: this local shadows the builtin next() for the rest of
            # the function.
            next = self.next_stmt[offset]
            if prev_op[next] == offset:
                pass
            elif self.is_jump_forward(next) and target == self.get_target(next, extended_arg):
                if code[prev_op[next]] == self.opc.POP_JUMP_IF_FALSE:
                    if (code[next] == self.opc.JUMP_FORWARD
                        or target != rtarget
                        or code[prev_op[pre_rtarget]] not in
                        (self.opc.JUMP_ABSOLUTE, self.opc.RETURN_VALUE)):
                        self.fixed_jumps[offset] = prev_op[next]
                        return
            elif (code[next] == self.opc.JUMP_ABSOLUTE and self.is_jump_forward(target) and
                  self.get_target(target, extended_arg) == self.get_target(next, extended_arg)):
                self.fixed_jumps[offset] = prev_op[next]
                return

        # Don't add a struct for a while test, it's already taken care of
        if offset in self.ignore_if:
            return

        # When the instruction before rtarget is a JUMP_ABSOLUTE that
        # starts a statement (and is neither us nor right after us), pull
        # rtarget back onto it -- except for the
        # JUMP_ABSOLUTE / POP_BLOCK pattern excluded below.
        if (code[pre_rtarget] == self.opc.JUMP_ABSOLUTE and
            pre_rtarget in self.stmts and
            pre_rtarget != offset and
            prev_op[pre_rtarget] != offset and
            not (code[rtarget] == self.opc.JUMP_ABSOLUTE and
                 code[rtarget+3] == self.opc.POP_BLOCK and
                 code[prev_op[pre_rtarget]] != self.opc.JUMP_ABSOLUTE)):
            rtarget = pre_rtarget

        # Does the "jump if" jump beyond a jump op?
        # That is, we have something like:
        #  POP_JUMP_IF_FALSE HERE
        #  ...
        # JUMP_FORWARD
        # HERE:
        #
        # If so, this can be block inside an "if" statement
        # or a conditional assignment like:
        #   x = 1 if x else 2
        #
        # There are other contexts we may need to consider
        # like whether the target is "END_FINALLY"
        # or if the condition jump is to a forward location
        if self.is_jump_forward(pre_rtarget):
            if_end = self.get_target(pre_rtarget, 0)

            # If the jump target is back, we are looping
            if (if_end < pre_rtarget and
                (code[prev_op[if_end]] == self.opc.SETUP_LOOP)):
                if (if_end > start):
                    return

            end = self.restrict_to_parent(if_end, parent)

            self.structs.append({'type': 'if-then',
                                 'start': start,
                                 'end': pre_rtarget})

            # FIXME: add this
            # self.fixed_jumps[offset] = rtarget
            self.not_continue.add(pre_rtarget)

            # Record an else-part when there is room before ``end`` and
            # the surrounding opcodes are not the excluded ones.
            if rtarget < end and (
                    code[rtarget] not in (self.opc.END_FINALLY,
                                          self.opc.JUMP_ABSOLUTE) and
                    code[prev_op[pre_rtarget]] not in (self.opc.POP_EXCEPT,
                                                       self.opc.END_FINALLY)):
                self.structs.append({'type': 'else',
                                     'start': rtarget,
                                     'end': end})
                self.else_start[rtarget] = end
        elif self.is_jump_back(pre_rtarget, 0):
            # NOTE: if_end is assigned here but not read again on this path.
            if_end = rtarget
            self.structs.append({'type': 'if-then',
                                 'start': start,
                                 'end': pre_rtarget})
            self.not_continue.add(pre_rtarget)
        elif code[pre_rtarget] in (self.opc.RETURN_VALUE, self.opc.BREAK_LOOP):
            self.structs.append({'type': 'if-then',
                                 'start': start,
                                 'end': rtarget})
            # It is important to distinguish if this return is inside some sort
            # except block return
            jump_prev = prev_op[offset]
            if self.is_pypy and code[jump_prev] == self.opc.COMPARE_OP:
                if self.opc.cmp_op[code[jump_prev+1]] == 'exception-match':
                    return
            if self.version >= 3.5:
                # Python 3.5 may remove as dead code a JUMP
                # instruction after a RETURN_VALUE. So we check
                # based on seeing SETUP_EXCEPT various places.
                if code[rtarget] == self.opc.SETUP_EXCEPT:
                    return
                # Check that next instruction after pops and jump is
                # not from SETUP_EXCEPT
                next_op = rtarget
                if code[next_op] == self.opc.POP_BLOCK:
                    next_op += instruction_size(self.code[next_op], self.opc)
                if code[next_op] == self.opc.JUMP_ABSOLUTE:
                    next_op += instruction_size(self.code[next_op], self.opc)
                if next_op in targets:
                    for try_op in targets[next_op]:
                        come_from_op = code[try_op]
                        if come_from_op == self.opc.SETUP_EXCEPT:
                            return
                        pass
                    pass
            if code[pre_rtarget] == self.opc.RETURN_VALUE:
                self.return_end_ifs.add(pre_rtarget)
            else:
                self.fixed_jumps[offset] = rtarget
                self.not_continue.add(pre_rtarget)
        else:
            # For now, we'll only tag forward jump.
            if self.version >= 3.6:
                if target > offset:
                    self.fixed_jumps[offset] = target
                    pass
            else:
                # FIXME: This is probably a bug in < 3.6 and we should
                # instead use the above code. But until we smoke things
                # out we'll stick with it.
                if rtarget > offset:
                    self.fixed_jumps[offset] = rtarget

    elif op == self.opc.SETUP_EXCEPT:
        # Clip the handler target to the parent struct.
        target = self.get_target(offset, extended_arg)
        end = self.restrict_to_parent(target, parent)
        self.fixed_jumps[offset] = end
    elif op == self.opc.POP_EXCEPT:
        # Inspect the instruction that follows POP_EXCEPT: a forward
        # JUMP_ABSOLUTE not itself followed by END_FINALLY is recorded as
        # an except target.
        next_offset = xdis.next_offset(op, self.opc, offset)
        target = self.get_target(next_offset, extended_arg)
        if target > next_offset:
            next_op = code[next_offset]
            if (self.opc.JUMP_ABSOLUTE == next_op and
                self.opc.END_FINALLY != code[xdis.next_offset(next_op, self.opc, next_offset)]):
                self.fixed_jumps[next_offset] = target
                self.except_targets[target] = next_offset

    elif op == self.opc.SETUP_FINALLY:
        # Same clipping as for SETUP_EXCEPT.
        target = self.get_target(offset, extended_arg)
        end = self.restrict_to_parent(target, parent)
        self.fixed_jumps[offset] = end
    elif op in self.jump_if_pop:
        target = self.get_target(offset, extended_arg)
        if target > offset:
            unop_target = self.last_instr(offset, target, self.opc.JUMP_FORWARD, target)
            if unop_target and code[unop_target+3] != self.opc.ROT_TWO:
                self.fixed_jumps[offset] = unop_target
            else:
                self.fixed_jumps[offset] = self.restrict_to_parent(target, parent)
                pass
            pass
    elif self.version >= 3.5:
        # 3.5+ has Jump optimization which too often causes RETURN_VALUE to get
        # misclassified as RETURN_END_IF. Handle that here.
        # In RETURN_VALUE, JUMP_ABSOLUTE, RETURN_VALUE is never RETURN_END_IF
        if op == self.opc.RETURN_VALUE:
            next_offset = xdis.next_offset(op, self.opc, offset)
            if (next_offset < len(code) and
                code[next_offset] == self.opc.JUMP_ABSOLUTE and
                offset in self.return_end_ifs):
                self.return_end_ifs.remove(offset)
                pass
            pass
        elif op == self.opc.JUMP_FORWARD:
            # If we have:
            #   JUMP_FORWARD x, [non-jump, insns], RETURN_VALUE, x:
            # then RETURN_VALUE is not RETURN_END_IF
            rtarget = self.get_target(offset, extended_arg)
            rtarget_prev = self.prev[rtarget]
            if (code[rtarget_prev] == self.opc.RETURN_VALUE and
                rtarget_prev in self.return_end_ifs):
                # Walk backwards from the RETURN_VALUE to this jump; any
                # intervening jump means the pattern does not apply.
                i = rtarget_prev
                while i != offset:
                    if code[i] in [op3.JUMP_FORWARD, op3.JUMP_ABSOLUTE]:
                        return
                    i = self.prev[i]
                self.return_end_ifs.remove(rtarget_prev)
            pass
    return
def build_statement_indices(self):
    """
    Work out which offsets of ``self.code`` begin a statement.

    Results are stored on ``self``:

    * ``self.stmts``     -- set of offsets considered statement starts.
    * ``self.next_stmt`` -- list with one entry per offset of the code,
      filled with statement offsets (and finally with the code length) so
      that it can be indexed by an op offset to find a following statement
      boundary.

    Returns None.
    """
    code = self.code
    start = 0
    end = codelen = len(code)

    # Compose preliminary list of indices with statements,
    # using plain statement opcodes
    prelim = self.all_instr(start, end, self.statement_opcodes)

    # Initialize final container with statements with
    # preliminary data
    stmts = self.stmts = set(prelim)

    # Same for opcode sequences
    pass_stmts = set()
    for sequence in self.statement_opcode_sequences:
        for i in self.op_range(start, end-(len(sequence)+1)):
            match = True
            # Compare the sequence opcode by opcode, advancing ``i`` one
            # whole instruction per matched element.
            for elem in sequence:
                if elem != code[i]:
                    match = False
                    break
                i += instruction_size(code[i], self.opc)

            if match is True:
                # ``i`` is now just past the sequence; step back onto its
                # last instruction and record that offset.
                i = self.prev_op[i]
                stmts.add(i)
                pass_stmts.add(i)

    # Initialize statement list with the full data we've gathered so far
    if pass_stmts:
        stmt_offset_list = list(stmts)
        stmt_offset_list.sort()
    else:
        stmt_offset_list = prelim
    # 'List-map' which contains offset of start of
    # next statement, when op offset is passed as index
    self.next_stmt = slist = []
    # NOTE(review): -1 makes the first self.lines[last_stmt_offset] lookup
    # below index from the end of self.lines -- confirm this is intended.
    last_stmt_offset = -1
    i = 0
    # Go through all statement offsets.  ``stmts`` is pruned along the
    # way; the list being iterated is a separate object, so that is safe.
    for stmt_offset in stmt_offset_list:
        # Process absolute jumps, but do not remove 'pass' statements
        # from the set
        if (code[stmt_offset] == self.opc.JUMP_ABSOLUTE
            and stmt_offset not in pass_stmts):
            # If absolute jump occurs in forward direction or it takes off from the
            # same line as previous statement, this is not a statement
            # FIXME: 0 isn't always correct
            target = self.get_target(stmt_offset, 0)
            if target > stmt_offset or self.lines[last_stmt_offset].l_no == self.lines[stmt_offset].l_no:
                stmts.remove(stmt_offset)
                continue
            # Rewind ops till we encounter non-JUMP_ABSOLUTE one
            j = self.prev_op[stmt_offset]
            while code[j] == self.opc.JUMP_ABSOLUTE:
                j = self.prev_op[j]
            # If we got here, then it's list comprehension which
            # is not a statement too
            if code[j] == self.opc.LIST_APPEND:
                stmts.remove(stmt_offset)
                continue
        # Exclude ROT_TWO + POP_TOP
        elif (code[stmt_offset] == self.opc.POP_TOP and
              code[self.prev_op[stmt_offset]] == self.opc.ROT_TWO):
            stmts.remove(stmt_offset)
            continue
        # Exclude FOR_ITER + designators
        elif code[stmt_offset] in self.designator_ops:
            j = self.prev_op[stmt_offset]
            while code[j] in self.designator_ops:
                j = self.prev_op[j]
            if code[j] == self.opc.FOR_ITER:
                stmts.remove(stmt_offset)
                continue
        # Add to list another list with offset of current statement,
        # equal to length of previous statement
        slist += [stmt_offset] * (stmt_offset-i)
        last_stmt_offset = stmt_offset
        i = stmt_offset
    # Finish filling the list for last statement
    slist += [codelen] * (codelen-len(slist))
def detect_control_flow(self, offset, targets, inst_index):
    """
    Detect type of block structures and their boundaries to fix optimized jumps
    in python2.3+

    Examines the instruction ``self.insts[inst_index]`` located at
    ``offset`` and, depending on its opcode, records what it learns on
    ``self``: new entries in ``self.structs``, ``self.fixed_jumps``,
    ``self.loops``, ``self.setup_loops``, ``self.ignore_if``,
    ``self.not_continue``, ``self.else_start``, ``self.except_targets``
    and ``self.return_end_ifs``.

    offset:     offset in ``self.code`` of the instruction to examine.
    targets:    container keyed by offset; ``targets[x]`` is iterated to
                obtain the offsets that jump to ``x``.
    inst_index: index of the same instruction in ``self.insts``.

    Always returns None; every result is a side effect on ``self``.
    """

    code = self.code
    inst = self.insts[inst_index]
    op = inst.opcode

    # Detect parent structure
    parent = self.structs[0]
    start = parent["start"]
    end = parent["end"]

    # Pick inner-most parent for our offset: a struct qualifies when it
    # contains offset and lies within the best candidate found so far.
    for struct in self.structs:
        current_start = struct["start"]
        current_end = struct["end"]
        if (current_start <= offset < current_end) and (
                current_start >= start and current_end <= end):
            start = current_start
            end = current_end
            parent = struct

    if self.version < 3.8 and op == self.opc.SETUP_LOOP:
        # We categorize loop types: 'for', 'while', 'while 1' with
        # possibly suffixes '-loop' and '-else'
        # Try to find the jump_back instruction of the loop.
        # It could be a return instruction.

        start += inst.inst_size
        target = self.get_target(offset)
        end = self.restrict_to_parent(target, parent)
        self.setup_loops[target] = offset

        # The loop's own target was clipped by the parent: remember the
        # clipped value as the corrected jump.
        if target != end:
            self.fixed_jumps[offset] = end

        (line_no, next_line_byte) = self.lines[offset]
        jump_back = self.last_instr(start, end, self.opc.JUMP_ABSOLUTE,
                                    next_line_byte, False)

        if jump_back:
            jump_forward_offset = xdis.next_offset(code[jump_back], self.opc,
                                                   jump_back)
        else:
            jump_forward_offset = None

        # Offset of the instruction two steps before ``end``.
        return_val_offset1 = self.prev[self.prev[end]]

        # Discard the candidate jump_back when it is followed by a forward
        # jump and the loop body actually finishes with RETURN_VALUE
        # (optionally followed by POP_BLOCK).
        if (jump_back and jump_back != self.prev_op[end]
                and self.is_jump_forward(jump_forward_offset)):
            if code[self.prev_op[end]] == self.opc.RETURN_VALUE or (
                    code[self.prev_op[end]] == self.opc.POP_BLOCK
                    and code[return_val_offset1] == self.opc.RETURN_VALUE):
                jump_back = None
        if not jump_back:
            # loop suite ends in return
            jump_back = self.last_instr(start, end, self.opc.RETURN_VALUE)
            if not jump_back:
                return

            # Move jump_back to the offset following that instruction.
            jb_inst = self.get_inst(jump_back)
            jump_back = self.next_offset(jb_inst.opcode, jump_back)

            if_offset = None
            if code[self.prev_op[next_line_byte]] not in self.pop_jump_tf:
                if_offset = self.prev[next_line_byte]
            if if_offset:
                loop_type = "while"
                self.ignore_if.add(if_offset)
            else:
                loop_type = "for"
            target = next_line_byte
            end = xdis.next_offset(code[jump_back], self.opc, jump_back)
        else:
            if self.get_target(jump_back) >= next_line_byte:
                jump_back = self.last_instr(start, end, self.opc.JUMP_ABSOLUTE,
                                            start, False)

            jb_inst = self.get_inst(jump_back)

            # Offset of the instruction right after the jump back.
            jb_next_offset = self.next_offset(jb_inst.opcode, jump_back)
            if end > jb_next_offset and self.is_jump_forward(end):
                if self.is_jump_forward(jb_next_offset):
                    if self.get_target(jb_next_offset) == self.get_target(
                            end):
                        self.fixed_jumps[offset] = jb_next_offset
                        end = jb_next_offset
            elif target < offset:
                self.fixed_jumps[offset] = jb_next_offset
                end = jb_next_offset

            target = self.get_target(jump_back)

            if code[target] in (self.opc.FOR_ITER, self.opc.GET_ITER):
                loop_type = "for"
            else:
                loop_type = "while"
                test = self.prev_op[next_line_byte]

                if test == offset:
                    # Nothing sits between SETUP_LOOP and the next line:
                    # there is no loop test at all.
                    loop_type = "while 1"
                elif self.code[test] in self.opc.JUMP_OPs:
                    self.ignore_if.add(test)
                    test_target = self.get_target(test)
                    if test_target > (jump_back + 3):
                        jump_back = test_target
            self.not_continue.add(jump_back)
        self.loops.append(target)
        self.structs.append({
            "type": loop_type + "-loop",
            "start": target,
            "end": jump_back
        })
        after_jump_offset = xdis.next_offset(code[jump_back], self.opc,
                                             jump_back)
        # Anything between the end of the loop body and ``end`` is
        # recorded as the loop's else-part.
        if after_jump_offset != end:
            self.structs.append({
                "type": loop_type + "-else",
                "start": after_jump_offset,
                "end": end,
            })
    elif op in self.pop_jump_tf:
        start = offset + inst.inst_size
        target = inst.argval
        rtarget = self.restrict_to_parent(target, parent)
        prev_op = self.prev_op

        # Do not let jump to go out of parent struct bounds
        if target != rtarget and parent["type"] == "and/or":
            self.fixed_jumps[offset] = rtarget
            return

        # Does this jump to right after another conditional jump that is
        # not myself?  If so, it's part of a larger conditional.
        # rocky: if we have a conditional jump to the next instruction, then
        # possibly I am "skipping over" a "pass" or null statement.
        pretarget = self.get_inst(prev_op[target])

        if (pretarget.opcode in self.pop_jump_if_pop and (target > offset)
                and pretarget.offset != offset):

            # FIXME: hack upon hack...
            # In some cases the pretarget can be a jump to the next instruction
            # and these aren't and/or's either. We limit to 3.5+ since we experienced there
            # but it might be earlier versions, or might be a general principle.
            if pretarget.argval != target:
                # FIXME: this is not accurate The commented out below
                # is what it should be. However grammar rules right now
                # assume the incorrect offsets.
                # self.fixed_jumps[offset] = target
                self.fixed_jumps[offset] = pretarget.offset
                self.structs.append({
                    "type": "and/or",
                    "start": start,
                    "end": pretarget.offset
                })
                return

        # The opcode *two* instructions before the target jump offset is important
        # in making a determination of what we have. Save that.
        pre_rtarget = prev_op[rtarget]

        if op == self.opc.POP_JUMP_IF_FALSE:
            self.fixed_jumps[offset] = target

        # op == POP_JUMP_IF_TRUE
        else:
            # NOTE: this local shadows the builtin next() for the rest of
            # the function.
            next = self.next_stmt[offset]
            if prev_op[next] == offset:
                pass
            elif self.is_jump_forward(next) and target == self.get_target(
                    next):
                if code[prev_op[next]] == self.opc.POP_JUMP_IF_FALSE:
                    if (code[next] == self.opc.JUMP_FORWARD
                            or target != rtarget
                            or code[prev_op[pre_rtarget]] not in
                            (self.opc.JUMP_ABSOLUTE, self.opc.RETURN_VALUE)):
                        self.fixed_jumps[offset] = prev_op[next]
                        return
            elif (code[next] == self.opc.JUMP_ABSOLUTE
                  and self.is_jump_forward(target)
                  and self.get_target(target) == self.get_target(next)):
                self.fixed_jumps[offset] = prev_op[next]
                return

        # When the instruction before rtarget is a JUMP_ABSOLUTE that
        # starts a statement (and is neither us nor right after us), pull
        # rtarget back onto it -- except for the
        # JUMP_ABSOLUTE / POP_BLOCK pattern excluded below.
        rtarget_is_ja = code[pre_rtarget] == self.opc.JUMP_ABSOLUTE
        if (rtarget_is_ja and pre_rtarget in self.stmts
                and pre_rtarget != offset and prev_op[pre_rtarget] != offset
                and not (code[rtarget] == self.opc.JUMP_ABSOLUTE
                         and code[rtarget + 3] == self.opc.POP_BLOCK
                         and code[prev_op[pre_rtarget]] != self.opc.JUMP_ABSOLUTE)):
            rtarget = pre_rtarget

        # Does the "jump if" jump beyond a jump op?
        # That is, we have something like:
        #  POP_JUMP_IF_FALSE HERE
        #  ...
        # JUMP_FORWARD
        # HERE:
        #
        # If so, this can be block inside an "if" statement
        # or a conditional assignment like:
        #   x = 1 if x else 2
        #
        # For 3.5, for JUMP_FORWARD above we could have also
        # JUMP_BACK or CONTINUE
        #
        # There are other situations we may need to consider, like
        # if the condition jump is to a forward location.
        # Also the existence of a jump to the instruction after "END_FINALLY"
        # will distinguish "try/else" from "try".
        rtarget_break = (self.opc.RETURN_VALUE, self.opc.BREAK_LOOP)

        if self.is_jump_forward(pre_rtarget) or (rtarget_is_ja):
            if_end = self.get_target(pre_rtarget)

            # If the jump target is back, we are looping
            if (if_end < pre_rtarget and self.version < 3.8
                    and (code[prev_op[if_end]] == self.opc.SETUP_LOOP)):
                if if_end > start:
                    return

            end = self.restrict_to_parent(if_end, parent)

            self.structs.append({
                "type": "if-then",
                "start": start,
                "end": pre_rtarget
            })

            # FIXME: add this
            # self.fixed_jumps[offset] = rtarget
            self.not_continue.add(pre_rtarget)

            # Record an else-part when there is room before ``end`` and
            # the surrounding opcodes are not the excluded ones.
            if rtarget < end and (
                    code[rtarget] not in (self.opc.END_FINALLY,
                                          self.opc.JUMP_ABSOLUTE)
                    and code[prev_op[pre_rtarget]] not in (self.opc.POP_EXCEPT,
                                                           self.opc.END_FINALLY)):
                self.structs.append({
                    "type": "else",
                    "start": rtarget,
                    "end": end
                })
                self.else_start[rtarget] = end
        elif self.is_jump_back(pre_rtarget, 0):
            # NOTE: if_end is assigned here but not read again on this path.
            if_end = rtarget
            self.structs.append({
                "type": "if-then",
                "start": start,
                "end": pre_rtarget
            })
            self.not_continue.add(pre_rtarget)
        elif code[pre_rtarget] in rtarget_break:
            self.structs.append({
                "type": "if-then",
                "start": start,
                "end": rtarget
            })
            # It is important to distinguish if this return is inside some sort
            # except block return
            jump_prev = prev_op[offset]
            if self.is_pypy and code[jump_prev] == self.opc.COMPARE_OP:
                if self.opc.cmp_op[code[jump_prev + 1]] == "exception-match":
                    return
                pass

            # Check that next instruction after pops and jump is
            # not from SETUP_EXCEPT
            next_op = rtarget
            if code[next_op] == self.opc.POP_BLOCK:
                next_op += instruction_size(self.code[next_op], self.opc)
            if code[next_op] == self.opc.JUMP_ABSOLUTE:
                next_op += instruction_size(self.code[next_op], self.opc)
            if next_op in targets:
                for try_op in targets[next_op]:
                    come_from_op = code[try_op]
                    if self.version < 3.8 and come_from_op == self.opc.SETUP_EXCEPT:
                        return
                    pass
            self.fixed_jumps[offset] = rtarget

            if code[pre_rtarget] == self.opc.RETURN_VALUE:
                # If we are at some sort of POP_JUMP_IF and the instruction before was
                # COMPARE_OP exception-match, then pre_rtarget is not an end_if
                if not (inst_index > 0 and self.insts[inst_index - 1].argval
                        == "exception-match"):
                    self.return_end_ifs.add(pre_rtarget)
            else:
                self.fixed_jumps[offset] = rtarget
                self.not_continue.add(pre_rtarget)
        else:
            # Otherwise only forward jumps are tagged.
            if target > offset:
                self.fixed_jumps[offset] = target
                pass

    elif self.version < 3.8 and op == self.opc.SETUP_EXCEPT:
        # Clip the handler target to the parent struct.
        target = self.get_target(offset)
        end = self.restrict_to_parent(target, parent)
        self.fixed_jumps[offset] = end
    elif op == self.opc.POP_EXCEPT:
        # Inspect the instruction that follows POP_EXCEPT: a forward
        # JUMP_ABSOLUTE not itself followed by END_FINALLY is recorded as
        # an except target.
        next_offset = xdis.next_offset(op, self.opc, offset)
        target = self.get_target(next_offset)
        if target > next_offset:
            next_op = code[next_offset]
            if (self.opc.JUMP_ABSOLUTE == next_op
                    and self.opc.END_FINALLY != code[xdis.next_offset(
                        next_op, self.opc, next_offset)]):
                self.fixed_jumps[next_offset] = target
                self.except_targets[target] = next_offset

    elif op == self.opc.SETUP_FINALLY:
        # Same clipping as for SETUP_EXCEPT.
        target = self.get_target(offset)
        end = self.restrict_to_parent(target, parent)
        self.fixed_jumps[offset] = end
    elif op in self.jump_if_pop:
        target = self.get_target(offset)
        if target > offset:
            unop_target = self.last_instr(offset, target,
                                          self.opc.JUMP_FORWARD, target)
            if unop_target and code[unop_target + 3] != self.opc.ROT_TWO:
                self.fixed_jumps[offset] = unop_target
            else:
                self.fixed_jumps[offset] = self.restrict_to_parent(
                    target, parent)
                pass
            pass
    else:
        # 3.5+ has Jump optimization which too often causes RETURN_VALUE to get
        # misclassified as RETURN_END_IF. Handle that here.
        # In RETURN_VALUE, JUMP_ABSOLUTE, RETURN_VALUE is never RETURN_END_IF
        if op == self.opc.RETURN_VALUE:
            next_offset = xdis.next_offset(op, self.opc, offset)
            if next_offset < len(code) and (
                    code[next_offset] == self.opc.JUMP_ABSOLUTE
                    and offset in self.return_end_ifs):
                self.return_end_ifs.remove(offset)
                pass
            pass
        elif op == self.opc.JUMP_FORWARD:
            # If we have:
            #   JUMP_FORWARD x, [non-jump, insns], RETURN_VALUE, x:
            # then RETURN_VALUE is not RETURN_END_IF
            rtarget = self.get_target(offset)
            rtarget_prev = self.prev[rtarget]
            if (code[rtarget_prev] == self.opc.RETURN_VALUE
                    and rtarget_prev in self.return_end_ifs):
                # Walk backwards from the RETURN_VALUE to this jump; any
                # intervening jump means the pattern does not apply.
                i = rtarget_prev
                while i != offset:
                    if code[i] in [op3.JUMP_FORWARD, op3.JUMP_ABSOLUTE]:
                        return
                    i = self.prev[i]
                self.return_end_ifs.remove(rtarget_prev)
            pass
    return
def detect_control_flow(self, offset, targets, inst_index):
    """
    Detect the type of block structure that the instruction at "offset"
    belongs to, and record its boundaries so that optimized jumps can be
    fixed up later.

    Depending on the opcode examined, this updates one or more of:
    self.fixed_jumps, self.structs, self.loops, self.setup_loops,
    self.ignore_if, self.not_continue, self.return_end_ifs,
    self.else_start and self.except_targets.

    offset:     bytecode offset of the instruction being examined.
    targets:    mapping keyed by offset whose values are iterables of
                offsets (apparently the instructions that jump to the
                key -- confirm against the caller). It is only consulted
                to see whether a SETUP_EXCEPT refers to an offset.
    inst_index: index into self.insts of the instruction to examine;
                expected to be the instruction located at "offset".

    Always returns None; all results are recorded on self.
    """
    code = self.code
    inst = self.insts[inst_index]
    op = inst.opcode

    # Detect parent structure
    parent = self.structs[0]
    start = parent['start']
    end = parent['end']

    # Pick inner-most parent for our offset
    for struct in self.structs:
        current_start = struct['start']
        current_end = struct['end']
        if ((current_start <= offset < current_end)
                and (current_start >= start and current_end <= end)):
            start = current_start
            end = current_end
            parent = struct

    if self.version < 3.8 and op == self.opc.SETUP_LOOP:
        # We categorize loop types: 'for', 'while', 'while 1' with
        # possibly suffixes '-loop' and '-else'
        # Try to find the jump_back instruction of the loop.
        # It could be a return instruction.

        start += inst.inst_size
        target = self.get_target(offset)
        end = self.restrict_to_parent(target, parent)
        self.setup_loops[target] = offset

        if target != end:
            self.fixed_jumps[offset] = end

        # Only the second field of the line entry is needed here.
        _, next_line_byte = self.lines[offset]
        jump_back = self.last_instr(start, end, self.opc.JUMP_ABSOLUTE,
                                    next_line_byte, False)

        if jump_back:
            jump_forward_offset = xdis.next_offset(code[jump_back],
                                                   self.opc, jump_back)
        else:
            jump_forward_offset = None

        return_val_offset1 = self.prev[self.prev[end]]

        if (jump_back and jump_back != self.prev_op[end]
                and self.is_jump_forward(jump_forward_offset)):
            if (code[self.prev_op[end]] == self.opc.RETURN_VALUE
                    or (code[self.prev_op[end]] == self.opc.POP_BLOCK
                        and code[return_val_offset1] == self.opc.RETURN_VALUE)):
                jump_back = None

        if not jump_back:
            # loop suite ends in return
            jump_back = self.last_instr(start, end, self.opc.RETURN_VALUE)
            if not jump_back:
                return

            jb_inst = self.get_inst(jump_back)
            jump_back = self.next_offset(jb_inst.opcode, jump_back)

            if_offset = None
            if code[self.prev_op[next_line_byte]] not in self.pop_jump_tf:
                if_offset = self.prev[next_line_byte]
            if if_offset:
                loop_type = 'while'
                self.ignore_if.add(if_offset)
            else:
                loop_type = 'for'
            target = next_line_byte
            end = xdis.next_offset(code[jump_back], self.opc, jump_back)
        else:
            if self.get_target(jump_back) >= next_line_byte:
                jump_back = self.last_instr(start, end, self.opc.JUMP_ABSOLUTE,
                                            start, False)

            jb_inst = self.get_inst(jump_back)

            jb_next_offset = self.next_offset(jb_inst.opcode, jump_back)
            if end > jb_next_offset and self.is_jump_forward(end):
                if self.is_jump_forward(jb_next_offset):
                    if self.get_target(jb_next_offset) == self.get_target(end):
                        self.fixed_jumps[offset] = jb_next_offset
                        end = jb_next_offset
            elif target < offset:
                self.fixed_jumps[offset] = jb_next_offset
                end = jb_next_offset

            target = self.get_target(jump_back)

            if code[target] in (self.opc.FOR_ITER, self.opc.GET_ITER):
                loop_type = 'for'
            else:
                loop_type = 'while'
                test = self.prev_op[next_line_byte]

                if test == offset:
                    loop_type = 'while 1'
                elif code[test] in self.opc.JUMP_OPs:
                    self.ignore_if.add(test)
                    test_target = self.get_target(test)
                    # NOTE(review): the literal 3 looks like a fixed
                    # instruction size; confirm it is right for every
                    # bytecode format this scanner handles.
                    if test_target > (jump_back + 3):
                        jump_back = test_target
            self.not_continue.add(jump_back)

        self.loops.append(target)
        self.structs.append({'type': loop_type + '-loop',
                             'start': target,
                             'end': jump_back})
        after_jump_offset = xdis.next_offset(code[jump_back], self.opc, jump_back)
        if after_jump_offset != end:
            self.structs.append({'type': loop_type + '-else',
                                 'start': after_jump_offset,
                                 'end': end})

    elif op in self.pop_jump_tf:
        start = offset + inst.inst_size
        target = inst.argval
        rtarget = self.restrict_to_parent(target, parent)
        prev_op = self.prev_op

        # Do not let jump to go out of parent struct bounds
        if target != rtarget and parent['type'] == 'and/or':
            self.fixed_jumps[offset] = rtarget
            return

        # Does this jump to right after another conditional jump that is
        # not myself?  If so, it's part of a larger conditional.
        # rocky: if we have a conditional jump to the next instruction, then
        # possibly I am "skipping over" a "pass" or null statement.
        pretarget = self.get_inst(prev_op[target])

        if (pretarget.opcode in self.pop_jump_if_pop
                and (target > offset)
                and pretarget.offset != offset):

            # FIXME: hack upon hack...
            # In some cases the pretarget can be a jump to the next instruction
            # and these aren't and/or's either. We limit to 3.5+ since we
            # experienced there but it might be earlier versions, or might be
            # a general principle.
            if self.version < 3.5 or pretarget.argval != target:
                # FIXME: this is not accurate The commented out below
                # is what it should be. However grammar rules right now
                # assume the incorrect offsets.
                # self.fixed_jumps[offset] = target
                self.fixed_jumps[offset] = pretarget.offset
                self.structs.append({'type': 'and/or',
                                     'start': start,
                                     'end': pretarget.offset})
                return

        # The opcode *two* instructions before the target jump offset is
        # important in making a determination of what we have. Save that.
        pre_rtarget = prev_op[rtarget]

        # Is it an "and" inside an "if" or "while" block
        if op == self.opc.POP_JUMP_IF_FALSE:

            # Search for another POP_JUMP_IF_FALSE targetting the same op,
            # in current statement, starting from current offset, and filter
            # everything inside inner 'or' jumps and midline ifs
            match = self.rem_or(start, self.next_stmt[offset],
                                self.opc.POP_JUMP_IF_FALSE, target)

            # If we still have any offsets in set, start working on it
            if match:
                if (self.is_jump_forward(pre_rtarget)
                        and pre_rtarget not in self.stmts
                        and self.restrict_to_parent(
                            self.get_target(pre_rtarget), parent) == rtarget):
                    if (code[prev_op[pre_rtarget]] == self.opc.JUMP_ABSOLUTE
                            and self.remove_mid_line_ifs([offset])
                            and target == self.get_target(prev_op[pre_rtarget])
                            and (prev_op[pre_rtarget] not in self.stmts
                                 or self.get_target(prev_op[pre_rtarget])
                                 > prev_op[pre_rtarget])
                            and 1 == len(self.remove_mid_line_ifs(
                                self.rem_or(start, prev_op[pre_rtarget],
                                            self.pop_jump_tf, target)))):
                        pass
                    elif (code[prev_op[pre_rtarget]] == self.opc.RETURN_VALUE
                            and self.remove_mid_line_ifs([offset])
                            and 1 == (len(
                                set(self.remove_mid_line_ifs(
                                    self.rem_or(start, prev_op[pre_rtarget],
                                                self.pop_jump_tf, target)))
                                | set(self.remove_mid_line_ifs(
                                    self.rem_or(start, prev_op[pre_rtarget],
                                                (self.opc.POP_JUMP_IF_FALSE,
                                                 self.opc.POP_JUMP_IF_TRUE,
                                                 self.opc.JUMP_ABSOLUTE),
                                                pre_rtarget, True)))))):
                        pass
                    else:
                        fix = None
                        jump_ifs = self.inst_matches(start, self.next_stmt[offset],
                                                     self.opc.POP_JUMP_IF_FALSE)
                        last_jump_good = True
                        for j in jump_ifs:
                            if target == self.get_target(j):
                                # FIXME: remove magic number
                                if self.lines[j].next == j + 3 and last_jump_good:
                                    fix = j
                                    break
                            else:
                                last_jump_good = False
                        self.fixed_jumps[offset] = fix or match[-1]
                        return
                else:
                    if self.version < 3.6:
                        # FIXME: this is putting in COME_FROMs in the wrong place.
                        # Fix up grammar so we don't need to do this.
                        # See cf_for_iter use in parser36.py
                        self.fixed_jumps[offset] = match[-1]
                    elif target > offset:
                        # Right now we only add COME_FROMs in forward (not loop) jumps
                        self.fixed_jumps[offset] = target
                    return
        # op == POP_JUMP_IF_TRUE
        else:
            # Named so as not to shadow the builtin next().
            following_stmt = self.next_stmt[offset]
            if prev_op[following_stmt] == offset:
                pass
            elif (self.is_jump_forward(following_stmt)
                    and target == self.get_target(following_stmt)):
                if code[prev_op[following_stmt]] == self.opc.POP_JUMP_IF_FALSE:
                    if (code[following_stmt] == self.opc.JUMP_FORWARD
                            or target != rtarget
                            or code[prev_op[pre_rtarget]] not in
                            (self.opc.JUMP_ABSOLUTE, self.opc.RETURN_VALUE)):
                        self.fixed_jumps[offset] = prev_op[following_stmt]
                        return
            elif (code[following_stmt] == self.opc.JUMP_ABSOLUTE
                    and self.is_jump_forward(target)
                    and self.get_target(target) == self.get_target(following_stmt)):
                self.fixed_jumps[offset] = prev_op[following_stmt]
                return

        # Don't add a struct for a while test, it's already taken care of
        if offset in self.ignore_if:
            return

        rtarget_is_ja = code[pre_rtarget] == self.opc.JUMP_ABSOLUTE
        # NOTE(review): "rtarget + 3" below is another literal instruction
        # size; confirm it is right for every bytecode format handled.
        if (rtarget_is_ja
                and pre_rtarget in self.stmts
                and pre_rtarget != offset
                and prev_op[pre_rtarget] != offset
                and not (code[rtarget] == self.opc.JUMP_ABSOLUTE
                         and code[rtarget + 3] == self.opc.POP_BLOCK
                         and code[prev_op[pre_rtarget]] != self.opc.JUMP_ABSOLUTE)):
            rtarget = pre_rtarget

        # Does the "jump if" jump beyond a jump op?
        # That is, we have something like:
        #  POP_JUMP_IF_FALSE HERE
        #  ...
        # JUMP_FORWARD
        # HERE:
        #
        # If so, this can be block inside an "if" statement
        # or a conditional assignment like:
        #   x = 1 if x else 2
        #
        # For 3.5, in addition the JUMP_FORWARD above we could have
        # JUMP_BACK or CONTINUE
        #
        # There are other situations we may need to consider, like
        # if the condition jump is to a forward location.
        # Also the existence of a jump to the instruction after "END_FINALLY"
        # will distinguish "try/else" from "try".
        if self.version < 3.8:
            rtarget_break = (self.opc.RETURN_VALUE, self.opc.BREAK_LOOP)
        else:
            rtarget_break = (self.opc.RETURN_VALUE,)

        if self.is_jump_forward(pre_rtarget) or (rtarget_is_ja and self.version >= 3.5):
            if_end = self.get_target(pre_rtarget)

            # If the jump target is back, we are looping
            if (if_end < pre_rtarget
                    and self.version < 3.8
                    and (code[prev_op[if_end]] == self.opc.SETUP_LOOP)):
                if if_end > start:
                    return

            end = self.restrict_to_parent(if_end, parent)

            self.structs.append({'type': 'if-then',
                                 'start': start,
                                 'end': pre_rtarget})

            # FIXME: add this
            # self.fixed_jumps[offset] = rtarget

            self.not_continue.add(pre_rtarget)

            if rtarget < end and (
                    code[rtarget] not in (self.opc.END_FINALLY,
                                          self.opc.JUMP_ABSOLUTE)
                    and code[prev_op[pre_rtarget]] not in (self.opc.POP_EXCEPT,
                                                           self.opc.END_FINALLY)):
                self.structs.append({'type': 'else',
                                     'start': rtarget,
                                     'end': end})
                self.else_start[rtarget] = end
        elif self.is_jump_back(pre_rtarget, 0):
            self.structs.append({'type': 'if-then',
                                 'start': start,
                                 'end': pre_rtarget})
            self.not_continue.add(pre_rtarget)
        elif code[pre_rtarget] in rtarget_break:
            self.structs.append({'type': 'if-then',
                                 'start': start,
                                 'end': rtarget})
            # It is important to distinguish if this return is inside some sort
            # except block return
            jump_prev = prev_op[offset]
            if (self.is_pypy
                    and code[jump_prev] == self.opc.COMPARE_OP
                    and self.opc.cmp_op[code[jump_prev + 1]] == 'exception-match'):
                return
            if self.version >= 3.5:
                # Python 3.5 may remove as dead code a JUMP
                # instruction after a RETURN_VALUE. So we check
                # based on seeing SETUP_EXCEPT various places.
                if self.version < 3.6 and code[rtarget] == self.opc.SETUP_EXCEPT:
                    return
                # Check that next instruction after pops and jump is
                # not from SETUP_EXCEPT
                next_op = rtarget
                if code[next_op] == self.opc.POP_BLOCK:
                    next_op += instruction_size(code[next_op], self.opc)
                if code[next_op] == self.opc.JUMP_ABSOLUTE:
                    next_op += instruction_size(code[next_op], self.opc)
                if next_op in targets:
                    for try_op in targets[next_op]:
                        come_from_op = code[try_op]
                        if self.version < 3.8 and come_from_op == self.opc.SETUP_EXCEPT:
                            return

            if self.version >= 3.4:
                self.fixed_jumps[offset] = rtarget

            if code[pre_rtarget] == self.opc.RETURN_VALUE:
                # If we are at some sort of POP_JUMP_IF and the instruction before was
                # COMPARE_OP exception-match, then pre_rtarget is not an end_if
                if not (inst_index > 0
                        and self.insts[inst_index - 1].argval == 'exception-match'):
                    self.return_end_ifs.add(pre_rtarget)
            else:
                self.fixed_jumps[offset] = rtarget
                self.not_continue.add(pre_rtarget)
        else:
            # FIXME: this is very convoluted and based on rather hacky
            # empirical evidence. It should go a way when
            # we have better control-flow analysis
            normal_jump = self.version >= 3.6
            if self.version == 3.5:
                j = self.offset2inst_index[target]
                if j + 2 < len(self.insts) and self.insts[j + 2].is_jump_target:
                    normal_jump = self.insts[j + 1].opname == 'POP_BLOCK'

            if normal_jump:
                # For now, we'll only tag forward jump.
                if target > offset:
                    self.fixed_jumps[offset] = target
            else:
                # FIXME: This is probably a bug in < 3.5 and we should
                # instead use the above code. But until we smoke things
                # out we'll stick with it.
                if rtarget > offset:
                    self.fixed_jumps[offset] = rtarget

    elif self.version < 3.8 and op == self.opc.SETUP_EXCEPT:
        target = self.get_target(offset)
        end = self.restrict_to_parent(target, parent)
        self.fixed_jumps[offset] = end

    elif op == self.opc.POP_EXCEPT:
        next_offset = xdis.next_offset(op, self.opc, offset)
        target = self.get_target(next_offset)
        if target > next_offset:
            next_op = code[next_offset]
            if (self.opc.JUMP_ABSOLUTE == next_op
                    and self.opc.END_FINALLY
                    != code[xdis.next_offset(next_op, self.opc, next_offset)]):
                self.fixed_jumps[next_offset] = target
                self.except_targets[target] = next_offset

    elif op == self.opc.SETUP_FINALLY:
        target = self.get_target(offset)
        end = self.restrict_to_parent(target, parent)
        self.fixed_jumps[offset] = end

    elif op in self.jump_if_pop:
        target = self.get_target(offset)
        if target > offset:
            unop_target = self.last_instr(offset, target,
                                          self.opc.JUMP_FORWARD, target)
            # NOTE(review): "unop_target + 3" is a literal instruction
            # size; confirm it is right for every bytecode format handled.
            if unop_target and code[unop_target + 3] != self.opc.ROT_TWO:
                self.fixed_jumps[offset] = unop_target
            else:
                self.fixed_jumps[offset] = self.restrict_to_parent(target, parent)

    elif self.version >= 3.5:
        # 3.5+ has Jump optimization which too often causes RETURN_VALUE to get
        # misclassified as RETURN_END_IF. Handle that here.
        # In RETURN_VALUE, JUMP_ABSOLUTE, RETURN_VALUE is never RETURN_END_IF
        if op == self.opc.RETURN_VALUE:
            next_offset = xdis.next_offset(op, self.opc, offset)
            if (next_offset < len(code)
                    and (code[next_offset] == self.opc.JUMP_ABSOLUTE
                         and offset in self.return_end_ifs)):
                self.return_end_ifs.remove(offset)
        elif op == self.opc.JUMP_FORWARD:
            # If we have:
            #   JUMP_FORWARD x, [non-jump, insns], RETURN_VALUE, x:
            # then RETURN_VALUE is not RETURN_END_IF
            rtarget = self.get_target(offset)
            rtarget_prev = self.prev[rtarget]
            if (code[rtarget_prev] == self.opc.RETURN_VALUE
                    and rtarget_prev in self.return_end_ifs):
                # Walk back from the RETURN_VALUE to this JUMP_FORWARD; any
                # jump in between means the pattern above does not apply.
                # The opcodes come from self.opc like every other
                # comparison in this method.
                i = rtarget_prev
                while i != offset:
                    if code[i] in (self.opc.JUMP_FORWARD, self.opc.JUMP_ABSOLUTE):
                        return
                    i = self.prev[i]
                self.return_end_ifs.remove(rtarget_prev)
    return