def _call_assembler_patch_je(self, result_loc, je_location):
    """Reserve a jump placeholder and patch the branch at 'je_location'.

    A trap plus four zero bytes are emitted as a placeholder for a jump
    that the caller patches later.  The conditional branch reserved at
    'je_location' is then rewritten as a BRCL on c.EQ that lands right
    after that placeholder.

    'result_loc' is not used by this body.
    Returns the position of the placeholder.
    """
    mc = self.mc
    placeholder_pos = mc.currpos()
    # room for the jump-to-done; both pieces are patched later
    mc.trap()
    mc.write('\x00' * 4)
    #
    # the branch target is now known: it is the current position
    distance = mc.currpos() - je_location
    patcher = OverwritingBuilder(mc, je_location, 1)
    patcher.BRCL(c.EQ, l.imm(distance))
    patcher.overwrite()
    #
    return placeholder_pos
def emit_guard_no_exception(self, op, arglocs, regalloc):
    """Emit a guard that succeeds only if the exception slot holds zero.

    The word at self.cpu.pos_exception() is loaded and compared with 0;
    the guard's success condition is c.EQ.  If the operation just before
    this one is a COND_CALL, the branch that COND_CALL recorded in
    self.previous_cond_call_jcond is re-patched so that it also skips
    the code emitted here.
    """
    exc_addr = self.cpu.pos_exception()
    self.mc.load_imm(r.SCRATCH, exc_addr)
    self.mc.LG(r.SCRATCH2, l.addr(0, r.SCRATCH))
    self.mc.cmp_op(r.SCRATCH2, l.imm(0), imm=True)
    self.guard_success_cc = c.EQ
    self._emit_guard(op, arglocs)

    previous = self._find_nearby_operation(regalloc, -1)
    if previous.getopnum() != rop.COND_CALL:
        return
    # The previous operation was a COND_CALL: extend its conditional
    # jump so that it jumps over this GUARD_NO_EXCEPTION as well.
    branch_pos, branch_cond = self.previous_cond_call_jcond
    distance = self.mc.currpos() - branch_pos
    patcher = OverwritingBuilder(self.mc, branch_pos, 1)
    patcher.BRCL(branch_cond, l.imm(distance))
    patcher.overwrite()
def emit_cond_call(self, op, arglocs, regalloc):
    """Emit a conditional call through one of the cond_call_slowpath helpers.

    arglocs[0] is the result location (may be None); the remaining
    entries are the function address followed by up to four arguments.
    A conditional branch is reserved first and patched at the end so
    that, when the pending guard condition holds, the whole call
    sequence is skipped.  The branch position and condition are stored
    in self.previous_cond_call_jcond so that a following
    guard_no_exception can re-patch it.
    """
    resloc = arglocs[0]
    call_locs = arglocs[1:]

    branch_cond = self.guard_success_cc
    self.guard_success_cc = c.cond_none
    assert branch_cond.value != c.cond_none.value

    branch_pos = self.mc.get_relative_pos()
    self.mc.reserve_cond_jump()     # rewritten below into a relative branch

    # spill into the jitframe every bound core register that is listed
    # in _COND_CALL_SAVE_REGS
    regs_to_spill = []
    for reg in self._regalloc.rm.reg_bindings.itervalues():
        if reg in self._COND_CALL_SAVE_REGS:
            regs_to_spill.append(reg)
    self._push_core_regs_to_jitframe(self.mc, regs_to_spill)
    self.push_gcmap(self.mc, regalloc.get_gcmap([resloc]))
    #
    # move the function address into r11 and the 0-to-4 arguments into
    # r2..r5
    targets = [r.r11, r.r2, r.r3, r.r4, r.r5]
    remap_frame_layout(self, call_locs, targets[:len(call_locs)], r.SCRATCH)
    #
    # select the slowpath variant: bit 0 is set when every bound core
    # register is in save_around_call_regs, bit 1 when any float
    # register is bound
    variant = 1
    for reg in regalloc.rm.reg_bindings.values():
        if reg not in regalloc.rm.save_around_call_regs:
            variant = 0
            break
    if regalloc.fprm.reg_bindings:
        variant += 2
    self.mc.load_imm(r.r14, self.cond_call_slowpath[variant])
    self.mc.BASR(r.r14, r.r14)
    # restoring the spilled registers and popping the gcmap is left to
    # the cond_call_slowpath helper
    if resloc is not None:
        # copy the value found in SCRATCH2 after the call into resloc
        self.mc.LGR(resloc, r.SCRATCH2)

    distance = self.mc.currpos() - branch_pos
    patcher = OverwritingBuilder(self.mc, branch_pos, 1)
    patcher.BRCL(branch_cond, l.imm(distance))
    patcher.overwrite()
    # may be patched once more to also skip a following
    # guard_no_exception
    self.previous_cond_call_jcond = branch_pos, branch_cond
def emit_guard_nonnull_class(self, op, arglocs, regalloc):
    """Emit a guard checking that arglocs[0] is non-null and of a class.

    arglocs[0] is first compared (unsigned) with 1.  A short branch is
    reserved and later patched to a BRC on c.LT that skips the class
    comparison emitted by _cmp_guard_class.  The guard itself is emitted
    with success condition c.EQ and the locations arglocs[2:].
    """
    mc = self.mc
    mc.cmp_op(arglocs[0], l.imm(1), imm=True, signed=False)

    skip_pos = mc.currpos()
    mc.reserve_cond_jump(short=True)    # patched just below

    self._cmp_guard_class(op, arglocs, regalloc)

    # target of the reserved branch: right after the class comparison
    patcher = OverwritingBuilder(self.mc, skip_pos, 1)
    distance = self.mc.currpos() - skip_pos
    patcher.BRC(c.LT, l.imm(distance))
    patcher.overwrite()

    self.guard_success_cc = c.EQ
    self._emit_guard(op, arglocs[2:])
def emit_guard_no_exception(self, op, arglocs, regalloc):
    """Emit a guard that succeeds only if the exception slot holds zero.

    Loads the word stored at self.cpu.pos_exception(), compares it with
    0 and emits the guard with success condition c.EQ.  When the
    operation right before this one is a COND_CALL, the conditional
    branch remembered in self.previous_cond_call_jcond is patched again
    so that it jumps past this guard too.
    """
    self.mc.load_imm(r.SCRATCH, self.cpu.pos_exception())
    exc_slot = l.addr(0, r.SCRATCH)
    self.mc.LG(r.SCRATCH2, exc_slot)
    zero = l.imm(0)
    self.mc.cmp_op(r.SCRATCH2, zero, imm=True)
    self.guard_success_cc = c.EQ
    self._emit_guard(op, arglocs)

    prev_opnum = self._find_nearby_operation(regalloc, -1).getopnum()
    if prev_opnum == rop.COND_CALL:
        # make the COND_CALL's conditional jump skip this guard as well
        cond_call_pos, cond_call_cc = self.previous_cond_call_jcond
        offset = self.mc.currpos() - cond_call_pos
        builder = OverwritingBuilder(self.mc, cond_call_pos, 1)
        builder.BRCL(cond_call_cc, l.imm(offset))
        builder.overwrite()
def emit_cond_call(self, op, arglocs, regalloc):
    """Emit a conditional call through one of the cond_call_slowpath helpers.

    arglocs holds the function address followed by up to four
    arguments.  The pending guard condition is negated and used for a
    branch that is reserved first and patched at the end so that it
    jumps over the whole call sequence.  The branch position and
    condition are stored in self.previous_cond_call_jcond so that a
    following guard_no_exception can re-patch it.  No result value is
    produced.
    """
    pending_cc = self.guard_success_cc
    self.guard_success_cc = c.cond_none
    assert pending_cc.value != c.cond_none.value
    skip_cc = c.negate(pending_cc)

    branch_pos = self.mc.get_relative_pos()
    self.mc.reserve_cond_jump()     # rewritten below into a relative branch

    # spill into the jitframe every bound core register that is listed
    # in _COND_CALL_SAVE_REGS
    regs_to_spill = []
    for reg in self._regalloc.rm.reg_bindings.itervalues():
        if reg in self._COND_CALL_SAVE_REGS:
            regs_to_spill.append(reg)
    self._push_core_regs_to_jitframe(self.mc, regs_to_spill)

    # the gcmap is passed in an unusual location: SCRATCH2
    self.load_gcmap(self.mc, r.SCRATCH2, regalloc.get_gcmap())
    #
    # move the function address into r11 and the 0-to-4 arguments into
    # r2..r5
    targets = [r.r11, r.r2, r.r3, r.r4, r.r5]
    remap_frame_layout(self, arglocs, targets[:len(arglocs)], r.SCRATCH)
    #
    # select the slowpath variant: bit 0 is set when every bound core
    # register is in save_around_call_regs, bit 1 when any float
    # register is bound
    variant = 1
    for reg in regalloc.rm.reg_bindings.values():
        if reg not in regalloc.rm.save_around_call_regs:
            variant = 0
            break
    if regalloc.fprm.reg_bindings:
        variant += 2
    self.mc.load_imm(r.r14, self.cond_call_slowpath[variant])
    self.mc.BASR(r.r14, r.r14)
    # restoring the spilled registers and popping the gcmap is left to
    # the cond_call_slowpath helper

    distance = self.mc.currpos() - branch_pos
    patcher = OverwritingBuilder(self.mc, branch_pos, 1)
    patcher.BRCL(skip_cc, l.imm(distance))
    patcher.overwrite()
    # may be patched once more to also skip a following
    # guard_no_exception
    self.previous_cond_call_jcond = branch_pos, skip_cc
def _write_barrier_fastpath(self, mc, descr, arglocs, regalloc, array=False,
                            is_frame=False):
    """Emit into 'mc' the inline fast path of the GC write barrier.

    mc        -- the code builder receiving the instructions
    descr     -- write barrier descriptor (jit_wb_* attributes are read)
    arglocs   -- arglocs[0] is the base object register; when card
                 marking is active, arglocs[1] is the index location and
                 arglocs[2] a temporary register
    regalloc  -- only regalloc.fprm.reg_bindings is consulted, to pick
                 the helper variant
    array     -- enables the card-marking path if descr.jit_wb_cards_set
                 is non-zero
    is_frame  -- the base must then be r.SPP; helper number 4 is used and
                 the base is not copied into r0

    Branches are reserved while emitting and patched once their target
    position is known.
    """
    # Write code equivalent to write_barrier() in the GC: it checks
    # a flag in the object at arglocs[0], and if set, it calls a
    # helper piece of assembler. The latter saves registers as needed
    # and call the function remember_young_pointer() from the GC.
    if we_are_translated():
        cls = self.cpu.gc_ll_descr.has_write_barrier_class()
        assert cls is not None and isinstance(descr, cls)
    #
    card_marking_mask = 0
    mask = descr.jit_wb_if_flag_singlebyte
    if array and descr.jit_wb_cards_set != 0:
        # assumptions the rest of the function depends on:
        assert ( descr.jit_wb_cards_set_byteofs == descr.jit_wb_if_flag_byteofs)
        card_marking_mask = descr.jit_wb_cards_set_singlebyte
    #
    loc_base = arglocs[0]
    assert loc_base.is_reg()
    if is_frame:
        assert loc_base is r.SPP
    assert check_imm_value(descr.jit_wb_if_flag_byteofs)
    # load the flag byte into SCRATCH2 and test 'mask' on a copy in SCRATCH,
    # so that SCRATCH2 still holds the unmodified byte afterwards
    mc.LLGC(r.SCRATCH2, l.addr(descr.jit_wb_if_flag_byteofs, loc_base))
    mc.LGR(r.SCRATCH, r.SCRATCH2)
    mc.NILL(r.SCRATCH, l.imm(mask & 0xFF))
    jz_location = mc.get_relative_pos()
    mc.reserve_cond_jump(short=True) # patched later with 'EQ'
    # for cond_call_gc_wb_array, also add another fast path:
    # if GCFLAG_CARDS_SET, then we can just set one bit and be done
    if card_marking_mask:
        # GCFLAG_CARDS_SET is in the same byte, loaded into SCRATCH2 above
        mc.LGR(r.SCRATCH, r.SCRATCH2)
        mc.NILL(r.SCRATCH, l.imm(card_marking_mask & 0xFF))
        js_location = mc.get_relative_pos()
        mc.reserve_cond_jump() # patched later with 'NE'
    else:
        js_location = 0
    # Write only a CALL to the helper prepared in advance, passing it as
    # argument the address of the structure we are writing into
    # (the first argument to COND_CALL_GC_WB).
    # helper_num: +1 with card marking, +2 with bound float registers,
    # or 4 for the frame variant
    helper_num = (card_marking_mask != 0)
    if is_frame:
        helper_num = 4
    elif regalloc.fprm.reg_bindings:
        helper_num += 2
    if self.wb_slowpath[helper_num] == 0: # tests only
        assert not we_are_translated()
        assert not is_frame
        self.cpu.gc_ll_descr.write_barrier_descr = descr
        self._build_wb_slowpath(card_marking_mask != 0, bool(regalloc.fprm.reg_bindings))
        assert self.wb_slowpath[helper_num] != 0
    #
    if not is_frame:
        mc.LGR(r.r0, loc_base) # unusual argument location
    mc.load_imm(r.r14, self.wb_slowpath[helper_num])
    mc.BASR(r.r14, r.r14)
    if card_marking_mask:
        # The helper ends again with a check of the flag in the object.
        # So here, we can simply write again a beq, which will be
        # taken if GCFLAG_CARDS_SET is still not set.
        jns_location = mc.get_relative_pos()
        mc.reserve_cond_jump(short=True)
        #
        # patch the 'NE' above
        currpos = mc.currpos()
        pmc = OverwritingBuilder(mc, js_location, 1)
        pmc.BRCL(c.NE, l.imm(currpos - js_location))
        pmc.overwrite()
        #
        # case GCFLAG_CARDS_SET: emit a few instructions to do
        # directly the card flag setting
        loc_index = arglocs[1]
        if loc_index.is_reg():
            tmp_loc = arglocs[2]
            n = descr.jit_wb_card_page_shift
            assert tmp_loc is not loc_index
            # compute in tmp_loc the byte offset:
            # tmp_loc = ~(index >> (card_page_shift + 3))
            mc.SRLG(tmp_loc, loc_index, l.addr(n + 3))
            # (the bits of tmp_loc are inverted by the LGHI/XGR pair below)
            # compute in SCRATCH the index of the bit inside the byte:
            # scratch = (index >> card_page_shift) & 7
            # 0x80 sets zero flag. will store 0 into all not selected bits
            mc.RISBG(r.SCRATCH, loc_index, l.imm(61), l.imm(0x80 | 63), l.imm(64 - n))
            # invert the bits of tmp_loc by xor-ing with -1
            mc.LGHI(r.SCRATCH2, l.imm(-1))
            mc.XGR(tmp_loc, r.SCRATCH2)
            # set SCRATCH2 to 1 << SCRATCH
            mc.LGHI(r.SCRATCH2, l.imm(1))
            mc.SLLG(r.SCRATCH2, r.SCRATCH2, l.addr(0, r.SCRATCH))
            # set this bit inside the byte of interest
            addr = l.addr(0, loc_base, tmp_loc)
            mc.LLGC(r.SCRATCH, addr)
            mc.OGRK(r.SCRATCH, r.SCRATCH, r.SCRATCH2)
            mc.STCY(r.SCRATCH, addr)
            # done
        else:
            # constant index: byte offset and bit value are computed now
            byte_index = loc_index.value >> descr.jit_wb_card_page_shift
            byte_ofs = ~(byte_index >> 3)
            byte_val = 1 << (byte_index & 7)
            assert check_imm_value(byte_ofs, lower_bound=-2**19, upper_bound=2**19 - 1)
            addr = l.addr(byte_ofs, loc_base)
            mc.LLGC(r.SCRATCH, addr)
            mc.OILL(r.SCRATCH, l.imm(byte_val))
            mc.STCY(r.SCRATCH, addr)
        #
        # patch the beq just above
        currpos = mc.currpos()
        pmc = OverwritingBuilder(mc, jns_location, 1)
        pmc.BRC(c.EQ, l.imm(currpos - jns_location))
        pmc.overwrite()
    # patch the JZ above
    currpos = mc.currpos()
    pmc = OverwritingBuilder(mc, jz_location, 1)
    pmc.BRC(c.EQ, l.imm(currpos - jz_location))
    pmc.overwrite()
def emit_int_mul_ovf(self, op, arglocs, regalloc):
    """Emit an integer multiplication that flags overflow.

    arglocs unpacks to (lr, lq, l1).  l1 is always brought into
    r.SCRATCH first (LG from the pool, LGFI for an immediate, LGR
    otherwise) because it is modified below.  The code then branches on
    the signs of lq and l1 into one of two multiply sequences, each
    followed by overflow checks on lq and lr.  All conditional jumps are
    reserved while emitting and patched at the end, once every label
    position is known.

    NOTE(review): presumably lr/lq form the register pair that MLGR
    writes and the product is left in lq -- confirm against the
    register allocator.
    """
    lr, lq, l1 = arglocs
    if l1.is_in_pool():
        self.mc.LG(r.SCRATCH, l1)
        l1 = r.SCRATCH
    elif l1.is_imm():
        self.mc.LGFI(r.SCRATCH, l1)
        l1 = r.SCRATCH
    else:
        # we are not allowed to modify l1 if it is not a scratch
        # register, thus copy it here!
        self.mc.LGR(r.SCRATCH, l1)
        l1 = r.SCRATCH
    mc = self.mc
    # check left neg
    jmp_lq_lt_0 = mc.get_relative_pos()
    mc.reserve_cond_jump() # CGIJ lq < 0 +-----------+
    jmp_l1_ge_0 = mc.get_relative_pos() #            |
    mc.reserve_cond_jump() # CGIJ l1 >= 0 -----------|-> (both same sign)
    jmp_lq_pos_l1_neg = mc.get_relative_pos() #      |
    mc.reserve_cond_jump(short=True) # BCR any ------|-> (xor negative)
    jmp_l1_neg_lq_neg = mc.get_relative_pos() #      |
    mc.reserve_cond_jump() # <-----------------------+
                           # CGIJ l1 < 0 -> (both same_sign)
    # (xor negative)
    label_xor_neg = mc.get_relative_pos()
    mc.LPGR(lq, lq)
    mc.LPGR(l1, l1)
    mc.MLGR(lr, l1)
    # build the comparison limit in SCRATCH (l1 is no longer needed)
    mc.LGHI(r.SCRATCH, l.imm(-1))
    mc.RISBG(r.SCRATCH, r.SCRATCH, l.imm(0), l.imm(0x80 | 0), l.imm(0))
    # is the value greater than 2**63 ? then an overflow occurred
    jmp_xor_lq_overflow = mc.get_relative_pos()
    mc.reserve_cond_jump() # CLGRJ lq > 0x8000 ... 00 -> (label_overflow)
    jmp_xor_lr_overflow = mc.get_relative_pos()
    mc.reserve_cond_jump() # CLGIJ lr > 0 -> (label_overflow)
    mc.LCGR(lq, lq) # complement the value
    mc.XGR(r.SCRATCH, r.SCRATCH)
    mc.SPM(r.SCRATCH ) # 0x80 ... 00 clears the condition code and program mask
    jmp_no_overflow_xor_neg = mc.get_relative_pos()
    mc.reserve_cond_jump(short=True)
    # both are positive/negative
    label_both_same_sign = mc.get_relative_pos()
    mc.LPGR(lq, lq)
    mc.LPGR(l1, l1)
    mc.MLGR(lr, l1)
    mc.LGHI(r.SCRATCH, l.imm(-1))
    # 0xff -> shift 0 -> 0xff set MSB on pos 0 to zero -> 7f
    mc.RISBG(r.SCRATCH, r.SCRATCH, l.imm(1), l.imm(0x80 | 63), l.imm(0))
    jmp_lq_overflow = mc.get_relative_pos()
    mc.reserve_cond_jump() # CLGRJ lq > 0x7fff ... ff -> (label_overflow)
    jmp_lr_overflow = mc.get_relative_pos()
    mc.reserve_cond_jump() # CLGIJ lr > 0 -> (label_overflow)
    jmp_neither_lqlr_overflow = mc.get_relative_pos()
    mc.reserve_cond_jump(short=True) # BRC any -> (label_end)
    # set overflow!
    label_overflow = mc.get_relative_pos()
    # set bit 34 & 35 -> indicates overflow
    mc.XGR(r.SCRATCH, r.SCRATCH)
    mc.OILH(r.SCRATCH, l.imm(0x3000)) # sets OF
    mc.SPM(r.SCRATCH)
    # no overflow happened
    label_end = mc.get_relative_pos()
    # Every label is known now: rewrite each reserved jump with its
    # real instruction and relative target.
    # jmp_lq_lt_0
    pos = jmp_lq_lt_0
    omc = OverwritingBuilder(self.mc, pos, 1)
    omc.CGIJ(lq, l.imm(0), c.LT, l.imm(jmp_l1_neg_lq_neg - pos))
    omc.overwrite()
    # jmp_l1_ge_0
    pos = jmp_l1_ge_0
    omc = OverwritingBuilder(self.mc, pos, 1)
    omc.CGIJ(l1, l.imm(0), c.GE, l.imm(label_both_same_sign - pos))
    omc.overwrite()
    # jmp_lq_pos_l1_neg
    pos = jmp_lq_pos_l1_neg
    omc = OverwritingBuilder(self.mc, pos, 1)
    omc.BRC(c.ANY, l.imm(label_xor_neg - pos))
    omc.overwrite()
    # jmp_l1_neg_lq_neg
    pos = jmp_l1_neg_lq_neg
    omc = OverwritingBuilder(self.mc, pos, 1)
    omc.CGIJ(l1, l.imm(0), c.LT, l.imm(label_both_same_sign - pos))
    omc.overwrite()
    # patch jmp_xor_lq_overflow
    pos = jmp_xor_lq_overflow
    omc = OverwritingBuilder(self.mc, pos, 1)
    omc.CLGRJ(lq, r.SCRATCH, c.GT, l.imm(label_overflow - pos))
    omc.overwrite()
    # patch jmp_xor_lr_overflow
    pos = jmp_xor_lr_overflow
    omc = OverwritingBuilder(self.mc, pos, 1)
    omc.CLGIJ(lr, l.imm(0), c.GT, l.imm(label_overflow - pos))
    omc.overwrite()
    # patch jmp_no_overflow_xor_neg
    omc = OverwritingBuilder(self.mc, jmp_no_overflow_xor_neg, 1)
    omc.BRC(c.ANY, l.imm(label_end - jmp_no_overflow_xor_neg))
    omc.overwrite()
    # patch jmp_lq_overflow
    omc = OverwritingBuilder(self.mc, jmp_lq_overflow, 1)
    omc.CLGRJ(lq, r.SCRATCH, c.GT, l.imm(label_overflow - jmp_lq_overflow))
    omc.overwrite()
    # patch jmp_lr_overflow
    omc = OverwritingBuilder(self.mc, jmp_lr_overflow, 1)
    omc.CLGIJ(lr, l.imm(0), c.GT, l.imm(label_overflow - jmp_lr_overflow))
    omc.overwrite()
    # patch jmp_neither_lqlr_overflow
    omc = OverwritingBuilder(self.mc, jmp_neither_lqlr_overflow, 1)
    omc.BRC(c.ANY, l.imm(label_end - jmp_neither_lqlr_overflow))
    omc.overwrite()
def _call_assembler_patch_jmp(self, jmp_location):
    """Patch the jump reserved at 'jmp_location' to land at the current position.

    The placeholder is rewritten as a BRCL with condition c.ANY whose
    relative target is the code builder's current position.
    """
    mc = self.mc
    distance = mc.currpos() - jmp_location
    patcher = OverwritingBuilder(mc, jmp_location, 1)
    patcher.BRCL(c.ANY, l.imm(distance))
    patcher.overwrite()
def move_real_result_and_call_reacqgil_addr(self, fastgil):
    """Emit the code that re-acquires the GIL after a released-GIL call.

    A compare-and-swap loop first tries to take 'rpy_fastgil' inline.
    If that does not succeed (or, with a gcrootmap, if the shadowstack
    pointer changed in the meantime), the generated code calls
    self.asm.reacqgil_addr, preserving the call result held in
    self.resloc around that call.  Finally r8-r13 are reloaded from the
    stack.

    'fastgil' is not used by this body.
    """
    from rpython.jit.backend.zarch.codebuilder import OverwritingBuilder

    # try to reacquire the lock.  The following registers are still
    # valid from before the call:
    RSHADOWOLD = self.RSHADOWOLD      # r8: previous val of root_stack_top
    RSHADOWPTR = self.RSHADOWPTR      # r9: &root_stack_top
    RFASTGILPTR = self.RFASTGILPTR    # r10: &fastgil

    # Equivalent of 'r13 = __sync_lock_test_and_set(&rpy_fastgil, 1);'
    self.mc.LGHI(r.SCRATCH, l.imm(1))
    self.mc.LG(r.r13, l.addr(0, RFASTGILPTR))
    retry_label = self.mc.currpos()
    # keep a copy of the value we expect to find, it is tested below
    self.mc.LGR(r.r14, r.r13)
    self.mc.CSG(r.r13, r.SCRATCH, l.addr(0, RFASTGILPTR))  # try to claim lock
    self.mc.BRC(c.LT, l.imm(retry_label - self.mc.currpos()))  # retry if failed
    # CSG performs a serialization
    # zarch is sequential consistent!

    self.mc.CGHI(r.r14, l.imm0)
    b1_location = self.mc.currpos()
    # boehm: patched with a BEQ: jump if the old value (in r14) is zero
    # shadowstack: patched with BNE instead
    self.mc.reserve_cond_jump()

    gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
    if gcrootmap:
        # When doing a call_release_gil with shadowstack, there
        # is the risk that the 'rpy_fastgil' was free but the
        # current shadowstack can be the one of a different
        # thread.  So here we check if the shadowstack pointer
        # is still the same as before we released the GIL (saved
        # in RSHADOWOLD), and if not, we fall back to 'reacqgil_addr'.
        self.mc.load(r.SCRATCH, RSHADOWPTR, 0)
        self.mc.CGR(r.SCRATCH, RSHADOWOLD)
        bne_location = b1_location
        b1_location = self.mc.currpos()
        self.mc.reserve_cond_jump()

        # revert the rpy_fastgil acquired above, so that the
        # general 'reacqgil_addr' below can acquire it again...
        # (here, r14 is conveniently zero)
        self.mc.STG(r.r14, l.addr(0, RFASTGILPTR))

        pmc = OverwritingBuilder(self.mc, bne_location, 1)
        pmc.BRCL(c.NE, l.imm(self.mc.currpos() - bne_location))
        pmc.overwrite()
    #
    # Yes, we need to call the reacqgil() function.
    # save the result we just got
    RSAVEDRES = RFASTGILPTR     # can reuse this reg here
    reg = self.resloc
    if reg is not None:
        # a core result is parked in RSAVEDRES, a float result is
        # stored at 16*WORD from the stack pointer
        if reg.is_core_reg():
            self.mc.LGR(RSAVEDRES, reg)
        elif reg.is_fp_reg():
            self.mc.STD(reg, l.addr(16*WORD, r.SP))
    # r8-r13 live on the stack and must NOT be overwritten,
    # restore_stack_pointer already moved SP + subtracted_to_sp,
    self.mc.LAY(r.SP, l.addr(-self.subtracted_to_sp, r.SP))
    self.mc.load_imm(self.mc.RAW_CALL_REG, self.asm.reacqgil_addr)
    self.mc.raw_call()
    self.mc.LAY(r.SP, l.addr(self.subtracted_to_sp, r.SP))

    if reg is not None:
        # bring the saved result back to where it is expected
        if reg.is_core_reg():
            self.mc.LGR(reg, RSAVEDRES)
        elif reg.is_fp_reg():
            self.mc.LD(reg, l.addr(16*WORD, r.SP))

    # replace b1_location with BEQ(here)
    pmc = OverwritingBuilder(self.mc, b1_location, 1)
    pmc.BRCL(c.EQ, l.imm(self.mc.currpos() - b1_location))
    pmc.overwrite()

    if gcrootmap:
        if gcrootmap.is_shadow_stack and self.is_call_release_gil:
            self.mc.LGR(r.SCRATCH, RSHADOWOLD)
    # reload r8-r13 from their save area on the stack
    pos = STD_FRAME_SIZE_IN_BYTES - 7*WORD
    self.mc.LMG(r.r8, r.r13, l.addr(pos, r.SP))
def _write_barrier_fastpath(self, mc, descr, arglocs, regalloc, array=False,
                            is_frame=False):
    """Emit into 'mc' the inline fast path of the GC write barrier.

    mc        -- the code builder receiving the instructions
    descr     -- write barrier descriptor (jit_wb_* attributes are read)
    arglocs   -- arglocs[0] is the base object register; when card
                 marking is active, arglocs[1] is the index location and
                 arglocs[2] a temporary register
    regalloc  -- only regalloc.fprm.reg_bindings is consulted, to pick
                 the helper variant
    array     -- enables the card-marking path if descr.jit_wb_cards_set
                 is non-zero
    is_frame  -- the base must then be r.SPP; helper number 4 is used and
                 the base is not copied into r0

    Branches are reserved while emitting and patched once their target
    position is known.
    """
    # Write code equivalent to write_barrier() in the GC: it checks
    # a flag in the object at arglocs[0], and if set, it calls a
    # helper piece of assembler. The latter saves registers as needed
    # and call the function remember_young_pointer() from the GC.
    if we_are_translated():
        cls = self.cpu.gc_ll_descr.has_write_barrier_class()
        assert cls is not None and isinstance(descr, cls)
    #
    card_marking_mask = 0
    mask = descr.jit_wb_if_flag_singlebyte
    if array and descr.jit_wb_cards_set != 0:
        # assumptions the rest of the function depends on:
        assert (descr.jit_wb_cards_set_byteofs == descr.jit_wb_if_flag_byteofs)
        card_marking_mask = descr.jit_wb_cards_set_singlebyte
    #
    loc_base = arglocs[0]
    assert loc_base.is_reg()
    if is_frame:
        assert loc_base is r.SPP
    assert check_imm_value(descr.jit_wb_if_flag_byteofs)
    # load the flag byte into SCRATCH2 and test 'mask' on a copy in SCRATCH,
    # so that SCRATCH2 still holds the unmodified byte afterwards
    mc.LLGC(r.SCRATCH2, l.addr(descr.jit_wb_if_flag_byteofs, loc_base))
    mc.LGR(r.SCRATCH, r.SCRATCH2)
    mc.NILL(r.SCRATCH, l.imm(mask & 0xFF))
    jz_location = mc.get_relative_pos()
    mc.reserve_cond_jump(short=True) # patched later with 'EQ'
    # for cond_call_gc_wb_array, also add another fast path:
    # if GCFLAG_CARDS_SET, then we can just set one bit and be done
    if card_marking_mask:
        # GCFLAG_CARDS_SET is in the same byte, loaded into SCRATCH2 above
        mc.LGR(r.SCRATCH, r.SCRATCH2)
        mc.NILL(r.SCRATCH, l.imm(card_marking_mask & 0xFF))
        js_location = mc.get_relative_pos()
        mc.reserve_cond_jump() # patched later with 'NE'
    else:
        js_location = 0
    # Write only a CALL to the helper prepared in advance, passing it as
    # argument the address of the structure we are writing into
    # (the first argument to COND_CALL_GC_WB).
    # helper_num: +1 with card marking, +2 with bound float registers,
    # or 4 for the frame variant
    helper_num = (card_marking_mask != 0)
    if is_frame:
        helper_num = 4
    elif regalloc.fprm.reg_bindings:
        helper_num += 2
    if self.wb_slowpath[helper_num] == 0: # tests only
        assert not we_are_translated()
        assert not is_frame
        self.cpu.gc_ll_descr.write_barrier_descr = descr
        self._build_wb_slowpath(card_marking_mask != 0, bool(regalloc.fprm.reg_bindings))
        assert self.wb_slowpath[helper_num] != 0
    #
    if not is_frame:
        mc.LGR(r.r0, loc_base) # unusual argument location
    mc.load_imm(r.r14, self.wb_slowpath[helper_num])
    mc.BASR(r.r14, r.r14)
    if card_marking_mask:
        # The helper ends again with a check of the flag in the object.
        # So here, we can simply write again a beq, which will be
        # taken if GCFLAG_CARDS_SET is still not set.
        jns_location = mc.get_relative_pos()
        mc.reserve_cond_jump(short=True)
        #
        # patch the 'NE' above
        currpos = mc.currpos()
        pmc = OverwritingBuilder(mc, js_location, 1)
        pmc.BRCL(c.NE, l.imm(currpos - js_location))
        pmc.overwrite()
        #
        # case GCFLAG_CARDS_SET: emit a few instructions to do
        # directly the card flag setting
        loc_index = arglocs[1]
        if loc_index.is_reg():
            tmp_loc = arglocs[2]
            n = descr.jit_wb_card_page_shift
            assert tmp_loc is not loc_index
            # compute in tmp_loc the byte offset:
            # tmp_loc = ~(index >> (card_page_shift + 3))
            mc.SRLG(tmp_loc, loc_index, l.addr(n+3))
            # (the bits of tmp_loc are inverted by the LGHI/XGR pair below)
            # compute in SCRATCH the index of the bit inside the byte:
            # scratch = (index >> card_page_shift) & 7
            # 0x80 sets zero flag. will store 0 into all not selected bits
            mc.RISBG(r.SCRATCH, loc_index, l.imm(61), l.imm(0x80 | 63), l.imm(64-n))
            # invert the bits of tmp_loc by xor-ing with -1
            mc.LGHI(r.SCRATCH2, l.imm(-1))
            mc.XGR(tmp_loc, r.SCRATCH2)
            # set SCRATCH2 to 1 << SCRATCH
            mc.LGHI(r.SCRATCH2, l.imm(1))
            mc.SLLG(r.SCRATCH2, r.SCRATCH2, l.addr(0,r.SCRATCH))
            # set this bit inside the byte of interest
            addr = l.addr(0, loc_base, tmp_loc)
            mc.LLGC(r.SCRATCH, addr)
            mc.OGRK(r.SCRATCH, r.SCRATCH, r.SCRATCH2)
            mc.STCY(r.SCRATCH, addr)
            # done
        else:
            # constant index: byte offset and bit value are computed now
            byte_index = loc_index.value >> descr.jit_wb_card_page_shift
            byte_ofs = ~(byte_index >> 3)
            byte_val = 1 << (byte_index & 7)
            assert check_imm_value(byte_ofs, lower_bound=-2**19, upper_bound=2**19-1)
            addr = l.addr(byte_ofs, loc_base)
            mc.LLGC(r.SCRATCH, addr)
            mc.OILL(r.SCRATCH, l.imm(byte_val))
            mc.STCY(r.SCRATCH, addr)
        #
        # patch the beq just above
        currpos = mc.currpos()
        pmc = OverwritingBuilder(mc, jns_location, 1)
        pmc.BRC(c.EQ, l.imm(currpos - jns_location))
        pmc.overwrite()
    # patch the JZ above
    currpos = mc.currpos()
    pmc = OverwritingBuilder(mc, jz_location, 1)
    pmc.BRC(c.EQ, l.imm(currpos - jz_location))
    pmc.overwrite()
def emit_int_mul_ovf(self, op, arglocs, regalloc):
    """Emit an integer multiplication that flags overflow.

    arglocs unpacks to (lr, lq, l1).  l1 is always brought into
    r.SCRATCH first (LG from the pool, LGFI for an immediate, LGR
    otherwise) because it is modified below.  The code then branches on
    the signs of lq and l1 into one of two multiply sequences, each
    followed by overflow checks on lq and lr.  All conditional jumps are
    reserved while emitting and patched at the end, once every label
    position is known.

    NOTE(review): presumably lr/lq form the register pair that MLGR
    writes and the product is left in lq -- confirm against the
    register allocator.
    """
    lr, lq, l1 = arglocs
    if l1.is_in_pool():
        self.mc.LG(r.SCRATCH, l1)
        l1 = r.SCRATCH
    elif l1.is_imm():
        self.mc.LGFI(r.SCRATCH, l1)
        l1 = r.SCRATCH
    else:
        # we are not allowed to modify l1 if it is not a scratch
        # register, thus copy it here!
        self.mc.LGR(r.SCRATCH, l1)
        l1 = r.SCRATCH
    mc = self.mc
    # check left neg
    jmp_lq_lt_0 = mc.get_relative_pos()
    mc.reserve_cond_jump() # CGIJ lq < 0 +-----------+
    jmp_l1_ge_0 = mc.get_relative_pos() #            |
    mc.reserve_cond_jump() # CGIJ l1 >= 0 -----------|-> (both same sign)
    jmp_lq_pos_l1_neg = mc.get_relative_pos() #      |
    mc.reserve_cond_jump(short=True) # BCR any ------|-> (xor negative)
    jmp_l1_neg_lq_neg = mc.get_relative_pos() #      |
    mc.reserve_cond_jump() # <-----------------------+
                           # CGIJ l1 < 0 -> (both same_sign)
    # (xor negative)
    label_xor_neg = mc.get_relative_pos()
    mc.LPGR(lq, lq)
    mc.LPGR(l1, l1)
    mc.MLGR(lr, l1)
    # build the comparison limit in SCRATCH (l1 is no longer needed)
    mc.LGHI(r.SCRATCH, l.imm(-1))
    mc.RISBG(r.SCRATCH, r.SCRATCH, l.imm(0), l.imm(0x80 | 0), l.imm(0))
    # is the value greater than 2**63 ? then an overflow occurred
    jmp_xor_lq_overflow = mc.get_relative_pos()
    mc.reserve_cond_jump() # CLGRJ lq > 0x8000 ... 00 -> (label_overflow)
    jmp_xor_lr_overflow = mc.get_relative_pos()
    mc.reserve_cond_jump() # CLGIJ lr > 0 -> (label_overflow)
    mc.LCGR(lq, lq) # complement the value
    mc.XGR(r.SCRATCH, r.SCRATCH)
    mc.SPM(r.SCRATCH) # 0x80 ... 00 clears the condition code and program mask
    jmp_no_overflow_xor_neg = mc.get_relative_pos()
    mc.reserve_cond_jump(short=True)
    # both are positive/negative
    label_both_same_sign = mc.get_relative_pos()
    mc.LPGR(lq, lq)
    mc.LPGR(l1, l1)
    mc.MLGR(lr, l1)
    mc.LGHI(r.SCRATCH, l.imm(-1))
    # 0xff -> shift 0 -> 0xff set MSB on pos 0 to zero -> 7f
    mc.RISBG(r.SCRATCH, r.SCRATCH, l.imm(1), l.imm(0x80 | 63), l.imm(0))
    jmp_lq_overflow = mc.get_relative_pos()
    mc.reserve_cond_jump() # CLGRJ lq > 0x7fff ... ff -> (label_overflow)
    jmp_lr_overflow = mc.get_relative_pos()
    mc.reserve_cond_jump() # CLGIJ lr > 0 -> (label_overflow)
    jmp_neither_lqlr_overflow = mc.get_relative_pos()
    mc.reserve_cond_jump(short=True) # BRC any -> (label_end)
    # set overflow!
    label_overflow = mc.get_relative_pos()
    # set bit 34 & 35 -> indicates overflow
    mc.XGR(r.SCRATCH, r.SCRATCH)
    mc.OILH(r.SCRATCH, l.imm(0x3000)) # sets OF
    mc.SPM(r.SCRATCH)
    # no overflow happened
    label_end = mc.get_relative_pos()
    # Every label is known now: rewrite each reserved jump with its
    # real instruction and relative target.
    # jmp_lq_lt_0
    pos = jmp_lq_lt_0
    omc = OverwritingBuilder(self.mc, pos, 1)
    omc.CGIJ(lq, l.imm(0), c.LT, l.imm(jmp_l1_neg_lq_neg - pos))
    omc.overwrite()
    # jmp_l1_ge_0
    pos = jmp_l1_ge_0
    omc = OverwritingBuilder(self.mc, pos, 1)
    omc.CGIJ(l1, l.imm(0), c.GE, l.imm(label_both_same_sign - pos))
    omc.overwrite()
    # jmp_lq_pos_l1_neg
    pos = jmp_lq_pos_l1_neg
    omc = OverwritingBuilder(self.mc, pos, 1)
    omc.BRC(c.ANY, l.imm(label_xor_neg - pos))
    omc.overwrite()
    # jmp_l1_neg_lq_neg
    pos = jmp_l1_neg_lq_neg
    omc = OverwritingBuilder(self.mc, pos, 1)
    omc.CGIJ(l1, l.imm(0), c.LT, l.imm(label_both_same_sign - pos))
    omc.overwrite()
    # patch jmp_xor_lq_overflow
    pos = jmp_xor_lq_overflow
    omc = OverwritingBuilder(self.mc, pos, 1)
    omc.CLGRJ(lq, r.SCRATCH, c.GT, l.imm(label_overflow - pos))
    omc.overwrite()
    # patch jmp_xor_lr_overflow
    pos = jmp_xor_lr_overflow
    omc = OverwritingBuilder(self.mc, pos, 1)
    omc.CLGIJ(lr, l.imm(0), c.GT, l.imm(label_overflow - pos))
    omc.overwrite()
    # patch jmp_no_overflow_xor_neg
    omc = OverwritingBuilder(self.mc, jmp_no_overflow_xor_neg, 1)
    omc.BRC(c.ANY, l.imm(label_end - jmp_no_overflow_xor_neg))
    omc.overwrite()
    # patch jmp_lq_overflow
    omc = OverwritingBuilder(self.mc, jmp_lq_overflow, 1)
    omc.CLGRJ(lq, r.SCRATCH, c.GT, l.imm(label_overflow - jmp_lq_overflow))
    omc.overwrite()
    # patch jmp_lr_overflow
    omc = OverwritingBuilder(self.mc, jmp_lr_overflow, 1)
    omc.CLGIJ(lr, l.imm(0), c.GT, l.imm(label_overflow - jmp_lr_overflow))
    omc.overwrite()
    # patch jmp_neither_lqlr_overflow
    omc = OverwritingBuilder(self.mc, jmp_neither_lqlr_overflow, 1)
    omc.BRC(c.ANY, l.imm(label_end - jmp_neither_lqlr_overflow))
    omc.overwrite()
def move_real_result_and_call_reacqgil_addr(self, fastgil):
    """Emit the code that re-acquires the GIL after a released-GIL call.

    A compare-and-swap loop first tries to take 'rpy_fastgil' inline.
    If that does not succeed (or, with a gcrootmap, if the shadowstack
    pointer changed in the meantime), the generated code calls
    self.asm.reacqgil_addr, preserving the call result held in
    self.resloc around that call.  Finally r8-r13 are reloaded from the
    stack.

    'fastgil' is not used by this body.
    """
    from rpython.jit.backend.zarch.codebuilder import OverwritingBuilder

    # try to reacquire the lock.  The following registers are still
    # valid from before the call:
    RSHADOWOLD = self.RSHADOWOLD      # r8: previous val of root_stack_top
    RSHADOWPTR = self.RSHADOWPTR      # r9: &root_stack_top
    RFASTGILPTR = self.RFASTGILPTR    # r10: &fastgil

    # Equivalent of 'r13 = __sync_lock_test_and_set(&rpy_fastgil, 1);'
    self.mc.LGHI(r.SCRATCH, l.imm(1))
    self.mc.LG(r.r13, l.addr(0, RFASTGILPTR))
    retry_label = self.mc.currpos()
    # keep a copy of the value we expect to find, it is tested below
    self.mc.LGR(r.r14, r.r13)
    self.mc.CSG(r.r13, r.SCRATCH, l.addr(0, RFASTGILPTR))  # try to claim lock
    self.mc.BRC(c.LT, l.imm(retry_label - self.mc.currpos()))  # retry if failed
    # CSG performs a serialization
    # zarch is sequential consistent!

    self.mc.CGHI(r.r14, l.imm0)
    b1_location = self.mc.currpos()
    # boehm: patched with a BEQ: jump if the old value (in r14) is zero
    # shadowstack: patched with BNE instead
    self.mc.reserve_cond_jump()

    gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
    if gcrootmap:
        # When doing a call_release_gil with shadowstack, there
        # is the risk that the 'rpy_fastgil' was free but the
        # current shadowstack can be the one of a different
        # thread.  So here we check if the shadowstack pointer
        # is still the same as before we released the GIL (saved
        # in RSHADOWOLD), and if not, we fall back to 'reacqgil_addr'.
        self.mc.load(r.SCRATCH, RSHADOWPTR, 0)
        self.mc.CGR(r.SCRATCH, RSHADOWOLD)
        bne_location = b1_location
        b1_location = self.mc.currpos()
        self.mc.reserve_cond_jump()

        # revert the rpy_fastgil acquired above, so that the
        # general 'reacqgil_addr' below can acquire it again...
        # (here, r14 is conveniently zero)
        self.mc.STG(r.r14, l.addr(0, RFASTGILPTR))

        pmc = OverwritingBuilder(self.mc, bne_location, 1)
        pmc.BRCL(c.NE, l.imm(self.mc.currpos() - bne_location))
        pmc.overwrite()
    #
    # Yes, we need to call the reacqgil() function.
    # save the result we just got
    RSAVEDRES = RFASTGILPTR     # can reuse this reg here
    reg = self.resloc
    if reg is not None:
        # a core result is parked in RSAVEDRES, a float result is
        # stored at 16 * WORD from the stack pointer
        if reg.is_core_reg():
            self.mc.LGR(RSAVEDRES, reg)
        elif reg.is_fp_reg():
            self.mc.STD(reg, l.addr(16 * WORD, r.SP))
    # r8-r13 live on the stack and must NOT be overwritten,
    # restore_stack_pointer already moved SP + subtracted_to_sp,
    self.mc.LAY(r.SP, l.addr(-self.subtracted_to_sp, r.SP))
    self.mc.load_imm(self.mc.RAW_CALL_REG, self.asm.reacqgil_addr)
    self.mc.raw_call()
    self.mc.LAY(r.SP, l.addr(self.subtracted_to_sp, r.SP))

    if reg is not None:
        # bring the saved result back to where it is expected
        if reg.is_core_reg():
            self.mc.LGR(reg, RSAVEDRES)
        elif reg.is_fp_reg():
            self.mc.LD(reg, l.addr(16 * WORD, r.SP))

    # replace b1_location with BEQ(here)
    pmc = OverwritingBuilder(self.mc, b1_location, 1)
    pmc.BRCL(c.EQ, l.imm(self.mc.currpos() - b1_location))
    pmc.overwrite()

    if gcrootmap:
        if gcrootmap.is_shadow_stack and self.is_call_release_gil:
            self.mc.LGR(r.SCRATCH, RSHADOWOLD)
    # reload r8-r13 from their save area on the stack
    pos = STD_FRAME_SIZE_IN_BYTES - 7 * WORD
    self.mc.LMG(r.r8, r.r13, l.addr(pos, r.SP))
def move_real_result_and_call_reacqgil_addr(self, fastgil):
    """Emit the code that re-acquires the GIL after a released-GIL call.

    The generated code loads 'rpy_fastgil'; if it is zero it tries to
    compare-and-swap the thread id (RTHREADID) into it, retrying when
    the swap fails.  If the lock was not free (or, with a gcrootmap, if
    the shadowstack pointer changed in the meantime), it calls
    self.asm.reacqgil_addr, preserving the call result held in
    self.resloc around that call.  Finally r8-r13 are reloaded from the
    stack.

    'fastgil' is not used by this body.
    """
    from rpython.jit.backend.zarch.codebuilder import OverwritingBuilder

    # try to reacquire the lock.  The following registers are still
    # valid from before the call:
    RSHADOWOLD = self.RSHADOWOLD      # r8: previous val of root_stack_top
    RSHADOWPTR = self.RSHADOWPTR      # r9: &root_stack_top
    RFASTGILPTR = self.RFASTGILPTR    # r10: &fastgil
    RTHREADID = self.RTHREADID        # r11: holding my thread id

    # Equivalent of
    # 'r13 = __sync_val_compare_and_swap(&rpy_fastgil, 0, thread_id);'
    retry_label = self.mc.currpos()
    self.mc.LG(r.r13, l.addr(0, RFASTGILPTR))
    # compare if rpy_fastgil == 0
    self.mc.CGFI(r.r13, l.imm0)
    branch_forward = self.mc.currpos()
    self.mc.BRC(c.NE, l.imm(0))  # overwritten below
    # if so try to compare and swap:
    # if r13 equals the value at RFASTGILPTR, store RTHREADID there
    self.mc.CSG(r.r13, RTHREADID, l.addr(0, RFASTGILPTR))  # try to claim lock
    self.mc.BRC(c.NE, l.imm(retry_label - self.mc.currpos()))  # retry if failed
    # CSG performs a serialization
    # zarch is sequential consistent!

    # overwrite the forward branch so that it skips the CSG loop
    pmc = OverwritingBuilder(self.mc, branch_forward, 1)
    pmc.BRC(c.NE, l.imm(self.mc.currpos() - branch_forward))
    # finish the patch the same way every other patch site does
    pmc.overwrite()

    self.mc.CGHI(r.r13, l.imm0)
    b1_location = self.mc.currpos()
    # save some space, this is patched later
    self.mc.reserve_cond_jump()

    gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
    if gcrootmap:
        # When doing a call_release_gil with shadowstack, there
        # is the risk that the 'rpy_fastgil' was free but the
        # current shadowstack can be the one of a different
        # thread.  So here we check if the shadowstack pointer
        # is still the same as before we released the GIL (saved
        # in RSHADOWOLD), and if not, we fall back to 'reacqgil_addr'.
        self.mc.load(r.SCRATCH, RSHADOWPTR, 0)
        self.mc.CGR(r.SCRATCH, RSHADOWOLD)
        bne_location = b1_location
        b1_location = self.mc.currpos()
        self.mc.reserve_cond_jump()

        # revert the rpy_fastgil acquired above, so that the
        # general 'reacqgil_addr' below can acquire it again...
        self.mc.XGR(r.r13, r.r13)
        self.mc.STG(r.r13, l.addr(0, RFASTGILPTR))

        pmc = OverwritingBuilder(self.mc, bne_location, 1)
        pmc.BRCL(c.NE, l.imm(self.mc.currpos() - bne_location))
        pmc.overwrite()
    #
    # Yes, we need to call the reacqgil() function.
    # save the result we just got
    RSAVEDRES = RFASTGILPTR     # can reuse this reg here
    reg = self.resloc
    if reg is not None:
        # a core result is parked in RSAVEDRES, a float result is
        # stored at 16*WORD from the stack pointer
        if reg.is_core_reg():
            self.mc.LGR(RSAVEDRES, reg)
        elif reg.is_fp_reg():
            self.mc.STD(reg, l.addr(16*WORD, r.SP))
    # r8-r13 live on the stack and must NOT be overwritten,
    # restore_stack_pointer already moved SP + subtracted_to_sp,
    self.mc.LAY(r.SP, l.addr(-self.subtracted_to_sp, r.SP))
    self.mc.load_imm(self.mc.RAW_CALL_REG, self.asm.reacqgil_addr)
    self.mc.raw_call()
    self.mc.LAY(r.SP, l.addr(self.subtracted_to_sp, r.SP))

    if reg is not None:
        # bring the saved result back to where it is expected
        if reg.is_core_reg():
            self.mc.LGR(reg, RSAVEDRES)
        elif reg.is_fp_reg():
            self.mc.LD(reg, l.addr(16*WORD, r.SP))

    # replace b1_location with BEQ(here)
    pmc = OverwritingBuilder(self.mc, b1_location, 1)
    pmc.BRCL(c.EQ, l.imm(self.mc.currpos() - b1_location))
    pmc.overwrite()

    if gcrootmap:
        if gcrootmap.is_shadow_stack and self.is_call_release_gil:
            self.mc.LGR(r.SCRATCH, RSHADOWOLD)
    # reload r8-r13 from their save area on the stack
    pos = STD_FRAME_SIZE_IN_BYTES - CALL_RELEASE_GIL_STACK_OFF
    self.mc.LMG(r.r8, r.r13, l.addr(pos, r.SP))