def emit_zero_array(self, op, arglocs, regalloc):
    """Emit machine code that zeroes a slice of an array in place.

    arglocs unpacks to (base, startindex, ofs, length, itemsize) locations;
    base/ofs/startindex are folded into a single start address in base_loc,
    then the hardware MVCLE instruction performs the memset.
    NOTE(review): itemsize_loc is unpacked but never used here — presumably
    the regalloc already scaled startindex/length by the item size; confirm.
    """
    base_loc, startindex_loc, length_loc, \
        ofs_loc, itemsize_loc = arglocs

    # base_loc += ofs  (immediate or register form)
    if ofs_loc.is_imm():
        assert check_imm_value(ofs_loc.value)
        self.mc.AGHI(base_loc, ofs_loc)
    else:
        self.mc.AGR(base_loc, ofs_loc)
    # base_loc += startindex  (immediate or register form)
    if startindex_loc.is_imm():
        assert check_imm_value(startindex_loc.value)
        self.mc.AGHI(base_loc, startindex_loc)
    else:
        self.mc.AGR(base_loc, startindex_loc)
    assert not length_loc.is_imm()
    # contents of r0 do not matter because r1 is zero, so
    # no copying takes place
    self.mc.XGR(r.r1, r.r1)

    # MVCLE requires an even/odd register pair for the destination:
    # dst address in base_loc (even), dst length in base_loc+1.
    assert base_loc.is_even()
    assert length_loc.value == base_loc.value + 1

    # s390x has memset directly as a hardware instruction!!
    # it needs 5 registers allocated
    # dst = rX, dst len = rX+1 (ensured by the regalloc)
    # src = r0, src len = r1
    self.mc.MVCLE(base_loc, r.r0, l.addr(0))
    # NOTE this instruction can (determined by the cpu), just
    # quit the movement any time, thus it is looped until all bytes
    # are copied!
    self.mc.BRC(c.OF, l.imm(-self.mc.MVCLE_byte_count))
def emit(self, op, arglocs, regalloc):
    """Generic two-operand emitter selected by the second operand's kind.

    rp_func / rh_func / ri_func / rr_func are mnemonic name strings captured
    from the enclosing scope — this function is evidently produced by a
    factory/template that builds one emitter per operation.  Dispatch:
      * l1 in the literal pool      -> rp_func  (register-pool form)
      * l1 immediate, fits halfword -> rh_func  (register-halfword-imm form)
      * l1 immediate, large         -> ri_func  (register-imm form)
      * l1 in a register            -> rr_func  (register-register form)
    """
    l0, l1 = arglocs
    if l1.is_in_pool():
        getattr(self.mc, rp_func)(l0, l1)
    elif l1.is_imm():
        if check_imm_value(l1.value):
            getattr(self.mc, rh_func)(l0, l1)
        else:
            getattr(self.mc, ri_func)(l0, l1)
    else:
        getattr(self.mc, rr_func)(l0, l1)
def _call_assembler_check_descr(self, value, tmploc):
    """Emit a comparison of the frame's jf_descr field against `value`.

    Loads jf_descr (from the frame pointed to by r2) into SCRATCH and
    compares it with `value` (immediate form if it fits, else via SCRATCH2).
    Returns the code position of a placeholder conditional jump that the
    caller patches later.
    NOTE(review): tmploc is accepted but unused here — confirm it is only
    needed by other backends' versions of this hook.
    """
    ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
    self.mc.LG(r.SCRATCH, l.addr(ofs, r.r2))
    if check_imm_value(value):
        self.mc.cmp_op(r.SCRATCH, l.imm(value), imm=True)
    else:
        self.mc.load_imm(r.SCRATCH2, value)
        self.mc.cmp_op(r.SCRATCH, r.SCRATCH2, imm=False)
    jump_if_eq = self.mc.currpos()
    self.mc.trap()      # patched later
    self.mc.write('\x00' * 4) # patched later
    return jump_if_eq
def _emit_load_for_copycontent(self, dst, src_ptr, src_ofs, scale):
    """Compute dst = src_ptr + (src_ofs << scale) for a copy-content op.

    src_ofs may be an immediate (shift folded at assemble time) or a
    register (shifted with SLLG when scale != 0).  SCRATCH is used as a
    temporary; see the comment below on why that is safe here.
    """
    if src_ofs.is_imm():
        value = src_ofs.value << scale
        if check_imm_value(value):
            self.mc.AGHIK(dst, src_ptr, l.imm(value))
        else:
            # it is fine to use r1 here, because it will
            # only hold a value before invoking the memory copy
            self.mc.load_imm(r.SCRATCH, value)
            self.mc.AGRK(dst, src_ptr, r.SCRATCH)
    elif scale == 0:
        # no shift needed: plain register add
        self.mc.AGRK(dst, src_ptr, src_ofs)
    else:
        # shift the register offset left by `scale`, then add
        self.mc.SLLG(r.SCRATCH, src_ofs, l.addr(scale))
        self.mc.AGRK(dst, src_ptr, r.SCRATCH)
def _emit_threadlocalref_get(self, op, arglocs, regalloc):
    """Emit code reading a field of the thread-local structure into resloc.

    The thread-local base address is loaded from the stack slot at
    THREADLOCAL_ADDR_OFFSET(SP); the requested field is then read at
    `offset` from it, with size/signedness taken from the call descr.
    """
    [resloc] = arglocs
    offset = op.getarg(1).getint()   # getarg(0) == 'threadlocalref_get'
    calldescr = op.getdescr()
    size = calldescr.get_result_size()
    sign = calldescr.is_result_signed()
    #
    # This loads the stack location THREADLOCAL_OFS into a
    # register, and then read the word at the given offset.
    # It is only supported if 'translate_support_code' is
    # true; otherwise, the execute_token() was done with a
    # dummy value for the stack location THREADLOCAL_OFS
    #
    assert self.cpu.translate_support_code
    assert resloc.is_reg()
    assert check_imm_value(offset)
    self.mc.LG(resloc, l.addr(THREADLOCAL_ADDR_OFFSET, r.SP))
    self._memory_read(resloc, l.addr(offset, resloc), size, sign)
def emit_guard_exception(self, op, arglocs, regalloc):
    """Emit GUARD_EXCEPTION: guard that the pending exception type is `loc`.

    Loads the current exception type (at pos_exception, addressed relative
    to pos_exc_value via `diff`) and compares it against loc; the guard
    succeeds on equality.  On the success path the exception value is
    optionally copied into resloc and both pending-exception slots are
    cleared to zero.
    """
    loc, resloc = arglocs[:2]
    failargs = arglocs[2:]

    mc = self.mc
    mc.load_imm(r.SCRATCH, self.cpu.pos_exc_value())
    # address pos_exception as SCRATCH + diff, so one load_imm suffices
    diff = self.cpu.pos_exception() - self.cpu.pos_exc_value()
    assert check_imm_value(diff)

    mc.LG(r.SCRATCH2, l.addr(diff, r.SCRATCH))
    mc.cmp_op(r.SCRATCH2, loc)
    self.guard_success_cc = c.EQ
    self._emit_guard(op, failargs)

    if resloc:
        # fetch the exception value itself into the result register
        mc.load(resloc, r.SCRATCH, 0)
    # clear both the exception value and the exception type slots
    mc.LGHI(r.SCRATCH2, l.imm(0))
    mc.STG(r.SCRATCH2, l.addr(0, r.SCRATCH))
    mc.STG(r.SCRATCH2, l.addr(diff, r.SCRATCH))
def emit_guard_subclass(self, op, arglocs, regalloc):
    """Emit GUARD_SUBCLASS: guard that loc_object's class is a subclass of
    the constant class in loc_check_against_class.

    Reads the object's subclassrange_min (via the vtable when vtable_offset
    is set, otherwise via the GC typeid and translated typeinfo table), then
    checks it lies within [check_min, check_max) using a single unsigned
    compare.
    """
    assert self.cpu.supports_guard_gc_type
    loc_object = arglocs[0]
    loc_check_against_class = arglocs[1]
    offset = self.cpu.vtable_offset
    offset2 = self.cpu.subclassrange_min_offset
    if offset is not None:
        # read this field to get the vtable pointer
        self.mc.LG(r.SCRATCH, l.addr(offset, loc_object))
        # read the vtable's subclassrange_min field
        assert check_imm_value(offset2)
        self.mc.load(r.SCRATCH2, r.SCRATCH, offset2)
    else:
        # read the typeid
        self._read_typeid(r.SCRATCH, loc_object)
        # read the vtable's subclassrange_min field, as a single
        # step with the correct offset
        base_type_info, shift_by, sizeof_ti = (
            self.cpu.gc_ll_descr.get_translated_info_for_typeinfo())
        self.mc.load_imm(r.SCRATCH2, base_type_info + sizeof_ti + offset2)
        assert shift_by == 0
        # add index manually
        # we cannot use r0 in l.addr(...)
        self.mc.AGR(r.SCRATCH, r.SCRATCH2)
        self.mc.load(r.SCRATCH2, r.SCRATCH, 0)
    # get the two bounds to check against
    vtable_ptr = loc_check_against_class.getint()
    vtable_ptr = rffi.cast(rclass.CLASSTYPE, vtable_ptr)
    check_min = vtable_ptr.subclassrange_min
    check_max = vtable_ptr.subclassrange_max
    assert check_max > check_min
    check_diff = check_max - check_min - 1
    # right now, a full PyPy uses less than 6000 numbers,
    # so we'll assert here that it always fit inside 15 bits
    assert 0 <= check_min <= 0x7fff
    assert 0 <= check_diff <= 0xffff
    # check by doing the unsigned comparison (tmp - min) < (max - min)
    self.mc.AGHI(r.SCRATCH2, l.imm(-check_min))
    self.mc.cmp_op(r.SCRATCH2, l.imm(check_diff), imm=True, signed=False)
    # the guard passes if we get a result of "below or equal"
    self.guard_success_cc = c.LE
    self._emit_guard(op, arglocs[2:])
def _write_barrier_fastpath(self, mc, descr, arglocs, regalloc, array=False,
                            is_frame=False):
    """Emit the inline fast path of the GC write barrier.

    Layout of the emitted code: test the flag byte of the object in
    arglocs[0]; a placeholder short jump (patched at the end, 'EQ') skips
    everything when no flag is set.  With `array` and card marking enabled,
    a second placeholder jump ('NE') routes directly to the inline
    card-bit-setting code.  Otherwise the slow-path helper is called, and —
    for the card-marking variant — its result is re-tested to possibly fall
    into the card-setting code as well.  All placeholder jumps are patched
    with OverwritingBuilder once the final positions are known.
    """
    # Write code equivalent to write_barrier() in the GC: it checks
    # a flag in the object at arglocs[0], and if set, it calls a
    # helper piece of assembler. The latter saves registers as needed
    # and call the function remember_young_pointer() from the GC.
    if we_are_translated():
        cls = self.cpu.gc_ll_descr.has_write_barrier_class()
        assert cls is not None and isinstance(descr, cls)
    #
    card_marking_mask = 0
    mask = descr.jit_wb_if_flag_singlebyte
    if array and descr.jit_wb_cards_set != 0:
        # assumptions the rest of the function depends on:
        assert (
            descr.jit_wb_cards_set_byteofs == descr.jit_wb_if_flag_byteofs)
        card_marking_mask = descr.jit_wb_cards_set_singlebyte
    #
    loc_base = arglocs[0]
    assert loc_base.is_reg()
    if is_frame:
        assert loc_base is r.SPP
    assert check_imm_value(descr.jit_wb_if_flag_byteofs)
    # load the flag byte once; keep the unmasked copy in SCRATCH2 so the
    # card-marking test below can reuse it without reloading
    mc.LLGC(r.SCRATCH2, l.addr(descr.jit_wb_if_flag_byteofs, loc_base))
    mc.LGR(r.SCRATCH, r.SCRATCH2)
    mc.NILL(r.SCRATCH, l.imm(mask & 0xFF))
    jz_location = mc.get_relative_pos()
    mc.reserve_cond_jump(short=True)  # patched later with 'EQ'

    # for cond_call_gc_wb_array, also add another fast path:
    # if GCFLAG_CARDS_SET, then we can just set one bit and be done
    if card_marking_mask:
        # GCFLAG_CARDS_SET is in the same byte, loaded in r2 already
        mc.LGR(r.SCRATCH, r.SCRATCH2)
        mc.NILL(r.SCRATCH, l.imm(card_marking_mask & 0xFF))
        js_location = mc.get_relative_pos()
        mc.reserve_cond_jump()  # patched later with 'NE'
    else:
        js_location = 0

    # Write only a CALL to the helper prepared in advance, passing it as
    # argument the address of the structure we are writing into
    # (the first argument to COND_CALL_GC_WB).
    # helper_num selects among the prebuilt slow paths:
    # bit 0 = card marking, +2 = float registers in use, 4 = frame variant
    helper_num = (card_marking_mask != 0)
    if is_frame:
        helper_num = 4
    elif regalloc.fprm.reg_bindings:
        helper_num += 2
    if self.wb_slowpath[helper_num] == 0:    # tests only
        assert not we_are_translated()
        assert not is_frame
        self.cpu.gc_ll_descr.write_barrier_descr = descr
        self._build_wb_slowpath(card_marking_mask != 0,
                                bool(regalloc.fprm.reg_bindings))
        assert self.wb_slowpath[helper_num] != 0
    #
    if not is_frame:
        mc.LGR(r.r0, loc_base)    # unusual argument location

    mc.load_imm(r.r14, self.wb_slowpath[helper_num])
    mc.BASR(r.r14, r.r14)

    if card_marking_mask:
        # The helper ends again with a check of the flag in the object.
        # So here, we can simply write again a beq, which will be
        # taken if GCFLAG_CARDS_SET is still not set.
        jns_location = mc.get_relative_pos()
        mc.reserve_cond_jump(short=True)
        #
        # patch the 'NE' above
        currpos = mc.currpos()
        pmc = OverwritingBuilder(mc, js_location, 1)
        pmc.BRCL(c.NE, l.imm(currpos - js_location))
        pmc.overwrite()
        #
        # case GCFLAG_CARDS_SET: emit a few instructions to do
        # directly the card flag setting
        loc_index = arglocs[1]
        if loc_index.is_reg():
            tmp_loc = arglocs[2]
            n = descr.jit_wb_card_page_shift

            assert tmp_loc is not loc_index

            # compute in tmp_loc the byte offset:
            #   tmp_loc = ~(index >> (card_page_shift + 3))
            mc.SRLG(tmp_loc, loc_index, l.addr(n + 3))
            # invert the bits of tmp_loc

            # compute in SCRATCH the index of the bit inside the byte:
            #    scratch = (index >> card_page_shift) & 7
            # 0x80 sets zero flag. will store 0 into all not selected bits
            mc.RISBG(r.SCRATCH, loc_index, l.imm(61), l.imm(0x80 | 63),
                     l.imm(64 - n))
            # XOR with -1 performs the bitwise NOT announced above
            mc.LGHI(r.SCRATCH2, l.imm(-1))
            mc.XGR(tmp_loc, r.SCRATCH2)

            # set SCRATCH2 to 1 << r1
            mc.LGHI(r.SCRATCH2, l.imm(1))
            mc.SLLG(r.SCRATCH2, r.SCRATCH2, l.addr(0, r.SCRATCH))

            # set this bit inside the byte of interest
            addr = l.addr(0, loc_base, tmp_loc)
            mc.LLGC(r.SCRATCH, addr)
            mc.OGRK(r.SCRATCH, r.SCRATCH, r.SCRATCH2)
            mc.STCY(r.SCRATCH, addr)
            # done
        else:
            # constant index: byte offset and bit value are computed here
            byte_index = loc_index.value >> descr.jit_wb_card_page_shift
            byte_ofs = ~(byte_index >> 3)
            byte_val = 1 << (byte_index & 7)
            assert check_imm_value(byte_ofs, lower_bound=-2**19,
                                   upper_bound=2**19 - 1)

            addr = l.addr(byte_ofs, loc_base)
            mc.LLGC(r.SCRATCH, addr)
            mc.OILL(r.SCRATCH, l.imm(byte_val))
            mc.STCY(r.SCRATCH, addr)
        #
        # patch the beq just above
        currpos = mc.currpos()
        pmc = OverwritingBuilder(mc, jns_location, 1)
        pmc.BRC(c.EQ, l.imm(currpos - jns_location))
        pmc.overwrite()

    # patch the JZ above
    currpos = mc.currpos()
    pmc = OverwritingBuilder(mc, jz_location, 1)
    pmc.BRC(c.EQ, l.imm(currpos - jz_location))
    pmc.overwrite()
def _write_barrier_fastpath(self, mc, descr, arglocs, regalloc, array=False,
                            is_frame=False):
    """Emit the inline fast path of the GC write barrier.

    NOTE(review): this is a near-verbatim duplicate of the earlier
    _write_barrier_fastpath definition in this file (only incidental
    whitespace differs); in Python the later definition wins.  Confirm
    whether one of the two copies should be removed.
    """
    # Write code equivalent to write_barrier() in the GC: it checks
    # a flag in the object at arglocs[0], and if set, it calls a
    # helper piece of assembler. The latter saves registers as needed
    # and call the function remember_young_pointer() from the GC.
    if we_are_translated():
        cls = self.cpu.gc_ll_descr.has_write_barrier_class()
        assert cls is not None and isinstance(descr, cls)
    #
    card_marking_mask = 0
    mask = descr.jit_wb_if_flag_singlebyte
    if array and descr.jit_wb_cards_set != 0:
        # assumptions the rest of the function depends on:
        assert (descr.jit_wb_cards_set_byteofs ==
                descr.jit_wb_if_flag_byteofs)
        card_marking_mask = descr.jit_wb_cards_set_singlebyte
    #
    loc_base = arglocs[0]
    assert loc_base.is_reg()
    if is_frame:
        assert loc_base is r.SPP
    assert check_imm_value(descr.jit_wb_if_flag_byteofs)
    # load the flag byte once; SCRATCH2 keeps the unmasked copy so the
    # card-marking test below can reuse it without reloading
    mc.LLGC(r.SCRATCH2, l.addr(descr.jit_wb_if_flag_byteofs, loc_base))
    mc.LGR(r.SCRATCH, r.SCRATCH2)
    mc.NILL(r.SCRATCH, l.imm(mask & 0xFF))
    jz_location = mc.get_relative_pos()
    mc.reserve_cond_jump(short=True)  # patched later with 'EQ'

    # for cond_call_gc_wb_array, also add another fast path:
    # if GCFLAG_CARDS_SET, then we can just set one bit and be done
    if card_marking_mask:
        # GCFLAG_CARDS_SET is in the same byte, loaded in r2 already
        mc.LGR(r.SCRATCH, r.SCRATCH2)
        mc.NILL(r.SCRATCH, l.imm(card_marking_mask & 0xFF))
        js_location = mc.get_relative_pos()
        mc.reserve_cond_jump()  # patched later with 'NE'
    else:
        js_location = 0

    # Write only a CALL to the helper prepared in advance, passing it as
    # argument the address of the structure we are writing into
    # (the first argument to COND_CALL_GC_WB).
    # helper_num selects among the prebuilt slow paths:
    # bit 0 = card marking, +2 = float registers in use, 4 = frame variant
    helper_num = (card_marking_mask != 0)
    if is_frame:
        helper_num = 4
    elif regalloc.fprm.reg_bindings:
        helper_num += 2
    if self.wb_slowpath[helper_num] == 0:    # tests only
        assert not we_are_translated()
        assert not is_frame
        self.cpu.gc_ll_descr.write_barrier_descr = descr
        self._build_wb_slowpath(card_marking_mask != 0,
                                bool(regalloc.fprm.reg_bindings))
        assert self.wb_slowpath[helper_num] != 0
    #
    if not is_frame:
        mc.LGR(r.r0, loc_base)    # unusual argument location

    mc.load_imm(r.r14, self.wb_slowpath[helper_num])
    mc.BASR(r.r14, r.r14)

    if card_marking_mask:
        # The helper ends again with a check of the flag in the object.
        # So here, we can simply write again a beq, which will be
        # taken if GCFLAG_CARDS_SET is still not set.
        jns_location = mc.get_relative_pos()
        mc.reserve_cond_jump(short=True)
        #
        # patch the 'NE' above
        currpos = mc.currpos()
        pmc = OverwritingBuilder(mc, js_location, 1)
        pmc.BRCL(c.NE, l.imm(currpos - js_location))
        pmc.overwrite()
        #
        # case GCFLAG_CARDS_SET: emit a few instructions to do
        # directly the card flag setting
        loc_index = arglocs[1]
        if loc_index.is_reg():
            tmp_loc = arglocs[2]
            n = descr.jit_wb_card_page_shift

            assert tmp_loc is not loc_index

            # compute in tmp_loc the byte offset:
            #   tmp_loc = ~(index >> (card_page_shift + 3))
            mc.SRLG(tmp_loc, loc_index, l.addr(n+3))
            # invert the bits of tmp_loc

            # compute in SCRATCH the index of the bit inside the byte:
            #    scratch = (index >> card_page_shift) & 7
            # 0x80 sets zero flag. will store 0 into all not selected bits
            mc.RISBG(r.SCRATCH, loc_index, l.imm(61), l.imm(0x80 | 63),
                     l.imm(64-n))
            # XOR with -1 performs the bitwise NOT announced above
            mc.LGHI(r.SCRATCH2, l.imm(-1))
            mc.XGR(tmp_loc, r.SCRATCH2)

            # set SCRATCH2 to 1 << r1
            mc.LGHI(r.SCRATCH2, l.imm(1))
            mc.SLLG(r.SCRATCH2, r.SCRATCH2, l.addr(0,r.SCRATCH))

            # set this bit inside the byte of interest
            addr = l.addr(0, loc_base, tmp_loc)
            mc.LLGC(r.SCRATCH, addr)
            mc.OGRK(r.SCRATCH, r.SCRATCH, r.SCRATCH2)
            mc.STCY(r.SCRATCH, addr)
            # done
        else:
            # constant index: byte offset and bit value are computed here
            byte_index = loc_index.value >> descr.jit_wb_card_page_shift
            byte_ofs = ~(byte_index >> 3)
            byte_val = 1 << (byte_index & 7)
            assert check_imm_value(byte_ofs, lower_bound=-2**19,
                                   upper_bound=2**19-1)

            addr = l.addr(byte_ofs, loc_base)
            mc.LLGC(r.SCRATCH, addr)
            mc.OILL(r.SCRATCH, l.imm(byte_val))
            mc.STCY(r.SCRATCH, addr)
        #
        # patch the beq just above
        currpos = mc.currpos()
        pmc = OverwritingBuilder(mc, jns_location, 1)
        pmc.BRC(c.EQ, l.imm(currpos - jns_location))
        pmc.overwrite()

    # patch the JZ above
    currpos = mc.currpos()
    pmc = OverwritingBuilder(mc, jz_location, 1)
    pmc.BRC(c.EQ, l.imm(currpos - jz_location))
    pmc.overwrite()