Example #1
 def move_real_result_and_call_reacqgil_addr(self):
     # save the result we just got (in eax/eax+edx/st(0)/xmm0)
     self.save_result_value()
     # call the reopenstack() function (also reacquiring the GIL)
     if not self.asm._is_asmgcc():
         css = 0     # the helper takes no argument
     else:
         from rpython.memory.gctransform import asmgcroot
         css = WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS)
         if IS_X86_32:
             reg = eax
         elif IS_X86_64:
             reg = edi
         self.mc.LEA_rs(reg.value, css)
         if IS_X86_32:
             self.mc.MOV_sr(0, reg.value)
     #
     self.mc.CALL(imm(self.asm.reacqgil_addr))
     #
     if not we_are_translated():        # for testing: now we can access
         self.mc.SUB(ebp, imm(1))       # ebp again
     #
     # Now that we reacquired the GIL, we can reload a possibly modified ebp
     if self.asm._is_asmgcc():
         # special-case: reload ebp from the css
         from rpython.memory.gctransform import asmgcroot
         index_of_ebp = css + WORD * (2+asmgcroot.INDEX_OF_EBP)
         self.mc.MOV_rs(ebp.value, index_of_ebp)  # MOV EBP, [css.ebp]
Example #2
 def consider_vec_unpack_i(self, op):
     assert isinstance(op, VectorOp)
     index = op.getarg(1)
     count = op.getarg(2)
     assert isinstance(index, ConstInt)
     assert isinstance(count, ConstInt)
     args = op.getarglist()
     srcloc = self.make_sure_var_in_reg(op.getarg(0), args)
     if op.is_vector():
         resloc = self.xrm.force_result_in_reg(op, op.getarg(0), args)
         size = op.bytesize
     else:
         # unpack into iX box
         resloc = self.force_allocate_reg(op, args)
         arg = op.getarg(0)
         assert isinstance(arg, VectorOp)
         size = arg.bytesize
     residx = 0
     args = op.getarglist()
     arglocs = [
         resloc, srcloc,
         imm(residx),
         imm(index.value),
         imm(count.value),
         imm(size)
     ]
     self.perform(op, arglocs, resloc)
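A value-level illustration of what vec_unpack_i computes (a hypothetical pure-Python sketch, not backend code): extract 'count' lanes of 'size' bytes each, starting at lane 'index', from a packed little-endian vector.

    import struct

    _FMT = {1: 'b', 2: 'h', 4: 'i', 8: 'q'}

    def unpack_lanes(vector_bytes, index, count, size):
        # the value-level effect of the shuffle/extract sequence that the
        # backend emits from the arglocs built above
        n = len(vector_bytes) // size
        lanes = struct.unpack('<%d%s' % (n, _FMT[size]), vector_bytes)
        return lanes[index:index + count]

    # a 128-bit vector holding eight 16-bit lanes
    vec = struct.pack('<8h', 10, 11, 12, 13, 14, 15, 16, 17)
    assert unpack_lanes(vec, index=2, count=3, size=2) == (12, 13, 14)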
Example #3
    def write_real_errno(self, save_err):
        """This occurs just before emit_raw_call().
        """
        mc = self.mc

        if handle_lasterror and (save_err & rffi.RFFI_READSAVED_LASTERROR):
            # must call SetLastError().  There are no registers to save
            # if we are on 32-bit in this case: no register contains
            # the arguments to the main function we want to call afterwards.
            # On win64, though, it's more messy.  It could be better optimized
            # but for now we save (again) the registers containing arguments,
            # and restore them afterwards.
            from rpython.rlib.rwin32 import _SetLastError
            adr = llmemory.cast_ptr_to_adr(_SetLastError)
            SetLastError_addr = self.asm.cpu.cast_adr_to_int(adr)
            #
            if save_err & rffi.RFFI_ALT_ERRNO:
                lasterror = llerrno.get_alt_lasterror_offset(self.asm.cpu)
            else:
                lasterror = llerrno.get_rpy_lasterror_offset(self.asm.cpu)
            tlofsreg = self.get_tlofs_reg()  # => esi or r12, callee-saved
            if not WIN64:
                self.save_stack_position()  # => edi, callee-saved
                mc.PUSH_m((tlofsreg.value, lasterror))
                mc.CALL(imm(follow_jump(SetLastError_addr)))
                # restore the stack position without assuming a particular
                # calling convention of _SetLastError()
                self.mc.stack_frame_size_delta(-WORD)
                self.mc.MOV(esp, self.saved_stack_position_reg)
            else:
                self.win64_save_register_args()
                mc.MOV_rm(ecx.value, (tlofsreg.value, lasterror))
                mc.CALL(imm(follow_jump(SetLastError_addr)))
                self.win64_restore_register_args()

        if save_err & rffi.RFFI_READSAVED_ERRNO:
            # Just before a call, read '*_errno' and write it into the
            # real 'errno'.  Most registers are free here, including the
            # callee-saved ones, except 'ebx' and except the ones used to
            # pass the arguments on x86-64.
            if save_err & rffi.RFFI_ALT_ERRNO:
                rpy_errno = llerrno.get_alt_errno_offset(self.asm.cpu)
            else:
                rpy_errno = llerrno.get_rpy_errno_offset(self.asm.cpu)
            p_errno = llerrno.get_p_errno_offset(self.asm.cpu)
            tlofsreg = self.get_tlofs_reg()  # => esi or r12, callee-saved
            if IS_X86_32:
                tmpreg = edx
            else:
                tmpreg = r10  # edx is used for 3rd argument
            mc.MOV_rm(tmpreg.value, (tlofsreg.value, p_errno))
            mc.MOV32_rm(eax.value, (tlofsreg.value, rpy_errno))
            mc.MOV32_mr((tmpreg.value, 0), eax.value)
        elif save_err & rffi.RFFI_ZERO_ERRNO_BEFORE:
            # Same, but write zero.
            p_errno = llerrno.get_p_errno_offset(self.asm.cpu)
            tlofsreg = self.get_tlofs_reg()  # => esi or r12, callee-saved
            mc.MOV_rm(eax.value, (tlofsreg.value, p_errno))
            mc.MOV32_mi((eax.value, 0), 0)
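The RFFI_* flags above mirror what ctypes exposes at the Python level: a private thread-local errno copy that is swapped with the real 'errno' around each foreign call. A minimal sketch using only documented ctypes calls (assuming a POSIX libc, where O_RDONLY == 0):

    import ctypes, ctypes.util, errno

    libc = ctypes.CDLL(ctypes.util.find_library('c'), use_errno=True)
    ctypes.set_errno(0)                        # cf. write_real_errno
    fd = libc.open(b'/no/such/file', 0)        # the call sets the real errno
    assert fd == -1
    assert ctypes.get_errno() == errno.ENOENT  # cf. read_real_errno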
Example #4
 def consider_vec_int_signext(self, op):
     assert isinstance(op, VectorOp)
     args = op.getarglist()
     resloc = self.xrm.force_result_in_reg(op, op.getarg(0), args)
     arg = op.getarg(0)
     assert isinstance(arg, VectorOp)
     size = arg.bytesize
     assert size > 0
     self.perform(op, [resloc, imm(size), imm(op.bytesize)], resloc)
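vec_int_signext narrows or widens each integer lane; the two immediates carry the source and target lane sizes. A scalar sketch of the sign-extension arithmetic (hypothetical, pure Python):

    def signext(value, from_size):
        # reinterpret the low 'from_size' bytes of a lane as a signed integer
        bits = from_size * 8
        value &= (1 << bits) - 1
        return value - (1 << bits) if value >= 1 << (bits - 1) else value

    assert signext(0xFF, 1) == -1
    assert signext(0x7F, 1) == 127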
Example #5
 def _consider_vec_getarrayitem(self, op):
     descr = op.getdescr()
     assert isinstance(descr, ArrayDescr)
     assert not descr.is_array_of_pointers() and not descr.is_array_of_structs()
     itemsize, ofs, _ = unpack_arraydescr(descr)
     integer = not (descr.is_array_of_floats() or descr.getconcrete_type() == FLOAT)
     aligned = False
     args = op.getarglist()
     base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
     ofs_loc = self.rm.make_sure_var_in_reg(op.getarg(1), args)
     result_loc = self.force_allocate_reg(op)
     self.perform(op, [base_loc, ofs_loc, imm(itemsize), imm(ofs), imm(integer), imm(aligned)], result_loc)
Example #6
 def _consider_vec_getarrayitem(self, op):
     descr = op.getdescr()
     assert isinstance(descr, ArrayDescr)
     assert not descr.is_array_of_pointers() and \
            not descr.is_array_of_structs()
     itemsize, ofs, _ = unpack_arraydescr(descr)
     integer = not (descr.is_array_of_floats() or descr.getconcrete_type() == FLOAT)
     aligned = False
     args = op.getarglist()
     base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
     ofs_loc = self.rm.make_sure_var_in_reg(op.getarg(1), args)
     result_loc = self.force_allocate_reg(op)
     self.perform(op, [base_loc, ofs_loc, imm(itemsize), imm(ofs),
                       imm(integer), imm(aligned)], result_loc)
Example #7
 def load_result(self):
     """Overridden in CallBuilder32 and CallBuilder64"""
     if self.ressize == 0:
         return  # void result
     # use the code in load_from_mem to do the zero- or sign-extension
     if self.restype == FLOAT:
         srcloc = xmm0
     else:
         srcloc = eax
     if self.ressize >= WORD and self.resloc is srcloc:
         return  # no need for any MOV
     if self.ressize == 1 and isinstance(srcloc, RegLoc):
         srcloc = srcloc.lowest8bits()
     self.asm.load_from_mem(self.resloc, srcloc, imm(self.ressize),
                            imm(self.ressign))
Example #8
 def consider_vec_pack_i(self, op):
     # new_res = vec_pack_i(res, src, index, count)
     assert isinstance(op, VectorOp)
     arg = op.getarg(1)
     index = op.getarg(2)
     count = op.getarg(3)
     assert isinstance(index, ConstInt)
     assert isinstance(count, ConstInt)
     args = op.getarglist()
     srcloc = self.make_sure_var_in_reg(arg, args)
     resloc = self.xrm.force_result_in_reg(op, op.getarg(0), args)
     residx = index.value  # where to put it in result?
     srcidx = 0
     arglocs = [resloc, srcloc, imm(residx), imm(srcidx), imm(count.value), imm(op.bytesize)]
     self.perform(op, arglocs, resloc)
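Value-level sketch of the pack (hypothetical, pure Python): copy 'count' lanes of 'size' bytes from src (starting at lane srcidx, here 0) into res (starting at lane residx), leaving the other lanes of res untouched.

    import struct

    def pack_lanes(res_bytes, src_bytes, residx, srcidx, count, size):
        res = bytearray(res_bytes)
        for i in range(count):
            dst = (residx + i) * size
            src = (srcidx + i) * size
            res[dst:dst + size] = src_bytes[src:src + size]
        return bytes(res)

    res = pack_lanes(b'\x00' * 8, struct.pack('<2i', 7, 9),
                     residx=1, srcidx=0, count=1, size=4)
    assert struct.unpack('<2i', res) == (0, 7)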
Example #9
 def load_result(self):
     """Overridden in CallBuilder32 and CallBuilder64"""
     if self.ressize == 0:
         return      # void result
     # use the code in load_from_mem to do the zero- or sign-extension
     if self.restype == FLOAT:
         srcloc = xmm0
     else:
         srcloc = eax
     if self.ressize >= WORD and self.resloc is srcloc:
         return      # no need for any MOV
     if self.ressize == 1 and isinstance(srcloc, RegLoc):
         srcloc = srcloc.lowest8bits()
     self.asm.load_from_mem(self.resloc, srcloc,
                            imm(self.ressize), imm(self.ressign))
Example #10
 def call_releasegil_addr_and_move_real_arguments(self):
     initial_esp = self.current_esp
     self.save_register_arguments()
     #
     if not self.asm._is_asmgcc():
         # the helper takes no argument
         self.change_extra_stack_depth = False
     else:
         from rpython.memory.gctransform import asmgcroot
         # build a 'css' structure on the stack: 2 words for the linkage,
         # and 5/7 words as described for asmgcroot.ASM_FRAMEDATA, for a
         # total size of JIT_USE_WORDS.  This structure is found at
         # [ESP+css].
         css = -self.current_esp + (
             WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS))
         assert css >= 2 * WORD
         # Save ebp
         index_of_ebp = css + WORD * (2+asmgcroot.INDEX_OF_EBP)
         self.mc.MOV_sr(index_of_ebp, ebp.value)  # MOV [css.ebp], EBP
         # Save the "return address": we pretend that it's css
         if IS_X86_32:
             reg = eax
         elif IS_X86_64:
             reg = edi
         self.mc.LEA_rs(reg.value, css)           # LEA reg, [css]
         frame_ptr = css + WORD * (2+asmgcroot.FRAME_PTR)
         self.mc.MOV_sr(frame_ptr, reg.value)     # MOV [css.frame], reg
         # Set up jf_extra_stack_depth to pretend that the return address
         # was at css, and so our stack frame is supposedly shorter by
         # (PASS_ON_MY_FRAME-JIT_USE_WORDS+1) words
         delta = PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS + 1
         self.change_extra_stack_depth = True
         self.asm.set_extra_stack_depth(self.mc, -delta * WORD)
         # Call the closestack() function (also releasing the GIL)
         # with 'reg' as argument
         if IS_X86_32:
             self.subtract_esp_aligned(1)
             self.mc.MOV_sr(0, reg.value)
         #else:
         #   on x86_64, reg is edi so that it is already correct
     #
     self.mc.CALL(imm(self.asm.releasegil_addr))
     #
     if not we_are_translated():        # for testing: we should not access
         self.mc.ADD(ebp, imm(1))       # ebp any more
     #
     self.restore_register_arguments()
     self.restore_stack_pointer(initial_esp)
Example #11
 def consider_vec_logic(self, op):
     lhs = op.getarg(0)
     assert isinstance(lhs, VectorOp)
     args = op.getarglist()
     source = self.make_sure_var_in_reg(op.getarg(1), args)
     result = self.xrm.force_result_in_reg(op, op.getarg(0), args)
     self.perform(op, [source, imm(lhs.bytesize)], result)
Example #12
 def consider_vec_int_is_true(self, op):
     args = op.getarglist()
     arg = op.getarg(0)
     assert isinstance(arg, VectorOp)
     argloc = self.loc(arg)
     resloc = self.xrm.force_result_in_reg(op, arg, args)
     self.perform(op, [resloc, imm(arg.bytesize)], None)
Example #13
 def consider_vec_arith(self, op):
     lhs = op.getarg(0)
     size = lhs.bytesize
     args = op.getarglist()
     loc1 = self.make_sure_var_in_reg(op.getarg(1), args)
     loc0 = self.xrm.force_result_in_reg(op, op.getarg(0), args)
     self.perform(op, [loc0, loc1, imm(size)], loc0)
Example #14
 def consider_vec_int_is_true(self, op):
     args = op.getarglist()
     arg = op.getarg(0)
     assert isinstance(arg, VectorOp)
     argloc = self.loc(arg)
     resloc = self.xrm.force_result_in_reg(op, arg, args)
     self.perform(op, [resloc, imm(arg.bytesize)], None)
Example #15
 def consider_vec_pack_i(self, op):
     # new_res = vec_pack_i(res, src, index, count)
     assert isinstance(op, VectorOp)
     arg = op.getarg(1)
     index = op.getarg(2)
     count = op.getarg(3)
     assert isinstance(index, ConstInt)
     assert isinstance(count, ConstInt)
     args = op.getarglist()
     srcloc = self.make_sure_var_in_reg(arg, args)
     resloc = self.xrm.force_result_in_reg(op, op.getarg(0), args)
     residx = index.value # where to put it in result?
     srcidx = 0
     arglocs = [resloc, srcloc, imm(residx), imm(srcidx),
                imm(count.value), imm(op.bytesize)]
     self.perform(op, arglocs, resloc)
Example #16
        def generate_body(self, assembler, mc):
            if self.early_jump_addr != 0:
                # This slow-path has two entry points, with two
                # conditional jumps.  We can jump to the regular start
                # of this slow-path with the 2nd conditional jump.  Or,
                # we can jump past the "MOV(heap(fastgil), ecx)"
                # instruction from the 1st conditional jump.
                # This instruction reverts the rpy_fastgil acquired
                # previously, so that the general 'reacqgil_addr'
                # function can acquire it again.  It must only be done
                # if we actually succeeded in acquiring rpy_fastgil.
                from rpython.jit.backend.x86.assembler import heap
                mc.MOV(heap(self.fastgil), ecx)
                offset = mc.get_relative_pos() - self.early_jump_addr
                mc.overwrite32(self.early_jump_addr - 4, offset)
                # scratch register forgotten here, by get_relative_pos()

            # call the reacqgil() function
            cb = self.callbuilder
            if not cb.result_value_saved_early:
                cb.save_result_value(save_edx=False)
            if assembler._is_asmgcc():
                if IS_X86_32:
                    css_value = edx
                    old_value = ecx
                    mc.MOV_sr(4, old_value.value)
                    mc.MOV_sr(0, css_value.value)
                # on X86_64, they are already in the right registers
            mc.CALL(imm(follow_jump(assembler.reacqgil_addr)))
            if not cb.result_value_saved_early:
                cb.restore_result_value(save_edx=False)
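The overwrite32() call backpatches the 32-bit displacement of the early conditional jump once the landing position is known; the displacement is relative to the end of the jump instruction, hence the '- 4'. A toy sketch of the same backpatching scheme over a bytearray (hypothetical helper names):

    import struct

    def emit_jne_rel32(code):
        # JNE rel32 (0F 85 + 4 placeholder bytes); return the position just
        # past the instruction, like mc.get_relative_pos() after emitting
        code += b'\x0f\x85\x00\x00\x00\x00'
        return len(code)

    def patch_rel32(code, jump_end, target):
        # the mc.overwrite32(early_jump_addr - 4, offset) idiom
        struct.pack_into('<i', code, jump_end - 4, target - jump_end)

    code = bytearray()
    jump_end = emit_jne_rel32(code)
    code += b'\x90' * 7                  # ...instructions to jump over...
    patch_rel32(code, jump_end, len(code))
    assert struct.unpack_from('<i', code, jump_end - 4)[0] == 7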
Example #17
 def call_releasegil_addr_and_move_real_arguments(self, fastgil):
     from rpython.jit.backend.x86.assembler import heap
     assert self.is_call_release_gil
     #
     # Save this thread's shadowstack pointer into 'ebx',
     # for later comparison
     gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
     if gcrootmap:
         if gcrootmap.is_shadow_stack:
             rst = gcrootmap.get_root_stack_top_addr()
             self.mc.MOV(ebx, heap(rst))
     #
     if not self.asm._is_asmgcc():
         # shadowstack: change 'rpy_fastgil' to 0 (it should be
         # non-zero right now).
         self.change_extra_stack_depth = False
         # ^^ note that set_extra_stack_depth() in this case is a no-op
         css_value = imm(0)
     else:
         from rpython.memory.gctransform import asmgcroot
         # build a 'css' structure on the stack: 2 words for the linkage,
         # and 5/7 words as described for asmgcroot.ASM_FRAMEDATA, for a
         # total size of JIT_USE_WORDS.  This structure is found at
         # [ESP+css].
         css = -self.get_current_esp() + (
             WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS))
         assert css >= 2 * WORD
         # Save ebp
         index_of_ebp = css + WORD * (2 + asmgcroot.INDEX_OF_EBP)
         self.mc.MOV_sr(index_of_ebp, ebp.value)  # MOV [css.ebp], EBP
         # Save the "return address": we pretend that it's css
         self.mc.LEA_rs(eax.value, css)  # LEA eax, [css]
         frame_ptr = css + WORD * (2 + asmgcroot.FRAME_PTR)
         self.mc.MOV_sr(frame_ptr, eax.value)  # MOV [css.frame], eax
         # Set up jf_extra_stack_depth to pretend that the return address
         # was at css, and so our stack frame is supposedly shorter by
         # (PASS_ON_MY_FRAME-JIT_USE_WORDS+1) words
         delta = PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS + 1
         self.change_extra_stack_depth = True
         self.asm.set_extra_stack_depth(self.mc, -delta * WORD)
         css_value = eax
     #
     # <--here--> would come a memory fence, if the CPU needed one.
     self.mc.MOV(heap(fastgil), css_value)
     #
     if not we_are_translated():  # for testing: we should not access
         self.mc.ADD(ebp, imm(1))  # ebp any more
Example #18
 def call_releasegil_addr_and_move_real_arguments(self, fastgil):
     from rpython.jit.backend.x86.assembler import heap
     assert self.is_call_release_gil
     #
     # Save this thread's shadowstack pointer into 'ebx',
     # for later comparison
     gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
     if gcrootmap:
         if gcrootmap.is_shadow_stack:
             rst = gcrootmap.get_root_stack_top_addr()
             self.mc.MOV(ebx, heap(rst))
     #
     if not self.asm._is_asmgcc():
         # shadowstack: change 'rpy_fastgil' to 0 (it should be
         # non-zero right now).
         self.change_extra_stack_depth = False
         # ^^ note that set_extra_stack_depth() in this case is a no-op
         css_value = imm(0)
     else:
         from rpython.memory.gctransform import asmgcroot
         # build a 'css' structure on the stack: 2 words for the linkage,
         # and 5/7 words as described for asmgcroot.ASM_FRAMEDATA, for a
         # total size of JIT_USE_WORDS.  This structure is found at
         # [ESP+css].
         css = -self.get_current_esp() + (
             WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS))
         assert css >= 2 * WORD
         # Save ebp
         index_of_ebp = css + WORD * (2+asmgcroot.INDEX_OF_EBP)
         self.mc.MOV_sr(index_of_ebp, ebp.value)  # MOV [css.ebp], EBP
         # Save the "return address": we pretend that it's css
         self.mc.LEA_rs(eax.value, css)           # LEA eax, [css]
         frame_ptr = css + WORD * (2+asmgcroot.FRAME_PTR)
         self.mc.MOV_sr(frame_ptr, eax.value)     # MOV [css.frame], eax
         # Set up jf_extra_stack_depth to pretend that the return address
         # was at css, and so our stack frame is supposedly shorter by
         # (PASS_ON_MY_FRAME-JIT_USE_WORDS+1) words
         delta = PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS + 1
         self.change_extra_stack_depth = True
         self.asm.set_extra_stack_depth(self.mc, -delta * WORD)
         css_value = eax
     #
     # <--here--> would come a memory fence, if the CPU needed one.
     self.mc.MOV(heap(fastgil), css_value)
     #
     if not we_are_translated():        # for testing: we should not access
         self.mc.ADD(ebp, imm(1))       # ebp any more
Example #19
 def consider_vec_float_eq(self, op):
     assert isinstance(op, VectorOp)
     lhs = op.getarg(0)
     assert isinstance(lhs, VectorOp)
     args = op.getarglist()
     rhsloc = self.make_sure_var_in_reg(op.getarg(1), args)
     lhsloc = self.xrm.force_result_in_reg(op, op.getarg(0), args)
     self.perform(op, [lhsloc, rhsloc, imm(lhs.bytesize)], lhsloc)
Example #20
 def consider_vec_arith(self, op):
     lhs = op.getarg(0)
     assert isinstance(op, VectorOp)
     size = op.bytesize
     args = op.getarglist()
     loc1 = self.make_sure_var_in_reg(op.getarg(1), args)
     loc0 = self.xrm.force_result_in_reg(op, op.getarg(0), args)
     self.perform(op, [loc0, loc1, imm(size)], loc0)
Example #21
    def consider_vec_store(self, op):
        # TODO
        descr = op.getdescr()
        assert isinstance(descr, ArrayDescr)
        assert not descr.is_array_of_pointers() and \
               not descr.is_array_of_structs()
        itemsize, _, _ = unpack_arraydescr(descr)
        args = op.getarglist()
        base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
        value_loc = self.make_sure_var_in_reg(op.getarg(2), args)
        ofs_loc = self.rm.make_sure_var_in_reg(op.getarg(1), args)
        scale = get_scale(op.getarg(3).getint())
        ofs = op.getarg(4).getint()

        integer = not (descr.is_array_of_floats() or \
                       descr.getconcrete_type() == FLOAT)
        self.perform_discard(op, [base_loc, ofs_loc, value_loc, imm(itemsize),
                                  imm(scale), imm(ofs), imm(integer)])
Example #22
 def consider_vec_expand_i(self, op):
     arg = op.getarg(0)
     args = op.getarglist()
     if arg.is_constant():
         srcloc = self.rm.convert_to_imm(arg)
     else:
         srcloc = self.make_sure_var_in_reg(arg, args)
     resloc = self.xrm.force_allocate_reg(op, args)
     self.perform(op, [srcloc, imm(op.bytesize)], resloc)
Example #23
 def consider_vec_expand_i(self, op):
     assert isinstance(op, VectorOp)
     arg = op.getarg(0)
     args = op.getarglist()
     if arg.is_constant():
         srcloc = self.rm.convert_to_imm(arg)
     else:
         srcloc = self.make_sure_var_in_reg(arg, args)
     resloc = self.xrm.force_allocate_reg(op, args)
     self.perform(op, [srcloc, imm(op.bytesize)], resloc)
Example #24
 def consider_vec_unpack_i(self, op):
     index = op.getarg(1)
     count = op.getarg(2)
     assert isinstance(index, ConstInt)
     assert isinstance(count, ConstInt)
     args = op.getarglist()
     srcloc = self.make_sure_var_in_reg(op.getarg(0), args)
     if op.is_vector():
         resloc = self.xrm.force_result_in_reg(op, op.getarg(0), args)
         size = op.bytesize
     else:
         # unpack into iX box
         resloc = self.force_allocate_reg(op, args)
         arg = op.getarg(0)
         size = arg.bytesize
     residx = 0
     args = op.getarglist()
     arglocs = [resloc, srcloc, imm(residx), imm(index.value), imm(count.value), imm(size)]
     self.perform(op, arglocs, resloc)
Example #25
 def consider_vec_expand_f(self, op):
     arg = op.getarg(0)
     args = op.getarglist()
     if arg.is_constant():
         resloc = self.xrm.force_allocate_reg(op)
         srcloc = self.xrm.expand_float(op.bytesize, arg)
     else:
         resloc = self.xrm.force_result_in_reg(op, arg, args)
         srcloc = resloc
     self.perform(op, [srcloc, imm(op.bytesize)], resloc)
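vec_expand_f broadcasts one scalar into every lane of the result; when the argument is constant, expand_float materializes the broadcast vector directly. Value-level sketch (hypothetical, pure Python; op.bytesize == 8 for doubles):

    import struct

    def expand_f(value, lanes=2):
        # broadcast one double into all lanes of a 128-bit vector
        return struct.pack('<%dd' % lanes, *([value] * lanes))

    assert struct.unpack('<2d', expand_f(1.5)) == (1.5, 1.5)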
Example #26
 def consider_vec_float_eq(self, op):
     assert isinstance(op, VectorOp)
     lhs = op.getarg(0)
     assert isinstance(lhs, VectorOp)
     args = op.getarglist()
     # we need to use xmm0
     lhsloc = self.enforce_var_in_vector_reg(op.getarg(0), args, selected_reg=xmm0)
     rhsloc = self.make_sure_var_in_reg(op.getarg(1), args)
     resloc = self.force_allocate_vector_reg_or_cc(op)
     self.perform(op, [lhsloc, rhsloc, imm(lhs.bytesize)], resloc)
Example #27
 def consider_vec_expand_f(self, op):
     assert isinstance(op, VectorOp)
     arg = op.getarg(0)
     args = op.getarglist()
     if arg.is_constant():
         resloc = self.xrm.force_allocate_reg(op)
         srcloc = self.xrm.expand_float(op.bytesize, arg)
     else:
         resloc = self.xrm.force_result_in_reg(op, arg, args)
         srcloc = resloc
     self.perform(op, [srcloc, imm(op.bytesize)], resloc)
Example #28
 def call_releasegil_addr_and_move_real_arguments(self, fastgil):
     from rpython.jit.backend.x86.assembler import heap
     assert self.is_call_release_gil
     assert not self.asm._is_asmgcc()
     #
     # Save this thread's shadowstack pointer into 'ebx',
     # for later comparison
     gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
     if gcrootmap:
         if gcrootmap.is_shadow_stack:
             rst = gcrootmap.get_root_stack_top_addr()
             self.mc.MOV(ebx, heap(rst))
     #
     # shadowstack: change 'rpy_fastgil' to 0 (it should be
     # non-zero right now).
     #self.change_extra_stack_depth = False
     #
     # <--here--> would come a memory fence, if the CPU needed one.
     self.mc.MOV(heap(fastgil), imm(0))
     #
     if not we_are_translated():  # for testing: we should not access
         self.mc.ADD(ebp, imm(1))  # ebp any more
Example #29
    def write_real_errno(self, save_err):
        """This occurs just before emit_raw_call().
        """
        mc = self.mc

        if handle_lasterror and (save_err & rffi.RFFI_READSAVED_LASTERROR):
            # must call SetLastError().  There are no registers to save
            # because we are on 32-bit in this case: no register contains
            # the arguments to the main function we want to call afterwards.
            from rpython.rlib.rwin32 import _SetLastError
            adr = llmemory.cast_ptr_to_adr(_SetLastError)
            SetLastError_addr = self.asm.cpu.cast_adr_to_int(adr)
            assert isinstance(self, CallBuilder32)    # Windows 32-bit only
            #
            if save_err & rffi.RFFI_ALT_ERRNO:
                lasterror = llerrno.get_alt_lasterror_offset(self.asm.cpu)
            else:
                lasterror = llerrno.get_rpy_lasterror_offset(self.asm.cpu)
            tlofsreg = self.get_tlofs_reg()    # => esi, callee-saved
            self.save_stack_position()         # => edi, callee-saved
            mc.PUSH_m((tlofsreg.value, lasterror))
            mc.CALL(imm(follow_jump(SetLastError_addr)))
            # restore the stack position without assuming a particular
            # calling convention of _SetLastError()
            self.mc.stack_frame_size_delta(-WORD)
            self.mc.MOV(esp, self.saved_stack_position_reg)

        if save_err & rffi.RFFI_READSAVED_ERRNO:
            # Just before a call, read '*_errno' and write it into the
            # real 'errno'.  Most registers are free here, including the
            # callee-saved ones, except 'ebx' and except the ones used to
            # pass the arguments on x86-64.
            if save_err & rffi.RFFI_ALT_ERRNO:
                rpy_errno = llerrno.get_alt_errno_offset(self.asm.cpu)
            else:
                rpy_errno = llerrno.get_rpy_errno_offset(self.asm.cpu)
            p_errno = llerrno.get_p_errno_offset(self.asm.cpu)
            tlofsreg = self.get_tlofs_reg()    # => esi or r12, callee-saved
            if IS_X86_32:
                tmpreg = edx
            else:
                tmpreg = r11     # edx is used for 3rd argument
            mc.MOV_rm(tmpreg.value, (tlofsreg.value, p_errno))
            mc.MOV32_rm(eax.value, (tlofsreg.value, rpy_errno))
            mc.MOV32_mr((tmpreg.value, 0), eax.value)
        elif save_err & rffi.RFFI_ZERO_ERRNO_BEFORE:
            # Same, but write zero.
            p_errno = llerrno.get_p_errno_offset(self.asm.cpu)
            tlofsreg = self.get_tlofs_reg()    # => esi or r12, callee-saved
            mc.MOV_rm(eax.value, (tlofsreg.value, p_errno))
            mc.MOV32_mi((eax.value, 0), 0)
Example #30
 def __init__(self, assembler, fnloc, arglocs,
              resloc=eax, restype=INT, ressize=WORD):
     AbstractCallBuilder.__init__(self, assembler, fnloc, arglocs,
                                  resloc, restype, ressize)
     # Avoid tons of issues with a non-immediate fnloc by sticking it
     # as an extra argument if needed
     if isinstance(fnloc, ImmedLoc):
         self.fnloc_is_immediate = True
         self.fnloc = imm(follow_jump(fnloc.value))
     else:
         self.fnloc_is_immediate = False
         self.fnloc = None
         self.arglocs = arglocs + [fnloc]
     self.start_frame_size = self.mc._frame_size
Example #31
 def __init__(self, assembler, fnloc, arglocs,
              resloc=eax, restype=INT, ressize=WORD):
     AbstractCallBuilder.__init__(self, assembler, fnloc, arglocs,
                                  resloc, restype, ressize)
     # Avoid tons of issues with a non-immediate fnloc by sticking it
     # as an extra argument if needed
     if isinstance(fnloc, ImmedLoc):
         self.fnloc_is_immediate = True
         self.fnloc = imm(follow_jump(fnloc.value))
     else:
         self.fnloc_is_immediate = False
         self.fnloc = None
         self.arglocs = arglocs + [fnloc]
     self.current_esp = 0     # 0 or (usually) negative, counted in bytes
Example #32
    def write_real_errno(self, save_err):
        """This occurs just before emit_raw_call().
        """
        mc = self.mc

        if handle_lasterror and (save_err & rffi.RFFI_READSAVED_LASTERROR):
            # must call SetLastError().  There are no registers to save
            # because we are on 32-bit in this case: no register contains
            # the arguments to the main function we want to call afterwards.
            from rpython.rlib.rwin32 import _SetLastError
            adr = llmemory.cast_ptr_to_adr(_SetLastError)
            SetLastError_addr = self.asm.cpu.cast_adr_to_int(adr)
            assert isinstance(self, CallBuilder32)  # Windows 32-bit only
            #
            if save_err & rffi.RFFI_ALT_ERRNO:
                lasterror = llerrno.get_alt_lasterror_offset(self.asm.cpu)
            else:
                lasterror = llerrno.get_rpy_lasterror_offset(self.asm.cpu)
            tlofsreg = self.get_tlofs_reg()  # => esi, callee-saved
            self.save_stack_position()  # => edi, callee-saved
            mc.PUSH_m((tlofsreg.value, lasterror))
            mc.CALL(imm(follow_jump(SetLastError_addr)))
            # restore the stack position without assuming a particular
            # calling convention of _SetLastError()
            self.mc.MOV(esp, self.saved_stack_position_reg)

        if save_err & rffi.RFFI_READSAVED_ERRNO:
            # Just before a call, read '*_errno' and write it into the
            # real 'errno'.  Most registers are free here, including the
            # callee-saved ones, except 'ebx' and except the ones used to
            # pass the arguments on x86-64.
            if save_err & rffi.RFFI_ALT_ERRNO:
                rpy_errno = llerrno.get_alt_errno_offset(self.asm.cpu)
            else:
                rpy_errno = llerrno.get_rpy_errno_offset(self.asm.cpu)
            p_errno = llerrno.get_p_errno_offset(self.asm.cpu)
            tlofsreg = self.get_tlofs_reg()  # => esi or r12, callee-saved
            if IS_X86_32:
                tmpreg = edx
            else:
                tmpreg = r11  # edx is used for 3rd argument
            mc.MOV_rm(tmpreg.value, (tlofsreg.value, p_errno))
            mc.MOV32_rm(eax.value, (tlofsreg.value, rpy_errno))
            mc.MOV32_mr((tmpreg.value, 0), eax.value)
        elif save_err & rffi.RFFI_ZERO_ERRNO_BEFORE:
            # Same, but write zero.
            p_errno = llerrno.get_p_errno_offset(self.asm.cpu)
            tlofsreg = self.get_tlofs_reg()  # => esi or r12, callee-saved
            mc.MOV_rm(eax.value, (tlofsreg.value, p_errno))
            mc.MOV32_mi((eax.value, 0), 0)
Example #33
 def call_releasegil_addr_and_move_real_arguments(self, fastgil):
     from rpython.jit.backend.x86.assembler import heap
     #
     if not self.asm._is_asmgcc():
         # shadowstack: change 'rpy_fastgil' to 0 (it should be
         # non-zero right now).
         self.change_extra_stack_depth = False
         css_value = imm(0)
     else:
         from rpython.memory.gctransform import asmgcroot
         # build a 'css' structure on the stack: 2 words for the linkage,
         # and 5/7 words as described for asmgcroot.ASM_FRAMEDATA, for a
         # total size of JIT_USE_WORDS.  This structure is found at
         # [ESP+css].
         css = -self.current_esp + (
             WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS))
         assert css >= 2 * WORD
         # Save ebp
         index_of_ebp = css + WORD * (2+asmgcroot.INDEX_OF_EBP)
         self.mc.MOV_sr(index_of_ebp, ebp.value)  # MOV [css.ebp], EBP
         # Save the "return address": we pretend that it's css
         self.mc.LEA_rs(eax.value, css)           # LEA eax, [css]
         frame_ptr = css + WORD * (2+asmgcroot.FRAME_PTR)
         self.mc.MOV_sr(frame_ptr, eax.value)     # MOV [css.frame], eax
         # Set up jf_extra_stack_depth to pretend that the return address
         # was at css, and so our stack frame is supposedly shorter by
         # (PASS_ON_MY_FRAME-JIT_USE_WORDS+1) words
         delta = PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS + 1
         self.change_extra_stack_depth = True
         self.asm.set_extra_stack_depth(self.mc, -delta * WORD)
         css_value = eax
     #
     self.mc.MOV(heap(fastgil), css_value)
     #
     if not we_are_translated():        # for testing: we should not access
         self.mc.ADD(ebp, imm(1))       # ebp any more; and ignore 'fastgil'
Example #34
 def __init__(self,
              assembler,
              fnloc,
              arglocs,
              resloc=eax,
              restype=INT,
              ressize=WORD):
     AbstractCallBuilder.__init__(self, assembler, fnloc, arglocs, resloc,
                                  restype, ressize)
     # Avoid tons of issues with a non-immediate fnloc by sticking it
     # as an extra argument if needed
     if isinstance(fnloc, ImmedLoc):
         self.fnloc_is_immediate = True
         self.fnloc = imm(follow_jump(fnloc.value))
     else:
         self.fnloc_is_immediate = False
         self.fnloc = None
         self.arglocs = arglocs + [fnloc]
     self.current_esp = 0  # 0 or (usually) negative, counted in bytes
Example #35
    def read_real_errno(self, save_err):
        """This occurs after emit_raw_call() and after restore_stack_pointer().
        """
        mc = self.mc

        if save_err & rffi.RFFI_SAVE_ERRNO:
            # Just after a call, read the real 'errno' and save a copy of
            # it inside our thread-local '*_errno'.  Most registers are
            # free here, including the callee-saved ones, except 'ebx'.
            # The tlofs register might have been loaded earlier and is
            # callee-saved, so it does not need to be reloaded.
            if save_err & rffi.RFFI_ALT_ERRNO:
                rpy_errno = llerrno.get_alt_errno_offset(self.asm.cpu)
            else:
                rpy_errno = llerrno.get_rpy_errno_offset(self.asm.cpu)
            p_errno = llerrno.get_p_errno_offset(self.asm.cpu)
            tlofsreg = self.get_tlofs_reg()  # => esi or r12 (possibly reused)
            mc.MOV_rm(edi.value, (tlofsreg.value, p_errno))
            mc.MOV32_rm(edi.value, (edi.value, 0))
            mc.MOV32_mr((tlofsreg.value, rpy_errno), edi.value)

        if handle_lasterror and (
                save_err &
            (rffi.RFFI_SAVE_LASTERROR | rffi.RFFI_SAVE_WSALASTERROR)):
            if save_err & rffi.RFFI_SAVE_LASTERROR:
                from rpython.rlib.rwin32 import _GetLastError
                adr = llmemory.cast_ptr_to_adr(_GetLastError)
            else:
                from rpython.rlib._rsocket_rffi import _WSAGetLastError
                adr = llmemory.cast_ptr_to_adr(_WSAGetLastError)
            GetLastError_addr = self.asm.cpu.cast_adr_to_int(adr)
            assert isinstance(self, CallBuilder32)  # Windows 32-bit only
            #
            if save_err & rffi.RFFI_ALT_ERRNO:
                lasterror = llerrno.get_alt_lasterror_offset(self.asm.cpu)
            else:
                lasterror = llerrno.get_rpy_lasterror_offset(self.asm.cpu)
            self.save_result_value(save_edx=True)  # save eax/edx/xmm0
            self.result_value_saved_early = True
            mc.CALL(imm(follow_jump(GetLastError_addr)))
            #
            tlofsreg = self.get_tlofs_reg()  # => esi (possibly reused)
            mc.MOV32_mr((tlofsreg.value, lasterror), eax.value)
Example #36
    def read_real_errno(self, save_err):
        """This occurs after emit_raw_call() and after restore_stack_pointer().
        """
        mc = self.mc

        if save_err & rffi.RFFI_SAVE_ERRNO:
            # Just after a call, read the real 'errno' and save a copy of
            # it inside our thread-local '*_errno'.  Most registers are
            # free here, including the callee-saved ones, except 'ebx'.
            # The tlofs register might have been loaded earlier and is
            # callee-saved, so it does not need to be reloaded.
            if save_err & rffi.RFFI_ALT_ERRNO:
                rpy_errno = llerrno.get_alt_errno_offset(self.asm.cpu)
            else:
                rpy_errno = llerrno.get_rpy_errno_offset(self.asm.cpu)
            p_errno = llerrno.get_p_errno_offset(self.asm.cpu)
            tlofsreg = self.get_tlofs_reg()   # => esi or r12 (possibly reused)
            mc.MOV_rm(ecx.value, (tlofsreg.value, p_errno))
            mc.MOV32_rm(ecx.value, (ecx.value, 0))
            mc.MOV32_mr((tlofsreg.value, rpy_errno), ecx.value)

        if handle_lasterror and (save_err & (rffi.RFFI_SAVE_LASTERROR |
                                             rffi.RFFI_SAVE_WSALASTERROR)):
            if save_err & rffi.RFFI_SAVE_LASTERROR:
                from rpython.rlib.rwin32 import _GetLastError
                adr = llmemory.cast_ptr_to_adr(_GetLastError)
            else:
                from rpython.rlib._rsocket_rffi import _WSAGetLastError
                adr = llmemory.cast_ptr_to_adr(_WSAGetLastError)
            GetLastError_addr = self.asm.cpu.cast_adr_to_int(adr)
            assert isinstance(self, CallBuilder32)    # Windows 32-bit only
            #
            if save_err & rffi.RFFI_ALT_ERRNO:
                lasterror = llerrno.get_alt_lasterror_offset(self.asm.cpu)
            else:
                lasterror = llerrno.get_rpy_lasterror_offset(self.asm.cpu)
            self.save_result_value(save_edx=True)   # save eax/edx/xmm0
            self.result_value_saved_early = True
            mc.CALL(imm(follow_jump(GetLastError_addr)))
            #
            tlofsreg = self.get_tlofs_reg()    # => esi (possibly reused)
            mc.MOV32_mr((tlofsreg.value, lasterror), eax.value)
Example #37
 def consider_vec_arith_unary(self, op):
     lhs = op.getarg(0)
     assert isinstance(lhs, VectorOp)
     args = op.getarglist()
     res = self.xrm.force_result_in_reg(op, op.getarg(0), args)
     self.perform(op, [res, imm(lhs.bytesize)], res)
Example #38
 def move_real_result_and_call_reacqgil_addr(self, fastgil):
     from rpython.jit.backend.x86 import rx86
     #
     # check if we need to call the reacqgil() function or not
     # (to reacquire the GIL, remove the asmgcc head from
     # the chained list, etc.)
     mc = self.mc
     restore_edx = False
     if not self.asm._is_asmgcc():
         css = 0
         css_value = imm(0)
         old_value = ecx
     else:
         from rpython.memory.gctransform import asmgcroot
         css = WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS)
         if IS_X86_32:
             assert css >= 16
             if self.restype == 'L':  # long long result: eax/edx
                 if not self.result_value_saved_early:
                     mc.MOV_sr(12, edx.value)
                     restore_edx = True
             css_value = edx
             old_value = ecx
         elif IS_X86_64:
             css_value = edi
             old_value = esi
         mc.LEA_rs(css_value.value, css)
     #
     # Use XCHG as an atomic test-and-set-lock.  It also implicitly
     # does a memory barrier.
     mc.MOV(old_value, imm(1))
     if rx86.fits_in_32bits(fastgil):
         mc.XCHG_rj(old_value.value, fastgil)
     else:
         mc.MOV_ri(X86_64_SCRATCH_REG.value, fastgil)
         mc.XCHG_rm(old_value.value, (X86_64_SCRATCH_REG.value, 0))
     mc.CMP(old_value, css_value)
     #
     gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
     if bool(gcrootmap) and gcrootmap.is_shadow_stack:
         from rpython.jit.backend.x86.assembler import heap
         #
         # When doing a call_release_gil with shadowstack, there
         # is the risk that the 'rpy_fastgil' was free but the
         # current shadowstack can be the one of a different
         # thread.  So here we check if the shadowstack pointer
         # is still the same as before we released the GIL (saved
         # in 'ebx'), and if not, we fall back to 'reacqgil_addr'.
         mc.J_il8(rx86.Conditions['NE'], 0)
         jne_location = mc.get_relative_pos()
         # here, ecx (=old_value) is zero (so rpy_fastgil was in 'released'
         # state before the XCHG, but the XCHG acquired it by writing 1)
         rst = gcrootmap.get_root_stack_top_addr()
         mc = self.mc
         mc.CMP(ebx, heap(rst))
         mc.J_il8(rx86.Conditions['E'], 0)
         je_location = mc.get_relative_pos()
         # revert the rpy_fastgil acquired above, so that the
         # general 'reacqgil_addr' below can acquire it again...
         mc.MOV(heap(fastgil), ecx)
         # patch the JNE above
         offset = mc.get_relative_pos() - jne_location
         assert 0 < offset <= 127
         mc.overwrite(jne_location - 1, chr(offset))
     else:
         mc.J_il8(rx86.Conditions['E'], 0)
         je_location = mc.get_relative_pos()
     #
     # Yes, we need to call the reacqgil() function
     if not self.result_value_saved_early:
         self.save_result_value(save_edx=False)
     if self.asm._is_asmgcc():
         if IS_X86_32:
             mc.MOV_sr(4, old_value.value)
             mc.MOV_sr(0, css_value.value)
         # on X86_64, they are already in the right registers
     mc.CALL(imm(follow_jump(self.asm.reacqgil_addr)))
     if not self.result_value_saved_early:
         self.restore_result_value(save_edx=False)
     #
     # patch the JE above
     offset = mc.get_relative_pos() - je_location
     assert 0 < offset <= 127
     mc.overwrite(je_location - 1, chr(offset))
     #
     if restore_edx:
         mc.MOV_rs(edx.value, 12)  # restore this
     #
     if self.result_value_saved_early:
         self.restore_result_value(save_edx=True)
     #
     if not we_are_translated():  # for testing: now we can access
         mc.SUB(ebp, imm(1))  # ebp again
     #
     # Now that we reacquired the GIL, we can reload a possibly modified ebp
     if self.asm._is_asmgcc():
         # special-case: reload ebp from the css
         from rpython.memory.gctransform import asmgcroot
         index_of_ebp = css + WORD * (2 + asmgcroot.INDEX_OF_EBP)
         mc.MOV_rs(ebp.value, index_of_ebp)  # MOV EBP, [css.ebp]
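At the value level, the XCHG sequence is an atomic test-and-set: unconditionally write 1 into rpy_fastgil and inspect what was there before; if the old word is still the css_value stored when the GIL was released, no other thread intervened and the swap itself re-acquired the GIL. A toy model of that decision (hypothetical, pure Python; a Lock stands in for the bus lock):

    import threading

    class FastGil(object):
        # rpy_fastgil: 0 = released (shadowstack), css pointer = released
        # (asmgcc), 1 = held
        def __init__(self):
            self._bus = threading.Lock()
            self.word = 0

        def xchg(self, new):
            with self._bus:             # XCHG: atomic swap + memory barrier
                old, self.word = self.word, new
                return old

    def try_fast_reacquire(fastgil, css_value):
        # the MOV(old_value, imm(1)) / XCHG / CMP / JE sequence above:
        # True means the fast path re-acquired the GIL; False means we
        # must fall back to calling reacqgil_addr
        return fastgil.xchg(1) == css_value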
Example #39
 def consider_vec_int_signext(self, op):
     args = op.getarglist()
     resloc = self.xrm.force_result_in_reg(op, op.getarg(0), args)
     size = op.cast_from_bytesize()
     assert size > 0
     self.perform(op, [resloc, imm(size), imm(op.bytesize)], resloc)
Example #40
 def move_real_result_and_call_reacqgil_addr(self, fastgil):
     from rpython.jit.backend.x86 import rx86
     #
     # check if we need to call the reacqgil() function or not
     # (to reacquire the GIL, remove the asmgcc head from
     # the chained list, etc.)
     mc = self.mc
     restore_edx = False
     if not self.asm._is_asmgcc():
         css = 0
         css_value = imm(0)
         old_value = ecx
     else:
         from rpython.memory.gctransform import asmgcroot
         css = WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS)
         if IS_X86_32:
             assert css >= 16
             if self.restype == 'L':    # long long result: eax/edx
                 mc.MOV_sr(12, edx.value)
                 restore_edx = True
             css_value = edx
             old_value = ecx
         elif IS_X86_64:
             css_value = edi
             old_value = esi
         mc.LEA_rs(css_value.value, css)
     #
     mc.MOV(old_value, imm(1))
     if rx86.fits_in_32bits(fastgil):
         mc.XCHG_rj(old_value.value, fastgil)
     else:
         mc.MOV_ri(X86_64_SCRATCH_REG.value, fastgil)
         mc.XCHG_rm(old_value.value, (X86_64_SCRATCH_REG.value, 0))
     mc.CMP(old_value, css_value)
     mc.J_il8(rx86.Conditions['E'], 0)
     je_location = mc.get_relative_pos()
     #
     # Yes, we need to call the reacqgil() function
     self.save_result_value_reacq()
     if self.asm._is_asmgcc():
         if IS_X86_32:
             mc.MOV_sr(4, old_value.value)
             mc.MOV_sr(0, css_value.value)
         # on X86_64, they are already in the right registers
     mc.CALL(imm(self.asm.reacqgil_addr))
     self.restore_result_value_reacq()
     #
     # patch the JE above
     offset = mc.get_relative_pos() - je_location
     assert 0 < offset <= 127
     mc.overwrite(je_location-1, chr(offset))
     #
     if restore_edx:
         mc.MOV_rs(edx.value, 12)   # restore this
     #
     if not we_are_translated():    # for testing: now we can access
         mc.SUB(ebp, imm(1))        # ebp again
     #
     # Now that we reacquired the GIL, we can reload a possibly modified ebp
     if self.asm._is_asmgcc():
         # special-case: reload ebp from the css
         from rpython.memory.gctransform import asmgcroot
         index_of_ebp = css + WORD * (2+asmgcroot.INDEX_OF_EBP)
         mc.MOV_rs(ebp.value, index_of_ebp)  # MOV EBP, [css.ebp]
Example #41
 def move_real_result_and_call_reacqgil_addr(self, fastgil):
     from rpython.jit.backend.x86 import rx86
     #
     # check if we need to call the reacqgil() function or not
     # (to reacquire the GIL, remove the asmgcc head from
     # the chained list, etc.)
     mc = self.mc
     restore_edx = False
     if not self.asm._is_asmgcc():
         css = 0
         css_value = imm(0)
         old_value = ecx
     else:
         from rpython.memory.gctransform import asmgcroot
         css = WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS)
         if IS_X86_32:
             assert css >= 16
             if self.restype == 'L':    # long long result: eax/edx
                 if not self.result_value_saved_early:
                     mc.MOV_sr(12, edx.value)
                     restore_edx = True
             css_value = edx
             old_value = ecx
         elif IS_X86_64:
             css_value = edi
             old_value = esi
         mc.LEA_rs(css_value.value, css)
     #
     # Use XCHG as an atomic test-and-set-lock.  It also implicitly
     # does a memory barrier.
     mc.MOV(old_value, imm(1))
     if rx86.fits_in_32bits(fastgil):
         mc.XCHG_rj(old_value.value, fastgil)
     else:
         mc.MOV_ri(X86_64_SCRATCH_REG.value, fastgil)
         mc.XCHG_rm(old_value.value, (X86_64_SCRATCH_REG.value, 0))
     mc.CMP(old_value, css_value)
     #
     gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
     if bool(gcrootmap) and gcrootmap.is_shadow_stack:
         from rpython.jit.backend.x86.assembler import heap
         #
         # When doing a call_release_gil with shadowstack, there
         # is the risk that the 'rpy_fastgil' was free but the
         # current shadowstack can be the one of a different
         # thread.  So here we check if the shadowstack pointer
         # is still the same as before we released the GIL (saved
         # in 'ebx'), and if not, we fall back to 'reacqgil_addr'.
         mc.J_il8(rx86.Conditions['NE'], 0)
         jne_location = mc.get_relative_pos()
         # here, ecx (=old_value) is zero (so rpy_fastgil was in 'released'
         # state before the XCHG, but the XCHG acquired it by writing 1)
         rst = gcrootmap.get_root_stack_top_addr()
         mc = self.mc
         mc.CMP(ebx, heap(rst))
         mc.J_il8(rx86.Conditions['E'], 0)
         je_location = mc.get_relative_pos()
         # revert the rpy_fastgil acquired above, so that the
         # general 'reacqgil_addr' below can acquire it again...
         mc.MOV(heap(fastgil), ecx)
         # patch the JNE above
         offset = mc.get_relative_pos() - jne_location
         assert 0 < offset <= 127
         mc.overwrite(jne_location-1, chr(offset))
     else:
         mc.J_il8(rx86.Conditions['E'], 0)
         je_location = mc.get_relative_pos()
     #
     # Yes, we need to call the reacqgil() function
     if not self.result_value_saved_early:
         self.save_result_value(save_edx=False)
     if self.asm._is_asmgcc():
         if IS_X86_32:
             mc.MOV_sr(4, old_value.value)
             mc.MOV_sr(0, css_value.value)
         # on X86_64, they are already in the right registers
     mc.CALL(imm(follow_jump(self.asm.reacqgil_addr)))
     if not self.result_value_saved_early:
         self.restore_result_value(save_edx=False)
     #
     # patch the JE above
     offset = mc.get_relative_pos() - je_location
     assert 0 < offset <= 127
     mc.overwrite(je_location-1, chr(offset))
     #
     if restore_edx:
         mc.MOV_rs(edx.value, 12)   # restore this
     #
     if self.result_value_saved_early:
         self.restore_result_value(save_edx=True)
     #
     if not we_are_translated():    # for testing: now we can access
         mc.SUB(ebp, imm(1))        # ebp again
     #
     # Now that we reacquired the GIL, we can reload a possibly modified ebp
     if self.asm._is_asmgcc():
         # special-case: reload ebp from the css
         from rpython.memory.gctransform import asmgcroot
         index_of_ebp = css + WORD * (2+asmgcroot.INDEX_OF_EBP)
         mc.MOV_rs(ebp.value, index_of_ebp)  # MOV EBP, [css.ebp]
Example #42
    def move_real_result_and_call_reacqgil_addr(self, fastgil):
        from rpython.jit.backend.x86 import rx86
        #
        # check if we need to call the reacqgil() function or not
        # (to reacquire the GIL, remove the asmgcc head from
        # the chained list, etc.)
        mc = self.mc
        restore_edx = False
        if not self.asm._is_asmgcc():
            css = 0
            css_value = imm(0)
            old_value = ecx
        else:
            from rpython.memory.gctransform import asmgcroot
            css = WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS)
            if IS_X86_32:
                assert css >= 16
                if self.restype == 'L':  # long long result: eax/edx
                    if not self.result_value_saved_early:
                        mc.MOV_sr(12, edx.value)
                        restore_edx = True
                css_value = edx  # note: duplicated in ReacqGilSlowPath
                old_value = ecx  #
            elif IS_X86_64:
                css_value = edi
                old_value = esi
            mc.LEA_rs(css_value.value, css)
        #
        # Use XCHG as an atomic test-and-set-lock.  It also implicitly
        # does a memory barrier.
        mc.MOV(old_value, imm(1))
        if rx86.fits_in_32bits(fastgil):
            mc.XCHG_rj(old_value.value, fastgil)
        else:
            mc.MOV_ri(X86_64_SCRATCH_REG.value, fastgil)
            mc.XCHG_rm(old_value.value, (X86_64_SCRATCH_REG.value, 0))
        mc.CMP(old_value, css_value)
        #
        gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
        if bool(gcrootmap) and gcrootmap.is_shadow_stack:
            from rpython.jit.backend.x86.assembler import heap
            #
            # When doing a call_release_gil with shadowstack, there
            # is the risk that the 'rpy_fastgil' was free but the
            # current shadowstack can be the one of a different
            # thread.  So here we check if the shadowstack pointer
            # is still the same as before we released the GIL (saved
            # in 'ebx'), and if not, we fall back to 'reacqgil_addr'.
            mc.J_il(rx86.Conditions['NE'], 0xfffff)  # patched later
            early_jump_addr = mc.get_relative_pos(break_basic_block=False)
            # ^^^ this jump will go to almost the same place as the
            # ReacqGilSlowPath() computes, but one instruction farther,
            # i.e. just after the "MOV(heap(fastgil), ecx)".

            # here, ecx (=old_value) is zero (so rpy_fastgil was in 'released'
            # state before the XCHG, but the XCHG acquired it by writing 1)
            rst = gcrootmap.get_root_stack_top_addr()
            mc = self.mc
            mc.CMP(ebx, heap(rst))
            sp = self.ReacqGilSlowPath(mc, rx86.Conditions['NE'])
            sp.early_jump_addr = early_jump_addr
            sp.fastgil = fastgil
        else:
            sp = self.ReacqGilSlowPath(mc, rx86.Conditions['NE'])
        sp.callbuilder = self
        sp.set_continue_addr(mc)
        self.asm.pending_slowpaths.append(sp)
        #
        if restore_edx:
            mc.MOV_rs(edx.value, 12)  # restore this
        #
        if self.result_value_saved_early:
            self.restore_result_value(save_edx=True)
        #
        if not we_are_translated():  # for testing: now we can access
            mc.SUB(ebp, imm(1))  # ebp again
        #
        # Now that we reacquired the GIL, we can reload a possibly modified ebp
        if self.asm._is_asmgcc():
            # special-case: reload ebp from the css
            from rpython.memory.gctransform import asmgcroot
            index_of_ebp = css + WORD * (2 + asmgcroot.INDEX_OF_EBP)
            mc.MOV_rs(ebp.value, index_of_ebp)  # MOV EBP, [css.ebp]
Example #43
    def move_real_result_and_call_reacqgil_addr(self, fastgil):
        from rpython.jit.backend.x86 import rx86
        #
        # check if we need to call the reacqgil() function or not
        # (to reacquire the GIL)
        mc = self.mc
        restore_edx = False
        #
        # Make sure we can use 'eax' in the sequel for CMPXCHG
        # On 32-bit, we also need to check if restype is 'L' for long long,
        # in which case we need to save eax and edx because they are both
        # used for the return value.
        if self.restype in (INT, 'L') and not self.result_value_saved_early:
            self.save_result_value(save_edx=self.restype == 'L')
            self.result_value_saved_early = True
        #
        # Use LOCK CMPXCHG as a compare-and-swap with memory barrier.
        tlsreg = self.get_tlofs_reg()
        thread_ident_ofs = lltls.get_thread_ident_offset(self.asm.cpu)
        #
        mc.MOV_rm(ecx.value, (tlsreg.value, thread_ident_ofs))
        mc.XOR_rr(eax.value, eax.value)

        if rx86.fits_in_32bits(fastgil):
            mc.LOCK()
            mc.CMPXCHG_jr(fastgil, ecx.value)
        else:
            mc.MOV_ri(X86_64_SCRATCH_REG.value, fastgil)
            mc.LOCK()
            mc.CMPXCHG_mr((X86_64_SCRATCH_REG.value, 0), ecx.value)
        #
        gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
        if bool(gcrootmap):
            from rpython.jit.backend.x86.assembler import heap
            assert gcrootmap.is_shadow_stack
            #
            # When doing a call_release_gil with shadowstack, there
            # is the risk that the 'rpy_fastgil' was free but the
            # current shadowstack can be the one of a different
            # thread.  So here we check if the shadowstack pointer
            # is still the same as before we released the GIL (saved
            # in 'ebx'), and if not, we fall back to 'reacqgil_addr'.
            mc.J_il(rx86.Conditions['NZ'], 0xfffff)  # patched later
            early_jump_addr = mc.get_relative_pos(break_basic_block=False)
            # ^^^ this jump will go to almost the same place as the
            # ReacqGilSlowPath() computes, but one instruction further,
            # i.e. just after the "MOV(heap(fastgil), 0)".

            rst = gcrootmap.get_root_stack_top_addr()
            mc = self.mc
            mc.CMP(ebx, heap(rst))
            sp = self.ReacqGilSlowPath(mc, rx86.Conditions['NE'])
            sp.early_jump_addr = early_jump_addr
            sp.fastgil = fastgil
        else:
            sp = self.ReacqGilSlowPath(mc, rx86.Conditions['NZ'])
        sp.callbuilder = self
        sp.set_continue_addr(mc)
        self.asm.pending_slowpaths.append(sp)
        #
        if restore_edx:
            mc.MOV_rs(edx.value, 12)  # restore this
        #
        if self.result_value_saved_early:
            self.restore_result_value(save_edx=True)
        #
        if not we_are_translated():  # for testing: now we can access
            mc.SUB(ebp, imm(1))  # ebp again
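This newer variant replaces the unconditional XCHG with LOCK CMPXCHG keyed on the thread ident: the word is claimed only if it still holds 0, and the value stored is the claiming thread's identifier rather than 1. A toy model of that compare-and-swap (hypothetical, pure Python):

    import threading

    class GilWord(object):
        # the rpy_fastgil word: 0 = released, otherwise the ident of the
        # thread that owns the GIL
        def __init__(self):
            self._bus = threading.Lock()   # stands in for the LOCK prefix
            self.word = 0

        def cmpxchg(self, expected, new):
            # LOCK CMPXCHG fastgil, ecx with eax == expected: store 'new'
            # only if the word still equals 'expected'; ZF reports success
            with self._bus:
                if self.word == expected:
                    self.word = new
                    return True            # fast path: GIL re-acquired
                return False               # 'NZ': go to ReacqGilSlowPath

    gil = GilWord()
    assert gil.cmpxchg(0, threading.current_thread().ident)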