def test_reuse_scratch_register(self):
    # Two loads from nearby absolute 64-bit addresses: the base address
    # must be materialized into the scratch register r11 only once, and
    # the second access must reuse r11 with a small displacement.
    base_addr = intmask(0xFEDCBA9876543210)
    cb = LocationCodeBuilder64()
    cb.MOV(ecx, heap(base_addr))
    cb.MOV(ecx, heap(base_addr + 8))
    expected = ''.join([
        '\x49\xBB\x10\x32\x54\x76\x98\xBA\xDC\xFE',  # mov r11, 0xFEDCBA9876543210
        '\x49\x8B\x0B',                              # mov rcx, [r11]
        '\x49\x8B\x4B\x08',                          # mov rcx, [r11+8]
    ])
    assert cb.getvalue() == expected
def generate_body(self, assembler, mc):
    """Emit the body of the reacquire-GIL slow path.

    Writes into 'mc' the instructions that call the assembler's
    'reacqgil_addr' function, saving/restoring the call result around
    it unless the callbuilder saved it earlier.
    """
    if self.early_jump_addr != 0:
        # This slow-path has two entry points, with two
        # conditional jumps.  We can jump to the regular start
        # of this slow-path with the 2nd conditional jump.  Or,
        # we can jump past the "MOV(heap(fastgil), ecx)"
        # instruction from the 1st conditional jump.
        # This instruction reverts the rpy_fastgil acquired
        # previously, so that the general 'reacqgil_addr'
        # function can acquire it again.  It must only be done
        # if we actually succeeded in acquiring rpy_fastgil.
        from rpython.jit.backend.x86.assembler import heap
        mc.MOV(heap(self.fastgil), ecx)
        # Patch the 32-bit offset of the early jump so it lands here,
        # just after the MOV above.
        offset = mc.get_relative_pos() - self.early_jump_addr
        mc.overwrite32(self.early_jump_addr - 4, offset)
        # scratch register forgotten here, by get_relative_pos()
    # call the reacqgil() function
    cb = self.callbuilder
    if not cb.result_value_saved_early:
        cb.save_result_value(save_edx=False)
    if assembler._is_asmgcc():
        if IS_X86_32:
            # asmgcc on 32-bit: pass the two values on the stack
            # (positions 0 and 4) as arguments to reacqgil_addr.
            css_value = edx
            old_value = ecx
            mc.MOV_sr(4, old_value.value)
            mc.MOV_sr(0, css_value.value)
        # on X86_64, they are already in the right registers
    mc.CALL(imm(follow_jump(assembler.reacqgil_addr)))
    if not cb.result_value_saved_early:
        cb.restore_result_value(save_edx=False)
def call_releasegil_addr_and_move_real_arguments(self, fastgil):
    """Emit the machine code that releases the GIL before the real call.

    Writes a value into the 'rpy_fastgil' word at address 'fastgil':
    0 in the shadowstack case, or the address of a 'css' structure
    built on the stack in the asmgcc case.  Also saves the current
    shadowstack top into ebx for the later comparison done when
    reacquiring the GIL.
    """
    from rpython.jit.backend.x86.assembler import heap
    assert self.is_call_release_gil
    #
    # Save this thread's shadowstack pointer into 'ebx',
    # for later comparison
    gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
    if gcrootmap:
        if gcrootmap.is_shadow_stack:
            rst = gcrootmap.get_root_stack_top_addr()
            self.mc.MOV(ebx, heap(rst))
    #
    if not self.asm._is_asmgcc():
        # shadowstack: change 'rpy_fastgil' to 0 (it should be
        # non-zero right now).
        self.change_extra_stack_depth = False
        # ^^ note that set_extra_stack_depth() in this case is a no-op
        css_value = imm(0)
    else:
        from rpython.memory.gctransform import asmgcroot
        # build a 'css' structure on the stack: 2 words for the linkage,
        # and 5/7 words as described for asmgcroot.ASM_FRAMEDATA, for a
        # total size of JIT_USE_WORDS.  This structure is found at
        # [ESP+css].
        css = -self.get_current_esp() + (
            WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS))
        assert css >= 2 * WORD
        # Save ebp
        index_of_ebp = css + WORD * (2 + asmgcroot.INDEX_OF_EBP)
        self.mc.MOV_sr(index_of_ebp, ebp.value)  # MOV [css.ebp], EBP
        # Save the "return address": we pretend that it's css
        self.mc.LEA_rs(eax.value, css)           # LEA eax, [css]
        frame_ptr = css + WORD * (2 + asmgcroot.FRAME_PTR)
        self.mc.MOV_sr(frame_ptr, eax.value)     # MOV [css.frame], eax
        # Set up jf_extra_stack_depth to pretend that the return address
        # was at css, and so our stack frame is supposedly shorter by
        # (PASS_ON_MY_FRAME-JIT_USE_WORDS+1) words
        delta = PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS + 1
        self.change_extra_stack_depth = True
        self.asm.set_extra_stack_depth(self.mc, -delta * WORD)
        css_value = eax
    #
    # <--here--> would come a memory fence, if the CPU needed one.
    self.mc.MOV(heap(fastgil), css_value)
    #
    if not we_are_translated():        # for testing: we should not access
        self.mc.ADD(ebp, imm(1))       # ebp any more
def call_releasegil_addr_and_move_real_arguments(self, fastgil):
    """Emit the machine code that releases the GIL before the real call.

    Stores into the 'rpy_fastgil' word at 'fastgil' either 0
    (shadowstack case) or the address of a 'css' structure built on
    the stack (asmgcc case), and saves the shadowstack top into ebx
    so it can be compared again when the GIL is reacquired.
    """
    from rpython.jit.backend.x86.assembler import heap
    assert self.is_call_release_gil
    #
    # Save this thread's shadowstack pointer into 'ebx',
    # for later comparison
    gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
    if gcrootmap:
        if gcrootmap.is_shadow_stack:
            rst = gcrootmap.get_root_stack_top_addr()
            self.mc.MOV(ebx, heap(rst))
    #
    if not self.asm._is_asmgcc():
        # shadowstack: change 'rpy_fastgil' to 0 (it should be
        # non-zero right now).
        self.change_extra_stack_depth = False
        # ^^ note that set_extra_stack_depth() in this case is a no-op
        css_value = imm(0)
    else:
        from rpython.memory.gctransform import asmgcroot
        # build a 'css' structure on the stack: 2 words for the linkage,
        # and 5/7 words as described for asmgcroot.ASM_FRAMEDATA, for a
        # total size of JIT_USE_WORDS.  This structure is found at
        # [ESP+css].
        css = -self.get_current_esp() + (
            WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS))
        assert css >= 2 * WORD
        # Save ebp
        index_of_ebp = css + WORD * (2+asmgcroot.INDEX_OF_EBP)
        self.mc.MOV_sr(index_of_ebp, ebp.value)  # MOV [css.ebp], EBP
        # Save the "return address": we pretend that it's css
        self.mc.LEA_rs(eax.value, css)           # LEA eax, [css]
        frame_ptr = css + WORD * (2+asmgcroot.FRAME_PTR)
        self.mc.MOV_sr(frame_ptr, eax.value)     # MOV [css.frame], eax
        # Set up jf_extra_stack_depth to pretend that the return address
        # was at css, and so our stack frame is supposedly shorter by
        # (PASS_ON_MY_FRAME-JIT_USE_WORDS+1) words
        delta = PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS + 1
        self.change_extra_stack_depth = True
        self.asm.set_extra_stack_depth(self.mc, -delta * WORD)
        css_value = eax
    #
    # <--here--> would come a memory fence, if the CPU needed one.
    self.mc.MOV(heap(fastgil), css_value)
    #
    if not we_are_translated():        # for testing: we should not access
        self.mc.ADD(ebp, imm(1))       # ebp any more
def test_reuse_scratch_register(self):
    # Wrap the constant in intmask(): 0xFEDCBA9876543210 does not fit
    # in a *signed* machine-sized integer, and the sibling test of the
    # same name already uses intmask() for exactly this constant.
    base_addr = intmask(0xFEDCBA9876543210)
    cb = LocationCodeBuilder64()
    # Explicitly bracket the two MOVs so the 64-bit base address is
    # loaded into the scratch register r11 only once and then reused.
    cb.begin_reuse_scratch_register()
    cb.MOV(ecx, heap(base_addr))
    cb.MOV(ecx, heap(base_addr + 8))
    cb.end_reuse_scratch_register()
    expected_instructions = (
        # mov r11, 0xFEDCBA9876543210
        '\x49\xBB\x10\x32\x54\x76\x98\xBA\xDC\xFE' +
        # mov rcx, [r11]
        '\x49\x8B\x0B' +
        # mov rcx, [r11+8]
        '\x49\x8B\x4B\x08'
    )
    assert cb.getvalue() == expected_instructions
def test_MOV_64bit_address_into_r11(self):
    # Loading from a 64-bit absolute address into r11 itself: the
    # address constant goes through r11 first, then r11 is dereferenced.
    base_addr = intmask(0xFEDCBA9876543210)
    cb = LocationCodeBuilder64()
    cb.MOV(r11, heap(base_addr))
    expected = ''.join([
        '\x49\xBB\x10\x32\x54\x76\x98\xBA\xDC\xFE',  # mov r11, 0xFEDCBA9876543210
        '\x4D\x8B\x1B',                              # mov r11, [r11]
    ])
    assert cb.getvalue() == expected
def test_MOV_64bit_address_into_r11(self):
    # Wrap the constant in intmask(): 0xFEDCBA9876543210 does not fit
    # in a *signed* machine-sized integer, and the sibling test of the
    # same name already uses intmask() for exactly this constant.
    base_addr = intmask(0xFEDCBA9876543210)
    cb = LocationCodeBuilder64()
    cb.MOV(r11, heap(base_addr))
    expected_instructions = (
        # mov r11, 0xFEDCBA9876543210
        '\x49\xBB\x10\x32\x54\x76\x98\xBA\xDC\xFE' +
        # mov r11, [r11]
        '\x4D\x8B\x1B'
    )
    assert cb.getvalue() == expected_instructions
def call_releasegil_addr_and_move_real_arguments(self, fastgil):
    """Emit the machine code that releases the GIL before the real call.

    Shadowstack-only variant (asmgcc is asserted away): saves the
    shadowstack top into ebx for the later comparison, then stores 0
    into the 'rpy_fastgil' word at address 'fastgil'.
    """
    from rpython.jit.backend.x86.assembler import heap
    assert self.is_call_release_gil
    assert not self.asm._is_asmgcc()
    #
    # Save this thread's shadowstack pointer into 'ebx',
    # for later comparison
    gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
    if gcrootmap:
        if gcrootmap.is_shadow_stack:
            rst = gcrootmap.get_root_stack_top_addr()
            self.mc.MOV(ebx, heap(rst))
    #
    # shadowstack: change 'rpy_fastgil' to 0 (it should be
    # non-zero right now).
    #self.change_extra_stack_depth = False
    #
    # <--here--> would come a memory fence, if the CPU needed one.
    self.mc.MOV(heap(fastgil), imm(0))
    #
    if not we_are_translated():        # for testing: we should not access
        self.mc.ADD(ebp, imm(1))       # ebp any more
def call_releasegil_addr_and_move_real_arguments(self, fastgil): from rpython.jit.backend.x86.assembler import heap # if not self.asm._is_asmgcc(): # shadowstack: change 'rpy_fastgil' to 0 (it should be # non-zero right now). self.change_extra_stack_depth = False css_value = imm(0) else: from rpython.memory.gctransform import asmgcroot # build a 'css' structure on the stack: 2 words for the linkage, # and 5/7 words as described for asmgcroot.ASM_FRAMEDATA, for a # total size of JIT_USE_WORDS. This structure is found at # [ESP+css]. css = -self.current_esp + ( WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS)) assert css >= 2 * WORD # Save ebp index_of_ebp = css + WORD * (2+asmgcroot.INDEX_OF_EBP) self.mc.MOV_sr(index_of_ebp, ebp.value) # MOV [css.ebp], EBP # Save the "return address": we pretend that it's css self.mc.LEA_rs(eax.value, css) # LEA eax, [css] frame_ptr = css + WORD * (2+asmgcroot.FRAME_PTR) self.mc.MOV_sr(frame_ptr, eax.value) # MOV [css.frame], eax # Set up jf_extra_stack_depth to pretend that the return address # was at css, and so our stack frame is supposedly shorter by # (PASS_ON_MY_FRAME-JIT_USE_WORDS+1) words delta = PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS + 1 self.change_extra_stack_depth = True self.asm.set_extra_stack_depth(self.mc, -delta * WORD) css_value = eax # self.mc.MOV(heap(fastgil), css_value) # if not we_are_translated(): # for testing: we should not access self.mc.ADD(ebp, imm(1)) # ebp any more; and ignore 'fastgil'
def move_real_result_and_call_reacqgil_addr(self, fastgil):
    """Emit the machine code that reacquires the GIL after the real call.

    Tries to grab 'rpy_fastgil' with an atomic XCHG; only if that
    fails (or, with shadowstack, if the saved shadowstack top in ebx
    no longer matches) does it fall back to calling the general
    'reacqgil_addr' function.  The call result is saved and restored
    around that slow call unless it was saved earlier.
    """
    from rpython.jit.backend.x86 import rx86
    #
    # check if we need to call the reacqgil() function or not
    # (to acquiring the GIL, remove the asmgcc head from
    # the chained list, etc.)
    mc = self.mc
    restore_edx = False
    if not self.asm._is_asmgcc():
        css = 0
        css_value = imm(0)
        old_value = ecx
    else:
        from rpython.memory.gctransform import asmgcroot
        css = WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS)
        if IS_X86_32:
            assert css >= 16
            if self.restype == 'L':    # long long result: eax/edx
                if not self.result_value_saved_early:
                    # edx holds half of the result; stash it on the stack
                    mc.MOV_sr(12, edx.value)
                    restore_edx = True
            css_value = edx
            old_value = ecx
        elif IS_X86_64:
            css_value = edi
            old_value = esi
        mc.LEA_rs(css_value.value, css)
    #
    # Use XCHG as an atomic test-and-set-lock.  It also implicitly
    # does a memory barrier.
    mc.MOV(old_value, imm(1))
    if rx86.fits_in_32bits(fastgil):
        mc.XCHG_rj(old_value.value, fastgil)
    else:
        # address does not fit in 32 bits: go through the scratch register
        mc.MOV_ri(X86_64_SCRATCH_REG.value, fastgil)
        mc.XCHG_rm(old_value.value, (X86_64_SCRATCH_REG.value, 0))
    mc.CMP(old_value, css_value)
    #
    gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
    if bool(gcrootmap) and gcrootmap.is_shadow_stack:
        from rpython.jit.backend.x86.assembler import heap
        #
        # When doing a call_release_gil with shadowstack, there
        # is the risk that the 'rpy_fastgil' was free but the
        # current shadowstack can be the one of a different
        # thread.  So here we check if the shadowstack pointer
        # is still the same as before we released the GIL (saved
        # in 'ebx'), and if not, we fall back to 'reacqgil_addr'.
        mc.J_il8(rx86.Conditions['NE'], 0)      # short jump, patched below
        jne_location = mc.get_relative_pos()
        # here, ecx (=old_value) is zero (so rpy_fastgil was in 'released'
        # state before the XCHG, but the XCHG acquired it by writing 1)
        rst = gcrootmap.get_root_stack_top_addr()
        mc = self.mc
        mc.CMP(ebx, heap(rst))
        mc.J_il8(rx86.Conditions['E'], 0)       # short jump, patched below
        je_location = mc.get_relative_pos()
        # revert the rpy_fastgil acquired above, so that the
        # general 'reacqgil_addr' below can acquire it again...
        mc.MOV(heap(fastgil), ecx)
        # patch the JNE above
        offset = mc.get_relative_pos() - jne_location
        assert 0 < offset <= 127
        mc.overwrite(jne_location - 1, chr(offset))
    else:
        mc.J_il8(rx86.Conditions['E'], 0)       # short jump, patched below
        je_location = mc.get_relative_pos()
    #
    # Yes, we need to call the reacqgil() function
    if not self.result_value_saved_early:
        self.save_result_value(save_edx=False)
    if self.asm._is_asmgcc():
        if IS_X86_32:
            mc.MOV_sr(4, old_value.value)
            mc.MOV_sr(0, css_value.value)
        # on X86_64, they are already in the right registers
    mc.CALL(imm(follow_jump(self.asm.reacqgil_addr)))
    if not self.result_value_saved_early:
        self.restore_result_value(save_edx=False)
    #
    # patch the JE above
    offset = mc.get_relative_pos() - je_location
    assert 0 < offset <= 127
    mc.overwrite(je_location - 1, chr(offset))
    #
    if restore_edx:
        mc.MOV_rs(edx.value, 12)   # restore this
    #
    if self.result_value_saved_early:
        self.restore_result_value(save_edx=True)
    #
    if not we_are_translated():        # for testing: now we can access
        mc.SUB(ebp, imm(1))            # ebp again
    #
    # Now that we required the GIL, we can reload a possibly modified ebp
    if self.asm._is_asmgcc():
        # special-case: reload ebp from the css
        from rpython.memory.gctransform import asmgcroot
        index_of_ebp = css + WORD * (2 + asmgcroot.INDEX_OF_EBP)
        mc.MOV_rs(ebp.value, index_of_ebp)  # MOV EBP, [css.ebp]
def move_real_result_and_call_reacqgil_addr(self, fastgil):
    """Emit the machine code that reacquires the GIL after the real call.

    Fast path: an atomic XCHG on the 'rpy_fastgil' word.  Slow path
    (XCHG found the lock taken, or, with shadowstack, ebx no longer
    matches the saved shadowstack top): call the general
    'reacqgil_addr' function, saving/restoring the call result around
    it unless saved earlier.
    """
    from rpython.jit.backend.x86 import rx86
    #
    # check if we need to call the reacqgil() function or not
    # (to acquiring the GIL, remove the asmgcc head from
    # the chained list, etc.)
    mc = self.mc
    restore_edx = False
    if not self.asm._is_asmgcc():
        css = 0
        css_value = imm(0)
        old_value = ecx
    else:
        from rpython.memory.gctransform import asmgcroot
        css = WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS)
        if IS_X86_32:
            assert css >= 16
            if self.restype == 'L':    # long long result: eax/edx
                if not self.result_value_saved_early:
                    # edx holds half of the result; stash it on the stack
                    mc.MOV_sr(12, edx.value)
                    restore_edx = True
            css_value = edx
            old_value = ecx
        elif IS_X86_64:
            css_value = edi
            old_value = esi
        mc.LEA_rs(css_value.value, css)
    #
    # Use XCHG as an atomic test-and-set-lock.  It also implicitly
    # does a memory barrier.
    mc.MOV(old_value, imm(1))
    if rx86.fits_in_32bits(fastgil):
        mc.XCHG_rj(old_value.value, fastgil)
    else:
        # address does not fit in 32 bits: go through the scratch register
        mc.MOV_ri(X86_64_SCRATCH_REG.value, fastgil)
        mc.XCHG_rm(old_value.value, (X86_64_SCRATCH_REG.value, 0))
    mc.CMP(old_value, css_value)
    #
    gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
    if bool(gcrootmap) and gcrootmap.is_shadow_stack:
        from rpython.jit.backend.x86.assembler import heap
        #
        # When doing a call_release_gil with shadowstack, there
        # is the risk that the 'rpy_fastgil' was free but the
        # current shadowstack can be the one of a different
        # thread.  So here we check if the shadowstack pointer
        # is still the same as before we released the GIL (saved
        # in 'ebx'), and if not, we fall back to 'reacqgil_addr'.
        mc.J_il8(rx86.Conditions['NE'], 0)      # short jump, patched below
        jne_location = mc.get_relative_pos()
        # here, ecx (=old_value) is zero (so rpy_fastgil was in 'released'
        # state before the XCHG, but the XCHG acquired it by writing 1)
        rst = gcrootmap.get_root_stack_top_addr()
        mc = self.mc
        mc.CMP(ebx, heap(rst))
        mc.J_il8(rx86.Conditions['E'], 0)       # short jump, patched below
        je_location = mc.get_relative_pos()
        # revert the rpy_fastgil acquired above, so that the
        # general 'reacqgil_addr' below can acquire it again...
        mc.MOV(heap(fastgil), ecx)
        # patch the JNE above
        offset = mc.get_relative_pos() - jne_location
        assert 0 < offset <= 127
        mc.overwrite(jne_location-1, chr(offset))
    else:
        mc.J_il8(rx86.Conditions['E'], 0)       # short jump, patched below
        je_location = mc.get_relative_pos()
    #
    # Yes, we need to call the reacqgil() function
    if not self.result_value_saved_early:
        self.save_result_value(save_edx=False)
    if self.asm._is_asmgcc():
        if IS_X86_32:
            mc.MOV_sr(4, old_value.value)
            mc.MOV_sr(0, css_value.value)
        # on X86_64, they are already in the right registers
    mc.CALL(imm(follow_jump(self.asm.reacqgil_addr)))
    if not self.result_value_saved_early:
        self.restore_result_value(save_edx=False)
    #
    # patch the JE above
    offset = mc.get_relative_pos() - je_location
    assert 0 < offset <= 127
    mc.overwrite(je_location-1, chr(offset))
    #
    if restore_edx:
        mc.MOV_rs(edx.value, 12)   # restore this
    #
    if self.result_value_saved_early:
        self.restore_result_value(save_edx=True)
    #
    if not we_are_translated():        # for testing: now we can access
        mc.SUB(ebp, imm(1))            # ebp again
    #
    # Now that we required the GIL, we can reload a possibly modified ebp
    if self.asm._is_asmgcc():
        # special-case: reload ebp from the css
        from rpython.memory.gctransform import asmgcroot
        index_of_ebp = css + WORD * (2+asmgcroot.INDEX_OF_EBP)
        mc.MOV_rs(ebp.value, index_of_ebp)  # MOV EBP, [css.ebp]
def move_real_result_and_call_reacqgil_addr(self, fastgil): from rpython.jit.backend.x86 import rx86 # # check if we need to call the reacqgil() function or not # (to acquiring the GIL, remove the asmgcc head from # the chained list, etc.) mc = self.mc restore_edx = False if not self.asm._is_asmgcc(): css = 0 css_value = imm(0) old_value = ecx else: from rpython.memory.gctransform import asmgcroot css = WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS) if IS_X86_32: assert css >= 16 if self.restype == 'L': # long long result: eax/edx if not self.result_value_saved_early: mc.MOV_sr(12, edx.value) restore_edx = True css_value = edx # note: duplicated in ReacqGilSlowPath old_value = ecx # elif IS_X86_64: css_value = edi old_value = esi mc.LEA_rs(css_value.value, css) # # Use XCHG as an atomic test-and-set-lock. It also implicitly # does a memory barrier. mc.MOV(old_value, imm(1)) if rx86.fits_in_32bits(fastgil): mc.XCHG_rj(old_value.value, fastgil) else: mc.MOV_ri(X86_64_SCRATCH_REG.value, fastgil) mc.XCHG_rm(old_value.value, (X86_64_SCRATCH_REG.value, 0)) mc.CMP(old_value, css_value) # gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap if bool(gcrootmap) and gcrootmap.is_shadow_stack: from rpython.jit.backend.x86.assembler import heap # # When doing a call_release_gil with shadowstack, there # is the risk that the 'rpy_fastgil' was free but the # current shadowstack can be the one of a different # thread. So here we check if the shadowstack pointer # is still the same as before we released the GIL (saved # in 'ebx'), and if not, we fall back to 'reacqgil_addr'. mc.J_il(rx86.Conditions['NE'], 0xfffff) # patched later early_jump_addr = mc.get_relative_pos(break_basic_block=False) # ^^^ this jump will go to almost the same place as the # ReacqGilSlowPath() computes, but one instruction farther, # i.e. just after the "MOV(heap(fastgil), ecx)". 
# here, ecx (=old_value) is zero (so rpy_fastgil was in 'released' # state before the XCHG, but the XCHG acquired it by writing 1) rst = gcrootmap.get_root_stack_top_addr() mc = self.mc mc.CMP(ebx, heap(rst)) sp = self.ReacqGilSlowPath(mc, rx86.Conditions['NE']) sp.early_jump_addr = early_jump_addr sp.fastgil = fastgil else: sp = self.ReacqGilSlowPath(mc, rx86.Conditions['NE']) sp.callbuilder = self sp.set_continue_addr(mc) self.asm.pending_slowpaths.append(sp) # if restore_edx: mc.MOV_rs(edx.value, 12) # restore this # if self.result_value_saved_early: self.restore_result_value(save_edx=True) # if not we_are_translated(): # for testing: now we can accesss mc.SUB(ebp, imm(1)) # ebp again # # Now that we required the GIL, we can reload a possibly modified ebp if self.asm._is_asmgcc(): # special-case: reload ebp from the css from rpython.memory.gctransform import asmgcroot index_of_ebp = css + WORD * (2 + asmgcroot.INDEX_OF_EBP) mc.MOV_rs(ebp.value, index_of_ebp) # MOV EBP, [css.ebp]
def move_real_result_and_call_reacqgil_addr(self, fastgil):
    """Emit the machine code that reacquires the GIL after the real call.

    Fast path: a LOCK CMPXCHG that stores this thread's identifier
    (read from thread-local storage) into the 'rpy_fastgil' word if it
    currently contains 0.  The slow path is delegated to a
    ReacqGilSlowPath object appended to 'self.asm.pending_slowpaths'.
    """
    from rpython.jit.backend.x86 import rx86
    #
    # check if we need to call the reacqgil() function or not
    # (to acquiring the GIL)
    mc = self.mc
    restore_edx = False
    #
    # Make sure we can use 'eax' in the sequel for CMPXCHG
    # On 32-bit, we also need to check if restype is 'L' for long long,
    # in which case we need to save eax and edx because they are both
    # used for the return value.
    if self.restype in (INT, 'L') and not self.result_value_saved_early:
        self.save_result_value(save_edx=self.restype == 'L')
        self.result_value_saved_early = True
    #
    # Use LOCK CMPXCHG as a compare-and-swap with memory barrier.
    tlsreg = self.get_tlofs_reg()
    thread_ident_ofs = lltls.get_thread_ident_offset(self.asm.cpu)
    #
    # ecx <- this thread's identifier; eax <- 0 (the expected
    # "released" value for the CMPXCHG).
    mc.MOV_rm(ecx.value, (tlsreg.value, thread_ident_ofs))
    mc.XOR_rr(eax.value, eax.value)
    if rx86.fits_in_32bits(fastgil):
        mc.LOCK()
        mc.CMPXCHG_jr(fastgil, ecx.value)
    else:
        # address does not fit in 32 bits: go through the scratch register
        mc.MOV_ri(X86_64_SCRATCH_REG.value, fastgil)
        mc.LOCK()
        mc.CMPXCHG_mr((X86_64_SCRATCH_REG.value, 0), ecx.value)
    #
    gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
    if bool(gcrootmap):
        from rpython.jit.backend.x86.assembler import heap
        assert gcrootmap.is_shadow_stack
        #
        # When doing a call_release_gil with shadowstack, there
        # is the risk that the 'rpy_fastgil' was free but the
        # current shadowstack can be the one of a different
        # thread.  So here we check if the shadowstack pointer
        # is still the same as before we released the GIL (saved
        # in 'ebx'), and if not, we fall back to 'reacqgil_addr'.
        mc.J_il(rx86.Conditions['NZ'], 0xfffff)    # patched later
        early_jump_addr = mc.get_relative_pos(break_basic_block=False)
        # ^^^ this jump will go to almost the same place as the
        # ReacqGilSlowPath() computes, but one instruction further,
        # i.e. just after the "MOV(heap(fastgil), 0)".

        rst = gcrootmap.get_root_stack_top_addr()
        mc = self.mc
        mc.CMP(ebx, heap(rst))
        sp = self.ReacqGilSlowPath(mc, rx86.Conditions['NE'])
        sp.early_jump_addr = early_jump_addr
        sp.fastgil = fastgil
    else:
        sp = self.ReacqGilSlowPath(mc, rx86.Conditions['NZ'])
    sp.callbuilder = self
    sp.set_continue_addr(mc)
    self.asm.pending_slowpaths.append(sp)
    #
    if restore_edx:
        mc.MOV_rs(edx.value, 12)   # restore this
    #
    if self.result_value_saved_early:
        self.restore_result_value(save_edx=True)
    #
    if not we_are_translated():        # for testing: now we can access
        mc.SUB(ebp, imm(1))            # ebp again