def synthesize(self):
    """
    Emit the register-dump instruction stream into self.code and record its
    size and placement.

    The stream consists of, in order: a sync, a single nop (whose position is
    remembered as self.lbl_op), one stqa per register number 0..127, and a
    stop with code 0x0FFF.

    Attributes set on self:
      lbl_op    -- code.size() as observed immediately before the nop is added
      xfer_size -- code size in bytes plus 1 to 16 bytes of padding; always a
                   multiple of 16
      code_lsa  -- (0x3FFFF - code size) masked with 0xFFF80, i.e. rounded
                   down to a 128-byte boundary

    Also prints the computed xfer_size.
    """
    # Layout rationale (from the original author): this code is not going to
    # exceed 256 instructions (1kb).  Knowing that, the register contents can
    # be safely placed at 0x3F400 in localstore, 3kb from the top.  The SPRE
    # will place the instruction stream as close to the top as possible, and
    # since it is at most 1kb of instructions it will not overlap with the
    # register contents.
    num_registers = 128
    # 0xFD00 * 4 == 0x3F400, so the stqa immediate is presumably a word
    # address -- TODO confirm against the stqa encoding.
    save_base = 0xFD00
    words_per_register = 4  # 16 bytes per saved register

    code = self.code
    spu.set_active_code(code)

    # Reload the instructions
    spu.sync(1)

    # Next instruction to execute: remember where the nop lands.
    lbl_op = code.size()
    spu.nop(0)

    # Store every register to its save slot.
    for reg in range(num_registers):
        spu.stqa(reg, save_base + reg * words_per_register)

    # Stop for next command
    spu.stop(0x0FFF)

    # Clearing active code here is important!
    spu.set_active_code(None)
    code.cache_code()

    code_size = len(code._prologue._code) * 4

    # NOTE(review): this adds a full 16 bytes when code_size is already a
    # multiple of 16.  Kept as-is to preserve the existing transfer size;
    # confirm whether the extra quadword is intentional.
    self.xfer_size = code_size + (16 - code_size % 16)
    # Single-argument form prints the same text under Python 2 and Python 3.
    print('xfer_size: %s' % self.xfer_size)

    self.code_lsa = (0x3FFFF - code_size) & 0xFFF80
    self.lbl_op = lbl_op
def synthesize(self):
    """
    Emit the register-dump instruction stream into self.code and record its
    size and placement.

    The stream consists of, in order: a sync, a single nop (whose position is
    remembered as self.lbl_op), one stqa per register number 0..127, and a
    stop with code 0x0FFF.

    Attributes set on self:
      lbl_op    -- code.size() as observed immediately before the nop is added
      xfer_size -- code size in bytes plus 1 to 16 bytes of padding; always a
                   multiple of 16
      code_lsa  -- (0x3FFFF - code size) masked with 0xFFF80, i.e. rounded
                   down to a 128-byte boundary

    Also prints the computed xfer_size.
    """
    # Layout rationale (from the original author): this code is not going to
    # exceed 256 instructions (1kb).  Knowing that, the register contents can
    # be safely placed at 0x3F400 in localstore, 3kb from the top.  The SPRE
    # will place the instruction stream as close to the top as possible, and
    # since it is at most 1kb of instructions it will not overlap with the
    # register contents.
    num_registers = 128
    # 0xFD00 * 4 == 0x3F400, so the stqa immediate is presumably a word
    # address -- TODO confirm against the stqa encoding.
    save_base = 0xFD00
    words_per_register = 4  # 16 bytes per saved register

    code = self.code
    spu.set_active_code(code)

    # Reload the instructions
    spu.sync(1)

    # Next instruction to execute: remember where the nop lands.
    lbl_op = code.size()
    spu.nop(0)

    # Store every register to its save slot.
    for reg in range(num_registers):
        spu.stqa(reg, save_base + reg * words_per_register)

    # Stop for next command
    spu.stop(0x0FFF)

    # Clearing active code here is important!
    spu.set_active_code(None)
    code.cache_code()

    code_size = len(code._prologue._code) * 4

    # NOTE(review): this adds a full 16 bytes when code_size is already a
    # multiple of 16.  Kept as-is to preserve the existing transfer size;
    # confirm whether the extra quadword is intentional.
    self.xfer_size = code_size + (16 - code_size % 16)
    # Single-argument form prints the same text under Python 2 and Python 3.
    print('xfer_size: %s' % self.xfer_size)

    self.code_lsa = (0x3FFFF - code_size) & 0xFFF80
    self.lbl_op = lbl_op
def bi_bug():
    """
    Exercise the bi (branch indirect) instruction against local-store address 0.

    A word holding 0x200D is written to address 0 with stqa, then bi is issued
    on a second word holding 0.  A stop(0x200A) follows the branch in the
    instruction stream.  The check passes when Processor.execute() returns 0xD.
    """
    stream = InstructionStream()
    processor = Processor()
    spu.set_active_code(stream)

    # Value stored at address 0; presumably the encoding of a stop
    # instruction with signal 0x200D -- TODO confirm.
    stop_word = SignedWord(0x200D)
    # Branch target for bi.
    branch_target = SignedWord(0x0)

    spu.stqa(stop_word, 0x0)
    spu.bi(branch_target)
    # Presumably only reached if the branch is not taken, in which case the
    # assertion below fails -- TODO confirm.
    spu.stop(0x200A)

    result = processor.execute(stream)
    assert result == 0xD
def bi_bug():
    """
    Exercise the bi (branch indirect) instruction against local-store address 0.

    A word holding 0x200D is written to address 0 with stqa, then bi is issued
    on a second word holding 0.  A stop(0x200A) follows the branch in the
    instruction stream.  The check passes when Processor.execute() returns 0xD.
    """
    stream = InstructionStream()
    processor = Processor()
    spu.set_active_code(stream)

    # Value stored at address 0; presumably the encoding of a stop
    # instruction with signal 0x200D -- TODO confirm.
    stop_word = SignedWord(0x200D)
    # Branch target for bi.
    branch_target = SignedWord(0x0)

    spu.stqa(stop_word, 0x0)
    spu.bi(branch_target)
    # Presumably only reached if the branch is not taken, in which case the
    # assertion below fails -- TODO confirm.
    spu.stop(0x200A)

    result = processor.execute(stream)
    assert result == 0xD
def TestMFC():
    """
    Round-trip a 32-element unsigned-int array through the mfc_* helpers and
    check that every element comes back incremented by one.

    Steps: build the array [0, 1, ..., 31], issue mfc_get with local-store
    address 0, wait on tag 2, add 1 to each quadword with an unrolled
    lqa/ai/stqa sequence, issue mfc_put with the same arguments, wait again,
    execute the stream and assert data[i] == i + 1 for every index.
    """
    size = 32
    nbytes = size * 4
    tag = 2
    tag_mask = 1 << tag

    data = extarray.extarray('I', range(size))
    code = synspu.InstructionStream()

    # Acquisition order is fixed: zero, ea_data, ls_data, size, tag.
    registers = [code.acquire_register() for _ in range(5)]
    r_zero, r_ea_data, r_ls_data, r_size, r_tag = registers

    # Load zero
    util.load_word(code, r_zero, 0)

    ea = data.buffer_info()[0]
    print('array ea: %X' % (ea))
    print('r_zero = %s, ea_data = %s, ls_data = %s, r_size = %s, r_tag = %s' %
          tuple(str(reg) for reg in registers))

    # Load the effective address
    print('test ea: %X' % ea)
    util.load_word(code, r_ea_data, ea)

    # Size in bytes, tag id and local-store address, each built as zero + imm.
    code.add(spu.ai(r_size, r_zero, nbytes))
    code.add(spu.ai(r_tag, r_zero, tag))
    code.add(spu.ai(r_ls_data, r_zero, 0))

    # Bring the data into local store at address 0, then wait on the tag.
    mfc_get(code, r_ls_data, r_ea_data, r_size, r_tag)
    mfc_write_tag_mask(code, tag_mask)
    mfc_read_tag_status_all(code)

    # Increment the data values by 1 using an unrolled loop (no branches).
    r_current = code.acquire_register()
    for lsa in range(0, nbytes, 16):
        quad_addr = lsa >> 2
        code.add(spu.lqa(r_current, quad_addr))
        code.add(spu.ai(r_current, r_current, 1))
        code.add(spu.stqa(r_current, quad_addr))
    code.release_register(r_current)

    # Store the values back to main memory, then wait on the tag again.
    mfc_put(code, r_ls_data, r_ea_data, r_size, r_tag)
    mfc_write_tag_mask(code, tag_mask)
    mfc_read_tag_status_all(code)

    # Cleanup, in the same order the registers were acquired.
    for reg in registers:
        code.release_register(reg)

    # Execute the code
    proc = synspu.Processor()
    proc.execute(code)

    for i in range(size):
        assert data[i] == i + 1
def TestMFC():
    """
    Round-trip a 32-element unsigned-int array through the mfc_* helpers and
    check that every element comes back incremented by one.

    Steps: build the array [0, 1, ..., 31], issue mfc_get with local-store
    address 0, wait on tag 2, add 1 to each quadword with an unrolled
    lqa/ai/stqa sequence, issue mfc_put with the same arguments, wait again,
    execute the stream and assert data[i] == i + 1 for every index.
    """
    import corepy.lib.extarray as extarray
    import corepy.arch.spu.platform as synspu

    size = 32
    nbytes = size * 4
    tag = 2
    tag_mask = 1 << tag

    data = extarray.extarray('I', range(size))
    code = synspu.InstructionStream()

    # Acquisition order is fixed: zero, ea_data, ls_data, size, tag.
    registers = [code.acquire_register() for _ in range(5)]
    r_zero, r_ea_data, r_ls_data, r_size, r_tag = registers

    # Load zero
    util.load_word(code, r_zero, 0)

    ea = data.buffer_info()[0]
    print('array ea: %X' % (ea))
    print('r_zero = %s, ea_data = %s, ls_data = %s, r_size = %s, r_tag = %s' %
          tuple(str(reg) for reg in registers))

    # Load the effective address
    print('test ea: %X' % ea)
    util.load_word(code, r_ea_data, ea)

    # Size in bytes, tag id and local-store address, each built as zero + imm.
    code.add(spu.ai(r_size, r_zero, nbytes))
    code.add(spu.ai(r_tag, r_zero, tag))
    code.add(spu.ai(r_ls_data, r_zero, 0))

    # Bring the data into local store at address 0, then wait on the tag.
    mfc_get(code, r_ls_data, r_ea_data, r_size, r_tag)
    mfc_write_tag_mask(code, tag_mask)
    mfc_read_tag_status_all(code)

    # Increment the data values by 1 using an unrolled loop (no branches).
    r_current = code.acquire_register()
    for lsa in range(0, nbytes, 16):
        quad_addr = lsa >> 2
        code.add(spu.lqa(r_current, quad_addr))
        code.add(spu.ai(r_current, r_current, 1))
        code.add(spu.stqa(r_current, quad_addr))
    code.release_register(r_current)

    # Store the values back to main memory, then wait on the tag again.
    mfc_put(code, r_ls_data, r_ea_data, r_size, r_tag)
    mfc_write_tag_mask(code, tag_mask)
    mfc_read_tag_status_all(code)

    # Cleanup, in the same order the registers were acquired.
    for reg in registers:
        code.release_register(reg)

    # Execute the code
    proc = synspu.Processor()
    proc.execute(code)

    for i in range(size):
        assert data[i] == i + 1