示例#1
0
def TestInt():
    """Emit a few add-immediate instructions and a stop, then check the result.

    The program is executed with ``stop=True``; the test asserts that the
    first two elements of the returned value are 0 and 0x200D.
    """
    prgm = Program()
    stream = prgm.get_stream()
    proc = Processor()

    spu.set_active_code(stream)

    r13 = prgm.acquire_register(reg_name=13)
    r20 = prgm.acquire_register(reg_name=20)

    # One add-immediate on r20, followed by five identical ones on r13.
    spu.ai(r20, r20, 13)
    for _ in range(5):
        spu.ai(r13, r13, 13)

    spu.stop(0x200D)

    prgm += stream
    result = proc.execute(prgm, stop=True)  # , debug = True)

    assert result[0] == 0
    assert result[1] == 0x200D
示例#2
0
def TestInt():
  """Build a tiny integer program, run it, and verify the returned values.

  Asserts that element 0 of the result is 0 and element 1 is the stop
  code 0x200D.
  """
  prgm = Program()
  code = prgm.get_stream()
  proc = Processor()

  spu.set_active_code(code)

  reg13 = prgm.acquire_register(reg_name = 13)
  reg20 = prgm.acquire_register(reg_name = 20)

  # r20 receives one add-immediate, r13 five; the immediate is always 13.
  for reg, repeats in ((reg20, 1), (reg13, 5)):
    for _ in range(repeats):
      spu.ai(reg, reg, 13)

  spu.stop(0x200D)

  prgm += code
  outcome = proc.execute(prgm, stop = True) # , debug = True)

  assert outcome[0] == 0
  assert outcome[1] == 0x200D
示例#3
0
    def execute(self, code, mode='int', debug=False, params=None, n_spus=1):
        """Start `code` under debugger control and wait for the first debug stop.

        Four stop instructions (two DEBUG_STOP, two DEBUG_STOP_TARGET) are
        appended to `code`, the code is cached, the execution parameters are
        filled in, self.replace() is called with self.last_stop[0] and a
        branch to the first debug stop, and the stream is launched through
        spe.Processor.execute.

        Arguments:
          code   -- instruction stream to run; a ParallelInstructionStream
                    is rejected with an Exception.
          mode   -- accepted, but overwritten with 'async' before launch.
          debug  -- forwarded unchanged to spe.Processor.execute.
          params -- optional spu_exec.ExecParams; created here when None.
          n_spus -- not used by this method.

        Returns None when `code` is empty, otherwise the value returned by
        self.wait_debug().
        """

        if type(code) is ParallelInstructionStream:
            raise Exception(
                'DebugProcessor does not support ParallelInstructionStream')

        self.code = code

        # Nothing to run
        if len(code) == 0:
            return None

        # Add the debug instructions - two each for normal instructions and branch targets
        # The index of each appended stop is recorded before it is added.
        self.debug_idx = self.code.size()
        self.code.add(spu.stop(DEBUG_STOP))

        self.debug_branch = self.code.size()
        self.code.add(spu.stop(DEBUG_STOP))

        self.debug_target_idx = self.code.size()
        self.code.add(spu.stop(DEBUG_STOP_TARGET))

        self.debug_target_branch = self.code.size()
        self.code.add(spu.stop(DEBUG_STOP_TARGET))

        # Cache the code here
        if not code._cached:
            code.cache_code()

        # Setup the parameter structure
        if params is None:
            params = spu_exec.ExecParams()

        addr = code._prologue.inst_addr()
        params.addr = addr
        params.size = len(code.render_code) * 4  # size in bytes

        self.params = params
        self.ea = code._prologue.inst_addr()
        # NOTE(review): presumably the local-store address the code is loaded
        # at (top of a 256 KB store, rounded down to 128 bytes) - confirm.
        self.lsa = (0x3FFFF - params.size) & 0xFFF80
        # NOTE(review): adds a full 16 bytes when params.size is already a
        # multiple of 16 - confirm this padding is intended.
        self.size = params.size + (16 - params.size % 16)
        self.last_pc = self.lsa
        self.last_stop = (1, )

        # Byte offsets of the two debug stops, shifted right by 2
        # (presumably converting byte addresses to word addresses).
        self.debug_lsa = (self.lsa + self.code.code_offset * 4 +
                          self.debug_idx * 4) >> 2
        self.debug_target_lsa = (self.lsa + self.code.code_offset * 4 +
                                 self.debug_target_idx * 4) >> 2

        # The caller-supplied mode is discarded; the launch is always 'async'.
        mode = 'async'

        # TODO: Factor replacing into one function in case the first one is a branch
        self.replace(self.last_stop[0],
                     spu.bra(self.debug_lsa, ignore_active=True))

        self.spe_id = spe.Processor.execute(self, code, mode, debug, params)
        code.print_code()

        retval = self.wait_debug()

        return retval
示例#4
0
  def save_register(self, reg): # , branch_to_save = False):
    """Emit instructions that store `reg` through self.ls_buffer and test for a full buffer.

    Three temporary registers are acquired from the active code and released
    again before returning.  The emitted sequence stores `reg` with stqx at
    (self.ls_buffer, offset), adds 16 to offset, compares it with size,
    writes size/offset/test to the outbound mailbox channel, calls
    self.save_ls_buffer(), and finally patches a placeholder with a brz on
    the comparison result.

    NOTE(review): the original author marked the mailbox values below as
    wrong; treat this routine as unfinished.
    """
    code = spu.get_active_code()

    offset = code.acquire_register()
    size = code.acquire_register()
    test = code.acquire_register()
    regs = [offset, size, test]

    # Rotate self.ls_buffer by 4 and by 8 bytes into the temporaries
    # (presumably selecting the offset and size words of the buffer header).
    spu.rotqbyi(offset, self.ls_buffer, 4)
    spu.rotqbyi(size,   self.ls_buffer, 8)

    spu.stqx(reg, self.ls_buffer, offset)

    # Advance by 16 and compare the new offset with size.
    spu.ai(offset, offset, 16)
    spu.ceq(test,  offset, size)

    spu.wrch(size, dma.SPU_WrOutMbox)
    spu.wrch(offset, dma.SPU_WrOutMbox)
    spu.wrch(test, dma.SPU_WrOutMbox)
    # !!! STOPPED HERE !!! THESE VALUES ARE WRONG !!!
    # Remember where the placeholder stop goes so it can be overwritten below.
    lbl_ls_full = code.size()
    spu.stop(0xB)
    self.save_ls_buffer(ls_size = size)

    spu.nop(0)
    # Replace the placeholder with a brz whose offset is the number of
    # instructions emitted since the placeholder.
    code[lbl_ls_full] = spu.brz(test, (code.size() - lbl_ls_full), ignore_active = True)

    code.release_registers(regs)
    return
示例#5
0
    def save_register(self, reg):  # , branch_to_save = False):
        """Emit instructions that store `reg` through self.ls_buffer and test for a full buffer.

        Three temporary registers are acquired from the active code and
        released again before returning.  The emitted sequence stores `reg`
        with stqx at (self.ls_buffer, offset), adds 16 to offset, compares
        it with size, writes size/offset/test to the outbound mailbox
        channel, calls self.save_ls_buffer(), and finally patches a
        placeholder with a brz on the comparison result.

        NOTE(review): the original author marked the mailbox values below
        as wrong; treat this routine as unfinished.
        """
        code = spu.get_active_code()

        offset = code.acquire_register()
        size = code.acquire_register()
        test = code.acquire_register()
        regs = [offset, size, test]

        # Rotate self.ls_buffer by 4 and by 8 bytes into the temporaries
        # (presumably selecting the offset and size words of the buffer header).
        spu.rotqbyi(offset, self.ls_buffer, 4)
        spu.rotqbyi(size, self.ls_buffer, 8)

        spu.stqx(reg, self.ls_buffer, offset)

        # Advance by 16 and compare the new offset with size.
        spu.ai(offset, offset, 16)
        spu.ceq(test, offset, size)

        spu.wrch(size, dma.SPU_WrOutMbox)
        spu.wrch(offset, dma.SPU_WrOutMbox)
        spu.wrch(test, dma.SPU_WrOutMbox)
        # !!! STOPPED HERE !!! THESE VALUES ARE WRONG !!!
        # Remember where the placeholder stop goes so it can be overwritten below.
        lbl_ls_full = code.size()
        spu.stop(0xB)
        self.save_ls_buffer(ls_size=size)

        spu.nop(0)
        # Replace the placeholder with a brz whose offset is the number of
        # instructions emitted since the placeholder.
        code[lbl_ls_full] = spu.brz(test, (code.size() - lbl_ls_full),
                                    ignore_active=True)

        code.release_registers(regs)
        return
示例#6
0
 def test(self, cmp, count_var):
   """Emit a placeholder instruction for the test and remember its operands.

   self._branch_idx receives the length of the active code before the
   placeholder is emitted; cmp and count_var are kept in self._cmp and
   self._count.
   """
   self._branch_idx = len(spu.get_active_code())
   # Placeholder stop; NOTE(review): presumably patched with a branch later.
   spu.stop(0xB)
   self._cmp, self._count = cmp, count_var
示例#7
0
def TestParams():
    """Sum the ten execution parameters in generated code and check the total.

    Parameters p1..p10 are set to 1..10.  The generated code adds them into
    the gp_return register, compares the sum with 55 and selects one of two
    stop codes; the test asserts the result is (55, 0x200A) in its first two
    elements.
    """
    # Run this with a stop instruction and examine the registers
    prgm = Program()
    stream = prgm.get_stream()
    proc = Processor()

    total = prgm.gp_return
    scratch = prgm.acquire_register()

    # Zero the sum
    stream.add(spu.xor(total, total, total))

    param_regs = (
        spu_param_1, spu_param_2, spu_param_3, spu_param_4, spu_param_5,
        spu_param_6, spu_param_7, spu_param_8, spu_param_9, spu_param_10,
    )
    for param in param_regs:
        copy_param(stream, scratch, param)
        stream.add(spu.a(total, total, scratch))

    stream.add(spu.ceqi(scratch, total, 55))

    stream.add(spu.brz(scratch, 2))
    stream.add(spu.stop(0x200A))
    stream.add(spu.stop(0x200B))

    # p1 .. p10 carry the values 1 .. 10
    params = spu_exec.ExecParams()
    for index in range(1, 11):
        setattr(params, 'p%d' % index, index)

    prgm += stream
    result = proc.execute(prgm, params=params, stop=True)

    assert result[0] == 55
    assert result[1] == 0x200A
示例#8
0
  def execute(self, code, mode = 'int', debug = False, params = None, n_spus = 1):
    """Start `code` under debugger control and wait for the first debug stop.

    Four stop instructions (two DEBUG_STOP, two DEBUG_STOP_TARGET) are
    appended to `code`, the code is cached, the execution parameters are
    filled in, self.replace() is called with self.last_stop[0] and a branch
    to the first debug stop, and the stream is launched through
    spe.Processor.execute.

    Arguments:
      code   -- instruction stream to run; a ParallelInstructionStream is
                rejected with an Exception.
      mode   -- accepted, but overwritten with 'async' before launch.
      debug  -- forwarded unchanged to spe.Processor.execute.
      params -- optional spu_exec.ExecParams; created here when None.
      n_spus -- not used by this method.

    Returns None when `code` is empty, otherwise the value returned by
    self.wait_debug().
    """

    if type(code) is ParallelInstructionStream:
      raise Exception('DebugProcessor does not support ParallelInstructionStream')

    self.code = code

    # Nothing to run
    if len(code) == 0:
      return None

    # Add the debug instructions - two each for normal instructions and branch targets
    # The index of each appended stop is recorded before it is added.
    self.debug_idx = self.code.size()
    self.code.add(spu.stop(DEBUG_STOP))

    self.debug_branch = self.code.size()
    self.code.add(spu.stop(DEBUG_STOP))

    self.debug_target_idx = self.code.size()
    self.code.add(spu.stop(DEBUG_STOP_TARGET))

    self.debug_target_branch = self.code.size()
    self.code.add(spu.stop(DEBUG_STOP_TARGET))

    # Cache the code here
    if not code._cached:
      code.cache_code()

    # Setup the parameter structure
    if params is None:
      params = spu_exec.ExecParams()

    addr = code._prologue.inst_addr()
    params.addr = addr
    params.size = len(code.render_code) * 4 # size in bytes

    self.params = params
    self.ea   = code._prologue.inst_addr()
    # NOTE(review): presumably the local-store address the code is loaded at
    # (top of a 256 KB store, rounded down to 128 bytes) - confirm.
    self.lsa  = (0x3FFFF - params.size) & 0xFFF80;
    # NOTE(review): adds a full 16 bytes when params.size is already a
    # multiple of 16 - confirm this padding is intended.
    self.size = params.size + (16 - params.size % 16);
    self.last_pc   = self.lsa
    self.last_stop = (1,)

    # Byte offsets of the two debug stops, shifted right by 2
    # (presumably converting byte addresses to word addresses).
    self.debug_lsa = (self.lsa + self.code.code_offset * 4 + self.debug_idx * 4) >> 2
    self.debug_target_lsa = (self.lsa + self.code.code_offset * 4 + self.debug_target_idx * 4) >> 2

    # The caller-supplied mode is discarded; the launch is always 'async'.
    mode = 'async'

    # TODO: Factor replacing into one function in case the first one is a branch
    self.replace(self.last_stop[0], spu.bra(self.debug_lsa, ignore_active = True))

    self.spe_id = spe.Processor.execute(self, code, mode, debug, params)
    code.print_code()

    retval = self.wait_debug()

    return retval
示例#9
0
 def test(self, cmp, score, x_off, y_off):
   """Emit a placeholder instruction for the test and remember its operands.

   self._branch_idx receives the length of the active code before the
   placeholder is emitted; the four arguments are kept in self._cmp,
   self._score, self._x_off and self._y_off.
   """
   self._branch_idx = len(spu.get_active_code())
   # Placeholder stop; NOTE(review): presumably patched with a branch later.
   spu.stop(0xB)
   self._cmp, self._score = cmp, score
   self._x_off, self._y_off = x_off, y_off
示例#10
0
def TestParams():
    """Check that the ten execution parameters reach the generated code.

    p1..p10 are set to 1..10, summed into the gp_return register by the
    generated code and compared with 55; the test asserts the first two
    elements of the result are 55 and the stop code 0x200A.
    """
    # Run this with a stop instruction and examine the registers
    prgm = Program()
    code = prgm.get_stream()
    proc = Processor()

    acc = prgm.gp_return
    tmp = prgm.acquire_register()

    # Zero the sum
    code.add(spu.xor(acc, acc, acc))

    sources = [
        spu_param_1, spu_param_2, spu_param_3, spu_param_4, spu_param_5,
        spu_param_6, spu_param_7, spu_param_8, spu_param_9, spu_param_10
    ]
    for src in sources:
        copy_param(code, tmp, src)
        code.add(spu.a(acc, acc, tmp))

    # tmp = (acc == 55); a zero tmp branches over the first stop
    code.add(spu.ceqi(tmp, acc, 55))
    code.add(spu.brz(tmp, 2))
    code.add(spu.stop(0x200A))
    code.add(spu.stop(0x200B))

    params = spu_exec.ExecParams()
    for number in range(1, 11):
        setattr(params, 'p%d' % number, number)

    prgm += code
    outcome = proc.execute(prgm, params=params, stop=True)

    assert outcome[0] == 55
    assert outcome[1] == 0x200A
示例#11
0
def TestDebug():
  prgm = Program()
  code = prgm.get_stream()
  proc = DebugProcessor()

  spu.set_active_code(code)

  ra = code.acquire_register()
  rb = code.acquire_register()
  rc = code.acquire_register()
  rd = code.acquire_register()
  re = code.acquire_register()
  rf = code.acquire_register()
  rg = code.acquire_register()
  rh = code.acquire_register()  

  spu.ai(ra, 0, 14)
  spu.ai(rb, 0, 13)
  spu.ai(rc, 0, 14)
  spu.brnz(14, 3)
  spu.ai(rd, 0, 15)
  spu.ai(re, 0, 16)
  spu.ai(rf, 0, 17)
  spu.ai(rg, 0, 18)
  spu.ai(rh, 0, 19)    
  spu.nop(0)

  spu.stop(0x200A)

  prgm += code
  r = proc.execute(prgm) # , debug = True)

  r = proc.nexti()
  r = proc.nexti()
  r = proc.nexti()
  r = proc.nexti()
    
  while r != None:
    r = proc.nexti()
    if r is not None:
      regs = proc.dump_regs()
      print '******', regs[122:]
    
  assert(r == None)
  print 'int result:', r
  # while True:
  #   pass
  return
示例#12
0
def TestDebug():
    prgm = Program()
    code = prgm.get_stream()
    proc = DebugProcessor()

    spu.set_active_code(code)

    ra = code.acquire_register()
    rb = code.acquire_register()
    rc = code.acquire_register()
    rd = code.acquire_register()
    re = code.acquire_register()
    rf = code.acquire_register()
    rg = code.acquire_register()
    rh = code.acquire_register()

    spu.ai(ra, 0, 14)
    spu.ai(rb, 0, 13)
    spu.ai(rc, 0, 14)
    spu.brnz(14, 3)
    spu.ai(rd, 0, 15)
    spu.ai(re, 0, 16)
    spu.ai(rf, 0, 17)
    spu.ai(rg, 0, 18)
    spu.ai(rh, 0, 19)
    spu.nop(0)

    spu.stop(0x200A)

    prgm += code
    r = proc.execute(prgm)  # , debug = True)

    r = proc.nexti()
    r = proc.nexti()
    r = proc.nexti()
    r = proc.nexti()

    while r != None:
        r = proc.nexti()
        if r is not None:
            regs = proc.dump_regs()
            print '******', regs[122:]

    assert (r == None)
    print 'int result:', r
    # while True:
    #   pass
    return
示例#13
0
 def _synthesize_epilogue(self):
     """
     Build the epilogue: the epilogue label followed by a stop signal with
     return type 0x2000 (EXIT_SUCCESS). (BE Handbook, p. 422).
     """
     label = self.lbl_epilogue
     exit_stop = spu.stop(0x2000, ignore_active=True)
     self._epilogue = [label, exit_stop]
示例#14
0
  def cache_code(self):
    """
    Synthesize the prologue, add a stop signal with return type 0x2000
    (EXIT_SUCCESS) to the end of the instruction stream (BE Handbook,
    p. 422), and append the stream's instructions to the prologue's code.

    On return self._epilogue refers to this stream itself and
    self._cached is True.
    """

    # Generate the prologue
    self._synthesize_prologue()

    # Don't have a real epilogue.
    self.add(spu.stop(0x2000))
    # self._check_alignment(self._code, 'spu code')

    # self.exec_module.make_executable(self._code.buffer_info()[0], len(self._code))

    # Append our instructions to the prologue's, first making sure the alignment is correct.
    # An lnop pads the prologue to an even number of instructions.
    if len(self._prologue._code) % 2 == 1: # Odd number of instructions
      self._prologue.add(spu.lnop(0))

    self._prologue._code.extend(self._code)
    self._prologue._check_alignment(self._prologue._code, 'spu prologue')

    # No separate epilogue object exists, so the stream stands in for it.
    self._epilogue = self
    self._cached = True
    return
示例#15
0
    def cache_code(self):
        """
        Synthesize the prologue, add a stop signal with return type 0x2000
        (EXIT_SUCCESS) to the end of the instruction stream (BE Handbook,
        p. 422), and append the stream's instructions to the prologue's
        code.

        On return self._epilogue refers to this stream itself and
        self._cached is True.
        """

        # Generate the prologue
        self._synthesize_prologue()

        # Don't have a real epilogue.
        self.add(spu.stop(0x2000))
        # self._check_alignment(self._code, 'spu code')

        # self.exec_module.make_executable(self._code.buffer_info()[0], len(self._code))

        # Append our instructions to the prologue's, first making sure the alignment is correct.
        # An lnop pads the prologue to an even number of instructions.
        if len(self._prologue._code) % 2 == 1:  # Odd number of instructions
            self._prologue.add(spu.lnop(0))

        self._prologue._code.extend(self._code)
        self._prologue._check_alignment(self._prologue._code, 'spu prologue')

        # No separate epilogue object exists, so the stream stands in for it.
        self._epilogue = self
        self._cached = True
        return
示例#16
0
def TestParams():
  """Execute a single stop(0x200D) with parameters p1..p10 set to 1..10.

  Nothing is asserted; the result of execute() is discarded.
  """
  # Run this with a stop instruction and examine the registers
  stream = InstructionStream()
  proc = Processor()

  # stream.add(spu.stop(0xA))
  stream.add(spu.stop(0x200D))

  params = ExecParams()
  for slot in range(1, 11):
    setattr(params, 'p%d' % slot, slot)

  proc.execute(stream, params = params)
示例#17
0
def TestParams():
    """Execute a single stop(0x200D) with parameters p1..p10 set to 1..10.

    Nothing is asserted; the result of execute() is discarded.
    """
    # Run this with a stop instruction and examine the registers
    program = InstructionStream()
    proc = Processor()

    # program.add(spu.stop(0xA))
    program.add(spu.stop(0x200D))

    params = ExecParams()
    for value in range(1, 11):
        setattr(params, 'p%d' % value, value)

    proc.execute(program, params=params)
示例#18
0
  def synthesize(self):
    """Generate the fixed instruction sequence stored in self.code.

    Emitted sequence: sync(1), one nop placeholder (its index is saved in
    self.lbl_op), 128 stqa instructions (one per register number 0..127),
    and stop(0x0FFF).  The code is then cached and self.xfer_size and
    self.code_lsa are computed from the length of the prologue's code.
    """
    # Okay.  This code is not going to exceed 256 instructions (1kb).  Knowing that,
    # the register contents can be safely placed at 0x3F400 in localstore, 3kb from
    # the top.  The SPRE will place the instruction stream as close to the top as
    # possible.  But since it is not going to be more than 1kb worth of instructions,
    # it will not overlap with the register contents.

    code = self.code
    spu.set_active_code(code)

    # Reload the instructions
    spu.sync(1)

    # Next instruction to execute
    lbl_op = code.size()
    spu.nop(0)

    # Placeholders for register store instructions
    # NOTE(review): 0xFD00 * 4 == 0x3F400, which matches the comment above if
    # stqa takes a word address; i * 4 would then be 16 bytes per register -
    # confirm against the stqa encoding.
    for i in range(128):
       spu.stqa(i, 0xFD00 + (i * 4))
    #  spu.stqa(i, 0xFE00 + (i * 4))

    # Stop for next command
    spu.stop(0x0FFF)

    # NOTE(review): lbl_regs is assigned but never used in this method.
    lbl_regs = code.size()

    # Create space for the saved registers
    #for i in range(128):
    #  # 16 bytes/register
    #  spu.nop(0)
    #  spu.lnop()
    #  spu.nop(0)
    #  spu.lnop()

    # Clearing active code here is important!
    spu.set_active_code(None)
    code.cache_code()

    # Size in bytes, 4 bytes per instruction
    code_size = len(code._prologue._code) * 4
    # NOTE(review): adds a full 16 bytes when code_size is already a multiple
    # of 16 - confirm this padding is intended.
    self.xfer_size = code_size  + (16 - (code_size) % 16);
    print 'xfer_size:', self.xfer_size

    self.code_lsa = (0x3FFFF - code_size) & 0xFFF80;
    self.lbl_op = lbl_op
    return
示例#19
0
文件: ispu.py 项目: tmaone/efi
    def synthesize(self):
        """Generate the fixed instruction sequence stored in self.code.

        Emitted sequence: sync(1), one nop placeholder (its index is saved
        in self.lbl_op), 128 stqa instructions (one per register number
        0..127), and stop(0x0FFF).  The code is then cached and
        self.xfer_size and self.code_lsa are computed from the length of the
        prologue's code.
        """
        # Okay.  This code is not going to exceed 256 instructions (1kb).  Knowing that,
        # the register contents can be safely placed at 0x3F400 in localstore, 3kb from
        # the top.  The SPRE will place the instruction stream as close to the top as
        # possible.  But since it is not going to be more than 1kb worth of instructions,
        # it will not overlap with the register contents.

        code = self.code
        spu.set_active_code(code)

        # Reload the instructions
        spu.sync(1)

        # Next instruction to execute
        lbl_op = code.size()
        spu.nop(0)

        # Placeholders for register store instructions
        # NOTE(review): 0xFD00 * 4 == 0x3F400, which matches the comment above
        # if stqa takes a word address; i * 4 would then be 16 bytes per
        # register - confirm against the stqa encoding.
        for i in range(128):
            spu.stqa(i, 0xFD00 + (i * 4))
        #  spu.stqa(i, 0xFE00 + (i * 4))

        # Stop for next command
        spu.stop(0x0FFF)

        # NOTE(review): lbl_regs is assigned but never used in this method.
        lbl_regs = code.size()

        # Create space for the saved registers
        #for i in range(128):
        #  # 16 bytes/register
        #  spu.nop(0)
        #  spu.lnop()
        #  spu.nop(0)
        #  spu.lnop()

        # Clearing active code here is important!
        spu.set_active_code(None)
        code.cache_code()

        # Size in bytes, 4 bytes per instruction
        code_size = len(code._prologue._code) * 4
        # NOTE(review): adds a full 16 bytes when code_size is already a
        # multiple of 16 - confirm this padding is intended.
        self.xfer_size = code_size + (16 - (code_size) % 16)
        print 'xfer_size:', self.xfer_size

        self.code_lsa = (0x3FFFF - code_size) & 0xFFF80
        self.lbl_op = lbl_op
        return
示例#20
0
 def _synthesize_epilogue(self):
     """
     Set the epilogue to the epilogue label followed by a stop signal with
     return type 0x2000 (EXIT_SUCCESS). (BE Handbook, p. 422).
     """
     self._epilogue = [
         self.lbl_epilogue,
         spu.stop(0x2000, ignore_active=True),
     ]
示例#21
0
def TestParams():
    """Sum the ten execution parameters in generated code and check the result.

    p1..p10 are set to 1..10.  The generated code adds them up, compares
    the sum with 55 and selects stop(0x200A) or stop(0x200B); the test
    asserts that execute() returns 0xA.
    """
    # Run this with a stop instruction and examine the registers
    stream = InstructionStream()
    proc = Processor()

    total = stream.acquire_register()
    scratch = stream.acquire_register()

    # Zero the sum
    stream.add(spu.xor(total, total, total))

    param_regs = (
        spu_param_1, spu_param_2, spu_param_3, spu_param_4, spu_param_5,
        spu_param_6, spu_param_7, spu_param_8, spu_param_9, spu_param_10,
    )
    for param in param_regs:
        copy_param(stream, scratch, param)
        stream.add(spu.a(total, total, scratch))

    # scratch = (total == 55); a zero scratch branches over the first stop
    stream.add(spu.ceqi(scratch, total, 55))
    stream.add(spu.brz(scratch, 2))
    stream.add(spu.stop(0x200A))
    stream.add(spu.stop(0x200B))

    params = spu_exec.ExecParams()
    for index in range(1, 11):
        setattr(params, 'p%d' % index, index)

    result = proc.execute(stream, params=params)

    assert result == 0xA
示例#22
0
def TestInt2(i0 = 0, i1 = 1):
  """Pack four sequence values into one quadword and emit a short loop.

  i0 and i1 seed the sequence; i2 = i0 + i1 and i3 = i1 + i2 are derived
  here.  Each value is loaded into its own register, moved to a different
  word position with shlqbyi/rotqbyi, and the four registers are added
  into r0.  A loop counter and an address register are then initialised and
  a two-instruction loop plus stop(0x2005) is emitted and executed.

  NOTE(review): the branch-offset expression near the end still references
  the undefined name `next`; see the comment there.
  """
  i2 = i0 + i1
  i3 = i1 + i2

  code = InstructionStream()
  proc = Processor()

  # Hard-coded register numbers
  r_loop = 4
  r_address = 5
  r0 = 6
  r1 = 7
  r2 = 8
  r3 = 9

  # Load arguments into a quadword

  #################
  # Pack quadword #
  #################

  def load_value_int32(code, reg, value, clear = False):
    """Emit ilhu/iohl to load the upper and lower 16 bits of value into reg."""
    # obviously, value should be 32 bit integer
    # Floor division keeps the immediate an integer on every Python version.
    code.add(spu.ilhu(reg, value // pow(2, 16)))  # immediate load halfword upper
    code.add(spu.iohl(reg, value % pow(2, 16))) # immediate or halfword lower
    if clear:
      code.add(spu.shlqbyi(reg, reg, 12)) # shift left qw by bytes, clears right bytes
    return

  load_value_int32(code, r0, i0, True)
  load_value_int32(code, r1, i1, True)
  code.add(spu.rotqbyi(r1, r1, 12)) # rotate qw by bytes
  load_value_int32(code, r2, i2, True)
  code.add(spu.rotqbyi(r2, r2, 8))
  load_value_int32(code, r3, i3, True)
  code.add(spu.rotqbyi(r3, r3, 4))
  code.add(spu.a(r0, r0, r1))
  code.add(spu.a(r0, r0, r2))
  code.add(spu.a(r0, r0, r3))

  ##########

  # Main loop to calculate the Fibonacci sequence

  # The helper's keyword is `clear`; the former `clear_bits` raised TypeError.
  load_value_int32(code, r_address, pow(2, 16), clear = False) # start at 64K

  load_value_int32(code, r_loop, 0, clear = False)
  start_label = code.size() + 1

  code.add(spu.sfi(r_loop, r_loop, 1))
  # NOTE(review): `next` is never assigned in this function, so it resolves
  # to the builtin and this expression raises TypeError.  The intended branch
  # target cannot be determined from this file - fix before relying on it.
  code.add(spu.brnz(r_loop, (-(next - start_label) * spu.WORD_SIZE)))

  code.add(spu.stop(0x2005))

  r = proc.execute(code)
  # assert(r == 12)
  # print 'int result:', r

  return
示例#23
0
def TestInt2(i0 = 0, i1 = 1):
  """Pack four sequence values into one quadword and emit a short loop.

  i0 and i1 seed the sequence; i2 = i0 + i1 and i3 = i1 + i2 are derived
  here.  Each value is loaded into its own register, moved to a different
  word position with shlqbyi/rotqbyi, and the four registers are added
  into r0.  A loop counter and an address register are then initialised and
  a two-instruction loop plus stop(0x2005) is emitted and executed.

  NOTE(review): the branch-offset expression near the end still references
  the undefined name `next`; see the comment there.
  """
  i2 = i0 + i1
  i3 = i1 + i2

  code = InstructionStream()
  proc = Processor()

  # Hard-coded register numbers
  r_loop = 4
  r_address = 5
  r0 = 6
  r1 = 7
  r2 = 8
  r3 = 9

  # Load arguments into a quadword

  #################
  # Pack quadword #
  #################

  def load_value_int32(code, reg, value, clear = False):
    """Emit ilhu/iohl to load the upper and lower 16 bits of value into reg."""
    # obviously, value should be 32 bit integer
    # Floor division keeps the immediate an integer on every Python version.
    code.add(spu.ilhu(reg, value // pow(2, 16)))  # immediate load halfword upper
    code.add(spu.iohl(reg, value % pow(2, 16))) # immediate or halfword lower
    if clear:
      code.add(spu.shlqbyi(reg, reg, 12)) # shift left qw by bytes, clears right bytes
    return

  load_value_int32(code, r0, i0, True)
  load_value_int32(code, r1, i1, True)
  code.add(spu.rotqbyi(r1, r1, 12)) # rotate qw by bytes
  load_value_int32(code, r2, i2, True)
  code.add(spu.rotqbyi(r2, r2, 8))
  load_value_int32(code, r3, i3, True)
  code.add(spu.rotqbyi(r3, r3, 4))
  code.add(spu.a(r0, r0, r1))
  code.add(spu.a(r0, r0, r2))
  code.add(spu.a(r0, r0, r3))

  ##########

  # Main loop to calculate the Fibonacci sequence

  # The helper's keyword is `clear`; the former `clear_bits` raised TypeError.
  load_value_int32(code, r_address, pow(2, 16), clear = False) # start at 64K

  load_value_int32(code, r_loop, 0, clear = False)
  start_label = code.size() + 1

  code.add(spu.sfi(r_loop, r_loop, 1))
  # NOTE(review): `next` is never assigned in this function, so it resolves
  # to the builtin and this expression raises TypeError.  The intended branch
  # target cannot be determined from this file - fix before relying on it.
  code.add(spu.brnz(r_loop, (-(next - start_label) * spu.WORD_SIZE)))

  code.add(spu.stop(0x2005))

  r = proc.execute(code)
  # assert(r == 12)
  # print 'int result:', r

  return
示例#24
0
    def GenerateStream(self, step=None):
        """Build an instruction stream from the text in the editor control.

        Each editor line is stripped and handled by its line index:

        * When `step` is given, every line other than line `step` is turned
          into stop(0x2FFF); labels (lines ending in ':') are kept as labels
          and blank or '#' comment lines are skipped.
        * A line with a breakpoint set becomes stop(0x2FFF).
        * Otherwise a label line adds the label, and any other non-blank,
          non-comment line is evaluated as ``spu.<text>`` after rewriting
          ``Label(x)`` to ``code.prgm.get_label('x')``.

        The stream is added to a new program, the program is cached, and
        the stream is returned.
        """
        prgm = env.Program()
        code = prgm.get_stream()
        lines = self.editCtrl.GetText().split('\n')

        for i, line in enumerate(lines):
            # For the stop case, want all instructions except the current one to be
            # STOP instructions.
            cmd = line.strip()
            if step is not None and i != step:
                if cmd == "" or cmd[0] == '#':
                    continue
                if cmd[-1] == ":":
                    # Label - better parsing?
                    code.add(code.prgm.get_label(cmd[:-1]))
                else:
                    code.add(spu.stop(0x2FFF))
                continue

            if self.editCtrl.IsBreakSet(i):
                code.add(spu.stop(0x2FFF))
                continue

            if cmd != "" and cmd[0] != '#':
                inst = None
                if cmd[-1] == ":":
                    # Label - better parsing?
                    inst = code.prgm.get_label(cmd[:-1])
                else:
                    # Instruction
                    strcmd = re.sub(r"Label\((.*?)\)",
                                    r"code.prgm.get_label('\1')", cmd)
                    # SECURITY: eval() runs whatever the editor contains;
                    # only use this with text the user typed themselves.
                    try:
                        inst = eval('spu.%s' % strcmd)
                    except Exception:
                        # Catch Exception rather than everything so that
                        # KeyboardInterrupt / SystemExit still propagate.
                        print 'Error creating instruction: %s' % cmd

                # NOTE(review): inst is still None here when eval() failed -
                # confirm that code.add(None) is acceptable.
                code.add(inst)

        prgm.add(code)
        prgm.cache_code()
        return code
示例#25
0
  def GenerateStream(self, step = None):
    """Build an instruction stream from the text in the editor control.

    Each editor line is stripped and handled by its line index:

    * When `step` is given, every line other than line `step` is turned
      into stop(0x2FFF); labels (lines ending in ':') are kept as labels
      and blank or '#' comment lines are skipped.
    * A line with a breakpoint set becomes stop(0x2FFF).
    * Otherwise a label line adds the label, and any other non-blank,
      non-comment line is evaluated as ``spu.<text>`` after rewriting
      ``Label(x)`` to ``code.prgm.get_label('x')``.

    The stream is added to a new program, the program is cached, and the
    stream is returned.
    """
    prgm = env.Program()
    code = prgm.get_stream()
    lines = self.editCtrl.GetText().split('\n')

    for i, line in enumerate(lines):
      # For the stop case, want all instructions except the current one to be
      # STOP instructions.
      cmd = line.strip()
      if step is not None and i != step:
        if cmd == "" or cmd[0] == '#':
          continue
        if cmd[-1] == ":":
          # Label - better parsing?
          code.add(code.prgm.get_label(cmd[:-1]))
        else:
          code.add(spu.stop(0x2FFF))
        continue

      if self.editCtrl.IsBreakSet(i):
        code.add(spu.stop(0x2FFF))
        continue

      if cmd != "" and cmd[0] != '#':
        inst = None
        if cmd[-1] == ":":
          # Label - better parsing?
          inst = code.prgm.get_label(cmd[:-1])
        else:
          # Instruction
          strcmd = re.sub(r"Label\((.*?)\)", r"code.prgm.get_label('\1')", cmd)
          # SECURITY: eval() runs whatever the editor contains; only use
          # this with text the user typed themselves.
          try:
            inst = eval('spu.%s' % strcmd)
          except Exception:
            # Catch Exception rather than everything so that
            # KeyboardInterrupt / SystemExit still propagate.
            print 'Error creating instruction: %s' % cmd

        # NOTE(review): inst is still None here when eval() failed -
        # confirm that code.add(None) is acceptable.
        code.add(inst)

    prgm.add(code)
    prgm.cache_code()
    return code
示例#26
0
def SimpleSPU():
    """
    Run two very simple SPU programs.

    The first computes 11 + 31 in the gp_return register, compares the sum
    with 42 and stops with 0x100A or 0x100B; the test asserts that the first
    two elements of the result are 42 and 0x100A.  The second loads the
    float 3.14 into fp_return, runs in 'fp' mode and prints the result.
    """
    prgm = env.Program()
    code = prgm.get_stream()
    proc = env.Processor()

    spu.set_active_code(code)

    # x is the general-purpose return register; test is acquired by name (55)
    #x    = code.acquire_register()
    x = code.gp_return
    test = prgm.acquire_register(reg_name=55)

    spu.xor(x, x, x)  # zero x
    spu.ai(x, x, 11)  # x = x + 11
    spu.ai(x, x, 31)  # x = x + 31

    spu.ceqi(test, x, 42)  # test = (x == 42)

    # If test is false (all 0s), skip the stop(0x100A) instruction
    spu.brz(test, 2)
    spu.stop(0x100A)
    spu.stop(0x100B)

    prgm.add(code)
    prgm.print_code(hex=True)
    r = proc.execute(prgm, mode='int', stop=True, debug=True)
    assert (r[0] == 42)
    assert (r[1] == 0x100A)

    # Second program: floating-point return value
    prgm = env.Program()
    code = prgm.get_stream()
    spu.set_active_code(code)

    util.load_float(code, code.fp_return, 3.14)

    prgm.add(code)
    prgm.print_code(hex=True)
    r = proc.execute(prgm, mode='fp')
    print r
    return
示例#27
0
def SimpleSPU():
  """
  Run two very simple SPU programs.

  The first computes 11 + 31 in the gp_return register, compares the sum
  with 42 and stops with 0x100A or 0x100B; the test asserts that the first
  two elements of the result are 42 and 0x100A.  The second loads the float
  3.14 into fp_return, runs in 'fp' mode and prints the result.
  """
  prgm = env.Program()
  code = prgm.get_stream()
  proc = env.Processor()

  spu.set_active_code(code)

  # x is the general-purpose return register; test is acquired by name (55)
  #x    = code.acquire_register()
  x = code.gp_return
  test = prgm.acquire_register(reg_name = 55)

  spu.xor(x, x, x) # zero x
  spu.ai(x, x, 11) # x = x + 11
  spu.ai(x, x, 31) # x = x + 31

  spu.ceqi(test, x, 42) # test = (x == 42)

  # If test is false (all 0s), skip the stop(0x100A) instruction
  spu.brz(test, 2)
  spu.stop(0x100A)
  spu.stop(0x100B)

  prgm.add(code)
  prgm.print_code(hex = True)
  r = proc.execute(prgm, mode = 'int', stop = True, debug = True)
  assert(r[0] == 42)
  assert(r[1] == 0x100A)

  # Second program: floating-point return value
  prgm = env.Program()
  code = prgm.get_stream()
  spu.set_active_code(code)

  util.load_float(code, code.fp_return, 3.14)

  prgm.add(code)
  prgm.print_code(hex = True)
  r = proc.execute(prgm, mode = 'fp')
  print r
  return
示例#28
0
def bi_bug():
    """
    Regression test around the `bi` (branch indirect) instruction.

    Emits a stqa of the word 0x200D to address 0x0, a bi through a word
    holding 0x0, and a trailing stop(0x200A), then asserts that execute()
    returns 0xD.
    NOTE(review): presumably 0x200D is meant to act as a stop instruction
    stored at address 0 that the branch then reaches - confirm.
    """
    code = InstructionStream()
    proc = Processor()

    spu.set_active_code(code)

    # Two SignedWord values: the word to store and the branch address
    stop_inst = SignedWord(0x200D)
    stop_addr = SignedWord(0x0)

    spu.stqa(stop_inst, 0x0)
    spu.bi(stop_addr)
    # Emitted after the branch; the assertion below expects 0xD, not 0xA
    spu.stop(0x200A)

    r = proc.execute(code)
    assert r == 0xD

    return
示例#29
0
文件: bi.py 项目: tmaone/efi
def bi_bug():
    """
    Regression test around the `bi` (branch indirect) instruction.

    Emits a stqa of the word 0x200D to address 0x0, a bi through a word
    holding 0x0, and a trailing stop(0x200A), then asserts that execute()
    returns 0xD.
    NOTE(review): presumably 0x200D is meant to act as a stop instruction
    stored at address 0 that the branch then reaches - confirm.
    """
    code = InstructionStream()
    proc = Processor()

    spu.set_active_code(code)

    # Two SignedWord values: the word to store and the branch address
    stop_inst = SignedWord(0x200D)
    stop_addr = SignedWord(0x0)

    spu.stqa(stop_inst, 0x0)
    spu.bi(stop_addr)
    # Emitted after the branch; the assertion below expects 0xD, not 0xA
    spu.stop(0x200A)

    r = proc.execute(code)
    assert (r == 0xD)

    return
示例#30
0
def TestInt():
    """Emit a few add-immediate instructions and a stop, then check the result.

    The stream is executed with ``stop=True``; the test asserts that the
    first two elements of the returned value are 0 and 0x200D.
    """
    stream = InstructionStream()
    proc = Processor()

    spu.set_active_code(stream)

    r13 = stream.acquire_register(reg=13)
    r20 = stream.acquire_register(reg=20)

    # One add-immediate on r20, followed by five identical ones on r13.
    spu.ai(r20, r20, 13)
    for _ in range(5):
        spu.ai(r13, r13, 13)

    spu.stop(0x200D)

    result = proc.execute(stream, stop=True)  # , debug = True)

    assert result[0] == 0
    assert result[1] == 0x200D
示例#31
0
def TestInt():
    code = InstructionStream()
    proc = Processor()

    spu.set_active_code(code)

    r13 = code.acquire_register(reg=13)
    r20 = code.acquire_register(reg=20)
    spu.ai(r20, r20, 13)
    spu.ai(r13, r13, 13)
    spu.ai(r13, r13, 13)
    spu.ai(r13, r13, 13)
    spu.ai(r13, r13, 13)
    spu.ai(r13, r13, 13)

    spu.stop(0x200D)

    code.print_code()
    r = proc.execute(code)  # , debug = True)
    print 'int result:', r
    # while True:
    #   pass
    return
示例#32
0
def TestInt():
  code = InstructionStream()
  proc = Processor()

  spu.set_active_code(code)
  
  r13 = code.acquire_register(reg = 13)
  r20 = code.acquire_register(reg = 20)
  spu.ai(r20, r20, 13)
  spu.ai(r13, r13, 13)
  spu.ai(r13, r13, 13)
  spu.ai(r13, r13, 13)
  spu.ai(r13, r13, 13)
  spu.ai(r13, r13, 13)
  
  spu.stop(0x200D)

  code.print_code()
  r = proc.execute(code) # , debug = True)
  print 'int result:', r
  # while True:
  #   pass
  return
示例#33
0
def TestParallel():
    """Run a three-instruction parallel stream asynchronously on six SPUs.

    Every id returned by execute() is joined; the final assertion is a
    no-op, so the test passes when nothing raises.
    """
    # Run this with a stop instruction and examine the registers and memory
    stream = ParallelInstructionStream()
    proc = Processor()

    stream.raw_data_size = 128 * 8

    reg = stream.acquire_register()
    for immediate in (0xCAFE, 0xBABE):
        stream.add(spu.ai(reg, reg, immediate))
    stream.add(spu.stop(0x2000))

    spe_ids = proc.execute(stream, mode='async', n_spus=6)

    for spe_id in spe_ids:
        proc.join(spe_id)

    assert True
示例#34
0
def TestParallel():
  """Launch a short parallel stream on six SPUs in 'async' mode and join them.

  The final assertion is a no-op, so the test passes when nothing raises.
  """
  # Run this with a stop instruction and examine the registers and memory
  code = ParallelInstructionStream()
  proc = Processor()

  code.raw_data_size = 128*8

  scratch = code.acquire_register()
  instructions = (
    lambda: spu.ai(scratch, scratch, 0xCAFE),
    lambda: spu.ai(scratch, scratch, 0xBABE),
    lambda: spu.stop(0x2000),
  )
  # Each instruction is created right before it is added, as in a plain
  # sequence of code.add(...) calls.
  for make_inst in instructions:
    code.add(make_inst())

  handles = proc.execute(code, mode='async', n_spus = 6)

  for handle in handles:
    proc.join(handle)

  assert True
示例#35
0
def TestParallel():
    # Run this with a stop instruction and examine the registers and memory
    prgm = ParallelProgram()
    code = prgm.get_stream()
    proc = Processor()

    code.raw_data_size = 128 * 8

    r = prgm.acquire_register()
    code.add(spu.ai(r, r, 0x2FE))
    code.add(spu.ai(r, r, 0x2BE))
    code.add(spu.stop(0x1FFF))

    prgm += code
    r = proc.execute(prgm, async=True, mode='void', n_spus=6)

    for speid in r:
        proc.join(speid)

    assert (True)
    return
示例#36
0
def TestParallel():
  # Run this with a stop instruction and examine the registers and memory
  prgm = ParallelProgram()
  code = prgm.get_stream()
  proc = Processor()

  code.raw_data_size = 128*8

  r = prgm.acquire_register()
  code.add(spu.ai(r, r, 0x2FE))
  code.add(spu.ai(r, r, 0x2BE))    
  code.add(spu.stop(0x1FFF))

  prgm += code
  r = proc.execute(prgm, async = True, mode='void', n_spus = 6)

  for speid in r:
    proc.join(speid)

  assert(True)
  return
示例#37
0
def SimpleSPU():
  """
  Run two small SPU programs that use labels as branch targets.

  The first computes 11 + 31 in the gp_return register, compares the sum
  with 42 and branches to the SKIP label to choose between stop(0x100A) and
  stop(0x100B); the test asserts that the first two elements of the result
  are 42 and 0x100A.  The second loops between the LOOP and BREAK labels,
  incrementing a counter until it equals the stop value and adding each
  counter value into gp_return; the test asserts the result is 55.
  """
  prgm = env.Program()
  code = prgm.get_stream()
  proc = env.Processor()

  spu.set_active_code(code)


  # x is the general-purpose return register; test is a freshly acquired one
  #x    = code.acquire_register()
  x = prgm.gp_return
  test = prgm.acquire_register()

  lbl_brz = prgm.get_label("BRZ")
  lbl_skip = prgm.get_label("SKIP")

  # Branch hint naming the BRZ and SKIP labels (both are placed below)
  spu.hbrr(lbl_brz, lbl_skip)
  spu.xor(x, x, x) # zero x
  spu.ai(x, x, 11) # x = x + 11
  spu.ai(x, x, 31) # x = x + 31

  spu.ceqi(test, x, 42) # test = (x == 42)

  # If test is false (all 0s), skip the stop(0x100A) instruction
  code.add(lbl_brz)
  spu.brz(test, lbl_skip)
  spu.stop(0x100A)
  code.add(lbl_skip)
  spu.stop(0x100B)

  prgm.add(code)
  prgm.print_code()
  r = proc.execute(prgm, mode = 'int', stop = True)
  print "ret", r
  assert(r[0] == 42)
  assert(r[1] == 0x100A)


  # Second program: a counted loop built from labels
  prgm = env.Program()
  code = prgm.get_stream()
  spu.set_active_code(code)

  lbl_loop = prgm.get_label("LOOP")
  lbl_break = prgm.get_label("BREAK")

  r_cnt = prgm.acquire_register()
  r_stop = prgm.acquire_register()
  r_cmp = prgm.acquire_register()
  r_foo = prgm.gp_return

  # Initialise the accumulator and the counter from r_zero, and the stop
  # value via util.load_word with 10
  spu.ori(r_foo, prgm.r_zero, 0)
  spu.ori(r_cnt, prgm.r_zero, 0)
  util.load_word(code, r_stop, 10)

  code.add(lbl_loop)

  # Leave the loop once the counter equals the stop value
  spu.ceq(r_cmp, r_cnt, r_stop)
  spu.brnz(r_cmp, lbl_break)
  spu.ai(r_cnt, r_cnt, 1)

  # Accumulate the counter
  spu.a(r_foo, r_foo, r_cnt)

  spu.br(lbl_loop)
  code.add(lbl_break)

  prgm.add(code)
  prgm.print_code()
  r = proc.execute(prgm, mode = 'int', stop = True)
  print "ret", r
  assert(r[0] == 55)

  return
示例#38
0
文件: spu_labels.py 项目: tmaone/efi
def SimpleSPU():
    """
  A very simple SPU that computes 11 + 31 and returns 0xA on success.
  """
    code = InstructionStream()
    proc = Processor()

    spu.set_active_code(code)

    # Acquire two registers
    #x    = code.acquire_register()
    x = code.gp_return
    test = code.acquire_register()

    lbl_brz = code.get_label("BRZ")
    lbl_skip = code.get_label("SKIP")

    spu.hbrr(lbl_brz, lbl_skip)
    spu.xor(x, x, x)  # zero x
    spu.ai(x, x, 11)  # x = x + 11
    spu.ai(x, x, 31)  # x = x + 31

    spu.ceqi(test, x, 42)  # test = (x == 42)

    # If test is false (all 0s), skip the stop(0x100A) instruction
    code.add(lbl_brz)
    spu.brz(test, lbl_skip)
    spu.stop(0x100A)
    code.add(lbl_skip)
    spu.stop(0x100B)

    code.print_code(hex=True, pro=True, epi=True)
    r = proc.execute(code, mode='int', stop=True)
    print "ret", r
    assert (r[0] == 42)
    assert (r[1] == 0x100A)

    code = InstructionStream()
    spu.set_active_code(code)

    lbl_loop = code.get_label("LOOP")
    lbl_break = code.get_label("BREAK")

    r_cnt = code.acquire_register()
    r_stop = code.acquire_register()
    r_cmp = code.acquire_register()
    r_foo = code.gp_return

    spu.ori(r_foo, code.r_zero, 0)
    spu.ori(r_cnt, code.r_zero, 0)
    util.load_word(code, r_stop, 10)

    code.add(lbl_loop)

    spu.ceq(r_cmp, r_cnt, r_stop)
    spu.brnz(r_cmp, lbl_break)
    spu.ai(r_cnt, r_cnt, 1)

    spu.a(r_foo, r_foo, r_cnt)

    spu.br(lbl_loop)
    code.add(lbl_break)

    code.print_code()
    r = proc.execute(code, mode='int', stop=True)
    print "ret", r
    assert (r[0] == 55)

    return
示例#39
0
  def dump_regs(self):
    """
    Dump the SPU registers by running a small helper routine on the SPU.

    The helper is assembled as raw instruction words: for each of the 128
    register numbers, a wrch of that register to the outbound-mailbox
    channel followed by stop(0x6); then a stop(0x7) and an absolute branch
    to self.debug_lsa.  The helper is transferred to local-store address 0,
    the debug branch slot is redirected to address 0, and the SPU is
    resumed.  The host then reads one mailbox value per register, resuming
    the SPU after each intermediate stop.

    Returns a list with the 128 values read from the mailbox, in register
    order.

    NOTE(review): the local-store contents overwritten by the helper are
    never saved or restored -- steps 2 and 7 below are still TODO.
    """
    mbox   = 28 # write out mbox channel

    # Pseudo-code:
    #  1) Save code is: (do this as an array, not an instruction stream)
    # Two words per register (wrch + stop) plus four trailing slots; only
    # the first two trailing slots are overwritten below, the other two
    # keep whatever range(save_size) put there.
    save_size = 128 * 2 + 4
    save_code = extarray.extarray('I', range(save_size))
    
    # i walks the even slots; i / 2 is the register number (integer
    # division under Python 2).
    for i in range(0, 128 * 2, 2):
      save_code[i] = spu.wrch(i / 2, mbox, ignore_active = True).render()
      save_code[i + 1] = spu.stop(0x6, ignore_active = True).render()

    # After the last register: stop(0x7), then branch back to the debug stop
    save_code[128 * 2] = spu.stop(0x7, ignore_active = True).render()
    ret = spu.bra(self.debug_lsa, ignore_active = True)
    save_code[128 * 2 + 1] = ret.render()

    #aligned_save_code = aligned_memory(save_size, typecode = 'I')
    #aligned_save_code.copy_to(save_code.buffer_info()[0], len(save_code))

    #  2) Save lsa[0:len(save_code)]
    # TODO: do this with putb

    #  3) Push save code to lsa[0:]
    # Transfer save_size words (4 bytes each) to local-store address 0 and
    # wait for the tagged transfer to complete.
    tag = 2
    spu_exec.spu_getb(self.spe_id, 0, save_code.buffer_info()[0], save_size * 4, tag, 0, 0)
    spu_exec.read_tag_status_all(self.spe_id, 1 << tag);
    
    #  3b) Replace the debug branch with a branch to 0 (start of the helper)
    #      and push the modified program to the SPU
    self.replace(self.debug_branch, spu.bra(0, ignore_active = True))
    self.get_instructions()

    #  4) Resume
    self.resume(self.spe_id)    

    #  5) Read the register values and send the ok signal
    regs = []
    for i in range(128):
      # Busy-wait until the SPU has written a value to the mailbox
      while spu_exec.stat_out_mbox(self.spe_id) == 0: pass
      value = spu_exec.read_out_mbox(self.spe_id)
      regs.append(value)

      # Wait for the stop that follows each wrch, then let the SPU continue
      r = spu_exec.wait_stop_event(self.spe_id)
      self.resume(self.spe_id)

    # Stop reached after the last register (presumably the stop(0x7) -- confirm)
    r = spu_exec.wait_stop_event(self.spe_id)
    print 'next stop', r
    #  6) Restore code at original pc
    self.restore(self.debug_branch)
    self.get_instructions()

    #  7) Restore lsa[0:len(save_code)]
    # TODO: do this with putb

    #  8) Resume
    # self.resume(self.spe_id)    
    # r = spu_exec.wait_stop_event(self.spe_id)
    # Resume and wait for the next debug stop; its stop code is not used here
    self.resume(self.spe_id)
    r = self.wait_debug()

    return regs
示例#40
0
    def dump_regs(self):
        """
        Dump the SPU registers by running a small helper routine on the SPU.

        The helper is assembled as raw instruction words: for each of the
        128 register numbers, a wrch of that register to the
        outbound-mailbox channel followed by stop(0x6); then a stop(0x7)
        and an absolute branch to self.debug_lsa.  The helper is
        transferred to local-store address 0, the debug branch slot is
        redirected to address 0, and the SPU is resumed.  The host then
        reads one mailbox value per register, resuming the SPU after each
        intermediate stop.

        Returns a list with the 128 values read from the mailbox, in
        register order.

        NOTE(review): the local-store contents overwritten by the helper
        are never saved or restored -- steps 2 and 7 below are still TODO.
        """
        mbox = 28  # write out mbox channel

        # Pseudo-code:
        #  1) Save code is: (do this as an array, not an instruction stream)
        # Two words per register (wrch + stop) plus four trailing slots;
        # only the first two trailing slots are overwritten below, the
        # other two keep whatever range(save_size) put there.
        save_size = 128 * 2 + 4
        save_code = extarray.extarray('I', range(save_size))

        # i walks the even slots; i / 2 is the register number (integer
        # division under Python 2).
        for i in range(0, 128 * 2, 2):
            save_code[i] = spu.wrch(i / 2, mbox, ignore_active=True).render()
            save_code[i + 1] = spu.stop(0x6, ignore_active=True).render()

        # After the last register: stop(0x7), then branch back to the
        # debug stop
        save_code[128 * 2] = spu.stop(0x7, ignore_active=True).render()
        ret = spu.bra(self.debug_lsa, ignore_active=True)
        save_code[128 * 2 + 1] = ret.render()

        #aligned_save_code = aligned_memory(save_size, typecode = 'I')
        #aligned_save_code.copy_to(save_code.buffer_info()[0], len(save_code))

        #  2) Save lsa[0:len(save_code)]
        # TODO: do this with putb

        #  3) Push save code to lsa[0:]
        # Transfer save_size words (4 bytes each) to local-store address 0
        # and wait for the tagged transfer to complete.
        tag = 2
        spu_exec.spu_getb(self.spe_id, 0,
                          save_code.buffer_info()[0], save_size * 4, tag, 0, 0)
        spu_exec.read_tag_status_all(self.spe_id, 1 << tag)

        #  3b) Replace the debug branch with a branch to 0 (start of the
        #      helper) and push the modified program to the SPU
        self.replace(self.debug_branch, spu.bra(0, ignore_active=True))
        self.get_instructions()

        #  4) Resume
        self.resume(self.spe_id)

        #  5) Read the register values and send the ok signal
        regs = []
        for i in range(128):
            # Busy-wait until the SPU has written a value to the mailbox
            while spu_exec.stat_out_mbox(self.spe_id) == 0:
                pass
            value = spu_exec.read_out_mbox(self.spe_id)
            regs.append(value)

            # Wait for the stop that follows each wrch, then let the SPU
            # continue
            r = spu_exec.wait_stop_event(self.spe_id)
            self.resume(self.spe_id)

        # Stop reached after the last register (presumably the stop(0x7)
        # -- confirm)
        r = spu_exec.wait_stop_event(self.spe_id)
        print 'next stop', r
        #  6) Restore code at original pc
        self.restore(self.debug_branch)
        self.get_instructions()

        #  7) Restore lsa[0:len(save_code)]
        # TODO: do this with putb

        #  8) Resume
        # self.resume(self.spe_id)
        # r = spu_exec.wait_stop_event(self.spe_id)
        # Resume and wait for the next debug stop; its stop code is not
        # used here
        self.resume(self.spe_id)
        r = self.wait_debug()

        return regs
示例#41
0
def TestTanimotoBlock(n_vecs = 4):
  code = synspu.InstructionStream()
  proc = synspu.Processor()

  code.set_debug(True)
  spu.set_active_code(code)
  
  tb = TanimotoBlock()
  ls_save = LocalSave()
  mm_save = MemorySave()

  code.set_debug(True)

  # Input block parameters
  m = 128
  n = 64
  # n_vecs = 9
  n_bits = 128 * n_vecs

  # Main memory results buffer
  # max_results = 2**16
  max_results = 16384
  words_per_result = 4

  mm_results_data = array.array('I', [12 for i in range(max_results * words_per_result)])
  #mm_results_buffer = synspu.aligned_memory(max_results * words_per_result, typecode = 'I')
  # mm_results_buffer.copy_to(mm_results_data.buffer_info()[0], len(mm_results_data))

  mm_results = spuiter.memory_desc('I')
  #mm_results.from_array(mm_results_buffer)
  mm_results.from_array(mm_results_data)

  mm_save.set_md_save_buffer(mm_results)
    
  # Local Results buffer
  buffer_size = var.SignedWord(16384)
  buffer_addr = var.SignedWord(m * n * n_vecs * 4)
  ls_results = spuiter.memory_desc('B')
  ls_results.set_size_reg(buffer_size)
  ls_results.set_addr_reg(buffer_addr)

  ls_save.set_md_results(ls_results)
  ls_save.set_mm_save_op(mm_save)

  # Setup the TanimotoBlock class
  tb.set_n_bits(n_bits)
  tb.set_block_size(m, n)

  tb.set_x_addr(0)
  tb.set_y_addr(m * n_vecs * 16)
  tb.set_save_op(ls_save)

  # Main test loop
  n_samples = 10000
  for samples in spuiter.syn_iter(code, n_samples):
    tb.synthesize(code)

  spu.wrch(buffer_size, dma.SPU_WrOutMbox)
  
  spu.stop(0x2000) 

  # "Function" Calls
  ls_save.block()
  mm_save.block()

  # code.print_code()
  start = time.time()
  spe_id = proc.execute(code, async=True)
  
  while synspu.spu_exec.stat_out_mbox(spe_id) == 0: pass
  # print 'tb said: 0x%X' % (synspu.spu_exec.read_out_mbox(spe_id))
  stop = time.time()

  # mm_results_buffer.copy_from(mm_results_data.buffer_info()[0], len(mm_results_data))
  
  proc.join(spe_id)
  total = stop - start
  bits_sec = (m * n * n_bits * n_samples) / total / 1e9
  ops_per_compare = 48 * 4 + 8  # 48 SIMD instructions, 8 scalar
  insts_per_compare = 56
  gops = (m * n * n_vecs * n_samples * ops_per_compare ) / total / 1e9
  ginsts = (m * n * n_vecs * n_samples * insts_per_compare ) / total / 1e9  
  print '%.6f sec, %.2f Gbits/sec, %.2f GOps, %.2f GInsts, %d insts' % (
    total, bits_sec, gops, ginsts, code.size())
  return
示例#42
0
class DebugProcessor(spe.Processor):
    """
  Experimental class for simple debugging.
  """

    exec_module = spu_exec
    debug_stop = spu.stop(DEBUG_STOP, ignore_active=True)

    def __init__(self):
        """Initialise the base Processor and clear all debugger state."""
        spe.Processor.__init__(self)

        # Program being debugged and how it was launched
        self.params = self.spe_id = self.code = None

        # Addresses and size of the program
        self.ea = self.lsa = self.inst_size = None

        # Position and stop code of the most recent debug stop
        self.last_pc = self.last_stop = self.stop_code = None

        # idx -> instruction that was at idx before replace() overwrote it
        self.instructions = {}

    def execute(self, code, mode='int', debug=False, params=None, n_spus=1):
        """
        Load code onto an SPU with debug hooks installed and run it to the
        first debug stop.

        Four stop instructions are appended to code (two DEBUG_STOP, two
        DEBUG_STOP_TARGET) and their indices recorded; the instruction at
        index 1 is then overwritten with an absolute branch to the first
        of them before the program is started through
        spe.Processor.execute.

        Parameters:
          code   -- instruction stream to debug; modified in place.
          mode   -- ignored: it is overwritten with 'async' before use.
          debug  -- passed through to spe.Processor.execute.
          params -- optional spu_exec.ExecParams; one is created when None.
                    Its addr and size fields are overwritten.
          n_spus -- not used by this method.

        Returns the value of wait_debug(), or None when code is empty.

        Raises Exception when code is a ParallelInstructionStream.
        """

        # NOTE(review): exact type match -- subclasses of
        # ParallelInstructionStream are not rejected; confirm whether
        # isinstance() was intended.
        if type(code) is ParallelInstructionStream:
            raise Exception(
                'DebugProcessor does not support ParallelInstructionStream')

        self.code = code

        if len(code) == 0:
            return None

        # Add the debug instructions - two each for normal instructions and branch targets
        # The first of each pair is the stop itself; the second slot is
        # later overwritten with a branch (see nexti / dump_regs).
        self.debug_idx = self.code.size()
        self.code.add(spu.stop(DEBUG_STOP))

        self.debug_branch = self.code.size()
        self.code.add(spu.stop(DEBUG_STOP))

        self.debug_target_idx = self.code.size()
        self.code.add(spu.stop(DEBUG_STOP_TARGET))

        self.debug_target_branch = self.code.size()
        self.code.add(spu.stop(DEBUG_STOP_TARGET))

        # Cache the code here
        if not code._cached:
            code.cache_code()

        # Setup the parameter structure
        if params is None:
            params = spu_exec.ExecParams()

        addr = code._prologue.inst_addr()
        params.addr = addr
        params.size = len(code.render_code) * 4  # size in bytes

        self.params = params
        self.ea = code._prologue.inst_addr()
        # Local-store address: 0x3FFFF minus the program size, rounded
        # down to a 128-byte boundary by the 0xFFF80 mask.
        self.lsa = (0x3FFFF - params.size) & 0xFFF80
        # Transfer size rounded up to a multiple of 16 bytes.
        # NOTE(review): when params.size is already a multiple of 16 this
        # adds a full extra 16 bytes -- confirm that is intended.
        self.size = params.size + (16 - params.size % 16)
        self.last_pc = self.lsa
        self.last_stop = (1, )

        # Word (not byte) addresses of the two debug stops, hence the >> 2
        self.debug_lsa = (self.lsa + self.code.code_offset * 4 +
                          self.debug_idx * 4) >> 2
        self.debug_target_lsa = (self.lsa + self.code.code_offset * 4 +
                                 self.debug_target_idx * 4) >> 2

        # The caller's mode argument is discarded here
        mode = 'async'

        # TODO: Factor replacing into one function in case the first one is a branch
        # Trap at instruction index 1 (self.last_stop[0]) by branching to
        # the debug stop.
        self.replace(self.last_stop[0],
                     spu.bra(self.debug_lsa, ignore_active=True))

        self.spe_id = spe.Processor.execute(self, code, mode, debug, params)
        code.print_code()

        retval = self.wait_debug()

        return retval

    def replace(self, idx, inst):
        """
        Overwrite the instruction at idx with inst, keeping a backup of the
        displaced instruction in self.instructions[idx].
        """
        displaced = self.code[idx]
        self.instructions[idx] = displaced
        self.code.debug_set(idx, inst)

    def restore(self, idx):
        """
        Put the backed-up instruction for idx back into the code and return
        the instruction now stored at idx.
        """
        saved = self.instructions[idx]
        self.code.debug_set(idx, saved)
        return self.code[idx]

    def get_instructions(self):
        """
        Transfer self.size bytes of the program from its main-memory address
        to self.lsa on the SPU and wait for the transfer to complete.
        """
        # Underlying call: spe_mfc_getb(speid, ls, (void *)ea, size, tag, tid, rid)
        dma_tag = 5
        source_ea = self.code._prologue.inst_addr()
        spu_exec.spu_getb(self.spe_id, self.lsa, source_ea, self.size,
                          dma_tag, 0, 0)

        # Block until the transfer issued under dma_tag has finished
        tag_mask = 1 << dma_tag
        spu_exec.read_tag_status_all(self.spe_id, tag_mask)

    def wait_debug(self):
        r = spu_exec.wait_stop_event(self.spe_id)
        if r not in (DEBUG_STOP, DEBUG_STOP_TARGET):
            print 'Warning: SPU stopped for unknown reason:', r
        else:
            print 'Debug stop: 0x%X' % r
        return r

    def nexti(self):

        if len(self.last_stop) == 1:
            # Restore a single instruction
            current_inst = self.restore(self.last_stop[0])
            last_idx = self.last_stop[0]
        else:
            # Restore two branch targets and determine which branch was taken
            # based on the stop code
            i1 = self.restore(self.last_stop[0])
            i2 = self.restore(self.last_stop[1])
            if self.stop_code == DEBUG_STOP:
                current_inst = i1
                last_idx = self.last_stop[0]
            else:
                current_inst = i2
                last_idx = self.last_stop[1]

        # If the current instruction is a branch, get the location
        # of all possible next instructions
        if isinstance(current_inst, (spu.br, spu.brsl)):
            next_stop = (self.last_stop[0] + current_inst.I16, )
            print 'next br:', next_stop
        elif isinstance(current_inst, (spu.bra, spu.brasl)):
            next_stop = (current_inst.I16 - (self.lsa >> 2), )
        elif isinstance(current_inst,
                        (spu.brnz, spu.brz, spu.brhnz, spu.brhz)):
            next_stop = (self.last_stop[0] + 1,
                         self.last_stop[0] + current_inst.I16)

        elif isinstance(current_inst, (spu.bi, spu.bisled, spu.bisl)):
            raise Exception(
                "DebugProcessor does not support branch indirect (bi) instructions"
            )
        else:
            next_stop = (self.last_stop[0] + 1, )

        # TODO: Get rid of last instruction.  Do something smarter.
        last_instruction = (next_stop[0] == (self.debug_idx - 1))

        # !!! STOPPED HERE !!!
        # !!! STILL WRONG !!!
        if not last_instruction:
            # Normal instructions and single target branches
            self.replace(next_stop[0],
                         spu.bra(self.debug_lsa, ignore_active=True))
            print 'target (1):', -(self.debug_lsa -
                                   ((self.lsa >> 2) + next_stop[0])
                                   ), self.debug_lsa, last_idx, self.lsa
            self.replace(
                self.debug_branch,
                spu.br(-(self.debug_lsa - ((self.lsa >> 2) + next_stop[0])),
                       ignore_active=True))
            # Branch target for test-based branch instructions
            if len(next_stop) == 2:
                self.replace(
                    next_stop[1],
                    spu.bra(self.debug_target_lsa, ignore_active=True))
                print 'target (2):', -(self.debug_target_lsa - (
                    (self.lsa >> 2) + next_stop[1])), self.debug_target_lsa
                self.replace(
                    self.debug_target_branch,
                    spu.br(-(self.debug_target_lsa -
                             ((self.lsa >> 2) + next_stop[1])),
                           ignore_active=True))

            # self.replace(next_stop, self.debug_stop)

        self.get_instructions()
        self.code.print_code()
        self.resume(self.spe_id)

        if last_instruction:
            r = self.join(self.spe_id)
            r = None
        else:
            r = self.wait_debug()
            self.last_stop = next_stop
            self.stop_code = r

        return r

    def dump_regs(self):
        """
        Dump the SPU registers by running a small helper routine on the SPU.

        The helper is assembled as raw instruction words: for each of the
        128 register numbers, a wrch of that register to the
        outbound-mailbox channel followed by stop(0x6); then a stop(0x7)
        and an absolute branch to self.debug_lsa.  The helper is
        transferred to local-store address 0, the debug branch slot is
        redirected to address 0, and the SPU is resumed.  The host then
        reads one mailbox value per register, resuming the SPU after each
        intermediate stop.

        Returns a list with the 128 values read from the mailbox, in
        register order.

        NOTE(review): the local-store contents overwritten by the helper
        are never saved or restored -- steps 2 and 7 below are still TODO.
        """
        mbox = 28  # write out mbox channel

        # Pseudo-code:
        #  1) Save code is: (do this as an array, not an instruction stream)
        # Two words per register (wrch + stop) plus four trailing slots;
        # only the first two trailing slots are overwritten below, the
        # other two keep whatever range(save_size) put there.
        save_size = 128 * 2 + 4
        save_code = extarray.extarray('I', range(save_size))

        # i walks the even slots; i / 2 is the register number (integer
        # division under Python 2).
        for i in range(0, 128 * 2, 2):
            save_code[i] = spu.wrch(i / 2, mbox, ignore_active=True).render()
            save_code[i + 1] = spu.stop(0x6, ignore_active=True).render()

        # After the last register: stop(0x7), then branch back to the
        # debug stop
        save_code[128 * 2] = spu.stop(0x7, ignore_active=True).render()
        ret = spu.bra(self.debug_lsa, ignore_active=True)
        save_code[128 * 2 + 1] = ret.render()

        #aligned_save_code = aligned_memory(save_size, typecode = 'I')
        #aligned_save_code.copy_to(save_code.buffer_info()[0], len(save_code))

        #  2) Save lsa[0:len(save_code)]
        # TODO: do this with putb

        #  3) Push save code to lsa[0:]
        # Transfer save_size words (4 bytes each) to local-store address 0
        # and wait for the tagged transfer to complete.
        tag = 2
        spu_exec.spu_getb(self.spe_id, 0,
                          save_code.buffer_info()[0], save_size * 4, tag, 0, 0)
        spu_exec.read_tag_status_all(self.spe_id, 1 << tag)

        #  3b) Replace the debug branch with a branch to 0 (start of the
        #      helper) and push the modified program to the SPU
        self.replace(self.debug_branch, spu.bra(0, ignore_active=True))
        self.get_instructions()

        #  4) Resume
        self.resume(self.spe_id)

        #  5) Read the register values and send the ok signal
        regs = []
        for i in range(128):
            # Busy-wait until the SPU has written a value to the mailbox
            while spu_exec.stat_out_mbox(self.spe_id) == 0:
                pass
            value = spu_exec.read_out_mbox(self.spe_id)
            regs.append(value)

            # Wait for the stop that follows each wrch, then let the SPU
            # continue
            r = spu_exec.wait_stop_event(self.spe_id)
            self.resume(self.spe_id)

        # Stop reached after the last register (presumably the stop(0x7)
        # -- confirm)
        r = spu_exec.wait_stop_event(self.spe_id)
        print 'next stop', r
        #  6) Restore code at original pc
        self.restore(self.debug_branch)
        self.get_instructions()

        #  7) Restore lsa[0:len(save_code)]
        # TODO: do this with putb

        #  8) Resume
        # self.resume(self.spe_id)
        # r = spu_exec.wait_stop_event(self.spe_id)
        # Resume and wait for the next debug stop; its stop code is not
        # used here
        self.resume(self.spe_id)
        r = self.wait_debug()

        return regs

    def dump_mem(self):
        """Placeholder for dumping the SPU local store; currently a no-op."""
        # TODO: use putb to copy the local store into a Python array
        return