Exemplo n.º 1
0
    def synthesize(self, code):
        """
        Emit the popc/reduce kernel into `code`.

        The input vector is copied out of register 5, passed through
        self.popc() and self.reduce_word(), and the reduced value is written
        to the outbound mailbox channel (dma.SPU_WrOutMbox).

        `code` is installed as the active SPU instruction stream while the
        instructions are emitted.  The previously active stream is restored
        afterwards, even if synthesis raises.
        """
        old_code = spu.get_active_code()
        spu.set_active_code(code)

        try:
            # Working registers: intermediate counts and the reduced result
            count = code.acquire_register()
            result = code.acquire_register()

            # 'Load' the input vector x from register 5 (adding an immediate
            # of 0 is used as a register-to-register copy)
            x = code.acquire_register()
            spu.ai(x, 5, 0)

            # Zero count and result
            spu.xor(count, count, count)
            spu.xor(result, result, result)

            # Inline the popc and reduce operations
            self.popc(count, x)
            self.reduce_word(result, count)

            # Send the result to the caller
            spu.wrch(result, dma.SPU_WrOutMbox)

            # Hand back every register acquired above, not only x, so count
            # and result are not leaked from the stream's register pool.
            code.release_register(x)
            code.release_register(count)
            code.release_register(result)
        finally:
            spu.set_active_code(old_code)
        return
Exemplo n.º 2
0
  def synthesize(self, code):
    """
    Render a vector with 4 pixels.

    Emits, into `code`, the instructions that convert self.result to RGBA
    bytes, store the quadword with stqd (base register self.x_offset,
    displacement derived from self.lsa) and advance self.x_offset by 16.

    Raises:
      Exception: if x_offset, result or one has not been set.  The checks
        run before the active instruction stream is switched, and the
        previous stream is restored even when synthesis fails, so a failed
        call never leaves `code` installed as the active stream.
    """
    if self.x_offset is None: raise Exception('Please call setup')
    if self.result is None: raise Exception('Please set result')
    if self.one is None: raise Exception('Please set one')

    old_code = spu.get_active_code()
    spu.set_active_code(code)

    try:
      # Make the part of the result positive and subtract 1
      # to transform (-1,-oo) into (0,oo)
      self.result.v = spu.fs.ex(0, self.result)
      self.result.v = spu.fs.ex(self.result, self.one)

      # Convert the result to an unsigned int, scaling by 2^4 to put
      # values between 0 and 16 in the gradient.  Values outside [0,16]
      # are 0 or FF
      self.result.v = spu.cfltu.ex(self.result, 169) # 173 - 169 == 4

      # Extract the first two bytes from the result into the RGB positions
      # and set alpha to 0xFF
      self.result.v = spu.shufb.ex(self.result, self.ff, self.uint2rgba)

      # Save the result and increment the offset
      spu.stqd(self.result, self.x_offset, self.lsa >> 4)
      spu.ai(self.x_offset, self.x_offset, 16)
    finally:
      spu.set_active_code(old_code)
    return
Exemplo n.º 3
0
  def synthesize(self, code):
    """
    Emit the popc/reduce kernel into `code`.

    The input vector is copied out of register 5, passed through
    self.popc() and self.reduce_word(), and the reduced value is written
    to the outbound mailbox channel (dma.SPU_WrOutMbox).

    `code` is installed as the active SPU instruction stream while the
    instructions are emitted.  The previously active stream is restored
    afterwards, even if synthesis raises.
    """
    old_code = spu.get_active_code()
    spu.set_active_code(code)

    try:
      # Working registers: intermediate counts and the reduced result
      count  = code.acquire_register()
      result = code.acquire_register()

      # 'Load' the input vector x from register 5 (adding an immediate
      # of 0 is used as a register-to-register copy)
      x = code.acquire_register()
      spu.ai(x, 5, 0)

      # Zero count and result
      spu.xor(count, count, count)
      spu.xor(result, result, result)

      # Inline the popc and reduce operations
      self.popc(count, x)
      self.reduce_word(result, count)

      # Send the result to the caller
      spu.wrch(result, dma.SPU_WrOutMbox)

      # Hand back every register acquired above, not only x, so count
      # and result are not leaked from the stream's register pool.
      code.release_register(x)
      code.release_register(count)
      code.release_register(result)
    finally:
      spu.set_active_code(old_code)
    return
Exemplo n.º 4
0
    def synthesize(self, code):
        """
        Render a vector with 4 pixels.

        Emits, into `code`, the instructions that convert self.result to
        RGBA bytes, store the quadword with stqd (base register
        self.x_offset, displacement derived from self.lsa) and advance
        self.x_offset by 16.

        Raises:
            Exception: if x_offset, result or one has not been set.  The
                checks run before the active instruction stream is switched,
                and the previous stream is restored even when synthesis
                fails, so a failed call never leaves `code` installed as the
                active stream.
        """
        if self.x_offset is None: raise Exception('Please call setup')
        if self.result is None: raise Exception('Please set result')
        if self.one is None: raise Exception('Please set one')

        old_code = spu.get_active_code()
        spu.set_active_code(code)

        try:
            # Make the part of the result positive and subtract 1
            # to transform (-1,-oo) into (0,oo)
            self.result.v = spu.fs.ex(0, self.result)
            self.result.v = spu.fs.ex(self.result, self.one)

            # Convert the result to an unsigned int, scaling by 2^4 to put
            # values between 0 and 16 in the gradient.  Values outside
            # [0,16] are 0 or FF
            self.result.v = spu.cfltu.ex(self.result, 169)  # 173 - 169 == 4

            # Extract the first two bytes from the result into the RGB
            # positions and set alpha to 0xFF
            self.result.v = spu.shufb.ex(self.result, self.ff, self.uint2rgba)

            # Save the result and increment the offset
            spu.stqd(self.result, self.x_offset, self.lsa >> 4)
            spu.ai(self.x_offset, self.x_offset, 16)
        finally:
            spu.set_active_code(old_code)
        return
Exemplo n.º 5
0
def fdiv(code, d, x, y, one = None):
  """
  Emit single-precision floating point division, d = x / y.

  The quotient is formed as x * (1 / y).  The reciprocal is built from the
  frest/fi estimate followed by one fnms/fma refinement step.

  code -- stream whose register pool supplies the temporaries
  d    -- destination register
  x, y -- dividend and divisor registers
  one  -- optional register used as the constant operand of the refinement
          step; when omitted a temporary is acquired and initialised from
          the integer 1 via cuflt

  Instructions are issued through the spu module directly, so the caller is
  expected to have the intended stream active.
  """
  estimate, interpolated, reciprocal = code.acquire_registers(3)
  residual = code.acquire_register()
  temporaries = [estimate, interpolated, reciprocal, residual]

  if one is None:
    # Build the constant locally: zero, add 1, convert to float
    one = code.acquire_register()
    temporaries.append(one)
    spu.xor(one, one, one)
    spu.ai(one, one, 1)
    spu.cuflt(one, one, 155)

  # Reciprocal of y (SPU ISA 1.1, p208, Normal case)
  spu.frest(estimate, y)
  spu.fi(interpolated, y, estimate)
  spu.fnms(residual, y, interpolated, one)
  spu.fma(reciprocal, residual, interpolated, interpolated)

  # d = x * (1/y)
  spu.fm(d, x, reciprocal)

  code.release_registers(temporaries)

  return
Exemplo n.º 6
0
    def save_register(self, reg):  # , branch_to_save = False):
        """
        Emit code that stores `reg` into the local-store buffer described by
        self.ls_buffer and flushes the buffer when it fills up.

        Instructions are emitted into the currently active SPU stream.
        Three temporary registers are acquired and released again before
        returning.

        NOTE(review): the original author marked this routine as unfinished
        (see the "STOPPED HERE" comment below); treat it as work in progress.
        """
        code = spu.get_active_code()

        offset = code.acquire_register()
        size = code.acquire_register()
        test = code.acquire_register()
        regs = [offset, size, test]

        # Rotate the ls_buffer quadword by 4 and 8 bytes to pull two of its
        # words out as the current offset and the buffer size.
        # (presumably ls_buffer is laid out as [base, offset, size, ...] --
        # TODO confirm against the code that initialises ls_buffer)
        spu.rotqbyi(offset, self.ls_buffer, 4)
        spu.rotqbyi(size, self.ls_buffer, 8)

        # Indexed store of reg using ls_buffer and offset as the address
        spu.stqx(reg, self.ls_buffer, offset)

        # Advance by one quadword and compare against the buffer size
        # NOTE(review): the incremented offset lives only in this temporary;
        # nothing visible here writes it back into self.ls_buffer -- confirm.
        spu.ai(offset, offset, 16)
        spu.ceq(test, offset, size)

        # Send the three values out through the mailbox
        spu.wrch(size, dma.SPU_WrOutMbox)
        spu.wrch(offset, dma.SPU_WrOutMbox)
        spu.wrch(test, dma.SPU_WrOutMbox)
        # !!! STOPPED HERE !!! THESE VALUES ARE WRONG !!!
        # Remember where the conditional branch belongs; the stop(0xB)
        # emitted here is a placeholder that is overwritten below.
        lbl_ls_full = code.size()
        spu.stop(0xB)
        self.save_ls_buffer(ls_size=size)

        spu.nop(0)
        # Patch the placeholder with a brz over the save sequence: when test
        # is zero (offset != size) the save_ls_buffer code is skipped.
        # ignore_active keeps the instruction from also being appended to
        # the active stream.
        code[lbl_ls_full] = spu.brz(test, (code.size() - lbl_ls_full),
                                    ignore_active=True)

        code.release_registers(regs)
        return
Exemplo n.º 7
0
  def save_register(self, reg): # , branch_to_save = False):
    """
    Emit code that stores `reg` into the local-store buffer described by
    self.ls_buffer and flushes the buffer when it fills up.

    Instructions are emitted into the currently active SPU stream.  Three
    temporary registers are acquired and released again before returning.

    NOTE(review): the original author marked this routine as unfinished
    (see the "STOPPED HERE" comment below); treat it as work in progress.
    """
    code = spu.get_active_code()

    offset = code.acquire_register()
    size = code.acquire_register()
    test = code.acquire_register()
    regs = [offset, size, test]
    
    # Rotate the ls_buffer quadword by 4 and 8 bytes to pull two of its
    # words out as the current offset and the buffer size.
    # (presumably ls_buffer is laid out as [base, offset, size, ...] --
    # TODO confirm against the code that initialises ls_buffer)
    spu.rotqbyi(offset, self.ls_buffer, 4)
    spu.rotqbyi(size,   self.ls_buffer, 8)

    # Indexed store of reg using ls_buffer and offset as the address
    spu.stqx(reg, self.ls_buffer, offset)
    
    # Advance by one quadword and compare against the buffer size
    # NOTE(review): the incremented offset lives only in this temporary;
    # nothing visible here writes it back into self.ls_buffer -- confirm.
    spu.ai(offset, offset, 16)
    spu.ceq(test,  offset, size)

    # Send the three values out through the mailbox
    spu.wrch(size, dma.SPU_WrOutMbox)
    spu.wrch(offset, dma.SPU_WrOutMbox)
    spu.wrch(test, dma.SPU_WrOutMbox)
    # !!! STOPPED HERE !!! THESE VALUES ARE WRONG !!!
    # Remember where the conditional branch belongs; the stop(0xB) emitted
    # here is a placeholder that is overwritten below.
    lbl_ls_full = code.size()
    spu.stop(0xB)
    self.save_ls_buffer(ls_size = size)

    spu.nop(0)
    # Patch the placeholder with a brz over the save sequence: when test is
    # zero (offset != size) the save_ls_buffer code is skipped.
    # ignore_active keeps the instruction from also being appended to the
    # active stream.
    code[lbl_ls_full] = spu.brz(test, (code.size() - lbl_ls_full), ignore_active = True)

    code.release_registers(regs)
    return
Exemplo n.º 8
0
  def synthesize_constants(self, code):
    """
    Emit the code that initialises the constant register self._one.

    A register is acquired from `code` and kept in self._one (it is not
    released here).  It is zeroed, incremented to the integer 1 and then
    converted with cuflt (immediate 155).

    `code` becomes the active stream while emitting; the previously active
    stream is put back only if there was one.
    """
    previous = spu.get_active_code()
    spu.set_active_code(code)

    one = code.acquire_register()
    self._one = one

    spu.xor(one, one, one)      # one = 0
    spu.ai(one, one, 1)         # one = 1 (integer)
    spu.cuflt(one, one, 155)    # integer -> float

    if previous is None:
      return

    spu.set_active_code(previous)
    return
Exemplo n.º 9
0
 def cleanup(self):
   """
   Emit the end-of-iteration update of the loop counter.

   DEC mode subtracts the step from r_count, INC mode adds it.  The step
   comes from the register r_step when one has been set up, otherwise it is
   encoded as an immediate operand.  Any other mode emits nothing.
   """
   if self.mode == DEC:
     if self.r_step is None:
       update = spu.ai(self.r_count, self.r_count, -self.step_size())
     else:
       # sf is "subtract from": r_count = r_count - r_step
       update = spu.sf(self.r_count, self.r_step, self.r_count)
   elif self.mode == INC:
     if self.r_step is None:
       update = spu.ai(self.r_count, self.r_count, self.step_size())
     else:
       update = spu.a(self.r_count, self.r_count, self.r_step)
   else:
     return

   self.code.add(update)
   return
Exemplo n.º 10
0
    def start(self, align=True, branch=True):
        """
        Emit the pre-loop initialisation for this iterator.

        Acquires the counter register (and, for INC mode with `branch`, the
        stop register) when they are not already set, loads their initial
        values, moves an out-of-range step into a register, and creates the
        start / branch / continue labels.  Only the start label is added to
        the stream here.

        align  -- accepted for interface compatibility; not used here
        branch -- when true, the stop value needed by the loop branch is
                  prepared as well
        """
        stream = self.code

        if self.r_count is None:
            self.r_count = stream.acquire_register()

        counting_down = self.mode == DEC
        counting_up = (not counting_down) and self.mode == INC

        # Only an incrementing loop compares against a stop register
        if counting_up and branch and self.r_stop is None:
            self.r_stop = stream.acquire_register()

        # Initial counter value: copied from r_start when supplied
        # externally, otherwise loaded as a constant
        if counting_down or counting_up:
            if self._external_start:
                stream.add(spu.ai(self.r_count, self.r_start, 0))
            elif counting_down:
                util.load_word(stream, self.r_count, self.get_count())
            else:
                util.load_word(stream, self.r_count, self.get_start())

        if counting_up and branch and not self._external_stop:
            util.load_word(stream, self.r_stop, self.get_count())

        if self.r_count is not None:
            self.current_count = var.SignedWord(code=stream,
                                                reg=self.r_count)

        # A step outside (-512, 511) is not used as an immediate; keep it
        # in a register instead.
        step = self.step_size()
        if step <= -512 or step >= 511:
            self.r_step = stream.acquire_register()
            util.load_word(stream, self.r_step, self.step_size())

        def make_label(template):
            # Each label name gets its own random numeric suffix
            return stream.get_label(template % random.randint(0, 2**32))

        self.start_label = make_label("SYN_ITER_START_%d")
        stream.add(self.start_label)

        # The branch/continue labels are only created here so they can be
        # referenced; they are added to the stream where they belong.
        self.branch_label = make_label("SYN_ITER_BRANCH_%d")
        self.continue_label = make_label("SYN_ITER_CONTINUE_%d")
        return
Exemplo n.º 11
0
def SimpleSPU():
    """
  A very simple SPU that computes 11 + 31 and returns 0xA on success.
  """
    prgm = env.Program()
    code = prgm.get_stream()
    proc = env.Processor()

    spu.set_active_code(code)

    # Acquire two registers
    #x    = code.acquire_register()
    x = code.gp_return
    test = prgm.acquire_register(reg_name=55)

    spu.xor(x, x, x)  # zero x
    spu.ai(x, x, 11)  # x = x + 11
    spu.ai(x, x, 31)  # x = x + 31

    spu.ceqi(test, x, 42)  # test = (x == 42)

    # If test is false (all 0s), skip the stop(0x100A) instruction
    spu.brz(test, 2)
    spu.stop(0x100A)
    spu.stop(0x100B)

    prgm.add(code)
    prgm.print_code(hex=True)
    r = proc.execute(prgm, mode='int', stop=True, debug=True)
    assert (r[0] == 42)
    assert (r[1] == 0x100A)

    prgm = env.Program()
    code = prgm.get_stream()
    spu.set_active_code(code)

    util.load_float(code, code.fp_return, 3.14)

    prgm.add(code)
    prgm.print_code(hex=True)
    r = proc.execute(prgm, mode='fp')
    print r
    return
Exemplo n.º 12
0
def SimpleSPU():
  """
  A very simple SPU that computes 11 + 31 and returns 0xA on success.
  """
  prgm = env.Program()
  code = prgm.get_stream()
  proc = env.Processor()

  spu.set_active_code(code)

  # Acquire two registers
  #x    = code.acquire_register()
  x = code.gp_return
  test = prgm.acquire_register(reg_name = 55)

  spu.xor(x, x, x) # zero x
  spu.ai(x, x, 11) # x = x + 11
  spu.ai(x, x, 31) # x = x + 31

  spu.ceqi(test, x, 42) # test = (x == 42)

  # If test is false (all 0s), skip the stop(0x100A) instruction
  spu.brz(test, 2)
  spu.stop(0x100A)
  spu.stop(0x100B)

  prgm.add(code) 
  prgm.print_code(hex = True) 
  r = proc.execute(prgm, mode = 'int', stop = True, debug = True) 
  assert(r[0] == 42)
  assert(r[1] == 0x100A)

  prgm = env.Program()
  code = prgm.get_stream()
  spu.set_active_code(code)

  util.load_float(code, code.fp_return, 3.14)

  prgm.add(code)
  prgm.print_code(hex = True)
  r = proc.execute(prgm, mode = 'fp')
  print r
  return
Exemplo n.º 13
0
def TestParallel():
    """
    Launch a tiny program on 6 SPUs asynchronously and wait for all of them.

    Meant to be run with the stop instruction in place so that registers and
    memory can be examined; nothing is asserted about the results.
    """
    code = ParallelInstructionStream()
    proc = Processor()

    code.raw_data_size = 128 * 8

    marker = code.acquire_register()
    # NOTE(review): 0xCAFE / 0xBABE are wider than the 10-bit immediate
    # field that the SPU ISA defines for ai -- confirm how the encoder
    # treats them.
    for immediate in (0xCAFE, 0xBABE):
        code.add(spu.ai(marker, marker, immediate))
    code.add(spu.stop(0x2000))

    speids = proc.execute(code, mode='async', n_spus=6)

    # Block until every launched SPU has finished
    for speid in speids:
        proc.join(speid)

    assert True
    return
Exemplo n.º 14
0
def TestParallel():
  """
  Launch a tiny program on 6 SPUs asynchronously and wait for all of them.

  Meant to be run with the stop instruction in place so that registers and
  memory can be examined; nothing is asserted about the results.
  """
  code = ParallelInstructionStream()
  proc = Processor()

  code.raw_data_size = 128 * 8

  marker = code.acquire_register()
  # NOTE(review): 0xCAFE / 0xBABE are wider than the 10-bit immediate field
  # that the SPU ISA defines for ai -- confirm how the encoder treats them.
  for immediate in (0xCAFE, 0xBABE):
    code.add(spu.ai(marker, marker, immediate))
  code.add(spu.stop(0x2000))

  speids = proc.execute(code, mode='async', n_spus = 6)

  # Block until every launched SPU has finished
  for speid in speids:
    proc.join(speid)

  assert True
  return
Exemplo n.º 15
0
  def start(self, align = True, branch = True):
    """
    Emit the pre-loop initialisation for this iterator.

    Acquires the counter register (and, for INC mode with `branch`, the stop
    register) from the program when they are not already set, loads their
    initial values, moves an out-of-range step into a register, and creates
    the start / branch / continue labels.  Only the start label is added to
    the stream here.

    align  -- accepted for interface compatibility; not used here
    branch -- when true, the stop value needed by the loop branch is
              prepared as well
    """
    stream = self.code
    prgm = stream.prgm

    if self.r_count is None:
      self.r_count = prgm.acquire_register()

    counting_down = self.mode == DEC
    counting_up = (not counting_down) and self.mode == INC

    # Only an incrementing loop compares against a stop register
    if counting_up and branch and self.r_stop is None:
      self.r_stop = prgm.acquire_register()

    # Initial counter value: copied from r_start when supplied externally,
    # otherwise loaded as a constant
    if counting_down or counting_up:
      if self._external_start:
        stream.add(spu.ai(self.r_count, self.r_start, 0))
      elif counting_down:
        util.load_word(stream, self.r_count, self.get_count())
      else:
        util.load_word(stream, self.r_count, self.get_start())

    if counting_up and branch and not self._external_stop:
      util.load_word(stream, self.r_stop, self.get_count())

    if self.r_count is not None:
      self.current_count = var.SignedWord(code = stream, reg = self.r_count)

    # A step outside (-512, 511) is not used as an immediate; keep it in a
    # register instead.
    step = self.step_size()
    if step <= -512 or step >= 511:
      self.r_step = prgm.acquire_register()
      util.load_word(stream, self.r_step, self.step_size())

    # Loop entry point
    self.start_label = prgm.get_unique_label("SYN_ITER_START")
    stream.add(self.start_label)

    # The branch/continue labels are only created here so they can be
    # referenced; they are added to the stream where they belong.
    self.branch_label = prgm.get_unique_label("SYN_ITER_BRANCH")
    self.continue_label = prgm.get_unique_label("SYN_ITER_CONTINUE")
    return
Exemplo n.º 16
0
def TestParallel():
  # Run this with a stop instruction and examine the registers and memory
  prgm = ParallelProgram()
  code = prgm.get_stream()
  proc = Processor()

  code.raw_data_size = 128*8

  r = prgm.acquire_register()
  code.add(spu.ai(r, r, 0x2FE))
  code.add(spu.ai(r, r, 0x2BE))    
  code.add(spu.stop(0x1FFF))

  prgm += code
  r = proc.execute(prgm, async = True, mode='void', n_spus = 6)

  for speid in r:
    proc.join(speid)

  assert(True)
  return
Exemplo n.º 17
0
def TestParallel():
    # Run this with a stop instruction and examine the registers and memory
    prgm = ParallelProgram()
    code = prgm.get_stream()
    proc = Processor()

    code.raw_data_size = 128 * 8

    r = prgm.acquire_register()
    code.add(spu.ai(r, r, 0x2FE))
    code.add(spu.ai(r, r, 0x2BE))
    code.add(spu.stop(0x1FFF))

    prgm += code
    r = proc.execute(prgm, async=True, mode='void', n_spus=6)

    for speid in r:
        proc.join(speid)

    assert (True)
    return
Exemplo n.º 18
0
def copy_param(code, target, source):
    """
    Copy a parameter into the preferred slot of the target register.

    `source` is indexed with REG (the register holding the parameter) and
    SLOT (the word slot inside that register).  A slot-0 parameter is copied
    with an add-immediate of 0; any other slot is rotated into place by
    slot * 4 bytes.  Either way the remaining words of the source register
    are carried along as well.
    """
    slot = source[SLOT]
    if slot == 0:
        inst = spu.ai(target, source[REG], 0)
    else:
        inst = spu.rotqbyi(target, source[REG], slot * 4)
    code.add(inst)
    return
Exemplo n.º 19
0
def copy_param(code, target, source):
  """
  Copy a parameter into the preferred slot of the target register.

  `source` is indexed with REG (the register holding the parameter) and
  SLOT (the word slot inside that register).  A slot-0 parameter is copied
  with an add-immediate of 0; any other slot is rotated into place by
  slot * 4 bytes.  Either way the remaining words of the source register
  are carried along as well.
  """
  slot = source[SLOT]
  if slot == 0:
    inst = spu.ai(target, source[REG], 0)
  else:
    inst = spu.rotqbyi(target, source[REG], slot * 4)
  code.add(inst)
  return
Exemplo n.º 20
0
def load_word(code, r_target, word, clear=False, zero=True):
    """
    Emit the shortest instruction sequence that loads `word` into r_target.

    code     -- stream the instructions are added to
    r_target -- destination register
    word     -- integer constant to load
    clear    -- when true, follow the load with shlqbyi(r_target, r_target, 12)
    zero     -- when true, small values are loaded by adding to code.r_zero;
                pass False if r0 is not set to 0

    Selection order: ai from the zero register for -512 < word < 511 (only
    with `zero`), il for values that fit in 15 bits, ila for values that fit
    in 18 bits, otherwise an ilhu/iohl pair for the upper and lower
    halfwords.
    """
    emit = code.add

    if zero and (-512 < word < 511):
        emit(spu.ai(r_target, code.r_zero, word))
    elif (word & 0x7FFF) == word:
        emit(spu.il(r_target, word))
    elif (word & 0x3FFFF) == word:
        emit(spu.ila(r_target, word))
    else:
        upper_half = (word & 0xFFFF0000) >> 16
        lower_half = (word & 0xFFFF)
        emit(spu.ilhu(r_target, upper_half))
        emit(spu.iohl(r_target, lower_half))

    if not clear:
        return

    emit(spu.shlqbyi(r_target, r_target, 12))
    return
Exemplo n.º 21
0
  def end(self, branch = True):
    """
    Emit the post-loop code for this iterator.

    Optionally emits a branch hint, then (when `branch` is true) the branch
    label and the conditional branch back to the start label.  Afterwards
    the counter is reset to its initial value so the loop can be re-entered
    when nested, and the counter/stop registers are released.  An externally
    supplied stop register is not released.
    """
    stream = self.code

    if self.hint == True:
      stream.add(spu.hbrr(self.branch_label, self.start_label))

    if self.mode == DEC:
      # Loop again while r_count is non-zero.  The counter update itself is
      # expected to have been emitted already (e.g. by cleanup()).
      if branch:
        stream.add(self.branch_label)
        stream.add(spu.brnz(self.r_count, self.start_label))

      # Restore the counter in case this loop is nested
      util.load_word(stream, self.r_count, self.get_count())

    elif self.mode == INC:
      # Loop again while r_stop > r_count
      if branch:
        keep_going = stream.prgm.acquire_register()

        stream.add(spu.cgt(keep_going, self.r_stop, self.r_count))
        stream.add(self.branch_label)
        stream.add(spu.brnz(keep_going, self.start_label))

        stream.prgm.release_register(keep_going)

      # Restore the counter in case this loop is nested
      if self._external_start:
        stream.add(spu.ai(self.r_count, self.r_start, 0))
      else:
        util.load_word(stream, self.r_count, self.get_start())

    # Collect the registers owned by the iterator, then release them
    owned = []
    if self.r_count is not None:
      owned.append(self.r_count)
    if not (self.r_stop is None or self._external_stop):
      owned.append(self.r_stop)

    for reg in owned:
      stream.prgm.release_register(reg)

    return
Exemplo n.º 22
0
def TestMFC():
    size = 32
    #data_array = array.array('I', range(size))
    #data = synspu.aligned_memory(size, typecode = 'I')
    #data.copy_to(data_array.buffer_info()[0], len(data_array))
    data = extarray.extarray('I', range(size))
    code = synspu.InstructionStream()

    r_zero = code.acquire_register()
    r_ea_data = code.acquire_register()
    r_ls_data = code.acquire_register()
    r_size = code.acquire_register()
    r_tag = code.acquire_register()

    # Load zero
    util.load_word(code, r_zero, 0)

    print 'array ea: %X' % (data.buffer_info()[0])
    print 'r_zero = %s, ea_data = %s, ls_data = %s, r_size = %s, r_tag = %s' % (
        str(r_zero), str(r_ea_data), str(r_ls_data), str(r_size), str(r_tag))

    # Load the effective address
    print 'test ea: %X' % data.buffer_info()[0]
    util.load_word(code, r_ea_data, data.buffer_info()[0])

    # Load the size
    code.add(spu.ai(r_size, r_zero, size * 4))

    # Load the tag
    code.add(spu.ai(r_tag, r_zero, 2))

    # Load the lsa
    code.add(spu.ai(r_ls_data, r_zero, 0))

    # Load the data into address 0
    mfc_get(code, r_ls_data, r_ea_data, r_size, r_tag)

    # Set the tag bit to 2
    mfc_write_tag_mask(code, 1 << 2)

    # Wait for the transfer to complete
    mfc_read_tag_status_all(code)

    # Increment the data values by 1 using an unrolled loop (no branches)
    r_current = code.acquire_register()

    for lsa in range(0, size * 4, 16):
        code.add(spu.lqa(r_current, (lsa >> 2)))
        code.add(spu.ai(r_current, r_current, 1))
        code.add(spu.stqa(r_current, (lsa >> 2)))

    code.release_register(r_current)

    # Store the values back to main memory

    # Load the data into address 0
    mfc_put(code, r_ls_data, r_ea_data, r_size, r_tag)

    # Set the tag bit to 2
    mfc_write_tag_mask(code, 1 << 2)

    # Wait for the transfer to complete
    mfc_read_tag_status_all(code)

    # Cleanup
    code.release_register(r_zero)
    code.release_register(r_ea_data)
    code.release_register(r_ls_data)
    code.release_register(r_size)
    code.release_register(r_tag)

    # Stop for debugging
    # code.add(spu.stop(0xA))

    # Execute the code
    proc = synspu.Processor()
    # code.print_code()
    #print data_array
    proc.execute(code)

    #data.copy_from(data_array.buffer_info()[0], len(data_array))

    for i in range(size):
        assert (data[i] == i + 1)

    return
Exemplo n.º 23
0
def TestInt():
    code = InstructionStream()
    proc = Processor()

    spu.set_active_code(code)

    r13 = code.acquire_register(reg=13)
    r20 = code.acquire_register(reg=20)
    spu.ai(r20, r20, 13)
    spu.ai(r13, r13, 13)
    spu.ai(r13, r13, 13)
    spu.ai(r13, r13, 13)
    spu.ai(r13, r13, 13)
    spu.ai(r13, r13, 13)

    spu.stop(0x200D)

    code.print_code()
    r = proc.execute(code)  # , debug = True)
    print 'int result:', r
    # while True:
    #   pass
    return
Exemplo n.º 24
0
def SimpleSPU():
    """
  A very simple SPU that computes 11 + 31 and returns 0xA on success.
  """
    code = InstructionStream()
    proc = Processor()

    spu.set_active_code(code)

    # Acquire two registers
    #x    = code.acquire_register()
    x = code.gp_return
    test = code.acquire_register()

    lbl_brz = code.get_label("BRZ")
    lbl_skip = code.get_label("SKIP")

    spu.hbrr(lbl_brz, lbl_skip)
    spu.xor(x, x, x)  # zero x
    spu.ai(x, x, 11)  # x = x + 11
    spu.ai(x, x, 31)  # x = x + 31

    spu.ceqi(test, x, 42)  # test = (x == 42)

    # If test is false (all 0s), skip the stop(0x100A) instruction
    code.add(lbl_brz)
    spu.brz(test, lbl_skip)
    spu.stop(0x100A)
    code.add(lbl_skip)
    spu.stop(0x100B)

    code.print_code(hex=True, pro=True, epi=True)
    r = proc.execute(code, mode='int', stop=True)
    print "ret", r
    assert (r[0] == 42)
    assert (r[1] == 0x100A)

    code = InstructionStream()
    spu.set_active_code(code)

    lbl_loop = code.get_label("LOOP")
    lbl_break = code.get_label("BREAK")

    r_cnt = code.acquire_register()
    r_stop = code.acquire_register()
    r_cmp = code.acquire_register()
    r_foo = code.gp_return

    spu.ori(r_foo, code.r_zero, 0)
    spu.ori(r_cnt, code.r_zero, 0)
    util.load_word(code, r_stop, 10)

    code.add(lbl_loop)

    spu.ceq(r_cmp, r_cnt, r_stop)
    spu.brnz(r_cmp, lbl_break)
    spu.ai(r_cnt, r_cnt, 1)

    spu.a(r_foo, r_foo, r_cnt)

    spu.br(lbl_loop)
    code.add(lbl_break)

    code.print_code()
    r = proc.execute(code, mode='int', stop=True)
    print "ret", r
    assert (r[0] == 55)

    return
Exemplo n.º 25
0
import corepy.lib.extarray as extarray
import corepy.arch.spu.isa as spu
import corepy.arch.spu.lib.util as util
import corepy.arch.spu.platform as env

# Demonstrates inserting one instruction stream (a "substream") into another
# stream more than once.
prgm = env.Program()
code = prgm.get_stream()
proc = env.Processor()

# Generate substream
# Multiply gp_return by 2 (shift left by 1), then add 1
subcode = prgm.get_stream()
subcode.add(spu.shli(subcode.gp_return, subcode.gp_return, 1))
subcode.add(spu.ai(subcode.gp_return, subcode.gp_return, 1))

# Initialize gp_return to 5, then insert the substream (5 * 2 + 1 = 11)
code.add(spu.il(code.gp_return, 5))
code.add(subcode)

# Add 3, then insert the substream a second time ((11 + 3) * 2 + 1 = 29)
code.add(spu.ai(code.gp_return, code.gp_return, 3))
code.add(subcode)

#code.print_code()

prgm.add(code)
prgm.print_code()  # TODO  - support print prgm instead?

# Execute and print the integer return value
ret = proc.execute(prgm, mode='int')
print "ret", ret
Exemplo n.º 26
0
 def copy_register(self, other):
     """
     Emit a copy of register `other` into this register.

     The copy is an add-immediate of 0 added to self.code; whatever
     self.code.add() returns is passed back to the caller.
     """
     move = spu.ai(self, other, 0)
     return self.code.add(move)
Exemplo n.º 27
0
  # Build a small demo program and hand it to the SPUApp GUI.
  # NOTE(review): prgm is created outside the lines visible here.
  code = prgm.get_stream()
  reg = prgm.acquire_register()
  foo = prgm.acquire_register(reg_name = 5)

  # Labelled prologue: load 0xCAFE into register 5, build 0xDEADBEEF in reg
  # from its upper (ilhu) and lower (iohl) halfwords, then store reg with
  # stqd using r_zero as the base and 4 as the displacement operand.
  code.add(prgm.get_label("FOO"))
  code.add(spu.il(foo, 0xCAFE))
  code.add(spu.ilhu(reg, 0xDEAD))
  code.add(spu.iohl(reg, 0xBEEF))
  code.add(spu.stqd(reg, code.r_zero, 4))

  lbl_loop = prgm.get_label("LOOP")
  lbl_break = prgm.get_label("BREAK")

  # Loop state: the counter is the integer return register, the stop value
  # lives in register 9
  r_cnt = code.gp_return
  r_stop = prgm.acquire_register(reg_name = 9)
  r_cmp = prgm.acquire_register()

  # r_cnt = 0, r_stop = 5
  code.add(spu.ori(r_cnt, code.r_zero, 0))
  code.add(spu.il(r_stop, 5))

  # Count r_cnt up by 1 per iteration and leave the loop once it equals
  # r_stop.  The labels are looked up again by name for the branch targets.
  code.add(lbl_loop)
  code.add(spu.ceq(r_cmp, r_cnt, r_stop))
  code.add(spu.brnz(r_cmp, prgm.get_label("BREAK")))
  code.add(spu.ai(r_cnt, r_cnt, 1))
  code.add(spu.br(prgm.get_label("LOOP")))
  code.add(lbl_break)

  # Hand the stream to the application and enter its main loop
  app = SPUApp(code)
  app.MainLoop()

Exemplo n.º 28
0
    def synthesize(self, code):
        """
        Emit the SPU code for a single-precision logarithm of self.x,
        leaving the value in self.result.

        The instruction sequence follows the C reference quoted in the
        inline comments: split x into exponent and mantissa, normalise the
        mantissa around SQRTHF, evaluate a polynomial in x, and recombine
        with the exponent (the final steps are labelled "Convert to log
        base 2").  Both sides of the normalisation `if` are computed and the
        result is chosen with selb, so the emitted code has no branches.

        Constants are read from self.consts ('_23', 'M1', 'M2', 'SQRTHF',
        'ONE', 'C1'..'C10', 'LOG2EA').

        `code` is made the active stream while emitting and the previous
        active stream is restored at the end.

        Raises:
            Exception: if self.x or self.result has not been set.
        """
        old_code = spu.get_active_code()
        spu.set_active_code(code)

        if self.x is None: raise Exception("Please set x")
        if self.result is None: raise Exception("Please set result")

        # exponent
        e = var.Word()

        # Working values
        x = var.Word()
        y = var.Word()
        z = var.Word()

        # Comparison mask for the select below, and a scratch word
        cmp = var.Bits()
        tmp = var.Word()

        spu.xor(cmp, cmp, cmp)
        spu.xor(tmp, tmp, tmp)

        # Set the working x
        x.v = self.x

        # Extract the exponent
        # int e = (((*(unsigned int *) &x) >> 23) & 0xff) - 0x7e;
        e.v = x >> self.consts['_23']
        e.v = spu.andi.ex(e, 0xff)
        e.v = spu.ai.ex(e, 0x382)  # 0x382 == (- 0x7E) using 10 bits
        # 0b 111 1110

        # Extract the mantissa
        x.v = x & self.consts['M1']  # *(unsigned int*)&x &= 0x807fffff;
        x.v = x | self.consts['M2']  # *(unsigned int*)&x |= 0x3f000000;

        # Normalize
        # x1/e1 hold the "true" branch values, x2 the "false" branch value;
        # they reuse the y, z and tmp variables.
        x1, x2, e1 = y, z, tmp

        # if (x < SQRTHF)
        cmp.v = spu.fcgt.ex(self.consts['SQRTHF'], x)

        # (True) { ... }
        e1.v = spu.ai.ex(e, -1)  #   e -= 1;
        x1.v = spu.fa.ex(x, x)  #   x = x + x - 1.0;
        x1.v = spu.fs.ex(x1, self.consts['ONE'])  #     ""  ""

        # (False) { ... }
        x2.v = spu.fs.ex(x, self.consts['ONE'])  #   x = x - 1.0;

        # Select the True/False values based on cmp
        e.v = spu.selb.ex(e, e1, cmp)
        x.v = spu.selb.ex(x2, x1, cmp)

        # Compute polynomial
        # Evaluated as nested multiply-add / multiply-subtract steps,
        # alternating fms and fma to match the signs of the coefficients.
        z.v = spu.fm.ex(x, x)  #  z = x * x;

        y.v = spu.fms.ex(
            self.consts['C1'],
            x,  #  y = (((((((( 7.0376836292E-2 * x
            self.consts['C2'])  #  - 1.1514610310E-1) * x
        y.v = spu.fma.ex(y, x,
                         self.consts['C3'])  #  + 1.1676998740E-1) * x
        y.v = spu.fms.ex(y, x, self.consts['C4'])  #  - 1.2420140846E-1) * x
        y.v = spu.fma.ex(y, x, self.consts['C5'])  #  + 1.4249322787E-1) * x
        y.v = spu.fms.ex(y, x, self.consts['C6'])  #  - 1.6668057665E-1) * x
        y.v = spu.fma.ex(y, x, self.consts['C7'])  #  + 2.0000714765E-1) * x
        y.v = spu.fms.ex(y, x, self.consts['C8'])  #  - 2.4999993993E-1) * x
        y.v = spu.fma.ex(y, x, self.consts['C9'])  #  + 3.3333331174E-1)
        y.v = spu.fm.ex(y, x)  #   * x
        y.v = spu.fm.ex(y, z)  #   * z;

        y.v = spu.fma.ex(self.consts['C10'], z, y)  #  y += -0.5 * z;

        # Convert to log base 2
        z.v = spu.fm.ex(y, self.consts['LOG2EA'])  # z = y * LOG2EA;
        z.v = spu.fma.ex(x, self.consts['LOG2EA'], z)  # z += x * LOG2EA;
        z.v = spu.fa.ex(z, y)  # z += y;
        z.v = spu.fa.ex(z, x)  # z += x;
        e.v = spu.csflt.ex(e, 155)  # z += (float) e;
        z.v = spu.fa.ex(z, e)  #  ""  ""

        # Copy z into the result register (add-immediate of 0)
        spu.ai(self.result, z, 0)  # return z

        spu.set_active_code(old_code)
        return
Exemplo n.º 29
0
def TestSPUParallelIter(data, size, n_spus = 6, buffer_size = 16, run_code = True):
  """
  Build (and optionally run) a parallel SPU program that streams `data`
  through local store one buffer at a time.

  For each buffer the generated code fetches buffer_size * 4 bytes with
  mfc_get, doubles every quadword in place (current + current) inside a
  synthetic SPU loop, and writes the buffer back with mfc_put.

  data        -- array-like object; data.buffer_info()[0] is used as its
                 effective address (presumably 4-byte unsigned ints, see
                 the commented-out setup below -- TODO confirm)
  size        -- number of 4-byte elements to process
  n_spus      -- number of SPUs passed to proc.execute()
  buffer_size -- elements per DMA buffer
  run_code    -- when false, the synthesized stream is returned instead of
                 being executed

  Returns the instruction stream when run_code is false, otherwise the wall
  clock time in seconds spent in proc.execute().
  """
  import time
  # n_spus = 8
  # buffer_size = 16 # 16 ints/buffer
  # n_buffers   = 4  # 4 buffers/spu
  # n_buffers = size / buffer_size
  # size = buffer_size * n_buffers * n_spus
  # data = array.array('I', range(size + 2))

  #data = env.aligned_memory(n, typecode = 'I')
  #data.copy_to(data_array.buffer_info()[0], len(data_array))


  # print 'Data align: 0x%X, %d' % (data.buffer_info()[0], data.buffer_info()[0] % 16)

  code = env.ParallelInstructionStream()
  # code = env.InstructionStream()

  r_zero    = code.acquire_register()
  r_ea_data = code.acquire_register()
  r_ls_data = code.acquire_register()
  r_size    = code.acquire_register()
  r_tag     = code.acquire_register()  

  # Load zero
  util.load_word(code, r_zero, 0)

  # print 'array ea: 0x%X 0x%X' % (data.buffer_info()[0], long(data.buffer_info()[0]))
  # print 'r_zero = %d, ea_data = %d, ls_data = %d, r_size = %d, r_tag = %d' % (
  #   r_zero, r_ea_data, r_ls_data, r_size, r_tag)

  # Load the effective address, bumped by 8 bytes when it is not 16-byte
  # aligned.
  # NOTE(review): r_ea_data is not read again in the code visible here; the
  # DMA calls below use the loop variable `ea` instead -- confirm whether
  # this adjustment is still needed.
  if data.buffer_info()[0] % 16 == 0:
    util.load_word(code, r_ea_data, data.buffer_info()[0])
  else: 
    util.load_word(code, r_ea_data, data.buffer_info()[0] + 8)

  ea_start = data.buffer_info()[0]
  # Iterate over each buffer: ea steps through the array in strides of one
  # buffer (buffer_size * 4 bytes), wrapped in parallel()
  for ea in parallel(syn_range(code, ea_start, ea_start + size * 4 , buffer_size * 4)):
    # ea = var.SignedWord(code = code, reg = r_ea_data)
  
    # print 'n_iters:', size / buffer_size
    # for i in syn_range(code, size / buffer_size):

    # code.add(spu.stop(0xB))
  
    # Load the size
    util.load_word(code, r_size, buffer_size * 4)

    # Load the tag
    code.add(spu.ai(r_tag, r_zero, 12))

    # Load the lsa
    code.add(spu.ai(r_ls_data, r_zero, 0))

    # Load the data into address 0
    dma.mfc_get(code, r_ls_data, ea, r_size, r_tag)

    # Set the tag bit to 12
    dma.mfc_write_tag_mask(code, 1<<12);

    # Wait for the transfer to complete
    dma.mfc_read_tag_status_all(code);

    # Double every quadword of the buffer in place using a synthetic SPU
    # loop, counting the iterations in `count`
    # r_current = code.acquire_register()
    current = var.SignedWord(0, code)

    count = var.SignedWord(0, code)
    # Use an SPU iter
    for lsa in syn_iter(code, buffer_size * 4, 16):
      code.add(spu.lqx(current, r_zero, lsa))
      # code.add(spu.ai(1, r_current, r_current))
      current.v = current + current
      code.add(spu.stqx(current, r_zero, lsa))    
      count.v = count + 1

    # Overwrite local store address 0 with the iteration count
    code.add(spu.stqx(count, r_zero, 0))
  
    # code.release_register(r_current)
    # NOTE(review): only `current` is released here; `count` is not.
    current.release_registers(code)

    # Store the values back to main memory

    # Load the tag
    code.add(spu.ai(r_tag, r_zero, 13))

    # Write the buffer at local store address 0 back to main memory
    dma.mfc_put(code, r_ls_data, ea.reg, r_size, r_tag)

    # Set the tag bit to 13
    dma.mfc_write_tag_mask(code, 1<<13);

    # Wait for the transfer to complete
    dma.mfc_read_tag_status_all(code);


    # code.add(spu.stop(0xB))

    # Update ea
    # ea.v = ea + (buffer_size * 4)
  # /for ea address 


  # Cleanup
  code.release_register(r_zero)
  code.release_register(r_ea_data)
  code.release_register(r_ls_data)  
  code.release_register(r_size)
  code.release_register(r_tag)  

  if not run_code:
    return code

  # Stop for debugging
  # code.add(spu.stop(0xA))

  # Execute the code
  proc = env.Processor()
  #data.copy_from(data_array.buffer_info()[0], len(data_array))  
  # Debug helper (its calls below are commented out): prints one element
  # per buffer
  def print_blocks():
    for i in range(0, size, buffer_size):
      # print data[i:(i + buffer_size)]
      print data[i + buffer_size],
    print '' 
  
  # print_blocks()
  # Time only the execution itself
  s = time.time()
  r = proc.execute(code, n_spus = n_spus)
  # r = proc.execute(code)
  t = time.time() - s
  # print_blocks()

  return t
Exemplo n.º 30
0
def TestMFC():
  import corepy.lib.extarray as extarray
  import corepy.arch.spu.platform as synspu 

  size = 32
  #data_array = array.array('I', range(size))
  #data = synspu.aligned_memory(size, typecode = 'I')
  #data.copy_to(data_array.buffer_info()[0], len(data_array))
  data = extarray.extarray('I', range(size))
  code = synspu.InstructionStream()

  r_zero    = code.acquire_register()
  r_ea_data = code.acquire_register()
  r_ls_data = code.acquire_register()
  r_size    = code.acquire_register()
  r_tag     = code.acquire_register()  

  # Load zero
  util.load_word(code, r_zero, 0)

  print 'array ea: %X' % (data.buffer_info()[0])
  print 'r_zero = %s, ea_data = %s, ls_data = %s, r_size = %s, r_tag = %s' % (
    str(r_zero), str(r_ea_data), str(r_ls_data), str(r_size), str(r_tag))
  
  # Load the effective address
  print 'test ea: %X' % data.buffer_info()[0]
  util.load_word(code, r_ea_data, data.buffer_info()[0])

  # Load the size
  code.add(spu.ai(r_size, r_zero, size * 4))

  # Load the tag
  code.add(spu.ai(r_tag, r_zero, 2))

  # Load the lsa
  code.add(spu.ai(r_ls_data, r_zero, 0))

  # Load the data into address 0
  mfc_get(code, r_ls_data, r_ea_data, r_size, r_tag)

  # Set the tag bit to 2
  mfc_write_tag_mask(code, 1<<2);

  # Wait for the transfer to complete
  mfc_read_tag_status_all(code);

  # Increment the data values by 1 using an unrolled loop (no branches)
  r_current = code.acquire_register()

  for lsa in range(0, size * 4, 16):
    code.add(spu.lqa(r_current, (lsa >> 2)))
    code.add(spu.ai(r_current, r_current, 1))
    code.add(spu.stqa(r_current, (lsa >> 2)))

  code.release_register(r_current)
             
  # Store the values back to main memory

  # Load the data into address 0
  mfc_put(code, r_ls_data, r_ea_data, r_size, r_tag)

  # Set the tag bit to 2
  mfc_write_tag_mask(code, 1<<2);

  # Wait for the transfer to complete
  mfc_read_tag_status_all(code);

  # Cleanup
  code.release_register(r_zero)
  code.release_register(r_ea_data)
  code.release_register(r_ls_data)  
  code.release_register(r_size)
  code.release_register(r_tag)  

  # Stop for debugging
  # code.add(spu.stop(0xA))

  # Execute the code
  proc = synspu.Processor()
  # code.print_code()
  #print data_array
  proc.execute(code)

  #data.copy_from(data_array.buffer_info()[0], len(data_array))

  for i in range(size):
    assert(data[i] == i + 1)
  
  return
Exemplo n.º 31
0
def TestDebug():
    prgm = Program()
    code = prgm.get_stream()
    proc = DebugProcessor()

    spu.set_active_code(code)

    ra = code.acquire_register()
    rb = code.acquire_register()
    rc = code.acquire_register()
    rd = code.acquire_register()
    re = code.acquire_register()
    rf = code.acquire_register()
    rg = code.acquire_register()
    rh = code.acquire_register()

    spu.ai(ra, 0, 14)
    spu.ai(rb, 0, 13)
    spu.ai(rc, 0, 14)
    spu.brnz(14, 3)
    spu.ai(rd, 0, 15)
    spu.ai(re, 0, 16)
    spu.ai(rf, 0, 17)
    spu.ai(rg, 0, 18)
    spu.ai(rh, 0, 19)
    spu.nop(0)

    spu.stop(0x200A)

    prgm += code
    r = proc.execute(prgm)  # , debug = True)

    r = proc.nexti()
    r = proc.nexti()
    r = proc.nexti()
    r = proc.nexti()

    while r != None:
        r = proc.nexti()
        if r is not None:
            regs = proc.dump_regs()
            print '******', regs[122:]

    assert (r == None)
    print 'int result:', r
    # while True:
    #   pass
    return
Exemplo n.º 32
0
def TestInt():
    """
    Execute a tiny program that ends in stop(0x200D) and check the result.

    Registers 13 and 20 are requested by name; register 20 receives one
    add-immediate of 13 and register 13 receives five.  The program is
    executed with stop=True and the returned pair is asserted to be
    (0, 0x200D).
    """
    prgm = Program()
    code = prgm.get_stream()
    proc = Processor()

    spu.set_active_code(code)

    r13 = prgm.acquire_register(reg_name=13)
    r20 = prgm.acquire_register(reg_name=20)

    spu.ai(r20, r20, 13)
    for _ in range(5):
        spu.ai(r13, r13, 13)

    spu.stop(0x200D)

    prgm += code
    result = proc.execute(prgm, stop=True)

    assert result[0] == 0
    assert result[1] == 0x200D
    return
Exemplo n.º 33
0
    # Excerpt of a larger routine: prgm, code and ITERS are defined above
    # the visible portion.
    spu.set_active_code(code)
    # NOTE(review): psmap and data are only allocated in this excerpt;
    # their use lies outside it.
    psmap = extarray.extarray('I', 131072 / 4)
    data = extarray.extarray('I', range(0, 16))

    # r_sum is the program's return register; r_cnt is the loop counter
    r_sum = prgm.gp_return
    r_cnt = prgm.acquire_register()

    # r_sum = 0, r_cnt = ITERS
    spu.xor(r_sum, r_sum, r_sum)
    load_word(code, r_cnt, ITERS)

    lbl_loop = prgm.get_label("loop")
    code.add(lbl_loop)

    # Each iteration: read the inbound mailbox (the helper hands back a
    # register, released below), bump r_sum, and write r_sum to the
    # interrupting outbound mailbox.
    reg = dma.spu_read_in_mbox(code)

    spu.ai(r_sum, r_sum, 1)
    dma.spu_write_out_intr_mbox(code, r_sum)
    #dma.spu_write_out_mbox(code, reg)

    prgm.release_register(reg)

    # Decrement the counter and loop while it is non-zero
    spu.ai(r_cnt, r_cnt, -1)
    spu.brnz(r_cnt, lbl_loop)

    # After the loop: read signal 1 and copy it (OR with 0) into gp_return
    reg = dma.spu_read_signal1(code)
    spu.ori(code.gp_return, reg, 0)

    # Reuse the two registers: r_cnt = 0, r_sum = 16 * 4
    spu.il(r_cnt, 0)
    spu.il(r_sum, 16 * 4)

    r_data = prgm.acquire_register()
Exemplo n.º 34
0
    # Excerpt of a larger routine (its header is outside the visible part).
    import corepy.arch.spu.lib.util as util

    prgm = env.Program()
    code = prgm.get_stream()
    spu.set_active_code(code)

    r_cnt = prgm.acquire_register()
    r_cmp = prgm.acquire_register()
    r_sum = prgm.acquire_register()

    # r_cnt = 32, r_sum = 0
    spu.il(r_cnt, 32)
    spu.il(r_sum, 0)
    lbl_loop = prgm.get_unique_label("LOOP")
    code.add(lbl_loop)

    # Loop body: r_sum += 1
    spu.ai(r_sum, r_sum, 1)

    # Branch back to LOOP while (r_cnt == 2) is false.
    # NOTE(review): r_cnt is never modified in the visible code, so this
    # comparison cannot change between iterations -- confirm intent.
    spu.ceqi(r_cmp, r_cnt, 2)
    spu.brz(r_cmp, lbl_loop)

    # Fall-through path: r_sum += 10
    spu.ai(r_sum, r_sum, 10)

    #src = prgm.acquire_register()
    #tmp = prgm.acquire_registers(3)
    #dst = prgm.acquire_registers(2)

    #spu.il(tmp[0], 1)
    #spu.il(tmp[1], 2)
    #spu.il(tmp[2], 3)
    #spu.fma(src, tmp[0], tmp[1], tmp[2])
    #spu.fa(dst[0], src, src)
Exemplo n.º 35
0
  def synthesize(self, code):
    """
    Emit the instruction sequence that evaluates a logarithm of self.x
    and leaves the value in self.result.  Per the inline C reference the
    result is converted to log base 2 before being returned.

    code -- instruction stream made active while the sequence is emitted;
            the previously active stream is restored before returning.

    Raises Exception if self.x or self.result has not been set.
    Constants are read from self.consts ('_23', 'M1', 'M2', 'SQRTHF',
    'ONE', 'C1'..'C10', 'LOG2EA').
    """
    old_code = spu.get_active_code()
    spu.set_active_code(code)
    
    if self.x is None: raise Exception("Please set x")
    if self.result is None: raise Exception("Please set result")

    # exponent
    e = var.Word()
    
    # Working values    
    x = var.Word()
    y = var.Word()
    z = var.Word()

    cmp = var.Bits()
    tmp = var.Word()

    # Zero the comparison mask and the scratch word
    spu.xor(cmp, cmp, cmp)
    spu.xor(tmp, tmp, tmp)    

    # Set the working x
    x.v = self.x

    # Extract the exponent
    # int e = (((*(unsigned int *) &x) >> 23) & 0xff) - 0x7e;
    e.v = x >> self.consts['_23']
    e.v = spu.andi.ex(e, 0xff)
    e.v = spu.ai.ex(e, 0x382) # 0x382 == (- 0x7E) using 10 bits
    # (0x7E == 0b111 1110)

    # Extract the mantissa
    x.v = x & self.consts['M1'] # *(unsigned int*)&x &= 0x807fffff;
    x.v = x | self.consts['M2'] # *(unsigned int*)&x |= 0x3f000000;

    # Normalize.  Both branches of the C 'if' are computed and the result
    # is picked with selb.  x1, x2 and e1 are aliases of y, z and tmp, so
    # those variables are clobbered here and recomputed below.
    x1, x2, e1 = y, z, tmp
    
    # if (x < SQRTHF) 
    cmp.v = spu.fcgt.ex(self.consts['SQRTHF'], x)

    # (True) { ... }
    e1.v = spu.ai.ex(e, -1)                  #   e -= 1;
    x1.v = spu.fa.ex(x, x)                   #   x = x + x - 1.0;
    x1.v = spu.fs.ex(x1, self.consts['ONE']) #     ""  ""

    # (False) { ... }
    x2.v = spu.fs.ex(x, self.consts['ONE'])  #   x = x - 1.0;

    # Select the True/False values based on cmp
    e.v = spu.selb.ex(e,  e1, cmp)
    x.v = spu.selb.ex(x2, x1, cmp)

    # Compute polynomial (alternating fms/fma steps, one per coefficient)
    z.v = spu.fm.ex(x, x)                      #  z = x * x;
    
    y.v = spu.fms.ex(self.consts['C1'], x,     #  y = (((((((( 7.0376836292E-2 * x
                     self.consts['C2'])        #         - 1.1514610310E-1) * x
    y.v = spu.fma.ex(y, x, self.consts['C3'])  #         + 1.1676998740E-1) * x
    y.v = spu.fms.ex(y, x, self.consts['C4'])  #         - 1.2420140846E-1) * x
    y.v = spu.fma.ex(y, x, self.consts['C5'])  #         + 1.4249322787E-1) * x
    y.v = spu.fms.ex(y, x, self.consts['C6'])  #         - 1.6668057665E-1) * x
    y.v = spu.fma.ex(y, x, self.consts['C7'])  #         + 2.0000714765E-1) * x
    y.v = spu.fms.ex(y, x, self.consts['C8'])  #         - 2.4999993993E-1) * x
    y.v = spu.fma.ex(y, x, self.consts['C9'])  #         + 3.3333331174E-1)
    y.v = spu.fm.ex(y, x)                      #   * x 
    y.v = spu.fm.ex(y, z)                      #   * z;   
    
    # presumably consts['C10'] holds -0.5, per the C reference -- confirm
    y.v = spu.fma.ex(self.consts['C10'], z, y) #  y += -0.5 * z;

    # Convert to log base 2
    z.v = spu.fm.ex( y, self.consts['LOG2EA'])     # z = y * LOG2EA;
    z.v = spu.fma.ex(x, self.consts['LOG2EA'], z)  # z += x * LOG2EA;
    z.v = spu.fa.ex(z, y)                          # z += y;
    z.v = spu.fa.ex(z, x)                          # z += x;
    e.v = spu.csflt.ex(e, 155)                     # z += (float) e;
    z.v = spu.fa.ex(z, e)                          #  ""  ""
    
    # Copy z into the result register via an add-immediate of 0
    spu.ai(self.result, z, 0)       # return z

    spu.set_active_code(old_code)
    return
Exemplo n.º 36
0
import corepy.arch.spu.lib.dma as dma
from corepy.arch.spu.lib.util import load_word

import time

if __name__ == '__main__':
  # Repeated-execution run: assemble a short countdown loop once, then
  # execute the same program 10000 times on one Processor.
  prgm = env.Program()
  code = prgm.get_stream()
  proc = env.Processor()

  spu.set_active_code(code)

  # Counter starts at 0x10000
  r_cnt = prgm.acquire_register()
  load_word(code, r_cnt, 0x10000)

  # Stream size just before the loop body is emitted
  loop_start = code.size()

  spu.ai(r_cnt, r_cnt, -1)

  # Branch operand: loop start minus the stream size at the point the
  # branch is emitted (i.e. after the decrement above).
  back_offset = loop_start - code.size()
  spu.brnz(r_cnt, back_offset)

  prgm.add(code)
  prgm.print_code()

  runs = 0
  while runs < 10000:
    proc.execute(prgm)
    runs += 1
 
 
Exemplo n.º 37
0
import corepy.arch.spu.isa as spu
import corepy.arch.spu.platform as env
import corepy.arch.spu.lib.dma as dma
from corepy.arch.spu.lib.util import load_word

import time

if __name__ == '__main__':
    # Build a two-instruction countdown loop and run the resulting
    # program 10000 times back to back.
    prgm = env.Program()
    code = prgm.get_stream()
    proc = env.Processor()
    spu.set_active_code(code)

    # Loop counter, initialised to 0x10000
    r_cnt = prgm.acquire_register()
    load_word(code, r_cnt, 0x10000)

    # Record the stream size where the loop body starts, emit the
    # decrement, then branch by (start - current size) while non-zero.
    loop_top = code.size()
    spu.ai(r_cnt, r_cnt, -1)
    delta = loop_top - code.size()
    spu.brnz(r_cnt, delta)

    prgm.add(code)
    prgm.print_code()

    for run in xrange(10000):
        proc.execute(prgm)
Exemplo n.º 38
0
# Literal register numbers used by the instruction list below.
# (ia, ib and fa are defined above the visible excerpt.)
fb = 124

# Successive reciprocal estimates
y0 = 120
y1 = 121
y2 = 122

# Scratch register for the refinement step
t1 = 119

result = 118

# Integer 1 and its float conversion
ione = 110
fone = 111

# Instruction sequence producing result = fa * (1 / fb).
# NOTE(review): the integer loads add an immediate to register 0, so they
# presumably rely on register 0 holding zero -- confirm.
insts = [
    # Create fone = 1.0, fa = 2.0 and fb = 4.0
    spu.ai(ione, 0, 1),
    spu.ai(ia, 0, 2),
    spu.ai(ib, 0, 4),
    spu.cuflt(fone, ione, 155),
    spu.cuflt(fa, ia, 155),
    spu.cuflt(fb, ib, 155),

    # Compute 1/fb: estimate (frest), interpolate (fi), then one
    # refinement step built from fnms and fma
    spu.frest(y0, fb),
    spu.fi(y1, fb, y0),
    spu.fnms(t1, fb, y1, fone),
    spu.fma(y2, t1, y1, y1),
    # result = fa * (1/fb)
    spu.fm(result, fa, y2)
]

for inst in insts:
Exemplo n.º 39
0
def TestInt():
    """
    Run a short InstructionStream ending in stop(0x200D) and verify the
    value returned by execute(..., stop=True).

    Register 20 gets one add-immediate of 13, register 13 gets five; the
    returned pair must equal (0, 0x200D).
    """
    code = InstructionStream()
    proc = Processor()

    spu.set_active_code(code)

    r13 = code.acquire_register(reg=13)
    r20 = code.acquire_register(reg=20)

    spu.ai(r20, r20, 13)
    for _ in range(5):
        spu.ai(r13, r13, 13)

    spu.stop(0x200D)

    outcome = proc.execute(code, stop=True)

    assert outcome[0] == 0
    assert outcome[1] == 0x200D
    return
Exemplo n.º 40
0
def TestSPUIter():
  """
  DMA round trip with the loop generated by syn_iter.

  Transfers a 32-word array into local store (tag 12), adds each loaded
  quadword to itself inside a syn_iter loop stepping 16 bytes at a time,
  transfers the buffer back to main memory (tag 13), executes the
  program and asserts that every element equals i + i.
  """
  size = 32
  n_bytes = size * 4
  data = extarray.extarray('I', range(size))
  prgm = env.Program()
  code = prgm.get_stream()

  # DMA parameter registers, acquired in a fixed order
  dma_regs = [prgm.acquire_register() for _ in range(4)]
  r_ea_data, r_ls_data, r_size, r_tag = dma_regs

  # Effective address and transfer size in bytes
  util.load_word(code, r_ea_data, data.buffer_info()[0])
  util.load_word(code, r_size, n_bytes)

  # Tag 12 for the inbound transfer, local store address 0
  code.add(spu.ai(r_tag, code.r_zero, 12))
  code.add(spu.ai(r_ls_data, code.r_zero, 0))

  # Main memory -> local store, then wait on tag 12
  dma.mfc_get(code, r_ls_data, r_ea_data, r_size, r_tag)
  dma.mfc_write_tag_mask(code, 1 << 12)
  dma.mfc_read_tag_status_all(code)

  # Working variable for the loop body
  current = var.SignedWord(0, code)

  # Load, add to itself, store back -- once per 16-byte step
  for lsa in syn_iter(code, n_bytes, 16):
    code.add(spu.lqx(current, code.r_zero, lsa))
    current.v = current + current
    code.add(spu.stqx(current, code.r_zero, lsa))

  # Switch to tag 13 for the outbound transfer
  code.add(spu.ai(r_tag, code.r_zero, 13))

  # Local store -> main memory, then wait on tag 13
  dma.mfc_put(code, r_ls_data, r_ea_data, r_size, r_tag)
  dma.mfc_write_tag_mask(code, 1 << 13)
  dma.mfc_read_tag_status_all(code)

  # Cleanup
  for reg in dma_regs:
    prgm.release_register(reg)

  # Execute the code
  prgm.add(code)
  proc = env.Processor()
  proc.execute(prgm)

  for i in range(0, size):
    assert data[i] == i + i

  return
Exemplo n.º 41
0
  # Excerpt of a larger routine (its header is outside the visible part).
  import corepy.arch.spu.lib.util as util

  prgm = env.Program()
  code = prgm.get_stream()
  spu.set_active_code(code)

  r_cnt = prgm.acquire_register()
  r_cmp = prgm.acquire_register()
  r_sum = prgm.acquire_register()

  # r_cnt = 32, r_sum = 0
  spu.il(r_cnt, 32)
  spu.il(r_sum, 0)
  lbl_loop = prgm.get_unique_label("LOOP")
  code.add(lbl_loop)

  # Loop body: r_sum += 1
  spu.ai(r_sum, r_sum, 1)

  # Branch back to LOOP while (r_cnt == 2) is false.
  # NOTE(review): r_cnt is never modified in the visible code, so this
  # comparison cannot change between iterations -- confirm intent.
  spu.ceqi(r_cmp, r_cnt, 2)
  spu.brz(r_cmp, lbl_loop)

  # Fall-through path: r_sum += 10
  spu.ai(r_sum, r_sum, 10)

  #src = prgm.acquire_register()
  #tmp = prgm.acquire_registers(3)
  #dst = prgm.acquire_registers(2)

  #spu.il(tmp[0], 1)
  #spu.il(tmp[1], 2)
  #spu.il(tmp[2], 3)
  #spu.fma(src, tmp[0], tmp[1], tmp[2])
  #spu.fa(dst[0], src, src)
Exemplo n.º 42
0
 def copy_register(self, other):
     """
     Add an add-immediate-of-zero instruction (destination self, source
     other) to self.code and return whatever self.code.add returns.
     """
     copy_inst = spu.ai(self, other, 0)
     return self.code.add(copy_inst)
Exemplo n.º 43
0
def TestDebug():
  prgm = Program()
  code = prgm.get_stream()
  proc = DebugProcessor()

  spu.set_active_code(code)

  ra = code.acquire_register()
  rb = code.acquire_register()
  rc = code.acquire_register()
  rd = code.acquire_register()
  re = code.acquire_register()
  rf = code.acquire_register()
  rg = code.acquire_register()
  rh = code.acquire_register()  

  spu.ai(ra, 0, 14)
  spu.ai(rb, 0, 13)
  spu.ai(rc, 0, 14)
  spu.brnz(14, 3)
  spu.ai(rd, 0, 15)
  spu.ai(re, 0, 16)
  spu.ai(rf, 0, 17)
  spu.ai(rg, 0, 18)
  spu.ai(rh, 0, 19)    
  spu.nop(0)

  spu.stop(0x200A)

  prgm += code
  r = proc.execute(prgm) # , debug = True)

  r = proc.nexti()
  r = proc.nexti()
  r = proc.nexti()
  r = proc.nexti()
    
  while r != None:
    r = proc.nexti()
    if r is not None:
      regs = proc.dump_regs()
      print '******', regs[122:]
    
  assert(r == None)
  print 'int result:', r
  # while True:
  #   pass
  return
Exemplo n.º 44
0
# Literal register numbers used by the instruction list below.
# (ia, ib and fa are defined above the visible excerpt.)
fb = 124

# Successive reciprocal estimates
y0 = 120
y1 = 121
y2 = 122

# Scratch register for the refinement step
t1 = 119

result  = 118

# Integer 1 and its float conversion
ione = 110
fone = 111

# Instruction sequence producing result = fa * (1 / fb).
# NOTE(review): the integer loads add an immediate to register 0, so they
# presumably rely on register 0 holding zero -- confirm.
insts = [
  # Create fone = 1.0, fa = 2.0 and fb = 4.0
  spu.ai(ione, 0, 1),  
  spu.ai(ia, 0, 2),
  spu.ai(ib, 0, 4),
  spu.cuflt(fone, ione, 155),  
  spu.cuflt(fa, ia, 155),
  spu.cuflt(fb, ib, 155),

  # Compute 1/fb: estimate (frest), interpolate (fi), then one
  # refinement step built from fnms and fma
  spu.frest(y0, fb),
  spu.fi(y1, fb, y0),
  spu.fnms(t1, fb, y1, fone),
  spu.fma(y2, t1, y1, y1),

  # result = fa * (1/fb)
  spu.fm(result, fa, y2)
  ]
Exemplo n.º 45
0
def TestInt():
  """
  Check the value returned by Processor.execute(..., stop = True).

  Builds a program that applies add-immediate 13 once to register 20 and
  five times to register 13, then stops with 0x200D.  The returned pair
  is asserted to be (0, 0x200D).
  """
  prgm = Program()
  code = prgm.get_stream()
  proc = Processor()

  spu.set_active_code(code)

  r13 = prgm.acquire_register(reg_name = 13)
  r20 = prgm.acquire_register(reg_name = 20)

  spu.ai(r20, r20, 13)
  repeats = 5
  while repeats > 0:
    spu.ai(r13, r13, 13)
    repeats -= 1

  spu.stop(0x200D)

  prgm += code
  status = proc.execute(prgm, stop = True)

  assert status[0] == 0
  assert status[1] == 0x200D
  return
Exemplo n.º 46
0
    # Excerpt of a larger routine (its header is outside the visible part):
    # builds a small labelled program and hands the stream to SPUApp.
    prgm = env.Program()
    code = prgm.get_stream()
    reg = prgm.acquire_register()
    foo = prgm.acquire_register(reg_name=5)

    # foo = 0xCAFE; reg is loaded in two halves (0xDEAD via ilhu, then
    # 0xBEEF via iohl) and stored with stqd relative to r_zero.
    code.add(prgm.get_label("FOO"))
    code.add(spu.il(foo, 0xCAFE))
    code.add(spu.ilhu(reg, 0xDEAD))
    code.add(spu.iohl(reg, 0xBEEF))
    code.add(spu.stqd(reg, code.r_zero, 4))

    lbl_loop = prgm.get_label("LOOP")
    lbl_break = prgm.get_label("BREAK")

    # r_cnt lives in the return register; r_stop is pinned to register 9
    r_cnt = code.gp_return
    r_stop = prgm.acquire_register(reg_name=9)
    r_cmp = prgm.acquire_register()

    # r_cnt = 0, r_stop = 5
    code.add(spu.ori(r_cnt, code.r_zero, 0))
    code.add(spu.il(r_stop, 5))

    # LOOP: leave via BREAK once r_cnt == r_stop, otherwise r_cnt += 1.
    # The branch targets are looked up again by name with get_label.
    code.add(lbl_loop)
    code.add(spu.ceq(r_cmp, r_cnt, r_stop))
    code.add(spu.brnz(r_cmp, prgm.get_label("BREAK")))
    code.add(spu.ai(r_cnt, r_cnt, 1))
    code.add(spu.br(prgm.get_label("LOOP")))
    code.add(lbl_break)

    # Hand the stream to the application object and enter its main loop
    app = SPUApp(code)
    app.MainLoop()
Exemplo n.º 47
0
import corepy.lib.extarray as extarray
import corepy.arch.spu.isa as spu
import corepy.arch.spu.lib.util as util
import corepy.arch.spu.platform as env

prgm = env.Program()
code = prgm.get_stream()
proc = env.Processor()

# Generate substream
# Multiply gp_return by 2, add 1
subcode = prgm.get_stream()
subcode.add(spu.shli(subcode.gp_return, subcode.gp_return, 1))
subcode.add(spu.ai(subcode.gp_return, subcode.gp_return, 1))

# Initialize gp_return, insert code
code.add(spu.il(code.gp_return, 5))
code.add(subcode)

# Add 3, insert again
code.add(spu.ai(code.gp_return, code.gp_return, 3))
code.add(subcode)

#code.print_code()

prgm.add(code)
prgm.print_code() # TODO  - support print prgm instead?

ret = proc.execute(prgm, mode = 'int')
print "ret", ret
Exemplo n.º 48
0
def SimpleSPU():
  """
  A very simple SPU that computes 11 + 31 and returns 0xA on success.
  """
  prgm = env.Program()
  code = prgm.get_stream()
  proc = env.Processor()

  spu.set_active_code(code)
  

  # Acquire two registers
  #x    = code.acquire_register()
  x = prgm.gp_return
  test = prgm.acquire_register()

  lbl_brz = prgm.get_label("BRZ")
  lbl_skip = prgm.get_label("SKIP")

  spu.hbrr(lbl_brz, lbl_skip)
  spu.xor(x, x, x) # zero x
  spu.ai(x, x, 11) # x = x + 11
  spu.ai(x, x, 31) # x = x + 31

  spu.ceqi(test, x, 42) # test = (x == 42)

  # If test is false (all 0s), skip the stop(0x100A) instruction
  code.add(lbl_brz)
  spu.brz(test, lbl_skip)
  spu.stop(0x100A)
  code.add(lbl_skip)
  spu.stop(0x100B)

  prgm.add(code) 
  prgm.print_code() 
  r = proc.execute(prgm, mode = 'int', stop = True) 
  print "ret", r
  assert(r[0] == 42)
  assert(r[1] == 0x100A)


  prgm = env.Program()
  code = prgm.get_stream()
  spu.set_active_code(code)

  lbl_loop = prgm.get_label("LOOP")
  lbl_break = prgm.get_label("BREAK")

  r_cnt = prgm.acquire_register()
  r_stop = prgm.acquire_register()
  r_cmp = prgm.acquire_register()
  r_foo = prgm.gp_return

  spu.ori(r_foo, prgm.r_zero, 0)
  spu.ori(r_cnt, prgm.r_zero, 0)
  util.load_word(code, r_stop, 10)

  code.add(lbl_loop)

  spu.ceq(r_cmp, r_cnt, r_stop)
  spu.brnz(r_cmp, lbl_break)
  spu.ai(r_cnt, r_cnt, 1)

  spu.a(r_foo, r_foo, r_cnt)

  spu.br(lbl_loop)
  code.add(lbl_break)

  prgm.add(code)
  prgm.print_code()
  r = proc.execute(prgm, mode = 'int', stop = True)
  print "ret", r
  assert(r[0] == 55)

  return
Exemplo n.º 49
0
  # Excerpt of a larger routine: prgm, code and ITERS are defined above
  # the visible portion.
  spu.set_active_code(code)
  # NOTE(review): psmap and data are only allocated in this excerpt;
  # their use lies outside it.
  psmap = extarray.extarray('I', 131072 / 4)
  data = extarray.extarray('I', range(0, 16))

  # r_sum is the program's return register; r_cnt is the loop counter
  r_sum = prgm.gp_return
  r_cnt = prgm.acquire_register()

  # r_sum = 0, r_cnt = ITERS
  spu.xor(r_sum, r_sum, r_sum)
  load_word(code, r_cnt, ITERS)

  lbl_loop = prgm.get_label("loop")
  code.add(lbl_loop)

  # Each iteration: read the inbound mailbox (the helper hands back a
  # register, released below), bump r_sum, and write r_sum to the
  # interrupting outbound mailbox.
  reg = dma.spu_read_in_mbox(code)

  spu.ai(r_sum, r_sum, 1)
  dma.spu_write_out_intr_mbox(code, r_sum)
  #dma.spu_write_out_mbox(code, reg)

  prgm.release_register(reg)

  # Decrement the counter and loop while it is non-zero
  spu.ai(r_cnt, r_cnt, -1)
  spu.brnz(r_cnt, lbl_loop)
 
  # After the loop: read signal 1 and copy it (OR with 0) into gp_return
  reg = dma.spu_read_signal1(code)
  spu.ori(code.gp_return, reg, 0)


  # Reuse the two registers: r_cnt = 0, r_sum = 16 * 4
  spu.il(r_cnt, 0)
  spu.il(r_sum, 16 * 4)
Exemplo n.º 50
0
def TestInt():
  code = InstructionStream()
  proc = Processor()

  spu.set_active_code(code)
  
  r13 = code.acquire_register(reg = 13)
  r20 = code.acquire_register(reg = 20)
  spu.ai(r20, r20, 13)
  spu.ai(r13, r13, 13)
  spu.ai(r13, r13, 13)
  spu.ai(r13, r13, 13)
  spu.ai(r13, r13, 13)
  spu.ai(r13, r13, 13)
  
  spu.stop(0x200D)

  code.print_code()
  r = proc.execute(code) # , debug = True)
  print 'int result:', r
  # while True:
  #   pass
  return