示例#1
0
def set_slot_value(code, reg, slot, value):
  """
  Set the value in reg[slot] with value.  If value is a register, use
  the value from the preferred slot (value[0]).  If value is a
  constant, load it into reg[slot], preserving the values in the other
  slots.
  """
  prgm = code.prgm

  if slot not in [0,1,2,3]:
    raise Exception("Invalid SIMD slot: " + slot)

  mask = prgm.acquire_register()
  vector_from_array(code, mask, [0xFFFFFFFF, 0, 0, 0])

  if not issubclass(type(value), (spe.Register, spe.Variable)):
    r_value = prgm.acquire_register()
    load_word(code, r_value, value)
  else:
    r_value = value

  code.add(spu.rotqbyi(reg, reg, slot * 4))
  code.add(spu.selb(reg, reg, r_value, mask))
  code.add(spu.rotqbyi(reg, reg, (4 - slot) * 4))

  prgm.release_register(mask)
  if not issubclass(type(value), (spe.Register, spe.Variable)):
    prgm.release_register(r_value)
  return
示例#2
0
文件: util.py 项目: forrestv/pyable
def set_slot_value(code, reg, slot, value):
    """
  Set the value in reg[slot] with value.  If value is a register, use
  the value from the preferred slot (value[0]).  If value is a
  constant, load it into reg[slot], preserving the values in the other
  slots.
  """
    prgm = code.prgm

    if slot not in [0, 1, 2, 3]:
        raise Exception("Invalid SIMD slot: " + slot)

    mask = prgm.acquire_register()
    vector_from_array(code, mask, [0xFFFFFFFF, 0, 0, 0])

    if not issubclass(type(value), (spe.Register, spe.Variable)):
        r_value = prgm.acquire_register()
        load_word(code, r_value, value)
    else:
        r_value = value

    code.add(spu.rotqbyi(reg, reg, slot * 4))
    code.add(spu.selb(reg, reg, r_value, mask))
    code.add(spu.rotqbyi(reg, reg, (4 - slot) * 4))

    prgm.release_register(mask)
    if not issubclass(type(value), (spe.Register, spe.Variable)):
        prgm.release_register(r_value)
    return
示例#3
0
    def block(self):
        code = spu.get_active_code()
        self._block_idx = len(code)

        # --> add the branch instruction (use brz (?) to always branch, nop to never branch)
        code[self._branch_idx] = spu.nop(0, ignore_active=True)
        # code[self._branch_idx] = spu.brnz(self._cmp, self._block_idx - self._branch_idx, ignore_active = True)
        # code[self._branch_idx] = spu.brz(self._cmp, self._block_idx - self._branch_idx, ignore_active = True)

        # Pack result into vector
        #   [x][y][score][--]

        # Zero the save value
        spu.xor(self._save_value, self._save_value, self._save_value)

        # Copy the score
        spu.selb(self._save_value, self._save_value, self._score,
                 self._word_mask)
        spu.rotqbyi(self._save_value, self._save_value, 12)

        # Copy the y value
        spu.selb(self._save_value, self._save_value, self._y_off,
                 self._word_mask)
        spu.rotqbyi(self._save_value, self._save_value, 12)

        # Copy the x value
        spu.selb(self._save_value, self._save_value, self._x_off,
                 self._word_mask)

        # Save value to local store
        spu.stqx(self._save_value, self._count, self._md_results.r_addr)

        self._count.v = self._count.v + 16

        # --> MemorySave test
        cmp = self._save_value  # reuse the save register
        spu.ceq.ex(cmp, self._count, self._md_results.r_size)

        if self._save_op is not None:
            self._save_op.test(cmp, self._count)

        # Just reset for now
        spu.selb(self._count, self._count, 0, cmp)

        # Return to the loop
        idx = len(code)
        spu.br(-(idx - self._branch_idx - 1))

        return
示例#4
0
  def block(self):
    code = spu.get_active_code()
    self._block_idx = len(code)

    # --> add the branch instruction (use brz (?) to always branch, nop to never branch)
    code[self._branch_idx] = spu.nop(0, ignore_active = True)
    # code[self._branch_idx] = spu.brnz(self._cmp, self._block_idx - self._branch_idx, ignore_active = True)
    # code[self._branch_idx] = spu.brz(self._cmp, self._block_idx - self._branch_idx, ignore_active = True)

    # Pack result into vector
    #   [x][y][score][--]

    # Zero the save value
    spu.xor(self._save_value, self._save_value, self._save_value)

    # Copy the score
    spu.selb(self._save_value, self._save_value, self._score, self._word_mask)    
    spu.rotqbyi(self._save_value, self._save_value, 12)

    # Copy the y value
    spu.selb(self._save_value, self._save_value, self._y_off, self._word_mask)
    spu.rotqbyi(self._save_value, self._save_value, 12)        

    # Copy the x value
    spu.selb(self._save_value, self._save_value, self._x_off, self._word_mask)
    
    # Save value to local store
    spu.stqx(self._save_value, self._count, self._md_results.r_addr)
    
    self._count.v = self._count.v + 16

    # --> MemorySave test
    cmp = self._save_value # reuse the save register
    spu.ceq.ex(cmp, self._count, self._md_results.r_size)

    if self._save_op is not None:
      self._save_op.test(cmp, self._count)
      
    # Just reset for now
    spu.selb(self._count, self._count, 0, cmp)

    # Return to the loop
    idx = len(code)
    spu.br(- (idx - self._branch_idx - 1))
    
    return
示例#5
0
  def _transfer_data(self, code, kernel, lsa, tag):
    """
    Load the data into the SPU memory
    """

    # Check the types
    if not isinstance(code, spe.InstructionStream):
      raise Exception('Code must be an InstructionStream')
    if not (isinstance(lsa, int) or issubclass(type(lsa), (spe.Register, spe.Variable))):
      raise Exception('lsa must be an integer, Register, or Variable')
    
    old_code = spu.get_active_code()
    spu.set_active_code(code)

    # Acquire registers for address and size, if they were not supplied by the user
    if self.r_addr is None: r_ea_data = code.prgm.acquire_register()
    else:                   r_ea_data = self.r_addr
      
    if self.r_size is None: r_size = code.prgm.acquire_register()
    else:                   r_size = self.r_size

    # Create variables 
    ea_addr      = var.SignedWord(reg = r_ea_data)
    aligned_size = var.SignedWord(0)
    mod_16       = var.SignedWord(0xF)

    # Initialize the lsa_addr variable. 
    if isinstance(lsa, int):
      # From a constant
      ls_addr   = var.SignedWord(lsa)
    elif issubclass(type(lsa), (spe.Register, spe.Variable)):
      # From a variable
      ls_addr   = var.SignedWord()      
      ls_addr.v = lsa
      
      
    tag_var = var.SignedWord(tag)
    cmp = var.SignedWord(0)

    # Load the effective address
    if self.r_addr is None:
      if self.addr % 16 != 0:
        print '[get_memory] Misaligned data'

      util.load_word(code, ea_addr, self.addr)

    # Load the size, rounding up as required to be 16-byte aligned
    if self.r_size is None:
      rnd_size = self.size * var.INT_SIZES[self.typecode]
      if rnd_size < 16:
        rnd_size = 16
      elif (rnd_size % 16) != 0:
        rnd_size += (16 - (rnd_size % 16))
      util.load_word(code, aligned_size, rnd_size)
    else:
      # TODO: !!! UNIT TEST THIS !!!
      # Same as above, but using SPU arithemtic to round
      size  = var.SignedWord(reg = r_size)
      sixteen  = var.SignedWord(16)
      cmp.v = ((size & mod_16) == size)
      aligned_size.v = size + (sixteen - (size & mod_16))
      spu.selb(aligned_size.reg, size.reg, aligned_size.reg, cmp.reg, order = _mi(spu.selb))
      code.release_register(sixteen.reg)

    # Use an auxillary register for the moving ea value if the
    # caller supplied the address register
    if self.r_addr is not None:
      ea_load   = var.SignedWord(0)
      ea_load.v = ea_addr
    else:
      ea_load = ea_addr # note that this is reference, not .v assignment

    # Transfer parameters
    buffer_size   = var.SignedWord(16384)
    remaining     = var.SignedWord(0)
    transfer_size = var.SignedWord(0)
    remaining.v   = aligned_size

    # Set up the iterators to transfer at most 16k at a time
    xfer_iter = syn_iter(code, 0, 16384)
    xfer_iter.set_stop_reg(aligned_size.reg)

    for offset in xfer_iter:
      cmp.v = buffer_size > remaining
      spu.selb(transfer_size, buffer_size, remaining, cmp)

      # Transfer the data
      kernel(code, ls_addr, ea_load, transfer_size, tag_var)
      ls_addr.v = ls_addr + buffer_size
      ea_load.v = ea_load + buffer_size

      remaining.v = remaining - buffer_size

    # Set the tag bit to tag
    dma.mfc_write_tag_mask(code, 1<<tag);

    # Wait for the transfer to complete
    dma.mfc_read_tag_status_all(code);

    # Release the registers
    code.release_register(buffer_size.reg)
    code.release_register(remaining.reg)
    code.release_register(aligned_size.reg)    
    code.release_register(transfer_size.reg)
    code.release_register(cmp.reg)
    code.release_register(ls_addr.reg)
    code.release_register(tag_var.reg)
    code.release_register(ea_load.reg)

    if old_code is not None:
      spu.set_active_code(old_code)
    return