Python ai示例，corepy.arch.spu.isa.ai Python示例

示例#1

0

显示文件

    def synthesize(self, code):
        """
        Synthesize the complete program into ``code``.

        The generated code copies register 5 into a working register, inlines
        ``self.popc`` and ``self.reduce_word`` on it and writes the reduced
        result to the ``dma.SPU_WrOutMbox`` channel.

        ``code`` is made the active code object for the duration of the call;
        the previously active code object is restored before returning.
        """
        old_code = spu.get_active_code()
        spu.set_active_code(code)

        # Reserve two variable registers
        count = code.acquire_register()
        result = code.acquire_register()

        # 'Load' the input vector x from register 5 (add-immediate of 0 is
        # used as a register copy)
        x = code.acquire_register()
        spu.ai(x, 5, 0)

        # Zero count and result (xor of a register with itself)
        spu.xor(count, count, count)
        spu.xor(result, result, result)

        # Inline the popc and reduce operations
        self.popc(count, x)
        self.reduce_word(result, count)

        # Send the result to the caller
        spu.wrch(result, dma.SPU_WrOutMbox)

        # Hand back every register acquired above.  Previously only x was
        # released, so count and result stayed reserved on the code object.
        code.release_register(x)
        code.release_register(count)
        code.release_register(result)

        spu.set_active_code(old_code)
        return

示例#2

0

显示文件

文件： spu_lyapunov_png.py 项目： KapilRijhwani/corepy

  def synthesize(self, code):
    """
    Render a vector with 4 pixels.

    Emits the instructions that turn ``self.result`` into RGBA bytes, store
    them at ``self.x_offset`` (displacement ``self.lsa >> 4``) and advance
    ``self.x_offset`` by 16.

    Raises:
      Exception: if ``x_offset``, ``result`` or ``one`` has not been set.
    """
    # Validate before touching the global active-code state so a failed
    # precondition does not leave `code` installed as the active code.
    if self.x_offset is None:
      raise Exception('Please call setup')
    if self.result is None:
      raise Exception('Please set result')
    if self.one is None:
      raise Exception('Please set one')

    old_code = spu.get_active_code()
    spu.set_active_code(code)

    # Make the part of the result positive and subtract 1
    # to transform (-1,-oo) into (0,oo)
    self.result.v = spu.fs.ex(0, self.result)
    self.result.v = spu.fs.ex(self.result, self.one)

    # Convert the result to an unsigned int, scaling by 2^4 to put
    # values between 0 and 16 in the gradient.  Values outside [0,16]
    # are 0 or FF
    self.result.v = spu.cfltu.ex(self.result, 169) # 173 - 169 == 4
    # self.result.v = spu.sfi.ex(self.result, 255) # 173 - 169 == 4

    # Extract the first two bytes from the result into the RGB positions
    # and set alpha to 0xFF
    self.result.v = spu.shufb.ex(self.result, self.ff, self.uint2rgba)

    # Save the result and increment the offset
    spu.stqd(self.result, self.x_offset, self.lsa >> 4)
    spu.ai(self.x_offset, self.x_offset, 16)

    spu.set_active_code(old_code)
    return

示例#3

0

显示文件

文件： popc.py 项目： KapilRijhwani/corepy

  def synthesize(self, code):
    """
    Synthesize the complete program into ``code``.

    The generated code copies register 5 into a working register, inlines
    ``self.popc`` and ``self.reduce_word`` on it and writes the reduced
    result to the ``dma.SPU_WrOutMbox`` channel.

    ``code`` is made the active code object for the duration of the call;
    the previously active code object is restored before returning.
    """
    old_code = spu.get_active_code()
    spu.set_active_code(code)

    # Reserve two variable registers
    count  = code.acquire_register()
    result = code.acquire_register()

    # 'Load' the input vector x from register 5 (add-immediate of 0 is
    # used as a register copy)
    x = code.acquire_register()
    spu.ai(x, 5, 0)

    # Zero count and result (xor of a register with itself)
    spu.xor(count, count, count)
    spu.xor(result, result, result)

    # Inline the popc and reduce operations
    self.popc(count, x)
    self.reduce_word(result, count)

    # Send the result to the caller
    spu.wrch(result, dma.SPU_WrOutMbox)

    # Hand back every register acquired above.  Previously only x was
    # released, so count and result stayed reserved on the code object.
    code.release_register(x)
    code.release_register(count)
    code.release_register(result)

    spu.set_active_code(old_code)
    return

示例#4

0

显示文件

    def synthesize(self, code):
        """
        Render a vector with 4 pixels.

        Emits the instructions that turn ``self.result`` into RGBA bytes,
        store them at ``self.x_offset`` (displacement ``self.lsa >> 4``) and
        advance ``self.x_offset`` by 16.

        Raises:
            Exception: if ``x_offset``, ``result`` or ``one`` has not been set.
        """
        # Validate before touching the global active-code state so a failed
        # precondition does not leave `code` installed as the active code.
        if self.x_offset is None:
            raise Exception('Please call setup')
        if self.result is None:
            raise Exception('Please set result')
        if self.one is None:
            raise Exception('Please set one')

        old_code = spu.get_active_code()
        spu.set_active_code(code)

        # Make the part of the result positive and subtract 1
        # to transform (-1,-oo) into (0,oo)
        self.result.v = spu.fs.ex(0, self.result)
        self.result.v = spu.fs.ex(self.result, self.one)

        # Convert the result to an unsigned int, scaling by 2^4 to put
        # values between 0 and 16 in the gradient.  Values outside [0,16]
        # are 0 or FF
        self.result.v = spu.cfltu.ex(self.result, 169)  # 173 - 169 == 4
        # self.result.v = spu.sfi.ex(self.result, 255) # 173 - 169 == 4

        # Extract the first two bytes from the result into the RGB positions
        # and set alpha to 0xFF
        self.result.v = spu.shufb.ex(self.result, self.ff, self.uint2rgba)

        # Save the result and increment the offset
        spu.stqd(self.result, self.x_offset, self.lsa >> 4)
        spu.ai(self.x_offset, self.x_offset, 16)

        spu.set_active_code(old_code)
        return

示例#5

0

显示文件

def fdiv(code, d, x, y, one = None):
  """
  Emit single-precision floating point division: d = x / y.

  Temporary registers are acquired from `code` and released again before
  returning.  If `one` is not supplied, a register is acquired and
  initialised here (zeroed, incremented by 1, then converted with cuflt) --
  presumably holding 1.0.

  NOTE(review): the instructions are created without code.add(), so they
  appear to rely on `code` being the active code object -- confirm.
  """
  recip = code.acquire_registers(3)
  err = code.acquire_register()

  # Everything acquired in this function, released in one call at the end
  acquired = recip[:]
  acquired.append(err)

  if one is None:
    one = code.acquire_register()
    spu.xor(one, one, one)
    spu.ai(one, one, 1)
    spu.cuflt(one, one, 155)
    acquired.append(one)

  # Reciprocal of y (from SPU ISA 1.1, p208, Normal case):
  # estimate, interpolate, then one refinement step using `one`
  spu.frest(recip[0], y)
  spu.fi(recip[1], y, recip[0])
  spu.fnms(err, y, recip[1], one)
  spu.fma(recip[2], err, recip[1], recip[1])

  # d = x * (1/y)
  spu.fm(d, x, recip[2])

  code.release_registers(acquired)

  return

示例#6

0

显示文件

    def save_register(self, reg):  # , branch_to_save = False):
        """
        Emit code that stores `reg` into the local-store buffer.

        The code is emitted into the currently active code object.  It
        stores `reg` at self.ls_buffer + offset, advances the offset by 16,
        compares it against the size and emits a conditional branch around
        the save_ls_buffer() call.

        NOTE(review): the original author marked this routine as unfinished
        ("THESE VALUES ARE WRONG"); the three mailbox writes look like
        debugging output.  Treat the behaviour as work in progress.
        """
        code = spu.get_active_code()

        # Three scratch registers, released together at the end
        offset = code.acquire_register()
        size = code.acquire_register()
        test = code.acquire_register()
        regs = [offset, size, test]

        # Rotate self.ls_buffer by 4 and 8 bytes; presumably this brings the
        # current offset and the buffer size into the preferred slot --
        # TODO confirm the layout of ls_buffer
        spu.rotqbyi(offset, self.ls_buffer, 4)
        spu.rotqbyi(size, self.ls_buffer, 8)

        # Store the register at ls_buffer + offset
        spu.stqx(reg, self.ls_buffer, offset)

        # Advance by one 16-byte slot and test whether offset equals size
        spu.ai(offset, offset, 16)
        spu.ceq(test, offset, size)

        spu.wrch(size, dma.SPU_WrOutMbox)
        spu.wrch(offset, dma.SPU_WrOutMbox)
        spu.wrch(test, dma.SPU_WrOutMbox)
        # !!! STOPPED HERE !!! THESE VALUES ARE WRONG !!!
        # Remember where the branch goes; the stop is only a placeholder
        # that is overwritten with the real brz below
        lbl_ls_full = code.size()
        spu.stop(0xB)
        self.save_ls_buffer(ls_size=size)

        spu.nop(0)
        # Patch the placeholder: if test is zero, branch forward over the
        # save_ls_buffer code emitted above
        code[lbl_ls_full] = spu.brz(test, (code.size() - lbl_ls_full),
                                    ignore_active=True)

        code.release_registers(regs)
        return

示例#7

0

显示文件

文件： save_buffer.py 项目： KapilRijhwani/corepy

  def save_register(self, reg): # , branch_to_save = False):
    """
    Emit code that stores `reg` into the local-store buffer.

    The code is emitted into the currently active code object.  It stores
    `reg` at self.ls_buffer + offset, advances the offset by 16, compares it
    against the size and emits a conditional branch around the
    save_ls_buffer() call.

    NOTE(review): the original author marked this routine as unfinished
    ("THESE VALUES ARE WRONG"); the three mailbox writes look like debugging
    output.  Treat the behaviour as work in progress.
    """
    code = spu.get_active_code()

    # Three scratch registers, released together at the end
    offset = code.acquire_register()
    size = code.acquire_register()
    test = code.acquire_register()
    regs = [offset, size, test]
    
    # Rotate self.ls_buffer by 4 and 8 bytes; presumably this brings the
    # current offset and the buffer size into the preferred slot --
    # TODO confirm the layout of ls_buffer
    spu.rotqbyi(offset, self.ls_buffer, 4)
    spu.rotqbyi(size,   self.ls_buffer, 8)

    # Store the register at ls_buffer + offset
    spu.stqx(reg, self.ls_buffer, offset)
    
    # Advance by one 16-byte slot and test whether offset equals size
    spu.ai(offset, offset, 16)
    spu.ceq(test,  offset, size)

    spu.wrch(size, dma.SPU_WrOutMbox)
    spu.wrch(offset, dma.SPU_WrOutMbox)
    spu.wrch(test, dma.SPU_WrOutMbox)
    # !!! STOPPED HERE !!! THESE VALUES ARE WRONG !!!
    # Remember where the branch goes; the stop is only a placeholder that
    # is overwritten with the real brz below
    lbl_ls_full = code.size()
    spu.stop(0xB)
    self.save_ls_buffer(ls_size = size)

    spu.nop(0)
    # Patch the placeholder: if test is zero, branch forward over the
    # save_ls_buffer code emitted above
    code[lbl_ls_full] = spu.brz(test, (code.size() - lbl_ls_full), ignore_active = True)

    code.release_registers(regs)
    return

示例#8

0

显示文件

  def synthesize_constants(self, code):
    """
    Acquire and initialise the constant register `self._one` in `code`.

    The register is zeroed, incremented by 1 and converted with cuflt
    (presumably yielding 1.0).  `code` is the active code object while the
    instructions are emitted; the previous active code object is restored
    afterwards unless it was None.
    """
    previous = spu.get_active_code()
    spu.set_active_code(code)

    one = code.acquire_register()
    self._one = one

    spu.xor(one, one, one)
    spu.ai(one, one, 1)
    spu.cuflt(one, one, 155)

    # Nothing to restore when no code object was active on entry
    if old_code_missing(previous) if False else previous is None:
      return

    spu.set_active_code(previous)
    return

示例#9

0

显示文件

 def cleanup(self):
   """
   Emit the end-of-iteration counter update.

   DEC mode subtracts the step from r_count, INC mode adds it.  The step
   comes from the r_step register when one is set, otherwise from the
   immediate returned by step_size().  Any other mode emits nothing.
   """
   if self.mode == DEC:
     if self.r_step is None:
       # Immediate form: add the negated step
       update = spu.ai( self.r_count, self.r_count, -self.step_size())
     else:
       # Register form: subtract-from with r_step and r_count as operands
       update = spu.sf(self.r_count, self.r_step, self.r_count)
     self.code.add(update)
   elif self.mode == INC:
     if self.r_step is None:
       update = spu.ai(self.r_count, self.r_count, self.step_size())
     else:
       update = spu.a(self.r_count, self.r_count, self.r_step)
     self.code.add(update)

   return

示例#10

0

显示文件

文件： iterators.py 项目： microwave89-hv/efi

    def start(self, align=True, branch=True):
        """
        Emit the pre-loop initialisation and create the loop labels.

        Acquires r_count (and, in INC mode with `branch`, r_stop) when not
        already set, loads the initial counter value, loads the step into a
        register when it does not pass the immediate range check, and adds
        the start label to the code.  The branch and continue labels are only
        created here, not added.

        `align` is accepted but not used in this method.
        """
        stream = self.code

        if self.r_count is None:
            self.r_count = stream.acquire_register()

        if self.mode == DEC:
            # Counting down: the counter starts at the iteration count
            if not self._external_start:
                util.load_word(stream, self.r_count, self.get_count())
            else:
                stream.add(spu.ai(self.r_count, self.r_start, 0))

        elif self.mode == INC:
            if branch and self.r_stop is None:
                self.r_stop = stream.acquire_register()

            # Counting up: the counter starts at the start value
            if not self._external_start:
                util.load_word(stream, self.r_count, self.get_start())
            else:
                stream.add(spu.ai(self.r_count, self.r_start, 0))

            if branch and not self._external_stop:
                util.load_word(stream, self.r_stop, self.get_count())

        if self.r_count is not None:
            self.current_count = var.SignedWord(code=stream,
                                                reg=self.r_count)

        # If the step size doesn't fit in an immediate value, store it in a
        # register (-512 < word < 511)
        step = self.step_size()
        if step <= -512 or step >= 511:
            self.r_step = stream.acquire_register()
            util.load_word(stream, self.r_step, self.step_size())

        # Start label, made unique with a random suffix
        start_name = "SYN_ITER_START_%d" % random.randint(0, 2**32)
        self.start_label = stream.get_label(start_name)
        stream.add(self.start_label)

        # Continue/branch labels are created now so they can be referenced;
        # they are added to the code at their own locations later.
        branch_name = "SYN_ITER_BRANCH_%d" % random.randint(0, 2**32)
        self.branch_label = stream.get_label(branch_name)
        continue_name = "SYN_ITER_CONTINUE_%d" % random.randint(0, 2**32)
        self.continue_label = stream.get_label(continue_name)
        return

示例#11

0

显示文件

文件： spu_basics.py 项目： maxim-tyutyunnikov/corepy

def SimpleSPU():
    """
    Run two tiny SPU programs as a smoke test.

    The first computes 11 + 31 in the general-purpose return register and
    stops with code 0x100A when the sum equals 42 (0x100B otherwise); both
    the value and the stop code are asserted.  The second loads 3.14 into
    the floating-point return register and prints the execution result.
    """
    prgm = env.Program()
    code = prgm.get_stream()
    proc = env.Processor()

    spu.set_active_code(code)

    # x is the return register; test is explicitly bound to register 55
    x = code.gp_return
    test = prgm.acquire_register(reg_name=55)

    spu.xor(x, x, x)  # zero x
    spu.ai(x, x, 11)  # x = x + 11
    spu.ai(x, x, 31)  # x = x + 31

    spu.ceqi(test, x, 42)  # test = (x == 42)

    # If test is false (all 0s), skip the stop(0x100A) instruction
    spu.brz(test, 2)
    spu.stop(0x100A)
    spu.stop(0x100B)

    prgm.add(code)
    prgm.print_code(hex=True)
    r = proc.execute(prgm, mode='int', stop=True, debug=True)
    assert (r[0] == 42)
    assert (r[1] == 0x100A)

    # Second program: return a float constant
    prgm = env.Program()
    code = prgm.get_stream()
    spu.set_active_code(code)

    util.load_float(code, code.fp_return, 3.14)

    prgm.add(code)
    prgm.print_code(hex=True)
    r = proc.execute(prgm, mode='fp')
    # Parenthesised single-argument form prints the same text on Python 2
    # and is valid syntax on Python 3.
    print(r)
    return

示例#12

0

显示文件

文件： spu_basics.py 项目： KapilRijhwani/corepy

def SimpleSPU():
  """
  Run two tiny SPU programs as a smoke test.

  The first computes 11 + 31 in the general-purpose return register and
  stops with code 0x100A when the sum equals 42 (0x100B otherwise); both the
  value and the stop code are asserted.  The second loads 3.14 into the
  floating-point return register and prints the execution result.
  """
  prgm = env.Program()
  code = prgm.get_stream()
  proc = env.Processor()

  spu.set_active_code(code)

  # x is the return register; test is explicitly bound to register 55
  x = code.gp_return
  test = prgm.acquire_register(reg_name = 55)

  spu.xor(x, x, x) # zero x
  spu.ai(x, x, 11) # x = x + 11
  spu.ai(x, x, 31) # x = x + 31

  spu.ceqi(test, x, 42) # test = (x == 42)

  # If test is false (all 0s), skip the stop(0x100A) instruction
  spu.brz(test, 2)
  spu.stop(0x100A)
  spu.stop(0x100B)

  prgm.add(code)
  prgm.print_code(hex = True)
  r = proc.execute(prgm, mode = 'int', stop = True, debug = True)
  assert(r[0] == 42)
  assert(r[1] == 0x100A)

  # Second program: return a float constant
  prgm = env.Program()
  code = prgm.get_stream()
  spu.set_active_code(code)

  util.load_float(code, code.fp_return, 3.14)

  prgm.add(code)
  prgm.print_code(hex = True)
  r = proc.execute(prgm, mode = 'fp')
  # Parenthesised single-argument form prints the same text on Python 2 and
  # is valid syntax on Python 3.
  print(r)
  return

示例#13

0

显示文件

文件： spre_dummy_spu.py 项目： microwave89-hv/efi

def TestParallel():
    """
    Launch a three-instruction program asynchronously on 6 SPUs and join
    each returned SPE id.

    Intended to be run with a stop instruction so that the registers and
    memory can be examined; nothing about the result is checked.
    """
    code = ParallelInstructionStream()
    proc = Processor()

    code.raw_data_size = 128 * 8

    reg = code.acquire_register()
    # NOTE(review): these immediates are wider than the (-512, 511) range
    # used elsewhere for spu.ai -- confirm the platform accepts them
    for immediate in (0xCAFE, 0xBABE):
        code.add(spu.ai(reg, reg, immediate))
    code.add(spu.stop(0x2000))

    spe_ids = proc.execute(code, mode='async', n_spus=6)

    # Wait for every SPU that was started
    for speid in spe_ids:
        proc.join(speid)

    assert True
    return

示例#14

0

显示文件

文件： spre_dummy_spu.py 项目： KapilRijhwani/corepy

def TestParallel():
  """
  Launch a three-instruction program asynchronously on 6 SPUs and join each
  returned SPE id.

  Intended to be run with a stop instruction so that the registers and
  memory can be examined; nothing about the result is checked.
  """
  code = ParallelInstructionStream()
  proc = Processor()

  code.raw_data_size = 128*8

  reg = code.acquire_register()
  # NOTE(review): these immediates are wider than the (-512, 511) range used
  # elsewhere for spu.ai -- confirm the platform accepts them
  for immediate in (0xCAFE, 0xBABE):
    code.add(spu.ai(reg, reg, immediate))
  code.add(spu.stop(0x2000))

  spe_ids = proc.execute(code, mode='async', n_spus = 6)

  # Wait for every SPU that was started
  for speid in spe_ids:
    proc.join(speid)

  assert True
  return

示例#15

0

显示文件

文件： iterators.py 项目： KapilRijhwani/corepy

  def start(self, align = True, branch = True):
    """
    Emit the pre-loop initialisation and create the loop labels.

    Acquires r_count (and, in INC mode with `branch`, r_stop) from the
    program when not already set, loads the initial counter value, loads the
    step into a register when it does not pass the immediate range check,
    and adds the start label to the code.  The branch and continue labels
    are only created here, not added.

    `align` is accepted but not used in this method.
    """
    stream = self.code

    if self.r_count is None:
      self.r_count = stream.prgm.acquire_register()

    if self.mode == DEC:
      # Counting down: the counter starts at the iteration count
      if not self._external_start:
        util.load_word(stream, self.r_count, self.get_count())
      else:
        stream.add(spu.ai(self.r_count, self.r_start, 0))

    elif self.mode == INC:
      if branch and self.r_stop is None:
        self.r_stop = stream.prgm.acquire_register()

      # Counting up: the counter starts at the start value
      if not self._external_start:
        util.load_word(stream, self.r_count, self.get_start())
      else:
        stream.add(spu.ai(self.r_count, self.r_start, 0))

      if branch and not self._external_stop:
        util.load_word(stream, self.r_stop, self.get_count())

    if self.r_count is not None:
      self.current_count = var.SignedWord(code = stream, reg = self.r_count)

    # If the step size doesn't fit in an immediate value, store it in a
    # register (-512 < word < 511)
    step = self.step_size()
    if step <= -512 or step >= 511:
      self.r_step = stream.prgm.acquire_register()
      util.load_word(stream, self.r_step, self.step_size())

    # Start label: created and placed at the current position
    self.start_label = stream.prgm.get_unique_label("SYN_ITER_START")
    stream.add(self.start_label)

    # Continue/branch labels are created now so they can be referenced; they
    # are added to the code at their own locations later.
    self.branch_label = stream.prgm.get_unique_label("SYN_ITER_BRANCH")
    self.continue_label = stream.prgm.get_unique_label("SYN_ITER_CONTINUE")
    return

示例#16

0

显示文件

文件： spre_linux_spu.py 项目： KapilRijhwani/corepy

def TestParallel():
  """
  Launch a three-instruction program asynchronously on 6 SPUs and join each
  returned SPE id.

  Intended to be run with a stop instruction so that the registers and
  memory can be examined; nothing about the result is checked.
  """
  prgm = ParallelProgram()
  code = prgm.get_stream()
  proc = Processor()

  code.raw_data_size = 128*8

  r = prgm.acquire_register()
  code.add(spu.ai(r, r, 0x2FE))
  code.add(spu.ai(r, r, 0x2BE))
  code.add(spu.stop(0x1FFF))

  prgm += code
  # `async` is a reserved word from Python 3.7 on, so writing it as a literal
  # keyword argument is a SyntaxError there.  Passing it through ** sends
  # exactly the same keyword to execute() on every Python version.
  r = proc.execute(prgm, mode='void', n_spus = 6, **{'async': True})

  # Wait for every SPU that was started
  for speid in r:
    proc.join(speid)

  assert(True)
  return

示例#17

0

显示文件

def TestParallel():
    """
    Launch a three-instruction program asynchronously on 6 SPUs and join
    each returned SPE id.

    Intended to be run with a stop instruction so that the registers and
    memory can be examined; nothing about the result is checked.
    """
    prgm = ParallelProgram()
    code = prgm.get_stream()
    proc = Processor()

    code.raw_data_size = 128 * 8

    r = prgm.acquire_register()
    code.add(spu.ai(r, r, 0x2FE))
    code.add(spu.ai(r, r, 0x2BE))
    code.add(spu.stop(0x1FFF))

    prgm += code
    # `async` is a reserved word from Python 3.7 on, so writing it as a
    # literal keyword argument is a SyntaxError there.  Passing it through **
    # sends exactly the same keyword to execute() on every Python version.
    r = proc.execute(prgm, mode='void', n_spus=6, **{'async': True})

    # Wait for every SPU that was started
    for speid in r:
        proc.join(speid)

    assert (True)
    return

示例#18

0

显示文件

文件： spre_dummy_spu.py 项目： microwave89-hv/efi

def copy_param(code, target, source):
    """
    Copy a parameter from the source register into the preferred slot of
    the target register.

    `source` is indexed with REG and SLOT.  A parameter in slot 0 is copied
    with an add-immediate of 0; a parameter in any other slot is rotated
    into place by SLOT * 4 bytes.  Either way the other values held in the
    source register are copied along with it.
    """
    slot = source[SLOT]

    if slot != 0:
        # Rotate the whole quadword so the wanted slot ends up first
        code.add(spu.rotqbyi(target, source[REG], slot * 4))
        return

    code.add(spu.ai(target, source[REG], 0))
    return

示例#19

0

显示文件

文件： spre_linux_spu.py 项目： KapilRijhwani/corepy

def copy_param(code, target, source):
  """
  Copy a parameter from the source register into the preferred slot of the
  target register.

  `source` is indexed with REG and SLOT.  A parameter in slot 0 is copied
  with an add-immediate of 0; a parameter in any other slot is rotated into
  place by SLOT * 4 bytes.  Either way the other values held in the source
  register are copied along with it.
  """
  slot = source[SLOT]

  if slot != 0:
    # Rotate the whole quadword so the wanted slot ends up first
    code.add(spu.rotqbyi(target, source[REG], slot * 4))
    return

  code.add(spu.ai(target, source[REG], 0))
  return

示例#20

0

显示文件

def load_word(code, r_target, word, clear=False, zero=True):
    """
    Emit the shortest instruction sequence that loads `word` into r_target.

    Selection, in order:
      * zero is true and -512 < word < 511: add-immediate on code.r_zero
      * word is unchanged by the mask 0x7FFF: il
      * word is unchanged by the mask 0x3FFFF: ila
      * otherwise: ilhu with the upper 16 bits, then iohl with the lower 16

    If r0 is not set to 0, the zero parameter should be set to False.
    When `clear` is true a final shlqbyi by 12 bytes is emitted on r_target.
    """
    emit = code.add

    if zero and (-512 < word < 511):
        emit(spu.ai(r_target, code.r_zero, word))
    elif word == (word & 0x7FFF):
        emit(spu.il(r_target, word))
    elif word == (word & 0x3FFFF):
        emit(spu.ila(r_target, word))
    else:
        # Two-instruction load: upper halfword first, then OR in the lower
        upper_half = (word & 0xFFFF0000) >> 16
        emit(spu.ilhu(r_target, upper_half))
        lower_half = (word & 0xFFFF)
        emit(spu.iohl(r_target, lower_half))

    if not clear:
        return

    emit(spu.shlqbyi(r_target, r_target, 12))
    return

示例#21

0

显示文件

  def end(self, branch = True):
    """
    Emit the post-loop code and release the loop registers.

    Optionally emits a branch hint, then (when `branch` is true) the branch
    label and the conditional branch back to the start label.  The counter
    is reloaded with its initial value so the loop can be nested, and
    r_count / r_stop are released (r_stop only when it is not external).

    NOTE(review): r_step is not released here -- confirm whether that is
    handled elsewhere.
    """
    stream = self.code

    if self.hint == True:
      stream.add(spu.hbrr(self.branch_label, self.start_label))

    if self.mode == DEC:
      # Loop again while r_count is non-zero; brnz tests r_count directly
      if branch:
        stream.add(self.branch_label)
        stream.add(spu.brnz(self.r_count, self.start_label))

      # Reset the counter in case this is a nested loop
      util.load_word(stream, self.r_count, self.get_count())

    elif self.mode == INC:
      # Loop again while r_count < r_stop
      if branch:
        r_below_stop = stream.prgm.acquire_register()

        stream.add(spu.cgt(r_below_stop, self.r_stop, self.r_count))
        stream.add(self.branch_label)
        stream.add(spu.brnz(r_below_stop, self.start_label))

        stream.prgm.release_register(r_below_stop)

      # Reset the current value in case this is a nested loop
      if not self._external_start:
        util.load_word(stream, self.r_count, self.get_start())
      else:
        stream.add(spu.ai(self.r_count, self.r_start, 0))

    if self.r_count is not None:
      stream.prgm.release_register(self.r_count)
    # An externally supplied stop register is left for its owner
    if not (self.r_stop is None or self._external_stop):
      stream.prgm.release_register(self.r_stop)

    return

示例#22

0

显示文件

def TestMFC():
    """
    Round-trip test for the MFC get/put helpers.

    Builds a 32-element unsigned-int array holding 0..31, transfers it with
    mfc_get, adds 1 to every quadword with an unrolled loop, transfers it
    back with mfc_put and asserts that element i now equals i + 1.
    """
    size = 32
    data = extarray.extarray('I', range(size))
    code = synspu.InstructionStream()

    r_zero = code.acquire_register()
    r_ea_data = code.acquire_register()
    r_ls_data = code.acquire_register()
    r_size = code.acquire_register()
    r_tag = code.acquire_register()

    # Load zero
    util.load_word(code, r_zero, 0)

    # Diagnostics.  The parenthesised single-argument form prints the same
    # text on Python 2 and is valid syntax on Python 3.
    print('array ea: %X' % (data.buffer_info()[0]))
    print('r_zero = %s, ea_data = %s, ls_data = %s, r_size = %s, r_tag = %s' % (
        str(r_zero), str(r_ea_data), str(r_ls_data), str(r_size), str(r_tag)))

    # Load the effective address
    print('test ea: %X' % data.buffer_info()[0])
    util.load_word(code, r_ea_data, data.buffer_info()[0])

    # Load the size (4 bytes per element)
    code.add(spu.ai(r_size, r_zero, size * 4))

    # Load the tag
    code.add(spu.ai(r_tag, r_zero, 2))

    # Load the lsa
    code.add(spu.ai(r_ls_data, r_zero, 0))

    # Load the data into address 0
    mfc_get(code, r_ls_data, r_ea_data, r_size, r_tag)

    # Set the tag bit to 2
    mfc_write_tag_mask(code, 1 << 2)

    # Wait for the transfer to complete
    mfc_read_tag_status_all(code)

    # Increment the data values by 1 using an unrolled loop (no branches);
    # lsa advances one 16-byte quadword per iteration
    r_current = code.acquire_register()

    for lsa in range(0, size * 4, 16):
        code.add(spu.lqa(r_current, (lsa >> 2)))
        code.add(spu.ai(r_current, r_current, 1))
        code.add(spu.stqa(r_current, (lsa >> 2)))

    code.release_register(r_current)

    # Store the values from address 0 back to main memory
    mfc_put(code, r_ls_data, r_ea_data, r_size, r_tag)

    # Set the tag bit to 2
    mfc_write_tag_mask(code, 1 << 2)

    # Wait for the transfer to complete
    mfc_read_tag_status_all(code)

    # Cleanup
    code.release_register(r_zero)
    code.release_register(r_ea_data)
    code.release_register(r_ls_data)
    code.release_register(r_size)
    code.release_register(r_tag)

    # Stop for debugging
    # code.add(spu.stop(0xA))

    # Execute the code
    proc = synspu.Processor()
    # code.print_code()
    proc.execute(code)

    for i in range(size):
        assert (data[i] == i + 1)

    return

示例#23

0

显示文件

文件： spre_dummy_spu.py 项目： microwave89-hv/efi

def TestInt():
    """
    Build a short program of add-immediates on registers 13 and 20, stop
    with code 0x200D, print the generated code, execute it and print the
    result.
    """
    code = InstructionStream()
    proc = Processor()

    spu.set_active_code(code)

    # Request specific registers by number
    r13 = code.acquire_register(reg=13)
    r20 = code.acquire_register(reg=20)
    spu.ai(r20, r20, 13)
    # Five successive additions of 13 to r13
    spu.ai(r13, r13, 13)
    spu.ai(r13, r13, 13)
    spu.ai(r13, r13, 13)
    spu.ai(r13, r13, 13)
    spu.ai(r13, r13, 13)

    spu.stop(0x200D)

    code.print_code()
    r = proc.execute(code)  # , debug = True)
    # Single formatted argument: prints the same text as the Python 2
    # statement `print 'int result:', r` and is valid on Python 3.
    print('int result: %s' % (r,))
    return

示例#24

0

显示文件

文件： spu_labels.py 项目： tmaone/efi

def SimpleSPU():
    """
    Run two small SPU programs that exercise labels.

    The first computes 11 + 31 and uses a labelled branch to stop with code
    0x100A when the sum equals 42 (0x100B otherwise); value and stop code
    are asserted.  The second runs a labelled loop that adds the counter
    values 1..10 into the return register and asserts the total is 55.
    """
    code = InstructionStream()
    proc = Processor()

    spu.set_active_code(code)

    # x is the return register; test is a scratch register
    x = code.gp_return
    test = code.acquire_register()

    lbl_brz = code.get_label("BRZ")
    lbl_skip = code.get_label("SKIP")

    # Branch hint for the brz placed at lbl_brz, targeting lbl_skip
    spu.hbrr(lbl_brz, lbl_skip)
    spu.xor(x, x, x)  # zero x
    spu.ai(x, x, 11)  # x = x + 11
    spu.ai(x, x, 31)  # x = x + 31

    spu.ceqi(test, x, 42)  # test = (x == 42)

    # If test is false (all 0s), skip the stop(0x100A) instruction
    code.add(lbl_brz)
    spu.brz(test, lbl_skip)
    spu.stop(0x100A)
    code.add(lbl_skip)
    spu.stop(0x100B)

    code.print_code(hex=True, pro=True, epi=True)
    r = proc.execute(code, mode='int', stop=True)
    # Single formatted argument: same text as `print "ret", r` on Python 2,
    # valid syntax on Python 3.
    print("ret %s" % (r,))
    assert (r[0] == 42)
    assert (r[1] == 0x100A)

    # Second program: loop with explicit labels
    code = InstructionStream()
    spu.set_active_code(code)

    lbl_loop = code.get_label("LOOP")
    lbl_break = code.get_label("BREAK")

    r_cnt = code.acquire_register()
    r_stop = code.acquire_register()
    r_cmp = code.acquire_register()
    r_foo = code.gp_return

    # Accumulator and counter start at zero, the stop value is 10
    spu.ori(r_foo, code.r_zero, 0)
    spu.ori(r_cnt, code.r_zero, 0)
    util.load_word(code, r_stop, 10)

    code.add(lbl_loop)

    # Leave the loop once the counter equals the stop value
    spu.ceq(r_cmp, r_cnt, r_stop)
    spu.brnz(r_cmp, lbl_break)
    spu.ai(r_cnt, r_cnt, 1)

    spu.a(r_foo, r_foo, r_cnt)

    spu.br(lbl_loop)
    code.add(lbl_break)

    code.print_code()
    r = proc.execute(code, mode='int', stop=True)
    print("ret %s" % (r,))
    assert (r[0] == 55)

    return

示例#25

0

显示文件

文件： spu_comp.py 项目： maxim-tyutyunnikov/corepy

import corepy.lib.extarray as extarray
import corepy.arch.spu.isa as spu
import corepy.arch.spu.lib.util as util
import corepy.arch.spu.platform as env

# Demonstrates inserting the same sub-stream into a program twice.
prgm = env.Program()
code = prgm.get_stream()
proc = env.Processor()

# Generate substream
# Multiply gp_return by 2 (shift left by 1), add 1
subcode = prgm.get_stream()
subcode.add(spu.shli(subcode.gp_return, subcode.gp_return, 1))
subcode.add(spu.ai(subcode.gp_return, subcode.gp_return, 1))

# Initialize gp_return, insert code
code.add(spu.il(code.gp_return, 5))
code.add(subcode)

# Add 3, insert again
code.add(spu.ai(code.gp_return, code.gp_return, 3))
code.add(subcode)

#code.print_code()

prgm.add(code)
prgm.print_code()  # TODO  - support print prgm instead?

ret = proc.execute(prgm, mode='int')
# Single formatted argument: same text as `print "ret", ret` on Python 2,
# valid syntax on Python 3.
print("ret %s" % (ret,))

示例#26

0

显示文件

文件： spu_types.py 项目： KapilRijhwani/corepy

 def copy_register(self, other):
     """Emit an add-immediate of 0 from `other` into this register and
     return the result of adding it to self.code."""
     copy_inst = spu.ai(self, other, 0)
     return self.code.add(copy_inst)

示例#27

0

显示文件

文件： ispugui.py 项目： KapilRijhwani/corepy

  # Build a small demo program and open it in the SPU GUI.
  # NOTE(review): `prgm` is created outside this excerpt.
  code = prgm.get_stream()
  reg = prgm.acquire_register()
  foo = prgm.acquire_register(reg_name = 5)  # explicitly bound to register 5

  # Labelled preamble: load constants and store one quadword
  code.add(prgm.get_label("FOO"))
  code.add(spu.il(foo, 0xCAFE))
  # 0xDEAD in the upper halfword, then OR 0xBEEF into the lower halfword
  code.add(spu.ilhu(reg, 0xDEAD))
  code.add(spu.iohl(reg, 0xBEEF))
  code.add(spu.stqd(reg, code.r_zero, 4))

  lbl_loop = prgm.get_label("LOOP")
  lbl_break = prgm.get_label("BREAK")

  r_cnt = code.gp_return
  r_stop = prgm.acquire_register(reg_name = 9)
  r_cmp = prgm.acquire_register()

  # Counter starts at zero, stop value is 5
  code.add(spu.ori(r_cnt, code.r_zero, 0))
  code.add(spu.il(r_stop, 5))

  # Loop: leave once the counter equals the stop value, else add 1 and repeat.
  # The branch targets are looked up by name again; presumably get_label
  # returns the same label objects as lbl_loop / lbl_break -- TODO confirm.
  code.add(lbl_loop)
  code.add(spu.ceq(r_cmp, r_cnt, r_stop))
  code.add(spu.brnz(r_cmp, prgm.get_label("BREAK")))
  code.add(spu.ai(r_cnt, r_cnt, 1))
  code.add(spu.br(prgm.get_label("LOOP")))
  code.add(lbl_break)

  # Hand the instruction stream to the GUI and run its main loop
  app = SPUApp(code)
  app.MainLoop()

示例#28

0

显示文件

文件： spu_log.py 项目： maxim-tyutyunnikov/corepy

    def synthesize(self, code):
        """
        Emit a single-precision logarithm of ``self.x`` into ``code``.

        The value is split into exponent and mantissa, the mantissa is
        normalised around SQRTHF, a polynomial in the constants C1..C10 is
        evaluated and the pieces are recombined using LOG2EA and the
        exponent.  The final value is copied into ``self.result``.

        Raises:
            Exception: if ``x`` or ``result`` has not been set.
        """
        # Validate before touching the global active-code state so a failed
        # precondition does not leave `code` installed as the active code.
        if self.x is None:
            raise Exception("Please set x")
        if self.result is None:
            raise Exception("Please set result")

        old_code = spu.get_active_code()
        spu.set_active_code(code)

        # exponent
        e = var.Word()

        # Working values
        x = var.Word()
        y = var.Word()
        z = var.Word()

        cmp = var.Bits()
        tmp = var.Word()

        spu.xor(cmp, cmp, cmp)
        spu.xor(tmp, tmp, tmp)

        # Set the working x
        x.v = self.x

        # Extract the exponent
        # int e = (((*(unsigned int *) &x) >> 23) & 0xff) - 0x7e;
        e.v = x >> self.consts['_23']
        e.v = spu.andi.ex(e, 0xff)
        e.v = spu.ai.ex(e, 0x382)  # 0x382 == (- 0x7E) using 10 bits
        # 0b 111 1110

        # Extract the mantissa
        x.v = x & self.consts['M1']  # *(unsigned int*)&x &= 0x807fffff;
        x.v = x | self.consts['M2']  # *(unsigned int*)&x |= 0x3f000000;

        # Normalize.  Both branches of the C `if` are computed and the
        # result is picked with selb; y, z and tmp are reused as temporaries.
        x1, x2, e1 = y, z, tmp

        # if (x < SQRTHF)
        cmp.v = spu.fcgt.ex(self.consts['SQRTHF'], x)

        # (True) { ... }
        e1.v = spu.ai.ex(e, -1)  #   e -= 1;
        x1.v = spu.fa.ex(x, x)  #   x = x + x - 1.0;
        x1.v = spu.fs.ex(x1, self.consts['ONE'])  #     ""  ""

        # (False) { ... }
        x2.v = spu.fs.ex(x, self.consts['ONE'])  #   x = x - 1.0;

        # Select the True/False values based on cmp
        e.v = spu.selb.ex(e, e1, cmp)
        x.v = spu.selb.ex(x2, x1, cmp)

        # Compute polynomial
        z.v = spu.fm.ex(x, x)  #  z = x * x;

        #  y = (((((((( 7.0376836292E-2 * x
        #              - 1.1514610310E-1) * x
        #             + 1.1676998740E-1) * x
        #            - 1.2420140846E-1) * x
        #           + 1.4249322787E-1) * x
        #          - 1.6668057665E-1) * x
        #         + 2.0000714765E-1) * x
        #        - 2.4999993993E-1) * x
        #       + 3.3333331174E-1) * x * z;
        y.v = spu.fms.ex(self.consts['C1'], x, self.consts['C2'])
        y.v = spu.fma.ex(y, x, self.consts['C3'])
        y.v = spu.fms.ex(y, x, self.consts['C4'])
        y.v = spu.fma.ex(y, x, self.consts['C5'])
        y.v = spu.fms.ex(y, x, self.consts['C6'])
        y.v = spu.fma.ex(y, x, self.consts['C7'])
        y.v = spu.fms.ex(y, x, self.consts['C8'])
        y.v = spu.fma.ex(y, x, self.consts['C9'])
        y.v = spu.fm.ex(y, x)  #   * x
        y.v = spu.fm.ex(y, z)  #   * z;

        y.v = spu.fma.ex(self.consts['C10'], z, y)  #  y += -0.5 * z;

        # Convert to log base 2
        z.v = spu.fm.ex(y, self.consts['LOG2EA'])  # z = y * LOG2EA;
        z.v = spu.fma.ex(x, self.consts['LOG2EA'], z)  # z += x * LOG2EA;
        z.v = spu.fa.ex(z, y)  # z += y;
        z.v = spu.fa.ex(z, x)  # z += x;
        e.v = spu.csflt.ex(e, 155)  # z += (float) e;
        z.v = spu.fa.ex(z, e)  #  ""  ""

        spu.ai(self.result, z, 0)  # return z

        spu.set_active_code(old_code)
        return

示例#29

0

显示文件

def TestSPUParallelIter(data, size, n_spus = 6, buffer_size = 16, run_code = True):
  """
  Build (and optionally run) a parallel SPU program that processes `data`
  in buffers of `buffer_size` words.

  For every buffer the generated code DMAs the block into local store at
  address 0, rewrites each quadword in a synthetic loop, stores a loop
  counter at local-store address 0 and DMAs the block back.

  Parameters:
    data        -- array exposing buffer_info(); its address is used as the
                   effective address of the transfers
    size        -- number of 4-byte words to process
    n_spus      -- number of SPUs passed to Processor.execute
    buffer_size -- words per DMA buffer
    run_code    -- when false, the synthesized code is returned unexecuted

  Returns the code object when run_code is false, otherwise the wall-clock
  execution time in seconds.
  """
  import time
  # n_spus = 8
  # buffer_size = 16 # 16 ints/buffer
  # n_buffers   = 4  # 4 buffers/spu
  # n_buffers = size / buffer_size
  # size = buffer_size * n_buffers * n_spus
  # data = array.array('I', range(size + 2))

  #data = env.aligned_memory(n, typecode = 'I')
  #data.copy_to(data_array.buffer_info()[0], len(data_array))


  # print 'Data align: 0x%X, %d' % (data.buffer_info()[0], data.buffer_info()[0] % 16)

  code = env.ParallelInstructionStream()
  # code = env.InstructionStream()

  r_zero    = code.acquire_register()
  r_ea_data = code.acquire_register()
  r_ls_data = code.acquire_register()
  r_size    = code.acquire_register()
  r_tag     = code.acquire_register()  

  # Load zero
  util.load_word(code, r_zero, 0)

  # print 'array ea: 0x%X 0x%X' % (data.buffer_info()[0], long(data.buffer_info()[0]))
  # print 'r_zero = %d, ea_data = %d, ls_data = %d, r_size = %d, r_tag = %d' % (
  #   r_zero, r_ea_data, r_ls_data, r_size, r_tag)

  # Load the effective address, bumped by 8 when it is not 16-byte aligned.
  # NOTE(review): r_ea_data is not referenced again before it is released;
  # the loop below uses the unadjusted ea_start -- confirm this is intended.
  if data.buffer_info()[0] % 16 == 0:
    util.load_word(code, r_ea_data, data.buffer_info()[0])
  else: 
    util.load_word(code, r_ea_data, data.buffer_info()[0] + 8)

  ea_start = data.buffer_info()[0]
  # Iterate over each buffer: ea steps through the data in strides of
  # buffer_size * 4 bytes
  for ea in parallel(syn_range(code, ea_start, ea_start + size * 4 , buffer_size * 4)):
    # ea = var.SignedWord(code = code, reg = r_ea_data)
  
    # print 'n_iters:', size / buffer_size
    # for i in syn_range(code, size / buffer_size):

    # code.add(spu.stop(0xB))
  
    # Load the size (bytes per buffer)
    util.load_word(code, r_size, buffer_size * 4)

    # Load the tag
    code.add(spu.ai(r_tag, r_zero, 12))

    # Load the lsa
    code.add(spu.ai(r_ls_data, r_zero, 0))

    # Load the data into address 0
    dma.mfc_get(code, r_ls_data, ea, r_size, r_tag)

    # Set the tag bit to 12
    dma.mfc_write_tag_mask(code, 1<<12);

    # Wait for the transfer to complete
    dma.mfc_read_tag_status_all(code);

    # Process the buffer one quadword at a time.
    # NOTE(review): the loop adds each value to itself (doubling) rather
    # than incrementing by 1 as the older comment here claimed.
    # r_current = code.acquire_register()
    current = var.SignedWord(0, code)

    # Counts loop iterations
    count = var.SignedWord(0, code)
    # Use an SPU iter
    for lsa in syn_iter(code, buffer_size * 4, 16):
      code.add(spu.lqx(current, r_zero, lsa))
      # code.add(spu.ai(1, r_current, r_current))
      current.v = current + current
      code.add(spu.stqx(current, r_zero, lsa))    
      count.v = count + 1

    # Store the iteration count at local-store address 0
    code.add(spu.stqx(count, r_zero, 0))
  
    # code.release_register(r_current)
    # NOTE(review): only `current` is released here; `count` is not
    current.release_registers(code)

    # Store the values back to main memory

    # Load the tag
    code.add(spu.ai(r_tag, r_zero, 13))

    # Store the data from address 0 back to the effective address.
    # NOTE(review): mfc_get above is passed `ea`, here `ea.reg` -- confirm
    # both forms are accepted.
    dma.mfc_put(code, r_ls_data, ea.reg, r_size, r_tag)

    # Set the tag bit to 13
    dma.mfc_write_tag_mask(code, 1<<13);

    # Wait for the transfer to complete
    dma.mfc_read_tag_status_all(code);


    # code.add(spu.stop(0xB))

    # Update ea
    # ea.v = ea + (buffer_size * 4)
  # /for ea address 


  # Cleanup
  code.release_register(r_zero)
  code.release_register(r_ea_data)
  code.release_register(r_ls_data)  
  code.release_register(r_size)
  code.release_register(r_tag)  

  # Callers that only want the synthesized code stop here
  if not run_code:
    return code

  # Stop for debugging
  # code.add(spu.stop(0xA))

  # Execute the code
  proc = env.Processor()
  #data.copy_from(data_array.buffer_info()[0], len(data_array))  
  # Debug helper, currently unused (both calls below are commented out).
  # NOTE(review): it reads data[i + buffer_size], which reaches index `size`
  # on the last iteration -- only valid if data holds extra elements.
  def print_blocks():
    for i in range(0, size, buffer_size):
      # print data[i:(i + buffer_size)]
      print data[i + buffer_size],
    print '' 
  
  # print_blocks()
  # Time only the execution itself
  s = time.time()
  r = proc.execute(code, n_spus = n_spus)
  # r = proc.execute(code)
  t = time.time() - s
  # print_blocks()

  return t

示例#30

0

显示文件

文件： dma.py 项目： KapilRijhwani/corepy

def TestMFC():
  import corepy.lib.extarray as extarray
  import corepy.arch.spu.platform as synspu 

  size = 32
  #data_array = array.array('I', range(size))
  #data = synspu.aligned_memory(size, typecode = 'I')
  #data.copy_to(data_array.buffer_info()[0], len(data_array))
  data = extarray.extarray('I', range(size))
  code = synspu.InstructionStream()

  r_zero    = code.acquire_register()
  r_ea_data = code.acquire_register()
  r_ls_data = code.acquire_register()
  r_size    = code.acquire_register()
  r_tag     = code.acquire_register()  

  # Load zero
  util.load_word(code, r_zero, 0)

  print 'array ea: %X' % (data.buffer_info()[0])
  print 'r_zero = %s, ea_data = %s, ls_data = %s, r_size = %s, r_tag = %s' % (
    str(r_zero), str(r_ea_data), str(r_ls_data), str(r_size), str(r_tag))
  
  # Load the effective address
  print 'test ea: %X' % data.buffer_info()[0]
  util.load_word(code, r_ea_data, data.buffer_info()[0])

  # Load the size
  code.add(spu.ai(r_size, r_zero, size * 4))

  # Load the tag
  code.add(spu.ai(r_tag, r_zero, 2))

  # Load the lsa
  code.add(spu.ai(r_ls_data, r_zero, 0))

  # Load the data into address 0
  mfc_get(code, r_ls_data, r_ea_data, r_size, r_tag)

  # Set the tag bit to 2
  mfc_write_tag_mask(code, 1<<2);

  # Wait for the transfer to complete
  mfc_read_tag_status_all(code);

  # Increment the data values by 1 using an unrolled loop (no branches)
  r_current = code.acquire_register()

  for lsa in range(0, size * 4, 16):
    code.add(spu.lqa(r_current, (lsa >> 2)))
    code.add(spu.ai(r_current, r_current, 1))
    code.add(spu.stqa(r_current, (lsa >> 2)))

  code.release_register(r_current)
             
  # Store the values back to main memory

  # Load the data into address 0
  mfc_put(code, r_ls_data, r_ea_data, r_size, r_tag)

  # Set the tag bit to 2
  mfc_write_tag_mask(code, 1<<2);

  # Wait for the transfer to complete
  mfc_read_tag_status_all(code);

  # Cleanup
  code.release_register(r_zero)
  code.release_register(r_ea_data)
  code.release_register(r_ls_data)  
  code.release_register(r_size)
  code.release_register(r_tag)  

  # Stop for debugging
  # code.add(spu.stop(0xA))

  # Execute the code
  proc = synspu.Processor()
  # code.print_code()
  #print data_array
  proc.execute(code)

  #data.copy_from(data_array.buffer_info()[0], len(data_array))

  for i in range(size):
    assert(data[i] == i + 1)
  
  return

示例#31

0

显示文件

def TestDebug():
    prgm = Program()
    code = prgm.get_stream()
    proc = DebugProcessor()

    spu.set_active_code(code)

    ra = code.acquire_register()
    rb = code.acquire_register()
    rc = code.acquire_register()
    rd = code.acquire_register()
    re = code.acquire_register()
    rf = code.acquire_register()
    rg = code.acquire_register()
    rh = code.acquire_register()

    spu.ai(ra, 0, 14)
    spu.ai(rb, 0, 13)
    spu.ai(rc, 0, 14)
    spu.brnz(14, 3)
    spu.ai(rd, 0, 15)
    spu.ai(re, 0, 16)
    spu.ai(rf, 0, 17)
    spu.ai(rg, 0, 18)
    spu.ai(rh, 0, 19)
    spu.nop(0)

    spu.stop(0x200A)

    prgm += code
    r = proc.execute(prgm)  # , debug = True)

    r = proc.nexti()
    r = proc.nexti()
    r = proc.nexti()
    r = proc.nexti()

    while r != None:
        r = proc.nexti()
        if r is not None:
            regs = proc.dump_regs()
            print '******', regs[122:]

    assert (r == None)
    print 'int result:', r
    # while True:
    #   pass
    return

示例#32

0

显示文件

def TestInt():
    """
    Run a tiny program that ends in stop(0x200D) and check the result pair.

    With stop=True, execute() is expected to return an indexable pair; the
    test requires its first item to be 0 and its second to be the stop code.
    """
    program = Program()
    stream = program.get_stream()
    processor = Processor()

    spu.set_active_code(stream)

    # Pin two specific registers by name.
    reg13 = program.acquire_register(reg_name=13)
    reg20 = program.acquire_register(reg_name=20)

    increment = 13
    stop_code = 0x200D

    # One add-immediate on r20, then five on r13.
    spu.ai(reg20, reg20, increment)
    for _ in range(5):
        spu.ai(reg13, reg13, increment)

    spu.stop(stop_code)

    program += stream
    outcome = processor.execute(program, stop=True)

    assert outcome[0] == 0
    assert outcome[1] == stop_code

示例#33

0

显示文件

文件： spu_psmap.py 项目： maxim-tyutyunnikov/corepy

    # Excerpt from the middle of a function: `prgm`, `code`, `ITERS`,
    # `load_word`, `dma` and `extarray` are defined outside the visible lines.
    # Route the bare spu.* calls below into `code`.
    spu.set_active_code(code)
    # 131072 / 4 == 32768 unsigned ints; not referenced again in this excerpt
    # (presumably a mapping of the SPU problem-state area -- TODO confirm).
    psmap = extarray.extarray('I', 131072 / 4)
    data = extarray.extarray('I', range(0, 16))

    r_sum = prgm.gp_return
    r_cnt = prgm.acquire_register()

    # r_sum = 0, r_cnt = ITERS
    spu.xor(r_sum, r_sum, r_sum)
    load_word(code, r_cnt, ITERS)

    # Loop head: one mailbox round trip per iteration.
    lbl_loop = prgm.get_label("loop")
    code.add(lbl_loop)

    # The helper hands back the register holding the value it read.
    reg = dma.spu_read_in_mbox(code)

    # Count the message and report the running count via the interrupting
    # outbound mailbox; the received value itself is not used.
    spu.ai(r_sum, r_sum, 1)
    dma.spu_write_out_intr_mbox(code, r_sum)
    #dma.spu_write_out_mbox(code, reg)

    prgm.release_register(reg)

    # Decrement the counter and loop while it is non-zero.
    spu.ai(r_cnt, r_cnt, -1)
    spu.brnz(r_cnt, lbl_loop)

    # After the loop: read signal register 1 and copy it (OR with 0) into the
    # return register.
    reg = dma.spu_read_signal1(code)
    spu.ori(code.gp_return, reg, 0)

    # Both registers are re-initialised for code that follows this excerpt.
    spu.il(r_cnt, 0)
    spu.il(r_sum, 16 * 4)

    r_data = prgm.acquire_register()

示例#34

0

显示文件

文件： isched.py 项目： forrestv/pyable

    # Excerpt from the middle of a function; `env` and `spu` come from imports
    # outside the visible lines.
    import corepy.arch.spu.lib.util as util

    prgm = env.Program()
    code = prgm.get_stream()
    spu.set_active_code(code)

    r_cnt = prgm.acquire_register()
    r_cmp = prgm.acquire_register()
    r_sum = prgm.acquire_register()

    # r_cnt = 32, r_sum = 0
    spu.il(r_cnt, 32)
    spu.il(r_sum, 0)
    lbl_loop = prgm.get_unique_label("LOOP")
    code.add(lbl_loop)

    spu.ai(r_sum, r_sum, 1)

    # r_cmp = (r_cnt == 2); branch back to LOOP while that compare is false.
    # NOTE(review): r_cnt is never modified in the visible lines, so as written
    # the branch is always taken -- confirm whether this stream is only meant
    # as scheduler input rather than something to execute.
    spu.ceqi(r_cmp, r_cnt, 2)
    spu.brz(r_cmp, lbl_loop)

    spu.ai(r_sum, r_sum, 10)

    # Disabled floating-point sequence kept for reference.
    #src = prgm.acquire_register()
    #tmp = prgm.acquire_registers(3)
    #dst = prgm.acquire_registers(2)

    #spu.il(tmp[0], 1)
    #spu.il(tmp[1], 2)
    #spu.il(tmp[2], 3)
    #spu.fma(src, tmp[0], tmp[1], tmp[2])
    #spu.fa(dst[0], src, src)

示例#35

0

显示文件

文件： spu_log.py 项目： KapilRijhwani/corepy

  def synthesize(self, code):
    """
    Emit the logarithm approximation for self.x into `code`.

    The instruction sequence mirrors the C reference statements quoted in
    the trailing comments: split x into exponent and mantissa, normalize the
    mantissa around SQRTHF, evaluate a polynomial in the mantissa, then
    combine with the exponent (the last stage is labelled "log base 2").
    The value is left in self.result.

    Raises Exception if self.x or self.result has not been set.  The checks
    run before the active instruction stream is switched, so a failed call
    no longer leaves spu's global active code pointing at `code`.
    """
    if self.x is None: raise Exception("Please set x")
    if self.result is None: raise Exception("Please set result")

    old_code = spu.get_active_code()
    spu.set_active_code(code)

    # exponent
    e = var.Word()

    # Working values
    x = var.Word()
    y = var.Word()
    z = var.Word()

    cmp = var.Bits()
    tmp = var.Word()

    spu.xor(cmp, cmp, cmp)
    spu.xor(tmp, tmp, tmp)

    # Set the working x
    x.v = self.x

    # Extract the exponent
    # int e = (((*(unsigned int *) &x) >> 23) & 0xff) - 0x7e;
    e.v = x >> self.consts['_23']
    e.v = spu.andi.ex(e, 0xff)
    e.v = spu.ai.ex(e, 0x382) # 0x382 == (- 0x7E) using 10 bits
    # 0b 111 1110

    # Extract the mantissa
    x.v = x & self.consts['M1'] # *(unsigned int*)&x &= 0x807fffff;
    x.v = x | self.consts['M2'] # *(unsigned int*)&x |= 0x3f000000;

    # Normalize.  Both branches of the C "if" are computed and the result is
    # picked with selb, so no branch is emitted.  y, z and tmp are free at
    # this point and are reused as the branch temporaries.
    x1, x2, e1 = y, z, tmp

    # if (x < SQRTHF)
    cmp.v = spu.fcgt.ex(self.consts['SQRTHF'], x)

    # (True) { ... }
    e1.v = spu.ai.ex(e, -1)                  #   e -= 1;
    x1.v = spu.fa.ex(x, x)                   #   x = x + x - 1.0;
    x1.v = spu.fs.ex(x1, self.consts['ONE']) #     ""  ""

    # (False) { ... }
    x2.v = spu.fs.ex(x, self.consts['ONE'])  #   x = x - 1.0;

    # Select the True/False values based on cmp
    e.v = spu.selb.ex(e,  e1, cmp)
    x.v = spu.selb.ex(x2, x1, cmp)

    # Compute polynomial
    z.v = spu.fm.ex(x, x)                      #  z = x * x;

    y.v = spu.fms.ex(self.consts['C1'], x,     #  y = (((((((( 7.0376836292E-2 * x
                     self.consts['C2'])        #          - 1.1514610310E-1) * x
    y.v = spu.fma.ex(y, x, self.consts['C3'])  #          + 1.1676998740E-1) * x
    y.v = spu.fms.ex(y, x, self.consts['C4'])  #          - 1.2420140846E-1) * x
    y.v = spu.fma.ex(y, x, self.consts['C5'])  #          + 1.4249322787E-1) * x
    y.v = spu.fms.ex(y, x, self.consts['C6'])  #          - 1.6668057665E-1) * x
    y.v = spu.fma.ex(y, x, self.consts['C7'])  #          + 2.0000714765E-1) * x
    y.v = spu.fms.ex(y, x, self.consts['C8'])  #          - 2.4999993993E-1) * x
    y.v = spu.fma.ex(y, x, self.consts['C9'])  #          + 3.3333331174E-1)
    y.v = spu.fm.ex(y, x)                      #   * x
    y.v = spu.fm.ex(y, z)                      #   * z;

    y.v = spu.fma.ex(self.consts['C10'], z, y) #  y += -0.5 * z;

    # Convert to log base 2
    z.v = spu.fm.ex( y, self.consts['LOG2EA'])     # z = y * LOG2EA;
    z.v = spu.fma.ex(x, self.consts['LOG2EA'], z)  # z += x * LOG2EA;
    z.v = spu.fa.ex(z, y)                          # z += y;
    z.v = spu.fa.ex(z, x)                          # z += x;
    e.v = spu.csflt.ex(e, 155)                     # z += (float) e;
    z.v = spu.fa.ex(z, e)                          #  ""  ""

    # Copy z into the caller-visible result register (add-immediate 0).
    spu.ai(self.result, z, 0)       # return z

    spu.set_active_code(old_code)
    return

示例#36

0

显示文件

文件： spu_looptest.py 项目： KapilRijhwani/corepy

import corepy.arch.spu.lib.dma as dma
from corepy.arch.spu.lib.util import load_word

import time

if __name__ == '__main__':
  # Stress test: run one small countdown loop on the SPU many times over.
  program = env.Program()
  stream = program.get_stream()
  processor = env.Processor()

  spu.set_active_code(stream)

  counter = program.acquire_register()
  load_word(stream, counter, 0x10000)

  # Remember where the loop body starts so the branch can be expressed as a
  # (negative) distance measured in stream.size() units.
  loop_start = stream.size()

  spu.ai(counter, counter, -1)
  # Measured after the decrement is emitted and before the branch itself.
  back_edge = loop_start - stream.size()
  spu.brnz(counter, back_edge)

  program.add(stream)
  program.print_code()

  n_runs = 10000
  for _ in xrange(n_runs):
    processor.execute(program)

示例#37

0

显示文件

文件： spu_looptest.py 项目： maxim-tyutyunnikov/corepy

import corepy.arch.spu.isa as spu
import corepy.arch.spu.platform as env
import corepy.arch.spu.lib.dma as dma
from corepy.arch.spu.lib.util import load_word

import time

if __name__ == '__main__':
    # Build a single countdown loop once, then execute it repeatedly.
    program = env.Program()
    stream = program.get_stream()
    runner = env.Processor()

    spu.set_active_code(stream)

    r_counter = program.acquire_register()
    load_word(stream, r_counter, 0x10000)

    # Position of the first loop instruction; the branch target is given
    # relative to the position reached just before the branch is emitted.
    top = stream.size()

    spu.ai(r_counter, r_counter, -1)
    relative_target = top - stream.size()
    spu.brnz(r_counter, relative_target)

    program.add(stream)
    program.print_code()

    repetitions = 10000
    for _ in xrange(repetitions):
        runner.execute(program)

示例#38

0

显示文件

文件： div_test.py 项目： tmaone/efi

# Operands for the division test.  The integers are passed straight to the
# spu.* constructors below as register operands (presumably raw register
# numbers -- confirm).  `ia`, `ib` and `fa` are assigned above this excerpt.
fb = 124

# Successive reciprocal approximations
y0 = 120
y1 = 121
y2 = 122

# Scratch value for the refinement step
t1 = 119

result = 118

# Integer 1 and its floating-point conversion
ione = 110
fone = 111

# Instruction list computing result = fa * (1 / fb).
insts = [
    # Create fone = 1.0, fa = 2.0 and fb = 4.0
    spu.ai(ione, 0, 1),
    spu.ai(ia, 0, 2),
    spu.ai(ib, 0, 4),
    spu.cuflt(fone, ione, 155),
    spu.cuflt(fa, ia, 155),
    spu.cuflt(fb, ib, 155),

    # Compute 1/fb
    # frest/fi produce the initial estimate y1; fnms + fma then look like one
    # Newton-Raphson refinement (t1 = 1 - fb*y1, y2 = t1*y1 + y1) -- operand
    # roles assumed from the SPU ISA, TODO confirm.
    spu.frest(y0, fb),
    spu.fi(y1, fb, y0),
    spu.fnms(t1, fb, y1, fone),
    spu.fma(y2, t1, y1, y1),
    # result = fa * (1/fb)
    spu.fm(result, fa, y2)
]

for inst in insts:

示例#39

0

显示文件

文件： spre_linux_spu.py 项目： microwave89-hv/efi

def TestInt():
    """
    Execute six add-immediates followed by stop(0x200D) and verify the
    (value, stop code) pair returned by execute(..., stop=True).
    """
    stream = InstructionStream()
    processor = Processor()

    spu.set_active_code(stream)

    # Request registers 13 and 20 explicitly.
    reg13 = stream.acquire_register(reg=13)
    reg20 = stream.acquire_register(reg=20)

    delta = 13
    expected_stop = 0x200D

    spu.ai(reg20, reg20, delta)
    for _ in range(5):
        spu.ai(reg13, reg13, delta)

    spu.stop(expected_stop)

    outcome = processor.execute(stream, stop=True)

    assert outcome[0] == 0
    assert outcome[1] == expected_stop

示例#40

0

显示文件

def TestSPUIter():
  """
  Exercise syn_iter together with the DMA helpers.

  A 32-word array is fetched with mfc_get under tag 12, every quadword is
  added to itself inside a syn_iter loop, and the buffer is written back
  with mfc_put under tag 13.  Each element i must come back as i + i.
  """
  n_words = 32
  n_bytes = n_words * 4
  get_tag = 12
  put_tag = 13

  data = extarray.extarray('I', range(n_words))
  prgm = env.Program()
  code = prgm.get_stream()

  r_ea = prgm.acquire_register()
  r_lsa = prgm.acquire_register()
  r_nbytes = prgm.acquire_register()
  r_dma_tag = prgm.acquire_register()

  # Main-memory address and byte count of the array
  util.load_word(code, r_ea, data.buffer_info()[0])
  util.load_word(code, r_nbytes, n_bytes)

  # Tag for the inbound transfer; local store address 0
  code.add(spu.ai(r_dma_tag, code.r_zero, get_tag))
  code.add(spu.ai(r_lsa, code.r_zero, 0))

  # Fetch the array and block until the tagged transfer has completed
  dma.mfc_get(code, r_lsa, r_ea, r_nbytes, r_dma_tag)
  dma.mfc_write_tag_mask(code, 1 << get_tag)
  dma.mfc_read_tag_status_all(code)

  # Double each quadword.  syn_iter yields the offset used to index from
  # r_zero; the step of 16 matches one quadword per pass.
  quad = var.SignedWord(0, code)

  for offset in syn_iter(code, n_bytes, 16):
    code.add(spu.lqx(quad, code.r_zero, offset))
    quad.v = quad + quad
    code.add(spu.stqx(quad, code.r_zero, offset))

  # NOTE(review): the register behind `quad` is never released here, as in
  # the original (its release call was commented out).

  # Switch to the outbound tag, write the buffer back and wait on that tag
  code.add(spu.ai(r_dma_tag, code.r_zero, put_tag))
  dma.mfc_put(code, r_lsa, r_ea, r_nbytes, r_dma_tag)
  dma.mfc_write_tag_mask(code, 1 << put_tag)
  dma.mfc_read_tag_status_all(code)

  # Cleanup
  for reg in (r_ea, r_lsa, r_nbytes, r_dma_tag):
    prgm.release_register(reg)

  # Execute the code
  prgm.add(code)
  proc = env.Processor()
  proc.execute(prgm)

  for i in range(n_words):
    assert data[i] == i + i

示例#41

0

显示文件

文件： isched.py 项目： KapilRijhwani/corepy

  # Excerpt from the middle of a function; `env` and `spu` come from imports
  # outside the visible lines.
  import corepy.arch.spu.lib.util as util

  prgm = env.Program()
  code = prgm.get_stream()
  spu.set_active_code(code)

  r_cnt = prgm.acquire_register()
  r_cmp = prgm.acquire_register()
  r_sum = prgm.acquire_register()

  # r_cnt = 32, r_sum = 0
  spu.il(r_cnt, 32)
  spu.il(r_sum, 0)
  lbl_loop = prgm.get_unique_label("LOOP")
  code.add(lbl_loop)

  spu.ai(r_sum, r_sum, 1)

  # r_cmp = (r_cnt == 2); branch back to LOOP while that compare is false.
  # NOTE(review): r_cnt is never modified in the visible lines, so as written
  # the branch is always taken -- confirm whether this stream is only meant
  # as scheduler input rather than something to execute.
  spu.ceqi(r_cmp, r_cnt, 2)
  spu.brz(r_cmp, lbl_loop)

  spu.ai(r_sum, r_sum, 10)

  # Disabled floating-point sequence kept for reference.
  #src = prgm.acquire_register()
  #tmp = prgm.acquire_registers(3)
  #dst = prgm.acquire_registers(2)

  #spu.il(tmp[0], 1)
  #spu.il(tmp[1], 2)
  #spu.il(tmp[2], 3)
  #spu.fma(src, tmp[0], tmp[1], tmp[2])
  #spu.fa(dst[0], src, src)

示例#42

0

显示文件

文件： spu_types.py 项目： forrestv/pyable

 def copy_register(self, other):
     """
     Copy `other` into this variable by adding an immediate 0 to it.

     Returns whatever self.code.add() returns for the emitted instruction.
     """
     move_inst = spu.ai(self, other, 0)
     return self.code.add(move_inst)

示例#43

0

显示文件

文件： spre_linux_spu.py 项目： KapilRijhwani/corepy

def TestDebug():
  prgm = Program()
  code = prgm.get_stream()
  proc = DebugProcessor()

  spu.set_active_code(code)

  ra = code.acquire_register()
  rb = code.acquire_register()
  rc = code.acquire_register()
  rd = code.acquire_register()
  re = code.acquire_register()
  rf = code.acquire_register()
  rg = code.acquire_register()
  rh = code.acquire_register()  

  spu.ai(ra, 0, 14)
  spu.ai(rb, 0, 13)
  spu.ai(rc, 0, 14)
  spu.brnz(14, 3)
  spu.ai(rd, 0, 15)
  spu.ai(re, 0, 16)
  spu.ai(rf, 0, 17)
  spu.ai(rg, 0, 18)
  spu.ai(rh, 0, 19)    
  spu.nop(0)

  spu.stop(0x200A)

  prgm += code
  r = proc.execute(prgm) # , debug = True)

  r = proc.nexti()
  r = proc.nexti()
  r = proc.nexti()
  r = proc.nexti()
    
  while r != None:
    r = proc.nexti()
    if r is not None:
      regs = proc.dump_regs()
      print '******', regs[122:]
    
  assert(r == None)
  print 'int result:', r
  # while True:
  #   pass
  return

示例#44

0

显示文件

文件： div_test.py 项目： KapilRijhwani/corepy

# Operands for the division test.  The integers are passed straight to the
# spu.* constructors below as register operands (presumably raw register
# numbers -- confirm).  `ia`, `ib` and `fa` are assigned above this excerpt.
fb = 124

# Successive reciprocal approximations
y0 = 120
y1 = 121
y2 = 122

# Scratch value for the refinement step
t1 = 119

result  = 118

# Integer 1 and its floating-point conversion
ione = 110
fone = 111

# Instruction list computing result = fa * (1 / fb).
insts = [
  # Create fone = 1.0, fa = 2.0 and fb = 4.0
  spu.ai(ione, 0, 1),  
  spu.ai(ia, 0, 2),
  spu.ai(ib, 0, 4),
  spu.cuflt(fone, ione, 155),  
  spu.cuflt(fa, ia, 155),
  spu.cuflt(fb, ib, 155),

  # Compute 1/fb
  # frest/fi produce the initial estimate y1; fnms + fma then look like one
  # Newton-Raphson refinement (t1 = 1 - fb*y1, y2 = t1*y1 + y1) -- operand
  # roles assumed from the SPU ISA, TODO confirm.
  spu.frest(y0, fb),
  spu.fi(y1, fb, y0),
  spu.fnms(t1, fb, y1, fone),
  spu.fma(y2, t1, y1, y1),

  # result = fa * (1/fb)
  spu.fm(result, fa, y2)
  ]

示例#45

0

显示文件

文件： spre_linux_spu.py 项目： KapilRijhwani/corepy

def TestInt():
  """
  Build a program of six add-immediates ending in stop(0x200D), run it with
  stop = True, and check that the returned pair is (0, 0x200D).
  """
  program = Program()
  stream = program.get_stream()
  processor = Processor()

  spu.set_active_code(stream)

  # Ask for registers 13 and 20 by name.
  reg13 = program.acquire_register(reg_name = 13)
  reg20 = program.acquire_register(reg_name = 20)

  addend = 13
  stop_signal = 0x200D

  spu.ai(reg20, reg20, addend)
  for _ in range(5):
    spu.ai(reg13, reg13, addend)

  spu.stop(stop_signal)

  program += stream
  status = processor.execute(program, stop = True)

  assert status[0] == 0
  assert status[1] == stop_signal

示例#46

0

显示文件

文件： ispugui.py 项目： maxim-tyutyunnikov/corepy

    # Excerpt from the middle of a function: builds a small demo stream and
    # hands it to the GUI.  `env`, `spu` and `SPUApp` are defined outside the
    # visible lines.
    prgm = env.Program()
    code = prgm.get_stream()
    reg = prgm.acquire_register()
    foo = prgm.acquire_register(reg_name=5)

    # Straight-line prologue under label FOO: load 0xCAFE into foo, build
    # 0xDEADBEEF in reg from its upper and lower halves, then store reg with
    # displacement 4 from r_zero.
    code.add(prgm.get_label("FOO"))
    code.add(spu.il(foo, 0xCAFE))
    code.add(spu.ilhu(reg, 0xDEAD))
    code.add(spu.iohl(reg, 0xBEEF))
    code.add(spu.stqd(reg, code.r_zero, 4))

    lbl_loop = prgm.get_label("LOOP")
    lbl_break = prgm.get_label("BREAK")

    r_cnt = code.gp_return
    r_stop = prgm.acquire_register(reg_name=9)
    r_cmp = prgm.acquire_register()

    # r_cnt = 0, r_stop = 5
    code.add(spu.ori(r_cnt, code.r_zero, 0))
    code.add(spu.il(r_stop, 5))

    # Count r_cnt up by one per pass, leaving via BREAK once it equals r_stop.
    # The labels are looked up again by name here rather than reusing
    # lbl_loop / lbl_break.
    code.add(lbl_loop)
    code.add(spu.ceq(r_cmp, r_cnt, r_stop))
    code.add(spu.brnz(r_cmp, prgm.get_label("BREAK")))
    code.add(spu.ai(r_cnt, r_cnt, 1))
    code.add(spu.br(prgm.get_label("LOOP")))
    code.add(lbl_break)

    # Hand the stream to the GUI and enter its event loop.
    app = SPUApp(code)
    app.MainLoop()

示例#47

0

显示文件

文件： spu_comp.py 项目： KapilRijhwani/corepy

import corepy.lib.extarray as extarray
import corepy.arch.spu.isa as spu
import corepy.arch.spu.lib.util as util
import corepy.arch.spu.platform as env

# Demonstrates inserting one instruction stream into another, twice.
prgm = env.Program()
code = prgm.get_stream()
proc = env.Processor()

# Generate substream
# Multiply gp_return by 2, add 1
subcode = prgm.get_stream()
subcode.add(spu.shli(subcode.gp_return, subcode.gp_return, 1))
subcode.add(spu.ai(subcode.gp_return, subcode.gp_return, 1))

# Initialize gp_return, insert code
code.add(spu.il(code.gp_return, 5))
code.add(subcode)

# Add 3, insert again
code.add(spu.ai(code.gp_return, code.gp_return, 3))
code.add(subcode)

# If each insertion contributes its own copy of the two subcode instructions,
# the arithmetic is ((5 * 2 + 1) + 3) * 2 + 1 == 29 -- TODO confirm that this
# is the value printed below.

#code.print_code()

prgm.add(code)
prgm.print_code() # TODO  - support print prgm instead?

# mode = 'int' requests an integer result from execute().
ret = proc.execute(prgm, mode = 'int')
print "ret", ret

示例#48

0

显示文件

文件： spu_labels.py 项目： KapilRijhwani/corepy

def SimpleSPU():
  """
  A very simple SPU that computes 11 + 31 and returns 0xA on success.
  """
  prgm = env.Program()
  code = prgm.get_stream()
  proc = env.Processor()

  spu.set_active_code(code)
  

  # Acquire two registers
  #x    = code.acquire_register()
  x = prgm.gp_return
  test = prgm.acquire_register()

  lbl_brz = prgm.get_label("BRZ")
  lbl_skip = prgm.get_label("SKIP")

  spu.hbrr(lbl_brz, lbl_skip)
  spu.xor(x, x, x) # zero x
  spu.ai(x, x, 11) # x = x + 11
  spu.ai(x, x, 31) # x = x + 31

  spu.ceqi(test, x, 42) # test = (x == 42)

  # If test is false (all 0s), skip the stop(0x100A) instruction
  code.add(lbl_brz)
  spu.brz(test, lbl_skip)
  spu.stop(0x100A)
  code.add(lbl_skip)
  spu.stop(0x100B)

  prgm.add(code) 
  prgm.print_code() 
  r = proc.execute(prgm, mode = 'int', stop = True) 
  print "ret", r
  assert(r[0] == 42)
  assert(r[1] == 0x100A)


  prgm = env.Program()
  code = prgm.get_stream()
  spu.set_active_code(code)

  lbl_loop = prgm.get_label("LOOP")
  lbl_break = prgm.get_label("BREAK")

  r_cnt = prgm.acquire_register()
  r_stop = prgm.acquire_register()
  r_cmp = prgm.acquire_register()
  r_foo = prgm.gp_return

  spu.ori(r_foo, prgm.r_zero, 0)
  spu.ori(r_cnt, prgm.r_zero, 0)
  util.load_word(code, r_stop, 10)

  code.add(lbl_loop)

  spu.ceq(r_cmp, r_cnt, r_stop)
  spu.brnz(r_cmp, lbl_break)
  spu.ai(r_cnt, r_cnt, 1)

  spu.a(r_foo, r_foo, r_cnt)

  spu.br(lbl_loop)
  code.add(lbl_break)

  prgm.add(code)
  prgm.print_code()
  r = proc.execute(prgm, mode = 'int', stop = True)
  print "ret", r
  assert(r[0] == 55)

  return

示例#49

0

显示文件

文件： spu_psmap.py 项目： KapilRijhwani/corepy

  # Excerpt from the middle of a function: `prgm`, `code`, `ITERS`,
  # `load_word`, `dma` and `extarray` are defined outside the visible lines.
  # Route the bare spu.* calls below into `code`.
  spu.set_active_code(code)
  # 131072 / 4 == 32768 unsigned ints; not referenced again in this excerpt
  # (presumably a mapping of the SPU problem-state area -- TODO confirm).
  psmap = extarray.extarray('I', 131072 / 4)
  data = extarray.extarray('I', range(0, 16))

  r_sum = prgm.gp_return
  r_cnt = prgm.acquire_register()

  # r_sum = 0, r_cnt = ITERS
  spu.xor(r_sum, r_sum, r_sum)
  load_word(code, r_cnt, ITERS)

  # Loop head: one mailbox round trip per iteration.
  lbl_loop = prgm.get_label("loop")
  code.add(lbl_loop)

  # The helper hands back the register holding the value it read.
  reg = dma.spu_read_in_mbox(code)

  # Count the message and report the running count via the interrupting
  # outbound mailbox; the received value itself is not used.
  spu.ai(r_sum, r_sum, 1)
  dma.spu_write_out_intr_mbox(code, r_sum)
  #dma.spu_write_out_mbox(code, reg)

  prgm.release_register(reg)

  # Decrement the counter and loop while it is non-zero.
  spu.ai(r_cnt, r_cnt, -1)
  spu.brnz(r_cnt, lbl_loop)
 
  # After the loop: read signal register 1 and copy it (OR with 0) into the
  # return register.
  reg = dma.spu_read_signal1(code)
  spu.ori(code.gp_return, reg, 0)


  # Both registers are re-initialised for code that follows this excerpt.
  spu.il(r_cnt, 0)
  spu.il(r_sum, 16 * 4)

示例#50

0

显示文件

文件： spre_dummy_spu.py 项目： KapilRijhwani/corepy

def TestInt():
  code = InstructionStream()
  proc = Processor()

  spu.set_active_code(code)
  
  r13 = code.acquire_register(reg = 13)
  r20 = code.acquire_register(reg = 20)
  spu.ai(r20, r20, 13)
  spu.ai(r13, r13, 13)
  spu.ai(r13, r13, 13)
  spu.ai(r13, r13, 13)
  spu.ai(r13, r13, 13)
  spu.ai(r13, r13, 13)
  
  spu.stop(0x200D)

  code.print_code()
  r = proc.execute(code) # , debug = True)
  print 'int result:', r
  # while True:
  #   pass
  return