Пример #1
0
    def _align_stream(self, length, align):
        """Build the nop/lnop padding that aligns a stream of `length` bytes.

        Both `length` and `align` are given in bytes and are converted to
        4-byte instruction counts before the padding is computed.

        Returns a list of `pad` instruction objects, where
        pad = align/4 - ((length/4) % (align/4)).  An even count is emitted
        as nop/lnop pairs; an odd count starts and ends with an lnop, with
        nops alternating in between.

        NOTE(review): for a positive `align`, `pad` is never zero, so a
        length that is already aligned still yields a full `align` bytes
        of padding.

        Raises Exception when `align` is not a multiple of 4.
        """
        if align % 4 != 0:
            raise Exception("SPU alignment must be a multiple of 4 bytes")

        # From here on everything is counted in 4-byte instructions.
        n_words = length / 4
        word_align = align / 4

        # Number of padding instructions needed to reach the boundary.
        pad = word_align - (n_words % word_align)
        n_pairs = pad / 2

        if pad % 2 == 0:
            # Even count: n_pairs repetitions of (nop, lnop).
            pair = [spu.nop(self.r_zero, ignore_active=True),
                    spu.lnop(ignore_active=True)]
            return pair * n_pairs

        # Odd count: (lnop, nop) repeated, closed by one more lnop.
        pair = [spu.lnop(ignore_active=True),
                spu.nop(self.r_zero, ignore_active=True)]
        padding = pair * n_pairs
        padding.append(spu.lnop(ignore_active=True))
        return padding
Пример #2
0
  def _align_stream(self, length, align):
    """Build the nop/lnop padding that aligns a stream of `length` bytes.

    `length` and `align` are byte counts; both are divided by 4 so the
    computation is done in instructions.  The result is a list holding
    align/4 - ((length/4) % (align/4)) instruction objects.

    An even count is emitted as nop/lnop pairs.  An odd count is emitted
    as lnop/nop pairs followed by a final lnop.

    NOTE(review): for a positive `align` the count is never zero, so an
    already aligned length still receives `align` bytes of padding.

    Raises Exception when `align` is not a multiple of 4.
    """
    if align % 4 != 0:
      raise Exception("SPU alignment must be a multiple of 4 bytes")

    # Switch from bytes to 4-byte instruction units.
    n_words = length / 4
    word_align = align / 4

    # Instructions still missing to hit the boundary.
    pad = word_align - (n_words % word_align)
    n_pairs = pad / 2

    if pad % 2 == 0:
      # Even count: repeat (nop, lnop).
      pair = [spu.nop(self.r_zero, ignore_active = True),
              spu.lnop(ignore_active = True)]
      return pair * n_pairs

    # Odd count: repeat (lnop, nop), then finish with a single lnop.
    pair = [spu.lnop(ignore_active = True),
            spu.nop(self.r_zero, ignore_active = True)]
    padding = pair * n_pairs
    padding.append(spu.lnop(ignore_active = True))
    return padding
Пример #3
0
  def save_register(self, reg):
    """Emit code that stores `reg` through the local-store buffer.

    Instructions are added to the currently active code stream.  Three
    temporary registers are acquired and released again before returning.

    Emitted sequence:
      * rotate `self.ls_buffer` by 4 and by 8 bytes into the offset and
        size temporaries (presumably the buffer register carries the
        offset and size in its second and third words -- TODO confirm),
      * store `reg` with stqx using `self.ls_buffer` and the offset,
      * add 16 to the offset and compare it with the size,
      * write size, offset and the comparison result to the outbound
        mailbox,
      * call `self.save_ls_buffer`, guarded by a brz on the comparison
        result that is patched in over a placeholder `stop`.

    NOTE from the original author, kept verbatim:
    !!! STOPPED HERE !!! THESE VALUES ARE WRONG !!!
    """
    stream = spu.get_active_code()

    r_offset = stream.acquire_register()
    r_size = stream.acquire_register()
    r_full = stream.acquire_register()

    spu.rotqbyi(r_offset, self.ls_buffer, 4)
    spu.rotqbyi(r_size, self.ls_buffer, 8)

    spu.stqx(reg, self.ls_buffer, r_offset)

    # Advance by one 16-byte slot and test against the size.
    spu.ai(r_offset, r_offset, 16)
    spu.ceq(r_full, r_offset, r_size)

    # Report size, offset and test result through the outbound mailbox.
    for r_out in (r_size, r_offset, r_full):
      spu.wrch(r_out, dma.SPU_WrOutMbox)

    # Placeholder instruction; replaced by the branch once its span is known.
    branch_slot = stream.size()
    spu.stop(0xB)
    self.save_ls_buffer(ls_size = r_size)

    spu.nop(0)
    span = stream.size() - branch_slot
    stream[branch_slot] = spu.brz(r_full, span, ignore_active = True)

    stream.release_registers([r_offset, r_size, r_full])
Пример #4
0
    def save_register(self, reg):
        """Emit code that stores `reg` through the local-store buffer.

        Instructions go to the currently active code stream.  Three
        temporary registers are acquired here and released before the
        method returns.

        Emitted sequence:
          * rotate `self.ls_buffer` by 4 and by 8 bytes into the offset
            and size temporaries (presumably the buffer register carries
            the offset and size in its second and third words -- TODO
            confirm),
          * store `reg` with stqx using `self.ls_buffer` and the offset,
          * add 16 to the offset and compare it with the size,
          * write size, offset and the comparison result to the outbound
            mailbox,
          * call `self.save_ls_buffer`, guarded by a brz on the comparison
            result that is patched in over a placeholder `stop`.

        NOTE from the original author, kept verbatim:
        !!! STOPPED HERE !!! THESE VALUES ARE WRONG !!!
        """
        stream = spu.get_active_code()

        r_offset = stream.acquire_register()
        r_size = stream.acquire_register()
        r_full = stream.acquire_register()

        spu.rotqbyi(r_offset, self.ls_buffer, 4)
        spu.rotqbyi(r_size, self.ls_buffer, 8)

        spu.stqx(reg, self.ls_buffer, r_offset)

        # Advance by one 16-byte slot and test against the size.
        spu.ai(r_offset, r_offset, 16)
        spu.ceq(r_full, r_offset, r_size)

        # Report size, offset and test result through the outbound mailbox.
        for r_out in (r_size, r_offset, r_full):
            spu.wrch(r_out, dma.SPU_WrOutMbox)

        # Placeholder instruction; replaced by the branch further down.
        branch_slot = stream.size()
        spu.stop(0xB)
        self.save_ls_buffer(ls_size=r_size)

        spu.nop(0)
        span = stream.size() - branch_slot
        stream[branch_slot] = spu.brz(r_full, span, ignore_active=True)

        stream.release_registers([r_offset, r_size, r_full])
Пример #5
0
def TestDebug():
  prgm = Program()
  code = prgm.get_stream()
  proc = DebugProcessor()

  spu.set_active_code(code)

  ra = code.acquire_register()
  rb = code.acquire_register()
  rc = code.acquire_register()
  rd = code.acquire_register()
  re = code.acquire_register()
  rf = code.acquire_register()
  rg = code.acquire_register()
  rh = code.acquire_register()  

  spu.ai(ra, 0, 14)
  spu.ai(rb, 0, 13)
  spu.ai(rc, 0, 14)
  spu.brnz(14, 3)
  spu.ai(rd, 0, 15)
  spu.ai(re, 0, 16)
  spu.ai(rf, 0, 17)
  spu.ai(rg, 0, 18)
  spu.ai(rh, 0, 19)    
  spu.nop(0)

  spu.stop(0x200A)

  prgm += code
  r = proc.execute(prgm) # , debug = True)

  r = proc.nexti()
  r = proc.nexti()
  r = proc.nexti()
  r = proc.nexti()
    
  while r != None:
    r = proc.nexti()
    if r is not None:
      regs = proc.dump_regs()
      print '******', regs[122:]
    
  assert(r == None)
  print 'int result:', r
  # while True:
  #   pass
  return
Пример #6
0
def TestDebug():
    prgm = Program()
    code = prgm.get_stream()
    proc = DebugProcessor()

    spu.set_active_code(code)

    ra = code.acquire_register()
    rb = code.acquire_register()
    rc = code.acquire_register()
    rd = code.acquire_register()
    re = code.acquire_register()
    rf = code.acquire_register()
    rg = code.acquire_register()
    rh = code.acquire_register()

    spu.ai(ra, 0, 14)
    spu.ai(rb, 0, 13)
    spu.ai(rc, 0, 14)
    spu.brnz(14, 3)
    spu.ai(rd, 0, 15)
    spu.ai(re, 0, 16)
    spu.ai(rf, 0, 17)
    spu.ai(rg, 0, 18)
    spu.ai(rh, 0, 19)
    spu.nop(0)

    spu.stop(0x200A)

    prgm += code
    r = proc.execute(prgm)  # , debug = True)

    r = proc.nexti()
    r = proc.nexti()
    r = proc.nexti()
    r = proc.nexti()

    while r != None:
        r = proc.nexti()
        if r is not None:
            regs = proc.dump_regs()
            print '******', regs[122:]

    assert (r == None)
    print 'int result:', r
    # while True:
    #   pass
    return
Пример #7
0
Файл: ispu.py Проект: tmaone/efi
    def synthesize(self):
        # Okay.  This code is not going to exceed 256 instructions (1kb).  Knowing that,
        # the register contents can be safely placed at 0x3F400 in localstore, 3kb from
        # the top.  The SPRE will place the instruction stream as close to the top as
        # possible.  But since it is not going to be more than 1kb worth of instructions,
        # it will not overlap with the register contents.

        code = self.code
        spu.set_active_code(code)

        # Reload the instructions
        spu.sync(1)

        # Next instruction to execute
        lbl_op = code.size()
        spu.nop(0)

        # Placeholders for register store instructions
        for i in range(128):
            spu.stqa(i, 0xFD00 + (i * 4))
        #  spu.stqa(i, 0xFE00 + (i * 4))

        # Stop for next command
        spu.stop(0x0FFF)

        lbl_regs = code.size()

        # Create space for the saved registers
        #for i in range(128):
        #  # 16 bytes/register
        #  spu.nop(0)
        #  spu.lnop()
        #  spu.nop(0)
        #  spu.lnop()

        # Clearing active code here is important!
        spu.set_active_code(None)
        code.cache_code()

        code_size = len(code._prologue._code) * 4
        self.xfer_size = code_size + (16 - (code_size) % 16)
        print 'xfer_size:', self.xfer_size

        self.code_lsa = (0x3FFFF - code_size) & 0xFFF80
        self.lbl_op = lbl_op
        return
Пример #8
0
  def synthesize(self):
    # Okay.  This code is not going to exceed 256 instructions (1kb).  Knowing that,
    # the register contents can be safely placed at 0x3F400 in localstore, 3kb from
    # the top.  The SPRE will place the instruction stream as close to the top as
    # possible.  But since it is not going to be more than 1kb worth of instructions,
    # it will not overlap with the register contents.

    code = self.code
    spu.set_active_code(code)
    
    # Reload the instructions
    spu.sync(1)

    # Next instruction to execute
    lbl_op = code.size()
    spu.nop(0)    

    # Placeholders for register store instructions
    for i in range(128):
       spu.stqa(i, 0xFD00 + (i * 4))
    #  spu.stqa(i, 0xFE00 + (i * 4))

    # Stop for next command
    spu.stop(0x0FFF) 

    lbl_regs = code.size()
    
    # Create space for the saved registers
    #for i in range(128):
    #  # 16 bytes/register
    #  spu.nop(0)
    #  spu.lnop()
    #  spu.nop(0)
    #  spu.lnop()

    # Clearing active code here is important!
    spu.set_active_code(None)
    code.cache_code()

    code_size = len(code._prologue._code) * 4
    self.xfer_size = code_size  + (16 - (code_size) % 16);
    print 'xfer_size:', self.xfer_size

    self.code_lsa = (0x3FFFF - code_size) & 0xFFF80;
    self.lbl_op = lbl_op
    return
Пример #9
0
  def align_code(self, boundary):
    """
    Insert the appropriate nop/lnops to align the next instruction
    on the byte boundary.  boundary must be a multiple of four.

    A nop is added when the current instruction count is even and an
    lnop when it is odd.  Each one is added with the optimize override
    set to True.
    """
    words = boundary / 4

    while len(self._code) % words != 0:
      if len(self._code) % 2:
        filler = spu.lnop(0)
      else:
        filler = spu.nop(0)
      self.add(filler, True)
Пример #10
0
    def align_code(self, boundary):
        """
        Insert the appropriate nop/lnops to align the next instruction
        on the byte boundary.  boundary must be a multiple of four.

        A nop is added when the current instruction count is even and an
        lnop when it is odd.  Each one is added with the optimize
        override set to True.
        """
        words = boundary / 4

        while len(self._code) % words != 0:
            if len(self._code) % 2:
                filler = spu.lnop(0)
            else:
                filler = spu.nop(0)
            self.add(filler, True)
Пример #11
0
    def block(self):
        """Emit the result-saving block into the active code stream.

        Records the block start in `self._block_idx`, writes a nop into
        the reserved branch slot at `self._branch_idx`, packs the score,
        y and x values into `self._save_value`, stores that vector with
        stqx, advances `self._count` by 16, and branches back to the
        instruction after the branch slot.
        """
        stream = spu.get_active_code()
        self._block_idx = len(stream)

        # --> branch slot: a nop here means the block is never branched to
        #     (the brnz/brz alternatives are disabled)
        stream[self._branch_idx] = spu.nop(0, ignore_active=True)

        # Pack result into vector
        #   [x][y][score][--]
        packed = self._save_value
        mask = self._word_mask

        # Start from an all-zero vector
        spu.xor(packed, packed, packed)

        # Select in score, then y, rotating by 12 bytes after each
        spu.selb(packed, packed, self._score, mask)
        spu.rotqbyi(packed, packed, 12)

        spu.selb(packed, packed, self._y_off, mask)
        spu.rotqbyi(packed, packed, 12)

        # The x value goes in last, with no rotate afterwards
        spu.selb(packed, packed, self._x_off, mask)

        # Save value to local store
        spu.stqx(packed, self._count, self._md_results.r_addr)

        self._count.v = self._count.v + 16

        # --> MemorySave test; the save register is reused for the result
        full = self._save_value
        spu.ceq.ex(full, self._count, self._md_results.r_size)

        if self._save_op is not None:
            self._save_op.test(full, self._count)

        # Just reset for now
        spu.selb(self._count, self._count, 0, full)

        # Return to the loop: jump to the instruction after the branch slot
        spu.br(self._branch_idx + 1 - len(stream))
Пример #12
0
  def block(self):
    """Emit the (still empty) block and wire up its branches.

    Records the block start in `self._block_idx`, fills the reserved
    slot at `self._branch_idx` with a brnz on `self._cmp` that jumps to
    the block, and ends the block with a br back to the instruction
    after that slot.
    """
    stream = spu.get_active_code()
    start = len(stream)
    self._block_idx = start

    # --> add the branch instruction (the nop is immediately replaced)
    stream[self._branch_idx] = spu.nop(0, ignore_active = True)
    distance = start - self._branch_idx
    stream[self._branch_idx] = spu.brnz(self._cmp, distance,
                                        ignore_active = True)

    # FILL IN HERE

    # Return to the loop
    spu.br(self._branch_idx + 1 - len(stream))
Пример #13
0
  def block(self):
    """Emit the result-saving block into the active code stream.

    Records the block start in `self._block_idx`, writes a nop into the
    reserved branch slot at `self._branch_idx`, packs the score, y and x
    values into `self._save_value`, stores that vector with stqx,
    advances `self._count` by 16, and branches back to the instruction
    after the branch slot.
    """
    stream = spu.get_active_code()
    self._block_idx = len(stream)

    # --> branch slot: a nop here means the block is never branched to
    #     (the brnz/brz alternatives are disabled)
    stream[self._branch_idx] = spu.nop(0, ignore_active = True)

    # Pack result into vector
    #   [x][y][score][--]
    packed = self._save_value
    mask = self._word_mask

    # Start from an all-zero vector
    spu.xor(packed, packed, packed)

    # Select in score, then y, rotating by 12 bytes after each
    spu.selb(packed, packed, self._score, mask)
    spu.rotqbyi(packed, packed, 12)

    spu.selb(packed, packed, self._y_off, mask)
    spu.rotqbyi(packed, packed, 12)

    # The x value goes in last, with no rotate afterwards
    spu.selb(packed, packed, self._x_off, mask)

    # Save value to local store
    spu.stqx(packed, self._count, self._md_results.r_addr)

    self._count.v = self._count.v + 16

    # --> MemorySave test; the save register is reused for the result
    full = self._save_value
    spu.ceq.ex(full, self._count, self._md_results.r_size)

    if self._save_op is not None:
      self._save_op.test(full, self._count)

    # Just reset for now
    spu.selb(self._count, self._count, 0, full)

    # Return to the loop: jump to the instruction after the branch slot
    spu.br(self._branch_idx + 1 - len(stream))
Пример #14
0
    def _startSPU(self):
        """Allocate an SPU context, run a no-op stream, and map local store.

        A one-instruction stream is run so that the prolog is executed.
        Its byte length is rounded up to a multiple of 16 and the code is
        placed so that it ends at 0x40000.  Afterwards `self.localstore`
        is created as an extarray of type 'I' with 262144 / 4 elements
        and pointed at `ctx.spuls`.
        """
        ctx = env.spu_exec.alloc_context()
        self.ctx = ctx

        # Execute a no-op instruction stream so the prolog is executed
        stream = env.InstructionStream()
        stream.add(spu.nop(stream.r_zero))
        stream.cache_code()

        word_bytes = stream.render_code.itemsize
        n_bytes = len(stream.render_code) * word_bytes

        # Round the length up to the next multiple of 16.
        remainder = n_bytes % 16
        if remainder:
            n_bytes += 16 - remainder
        lsa = 0x40000 - n_bytes

        env.spu_exec.run_stream(ctx, stream.inst_addr(), n_bytes, lsa, lsa)

        self.localstore = extarray.extarray('I', 262144 / 4)
        self.localstore.set_memory(ctx.spuls)
Пример #15
0
  def _startSPU(self):
    self.ctx = ctx = env.spu_exec.alloc_context()

    # Execute a no-op instruction stream so the prolog is executed
    prgm = env.Program()
    code = prgm.get_stream()
    code.add(spu.nop(code.r_zero))

    prgm.cache_code()
    itemsize = prgm.render_code.itemsize 
    code_len = len(prgm.render_code) * itemsize
    if code_len % 16 != 0:
      code_len += 16 - (code_len % 16)
    code_lsa = 0x40000 - code_len

    env.spu_exec.run_stream(ctx, prgm.inst_addr(), code_len, code_lsa, code_lsa)

    self.localstore = extarray.extarray('I', 262144 / 4)
    print "spuls %x" % (ctx.spuls), ctx.spuls, type(ctx.spuls)
    self.localstore.set_memory(ctx.spuls, 262144)
    return
Пример #16
0
    def add(self, inst, optimize_override=False):
        """Append `inst`, padding so it lands on its pipeline's slot.

        When `self._optimize` is set and `optimize_override` is false,
        `inst.cycles[0]` is taken as the pipeline number and compared
        with the parity of the current stream length:

          * even length and pipeline 0, or odd length and pipeline 1:
            the instruction is added as is;
          * even length and pipeline 1: a nop is added first;
          * odd length and pipeline 0: an lnop is added first.

        NOTE(review): on this path an instruction whose pipeline value is
        neither 0 nor 1 is not added at all.

        Otherwise the instruction is passed straight to
        spe.InstructionStream.add.

        The cache flag is cleared in every case.  Returns the new length
        of `self._code`.
        """
        if optimize_override or not self._optimize:
            spe.InstructionStream.add(self, inst)
        else:
            pipeline = inst.cycles[0]
            parity = len(self._code) % 2

            if pipeline == parity:
                # Already on the matching slot
                InstructionStream.add(self, inst)
            elif pipeline == 1 and parity == 0:
                InstructionStream.add(self, spu.nop(0))
                InstructionStream.add(self, inst)
            elif pipeline == 0 and parity == 1:
                InstructionStream.add(self, spu.lnop(0))
                InstructionStream.add(self, inst)

        # Invalidate the cache
        self._cached = False
        return len(self._code)
Пример #17
0
  def add(self, inst, optimize_override = False):
    """Append `inst`, padding so it lands on its pipeline's slot.

    When `self._optimize` is set and `optimize_override` is false,
    `inst.cycles[0]` is taken as the pipeline number and compared with
    the parity of the current stream length:

      * even length and pipeline 0, or odd length and pipeline 1: the
        instruction is added as is;
      * even length and pipeline 1: a nop is added first;
      * odd length and pipeline 0: an lnop is added first.

    NOTE(review): on this path an instruction whose pipeline value is
    neither 0 nor 1 is not added at all.

    Otherwise the instruction is passed straight to
    spe.InstructionStream.add.

    The cache flag is cleared in every case.  Returns the new length of
    `self._code`.
    """
    if optimize_override or not self._optimize:
      spe.InstructionStream.add(self, inst)
    else:
      pipeline = inst.cycles[0]
      parity = len(self._code) % 2

      if pipeline == parity:
        # Already on the matching slot
        InstructionStream.add(self, inst)
      elif pipeline == 1 and parity == 0:
        InstructionStream.add(self, spu.nop(0))
        InstructionStream.add(self, inst)
      elif pipeline == 0 and parity == 1:
        InstructionStream.add(self, spu.lnop(0))
        InstructionStream.add(self, inst)

    # Invalidate the cache
    self._cached = False
    return len(self._code)