Пример #1
0
    def popc(self, count, x):
        """
        Accumulate the number of 1 bits in each word of x into count.

        The instructions are emitted on the currently active code object;
        one scratch register is borrowed from it and handed back before
        returning.
        """
        code = spu.get_active_code()
        scratch = code.acquire_register()

        # cntb followed by sumb produces the bit counts, which are then
        # added to the running total held in count.
        spu.cntb(scratch, x)
        spu.sumb(scratch, scratch, 0)
        spu.a(count, count, scratch)

        code.release_register(scratch)
        return
Пример #2
0
  def popc(self, count, x):
    """
    Accumulate the number of 1 bits in each word of x into count.

    The instructions are emitted on the currently active code object;
    one scratch register is borrowed from it and handed back before
    returning.
    """
    code = spu.get_active_code()
    scratch = code.acquire_register()

    # cntb followed by sumb produces the bit counts, which are then
    # added to the running total held in count.
    spu.cntb(scratch, x)
    spu.sumb(scratch, scratch, 0)
    spu.a(count, count, scratch)

    code.release_register(scratch)
    return
Пример #3
0
  def synthesize(self, code):
    """
    Emit a population-count kernel into code.

    The input vector is copied from register 5, its 1 bits are counted
    with self.popc, the per-word counts are folded with self.reduce_word,
    and the result is written to the outbound mailbox channel.

    Every register acquired here is released again before returning, and
    the previously active code object is restored even if synthesis
    raises.
    """
    old_code = spu.get_active_code()
    spu.set_active_code(code)

    try:
      # Reserve two variable registers
      count  = code.acquire_register()
      result = code.acquire_register()

      # 'Load' the input vector x from register 5
      x = code.acquire_register()
      spu.ai(x, 5, 0)

      # Zero count and result
      spu.xor(count, count, count)
      spu.xor(result, result, result)

      # Inline the popc and reduce operations
      self.popc(count, x)
      self.reduce_word(result, count)

      # Send the result to the caller
      spu.wrch(result, dma.SPU_WrOutMbox)

      # Nothing emitted after this point reads these registers, so hand
      # all three back (the original only released x).
      code.release_register(x)
      code.release_register(count)
      code.release_register(result)
    finally:
      spu.set_active_code(old_code)
    return
Пример #4
0
    def init_mm_buffer(self, addr, size, offset=0):
        """
        Initialise the main-memory buffer descriptor held in self.mm_buffer.

        Writes addr, size and offset into slots 0, 1 and 2 respectively,
        using util.set_slot_value on the active code object.
        """
        code = spu.get_active_code()

        # Slot index follows the position in the tuple: addr, size, offset.
        for slot, value in enumerate((addr, size, offset)):
            util.set_slot_value(code, self.mm_buffer, slot, value)
        return
Пример #5
0
  def save_register(self, reg): # , branch_to_save = False):
    """
    Emit code that stores reg into the local-store buffer and flushes the
    buffer through save_ls_buffer when a "full" test succeeds.

    Two values are read out of self.ls_buffer with rotqbyi (rotations of
    4 and 8 bytes) and used as the current offset and the size.  reg is
    stored at ls_buffer + offset, offset is advanced by 16 and compared
    against size.  A stop(0xB) placeholder is emitted and later
    overwritten with a brz on the comparison result that jumps over the
    save_ls_buffer code.

    NOTE(review): the original author marked this method as unfinished
    ("THESE VALUES ARE WRONG").  The advanced offset is not written back
    to self.ls_buffer here.  Also, init_mm_buffer/save_ls_buffer address
    slot 1 as size and slot 2 as offset via util.set_slot_value, which
    looks swapped relative to the rotations used below -- confirm the
    slot layout before relying on this code.
    """
    code = spu.get_active_code()

    # Three scratch registers, released together at the end
    offset = code.acquire_register()
    size = code.acquire_register()
    test = code.acquire_register()
    regs = [offset, size, test]
    
    # Pull the offset and size words out of the buffer descriptor
    spu.rotqbyi(offset, self.ls_buffer, 4)
    spu.rotqbyi(size,   self.ls_buffer, 8)

    # Store the register at ls_buffer + offset
    spu.stqx(reg, self.ls_buffer, offset)
    
    # Advance by one quadword (16 bytes) and test against size
    spu.ai(offset, offset, 16)
    spu.ceq(test,  offset, size)

    # Presumably debugging output: the three values are pushed to the
    # outbound mailbox -- TODO confirm whether callers expect them
    spu.wrch(size, dma.SPU_WrOutMbox)
    spu.wrch(offset, dma.SPU_WrOutMbox)
    spu.wrch(test, dma.SPU_WrOutMbox)
    # !!! STOPPED HERE !!! THESE VALUES ARE WRONG !!!
    # Remember where the branch goes; the stop is only a placeholder
    lbl_ls_full = code.size()
    spu.stop(0xB)
    self.save_ls_buffer(ls_size = size)

    spu.nop(0)
    # Patch the placeholder: brz on test skips the save code emitted above
    code[lbl_ls_full] = spu.brz(test, (code.size() - lbl_ls_full), ignore_active = True)

    code.release_registers(regs)
    return
Пример #6
0
    def synthesize(self, code):
        """
        Render a vector with 4 pixels.

        Emits code into `code` that transforms self.result into an RGBA
        quadword, stores it relative to self.x_offset and self.lsa, and
        advances self.x_offset by 16.

        Raises:
          Exception: if x_offset, result or one has not been set.  The
            checks run before the active code is switched, so a failed
            check no longer leaves `code` installed as the active code.
        """
        if self.x_offset is None: raise Exception('Please call setup')
        if self.result is None: raise Exception('Please set result')
        if self.one is None: raise Exception('Please set one')

        old_code = spu.get_active_code()
        spu.set_active_code(code)

        # Make the part of the result positive and subtract 1
        # to transform (-1,-oo) into (0,oo)
        self.result.v = spu.fs.ex(0, self.result)
        self.result.v = spu.fs.ex(self.result, self.one)

        # Convert the result to an unsigned int, scaling by 2^4 to put
        # values between 0 and 16 in the gradient.  Values outside [0,16]
        # are 0 or FF
        self.result.v = spu.cfltu.ex(self.result, 169)  # 173 - 169 == 4

        # Extract the first two bytes from the result into the RGB positions
        # and set alpha to 0xFF
        self.result.v = spu.shufb.ex(self.result, self.ff, self.uint2rgba)

        # Save the result and increment the offset
        spu.stqd(self.result, self.x_offset, self.lsa >> 4)
        spu.ai(self.x_offset, self.x_offset, 16)

        spu.set_active_code(old_code)
        return
Пример #7
0
    def synthesize(self, code):
        """
        Emit a population-count kernel into code.

        The input vector is copied from register 5, its 1 bits are counted
        with self.popc, the per-word counts are folded with
        self.reduce_word, and the result is written to the outbound
        mailbox channel.

        Every register acquired here is released again before returning,
        and the previously active code object is restored even if
        synthesis raises.
        """
        old_code = spu.get_active_code()
        spu.set_active_code(code)

        try:
            # Reserve two variable registers
            count = code.acquire_register()
            result = code.acquire_register()

            # 'Load' the input vector x from register 5
            x = code.acquire_register()
            spu.ai(x, 5, 0)

            # Zero count and result
            spu.xor(count, count, count)
            spu.xor(result, result, result)

            # Inline the popc and reduce operations
            self.popc(count, x)
            self.reduce_word(result, count)

            # Send the result to the caller
            spu.wrch(result, dma.SPU_WrOutMbox)

            # Nothing emitted after this point reads these registers, so
            # hand all three back (the original only released x).
            code.release_register(x)
            code.release_register(count)
            code.release_register(result)
        finally:
            spu.set_active_code(old_code)
        return
Пример #8
0
  def synthesize(self, code):
    """
    Emit code that fills one line with a constant colour and writes it
    repeatedly to the first frame buffer.

    A 256-pixel line is stored with stqx in 16-byte steps, then a memory
    descriptor is used to put that line 128 times, advancing the target
    address by self.stride after each put.

    Raises:
      Exception: if buffers or stride has not been set.  The checks run
        before the active code is switched, so a failed check no longer
        leaves `code` installed as the active code.
    """
    if self.buffers is None: raise Exception('Please set buffers')
    if self.stride is None: raise Exception('Please set stride')

    old_code = spu.get_active_code()
    spu.set_active_code(code)

    # Draw a square
    color  = var.SignedWord(0x0F0F0FFF)
    fb0    = var.Word(self.buffers[0])
    # fb1 is not referenced below; it is still created so the generated
    # code and register usage stay exactly as before.
    fb1    = var.Word(self.buffers[1])
    stride = var.Word(self.stride)
    addr   = var.Word(0)

    # Draw one line
    line_pixels = 256
    for i in spuiter.syn_iter(code, line_pixels*4, step = 16):
      spu.stqx(color, addr, i)

    # Transfer the line to the frame buffer
    md_fb = spuiter.memory_desc('I', size = line_pixels)
    md_fb.set_addr_reg(addr.reg)

    addr.v = fb0

    for i in spuiter.syn_iter(code, 128):
      md_fb.put(code, 0)
      addr.v = addr + stride

    spu.set_active_code(old_code)
    return
Пример #9
0
    def save_register(self, reg):  # , branch_to_save = False):
        """
        Emit code that stores reg into the local-store buffer and flushes
        the buffer through save_ls_buffer when a "full" test succeeds.

        Two values are read out of self.ls_buffer with rotqbyi (rotations
        of 4 and 8 bytes) and used as the current offset and the size.
        reg is stored at ls_buffer + offset, offset is advanced by 16 and
        compared against size.  A stop(0xB) placeholder is emitted and
        later overwritten with a brz on the comparison result that jumps
        over the save_ls_buffer code.

        NOTE(review): the original author marked this method as unfinished
        ("THESE VALUES ARE WRONG").  The advanced offset is not written
        back to self.ls_buffer here.  Also, init_mm_buffer/save_ls_buffer
        address slot 1 as size and slot 2 as offset via
        util.set_slot_value, which looks swapped relative to the rotations
        used below -- confirm the slot layout before relying on this code.
        """
        code = spu.get_active_code()

        # Three scratch registers, released together at the end
        offset = code.acquire_register()
        size = code.acquire_register()
        test = code.acquire_register()
        regs = [offset, size, test]

        # Pull the offset and size words out of the buffer descriptor
        spu.rotqbyi(offset, self.ls_buffer, 4)
        spu.rotqbyi(size, self.ls_buffer, 8)

        # Store the register at ls_buffer + offset
        spu.stqx(reg, self.ls_buffer, offset)

        # Advance by one quadword (16 bytes) and test against size
        spu.ai(offset, offset, 16)
        spu.ceq(test, offset, size)

        # Presumably debugging output: the three values are pushed to the
        # outbound mailbox -- TODO confirm whether callers expect them
        spu.wrch(size, dma.SPU_WrOutMbox)
        spu.wrch(offset, dma.SPU_WrOutMbox)
        spu.wrch(test, dma.SPU_WrOutMbox)
        # !!! STOPPED HERE !!! THESE VALUES ARE WRONG !!!
        # Remember where the branch goes; the stop is only a placeholder
        lbl_ls_full = code.size()
        spu.stop(0xB)
        self.save_ls_buffer(ls_size=size)

        spu.nop(0)
        # Patch the placeholder: brz on test skips the save code emitted
        # above
        code[lbl_ls_full] = spu.brz(test, (code.size() - lbl_ls_full),
                                    ignore_active=True)

        code.release_registers(regs)
        return
Пример #10
0
  def synthesize(self, code):
    """
    Render a vector with 4 pixels.

    Emits code into `code` that transforms self.result into an RGBA
    quadword, stores it relative to self.x_offset and self.lsa, and
    advances self.x_offset by 16.

    Raises:
      Exception: if x_offset, result or one has not been set.  The checks
        run before the active code is switched, so a failed check no
        longer leaves `code` installed as the active code.
    """
    if self.x_offset is None: raise Exception('Please call setup')
    if self.result is None: raise Exception('Please set result')
    if self.one is None: raise Exception('Please set one')

    old_code = spu.get_active_code()
    spu.set_active_code(code)

    # Make the part of the result positive and subtract 1
    # to transform (-1,-oo) into (0,oo)
    self.result.v = spu.fs.ex(0, self.result)
    self.result.v = spu.fs.ex(self.result, self.one)

    # Convert the result to an unsigned int, scaling by 2^4 to put
    # values between 0 and 16 in the gradient.  Values outside [0,16]
    # are 0 or FF
    self.result.v = spu.cfltu.ex(self.result, 169) # 173 - 169 == 4

    # Extract the first two bytes from the result into the RGB positions
    # and set alpha to 0xFF
    self.result.v = spu.shufb.ex(self.result, self.ff, self.uint2rgba)

    # Save the result and increment the offset
    spu.stqd(self.result, self.x_offset, self.lsa >> 4)
    spu.ai(self.x_offset, self.x_offset, 16)

    spu.set_active_code(old_code)
    return
Пример #11
0
  def init_mm_buffer(self, addr, size, offset = 0):
    """
    Initialise the main-memory buffer descriptor held in self.mm_buffer.

    Writes addr, size and offset into slots 0, 1 and 2 respectively,
    using util.set_slot_value on the active code object.
    """
    code = spu.get_active_code()

    # Slot index follows the position in the tuple: addr, size, offset.
    for slot, value in enumerate((addr, size, offset)):
      util.set_slot_value(code, self.mm_buffer, slot, value)
    return
Пример #12
0
  def synthesize(self, code):
    """
    Emit a streaming population count into code.

    Data is walked buffer by buffer through a spuiter.stream_buffer; each
    vector of the current buffer is fed to syn_popc_var.popc, the running
    count is folded with reduce_word, and the total is written to the
    outbound mailbox channel.
    """
    previous = spu.get_active_code()
    spu.set_active_code(code)

    # Iterators: the outer one yields one buffer at a time, the memory
    # descriptor describes the local-store window the inner loop walks.
    chunks = spuiter.stream_buffer(code, self.stream_addr, self.stream_size * 4,
                                   self.buffer_size, self.lsa)
    ls_words = spuiter.memory_desc('I', self.lsa, self.buffer_size / 4)
    counter = syn_popc_var()

    # Variables are created in the same order as before so that register
    # assignment is unchanged.
    x = var.Word(0)
    ones = var.Word(0)
    total = var.Word(0)

    for chunk_addr in chunks:
      for x in spuiter.spu_vec_iter(code, ls_words, addr_reg = chunk_addr):
        counter.popc(ones, x)

    counter.reduce_word(total, ones)

    # Send the result to the caller
    spu.wrch(total, dma.SPU_WrOutMbox)

    spu.set_active_code(previous)
    return
Пример #13
0
    def synthesize(self, code):
        """
        Emit a streaming population count into code.

        Data is walked buffer by buffer through a spuiter.stream_buffer;
        each vector of the current buffer is fed to syn_popc_var.popc, the
        running count is folded with reduce_word, and the total is written
        to the outbound mailbox channel.
        """
        previous = spu.get_active_code()
        spu.set_active_code(code)

        # Iterators: the outer one yields one buffer at a time, the memory
        # descriptor describes the local-store window the inner loop walks.
        chunks = spuiter.stream_buffer(code, self.stream_addr,
                                       self.stream_size * 4, self.buffer_size,
                                       self.lsa)
        ls_words = spuiter.memory_desc('I', self.lsa, self.buffer_size / 4)
        counter = syn_popc_var()

        # Variables are created in the same order as before so that
        # register assignment is unchanged.
        x = var.Word(0)
        ones = var.Word(0)
        total = var.Word(0)

        for chunk_addr in chunks:
            for x in spuiter.spu_vec_iter(code, ls_words, addr_reg=chunk_addr):
                counter.popc(ones, x)

        counter.reduce_word(total, ones)

        # Send the result to the caller
        spu.wrch(total, dma.SPU_WrOutMbox)

        spu.set_active_code(previous)
        return
Пример #14
0
 def test(self, cmp, count_var):
   """
   Record where the test branch belongs and remember its operands.

   The current length of the active code is stored as the branch index
   and a stop(0xB) instruction is emitted at that position (presumably a
   placeholder that is patched later).  cmp and count_var are kept on
   the instance.
   """
   self._branch_idx = len(spu.get_active_code())
   spu.stop(0xB)

   self._cmp, self._count = cmp, count_var
   return
Пример #15
0
    def setup(self, code):
        """
        Create one var.Word per entry of the module-level `constants`
        table and keep them in self.consts under the same keys.

        The variables are created while `code` is the active code object;
        the previously active code is restored afterwards.
        """
        previous = spu.get_active_code()
        spu.set_active_code(code)

        self.consts = {}
        for name, value in constants.items():
            self.consts[name] = var.Word(value)

        spu.set_active_code(previous)
        return
Пример #16
0
  def setup(self, code):
    """
    Create one var.Word per entry of the module-level `constants` table
    and keep them in self.consts under the same keys.

    The variables are created while `code` is the active code object; the
    previously active code is restored afterwards.
    """
    previous = spu.get_active_code()
    spu.set_active_code(code)

    self.consts = {}
    for name, value in constants.items():
      self.consts[name] = var.Word(value)

    spu.set_active_code(previous)
    return
Пример #17
0
 def test(self, cmp, score, x_off, y_off):
   """
   Record where the test branch belongs and remember its operands.

   The current length of the active code is stored as the branch index
   and a stop(0xB) instruction is emitted at that position (presumably a
   placeholder that is patched later).  cmp, score, x_off and y_off are
   kept on the instance.
   """
   self._branch_idx = len(spu.get_active_code())
   spu.stop(0xB)

   self._cmp = cmp
   self._score, self._x_off, self._y_off = score, x_off, y_off
   return
Пример #18
0
    def synthesize(self, code):
        """
        Emit code that writes self.result to the outbound mailbox channel.

        Raises:
          Exception: if result has not been set.  The check runs before
            the active code is switched, so a failed check no longer
            leaves `code` installed as the active code.
        """
        if self.result is None: raise Exception('Please set result')

        old_code = spu.get_active_code()
        spu.set_active_code(code)

        spu.wrch(self.result, dma.SPU_WrOutMbox)

        spu.set_active_code(old_code)
        return
Пример #19
0
  def synthesize(self, code):
    """
    Emit code that writes self.result to the outbound mailbox channel.

    Raises:
      Exception: if result has not been set.  The check runs before the
        active code is switched, so a failed check no longer leaves
        `code` installed as the active code.
    """
    if self.result is None: raise Exception('Please set result')

    old_code = spu.get_active_code()
    spu.set_active_code(code)

    spu.wrch(self.result, dma.SPU_WrOutMbox)

    spu.set_active_code(old_code)
    return
Пример #20
0
    def synthesize(self, code):
        """
        Emit the nested row/column loop that evaluates self.ly_point for
        every vector of 4 points and, when a renderer is attached, hands
        each result to it.

        Parameters are loaded with self._load_parameters, an SPULog is set
        up to provide constants, and the starting values, increments and
        pattern are fetched with lqa.  The outer loop runs self.h times,
        the inner loop self.w / 4 times.
        """
        previous = spu.get_active_code()
        spu.set_active_code(code)

        self._load_parameters(code)

        log = spu_log.SPULog()
        log.setup(code)

        # The renderer is optional; every use below is guarded.
        renderer = self.renderer
        if renderer is not None:
            renderer.setup(code)
            renderer.set_one(log.consts['ONE'])

        # Variables are created in the original order so register
        # assignment is unchanged.
        r1_step = var.SingleFloat()
        r2_step = var.SingleFloat()
        r1 = var.SingleFloat()
        r2 = var.SingleFloat()
        result = var.SingleFloat()
        pattern = var.Word(0)

        point = self.ly_point
        point.set_pattern_reg(pattern)
        point.set_result_reg(result)
        point.set_r_regs(r1, r2)
        point.set_log(log)
        point.setup(code)

        # Fetch start values, increments and the pattern
        spu.lqa(r1, 0)
        spu.lqa(r2, 4)
        spu.lqa(r1_step, 8)
        spu.lqa(r2_step, 12)
        spu.lqa(pattern, 16)

        for _row in spuiter.syn_iter(code, self.h):
            # Restart r1 at the beginning of every row
            spu.lqa(r1, 0)

            for _col in spuiter.syn_iter(code, self.w / 4):
                self.ly_point.synthesize(code)
                r1.v = spu.fa.ex(r1, r1_step)

                if renderer is None:
                    continue
                renderer.set_result_reg(result)
                renderer.synthesize(code)

            if renderer is not None:
                renderer.row_complete(code)
            r2.v = spu.fa.ex(r2, r2_step)

        spu.set_active_code(previous)
        return
Пример #21
0
  def synthesize(self, code):
    """
    Emit the nested row/column loop that evaluates self.ly_point for
    every vector of 4 points and, when a renderer is attached, hands each
    result to it.

    Parameters are loaded with self._load_parameters, an SPULog is set up
    to provide constants, and the starting values, increments and pattern
    are fetched with lqa.  The outer loop runs self.h times, the inner
    loop self.w / 4 times.
    """
    previous = spu.get_active_code()
    spu.set_active_code(code)

    self._load_parameters(code)

    log = spu_log.SPULog()
    log.setup(code)

    # The renderer is optional; every use below is guarded.
    renderer = self.renderer
    if renderer is not None:
      renderer.setup(code)
      renderer.set_one(log.consts['ONE'])

    # Variables are created in the original order so register assignment
    # is unchanged.
    r1_step = var.SingleFloat()
    r2_step = var.SingleFloat()
    r1 = var.SingleFloat()
    r2 = var.SingleFloat()
    result = var.SingleFloat()
    pattern = var.Word(0)

    point = self.ly_point
    point.set_pattern_reg(pattern)
    point.set_result_reg(result)
    point.set_r_regs(r1, r2)
    point.set_log(log)
    point.setup(code)

    # Fetch start values, increments and the pattern
    spu.lqa(r1, 0)
    spu.lqa(r2, 4)
    spu.lqa(r1_step, 8)
    spu.lqa(r2_step, 12)
    spu.lqa(pattern, 16)

    for _row in spuiter.syn_iter(code, self.h):
      # Restart r1 at the beginning of every row
      spu.lqa(r1, 0)

      for _col in spuiter.syn_iter(code, self.w / 4):
        self.ly_point.synthesize(code)
        r1.v = spu.fa.ex(r1, r1_step)

        if renderer is None:
          continue
        renderer.set_result_reg(result)
        renderer.synthesize(code)

      if renderer is not None:
        renderer.row_complete(code)
      r2.v = spu.fa.ex(r2, r2_step)

    spu.set_active_code(previous)
    return
Пример #22
0
  def synthesize_constants(self, code):
    """
    Acquire a register for self._one and emit the code that fills it.

    The register is zeroed, incremented to 1 and then converted with
    cuflt (scale argument 155).  The previously active code object is
    restored only when there was one.
    """
    previous = spu.get_active_code()
    spu.set_active_code(code)

    one = code.acquire_register()
    self._one = one

    # 0 -> 1 -> float
    spu.xor(one, one, one)
    spu.ai(one, one, 1)
    spu.cuflt(one, one, 155)

    if previous is None:
      return

    spu.set_active_code(previous)
    return
Пример #23
0
  def _compute_ratio(self, ab, c, result):
    """
    Emit code computing c / (ab + c) into result.

    ab and c are converted to float in place and ab is overwritten with
    ab + c, so both input registers are clobbered.  The division is
    delegated to fdiv, which also receives self._one.
    """
    # Convert both operands to float, ab first and then c
    for operand in (ab, c):
      spu.cuflt(operand, operand, 155)

    # ab <- ab + c
    spu.fa(ab, ab, c)

    # result <- c / (ab + c)
    active = spu.get_active_code()
    fdiv(active, result, c, ab, self._one)

    return
Пример #24
0
    def block(self):
        """
        Emit the out-of-line block that packs (x, y, score) into one
        quadword, stores it in the results buffer and branches back to
        the instruction after the recorded branch position.

        The instruction at self._branch_idx (recorded earlier, together
        with self._cmp, self._score, self._x_off and self._y_off) is
        overwritten here; in this version it becomes a nop, with the
        conditional-branch variants left commented out.

        NOTE(review): self._save_value is reused as the comparison
        register after the store, and the count is reset with selb
        against a literal 0 operand -- confirm both are intended.
        """
        code = spu.get_active_code()
        # Position of the first instruction of this block
        self._block_idx = len(code)

        # --> add the branch instruction (use brz (?) to always branch, nop to never branch)
        code[self._branch_idx] = spu.nop(0, ignore_active=True)
        # code[self._branch_idx] = spu.brnz(self._cmp, self._block_idx - self._branch_idx, ignore_active = True)
        # code[self._branch_idx] = spu.brz(self._cmp, self._block_idx - self._branch_idx, ignore_active = True)

        # Pack result into vector
        #   [x][y][score][--]

        # Zero the save value
        spu.xor(self._save_value, self._save_value, self._save_value)

        # Copy the score
        spu.selb(self._save_value, self._save_value, self._score,
                 self._word_mask)
        # Rotate by 12 bytes to make room for the next value
        spu.rotqbyi(self._save_value, self._save_value, 12)

        # Copy the y value
        spu.selb(self._save_value, self._save_value, self._y_off,
                 self._word_mask)
        spu.rotqbyi(self._save_value, self._save_value, 12)

        # Copy the x value
        spu.selb(self._save_value, self._save_value, self._x_off,
                 self._word_mask)

        # Save value to local store
        spu.stqx(self._save_value, self._count, self._md_results.r_addr)

        # Advance the result offset by one quadword (16 bytes)
        self._count.v = self._count.v + 16

        # --> MemorySave test
        cmp = self._save_value  # reuse the save register
        spu.ceq.ex(cmp, self._count, self._md_results.r_size)

        # Let the optional save operation record its own test point
        if self._save_op is not None:
            self._save_op.test(cmp, self._count)

        # Just reset for now
        spu.selb(self._count, self._count, 0, cmp)

        # Return to the loop
        idx = len(code)
        # Negative relative branch targeting the instruction that follows
        # the recorded branch position
        spu.br(-(idx - self._branch_idx - 1))

        return
Пример #25
0
  def block(self):
    """
    Emit the skeleton of an out-of-line block.

    The instruction at the recorded branch position is overwritten (first
    with a nop, then with a brnz on self._cmp that targets the start of
    this block), and a br back to the instruction following that position
    is appended.  The body between the two is left to be filled in.
    """
    code = spu.get_active_code()
    self._block_idx = len(code)

    # --> add the branch instruction
    slot = self._branch_idx
    code[slot] = spu.nop(0, ignore_active = True)
    forward = spu.brnz(self._cmp, self._block_idx - slot, ignore_active = True)
    code[slot] = forward

    # FILL IN HERE

    # Return to the loop: relative jump back to the instruction after
    # the patched branch
    spu.br(slot + 1 - len(code))

    return
Пример #26
0
  def block(self):
    """
    Emit the out-of-line block that packs (x, y, score) into one
    quadword, stores it in the results buffer and branches back to the
    instruction after the recorded branch position.

    The instruction at self._branch_idx (recorded earlier, together with
    self._cmp, self._score, self._x_off and self._y_off) is overwritten
    here; in this version it becomes a nop, with the conditional-branch
    variants left commented out.

    NOTE(review): self._save_value is reused as the comparison register
    after the store, and the count is reset with selb against a literal
    0 operand -- confirm both are intended.
    """
    code = spu.get_active_code()
    # Position of the first instruction of this block
    self._block_idx = len(code)

    # --> add the branch instruction (use brz (?) to always branch, nop to never branch)
    code[self._branch_idx] = spu.nop(0, ignore_active = True)
    # code[self._branch_idx] = spu.brnz(self._cmp, self._block_idx - self._branch_idx, ignore_active = True)
    # code[self._branch_idx] = spu.brz(self._cmp, self._block_idx - self._branch_idx, ignore_active = True)

    # Pack result into vector
    #   [x][y][score][--]

    # Zero the save value
    spu.xor(self._save_value, self._save_value, self._save_value)

    # Copy the score
    spu.selb(self._save_value, self._save_value, self._score, self._word_mask)    
    # Rotate by 12 bytes to make room for the next value
    spu.rotqbyi(self._save_value, self._save_value, 12)

    # Copy the y value
    spu.selb(self._save_value, self._save_value, self._y_off, self._word_mask)
    spu.rotqbyi(self._save_value, self._save_value, 12)        

    # Copy the x value
    spu.selb(self._save_value, self._save_value, self._x_off, self._word_mask)
    
    # Save value to local store
    spu.stqx(self._save_value, self._count, self._md_results.r_addr)
    
    # Advance the result offset by one quadword (16 bytes)
    self._count.v = self._count.v + 16

    # --> MemorySave test
    cmp = self._save_value # reuse the save register
    spu.ceq.ex(cmp, self._count, self._md_results.r_size)

    # Let the optional save operation record its own test point
    if self._save_op is not None:
      self._save_op.test(cmp, self._count)
      
    # Just reset for now
    spu.selb(self._count, self._count, 0, cmp)

    # Return to the loop
    idx = len(code)
    # Negative relative branch targeting the instruction that follows the
    # recorded branch position
    spu.br(- (idx - self._branch_idx - 1))
    
    return
Пример #27
0
    def synthesize(self, code):
        """
        Emit code that accumulates two terms over the x/y register pairs
        and computes their ratio.

        For each of the self._n_bits / 128 register pairs, self._ab_c
        updates the `ab` and `c` accumulators; both are then folded with
        self._reduce_word and passed to self._compute_ratio.  A short
        register-usage summary is printed (Python 2 print statement).

        Raises:
          Exception: if x_regs, y_regs or the result register is unset.

        NOTE(review): the ratio is written to a locally acquired `result`
        register that is neither copied to self._result nor added to the
        list of registers released at the end -- confirm whether
        self._result was meant to be the destination.
        NOTE(review): when self._one is created here it is released at
        the end while self._one stays set, so a later call would skip
        synthesize_constants and use a released register -- verify.
        """
        if self._x_regs is None: raise Exception("Please set x_regs")
        if self._y_regs is None: raise Exception("Please set y_regs")
        if self._result is None: raise Exception("Please set result register")

        old_code = spu.get_active_code()
        spu.set_active_code(code)

        # Registers to hand back when synthesis is finished
        regs = []

        # Create the shared constant on first use
        if self._one is None:
            self.synthesize_constants(code)
            regs.append(self._one)

        ab = code.acquire_register()
        c = code.acquire_register()
        ab_temp = code.acquire_register()
        c_temp = code.acquire_register()
        result = code.acquire_register()
        regs = regs + [ab, c, ab_temp, c_temp]

        # Number of register pairs to process; integer division under
        # Python 2, which this file targets (see the print statement below)
        nregs = self._n_bits / 128

        for i in range(nregs):
            # self._ab(self._x_regs[i], self._y_regs[i], ab, ab_temp)
            # self._c( self._x_regs[i], self._y_regs[i],  c,  c_temp)
            self._ab_c(self._x_regs[i], self._y_regs[i], ab, c, ab_temp,
                       c_temp)

        # Fold each accumulator into its *_temp register
        self._reduce_word(ab, ab_temp)
        self._reduce_word(c, c_temp)

        self._compute_ratio(ab_temp, c_temp, result)

        # Trailing comma: no newline after the summary
        print '%d registers,' % (len(regs) + len(self._x_regs) +
                                 len(self._y_regs)),
        code.release_registers(regs)
        # The active code is only restored when there was one before
        if old_code is not None:
            spu.set_active_code(old_code)

        return
Пример #28
0
    def synthesize(self, code):
        """
        Emit a population-count kernel into code using var.Word variables.

        The input vector is copied from register 5, its 1 bits are counted
        with self.popc, the counts are folded with self.reduce_word, and
        the total is written to the outbound mailbox channel.
        """
        previous = spu.get_active_code()
        spu.set_active_code(code)

        # Variables start at zero; creation order is kept so register
        # assignment is unchanged.
        ones = var.Word(0)
        total = var.Word(0)
        vec = var.Word(0)

        # Adding 0 to register 5 copies the input vector into vec
        vec.v = spu.ai.ex(5, 0)

        self.popc(ones, vec)
        self.reduce_word(total, ones)

        # Hand the total to the caller through the mailbox
        spu.wrch(total, dma.SPU_WrOutMbox)

        spu.set_active_code(previous)
        return
Пример #29
0
  def synthesize(self, code):
    """
    Emit a population-count kernel into code using var.Word variables.

    The input vector is copied from register 5, its 1 bits are counted
    with self.popc, the counts are folded with self.reduce_word, and the
    total is written to the outbound mailbox channel.
    """
    previous = spu.get_active_code()
    spu.set_active_code(code)

    # Variables start at zero; creation order is kept so register
    # assignment is unchanged.
    ones = var.Word(0)
    total = var.Word(0)
    vec = var.Word(0)

    # Adding 0 to register 5 copies the input vector into vec
    vec.v = spu.ai.ex(5, 0)

    self.popc(ones, vec)
    self.reduce_word(total, ones)

    # Hand the total to the caller through the mailbox
    spu.wrch(total, dma.SPU_WrOutMbox)

    spu.set_active_code(previous)
    return
Пример #30
0
  def synthesize(self, code):
    """
    Emit code that accumulates two terms over the x/y register pairs and
    computes their ratio.

    For each of the self._n_bits / 128 register pairs, self._ab_c updates
    the `ab` and `c` accumulators; both are then folded with
    self._reduce_word and passed to self._compute_ratio.  A short
    register-usage summary is printed (Python 2 print statement).

    Raises:
      Exception: if x_regs, y_regs or the result register is unset.

    NOTE(review): the ratio is written to a locally acquired `result`
    register that is neither copied to self._result nor added to the list
    of registers released at the end -- confirm whether self._result was
    meant to be the destination.
    NOTE(review): when self._one is created here it is released at the
    end while self._one stays set, so a later call would skip
    synthesize_constants and use a released register -- verify.
    """
    if self._x_regs is None:  raise Exception("Please set x_regs")        
    if self._y_regs is None:  raise Exception("Please set y_regs")
    if self._result is None:  raise Exception("Please set result register")    

    old_code = spu.get_active_code()
    spu.set_active_code(code)    

    # Registers to hand back when synthesis is finished
    regs = []

    # Create the shared constant on first use
    if self._one is None:
      self.synthesize_constants(code)
      regs.append(self._one)      


    ab = code.acquire_register()
    c  = code.acquire_register()
    ab_temp = code.acquire_register()
    c_temp  = code.acquire_register()
    result  = code.acquire_register()
    regs = regs + [ab, c, ab_temp, c_temp]

    # Number of register pairs to process; integer division under
    # Python 2, which this file targets (see the print statement below)
    nregs = self._n_bits / 128

    for i in range(nregs):
      # self._ab(self._x_regs[i], self._y_regs[i], ab, ab_temp)
      # self._c( self._x_regs[i], self._y_regs[i],  c,  c_temp)
      self._ab_c(self._x_regs[i], self._y_regs[i], ab, c, ab_temp, c_temp)
      
    # Fold each accumulator into its *_temp register
    self._reduce_word(ab, ab_temp)
    self._reduce_word( c,  c_temp)

    self._compute_ratio(ab_temp, c_temp, result)

    # Trailing comma: no newline after the summary
    print '%d registers,' % (len(regs) + len(self._x_regs) + len(self._y_regs)),
    code.release_registers(regs)
    # The active code is only restored when there was one before
    if old_code is not None:
      spu.set_active_code(old_code)
      
    return
Пример #31
0
    def save_ls_buffer(self, ls_size=None, branch=False):
        """
        Emit code that puts the local-store buffer to main memory and then
        updates both buffer descriptors.

        ls_size is the register holding the transfer size; when omitted a
        fresh register is acquired for it.  The `branch` argument is
        accepted but not used.  All registers acquired here are released
        before returning.
        """
        code = spu.get_active_code()

        scratch = []
        if ls_size is None:
            ls_size = code.acquire_register()
            scratch.append(ls_size)

        # Destination: word rotated in from the mm descriptor plus the
        # descriptor register itself
        mm_pos = code.acquire_register()
        scratch.append(mm_pos)

        spu.rotqbyi(mm_pos, self.mm_buffer, 4)
        spu.a(mm_pos, mm_pos, self.mm_buffer)

        # Describe the transfer and issue the put
        desc = spuiter.memory_desc('b')
        desc.set_size_reg(ls_size)
        desc.set_addr_reg(mm_pos)
        desc.put(code, self.ls_buffer)

        # Advance the main-memory position and write it back to slot 2.
        # The register is acquired only now, as before, so register
        # numbering is unchanged.
        mm_len = code.acquire_register()
        scratch.append(mm_len)

        spu.rotqbyi(mm_len, self.mm_buffer, 8)
        spu.rotqbyi(mm_pos, self.mm_buffer, 4)
        spu.a(mm_pos, mm_pos, mm_len)

        util.set_slot_value(code, self.mm_buffer, 2, mm_pos)

        # Local-store side starts over: slot 2 of its descriptor is zeroed
        util.set_slot_value(code, self.ls_buffer, 2, 0)

        code.release_registers(scratch)

        return
Пример #32
0
  def save_ls_buffer(self, ls_size = None, branch = False):
    """
    Emit code that puts the local-store buffer to main memory and then
    updates both buffer descriptors.

    ls_size is the register holding the transfer size; when omitted a
    fresh register is acquired for it.  The `branch` argument is accepted
    but not used.  All registers acquired here are released before
    returning.
    """
    code = spu.get_active_code()

    scratch = []
    if ls_size is None:
      ls_size = code.acquire_register()
      scratch.append(ls_size)

    # Destination: word rotated in from the mm descriptor plus the
    # descriptor register itself
    mm_pos = code.acquire_register()
    scratch.append(mm_pos)

    spu.rotqbyi(mm_pos, self.mm_buffer, 4)
    spu.a(mm_pos, mm_pos, self.mm_buffer)

    # Describe the transfer and issue the put
    desc = spuiter.memory_desc('b')
    desc.set_size_reg(ls_size)
    desc.set_addr_reg(mm_pos)
    desc.put(code, self.ls_buffer)

    # Advance the main-memory position and write it back to slot 2.  The
    # register is acquired only now, as before, so register numbering is
    # unchanged.
    mm_len = code.acquire_register()
    scratch.append(mm_len)

    spu.rotqbyi(mm_len, self.mm_buffer, 8)
    spu.rotqbyi(mm_pos, self.mm_buffer, 4)
    spu.a(mm_pos, mm_pos, mm_len)

    util.set_slot_value(code, self.mm_buffer, 2, mm_pos)

    # Local-store side starts over: slot 2 of its descriptor is zeroed
    util.set_slot_value(code, self.ls_buffer, 2, 0)

    code.release_registers(scratch)

    return
Пример #33
0
  def _transfer_data(self, code, kernel, lsa, tag):
    """
    Emit code that moves this descriptor's data between main memory and
    SPU local store in chunks of at most 16384 bytes, then waits for the
    transfers tagged `tag` to complete.

    Arguments:
      code:   the spe.InstructionStream to emit into
      kernel: callable invoked once per chunk as
              kernel(code, ls_addr, ea_load, transfer_size, tag_var);
              it issues the actual transfer
      lsa:    local-store address, either an int or a Register/Variable
      tag:    tag value passed to the kernel and used for the wait mask

    Raises:
      Exception: if code is not an InstructionStream or lsa has an
        unsupported type.

    The address and size come from self.r_addr / self.r_size when those
    registers were supplied, otherwise from self.addr / self.size.
    """

    # Check the types
    if not isinstance(code, spe.InstructionStream):
      raise Exception('Code must be an InstructionStream')
    if not (isinstance(lsa, int) or issubclass(type(lsa), (spe.Register, spe.Variable))):
      raise Exception('lsa must be an integer, Register, or Variable')
    
    old_code = spu.get_active_code()
    spu.set_active_code(code)

    # Acquire registers for address and size, if they were not supplied by the user
    if self.r_addr is None: r_ea_data = code.prgm.acquire_register()
    else:                   r_ea_data = self.r_addr
      
    # NOTE(review): when acquired here, r_size is not referenced again
    # (the constant-size path loads into aligned_size) and is not
    # released below -- confirm whether the acquisition is needed.
    if self.r_size is None: r_size = code.prgm.acquire_register()
    else:                   r_size = self.r_size

    # Create variables 
    ea_addr      = var.SignedWord(reg = r_ea_data)
    aligned_size = var.SignedWord(0)
    # NOTE(review): mod_16's register is not in the release list at the
    # end of this method.
    mod_16       = var.SignedWord(0xF)

    # Initialize the lsa_addr variable. 
    if isinstance(lsa, int):
      # From a constant
      ls_addr   = var.SignedWord(lsa)
    elif issubclass(type(lsa), (spe.Register, spe.Variable)):
      # From a variable
      ls_addr   = var.SignedWord()      
      ls_addr.v = lsa
      
      
    tag_var = var.SignedWord(tag)
    cmp = var.SignedWord(0)

    # Load the effective address
    if self.r_addr is None:
      # Misalignment is only reported, not corrected
      if self.addr % 16 != 0:
        print '[get_memory] Misaligned data'

      util.load_word(code, ea_addr, self.addr)

    # Load the size, rounding up as required to be 16-byte aligned
    if self.r_size is None:
      # Size in bytes: element count times the element size for typecode
      rnd_size = self.size * var.INT_SIZES[self.typecode]
      if rnd_size < 16:
        rnd_size = 16
      elif (rnd_size % 16) != 0:
        rnd_size += (16 - (rnd_size % 16))
      util.load_word(code, aligned_size, rnd_size)
    else:
      # TODO: !!! UNIT TEST THIS !!!
      # Same as above, but using SPU arithmetic to round
      size  = var.SignedWord(reg = r_size)
      sixteen  = var.SignedWord(16)
      # NOTE(review): this compares the low four bits with size itself
      # rather than with zero; confirm it really selects the
      # already-aligned case.
      cmp.v = ((size & mod_16) == size)
      aligned_size.v = size + (sixteen - (size & mod_16))
      spu.selb(aligned_size.reg, size.reg, aligned_size.reg, cmp.reg, order = _mi(spu.selb))
      code.release_register(sixteen.reg)

    # Use an auxiliary register for the moving ea value if the
    # caller supplied the address register
    if self.r_addr is not None:
      ea_load   = var.SignedWord(0)
      ea_load.v = ea_addr
    else:
      ea_load = ea_addr # note that this is reference, not .v assignment

    # Transfer parameters
    buffer_size   = var.SignedWord(16384)
    remaining     = var.SignedWord(0)
    transfer_size = var.SignedWord(0)
    remaining.v   = aligned_size

    # Set up the iterators to transfer at most 16k at a time
    xfer_iter = syn_iter(code, 0, 16384)
    xfer_iter.set_stop_reg(aligned_size.reg)

    for offset in xfer_iter:
      # Choose between a full buffer and whatever remains
      cmp.v = buffer_size > remaining
      spu.selb(transfer_size, buffer_size, remaining, cmp)

      # Transfer the data
      kernel(code, ls_addr, ea_load, transfer_size, tag_var)
      # Both addresses advance by a full buffer regardless of the
      # size actually transferred
      ls_addr.v = ls_addr + buffer_size
      ea_load.v = ea_load + buffer_size

      remaining.v = remaining - buffer_size

    # Set the tag bit to tag
    dma.mfc_write_tag_mask(code, 1<<tag);

    # Wait for the transfer to complete
    dma.mfc_read_tag_status_all(code);

    # Release the registers
    code.release_register(buffer_size.reg)
    code.release_register(remaining.reg)
    code.release_register(aligned_size.reg)    
    code.release_register(transfer_size.reg)
    code.release_register(cmp.reg)
    code.release_register(ls_addr.reg)
    code.release_register(tag_var.reg)
    code.release_register(ea_load.reg)

    # The active code is only restored when there was one before
    if old_code is not None:
      spu.set_active_code(old_code)
    return 
Пример #34
0
    def synthesize(self, code):
        """
        Emit code that evaluates a logarithm of self.x into self.result.

        The emitted sequence follows the scalar reference quoted in the
        inline comments: the exponent is split off, the mantissa is
        normalised around SQRTHF, a polynomial in the mantissa is
        evaluated with the constants C1..C10, and the result is converted
        to base 2 using LOG2EA and the exponent.  Constants are read from
        self.consts.

        Raises:
          Exception: if x or result has not been set.  The checks run
            before the active code is switched, so a failed check no
            longer leaves `code` installed as the active code.
        """
        if self.x is None: raise Exception("Please set x")
        if self.result is None: raise Exception("Please set result")

        old_code = spu.get_active_code()
        spu.set_active_code(code)

        # exponent
        e = var.Word()

        # Working values
        x = var.Word()
        y = var.Word()
        z = var.Word()

        cmp = var.Bits()
        tmp = var.Word()

        spu.xor(cmp, cmp, cmp)
        spu.xor(tmp, tmp, tmp)

        # Set the working x
        x.v = self.x

        # Extract the exponent
        # int e = (((*(unsigned int *) &x) >> 23) & 0xff) - 0x7e;
        e.v = x >> self.consts['_23']
        e.v = spu.andi.ex(e, 0xff)
        e.v = spu.ai.ex(e, 0x382)  # 0x382 == (- 0x7E) using 10 bits

        # Extract the mantissa
        x.v = x & self.consts['M1']  # *(unsigned int*)&x &= 0x807fffff;
        x.v = x | self.consts['M2']  # *(unsigned int*)&x |= 0x3f000000;

        # Normalize.  Both branches of the scalar `if` are computed and
        # the right one is selected afterwards; y, z and tmp double as
        # scratch space for that.
        x1, x2, e1 = y, z, tmp

        # if (x < SQRTHF)
        cmp.v = spu.fcgt.ex(self.consts['SQRTHF'], x)

        # (True) { ... }
        e1.v = spu.ai.ex(e, -1)  # e -= 1;
        x1.v = spu.fa.ex(x, x)  # x = x + x - 1.0;
        x1.v = spu.fs.ex(x1, self.consts['ONE'])

        # (False) { ... }
        x2.v = spu.fs.ex(x, self.consts['ONE'])  # x = x - 1.0;

        # Select the True/False values based on cmp
        e.v = spu.selb.ex(e, e1, cmp)
        x.v = spu.selb.ex(x2, x1, cmp)

        # Compute polynomial
        z.v = spu.fm.ex(x, x)  # z = x * x;

        # y = (((((((( 7.0376836292E-2 * x
        #              - 1.1514610310E-1) * x
        #              + 1.1676998740E-1) * x
        #              - 1.2420140846E-1) * x
        #              + 1.4249322787E-1) * x
        #              - 1.6668057665E-1) * x
        #              + 2.0000714765E-1) * x
        #              - 2.4999993993E-1) * x
        #              + 3.3333331174E-1) * x * z;
        y.v = spu.fms.ex(self.consts['C1'], x, self.consts['C2'])
        y.v = spu.fma.ex(y, x, self.consts['C3'])
        y.v = spu.fms.ex(y, x, self.consts['C4'])
        y.v = spu.fma.ex(y, x, self.consts['C5'])
        y.v = spu.fms.ex(y, x, self.consts['C6'])
        y.v = spu.fma.ex(y, x, self.consts['C7'])
        y.v = spu.fms.ex(y, x, self.consts['C8'])
        y.v = spu.fma.ex(y, x, self.consts['C9'])
        y.v = spu.fm.ex(y, x)
        y.v = spu.fm.ex(y, z)

        y.v = spu.fma.ex(self.consts['C10'], z, y)  # y += -0.5 * z;

        # Convert to log base 2
        z.v = spu.fm.ex(y, self.consts['LOG2EA'])  # z = y * LOG2EA;
        z.v = spu.fma.ex(x, self.consts['LOG2EA'], z)  # z += x * LOG2EA;
        z.v = spu.fa.ex(z, y)  # z += y;
        z.v = spu.fa.ex(z, x)  # z += x;
        e.v = spu.csflt.ex(e, 155)  # z += (float) e;
        z.v = spu.fa.ex(z, e)

        spu.ai(self.result, z, 0)  # return z

        spu.set_active_code(old_code)
        return
Пример #35
0
def isched(scode):
  """
  Return a new instruction stream holding the instructions of scode in
  scheduled order.

  scode is split into blocks by isched_gen_blocks().  Blocks are emitted in
  order: the block's label (if any), then its instructions, then its branch
  (if any).  Within a block, instructions are taken one at a time from the
  block's ready list (block.start); the choice among the ready instructions
  is delegated to heurcompare_block(), which is given each candidate's stall
  time, the current pipeline and the data returned by critpath_block().

  The result stream comes from scode.prgm.get_stream().  The active code is
  set to None while scheduling; the previously active code is restored before
  returning, also when an exception is raised.
  """
  old_active_code = spu.get_active_code()
  spu.set_active_code(None)

  try:
    # Generate the instruction dependence DAG(s)
    blocks = isched_gen_blocks(scode)

    # For each instruction, compute the max cycles to the end of the code
    g_critpath = critpath_block(blocks)

    # Apply heuristics to build an optimized InstructionStream
    fcode = scode.prgm.get_stream()

    inst_cycle = {} # For each inst, the cycle number it has in the code

    lastpos = -1    # Index of last instruction in the stream (excludes labels!)
    pipe = 0        # Current pipeline (0 = even, 1 = odd)
    cycle = 0       # Current cycle number

    for (ind, block) in enumerate(blocks):
      if block.label is not None:
        fcode.add(block.label)

      start = block.start      # Instructions ready to be scheduled
      g_in = block.g_in        # inst -> entries (dep, delay) it depends on
      g_incnt = block.g_incnt  # inst -> number of deps not yet scheduled
      g_out = block.g_out      # inst -> instructions depending on it

      while len(start) > 0:
        # Apply heuristics to find the best instruction in the queue.

        # For each inst in start, compute the minimum stall time
        # TODO - cache this instead of computing each time?
        #  Do this by computing the stall time when an inst is added to start.
        #  Each time the cycle number is moved forward, reduce the stall time
        #  by that number of cycles for each inst in start.
        # TODO - idea from I think Muchnick -- keep a start Q of no-stall
        #  nodes, and a Q of nodes that would stall.  Then just pull from
        #  no-stall nodes unless empty, in which case fall back to the stall Q
        #   would make it easy(er) to do cached stall counts

        best = (None, 999)
        for s in start:
          # Find the stall time of s, or maximum delay for all its deps
          maxstall = 0
          for d in g_in[s]:
            # Entries without a producing instruction impose no stall.
            if d[0] is None:
              continue

            # Stall for this dep: d[1] (presumably its latency) minus the
            # cycles already elapsed since the dep was issued.
            stall = d[1] - (cycle - inst_cycle[d[0]])
            if stall > maxstall:
              maxstall = stall

          best = heurcompare_block(best, (s, maxstall), pipe, g_critpath,
                                   blocks, ind)

        inst = best[0]

        start.remove(inst)
        # Advance past the chosen instruction's stall plus its issue cycle.
        cycle += best[1] + 1

        block.inst_cnt -= 1
        fcode.add(inst)

        # Dual issue? if so, adjust the cycle back one.
        # Careful, lastpos starts out as -1.  However the pipe also starts
        # out as 0, so the first part of the conditional will fail before
        # the looked-up previous instruction is used.

        # Ah - if a label occurs first in the stream, followed by say an ai,
        # this will fail
        previnst = fcode[lastpos]
        if (pipe == inst.cycles[0] == 1 and
            previnst.cycles[0] == 0 and
            inst_cycle[previnst] == cycle - 1):
          cycle -= 1

        inst_cycle[inst] = cycle

        lastpos = len(fcode) - 1
        pipe = (pipe + 1) & 1

        # Evaluate all the instructions that depend on this inst.
        # Can any be added to start?
        for d in g_out[inst]:
          # Skip this d if it's not in the current block
          if d not in g_incnt:
            continue

          g_incnt[d] -= 1
          if g_incnt[d] == 0:
            start.append(d)

          # Does d depend on any insts in start?
          # If so, move those to the front of start
          # Why does this still matter?  It affects ties in the heuristic..
          else:
            # d depends on inst, and at least 1 other inst not yet scheduled.
            # look at the insts d depends on.  if any are in start, move
            # them to the front of the start set.
            # how does this help?  helps insts closer to getting into start,
            #  get in sooner.  get a larger start set for choosing best inst
            for e in g_in[d]:
              if e[0] in start:
                start.remove(e[0])
                start.insert(0, e[0])

      # end while len(start) > 0
      if block.branch is not None:
        fcode.add(block.branch)
    # end for block in blocks
  finally:
    # Put the caller's active code back even if scheduling failed.
    spu.set_active_code(old_active_code)

  return fcode
Пример #36
0
  def synthesize(self, code):
    """
    Synthesize the iteration kernel into code.

    The emitted code iterates x = r * x * (1.0 - x) starting from self.x0:
    first a loop of self.max_init iterations, then a loop of self.max_n
    iterations in which abs(r - 2.0 * r * x) is passed through the self.log
    block and the outcome is added to a running total.  The total is finally
    handed to fdiv() together with fmax, with self.result as destination.

    Inputs are validated by self._check_inputs() first.  code is made the
    active code while synthesizing; the previously active code is restored on
    exit, also when an exception is raised.
    """
    self._check_inputs()

    old_code = spu.get_active_code()
    spu.set_active_code(code)

    try:
      zero = var.Word(reg = code.r_zero)
      one = self.log.consts['ONE']
      two = self.consts['TWO']

      x   = var.Word(self.x0)
      r   = var.Word(0)
      cmp = var.Word(0)
      x_neg = var.Word(0)
      # NOTE(review): the divisor is built from max_init although the comment
      # at the fdiv() call says "total / float(max_n)" -- confirm which one
      # is intended.
      fmax  = var.Word(self.max_init)
      temp = var.SingleFloat()

      # Convert the integer count to float for the final division.
      fmax.v = spu.cuflt.ex(fmax, 155)

      # Init
      for i in spuiter.syn_iter(code, self.max_init):
        # x = r[i % r_max] * x * (1.0 - x)
        self._next_r(r)
        temp.v = spu.fs.ex(one, x)
        x.v = spu.fm.ex(x, temp)
        x.v = spu.fm.ex(r, x)

      #  if x == float('-infinity'):
      #    return -10.0

      # Derive Exponent
      total = var.Word(0)
      logx  = var.SingleFloat()

      for i in spuiter.syn_iter(code, self.max_n):
        # x = ri * x * (1.0 - x)
        self._next_r(r)
        temp.v = spu.fs.ex(one, x)
        x.v = spu.fm.ex(x, temp)
        x.v = spu.fm.ex(r, x)

        # logx = ri - 2.0 * ri * x
        logx.v = spu.fm.ex(two, x)
        logx.v = spu.fm.ex(r, logx)
        logx.v = spu.fs.ex(r, logx)

        # abs(logx): compute the negation, then keep logx where logx > 0
        # and the negation elsewhere.
        x_neg.v = spu.fs.ex(zero, logx)
        cmp.v = spu.fcgt.ex(logx, zero)
        logx.v = spu.selb.ex(x_neg, logx, cmp)

        # log(logx), computed in place by the log block
        self.log.set_result(logx)
        self.log.set_x(logx)
        self.log.synthesize(code)

        # total = total + logx
        total.v = spu.fa.ex(total, logx)

      # return total / float(max_n)
      fdiv(code, self.result, total, fmax, one)
    finally:
      # Hand the active-code slot back to whatever the caller had installed.
      spu.set_active_code(old_code)

    return
Пример #37
0
 def _get_active_code(self):
     """
     Return whatever spu.get_active_code() currently reports.
     """
     active = spu.get_active_code()
     return active
Пример #38
0
  def synthesize(self, code):
    """
    Synthesize the nested block-comparison loops into code.

    An outer loop over self._m and an inner loop over self._n are created
    with spuiter.syn_iter().  The outer body loads the x bit vector, the
    inner body loads the y bit vector and synthesizes a Tanimoto kernel over
    the two register sets.  When a save op is configured, the kernel result
    is compared against the threshold (0.0 when none was set) and the save
    op's test() is invoked with the comparison, the result and both loop
    offsets.

    Raises Exception if x_addr, y_addr, n_bits, m or n has not been set.
    The checks run before the active code is changed, so a failed check
    leaves the active code untouched.  The previously active code is
    restored on exit (only if there was one), also when an exception is
    raised.
    """
    # Sanity checks
    if self._x_addr is None: raise Exception("Please set x_addr")
    if self._y_addr is None: raise Exception("Please set y_addr")
    if self._n_bits is None: raise Exception("Please set n_bits")
    if self._m is None: raise Exception("Please set m")
    if self._n is None: raise Exception("Please set n")

    old_code = spu.get_active_code()
    spu.set_active_code(code)

    try:
      # Acquire registers for the bit vectors and result.  One register per
      # 128 bits; floor division keeps n_vecs an int so range() accepts it
      # regardless of the interpreter's "/" semantics.
      n_vecs = self._n_bits // 128
      x_regs = [code.acquire_register() for i in range(n_vecs)]
      y_regs = [code.acquire_register() for i in range(n_vecs)]
      result = code.acquire_register()

      x_addr = var.Word()
      y_addr = var.Word()

      # threshold and bcmp are only needed (and only defined) with a save op.
      if self._save_op is not None:
        if self._threshold is not None:
          threshold = var.SingleFloat(self._threshold)
        else:
          threshold = var.SingleFloat(0.0)
        bcmp = var.Word(0)

      # Setup the Tanimoto kernel
      tan = Tanimoto()

      tan.set_n_bits(self._n_bits)
      tan.set_x_regs(x_regs)
      tan.set_y_regs(y_regs)
      tan.set_result(result)

      tan.synthesize_constants(code)

      # Setup the save op
      save_op = self._save_op
      if save_op is not None:
        save_op.setup()

      # Create the iterators
      xiter = spuiter.syn_iter(code, self._m)
      yiter = spuiter.syn_iter(code, self._n)

      # Synthesize the block comparison loops
      x_addr.v = self._x_addr

      for x_off in xiter:
        # Advance by one bit vector (16 bytes per 128-bit register).
        # NOTE(review): the address is advanced before the load, so the
        # first load sees _x_addr + 16 * n_vecs -- confirm this matches what
        # _load_bit_vector and the callers expect.  Same for y below.
        x_addr.v = x_addr + 16 * n_vecs
        y_addr.v = self._y_addr

        self._load_bit_vector(x_addr, x_regs)

        for y_off in yiter:
          y_addr.v = y_addr + 16 * n_vecs

          self._load_bit_vector(y_addr, y_regs)
          tan.synthesize(code)

          if save_op is not None:
            spu.fcgt(bcmp, result, threshold)
            save_op.test(bcmp, result, x_off, y_off)

      # /x_off
    finally:
      if old_code is not None:
        spu.set_active_code(old_code)

    return
Пример #39
0
  def synthesize(self, code):
    """
    Synthesize the logarithm approximation of self.x into self.result.

    The emitted sequence follows the C reference quoted in the inline
    comments: the exponent field is extracted from the bit pattern of x, the
    mantissa is isolated and adjusted depending on whether it is below
    SQRTHF, a polynomial with coefficients C1..C9 (plus the C10 * z term) is
    evaluated, and the pieces are combined using LOG2EA and the exponent.
    All constants are read from self.consts.

    Raises Exception if x or result has not been set.  The checks run before
    the active code is changed, so a failed check leaves the active code
    untouched.  code is the active code while synthesizing; the previously
    active code is restored on exit, also when an exception is raised.
    """
    if self.x is None: raise Exception("Please set x")
    if self.result is None: raise Exception("Please set result")

    old_code = spu.get_active_code()
    spu.set_active_code(code)

    try:
      # exponent
      e = var.Word()

      # Working values
      x = var.Word()
      y = var.Word()
      z = var.Word()

      cmp = var.Bits()
      tmp = var.Word()

      # Zero the comparison mask and the scratch word.
      spu.xor(cmp, cmp, cmp)
      spu.xor(tmp, tmp, tmp)

      # Set the working x
      x.v = self.x

      # Extract the exponent
      # int e = (((*(unsigned int *) &x) >> 23) & 0xff) - 0x7e;
      e.v = x >> self.consts['_23']
      e.v = spu.andi.ex(e, 0xff)
      e.v = spu.ai.ex(e, 0x382) # 0x382 == (- 0x7E) using 10 bits
      # 0b 111 1110

      # Extract the mantissa
      x.v = x & self.consts['M1'] # *(unsigned int*)&x &= 0x807fffff;
      x.v = x | self.consts['M2'] # *(unsigned int*)&x |= 0x3f000000;

      # Normalize
      # x1, x2 and e1 are aliases that reuse y, z and tmp as scratch; y and z
      # are overwritten again by the polynomial evaluation below.
      x1, x2, e1 = y, z, tmp

      # if (x < SQRTHF)
      cmp.v = spu.fcgt.ex(self.consts['SQRTHF'], x)

      # Both alternatives are computed; selb picks one using cmp.

      # (True) { ... }
      e1.v = spu.ai.ex(e, -1)                  #   e -= 1;
      x1.v = spu.fa.ex(x, x)                   #   x = x + x - 1.0;
      x1.v = spu.fs.ex(x1, self.consts['ONE']) #     ""  ""

      # (False) { ... }
      x2.v = spu.fs.ex(x, self.consts['ONE'])  #   x = x - 1.0;

      # Select the True/False values based on cmp
      e.v = spu.selb.ex(e,  e1, cmp)
      x.v = spu.selb.ex(x2, x1, cmp)

      # Compute polynomial
      z.v = spu.fm.ex(x, x)                      #  z = x * x;

      y.v = spu.fms.ex(self.consts['C1'], x,     #  y = (((((((( 7.0376836292E-2 * x
                       self.consts['C2'])        #         - 1.1514610310E-1) * x
      y.v = spu.fma.ex(y, x, self.consts['C3'])  #         + 1.1676998740E-1) * x
      y.v = spu.fms.ex(y, x, self.consts['C4'])  #         - 1.2420140846E-1) * x
      y.v = spu.fma.ex(y, x, self.consts['C5'])  #         + 1.4249322787E-1) * x
      y.v = spu.fms.ex(y, x, self.consts['C6'])  #         - 1.6668057665E-1) * x
      y.v = spu.fma.ex(y, x, self.consts['C7'])  #         + 2.0000714765E-1) * x
      y.v = spu.fms.ex(y, x, self.consts['C8'])  #         - 2.4999993993E-1) * x
      y.v = spu.fma.ex(y, x, self.consts['C9'])  #         + 3.3333331174E-1)
      y.v = spu.fm.ex(y, x)                      #   * x
      y.v = spu.fm.ex(y, z)                      #   * z;

      y.v = spu.fma.ex(self.consts['C10'], z, y) #  y += -0.5 * z;

      # Convert to log base 2
      z.v = spu.fm.ex( y, self.consts['LOG2EA'])     # z = y * LOG2EA;
      z.v = spu.fma.ex(x, self.consts['LOG2EA'], z)  # z += x * LOG2EA;
      z.v = spu.fa.ex(z, y)                          # z += y;
      z.v = spu.fa.ex(z, x)                          # z += x;
      e.v = spu.csflt.ex(e, 155)                     # z += (float) e;
      z.v = spu.fa.ex(z, e)                          #  ""  ""

      # Copy z into the result (add-immediate of 0).
      spu.ai(self.result, z, 0)       # return z
    finally:
      spu.set_active_code(old_code)

    return
Пример #40
0
def isched(scode):
    """
    Return a new instruction stream holding the instructions of scode in
    scheduled order.

    scode is split into blocks by isched_gen_blocks().  Blocks are emitted in
    order: the block's label (if any), then its instructions, then its branch
    (if any).  Within a block, instructions are taken one at a time from the
    block's ready list (block.start); the choice among the ready instructions
    is delegated to heurcompare_block(), which is given each candidate's
    stall time, the current pipeline and the data returned by
    critpath_block().

    The result stream comes from scode.prgm.get_stream().  The active code is
    set to None while scheduling; the previously active code is restored
    before returning, also when an exception is raised.
    """
    old_active_code = spu.get_active_code()
    spu.set_active_code(None)

    try:
        # Generate the instruction dependence DAG(s)
        blocks = isched_gen_blocks(scode)

        # For each instruction, compute the max cycles to the end of the code
        g_critpath = critpath_block(blocks)

        # Apply heuristics to build an optimized InstructionStream
        fcode = scode.prgm.get_stream()

        inst_cycle = {}  # For each inst, the cycle number it has in the code

        lastpos = -1  # Index of last instruction in the stream (excludes labels!)
        pipe = 0  # Current pipeline (0 = even, 1 = odd)
        cycle = 0  # Current cycle number

        for (ind, block) in enumerate(blocks):
            if block.label is not None:
                fcode.add(block.label)

            start = block.start  # Instructions ready to be scheduled
            g_in = block.g_in  # inst -> entries (dep, delay) it depends on
            g_incnt = block.g_incnt  # inst -> number of deps not yet scheduled
            g_out = block.g_out  # inst -> instructions depending on it

            while len(start) > 0:
                # Apply heuristics to find the best instruction in the queue.

                # For each inst in start, compute the minimum stall time
                # TODO - cache this instead of computing each time?
                #  Do this by computing the stall time when an inst is added
                #  to start.  Each time the cycle number is moved forward,
                #  reduce the stall time by that number of cycles for each
                #  inst in start.
                # TODO - idea from I think Muchnick -- keep a start Q of
                #  no-stall nodes, and a Q of nodes that would stall.  Then
                #  just pull from no-stall nodes unless empty, in which case
                #  fall back to the stall Q
                #   would make it easy(er) to do cached stall counts

                best = (None, 999)
                for s in start:
                    # Find the stall time of s, or maximum delay for all its
                    # deps
                    maxstall = 0
                    for d in g_in[s]:
                        # Entries without a producing instruction impose no
                        # stall.
                        if d[0] is None:
                            continue

                        # Stall for this dep: d[1] (presumably its latency)
                        # minus the cycles already elapsed since the dep was
                        # issued.
                        stall = d[1] - (cycle - inst_cycle[d[0]])
                        if stall > maxstall:
                            maxstall = stall

                    best = heurcompare_block(best, (s, maxstall), pipe,
                                             g_critpath, blocks, ind)

                inst = best[0]

                start.remove(inst)
                # Advance past the chosen instruction's stall plus its issue
                # cycle.
                cycle += best[1] + 1

                block.inst_cnt -= 1
                fcode.add(inst)

                # Dual issue? if so, adjust the cycle back one.
                # Careful, lastpos starts out as -1.  However the pipe also
                # starts out as 0, so the first part of the conditional will
                # fail before the looked-up previous instruction is used.

                # Ah - if a label occurs first in the stream, followed by say
                # an ai, this will fail
                previnst = fcode[lastpos]
                if (pipe == inst.cycles[0] == 1 and previnst.cycles[0] == 0
                        and inst_cycle[previnst] == cycle - 1):
                    cycle -= 1

                inst_cycle[inst] = cycle

                lastpos = len(fcode) - 1
                pipe = (pipe + 1) & 1

                # Evaluate all the instructions that depend on this inst.
                # Can any be added to start?
                for d in g_out[inst]:
                    # Skip this d if it's not in the current block
                    if d not in g_incnt:
                        continue

                    g_incnt[d] -= 1
                    if g_incnt[d] == 0:
                        start.append(d)

                    # Does d depend on any insts in start?
                    # If so, move those to the front of start
                    # Why does this still matter?  It affects ties in the
                    # heuristic..
                    else:
                        # d depends on inst, and at least 1 other inst not yet
                        # scheduled.  look at the insts d depends on.  if any
                        # are in start, move them to the front of the start
                        # set.
                        # how does this help?  helps insts closer to getting
                        #  into start, get in sooner.  get a larger start set
                        #  for choosing best inst
                        for e in g_in[d]:
                            if e[0] in start:
                                start.remove(e[0])
                                start.insert(0, e[0])

            # end while len(start) > 0
            if block.branch is not None:
                fcode.add(block.branch)
        # end for block in blocks
    finally:
        # Put the caller's active code back even if scheduling failed.
        spu.set_active_code(old_active_code)

    return fcode
Пример #41
0
 def _get_active_code(self):
     """
     Return whatever spu.get_active_code() currently reports.
     """
     current = spu.get_active_code()
     return current
Пример #42
0
    def synthesize(self, code):
        """
        Synthesize the iteration kernel into code.

        The emitted code iterates x = r * x * (1.0 - x) starting from
        self.x0: first a loop of self.max_init iterations, then a loop of
        self.max_n iterations in which abs(r - 2.0 * r * x) is passed through
        the self.log block and the outcome is added to a running total.  The
        total is finally handed to fdiv() together with fmax, with
        self.result as destination.

        Inputs are validated by self._check_inputs() first.  code is made the
        active code while synthesizing; the previously active code is
        restored on exit, also when an exception is raised.
        """
        self._check_inputs()

        old_code = spu.get_active_code()
        spu.set_active_code(code)

        try:
            zero = var.Word(reg=code.r_zero)
            one = self.log.consts['ONE']
            two = self.consts['TWO']

            x = var.Word(self.x0)
            r = var.Word(0)
            cmp = var.Word(0)
            x_neg = var.Word(0)
            # NOTE(review): the divisor is built from max_init although the
            # comment at the fdiv() call says "total / float(max_n)" --
            # confirm which one is intended.
            fmax = var.Word(self.max_init)
            temp = var.SingleFloat()

            # Convert the integer count to float for the final division.
            fmax.v = spu.cuflt.ex(fmax, 155)

            # Init
            for i in spuiter.syn_iter(code, self.max_init):
                # x = r[i % r_max] * x * (1.0 - x)
                self._next_r(r)
                temp.v = spu.fs.ex(one, x)
                x.v = spu.fm.ex(x, temp)
                x.v = spu.fm.ex(r, x)

            #  if x == float('-infinity'):
            #    return -10.0

            # Derive Exponent
            total = var.Word(0)
            logx = var.SingleFloat()

            for i in spuiter.syn_iter(code, self.max_n):
                # x = ri * x * (1.0 - x)
                self._next_r(r)
                temp.v = spu.fs.ex(one, x)
                x.v = spu.fm.ex(x, temp)
                x.v = spu.fm.ex(r, x)

                # logx = ri - 2.0 * ri * x
                logx.v = spu.fm.ex(two, x)
                logx.v = spu.fm.ex(r, logx)
                logx.v = spu.fs.ex(r, logx)

                # abs(logx): compute the negation, then keep logx where
                # logx > 0 and the negation elsewhere.
                x_neg.v = spu.fs.ex(zero, logx)
                cmp.v = spu.fcgt.ex(logx, zero)
                logx.v = spu.selb.ex(x_neg, logx, cmp)

                # log(logx), computed in place by the log block
                self.log.set_result(logx)
                self.log.set_x(logx)
                self.log.synthesize(code)

                # total = total + logx
                total.v = spu.fa.ex(total, logx)

            # return total / float(max_n)
            fdiv(code, self.result, total, fmax, one)
        finally:
            # Hand the active-code slot back to whatever the caller had
            # installed.
            spu.set_active_code(old_code)

        return