def fdiv(code, d, x, y, one=None):
    """
    Emit the SPU instruction sequence for the single-precision
    floating point division d = x / y.

    The quotient is formed as x * (1/y): a reciprocal estimate of y
    (frest + fi) is refined once (fnms + fma) and then multiplied by x.

    code -- object providing acquire_register(), acquire_registers(n)
            and release_registers(list); temporaries are taken from it
            and handed back before returning.
    d    -- destination register for the quotient.
    x    -- register holding the numerator.
    y    -- register holding the denominator.
    one  -- optional register already holding the float constant one.
            When omitted, a temporary is acquired, loaded here and
            released again; a caller-supplied register is never released.

    NOTE(review): the spu.* calls are not passed `code`; presumably they
    emit into the currently active instruction stream, so `code` should
    be that stream -- confirm against callers.
    """
    estimate, interpolated, reciprocal = code.acquire_registers(3)
    residual = code.acquire_register()
    temporaries = [estimate, interpolated, reciprocal, residual]

    if one is None:
        # No constant supplied: build it locally (zero, add 1, convert
        # to float) and remember to release it with the other temps.
        one = code.acquire_register()
        spu.xor(one, one, one)
        spu.ai(one, one, 1)
        spu.cuflt(one, one, 155)
        temporaries.append(one)

    # Reciprocal of y (SPU ISA 1.1, p208, Normal case)
    spu.frest(estimate, y)
    spu.fi(interpolated, y, estimate)
    spu.fnms(residual, y, interpolated, one)
    spu.fma(reciprocal, residual, interpolated, interpolated)

    # d = x * (1/y)
    spu.fm(d, x, reciprocal)

    code.release_registers(temporaries)
def synthesize_constants(self, code):
    """
    Acquire a register from `code`, store it in self._one and emit the
    instructions that load it (zero via xor, add immediate 1, convert
    to float with cuflt).

    `code` is made the active instruction stream while the instructions
    are emitted.  The stream that was active on entry is put back
    afterwards -- also when emission raises -- unless there was none
    (get_active_code() returned None), in which case `code` is left
    active, as before.

    code -- instruction stream providing acquire_register().
    """
    old_code = spu.get_active_code()
    spu.set_active_code(code)

    try:
        self._one = code.acquire_register()
        spu.xor(self._one, self._one, self._one)
        spu.ai(self._one, self._one, 1)
        spu.cuflt(self._one, self._one, 155)
    finally:
        # Always undo the global active-code switch so a failure here
        # cannot redirect later synthesis into the wrong stream.
        if old_code is not None:
            spu.set_active_code(old_code)

    return
def _compute_ratio(self, ab, c, result):
    """
    Emit instructions that leave c / (ab + c) in `result`.

    Both inputs are overwritten in place: `ab` and `c` are first
    converted to float with cuflt, then `ab` is replaced by ab + c.
    The division is delegated to fdiv() on the active instruction
    stream, reusing self._one as the constant-one register.

    ab, c  -- input registers (clobbered as described above).
    result -- destination register for the ratio.
    """
    # Integer -> float conversion of both operands, in place
    for operand in (ab, c):
        spu.cuflt(operand, operand, 155)

    # ab now holds the denominator ab + c
    spu.fa(ab, ab, c)

    # result = c / (ab + c)
    active_code = spu.get_active_code()
    fdiv(active_code, result, c, ab, self._one)
def TestLog():
    """
    Build a simple SPU program that computes log for 10 values
    (1, 10, 100, ...) and sends each result back through the outbound
    mailbox, then run it asynchronously and print every value received.

    NOTE(review): another TestLog is defined later in this text; if both
    live in the same module the later definition replaces this one.
    """
    N = 10

    code = synspu.InstructionStream()
    proc = synspu.Processor()
    spu.set_active_code(code)

    log = SPULog()
    values = []
    result = code.acquire_register()

    # One input per power of ten, converted to float in place
    for exponent in range(N):
        val = var.Word(10 ** exponent)
        spu.cuflt(val, val, 155)
        values.append(val)

    log.setup(code)
    log.set_result(result)

    # Synthesize one log computation per input and post its result
    for val in values:
        log.set_x(val)
        log.synthesize(code)
        spu.wrch(result, dma.SPU_WrOutMbox)

    spe_id = proc.execute(code, mode='async')

    for exponent in range(N):
        # Spin until the SPU has put the next result in the mailbox
        while synspu.spu_exec.stat_out_mbox(spe_id) == 0:
            pass

        received = synspu.spu_exec.read_out_mbox(spe_id)
        print('log said: 0x%08X (%d)' % (received, 10 ** exponent))

    proc.join(spe_id)
def TestLog():
    """
    Exercise SPULog: synthesize an SPU program that computes log for
    10 inputs (successive powers of ten starting at 1), writes each
    result to the outbound mailbox, executes it asynchronously and
    prints what comes back alongside the corresponding input.
    """
    code = synspu.InstructionStream()
    proc = synspu.Processor()
    spu.set_active_code(code)

    log = SPULog()
    values = []
    result = code.acquire_register()

    N = 10
    powers_of_ten = [10 ** k for k in range(N)]

    # Load each input and convert it to float in place
    for power in powers_of_ten:
        val = var.Word(power)
        spu.cuflt(val, val, 155)
        values.append(val)

    log.setup(code)
    log.set_result(result)

    # Emit the log computation and a mailbox write for every input
    for val in values:
        log.set_x(val)
        log.synthesize(code)
        spu.wrch(result, dma.SPU_WrOutMbox)

    spe_id = proc.execute(code, mode='async')

    for power in powers_of_ten:
        # Busy-wait for the next mailbox entry
        while synspu.spu_exec.stat_out_mbox(spe_id) == 0:
            pass

        print('log said: 0x%08X (%d)' % (
            synspu.spu_exec.read_out_mbox(spe_id), power))

    proc.join(spe_id)
# Operand numbers for the hand-written division sequence below
# (presumably raw SPU register indices -- confirm).
# NOTE(review): y0, ia, ib, fa and fb are used below but assigned
# outside this excerpt.
y1 = 121
y2 = 122
t1 = 119
result = 118
ione = 110
fone = 111

insts = [
    # Create fone = 1.0, fa = 2.0 and fb = 4.0
    # (each integer is formed by adding an immediate to operand 0;
    # presumably register 0 holds zero at this point -- confirm)
    spu.ai(ione, 0, 1),
    spu.ai(ia, 0, 2),
    spu.ai(ib, 0, 4),
    spu.cuflt(fone, ione, 155),
    spu.cuflt(fa, ia, 155),
    spu.cuflt(fb, ib, 155),
    # Compute 1/fb
    spu.frest(y0, fb),
    spu.fi(y1, fb, y0),
    spu.fnms(t1, fb, y1, fone),
    spu.fma(y2, t1, y1, y1),
    # Multiply fa by the reciprocal held in y2
    spu.fm(result, fa, y2)
]

# Hand every instruction to cli.execute, in list order
for inst in insts:
    cli.execute(inst)