def TestRelativeAddressing(): import corepy.arch.cal.platform as env import corepy.arch.cal.isa as cal proc = env.Processor(0) input_mem = proc.alloc_remote('I', 4, 16, 1) output_mem = proc.alloc_remote('I', 4, 1, 1) for i in range(16*1*4): for j in range(4): input_mem[i*4 + j] = i prgm = env.Program() code = prgm.get_stream() cal.set_active_code(code) cal.dcl_output(o0, USAGE=cal.usage.generic) cal.dcl_literal(l0, 1, 1, 1, 1) cal.dcl_literal(l1, 16, 16, 16, 16) cal.mov(r0, r0('0000')) cal.mov(r1, r1('0000')) cal.whileloop() cal.iadd(r1, r1, g[r0.x]) cal.iadd(r0, r0, l0) cal.breakc(cal.relop.ge, r0, l1) cal.endloop() cal.mov(o0, r1) prgm.set_binding('g[]', input_mem) prgm.set_binding('o0', output_mem) prgm.add(code) domain = (0, 0, 128, 128) prgm.print_code() proc.execute(prgm, domain) # code.cache_code() # print code.render_string if output_mem[0] == 120: print "Passed relative addressing test" else: print "Failed relative addressing test" proc.free(input_mem) proc.free(output_mem)
def TestRelativeAddressing(): import corepy.arch.cal.platform as env import corepy.arch.cal.isa as cal proc = env.Processor(0) input_mem = proc.alloc_remote('I', 4, 16, 1) output_mem = proc.alloc_remote('I', 4, 1, 1) for i in range(16 * 1 * 4): for j in range(4): input_mem[i * 4 + j] = i prgm = env.Program() code = prgm.get_stream() cal.set_active_code(code) cal.dcl_output(o0, USAGE=cal.usage.generic) cal.dcl_literal(l0, 1, 1, 1, 1) cal.dcl_literal(l1, 16, 16, 16, 16) cal.mov(r0, r0('0000')) cal.mov(r1, r1('0000')) cal.whileloop() cal.iadd(r1, r1, g[r0.x]) cal.iadd(r0, r0, l0) cal.breakc(cal.relop.ge, r0, l1) cal.endloop() cal.mov(o0, r1) prgm.set_binding('g[]', input_mem) prgm.set_binding('o0', output_mem) prgm.add(code) domain = (0, 0, 128, 128) prgm.print_code() proc.execute(prgm, domain) # code.cache_code() # print code.render_string if output_mem[0] == 120: print "Passed relative addressing test" else: print "Failed relative addressing test" proc.free(input_mem) proc.free(output_mem)
def generate(prgm): code = prgm.get_stream() cal.set_active_code(code) r_count = prgm.acquire_register() #r_cx = prgm.acquire_register() #r_cy = prgm.acquire_register() r_sum = prgm.acquire_register() r_limit = prgm.acquire_register((64.0, ) * 4) r_cmp = prgm.acquire_register() cal.dcl_output(reg.o0, USAGE=cal.usage.generic) cal.mov(r_count, r_count('0000')) #cal.mov(r_cx, r_cx('0000')) #cal.mov(r_cy, r_cy('0000')) cal.mov(r_sum, r_sum('0000')) cal.whileloop() #cal.ge(r_cmp, r_count.x, r_limit) #cal.break_logicalnz(r_cmp) cal.breakc(cal.relop.ge, r_count.x, r_limit) cal.mov(r_count, r_count('x0zw')) cal.whileloop() #cal.ge(r_cmp, r_count.y, r_limit) #cal.break_logicalnz(r_cmp) cal.breakc(cal.relop.ge, r_count.y, r_limit) cal.add(r_sum, r_sum, r_sum('1111')) cal.add(r_count, r_count, r_count('0100')) cal.endloop() cal.add(r_count, r_count, r_count('1000')) cal.endloop() cal.mov(reg.o0, r_sum) return code
def generate(prgm): code = prgm.get_stream() cal.set_active_code(code) r_count = prgm.acquire_register() #r_cx = prgm.acquire_register() #r_cy = prgm.acquire_register() r_sum = prgm.acquire_register() r_limit = prgm.acquire_register((64.0,) * 4) r_cmp = prgm.acquire_register() cal.dcl_output(reg.o0, USAGE=cal.usage.generic) cal.mov(r_count, r_count('0000')) #cal.mov(r_cx, r_cx('0000')) #cal.mov(r_cy, r_cy('0000')) cal.mov(r_sum, r_sum('0000')) cal.whileloop() #cal.ge(r_cmp, r_count.x, r_limit) #cal.break_logicalnz(r_cmp) cal.breakc(cal.relop.ge, r_count.x, r_limit) cal.mov(r_count, r_count('x0zw')) cal.whileloop() #cal.ge(r_cmp, r_count.y, r_limit) #cal.break_logicalnz(r_cmp) cal.breakc(cal.relop.ge, r_count.y, r_limit) cal.add(r_sum, r_sum, r_sum('1111')) cal.add(r_count, r_count, r_count('0100')) cal.endloop() cal.add(r_count, r_count, r_count('1000')) cal.endloop() cal.mov(reg.o0, r_sum) return code
def end(self, branch = True): """Do post-loop iterator code""" if self.mode == DEC: self.code.add(cal.break_logicalz(self.r_count)) elif self.mode == INC: self.code.add(cal.breakc(cal.relop.ge, self.r_count, self.r_stop)) self.code.add(cal.endloop()) # Reset the the current value in case this is a nested loop if self._external_start: self.code.add(cal.mov(self.r_count, self.r_start)) else: self.code.add(cal.mov(self.r_count, self.r_loop_consts.x)) for reg in self.get_acquired_registers(): self.code.prgm.release_register(reg) return
def cal_nb_generate(n_bodies, dt): code = env.InstructionStream() cal.set_active_code(code) fn_bodies = float(n_bodies) r_count = code.acquire_register() r_lpos = code.acquire_register() r_rpos = code.acquire_register() r_force = code.acquire_register() r_diff = code.acquire_register() r_dist_vec = code.acquire_register() r_dist = code.acquire_register() r_force_tmp = code.acquire_register() r_force_vec = code.acquire_register() r_vel = code.acquire_register() #code.add("dcl_input_position_interp(linear_noperspective) v0.x___") cal.dcl_input(reg.v0.x___, USAGE=cal.usage.pos, INTERP=cal.interp.linear_noperspective) r_bodies = code.acquire_register((fn_bodies,) * 4) r_G = code.acquire_register((G,) * 4) r_dt = code.acquire_register((dt,) * 4) cal.dcl_output(reg.o0, USAGE=cal.usage.generic) cal.dcl_output(reg.o1, USAGE=cal.usage.generic) cal.dcl_resource(0, cal.pixtex_type.oned, cal.fmt.float, UNNORM=True) # positions cal.dcl_resource(1, cal.pixtex_type.oned, cal.fmt.float, UNNORM=True) # velocities # Loop over all other points to calculate the force cal.mov(r_count, r_count('0000')) # loop counter cal.sample(0, 0, r_lpos, reg.v0.x) # Local position cal.mov(r_force, r_force('0000')) # total force # Compute force using input from every other point cal.whileloop() # Break if end of points reached cal.breakc(cal.relop.ge, r_count, r_bodies) cal.sample(0, 0, r_rpos, r_count.x) # Remote position # d_xyz cal.sub(r_diff, r_lpos.xyz0, r_rpos.xyz0) # local pos - remote pos # dist_tmp cal.mul(r_dist_vec, r_diff.xxxx, r_diff.xxxx) cal.mad(r_dist_vec, r_diff.yyyy, r_diff.yyyy, r_dist_vec) cal.mad(r_dist_vec, r_diff.zzzz, r_diff.zzzz, r_dist_vec) # distance # TODO - skip rest of force computation if distance is 0 cal.sqrt_vec(r_dist, r_dist_vec) # force G * ((m[i] * m[j]) / dist_tmp) cal.mul(r_force_tmp, r_lpos.wwww, r_rpos.wwww) cal.div(cal.zeroop.zero, r_force_tmp, r_force_tmp, r_dist_vec) cal.mul(r_force_tmp, r_force_tmp, r_G) # f_xyz cal.div(cal.zeroop.zero, r_force_vec, r_diff.xyz0, r_dist.xyz1) cal.mul(r_force_vec, r_force_vec.xyz0, r_force_tmp.xyz0) cal.sub(r_force, r_force.xyz0, r_force_vec.xyz0) # Increment loop counter, end loop cal.add(r_count, r_count, r_count('1111')) cal.endloop() # Acceleration cal.div(cal.zeroop.zero, r_force, r_force.xyz0, r_lpos.wwww) # Velocity cal.sample(1, 1, r_vel, reg.v0.x) # Load velocity cal.mad(r_vel, r_force, r_dt, r_vel) cal.mov(reg.o1, r_vel) # Position cal.mad(reg.o0, r_vel.xyz0, r_dt.xyz0, r_lpos.xyzw) return code
def cal_nb_generate_local(n_bodies, dt, steps): code = env.InstructionStream() cal.set_active_code(code) fn_bodies = float(n_bodies) steps = float(steps) r_count = code.acquire_register() r_step = code.acquire_register() r_lpos = code.acquire_register() r_rpos = code.acquire_register() r_force = code.acquire_register() r_diff = code.acquire_register() r_dist_vec = code.acquire_register() r_dist = code.acquire_register() r_force_tmp = code.acquire_register() r_force_vec = code.acquire_register() r_vel = code.acquire_register() print "fn_bodies", fn_bodies code.add("dcl_input_position_interp(linear_noperspective) v0.xy__") #cal.dcl_input(reg.v0.x___, USAGE=cal.usage.pos, INTERP=cal.interp.linear_noperspective) r_numsteps = code.acquire_register((steps,) * 4) r_bodies = code.acquire_register((fn_bodies,) * 4) #r_bodiesquare = code.acquire_register((float(fn_bodies**2),) * 4) r_G = code.acquire_register((G,) * 4) r_dt = code.acquire_register((dt,) * 4) cal.dcl_output(reg.o0, USAGE=cal.usage.generic) cal.dcl_output(reg.o1, USAGE=cal.usage.generic) cal.dcl_output(reg.o2, USAGE=cal.usage.generic) cal.dcl_resource(0, cal.pixtex_type.twod, cal.fmt.float, UNNORM=True) # positions cal.dcl_resource(1, cal.pixtex_type.twod, cal.fmt.float, UNNORM=True) # velocities r_foo = code.acquire_register() cal.mov(r_foo, r_foo('0000')) r_gpos = code.acquire_register() cal.mad(r_gpos, reg.v0.y, r_bodies.x, reg.v0.x) r_gvel = code.acquire_register() cal.mad(r_gvel, r_bodies.x, r_bodies.x, r_gpos) cal.ftoi(r_gpos, r_gpos) cal.ftoi(r_gvel, r_gvel) cal.sample(0, 0, r_lpos, reg.v0.xy) # Local position cal.sample(1, 1, r_vel, reg.v0.xy) # Load velocity cal.mov(reg.g[r_gpos.x], r_lpos) cal.mov(reg.g[r_gvel.x], r_vel) cal.mov(r_step, r_step('0000')) cal.whileloop() cal.breakc(cal.relop.ge, r_step.x, r_numsteps) cal.mov(r_count, r_count('0000')) # loop counter cal.whileloop() cal.breakc(cal.relop.ge, r_count.x, r_bodies) cal.add(r_foo, r_foo, r_foo('1111')) # calculate force r_tmp = code.acquire_register() cal.ftoi(r_tmp, r_count) cal.mov(r_rpos, reg.g[r_tmp.x]) # d_xyz cal.sub(r_diff, r_lpos.xyz0, r_rpos.xyz0) # local pos - remote pos # dist_tmp cal.mul(r_dist_vec, r_diff.xxxx, r_diff.xxxx) cal.mad(r_dist_vec, r_diff.yyyy, r_diff.yyyy, r_dist_vec) cal.mad(r_dist_vec, r_diff.zzzz, r_diff.zzzz, r_dist_vec) # distance # TODO - skip rest of force computation if distance is 0 cal.sqrt_vec(r_dist, r_dist_vec) # force G * ((m[i] * m[j]) / dist_tmp) cal.mul(r_force_tmp, r_lpos.wwww, r_rpos.wwww) cal.div(r_force_tmp, r_force_tmp, r_dist_vec, ZEROOP = cal.zeroop.zero) cal.mul(r_force_tmp, r_force_tmp, r_G) # f_xyz # TODO - whats going on, is this right? cal.div(r_force_vec, r_diff.xyz0, r_dist.xyz1, ZEROOP = cal.zeroop.zero) cal.mul(r_force_vec, r_force_vec.xyz0, r_force_tmp.xyz0) cal.sub(r_force, r_force.xyz0, r_force_vec.xyz0) cal.add(r_count, r_count, r_count('1111')) cal.endloop() # Acceleration cal.div(r_force, r_force.xyz0, r_lpos.wwww, ZEROOP = cal.zeroop.zero) # Velocity cal.mad(r_vel, r_force, r_dt, r_vel) # Position cal.mad(reg.o0, r_vel.xyz0, r_dt.xyz0, r_lpos.xyzw) # store updated pos and vel cal.mov(reg.g[r_gpos.x], r_lpos) cal.mov(reg.g[r_gvel.x], r_vel) cal.add(r_step, r_step, r_step('1111')) cal.endloop() cal.mov(reg.o0, r_lpos) cal.mov(reg.o1, r_vel) cal.mov(reg.o2, r_foo) return code
def cal_nb_generate_2d(prgm, n_bodies, dt): code = prgm.get_stream() cal.set_active_code(code) fn_bodies = float(n_bodies) #r_cx = prgm.acquire_register() #r_cy = prgm.acquire_register() r_count = prgm.acquire_register() r_lpos = prgm.acquire_register() r_rpos = prgm.acquire_register() r_force = prgm.acquire_register() r_diff = prgm.acquire_register() r_dist_vec = prgm.acquire_register() r_dist = prgm.acquire_register() r_force_tmp = prgm.acquire_register() r_force_vec = prgm.acquire_register() r_vel = prgm.acquire_register() #code.add("dcl_input_position_interp(linear_noperspective) v0.xy__") cal.dcl_input(reg.v0.xy__, USAGE=cal.usage.pos, INTERP=cal.interp.linear_noperspective) r_bodies = prgm.acquire_register((fn_bodies,) * 4) r_G = prgm.acquire_register((G,) * 4) r_dt = prgm.acquire_register((dt,) * 4) cal.dcl_output(reg.o0, USAGE=cal.usage.generic) cal.dcl_output(reg.o1, USAGE=cal.usage.generic) cal.dcl_resource(0, cal.pixtex_type.twod, cal.fmt.float, UNNORM=True) # positions cal.dcl_resource(1, cal.pixtex_type.twod, cal.fmt.float, UNNORM=True) # velocities # Loop over all other points to calculate the force cal.mov(r_count, r_count('0000')) # loop counter #cal.mov(r_cx, r_cx('0000')) # loop counter #cal.mov(r_cy, r_cy('0000')) # loop counter cal.sample(0, 0, r_lpos, reg.v0.xy) # Local position cal.mov(r_force, r_force('0000')) # total force # Compute force using input from every other point cal.whileloop() cal.breakc(cal.relop.ge, r_count.x, r_bodies) cal.mov(r_count, r_count.x0zw) cal.whileloop() cal.breakc(cal.relop.ge, r_count.y, r_bodies) #for i in xrange(0, 4): #cal.add(r_count, r_cx('x000'), r_cy('0x00')) cal.sample(0, 0, r_rpos, r_count.xy) # Remote position # d_xyz cal.sub(r_diff, r_lpos.xyz0, r_rpos.xyz0) # local pos - remote pos # dist_tmp #cal.mul(r_dist_vec, r_diff.xxxx, r_diff.xxxx) #cal.mad(r_dist_vec, r_diff.yyyy, r_diff.yyyy, r_dist_vec) #cal.mad(r_dist_vec, r_diff.zzzz, r_diff.zzzz, r_dist_vec) cal.dp3(r_dist_vec, r_diff, r_diff, IEEE = False) # distance # TODO - skip rest of force computation if distance is 0 cal.sqrt_vec(r_dist, r_dist_vec) # force G * ((m[i] * m[j]) / dist_tmp) cal.mul(r_force_tmp, r_lpos.wwww, r_rpos.wwww, IEEE = False) cal.div(r_force_tmp, r_force_tmp, r_dist_vec, ZEROOP = cal.zeroop.zero) cal.mul(r_force_tmp, r_force_tmp, r_G, IEEE = False) # f_xyz # TODO - whats going on, is this right? cal.div(r_force_vec, r_diff.xyz0, r_dist.xyz1, ZEROOP = cal.zeroop.zero) cal.mul(r_force_vec, r_force_vec.xyz0, r_force_tmp.xyz0, IEEE = False) cal.sub(r_force, r_force.xyz0, r_force_vec.xyz0) #cal.add(r_cy, r_cy, r_count('1111')) #cal.add(r_count, r_count, r_count('0100')) #cal.ifc(cal.relop.ge, r_count.y, r_bodies.y) ## TODO - can I merge these two? #cal.mov(r_count('_y__'), r_count('x0zw')) #cal.add(r_count, r_count, r_count('1000')) #cal.endif() # Increment loop counter, end loop cal.add(r_count, r_count, r_count('0100')) cal.endloop() cal.add(r_count, r_count, r_count('1000')) #cal.add(r_cx, r_cx, r_cx('1111')) cal.endloop() # Acceleration cal.div(r_force, r_force.xyz0, r_lpos.wwww, ZEROOP = cal.zeroop.zero) # Velocity cal.sample(1, 1, r_vel, reg.v0.xy) # Load velocity cal.mad(r_vel, r_force, r_dt, r_vel, IEEE = False) cal.mov(reg.o1, r_vel) # Position cal.mad(reg.o0, r_vel.xyz0, r_dt.xyz0, r_lpos.xyzw, IEEE = False) #cal.mov(reg.g[0], r_vel) return code