def TestSynIterInc():
    """Run a kernel whose counter is bumped inside a ``syn_iter`` loop.

    The counter register is initialised from a register holding
    ``(1, 1, 1, 1)``; one ``iadd`` of that same register is emitted inside
    ``syn_iter(code, 4, step=1, mode=INC)``.  The ``x`` component of the
    counter is moved to ``o0`` and the first ``SIZE`` elements of the bound
    output buffer are each expected to equal 5.

    Prints ``Passed == `` followed by the boolean outcome.  Returns None.
    """
    SIZE = 64

    # build and run the kernel
    prgm = env.Program()
    code = prgm.get_stream()

    code.add(ptx.dcl_output(reg.o0, USAGE=ptx.usage.pos))
    ones = prgm.acquire_register((1, 1, 1, 1))
    counter = prgm.acquire_register()
    code.add(ptx.mov(counter, ones))

    # The loop variable is unused; only the emitted loop body matters here.
    for i in syn_iter(code, 4, step=1, mode=INC):
        code.add(ptx.iadd(counter, counter, ones))

    code.add(ptx.mov(reg.o0, counter.x))

    domain = (0, 0, SIZE, SIZE)
    proc = env.Processor(0)

    ext_output = proc.alloc_remote('i', 1, SIZE)
    try:
        prgm.set_binding(reg.o0, ext_output)
        prgm.add(code)
        proc.execute(prgm, domain)

        passed = True
        for i in xrange(0, SIZE):
            if ext_output[i] != 5:
                passed = False

        # Single parenthesised argument: same text as the Python 2 statement
        # ``print "Passed == ", passed`` (which inserts one separating space).
        print("%s %s" % ("Passed == ", passed))
    finally:
        # Release the remote buffer even when binding/execution fails.
        proc.free(ext_output)
def TestSynIterInc():
    """Run a kernel whose counter is bumped inside a ``syn_iter`` loop.

    The counter register is initialised from a register holding
    ``(1, 1, 1, 1)``; one ``iadd`` of that same register is emitted inside
    ``syn_iter(code, 4, step=1, mode=INC)``.  The ``x`` component of the
    counter is moved to ``o0`` and the first ``SIZE`` elements of the bound
    output buffer are each expected to equal 5.

    Prints ``Passed == `` followed by the boolean outcome.  Returns None.
    """
    SIZE = 64

    # build and run the kernel
    prgm = env.Program()
    code = prgm.get_stream()

    code.add(ptx.dcl_output(reg.o0, USAGE=ptx.usage.pos))
    ones = prgm.acquire_register((1, 1, 1, 1))
    counter = prgm.acquire_register()
    code.add(ptx.mov(counter, ones))

    # The loop variable is unused; only the emitted loop body matters here.
    for i in syn_iter(code, 4, step=1, mode=INC):
        code.add(ptx.iadd(counter, counter, ones))

    code.add(ptx.mov(reg.o0, counter.x))

    domain = (0, 0, SIZE, SIZE)
    proc = env.Processor(0)

    ext_output = proc.alloc_remote('i', 1, SIZE)
    try:
        prgm.set_binding(reg.o0, ext_output)
        prgm.add(code)
        proc.execute(prgm, domain)

        passed = True
        for i in xrange(0, SIZE):
            if ext_output[i] != 5:
                passed = False

        # Single parenthesised argument: same text as the Python 2 statement
        # ``print "Passed == ", passed`` (which inserts one separating space).
        print("%s %s" % ("Passed == ", passed))
    finally:
        # Release the remote buffer even when binding/execution fails.
        proc.free(ext_output)
def TestSimpleKernelNPy():
    """Push a NumPy-backed remote buffer through a sampling kernel and check it.

    Two ``SIZE x SIZE`` buffers with 4 float components per element are
    allocated via ``alloc_remote_npy``.  The input is filled with consecutive
    float values, the output is zeroed, and a kernel that samples resource 0
    at ``v0.xy`` into ``o0`` is executed over the full domain.  Afterwards
    every output component is compared against the value written to the
    matching input component; each mismatch is reported on stdout.

    Returns None.
    """
    import corepy.arch.cal.isa as isa

    SIZE = 128

    proc = Processor(0)

    arr_input = proc.alloc_remote_npy('f', 4, SIZE, SIZE)
    arr_output = proc.alloc_remote_npy('f', 4, SIZE, SIZE)

    # Fill the input with a running counter and clear the output.
    # NOTE(review): the increment is taken to be per component (innermost
    # loop); confirm against the original, un-collapsed layout.
    val = 0.0
    for i in xrange(0, SIZE):
        for j in xrange(0, SIZE):
            for k in xrange(0, 4):
                arr_input[i][j][k] = val
                arr_output[i][j][k] = 0.0
                val += 1.0

    # build and run the kernel
    prgm = Program()
    code = prgm.get_stream()

    # The position input is declared with a raw string rather than through
    # isa.dcl_input(...), which the original left commented out.
    code.add("dcl_input_position_interp(constant) v0.xy__")
    code.add(isa.dcl_output('o0', USAGE=isa.usage.generic))
    code.add(isa.dcl_resource(0, '2d', isa.fmt.float, UNNORM=True))
    code.add(isa.sample(0, 0, 'o0', 'v0.xy'))

    domain = (0, 0, SIZE, SIZE)
    prgm.set_binding("o0", arr_output)
    prgm.set_binding("i0", arr_input)

    prgm.add(code)
    prgm.cache_code()
    prgm.print_code()
    proc.execute(prgm, domain)

    # Check the output
    val = 0.0
    for i in xrange(0, SIZE):
        for j in xrange(0, SIZE):
            for k in xrange(0, 4):
                actual = arr_output[i][j][k]
                if actual != val:
                    # Report the offending component itself; formatting the
                    # whole arr_output[i] sub-array with %f cannot work.
                    print("ERROR index (%d, %d, %d) is %f, should be %f"
                          % (i, j, k, actual, val))
                val += 1.0
def TestSimpleKernelNPy():
    """Push a NumPy-backed remote buffer through a sampling kernel and check it.

    Two ``SIZE x SIZE`` buffers with 4 float components per element are
    allocated via ``alloc_remote_npy``.  The input is filled with consecutive
    float values, the output is zeroed, and a kernel that samples resource 0
    at ``v0.xy`` into ``o0`` is executed over the full domain.  Afterwards
    every output component is compared against the value written to the
    matching input component; each mismatch is reported on stdout.

    Returns None.
    """
    import corepy.arch.cal.isa as isa

    SIZE = 128

    proc = Processor(0)

    arr_input = proc.alloc_remote_npy("f", 4, SIZE, SIZE)
    arr_output = proc.alloc_remote_npy("f", 4, SIZE, SIZE)

    # Fill the input with a running counter and clear the output.
    # NOTE(review): the increment is taken to be per component (innermost
    # loop); confirm against the original, un-collapsed layout.
    val = 0.0
    for i in xrange(0, SIZE):
        for j in xrange(0, SIZE):
            for k in xrange(0, 4):
                arr_input[i][j][k] = val
                arr_output[i][j][k] = 0.0
                val += 1.0

    # build and run the kernel
    prgm = Program()
    code = prgm.get_stream()

    # The position input is declared with a raw string rather than through
    # isa.dcl_input(...), which the original left commented out.
    code.add("dcl_input_position_interp(constant) v0.xy__")
    code.add(isa.dcl_output("o0", USAGE=isa.usage.generic))
    code.add(isa.dcl_resource(0, "2d", isa.fmt.float, UNNORM=True))
    code.add(isa.sample(0, 0, "o0", "v0.xy"))

    domain = (0, 0, SIZE, SIZE)
    prgm.set_binding("o0", arr_output)
    prgm.set_binding("i0", arr_input)

    prgm.add(code)
    prgm.cache_code()
    prgm.print_code()
    proc.execute(prgm, domain)

    # Check the output
    val = 0.0
    for i in xrange(0, SIZE):
        for j in xrange(0, SIZE):
            for k in xrange(0, 4):
                actual = arr_output[i][j][k]
                if actual != val:
                    # Report the offending component itself; formatting the
                    # whole arr_output[i] sub-array with %f cannot work.
                    print("ERROR index (%d, %d, %d) is %f, should be %f"
                          % (i, j, k, actual, val))
                val += 1.0
def TestSynIterIncFloatExtStopExtStart():
    """Run a ``syn_iter_float`` loop whose start, stop and step are registers.

    The iterator is built with a stop register of 4.0 and a step register of
    1.0 (``mode=INC``), and its start is supplied through ``set_start_reg``
    with a register of 2.0.  The loop body emits one ``iadd`` of a
    ``(1, 1, 1, 1)`` register onto a counter initialised from that same
    register.  The ``x`` component of the counter is moved to ``o0`` and the
    first ``SIZE`` elements of the bound output buffer are each expected to
    equal 3.

    Prints ``Passed == `` followed by the boolean outcome.  Returns None.
    """
    SIZE = 64

    # build and run the kernel
    prgm = env.Program()
    code = prgm.get_stream()

    code.add(ptx.dcl_output(reg.o0, USAGE=ptx.usage.pos))
    ones = prgm.acquire_register((1, 1, 1, 1))
    counter = prgm.acquire_register()
    code.add(ptx.mov(counter, ones))

    stop = prgm.acquire_register((4.0, 4.0, 4.0, 4.0))
    start = prgm.acquire_register((2.0, 2.0, 2.0, 2.0))
    step = prgm.acquire_register((1.0, 1.0, 1.0, 1.0))

    fiter = syn_iter_float(code, stop, step=step, mode=INC)
    fiter.set_start_reg(start)

    # The loop variable is unused; only the emitted loop body matters here.
    for i in fiter:
        code.add(ptx.iadd(counter, counter, ones))

    code.add(ptx.mov(reg.o0, counter.x))

    domain = (0, 0, SIZE, SIZE)
    proc = env.Processor(0)

    ext_output = proc.alloc_remote('i', 1, SIZE, 1)
    try:
        prgm.set_binding(reg.o0, ext_output)
        prgm.add(code)
        proc.execute(prgm, domain)

        passed = True
        for i in xrange(0, SIZE):
            if ext_output[i] != 3:
                passed = False

        # Single parenthesised argument: same text as the Python 2 statement
        # ``print "Passed == ", passed`` (which inserts one separating space).
        print("%s %s" % ("Passed == ", passed))
    finally:
        # Release the remote buffer even when binding/execution fails.
        proc.free(ext_output)
def TestSynIterIncFloatExtStopExtStart():
    """Run a ``syn_iter_float`` loop whose start, stop and step are registers.

    The iterator is built with a stop register of 4.0 and a step register of
    1.0 (``mode=INC``), and its start is supplied through ``set_start_reg``
    with a register of 2.0.  The loop body emits one ``iadd`` of a
    ``(1, 1, 1, 1)`` register onto a counter initialised from that same
    register.  The ``x`` component of the counter is moved to ``o0`` and the
    first ``SIZE`` elements of the bound output buffer are each expected to
    equal 3.

    Prints ``Passed == `` followed by the boolean outcome.  Returns None.
    """
    SIZE = 64

    # build and run the kernel
    prgm = env.Program()
    code = prgm.get_stream()

    code.add(ptx.dcl_output(reg.o0, USAGE=ptx.usage.pos))
    ones = prgm.acquire_register((1, 1, 1, 1))
    counter = prgm.acquire_register()
    code.add(ptx.mov(counter, ones))

    stop = prgm.acquire_register((4.0, 4.0, 4.0, 4.0))
    start = prgm.acquire_register((2.0, 2.0, 2.0, 2.0))
    step = prgm.acquire_register((1.0, 1.0, 1.0, 1.0))

    fiter = syn_iter_float(code, stop, step=step, mode=INC)
    fiter.set_start_reg(start)

    # The loop variable is unused; only the emitted loop body matters here.
    for i in fiter:
        code.add(ptx.iadd(counter, counter, ones))

    code.add(ptx.mov(reg.o0, counter.x))

    domain = (0, 0, SIZE, SIZE)
    proc = env.Processor(0)

    ext_output = proc.alloc_remote('i', 1, SIZE, 1)
    try:
        prgm.set_binding(reg.o0, ext_output)
        prgm.add(code)
        proc.execute(prgm, domain)

        passed = True
        for i in xrange(0, SIZE):
            if ext_output[i] != 3:
                passed = False

        # Single parenthesised argument: same text as the Python 2 statement
        # ``print "Passed == ", passed`` (which inserts one separating space).
        print("%s %s" % ("Passed == ", passed))
    finally:
        # Release the remote buffer even when binding/execution fails.
        proc.free(ext_output)