def call_kernel():
    """Invoke the compiled ``stream_triad_tasks`` entry point once.

    Every operand (``knl_lib``, ``a``, ``b``, ``c``, ``scalar`` and the
    ``*_CTYPE`` / ``ARRAY_SIZE`` names) is resolved from the surrounding
    scope; the function takes no arguments and returns nothing.
    """
    triad = knl_lib.stream_triad_tasks

    # Marshal the arguments in the same order the entry point receives them.
    a_ptr = cptr_from_numpy(a)
    b_ptr = cptr_from_numpy(b)
    c_ptr = cptr_from_numpy(c)
    scale_arg = STREAM_CTYPE(scalar)
    length_arg = INDEX_CTYPE(ARRAY_SIZE)

    triad(a_ptr, b_ptr, c_ptr, scale_arg, length_arg)
def main():
    """Build ``ISPC_CODE``, load the resulting library and call ``scale`` once.

    Two float32 arrays of 2**20 elements are obtained from ``empty_aligned``
    (with 4096, "a page", passed as its ``n`` argument).  ``b`` is filled
    with pi, and both arrays are handed to the library's ``scale`` routine
    together with the factor 15 and the element count.
    """
    page_size = 4096  # a page
    num_elements = 2**20

    library_path = build_ispc(ISPC_CODE)
    lib = ctypes.cdll.LoadLibrary(library_path)

    a = empty_aligned(num_elements, dtype=np.float32, n=page_size)
    b = empty_aligned(num_elements, dtype=np.float32, n=page_size)
    b.fill(np.pi)

    factor = ctypes.c_float(15)
    count = ctypes.c_int(num_elements)
    lib.scale(cptr_from_numpy(a), cptr_from_numpy(b), factor, count)
def main():
    """Build the STREAM init/triad kernels with ISPC, time the triad, verify it.

    Steps:

    1. Read the task-system source from ``tasksys.cpp`` in the current
       working directory.
    2. Generate ISPC code for the ``init`` and ``triad`` kernels and compile
       it, together with the task system, into ``shared.so`` inside a
       temporary directory.
    3. Obtain three arrays from ``empty_aligned``, run the init kernel on
       them, then call the triad kernel twice as warm-up and ``NRUNS`` more
       times under a wall-clock timer.
    4. Print the mean time per call and the bandwidth in GB/s (three arrays
       of ``a.nbytes`` bytes per call), then check the triad result.

    Raises:
        AssertionError: if an array address is not a multiple of
            ``ALIGN_TO`` or the triad residual exceeds ten machine epsilons.
    """
    with open("tasksys.cpp") as ts_file:
        tasksys_source = ts_file.read()

    def make_knl(name, insn, vars):
        """Create the kernel ``stream_<name>_tasks`` and apply ``transform``."""
        knl = lp.make_kernel(
            "{[i]: 0<=i<n}",
            insn,
            target=lp.ISPCTarget(),
            index_dtype=INDEX_DTYPE,
            name="stream_"+name+"_tasks")

        knl = transform(knl, vars, STREAM_DTYPE)
        return knl

    init_knl = make_knl("init", """
                a[i] = 1
                b[i] = 2
                c[i] = 0
                """, "a,b,c")
    triad_knl = make_knl("triad", """
            a[i] = b[i] + scalar * c[i]
            """, "a,b,c,scalar")

    # Everything that uses the compiled library stays inside this block so
    # the temporary directory holding shared.so outlives all kernel calls.
    with TemporaryDirectory() as tmpdir:
        ispc_code = gen_code(init_knl) + gen_code(triad_knl)
        print(ispc_code)

        build_ispc_shared_lib(
            tmpdir,
            [("stream.ispc", ispc_code)],
            [("tasksys.cpp", tasksys_source)],
            cxx_options=["-g", "-fopenmp", "-DISPC_USE_OMP"],
            ispc_options=([
                #"-g", "--no-omit-frame-pointer",
                "--target=avx2-i32x8",
                "--opt=force-aligned-memory",
                "--opt=disable-loop-unroll",
                #"--opt=fast-math",
                #"--opt=disable-fma",
                ]
                # 64-bit addressing is only requested for 64-bit indices.
                + (["--addressing=64"] if INDEX_DTYPE == np.int64 else [])
                ),
            #ispc_bin="/home/andreask/pack/ispc-v1.9.0-linux/ispc",
            quiet=False,
            )

        knl_lib = ctypes.cdll.LoadLibrary(os.path.join(tmpdir, "shared.so"))

        scalar = 5

        a = empty_aligned(ARRAY_SIZE, dtype=STREAM_DTYPE, n=ALIGN_TO)
        b = empty_aligned(ARRAY_SIZE, dtype=STREAM_DTYPE, n=ALIGN_TO)
        c = empty_aligned(ARRAY_SIZE, dtype=STREAM_DTYPE, n=ALIGN_TO)

        print(
            hex(address_from_numpy(a)),
            hex(address_from_numpy(b)),
            hex(address_from_numpy(c)))
        # The ISPC code is built with --opt=force-aligned-memory, so refuse
        # to continue with buffers that are not aligned as requested.
        assert address_from_numpy(a) % ALIGN_TO == 0
        assert address_from_numpy(b) % ALIGN_TO == 0
        assert address_from_numpy(c) % ALIGN_TO == 0

        knl_lib.stream_init_tasks(
            cptr_from_numpy(a),
            cptr_from_numpy(b),
            cptr_from_numpy(c),
            INDEX_CTYPE(ARRAY_SIZE),
            )

        def call_kernel():
            """Run the triad kernel once on ``a``, ``b``, ``c``."""
            knl_lib.stream_triad_tasks(
                cptr_from_numpy(a),
                cptr_from_numpy(b),
                cptr_from_numpy(c),
                STREAM_CTYPE(scalar),
                INDEX_CTYPE(ARRAY_SIZE),
                )

        # Two untimed warm-up calls before measuring.
        call_kernel()
        call_kernel()

        start_time = time()

        for _ in range(NRUNS):
            call_kernel()

        elapsed = time() - start_time

        print(elapsed/NRUNS)

        print(1e-9*3*a.nbytes*NRUNS/elapsed, "GB/s")

        # Residual of the triad a = b + scalar*c.  The scalar*c term must be
        # subtracted; adding it (as "a-b+scalar*c" does) only yields zero
        # because the init kernel sets c to 0, so the c term went unchecked.
        residual = a - (b + scalar*c)
        assert la.norm(residual, np.inf) < np.finfo(STREAM_DTYPE).eps * 10
def main():
    """Compile the STREAM init/triad kernels via ISPC, benchmark and check them.

    The task-system source is read from ``tasksys.cpp`` (relative to the
    current working directory).  ISPC code for both kernels is generated and
    compiled into ``shared.so`` in a temporary directory, which is loaded
    with ``ctypes``.  Three arrays from ``empty_aligned`` are initialised by
    the init kernel; the triad kernel is then called twice untimed and
    ``NRUNS`` times timed.  The mean time per call and the bandwidth in GB/s
    (three arrays of ``a.nbytes`` bytes per call) are printed.

    Raises:
        AssertionError: if an array address is not a multiple of
            ``ALIGN_TO`` or the triad residual exceeds ten machine epsilons.
    """
    with open("tasksys.cpp", "r") as ts_file:
        tasksys_source = ts_file.read()

    def make_knl(name, insn, vars):
        """Create the kernel ``stream_<name>_tasks`` and apply ``transform``."""
        knl = lp.make_kernel(
            "{[i]: 0<=i<n}",
            insn,
            target=lp.ISPCTarget(),
            index_dtype=INDEX_DTYPE,
            name="stream_"+name+"_tasks")

        knl = transform(knl, vars, STREAM_DTYPE)
        return knl

    init_knl = make_knl("init", """
                a[i] = 1
                b[i] = 2
                c[i] = 0
                """, "a,b,c")
    triad_knl = make_knl("triad", """
            a[i] = b[i] + scalar * c[i]
            """, "a,b,c,scalar")

    # The library is loaded from the temporary directory, so all calls into
    # it are made before the directory is cleaned up.
    with TemporaryDirectory() as tmpdir:
        ispc_code = gen_code(init_knl) + gen_code(triad_knl)
        print(ispc_code)

        build_ispc_shared_lib(
            tmpdir,
            [("stream.ispc", ispc_code)],
            [("tasksys.cpp", tasksys_source)],
            cxx_options=["-g", "-fopenmp", "-DISPC_USE_OMP"],
            ispc_options=([
                #"-g", "--no-omit-frame-pointer",
                "--target=avx2-i32x8",
                "--opt=force-aligned-memory",
                "--opt=disable-loop-unroll",
                #"--opt=fast-math",
                #"--opt=disable-fma",
                ]
                # 64-bit addressing is only requested for 64-bit indices.
                + (["--addressing=64"] if INDEX_DTYPE == np.int64 else [])
                ),
            #ispc_bin="/home/andreask/pack/ispc-v1.9.0-linux/ispc",
            quiet=False,
            )

        knl_lib = ctypes.cdll.LoadLibrary(os.path.join(tmpdir, "shared.so"))

        scalar = 5

        a = empty_aligned(ARRAY_SIZE, dtype=STREAM_DTYPE, n=ALIGN_TO)
        b = empty_aligned(ARRAY_SIZE, dtype=STREAM_DTYPE, n=ALIGN_TO)
        c = empty_aligned(ARRAY_SIZE, dtype=STREAM_DTYPE, n=ALIGN_TO)

        print(
            hex(address_from_numpy(a)),
            hex(address_from_numpy(b)),
            hex(address_from_numpy(c)))
        # --opt=force-aligned-memory is in effect, so misaligned buffers
        # must not reach the kernels.
        assert address_from_numpy(a) % ALIGN_TO == 0
        assert address_from_numpy(b) % ALIGN_TO == 0
        assert address_from_numpy(c) % ALIGN_TO == 0

        knl_lib.stream_init_tasks(
            cptr_from_numpy(a),
            cptr_from_numpy(b),
            cptr_from_numpy(c),
            INDEX_CTYPE(ARRAY_SIZE),
            )

        def call_kernel():
            """Run the triad kernel once on ``a``, ``b``, ``c``."""
            knl_lib.stream_triad_tasks(
                cptr_from_numpy(a),
                cptr_from_numpy(b),
                cptr_from_numpy(c),
                STREAM_CTYPE(scalar),
                INDEX_CTYPE(ARRAY_SIZE),
                )

        # Warm up (untimed) before taking the measurement.
        call_kernel()
        call_kernel()

        start_time = time()

        for _ in range(NRUNS):
            call_kernel()

        elapsed = time() - start_time

        print(elapsed/NRUNS)

        print(1e-9*3*a.nbytes*NRUNS/elapsed, "GB/s")

        # Compare a against the expected triad value b + scalar*c.  Writing
        # this as "a-b+scalar*c" adds the c term instead of subtracting it
        # and only passes because c is initialised to 0.
        expected = b + scalar*c
        assert la.norm(a - expected, np.inf) < np.finfo(STREAM_DTYPE).eps * 10
def call_kernel():
    """Call the compiled ``eval`` routine with the arguments ``approx`` requires.

    If ``'map'`` is among ``approx.optimizations`` the routine receives the
    ``mid``, ``left``, ``right``, ``interval_a``, ``interval_b``, ``coeff``
    and ``map`` arrays; otherwise it receives only ``approx.tree_1d``.  In
    both cases ``x``, ``y`` and the count ``size`` follow.  All names are
    resolved from the surrounding scope; nothing is returned.
    """
    # Simple layout first: a single tree array followed by x, y, size.
    if 'map' not in approx.optimizations:
        knl_lib.eval(
            cptr_from_numpy(approx.tree_1d),
            cptr_from_numpy(x),
            cptr_from_numpy(y),
            INDEX_CTYPE(size),
        )
        return

    # Mapped layout: seven lookup arrays precede x, y, size.
    evaluate = knl_lib.eval
    call_args = [
        cptr_from_numpy(approx.mid),
        cptr_from_numpy(approx.left),
        cptr_from_numpy(approx.right),
        cptr_from_numpy(approx.interval_a),
        cptr_from_numpy(approx.interval_b),
        cptr_from_numpy(approx.coeff),
        cptr_from_numpy(approx.map),
        cptr_from_numpy(x),
        cptr_from_numpy(y),
        INDEX_CTYPE(size),
    ]
    evaluate(*call_args)
def main(experiment,
         ispc_bin="/home/ubuntu-boot/Desktop/ispc-v1.9.1-linux/ispc"):
    """Build and time one STREAM-style kernel ("triad" or "copy") with ISPC.

    The task-system source is read from ``tests/tasksys.cpp``.  An ``init``
    kernel and the selected benchmark kernel are generated, compiled into
    ``shared.so`` in a temporary directory and loaded with ``ctypes``.  The
    benchmark kernel is called 7 times untimed and ``NRUNS`` times timed;
    the mean time per call and the bandwidth in GB/s are printed.

    Args:
        experiment: ``"triad"`` (``a = b + scalar*c``, three arrays counted)
            or ``"copy"`` (``a = b``, two arrays counted).
        ispc_bin: path of the ``ispc`` executable handed to
            ``build_ispc_shared_lib``.  The default is the path this
            function previously had hard-coded.

    Raises:
        ValueError: if ``experiment`` is neither ``"triad"`` nor ``"copy"``.
        AssertionError: if an array address is not a multiple of
            ``ALIGN_TO``.
    """
    # Reject unknown experiments up front.  Previously kernel construction
    # tested for "triad" while code generation tested for "copy", so any
    # other value built the copy kernels and then died on an undefined name.
    if experiment not in ("triad", "copy"):
        raise ValueError(
            "unknown experiment %r (expected 'triad' or 'copy')"
            % (experiment,))
    is_triad = experiment == "triad"

    print()
    print("Task: ", experiment)

    with open("tests/tasksys.cpp", "r") as ts_file:
        tasksys_source = ts_file.read()

    def make_knl(name, insn, vars):
        """Create the kernel ``stream_<name>_tasks`` and apply ``transform``."""
        knl = lp.make_kernel("{[i]: 0<=i<n}",
                             insn,
                             target=lp.ISPCTarget(),
                             index_dtype=INDEX_DTYPE,
                             name="stream_" + name + "_tasks")
        knl = transform(knl, vars, STREAM_DTYPE)
        return knl

    if is_triad:
        init_knl = make_knl(
            "init", """
                a[i] = 1
                b[i] = 3
                c[i] = 0
                scalar = 7
                """, "a,b,c,scalar")
        bench_knl = make_knl(
            "triad", """
                a[i] = b[i] + scalar * c[i]
                """, "a,b,c,scalar")
    else:
        init_knl = make_knl(
            "init", """
                a[i] = 1
                b[i] = 9
                """, "a,b")
        bench_knl = make_knl("copy", """
                a[i] = b[i]
                """, "a,b")

    # The library is loaded from the temporary directory, so all calls into
    # it are made before the directory is cleaned up.
    with TemporaryDirectory() as tmpdir:
        ispc_code = gen_code(init_knl) + gen_code(bench_knl)
        print(ispc_code)

        build_ispc_shared_lib(
            tmpdir,
            [("stream.ispc", ispc_code)],
            [("tasksys.cpp", tasksys_source)],
            cxx_options=["-g", "-fopenmp", "-DISPC_USE_OMP"],
            ispc_options=([
                "-g", "-O0", "--no-omit-frame-pointer",
                "--target=avx2-i32x8",
                #"--opt=force-aligned-memory",
                "--opt=disable-loop-unroll",
                #"--opt=fast-math",
                "--opt=disable-fma",
                "--addressing=32",
            ]),
            ispc_bin=ispc_bin,
            quiet=True,
        )

        knl_lib = ctypes.cdll.LoadLibrary(os.path.join(tmpdir, "shared.so"))

        scalar = 5

        # NOTE(review): the compiled stream_init_tasks is never invoked in
        # this function, so the kernels run on whatever empty_aligned
        # returns -- confirm that this is intended.
        a = empty_aligned(ARRAY_SIZE, dtype=STREAM_DTYPE, n=ALIGN_TO)
        b = empty_aligned(ARRAY_SIZE, dtype=STREAM_DTYPE, n=ALIGN_TO)
        c = empty_aligned(ARRAY_SIZE, dtype=STREAM_DTYPE, n=ALIGN_TO)

        #print( hex(address_from_numpy(a)),
        #       hex(address_from_numpy(b)),
        #       hex(address_from_numpy(c)))
        assert address_from_numpy(a) % ALIGN_TO == 0
        assert address_from_numpy(b) % ALIGN_TO == 0
        assert address_from_numpy(c) % ALIGN_TO == 0

        # Pick the entry point and marshal its arguments once; the same
        # argument objects are reused for every call.
        if is_triad:
            g = knl_lib.stream_triad_tasks
            x = [
                cptr_from_numpy(a),
                cptr_from_numpy(b),
                cptr_from_numpy(c),
                STREAM_CTYPE(scalar),
                INDEX_CTYPE(ARRAY_SIZE),
            ]
        else:
            g = knl_lib.stream_copy_tasks
            x = [
                cptr_from_numpy(a),
                cptr_from_numpy(b),
                INDEX_CTYPE(ARRAY_SIZE),
            ]

        def call_kernel():
            """Run the selected kernel once with the prepared arguments."""
            g(*x)

        # Untimed warm-up: 7 calls in total.
        for _ in range(7):
            call_kernel()

        start_time = time()

        for _ in range(NRUNS):
            call_kernel()

        elapsed = time() - start_time

        print("Avg Time: ", elapsed / NRUNS)
        # Triad counts three arrays per call, copy counts two.
        arrays_touched = 3 if is_triad else 2
        # The value is in GB/s (1e-9 * bytes / seconds); the label used to
        # read "MB: ", which contradicted the unit printed after it.
        print("Bandwidth: ",
              1e-9 * arrays_touched * a.nbytes * NRUNS / elapsed, "GB/s")