def test_helper_fclamp_const(mode):
    with pnlvm.LLVMBuilderContext() as ctx:
        local_vec = copy.deepcopy(VECTOR)
        double_ptr_ty = ctx.float_ty.as_pointer()
        func_ty = ir.FunctionType(ir.VoidType(), (double_ptr_ty, ctx.int32_ty))

        # Create clamp function
        custom_name = ctx.get_unique_name("clamp")
        function = ir.Function(ctx.module, func_ty, name=custom_name)
        vec, count = function.args
        block = function.append_basic_block(name="entry")
        builder = ir.IRBuilder(block)

        with pnlvm.helpers.for_loop_zero_inc(builder, count, "linear") as (b1, index):
            val_ptr = b1.gep(vec, [index])
            val = b1.load(val_ptr)
            val = pnlvm.helpers.fclamp(b1, val, TST_MIN, TST_MAX)
            b1.store(val, val_ptr)

        builder.ret_void()

    ref = np.clip(VECTOR, TST_MIN, TST_MAX)
    bin_f = pnlvm.LLVMBinaryFunction.get(custom_name)

    if mode == 'CPU':
        ct_ty = pnlvm._convert_llvm_ir_to_ctype(double_ptr_ty)
        ct_vec = local_vec.ctypes.data_as(ct_ty)
        bin_f(ct_vec, DIM_X)
    else:
        bin_f.cuda_wrap_call(local_vec, np.int32(DIM_X))

    assert np.array_equal(local_vec, ref)

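# For reference, a minimal Python sketch of the scalar semantics fclamp is
# exercised against here (equivalent to np.clip on each element); fclamp_ref
# is a hypothetical name used only for illustration:
def fclamp_ref(x, lo, hi):
    # Clamp x into the closed interval [lo, hi].
    return min(max(x, lo), hi)
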
def test_dot_transposed_llvm_constant_dim(benchmark, mode):
    custom_name = None
    with pnlvm.LLVMBuilderContext() as ctx:
        custom_name = ctx.get_unique_name("vxsqm")
        double_ptr_ty = ctx.float_ty.as_pointer()
        func_ty = ir.FunctionType(ir.VoidType(),
                                  (double_ptr_ty, double_ptr_ty, double_ptr_ty))

        # get builtin IR
        builtin = ctx.import_llvm_function("__pnl_builtin_vxm_transposed")

        # Create vector-matrix multiply with compile-time constant dimensions
        function = ir.Function(ctx.module, func_ty, name=custom_name)
        _x = ctx.int32_ty(DIM_X)
        _y = ctx.int32_ty(DIM_Y)
        _v, _m, _o = function.args
        block = function.append_basic_block(name="entry")
        builder = ir.IRBuilder(block)
        builder.call(builtin, [_v, _m, _x, _y, _o])
        builder.ret_void()

    binf2 = pnlvm.LLVMBinaryFunction.get(custom_name)
    if mode == 'CPU':
        benchmark(binf2, ct_tvec, ct_u, ct_tvec_res)
    else:
        cuda_vec = pnlvm.jit_engine.pycuda.driver.In(trans_vector)
        cuda_mat = pnlvm.jit_engine.pycuda.driver.In(u)
        cuda_res = pnlvm.jit_engine.pycuda.driver.Out(llvm_tvec_res)
        benchmark(binf2.cuda_call, cuda_vec, cuda_mat, cuda_res)

    assert np.allclose(llvm_tvec_res, trans_dot_res)

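# A hedged NumPy sketch of the reference being asserted against, assuming
# __pnl_builtin_vxm_transposed computes the product against the transposed
# matrix (v @ M.T); trans_dot_ref is a hypothetical name for illustration:
def trans_dot_ref(v, m):
    return np.dot(v, m.transpose())
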
def test_helper_recursive_iterate_arrays(mode, var1, var2, expected):
    with pnlvm.LLVMBuilderContext() as ctx:
        arr_ptr_ty = ctx.convert_python_struct_to_llvm_ir(var1).as_pointer()
        func_ty = ir.FunctionType(ir.VoidType(), [arr_ptr_ty, arr_ptr_ty, arr_ptr_ty])

        custom_name = ctx.get_unique_name("elementwise_op")
        function = ir.Function(ctx.module, func_ty, name=custom_name)
        u, v, out = function.args
        block = function.append_basic_block(name="entry")
        builder = ir.IRBuilder(block)

        for (a_ptr, b_ptr, o_ptr) in pnlvm.helpers.recursive_iterate_arrays(
                ctx, builder, u, v, out):
            a = builder.load(a_ptr)
            b = builder.load(b_ptr)
            builder.store(builder.fadd(a, b), o_ptr)

        builder.ret_void()

    bin_f = pnlvm.LLVMBinaryFunction.get(custom_name)
    if mode == 'CPU':
        ct_vec = np.ctypeslib.as_ctypes(var1)
        ct_vec_2 = np.ctypeslib.as_ctypes(var2)
        res = bin_f.byref_arg_types[2]()
        bin_f(ct_vec, ct_vec_2, ctypes.byref(res))
    else:
        res = copy.deepcopy(var1)
        bin_f.cuda_wrap_call(var1, var2, res)

    assert np.array_equal(res, expected)

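# recursive_iterate_arrays yields matching element pointers across all of its
# array arguments, so other elementwise kernels only need a different loop
# body; a sketch of an elementwise multiply under the same scaffolding:
#
#     for (a_ptr, b_ptr, o_ptr) in pnlvm.helpers.recursive_iterate_arrays(
#             ctx, builder, u, v, out):
#         builder.store(builder.fmul(builder.load(a_ptr), builder.load(b_ptr)), o_ptr)
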
def test_helper_numerical(mode, op, var, expected):
    with pnlvm.LLVMBuilderContext() as ctx:
        func_ty = ir.FunctionType(ir.VoidType(), [ctx.float_ty.as_pointer()])

        custom_name = ctx.get_unique_name("numerical")
        function = ir.Function(ctx.module, func_ty, name=custom_name)
        in_out = function.args[0]
        block = function.append_basic_block(name="entry")
        builder = ir.IRBuilder(block)

        variable = builder.load(in_out)
        result = op(ctx, builder, variable)
        builder.store(result, in_out)
        builder.ret_void()

    bin_f = pnlvm.LLVMBinaryFunction.get(custom_name)
    if mode == 'CPU':
        res = bin_f.byref_arg_types[0](var)
        bin_f(ctypes.byref(res))
        res = res.value
    else:
        # FIXME: this needs to consider ctx.float_ty
        res = np.array([var], dtype=np.float64)
        bin_f.cuda_wrap_call(res)
        res = res[0]

    assert res == expected

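# One possible direction for the FIXME above: pick the NumPy dtype from the
# builder context instead of hard-coding float64. A sketch, assuming
# ctx.float_ty is either an LLVM double or float type:
#
#     np_dtype = np.float64 if isinstance(ctx.float_ty, ir.DoubleType) else np.float32
#     res = np.array([var], dtype=np_dtype)
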
def test_helper_elementwise_op(mode, var, expected):
    with pnlvm.LLVMBuilderContext() as ctx:
        arr_ptr_ty = ctx.convert_python_struct_to_llvm_ir(var).as_pointer()
        func_ty = ir.FunctionType(ir.VoidType(), [arr_ptr_ty, arr_ptr_ty])

        custom_name = ctx.get_unique_name("elementwise_op")
        function = ir.Function(ctx.module, func_ty, name=custom_name)
        inp, out = function.args
        block = function.append_basic_block(name="entry")
        builder = ir.IRBuilder(block)

        pnlvm.helpers.call_elementwise_operation(
            ctx, builder, inp,
            lambda ctx, builder, x: builder.fadd(x.type(1.0), x), out)
        builder.ret_void()

    bin_f = pnlvm.LLVMBinaryFunction.get(custom_name)
    if mode == 'CPU':
        ct_vec = np.ctypeslib.as_ctypes(var)
        res = bin_f.byref_arg_types[1]()
        bin_f(ct_vec, ctypes.byref(res))
    else:
        res = copy.deepcopy(var)
        bin_f.cuda_wrap_call(var, res)

    assert np.array_equal(res, expected)

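# call_elementwise_operation applies any (ctx, builder, x) -> value callback
# to each element; e.g. a doubling kernel would only swap the lambda above
# (a sketch under the same function scaffolding):
#
#     pnlvm.helpers.call_elementwise_operation(
#         ctx, builder, inp,
#         lambda ctx, builder, x: builder.fmul(x.type(2.0), x), out)
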
def test_helper_is_boolean(self, mode, ir_type, expected):
    with pnlvm.LLVMBuilderContext() as ctx:
        func_ty = ir.FunctionType(ir.VoidType(), [ir.IntType(32).as_pointer()])

        custom_name = ctx.get_unique_name("is_boolean")
        function = ir.Function(ctx.module, func_ty, name=custom_name)
        out = function.args[0]
        block = function.append_basic_block(name="entry")
        builder = ir.IRBuilder(block)

        variable = builder.load(builder.alloca(ir_type))
        if pnlvm.helpers.is_boolean(variable):
            builder.store(out.type.pointee(1), out)
        else:
            builder.store(out.type.pointee(0), out)
        builder.ret_void()

    bin_f = pnlvm.LLVMBinaryFunction.get(custom_name)
    if mode == 'CPU':
        res = bin_f.byref_arg_types[0](-1)
        bin_f(ctypes.byref(res))
        res = res.value
    else:
        res = np.array([-1], dtype=np.int32)
        bin_f.cuda_wrap_call(res)
        res = res[0]

    assert res == expected

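# A minimal sketch of the predicate presumably being exercised, assuming
# is_boolean just tests for the LLVM i1 type; is_boolean_ref is a
# hypothetical name used only for illustration:
def is_boolean_ref(value):
    return isinstance(value.type, ir.IntType) and value.type.width == 1
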
def test_integer_broadcast(mode, val):
    custom_name = None
    with pnlvm.LLVMBuilderContext() as ctx:
        custom_name = ctx.get_unique_name("broadcast")
        int_ty = ctx.convert_python_struct_to_llvm_ir(val)
        int_array_ty = ir.ArrayType(int_ty, 8)
        func_ty = ir.FunctionType(ir.VoidType(),
                                  (int_ty.as_pointer(), int_array_ty.as_pointer()))
        function = ir.Function(ctx.module, func_ty, name=custom_name)

        i, o = function.args
        block = function.append_basic_block(name="entry")
        builder = ir.IRBuilder(block)
        ival = builder.load(i)
        ival = builder.add(ival, ival.type(1))
        # Use the loop builder and a distinct index name to avoid shadowing
        # the input argument 'i'.
        with pnlvm.helpers.array_ptr_loop(builder, o, "broadcast") as (b, idx):
            out_ptr = b.gep(o, [ctx.int32_ty(0), idx])
            b.store(ival, out_ptr)
        builder.ret_void()

    binf = pnlvm.LLVMBinaryFunction.get(custom_name)
    res = np.zeros(8, dtype=val.dtype)

    if mode == 'CPU':
        ct_res = np.ctypeslib.as_ctypes(res)
        ct_in = np.ctypeslib.as_ctypes(val)
        binf(ctypes.byref(ct_in), ctypes.byref(ct_res))
    else:
        binf.cuda_wrap_call(np.asarray(val), res)

    assert all(res == np.broadcast_to(val + 1, 8))

def test_helper_is_close(mode):
    with pnlvm.LLVMBuilderContext() as ctx:
        double_ptr_ty = ir.DoubleType().as_pointer()
        func_ty = ir.FunctionType(ir.VoidType(),
                                  [double_ptr_ty, double_ptr_ty, double_ptr_ty,
                                   ctx.int32_ty])

        # Create comparison function
        custom_name = ctx.get_unique_name("all_close")
        function = ir.Function(ctx.module, func_ty, name=custom_name)
        in1, in2, out, count = function.args
        block = function.append_basic_block(name="entry")
        builder = ir.IRBuilder(block)

        with pnlvm.helpers.for_loop_zero_inc(builder, count, "compare") as (b1, index):
            val1_ptr = b1.gep(in1, [index])
            val2_ptr = b1.gep(in2, [index])
            val1 = b1.load(val1_ptr)
            val2 = b1.load(val2_ptr)
            close = pnlvm.helpers.is_close(ctx, b1, val1, val2)
            out_ptr = b1.gep(out, [index])
            out_val = b1.select(close, val1.type(1), val1.type(0))
            b1.store(out_val, out_ptr)

        builder.ret_void()

    vec1 = copy.deepcopy(VECTOR)
    tmp = np.random.rand(DIM_X)
    tmp[0::2] = vec1[0::2]
    vec2 = np.asfarray(tmp)
    assert len(vec1) == len(vec2)
    res = np.empty_like(vec2)

    ref = np.isclose(vec1, vec2)
    bin_f = pnlvm.LLVMBinaryFunction.get(custom_name)
    if mode == 'CPU':
        ct_ty = ctypes.POINTER(bin_f.byref_arg_types[0])
        ct_vec1 = vec1.ctypes.data_as(ct_ty)
        ct_vec2 = vec2.ctypes.data_as(ct_ty)
        ct_res = res.ctypes.data_as(ct_ty)

        bin_f(ct_vec1, ct_vec2, ct_res, DIM_X)
    else:
        bin_f.cuda_wrap_call(vec1, vec2, res, np.int32(DIM_X))

    assert np.array_equal(res, ref)

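# For reference, np.isclose-style scalar comparison; a sketch assuming
# pnlvm.helpers.is_close mirrors NumPy's default tolerances, as the
# np.isclose reference above implies (is_close_ref is a hypothetical name):
def is_close_ref(a, b, rtol=1e-05, atol=1e-08):
    return abs(a - b) <= atol + rtol * abs(b)
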
def test_fixed_dimensions__pnl_builtin_vxm(mode):
    # The original builtin vxm function
    binf = pnlvm.LLVMBinaryFunction.get("__pnl_builtin_vxm")
    orig_res = np.empty_like(llvm_res)

    if mode == 'CPU':
        ct_in_ty, ct_mat_ty, _, _, ct_res_ty = binf.byref_arg_types
        ct_vec = vector.ctypes.data_as(ctypes.POINTER(ct_in_ty))
        ct_mat = matrix.ctypes.data_as(ctypes.POINTER(ct_mat_ty))
        ct_res = orig_res.ctypes.data_as(ctypes.POINTER(ct_res_ty))

        binf.c_func(ct_vec, ct_mat, x, y, ct_res)
    else:
        binf.cuda_wrap_call(vector, matrix, np.int32(x), np.int32(y), orig_res)

    custom_name = None
    with pnlvm.LLVMBuilderContext() as ctx:
        custom_name = ctx.get_unique_name("vxsqm")
        double_ptr_ty = ctx.convert_python_struct_to_llvm_ir(1.0).as_pointer()
        func_ty = ir.FunctionType(ir.VoidType(),
                                  (double_ptr_ty, double_ptr_ty, double_ptr_ty))

        # get builtin IR
        builtin = ctx.import_llvm_function("__pnl_builtin_vxm")

        # Create square vector matrix multiply
        function = ir.Function(ctx.module, func_ty, name=custom_name)
        _x = ctx.int32_ty(x)
        _v, _m, _o = function.args
        block = function.append_basic_block(name="entry")
        builder = ir.IRBuilder(block)
        builder.call(builtin, [_v, _m, _x, _x, _o])
        builder.ret_void()

    binf2 = pnlvm.LLVMBinaryFunction.get(custom_name)
    new_res = np.empty_like(llvm_res)

    if mode == 'CPU':
        ct_res = new_res.ctypes.data_as(ctypes.POINTER(ct_res_ty))

        binf2(ct_vec, ct_mat, ct_res)
    else:
        binf2.cuda_wrap_call(vector, matrix, new_res)

    assert np.array_equal(orig_res, new_res)

def test_helper_fclamp(mode):
    with pnlvm.LLVMBuilderContext() as ctx:
        double_ptr_ty = ir.DoubleType().as_pointer()
        func_ty = ir.FunctionType(ir.VoidType(),
                                  (double_ptr_ty, ctx.int32_ty, double_ptr_ty))

        # Create clamp function
        custom_name = ctx.get_unique_name("clamp")
        function = ir.Function(ctx.module, func_ty, name=custom_name)
        vec, count, bounds = function.args
        block = function.append_basic_block(name="entry")
        builder = ir.IRBuilder(block)

        tst_min = builder.load(builder.gep(bounds, [ctx.int32_ty(0)]))
        tst_max = builder.load(builder.gep(bounds, [ctx.int32_ty(1)]))

        with pnlvm.helpers.for_loop_zero_inc(builder, count, "linear") as (b1, index):
            val_ptr = b1.gep(vec, [index])
            val = b1.load(val_ptr)
            val = pnlvm.helpers.fclamp(b1, val, tst_min, tst_max)
            b1.store(val, val_ptr)

        builder.ret_void()

    ref = np.clip(VECTOR, TST_MIN, TST_MAX)
    bounds = np.asfarray([TST_MIN, TST_MAX])
    bin_f = pnlvm.LLVMBinaryFunction.get(custom_name)
    local_vec = copy.deepcopy(VECTOR)

    if mode == 'CPU':
        ct_ty = ctypes.POINTER(bin_f.byref_arg_types[0])
        ct_vec = local_vec.ctypes.data_as(ct_ty)
        ct_bounds = bounds.ctypes.data_as(ct_ty)

        bin_f(ct_vec, DIM_X, ct_bounds)
    else:
        bin_f.cuda_wrap_call(local_vec, np.int32(DIM_X), bounds)

    assert np.array_equal(local_vec, ref)

def test_helper_all_close(mode):
    with pnlvm.LLVMBuilderContext() as ctx:
        arr_ptr_ty = ir.ArrayType(ctx.float_ty, DIM_X).as_pointer()
        func_ty = ir.FunctionType(ir.VoidType(),
                                  [arr_ptr_ty, arr_ptr_ty, ir.IntType(32).as_pointer()])

        custom_name = ctx.get_unique_name("all_close")
        function = ir.Function(ctx.module, func_ty, name=custom_name)
        in1, in2, out = function.args
        block = function.append_basic_block(name="entry")
        builder = ir.IRBuilder(block)

        all_close = pnlvm.helpers.all_close(builder, in1, in2)
        res = builder.select(all_close, out.type.pointee(1), out.type.pointee(0))
        builder.store(res, out)
        builder.ret_void()

    vec1 = copy.deepcopy(VECTOR)
    vec2 = copy.deepcopy(VECTOR)

    ref = np.allclose(vec1, vec2)
    bin_f = pnlvm.LLVMBinaryFunction.get(custom_name)
    if mode == 'CPU':
        ct_ty = pnlvm._convert_llvm_ir_to_ctype(arr_ptr_ty)
        ct_vec1 = vec1.ctypes.data_as(ct_ty)
        ct_vec2 = vec2.ctypes.data_as(ct_ty)
        res = ctypes.c_int32()

        bin_f(ct_vec1, ct_vec2, ctypes.byref(res))
        res = res.value
    else:
        res = np.array([5], dtype=np.int32)
        bin_f.cuda_wrap_call(vec1, vec2, res)
        res = res[0]

    assert np.array_equal(res, ref)

def test_helper_printf(capfd, ir_argtype, format_spec, values_to_check):
    format_str = f"Hello {(format_spec + ' ') * len(values_to_check)} \n"
    with pnlvm.LLVMBuilderContext() as ctx:
        func_ty = ir.FunctionType(ir.VoidType(), [])
        ir_values_to_check = [ir_argtype(i) for i in values_to_check]
        custom_name = ctx.get_unique_name("test_printf")
        function = ir.Function(ctx.module, func_ty, name=custom_name)
        block = function.append_basic_block(name="entry")
        builder = ir.IRBuilder(block)

        pnlvm.helpers.printf(builder, format_str, *ir_values_to_check,
                             override_debug=True)
        builder.ret_void()

    bin_f = pnlvm.LLVMBinaryFunction.get(custom_name)

    # Printf is buffered in libc.
    bin_f()
    libc = ctypes.util.find_library("c")
    libc = ctypes.CDLL(libc)
    libc.fflush(0)

    assert capfd.readouterr().out == format_str % tuple(values_to_check)

def test_helper_printf(capfd):
    with pnlvm.LLVMBuilderContext() as ctx:
        func_ty = ir.FunctionType(ir.VoidType(), [ctx.int32_ty])
        custom_name = ctx.get_unique_name("hello")
        function = ir.Function(ctx.module, func_ty, name=custom_name)
        block = function.append_basic_block(name="entry")
        builder = ir.IRBuilder(block)

        ctx.inject_printf(builder, "Hello %u!\n", function.args[0],
                          override_debug=True)
        builder.ret_void()

    bin_f = pnlvm.LLVMBinaryFunction.get(custom_name)

    # Printf is buffered in libc.
    res = ctypes.c_int32(4)
    bin_f(res)

    libc = ctypes.util.find_library("c")
    libc = ctypes.CDLL(libc)
    # fflush(NULL) flushes all open streams.
    libc.fflush(0)

    assert capfd.readouterr().out == "Hello 4!\n"

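# Both printf tests flush libc by hand; the pattern could be factored into a
# small helper (a sketch; _flush_libc_streams is a hypothetical name):
def _flush_libc_streams():
    # fflush(NULL) flushes all open output streams, forcing buffered printf
    # output out of the compiled function before pytest's capfd reads it.
    libc = ctypes.CDLL(ctypes.util.find_library("c"))
    libc.fflush(0)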