def test_nvvm_accepts_encoding(self):
    # Check that NVVM accepts a constant holding every possible 8-bit
    # character. Taken from the test case added in llvmlite PR #53:
    #
    #     https://github.com/numba/llvmlite/pull/53
    #
    # The case is kept in Numba too, so that whichever encoding llvmlite
    # uses (e.g. utf-8, latin1, etc.) never produces NVVM input that NVVM
    # cannot parse correctly.

    # Build a module whose single global is a constant array of all 256
    # byte values.
    byte_array_ty = ir.ArrayType(ir.IntType(8), 256)
    all_bytes = ir.Constant(byte_array_ty, bytearray(range(256)))
    module = ir.Module()
    global_var = ir.GlobalVariable(module, all_bytes.type, "myconstant")
    global_var.global_constant = True
    global_var.initializer = all_bytes
    nvvm.fix_data_layout(module)

    # Parse with LLVM first, then feed the text of the parsed module to NVVM.
    parsed_module = llvm.parse_assembly(str(module))
    ptx = nvvm.llvm_to_ptx(str(parsed_module))

    # Every value 0..255 must be present in the generated constant array.
    elements = ", ".join(map(str, range(256)))
    expected = f"myconstant[256] = {{{elements}}}".encode('utf-8')
    self.assertIn(expected, ptx)
def test_nvvm_from_llvm(self):
    # Build a kernel with an empty body (it only returns) that takes one
    # integer argument, compile the module to PTX, and check that the
    # kernel name and the expected address size show up in the PTX text.
    m = Module("test_nvvm_from_llvm")
    fty = Type.function(Type.void(), [Type.int()])
    kernel = m.add_function(fty, name='mycudakernel')
    bldr = Builder(kernel.append_basic_block('entry'))
    bldr.ret_void()
    set_cuda_kernel(kernel)

    fix_data_layout(m)
    ptx = llvm_to_ptx(str(m)).decode('utf8')

    # assertIn reports both the needle and the PTX text on failure, whereas
    # assertTrue(x in y) would only say "False is not true".
    self.assertIn('mycudakernel', ptx)

    # The address size directive is selected by the is64bit flag.
    address_size = '.address_size 64' if is64bit else '.address_size 32'
    self.assertIn(address_size, ptx)
def test_nvvm_from_llvm(self):
    # Build a kernel with an empty body (it only returns) that takes one
    # integer argument, compile the module to PTX, and check that the
    # kernel name and the expected address size show up in the PTX text.
    m = Module("test_nvvm_from_llvm")
    fty = Type.function(Type.void(), [Type.int()])
    kernel = m.add_function(fty, name='mycudakernel')
    bldr = Builder(kernel.append_basic_block('entry'))
    bldr.ret_void()
    set_cuda_kernel(kernel)

    fix_data_layout(m)
    ptx = llvm_to_ptx(str(m)).decode('utf8')

    # assertIn reports both the needle and the PTX text on failure, whereas
    # assertTrue(x in y) would only say "False is not true".
    self.assertIn('mycudakernel', ptx)

    # The address size directive is selected by the is64bit flag.
    address_size = '.address_size 64' if is64bit else '.address_size 32'
    self.assertIn(address_size, ptx)
def test_nvvm_from_llvm(self):
    # Build a kernel with an empty body (it only returns) that takes one
    # 32-bit integer argument, compile the module to PTX, and check that
    # the kernel name and the expected address size show up in the PTX text.
    m = ir.Module("test_nvvm_from_llvm")
    fty = ir.FunctionType(ir.VoidType(), [ir.IntType(32)])
    kernel = ir.Function(m, fty, name='mycudakernel')
    bldr = ir.IRBuilder(kernel.append_basic_block('entry'))
    bldr.ret_void()
    set_cuda_kernel(kernel)

    fix_data_layout(m)
    ptx = llvm_to_ptx(str(m)).decode('utf8')

    # assertIn reports both the needle and the PTX text on failure, whereas
    # assertTrue(x in y) would only say "False is not true".
    self.assertIn('mycudakernel', ptx)

    # The address size directive is selected by the is64bit flag.
    address_size = '.address_size 64' if is64bit else '.address_size 32'
    self.assertIn(address_size, ptx)
def test_inline_rsqrt(self):
    # Build a kernel that loads a float through its pointer argument, runs
    # it through an inline-assembly "rsqrt.approx.f32" call, and stores the
    # result back through the same pointer. The generated PTX must still
    # contain that instruction mnemonic.
    mod = Module.new(__name__)
    fnty = Type.function(Type.void(), [Type.pointer(Type.float())])
    fn = mod.add_function(fnty, "cu_rsqrt")
    bldr = Builder.new(fn.append_basic_block("entry"))

    # Inline asm taking one float and producing one float ("=f,f").
    rsqrt_approx_fnty = Type.function(Type.float(), [Type.float()])
    inlineasm = InlineAsm.get(rsqrt_approx_fnty,
                              "rsqrt.approx.f32 $0, $1;",
                              "=f,f", side_effect=True)
    val = bldr.load(fn.args[0])
    res = bldr.call(inlineasm, [val])
    bldr.store(res, fn.args[0])
    bldr.ret_void()

    # generate ptx
    nvvm.fix_data_layout(mod)
    nvvm.set_cuda_kernel(fn)
    nvvmir = str(mod)
    ptx = nvvm.llvm_to_ptx(nvvmir)

    # assertIn shows the searched text on failure instead of the bare
    # "False is not true" produced by assertTrue(x in y).
    self.assertIn("rsqrt.approx.f32", str(ptx))
def test_inline_rsqrt(self):
    # Build a kernel that loads a float through its pointer argument, runs
    # it through an inline-assembly 'rsqrt.approx.f32' call, and stores the
    # result back through the same pointer. The generated PTX must still
    # contain that instruction mnemonic.
    mod = Module.new(__name__)
    fnty = Type.function(Type.void(), [Type.pointer(Type.float())])
    fn = mod.add_function(fnty, 'cu_rsqrt')
    bldr = Builder.new(fn.append_basic_block('entry'))

    # Inline asm taking one float and producing one float ('=f,f').
    rsqrt_approx_fnty = Type.function(Type.float(), [Type.float()])
    inlineasm = InlineAsm.get(rsqrt_approx_fnty,
                              'rsqrt.approx.f32 $0, $1;',
                              '=f,f', side_effect=True)
    val = bldr.load(fn.args[0])
    res = bldr.call(inlineasm, [val])
    bldr.store(res, fn.args[0])
    bldr.ret_void()

    # generate ptx
    nvvm.fix_data_layout(mod)
    nvvm.set_cuda_kernel(fn)
    nvvmir = str(mod)
    ptx = nvvm.llvm_to_ptx(nvvmir)

    # assertIn shows the searched text on failure instead of the bare
    # "False is not true" produced by assertTrue(x in y).
    self.assertIn('rsqrt.approx.f32', str(ptx))
def test_inline_rsqrt(self):
    # Build a kernel that loads a float through its pointer argument, runs
    # it through an inline-assembly 'rsqrt.approx.f32' call, and stores the
    # result back through the same pointer. The generated PTX must still
    # contain that instruction mnemonic.
    mod = ir.Module(__name__)
    fnty = ir.FunctionType(ir.VoidType(), [ir.PointerType(ir.FloatType())])
    fn = ir.Function(mod, fnty, 'cu_rsqrt')
    bldr = ir.IRBuilder(fn.append_basic_block('entry'))

    # Inline asm taking one float and producing one float ('=f,f').
    rsqrt_approx_fnty = ir.FunctionType(ir.FloatType(), [ir.FloatType()])
    inlineasm = ir.InlineAsm(rsqrt_approx_fnty,
                             'rsqrt.approx.f32 $0, $1;',
                             '=f,f', side_effect=True)
    val = bldr.load(fn.args[0])
    res = bldr.call(inlineasm, [val])
    bldr.store(res, fn.args[0])
    bldr.ret_void()

    # generate ptx
    nvvm.fix_data_layout(mod)
    nvvm.set_cuda_kernel(fn)
    nvvmir = str(mod)
    ptx = nvvm.llvm_to_ptx(nvvmir)

    # assertIn shows the searched text on failure instead of the bare
    # "False is not true" produced by assertTrue(x in y).
    # NOTE(review): if llvm_to_ptx returns bytes here, str() yields the
    # bytes repr; the ASCII mnemonic still matches as a substring - confirm
    # whether an explicit decode would be preferable.
    self.assertIn('rsqrt.approx.f32', str(ptx))
def test_nvvm_ir_verify_fail(self):
    # Compiling a module whose target triple is bogus must raise an
    # NvvmError whose message mentions the invalid triple.
    bad_module = ir.Module("test_bad_ir")
    bad_module.triple = "unknown-unknown-unknown"
    fix_data_layout(bad_module)

    expected_message = 'Invalid target triple'
    with self.assertRaisesRegex(NvvmError, expected_message):
        llvm_to_ptx(str(bad_module))