def declare_runtime_library(self):
    """Declare the external print helpers and index them in ``self.runtime``.

    One ``void (T)`` function is declared in ``self.module`` for each of
    ``_print_int``, ``_print_float`` and ``_print_bool``; the resulting
    Function objects are stored in ``self.runtime`` under those names.
    """
    self.runtime = {}
    printers = (
        ('_print_int', int_type),
        ('_print_float', float_type),
        ('_print_bool', bool_type),
    )
    for symbol, operand_type in printers:
        signature = Type.function(Type.void(), [operand_type], False)
        self.runtime[symbol] = Function.new(self.module, signature, symbol)
def CodeGen(self):
    """Declare (or re-use) the LLVM function for this prototype.

    The function type is ``double(double, ...)`` with one double per entry
    in ``self.args``.

    Returns:
        The LLVM function, with its arguments named after ``self.args``.

    Raises:
        RuntimeError: if a function of this name already has a body, or was
            previously declared with a different number of arguments.
    """
    param_types = [Type.double()] * len(self.args)
    signature = Type.function(Type.double(), param_types, False)
    function = Function.new(g_llvm_module, signature, self.name)

    if function.name != self.name:
        # The name was already taken, so our fresh declaration got a
        # different name: drop it and look at the pre-existing function.
        function.delete()
        function = g_llvm_module.get_function_named(self.name)

        # A function that already has a body may not be redefined.
        if not function.is_declaration:
            raise RuntimeError('Redefinition of function.')

        # An earlier declaration must agree on the argument count.
        if len(function.args) != len(self.args):
            raise RuntimeError('Redeclaration of a function with different number of args.')

    # Give every LLVM argument the name used in the prototype.
    for llvm_arg, declared_name in zip(function.args, self.args):
        llvm_arg.name = declared_name

    return function
def code_gen(self, from_definition=False):
    """Declare this function prototype, or re-use a matching declaration.

    The function is tracked in the parent context's type table under the
    key ``"<name>()"``.  Argument names are recorded with their types in
    this node's own context.

    Args:
        from_definition: True when called while generating the function
            definition; only then may an existing declaration be re-used.

    Returns:
        A two-element list ``[llvm_function, self.context]``.

    Raises:
        cmexception.RedefineException: if the name is already declared and
            either the signature differs or ``from_definition`` is false.
    """
    top_context = self.context.parent_context
    plain_name = self.func_name_token.word
    table_key = plain_name + "()"

    return_type = Helper.get_type(self.ret_type.word)
    arg_types = [Helper.get_type(arg[1]) for arg in self.args]
    func_type = Type.function(return_type, arg_types, False)

    known_types = top_context.type_table
    first_declaration = table_key not in known_types

    if first_declaration:
        function = Function.new(g_llvm_module, func_type, plain_name)
        known_types[table_key] = func_type
    elif not (known_types[table_key] == func_type and from_definition):
        # Either the signature changed or this is a second bare declaration.
        raise cmexception.RedefineException(self.func_name_token, 'function')

    # Register each argument name with its type in the function's context.
    for arg in self.args:
        self.context.type_table[arg[0]] = Helper.get_type(arg[1])

    if not first_declaration:
        function = g_llvm_module.get_function_named(plain_name)

    return [function, self.context]
def __init__(self, env, node, restype, argtypes):
    """Declare the LLVM function described by ``node``.

    ``restype`` and every entry of ``argtypes`` are keys into the
    ``ctypes`` mapping.  The function is created in
    ``env.options['module']`` under the name ``node.value``.
    """
    result_type = ctypes[restype]
    parameter_types = [ctypes[type_key] for type_key in argtypes]
    function_type = Type.function(result_type, parameter_types)

    self.env = env
    self.node = node
    self.restype = result_type
    self.argtypes = parameter_types
    self.fntype = function_type
    self.fn = Function.new(env.options['module'], function_type, node.value)
def add_prelude(self):
    """Declare every ``prelude`` entry in ``self.module``.

    Each (name, function type) pair of ``prelude`` becomes a function of
    that name, stored in ``self.intrinsics`` under the same name.
    """
    for name, signature in prelude.iteritems():
        declared = Function.new(self.module, signature, name)
        self.intrinsics[name] = declared
def CodeGen(self):
    """Declare (or re-use) the LLVM function for this prototype.

    The function type is ``double(double, ...)`` with one double per entry
    in ``self.args``.  Every argument is named after the prototype and
    registered in ``g_named_values``.

    Returns:
        The LLVM function for this prototype.

    Raises:
        RuntimeError: if a function of this name already has a body, or was
            previously declared with a different number of arguments.
    """
    # Make the function type, ex: double(double, double).
    function_type = Type.function(Type.double(),
                                  [Type.double()] * len(self.args),
                                  False)
    function = Function.new(g_llvm_module, function_type, self.name)

    # If the name conflicts, there was already something with the same
    # name, and our fresh declaration was given a different one.
    if function.name != self.name:
        function.delete()
        function = g_llvm_module.get_function_named(self.name)

        # If the function already has a body, reject it.
        if not function.is_declaration:
            raise RuntimeError('Redefinition of function.')

        # TODO: this is essentially where function overloading would hook
        # in; maybe change in the future.
        # If the existing function took a different number of args, reject
        # it.  (The check must look at `function`; there is no `callee`
        # in this scope.)
        if len(function.args) != len(self.args):
            raise RuntimeError('Redeclaration of function with different' +
                               ' number of args')

    # Set names for all args and add them to the variable symbol table.
    for arg, arg_name in zip(function.args, self.args):
        arg.name = arg_name
        g_named_values[arg_name] = arg

    return function
def emit_extern_func(self, extnode):
    """Declare an external function and register it in ``self.functions``.

    Args:
        extnode: node providing ``name``, ``rettype``, ``varargs`` and the
            parameter list.

    Returns:
        The newly declared LLVM function.
    """
    assert extnode.name not in self.functions
    # NOTE(review): the parameter list is read from `extnode.args` as
    # (name, type) pairs, mirroring how `emit_func` reads `funcnode.args`;
    # the original iterated an undefined `funcnode`.  Confirm the attribute
    # name on the extern node.
    fntype = Type.function(self.type_map[extnode.rettype],
                           [self.type_map[t] for _, t in extnode.args],
                           extnode.varargs)
    fn = Function.new(self.module, fntype, extnode.name)
    self.functions[extnode.name] = fn
    # Return what was just registered (the original read a nonexistent
    # `self.funcnode` mapping).
    return fn
def init_llvm(self):
    """Set up the module, execution engine, builder and starter functions.

    Creates the module ``exprllvm`` with a ``void main()`` whose entry block
    calls the declared ``void print_int(int)`` with the constant 3 and then
    returns.  Both functions are stored in ``self.llvm_functions``.
    """
    module = Module.new("exprllvm")
    self.engine = ExecutionEngine.new(module)

    # functions
    self.llvm_functions = {}
    main_type = Type.function(Type.void(), [], False)
    main_func = Function.new(module, main_type, "main")
    self.llvm_functions['main'] = main_func

    # builder, positioned in main's entry block
    entry_block = main_func.append_basic_block("entry")
    self.builder = Builder.new(entry_block)

    # add some pre-defined functions
    print_int_type = Type.function(Type.void(), [Type.int()], False)
    print_int = Function.new(module, print_int_type, "print_int")
    self.llvm_functions['print_int'] = print_int

    three = Constant.int(Type.int(), 3)
    self.builder.call(print_int, [three])
    self.builder.ret_void()
def declare_function(self, func_block):
    """Declare a Function and add it to self.vars.

    The first instruction of ``func_block`` is unpacked as
    ``(_, name, ret_type, *args)``, where ``args`` alternates argument
    name and type name.  A lone empty list stands for "no arguments".
    """
    _, name, ret_type, *args = func_block.instructions[0]
    if args == [[]]:
        args = []

    if ret_type:
        arg_types = [typemap[type_name] for type_name in args[1::2]]
        ret_type = typemap[ret_type]
    else:
        # No explicit return type: the first trailing element is used as-is
        # and the function is declared without parameters.
        ret_type = args[0]
        arg_types = []

    signature = Type.function(ret_type, arg_types, False)
    function = Function.new(self.module, signature, name)

    # Every even-indexed entry of `args` is an argument name.
    for index, argname in enumerate(args[::2]):
        function.args[index].name = argname

    self.vars[name] = function
def codegen_proto(proto):
    """Return the LLVM function for a prototype node, declaring it if needed.

    ``proto[1]`` is the function name and ``proto[2]`` the list of argument
    names; the function type is ``double(double, ...)``.  Arguments are
    named and registered in ``named_values``.

    Raises:
        CodegenError: if the function already has a body, or exists with a
            different number of arguments.
    """
    func_name = proto[1]
    param_names = proto[2]
    func_type = Type.function(double_type, [double_type] * len(param_names))

    try:
        func = Function.get(the_module, func_name)
        if func.basic_block_count:
            raise CodegenError("redefinition of function")
        if len(func.args) != len(param_names):
            raise CodegenError("redefinition of function with different # args")
    except LLVMException:
        # Not found in the module yet: declare it now.
        func = Function.new(the_module, func_type, func_name)

    for llvm_arg, param_name in zip(func.args, param_names):
        llvm_arg.name = param_name
        named_values[param_name] = llvm_arg

    return func
def codegen_proto(proto):
    """Look up or declare the ``double(double, ...)`` function for ``proto``.

    The name is taken from ``proto[1]`` and the argument names from
    ``proto[2]``.  Each LLVM argument is renamed accordingly and stored in
    ``named_values``.

    Raises:
        CodegenError: when a function of that name already has basic blocks
            or was declared with another argument count.
    """
    fn_name, arg_names = proto[1], proto[2]
    arity = len(arg_names)
    fn_type = Type.function(double_type, [double_type] * arity)

    try:
        fn = Function.get(the_module, fn_name)
        if fn.basic_block_count:
            raise CodegenError("redefinition of function")
        if len(fn.args) != arity:
            raise CodegenError(
                "redefinition of function with different # args")
    except LLVMException:
        # Lookup failed, so this is the first declaration.
        fn = Function.new(the_module, fn_type, fn_name)

    for value, label in zip(fn.args, arg_names):
        value.name = label
        named_values[label] = value
    return fn
def start_function(self, name, retty, argtys):
    """Begin emitting a new function and make it the current one.

    Declares ``name`` in ``self.module``, creates its ``entry`` and ``exit``
    blocks, positions a fresh builder in ``entry`` and resets the
    per-function ``locals`` and ``stack`` tables.  Unless the return type is
    ``void_type``, a ``retval`` slot is allocated and stored in
    ``self.locals``.  The function is registered in ``self.globals``.
    """
    llvm_ret = arg_typemap(retty)
    llvm_args = [arg_typemap(ty) for ty in argtys]
    signature = Type.function(llvm_ret, llvm_args, False)

    function = Function.new(self.module, signature, name)
    entry_block = function.append_basic_block("entry")
    builder = Builder.new(entry_block)
    exit_block = function.append_basic_block("exit")

    frame_locals = {}
    if llvm_ret is not void_type:
        frame_locals['retval'] = builder.alloca(llvm_ret, "retval")

    self.function = function
    self.block = entry_block
    self.builder = builder
    self.exit_block = exit_block
    self.locals = frame_locals
    self.stack = {}
    self.globals[name] = function
def buildLoad(module, memory):
    '''Build the `load` function: return the value in memory at an address.

    Declares `int load(int)` in `module`.  Its single block sign-extends the
    address argument to 64 bits, indexes `memory` with it, and returns the
    loaded value.  The finished function is verified.
    '''
    signature = Type.function(Type.int(), [Type.int()], False)
    func = Function.new(module, signature, 'load')

    builder = Builder.new(func.append_basic_block('body'))

    # Pointer to memory[address]
    wide_address = builder.sext(func.args[0], Type.int(bits=64))
    slot = builder.gep(memory, [num(0), wide_address])

    builder.ret(builder.load(slot))
    func.verify()
def start_function(self, name, retty, argtys):
    """Open a new function for code generation.

    The function ``name`` is declared in ``self.module`` with types mapped
    through ``arg_typemap``.  An ``entry`` block (with a builder positioned
    in it) and an ``exit`` block are created, the ``locals`` and ``stack``
    tables are cleared, and -- for any return type other than
    ``void_type`` -- a ``retval`` slot is allocated.  Finally the function
    is recorded in ``self.globals``.
    """
    rettype = arg_typemap(retty)
    func_type = Type.function(
        rettype,
        [arg_typemap(arg) for arg in argtys],
        False,
    )

    self.function = Function.new(self.module, func_type, name)
    self.block = self.function.append_basic_block("entry")
    self.builder = Builder.new(self.block)
    self.exit_block = self.function.append_basic_block("exit")

    self.locals, self.stack = {}, {}
    needs_return_slot = rettype is not void_type
    if needs_return_slot:
        self.locals['retval'] = self.builder.alloca(rettype, "retval")

    self.globals[name] = self.function
def emit_func(self, funcnode):
    """Emit an LLVM function for ``funcnode`` and register it.

    Every argument is given its source name, copied into a stack slot
    allocated through the new scope's builder, and recorded in the scope's
    ``locals``.  The body is emitted with that scope pushed on
    ``self.scopes``, after which the function is verified.

    Args:
        funcnode: node providing ``name``, ``rettype``, ``args`` (pairs of
            name and type) and ``body``.

    Returns:
        The emitted LLVM function.
    """
    assert funcnode.name not in self.functions
    fntype = Type.function(self.type_map[funcnode.rettype],
                           [self.type_map[t] for _, t in funcnode.args],
                           False)
    fn = Function.new(self.module, fntype, funcnode.name)

    s = Scope("", fn)
    for arg, (name, ty) in zip(fn.args, funcnode.args):
        arg.name = name
        # Spill the incoming argument to a local slot and track the slot.
        var = s.locals[name] = s.builder.alloca(self.type_map[ty], name)
        s.builder.store(arg, var)

    self.scopes.append(s)
    # Register before emitting the body so it is visible while the body
    # is generated.  (The original referenced an undefined name `f` here
    # and in the return statement.)
    self.functions[funcnode.name] = self.current_func = fn
    self.emit_block(funcnode.body)
    self.scopes.pop()

    fn.verify()
    return fn
def buildSave(module, memory):
    '''Build the `save` function: store a value to memory at an address.

    Declares `void save(int, int)` in `module`; the first argument is the
    value and the second the address.  The address is sign-extended to 64
    bits and used to index `memory`.  The finished function is verified.
    '''
    signature = Type.function(Type.void(), [Type.int(), Type.int()], False)
    func = Function.new(module, signature, 'save')

    builder = Builder.new(func.append_basic_block('body'))

    # Pointer to memory[address]
    value, address = func.args
    wide_address = builder.sext(address, Type.int(bits=64))
    slot = builder.gep(memory, [num(0), wide_address])
    builder.store(value, slot)

    builder.ret_void()
    func.verify()
def gen_code(self, module, builder, variables):
    """Emit a complete ``double(double, ...)`` function for this node.

    The incoming ``builder`` and ``variables`` are not used: the body is
    generated with a fresh builder positioned in the new function's
    ``entry`` block and a fresh symbol table holding only the arguments.

    Returns:
        The verified LLVM function.
    """
    param_types = [Type.double()] * len(self.args)
    signature = Type.function(Type.double(), param_types, False)
    function = Function.new(module, signature, self.name)

    # Name the arguments and seed the function-local symbol table.
    scope = {}
    for llvm_arg, arg_name in zip(function.args, self.args):
        llvm_arg.name = arg_name
        scope[arg_name] = llvm_arg

    body_builder = Builder.new(function.append_basic_block('entry'))
    result = self.body.gen_code(module, body_builder, scope)
    body_builder.ret(result)

    function.verify()
    return function
def code_gen(self):
    """Declare (or re-use) the LLVM function for this prototype.

    The function type is ``double(double, ...)`` with one double per entry
    in ``self.args``.  Every argument is named after the prototype and
    registered in ``g_named_values``.

    Returns:
        The LLVM function for this prototype.

    Raises:
        RuntimeError: if a function of this name already has a body, or was
            previously declared with a different number of arguments.
    """
    funct_type = Type.function(
        Type.double(), [Type.double()] * len(self.args), False)
    function = Function.new(g_llvm_module, funct_type, self.name)

    if function.name != self.name:
        # The name was already taken: discard the fresh declaration and
        # validate the existing function instead.
        function.delete()
        function = g_llvm_module.get_function_named(self.name)

        if not function.is_declaration:
            raise RuntimeError('Redefinition of a function.')

        # Compare argument counts of the existing function and this
        # prototype.  (The original line had an unbalanced parenthesis and
        # compared against a nonexistent `self.callee`.)
        if len(function.args) != len(self.args):
            raise RuntimeError('Redeclaration of a function with different number of args.')

    for arg, arg_name in zip(function.args, self.args):
        arg.name = arg_name
        g_named_values[arg_name] = arg

    return function
def buildMain(module, source):
    '''Build main function.

    `main` is an `int ()` function laid out as a dispatch loop: the `entry`
    block initialises a `next` slot to 0 and jumps to `loop`, which switches
    on the slot's value.  Each (label, expression) pair of `source`, in
    sorted order, gets its own `case-<label>` block that runs the
    expression's instructions, stores the value popped from the stack into
    `next` and jumps back to `loop`.  Labels with no case fall through to
    `exit`, which returns 0.
    '''
    main = Function.new(module, Type.function(Type.int(), [], False), 'main')

    # Entry block: allocate and zero the "next label" slot.
    entry_block = main.append_basic_block('entry')
    entry_builder = Builder.new(entry_block)
    next_slot = entry_builder.alloca(Type.int(), 'next')
    entry_builder.store(num(0), next_slot)

    # Exit block: return 0.
    exit_block = main.append_basic_block('exit')
    Builder.new(exit_block).ret(num(0))

    # Loop block: switch on the current label, defaulting to exit.
    loop_block = main.append_basic_block('loop')
    loop_builder = Builder.new(loop_block)
    jump = loop_builder.load(next_slot, 'jump')
    switch = loop_builder.switch(jump, exit_block)

    # Terminate the entry block now that the loop block exists.
    Builder.new(entry_block).branch(loop_block)

    # One case block per expression, each jumping back up to the switch.
    for label, expression in sorted(source.items()):
        case_block = main.append_basic_block('case-{}'.format(label))
        case_builder = Builder.new(case_block)
        stack = []
        for instruction in expression:
            instruction.gen(module, case_builder, stack)
        case_builder.store(stack.pop(), next_slot)
        case_builder.branch(loop_block)
        switch.add_case(num(label), case_block)

    return main
def code_gen(self, from_definition=False):
    """Declare this prototype or hand back an already-declared function.

    Declarations are tracked in the parent context's type table, keyed by
    the function name followed by ``"()"``.

    Args:
        from_definition: set when the prototype belongs to a definition;
            a previously declared function with an equal type is then
            re-used instead of being rejected.

    Returns:
        ``[llvm_function, self.context]`` (a list).

    Raises:
        cmexception.RedefineException: if the name is already declared with
            a different type, or with the same type while
            ``from_definition`` is false.
    """
    def bind_argument_types():
        # Record every argument's type under its name in this node's context.
        for arg in self.args:
            self.context.type_table[arg[0]] = Helper.get_type(arg[1])

    top_context = self.context.parent_context
    func_name_with_tag = self.func_name_token.word + "()"
    return_type = Helper.get_type(self.ret_type.word)
    arg_types = [Helper.get_type(arg[1]) for arg in self.args]
    func_type = Type.function(return_type, arg_types, False)

    # First sighting of this name: declare it.
    if func_name_with_tag not in top_context.type_table:
        function = Function.new(g_llvm_module, func_type,
                                self.func_name_token.word)
        top_context.type_table[func_name_with_tag] = func_type
        bind_argument_types()
        return [function, self.context]

    # Already declared: only a definition with an equal type may proceed.
    same_signature = top_context.type_table[func_name_with_tag] == func_type
    if not (same_signature and from_definition):
        raise cmexception.RedefineException(self.func_name_token, 'function')

    bind_argument_types()
    return [g_llvm_module.get_function_named(self.func_name_token.word),
            self.context]
def code(self, context):
    """Declare (or re-use) the function for this prototype in ``context``.

    The function type is ``double(double, ...)``.  The module's existing
    functions are searched by name; a match must still be a declaration
    with the same argument count.  Arguments are named and added to
    ``context.scope``.

    Raises:
        RuntimeError: on redefinition, or on redeclaration with a different
            number of arguments.
    """
    # Make the function type, eg. double(double, double).
    func_args = (Type.double(),) * len(self.args)
    func_type = Type.function(Type.double(), func_args, False)

    existing = next(
        (candidate for candidate in context.module.functions
         if candidate.name == self.name),
        None,
    )

    if existing is None:
        func = Func.new(context.module, func_type, self.name)
    else:
        if not existing.is_declaration:
            raise RuntimeError('Redefinition of function.')
        if len(existing.args) != len(self.args):
            raise RuntimeError('Redeclaration of a function with a '
                               'different number of args.')
        func = existing

    # Add arguments to symbol table.
    for llvm_arg, arg_name in zip(func.args, self.args):
        llvm_arg.name = arg_name
        context.scope[arg_name] = llvm_arg

    return func
def CodeGen(self):
    """Declare (or re-use) the function for this prototype.

    Every parameter and the return value are ``i8*``.  The node's calling
    convention is applied both to the fresh declaration and, on a name
    clash, to the pre-existing function.

    Returns:
        The LLVM function, with arguments named after ``self.args``.

    Raises:
        RuntimeError: if the existing function already has a body or a
            different number of arguments.
    """
    print >> stderr, "codegening prototype node"

    return_type = Type.pointer(Type.int(8))
    param_types = [Type.pointer(Type.int(8))] * len(self.args)
    signature = Type.function(return_type, param_types, False)

    function = Function.new(G_LLVM_MODULE, signature, self.name)
    function.calling_convention = self.calling_convention

    name_was_taken = function.name != self.name
    if name_was_taken:
        # Discard the fresh declaration and validate the existing function.
        function.delete()
        function = G_LLVM_MODULE.get_function_named(self.name)
        function.calling_convention = self.calling_convention

        if not function.is_declaration:
            raise RuntimeError('Redefinition of function.')
        if len(function.args) != len(self.args):
            raise RuntimeError('Redeclaration of a function with different number of args.')

    for llvm_arg, declared_name in zip(function.args, self.args):
        llvm_arg.name = declared_name

    return function
def emit_extern_func(self, name, rettypename, *parmtypenames):
    """Declare an external function and store it in ``self.vars[name]``.

    The return and parameter type names are looked up in ``typemap``.
    """
    return_type = typemap[rettypename]
    parameter_types = [typemap[type_name] for type_name in parmtypenames]
    signature = Type.function(return_type, parameter_types, False)
    declared = Function.new(self.module, signature, name)
    self.vars[name] = declared
def add_prelude(self):
    """Declare every ``prelude`` entry in ``self.module``.

    Each declared function gets external linkage and hidden visibility and
    is stored in ``self.intrinsics`` under its name.
    """
    for name, signature in prelude.items():
        declared = Function.new(self.module, signature, name)
        declared.linkage = lc.LINKAGE_EXTERNAL
        declared.visibility = lc.VISIBILITY_HIDDEN
        self.intrinsics[name] = declared
def lift(self, outrank, outkind):
    """Take the current kernel and "lift" it so that the output has
    rank given by output_rank and kind given by outkind.

    All arguments will have the same kind as outkind in the signature
    of the lifted kernel and all ranks will be adjusted the same amount
    as output_rank.

    This creates a new BlazeElementKernel whose function calls the
    underlying kernel's function multiple times.

    Example: (let rn == rank-n)
      We need an r2, r2 -> r2 kernel and we have an r1, r1 -> r1
      kernel.

      We create a kernel with rank r2, r2 -> r2 that does the equivalent of

      for i in range(n0):
          out[i] = inner_kernel(in0[i], in1[i])

    Lifted kernels are memoized in ``self._lifted_cache`` under the
    generated function name, so a repeated request returns the cached
    kernel.  ``self`` is returned unchanged when the requested rank equals
    the current output rank, except when that rank is 0 and every argument
    kind is SCALAR or POINTER.

    Raises:
        ValueError: if ``outkind`` is not one of the first three
            ``array_kinds``, if ``outrank`` is smaller than the current
            output rank, or if an array argument's kind differs from
            ``outkind``.
    """
    # Accept the one-character spelling of a kind and translate it.
    # NOTE(review): `in` against a str requires a str on the left, so a
    # non-string outkind would raise TypeError here -- confirm callers
    # always pass a character.
    if outkind in 'CFS':
        from .llvm_array import kindfromchar
        outkind = kindfromchar[outkind]

    # The generated function name doubles as the cache key.
    name = self.func.name + "_lifted_%d_%s" % (outrank, orderchar[outkind])
    try_bk = self._lifted_cache.get(name, None)
    if try_bk is not None:
        return try_bk

    if outkind not in array_kinds[:3]:
        raise ValueError("Invalid kind specified for output: %s" % outkind)

    # The output is the last argument, so its rank is the kernel's rank.
    cur_rank = self.ranks[-1]
    if outrank == cur_rank:
        if not (outrank == 0 and all(x in [SCALAR, POINTER] for x in self.kinds)):
            return self  # no-op

    dr = outrank - cur_rank
    if dr < 0:
        raise ValueError("Output rank (%d) must be greater than current "
                         "rank (%d)" % (outrank, cur_rank))

    # Array arguments (tuple kinds) must already use the requested kind.
    if not all((x in [SCALAR, POINTER] or x[0]==outkind) for x in self.kinds):
        raise ValueError("Incompatible kernel arguments for "
                         "lifting: %s" % self.kinds)
    # Shift every argument's rank by the same difference in ranks
    outranks = [ri + dr for ri in self.ranks]
    func_type = self._lifted_func_type(outranks, outkind)
    func = Function.new(self.module, func_type, name=name)
    block = func.append_basic_block('entry')
    builder = lc.Builder.new(block)

    def ensure_llvm(arg, kind):
        # Unwrap an LLArray to its underlying pointer; pass anything else
        # through.  `kind` is accepted but not used.
        if isinstance(arg, LLArray):
            return arg.array_ptr
        else:
            return arg

    arg_arrays = [LLArray(arg, builder) for arg in func.args]
    begins = [const_intp(0)]*dr
    # This is the shape of the output array
    ends = arg_arrays[-1].shape
    loop_nest_ctx = loop_nest(builder, begins, ends)

    with loop_nest_ctx as loop:
        # A scalar-returning inner kernel takes only the inputs; otherwise
        # the output array is passed as the last argument as well.
        if self.kinds[-1] == SCALAR:
            inargs = arg_arrays[:-1]
            inkinds = self.kinds[:-1]
        else:
            inargs = arg_arrays
            inkinds = self.kinds
        callargs = [ensure_llvm(arg[loop.indices], kind)
                    for arg, kind in zip(inargs, inkinds)]
        res = builder.call(self.func, callargs)
        # Scalar results are written into the output array by this wrapper.
        if self.kinds[-1] == SCALAR:
            arg_arrays[-1][loop.indices] = res
        builder.branch(loop.incr)

    # Terminate the block that precedes the loop, then finish the function
    # in the block after it.
    builder.branch(loop.entry)
    builder.position_at_end(loop.end)
    builder.ret_void()

    def add_rank(dshape, dr):
        # Prefix the datashape with `dr` new dimensions named L0, L1, ...
        new = ["L%d, " % i for i in range(dr)]
        new.append(str(dshape))
        return make_dshape("".join(new))

    dshapes = [add_rank(dshape, dr) for dshape in self.dshapes]
    try_bk = BlazeElementKernel(func, dshapes)
    self._lifted_cache[name] = try_bk
    return try_bk
def fuse_kerneltree(tree, module_or_name):
    """Fuse the kernel tree into a single kernel object with the common names

    Examples:

      add(multiply(b,c),subtract(d,f))

        var tmp0 = multiply(b,c)
        var tmp1 = subtract(d,f)

        return add(tmp0, tmp1)

      var tmp0;
      var tmp1;

      multiply(b,c,&tmp0)
      subtract(d,f,&tmp1)

      add(tmp0, tmp1, &res)

    Args:
        tree: the kernel tree to fuse.
        module_or_name: an existing module to emit into, or a string used
            as the name of a newly created module.

    Returns:
        A ``(newkernel, args)`` tuple: the fused BlazeElementKernel and the
        argument list obtained from ``get_fused_type(tree)``.
    """
    if isinstance(module_or_name, _strtypes):
        module = Module.new(module_or_name)
    else:
        module = module_or_name
    args, func_type = get_fused_type(tree)
    outdshape = tree.kernel.dshapes[-1]

    # NOTE(review): the body is only generated when "<tree.name>_fused" is
    # not already in the module; if the lookup succeeds the existing
    # function is wrapped as-is.  `builder` exists only on the except path,
    # so the emission code has to live there -- confirm this layout against
    # the original file.
    try:
        func = module.get_function_named(tree.name+"_fused")
    except LLVMException:
        func = Function.new(module, func_type, tree.name+"_fused")
        block = func.append_basic_block('entry')
        builder = lc.Builder.new(block)

        # TODO: Create wrapped function for functions
        #   that need to loop over their inputs

        # Attach the llvm_object to the Argument objects
        for i, arg in enumerate(args):
            arg.llvm_obj = func.args[i]

        # topologically sort the kernel-tree nodes and then for each node
        #  site we issue instructions to compute the value
        nodelist = tree.sorted_nodes()

        cleanup = []  # Objects to deallocate any temporary heap memory needed
                      #   must have a _dealloc method

        def _temp_cleanup():
            # Release every temporary produced so far; None entries mean
            # nothing needs releasing for that node.
            for obj in cleanup:
                if obj is not None:
                    obj._dealloc()

        #import pdb
        #pdb.set_trace()

        # Emit every node except the last, which produces the result.
        for node in nodelist[:-1]:
            node.kernel.attach(module)
            new = insert_instructions(node, builder)
            cleanup.append(new)

        nodelist[-1].kernel.attach(module)

        if tree.kernel.kinds[-1] == SCALAR:
            # Scalar result: return the final node's value directly.
            new = insert_instructions(nodelist[-1], builder)
            cleanup.append(new)
            _temp_cleanup()
            builder.ret(nodelist[-1].llvm_obj)
        else:
            # Non-scalar result: the final node receives the function's
            # last argument, and the fused function returns void.
            new = insert_instructions(nodelist[-1], builder, func.args[-1])
            cleanup.append(new)
            _temp_cleanup()
            builder.ret_void()

    dshapes = [get_kernel_dshape(arg) for arg in args]
    dshapes.append(outdshape)
    newkernel = BlazeElementKernel(func, dshapes)

    return newkernel, args
def create_ckernel_interface(bek, strided):
    """Create a function wrapper with a CKernel interface according to
    `strided`.

    The wrapper is emitted into a clone of ``bek.module`` and named after
    the kernel function with a ``_strided_ckernel`` or ``_single_ckernel``
    suffix.  The strided variant wraps the kernel call in a loop that runs
    while the ``count`` argument, decremented once per iteration, is
    non-zero, advancing the destination pointer and every source pointer by
    its stride after each call.

    Parameters
    ----------
    bek : BlazeElementKernel
        The blaze kernel to compile into an unbound single ckernel.
    strided : bool
        If true, returns an ExprStridedOperation, otherwise an
        ExprSingleOperation.

    Returns
    -------
    (module, ck_func)
        The cloned module and the wrapper function created in it.
    """

    # TODO: Decouple this from BlazeElementKernel

    # The last kind describes the output; all the others are inputs.
    inarg_count = len(bek.kinds)-1
    module = bek.module.clone()
    if strided:
        ck_func_name = bek.func.name +"_strided_ckernel"
        ck_func = Function.new(module, strided_ckernel_func_type,
                               name=ck_func_name)
    else:
        ck_func_name = bek.func.name +"_single_ckernel"
        ck_func = Function.new(module, single_ckernel_func_type,
                               name=ck_func_name)
    entry_block = ck_func.append_basic_block('entry')
    builder = lc.Builder.new(entry_block)
    if strided:
        dst_ptr_arg, dst_stride_arg, \
            src_ptr_arr_arg, src_stride_arr_arg, \
            count_arg, extra_ptr_arg = ck_func.args
        dst_stride_arg.name = 'dst_stride'
        src_stride_arr_arg.name = 'src_strides'
        count_arg.name = 'count'
    else:
        dst_ptr_arg, src_ptr_arr_arg, extra_ptr_arg = ck_func.args
    dst_ptr_arg.name = 'dst_ptr'
    src_ptr_arr_arg.name = 'src_ptrs'
    extra_ptr_arg.name = 'extra_ptr'

    if strided:
        # Allocate an array of pointer counters for the
        # strided loop
        src_ptr_arr_tmp = builder.alloca_array(int8_p_type,
                        lc.Constant.int(int32_type, inarg_count), 'src_ptr_arr')
        # Copy the pointers
        for i in range(inarg_count):
            builder.store(builder.load(builder.gep(src_ptr_arr_arg,
                            (lc.Constant.int(int32_type, i),))),
                          builder.gep(src_ptr_arr_tmp,
                            (lc.Constant.int(int32_type, i),)))
        # Get all the src strides
        src_stride_vals = [builder.load(builder.gep(src_stride_arr_arg,
                                        (lc.Constant.int(int32_type, i),)))
                           for i in range(inarg_count)]
        # Replace src_ptr_arr_arg with this local variable
        src_ptr_arr_arg = src_ptr_arr_tmp

        # Initialize some more basic blocks for the strided loop
        looptest_block = ck_func.append_basic_block('looptest')
        loopbody_block = ck_func.append_basic_block('loopbody')
        end_block = ck_func.append_basic_block('finish')

        # Finish the entry block by branching
        # to the looptest block
        builder.branch(looptest_block)

        # The looptest block continues the loop while counter != 0
        builder.position_at_end(looptest_block)
        counter_phi = builder.phi(count_arg.type)
        counter_phi.add_incoming(count_arg, entry_block)
        dst_ptr_phi = builder.phi(dst_ptr_arg.type)
        dst_ptr_phi.add_incoming(dst_ptr_arg, entry_block)
        # From here on the "destination pointer" is the loop-carried phi.
        dst_ptr_arg = dst_ptr_phi
        kzero = lc.Constant.int(count_arg.type, 0)
        pred = builder.icmp(lc.ICMP_NE, counter_phi, kzero)
        builder.cbranch(pred, loopbody_block, end_block)

        # The loopbody block decrements the counter, and executes
        # one kernel iteration
        builder.position_at_end(loopbody_block)
        kone = lc.Constant.int(counter_phi.type, 1)
        counter_dec = builder.sub(counter_phi, kone)
        counter_phi.add_incoming(counter_dec, loopbody_block)

    # Convert the src pointer args to the
    # appropriate kinds for the llvm call
    args = build_llvm_src_ptrs(builder, src_ptr_arr_arg,
                    bek.dshapes, bek.kinds[:-1], bek.argtypes)
    # Call the function and store in the dst
    kind = bek.kinds[-1]
    # Look the kernel function up by name in the cloned module.
    func = module.get_function_named(bek.func.name)
    if kind == lla.SCALAR:
        # Scalar kernels return their result; store it through dst.
        dst_ptr = builder.bitcast(dst_ptr_arg,
                        Type.pointer(bek.return_type))
        dst_val = builder.call(func, args)
        builder.store(dst_val, dst_ptr)
    else:
        # Other kinds receive the destination as a trailing argument.
        dst_ptr = build_llvm_arg_ptr(builder, dst_ptr_arg,
                        bek.dshapes[-1], kind, bek.argtypes[-1])
        builder.call(func, args + [dst_ptr])

    if strided:
        # Finish the loopbody block by incrementing all the pointers
        # and branching to the looptest block
        dst_ptr_inc = builder.gep(dst_ptr_arg, (dst_stride_arg,))
        dst_ptr_phi.add_incoming(dst_ptr_inc, loopbody_block)
        # Increment the src pointers
        for i in range(inarg_count):
            src_ptr_val = builder.load(builder.gep(src_ptr_arr_tmp,
                            (lc.Constant.int(int32_type, i),)))
            src_ptr_inc = builder.gep(src_ptr_val, (src_stride_vals[i],))
            builder.store(src_ptr_inc,
                          builder.gep(src_ptr_arr_tmp,
                            (lc.Constant.int(int32_type, i),)))
        builder.branch(looptest_block)

        # The end block just returns
        builder.position_at_end(end_block)

    builder.ret_void()

    #print("Function before optimization passes:")
    #print(ck_func)
    #module.verify()

    return module, ck_func
def unbound_single_ckernel(self):
    """Creates an UnboundCKernelFunction with the ExprSingleOperation prototype.

    The wrapper function ``<kernel name>_single_ckernel`` is emitted into a
    clone of ``self.module`` with the signature
    ``void (i8* dst_ptr, i8** src_ptrs, i8* extra_ptr)``, optimised,
    JIT-compiled, and wrapped together with a ctypes description of the
    extra kernel data and a binding function that copies operand shapes
    into it.  The result is cached on ``self._unbound_single_ckernel``.

    Raises:
        ValueError: if an array argument is not C contiguous.
        TypeError: if an argument kind or a shape dimension type is not
            supported.
    """
    import ctypes
    if self._unbound_single_ckernel is None:
        i8_p_type = Type.pointer(Type.int(8))
        func_type = Type.function(void_type,
                        [i8_p_type, Type.pointer(i8_p_type), i8_p_type])
        module = self.module.clone()
        single_ck_func_name = self.func.name +"_single_ckernel"
        single_ck_func = Function.new(module, func_type,
                                      name=single_ck_func_name)
        block = single_ck_func.append_basic_block('entry')
        builder = lc.Builder.new(block)
        dst_ptr_arg, src_ptr_arr_arg, extra_ptr_arg = single_ck_func.args
        dst_ptr_arg.name = 'dst_ptr'
        src_ptr_arr_arg.name = 'src_ptrs'
        extra_ptr_arg.name = 'extra_ptr'
        # Build up the kernel data structure. Currently, this means
        # adding a shape field for each array argument. First comes
        # the kernel data prefix with a spot for the 'owner' reference added.
        input_field_indices = []
        kernel_data_fields = [Type.struct([i8_p_type]*3)]
        kernel_data_ctypes_fields = [('base', JITKernelData)]
        for i, (kind, a) in enumerate(izip(self.kinds, self.argtypes)):
            if isinstance(kind, tuple):
                if kind[0] != lla.C_CONTIGUOUS:
                    raise ValueError('only support C contiguous array presently')
                # Remember which struct field holds this operand's shape.
                input_field_indices.append(len(kernel_data_fields))
                kernel_data_fields.append(Type.array(
                                intp_type, len(self.dshapes[i])-1))
                kernel_data_ctypes_fields.append(('operand_%d' % i,
                                c_ssize_t * (len(self.dshapes[i])-1)))
            elif kind in [SCALAR, POINTER]:
                # No shape field is needed for scalars and pointers.
                input_field_indices.append(None)
            else:
                # Report the offending kind (the original formatted an
                # undefined name `k`, which raised NameError instead).
                raise TypeError(("unbound_single_ckernel codegen doesn't " +
                                "support the parameter kind %r yet") % (kind,))
        # Make an LLVM and ctypes type for the extra data pointer.
        kernel_data_llvmtype = Type.struct(kernel_data_fields)

        class kernel_data_ctypestype(ctypes.Structure):
            _fields_ = kernel_data_ctypes_fields

        # Cast the extra pointer to the right llvm type
        extra_struct = builder.bitcast(extra_ptr_arg,
                        Type.pointer(kernel_data_llvmtype))
        # Convert the src pointer args to the
        # appropriate kinds for the llvm call
        args = []
        for i, (kind, atype) in enumerate(izip(self.kinds[:-1], self.argtypes)):
            if kind == SCALAR:
                # Scalars are passed by value: load through the typed pointer.
                src_ptr = builder.bitcast(builder.load(
                                builder.gep(src_ptr_arr_arg,
                                        (lc.Constant.int(intp_type, i),))),
                                    Type.pointer(atype))
                src_val = builder.load(src_ptr)
                args.append(src_val)
            elif kind == POINTER:
                # Pointers are passed as the typed pointer itself.
                src_ptr = builder.bitcast(builder.load(
                                builder.gep(src_ptr_arr_arg,
                                        (lc.Constant.int(intp_type, i),))),
                                    Type.pointer(atype))
                args.append(src_ptr)
            elif isinstance(kind, tuple):
                src_ptr = builder.bitcast(builder.load(
                                builder.gep(src_ptr_arr_arg,
                                        (lc.Constant.int(intp_type, i),))),
                                    Type.pointer(kind[2]))
                # First get the shape of this parameter. This will
                # be a combination of Fixed and TypeVar (Var unsupported
                # here for now)
                shape = self.dshapes[i][:-1]
                # Get the llvm array
                arr_var = builder.alloca(atype.pointee)
                # Field 0 of the array struct receives the data pointer.
                builder.store(src_ptr,
                              builder.gep(arr_var,
                                (lc.Constant.int(int32_type, 0),
                                 lc.Constant.int(int32_type, 0))))
                for j, sz in enumerate(shape):
                    if isinstance(sz, Fixed):
                        # If the shape is already known at JIT compile time,
                        # insert the constant
                        shape_el_ptr = builder.gep(arr_var,
                                        (lc.Constant.int(int32_type, 0),
                                         lc.Constant.int(int32_type, 1),
                                         lc.Constant.int(intp_type, j)))
                        builder.store(lc.Constant.int(intp_type,
                                                operator.index(sz)),
                                      shape_el_ptr)
                    elif isinstance(sz, TypeVar):
                        # TypeVar types are only known when the kernel is bound,
                        # so copy it from the extra data pointer
                        sz_from_extra_ptr = builder.gep(extra_struct,
                                        (lc.Constant.int(int32_type, 0),
                                         lc.Constant.int(int32_type,
                                                input_field_indices[i]),
                                         lc.Constant.int(intp_type, j)))
                        sz_from_extra = builder.load(sz_from_extra_ptr)
                        shape_el_ptr = builder.gep(arr_var,
                                        (lc.Constant.int(int32_type, 0),
                                         lc.Constant.int(int32_type, 1),
                                         lc.Constant.int(intp_type, j)))
                        builder.store(sz_from_extra, shape_el_ptr)
                    else:
                        raise TypeError(("unbound_single_ckernel codegen doesn't " +
                                        "support dimension type %r") % type(sz))
                args.append(arr_var)
        # Call the function and store in the dst
        kind = self.kinds[-1]
        func = module.get_function_named(self.func.name)
        if kind == SCALAR:
            dst_ptr = builder.bitcast(dst_ptr_arg,
                            Type.pointer(self.return_type))
            dst_val = builder.call(func, args)
            builder.store(dst_val, dst_ptr)
        elif kind == POINTER:
            dst_ptr = builder.bitcast(dst_ptr_arg,
                            Type.pointer(self.return_type))
            builder.call(func, args + [dst_ptr])
        elif isinstance(kind, tuple):
            dst_ptr = builder.bitcast(dst_ptr_arg,
                            Type.pointer(kind[2]))
            # First get the shape of the output. This will
            # be a combination of Fixed and TypeVar (Var unsupported
            # here for now)
            shape = self.dshapes[-1][:-1]
            # Get the llvm array
            arr_var = builder.alloca(self.argtypes[-1].pointee)
            builder.store(dst_ptr,
                          builder.gep(arr_var,
                            (lc.Constant.int(int32_type, 0),
                             lc.Constant.int(int32_type, 0))))
            for j, sz in enumerate(shape):
                if isinstance(sz, Fixed):
                    # If the shape is already known at JIT compile time,
                    # insert the constant
                    shape_el_ptr = builder.gep(arr_var,
                                    (lc.Constant.int(int32_type, 0),
                                     lc.Constant.int(int32_type, 1),
                                     lc.Constant.int(intp_type, j)))
                    builder.store(lc.Constant.int(intp_type,
                                            operator.index(sz)),
                                  shape_el_ptr)
                elif isinstance(sz, TypeVar):
                    # TypeVar types are only known when the kernel is bound,
                    # so copy it from the extra data pointer
                    sz_from_extra_ptr = builder.gep(extra_struct,
                                    (lc.Constant.int(int32_type, 0),
                                     lc.Constant.int(int32_type,
                                            input_field_indices[-1]),
                                     lc.Constant.int(intp_type, j)))
                    sz_from_extra = builder.load(sz_from_extra_ptr)
                    shape_el_ptr = builder.gep(arr_var,
                                    (lc.Constant.int(int32_type, 0),
                                     lc.Constant.int(int32_type, 1),
                                     lc.Constant.int(intp_type, j)))
                    builder.store(sz_from_extra, shape_el_ptr)
                else:
                    raise TypeError(("unbound_single_ckernel codegen doesn't " +
                                    "support dimension type %r") % type(sz))
            builder.call(func, args + [arr_var])
        else:
            raise TypeError(("single_ckernel codegen doesn't " +
                            "support kind %r") % kind)
        builder.ret_void()

        #print("Function before optimization passes:")
        #print(single_ck_func)
        #module.verify()

        import llvm.ee as le
        from llvm.passes import build_pass_managers
        tm = le.TargetMachine.new(opt=3, cm=le.CM_JITDEFAULT, features='')
        pms = build_pass_managers(tm, opt=3, fpm=False,
                        vectorize=True, loop_vectorize=True)
        pms.pm.run(module)

        #print("Function after optimization passes:")
        #print(single_ck_func)

        # DEBUGGING: Verify the module.
        #module.verify()
        # TODO: Cache the EE - the interplay with the func_ptr
        #       was broken, so just avoiding caching for now
        # FIXME: Temporarily disabling AVX, because of misdetection
        #        in linux VMs. Some code is in llvmpy's workarounds
        #        submodule related to this.
        ee = le.EngineBuilder.new(module).mattrs("-avx").create()
        func_ptr = ee.get_pointer_to_function(single_ck_func)
        # Create a function which copies the shape from data
        # descriptors to the extra data struct.
        if len(kernel_data_ctypes_fields) == 1:
            # Only the 'base' prefix exists, so there is nothing to bind.
            def bind_func(estruct, dst_dd, src_dd_list):
                pass
        else:
            def bind_func(estruct, dst_dd, src_dd_list):
                for i, (ds, dd) in enumerate(
                                izip(self.dshapes, src_dd_list + [dst_dd])):
                    shape = [operator.index(dim)
                                    for dim in dd.dshape[-len(ds):-1]]
                    cshape = getattr(estruct, 'operand_%d' % i)
                    for j, dim_size in enumerate(shape):
                        cshape[j] = dim_size

        # The engine is passed along with the function pointer in the
        # final tuple argument.
        self._unbound_single_ckernel = UnboundCKernelFunction(
                        ExprSingleOperation(func_ptr),
                        kernel_data_ctypestype,
                        bind_func,
                        (ee, func_ptr))

    return self._unbound_single_ckernel
def LLVMFunction(name, args, ret, m):
    """Declare and return function ``name`` in module ``m``.

    ``ret`` is the return type and ``args`` the list of argument types.
    """
    return Function.new(m, Type.function(ret, args), name)
def op_DEF_FOREIGN(self, name, retty, argtys):
    """Declare a foreign function and register it in ``self.globals``.

    Args:
        name: symbol name of the function.
        retty: return type, translated through ``arg_typemap``.
        argtys: iterable of argument types, each translated through
            ``arg_typemap``.
    """
    # Build a real list: on Python 3 `map` returns a lazy iterator, whereas
    # start_function hands Type.function a list of argument types.
    largtys = [arg_typemap(argty) for argty in argtys]
    lretty = arg_typemap(retty)
    func_type = Type.function(lretty, largtys, False)
    self.globals[name] = Function.new(self.module, func_type, name)
def jit_compile_unbound_single_ckernel(bek, strided):
    """Creates an UnboundCKernelFunction with either the
    ExprSingleOperation prototype or the ExprStridedOperation
    prototype depending on the `strided` parameter.

    A clone of ``bek.module`` receives a new wrapper function named
    ``<bek.func.name>_strided_ckernel`` or ``<bek.func.name>_single_ckernel``.
    The wrapper calls the kernel function (looked up by name in the
    cloned module); in strided mode the call is wrapped in a loop that
    runs while the ``count`` argument is non-zero. The cloned module is
    then optimized and JIT-compiled, and the resulting function pointer
    is wrapped in the returned object.

    Parameters
    ----------
    bek : BlazeElementKernel
        The blaze kernel to compile into an unbound single ckernel.
    strided : bool
        If true, returns an ExprStridedOperation, otherwise an
        ExprSingleOperation.

    Returns
    -------
    UnboundCKernelFunction
        Built from ``optype(func_ptr)``, the ctypes kernel-data type,
        a ``bind_func`` callback and the tuple ``(ee, func_ptr)``.
    """
    # The last entry of bek.kinds describes the destination, the rest
    # are inputs.
    inarg_count = len(bek.kinds)-1
    # Work on a clone so the wrapper is not added to bek.module itself
    module = bek.module.clone()
    if strided:
        ck_func_name = bek.func.name +"_strided_ckernel"
        ck_func = Function.new(module, strided_ckernel_func_type,
                               name=ck_func_name)
    else:
        ck_func_name = bek.func.name +"_single_ckernel"
        ck_func = Function.new(module, single_ckernel_func_type,
                               name=ck_func_name)
    entry_block = ck_func.append_basic_block('entry')
    builder = lc.Builder.new(entry_block)
    # Unpack and name the wrapper's arguments; the strided prototype
    # carries three extra arguments (dst stride, src strides, count).
    if strided:
        dst_ptr_arg, dst_stride_arg, \
            src_ptr_arr_arg, src_stride_arr_arg, \
            count_arg, extra_ptr_arg = ck_func.args
        dst_stride_arg.name = 'dst_stride'
        src_stride_arr_arg.name = 'src_strides'
        count_arg.name = 'count'
    else:
        dst_ptr_arg, src_ptr_arr_arg, extra_ptr_arg = ck_func.args
    dst_ptr_arg.name = 'dst_ptr'
    src_ptr_arr_arg.name = 'src_ptrs'
    extra_ptr_arg.name = 'extra_ptr'
    # Build llvm and ctypes structures for the kernel data, using
    # the argument types.
    kd_llvmtype, kd_ctypestype = args_to_kernel_data_struct(bek.kinds, bek.argtypes)
    # Cast the extra pointer to the right llvm type
    # NOTE(review): extra_struct is not referenced again in this
    # function -- confirm whether the cast is still needed.
    extra_struct = builder.bitcast(extra_ptr_arg,
                                   Type.pointer(kd_llvmtype))
    if strided:
        # Allocate an array of pointer counters for the
        # strided loop
        src_ptr_arr_tmp = builder.alloca_array(int8_p_type,
                        lc.Constant.int(int32_type, inarg_count), 'src_ptr_arr')
        # Copy the pointers
        for i in range(inarg_count):
            builder.store(builder.load(builder.gep(src_ptr_arr_arg,
                            (lc.Constant.int(int32_type, i),))),
                          builder.gep(src_ptr_arr_tmp,
                            (lc.Constant.int(int32_type, i),)))
        # Get all the src strides
        src_stride_vals = [builder.load(builder.gep(src_stride_arr_arg,
                                        (lc.Constant.int(int32_type, i),)))
                           for i in range(inarg_count)]
        # Replace src_ptr_arr_arg with this local variable
        src_ptr_arr_arg = src_ptr_arr_tmp
        # Initialize some more basic blocks for the strided loop
        looptest_block = ck_func.append_basic_block('looptest')
        loopbody_block = ck_func.append_basic_block('loopbody')
        end_block = ck_func.append_basic_block('finish')
        # Finish the entry block by branching
        # to the looptest block
        builder.branch(looptest_block)
        # The looptest block continues the loop while counter != 0
        builder.position_at_end(looptest_block)
        counter_phi = builder.phi(count_arg.type)
        counter_phi.add_incoming(count_arg, entry_block)
        dst_ptr_phi = builder.phi(dst_ptr_arg.type)
        dst_ptr_phi.add_incoming(dst_ptr_arg, entry_block)
        # From here on the destination pointer is the per-iteration phi
        dst_ptr_arg = dst_ptr_phi
        kzero = lc.Constant.int(count_arg.type, 0)
        pred = builder.icmp(lc.ICMP_NE, counter_phi, kzero)
        builder.cbranch(pred, loopbody_block, end_block)
        # The loopbody block decrements the counter, and executes
        # one kernel iteration
        builder.position_at_end(loopbody_block)
        kone = lc.Constant.int(counter_phi.type, 1)
        counter_dec = builder.sub(counter_phi, kone)
        counter_phi.add_incoming(counter_dec, loopbody_block)
    # Convert the src pointer args to the
    # appropriate kinds for the llvm call
    args = build_llvm_src_ptrs(builder, src_ptr_arr_arg,
                    bek.dshapes, bek.kinds[:-1], bek.argtypes)
    # Call the function and store in the dst
    kind = bek.kinds[-1]
    func = module.get_function_named(bek.func.name)
    if kind == lla.SCALAR:
        # Scalar result: the kernel returns the value, which is stored
        # through the destination pointer.
        dst_ptr = builder.bitcast(dst_ptr_arg,
                        Type.pointer(bek.return_type))
        dst_val = builder.call(func, args)
        builder.store(dst_val, dst_ptr)
    else:
        # Any other kind: the destination is passed to the kernel as
        # an extra trailing argument.
        dst_ptr = build_llvm_arg_ptr(builder, dst_ptr_arg,
                        bek.dshapes[-1], kind, bek.argtypes[-1])
        builder.call(func, args + [dst_ptr])
    if strided:
        # Finish the loopbody block by incrementing all the pointers
        # and branching to the looptest block
        dst_ptr_inc = builder.gep(dst_ptr_arg, (dst_stride_arg,))
        dst_ptr_phi.add_incoming(dst_ptr_inc, loopbody_block)
        # Increment the src pointers
        for i in range(inarg_count):
            src_ptr_val = builder.load(builder.gep(src_ptr_arr_tmp,
                            (lc.Constant.int(int32_type, i),)))
            src_ptr_inc = builder.gep(src_ptr_val, (src_stride_vals[i],))
            builder.store(src_ptr_inc,
                          builder.gep(src_ptr_arr_tmp,
                            (lc.Constant.int(int32_type, i),)))
        builder.branch(looptest_block)
        # The end block just returns
        builder.position_at_end(end_block)
    # Emitted into the entry block (non-strided) or the 'finish'
    # block (strided), whichever the builder is positioned at.
    builder.ret_void()
    #print("Function before optimization passes:")
    #print(ck_func)
    #module.verify()
    import llvm.ee as le
    from llvm.passes import build_pass_managers
    # Run the module-level optimization passes on the cloned module
    tm = le.TargetMachine.new(opt=3, cm=le.CM_JITDEFAULT, features='')
    pms = build_pass_managers(tm, opt=3, fpm=False,
                    vectorize=True, loop_vectorize=True)
    pms.pm.run(module)
    #print("Function after optimization passes:")
    #print(ck_func)
    # DEBUGGING: Verify the module.
    #module.verify()
    # TODO: Cache the EE - the interplay with the func_ptr
    #       was broken, so just avoiding caching for now
    # FIXME: Temporarily disabling AVX, because of misdetection
    #        in linux VMs. Some code is in llvmpy's workarounds
    #        submodule related to this.
    ee = le.EngineBuilder.new(module).mattrs("-avx").create()
    func_ptr = ee.get_pointer_to_function(ck_func)
    # Create a function which copies the shape from data
    # descriptors to the extra data struct.
    if len(kd_ctypestype._fields_) == 1:
        # If there were no extra data fields, it's a no-op function
        def bind_func(estruct, dst_dd, src_dd_list):
            pass
    else:
        def bind_func(estruct, dst_dd, src_dd_list):
            # Pair each kernel dshape with its data descriptor
            # (sources first, destination last).
            for i, (ds, dd) in enumerate(
                            izip(bek.dshapes, src_dd_list + [dst_dd])):
                # Take the trailing len(ds) entries of the descriptor's
                # dshape, minus the final one, as integers.
                shape = [operator.index(dim)
                         for dim in dd.dshape[-len(ds):-1]]
                cshape = getattr(estruct, 'operand_%d' % i)
                for j, dim_size in enumerate(shape):
                    cshape[j] = dim_size
    if strided:
        optype = ExprStridedOperation
    else:
        optype = ExprSingleOperation
    # NOTE(review): (ee, func_ptr) is presumably passed along so the
    # execution engine stays alive as long as the function pointer is
    # in use -- confirm against UnboundCKernelFunction.
    return UnboundCKernelFunction(
                    optype(func_ptr),
                    kd_ctypestype,
                    bind_func,
                    (ee, func_ptr))
def buildPutchar(module):
    '''Build forward declaration to putchar.

    Declares ``putchar`` in ``module`` as a non-variadic function taking
    one ``Type.int()`` argument and returning ``Type.int()``.

    Returns the declared function. The original bound it to a local that
    was never used, so callers had no handle to the declaration; callers
    that ignore the return value are unaffected.
    '''
    putcharType = Type.function(Type.int(), [Type.int()], False)
    return Function.new(module, putcharType, 'putchar')
def buildGetchar(module):
    '''Build forward declaration to getchar.

    Declares ``getchar`` in ``module`` as a non-variadic function taking
    no arguments and returning ``Type.int()``.

    Returns the declared function. The original bound it to a local that
    was never used, so callers had no handle to the declaration; callers
    that ignore the return value are unaffected.
    '''
    getcharType = Type.function(Type.int(), [], False)
    return Function.new(module, getcharType, 'getchar')
def make_function(self, name, ret_type, arg_types):
    """Create the function ``name`` in ``self.module`` and register it.

    The non-variadic function type is built from ``ret_type`` and
    ``arg_types``. The new function is stored under ``name`` in both
    ``self.functions`` and ``self.globals`` and then returned.
    """
    signature = Type.function(ret_type, arg_types, False)
    declared = Function.new(self.module, signature, name)
    # Targets are assigned left to right: functions first, then globals.
    self.functions[name] = self.globals[name] = declared
    return declared
def create_ckernel_interface(bek, strided):
    """Create a function wrapper with a CKernel interface according to
    `strided`.

    A clone of ``bek.module`` receives a new wrapper function named
    ``<bek.func.name>_strided_ckernel`` or ``<bek.func.name>_single_ckernel``.
    The wrapper calls the kernel function (looked up by name in the
    cloned module); in strided mode the call is wrapped in a loop that
    runs while the ``count`` argument is non-zero. No optimization or
    JIT compilation happens here.

    Parameters
    ----------
    bek : BlazeElementKernel
        The blaze kernel to wrap.
    strided : bool
        If true, the wrapper is created with
        ``strided_ckernel_func_type``, otherwise with
        ``single_ckernel_func_type``.

    Returns
    -------
    (module, ck_func)
        The cloned module and the wrapper function added to it.
    """
    # TODO: Decouple this from BlazeElementKernel
    # The last entry of bek.kinds describes the destination, the rest
    # are inputs.
    inarg_count = len(bek.kinds)-1
    # Work on a clone so the wrapper is not added to bek.module itself
    module = bek.module.clone()
    if strided:
        ck_func_name = bek.func.name +"_strided_ckernel"
        ck_func = Function.new(module, strided_ckernel_func_type,
                               name=ck_func_name)
    else:
        ck_func_name = bek.func.name +"_single_ckernel"
        ck_func = Function.new(module, single_ckernel_func_type,
                               name=ck_func_name)
    entry_block = ck_func.append_basic_block('entry')
    builder = lc.Builder.new(entry_block)
    # Unpack and name the wrapper's arguments; the strided prototype
    # carries three extra arguments (dst stride, src strides, count).
    if strided:
        dst_ptr_arg, dst_stride_arg, \
            src_ptr_arr_arg, src_stride_arr_arg, \
            count_arg, extra_ptr_arg = ck_func.args
        dst_stride_arg.name = 'dst_stride'
        src_stride_arr_arg.name = 'src_strides'
        count_arg.name = 'count'
    else:
        dst_ptr_arg, src_ptr_arr_arg, extra_ptr_arg = ck_func.args
    dst_ptr_arg.name = 'dst_ptr'
    src_ptr_arr_arg.name = 'src_ptrs'
    extra_ptr_arg.name = 'extra_ptr'
    if strided:
        # Allocate an array of pointer counters for the
        # strided loop
        src_ptr_arr_tmp = builder.alloca_array(int8_p_type,
                        lc.Constant.int(int32_type, inarg_count), 'src_ptr_arr')
        # Copy the pointers
        for i in range(inarg_count):
            builder.store(builder.load(builder.gep(src_ptr_arr_arg,
                            (lc.Constant.int(int32_type, i),))),
                          builder.gep(src_ptr_arr_tmp,
                            (lc.Constant.int(int32_type, i),)))
        # Get all the src strides
        src_stride_vals = [builder.load(builder.gep(src_stride_arr_arg,
                                        (lc.Constant.int(int32_type, i),)))
                           for i in range(inarg_count)]
        # Replace src_ptr_arr_arg with this local variable
        src_ptr_arr_arg = src_ptr_arr_tmp
        # Initialize some more basic blocks for the strided loop
        looptest_block = ck_func.append_basic_block('looptest')
        loopbody_block = ck_func.append_basic_block('loopbody')
        end_block = ck_func.append_basic_block('finish')
        # Finish the entry block by branching
        # to the looptest block
        builder.branch(looptest_block)
        # The looptest block continues the loop while counter != 0
        builder.position_at_end(looptest_block)
        counter_phi = builder.phi(count_arg.type)
        counter_phi.add_incoming(count_arg, entry_block)
        dst_ptr_phi = builder.phi(dst_ptr_arg.type)
        dst_ptr_phi.add_incoming(dst_ptr_arg, entry_block)
        # From here on the destination pointer is the per-iteration phi
        dst_ptr_arg = dst_ptr_phi
        kzero = lc.Constant.int(count_arg.type, 0)
        pred = builder.icmp(lc.ICMP_NE, counter_phi, kzero)
        builder.cbranch(pred, loopbody_block, end_block)
        # The loopbody block decrements the counter, and executes
        # one kernel iteration
        builder.position_at_end(loopbody_block)
        kone = lc.Constant.int(counter_phi.type, 1)
        counter_dec = builder.sub(counter_phi, kone)
        counter_phi.add_incoming(counter_dec, loopbody_block)
    # Convert the src pointer args to the
    # appropriate kinds for the llvm call
    args = build_llvm_src_ptrs(builder, src_ptr_arr_arg,
                    bek.dshapes, bek.kinds[:-1], bek.argtypes)
    # Call the function and store in the dst
    kind = bek.kinds[-1]
    func = module.get_function_named(bek.func.name)
    if kind == lla.SCALAR:
        # Scalar result: the kernel returns the value, which is stored
        # through the destination pointer.
        dst_ptr = builder.bitcast(dst_ptr_arg,
                        Type.pointer(bek.return_type))
        dst_val = builder.call(func, args)
        builder.store(dst_val, dst_ptr)
    else:
        # Any other kind: the destination is passed to the kernel as
        # an extra trailing argument.
        dst_ptr = build_llvm_arg_ptr(builder, dst_ptr_arg,
                        bek.dshapes[-1], kind, bek.argtypes[-1])
        builder.call(func, args + [dst_ptr])
    if strided:
        # Finish the loopbody block by incrementing all the pointers
        # and branching to the looptest block
        dst_ptr_inc = builder.gep(dst_ptr_arg, (dst_stride_arg,))
        dst_ptr_phi.add_incoming(dst_ptr_inc, loopbody_block)
        # Increment the src pointers
        for i in range(inarg_count):
            src_ptr_val = builder.load(builder.gep(src_ptr_arr_tmp,
                            (lc.Constant.int(int32_type, i),)))
            src_ptr_inc = builder.gep(src_ptr_val, (src_stride_vals[i],))
            builder.store(src_ptr_inc,
                          builder.gep(src_ptr_arr_tmp,
                            (lc.Constant.int(int32_type, i),)))
        builder.branch(looptest_block)
        # The end block just returns
        builder.position_at_end(end_block)
    # Emitted into the entry block (non-strided) or the 'finish'
    # block (strided), whichever the builder is positioned at.
    builder.ret_void()
    #print("Function before optimization passes:")
    #print(ck_func)
    #module.verify()
    return module, ck_func
def add_prelude(self):
    """Create every prelude function in ``self.module``.

    For each ``name -> function`` pair in the ``prelude`` mapping, a
    function called ``name`` is created with ``Function.new`` (the
    mapped value is passed as its type argument) and stored in
    ``self.intrinsics[name]``.
    """
    # items() is available on both Python 2 and Python 3 dicts, whereas
    # iteritems() exists only on Python 2.
    for name, function in prelude.items():
        self.intrinsics[name] = Function.new(self.module, function, name)
def op_DEF_FOREIGN(self, name, retty, argtys):
    """Declare the foreign function ``name`` and record it in ``self.globals``.

    ``retty`` and every entry of ``argtys`` are translated with
    ``arg_typemap``; the results form the function type passed to
    ``Function.new`` on ``self.module``. The function is non-variadic
    (``False`` is passed as the var-arg flag).
    """
    # Build a concrete list instead of calling map(): on Python 3 map()
    # yields a one-shot iterator, while a list behaves identically on
    # Python 2 and 3 and can be traversed more than once.
    largtys = [arg_typemap(argty) for argty in argtys]
    lretty = arg_typemap(retty)
    func_type = Type.function(lretty, largtys, False)
    self.globals[name] = Function.new(self.module, func_type, name)