def visit_For(self, node):
    """Convert ``for <name> in range(...)`` into a C-style ``For`` node.

    Only loops whose iterator is a direct call to ``range`` are converted,
    and the start, stop and step expressions must all report a ``c_long``
    type. Any other ``for`` loop gets its body visited and is returned
    unchanged otherwise.

    Raises:
        Exception: if ``range`` is called with an unsupported argument count.
        AssertionError: if start, stop or step is not of ``c_long`` type.
    """
    iterates_over_range = (
        isinstance(node, ast.For)
        and isinstance(node.iter, ast.Call)
        and isinstance(node.iter.func, ast.Name)
        and node.iter.func.id == 'range'
    )
    if not iterates_over_range:
        node.body = list(map(self.visit, node.body))
        return node

    range_args = node.iter.args
    arg_count = len(range_args)
    if arg_count == 1:
        stop = self.visit(range_args[0])
        start, step = Constant(0), Constant(1)
    elif arg_count == 2:
        start, stop = [self.visit(arg) for arg in range_args]
        step = Constant(1)
    elif arg_count == 3:
        start, stop, step = [self.visit(arg) for arg in range_args]
    else:
        raise Exception("Cannot convert a for...range with %d args." % arg_count)

    # TODO allow any expressions castable to Long type
    assert isinstance(stop.get_type(), c_long), \
        "Can only convert range's with stop values of Long type."
    assert isinstance(start.get_type(), c_long), \
        "Can only convert range's with start values of Long type."
    assert isinstance(step.get_type(), c_long), \
        "Can only convert range's with step values of Long type."

    # The loop variable is declared once; the test and increment use copies.
    loop_var = SymbolRef(node.target.id, c_long())
    initialise = Assign(loop_var, start)
    condition = Lt(loop_var.copy(), stop)
    advance = AddAssign(loop_var.copy(), step)
    converted_body = [self.visit(stmt) for stmt in node.body]
    return For(initialise, condition, advance, converted_body)
def clSetKernelArg(kernel, arg_index, arg_size, arg_value):
    """Build a ``FunctionCall`` node invoking ``clSetKernelArg``.

    Plain Python values are wrapped before the call is assembled: a string
    ``kernel`` becomes a ``SymbolRef``, integer ``arg_index`` / ``arg_size``
    become ``Constant`` nodes, and a string ``arg_value`` becomes a ``Ref``
    of a ``SymbolRef``. Anything else is passed through untouched.
    """
    kernel_node = SymbolRef(kernel) if isinstance(kernel, str) else kernel
    index_node = Constant(arg_index) if isinstance(arg_index, int) else arg_index
    size_node = Constant(arg_size) if isinstance(arg_size, int) else arg_size
    value_node = Ref(SymbolRef(arg_value)) if isinstance(arg_value, str) else arg_value

    call_args = [kernel_node, index_node, size_node, value_node]
    return FunctionCall(SymbolRef("clSetKernelArg"), call_args)
def test_array_ref(self):
    """An assignment from an array element takes the pointer's element type."""
    pointer_decl = SymbolRef("foo", ctypes.POINTER(ctypes.c_double)())
    temp_ref = SymbolRef("____temp__x")
    first_element = ArrayRef(SymbolRef("foo"), Constant(0))
    tree = MultiNode([pointer_decl, Assign(temp_ref, first_element)])

    DeclarationFiller().visit(tree)

    expected = ("\ndouble* foo;\n"
                "double ____temp__x = foo[0];\n")
    self._check_code(tree, expected)
def test_complex(self):
    """An array definition may be initialised with compound expressions."""
    declared = SymbolRef('myArray', ct.c_int())
    length = Constant(2)
    first_item = Add(SymbolRef('b'), SymbolRef('c'))
    second_item = Mul(Sub(Constant(99), SymbolRef('d')), Constant(200))
    node = ArrayDef(declared, length, Array(body=[first_item, second_item]))

    self._check_code(node, "int myArray[2] = {b + c, (99 - d) * 200}")
def test_dot(self):
    """The declared type of ``x`` follows the type reported by the member."""
    op = SymbolRef("op")
    op.get_type = lambda: ctypes.c_char()
    foo = SymbolRef("foo")
    foo.get_type = lambda: ctypes.c_double()

    member_access = Dot(foo, op)
    tree = Assign(SymbolRef("x"), member_access)
    DeclarationFiller().visit(tree)

    self._check_code(tree, "char x = foo . op")
def clEnqueueReadBuffer(queue, buf, blocking, offset, cb, ptr, num_events=0,
                        evt_list_ptr=None, evt=None):
    """Build a ``FunctionCall`` node invoking ``clEnqueueReadBuffer``.

    Plain Python values are wrapped into nodes before the call is assembled:

    * ``buf`` and ``ptr`` given as strings become ``SymbolRef`` nodes;
    * a boolean ``blocking`` becomes ``Constant(0)`` or ``Constant(1)``;
    * ``offset``, ``cb`` and ``num_events`` become ``Constant`` nodes unless
      they are already ``ast.AST`` instances;
    * ``evt_list_ptr`` and ``evt`` become ``NULL()`` unless they are already
      ``ast.AST`` instances.

    ``queue`` is passed through unchanged.
    """
    if isinstance(buf, str):
        buf = SymbolRef(buf)
    if isinstance(blocking, bool):
        blocking = Constant(int(blocking))
    if isinstance(ptr, str):
        ptr = SymbolRef(ptr)
    if not isinstance(offset, ast.AST):
        offset = Constant(offset)
    if not isinstance(cb, ast.AST):
        cb = Constant(cb)
    if not isinstance(num_events, ast.AST):
        num_events = Constant(num_events)
    # The fallback used to be stored under a different local name, so a
    # caller-supplied node was never used and the call below raised
    # NameError. Rebinding the parameter itself handles both cases.
    if not isinstance(evt_list_ptr, ast.AST):
        evt_list_ptr = NULL()
    if not isinstance(evt, ast.AST):
        evt = NULL()

    return FunctionCall(SymbolRef('clEnqueueReadBuffer'), [
        queue, buf, blocking, offset, cb, ptr,
        num_events, evt_list_ptr, evt])
def clEnqueueCopyBuffer(queue, src_buf, dst_buf, src_offset=0, dst_offset=0, cb=0):
    """Build a ``FunctionCall`` node invoking ``clEnqueueCopyBuffer``.

    String buffers are wrapped in ``SymbolRef`` and integer offsets / byte
    counts in ``Constant``. The event arguments are always emitted as a zero
    count followed by two ``NULL()`` nodes. ``queue`` is passed unchanged.
    """
    source = SymbolRef(src_buf) if isinstance(src_buf, str) else src_buf
    destination = SymbolRef(dst_buf) if isinstance(dst_buf, str) else dst_buf
    source_offset = Constant(src_offset) if isinstance(src_offset, int) else src_offset
    destination_offset = Constant(dst_offset) if isinstance(dst_offset, int) else dst_offset
    byte_count = Constant(cb) if isinstance(cb, int) else cb

    # No wait list and no output event are ever requested.
    no_events = [Constant(0), NULL(), NULL()]
    call_args = [queue, source, destination,
                 source_offset, destination_offset, byte_count] + no_events
    return FunctionCall(SymbolRef('clEnqueueCopyBuffer'), call_args)
def printf(fmt, *args):
    """Build a ``FunctionCall`` node for ``printf``.

    ``fmt`` is wrapped in a ``String`` node and placed first; every extra
    argument must already be a ``CtreeNode`` (checked with ``assert``).
    """
    assert all(isinstance(arg, CtreeNode) for arg in args)
    call_args = [String(fmt)]
    call_args.extend(args)
    return FunctionCall(SymbolRef("printf"), call_args)
def test_long(self):
    """A ``c_long`` symbol is declared as ``long``, or ``int`` when int == long."""
    tree = SymbolRef("i", ctypes.c_long())
    # The expected spelling is keyed off the interpreter's word size.
    expected = "long i" if sys.maxsize > 2 ** 32 else "int i"
    self._check_code(tree, expected)
def test_simple_array_def(self):
    """A two-element int array definition renders with a brace initialiser."""
    declared = SymbolRef('hi', ct.c_int())
    length = Constant(2)
    initialiser = Array(body=[Constant(0), Constant(1)])
    node = ArrayDef(declared, length, initialiser)

    self._check_code(node, "int hi[2] = {0, 1}")
def gen_local_macro(self):
    """Build the index expression over the symbols ``d0`` ... ``d<dim-1>``.

    The expression starts as ``d<dim-1>``. For each lower dimension ``d``,
    taken from highest to lowest, it adds ``stride * d<d>``. ``stride`` is
    the last dimension's local size plus twice its ghost depth, multiplied
    by the same padded size for every dimension strictly between ``d`` and
    the last one. The final sum and its right-most factor are flagged to be
    parenthesised.
    """
    last = len(self.output_grid.shape) - 1
    flat_index = SymbolRef("d%d" % last)

    for axis in reversed(range(last)):
        stride = Add(get_local_size(last), Constant(2 * self.ghost_depth[last]))
        for inner in range(axis + 1, last):
            padded_extent = Add(get_local_size(inner),
                                Constant(2 * self.ghost_depth[inner]))
            stride = Mul(stride, padded_extent)
        term = Mul(stride, SymbolRef("d%d" % axis))
        flat_index = Add(flat_index, term)

    flat_index._force_parentheses = True
    flat_index.right.right._force_parentheses = True
    return flat_index
def test_template_parent_pointers(self):
    """A node substituted into a template gets the template as its parent."""
    from ctree.c.nodes import SymbolRef

    symbol = SymbolRef("hello")
    node = StringTemplate("char *str = $val", {'val': symbol})

    self.assertIs(symbol.parent, node)
def test_template_with_transformer(self):
    """A transformer can rewrite nodes substituted into a template."""
    from ctree.visitors import NodeTransformer
    from ctree.c.nodes import String, SymbolRef

    class SymbolsToStrings(NodeTransformer):
        def visit_SymbolRef(self, node):
            return String(node.name)

    tree = StringTemplate("char *str = $val", {'val': SymbolRef("hello")})
    self._check(tree, 'char *str = hello')

    # After the transform the symbol is emitted as a quoted string.
    transformed = SymbolsToStrings().visit(tree)
    self._check(transformed, 'char *str = "hello"')
def test_template_parent_pointers_with_transformer(self):
    """A node produced by a transformer is parented to the template."""
    from ctree.visitors import NodeTransformer
    from ctree.c.nodes import String, SymbolRef

    class SymbolsToStrings(NodeTransformer):
        def visit_SymbolRef(self, node):
            return String(node.name)

    original = StringTemplate("char *str = $val", {'val': SymbolRef("hello")})
    tree = SymbolsToStrings().visit(original)

    replacement = tree.val
    self.assertIs(replacement.parent, tree)
def clEnqueueNDRangeKernel(queue, kernel, work_dim=1, work_offset=0, global_size=0, local_size=0):
    """Build a ``Block`` that enqueues ``kernel`` on ``queue``.

    The block assigns ``global_size`` and ``local_size`` to ``c_size_t``
    symbols of the same names, then calls ``clEnqueueNDRangeKernel`` with
    references to copies of those symbols. ``queue`` and ``kernel`` must be
    ``SymbolRef`` instances (checked with ``assert``); ``work_dim`` and
    ``work_offset`` are placed in the argument list as given.
    """
    assert isinstance(queue, SymbolRef)
    assert isinstance(kernel, SymbolRef)

    global_decl = SymbolRef('global_size', c_size_t())
    local_decl = SymbolRef('local_size', c_size_t())

    # The call refers to copies so the typed declarations stay in the assigns.
    call_args = [
        queue, kernel,
        work_dim, work_offset,
        Ref(global_decl.copy()), Ref(local_decl.copy()),
        0, NULL(), NULL(),
    ]
    enqueue = FunctionCall(SymbolRef("clEnqueueNDRangeKernel"), call_args)

    statements = [
        Assign(global_decl, Constant(global_size)),
        Assign(local_decl, Constant(local_size)),
        enqueue,
    ]
    return Block(statements)
def visit_FunctionDef(self, node):
    """Rename the function's arguments, then delegate to the parent visitor.

    For a function named ``kernel`` the first argument is dropped. Every
    remaining argument is renamed, either to the matching entry of
    ``self.arg_names`` or, when that is ``None``, to a fresh name from
    ``SymbolRef.unique()``. Each rename is recorded in
    ``self.arg_name_map`` (old name -> new name) and written back onto the
    argument node.
    """
    if node.name == 'kernel':
        node.args.args = node.args.args[1:]

    # Python 3 stores the argument name on ``.arg``; Python 2 on ``.id``.
    name_attr = 'arg' if sys.version_info >= (3, 0) else 'id'

    for index, arg in enumerate(node.args.args):  # pragma no cover
        if self.arg_names is not None:
            new_name = self.arg_names[index]
        else:
            new_name = SymbolRef.unique().name
        self.arg_name_map[getattr(arg, name_attr)] = new_name
        # Previously the explicit-names path on Python 3 recorded the mapping
        # but left the argument node unrenamed, unlike every other path.
        setattr(arg, name_attr, new_name)

    return super(PythonToStencilModel, self).visit_FunctionDef(node)
def CL_DEVICE_TYPE_DEFAULT():
    """Return a ``SymbolRef`` for the identifier ``CL_DEVICE_TYPE_DEFAULT``."""
    identifier = "CL_DEVICE_TYPE_DEFAULT"
    return SymbolRef(identifier)
def CL_DEVICE_TYPE_ACCELERATOR():
    """Return a ``SymbolRef`` for the identifier ``CL_DEVICE_TYPE_ACCELERATOR``."""
    identifier = "CL_DEVICE_TYPE_ACCELERATOR"
    return SymbolRef(identifier)
def CL_DEVICE_TYPE_CPU():
    """Return a ``SymbolRef`` for the identifier ``CL_DEVICE_TYPE_CPU``."""
    identifier = "CL_DEVICE_TYPE_CPU"
    return SymbolRef(identifier)
def visit_For(self, node):
    """Convert ``for <name> in range(...)`` / ``xrange(...)`` into a ``For`` node.

    Behaviour, in order:

    * Loops whose iterator is not a direct ``range``/``xrange`` call get
      their body visited and are returned unchanged otherwise.
    * When start, stop and step are all ``Constant`` nodes, a zero step
      raises ``ValueError`` and a range that can never iterate makes the
      method return ``None``.
    * If any of start, stop or step is not a ``CtreeNode`` after visiting,
      the loop is left as a Python ``for`` with its body visited.
    * Otherwise a ``For`` node is built. The loop variable's type is the
      common ctype of ``c_long`` and the types reported by the bounds, and
      the comparison is ``Gt`` when both start and stop carry a ``value``
      and start is larger, ``Lt`` otherwise.

    Raises:
        Exception: if the range call has an unsupported number of arguments.
        ValueError: if a constant step of zero is given.
        AssertionError: if a bound reports a non-integer ctype.
    """
    is_range_loop = (
        isinstance(node, ast.For)
        and isinstance(node.iter, ast.Call)
        and isinstance(node.iter.func, ast.Name)
        and node.iter.func.id in ("range", "xrange")
    )
    if not is_range_loop:
        node.body = list(map(self.visit, node.body))
        return node

    range_args = node.iter.args
    arg_count = len(range_args)
    if arg_count == 1:
        stop = self.visit(range_args[0])
        start, step = Constant(0), Constant(1)
    elif arg_count == 2:
        start, stop = map(self.visit, range_args)
        step = Constant(1)
    elif arg_count == 3:
        start, stop, step = map(self.visit, range_args)
    else:
        raise Exception("Cannot convert a for...range with %d args." % arg_count)

    # check no-op conditions.
    if all(isinstance(item, Constant) for item in (start, stop, step)):
        if step.value == 0:
            raise ValueError("range() step argument must not be zero")
        elif (
            start.value == stop.value
            or (start.value < stop.value and step.value < 0)
            or (start.value > stop.value and step.value > 0)
        ):
            return None

    if not all(isinstance(item, CtreeNode) for item in (start, stop, step)):
        node.body = list(map(self.visit, node.body))
        return node

    # TODO allow any expressions castable to Long type
    target_types = [c_long]
    for el in (stop, start, step):
        # typed item to try and guess type off of. Imperfect right now.
        if hasattr(el, "get_type"):
            # TODO take the proper class instead of the last; if start,
            # end are doubles, but step is long, target is double
            t = el.get_type()
            # Adjacent literals keep the message on one logical line; the
            # old backslash continuation leaked source indentation into it.
            assert isinstance(t, (c_byte, c_int, c_long, c_short)), (
                "Can only convert ranges with integer/long "
                "start/stop/step values"
            )
            target_types.append(type(t))
    target_type = get_common_ctype(target_types)()

    target = SymbolRef(node.target.id, target_type)

    # A descending range with known bounds needs a ``>`` loop test.
    op = Lt
    if hasattr(start, "value") and hasattr(stop, "value") and start.value > stop.value:
        op = Gt

    for_loop = For(
        Assign(target, start),
        op(target.copy(), stop),
        AddAssign(target.copy(), step),
        [self.visit(stmt) for stmt in node.body],
    )
    return for_loop
def test_bad_type(self):
    """Generating code for a symbol with a non-ctypes type raises ValueError."""
    class NotACType(object):
        pass

    with self.assertRaises(ValueError):
        node = SymbolRef("i", NotACType())
        node.codegen()
def get_global_id(id):
    """Return a ``FunctionCall`` node for ``get_global_id(<id>)``.

    ``id`` is wrapped in a ``Constant`` node.
    """
    callee = SymbolRef('get_global_id')
    return FunctionCall(callee, [Constant(id)])
def CLK_LOCAL_MEM_FENCE():
    """Return a ``SymbolRef`` for the identifier ``CLK_LOCAL_MEM_FENCE``."""
    identifier = "CLK_LOCAL_MEM_FENCE"
    return SymbolRef(identifier)
def CL_DEVICE_TYPE_ALL():
    """Return a ``SymbolRef`` for the identifier ``CL_DEVICE_TYPE_ALL``."""
    identifier = "CL_DEVICE_TYPE_ALL"
    return SymbolRef(identifier)
def get_num_groups(id):
    """Return a ``FunctionCall`` node for ``get_num_groups(<id>)``.

    ``id`` is wrapped in a ``Constant`` node.
    """
    callee = SymbolRef('get_num_groups')
    return FunctionCall(callee, [Constant(id)])
def visit_FunctionDecl(self, node):
    """Turn the stencil function into OpenCL kernel file(s) plus a host control function.

    The method, in order:

    1. Picks a local work size (fixed ``(1, 1, 1)`` when ``self.testing``,
       otherwise computed by ``LocalSizeComputer`` for the last device
       returned by ``cl.clGetDeviceIDs()``).
    2. Delegates to the parent ``visit_FunctionDecl`` and then retypes the
       kernel parameters as float pointers, marking all but the last as
       global const, the last as global, and appending a local-memory
       pointer parameter named after ``self.local_block``.
    3. Appends the kernel (and, when ``self.is_copied``, one boundary kernel
       per boundary handler) to ``self.project.files`` as ``OclFile``s.
    4. Builds and returns a ``stencil_control`` ``FunctionDecl`` that sets
       the kernel arguments, enqueues the kernel(s), calls ``clFinish`` and
       returns ``error_code``.

    Returns:
        The ``FunctionDecl`` node for ``stencil_control``.
    """
    # This function grabs the input and output grid names which are used to
    self.local_block = SymbolRef.unique()
    # generate the proper array macros.
    arg_cfg = self.arg_cfg

    global_size = arg_cfg[0].shape

    if self.testing:
        local_size = (1, 1, 1)
    else:
        desired_device_number = -1
        device = cl.clGetDeviceIDs()[desired_device_number]
        lcs = LocalSizeComputer(global_size, device)
        local_size = lcs.compute_local_size_bulky()
        virtual_global_size = lcs.compute_virtual_global_size(local_size)
        # NOTE(review): the nesting of the three assignments below could not
        # be determined with certainty. Either way the testing branch never
        # computes virtual_global_size, yet self.virtual_global_size is read
        # further down -- confirm it is initialised elsewhere for testing.
        self.global_size = global_size
        self.local_size = local_size
        self.virtual_global_size = virtual_global_size

    super(StencilOclTransformer, self).visit_FunctionDecl(node)
    # Every parameter except the last is a read-only global float buffer.
    for index, param in enumerate(node.params[:-1]):
        # TODO: Transform numpy type to ctype
        param.type = ct.POINTER(ct.c_float)()
        param.set_global()
        param.set_const()

    node.set_kernel()
    # The last parameter is global but not const.
    node.params[-1].set_global()
    node.params[-1].type = ct.POINTER(ct.c_float)()
    # Extra trailing parameter: the local-memory block.
    node.params.append(SymbolRef(self.local_block.name, ct.POINTER(ct.c_float)()))
    node.params[-1].set_local()
    node.defn = node.defn[0]

    # if boundary handling is copy we have to generate a collection of
    # boundary kernels to handle the on-gpu boundary copy
    if self.is_copied:
        device = cl.clGetDeviceIDs()[-1]
        self.boundary_handlers = boundary_kernel_factory(
            self.ghost_depth, self.output_grid,
            node.params[0].name,
            node.params[-2].name,  # second last parameter is output
            device
        )
        boundary_kernels = [
            FunctionDecl(
                name=boundary_handler.kernel_name,
                params=node.params,
                defn=boundary_handler.generate_ocl_kernel_body(),
            )
            for boundary_handler in self.boundary_handlers
        ]

        self.project.files.append(OclFile('kernel', [node]))

        for dim, boundary_kernel in enumerate(boundary_kernels):
            boundary_kernel.set_kernel()
            self.project.files.append(OclFile(kernel_dim_name(dim), [boundary_kernel]))

        self.boundary_kernels = boundary_kernels

        # ctree.browser_show_ast(node)
        # import ctree
        # ctree.browser_show_ast(boundary_kernels[0])
    else:
        self.project.files.append(OclFile('kernel', [node]))

    # print(self.project.files[0])
    # print(self.project.files[-1])

    # Body of the host-side control function: work-size arrays and error code.
    defn = [
        ArrayDef(
            SymbolRef('global', ct.c_ulong()),
            arg_cfg[0].ndim,
            [Constant(d) for d in self.virtual_global_size]
        ),
        ArrayDef(
            SymbolRef('local', ct.c_ulong()),
            arg_cfg[0].ndim,
            [Constant(s) for s in local_size]
            # [Constant(s) for s in [512, 512]]  # use this line to force a
            # opencl local size error
        ),
        Assign(SymbolRef("error_code", ct.c_int()), Constant(0)),
    ]
    # One cl_mem argument per entry of arg_cfg plus one more buffer.
    setargs = [clSetKernelArg(
        SymbolRef('kernel'), Constant(d),
        FunctionCall(SymbolRef('sizeof'), [SymbolRef('cl_mem')]),
        Ref(SymbolRef('buf%d' % d))
    ) for d in range(len(arg_cfg) + 1)]

    from functools import reduce
    import operator
    # Bytes of local memory: sizeof(cl_float) times the product of each local
    # extent padded by twice the kernel's ghost depth in that dimension.
    local_mem_size = reduce(
        operator.mul,
        (size + 2 * self.kernel.ghost_depth[index]
         for index, size in enumerate(local_size)),
        ct.sizeof(cl.cl_float())
    )

    # The final kernel argument is the local block: a size with a NULL value.
    setargs.append(
        clSetKernelArg(
            'kernel', len(arg_cfg) + 1,
            local_mem_size,
            NULL()
        )
    )

    defn.extend(setargs)
    enqueue_call = FunctionCall(SymbolRef('clEnqueueNDRangeKernel'), [
        SymbolRef('queue'), SymbolRef('kernel'),
        Constant(self.kernel.dim), NULL(),
        SymbolRef('global'), SymbolRef('local'),
        Constant(0), NULL(), NULL()
    ])

    defn.extend(check_ocl_error(enqueue_call, "clEnqueueNDRangeKernel"))

    params = [
        SymbolRef('queue', cl.cl_command_queue()),
        SymbolRef('kernel', cl.cl_kernel())
    ]
    if self.is_copied:
        # Repeat the setup/enqueue sequence for each boundary kernel, using
        # that handler's own global and local sizes.
        for dim, boundary_kernel in enumerate(self.boundary_kernels):
            defn.extend([
                ArrayDef(
                    SymbolRef(global_for_dim_name(dim), ct.c_ulong()),
                    arg_cfg[0].ndim,
                    [Constant(d) for d in self.boundary_handlers[dim].global_size]
                ),
                ArrayDef(
                    SymbolRef(local_for_dim_name(dim), ct.c_ulong()),
                    arg_cfg[0].ndim,
                    [Constant(s) for s in self.boundary_handlers[dim].local_size]
                )
            ])
            setargs = [clSetKernelArg(
                SymbolRef(kernel_dim_name(dim)), Constant(d),
                FunctionCall(SymbolRef('sizeof'), [SymbolRef('cl_mem')]),
                Ref(SymbolRef('buf%d' % d))
            ) for d in range(len(arg_cfg) + 1)]
            setargs.append(
                clSetKernelArg(
                    SymbolRef(kernel_dim_name(dim)),
                    len(arg_cfg) + 1,
                    local_mem_size,
                    NULL()
                )
            )
            defn.extend(setargs)

            # Unlike the main kernel, this enqueue is not wrapped in
            # check_ocl_error.
            enqueue_call = FunctionCall(
                SymbolRef('clEnqueueNDRangeKernel'), [
                    SymbolRef('queue'), SymbolRef(kernel_dim_name(dim)),
                    Constant(self.kernel.dim), NULL(),
                    SymbolRef(global_for_dim_name(dim)),
                    SymbolRef(local_for_dim_name(dim)),
                    Constant(0), NULL(), NULL()
                ]
            )
            defn.append(enqueue_call)

            # Each boundary kernel becomes an extra control-function parameter.
            params.extend([
                SymbolRef(kernel_dim_name(dim), cl.cl_kernel())
            ])

    # finish_call = FunctionCall(SymbolRef('clFinish'),
    #                            [SymbolRef('queue')])
    # defn.append(finish_call)
    # finish_call = [
    #     Assign(
    #         SymbolRef("error_code", ct.c_int()),
    #         FunctionCall(SymbolRef('clFinish'), [SymbolRef('queue')])
    #     ),
    #     If(
    #         NotEq(SymbolRef("error_code"), Constant(0)),
    #         FunctionCall(
    #             SymbolRef("printf"),
    #             [
    #                 String("OPENCL KERNEL RETURNED ERROR CODE %d"),
    #                 SymbolRef("error_code")
    #             ]
    #         )
    #     )
    # ]

    finish_call = check_ocl_error(
        FunctionCall(SymbolRef('clFinish'), [SymbolRef('queue')]),
        "clFinish"
    )
    defn.extend(finish_call)
    defn.append(Return(SymbolRef("error_code")))

    # The buffer parameters come after the queue and kernel parameters.
    params.extend(SymbolRef('buf%d' % d, cl.cl_mem()) for d in range(len(arg_cfg) + 1))

    control = FunctionDecl(ct.c_int32(), "stencil_control", params=params, defn=defn)

    return control
def CL_SUCCESS():
    """Return a ``SymbolRef`` for the identifier ``CL_SUCCESS``."""
    identifier = "CL_SUCCESS"
    return SymbolRef(identifier)
def test_bool(self):
    """A ``c_bool`` symbol is declared as ``bool``."""
    declaration = SymbolRef("i", ctypes.c_bool())
    expected = "bool i"
    self._check_code(declaration, expected)
def barrier(arg):
    """Return a ``FunctionCall`` node for ``barrier(<arg>)``.

    ``arg`` is placed in the argument list as given.
    """
    callee = SymbolRef('barrier')
    return FunctionCall(callee, [arg])
def test_string(self):
    """A ``c_char_p`` symbol is declared as ``char*``."""
    declaration = SymbolRef("i", ctypes.c_char_p())
    expected = "char* i"
    self._check_code(declaration, expected)
def get_local_size(id):
    """Return a ``FunctionCall`` node for ``get_local_size(<id>)``.

    ``id`` is wrapped in a ``Constant`` node.
    """
    callee = SymbolRef('get_local_size')
    return FunctionCall(callee, [Constant(id)])
def test_pointer(self):
    """A pointer-to-double symbol is declared as ``double*``."""
    double_pointer = ctypes.POINTER(ctypes.c_double)
    declaration = SymbolRef("i", double_pointer())
    self._check_code(declaration, "double* i")
def clReleaseMemObject(arg):
    """Return a ``FunctionCall`` node for ``clReleaseMemObject(<arg>)``.

    ``arg`` is placed in the argument list as given.
    """
    callee = SymbolRef('clReleaseMemObject')
    return FunctionCall(callee, [arg])
def test_none(self):
    """A ``c_void_p`` symbol is declared as ``void*``."""
    declaration = SymbolRef("i", ctypes.c_void_p())
    expected = "void* i"
    self._check_code(declaration, expected)