def _global_structs(self): # CPP implementation does nothing here id_struct = (c.Statement(f'struct has_id'), c.Struct('has_id', [])) vector_struct = ( c.Statement(f'template <typename T> struct has_data_vector'), TemplateSemicolon('typename T', c.Struct('has_data_vector', []))) return (id_struct, vector_struct)
def _generate_kernel_arg_decls(self): _kernel_arg_decls = [] _kernel_lib_arg_decls = [] _kernel_structs = cgen.Module( [cgen.Comment('#### Structs generated per ParticleDat ####')]) if self._kernel.static_args is not None: for i, dat in enumerate(self._kernel.static_args.items()): _kernel_arg_decls.append( cgen.Const(cgen.Value(host.ctypes_map[dat[1]], dat[0]))) for i, dat in enumerate(self._dat_dict.items()): assert type(dat[1]) is tuple, "Access descriptors not found" kernel_lib_arg = cgen.Pointer( cgen.Value(host.ctypes_map[dat[1][0].dtype], Restrict(self._cc.restrict_keyword, dat[0]))) # print host.ctypes_map[dat[1][0].dtype], dat[1][0].dtype if issubclass(type(dat[1][0]), host._Array): kernel_arg = cgen.Pointer( cgen.Value(host.ctypes_map[dat[1][0].dtype], Restrict(self._cc.restrict_keyword, dat[0]))) if not dat[1][1].write: kernel_arg = cgen.Const(kernel_arg) _kernel_arg_decls.append(kernel_arg) elif issubclass(type(dat[1][0]), host.Matrix): # MAKE STRUCT TYPE dtype = dat[1][0].dtype ti = cgen.Pointer( cgen.Value(ctypes_map(dtype), Restrict(self._cc.restrict_keyword, 'i'))) tj = cgen.Pointer( cgen.Value(ctypes_map(dtype), Restrict(self._cc.restrict_keyword, 'j'))) if not dat[1][1].write: ti = cgen.Const(ti) tj = cgen.Const(tj) typename = '_' + dat[0] + '_t' _kernel_structs.append( cgen.Typedef(cgen.Struct('', [ti, tj], typename))) # MAKE STRUCT ARG _kernel_arg_decls.append(cgen.Value(typename, dat[0])) if not dat[1][1].write: kernel_lib_arg = cgen.Const(kernel_lib_arg) _kernel_lib_arg_decls.append(kernel_lib_arg) self._components['KERNEL_ARG_DECLS'] = _kernel_arg_decls self._components['KERNEL_LIB_ARG_DECLS'] = _kernel_lib_arg_decls self._components['KERNEL_STRUCT_TYPEDEFS'] = _kernel_structs
def eval(self, generator): if isinstance(self.base, MessageBox): inherits = '' if self.base.base.name: inherits = f' : public {self.base.base.name.eval()}' if self.base.base.template: inherits = f'{inherits}<{self.base.base.template.eval()}>' return c.Struct(f'{self.base.name.eval()}{inherits}', WrapStruct(self.base.block).eval(generator)) elif isinstance(self.base, BlockBox): return [ WrapStruct(x).eval(generator) for x in self.base.fields.eval() ] elif isinstance(self.base, DeclarationBox): dtype = self.base.dtype.eval(generator) if self.base.optional: dtype = f'std::optional<{dtype}>' return c.Value(dtype, self.base.name.eval())
def _generate_kernel_arg_decls(self): _kernel_arg_decls = [] _kernel_lib_arg_decls = [] _kernel_structs = cgen.Module([ cgen.Comment('#### Structs generated per ParticleDat ####') ]) if self._kernel.static_args is not None: for i, dat in enumerate(self._kernel.static_args.items()): arg = cgen.Const(cgen.Value(host.ctypes_map[dat[1]], dat[0])) _kernel_arg_decls.append(arg) _kernel_lib_arg_decls.append(arg) for i, dat in enumerate(self._dat_dict.items()): assert type(dat[1]) is tuple, "Access descriptors not found" obj = dat[1][0] mode = dat[1][1] symbol = dat[0] kernel_lib_arg = cgen.Pointer(cgen.Value(host.ctypes_map[obj.dtype], Restrict(self._cc.restrict_keyword, symbol)) ) if issubclass(type(obj), data.GlobalArrayClassic): kernel_lib_arg = cgen.Pointer(kernel_lib_arg) if issubclass(type(obj), host._Array): kernel_arg = cgen.Pointer(cgen.Value(host.ctypes_map[obj.dtype], Restrict(self._cc.restrict_keyword, symbol)) ) if not mode.write: kernel_arg = cgen.Const(kernel_arg) _kernel_arg_decls.append(kernel_arg) if mode.write is True: assert issubclass(type(obj), data.GlobalArrayClassic),\ "global array must be a thread safe type for \ write access. Type is:" + str(type(obj)) elif issubclass(type(dat[1][0]), host.Matrix): # MAKE STRUCT TYPE dtype = dat[1][0].dtype ti = cgen.Pointer(cgen.Value(ctypes_map(dtype), Restrict(self._cc.restrict_keyword,'i'))) tj = cgen.Pointer(cgen.Value(ctypes_map(dtype), Restrict(self._cc.restrict_keyword,'j'))) if not dat[1][1].write: ti = cgen.Const(ti) tj = cgen.Const(tj) typename = '_'+dat[0]+'_t' _kernel_structs.append(cgen.Typedef(cgen.Struct('', [ti,tj], typename))) # MAKE STRUCT ARG _kernel_arg_decls.append(cgen.Value(typename, dat[0])) if not dat[1][1].write: kernel_lib_arg = cgen.Const(kernel_lib_arg) _kernel_lib_arg_decls.append(kernel_lib_arg) self._components['KERNEL_ARG_DECLS'] = _kernel_arg_decls self._components['KERNEL_LIB_ARG_DECLS'] = _kernel_lib_arg_decls self._components['KERNEL_STRUCT_TYPEDEFS'] = _kernel_structs
def profiling_function(self): return cgen.Extern( "C", cgen.Struct(self._profiling_structure_name, [self.grid.define_profiling]))
def convergence_structure(self): return cgen.Extern( "C", cgen.Struct(self.__convergence_structure_name, [self.grid.define_convergence]))
def grid_structure(self): return cgen.Extern( "C", cgen.Struct(self._grid_structure_name, [self.grid.define_fields]))
def setup(self, g): if g.is_parent: if len(self._path) < 1: self._path = '%s/%s.%s' % (os.getcwd(), g.name, self._lang.lower()) if len(self.includes) > 0: for incfile in self.includes: include = cg.Include('%s.h' % incfile) self._lines.append(str(include)) self._lines.append('') if len(self.defines) > 0: for tuple in self.defines: (lhs, rhs) = tuple define = cg.Define(lhs, rhs) self._lines.append(str(define)) self._lines.append('') if len(self._typedefs) > 0: for tuple in self._typedefs: (lhs, rhs) = tuple typedef = cg.Typedef(cg.Value(lhs, rhs)) self._lines.append(str(typedef)) self._lines.append('') if len(self._functions) > 0: for tuple in self._functions: (lhs, rhs) = tuple fxndef = 'inline %s { %s }' % (lhs, rhs) self._lines.append(fxndef) self._lines.append('') if len(self._structs) > 0: for structname in self._structs: struct = self._structs[structname] fields = [cg.Value(struct[name], name) for name in struct] struct = cg.Struct('', fields) typedef = cg.Typedef(cg.Value(struct, '%s%s' % (structname, StructNode.suffix()))) self._lines.append(str(typedef).replace('} ;', '}')) self._lines.append('') if len(g.constants) > 0: for const in g.constants: define = cg.Define(const.name, const.value) self._lines.append(str(define)) self._lines.append('') if len(g.includes) > 0: for incfile in g.includes: include = cg.Include('%s.h' % incfile) self._lines.append(str(include)) self._lines.append('') if len(g.subgraphs) < 1: func = CGenHelper.functionDecl(g.returntype, g.name, g.params) decl = str(func) self._lines.append(decl) self._lines.append('inline %s' % decl.replace(';', ' {')) if len(g.constants) > 0: structname = self._structname if len(structname) < 1: structname = g.name.split('_')[0] if structname not in self._structs: self._structs[structname] = OrderedDict() for const in g.constants: self._structs[structname][const.name] = const.type # Add const to struct for later use...
def _generate_per_dat(self): # =================== DICT INIT =============================== self._components['KERNEL_ARG_DECLS'] = [ cgen.Const(cgen.Value(host.int32_str, '_D_N_LOCAL')) ] self._components['KERNEL_LIB_ARG_DECLS'] = [] self._components['KERNEL_STRUCT_TYPEDEFS'] = cgen.Module( [cgen.Comment('#### Structs generated per ParticleDat ####')]) self._components['LIB_KERNEL_CALL'] = cgen.Module( [cgen.Comment('#### Kernel call ####')]) kernel_call_symbols = ['_H_N_LOCAL'] self._components['KERNEL_SCATTER'] = cgen.Module( [cgen.Comment('#### kernel scatter ####')]) self._components['KERNEL_GATHER'] = cgen.Module( [cgen.Comment('#### kernel gather ####')]) self._components['IF_SCATTER'] = cgen.Module( [cgen.Comment('#### if scatter ####')]) self._components['IF_GATHER'] = cgen.Module( [cgen.Comment('#### if gather ####')]) self._components['KERNEL_MAPPING'] = cgen.Module( [cgen.Comment('#### kernel symbol mapping ####')]) # =================== Static Args =============================== if self._kernel.static_args is not None: for i, datt in enumerate(self._kernel.static_args.items()): ksym = datt[0] ktype = datt[1] # Add to kernel args g = cgen.Const(cgen.Value(host.ctypes_map[ktype], ksym)) self._components['KERNEL_ARG_DECLS'].append(g) self._components['KERNEL_LIB_ARG_DECLS'].append(g) kernel_call_symbols.append(ksym) # =================== Dynamic Args =============================== for i, datt in enumerate(self._dat_dict.items()): assert type(datt[1]) is tuple, "Access descriptors not found" dati = datt[1][0] ksym = datt[0] dsym = 'd_' + ksym kacc = datt[1][1] # add to lib args kernel_lib_arg = cgen.Pointer( cgen.Value(host.ctypes_map[dati.dtype], Restrict(self._cc.restrict_keyword, ksym))) if type(dati) is cuda_data.GlobalArray or \ issubclass(type(dati), cuda_base.Array): # KERNEL ARGS DECLS ----------------------------- kernel_arg = cgen.Pointer( cgen.Value(host.ctypes_map[dati.dtype], Restrict(self._cc.restrict_keyword, dsym))) if not kacc.write: kernel_arg = cgen.Const(kernel_arg) self._components['KERNEL_ARG_DECLS'].append(kernel_arg) # KERNEL CALL SYMS ----------------------------- kernel_call_symbols.append(ksym) # KERNEL GATHER/SCATTER START ------------------ if not kacc.incremented: a = cgen.Pointer( cgen.Value(host.ctypes_map[dati.dtype], ksym)) a = cgen.Const(a) a = cgen.Initializer(a, dsym) self._components['IF_GATHER'].append(a) else: a = cgen.Initializer( cgen.Value(host.ctypes_map[dati.dtype], ksym + '[' + str(dati.ncomp) + ']'), '{0}') self._components['IF_GATHER'].append(a) # add the scatter code self._components['IF_SCATTER'].append( cgen.Line( generate_reduction_final_stage(dsym, ksym, dati))) # KERNEL GATHER/SCATTER END ------------------ elif issubclass(type(dati), cuda_base.Matrix): # KERNEL ARGS DECLS, STRUCT DECLS ---------------- dtype = dati.dtype ti = cgen.Pointer( cgen.Value(ctypes_map(dtype), Restrict(self._cc.restrict_keyword, 'i'))) if not kacc.write: ti = cgen.Const(ti) typename = '_' + ksym + '_t' self._components['KERNEL_STRUCT_TYPEDEFS'].append( cgen.Typedef(cgen.Struct('', [ti], typename))) # add to kernel args kernel_arg = cgen.Pointer( cgen.Value(host.ctypes_map[dati.dtype], Restrict(self._cc.restrict_keyword, dsym))) if not kacc.write: kernel_arg = cgen.Const(kernel_arg) self._components['KERNEL_ARG_DECLS'].append(kernel_arg) # KERNEL CALL SYMS ----------------------------- kernel_call_symbols.append(ksym) # KERNEL GATHER/SCATTER START ------------------ nc = str(dati.ncomp) _ishift = '+' + self._components['LIB_PAIR_INDEX_0'] + '*' + nc isym = dsym + _ishift g = cgen.Value(typename, ksym) g = cgen.Initializer(g, '{ ' + isym + '}') self._components['KERNEL_MAPPING'].append(g) # KERNEL GATHER/SCATTER END ------------------ # END OF IF ------------------------ # add to lib args if not kacc.write: kernel_lib_arg = cgen.Const(kernel_lib_arg) self._components['KERNEL_LIB_ARG_DECLS'].append(kernel_lib_arg) # KERNEL CALL SYMS ----------------------------- kernel_call_symbols_s = '' for sx in kernel_call_symbols: kernel_call_symbols_s += sx + ',' kernel_call_symbols_s = kernel_call_symbols_s[:-1] self._components['LIB_KERNEL_CALL'].append( cgen.Module([ cgen.Value('dim3', '_B'), cgen.Value('dim3', '_T'), cgen.Assign('_B.x', '_H_BLOCKSIZE[0]'), cgen.Assign('_B.y', '_H_BLOCKSIZE[1]'), cgen.Assign('_B.z', '_H_BLOCKSIZE[2]'), cgen.Assign('_T.x', '_H_THREADSIZE[0]'), cgen.Assign('_T.y', '_H_THREADSIZE[1]'), cgen.Assign('_T.z', '_H_THREADSIZE[2]') ])) self._components['LIB_KERNEL_CALL'].append( cgen.Line('k_' + self._kernel.name + '<<<_B,_T>>>(' + kernel_call_symbols_s + ');')) self._components['LIB_KERNEL_CALL'].append( cgen.Line('checkCudaErrors(cudaDeviceSynchronize());'))