def __init__(self, dtype, name, args, body, template: tuple = None, modifiers=''):
    """Assemble a function definition node from its parts.

    Args:
        dtype: Return type text, stored and passed to ``c.Value``.
        name: Function name, stored and passed to ``c.Value``.
        args: Argument declarations handed to ``c.FunctionDeclaration``.
        body: Body node handed to ``c.FunctionBody``.
        template: When not ``None``, the declaration is wrapped in
            ``c.Template`` using ``template[0]`` as the template
            specification; only the first element is read here.
        modifiers: Stored on the instance as-is; not otherwise used by
            this constructor.
    """
    self.dtype = dtype
    self.name = name
    self.value = c.Value(dtype, name)
    self.modifiers = modifiers
    self.is_template = template is not None

    plain_declaration = c.FunctionDeclaration(self.value, args)
    # Only templated functions get the extra c.Template wrapper.
    declaration = (
        c.Template(template[0], plain_declaration)
        if self.is_template
        else plain_declaration
    )
    self.fnc = c.FunctionBody(declaration, body)
def _template_function_declaration(self, template, body):
    """Return ``body`` wrapped in a ``c.Template`` with ``template`` as its spec."""
    templated = c.Template(template, body)
    return templated
# NOTE(review): this fragment is truncated in this view -- the `diff = [` list
# opened below is not closed on this line, so the code is left untouched.
#
# What the visible statements do:
# - Register `c.Include('cuda.h')` with `cuda_mod.add_to_preamble`.
# - `global_index` and `compute_diff` hold C statement text that is placed in
#   the `diffKernel` body block further down.
# - `launch` is a list of C statement strings (cuMemAlloc of `output`, a block
#   size of 256, a grid size of (length-1)/bSize + 1, the `diffKernel` launch,
#   and `return output`); it is not consumed within the visible part.
# - `diff` starts with the `typename T` templated `diffKernel` declaration
#   wrapped in `CudaGlobal`, followed by its body block: a `c.If` on
#   'index == 0' whose first branch copies element 0 and whose other branch is
#   a nested `c.If` on 'index < length' holding `compute_diff` and an empty
#   statement. After that comes the templated `difference` declaration.
cuda_mod.add_to_preamble([c.Include('cuda.h')]) global_index = 'int index = blockIdx.x * blockDim.x + threadIdx.x' compute_diff = 'outputPtr[index] = inputPtr[index] - inputPtr[index-1]' launch = [ 'CUdeviceptr output', 'cuMemAlloc(&output, sizeof(T) * length)', 'int bSize = 256', 'int gSize = (length-1)/bSize + 1', 'diffKernel<<<gSize, bSize>>>((T*)inputPtr, length, (T*)output)', 'return output' ] diff = [ c.Template( 'typename T', CudaGlobal( c.FunctionDeclaration(c.Value('void', 'diffKernel'), [ c.Value('T*', 'inputPtr'), c.Value('int', 'length'), c.Value('T*', 'outputPtr') ]))), c.Block([ c.Statement(global_index), c.If( 'index == 0', c.Statement('outputPtr[0] = inputPtr[0]'), c.If('index < length', c.Statement(compute_diff), c.Statement(''))) ]), c.Template( 'typename T', c.FunctionDeclaration( c.Value('CUdeviceptr', 'difference'), [c.Value('CUdeviceptr', 'inputPtr'), c.Value('int', 'length')])),
def generate_common(self):
    """Generate the code shared by all messages.

    In order, this:

    1. Inserts the forward declarations and definitions returned by
       ``_global_structs`` at the front of ``structs.forward`` and
       ``structs.header`` (the data entries end up ahead of the id ones).
    2. Appends the jump table declaration to ``marshal.header`` and its
       body to ``marshal.source``.
    3. Appends the opcode enum to ``opcodes.header``.
    4. Runs the language-specific string method generation.
    5. Emits one size function per user-defined message: the header goes
       to ``marshal.forward``; the body goes to ``marshal.header`` when
       ``_sizeof_message`` reports it as all-trivial, else to
       ``marshal.source``.
    6. Appends the templated ``message_size`` and ``pack_message``
       dispatchers to ``marshal.header``; each is a chain of
       ``if constexpr`` branches, one per user-defined message.
    """

    def dispatch_chain(statement_for):
        # One `if constexpr` / `else if constexpr` branch per user-defined
        # message; `statement_for` supplies the statement text of a branch.
        branches = []
        for position, message_name in enumerate(self.user_defined_messages):
            prefix = '' if position == 0 else 'else '
            condition = c.Line(
                prefix +
                f'if constexpr (std::is_same_v<std::decay_t<T>, {message_name}>)'
            )
            action = c.Block([c.Statement(statement_for(message_name))])
            branches.append(c.Collection([condition, action]))
        return branches

    # Shared structs: each insert goes to index 0, so the data entry lands
    # in front of the id entry inserted just before it.
    id_parts, data_parts = self._global_structs()
    id_forward, id_struct = id_parts
    data_forward, data_struct = data_parts
    self.structs.forward.insert(0, id_forward)
    self.structs.forward.insert(0, data_forward)
    self.structs.header.insert(0, id_struct)
    self.structs.header.insert(0, data_struct)

    # Jump table: declaration in the header, definition in the source.
    jump_decl, jump_body, _ = self._jump_table(CppMethod)
    self.marshal.header.append(jump_decl)
    self.marshal.source.append(jump_body)

    # Opcode enumeration.
    self.opcodes.header.append(self._opcode_enum())

    # String helpers specific to this target language.
    self.__generate_string_methods()

    # Per-message size functions.
    for message_name, message in self.user_defined_messages.items():
        size_decl, size_body, _, is_trivial = self._sizeof_message(
            CppMethod, message_name, message, fname=message_name)
        self.marshal.forward.append(size_decl)
        destination = (
            self.marshal.header if is_trivial else self.marshal.source
        )
        destination.append(size_body)

    # Generic `message_size<T>` dispatcher.
    size_signature = c.Template(
        'typename T',
        c.FunctionDeclaration(
            c.Value('inline uint8_t', 'message_size'),
            [self._data_cref('T', 'message')]))
    size_branches = dispatch_chain(
        lambda message_name: f'return {message_name}_size(message)')
    size_dispatch = c.FunctionBody(size_signature, c.Block(size_branches))
    self.marshal.header.append(size_dispatch)

    # Generic `pack_message<T>` dispatcher.
    pack_signature = c.Template(
        'typename T',
        c.FunctionDeclaration(
            c.Value('inline void', 'pack_message'),
            [
                self._data_object_ref(self._packet_type(), 'packet'),
                c.Value('T&&', 'message'),
            ]))
    pack_branches = dispatch_chain(
        lambda message_name:
        f'pack_{message_name}(packet, std::forward<T>(message))')
    pack_dispatch = c.FunctionBody(pack_signature, c.Block(pack_branches))
    self.marshal.header.append(pack_dispatch)
# NOTE(review): this fragment is truncated in this view -- the `diff = [` list
# opened below is not closed on this line, so the code is left untouched.
#
# What the visible statements do:
# - Register `c.Include("cuda.h")` with `cuda_mod.add_to_preamble`.
# - `global_index` and `compute_diff` hold C statement text that is placed in
#   the `diffKernel` body block further down.
# - `launch` is a list of C statement strings (cuMemAlloc of `output`, a block
#   size of 256, a grid size of (length-1)/bSize + 1, the `diffKernel` launch,
#   and `return output`); it is not consumed within the visible part.
# - `diff` starts with the `typename T` templated `diffKernel` declaration
#   wrapped in `CudaGlobal`, followed by its body block: a `c.If` on
#   "index == 0" whose first branch copies element 0 and whose other branch is
#   a nested `c.If` on "index < length" holding `compute_diff` and an empty
#   statement. After that comes the templated `difference` declaration.
cuda_mod.add_to_preamble([c.Include("cuda.h")]) global_index = "int index = blockIdx.x * blockDim.x + threadIdx.x" compute_diff = "outputPtr[index] = inputPtr[index] - inputPtr[index-1]" launch = [ "CUdeviceptr output", "cuMemAlloc(&output, sizeof(T) * length)", "int bSize = 256", "int gSize = (length-1)/bSize + 1", "diffKernel<<<gSize, bSize>>>((T*)inputPtr, length, (T*)output)", "return output" ] diff = [ c.Template( "typename T", CudaGlobal( c.FunctionDeclaration(c.Value("void", "diffKernel"), [ c.Value("T*", "inputPtr"), c.Value("int", "length"), c.Value("T*", "outputPtr") ]))), c.Block([ c.Statement(global_index), c.If( "index == 0", c.Statement("outputPtr[0] = inputPtr[0]"), c.If("index < length", c.Statement(compute_diff), c.Statement(""))) ]), c.Template( "typename T", c.FunctionDeclaration( c.Value("CUdeviceptr", "difference"), [c.Value("CUdeviceptr", "inputPtr"), c.Value("int", "length")])),