def _generate_lib_inner_loop(self):
    """Build the inner pair loop and store it under ``LIB_INNER_LOOP``.

    Two ``for`` loops over the second pair index are emitted, both running
    the ``LIB_INNER_LOOP_BLOCK`` component: the first covers ``[0, i)`` and
    the second ``[i + 1, _N_LOCAL)``, so ``j == i`` is never visited.
    """
    components = self._components
    first_idx = components['LIB_PAIR_INDEX_0']
    second_idx = components['LIB_PAIR_INDEX_1']
    loop_body = components['LIB_INNER_LOOP_BLOCK']

    advance = second_idx + '++'
    below_i = cgen.For('int ' + second_idx + '=0',
                       second_idx + '<' + first_idx,
                       advance,
                       loop_body)
    above_i = cgen.For('int ' + second_idx + '=1+' + first_idx,
                       second_idx + '< _N_LOCAL',
                       advance,
                       loop_body)

    components['LIB_INNER_LOOP'] = cgen.Module([below_i, above_i])
def _generate_lib_outer_loop(self):
    """Build the triple loop over cells and store it as ``LIB_OUTER_LOOP``.

    The innermost body is gather -> inner loop -> scatter.  The three cell
    loops run from ``N_CELL_PAD`` up to ``N_CELL_{X,Y,Z} - N_CELL_PAD`` and
    are preceded by an ``omp parallel for ... collapse(3)`` pragma with a
    ``+`` reduction on a local execution counter.  When
    ``runtime.OMP_NUM_THREADS`` is ``None`` the pragma is wrapped in a
    comment instead.  After the loops the local counter is added to the
    value pointed to by ``EXEC_COUNT``.
    """
    comps = self._components

    work = cgen.Block([
        comps['LIB_KERNEL_GATHER'],
        comps['LIB_INNER_LOOP'],
        comps['LIB_KERNEL_SCATTER'],
    ])

    cell_x = comps['LIB_CELL_CX']
    cell_y = comps['LIB_CELL_CY']
    cell_z = comps['LIB_CELL_CZ']
    extent_x = comps['N_CELL_X']
    extent_y = comps['N_CELL_Y']
    extent_z = comps['N_CELL_Z']

    counter = comps['EXEC_COUNT']
    local_counter = '_' + counter
    pad = comps['N_CELL_PAD']

    shared_syms = ','.join(comps['OMP_SHARED_SYMS'])

    directive = cgen.Pragma(
        'omp parallel for default(none) reduction(+:' + local_counter
        + ') schedule(dynamic) collapse(3) '
        + 'shared(' + shared_syms + ')')
    if runtime.OMP_NUM_THREADS is None:
        # No OpenMP thread count configured: keep the directive as a comment.
        directive = cgen.Comment(directive)

    def cell_loop(index, extent, body):
        """Loop ``index`` over the non-padded range of one cell dimension."""
        return cgen.For('INT64 ' + index + '=' + pad,
                        index + '<' + extent + '-' + pad,
                        index + '++',
                        body)

    z_loop = cell_loop(cell_z, extent_z, work)
    y_loop = cell_loop(cell_y, extent_y, cgen.Block((z_loop, )))
    x_loop = cell_loop(cell_x, extent_x, cgen.Block([y_loop]))

    comps['LIB_OUTER_LOOP'] = cgen.Module([
        cgen.Line('omp_set_num_threads(_NUM_THREADS);'),
        cgen.Line('INT64 ' + local_counter + ' = 0;'),
        directive,
        x_loop,
        cgen.Line('*' + counter + ' += ' + local_counter + ';'),
    ])
def __generate_string_methods(self):
    """Append the static ``pack_string`` / ``unpack_string`` C# methods.

    ``pack_string`` writes the string length as one byte followed by each
    character as a byte.  ``unpack_string`` emits two guards (for the size
    byte and for ``size``), reads the string from the packet and returns
    ``true``.  Both methods are appended to ``self.all_packers``.
    """
    # --- pack_string -----------------------------------------------------
    write_char = c.Block(
        [c.Statement('packet.Data.WriteByte((byte)str[i])')])
    pack_code = c.Block([
        c.Statement('byte size = (byte)str.Length'),
        c.Statement('packet.Data.WriteInt8(size)'),
        c.For('byte i = 0', 'i < size', '++i', write_char),
    ])
    pack_params = [
        self._data_object_ref(self._packet_type(), 'packet'),
        self._data_cref(self.get_dtype('string'), 'str'),
    ]
    packer = CSharpMethod('void', 'pack_string', pack_params, pack_code)
    self.all_packers.append(packer.modifier('static'))

    # --- unpack_string ---------------------------------------------------
    unpack_code = c.Block([
        WrapUnpack._guard('sizeof(byte)'),
        c.Statement('byte size = packet.Data.ReadUInt8(packet.Data.Size)'),
        WrapUnpack._guard('size'),
        c.Statement('str = packet.Data.ReadString()'),
        c.Statement('return true'),
    ])
    unpack_params = [
        self._pointer_type('PacketReader', 'packet'),
        self._data_value_ref(self.get_dtype('string'), 'str'),
    ]
    unpacker = CSharpMethod('bool', 'unpack_string', unpack_params,
                            unpack_code)
    self.all_packers.append(unpacker.modifier('static'))
def _generate_lib_outer_loop(self):
    """Build the per-thread outer loop and store it as ``LIB_OUTER_LOOP``.

    The generated code opens an ``omp parallel`` region in which each thread
    declares ``_thread_start`` / ``_thread_end``, fills them through
    ``get_thread_decomp`` and then runs ``LIB_KERNEL_CALL`` for every first
    pair index in ``[_thread_start, _thread_end)``.
    """
    comps = self._components
    kernel_call = cgen.Block([comps['LIB_KERNEL_CALL']])
    idx = comps['LIB_PAIR_INDEX_0']

    shared_syms = ','.join(comps['OMP_SHARED_SYMS'])
    directive = cgen.Pragma(
        'omp parallel default(none) shared(' + shared_syms + ')')

    thread_loop = cgen.For('int ' + idx + '= _thread_start',
                           idx + '< _thread_end',
                           idx + '++',
                           kernel_call)
    region = cgen.Block((
        cgen.Value('int', '_thread_start'),
        cgen.Value('int', '_thread_end'),
        cgen.Line(
            'get_thread_decomp((int)_N_LOCAL, &_thread_start, &_thread_end);'
        ),
        thread_loop,
    ))

    comps['LIB_OUTER_LOOP'] = cgen.Module([
        cgen.Line('omp_set_num_threads(_NUM_THREADS);'),
        directive,
        region,
    ])
def eval(self, generator):
    """Emit the C# unpacking code for this field and return it.

    Optional fields are preceded by a ``sizeof(bool)`` guard and unpacked
    inside an ``if`` on the presence byte, targeting ``<field>.Value``.
    Vectors read a one-byte element count and, when it is positive, allocate
    the list and unpack each element in a loop; every other type is
    delegated to ``self._unpack``.

    :param generator: object providing ``get_dtype`` for the target language
    :returns: the outermost ``c.Collection`` holding the generated code
    """
    target = f'data.{self.base.name.eval()}'
    idl_dtype = self.base.dtype.dtype.eval()
    dtype = generator.get_dtype(idl_dtype)

    root = c.Collection()
    sink = root

    if self.base.optional:
        present_branch = c.Block()
        sink.append(self._guard('sizeof(bool)'))
        sink.append(c.If('packet.Data.ReadByte() == 1', present_branch))
        # Everything below is emitted inside the "value present" branch.
        sink = present_branch
        target = f'{target}.Value'

    if idl_dtype != 'vector':
        sink.append(self._unpack(generator, target, idl_dtype, dtype))
        return root

    spec = self.base.dtype.spec.eval()
    size_var = f'size_{spec}'
    sink.append(self._guard('sizeof(byte)'))
    sink.append(c.Statement(f'var {size_var} = packet.Data.ReadUInt8()'))

    allocate = c.Statement(f'{target} = new List<{spec}>({size_var})')
    read_items = c.For('byte i = 0', f'i < {size_var}', '++i',
                       self._unpack_vector(generator, target))
    sink.append(c.If(f'{size_var} > 0', c.Block([allocate, read_items])))
    return root
def _generate_lib_outer_loop(self):
    """Build the outer particle loop and store it as ``LIB_OUTER_LOOP``.

    Each iteration over the first pair index (``0 .. _N_LOCAL``) runs
    gather -> inner loop -> scatter.  The loop is preceded by an
    ``omp parallel for schedule(static)`` pragma; the shared-symbol list is
    placed after a ``//`` in the pragma text.  When
    ``runtime.OMP_NUM_THREADS`` is ``None`` the pragma is wrapped in a
    comment instead.
    """
    comps = self._components
    work = cgen.Block([
        comps['LIB_KERNEL_GATHER'],
        comps['LIB_INNER_LOOP'],
        comps['LIB_KERNEL_SCATTER'],
    ])
    idx = comps['LIB_PAIR_INDEX_0']

    shared_syms = ','.join(comps['OMP_SHARED_SYMS'])
    directive = cgen.Pragma(
        'omp parallel for schedule(static) // default(shared) shared('
        + shared_syms + ')')
    if runtime.OMP_NUM_THREADS is None:
        directive = cgen.Comment(directive)

    particle_loop = cgen.For('int ' + idx + '=0',
                             idx + '<_N_LOCAL',
                             idx + '++',
                             work)

    comps['LIB_OUTER_LOOP'] = cgen.Module([
        cgen.Line('omp_set_num_threads(_NUM_THREADS);'),
        directive,
        particle_loop,
    ])
def visit_Iteration(self, o):
    """Render an Iteration node as a ``c.For`` loop.

    The loop runs inclusively between ``o.limits[0]`` and ``o.limits[1]``
    with step ``o.limits[2]``; for ``Backward`` iterations the bounds are
    swapped and the index is decremented.  Unbounded indices, if any, are
    initialised and stepped in the same loop header.  Pragmas attached to
    the node are emitted in a ``c.Module`` ahead of the loop.
    """
    body = flatten(self._visit(child) for child in o.children)

    lower, upper = o.limits[0], o.limits[1]

    # For backward direction flip loop bounds
    if o.direction == Backward:
        first, last, relation, stepper = upper, lower, '>=', '-='
    else:
        first, last, relation, stepper = lower, upper, '<=', '+='

    loop_init = 'int %s = %s' % (o.index, ccode(first))
    loop_cond = '%s %s %s' % (o.index, relation, ccode(last))
    loop_inc = '%s %s %s' % (o.index, stepper, o.limits[2])

    # Append unbounded indices, if any
    if o.uindices:
        inits = ['%s = %s' % (u.name, ccode(u.symbolic_min))
                 for u in o.uindices]
        loop_init = c.Line(', '.join([loop_init] + inits))

        # Modulo indices are re-assigned, all others are incremented
        steps = ['%s %s %s' % (u.name,
                               '=' if u.is_Modulo else '+=',
                               ccode(u.symbolic_incr))
                 for u in o.uindices]
        loop_inc = c.Line(', '.join([loop_inc] + steps))

    # Create For header+body
    loop = c.For(loop_init, loop_cond, loop_inc, c.Block(body))

    # Attach pragmas, if any
    if not o.pragmas:
        return loop
    return c.Module(o.pragmas + (loop,))
def execute_time_loop(self):
    """Build the time-stepping ``for`` loop over ``_ti``.

    The loop body is, in order: the grid's time stepping code, the stress
    loop, the stress boundary conditions, the velocity loop, the velocity
    boundary conditions and, when present, the output step.  When
    ``self.pluto`` is set, the stress and velocity loops are each wrapped
    in a block delimited by ``scop`` / ``endscop`` pragmas.

    :returns: :class:`cgen.For` iterating ``_ti`` from 0 while
        ``_ti < ntsteps``
    """
    def maybe_scop(loop):
        """Wrap ``loop`` in scop pragmas when pluto is enabled."""
        if not self.pluto:
            return loop
        return cgen.Block([
            cgen.Pragma("scop"),
            loop,
            cgen.Pragma("endscop"),
        ])

    grid = self.grid
    statements = [grid.time_stepping]
    statements.append(maybe_scop(grid.stress_loop))
    statements.append(grid.stress_bc)
    statements.append(maybe_scop(grid.velocity_loop))
    statements.append(grid.velocity_bc)

    output_step = grid.output_step
    if output_step:
        statements.append(output_step)

    counter = cgen.InlineInitializer(cgen.Value("int", "_ti"), 0)
    return cgen.For(counter, "_ti < ntsteps", "_ti++",
                    cgen.Block(statements))
def eval(self, generator):
    """Emit the C++ unpacking code for this field and return it.

    Optional fields are preceded by a ``sizeof(bool)`` guard and unpacked
    inside ``if (packet->read<bool>())``, targeting the dereferenced field.
    Vectors read a one-byte element count in the ``if`` initialiser and,
    when it is positive, reserve storage and unpack each element in a loop;
    every other type is delegated to ``self._unpack``.

    :param generator: object providing ``get_dtype`` for the target language
    :returns: the outermost ``c.Collection`` holding the generated code
    """
    target = f'data.{self.base.name.eval()}'
    idl_dtype = self.base.dtype.dtype.eval()
    dtype = generator.get_dtype(idl_dtype)

    root = c.Collection()
    sink = root

    if self.base.optional:
        present_branch = c.Block()
        sink.append(self._guard('sizeof(bool)'))
        sink.append(c.If('packet->read<bool>()', present_branch))
        # Everything below is emitted inside the "value present" branch.
        sink = present_branch
        target = f'(*{target})'

    if idl_dtype != 'vector':
        sink.append(self._unpack(generator, target, idl_dtype, dtype))
        return root

    spec = self.base.dtype.spec.eval()
    size_var = f'size_{spec}'
    sink.append(self._guard('sizeof(uint8_t)'))

    reserve = c.Statement(f'{target}.reserve({size_var})')
    read_items = c.For('uint8_t i = 0', f'i < {size_var}', '++i',
                       self._unpack_vector(generator, target))
    sink.append(
        c.If(f'auto {size_var} = packet->read<uint8_t>(); {size_var} > 0',
             c.Block([reserve, read_items])))
    return root
def _generate_kernel_scatter(self):
    """Build the post-kernel scatter code; store it as ``LIB_KERNEL_SCATTER``.

    A copy-back loop is emitted for every entry of ``self._dat_dict`` that
    satisfies all of:

    * its object is not a ``host._Array`` and is a ``host.Matrix``,
    * its access mode has ``write`` set,
    * its ``ncomp`` does not exceed ``self._gather_size_limit``.

    The loop copies ``<name>i[_tx]`` into
    ``<name>[ncomp * <first pair index> + _tx]`` for ``_tx`` in
    ``[0, ncomp)``.  Entries that do not qualify produce no code.
    """
    kernel_scatter = cgen.Module(
        [cgen.Comment('#### Post kernel scatter ####')])

    for name, entry in self._dat_dict.items():
        obj = entry[0]

        # Arrays are never scattered back.
        if isinstance(obj, host._Array):
            continue
        # entry[1] (the access mode) is only inspected for matrices.
        if not (isinstance(obj, host.Matrix)
                and entry[1].write
                and obj.ncomp <= self._gather_size_limit):
            continue

        ncomp = str(obj.ncomp)
        ix = self._components['LIB_PAIR_INDEX_0']
        copy_back = cgen.Assign(
            name + '[' + ncomp + '*' + ix + '+_tx]',
            name + 'i[_tx]')
        kernel_scatter.append(
            cgen.For('int _tx=0', '_tx<' + ncomp, '_tx++',
                     cgen.Block([copy_back])))

    self._components['LIB_KERNEL_SCATTER'] = kernel_scatter
def _generate_lib_inner_loop(self):
    """Build the strided inner loop and store it as ``LIB_INNER_LOOP``.

    The loop variable ``_k`` starts at ``i * _STRIDE`` and runs while it is
    below ``_NN[i] + i * _STRIDE``, with all operands cast to
    ``unsigned long long`` in the generated code.  The body is the
    ``LIB_INNER_LOOP_BLOCK`` component.
    """
    idx = self._components['LIB_PAIR_INDEX_0']
    loop_body = self._components['LIB_INNER_LOOP_BLOCK']

    # Offset of the first entry for index ``idx``.
    row_start = ('((unsigned long long)' + idx
                 + ') * ((unsigned long long)_STRIDE)')

    init = 'unsigned long long _k= ' + row_start
    cond = '_k<((unsigned long long)_NN[' + idx + '])+' + row_start

    self._components['LIB_INNER_LOOP'] = cgen.For(
        init, cond, '_k++', loop_body)
def _generate_lib_outer_loop(self):
    """Build the serial outer loop and store it as ``LIB_OUTER_LOOP``.

    The loop runs the ``LIB_KERNEL_CALL`` component once for every value of
    the first pair index in ``[0, _N_LOCAL)``.
    """
    comps = self._components
    kernel_call = cgen.Block([comps['LIB_KERNEL_CALL']])
    idx = comps['LIB_PAIR_INDEX_0']

    comps['LIB_OUTER_LOOP'] = cgen.For(
        'int ' + idx + '=0',
        idx + '<_N_LOCAL',
        idx + '++',
        kernel_call)
def generate(self, funcname, field_args, const_args, kernel_ast, c_include):
    """Assemble the complete C source for a kernel and its particle loop.

    The result consists of, in order: the ``parcels.h`` and ``math.h``
    includes, the particle struct typedef, ``c_include`` (when truthy), the
    kernel code and the ``particle_loop`` driver function.

    :param funcname: name of the generated kernel function to invoke
    :param field_args: mapping whose keys name the ``CField*`` arguments
    :param const_args: mapping whose keys name the ``float`` arguments
    :param kernel_ast: object whose ``str()`` is the kernel's C code
    :param c_include: extra C source inserted before the kernel, or falsy
    :returns: the sections joined by blank lines
    """
    struct_name = self.ptype.name
    field_names = list(field_args.keys())
    const_names = list(const_args.keys())

    # Add include for Parcels and math header
    sections = [
        str(c.Include("parcels.h", system=False)),
        str(c.Include("math.h", system=False)),
    ]

    # Generate type definition for particle type; uint64 variables are
    # declared as void pointers
    members = [
        c.Pointer(c.POD(np.void, v.name)) if v.dtype == np.uint64
        else c.POD(v.dtype, v.name)
        for v in self.ptype.variables
    ]
    sections.append(
        str(c.Typedef(c.GenerableStruct("", members, declname=struct_name))))

    if c_include:
        sections.append(c_include)

    # Insert kernel code
    sections.append(str(kernel_ast))

    # Generate outer loop for repeated kernel invocation
    params = [
        c.Value("int", "num_particles"),
        c.Pointer(c.Value(struct_name, "particles")),
        c.Value("double", "endtime"),
        c.Value("float", "dt"),
    ]
    params.extend(
        c.Pointer(c.Value("CField", "%s" % name)) for name in field_names)
    params.extend(c.Value("float", name) for name in const_names)

    call_args = ", ".join(
        ['particles[p].time', 'sign_dt * __dt'] + field_names + const_names)

    # Building blocks of the loop nest
    sign_dt = c.Assign("sign_dt", "dt > 0 ? 1 : -1")
    sign_end_part = c.Assign("sign_end_part",
                             "endtime - particles[p].time > 0 ? 1 : -1")
    dt_pos = c.Assign(
        "__dt",
        "fmin(fabs(particles[p].dt), fabs(endtime - particles[p].time))")
    dt_0_break = c.If("particles[p].dt == 0", c.Statement("break"))
    notstarted_continue = c.If(
        "(sign_end_part != sign_dt) && (particles[p].dt != 0)",
        c.Statement("continue"))

    on_success = c.Block([
        c.Statement("particles[p].time += sign_dt * __dt"),
        dt_pos,
        dt_0_break,
        c.Statement("continue"),
    ])
    on_repeat = c.Block([dt_pos, c.Statement("continue")])

    step = [
        c.Assign("res", "%s(&(particles[p]), %s)" % (funcname, call_args)),
        # Store return code on particle
        c.Assign("particles[p].state", "res"),
        c.If("res == SUCCESS", on_success),
        c.If("res == REPEAT", on_repeat, c.Statement("break")),
    ]
    time_loop = c.While("__dt > __tol || particles[p].dt == 0",
                        c.Block(step))
    part_loop = c.For(
        "p = 0", "p < num_particles", "++p",
        c.Block([sign_end_part, notstarted_continue, dt_pos, time_loop]))

    driver_body = c.Block([
        c.Value("int", "p, sign_dt, sign_end_part"),
        c.Value("ErrorCode", "res"),
        c.Value("double", "__dt, __tol"),
        c.Assign("__tol", "1.e-6"),
        sign_dt,
        part_loop,
    ])
    driver_decl = c.FunctionDeclaration(
        c.Value("void", "particle_loop"), params)
    sections.append(str(c.FunctionBody(driver_decl, driver_body)))

    return "\n\n".join(sections)
def _generate_lib_inner_loop(self):
    """Build ``LIB_LOOP_J_PREPARE`` and ``LIB_INNER_LOOP``.

    The prepare component declares ``_icell`` (read from ``_CRL`` at the
    first pair index), ``_JJSTORE`` (the ``_JSTORE`` entry for the current
    OpenMP thread index symbol) and a zeroed counter ``_nn``.

    The inner loop component consists of two loops: the first runs
    ``LIB_INNER_LOOP_BLOCK`` for ``_k`` in ``[0, 27)``; the second reads the
    second pair index from ``_JJSTORE[_k2]`` for ``_k2`` in ``[0, _nn)`` and
    runs ``LIB_KERNEL_CALL``.
    """
    comps = self._components
    first_idx = comps['LIB_PAIR_INDEX_0']
    second_idx = comps['LIB_PAIR_INDEX_1']

    thread_sym = comps['OMP_THREAD_INDEX_SYM']
    comps['LIB_LOOP_J_PREPARE'] = cgen.Module([
        cgen.Line('const int _icell = _CRL[' + first_idx + '];'),
        cgen.Line('int * _JJSTORE = _JSTORE[' + thread_sym + '];'),
        cgen.Line('int _nn = 0;'),
    ])

    collect_block = comps['LIB_INNER_LOOP_BLOCK']
    collect_loop = cgen.For('int _k=0', '_k<27', '_k++', collect_block)

    apply_block = cgen.Block([
        cgen.Line('const int ' + second_idx + ' = _JJSTORE[_k2];'),
        comps['LIB_KERNEL_CALL'],
    ])
    apply_loop = cgen.For('int _k2=0', '_k2<_nn', '_k2++', apply_block)

    comps['LIB_INNER_LOOP'] = cgen.Module([collect_loop, apply_loop])
def visit_Iteration(self, o): body = flatten(self.visit(i) for i in o.children) # Start if o.offsets[0] != 0: start = str(o.limits[0] + o.offsets[0]) try: start = eval(start) except (NameError, TypeError): pass else: start = o.limits[0] # Bound if o.offsets[1] != 0: end = str(o.limits[1] + o.offsets[1]) try: end = eval(end) except (NameError, TypeError): pass else: end = o.limits[1] # For backward direction flip loop bounds if o.direction == Backward: loop_init = 'int %s = %s' % (o.index, ccode(end)) loop_cond = '%s >= %s' % (o.index, ccode(start)) loop_inc = '%s -= %s' % (o.index, o.limits[2]) else: loop_init = 'int %s = %s' % (o.index, ccode(start)) loop_cond = '%s <= %s' % (o.index, ccode(end)) loop_inc = '%s += %s' % (o.index, o.limits[2]) # Append unbounded indices, if any if o.uindices: uinit = [ '%s = %s' % (i.name, ccode(i.symbolic_start)) for i in o.uindices ] loop_init = c.Line(', '.join([loop_init] + uinit)) ustep = [ '%s = %s' % (i.name, ccode(i.symbolic_incr)) for i in o.uindices ] loop_inc = c.Line(', '.join([loop_inc] + ustep)) # Create For header+body handle = c.For(loop_init, loop_cond, loop_inc, c.Block(body)) # Attach pragmas, if any if o.pragmas: handle = c.Module(o.pragmas + (handle, )) return handle
def generate(self, funcname, field_args, kernel_ast, adaptive=False):
    """Assemble the complete C source for a kernel and its particle loop.

    The result consists of the ``parcels.h`` and ``math.h`` includes, the
    particle struct typedef, the kernel code and the ``particle_loop``
    driver.  The driver contains one loop nest for ``dt > 0.0`` (clamping
    the step with ``fmin``) and one for the opposite case (clamping with
    ``fmax``).

    :param funcname: name of the generated kernel function to invoke
    :param field_args: mapping whose keys name the ``CField*`` arguments
    :param kernel_ast: object whose ``str()`` is the kernel's C code
    :param adaptive: accepted for interface compatibility; not used here
    :returns: the sections joined by blank lines
    """
    struct_name = self.ptype.name
    field_names = list(field_args.keys())

    # Add include for Parcels and math header
    sections = [
        str(c.Include("parcels.h", system=False)),
        str(c.Include("math.h", system=False)),
    ]

    # Generate type definition for particle type
    members = [c.POD(dtype, var)
               for var, dtype in self.ptype.var_types.items()]
    sections.append(
        str(c.Typedef(c.GenerableStruct("", members, declname=struct_name))))

    # Insert kernel code
    sections.append(str(kernel_ast))

    # Generate outer loop for repeated kernel invocation
    params = [
        c.Value("int", "num_particles"),
        c.Pointer(c.Value(struct_name, "particles")),
        c.Value("double", "endtime"),
        c.Value("float", "dt"),
    ]
    params.extend(
        c.Pointer(c.Value("CField", "%s" % name)) for name in field_names)

    call_args = ", ".join(
        ['particles[p].time', 'particles[p].dt'] + field_names)
    kernel_call = "res = %s(&(particles[p]), %s)" % (funcname, call_args)

    def particle_nest(clamp, keep_going):
        """Loop nest over particles for one time direction."""
        update_dt = c.Statement(
            "__dt = %s(particles[p].dt, endtime - particles[p].time)"
            % clamp)
        step = [
            c.Statement(kernel_call),
            c.If("res == SUCCESS",
                 c.Statement("particles[p].time += __dt")),
            update_dt,
        ]
        time_loop = c.While(keep_going, c.Block(step))
        return c.For("p = 0", "p < num_particles", "++p",
                     c.Block([update_dt, time_loop]))

    # Inner loop nest for forward runs
    forward = particle_nest("fmin", "__dt > __tol")
    # Inner loop nest for backward runs
    backward = particle_nest("fmax", "__dt < -1. * __tol")

    direction_switch = c.If("dt > 0.0",
                            c.Block([forward]),
                            c.Block([backward]))
    driver_body = c.Block([
        c.Value("int", "p"),
        c.Value("KernelOp", "res"),
        c.Value("double", "__dt, __tol"),
        c.Assign("__tol", "1.e-6"),
        direction_switch,
    ])
    driver_decl = c.FunctionDeclaration(
        c.Value("void", "particle_loop"), params)
    sections.append(str(c.FunctionBody(driver_decl, driver_body)))

    return "\n\n".join(sections)
def _generate_kernel_func(self):
    """Build the ``__global__`` kernel function; store it as ``KERNEL_FUNC``.

    The function is named ``k_<kernel name>`` and takes the
    ``KERNEL_ARG_DECLS`` arguments.  It first initialises the first pair
    index from ``threadIdx.x + blockIdx.x*blockDim.x``.  Between the
    ``IF_GATHER`` and ``IF_SCATTER`` components, guarded by
    ``<index> < _D_N_LOCAL``, it gathers, loops ``_k`` from 1 to
    ``_D_NMATRIX[<index>]`` inclusive (reading the second pair index from
    ``_D_NMATRIX[<index> + _D_N_LOCAL * _k]``, applying the mapping and the
    kernel code) and scatters.
    """
    comps = self._components
    first_idx = comps['LIB_PAIR_INDEX_0']

    def const_int(symbol, value):
        """``const <int32> symbol = value`` declaration."""
        return cgen.Initializer(
            cgen.Const(cgen.Value(host.int32_str, symbol)), value)

    neighbour_body = cgen.Block([
        const_int(comps['LIB_PAIR_INDEX_1'],
                  '_D_NMATRIX[' + first_idx + ' + _D_N_LOCAL * _k ]'),
        comps['KERNEL_MAPPING'],
        cgen.Line(self._kernel.code),
    ])
    neighbour_loop = cgen.For(
        'int _k=1',
        '_k<=_D_NMATRIX[' + first_idx + ']',
        '_k++',
        neighbour_body)

    guarded = cgen.If(
        first_idx + '<_D_N_LOCAL',
        cgen.Block([
            comps['KERNEL_GATHER'],
            neighbour_loop,
            comps['KERNEL_SCATTER'],
        ]))

    func_body = cgen.Block([
        const_int(first_idx, 'threadIdx.x + blockIdx.x*blockDim.x'),
        comps['IF_GATHER'],
        guarded,
        comps['IF_SCATTER'],
    ])

    signature = cgen.FunctionDeclaration(
        cgen.DeclSpecifier(
            cgen.Value("void", 'k_' + self._kernel.name), '__global__'),
        comps['KERNEL_ARG_DECLS'])

    comps['KERNEL_FUNC'] = cgen.FunctionBody(signature, func_body)
def _generate_lib_inner_loop_block(self):
    """Build the per-cell collection block (``LIB_INNER_LOOP_BLOCK``).

    The generated code computes ``_jcell`` from ``_icell`` and
    ``_OFFSET[_k]``, reads the first entry of that cell from ``_CELL_LIST``
    and then, ``_CCC[_jcell]`` times, appends the current second pair index
    to ``_JJSTORE`` (unless it equals the first pair index) and follows
    ``_CELL_LIST`` to the next entry.
    """
    first_idx = self._components['LIB_PAIR_INDEX_0']
    second_idx = self._components['LIB_PAIR_INDEX_1']

    store_if_distinct = cgen.Line(
        'if({I}!={J}){{_JJSTORE[_nn++]={J};}}'.format(I=first_idx,
                                                     J=second_idx))
    follow_list = cgen.Line(
        second_idx + ' = _CELL_LIST[' + second_idx + '];')
    walk_cell = cgen.For(
        'int _k2=0', '_k2<_CCC[_jcell]', '_k2++',
        cgen.Block([store_if_distinct, follow_list]))

    self._components['LIB_INNER_LOOP_BLOCK'] = cgen.Block([
        cgen.Line('const int _jcell = _icell + _OFFSET[_k];'),
        cgen.Line('int ' + second_idx
                  + ' = _CELL_LIST[_jcell + _LIST_OFFSET];'),
        walk_cell,
    ])
def print_loop_structure(self, loop_index: str, lower_bound: int, upper_bound: int, affine: List[List[int]], fun):
    """Wrap ``fun`` in a ``for`` loop over ``loop_index``.

    The emitted loop has the shape::

        for (int <loop_index> = <affine[0][0]>;
             <loop_index> < <affine[1][0]>;
             <loop_index>++) { <fun> }

    :param loop_index: name of the loop variable
    :param lower_bound: unused
    :param upper_bound: unused
    :param affine: nested list; ``affine[0][0]`` is used as the start value
        and ``affine[1][0]`` as the exclusive upper bound
    :param fun: generated code used as the loop body
    :return: ``c.For`` wrapping ``fun``
    """
    start_value = affine[0][0]
    stop_value = affine[1][0]

    init = f'int {loop_index} = {start_value}'
    cond = f'{loop_index} < ' + str(stop_value)
    step = f'{loop_index}++'
    return c.For(init, cond, step, fun)
def ccode(self):
    """Generate C code for the represented stencil loop

    The loop counts upwards when ``limits[1] >= limits[0]`` and downwards
    otherwise; the end bound is exclusive.  A step of exactly 1 is rendered
    as ``++`` / ``--``, any other step as ``+=`` / ``-=``.

    :returns: :class:`cgen.For` object representing the loop
    """
    counts_up = self.limits[1] >= self.limits[0]
    if counts_up:
        relation, unit_step, compound_step = '<', '++', '+='
    else:
        relation, unit_step, compound_step = '>', '--', '-='

    body = cgen.Block([expr.ccode for expr in self.expressions])
    init = cgen.InlineInitializer(cgen.Value("int", self.index),
                                  self.limits[0])
    cond = '%s %s %s' % (self.index, relation, self.limits[1])

    if self.limits[2] == 1:
        step = '%s%s' % (self.index, unit_step)
    else:
        step = '%s %s %s' % (self.index, compound_step, self.limits[2])

    return cgen.For(init, cond, step, body)
def _generate_lib_inner_loop(self):
    """Build the cell-pair inner loop and store it as ``LIB_INNER_LOOP``.

    For each of 27 offsets ``_k`` the second cell index is computed as the
    first cell index plus ``_OFFSET[_k]``, the ``J_GATHER`` component runs,
    and then one of two loop nests is selected:

    * cells differ: every first index is paired with every second index and
      the execution counter grows by ``CCC_1`` per first index;
    * cells are equal: the second index skips the first index's own value
      and the counter grows by ``CCC_1 - 1``.

    In both nests the work for a first index is guarded by
    ``<partition>[i] < N_LOCAL`` and bracketed by the kernel gather and
    scatter components.
    """
    comps = self._components
    idx_i = comps['LIB_PAIR_INDEX_0']
    idx_j = comps['LIB_PAIR_INDEX_1']
    count_i = comps['CCC_0']
    count_j = comps['CCC_1']
    cell_i = comps['LIB_CELL_INDEX_0']
    cell_j = comps['LIB_CELL_INDEX_1']
    n_local = comps['N_LOCAL']
    exec_sym = '_' + comps['EXEC_COUNT']
    partition = comps['PARTICLE_DAT_PARTITION'].idict[comps['TMP_INDEX']]
    gather = comps['KERNEL_GATHER']
    scatter = comps['KERNEL_SCATTER']

    def j_loop(start, stop):
        """Kernel call for the second index in ``[start, stop)``."""
        return cgen.For('INT64 ' + idx_j + '=' + start,
                        idx_j + '<' + stop,
                        idx_j + '++',
                        cgen.Block(comps['LIB_KERNEL_CALL']))

    def i_loop(j_loops, increment):
        """Guarded gather / j-loops / scatter for every first index."""
        work = cgen.Block(
            (cgen.Line(gather), )
            + j_loops
            + (cgen.Line(scatter),
               cgen.Line(exec_sym + '+=' + increment + ';')))
        guarded = cgen.Block(
            (cgen.If(partition + '[' + idx_i + ']<' + n_local, work), ))
        return cgen.Block((cgen.For('INT64 ' + idx_i + '=0',
                                    idx_i + '<' + count_i,
                                    idx_i + '++',
                                    guarded), ))

    different_cells = i_loop((j_loop('0', count_j), ), count_j)
    same_cell = i_loop(
        (j_loop('0', idx_i), j_loop('1+' + idx_i, count_j)),
        count_j + '-1')

    select_nest = cgen.If(cell_i + '==' + cell_j, same_cell, different_cells)

    offset_body = cgen.Block((
        cgen.Line('const INT64 {jcell} = {icell} + _OFFSET[_k];'.format(
            jcell=cell_j, icell=cell_i)),
        comps['J_GATHER'],
        select_nest,
    ))

    comps['LIB_INNER_LOOP'] = cgen.Module([
        cgen.For('int _k=0', '_k<27', '_k++', offset_body),
    ])
def print_loop_structure(loop_index, lower_bound, upper_bound, affine, fun):
    """Wrap ``fun`` in a ``for`` loop over ``loop_index``.

    The emitted loop has the shape::

        for (int <loop_index> = <affine[0][0]>;
             <loop_index> < <affine[1][0]>;
             <loop_index>++) { <fun> }

    :param loop_index: name of the loop variable
    :param lower_bound: unused, kept for interface compatibility
    :param upper_bound: unused, kept for interface compatibility
    :param affine: nested sequence; ``affine[0][0]`` is the start value and
        ``affine[1][0]`` the exclusive upper bound
    :param fun: generated code used as the loop body
    :return: ``c.For`` wrapping ``fun``
    """
    return c.For(
        'int {} = {}'.format(loop_index, affine[0][0]),
        '{} < '.format(loop_index) + str(affine[1][0]),
        '{}++'.format(loop_index),
        fun)
def ccode(self):
    """Generate C code for the represented stencil loop

    The start bound is ``limits[0]`` and the end bound ``limits[1]``; when
    the matching entry of ``offsets`` is non-zero, it is subtracted from the
    bound.  For reversed dimensions the loop starts at ``end - 1`` and
    counts down to ``start`` inclusive; otherwise it counts up from
    ``start`` while the index is below ``end``.  The step is ``limits[2]``.

    :returns: :class:`cgen.For` object representing the loop
    """
    loop_body = [s.ccode for s in self.nodes]

    # Start
    if self.offsets[0] != 0:
        # Textual "limit + (-offset)"; folded into a plain value below when
        # every name in it resolves.
        start = "%s + %s" % (self.limits[0], -self.offsets[0])
        # NOTE(review): eval() resolves names against this method's locals
        # and the module globals -- confirm the expression can never come
        # from untrusted input.
        try:
            start = eval(start)
        except (NameError, TypeError):
            # Expression stays symbolic; keep the string form.
            pass
    else:
        start = self.limits[0]

    # Bound
    if self.offsets[1] != 0:
        end = "%s - %s" % (self.limits[1], self.offsets[1])
        # Same folding attempt as for the start bound.
        try:
            end = eval(end)
        except (NameError, TypeError):
            pass
    else:
        end = self.limits[1]

    # For reverse dimensions flip loop bounds
    if self.dim.reverse:
        loop_init = c.InlineInitializer(c.Value("int", self.index), ccode('%s - 1' % end))
        loop_cond = '%s >= %s' % (self.index, ccode(start))
        loop_inc = '%s -= %s' % (self.index, self.limits[2])
    else:
        loop_init = c.InlineInitializer(c.Value("int", self.index), ccode(start))
        loop_cond = '%s < %s' % (self.index, ccode(end))
        loop_inc = '%s += %s' % (self.index, self.limits[2])

    return c.For(loop_init, loop_cond, loop_inc, c.Block(loop_body))
def map_Do(self, node):
    """Translate a bounded DO loop node into a ``cgen.For``.

    ``node.loopcontrol`` is expected to have the form
    ``var = start, stop[, step]``.  The loop variable is registered as used
    in the innermost scope.  The generated loop is inclusive of ``stop``:
    it compares with ``<=`` for non-negative steps and ``>=`` for negative
    ones, and always advances with ``var += step``.

    :param node: DO-loop node providing ``content`` and ``loopcontrol``
    :returns: :class:`cgen.For` for the loop
    :raises NotImplementedError: if the node has no loop control
    :raises RuntimeError: if the loop control does not have two or three
        comma-separated bounds
    :raises TranslationError: if the step is not an ``int``
    """
    scope = self.scope_stack[-1]

    # The body is translated first, before the loop header is inspected.
    body = self.map_statement_list(node.content)

    if not node.loopcontrol:
        raise NotImplementedError("unbounded do loop")

    loop_var, loop_bounds = node.loopcontrol.split("=")
    loop_var = loop_var.strip()
    scope.use_name(loop_var)
    loop_bounds = [self.parse_expr(s) for s in loop_bounds.split(",")]

    if len(loop_bounds) == 2:
        start, stop = loop_bounds
        step = 1
    elif len(loop_bounds) == 3:
        start, stop, step = loop_bounds
    else:
        raise RuntimeError("loop bounds not understood: %s"
                           % node.loopcontrol)

    # The comparison operator depends on the sign of the step, so the step
    # must be a known integer at translation time.
    if not isinstance(step, int):
        raise TranslationError(
            "non-constant steps not yet supported: %s" % step)

    comp_op = ">=" if step < 0 else "<="

    return cgen.For(
        "%s = %s" % (loop_var, self.gen_expr(start)),
        "%s %s %s" % (loop_var, comp_op, self.gen_expr(stop)),
        "%s += %s" % (loop_var, self.gen_expr(step)),
        cgen.block_if_necessary(body))
def __generate_string_methods(self):
    """Emit the C++ ``pack_string`` / ``unpack_string`` functions.

    ``pack_string`` streams the string size as a ``uint8_t`` followed by
    each character as a ``uint8_t``.  ``unpack_string`` emits two guards
    (for the size byte and for ``size``), reads a ``std::string`` from the
    packet and returns ``true``.  For each function the declaration is
    appended to ``self.marshal.header`` and the definition to
    ``self.marshal.source``.
    """
    def register(return_type, name, params, statements):
        """Declare the function in the header and define it in the source."""
        declaration = c.FunctionDeclaration(c.Value(return_type, name),
                                            params)
        definition = c.FunctionBody(declaration, c.Block(statements))
        self.marshal.header.append(declaration)
        self.marshal.source.append(definition)

    stream_char = c.Block(
        [c.Statement('*packet << static_cast<uint8_t>(str[i])')])
    register(
        'void', 'pack_string',
        [
            self._data_object_ref(self._packet_type(), 'packet'),
            self._data_cref(self.get_dtype('string'), 'str'),
        ],
        [
            c.Statement('uint8_t size = str.size()'),
            c.Statement('*packet << static_cast<uint8_t>(size)'),
            c.For('uint8_t i = 0', 'i < size', '++i', stream_char),
        ])

    register(
        'bool', 'unpack_string',
        [
            self._pointer_type('PacketReader', 'packet'),
            self._data_object_ref(self.get_dtype('string'), 'str'),
        ],
        [
            WrapUnpack._guard('sizeof(uint8_t)'),
            c.Statement('uint8_t size = packet->peek<uint8_t>()'),
            WrapUnpack._guard('size'),
            c.Statement('str = packet->read<std::string>()'),
            c.Statement('return true'),
        ])
def generate(self, funcname, field_args, const_args, kernel_ast, c_include):
    """Assemble the complete C source for a kernel and its particle loop.

    The result consists of, in order: the ``parcels.h`` and ``math.h``
    includes, the ``_next_dt`` / ``_next_dt_set`` globals, the particle
    struct typedef, the ``set_particle_backup`` / ``get_particle_backup``
    helpers, ``update_next_dt`` (only when that name occurs in the kernel
    code), ``c_include`` (when truthy), the kernel code and the
    ``particle_loop`` driver.

    :param funcname: name of the generated kernel function to invoke
    :param field_args: mapping whose keys name the ``CField*`` arguments
    :param const_args: mapping whose keys name the ``float`` arguments
    :param kernel_ast: object whose ``str()`` is the kernel's C code
    :param c_include: extra C source inserted before the kernel, or falsy
    :returns: the sections joined by blank lines
    """
    struct_name = self.ptype.name
    variables = self.ptype.variables
    kernel_code = str(kernel_ast)
    field_names = list(field_args.keys())
    const_names = list(const_args.keys())

    def inline_void(name, params):
        """Declaration of a ``static inline void`` function."""
        return c.FunctionDeclaration(
            c.Static(c.DeclSpecifier(c.Value("void", name), spec='inline')),
            params)

    def backup_function(name, target_fmt, source_fmt):
        """Field-by-field copy helper (skips uint64, 'dt' and 'state')."""
        params = [
            c.Pointer(c.Value(struct_name, "particle_backup")),
            c.Pointer(c.Value(struct_name, "particle")),
        ]
        copies = [
            c.Assign(target_fmt % v.name, source_fmt % v.name)
            for v in variables
            if v.dtype != np.uint64 and v.name not in ['dt', 'state']
        ]
        return str(c.FunctionBody(inline_void(name, params),
                                  c.Block(copies)))

    # Add include for Parcels and math header, then the next-dt globals
    sections = [
        str(c.Include("parcels.h", system=False)),
        str(c.Include("math.h", system=False)),
        str(c.Assign('double _next_dt', '0')),
        str(c.Assign('size_t _next_dt_set', '0')),
    ]

    # Generate type definition for particle type; uint64 variables are
    # declared as void pointers
    members = [
        c.Pointer(c.POD(np.void, v.name)) if v.dtype == np.uint64
        else c.POD(v.dtype, v.name)
        for v in variables
    ]
    sections.append(
        str(c.Typedef(c.GenerableStruct("", members, declname=struct_name))))

    sections.append(backup_function(
        "set_particle_backup", "particle_backup->%s", "particle->%s"))
    sections.append(backup_function(
        "get_particle_backup", "particle->%s", "particle_backup->%s"))

    if 'update_next_dt' in kernel_code:
        setter_decl = inline_void("update_next_dt",
                                  [c.Value('double', 'dt')])
        setter_body = c.Block([
            c.Assign("_next_dt", "dt"),
            c.Assign("_next_dt_set", "1"),
        ])
        sections.append(str(c.FunctionBody(setter_decl, setter_body)))

    if c_include:
        sections.append(c_include)

    # Insert kernel code
    sections.append(kernel_code)

    # Generate outer loop for repeated kernel invocation
    params = [
        c.Value("int", "num_particles"),
        c.Pointer(c.Value(struct_name, "particles")),
        c.Value("double", "endtime"),
        c.Value("float", "dt"),
    ]
    params.extend(
        c.Pointer(c.Value("CField", "%s" % name)) for name in field_names)
    params.extend(c.Value("float", name) for name in const_names)

    call_args = ", ".join(['particles[p].time'] + field_names + const_names)

    # Building blocks of the loop nest
    sign_dt = c.Assign("sign_dt", "dt > 0 ? 1 : -1")
    particle_backup = c.Statement("%s particle_backup" % struct_name)
    sign_end_part = c.Assign("sign_end_part",
                             "endtime - particles[p].time > 0 ? 1 : -1")
    dt_pos = c.Assign(
        "__dt",
        "fmin(fabs(particles[p].dt), fabs(endtime - particles[p].time))")
    dt_0_break = c.If("particles[p].dt == 0", c.Statement("break"))
    notstarted_continue = c.If(
        "(sign_end_part != sign_dt) && (particles[p].dt != 0)",
        c.Statement("continue"))
    update_pdt = c.If(
        "_next_dt_set == 1",
        c.Block([
            c.Assign("_next_dt_set", "0"),
            c.Assign("particles[p].dt", "_next_dt"),
        ]))

    accepted = c.Block([
        c.Statement("particles[p].time += particles[p].dt"),
        update_pdt,
        dt_pos,
        dt_0_break,
        c.Statement("continue"),
    ])
    rejected = c.Block([
        c.Statement(
            "get_particle_backup(&particle_backup, &(particles[p]))"),
        dt_pos,
        c.Statement("break"),
    ])

    step = [
        c.Statement(
            "set_particle_backup(&particle_backup, &(particles[p]))"),
        c.Assign("__pdt_prekernels", "__dt * sign_dt"),
        c.Assign("particles[p].dt", "__pdt_prekernels"),
        c.Assign("res", "%s(&(particles[p]), %s)" % (funcname, call_args)),
        c.If("(res == SUCCESS) & (__pdt_prekernels != particles[p].dt)",
             c.Assign("res", "REPEAT")),
        # Store return code on particle
        c.Assign("particles[p].state", "res"),
        c.If("res == SUCCESS || res == DELETE", accepted, rejected),
    ]

    time_loop = c.While("__dt > __tol || particles[p].dt == 0",
                        c.Block(step))
    part_loop = c.For(
        "p = 0", "p < num_particles", "++p",
        c.Block([sign_end_part, notstarted_continue, dt_pos, time_loop]))

    driver_body = c.Block([
        c.Value("int", "p, sign_dt, sign_end_part"),
        c.Value("ErrorCode", "res"),
        c.Value("float", "__pdt_prekernels"),
        c.Value("double", "__dt, __tol"),
        c.Assign("__tol", "1.e-6"),
        sign_dt,
        particle_backup,
        part_loop,
    ])
    driver_decl = c.FunctionDeclaration(
        c.Value("void", "particle_loop"), params)
    sections.append(str(c.FunctionBody(driver_decl, driver_body)))

    return "\n\n".join(sections)
def generate(self, funcname, field_args, const_args, kernel_ast, c_include):
    """Assemble the complete C source for a kernel and its particle loop.

    Particles are passed as a struct of per-variable pointers
    (``<ptype name>p``) indexed by ``pnum``; a second struct with the plain
    particle name (without uint64 variables) is used for the backup copy.

    The result consists of, in order: the ``parcels.h`` include, the
    ``_next_dt`` / ``_next_dt_set`` / ``ngrid`` globals, both struct
    typedefs, the ``set_particle_backup`` / ``get_particle_backup`` helpers,
    ``update_next_dt`` (only when that name occurs in the kernel code),
    ``c_include`` (when truthy), the kernel code and the ``particle_loop``
    driver.

    :param funcname: name of the generated kernel function to invoke
    :param field_args: mapping whose keys name the ``CField*`` arguments
    :param const_args: mapping whose keys name the ``double`` arguments
    :param kernel_ast: object whose ``str()`` is the kernel's C code
    :param c_include: extra C source inserted before the kernel, or falsy
    :returns: the sections joined by blank lines
    """
    struct_name = self.ptype.name
    soa_name = struct_name + 'p'
    variables = self.ptype.variables
    kernel_code = str(kernel_ast)
    field_names = list(field_args.keys())
    const_names = list(const_args.keys())

    def inline_void(name, params):
        """Declaration of a ``static inline void`` function."""
        return c.FunctionDeclaration(
            c.Static(c.DeclSpecifier(c.Value("void", name), spec='inline')),
            params)

    def backup_function(name, target_fmt, source_fmt):
        """Field-by-field copy helper (skips uint64, 'dt' and 'state')."""
        params = [
            c.Pointer(c.Value(struct_name, "particle_backup")),
            c.Pointer(c.Value(soa_name, "particles")),
            c.Value("int", "pnum"),
        ]
        copies = [
            c.Assign(target_fmt % v.name, source_fmt % v.name)
            for v in variables
            if v.dtype != np.uint64 and v.name not in ['dt', 'state']
        ]
        return str(c.FunctionBody(inline_void(name, params),
                                  c.Block(copies)))

    # ==== Add include for Parcels header and global definitions ==== #
    grid_count = self.fieldset.gridset.size if self.fieldset is not None else 1
    sections = [
        str(c.Include("parcels.h", system=False)),
        str(c.Assign('double _next_dt', '0')),
        str(c.Assign('size_t _next_dt_set', '0')),
        str(c.Assign('const int ngrid', str(grid_count))),
    ]

    # ==== Generate type definition for particle type ==== #
    pointer_members = [c.Pointer(c.POD(v.dtype, v.name)) for v in variables]
    sections.append(
        str(c.Typedef(
            c.GenerableStruct("", pointer_members, declname=soa_name))))

    # Generate type definition for single particle type
    value_members = [c.POD(v.dtype, v.name)
                     for v in variables if v.dtype != np.uint64]
    sections.append(
        str(c.Typedef(
            c.GenerableStruct("", value_members, declname=struct_name))))

    sections.append(backup_function(
        "set_particle_backup",
        "particle_backup->%s", "particles->%s[pnum]"))
    sections.append(backup_function(
        "get_particle_backup",
        "particles->%s[pnum]", "particle_backup->%s"))

    if 'update_next_dt' in kernel_code:
        setter_decl = inline_void("update_next_dt",
                                  [c.Value('double', 'dt')])
        setter_body = c.Block([
            c.Assign("_next_dt", "dt"),
            c.Assign("_next_dt_set", "1"),
        ])
        sections.append(str(c.FunctionBody(setter_decl, setter_body)))

    if c_include:
        sections.append(c_include)

    # ==== Insert kernel code ==== #
    sections.append(kernel_code)

    # Generate outer loop for repeated kernel invocation
    params = [
        c.Value("int", "num_particles"),
        c.Pointer(c.Value(soa_name, "particles")),
        c.Value("double", "endtime"),
        c.Value("double", "dt"),
    ]
    params.extend(
        c.Pointer(c.Value("CField", "%s" % name)) for name in field_names)
    params.extend(c.Value("double", name) for name in const_names)

    call_args = ", ".join(
        ['particles->time[pnum]'] + field_names + const_names)

    # ==== statement clusters used to compose the loop nest ==== #
    sign_dt = c.Assign("sign_dt", "dt > 0 ? 1 : -1")
    particle_backup = c.Statement("%s particle_backup" % struct_name)
    sign_end_part = c.Assign(
        "sign_end_part", "(endtime - particles->time[pnum]) > 0 ? 1 : -1")
    reset_res_state = c.Assign("res", "particles->state[pnum]")
    update_state = c.Assign("particles->state[pnum]", "res")
    update_pdt = c.If(
        "_next_dt_set == 1",
        c.Block([
            c.Assign("_next_dt_set", "0"),
            c.Assign("particles->dt[pnum]", "_next_dt"),
        ]))
    dt_pos = c.Assign(
        "__dt",
        "fmin(fabs(particles->dt[pnum]), fabs(endtime - particles->time[pnum]))"
    )
    check_pdt = c.If(
        "(res == SUCCESS) & !is_equal_dbl(__pdt_prekernels, particles->dt[pnum])",
        c.Assign("res", "REPEAT"))
    dt_0_break = c.If("is_zero_dbl(particles->dt[pnum])",
                      c.Statement("break"))
    notstarted_continue = c.If(
        "(( sign_end_part != sign_dt) || is_close_dbl(__dt, 0) ) && !is_zero_dbl(particles->dt[pnum])",
        c.Block([
            c.If("fabs(particles->time[pnum]) >= fabs(endtime)",
                 c.Assign("particles->state[pnum]", "SUCCESS")),
            c.Statement("continue"),
        ]))

    def stop_when_past_end():
        """Zero the remaining step once the end time has been passed."""
        return c.If("sign_dt != sign_end_part", c.Assign("__dt", "0"))

    accepted = c.Block([
        c.Statement("particles->time[pnum] += particles->dt[pnum]"),
        update_pdt,
        dt_pos,
        sign_end_part,
        c.If(
            "(res != DELETE) && !is_close_dbl(__dt, 0) && (sign_dt == sign_end_part)",
            c.Assign("res", "EVALUATE")),
        stop_when_past_end(),
        update_state,
        dt_0_break,
    ])
    rejected = c.Block([
        c.Statement(
            "get_particle_backup(&particle_backup, particles, pnum)"),
        dt_pos,
        sign_end_part,
        stop_when_past_end(),
        update_state,
        c.Statement("break"),
    ])

    # ==== main computation body ==== #
    step = [
        c.Statement(
            "set_particle_backup(&particle_backup, particles, pnum)"),
        c.Assign("__pdt_prekernels", "__dt * sign_dt"),
        c.Assign("particles->dt[pnum]", "__pdt_prekernels"),
        c.Value("StatusCode", "state_prev"),
        c.Assign("state_prev", "particles->state[pnum]"),
        c.Assign("res",
                 "%s(particles, pnum, %s)" % (funcname, call_args)),
        c.If("(res==SUCCESS) && (particles->state[pnum] != state_prev)",
             c.Assign("res", "particles->state[pnum]")),
        check_pdt,
        c.If("res == SUCCESS || res == DELETE", accepted, rejected),
    ]

    time_loop = c.While(
        "(particles->state[pnum] == EVALUATE || particles->state[pnum] == REPEAT) || is_zero_dbl(particles->dt[pnum])",
        c.Block(step))
    part_loop = c.For(
        "pnum = 0", "pnum < num_particles", "++pnum",
        c.Block([
            sign_end_part,
            reset_res_state,
            dt_pos,
            notstarted_continue,
            time_loop,
        ]))

    driver_body = c.Block([
        c.Value("int", "pnum, sign_dt, sign_end_part"),
        c.Value("StatusCode", "res"),
        c.Value("double", "__pdt_prekernels"),
        c.Value("double", "__dt"),
        sign_dt,
        particle_backup,
        part_loop,
    ])
    driver_decl = c.FunctionDeclaration(
        c.Value("void", "particle_loop"), params)
    sections.append(str(c.FunctionBody(driver_decl, driver_body)))

    return "\n\n".join(sections)
def _cgen(self):
    """Convert this loop node into a ``cgen.For`` statement.

    The textual forms of ``self.start`` and ``self.update`` have any
    leading/trailing ``;`` removed before being used as the initialiser and
    the update clause; ``self.cond`` is used verbatim. The loop body is
    whatever ``self.body._cgen()`` returns.
    """
    header = [
        str(self.start).strip(';'),
        str(self.cond),
        str(self.update).strip(';'),
    ]
    return cgen.For(*header, self.body._cgen())
def get_idl_dtype_size(idl_dtype, spec_type, variable, optional):
    """Build the statements that add the size of one field to ``size``.

    The emitted statements operate on a ``size`` accumulator and read the
    field as ``data.<variable>``. ``self`` and ``c`` are taken from the
    enclosing scope.

    Args:
        idl_dtype: IDL type name of the field (a trivial type, ``'string'``,
            a message name, or ``'vector'``).
        spec_type: Element type of the vector; only read when ``idl_dtype``
            is ``'vector'``.
        variable: Name of the field on ``data``.
        optional: When truthy, one ``bool`` is always counted and the payload
            is only counted inside an ``if data.<variable>.HasValue`` block.

    Returns:
        A ``c.Collection`` holding the generated statements.

    Raises:
        NotImplementedError: For vectors of strings, vectors of an
            unsupported element type, or an unrecognized ``idl_dtype``.
    """
    # `ret` is what the caller gets back; `col` is the container statements
    # are appended to (it becomes the inner block for optional fields).
    ret = col = c.Collection([])

    # Optional data is a special case: a presence flag plus a guarded payload.
    if optional:
        col.append(c.Statement('size += sizeof(bool)'))
        inner = c.Block([])
        col.append(c.If(f'data.{variable}.HasValue', inner))
        col = inner

    dtype = self.get_dtype(idl_dtype)
    if self.is_trivial(idl_dtype):
        col.append(c.Statement(f'size += sizeof({dtype})'))
    elif idl_dtype == 'string':
        col.append(
            c.Statement(
                f'size += (byte)(sizeof({self.get_dtype("uint8")}) + '
                + self._str_len(f'data.{variable}') + ')'))
    elif self.is_message(idl_dtype):
        col.append(c.Statement(f'size += {idl_dtype}_size(data.{variable})'))
    elif idl_dtype == 'vector':
        if self.is_trivial(spec_type):
            # TODO(gpascualg): No vector of optionals yet
            # The per-element size must be an expression. Interpolating the
            # result of a recursive call would paste a whole
            # `size += sizeof(T);` statement into the multiplication.
            col.append(
                c.Statement(
                    f'size += (byte)(sizeof({self.get_dtype("uint8")}) + '
                    f'data.{variable}.Count * '
                    f'sizeof({self.get_dtype(spec_type)}))'))
        elif self.is_message(spec_type):
            if self._is_trivial(
                    spec_type,
                    self.user_defined_messages[spec_type],
                    include_messages=False):
                # Every element reports the same size, so the first element
                # is measured once and multiplied by the count; the empty
                # case is split out because there is no element [0] to use.
                col.append(
                    c.Collection([
                        c.If(
                            f'data.{variable}.Count == 0',
                            c.Block([
                                c.Statement(
                                    f'size += sizeof({self.get_dtype("uint8")})'
                                )
                            ]),
                            c.Block([
                                c.Statement(
                                    f'size += (byte)(sizeof({self.get_dtype("uint8")}) + data.{variable}.Count * {spec_type}_size(data.{variable}[0]))'
                                )
                            ]))
                    ]))
            else:
                # Element sizes may differ: count prefix, then sum per element.
                col.append(
                    c.Collection([
                        c.Statement(
                            f'size += sizeof({self.get_dtype("uint8")})'
                        ),
                        c.For(
                            'int i = 0',
                            f'i < data.{variable}.Count',
                            '++i',
                            c.Block([
                                c.Statement(
                                    f'size += {spec_type}_size(data.{variable}[i])'
                                )
                            ]))
                    ]))
        elif spec_type == 'string':
            raise NotImplementedError('Vector of strings is a WIP')
        else:
            raise NotImplementedError(
                f'Unsupported vector type {spec_type}')
    else:
        raise NotImplementedError(f'Unrecognized type {idl_dtype}')

    return ret
def main():
    """Generate the ksystem sources, headers and CMake fragment.

    Command line:
        -c/--config PATH      configuration file passed to ``load_config``
        -o/--output-dir PATH  directory receiving the generated files

    Files written into the output directory: ``ksystem.cpp``, ``ksystem.h``,
    ``ksystem_internal.h`` and ``ksystem.cmake``.

    Exits with a non-zero status when a component fails ``verify()`` or when
    the modbus component is configured without the systick and serial
    components.
    """
    import argparse
    import sys

    argparser = argparse.ArgumentParser()
    argparser.add_argument('-c', '--config', type=str, metavar="PATH", required=True)
    argparser.add_argument('-o', '--output-dir', type=str, metavar="PATH", required=True)
    args = argparser.parse_args()

    cfg = load_config(args.config)

    components = []
    serial: Optional[SerialComponent] = None
    systick: Optional[SysTickComponent] = None

    if cfg.components.serial is not None:
        serial = SerialComponent(cfg)
        components.append(serial)

    if cfg.components.systick is not None:
        systick = SysTickComponent(cfg)
        components.append(systick)

    if cfg.components.gpio is not None:
        gpio = GPIOComponent(cfg)
        components.append(gpio)

    if cfg.components.modbus is not None:
        # Checked explicitly rather than with `assert`: assertions are
        # removed under `python -O`, which would turn a configuration mistake
        # into an AttributeError on `systick` below.
        if systick is None or serial is None:
            sys.exit("error: the modbus component requires the systick and serial components")
        modbus = ModbusComponent(cfg)
        systick.register_systimer(SysTimerDef(name="modbusTimer", repeat=False, handler=True, required_accuracy=1))
        components.append(modbus)

    ### SOURCE

    source_file = SourceFile(is_header=False)
    # source_file.add_include("stdint.h", True)

    for component in components:
        if not component.verify():
            # sys.exit instead of the site-provided `exit`, which is meant
            # for the interactive interpreter and is not always available.
            sys.exit(1)
        for path in component.get_source_includes():
            if path is not None:
                source_file.add_include(path, True)

    source_file.add_include("ksystem.h", system=False)

    for component in components:
        component.emit_global_variables(source_file)

    source_file.add(cgen.Statement("extern void setup()"))
    source_file.add(cgen.Statement("extern void loop()"))
    source_file.add(cgen.Line())

    for component in components:
        source_file.add(cgen.LineComment(f"Component: {type(component).__name__}"))
        component.emit_helper_functions(source_file)
        source_file.add_blank()

    with source_file.function("int", "main") as f:
        for component in components:
            f.add(cgen.LineComment(f"Component: {type(component).__name__}"))
            component.emit_initialization(f)
            f.add_blank()

        f.add(cgen.Statement("setup()"))
        f.add(cgen.Statement("sei()"))

        loop_statements = StatementsContainer()
        loop_statements.add("loop()")
        for component in components:
            loop_statements.add_blank()
            loop_statements.add(cgen.LineComment(f"Component: {type(component).__name__}"))
            component.emit_loop(loop_statements)

        # Empty init/condition/update clauses: the generated loop is `for (;;)`.
        f.add(cgen.For("", "", "", cgen.Block(loop_statements.statements)))

    ksystem_cpp_path = os.path.join(args.output_dir, "ksystem.cpp")
    source_file.save(ksystem_cpp_path)

    ### HEADER

    header_file = SourceFile(is_header=True)

    for component in components:
        for path in component.get_header_includes():
            if path is not None:
                header_file.add_include(path, True)

    for component in components:
        header_file.add(cgen.LineComment(f"Component: {type(component).__name__}"))
        component.emit_extern_global_variables(header_file)
        header_file.add_blank()

    header_file.save(os.path.join(args.output_dir, "ksystem.h"))

    ### INTERNAL HEADER

    internal_header_file = SourceFile(is_header=True)

    for component in components:
        internal_header_file.add(cgen.LineComment(f"Component: {type(component).__name__}"))
        component.emit_internal_header(internal_header_file)
        internal_header_file.add_blank()

    internal_header_file.save(os.path.join(args.output_dir, "ksystem_internal.h"))

    ### CMAKE

    sources = []
    include_dirs = []
    for component in components:
        sources += component.get_additional_source_files()
        include_dirs += component.get_additional_header_directories()

    # Component-provided paths are resolved against script_dir.
    sources = [os.path.join(script_dir, x) for x in sources]
    include_dirs = [os.path.join(script_dir, x) for x in include_dirs]

    # Separator that puts each path on its own line inside the CMake call.
    nl = "\n "
    cmake_content = f"""
add_definitions(-DF_CPU={cfg.frequency})
include_directories(generated)
include_directories({os.path.join(script_dir, "library")})
add_avr_library(ksystem STATIC
 ${{CMAKE_CURRENT_LIST_DIR}}/ksystem.cpp
 {nl.join(sources)}
)
avr_target_include_directories(ksystem PUBLIC
 {nl.join(include_dirs)}
)
""".lstrip()

    write_text_file(os.path.join(args.output_dir, "ksystem.cmake"), cmake_content)