def visit_Return(self, node):
    """Translate a ``return`` node into a C ``return`` statement.

    The returned expression is visited first; its ``ccode`` attribute is then
    embedded in the statement, which is stored on ``node.ccode``.

    :param node: The return node; ``node.value`` is the returned expression.
    """
    self.visit(node.value)
    return_text = 'return %s' % node.value.ccode
    node.ccode = c.Statement(return_text)
def generate(self, funcname, field_args, const_args, kernel_ast, c_include):
    """Assemble the complete C source surrounding a kernel.

    The output consists, in order, of: the ``parcels.h`` and ``math.h``
    includes, the ``_next_dt`` / ``_next_dt_set`` / ``ngrid`` definitions, the
    two particle struct typedefs, the ``set_particle_backup`` and
    ``get_particle_backup`` helpers, an optional ``update_next_dt`` helper,
    the optional ``c_include`` text, the kernel code itself, and finally the
    ``particle_loop`` driver function.

    :param funcname: Name of the kernel function that ``particle_loop`` calls
        for every particle.
    :param field_args: Mapping whose keys become ``CField*`` parameters of
        ``particle_loop`` and are forwarded to the kernel call.
    :param const_args: Mapping whose keys become ``double`` parameters of
        ``particle_loop`` and are forwarded to the kernel call.
    :param kernel_ast: Object whose ``str()`` is the kernel's C code.
    :param c_include: Extra C text inserted before the kernel code; skipped
        when falsy.
    :returns: The generated pieces joined by blank lines, as one string.
    """
    ptype_name = self.ptype.name
    pname = ptype_name + 'p'

    def inline_void(name, params):
        # Declaration of a ``static inline void`` function.
        return c.FunctionDeclaration(
            c.Static(c.DeclSpecifier(c.Value("void", name), spec='inline')),
            params)

    def backup_function(name, target_fmt, source_fmt):
        # Function copying every variable except uint64 ones, 'dt' and
        # 'state' from ``source_fmt`` to ``target_fmt``.
        params = [
            c.Pointer(c.Value(ptype_name, "particle_backup")),
            c.Pointer(c.Value(pname, "particles")),
            c.Value("int", "pnum"),
        ]
        decl = inline_void(name, params)
        copies = [
            c.Assign(target_fmt % v.name, source_fmt % v.name)
            for v in self.ptype.variables
            if v.dtype != np.uint64 and v.name not in ['dt', 'state']
        ]
        return str(c.FunctionBody(decl, c.Block(copies)))

    # ==== Includes for Parcels and math header, plus file-level state ==== #
    if self.fieldset is not None:
        ngrid = self.fieldset.gridset.size
    else:
        ngrid = 1
    pieces = [
        str(c.Include("parcels.h", system=False)),
        str(c.Include("math.h", system=False)),
        str(c.Assign('double _next_dt', '0')),
        str(c.Assign('size_t _next_dt_set', '0')),
        str(c.Assign('const int ngrid', str(ngrid))),
    ]

    # ==== Type definition for the particle type (pointer members) ==== #
    pointer_members = [
        c.Pointer(c.POD(v.dtype, v.name)) for v in self.ptype.variables
    ]
    pieces.append(
        str(c.Typedef(c.GenerableStruct("", pointer_members, declname=pname))))

    # Type definition for a single particle (uint64 variables left out)
    value_members = [
        c.POD(v.dtype, v.name)
        for v in self.ptype.variables
        if v.dtype != np.uint64
    ]
    pieces.append(
        str(c.Typedef(
            c.GenerableStruct("", value_members, declname=ptype_name))))

    # ==== Backup helpers: particles -> backup, then backup -> particles ==== #
    pieces.append(
        backup_function("set_particle_backup",
                        "particle_backup->%s", "particles->%s[pnum]"))
    pieces.append(
        backup_function("get_particle_backup",
                        "particles->%s[pnum]", "particle_backup->%s"))

    # ==== update_next_dt helper, only when the kernel mentions it ==== #
    next_dt_decl = inline_void("update_next_dt", [c.Value('double', 'dt')])
    if 'update_next_dt' in str(kernel_ast):
        next_dt_body = c.Block([
            c.Assign("_next_dt", "dt"),
            c.Assign("_next_dt_set", "1"),
        ])
        pieces.append(str(c.FunctionBody(next_dt_decl, next_dt_body)))

    if c_include:
        pieces.append(c_include)

    # ==== Insert kernel code ==== #
    pieces.append(str(kernel_ast))

    # ==== Signature of the outer loop and arguments of the kernel call ==== #
    loop_params = [
        c.Value("int", "num_particles"),
        c.Pointer(c.Value(pname, "particles")),
        c.Value("double", "endtime"),
        c.Value("double", "dt"),
    ]
    loop_params.extend(
        c.Pointer(c.Value("CField", "%s" % field))
        for field in field_args.keys())
    loop_params.extend(
        c.Value("double", const) for const in const_args.keys())

    call_args = ['particles->time[pnum]']
    call_args.extend(field_args.keys())
    call_args.extend(const_args.keys())
    fargs_str = ", ".join(call_args)

    # ==== Statements reused while composing the loops ==== #
    sign_dt = c.Assign("sign_dt", "dt > 0 ? 1 : -1")
    particle_backup = c.Statement("%s particle_backup" % ptype_name)
    sign_end_part = c.Assign(
        "sign_end_part", "(endtime - particles->time[pnum]) > 0 ? 1 : -1")
    reset_res_state = c.Assign("res", "particles->state[pnum]")
    update_state = c.Assign("particles->state[pnum]", "res")
    update_pdt = c.If(
        "_next_dt_set == 1",
        c.Block([
            c.Assign("_next_dt_set", "0"),
            c.Assign("particles->dt[pnum]", "_next_dt"),
        ]))
    dt_pos = c.Assign(
        "__dt",
        "fmin(fabs(particles->dt[pnum]), fabs(endtime - particles->time[pnum]))")
    pdt_eq_dt_pos = c.Assign("__pdt_prekernels", "__dt * sign_dt")
    partdt = c.Assign("particles->dt[pnum]", "__pdt_prekernels")
    check_pdt = c.If(
        "(res == SUCCESS) & !is_equal_dbl(__pdt_prekernels, particles->dt[pnum])",
        c.Assign("res", "REPEAT"))
    dt_0_break = c.If("is_zero_dbl(particles->dt[pnum])", c.Statement("break"))
    notstarted_continue = c.If(
        "(( sign_end_part != sign_dt) || is_close_dbl(__dt, 0) ) && !is_zero_dbl(particles->dt[pnum])",
        c.Block([
            c.If("fabs(particles->time[pnum]) >= fabs(endtime)",
                 c.Assign("particles->state[pnum]", "SUCCESS")),
            c.Statement("continue"),
        ]))

    # ==== Main computation body ==== #
    on_success = c.Block([
        c.Statement("particles->time[pnum] += particles->dt[pnum]"),
        update_pdt,
        dt_pos,
        sign_end_part,
        c.If("(res != DELETE) && !is_close_dbl(__dt, 0) && (sign_dt == sign_end_part)",
             c.Assign("res", "EVALUATE")),
        c.If("sign_dt != sign_end_part", c.Assign("__dt", "0")),
        update_state,
        dt_0_break,
    ])
    on_failure = c.Block([
        c.Statement("get_particle_backup(&particle_backup, particles, pnum)"),
        dt_pos,
        sign_end_part,
        c.If("sign_dt != sign_end_part", c.Assign("__dt", "0")),
        update_state,
        c.Statement("break"),
    ])
    body = [
        c.Statement("set_particle_backup(&particle_backup, particles, pnum)"),
        pdt_eq_dt_pos,
        partdt,
        c.Value("StatusCode", "state_prev"),
        c.Assign("state_prev", "particles->state[pnum]"),
        c.Assign("res", "%s(particles, pnum, %s)" % (funcname, fargs_str)),
        c.If("(res==SUCCESS) && (particles->state[pnum] != state_prev)",
             c.Assign("res", "particles->state[pnum]")),
        check_pdt,
        c.If("res == SUCCESS || res == DELETE", on_success, on_failure),
    ]

    # ==== Inner time loop, loop over particles, and the driver itself ==== #
    time_loop = c.While(
        "(particles->state[pnum] == EVALUATE || particles->state[pnum] == REPEAT) || is_zero_dbl(particles->dt[pnum])",
        c.Block(body))
    part_loop = c.For(
        "pnum = 0", "pnum < num_particles", "++pnum",
        c.Block([
            sign_end_part,
            reset_res_state,
            dt_pos,
            notstarted_continue,
            time_loop,
        ]))
    fbody = c.Block([
        c.Value("int", "pnum, sign_dt, sign_end_part"),
        c.Value("StatusCode", "res"),
        c.Value("double", "__pdt_prekernels"),
        c.Value("double", "__dt"),  # 1e-8 = built-in tolerance for np.isclose()
        sign_dt,
        particle_backup,
        part_loop,
    ])
    fdecl = c.FunctionDeclaration(c.Value("void", "particle_loop"), loop_params)
    pieces.append(str(c.FunctionBody(fdecl, fbody)))

    return "\n\n".join(pieces)
def __init__(self, header=None, body=None, footer=None):
    """Build a node whose body sets the denormal/flush-to-zero modes.

    :param header: Forwarded unchanged to the parent constructor.
    :param body: Accepted but not used; the body is always the fixed list
        built here.
    :param footer: Forwarded unchanged to the parent constructor.
    """
    flush_lines = (
        c.Comment('Flush denormal numbers to zero in hardware'),
        c.Statement('_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON)'),
        c.Statement('_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON)'),
    )
    fixed_body = [Element(line) for line in flush_lines]
    super(Denormals, self).__init__(header, fixed_body, footer)
def ccode(self):
    """Return a C statement of the form ``name(p1,p2,...)``.

    :returns: A cgen statement built from ``self.name`` and the
        comma-joined ``self.params``.
    """
    callee = self.name
    joined_params = ','.join(self.params)
    call_text = '%s(%s)' % (callee, joined_params)
    return c.Statement(call_text)
def visit_Call(self, o):
    """Render a call node as a C statement ``name(arg1,arg2,...)``.

    :param o: The call node; ``o.params`` is converted by ``_args_call`` and
        ``o.name`` is the callee.
    :returns: A cgen statement for the call.
    """
    call_args = self._args_call(o.params)
    call_text = '%s(%s)' % (o.name, ','.join(call_args))
    return c.Statement(call_text)
def visit_ForeignExpression(self, o):
    """Render a foreign expression node as a C statement.

    :param o: The node; ``o.expr`` is passed through ``ccode``.
    :returns: A cgen statement wrapping the generated code.
    """
    expr_text = ccode(o.expr)
    return c.Statement(expr_text)
def visit_Expr(self, node):
    """Translate an expression-statement node into a C statement.

    The wrapped expression is visited first; its ``ccode`` attribute then
    becomes the statement stored on ``node.ccode``.

    :param node: The expression node; ``node.value`` is the wrapped
        expression.
    """
    self.visit(node.value)
    expr_text = node.value.ccode
    node.ccode = c.Statement(expr_text)
def visit_Break(self, node):
    """Attach a C ``break`` statement to ``node.ccode``.

    :param node: The break node to annotate.
    """
    break_stmt = c.Statement("break")
    node.ccode = break_stmt
"""Generate C++ code from an expression calling CodePrinter class :param expr: The expression :param settings: A dictionary of settings for code printing :returns: The resulting code as a string. If it fails, then it returns the expr """ if isinstance(expr, Eq): return ccode_eq(expr) try: return CodePrinter(settings).doprint(expr, None) except: return expr def ccode_eq(eq, **settings): """Generate C++ assignment from an equation assigning RHS to LHS :param eq: The equation :param settings: A dictionary of settings for code printing :returns: The resulting code as a string """ return CodePrinter(settings).doprint(eq.lhs, None) \ + ' = ' + CodePrinter(settings).doprint(eq.rhs, None) blankline = c.Line("") printmark = lambda i: c.Line('printf("Here: %s\\n"); fflush(stdout);' % i) printvar = lambda i: c.Statement('printf("%s=%%s\\n", %s); fflush(stdout);' % (i, i)) INT = Function('INT') FLOAT = Function('FLOAT')
def map_Continue(self, node):
    """Map a continue node to a statement holding its label.

    :param node: The node; ``node.label`` is inserted into the label name.
    :returns: A cgen statement with the text ``label_<label>:``.
    """
    label_text = "label_%s:" % node.label
    return cgen.Statement(label_text)
def map_Return(self, node):
    """Map a return node to a bare ``return`` statement.

    :param node: The return node (not inspected).
    :returns: A cgen statement with the text ``return``.
    """
    return_stmt = cgen.Statement("return")
    return return_stmt
def map_Goto(self, node):
    """Map a goto node to a ``goto`` statement targeting its label.

    :param node: The node; ``node.label`` is inserted into the label name.
    :returns: A cgen statement with the text ``goto label_<label>``.
    """
    goto_text = "goto label_%s" % node.label
    return cgen.Statement(goto_text)
# OpenMP pragmas keyed by a short name. Entries that need arguments are
# callables returning the pragma.
omplang = {
    'for': c.Pragma('omp for schedule(static)'),
    'collapse': lambda i: c.Pragma('omp for collapse(%d) schedule(static)' % i),
    'par-region': lambda i: c.Pragma('omp parallel %s' % i),
    'par-for': c.Pragma('omp parallel for schedule(static)'),
    'simd-for': c.Pragma('omp simd'),
    'simd-for-aligned': lambda i, j: c.Pragma('omp simd aligned(%s:%d)' % (i, j)),
}

# Compiler-specific language
complang_ALL = {
    'IntelCompiler': {
        'ignore-deps': c.Pragma('ivdep'),
        'ntstores': c.Pragma('vector nontemporal'),
        'storefence': c.Statement('_mm_sfence()'),
        'noinline': c.Pragma('noinline'),
    },
}
# Both names refer to the very same dictionary object.
complang_ALL['IntelKNLCompiler'] = complang_ALL['IntelCompiler']

# SIMD generic info
simdinfo = {
    # Sizes in bytes of a vector register
    'sse': 16,
    # Misspelled key retained so that existing lookups keep working.
    'see4_2': 16,
    # Correct spelling of the instruction-set name; previously missing, so a
    # lookup by the real name failed.
    'sse4_2': 16,
    'avx': 32,
    'avx2': 32,
    'avx512f': 64,
}