示例#1
0
    def generate(self, funcname, field_args, const_args, kernel_ast, c_include):
        """Assemble the C source for a kernel plus its driving particle loop.

        The returned text contains, in order: the ``parcels.h`` and ``math.h``
        includes, a typedef'd struct for the particle type, the optional
        ``c_include`` text, the kernel code, and a ``particle_loop`` function
        that repeatedly calls ``funcname`` for every particle.

        :param funcname: Name of the kernel function called inside the loop.
        :param field_args: Mapping whose keys become ``CField *`` parameters of
            ``particle_loop`` and are forwarded to the kernel call.
        :param const_args: Mapping whose keys become ``float`` parameters of
            ``particle_loop`` and are forwarded to the kernel call.
        :param kernel_ast: Object whose ``str()`` is inserted as kernel code.
        :param c_include: Extra source text appended verbatim; skipped if falsy.
        :returns: All generated sections joined by blank lines.
        """
        ccode = []

        # Add include for Parcels and math header
        ccode += [str(c.Include("parcels.h", system=False))]
        ccode += [str(c.Include("math.h", system=False))]

        # Generate type definition for particle type
        vdecl = []
        for v in self.ptype.variables:
            if v.dtype == np.uint64:
                # uint64 variables are declared as `void *` members of the struct
                vdecl.append(c.Pointer(c.POD(np.void, v.name)))
            else:
                vdecl.append(c.POD(v.dtype, v.name))

        ccode += [str(c.Typedef(c.GenerableStruct("", vdecl, declname=self.ptype.name)))]

        # User-supplied C text goes before the kernel code
        if c_include:
            ccode += [c_include]

        # Insert kernel code
        ccode += [str(kernel_ast)]

        # Generate outer loop for repeated kernel invocation
        args = [c.Value("int", "num_particles"),
                c.Pointer(c.Value(self.ptype.name, "particles")),
                c.Value("double", "endtime"), c.Value("float", "dt")]
        for field, _ in field_args.items():
            args += [c.Pointer(c.Value("CField", "%s" % field))]
        for const, _ in const_args.items():
            args += [c.Value("float", const)]
        # Argument list of the kernel call: time, signed step, then fields and constants
        fargs_str = ", ".join(['particles[p].time', 'sign_dt * __dt'] + list(field_args.keys())
                              + list(const_args.keys()))
        # Statements reused inside the per-particle loop nest; the direction of
        # the run is carried by sign_dt (+1 or -1)
        sign_dt = c.Assign("sign_dt", "dt > 0 ? 1 : -1")
        sign_end_part = c.Assign("sign_end_part", "endtime - particles[p].time > 0 ? 1 : -1")
        # __dt is the magnitude of the next step, capped at the time left to endtime
        dt_pos = c.Assign("__dt", "fmin(fabs(particles[p].dt), fabs(endtime - particles[p].time))")
        dt_0_break = c.If("particles[p].dt == 0", c.Statement("break"))
        # Skip particles whose direction towards endtime differs from the sign of dt
        notstarted_continue = c.If("(sign_end_part != sign_dt) && (particles[p].dt != 0)",
                                   c.Statement("continue"))
        body = [c.Assign("res", "%s(&(particles[p]), %s)" % (funcname, fargs_str))]
        body += [c.Assign("particles[p].state", "res")]  # Store return code on particle
        # On SUCCESS: advance time, recompute __dt, leave the loop if dt == 0
        body += [c.If("res == SUCCESS", c.Block([c.Statement("particles[p].time += sign_dt * __dt"),
                                                 dt_pos, dt_0_break, c.Statement("continue")]))]
        # On REPEAT: recompute __dt and retry; any other code leaves the time loop
        body += [c.If("res == REPEAT", c.Block([dt_pos, c.Statement("continue")]),
                      c.Statement("break"))]

        time_loop = c.While("__dt > __tol || particles[p].dt == 0", c.Block(body))
        part_loop = c.For("p = 0", "p < num_particles", "++p",
                          c.Block([sign_end_part, notstarted_continue, dt_pos, time_loop]))
        # Function body: local declarations, tolerance, direction, then the loop
        fbody = c.Block([c.Value("int", "p, sign_dt, sign_end_part"), c.Value("ErrorCode", "res"),
                         c.Value("double", "__dt, __tol"), c.Assign("__tol", "1.e-6"),
                         sign_dt, part_loop])
        fdecl = c.FunctionDeclaration(c.Value("void", "particle_loop"), args)
        ccode += [str(c.FunctionBody(fdecl, fbody))]
        return "\n\n".join(ccode)
示例#2
0
 def push_stack(self, scope, obj):
     """Register a stack declaration for ``obj`` under ``scope``.

     The declaration is a ``c.POD`` whose declarator text is the object name,
     one ``[extent]`` suffix per entry of ``obj.symbolic_shape`` and a
     64-byte ``aligned`` attribute. It is stored in ``self.stack[scope]``,
     keyed by ``obj``.
     """
     dims = "".join(["[%s]" % ccode(extent) for extent in obj.symbolic_shape])
     scope_decls = self.stack.setdefault(scope, OrderedDict())
     scope_decls[obj] = c.POD(
         obj.dtype,
         "%s%s %s" % (obj.name, dims, "__attribute__((aligned(64)))"))
示例#3
0
 def push_stack(self, scope, obj):
     """Register a cgen declaration for ``obj`` in ``self.stack[scope]``.

     Local objects are declared as a plain ``c.Value`` of their C type name.
     Anything else is declared as a ``c.POD`` array carrying one
     ``[extent]`` suffix per symbolic dimension and an ``aligned`` attribute
     taken from ``obj._data_alignment``.
     """
     scope_decls = self.stack.setdefault(scope, OrderedDict())

     if obj.is_LocalObject:
         scope_decls[obj] = c.Value(obj._C_typename, obj.name)
         return

     dims = "".join(["[%s]" % ccode(extent) for extent in obj.symbolic_shape])
     qualifier = "__attribute__((aligned(%d)))" % obj._data_alignment
     scope_decls[obj] = c.POD(obj.dtype,
                              "%s%s %s" % (obj.name, dims, qualifier))
示例#4
0
    def _alloc_array_on_low_lat_mem(self, site, obj, storage):
        """Record an aligned array declaration for ``obj`` in ``storage``.

        The declarator is the object name followed by one ``[extent]`` suffix
        per symbolic dimension and the alignment qualifier produced by
        ``self.lang['aligned']``. The resulting ``c.POD`` is passed to
        ``storage.update`` as ``allocs``.
        """
        dims = "".join(["[%s]" % ccode(extent) for extent in obj.symbolic_shape])
        qualifier = self.lang['aligned'](obj._data_alignment)
        declarator = "%s%s %s" % (obj.name, dims, qualifier)
        declaration = c.POD(obj.dtype, declarator)

        storage.update(obj, site, allocs=declaration)
示例#5
0
    def push_array_on_stack(self, scope, obj):
        """Declare ``obj`` as an aligned array in ``self.stack[scope]``.

        Nothing is added when ``obj`` is already found among the flattened
        values of ``self.stack``; the entry for ``scope`` is created either
        way.
        """
        scope_decls = self.stack.setdefault(scope, OrderedDict())

        if obj not in flatten(self.stack.values()):
            dims = "".join(["[%s]" % ccode(extent)
                            for extent in obj.symbolic_shape])
            qualifier = "__attribute__((aligned(%d)))" % obj._data_alignment
            declarator = "%s%s %s" % (obj.name, dims, qualifier)
            scope_decls[obj] = Element(c.POD(obj.dtype, declarator))
示例#6
0
    def generate_optimmat_code(self, pos, name=None):
        """Generate the CUDA kernel computing the local optimization matrix
        for the optimization over site nr. `pos`.

        The generated function takes the following parameters:

            DTYPE const *const A,
            DTYPE const *const X0,
            ...,
            DTYPE const *const X{sites - 1},
            DTYPE *const result

        :param pos: Index of the site being optimized (should be `< len(X)`)
        :param name: Name of the C function (default: get_optimmat_%(pos))
        :returns: cgen.FunctionBody with given name

        """
        name = 'get_optimmat_%i' % pos if name is None else name

        # Final write-out: each thread whose row index `mid` is in range fills
        # its (rank_l x pdim x rank_r) slice of `result`.
        finalization_src = '''
        if (mid < {nr_meas:d}) {{
            for (uint i = 0; i < {pdim:d}; ++i) {{
                for (uint k_l = 0; k_l < {rank_l:d}; ++k_l) {{
                    for (uint k_r = 0; k_r < {rank_r:d}; ++k_r) {{
                        result[mid * {rank_l:d} * {pdim:d} * {rank_r:d}
                            + k_l * {pdim:d} * {rank_r:d}
                            + i * {rank_r:d}
                            + k_r]
                        = left_c[k_l] * current_row[{offset:d} + i] * right_c[k_r];
                    }}
                }}
            }}
        }}
        '''.format(nr_meas=self._meas,
                   pdim=self._dims[pos],
                   # Boundary sites have a trivial (size 1) outer rank
                   rank_l=1 if pos == 0 else self._ranks[pos - 1],
                   rank_r=1 if pos == self._sites - 1 else self._ranks[pos],
                   offset=sum(self._dims[:pos]))
        finalization = c.LiteralLines(finalization_src)

        arg_decls = [ConstPointerToConstDecl(self._dtype, 'A')]
        arg_decls += [
            ConstPointerToConstDecl(self._dtype, 'X%i' % i)
            for i in range(self._sites)
        ]
        arg_decls += [c.Pointer(c.Const(c.POD(self._dtype, 'result')))]

        # Use the resolved `name` so that a caller-supplied function name is
        # honoured instead of always emitting the default one.
        return c.FunctionBody(
            ccu.CudaGlobal(
                c.FunctionDeclaration(c.Value('void', name),
                                      arg_decls=arg_decls)),
            c.Block(
                self.declaration(pos) + self.left_contractions(pos) +
                self.right_contractions(pos) + [finalization]))
示例#7
0
 def visit_ArrayCast(self, o):
     """Build the cgen initializer that casts ``<name>_vec`` to a typed,
     multi-dimensional ``restrict`` pointer named after ``o.function``.

     All dimensions of the function's symbolic shape except the first
     appear as array extents in both the declaration and the cast.
     """
     func = o.function
     dims = ''.join("[%s]" % ccode(extent)
                    for extent in func.symbolic_shape[1:])
     declarator = '(*restrict %s)%s %s' % (
         func.name, dims, "__attribute__((aligned(64)))")
     target = c.POD(func.dtype, declarator)
     source = '(%s (*)%s) %s_vec' % (
         c.dtype_to_ctype(func.dtype), dims, func.name)
     return c.Initializer(target, source)
示例#8
0
    def _alloc_array_on_low_lat_mem(self, scope, obj, storage):
        """Declare ``obj`` as an aligned array in ``storage._low_lat_mem[scope]``.

        Nothing is added when ``obj`` is already found among the flattened
        values of ``storage._low_lat_mem``; the entry for ``scope`` is created
        either way.
        """
        scope_decls = storage._low_lat_mem.setdefault(scope, OrderedDict())

        if obj not in flatten(storage._low_lat_mem.values()):
            dims = "".join(["[%s]" % ccode(extent)
                            for extent in obj.symbolic_shape])
            qualifier = "__attribute__((aligned(%d)))" % obj._data_alignment
            declarator = "%s%s %s" % (obj.name, dims, qualifier)
            scope_decls[obj] = Element(c.POD(obj.dtype, declarator))
示例#9
0
    def push_object_on_stack(self, scope, obj):
        """Store an ``Element``-wrapped declaration of ``obj`` in
        ``self.stack[scope]``.

        Local objects become a ``c.Value`` of their C type name; everything
        else becomes an aligned ``c.POD`` array with one ``[extent]`` suffix
        per symbolic dimension.
        """
        scope_decls = self.stack.setdefault(scope, OrderedDict())

        if not obj.is_LocalObject:
            dims = "".join(["[%s]" % ccode(extent)
                            for extent in obj.symbolic_shape])
            qualifier = "__attribute__((aligned(%d)))" % obj._data_alignment
            declarator = "%s%s %s" % (obj.name, dims, qualifier)
            declaration = c.POD(obj.dtype, declarator)
        else:
            declaration = c.Value(obj._C_typename, obj.name)

        scope_decls[obj] = Element(declaration)
示例#10
0
    def push_stack(self, scope, obj):
        """
        Generate a cgen statement that allocates ``obj`` on the stack.

        The declaration is a ``c.POD`` whose declarator is the object name,
        one ``[extent]`` suffix per entry of ``obj.shape`` and a 64-byte
        ``aligned`` attribute. It is appended to the list kept in
        ``self.stack[scope]`` unless an equal item is already present.
        """
        shape = "".join("[%d]" % j for j in obj.shape)
        alignment = "__attribute__((aligned(64)))"

        # Hand the NumPy dtype to c.POD directly. c.POD performs the dtype ->
        # C type conversion itself; converting first and passing the C type
        # *string* makes NumPy re-interpret it (e.g. "float" -> float64), so a
        # float32 array would end up declared as ``double``.
        item = c.POD(obj.dtype, "%s%s %s" % (obj.name, shape, alignment))
        handle = self.stack.setdefault(scope, [])
        if item not in handle:
            handle.append(item)
示例#11
0
    def ccode(self):
        """Return the cgen function body for this kernel.

        The function is declared ``extern "C"``, returns ``int`` and takes one
        ``<name>_vec`` pointer per entry of ``self.signature``. Its body casts
        every ``<name>_vec`` to a multi-dimensional pointer named ``<name>``,
        then emits the code of each expression, then ``return 0``.
        """
        params = []
        for var in self.signature:
            params.append(c.Pointer(c.POD(var.dtype, '%s_vec' % var.name)))
        declaration = c.Extern(
            "C", c.FunctionDeclaration(c.Value('int', self.name), params))

        statements = []
        for var in self.signature:
            # All dimensions but the first become array extents of the cast
            dims = ''.join('[%d]' % extent for extent in var.shape[1:])
            target = c.POD(var.dtype, '(*%s)%s' % (var.name, dims))
            source = '(%s (*)%s) %s_vec' % (
                c.dtype_to_ctype(var.dtype), dims, var.name)
            statements.append(c.Initializer(target, source))

        statements.extend(expr.ccode for expr in self.expressions)
        statements.append(c.Statement("return 0"))
        return c.FunctionBody(declaration, c.Block(statements))
示例#12
0
文件: nodes.py 项目: kwinkunks/devito
 def _ccasts(self):
     """Return the cgen initializers casting ``<name>_vec`` arguments.

     One cast is produced for every entry of ``self.parameters`` that is a
     ``SymbolicData`` or ``TensorFunction``. Each cast declares an aligned
     ``restrict`` pointer named after the parameter, using the C code of all
     indices but the first as array extents.
     """
     qualifier = "__attribute__((aligned(64)))"
     casts = []
     for func in self.parameters:
         if not isinstance(func, (SymbolicData, TensorFunction)):
             continue
         dims = ''.join("[%s]" % index.ccode for index in func.indices[1:])
         target = c.POD(
             func.dtype,
             '(*restrict %s)%s %s' % (func.name, dims, qualifier))
         source = '(%s (*)%s) %s_vec' % (
             c.dtype_to_ctype(func.dtype), dims, func.name)
         casts.append(c.Initializer(target, source))
     return casts
示例#13
0
 def _args_cast(self, args):
     """Return cgen initializers that cast the raw kernel arguments.

     Tensor arguments are cast from ``<name>_vec`` to an aligned ``restrict``
     multi-dimensional pointer; pointer arguments are cast from ``_<name>``
     to a pointer of their C type. Other arguments produce nothing.
     """
     qualifier = "__attribute__((aligned(64)))"
     casts = []
     for arg in args:
         if arg.is_TensorArgument:
             # All dimensions but the first become array extents
             dims = ''.join("[%s]" % ccode(extent)
                            for extent in arg.provider.symbolic_shape[1:])
             target = c.POD(
                 arg.dtype,
                 '(*restrict %s)%s %s' % (arg.name, dims, qualifier))
             source = '(%s (*)%s) %s_vec' % (
                 c.dtype_to_ctype(arg.dtype), dims, arg.name)
             casts.append(c.Initializer(target, source))
             continue
         if arg.is_PtrArgument:
             typename = ctypes_to_C(arg.dtype)
             target = c.Pointer(c.Value(typename, arg.name))
             source = '(%s*) _%s' % (typename, arg.name)
             casts.append(c.Initializer(target, source))
     return casts
示例#14
0
    def generate(self, funcname, field_args, kernel_ast, adaptive=False):
        """Assemble the C source for a kernel plus its driving particle loop.

        The returned text contains, in order: the ``parcels.h`` and ``math.h``
        includes, a typedef'd struct for the particle type, the kernel code,
        and a ``particle_loop`` function with separate loop nests for positive
        and non-positive ``dt``.

        :param funcname: Name of the kernel function called inside the loops.
        :param field_args: Mapping whose keys become ``CField *`` parameters of
            ``particle_loop`` and are forwarded to the kernel call.
        :param kernel_ast: Object whose ``str()`` is inserted as kernel code.
        :param adaptive: Not referenced anywhere in this method body.
        :returns: All generated sections joined by blank lines.
        """
        ccode = []

        # Add include for Parcels and math header
        ccode += [str(c.Include("parcels.h", system=False))]
        ccode += [str(c.Include("math.h", system=False))]

        # Generate type definition for particle type
        vdecl = [c.POD(dtype, var) for var, dtype in self.ptype.var_types.items()]
        ccode += [str(c.Typedef(c.GenerableStruct("", vdecl, declname=self.ptype.name)))]

        # Insert kernel code
        ccode += [str(kernel_ast)]

        # Generate outer loop for repeated kernel invocation
        args = [c.Value("int", "num_particles"),
                c.Pointer(c.Value(self.ptype.name, "particles")),
                c.Value("double", "endtime"), c.Value("float", "dt")]
        for field, _ in field_args.items():
            args += [c.Pointer(c.Value("CField", "%s" % field))]
        # Argument list of the kernel call: particle time, particle dt, then fields
        fargs_str = ", ".join(['particles[p].time', 'particles[p].dt'] + list(field_args.keys()))
        # Inner loop nest for forward runs
        # __dt is the next step, capped (fmin) at the time remaining until endtime
        dt_fwd = c.Statement("__dt = fmin(particles[p].dt, endtime - particles[p].time)")
        body_fwd = [c.Statement("res = %s(&(particles[p]), %s)" % (funcname, fargs_str)),
                    c.If("res == SUCCESS", c.Statement("particles[p].time += __dt")), dt_fwd]
        time_fwd = c.While("__dt > __tol", c.Block(body_fwd))
        part_fwd = c.For("p = 0", "p < num_particles", "++p", c.Block([dt_fwd, time_fwd]))
        # Inner loop nest for backward runs
        # Mirror image of the forward nest: fmax instead of fmin, and the loop
        # runs while __dt is below -__tol
        dt_bwd = c.Statement("__dt = fmax(particles[p].dt, endtime - particles[p].time)")
        body_bwd = [c.Statement("res = %s(&(particles[p]), %s)" % (funcname, fargs_str)),
                    c.If("res == SUCCESS", c.Statement("particles[p].time += __dt")), dt_bwd]
        time_bwd = c.While("__dt < -1. * __tol", c.Block(body_bwd))
        part_bwd = c.For("p = 0", "p < num_particles", "++p", c.Block([dt_bwd, time_bwd]))

        # Pick the forward nest when dt > 0.0, otherwise the backward nest
        time_if = c.If("dt > 0.0", c.Block([part_fwd]), c.Block([part_bwd]))
        fbody = c.Block([c.Value("int", "p"), c.Value("KernelOp", "res"),
                         c.Value("double", "__dt, __tol"), c.Assign("__tol", "1.e-6"),
                         time_if])
        fdecl = c.FunctionDeclaration(c.Value("void", "particle_loop"), args)
        ccode += [str(c.FunctionBody(fdecl, fbody))]
        return "\n\n".join(ccode)
示例#15
0
    def declaration(self, pos):
        """Generates the declarative instructions for the optimizations over
        sites nr. `pos`

        The statements define the row index ``mid`` of the current thread,
        a shared buffer ``x_shared`` for the local tensors, the work arrays
        ``left_c``, ``right_c``, ``tmat_c`` and ``buf_c``, and the pointer
        ``current_row`` into ``A``.

        :param pos: Index of the site being optimized (should be `< len(X)`)
        :returns: List containing cgen Statements
        """
        max_ltens_size = max(self._ltens_sizes)
        # Boundary sites have no neighbouring ranks on one side; use size 1
        max_left_size = 1 if pos == 0 else max(self._ranks[:pos])
        max_right_size = 1 if pos == self._sites - 1 else max(
            self._ranks[pos:])
        # With fewer than three sites there is no pair of adjacent ranks, and
        # max() of an empty sequence would raise ValueError. Fall back to a
        # one-element array, like the left/right defaults above.
        tmat_sizes = [self._ranks[i] * self._ranks[i + 1]
                      for i in range(self._sites - 2)]
        max_tmat_size = max(tmat_sizes) if tmat_sizes else 1

        init_statements = [
            c.LineComment(
                "Define the row number the current thread is operating on"),
            c.Initializer(c.Const(c.POD(np.int32, 'mid')),
                          'threadIdx.x + blockIdx.x * blockDim.x'),
            c.LineComment("Allocate shared memory for the local tensors"),
            ccu.CudaShared(
                c.ArrayOf(c.POD(self._dtype, 'x_shared'), max_ltens_size)),
            c.LineComment(
                "Allocate the left-, right-, and transfer contractions"),
            c.ArrayOf(c.POD(self._dtype, 'left_c'), max_left_size),
            c.ArrayOf(c.POD(self._dtype, 'right_c'), max_right_size),
            c.ArrayOf(c.POD(self._dtype, 'tmat_c'), max_tmat_size),
            c.ArrayOf(c.POD(self._dtype, 'buf_c'),
                      max(max_right_size, max_left_size)),
            c.LineComment("Shortcut for current row of design matrix"),
            c.LineComment("Carefull, current_row might be out of bounds!"),
            ConstPointerToConst(self._dtype, 'current_row',
                                'A + (mid * %i)' % sum(self._dims))
        ]

        return init_statements
示例#16
0
def ConstPointerToConstDecl(dtype, name):
    """Build the cgen declarator ``Const(Pointer(Const(POD(dtype, name))))``,
    i.e. a constant pointer to a constant value of type `dtype`.
    """
    pointee = c.Const(c.POD(dtype, name))
    pointer = c.Pointer(pointee)
    return c.Const(pointer)
示例#17
0
    def generate(self, funcname, field_args, const_args, kernel_ast,
                 c_include):
        """Assemble the C source for a kernel plus its driving particle loop.

        The returned text contains, in order: the ``parcels.h`` and ``math.h``
        includes, the ``_next_dt`` / ``_next_dt_set`` globals, a typedef'd
        struct for the particle type, the ``set_particle_backup`` and
        ``get_particle_backup`` helpers, ``update_next_dt`` (only if the
        kernel code mentions it), the optional ``c_include`` text, the kernel
        code, and the ``particle_loop`` function.

        :param funcname: Name of the kernel function called inside the loop.
        :param field_args: Mapping whose keys become ``CField *`` parameters of
            ``particle_loop`` and are forwarded to the kernel call.
        :param const_args: Mapping whose keys become ``float`` parameters of
            ``particle_loop`` and are forwarded to the kernel call.
        :param kernel_ast: Object whose ``str()`` is inserted as kernel code.
        :param c_include: Extra source text appended verbatim; skipped if falsy.
        :returns: All generated sections joined by blank lines.
        """
        ccode = []

        # Add include for Parcels and math header
        ccode += [str(c.Include("parcels.h", system=False))]
        ccode += [str(c.Include("math.h", system=False))]
        # File-scope variables written by update_next_dt and read in the loop
        ccode += [str(c.Assign('double _next_dt', '0'))]
        ccode += [str(c.Assign('size_t _next_dt_set', '0'))]

        # Generate type definition for particle type
        vdecl = []
        for v in self.ptype.variables:
            if v.dtype == np.uint64:
                # uint64 variables are declared as `void *` members of the struct
                vdecl.append(c.Pointer(c.POD(np.void, v.name)))
            else:
                vdecl.append(c.POD(v.dtype, v.name))

        ccode += [
            str(
                c.Typedef(
                    c.GenerableStruct("", vdecl, declname=self.ptype.name)))
        ]

        # set_particle_backup: copy particle -> particle_backup for every
        # variable except uint64 ones, 'dt' and 'state'
        args = [
            c.Pointer(c.Value(self.ptype.name, "particle_backup")),
            c.Pointer(c.Value(self.ptype.name, "particle"))
        ]
        p_back_set_decl = c.FunctionDeclaration(
            c.Static(
                c.DeclSpecifier(c.Value("void", "set_particle_backup"),
                                spec='inline')), args)
        body = []
        for v in self.ptype.variables:
            if v.dtype != np.uint64 and v.name not in ['dt', 'state']:
                body += [
                    c.Assign(("particle_backup->%s" % v.name),
                             ("particle->%s" % v.name))
                ]
        p_back_set_body = c.Block(body)
        p_back_set = str(c.FunctionBody(p_back_set_decl, p_back_set_body))
        ccode += [p_back_set]

        # get_particle_backup: the reverse copy, particle_backup -> particle,
        # over the same set of variables
        args = [
            c.Pointer(c.Value(self.ptype.name, "particle_backup")),
            c.Pointer(c.Value(self.ptype.name, "particle"))
        ]
        p_back_get_decl = c.FunctionDeclaration(
            c.Static(
                c.DeclSpecifier(c.Value("void", "get_particle_backup"),
                                spec='inline')), args)
        body = []
        for v in self.ptype.variables:
            if v.dtype != np.uint64 and v.name not in ['dt', 'state']:
                body += [
                    c.Assign(("particle->%s" % v.name),
                             ("particle_backup->%s" % v.name))
                ]
        p_back_get_body = c.Block(body)
        p_back_get = str(c.FunctionBody(p_back_get_decl, p_back_get_body))
        ccode += [p_back_get]

        # update_next_dt is only emitted when the kernel code mentions it
        update_next_dt_decl = c.FunctionDeclaration(
            c.Static(
                c.DeclSpecifier(c.Value("void", "update_next_dt"),
                                spec='inline')), [c.Value('double', 'dt')])
        if 'update_next_dt' in str(kernel_ast):
            body = []
            body += [c.Assign("_next_dt", "dt")]
            body += [c.Assign("_next_dt_set", "1")]
            update_next_dt_body = c.Block(body)
            update_next_dt = str(
                c.FunctionBody(update_next_dt_decl, update_next_dt_body))
            ccode += [update_next_dt]

        # User-supplied C text goes before the kernel code
        if c_include:
            ccode += [c_include]

        # Insert kernel code
        ccode += [str(kernel_ast)]

        # Generate outer loop for repeated kernel invocation
        args = [
            c.Value("int", "num_particles"),
            c.Pointer(c.Value(self.ptype.name, "particles")),
            c.Value("double", "endtime"),
            c.Value("float", "dt")
        ]
        for field, _ in field_args.items():
            args += [c.Pointer(c.Value("CField", "%s" % field))]
        for const, _ in const_args.items():
            args += [c.Value("float", const)]
        # Argument list of the kernel call: particle time, then fields and constants
        fargs_str = ", ".join(['particles[p].time'] + list(field_args.keys()) +
                              list(const_args.keys()))
        # Statements reused inside the per-particle loop nest; the direction of
        # the run is carried by sign_dt (+1 or -1)
        sign_dt = c.Assign("sign_dt", "dt > 0 ? 1 : -1")
        particle_backup = c.Statement("%s particle_backup" % self.ptype.name)
        sign_end_part = c.Assign("sign_end_part",
                                 "endtime - particles[p].time > 0 ? 1 : -1")
        # __dt is the magnitude of the next step, capped at the time left to endtime
        dt_pos = c.Assign(
            "__dt",
            "fmin(fabs(particles[p].dt), fabs(endtime - particles[p].time))")
        # The signed step is written to particles[p].dt before the kernel runs
        pdt_eq_dt_pos = c.Assign("__pdt_prekernels", "__dt * sign_dt")
        partdt = c.Assign("particles[p].dt", "__pdt_prekernels")
        dt_0_break = c.If("particles[p].dt == 0", c.Statement("break"))
        # Skip particles whose direction towards endtime differs from the sign of dt
        notstarted_continue = c.If(
            "(sign_end_part != sign_dt) && (particles[p].dt != 0)",
            c.Statement("continue"))
        # Each iteration starts by saving the particle so it can be restored
        body = [
            c.Statement(
                "set_particle_backup(&particle_backup, &(particles[p]))")
        ]
        body += [pdt_eq_dt_pos]
        body += [partdt]
        body += [
            c.Assign("res", "%s(&(particles[p]), %s)" % (funcname, fargs_str))
        ]
        # A successful kernel call that changed particles[p].dt is turned into REPEAT
        check_pdt = c.If(
            "(res == SUCCESS) & (__pdt_prekernels != particles[p].dt)",
            c.Assign("res", "REPEAT"))
        body += [check_pdt]
        body += [c.Assign("particles[p].state",
                          "res")]  # Store return code on particle
        # Apply a dt requested through update_next_dt and clear the flag
        update_pdt = c.If(
            "_next_dt_set == 1",
            c.Block([
                c.Assign("_next_dt_set", "0"),
                c.Assign("particles[p].dt", "_next_dt")
            ]))
        # SUCCESS or DELETE: advance time and keep looping; any other code:
        # restore the backup and leave the time loop
        body += [
            c.If(
                "res == SUCCESS || res == DELETE",
                c.Block([
                    c.Statement("particles[p].time += particles[p].dt"),
                    update_pdt, dt_pos, dt_0_break,
                    c.Statement("continue")
                ]),
                c.Block([
                    c.Statement(
                        "get_particle_backup(&particle_backup, &(particles[p]))"
                    ), dt_pos,
                    c.Statement("break")
                ]))
        ]

        time_loop = c.While("__dt > __tol || particles[p].dt == 0",
                            c.Block(body))
        part_loop = c.For(
            "p = 0", "p < num_particles", "++p",
            c.Block([sign_end_part, notstarted_continue, dt_pos, time_loop]))
        # Function body: local declarations, tolerance, direction, backup
        # variable, then the loop over particles
        fbody = c.Block([
            c.Value("int", "p, sign_dt, sign_end_part"),
            c.Value("ErrorCode", "res"),
            c.Value("float", "__pdt_prekernels"),
            c.Value("double", "__dt, __tol"),
            c.Assign("__tol", "1.e-6"), sign_dt, particle_backup, part_loop
        ])
        fdecl = c.FunctionDeclaration(c.Value("void", "particle_loop"), args)
        ccode += [str(c.FunctionBody(fdecl, fbody))]
        return "\n\n".join(ccode)
示例#18
0
    def generate(self, funcname, field_args, const_args, kernel_ast,
                 c_include):
        """Assemble the C source for a kernel plus its driving particle loop.

        Particles are passed as one struct (named ``<ptype.name>p``) holding a
        pointer per variable, indexed by ``pnum``; a second struct with plain
        members is used for the single-particle backup.

        The returned text contains, in order: the ``parcels.h`` include, the
        ``_next_dt`` / ``_next_dt_set`` / ``ngrid`` globals, both struct
        typedefs, the ``set_particle_backup`` and ``get_particle_backup``
        helpers, ``update_next_dt`` (only if the kernel code mentions it), the
        optional ``c_include`` text, the kernel code, and ``particle_loop``.

        :param funcname: Name of the kernel function called inside the loop.
        :param field_args: Mapping whose keys become ``CField *`` parameters of
            ``particle_loop`` and are forwarded to the kernel call.
        :param const_args: Mapping whose keys become ``double`` parameters of
            ``particle_loop`` and are forwarded to the kernel call.
        :param kernel_ast: Object whose ``str()`` is inserted as kernel code.
        :param c_include: Extra source text appended verbatim; skipped if falsy.
        :returns: All generated sections joined by blank lines.
        """
        ccode = []

        # Name of the struct that holds one pointer per particle variable
        pname = self.ptype.name + 'p'

        # ==== Add include for Parcels header ==== #
        ccode += [str(c.Include("parcels.h", system=False))]
        # NOTE: includes of math.h and stdbool.h were previously present here
        # as commented-out code; the original note suggests math.h is already
        # pulled in by parcels.h (unconfirmed) and that stdbool.h was meant
        # for booleans used by crossdike.h.
        # File-scope variables written by update_next_dt and read in the loop
        ccode += [str(c.Assign('double _next_dt', '0'))]
        ccode += [str(c.Assign('size_t _next_dt_set', '0'))]
        # ngrid is the gridset size, or 1 when no fieldset is attached
        ccode += [
            str(
                c.Assign(
                    'const int ngrid',
                    str(self.fieldset.gridset.size if self.
                        fieldset is not None else 1)))
        ]

        # ==== Generate type definition for particle type ==== #
        # Every variable becomes a pointer member of the <pname> struct
        vdeclp = [
            c.Pointer(c.POD(v.dtype, v.name)) for v in self.ptype.variables
        ]
        ccode += [
            str(c.Typedef(c.GenerableStruct("", vdeclp, declname=pname)))
        ]
        # Generate type definition for single particle type
        # (uint64 variables are left out of this struct)
        vdecl = [
            c.POD(v.dtype, v.name) for v in self.ptype.variables
            if v.dtype != np.uint64
        ]
        ccode += [
            str(
                c.Typedef(
                    c.GenerableStruct("", vdecl, declname=self.ptype.name)))
        ]

        # set_particle_backup: copy particles-><var>[pnum] into particle_backup
        # for every variable except uint64 ones, 'dt' and 'state'
        args = [
            c.Pointer(c.Value(self.ptype.name, "particle_backup")),
            c.Pointer(c.Value(pname, "particles")),
            c.Value("int", "pnum")
        ]
        p_back_set_decl = c.FunctionDeclaration(
            c.Static(
                c.DeclSpecifier(c.Value("void", "set_particle_backup"),
                                spec='inline')), args)
        body = []
        for v in self.ptype.variables:
            if v.dtype != np.uint64 and v.name not in ['dt', 'state']:
                body += [
                    c.Assign(("particle_backup->%s" % v.name),
                             ("particles->%s[pnum]" % v.name))
                ]
        p_back_set_body = c.Block(body)
        p_back_set = str(c.FunctionBody(p_back_set_decl, p_back_set_body))
        ccode += [p_back_set]

        # get_particle_backup: the reverse copy over the same set of variables
        args = [
            c.Pointer(c.Value(self.ptype.name, "particle_backup")),
            c.Pointer(c.Value(pname, "particles")),
            c.Value("int", "pnum")
        ]
        p_back_get_decl = c.FunctionDeclaration(
            c.Static(
                c.DeclSpecifier(c.Value("void", "get_particle_backup"),
                                spec='inline')), args)
        body = []
        for v in self.ptype.variables:
            if v.dtype != np.uint64 and v.name not in ['dt', 'state']:
                body += [
                    c.Assign(("particles->%s[pnum]" % v.name),
                             ("particle_backup->%s" % v.name))
                ]
        p_back_get_body = c.Block(body)
        p_back_get = str(c.FunctionBody(p_back_get_decl, p_back_get_body))
        ccode += [p_back_get]

        # update_next_dt is only emitted when the kernel code mentions it
        update_next_dt_decl = c.FunctionDeclaration(
            c.Static(
                c.DeclSpecifier(c.Value("void", "update_next_dt"),
                                spec='inline')), [c.Value('double', 'dt')])
        if 'update_next_dt' in str(kernel_ast):
            body = []
            body += [c.Assign("_next_dt", "dt")]
            body += [c.Assign("_next_dt_set", "1")]
            update_next_dt_body = c.Block(body)
            update_next_dt = str(
                c.FunctionBody(update_next_dt_decl, update_next_dt_body))
            ccode += [update_next_dt]

        # User-supplied C text goes before the kernel code
        if c_include:
            ccode += [c_include]

        # ==== Insert kernel code ==== #
        ccode += [str(kernel_ast)]

        # Generate outer loop for repeated kernel invocation
        args = [
            c.Value("int", "num_particles"),
            c.Pointer(c.Value(pname, "particles")),
            c.Value("double", "endtime"),
            c.Value("double", "dt")
        ]
        for field, _ in field_args.items():
            args += [c.Pointer(c.Value("CField", "%s" % field))]
        for const, _ in const_args.items():
            args += [c.Value("double", const)]
        # Argument list of the kernel call: particle time, then fields and constants
        fargs_str = ", ".join(['particles->time[pnum]'] +
                              list(field_args.keys()) +
                              list(const_args.keys()))
        # ==== statement clusters used to compose 'body', 'time_loop' and 'part_loop' ==== #
        sign_dt = c.Assign("sign_dt", "dt > 0 ? 1 : -1")
        particle_backup = c.Statement("%s particle_backup" % self.ptype.name)
        sign_end_part = c.Assign(
            "sign_end_part", "(endtime - particles->time[pnum]) > 0 ? 1 : -1")
        reset_res_state = c.Assign("res", "particles->state[pnum]")
        update_state = c.Assign("particles->state[pnum]", "res")
        # Apply a dt requested through update_next_dt and clear the flag
        update_pdt = c.If(
            "_next_dt_set == 1",
            c.Block([
                c.Assign("_next_dt_set", "0"),
                c.Assign("particles->dt[pnum]", "_next_dt")
            ]))

        # __dt is the magnitude of the next step, capped at the time left to endtime
        dt_pos = c.Assign(
            "__dt",
            "fmin(fabs(particles->dt[pnum]), fabs(endtime - particles->time[pnum]))"
        )

        # The signed step is written to particles->dt[pnum] before the kernel runs
        pdt_eq_dt_pos = c.Assign("__pdt_prekernels", "__dt * sign_dt")
        partdt = c.Assign("particles->dt[pnum]", "__pdt_prekernels")
        # A successful kernel call that changed the particle's dt is turned into REPEAT
        check_pdt = c.If(
            "(res == SUCCESS) & !is_equal_dbl(__pdt_prekernels, particles->dt[pnum])",
            c.Assign("res", "REPEAT"))

        dt_0_break = c.If("is_zero_dbl(particles->dt[pnum])",
                          c.Statement("break"))

        # Skip particles that have nothing to do in this call; those whose
        # |time| has reached |endtime| are marked SUCCESS first
        notstarted_continue = c.If(
            "(( sign_end_part != sign_dt) || is_close_dbl(__dt, 0) ) && !is_zero_dbl(particles->dt[pnum])",
            c.Block([
                c.If("fabs(particles->time[pnum]) >= fabs(endtime)",
                     c.Assign("particles->state[pnum]", "SUCCESS")),
                c.Statement("continue")
            ]))

        # ==== main computation body ==== #
        # Each iteration starts by saving the particle so it can be restored
        body = [
            c.Statement(
                "set_particle_backup(&particle_backup, particles, pnum)")
        ]
        body += [pdt_eq_dt_pos]
        body += [partdt]
        # Remember the state before the kernel call to detect changes made by it
        body += [
            c.Value("StatusCode", "state_prev"),
            c.Assign("state_prev", "particles->state[pnum]")
        ]
        body += [
            c.Assign("res", "%s(particles, pnum, %s)" % (funcname, fargs_str))
        ]
        # If the kernel returned SUCCESS but changed the particle state, the
        # particle state takes precedence as the result
        body += [
            c.If("(res==SUCCESS) && (particles->state[pnum] != state_prev)",
                 c.Assign("res", "particles->state[pnum]"))
        ]
        body += [check_pdt]
        # SUCCESS or DELETE: advance time, refresh __dt and sign_end_part, and
        # mark the particle EVALUATE if it still has time left in the run
        # direction. Any other code: restore the backup and leave the time loop.
        body += [
            c.If(
                "res == SUCCESS || res == DELETE",
                c.Block([
                    c.Statement(
                        "particles->time[pnum] += particles->dt[pnum]"),
                    update_pdt, dt_pos, sign_end_part,
                    c.If(
                        "(res != DELETE) && !is_close_dbl(__dt, 0) && (sign_dt == sign_end_part)",
                        c.Assign("res", "EVALUATE")),
                    c.If("sign_dt != sign_end_part",
                         c.Assign("__dt", "0")), update_state, dt_0_break
                ]),
                c.Block([
                    c.Statement(
                        "get_particle_backup(&particle_backup, particles, pnum)"
                    ), dt_pos, sign_end_part,
                    c.If("sign_dt != sign_end_part", c.Assign("__dt", "0")),
                    update_state,
                    c.Statement("break")
                ]))
        ]

        # The time loop is driven by the particle state rather than by __dt
        time_loop = c.While(
            "(particles->state[pnum] == EVALUATE || particles->state[pnum] == REPEAT) || is_zero_dbl(particles->dt[pnum])",
            c.Block(body))
        part_loop = c.For(
            "pnum = 0", "pnum < num_particles", "++pnum",
            c.Block([
                sign_end_part, reset_res_state, dt_pos, notstarted_continue,
                time_loop
            ]))
        # Function body: local declarations, direction, backup variable, then
        # the loop over particles
        fbody = c.Block([
            c.Value("int", "pnum, sign_dt, sign_end_part"),
            c.Value("StatusCode", "res"),
            c.Value("double", "__pdt_prekernels"),
            c.Value("double",
                    "__dt"),  # 1e-8 = built-in tolerance for np.isclose()
            sign_dt,
            particle_backup,
            part_loop
        ])
        fdecl = c.FunctionDeclaration(c.Value("void", "particle_loop"), args)
        ccode += [str(c.FunctionBody(fdecl, fbody))]
        return "\n\n".join(ccode)
示例#19
0
 def _cgen(self):
     """Return the inline cgen text for this type: an unnamed POD of
     ``self.typ`` wrapped in a reference and then in ``self.dim`` pointers.
     """
     declarator = cgen.Reference(cgen.POD(self.typ, ''))
     for _ in range(self.dim):
         declarator = cgen.Pointer(declarator)
     return declarator.inline(True)
示例#20
0
 def _cgen(self):
     """Return the inline cgen text of an unnamed POD of ``self.typ``."""
     declarator = cgen.POD(self.typ, '')
     return declarator.inline(True)
示例#21
0
def ConstPointerToConst(dtype, name, value):
    """Build a ``c.Constant`` that declares `name` as a pointer to a constant
    of type `dtype` and assigns `value` to it.
    """
    pointee = c.Const(c.POD(dtype, name))
    pointer = c.Pointer(pointee)
    return c.Constant(pointer, value)