Пример #1
0
    def visit_Operator(self, o):
        """
        Assemble the ``c.Module`` with the code generated for the Operator ``o``.

        The module lists, in order: the ``#define`` lines, the ``#include``
        lines, the type declarations (followed by a ``c.Extern('C', ...)``
        wrapping the kernel signature when the compiler source extension is
        ``cpp``), the prototypes of the local elemental functions, the kernel,
        and the bodies of the local elemental functions.
        """
        spacer = c.Line("")

        # The kernel: the visited children followed by a trailing `return 0`
        statements = flatten(self._visit(child) for child in o.children)
        params = self._args_decl(o.parameters)
        signature = c.FunctionDeclaration(c.Value(o.retval, o.name), params)
        epilogue = [c.Line(), c.Statement("return 0")]
        kernel = c.FunctionBody(signature, c.Block(statements + epilogue))

        # Prototypes and bodies of the elemental functions flagged as local
        prototypes = []
        definitions = [spacer]
        for entry in o._func_table.values():
            if not entry.local:
                continue
            prototype = c.FunctionDeclaration(
                c.Value(entry.root.retval, entry.root.name),
                self._args_decl(entry.root.parameters))
            prototypes.append(prototype)
            definitions.extend([entry.root.ccode, spacer])

        # Preprocessor definitions; names ending in '.h' are non-system includes
        defines = [c.Define(*item) for item in o._headers] + [spacer]
        includes = [c.Include(name, system=not name.endswith('.h'))
                    for name in o._includes]
        includes.append(spacer)

        # Type declarations required by the kernel and by the local functions
        typedecls = [p._C_typedecl for p in o.parameters
                     if p._C_typedecl is not None]
        for entry in o._func_table.values():
            if entry.local:
                typedecls.extend(p._C_typedecl for p in entry.root.parameters
                                 if p._C_typedecl is not None)
        typedecls = filter_sorted(typedecls, key=lambda decl: decl.tpname)
        if o._compiler.src_ext == 'cpp':
            typedecls += [c.Extern('C', signature)]

        # Put a blank line after each type declaration
        spaced_typedecls = []
        for decl in typedecls:
            spaced_typedecls.extend((decl, spacer))

        return c.Module(defines + includes + spaced_typedecls +
                        prototypes + [spacer, kernel] + definitions)
Пример #2
0
    def _alloc_array_on_high_bw_mem(self, obj, storage):
        """
        Allocate an Array in the high bandwidth memory.

        An ``acc_malloc`` initializer and an ``acc_free`` statement for
        ``obj`` are stored in ``storage._high_bw_mem``. Nothing is done if
        ``obj`` already has an entry there.
        """
        if obj in storage._high_bw_mem:
            return

        ctype = obj._C_typedata

        # Every dimension but the first one is part of the pointer type
        inner_dims = "".join("[%s]" % dim for dim in obj.symbolic_shape[1:])
        pointer = c.Value(ctype, "(*%s)%s" % (obj.name, inner_dims))
        cast = "(%s (*)%s)" % (ctype, inner_dims)

        # The allocation covers the product of all the dimensions
        nitems = prod(obj.symbolic_shape)
        rhs = "%s acc_malloc(sizeof(%s[%s]))" % (cast, ctype, nitems)
        initializer = c.Initializer(pointer, rhs)

        release = c.Statement('acc_free(%s)' % obj.name)

        storage._high_bw_mem[obj] = (None, initializer, release)
Пример #3
0
 def _generate_lib_func(self):
     """
     Build the library function and store it as the 'LIB_FUNC' component.

     The function body is the 'LIB_OUTER_LOOP' component wrapped between
     the loop timer's pre-loop and post-loop code; its arguments are the
     'LIB_ARG_DECLS' followed by the 'KERNEL_LIB_ARG_DECLS'.
     """
     timer = self.loop_timer
     body = cgen.Block([
         timer.get_cpp_pre_loop_code_ast(),
         self._components['LIB_OUTER_LOOP'],
         timer.get_cpp_post_loop_code_ast(),
     ])

     func_name = cgen.Value("void", self._components['LIB_NAME'])
     func_args = (self._components['LIB_ARG_DECLS']
                  + self._components['KERNEL_LIB_ARG_DECLS'])
     declaration = cgen.FunctionDeclaration(func_name, func_args)

     self._components['LIB_FUNC'] = cgen.FunctionBody(declaration, body)
Пример #4
0
    def generate(self, funcname, field_args, kernel_ast, adaptive=False):
        """
        Return, as one string, the C source wrapping a particle kernel.

        The pieces, joined by blank lines, are: the includes of ``parcels.h``
        and ``math.h``, the typedef of the particle struct, the kernel code
        and a ``particle_loop`` function that repeatedly calls ``funcname``
        on each particle, forward or backward in time depending on the sign
        of ``dt``.

        ``adaptive`` is accepted but not used by this method.
        """
        pieces = [str(c.Include("parcels.h", system=False)),
                  str(c.Include("math.h", system=False))]

        # Type definition for the particle type
        members = [c.POD(dtype, var) for var, dtype in self.ptype.var_types.items()]
        particle_struct = c.GenerableStruct("", members, declname=self.ptype.name)
        pieces.append(str(c.Typedef(particle_struct)))

        # The kernel code itself
        pieces.append(str(kernel_ast))

        # Arguments of the outer loop: fixed ones first, then one per field
        loop_args = [c.Value("int", "num_particles"),
                     c.Pointer(c.Value(self.ptype.name, "particles")),
                     c.Value("double", "endtime"), c.Value("float", "dt")]
        loop_args.extend(c.Pointer(c.Value("CField", "%s" % name))
                         for name, _ in field_args.items())
        call_args = ", ".join(['particles[p].time', 'particles[p].dt']
                              + list(field_args.keys()))
        kernel_call = "res = %s(&(particles[p]), %s)" % (funcname, call_args)

        def loop_nest(dt_update, keep_going):
            # For each particle: compute __dt, then keep invoking the kernel
            # (advancing time on success and recomputing __dt) while the
            # condition `keep_going` holds
            update = c.Statement(dt_update)
            steps = [c.Statement(kernel_call),
                     c.If("res == SUCCESS", c.Statement("particles[p].time += __dt")),
                     update]
            stepping = c.While(keep_going, c.Block(steps))
            return c.For("p = 0", "p < num_particles", "++p",
                         c.Block([update, stepping]))

        forward = loop_nest(
            "__dt = fmin(particles[p].dt, endtime - particles[p].time)",
            "__dt > __tol")
        backward = loop_nest(
            "__dt = fmax(particles[p].dt, endtime - particles[p].time)",
            "__dt < -1. * __tol")

        direction = c.If("dt > 0.0", c.Block([forward]), c.Block([backward]))
        loop_body = c.Block([c.Value("int", "p"), c.Value("KernelOp", "res"),
                             c.Value("double", "__dt, __tol"),
                             c.Assign("__tol", "1.e-6"),
                             direction])
        loop_decl = c.FunctionDeclaration(c.Value("void", "particle_loop"), loop_args)
        pieces.append(str(c.FunctionBody(loop_decl, loop_body)))

        return "\n\n".join(pieces)
Пример #5
0
    def _generate_kernel_call(self):
        """
        Build the code calling the kernel and store it as 'LIB_KERNEL_CALL'.

        Each entry of ``self._dat_dict`` contributes one call argument. For
        ``host.Matrix`` entries a ``_<symbol>_t`` struct holding the i and j
        pointers is also declared before the call.

        Raises ``RuntimeError`` for an entry of unknown type.
        """
        call = cgen.Module([cgen.Comment('#### Kernel call arguments ####')])
        arg_names = []

        for symbol, entry in self._dat_dict.items():
            obj, mode = entry[0], entry[1]
            obj_type = type(obj)

            if issubclass(obj_type, data.GlobalArrayClassic):
                arg_names.append(symbol + '_c')
                continue
            if issubclass(obj_type, host._Array):
                arg_names.append(symbol)
                continue
            if not issubclass(obj_type, host.Matrix):
                raise RuntimeError("ERROR: Type not known")

            struct_name = symbol + '_c'
            arg_names.append(struct_name)

            ncomp = str(obj.ncomp)
            i_offset = '+' + self._components['LIB_PAIR_INDEX_0'] + '*' + ncomp
            j_offset = '+' + self._components['LIB_PAIR_INDEX_1'] + '*' + ncomp

            # Written matrices that are small enough are accessed through
            # the `<symbol>i` array rather than through the offset pointer
            gathered = mode.write and obj.ncomp <= self._gather_size_limit
            i_ptr = '&' + symbol + 'i[0]' if gathered else symbol + i_offset
            j_ptr = symbol + j_offset

            struct = cgen.Value('_' + symbol + '_t', struct_name)
            call.append(
                cgen.Initializer(struct, '{ ' + i_ptr + ', ' + j_ptr + '}'))

        call.append(cgen.Comment('#### Kernel call ####'))

        joined_args = ','.join(arg_names)
        call.append(cgen.Line(
            'k_' + self._kernel.name + '(' + joined_args + ');'
        ))

        self._components['LIB_KERNEL_CALL'] = call
Пример #6
0
    def _alloc_array_on_high_bw_mem(self, site, obj, storage, *args):
        """
        Allocate an Array in the high bandwidth memory.

        The pointer declaration, the ``alloc-host`` statement and the
        ``free-host`` statement generated for ``obj`` are registered in
        ``storage`` for ``site``. Extra positional arguments are ignored.
        """
        ctype = obj._C_typedata
        dims = ["[%s]" % i for i in obj.symbolic_shape]

        # The pointer type carries every dimension but the first one
        pointer = c.Value(ctype, "(*%s)%s" % (obj.name, "".join(dims[1:])))

        # The allocation size accounts for all of the dimensions
        nbytes = "sizeof(%s%s)" % (ctype, "".join(dims))
        allocate = self.lang['alloc-host'](obj.name, obj._data_alignment, nbytes)
        allocate = c.Statement(allocate)

        release = c.Statement(self.lang['free-host'](obj.name))

        storage.update(obj, site, allocs=(pointer, allocate), frees=release)
Пример #7
0
    def push_array_on_heap(self, obj):
        """
        Define an Array on the heap.

        The pointer declaration of ``obj`` and its ``posix_memalign`` and
        ``free`` statements are stored in ``self.heap``; calling this again
        for the same ``obj`` does nothing.
        """
        if obj in self.heap:
            return

        ctype = obj._C_typedata
        dims = ["[%s]" % i for i in obj.symbolic_shape]

        # Pointer to the sub-array spanned by all dimensions but the first
        pointer = c.Value(ctype, "(*%s)%s" % (obj.name, "".join(dims[1:])))

        allocate = c.Statement(
            "posix_memalign((void**)&%s, %d, sizeof(%s%s))"
            % (obj.name, obj._data_alignment, ctype, "".join(dims)))

        release = c.Statement('free(%s)' % obj.name)

        self.heap[obj] = (pointer, allocate, release)
Пример #8
0
    def push_heap(self, obj):
        """
        Generate cgen objects to declare an Array and allocate/free its memory.

        The resulting ``(declaration, allocation, free)`` triple is stored in
        ``self.heap[obj]``. An ``obj`` already in ``self.heap`` is skipped.
        """
        if obj in self.heap:
            return

        name = obj.name
        shape = obj.symbolic_shape

        # Declaration: pointer typed over the trailing dimensions
        trailing = "".join("[%s]" % i for i in shape[1:])
        declaration = c.Value(obj._C_typedata, "(*%s)%s" % (name, trailing))

        # Allocation: aligned memory sized over the full shape
        full = "".join("[%s]" % i for i in shape)
        template = "posix_memalign((void**)&%s, %d, sizeof(%s%s))"
        allocation = c.Statement(
            template % (name, obj._data_alignment, obj._C_typedata, full))

        deallocation = c.Statement('free(%s)' % name)

        self.heap[obj] = (declaration, allocation, deallocation)
Пример #9
0
    def _alloc_array_on_high_bw_mem(self, site, obj, storage):
        """
        Allocate an Array in the high bandwidth memory.

        A pointer declaration, a ``posix_memalign`` statement and a ``free``
        statement are generated for ``obj`` and registered in ``storage``
        for ``site``.
        """
        ctype = obj._C_typedata
        dims = ["[%s]" % i for i in obj.symbolic_shape]

        # The leading dimension is dropped from the pointer type
        pointer = c.Value(ctype, "(*%s)%s" % (obj.name, "".join(dims[1:])))

        # ... but it does contribute to the allocated size
        allocate = c.Statement(
            "posix_memalign((void**)&%s, %d, sizeof(%s%s))"
            % (obj.name, obj._data_alignment, ctype, "".join(dims)))

        release = c.Statement('free(%s)' % obj.name)

        storage.update(obj, site, allocs=(pointer, allocate), frees=release)
Пример #10
0
    def ccode(self):
        """Generate C code for the represented stencil loop

        The loop runs over ``self.index`` from ``self.limits[0]`` towards
        ``self.limits[1]`` in steps of ``self.limits[2]``; it counts upwards
        when the end is not smaller than the start, downwards otherwise.

        :returns: :class:`cgen.For` object representing the loop
        """
        start, stop, step = self.limits[0], self.limits[1], self.limits[2]
        ascending = stop >= start

        body = cgen.Block([expr.ccode for expr in self.expressions])
        init = cgen.InlineInitializer(cgen.Value("int", self.index), start)

        comparison = '<' if ascending else '>'
        condition = '%s %s %s' % (self.index, comparison, stop)

        if step == 1:
            # Unit stride: use the increment/decrement operator
            update = '%s%s' % (self.index, '++' if ascending else '--')
        else:
            operator = '+=' if ascending else '-='
            update = '%s %s %s' % (self.index, operator, step)

        return cgen.For(init, condition, update, body)
Пример #11
0
    def __init__(self, dtype, name, args, body, template: tuple = None):
        """
        Set up the declaration and, when ``body`` is given, the definition
        of a function.

        If ``template`` is provided, ``<template[1]>`` is appended to the
        function name. ``self.fnc`` is ``None`` when ``body`` is ``None``.
        """
        self.is_template = template is not None
        self.name = f'{name}<{template[1]}>' if self.is_template else name

        self.value = c.Value(dtype, self.name)
        self.decl = c.FunctionDeclaration(self.value, args)
        self.fnc = None if body is None else c.FunctionBody(self.decl, body)

        self.modifiers = []
        self.constraints = []
Пример #12
0
    def _alloc_array_on_low_lat_mem(self, site, obj, storage):
        """
        Allocate an Array in the low latency memory.

        The Array is declared with its full shape and the alignment
        attribute produced by ``self.lang['aligned']``; if ``obj.initvalue``
        is set, the declaration is turned into an initializer.
        """
        dims = "".join("[%s]" % ccode(i) for i in obj.symbolic_shape)
        aligned = self.lang['aligned'](obj._data_alignment)
        declaration = c.Value(obj._C_typedata,
                              "%s%s %s" % (obj._C_name, dims, aligned))

        if obj.initvalue is None:
            allocs = declaration
        else:
            allocs = c.Initializer(declaration, ListInitializer(obj.initvalue))

        storage.update(obj, site, allocs=allocs)
Пример #13
0
    def visit_Operator(self, o):
        """
        Generate the code of both the C file and the header file for ``o``.

        Returns the pair ``(ccode, hcode)``: ``ccode`` comes from the parent
        visitor in 'private' mode, ``hcode`` is a module with the 'public'
        type declarations, each within an ``#ifndef DEVITO_<NAME>`` guard,
        followed by the Operator signature.
        """
        # The cfile
        ccode = super().visit_Operator(o, mode='private')

        # The hfile: guarded type declarations first ...
        guarded = []
        for typedecl in self._operator_typedecls(o, mode='public'):
            macro = "DEVITO_%s" % typedecl.tpname.upper()
            guard_body = [c.Define(macro, ""), blankline, typedecl, blankline]
            guarded += [c.IfNDef(macro, guard_body, []), blankline]

        # ... then the Operator signature
        params = self._args_decl(o.parameters)
        signature = c.FunctionDeclaration(c.Value(o.retval, o.name), params)
        hcode = c.Module(guarded + [blankline, signature, blankline])

        return ccode, hcode
Пример #14
0
    def __init__(self,
                 dtype,
                 name,
                 args,
                 body,
                 template: tuple = None,
                 modifiers=''):
        """
        Build the function definition ``self.fnc`` from its return type,
        name, arguments and body.

        When ``template`` is given, the declaration is wrapped in a
        ``c.Template`` built from ``template[0]``.
        """
        self.dtype, self.name, self.modifiers = dtype, name, modifiers
        self.is_template = template is not None
        self.value = c.Value(dtype, name)

        signature = c.FunctionDeclaration(self.value, args)
        if self.is_template:
            signature = c.Template(template[0], signature)

        self.fnc = c.FunctionBody(signature, body)
Пример #15
0
    def push_heap(self, obj):
        """
        Generate cgen objects to declare, allocate memory, and free memory for
        ``obj``, of type :class:`Array`.

        The objects are stored in ``self.heap[obj]``; nothing is done if
        ``obj`` is already there. Memory is requested with a 64-byte alignment.
        """
        if obj in self.heap:
            return

        ctype = c.dtype_to_ctype(obj.dtype)
        dims = ["[%s]" % i for i in obj.symbolic_shape]

        # The pointer is typed over all dimensions but the first one
        pointer = c.Value(ctype, "(*%s)%s" % (obj.name, "".join(dims[1:])))

        allocate = c.Statement(
            "posix_memalign((void**)&%s, 64, sizeof(%s%s))"
            % (obj.name, ctype, "".join(dims)))

        release = c.Statement('free(%s)' % obj.name)

        self.heap[obj] = (pointer, allocate, release)
Пример #16
0
    def _alloc_pointed_array_on_high_bw_mem(self, site, obj, storage):
        """
        Allocate the following objects in the high bandwidth memory:

            * The pointer array `obj`;
            * The pointee Array `obj.array`

        If the pointer array is defined over `sregistry.threadid`, that is a
        thread Dimension, then the allocation and the release of each
        `obj.array` slice are registered as `pallocs`/`pfrees` tied to that
        Dimension, instead of as plain `allocs`/`frees`.
        """
        name = obj.name
        ctype = obj._C_typedata
        alignment = obj._data_alignment
        dim = obj.dim

        # The pointer array
        pointer = c.Value(ctype, "**%s" % name)
        alloc_pointers = c.Statement(
            "posix_memalign((void**)&%s, %d, sizeof(%s*)*%s)"
            % (name, alignment, ctype, dim.symbolic_size))
        free_pointers = c.Statement('free(%s)' % name)

        # The pointee Array
        dims = "".join("[%s]" % i for i in obj.array.symbolic_shape)
        alloc_slice = c.Statement(
            "posix_memalign((void**)&%s[%s], %d, sizeof(%s%s))"
            % (name, dim.name, alignment, ctype, dims))
        free_slice = c.Statement('free(%s[%s])' % (name, dim.name))

        if obj.dim is self.sregistry.threadid:
            kwargs = dict(allocs=(pointer, alloc_pointers),
                          frees=free_pointers,
                          pallocs=(obj.dim, alloc_slice),
                          pfrees=(obj.dim, free_slice))
        else:
            kwargs = dict(allocs=(pointer, alloc_pointers, alloc_slice),
                          frees=(free_pointers, free_slice))

        storage.update(obj, site, **kwargs)
Пример #17
0
 def _args_cast(self, args):
     """
     Build cgen type casts for an iterable of :class:`Argument`.

     Tensor arguments are cast from ``<name>_vec`` to a restrict pointer
     typed over all dimensions but the first; pointer arguments are cast
     from ``_<name>``. Any other argument produces no cast.
     """
     casts = []
     for arg in args:
         if arg.is_TensorArgument:
             dims = ''.join(
                 "[%s]" % ccode(dim) for dim in arg.provider.symbolic_shape[1:])
             alignment = "__attribute__((aligned(64)))"
             target = c.POD(arg.dtype,
                            '(*restrict %s)%s %s' % (arg.name, dims, alignment))
             source = '(%s (*)%s) %s' % (c.dtype_to_ctype(arg.dtype), dims,
                                         '%s_vec' % arg.name)
         elif arg.is_PtrArgument:
             ctype = ctypes_to_C(arg.dtype)
             target = c.Pointer(c.Value(ctype, arg.name))
             source = '(%s*) %s' % (ctype, '_%s' % arg.name)
         else:
             continue
         casts.append(c.Initializer(target, source))
     return casts
Пример #18
0
    def visit_FunctionDef(self, node):
        """
        Attach to ``node.ccode`` the C function generated for a kernel.

        The function is declared ``static inline`` with return type
        ``ErrorCode``. Its arguments are the particle, ``time`` and ``dt``,
        then one ``CField`` pointer per entry of ``self.field_args`` other
        than 'UV', then the components of the 'UV' field found in its
        fieldset, and finally one ``float`` per entry of ``self.const_args``.

        Raises ``RuntimeError`` if a 'UV' component is missing from the
        fieldset while the U grid is curvilinear.
        """
        # Generate "ccode" attribute by traversing the Python AST
        # NOTE(review): on Python >= 3.8 string literals are parsed as
        # `ast.Constant`, so this exact-type test may no longer match
        # docstrings -- confirm against the supported Python versions
        for stmt in node.body:
            if not (hasattr(stmt, 'value')
                    and type(stmt.value) is ast.Str):  # ignore docstrings
                self.visit(stmt)

        # Create function declaration and argument list
        decl = c.Static(
            c.DeclSpecifier(c.Value("ErrorCode", node.name), spec='inline'))
        args = [
            c.Pointer(c.Value(self.ptype.name, "particle")),
            c.Value("double", "time"),
            c.Value("float", "dt")
        ]
        for field_name in self.field_args:
            if field_name != 'UV':
                args += [c.Pointer(c.Value("CField", "%s" % field_name))]
        for field_name, field in self.field_args.items():
            if field_name != 'UV':
                continue
            fieldset = field.fieldset
            for f in ['U', 'V', 'cosU', 'sinU', 'cosV', 'sinV']:
                try:
                    getattr(fieldset, f)
                except Exception:
                    # `Exception` rather than a bare `except`, so that
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    # A missing component is tolerated unless the grid is
                    # curvilinear
                    if fieldset.U.grid.gtype in [
                            GridCode.CurvilinearZGrid,
                            GridCode.CurvilinearSGrid
                    ]:
                        raise RuntimeError(
                            "cosU, sinU, cosV and sinV fields must be defined for a proper rotation of U, V fields in curvilinear grids"
                        )
                else:
                    # Avoid passing twice a field that is already an argument
                    if f not in self.field_args:
                        args += [c.Pointer(c.Value("CField", "%s" % f))]
        for const in self.const_args:
            args += [c.Value("float", const)]

        # Create function body as C-code object
        body = [
            stmt.ccode for stmt in node.body
            if not (hasattr(stmt, 'value') and type(stmt.value) is ast.Str)
        ]
        body += [c.Statement("return SUCCESS")]
        node.ccode = c.FunctionBody(c.FunctionDeclaration(decl, args),
                                    c.Block(body))
Пример #19
0
    def eval(self, generator):
        """
        Translate the wrapped ``self.base`` node into its output form.

        * ``MessageBox``: a ``CSharpClass`` named after the message (plus
          what ``base_methods`` returns as inheritance suffix);
        * ``BlockBox``: the list of the evaluated fields;
        * ``DeclarationBox``: a ``c.Value``, whose type is wrapped in
          ``Optional<...>`` when the declaration is optional.

        ``None`` is returned for any other kind of node.
        """
        base = self.base

        if isinstance(base, MessageBox):
            methods, inherits = base_methods(base.base, base.name.eval())
            class_name = f'{base.name.eval()}{inherits}'
            members = WrapStruct(base.block).eval(generator)
            return CSharpClass(class_name, methods, [], [], members, [], [])

        if isinstance(base, BlockBox):
            return [
                WrapStruct(field).eval(generator) for field in base.fields.eval()
            ]

        if isinstance(base, DeclarationBox):
            dtype = base.dtype.eval(generator)
            if base.optional:
                dtype = f'Optional<{dtype}>'
            return c.Value(dtype, base.name.eval())

        return None
Пример #20
0
    def visit_FunctionDef(self, node):
        """
        Attach to ``node.ccode`` the C function generated for a kernel.

        The function is declared ``static inline`` with return type
        ``ErrorCode``. Its arguments are the particle, ``time`` and ``dt``,
        then one ``CField`` pointer per entry of ``self.field_args``, then
        the U, V and (if any) W components of each entry of
        ``self.vector_field_args`` not already listed, and finally one
        ``float`` per entry of ``self.const_args``.

        Raises ``RuntimeError`` if the U or V component of a vector field
        cannot be retrieved from its fieldset.
        """
        # Generate "ccode" attribute by traversing the Python AST
        # NOTE(review): on Python >= 3.8 string literals are parsed as
        # `ast.Constant`, so this exact-type test may no longer match
        # docstrings -- confirm against the supported Python versions
        for stmt in node.body:
            if not (hasattr(stmt, 'value')
                    and type(stmt.value) is ast.Str):  # ignore docstrings
                self.visit(stmt)

        # Create function declaration and argument list
        decl = c.Static(
            c.DeclSpecifier(c.Value("ErrorCode", node.name), spec='inline'))
        args = [
            c.Pointer(c.Value(self.ptype.name, "particle")),
            c.Value("double", "time"),
            c.Value("float", "dt")
        ]
        for field_name in self.field_args:
            args += [c.Pointer(c.Value("CField", "%s" % field_name))]
        for field in self.vector_field_args.values():
            fieldset = field.fieldset
            # The W component is optional; a placeholder name stands for it
            # when it is not set
            Wname = field.W.name if field.W else 'not_defined'
            for f in [field.U.name, field.V.name, Wname]:
                try:
                    # Next line will break for example if field.U was created but not added to the fieldset
                    getattr(fieldset, f)
                except Exception:
                    # `Exception` rather than a bare `except`, so that
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    # Only the W component is allowed to be missing
                    if f != Wname:
                        raise RuntimeError(
                            "Field %s needed by a VectorField but it does not exist"
                            % f)
                else:
                    # Avoid passing twice a field that is already an argument
                    if f not in self.field_args:
                        args += [c.Pointer(c.Value("CField", "%s" % f))]
        for const in self.const_args:
            args += [c.Value("float", const)]

        # Create function body as C-code object
        body = [
            stmt.ccode for stmt in node.body
            if not (hasattr(stmt, 'value') and type(stmt.value) is ast.Str)
        ]
        body += [c.Statement("return SUCCESS")]
        node.ccode = c.FunctionBody(c.FunctionDeclaration(decl, args),
                                    c.Block(body))
Пример #21
0
    def generate(self, py_ast, funcvars):
        """
        Generate and return the C code for the Python AST ``py_ast``.

        The names in ``funcvars`` that are neither kernel variables nor
        array variables are declared as ``float`` at the top of the function
        body. Note that ``funcvars`` is modified in place: the kernel and
        array variables are removed from it.
        """
        # Untangle Pythonic tuple-assignment statements, then replace the
        # occurrences of intrinsic objects
        py_ast = TupleSplitter().visit(py_ast)
        py_ast = IntrinsicTransformer(self.grid, self.ptype).visit(py_ast)

        # Generate C-code for all nodes in the Python AST
        self.visit(py_ast)
        self.ccode = py_ast.ccode

        # Whatever is left in `funcvars` after dropping the variables that
        # are already known needs a declaration
        already_known = self.kernel_vars + self.array_vars
        for name in already_known:
            if name in funcvars:
                funcvars.remove(name)
        if funcvars:
            declaration = c.Value("float", ", ".join(funcvars))
            self.ccode.body.insert(0, declaration)

        return self.ccode
Пример #22
0
 def visit_Assign(self, node):
     """
     Set ``node.ccode`` for an assignment statement.

     A list on the right-hand side is treated as the in-place
     initialisation of a (possibly multi-dimensional) ``float`` array, whose
     name is then recorded in ``self.array_vars``; anything else becomes a
     plain ``c.Assign``.

     Raises ``TypeError`` if a nesting level led by a list contains a
     non-list element or lists of different lengths.
     """
     self.visit(node.targets[0])
     self.visit(node.value)
     target, value = node.targets[0], node.value

     if not isinstance(value, ast.List):
         node.ccode = c.Assign(target.ccode, value.ccode)
         return

     # Walk down the first element of each nesting level, adding one array
     # dimension per level
     decl = c.Value('float', target.id)
     level = value
     while isinstance(level, ast.List):
         decl = c.ArrayOf(decl, len(level.elts))
         first = level.elts[0]
         if isinstance(first, ast.List):
             # Check type and dimension are the same
             if not all(isinstance(e, ast.List) for e in level.elts):
                 raise TypeError("Non-list element discovered in array declaration")
             width = len(first.elts)
             if not all(len(e.elts) == width for e in level.elts):
                 raise TypeError("Irregular array length not allowed in array declaration")
         level = first

     node.ccode = c.Initializer(decl, value.ccode)
     self.array_vars += [target.id]
Пример #23
0
    def map_Subroutine(self, node):
        """
        Translate a subroutine node into a ``void`` cgen function.

        A new ``Scope`` is pushed for the duration of the translation. The
        in-function declarations are placed ahead of the translated
        statements and a trailing bare ``return`` statement is dropped.

        Returns the function body, or a list made of the pre-function
        declarations, a blank line and the function body when there are any
        pre-function declarations.
        """
        assert not node.prefix
        assert not hasattr(node, "suffix")

        self.scope_stack.append(Scope(node.name, list(node.args)))

        statements = self.map_statement_list(node.content)

        pre_func_decl, in_func_decl = self.get_declarations()
        statements = in_func_decl + [cgen.Line()] + statements

        last = statements[-1]
        if isinstance(last, cgen.Statement) and last.text == "return":
            statements.pop()

        def get_arg_decl(arg_idx, arg_name):
            # Declarator of one argument, turned into a pointer if needed
            decl = self.get_declarator(arg_name)
            if not self.arg_needs_pointer(node.name, arg_idx):
                return decl

            hint = self.addr_space_hints.get((node.name, arg_name))
            if hint:
                return hint(cgen.Pointer(decl))
            if self.use_restrict_pointers:
                return cgen.RestrictPointer(decl)
            return cgen.Pointer(decl)

        arg_decls = [get_arg_decl(idx, arg) for idx, arg in enumerate(node.args)]
        declaration = cgen.FunctionDeclaration(
            cgen.Value("void", node.name), arg_decls)
        result = cgen.FunctionBody(declaration, cgen.Block(statements))

        self.scope_stack.pop()

        if not pre_func_decl:
            return result
        return pre_func_decl + [cgen.Line(), result]
Пример #24
0
    def _alloc_pointed_array_on_high_bw_mem(self, site, obj, storage):
        """
        Allocate the following objects in the high bandwidth memory:

            * The pointer array `obj`;
            * The pointee Array `obj.array`

        If the pointer array is defined over `sregistry.threadid`, that is a
        thread Dimension, then the allocation and the release of each
        `obj.array` slice are registered as `pallocs`/`pfrees` tied to that
        Dimension, instead of as plain `allocs`/`frees`.
        """
        host_alloc = self.lang['alloc-host']
        host_free = self.lang['free-host']
        ctype = obj._C_typedata
        alignment = obj._data_alignment

        # The pointer array
        pointer = c.Value(ctype, "**%s" % obj.name)
        nbytes = 'sizeof(%s*)*%s' % (ctype, obj.dim.symbolic_size)
        alloc_pointers = c.Statement(host_alloc(obj.name, alignment, nbytes))
        free_pointers = c.Statement(host_free(obj.name))

        # The pointee Array
        pointee = '%s[%s]' % (obj.name, obj.dim.name)
        dims = "".join("[%s]" % i for i in obj.array.symbolic_shape)
        nbytes = "sizeof(%s%s)" % (ctype, dims)
        alloc_slice = c.Statement(host_alloc(pointee, alignment, nbytes))
        free_slice = c.Statement(host_free(pointee))

        if obj.dim is self.sregistry.threadid:
            kwargs = dict(allocs=(pointer, alloc_pointers),
                          frees=free_pointers,
                          pallocs=(obj.dim, alloc_slice),
                          pfrees=(obj.dim, free_slice))
        else:
            kwargs = dict(allocs=(pointer, alloc_pointers, alloc_slice),
                          frees=(free_pointers, free_slice))

        storage.update(obj, site, **kwargs)
Пример #25
0
    def _alloc_array_on_high_bw_mem(self, site, obj, storage):
        """
        Allocate an Array in the high bandwidth memory.

        Memory-mapped Arrays are handled by the parent class. Any other
        Array is allocated and freed through the ``device-alloc`` and
        ``device-free`` entries of ``self.lang``.
        """
        if obj._mem_mapped:
            # posix_memalign + copy-to-device
            super()._alloc_array_on_high_bw_mem(site, obj, storage)
            return

        # acc_malloc -- the Array only resides on the device, ie, it never
        # needs to be accessed on the host
        assert obj._mem_default

        ctype = obj._C_typedata
        inner_dims = "".join("[%s]" % i for i in obj.symbolic_shape[1:])
        pointer = c.Value(ctype, "(*%s)%s" % (obj.name, inner_dims))
        cast = "(%s (*)%s)" % (ctype, inner_dims)

        nbytes = "sizeof(%s[%s])" % (ctype, prod(obj.symbolic_shape))
        rhs = "%s %s" % (cast, self.lang['device-alloc'](nbytes))
        initializer = c.Initializer(pointer, rhs)

        release = c.Statement(self.lang['device-free'](obj.name))

        storage.update(obj, site, allocs=initializer, frees=release)
Пример #26
0
    def eval(self, generator):
        """
        Translate the wrapped ``self.base`` node into its output form.

        * ``MessageBox``: a ``c.Struct`` named after the message, with a
          ``: public <Parent>`` suffix (and the parent's template argument,
          if any) when the message has a named parent;
        * ``BlockBox``: the list of the evaluated fields;
        * ``DeclarationBox``: a ``c.Value``, whose type is wrapped in
          ``std::optional<...>`` when the declaration is optional.

        ``None`` is returned for any other kind of node.
        """
        base = self.base

        if isinstance(base, MessageBox):
            parent = base.base
            inherits = ''
            if parent.name:
                inherits = f' : public {parent.name.eval()}'
                if parent.template:
                    inherits = f'{inherits}<{parent.template.eval()}>'

            struct_name = f'{base.name.eval()}{inherits}'
            return c.Struct(struct_name, WrapStruct(base.block).eval(generator))

        if isinstance(base, BlockBox):
            return [
                WrapStruct(field).eval(generator) for field in base.fields.eval()
            ]

        if isinstance(base, DeclarationBox):
            dtype = base.dtype.eval(generator)
            if base.optional:
                dtype = f'std::optional<{dtype}>'
            return c.Value(dtype, base.name.eval())

        return None
Пример #27
0
    def _alloc_array_on_high_bw_mem(self, site, obj, storage):
        """
        Allocate an Array in the high bandwidth memory.

        Memory-mapped Arrays are handled by the parent class. Any other
        Array is allocated with ``acc_malloc`` and freed with ``acc_free``.
        """
        if obj._mem_mapped:
            # posix_memalign + copy-to-device
            super()._alloc_array_on_high_bw_mem(site, obj, storage)
            return

        # acc_malloc -- the Array only resides on the device, ie, it never
        # needs to be accessed on the host
        assert obj._mem_default

        ctype = obj._C_typedata
        inner_dims = "".join("[%s]" % i for i in obj.symbolic_shape[1:])
        pointer = c.Value(ctype, "(*%s)%s" % (obj.name, inner_dims))
        cast = "(%s (*)%s)" % (ctype, inner_dims)

        nitems = prod(obj.symbolic_shape)
        rhs = "%s acc_malloc(sizeof(%s[%s]))" % (cast, ctype, nitems)
        initializer = c.Initializer(pointer, rhs)

        release = c.Statement('acc_free(%s)' % obj.name)

        storage.update(obj, site, allocs=initializer, frees=release)
Пример #28
0
    def _alloc_array_on_high_bw_mem(self, site, obj, storage):
        """
        Allocate an Array in the high bandwidth memory.

        Memory-mapped Arrays are handled by the parent class. Any other
        Array is declared as a plain pointer, allocated and freed through
        the ``device-alloc`` and ``device-free`` entries of ``self.lang``,
        both of which also receive the ``device-get`` entry.
        """
        if obj._mem_mapped:
            super()._alloc_array_on_high_bw_mem(site, obj, storage)
            return

        # E.g., use `acc_malloc` or `omp_target_alloc` -- the Array only resides
        # on the device as it never needs to be accessed on the host
        assert obj._mem_default

        ctype = obj._C_typedata
        pointer = c.Value(ctype, "*%s" % obj._C_name)
        nbytes = "sizeof(%s[%s])" % (ctype, prod(obj.symbolic_shape))

        device = self.lang['device-get']
        device_alloc = self.lang['device-alloc']
        device_free = self.lang['device-free']

        rhs = "(%s*) %s" % (ctype, device_alloc(nbytes, device))
        initializer = c.Initializer(pointer, rhs)

        release = c.Statement(device_free(obj._C_name, device))

        storage.update(obj, site, allocs=initializer, frees=release)
Пример #29
0
    def _generate_kernel_gather(self):
        """
        Build the pre-kernel gather code and store it as 'LIB_KERNEL_GATHER'.

        For each ``host.Matrix`` entry of ``self._dat_dict`` that is written
        and has at most ``self._gather_size_limit`` components, a local
        array ``<symbol>i[ncomp]`` is declared and initialised with the
        components found at offset ``LIB_PAIR_INDEX_0 * ncomp``. All other
        entries, and the static kernel arguments, produce no gather code.
        """
        kernel_gather = cgen.Module(
            [cgen.Comment('#### Pre kernel gather ####')])

        for symbol, entry in self._dat_dict.items():
            obj = entry[0]
            obj_type = type(obj)

            if issubclass(obj_type, host._Array):
                continue
            if not (issubclass(obj_type, host.Matrix)
                    and entry[1].write
                    and obj.ncomp <= self._gather_size_limit):
                continue

            nc = obj.ncomp
            dtype = host.ctypes_map[obj.dtype]

            # One `*(symbol+index*nc+k)` term per component; joining the
            # terms keeps the braces balanced even when there are none
            terms = [
                '*(' + symbol + '+' + self._components['LIB_PAIR_INDEX_0']
                + '*' + str(nc) + '+' + str(tx) + ')'
                for tx in range(nc)
            ]
            values = '{' + ','.join(terms) + '}'

            local_array = cgen.Value(dtype, symbol + 'i' + '[' + str(nc) + ']')
            kernel_gather.append(cgen.Initializer(local_array, values))

        self._components['LIB_KERNEL_GATHER'] = kernel_gather
Пример #30
0
    def _generate_kernel_call(self):
        """
        Build the code calling the kernel and store it as 'LIB_KERNEL_CALL'.

        The call arguments are the names of the static kernel arguments (if
        any) followed by one argument per entry of ``self._dat_dict``. For
        ``host.Matrix`` entries a ``_<symbol>_t`` struct holding the i
        pointer is also declared before the call.

        Raises ``RuntimeError`` for an entry of unknown type.
        """
        call = cgen.Module(
            [cgen.Comment('#### Kernel call arguments ####')])
        arg_names = []

        static_args = self._kernel.static_args
        if static_args is not None:
            arg_names.extend(name for name, _ in static_args.items())

        for symbol, entry in self._dat_dict.items():
            obj_type = type(entry[0])

            if issubclass(obj_type, host._Array):
                arg_names.append(symbol)
                continue
            if not issubclass(obj_type, host.Matrix):
                raise RuntimeError("ERROR: Type not known")

            struct_name = symbol + '_c'
            arg_names.append(struct_name)

            ncomp = str(entry[0].ncomp)
            i_offset = '+' + self._components['LIB_PAIR_INDEX_0'] + '*' + ncomp
            i_ptr = symbol + i_offset

            struct = cgen.Value('_' + symbol + '_t', struct_name)
            call.append(cgen.Initializer(struct, '{ ' + i_ptr + '}'))

        call.append(cgen.Comment('#### Kernel call ####'))

        joined_args = ','.join(arg_names)
        call.append(
            cgen.Line('k_' + self._kernel.name + '(' + joined_args + ');'))

        self._components['LIB_KERNEL_CALL'] = call