示例#1
0
    def _generate_kernel_scatter(self):
        """Build the post-kernel scatter code and store it as a component.

        For every entry of ``self._dat_dict`` whose object is a
        ``data.ParticleDat`` accessed in a writing mode, one
        ``DSLStrideScatter`` statement is created.  The statements, followed
        by an increment of the destination symbol, are handed to the callable
        stored under ``CELL_LIST_ITER``.  The resulting ``cgen.Module`` is
        stored under ``LIB_KERNEL_SCATTER``.
        """
        components = self._components
        cell_index = components['LIB_CELL_INDEX_0']

        # Names used in the generated code: the iterator symbol handed to
        # CELL_LIST_ITER and the counter that is declared and incremented.
        src_sym = '_sgpx'
        dst_sym = '_shpx'

        scatter_stmts = []
        for symbol, entry in self._dat_dict.items():
            obj, mode = entry[0], entry[1]
            if not (issubclass(type(obj), data.ParticleDat) and mode.write):
                continue
            partition_sym = components['PARTICLE_DAT_PARTITION'].idict[symbol]
            scatter_stmts.append(
                DSLStrideScatter(partition_sym, symbol, obj.ncomp, dst_sym,
                                 src_sym, components['CCC_MAX']))

        scatter_stmts.append(cgen.Line(dst_sym + '++;'))
        cell_loop = components['CELL_LIST_ITER'](
            src_sym, cell_index, cgen.Module(scatter_stmts))

        components['LIB_KERNEL_SCATTER'] = cgen.Module([
            cgen.Comment('#### Post kernel scatter ####'),
            cgen.Initializer(cgen.Value('INT64', dst_sym), '0'),
            cell_loop,
        ])
示例#2
0
    def _dump_storage(self, iet, storage):
        """Attach the allocations and frees recorded in ``storage`` to ``iet``.

        Keys of ``storage`` that are Expressions are mapped directly to their
        stored value.  Every other key is rebuilt with a ``List`` body whose
        header holds the allocations and whose footer holds the frees.  The
        ``pallocs``/``pfrees`` entries are passed through ``as_mapper`` (keyed
        on item 0, collecting item 1) and each group is emitted after a header
        produced by ``self.lang.Region._make_header``.

        Returns the result of visiting ``iet`` with a nested ``Transformer``
        built from these substitutions.
        """

        def threaded_regions(pairs):
            # One Module per key: the region header, then a block that first
            # initialises the key's variable from ``self.lang['thread-num']``
            # and then contains the grouped statements.
            regions = []
            grouped = as_mapper(pairs, itemgetter(0), itemgetter(1))
            for tid, stmts in grouped.items():
                region_header = self.lang.Region._make_header(
                    tid.symbolic_size)
                tid_init = c.Initializer(c.Value(tid._C_typedata, tid.name),
                                         self.lang['thread-num'])
                regions.append(
                    c.Module((region_header, c.Block([tid_init] + stmts))))
            return regions

        substitutions = {}
        for node, entry in storage.items():
            # Expr -> LocalExpr ?
            if node.is_Expression:
                substitutions[node] = entry
                continue

            # Header: plain allocations first, then the grouped ones,
            # terminated by a blank line when non-empty
            head = flatten(entry.allocs)
            head.extend(threaded_regions(entry.pallocs))
            if head:
                head.append(c.Line())

            # Footer: grouped frees first, then the plain ones, preceded by
            # a blank line when non-empty
            tail = threaded_regions(entry.pfrees)
            tail.extend(flatten(entry.frees))
            if tail:
                tail.insert(0, c.Line())

            wrapped = List(header=head, body=node.body, footer=tail)
            substitutions[node] = node._rebuild(body=wrapped,
                                                **node.args_frozen)

        return Transformer(substitutions, nested=True).visit(iet)
示例#3
0
    def _generate_lib_inner_loop_block(self):
        """Build the pre-kernel "j gather" code and store it as ``J_GATHER``.

        One ``DSLStrideGather`` statement is created for every
        ``data.ParticleDat`` in ``self._dat_dict``.  The statements, followed
        by an increment of the ``CCC_1`` symbol, are handed to the callable
        stored under ``CELL_LIST_ITER`` together with the iterator symbol and
        the ``LIB_CELL_INDEX_1`` component.
        """
        components = self._components
        cell_index = components['LIB_CELL_INDEX_1']

        src_sym = '_tmp_jgpx'
        dst_sym = components['CCC_1']

        gather_stmts = []
        for symbol, entry in self._dat_dict.items():
            obj = entry[0]
            if not issubclass(type(obj), data.ParticleDat):
                continue
            partition_sym = components['PARTICLE_DAT_PARTITION'].jdict[symbol]
            gather_stmts.append(
                DSLStrideGather(symbol, partition_sym, obj.ncomp, src_sym,
                                dst_sym, components['CCC_MAX']))

        # The counter increment closes the body handed to CELL_LIST_ITER
        gather_stmts.append(cgen.Line(dst_sym + '++;'))
        cell_loop = components['CELL_LIST_ITER'](
            src_sym, cell_index, cgen.Module(gather_stmts))

        components['J_GATHER'] = cgen.Module([
            cgen.Comment('#### Pre kernel j gather ####'),
            cgen.Initializer(cgen.Value('INT64', dst_sym), '0'),
            cell_loop,
        ])
示例#4
0
    def visit_Iteration(self, o):
        """Generate a C ``for`` loop for the Iteration node ``o``.

        ``o.limits`` supplies (min, max, step); both bounds are inclusive in
        the generated condition.  A ``Backward`` iteration starts from the
        maximum and decrements.  Entries of ``o.uindices`` are initialised
        and stepped in the same loop header, and ``o.pragmas`` (if any) are
        placed before the loop inside a ``c.Module``.
        """
        body = flatten(self._visit(child) for child in o.children)

        lower = o.limits[0]
        upper = o.limits[1]

        # A backward loop runs from the upper bound down to the lower one
        if o.direction == Backward:
            first, last, compare, advance = upper, lower, '>=', '-='
        else:
            first, last, compare, advance = lower, upper, '<=', '+='
        loop_init = 'int %s = %s' % (o.index, ccode(first))
        loop_cond = '%s %s %s' % (o.index, compare, ccode(last))
        loop_inc = '%s %s %s' % (o.index, advance, o.limits[2])

        # Extra indices share the loop header, separated by commas
        if o.uindices:
            init_terms = [loop_init]
            step_terms = [loop_inc]
            for uindex in o.uindices:
                init_terms.append(
                    '%s = %s' % (uindex.name, ccode(uindex.symbolic_min)))
                # Modulo indices are re-assigned, all others accumulated
                assign = '=' if uindex.is_Modulo else '+='
                step_terms.append(
                    '%s %s %s' % (uindex.name, assign,
                                  ccode(uindex.symbolic_incr)))
            loop_init = c.Line(', '.join(init_terms))
            loop_inc = c.Line(', '.join(step_terms))

        loop = c.For(loop_init, loop_cond, loop_inc, c.Block(body))
        if not o.pragmas:
            return loop
        return c.Module(o.pragmas + (loop,))
示例#5
0
    def _generate_kernel_call(self):
        """Build the kernel call statement and store it as ``LIB_KERNEL_CALL``.

        The argument list starts with the names of the kernel's static
        arguments (when present) and continues with the ``kernel_arg`` of
        each ``PARTICLE_DAT_C`` entry.  While walking ``self._dat_dict`` the
        per-dat ``kernel_create_j_arg`` is appended to the call module, and
        the ``KERNEL_GATHER`` / ``KERNEL_SCATTER`` components are extended
        with the per-dat i-argument and i-scatter code.
        """
        components = self._components
        kernel_call = cgen.Module(
            [cgen.Comment('#### Kernel call arguments ####')])

        call_args = []
        if self._kernel.static_args is not None:
            call_args.extend(
                item[0] for item in self._kernel.static_args.items())

        for symbol, _ in self._dat_dict.items():
            dat_c = components['PARTICLE_DAT_C'][symbol]
            call_args.append(dat_c.kernel_arg)
            kernel_call.append(dat_c.kernel_create_j_arg)
            components['KERNEL_GATHER'] += dat_c.kernel_create_i_arg
            components['KERNEL_SCATTER'] += dat_c.kernel_create_i_scatter

        kernel_call.append(cgen.Comment('#### Kernel call ####'))
        kernel_call.append(
            cgen.Line('k_' + self._kernel.name + '(' + ','.join(call_args) +
                      ');'))

        components['LIB_KERNEL_CALL'] = kernel_call
示例#6
0
    def visit_Operator(self, o):
        """Generate the full source module for the Operator ``o``.

        The module consists of, in order: the header lines, the includes,
        the type declarations of the parameters (plus an ``extern "C"``
        declaration when the compiler's source extension is ``cpp``), the
        signatures of the local elemental functions, the kernel itself and
        finally the bodies of the local elemental functions.
        """
        blank = c.Line("")

        # Kernel: signature plus a body that ends with ``return 0``
        statements = flatten(self._visit(child) for child in o.children)
        params = self._args_decl(o.parameters)
        signature = c.FunctionDeclaration(c.Value(o.retval, o.name), params)
        statements = statements + [c.Statement("return 0")]
        kernel = c.FunctionBody(signature, c.Block(statements))

        # Local elemental functions: forward declarations and bodies
        esigns = []
        efuncs = [blank]
        for func in o._func_table.values():
            if not func.local:
                continue
            esigns.append(
                c.FunctionDeclaration(
                    c.Value(func.root.retval, func.root.name),
                    self._args_decl(func.root.parameters)))
            efuncs += [func.root.ccode, blank]

        # Header lines, includes and type declarations
        header = [c.Line(text) for text in o._headers]
        includes = [c.Include(path, system=False) for path in o._includes]
        includes.append(blank)
        cdefs = [
            param._C_typedecl for param in o.parameters
            if param._C_typedecl is not None
        ]
        cdefs = filter_sorted(cdefs, key=lambda decl: decl.tpname)
        if o._compiler.src_ext == 'cpp':
            cdefs += [c.Extern('C', signature)]

        # Follow every declaration with a blank line
        spaced_cdefs = []
        for decl in cdefs:
            spaced_cdefs += [decl, blank]

        return c.Module(header + includes + spaced_cdefs + esigns +
                        [blank, kernel] + efuncs)
示例#7
0
 def visit_AugmentedExpression(self, o):
     """Generate ``<lhs> <op>= <rhs>`` for the augmented expression ``o``.

     Both sides are rendered with ``ccode`` using ``o.dtype``.  When ``o``
     carries pragmas, they are placed before the statement in a ``c.Module``.
     """
     lhs = ccode(o.expr.lhs, dtype=o.dtype)
     rhs = ccode(o.expr.rhs, dtype=o.dtype)
     statement = c.Statement("%s %s= %s" % (lhs, o.op, rhs))
     if not o.pragmas:
         return statement
     return c.Module(list(o.pragmas) + [statement])
示例#8
0
 def get_cpp_pre_loop_code_ast(self):
     """
     Return a ``cgen.Module`` with the code to place before the loop.

     The single line declares ``_loop_timer_t0`` and initialises it with
     ``std::chrono::high_resolution_clock::now()``.
     """
     clock = 'std::chrono::high_resolution_clock'
     start_stmt = (clock + '::time_point _loop_timer_t0 = ' + clock +
                   '::now(); \n')
     return cgen.Module([cgen.Line(start_stmt)])
示例#9
0
    def _generate_lib_outer_loop(self):
        """Build the OpenMP outer loop and store it as ``LIB_OUTER_LOOP``.

        The generated code sets the thread count, opens an
        ``omp parallel default(none)`` region sharing every symbol listed in
        ``OMP_SHARED_SYMS``, asks ``get_thread_decomp`` for the calling
        thread's ``[_thread_start, _thread_end)`` range and loops over that
        range executing the ``LIB_KERNEL_CALL`` component.
        """
        components = self._components
        loop_body = cgen.Block([components['LIB_KERNEL_CALL']])
        idx = components['LIB_PAIR_INDEX_0']

        shared_clause = ','.join(components['OMP_SHARED_SYMS'])
        omp_pragma = cgen.Pragma(
            'omp parallel default(none) shared(' + shared_clause + ')')

        decomp_call = cgen.Line(
            'get_thread_decomp((int)_N_LOCAL, &_thread_start, &_thread_end);'
        )
        thread_loop = cgen.For('int ' + idx + '= _thread_start',
                               idx + '< _thread_end', idx + '++', loop_body)
        parallel_region = cgen.Block((
            cgen.Value('int', '_thread_start'),
            cgen.Value('int', '_thread_end'),
            decomp_call,
            thread_loop,
        ))

        components['LIB_OUTER_LOOP'] = cgen.Module([
            cgen.Line('omp_set_num_threads(_NUM_THREADS);'),
            omp_pragma,
            parallel_region,
        ])
示例#10
0
def gen_op(op_name, attrs, inputs, outputs,
           output_shapes, kernel_class_name,
           kernel_header):
    """Return a ``c.Module`` with the source that registers an op.

    The module contains, in order: the TensorFlow framework includes and
    ``kernel_header``, a ``using namespace tensorflow`` statement, the op
    registration macro (built from the op name, attributes, inputs, outputs
    and the shape function derived from ``output_shapes``), the kernel class
    definition and the kernel build macro.
    """
    include_paths = (
        "tensorflow/core/framework/op.h",
        "tensorflow/core/framework/shape_inference.h",
        "tensorflow/core/framework/op_kernel.h",
        kernel_header,
    )
    contents = [c.Include(path, system=False) for path in include_paths]

    # Namespace declaration, surrounded by blank lines
    contents += [c.Line(), c.Statement("using namespace tensorflow"),
                 c.Line()]

    # Registration macro
    shape_fn = gen_shape_fn(output_shapes)
    registration = gen_reg_op_macro_str(op_name, attrs, inputs, outputs,
                                        shape_fn)
    contents += [c.Statement(registration), c.Line()]

    # Kernel class definition followed by the kernel build macro
    contents.extend(gen_op_kernel_class_defn(kernel_class_name))
    contents.append(c.Line())
    build_macro = gen_kernel_build_macro(op_name, kernel_class_name)
    contents.append(c.Statement(build_macro))

    return c.Module(contents)
示例#11
0
    def visit_Operator(self, o, mode='all'):
        """Generate the full source module for the Operator ``o``.

        The module consists of, in order: the ``#define`` headers, the
        includes, the type declarations selected by ``mode`` (plus an
        ``extern "C"`` declaration of the kernel when ``mode`` is ``'all'``
        or ``'public'`` and the source extension is ``cpp`` or ``cu``), the
        signatures of the local elemental functions, the kernel itself and
        the bodies of the local elemental functions.
        """
        # Kernel: signature plus a body that ends with ``return 0``
        statements = flatten(self._visit(child) for child in o.children)
        params = self._args_decl(o.parameters)
        signature = c.FunctionDeclaration(c.Value(o.retval, o.name), params)
        statements = statements + [c.Line(), c.Statement("return 0")]
        kernel = c.FunctionBody(signature, c.Block(statements))

        # Local elemental functions: forward declarations and bodies
        esigns = []
        efuncs = [blankline]
        for func in o._func_table.values():
            if not func.local:
                continue
            qualified = ' '.join(func.root.prefix + (func.root.retval, ))
            esigns.append(
                c.FunctionDeclaration(c.Value(qualified, func.root.name),
                                      self._args_decl(func.root.parameters)))
            efuncs += [self._visit(func.root), blankline]

        # Definitions
        headers = [c.Define(*definition) for definition in o._headers]
        headers.append(blankline)

        # Header files
        includes = self._operator_includes(o) + [blankline]

        # Type declarations, each followed by a blank line
        typedecls = self._operator_typedecls(o, mode)
        if mode in ('all', 'public') and o._compiler.src_ext in ('cpp', 'cu'):
            typedecls.append(c.Extern('C', signature))
        spaced_typedecls = []
        for decl in typedecls:
            spaced_typedecls += [decl, blankline]

        return c.Module(headers + includes + spaced_typedecls + esigns +
                        [blankline, kernel] + efuncs)
示例#12
0
    def _generate_lib_outer_loop(self):
        """Build the outer particle loop and store it as ``LIB_OUTER_LOOP``.

        The loop runs the pair index from 0 to ``_N_LOCAL`` and executes the
        gather, inner-loop and scatter components in that order.  It is
        preceded by an ``omp parallel for`` pragma; when
        ``runtime.OMP_NUM_THREADS`` is ``None`` the pragma is wrapped in a
        ``cgen.Comment`` instead of being emitted directly.
        """
        components = self._components
        loop_body = cgen.Block([
            components['LIB_KERNEL_GATHER'],
            components['LIB_INNER_LOOP'],
            components['LIB_KERNEL_SCATTER'],
        ])
        idx = components['LIB_PAIR_INDEX_0']

        shared_clause = ','.join(components['OMP_SHARED_SYMS'])
        # Everything after ``//`` in the pragma text is a trailing C comment
        omp_pragma = cgen.Pragma(
            'omp parallel for schedule(static) '
            '// default(shared) shared(' + shared_clause + ')')
        if runtime.OMP_NUM_THREADS is None:
            omp_pragma = cgen.Comment(omp_pragma)

        particle_loop = cgen.For('int ' + idx + '=0', idx + '<_N_LOCAL',
                                 idx + '++', loop_body)

        components['LIB_OUTER_LOOP'] = cgen.Module([
            cgen.Line('omp_set_num_threads(_NUM_THREADS);'),
            omp_pragma,
            particle_loop,
        ])
示例#13
0
    def _generate_kernel_scatter(self):
        """Build the post-kernel scatter code and store it as a component.

        For every ``host.Matrix`` in ``self._dat_dict`` that is accessed in a
        writing mode and has at most ``self._gather_size_limit`` components,
        a C loop is generated that copies the local buffer ``<symbol>i`` back
        into ``<symbol>`` at offset ``ncomp * <pair index 0>``.
        ``host._Array`` entries and all other entries produce no code.

        The resulting ``cgen.Module`` is stored under ``LIB_KERNEL_SCATTER``.
        """
        kernel_scatter = cgen.Module(
            [cgen.Comment('#### Post kernel scatter ####')])

        for symbol, access in self._dat_dict.items():
            obj = access[0]
            obj_type = type(obj)

            # Arrays take precedence over the Matrix check and need no
            # scatter code.
            if issubclass(obj_type, host._Array):
                continue
            if not issubclass(obj_type, host.Matrix):
                continue
            if not (access[1].write
                    and obj.ncomp <= self._gather_size_limit):
                continue

            nc = str(obj.ncomp)
            ix = self._components['LIB_PAIR_INDEX_0']

            # <symbol>[nc*ix+_tx] = <symbol>i[_tx] for every component _tx
            copy_back = cgen.Assign(symbol + '[' + nc + '*' + ix + '+_tx]',
                                    symbol + 'i' + '[_tx]')
            kernel_scatter.append(
                cgen.For('int _tx=0', '_tx<' + nc, '_tx++',
                         cgen.Block([copy_back])))

        self._components['LIB_KERNEL_SCATTER'] = kernel_scatter
示例#14
0
 def generate(self):
     """Return the source code of the module as a ``cgen.Module``.

     The module contains ``self.preamble``, one blank ``cgen.Line`` and
     ``self.body``, in that order.
     """
     contents = list(self.preamble + [cgen.Line()] + self.body)
     return cgen.Module(contents)
示例#15
0
    def _generate_kernel_arg_decls(self):
        """Build the argument declarations for the kernel and the library.

        Three components are produced:

        * ``KERNEL_ARG_DECLS``: const values for the kernel's static
          arguments, a restrict pointer per ``host._Array`` and a struct
          value per ``host.Matrix``;
        * ``KERNEL_LIB_ARG_DECLS``: one restrict pointer per entry of
          ``self._dat_dict``;
        * ``KERNEL_STRUCT_TYPEDEFS``: a module with one typedef'd struct
          (members ``i`` and ``j``) per ``host.Matrix``.

        Declarations of entries that are not accessed in a writing mode are
        wrapped in ``cgen.Const``.
        """

        def restrict_ptr(ctype, name):
            # Pointer to a ``ctype`` value whose name is wrapped by Restrict
            return cgen.Pointer(
                cgen.Value(ctype, Restrict(self._cc.restrict_keyword, name)))

        arg_decls = []
        lib_arg_decls = []
        struct_typedefs = cgen.Module(
            [cgen.Comment('#### Structs generated per ParticleDat ####')])

        if self._kernel.static_args is not None:
            for name, dtype in self._kernel.static_args.items():
                arg_decls.append(
                    cgen.Const(cgen.Value(host.ctypes_map[dtype], name)))

        for symbol, access in self._dat_dict.items():

            assert type(access) is tuple, "Access descriptors not found"
            obj, mode = access[0], access[1]
            obj_type = type(obj)

            lib_arg = restrict_ptr(host.ctypes_map[obj.dtype], symbol)

            if issubclass(obj_type, host._Array):
                arg = restrict_ptr(host.ctypes_map[obj.dtype], symbol)
                if not mode.write:
                    arg = cgen.Const(arg)
                arg_decls.append(arg)

            elif issubclass(obj_type, host.Matrix):
                # Struct type holding the i and j pointers.
                # NOTE(review): ``ctypes_map`` is called unqualified here,
                # while the rest of this method subscripts
                # ``host.ctypes_map`` - confirm that a callable of that name
                # is in scope in this module.
                dtype = obj.dtype
                members = [
                    restrict_ptr(ctypes_map(dtype), 'i'),
                    restrict_ptr(ctypes_map(dtype), 'j'),
                ]
                if not mode.write:
                    members = [cgen.Const(member) for member in members]
                struct_name = '_' + symbol + '_t'
                struct_typedefs.append(
                    cgen.Typedef(cgen.Struct('', members, struct_name)))

                # The kernel receives the struct by value
                arg_decls.append(cgen.Value(struct_name, symbol))

            if not mode.write:
                lib_arg = cgen.Const(lib_arg)
            lib_arg_decls.append(lib_arg)

        self._components['KERNEL_ARG_DECLS'] = arg_decls
        self._components['KERNEL_LIB_ARG_DECLS'] = lib_arg_decls
        self._components['KERNEL_STRUCT_TYPEDEFS'] = struct_typedefs
示例#16
0
 def _generate_lib_src(self):
     """Assemble the library source and store it under ``LIB_SRC``.

     The module holds the struct typedefs, the kernel function and the
     library function, with a comment heading the last two.
     """
     components = self._components
     sections = [
         components['KERNEL_STRUCT_TYPEDEFS'],
         cgen.Comment('#### Kernel function ####'),
         components['KERNEL_FUNC'],
         cgen.Comment('#### Library function ####'),
         components['LIB_FUNC'],
     ]
     components['LIB_SRC'] = cgen.Module(sections)
示例#17
0
 def _generate_lib_inner_loop(self):
     """Build the inner pair loop and store it under ``LIB_INNER_LOOP``.

     Two loops over the second pair index are generated, both running the
     ``LIB_INNER_LOOP_BLOCK`` component: one from 0 up to (excluding) the
     first pair index and one from the first pair index plus one up to
     ``_N_LOCAL``, so the index equal to the first pair index is skipped.
     """
     components = self._components
     i = components['LIB_PAIR_INDEX_0']
     j = components['LIB_PAIR_INDEX_1']
     body = components['LIB_INNER_LOOP_BLOCK']

     below_i = cgen.For('int ' + j + '=0', j + '<' + i, j + '++', body)
     above_i = cgen.For('int ' + j + '=1+' + i, j + '< _N_LOCAL', j + '++',
                        body)
     components['LIB_INNER_LOOP'] = cgen.Module([below_i, above_i])
示例#18
0
    def _generate_kernel_headers(self):
        """Collect the kernel headers and store them as ``KERNEL_HEADERS``.

        The module contains the ``ast`` of every header of the kernel (when
        the kernel has headers) followed by the headers required by the loop
        timer.
        """
        kernel_headers = self._kernel.headers
        if kernel_headers is None:
            header_asts = []
        else:
            header_asts = [header.ast for header in kernel_headers]

        header_asts.append(self.loop_timer.get_cpp_headers_ast())
        self._components['KERNEL_HEADERS'] = cgen.Module(header_asts)
示例#19
0
    def generate(self):
        """Return the generated source as a ``c.Module``.

        The module starts with ``DGEMV_SRC`` and continues with the result of
        ``self.generate_optimmat_code(pos)`` for every ``pos`` in
        ``range(self._sites)``.
        """
        contents = [DGEMV_SRC]
        for site in range(self._sites):
            contents.append(self.generate_optimmat_code(site))
        return c.Module(contents)
示例#20
0
 def visit_Section(self, o):
     """Generate the code of the Section ``o`` as a ``c.Module``.

     Unless ``o`` is a subsection, the generated children are enclosed
     between ``Begin <name>`` and ``End <name>`` comments.
     """
     body = flatten(self._visit(child) for child in o.children)
     header, footer = [], []
     if not o.is_subsection:
         header.append(c.Comment("Begin %s" % o.name))
         footer.append(c.Comment("End %s" % o.name))
     return c.Module(header + body + footer)
示例#21
0
 def get_cpp_post_loop_code_ast(self):
     """
     Return a ``cgen.Module`` with the code to place after the loop.

     The generated text takes a second time point ``_loop_timer_t1``,
     computes the duration since ``_loop_timer_t0`` and adds it, in
     seconds as a double, to ``*_loop_timer_return``.
     """
     statements = (
         'std::chrono::high_resolution_clock::time_point _loop_timer_t1 = '
         'std::chrono::high_resolution_clock::now(); \n',
         ' std::chrono::duration<double> _loop_timer_res = '
         '_loop_timer_t1 - _loop_timer_t0; \n',
         '*_loop_timer_return += (double) _loop_timer_res.count(); \n',
     )
     return cgen.Module([cgen.Line(''.join(statements))])
示例#22
0
    def _generate_kernel_headers(self):
        """Collect the kernel headers and store them as ``KERNEL_HEADERS``.

        The module starts with a non-system include of ``cuda_generic.h``,
        continues with the ``ast`` of every header of the kernel (when the
        kernel has headers) and ends with the headers required by the loop
        timer.
        """
        header_asts = [cgen.Include('cuda_generic.h', system=False)]

        kernel_headers = self._kernel.headers
        if kernel_headers is not None:
            header_asts.extend(header.ast for header in kernel_headers)

        header_asts.append(self.loop_timer.get_cpp_headers_ast())
        self._components['KERNEL_HEADERS'] = cgen.Module(header_asts)
示例#23
0
    def _generate_map_macros(self):
        """Build the access macros and store them as ``KERNEL_MAP_MACROS``.

        For a ``cuda_data.GlobalArray`` (exact type) or any
        ``cuda_base.Array`` the macro ``<symbol>(x)`` expands to
        ``(<symbol>[(x)])``.  For a ``cuda_base.Matrix`` the macro
        ``<symbol>(y)`` expands to ``<symbol>.i[(y)]``.  The two checks are
        independent, so an entry matching both receives both macros.
        """
        macros = cgen.Module([cgen.Comment('#### KERNEL_MAP_MACROS ####')])

        for symbol, access in self._dat_dict.items():
            obj_type = type(access[0])

            array_like = (obj_type is cuda_data.GlobalArray
                          or issubclass(obj_type, cuda_base.Array))
            if array_like:
                macros.append(
                    cgen.Define(symbol + '(x)', '(' + symbol + '[(x)])'))

            if issubclass(obj_type, cuda_base.Matrix):
                macros.append(
                    cgen.Define(symbol + '(y)', symbol + '.i[(y)]'))

        self._components['KERNEL_MAP_MACROS'] = macros
示例#24
0
    def _generate_lib_inner_loop(self):
        """Build the j-loop preparation and the inner loops.

        ``LIB_LOOP_J_PREPARE`` receives three declarations: the cell of the
        first pair index (``_icell``), the calling thread's slice of
        ``_JSTORE`` (``_JJSTORE``) and a zeroed counter ``_nn``.

        ``LIB_INNER_LOOP`` receives two loops: one over ``_k`` in
        ``[0, 27)`` running the ``LIB_INNER_LOOP_BLOCK`` component, and one
        over the first ``_nn`` entries of ``_JJSTORE`` that binds the second
        pair index and runs the ``LIB_KERNEL_CALL`` component.
        """
        components = self._components
        i = components['LIB_PAIR_INDEX_0']
        j = components['LIB_PAIR_INDEX_1']

        prepare_lines = (
            'const int _icell = _CRL[' + i + '];',
            'int * _JJSTORE = _JSTORE[' +
            components['OMP_THREAD_INDEX_SYM'] + '];',
            'int _nn = 0;',
        )
        components['LIB_LOOP_J_PREPARE'] = cgen.Module(
            [cgen.Line(text) for text in prepare_lines])

        cell_loop = cgen.For('int _k=0', '_k<27', '_k++',
                             components['LIB_INNER_LOOP_BLOCK'])
        stored_loop = cgen.For(
            'int _k2=0', '_k2<_nn', '_k2++',
            cgen.Block([
                cgen.Line('const int ' + j + ' = _JJSTORE[_k2];'),
                components['LIB_KERNEL_CALL'],
            ]))
        components['LIB_INNER_LOOP'] = cgen.Module([cell_loop, stored_loop])
示例#25
0
    def visit_Iteration(self, o):
        """Generate a C ``for`` loop for the Iteration node ``o``.

        The loop runs between ``o.limits[0]`` and ``o.limits[1]`` (both
        inclusive in the generated condition), each shifted by the matching
        entry of ``o.offsets`` when that entry is non-zero, with step
        ``o.limits[2]``.  A ``Backward`` iteration starts from the end bound
        and decrements.  Entries of ``o.uindices`` are initialised and
        updated in the same loop header, and ``o.pragmas`` (if any) are
        placed before the loop inside a ``c.Module``.
        """
        body = flatten(self.visit(i) for i in o.children)

        # Start
        if o.offsets[0] != 0:
            start = str(o.limits[0] + o.offsets[0])
            # Try to collapse the textual bound to a Python value; if it
            # cannot be evaluated (NameError/TypeError) the string is kept.
            # NOTE(review): eval() runs with this method's locals and the
            # module globals in scope, so names such as ``o``, ``body`` or
            # ``self`` inside the string would resolve to those objects.
            # Only safe while the limits come from trusted input.
            try:
                start = eval(start)
            except (NameError, TypeError):
                pass
        else:
            start = o.limits[0]

        # Bound
        if o.offsets[1] != 0:
            end = str(o.limits[1] + o.offsets[1])
            # Same evaluation attempt (and the same eval caveat) as for the
            # start bound; ``start`` is an additional local visible here.
            try:
                end = eval(end)
            except (NameError, TypeError):
                pass
        else:
            end = o.limits[1]

        # For backward direction flip loop bounds
        if o.direction == Backward:
            loop_init = 'int %s = %s' % (o.index, ccode(end))
            loop_cond = '%s >= %s' % (o.index, ccode(start))
            loop_inc = '%s -= %s' % (o.index, o.limits[2])
        else:
            loop_init = 'int %s = %s' % (o.index, ccode(start))
            loop_cond = '%s <= %s' % (o.index, ccode(end))
            loop_inc = '%s += %s' % (o.index, o.limits[2])

        # Append unbounded indices, if any
        if o.uindices:
            # Extra indices are comma-separated after the main index in both
            # the init and the increment part of the loop header.
            uinit = [
                '%s = %s' % (i.name, ccode(i.symbolic_start))
                for i in o.uindices
            ]
            loop_init = c.Line(', '.join([loop_init] + uinit))
            # Each extra index is re-assigned (plain ``=``) from its
            # symbolic increment expression on every iteration.
            ustep = [
                '%s = %s' % (i.name, ccode(i.symbolic_incr))
                for i in o.uindices
            ]
            loop_inc = c.Line(', '.join([loop_inc] + ustep))

        # Create For header+body
        handle = c.For(loop_init, loop_cond, loop_inc, c.Block(body))

        # Attach pragmas, if any
        if o.pragmas:
            handle = c.Module(o.pragmas + (handle, ))

        return handle
示例#26
0
    def _generate_kernel_call(self):
        """Build the kernel call code and store it as ``LIB_KERNEL_CALL``.

        The generated module first stores ``omp_get_thread_num()`` in the
        thread index symbol.  The call arguments are the names of the
        kernel's static arguments followed by one argument per entry of
        ``self._dat_dict``:

        * ``host._Array``: the symbol itself, indexed by the thread index
          symbol when the object is a ``data.GlobalArrayClassic``;
        * ``host.Matrix``: a struct ``<symbol>_c`` initialised with the
          symbol offset by ``<pair index 0> * ncomp``.

        Every handled symbol is also appended to the ``OMP_SHARED_SYMS``
        component.  Entries of any other type are reported with ``print``
        and skipped.
        """
        components = self._components
        thread_sym = components['OMP_THREAD_INDEX_SYM']

        kernel_call = cgen.Module([
            cgen.Comment('#### Kernel call arguments ####'),
            cgen.Initializer(cgen.Const(cgen.Value('int', thread_sym)),
                             'omp_get_thread_num()'),
        ])
        shared_syms = components['OMP_SHARED_SYMS']

        call_args = []
        if self._kernel.static_args is not None:
            call_args.extend(
                item[0] for item in self._kernel.static_args.items())

        for symbol, access in self._dat_dict.items():
            obj = access[0]
            obj_type = type(obj)

            if issubclass(obj_type, host._Array):
                arg = symbol
                if issubclass(obj_type, data.GlobalArrayClassic):
                    arg = symbol + '[' + thread_sym + ']'
                call_args.append(arg)
                shared_syms.append(symbol)

            elif issubclass(obj_type, host.Matrix):
                struct_sym = symbol + '_c'
                call_args.append(struct_sym)

                offset = ('+' + components['LIB_PAIR_INDEX_0'] + '*' +
                          str(obj.ncomp))
                struct_decl = cgen.Value('_' + symbol + '_t', struct_sym)
                kernel_call.append(
                    cgen.Initializer(struct_decl,
                                     '{ ' + symbol + offset + '}'))
                shared_syms.append(symbol)

            else:
                print("ERROR: Type not known")

        kernel_call.append(cgen.Comment('#### Kernel call ####'))
        kernel_call.append(
            cgen.Line('k_' + self._kernel.name + '(' + ','.join(call_args) +
                      ');'))

        components['LIB_KERNEL_CALL'] = kernel_call
示例#27
0
 def includes(self):
     """Return a ``cgen.Module`` with the leading statements of the source.

     The module holds the copyright statements, a definition of ``M_PI``
     and the common includes, followed by the io, pluto and profiling
     includes for each of ``self.io``, ``self.pluto`` and
     ``self.profiling`` that is truthy.
     """
     stmts = includes.copyright()
     stmts.append(cgen.Define('M_PI', '3.14159265358979323846'))
     stmts.extend(includes.common_include())
     if self.io:
         stmts.extend(includes.io_include())
     if self.pluto:
         stmts.extend(includes.pluto_include())
     if self.profiling:
         stmts.extend(includes.profiling_include())
     return cgen.Module(stmts)
示例#28
0
    def _generate_kernel_headers(self):
        """Collect the kernel headers and store them as ``KERNEL_HEADERS``.

        Starting from the ``LIB_HEADERS`` component, the ``ast`` of each
        kernel header is appended (``self._kernel.headers`` may be a single
        header or an iterable of headers), followed by the headers required
        by the loop timer.
        """
        # NOTE(review): the appends below modify the object stored under
        # LIB_HEADERS in place - confirm that this sharing is intended.
        header_asts = self._components['LIB_HEADERS']

        kernel_headers = self._kernel.headers
        if kernel_headers is not None:
            if not hasattr(kernel_headers, "__iter__"):
                # A single header object: treat it as a one-element sequence
                kernel_headers = (kernel_headers, )
            for header in kernel_headers:
                header_asts.append(header.ast)

        header_asts.append(self.loop_timer.get_cpp_headers_ast())
        self._components['KERNEL_HEADERS'] = cgen.Module(header_asts)
示例#29
0
    def _generate_lib_inner_loop(self):
        """Build the loop over cell offsets and store it as ``LIB_INNER_LOOP``.

        For each of the 27 values of ``_k`` the generated code computes the
        second cell index from the first one plus ``_OFFSET[_k]``, runs the
        ``J_GATHER`` component and then loops over the first pair index up to
        ``CCC_0``.  Iterations whose ``TMP_INDEX`` partition entry is not
        below ``N_LOCAL`` are skipped.  Inside, the gather text, the loop(s)
        over the second pair index running the kernel call, the scatter text
        and an update of the execution counter are emitted.

        When both cell indices are equal, the second pair index skips the
        value of the first one and the counter grows by ``CCC_1 - 1``;
        otherwise it covers ``[0, CCC_1)`` and the counter grows by
        ``CCC_1``.
        """
        comps = self._components
        i = comps['LIB_PAIR_INDEX_0']
        j = comps['LIB_PAIR_INDEX_1']
        ccc_i = comps['CCC_0']
        ccc_j = comps['CCC_1']
        ci = comps['LIB_CELL_INDEX_0']
        cj = comps['LIB_CELL_INDEX_1']
        nloc = comps['N_LOCAL']
        exec_count = '_' + comps['EXEC_COUNT']

        index_sym = comps['PARTICLE_DAT_PARTITION'].idict[comps['TMP_INDEX']]

        gather_text = comps['KERNEL_GATHER']
        scatter_text = comps['KERNEL_SCATTER']

        def j_loop(init, bound):
            # Loop over the second pair index around a fresh kernel call
            # block
            return cgen.For('INT64 ' + j + init, j + '<' + bound, j + '++',
                            cgen.Block(comps['LIB_KERNEL_CALL']))

        def i_loop(j_loops, count_update):
            # gather, the given j loops, scatter, then the counter update;
            # all of it guarded by the index_sym[i] < nloc test
            guarded = cgen.Block(
                (cgen.Line(gather_text), ) + j_loops +
                (cgen.Line(scatter_text), cgen.Line(count_update)))
            guard = cgen.Block(
                (cgen.If(index_sym + '[' + i + ']<' + nloc, guarded), ))
            return cgen.Block((cgen.For('INT64 ' + i + '=0',
                                        i + '<' + ccc_i, i + '++', guard), ))

        loop_other = i_loop((j_loop('=0', ccc_j), ),
                            exec_count + '+=' + ccc_j + ';')

        loop_same = i_loop((j_loop('=0', i), j_loop('=1+' + i, ccc_j)),
                           exec_count + '+=' + ccc_j + '-1;')

        cell_cond = cgen.If(ci + '==' + cj, loop_same, loop_other)

        offset_body = cgen.Block(
            (cgen.Line('const INT64 {jcell} = {icell} + _OFFSET[_k];'.format(
                jcell=comps['LIB_CELL_INDEX_1'],
                icell=comps['LIB_CELL_INDEX_0'])),
             comps['J_GATHER'], cell_cond))

        comps['LIB_INNER_LOOP'] = cgen.Module([
            cgen.For('int _k=0', '_k<27', '_k++', offset_body),
        ])
示例#30
0
    def _generate_kernel_gather(self):
        """Build the pre-kernel gather code and store it as a component.

        The generated module first stores ``omp_get_thread_num()`` in the
        thread index symbol.  Every symbol of ``self._dat_dict`` is appended
        to the ``OMP_SHARED_SYMS`` component, and then:

        * a ``data.GlobalArrayClassic`` gets a pointer ``<symbol>_c`` set to
          ``<symbol>[<thread index>]`` (const unless written);
        * a ``host.Matrix`` that is written and has at most
          ``self._gather_size_limit`` components gets a local array
          ``<symbol>i`` initialised from the ``ncomp`` values starting at
          ``<symbol> + <pair index 0> * ncomp``.

        The result is stored under ``LIB_KERNEL_GATHER``.
        """
        components = self._components
        thread_sym = components['OMP_THREAD_INDEX_SYM']

        kernel_gather = cgen.Module([
            cgen.Comment('#### Pre kernel gather ####'),
            cgen.Initializer(cgen.Const(cgen.Value('int', thread_sym)),
                             'omp_get_thread_num()'),
        ])
        shared_syms = components['OMP_SHARED_SYMS']

        for symbol, access in self._dat_dict.items():
            obj, mode = access[0], access[1]
            obj_type = type(obj)
            shared_syms.append(symbol)

            if issubclass(obj_type, data.GlobalArrayClassic):
                thread_slice = (symbol + '[' +
                                components['OMP_THREAD_INDEX_SYM'] + ']')
                decl = cgen.Pointer(
                    cgen.Value(host.ctypes_map[obj.dtype], symbol + '_c'))
                if not mode.write:
                    decl = cgen.Const(decl)
                kernel_gather.append(cgen.Initializer(decl, thread_slice))

            elif (issubclass(obj_type, host.Matrix) and mode.write
                    and obj.ncomp <= self._gather_size_limit):
                nc = obj.ncomp
                ctype = host.ctypes_map[obj.dtype]

                # One dereference per component, each followed by a comma;
                # the last character is then replaced by the closing brace.
                elements = ''.join(
                    '*(' + symbol + '+' + components['LIB_PAIR_INDEX_0'] +
                    '*' + str(nc) + '+' + str(tx) + '),'
                    for tx in range(nc))
                initialiser = ('{' + elements)[:-1] + '}'

                local_array = cgen.Value(
                    ctype, symbol + 'i' + '[' + str(nc) + ']')
                kernel_gather.append(
                    cgen.Initializer(local_array, initialiser))

        components['LIB_KERNEL_GATHER'] = kernel_gather