示例#1
0
    def _generate_kernel_arg_decls(self):

        _kernel_arg_decls = []
        _kernel_lib_arg_decls = []
        _kernel_structs = cgen.Module(
            [cgen.Comment('#### Structs generated per ParticleDat ####')])

        if self._kernel.static_args is not None:

            for i, dat in enumerate(self._kernel.static_args.items()):
                _kernel_arg_decls.append(
                    cgen.Const(cgen.Value(host.ctypes_map[dat[1]], dat[0])))

        for i, dat in enumerate(self._dat_dict.items()):

            assert type(dat[1]) is tuple, "Access descriptors not found"

            kernel_lib_arg = cgen.Pointer(
                cgen.Value(host.ctypes_map[dat[1][0].dtype],
                           Restrict(self._cc.restrict_keyword, dat[0])))

            # print host.ctypes_map[dat[1][0].dtype], dat[1][0].dtype

            if issubclass(type(dat[1][0]), host._Array):
                kernel_arg = cgen.Pointer(
                    cgen.Value(host.ctypes_map[dat[1][0].dtype],
                               Restrict(self._cc.restrict_keyword, dat[0])))
                if not dat[1][1].write:
                    kernel_arg = cgen.Const(kernel_arg)

                _kernel_arg_decls.append(kernel_arg)

            elif issubclass(type(dat[1][0]), host.Matrix):
                # MAKE STRUCT TYPE
                dtype = dat[1][0].dtype
                ti = cgen.Pointer(
                    cgen.Value(ctypes_map(dtype),
                               Restrict(self._cc.restrict_keyword, 'i')))
                tj = cgen.Pointer(
                    cgen.Value(ctypes_map(dtype),
                               Restrict(self._cc.restrict_keyword, 'j')))
                if not dat[1][1].write:
                    ti = cgen.Const(ti)
                    tj = cgen.Const(tj)
                typename = '_' + dat[0] + '_t'
                _kernel_structs.append(
                    cgen.Typedef(cgen.Struct('', [ti, tj], typename)))

                # MAKE STRUCT ARG
                _kernel_arg_decls.append(cgen.Value(typename, dat[0]))

            if not dat[1][1].write:
                kernel_lib_arg = cgen.Const(kernel_lib_arg)

            _kernel_lib_arg_decls.append(kernel_lib_arg)

        self._components['KERNEL_ARG_DECLS'] = _kernel_arg_decls
        self._components['KERNEL_LIB_ARG_DECLS'] = _kernel_lib_arg_decls
        self._components['KERNEL_STRUCT_TYPEDEFS'] = _kernel_structs
示例#2
0
    def _generate_kernel_gather(self):

        kernel_gather = cgen.Module([
            cgen.Comment('#### Pre kernel gather ####'),
            cgen.Initializer(
                cgen.Const(
                    cgen.Value('int',
                               self._components['OMP_THREAD_INDEX_SYM'])),
                'omp_get_thread_num()')
        ])
        shared_syms = self._components['OMP_SHARED_SYMS']

        for i, dat in enumerate(self._dat_dict.items()):

            obj = dat[1][0]
            mode = dat[1][1]
            symbol = dat[0]
            shared_syms.append(symbol)

            if issubclass(type(obj), data.GlobalArrayClassic):
                isym = symbol + '_c'
                val = symbol + '[' + self._components[
                    'OMP_THREAD_INDEX_SYM'] + ']'

                g = cgen.Pointer(cgen.Value(host.ctypes_map[obj.dtype], isym))
                if not mode.write:
                    g = cgen.Const(g)
                g = cgen.Initializer(g, val)

                kernel_gather.append(g)

            elif issubclass(type(obj), host.Matrix) \
                    and mode.write \
                    and obj.ncomp <= self._gather_size_limit:

                isym = symbol + 'i'
                nc = obj.ncomp
                ncb = '[' + str(nc) + ']'
                dtype = host.ctypes_map[obj.dtype]

                t = '{'
                for tx in range(nc):
                    t += '*(' + symbol + '+' + self._components[
                        'LIB_PAIR_INDEX_0']
                    t += '*' + str(nc) + '+' + str(tx) + '),'
                t = t[:-1] + '}'

                g = cgen.Value(dtype, isym + ncb)
                g = cgen.Initializer(g, t)

                kernel_gather.append(g)

        self._components['LIB_KERNEL_GATHER'] = kernel_gather
示例#3
0
    def _generate_kernel_arg_decls(self):

        _kernel_arg_decls = []
        _kernel_lib_arg_decls = []
        _kernel_structs = cgen.Module(
            [cgen.Comment('#### Structs generated per ParticleDat ####')])

        if self._kernel.static_args is not None:
            for i, dat in enumerate(self._kernel.static_args.items()):
                arg = cgen.Const(cgen.Value(host.ctypes_map[dat[1]], dat[0]))
                _kernel_arg_decls.append(arg)
                _kernel_lib_arg_decls.append(arg)

        for i, dat in enumerate(self._dat_dict.items()):

            assert type(dat[1]) is tuple, "Access descriptors not found"
            obj = dat[1][0]
            mode = dat[1][1]
            symbol = dat[0]

            kernel_lib_arg = cgen.Pointer(
                cgen.Value(host.ctypes_map[obj.dtype],
                           Restrict(self._cc.restrict_keyword, symbol)))

            if issubclass(type(obj), data.GlobalArrayClassic):
                kernel_lib_arg = cgen.Pointer(kernel_lib_arg)
                gen = self._components['PARTICLE_DAT_C'][symbol]
                _kernel_arg_decls.append(gen.kernel_arg_decl)
            elif issubclass(type(obj), host._Array):

                gen = self._components['PARTICLE_DAT_C'][symbol]
                _kernel_arg_decls.append(gen.kernel_arg_decl)

                if mode.write is True:
                    assert issubclass(type(obj), data.GlobalArrayClassic), \
                        "global array must be a thread safe type for \
                        write access. Type is:"                                                + str(type(obj))

            elif issubclass(type(dat[1][0]), host.Matrix):
                gen = self._components['PARTICLE_DAT_C'][symbol]
                _kernel_structs.append(gen.header)
                _kernel_arg_decls.append(gen.kernel_arg_decl[0])
                _kernel_arg_decls.append(gen.kernel_arg_decl[1])

            if not dat[1][1].write:
                kernel_lib_arg = cgen.Const(kernel_lib_arg)

            _kernel_lib_arg_decls.append(kernel_lib_arg)

        self._components['KERNEL_ARG_DECLS'] = _kernel_arg_decls
        self._components['KERNEL_LIB_ARG_DECLS'] = _kernel_lib_arg_decls
        self._components['KERNEL_STRUCT_TYPEDEFS'] = _kernel_structs
示例#4
0
    def _generate_kernel_func(self):

        if_block = cgen.If(
            self._components['LIB_PAIR_INDEX_0']+'<_D_N_LOCAL',
            cgen.Block([
                self._components['KERNEL_GATHER'],
                cgen.For('int _k=1',
                    '_k<=_D_NMATRIX['+self._components['LIB_PAIR_INDEX_0']+']',
                    '_k++',
                    cgen.Block([
                        cgen.Initializer(
                            cgen.Const(cgen.Value(
                                host.int32_str, self._components['LIB_PAIR_INDEX_1'])),
                                '_D_NMATRIX['+self._components['LIB_PAIR_INDEX_0']+\
                                ' + _D_N_LOCAL * _k ]'
                        ),

                        self._components['KERNEL_MAPPING'],
                        cgen.Line(self._kernel.code)
                    ])
                ),
                self._components['KERNEL_SCATTER']
            ])
        )


        func = cgen.Block([
            cgen.Initializer(
                    cgen.Const(
                    cgen.Value(
                        host.int32_str,
                        self._components['LIB_PAIR_INDEX_0']
                    )),
                    'threadIdx.x + blockIdx.x*blockDim.x'
            ),
            self._components['IF_GATHER'],
            if_block,
            self._components['IF_SCATTER']
        ])


        self._components['KERNEL_FUNC'] = cgen.FunctionBody(

            cgen.FunctionDeclaration(
                cgen.DeclSpecifier(
                    cgen.Value("void", 'k_' + self._kernel.name), '__global__'
                ),
                self._components['KERNEL_ARG_DECLS']
            ),
                func
            )
示例#5
0
 def _generate_lib_specific_args(self):
     self._components['LIB_ARG_DECLS'] = [
         cgen.Const(cgen.Value(host.int32_str, '_NUM_THREADS')),
         cgen.Const(cgen.Value(host.int32_str, '_N_START')),
         cgen.Const(cgen.Value(host.int32_str, '_N_LOCAL')),
         cgen.Const(cgen.Value(host.int32_str, '_LIST_OFFSET')),
         cgen.Const(
             cgen.Pointer(
                 cgen.Value(
                     host.int32_str,
                     Restrict(self._cc.restrict_keyword, '_CELL_LIST')))),
         cgen.Const(
             cgen.Pointer(
                 cgen.Value(host.int32_str,
                            Restrict(self._cc.restrict_keyword,
                                     '_CRL')), )),
         cgen.Const(
             cgen.Pointer(
                 cgen.Value(host.int32_str,
                            Restrict(self._cc.restrict_keyword,
                                     '_CCC')), )),
         cgen.Const(
             cgen.Pointer(
                 cgen.Value(host.int32_str,
                            Restrict(self._cc.restrict_keyword,
                                     '_OFFSET')), )),
         cgen.Pointer(
             cgen.Pointer(
                 cgen.Value(host.int32_str,
                            Restrict(self._cc.restrict_keyword,
                                     '_JSTORE')), )),
         self.loop_timer.get_cpp_arguments_ast()
     ]
示例#6
0
 def _generate_lib_specific_args(self):
     self._components['LIB_ARG_DECLS'] = [
         cgen.Const(
             cgen.Pointer(
                 cgen.Value(
                     host.int32_str,
                     Restrict(self._cc.restrict_keyword, '_H_BLOCKSIZE')))),
         cgen.Const(
             cgen.Pointer(
                 cgen.Value(
                     host.int32_str,
                     Restrict(self._cc.restrict_keyword,
                              '_H_THREADSIZE')))),
         cgen.Const(cgen.Value(host.int32_str, '_H_N_LOCAL')),
         self.loop_timer.get_cpp_arguments_ast()
     ]
示例#7
0
 def _generate_lib_specific_args(self):
     self._components['LIB_ARG_DECLS'] = [
         cgen.Const(cgen.Value(host.int64_str, '_NUM_THREADS')),
         cgen.Const(cgen.Value(host.int64_str, '_N_LOCAL')),
         cgen.Const(cgen.Value(host.int64_str, '_STRIDE')),
         cgen.Const(
             cgen.Pointer(
                 cgen.Value(host.int64_str,
                            Restrict(self._cc.restrict_keyword, '_NN')))),
         cgen.Const(
             cgen.Pointer(
                 cgen.Value(host.int64_str,
                            Restrict(self._cc.restrict_keyword,
                                     '_NLIST')), )),
         self.loop_timer.get_cpp_arguments_ast()
     ]
示例#8
0
    def generate_optimmat_code(self, pos, name=None):
        """Generates the code for computing the local optimization matrix
        for the optimization over site nr. `pos`

        The function has the following signature:

            DTYPE const *const A,
            DTYPE const *const X_0,
            ...,
            DTYPE const *const X_N,
            DTYPE *const result

        :param pos: The local tensor to copy (should be `< len(X)`)
        :param name: Name of the C function (default: get_optimmat_%(pos))
        :returns: cgen.FunctionBody with given name

        """
        name = 'get_optimmat_%i' % pos if name is None else name

        finalization_src = '''
        if (mid < {nr_meas:d}) {{
            for (uint i = 0; i < {pdim:d}; ++i) {{
                for (uint k_l = 0; k_l < {rank_l:d}; ++k_l) {{
                    for (uint k_r = 0; k_r < {rank_r:d}; ++k_r) {{
                        result[mid * {rank_l:d} * {pdim:d} * {rank_r:d}
                            + k_l * {pdim:d} * {rank_r:d}
                            + i * {rank_r:d}
                            + k_r]
                        = left_c[k_l] * current_row[{offset:d} + i] * right_c[k_r];
                    }}
                }}
            }}
        }}
        '''.format(nr_meas=self._meas,
                   pdim=self._dims[pos],
                   rank_l=1 if pos == 0 else self._ranks[pos - 1],
                   rank_r=1 if pos == self._sites - 1 else self._ranks[pos],
                   offset=sum(self._dims[:pos]))
        finalization = c.LiteralLines(finalization_src)

        arg_decls = [ConstPointerToConstDecl(self._dtype, 'A')]
        arg_decls += [
            ConstPointerToConstDecl(self._dtype, 'X%i' % i)
            for i in range(self._sites)
        ]
        arg_decls += [c.Pointer(c.Const(c.POD(self._dtype, 'result')))]

        return c.FunctionBody(
            ccu.CudaGlobal(
                c.FunctionDeclaration(c.Value('void', 'get_optimmat_%i' % pos),
                                      arg_decls=arg_decls)),
            c.Block(
                self.declaration(pos) + self.left_contractions(pos) +
                self.right_contractions(pos) + [finalization]))
示例#9
0
    def _generate_kernel_call(self):

        kernel_call = cgen.Module([
            cgen.Comment('#### Kernel call arguments ####'),
            cgen.Initializer(
                cgen.Const(
                    cgen.Value('int',
                               self._components['OMP_THREAD_INDEX_SYM'])),
                'omp_get_thread_num()')
        ])
        kernel_call_symbols = []
        shared_syms = self._components['OMP_SHARED_SYMS']

        if self._kernel.static_args is not None:
            for i, dat in enumerate(self._kernel.static_args.items()):
                kernel_call_symbols.append(dat[0])

        for i, dat in enumerate(self._dat_dict.items()):
            if issubclass(type(dat[1][0]), host._Array):
                sym = dat[0]
                if issubclass(type(dat[1][0]), data.GlobalArrayClassic):
                    sym += '[' + self._components['OMP_THREAD_INDEX_SYM'] + ']'
                kernel_call_symbols.append(sym)
                shared_syms.append(dat[0])

            elif issubclass(type(dat[1][0]), host.Matrix):
                call_symbol = dat[0] + '_c'
                kernel_call_symbols.append(call_symbol)

                nc = str(dat[1][0].ncomp)
                _ishift = '+' + self._components['LIB_PAIR_INDEX_0'] + '*' + nc

                isym = dat[0] + _ishift
                g = cgen.Value('_' + dat[0] + '_t', call_symbol)
                g = cgen.Initializer(g, '{ ' + isym + '}')

                kernel_call.append(g)
                shared_syms.append(dat[0])

            else:
                print("ERROR: Type not known")

        kernel_call.append(cgen.Comment('#### Kernel call ####'))

        kernel_call_symbols_s = ''
        for sx in kernel_call_symbols:
            kernel_call_symbols_s += sx + ','
        kernel_call_symbols_s = kernel_call_symbols_s[:-1]

        kernel_call.append(
            cgen.Line('k_' + self._kernel.name + '(' + kernel_call_symbols_s +
                      ');'))

        self._components['LIB_KERNEL_CALL'] = kernel_call
示例#10
0
    def _generate_lib_specific_args(self):
        self._components['LIB_ARG_DECLS'] = [
            cgen.Const(
                cgen.Pointer(
                    cgen.Value(host.int32_str,
                               Restrict(
                                   self._cc.restrict_keyword,'_H_BLOCKSIZE'
                               )
                               )
                )
            ),
            cgen.Const(
                cgen.Pointer(
                    cgen.Value(host.int32_str,
                               Restrict(
                                   self._cc.restrict_keyword,'_H_THREADSIZE'
                               )
                               )
                )
            ),
            cgen.Const(cgen.Value(host.int32_str, '_H_N_LOCAL')),
            cgen.Const(cgen.Value(host.int32_str, '_H_NMATRIX_STRIDE')),
            cgen.Const(
                cgen.Pointer(
                    cgen.Value(host.int32_str,
                               Restrict(
                                   self._cc.restrict_keyword,'_D_NMATRIX'
                               )
                               )
                )
            ),
            self.loop_timer.get_cpp_arguments_ast()
        ]
        self._components['LIB_SPECIFIC_KERNEL_ARGS'] = ['_H_N_LOCAL','_H_NMATRIX_STRIDE','_D_NMATRIX']


        self._components['KERNEL_ARG_DECLS'] = [
            cgen.Const(cgen.Value(host.int32_str, '_D_N_LOCAL')),
            cgen.Const(cgen.Value(host.int32_str, '_D_NMATRIX_STRIDE')),
            cgen.Const(
                cgen.Pointer(
                    cgen.Value(host.int32_str,
                               Restrict(
                                   self._cc.restrict_keyword,'_D_NMATRIX'
                               )
                               )
                )
            )
        ]

        self._components['KERNEL_LIB_ARG_DECLS'] = []
示例#11
0
    def declaration(self, pos):
        """Generates the declarative instructions for the optimizations over
        sites nr. `pos`

        :param pos: The local tensor to copy (should be `< len(X)`)
        :returns: List containing cgen Statements
        """
        max_ltens_size = max(self._ltens_sizes)
        max_left_size = 1 if pos == 0 else max(self._ranks[:pos])
        max_right_size = 1 if pos == self._sites - 1 else max(
            self._ranks[pos:])
        max_tmat_size = max(self._ranks[i] * self._ranks[i + 1]
                            for i in range(self._sites - 2))

        init_statements = [
            c.LineComment(
                "Define the row number the current thread is operating on"),
            c.Initializer(c.Const(c.POD(np.int32, 'mid')),
                          'threadIdx.x + blockIdx.x * blockDim.x'),
            c.LineComment("Allocate shared memory for the local tensors"),
            ccu.CudaShared(
                c.ArrayOf(c.POD(self._dtype, 'x_shared'), max_ltens_size)),
            c.LineComment(
                "Allocate the left-, right-, and transfer contractions"),
            c.ArrayOf(c.POD(self._dtype, 'left_c'), max_left_size),
            c.ArrayOf(c.POD(self._dtype, 'right_c'), max_right_size),
            c.ArrayOf(c.POD(self._dtype, 'tmat_c'), max_tmat_size),
            c.ArrayOf(c.POD(self._dtype, 'buf_c'),
                      max(max_right_size, max_left_size)),
            c.LineComment("Shortcut for current row of design matrix"),
            c.LineComment("Carefull, current_row might be out of bounds!"),
            ConstPointerToConst(self._dtype, 'current_row',
                                'A + (mid * %i)' % sum(self._dims))
        ]

        return init_statements
示例#12
0
def ConstPointerToConst(dtype, name, value):
    """Returns a cgen variable declaration & assignment of a constant pointer to
    a constant of type `dtype`
    """
    return c.Constant(c.Pointer(c.Const(c.POD(dtype, name))), value)
示例#13
0
    def _generate_kernel_func(self):
        IX = self._components['LIB_PAIR_INDEX_0']
        IY = self._components['LIB_PAIR_INDEX_1']
        CX = '_CX'
        CY = '_CY'

        if_block = cgen.If(
            IX + '<_D_N_LOCAL',
            cgen.Block([
                self._components['KERNEL_GATHER'],
                cgen.Initializer(cgen.Const(cgen.Value(host.int32_str, CX)), '_D_CRL[' + IX +']'),
                cgen.For('int _jk=0','_jk<_D_CCC['+CX+']', '_jk++',
                    cgen.Block([
                         cgen.Initializer(
                             cgen.Const(cgen.Value(host.int32_str, IY)),
                             '_D_L_MATRIX[' + CX+'*_D_N_LAYERS' + '+_jk]'
                         ),
                        cgen.If(
                            IX+'!='+IY,
                            cgen.Block([
                                self._components['KERNEL_MAPPING'],
                                cgen.Line(self._kernel.code)
                            ])
                        ),
                    ])
                ),
                cgen.For('int _k=0','_k<_D_N_OFFSETS', '_k++',
                    cgen.Block([
                        cgen.Initializer(cgen.Const(cgen.Value(host.int32_str, CY)), CX + '+ _D_OFFSETS[_k]'),
                            cgen.For('int _jk=0','_jk<_D_CCC['+CY+']', '_jk++',
                            cgen.Block([
                                cgen.Initializer(
                                    cgen.Const(cgen.Value(host.int32_str, IY)),
                                    '_D_L_MATRIX[' + CY+'*_D_N_LAYERS' + '+_jk]'
                                ),
                                #cgen.If(IX+'!='+IY,
                                #cgen.Block([
                                    self._components['KERNEL_MAPPING'],
                                    cgen.Line(self._kernel.code)
                                #]))
                            ]))
                    ])
                ),
                self._components['KERNEL_SCATTER']
            ])
        )


        func = cgen.Block([
            cgen.Initializer(
                    cgen.Const(
                    cgen.Value(
                        host.int32_str,
                        self._components['LIB_PAIR_INDEX_0']
                    )),
                    'threadIdx.x + blockIdx.x*blockDim.x'
            ),
            self._components['IF_GATHER'],
            if_block,
            self._components['IF_SCATTER']
        ])


        self._components['KERNEL_FUNC'] = cgen.FunctionBody(

            cgen.FunctionDeclaration(
                cgen.DeclSpecifier(
                    cgen.Value("void", 'k_' + self._kernel.name), '__global__'
                ),
                self._components['KERNEL_ARG_DECLS']
            ),
                func
            )
示例#14
0
 def _data_cref(self, dtype, name):
     return c.Const(c.Reference(c.Value(dtype, name)))
示例#15
0
import cgen as c

func = c.FunctionBody(
    c.FunctionDeclaration(c.Const(c.Pointer(c.Value("char", "greet"))), []),
    c.Block([c.Statement('return "hello world"')]))
code = c.Module([])

code.append(c.Value('int', 'cont'))
code.append(c.Assign('cont', '0'))
code.append(c.Increment('cont', '5'))

print(code)
示例#16
0
    def _generate_kernel_gather(self):
        cp = self._components
        cx = cp['LIB_CELL_CX']
        cy = cp['LIB_CELL_CY']
        cz = cp['LIB_CELL_CZ']

        ncx = cp['N_CELL_X']
        ncy = cp['N_CELL_Y']
        ncz = cp['N_CELL_Z']
        ci = cp['LIB_CELL_INDEX_0']

        kernel_gather = cgen.Module([
            cgen.Comment('#### Pre kernel gather ####'),
            # compute the linear cell index
            cgen.Initializer(
                cgen.Const(cgen.Value('INT64', ci)),
                cx + '+' + ncx + '*(' + cy + '+' + ncy + '*' + cz + ')'),
            # get the thread index
            cgen.Initializer(
                cgen.Const(
                    cgen.Value('int',
                               self._components['OMP_THREAD_INDEX_SYM'])),
                'omp_get_thread_num()')
        ])

        # partition this threads space for temporary vars
        self._components['PARTICLE_DAT_PARTITION'] = \
            DSLPartitionTempSpace(self._dat_dict,
                    self._components['CCC_MAX'],
                    '_GATHER_SPACE[_threadid]',
                    extras=((cp['TMP_INDEX'], 1, INT64),))

        kernel_gather.append(
            self._components['PARTICLE_DAT_PARTITION'].ptr_init)

        src_sym = '__tmp_gpx'
        dst_sym = cp['CCC_0']

        record_local = DSLRecordLocal(
            ind_sym=src_sym,
            nlocal_sym=cp['N_LOCAL'],
            store_sym=cp['PARTICLE_DAT_PARTITION'].idict[cp['TMP_INDEX']],
            store_ind_sym=dst_sym,
            count_sym=cp['I_LOCAL_SYM'])
        kernel_gather.append(record_local[0])
        inner_l = [record_local[1]]

        # add dats to omp shared and init global array reduction
        shared_syms = self._components['OMP_SHARED_SYMS']
        for i, dat in enumerate(self._dat_dict.items()):

            obj = dat[1][0]
            mode = dat[1][1]
            symbol = dat[0]
            shared_syms.append(symbol)

            if issubclass(type(obj), data.GlobalArrayClassic):
                isym = symbol + '_c'
                val = symbol + '[' + self._components[
                    'OMP_THREAD_INDEX_SYM'] + ']'

                g = cgen.Pointer(cgen.Value(host.ctypes_map[obj.dtype], isym))
                if not mode.write:
                    g = cgen.Const(g)
                g = cgen.Initializer(g, val)

                kernel_gather.append(g)

            if issubclass(type(obj), data.ParticleDat):
                tsym = cp['PARTICLE_DAT_PARTITION'].idict[symbol]
                inner_l.append(
                    DSLStrideGather(symbol, tsym, obj.ncomp, src_sym, dst_sym,
                                    self._components['CCC_MAX']))

        inner_l.append(cgen.Line(dst_sym + '++;'))

        inner = cgen.Module(inner_l)
        g = self._components['CELL_LIST_ITER'](src_sym, ci, inner)

        kernel_gather.append(
            cgen.Initializer(cgen.Value('INT64', dst_sym), '0'))
        kernel_gather.append(g)

        # skip cell if there are not local particles
        kernel_gather.append(
            cgen.If(cp['I_LOCAL_SYM'] + '==0',
                    cgen.Block((cgen.Line('continue;'), ))))

        self._components['LIB_KERNEL_GATHER'] = kernel_gather
示例#17
0
    def _generate_per_dat(self):

        # =================== DICT INIT ===============================

        self._components['KERNEL_ARG_DECLS'] = [
            cgen.Const(cgen.Value(host.int32_str, '_D_N_LOCAL'))
        ]

        self._components['KERNEL_LIB_ARG_DECLS'] = []

        self._components['KERNEL_STRUCT_TYPEDEFS'] = cgen.Module(
            [cgen.Comment('#### Structs generated per ParticleDat ####')])

        self._components['LIB_KERNEL_CALL'] = cgen.Module(
            [cgen.Comment('#### Kernel call ####')])
        kernel_call_symbols = ['_H_N_LOCAL']

        self._components['KERNEL_SCATTER'] = cgen.Module(
            [cgen.Comment('#### kernel scatter ####')])
        self._components['KERNEL_GATHER'] = cgen.Module(
            [cgen.Comment('#### kernel gather ####')])
        self._components['IF_SCATTER'] = cgen.Module(
            [cgen.Comment('#### if scatter ####')])
        self._components['IF_GATHER'] = cgen.Module(
            [cgen.Comment('#### if gather ####')])
        self._components['KERNEL_MAPPING'] = cgen.Module(
            [cgen.Comment('#### kernel symbol mapping ####')])

        # =================== Static Args ===============================
        if self._kernel.static_args is not None:

            for i, datt in enumerate(self._kernel.static_args.items()):

                ksym = datt[0]
                ktype = datt[1]

                # Add to kernel args
                g = cgen.Const(cgen.Value(host.ctypes_map[ktype], ksym))
                self._components['KERNEL_ARG_DECLS'].append(g)
                self._components['KERNEL_LIB_ARG_DECLS'].append(g)
                kernel_call_symbols.append(ksym)

        # =================== Dynamic Args ===============================
        for i, datt in enumerate(self._dat_dict.items()):
            assert type(datt[1]) is tuple, "Access descriptors not found"

            dati = datt[1][0]
            ksym = datt[0]
            dsym = 'd_' + ksym
            kacc = datt[1][1]

            # add to lib args
            kernel_lib_arg = cgen.Pointer(
                cgen.Value(host.ctypes_map[dati.dtype],
                           Restrict(self._cc.restrict_keyword, ksym)))


            if type(dati) is cuda_data.GlobalArray or \
                issubclass(type(dati), cuda_base.Array):

                # KERNEL ARGS DECLS -----------------------------
                kernel_arg = cgen.Pointer(
                    cgen.Value(host.ctypes_map[dati.dtype],
                               Restrict(self._cc.restrict_keyword, dsym)))
                if not kacc.write:
                    kernel_arg = cgen.Const(kernel_arg)
                self._components['KERNEL_ARG_DECLS'].append(kernel_arg)

                # KERNEL CALL SYMS -----------------------------
                kernel_call_symbols.append(ksym)

                # KERNEL GATHER/SCATTER START ------------------
                if not kacc.incremented:
                    a = cgen.Pointer(
                        cgen.Value(host.ctypes_map[dati.dtype], ksym))

                    a = cgen.Const(a)
                    a = cgen.Initializer(a, dsym)
                    self._components['IF_GATHER'].append(a)

                else:

                    a = cgen.Initializer(
                        cgen.Value(host.ctypes_map[dati.dtype],
                                   ksym + '[' + str(dati.ncomp) + ']'), '{0}')

                    self._components['IF_GATHER'].append(a)

                    # add the scatter code
                    self._components['IF_SCATTER'].append(
                        cgen.Line(
                            generate_reduction_final_stage(dsym, ksym, dati)))
                    # KERNEL GATHER/SCATTER END ------------------

            elif issubclass(type(dati), cuda_base.Matrix):

                # KERNEL ARGS DECLS, STRUCT DECLS ----------------

                dtype = dati.dtype
                ti = cgen.Pointer(
                    cgen.Value(ctypes_map(dtype),
                               Restrict(self._cc.restrict_keyword, 'i')))
                if not kacc.write:
                    ti = cgen.Const(ti)
                typename = '_' + ksym + '_t'
                self._components['KERNEL_STRUCT_TYPEDEFS'].append(
                    cgen.Typedef(cgen.Struct('', [ti], typename)))

                # add to kernel args
                kernel_arg = cgen.Pointer(
                    cgen.Value(host.ctypes_map[dati.dtype],
                               Restrict(self._cc.restrict_keyword, dsym)))
                if not kacc.write:
                    kernel_arg = cgen.Const(kernel_arg)
                self._components['KERNEL_ARG_DECLS'].append(kernel_arg)

                # KERNEL CALL SYMS -----------------------------
                kernel_call_symbols.append(ksym)

                # KERNEL GATHER/SCATTER START ------------------
                nc = str(dati.ncomp)
                _ishift = '+' + self._components['LIB_PAIR_INDEX_0'] + '*' + nc

                isym = dsym + _ishift
                g = cgen.Value(typename, ksym)
                g = cgen.Initializer(g, '{ ' + isym + '}')

                self._components['KERNEL_MAPPING'].append(g)
                # KERNEL GATHER/SCATTER END ------------------

            # END OF IF ------------------------

            # add to lib args
            if not kacc.write:
                kernel_lib_arg = cgen.Const(kernel_lib_arg)
            self._components['KERNEL_LIB_ARG_DECLS'].append(kernel_lib_arg)

        # KERNEL CALL SYMS -----------------------------

        kernel_call_symbols_s = ''
        for sx in kernel_call_symbols:
            kernel_call_symbols_s += sx + ','
        kernel_call_symbols_s = kernel_call_symbols_s[:-1]

        self._components['LIB_KERNEL_CALL'].append(
            cgen.Module([
                cgen.Value('dim3', '_B'),
                cgen.Value('dim3', '_T'),
                cgen.Assign('_B.x', '_H_BLOCKSIZE[0]'),
                cgen.Assign('_B.y', '_H_BLOCKSIZE[1]'),
                cgen.Assign('_B.z', '_H_BLOCKSIZE[2]'),
                cgen.Assign('_T.x', '_H_THREADSIZE[0]'),
                cgen.Assign('_T.y', '_H_THREADSIZE[1]'),
                cgen.Assign('_T.z', '_H_THREADSIZE[2]')
            ]))

        self._components['LIB_KERNEL_CALL'].append(
            cgen.Line('k_' + self._kernel.name + '<<<_B,_T>>>(' +
                      kernel_call_symbols_s + ');'))
        self._components['LIB_KERNEL_CALL'].append(
            cgen.Line('checkCudaErrors(cudaDeviceSynchronize());'))
示例#18
0
    def _generate_lib_specific_args(self):
        cp = self._components
        ncx = cp['N_CELL_X']
        ncy = cp['N_CELL_Y']
        ncz = cp['N_CELL_Z']
        npad = cp['N_CELL_PAD']
        nloc = cp['N_LOCAL']
        exec_count = cp['EXEC_COUNT']

        self._components['LIB_ARG_DECLS'] = [
            cgen.Const(cgen.Value(host.int32_str, '_NUM_THREADS')),
            cgen.Const(cgen.Value(host.int64_str, ncx)),
            cgen.Const(cgen.Value(host.int64_str, ncy)),
            cgen.Const(cgen.Value(host.int64_str, ncz)),
            cgen.Const(cgen.Value(host.int64_str, npad)),
            cgen.Const(cgen.Value(host.int32_str, nloc)),
            cgen.Const(cgen.Value(host.int32_str, '_LIST_OFFSET')),
            cgen.Const(
                cgen.Pointer(
                    cgen.Value(
                        host.int32_str,
                        Restrict(self._cc.restrict_keyword, '_CELL_LIST')))),
            cgen.Const(
                cgen.Pointer(
                    cgen.Value(host.int32_str,
                               Restrict(self._cc.restrict_keyword,
                                        '_CRL')), )),
            cgen.Const(
                cgen.Pointer(
                    cgen.Value(host.int32_str,
                               Restrict(self._cc.restrict_keyword,
                                        '_CCC')), )),
            cgen.Const(
                cgen.Pointer(
                    cgen.Value(host.int32_str,
                               Restrict(self._cc.restrict_keyword,
                                        '_OFFSET')), )),
            cgen.Pointer(
                cgen.Pointer(
                    cgen.Value(host.int32_str,
                               Restrict(self._cc.restrict_keyword,
                                        '_JSTORE')), )),
            cgen.Pointer(
                cgen.Pointer(
                    cgen.Value(
                        host.uint8_str,
                        Restrict(self._cc.restrict_keyword,
                                 '_GATHER_SPACE')), )),
            cgen.Const(cgen.Value(host.int64_str,
                                  self._components['CCC_MAX'])),
            cgen.Pointer(cgen.Value(host.int64_str, exec_count)),
            self.loop_timer.get_cpp_arguments_ast()
        ]
示例#19
0
    c.Define('OPS_2D', ''),
    c.Include('ops_seq.h'),
    c.Include('ops_lib_cpp.h'),
    c.Include('sources.cpp'),
    c.Include('velocity-model.cpp'),
    c.Include('wave-propagation-ops.h'),
    c.Line(),
    c.Statement('using namespace std'),
    c.Line()
])

code.append(
    c.FunctionBody(
        c.FunctionDeclaration(c.Value('void', 'wavePropagation'), [
            c.Pointer(c.Value('double', 'u_new')),
            c.Const(c.Pointer(c.Value('double', 'u_current'))),
            c.Const(c.Pointer(c.Value('double', 'u_previous'))),
            c.Const(c.Pointer(c.Value('double', 'velocity'))),
            c.Const(c.Pointer(c.Value('double', 'wx'))),
            c.Const(c.Pointer(c.Value('double', 'wy'))),
            c.Const(c.Pointer(c.Value('double', 'zetax'))),
            c.Const(c.Pointer(c.Value('double', 'zetay'))),
            c.Const(c.Pointer(c.Value('int', 'idx')))
        ]),
        c.Block([
            c.LineComment('Propagates the wave.'),
            c.Assign(
                'u_new[OPS_ACC0(0, 0)]',
                '(velocity[OPS_ACC3(0, 0)] * velocity[OPS_ACC3(0, 0)]) *\n\
                                ((-205.0 / 72) * u_current[OPS_ACC1(0, 0)] +\n\
                                 (8.0 / 5) * (u_current[OPS_ACC1(1, 0)] + u_current[OPS_ACC1(-1, 0)] + u_current[OPS_ACC1(0, 1)] + u_current[OPS_ACC1(0, -1)]) +\n\
示例#20
0
 def _generate_lib_specific_args(self):
     self._components['LIB_ARG_DECLS'] = [
         cgen.Const(cgen.Value(host.int32_str, '_NUM_THREADS')),
         cgen.Const(cgen.Value(host.int32_str, '_N_LOCAL')),
         self.loop_timer.get_cpp_arguments_ast()
     ]
示例#21
0
    def _generate_kernel_arg_decls(self):

        _kernel_arg_decls = []
        _kernel_lib_arg_decls = []
        _kernel_structs = cgen.Module([
            cgen.Comment('#### Structs generated per ParticleDat ####')
        ])

        if self._kernel.static_args is not None:
            for i, dat in enumerate(self._kernel.static_args.items()):
                arg = cgen.Const(cgen.Value(host.ctypes_map[dat[1]], dat[0]))
                _kernel_arg_decls.append(arg)
                _kernel_lib_arg_decls.append(arg)

        for i, dat in enumerate(self._dat_dict.items()):

            assert type(dat[1]) is tuple, "Access descriptors not found"
            obj = dat[1][0]
            mode = dat[1][1]
            symbol = dat[0]

            kernel_lib_arg = cgen.Pointer(cgen.Value(host.ctypes_map[obj.dtype],
                                          Restrict(self._cc.restrict_keyword, symbol))
                                      )

            if issubclass(type(obj), data.GlobalArrayClassic):
                kernel_lib_arg = cgen.Pointer(kernel_lib_arg)

            if issubclass(type(obj), host._Array):
                kernel_arg = cgen.Pointer(cgen.Value(host.ctypes_map[obj.dtype],
                                              Restrict(self._cc.restrict_keyword, symbol))
                                          )
                if not mode.write:
                    kernel_arg = cgen.Const(kernel_arg)
                _kernel_arg_decls.append(kernel_arg)

                if mode.write is True:
                    assert issubclass(type(obj), data.GlobalArrayClassic),\
                        "global array must be a thread safe type for \
                        write access. Type is:" + str(type(obj))


            elif issubclass(type(dat[1][0]), host.Matrix):
                # MAKE STRUCT TYPE
                dtype = dat[1][0].dtype
                ti = cgen.Pointer(cgen.Value(ctypes_map(dtype),
                                             Restrict(self._cc.restrict_keyword,'i')))
                tj = cgen.Pointer(cgen.Value(ctypes_map(dtype),
                                             Restrict(self._cc.restrict_keyword,'j')))
                if not dat[1][1].write:
                    ti = cgen.Const(ti)
                    tj = cgen.Const(tj)
                typename = '_'+dat[0]+'_t'
                _kernel_structs.append(cgen.Typedef(cgen.Struct('', [ti,tj], typename)))


                # MAKE STRUCT ARG
                _kernel_arg_decls.append(cgen.Value(typename, dat[0]))

            if not dat[1][1].write:
                kernel_lib_arg = cgen.Const(kernel_lib_arg)

            _kernel_lib_arg_decls.append(kernel_lib_arg)

        self._components['KERNEL_ARG_DECLS'] = _kernel_arg_decls
        self._components['KERNEL_LIB_ARG_DECLS'] = _kernel_lib_arg_decls
        self._components['KERNEL_STRUCT_TYPEDEFS'] = _kernel_structs
示例#22
0
def ConstPointerToConstDecl(dtype, name):
    """Returns a cgen variable declaration of a constant pointer to a constant
    of type `dtype`
    """
    return c.Const(c.Pointer(c.Const(c.POD(dtype, name))))