Python build_clib示例

编程语言: Python

命名空间/包名称: kemp.fdtd3d.util.common_cpu

方法/功能: build_clib

hotexamples.com的示例: 7

Python build_clib - 已找到7个示例。这些是从开源项目中提取的最受好评的kemp.fdtd3d.util.common_cpu.build_clib现实Python示例。您可以评价示例，以帮助我们提高示例质量。

示例#1

显示文件

    def __init__(self, fields):
        """Build the core update program and register it with `fields`.

        The 'core.c' template is specialised by macro substitution, built with
        common_cpu.build_clib, and the argument lists for the E and H updates
        are assembled from the arrays held by `fields`.

        Args:
            fields: Fields instance; validated with common.check_type.
        """

        common.check_type('fields', fields, Fields)

        # local variables
        ce_on = fields.ce_on
        ch_on = fields.ch_on

        # program
        # Macro values depending on whether a coefficient array set is enabled.
        cf_values = {True: ['', 'OOOOOOOOO', ', &Cx, &Cy, &Cz', 'cx[idx]', 'cy[idx]', 'cz[idx]'],
                     False: ['// ', 'OOOOOO', '', '0.5', '0.5', '0.5']}
        ce_macros = [
            'COEFF_E ', 'PARSE_ARGS_CE', 'PYARRAYOBJECT_CE', 'CEX', 'CEY',
            'CEZ'
        ]
        ce_values = cf_values[ce_on]
        ch_macros = [
            'COEFF_H ', 'PARSE_ARGS_CH', 'PYARRAYOBJECT_CH', 'CHX', 'CHY',
            'CHZ'
        ]
        ch_values = cf_values[ch_on]

        macros = fields.dtype_omp_macros + ce_macros + ch_macros
        values = fields.dtype_omp_values + ce_values + ch_values

        # Close the template file deterministically instead of leaking the handle.
        with open(common_cpu.src_path + 'core.c') as src_file:
            template = src_file.read()
        ksrc = common.replace_template_code(template, macros, values)
        program = common_cpu.build_clib(ksrc, 'core')

        # arguments
        # Work on copies: `+=` on the original list object would extend
        # fields.ehs itself and make e_args and h_args one shared list.
        e_args = list(fields.ehs)
        h_args = list(fields.ehs)
        if ce_on:
            e_args += fields.ces
        if ch_on:
            h_args += fields.chs

        # global variables
        self.mainf = fields
        self.program = program
        self.e_args = e_args
        self.h_args = h_args

        # append to the update list
        self.priority_type = 'core'
        fields.append_instance(self)

示例#2

显示文件

文件： core_split.py 项目： wbkifun/fdtd_accelerate

    def __init__(self, fields):
        """Build the core update library and register this instance with `fields`.

        The 'core.c' template is specialised with macro values derived from
        the settings of `fields` (precision, OpenMP usage, coefficient arrays,
        padding masks) and built with common_cpu.build_clib.  The ctypes
        signatures of update_e / update_h and the argument lists for the
        '', 'pre' and 'post' calls are then prepared.

        Args:
            fields: Fields instance; validated with common.check_type.
        """

        common.check_type('fields', fields, Fields)

        # local variables
        precision_float = fields.precision_float
        use_cpu_core = fields.use_cpu_core
        dtype = fields.dtype

        nx, ny, nz_pitch = ns_pitch = fields.ns_pitch
        align_size = fields.align_size
        pad = fields.pad

        ce_on = fields.ce_on
        ch_on = fields.ch_on

        ehs = fields.ehs
        if ce_on:
            ces = fields.ces
        if ch_on:
            chs = fields.chs

        # pad_str_list: text for the MASK_H, MASK_EXY and MASK macros
        def mask_to_str(mask_arr):
            # Format element by element; str(list(...)) depends on how the
            # installed NumPy prints its scalars.
            return ', '.join(str(int(v)) for v in mask_arr)

        mask_h = np.ones(align_size, 'i')
        mask_h[0] = 0

        mask_exy = np.ones(align_size, 'i')
        mask_exy[-(pad+1):] = 0

        mask = np.ones(align_size, 'i')
        if pad != 0:
            mask[-pad:] = 0

        pad_str_list = [mask_to_str(m) for m in (mask_h, mask_exy, mask)]

        # program
        dtype_str_list = {
                'single': ['float', 'xmmintrin.h', 'ps', '__m128', '4'],
                'double': ['double', 'emmintrin.h', 'pd', '__m128d', '2']}[precision_float]

        macros = [
                'OMP_HEADER', 'OMP_FOR_E', 'OMP_FOR_H',
                'ARGS_CE', 'INIT_CE', 'PRIVATE_CE', 'CEX', 'CEY', 'CEZ',
                'ARGS_CH', 'INIT_CH', 'PRIVATE_CH', 'CHX', 'CHY', 'CHZ',
                'DTYPE', 'MM_HEADER', 'PSD', 'TYPE128', 'INCRE',
                'MASK_H', 'MASK_EXY', 'MASK']

        # Defaults correspond to OpenMP off and no coefficient arrays.
        values = ['', '', '',
                '', 'ce=SET1(0.5)', '', '', '', '',
                '', 'ch=SET1(0.5)', '', '', '', '',
                ] + dtype_str_list + pad_str_list

        if use_cpu_core != 1:
            values[0] = '#include <omp.h>'

            omp_str = '' if use_cpu_core == 0 else 'omp_set_num_threads(%d);\n\t' % use_cpu_core
            values[1] = omp_str + '#pragma omp parallel for private(idx, i, j, k, hx0, hy0, hz0, h1, h2, e PRIVATE_CE)'
            values[2] = omp_str + '#pragma omp parallel for private(idx, i, j, k, ex0, ey0, ez0, e1, e2, h PRIVATE_CH)'

        if ce_on:
            values[3:9] = [
                    ', DTYPE *cex, DTYPE *cey, DTYPE *cez', 'ce', ', ce',
                    'ce = LOAD(cex+idx);', 'ce = LOAD(cey+idx);', 'ce = LOAD(cez+idx);']
        if ch_on:
            values[9:15] = [
                    ', DTYPE *chx, DTYPE *chy, DTYPE *chz', 'ch', ', ch',
                    'ch = LOAD(chx+idx);', 'ch = LOAD(chy+idx);', 'ch = LOAD(chz+idx);']

        # Close the template file deterministically instead of leaking the handle.
        with open(common_cpu.src_path + 'core.c') as src_file:
            template = src_file.read()
        ksrc = common.replace_template_code(template, macros, values)
        program = common_cpu.build_clib(ksrc)

        carg = np.ctypeslib.ndpointer(dtype, ndim=3,
                shape=tuple(ns_pitch), flags='C_CONTIGUOUS, ALIGNED')

        # Each function gets its own argtypes list.  A single list assigned to
        # both would be extended in place by every `+=`, so with ce_on and
        # ch_on both set update_h would declare three parameters too many.
        e_argtypes = [c_int, c_int, c_int, c_int, c_int] + [carg] * 6
        h_argtypes = list(e_argtypes)

        # arguments
        nyz_pitch = ny * nz_pitch
        e_args = ns_pitch + [0, nx*nyz_pitch] + ehs
        h_args = ns_pitch + [0, nx*nyz_pitch] + ehs
        if ce_on:
            e_argtypes += [carg] * 3
            e_args += ces
        if ch_on:
            h_argtypes += [carg] * 3
            h_args += chs

        program.update_e.argtypes = e_argtypes
        program.update_e.restype = None
        program.update_h.argtypes = h_argtypes
        program.update_h.restype = None

        # Same arguments with the 4th and 5th entries (idx0, nmax) replaced.
        set_args = lambda args, idx0, nmax: args[:3] + [idx0, nmax] + args[5:]
        e_args_dict = {
                '': e_args,
                'pre': set_args(e_args, nyz_pitch, 3*nyz_pitch),
                'post': set_args(e_args, 0, nyz_pitch)}

        h_args_dict = {
                '': h_args,
                'pre': set_args(h_args, 0, 2*nyz_pitch),
                'post': set_args(h_args, 2*nyz_pitch, 3*nyz_pitch)}

        # global variables
        self.mainf = fields
        self.program = program
        self.e_args_dict = e_args_dict
        self.h_args_dict = h_args_dict

        # append to the update list
        self.priority_type = 'core'
        fields.append_instance(self)

示例#3

显示文件

    def __init__(self, nx, ny, nz,
                 coeff_use='e',
                 precision_float='single',
                 use_cpu_core=0):
        """Allocate the field arrays and build the core update library.

        Args:
            nx, ny, nz: grid sizes (int).
            coeff_use: one of '', 'e', 'h', 'eh'; an 'e' enables the ce
                coefficient arrays and an 'h' the ch coefficient arrays.
            precision_float: 'single' (np.float32) or 'double' (np.float64).
            use_cpu_core: when non-zero, an omp_set_num_threads(<n>) call is
                substituted for the OMP_SET_NUM_THREADS macro.
        """

        common.check_type('nx', nx, int)
        common.check_type('ny', ny, int)
        common.check_type('nz', nz, int)
        common.check_value('coeff_use', coeff_use, ('', 'e', 'h', 'eh'))
        common.check_value('precision_float', precision_float,
                           ('single', 'double'))

        self.nx = nx
        self.ny = ny
        self.nz = nz
        self.coeff_use = coeff_use
        self.dtype = {
            'single': np.float32,
            'double': np.float64
        }[precision_float]
        self.dtype_str_list = {
                'single': ['float', 'xmmintrin.h', 'ps', '__m128', '4', '0, 1, 1, 1'],
                'double': ['double', 'emmintrin.h', 'pd', '__m128d', '2', '0, 1']}[precision_float]

        self.device_type = 'cpu'

        # padding for the nz which is multiple of 4 (float32) or 2 (float64)
        a_size = {'single': 4, 'double': 2}[precision_float]  # 16 Bytes
        self.pad = pad = int(np.ceil(float(nz) / a_size) * a_size) - nz
        self.slz = slice(None, None) if pad == 0 else slice(None, -pad)
        self.nz_pitch = nz_pitch = nz + pad

        mask_arr = np.ones(a_size, 'i')
        mask_arr[-(pad + 1):] = 0
        # Format element by element; str(list(...)) depends on how the
        # installed NumPy prints its scalars.
        self.dtype_str_list.append(', '.join(str(int(v)) for v in mask_arr))

        # ns, qtask, enqueue
        self.ns = [nx, ny, nz]
        self.ns_pitch = [nx, ny, nz_pitch]
        self.qtask = QueueTask()
        self.enqueue = self.qtask.enqueue
        self.enqueue_barrier = self.qtask.enqueue_barrier

        # on/off the coefficient arrays
        self.ce_on = 'e' in self.coeff_use
        self.ch_on = 'h' in self.coeff_use

        # allocations
        self.ehs = [
            np.zeros(self.ns_pitch, dtype=self.dtype) for i in range(6)
        ]
        self.ex, self.ey, self.ez, self.hx, self.hy, self.hz = self.ehs

        if self.ce_on:
            self.ces = [
                np.ones(self.ns_pitch, dtype=self.dtype) * 0.5
                for i in range(3)
            ]
            self.cex, self.cey, self.cez = self.ces

        if self.ch_on:
            self.chs = [
                np.ones(self.ns_pitch, dtype=self.dtype) * 0.5
                for i in range(3)
            ]
            self.chx, self.chy, self.chz = self.chs

        # program
        macros = [
                'ARGS_CE', 'INIT_CE', 'PRIVATE_CE', 'CEX', 'CEY', 'CEZ',
                'ARGS_CH', 'INIT_CH', 'PRIVATE_CH', 'CHX', 'CHY', 'CHZ',
                'OMP_SET_NUM_THREADS',
                'DTYPE', 'MM_HEADER', 'PSD', 'TYPE128', 'INCRE', 'MASK_H', 'MASK_E']

        # Defaults correspond to no coefficient arrays and no thread-count call.
        values = [
                '', 'ce=SET1(0.5)', '', '', '', '',
                '', 'ch=SET1(0.5)', '', '', '', '',
                ''] + self.dtype_str_list

        if use_cpu_core != 0:
            values[12] = 'omp_set_num_threads(%d);' % use_cpu_core

        if self.ce_on:
            values[:6] = [
                    ', DTYPE *cex, DTYPE *cey, DTYPE *cez', 'ce', ', ce',
                    'ce = LOAD(cex+idx);', 'ce = LOAD(cey+idx);', 'ce = LOAD(cez+idx);']
        if self.ch_on:
            values[6:12] = [
                    ', DTYPE *chx, DTYPE *chy, DTYPE *chz', 'ch', ', ch',
                    'ch = LOAD(chx+idx);', 'ch = LOAD(chy+idx);', 'ch = LOAD(chz+idx);']

        # Close the template file deterministically instead of leaking the handle.
        with open(common_cpu.src_path + 'core.c') as src_file:
            template = src_file.read()
        ksrc = common.replace_template_code(template, macros, values)
        self.program = common_cpu.build_clib(ksrc)

        carg = np.ctypeslib.ndpointer(dtype=self.dtype, ndim=3,
                shape=(nx, ny, nz_pitch), flags='C_CONTIGUOUS, ALIGNED')

        # Each function gets its own argtypes list.  A single list assigned to
        # both would be extended in place by every `+=`, so with ce_on and
        # ch_on both set update_h would declare three parameters too many.
        e_argtypes = [c_int, c_int, c_int, c_int, c_int] + [carg] * 6
        h_argtypes = list(e_argtypes)

        self.e_args = self.ns_pitch + [0, nx * ny * nz_pitch] + self.ehs
        self.h_args = self.ns_pitch + [0, nx * ny * nz_pitch] + self.ehs

        if self.ce_on:
            e_argtypes += [carg] * 3
            self.e_args += self.ces

        if self.ch_on:
            h_argtypes += [carg] * 3
            self.h_args += self.chs

        self.program.update_e.argtypes = e_argtypes
        self.program.update_e.restype = None
        self.program.update_h.argtypes = h_argtypes
        self.program.update_h.restype = None

示例#4

显示文件

    def __init__(self, fields, directions, npml=10, sigma_max=4, kappa_max=1, alpha_max=0, m_sigma=3, m_alpha=1):
        """Set up the PML programs and arguments and register with `fields`.

        For every axis/side listed in `directions` and for both 'E' and 'H',
        a variant of the 'cpml.c' template is generated and built, psi arrays
        are allocated, and the call argument list is assembled.

        Args:
            fields: Fields instance; validated with common.check_type.
            directions: sequence of three strings (x, y, z axis), each one of
                '+', '-', '+-' or ''.
            npml: number of PML cells (int).
            sigma_max, kappa_max, alpha_max: maxima of the graded sigma,
                kappa and alpha profiles.
            m_sigma: exponent used for the sigma and kappa profiles.
            m_alpha: exponent used for the alpha profile.

        Raises:
            AssertionError: if `directions` does not have exactly three
                entries or contains an unsupported value.
        """

        common.check_type('fields', fields, Fields)
        common.check_type('directions', directions, (list, tuple), str)
        common.check_type('npml', npml, int)
        common.check_type('sigma_max', sigma_max, (int, float))
        common.check_type('kappa_max', kappa_max, (int, float))
        common.check_type('alpha_max', alpha_max, (int, float))
        common.check_type('m_sigma', m_sigma, (int, float))
        common.check_type('m_alpha', m_alpha, (int, float))

        assert len(directions) == 3
        for axis in directions:
            assert axis in ['+', '-', '+-', '']

        # local variables
        dt = fields.dt
        nx, ny, nz = fields.ns
        dtype = fields.dtype

        # psi allocations: two arrays per (axis, side, field kind)
        psi_shapes = {'x': (npml, ny, nz), 'y': (nx, npml, nz), 'z': (nx, ny, npml)}
        psis = {}
        for xyz, pms in zip(['x', 'y', 'z'], directions):
            psis[xyz] = {}
            for pm in pms:
                psis[xyz][pm] = {}
                for eh in ['E', 'H']:
                    psis[xyz][pm][eh] = [np.zeros(psi_shapes[xyz], dtype) for i in range(2)]

        # coefficient allocations
        sigma = lambda i: sigma_max * (i / npml) ** m_sigma
        kappa = lambda i: 1 + (kappa_max - 1) * (i / npml) ** m_sigma
        alpha = lambda i: alpha_max * ((npml - i) / npml) ** m_alpha
        pcb = lambda i: np.exp(-(sigma(i) / kappa(i) + alpha(i)) * dt)
        pca = lambda i: sigma(i) / (sigma(i) + alpha(i) * kappa(i)) * (pcb(i) - 1)

        # Half-integer and integer sample positions inside the PML.
        i_half = np.arange(0.5, npml, 1, dtype)
        i_full = np.arange(1, npml+1, 1, dtype)
        iis = {'+': {'E': i_half, 'H': i_full},
               '-': {'E': i_full[::-1], 'H': i_half[::-1]}}

        pcs = {}
        for pm in ['+', '-']:
            pcs[pm] = {}
            for eh in ['E', 'H']:
                pcs[pm][eh] = [pcb(iis[pm][eh]), pca(iis[pm][eh])]

        # modify reciprocal ds (in place, on the arrays returned by fields.get_rds())
        if kappa_max != 1:
            sls = {'+': {'E': slice(-npml-1, -1), 'H': slice(-npml, None)},
                   '-': {'E': slice(None, npml), 'H': slice(1, npml+1)}}
            for pms, erd, hrd in zip(directions, *fields.get_rds()):
                for pm in pms:
                    erd[sls[pm]['E']] /= kappa(iis[pm]['E'])
                    hrd[sls[pm]['H']] /= kappa(iis[pm]['H'])

        # program
        idx_macros = ['NMAX', 'IDX_PC', 'IDX_F1', 'IDX_F2', 'IDX_F3']
        nmax = {'x': 'npml*ny*nz', 'y': 'nx*npml*nz', 'z': 'nx*ny*npml'}
        idx_pc = {'x': 'idx/(ny*nz)', 'y': '(idx/nz)%npml', 'z': 'idx%npml'}
        if0 = {'x': 'idx', 'y': 'idx+(idx/(npml*nz))*(ny-npml)*nz', 'z': 'idx+(idx/npml)*(nz-npml)'}
        idx_f1 = {'E': {'x': {'+': if0['x']+'+(nx-npml-1)*ny*nz', '-': if0['x']},
                        'y': {'+': if0['y']+'+(ny-npml-1)*nz', '-': if0['y']},
                        'z': {'+': if0['z']+'+(nz-npml-1)', '-': if0['z']}},
                  'H': {'x': {'+': if0['x']+'+(nx-npml)*ny*nz', '-': if0['x']+'+ny*nz'},
                        'y': {'+': if0['y']+'+(ny-npml)*nz', '-': if0['y']+'+nz'},
                        'z': {'+': if0['z']+'+(nz-npml)', '-': if0['z']+'+1'}}}
        idx_f2 = {'E': {'x': 'if1+ny*nz', 'y': 'if1+nz', 'z': 'if1+1'},
                  'H': {'x': 'if1', 'y': 'if1', 'z': 'if1'}}
        idx_f3 = {'E': {'x': 'if1', 'y': 'if1', 'z': 'if1'},
                  'H': {'x': 'if1-ny*nz', 'y': 'if1-nz', 'z': 'if1-1'}}

        cf_macros = ['COEFF ', 'PARSE_ARGS_C', 'PYARRAYOBJECT_C', 'CF1', 'CF2']
        cf_values = {True: ['', 'iOOOOOOOOOO', ', &C1, &C2', 'c1[if1]', 'c2[if1]'],
                     False: ['// ', 'iOOOOOOOO', '', '0.5', '0.5']}

        # Read the template once and close the file: every variant below is
        # generated from the same text, so re-opening it per variant only
        # leaked file handles.
        with open(common_cpu.src_path + 'cpml.c') as src_file:
            template = src_file.read()

        macros = fields.dtype_omp_macros + idx_macros + cf_macros
        programs = {}
        for xyz, pms in zip(['x', 'y', 'z'], directions):
            programs[xyz] = {}
            for pm in pms:
                programs[xyz][pm] = {}
                for eh in ['E', 'H']:
                    values = fields.dtype_omp_values + \
                        [nmax[xyz], idx_pc[xyz], idx_f1[eh][xyz][pm], idx_f2[eh][xyz], idx_f3[eh][xyz]]
                    if (eh == 'E' and fields.ce_on) or (eh == 'H' and fields.ch_on):
                        values += cf_values[True]
                    else:
                        values += cf_values[False]
                    ksrc = common.replace_template_code(template, macros, values)
                    programs[xyz][pm][eh] = common_cpu.build_clib(ksrc, 'cpml')

        # arguments
        ex, ey, ez, hx, hy, hz = fields.ehs
        fs = {'x': {'E': [ey, ez, hz, hy], 'H': [hz, hy, ey, ez]},
              'y': {'E': [ez, ex, hx, hz], 'H': [hx, hz, ez, ex]},
              'z': {'E': [ex, ey, hy, hx], 'H': [hy, hx, ex, ey]}}

        # Coefficient arrays are passed only when the matching set is enabled.
        cfs = {'x': {'E': [], 'H': []},
               'y': {'E': [], 'H': []},
               'z': {'E': [], 'H': []}}
        if fields.ce_on:
            cex, cey, cez = fields.ces
            cfs['x']['E'] = [cey, cez]
            cfs['y']['E'] = [cez, cex]
            cfs['z']['E'] = [cex, cey]
        if fields.ch_on:
            chx, chy, chz = fields.chs
            cfs['x']['H'] = [chz, chy]
            cfs['y']['H'] = [chx, chz]
            cfs['z']['H'] = [chy, chx]

        arguments = {}
        for xyz, pms in zip(['x', 'y', 'z'], directions):
            arguments[xyz] = {}
            for pm in pms:
                arguments[xyz][pm] = {}
                for eh in ['E', 'H']:
                    arguments[xyz][pm][eh] = [npml] + fs[xyz][eh] + psis[xyz][pm][eh] + pcs[pm][eh] + cfs[xyz][eh]

        # global variables
        self.mainf = fields
        self.directions = directions
        self.programs = programs
        self.arguments = arguments

        # append to the update list
        self.priority_type = 'pml'
        fields.append_instance(self)

示例#5

显示文件

    def __init__(self, fields):
        """Build the core update library and register this instance with `fields`.

        The 'core.c' template is specialised with macro values derived from
        the settings of `fields` (precision, OpenMP usage, coefficient arrays,
        padding masks) and built with common_cpu.build_clib.  The ctypes
        signatures of update_e / update_h and the argument lists for the
        '', 'pre' and 'post' calls are then prepared.

        Args:
            fields: Fields instance; validated with common.check_type.
        """

        common.check_type('fields', fields, Fields)

        # local variables
        precision_float = fields.precision_float
        use_cpu_core = fields.use_cpu_core
        dtype = fields.dtype

        nx, ny, nz_pitch = ns_pitch = fields.ns_pitch
        align_size = fields.align_size
        pad = fields.pad

        ce_on = fields.ce_on
        ch_on = fields.ch_on

        ehs = fields.ehs
        if ce_on:
            ces = fields.ces
        if ch_on:
            chs = fields.chs

        # pad_str_list: text for the MASK_H, MASK_EXY and MASK macros
        def mask_to_str(mask_arr):
            # Format element by element; str(list(...)) depends on how the
            # installed NumPy prints its scalars.
            return ', '.join(str(int(v)) for v in mask_arr)

        mask_h = np.ones(align_size, 'i')
        mask_h[0] = 0

        mask_exy = np.ones(align_size, 'i')
        mask_exy[-(pad + 1):] = 0

        mask = np.ones(align_size, 'i')
        if pad != 0:
            mask[-pad:] = 0

        pad_str_list = [mask_to_str(m) for m in (mask_h, mask_exy, mask)]

        # program
        dtype_str_list = {
                'single': ['float', 'xmmintrin.h', 'ps', '__m128', '4'],
                'double': ['double', 'emmintrin.h', 'pd', '__m128d', '2']}[precision_float]

        macros = [
                'OMP_HEADER', 'OMP_FOR_E', 'OMP_FOR_H',
                'ARGS_CE', 'INIT_CE', 'PRIVATE_CE', 'CEX', 'CEY', 'CEZ',
                'ARGS_CH', 'INIT_CH', 'PRIVATE_CH', 'CHX', 'CHY', 'CHZ',
                'DTYPE', 'MM_HEADER', 'PSD', 'TYPE128', 'INCRE',
                'MASK_H', 'MASK_EXY', 'MASK']

        # Defaults correspond to OpenMP off and no coefficient arrays.
        values = ['', '', '',
                '', 'ce=SET1(0.5)', '', '', '', '',
                '', 'ch=SET1(0.5)', '', '', '', '',
                ] + dtype_str_list + pad_str_list

        if use_cpu_core != 1:
            values[0] = '#include <omp.h>'

            omp_str = '' if use_cpu_core == 0 else 'omp_set_num_threads(%d);\n\t' % use_cpu_core
            values[1] = omp_str + '#pragma omp parallel for private(idx, i, j, k, hx0, hy0, hz0, h1, h2, e PRIVATE_CE)'
            values[2] = omp_str + '#pragma omp parallel for private(idx, i, j, k, ex0, ey0, ez0, e1, e2, h PRIVATE_CH)'

        if ce_on:
            values[3:9] = [
                    ', DTYPE *cex, DTYPE *cey, DTYPE *cez', 'ce', ', ce',
                    'ce = LOAD(cex+idx);', 'ce = LOAD(cey+idx);', 'ce = LOAD(cez+idx);']
        if ch_on:
            values[9:15] = [
                    ', DTYPE *chx, DTYPE *chy, DTYPE *chz', 'ch', ', ch',
                    'ch = LOAD(chx+idx);', 'ch = LOAD(chy+idx);', 'ch = LOAD(chz+idx);']

        # Close the template file deterministically instead of leaking the handle.
        with open(common_cpu.src_path + 'core.c') as src_file:
            template = src_file.read()
        ksrc = common.replace_template_code(template, macros, values)
        program = common_cpu.build_clib(ksrc)

        carg = np.ctypeslib.ndpointer(dtype, ndim=3,
                shape=tuple(ns_pitch), flags='C_CONTIGUOUS, ALIGNED')

        # Each function gets its own argtypes list.  A single list assigned to
        # both would be extended in place by every `+=`, so with ce_on and
        # ch_on both set update_h would declare three parameters too many.
        e_argtypes = [c_int, c_int, c_int, c_int, c_int] + [carg] * 6
        h_argtypes = list(e_argtypes)

        # arguments
        nyz_pitch = ny * nz_pitch
        e_args = ns_pitch + [0, nx * nyz_pitch] + ehs
        h_args = ns_pitch + [0, nx * nyz_pitch] + ehs
        if ce_on:
            e_argtypes += [carg] * 3
            e_args += ces
        if ch_on:
            h_argtypes += [carg] * 3
            h_args += chs

        program.update_e.argtypes = e_argtypes
        program.update_e.restype = None
        program.update_h.argtypes = h_argtypes
        program.update_h.restype = None

        # Same arguments with the 4th and 5th entries (idx0, nmax) replaced.
        set_args = lambda args, idx0, nmax: args[:3] + [idx0, nmax] + args[5:]
        e_args_dict = {
                '': e_args,
                'pre': set_args(e_args, nyz_pitch, 3*nyz_pitch),
                'post': set_args(e_args, 0, nyz_pitch)}

        h_args_dict = {
                '': h_args,
                'pre': set_args(h_args, 0, 2*nyz_pitch),
                'post': set_args(h_args, 2*nyz_pitch, 3*nyz_pitch)}

        # global variables
        self.mainf = fields
        self.program = program
        self.e_args_dict = e_args_dict
        self.h_args_dict = h_args_dict

        # append to the update list
        self.priority_type = 'core'
        fields.append_instance(self)

示例#6

显示文件

文件： fields.py 项目： wbkifun/fdtd_accelerate

    def __init__(self, nx, ny, nz,
                 coeff_use='e',
                 precision_float='single',
                 use_cpu_core=0):
        """Allocate the field arrays and build the core update library.

        Args:
            nx, ny, nz: grid sizes (int).
            coeff_use: one of '', 'e', 'h', 'eh'; an 'e' enables the ce
                coefficient arrays and an 'h' the ch coefficient arrays.
            precision_float: 'single' (np.float32) or 'double' (np.float64).
            use_cpu_core: when non-zero, an omp_set_num_threads(<n>) call is
                substituted for the OMP_SET_NUM_THREADS macro.
        """

        common.check_type('nx', nx, int)
        common.check_type('ny', ny, int)
        common.check_type('nz', nz, int)
        common.check_value('coeff_use', coeff_use, ('', 'e', 'h', 'eh'))
        common.check_value('precision_float', precision_float, ('single', 'double'))

        self.nx = nx
        self.ny = ny
        self.nz = nz
        self.coeff_use = coeff_use
        self.dtype = {'single': np.float32, 'double': np.float64}[precision_float]
        self.dtype_str_list = {
                'single': ['float', 'xmmintrin.h', 'ps', '__m128', '4', '0, 1, 1, 1'],
                'double': ['double', 'emmintrin.h', 'pd', '__m128d', '2', '0, 1']}[precision_float]

        self.device_type = 'cpu'

        # padding for the nz which is multiple of 4 (float32) or 2 (float64)
        a_size = {'single': 4, 'double': 2}[precision_float]   # 16 Bytes
        self.pad = pad = int(np.ceil(float(nz) / a_size) * a_size) - nz
        self.slz = slice(None, None) if pad == 0 else slice(None, -pad)
        self.nz_pitch = nz_pitch = nz + pad

        mask_arr = np.ones(a_size, 'i')
        mask_arr[-(pad+1):] = 0
        # Format element by element; str(list(...)) depends on how the
        # installed NumPy prints its scalars.
        self.dtype_str_list.append(', '.join(str(int(v)) for v in mask_arr))

        # ns, qtask, enqueue
        self.ns = [nx, ny, nz]
        self.ns_pitch = [nx, ny, nz_pitch]
        self.qtask = QueueTask()
        self.enqueue = self.qtask.enqueue
        self.enqueue_barrier = self.qtask.enqueue_barrier

        # on/off the coefficient arrays
        self.ce_on = 'e' in self.coeff_use
        self.ch_on = 'h' in self.coeff_use

        # allocations
        self.ehs = [np.zeros(self.ns_pitch, dtype=self.dtype) for i in range(6)]
        self.ex, self.ey, self.ez, self.hx, self.hy, self.hz = self.ehs

        if self.ce_on:
            self.ces = [np.ones(self.ns_pitch, dtype=self.dtype)*0.5 for i in range(3)]
            self.cex, self.cey, self.cez = self.ces

        if self.ch_on:
            self.chs = [np.ones(self.ns_pitch, dtype=self.dtype)*0.5 for i in range(3)]
            self.chx, self.chy, self.chz = self.chs

        # program
        macros = [
                'ARGS_CE', 'INIT_CE', 'PRIVATE_CE', 'CEX', 'CEY', 'CEZ',
                'ARGS_CH', 'INIT_CH', 'PRIVATE_CH', 'CHX', 'CHY', 'CHZ',
                'OMP_SET_NUM_THREADS',
                'DTYPE', 'MM_HEADER', 'PSD', 'TYPE128', 'INCRE', 'MASK_H', 'MASK_E']

        # Defaults correspond to no coefficient arrays and no thread-count call.
        values = [
                '', 'ce=SET1(0.5)', '', '', '', '',
                '', 'ch=SET1(0.5)', '', '', '', '',
                ''] + self.dtype_str_list

        if use_cpu_core != 0:
            values[12] = 'omp_set_num_threads(%d);' % use_cpu_core

        if self.ce_on:
            values[:6] = [
                    ', DTYPE *cex, DTYPE *cey, DTYPE *cez', 'ce', ', ce',
                    'ce = LOAD(cex+idx);', 'ce = LOAD(cey+idx);', 'ce = LOAD(cez+idx);']
        if self.ch_on:
            values[6:12] = [
                    ', DTYPE *chx, DTYPE *chy, DTYPE *chz', 'ch', ', ch',
                    'ch = LOAD(chx+idx);', 'ch = LOAD(chy+idx);', 'ch = LOAD(chz+idx);']

        # Close the template file deterministically instead of leaking the handle.
        with open(common_cpu.src_path + 'core.c') as src_file:
            template = src_file.read()
        ksrc = common.replace_template_code(template, macros, values)
        self.program = common_cpu.build_clib(ksrc)

        carg = np.ctypeslib.ndpointer(dtype=self.dtype, ndim=3,
                shape=(nx, ny, nz_pitch), flags='C_CONTIGUOUS, ALIGNED')

        # Each function gets its own argtypes list.  A single list assigned to
        # both would be extended in place by every `+=`, so with ce_on and
        # ch_on both set update_h would declare three parameters too many.
        e_argtypes = [c_int, c_int, c_int, c_int, c_int] + [carg] * 6
        h_argtypes = list(e_argtypes)

        self.e_args = self.ns_pitch + [0, nx*ny*nz_pitch] + self.ehs
        self.h_args = self.ns_pitch + [0, nx*ny*nz_pitch] + self.ehs

        if self.ce_on:
            e_argtypes += [carg] * 3
            self.e_args += self.ces

        if self.ch_on:
            h_argtypes += [carg] * 3
            self.h_args += self.chs

        self.program.update_e.argtypes = e_argtypes
        self.program.update_e.restype = None
        self.program.update_h.argtypes = h_argtypes
        self.program.update_h.restype = None

示例#7

显示文件

文件： core.py 项目： xj361685640/fdtd_accelerate

    def __init__(self, fields):
        """Build the core update library and register this instance with `fields`.

        The 'core.c' template is specialised with macro values derived from
        the settings of `fields` (precision, thread count, coefficient arrays,
        padding masks) and built with common_cpu.build_clib.  The ctypes
        signatures of update_e / update_h and the argument lists for the
        '', 'pre', 'mid' and 'post' calls are then prepared.

        Args:
            fields: Fields instance; validated with common.check_type.
        """

        common.check_type('fields', fields, Fields)

        # local variables
        precision_float = fields.precision_float
        use_cpu_core = fields.use_cpu_core
        dtype = fields.dtype

        nx, ny, nz_pitch = ns_pitch = fields.ns_pitch
        align_size = fields.align_size
        pad = fields.pad

        ce_on = fields.ce_on
        ch_on = fields.ch_on

        ehs = fields.ehs
        if ce_on:
            ces = fields.ces
        if ch_on:
            chs = fields.chs

        # program
        dtype_str_list = {
                'single': ['float', 'xmmintrin.h', 'ps', '__m128', '4'],
                'double': ['double', 'emmintrin.h', 'pd', '__m128d', '2']}[precision_float]

        # pad_str_list: text for the MASK_H, MASK_EXY and MASK macros
        def mask_to_str(mask_arr):
            # Format element by element; str(list(...)) depends on how the
            # installed NumPy prints its scalars.
            return ', '.join(str(int(v)) for v in mask_arr)

        mask_h = np.ones(align_size, 'i')
        mask_h[0] = 0

        mask_exy = np.ones(align_size, 'i')
        mask_exy[-(pad+1):] = 0

        mask = np.ones(align_size, 'i')
        if pad != 0:
            mask[-pad:] = 0

        pad_str_list = [mask_to_str(m) for m in (mask_h, mask_exy, mask)]

        macros = [
                'ARGS_CE', 'INIT_CE', 'PRIVATE_CE', 'CEX', 'CEY', 'CEZ',
                'ARGS_CH', 'INIT_CH', 'PRIVATE_CH', 'CHX', 'CHY', 'CHZ',
                'OMP_SET_NUM_THREADS',
                'DTYPE', 'MM_HEADER', 'PSD', 'TYPE128', 'INCRE',
                'MASK_H', 'MASK_EXY', 'MASK']

        # Defaults correspond to no coefficient arrays and no thread-count call.
        values = [
                '', 'ce=SET1(0.5)', '', '', '', '',
                '', 'ch=SET1(0.5)', '', '', '', '',
                ''] + dtype_str_list + pad_str_list

        if use_cpu_core != 0:
            values[12] = 'omp_set_num_threads(%d);' % use_cpu_core

        if ce_on:
            values[:6] = [
                    ', DTYPE *cex, DTYPE *cey, DTYPE *cez', 'ce', ', ce',
                    'ce = LOAD(cex+idx);', 'ce = LOAD(cey+idx);', 'ce = LOAD(cez+idx);']
        if ch_on:
            values[6:12] = [
                    ', DTYPE *chx, DTYPE *chy, DTYPE *chz', 'ch', ', ch',
                    'ch = LOAD(chx+idx);', 'ch = LOAD(chy+idx);', 'ch = LOAD(chz+idx);']

        # Close the template file deterministically instead of leaking the handle.
        with open(common_cpu.src_path + 'core.c') as src_file:
            template = src_file.read()
        ksrc = common.replace_template_code(template, macros, values)
        program = common_cpu.build_clib(ksrc)

        carg = np.ctypeslib.ndpointer(dtype, ndim=3,
                shape=tuple(ns_pitch), flags='C_CONTIGUOUS, ALIGNED')

        # Each function gets its own argtypes list.  A single list assigned to
        # both would be extended in place by every `+=`, so with ce_on and
        # ch_on both set update_h would declare three parameters too many.
        e_argtypes = [c_int, c_int, c_int, c_int, c_int] + [carg] * 6
        h_argtypes = list(e_argtypes)

        # arguments
        nyz_pitch = ny * nz_pitch
        e_args = ns_pitch + [0, nx*nyz_pitch] + ehs
        h_args = ns_pitch + [0, nx*nyz_pitch] + ehs
        if ce_on:
            e_argtypes += [carg] * 3
            e_args += ces
        if ch_on:
            h_argtypes += [carg] * 3
            h_args += chs

        program.update_e.argtypes = e_argtypes
        program.update_e.restype = None
        program.update_h.argtypes = h_argtypes
        program.update_h.restype = None

        def with_range(args, idx0, nmax):
            # Copy of `args` with the 4th and 5th entries replaced.
            sub_args = args[:]
            sub_args[3:5] = [idx0, nmax]
            return sub_args

        pre_e_args = with_range(e_args, (nx-2)*nyz_pitch, nx*nyz_pitch)
        mid_e_args = with_range(e_args, nyz_pitch, (nx-2)*nyz_pitch)
        post_e_args = with_range(e_args, 0, nyz_pitch)

        pre_h_args = with_range(h_args, 0, 2*nyz_pitch)
        mid_h_args = with_range(h_args, 2*nyz_pitch, (nx-1)*nyz_pitch)
        post_h_args = with_range(h_args, (nx-1)*nyz_pitch, nx*nyz_pitch)

        # global variables
        self.mainf = fields
        self.e_args = e_args
        self.h_args = h_args
        self.program = program

        self.e_args_dict = {'': e_args,
                'pre': pre_e_args, 'mid': mid_e_args, 'post': post_e_args}
        self.h_args_dict = {'': h_args,
                'pre': pre_h_args, 'mid': mid_h_args, 'post': post_h_args}

        # append to the update list
        self.priority_type = 'core'
        fields.append_instance(self)