def __init__(self, fields):
    """Build the 'core' update program and register it with `fields`.

    The C template ``core.c`` is specialised by macro substitution
    (coefficient arrays enabled or replaced by the constant 0.5) and
    compiled through ``common_cpu.build_clib``.

    Parameters
    ----------
    fields : Fields
        Provides ``ce_on``/``ch_on``, ``ehs`` (and ``ces``/``chs`` when the
        matching flag is on), ``dtype_omp_macros``/``dtype_omp_values`` and
        ``append_instance``.
    """
    common.check_type('fields', fields, Fields)

    # local variables
    ce_on = fields.ce_on
    ch_on = fields.ch_on

    # program
    # cf_values[flag] lists the substitutions for the six coefficient
    # macros, in the same order as ce_macros / ch_macros below.
    cf_values = {
        True: ['', 'OOOOOOOOO', ', &Cx, &Cy, &Cz',
               'cx[idx]', 'cy[idx]', 'cz[idx]'],
        False: ['// ', 'OOOOOO', '', '0.5', '0.5', '0.5'],
    }
    ce_macros = ['COEFF_E ', 'PARSE_ARGS_CE', 'PYARRAYOBJECT_CE',
                 'CEX', 'CEY', 'CEZ']
    ce_values = cf_values[ce_on]
    ch_macros = ['COEFF_H ', 'PARSE_ARGS_CH', 'PYARRAYOBJECT_CH',
                 'CHX', 'CHY', 'CHZ']
    ch_values = cf_values[ch_on]

    macros = fields.dtype_omp_macros + ce_macros + ch_macros
    values = fields.dtype_omp_values + ce_values + ch_values

    # Read the template through a context manager so the handle is closed.
    with open(common_cpu.src_path + 'core.c') as src_file:
        template = src_file.read()
    ksrc = common.replace_template_code(template, macros, values)
    program = common_cpu.build_clib(ksrc, 'core')

    # arguments
    # Copy fields.ehs: the previous code bound both names to the same list
    # object, so `+=` extended fields.ehs itself and leaked the E
    # coefficients into the H argument list (and vice versa).
    e_args = list(fields.ehs)
    h_args = list(fields.ehs)
    if ce_on:
        e_args += fields.ces
    if ch_on:
        h_args += fields.chs

    # global variables
    self.mainf = fields
    self.program = program
    self.e_args = e_args
    self.h_args = h_args

    # append to the update list
    self.priority_type = 'core'
    fields.append_instance(self)
def __init__(self, fields):
    """Compile the SSE 'core' kernel and prepare its ctypes call arguments.

    ``core.c`` is specialised by macro substitution (OpenMP pragmas,
    coefficient arrays, precision-dependent intrinsics, padding masks),
    built with ``common_cpu.build_clib`` and its ``update_e``/``update_h``
    entry points are given ctypes signatures.

    Parameters
    ----------
    fields : Fields
        Provides ``precision_float``, ``use_cpu_core``, ``dtype``,
        ``ns_pitch``, ``align_size``, ``pad``, ``ce_on``/``ch_on``, ``ehs``
        (and ``ces``/``chs`` when enabled) and ``append_instance``.
    """
    common.check_type('fields', fields, Fields)

    # local variables
    precision_float = fields.precision_float
    use_cpu_core = fields.use_cpu_core
    dtype = fields.dtype
    nx, ny, nz_pitch = ns_pitch = fields.ns_pitch
    align_size = fields.align_size
    pad = fields.pad
    ce_on = fields.ce_on
    ch_on = fields.ch_on
    ehs = fields.ehs

    # pad_str_list
    def mask_to_str(mask):
        # tolist() yields plain Python ints, so the text is '0, 1, 1, 1'
        # regardless of how the installed NumPy prints its scalar types.
        return ', '.join(str(v) for v in mask.tolist())

    mask0 = np.ones(align_size, 'i')

    mask_h = mask0.copy()
    mask_h[0] = 0

    mask_exy = mask0.copy()
    mask_exy[-(pad + 1):] = 0

    mask = mask0.copy()
    if pad != 0:
        mask[-pad:] = 0

    # Order must match the 'MASK_H', 'MASK_EXY', 'MASK' macros below.
    pad_str_list = [mask_to_str(m) for m in (mask_h, mask_exy, mask)]

    # program
    dtype_str_list = {
        'single': ['float', 'xmmintrin.h', 'ps', '__m128', '4'],
        'double': ['double', 'emmintrin.h', 'pd', '__m128d', '2'],
    }[precision_float]

    macros = [
        'OMP_HEADER', 'OMP_FOR_E', 'OMP_FOR_H',
        'ARGS_CE', 'INIT_CE', 'PRIVATE_CE', 'CEX', 'CEY', 'CEZ',
        'ARGS_CH', 'INIT_CH', 'PRIVATE_CH', 'CHX', 'CHY', 'CHZ',
        'DTYPE', 'MM_HEADER', 'PSD', 'TYPE128', 'INCRE',
        'MASK_H', 'MASK_EXY', 'MASK']

    values = [
        '', '', '',
        '', 'ce=SET1(0.5)', '', '', '', '',
        '', 'ch=SET1(0.5)', '', '', '', '',
    ] + dtype_str_list + pad_str_list

    if use_cpu_core != 1:
        values[0] = '#include <omp.h>'

        if use_cpu_core == 0:
            omp_str = ''
        else:
            omp_str = 'omp_set_num_threads(%d);\n\t' % use_cpu_core
        values[1] = omp_str + '#pragma omp parallel for private(idx, i, j, k, hx0, hy0, hz0, h1, h2, e PRIVATE_CE)'
        values[2] = omp_str + '#pragma omp parallel for private(idx, i, j, k, ex0, ey0, ez0, e1, e2, h PRIVATE_CH)'

    if ce_on:
        values[3:9] = [
            ', DTYPE *cex, DTYPE *cey, DTYPE *cez', 'ce', ', ce',
            'ce = LOAD(cex+idx);', 'ce = LOAD(cey+idx);', 'ce = LOAD(cez+idx);']
    if ch_on:
        values[9:15] = [
            ', DTYPE *chx, DTYPE *chy, DTYPE *chz', 'ch', ', ch',
            'ch = LOAD(chx+idx);', 'ch = LOAD(chy+idx);', 'ch = LOAD(chz+idx);']

    # Read the template through a context manager so the handle is closed.
    with open(common_cpu.src_path + 'core.c') as src_file:
        template = src_file.read()
    ksrc = common.replace_template_code(template, macros, values)
    program = common_cpu.build_clib(ksrc)

    carg = np.ctypeslib.ndpointer(dtype, ndim=3,
                                  shape=tuple(ns_pitch),
                                  flags='C_CONTIGUOUS, ALIGNED')

    # arguments
    nyz_pitch = ny * nz_pitch
    e_args = ns_pitch + [0, nx * nyz_pitch] + ehs
    h_args = ns_pitch + [0, nx * nyz_pitch] + ehs

    # Each function gets its OWN argtypes list.  Assigning one shared list
    # to both and then using `argtypes += ...` extends that shared object,
    # so with both coefficient sets enabled update_h would end up declared
    # with the E coefficient slots as well.
    e_argtypes = [c_int] * 5 + [carg] * 6
    h_argtypes = [c_int] * 5 + [carg] * 6
    if ce_on:
        e_argtypes += [carg] * 3
        e_args += fields.ces
    if ch_on:
        h_argtypes += [carg] * 3
        h_args += fields.chs

    program.update_e.argtypes = e_argtypes
    program.update_e.restype = None
    program.update_h.argtypes = h_argtypes
    program.update_h.restype = None

    def set_args(args, idx0, nmax):
        # Copy of `args` with positions 3 and 4 replaced by (idx0, nmax).
        return args[:3] + [idx0, nmax] + args[5:]

    e_args_dict = {
        '': e_args,
        'pre': set_args(e_args, nyz_pitch, 3 * nyz_pitch),
        'post': set_args(e_args, 0, nyz_pitch)}

    h_args_dict = {
        '': h_args,
        'pre': set_args(h_args, 0, 2 * nyz_pitch),
        'post': set_args(h_args, 2 * nyz_pitch, 3 * nyz_pitch)}

    # global variables
    self.mainf = fields
    self.program = program
    self.e_args_dict = e_args_dict
    self.h_args_dict = h_args_dict

    # append to the update list
    self.priority_type = 'core'
    fields.append_instance(self)
def __init__(self, nx, ny, nz,
             coeff_use='e',
             precision_float='single',
             use_cpu_core=0):
    """Allocate the field arrays and compile the SSE 'core' kernel.

    Parameters
    ----------
    nx, ny, nz : int
        Grid dimensions.  The last axis is padded up to a multiple of the
        16-byte vector width (4 floats or 2 doubles).
    coeff_use : {'', 'e', 'h', 'eh'}
        Which coefficient arrays to allocate: ``ces`` when it contains
        'e', ``chs`` when it contains 'h'.
    precision_float : {'single', 'double'}
        Selects ``np.float32`` or ``np.float64`` and the matching C
        type/intrinsic strings.
    use_cpu_core : int
        When non-zero, an ``omp_set_num_threads(<n>);`` call is substituted
        into the kernel source.
    """
    common.check_type('nx', nx, int)
    common.check_type('ny', ny, int)
    common.check_type('nz', nz, int)
    common.check_value('coeff_use', coeff_use, ('', 'e', 'h', 'eh'))
    common.check_value('precision_float', precision_float,
                       ('single', 'double'))

    self.nx = nx
    self.ny = ny
    self.nz = nz
    self.coeff_use = coeff_use
    self.dtype = {'single': np.float32,
                  'double': np.float64}[precision_float]
    # The sixth entry is the substitution for the 'MASK_H' macro.
    self.dtype_str_list = {
        'single': ['float', 'xmmintrin.h', 'ps', '__m128', '4', '0, 1, 1, 1'],
        'double': ['double', 'emmintrin.h', 'pd', '__m128d', '2', '0, 1'],
    }[precision_float]

    self.device_type = 'cpu'

    # padding for the nz which is multiple of 4 (float32) or 2 (float64)
    a_size = {'single': 4, 'double': 2}[precision_float]  # 16 Bytes
    self.pad = pad = int(np.ceil(float(nz) / a_size) * a_size) - nz
    self.slz = slice(None, None) if pad == 0 else slice(None, -pad)
    self.nz_pitch = nz_pitch = nz + pad

    # Substitution for the 'MASK_E' macro.  tolist() yields plain Python
    # ints, so the text is e.g. '1, 1, 0, 0' regardless of how the
    # installed NumPy prints its scalar types.
    mask_arr = np.ones(a_size, 'i')
    mask_arr[-(pad + 1):] = 0
    self.dtype_str_list.append(', '.join(str(v) for v in mask_arr.tolist()))

    # ns, qtask, enqueue
    self.ns = [nx, ny, nz]
    self.ns_pitch = [nx, ny, nz_pitch]
    self.qtask = QueueTask()
    self.enqueue = self.qtask.enqueue
    self.enqueue_barrier = self.qtask.enqueue_barrier

    # on/off the coefficient arrays
    self.ce_on = 'e' in self.coeff_use
    self.ch_on = 'h' in self.coeff_use

    # allocations
    self.ehs = [np.zeros(self.ns_pitch, dtype=self.dtype) for i in range(6)]
    self.ex, self.ey, self.ez, self.hx, self.hy, self.hz = self.ehs

    if self.ce_on:
        self.ces = [np.ones(self.ns_pitch, dtype=self.dtype) * 0.5
                    for i in range(3)]
        self.cex, self.cey, self.cez = self.ces

    if self.ch_on:
        self.chs = [np.ones(self.ns_pitch, dtype=self.dtype) * 0.5
                    for i in range(3)]
        self.chx, self.chy, self.chz = self.chs

    # program
    macros = [
        'ARGS_CE', 'INIT_CE', 'PRIVATE_CE', 'CEX', 'CEY', 'CEZ',
        'ARGS_CH', 'INIT_CH', 'PRIVATE_CH', 'CHX', 'CHY', 'CHZ',
        'OMP_SET_NUM_THREADS',
        'DTYPE', 'MM_HEADER', 'PSD', 'TYPE128', 'INCRE', 'MASK_H', 'MASK_E']

    values = [
        '', 'ce=SET1(0.5)', '', '', '', '',
        '', 'ch=SET1(0.5)', '', '', '', '',
        ''] + self.dtype_str_list

    if use_cpu_core != 0:
        values[12] = 'omp_set_num_threads(%d);' % use_cpu_core

    if self.ce_on:
        values[:6] = [
            ', DTYPE *cex, DTYPE *cey, DTYPE *cez', 'ce', ', ce',
            'ce = LOAD(cex+idx);', 'ce = LOAD(cey+idx);', 'ce = LOAD(cez+idx);']
    if self.ch_on:
        values[6:12] = [
            ', DTYPE *chx, DTYPE *chy, DTYPE *chz', 'ch', ', ch',
            'ch = LOAD(chx+idx);', 'ch = LOAD(chy+idx);', 'ch = LOAD(chz+idx);']

    # Read the template through a context manager so the handle is closed.
    with open(common_cpu.src_path + 'core.c') as src_file:
        template = src_file.read()
    ksrc = common.replace_template_code(template, macros, values)
    self.program = common_cpu.build_clib(ksrc)

    carg = np.ctypeslib.ndpointer(dtype=self.dtype, ndim=3,
                                  shape=(nx, ny, nz_pitch),
                                  flags='C_CONTIGUOUS, ALIGNED')

    self.e_args = self.ns_pitch + [0, nx * ny * nz_pitch] + self.ehs
    self.h_args = self.ns_pitch + [0, nx * ny * nz_pitch] + self.ehs

    # Each function gets its OWN argtypes list.  Assigning one shared list
    # to both and then using `argtypes += ...` extends that shared object,
    # so with coeff_use='eh' update_h would end up declared with the E
    # coefficient slots as well.
    e_argtypes = [c_int] * 5 + [carg] * 6
    h_argtypes = [c_int] * 5 + [carg] * 6
    if self.ce_on:
        e_argtypes += [carg] * 3
        self.e_args += self.ces
    if self.ch_on:
        h_argtypes += [carg] * 3
        self.h_args += self.chs

    self.program.update_e.argtypes = e_argtypes
    self.program.update_e.restype = None
    self.program.update_h.argtypes = h_argtypes
    self.program.update_h.restype = None
def __init__(self, fields, directions, npml=10, sigma_max=4, kappa_max=1, alpha_max=0, m_sigma=3, m_alpha=1):
    """Set up the PML update programs and arguments for the chosen faces.

    Parameters
    ----------
    fields : Fields
        Provides ``dt``, ``ns``, ``dtype``, ``ehs``, ``ce_on``/``ch_on``
        (with ``ces``/``chs``), ``dtype_omp_macros``/``dtype_omp_values``,
        ``get_rds`` and ``append_instance``.
    directions : list or tuple of str
        Three entries (x, y, z), each one of '+', '-', '+-' or ''.
    npml : int
        Number of PML cells; sizes the psi arrays and coefficient profiles.
    sigma_max, kappa_max, alpha_max : int or float
        Maxima of the graded sigma, kappa and alpha profiles.
    m_sigma, m_alpha : int or float
        Exponents of the sigma/kappa and alpha grading.

    Raises
    ------
    AssertionError
        If `directions` does not have three entries or holds an invalid
        entry.
    """
    common.check_type('fields', fields, Fields)
    common.check_type('directions', directions, (list, tuple), str)
    common.check_type('npml', npml, int)
    common.check_type('sigma_max', sigma_max, (int, float))
    common.check_type('kappa_max', kappa_max, (int, float))
    common.check_type('alpha_max', alpha_max, (int, float))
    common.check_type('m_sigma', m_sigma, (int, float))
    common.check_type('m_alpha', m_alpha, (int, float))

    assert len(directions) == 3
    for axis in directions:
        assert axis in ['+', '-', '+-', '']

    # local variables
    dt = fields.dt
    nx, ny, nz = fields.ns
    dtype = fields.dtype

    # psi allocations
    psi_shapes = {'x': (npml, ny, nz),
                  'y': (nx, npml, nz),
                  'z': (nx, ny, npml)}
    psis = {}
    for xyz, pms in zip(['x', 'y', 'z'], directions):
        psis[xyz] = {}
        for pm in pms:
            psis[xyz][pm] = {}
            for eh in ['E', 'H']:
                psis[xyz][pm][eh] = [np.zeros(psi_shapes[xyz], dtype)
                                     for i in range(2)]

    # coefficient allocations
    # The profile functions are only called with the floating-point index
    # arrays built below, so `i / npml` is a true division.
    def sigma(i):
        return sigma_max * (i / npml) ** m_sigma

    def kappa(i):
        return 1 + (kappa_max - 1) * (i / npml) ** m_sigma

    def alpha(i):
        return alpha_max * ((npml - i) / npml) ** m_alpha

    def pcb(i):
        return np.exp(-(sigma(i) / kappa(i) + alpha(i)) * dt)

    def pca(i):
        return sigma(i) / (sigma(i) + alpha(i) * kappa(i)) * (pcb(i) - 1)

    i_half = np.arange(0.5, npml, 1, dtype)
    i_full = np.arange(1, npml + 1, 1, dtype)
    iis = {'+': {'E': i_half, 'H': i_full},
           '-': {'E': i_full[::-1], 'H': i_half[::-1]}}

    pcs = {}
    for pm in ['+', '-']:
        pcs[pm] = {}
        for eh in ['E', 'H']:
            pcs[pm][eh] = [pcb(iis[pm][eh]), pca(iis[pm][eh])]

    # modify reciprocal ds
    if kappa_max != 1:
        sls = {'+': {'E': slice(-npml - 1, -1), 'H': slice(-npml, None)},
               '-': {'E': slice(None, npml), 'H': slice(1, npml + 1)}}
        for pms, erd, hrd in zip(directions, *fields.get_rds()):
            for pm in pms:
                erd[sls[pm]['E']] /= kappa(iis[pm]['E'])
                hrd[sls[pm]['H']] /= kappa(iis[pm]['H'])

    # program
    idx_macros = ['NMAX', 'IDX_PC', 'IDX_F1', 'IDX_F2', 'IDX_F3']
    nmax = {'x': 'npml*ny*nz', 'y': 'nx*npml*nz', 'z': 'nx*ny*npml'}
    idx_pc = {'x': 'idx/(ny*nz)', 'y': '(idx/nz)%npml', 'z': 'idx%npml'}
    if0 = {'x': 'idx',
           'y': 'idx+(idx/(npml*nz))*(ny-npml)*nz',
           'z': 'idx+(idx/npml)*(nz-npml)'}
    idx_f1 = {
        'E': {'x': {'+': if0['x'] + '+(nx-npml-1)*ny*nz', '-': if0['x']},
              'y': {'+': if0['y'] + '+(ny-npml-1)*nz', '-': if0['y']},
              'z': {'+': if0['z'] + '+(nz-npml-1)', '-': if0['z']}},
        'H': {'x': {'+': if0['x'] + '+(nx-npml)*ny*nz',
                    '-': if0['x'] + '+ny*nz'},
              'y': {'+': if0['y'] + '+(ny-npml)*nz',
                    '-': if0['y'] + '+nz'},
              'z': {'+': if0['z'] + '+(nz-npml)',
                    '-': if0['z'] + '+1'}}}
    idx_f2 = {'E': {'x': 'if1+ny*nz', 'y': 'if1+nz', 'z': 'if1+1'},
              'H': {'x': 'if1', 'y': 'if1', 'z': 'if1'}}
    idx_f3 = {'E': {'x': 'if1', 'y': 'if1', 'z': 'if1'},
              'H': {'x': 'if1-ny*nz', 'y': 'if1-nz', 'z': 'if1-1'}}

    cf_macros = ['COEFF ', 'PARSE_ARGS_C', 'PYARRAYOBJECT_C', 'CF1', 'CF2']
    cf_values = {
        True: ['', 'iOOOOOOOOOO', ', &C1, &C2', 'c1[if1]', 'c2[if1]'],
        False: ['// ', 'iOOOOOOOO', '', '0.5', '0.5']}

    macros = fields.dtype_omp_macros + idx_macros + cf_macros

    # Read the template once and close the handle, instead of re-opening
    # the file for every (axis, side, field) combination.  It is only read
    # when at least one direction is active, as before.
    cpml_template = ''
    if any(directions):
        with open(common_cpu.src_path + 'cpml.c') as src_file:
            cpml_template = src_file.read()

    programs = {}
    for xyz, pms in zip(['x', 'y', 'z'], directions):
        programs[xyz] = {}
        for pm in pms:
            programs[xyz][pm] = {}
            for eh in ['E', 'H']:
                values = fields.dtype_omp_values + \
                    [nmax[xyz], idx_pc[xyz], idx_f1[eh][xyz][pm],
                     idx_f2[eh][xyz], idx_f3[eh][xyz]]
                if (eh == 'E' and fields.ce_on) or (eh == 'H' and fields.ch_on):
                    values += cf_values[True]
                else:
                    values += cf_values[False]

                ksrc = common.replace_template_code(cpml_template, macros, values)
                programs[xyz][pm][eh] = common_cpu.build_clib(ksrc, 'cpml')

    # arguments
    ex, ey, ez, hx, hy, hz = fields.ehs
    fs = {'x': {'E': [ey, ez, hz, hy], 'H': [hz, hy, ey, ez]},
          'y': {'E': [ez, ex, hx, hz], 'H': [hx, hz, ez, ex]},
          'z': {'E': [ex, ey, hy, hx], 'H': [hy, hx, ex, ey]}}

    cfs = {'x': {'E': [], 'H': []},
           'y': {'E': [], 'H': []},
           'z': {'E': [], 'H': []}}
    if fields.ce_on:
        cex, cey, cez = fields.ces
        cfs['x']['E'] = [cey, cez]
        cfs['y']['E'] = [cez, cex]
        cfs['z']['E'] = [cex, cey]
    if fields.ch_on:
        chx, chy, chz = fields.chs
        cfs['x']['H'] = [chz, chy]
        cfs['y']['H'] = [chx, chz]
        cfs['z']['H'] = [chy, chx]

    arguments = {}
    for xyz, pms in zip(['x', 'y', 'z'], directions):
        arguments[xyz] = {}
        for pm in pms:
            arguments[xyz][pm] = {}
            for eh in ['E', 'H']:
                arguments[xyz][pm][eh] = [npml] + fs[xyz][eh] \
                    + psis[xyz][pm][eh] + pcs[pm][eh] + cfs[xyz][eh]

    # global variables
    self.mainf = fields
    self.directions = directions
    self.programs = programs
    self.arguments = arguments

    # append to the update list
    self.priority_type = 'pml'
    fields.append_instance(self)
def __init__(self, fields):
    """Compile the SSE 'core' kernel and prepare its ctypes call arguments.

    ``core.c`` is specialised by macro substitution (OpenMP pragmas,
    coefficient arrays, precision-dependent intrinsics, padding masks),
    built with ``common_cpu.build_clib`` and its ``update_e``/``update_h``
    entry points are given ctypes signatures.

    Parameters
    ----------
    fields : Fields
        Provides ``precision_float``, ``use_cpu_core``, ``dtype``,
        ``ns_pitch``, ``align_size``, ``pad``, ``ce_on``/``ch_on``, ``ehs``
        (and ``ces``/``chs`` when enabled) and ``append_instance``.
    """
    common.check_type('fields', fields, Fields)

    # local variables
    precision_float = fields.precision_float
    use_cpu_core = fields.use_cpu_core
    dtype = fields.dtype
    nx, ny, nz_pitch = ns_pitch = fields.ns_pitch
    align_size = fields.align_size
    pad = fields.pad
    ce_on = fields.ce_on
    ch_on = fields.ch_on
    ehs = fields.ehs

    # pad_str_list
    def mask_to_str(mask):
        # tolist() yields plain Python ints, so the text is '0, 1, 1, 1'
        # regardless of how the installed NumPy prints its scalar types.
        return ', '.join(str(v) for v in mask.tolist())

    mask0 = np.ones(align_size, 'i')

    mask_h = mask0.copy()
    mask_h[0] = 0

    mask_exy = mask0.copy()
    mask_exy[-(pad + 1):] = 0

    mask = mask0.copy()
    if pad != 0:
        mask[-pad:] = 0

    # Order must match the 'MASK_H', 'MASK_EXY', 'MASK' macros below.
    pad_str_list = [mask_to_str(m) for m in (mask_h, mask_exy, mask)]

    # program
    dtype_str_list = {
        'single': ['float', 'xmmintrin.h', 'ps', '__m128', '4'],
        'double': ['double', 'emmintrin.h', 'pd', '__m128d', '2'],
    }[precision_float]

    macros = [
        'OMP_HEADER', 'OMP_FOR_E', 'OMP_FOR_H',
        'ARGS_CE', 'INIT_CE', 'PRIVATE_CE', 'CEX', 'CEY', 'CEZ',
        'ARGS_CH', 'INIT_CH', 'PRIVATE_CH', 'CHX', 'CHY', 'CHZ',
        'DTYPE', 'MM_HEADER', 'PSD', 'TYPE128', 'INCRE',
        'MASK_H', 'MASK_EXY', 'MASK']

    values = [
        '', '', '',
        '', 'ce=SET1(0.5)', '', '', '', '',
        '', 'ch=SET1(0.5)', '', '', '', '',
    ] + dtype_str_list + pad_str_list

    if use_cpu_core != 1:
        values[0] = '#include <omp.h>'

        if use_cpu_core == 0:
            omp_str = ''
        else:
            omp_str = 'omp_set_num_threads(%d);\n\t' % use_cpu_core
        values[1] = omp_str + '#pragma omp parallel for private(idx, i, j, k, hx0, hy0, hz0, h1, h2, e PRIVATE_CE)'
        values[2] = omp_str + '#pragma omp parallel for private(idx, i, j, k, ex0, ey0, ez0, e1, e2, h PRIVATE_CH)'

    if ce_on:
        values[3:9] = [
            ', DTYPE *cex, DTYPE *cey, DTYPE *cez', 'ce', ', ce',
            'ce = LOAD(cex+idx);', 'ce = LOAD(cey+idx);', 'ce = LOAD(cez+idx);']
    if ch_on:
        values[9:15] = [
            ', DTYPE *chx, DTYPE *chy, DTYPE *chz', 'ch', ', ch',
            'ch = LOAD(chx+idx);', 'ch = LOAD(chy+idx);', 'ch = LOAD(chz+idx);']

    # Read the template through a context manager so the handle is closed.
    with open(common_cpu.src_path + 'core.c') as src_file:
        template = src_file.read()
    ksrc = common.replace_template_code(template, macros, values)
    program = common_cpu.build_clib(ksrc)

    carg = np.ctypeslib.ndpointer(dtype, ndim=3,
                                  shape=tuple(ns_pitch),
                                  flags='C_CONTIGUOUS, ALIGNED')

    # arguments
    nyz_pitch = ny * nz_pitch
    e_args = ns_pitch + [0, nx * nyz_pitch] + ehs
    h_args = ns_pitch + [0, nx * nyz_pitch] + ehs

    # Each function gets its OWN argtypes list.  Assigning one shared list
    # to both and then using `argtypes += ...` extends that shared object,
    # so with both coefficient sets enabled update_h would end up declared
    # with the E coefficient slots as well.
    e_argtypes = [c_int] * 5 + [carg] * 6
    h_argtypes = [c_int] * 5 + [carg] * 6
    if ce_on:
        e_argtypes += [carg] * 3
        e_args += fields.ces
    if ch_on:
        h_argtypes += [carg] * 3
        h_args += fields.chs

    program.update_e.argtypes = e_argtypes
    program.update_e.restype = None
    program.update_h.argtypes = h_argtypes
    program.update_h.restype = None

    def set_args(args, idx0, nmax):
        # Copy of `args` with positions 3 and 4 replaced by (idx0, nmax).
        return args[:3] + [idx0, nmax] + args[5:]

    e_args_dict = {
        '': e_args,
        'pre': set_args(e_args, nyz_pitch, 3 * nyz_pitch),
        'post': set_args(e_args, 0, nyz_pitch)}

    h_args_dict = {
        '': h_args,
        'pre': set_args(h_args, 0, 2 * nyz_pitch),
        'post': set_args(h_args, 2 * nyz_pitch, 3 * nyz_pitch)}

    # global variables
    self.mainf = fields
    self.program = program
    self.e_args_dict = e_args_dict
    self.h_args_dict = h_args_dict

    # append to the update list
    self.priority_type = 'core'
    fields.append_instance(self)
def __init__(self, nx, ny, nz,
             coeff_use='e',
             precision_float='single',
             use_cpu_core=0):
    """Allocate the field arrays and compile the SSE 'core' kernel.

    Parameters
    ----------
    nx, ny, nz : int
        Grid dimensions.  The last axis is padded up to a multiple of the
        16-byte vector width (4 floats or 2 doubles).
    coeff_use : {'', 'e', 'h', 'eh'}
        Which coefficient arrays to allocate: ``ces`` when it contains
        'e', ``chs`` when it contains 'h'.
    precision_float : {'single', 'double'}
        Selects ``np.float32`` or ``np.float64`` and the matching C
        type/intrinsic strings.
    use_cpu_core : int
        When non-zero, an ``omp_set_num_threads(<n>);`` call is substituted
        into the kernel source.
    """
    common.check_type('nx', nx, int)
    common.check_type('ny', ny, int)
    common.check_type('nz', nz, int)
    common.check_value('coeff_use', coeff_use, ('', 'e', 'h', 'eh'))
    common.check_value('precision_float', precision_float,
                       ('single', 'double'))

    self.nx = nx
    self.ny = ny
    self.nz = nz
    self.coeff_use = coeff_use
    self.dtype = {'single': np.float32,
                  'double': np.float64}[precision_float]
    # The sixth entry is the substitution for the 'MASK_H' macro.
    self.dtype_str_list = {
        'single': ['float', 'xmmintrin.h', 'ps', '__m128', '4', '0, 1, 1, 1'],
        'double': ['double', 'emmintrin.h', 'pd', '__m128d', '2', '0, 1'],
    }[precision_float]

    self.device_type = 'cpu'

    # padding for the nz which is multiple of 4 (float32) or 2 (float64)
    a_size = {'single': 4, 'double': 2}[precision_float]  # 16 Bytes
    self.pad = pad = int(np.ceil(float(nz) / a_size) * a_size) - nz
    self.slz = slice(None, None) if pad == 0 else slice(None, -pad)
    self.nz_pitch = nz_pitch = nz + pad

    # Substitution for the 'MASK_E' macro.  tolist() yields plain Python
    # ints, so the text is e.g. '1, 1, 0, 0' regardless of how the
    # installed NumPy prints its scalar types.
    mask_arr = np.ones(a_size, 'i')
    mask_arr[-(pad + 1):] = 0
    self.dtype_str_list.append(', '.join(str(v) for v in mask_arr.tolist()))

    # ns, qtask, enqueue
    self.ns = [nx, ny, nz]
    self.ns_pitch = [nx, ny, nz_pitch]
    self.qtask = QueueTask()
    self.enqueue = self.qtask.enqueue
    self.enqueue_barrier = self.qtask.enqueue_barrier

    # on/off the coefficient arrays
    self.ce_on = 'e' in self.coeff_use
    self.ch_on = 'h' in self.coeff_use

    # allocations
    self.ehs = [np.zeros(self.ns_pitch, dtype=self.dtype) for i in range(6)]
    self.ex, self.ey, self.ez, self.hx, self.hy, self.hz = self.ehs

    if self.ce_on:
        self.ces = [np.ones(self.ns_pitch, dtype=self.dtype) * 0.5
                    for i in range(3)]
        self.cex, self.cey, self.cez = self.ces

    if self.ch_on:
        self.chs = [np.ones(self.ns_pitch, dtype=self.dtype) * 0.5
                    for i in range(3)]
        self.chx, self.chy, self.chz = self.chs

    # program
    macros = [
        'ARGS_CE', 'INIT_CE', 'PRIVATE_CE', 'CEX', 'CEY', 'CEZ',
        'ARGS_CH', 'INIT_CH', 'PRIVATE_CH', 'CHX', 'CHY', 'CHZ',
        'OMP_SET_NUM_THREADS',
        'DTYPE', 'MM_HEADER', 'PSD', 'TYPE128', 'INCRE', 'MASK_H', 'MASK_E']

    values = [
        '', 'ce=SET1(0.5)', '', '', '', '',
        '', 'ch=SET1(0.5)', '', '', '', '',
        ''] + self.dtype_str_list

    if use_cpu_core != 0:
        values[12] = 'omp_set_num_threads(%d);' % use_cpu_core

    if self.ce_on:
        values[:6] = [
            ', DTYPE *cex, DTYPE *cey, DTYPE *cez', 'ce', ', ce',
            'ce = LOAD(cex+idx);', 'ce = LOAD(cey+idx);', 'ce = LOAD(cez+idx);']
    if self.ch_on:
        values[6:12] = [
            ', DTYPE *chx, DTYPE *chy, DTYPE *chz', 'ch', ', ch',
            'ch = LOAD(chx+idx);', 'ch = LOAD(chy+idx);', 'ch = LOAD(chz+idx);']

    # Read the template through a context manager so the handle is closed.
    with open(common_cpu.src_path + 'core.c') as src_file:
        template = src_file.read()
    ksrc = common.replace_template_code(template, macros, values)
    self.program = common_cpu.build_clib(ksrc)

    carg = np.ctypeslib.ndpointer(dtype=self.dtype, ndim=3,
                                  shape=(nx, ny, nz_pitch),
                                  flags='C_CONTIGUOUS, ALIGNED')

    self.e_args = self.ns_pitch + [0, nx * ny * nz_pitch] + self.ehs
    self.h_args = self.ns_pitch + [0, nx * ny * nz_pitch] + self.ehs

    # Each function gets its OWN argtypes list.  Assigning one shared list
    # to both and then using `argtypes += ...` extends that shared object,
    # so with coeff_use='eh' update_h would end up declared with the E
    # coefficient slots as well.
    e_argtypes = [c_int] * 5 + [carg] * 6
    h_argtypes = [c_int] * 5 + [carg] * 6
    if self.ce_on:
        e_argtypes += [carg] * 3
        self.e_args += self.ces
    if self.ch_on:
        h_argtypes += [carg] * 3
        self.h_args += self.chs

    self.program.update_e.argtypes = e_argtypes
    self.program.update_e.restype = None
    self.program.update_h.argtypes = h_argtypes
    self.program.update_h.restype = None
def __init__(self, fields):
    """Compile the SSE 'core' kernel and build pre/mid/post argument sets.

    ``core.c`` is specialised by macro substitution, built with
    ``common_cpu.build_clib`` and its ``update_e``/``update_h`` entry
    points are given ctypes signatures.  Besides the full-range argument
    lists, copies with different (start, end) values in positions 3 and 4
    are stored under the keys 'pre', 'mid' and 'post'.

    Parameters
    ----------
    fields : Fields
        Provides ``precision_float``, ``use_cpu_core``, ``dtype``,
        ``ns_pitch``, ``align_size``, ``pad``, ``ce_on``/``ch_on``, ``ehs``
        (and ``ces``/``chs`` when enabled) and ``append_instance``.
    """
    common.check_type('fields', fields, Fields)

    # local variables
    precision_float = fields.precision_float
    use_cpu_core = fields.use_cpu_core
    dtype = fields.dtype
    nx, ny, nz_pitch = ns_pitch = fields.ns_pitch
    align_size = fields.align_size
    pad = fields.pad
    ce_on = fields.ce_on
    ch_on = fields.ch_on
    ehs = fields.ehs

    # program
    dtype_str_list = {
        'single': ['float', 'xmmintrin.h', 'ps', '__m128', '4'],
        'double': ['double', 'emmintrin.h', 'pd', '__m128d', '2'],
    }[precision_float]

    def mask_to_str(mask):
        # tolist() yields plain Python ints, so the text is '0, 1, 1, 1'
        # regardless of how the installed NumPy prints its scalar types.
        return ', '.join(str(v) for v in mask.tolist())

    mask0 = np.ones(align_size, 'i')

    mask_h = mask0.copy()
    mask_h[0] = 0

    mask_exy = mask0.copy()
    mask_exy[-(pad + 1):] = 0

    mask = mask0.copy()
    if pad != 0:
        mask[-pad:] = 0

    # Order must match the 'MASK_H', 'MASK_EXY', 'MASK' macros below.
    pad_str_list = [mask_to_str(m) for m in (mask_h, mask_exy, mask)]

    macros = [
        'ARGS_CE', 'INIT_CE', 'PRIVATE_CE', 'CEX', 'CEY', 'CEZ',
        'ARGS_CH', 'INIT_CH', 'PRIVATE_CH', 'CHX', 'CHY', 'CHZ',
        'OMP_SET_NUM_THREADS',
        'DTYPE', 'MM_HEADER', 'PSD', 'TYPE128', 'INCRE',
        'MASK_H', 'MASK_EXY', 'MASK']

    values = [
        '', 'ce=SET1(0.5)', '', '', '', '',
        '', 'ch=SET1(0.5)', '', '', '', '',
        ''] + dtype_str_list + pad_str_list

    if use_cpu_core != 0:
        values[12] = 'omp_set_num_threads(%d);' % use_cpu_core

    if ce_on:
        values[:6] = [
            ', DTYPE *cex, DTYPE *cey, DTYPE *cez', 'ce', ', ce',
            'ce = LOAD(cex+idx);', 'ce = LOAD(cey+idx);', 'ce = LOAD(cez+idx);']
    if ch_on:
        values[6:12] = [
            ', DTYPE *chx, DTYPE *chy, DTYPE *chz', 'ch', ', ch',
            'ch = LOAD(chx+idx);', 'ch = LOAD(chy+idx);', 'ch = LOAD(chz+idx);']

    # Read the template through a context manager so the handle is closed.
    with open(common_cpu.src_path + 'core.c') as src_file:
        template = src_file.read()
    ksrc = common.replace_template_code(template, macros, values)
    program = common_cpu.build_clib(ksrc)

    carg = np.ctypeslib.ndpointer(dtype, ndim=3,
                                  shape=tuple(ns_pitch),
                                  flags='C_CONTIGUOUS, ALIGNED')

    # arguments
    nyz_pitch = ny * nz_pitch
    e_args = ns_pitch + [0, nx * nyz_pitch] + ehs
    h_args = ns_pitch + [0, nx * nyz_pitch] + ehs

    # Each function gets its OWN argtypes list.  Assigning one shared list
    # to both and then using `argtypes += ...` extends that shared object,
    # so with both coefficient sets enabled update_h would end up declared
    # with the E coefficient slots as well.
    e_argtypes = [c_int] * 5 + [carg] * 6
    h_argtypes = [c_int] * 5 + [carg] * 6
    if ce_on:
        e_argtypes += [carg] * 3
        e_args += fields.ces
    if ch_on:
        h_argtypes += [carg] * 3
        h_args += fields.chs

    program.update_e.argtypes = e_argtypes
    program.update_e.restype = None
    program.update_h.argtypes = h_argtypes
    program.update_h.restype = None

    def with_range(args, idx0, nmax):
        # Copy of `args` with positions 3 and 4 replaced by (idx0, nmax).
        ranged = args[:]
        ranged[3:5] = [idx0, nmax]
        return ranged

    pre_e_args = with_range(e_args, (nx - 2) * nyz_pitch, nx * nyz_pitch)
    mid_e_args = with_range(e_args, nyz_pitch, (nx - 2) * nyz_pitch)
    post_e_args = with_range(e_args, 0, nyz_pitch)

    pre_h_args = with_range(h_args, 0, 2 * nyz_pitch)
    mid_h_args = with_range(h_args, 2 * nyz_pitch, (nx - 1) * nyz_pitch)
    post_h_args = with_range(h_args, (nx - 1) * nyz_pitch, nx * nyz_pitch)

    # global variables
    self.mainf = fields
    self.e_args = e_args
    self.h_args = h_args
    self.program = program

    self.e_args_dict = {'': e_args,
                        'pre': pre_e_args,
                        'mid': mid_e_args,
                        'post': post_e_args}
    self.h_args_dict = {'': h_args,
                        'pre': pre_h_args,
                        'mid': mid_h_args,
                        'post': post_h_args}

    # append to the update list
    self.priority_type = 'core'
    fields.append_instance(self)