def __init__(self, gpuf, direction):
    """Create the CPU buffer fields and field get/set helpers for `gpuf`.

    For every sign contained in `direction` a `cpu.Fields` buffer with a
    first dimension of 3 (and gpuf's ny, nz, coeff_use and precision) is
    allocated, together with GetFields/SetFields helpers for the
    'hy'/'hz' and 'ey'/'ez' components and a zero-filled receive array.

    Args:
        gpuf: the `gpu.Fields` instance (validated with common.check_type).
        direction: one of '+', '-' or '+-'.
    """
    common.check_type('gpuf', gpuf, gpu.Fields)
    common.check_value('direction', direction, ('+', '-', '+-'))

    # One task queue is shared by both buffer fields.
    queue_task = cpu.QueueTask()

    if '+' in direction:
        buf_plus = cpu.Fields(queue_task, 3, gpuf.ny, gpuf.nz,
                              gpuf.coeff_use, gpuf.precision_float,
                              use_cpu_core=0)
        self.cpuf_p = buf_plus

        # h: read from the GPU fields, written into the CPU buffer.
        self.gf_p_h = gpu.GetFields(gpuf, ['hy', 'hz'],
                                    (-2, 0, 0), (-2, -1, -1))
        self.sf_p_h = cpu.SetFields(buf_plus, ['hy', 'hz'],
                                    (0, 0, 0), (0, -1, -1), True)

        # e: read from the CPU buffer, written into the GPU fields.
        self.gf_p_e = cpu.GetFields(buf_plus, ['ey', 'ez'],
                                    (1, 0, 0), (1, -1, -1))
        self.sf_p_e = gpu.SetFields(gpuf, ['ey', 'ez'],
                                    (-1, 0, 0), (-1, -1, -1), True)

        # Buffer-side helpers plus the array that incoming e data lands in.
        self.gf_h = cpu.GetFields(buf_plus, ['hy', 'hz'],
                                  (1, 0, 0), (1, -1, -1))
        self.sf_e = cpu.SetFields(buf_plus, ['ey', 'ez'],
                                  (-1, 0, 0), (-1, -1, -1), True)
        self.tmp_recv_e = np.zeros(self.gf_h.host_array.shape, gpuf.dtype)

    if '-' in direction:
        buf_minus = cpu.Fields(queue_task, 3, gpuf.ny, gpuf.nz,
                               gpuf.coeff_use, gpuf.precision_float,
                               use_cpu_core=0)
        self.cpuf_m = buf_minus

        # e: read from the GPU fields, written into the CPU buffer.
        self.gf_m_e = gpu.GetFields(gpuf, ['ey', 'ez'],
                                    (1, 0, 0), (1, -1, -1))
        self.sf_m_e = cpu.SetFields(buf_minus, ['ey', 'ez'],
                                    (-1, 0, 0), (-1, -1, -1), True)

        # h: read from the CPU buffer, written into the GPU fields.
        self.gf_m_h = cpu.GetFields(buf_minus, ['hy', 'hz'],
                                    (1, 0, 0), (1, -1, -1))
        self.sf_m_h = gpu.SetFields(gpuf, ['hy', 'hz'],
                                    (0, 0, 0), (0, -1, -1), True)

        # Buffer-side helpers plus the array that incoming h data lands in.
        self.gf_e = cpu.GetFields(buf_minus, ['ey', 'ez'],
                                  (1, 0, 0), (1, -1, -1))
        self.sf_h = cpu.SetFields(buf_minus, ['hy', 'hz'],
                                  (0, 0, 0), (0, -1, -1), True)
        self.tmp_recv_h = np.zeros(self.gf_e.host_array.shape, gpuf.dtype)

    # global variables
    self.direction = direction
    self.qtask = queue_task
def runTest(self):
    """Check the CPU field update against the `naive` implementation.

    `self.args` supplies
    (ufunc, nx, ny, nz, coeff_use, precision_float, use_cpu_core, tmax).
    Both implementations are seeded with the same random fields and
    coefficients, stepped `tmax` times with the update selected by
    `ufunc` ('e' or 'h'), and the updated components must match exactly
    (norm of the difference equal to 0).  Any other `ufunc` value only
    runs the setup and asserts nothing.
    """
    (ufunc, nx, ny, nz, coeff_use, precision_float,
     use_cpu_core, tmax) = self.args

    qtask = cpu.QueueTask()
    fields = cpu.Fields(qtask, nx, ny, nz, coeff_use, precision_float,
                        use_cpu_core)
    cpu.Core(fields)

    fields_ref = naive.Fields(nx, ny, nz, precision_float, segment_nbytes=16)
    naive.Core(fields_ref)

    # allocations: identical initial data for both implementations
    dtype = fields.dtype
    ehs = common_random.generate_ehs(nx, ny, nz, dtype, ufunc)
    fields.set_ehs(*ehs)
    fields_ref.set_ehs(*ehs)

    ces, chs = common_random.generate_cs(nx, ny, nz, dtype, coeff_use)
    if 'e' in coeff_use:
        fields.set_ces(*ces)
        fields_ref.set_ces(*ces)
    if 'h' in coeff_use:
        fields.set_chs(*chs)
        fields_ref.set_chs(*chs)

    # verify
    strf_list = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']
    # list(...) keeps the slicing valid where zip() returns an iterator
    # (Python 3) and preserves the original truncation to len(ehs).
    pairs = list(zip(strf_list, ehs))

    if ufunc == 'e':
        update, update_ref = fields.update_e, fields_ref.update_e
        checked = pairs[:3]
    elif ufunc == 'h':
        update, update_ref = fields.update_h, fields_ref.update_h
        checked = pairs[3:]
    else:
        return

    for _ in range(tmax):
        update()
        update_ref()
    # Let the queued updates finish before reading the results back.
    fields.enqueue_barrier()

    for strf, _ in checked:
        diff = fields.get(strf) - fields_ref.get(strf)
        norm = np.linalg.norm(diff)
        max_diff = np.abs(diff).max()
        self.assertEqual(
            norm, 0,
            '%s, %s, %g, %g' % (self.args, strf, norm, max_diff))
def __init__(self, gpuf, direction, tmax):
    """Create CPU buffers, field helpers and MPI requests for `gpuf`.

    For every sign contained in `direction` a `cpu.Fields` buffer with a
    first dimension of 3 is allocated, GetFields/SetFields helpers are
    built for 'hy'/'hz' and 'ey'/'ez', one send request is created with
    `comm.Send_init` and two receive arrays each get a `comm.Recv_init`
    request.  h data uses tag 0 and e data uses tag 1; the '+' side
    talks to `rank + 1`, the '-' side to `rank - 1`.

    Args:
        gpuf: the `gpu.Fields` instance (validated with common.check_type).
        direction: one of '+', '-' or '+-'.
        tmax: stored as `self.tmax`; `self.tstep` starts at 1.
    """
    common.check_type('gpuf', gpuf, gpu.Fields)
    common.check_value('direction', direction, ('+', '-', '+-'))

    # One task queue is shared by both buffer fields.
    queue_task = cpu.QueueTask()

    if '+' in direction:
        buf_plus = cpu.Fields(queue_task, 3, gpuf.ny, gpuf.nz,
                              gpuf.coeff_use, gpuf.precision_float,
                              use_cpu_core=1)
        self.cpuf_p = buf_plus

        # h: read from the GPU fields, written into the CPU buffer.
        self.gf_p_h = gpu.GetFields(gpuf, ['hy', 'hz'],
                                    (-2, 0, 0), (-2, -1, -1))
        self.sf_p_h = cpu.SetFields(buf_plus, ['hy', 'hz'],
                                    (0, 0, 0), (0, -1, -1), True)

        # e: read from the CPU buffer, written into the GPU fields.
        self.gf_p_e = cpu.GetFields(buf_plus, ['ey', 'ez'],
                                    (1, 0, 0), (1, -1, -1))
        self.sf_p_e = gpu.SetFields(gpuf, ['ey', 'ez'],
                                    (-1, 0, 0), (-1, -1, -1), True)

        getter_h = cpu.GetFields(buf_plus, ['hy', 'hz'],
                                 (1, 0, 0), (1, -1, -1))
        self.gf_h = getter_h
        self.sf_e = cpu.SetFields(buf_plus, ['ey', 'ez'],
                                  (-1, 0, 0), (-1, -1, -1), True)

        # Outgoing h goes to rank + 1 (tag 0); incoming e arrives with tag 1.
        self.req_send_h = comm.Send_init(getter_h.host_array, rank + 1, tag=0)
        self.tmp_recv_e_list = [
            np.zeros(getter_h.host_array.shape, gpuf.dtype)
            for _ in range(2)
        ]
        self.req_recv_e_list = [
            comm.Recv_init(recv_buf, rank + 1, tag=1)
            for recv_buf in self.tmp_recv_e_list
        ]
        # presumably the index of the receive buffer currently in use;
        # it is only initialised here - TODO confirm against the callers
        self.switch_e = 0

    if '-' in direction:
        buf_minus = cpu.Fields(queue_task, 3, gpuf.ny, gpuf.nz,
                               gpuf.coeff_use, gpuf.precision_float,
                               use_cpu_core=1)
        self.cpuf_m = buf_minus

        # e: read from the GPU fields, written into the CPU buffer.
        self.gf_m_e = gpu.GetFields(gpuf, ['ey', 'ez'],
                                    (1, 0, 0), (1, -1, -1))
        self.sf_m_e = cpu.SetFields(buf_minus, ['ey', 'ez'],
                                    (-1, 0, 0), (-1, -1, -1), True)

        # h: read from the CPU buffer, written into the GPU fields.
        self.gf_m_h = cpu.GetFields(buf_minus, ['hy', 'hz'],
                                    (1, 0, 0), (1, -1, -1))
        self.sf_m_h = gpu.SetFields(gpuf, ['hy', 'hz'],
                                    (0, 0, 0), (0, -1, -1), True)

        getter_e = cpu.GetFields(buf_minus, ['ey', 'ez'],
                                 (1, 0, 0), (1, -1, -1))
        self.gf_e = getter_e
        self.sf_h = cpu.SetFields(buf_minus, ['hy', 'hz'],
                                  (0, 0, 0), (0, -1, -1), True)

        # Outgoing e goes to rank - 1 (tag 1); incoming h arrives with tag 0.
        self.req_send_e = comm.Send_init(getter_e.host_array, rank - 1, tag=1)
        self.tmp_recv_h_list = [
            np.zeros(getter_e.host_array.shape, gpuf.dtype)
            for _ in range(2)
        ]
        self.req_recv_h_list = [
            comm.Recv_init(recv_buf, rank - 1, tag=0)
            for recv_buf in self.tmp_recv_h_list
        ]
        # presumably the index of the receive buffer currently in use;
        # it is only initialised here - TODO confirm against the callers
        self.switch_h = 0

    # global variables
    self.direction = direction
    self.qtask = queue_task
    self.tmax = tmax
    self.tstep = 1
# Grid-size presets; the trailing figure on each line is the author's
# memory estimate for that size.
#nx, ny, nz = 240, 256, 256     # 540 MB
#nx, ny, nz = 544, 544, 544     # 5527 MB
#nx, ny, nz = 512, 512, 512     # 4608 MB
#nx, ny, nz = 480, 480, 480     # 3796 MB
# NOTE(review): every other preset works out to 36 bytes per cell, which
# would put the active 800x256x256 grid at about 1800 MB; "576 MB" matches
# 256x256x256 instead - the annotation looks stale, please confirm.
nx, ny, nz = 800, 256, 256      # 576 MB
#nx, ny, nz = 128, 128, 128     # 72 MB
coeff_use = 'e'
precision_float = 'single'

# instances
gpu_devices = common_gpu.gpu_device_list(print_info=False)
context = cl.Context(gpu_devices)
device = gpu_devices[0]        # only the first listed device is used
qtask = cpu.QueueTask()
fields = Fields(context, device, qtask, nx, ny, nz,
                coeff_use, precision_float)
Core(fields)

# Fewer time steps when plotting is enabled.
if is_plot:
    tmax = 250
else:
    tmax = 1000

# Rank 0 gets '+', the last rank gets '-', every other rank gets '+-'.
direction = '+' if rank == 0 else ('-' if rank == size - 1 else '+-')

# Alternative exchange implementations, kept for reference:
#exch = node.ExchangeMpiNonBlock(fields, direction)
#exch = node.ExchangeMpiBufferBlock(fields, direction)
#exch = node.ExchangeMpiBufferBlockSplit(fields, direction)
exch = node.ExchangeMpiBufferNonBlockSplitEnqueue(fields, direction, tmax)

# Attach a CPU core to each buffer field the exchange object created.
if '+' in direction:
    cpu.Core(exch.cpuf_p)
if '-' in direction:
    cpu.Core(exch.cpuf_m)
use_cpu_core = 1

# instances
gpu_devices = common_gpu.gpu_device_list(print_info=False)
context = cl.Context(gpu_devices)
device = gpu_devices[0]        # only the first listed device is used
gpuf = gpu.Fields(context, device, nx, ny, nz, coeff_use, precision_float)

# Fewer time steps when plotting is enabled.
if is_plot:
    tmax = 250
else:
    tmax = 1000

# Rank 0 gets '+', the last rank gets '-', every other rank gets '+-'.
direction = '+' if rank == 0 else ('-' if rank == size - 1 else '+-')

# One CPU buffer field (first dimension 2, own task queue) per side that
# appears in `direction`, keyed 'x+' / 'x-'.
buffer_dict = dict(
    (key, cpu.Fields(cpu.QueueTask(), 2, ny, nz,
                     coeff_use, precision_float, use_cpu_core))
    for key, sign in (('x+', '+'), ('x-', '-'))
    if sign in direction
)

nodef = node.Fields([gpuf], buffer_dict)
node.Core(nodef)

# Alternative exchange implementations, kept for reference:
#network.ExchangeMpiBlock(nodef, direction)
#network.ExchangeMpiNonBlock(nodef, direction)
network.ExchangeMpiBuffer(nodef)

# NOTE(review): the original indentation was lost; both assignments are
# assumed to belong to the `if is_plot:` body - confirm.
if is_plot:
    is_master = bool(rank == 0)
    getf = gpu.GetFields(gpuf, 'ez', (0, 0, 0.5), (-1, -1, 0.5))
# instances
gpu_devices = common_gpu.gpu_device_list(print_info=False)
context = cl.Context(gpu_devices)
device = gpu_devices[0]        # only the first listed device is used
gpuf = gpu.Fields(context, device, nx, ny, nz, coeff_use, precision_float)

# Fewer time steps when plotting is enabled.
if is_plot:
    tmax = 250
else:
    tmax = 10000

# Rank-dependent selection is disabled here; both sides are always used.
#if rank == 0: direction = '+'
#elif rank == size - 1: direction = '-'
#else: direction = '+-'
direction = '+-'

# One CPU buffer field (first dimension 2, own task queue) per side that
# appears in `direction`, keyed 'x+' / 'x-'.
buffer_dict = dict(
    (key, cpu.Fields(cpu.QueueTask(), 2, ny, nz,
                     coeff_use, precision_float, use_cpu_core))
    for key, sign in (('x+', '+'), ('x-', '-'))
    if sign in direction
)

nodef = node.Fields([gpuf], buffer_dict)
node.Core(nodef)

# Alternative exchange implementations, kept for reference:
#network.ExchangeMpiBlock(nodef, direction)
#network.ExchangeMpiNonBlock(nodef, direction)
network.ExchangeMpiBuffer(nodef)

# NOTE(review): the original indentation was lost; both assignments are
# assumed to belong to the `if is_plot:` body - confirm.
if is_plot:
    is_master = bool(rank == 1)
    getf = gpu.GetFields(gpuf, 'ez', (0, 0, 0.5), (-1, -1, 0.5))
def runTest(self):
    """Check the GPU field update against the `naive` implementation.

    `self.args` supplies
    (ufunc, nx, ny, nz, coeff_use, precision_float, tmax).
    Both implementations are seeded with the same random fields and
    coefficients, stepped `tmax` times with the update selected by
    `ufunc` ('e' or 'h'), and each updated component is copied back from
    its device buffer and must match the reference exactly (norm of the
    difference equal to 0).  Any other `ufunc` value only runs the setup
    and asserts nothing.
    """
    ufunc, nx, ny, nz, coeff_use, precision_float, tmax = self.args

    gpu_devices = common_gpu.gpu_device_list(print_info=False)
    context = cl.Context(gpu_devices)
    device = gpu_devices[0]
    qtask = cpu.QueueTask()
    fields = gpu.Fields(context, device, qtask, nx, ny, nz,
                        coeff_use, precision_float)
    gpu.Core(fields)

    fields_ref = naive.Fields(nx, ny, nz, precision_float, segment_nbytes=64)
    naive.Core(fields_ref)

    # allocations: identical initial data for both implementations
    dtype = fields.dtype
    strf_list = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']

    ehs = common_random.generate_ehs(nx, ny, nz, dtype, ufunc)
    fields.set_eh_bufs(*ehs)
    fields_ref.set_ehs(*ehs)

    ces, chs = common_random.generate_cs(nx, ny, nz, dtype, coeff_use)
    if 'e' in coeff_use:
        fields.set_ce_bufs(*ces)
        fields_ref.set_ces(*ces)
    if 'h' in coeff_use:
        fields.set_ch_bufs(*chs)
        fields_ref.set_chs(*chs)

    # Host staging array that each device buffer is copied into.
    tmpf = np.zeros(fields.ns_pitch, dtype=dtype)

    # list(...) keeps the slicing valid where zip() returns an iterator
    # (Python 3) and preserves the original truncation to len(ehs).
    pairs = list(zip(strf_list, ehs))

    # update
    if ufunc == 'e':
        update, update_ref = fields.update_e, fields_ref.update_e
        checked = pairs[:3]
    elif ufunc == 'h':
        update, update_ref = fields.update_h, fields_ref.update_h
        checked = pairs[3:]
    else:
        return

    for _ in range(tmax):
        update()
        update_ref()
    # The original issued this barrier only on the 'e' path; the 'h' path
    # now waits for the queued work as well before buffers are read back.
    qtask.enqueue_barrier()

    for strf, _ in checked:
        cl.enqueue_copy(fields.queue, tmpf, fields.get_buf(strf))
        diff = fields_ref.get(strf) - tmpf
        norm = np.linalg.norm(diff)
        max_diff = np.abs(diff).max()
        self.assertEqual(
            norm, 0,
            '%s, %s, %g, %g' % (self.args, strf, norm, max_diff))