示例#1
0
    def __init__(self, obs_pts, obs_ns, src_mesh, K_name, nq, params,
                 float_type):
        """Allocate GPU state and load the kernel for a far-field operator.

        Args:
            obs_pts: array of observation points; ``shape[0]`` is stored as
                ``n_obs`` and ``shape[1]`` as ``dim``.
            obs_ns: per-observation array uploaded alongside ``obs_pts``
                (presumably normals -- confirm against the caller).
            src_mesh: indexable pair; ``src_mesh[0]`` is uploaded as
                ``float_type`` data and ``src_mesh[1]`` as ``int32`` data.
            K_name: kernel name; used as a key into ``kernels`` and as the
                suffix of the GPU function name.
            nq: argument forwarded to ``gauss2d_tri`` to build the
                quadrature rule.
            params: kernel parameters, converted with ``np.array`` and
                uploaded as ``float_type``.
            float_type: dtype used for all floating point GPU buffers.
        """
        n_obs = obs_pts.shape[0]
        src_tris = src_mesh[1]
        n_src = src_tris.shape[0]

        # NOTE(review): the factors 3 and 9 are hard-coded here rather than
        # derived from dim / tensor_dim -- confirm this is intended.
        self.shape = (n_obs * 3, n_src * 9)
        self.dim = obs_pts.shape[1]
        self.tensor_dim = kernels[K_name].tensor_dim
        self.n_obs = n_obs
        self.n_src = n_src

        # Input and output work buffers, sized from the counts above.
        self.gpu_in = gpu.empty_gpu(
            self.n_src * self.dim * self.tensor_dim, float_type)
        self.gpu_out = gpu.empty_gpu(
            self.n_obs * self.tensor_dim, float_type)

        # Quadrature rule; element 0 and element 1 are handed to the kernel
        # template below as quad_pts and quad_wts respectively.
        self.q = gauss2d_tri(nq)

        # Upload the geometry and the kernel parameters.
        self.gpu_obs_pts = gpu.to_gpu(obs_pts, float_type)
        self.gpu_obs_ns = gpu.to_gpu(obs_ns, float_type)
        self.gpu_src_pts = gpu.to_gpu(src_mesh[0], float_type)
        self.gpu_src_tris = gpu.to_gpu(src_tris, np.int32)
        self.gpu_params = gpu.to_gpu(np.array(params), float_type)

        # Launch configuration: enough blocks of block_size to cover n_obs.
        self.block_size = 128
        self.n_blocks = int(np.ceil(self.n_obs / self.block_size))

        template_args = {
            'block_size': self.block_size,
            'float_type': gpu.np_to_c_type(float_type),
            'quad_pts': self.q[0],
            'quad_wts': self.q[1],
        }
        self.module = gpu.load_gpu('matrix_free.cl', tmpl_args=template_args)
        self.fnc = getattr(self.module, "farfield_tris_to_pts" + K_name)
示例#2
0
def farfield_pts_direct(K, obs_pts, obs_ns, src_pts, src_ns, vec, params,
                        float_type):
    """Run the ``farfield_pts<K>`` GPU kernel and return its result.

    Args:
        K: kernel name suffix appended to ``"farfield_pts"``.
        obs_pts: 2-D array of observation points; its first axis length is
            the number of observation points.
        obs_ns: per-observation array uploaded alongside ``obs_pts``.
        src_pts: array of source points; ``shape[0]`` is the source count.
        src_ns: per-source array uploaded alongside ``src_pts``.
        vec: input vector; ``vec.shape[0] / n_src`` gives the tensor
            dimension.
        params: kernel parameters, converted with ``np.array``.
        float_type: dtype used for the GPU buffers and to pick the module.

    Returns:
        The contents of the result buffer (``n_obs * tensor_dim`` entries)
        as returned by its ``get()`` method.
    """
    kernel = getattr(get_gpu_module(float_type), "farfield_pts" + K)

    # Unpacking (rather than indexing) keeps the requirement that obs_pts
    # is exactly two-dimensional.
    n_obs, _ = obs_pts.shape
    n_src = src_pts.shape[0]
    tensor_dim = int(vec.shape[0] / n_src)

    gpu_result = gpu.empty_gpu(n_obs * tensor_dim, float_type)

    # Upload the inputs in the order the kernel expects them.
    uploads = [
        gpu.to_gpu(host_array, float_type)
        for host_array in (obs_pts, obs_ns, src_pts, src_ns, vec)
    ]
    uploads.append(gpu.to_gpu(np.array(params), float_type))

    # Enough blocks of block_size to cover every observation point.
    n_blocks = int(np.ceil(n_obs / block_size))

    kernel_args = [gpu_result] + uploads + [np.int32(n_obs), np.int32(n_src)]
    kernel(*kernel_args,
           grid=(n_blocks, 1, 1),
           block=(block_size, 1, 1))
    return gpu_result.get()
示例#3
0
 def call_integrator(start_idx, end_idx):
     """Integrate pairs ``start_idx`` to ``end_idx`` (exclusive) on the GPU.

     The kernel output for the chunk is copied back into
     ``result[start_idx:end_idx]`` from the enclosing scope.
     """
     pair_count = end_idx - start_idx
     # Number of blocks of block_size needed to cover every pair in the
     # chunk; this is passed as the grid size.
     grid_blocks = int(np.ceil(pair_count / block_size))

     chunk_out = gpu.empty_gpu((pair_count, 3, 3, 3, 3), self.float_type)
     quad_pts, quad_wts = q[0], q[1]
     integrator(
         chunk_out,
         np.int32(quad_pts.shape[0]),
         quad_pts,
         quad_wts,
         self.gpu_pts,
         self.gpu_tris,
         gpu_pairs_list,
         np.int32(start_idx),
         np.int32(end_idx),
         self.gpu_params,
         grid=(grid_blocks, 1, 1),
         block=(block_size, 1, 1),
     )
     result[start_idx:end_idx] = chunk_out.get()
示例#4
0
    def __init__(self, fmm):
        """Allocate the GPU work buffers for the given ``fmm`` object.

        Buffer sizes come from ``fmm.n_input``, ``fmm.n_output``,
        ``fmm.n_multipoles`` and ``fmm.n_locals``; every buffer uses
        ``fmm.cfg.float_type`` as its dtype.
        """
        self.fmm = fmm

        # NOTE(review): gd is not used in the visible part of this method;
        # the attribute read is kept so behavior is unchanged.
        gd = fmm.gpu_data

        dtype = fmm.cfg.float_type

        # Input and output vectors.
        self.inp = gpu.empty_gpu(fmm.n_input, dtype)
        self.out = gpu.empty_gpu(fmm.n_output, dtype)

        # Three buffers sized by the multipole count.
        n_multipoles = fmm.n_multipoles
        self.m_check = gpu.empty_gpu(n_multipoles, dtype)
        self.multipoles = gpu.empty_gpu(n_multipoles, dtype)
        self.c2e_scratch = gpu.empty_gpu(n_multipoles, dtype)

        # Two buffers sized by the local-expansion count.
        n_locals = fmm.n_locals
        self.l_check = gpu.empty_gpu(n_locals, dtype)
        self.locals = gpu.empty_gpu(n_locals, dtype)
示例#5
0
async def gpu_run():
    """Launch the module's ``add`` kernel on random data and await the result.

    Ten million random values are uploaded, the kernel is launched with one
    single-thread block per element, and the output buffer is fetched with
    ``gpu.get`` before a debug message is logged.
    """
    # NOTE: a variant that cached (fnc, arg, gpu_R) under the 'add' key of
    # the 'gpu_data' service was sketched here but is currently disabled.
    add_kernel = load_module().add

    host_values = np.random.rand(10000000)
    device_in = gpu.to_gpu(host_values)
    device_out = gpu.empty_gpu(device_in.shape)

    launch_grid = (device_in.shape[0], 1, 1)
    add_kernel(device_out, device_in, grid=launch_grid, block=(1, 1, 1))

    # The fetched array is not used further; awaiting it completes the
    # device-to-host transfer before logging.
    _fetched = await gpu.get(device_out)
    gpu.logger.debug('run')
示例#6
0
 def call_integrator(start_idx, end_idx):
     """Integrate observation triangles ``start_idx`` to ``end_idx`` (exclusive).

     The selected slice of ``obs_tris`` is uploaded, the integrator is
     launched with one single-thread block per (observation, source)
     triangle pair, and the output is copied into
     ``out[start_idx:end_idx]`` from the enclosing scope.
     """
     chunk_len = end_idx - start_idx
     n_src_tris = src_tris.shape[0]

     chunk_out = gpu.empty_gpu(
         (chunk_len, 3, 3, n_src_tris, 3, 3), self.float_type)
     chunk_obs_tris = gpu.to_gpu(obs_tris[start_idx:end_idx], np.int32)

     n_quad = np.int32(self.q[0].shape[0])
     self.integrator(
         chunk_out,
         n_quad,
         self.gpu_qx,
         self.gpu_qw,
         gpu_pts,
         np.int32(chunk_len),
         chunk_obs_tris,
         np.int32(n_src_tris),
         gpu_src_tris,
         self.gpu_params,
         grid=(chunk_len, n_src_tris, 1),
         block=(1, 1, 1),
     )
     out[start_idx:end_idx] = chunk_out.get()
示例#7
0
def test_simple_module():
    """Check that the ``add`` kernel in kernel.cl returns ``in_arr + arg``.

    The kernel is loaded twice -- once by file name via ``gpu.load_gpu`` and
    once from its source text via ``gpu.load_gpu_from_code`` -- and both
    modules must produce the same, correct output.
    """
    n = 10
    in_arr = np.random.rand(n)
    arg = 1.0
    this_dir = os.path.dirname(os.path.realpath(__file__))

    module_from_file = gpu.load_gpu(
        'kernel.cl', tmpl_dir=this_dir, tmpl_args=dict(arg=arg))

    # Read the source inside a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(os.path.join(this_dir, 'kernel.cl')) as kernel_file:
        kernel_source = kernel_file.read()
    module_from_code = gpu.load_gpu_from_code(
        kernel_source, tmpl_args=dict(arg=arg))

    correct = in_arr + arg
    for m in (module_from_file, module_from_code):
        fnc = m.add

        in_gpu = gpu.to_gpu(in_arr, np.float32)
        out_gpu = gpu.empty_gpu(n, np.float32)
        # One single-thread block per array element.
        fnc(out_gpu, in_gpu, grid=(n, 1, 1), block=(1, 1, 1))
        output = out_gpu.get()

        # numpy's signature is (actual, desired); passing them in that order
        # keeps failure messages labelled correctly.
        np.testing.assert_almost_equal(output, correct)
示例#8
0
 def setup_arrays(self):
     """Allocate the multipole, output and input GPU buffers.

     Sizes come from ``self.n_multipoles``, ``self.n_output`` and
     ``self.n_input``; the dtype is ``self.cfg['float_type']``.
     """
     n_multipoles = self.n_multipoles
     self.gpu_multipoles = gpu.empty_gpu(n_multipoles, self.cfg['float_type'])

     n_output = self.n_output
     self.gpu_out = gpu.empty_gpu(n_output, self.cfg['float_type'])

     n_input = self.n_input
     self.gpu_in = gpu.empty_gpu(n_input, self.cfg['float_type'])