def setUp(self):
    self.old_flag = get_config().use_local_memory
    get_config().use_local_memory = True
    super(TestAccelerationEval1DGPUOctreeNonCached, self).setUp()
def get_lmem_loop_kernel(self, g_idx, sg_idx, group, dest, source, eq_group):
    kind = 'loop'
    sub_grp = '' if sg_idx == -1 else 's{idx}'.format(idx=sg_idx)
    kernel = 'g{g_idx}{sg}_{source}_on_{dest}_loop'.format(
        g_idx=g_idx, sg=sub_grp, source=source, dest=dest)
    sph_k_name = self.object.kernel.__class__.__name__
    context = eq_group.context
    all_args, py_args = [], []
    setup_code = self._declare_precomp_vars(context)
    setup_code.append('GLOBAL_MEM %s* SPH_KERNEL = kern;' % sph_k_name)

    if eq_group.has_loop_all():
        raise NotImplementedError(
            "loop_all not supported with local memory")

    loop_code = []
    pre = []
    for p, cb in eq_group.precomputed.items():
        src = cb.code.strip().splitlines()
        pre.extend([' ' * 4 + x + ';' for x in src])
    if len(pre) > 0:
        pre.append('')
    loop_code.extend(pre)

    _all_args, _py_args, _calls = self._get_equation_method_calls(
        eq_group, kind, indent=' ')
    loop_code.extend(_calls)
    for arg, py_arg in zip(_all_args, _py_args):
        if arg not in all_args:
            all_args.append(arg)
            py_args.append(py_arg)

    s_ary, d_ary = eq_group.get_array_names()
    source_vars = set(s_ary)
    source_var_types = self._get_arg_base_types(source_vars)

    def modify_var_name(x):
        if x.startswith('s_'):
            return x + '_global'
        else:
            return x

    s_ary.update(d_ary)
    _args = list(s_ary)
    py_args.extend(_args)
    _args_modified = [modify_var_name(x) for x in _args]
    all_args.extend(self._get_typed_args(_args_modified))

    setup_body = '\n'.join([' ' * 4 + x for x in setup_code])
    setup_body = self._set_kernel(setup_body, self.object.kernel)
    loop_body = '\n'.join([' ' * 4 + x for x in loop_code])
    loop_body = self._set_kernel(loop_body, self.object.kernel)

    all_args.extend([
        'GLOBAL_MEM {kernel}* kern'.format(kernel=sph_k_name),
        'double t', 'double dt'
    ])
    all_args.extend(get_kernel_args_list())

    self.data.append(dict(
        kernel=kernel, args=py_args, dest=dest, source=source,
        loop=True, real=group.real, type='kernel'))

    body = generate_body(setup=setup_body, loop=loop_body,
                         vars=source_vars, types=source_var_types,
                         wgs=get_config().wgs)
    sig = get_kernel_definition(kernel, all_args)
    return '{sig}\n{{\n{body}\n\n}}\n'.format(sig=sig, body=body)
def _cleanup():
    get_config().use_double = orig
def convert_to_float_if_needed(code):
    use_double = get_config().use_double
    if not use_double:
        code = re.sub(r'\bdouble\b', 'float', code)
    return code
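# --- Hedged usage sketch (not part of the original source) ---
# With use_double disabled, convert_to_float_if_needed() rewrites every
# standalone `double` token in generated C/OpenCL code to `float`; the
# word boundaries in the regex leave identifiers such as `double3`
# untouched.  The input string below is purely illustrative.
import re  # assumed available to convert_to_float_if_needed() above

from compyle.config import get_config

get_config().use_double = False
print(convert_to_float_if_needed('double rij = 0.0; GLOBAL_MEM double* d_x;'))
# -> float rij = 0.0; GLOBAL_MEM float* d_x;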
def get_loop_kernel(self, g_idx, sg_idx, group, dest, source, eq_group):
    if get_config().use_local_memory:
        return self.get_lmem_loop_kernel(g_idx, sg_idx, group, dest,
                                         source, eq_group)
    kind = 'loop'
    sub_grp = '' if sg_idx == -1 else 's{idx}'.format(idx=sg_idx)
    kernel = 'g{g_idx}{sg}_{source}_on_{dest}_loop'.format(
        g_idx=g_idx, sg=sub_grp, source=source, dest=dest)
    sph_k_name = self.object.kernel.__class__.__name__
    context = eq_group.context
    all_args, py_args = [], []
    code = self._declare_precomp_vars(context)
    code.extend([
        'unsigned int d_idx = GID_0 * LDIM_0 + LID_0;',
        '/* Guard for padded threads. */',
        'if (d_idx > NP_MAX) {return;};',
        'unsigned int s_idx, i;',
        'GLOBAL_MEM %s* SPH_KERNEL = kern;' % sph_k_name,
        'unsigned int start = start_idx[d_idx];',
        'GLOBAL_MEM unsigned int* NBRS = &(neighbors[start]);',
        'int N_NBRS = nbr_length[d_idx];',
        'unsigned int end = start + N_NBRS;'
    ])

    if eq_group.has_loop_all():
        _all_args, _py_args, _calls = self._get_equation_method_calls(
            eq_group, kind='loop_all', indent='')
        code.extend(['', '// Calling loop_all of equations.'])
        code.extend(_calls)
        code.append('')
        all_args.extend(_all_args)
        py_args.extend(_py_args)

    if eq_group.has_loop():
        code.append('// Calling loop of equations.')
        code.append('for (i=start; i<end; i++) {')
        code.append(' s_idx = neighbors[i];')
        pre = []
        for p, cb in eq_group.precomputed.items():
            src = cb.code.strip().splitlines()
            pre.extend([' ' * 4 + x + ';' for x in src])
        if len(pre) > 0:
            pre.append('')
        code.extend(pre)
        _all_args, _py_args, _calls = self._get_equation_method_calls(
            eq_group, kind, indent=' ')
        code.extend(_calls)
        for arg, py_arg in zip(_all_args, _py_args):
            if arg not in all_args:
                all_args.append(arg)
                py_args.append(py_arg)
        code.append('}')

    s_ary, d_ary = eq_group.get_array_names()
    s_ary.update(d_ary)
    _args = list(s_ary)
    py_args.extend(_args)
    all_args.extend(self._get_typed_args(_args))

    body = '\n'.join([' ' * 4 + x for x in code])
    body = self._set_kernel(body, self.object.kernel)

    all_args.extend([
        'GLOBAL_MEM {kernel}* kern'.format(kernel=sph_k_name),
        'GLOBAL_MEM unsigned int *nbr_length',
        'GLOBAL_MEM unsigned int *start_idx',
        'GLOBAL_MEM unsigned int *neighbors',
        'double t', 'double dt', 'unsigned int NP_MAX'
    ])
    self.data.append(dict(
        kernel=kernel, args=py_args, dest=dest, source=source,
        loop=True, real=group.real, type='kernel'))

    sig = get_kernel_definition(kernel, all_args)
    return '{sig}\n{{\n{body}\n\n}}\n'.format(sig=sig, body=body)
def tearDown(self):
    super(OctreeGPUNNPSTestCase, self).tearDown()
    get_config().use_double = self._orig_use_double
def setup_module():
    get_config().use_openmp = True
def setUp(self):
    cu = pytest.importorskip("pycuda")
    cfg = get_config()
    self.orig_use_double = cfg.use_double
    cfg.use_double = True
    self.backend = 'cuda'
def tearDown(self):
    get_config().use_double = self.orig_use_double
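# --- Hedged sketch (not in the original tests) ---
# The setUp/tearDown pairs above all follow the same save/toggle/restore
# pattern on the global compyle config.  The same idea written as a
# context manager; the name `use_double_temporarily` is hypothetical.
from contextlib import contextmanager

from compyle.config import get_config


@contextmanager
def use_double_temporarily(value=True):
    cfg = get_config()
    orig = cfg.use_double
    cfg.use_double = value
    try:
        yield cfg
    finally:
        # Restore the original setting even if the body raises.
        cfg.use_double = orig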
def __init__(self, all_particles, scheme, domain=None, innerloop=True,
             updates=True, parallel=False, steps=None, D=0):
    """The second integrator is a simple Euler integrator (accurate
    enough due to the very small time steps, and very fast) using
    EBGSteps.  EBGSteps are basically the same as EulerSteps, except
    that they work with an intermediate ebg velocity [eu, ev, ew].
    This velocity does not interfere with the actual velocity, which is
    necessary so that the real velocity is not disturbed by artificial
    damping in this step.  The ebg velocity is re-initialized for each
    inner loop and reset in the outer loop."""
    from math import ceil
    from pysph.base.kernels import CubicSpline
    from pysph.sph.integrator_step import EBGStep
    from compyle.config import get_config
    from pysph.sph.integrator import EulerIntegrator
    from pysph.sph.scheme import BeadChainScheme
    from pysph.sph.equation import Group
    from pysph.sph.fiber.utils import (HoldPoints, Contact,
                                       ComputeDistance)
    from pysph.sph.fiber.beadchain import (Tension, Bending,
                                           ArtificialDamping)
    from pysph.base.nnps import DomainManager, LinkedListNNPS
    from pysph.sph.acceleration_eval import AccelerationEval
    from pysph.sph.sph_compiler import SPHCompiler

    if not isinstance(scheme, BeadChainScheme):
        raise TypeError("Scheme must be BeadChainScheme")

    self.innerloop = innerloop
    self.dt = scheme.dt
    self.fiber_dt = scheme.fiber_dt
    self.domain_updates = updates
    self.steps = steps
    self.D = D
    self.eta0 = scheme.rho0 * scheme.nu

    # If more than one particle is involved, the elastic equations are
    # iterated in an inner loop.
    if self.innerloop:
        # second integrator
        # self.fiber_integrator = EulerIntegrator(fiber=EBGStep())
        steppers = {}
        for f in scheme.fibers:
            steppers[f] = EBGStep()
        self.fiber_integrator = EulerIntegrator(**steppers)

        # The type of spline has no influence here, but it must be
        # large enough to contain the next particle.
        kernel = CubicSpline(dim=scheme.dim)
        equations = []

        g1 = []
        for fiber in scheme.fibers:
            g1.append(ComputeDistance(dest=fiber, sources=[fiber]))
        equations.append(Group(equations=g1))

        g2 = []
        for fiber in scheme.fibers:
            g2.append(Tension(dest=fiber, sources=None,
                              ea=scheme.E * scheme.A))
            g2.append(Bending(dest=fiber, sources=None,
                              ei=scheme.E * scheme.Ip))
            g2.append(Contact(dest=fiber, sources=scheme.fibers,
                              E=scheme.E, d=scheme.dx, dim=scheme.dim,
                              k=scheme.k, lim=scheme.lim,
                              eta0=self.eta0))
            g2.append(ArtificialDamping(dest=fiber, sources=None,
                                        d=self.D))
        equations.append(Group(equations=g2))

        g3 = []
        for fiber in scheme.fibers:
            g3.append(HoldPoints(dest=fiber, sources=None, tag=100))
        equations.append(Group(equations=g3))

        # These equations are applied to fiber particles only - that is
        # the reason for the computational speed-up.
        particles = [p for p in all_particles if p.name in scheme.fibers]

        # A separate DomainManager is needed to ensure that particles
        # don't leave the domain.
        if domain:
            xmin = domain.manager.xmin
            ymin = domain.manager.ymin
            zmin = domain.manager.zmin
            xmax = domain.manager.xmax
            ymax = domain.manager.ymax
            zmax = domain.manager.zmax
            periodic_in_x = domain.manager.periodic_in_x
            periodic_in_y = domain.manager.periodic_in_y
            periodic_in_z = domain.manager.periodic_in_z
            gamma_yx = domain.manager.gamma_yx
            gamma_zx = domain.manager.gamma_zx
            gamma_zy = domain.manager.gamma_zy
            n_layers = domain.manager.n_layers
            N = self.steps or int(ceil(self.dt / self.fiber_dt))
            # dt = self.dt/N
            self.domain = DomainManager(
                xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax,
                zmin=zmin, zmax=zmax,
                periodic_in_x=periodic_in_x,
                periodic_in_y=periodic_in_y,
                periodic_in_z=periodic_in_z,
                gamma_yx=gamma_yx, gamma_zx=gamma_zx, gamma_zy=gamma_zy,
                n_layers=n_layers, dt=self.dt, calls_per_step=N)
        else:
            self.domain = None

        # A separate list for the nearest-neighbour search is beneficial
        # since it is much smaller than the original one.
        nnps = LinkedListNNPS(dim=scheme.dim, particles=particles,
                              radius_scale=kernel.radius_scale,
                              domain=self.domain, fixed_h=False,
                              cache=False, sort_gids=False)

        # The acceleration evaluator needs to be set up in order to
        # compile it together with the integrator.
        if parallel:
            self.acceleration_eval = AccelerationEval(
                particle_arrays=particles, equations=equations,
                kernel=kernel)
        else:
            self.acceleration_eval = AccelerationEval(
                particle_arrays=particles, equations=equations,
                kernel=kernel, mode='serial')

        # The integrator is compiled without OpenMP because the overhead
        # is too large for the few fiber particles.
        comp = SPHCompiler(self.acceleration_eval, self.fiber_integrator)
        if parallel:
            comp.compile()
        else:
            config = get_config()
            config.use_openmp = False
            comp.compile()
            config.use_openmp = True
        self.acceleration_eval.set_nnps(nnps)

        # Connect the neighbour list to the integrator.
        self.fiber_integrator.set_nnps(nnps)
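# --- Hedged variant (not in the original code) ---
# The serial-compile branch above switches use_openmp off and back on by
# hand; a try/finally restores the flag even if compilation fails.  The
# helper name `compile_without_openmp` is hypothetical and `comp` stands
# for the SPHCompiler instance built in __init__.
def compile_without_openmp(comp):
    from compyle.config import get_config

    config = get_config()
    orig_openmp = config.use_openmp
    config.use_openmp = False
    try:
        comp.compile()
    finally:
        config.use_openmp = orig_openmp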
def setUp(self):
    ocl = pytest.importorskip("pyopencl")
    cfg = get_config()
    self.orig_use_double = cfg.use_double
    cfg.use_double = True
    self.backend = 'opencl'
def tearDown(self):
    super(TestZOrderGPUNNPSWithSorting, self).tearDown()
    get_config().use_double = self._orig_use_double
               choices=['gpu_comp', 'omp_comp', 'comp_algo'],
               help='Choose the comparison.')
p.add_argument('--nnps', action='store', dest='nnps', default='linear',
               choices=['linear', 'simple'], help='Choose algorithm.')
p.add_argument('--use-double', action='store_true', dest='use_double',
               default=False, help='Use double precision on the GPU.')
o = p.parse_args()
get_config().use_double = o.use_double
solver_algo = (md_nnps.MDNNPSSolver if o.nnps == 'linear'
               else md_simple.MDSolver)
n_list = ([10000 * (2 ** i) for i in range(10)] if o.nnps == 'linear'
          else [500 * (2 ** i) for i in range(8)])
if o.comp == "gpu_comp":
    backends = ["opencl", "cuda", "cython"]
    print("Running for", n_list)
    speedups, t_list = compare(backends, n_list, solver_algo)
    plot(n_list, speedups, t_list, o.nnps)
elif o.comp == "omp_comp":
    backends = ["cython_omp", "cython"]
    print("Running for", n_list)
    speedups, t_list = compare(backends, n_list, solver_algo)
    plot(n_list, speedups, t_list, o.nnps)
if __name__ == '__main__':
    from argparse import ArgumentParser
    p = ArgumentParser()
    p.add_argument('-b', '--backend', action='store', dest='backend',
                   default='cython', help='Choose the backend.')
    p.add_argument('--openmp', action='store_true', dest='openmp',
                   default=False, help='Use OpenMP.')
    p.add_argument('--use-double', action='store_true', dest='use_double',
                   default=False, help='Use double precision on the GPU.')
    p.add_argument('-n', action='store', type=int, dest='n', default=10000,
                   help='Number of particles.')
    o = p.parse_args()
    get_config().use_openmp = o.openmp
    get_config().use_double = o.use_double
    run(o.n, o.backend)
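# Hedged invocation sketch for the block above; the script name is a
# placeholder and only flags defined by the parser are used:
#
#   python script.py -b opencl --use-double -n 100000
#   python script.py --openmp -n 50000    # default 'cython' backend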
def tearDown(self):
    super(TestAccelerationEval1DGPUOctreeNonCached, self).tearDown()
    get_config().use_local_memory = self.old_flag
def setUp(self):
    get_config().use_opencl = False
def tearDown(self):
    super(BruteForceNNPSTestCase, self).tearDown()
    get_config().use_double = self._orig_use_double
def setUp(self):
    get_config().use_opencl = False
    self.backend = None
def tearDown(self):
    super(ZOrderGPUDoubleNNPSTestCase, self).tearDown()
    get_config().use_double = self._orig_use_double
def teardown_module():
    get_config().use_openmp = False