    def setUp(self):
        self.old_flag = get_config().use_local_memory
        get_config().use_local_memory = True
        super(TestAccelerationEval1DGPUOctreeNonCached, self).setUp()
Example #2
    def get_lmem_loop_kernel(self, g_idx, sg_idx, group, dest, source,
                             eq_group):
        kind = 'loop'
        sub_grp = '' if sg_idx == -1 else 's{idx}'.format(idx=sg_idx)
        kernel = 'g{g_idx}{sg}_{source}_on_{dest}_loop'.format(g_idx=g_idx,
                                                               sg=sub_grp,
                                                               source=source,
                                                               dest=dest)
        sph_k_name = self.object.kernel.__class__.__name__
        context = eq_group.context
        all_args, py_args = [], []
        setup_code = self._declare_precomp_vars(context)
        setup_code.append('GLOBAL_MEM %s* SPH_KERNEL = kern;' % sph_k_name)

        if eq_group.has_loop_all():
            raise NotImplementedError("loop_all not supported with local "
                                      "memory")

        loop_code = []
        pre = []
        for p, cb in eq_group.precomputed.items():
            src = cb.code.strip().splitlines()
            pre.extend([' ' * 4 + x + ';' for x in src])
        if len(pre) > 0:
            pre.append('')
        loop_code.extend(pre)

        _all_args, _py_args, _calls = self._get_equation_method_calls(
            eq_group, kind, indent='    ')
        loop_code.extend(_calls)
        for arg, py_arg in zip(_all_args, _py_args):
            if arg not in all_args:
                all_args.append(arg)
                py_args.append(py_arg)

        s_ary, d_ary = eq_group.get_array_names()

        source_vars = set(s_ary)
        source_var_types = self._get_arg_base_types(source_vars)

        def modify_var_name(x):
            if x.startswith('s_'):
                return x + '_global'
            else:
                return x

        s_ary.update(d_ary)

        _args = list(s_ary)
        py_args.extend(_args)

        _args_modified = [modify_var_name(x) for x in _args]
        all_args.extend(self._get_typed_args(_args_modified))

        setup_body = '\n'.join([' ' * 4 + x for x in setup_code])
        setup_body = self._set_kernel(setup_body, self.object.kernel)

        loop_body = '\n'.join([' ' * 4 + x for x in loop_code])
        loop_body = self._set_kernel(loop_body, self.object.kernel)

        all_args.extend([
            'GLOBAL_MEM {kernel}* kern'.format(kernel=sph_k_name), 'double t',
            'double dt'
        ])
        all_args.extend(get_kernel_args_list())

        self.data.append(
            dict(kernel=kernel,
                 args=py_args,
                 dest=dest,
                 source=source,
                 loop=True,
                 real=group.real,
                 type='kernel'))

        body = generate_body(setup=setup_body,
                             loop=loop_body,
                             vars=source_vars,
                             types=source_var_types,
                             wgs=get_config().wgs)

        sig = get_kernel_definition(kernel, all_args)
        return ('{sig}\n{{\n{body}\n\n}}\n'.format(sig=sig, body=body))
Example #3
    def _cleanup():
        get_config().use_double = orig
Example #4
def convert_to_float_if_needed(code):
    use_double = get_config().use_double
    if not use_double:
        code = re.sub(r'\bdouble\b', 'float', code)
    return code
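A quick check of the word-boundary behavior of this substitution (a
standalone sketch; the sample source string is made up for illustration):

import re

# r'\bdouble\b' only matches the standalone type name; identifiers such as
# 'double3' are left untouched because '\b' requires a word/non-word boundary.
src = "double rij = 0.0; double3 xij; GLOBAL_MEM double* d_x;"
print(re.sub(r'\bdouble\b', 'float', src))
# -> float rij = 0.0; double3 xij; GLOBAL_MEM float* d_x;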
Example #5
    def get_loop_kernel(self, g_idx, sg_idx, group, dest, source, eq_group):
        if get_config().use_local_memory:
            return self.get_lmem_loop_kernel(g_idx, sg_idx, group, dest,
                                             source, eq_group)
        kind = 'loop'
        sub_grp = '' if sg_idx == -1 else 's{idx}'.format(idx=sg_idx)
        kernel = 'g{g_idx}{sg}_{source}_on_{dest}_loop'.format(g_idx=g_idx,
                                                               sg=sub_grp,
                                                               source=source,
                                                               dest=dest)
        sph_k_name = self.object.kernel.__class__.__name__
        context = eq_group.context
        all_args, py_args = [], []
        code = self._declare_precomp_vars(context)
        code.extend([
            'unsigned int d_idx = GID_0 * LDIM_0 + LID_0;',
            '/* Guard for padded threads. */',
            'if (d_idx > NP_MAX) {return;};', 'unsigned int s_idx, i;',
            'GLOBAL_MEM %s* SPH_KERNEL = kern;' % sph_k_name,
            'unsigned int start = start_idx[d_idx];',
            'GLOBAL_MEM unsigned int* NBRS = &(neighbors[start]);',
            'int N_NBRS = nbr_length[d_idx];',
            'unsigned int end = start + N_NBRS;'
        ])
        if eq_group.has_loop_all():
            _all_args, _py_args, _calls = self._get_equation_method_calls(
                eq_group, kind='loop_all', indent='')
            code.extend(['', '// Calling loop_all of equations.'])
            code.extend(_calls)
            code.append('')
            all_args.extend(_all_args)
            py_args.extend(_py_args)

        if eq_group.has_loop():
            code.append('// Calling loop of equations.')
            code.append('for (i=start; i<end; i++) {')
            code.append('    s_idx = neighbors[i];')
            pre = []
            for p, cb in eq_group.precomputed.items():
                src = cb.code.strip().splitlines()
                pre.extend([' ' * 4 + x + ';' for x in src])
            if len(pre) > 0:
                pre.append('')
            code.extend(pre)

            _all_args, _py_args, _calls = self._get_equation_method_calls(
                eq_group, kind, indent='    ')
            code.extend(_calls)
            for arg, py_arg in zip(_all_args, _py_args):
                if arg not in all_args:
                    all_args.append(arg)
                    py_args.append(py_arg)
            code.append('}')

        s_ary, d_ary = eq_group.get_array_names()
        s_ary.update(d_ary)

        _args = list(s_ary)
        py_args.extend(_args)
        all_args.extend(self._get_typed_args(_args))

        body = '\n'.join([' ' * 4 + x for x in code])
        body = self._set_kernel(body, self.object.kernel)

        all_args.extend([
            'GLOBAL_MEM {kernel}* kern'.format(kernel=sph_k_name),
            'GLOBAL_MEM unsigned int *nbr_length',
            'GLOBAL_MEM unsigned int *start_idx',
            'GLOBAL_MEM unsigned int *neighbors', 'double t', 'double dt',
            'unsigned int NP_MAX'
        ])

        self.data.append(
            dict(kernel=kernel,
                 args=py_args,
                 dest=dest,
                 source=source,
                 loop=True,
                 real=group.real,
                 type='kernel'))

        sig = get_kernel_definition(kernel, all_args)
        return ('{sig}\n{{\n{body}\n\n}}\n'.format(sig=sig, body=body))
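For orientation, this is roughly the kernel body that get_loop_kernel
assembles for a hypothetical group 0 with 'fluid' as both source and dest and
a CubicSpline SPH kernel. It is a sketch held in a Python string: the exact
signature comes from get_kernel_definition (not shown here) and the
per-equation calls are elided.

EXPECTED_SKELETON = '''
g0_fluid_on_fluid_loop(<typed array args>, GLOBAL_MEM CubicSpline* kern,
                       GLOBAL_MEM unsigned int *nbr_length,
                       GLOBAL_MEM unsigned int *start_idx,
                       GLOBAL_MEM unsigned int *neighbors,
                       double t, double dt, unsigned int NP_MAX)
{
    unsigned int d_idx = GID_0 * LDIM_0 + LID_0;
    /* Guard for padded threads. */
    if (d_idx > NP_MAX) {return;};
    unsigned int s_idx, i;
    GLOBAL_MEM CubicSpline* SPH_KERNEL = kern;
    unsigned int start = start_idx[d_idx];
    GLOBAL_MEM unsigned int* NBRS = &(neighbors[start]);
    int N_NBRS = nbr_length[d_idx];
    unsigned int end = start + N_NBRS;
    // Calling loop of equations.
    for (i=start; i<end; i++) {
        s_idx = neighbors[i];
        /* ... per-equation loop() calls ... */
    }
}
'''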
Example #6
    def tearDown(self):
        super(OctreeGPUNNPSTestCase, self).tearDown()
        get_config().use_double = self._orig_use_double
Example #7
def setup_module():
    get_config().use_openmp = True
Example #8
    def setUp(self):
        cu = pytest.importorskip("pycuda")
        cfg = get_config()
        self.orig_use_double = cfg.use_double
        cfg.use_double = True
        self.backend = 'cuda'
Example #9
    def tearDown(self):
        get_config().use_double = self.orig_use_double
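The setUp/tearDown pairs in these examples all follow the same
save-and-restore discipline for get_config() flags. The same idea can be
written once as a context manager (a minimal sketch; config_flag is a made-up
helper, not part of compyle):

from contextlib import contextmanager

from compyle.config import get_config

@contextmanager
def config_flag(name, value):
    # Temporarily set one flag on the global config and restore the
    # previous value even if the body raises.
    cfg = get_config()
    old = getattr(cfg, name)
    setattr(cfg, name, value)
    try:
        yield cfg
    finally:
        setattr(cfg, name, old)

# Hypothetical usage:
# with config_flag('use_double', True):
#     run_gpu_test()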
Example #10
    def __init__(self,
                 all_particles,
                 scheme,
                 domain=None,
                 innerloop=True,
                 updates=True,
                 parallel=False,
                 steps=None,
                 D=0):
        """The second integrator is a simple Euler-Integrator (accurate
        enough due to very small time steps; very fast) using EBGSteps.
        EBGSteps are basically the same as EulerSteps, exept for the fact
        that they work with an intermediate ebg velocity [eu, ev, ew].
        This velocity does not interfere with the actual velocity, which
        is neseccery to not disturb the real velocity through artificial
        damping in this step. The ebg velocity is initialized for each
        inner loop again and reset in the outer loop."""
        from math import ceil
        from pysph.base.kernels import CubicSpline
        from pysph.sph.integrator_step import EBGStep
        from compyle.config import get_config
        from pysph.sph.integrator import EulerIntegrator
        from pysph.sph.scheme import BeadChainScheme
        from pysph.sph.equation import Group
        from pysph.sph.fiber.utils import (HoldPoints, Contact,
                                           ComputeDistance)
        from pysph.sph.fiber.beadchain import (Tension, Bending,
                                               ArtificialDamping)
        from pysph.base.nnps import DomainManager, LinkedListNNPS
        from pysph.sph.acceleration_eval import AccelerationEval
        from pysph.sph.sph_compiler import SPHCompiler

        if not isinstance(scheme, BeadChainScheme):
            raise TypeError("Scheme must be BeadChainScheme")

        self.innerloop = innerloop
        self.dt = scheme.dt
        self.fiber_dt = scheme.fiber_dt
        self.domain_updates = updates
        self.steps = steps
        self.D = D
        self.eta0 = scheme.rho0 * scheme.nu

        # If more than one particle is involved, the elastic equations are
        # iterated in an inner loop.
        if self.innerloop:
            # second integrator
            # self.fiber_integrator = EulerIntegrator(fiber=EBGStep())
            steppers = {}
            for f in scheme.fibers:
                steppers[f] = EBGStep()
            self.fiber_integrator = EulerIntegrator(**steppers)
            # The type of spline has no influence here, though its support
            # must be large enough to contain the next particle.
            kernel = CubicSpline(dim=scheme.dim)
            equations = []
            g1 = []
            for fiber in scheme.fibers:
                g1.append(ComputeDistance(dest=fiber, sources=[fiber]))
            equations.append(Group(equations=g1))

            g2 = []
            for fiber in scheme.fibers:
                g2.append(
                    Tension(dest=fiber, sources=None, ea=scheme.E * scheme.A))
                g2.append(
                    Bending(dest=fiber, sources=None, ei=scheme.E * scheme.Ip))
                g2.append(
                    Contact(dest=fiber,
                            sources=scheme.fibers,
                            E=scheme.E,
                            d=scheme.dx,
                            dim=scheme.dim,
                            k=scheme.k,
                            lim=scheme.lim,
                            eta0=self.eta0))
                g2.append(ArtificialDamping(dest=fiber, sources=None,
                                            d=self.D))
            equations.append(Group(equations=g2))

            g3 = []
            for fiber in scheme.fibers:
                g3.append(HoldPoints(dest=fiber, sources=None, tag=100))
            equations.append(Group(equations=g3))

            # These equations are applied to fiber particles only; this is
            # what provides the computational speed-up.
            particles = [p for p in all_particles if p.name in scheme.fibers]
            # A separate DomainManager is needed to ensure that particles
            # don't leave the domain.
            if domain:
                xmin = domain.manager.xmin
                ymin = domain.manager.ymin
                zmin = domain.manager.zmin
                xmax = domain.manager.xmax
                ymax = domain.manager.ymax
                zmax = domain.manager.zmax
                periodic_in_x = domain.manager.periodic_in_x
                periodic_in_y = domain.manager.periodic_in_y
                periodic_in_z = domain.manager.periodic_in_z
                gamma_yx = domain.manager.gamma_yx
                gamma_zx = domain.manager.gamma_zx
                gamma_zy = domain.manager.gamma_zy
                n_layers = domain.manager.n_layers
                N = self.steps or int(ceil(self.dt / self.fiber_dt))
                # dt = self.dt/N
                self.domain = DomainManager(xmin=xmin,
                                            xmax=xmax,
                                            ymin=ymin,
                                            ymax=ymax,
                                            zmin=zmin,
                                            zmax=zmax,
                                            periodic_in_x=periodic_in_x,
                                            periodic_in_y=periodic_in_y,
                                            periodic_in_z=periodic_in_z,
                                            gamma_yx=gamma_yx,
                                            gamma_zx=gamma_zx,
                                            gamma_zy=gamma_zy,
                                            n_layers=n_layers,
                                            dt=self.dt,
                                            calls_per_step=N)
            else:
                self.domain = None
            # A separate list for the nearest-neighbour search is beneficial
            # since it is much smaller than the original one.
            nnps = LinkedListNNPS(dim=scheme.dim,
                                  particles=particles,
                                  radius_scale=kernel.radius_scale,
                                  domain=self.domain,
                                  fixed_h=False,
                                  cache=False,
                                  sort_gids=False)
            # The acceleration evaluator needs to be set up in order to compile
            # it together with the integrator.
            if parallel:
                self.acceleration_eval = AccelerationEval(
                    particle_arrays=particles,
                    equations=equations,
                    kernel=kernel)
            else:
                self.acceleration_eval = AccelerationEval(
                    particle_arrays=particles,
                    equations=equations,
                    kernel=kernel,
                    mode='serial')
            # Compile the integrator. Unless 'parallel' is set, OpenMP is
            # disabled, because its overhead is too large for the few fiber
            # particles.
            comp = SPHCompiler(self.acceleration_eval, self.fiber_integrator)
            if parallel:
                comp.compile()
            else:
                config = get_config()
                config.use_openmp = False
                comp.compile()
                config.use_openmp = True
            self.acceleration_eval.set_nnps(nnps)

            # Connecting neighbourhood list to integrator.
            self.fiber_integrator.set_nnps(nnps)
Example #11
    def setUp(self):
        ocl = pytest.importorskip("pyopencl")
        cfg = get_config()
        self.orig_use_double = cfg.use_double
        cfg.use_double = True
        self.backend = 'opencl'
Example #12
    def tearDown(self):
        super(TestZOrderGPUNNPSWithSorting, self).tearDown()
        get_config().use_double = self._orig_use_double
Example #13
                   choices=['gpu_comp', 'omp_comp', 'comp_algo'],
                   help='Choose the comparison.')
    p.add_argument('--nnps',
                   action='store',
                   dest='nnps',
                   default='linear',
                   choices=['linear', 'simple'],
                   help='Choose algorithm.')
    p.add_argument('--use-double',
                   action='store_true',
                   dest='use_double',
                   default=False,
                   help='Use double precision on the GPU.')

    o = p.parse_args()
    get_config().use_double = o.use_double
    solver_algo = (md_nnps.MDNNPSSolver
                   if o.nnps == 'linear' else md_simple.MDSolver)
    n_list = [10000 * (2 ** i) for i in range(10)] if o.nnps == 'linear' else \
        [500 * (2 ** i) for i in range(8)]

    if o.comp == "gpu_comp":
        backends = ["opencl", "cuda", "cython"]
        print("Running for", n_list)
        speedups, t_list = compare(backends, n_list, solver_algo)
        plot(n_list, speedups, t_list, o.nnps)
    elif o.comp == "omp_comp":
        backends = ["cython_omp", "cython"]
        print("Running for", n_list)
        speedups, t_list = compare(backends, n_list, solver_algo)
        plot(n_list, speedups, t_list, o.nnps)
Example #14
if __name__ == '__main__':
    from argparse import ArgumentParser
    p = ArgumentParser()
    p.add_argument('-b',
                   '--backend',
                   action='store',
                   dest='backend',
                   default='cython',
                   help='Choose the backend.')
    p.add_argument('--openmp',
                   action='store_true',
                   dest='openmp',
                   default=False,
                   help='Use OpenMP.')
    p.add_argument('--use-double',
                   action='store_true',
                   dest='use_double',
                   default=False,
                   help='Use double precision on the GPU.')
    p.add_argument('-n',
                   action='store',
                   type=int,
                   dest='n',
                   default=10000,
                   help='Number of particles.')
    o = p.parse_args()
    get_config().use_openmp = o.openmp
    get_config().use_double = o.use_double
    run(o.n, o.backend)
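A hypothetical invocation of this driver (the script name is made up):

# python run_bench.py --backend cython --openmp -n 100000
# python run_bench.py --backend opencl --use-double -n 100000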
Example #15
    def tearDown(self):
        super(TestAccelerationEval1DGPUOctreeNonCached, self).tearDown()
        get_config().use_local_memory = self.old_flag
Example #16
    def setUp(self):
        get_config().use_opencl = False
Example #17
    def tearDown(self):
        super(BruteForceNNPSTestCase, self).tearDown()
        get_config().use_double = self._orig_use_double
Example #18
    def setUp(self):
        get_config().use_opencl = False
        self.backend = None
Example #19
    def tearDown(self):
        super(ZOrderGPUDoubleNNPSTestCase, self).tearDown()
        get_config().use_double = self._orig_use_double
Example #20
def teardown_module():
    get_config().use_openmp = False
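Examples #7 and #20 bracket a test module with OpenMP enabled; note that
teardown_module() resets the flag to False unconditionally rather than
restoring whatever value was set before. An autouse pytest fixture that does
a proper save and restore could look like this (a sketch, not from the
original source):

import pytest

from compyle.config import get_config

@pytest.fixture(scope='module', autouse=True)
def _openmp_enabled():
    # Enable OpenMP for every test in the module, then restore the
    # previous setting.
    cfg = get_config()
    old = cfg.use_openmp
    cfg.use_openmp = True
    yield
    cfg.use_openmp = old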