import numpy as np import pytest from numpy import linalg from conftest import skipif from devito import TimeFunction from devito.logger import log from examples.seismic import Model, AcquisitionGeometry from examples.seismic.acoustic import AcousticWaveSolver from examples.seismic.tti import AnisotropicWaveSolver pytestmark = skipif(['yask', 'ops']) @pytest.mark.parametrize('shape', [(120, 140), (120, 140, 150)]) @pytest.mark.parametrize('space_order', [4, 8]) @pytest.mark.parametrize('kernel', ['centered']) def test_tti(shape, space_order, kernel): """ This first test compare the solution of the acoustic wave-equation and the TTI wave-eqatuon with all anisotropy parametrs to 0. The two solutions should be the same. """ if kernel == 'shifted': space_order *= 2 to = 2 so = space_order nbl = 10 origin = [0. for _ in shape] spacing = [10. for _ in shape] vp = 1.5 * np.ones(shape)
import itertools import pytest import numpy as np from conftest import skipif from sympy import Integer from sympy.core.numbers import Zero, One # noqa pytestmark = skipif('noops', whole_module=True) # All ops-specific imports *must* be avoided if `backend != ops`, otherwise # a backend reinitialization would be triggered via `devito/ops/.__init__.py`, # thus invalidating all of the future tests. This is guaranteed by the # `pytestmark` above from devito import Eq, Function, Grid, Operator, TimeFunction, configuration # noqa from devito.ops.node_factory import OPSNodeFactory # noqa from devito.ops.transformer import create_ops_arg, create_ops_dat, make_ops_ast, to_ops_stencil # noqa from devito.ops.types import OpsAccessible, OpsDat, OpsStencil, OpsBlock # noqa from devito.ops.utils import namespace, AccessibleInfo # noqa from devito.symbolics import Byref, Literal, indexify # noqa from devito.tools import dtype_to_cstr # noqa from devito.types import Buffer, Constant, Symbol # noqa class TestOPSExpression(object): @pytest.mark.parametrize('equation, expected', [ ('Eq(u,3*a - 4**a)', 'void OPS_Kernel_0(ACC<float> & ut0)\n' '{\n ut0(0) = -2.97015324253729F;\n}'), ('Eq(u, u.dxl)', 'void OPS_Kernel_0(ACC<float> & ut0, const float *h_x)\n' '{\n r0 = 1.0/*h_x;\n '
from operator import mul import numpy as np import pytest from conftest import EVAL, skipif from devito import Grid, Function, TimeFunction, SparseTimeFunction, Eq, Operator, solve from devito.dle import NThreads, transform from devito.dle.parallelizer import nhyperthreads from devito.ir.equations import DummyEq from devito.ir.iet import (Call, Expression, Iteration, Conditional, FindNodes, iet_analyze, retrieve_iteration_tree) from devito.tools import as_tuple from unittest.mock import patch pytestmark = skipif(['yask', 'ops']) def get_blocksizes(op, dle, grid, blockshape): blocksizes = {'%s0_blk_size' % d: v for d, v in zip(grid.dimensions, blockshape)} blocksizes = {k: v for k, v in blocksizes.items() if k in op._known_arguments} # Sanity check if grid.dim == 1 or len(blockshape) == 0: assert len(blocksizes) == 0 return {} try: if dle[1].get('blockinner'): assert len(blocksizes) >= 1 if grid.dim == len(blockshape): assert len(blocksizes) == len(blockshape) else:
from sympy import cos import numpy as np from cached_property import cached_property import pytest # noqa pexpect = pytest.importorskip('yask') # Run only if YASK is available from conftest import skipif # noqa from devito import (Eq, Grid, Dimension, ConditionalDimension, Operator, Constant, Function, TimeFunction, SparseTimeFunction, configuration, clear_cache) # noqa from devito.ir.iet import FindNodes, ForeignExpression, retrieve_iteration_tree # noqa from examples.seismic.acoustic import iso_stencil # noqa from examples.seismic import demo_model, TimeAxis, RickerSource, Receiver # noqa pytestmark = skipif('noyask') def setup_module(module): """Get rid of any YASK modules generated and JIT-compiled in previous runs. This is not strictly necessary for the tests, but it helps in keeping the lib directory clean, which may be helpful for offline analysis. """ from devito.yask.wrappers import contexts # noqa contexts.dump() @pytest.fixture(autouse=True) def reset_isa(): """Force back to NO-SIMD after each test, as some tests may optionally switch on SIMD.
import numpy as np import pytest from conftest import skipif, opts_device_tiling from devito import (Grid, Dimension, Function, TimeFunction, Eq, Inc, solve, Operator, norm, cos) from devito.exceptions import InvalidOperator from devito.ir.iet import retrieve_iteration_tree from examples.seismic import TimeAxis, RickerSource, Receiver pytestmark = skipif(['nodevice'], whole_module=True) class TestCodeGeneration(object): def test_init_omp_env(self): grid = Grid(shape=(3, 3, 3)) u = TimeFunction(name='u', grid=grid) op = Operator(Eq(u.forward, u.dx + 1), language='openmp') assert str(op.body.init[0].body[0]) ==\ 'if (deviceid != -1)\n{\n omp_set_default_device(deviceid);\n}' @skipif('device-aomp') @pytest.mark.parallel(mode=1) def test_init_omp_env_w_mpi(self): grid = Grid(shape=(3, 3, 3)) u = TimeFunction(name='u', grid=grid)
import pytest import numpy as np from conftest import skipif from devito import (Grid, Function, TimeFunction, SparseTimeFunction, Dimension, # noqa Eq, Operator, ALLOC_GUARD, ALLOC_FLAT) from devito.data import LEFT, RIGHT, Decomposition pytestmark = skipif('ops') class TestDataBasic(object): def test_simple_indexing(self): """Test data packing/unpacking via basic indexing.""" grid = Grid(shape=(16, 16, 16)) u = Function(name='yu3D', grid=grid, space_order=0) # Test simple insertion and extraction u.data[0, 1, 1] = 1. assert u.data[0, 0, 0] == 0. assert u.data[0, 1, 1] == 1. assert np.all(u.data == u.data[:, :, :]) assert 1. in u.data[0] assert 1. in u.data[0, 1] # Test negative indices assert u.data[0, -15, -15] == 1. u.data[6, 0, 0] = 1. assert u.data[-10, :, :].sum() == 1.
import pytest import numpy as np from unittest.mock import patch from conftest import skipif from devito import Grid, TimeFunction, Eq, Operator, configuration, switchconfig from devito.data import LEFT pytestmark = skipif(['yask', 'ops'], whole_module=True) # All core-specific imports *must* be avoided if `backend != core`, otherwise # a backend reinitialization would be triggered via `devito/core/.__init__.py`, # thus invalidating all of the future tests. This is guaranteed by the # `pytestmark` above from devito.core.autotuning import options # noqa @switchconfig(log_level='DEBUG') @pytest.mark.parametrize("shape,expected", [ ((30, 30), 13), ((30, 30, 30), 17) ]) def test_at_is_actually_working(shape, expected): """ Check that autotuning is actually running when switched on, in both 2D and 3D operators. """ grid = Grid(shape=shape) f = TimeFunction(name='f', grid=grid) eqn = Eq(f.forward, f + 1)
import numpy as np from cached_property import cached_property import pytest # noqa pexpect = pytest.importorskip('yask') # Run only if YASK is available from conftest import skipif # noqa from devito import (Eq, Grid, Dimension, ConditionalDimension, Operator, Constant, Function, TimeFunction, SparseTimeFunction, configuration, clear_cache, switchconfig) # noqa from devito.ir.iet import FindNodes, ForeignExpression, retrieve_iteration_tree # noqa from examples.seismic.acoustic import iso_stencil # noqa from examples.seismic import demo_model, TimeAxis, RickerSource, Receiver # noqa pytestmark = skipif('noyask') def setup_module(module): """Get rid of any YASK modules generated and JIT-compiled in previous runs. This is not strictly necessary for the tests, but it helps in keeping the lib directory clean, which may be helpful for offline analysis. """ from devito.yask.wrappers import contexts # noqa contexts.dump() @pytest.fixture(autouse=True) def reset_isa(): """Force back to NO-SIMD after each test, as some tests may optionally switch on SIMD.
import pytest from conftest import skipif from devito import Function, Grid, NODE from devito.tools import powerset pytestmark = skipif(['yask']) @pytest.mark.parametrize('ndim', [1, 2, 3]) def test_indices(ndim): """ Test that inidces are shifted by half a grid point for staggered Function """ grid = Grid(tuple([10] * ndim)) dims = grid.dimensions for d in list(powerset(dims))[1:]: f = Function(name="f", grid=grid, staggered=d) for dd in d: assert f.indices_ref[dd] == dd + dd.spacing / 2 @pytest.mark.parametrize('ndim', [1, 2, 3]) def test_avg(ndim): """ Test automatic averaging of Function at undefined grid points """ grid = Grid(tuple([10] * ndim)) dims = list(powerset(grid.dimensions))[1:] for d in dims: f = Function(name="f", grid=grid, staggered=d)
import pytest from conftest import skipif from devito import Eq, Grid, Operator, TimeFunction, configuration # noqa from devito.symbolics import indexify pytestmark = skipif('noops', whole_module=True) # All ops-specific imports *must* be avoided if `backend != ops`, otherwise # a backend reinitialization would be triggered via `devito/ops/.__init__.py`, # thus invalidating all of the future tests. This is guaranteed by the # `pytestmark` above from devito.ops.node_factory import OPSNodeFactory # noqa from devito.ops.transformer import make_ops_ast # noqa class TestOPSExpression(object): @pytest.mark.parametrize('equation, expected', [ ('Eq(u,3*a - 4**a)', 'Eq(ut0[OPS_ACC0(0)], -2.97015324253729)'), ('Eq(u, u.dxl)', 'Eq(ut0[OPS_ACC0(0)], -2.0*ut0[OPS_ACC0(-1)]/h_x + ' '0.5*ut0[OPS_ACC0(-2)]/h_x + 1.5*ut0[OPS_ACC0(0)]/h_x)'), ('Eq(v,1)', 'Eq(vt0[OPS_ACC0(0,0)], 1)'), ('Eq(v,v.dxl + v.dxr - v.dyr - v.dyl)', 'Eq(vt0[OPS_ACC0(0,0)], 2.0*vt0[OPS_ACC0(0,-1)]/h_y - ' '0.5*vt0[OPS_ACC0(0,-2)]/h_y - 2.0*vt0[OPS_ACC0(0,1)]/h_y + ' '0.5*vt0[OPS_ACC0(0,2)]/h_y - 2.0*vt0[OPS_ACC0(-1,0)]/h_x + ' '0.5*vt0[OPS_ACC0(-2,0)]/h_x + 2.0*vt0[OPS_ACC0(1,0)]/h_x - ' '0.5*vt0[OPS_ACC0(2,0)]/h_x)'), ('Eq(v,v**2 - 3*v)',
import numpy as np import pytest from conftest import skipif from devito import (Grid, Constant, Function, TimeFunction, SparseFunction, SparseTimeFunction, Dimension, ConditionalDimension, SubDimension, Eq, Inc, Operator, norm, inner) from devito.data import LEFT, RIGHT from devito.ir.iet import Call, Conditional, Iteration, FindNodes from devito.mpi import MPI, HaloExchangeBuilder, HaloSchemeEntry from examples.seismic.acoustic import acoustic_setup pytestmark = skipif(['yask', 'ops', 'nompi']) class TestDistributor(object): @pytest.mark.parallel(mode=[2, 4]) def test_partitioning(self): grid = Grid(shape=(15, 15)) f = Function(name='f', grid=grid) distributor = grid.distributor expected = { # nprocs -> [(rank0 shape), (rank1 shape), ...] 2: [(15, 8), (15, 7)], 4: [(8, 8), (8, 7), (7, 8), (7, 7)] } assert f.shape == expected[distributor.nprocs][distributor.myrank] @pytest.mark.parallel(mode=[2, 4]) def test_partitioning_fewer_dims(self):
import numpy as np import pytest from devito import Operator, norm, Function, Grid, SparseFunction from devito.logger import info from examples.seismic import demo_model, Receiver from examples.seismic.acoustic import acoustic_setup from examples.seismic.tti import tti_setup from examples.seismic.viscoacoustic import viscoacoustic_setup from conftest import skipif pytestmark = skipif('device-openmp', whole_module=True) presets = { 'constant': { 'preset': 'constant-isotropic' }, 'layers': { 'preset': 'layers-isotropic', 'nlayers': 2 }, 'layers-tti': { 'preset': 'layers-tti', 'nlayers': 2 }, 'layers-viscoacoustic': { 'preset': 'layers-viscoacoustic', 'nlayers': 2 }, }
class TestStreaming(object): @pytest.mark.parametrize('opt', [ ('tasking', 'orchestrate'), ('tasking', 'orchestrate', { 'linearize': True }), ]) def test_tasking_in_isolation(self, opt): nt = 10 bundle0 = Bundle() grid = Grid(shape=(10, 10, 10), subdomains=bundle0) tmp = Function(name='tmp', grid=grid) u = TimeFunction(name='u', grid=grid, save=nt) v = TimeFunction(name='v', grid=grid) eqns = [ Eq(tmp, v), Eq(v.forward, v + 1), Eq(u.forward, tmp, subdomain=bundle0) ] op = Operator(eqns, opt=opt) # Check generated code assert len(retrieve_iteration_tree(op)) == 5 assert len([i for i in FindSymbols().visit(op) if isinstance(i, Lock)]) == 1 sections = FindNodes(Section).visit(op) assert len(sections) == 3 assert str(sections[0].body[0].body[0].body[0].body[0] ) == 'while(lock0[0] == 0);' body = sections[2].body[0].body[0] assert (str(body.body[1].condition) == 'Ne(lock0[0], 2) | ' 'Ne(FieldFromComposite(flag, sdata0[wi0], ()), 1)') assert str(body.body[2]) == 'sdata0[wi0].time = time;' assert str(body.body[3]) == 'lock0[0] = 0;' assert str(body.body[4]) == 'sdata0[wi0].flag = 2;' op.apply(time_M=nt - 2) assert np.all(u.data[nt - 1] == 8) def test_tasking_fused(self): nt = 10 bundle0 = Bundle() grid = Grid(shape=(10, 10, 10), subdomains=bundle0) tmp = Function(name='tmp', grid=grid) u = TimeFunction(name='u', grid=grid, save=nt) v = TimeFunction(name='v', grid=grid, save=nt) w = TimeFunction(name='w', grid=grid) eqns = [ Eq(w.forward, w + 1), Eq(tmp, w.forward), Eq(u.forward, tmp, subdomain=bundle0), Eq(v.forward, tmp, subdomain=bundle0) ] op = Operator(eqns, opt=('tasking', 'fuse', 'orchestrate')) # Check generated code assert len(retrieve_iteration_tree(op)) == 5 locks = [i for i in FindSymbols().visit(op) if isinstance(i, Lock)] assert len( locks) == 1 # Only 1 because it's only `tmp` that needs protection assert len(op._func_table) == 2 exprs = FindNodes(Expression).visit( op._func_table['copy_device_to_host0'].root) assert len(exprs) == 20 assert str(exprs[12]) == 'int id = sdata0->id;' assert str(exprs[13]) == 'int deviceid = sdata0->deviceid;' assert str(exprs[14]) == 'const int time = sdata0->time;' assert str(exprs[15]) == 'lock0[0] = 1;' assert exprs[16].write is u assert exprs[17].write is v assert str(exprs[18]) == 'lock0[0] = 2;' assert str(exprs[19]) == 'sdata0->flag = 1;' op.apply(time_M=nt - 2) assert np.all(u.data[nt - 1] == 9) assert np.all(v.data[nt - 1] == 9) def test_tasking_unfused_two_locks(self): nt = 10 bundle0 = Bundle() grid = Grid(shape=(10, 10, 10), subdomains=bundle0) tmp0 = Function(name='tmp0', grid=grid) tmp1 = Function(name='tmp1', grid=grid) u = TimeFunction(name='u', grid=grid, save=nt) v = TimeFunction(name='v', grid=grid, save=nt) w = TimeFunction(name='w', grid=grid) eqns = [ Eq(w.forward, w + 1), Eq(tmp0, w.forward), Eq(tmp1, w.forward), Eq(u.forward, tmp0, subdomain=bundle0), Eq(v.forward, tmp1, subdomain=bundle0) ] op = Operator(eqns, opt=('tasking', 'fuse', 'orchestrate')) # Check generated code assert len(retrieve_iteration_tree(op)) == 7 assert len([i for i in FindSymbols().visit(op) if isinstance(i, Lock)]) == 2 sections = FindNodes(Section).visit(op) assert len(sections) == 4 assert (str(sections[1].body[0].body[0].body[0].body[0]) == 'while(lock0[0] == 0 || lock1[0] == 0);') # Wait-lock body = sections[2].body[0].body[0] assert (str(body.body[1].condition) == 'Ne(lock0[0], 2) | ' 'Ne(FieldFromComposite(flag, sdata0[wi0], ()), 1)' ) # Wait-thread assert (str(body.body[1].body[0]) == 'wi0 = (wi0 + 1)%(npthreads0);') assert str(body.body[2]) == 'sdata0[wi0].time = time;' assert str(body.body[3]) == 'lock0[0] = 0;' # Set-lock assert str(body.body[4]) == 'sdata0[wi0].flag = 2;' body = sections[3].body[0].body[0] assert (str(body.body[1].condition) == 'Ne(lock1[0], 2) | ' 'Ne(FieldFromComposite(flag, sdata1[wi1], ()), 1)' ) # Wait-thread assert (str(body.body[1].body[0]) == 'wi1 = (wi1 + 1)%(npthreads1);') assert str(body.body[2]) == 'sdata1[wi1].time = time;' assert str(body.body[3]) == 'lock1[0] = 0;' # Set-lock assert str(body.body[4]) == 'sdata1[wi1].flag = 2;' assert len(op._func_table) == 4 exprs = FindNodes(Expression).visit( op._func_table['copy_device_to_host0'].root) assert len(exprs) == 19 assert str(exprs[15]) == 'lock0[0] = 1;' assert exprs[16].write is u exprs = FindNodes(Expression).visit( op._func_table['copy_device_to_host1'].root) assert str(exprs[15]) == 'lock1[0] = 1;' assert exprs[16].write is v op.apply(time_M=nt - 2) assert np.all(u.data[nt - 1] == 9) assert np.all(v.data[nt - 1] == 9) def test_tasking_forcefuse(self): nt = 10 bundle0 = Bundle() grid = Grid(shape=(10, 10, 10), subdomains=bundle0) tmp0 = Function(name='tmp0', grid=grid) tmp1 = Function(name='tmp1', grid=grid) u = TimeFunction(name='u', grid=grid, save=nt) v = TimeFunction(name='v', grid=grid, save=nt) w = TimeFunction(name='w', grid=grid) eqns = [ Eq(w.forward, w + 1), Eq(tmp0, w.forward), Eq(tmp1, w.forward), Eq(u.forward, tmp0, subdomain=bundle0), Eq(v.forward, tmp1, subdomain=bundle0) ] op = Operator(eqns, opt=('tasking', 'fuse', 'orchestrate', { 'fuse-tasks': True })) # Check generated code assert len(retrieve_iteration_tree(op)) == 5 assert len([i for i in FindSymbols().visit(op) if isinstance(i, Lock)]) == 2 sections = FindNodes(Section).visit(op) assert len(sections) == 3 assert (str(sections[1].body[0].body[0].body[0].body[0]) == 'while(lock0[0] == 0 || lock1[0] == 0);') # Wait-lock body = sections[2].body[0].body[0] assert (str(body.body[1].condition) == 'Ne(lock0[0], 2) | ' 'Ne(lock1[0], 2) | ' 'Ne(FieldFromComposite(flag, sdata0[wi0], ()), 1)' ) # Wait-thread assert (str(body.body[1].body[0]) == 'wi0 = (wi0 + 1)%(npthreads0);') assert str(body.body[2]) == 'sdata0[wi0].time = time;' assert str(body.body[3]) == 'lock0[0] = 0;' # Set-lock assert str(body.body[4]) == 'lock1[0] = 0;' # Set-lock assert str(body.body[5]) == 'sdata0[wi0].flag = 2;' assert len(op._func_table) == 2 exprs = FindNodes(Expression).visit( op._func_table['copy_device_to_host0'].root) assert len(exprs) == 22 assert str(exprs[15]) == 'lock0[0] = 1;' assert str(exprs[16]) == 'lock1[0] = 1;' assert exprs[17].write is u assert exprs[18].write is v op.apply(time_M=nt - 2) assert np.all(u.data[nt - 1] == 9) assert np.all(v.data[nt - 1] == 9) @pytest.mark.parametrize('opt', [ ('tasking', 'orchestrate'), ('tasking', 'streaming', 'orchestrate'), ]) def test_attempt_tasking_but_no_temporaries(self, opt): grid = Grid(shape=(10, 10, 10)) u = TimeFunction(name='u', grid=grid, save=10) op = Operator(Eq(u.forward, u + 1), opt=opt) piters = FindNodes(OmpIteration).visit(op) assert len(piters) == 0 op = Operator(Eq(u.forward, u + 1), opt=(opt, {'par-disabled': False})) # Degenerates to host execution with no data movement, since `u` is # a host Function piters = FindNodes(OmpIteration).visit(op) assert len(piters) == 1 assert type(piters.pop()) == OmpIteration def test_tasking_multi_output(self): nt = 10 bundle0 = Bundle() grid = Grid(shape=(10, 10, 10), subdomains=bundle0) t = grid.stepping_dim x, y, z = grid.dimensions u = TimeFunction(name='u', grid=grid, time_order=2) u1 = TimeFunction(name='u', grid=grid, time_order=2) usave = TimeFunction(name='usave', grid=grid, save=nt) usave1 = TimeFunction(name='usave', grid=grid, save=nt) eqns = [ Eq(u.forward, u + 1), Eq(usave, u.forward + u + u.backward + u[t, x - 1, y, z], subdomain=bundle0) ] op0 = Operator(eqns, opt=('noop', {'gpu-fit': usave})) op1 = Operator(eqns, opt=('tasking', 'orchestrate')) # Check generated code assert len(retrieve_iteration_tree(op1)) == 4 assert len( [i for i in FindSymbols().visit(op1) if isinstance(i, Lock)]) == 1 sections = FindNodes(Section).visit(op1) assert len(sections) == 2 assert str(sections[0].body[0].body[0].body[0].body[0]) ==\ 'while(lock0[t2] == 0);' for i in range(3): assert 'lock0[t' in str( sections[1].body[0].body[0].body[6 + i]) # Set-lock assert str( sections[1].body[0].body[0].body[9]) == 'sdata0[wi0].flag = 2;' assert len(op1._func_table) == 2 exprs = FindNodes(Expression).visit( op1._func_table['copy_device_to_host0'].root) assert len(exprs) == 26 for i in range(3): assert 'lock0[t' in str(exprs[18 + i]) assert exprs[21].write is usave op0.apply(time_M=nt - 2) op1.apply(time_M=nt - 2, u=u1, usave=usave1) assert np.all(u.data[:] == u1.data[:]) assert np.all(usave.data[:] == usave1.data[:]) def test_tasking_lock_placement(self): grid = Grid(shape=(10, 10, 10)) f = Function(name='f', grid=grid, space_order=2) u = TimeFunction(name='u', grid=grid) usave = TimeFunction(name='usave', grid=grid, save=10) eqns = [Eq(f, u + 1), Eq(u.forward, f.dx + u + 1), Eq(usave, u)] op = Operator(eqns, opt=('tasking', 'orchestrate')) # Check generated code -- the wait-lock is expected in section1 assert len(retrieve_iteration_tree(op)) == 5 assert len([i for i in FindSymbols().visit(op) if isinstance(i, Lock)]) == 1 sections = FindNodes(Section).visit(op) assert len(sections) == 3 assert sections[0].body[0].body[0].body[0].is_Iteration assert str(sections[1].body[0].body[0].body[0].body[0]) ==\ 'while(lock0[t1] == 0);' @pytest.mark.parametrize('opt,ntmps', [ pytest.param( ('streaming', 'orchestrate'), 0, marks=skipif('device-openmp')), (('buffering', 'streaming', 'orchestrate'), 1), (('buffering', 'streaming', 'orchestrate', { 'linearize': True }), 1), ]) def test_streaming_basic(self, opt, ntmps): nt = 10 grid = Grid(shape=(4, 4)) u = TimeFunction(name='u', grid=grid) usave = TimeFunction(name='usave', grid=grid, save=nt) for i in range(nt): usave.data[i, :] = i eqn = Eq(u.forward, u + usave) op = Operator(eqn, opt=opt) # Check generated code assert len(op._func_table) == 3 assert len([i for i in FindSymbols().visit(op) if i.is_Array]) == ntmps op.apply(time_M=nt - 2) assert np.all(u.data[0] == 28) assert np.all(u.data[1] == 36) @pytest.mark.parametrize('opt,ntmps,nfuncs', [ pytest.param( ('streaming', 'orchestrate'), 0, 3, marks=skipif('device-openmp')), (('buffering', 'streaming', 'orchestrate'), 2, 6), (('buffering', 'streaming', 'fuse', 'orchestrate'), 2, 3), ]) def test_streaming_two_buffers(self, opt, ntmps, nfuncs): nt = 10 grid = Grid(shape=(4, 4)) u = TimeFunction(name='u', grid=grid) usave = TimeFunction(name='usave', grid=grid, save=nt) vsave = TimeFunction(name='vsave', grid=grid, save=nt) for i in range(nt): usave.data[i, :] = i vsave.data[i, :] = i eqn = Eq(u.forward, u + usave + vsave) op = Operator(eqn, opt=opt) # Check generated code assert len(op._func_table) == nfuncs assert len([i for i in FindSymbols().visit(op) if i.is_Array]) == ntmps op.apply(time_M=nt - 2) assert np.all(u.data[0] == 56) assert np.all(u.data[1] == 72) @pytest.mark.parametrize('opt', [ pytest.param( ('streaming', 'orchestrate'), marks=skipif('device-openmp')), ('buffering', 'streaming', 'orchestrate'), ]) def test_streaming_conddim_forward(self, opt): nt = 10 grid = Grid(shape=(4, 4)) time_dim = grid.time_dim factor = Constant(name='factor', value=2, dtype=np.int32) time_sub = ConditionalDimension(name="time_sub", parent=time_dim, factor=factor) u = TimeFunction(name='u', grid=grid) usave = TimeFunction(name='usave', grid=grid, time_order=0, save=(int(nt // factor.data)), time_dim=time_sub) for i in range(usave.save): usave.data[i, :] = i eqn = Eq(u.forward, u.forward + u + usave) op = Operator(eqn, opt=opt) # TODO: we are *not* using the last entry of usave, so we gotta ensure # it is *not* streamed on to the device (thus avoiding dangerous leaks). # But how can we explicitly check this? time_M = 6 op.apply(time_M=time_M) # We entered the eq four times (at time=0,2,4,6) # Since factor=2, we *only* write to u.data[(time+1)%2]=u.data[1] assert np.all(u.data[0] == 0) # 1st time u[1] = u[0]+u[1]+usave[0] = 0+0+0 = 0 # 2nd time u[1] = u[0]+u[1]+usave[1] = 0+0+1 = 1 # 3rd time u[1] = u[0]+u[1]+usave[2] = 0+1+2 = 3 # 4th time u[1] = u[0]+u[1]+usave[3] = 0+3+3 = 6 assert np.all(u.data[1] == 6) @pytest.mark.parametrize('opt', [ pytest.param( ('streaming', 'orchestrate'), marks=skipif('device-openmp')), ('buffering', 'streaming', 'orchestrate'), ]) def test_streaming_conddim_backward(self, opt): nt = 10 grid = Grid(shape=(4, 4)) time_dim = grid.time_dim factor = Constant(name='factor', value=2, dtype=np.int32) time_sub = ConditionalDimension(name="time_sub", parent=time_dim, factor=factor) u = TimeFunction(name='u', grid=grid) usave = TimeFunction(name='usave', grid=grid, time_order=0, save=(int(nt // factor.data)), time_dim=time_sub) for i in range(usave.save): usave.data[i, :] = i eqn = Eq(u.backward, u.backward + u + usave) op = Operator(eqn, opt=opt) # TODO: we are *not* using the first two entries of usave, so we gotta ensure # they are *not* streamed on to the device (thus avoiding dangerous leaks). # But how can we explicitly check this? time_m = 4 op.apply(time_m=time_m, time_M=nt - 2) # We entered the eq three times (at time=8,6,4) # Since factor=2, we *only* write to u.data[(time-1)%2]=u.data[1] assert np.all(u.data[0] == 0) # 1st time u[1] = u[0]+u[1]+usave[4] = 0+0+4 = 4 # 2nd time u[1] = u[0]+u[1]+usave[3] = 0+4+3 = 7 # 3rd time u[1] = u[0]+u[1]+usave[2] = 0+7+2 = 9 assert np.all(u.data[1] == 9) @pytest.mark.parametrize('opt,ntmps', [ pytest.param( ('streaming', 'orchestrate'), 0, marks=skipif('device-openmp')), (('buffering', 'streaming', 'orchestrate'), 1), ]) def test_streaming_multi_input(self, opt, ntmps): nt = 100 grid = Grid(shape=(10, 10)) u = TimeFunction(name='u', grid=grid, save=nt, time_order=2, space_order=2) v = TimeFunction(name='v', grid=grid, save=None, time_order=2, space_order=2) grad = Function(name='grad', grid=grid) grad1 = Function(name='grad', grid=grid) v.data[:] = 0.02 for i in range(nt): u.data[i, :] = i + 0.1 eqn = Eq(grad, grad - u.dt2 * v) op0 = Operator(eqn, opt=('noop', {'gpu-fit': u})) op1 = Operator(eqn, opt=opt) # Check generated code assert len(op1._func_table) == 3 assert len([i for i in FindSymbols().visit(op1) if i.is_Array]) == ntmps op0.apply(time_M=nt - 2, dt=0.1) op1.apply(time_M=nt - 2, dt=0.1, grad=grad1) assert np.all(grad.data == grad1.data) @pytest.mark.parametrize('opt,ntmps', [ pytest.param( ('streaming', 'orchestrate'), 0, marks=skipif('device-openmp')), (('buffering', 'streaming', 'orchestrate'), 1), ]) def test_streaming_postponed_deletion(self, opt, ntmps): nt = 10 grid = Grid(shape=(10, 10, 10)) u = TimeFunction(name='u', grid=grid) v = TimeFunction(name='v', grid=grid) usave = TimeFunction(name='usave', grid=grid, save=nt) u1 = TimeFunction(name='u', grid=grid) v1 = TimeFunction(name='v', grid=grid) for i in range(nt): usave.data[i, :] = i eqns = [ Eq(u.forward, u + usave), Eq(v.forward, v + u.forward.dx + usave) ] op0 = Operator(eqns, opt=('noop', {'gpu-fit': usave})) op1 = Operator(eqns, opt=opt) # Check generated code assert len(op1._func_table) == 3 assert len([i for i in FindSymbols().visit(op1) if i.is_Array]) == ntmps op0.apply(time_M=nt - 1) op1.apply(time_M=nt - 1, u=u1, v=v1) assert np.all(u.data == u1.data) assert np.all(v.data == v1.data) def test_streaming_with_host_loop(self): grid = Grid(shape=(10, 10, 10)) f = Function(name='f', grid=grid) u = TimeFunction(name='u', grid=grid, save=10) eqns = [Eq(f, u), Eq(u.forward, f + 1)] op = Operator(eqns, opt=('streaming', 'orchestrate')) assert len(op._func_table) == 3 assert 'init_device0' in op._func_table assert 'prefetch_host_to_device0' in op._func_table @skipif('device-openmp' ) # TODO: Still unsupported with OpenMP, but soon will be def test_composite_streaming_tasking(self): nt = 10 grid = Grid(shape=(10, 10, 10)) u = TimeFunction(name='u', grid=grid) u1 = TimeFunction(name='u', grid=grid) fsave = TimeFunction(name='fsave', grid=grid, save=nt) usave = TimeFunction(name='usave', grid=grid, save=nt) usave1 = TimeFunction(name='usave', grid=grid, save=nt) for i in range(nt): fsave.data[i, :] = i eqns = [Eq(u.forward, u + fsave + 1), Eq(usave, u)] op0 = Operator(eqns, opt=('noop', {'gpu-fit': (fsave, usave)})) op1 = Operator(eqns, opt=('tasking', 'streaming', 'orchestrate')) # Check generated code assert len(retrieve_iteration_tree(op0)) == 1 assert len(retrieve_iteration_tree(op1)) == 4 symbols = FindSymbols().visit(op1) assert len([i for i in symbols if isinstance(i, Lock)]) == 1 threads = [i for i in symbols if isinstance(i, PThreadArray)] assert len(threads) == 2 assert threads[0].size == 1 assert threads[1].size.size == 2 op0.apply(time_M=nt - 1) op1.apply(time_M=nt - 1, u=u1, usave=usave1) assert np.all(u.data == u1.data) assert np.all(usave.data == usave1.data) def test_composite_buffering_tasking(self): nt = 10 bundle0 = Bundle() grid = Grid(shape=(4, 4, 4), subdomains=bundle0) u = TimeFunction(name='u', grid=grid, time_order=2) u1 = TimeFunction(name='u', grid=grid, time_order=2) usave = TimeFunction(name='usave', grid=grid, save=nt) usave1 = TimeFunction(name='usave', grid=grid, save=nt) eqns = [ Eq(u.forward, u * 1.1 + 1), Eq(usave, u.dt2, subdomain=bundle0) ] op0 = Operator(eqns, opt=('noop', {'gpu-fit': usave})) op1 = Operator(eqns, opt=('buffering', 'tasking', 'orchestrate')) # Check generated code -- thanks to buffering only expect 1 lock! assert len(retrieve_iteration_tree(op0)) == 2 assert len(retrieve_iteration_tree(op1)) == 5 symbols = FindSymbols().visit(op1) assert len([i for i in symbols if isinstance(i, Lock)]) == 1 threads = [i for i in symbols if isinstance(i, PThreadArray)] assert len(threads) == 1 assert threads[0].size.size == 1 op0.apply(time_M=nt - 1, dt=0.1) op1.apply(time_M=nt - 1, dt=0.1, u=u1, usave=usave1) assert np.all(u.data == u1.data) assert np.all(usave.data == usave1.data) def test_composite_buffering_tasking_multi_output(self): nt = 10 bundle0 = Bundle() grid = Grid(shape=(4, 4, 4), subdomains=bundle0) u = TimeFunction(name='u', grid=grid, time_order=2) v = TimeFunction(name='v', grid=grid, time_order=2) usave = TimeFunction(name='usave', grid=grid, save=nt) vsave = TimeFunction(name='vsave', grid=grid, save=nt) u1 = TimeFunction(name='u', grid=grid, time_order=2) v1 = TimeFunction(name='v', grid=grid, time_order=2) usave1 = TimeFunction(name='usave', grid=grid, save=nt) vsave1 = TimeFunction(name='vsave', grid=grid, save=nt) eqns = [ Eq(u.forward, u + 1), Eq(v.forward, v + 1), Eq(usave, u, subdomain=bundle0), Eq(vsave, v, subdomain=bundle0) ] op0 = Operator(eqns, opt=('noop', {'gpu-fit': (usave, vsave)})) op1 = Operator(eqns, opt=('buffering', 'tasking', 'topofuse', 'orchestrate')) # Check generated code -- thanks to buffering only expect 1 lock! assert len(retrieve_iteration_tree(op0)) == 2 assert len(retrieve_iteration_tree(op1)) == 7 symbols = FindSymbols().visit(op1) assert len([i for i in symbols if isinstance(i, Lock)]) == 2 threads = [i for i in symbols if isinstance(i, PThreadArray)] assert len(threads) == 2 assert threads[0].size.size == 1 assert threads[1].size.size == 1 assert len(op1._func_table ) == 4 # usave and vsave eqns are in two diff efuncs op0.apply(time_M=nt - 1) op1.apply(time_M=nt - 1, u=u1, v=v1, usave=usave1, vsave=vsave1) assert np.all(u.data == u1.data) assert np.all(v.data == v1.data) assert np.all(usave.data == usave1.data) assert np.all(vsave.data == vsave1.data) @pytest.mark.parametrize('opt', [ ('buffering', 'tasking', 'streaming', 'orchestrate'), ('buffering', 'tasking', 'streaming', 'orchestrate', { 'linearize': True }), ]) def test_composite_full(self, opt): nt = 10 grid = Grid(shape=(4, 4)) u = TimeFunction(name='u', grid=grid, save=nt) v = TimeFunction(name='v', grid=grid, save=nt) u1 = TimeFunction(name='u', grid=grid, save=nt) v1 = TimeFunction(name='v', grid=grid, save=nt) for i in range(nt): u.data[i, :] = i u1.data[i, :] = i eqns = [Eq(u.forward, u + v + 1), Eq(v.forward, u + v + v.backward)] op0 = Operator(eqns, opt=('noop', {'gpu-fit': (u, v)})) op1 = Operator(eqns, opt=opt) # Check generated code assert len(retrieve_iteration_tree(op1)) == 7 assert len( [i for i in FindSymbols().visit(op1) if isinstance(i, Lock)]) == 2 op0.apply(time_M=nt - 2) op1.apply(time_M=nt - 2, u=u1, v=v1) assert np.all(u.data == u1.data) assert np.all(v.data == v1.data) def test_tasking_over_compiler_generated(self): nt = 10 bundle0 = Bundle() grid = Grid(shape=(4, 4, 4), subdomains=bundle0) u = TimeFunction(name='u', grid=grid, space_order=4) u1 = TimeFunction(name='u', grid=grid, space_order=4) usave = TimeFunction(name='usave', grid=grid, save=nt) usave1 = TimeFunction(name='usave', grid=grid, save=nt) eqns = [ Eq(u.forward, u.dx.dx * 0.042 + 1), Eq(usave, u, subdomain=bundle0) ] op0 = Operator(eqns, opt=('cire-sops', {'gpu-fit': usave})) op1 = Operator(eqns, opt=('cire-sops', 'tasking', 'orchestrate')) op2 = Operator(eqns, opt=('tasking', 'cire-sops', 'orchestrate')) # Check generated code for op in [op1, op2]: assert len(retrieve_iteration_tree(op)) == 5 assert len([ i for i in FindSymbols().visit(op) if isinstance(i, Lock) ]) == 1 sections = FindNodes(Section).visit(op) assert len(sections) == 3 assert 'while(lock0[t1] == 0)' in str( sections[1].body[0].body[0].body[0]) op0.apply(time_M=nt - 1) op1.apply(time_M=nt - 1, u=u1, usave=usave1) assert np.all(u.data == u1.data) assert np.all(usave.data == usave1.data) @pytest.mark.parametrize('opt,gpu_fit,async_degree,linearize', [ (('tasking', 'orchestrate'), True, None, False), (('buffering', 'tasking', 'orchestrate'), True, None, False), (('buffering', 'tasking', 'orchestrate'), False, None, False), (('buffering', 'tasking', 'orchestrate'), False, 3, False), (('buffering', 'tasking', 'orchestrate'), False, 3, True), ]) def test_save(self, opt, gpu_fit, async_degree, linearize): nt = 10 grid = Grid(shape=(300, 300, 300)) time_dim = grid.time_dim factor = Constant(name='factor', value=2, dtype=np.int32) time_sub = ConditionalDimension(name="time_sub", parent=time_dim, factor=factor) u = TimeFunction(name='u', grid=grid) usave = TimeFunction(name='usave', grid=grid, time_order=0, save=int(nt // factor.data), time_dim=time_sub) # For the given `nt` and grid shape, `usave` is roughly 4*5*300**3=~ .5GB of data op = Operator( [Eq(u.forward, u + 1), Eq(usave, u.forward)], opt=(opt, { 'gpu-fit': usave if gpu_fit else None, 'buf-async-degree': async_degree, 'linearize': linearize })) op.apply(time_M=nt - 1) assert all( np.all(usave.data[i] == 2 * i + 1) for i in range(usave.save)) def test_save_multi_output(self): nt = 10 grid = Grid(shape=(150, 150, 150)) time_dim = grid.time_dim factor = Constant(name='factor', value=2, dtype=np.int32) time_sub = ConditionalDimension(name="time_sub", parent=time_dim, factor=factor) u = TimeFunction(name='u', grid=grid) usave = TimeFunction(name='usave', grid=grid, time_order=0, save=int(nt // factor.data), time_dim=time_sub) vsave = TimeFunction(name='vsave', grid=grid, time_order=0, save=int(nt // factor.data), time_dim=time_sub) eqns = [ Eq(u.forward, u + 1), Eq(usave, u.forward), Eq(vsave, u.forward) ] op = Operator(eqns, opt=('buffering', 'tasking', 'topofuse', 'orchestrate')) # Check generated code assert len( op._func_table) == 4 # usave and vsave eqns are in separate tasks op.apply(time_M=nt - 1) assert all( np.all(usave.data[i] == 2 * i + 1) for i in range(usave.save)) assert all( np.all(vsave.data[i] == 2 * i + 1) for i in range(vsave.save)) @pytest.mark.parametrize('opt', [ ('buffering', 'tasking', 'orchestrate'), ('buffering', 'tasking', 'orchestrate', { 'linearize': True }), ]) def test_save_w_shifting(self, opt): factor = 4 nt = 19 grid = Grid(shape=(11, 11)) time = grid.time_dim time_subsampled = ConditionalDimension('t_sub', parent=time, factor=factor) u = TimeFunction(name='u', grid=grid) usave = TimeFunction(name='usave', grid=grid, save=2, time_dim=time_subsampled) save_shift = Constant(name='save_shift', dtype=np.int32) eqns = [ Eq(u.forward, u + 1.), Eq(usave.subs(time_subsampled, time_subsampled - save_shift), u) ] op = Operator(eqns, opt=opt) # Starting at time_m=10, so time_subsampled - save_shift is in range op.apply(time_m=10, time_M=nt - 2, save_shift=3) assert np.all(np.allclose(u.data[0], 8)) assert np.all( [np.allclose(usave.data[i], 2 + i * factor) for i in range(2)]) def test_save_w_nonaffine_time(self): factor = 4 grid = Grid(shape=(11, 11)) x, y = grid.dimensions t = grid.stepping_dim time = grid.time_dim time_subsampled = ConditionalDimension('t_sub', parent=time, factor=factor) f = Function(name='f', grid=grid, dtype=np.int32) u = TimeFunction(name='u', grid=grid) usave = TimeFunction(name='usave', grid=grid, save=2, time_dim=time_subsampled) save_shift = Constant(name='save_shift', dtype=np.int32) eqns = [ Eq(u.forward, u[t, f[x, x], f[y, y]] + 1.), Eq(usave.subs(time_subsampled, time_subsampled - save_shift), u) ] op = Operator(eqns, opt=('buffering', 'tasking', 'orchestrate')) # We just check the generated code here assert len([i for i in FindSymbols().visit(op) if isinstance(i, Lock)]) == 1 assert len(op._func_table) == 2 def test_save_w_subdims(self): nt = 10 grid = Grid(shape=(10, 10)) x, y = grid.dimensions time_dim = grid.time_dim xi = SubDimension.middle(name='xi', parent=x, thickness_left=3, thickness_right=3) yi = SubDimension.middle(name='yi', parent=y, thickness_left=3, thickness_right=3) factor = Constant(name='factor', value=2, dtype=np.int32) time_sub = ConditionalDimension(name="time_sub", parent=time_dim, factor=factor) u = TimeFunction(name='u', grid=grid) usave = TimeFunction(name='usave', grid=grid, time_order=0, save=int(nt // factor.data), time_dim=time_sub) eqns = [Eq(u.forward, u + 1), Eq(usave, u.forward)] eqns = [e.xreplace({x: xi, y: yi}) for e in eqns] op = Operator(eqns, opt=('buffering', 'tasking', 'orchestrate')) op.apply(time_M=nt - 1) for i in range(usave.save): assert np.all(usave.data[i, 3:-3, 3:-3] == 2 * i + 1) assert np.all(usave.data[i, :3, :] == 0) assert np.all(usave.data[i, -3:, :] == 0) assert np.all(usave.data[i, :, :3] == 0) assert np.all(usave.data[i, :, -3:] == 0) @pytest.mark.parametrize('opt,ntmps', [ pytest.param( ('streaming', 'orchestrate'), 0, marks=skipif('device-openmp')), pytest.param(('streaming', 'orchestrate', { 'linearize': True }), 0, marks=skipif('device-openmp')), (('buffering', 'streaming', 'orchestrate'), 1), (('buffering', 'streaming', 'orchestrate', { 'linearize': True }), 1), ]) def test_streaming_w_shifting(self, opt, ntmps): nt = 50 grid = Grid(shape=(5, 5)) time = grid.time_dim factor = Constant(name='factor', value=5, dtype=np.int32) t_sub = ConditionalDimension('t_sub', parent=time, factor=factor) save_shift = Constant(name='save_shift', dtype=np.int32) u = TimeFunction(name='u', grid=grid, time_order=0) usave = TimeFunction(name='usave', grid=grid, time_order=0, save=(int(nt // factor.data)), time_dim=t_sub) for i in range(usave.save): usave.data[i, :] = i eqns = Eq(u.forward, u + usave.subs(t_sub, t_sub - save_shift)) op = Operator(eqns, opt=opt) # Check generated code assert len(op._func_table) == 3 assert len([i for i in FindSymbols().visit(op) if i.is_Array]) == ntmps # From time_m=15 to time_M=35 with a factor=5 -- it means that, thanks # to t_sub, we enter the Eq exactly (35-15)/5 + 1 = 5 times. We set # save_shift=1 so instead of accessing the range usave[15/5:35/5+1], # we rather access the range usave[15/5-1:35:5], which means accessing # the usave values 2, 3, 4, 5, 6. op.apply(time_m=15, time_M=35, save_shift=1) assert np.allclose(u.data, 20) # Again, but with a different shift op.apply(time_m=15, time_M=35, save_shift=-2) assert np.allclose(u.data, 20 + 35) def test_streaming_complete(self): nt = 50 grid = Grid(shape=(6, 6)) x, y = grid.dimensions time = grid.time_dim xi = SubDimension.middle(name='xi', parent=x, thickness_left=2, thickness_right=2) yi = SubDimension.middle(name='yi', parent=y, thickness_left=2, thickness_right=2) factor = Constant(name='factor', value=5, dtype=np.int32) t_sub = ConditionalDimension('t_sub', parent=time, factor=factor) save_shift = Constant(name='save_shift', dtype=np.int32) u = TimeFunction(name='u', grid=grid, time_order=0) u1 = TimeFunction(name='u', grid=grid, time_order=0) u2 = TimeFunction(name='u', grid=grid, time_order=0) va = TimeFunction(name='va', grid=grid, time_order=0, save=(int(nt // factor.data)), time_dim=t_sub) vb = TimeFunction(name='vb', grid=grid, time_order=0, save=(int(nt // factor.data)), time_dim=t_sub) for i in range(va.save): va.data[i, :] = i vb.data[i, :] = i * 2 - 1 vas = va.subs(t_sub, t_sub - save_shift) vasb = va.subs(t_sub, t_sub - 1 - save_shift) vasf = va.subs(t_sub, t_sub + 1 - save_shift) eqns = [Eq(u.forward, u + (vasb + vas + vasf) * 2. + vb)] eqns = [e.xreplace({x: xi, y: yi}) for e in eqns] op0 = Operator(eqns, opt='noop') op1 = Operator(eqns, opt=('buffering', 'streaming', 'orchestrate')) op2 = Operator(eqns, opt=('buffering', 'streaming', 'fuse', 'orchestrate')) # Check generated code assert len(op1._func_table) == 6 assert len([i for i in FindSymbols().visit(op1) if i.is_Array]) == 2 assert len(op2._func_table) == 4 assert len([i for i in FindSymbols().visit(op2) if i.is_Array]) == 2 op0.apply(time_m=15, time_M=35, save_shift=0) op1.apply(time_m=15, time_M=35, save_shift=0, u=u1) op2.apply(time_m=15, time_M=35, save_shift=0, u=u2) assert np.all(u.data == u1.data) assert np.all(u.data == u2.data) def test_streaming_split_noleak(self): """ Make sure the helper pthreads leak no memory in the target langauge runtime. """ nt = 1000 grid = Grid(shape=(20, 20, 20)) u = TimeFunction(name='u', grid=grid) u1 = TimeFunction(name='u', grid=grid) usave = TimeFunction(name='usave', grid=grid, save=nt) for i in range(nt): usave.data[i, :] = i eqn = Eq(u.forward, u + usave + usave.backward) op0 = Operator(eqn, opt='noop') op1 = Operator(eqn, opt=('buffering', 'streaming', 'orchestrate')) op0.apply(time_M=nt - 2) # We'll call `op1` in total `X` times, which will create and destroy # `X` pthreads. With `X` at least O(10), this test would be enough # to uncover outrageous memory leaks due to leaking resources in # the runtime (in the past, we've seen leaks due to pthreads-local # pinned memory used for the data transfers) m = 1 l = 20 npairs = nt // l + (1 if nt % l > 0 else 0) X = [(m + i * l, min((i + 1) * l, nt - 2)) for i in range(npairs)] for m, M in X: op1.apply(time_m=m, time_M=M, u=u1) assert np.all(u.data[0] == u1.data[0]) assert np.all(u.data[1] == u1.data[1]) @pytest.mark.parametrize( 'opt,opt_options,gpu_fit', [(('streaming', 'orchestrate'), {}, True), pytest.param(('streaming', 'orchestrate'), {}, False, marks=skipif('device-openmp')), (('buffering', 'streaming', 'orchestrate'), {}, False), (('buffering', 'streaming', 'orchestrate'), { 'linearize': True }, False)]) def test_xcor_from_saved(self, opt, opt_options, gpu_fit): nt = 10 grid = Grid(shape=(300, 300, 300)) time_dim = grid.time_dim period = 2 factor = Constant(name='factor', value=period, dtype=np.int32) time_sub = ConditionalDimension(name="time_sub", parent=time_dim, factor=factor) g = Function(name='g', grid=grid) v = TimeFunction(name='v', grid=grid) usave = TimeFunction(name='usave', grid=grid, time_order=0, save=int(nt // factor.data), time_dim=time_sub) # For the given `nt` and grid shape, `usave` is roughly 4*5*300**3=~ .5GB of data for i in range(int(nt // period)): usave.data[i, :] = i v.data[:] = i * 2 + 1 opt_options = {'gpu-fit': usave if gpu_fit else None, **opt_options} # Assuming nt//period=5, we are computing, over 5 iterations: # g = 4*4 [time=8] + 3*3 [time=6] + 2*2 [time=4] + 1*1 [time=2] op = Operator([Eq(v.backward, v - 1), Inc(g, usave * (v / 2))], opt=(opt, opt_options)) op.apply(time_M=nt - 1) assert np.all(g.data == 30)
from functools import reduce from operator import mul import pytest import numpy as np from unittest.mock import patch from conftest import skipif from devito import (Grid, Function, TimeFunction, Eq, Operator, configuration, switchconfig) from devito.data import LEFT pytestmark = skipif(['yask', 'ops'], whole_module=True) # All core-specific imports *must* be avoided if `backend != core`, otherwise # a backend reinitialization would be triggered via `devito/core/.__init__.py`, # thus invalidating all of the future tests. This is guaranteed by the # `pytestmark` above from devito.core.autotuning import options # noqa @switchconfig(log_level='DEBUG') @pytest.mark.parametrize("shape,expected", [ ((30, 30), 13), ((30, 30, 30), 17) ]) def test_at_is_actually_working(shape, expected): """ Check that autotuning is actually running when switched on, in both 2D and 3D operators. """