def test_oi_1(self):
    """Check the profiled operational intensity of a two-term stencil.

    OI = (ADD+MUL)/[(LOAD+STORE)*word_size]; word_size=8(double),4(float)
    Equation: v1[i2][i1] = 3*v2[i2][i1] + 2*v3[i2][i1];
    """
    dtype = np.float32
    # Hand-counted operations and memory accesses for the equation above
    n_load, n_store = 2.0, 1.0
    n_add, n_mul = 1.0, 2.0
    word_size = np.dtype(dtype).itemsize
    hand_oi = (n_mul + n_add) / ((n_load + n_store) * word_size)

    i1, i2 = symbols('i1 i2')
    v1, v2, v3 = [IndexedBase(label) for label in ('v1', 'v2', 'v3')]
    eq = Eq(v1[i2, i1], 3 * v2[i2, i1] + 2 * v3[i2, i1])

    data = np.arange(50, dtype=np.float32).reshape((10, 5))
    arr = np.empty_like(data)

    propagator = Propagator("process", 10, (5, ), 0, profile=True)
    propagator.stencils = (eq, )
    for param_name in ("v1", "v2", "v3"):
        propagator.add_param(param_name, data.shape, data.dtype)
    propagator.run([data, data, arr])

    propagator_oi = propagator.oi["loop_body"]
    assert (propagator_oi == hand_oi)
def test_value_param():
    """Check that a scalar parameter is forwarded to the generated kernel.

    The kernel adds the scalar ``offset`` to every entry of the input grid;
    with ``offset = 3`` the entry at ``[2][1]`` (value 5) must become 8.
    """
    grid = np.arange(6, dtype=np.float64).reshape((3, 2))
    result = np.empty_like(grid)

    propagator = Propagator("process", 3, (2, ), [])
    for grid_name in ("input_grid", "output_grid"):
        propagator.add_param(grid_name, grid.shape, grid.dtype)
    propagator.add_scalar_param("offset", np.int32)
    propagator.loop_body = Assign("output_grid[i2][i1]",
                                  "input_grid[i2][i1] + offset")

    compiled = propagator.cfunction
    compiled(grid, result, np.int32(3))
    assert (result[2][1] == 8)
def first_touch(array):
    """Zero-initialise ``array`` through the low-level Propagator API.

    The given array (a Devito data type) is written with the same loop
    pattern that would later be used to access it.

    :param array: Devito data object to initialise
    """
    from devito.propagator import Propagator
    from devito.interfaces import TimeData, PointData

    # Stencil assigning zero to every indexed entry of the array
    zero_stencils = [Eq(array.indexed[array.indices], 0)]
    after_loop = []

    if isinstance(array, TimeData):
        # First axis is treated as time, the remaining ones as space
        time_steps = array.shape[0]
        shape = array.shape[1:]
        space_dims = array.indices[1:]
    elif isinstance(array, PointData):
        # No space loop here: the zeroing runs as an explicit iteration
        # placed in the post-space-loop slot of the time loop
        after_loop = [Iteration(zero_stencils, index=p,
                                limits=array.shape[1])]
        zero_stencils = []
        time_steps = array.shape[0]
        shape = []
        space_dims = []
    else:
        # Any other data object: one time step over the full shape
        time_steps = 1
        shape = array.shape
        space_dims = array.indices

    prop = Propagator(name="init", nt=time_steps, shape=shape,
                      stencils=zero_stencils, space_dims=space_dims)
    prop.add_devito_param(array)
    prop.save_vars[array.name] = True
    prop.time_loop_stencils_a = after_loop
    prop.run([array.data])
def test_2d(self):
    """Run a hand-written 2D kernel that adds 3 to every grid entry.

    The input entry at ``[2][1]`` holds 5, so the output there must be 8.
    """
    grid = np.arange(6, dtype=np.float64).reshape((3, 2))
    result = np.empty_like(grid)

    propagator = Propagator("process", 3, (2, ), [])
    for grid_name in ("input_grid", "output_grid"):
        propagator.add_param(grid_name, grid.shape, grid.dtype)
    propagator.loop_body = cgen.Assign("output_grid[i2][i1]",
                                       "input_grid[i2][i1] + 3")

    compiled = propagator.cfunction
    compiled(grid, result)
    assert (result[2][1] == 8)
def test_4d(self):
    """Run a hand-written 4D kernel that adds 3 to every grid entry.

    The last input entry ``[4][3][2][1]`` holds 119, so the output there
    must be 122.
    """
    grid = np.arange(120, dtype=np.float64).reshape((5, 4, 3, 2))
    result = np.empty_like(grid)

    propagator = Propagator("process", 5, (4, 3, 2), [])
    for grid_name in ("input_grid", "output_grid"):
        propagator.add_param(grid_name, grid.shape, grid.dtype)
    propagator.loop_body = cgen.Assign("output_grid[i4][i1][i2][i3]",
                                       "input_grid[i4][i1][i2][i3] + 3")

    compiled = propagator.cfunction
    compiled(grid, result)
    assert (result[4][3][2][1] == 122)
def test_oi_3(self):
    """Check the profiled operational intensity of a rational stencil.

    OI: (ADD+MUL)/[(LOAD+STORE)*word_size]; word_size: 8(double),4(float)
    Equation:
    v1[i2][i1] = (v2[i2][i1] + 2.5F*v2[i2][i1 - 2] + 5*v2[i2][i1 - 1])
                 / (v3[i2][i1] + (1.0F/4.0F)*v3[i2][i1 - 2]
                    + (1.0F/2.0F)*v3[i2][i1 - 1] + 7.0e-1F*v4[i2][i1]
                    - 1.5e-1F*v4[i2][i1 - 2] - 3.33e-1F*v4[i2][i1 - 1]);
    """
    dtype = np.float32
    # Hand-counted operations and memory accesses for the equation above
    n_load, n_store = 3.0, 1.0
    n_add, n_mul = 7.0, 8.0
    word_size = np.dtype(dtype).itemsize
    hand_oi = (n_mul + n_add) / ((n_load + n_store) * word_size)

    i1, i2 = symbols('i1 i2')
    v1, v2, v3, v4 = [IndexedBase(label)
                      for label in ('v1', 'v2', 'v3', 'v4')]
    numerator = (v2[i2, i1] + 5 * v2[i2, i1 - 1] + 2.5 * v2[i2, i1 - 2])
    denominator = ((0.7 * v4[i2, i1] - 0.333 * v4[i2, i1 - 1] -
                    0.15 * v4[i2, i1 - 2]) +
                   v3[i2, i1] + v3[i2, i1 - 1] / 2 + v3[i2, i1 - 2] / 4)
    eq = Eq(v1[i2, i1], numerator / denominator)

    data = np.arange(100, dtype=np.float32).reshape((10, 10))
    arr = np.empty_like(data)

    propagator = Propagator("process", 10, (10, ), 2, profile=True)
    propagator.stencils = (eq, )
    for param_name in ("v1", "v2", "v3", "v4"):
        propagator.add_param(param_name, data.shape, data.dtype)
    propagator.run([data, data, data, arr])

    propagator_oi = propagator.oi["loop_body"]
    assert (propagator_oi == hand_oi)
def __init__(self, nt, shape, dtype=np.float32, stencils=None,
             subs=None, spc_border=0, time_order=0,
             forward=True, compiler=None, profile=False, cse=True,
             cache_blocking=None, input_params=None,
             output_params=None, factorized=None):
    """Build the operator and its underlying Propagator from stencils.

    :param nt: Number of timesteps, forwarded to the Propagator
    :param shape: Shape of the data buffer, forwarded to the Propagator
    :param dtype: Data type, forwarded to the Propagator
    :param stencils: SymPy equation or list of equations
    :param subs: Dict or list of dicts of symbol substitutions. The first
                 entry is applied to every stencil, the second entry to
                 the factorized terms.
    :param spc_border: Number of spatial padding layers
    :param time_order: Order of the time discretisation
    :param forward: Flag indicating whether to execute forward in time
    :param compiler: Compiler used for JIT compilation; when not given
                     it is obtained from ``get_compiler_from_env()``
    :param profile: Flag forwarded to the Propagator
    :param cse: Flag to enable common subexpression elimination
    :param cache_blocking: Forwarded to the Propagator
    :param input_params: List of symbols that are expected as input
    :param output_params: List of symbols that define operator output
    :param factorized: Map {string_name: sympy_object} of factorized
                       terms. The caller's mapping is not modified.
    """
    # Derive JIT compilation infrastructure
    self.compiler = compiler or get_compiler_from_env()

    # Ensure stencil and substitutions are lists internally. ``None``
    # replaces the former mutable ``[]`` / ``{}`` default arguments.
    if stencils is None:
        stencils = []
    self.stencils = stencils if isinstance(stencils, list) else [stencils]
    if subs is None:
        subs = []
    subs = subs if isinstance(subs, list) else [subs]
    # Without user substitutions the stencils are used unchanged, rather
    # than failing with an IndexError on ``subs[0]``.
    stencil_subs = subs[0] if subs else {}
    # Work on a private copy so that neither a shared default nor the
    # caller's dictionary is rewritten below.
    factorized = dict(factorized) if factorized else {}

    self.input_params = input_params
    self.output_params = output_params

    # Get functions and symbols in LHS/RHS and update params
    # NOTE(review): parameters that are not passed explicitly are derived
    # from the first stencil only - confirm this is intended.
    sym_undef = set()
    for eqn in self.stencils:
        lhs_def, lhs_undef = dse_symbols(eqn.lhs)
        sym_undef.update(lhs_undef)
        if self.output_params is None:
            self.output_params = list(lhs_def)

        rhs_def, rhs_undef = dse_symbols(eqn.rhs)
        sym_undef.update(rhs_undef)
        if self.input_params is None:
            self.input_params = list(rhs_def)

    # Pull all dimension indices from the incoming stencil
    dimensions = []
    for eqn in self.stencils:
        dimensions += [i for i in dse_dimensions(eqn.lhs)
                       if i not in dimensions]
        dimensions += [i for i in dse_dimensions(eqn.rhs)
                       if i not in dimensions]

    # Time dimension is fixed for now
    time_dim = t

    # Derive space dimensions from expression
    self.space_dims = None
    if len(dimensions) > 0:
        # Copy, so that dropping the time dimension here does not also
        # drop it from ``dimensions``
        self.space_dims = list(dimensions)
        if time_dim in self.space_dims:
            self.space_dims.remove(time_dim)
    else:
        # Default space dimension symbols
        self.space_dims = ((x, z) if len(shape) == 2
                           else (x, y, z))[:len(shape)]

    # Remove known dimensions from undefined symbols; ``discard`` because
    # a dimension is not guaranteed to be among the undefined symbols
    for d in dimensions:
        sym_undef.discard(d)

    # TODO: We should check that all undefined symbols have known subs

    # Shift time indices so that LHS writes into t only,
    # eg. u[t+2] = u[t+1] + u[t] -> u[t] = u[t-1] + u[t-2]
    self.stencils = [
        eqn.subs(t, t + solve(eqn.lhs.args[0], t)[0])
        if isinstance(eqn.lhs, TimeData) else eqn
        for eqn in self.stencils
    ]

    # Convert incoming stencil equations to "indexed access" format
    self.stencils = [Eq(dse_indexify(eqn.lhs), dse_indexify(eqn.rhs))
                     for eqn in self.stencils]
    factorized = {name: dse_indexify(value)
                  for name, value in factorized.items()}

    # Apply user-defined subs to stencil
    self.stencils = [eqn.subs(stencil_subs) for eqn in self.stencils]

    # Applies CSE
    if cse:
        self.stencils = dse_cse(self.stencils)

    self.propagator = Propagator(self.getName(), nt, shape, self.stencils,
                                 factorized=factorized, dtype=dtype,
                                 spc_border=spc_border,
                                 time_order=time_order, forward=forward,
                                 space_dims=self.space_dims,
                                 compiler=self.compiler, profile=profile,
                                 cache_blocking=cache_blocking)
    self.dtype = dtype
    self.nt = nt
    self.shape = shape
    self.spc_border = spc_border
    self.time_order = time_order
    self.symbol_to_data = {}

    for param in self.signature:
        self.propagator.add_devito_param(param)
        self.symbol_to_data[param.name] = param

    self.propagator.stencils = self.stencils
    self.propagator.factorized = factorized

    # Factorized terms are shifted one step against the direction of
    # execution before the second substitution map is applied.
    # NOTE(review): ``subs[1]`` is required whenever ``factorized`` is
    # non-empty; a missing second entry still raises IndexError.
    shifted_time = t - 1 if forward else t + 1
    for name, val in list(factorized.items()):
        self.propagator.factorized[name] = \
            dse_indexify(val.subs(t, shifted_time)).subs(subs[1])
class Operator(object):
    """Class encapsulating a defined operator as defined by the given stencil

    The Operator class is the core abstraction in DeVito that allows
    users to generate high-performance Finite Difference kernels from
    a stencil definition defined from SymPy equations.

    :param nt: Number of timesteps to execute
    :param shape: Shape of the data buffer over which to execute
    :param dtype: Data type for the grid buffer
    :param stencils: SymPy equation or list of equations that define the
                     stencil used to create the kernel of this Operator.
    :param subs: Dict or list of dicts containing SymPy symbol
                 substitutions. The first entry is applied to every
                 stencil, the second entry to the factorized terms.
    :param spc_border: Number of spatial padding layers
    :param time_order: Order of the time discretisation
    :param forward: Flag indicating whether to execute forward in time
    :param compiler: Compiler class used to perform JIT compilation.
                     If not provided, the compiler will be inferred from the
                     environment variable DEVITO_ARCH, or default to
                     GNUCompiler.
    :param profile: Flag to enable performance profiling
    :param cse: Flag to enable common subexpression elimination
    :param cache_blocking: Block sizes used for cache blocking. Can be either
                           a single number used for all dimensions except
                           inner most or a list explicitly stating block
                           sizes for each dimension.
                           Set cache_blocking to None to skip blocking on
                           that dim.
                           Set cache_blocking to AutoTuner instance, to use
                           auto tuned block sizes.
    :param input_params: List of symbols that are expected as input.
    :param output_params: List of symbols that define operator output.
    :param factorized: A map given by {string_name:sympy_object} for
                       including factorized terms. The caller's mapping is
                       not modified.
    """

    def __init__(self, nt, shape, dtype=np.float32, stencils=None,
                 subs=None, spc_border=0, time_order=0,
                 forward=True, compiler=None, profile=False, cse=True,
                 cache_blocking=None, input_params=None,
                 output_params=None, factorized=None):
        # Derive JIT compilation infrastructure
        self.compiler = compiler or get_compiler_from_env()

        # Ensure stencil and substitutions are lists internally. ``None``
        # replaces the former mutable ``[]`` / ``{}`` default arguments.
        if stencils is None:
            stencils = []
        self.stencils = (stencils if isinstance(stencils, list)
                         else [stencils])
        if subs is None:
            subs = []
        subs = subs if isinstance(subs, list) else [subs]
        # Without user substitutions the stencils are used unchanged,
        # rather than failing with an IndexError on ``subs[0]``.
        stencil_subs = subs[0] if subs else {}
        # Work on a private copy so that neither a shared default nor the
        # caller's dictionary is rewritten below.
        factorized = dict(factorized) if factorized else {}

        self.input_params = input_params
        self.output_params = output_params

        # Get functions and symbols in LHS/RHS and update params
        # NOTE(review): parameters that are not passed explicitly are
        # derived from the first stencil only - confirm this is intended.
        sym_undef = set()
        for eqn in self.stencils:
            lhs_def, lhs_undef = dse_symbols(eqn.lhs)
            sym_undef.update(lhs_undef)
            if self.output_params is None:
                self.output_params = list(lhs_def)

            rhs_def, rhs_undef = dse_symbols(eqn.rhs)
            sym_undef.update(rhs_undef)
            if self.input_params is None:
                self.input_params = list(rhs_def)

        # Pull all dimension indices from the incoming stencil
        dimensions = []
        for eqn in self.stencils:
            dimensions += [i for i in dse_dimensions(eqn.lhs)
                           if i not in dimensions]
            dimensions += [i for i in dse_dimensions(eqn.rhs)
                           if i not in dimensions]

        # Time dimension is fixed for now
        time_dim = t

        # Derive space dimensions from expression
        self.space_dims = None
        if len(dimensions) > 0:
            # Copy, so that dropping the time dimension here does not also
            # drop it from ``dimensions``
            self.space_dims = list(dimensions)
            if time_dim in self.space_dims:
                self.space_dims.remove(time_dim)
        else:
            # Default space dimension symbols
            self.space_dims = ((x, z) if len(shape) == 2
                               else (x, y, z))[:len(shape)]

        # Remove known dimensions from undefined symbols; ``discard``
        # because a dimension is not guaranteed to be among them
        for d in dimensions:
            sym_undef.discard(d)

        # TODO: We should check that all undefined symbols have known subs

        # Shift time indices so that LHS writes into t only,
        # eg. u[t+2] = u[t+1] + u[t] -> u[t] = u[t-1] + u[t-2]
        self.stencils = [
            eqn.subs(t, t + solve(eqn.lhs.args[0], t)[0])
            if isinstance(eqn.lhs, TimeData) else eqn
            for eqn in self.stencils
        ]

        # Convert incoming stencil equations to "indexed access" format
        self.stencils = [Eq(dse_indexify(eqn.lhs), dse_indexify(eqn.rhs))
                         for eqn in self.stencils]
        factorized = {name: dse_indexify(value)
                      for name, value in factorized.items()}

        # Apply user-defined subs to stencil
        self.stencils = [eqn.subs(stencil_subs) for eqn in self.stencils]

        # Applies CSE
        if cse:
            self.stencils = dse_cse(self.stencils)

        self.propagator = Propagator(self.getName(), nt, shape,
                                     self.stencils,
                                     factorized=factorized, dtype=dtype,
                                     spc_border=spc_border,
                                     time_order=time_order, forward=forward,
                                     space_dims=self.space_dims,
                                     compiler=self.compiler,
                                     profile=profile,
                                     cache_blocking=cache_blocking)
        self.dtype = dtype
        self.nt = nt
        self.shape = shape
        self.spc_border = spc_border
        self.time_order = time_order
        self.symbol_to_data = {}

        for param in self.signature:
            self.propagator.add_devito_param(param)
            self.symbol_to_data[param.name] = param

        self.propagator.stencils = self.stencils
        self.propagator.factorized = factorized

        # Factorized terms are shifted one step against the direction of
        # execution before the second substitution map is applied.
        # NOTE(review): ``subs[1]`` is required whenever ``factorized`` is
        # non-empty; a missing second entry still raises IndexError.
        shifted_time = t - 1 if forward else t + 1
        for name, val in list(factorized.items()):
            self.propagator.factorized[name] = \
                dse_indexify(val.subs(t, shifted_time)).subs(subs[1])

    @property
    def signature(self):
        """List of data object parameters that define the operator signature

        :returns: List of unique input and output data objects
        """
        return self.input_params + [param for param in self.output_params
                                    if param not in self.input_params]

    def apply(self, debug=False):
        """Run the operator, either compiled or through Python

        :param debug: If True, use Python to apply the operator. Default False.
        :returns: With ``debug`` set, a tuple with the ``data`` of each
                  output parameter; otherwise a tuple of the output
                  parameter objects themselves
        """
        if debug:
            return self.apply_python()

        self.propagator.run(self.get_args())

        return tuple(self.output_params)

    def apply_python(self):
        """Uses Python to apply the operator

        :returns: A tuple containing the values of the operator outputs
        """
        self.run_python()

        return tuple([param.data for param in self.output_params])

    def symbol_to_var(self, term, ti, indices=()):
        """Retrieves the Python data from a symbol

        :param term: The symbol from which the data has to be retrieved
        :param ti: The value of t to use
        :param indices: A sequence of indices to use for the space dimensions
        :returns: A tuple containing the data and the indices to access it
        """
        arr = self.symbol_to_data[str(term.base.label)].data
        # Pair every space dimension symbol with its numeric index once
        space_subs = tuple(zip(self.space_dims, indices))
        num_ind = tuple(ind.subs({t: ti}).subs(space_subs)
                        for ind in term.indices)

        return (arr, num_ind)

    @staticmethod
    def _unravel_index(flat_index, extents, lower_limits):
        """Convert a collapsed 1D loop counter into per-dimension indices

        The first dimension varies fastest. Each index is offset by the
        lower limit of its dimension.

        :param flat_index: Position in the collapsed loop
        :param extents: Number of iterations in each dimension
        :param lower_limits: First index to visit in each dimension
        :returns: List with one index per dimension
        """
        indices = []
        remainder = flat_index
        for extent, lower in zip(extents, lower_limits):
            indices.append(lower + remainder % extent)
            remainder //= extent

        return indices

    def _apply_lambdas(self, lambdas, stencils, ti, indices=()):
        """Evaluate lambdified stencils and store each result in its LHS

        :param lambdas: Entries as returned by ``dse_tolambda``; item 0 is
                        the callable, item 1 the terms it takes as arguments
        :param stencils: The equations matching ``lambdas`` one to one
        :param ti: The value of t to use
        :param indices: Indices to use for the space dimensions
        """
        for lams, expr in zip(lambdas, stencils):
            lamda = lams[0]
            terms = lams[1]
            arr_lhs, ind_lhs = self.symbol_to_var(expr.lhs, ti, indices)
            args = []
            for term in terms:
                arr, ind = self.symbol_to_var(term, ti, indices)
                args.append(arr[ind])

            arr_lhs[ind_lhs] = lamda(*args)

    def run_python(self):
        """Execute the operator using Python"""
        time_loop_limits = self.propagator.time_loop_limits
        time_loop_lambdas_b = dse_tolambda(
            self.propagator.time_loop_stencils_b)
        time_loop_lambdas_a = dse_tolambda(
            self.propagator.time_loop_stencils_a)
        stencil_lambdas = dse_tolambda(self.stencils)

        # The iteration space excludes ``spc_border`` points on each side
        lower_limits = [self.spc_border] * len(self.shape)
        upper_limits = [extent - self.spc_border for extent in self.shape]

        # Number of iterations in each dimension
        total_size_arr = [hi - lo
                          for hi, lo in zip(upper_limits, lower_limits)]

        # Total number of iterations
        total_iter = reduce(lambda a, b: a * b, total_size_arr)

        for ti in range(*time_loop_limits):
            # Run time loop stencils before space loop
            self._apply_lambdas(time_loop_lambdas_b,
                                self.propagator.time_loop_stencils_b, ti)

            # The 2/3 dimensional space loop has been collapsed to a
            # single loop
            for iter_index in range(0, total_iter):
                # Calculating 2/3 dimensional index based on 1D index
                indices = self._unravel_index(iter_index, total_size_arr,
                                              lower_limits)
                self._apply_lambdas(stencil_lambdas, self.stencils, ti,
                                    indices)

            # Time loop stencils for after space loop
            self._apply_lambdas(time_loop_lambdas_a,
                                self.propagator.time_loop_stencils_a, ti)

    def getName(self):
        """Gives the name of the class

        :returns: The name of the class
        """
        return self.__class__.__name__

    def get_args(self):
        """Initialises all the input args and returns them

        :return: a list of input params
        """
        for param in self.input_params:
            # Only parameters that offer an ``initialize`` hook are set up
            if hasattr(param, 'initialize'):
                param.initialize()

        return [param.data for param in self.signature]
def test_space_dims_3d_default(self):
    """A 3D propagator built without ``space_dims`` reports (x, y, z)."""
    expected_dims = (x, y, z)
    default_propagator = Propagator("process", 1, (4, 3, 2), [])
    actual_dims = default_propagator.space_dims
    assert(expected_dims == actual_dims)
def test_space_dims_3d(self):
    """Explicitly passed space dimensions are reported back unchanged."""
    requested_dims = (z, y, x)
    custom_propagator = Propagator("process", 1, (4, 3, 2), [],
                                   space_dims=requested_dims)
    actual_dims = custom_propagator.space_dims
    assert(requested_dims == actual_dims)