def test_create_ops_dat_function(self):
    """
    Check that create_ops_dat for a space-order-2 Function emits the
    dim/base/d_p/d_m declaration expressions, in order, followed by the
    ops_decl_dat call itself.
    """
    # NOTE: shape must be a tuple; the original `(4)` was just the int 4.
    grid = Grid(shape=(4,))
    u = Function(name='u', grid=grid, space_order=2)
    block = OpsBlock('block')
    name_to_ops_dat = {}

    result = create_ops_dat(u, name_to_ops_dat, block)

    # The dat handle is registered under the function's name.
    assert name_to_ops_dat['u'].name == namespace['ops_dat_name'](u.name)
    assert name_to_ops_dat['u']._C_typename == namespace['ops_dat_type']

    # Declaration expressions, in emission order: dim, base, d_p, d_m.
    assert result[0].expr.lhs.name == namespace['ops_dat_dim'](u.name)
    assert result[0].expr.rhs.params == (Integer(4), )
    assert result[1].expr.lhs.name == namespace['ops_dat_base'](u.name)
    assert result[1].expr.rhs.params == (Zero(), )
    assert result[2].expr.lhs.name == namespace['ops_dat_d_p'](u.name)
    assert result[2].expr.rhs.params == (Integer(2), )
    assert result[3].expr.lhs.name == namespace['ops_dat_d_m'](u.name)
    assert result[3].expr.rhs.params == (Integer(-2), )

    # Final expression assigns the ops_decl_dat call to the dat handle.
    # Compare by name (not `type(...) ==`) for robustness and consistency
    # with the other create_ops_* tests in this file.
    assert result[4].expr.lhs == name_to_ops_dat['u']
    assert result[4].expr.rhs.name == namespace['ops_decl_dat'].name
    assert result[4].expr.rhs.args == (
        block,
        1,
        Symbol(namespace['ops_dat_dim'](u.name)),
        Symbol(namespace['ops_dat_base'](u.name)),
        Symbol(namespace['ops_dat_d_m'](u.name)),
        Symbol(namespace['ops_dat_d_p'](u.name)),
        Byref(u.indexify((0, ))),
        Literal('"%s"' % u._C_typedata),
        Literal('"u"'))
def _specialize_iet(self, iet, **kwargs):
    """
    Transform the IET into an OPS-enabled program: wrap it with
    ops_init/ops_partition/ops_exit calls, declare the OPS block, and
    turn every symbol accessed by an offloadable (affine) tree into an
    ops_dat declared ahead of the time loop.
    """
    warning("The OPS backend is still work-in-progress")

    ops_init = Call(namespace['ops_init'], [0, 0, 2])
    ops_partition = Call(namespace['ops_partition'], Literal('""'))
    ops_exit = Call(namespace['ops_exit'])

    ops_block = OpsBlock('block')

    # Walk the affine trees once (hoisted: the original recomputed
    # find_affine_trees for the kernel-generation loop below), collecting
    # each kernel's dimensionality and the symbols needing ops_dat
    # conversion.
    affine_trees = find_affine_trees(iet).items()

    dims = []
    to_dat = set()
    for section, trees in affine_trees:
        dims.append(len(trees[0].dimensions))
        symbols = set(FindSymbols('symbolics').visit(trees[0].root))
        symbols -= set(FindSymbols('defines').visit(trees[0].root))
        to_dat |= symbols

    # To ensure deterministic code generation we order the datasets to
    # be generated (since a set is an unordered collection)
    to_dat = filter_sorted(to_dat)

    name_to_ops_dat = {}
    pre_time_loop = []
    for f in to_dat:
        if f.is_Constant:
            continue
        pre_time_loop.extend(create_ops_dat(f, name_to_ops_dat, ops_block))

    # Generate one OPS kernel per offloadable iteration tree.
    for n, (section, trees) in enumerate(affine_trees):
        pre_loop, ops_kernel = opsit(trees, n)
        pre_time_loop.extend(pre_loop)
        self._ops_kernels.append(ops_kernel)

    # BUGFIX: the original asserted a bare generator expression, which is
    # always truthy, so this consistency check could never fail.
    assert all(d == dims[0] for d in dims), \
        "The OPS backend currently assumes that all kernels " \
        "have the same number of dimensions"

    ops_block_init = Expression(ClusterizedEq(Eq(
        ops_block,
        namespace['ops_decl_block'](dims[0], Literal('"block"')))))

    self._headers.append(namespace['ops_define_dimension'](dims[0]))
    self._includes.append('stdio.h')

    body = [ops_init, ops_block_init, *pre_time_loop, ops_partition, iet,
            ops_exit]
    return List(body=body)
def test_create_ops_dat_function(self):
    """
    Check that create_ops_dat for a space-order-2 Function emits the
    dim/base/d_p/d_m declaration expressions, in order, followed by the
    ops_decl_dat call itself.
    """
    # NOTE: shape must be a tuple; the original `(4)` was just the int 4.
    grid = Grid(shape=(4,))
    u = Function(name='u', grid=grid, space_order=2)
    block = OpsBlock('block')
    name_to_ops_dat = {}

    result = create_ops_dat(u, name_to_ops_dat, block)

    # The dat handle is registered under the function's name.
    assert name_to_ops_dat['u'].name == namespace['ops_dat_name'](u.name)
    assert name_to_ops_dat['u']._C_typename == namespace['ops_dat_type']

    # Declaration expressions, in emission order: dim, base, d_p, d_m.
    assert result[0].expr.lhs.name == namespace['ops_dat_dim'](u.name)
    assert result[0].expr.rhs.params == (Integer(4), )
    assert result[1].expr.lhs.name == namespace['ops_dat_base'](u.name)
    assert result[1].expr.rhs.params == (Zero(), )
    assert result[2].expr.lhs.name == namespace['ops_dat_d_p'](u.name)
    assert result[2].expr.rhs.params == (Integer(2), )
    assert result[3].expr.lhs.name == namespace['ops_dat_d_m'](u.name)
    assert result[3].expr.rhs.params == (Integer(-2), )

    # Final expression assigns the ops_decl_dat call to the dat handle.
    assert result[4].expr.lhs == name_to_ops_dat['u']
    assert result[4].expr.rhs.name == namespace['ops_decl_dat'].name
    assert result[4].expr.rhs.args == (
        block,
        1,
        Symbol(namespace['ops_dat_dim'](u.name)),
        Symbol(namespace['ops_dat_base'](u.name)),
        Symbol(namespace['ops_dat_d_m'](u.name)),
        Symbol(namespace['ops_dat_d_p'](u.name)),
        Byref(u.indexify((0, ))),
        Literal('"%s"' % u._C_typedata),
        Literal('"u"'))

def test_create_ops_arg_constant(self):
    """
    A Constant becomes an ops_arg_gbl argument, passed by reference with
    read-only access.
    """
    a = Constant(name='*a')

    res = create_ops_arg(a, {}, {})

    assert res.name == namespace['ops_arg_gbl'].name
    assert res.args == [
        Byref(Constant(name='a')),
        1,
        Literal('"%s"' % dtype_to_cstr(a.dtype)),
        namespace['ops_read']
    ]

@pytest.mark.parametrize('read', [True, False])
def test_create_ops_arg_function(self, read):
    """
    An OpsAccessible becomes an ops_arg_dat argument, with access mode
    chosen from its read/write flag.
    """
    u = OpsAccessible('u', np.float32, read)
    dat = OpsDat('u_dat')
    stencil = OpsStencil('stencil')

    res = create_ops_arg(u, {'u': dat}, {u: stencil})

    assert res.name == namespace['ops_arg_dat'].name
    assert res.args == [
        dat,
        1,
        stencil,
        Literal('"%s"' % dtype_to_cstr(u.dtype)),
        namespace['ops_read'] if read else namespace['ops_write']
    ]
def _specialize_iet(self, iet, **kwargs):
    """
    Transform the IET into an OPS-enabled program: wrap it with
    ops_init/ops_partition/ops_exit, declare the OPS block, convert every
    symbol accessed by an offloadable (affine) tree into an ops_dat, and
    replace each offloadable tree with an ops_par_loop call. Data is
    fetched back from the device after the time loop.
    """
    warning("The OPS backend is still work-in-progress")

    affine_trees = find_affine_trees(iet).items()

    # If there is no affine trees, then there is no loop to be optimized
    # using OPS.
    if not affine_trees:
        return iet

    ops_init = Call(namespace['ops_init'], [0, 0, 2])
    ops_partition = Call(namespace['ops_partition'], Literal('""'))
    ops_exit = Call(namespace['ops_exit'])

    # Extract all symbols that need to be converted to ops_dat
    dims = []
    to_dat = set()
    for _, tree in affine_trees:
        dims.append(len(tree[0].dimensions))
        symbols = set(FindSymbols('symbolics').visit(tree[0].root))
        symbols -= set(FindSymbols('defines').visit(tree[0].root))
        to_dat |= symbols

    # Create the OPS block for this problem
    ops_block = OpsBlock('block')
    ops_block_init = Expression(ClusterizedEq(Eq(
        ops_block,
        namespace['ops_decl_block'](dims[0], Literal('"block"')))))

    # To ensure deterministic code generation we order the datasets to
    # be generated (since a set is an unordered collection)
    to_dat = filter_sorted(to_dat)

    name_to_ops_dat = {}
    pre_time_loop = []
    after_time_loop = []
    for f in to_dat:
        if f.is_Constant:
            continue
        pre_time_loop.extend(
            list(create_ops_dat(f, name_to_ops_dat, ops_block)))
        # To return the result to Devito, it is necessary to copy the data
        # from the dat object back to the CPU memory.
        after_time_loop.extend(
            create_ops_fetch(f, name_to_ops_dat,
                             self.time_dimension.extreme_max))

    # Generate ops kernels for each offloadable iteration tree
    mapper = {}
    for n, (_, tree) in enumerate(affine_trees):
        pre_loop, ops_kernel, ops_par_loop_call = opsit(
            tree, n, name_to_ops_dat, ops_block, dims[0])

        pre_time_loop.extend(pre_loop)
        self._func_table[namespace['ops_kernel_file'](ops_kernel.name)] = \
            MetaCall(ops_kernel, False)
        # Replace the first tree with the par_loop call; the remaining
        # trees of the section are dropped (mapped to None).
        mapper[tree[0].root] = ops_par_loop_call
        mapper.update({i.root: mapper.get(i.root) for i in tree})  # Drop trees

    iet = Transformer(mapper).visit(iet)

    # BUGFIX: the original asserted a bare generator expression, which is
    # always truthy, so this consistency check could never fail.
    assert all(d == dims[0] for d in dims), \
        "The OPS backend currently assumes that all kernels " \
        "have the same number of dimensions"

    self._headers.append(namespace['ops_define_dimension'](dims[0]))
    self._includes.extend(['stdio.h', 'ops_seq.h'])

    body = [ops_init, ops_block_init, *pre_time_loop, ops_partition, iet,
            *after_time_loop, ops_exit]
    return List(body=body)
def make_ops_kernels(iet):
    """
    Transform the IET into an OPS-enabled program.

    Wraps the IET body with ops_init/ops_partition/ops_exit, declares the
    OPS block, converts every symbol accessed by an offloadable (affine)
    tree into an ops_dat, and replaces each offloadable tree with an
    ops_par_loop call. Data is fetched back to the host after the time
    loop.

    Returns the rebuilt IET together with a metadata dict carrying the
    extra includes, headers and kernel functions ('ffuncs') produced.
    """
    warning("The OPS backend is still work-in-progress")

    affine_trees = find_affine_trees(iet).items()

    # If there is no affine trees, then there is no loop to be optimized
    # using OPS.
    if not affine_trees:
        return iet, {}

    ops_init = Call(namespace['ops_init'], [0, 0, 2])
    ops_partition = Call(namespace['ops_partition'], Literal('""'))
    ops_exit = Call(namespace['ops_exit'])

    # Extract all symbols that need to be converted to ops_dat
    dims = []
    to_dat = set()
    for _, tree in affine_trees:
        dims.append(len(tree[0].dimensions))
        symbols = set(FindSymbols('symbolics').visit(tree[0].root))
        symbols -= set(FindSymbols('defines').visit(tree[0].root))
        to_dat |= symbols

    # Create the OPS block for this problem
    ops_block = OpsBlock('block')
    ops_block_init = Expression(ClusterizedEq(Eq(
        ops_block,
        namespace['ops_decl_block'](dims[0], Literal('"block"')))))

    # To ensure deterministic code generation we order the datasets to
    # be generated (since a set is an unordered collection)
    to_dat = filter_sorted(to_dat)

    name_to_ops_dat = {}
    pre_time_loop = []
    after_time_loop = []
    for f in to_dat:
        if f.is_Constant:
            continue
        pre_time_loop.extend(
            list(create_ops_dat(f, name_to_ops_dat, ops_block)))
        # Copy data from device to host
        after_time_loop.extend(
            create_ops_fetch(f, name_to_ops_dat,
                             f.grid.time_dim.extreme_max))

    # Generate ops kernels for each offloadable iteration tree
    mapper = {}
    ffuncs = []
    for n, (_, tree) in enumerate(affine_trees):
        pre_loop, ops_kernel, ops_par_loop_call = opsit(
            tree, n, name_to_ops_dat, ops_block, dims[0])

        pre_time_loop.extend(pre_loop)
        ffuncs.append(ops_kernel)
        # Replace the first tree with the par_loop call; the remaining
        # trees of the section are dropped (mapped to None).
        mapper[tree[0].root] = ops_par_loop_call
        mapper.update({i.root: mapper.get(i.root) for i in tree})  # Drop trees

    iet = Transformer(mapper).visit(iet)

    # BUGFIX: the original asserted a bare generator expression, which is
    # always truthy, so this consistency check could never fail.
    assert all(d == dims[0] for d in dims), \
        "The OPS backend currently assumes that all kernels " \
        "have the same number of dimensions"

    iet = iet._rebuild(body=flatten([
        ops_init, ops_block_init, pre_time_loop, ops_partition, iet.body,
        after_time_loop, ops_exit
    ]))

    return iet, {
        'includes': ['stdio.h', 'ops_seq.h'],
        'ffuncs': ffuncs,
        'headers': [namespace['ops_define_dimension'](dims[0])]
    }