def _build_plan(self, plan_factory, device_params, C, D, coeff1, coeff2):
    """
    Build a plan consisting of one nested computation call followed by one
    kernel call, each of which receives the same array for two parameters.

    :param plan_factory: callable returning a fresh plan object.
    :param device_params: not used by this method.
    :param C: array argument; its ``dtype`` and ``shape`` select the ``mul``
        module and the global size of the kernel call.
    :param D: array argument forwarded to the nested computation.
    :param coeff1: coefficient forwarded to the nested computation.
    :param coeff2: coefficient passed to the kernel; its ``dtype`` selects
        the ``mul`` module.
    :returns: the populated plan.
    """
    plan = plan_factory()

    nested = Dummy(C, D, coeff1, same_A_B=True)

    C_temp = plan.temp_array_like(C)
    # NOTE(review): D_temp is not referenced by any call below. The request is
    # kept because it is made on the plan object and may register state there;
    # confirm whether it can be dropped.
    D_temp = plan.temp_array_like(D)

    # Testing a computation call which uses the same argument for two parameters.
    plan.computation_call(nested, C_temp, D, C, C, coeff1)

    # Only multiplication is used by the kernel below, so no ``div`` module
    # is created here.
    mul = functions.mul(C.dtype, coeff2.dtype)

    # The kernel stores C(idx0, idx1) + mul(D(idx0, idx1), coeff) into CC.
    template = template_from(
        """
        <%def name="dummy(kernel_declaration, CC, C, D, coeff)">
        ${kernel_declaration}
        {
            VIRTUAL_SKIP_THREADS;
            VSIZE_T idx0 = virtual_global_id(0);
            VSIZE_T idx1 = virtual_global_id(1);

            ${CC.store_idx}(idx0, idx1,
                ${C.load_idx}(idx0, idx1) +
                ${mul}(${D.load_idx}(idx0, idx1), ${coeff}));
        }
        </%def>
        """
    )

    # Testing a kernel call which uses the same argument for two parameters.
    plan.kernel_call(
        template.get_def("dummy"),
        [C, C_temp, C_temp, coeff2],
        global_size=C.shape,
        render_kwds={"mul": mul},
    )

    return plan
def _build_plan(self, plan_factory, device_params, C, D, coeff1, coeff2):
    """
    Assemble a two-step plan: a nested ``Dummy`` computation call, then a
    kernel call. Both steps are given one array for two of their parameters.

    :param plan_factory: callable producing an empty plan.
    :param device_params: not used by this method.
    :param C: array argument; supplies the array dtype for ``mul`` and the
        kernel's global size.
    :param D: array argument handed to the nested computation.
    :param coeff1: coefficient handed to the nested computation.
    :param coeff2: coefficient handed to the kernel; supplies the coefficient
        dtype for ``mul``.
    :returns: the plan with both calls added.
    """
    plan = plan_factory()

    nested = Dummy(C, D, coeff1, same_A_B=True)

    C_temp = plan.temp_array_like(C)
    # NOTE(review): nothing below reads D_temp. The call is left in place since
    # it goes through the plan object and may affect its state; verify before
    # removing it.
    D_temp = plan.temp_array_like(D)

    # Testing a computation call which uses the same argument for two parameters.
    plan.computation_call(nested, C_temp, D, C, C, coeff1)

    arr_dtype = C.dtype
    coeff_dtype = coeff2.dtype
    # The kernel renders ``mul`` only; a ``div`` module was never passed to it,
    # so none is built.
    mul = functions.mul(arr_dtype, coeff_dtype)

    # Kernel body: CC(idx0, idx1) = C(idx0, idx1) + mul(D(idx0, idx1), coeff).
    template = template_from("""
        <%def name="dummy(kernel_declaration, CC, C, D, coeff)">
        ${kernel_declaration}
        {
            VIRTUAL_SKIP_THREADS;
            VSIZE_T idx0 = virtual_global_id(0);
            VSIZE_T idx1 = virtual_global_id(1);

            ${CC.store_idx}(idx0, idx1,
                ${C.load_idx}(idx0, idx1) +
                ${mul}(${D.load_idx}(idx0, idx1), ${coeff}));
        }
        </%def>
        """)

    # Testing a kernel call which uses the same argument for two parameters.
    plan.kernel_call(
        template.get_def('dummy'),
        [C, C_temp, C_temp, coeff2],
        global_size=C.shape,
        render_kwds=dict(mul=mul))

    return plan
def _build_plan(self, plan_factory, device_params, output):
    """
    Build a plan with a single kernel that combines two constant arrays.

    For every index ``i`` of the one-dimensional global range the kernel
    stores ``(arr2[0, i] + arr2[1, i]) * arr1[i]`` into ``output``.

    :param plan_factory: callable returning a fresh plan object.
    :param device_params: not used by this method.
    :param output: destination array; its ``shape`` is the global size.
    :returns: the populated plan.
    """
    kernel_src = """
        <%def name="dummy(kernel_declaration, output, arr1, arr2)">
        ${kernel_declaration}
        {
            VIRTUAL_SKIP_THREADS;
            const VSIZE_T i = virtual_global_id(0);
            ${arr1.ctype} x1 = ${arr1.load_idx}(i);
            ${arr2.ctype} x2 = ${arr2.load_idx}(0, i);
            ${arr2.ctype} x3 = ${arr2.load_idx}(1, i);
            ${output.store_idx}(i, (x2 + x3) * x1);
        }
        </%def>
        """

    plan = plan_factory()
    kernels = template_from(kernel_src)

    # Register the instance's two arrays with the plan as constant arrays.
    first_const = plan.constant_array(self._arr1)
    second_const = plan.constant_array(self._arr2)

    kernel_args = [output, first_const, second_const]
    plan.kernel_call(
        kernels.get_def('dummy'),
        kernel_args,
        global_size=output.shape,
    )
    return plan
def _build_plan(self, plan_factory, device_params, C, D, A, B, coeff):
    """
    Build a plan made of two chained kernel calls.

    ``dummy`` reads ``A`` and ``B`` and writes two temporary arrays: ``A``
    multiplied by ``coeff`` (stored at the swapped index ``(idx1, idx0)``)
    and ``B`` divided by ``coeff``. ``dummy2`` copies the first temporary
    into ``C`` and writes the second temporary, multiplied by a constant
    coefficient and added to a persistent array, into ``D``.

    When ``self._same_A_B`` is false the ``B``/``D`` part is addressed with a
    single index and only executed by threads with ``idx1 == 0``.

    :param plan_factory: callable returning a fresh plan object.
    :param device_params: not used by this method.
    :param C: first output array; its ``dtype`` selects ``mul``/``div``.
    :param D: second output array.
    :param A: first input array; its ``shape`` is the global size.
    :param B: second input array.
    :param coeff: coefficient; its ``dtype`` selects ``mul``/``div``.
    :returns: the populated plan.
    """
    plan = plan_factory()

    mul = functions.mul(C.dtype, coeff.dtype)
    div = functions.div(C.dtype, coeff.dtype)

    kernel_src = """
        <%def name="dummy(kernel_declaration, C, D, A, B, coeff)">
        ${kernel_declaration}
        {
            VIRTUAL_SKIP_THREADS;
            VSIZE_T idx0 = virtual_global_id(0);
            VSIZE_T idx1 = virtual_global_id(1);

            ${A.ctype} a = ${A.load_idx}(idx0, idx1);
            ${C.ctype} c = ${mul}(a, ${coeff});
            ${C.store_idx}(idx1, idx0, c);

            %if same_A_B:
                ${B.ctype} b = ${B.load_idx}(idx0, idx1);
                ${D.ctype} d = ${div}(b, ${coeff});
                ${D.store_idx}(idx0, idx1, d);
            %else:
            if (idx1 == 0)
            {
                ${B.ctype} b = ${B.load_idx}(idx0);
                ${D.ctype} d = ${div}(b, ${coeff});
                ${D.store_idx}(idx0, d);
            }
            %endif
        }
        </%def>

        <%def name="dummy2(kernel_declaration, CC, DD, C, D, pers_arr, const_coeff)">
        ${kernel_declaration}
        {
            VIRTUAL_SKIP_THREADS;
            VSIZE_T idx0 = virtual_global_id(0);
            VSIZE_T idx1 = virtual_global_id(1);

            ${CC.store_idx}(idx0, idx1, ${C.load_idx}(idx0, idx1));

            %if same_A_B:
                ${DD.store_idx}(
                    idx0, idx1,
                    ${mul}(${D.load_idx}(idx0, idx1), ${const_coeff}) + ${pers_arr.load_idx}(idx0, idx1));
            %else:
            if (idx1 == 0)
            {
                ${DD.store_idx}(
                    idx0,
                    ${mul}(${D.load_idx}(idx0), ${const_coeff}) + ${pers_arr.load_idx}(idx0));
            }
            %endif
        }
        </%def>
        """
    template = template_from(kernel_src)

    block_size = 8
    workgroup = (block_size, block_size)

    C_temp = plan.temp_array_like(C)
    D_temp = plan.temp_array_like(D)
    arr = plan.persistent_array(self._persistent_array)

    # First pass: fill the temporaries from the inputs.
    first_kwds = {"mul": mul, "div": div, "same_A_B": self._same_A_B}
    plan.kernel_call(
        template.get_def("dummy"),
        [C_temp, D_temp, A, B, coeff],
        global_size=A.shape,
        local_size=workgroup,
        render_kwds=first_kwds,
    )

    # The persistent-array argument is either the raw array kept on the
    # instance or the handle returned by ``plan.persistent_array`` above.
    if self._test_kernel_adhoc_array:
        pers_arg = self._persistent_array
    else:
        pers_arg = arr

    # The constant coefficient is either a plain Python int or a numpy scalar.
    if self._test_untyped_scalar:
        const_arg = 10
    else:
        const_arg = numpy.float32(10)

    # Second pass: produce the outputs from the temporaries.
    second_kwds = {"mul": mul, "same_A_B": self._same_A_B}
    plan.kernel_call(
        template.get_def("dummy2"),
        [C, D, C_temp, D_temp, pers_arg, const_arg],
        global_size=A.shape,
        local_size=workgroup,
        render_kwds=second_kwds,
    )

    return plan
def __init__(self, template_src, render_kwds=None):
    """
    Store the template built from ``template_src`` and the render keywords.

    :param template_src: value handed to ``template_from``; the result is
        kept as ``self.template``.
    :param render_kwds: optional mapping of render keywords. It is copied
        into a new ``dict``, so the caller's object is not stored; ``None``
        yields an empty dict.
    """
    self.template = template_from(template_src)

    if render_kwds is None:
        self.render_kwds = {}
    else:
        # Shallow copy: the instance gets its own dict object.
        self.render_kwds = dict(render_kwds)
def _build_plan(self, plan_factory, device_params, C, D, A, B, coeff):
    """
    Assemble a plan from two kernels that communicate through temporaries.

    Kernel ``dummy`` writes ``mul(A, coeff)`` into a temporary shaped like
    ``C`` (using the swapped index ``(idx1, idx0)`` for the store) and
    ``div(B, coeff)`` into a temporary shaped like ``D``. Kernel ``dummy2``
    copies the first temporary to ``C`` and stores
    ``mul(D_temp, const_coeff) + pers_arr`` into ``D``.

    If ``self._same_A_B`` is false, ``B``, ``D`` and the persistent array are
    indexed with ``idx0`` only, and only by threads where ``idx1 == 0``.

    :param plan_factory: callable producing an empty plan.
    :param device_params: not used by this method.
    :param C: first output array; provides the array dtype.
    :param D: second output array.
    :param A: first input array; provides the global size.
    :param B: second input array.
    :param coeff: coefficient; provides the coefficient dtype.
    :returns: the plan with both kernel calls added.
    """
    plan = plan_factory()

    array_dt, scalar_dt = C.dtype, coeff.dtype
    mul = functions.mul(array_dt, scalar_dt)
    div = functions.div(array_dt, scalar_dt)

    template = template_from("""
        <%def name="dummy(kernel_declaration, C, D, A, B, coeff)">
        ${kernel_declaration}
        {
            VIRTUAL_SKIP_THREADS;
            VSIZE_T idx0 = virtual_global_id(0);
            VSIZE_T idx1 = virtual_global_id(1);

            ${A.ctype} a = ${A.load_idx}(idx0, idx1);
            ${C.ctype} c = ${mul}(a, ${coeff});
            ${C.store_idx}(idx1, idx0, c);

            %if same_A_B:
                ${B.ctype} b = ${B.load_idx}(idx0, idx1);
                ${D.ctype} d = ${div}(b, ${coeff});
                ${D.store_idx}(idx0, idx1, d);
            %else:
            if (idx1 == 0)
            {
                ${B.ctype} b = ${B.load_idx}(idx0);
                ${D.ctype} d = ${div}(b, ${coeff});
                ${D.store_idx}(idx0, d);
            }
            %endif
        }
        </%def>

        <%def name="dummy2(kernel_declaration, CC, DD, C, D, pers_arr, const_coeff)">
        ${kernel_declaration}
        {
            VIRTUAL_SKIP_THREADS;
            VSIZE_T idx0 = virtual_global_id(0);
            VSIZE_T idx1 = virtual_global_id(1);

            ${CC.store_idx}(idx0, idx1, ${C.load_idx}(idx0, idx1));

            %if same_A_B:
                ${DD.store_idx}(
                    idx0, idx1,
                    ${mul}(${D.load_idx}(idx0, idx1), ${const_coeff}) + ${pers_arr.load_idx}(idx0, idx1));
            %else:
            if (idx1 == 0)
            {
                ${DD.store_idx}(
                    idx0,
                    ${mul}(${D.load_idx}(idx0), ${const_coeff}) + ${pers_arr.load_idx}(idx0));
            }
            %endif
        }
        </%def>
        """)

    block_size = 8
    # Launch settings shared by both kernel calls.
    launch = dict(global_size=A.shape, local_size=(block_size, block_size))

    C_temp = plan.temp_array_like(C)
    D_temp = plan.temp_array_like(D)
    arr = plan.persistent_array(self._persistent_array)

    # Stage 1: inputs -> temporaries.
    plan.kernel_call(
        template.get_def('dummy'),
        [C_temp, D_temp, A, B, coeff],
        render_kwds=dict(mul=mul, div=div, same_A_B=self._same_A_B),
        **launch)

    # Stage 2: temporaries -> outputs. The last two arguments depend on the
    # instance flags: raw array vs. plan handle, and Python int vs. numpy scalar.
    persistent_arg = self._persistent_array if self._test_kernel_adhoc_array else arr
    constant_arg = 10 if self._test_untyped_scalar else numpy.float32(10)
    plan.kernel_call(
        template.get_def('dummy2'),
        [C, D, C_temp, D_temp, persistent_arg, constant_arg],
        render_kwds=dict(mul=mul, same_A_B=self._same_A_B),
        **launch)

    return plan