def __init__(self, polynomial_degree, shape, powers_shape,
        powers_view=False, minus_one=False, invert_powers=False):

    self._batch_shape = powers_shape[:-1] if powers_view else powers_shape
    assert self._batch_shape == shape[:len(self._batch_shape)]

    self._powers_view = powers_view
    self._minus_one = minus_one
    self._invert_powers = invert_powers

    polynomials = Type(Torus32, shape + (polynomial_degree,))
    powers = Type(Int32, powers_shape)

    Computation.__init__(self, [
        Parameter('result', Annotation(polynomials, 'o')),
        Parameter('source', Annotation(polynomials, 'i')),
        Parameter('powers', Annotation(powers, 'i')),
        Parameter('powers_idx', Annotation(Type(Int32)))  # unused if powers_view == False
        ])
def hanning_window(arr, NFFT):
    """
    Applies the von Hann window to the rows of a 2D array.
    To account for zero padding (which we do not want to window),
    ``NFFT`` is provided separately.
    """
    if dtypes.is_complex(arr.dtype):
        coeff_dtype = dtypes.real_for(arr.dtype)
    else:
        coeff_dtype = arr.dtype

    return Transformation(
        [
            Parameter('output', Annotation(arr, 'o')),
            Parameter('input', Annotation(arr, 'i')),
        ],
        """
        ${dtypes.ctype(coeff_dtype)} coeff;
        ## The padding extends the last axis, so compare NFFT with the frame length.
        %if NFFT != output.shape[1]:
        if (${idxs[1]} >= ${NFFT})
        {
            coeff = 1;
        }
        else
        %endif
        {
            coeff = 0.5 * (1 - cos(2 * ${numpy.pi} * ${idxs[-1]} / (${NFFT} - 1)));
        }
        ${output.store_same}(${mul}(${input.load_same}, coeff));
        """,
        render_kwds=dict(
            coeff_dtype=coeff_dtype,
            NFFT=NFFT,
            mul=functions.mul(arr.dtype, coeff_dtype)))
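# A minimal usage sketch for the transformation above (hypothetical names: ``thr`` is a
# reikna CLUDA thread, ``frames`` is a 2D complex-valued array type of padded frames).
# Attaching the window to the FFT input makes the multiplication happen on the fly
# while the frames are loaded, so no intermediate array is needed.
def windowed_fft_sketch(thr, frames, NFFT):
    from reikna.fft import FFT

    fft = FFT(frames, axes=(1,))
    window = hanning_window(frames, NFFT)
    # The window's own input becomes the new leaf parameter of the FFT.
    fft.parameter.input.connect(window, window.output, unwindowed=window.input)
    return fft.compile(thr)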
def __init__(self, mode_arr, add_points=None, inverse=False, order=1, axes=None):

    if axes is None:
        axes = tuple(range(len(mode_arr.shape)))
    else:
        axes = tuple(axes)
    self._axes = list(sorted(axes))

    if add_points is None:
        add_points = [0] * len(mode_arr.shape)
    else:
        add_points = list(add_points)
    self._add_points = add_points

    coord_shape = list(mode_arr.shape)
    for axis in range(len(mode_arr.shape)):
        if axis in axes:
            coord_shape[axis] = get_spatial_points(
                mode_arr.shape[axis], order, add_points=add_points[axis])
    coord_arr = Type(mode_arr.dtype, shape=coord_shape)

    self._inverse = inverse
    self._order = order

    if not inverse:
        parameters = [
            Parameter('modes', Annotation(mode_arr, 'o')),
            Parameter('coords', Annotation(coord_arr, 'i'))]
    else:
        parameters = [
            Parameter('coords', Annotation(coord_arr, 'o')),
            Parameter('modes', Annotation(mode_arr, 'i'))]

    Computation.__init__(self, parameters)
def test_guiding_output(thr):
    N = 1000
    dtype = numpy.float32

    p = PureParallel(
        [
            Parameter('output', Annotation(Type(dtype, shape=N), 'o')),
            Parameter('input', Annotation(Type(dtype, shape=(2, N)), 'i'))
        ],
        """
        float t1 = ${input.load_idx}(0, ${idxs[0]});
        float t2 = ${input.load_idx}(1, ${idxs[0]});
        ${output.store_idx}(${idxs[0]}, t1 + t2);
        """,
        guiding_array='output')

    a = get_test_array_like(p.parameter.input)
    a_dev = thr.to_device(a)
    res_dev = thr.empty_like(p.parameter.output)

    pc = p.compile(thr)
    pc(res_dev, a_dev)

    res_ref = a[0] + a[1]

    assert diff_is_negligible(res_dev.get(), res_ref)
def get_nonlinear3(state_arr, scalar_dtype, nonlinear_module, dt):
    # k4 = N(D(psi_4), t + dt)
    # output = D(psi_k) + k4 / 6
    return PureParallel(
        [
            Parameter('output', Annotation(state_arr, 'o')),
            Parameter('kprop_psi_k', Annotation(state_arr, 'i')),
            Parameter('kprop_psi_4', Annotation(state_arr, 'i')),
            Parameter('t', Annotation(scalar_dtype))
        ],
        """
        <%
            all_indices = ', '.join(idxs)
        %>
        ${output.ctype} psi4_0 = ${kprop_psi_4.load_idx}(0, ${all_indices});
        ${output.ctype} psi4_1 = ${kprop_psi_4.load_idx}(1, ${all_indices});
        ${output.ctype} psik_0 = ${kprop_psi_k.load_idx}(0, ${all_indices});
        ${output.ctype} psik_1 = ${kprop_psi_k.load_idx}(1, ${all_indices});

        ${output.ctype} k4_0 = ${nonlinear}0(psi4_0, psi4_1, ${t} + ${dt});
        ${output.ctype} k4_1 = ${nonlinear}1(psi4_0, psi4_1, ${t} + ${dt});

        ${output.store_idx}(0, ${all_indices}, psik_0 + ${div}(k4_0, 6));
        ${output.store_idx}(1, ${all_indices}, psik_1 + ${div}(k4_1, 6));
        """,
        guiding_array=state_arr.shape[1:],
        render_kwds=dict(
            nonlinear=nonlinear_module,
            dt=dtypes.c_constant(dt, scalar_dtype),
            div=functions.div(state_arr.dtype, numpy.int32, out_dtype=state_arr.dtype)))
def classification_delta_kernel(ctx, outputs, targets, deltas):
    kernel_cache, thread = ctx.kernel_cache, ctx.thread

    assert outputs.shape[0] == targets.shape[0] == deltas.shape[0]
    assert len(targets.shape) == 1
    assert targets.dtype == numpy.int32
    assert outputs.shape[1] == deltas.shape[1]

    key = (classification_delta_kernel, outputs.shape)
    if key not in kernel_cache:
        log.info("compiling " + str(key))
        kernel = PureParallel(
            [
                Parameter('outputs', Annotation(outputs, 'i')),
                Parameter('targets', Annotation(targets, 'i')),
                Parameter('deltas', Annotation(deltas, 'o'))
            ],
            """
            ${outputs.ctype} out = ${outputs.load_same};
            SIZE_T t = ${targets.load_idx}(${idxs[0]});
            SIZE_T idx = ${idxs[1]};
            ${deltas.ctype} d;
            if (t == idx) {
                d = 1.0f - out;
            } else {
                d = -out;
            }
            ${deltas.store_same}(d);
            """,
            guiding_array='deltas')

        kernel_cache[key] = kernel.compile(thread)

    # Run kernel
    kernel_cache[key](outputs, targets, deltas)
def __init__(self, params: 'TGswParams', shape, bk_len, perf_params: PerformanceParameters):

    mask_size = params.tlwe_params.mask_size
    polynomial_degree = params.tlwe_params.polynomial_degree
    decomp_length = params.decomp_length

    transform = get_transform(params.tlwe_params.transform_type)
    tdtype = transform.transformed_dtype()
    tlength = transform.transformed_length(polynomial_degree)

    accum = Type(Torus32, shape + (mask_size + 1, polynomial_degree))
    bootstrap_key = Type(
        tdtype, (bk_len, mask_size + 1, decomp_length, mask_size + 1, tlength))

    self._params = params
    self._perf_params = perf_params
    self._shape = shape
    self._bk_len = bk_len

    Computation.__init__(self, [
        Parameter('accum', Annotation(accum, 'io')),
        Parameter('bootstrap_key', Annotation(bootstrap_key, 'i')),
        Parameter('bk_row_idx', Annotation(numpy.int32))
        ])
def _build_plan(self, plan_factory, device_params, result, lwe_a, lwe_b, key):
    plan = plan_factory()

    mul_key = MatrixMulVector(lwe_a)

    fill_res = Transformation(
        [
            Parameter('result', Annotation(result, 'o')),
            Parameter('b', Annotation(lwe_b, 'i')),
            Parameter('a_times_key', Annotation(lwe_b, 'i'))
        ],
        """
        ${result.store_same}(${b.load_same} - ${a_times_key.load_same});
        """,
        connectors=['a_times_key'])

    mul_key.parameter.output.connect(
        fill_res, fill_res.a_times_key,
        result=fill_res.result, b=fill_res.b)

    plan.computation_call(mul_key, result, lwe_b, lwe_a, key)

    return plan
def _build_plan(self, plan_factory, device_params, output, matrix, vector):
    plan = plan_factory()

    summation = Reduce(matrix, predicate_sum(matrix.dtype), axes=(len(matrix.shape) - 1,))

    mul_vec = Transformation(
        [
            Parameter('output', Annotation(matrix, 'o')),
            Parameter('matrix', Annotation(matrix, 'i')),
            Parameter('vector', Annotation(vector, 'i'))
        ],
        """
        ${output.store_same}(${mul}(${matrix.load_same}, ${vector.load_idx}(${idxs[-1]})));
        """,
        render_kwds=dict(mul=functions.mul(matrix.dtype, vector.dtype)),
        connectors=['output', 'matrix'])

    summation.parameter.input.connect(
        mul_vec, mul_vec.output,
        matrix=mul_vec.matrix, vector=mul_vec.vector)

    plan.computation_call(summation, output, matrix, vector)

    return plan
def logistic(context, activations, bias, dest=None):
    kernel_cache, thread = context.kernel_cache, context.thread

    if dest is None:
        dest = activations

    key = (logistic, activations.shape, thread)
    if key not in kernel_cache:
        log.info("compiling " + str(key))

        assert activations.shape[1] == bias.shape[0]

        kernel = PureParallel(
            [
                Parameter('activations', Annotation(activations, 'i')),
                Parameter('bias', Annotation(bias, 'i')),
                Parameter('dest', Annotation(dest, 'o')),
            ],
            """
            ${activations.ctype} a = ${activations.load_same};
            ${bias.ctype} b = ${bias.load_idx}(${idxs[1]});
            a += b;
            a = min(max(-45.0f, a), 45.0f);
            a = 1.0f / (1.0f + exp(-a));
            ${dest.store_same}(a);
            """,
            guiding_array='activations')

        kernel_cache[key] = kernel.compile(thread, fast_math=True)

    # Run kernel
    kernel_cache[key](activations, bias, dest)

    return dest
def __init__(
        self, params: 'TLweParams', shape, noise: float,
        perf_params: PerformanceParametersForDevice):

    polynomial_degree = params.polynomial_degree
    mask_size = params.mask_size

    result_a = Type(Torus32, shape + (mask_size + 1, polynomial_degree))
    result_cv = Type(ErrorFloat, shape)
    key = Type(Int32, (mask_size, polynomial_degree))
    noises1 = Type(Torus32, shape + (mask_size, polynomial_degree))
    noises2 = Type(Torus32, shape + (polynomial_degree,))

    self._transform_type = params.transform_type
    self._noise = noise
    self._mask_size = mask_size
    self._polynomial_degree = polynomial_degree
    self._perf_params = perf_params

    Computation.__init__(self, [
        Parameter('result_a', Annotation(result_a, 'o')),
        Parameter('result_cv', Annotation(result_cv, 'o')),
        Parameter('key', Annotation(key, 'i')),
        Parameter('noises1', Annotation(noises1, 'i')),
        Parameter('noises2', Annotation(noises2, 'i'))
        ])
def logistic_derivative(context, activations, delta, dest=None):
    kernel_cache, thread = context.kernel_cache, context.thread

    if dest is None:
        dest = delta

    key = (logistic_derivative, activations.shape, thread)
    if key not in kernel_cache:
        log.info("compiling " + str(key))
        kernel = PureParallel(
            [
                Parameter('activations', Annotation(activations, 'i')),
                Parameter('delta', Annotation(activations, 'i')),
                Parameter('dest', Annotation(dest, 'o')),
            ],
            """
            ${activations.ctype} a = ${activations.load_same};
            ${delta.ctype} d = ${delta.load_same};
            d = d * a * (1.0f - a);
            ${dest.store_same}(d);
            """,
            guiding_array='activations')

        kernel_cache[key] = kernel.compile(thread, fast_math=True)

    # Run kernel
    kernel_cache[key](activations, delta, dest)
def __init__(self, arr_t, output_arr_t=None, axes=None, block_width_override=None):

    self._block_width_override = block_width_override

    all_axes = range(len(arr_t.shape))
    if axes is None:
        axes = tuple(reversed(all_axes))
    else:
        assert set(axes) == set(all_axes)

    self._axes = tuple(axes)
    self._transposes = get_transposes(arr_t.shape, self._axes)

    output_shape = transpose_shape(arr_t.shape, self._axes)

    if output_arr_t is None:
        output_arr = Type(arr_t.dtype, output_shape)
    else:
        if output_arr_t.shape != output_shape:
            raise ValueError(
                "Expected output array shape: {exp_shape}, got {got_shape}".format(
                    exp_shape=output_shape, got_shape=output_arr_t.shape))
        if output_arr_t.dtype != arr_t.dtype:
            raise ValueError("Input and output arrays must have the same dtype")
        output_arr = output_arr_t

    Computation.__init__(self, [
        Parameter('output', Annotation(output_arr, 'o')),
        Parameter('input', Annotation(arr_t, 'i'))])
def roll_computation(array, axis):
    return PureParallel(
        [
            Parameter('output', Annotation(array, 'o')),
            Parameter('input', Annotation(array, 'i')),
            Parameter('shift', Annotation(Type(numpy.int32)))
        ],
        """
        <%
            shape = input.shape
        %>
        %for i in range(len(shape)):
        VSIZE_T output_${idxs[i]} =
            %if i == axis:
            ${shift} == 0 ?
                ${idxs[i]} :
                ## Since ``shift`` can be negative, and its absolute value greater than
                ## ``shape[i]``, a double modulo division is necessary
                ## (the ``%`` operator preserves the sign of the dividend in C).
                (${idxs[i]} + (${shape[i]} + ${shift} % ${shape[i]})) % ${shape[i]};
            %else:
            ${idxs[i]};
            %endif
        %endfor

        ${output.store_idx}(
            ${", ".join("output_" + name for name in idxs)},
            ${input.load_idx}(${", ".join(idxs)}));
        """,
        guiding_array='input',
        render_kwds=dict(axis=axis))
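# A minimal sketch of how the kernel above might be invoked (hypothetical names:
# ``thr`` is a reikna CLUDA thread, ``arr_dev`` a device array). The result is
# expected to match ``numpy.roll(arr, shift, axis=axis)`` computed on the host.
def roll_sketch(thr, arr_dev, shift, axis):
    rolled = thr.empty_like(arr_dev)
    comp = roll_computation(arr_dev, axis).compile(thr)
    comp(rolled, arr_dev, numpy.int32(shift))
    return rolled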
def get_prepare_iprfft_output(y):
    # Input: size N//4
    # Output: size N//4
    N = y.shape[-1] * 2

    return Transformation(
        [
            Parameter('x', Annotation(y, 'o')),
            Parameter('y', Annotation(y, 'i')),
            Parameter('x0', Annotation(Type(y.dtype, y.shape[:-1]), 'i')),
            Parameter('coeffs', Annotation(Type(y.dtype, (N // 2,)), 'i')),
        ],
        """
        ${y.ctype} y = ${y.load_same};
        ${coeffs.ctype} coeff = ${coeffs.load_idx}(${idxs[-1]});
        ${x.ctype} x;

        if (${idxs[-1]} == 0)
        {
            ${x0.ctype} x0 = ${x0.load_idx}(${", ".join(idxs[:-1])});
            x = x0 / ${N // 2};
        }
        else
        {
            x = y * coeff;
        }

        ${x.store_same}(x);
        """,
        connectors=['y'],
        render_kwds=dict(N=N))
def _build_plan(self, plan_factory, device_params, output, alpha, beta):
    plan = plan_factory()

    for_reduction = Type(numpy.float64, alpha.shape)

    meter_trf = Transformation(
        [
            Parameter('output', Annotation(for_reduction, 'o')),
            Parameter('alpha', Annotation(alpha, 'i')),
            Parameter('beta', Annotation(beta, 'i')),
        ],
        """
        ${alpha.ctype} alpha = ${alpha.load_same};
        ${beta.ctype} beta = ${beta.load_same};
        ${alpha.ctype} t = ${mul_cc}(alpha, beta);
        ${alpha.ctype} np = ${exp_c}(COMPLEX_CTR(${alpha.ctype})(-t.x, -t.y));
        ${alpha.ctype} cp = COMPLEX_CTR(${alpha.ctype})(1 - np.x, -np.y);
        ${output.store_same}(cp.x);
        """,
        render_kwds=dict(
            mul_cc=functions.mul(alpha.dtype, alpha.dtype),
            exp_c=functions.exp(alpha.dtype),
            ))

    reduction = Reduce(for_reduction, predicate_sum(output.dtype), axes=(0,))
    reduction.parameter.input.connect(
        meter_trf, meter_trf.output,
        alpha_p=meter_trf.alpha, beta_p=meter_trf.beta)

    plan.computation_call(reduction, output, alpha, beta)

    return plan
def _build_plan(self, plan_factory, device_params, a, current_variances, mu):
    plan = plan_factory()

    fill = PureParallel(
        [
            Parameter('a', Annotation(a, 'o')),
            Parameter('current_variances', Annotation(current_variances, 'o')),
            Parameter('mu', Annotation(mu, 'i'))
        ],
        """
        ${a.ctype} a;
        if (${idxs[-2]} == ${mask_size})
        {
            a = ${mu.load_idx}(${", ".join(idxs[:-2])}, ${idxs[-1]});
        }
        else
        {
            a = 0;
        }
        ${a.store_same}(a);

        if (${idxs[-1]} == 0)
        {
            ${current_variances.store_idx}(${", ".join(idxs[:-1])}, 0);
        }
        """,
        render_kwds=dict(mask_size=self._mask_size))

    plan.computation_call(fill, a, current_variances, mu)

    return plan
def _build_plan(self, plan_factory, device_params, output, alpha, beta):
    plan = plan_factory()

    for_reduction = Type(numpy.float64, alpha.shape)

    meter_trf = Transformation(
        [
            Parameter('output', Annotation(for_reduction, 'o')),
            Parameter('alpha', Annotation(alpha, 'i')),
            Parameter('beta', Annotation(beta, 'i')),
        ],
        """
        ${alpha.ctype} alpha = ${alpha.load_same};
        ${beta.ctype} beta = ${beta.load_same};
        ${alpha.ctype} t = ${mul_cc}(alpha, beta);
        ${output.store_same}(t.x - ${ordering});
        """,
        render_kwds=dict(
            mul_cc=functions.mul(alpha.dtype, alpha.dtype),
            ordering=ordering(self._representation),
            ))

    reduction = Reduce(for_reduction, predicate_sum(output.dtype), axes=(0,))
    reduction.parameter.input.connect(
        meter_trf, meter_trf.output,
        alpha_p=meter_trf.alpha, beta_p=meter_trf.beta)

    plan.computation_call(reduction, output, alpha, beta)

    return plan
def __init__(
        self, params: TGswParams, in_out_params: LweParams, shape,
        perf_params: PerformanceParameters):

    tlwe_params = params.tlwe_params
    decomp_length = params.decomp_length
    mask_size = tlwe_params.mask_size
    polynomial_degree = tlwe_params.polynomial_degree
    input_size = params.tlwe_params.extracted_lweparams.size
    output_size = in_out_params.size

    assert mask_size == 1 and decomp_length == 2

    transform_type = params.tlwe_params.transform_type
    transform = get_transform(transform_type)
    tlength = transform.transformed_length(polynomial_degree)
    tdtype = transform.transformed_dtype()

    out_a = Type(Torus32, shape + (input_size,))
    out_b = Type(Torus32, shape)
    accum_a = Type(Torus32, shape + (mask_size + 1, polynomial_degree))
    gsw = Type(tdtype, (output_size, mask_size + 1, decomp_length, mask_size + 1, tlength))
    bara = Type(Torus32, shape + (output_size,))

    self._params = params
    self._in_out_params = in_out_params
    self._perf_params = perf_params

    Computation.__init__(self, [
        Parameter('lwe_a', Annotation(out_a, 'io')),
        Parameter('lwe_b', Annotation(out_b, 'io')),
        Parameter('accum_a', Annotation(accum_a, 'io')),
        Parameter('gsw', Annotation(gsw, 'i')),
        Parameter('bara', Annotation(bara, 'i'))
        ])
def norm_const(arr_t, order):
    """
    Returns a transformation that calculates the ``order``-norm
    (1 output, 1 input): ``output = abs(input) ** order``.
    """
    if dtypes.is_complex(arr_t.dtype):
        out_dtype = dtypes.real_for(arr_t.dtype)
    else:
        out_dtype = arr_t.dtype

    return Transformation(
        [
            Parameter('output', Annotation(Type(out_dtype, arr_t.shape), 'o')),
            Parameter('input', Annotation(arr_t, 'i'))
        ],
        """
        ${input.ctype} val = ${input.load_same};
        ${output.ctype} norm = ${norm}(val);
        %if order != 2:
        norm = pow(norm, ${dtypes.c_constant(order / 2, output.dtype)});
        %endif
        ${output.store_same}(norm);
        """,
        render_kwds=dict(
            norm=functions.norm(arr_t.dtype),
            order=order))
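# A minimal sketch (hypothetical names: ``thr``, ``arr_t``) showing the transformation
# above attached to the input of a Reduce, so that sum(abs(x) ** order) over the whole
# array can be computed without materializing the individual norms.
def norm_sum_sketch(thr, arr_t, order):
    from reikna.algorithms import Reduce, predicate_sum

    norm_trf = norm_const(arr_t, order)
    out_dtype = dtypes.real_for(arr_t.dtype) if dtypes.is_complex(arr_t.dtype) else arr_t.dtype

    # Reduce over all axes; the transformation's input becomes the new leaf parameter.
    reduction = Reduce(Type(out_dtype, arr_t.shape), predicate_sum(out_dtype))
    reduction.parameter.input.connect(norm_trf, norm_trf.output, source=norm_trf.input)
    return reduction.compile(thr)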
def get_tgsw_polynomial_decomp_trf(params: 'TGswParams', shape):
    tlwe_params = params.tlwe_params
    decomp_length = params.decomp_length
    mask_size = tlwe_params.mask_size
    polynomial_degree = tlwe_params.polynomial_degree

    result = Type(Int32, shape + (mask_size + 1, decomp_length, polynomial_degree))
    sample = Type(Torus32, shape + (mask_size + 1, polynomial_degree))

    return Transformation(
        [
            Parameter('result', Annotation(result, 'o')),
            Parameter('sample', Annotation(sample, 'i'))
        ],
        """
        <%
            mask = 2**params.bs_log2_base - 1
            half_base = 2**(params.bs_log2_base - 1)
        %>
        ${sample.ctype} sample = ${sample.load_idx}(${", ".join(idxs[:-2])}, ${idxs[-1]});
        int decomp_shift = 32 - (${idxs[-2]} + 1) * ${params.bs_log2_base};
        ${result.store_same}(
            (((sample + (${params.offset})) >> decomp_shift) & ${mask}) - ${half_base}
            );
        """,
        connectors=['result'],
        render_kwds=dict(params=params))
def __init__(
        self, arr1, arr2, coeff, second_coeff, same_A_B=False,
        test_computation_adhoc_array=False,
        test_computation_incorrect_role=False,
        test_computation_incorrect_type=False,
        test_same_arg_as_i_and_o=False):

    self._second_coeff = second_coeff
    self._same_A_B = same_A_B
    self._test_same_arg_as_i_and_o = test_same_arg_as_i_and_o

    self._test_computation_adhoc_array = test_computation_adhoc_array
    self._test_computation_incorrect_role = test_computation_incorrect_role
    self._test_computation_incorrect_type = test_computation_incorrect_type

    Computation.__init__(self, [
        Parameter('C', Annotation(arr1, 'o')),
        Parameter('D', Annotation(arr2, 'o')),
        Parameter('A', Annotation(arr1, 'i')),
        Parameter('B', Annotation(arr2, 'i')),
        Parameter('coeff', Annotation(coeff))
        ])
def __init__(
        self, transform, batch_shape, inverse=False, i32_conversion=False,
        transforms_per_block=4, kernel_repetitions=1):

    self._inverse = inverse
    self._transform = transform
    self._transforms_per_block = transforms_per_block
    self._kernel_repetitions = kernel_repetitions
    self._i32_conversion = i32_conversion

    tr_arr = Type(self._transform.elem_dtype, batch_shape + (transform.transform_length,))

    if i32_conversion:
        arr = Type(numpy.int32, batch_shape + (transform.polynomial_length,))
        if inverse:
            oarr = arr
            iarr = tr_arr
        else:
            oarr = tr_arr
            iarr = arr
    else:
        oarr = tr_arr
        iarr = tr_arr

    Computation.__init__(self, [
        Parameter('output', Annotation(oarr, 'o')),
        Parameter('input', Annotation(iarr, 'i'))])
def __init__(self, arr, coeff):
    Computation.__init__(self, [
        Parameter('C', Annotation(arr, 'io')),
        Parameter('D', Annotation(arr, 'io')),
        Parameter('coeff1', Annotation(coeff)),
        Parameter('coeff2', Annotation(coeff))
        ])
def rolling_frame(arr, NFFT, noverlap, pad_to):
    """
    Transforms a 1D array to a 2D array whose rows are
    partially overlapped parts of the initial array.
    """
    frame_step = NFFT - noverlap
    frame_num = (arr.size - noverlap) // frame_step
    frame_size = NFFT if pad_to is None else pad_to

    result_arr = Type(arr.dtype, (frame_num, frame_size))

    return Transformation(
        [
            Parameter('output', Annotation(result_arr, 'o')),
            Parameter('input', Annotation(arr, 'i')),
        ],
        """
        %if NFFT != output.shape[1]:
        if (${idxs[1]} >= ${NFFT})
        {
            ${output.store_same}(0);
        }
        else
        %endif
        {
            ${output.store_same}(${input.load_idx}(${idxs[0]} * ${frame_step} + ${idxs[1]}));
        }
        """,
        render_kwds=dict(frame_step=frame_step, NFFT=NFFT),
        # note that only the "store_same"-using argument can serve as a connector!
        connectors=['output'])
def __init__(
        self, arr1, arr2, coeff, same_A_B=False,
        test_incorrect_parameter_name=False,
        test_untyped_scalar=False,
        test_kernel_adhoc_array=False):

    assert len(arr1.shape) == 2
    assert len(arr2.shape) == (2 if same_A_B else 1)
    assert arr1.dtype == arr2.dtype
    if same_A_B:
        assert arr1.shape == arr2.shape
    else:
        assert arr1.shape[0] == arr1.shape[1]

    self._same_A_B = same_A_B
    self._persistent_array = numpy.arange(arr2.size).reshape(arr2.shape).astype(arr2.dtype)

    self._test_untyped_scalar = test_untyped_scalar
    self._test_kernel_adhoc_array = test_kernel_adhoc_array

    Computation.__init__(self, [
        Parameter(
            ('_C' if test_incorrect_parameter_name else 'C'),
            Annotation(arr1, 'o')),
        Parameter('D', Annotation(arr2, 'o')),
        Parameter('A', Annotation(arr1, 'i')),
        Parameter('B', Annotation(arr2, 'i')),
        Parameter('coeff', Annotation(coeff))
        ])
def _build_plan(
        self, plan_factory, device_params,
        result_a, result_b, result_cv, messages, key, noises_a, noises_b):

    plan = plan_factory()

    mul_key = MatrixMulVector(noises_a)

    fill_b_cv = Transformation(
        [
            Parameter('result_b', Annotation(result_b, 'o')),
            Parameter('result_cv', Annotation(result_cv, 'o')),
            Parameter('messages', Annotation(messages, 'i')),
            Parameter('noises_a_times_key', Annotation(noises_b, 'i')),
            Parameter('noises_b', Annotation(noises_b, 'i'))
        ],
        """
        ${result_b.store_same}(
            ${noises_b.load_same}
            + ${messages.load_same}
            + ${noises_a_times_key.load_same});
        ${result_cv.store_same}(${noise**2});
        """,
        connectors=['noises_a_times_key'],
        render_kwds=dict(noise=self._noise))

    mul_key.parameter.output.connect(
        fill_b_cv, fill_b_cv.noises_a_times_key,
        b=fill_b_cv.result_b, cv=fill_b_cv.result_cv,
        messages=fill_b_cv.messages, noises_b=fill_b_cv.noises_b)

    plan.computation_call(mul_key, result_b, result_cv, messages, noises_b, noises_a, key)
    plan.computation_call(
        PureParallel.from_trf(transformations.copy(noises_a)),
        result_a, noises_a)

    return plan
def get_prepare_prfft_scan(output):
    return Transformation(
        [
            Parameter('output', Annotation(output, 'o')),
            Parameter('Y', Annotation(output, 'i')),
            Parameter(
                're_X_0',
                Annotation(Type(dtypes.real_for(output.dtype), output.shape[:-1]), 'i'))
        ],
        """
        ${Y.ctype} Y = ${Y.load_same};
        Y = COMPLEX_CTR(${Y.ctype})(Y.y, -Y.x);

        if (${idxs[-1]} == 0)
        {
            Y.x = Y.x / 2 + ${re_X_0.load_idx}(${", ".join(idxs[:-1])});
            Y.y /= 2;
        }

        ${output.store_same}(Y);
        """,
        connectors=['output', 'Y'])
def identity(type):
    return PureParallel(
        [
            Parameter('output', Annotation(type, 'o')),
            Parameter('input', Annotation(type, 'i'))
        ],
        """
        ${output.store_same}(${input.load_same});
        """)
def __init__(self, out_type, in_type):
    '''
    Input transformation that implements an explicit type cast.

    Arguments
    ---------
    out_type: `reikna.core.Type`
        Output dtype and shape.
    in_type: `reikna.core.Type`
        Input dtype and shape.

    Notes
    -----
    * `in_type` and `out_type` shapes must be equal.
    * Does not support real-to-complex and complex-to-real conversions.
    '''
    if in_type.shape != out_type.shape:
        raise ValueError('Shapes of out_type and in_type must be equal.')

    # equivalently: np.iscomplexobj(in_type) != np.iscomplexobj(out_type)
    if (issubclass(in_type.dtype.type, np.complexfloating)
            != issubclass(out_type.dtype.type, np.complexfloating)):
        raise ValueError('Unable to cast real to complex and vice versa.')

    out_param = Parameter('output', Annotation(out_type, 'o'))
    in_param = Parameter('input', Annotation(in_type, 'i'))
    ctype = out_type.ctype.replace('unsigned ', 'u')

    super(Cast, self).__init__([out_param, in_param], self.code, dict(ctype=ctype))