Example #1
    def __init__(self,
                 polynomial_degree,
                 shape,
                 powers_shape,
                 powers_view=False,
                 minus_one=False,
                 invert_powers=False):

        self._batch_shape = powers_shape[:-1] if powers_view else powers_shape
        assert self._batch_shape == shape[:len(self._batch_shape)]

        self._powers_view = powers_view
        self._minus_one = minus_one
        self._invert_powers = invert_powers

        polynomials = Type(Torus32, shape + (polynomial_degree, ))
        powers = Type(Int32, powers_shape)

        Computation.__init__(
            self,
            [
                Parameter('result', Annotation(polynomials, 'o')),
                Parameter('source', Annotation(polynomials, 'i')),
                Parameter('powers', Annotation(powers, 'i')),
                Parameter('powers_idx', Annotation(
                    Type(Int32)))  # unused if powers_view==False
            ])
Example #2
def hanning_window(arr, NFFT):
    """
    Applies the von Hann window to the rows of a 2D array.
    To account for zero padding (which we do not want to window), NFFT is provided separately.
    """
    if dtypes.is_complex(arr.dtype):
        coeff_dtype = dtypes.real_for(arr.dtype)
    else:
        coeff_dtype = arr.dtype
    return Transformation([
        Parameter('output', Annotation(arr, 'o')),
        Parameter('input', Annotation(arr, 'i')),
    ],
                          """
        ${dtypes.ctype(coeff_dtype)} coeff;
        %if NFFT != output.shape[1]:
        if (${idxs[1]} >= ${NFFT})
        {
            coeff = 1;
        }
        else
        %endif
        {
            coeff = 0.5 * (1 - cos(2 * ${numpy.pi} * ${idxs[-1]} / (${NFFT} - 1)));
        }
        ${output.store_same}(${mul}(${input.load_same}, coeff));
        """,
                          render_kwds=dict(coeff_dtype=coeff_dtype,
                                           NFFT=NFFT,
                                           mul=functions.mul(
                                               arr.dtype, coeff_dtype)))
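
A usage sketch (the thread creation, the shapes, the NFFT value, and the parameter name 'unwindowed' are illustrative assumptions, not part of the snippet above):

import numpy
from reikna.cluda import any_api
from reikna.fft import FFT

thr = any_api().Thread.create()

arr = Type(numpy.complex64, (128, 1024))  # 128 frames of 1024 points each
window = hanning_window(arr, NFFT=800)    # only the first 800 points carry data

# A Transformation cannot run on its own; attach it to the FFT's input.
fft = FFT(arr, axes=(1,))
fft.parameter.input.connect(window, window.output, unwindowed=window.input)
fftc = fft.compile(thr)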
Example #3
File: dht.py Project: xexo7C8/reikna
    def __init__(self, mode_arr, add_points=None, inverse=False, order=1, axes=None):

        if axes is None:
            axes = tuple(range(len(mode_arr.shape)))
        else:
            axes = tuple(axes)
        self._axes = list(sorted(axes))

        if add_points is None:
            add_points = [0] * len(mode_arr.shape)
        else:
            add_points = list(add_points)
        self._add_points = add_points

        coord_shape = list(mode_arr.shape)
        for axis in range(len(mode_arr.shape)):
            if axis in axes:
                coord_shape[axis] = get_spatial_points(
                    mode_arr.shape[axis], order, add_points=add_points[axis])
        coord_arr = Type(mode_arr.dtype, shape=coord_shape)

        self._inverse = inverse
        self._order = order

        if not inverse:
            parameters = [
                Parameter('modes', Annotation(mode_arr, 'o')),
                Parameter('coords', Annotation(coord_arr, 'i'))]
        else:
            parameters = [
                Parameter('coords', Annotation(coord_arr, 'o')),
                Parameter('modes', Annotation(mode_arr, 'i'))]

        Computation.__init__(self, parameters)
Example #4
def test_guiding_output(thr):

    N = 1000
    dtype = numpy.float32

    p = PureParallel([
        Parameter('output', Annotation(Type(dtype, shape=N), 'o')),
        Parameter('input', Annotation(Type(dtype, shape=(2, N)), 'i'))
    ],
                     """
        float t1 = ${input.load_idx}(0, ${idxs[0]});
        float t2 = ${input.load_idx}(1, ${idxs[0]});
        ${output.store_idx}(${idxs[0]}, t1 + t2);
        """,
                     guiding_array='output')

    a = get_test_array_like(p.parameter.input)
    a_dev = thr.to_device(a)
    res_dev = thr.empty_like(p.parameter.output)

    pc = p.compile(thr)
    pc(res_dev, a_dev)

    res_ref = a[0] + a[1]

    assert diff_is_negligible(res_dev.get(), res_ref)
Example #5
def get_nonlinear3(state_arr, scalar_dtype, nonlinear_module, dt):
    # k4 = N(D(psi_4), t + dt)
    # output = D(psi_k) + k4 / 6
    return PureParallel([
        Parameter('output', Annotation(state_arr, 'o')),
        Parameter('kprop_psi_k', Annotation(state_arr, 'i')),
        Parameter('kprop_psi_4', Annotation(state_arr, 'i')),
        Parameter('t', Annotation(scalar_dtype))
    ],
                        """
        <%
            all_indices = ', '.join(idxs)
        %>

        ${output.ctype} psi4_0 = ${kprop_psi_4.load_idx}(0, ${all_indices});
        ${output.ctype} psi4_1 = ${kprop_psi_4.load_idx}(1, ${all_indices});
        ${output.ctype} psik_0 = ${kprop_psi_k.load_idx}(0, ${all_indices});
        ${output.ctype} psik_1 = ${kprop_psi_k.load_idx}(1, ${all_indices});

        ${output.ctype} k4_0 = ${nonlinear}0(psi4_0, psi4_1, ${t} + ${dt});
        ${output.ctype} k4_1 = ${nonlinear}1(psi4_0, psi4_1, ${t} + ${dt});

        ${output.store_idx}(0, ${all_indices}, psik_0 + ${div}(k4_0, 6));
        ${output.store_idx}(1, ${all_indices}, psik_1 + ${div}(k4_1, 6));
        """,
                        guiding_array=state_arr.shape[1:],
                        render_kwds=dict(
                            nonlinear=nonlinear_module,
                            dt=dtypes.c_constant(dt, scalar_dtype),
                            div=functions.div(state_arr.dtype,
                                              numpy.int32,
                                              out_dtype=state_arr.dtype)))
Example #6
def classification_delta_kernel(ctx, outputs, targets, deltas):
    kernel_cache, thread = ctx.kernel_cache, ctx.thread

    assert outputs.shape[0] == targets.shape[0] == deltas.shape[0]
    assert len(targets.shape) == 1
    assert targets.dtype == numpy.int32
    assert outputs.shape[1] == deltas.shape[1]

    key = (classification_delta_kernel, outputs.shape)
    if key not in kernel_cache:
        log.info("compiling " + str(key))
        kernel = PureParallel([
            Parameter('outputs', Annotation(outputs, 'i')),
            Parameter('targets', Annotation(targets, 'i')),
            Parameter('deltas', Annotation(deltas, 'o'))
        ],
                              """
        ${outputs.ctype} out = ${outputs.load_same};
        SIZE_T t = ${targets.load_idx}(${idxs[0]});
        SIZE_T idx = ${idxs[1]};
        ${deltas.ctype} d;
        if (t == idx) {
            d = 1.0f - out;
        } else {
            d = -out;
        }
        ${deltas.store_same}(d);
        """,
                              guiding_array='deltas')

        kernel_cache[key] = kernel.compile(thread)

    # Run kernel
    kernel_cache[key](outputs, targets, deltas)
Example #7
    def __init__(self, params: 'TGswParams', shape, bk_len,
                 perf_params: PerformanceParameters):

        mask_size = params.tlwe_params.mask_size
        polynomial_degree = params.tlwe_params.polynomial_degree
        decomp_length = params.decomp_length

        transform = get_transform(params.tlwe_params.transform_type)
        tdtype = transform.transformed_dtype()
        tlength = transform.transformed_length(polynomial_degree)

        accum = Type(Torus32, shape + (mask_size + 1, polynomial_degree))
        bootstrap_key = Type(
            tdtype,
            (bk_len, mask_size + 1, decomp_length, mask_size + 1, tlength))

        self._params = params
        self._perf_params = perf_params
        self._shape = shape
        self._bk_len = bk_len

        Computation.__init__(self, [
            Parameter('accum', Annotation(accum, 'io')),
            Parameter('bootstrap_key', Annotation(bootstrap_key, 'i')),
            Parameter('bk_row_idx', Annotation(numpy.int32))
        ])
Example #8
    def _build_plan(self, plan_factory, device_params, result, lwe_a, lwe_b,
                    key):

        plan = plan_factory()

        mul_key = MatrixMulVector(lwe_a)

        fill_res = Transformation([
            Parameter('result', Annotation(result, 'o')),
            Parameter('b', Annotation(lwe_b, 'i')),
            Parameter('a_times_key', Annotation(lwe_b, 'i'))
        ],
                                  """
            ${result.store_same}(${b.load_same} - ${a_times_key.load_same});
            """,
                                  connectors=['a_times_key'])

        mul_key.parameter.output.connect(fill_res,
                                         fill_res.a_times_key,
                                         result=fill_res.result,
                                         b=fill_res.b)

        plan.computation_call(mul_key, result, lwe_b, lwe_a, key)

        return plan
Example #9
    def _build_plan(self, plan_factory, device_params, output, matrix, vector):
        plan = plan_factory()

        summation = Reduce(matrix,
                           predicate_sum(matrix.dtype),
                           axes=(len(matrix.shape) - 1, ))

        mul_vec = Transformation(
            [
                Parameter('output', Annotation(matrix, 'o')),
                Parameter('matrix', Annotation(matrix, 'i')),
                Parameter('vector', Annotation(vector, 'i'))
            ],
            """
            ${output.store_same}(${mul}(${matrix.load_same}, ${vector.load_idx}(${idxs[-1]})));
            """,
            render_kwds=dict(mul=functions.mul(matrix.dtype, vector.dtype)),
            connectors=['output', 'matrix'])

        summation.parameter.input.connect(mul_vec,
                                          mul_vec.output,
                                          matrix=mul_vec.matrix,
                                          vector=mul_vec.vector)

        plan.computation_call(summation, output, matrix, vector)

        return plan
Example #10
def logistic(context, activations, bias, dest=None):
    kernel_cache, thread = context.kernel_cache, context.thread

    if dest is None:
        dest = activations

    key = (logistic, activations.shape, thread)
    if key not in kernel_cache:
        log.info("compiling " + str(key))
        assert activations.shape[1] == bias.shape[0]

        kernel = PureParallel([
            Parameter('activations', Annotation(activations, 'i')),
            Parameter('bias', Annotation(bias, 'i')),
            Parameter('dest', Annotation(dest, 'o')),
        ],
                              """
        ${activations.ctype} a = ${activations.load_same};
        ${bias.ctype} b = ${bias.load_idx}(${idxs[1]});

        a += b;
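        // clamp the pre-activation to a numerically safe range before exp()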
        a = min(max(-45.0f, a), 45.0f);
        a = 1.0f / (1.0f + exp(-a));

        ${dest.store_same}(a);
        """,
                              guiding_array='activations')

        kernel_cache[key] = kernel.compile(thread, fast_math=True)

    # Run kernel
    kernel_cache[key](activations, bias, dest)

    return dest
Example #11
    def __init__(self, params: 'TLweParams', shape, noise: float,
                 perf_params: PerformanceParametersForDevice):

        polynomial_degree = params.polynomial_degree
        mask_size = params.mask_size

        result_a = Type(Torus32, shape + (mask_size + 1, polynomial_degree))
        result_cv = Type(ErrorFloat, shape)
        key = Type(Int32, (mask_size, polynomial_degree))
        noises1 = Type(Torus32, shape + (mask_size, polynomial_degree))
        noises2 = Type(Torus32, shape + (polynomial_degree, ))

        self._transform_type = params.transform_type
        self._noise = noise
        self._mask_size = mask_size
        self._polynomial_degree = polynomial_degree
        self._perf_params = perf_params

        Computation.__init__(self, [
            Parameter('result_a', Annotation(result_a, 'o')),
            Parameter('result_cv', Annotation(result_cv, 'o')),
            Parameter('key', Annotation(key, 'i')),
            Parameter('noises1', Annotation(noises1, 'i')),
            Parameter('noises2', Annotation(noises2, 'i'))
        ])
Example #12
def logistic_derivative(context, activations, delta, dest=None):
    kernel_cache, thread = context.kernel_cache, context.thread

    if dest is None:
        dest = delta

    key = (logistic_derivative, activations.shape, thread)
    if key not in kernel_cache:
        log.info("compiling " + str(key))
        kernel = PureParallel([
            Parameter('activations', Annotation(activations, 'i')),
            Parameter('delta', Annotation(activations, 'i')),
            Parameter('dest', Annotation(dest, 'o')),
        ],
                              """
        ${activations.ctype} a = ${activations.load_same};
        ${delta.ctype} d = ${delta.load_same};

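        // chain rule through the logistic: sigma'(x) = a * (1 - a), with a = sigma(x)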
        d = d*a*(1.0f - a);

        ${dest.store_same}(d);
        """,
                              guiding_array='activations')

        kernel_cache[key] = kernel.compile(thread, fast_math=True)

    # Run kernel
    kernel_cache[key](activations, delta, dest)
Example #13
    def __init__(self, arr_t, output_arr_t=None, axes=None, block_width_override=None):

        self._block_width_override = block_width_override

        all_axes = range(len(arr_t.shape))
        if axes is None:
            axes = tuple(reversed(all_axes))
        else:
            assert set(axes) == set(all_axes)

        self._axes = tuple(axes)
        self._transposes = get_transposes(arr_t.shape, self._axes)

        output_shape = transpose_shape(arr_t.shape, self._axes)

        if output_arr_t is None:
            output_arr = Type(arr_t.dtype, output_shape)
        else:
            if output_arr_t.shape != output_shape:
                raise ValueError("Expected output array shape: {exp_shape}, got {got_shape}".format(
                    exp_shape=output_arr_t, got_shape=output_arr_t.shape))
            if output_arr_t.dtype != arr_t.dtype:
                raise ValueError("Input and output array must have the same dtype")
            output_arr = output_arr_t

        Computation.__init__(self, [
            Parameter('output', Annotation(output_arr, 'o')),
            Parameter('input', Annotation(arr_t, 'i'))])
Example #14
def roll_computation(array, axis):
    return PureParallel([
        Parameter('output', Annotation(array, 'o')),
        Parameter('input', Annotation(array, 'i')),
        Parameter('shift', Annotation(Type(numpy.int32)))
    ],
                        """
        <%
            shape = input.shape
        %>
        %for i in range(len(shape)):
            VSIZE_T output_${idxs[i]} =
                %if i == axis:
                ${shift} == 0 ?
                    ${idxs[i]} :
                    ## Since ``shift`` can be negative, and its absolute value greater than
                    ## ``shape[i]``, a double modulo division is necessary
                    ## (the ``%`` operator preserves the sign of the dividend in C).
                    (${idxs[i]} + (${shape[i]} + ${shift} % ${shape[i]})) % ${shape[i]};
                %else:
                ${idxs[i]};
                %endif
        %endfor
        ${output.store_idx}(
            ${", ".join("output_" + name for name in idxs)},
            ${input.load_idx}(${", ".join(idxs)}));
        """,
                        guiding_array='input',
                        render_kwds=dict(axis=axis))
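
A usage sketch checking the kernel against numpy.roll (assumes an existing CLUDA thread thr; the data is illustrative):

arr = numpy.arange(16, dtype=numpy.int32).reshape(4, 4)
arr_dev = thr.to_device(arr)
res_dev = thr.empty_like(arr_dev)

roll = roll_computation(arr, axis=1).compile(thr)
# Negative shifts are handled by the double modulo in the kernel.
roll(res_dev, arr_dev, numpy.int32(-3))
assert (res_dev.get() == numpy.roll(arr, -3, axis=1)).all()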
Example #15
def get_prepare_iprfft_output(y):
    # Input: size N//2
    # Output: size N//2

    N = y.shape[-1] * 2

    return Transformation([
        Parameter('x', Annotation(y, 'o')),
        Parameter('y', Annotation(y, 'i')),
        Parameter('x0', Annotation(Type(y.dtype, y.shape[:-1]), 'i')),
        Parameter('coeffs', Annotation(Type(y.dtype, (N // 2, )), 'i')),
    ],
                          """
        ${y.ctype} y = ${y.load_same};
        ${coeffs.ctype} coeff = ${coeffs.load_idx}(${idxs[-1]});

        ${x.ctype} x;

        if (${idxs[-1]} == 0)
        {
            ${x0.ctype} x0 = ${x0.load_idx}(${", ".join(idxs[:-1])});
            x = x0 / ${N // 2};
        }
        else
        {
            x = y * coeff;
        }

        ${x.store_same}(x);
        """,
                          connectors=['y'],
                          render_kwds=dict(N=N))
Example #16
    def _build_plan(self, plan_factory, device_params, output, alpha, beta):

        plan = plan_factory()

        for_reduction = Type(numpy.float64, alpha.shape)

        meter_trf = Transformation([
            Parameter('output', Annotation(for_reduction, 'o')),
            Parameter('alpha', Annotation(alpha, 'i')),
            Parameter('beta', Annotation(beta, 'i')),
            ],
            """
                ${alpha.ctype} alpha = ${alpha.load_same};
                ${beta.ctype} beta = ${beta.load_same};
                ${alpha.ctype} t = ${mul_cc}(alpha, beta);
                ${alpha.ctype} np = ${exp_c}(COMPLEX_CTR(${alpha.ctype})(-t.x, -t.y));
                ${alpha.ctype} cp = COMPLEX_CTR(${alpha.ctype})(1 - np.x, -np.y);
                ${output.store_same}(cp.x);
                """,
            render_kwds=dict(
                mul_cc=functions.mul(alpha.dtype, alpha.dtype),
                exp_c=functions.exp(alpha.dtype),
                ))

        reduction = Reduce(for_reduction, predicate_sum(output.dtype), axes=(0,))
        reduction.parameter.input.connect(
            meter_trf, meter_trf.output, alpha_p=meter_trf.alpha, beta_p=meter_trf.beta)

        plan.computation_call(reduction, output, alpha, beta)

        return plan
Example #17
    def _build_plan(self, plan_factory, device_params, a, current_variances, mu):
        plan = plan_factory()

        fill = PureParallel([
            Parameter('a', Annotation(a, 'o')),
            Parameter('current_variances', Annotation(current_variances, 'o')),
            Parameter('mu', Annotation(mu, 'i'))],
            """
            ${a.ctype} a;
            if (${idxs[-2]} == ${mask_size})
            {
                a = ${mu.load_idx}(${", ".join(idxs[:-2])}, ${idxs[-1]});
            }
            else
            {
                a = 0;
            }
            ${a.store_same}(a);

            if (${idxs[-1]} == 0)
            {
                ${current_variances.store_idx}(${", ".join(idxs[:-1])}, 0);
            }
            """,
            render_kwds=dict(mask_size=self._mask_size))

        plan.computation_call(fill, a, current_variances, mu)

        return plan
Example #18
    def _build_plan(self, plan_factory, device_params, output, alpha, beta):

        plan = plan_factory()

        for_reduction = Type(numpy.float64, alpha.shape)

        meter_trf = Transformation([
            Parameter('output', Annotation(for_reduction, 'o')),
            Parameter('alpha', Annotation(alpha, 'i')),
            Parameter('beta', Annotation(beta, 'i')),
            ],
            """
                ${alpha.ctype} alpha = ${alpha.load_same};
                ${beta.ctype} beta = ${beta.load_same};
                ${alpha.ctype} t = ${mul_cc}(alpha, beta);
                ${output.store_same}(t.x - ${ordering});
                """,
            render_kwds=dict(
                mul_cc=functions.mul(alpha.dtype, alpha.dtype),
                ordering=ordering(self._representation),
                ))

        reduction = Reduce(for_reduction, predicate_sum(output.dtype), axes=(0,))
        reduction.parameter.input.connect(
            meter_trf, meter_trf.output, alpha_p=meter_trf.alpha, beta_p=meter_trf.beta)

        plan.computation_call(reduction, output, alpha, beta)

        return plan
Example #19
    def __init__(self, params: TGswParams, in_out_params: LweParams, shape,
                 perf_params: PerformanceParameters):

        tlwe_params = params.tlwe_params
        decomp_length = params.decomp_length
        mask_size = tlwe_params.mask_size
        polynomial_degree = tlwe_params.polynomial_degree
        input_size = params.tlwe_params.extracted_lweparams.size
        output_size = in_out_params.size

        assert mask_size == 1 and decomp_length == 2

        transform_type = params.tlwe_params.transform_type
        transform = get_transform(transform_type)
        tlength = transform.transformed_length(polynomial_degree)
        tdtype = transform.transformed_dtype()

        out_a = Type(Torus32, shape + (input_size, ))
        out_b = Type(Torus32, shape)
        accum_a = Type(Torus32, shape + (mask_size + 1, polynomial_degree))
        gsw = Type(tdtype, (output_size, mask_size + 1, decomp_length,
                            mask_size + 1, tlength))
        bara = Type(Torus32, shape + (output_size, ))

        self._params = params
        self._in_out_params = in_out_params
        self._perf_params = perf_params

        Computation.__init__(self, [
            Parameter('lwe_a', Annotation(out_a, 'io')),
            Parameter('lwe_b', Annotation(out_b, 'io')),
            Parameter('accum_a', Annotation(accum_a, 'io')),
            Parameter('gsw', Annotation(gsw, 'i')),
            Parameter('bara', Annotation(bara, 'i'))
        ])
Example #20
def norm_const(arr_t, order):
    """
    Returns a transformation that calculates the ``order``-norm
    (1 output, 1 input): ``output = abs(input) ** order``.
    """
    if dtypes.is_complex(arr_t.dtype):
        out_dtype = dtypes.real_for(arr_t.dtype)
    else:
        out_dtype = arr_t.dtype

    return Transformation(
        [
            Parameter('output', Annotation(Type(out_dtype, arr_t.shape), 'o')),
            Parameter('input', Annotation(arr_t, 'i'))],
        """
        ${input.ctype} val = ${input.load_same};
        ${output.ctype} norm = ${norm}(val);
        %if order != 2:
        norm = pow(norm, ${dtypes.c_constant(order / 2, output.dtype)});
        %endif
        ${output.store_same}(norm);
        """,
        render_kwds=dict(
            norm=functions.norm(arr_t.dtype),
            order=order))
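
A usage sketch fusing this transformation into a Reduce, so that |x|**2 is computed on load and no intermediate array is materialized (assumes an existing thread thr; the shapes and the parameter name 'source' are illustrative):

import numpy
from reikna.algorithms import Reduce, predicate_sum

arr_t = Type(numpy.complex64, (1024,))
norm_trf = norm_const(arr_t, 2)

# The transformation's output is real-valued, with the same shape as the input.
out_t = Type(numpy.float32, arr_t.shape)
reduction = Reduce(out_t, predicate_sum(out_t.dtype))
reduction.parameter.input.connect(norm_trf, norm_trf.output, source=norm_trf.input)
reductionc = reduction.compile(thr)  # reductionc(result_dev, source_dev)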
Example #21
def get_tgsw_polynomial_decomp_trf(params: 'TGswParams', shape):
    tlwe_params = params.tlwe_params
    decomp_length = params.decomp_length
    mask_size = tlwe_params.mask_size
    polynomial_degree = tlwe_params.polynomial_degree

    result = Type(Int32,
                  shape + (mask_size + 1, decomp_length, polynomial_degree))
    sample = Type(Torus32, shape + (mask_size + 1, polynomial_degree))
    return Transformation([
        Parameter('result', Annotation(result, 'o')),
        Parameter('sample', Annotation(sample, 'i'))
    ],
                          """
        <%
            mask = 2**params.bs_log2_base - 1
            half_base = 2**(params.bs_log2_base - 1)
        %>
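        ## Signed digit decomposition in base 2**params.bs_log2_base:
        ## shift, mask, then recenter the digit around zero.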
        ${sample.ctype} sample = ${sample.load_idx}(${", ".join(idxs[:-2])}, ${idxs[-1]});
        int decomp_shift = 32 - (${idxs[-2]} + 1) * ${params.bs_log2_base};
        ${result.store_same}(
            (((sample + (${params.offset})) >> decomp_shift) & ${mask}) - ${half_base}
        );
        """,
                          connectors=['result'],
                          render_kwds=dict(params=params))
Example #22
    def __init__(self,
                 arr1,
                 arr2,
                 coeff,
                 second_coeff,
                 same_A_B=False,
                 test_computation_adhoc_array=False,
                 test_computation_incorrect_role=False,
                 test_computation_incorrect_type=False,
                 test_same_arg_as_i_and_o=False):

        self._second_coeff = second_coeff
        self._same_A_B = same_A_B
        self._test_same_arg_as_i_and_o = test_same_arg_as_i_and_o

        self._test_computation_adhoc_array = test_computation_adhoc_array
        self._test_computation_incorrect_role = test_computation_incorrect_role
        self._test_computation_incorrect_type = test_computation_incorrect_type

        Computation.__init__(self, [
            Parameter('C', Annotation(arr1, 'o')),
            Parameter('D', Annotation(arr2, 'o')),
            Parameter('A', Annotation(arr1, 'i')),
            Parameter('B', Annotation(arr2, 'i')),
            Parameter('coeff', Annotation(coeff))
        ])
Example #23
    def __init__(
            self, transform, batch_shape, inverse=False,
            i32_conversion=False, transforms_per_block=4, kernel_repetitions=1):

        self._inverse = inverse
        self._transform = transform
        self._transforms_per_block = transforms_per_block
        self._kernel_repetitions = kernel_repetitions
        self._i32_conversion = i32_conversion

        tr_arr = Type(self._transform.elem_dtype, batch_shape + (transform.transform_length,))
        if i32_conversion:
            arr = Type(numpy.int32, batch_shape + (transform.polynomial_length,))
            if inverse:
                oarr = arr
                iarr = tr_arr
            else:
                oarr = tr_arr
                iarr = arr
        else:
            oarr = tr_arr
            iarr = tr_arr

        Computation.__init__(self, [
            Parameter('output', Annotation(oarr, 'o')),
            Parameter('input', Annotation(iarr, 'i'))])
Example #24
    def __init__(self, arr, coeff):
        Computation.__init__(self, [
            Parameter('C', Annotation(arr, 'io')),
            Parameter('D', Annotation(arr, 'io')),
            Parameter('coeff1', Annotation(coeff)),
            Parameter('coeff2', Annotation(coeff))
        ])
Example #25
def rolling_frame(arr, NFFT, noverlap, pad_to):
    """
    Transforms a 1D array to a 2D array whose rows are
    partially overlapped parts of the initial array.
    """

    frame_step = NFFT - noverlap
    frame_num = (arr.size - noverlap) // frame_step
    frame_size = NFFT if pad_to is None else pad_to

    result_arr = Type(arr.dtype, (frame_num, frame_size))

    return Transformation(
        [
            Parameter('output', Annotation(result_arr, 'o')),
            Parameter('input', Annotation(arr, 'i')),
        ],
        """
        %if NFFT != output.shape[1]:
        if (${idxs[1]} >= ${NFFT})
        {
            ${output.store_same}(0);
        }
        else
        %endif
        {
            ${output.store_same}(${input.load_idx}(${idxs[0]} * ${frame_step} + ${idxs[1]}));
        }
        """,
        render_kwds=dict(frame_step=frame_step, NFFT=NFFT),
        # note that only the "store_same"-using argument can serve as a connector!
        connectors=['output'])
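
A usage sketch: a Transformation cannot be compiled by itself, but PureParallel.from_trf (the same trick used in Example #27) wraps it into a standalone computation. The thread thr is assumed to exist:

arr = numpy.random.rand(1024).astype(numpy.float32)
frames_trf = rolling_frame(arr, NFFT=256, noverlap=128, pad_to=None)
comp = PureParallel.from_trf(frames_trf, guiding_array='output')

arr_dev = thr.to_device(arr)
res_dev = thr.empty_like(comp.parameter.output)
comp.compile(thr)(res_dev, arr_dev)
# Row i of the result equals arr[i * 128 : i * 128 + 256] (frame_step == 128).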
Example #26
    def __init__(self,
                 arr1,
                 arr2,
                 coeff,
                 same_A_B=False,
                 test_incorrect_parameter_name=False,
                 test_untyped_scalar=False,
                 test_kernel_adhoc_array=False):

        assert len(arr1.shape) == 2
        assert len(arr2.shape) == (2 if same_A_B else 1)
        assert arr1.dtype == arr2.dtype
        if same_A_B:
            assert arr1.shape == arr2.shape
        else:
            assert arr1.shape[0] == arr1.shape[1]

        self._same_A_B = same_A_B
        self._persistent_array = numpy.arange(arr2.size).reshape(
            arr2.shape).astype(arr2.dtype)

        self._test_untyped_scalar = test_untyped_scalar
        self._test_kernel_adhoc_array = test_kernel_adhoc_array

        Computation.__init__(self, [
            Parameter(('_C' if test_incorrect_parameter_name else 'C'),
                      Annotation(arr1, 'o')),
            Parameter('D', Annotation(arr2, 'o')),
            Parameter('A', Annotation(arr1, 'i')),
            Parameter('B', Annotation(arr2, 'i')),
            Parameter('coeff', Annotation(coeff))
        ])
Example #27
    def _build_plan(
            self, plan_factory, device_params,
            result_a, result_b, result_cv, messages, key, noises_a, noises_b):

        plan = plan_factory()

        mul_key = MatrixMulVector(noises_a)

        fill_b_cv = Transformation([
            Parameter('result_b', Annotation(result_b, 'o')),
            Parameter('result_cv', Annotation(result_cv, 'o')),
            Parameter('messages', Annotation(messages, 'i')),
            Parameter('noises_a_times_key', Annotation(noises_b, 'i')),
            Parameter('noises_b', Annotation(noises_b, 'i'))],
            """
            ${result_b.store_same}(
                ${noises_b.load_same}
                + ${messages.load_same}
                + ${noises_a_times_key.load_same});
            ${result_cv.store_same}(${noise**2});
            """,
            connectors=['noises_a_times_key'],
            render_kwds=dict(noise=self._noise))

        mul_key.parameter.output.connect(
            fill_b_cv, fill_b_cv.noises_a_times_key,
            b=fill_b_cv.result_b, cv=fill_b_cv.result_cv, messages=fill_b_cv.messages,
            noises_b=fill_b_cv.noises_b)

        plan.computation_call(mul_key, result_b, result_cv, messages, noises_b, noises_a, key)
        plan.computation_call(
            PureParallel.from_trf(transformations.copy(noises_a)),
            result_a, noises_a)

        return plan
Example #28
def get_prepare_prfft_scan(output):
    return Transformation(
        [
            Parameter('output', Annotation(output, 'o')),
            Parameter('Y', Annotation(output, 'i')),
            Parameter(
                're_X_0',
                Annotation(
                    Type(dtypes.real_for(output.dtype), output.shape[:-1]),
                    'i'))
        ],
        """
        ${Y.ctype} Y = ${Y.load_same};
        Y = COMPLEX_CTR(${Y.ctype})(Y.y, -Y.x);

        if (${idxs[-1]} == 0)
        {
            Y.x = Y.x / 2 + ${re_X_0.load_idx}(${", ".join(idxs[:-1])});
            Y.y /= 2;
        }

        ${output.store_same}(Y);
        """,
        connectors=['output', 'Y'],
    )
Example #29
def identity(type):
    return PureParallel([
        Parameter('output', Annotation(type, 'o')),
        Parameter('input', Annotation(type, 'i'))
    ], """
        ${output.store_same}(${input.load_same});
        """)
Example #30
    def __init__(self, out_type, in_type):
        '''
        Input transformation that implements an explicit type cast.

        Arguments
        ---------
        out_type: `reikna.core.Type`
            Output dtype and shape.
        in_type: `reikna.core.Type`
            Input dtype and shape.

        Notes
        -----
        * `in_type` and `out_type` shapes must be equal.
        * Does not support real-to-complex and complex-to-real conversions.
        '''
        if in_type.shape != out_type.shape:
            raise ValueError('shapes of out_type and in_type must be equal.')
        if (issubclass(in_type.dtype.type, np.complexfloating)
                != issubclass(out_type.dtype.type, np.complexfloating)):
            raise ValueError('Unable to cast real to complex and vice versa.')
        out_param = Parameter('output', Annotation(out_type, 'o'))
        in_param = Parameter('input', Annotation(in_type, 'i'))
        ctype = out_type.ctype.replace('unsigned ', 'u')
        super(Cast, self).__init__([out_param, in_param], self.code,
                                   dict(ctype=ctype))