Example #1
    def __init__(
            self, result_shape_info,
            input_size: int, output_size: int, decomp_length: int, log2_base: int):

        base = 2**log2_base

        a = result_shape_info.a
        b = result_shape_info.b
        cv = result_shape_info.current_variances

        ks_a = Type(Torus32, (input_size, decomp_length, base, output_size))
        ks_b = Type(Torus32, (input_size, decomp_length, base))
        ks_cv = Type(Float, (input_size, decomp_length, base))

        source_a = Type(Torus32, result_shape_info.shape + (input_size,))
        source_b = Type(Torus32, result_shape_info.shape)

        self._decomp_length = decomp_length
        self._input_size = input_size
        self._output_size = output_size
        self._log2_base = log2_base

        Computation.__init__(self, [
            Parameter('result_a', Annotation(a, 'io')),
            Parameter('result_b', Annotation(b, 'io')),
            Parameter('result_cv', Annotation(cv, 'io')),
            Parameter('ks_a', Annotation(ks_a, 'i')),
            Parameter('ks_b', Annotation(ks_b, 'i')),
            Parameter('ks_cv', Annotation(ks_cv, 'i')),
            Parameter('source_a', Annotation(source_a, 'i')),
            Parameter('source_b', Annotation(source_b, 'i'))])
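
For orientation: with base = 2**log2_base, the keyswitch key ``ks_a`` declared above is indexed as (input coefficient, decomposition level, base digit, output coefficient). With hypothetical sizes input_size=500, decomp_length=2, log2_base=8, that would be a (500, 2, 256, output_size) array of Torus32 values.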
Example #2
def test_guiding_output(thr):

    N = 1000
    dtype = numpy.float32

    p = PureParallel([
        Parameter('output', Annotation(Type(dtype, shape=N), 'o')),
        Parameter('input', Annotation(Type(dtype, shape=(2, N)), 'i'))
    ],
                     """
        float t1 = ${input.load_idx}(0, ${idxs[0]});
        float t2 = ${input.load_idx}(1, ${idxs[0]});
        ${output.store_idx}(${idxs[0]}, t1 + t2);
        """,
                     guiding_array='output')

    a = get_test_array_like(p.parameter.input)
    a_dev = thr.to_device(a)
    res_dev = thr.empty_like(p.parameter.output)

    pc = p.compile(thr)
    pc(res_dev, a_dev)

    res_ref = a[0] + a[1]

    assert diff_is_negligible(res_dev.get(), res_ref)
Example #3
def hough_paths(segments, line_dist=40):
    # View segments as a 1D structured array
    seg_struct = segments.ravel().astype(np.int32).view(int4).reshape(-1)
    segments, _ = sort_segments(thr.to_device(seg_struct))
    segments = segments[0].view(np.int32).reshape((seg_struct.shape[0], 2, 2))
    can_join = thr.empty_like(Type(np.int32, segments.shape[0]))
    can_join.fill(0)
    prg.can_join_segments(
        segments,
        can_join,
        np.int32(line_dist),
        global_size=segments.shape[:1],
        local_size=(1, ),
    )
    labels = cumsum(can_join)
    num_labels = int(labels[labels.shape[0] - 1].get().item()) + 1
    longest_seg_inds = thr.empty_like(Type(np.int32, num_labels))
    longest_seg_inds.fill(-1)
    prg.assign_segments(segments,
                        labels,
                        longest_seg_inds,
                        global_size=(segments.shape[0], ),
                        local_size=(1, ))
    longest_segs = thr.empty_like(Type(np.int32, (num_labels, 2, 2)))
    prg.copy_chosen_segments(segments,
                             longest_seg_inds,
                             longest_segs,
                             global_size=(num_labels, ),
                             local_size=(1, ))
    return longest_segs.get()
Example #4
def get_prepare_iprfft_output(y):
    # Input: size N//2
    # Output: size N//2

    N = y.shape[-1] * 2

    return Transformation([
        Parameter('x', Annotation(y, 'o')),
        Parameter('y', Annotation(y, 'i')),
        Parameter('x0', Annotation(Type(y.dtype, y.shape[:-1]), 'i')),
        Parameter('coeffs', Annotation(Type(y.dtype, (N // 2, )), 'i')),
    ],
                          """
        ${y.ctype} y = ${y.load_same};
        ${coeffs.ctype} coeff = ${coeffs.load_idx}(${idxs[-1]});

        ${x.ctype} x;

        if (${idxs[-1]} == 0)
        {
            ${x0.ctype} x0 = ${x0.load_idx}(${", ".join(idxs[:-1])});
            x = x0 / ${N // 2};
        }
        else
        {
            x = y * coeff;
        }

        ${x.store_same}(x);
        """,
                          connectors=['y'],
                          render_kwds=dict(N=N))
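
For reference, the elementwise logic of the Mako kernel above can be mirrored in plain NumPy. This is an illustrative sketch under the shapes declared in the Transformation (``y`` real of length N//2 per batch, ``x0`` of the batch shape, ``coeffs`` of length N//2), not library code:

import numpy

def prepare_iprfft_output_reference(y, x0, coeffs):
    # Every element is scaled by its precomputed coefficient,
    # except element 0, which is replaced by x0 / (N // 2).
    N = y.shape[-1] * 2
    x = y * coeffs  # broadcasts over the batch dimensions
    x[..., 0] = x0 / (N // 2)
    return x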
Example #5
    def __init__(
            self, transform, batch_shape, inverse=False,
            i32_conversion=False, transforms_per_block=4, kernel_repetitions=1):

        self._inverse = inverse
        self._transform = transform
        self._transforms_per_block = transforms_per_block
        self._kernel_repetitions = kernel_repetitions
        self._i32_conversion = i32_conversion

        tr_arr = Type(self._transform.elem_dtype, batch_shape + (transform.transform_length,))
        if i32_conversion:
            arr = Type(numpy.int32, batch_shape + (transform.polynomial_length,))
            if inverse:
                oarr = arr
                iarr = tr_arr
            else:
                oarr = tr_arr
                iarr = arr
        else:
            oarr = tr_arr
            iarr = tr_arr

        Computation.__init__(self, [
            Parameter('output', Annotation(oarr, 'o')),
            Parameter('input', Annotation(iarr, 'i'))])
Example #6
    def __init__(self, params: TGswParams, in_out_params: LweParams, shape,
                 perf_params: PerformanceParameters):

        tlwe_params = params.tlwe_params
        decomp_length = params.decomp_length
        mask_size = tlwe_params.mask_size
        polynomial_degree = tlwe_params.polynomial_degree
        input_size = params.tlwe_params.extracted_lweparams.size
        output_size = in_out_params.size

        assert mask_size == 1 and decomp_length == 2

        transform_type = params.tlwe_params.transform_type
        transform = get_transform(transform_type)
        tlength = transform.transformed_length(polynomial_degree)
        tdtype = transform.transformed_dtype()

        out_a = Type(Torus32, shape + (input_size, ))
        out_b = Type(Torus32, shape)
        accum_a = Type(Torus32, shape + (mask_size + 1, polynomial_degree))
        gsw = Type(tdtype, (output_size, mask_size + 1, decomp_length,
                            mask_size + 1, tlength))
        bara = Type(Torus32, shape + (output_size, ))

        self._params = params
        self._in_out_params = in_out_params
        self._perf_params = perf_params

        Computation.__init__(self, [
            Parameter('lwe_a', Annotation(out_a, 'io')),
            Parameter('lwe_b', Annotation(out_b, 'io')),
            Parameter('accum_a', Annotation(accum_a, 'io')),
            Parameter('gsw', Annotation(gsw, 'i')),
            Parameter('bara', Annotation(bara, 'i'))
        ])
Example #7
    def __init__(self,
                 polynomial_degree,
                 shape,
                 powers_shape,
                 powers_view=False,
                 minus_one=False,
                 invert_powers=False):

        self._batch_shape = powers_shape[:-1] if powers_view else powers_shape
        assert self._batch_shape == shape[:len(self._batch_shape)]

        self._powers_view = powers_view
        self._minus_one = minus_one
        self._invert_powers = invert_powers

        polynomials = Type(Torus32, shape + (polynomial_degree, ))
        powers = Type(Int32, powers_shape)

        Computation.__init__(
            self,
            [
                Parameter('result', Annotation(polynomials, 'o')),
                Parameter('source', Annotation(polynomials, 'i')),
                Parameter('powers', Annotation(powers, 'i')),
                Parameter('powers_idx', Annotation(
                    Type(Int32)))  # unused if powers_view==False
            ])
Example #8
    def __init__(self, params: 'TGswParams', shape, bk_len,
                 perf_params: PerformanceParameters):

        mask_size = params.tlwe_params.mask_size
        polynomial_degree = params.tlwe_params.polynomial_degree
        decomp_length = params.decomp_length

        transform = get_transform(params.tlwe_params.transform_type)
        tdtype = transform.transformed_dtype()
        tlength = transform.transformed_length(polynomial_degree)

        accum = Type(Torus32, shape + (mask_size + 1, polynomial_degree))
        bootstrap_key = Type(
            tdtype,
            (bk_len, mask_size + 1, decomp_length, mask_size + 1, tlength))

        self._params = params
        self._perf_params = perf_params
        self._shape = shape
        self._bk_len = bk_len

        Computation.__init__(self, [
            Parameter('accum', Annotation(accum, 'io')),
            Parameter('bootstrap_key', Annotation(bootstrap_key, 'i')),
            Parameter('bk_row_idx', Annotation(numpy.int32))
        ])
Example #9
    def __init__(self, params: 'TLweParams', shape, noise: float,
                 perf_params: PerformanceParametersForDevice):

        polynomial_degree = params.polynomial_degree
        mask_size = params.mask_size

        result_a = Type(Torus32, shape + (mask_size + 1, polynomial_degree))
        result_cv = Type(ErrorFloat, shape)
        key = Type(Int32, (mask_size, polynomial_degree))
        noises1 = Type(Torus32, shape + (mask_size, polynomial_degree))
        noises2 = Type(Torus32, shape + (polynomial_degree, ))

        self._transform_type = params.transform_type
        self._noise = noise
        self._mask_size = mask_size
        self._polynomial_degree = polynomial_degree
        self._perf_params = perf_params

        Computation.__init__(self, [
            Parameter('result_a', Annotation(result_a, 'o')),
            Parameter('result_cv', Annotation(result_cv, 'o')),
            Parameter('key', Annotation(key, 'i')),
            Parameter('noises1', Annotation(noises1, 'i')),
            Parameter('noises2', Annotation(noises2, 'i'))
        ])
Example #10
def get_method(array):
    temp = array.thread.array(array.shape, array.dtype)
    comp = array.thread.get_cached_computation(setitem_computation,
                                               Type.from_value(temp),
                                               Type.from_value(array), True)
    comp(temp, array)
    return temp.get()
Example #11
def get_tgsw_polynomial_decomp_trf(params: 'TGswParams', shape):
    tlwe_params = params.tlwe_params
    decomp_length = params.decomp_length
    mask_size = tlwe_params.mask_size
    polynomial_degree = tlwe_params.polynomial_degree

    result = Type(Int32,
                  shape + (mask_size + 1, decomp_length, polynomial_degree))
    sample = Type(Torus32, shape + (mask_size + 1, polynomial_degree))
    return Transformation([
        Parameter('result', Annotation(result, 'o')),
        Parameter('sample', Annotation(sample, 'i'))
    ],
                          """
        <%
            mask = 2**params.bs_log2_base - 1
            half_base = 2**(params.bs_log2_base - 1)
        %>
        ${sample.ctype} sample = ${sample.load_idx}(${", ".join(idxs[:-2])}, ${idxs[-1]});
        int decomp_shift = 32 - (${idxs[-2]} + 1) * ${params.bs_log2_base};
        ${result.store_same}(
            (((sample + (${params.offset})) >> decomp_shift) & ${mask}) - ${half_base}
        );
        """,
                          connectors=['result'],
                          render_kwds=dict(params=params))
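
The kernel performs a signed digit decomposition in base 2**bs_log2_base: each level extracts ``bs_log2_base`` bits (most significant first) and recentres them around zero. A NumPy sketch of the same arithmetic, with ``log2_base`` and ``offset`` standing in for ``params.bs_log2_base`` and ``params.offset`` (illustrative only; assumes ``sample`` is an ``int32`` array so the right shifts are arithmetic, as in the kernel):

import numpy

def decompose_reference(sample, decomp_length, log2_base, offset):
    mask = 2**log2_base - 1
    half_base = 2**(log2_base - 1)
    digits = []
    for level in range(decomp_length):
        decomp_shift = 32 - (level + 1) * log2_base
        digits.append((((sample + offset) >> decomp_shift) & mask) - half_base)
    # The new decomposition axis goes right before the polynomial axis,
    # matching the ``result`` layout above.
    return numpy.stack(digits, axis=-2)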
Example #12
    def __init__(self, matrix_t):
        Computation.__init__(self, [
            Parameter(
                'output',
                Annotation(Type(matrix_t.dtype, matrix_t.shape[:-1]), 'o')),
            Parameter('matrix', Annotation(matrix_t, 'i')),
            Parameter(
                'vector',
                Annotation(Type(matrix_t.dtype, matrix_t.shape[-1]), 'i'))
        ])
Example #13
    def __init__(self,
                 x,
                 NFFT=256,
                 noverlap=128,
                 pad_to=None,
                 window=hanning_window):

        # print("x Data type = %s" % x.dtype)
        # print("Is Real = %s" % dtypes.is_real(x.dtype))
        # print("dim = %s" % x.ndim)
        assert dtypes.is_real(x.dtype)
        assert x.ndim == 1

        rolling_frame_trf = rolling_frame(x, NFFT, noverlap, pad_to)

        complex_dtype = dtypes.complex_for(x.dtype)
        fft_arr = Type(complex_dtype, rolling_frame_trf.output.shape)
        real_fft_arr = Type(x.dtype, rolling_frame_trf.output.shape)

        window_trf = window(real_fft_arr, NFFT)
        broadcast_zero_trf = transformations.broadcast_const(real_fft_arr, 0)
        to_complex_trf = transformations.combine_complex(fft_arr)
        amplitude_trf = transformations.norm_const(fft_arr, 1)
        crop_trf = crop_frequencies(amplitude_trf.output)

        fft = FFT(fft_arr, axes=(1, ))
        fft.parameter.input.connect(to_complex_trf,
                                    to_complex_trf.output,
                                    input_real=to_complex_trf.real,
                                    input_imag=to_complex_trf.imag)
        fft.parameter.input_imag.connect(broadcast_zero_trf,
                                         broadcast_zero_trf.output)
        fft.parameter.input_real.connect(window_trf,
                                         window_trf.output,
                                         unwindowed_input=window_trf.input)
        fft.parameter.unwindowed_input.connect(
            rolling_frame_trf,
            rolling_frame_trf.output,
            flat_input=rolling_frame_trf.input)
        fft.parameter.output.connect(amplitude_trf,
                                     amplitude_trf.input,
                                     amplitude=amplitude_trf.output)
        fft.parameter.amplitude.connect(crop_trf,
                                        crop_trf.input,
                                        cropped_amplitude=crop_trf.output)

        self._fft = fft

        self._transpose = Transpose(fft.parameter.cropped_amplitude)

        Computation.__init__(self, [
            Parameter('output',
                      Annotation(self._transpose.parameter.output, 'o')),
            Parameter('input', Annotation(fft.parameter.flat_input, 'i'))
        ])
Example #14
    def __init__(self, params: 'TLweParams', shape):
        a_type = Type(Torus32, shape + (params.mask_size + 1, params.polynomial_degree))
        cv_type = Type(ErrorFloat, shape + (params.mask_size + 1,))
        mu_type = Type(Torus32, shape + (params.polynomial_degree,))

        self._mask_size = params.mask_size

        Computation.__init__(self,
            [Parameter('a', Annotation(a_type, 'o')),
            Parameter('current_variances', Annotation(cv_type, 'o')),
            Parameter('mu', Annotation(mu_type, 'i'))])
Example #15
    def __init__(self, shape, mspace_size):

        self._mspace_size = mspace_size

        messages = Type(Torus32, shape)
        result = Type(Int32, shape)

        Computation.__init__(self, [
            Parameter('result', Annotation(result, 'o')),
            Parameter('messages', Annotation(messages, 'i'))
        ])
Example #16
    def __init__(self, shape, lwe_size):

        a = Type(Torus32, shape + (lwe_size,))
        b = Type(Torus32, shape)
        key = Type(Int32, (lwe_size,))

        Computation.__init__(self, [
            Parameter('result', Annotation(b, 'o')),
            Parameter('lwe_a', Annotation(a, 'i')),
            Parameter('lwe_b', Annotation(b, 'i')),
            Parameter('key', Annotation(key, 'i'))])
Example #17
def get_prepare_for_mul_trf(shape):
    # Preparation for FFT is just an identity
    dtype = transformed_dtype()
    return Transformation([
        Parameter('output', Annotation(Type(dtype, shape), 'o')),
        Parameter('input', Annotation(Type(dtype, shape), 'i'))
    ],
                          """
        ${output.store_same}(${input.load_same});
        """,
                          connectors=['input', 'output'])
Example #18
def setitem_method(array, index, value):
    # We need it both in ``cuda.Array`` and ``ocl.Array``, hence a standalone function.

    # PyOpenCL and PyCUDA support __setitem__() for some restricted cases,
    # but it is too complicated to determine when it will work,
    # and it is easier to just call our own implementation every time.

    view = array[index]
    value = normalize_value(array.thread, type(array), value)
    comp = array.thread.get_cached_computation(
        setitem_computation, Type.from_value(view), Type.from_value(value))
    comp(view, value)
Example #19
def get_tlwe_transformed_add_mul_to_trf(params: 'TGswParams', shape,
                                        bk_len: int,
                                        perf_params: PerformanceParameters):

    tlwe_params = params.tlwe_params
    decomp_length = params.decomp_length
    mask_size = tlwe_params.mask_size
    polynomial_degree = tlwe_params.polynomial_degree

    transform = get_transform(params.tlwe_params.transform_type)
    tdtype = transform.transformed_dtype()
    tlength = transform.transformed_length(polynomial_degree)
    tr_ctype = transform.transformed_internal_ctype()

    result = Type(tdtype, shape + (mask_size + 1, tlength))
    sample = Type(tdtype, shape + (mask_size + 1, decomp_length, tlength))
    bootstrap_key = Type(
        tdtype, (bk_len, mask_size + 1, decomp_length, mask_size + 1, tlength))

    return Transformation([
        Parameter('result', Annotation(result, 'o')),
        Parameter('sample', Annotation(sample, 'i')),
        Parameter('bootstrap_key', Annotation(bootstrap_key, 'i')),
        Parameter('bk_row_idx', Annotation(numpy.int32))
    ],
                          """
        ${tr_ctype} result = ${tr_ctype}pack(${dtypes.c_constant(0, result.dtype)});

        %for mask_idx in range(mask_size + 1):
        %for decomp_idx in range(decomp_length):
        {
            ${tr_ctype} a = ${tr_ctype}pack(
                ${sample.load_idx}(
                    ${", ".join(idxs[:-2])}, ${mask_idx}, ${decomp_idx}, ${idxs[-1]})
                );
            ${tr_ctype} b = ${tr_ctype}pack(
                ${bootstrap_key.load_idx}(
                    ${bk_row_idx}, ${mask_idx}, ${decomp_idx}, ${idxs[-2]}, ${idxs[-1]})
                );
            result = ${add}(result, ${mul}(a, b));
        }
        %endfor
        %endfor

        ${result.store_same}(${tr_ctype}unpack(result));
        """,
                          connectors=['result'],
                          render_kwds=dict(
                              mask_size=mask_size,
                              decomp_length=decomp_length,
                              add=transform.transformed_add(perf_params),
                              mul=transform.transformed_mul(perf_params),
                              tr_ctype=tr_ctype))
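
The unrolled ``%for`` loops accumulate an elementwise product over the mask and decomposition axes. Assuming the transformed domain uses ordinary (e.g. complex) arithmetic rather than the packed internal type, the same contraction in NumPy is a one-line ``einsum`` (an illustrative sketch):

import numpy

def transformed_add_mul_reference(sample, bootstrap_key, bk_row_idx):
    # sample:        batch_shape + (mask_size + 1, decomp_length, tlength)
    # bootstrap_key: (bk_len, mask_size + 1, decomp_length, mask_size + 1, tlength)
    # result:        batch_shape + (mask_size + 1, tlength)
    bk = bootstrap_key[bk_row_idx]
    return numpy.einsum('...mdt,mdkt->...kt', sample, bk)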
Example #20
    def __init__(self, a, b, current_variances):

        if (not (len(a.shape) - 1 == len(b.shape) == len(current_variances.shape))
                or not (a.shape[:-1] == b.shape == current_variances.shape)):
            raise ValueError("Inconsistent shapes: {a}, {b}, {cv}".format(
                a=a.shape, b=b.shape, cv=current_variances.shape))

        self.a = Type.from_value(a)
        self.b = Type.from_value(b)
        self.current_variances = Type.from_value(current_variances)
        self.shape = b.shape
Example #21
    def __init__(self, params: 'TLweParams', shape):

        self._mask_size = params.mask_size
        self._polynomial_degree = params.polynomial_degree

        result_a = Type(Torus32, shape + (params.extracted_lweparams.size,))
        result_b = Type(Torus32, shape)
        tlwe_a = Type(Torus32, shape + (params.mask_size + 1, params.polynomial_degree))

        Computation.__init__(self, [
            Parameter('result_a', Annotation(result_a, 'o')),
            Parameter('result_b', Annotation(result_b, 'o')),
            Parameter('tlwe_a', Annotation(tlwe_a, 'i'))])
Example #22
    def _build_plan(self, plan_factory, device_params, output, input_):

        plan = plan_factory()

        N = input_.shape[-1] * 4
        batch_shape = input_.shape[:-1]
        batch_size = helpers.product(batch_shape)

        # The first element is unused
        coeffs = numpy.concatenate(
            [[0],
             1 / (4 * numpy.sin(2 * numpy.pi * numpy.arange(1, N // 2) / N))])
        coeffs_arr = plan.persistent_array(coeffs)

        prepare_iprfft_input = get_prepare_iprfft_input(input_)
        prepare_iprfft_output = get_prepare_iprfft_output(output)

        irfft = IRFFT(prepare_iprfft_input.Y)
        irfft.parameter.input.connect(prepare_iprfft_input,
                                      prepare_iprfft_input.Y,
                                      X=prepare_iprfft_input.X)
        irfft.parameter.output.connect(prepare_iprfft_output,
                                       prepare_iprfft_output.y,
                                       x=prepare_iprfft_output.x,
                                       x0=prepare_iprfft_output.x0,
                                       coeffs=prepare_iprfft_output.coeffs)

        real = Transformation([
            Parameter(
                'output',
                Annotation(Type(dtypes.real_for(input_.dtype), input_.shape),
                           'o')),
            Parameter('input', Annotation(input_, 'i')),
        ],
                              """
            ${output.store_same}((${input.load_same}).x);
            """,
                              connectors=['output'])

        rd_t = Type(output.dtype, input_.shape)
        rd = Reduce(rd_t,
                    predicate_sum(rd_t.dtype),
                    axes=(len(input_.shape) - 1, ))
        rd.parameter.input.connect(real, real.output, X=real.input)

        x0 = plan.temp_array_like(rd.parameter.output)

        plan.computation_call(rd, x0, input_)
        plan.computation_call(irfft, output, x0, coeffs_arr, input_)

        return plan
Example #23
    def __init__(self, click_probability_meter, system, representation, samples):

        assert representation == Representation.POSITIVE_P
        self._system = system

        state = Type(numpy.complex128, (samples, system.modes))
        output = Type(numpy.float64, (system.modes,))
        Computation.__init__(
            self,
            [
                Parameter('output', Annotation(output, 'o')),
                Parameter('alpha', Annotation(state, 'i')),
                Parameter('beta', Annotation(state, 'i')),
            ])
Example #24
def setitem_method(array, index, value):
    # We need it both in ``cuda.Array`` and ``ocl.Array``, hence a standalone function.

    # PyOpenCL and PyCUDA support __setitem__() for some restricted cases,
    # but it is too complicated to determine when it will work,
    # and it is easier to just call our own implementation every time.

    view = array[index]
    value, is_array = normalize_value(array.thread, type(array), value)
    comp = array.thread.get_cached_computation(setitem_computation,
                                               Type.from_value(view),
                                               Type.from_value(value),
                                               is_array)
    comp(view, value)
Example #25
    def __init__(self, meter, system, representation, samples):

        self._system = system
        self._representation = representation

        state = Type(numpy.complex128, (samples, system.modes))
        output = Type(numpy.float64, (system.modes,))
        Computation.__init__(
            self,
            [
                Parameter('output', Annotation(output, 'o')),
                Parameter('alpha', Annotation(state, 'i')),
                Parameter('beta', Annotation(state, 'i')),
            ])
Example #26
    def __init__(self, params: 'TGswParams', shape):

        self._params = params

        decomp_length = params.decomp_length
        mask_size = params.tlwe_params.mask_size
        polynomial_degree = params.tlwe_params.polynomial_degree

        result_a = Type(
            Torus32, shape + (mask_size + 1, decomp_length, mask_size + 1, polynomial_degree))
        messages = Type(Torus32, shape)

        Computation.__init__(self,
            [Parameter('result_a', Annotation(result_a, 'o')),
            Parameter('messages', Annotation(messages, 'i'))])
Example #27
def test_computation_performance(thr_and_double, fast_math,
                                 test_sampler_float):

    thr, double = thr_and_double

    size = 2**15
    batch = 2**6

    sampler = test_sampler_float.get_sampler(double)

    rng = CBRNG(Type(sampler.dtype, shape=(batch, size)), 1, sampler)

    dest_dev = thr.empty_like(rng.parameter.randoms)
    counters = rng.create_counters()
    counters_dev = thr.to_device(counters)
    rngc = rng.compile(thr, fast_math=fast_math)

    attempts = 10
    times = []
    for i in range(attempts):
        t1 = time.time()
        rngc(counters_dev, dest_dev)
        thr.synchronize()
        times.append(time.time() - t1)

    byte_size = size * batch * sampler.dtype.itemsize
    return min(times), byte_size
Example #28
def transpose(img):
    assert img.shape[0] % 8 == 0
    img_T = thr.empty_like(
        Type(np.uint8, (img.shape[1] * 8, img.shape[0] // 8)))
    img_T.fill(0)
    prg.transpose(img, img_T, global_size=img_T.shape[::-1], local_size=(1, 8))
    return img_T
Example #29
def rolling_frame(arr, NFFT, noverlap, pad_to):
    """
    Transforms a 1D array to a 2D array whose rows are
    partially overlapped parts of the initial array.
    """

    frame_step = NFFT - noverlap
    frame_num = (arr.size - noverlap) // frame_step
    frame_size = NFFT if pad_to is None else pad_to

    result_arr = Type(arr.dtype, (frame_num, frame_size))

    return Transformation(
        [
            Parameter('output', Annotation(result_arr, 'o')),
            Parameter('input', Annotation(arr, 'i')),
        ],
        """
        %if NFFT != output.shape[1]:
        if (${idxs[1]} >= ${NFFT})
        {
            ${output.store_same}(0);
        }
        else
        %endif
        {
            ${output.store_same}(${input.load_idx}(${idxs[0]} * ${frame_step} + ${idxs[1]}));
        }
        """,
        render_kwds=dict(frame_step=frame_step, NFFT=NFFT),
        # note that only the "store_same"-using argument can serve as a connector!
        connectors=['output'])
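
An equivalent NumPy routine, useful for checking the indexing (a sketch; assumes ``pad_to``, when given, is at least ``NFFT``, as the kernel does):

import numpy

def rolling_frame_reference(arr, NFFT, noverlap, pad_to=None):
    frame_step = NFFT - noverlap
    frame_num = (arr.size - noverlap) // frame_step
    frame_size = NFFT if pad_to is None else pad_to
    result = numpy.zeros((frame_num, frame_size), arr.dtype)
    for i in range(frame_num):
        # Row i is a frame starting at i * frame_step, zero-padded on the right.
        result[i, :NFFT] = arr[i * frame_step:i * frame_step + NFFT]
    return result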
Example #30
    def __init__(self, mode_arr, add_points=None, inverse=False, order=1, axes=None):

        if axes is None:
            axes = tuple(range(len(mode_arr.shape)))
        else:
            axes = tuple(axes)
        self._axes = list(sorted(axes))

        if add_points is None:
            add_points = [0] * len(mode_arr.shape)
        else:
            add_points = list(add_points)
        self._add_points = add_points

        coord_shape = list(mode_arr.shape)
        for axis in range(len(mode_arr.shape)):
            if axis in axes:
                coord_shape[axis] = get_spatial_points(
                    mode_arr.shape[axis], order, add_points=add_points[axis])
        coord_arr = Type(mode_arr.dtype, shape=coord_shape)

        self._inverse = inverse
        self._order = order

        if not inverse:
            parameters = [
                Parameter('modes', Annotation(mode_arr, 'o')),
                Parameter('coords', Annotation(coord_arr, 'i'))]
        else:
            parameters = [
                Parameter('coords', Annotation(coord_arr, 'o')),
                Parameter('modes', Annotation(mode_arr, 'i'))]

        Computation.__init__(self, parameters)
Example #31
    def __init__(self,
                 thr,
                 shape,
                 dtype,
                 box,
                 tmax,
                 steps,
                 samples,
                 kinetic_coeff=1,
                 nonlinear_module=None):

        state_arr = Type(dtype, shape)
        self.tmax = tmax
        self.steps = steps
        self.samples = samples
        self.dt = float(tmax) / steps
        self.dt_half = self.dt / 2

        self.thr = thr
        self.stepper = RK4IPStepper(
            state_arr,
            self.dt,
            box=box,
            kinetic_coeff=kinetic_coeff,
            nonlinear_module=nonlinear_module).compile(thr)
        self.stepper_half = RK4IPStepper(
            state_arr,
            self.dt_half,
            box=box,
            kinetic_coeff=kinetic_coeff,
            nonlinear_module=nonlinear_module).compile(thr)
Example #32
def cast(arr_t, dtype):
    """
    Returns a typecast transformation of ``arr_t`` to ``dtype``
    (1 output, 1 input): ``output = cast[dtype](input)``.
    """
    dest = Type.from_value(arr_t).with_dtype(dtype)
    return Transformation(
        [Parameter('output', Annotation(dest, 'o')),
        Parameter('input', Annotation(arr_t, 'i'))],
        "${output.store_same}(${cast}(${input.load_same}));",
        render_kwds=dict(cast=functions.cast(dtype, arr_t.dtype)))
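
A minimal usage sketch, wrapping the transformation into a standalone elementwise computation (assumes reikna's ``cluda``, ``PureParallel`` and ``Type``; the array size is arbitrary):

import numpy
from reikna import cluda
from reikna.algorithms import PureParallel
from reikna.core import Type

thr = cluda.any_api().Thread.create()

# Cast float64 -> float32 on the fly.
trf = cast(Type(numpy.float64, shape=(16,)), numpy.float32)
comp = PureParallel.from_trf(trf, guiding_array=trf.output)

a = numpy.linspace(0, 1, 16)
a_dev = thr.to_device(a)
res_dev = thr.empty_like(comp.parameter.output)

comp.compile(thr)(res_dev, a_dev)
assert res_dev.get().dtype == numpy.float32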
Example #33
def copy_broadcasted(arr_t, out_arr_t=None):
    """
    Returns an identity transformation (1 output, 1 input): ``output = input``,
    where ``input`` may be broadcasted (with the same semantics as ``numpy.broadcast_to()``).
    Output array type ``out_arr_t`` may have different strides,
    but must have the same data type, and the shape of ``arr_t``
    must be broadcastable to it.

    .. note::

        This is an input-only transformation.
    """

    if out_arr_t is None:
        out_arr_t = arr_t

    if out_arr_t.dtype != arr_t.dtype:
        raise ValueError("Input and output arrays must have the same data type")

    in_tp = Type.from_value(arr_t)
    out_tp = Type.from_value(out_arr_t)
    if not in_tp.broadcastable_to(out_tp):
        raise ValueError("Input is not broadcastable to output")

    return Transformation(
        [Parameter('output', Annotation(out_arr_t, 'o')),
        Parameter('input', Annotation(arr_t, 'i'))],
        """
        ${output.store_same}(${input.load_idx}(
        %for i in range(len(input.shape)):
            %if input.shape[i] == 1:
            0
            %else:
            ${idxs[i + len(output.shape) - len(input.shape)]}
            %endif
            %if i != len(input.shape) - 1:
                ,
            %endif
        %endfor
        ));
        """,
        connectors=['output'])
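
The resulting behavior matches ``numpy.broadcast_to`` followed by a copy; a one-line reference sketch:

import numpy

def copy_broadcasted_reference(input_arr, out_shape):
    # Leading axes are added and length-1 axes are repeated,
    # exactly as in the index expression of the kernel above.
    return numpy.broadcast_to(input_arr, out_shape).copy()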
Example #34
def roll(array, shift, axis=-1):
    """
    Cyclically shifts elements of ``array`` by ``shift`` positions to the right along ``axis``.
    ``shift`` can be negative (in which case the elements are shifted to the left).
    Elements that are shifted beyond the last position are re-introduced at the first
    (and vice versa).

    Works equivalently to ``numpy.roll`` (except ``axis=None`` is not supported).
    """
    temp = array.thread.array(array.shape, array.dtype)
    axis = axis % len(array.shape)
    comp = array.thread.get_cached_computation(
        roll_computation, Type.from_value(array), axis)
    comp(temp, array, shift)
    return temp
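
A quick usage sketch (hypothetical values; ``thr`` is an existing reikna Thread, so ``array.thread`` is available to the function above):

import numpy

a = numpy.arange(8, dtype=numpy.int32)
a_dev = thr.to_device(a)
rolled = roll(a_dev, 3)
assert (rolled.get() == numpy.roll(a, 3)).all()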
Example #35
def setitem_computation(dest, source):
    """
    Returns a computation that broadcasts ``source`` to ``dest``,
    where ``dest`` is a GPU array, and ``source`` is either a GPU array or a scalar.
    """
    if len(source.shape) == 0:
        trf = transformations.broadcast_param(dest)
        return PureParallel.from_trf(trf, guiding_array=trf.output)
    else:
        source_dt = Type.from_value(source).with_dtype(dest.dtype)
        trf = transformations.copy(source_dt, dest)
        comp = PureParallel.from_trf(trf, guiding_array=trf.output)
        cast_trf = transformations.cast(source, dest.dtype)
        comp.parameter.input.connect(cast_trf, cast_trf.output, src_input=cast_trf.input)
        return comp
Example #36
def roll_method(array, shift, axis=-1):
    axis = axis % len(array.shape)
    comp = array.thread.get_cached_computation(
        RollInplace, Type.from_value(array), axis)
    comp(array, shift)
Example #37
def get_method(array):
    temp = array.thread.array(array.shape, array.dtype)
    comp = array.thread.get_cached_computation(
        setitem_computation, Type.from_value(temp), Type.from_value(array))
    comp(temp, array)
    return temp.get()