Exemplo n.º 1
0
def get_common_kwds(dtype, device_params):
    """
    Collect the render keywords that are built from ``dtype`` and ``device_params``.

    :param dtype: element data type; its ``itemsize`` is used to index
        ``device_params.min_mem_coalesce_width``.
    :param device_params: device description providing ``min_mem_coalesce_width``
        and ``local_mem_banks``.
    :returns: a ``dict`` of keyword values (data type, device limits, helper callables
        and function objects created by ``functions``).
    """

    def wrap_const(x):
        # Render ``x`` as a C constant of the real counterpart of ``dtype``.
        return dtypes.c_constant(x, dtypes.real_for(dtype))

    common_kwds = {
        'dtype': dtype,
        'min_mem_coalesce_width': device_params.min_mem_coalesce_width[dtype.itemsize],
        'local_mem_banks': device_params.local_mem_banks,
        'get_padding': get_padding,
        'wrap_const': wrap_const,
        'min_blocks': helpers.min_blocks,
        'mul': functions.mul(dtype, dtype),
        'polar_unit': functions.polar_unit(dtypes.real_for(dtype)),
        'cdivs': functions.div(dtype, numpy.uint32, out_dtype=dtype),
    }
    return common_kwds
Exemplo n.º 2
0
Arquivo: fft.py Projeto: xbee/nufhe
def fft512(use_constant_memory=False):
    """
    Create an ``FFT512`` object backed by the ``fft512`` template definition.

    :param use_constant_memory: flag forwarded both to the template (as a render keyword)
        and to the ``FFT512`` constructor.
    :returns: ``FFT512(module, use_constant_memory)``.
    """
    template_def = TEMPLATE.get_def('fft512')

    # Elements and ``cdata`` are rendered as complex128, temporaries as float64.
    render_kwds = {
        'elem_ctype': dtypes.ctype(numpy.complex128),
        'temp_ctype': dtypes.ctype(numpy.float64),
        'cdata_ctype': dtypes.ctype(numpy.complex128),
        'polar_unit': functions.polar_unit(numpy.float64),
        'mul': functions.mul(numpy.complex128, numpy.complex128),
        'use_constant_memory': use_constant_memory,
    }

    module = Module(template_def, render_kwds=render_kwds)
    return FFT512(module, use_constant_memory)
Exemplo n.º 3
0
def get_common_kwds(dtype, device_params):
    """
    Build the dictionary of render keywords derived from ``dtype`` and ``device_params``.

    :param dtype: element data type (must expose ``itemsize``).
    :param device_params: object with ``min_mem_coalesce_width`` (indexable by item size)
        and ``local_mem_banks`` attributes.
    :returns: ``dict`` with the keys ``dtype``, ``min_mem_coalesce_width``,
        ``local_mem_banks``, ``get_padding``, ``wrap_const``, ``min_blocks``, ``mul``,
        ``polar_unit`` and ``cdivs``.
    """
    coalesce_width = device_params.min_mem_coalesce_width[dtype.itemsize]
    bank_count = device_params.local_mem_banks

    # Function objects: dtype * dtype, polar unit for the real counterpart of dtype,
    # and division of dtype by uint32 yielding dtype.
    elem_mul = functions.mul(dtype, dtype)
    unit_polar = functions.polar_unit(dtypes.real_for(dtype))
    div_by_uint = functions.div(dtype, numpy.uint32, out_dtype=dtype)

    return dict(
        dtype=dtype,
        min_mem_coalesce_width=coalesce_width,
        local_mem_banks=bank_count,
        get_padding=get_padding,
        # The real counterpart of ``dtype`` is looked up on every call.
        wrap_const=lambda value: dtypes.c_constant(value, dtypes.real_for(dtype)),
        min_blocks=helpers.min_blocks,
        mul=elem_mul,
        polar_unit=unit_polar,
        cdivs=div_by_uint,
    )
Exemplo n.º 4
0
def unimod_gen(size, single=True):
    """
    Create a transformation that stores ``polar_unit(atan2(val.y, val.x))`` for every
    input element ``val``.

    :param size: shape passed to ``Type`` for both the input and the output.
    :param single: if true, use ``complex64`` arrays with a ``float32`` polar unit function;
        otherwise ``complex128`` with ``double``.
    :returns: the ``Transformation`` object with parameters ``output`` and ``input``.
    """
    if single:
        complex_dtype, angle_dtype = np.complex64, np.float32
    else:
        complex_dtype, angle_dtype = np.complex128, np.double

    parameters = [
        Parameter('output', Annotation(Type(complex_dtype, size), 'o')),
        Parameter('input', Annotation(Type(complex_dtype, size), 'i')),
    ]

    # Kernel snippet: load the element, take its phase, store the polar unit of that phase.
    snippet = '''
        ${input.ctype} val = ${input.load_same};       
        ${output.store_same}(${polar_unit}(atan2(val.y, val.x)));
        '''

    render_kwds = {'polar_unit': functions.polar_unit(dtype=angle_dtype)}
    return Transformation(parameters, snippet, render_kwds=render_kwds)
Exemplo n.º 5
0
def unimod_gen(size, single=True):
    """
    Build the "unimodular" transformation: each output element is
    ``polar_unit(atan2(val.y, val.x))`` of the corresponding input element ``val``.

    :param size: array shape used for both transformation parameters.
    :param single: selects single precision (``complex64``/``float32``) when true,
        double precision (``complex128``/``double``) otherwise.
    :returns: a ``Transformation`` with an ``output`` and an ``input`` parameter.
    """
    dtype = np.complex64 if single else np.complex128

    out_param = Parameter('output', Annotation(Type(dtype, size), 'o'))
    in_param = Parameter('input', Annotation(Type(dtype, size), 'i'))

    # The polar unit function works on the real type matching the chosen precision.
    polar_unit = functions.polar_unit(dtype=np.float32 if single else np.double)

    return Transformation(
        [out_param, in_param],
        '''
        ${input.ctype} val = ${input.load_same};       
        ${output.store_same}(${polar_unit}(atan2(val.y, val.x)));
        ''',
        render_kwds={'polar_unit': polar_unit},
    )
Exemplo n.º 6
0
def normal_bm(bijection, dtype, mean=0, std=1):
    """
    Create a sampler of normally distributed random numbers (Box-Muller transform)
    with the mean ``mean`` and the standard deviation ``std``.
    Supported dtypes: ``float(32/64)``, ``complex(64/128)``.
    A call produces two random numbers for real types and one number for complex types.
    Returns a :py:class:`~reikna.cbrng.samplers.Sampler` object.

    .. note::

        For a complex ``dtype``, ``std`` is the standard deviation of the complex numbers
        themselves (what ``numpy.std()`` returns), so the real and imaginary components
        are each normally distributed with the standard deviation ``std / sqrt(2)``.
        Consequently, ``mean`` is of type ``dtype``, but ``std`` must be real.
    """

    is_complex = dtypes.is_complex(dtype)

    # Both the real and the complex flavour of the requested type are passed to the template.
    if is_complex:
        c_dtype = dtype
        r_dtype = dtypes.real_for(dtype)
    else:
        r_dtype = dtype
        c_dtype = dtypes.complex_for(dtype)

    # Uniform sampler on [low=0, high=1] of the real type, used by the template.
    uf = uniform_float(bijection, r_dtype, low=0, high=1)

    template_def = TEMPLATE.get_def("normal_bm")
    render_kwds = {
        'complex_res': is_complex,
        'r_dtype': r_dtype,
        'r_ctype': dtypes.ctype(r_dtype),
        'c_dtype': c_dtype,
        'c_ctype': dtypes.ctype(c_dtype),
        'polar_unit': functions.polar_unit(r_dtype),
        'bijection': bijection,
        'mean': mean,
        'std': std,
        'uf': uf,
    }
    module = Module(template_def, render_kwds=render_kwds)

    randoms_per_call = 1 if is_complex else 2
    return Sampler(
        bijection,
        module,
        dtype,
        deterministic=uf.deterministic,
        randoms_per_call=randoms_per_call,
    )
Exemplo n.º 7
0
def normal_bm(bijection, dtype, mean=0, std=1):
    """
    Box-Muller sampler: generates normally distributed random numbers with
    the mean ``mean`` and the standard deviation ``std``.
    Supported dtypes: ``float(32/64)``, ``complex(64/128)``.
    Real types yield two random numbers per call, complex types yield one.
    Returns a :py:class:`~reikna.cbrng.samplers.Sampler` object.

    .. note::

        When ``dtype`` is complex, ``std`` describes the complex numbers as a whole
        (same as ``numpy.std()`` returns); the real and imaginary components are
        normally distributed with the standard deviation ``std / sqrt(2)``.
        Consequently, while ``mean`` is of type ``dtype``, ``std`` must be real.
    """

    complex_res = dtypes.is_complex(dtype)

    # Derive the missing counterpart (real or complex) of the requested type.
    r_dtype, c_dtype = (
        (dtypes.real_for(dtype), dtype) if complex_res
        else (dtype, dtypes.complex_for(dtype)))

    uf = uniform_float(bijection, r_dtype, low=0, high=1)

    bm_def = TEMPLATE.get_def("normal_bm")
    kwds = dict(complex_res=complex_res)
    kwds.update(r_dtype=r_dtype, r_ctype=dtypes.ctype(r_dtype))
    kwds.update(c_dtype=c_dtype, c_ctype=dtypes.ctype(c_dtype))
    kwds.update(polar_unit=functions.polar_unit(r_dtype))
    kwds.update(bijection=bijection, mean=mean, std=std, uf=uf)

    module = Module(bm_def, render_kwds=kwds)

    # One complex number or two real numbers are produced per call.
    per_call = 1 if complex_res else 2
    return Sampler(
        bijection, module, dtype,
        deterministic=uf.deterministic, randoms_per_call=per_call)
Exemplo n.º 8
0
def get_procs(thr, N):
    """
    Build and compile four computations working on 1D ``complex128`` arrays of length ``N``.

    * ``fft_unimod``: FFT whose output goes through a transformation that stores zero
      for indices above ``N/2`` and ``polar_unit(atan2(val.y, val.x))`` otherwise.
    * ``mag_square``: stores ``val.x*val.x + val.y*val.y`` in the real part and zero in
      the imaginary part.
    * ``apply_mask``: multiplies the complex ``origin`` array by the ``double`` ``mask`` array.
    * ``combine_mag_phi``: takes the square root of the real part of ``mag_square``
      (zero if it is negative) as the magnitude and the phase of ``phase`` as the angle,
      and stores ``polar(r, angle)``.

    :param thr: object handed to ``FFTFactory.create`` and to every ``compile`` call.
    :param N: array length.
    :returns: tuple ``(fft_unimod, mag_square, apply_mask, combine_mag_phi)``.
    """

    def complex_param(name, role):
        # Parameter annotation for a complex128 array of length N.
        return Parameter(name, Annotation(Type(np.complex128, N), role))

    fft = FFTFactory.create(thr, (N,), compile_=False)

    unimod_params = [complex_param('output', 'o'), complex_param('input', 'i')]
    unimod_code = """
VSIZE_T idx = ${idxs[0]};
${input.ctype} val = ${input.load_same};
if (idx>${N}/2){
    val.x = 0.0;
    val.y = 0.0;
    ${output.store_same}(val);
}else
    ${output.store_same}(${polar_unit}(atan2(val.y, val.x)));
        """
    unimod_kwds = {'polar_unit': functions.polar_unit(dtype=np.float64), 'N': N}
    unimod_trans = Transformation(unimod_params, unimod_code, render_kwds=unimod_kwds)

    # Attach the transformation to the FFT output before compiling.
    fft.parameter.output.connect(unimod_trans, unimod_trans.input, uni=unimod_trans.output)
    fft_unimod = fft.compile(thr)

    mag_square_params = [complex_param('output', 'o'), complex_param('input', 'i')]
    mag_square_code = '''
VSIZE_T idx = ${idxs[0]};
${input.ctype} val = ${input.load_idx}(idx);  
val.x = val.x*val.x + val.y*val.y;
val.y = 0;
${output.store_idx}(idx, val);
        '''
    mag_square = PureParallel(mag_square_params, mag_square_code).compile(thr)

    apply_mask_params = [
        complex_param('output', 'o'),
        complex_param('origin', 'i'),
        Parameter('mask', Annotation(Type(np.double, N), 'i')),
    ]
    apply_mask_code = '''
VSIZE_T idx = ${idxs[0]};
${output.store_idx}(idx, ${mul}(${origin.load_idx}(idx), ${mask.load_idx}(idx)));        
        '''
    apply_mask_kwds = {'mul': functions.mul(np.complex128, np.double)}
    apply_mask = PureParallel(
        apply_mask_params, apply_mask_code, render_kwds=apply_mask_kwds).compile(thr)

    combine_params = [
        complex_param('output', 'o'),
        complex_param('mag_square', 'i'),
        complex_param('phase', 'i'),
    ]
    combine_code = '''
VSIZE_T idx = ${idxs[0]};
double r = ${mag_square.load_idx}(idx).x;  
r = r<0.0 ? 0.0 : ${pow}(r, 0.5);
double2 v = ${phase.load_idx}(idx);
double angle = atan2(v.y, v.x);
${output.store_idx}(idx, ${polar}(r, angle));
        '''
    combine_kwds = {'pow': functions.pow(np.double), 'polar': functions.polar(np.double)}
    combine_mag_phi = PureParallel(
        combine_params, combine_code, render_kwds=combine_kwds).compile(thr)

    return fft_unimod, mag_square, apply_mask, combine_mag_phi
Exemplo n.º 9
0
def test_polar_unit(thr, out_code, in_codes):
    """Check ``functions.polar_unit`` against the reference ``numpy.exp(1j * theta)``."""

    def reference(theta):
        return numpy.exp(1j * theta)

    out_dtype, in_dtypes = generate_dtypes(out_code, in_codes)
    tested_func = functions.polar_unit(in_dtypes[0])
    check_func(thr, tested_func, reference, out_dtype, in_dtypes)
Exemplo n.º 10
0
def test_polar_unit(thr, out_code, in_codes):
    """
    Compare the function built by ``functions.polar_unit`` for the first input dtype
    with ``numpy.exp(1j * theta)`` using ``check_func``.
    """
    dtype_pair = generate_dtypes(out_code, in_codes)
    out_dtype, in_dtypes = dtype_pair
    angle_dtype = in_dtypes[0]
    check_func(
        thr,
        functions.polar_unit(angle_dtype),
        lambda angle: numpy.exp(1j * angle),
        out_dtype,
        in_dtypes,
    )
Exemplo n.º 11
0
    def _build_plan(self, plan_factory, device_params, output, alpha, beta):
        """
        Assemble the computation plan: a preparation kernel, an aggregation kernel,
        a sum-reduction over axis 0, and an FFT whose real part is written to ``output``.

        :param plan_factory: callable returning an empty plan object.
        :param device_params: device description; ``local_mem_size`` and
            ``max_work_group_size`` are read to choose the block and work group sizes.
        :param output: array-like parameter receiving the real part of the FFT result.
        :param alpha: array-like parameter with a two-dimensional shape ``(samples, modes)``.
        :param beta: array-like parameter passed to the preparation kernel along with ``alpha``.
        :returns: the populated plan.
        :raises OutOfResourcesError: if the aggregation kernel could not be added with any
            of the attempted work group sizes.
        """

        plan = plan_factory()

        samples, modes = alpha.shape
        output_size = self._max_total_clicks + 1

        for_reduction = Type(alpha.dtype, (samples, output_size))

        prepared_state = plan.temp_array_like(alpha)

        plan.kernel_call(
            TEMPLATE.get_def("compound_click_probability_prepare"),
            [prepared_state, alpha, beta],
            kernel_name="compound_click_probability_prepare",
            global_size=alpha.shape,
            render_kwds=dict(
                mul_cc=functions.mul(alpha.dtype, alpha.dtype),
                exp_c=functions.exp(alpha.dtype),
                ))

        # Block size is limited by the amount of available local memory.
        # In some OpenCL implementations the number reported cannot actually be fully used
        # (because it's used by kernel arguments), so we're padding it a little.
        local_mem_size = device_params.local_mem_size
        max_elems = (local_mem_size - 256) // alpha.dtype.itemsize
        block_size = 2**helpers.log2(max_elems)

        # No reason to have block size larger than the number of modes
        block_size = min(block_size, helpers.bounding_power_of_2(modes))

        products_gsize = (samples, helpers.min_blocks(output_size, block_size) * block_size)
        products = plan.temp_array_like(for_reduction)

        read_size = min(block_size, device_params.max_work_group_size)

        # These do not depend on ``read_size``, so compute them once outside the retry loop.
        full_steps, remainder_size = divmod(modes, block_size)

        # Try progressively smaller work group sizes until the kernel fits on the device.
        # The loop must only be left after a successful ``kernel_call``; leaving it after
        # a failure would silently omit the aggregation kernel from the plan.
        last_error = None
        while read_size > 1:
            try:
                plan.kernel_call(
                    TEMPLATE.get_def("compound_click_probability_aggregate"),
                    [products, prepared_state],
                    kernel_name="compound_click_probability_aggregate",
                    global_size=products_gsize,
                    local_size=(1, read_size,),
                    render_kwds=dict(
                        block_size=block_size,
                        read_size=read_size,
                        full_steps=full_steps,
                        remainder_size=remainder_size,
                        output_size=output_size,
                        mul_cc=functions.mul(alpha.dtype, alpha.dtype),
                        add_cc=functions.add(alpha.dtype, alpha.dtype),
                        polar_unit=functions.polar_unit(dtypes.real_for(alpha.dtype)),
                        # NOTE(review): taken from ``self._system`` rather than from
                        # ``alpha.shape``; presumably the two agree -- confirm.
                        modes=self._system.modes,
                        max_total_clicks=self._max_total_clicks,
                        ))
            except OutOfResourcesError as exc:
                last_error = exc
                read_size //= 2
            else:
                break
        else:
            # Every attempted size failed: report it instead of returning a plan
            # without the aggregation step. If the loop body never ran
            # (initial ``read_size <= 1``), nothing is raised, as before.
            if last_error is not None:
                raise last_error

        reduction = Reduce(for_reduction, predicate_sum(alpha.dtype), axes=(0,))

        temp = plan.temp_array_like(reduction.parameter.output)

        plan.computation_call(reduction, temp, products)

        # Attach a transformation to the FFT output that keeps only the real part.
        fft = FFT(temp)
        real_trf = Transformation([
            Parameter('output', Annotation(output, 'o')),
            Parameter('input', Annotation(temp, 'i')),
            ],
            """
                ${input.ctype} val = ${input.load_same};
                ${output.store_same}(val.x);
                """)
        fft.parameter.output.connect(real_trf, real_trf.input, output_p=real_trf.output)

        # NOTE(review): the trailing ``True`` is presumably the FFT's "inverse" argument -- confirm.
        plan.computation_call(fft, output, temp, True)

        return plan
Exemplo n.º 12
0
def get_procs(thr, N):
    """
    Create and compile the four computations below, all operating on
    length-``N`` ``complex128`` arrays.

    :param thr: object passed to ``FFTFactory.create`` and used to compile each computation.
    :param N: length of the processed arrays.
    :returns: ``(fft_unimod, mag_square, apply_mask, combine_mag_phi)``:

        * ``fft_unimod`` -- FFT with an output transformation storing zero where the
          index exceeds ``N/2`` and ``polar_unit(atan2(val.y, val.x))`` elsewhere;
        * ``mag_square`` -- squared magnitude in the real part, zero imaginary part;
        * ``apply_mask`` -- product of the complex ``origin`` and the ``double`` ``mask``;
        * ``combine_mag_phi`` -- ``polar(r, angle)`` where ``r`` is the square root of the
          real part of ``mag_square`` (zero if negative) and ``angle`` is the phase of ``phase``.
    """
    # Kernel sources (kept byte-for-byte; they are rendered as templates at compile time).
    unimod_src = """
VSIZE_T idx = ${idxs[0]};
${input.ctype} val = ${input.load_same};
if (idx>${N}/2){
    val.x = 0.0;
    val.y = 0.0;
    ${output.store_same}(val);
}else
    ${output.store_same}(${polar_unit}(atan2(val.y, val.x)));
        """
    mag_square_src = '''
VSIZE_T idx = ${idxs[0]};
${input.ctype} val = ${input.load_idx}(idx);  
val.x = val.x*val.x + val.y*val.y;
val.y = 0;
${output.store_idx}(idx, val);
        '''
    apply_mask_src = '''
VSIZE_T idx = ${idxs[0]};
${output.store_idx}(idx, ${mul}(${origin.load_idx}(idx), ${mask.load_idx}(idx)));        
        '''
    combine_src = '''
VSIZE_T idx = ${idxs[0]};
double r = ${mag_square.load_idx}(idx).x;  
r = r<0.0 ? 0.0 : ${pow}(r, 0.5);
double2 v = ${phase.load_idx}(idx);
double angle = atan2(v.y, v.x);
${output.store_idx}(idx, ${polar}(r, angle));
        '''

    # FFT followed by the "unimodular" output transformation.
    fft = FFTFactory.create(thr, (N, ), compile_=False)
    unimod_output = Parameter('output', Annotation(Type(np.complex128, N), 'o'))
    unimod_input = Parameter('input', Annotation(Type(np.complex128, N), 'i'))
    unimod_trans = Transformation(
        [unimod_output, unimod_input],
        unimod_src,
        render_kwds={
            'polar_unit': functions.polar_unit(dtype=np.float64),
            'N': N,
        })
    fft.parameter.output.connect(
        unimod_trans, unimod_trans.input, uni=unimod_trans.output)
    fft_unimod = fft.compile(thr)

    # Squared magnitude.
    mag_output = Parameter('output', Annotation(Type(np.complex128, N), 'o'))
    mag_input = Parameter('input', Annotation(Type(np.complex128, N), 'i'))
    mag_square_comp = PureParallel([mag_output, mag_input], mag_square_src)
    mag_square = mag_square_comp.compile(thr)

    # Element-wise complex * double product.
    mask_output = Parameter('output', Annotation(Type(np.complex128, N), 'o'))
    mask_origin = Parameter('origin', Annotation(Type(np.complex128, N), 'i'))
    mask_values = Parameter('mask', Annotation(Type(np.double, N), 'i'))
    apply_mask_comp = PureParallel(
        [mask_output, mask_origin, mask_values],
        apply_mask_src,
        render_kwds={'mul': functions.mul(np.complex128, np.double)})
    apply_mask = apply_mask_comp.compile(thr)

    # Magnitude from one array, phase from another.
    combine_output = Parameter('output', Annotation(Type(np.complex128, N), 'o'))
    combine_mag = Parameter('mag_square', Annotation(Type(np.complex128, N), 'i'))
    combine_phase = Parameter('phase', Annotation(Type(np.complex128, N), 'i'))
    combine_comp = PureParallel(
        [combine_output, combine_mag, combine_phase],
        combine_src,
        render_kwds={
            'pow': functions.pow(np.double),
            'polar': functions.polar(np.double),
        })
    combine_mag_phi = combine_comp.compile(thr)

    return fft_unimod, mag_square, apply_mask, combine_mag_phi