Пример #1
0
def test_dtype_support(thr, dtype):
    # Test passes if either thread correctly reports that it does not support given dtype,
    # or it successfully compiles kernel that operates with this dtype.

    N = 256

    if not thr.device_params.supports_dtype(dtype):
        pytest.skip()

    mul = functions.mul(dtype, dtype)
    div = functions.div(dtype, dtype)
    program = thr.compile(
    """
    KERNEL void test(
        GLOBAL_MEM ${ctype} *dest, GLOBAL_MEM ${ctype} *a, GLOBAL_MEM ${ctype} *b)
    {
      const SIZE_T i = get_global_id(0);
      ${ctype} temp = ${mul}(a[i], b[i]);
      dest[i] = ${div}(temp, b[i]);
    }
    """, render_kwds=dict(ctype=dtypes.ctype(dtype), dtype=dtype, mul=mul, div=div))

    test = program.test

    # we need results to fit even in unsigned char
    a = get_test_array(N, dtype, high=8)
    b = get_test_array(N, dtype, no_zeros=True, high=8)

    a_dev = thr.to_device(a)
    b_dev = thr.to_device(b)
    dest_dev = thr.empty_like(a_dev)
    test(dest_dev, a_dev, b_dev, global_size=N)
    assert diff_is_negligible(thr.from_device(dest_dev), a)
Пример #2
0
def get_nonlinear3(state_arr, scalar_dtype, nonlinear_module, dt):
    # k4 = N(D(psi_4), t + dt)
    # output = D(psi_k) + k4 / 6
    return PureParallel(
        [
            Parameter('output', Annotation(state_arr, 'o')),
            Parameter('kprop_psi_k', Annotation(state_arr, 'i')),
            Parameter('kprop_psi_4', Annotation(state_arr, 'i')),
            Parameter('t', Annotation(scalar_dtype))],
        """
        <%
            all_indices = ', '.join(idxs)
        %>

        ${output.ctype} psi4_0 = ${kprop_psi_4.load_idx}(0, ${all_indices});
        ${output.ctype} psi4_1 = ${kprop_psi_4.load_idx}(1, ${all_indices});
        ${output.ctype} psik_0 = ${kprop_psi_k.load_idx}(0, ${all_indices});
        ${output.ctype} psik_1 = ${kprop_psi_k.load_idx}(1, ${all_indices});

        ${output.ctype} k4_0 = ${nonlinear}0(psi4_0, psi4_1, ${t} + ${dt});
        ${output.ctype} k4_1 = ${nonlinear}1(psi4_0, psi4_1, ${t} + ${dt});

        ${output.store_idx}(0, ${all_indices}, psik_0 + ${div}(k4_0, 6));
        ${output.store_idx}(1, ${all_indices}, psik_1 + ${div}(k4_1, 6));
        """,
        guiding_array=state_arr.shape[1:],
        render_kwds=dict(
            nonlinear=nonlinear_module,
            dt=dtypes.c_constant(dt, scalar_dtype),
            div=functions.div(state_arr.dtype, numpy.int32, out_dtype=state_arr.dtype)))
Пример #3
0
def get_nonlinear3(state_arr, scalar_dtype, nonlinear_module, dt):
    # k4 = N(D(psi_4), t + dt)
    # output = D(psi_k) + k4 / 6
    return PureParallel([
        Parameter('output', Annotation(state_arr, 'o')),
        Parameter('kprop_psi_k', Annotation(state_arr, 'i')),
        Parameter('kprop_psi_4', Annotation(state_arr, 'i')),
        Parameter('t', Annotation(scalar_dtype))
    ],
                        """
        <%
            all_indices = ', '.join(idxs)
        %>

        ${output.ctype} psi4_0 = ${kprop_psi_4.load_idx}(0, ${all_indices});
        ${output.ctype} psi4_1 = ${kprop_psi_4.load_idx}(1, ${all_indices});
        ${output.ctype} psik_0 = ${kprop_psi_k.load_idx}(0, ${all_indices});
        ${output.ctype} psik_1 = ${kprop_psi_k.load_idx}(1, ${all_indices});

        ${output.ctype} k4_0 = ${nonlinear}0(psi4_0, psi4_1, ${t} + ${dt});
        ${output.ctype} k4_1 = ${nonlinear}1(psi4_0, psi4_1, ${t} + ${dt});

        ${output.store_idx}(0, ${all_indices}, psik_0 + ${div}(k4_0, 6));
        ${output.store_idx}(1, ${all_indices}, psik_1 + ${div}(k4_1, 6));
        """,
                        guiding_array=state_arr.shape[1:],
                        render_kwds=dict(
                            nonlinear=nonlinear_module,
                            dt=dtypes.c_constant(dt, scalar_dtype),
                            div=functions.div(state_arr.dtype,
                                              numpy.int32,
                                              out_dtype=state_arr.dtype)))
Пример #4
0
def get_nonlinear3(state_arr, potential_arr, scalar_dtype, nonlinear_module, dt):
    # k4 = N(D(psi_4), t + dt)
    # output = D(psi_k) + k4 / 6
    return PureParallel(
        [
            Parameter('output', Annotation(state_arr, 'o')),
            Parameter('kprop_psi_k', Annotation(state_arr, 'i')),
            Parameter('kprop_psi_4', Annotation(state_arr, 'i')),
            Parameter('potential_next', Annotation(potential_arr, 'i')),
            Parameter('t', Annotation(scalar_dtype))],
        """
        %for comp in range(components):
        ${output.ctype} psi4_${comp} = ${kprop_psi_4.load_idx}(${comp}, ${idxs.all()});
        ${output.ctype} psik_${comp} = ${kprop_psi_k.load_idx}(${comp}, ${idxs.all()});
        %endfor
        ${potential_next.ctype} V = ${potential_next.load_idx}(${', '.join(idxs[1:])});

        %for comp in range(components):
        ${output.ctype} k4_${comp} = ${nonlinear}${comp}(
            %for pcomp in range(components):
            psi4_${pcomp},
            %endfor
            V, ${t} + ${dt});
        ${output.store_idx}(${comp}, ${idxs.all()}, psik_${comp} + ${div}(k4_${comp}, 6));
        %endfor
        """,
        guiding_array=state_arr.shape[1:],
        render_kwds=dict(
            components=state_arr.shape[0],
            nonlinear=nonlinear_module,
            dt=dtypes.c_constant(dt, scalar_dtype),
            div=functions.div(state_arr.dtype, numpy.int32, out_dtype=state_arr.dtype)))
Пример #5
0
def test_dtype_support(thr, dtype):
    # Test passes if either thread correctly reports that it does not support given dtype,
    # or it successfully compiles kernel that operates with this dtype.

    N = 256

    if not thr.device_params.supports_dtype(dtype):
        pytest.skip()

    mul = functions.mul(dtype, dtype)
    div = functions.div(dtype, dtype)
    program = thr.compile(
    """
    KERNEL void test(
        GLOBAL_MEM ${ctype} *dest, GLOBAL_MEM ${ctype} *a, GLOBAL_MEM ${ctype} *b)
    {
      const SIZE_T i = get_global_id(0);
      ${ctype} temp = ${mul}(a[i], b[i]);
      dest[i] = ${div}(temp, b[i]);
    }
    """, render_kwds=dict(ctype=dtypes.ctype(dtype), dtype=dtype, mul=mul, div=div))

    test = program.test

    # we need results to fit even in unsigned char
    a = get_test_array(N, dtype, high=8)
    b = get_test_array(N, dtype, no_zeros=True, high=8)

    a_dev = thr.to_device(a)
    b_dev = thr.to_device(b)
    dest_dev = thr.empty_like(a_dev)
    test(dest_dev, a_dev, b_dev, global_size=N)
    assert diff_is_negligible(thr.from_device(dest_dev), a)
Пример #6
0
def get_nonlinear2(state_arr, scalar_dtype, nonlinear_module, dt):
    # k2 = N(psi_I + k1 / 2, t + dt / 2)
    # k3 = N(psi_I + k2 / 2, t + dt / 2)
    # psi_4 = psi_I + k3 (argument for the 4-th step k-propagation)
    # psi_k = psi_I + (k1 + 2(k2 + k3)) / 6 (argument for the final k-propagation)
    return PureParallel([
        Parameter('psi_k', Annotation(state_arr, 'o')),
        Parameter('psi_4', Annotation(state_arr, 'o')),
        Parameter('psi_I', Annotation(state_arr, 'i')),
        Parameter('k1', Annotation(state_arr, 'i')),
        Parameter('t', Annotation(scalar_dtype))
    ],
                        """
        <%
            all_indices = ', '.join(idxs)
        %>

        ${psi_k.ctype} psi_I_0 = ${psi_I.load_idx}(0, ${all_indices});
        ${psi_k.ctype} psi_I_1 = ${psi_I.load_idx}(1, ${all_indices});
        ${psi_k.ctype} k1_0 = ${k1.load_idx}(0, ${all_indices});
        ${psi_k.ctype} k1_1 = ${k1.load_idx}(1, ${all_indices});

        ${psi_k.ctype} k2_0 = ${nonlinear}0(
            psi_I_0 + ${div}(k1_0, 2),
            psi_I_1 + ${div}(k1_1, 2),
            ${t} + ${dt} / 2);
        ${psi_k.ctype} k2_1 = ${nonlinear}1(
            psi_I_0 + ${div}(k1_0, 2),
            psi_I_1 + ${div}(k1_1, 2),
            ${t} + ${dt} / 2);

        ${psi_k.ctype} k3_0 = ${nonlinear}0(
            psi_I_0 + ${div}(k2_0, 2),
            psi_I_1 + ${div}(k2_1, 2),
            ${t} + ${dt} / 2);
        ${psi_k.ctype} k3_1 = ${nonlinear}1(
            psi_I_0 + ${div}(k2_0, 2),
            psi_I_1 + ${div}(k2_1, 2),
            ${t} + ${dt} / 2);

        ${psi_4.store_idx}(0, ${all_indices}, psi_I_0 + k3_0);
        ${psi_4.store_idx}(1, ${all_indices}, psi_I_1 + k3_1);

        ${psi_k.store_idx}(
            0, ${all_indices},
            psi_I_0 + ${div}(k1_0, 6) + ${div}(k2_0, 3) + ${div}(k3_0, 3));
        ${psi_k.store_idx}(
            1, ${all_indices},
            psi_I_1 + ${div}(k1_1, 6) + ${div}(k2_1, 3) + ${div}(k3_1, 3));
        """,
                        guiding_array=state_arr.shape[1:],
                        render_kwds=dict(
                            nonlinear=nonlinear_module,
                            dt=dtypes.c_constant(dt, scalar_dtype),
                            div=functions.div(state_arr.dtype,
                                              numpy.int32,
                                              out_dtype=state_arr.dtype)))
Пример #7
0
def get_nonlinear2(state_arr, scalar_dtype, nonlinear_module, dt):
    # k2 = N(psi_I + k1 / 2, t + dt / 2)
    # k3 = N(psi_I + k2 / 2, t + dt / 2)
    # psi_4 = psi_I + k3 (argument for the 4-th step k-propagation)
    # psi_k = psi_I + (k1 + 2(k2 + k3)) / 6 (argument for the final k-propagation)
    return PureParallel(
        [
            Parameter('psi_k', Annotation(state_arr, 'o')),
            Parameter('psi_4', Annotation(state_arr, 'o')),
            Parameter('psi_I', Annotation(state_arr, 'i')),
            Parameter('k1', Annotation(state_arr, 'i')),
            Parameter('t', Annotation(scalar_dtype))],
        """
        <%
            all_indices = ', '.join(idxs)
        %>

        ${psi_k.ctype} psi_I_0 = ${psi_I.load_idx}(0, ${all_indices});
        ${psi_k.ctype} psi_I_1 = ${psi_I.load_idx}(1, ${all_indices});
        ${psi_k.ctype} k1_0 = ${k1.load_idx}(0, ${all_indices});
        ${psi_k.ctype} k1_1 = ${k1.load_idx}(1, ${all_indices});

        ${psi_k.ctype} k2_0 = ${nonlinear}0(
            psi_I_0 + ${div}(k1_0, 2),
            psi_I_1 + ${div}(k1_1, 2),
            ${t} + ${dt} / 2);
        ${psi_k.ctype} k2_1 = ${nonlinear}1(
            psi_I_0 + ${div}(k1_0, 2),
            psi_I_1 + ${div}(k1_1, 2),
            ${t} + ${dt} / 2);

        ${psi_k.ctype} k3_0 = ${nonlinear}0(
            psi_I_0 + ${div}(k2_0, 2),
            psi_I_1 + ${div}(k2_1, 2),
            ${t} + ${dt} / 2);
        ${psi_k.ctype} k3_1 = ${nonlinear}1(
            psi_I_0 + ${div}(k2_0, 2),
            psi_I_1 + ${div}(k2_1, 2),
            ${t} + ${dt} / 2);

        ${psi_4.store_idx}(0, ${all_indices}, psi_I_0 + k3_0);
        ${psi_4.store_idx}(1, ${all_indices}, psi_I_1 + k3_1);

        ${psi_k.store_idx}(
            0, ${all_indices},
            psi_I_0 + ${div}(k1_0, 6) + ${div}(k2_0, 3) + ${div}(k3_0, 3));
        ${psi_k.store_idx}(
            1, ${all_indices},
            psi_I_1 + ${div}(k1_1, 6) + ${div}(k2_1, 3) + ${div}(k3_1, 3));
        """,
        guiding_array=state_arr.shape[1:],
        render_kwds=dict(
            nonlinear=nonlinear_module,
            dt=dtypes.c_constant(dt, scalar_dtype),
            div=functions.div(state_arr.dtype, numpy.int32, out_dtype=state_arr.dtype)))
Пример #8
0
def div_param(arr_t, param_dtype):
    """
    Returns a scaling transformation with a dynamic parameter (1 output, 1 input, 1 scalar):
    ``output = input / param``.
    """
    return Transformation(
        [Parameter('output', Annotation(arr_t, 'o')),
        Parameter('input', Annotation(arr_t, 'i')),
        Parameter('param', Annotation(param_dtype))],
        "${output.store_same}(${div}(${input.load_same}, ${param}));",
        render_kwds=dict(div=functions.div(arr_t.dtype, param_dtype, out_dtype=arr_t.dtype)))
Пример #9
0
def get_common_kwds(dtype, device_params):
    return dict(
        dtype=dtype,
        min_mem_coalesce_width=device_params.min_mem_coalesce_width[dtype.itemsize],
        local_mem_banks=device_params.local_mem_banks,
        get_padding=get_padding,
        wrap_const=lambda x: dtypes.c_constant(x, dtypes.real_for(dtype)),
        min_blocks=helpers.min_blocks,
        mul=functions.mul(dtype, dtype),
        polar_unit=functions.polar_unit(dtypes.real_for(dtype)),
        cdivs=functions.div(dtype, numpy.uint32, out_dtype=dtype))
Пример #10
0
def get_nonlinear3(state_type, nonlinear_wrapper, components, diffusion=None, noise_type=None):

    real_dtype = dtypes.real_for(state_type.dtype)

    # k4 = N(D(psi_4), t + dt)
    # output = D(psi_k) + k4 / 6
    return PureParallel(
        [
            Parameter('output', Annotation(state_type, 'o')),
            Parameter('kprop_psi_k', Annotation(state_type, 'i')),
            Parameter('kprop_psi_4', Annotation(state_type, 'i'))]
            + ([Parameter('dW', Annotation(noise_type, 'i'))] if diffusion is not None else []) +
            [Parameter('t', Annotation(real_dtype)),
            Parameter('dt', Annotation(real_dtype))],
        """
        <%
            if diffusion is None:
                dW = None

            coords = ", ".join(idxs[1:])
            trajectory = idxs[0]

            args = lambda prefix, num: list(map(lambda i: prefix + str(i), range(num)))
            dW_args = args('dW_', diffusion.noise_sources) if diffusion is not None else []
            k4_args = ", ".join(idxs[1:] + args('psi4_', components) + dW_args)
        %>

        %for comp in range(components):
        ${output.ctype} psi4_${comp} = ${kprop_psi_4.load_idx}(${trajectory}, ${comp}, ${coords});
        ${output.ctype} psik_${comp} = ${kprop_psi_k.load_idx}(${trajectory}, ${comp}, ${coords});
        %endfor

        %if diffusion is not None:
        %for ncomp in range(diffusion.noise_sources):
        ${dW.ctype} dW_${ncomp} = ${dW.load_idx}(${trajectory}, ${ncomp}, ${coords});
        %endfor
        %endif

        %for comp in range(components):
        ${output.ctype} k4_${comp} = ${nonlinear}${comp}(${k4_args}, ${t} + ${dt}, ${dt});
        %endfor

        %for comp in range(components):
        ${output.store_idx}(
            ${trajectory}, ${comp}, ${coords},
            psik_${comp} + ${div}(k4_${comp}, 6));
        %endfor
        """,
        guiding_array=(state_type.shape[0],) + state_type.shape[2:],
        render_kwds=dict(
            components=components,
            nonlinear=nonlinear_wrapper,
            diffusion=diffusion,
            div=functions.div(state_type.dtype, numpy.int32, out_dtype=state_type.dtype)))
Пример #11
0
def div_param(arr_t, param_dtype):
    """
    Returns a scaling transformation with a dynamic parameter (1 output, 1 input, 1 scalar):
    ``output = input / param``.
    """
    return Transformation(
        [Parameter('output', Annotation(arr_t, 'o')),
        Parameter('input', Annotation(arr_t, 'i')),
        Parameter('param', Annotation(param_dtype))],
        "${output.store_same}(${div}(${input.load_same}, ${param}));",
        render_kwds=dict(div=functions.div(arr_t.dtype, param_dtype, out_dtype=arr_t.dtype)))
Пример #12
0
def get_common_kwds(dtype, device_params):
    return dict(
        dtype=dtype,
        min_mem_coalesce_width=device_params.min_mem_coalesce_width[dtype.itemsize],
        local_mem_banks=device_params.local_mem_banks,
        get_padding=get_padding,
        wrap_const=lambda x: dtypes.c_constant(x, dtypes.real_for(dtype)),
        min_blocks=helpers.min_blocks,
        mul=functions.mul(dtype, dtype),
        polar_unit=functions.polar_unit(dtypes.real_for(dtype)),
        cdivs=functions.div(dtype, numpy.uint32, out_dtype=dtype))
Пример #13
0
def div_const(arr_t, param):
    """
    Returns a scaling transformation with a fixed parameter (1 output, 1 input):
    ``output = input / param``.
    """
    param_dtype = dtypes.detect_type(param)
    return Transformation(
        [Parameter('output', Annotation(arr_t, 'o')),
        Parameter('input', Annotation(arr_t, 'i'))],
        "${output.store_same}(${div}(${input.load_same}, ${param}));",
        render_kwds=dict(
            div=functions.div(arr_t.dtype, param_dtype, out_dtype=arr_t.dtype),
            param=dtypes.c_constant(param, dtype=param_dtype)))
Пример #14
0
def div_const(arr_t, param):
    """
    Returns a scaling transformation with a fixed parameter (1 output, 1 input):
    ``output = input / param``.
    """
    param_dtype = dtypes.detect_type(param)
    return Transformation(
        [Parameter('output', Annotation(arr_t, 'o')),
        Parameter('input', Annotation(arr_t, 'i'))],
        "${output.store_same}(${div}(${input.load_same}, ${param}));",
        render_kwds=dict(
            div=functions.div(arr_t.dtype, param_dtype, out_dtype=arr_t.dtype),
            param=dtypes.c_constant(param, dtype=param_dtype)))
Пример #15
0
def get_nonlinear2(state_arr, potential_arr, scalar_dtype, nonlinear_module, dt):
    # k2 = N(psi_I + k1 / 2, t + dt / 2)
    # k3 = N(psi_I + k2 / 2, t + dt / 2)
    # psi_4 = psi_I + k3 (argument for the 4-th step k-propagation)
    # psi_k = psi_I + (k1 + 2(k2 + k3)) / 6 (argument for the final k-propagation)
    return PureParallel(
        [
            Parameter('psi_k', Annotation(state_arr, 'o')),
            Parameter('psi_4', Annotation(state_arr, 'o')),
            Parameter('psi_I', Annotation(state_arr, 'i')),
            Parameter('k1', Annotation(state_arr, 'i')),
            Parameter('potential_half', Annotation(potential_arr, 'i')),
            Parameter('t', Annotation(scalar_dtype))],
        """
        %for comp in range(components):
        ${psi_k.ctype} psi_I_${comp} = ${psi_I.load_idx}(${comp}, ${idxs.all()});
        ${psi_k.ctype} k1_${comp} = ${k1.load_idx}(${comp}, ${idxs.all()});
        %endfor
        ${potential_half.ctype} V = ${potential_half.load_idx}(${', '.join(idxs[1:])});

        %for comp in range(components):
        ${psi_k.ctype} k2_${comp} = ${nonlinear}${comp}(
            %for pcomp in range(components):
            psi_I_${pcomp} + ${div}(k1_${pcomp}, 2),
            %endfor
            V, ${t} + ${dt} / 2);
        %endfor

        %for comp in range(components):
        ${psi_k.ctype} k3_${comp} = ${nonlinear}${comp}(
            %for pcomp in range(components):
            psi_I_${pcomp} + ${div}(k2_${pcomp}, 2),
            %endfor
            V, ${t} + ${dt} / 2);
        %endfor

        %for comp in range(components):
        ${psi_4.store_idx}(${comp}, ${idxs.all()}, psi_I_${comp} + k3_${comp});
        ${psi_k.store_idx}(
            ${comp}, ${idxs.all()},
            psi_I_${comp} + ${div}(k1_${comp}, 6) + ${div}(k2_${comp}, 3) + ${div}(k3_${comp}, 3));
        %endfor
        """,
        guiding_array=state_arr.shape[1:],
        render_kwds=dict(
            components=state_arr.shape[0],
            nonlinear=nonlinear_module,
            dt=dtypes.c_constant(dt, scalar_dtype),
            div=functions.div(state_arr.dtype, numpy.int32, out_dtype=state_arr.dtype)))
Пример #16
0
    def _build_plan(self, plan_factory, device_params, C, D, coeff1, coeff2):
        plan = plan_factory()
        nested = Dummy(C, D, coeff1, same_A_B=True)

        C_temp = plan.temp_array_like(C)
        D_temp = plan.temp_array_like(D)

        # Testing a computation call which uses the same argument for two parameters.
        plan.computation_call(nested, C_temp, D, C, C, coeff1)

        arr_dtype = C.dtype
        coeff_dtype = coeff2.dtype

        mul = functions.mul(arr_dtype, coeff_dtype)
        div = functions.div(arr_dtype, coeff_dtype)

        template = template_from(
            """
        <%def name="dummy(kernel_declaration, CC, C, D, coeff)">
        ${kernel_declaration}
        {
            VIRTUAL_SKIP_THREADS;
            VSIZE_T idx0 = virtual_global_id(0);
            VSIZE_T idx1 = virtual_global_id(1);

            ${CC.store_idx}(idx0, idx1,
                ${C.load_idx}(idx0, idx1) +
                ${mul}(${D.load_idx}(idx0, idx1), ${coeff}));
        }
        </%def>
        """
        )

        # Testing a kernel call which uses the same argument for two parameters.
        plan.kernel_call(
            template.get_def("dummy"), [C, C_temp, C_temp, coeff2], global_size=C.shape, render_kwds=dict(mul=mul)
        )

        return plan
Пример #17
0
    def _build_plan(self, plan_factory, device_params, C, D, coeff1, coeff2):
        plan = plan_factory()
        nested = Dummy(C, D, coeff1, same_A_B=True)

        C_temp = plan.temp_array_like(C)
        D_temp = plan.temp_array_like(D)

        # Testing a computation call which uses the same argument for two parameters.
        plan.computation_call(nested, C_temp, D, C, C, coeff1)

        arr_dtype = C.dtype
        coeff_dtype = coeff2.dtype

        mul = functions.mul(arr_dtype, coeff_dtype)
        div = functions.div(arr_dtype, coeff_dtype)

        template = template_from("""
        <%def name="dummy(kernel_declaration, CC, C, D, coeff)">
        ${kernel_declaration}
        {
            VIRTUAL_SKIP_THREADS;
            VSIZE_T idx0 = virtual_global_id(0);
            VSIZE_T idx1 = virtual_global_id(1);

            ${CC.store_idx}(idx0, idx1,
                ${C.load_idx}(idx0, idx1) +
                ${mul}(${D.load_idx}(idx0, idx1), ${coeff}));
        }
        </%def>
        """)

        # Testing a kernel call which uses the same argument for two parameters.
        plan.kernel_call(template.get_def('dummy'),
                         [C, C_temp, C_temp, coeff2],
                         global_size=C.shape,
                         render_kwds=dict(mul=mul))

        return plan
Пример #18
0
def get_nonlinear2(state_type, nonlinear_wrapper, components, diffusion=None, noise_type=None):

    real_dtype = dtypes.real_for(state_type.dtype)

    # k2 = N(psi_I + k1 / 2, t + dt / 2)
    # k3 = N(psi_I + k2 / 2, t + dt / 2)
    # psi_4 = psi_I + k3 (argument for the 4-th step k-propagation)
    # psi_k = psi_I + (k1 + 2(k2 + k3)) / 6 (argument for the final k-propagation)
    return PureParallel(
        [
            Parameter('psi_k', Annotation(state_type, 'o')),
            Parameter('psi_4', Annotation(state_type, 'o')),
            Parameter('psi_I', Annotation(state_type, 'i')),
            Parameter('k1', Annotation(state_type, 'i'))]
            + ([Parameter('dW', Annotation(noise_type, 'i'))] if diffusion is not None else []) +
            [Parameter('t', Annotation(real_dtype)),
            Parameter('dt', Annotation(real_dtype))],
        """
        <%
            if diffusion is None:
                dW = None

            coords = ", ".join(idxs[1:])
            trajectory = idxs[0]

            args = lambda prefix, num: ", ".join(map(lambda i: prefix + str(i), range(num)))
            dW_args = (args('dW_', diffusion.noise_sources) + ",") if diffusion is not None else ""
        %>

        %for comp in range(components):
        ${psi_k.ctype} psi_I_${comp} = ${psi_I.load_idx}(${trajectory}, ${comp}, ${coords});
        ${psi_k.ctype} k1_${comp} = ${k1.load_idx}(${trajectory}, ${comp}, ${coords});
        %endfor

        %if diffusion is not None:
        %for ncomp in range(diffusion.noise_sources):
        ${dW.ctype} dW_${ncomp} = ${dW.load_idx}(${trajectory}, ${ncomp}, ${coords});
        %endfor
        %endif

        %for comp in range(components):
        ${psi_k.ctype} k2_${comp} = ${nonlinear}${comp}(
            ${coords},
            %for c in range(components):
            psi_I_${c} + ${div}(k1_${c}, 2),
            %endfor
            ${dW_args}
            ${t} + ${dt} / 2, ${dt});
        %endfor

        %for comp in range(components):
        ${psi_k.ctype} k3_${comp} = ${nonlinear}${comp}(
            ${coords},
            %for c in range(components):
            psi_I_${c} + ${div}(k2_${c}, 2),
            %endfor
            ${dW_args}
            ${t} + ${dt} / 2, ${dt});
        %endfor

        %for comp in range(components):
        ${psi_4.store_idx}(${trajectory}, ${comp}, ${coords}, psi_I_${comp} + k3_${comp});
        %endfor

        %for comp in range(components):
        ${psi_k.store_idx}(
            ${trajectory}, ${comp}, ${coords},
            psi_I_${comp} + ${div}(k1_${comp}, 6) + ${div}(k2_${comp}, 3) + ${div}(k3_${comp}, 3));
        %endfor
        """,
        guiding_array=(state_type.shape[0],) + state_type.shape[2:],
        render_kwds=dict(
            components=components,
            nonlinear=nonlinear_wrapper,
            diffusion=diffusion,
            div=functions.div(state_type.dtype, numpy.int32, out_dtype=state_type.dtype)))
Пример #19
0
def test_div(thr, out_code, in_codes):
    out_dtype, in_dtypes = generate_dtypes(out_code, in_codes)
    check_func(
        thr, functions.div(*in_dtypes, out_dtype=out_dtype),
        lambda x, y: dtypes.cast(out_dtype)(x / y), out_dtype, in_dtypes)
Пример #20
0
def test_div(thr, out_code, in_codes):
    out_dtype, in_dtypes = generate_dtypes(out_code, in_codes)
    check_func(thr, functions.div(*in_dtypes, out_dtype=out_dtype),
               lambda x, y: dtypes.cast(out_dtype)(x / y), out_dtype,
               in_dtypes)
Пример #21
0
    def _build_plan(self, plan_factory, device_params, C, D, A, B, coeff):
        plan = plan_factory()

        arr_dtype = C.dtype
        coeff_dtype = coeff.dtype

        mul = functions.mul(arr_dtype, coeff_dtype)
        div = functions.div(arr_dtype, coeff_dtype)

        template = template_from(
            """
        <%def name="dummy(kernel_declaration, C, D, A, B, coeff)">
        ${kernel_declaration}
        {
            VIRTUAL_SKIP_THREADS;
            VSIZE_T idx0 = virtual_global_id(0);
            VSIZE_T idx1 = virtual_global_id(1);

            ${A.ctype} a = ${A.load_idx}(idx0, idx1);
            ${C.ctype} c = ${mul}(a, ${coeff});
            ${C.store_idx}(idx1, idx0, c);

            %if same_A_B:
                ${B.ctype} b = ${B.load_idx}(idx0, idx1);
                ${D.ctype} d = ${div}(b, ${coeff});
                ${D.store_idx}(idx0, idx1, d);
            %else:
            if (idx1 == 0)
            {
                ${B.ctype} b = ${B.load_idx}(idx0);
                ${D.ctype} d = ${div}(b, ${coeff});
                ${D.store_idx}(idx0, d);
            }
            %endif
        }
        </%def>

        <%def name="dummy2(kernel_declaration, CC, DD, C, D, pers_arr, const_coeff)">
        ${kernel_declaration}
        {
            VIRTUAL_SKIP_THREADS;
            VSIZE_T idx0 = virtual_global_id(0);
            VSIZE_T idx1 = virtual_global_id(1);

            ${CC.store_idx}(idx0, idx1, ${C.load_idx}(idx0, idx1));

            %if same_A_B:
                ${DD.store_idx}(
                    idx0, idx1,
                    ${mul}(${D.load_idx}(idx0, idx1), ${const_coeff}) +
                        ${pers_arr.load_idx}(idx0, idx1));
            %else:
            if (idx1 == 0)
            {
                ${DD.store_idx}(
                    idx0,
                    ${mul}(${D.load_idx}(idx0), ${const_coeff}) +
                        ${pers_arr.load_idx}(idx0));
            }
            %endif
        }
        </%def>
        """
        )

        block_size = 8

        C_temp = plan.temp_array_like(C)
        D_temp = plan.temp_array_like(D)
        arr = plan.persistent_array(self._persistent_array)

        plan.kernel_call(
            template.get_def("dummy"),
            [C_temp, D_temp, A, B, coeff],
            global_size=A.shape,
            local_size=(block_size, block_size),
            render_kwds=dict(mul=mul, div=div, same_A_B=self._same_A_B),
        )

        plan.kernel_call(
            template.get_def("dummy2"),
            [
                C,
                D,
                C_temp,
                D_temp,
                (self._persistent_array if self._test_kernel_adhoc_array else arr),
                (10 if self._test_untyped_scalar else numpy.float32(10)),
            ],
            global_size=A.shape,
            local_size=(block_size, block_size),
            render_kwds=dict(mul=mul, same_A_B=self._same_A_B),
        )

        return plan
Пример #22
0
    def _build_plan(self, plan_factory, device_params, C, D, A, B, coeff):
        plan = plan_factory()

        arr_dtype = C.dtype
        coeff_dtype = coeff.dtype

        mul = functions.mul(arr_dtype, coeff_dtype)
        div = functions.div(arr_dtype, coeff_dtype)

        template = template_from("""
        <%def name="dummy(kernel_declaration, C, D, A, B, coeff)">
        ${kernel_declaration}
        {
            VIRTUAL_SKIP_THREADS;
            VSIZE_T idx0 = virtual_global_id(0);
            VSIZE_T idx1 = virtual_global_id(1);

            ${A.ctype} a = ${A.load_idx}(idx0, idx1);
            ${C.ctype} c = ${mul}(a, ${coeff});
            ${C.store_idx}(idx1, idx0, c);

            %if same_A_B:
                ${B.ctype} b = ${B.load_idx}(idx0, idx1);
                ${D.ctype} d = ${div}(b, ${coeff});
                ${D.store_idx}(idx0, idx1, d);
            %else:
            if (idx1 == 0)
            {
                ${B.ctype} b = ${B.load_idx}(idx0);
                ${D.ctype} d = ${div}(b, ${coeff});
                ${D.store_idx}(idx0, d);
            }
            %endif
        }
        </%def>

        <%def name="dummy2(kernel_declaration, CC, DD, C, D, pers_arr, const_coeff)">
        ${kernel_declaration}
        {
            VIRTUAL_SKIP_THREADS;
            VSIZE_T idx0 = virtual_global_id(0);
            VSIZE_T idx1 = virtual_global_id(1);

            ${CC.store_idx}(idx0, idx1, ${C.load_idx}(idx0, idx1));

            %if same_A_B:
                ${DD.store_idx}(
                    idx0, idx1,
                    ${mul}(${D.load_idx}(idx0, idx1), ${const_coeff}) +
                        ${pers_arr.load_idx}(idx0, idx1));
            %else:
            if (idx1 == 0)
            {
                ${DD.store_idx}(
                    idx0,
                    ${mul}(${D.load_idx}(idx0), ${const_coeff}) +
                        ${pers_arr.load_idx}(idx0));
            }
            %endif
        }
        </%def>
        """)

        block_size = 8

        C_temp = plan.temp_array_like(C)
        D_temp = plan.temp_array_like(D)
        arr = plan.persistent_array(self._persistent_array)

        plan.kernel_call(template.get_def('dummy'),
                         [C_temp, D_temp, A, B, coeff],
                         global_size=A.shape,
                         local_size=(block_size, block_size),
                         render_kwds=dict(mul=mul,
                                          div=div,
                                          same_A_B=self._same_A_B))

        plan.kernel_call(template.get_def('dummy2'), [
            C, D, C_temp, D_temp,
            (self._persistent_array if self._test_kernel_adhoc_array else arr),
            (10 if self._test_untyped_scalar else numpy.float32(10))
        ],
                         global_size=A.shape,
                         local_size=(block_size, block_size),
                         render_kwds=dict(mul=mul, same_A_B=self._same_A_B))

        return plan