Python exp примеры, reikna.cluda.functions.exp Python примеры использования

Пример #1

0

Показать файл

Файл: generate_gpu.py Проект: fjarri/squeezed-sim

    def _build_plan(self, plan_factory, device_params, alpha, beta, seed):
        plan = plan_factory()

        bijection = philox(64, 2)

        # Keeping the kernel the same so it can be cached.
        # The seed will be passed as the computation parameter instead.
        keygen = KeyGenerator.create(bijection, seed=numpy.int32(0))

        sampler = normal_bm(bijection, numpy.float64)

        squeezing = plan.persistent_array(self._system.squeezing)
        decoherence = plan.persistent_array(self._system.decoherence)

        plan.kernel_call(TEMPLATE.get_def("generate_input_state"),
                         [alpha, beta, squeezing, decoherence, seed],
                         kernel_name="generate",
                         global_size=alpha.shape,
                         render_kwds=dict(
                             system=self._system,
                             representation=self._representation,
                             Representation=Representation,
                             bijection=bijection,
                             keygen=keygen,
                             sampler=sampler,
                             ordering=ordering,
                             exp=functions.exp(numpy.float64),
                             mul_cr=functions.mul(numpy.complex128,
                                                  numpy.float64),
                             add_cc=functions.add(numpy.complex128,
                                                  numpy.complex128),
                         ))

        return plan

Пример #2

0

Показать файл

    def _build_plan(self, plan_factory, device_params, output, alpha, beta):

        plan = plan_factory()

        for_reduction = Type(numpy.float64, alpha.shape)

        meter_trf = Transformation([
            Parameter('output', Annotation(for_reduction, 'o')),
            Parameter('alpha', Annotation(alpha, 'i')),
            Parameter('beta', Annotation(beta, 'i')),
            ],
            """
                ${alpha.ctype} alpha = ${alpha.load_same};
                ${beta.ctype} beta = ${beta.load_same};
                ${alpha.ctype} t = ${mul_cc}(alpha, beta);
                ${alpha.ctype} np = ${exp_c}(COMPLEX_CTR(${alpha.ctype})(-t.x, -t.y));
                ${alpha.ctype} cp = COMPLEX_CTR(${alpha.ctype})(1 - np.x, -np.y);
                ${output.store_same}(cp.x);
                """,
            render_kwds=dict(
                mul_cc=functions.mul(alpha.dtype, alpha.dtype),
                exp_c=functions.exp(alpha.dtype),
                ))

        reduction = Reduce(for_reduction, predicate_sum(output.dtype), axes=(0,))
        reduction.parameter.input.connect(
            meter_trf, meter_trf.output, alpha_p=meter_trf.alpha, beta_p=meter_trf.beta)

        plan.computation_call(reduction, output, alpha, beta)

        return plan

Пример #3

0

Показать файл

def kspacegaussian_filter_CL(ksp, sigma):
    from reikna import cluda
    from reikna.cluda import functions, dtypes
    sz = np.array(ksp.shape)
    dtype = np.complex64
    ftype = np.float32
    api = cluda.ocl_api()
    thr = api.Thread.create()
    FACTOR = 1.0
    program = thr.compile("""
KERNEL void gauss_kernel(
    GLOBAL_MEM ${ctype} *dest,
    GLOBAL_MEM ${ctype} *src)
{
  const ${ultype} x = (${ultype}) get_global_id(0);
  const SIZE_T dim1= %d;
  const SIZE_T dim2= %d;
  const SIZE_T dim3= %d;                    
  ${ftype} sigma[3];
  sigma[0]=%f;sigma[1]=%f;sigma[2]=%f;
  ${ftype} factor = %f;            
  const double TWOPISQ = 19.739208802178716; //6.283185307179586;  //2*3.141592;
  const ${ftype} SQRT2PI = 2.5066282746;
  const double CUBEDSQRT2PI = 15.749609945722419;
  const ${ultype} idx = x;
  ${ftype} i = (${ftype})((x / dim3) / dim2);
      i = (i - (${ftype})floor((${ftype})(dim1)/2.0))/(${ftype})(dim1);
  ${ftype} j = (${ftype})(x / dim3);
      if((SIZE_T)j > dim2) {j=(${ftype})fmod(j, (${ftype})dim2);};
      j = (j - (${ftype})floor((${ftype})(dim2)/2.0f))/(${ftype})(dim2);
  //Account for large global index (stored as ulong) before performing modulus
  double pre_k=fmod((double)(x) , (double) dim3);
  ${ftype} k = (${ftype}) pre_k;
      k = (k - (${ftype})floor((${ftype})(dim3)/2.0f))/(${ftype})(dim3);

  ${ftype} weight = exp(-TWOPISQ*((i*i)*sigma[0]*sigma[0] + (j*j)*sigma[1]*sigma[1] + (k*k)*sigma[2]*sigma[2]));
  //${ftype} weight = expm1(-TWOPISQ*((i*i)*sigma[0]*sigma[0] + (j*j)*sigma[1]*sigma[1] + (k*k)*sigma[2]*sigma[2]))+1;
  //${ftype} weight= ${exp}(-TWOPISQ*((i*i)*sigma[0]*sigma[0] + (j*j)*sigma[1]*sigma[1] + (k*k)*sigma[2]*sigma[2]));
  dest[idx].x = src[idx].x * weight;
  dest[idx].y = src[idx].y * weight; 
  
}
""" % (sz[0], sz[1], sz[2], sigma[0], sigma[1], sigma[2], FACTOR),
        render_kwds=dict(ctype=dtypes.ctype(dtype),
                         ftype=dtypes.ctype(ftype), ultype=dtypes.ctype(np.uint64),
                         exp=functions.exp(ftype)), fast_math=True)

    gauss_kernel = program.gauss_kernel
    data_dev = thr.empty_like(ksp)
    gauss_kernel(data_dev, data_dev, global_size=sz[0] * sz[1] * sz[2])
    ksp_out = data_dev.get()

    ifft = FFT(data_dev)
    cifft = ifft.compile(thr)
    cifft(data_dev, data_dev, inverse=0)
    result = np.fft.fftshift(data_dev.get() / sz[0] * sz[1] * sz[2])
    result = result[::-1, ::-1, ::-1]
    result = np.roll(np.roll(np.roll(result, 1, axis=2), 1, axis=1), 1, axis=0)

    return ksp_out

Пример #4

0

Показать файл

Файл: helpers.py Проект: fjarri/reikna-integrator

def get_kprop_trf(state_arr, ksquared_arr, coeffs, exp=False):
    compound_dtype = dtypes.result_type(coeffs.dtype, ksquared_arr.dtype)
    return Transformation(
        [
            Parameter('output', Annotation(state_arr, 'o')),
            Parameter('input', Annotation(state_arr, 'i')),
            Parameter('ksquared', Annotation(ksquared_arr, 'i')),
            Parameter('dt', Annotation(ksquared_arr.dtype))],
        """
        %if max(coeffs.values) > 0:
        ${ksquared.ctype} ksquared = ${ksquared.load_idx}(${', '.join(idxs[2:])});
        %endif

        ${dtypes.ctype(compound_dtype)} compound_coeff = ${dtypes.c_constant(0, compound_dtype)};

        %for pwr, values in coeffs.values.items():
        {
            ${dtypes.ctype(coeffs.dtype)} value;

            %for comp in range(output.shape[1]):
            ${'if' if comp == 0 else 'else if'} (${idxs[1]} == ${comp})
            {
                value = ${dtypes.c_constant(values[comp], coeffs.dtype)};
            }
            %endfor

            compound_coeff =
                compound_coeff
                + ${mul_kc}(
                    %if pwr == 0:
                    ${dt}
                    %elif pwr == 2:
                    -ksquared * ${dt}
                    %else:
                    pow(-ksquared, ${pwr // 2}) * ${dt}
                    %endif
                    ,
                    value
                    );
        }
        %endfor

        ${output.store_same}(${mul_ic}(
            ${input.load_same},
            %if exp is not None:
            ${exp}(compound_coeff)
            %else:
            compound_coeff
            %endif
            ));
        """,
        render_kwds=dict(
            coeffs=coeffs,
            compound_dtype=compound_dtype,
            mul_ic=functions.mul(state_arr.dtype, compound_dtype, out_dtype=state_arr.dtype),
            mul_kc=functions.mul(ksquared_arr.dtype, coeffs.dtype, out_dtype=compound_dtype),
            exp=functions.exp(compound_dtype) if exp else None))

Пример #5

0

Показать файл

    def _build_plan(self, plan_factory, device_params, output, alpha, beta):

        plan = plan_factory()

        for_reduction = Type(numpy.float64, (alpha.shape[0], self._max_click_order))

        meter_trf = Transformation([
            Parameter('output', Annotation(for_reduction, 'o')),
            Parameter('alpha', Annotation(alpha, 'i')),
            Parameter('beta', Annotation(beta, 'i')),
            ],
            """
                VSIZE_T sample_idx = ${idxs[0]};
                VSIZE_T order = ${idxs[1]} + 1;

                ${alpha.ctype} result = COMPLEX_CTR(${alpha.ctype})(1, 0);
                for (VSIZE_T i = 0; i < ${modes}; i++) {
                    ${alpha.ctype} alpha = ${alpha.load_idx}(sample_idx, i);
                    ${beta.ctype} beta = ${beta.load_idx}(sample_idx, i);
                    ${alpha.ctype} t = ${mul_cc}(alpha, beta);
                    ${alpha.ctype} np = ${exp_c}(COMPLEX_CTR(${alpha.ctype})(-t.x, -t.y));

                    if (i >= order) {
                        result = ${mul_cc}(result, np);
                    }
                    else {
                        ${alpha.ctype} cp = COMPLEX_CTR(${alpha.ctype})(1 - np.x, -np.y);
                        result = ${mul_cc}(result, cp);
                    }
                }

                ${output.store_same}(result.x);
                """,
            render_kwds=dict(
                mul_cc=functions.mul(alpha.dtype, alpha.dtype),
                exp_c=functions.exp(alpha.dtype),
                modes=self._system.modes,
                ))

        reduction = Reduce(for_reduction, predicate_sum(output.dtype), axes=(0,))
        reduction.parameter.input.connect(
            meter_trf, meter_trf.output, alpha_p=meter_trf.alpha, beta_p=meter_trf.beta)

        plan.computation_call(reduction, output, alpha, beta)

        return plan

Пример #6

0

Показать файл

def gaussian_fourierkernel(siz, sigma_):
    """
    Create Gaussian Fourier filter kernel with GPU
    """
    if not hasattr(sigma, "__len__"):  # type(sigma) is float:
        sigma = np.ones(3) * sigma_
    elif len(sigma) == 2:
        sigma[2] = 0.0

    sz = siz
    ctype = np.complex64
    ftype = np.float32
    #api = cluda.ocl_api()
    api = any_api()
    thr = api.Thread.create()
    base = np.ones(siz, ctype)
    data_dev = thr.to_device(base)
    FACTOR = 1.0
    program = thr.compile("""
KERNEL void gauss_kernel(
    GLOBAL_MEM ${ctype} *dest,
    GLOBAL_MEM ${ctype} *src)
{
  const ulong x = get_global_id(0);
  const SIZE_T dim1= %d;
  const SIZE_T dim2= %d;
  const SIZE_T dim3= %d;                    
  ${ftype} sigma[3];
  sigma[0]=%f;sigma[1]=%f;sigma[2]=%f;
  ${ftype} factor = %f;            
  const double TWOPISQ = 19.739208802178716; //6.283185307179586;  //2*3.141592;
  const ${ftype} SQRT2PI = 2.5066282746;
  const double CUBEDSQRT2PI = 15.749609945722419;
  const ulong idx = x;
  ${ftype} i = (${ftype})((x / dim3) / dim2);
      i = (i - (${ftype})floor((${ftype})(dim1)/2.0))/(${ftype})(dim1);
  ${ftype} j = (${ftype})(x / dim3);
      if((SIZE_T)j > dim2) {j=(${ftype})fmod(j, (${ftype})dim2);};
      j = (j - (${ftype})floor((${ftype})(dim2)/2.0f))/(${ftype})(dim2);
  //Account for large global index (stored as ulong) before performing modulus
  double pre_k=fmod((double)(x) , (double) dim3);
  ${ftype} k = (${ftype}) pre_k;
      k = (k - (${ftype})floor((${ftype})(dim3)/2.0f))/(${ftype})(dim3);

  ${ftype} weight = exp(-TWOPISQ*((i*i)*sigma[0]*sigma[0] + (j*j)*sigma[1]*sigma[1] + (k*k)*sigma[2]*sigma[2]));
  //${ftype} weight = expm1(-TWOPISQ*((i*i)*sigma[0]*sigma[0] + (j*j)*sigma[1]*sigma[1] + (k*k)*sigma[2]*sigma[2]))+1;
  //${ftype} weight= ${exp}(-TWOPISQ*((i*i)*sigma[0]*sigma[0] + (j*j)*sigma[1]*sigma[1] + (k*k)*sigma[2]*sigma[2]));
  dest[idx].x = src[idx].x * weight;
  dest[idx].y = src[idx].y * weight; 
  
}
""" % (sz[0], sz[1], sz[2], sigma[0], sigma[1], sigma[2], FACTOR),
        render_kwds=dict(ctype=dtypes.ctype(ctype),
                         ftype=dtypes.ctype(ftype),
                         exp=functions.exp(ftype)), fast_math=True)
    gauss_kernel = program.gauss_kernel
    #data_dev = thr.empty_like(ksp_dev)
    gauss_kernel(data_dev, data_dev, global_size=sz[0] * sz[1] * sz[2])
    gfilter = data_dev.get()
    thr.synchronize()
    thr.release()

    return gfilter

Пример #7

0

Показать файл

def kspacegaussian_filter_CL2(ksp, sigma):
    """ Kspace gaussian filter and recon using GPU OpenCL

    1. GPU intialisation
    2. push KSP complex matrix to GPU
    3. declare FFT program
    4. declare Complex Gaussian GPU filter program
    5. Execute Gaussian GPU program
    6. GPU sync
    7. Execute FFT Recon
    8. Execute FFTshift
    9. Retrieve reconstruced complex image from GPU
    10. Reorganise image to standard (mimic numpy format)

    """
    sz = ksp.shape
    dtype = np.complex64
    ftype = np.float32
    ultype = np.uint64
    #api = cluda.ocl_api()
    api = any_api()
    thr = api.Thread.create()
    data_dev = thr.to_device(ksp)
    ifft = FFT(data_dev)
    FACTOR = 1.0
    program = thr.compile("""
KERNEL void gauss_kernel(
    GLOBAL_MEM ${ctype} *dest,
    GLOBAL_MEM ${ctype} *src)
{
  const ulong x = get_global_id(0);
  const SIZE_T dim1= %d;
  const SIZE_T dim2= %d;
  const SIZE_T dim3= %d;                    
  ${ftype} sigma[3];
  sigma[0]=%f;sigma[1]=%f;sigma[2]=%f;
  ${ftype} factor = %f;            
  const double TWOPISQ = 19.739208802178716; //6.283185307179586;  //2*3.141592;
  const ${ftype} SQRT2PI = 2.5066282746;
  const double CUBEDSQRT2PI = 15.749609945722419;
  const ulong idx = x;
  ${ftype} i = (${ftype})((x / dim3) / dim2);
      i = (i - (${ftype})floor((${ftype})(dim1)/2.0f))/(${ftype})(dim1);
  ${ftype} j = (${ftype})(x / dim3);
      if((SIZE_T)j > dim2) {j=(${ftype})fmod(j, (${ftype})dim2);};
      j = (j - (${ftype})floor((${ftype})(dim2)/2.0f))/(${ftype})(dim2);
  // Account for large global index (stored as ulong) before performing modulus
  double pre_k=fmod((double)(x), (double)dim3);
  ${ftype} k = (${ftype}) pre_k;
      k = (k - (${ftype})floor((${ftype})(dim3)/2.0f))/(${ftype})(dim3);

  ${ftype} weight = exp(-TWOPISQ*((i*i)*sigma[0]*sigma[0] + (j*j)*sigma[1]*sigma[1] + (k*k)*sigma[2]*sigma[2]));
  // ${ftype} weight = expm1(-TWOPISQ*((i*i)*sigma[0]*sigma[0] + (j*j)*sigma[1]*sigma[1] + (k*k)*sigma[2]*sigma[2]))+1;
  // ${ftype} weight= ${exp}(-TWOPISQ*((i*i)*sigma[0]*sigma[0] + (j*j)*sigma[1]*sigma[1] + (k*k)*sigma[2]*sigma[2]));
  dest[idx].x = src[idx].x * weight;
  dest[idx].y = src[idx].y * weight; 
  
}
""" % (sz[0], sz[1], sz[2], sigma[0], sigma[1], sigma[2], FACTOR),
        render_kwds=dict(ctype=dtypes.ctype(dtype),
                         ftype=dtypes.ctype(ftype),
                         exp=functions.exp(ftype)), fast_math=True)
    gauss_kernel = program.gauss_kernel
    gauss_kernel(data_dev, data_dev, global_size=sz[0] * sz[1] * sz[2])
    thr.synchronize()
    # Recon
    #data_dev = thr.to_device(ksp)
    ifftobj = FFT(data_dev)
    cifft = ifftobj.compile(thr)
    fftshiftobj = FFTShift(data_dev)
    cfftshift = fftshiftobj.compile(thr)
    cifft(data_dev, data_dev, inverse=0)
    thr.synchronize()
    cfftshift(data_dev, data_dev)
    thr.synchronize()
    result2 = data_dev.get() / np.prod(np.array(ksp.shape))
    result2 = result2[::-1, ::-1, ::-1]
    thr.release()
    return result2

Пример #8

0

Показать файл

Файл: test_functions.py Проект: xexo7C8/reikna

def test_exp(thr, out_code, in_codes):
    out_dtype, in_dtypes = generate_dtypes(out_code, in_codes)
    check_func(thr, functions.exp(in_dtypes[0]), numpy.exp, out_dtype,
               in_dtypes)

Пример #9

0

Показать файл

Файл: profile_reikna_cuda.py Проект: maeager/Agilent2Dicom

def kspacegaussian_filter_CL2(ksp, sigma):
    sz = ksp.shape
    dtype = np.complex64
    ftype = np.float32
    #api = cluda.ocl_api()
    api = cuda_api()
    thr = api.Thread.create()
    data_dev = thr.to_device(ksp)
    ifft = FFT(data_dev)
    FACTOR = 1.0
    program = thr.compile("""
KERNEL void gauss_kernel(
    GLOBAL_MEM ${ctype} *dest,
    GLOBAL_MEM ${ctype} *src)
{
  const ulong x = get_global_id(0);
  const SIZE_T dim1= %d;
  const SIZE_T dim2= %d;
  const SIZE_T dim3= %d;                    
  ${ftype} sigma[3];
  sigma[0]=%f;sigma[1]=%f;sigma[2]=%f;
  ${ftype} factor = %f;            
  const double TWOPISQ = 19.739208802178716; //6.283185307179586;  //2*3.141592;
//  const ${ftype} SQRT2PI = 2.5066282746;
//  const double CUBEDSQRT2PI = 15.749609945722419;
  const ulong idx = x;
  ${ftype} i = (${ftype})((x / dim3) / dim2);
      i = (i - (${ftype})floor((${ftype})(dim1)/2.0))/(${ftype})(dim1);
  ${ftype} j = (${ftype})(x / dim3);
      if((SIZE_T)j > dim2) {j=(${ftype})fmod(j, (${ftype})dim2);};
      j = (j - (${ftype})floor((${ftype})(dim2)/2.0f))/(${ftype})(dim2);
  //Account for large global index (stored as ulong) before performing modulus
  double pre_k=fmod((double)(x) , (double) dim3);
  ${ftype} k = (${ftype}) pre_k;
      k = (k - (${ftype})floor((${ftype})(dim3)/2.0f))/(${ftype})(dim3);

  ${ftype} weight = exp(-TWOPISQ*((i*i)*sigma[0]*sigma[0] + (j*j)*sigma[1]*sigma[1] + (k*k)*sigma[2]*sigma[2]));
  //${ftype} weight = expm1(-TWOPISQ*((i*i)*sigma[0]*sigma[0] + (j*j)*sigma[1]*sigma[1] + (k*k)*sigma[2]*sigma[2]))+1;
  //${ftype} weight= ${exp}(-TWOPISQ*((i*i)*sigma[0]*sigma[0] + (j*j)*sigma[1]*sigma[1] + (k*k)*sigma[2]*sigma[2]));
  dest[idx].x = src[idx].x * weight * factor;
  dest[idx].y = src[idx].y * weight * factor; 
  
}
""" % (sz[0], sz[1], sz[2], sigma[0], sigma[1], sigma[2], FACTOR),
        render_kwds=dict(ctype=dtypes.ctype(dtype),
                         ftype=dtypes.ctype(ftype),
                         exp=functions.exp(ftype)), fast_math=True)
    gauss_kernel = program.gauss_kernel
    #data_dev = thr.empty_like(ksp_dev)
    gauss_kernel(data_dev, data_dev, global_size=(sz[0], sz[1], sz[2]))

    thr.synchronize()
    #data_dev = thr.to_device(ksp)
    ifft = FFT(data_dev)
    cifft = ifft.compile(thr)
    fftshift = FFTShift(data_dev)
    cfftshift = fftshift.compile(thr)
    cifft(data_dev, data_dev, inverse=0)
    thr.synchronize()
    cfftshift(data_dev, data_dev)
    thr.synchronize()
    result2 = data_dev.get() / np.prod(np.array(ksp.shape))
    result2 = result2[::-1, ::-1, ::-1]
    thr.release()
    return result2

Пример #10

0

Показать файл

Файл: test_functions.py Проект: fjarri/reikna

def test_exp(thr, out_code, in_codes):
    out_dtype, in_dtypes = generate_dtypes(out_code, in_codes)
    check_func(thr, functions.exp(in_dtypes[0]), numpy.exp, out_dtype, in_dtypes)

Пример #11

0

Показать файл

    def _build_plan(self, plan_factory, device_params, output, alpha, beta):

        plan = plan_factory()

        samples, modes = alpha.shape

        for_reduction = Type(alpha.dtype, (samples, self._max_total_clicks + 1))

        prepared_state = plan.temp_array_like(alpha)

        plan.kernel_call(
            TEMPLATE.get_def("compound_click_probability_prepare"),
            [prepared_state, alpha, beta],
            kernel_name="compound_click_probability_prepare",
            global_size=alpha.shape,
            render_kwds=dict(
                mul_cc=functions.mul(alpha.dtype, alpha.dtype),
                exp_c=functions.exp(alpha.dtype),
                ))

        # Block size is limited by the amount of available local memory.
        # In some OpenCL implementations the number reported cannot actually be fully used
        # (because it's used by kernel arguments), so we're padding it a little.
        local_mem_size = device_params.local_mem_size
        max_elems = (local_mem_size - 256) // alpha.dtype.itemsize
        block_size = 2**helpers.log2(max_elems)

        # No reason to have block size larger than the number of modes
        block_size = min(block_size, helpers.bounding_power_of_2(modes))

        products_gsize = (samples, helpers.min_blocks(self._max_total_clicks + 1, block_size) * block_size)
        products = plan.temp_array_like(for_reduction)

        read_size = min(block_size, device_params.max_work_group_size)

        while read_size > 1:

            full_steps = modes // block_size
            remainder_size = modes % block_size

            try:
                plan.kernel_call(
                    TEMPLATE.get_def("compound_click_probability_aggregate"),
                    [products, prepared_state],
                    kernel_name="compound_click_probability_aggregate",
                    global_size=products_gsize,
                    local_size=(1, read_size,),
                    render_kwds=dict(
                        block_size=block_size,
                        read_size=read_size,
                        full_steps=full_steps,
                        remainder_size=remainder_size,
                        output_size=self._max_total_clicks + 1,
                        mul_cc=functions.mul(alpha.dtype, alpha.dtype),
                        add_cc=functions.add(alpha.dtype, alpha.dtype),
                        polar_unit=functions.polar_unit(dtypes.real_for(alpha.dtype)),
                        modes=self._system.modes,
                        max_total_clicks=self._max_total_clicks,
                        ))

            except OutOfResourcesError:
                read_size //= 2

            break

        reduction = Reduce(for_reduction, predicate_sum(alpha.dtype), axes=(0,))

        temp = plan.temp_array_like(reduction.parameter.output)

        plan.computation_call(reduction, temp, products)

        fft = FFT(temp)
        real_trf = Transformation([
            Parameter('output', Annotation(output, 'o')),
            Parameter('input', Annotation(temp, 'i')),
            ],
            """
                ${input.ctype} val = ${input.load_same};
                ${output.store_same}(val.x);
                """)
        fft.parameter.output.connect(real_trf, real_trf.input, output_p=real_trf.output)

        plan.computation_call(fft, output, temp, True)

        return plan

Пример #12

0

Показать файл

Файл: reikna_cluda_ex5.py Проект: maeager/Agilent2Dicom

  double pre_k=fmod((double)(x) , (double) dim3);
  ${ftype} k = (${ftype}) pre_k;
      k = (k - (${ftype})floor((${ftype})(dim3)/2.0f))/(${ftype})(dim3);

  ${ftype} weight = exp(-TWOPISQ*((i*i)*sigma[0]*sigma[0] + (j*j)*sigma[1]*sigma[1] + (k*k)*sigma[2]*sigma[2]));
  //${ftype} weight = expm1(-TWOPISQ*((i*i)*sigma[0]*sigma[0] + (j*j)*sigma[1]*sigma[1] + (k*k)*sigma[2]*sigma[2]))+1;
  //${ftype} weight= ${exp}(-TWOPISQ*((i*i)*sigma[0]*sigma[0] + (j*j)*sigma[1]*sigma[1] + (k*k)*sigma[2]*sigma[2]));
  
  dest[idx].x = src[idx].x * weight;
  dest[idx].y = src[idx].y * weight;  //(${ftype})k; //
  
}
""" % (N, N, N, sigma[0], sigma[1], sigma[2], FACTOR), render_kwds=dict(ctype=dtypes.ctype(dtype),
                                                                        ftype=dtypes.ctype(
                                                                            ftype),
                                                                        exp=functions.exp(ftype)), fast_math=True)

gauss_kernel = program.gauss_kernel

r1 = np.ones((N, N, N)).astype(ftype)  # /N
r2 = np.ones((N, N, N)).astype(ftype)  # /N
a = r1 + 1j * r2
b = r1 - 1j * r2
a_dev = thr.to_device(a)
#b_dev = thr.to_device(b)
#c_dev= thr.to_device(b.ravel())
#sigma_dev = thr.to_device(sigma)
dest_dev = thr.empty_like(a_dev)


# (np.pi).astype(np.float32),

Python exp примеры использования