Example #1
kernel_init(queue, (global_size,), (local_size,),
            data_buffer,
            local_buffer,
            points_per_group,
            np.uint32(NUM_POINTS),
            direction)


# There is some overhead involved with spawning a new kernel (code caching)
# A good rule of thumb is therefore to create the kernel object outside of loops
# Ref: https://lists.tiker.net/pipermail/pyopencl/2016-February/002107.html
kernel_stage = prog.fft_stage

# Enqueue further stages of the FFT
if NUM_POINTS > points_per_group:
    # for(stage = 2; stage <= num_points/points_per_group; stage <<= 1)
    for stage in utility.range_bitwise_shift(low=2, high=NUM_POINTS//points_per_group + 1, n=1):
        print('Stage: ' + str(stage))
        # fft_stage(__global float2* g_data, uint stage, uint points_per_group, int dir)
        kernel_stage(queue, (global_size,), (local_size,),
                     data_buffer,
                     np.uint32(stage),
                     points_per_group,
                     direction)

# Scale values if performing the inverse FFT
if not FORWARD_FFT:
    # fft_scale(__global float2* g_data, uint points_per_group, uint scale)
    prog.fft_scale(queue, (global_size,), (local_size,), data_buffer, points_per_group, np.uint32(NUM_POINTS))

# Read results
cl.enqueue_copy(queue, dest=output_data, src=data_buffer, is_blocking=True)
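
# utility.range_bitwise_shift is not included in the snippet. A minimal sketch
# of what the C-style loop comment above suggests it does (name, signature and
# the exact boundary handling are assumptions): a range whose counter is
# shifted by |n| bits per iteration, left for positive n, right for negative n.
def range_bitwise_shift(low, high, n=1):
    if n > 0:
        value = low
        while value < high:  # mirrors for(i = low; i < high; i <<= n)
            yield value
            value <<= n
    else:
        value = high
        while value > low:  # mirrors for(i = high; i > low; i >>= -n)
            yield value
            value >>= -n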
Example #2
global_size = (NUM_POINTS // points_per_group) * local_size
print('Global size: ' + str(global_size))
print('Local size: ' + str(local_size))

kernel_init(queue, (global_size, ), (local_size, ), data_buffer, local_buffer,
            points_per_group, np.uint32(NUM_POINTS), direction)

# There is some overhead involved with spawning a new kernel (code caching)
# A good rule of thumb is therefore to create the kernel object outside of loops
# Ref: https://lists.tiker.net/pipermail/pyopencl/2016-February/002107.html
kernel_stage = prog.fft_stage

# Enqueue further stages of the FFT
if NUM_POINTS > points_per_group:
    # for(stage = 2; stage <= num_points/points_per_group; stage <<= 1)
    for stage in utility.range_bitwise_shift(
            low=2, high=NUM_POINTS // points_per_group + 1, n=1):
        print('Stage: ' + str(stage))
        # fft_stage(__global float2* g_data, uint stage, uint points_per_group, int dir)
        kernel_stage(queue, (global_size, ), (local_size, ), data_buffer,
                     np.uint32(stage), points_per_group, direction)

# Scale values if performing the inverse FFT
if not FORWARD_FFT:
    # fft_scale(__global float2* g_data, uint points_per_group, uint scale)
    prog.fft_scale(queue, (global_size, ), (local_size, ), data_buffer,
                   points_per_group, np.uint32(NUM_POINTS))

# Read results
cl.enqueue_copy(queue, dest=output_data, src=data_buffer, is_blocking=True)

# Change to array of complex values
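# The snippet is cut off here; a minimal sketch of that conversion, assuming
# output_data is a float32 NumPy array of interleaved (real, imag) pairs in
# the float2 layout the kernels write:
complex_result = output_data.view(np.complex64).ravel()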
Example #3
local_size = (local_size,)

# Enqueue initial sorting kernel
# bsort_init(__global float4 *g_data, __local float4 *l_data)
prog.bsort_init(queue, global_size, local_size, data_buffer, local_buffer)

# There is some overhead involved with spawning a new kernel (code caching)
# A good rule of thumb is therefore to create the kernel object outside of loops
# Ref: https://lists.tiker.net/pipermail/pyopencl/2016-February/002107.html
kernel_stage_n = prog.bsort_stage_n
kernel_stage_0 = prog.bsort_stage_0
kernel_merge = prog.bsort_merge

# Enqueue further stages
num_stages = global_size[0] // local_size[0]
for high_stage in utility.range_bitwise_shift(low=2, high=num_stages, n=1):

    for stage in utility.range_bitwise_shift(low=1, high=high_stage, n=-1):
        # bsort_stage_n(__global float4 *g_data, __local float4 *l_data, uint stage, uint high_stage)
        kernel_stage_n(queue, global_size, local_size, data_buffer, local_buffer,
                       np.uint32(stage), np.uint32(high_stage))

    # bsort_stage_0(__global float4 *g_data, __local float4 *l_data, uint high_stage)
    kernel_stage_0(queue, global_size, local_size, data_buffer, local_buffer, np.uint32(high_stage))

# Perform the bitonic merge
for stage in utility.range_bitwise_shift(low=1, high=num_stages, n=-1):
    # bsort_merge(__global float4 *g_data, __local float4 *l_data, uint stage, int dir)
    kernel_merge(queue, global_size, local_size, data_buffer, local_buffer, np.uint32(stage), direction)

# bsort_merge_last(__global float4 *g_data, __local float4 *l_data, int dir)
prog.bsort_merge_last(queue, global_size, local_size, data_buffer, local_buffer, direction)
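
# The example ends with the final merge. A sketch of reading back and sanity-
# checking the sorted buffer (output_data and an ascending sort direction are
# assumptions, mirroring the copy-back pattern in the FFT examples above):
cl.enqueue_copy(queue, dest=output_data, src=data_buffer, is_blocking=True)
assert np.all(output_data[:-1] <= output_data[1:]), 'buffer is not sorted'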