def apply(self, source_array):
    """Blur an image on the GPU, convert it to HSV and threshold it.

    Planes 0, 1 and 2 of ``source_array`` are uploaded as the blue, green
    and red channels. Each channel is passed through ``self.apply_filter``,
    the three results are converted with ``cuda_rgb2hsv`` and the HSV planes
    are handed to ``self.cuda_kernel``.

    Parameters
    ----------
    source_array
        Image indexable as ``source_array[:, :, c]`` for ``c`` in 0..2; its
        first two dimensions are used as (height, width).

    Returns
    -------
    The output of ``self.cuda_kernel`` copied back to host memory with
    ``cp.asnumpy``.
    """
    # Upload the three colour planes to the GPU (plane 0 = blue, 2 = red).
    blue_channel, green_channel, red_channel = (
        cp.asarray(source_array[:, :, plane_index]) for plane_index in range(3)
    )

    # Launch geometry for the blur: enough DIM_BLOCK x DIM_BLOCK blocks to
    # cover the full width and height.
    height, width = source_array.shape[:2]
    grid_shape = (math.ceil(width / DIM_BLOCK), math.ceil(height / DIM_BLOCK))
    block_shape = (DIM_BLOCK, DIM_BLOCK)

    # NOTE(review): the same buffer is passed as the first two kernel
    # arguments - presumably input and output, i.e. an in-place blur.
    # Confirm the kernel tolerates aliasing.
    for plane in (red_channel, green_channel, blue_channel):
        kernel_args = (
            plane,
            plane,
            cp.uint32(width),
            cp.uint32(height),
            cp.uint32(self.filter_width),
        )
        self.apply_filter(grid_shape, block_shape, kernel_args)

    # Colour-space conversion followed by the threshold step.
    hue_channel, sat_channel, val_channel = cuda_rgb2hsv(
        red_channel, green_channel, blue_channel)
    thresholded = self.cuda_kernel(hue_channel, sat_channel, val_channel)

    # Bring the result back to the host.
    return cp.asnumpy(thresholded)
def apply(self, source_array):
    """Blur every channel of an image on the GPU and return a numpy array.

    Each colour plane is uploaded to the GPU, passed through
    ``self.apply_filter`` and copied back into a host array with the same
    shape and dtype as the input.

    Unlike the earlier version, this works for 2-D greyscale images (treated
    as a single plane) and processes *all* planes of a multi-channel image,
    so no plane of the ``np.empty_like`` output is left uninitialised.

    Parameters
    ----------
    source_array
        Numpy image, either ``(height, width)`` or
        ``(height, width, channels)``.

    Returns
    -------
    Numpy array shaped like ``source_array`` holding the filtered planes.
    """
    # Numpy array to store the output
    result_array = np.empty_like(source_array)

    # View a greyscale image as a one-plane image so a single code path
    # handles both layouts; the new-axis slices are views, so writes to
    # result_planes land in result_array.
    if source_array.ndim == 2:
        source_planes = source_array[:, :, np.newaxis]
        result_planes = result_array[:, :, np.newaxis]
    else:
        source_planes = source_array
        result_planes = result_array
    plane_count = source_planes.shape[2]

    # Upload every plane as its own cupy (GPU) array
    gpu_planes = [
        cp.asarray(source_planes[:, :, index]) for index in range(plane_count)
    ]

    # Determine parameters for CUDA Blur: enough DIM_BLOCK x DIM_BLOCK
    # blocks to cover the full width and height.
    height, width = source_array.shape[:2]
    grid_shape = (math.ceil(width / DIM_BLOCK), math.ceil(height / DIM_BLOCK))
    block_shape = (DIM_BLOCK, DIM_BLOCK)

    # Blur each plane of the image.
    # NOTE(review): the same buffer is passed as the first two kernel
    # arguments - presumably input and output, i.e. an in-place blur.
    # Confirm the kernel tolerates aliasing.
    for plane in gpu_planes:
        self.apply_filter(
            grid_shape,
            block_shape,
            (
                plane,
                plane,
                cp.uint32(width),
                cp.uint32(height),
                cp.uint32(self.filter_width),
            ),
        )

    # Convert the results back to a single numpy array
    for index, plane in enumerate(gpu_planes):
        result_planes[:, :, index] = cp.asnumpy(plane)
    return result_array
def reduction(x, y, size):
    # Sum the first `size` elements of `x` into y[0] using the threads of
    # one block.
    #
    # Written against the `jit` thread-index / shared-memory API; presumably
    # compiled as a cupyx.jit raw kernel by a decorator outside this snippet.
    #
    #   x    : input values, read as x[i] for i < size
    #   y    : output, only y[0] is written
    #   size : number of elements of x to sum
    #
    # The shared buffer has 1024 float32 slots and is indexed by the thread
    # index, so a block must not contain more than 1024 threads.
    lane = jit.threadIdx.x
    lane_count = jit.blockDim.x

    # Per-block scratch space, one slot per thread.
    partials = jit.shared_memory(cupy.float32, 1024)

    # Each thread accumulates elements lane, lane + lane_count, ...
    acc = cupy.float32(0)
    for idx in range(lane, size, lane_count):
        acc += x[idx]
    partials[lane] = acc

    # Every partial sum must be stored before thread 0 reads them.
    jit.syncthreads()

    # Thread 0 adds up the per-thread partial sums and stores the total.
    if lane == cupy.uint32(0):
        acc = cupy.float32(0)
        for idx in range(lane_count):
            acc += partials[idx]
        y[0] = acc
def reduction(x, y, size):
    # Add the sum of the first `size` elements of `x` to y[0], spreading the
    # work over every thread of every block in the launch.
    #
    # Written against the `jit` thread-index / shared-memory API; presumably
    # compiled as a cupyx.jit raw kernel by a decorator outside this snippet.
    #
    #   x    : input values, read as x[i] for i < size
    #   y    : output; each block atomically ADDS its partial sum to y[0],
    #          so the final value includes whatever y[0] held at launch
    #          (presumably the caller zeroes it first - confirm)
    #   size : number of elements of x to sum
    #
    # The shared buffer has 1024 float32 slots and is indexed by the thread
    # index, so a block must not contain more than 1024 threads.
    local_id = jit.threadIdx.x
    block_threads = jit.blockDim.x
    global_id = jit.blockIdx.x * block_threads + local_id
    stride = block_threads * jit.gridDim.x

    # Per-block scratch space, one slot per thread.
    partials = jit.shared_memory(cupy.float32, 1024)

    # Each thread accumulates elements global_id, global_id + stride, ...
    acc = cupy.float32(0)
    for idx in range(global_id, size, stride):
        acc += x[idx]
    partials[local_id] = acc

    # Every partial sum of this block must be stored before thread 0 reads.
    jit.syncthreads()

    # Thread 0 of each block folds the block's partial sums and contributes
    # the block total to the shared output.
    if local_id == cupy.uint32(0):
        acc = cupy.float32(0)
        for idx in range(block_threads):
            acc += partials[idx]
        jit.atomic_add(y, 0, acc)
def get_number_of_ranges(record: OrderedDict) -> int:
    """
    Work out how many ranges a record holds from its metadata.

    Parameters
    ----------
    record: OrderedDict
        hdf5 record containing antennas_iq data and metadata. The keys
        'rx_sample_rate', 'num_samps' and 'blanked_samples' are read here.

    Returns
    -------
    num_ranges: int
        The number of ranges of the data (returned as ``xp.uint32``)
    """
    # Offset of the first range in samples: the round-trip time from
    # calculate_first_range_rtt, scaled by 1e-6 and the rx sample rate.
    first_range_rtt = ProcessAntennasIQ2Bfiq.calculate_first_range_rtt(record)
    samples_to_first_range = first_range_rtt * 1e-6 * record['rx_sample_rate']

    # Remove the (truncated) first-range offset, the last blanked sample
    # index, and the 3 extra samples present in every record (the reason
    # for those 3 is not known).
    range_count = (
        record['num_samps']
        - xp.int32(samples_to_first_range)
        - record['blanked_samples'][-1]
        - 3
    )
    return xp.uint32(range_count)
@jit.rawkernel()
def reduction(x, y, size):
    # Raw kernel: sum the first `size` elements of `x` into y[0] using the
    # threads of one block.
    #
    # The shared buffer has 1024 float32 slots and is indexed by the thread
    # index, so a block must not contain more than 1024 threads.
    lane = jit.threadIdx.x
    lane_count = jit.blockDim.x

    # Per-block scratch space, one slot per thread.
    partials = jit.shared_memory(cupy.float32, 1024)

    # Each thread accumulates elements lane, lane + lane_count, ...
    acc = cupy.float32(0)
    for idx in range(lane, size, lane_count):
        acc += x[idx]
    partials[lane] = acc

    # Every partial sum must be stored before thread 0 reads them.
    jit.syncthreads()

    # Thread 0 adds up the per-thread partial sums and stores the total.
    if lane == cupy.uint32(0):
        acc = cupy.float32(0)
        for idx in range(lane_count):
            acc += partials[idx]
        y[0] = acc


# Demo driver: sum 2**22 normally distributed float32 values.
size = cupy.uint32(2 ** 22)
x = cupy.random.normal(size=(size,), dtype=cupy.float32)
# Output slot; the kernel overwrites y[0], so no initialisation is needed.
y = cupy.empty((1,), dtype=cupy.float32)

# Launch with a grid of 1 block and 1024 threads per block.
reduction[1, 1024](x, y, size)

# Print the kernel's result next to cupy's own sum for comparison.
print(y[0])
print(x.sum())
Let G(n) = sum_{k=1}^{n} f(k) / (k^2 * φ(k)), where φ(n) is Euler's totient function. For example, G(10) = 3053 and G(10^5) ≡ 157612967 (mod 1000000007). Find G(10^12) mod 1000000007. """ import cupy as cp import numpy as np import itertools as ittr import numba from math import gcd MOD = cp.uint32(1e9+7) import numpy as np from math import gcd import itertools as ittr from sympy.ntheory import totient MOD = np.uint32(1e9+7) n = 3 tot = 0