def reduce(self, context, x, out=None, initial=0.0, queue=None):
    """Run the compiled reduction kernel for ``self.device_func`` over ``x``.

    Parameters:
        context: only forwarded to ``__array_init__`` on the returned array.
        x: the data to reduce. Anything that is not a
            ``cl.DeviceMemoryView`` is first passed through
            ``cl.from_host(queue.context, x)``.
        out: optional destination. When omitted, a one-element array with
            the format of ``x`` is created via ``cl.empty``.
        initial: accepted for interface compatibility; this method does not
            read it.
        queue: queue to run on. Defaults to ``x.queue``, which is looked up
            *before* any host-to-device conversion of ``x``.

    Returns:
        ``out`` viewed through ``CLArray._view_as_this`` and initialised
        with ``(context, queue)``.
    """
    if queue is None:
        queue = x.queue

    if not isinstance(x, cl.DeviceMemoryView):
        x = cl.from_host(queue.context, x)

    # Kernel argument order: output, input, shared, group_size.
    size = x.size
    shared = cl.local_memory(x.ctype, ndim=1, shape=[size])

    # Round half the element count down to a power of two, clamped to the
    # range 1..512: take the first power of two that exceeds the half and
    # halve it, or use 512 when none of 2..512 exceeds it.
    half = size // 2
    powers_of_two = (1 << shift for shift in range(1, 10))  # 2, 4, ..., 512
    group_size = next(
        (power // 2 for power in powers_of_two if half < power),
        512,
    )

    if out is None:
        out = cl.empty(queue.context, [1], x.format)

    compiled = reduce_kernel.compile(
        queue.context,
        function=self.device_func,
        output=cl.global_memory(out.ctype, flat=True),
        array=cl.global_memory(x.ctype, flat=True),
        shared=shared,
        group_size=cl.cl_uint,
        cly_meta=self.device_func.func_name,
    )

    # Never ask for more work items per group than the kernel reports for
    # this device.
    device_limit = kernel_limit = compiled.work_group_size(queue.device)
    group_size = min(kernel_limit, group_size)

    compiled(
        queue,
        out, out.array_info,
        x, x.array_info,
        shared, shared.local_info,
        group_size,
    )

    result = CLArray._view_as_this(out)
    result.__array_init__(context, queue)
    return result
def reduce(queue, function, input, initial=0.0):
    """
    reduce(queue, function, sequence[, initial]) -> value

    Collapse ``input`` to a single value by handing ``function`` (a function
    of two arguments) to ``cl_reduce``.

    The documented contract follows the builtin ``reduce``: the function is
    applied cumulatively to the items from left to right, so
    reduce(lambda x, y: x+y, [1, 2, 3, 4, 5]) calculates ((((1+2)+3)+4)+5).
    When ``initial`` is given it is placed before the items of the sequence
    and serves as the default when the sequence is empty.

    The value returned is the one-element object created by ``cl.empty`` on
    ``queue.context``, after ``cl_reduce`` has been invoked with it as the
    output argument.
    """
    # NOTE: ``input`` shadows the builtin, but it is part of the public
    # signature and therefore kept.
    element_count = input.size

    # Local-memory descriptor sized to the whole input.
    scratch = cl.local_memory(input.format, [element_count])

    # Single-element destination in the same format as the input.
    result = cl.empty(queue.context, [1], input.format)

    # The group size passed on is half the number of input elements.
    cl_reduce(
        queue,
        function,
        result,
        input,
        scratch,
        element_count // 2,
        initial,
    )
    return result
def reduce(queue, function, input, initial=0.0):
    """
    reduce(queue, function, sequence[, initial]) -> value

    Reduce ``input`` to one value using the two-argument ``function``; the
    work itself is delegated to ``cl_reduce``.

    As documented for the builtin ``reduce``, the function is applied
    cumulatively to the items of the sequence, from left to right. For
    example, reduce(lambda x, y: x+y, [1, 2, 3, 4, 5]) calculates
    ((((1+2)+3)+4)+5). If ``initial`` is present, it is placed before the
    items of the sequence in the calculation, and serves as a default when
    the sequence is empty.

    Returns the one-element object allocated with ``cl.empty`` that was
    passed to ``cl_reduce`` as its output argument.
    """
    # ``input`` shadows the builtin of the same name; it stays because it is
    # a caller-visible parameter name.
    count = input.size
    local_buffer = cl.local_memory(input.format, [count])
    destination = cl.empty(queue.context, [1], input.format)

    # Half the element count is used as the group size.
    half_count = count // 2
    cl_reduce(queue, function, destination, input, local_buffer, half_count, initial)

    return destination