def setUp(self):
    """Build the host data and the OpenCL objects shared by the tests.

    Random float32 test vectors (1D of length ``self.N`` and 2D of shape
    ``(self.N, self.N)``) are generated, a context is requested for the
    "GPU" device type, ``self.ws`` is capped by the device's maximum
    work-group size, and a profiling-enabled queue, a local-memory
    buffer and the program compiled from "bitonic.cl" are created.
    """
    size = self.N
    square = (size, size)
    self.h_data = numpy.random.random(size).astype("float32")
    self.h2_data = numpy.random.random(square).astype("float32").reshape(square)

    self.ctx = ocl.create_context(devicetype="GPU")
    device = self.ctx.devices[0]
    try:
        devtype = pyopencl.device_type.to_string(device.type).upper()
    except ValueError:
        # pocl does not describe itself as a CPU !
        devtype = "CPU"

    max_ws = device.max_work_group_size
    if devtype == "CPU" and device.platform.vendor == "Apple":
        logger.info("For Apple's OpenCL on CPU: enforce max_work_goup_size=1")
        max_ws = 1
    # Never exceed what the device accepts.
    self.ws = min(max_ws, self.ws)

    profiling = pyopencl.command_queue_properties.PROFILING_ENABLE
    self.queue = pyopencl.CommandQueue(self.ctx, properties=profiling)
    # 2float4 = 2*4*4 bytes per workgroup size
    self.local_mem = pyopencl.LocalMemory(self.ws * 32)
    kernel_source = pyFAI.utils.read_cl_file("bitonic.cl")
    self.prg = pyopencl.Program(self.ctx, kernel_source).build()
def init(self, devicetype="GPU", useFp64=True, platformid=None, deviceid=None):
    """Initial configuration: choose a device and initiate a context.

    Devicetypes can be GPU, gpu, CPU, cpu, DEF, ACC, ALL.
    Suggested are GPU, CPU.
    For each setting to work there must be such an OpenCL device and
    properly installed.
    E.g.: If Nvidia driver is installed, GPU will succeed but CPU will fail.
    The AMD SDK kit (AMD APP) is required for CPU via OpenCL.

    When a context already exists nothing is reconfigured: a warning is
    logged and the method returns. Otherwise the context is created and
    ``devicetype``, ``useFp64``, ``platformid``, ``deviceid`` and the
    command queue are (re)derived from the device actually obtained; in
    particular ``self.useFp64`` reflects the device extensions, not the
    requested value.

    @param devicetype: string in ["cpu","gpu", "all", "acc"]
    @param useFp64: boolean specifying if double precision will be used
    @param platformid: integer
    @param deviceid: integer
    """
    if self._ctx is not None:
        logger.warning("Recycling existing context ..."
                       " if you want to get start from scratch,"
                       " use clean()")
        return

    self._ctx = ocl.create_context(devicetype, useFp64, platformid, deviceid)
    device = self._ctx.devices[0]

    try:
        self.devicetype = pyopencl.device_type.to_string(device.type)
    except ValueError:
        # Some drivers (e.g. pocl) report a device type pyopencl cannot
        # name; treat them as CPU instead of failing the initialisation.
        self.devicetype = "CPU"

    if (self.devicetype == "CPU") and (device.platform.vendor == "Apple"):
        logger.warning("This is a workaround for Apple's OpenCL"
                       " on CPU: enforce BLOCK_SIZE=1")
        self.BLOCK_SIZE = 1
        # Work sizes depend on BLOCK_SIZE, so recompute them. The mask
        # arithmetic pads the size up to a multiple of BLOCK_SIZE
        # (presumably a power of two -- TODO confirm); each is a 1-tuple.
        if self.nBins:
            self.wdim_bins = ((self.nBins + self.BLOCK_SIZE - 1)
                              & ~(self.BLOCK_SIZE - 1),)
        if self.nData:
            self.wdim_data = ((self.nData + self.BLOCK_SIZE - 1)
                              & ~(self.BLOCK_SIZE - 1),)

    self.useFp64 = "fp64" in device.extensions

    # Record where the selected device sits in the platform/device lists.
    platforms = pyopencl.get_platforms()
    self.platformid = platforms.index(device.platform)
    devices = platforms[self.platformid].get_devices()
    self.deviceid = devices.index(device)

    if self.filename:
        # Profiling is only enabled when self.filename is set.
        self._queue = pyopencl.CommandQueue(
            self._ctx,
            properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
    else:
        self._queue = pyopencl.CommandQueue(self._ctx)
def setUp(self):
    """Create the random inputs and the OpenCL environment for a test.

    Sets ``h_data`` (1D) and ``h2_data`` (``N`` x ``N``) as float32
    arrays, requests a context for the "GPU" device type, limits
    ``self.ws`` to the device's maximum work-group size, then creates
    the profiling-enabled queue, the local-memory buffer and the
    program built from "bitonic.cl".
    """
    n = self.N
    self.h_data = numpy.random.random(n).astype("float32")
    data_2d = numpy.random.random((n, n)).astype("float32")
    self.h2_data = data_2d.reshape((n, n))

    self.ctx = ocl.create_context(devicetype="GPU")
    device = self.ctx.devices[0]

    try:
        devtype = pyopencl.device_type.to_string(device.type).upper()
    except ValueError:
        # pocl does not describe itself as a CPU !
        devtype = "CPU"

    workgroup = device.max_work_group_size
    apple_cpu = (devtype == "CPU") and (device.platform.vendor == "Apple")
    if apple_cpu:
        logger.info("For Apple's OpenCL on CPU: enforce max_work_goup_size=1")
        workgroup = 1
    self.ws = min(workgroup, self.ws)

    self.queue = pyopencl.CommandQueue(
        self.ctx,
        properties=pyopencl.command_queue_properties.PROFILING_ENABLE,
    )
    # 2float4 = 2*4*4 bytes per workgroup size
    local_bytes = self.ws * 32
    self.local_mem = pyopencl.LocalMemory(local_bytes)

    src = pyFAI.utils.read_cl_file("bitonic.cl")
    program = pyopencl.Program(self.ctx, src)
    self.prg = program.build()
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. from __future__ import print_function import utilstest import numpy, time import pyFAI, pyFAI.opencl from pyFAI.opencl import pyopencl, ocl import pyopencl.array N = 1024 ws = N // 8 ctx = ocl.create_context("GPU") queue = pyopencl.CommandQueue(ctx, properties=pyopencl.command_queue_properties.PROFILING_ENABLE) h_data = numpy.random.random(N).astype("float32") d_data = pyopencl.array.to_device(queue, h_data) local_mem = pyopencl.LocalMemory(ws * 32) # 2float4 = 2*4*4 bytes per workgroup size src = pyFAI.utils.read_cl_file("bsort.cl") prg = pyopencl.Program(ctx, src).build() t0 = time.time() hs_data = numpy.sort(h_data) t1 = time.time() time_sort = 1e3 * (t1 - t0) print(time_sort)