# Correctness check for IncrementalTileList: upload random per-tile values,
# build the tile list on the device and compare it with a CPU reference.

# Run on the second device of the first OpenCL platform.
# NOTE(review): devices[1] raises IndexError when the platform exposes only
# one device - confirm the index matches the machine this is run on.
platforms = cl.get_platforms()
devices = platforms[0].get_devices()
devices = [devices[1]]
context = cl.Context(devices)
queue = cl.CommandQueue(context)

global_dim = (4096, 4096)
global_shape = (global_dim[1], global_dim[0])

# NOTE(review): (16, 16) is presumably the tile size - confirm against
# IncrementalTileList's constructor.
tileList = IncrementalTileList(context, devices, global_dim, (16, 16))
tiles_dim = tileList.dim
n_tiles = tiles_dim[0]*tiles_dim[1]

prefixSum = PrefixSum(context, devices, n_tiles)
streamCompact = StreamCompact(context, devices, n_tiles)

# Random per-tile values in [0, 20), stored with tiles_dim[1] rows and
# tiles_dim[0] columns, copied into the tile list's device buffer.
hTiles = np.random.randint(0, 20, (tiles_dim[1], tiles_dim[0])).astype(np.int32)
cl.enqueue_copy(queue, tileList.d_tiles, hTiles).wait()

tileList.build(Operator.GTE, 10)

# Read back the whole device list; only the first tileList.length entries
# are compared below.
hList = np.empty((tiles_dim[0]*tiles_dim[1],), np.int32)
cl.enqueue_copy(queue, hList, tileList.d_list).wait()

# Test correctness using tileList - prefixsum and streamcompact are then
# correct too.
# CPU reference: linear index (row*width + column) of every tile whose value
# is >= 10. np.where yields the indices in row-major order, so the linear
# indices come out ascending. This is computed with array arithmetic rather
# than map(), because on Python 3 map() returns a lazy iterator that can not
# be compared element-wise with an ndarray.
tile_rows, tile_cols = np.where(hTiles >= 10)
compact_cpu = tile_rows*tiles_dim[0] + tile_cols

# array_equal also reports a mismatch (instead of misbehaving) when the
# device list length differs from the reference length.
assert np.array_equal(compact_cpu, hList[0:tileList.length])
# Driver script for StreamCompact: compacts a random 0/1 flag array on the
# device and reads the resulting list and its length back to the host.

# Element sizes in bytes, used when sizing raw device buffers.
szFloat = 4
szInt = 4
szChar = 1

cm = cl.mem_flags

# Run on the second device of the first OpenCL platform.
platforms = cl.get_platforms()
devices = platforms[0].get_devices()
devices = [devices[1]]
context = cl.Context(devices)
queue = cl.CommandQueue(context)

nSamples = 65536
capcity = nSamples

streamCompact = StreamCompact(context, devices, capcity)

# Host-side output list and its device counterpart.
hList = np.empty(nSamples, dtype=np.int32)
dList = streamCompact.listFactory(nSamples)

# Random flags drawn from {0, 1}, uploaded to the device flag buffer.
hFlags = np.random.randint(0, 2, nSamples).astype(np.int32)
dFlags = streamCompact.flagFactory(nSamples)
cl.enqueue_copy(queue, dFlags, hFlags).wait()

# Single-int buffers (host and device) that receive the length written by
# compact().
hLength = np.empty(1, dtype=np.int32)
dLength = cl.Buffer(context, cm.READ_WRITE, szInt)

streamCompact.compact(dFlags, dList, dLength, nSamples)

# Blocking read-back of the results.
cl.enqueue_copy(queue, hList, dList).wait()
cl.enqueue_copy(queue, hLength, dLength).wait()