def test_run_cuda(lt_ctx, mask_cupy):
    """Run a UDF on the 'cuda' backend and verify device bookkeeping.

    The ``mask_cupy`` fixture sets the ``cupy`` module to ``None`` so that
    any use of it raises — proving the plain 'cuda' path (not 'cupy') is
    exercised. Also verifies that the mocked environment/backend state is
    fully restored after the test body.
    """
    # The cupy module is set to None in mask_cupy fixture so that
    # any use of it will raise an error
    with pytest.raises(ModuleNotFoundError):
        import cupy  # NOQA: F401

    data = _mk_random(size=(16, 16, 16, 16))
    ds = lt_ctx.load("memory", data=data)

    # Snapshot the backend state so we can assert it is restored below
    use_cpu = bae.get_use_cpu()
    use_cuda = bae.get_use_cuda()
    backend = bae.get_device_class()

    with mock.patch.dict(os.environ, {'LIBERTEM_USE_CUDA': "23"}):
        # This should set the same environment variable as the mock above
        # so that it will be unset after the "with"
        bae.set_use_cuda(23)
        res = lt_ctx.run_udf(
            udf=DebugDeviceUDF(backends=('cuda', 'numpy')),
            dataset=ds
        )
        # NOTE: removed a leftover debug `print(val)` here; the sibling
        # tests test_run_cupy/test_run_numpy have no such print.
        for val in res['device_id'].data[0].values():
            assert val['cpu'] is None
            assert val['cuda'] == 23

    # We make sure that the mocking was successful, i.e.
    # restored the previous state
    assert use_cpu == bae.get_use_cpu()
    assert use_cuda == bae.get_use_cuda()
    assert backend == bae.get_device_class()

    assert np.all(res['device_class'].data == 'cuda')
    assert np.allclose(res['on_device'].data, data.sum(axis=(0, 1)))
def test_run_cupy(lt_ctx, mock_cupy):
    """Run a UDF on the 'cupy' backend, with numpy standing in for cupy.

    Patches ``numpy.cuda`` into existence so numpy can impersonate cupy,
    and checks that the mocked backend state is restored afterwards.
    """
    data = _mk_random(size=(16, 16, 16, 16))
    ds = lt_ctx.load("memory", data=data)

    # Remember the backend configuration to verify restoration at the end
    prev_cpu = bae.get_use_cpu()
    prev_cuda = bae.get_use_cuda()
    prev_class = bae.get_device_class()

    with mock.patch.dict(os.environ, {'LIBERTEM_USE_CUDA': "23"}):
        # This should set the same environment variable as the mock above
        # so that it will be unset after the "with"
        bae.set_use_cuda(23)
        # add `numpy.cuda` so we can make `numpy` work as a mock replacement for `cupy`
        with mock.patch('numpy.cuda', return_value=MockCuda, create=True):
            res = lt_ctx.run_udf(
                udf=DebugDeviceUDF(backends=('cupy', 'numpy')),
                dataset=ds
            )
            for entry in res['device_id'].data[0].values():
                assert entry['cpu'] is None
                assert entry['cuda'] == 23

    # The patches above must have restored the previous backend state
    assert prev_cpu == bae.get_use_cpu()
    assert prev_cuda == bae.get_use_cuda()
    assert prev_class == bae.get_device_class()

    assert np.all(res['device_class'].data == 'cuda')
    assert np.allclose(res['on_device'].data, data.sum(axis=(0, 1)))
def test_run_numpy(lt_ctx, mask_cupy):
    """Run a UDF on the default CPU/numpy backend and verify device ids.

    Forces CPU id 42 via environment + backend setter, then checks that the
    mocked state is rolled back once the ``with`` block exits.
    """
    data = _mk_random(size=(16, 16, 16, 16))
    ds = lt_ctx.load("memory", data=data)

    # Capture current backend settings for the restoration check below
    prev_cpu = bae.get_use_cpu()
    prev_cuda = bae.get_use_cuda()
    prev_class = bae.get_device_class()

    with mock.patch.dict(os.environ, {'LIBERTEM_USE_CPU': "42"}):
        # This should set the same environment variable as the mock above
        # so that it will be unset after the "with"
        bae.set_use_cpu(42)
        res = lt_ctx.run_udf(udf=DebugDeviceUDF(), dataset=ds)
        for entry in res['device_id'].data[0].values():
            assert entry['cpu'] == 42
            assert entry['cuda'] is None

    # Leaving the context must have restored the previous backend state
    assert prev_cpu == bae.get_use_cpu()
    assert prev_cuda == bae.get_use_cuda()
    assert prev_class == bae.get_device_class()

    assert np.all(res['device_class'].data == 'cpu')
    assert np.allclose(res['on_device'].data, data.sum(axis=(0, 1)))
def run_for_partition(self, partition: Partition, roi, corrections):
    """Run all UDFs of this task on a single partition.

    Splits the UDFs into numpy- and cupy-backed groups, optionally switches
    the active CUDA device for the cupy group, runs init/process/wrap-up for
    all UDFs, and restores the previously active CUDA device afterwards.

    Parameters
    ----------
    partition : Partition
        The partition of the dataset to process.
    roi
        Region of interest passed through to UDF initialization.
    corrections
        Correction set applied to the partition before processing.

    Returns
    -------
    tuple
        The ``results`` of each UDF, in the same order as ``self._udfs``.
    """
    with set_num_threads(1):
        try:
            # previous_id stays None on CPU-only runs; the finally block
            # uses it to decide whether a CUDA device switch must be undone.
            previous_id = None
            device_class = get_device_class()
            # numpy_udfs and cupy_udfs contain references to the objects in
            # self._udfs
            numpy_udfs, cupy_udfs = self._udf_lists(device_class)
            # Will only be populated if actually on CUDA worker
            # and any UDF supports 'cupy' (and not 'cuda')
            if cupy_udfs:
                # Avoid importing if not used
                import cupy
                device = get_use_cuda()
                # Remember the currently active device so it can be
                # restored in the finally block below.
                previous_id = cupy.cuda.Device().id
                cupy.cuda.Device(device).use()
            (meta, tiling_scheme, dtype) = self._init_udfs(
                numpy_udfs, cupy_udfs, partition, roi, corrections, device_class
            )
            # print("UDF TilingScheme: %r" % tiling_scheme.shape)
            partition.set_corrections(corrections)
            self._run_udfs(numpy_udfs, cupy_udfs, partition, tiling_scheme, roi, dtype)
            self._wrapup_udfs(numpy_udfs, cupy_udfs, partition)
        finally:
            # Only restore if we actually switched devices above; `cupy`
            # is guaranteed to be imported in that case.
            if previous_id is not None:
                cupy.cuda.Device(previous_id).use()
        # Make sure results are in the same order as the UDFs
        return tuple(udf.results for udf in self._udfs)
def get_available_workers(self):
    """Return the single inline worker, advertising CUDA when configured.

    The worker always offers one compute slot and one CPU; a CUDA resource
    is added only when a CUDA device id is configured.
    """
    resources = {"compute": 1, "CPU": 1}
    if get_use_cuda() is not None:
        resources["CUDA"] = 1
    worker = Worker(name='inline', host='localhost', resources=resources)
    return WorkerSet([worker])
def process_partition(self, partition):
    """Record the backend/device used for this partition and sum its frames.

    Writes the configured CPU/CUDA ids, device class, and array-module name
    into the result buffers, and accumulates the partition sum on-device.
    """
    results = self.results
    meta = self.meta
    # Store which CPU/CUDA ids were configured while this partition ran
    results.device_id[0][meta.slice] = {
        "cpu": bae.get_use_cpu(),
        "cuda": bae.get_use_cuda(),
    }
    # Sum over the frame axis with the active array module (self.xp)
    results.on_device[:] += self.xp.sum(partition, axis=0)
    results.device_class[:] = meta.device_class
    results.backend[0][meta.slice] = str(self.xp)
    print(f"meta device_class {self.meta.device_class}")
def get_available_workers(self):
    """Return the single concurrent worker.

    With a CUDA device configured, advertise a CUDA resource and run
    single-threaded; otherwise size the worker's thread count by the
    number of detected CPUs.
    """
    resources = {"compute": 1, "CPU": 1}
    if get_use_cuda() is None:
        # CPU-only: one worker using all detected CPU cores
        cpu_threads = len(detect()['cpus'])
        return WorkerSet([
            Worker(
                name='concurrent',
                host='localhost',
                resources=resources,
                nthreads=cpu_threads,
            )
        ])
    resources["CUDA"] = 1
    return WorkerSet([
        Worker(name='concurrent', host='localhost', resources=resources, nthreads=1)
    ])