def test_start_local_cupyonly(hdf5_ds_1):
    cudas = detect()['cudas']
    # Make sure we have enough partitions
    hdf5_ds_1.set_num_cores(len(cudas))
    mask = _mk_random(size=(16, 16))
    with hdf5_ds_1.get_reader().get_h5ds() as h5ds:
        data = h5ds[:]
        expected = _naive_mask_apply([mask], data)

    spec = cluster_spec(cpus=(), cudas=cudas, has_cupy=True)
    with DaskJobExecutor.make_local(spec=spec) as executor:
        ctx = api.Context(executor=executor)
        # Uses ApplyMasksUDF, which supports CuPy
        analysis = ctx.create_mask_analysis(
            dataset=hdf5_ds_1, factories=[lambda: mask]
        )
        results = ctx.run(analysis)
        udf_res = ctx.run_udf(udf=DebugDeviceUDF(), dataset=hdf5_ds_1)
        # No CPU compute resources
        with pytest.raises(RuntimeError):
            _ = ctx.run_udf(udf=DebugDeviceUDF(backends=('numpy',)), dataset=hdf5_ds_1)
        cuda_res = ctx.run_udf(udf=DebugDeviceUDF(backends=('cuda',)), dataset=hdf5_ds_1)

    assert np.allclose(
        results.mask_0.raw_data,
        expected
    )

    found = {}

    for val in udf_res['device_id'].data[0].values():
        print(val)
        # no CPU
        assert val["cpu"] is None
        # Register which GPUs got work
        found[val["cuda"]] = True

    for val in cuda_res['device_id'].data[0].values():
        print(val)
        # no CPU
        assert val["cpu"] is None
        # Register which GPUs got work
        found[val["cuda"]] = True

    for val in udf_res['backend'].data[0].values():
        # uses CuPy
        print(val)
        assert 'cupy' in val

    for val in cuda_res['backend'].data[0].values():
        # no CuPy, i.e. NumPy
        print(val)
        assert 'numpy' in val

    # Test that each GPU got work. Whether this always holds
    # depends on the scheduler's behavior.
    assert set(found.keys()) == set(cudas)

    assert np.all(udf_res['device_class'].data == 'cuda')
    assert np.allclose(udf_res['on_device'].data, data.sum(axis=(0, 1)))

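# Hypothetical sketch, not part of the original tests: the GPU-only tests in this
# module assume that detect() reports at least one CUDA device. A guard like the
# following could be applied to them as a decorator; the import path for detect()
# is an assumption based on how it is used here.
import pytest
from libertem.utils.devices import detect

requires_cuda = pytest.mark.skipif(
    not detect()['cudas'],
    reason="requires at least one detected CUDA device",
)
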
def test_run_cuda(lt_ctx, mask_cupy):
    # The cupy module is set to None in mask_cupy fixture so that
    # any use of it will raise an error
    with pytest.raises(ModuleNotFoundError):
        import cupy  # NOQA: F401
    data = _mk_random(size=(16, 16, 16, 16))
    ds = lt_ctx.load("memory", data=data)

    use_cpu = bae.get_use_cpu()
    use_cuda = bae.get_use_cuda()
    backend = bae.get_device_class()

    with mock.patch.dict(os.environ, {'LIBERTEM_USE_CUDA': "23"}):
        # This should set the same environment variable as the mock above
        # so that it will be unset after the "with"
        bae.set_use_cuda(23)
        res = lt_ctx.run_udf(
            udf=DebugDeviceUDF(backends=('cuda', 'numpy')),
            dataset=ds
        )
        for val in res['device_id'].data[0].values():
            print(val)
            assert val['cpu'] is None
            assert val['cuda'] == 23

    # We make sure that the mocking was successful, i.e.
    # restored the previous state
    assert use_cpu == bae.get_use_cpu()
    assert use_cuda == bae.get_use_cuda()
    assert backend == bae.get_device_class()

    assert np.all(res['device_class'].data == 'cuda')
    assert np.allclose(res['on_device'].data, data.sum(axis=(0, 1)))

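# Hypothetical sketch of what a fixture like `mask_cupy` presumably does: hide the
# cupy module so that importing it raises ModuleNotFoundError, as asserted in
# test_run_cuda above. The real conftest fixture may differ in details; this sketch
# uses a different name to avoid shadowing it.
import sys
from unittest import mock

import pytest


@pytest.fixture
def mask_cupy_sketch():
    # With sys.modules['cupy'] set to None, `import cupy` raises
    # ModuleNotFoundError (Python >= 3.6); patch.dict restores the
    # previous state when the fixture goes out of scope.
    with mock.patch.dict(sys.modules, {'cupy': None}):
        yield
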
def test_run_cupy(lt_ctx, mock_cupy):
    data = _mk_random(size=(16, 16, 16, 16))
    ds = lt_ctx.load("memory", data=data)

    use_cpu = bae.get_use_cpu()
    use_cuda = bae.get_use_cuda()
    backend = bae.get_device_class()

    with mock.patch.dict(os.environ, {'LIBERTEM_USE_CUDA': "23"}):
        # This should set the same environment variable as the mock above
        # so that it will be unset after the "with"
        bae.set_use_cuda(23)
        # add `numpy.cuda` so we can make `numpy` work as a mock replacement for `cupy`
        with mock.patch('numpy.cuda', return_value=MockCuda, create=True):
            res = lt_ctx.run_udf(
                udf=DebugDeviceUDF(backends=('cupy', 'numpy')),
                dataset=ds
            )
        for val in res['device_id'].data[0].values():
            assert val['cpu'] is None
            assert val['cuda'] == 23

    # We make sure that the mocking was successful, i.e.
    # restored the previous state
    assert use_cpu == bae.get_use_cpu()
    assert use_cuda == bae.get_use_cuda()
    assert backend == bae.get_device_class()

    assert np.all(res['device_class'].data == 'cuda')
    assert np.allclose(res['on_device'].data, data.sum(axis=(0, 1)))

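# Hypothetical sketch of the `MockCuda` helper used in test_run_cupy above: a
# stand-in for `cupy.cuda` that offers just enough surface (a selectable Device)
# for device-selection code to run against NumPy. What the real helper provides
# exactly is an assumption; this sketch uses a different name to avoid shadowing.
class MockCudaSketch:
    class Device:
        def __init__(self, device_id):
            self.device_id = device_id

        def use(self):
            # Selecting the "device" is a no-op on the NumPy stand-in
            pass
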
def test_run_numpy(lt_ctx, mask_cupy):
    data = _mk_random(size=(16, 16, 16, 16))
    ds = lt_ctx.load("memory", data=data)

    use_cpu = bae.get_use_cpu()
    use_cuda = bae.get_use_cuda()
    backend = bae.get_device_class()

    with mock.patch.dict(os.environ, {'LIBERTEM_USE_CPU': "42"}):
        # This should set the same environment variable as the mock above
        # so that it will be unset after the "with"
        bae.set_use_cpu(42)
        res = lt_ctx.run_udf(udf=DebugDeviceUDF(), dataset=ds)
        for val in res['device_id'].data[0].values():
            assert val['cpu'] == 42
            assert val['cuda'] is None

    # We make sure that the mocking was successful, i.e.
    # restored the previous state
    assert use_cpu == bae.get_use_cpu()
    assert use_cuda == bae.get_use_cuda()
    assert backend == bae.get_device_class()

    assert np.all(res['device_class'].data == 'cpu')
    assert np.allclose(res['on_device'].data, data.sum(axis=(0, 1)))

def test_run_default(lt_ctx, mock_cupy):
    data = _mk_random(size=(16, 16, 16, 16))
    ds = lt_ctx.load("memory", data=data)

    res = lt_ctx.run_udf(udf=DebugDeviceUDF(), dataset=ds)
    # Make sure a single string works: passing one is a common mistake,
    # and we can guess what it is supposed to mean
    _ = lt_ctx.run_udf(udf=DebugDeviceUDF(backends='numpy'), dataset=ds)

    for val in res['device_id'].data[0].values():
        # The inline executor uses CPU 0 by default
        assert val['cpu'] == 0
        assert val['cuda'] is None

    # Defaults to running on CPU
    assert np.all(res['device_class'].data == 'cpu')
    assert np.allclose(res['on_device'].data, data.sum(axis=(0, 1)))

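# Illustrative sketch, not the real DebugDeviceUDF: a minimal UDF that declares
# NumPy and CuPy support the way the `backends` arguments above suggest. It follows
# the documented LiberTEM UDF API (get_backends, self.xp, nav buffers); treat the
# class itself as an example, not as part of this test suite.
from libertem.udf import UDF


class MaxFrameSketchUDF(UDF):
    def get_backends(self):
        # Advertise both backends so the task can run on CPU workers or on
        # CuPy-enabled GPU workers; the scheduler picks based on resources.
        return ('cupy', 'numpy')

    def get_result_buffers(self):
        return {
            'maxval': self.buffer(kind='nav', dtype='float32'),
        }

    def process_frame(self, frame):
        # self.xp is numpy on CPU workers and cupy on CuPy-enabled GPU workers;
        # float() moves the scalar result to the host in either case.
        self.results.maxval[:] = float(self.xp.max(frame))
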
def test_start_local_default(hdf5_ds_1, local_cluster_ctx):
    mask = _mk_random(size=(16, 16))
    d = detect()
    cudas = d['cudas']
    with hdf5_ds_1.get_reader().get_h5ds() as h5ds:
        data = h5ds[:]
        expected = _naive_mask_apply([mask], data)

    ctx = local_cluster_ctx
    analysis = ctx.create_mask_analysis(dataset=hdf5_ds_1, factories=[lambda: mask])

    num_cores_ds = ctx.load('memory', data=np.zeros((2, 3, 4, 5)))
    workers = ctx.executor.get_available_workers()
    cpu_count = len(workers.has_cpu())
    gpu_count = len(workers.has_cuda())
    assert num_cores_ds._cores == max(cpu_count, gpu_count)

    # Based on ApplyMasksUDF, which is CuPy-enabled
    hybrid = ctx.run(analysis)
    _ = ctx.run_udf(udf=DebugDeviceUDF(), dataset=hdf5_ds_1)
    _ = ctx.run_udf(udf=DebugDeviceUDF(backends=('cupy', 'numpy')), dataset=hdf5_ds_1)
    _ = ctx.run_udf(udf=DebugDeviceUDF(backends=('cuda', 'numpy')), dataset=hdf5_ds_1)
    _ = ctx.run_udf(udf=DebugDeviceUDF(backends=('cupy', 'cuda', 'numpy')), dataset=hdf5_ds_1)
    if cudas:
        cuda_only = ctx.run_udf(
            udf=DebugDeviceUDF(backends=('cuda', 'numpy')),
            dataset=hdf5_ds_1,
            backends=('cuda', )
        )
        if d['has_cupy']:
            cupy_only = ctx.run_udf(
                udf=DebugDeviceUDF(backends=('cupy', 'numpy')),
                dataset=hdf5_ds_1,
                backends=('cupy', )
            )
        else:
            with pytest.raises(RuntimeError):
                cupy_only = ctx.run_udf(
                    udf=DebugDeviceUDF(backends=('cupy', 'numpy')),
                    dataset=hdf5_ds_1,
                    backends=('cupy', )
                )
            cupy_only = None

    numpy_only = ctx.run_udf(udf=DebugDeviceUDF(backends=('numpy', )), dataset=hdf5_ds_1)

    assert np.allclose(hybrid.mask_0.raw_data, expected)
    if cudas:
        assert np.all(cuda_only['device_class'].data == 'cuda')
        if cupy_only is not None:
            assert np.all(cupy_only['device_class'].data == 'cuda')
    assert np.all(numpy_only['device_class'].data == 'cpu')

def test_start_local_cpuonly(hdf5_ds_1):
    # We don't use all CPUs since that might be too many
    cpus = (0, 1)
    hdf5_ds_1.set_num_cores(len(cpus))
    mask = _mk_random(size=(16, 16))
    with hdf5_ds_1.get_reader().get_h5ds() as h5ds:
        data = h5ds[:]
        expected = _naive_mask_apply([mask], data)

    spec = cluster_spec(cpus=cpus, cudas=(), has_cupy=False)
    with DaskJobExecutor.make_local(spec=spec) as executor:
        ctx = api.Context(executor=executor)
        analysis = ctx.create_mask_analysis(
            dataset=hdf5_ds_1, factories=[lambda: mask]
        )
        results = ctx.run(analysis)
        udf_res = ctx.run_udf(udf=DebugDeviceUDF(), dataset=hdf5_ds_1)
        # No CuPy resources
        with pytest.raises(RuntimeError):
            _ = ctx.run_udf(udf=DebugDeviceUDF(backends=('cupy',)), dataset=hdf5_ds_1)

    assert np.allclose(
        results.mask_0.raw_data,
        expected
    )

    found = {}

    for val in udf_res['device_id'].data[0].values():
        print(val)
        # no CUDA
        assert val["cuda"] is None
        found[val["cpu"]] = True

    for val in udf_res['backend'].data[0].values():
        print(val)
        # no CUDA
        assert 'numpy' in val

    # Test that each CPU got work. Whether this always holds
    # depends on the scheduler's behavior.
    assert set(found.keys()) == set(cpus)

    assert np.all(udf_res['device_class'].data == 'cpu')
    assert np.allclose(udf_res['on_device'].data, data.sum(axis=(0, 1)))

def test_use_plain_dask(hdf5_ds_1):
    # We deactivate the resource scheduling and run on a plain dask cluster
    hdf5_ds_1.set_num_cores(2)
    mask = _mk_random(size=(16, 16))
    with hdf5_ds_1.get_reader().get_h5ds() as h5ds:
        data = h5ds[:]
        expected = _naive_mask_apply([mask], data)

    with dd.LocalCluster(n_workers=2, threads_per_worker=1) as cluster:
        client = dd.Client(cluster, set_as_default=False)
        try:
            executor = DaskJobExecutor(client=client)
            ctx = api.Context(executor=executor)
            analysis = ctx.create_mask_analysis(
                dataset=hdf5_ds_1, factories=[lambda: mask]
            )
            results = ctx.run(analysis)
            udf_res = ctx.run_udf(udf=DebugDeviceUDF(), dataset=hdf5_ds_1)
            # Requesting CuPy, which is not available
            with pytest.raises(RuntimeError):
                _ = ctx.run_udf(udf=DebugDeviceUDF(backends=('cupy',)), dataset=hdf5_ds_1)
        finally:
            # to fix "distributed.client - ERROR - Failed to reconnect to scheduler after 10.00 seconds, closing client"  # NOQA
            client.close()

    assert np.allclose(
        results.mask_0.raw_data,
        expected
    )

    for val in udf_res['device_id'].data[0].values():
        print(val)
        # no CUDA
        assert val["cuda"] is None
        # Default without worker setup
        assert val["cpu"] == 0

    for val in udf_res['backend'].data[0].values():
        print(val)
        # no CUDA
        assert 'numpy' in val

    assert np.all(udf_res['device_class'].data == 'cpu')
    assert np.allclose(udf_res['on_device'].data, data.sum(axis=(0, 1)))

def test_start_local_cudaonly(hdf5_ds_1):
    cudas = detect()['cudas']
    # Make sure we have enough partitions
    hdf5_ds_1.set_num_cores(len(cudas))
    with hdf5_ds_1.get_reader().get_h5ds() as h5ds:
        data = h5ds[:]

    spec = cluster_spec(cpus=(), cudas=cudas, has_cupy=False)
    with DaskJobExecutor.make_local(spec=spec) as executor:
        ctx = api.Context(executor=executor)
        udf_res = ctx.run_udf(udf=DebugDeviceUDF(backends=('cuda', )), dataset=hdf5_ds_1)
        # No CPU compute resources
        with pytest.raises(RuntimeError):
            _ = ctx.run_udf(udf=DebugDeviceUDF(backends=('numpy', )), dataset=hdf5_ds_1)
        # No ndarray (CuPy) resources
        with pytest.raises(RuntimeError):
            _ = ctx.run_udf(udf=DebugDeviceUDF(backends=('cupy', )), dataset=hdf5_ds_1)

    found = {}

    for val in udf_res['device_id'].data[0].values():
        print(val)
        # no CPU
        assert val["cpu"] is None
        # Register which GPUs got work
        found[val["cuda"]] = True

    for val in udf_res['backend'].data[0].values():
        print(val)
        # CUDA, but no CuPy, i.e. use NumPy
        assert 'numpy' in val

    # Test that each GPU got work. Whether this always holds
    # depends on the scheduler's behavior.
    assert set(found.keys()) == set(cudas)

    assert np.all(udf_res['device_class'].data == 'cuda')
    assert np.allclose(udf_res['on_device'].data, data.sum(axis=(0, 1)))

def test_start_local_default(hdf5_ds_1):
    mask = _mk_random(size=(16, 16))
    d = detect()
    cudas = d['cudas']
    with hdf5_ds_1.get_reader().get_h5ds() as h5ds:
        data = h5ds[:]
        expected = _naive_mask_apply([mask], data)

    with api.Context() as ctx:
        analysis = ctx.create_mask_analysis(dataset=hdf5_ds_1, factories=[lambda: mask])
        # Based on ApplyMasksUDF, which is CuPy-enabled
        hybrid = ctx.run(analysis)
        _ = ctx.run_udf(udf=DebugDeviceUDF(), dataset=hdf5_ds_1)
        _ = ctx.run_udf(udf=DebugDeviceUDF(backends=('cupy', 'numpy')), dataset=hdf5_ds_1)
        _ = ctx.run_udf(udf=DebugDeviceUDF(backends=('cuda', 'numpy')), dataset=hdf5_ds_1)
        _ = ctx.run_udf(udf=DebugDeviceUDF(backends=('cupy', 'cuda', 'numpy')), dataset=hdf5_ds_1)
        if cudas:
            cuda_only = ctx.run_udf(
                udf=DebugDeviceUDF(backends=('cuda', 'numpy')),
                dataset=hdf5_ds_1,
                backends=('cuda', )
            )
            if d['has_cupy']:
                cupy_only = ctx.run_udf(
                    udf=DebugDeviceUDF(backends=('cupy', 'numpy')),
                    dataset=hdf5_ds_1,
                    backends=('cupy', )
                )
            else:
                with pytest.raises(RuntimeError):
                    cupy_only = ctx.run_udf(
                        udf=DebugDeviceUDF(backends=('cupy', 'numpy')),
                        dataset=hdf5_ds_1,
                        backends=('cupy', )
                    )
                cupy_only = None

        numpy_only = ctx.run_udf(udf=DebugDeviceUDF(backends=('numpy', )), dataset=hdf5_ds_1)

        assert np.allclose(hybrid.mask_0.raw_data, expected)
        if cudas:
            assert np.all(cuda_only['device_class'].data == 'cuda')
            if cupy_only is not None:
                assert np.all(cupy_only['device_class'].data == 'cuda')
        assert np.all(numpy_only['device_class'].data == 'cpu')