async def put(self):
    # TODO: extract json request data stuff into mixin?
    request_data = tornado.escape.json_decode(self.request.body)
    connection = request_data['connection']
    if connection["type"].lower() == "tcp":
        dask_client = await AioClient(address=connection['address'])
        executor = DaskJobExecutor(client=dask_client, is_local=connection['isLocal'])
    elif connection["type"].lower() == "local":
        # NOTE: we can't use DaskJobExecutor.make_local as it doesn't use AioClient,
        # which then conflicts with LocalCluster(asynchronous=True)
        # error message: "RuntimeError: Non-thread-safe operation invoked on an event loop
        # other than the current one"
        # related: debugging via env var PYTHONASYNCIODEBUG=1
        cluster_kwargs = {
            "threads_per_worker": 1,
            "asynchronous": True,
        }
        if "numWorkers" in connection:
            cluster_kwargs.update({"n_workers": connection["numWorkers"]})
        cluster = dd.LocalCluster(**cluster_kwargs)
        dask_client = await AioClient(address=cluster)
        executor = DaskJobExecutor(client=dask_client, is_local=True)
    self.data.set_executor(executor, request_data)
    msg = Message(self.data).initial_state(
        jobs=self.data.serialize_jobs(),
        datasets=self.data.serialize_datasets(),
    )
    log_message(msg)
    self.event_registry.broadcast_event(msg)
    self.write({
        "status": "ok",
        "connection": connection,
    })
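# For reference, a hypothetical request body this handler would accept. The
# field names are exactly the ones read by the handler above; the values are
# made-up examples:
#
#   {"connection": {"type": "tcp", "address": "tcp://localhost:8786", "isLocal": false}}
#
# or, for an on-demand local cluster:
#
#   {"connection": {"type": "local", "numWorkers": 4}}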
def test_multiple_clients(local_cluster_url, default_raw):
    ex1 = DaskJobExecutor.connect(local_cluster_url)

    # This creates a second Client, and even though we are setting
    # `set_as_default=False`, this Client is then used by functions like
    # `dd.as_completed`. That is because `set_as_default` only sets the dask
    # scheduler config to "dask.distributed"; it does not set the _client_
    # itself as the global default `Client`! So any time `as_completed` is
    # called, the `loop` needs to be set correctly, otherwise this may result
    # in strange hangs and crashes.
    DaskJobExecutor.connect(local_cluster_url)

    udf = SumUDF()

    cx1 = Context(executor=ex1)
    cx1.run_udf(dataset=default_raw, udf=udf)
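# A minimal sketch, not part of the test above, of the workaround hinted at in
# the comment: pass the event loop of the Client we actually want to use, so
# that `as_completed` does not pick up a different, globally-registered
# Client. `loop` is a parameter of `dask.distributed.as_completed`; `client`
# and `futures` are assumed to be provided by the caller.
def _as_completed_with_explicit_loop(client, futures):
    import dask.distributed as dd
    for future, result in dd.as_completed(futures, with_results=True, loop=client.loop):
        yield result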
def test_start_local_cupyonly(hdf5_ds_1):
    cudas = detect()['cudas']
    # Make sure we have enough partitions
    hdf5_ds_1.set_num_cores(len(cudas))
    mask = _mk_random(size=(16, 16))
    with hdf5_ds_1.get_reader().get_h5ds() as h5ds:
        data = h5ds[:]
        expected = _naive_mask_apply([mask], data)

    spec = cluster_spec(cpus=(), cudas=cudas, has_cupy=True)
    with DaskJobExecutor.make_local(spec=spec) as executor:
        ctx = api.Context(executor=executor)
        # Uses ApplyMasksUDF, which supports CuPy
        analysis = ctx.create_mask_analysis(
            dataset=hdf5_ds_1, factories=[lambda: mask]
        )
        results = ctx.run(analysis)
        udf_res = ctx.run_udf(udf=DebugDeviceUDF(), dataset=hdf5_ds_1)
        # No CPU compute resources
        with pytest.raises(RuntimeError):
            _ = ctx.run_udf(udf=DebugDeviceUDF(backends=('numpy',)), dataset=hdf5_ds_1)
        cuda_res = ctx.run_udf(udf=DebugDeviceUDF(backends=('cuda',)), dataset=hdf5_ds_1)

    assert np.allclose(
        results.mask_0.raw_data,
        expected
    )

    found = {}

    for val in udf_res['device_id'].data[0].values():
        print(val)
        # no CPU
        assert val["cpu"] is None
        # Register which GPUs got work
        found[val["cuda"]] = True

    for val in cuda_res['device_id'].data[0].values():
        print(val)
        # no CPU
        assert val["cpu"] is None
        # Register which GPUs got work
        found[val["cuda"]] = True

    for val in udf_res['backend'].data[0].values():
        # use CuPy
        print(val)
        assert 'cupy' in val

    for val in cuda_res['backend'].data[0].values():
        # no CuPy, i.e. NumPy
        print(val)
        assert 'numpy' in val

    # Test if each GPU got work. We will have to see whether this always
    # works, since it depends on the scheduler behavior.
    assert set(found.keys()) == set(cudas)

    assert np.all(udf_res['device_class'].data == 'cuda')
    assert np.allclose(udf_res['on_device'].data, data.sum(axis=(0, 1)))
def _create_local_executor(self):
    # psutil returns None if the physical core count cannot be determined
    cores = psutil.cpu_count(logical=False)
    if cores is None:
        cores = 2
    return DaskJobExecutor.make_local(
        cluster_kwargs={"threads_per_worker": 1, "n_workers": cores}
    )
def ipy_ctx():
    import ipyparallel
    client = ipyparallel.Client()
    # wait for two engines: see also docker-compose.yml where the engines are started
    client.wait_for_engines(2)
    dask_client = client.become_dask()
    executor = DaskJobExecutor(client=dask_client, is_local=False)
    with lt.Context(executor=executor) as ctx:
        yield ctx
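# The fixture above assumes an ipyparallel cluster with at least two engines
# is already running (see the docker-compose.yml reference). As a sketch, a
# local cluster with a matching engine count can also be started with the
# stock ipyparallel CLI:
#
#   ipcluster start -n 2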
def get_libertem_executor(self):
    # psutil returns None if the physical core count cannot be determined
    cores = psutil.cpu_count(logical=False)
    if cores is None:
        cores = 2
    executor = DaskJobExecutor.make_local(cluster_kwargs={
        "threads_per_worker": 1,
        "n_workers": cores,
    })
    return AsyncAdapter(wrapped=executor)
def dist_ctx(scheduler_addr):
    """
    This Context needs to have an external dask cluster running, with the
    following assumptions:

    - two workers: hostnames worker-1 and worker-2
    - one scheduler node
    - data availability TBD
    """
    executor = DaskJobExecutor.connect(scheduler_addr)
    with lt.Context(executor=executor) as ctx:
        yield ctx
def ipy_ctx():
    import ipyparallel
    client = ipyparallel.Client()
    # poll for up to 10 seconds until at least one engine is available
    retries = 10
    while retries > 0:
        retries -= 1
        if len(client.ids) > 0:
            break
        time.sleep(1)
    dask_client = client.become_dask()
    executor = DaskJobExecutor(client=dask_client, is_local=False)
    with lt.Context(executor=executor) as ctx:
        yield ctx
def dist_ctx():
    """
    This Context needs to have an external dask cluster running, with the
    following assumptions:

    - two workers: hostnames worker-1 and worker-2
    - one scheduler node
    - data availability TBD
    - the address of the dask scheduler is passed in as DASK_SCHEDULER_ADDRESS
    """
    scheduler_addr = os.environ['DASK_SCHEDULER_ADDRESS']
    executor = DaskJobExecutor.connect(scheduler_addr)
    with lt.Context(executor=executor) as ctx:
        yield ctx
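# A sketch of how the external cluster assumed by the fixture above could be
# brought up with the standard dask CLI; the hostname "scheduler" and the port
# are assumptions consistent with the docstring:
#
#   dask-scheduler --port 8786          # on the scheduler node
#   dask-worker tcp://scheduler:8786    # on worker-1 and on worker-2
#
#   DASK_SCHEDULER_ADDRESS=tcp://scheduler:8786 pytest ...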
def test_connect_default(local_cluster_url):
    try:
        executor = DaskJobExecutor.connect(
            local_cluster_url,
            client_kwargs={'set_as_default': True}
        )
        ctx = Context(executor=executor)
        # This queries Dask which scheduler it is using
        ctx2 = Context.make_with("dask-integration")
        # make sure the second uses the Client of the first
        assert ctx2.executor.client is ctx.executor.client
    finally:
        # Only close the Client, keep the cluster running
        # since that is test infrastructure
        executor.client.close()
        ctx.close()
def test_start_local_cpuonly(hdf5_ds_1):
    # We don't use all available CPUs since that might be too many
    cpus = (0, 1)
    hdf5_ds_1.set_num_cores(len(cpus))
    mask = _mk_random(size=(16, 16))
    with hdf5_ds_1.get_reader().get_h5ds() as h5ds:
        data = h5ds[:]
        expected = _naive_mask_apply([mask], data)

    spec = cluster_spec(cpus=cpus, cudas=(), has_cupy=False)
    with DaskJobExecutor.make_local(spec=spec) as executor:
        ctx = api.Context(executor=executor)
        analysis = ctx.create_mask_analysis(
            dataset=hdf5_ds_1, factories=[lambda: mask]
        )
        results = ctx.run(analysis)
        udf_res = ctx.run_udf(udf=DebugDeviceUDF(), dataset=hdf5_ds_1)
        # No CuPy resources
        with pytest.raises(RuntimeError):
            _ = ctx.run_udf(udf=DebugDeviceUDF(backends=('cupy',)), dataset=hdf5_ds_1)

    assert np.allclose(
        results.mask_0.raw_data,
        expected
    )

    found = {}

    for val in udf_res['device_id'].data[0].values():
        print(val)
        # no CUDA
        assert val["cuda"] is None
        found[val["cpu"]] = True

    for val in udf_res['backend'].data[0].values():
        print(val)
        # no CUDA
        assert 'numpy' in val

    # Each CPU got work. We will have to see whether this always works,
    # since it depends on the scheduler behavior.
    assert set(found.keys()) == set(cpus)

    assert np.all(udf_res['device_class'].data == 'cpu')
    assert np.allclose(udf_res['on_device'].data, data.sum(axis=(0, 1)))
def test_preload(hdf5_ds_1):
    # We don't use all available CPUs since that might be too many
    cpus = (0, 1)
    hdf5_ds_1.set_num_cores(len(cpus))

    class CheckEnvUDF(NoOpUDF):
        def process_tile(self, tile):
            assert os.environ['LT_TEST_1'] == 'hello'
            assert os.environ['LT_TEST_2'] == 'world'

    preloads = (
        "import os; os.environ['LT_TEST_1'] = 'hello'",
        "import os; os.environ['LT_TEST_2'] = 'world'",
    )

    spec = cluster_spec(cpus=cpus, cudas=(), has_cupy=False, preload=preloads)
    with DaskJobExecutor.make_local(spec=spec) as executor:
        ctx = api.Context(executor=executor)
        ctx.run_udf(udf=CheckEnvUDF(), dataset=hdf5_ds_1)
def test_use_plain_dask(hdf5_ds_1):
    # We deactivate the resource scheduling and run on a plain dask cluster
    hdf5_ds_1.set_num_cores(2)
    mask = _mk_random(size=(16, 16))
    with hdf5_ds_1.get_reader().get_h5ds() as h5ds:
        data = h5ds[:]
        expected = _naive_mask_apply([mask], data)

    with dd.LocalCluster(n_workers=2, threads_per_worker=1) as cluster:
        client = dd.Client(cluster, set_as_default=False)
        try:
            executor = DaskJobExecutor(client=client)
            ctx = api.Context(executor=executor)
            analysis = ctx.create_mask_analysis(
                dataset=hdf5_ds_1, factories=[lambda: mask]
            )
            results = ctx.run(analysis)
            udf_res = ctx.run_udf(udf=DebugDeviceUDF(), dataset=hdf5_ds_1)
            # Requesting CuPy, which is not available
            with pytest.raises(RuntimeError):
                _ = ctx.run_udf(udf=DebugDeviceUDF(backends=('cupy',)), dataset=hdf5_ds_1)
        finally:
            # to fix "distributed.client - ERROR - Failed to reconnect to
            # scheduler after 10.00 seconds, closing client"
            client.close()

    assert np.allclose(
        results.mask_0.raw_data,
        expected
    )

    for val in udf_res['device_id'].data[0].values():
        print(val)
        # no CUDA
        assert val["cuda"] is None
        # Default without worker setup
        assert val["cpu"] == 0

    for val in udf_res['backend'].data[0].values():
        print(val)
        # no CUDA
        assert 'numpy' in val

    assert np.all(udf_res['device_class'].data == 'cpu')
    assert np.allclose(udf_res['on_device'].data, data.sum(axis=(0, 1)))
def main():
    with DaskJobExecutor.connect('tcp://localhost:8786') as executor:
        ctx = api.Context(executor=executor)
        ds = ctx.load(
            "RAW",
            path=r"C:\Users\Dieter\testfile-32-32-32-32-float32.raw",
            nav_shape=(32, 32),
            sig_shape=(32, 32),
            dtype=np.float32,
        )

        sum_analysis = ctx.create_sum_analysis(dataset=ds)
        sum_result = ctx.run(sum_analysis)

        sum_image = DM.CreateImage(sum_result.intensity.raw_data.copy())
        sum_image.ShowImage()

        haadf_analysis = ctx.create_ring_analysis(dataset=ds)
        haadf_result = ctx.run(haadf_analysis)

        haadf_image = DM.CreateImage(haadf_result.intensity.raw_data.copy())
        haadf_image.ShowImage()
def test_start_local_cudaonly(hdf5_ds_1):
    cudas = detect()['cudas']
    # Make sure we have enough partitions
    hdf5_ds_1.set_num_cores(len(cudas))
    with hdf5_ds_1.get_reader().get_h5ds() as h5ds:
        data = h5ds[:]

    spec = cluster_spec(cpus=(), cudas=cudas, has_cupy=False)
    with DaskJobExecutor.make_local(spec=spec) as executor:
        ctx = api.Context(executor=executor)
        udf_res = ctx.run_udf(udf=DebugDeviceUDF(backends=('cuda', )), dataset=hdf5_ds_1)
        # No CPU compute resources
        with pytest.raises(RuntimeError):
            _ = ctx.run_udf(udf=DebugDeviceUDF(backends=('numpy', )), dataset=hdf5_ds_1)
        # No ndarray (CuPy) resources
        with pytest.raises(RuntimeError):
            _ = ctx.run_udf(udf=DebugDeviceUDF(backends=('cupy', )), dataset=hdf5_ds_1)

    found = {}

    for val in udf_res['device_id'].data[0].values():
        print(val)
        # no CPU
        assert val["cpu"] is None
        # Register which GPUs got work
        found[val["cuda"]] = True

    for val in udf_res['backend'].data[0].values():
        print(val)
        # CUDA, but no CuPy, i.e. use NumPy
        assert 'numpy' in val

    # Test if each GPU got work. We will have to see whether this always
    # works, since it depends on the scheduler behavior.
    assert set(found.keys()) == set(cudas)

    assert np.all(udf_res['device_class'].data == 'cuda')
    assert np.allclose(udf_res['on_device'].data, data.sum(axis=(0, 1)))
async def get_preview_image(self, dataset_uuid):
    ds = self.data.get_dataset(dataset_uuid)
    job = SumFramesJob(dataset=ds)

    dask_client = await AioClient("tcp://localhost:8786")
    executor = DaskJobExecutor(client=dask_client, is_local=True)

    futures = []
    for task in job.get_tasks():
        submit_kwargs = {}
        futures.append(executor.client.submit(task, **submit_kwargs))

    full_result = np.zeros(shape=ds.shape[2:])
    async for future, result in dd.as_completed(futures, with_results=True):
        for tile in result:
            tile.copy_to_result(full_result)
    image = _encode_image(
        full_result,
        colormap=cm.gist_earth,
        save_kwargs={'format': 'png'},
    )
    return image.read()
async def put(self, uuid):
    request_data = tornado.escape.json_decode(self.request.body)
    params = request_data['job']
    ds = self.data.get_dataset(params['dataset'])
    mask_factories = self.make_mask_factories(params['masks'], frame_size=ds.shape[2:])
    job = ApplyMasksJob(dataset=ds, mask_factories=mask_factories)
    self.data.register_job(uuid=uuid, job=job)

    dask_client = await AioClient("tcp://localhost:8786")
    executor = DaskJobExecutor(client=dask_client, is_local=True)

    futures = []
    for task in job.get_tasks():
        submit_kwargs = {}
        futures.append(executor.client.submit(task, **submit_kwargs))

    self.write(Message(self.data).start_job(job_id=uuid))
    self.finish()
    self.event_registry.broadcast_event(
        Message(self.data).start_job(job_id=uuid)
    )

    full_result = np.zeros(shape=(len(mask_factories),) + tuple(ds.shape[:2]))
    async for future, result in dd.as_completed(futures, with_results=True):
        # TODO:
        # + only send PNG of area that has changed (bounding box of all result tiles!)
        # + normalize each channel (per channel: keep running min/max, map data to [0, 1])
        # + if min/max changes, send whole channel (all results up to this point
        #   re-normalized)
        # + maybe saturate up to some point (20% over current max => keep current max)
        #   and send whole result image once finished
        # + maybe use visualization framework in-browser (example: GR)
        # TODO: update task_result message:
        # + send bbox for blitting
        for tile in result:
            tile.copy_to_result(full_result)
        images = await self.result_images(
            full_result,
            save_kwargs={'format': 'jpeg', 'quality': 65},
        )

        # NOTE: make sure the following broadcast_event messages are sent atomically!
        # (that is: keep the code below synchronous, and only send the messages
        # once the images have finished encoding, and then send all at once)
        self.event_registry.broadcast_event(
            Message(self.data).task_result(
                job_id=uuid,
                num_images=len(images),
            )
        )
        for image in images:
            raw_bytes = image.read()
            self.event_registry.broadcast_event(raw_bytes, binary=True)

    images = await self.result_images(
        full_result,
        save_kwargs={'format': 'png'},
    )
    self.event_registry.broadcast_event(
        Message(self.data).finish_job(
            job_id=uuid,
            num_images=len(images),
        )
    )
    for image in images:
        raw_bytes = image.read()
        self.event_registry.broadcast_event(raw_bytes, binary=True)
def dask_executor():
    sync_executor = DaskJobExecutor.make_local()
    yield sync_executor
    sync_executor.close()
async def aexecutor():
    sync_executor = DaskJobExecutor.make_local()
    executor = AsyncAdapter(wrapped=sync_executor)
    yield executor
    await executor.close()
def dask_executor(local_cluster_url):
    executor = DaskJobExecutor.connect(local_cluster_url)
    yield executor
    executor.close()
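# A sketch of a test consuming one of the executor fixtures above; the
# `default_raw` dataset fixture and the Context/SumUDF names are assumed to be
# available as in the other tests in this collection:
def test_with_fixture_executor(dask_executor, default_raw):
    ctx = Context(executor=dask_executor)
    ctx.run_udf(dataset=default_raw, udf=SumUDF())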
@classmethod
def make_with(cls, executor_spec: ExecutorSpecType, *args, **kwargs) -> 'Context':
    '''
    Create a Context with a specific kind of executor.

    .. versionadded:: 0.9.0

    This simplifies creating a :class:`Context` for a number of common
    executor choices. See :ref:`executors` for general information on
    executors.

    Parameters
    ----------

    executor_spec:
        A string identifier for executor variants:

        "synchronous", "inline":
            Use a single-process, single-threaded
            :class:`~libertem.executor.inline.InlineJobExecutor`
        "threads":
            Use a multi-threaded
            :class:`~libertem.executor.concurrent.ConcurrentJobExecutor`
        "dask-integration":
            Use a JobExecutor that is compatible with the currently active
            Dask scheduler. See
            :func:`~libertem.executor.integration.get_dask_integration_executor`
            for more information.
        "dask-make-default":
            Create a local :code:`dask.distributed` cluster and client using
            :meth:`~libertem.executor.dask.DaskJobExecutor.make_local`,
            similar to the default behaviour of :code:`Context()` called with
            no arguments. However, the Client will be set as the default Dask
            scheduler and will persist after the LiberTEM Context closes,
            which is suitable for downstream computation using
            :code:`dask.distributed`.
        "delayed":
            Create a :class:`~libertem.executor.delayed.DelayedJobExecutor`
            which performs computation using
            `dask.delayed <https://docs.dask.org/en/stable/delayed.html>`_.
            This functionality is highly experimental at this time, see
            :ref:`delayed_udfs` for more information.

    *args, **kwargs
        Passed to :class:`Context`.

    Returns
    -------

    Instance of :class:`Context` using a new instance of the specified
    executor.
    '''
    if executor_spec in ('synchronous', 'inline'):
        executor = InlineJobExecutor()
    elif executor_spec == 'threads':
        executor = ConcurrentJobExecutor.make_local()
    elif executor_spec == 'dask-integration':
        executor = get_dask_integration_executor()
    elif executor_spec == 'dask-make-default':
        executor = DaskJobExecutor.make_local(
            client_kwargs={"set_as_default": True}
        )
    elif executor_spec == 'delayed':
        executor = DelayedJobExecutor()
    else:
        raise ValueError(
            f'Argument `executor_spec` is {executor_spec}. Allowed are '
            '"synchronous", "inline", "threads", "dask-integration", '
            '"dask-make-default" or "delayed".'
        )
    return cls(executor=executor, *args, **kwargs)
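# Minimal usage sketches for the variants documented above; the string
# identifiers come from the docstring, everything else is standard usage:
#
#   ctx = Context.make_with("inline")             # single-process, for debugging
#   ctx = Context.make_with("threads")            # thread-based local executor
#   ctx = Context.make_with("dask-make-default")  # local cluster, set as dask default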
def _create_local_executor(self):
    return DaskJobExecutor.make_local()
def get_libertem_executor(self):
    executor = DaskJobExecutor.make_local()
    return AsyncAdapter(wrapped=executor)