async def pick_frame(self, dataset_uuid, x, y):
    ds = self.data.get_dataset(dataset_uuid)
    x = int(x)
    y = int(y)
    # pick a single frame: 1x1 in the nav dimensions, full signal shape
    slice_ = Slice(
        origin=(y, x, 0, 0),
        shape=(1, 1, ds.shape[2], ds.shape[3]),
    )
    job = PickFrameJob(dataset=ds, slice_=slice_)
    executor = self.data.get_executor()

    log.info("picking %d/%d from %s", x, y, dataset_uuid)

    futures = []
    for task in job.get_tasks():
        submit_kwargs = {}
        futures.append(executor.client.submit(task, **submit_kwargs))

    full_result = np.zeros(shape=ds.shape[2:])
    # collect tiles as the dask futures finish and copy them into place
    async for future, result in dd.as_completed(futures, with_results=True):
        for tile in result:
            tile.copy_to_result(full_result)

    log.info("picking done, encoding image (dtype=%s)", full_result.dtype)
    # encoding is CPU-bound, so run it outside the event loop
    image = await run_blocking(
        _encode_image,
        full_result,
        colormap=cm.gist_earth,
        save_kwargs={'format': 'png'},
    )
    log.info("image encoded, sending response")
    return image.read()

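# The handler above delegates PNG encoding to an `_encode_image` helper that
# is not shown in this snippet. A minimal sketch of what such a helper could
# look like; this is an assumption for illustration, not the actual LiberTEM
# implementation: normalize, apply the matplotlib colormap, serialize via PIL.
from io import BytesIO

import numpy as np
from PIL import Image


def _encode_image(result, colormap, save_kwargs=None):
    save_kwargs = save_kwargs or {'format': 'png'}
    # normalize to [0, 1] so the colormap covers the full value range
    normalized = result - result.min()
    if normalized.max() > 0:
        normalized = normalized / normalized.max()
    # the colormap returns RGBA floats in [0, 1]; PIL wants 8-bit channels
    rgba = (colormap(normalized) * 255).astype(np.uint8)
    buf = BytesIO()
    Image.fromarray(rgba).save(buf, **save_kwargs)
    buf.seek(0)  # the caller does image.read(), so rewind the buffer
    return buf
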
async def test_run_job(async_executor):
    data = _mk_random(size=(16, 16, 16, 16), dtype='<u2')
    dataset = MemoryDataSet(data=data, tileshape=(1, 16, 16), num_partitions=2)
    expected = data[0, 0]

    slice_ = Slice(origin=(0, 0, 0), shape=Shape((1, 16, 16), sig_dims=2))
    job = PickFrameJob(dataset=dataset, slice_=slice_)
    out = job.get_result_buffer()
    async for tiles in async_executor.run_job(job, cancel_id="42"):
        for tile in tiles:
            tile.reduce_into_result(out)
    assert out.shape == (1, 16, 16)
    assert np.allclose(out, expected)

async def test_fd_limit(async_executor):
    import resource
    import psutil

    # Lower only the soft limit: this still triggers fd-exhaustion errors,
    # but allows us to raise the limit again afterwards.
    proc = psutil.Process()
    oldlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
    resource.setrlimit(resource.RLIMIT_NOFILE, (proc.num_fds() + 24, oldlimit[1]))
    print("fds", proc.num_fds())
    try:
        data = _mk_random(size=(1, 16, 16), dtype='<u2')
        dataset = MemoryDataSet(data=data, tileshape=(1, 16, 16), num_partitions=1)

        slice_ = Slice(origin=(0, 0, 0), shape=Shape((1, 16, 16), sig_dims=2))
        job = PickFrameJob(dataset=dataset, slice_=slice_)

        for i in range(32):
            print(i)
            print(proc.num_fds())
            async for tiles in async_executor.run_job(job, cancel_id="42"):
                pass
    finally:
        resource.setrlimit(resource.RLIMIT_NOFILE, oldlimit)

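# For reference, the soft/hard limit mechanics the test relies on, as a
# standalone sketch (the value 64 is an arbitrary assumption): lowering the
# soft limit needs no privileges, and raising it back succeeds as long as
# we stay at or below the unchanged hard limit.
import resource

soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
resource.setrlimit(resource.RLIMIT_NOFILE, (64, hard))    # lower soft limit
resource.setrlimit(resource.RLIMIT_NOFILE, (soft, hard))  # restore original
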
def test_get_multiple_frame_row():
    data = _mk_random(size=(16, 16, 16, 16))
    dataset = MemoryDataSet(
        data=data,
        tileshape=(1, 1, 16, 16),
        partition_shape=(16, 16, 16, 16),
        sig_dims=2,
    )
    job = PickFrameJob(
        dataset=dataset,
        slice_=Slice(origin=(5, 0, 0, 0), shape=Shape((1, 16, 16, 16), sig_dims=2)),
    )
    executor = InlineJobExecutor()

    result = np.zeros(job.get_result_shape())
    for tiles in executor.run_job(job):
        for tile in tiles:
            tile.reduce_into_result(result)
    assert result.shape == (1, 16, 16, 16)
    assert not np.allclose(result[0, 0], result[0, 1])
    assert np.allclose(result[0, 0:16], data[5, 0:16])

def get_job(self):
    origin = self.get_origin()
    # collapse the n-D nav origin to a linear frame index ...
    origin = (np.ravel_multi_index(origin, self.dataset.shape.nav), )
    shape = self.dataset.shape
    # ... and start the signal part of the slice at (0, ..., 0)
    origin = origin + tuple([0] * self.dataset.shape.sig.dims)
    return PickFrameJob(
        dataset=self.dataset,
        slice_=Slice(
            origin=origin,
            shape=Shape((1, ) + tuple(shape.sig), sig_dims=shape.sig.dims),
        ),
        squeeze=True,
    )

def get_job(self):
    assert self.dataset.shape.nav.dims in (1, 2, 3), \
        "can only handle 1D/2D/3D nav currently"
    x, y, z = (
        self.parameters.get('x'),
        self.parameters.get('y'),
        self.parameters.get('z'),
    )
    if self.dataset.shape.nav.dims == 1:
        if x is None:
            raise ValueError("need x to index 1D nav datasets")
        if y is not None or z is not None:
            raise ValueError(
                "y and z must not be specified for 1D nav dataset")
        origin = (x, )
    elif self.dataset.shape.nav.dims == 2:
        if x is None or y is None:
            raise ValueError("need x/y to index 2D nav datasets")
        if z is not None:
            raise ValueError("z must not be specified for 2D nav dataset")
        origin = (y, x)
    elif self.dataset.shape.nav.dims == 3:
        if x is None or y is None or z is None:
            raise ValueError("need x/y/z to index 3D nav datasets")
        origin = (z, y, x)
    else:
        raise ValueError(
            "cannot operate on datasets with more than 3 nav dims")

    raw_shape = self.dataset.raw_shape
    if raw_shape.nav.dims != len(origin):
        if raw_shape.nav.dims != 1:
            raise ValueError("can only reduce origin from N dims to 1D")
        # raw data has a flat nav axis: collapse the n-D origin to a
        # linear frame index
        origin = (np.ravel_multi_index(origin, self.dataset.shape.nav), )
    shape = self.dataset.shape
    # start the signal part of the slice at (0, ..., 0)
    origin = origin + tuple([0] * self.dataset.shape.sig.dims)
    return PickFrameJob(
        dataset=self.dataset,
        slice_=Slice(
            origin=origin,
            shape=Shape(tuple([1] * raw_shape.nav.dims) + tuple(shape.sig),
                        sig_dims=shape.sig.dims),
        ),
        squeeze=True,
    )

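# For illustration (not part of the source): how a (y, x) origin collapses
# to a single linear frame index when the raw data has a 1D nav axis, which
# is what the np.ravel_multi_index() call above computes.
import numpy as np

nav_shape = (16, 16)      # a 16x16 scan, stored as 256 consecutive frames
y, x = 5, 3
flat = np.ravel_multi_index((y, x), nav_shape)
assert flat == 5 * 16 + 3  # row-major order: frame index 83
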
def test_get_multiple_frames(lt_ctx):
    data = _mk_random(size=(16, 16, 16, 16))
    dataset = MemoryDataSet(data=data, tileshape=(1, 16, 16),
                            num_partitions=2, sig_dims=2)

    job = PickFrameJob(
        dataset=dataset,
        slice_=Slice(origin=(0, 0, 0), shape=Shape((2, 16, 16), sig_dims=2)),
    )

    result = lt_ctx.run(job)

    print(result[0, 0].astype("uint32"))
    print(data[0, 0])
    print(result[0, 1].astype("uint32"))
    print(data[0, 1])

    assert result.shape == (2, 16, 16)
    assert not np.allclose(result[0, 0], result[0, 1])
    assert np.allclose(result[0], data[0, 0])

async def test_fd_limit(aexecutor):
    import resource

    # Lower only the soft limit: this still triggers fd-exhaustion errors,
    # but allows us to raise the limit again afterwards.
    oldlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
    resource.setrlimit(resource.RLIMIT_NOFILE, (100, oldlimit[1]))
    try:
        data = _mk_random(size=(1, 1, 16, 16), dtype='<u2')
        dataset = MemoryDataSet(data=data, tileshape=(1, 1, 16, 16),
                                partition_shape=(1, 1, 16, 16))

        slice_ = Slice(origin=(0, 0, 0, 0), shape=Shape((1, 1, 16, 16), sig_dims=2))
        job = PickFrameJob(dataset=dataset, slice_=slice_)

        for i in range(32):
            print(i)
            async for tiles in aexecutor.run_job(job):
                pass
    finally:
        resource.setrlimit(resource.RLIMIT_NOFILE, oldlimit)

def create_pick_job(self, dataset: DataSet,
                    origin: Tuple[int], shape: Tuple[int] = None) -> PickFrameJob:
    """
    Create a job that picks raw data from `origin` with the size defined
    in `shape`.

    Note
    ----
    If you just want to read single frames, it is easier to use
    :meth:`create_pick_analysis`.

    Note
    ----
    It is not efficient to use this method on large parts of datasets, please
    consider implementing a UDF instead.

    .. deprecated:: 0.4.0
        Use :meth:`libertem.api.Context.create_pick_analysis`,
        :class:`libertem.udf.raw.PickUDF`,
        :class:`libertem.udf.masks.ApplyMasksUDF` or a custom UDF
        (:ref:`user-defined functions`) as a replacement.
        See also :ref:`job deprecation`.

    Parameters
    ----------
    dataset
        The dataset to work on
    origin
        Where to start reading. You can either specify all dimensions,
        or only nav dimensions, in which case the signal is read starting
        from (0, ..., 0).
    shape
        The shape of the data to read. If None, read a "frame" or single
        signal element

    Returns
    -------
    PickFrameJob : libertem.job.base.Job
        A job that returns the specified raw data as :class:`numpy.ndarray`

    Examples
    --------

    >>> dataset = ctx.load(
    ...     filetype="memory",
    ...     data=np.zeros([16, 16, 16, 16, 16], dtype=np.float32),
    ...     sig_dims=2
    ... )
    >>> origin = (7, 8, 9)
    >>> job = ctx.create_pick_job(dataset=dataset, origin=origin)
    >>> result = ctx.run(job)
    >>> assert result.shape == tuple(dataset.shape.sig)
    """
    warnings.warn(
        "The Job API is deprecated and will be removed after version 0.6.0. "
        "Use Context.create_pick_analysis, libertem.udf.raw.PickUDF, "
        "libertem.udf.masks.ApplyMasksUDF or a custom UDF as a replacement. "
        "See "
        "https://libertem.github.io/LiberTEM/changelog.html#job-deprecation "
        "for details and a migration guide.",
        FutureWarning
    )
    # FIXME: this method works well if we can flatten to 3D
    # need vectorized I/O for the general case
    if len(origin) == dataset.shape.nav.dims:
        # nav-only origin: flatten it and read the signal from (0, ..., 0)
        origin = (np.ravel_multi_index(origin, dataset.shape.nav),)\
            + tuple([0] * dataset.shape.sig.dims)
    elif len(origin) == dataset.shape.sig.dims + 1:
        pass  # keep as-is
    elif len(origin) == 1:
        origin = origin + tuple([0] * dataset.shape.sig.dims)
    else:
        raise ValueError(
            "incompatible origin: can only read in flattened form")

    if shape is None:
        shape = (1, ) + tuple(dataset.shape.sig)
    else:
        if len(shape) != dataset.shape.flatten_nav().dims:
            raise ValueError(
                "incompatible: shape needs to match the dataset shape")
        shape = Shape(shape, sig_dims=dataset.shape.sig.dims).flatten_nav()

    slice_ = Slice(origin=origin,
                   shape=Shape(shape, sig_dims=dataset.shape.sig.dims))
    return PickFrameJob(
        dataset=dataset,
        slice_=slice_,
        squeeze=True,
    )

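# The deprecation notice above points to PickUDF as the replacement. A sketch
# of the equivalent single-frame pick via the UDF API, assuming an existing
# Context `ctx` and a loaded `dataset`; the boolean-roi convention and the
# 'intensity' buffer name follow the PickUDF documentation.
import numpy as np
from libertem.udf.raw import PickUDF

roi = np.zeros(tuple(dataset.shape.nav), dtype=bool)
roi[7, 8] = True  # select one frame by its nav coordinates
res = ctx.run_udf(dataset=dataset, udf=PickUDF(), roi=roi)
frame = np.squeeze(res['intensity'].data)  # drop the length-1 roi axis
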
def test_crop_to(default_empad, lt_ctx):
    slice_ = Slice(shape=Shape((4, 64, 64), sig_dims=2), origin=(0, 64, 64))
    job = PickFrameJob(dataset=default_empad, slice_=slice_)
    res = lt_ctx.run(job)
    assert res.shape == (4, 64, 64)
    assert np.count_nonzero(res) > 0

def test_crop_to(default_mib, lt_ctx):
    slice_ = Slice(shape=Shape((1024, 64, 64), sig_dims=2), origin=(0, 64, 64))
    job = PickFrameJob(dataset=default_mib, slice_=slice_)
    res = lt_ctx.run(job)
    assert res.shape == (1024, 64, 64)

def create_pick_job(self, dataset,
                    origin: Tuple[int], shape: Tuple[int] = None) -> PickFrameJob:
    """
    Create a job that picks raw data from `origin` with the size defined
    in `shape`.

    NOTE: if you just want to read single frames, it is easier to use
    `create_pick_analysis`.

    NOTE: it is not efficient to use this method on large parts of datasets,
    please consider implementing a UDF instead.

    Parameters
    ----------
    dataset
        The dataset to work on
    origin
        Where to start reading. You can either specify all dimensions,
        or only nav dimensions, in which case the signal is read starting
        from (0, ..., 0).
    shape
        The shape of the data to read. If None, read a "frame" or single
        signal element

    Returns
    -------
    PickFrameJob
        a job that, once run, returns the raw data as :py:class:`numpy.ndarray`

    Examples
    --------

    >>> from libertem.api import Context
    >>> ctx = Context()
    >>> ds = ctx.load("...")
    >>> origin = (7, 8, 9)
    >>> job = ctx.create_pick_job(dataset=ds, origin=origin)
    >>> result = ctx.run(job)
    >>> assert result.shape == tuple(ds.shape.sig)
    """
    # FIXME: this method works well if we can flatten to 3D
    # need vectorized I/O for the general case
    if len(origin) == dataset.shape.nav.dims:
        # nav-only origin: flatten it and read the signal from (0, ..., 0)
        origin = (np.ravel_multi_index(origin, dataset.shape.nav),)\
            + tuple([0] * dataset.shape.sig.dims)
    elif len(origin) == dataset.shape.sig.dims + 1:
        pass  # keep as-is
    elif len(origin) == 1:
        origin = origin + tuple([0] * dataset.shape.sig.dims)
    else:
        raise ValueError(
            "incompatible origin: can only read in flattened form")

    if shape is None:
        shape = (1, ) + tuple(dataset.shape.sig)
    else:
        if len(shape) != dataset.shape.flatten_nav().dims:
            raise ValueError(
                "incompatible: shape needs to match the dataset shape")
        shape = Shape(shape, sig_dims=dataset.shape.sig.dims).flatten_nav()

    slice_ = Slice(origin=origin,
                   shape=Shape(shape, sig_dims=dataset.shape.sig.dims))
    return PickFrameJob(
        dataset=dataset,
        slice_=slice_,
        squeeze=True,
    )