def load(dataset, indices, vis, weights, flags, err):
    """Load data from lazy indexers into existing storage.

    This is optimised for the MVF v4 case where we can use dask directly
    to eliminate one copy, and also load vis, flags and weights in parallel.
    In older formats it causes an extra copy.

    Parameters
    ----------
    dataset : :class:`katdal.DataSet`
        Input dataset, possibly with an existing selection
    indices : tuple
        Slice expression for subsetting the dataset; the first element must
        be a time slice with explicit `start` and `stop`
    vis, weights, flags : array-like
        Outputs, which must have the correct shape and type
    err : OErr
        Obit error stack used to log warnings on timeouts
    """
    t_min = indices[0].start
    t_max = indices[0].stop
    in_time_slices = [slice(ts, min(ts + CHUNK_SIZE, t_max))
                      for ts in range(t_min, t_max, CHUNK_SIZE)]
    for in_ts in in_time_slices:
        out_ts = slice(in_ts.start - t_min, in_ts.stop - t_min)
        out_vis = vis[out_ts]
        out_weights = weights[out_ts]
        out_flags = flags[out_ts]
        for i in range(NUM_RETRIES):
            try:
                if isinstance(dataset.vis, DaskLazyIndexer):
                    DaskLazyIndexer.get([dataset.vis, dataset.weights, dataset.flags],
                                        in_ts, out=[out_vis, out_weights, out_flags])
                else:
                    out_vis[:] = dataset.vis[in_ts]
                    out_weights[:] = dataset.weights[in_ts]
                    out_flags[:] = dataset.flags[in_ts]
                break
            except (StoreUnavailable, socket.timeout):
                msg = 'Timeout when reading dumps %d to %d. Try %d/%d...' % (
                    out_ts.start + 1, out_ts.stop, i + 1, NUM_RETRIES)
                OErr.PLog(err, OErr.Warn, msg)
                OErr.printErr(err)
                print(msg)
                # Flag the data and warn if we can't get it
                if i == NUM_RETRIES - 1:
                    msg = 'Too many timeouts, flagging dumps %d to %d' % (
                        out_ts.start + 1, out_ts.stop)
                    OErr.PLog(err, OErr.Warn, msg)
                    OErr.printErr(err)
                    print(msg)
                    flags[out_ts] = True
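# A minimal usage sketch for the retrying loader above (an assumption, not part
# of the original module): allocate outputs matching the requested dump range
# and fill them in CHUNK_SIZE pieces. The helper name and the dtypes below are
# illustrative; katdal conventionally yields complex64 vis, float32 weights and
# boolean flags.
def _example_load_dumps(dataset, t_start, t_stop, err):
    """Illustrative only: load dumps [t_start, t_stop) of `dataset` into new arrays."""
    import numpy as np
    shape = (t_stop - t_start,) + tuple(dataset.shape[1:])
    vis = np.empty(shape, dtype=np.complex64)    # visibilities
    weights = np.empty(shape, dtype=np.float32)  # per-sample weights
    flags = np.zeros(shape, dtype=np.bool_)      # set True for unreadable dumps
    load(dataset, np.s_[t_start:t_stop, :, :], vis, weights, flags, err)
    return vis, weights, flags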
def test_transforms(self):
    # Add transform at initialisation
    indexer = DaskLazyIndexer(self.data_dask, transforms=[lambda x: 0 * x])
    np.testing.assert_array_equal(indexer[:], np.zeros_like(self.data))
    # Add transform before first use of object
    indexer = DaskLazyIndexer(self.data_dask)
    indexer.add_transform(lambda x: 0 * x)
    np.testing.assert_array_equal(indexer[:], np.zeros_like(self.data))
    # Add transform after first use of object
    indexer = DaskLazyIndexer(self.data_dask)
    indexer.dataset  # property access forces construction of the dask graph
    indexer.add_transform(lambda x: 0 * x)
    np.testing.assert_array_equal(indexer[:], np.zeros_like(self.data))
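# The test methods here reference `self.data` and `self.data_dask`. A minimal
# fixture sketch along these lines would support them (assumed sizes and chunking;
# the real test setup may differ). The dask array is named 'x' so that the
# str() expectation in test_str_repr below holds.
import dask.array as da
import numpy as np


class _FixtureSketch:
    def setup_method(self):
        # Deterministic base array plus a chunked dask view of the same data
        self.data = np.arange(10 * 20 * 30, dtype=np.float32).reshape(10, 20, 30)
        self.data_dask = da.from_array(self.data, chunks=(1, 4, 5), name='x')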
def load(dataset, indices, vis, weights, flags):
    """Load data from lazy indexers into existing storage.

    This is optimised for the MVF v4 case where we can use dask directly
    to eliminate one copy, and also load vis, flags and weights in parallel.
    In older formats it causes an extra copy.

    Parameters
    ----------
    dataset : :class:`katdal.DataSet`
        Input dataset, possibly with an existing selection
    indices : tuple
        Index expression for subsetting the dataset
    vis, weights, flags : array-like
        Outputs, which must have the correct shape and type
    """
    if isinstance(dataset.vis, DaskLazyIndexer):
        DaskLazyIndexer.get([dataset.vis, dataset.weights, dataset.flags],
                            indices, out=[vis, weights, flags])
    else:
        vis[:] = dataset.vis[indices]
        weights[:] = dataset.weights[indices]
        flags[:] = dataset.flags[indices]
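# Usage sketch for the single-shot loader above (illustrative assumption, not
# part of the original module): after a katdal selection, `dataset.shape` is
# the selected (time, channel, corrprod) shape, so outputs can be allocated
# directly from it and filled in one call.
def _example_load_selection(dataset):
    """Illustrative only: load the current selection of `dataset` into new arrays."""
    import numpy as np
    vis = np.empty(dataset.shape, dtype=np.complex64)
    weights = np.empty(dataset.shape, dtype=np.float32)
    flags = np.zeros(dataset.shape, dtype=np.bool_)
    load(dataset, np.s_[:, :, :], vis, weights, flags)
    return vis, weights, flags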
def test_str_repr(self):
    def transform1(x):
        return x
    transform2 = lambda x: x  # noqa: E731

    class Transform3:  # noqa: E306
        def __call__(self, x):
            return x
    transform3 = Transform3()
    transform4 = partial(transform1)
    transforms = [transform1, transform2, transform3, transform4]
    indexer = DaskLazyIndexer(self.data_dask, transforms=transforms)
    expected = 'x | transform1 | <lambda> | Transform3 | transform1'
    expected += f' -> {indexer.shape} {indexer.dtype}'
    assert_equal(str(indexer), expected)
    # Simply exercise repr - no need to check result
    repr(indexer)
def _test_with(self, stage1=(), stage2=()):
    """Check that two-stage dask indexing matches NumPy outer indexing."""
    npy1 = numpy_oindex(self.data, stage1)
    npy2 = numpy_oindex(npy1, stage2)
    indexer = DaskLazyIndexer(self.data_dask, stage1)
    np.testing.assert_array_equal(indexer[stage2], npy2)
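# `numpy_oindex` above emulates NumPy outer (orthogonal) indexing, which is the
# semantics DaskLazyIndexer implements. A sketch of such a helper (an assumption,
# not the test suite's exact implementation): apply each index along its own axis
# in turn, so fancy indices never interact. Handles slices, scalars and per-axis
# boolean/integer arrays; ellipsis and np.newaxis are out of scope here.
def _numpy_oindex_sketch(array, indices):
    """Illustrative only: outer-indexing equivalent of `array[indices]`."""
    import numpy as np
    if not isinstance(indices, tuple):
        indices = (indices,)
    out = array
    axis = 0
    for index in indices:
        # Index a single axis at a time to get orthogonal semantics
        out = out[(slice(None),) * axis + (index,)]
        # A scalar index removes the axis; anything else keeps it
        if not isinstance(index, (int, np.integer)):
            axis += 1
    return out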
kwargs = {}
if args.applycal is not None:
    kwargs['applycal'] = args.applycal
f = katdal.open(args.filename, **kwargs)
logging.info('File loaded, shape %s', f.shape)
if args.channels:
    f.select(channels=np.s_[:args.channels])
if args.dumps:
    f.select(dumps=np.s_[:args.dumps])
# Trigger creation of the dask graphs, population of sensor cache for applycal etc.
_ = (f.vis[0, 0, 0], f.weights[0, 0, 0], f.flags[0, 0, 0])
logging.info('Selection complete')
start = time.time()
last_time = start
for st in range(0, f.shape[0], args.time):
    et = st + args.time
    if args.joint:
        vis, weights, flags = DaskLazyIndexer.get([f.vis, f.weights, f.flags],
                                                   np.s_[st:et])
    else:
        vis = f.vis[st:et]
        weights = f.weights[st:et]
        flags = f.flags[st:et]
    current_time = time.time()
    elapsed = current_time - last_time
    last_time = current_time
    # Roughly 10 bytes per element on disk: 8-byte complex vis + 1-byte weight + 1-byte flag
    size = np.prod(vis.shape) * 10
    logging.info('Loaded %d dumps (%.3f MB/s)', vis.shape[0], size / elapsed / 1e6)
size = np.prod(f.shape) * 10
elapsed = time.time() - start
logging.info('Loaded %d bytes in %.3f s (%.3f MB/s)', size, elapsed, size / elapsed / 1e6)
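# The benchmark snippet above assumes an argparse namespace along these lines.
# This is a sketch: the option names are inferred from the attribute accesses
# (args.filename, args.applycal, args.channels, args.dumps, args.time,
# args.joint) and may not match the script's exact CLI.
import argparse


def _example_parse_args():
    parser = argparse.ArgumentParser(description='katdal read benchmark (sketch)')
    parser.add_argument('filename', help='MVF dataset to open')
    parser.add_argument('--applycal', help='calibration products to apply')
    parser.add_argument('--channels', type=int, help='limit to first N channels')
    parser.add_argument('--dumps', type=int, help='limit to first N dumps')
    parser.add_argument('--time', type=int, default=10,
                        help='number of dumps to load per iteration')
    parser.add_argument('--joint', action='store_true',
                        help='load vis/weights/flags jointly via DaskLazyIndexer.get')
    return parser.parse_args()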
def test_stage1_multiple_boolean_indices(self):
    # An all-True boolean mask per axis selects everything under outer
    # indexing, which plain NumPy fancy indexing would not accept here
    stage1 = tuple([True] * d for d in self.data.shape)
    indexer = DaskLazyIndexer(self.data_dask, stage1)
    np.testing.assert_array_equal(indexer[:], self.data)
def test_stage1_slices(self):
    stage1 = np.s_[5:, :, 1::2]
    indexer = DaskLazyIndexer(self.data_dask, stage1)
    np.testing.assert_array_equal(indexer[:], self.data[stage1])