Example #1
def load(dataset, indices, vis, weights, flags, err):
    """Load data from lazy indexers into existing storage.
    This is optimised for the MVF v4 case where we can use dask directly
    to eliminate one copy, and also load vis, flags and weights in parallel.
    In older formats it causes an extra copy.
    Parameters
    ----------
    dataset : :class:`katdal.DataSet`
        Input dataset, possibly with an existing selection
    indices : tuple
        Slice expression for subsetting the dataset
    vis, flags : array-like
        Outputs, which must have the correct shape and type
    """

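    # Read the time axis in CHUNK_SIZE-dump slices so that each request to
    # the data store is small enough to retry on its own if it times out.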
    t_min = indices[0].start
    t_max = indices[0].stop
    in_time_slices = [
        slice(ts, min(ts + CHUNK_SIZE, t_max))
        for ts in range(t_min, t_max, CHUNK_SIZE)
    ]
    for in_ts in in_time_slices:
        out_ts = slice(in_ts.start - t_min, in_ts.stop - t_min)
        out_vis = vis[out_ts]
        out_weights = weights[out_ts]
        out_flags = flags[out_ts]
        for i in range(NUM_RETRIES):
            try:
                if isinstance(dataset.vis, DaskLazyIndexer):
                    DaskLazyIndexer.get(
                        [dataset.vis, dataset.weights, dataset.flags],
                        in_ts,
                        out=[out_vis, out_weights, out_flags])
                else:
                    out_vis[:] = dataset.vis[in_ts]
                    out_weights[:] = dataset.weights[in_ts]
                    out_flags[:] = dataset.flags[in_ts]
                break
            except (StoreUnavailable, socket.timeout):
                msg = 'Timeout when reading dumps %d to %d. Try %d/%d...' % (
                    out_ts.start + 1, out_ts.stop, i + 1, NUM_RETRIES)
                OErr.PLog(err, OErr.Warn, msg)
                OErr.printErr(err)
                print(msg)
        else:
            # Every retry failed: warn and flag the affected dumps instead
            msg = 'Too many timeouts, flagging dumps %d to %d' % (
                out_ts.start + 1, out_ts.stop)
            OErr.PLog(err, OErr.Warn, msg)
            OErr.printErr(err)
            print(msg)
            flags[out_ts] = True
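A minimal sketch of driving this load(), assuming dataset is an already-opened
katdal.DataSet and err is an Obit OErr instance; the dtypes follow katdal's
convention (complex64 vis, float32 weights, boolean flags):

import numpy as np

n_dumps = min(100, dataset.shape[0])
out_shape = (n_dumps,) + dataset.shape[1:]
vis = np.empty(out_shape, np.complex64)
weights = np.empty(out_shape, np.float32)
flags = np.empty(out_shape, np.bool_)
# indices is a tuple whose first element is a slice along the time axis
load(dataset, (slice(0, n_dumps),), vis, weights, flags, err)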
Example #2
def test_transforms(self):
    # Add transform at initialisation
    indexer = DaskLazyIndexer(self.data_dask, transforms=[lambda x: 0 * x])
    np.testing.assert_array_equal(indexer[:], np.zeros_like(indexer))
    # Add transform before first use of object
    indexer = DaskLazyIndexer(self.data_dask)
    indexer.add_transform(lambda x: 0 * x)
    np.testing.assert_array_equal(indexer[:], np.zeros_like(indexer))
    # Add transform after first use of object
    indexer = DaskLazyIndexer(self.data_dask)
    indexer.dataset  # first use: triggers construction of the underlying dask array
    indexer.add_transform(lambda x: 0 * x)
    np.testing.assert_array_equal(indexer[:], np.zeros_like(indexer))
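Transforms are just callables applied lazily to the underlying dask array. A
small illustrative sketch (the scaling transform and test data here are made
up, not part of katdal; the import path assumes recent katdal versions, where
the class lives in katdal.lazy_indexer):

import dask.array as da
import numpy as np
from katdal.lazy_indexer import DaskLazyIndexer

def scale_by_two(x):
    # Runs lazily on the dask array, not eagerly on numpy data
    return 2 * x

data = np.arange(24).reshape(2, 3, 4)
indexer = DaskLazyIndexer(da.from_array(data, chunks=(1, 3, 4)))
indexer.add_transform(scale_by_two)
np.testing.assert_array_equal(indexer[:], 2 * data)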
Example #3
def load(dataset, indices, vis, weights, flags):
    """Load data from lazy indexers into existing storage.

    This is optimised for the MVF v4 case where we can use dask directly
    to eliminate one copy, and also load vis, flags and weights in parallel.
    In older formats it causes an extra copy.

    Parameters
    ----------
    dataset : :class:`katdal.DataSet`
        Input dataset, possibly with an existing selection
    indices : tuple
        Index expression for subsetting the dataset
    vis, weights, flags : array-like
        Outputs, which must have the correct shape and type
    """
    if isinstance(dataset.vis, DaskLazyIndexer):
        DaskLazyIndexer.get([dataset.vis, dataset.weights, dataset.flags],
                            indices,
                            out=[vis, weights, flags])
    else:
        vis[:] = dataset.vis[indices]
        weights[:] = dataset.weights[indices]
        flags[:] = dataset.flags[indices]
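Unlike the chunked, retrying variant in Example #1, this version accepts an
arbitrary index expression in one shot. A hypothetical call, assuming f is an
open katdal dataset (the filename is a placeholder):

import numpy as np
import katdal

f = katdal.open('observation.rdb')
f.select(scans='track')                    # narrow the selection first
idx = np.s_[:, ::2, :]                     # every second channel
shape = (f.shape[0], (f.shape[1] + 1) // 2, f.shape[2])
vis = np.empty(shape, np.complex64)
weights = np.empty(shape, np.float32)
flags = np.empty(shape, np.bool_)
load(f, idx, vis, weights, flags)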
Example #4
def test_str_repr(self):
    def transform1(x):
        return x
    transform2 = lambda x: x  # noqa: E731
    class Transform3:         # noqa: E306
        def __call__(self, x):
            return x
    transform3 = Transform3()
    transform4 = partial(transform1)
    transforms = [transform1, transform2, transform3, transform4]
    indexer = DaskLazyIndexer(self.data_dask, transforms=transforms)
    expected = 'x | transform1 | <lambda> | Transform3 | transform1'
    expected += f' -> {indexer.shape} {indexer.dtype}'
    assert_equal(str(indexer), expected)
    # Simply exercise repr - no need to check result
    repr(indexer)
Example #5
def _test_with(self, stage1=(), stage2=()):
    npy1 = numpy_oindex(self.data, stage1)
    npy2 = numpy_oindex(npy1, stage2)
    indexer = DaskLazyIndexer(self.data_dask, stage1)
    np.testing.assert_array_equal(indexer[stage2], npy2)
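numpy_oindex is a helper defined elsewhere in the test module; it emulates
NumPy outer-style indexing, where each fancy index selects along its own axis
instead of broadcasting against the others. A minimal sketch of such a helper
(an assumption about its behaviour, not the actual implementation):

import numpy as np

def numpy_oindex(array, indices):
    # Apply each index along its own axis, one axis at a time, so that
    # fancy indices never broadcast against each other (outer indexing).
    # Slices, integers, and integer/boolean arrays are supported;
    # np.newaxis and Ellipsis are not handled in this sketch.
    if not isinstance(indices, tuple):
        indices = (indices,)
    out = np.asarray(array)
    axis = 0
    for index in indices:
        out = out[(slice(None),) * axis + (index,)]
        if not isinstance(index, int):  # integer indices drop the axis
            axis += 1
    return out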
Example #6
kwargs = {}
if args.applycal is not None:
    kwargs['applycal'] = args.applycal
f = katdal.open(args.filename, **kwargs)
logging.info('File loaded, shape %s', f.shape)
if args.channels:
    f.select(channels=np.s_[:args.channels])
if args.dumps:
    f.select(dumps=np.s_[:args.dumps])
# Trigger creation of the dask graphs and population of the sensor cache (for applycal etc.)
_ = (f.vis[0, 0, 0], f.weights[0, 0, 0], f.flags[0, 0, 0])
logging.info('Selection complete')
start = time.time()
last_time = start
for st in range(0, f.shape[0], args.time):
    et = st + args.time
    if args.joint:
        vis, weights, flags = DaskLazyIndexer.get([f.vis, f.weights, f.flags], np.s_[st:et])
    else:
        vis = f.vis[st:et]
        weights = f.weights[st:et]
        flags = f.flags[st:et]
    current_time = time.time()
    elapsed = current_time - last_time
    last_time = current_time
    # ~10 bytes per element: 8-byte complex64 vis + 1-byte weight + 1-byte flag
    size = np.prod(vis.shape) * 10
    logging.info('Loaded %d dumps (%.3f MB/s)', vis.shape[0], size / elapsed / 1e6)
size = np.prod(f.shape) * 10
elapsed = time.time() - start
logging.info('Loaded %d bytes in %.3f s (%.3f MB/s)', size, elapsed, size / elapsed / 1e6)
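This benchmark snippet assumes an argparse namespace providing the attributes
used above; a sketch of a matching parser (defaults and help strings are
guesses, only the option names come from the snippet):

import argparse

parser = argparse.ArgumentParser(description='Benchmark loading visibilities via katdal')
parser.add_argument('filename', help='Dataset to open, e.g. an MVF v4 RDB file')
parser.add_argument('--applycal', help='Calibration products to apply on the fly')
parser.add_argument('--channels', type=int, help='Select only the first N channels')
parser.add_argument('--dumps', type=int, help='Select only the first N dumps')
parser.add_argument('--time', type=int, default=2, help='Dumps to load per iteration')
parser.add_argument('--joint', action='store_true',
                    help='Load vis, weights and flags together via DaskLazyIndexer.get')
args = parser.parse_args()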
Example #7
def test_stage1_multiple_boolean_indices(self):
    stage1 = tuple([True] * d for d in self.data.shape)
    indexer = DaskLazyIndexer(self.data_dask, stage1)
    np.testing.assert_array_equal(indexer[:], self.data)
Example #8
def test_stage1_slices(self):
    stage1 = np.s_[5:, :, 1::2]
    indexer = DaskLazyIndexer(self.data_dask, stage1)
    np.testing.assert_array_equal(indexer[:], self.data[stage1])
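Examples #5, #7 and #8 all exercise the indexer's two-stage model: a
first-stage selection fixed at construction time, composed with a second-stage
index at access time. A small end-to-end illustration (test data made up; for
slices and integers, outer indexing coincides with plain NumPy indexing):

import dask.array as da
import numpy as np
from katdal.lazy_indexer import DaskLazyIndexer

data = np.arange(120).reshape(10, 4, 3)
stage1 = np.s_[5:, :, 1::2]
indexer = DaskLazyIndexer(da.from_array(data, chunks=(5, 4, 3)), stage1)
# Stage-2 indexing composes with stage 1, just like chained NumPy indexing
np.testing.assert_array_equal(indexer[2:, 0], data[stage1][2:, 0])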