def load(dataset, indices, vis, weights, flags, err):
    """Load data from lazy indexers into existing storage.

    This is optimised for the MVF v4 case where we can use dask directly
    to eliminate one copy, and also load vis, flags and weights in parallel.
    In older formats it causes an extra copy.

    Parameters
    ----------
    dataset : :class:`katdal.DataSet`
        Input dataset, possibly with an existing selection
    indices : tuple
        Slice expression for subsetting the dataset; the first element must
        be a time slice with explicit `start` and `stop`
    vis, weights, flags : array-like
        Outputs, which must have the correct shape and type
    err : OErr
        Obit error stack used to log warnings on timeouts
    """
    t_min = indices[0].start
    t_max = indices[0].stop
    in_time_slices = [slice(ts, min(ts + CHUNK_SIZE, t_max))
                      for ts in range(t_min, t_max, CHUNK_SIZE)]
    for in_ts in in_time_slices:
        out_ts = slice(in_ts.start - t_min, in_ts.stop - t_min)
        out_vis = vis[out_ts]
        out_weights = weights[out_ts]
        out_flags = flags[out_ts]
        for i in range(NUM_RETRIES):
            try:
                if isinstance(dataset.vis, DaskLazyIndexer):
                    DaskLazyIndexer.get([dataset.vis, dataset.weights, dataset.flags],
                                        in_ts, out=[out_vis, out_weights, out_flags])
                else:
                    out_vis[:] = dataset.vis[in_ts]
                    out_weights[:] = dataset.weights[in_ts]
                    out_flags[:] = dataset.flags[in_ts]
                break
            except (StoreUnavailable, socket.timeout):
                msg = 'Timeout when reading dumps %d to %d. Try %d/%d...' % (
                    out_ts.start + 1, out_ts.stop, i + 1, NUM_RETRIES)
                OErr.PLog(err, OErr.Warn, msg)
                OErr.printErr(err)
                print(msg)
                # Flag the data and warn if we can't get it
                if i == NUM_RETRIES - 1:
                    msg = 'Too many timeouts, flagging dumps %d to %d' % (
                        out_ts.start + 1, out_ts.stop)
                    OErr.PLog(err, OErr.Warn, msg)
                    OErr.printErr(err)
                    print(msg)
                    flags[out_ts] = True
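# A minimal usage sketch for the retrying loader above (an assumption, not part
# of the original module): allocate outputs matching the requested dump range
# and fill them in CHUNK_SIZE pieces. The helper name and the dtypes below are
# illustrative; katdal conventionally yields complex64 vis, float32 weights and
# boolean flags.
def _example_load_dumps(dataset, t_start, t_stop, err):
    """Illustrative only: load dumps [t_start, t_stop) of `dataset` into new arrays."""
    import numpy as np
    shape = (t_stop - t_start,) + tuple(dataset.shape[1:])
    vis = np.empty(shape, dtype=np.complex64)    # visibilities
    weights = np.empty(shape, dtype=np.float32)  # per-sample weights
    flags = np.zeros(shape, dtype=np.bool_)      # set True for unreadable dumps
    load(dataset, np.s_[t_start:t_stop, :, :], vis, weights, flags, err)
    return vis, weights, flags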
def test_transforms(self):
    # Add transform at initialisation
    indexer = DaskLazyIndexer(self.data_dask, transforms=[lambda x: 0 * x])
    np.testing.assert_array_equal(indexer[:], np.zeros_like(self.data))
    # Add transform before first use of object
    indexer = DaskLazyIndexer(self.data_dask)
    indexer.add_transform(lambda x: 0 * x)
    np.testing.assert_array_equal(indexer[:], np.zeros_like(self.data))
    # Add transform after first use of object
    indexer = DaskLazyIndexer(self.data_dask)
    indexer.dataset  # property access forces construction of the dask graph
    indexer.add_transform(lambda x: 0 * x)
    np.testing.assert_array_equal(indexer[:], np.zeros_like(self.data))
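# The test methods here reference `self.data` and `self.data_dask`. A minimal
# fixture sketch along these lines would support them (assumed sizes and chunking;
# the real test setup may differ). The dask array is named 'x' so that the
# str() expectation in test_str_repr below holds.
import dask.array as da
import numpy as np


class _FixtureSketch:
    def setup_method(self):
        # Deterministic base array plus a chunked dask view of the same data
        self.data = np.arange(10 * 20 * 30, dtype=np.float32).reshape(10, 20, 30)
        self.data_dask = da.from_array(self.data, chunks=(1, 4, 5), name='x')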
def load(dataset, indices, vis, weights, flags):
    """Load data from lazy indexers into existing storage.

    This is optimised for the MVF v4 case where we can use dask directly
    to eliminate one copy, and also load vis, flags and weights in parallel.
    In older formats it causes an extra copy.

    Parameters
    ----------
    dataset : :class:`katdal.DataSet`
        Input dataset, possibly with an existing selection
    indices : tuple
        Index expression for subsetting the dataset
    vis, weights, flags : array-like
        Outputs, which must have the correct shape and type
    """
    if isinstance(dataset.vis, DaskLazyIndexer):
        DaskLazyIndexer.get([dataset.vis, dataset.weights, dataset.flags],
                            indices, out=[vis, weights, flags])
    else:
        vis[:] = dataset.vis[indices]
        weights[:] = dataset.weights[indices]
        flags[:] = dataset.flags[indices]
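# Usage sketch for the single-shot loader above (illustrative assumption, not
# part of the original module): after a katdal selection, `dataset.shape` is
# the selected (time, channel, corrprod) shape, so outputs can be allocated
# directly from it and filled in one call.
def _example_load_selection(dataset):
    """Illustrative only: load the current selection of `dataset` into new arrays."""
    import numpy as np
    vis = np.empty(dataset.shape, dtype=np.complex64)
    weights = np.empty(dataset.shape, dtype=np.float32)
    flags = np.zeros(dataset.shape, dtype=np.bool_)
    load(dataset, np.s_[:, :, :], vis, weights, flags)
    return vis, weights, flags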
def test_str_repr(self):
    def transform1(x):
        return x
    transform2 = lambda x: x  # noqa: E731

    class Transform3:  # noqa: E306
        def __call__(self, x):
            return x
    transform3 = Transform3()
    transform4 = partial(transform1)
    transforms = [transform1, transform2, transform3, transform4]
    indexer = DaskLazyIndexer(self.data_dask, transforms=transforms)
    expected = 'x | transform1 | <lambda> | Transform3 | transform1'
    expected += f' -> {indexer.shape} {indexer.dtype}'
    assert_equal(str(indexer), expected)
    # Simply exercise repr - no need to check result
    repr(indexer)
def _test_with(self, stage1=(), stage2=()):
    """Check that two-stage dask indexing matches NumPy outer indexing."""
    npy1 = numpy_oindex(self.data, stage1)
    npy2 = numpy_oindex(npy1, stage2)
    indexer = DaskLazyIndexer(self.data_dask, stage1)
    np.testing.assert_array_equal(indexer[stage2], npy2)
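# `numpy_oindex` above emulates NumPy outer (orthogonal) indexing, which is the
# semantics DaskLazyIndexer implements. A sketch of such a helper (an assumption,
# not the test suite's exact implementation): apply each index along its own axis
# in turn, so fancy indices never interact. Handles slices, scalars and per-axis
# boolean/integer arrays; ellipsis and np.newaxis are out of scope here.
def _numpy_oindex_sketch(array, indices):
    """Illustrative only: outer-indexing equivalent of `array[indices]`."""
    import numpy as np
    if not isinstance(indices, tuple):
        indices = (indices,)
    out = array
    axis = 0
    for index in indices:
        # Index a single axis at a time to get orthogonal semantics
        out = out[(slice(None),) * axis + (index,)]
        # A scalar index removes the axis; anything else keeps it
        if not isinstance(index, (int, np.integer)):
            axis += 1
    return out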
kwargs = {}
if args.applycal is not None:
    kwargs['applycal'] = args.applycal
f = katdal.open(args.filename, **kwargs)
logging.info('File loaded, shape %s', f.shape)
if args.channels:
    f.select(channels=np.s_[:args.channels])
if args.dumps:
    f.select(dumps=np.s_[:args.dumps])
# Trigger creation of the dask graphs, population of sensor cache for applycal etc.
_ = (f.vis[0, 0, 0], f.weights[0, 0, 0], f.flags[0, 0, 0])
logging.info('Selection complete')
start = time.time()
last_time = start
for st in range(0, f.shape[0], args.time):
    et = st + args.time
    if args.joint:
        vis, weights, flags = DaskLazyIndexer.get([f.vis, f.weights, f.flags],
                                                   np.s_[st:et])
    else:
        vis = f.vis[st:et]
        weights = f.weights[st:et]
        flags = f.flags[st:et]
    current_time = time.time()
    elapsed = current_time - last_time
    last_time = current_time
    # Roughly 10 bytes per element on disk: 8-byte complex vis + 1-byte weight + 1-byte flag
    size = np.prod(vis.shape) * 10
    logging.info('Loaded %d dumps (%.3f MB/s)', vis.shape[0], size / elapsed / 1e6)
size = np.prod(f.shape) * 10
elapsed = time.time() - start
logging.info('Loaded %d bytes in %.3f s (%.3f MB/s)', size, elapsed, size / elapsed / 1e6)
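# The benchmark snippet above assumes an argparse namespace along these lines.
# This is a sketch: the option names are inferred from the attribute accesses
# (args.filename, args.applycal, args.channels, args.dumps, args.time,
# args.joint) and may not match the script's exact CLI.
import argparse


def _example_parse_args():
    parser = argparse.ArgumentParser(description='katdal read benchmark (sketch)')
    parser.add_argument('filename', help='MVF dataset to open')
    parser.add_argument('--applycal', help='calibration products to apply')
    parser.add_argument('--channels', type=int, help='limit to first N channels')
    parser.add_argument('--dumps', type=int, help='limit to first N dumps')
    parser.add_argument('--time', type=int, default=10,
                        help='number of dumps to load per iteration')
    parser.add_argument('--joint', action='store_true',
                        help='load vis/weights/flags jointly via DaskLazyIndexer.get')
    return parser.parse_args()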
def test_stage1_multiple_boolean_indices(self):
    # An all-True boolean mask per axis selects everything under outer
    # indexing, which plain NumPy fancy indexing would not accept here
    stage1 = tuple([True] * d for d in self.data.shape)
    indexer = DaskLazyIndexer(self.data_dask, stage1)
    np.testing.assert_array_equal(indexer[:], self.data)
def test_stage1_slices(self):
    stage1 = np.s_[5:, :, 1::2]
    indexer = DaskLazyIndexer(self.data_dask, stage1)
    np.testing.assert_array_equal(indexer[:], self.data[stage1])