Example #1
def main():
    parser = get_parser()
    args = parser.parse_args(namespace=arguments.SmartNamespace())
    if (args.stop_channel is None
            or args.stop_channel - args.start_channel > 1):
        if '%' not in args.output_file:
            parser.error(
                'More than one channel selected but no %d in output filename')
    configure_logging(args)
    if args.write_profile or args.write_device_profile:
        profiling.Profiler.set_profiler(profiling.FlamegraphProfiler())

    queue = None
    context = None
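    # Use a real CUDA device unless args.host is set, in which case fall back
    # to dummy context/queue stand-ins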
    if not args.host:
        context = accel.create_some_context(device_filter=lambda x: x.is_cuda)
        queue = context.create_command_queue()
    else:
        context = dummy_context()
        queue = DummyCommandQueue()

    with closing(
            loader.load(args.input_file, args.input_option, args.start_channel,
                        args.stop_channel)) as dataset:
        frontend.run(args, context, queue, dataset, Writer(args, dataset))

    profiler = profiling.Profiler.get_profiler()
    if args.write_profile:
        with open(args.write_profile, 'w') as f:
            assert isinstance(profiler, profiling.FlamegraphProfiler)
            profiler.write_flamegraph(f)
    if args.write_device_profile:
        with open(args.write_device_profile, 'w') as f:
            assert isinstance(profiler, profiling.FlamegraphProfiler)
            profiler.write_device_flamegraph(f)
Example #2
def main():
    ctx = create_some_context()
    queue = ctx.create_command_queue()
    op = SumTemplate(ctx).instantiate(queue, 1024)
    op.ensure_all_bound()
    src = np.random.randint(1, 100,
                            size=op.buffer('src').shape).astype(np.int32)
    op.buffer('src').set(queue, src)
    op()
    dest = op.buffer('dest').get(queue)
    wgs = op.template.wgs
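    # The device output should equal the sum of each contiguous block of
    # wgs source elements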
    expected = src.reshape(-1, wgs).sum(axis=1)
    np.testing.assert_equal(dest, expected)
    print(dest)
Example #3
def benchmark_fft(args):
    context = accel.create_some_context()
    queue = context.create_tuning_command_queue()
    allocator = accel.SVMAllocator(context)
    shape = (args.pixels, args.pixels)
    template = fft.FftTemplate(queue, 2, shape, np.complex64, np.complex64, shape, shape)
    fn = template.instantiate(args.mode, allocator=allocator)
    fn.ensure_all_bound()
    # Zero-fill, just to ensure no NaNs etc
    fn.buffer('src').fill(0)
    fn.buffer('dest').fill(0)
    fn()  # Warm-up and forces data transfer
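    # Time a single transform between start_tuning() and stop_tuning()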
    queue.start_tuning()
    fn()
    elapsed = queue.stop_tuning()
    print('{pixels}x{pixels} in {elapsed:.6f} seconds'.format(pixels=args.pixels, elapsed=elapsed))
    # 8 bytes for complex64, 4 accesses (from source, to/from scratch, to dest)
    mem_rate = args.pixels * args.pixels * 8 * 4 / elapsed
    print('{:.3f} GiB/s'.format(mem_rate / 1024**3))
Example #4
def benchmark_grid_degrid(args):
    n_time = 3600
    add_parameters(args)
    N = n_time * len(args.antennas) * (len(args.antennas) - 1) // 2
    reader = make_compressed_vis(args, n_time)

    context = accel.create_some_context()
    queue = context.create_tuning_command_queue()
    gridder_template = args.template_class(context, args.image_parameters,
                                           args.grid_parameters, tuning=args.tuning)
    gridder = gridder_template.instantiate(queue, args.array_parameters, N)
    gridder.ensure_all_bound()
    elapsed = 0.0
    N_compressed = 0
    uv = gridder.buffer('uv').empty_like()
    w_plane = gridder.buffer('w_plane').empty_like()
    vis = gridder.buffer('vis').empty_like()
    for w_slice in range(reader.num_w_slices(0)):
        gridder.num_vis = reader.len(0, w_slice)
        N_compressed += gridder.num_vis
        if gridder.num_vis > 0:
            gridder.buffer('grid').zero(queue)
            start = 0
            for chunk in reader.iter_slice(0, w_slice):
                rng = slice(start, start + len(chunk))
                uv[rng, 0:2] = chunk['uv']
                uv[rng, 2:4] = chunk['sub_uv']
                w_plane[rng] = chunk['w_plane']
                vis[rng] = chunk['vis']
                start += len(chunk)
            gridder.buffer('uv').set_async(queue, uv)
            gridder.buffer('w_plane').set_async(queue, w_plane)
            gridder.buffer('vis').set_async(queue, vis)
            queue.finish()
            queue.start_tuning()
            gridder()
            elapsed += queue.stop_tuning()
            queue.finish()
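    # GAPS: grid-point additions per second (each visibility touches
    # kernel_width**2 grid points per polarization)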
    gaps = N_compressed * args.grid_parameters.kernel_width**2 * args.polarizations / elapsed
    print('Processed {} ({}) visibilities in {:.6f}s with kernel size {} and {} polarizations'
          .format(N_compressed, N, elapsed, args.grid_parameters.kernel_width, args.polarizations))
    print('{:.3f} GGAPS uncompressed'.format(gaps * N / N_compressed / 1e9))
    print('{:.3f} GGAPS compressed'.format(gaps / 1e9))
Example #5
def main():
    parser = get_parser()
    args = parser.parse_args(namespace=arguments.SmartNamespace())
    katsdpservices.setup_logging()
    if args.log_level is not None:
        logger.setLevel(args.log_level.upper())

    profiling.Profiler.set_profiler(profiling.FlamegraphProfiler())

    with closing(
            loader.load(args.input_file, args.input_option, args.start_channel,
                        args.stop_channel)) as dataset:
        writer = Writer(args, dataset)
        context = accel.create_some_context(interactive=False,
                                            device_filter=lambda x: x.is_cuda)
        queue = context.create_command_queue()
        frontend.run(args, context, queue, dataset, writer)
        # frontend.run modifies args.stop_channel in place, so even if it
        # wasn't specified by the user it will now be valid.
        writer.finalize(dataset, args.start_channel, args.stop_channel)
Example #6
#!/usr/bin/env python
# for nosetest: nosetests katsdpsigproc.test.test_maskedsum

import time
import numpy as np
from katsdpsigproc import accel, maskedsum

context = accel.create_some_context(True)
queue = context.create_command_queue(profile=True)

data = np.random.randn(4000, 5000,
                       2).astype(np.float32).view(dtype=np.complex64)[..., 0]
mask = np.ones((4000, )).astype(np.float32)

template = maskedsum.MaskedSumTemplate(context)
msum = template.instantiate(queue, data.shape)
msum.ensure_all_bound()
msum.buffer('src').set(queue, data)
msum.buffer('mask').set(queue, mask)
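# Bracket the kernel launch with markers so device execution time can be read back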
start_event = queue.enqueue_marker()
msum()
end_event = queue.enqueue_marker()
out = msum.buffer('dest').get(queue)

t0 = time.time()
expected = np.sum(data * mask.reshape(data.shape[0], 1),
                  axis=0).astype(np.complex64)
t1 = time.time()
print('gpu:', end_event.time_since(start_event), 'cpu:', t1 - t0)
np.testing.assert_equal(out.reshape(-1), expected)
Example #7
def benchmark1d(args, data):
    if args.width % 2 != 1:
        raise argparse.ArgumentError(None, 'Width must be odd')
    if data.shape[0] <= args.width:
        raise argparse.ArgumentError(
            None, 'Number of channels must be greater than the filter width')

    context = None
    if not args.host:
        try:
            context = accel.create_some_context(True)
        except RuntimeError:
            print("No devices available. Executing on the CPU.",
                  file=sys.stderr)

    if context is None:
        background = katsdpsigproc.rfi.host.BackgroundMedianFilterHost(
            args.width)
        noise_est = katsdpsigproc.rfi.host.NoiseEstMADHost()
        threshold = katsdpsigproc.rfi.host.ThresholdSumHost(args.sigmas)
        flagger = katsdpsigproc.rfi.host.FlaggerHost(background, noise_est,
                                                     threshold)
        start = time.time()
        flags = flagger(data)
        end = time.time()
        print("CPU time (ms):", (end - start) * 1000.0)
    else:
        command_queue = context.create_command_queue(profile=True)
        background = katsdpsigproc.rfi.device.BackgroundMedianFilterDeviceTemplate(
            context, args.width)
        noise_est = katsdpsigproc.rfi.device.NoiseEstMADTDeviceTemplate(
            context, 10240)
        threshold = katsdpsigproc.rfi.device.ThresholdSumDeviceTemplate(
            context)
        template = katsdpsigproc.rfi.device.FlaggerDeviceTemplate(
            background, noise_est, threshold)
        flagger = template.instantiate(command_queue,
                                       data.shape[0],
                                       data.shape[1],
                                       threshold_args={'n_sigma': args.sigmas})
        flagger.ensure_all_bound()

        data_device = flagger.buffer('vis')
        flags_device = flagger.buffer('flags')

        data_device.set(command_queue, data)
        # Run once for warmup (allocates memory)
        flagger()
        # Run again, timing it
        command_queue.finish()

        start_time = time.time()
        start_event = command_queue.enqueue_marker()
        flagger()
        end_event = command_queue.enqueue_marker()
        command_queue.finish()
        end_time = time.time()
        flags = flags_device.get(command_queue)
        print("Host time (ms):  ", (end_time - start_time) * 1000.0)
        try:
            device_time = end_event.time_since(start_event) * 1000.0
        except Exception:
            # AMD CPU device doesn't seem to support profiling on marker events
            device_time = 'unknown'
        print("Device time (ms):", device_time)
    return flags
Example #8
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--vis', type=int, default=10**6)
    parser.add_argument('--sources', type=int, default=10**4)
    args = parser.parse_args()

    image_parameters = parameters.ImageParameters(
        q_fov=1.0,
        image_oversample=None,
        frequency=0.2 * units.m,
        array=None,
        polarizations=polarization.STOKES_IQUV,
        dtype=np.float64,
        pixel_size=0.00001,
        pixels=4096)
    oversample = 8
    w_planes = 100
    grid_parameters = parameters.GridParameters(
        antialias_width=7.0,
        oversample=oversample,
        image_oversample=4,
        w_slices=10,
        w_planes=w_planes,
        max_w=5 * units.m,
        kernel_width=7)
    base_sources = [
        "dummy0, radec, 19:39:25.03, -63:42:45.7, (200.0 12000.0 -11.11 7.777 -1.231 0 0 0 1 0.1 0 0)",       # noqa: E501
        "dummy1, radec, 19:39:20.38, -63:42:09.1, (800.0 8400.0 -3.708 3.807 -0.7202 0 0 0 1 0.2 0.2 0.2)",   # noqa: E501
        "dummy2, radec, 19:39:08.29, -63:42:33.0, (800.0 43200.0 0.956 0.584 -0.1644 0 0 0 1 0.1 0 1)"        # noqa: E501
    ]
    sources = []
    for i in range(args.sources):
        sources.append(str(uuid.uuid4()) + base_sources[i % len(base_sources)])
    model = sky_model.KatpointSkyModel(katpoint.Catalogue(sources))
    phase_centre = katpoint.construct_radec_target(
        '19:39:30', '-63:42:30').astrometric_radec() * units.rad

    rs = RandomState(seed=1)
    uv = rs.random_integers(-2048, 2048, size=(args.vis, 2)).astype(np.int16)
    sub_uv = rs.random_integers(0, grid_parameters.oversample - 1,
                                size=(args.vis, 2)).astype(np.int16)
    w_plane = rs.random_integers(0, grid_parameters.w_planes - 1, size=args.vis).astype(np.int16)
    weights = rs.uniform(size=(args.vis, len(image_parameters.polarizations))).astype(np.float32)
    vis = rs.complex_normal(size=(args.vis, len(image_parameters.polarizations)))

    context = accel.create_some_context(device_filter=lambda x: x.is_cuda)
    queue = context.create_command_queue()
    allocator = accel.SVMAllocator(context)
    template = predict.PredictTemplate(context, np.float32, len(image_parameters.polarizations))
    fn = template.instantiate(queue, image_parameters, grid_parameters,
                              args.vis, len(model), allocator=allocator)
    fn.ensure_all_bound()
    fn.num_vis = args.vis
    fn.set_coordinates(uv, sub_uv, w_plane)
    fn.set_vis(vis)
    fn.set_weights(weights)
    fn.set_sky_model(model, phase_centre)
    fn.set_w(1.2)
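    # Run twice: the first call typically absorbs one-off setup costs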
    fn()
    fn()
    queue.finish()