Exemplo n.º 1
def entrypoint(argv):
    """Dispatch the top-level ``vernon`` command line to one of its subcommands.

    ``argv`` is the full argument list: ``argv[0]`` is the program name and
    ``argv[1]`` selects the subcommand. The chosen sub-entrypoint is called
    with a rewritten argument list whose first element is
    ``'vernon <subcommand>'`` followed by the remaining arguments.

    Subcommand modules are imported lazily, so only the selected one is
    loaded. An unknown subcommand is reported through ``die()``.
    """
    if len(argv) < 2:
        die('must supply a subcommand: "hpc", "integrate", "ljob", "preprays", "sde", "view", "vkl"')

    subcommand = argv[1]
    sub_argv = ['vernon ' + subcommand] + argv[2:]

    if subcommand == 'hpc':
        from ..hpc import entrypoint as run
        run(sub_argv)
    elif subcommand == 'integrate':
        from ..integrate import entrypoint as run
        run(sub_argv)
    elif subcommand == 'ljob':
        farm_out_to_ljob(sub_argv)
    elif subcommand == 'preprays':
        from ..preprays import entrypoint as run
        run(sub_argv)
    elif subcommand == 'sde':
        from ..sde import entrypoint as run
        run(sub_argv)
    elif subcommand == 'view':
        # Unlike the others, "view" lives in the current package.
        from .view import entrypoint as run
        run(sub_argv)
    elif subcommand == 'vkl':
        from ..vkl import entrypoint as run
        run(sub_argv)
    else:
        # Only reached when nothing above matched; previously this call sat
        # inside the "vkl" branch and fired after a successful vkl run.
        die('unrecognized subcommand %r', subcommand)
Exemplo n.º 2
def dump_task_log(jobdir, taskid):
    """Dump the logs of every attempt at a given task in a job work directory.

    Used by the 'ljob tasklog' command, so this is written in a completely
    non-library style.

    ``jobdir`` is the job's work directory; its ``attempts.log`` file is
    scanned for ``issued`` records. ``taskid`` is converted to text and
    compared against field 5 of each such record. For every match, fields 3
    and 4 (worker ID, attempt ID) are remembered, and the corresponding
    attempt logs are then dumped in file order.

    Calls ``die()`` if no attempt for the task is found.
    """
    taskid = six.text_type(taskid)
    attempts = []

    with io.open(os.path.join(jobdir, 'attempts.log'), 'rt') as f:
        for line in f:
            pieces = line.split()

            # Only "issued" records say which worker got which task.
            if pieces[1] != 'issued':
                continue

            if pieces[5] == taskid:
                attempts.append(
                    (pieces[3], pieces[4]))  # worker ID, attempt ID

    if not attempts:
        die('cannot find any attempts for task %s', taskid)

    for attinfo in attempts:
        _dump_worker_attempt_log(jobdir, *attinfo)
Exemplo n.º 3
    def connect(self):
        self.attlog = LogFile('bulk/%s/attempts.log' % self.jobid)
        self.stdiolog = io.open('bulk/%s/stdio.log' % self.jobid,

        dest = port = cookie = None

        for i in range(6):
                with io.open('jobinfo/sockinfo.txt', 'rt') as f:
                    dest = f.readline().strip()
                    port = int(f.readline())
                    cookie = f.readline().strip()
            except Exception as e:
                warn('failed to read master socket info: %s', e)

        if cookie is None:
            die('took too long to find master; giving up')

        log('connecting to %s/%d', dest, port)
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.connect((dest, port))
        self.sock = sock
        self.msgreader = JsonMessageReader(sock)
            'hello', {
                'jobid': six.text_type(self.jobid),
                'cookie': six.text_type(cookie),
                'job_array_id': six.text_type(self.job_array_id),
Exemplo n.º 4
def compute_cli(args):
    """Compute ray data for a run of image columns and save it to an archive file.

    ``args`` is the list of command-line arguments handed to the parser from
    ``make_compute_parser()``. The configuration is read from the TOML file
    named by ``settings.config_path``.

    For each of ``settings.n_cols_to_compute`` columns, starting at
    ``settings.start_col`` in row ``settings.row_num``, a ray is obtained from
    the image maker and each of its parameters is copied into a
    ``(n_parameters, n_columns, max_n_samps)`` array. Two arrays are then
    written back-to-back into one ``archive/frame*.npy`` file: the per-column
    sample counts, and the data trimmed to the largest observed sample count.

    Calls ``die()`` if a ray needs more samples than ``config.max_n_samps``.
    """
    settings = make_compute_parser().parse_args(args=args)
    config = PrepraysConfiguration.from_toml(settings.config_path)

    imaker = config.get_prep_rays_imaker(settings.cml)

    ray_parameters = common_ray_parameters + imaker.setup.distrib._parameter_names
    n_vals = len(ray_parameters)
    data = np.zeros((n_vals, settings.n_cols_to_compute, config.max_n_samps))
    # The `np.int` alias was removed in NumPy 1.24; the builtin `int` selects
    # the same default integer dtype.
    n_samps = np.zeros((settings.n_cols_to_compute,), dtype=int)

    for i in range(settings.n_cols_to_compute):
        ray = imaker.get_ray(settings.start_col + i, settings.row_num)

        if ray.s.size > config.max_n_samps:
            die('too many samples required for ray at ix=%d iy=%d: max=%d, got=%d',
                settings.start_col + i, settings.row_num, config.max_n_samps, ray.s.size)

        n_samps[i] = ray.s.size
        sl = slice(0, ray.s.size)

        for j, pname in enumerate(ray_parameters):
            data[j,i,sl] = getattr(ray, pname)

    # Drop the unused tail of the sample axis before saving.
    obs_max_n_samps = n_samps.max()
    data = data[:,:,:obs_max_n_samps]

    fn = 'archive/frame%04d_%04d_%04d.npy' % \
         (settings.frame_num, settings.row_num, settings.start_col)

    # Readers must call np.load() twice on this file, in the same order.
    with io.open(fn, 'wb') as f:
        np.save(f, n_samps)
        np.save(f, data)
Exemplo n.º 5
Arquivo: hpc.py Projeto: pkgw/vernon
def prep_and_image_ui(pre_args, settings, config):
    if not Path('Config.toml').exists():
        die('expected "Config.toml" in current directory')


    with open('preprays/tasks', 'wb') as tasks:
            ['vernon', 'preprays', 'seed',
             '-c', 'Config.toml',
             '-g', str(config.preprays_n_col_groups)
            stdout = tasks,

    masterid = config.launch_ljob('preprays', 'preprays')
    nextid = config.schedule_next_stage(
        pre_args + ['--stage=pr_assemble', '--previd=%s' % masterid],

    print('Preprays ljob master ID:', masterid)
    print('Next-stage job ID:', nextid)

    with open('pandi_launch.log', 'wt') as log:
        print('Preprays ljob master ID:', masterid, file=log)
        print('Next-stage job ID:', nextid, file=log)
Exemplo n.º 6
Arquivo: hpc.py Projeto: pkgw/vernon
def entrypoint(argv):
    """Dispatch the ``hpc`` command line to its subcommand.

    ``argv[1]`` selects the subcommand; the only one handled here is
    ``prep-and-image``. Any other value is reported through ``die()``.
    """
    if len(argv) < 2:
        die('must supply a subcommand: "prep-and-image"')

    if argv[1] == 'prep-and-image':
        prep_and_image_cli(argv) # note, full argv!
    else:
        # Previously this ran unconditionally after a successful
        # "prep-and-image" and never for a genuinely unknown subcommand.
        die('unrecognized subcommand %r', argv[1])
Exemplo n.º 7
    def validate(self):
        """Check that ``self.nn_path`` is usable as a neural-net directory.

        The path must equal its own absolute form, and it must contain a file
        named ``nn_config.toml``. Either failure is reported through ``die()``.
        """
        nn_dir = Path(self.nn_path)
        absolute_form = nn_dir.absolute()

        if nn_dir != absolute_form:
            die('neural-net path must be absolute; got "%s"' % nn_dir)

        config_file = nn_dir / 'nn_config.toml'
        if config_file.exists():
            return

        die('bad setting for neural-net path: no such file %s', config_file)
Exemplo n.º 8
def entrypoint(argv):
    if len(argv) == 1:
        die('must supply a subcommand: "gen-grid", "test-coeffs"')

    if argv[1] == 'gen-grid':
    elif argv[1] == 'test-coeffs':
        die('unrecognized subcommand %r', argv[1])
Exemplo n.º 9
def farm_out_to_ljob(argv):
    """Hand control over to the ``ljob.sh`` shell script.

    ljob is a shell script, so the current process is replaced by it with
    ``os.execv``, passing ``argv`` through unchanged. Before that, the
    ``LJOB_SUPPORT`` environment variable is set to the resolved
    ``ljob_support`` directory located two levels above this file.

    Calls ``die()`` if the ``TOP`` environment variable is not set.
    """
    import os
    from pwkit.io import Path

    if os.environ.get('TOP') is None:
        die('ljob command must be run with the environment variable $TOP set to a directory')

    package_root = Path(__file__).parent.parent
    support = (package_root / 'ljob_support').resolve()
    os.environ['LJOB_SUPPORT'] = str(support)

    script = str(support / 'ljob.sh')
    os.execv(script, argv)
Exemplo n.º 10
def commandline(argv=None):
    """Run the ljob helper command line.

    ``argv`` defaults to ``sys.argv``. ``argv[1]`` is the command name; the
    only one recognized is ``launch``, matching the usage message. Anything
    else is reported through ``die()``.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) < 2:
        die('usage: {ljob} launch')

    command = argv[1]

    if command == 'launch':
        # NOTE(review): the original branch went straight to the
        # "unrecognized subcommand" error. Handing off to do_launch(argv) is
        # inferred from the usage message and that function's signature --
        # confirm against the caller's expectations.
        do_launch(argv)
    else:
        die('unrecognized subcommand "%s"', command)
Exemplo n.º 11
def entrypoint(args):
    if not len(args):
        cli.die('must provide a subcommand: "cube", "hdf5"')

    subcommand = args[0]
    remaining_args = args[1:]

    if subcommand == 'cube':
    elif subcommand == 'hdf5':
        cli.die('unrecognized subcommand %r' % (subcommand, ))
Exemplo n.º 12
def nninit_cli(settings):
    """Create a new neural-net directory and write its default configuration.

    ``settings.nndir`` names the directory. After it is created,
    ``NNINIT_DEFAULT_CONFIG`` is written to ``nn_config.toml`` inside it.

    Calls ``die()`` if the directory already exists; any other ``OSError``
    from creating it is re-raised.
    """
    import errno

    nndir = Path(settings.nndir)

    try:
        # NOTE(review): the guarded statement was missing from this block;
        # mkdir() is inferred from the "already exists" handler below --
        # confirm.
        nndir.mkdir()
    except OSError as e:
        if e.errno == errno.EEXIST:
            die('directory \"%s\" already exists' % settings.nndir)
        # Don't hide permission problems, missing parents, and the like.
        raise

    cfg_path = nndir / 'nn_config.toml'
    with cfg_path.open('wt') as f:
        pytoml.dump(f, NNINIT_DEFAULT_CONFIG)
Exemplo n.º 13
def view_cli(args):
    if len(args) == 0:
        die('must supply a sub-subcommand: "lc", "rot", "specmovie", "specseq"'

    if args[0] == 'lc':
    elif args[0] == 'rot':
    elif args[0] == 'specmovie':
    elif args[0] == 'specseq':
        die('unrecognized sub-subcommand %r', args[0])
Exemplo n.º 14
def entrypoint(argv):
    if len(argv) == 1:
        die('must supply a subcommand: "assemble", "gen-grid-config", "seed", "test-approx"')

    if argv[1] == 'assemble':
    elif argv[1] == 'gen-grid-config':
        GriddedPrepraysConfiguration.generate_config_cli('preprays gen-grid-config', argv[2:])
    elif argv[1] == 'seed':
    elif argv[1] == '_compute':
    elif argv[1] == 'test-approx':
        die('unrecognized subcommand %r', argv[1])
Exemplo n.º 15
def entrypoint(argv):
    if len(argv) == 1:
        die('must supply a subcommand: "assemble", "framegrab", "movie", "seed", "view"'

    if argv[1] == 'seed':
    elif argv[1] == '_integrate':
    elif argv[1] == 'assemble':
    elif argv[1] == 'view':
    elif argv[1] == 'movie':
    elif argv[1] == 'framegrab':
        die('unrecognized subcommand %r', argv[1])
Exemplo n.º 16
def entrypoint(argv):
    if len(argv) == 1:
        die('must supply a subcommand: "average", "cube-to-particles", "forward", "gen-gg-config", "gen-grid"'

    if argv[1] == 'average':
    elif argv[1] == 'cube-to-particles':
    elif argv[1] == 'forward':
    elif argv[1] == 'gen-gg-config':
        from .grid import GenGridTask
        GenGridTask.generate_config_cli('sde gen-gg-config', argv[2:])
    elif argv[1] == 'gen-grid':
        from .grid import gen_grid_cli
        die('unrecognized subcommand %r', argv[1])
Exemplo n.º 17
def do_launch(argv):
    """We're a master or worker job, which we can distinguish from the
    environment variable LJOB_IS_MASTER. (We could avoid needing that
    environment variable, but it's easy and fine that way.) We take a unique
    identifier from the LJOB_PROC_ID variable (which allows us not to have to
    worry about what particular mechanism was used to launch us).

    ismaster = os.environ.get('LJOB_IS_MASTER')
    if ismaster is None:
        die('framework failure: no environment variable LJOB_IS_MASTER')

    jobid = os.environ.get('LJOB_PROC_ID')
    if jobid is None:
        die('framework failure: no environment variable LJOB_PROC_ID')

    if ismaster == 'y':
Exemplo n.º 18
Arquivo: hpc.py Projeto: pkgw/vernon
def prep_and_image_cli(argv):
    """Run one stage of the ``prep-and-image`` workflow.

    ``argv`` is the full argument list, including the program name and the
    subcommand. Options are parsed from ``argv[2:]`` and the configuration is
    read from ``Config.toml`` in the current directory. ``settings.stage``
    picks the handler: ``None`` for the initial stage, ``'pr_assemble'``, or
    ``'integ_assemble'``. Any other stage is reported through ``die()``.
    """
    # When we're sub-executed by the "vernon" CLI driver, our argv[0] becomes
    # "vernon hpc". We need to re-split that so that our Slurm scripts invoke
    # the right thing.

    pre_args = argv[:2]
    if ' ' in argv[0]:
        pre_args = pre_args[0].split() + pre_args[1:]

    settings = make_prep_and_image_parser().parse_args(args=argv[2:])
    config = HPCConfiguration.from_toml('Config.toml')

    if settings.stage is None:
        prep_and_image_ui(pre_args, settings, config)
    elif settings.stage == 'pr_assemble':
        prep_and_image_pr_assemble(pre_args, settings, config)
    elif settings.stage == 'integ_assemble':
        prep_and_image_integ_assemble(pre_args, settings, config)
    else:
        # Previously this fired after a successful "integ_assemble" stage and
        # never for a genuinely unknown stage name.
        die('unknown prep-and-image stage %r', settings.stage)
Exemplo n.º 19
def ms_transpose (vpath, tpath, transpose_args):
    squash_time_gaps = transpose_args.pop ('squash_time_gaps', 'n')
    if squash_time_gaps == 'y':
        squash_time_gaps = True
    elif squash_time_gaps == 'n':
        squash_time_gaps = False
        die ('"squash_time_gaps" keyword must be either "y" or "n"')

        return _ms_transpose (vpath, tpath, transpose_args,
        # The unlink can overwrite the exception info that
        # an argumentless "raise" would reraise.
        t, v, tb = sys.exc_info ()
            os.unlink (tpath)
Exemplo n.º 20
def grep_attempt_log(jobdir, regex_str, mode='grep'):
    """Search the attempt logs of every worker in a job work directory.

    Used by the 'ljob att(un)grep' commands, so this is written in a
    completely non-library style.

    ``regex_str`` is compiled as a regular expression; a pattern that does
    not compile is reported through ``die()``. ``mode`` is ``'grep'`` or
    ``'ungrep'`` and selects which per-worker helper is applied to each entry
    of ``<jobdir>/bulkdata``. Any other mode is treated as an internal bug
    and also reported through ``die()``.
    """
    import re

    try:
        regex = re.compile(regex_str)
    except Exception as e:
        die('cannot compile regular expression %r: %s', regex_str, e)

    if mode == 'grep':
        func = _grep_worker_attempt_log
    elif mode == 'ungrep':
        func = _ungrep_worker_attempt_log
    else:
        die('internal bug, unknown mode %r', mode)

    for workerid in os.listdir(os.path.join(jobdir, 'bulkdata')):
        func(jobdir, workerid, regex)
Exemplo n.º 21
def _dump_worker_attempt_log(jobdir, workerid, attid):
    startofs = endofs = elapsed = failcode = None

    with io.open(os.path.join(jobdir, 'bulkdata', workerid, 'attempts.log'),
                 'rt') as f:
        for line in f:
            pieces = line.split()

            if pieces[3] != attid:

            if pieces[1] in ('starting', 'complete'):
                if pieces[1][0] == 's':
                    startofs = int(pieces[5])
                    endofs = int(pieces[5])
                    elapsed = int(pieces[6])
                    failcode = int(pieces[7])

                if startofs is not None and endofs is not None:

    if startofs is None or endofs is None:
        die('cannot find offset information in logs of worker %s', workerid)

    ntoread = endofs - startofs

    print('==> attempt %s worker=%s elapsed=%d failcode=%d <==' %
          (attid, workerid, elapsed, failcode))

    with io.open(os.path.join(jobdir, 'bulkdata', workerid, 'stdio.log'),
                 'rb') as f:
        f.seek(startofs, 0)

        while ntoread > 0:
            d = f.read(min(ntoread, 4096))
            ntoread -= len(d)
Exemplo n.º 22
def load_data_and_train(datadir, nndir, result_name):
    from .impl import NSModel

    cfg_path = Path(nndir) / 'nn_config.toml'
    dr, rinfo = DomainRange.from_serialized(cfg_path,

    if rinfo is None:
        die('no known result named %r', result_name)

    sd = dr.load_and_normalize(datadir)

    trainer_name = rinfo['trainer']
    trainer_func = globals().get(trainer_name + '_trainer')
    if trainer_func is None:
        die('unknown trainer function %r', trainer_name)

    print('Training with scheme \"%s\"' % trainer_name)
    m = NSModel()
    m.ns_setup(rinfo['_index'], sd)
    t0 = time.time()
    m.training_wall_clock = time.time() - t0
    return m
Exemplo n.º 23
def entrypoint(argv):
    ap = argparse.ArgumentParser(prog='neurosynchro')
    subparsers = ap.add_subparsers(dest='subcommand',
                                   help='The sub-command to invoke')

            'Initialize a directory to save the neural network training data'))

            help='Find the domain and range of the training set'))

            help='Print summary statistics about the training set'))

    from .grtrans import make_parser as make_grtrans_parser
                              help='Do a test integration with grtrans'))

            'Transform the training set into Neurosynchro\'s internal parametrization',
            'Transform the training set into Neurosynchro\'s internal parametrization.',
            '''The training set can have arbitrary input parameters, but should have eight
output parameters named `j_I`, `j_Q`, `j_V`, `alpha_I`, `alpha_Q`, `alpha_V`,
`rho_Q`, `rho_V` -- these are the standard Stokes-basis radiative transfer
coefficients. The transformed training set will be printed to standard output,
so you almost surely want to redirect the output of this program to a file.''')

    from .train import make_parser as make_train_parser
                              help='Train one of the neural networks'))

    settings = ap.parse_args(argv[1:])

    if settings.subcommand is None:
        die('you must supply a subcommand; run with "--help" for help')

    if settings.subcommand == 'init-nndir':
    elif settings.subcommand == 'lock-domain-range':
    elif settings.subcommand == 'summarize':
    elif settings.subcommand == 'test-grtrans':
        from .grtrans import grtrans_cli
    elif settings.subcommand == 'train':
        from .train import train_cli
    elif settings.subcommand == 'transform':
        # argparse will error out if it the user gives an unrecognized
        # subcommand, so if we get here it's an internal bug
        assert False, 'internal bug: forgot to handle subcommand!'
Exemplo n.º 24
def doit(driver, args):
    # Load up the driver code

        text = open(driver).read()
    except Exception as e:
        cli.die('cannot read driver file "%s": %s', driver, e)

        code = compile(text, driver, 'exec')
    except Exception as e:
        if 'OMEGAFIG_BACKTRACE' in os.environ:
        cli.die('cannot compile driver file "%s": %s', driver, e)

    ns = {'__file__': driver, '__name__': '__omegafig__'}

        exec(code, ns)
    except Exception as e:
        if 'OMEGAFIG_BACKTRACE' in os.environ:
        cli.die('cannot execute driver file "%s": %s', driver, e)

    pfunc = ns.get('plot')
    if pfunc is None:
        cli.die('driver file "%s" does not provide a function called "plot"',
    if not callable(pfunc):
            'driver file "%s" provides something called "plot", but it\'s '
            'not a function', driver)

    # Deal with args

        code = pfunc.__code__
    except AttributeError:
        code = pfunc.func_code

    nargs = code.co_argcount
    argnames = code.co_varnames

    keywords = []
    nonkeywords = []

    for arg in args:
        if '=' in arg:

    if len(nonkeywords) != nargs:
        cli.die('expected %d non-keyword arguments to driver, but got %d',
                nargs, len(nonkeywords))

    config = Config()
    defaults = ns.get('figdefaults')

    if defaults is not None:
        for key in defaults:
            setattr(config, key, defaults[key])


    # Set up omegaplot globals as appropriate

    if config.pango:
            import omega.pango_g3 as ompango
        except ImportError:
            import omega.pango_g2 as ompango

        fontparams = {}
        if config.pangofamily is not None:
            fontparams['family'] = config.pangofamily
        if config.pangosize is not None:
            fontparams['size'] = config.pangosize
        if len(fontparams):

    # Execute.

    p = pfunc(*nonkeywords)

    if config.out is None:
Exemplo n.º 25
def _mir_transpose (vpath, tpath, unused_transpose_args):
    from miriad import VisData
    from mirtask.util import mir2pbp32
    from . import visobjs
    vis = VisData (vpath)

    # Pass 1: build up list of basepols, times

    first = True
    nrecs = 0
    delays = None
    window = None
    fc = visobjs.FreqConfig ()
    times = set ()
    pbps = set ()
    visgen = vis.readLowlevel ('3', False)

    print ('pass 1 ...')

    for inp, pream, data, flags in visgen:
        t = pream[3]
        pbp = mir2pbp32 (inp, pream)
        nrecs += 1

        if first:
            ftrack = fc.makeTracker (inp)
            first = False

        if ftrack.updated ():
            fc.fill (inp)

            if fc.numSpectralWindows () != 1:
                die ('cannot transpose: need exactly one spectral window')

            idents = list (fc.fundamentalWinIdents ())
            newwindow = fc.windowFromIdent (idents[0])

            if window is not None and newwindow != window:
                die ('cannot transpose: frequency config changes inside dataset')

            window = newwindow

        if delays is None:
            nants = inp.getVarInt ('nants')
            dinfo = inp.probeVar ('delay0')

            if dinfo is None:
                delays = False
            elif dinfo[1] == 2 * nants:
                # An ATA extension: one fixed delay per antpol. Reshape
                # to be a bit more sensible
                delays = inp.getVarFloat ('delay0', 2 * nants)
                delays = delays.reshape ((2, nants)).T
                delays = inp.getVarFloat ('delay0', nants)
                delays = np.vstack ((delays, delays)).T

        times.add (t)
        pbps.add (pbp)

    # Get the timestamps onto a nice even grid, checking that our
    # gridding is decent.

    datatimes = np.asarray (sorted (times), dtype=np.double)
    nt = datatimes.size
    time0 = datatimes[0]
    cadence = np.median (datatimes[1:] - datatimes[:-1])
    tidxs = (datatimes - time0) / cadence
    timemap = np.empty (nt, dtype=np.int)
    nslot = int (round (tidxs[-1])) + 1
    scale = nslot * 1. / nt
    noff = 0

    for i in range (nt):
        timemap[i] = int (round (tidxs[i]))
        if (tidxs[i] - timemap[i]) > 0.01:
            noff += 1

    if noff > 0:
        warn ('had %d timestamps (out of %d) with poor '
              'mapping onto the grid', noff, nt)

    if scale > 1.05:
        warn ('data size increasing by factor of %.2f '
              'to get everything onto the time grid', scale)

    times = np.arange (nslot) * cadence + time0
    nt = nslot

    # Compute a few other things

    pbps = np.asarray (sorted (pbps), dtype=np.int32)
    nbp = pbps.size

    # Without the int(), nchan is a numpy.int32, the type of which
    # propagates along to various byte counts and offsets which end up
    # overflowing for sufficiently large datasets and causing
    # exceptions on negative values getting passed to the various
    # system calls used below.
    nchan = int (window.nchan)
    sdf = window.width / nchan
    sfreq = window.cfreq - 0.5 * (window.width - sdf)

    corr_bytes = 8 * nchan
    uvww_bytes = 4 * 8
    flag_bytes = nchan
    slice_bytes = (corr_bytes + flag_bytes + uvww_bytes) * nt
    dump_bytes = (corr_bytes + flag_bytes + uvww_bytes) * nbp

    nsimult = CACHE_SIZE // dump_bytes

    # Write out header info
    # Write-then-seek seems to break if buffering is used???

    data_offset = ((header.size + 7) // 8) * 8
    data_size = slice_bytes * nbp

    vars_offset = ((data_offset + data_size + 7) // 8) * 8

    f = open (tpath, 'w+', 0)
    f.truncate (vars_offset) # hint how big the file will be
    f.write (header.pack (BYTE_ORDER_MARKER,
                          nbp, nt, nchan,
                          sfreq, sdf,
                          time0, cadence,

    # Pass 2: write data. Below we cast the tidx variables to ints for
    # the same reason as with nchan above.

    def corr_offset (bpidx, tidx):
        return data_offset + bpidx * slice_bytes + corr_bytes * int (tidx)
    def flag_offset (bpidx, tidx):
        return (data_offset + bpidx * slice_bytes + corr_bytes * nt +
                flag_bytes * int (tidx))
    def uvww_offset (bpidx, tidx):
        return (data_offset + bpidx * slice_bytes + (corr_bytes + flag_bytes) * nt +
                uvww_bytes * int (tidx))

    lsts = np.empty (nt, dtype=np.double)

    corrs = np.empty ((nsimult, nbp, nchan), dtype=np.complex64)
    flags = np.empty ((nsimult, nbp, nchan), dtype=np.int8)
    uvwws = np.empty ((nsimult, nbp, 4), dtype=np.double)
    seen = np.empty ((nsimult, nbp), dtype=np.bool)
    lstbuf = np.empty (nsimult, dtype=np.double)

    empty_corr = np.zeros (nchan, dtype=np.complex64)
    empty_flags = np.zeros (nchan, dtype=np.int8)
    empty_uvww = np.zeros (4, dtype=np.double)

    # Progress reporting:
    unbufout = os.fdopen (os.dup (1), 'w', 0)
    currec = 0
    tstart = time.time ()
    tlastprint = 0

    def dump (curtimes):
        nbatch = len (curtimes)

        tidxs = np.empty (nbatch, dtype=np.int)
        for time, sidx in iteritems(curtimes):
            tidxs[sidx] = timemap[datatimes.searchsorted (time)]
            lsts[tidxs[sidx]] = lstbuf[sidx]

        info = np.empty ((nbatch, 3), dtype=np.int)
        info[:,0] = tidxs.argsort ()
        info[:,1] = tidxs[info[:,0]]
        info[0,2] = 1

        for i in range (1, nbatch):
            info[i,2] = (info[i,1] != info[i-1,1] + 1)

        for bpidx in range (nbp):
            for sidx, tidx, seek in info:
                if seek:
                    f.seek (corr_offset (bpidx, tidx))
                if seen[sidx,bpidx]:
                    f.write (corrs[sidx,bpidx])
                    f.write (empty_corr)

            for sidx, tidx, seek in info:
                if seek:
                    f.seek (flag_offset (bpidx, tidx))
                if seen[sidx,bpidx]:
                    f.write (flags[sidx,bpidx])
                    f.write (empty_flags)

            for sidx, tidx, seek in info:
                if seek:
                    f.seek (uvww_offset (bpidx, tidx))
                if seen[sidx,bpidx]:
                    f.write (uvwws[sidx,bpidx])
                    f.write (empty_uvww)

    newchunk = True
    curtimes = {}
    nrec = nvis = 0
    seenany = np.zeros (nbp, dtype=np.bool)
    meanuvw = np.zeros ((nbp, 3), dtype=np.double)
    muvwcounts = np.zeros (nbp, dtype=np.int)
    visgen = vis.readLowlevel ('3', False)

    print ('pass 2 ...')

    for inp, pream, data, recflags in visgen:
        uvw = pream[:3]
        t = pream[3]
        pbp = mir2pbp32 (inp, pream)
        var = inp.getVariance ()
        if var == 0:
            var = 1.
        weight = 1. / var

        if currec % 500 == 0 and currec:
            now = time.time ()

            if now - tlastprint > 1:
                pct = 100. * currec / nrecs
                elapsed = now - tstart
                total = 1. * elapsed * nrecs / currec
                eta = total - elapsed

                msg = '   %3.1f%% (%d/%d) elapsed %s ETA %s total %s' % \
                    (pct, currec, nrecs, _sfmt (elapsed), _sfmt (eta), _sfmt (total))
                unbufout.write(msg.ljust (60).encode('utf8') + b'\r')
                tlastprint = now

        currec += 1

        if t not in curtimes and len (curtimes) == nsimult:
            msg = '   %3.1f%% (%d/%d) writing ...' % (pct, currec, nrecs)
            unbufout.write(msg.ljust (60).encode('utf8') + b'\r\n')
            dump (curtimes)
            newchunk = True

        if newchunk:
            curtimes = {}
            newchunk = False

        sidx = curtimes.get (t)

        if sidx is None:
            sidx = len (curtimes)
            curtimes[t] = sidx
            seen[sidx].fill (False)

        bpidx = pbps.searchsorted (pbp)

        seen[sidx,bpidx] = True
        uvwws[sidx,bpidx,:3] = uvw
        uvwws[sidx,bpidx,3] = weight
        corrs[sidx,bpidx] = data
        flags[sidx,bpidx] = recflags.astype (np.int8)
        lstbuf[sidx] = inp.getVarDouble ('lst')
        muvwcounts[bpidx] += 1
        meanuvw[bpidx] += uvw

        if recflags.any ():
            seenany[bpidx] = 1

        nrec += 1
        nvis += data.size

    if len (curtimes):
        msg = '   100%% (%d/%d) writing ...' % (currec, nrecs)
        unbufout.write(msg.ljust (60).encode('utf8') + b'\r')
        dump (curtimes)

    tfinish = time.time ()
    elapsed = tfinish - tstart
    print ('   100%% (%d/%d) elapsed %s ETA 0s total %s   ' % \
           (currec, nrecs, _sfmt (elapsed), _sfmt (elapsed)))
    unbufout.close ()

    # Finally, write out variables

    f.seek (vars_offset)
    savevariable (f, 'vispath', np.fromstring (str (vis), dtype=np.byte))
    savevariable (f, 'basepols', pbps)
    if delays is not False:
        savevariable (f, 'delays', delays)
    flaggedbps = pbps[np.where (seenany == 0)]
    savevariable (f, 'flaggedbps', flaggedbps)
    savevariable (f, 'lsts', lsts)

    wbad = np.where (muvwcounts == 0)
    muvwcounts[wbad] = 1
    meanuvw[:,0] /= muvwcounts # apparently broadcasting doesn't
    meanuvw[:,1] /= muvwcounts # do what you'd want here. Not sure
    meanuvw[:,2] /= muvwcounts # why, but it's only two extra lines.
    meanuvw[wbad] = 0
    # Take the mean across the spectral window, as well as in time:
    meanuvw *= window.cfreq / sfreq
    savevariable (f, 'meanuvws', meanuvw)

    f.close ()
    return nrec, nvis, data_size
Exemplo n.º 26
 def omstyle(v):
     """Look up the style named ``v`` in ``om.styles`` and instantiate it.

     Returns the new style object. If the lookup or the constructor call
     fails, the failure is reported through ``cli.die()``.
     """
     try:
         return getattr(om.styles, v)()
     except Exception:
         # Without the try/except this error report was unreachable: it sat
         # directly after the return statement.
         cli.die('can\'t load/instantiate OmegaPlot style "%s"', v)
Exemplo n.º 27
def movie_cli(args):
    import cairo, subprocess, tempfile
    from pwkit.cli import die
    from pwkit.data_gui_helpers import Clipper, ColorMapper
    from pwkit.io import Path

    settings = make_movie_parser().parse_args(args=args)
    ii = IntegratedImages(settings.inpath)

    if settings.kind == 'rot':
        print('Rotation movie; non-movie freq choice is:',
        cube = np.array(
            ii.rotmovie(settings.index, settings.stokes, yflip=True))
    elif settings.kind == 'spec':
        print('Spectrum movie; non-movie CML choice is:',
        cube = np.array(
            ii.specmovie(settings.index, settings.stokes, yflip=True))
        die('unrecognized movie type %r', settings.kind)

    if settings.crop != 0:
        c = settings.crop
        cube = cube[:, c:-c, c:-c]

    n, h, w = cube.shape

    s = settings.scaling
    h *= s
    w *= s
    scaled = np.empty((h, w), dtype=cube.dtype)
    tiled = scaled.reshape((h // s, s, w // s, s))

    stride = cairo.ImageSurface.format_stride_for_width(cairo.FORMAT_ARGB32, w)
    assert stride % 4 == 0  # stride is in bytes
    assert stride == 4 * w

    clipper = Clipper()

    if settings.symmetrize:
        m = np.nanmax(np.abs(cube))
        clipper.dmin = -m
        clipper.dmax = m

    mapper = ColorMapper(settings.colormap)

    surface = cairo.ImageSurface.create_for_data(mapper.buffer,
                                                 cairo.FORMAT_ARGB32, w, h,

    tempdir = Path(tempfile.mkdtemp())
    argv = [

    for i, plane in enumerate(cube):
        tiled[...] = plane.reshape((plane.shape[0], 1, plane.shape[1], 1))
        png = str(tempdir / ('%d.png' % i))

    argv += [settings.outpath]
    subprocess.check_call(argv, shell=False)
Exemplo n.º 28
def view_hdf5_cli(args):
    """XXX: huge code redundancy with "view cube". Whatever."""
    import h5py

    ap = argparse.ArgumentParser(prog='vernon view hdf5', )
                    choices='default sqrt neg'.split(),
                    help='What kind of stretch to use on the data.')
        help='Isolate the outermost P\'th plane of the cube before viewing.')
                    help='Transpose the array before viewing.')
        help='Render the cube so that the first row is at the bottom.')
    ap.add_argument('FILE', metavar='HDF5-PATH', help='The HDF5 file to load')
                    help='The name of the item within the file to view')

    settings = ap.parse_args(args=args)
    stretch_spec = settings.stretch[0]

    if stretch_spec == 'default':
        stretch = lambda data: data
    elif stretch_spec == 'sqrt':

        def stretch(data):
            neg = (data < 0)
            data[neg] *= -1
            data = np.sqrt(data)
            data[neg] *= -1
            return data
    elif stretch_spec == 'neg':
        stretch = lambda data: (data < 0).astype(np.int)
        cli.die('unknown stretch specification %r', stretch_spec)

    if settings.y_flip:
        y_slice = slice(None, None, -1)
        y_slice = slice(None, None)

    def describe(a):
        print('Final shape:', repr(a.shape))
        print('Min/max/med: %.16e  %.16e  %.16e' %
              (np.nanmin(a), np.nanmax(a), np.nanmedian(a)))
        print('# positive / # negative / # nonfinite: %d  %d  %d' %
              ((a > 0).sum(), (a < 0).sum(), (~np.isfinite(a)).sum()))
        return a  # convenience

    arrays = []

    with h5py.File(settings.FILE, 'r') as ds:
        for item in settings.ITEMS:
            a = ds[item][...]
            if settings.outer_plane_number is not None:
                a = a[settings.outer_plane_number]

    if len(arrays) > 2:
        a = np.stack(arrays)
        a = arrays[0]

    if settings.transpose:
        a = a.T

    if a.ndim == 2:
        stretched = stretch(describe(a))
        view(stretched[y_slice], yflip=settings.y_flip)
    elif a.ndim == 3:
        stretched = stretch(describe(a))
        cycle(stretched[:, y_slice], yflip=settings.y_flip)
    elif a.ndim == 4:
        print('Shape:', a.shape)
        for i in range(a.shape[0]):
            stretched = stretch(describe(a[i]))
            cycle(stretched[:, y_slice], yflip=settings.y_flip)
        cli.die('cannot handle %d-dimensional arrays', a.ndim)
Exemplo n.º 29
def grtrans_cli(settings):
    """Compare a precise radiative-transfer integration against the
    neural-network approximation of the same problem, printing both results.

    Parameters
    ----------
    settings
        Parsed command-line settings. This function reads ``testdata`` (path
        of a table loadable with ``pandas.read_table``), ``frequency``
        (multiplied by 1e9 to obtain ``nu_hz``; presumably given in GHz --
        confirm against the argument parser) and ``nndir`` (handed to
        ``PhysicalApproximator``).

    The test table must provide the columns ``psi(meta)``, ``d(meta)``,
    ``n_e(meta)`` and ``time_ms(meta)``, input-parameter columns whose names
    start with ``s(`` and ``theta(``, and the eight ``*(res)`` coefficient
    columns read below. Any other column ending in ``(lin)`` or ``(log)`` is
    forwarded to the approximator as a keyword argument named after the text
    before the first parenthesis. A missing required column terminates the
    program through ``pwkit.cli.die``.
    """
    from pwkit import cgs
    from pwkit.cli import die
    from time import time
    from .impl import PhysicalApproximator, hardcoded_nu_ref, hardcoded_ne_ref

    # Read and validate the test dataset.

    testdata = pd.read_table(settings.testdata)

    psi = testdata.get('psi(meta)')
    if psi is None:
        die('the test dataset must contain a column of field-to-Stokes-U angles \"psi(meta)\"')

    d = testdata.get('d(meta)')
    if d is None:
        die('the test dataset must contain a column of integration path lengths \"d(meta)\"')

    n_e = testdata.get('n_e(meta)')
    if n_e is None:
        die('the test dataset must contain a column of particle densities \"n_e(meta)\"')

    time_ms = testdata.get('time_ms(meta)')
    if time_ms is None:
        die('the test dataset must contain a column of computation times \"time_ms(meta)\"')

    s = None
    theta = None
    others = {}

    for col in testdata.columns:
        if col.startswith('s('):
            s = testdata[col]
        elif col.startswith('theta('):
            theta = testdata[col]
        elif col.endswith('(lin)') or col.endswith('(log)'):
            # Strip the "(lin)"/"(log)" suffix to get the keyword name.
            others[col.split('(')[0]] = testdata[col]

    if s is None:
        die('the test dataset must have an input parameter of the harmonic number \"s\"')

    if theta is None:
        die('the test dataset must have an input parameter of the field-to-LOS angle \"theta\"')

    # Get the coefficients into physical units, packed in our standard format.

    nu_hz = settings.frequency * 1e9
    freq_scale = nu_hz / hardcoded_nu_ref
    n_e_scale = n_e / hardcoded_ne_ref

    # Emission (j_*) columns are multiplied by the frequency scale; the
    # absorption (alpha_*) and rho_* columns are divided by it.
    coeffs = np.empty((psi.size, 8))
    coeffs[..., 0] = testdata['j_I(res)'] * freq_scale
    coeffs[..., 1] = testdata['alpha_I(res)'] / freq_scale
    coeffs[..., 2] = testdata['j_Q(res)'] * freq_scale
    coeffs[..., 3] = testdata['alpha_Q(res)'] / freq_scale
    coeffs[..., 4] = testdata['j_V(res)'] * freq_scale
    coeffs[..., 5] = testdata['alpha_V(res)'] / freq_scale
    coeffs[..., 6] = testdata['rho_Q(res)'] / freq_scale
    coeffs[..., 7] = testdata['rho_V(res)'] / freq_scale
    coeffs *= n_e_scale.values.reshape((-1, 1))

    # Ground truth:

    iquv_precise = integrate(d, coeffs, psi)
    ctime_precise = time_ms.sum()
    print(
        'Precise computation: I={:.4e}  Q={:.4e}  U={:.4e}  V={:.4e}  calc_time={:.0f} ms'
        .format(iquv_precise[0], iquv_precise[1], iquv_precise[2],
                iquv_precise[3], ctime_precise))

    # Now set up the approximator and do the same thing. (Note that often the
    # timing seems backwards, because the time spent doing the precise
    # calculation has already been spent, whereas we have a lot of overhead to
    # set up the neural networks.)

    B = 2 * np.pi * cgs.me * cgs.c * nu_hz / (s * cgs.e)
    approx = PhysicalApproximator(settings.nndir)
    t0 = time()
    coeffs, oos = approx.compute_all_nontrivial(nu_hz, B, n_e, theta, **others)
    ctime_approx = 1000 * (time() - t0)  # seconds -> milliseconds

    if np.any(oos != 0):
        print('WARNING: some of the approximations were out-of-sample')

    iquv_approx = integrate(d, coeffs, psi)
    print(
        'Approx. computation: I={:.4e}  Q={:.4e}  U={:.4e}  V={:.4e}  calc_time={:.0f} ms'
        .format(iquv_approx[0], iquv_approx[1], iquv_approx[2], iquv_approx[3],
                ctime_approx))

    # Fractional deviation of each Stokes parameter from the precise result.
    acc = np.abs((iquv_approx - iquv_precise) / iquv_precise)
    print('Accuracy: I={:.3f}  Q={:.3f}  U={:.3f}  V={:.3f}'.format(
        acc[0], acc[1], acc[2], acc[3]))

    print('Speedup: {:.1f}'.format(ctime_precise / ctime_approx))
Exemplo n.º 30
 def omstyle (v):
     """Instantiate the OmegaPlot style class named *v*.

     Looks *v* up as an attribute of ``om.styles`` and calls it with no
     arguments. Any failure to find or construct the style is reported
     through ``cli.die``.
     """
     try:
         return getattr (om.styles, v) ()
     except Exception:
         cli.die ('can\'t load/instantiate OmegaPlot style "%s"', v)
Exemplo n.º 31
def doit (driver, args):
    """Run an omegafig driver file and show or save the plot it produces.

    *driver* is the path of a Python file that must define a callable named
    ``plot``; it may also define a ``figdefaults`` mapping whose entries
    are copied onto the configuration before the command line is parsed.

    *args* is the list of command-line words. Words containing ``=`` are
    treated as configuration keywords; all others are passed positionally to
    ``plot``, and their count must equal the function's declared argument
    count.

    Read, compile and execution failures are reported through ``cli.die``.
    When the ``OMEGAFIG_BACKTRACE`` environment variable is set, compile and
    execution errors are re-raised instead so the traceback is visible.

    NOTE: the driver file is executed with ``exec``; only run trusted files.
    """
    # Load up the driver code

    try:
        with open (driver) as f:
            text = f.read ()
    except Exception as e:
        cli.die ('cannot read driver file "%s": %s', driver, e)

    try:
        code = compile (text, driver, 'exec')
    except Exception as e:
        if 'OMEGAFIG_BACKTRACE' in os.environ:
            raise
        cli.die ('cannot compile driver file "%s": %s', driver, e)

    ns = {'__file__': driver,
          '__name__': '__omegafig__'}

    try:
        exec (code, ns)
    except Exception as e:
        if 'OMEGAFIG_BACKTRACE' in os.environ:
            raise
        cli.die ('cannot execute driver file "%s": %s', driver, e)

    pfunc = ns.get ('plot')
    if pfunc is None:
        cli.die ('driver file "%s" does not provide a function called "plot"', driver)
    if not callable (pfunc):
        cli.die ('driver file "%s" provides something called "plot", but it\'s '
                 'not a function', driver)

    # Deal with args

    try:
        code = pfunc.__code__
    except AttributeError:
        # Fallback attribute name for function code objects.
        code = pfunc.func_code

    nargs = code.co_argcount

    keywords = []
    nonkeywords = []

    for arg in args:
        if '=' in arg:
            keywords.append (arg)
        else:
            nonkeywords.append (arg)

    if len (nonkeywords) != nargs:
        cli.die ('expected %d non-keyword arguments to driver, but got %d',
                 nargs, len (nonkeywords))

    config = Config ()
    defaults = ns.get ('figdefaults')

    if defaults is not None:
        for key in defaults:
            setattr (config, key, defaults[key])

    config.parse (keywords)

    # Set up omegaplot globals as appropriate

    if config.pango:
        try:
            import omega.pango_g3 as ompango
        except ImportError:
            import omega.pango_g2 as ompango

        fontparams = {}
        if config.pangofamily is not None:
            fontparams['family'] = config.pangofamily
        if config.pangosize is not None:
            fontparams['size'] = config.pangosize
        if len (fontparams):
            ompango.setFont (**fontparams)

    # Execute.

    p = pfunc (*nonkeywords)

    if config.out is None:
        p.show (style=config.omstyle)
    else:
        # NOTE(review): the call this was restored from ended in a dangling
        # comma after ``dims=``, so further keyword arguments may belong
        # here -- confirm against the upstream implementation.
        p.save (config.out, style=config.omstyle, dims=config.dims)
Exemplo n.º 32
def _ms_transpose (vpath, tpath, transpose_args, squash_time_gaps=False):
    """Transpose the visibilities of a CASA Measurement Set into a file
    organized by baseline/polarization ("basepol").

    The data are read in two passes. Pass 1 collects the set of timestamps,
    basepols and spectral windows actually present so that regular time and
    frequency grids can be built. Pass 2 re-reads the data and writes each
    record to its computed offset in the output file, going through an
    in-memory buffer that is flushed in offset order.

    After the header, the output holds one slice per basepol. Each slice
    contains a correlation block (8 bytes per channel per time slot), then a
    flag block (1 byte per channel per time slot), then a "uvww" block (four
    doubles per time slot: u, v, w and a weight computed as ``sigma**-2``).
    Named variables are appended after the data, at an 8-byte-aligned
    offset.

    Parameters
    ----------
    vpath
        Path of the input Measurement Set directory.
    tpath
        Path of the output file; it is opened unbuffered in ``'wb+'`` mode.
    transpose_args
        Mapping of options. Keys naming an ``msselect`` field are passed to
        ``ms.msselect``; ``'datacol'`` names the data column to read
        (default ``'data'``). A ``'polarization'`` key only triggers a
        warning because that selection is not implemented.
    squash_time_gaps
        If true, time slots without data are removed from the grid, leaving a
        one-slot break between separate runs of populated slots.

    Returns
    -------
    tuple
        ``(currec, nvis, data_size)``: the number of records written, the
        number of visibilities processed, and the size in bytes of the data
        section.
    """
    from pwkit.environments.casa import util as casautil
    b = casautil.sanitize_unicode

    def vispath (*args):
        return b(os.path.join (vpath, *args))

    # TODO: I think that with ms.nrow() and ms.range() we can do this
    # while taking only one pass through the data.

    tb = casautil.tools.table ()
    ms = casautil.tools.ms ()
    print ('pass 1 ...')

    # Load polarization stuff we need

    tb.open (vispath ('DATA_DESCRIPTION'))
    ddid_to_pid = tb.getcol (b'POLARIZATION_ID')
    ddid_to_spwid = tb.getcol (b'SPECTRAL_WINDOW_ID')
    tb.close ()

    tb.open (vispath ('POLARIZATION'))
    numcorrs = tb.getcol (b'NUM_CORR')
    npids = numcorrs.size
    prodinfo = [None] * npids

    for i in range (npids):
        corrtypes = tb.getcell (b'CORR_TYPE', i)
        prodinfo[i] = [casautil.pol_to_miriad[c] for c in corrtypes]

    tb.close ()

    ddprods = [prodinfo[p] for p in ddid_to_pid]

    # Load spw configuration stuff we need. Don't grid the info yet
    # since many of the spws may be filtered out by the selection
    # setup.

    tb.open (vispath ('SPECTRAL_WINDOW'))
    nspws = tb.getcol (b'NUM_CHAN').size
    sfreqs = []

    for i in range (nspws):
        sfreqs.append (tb.getcell (b'CHAN_FREQ', i) * 1e-9) # Hz -> GHz

    tb.close ()

    # Antenna info

    tb.open (vispath ('ANTENNA'))
    nants = tb.getcol (b'DISH_DIAMETER').size
    names = tb.getcol (b'NAME')
    stations = tb.getcol (b'STATION')
    fullnames = []
    maxnamelen = 0

    for i in range (nants):
        f = '%s@%s' % (names[i], stations[i])
        fullnames.append (f)
        maxnamelen = max (maxnamelen, len (f))

    # Names are stored as zero-padded rows of bytes.
    antnames = np.zeros ((nants, maxnamelen), dtype=np.byte)

    for i in range (nants):
        f = fullnames[i]
        n = len (f)
        antnames[i,:n] = np.fromstring (f, dtype=np.byte)

    # Open and set up filtering. msselect() says it supports
    # 'polarization' as a field, but it doesn't seem to do anything?

    ms.open (vispath ())
    ms_selectors = frozenset ('array baseline field observation polarization '
                              'scan scanintent spw taql time uvdist'.split ())
    mssel = dict (kv for kv in iteritems(transpose_args)
                  if kv[0] in ms_selectors)
    # ms.selectinit () needed for selectpolarization() below
    ms.msselect (b(mssel))

    # Changes shape of 'data' column below. Disable for now since
    # I don't feel like debugging it.
    if 'polarization' in transpose_args:
        warn ('polarization selection not implemented for MS data')
        pass #ms.selectpolarization (transpose_args['polarization'].split (','))

    # Get table of times and basepols

    ms.iterinit (maxrows=65536) # XXX semi-arbitrary constant
    ms.iterorigin ()
    colnames = b('time antenna1 antenna2 data_desc_id'.split ())
    nrecs = 0
    times = set ()
    pbps = set ()
    seenspws = set ()

    while True:
        cols = ms.getdata (items=colnames)
        # time is (chunksize)

        for i in range (cols['time'].size):
            t = cols['time'][i] / 86400. + 2400000.5 # CASA to miriad timesystems

            ddid = cols['data_desc_id'][i]

            pi = ddprods[ddid]
            a1 = cols['antenna1'][i] + 1 # 0-based -> 1-based
            a2 = cols['antenna2'][i] + 1

            seenspws.add (ddid_to_spwid[ddid])

            for j in range (len (pi)):
                nrecs += 1
                pbp = mtutil.bpToPBP32 (mtutil.aap2bp (a1, a2, pi[j]))
                times.add (t)
                pbps.add (pbp)

        if not ms.iternext ():
            break

    # Get the timestamps onto a nice even grid, checking that our
    # gridding is decent.

    datatimes = np.asarray (sorted (times), dtype=np.double)
    nt = datatimes.size
    time0 = datatimes[0]
    cadence = np.median (datatimes[1:] - datatimes[:-1])
    tidxs = (datatimes - time0) / cadence
    timemap = np.empty (nt, dtype=int)
    ntslot = int (round (tidxs[-1])) + 1
    tscale = ntslot * 1. / nt
    ntoff = 0

    if squash_time_gaps:
        # -1 marks a grid slot that holds no data.
        slot_to_data = np.zeros (ntslot, dtype=int) - 1

    for i in range (nt):
        timemap[i] = int (round (tidxs[i]))
        # Rounding can go either way, so compare the magnitude of the miss.
        if abs (tidxs[i] - timemap[i]) > 0.01:
            ntoff += 1

        if squash_time_gaps:
            slot_to_data[timemap[i]] = i

    if ntoff > 0:
        warn ('had %d timestamps (out of %d) with poor mapping onto the grid',
              ntoff, nt)

    if squash_time_gaps:
        # Re-index the data to remove time gaps. As a convenience we throw in
        # a small break between discrete observations.
        seen_any = False
        in_populated_run = False
        squashed_idx = 0
        new_gap_size = 1

        for i in range (ntslot):
            if slot_to_data[i] == -1:
                # There are no data for this slot.
                in_populated_run = False
            else:
                # There are data for this slot.
                if not in_populated_run and seen_any:
                    squashed_idx += new_gap_size
                timemap[slot_to_data[i]] = squashed_idx
                squashed_idx += 1
                seen_any = True
                in_populated_run = True

        ntslot = squashed_idx
        tscale = ntslot * 1. / nt

    if tscale > 1.05:
        warn ('data size increasing by factor of %.2f to get everything onto '
              'the time grid', tscale)

    nt = ntslot

    # Now do the same thing for the spectral windows that are actually used,
    # computing lookup info for fast mapping of DDID to our frequency grid.

    freqs = set ()

    for spwid in seenspws:
        freqs.update (sfreqs[spwid])

    datafreqs = np.asarray (sorted (freqs), dtype=np.double)
    nf = datafreqs.size
    freq0 = datafreqs[0]
    sdf = np.median (datafreqs[1:] - datafreqs[:-1])
    nfslot = int (round ((datafreqs[-1] - freq0) / sdf)) + 1
    fscale = nfslot * 1. / nf
    ddfreqmap = []
    nfoff = 0
    maxnchan = 0

    for i in range (len (ddid_to_spwid)):
        spwid = ddid_to_spwid[i]
        if spwid not in seenspws:
            # Keep ddfreqmap indexable by DDID even for unused windows.
            ddfreqmap.append (None)
            continue

        # If more than one DDID shares a SPWID, we're recomputing this stuff.
        # Oh well.

        ddfreqs = sfreqs[spwid]
        ddidx0 = None
        ddprevidx = None

        # A window whose channels run downward in frequency is walked
        # backwards when it is copied onto the grid.
        if ddfreqs.size > 1 and ddfreqs[1] < ddfreqs[0]:
            ddstep = -1
        else:
            ddstep = 1

        for j in range (ddfreqs.size):
            trueidx = (ddfreqs[j] - freq0) / sdf
            ddidx = int (round (trueidx))

            # Rounding can go either way, so compare the magnitude of the miss.
            if abs (ddidx - trueidx) > 0.01:
                nfoff += 1

            if j == 0:
                ddidx0 = ddidx
            elif ddidx != ddprevidx + ddstep:
                die ('cannot transpose: spw must map directly onto freq grid '
                     '(spw #%d, chan %d->%d, %d->%d)', spwid, j - 1, j,
                     ddprevidx, ddidx)

            ddprevidx = ddidx

        if ddstep == -1:
            # For a descending window the last channel has the lowest grid
            # index, so that is where the block starts.
            ddidx0 = ddidx

        ddfreqmap.append ((ddidx0, ddfreqs.size, ddstep))
        maxnchan = max (maxnchan, ddfreqs.size)

    if nfoff > 0:
        warn ('had %d frequencies (out of %d) with poor mapping onto the grid',
              nfoff, nf)

    if fscale > 1.05:
        warn ('data size increasing by factor of %.2f to get everything onto '
              'the frequency grid', fscale)

    freqs = np.arange (nfslot) * sdf + freq0
    nf = nfslot

    # Compute offsets and record sizes for our output file, and write
    # the header. Write-then-seek seems to break if buffering is used???

    pbps = np.asarray (sorted (pbps), dtype=np.int32)
    nbp = pbps.size

    corr_bytes = 8 * nf
    uvww_bytes = 4 * 8
    flag_bytes = nf
    slice_bytes = (corr_bytes + flag_bytes + uvww_bytes) * nt

    # Round offsets up to the next multiple of 8 bytes.
    data_offset = ((header.size + 7) // 8) * 8
    data_size = slice_bytes * nbp

    vars_offset = ((data_offset + data_size + 7) // 8) * 8

    def corr_offset (bpidx, tidx, fidx):
        return data_offset + bpidx * slice_bytes + corr_bytes * tidx + 8 * fidx

    def flag_offset (bpidx, tidx, fidx):
        return (data_offset + bpidx * slice_bytes + corr_bytes * nt +
                flag_bytes * tidx + fidx)

    def uvww_offset (bpidx, tidx):
        return (data_offset + bpidx * slice_bytes + (corr_bytes + flag_bytes) * nt +
                uvww_bytes * tidx)

    f = open (tpath, 'wb+', 0)
    f.truncate (vars_offset) # hint how big the file will be
    # NOTE(review): the final header field was restored as vars_offset;
    # confirm it against the definition of the `header` struct.
    f.write (header.pack (BYTE_ORDER_MARKER,
                          nbp, nt, nf,
                          freq0, sdf,
                          time0, cadence,
                          vars_offset))

    # Our little system for buffering/writing data. Given how the CASA Python
    # interface works, I don't think we can preallocate a huge buffer that
    # everything gets stuffed in. Which is sad. TODO: smarter data structure
    # that sorts the keys as we insert them.

    buffer_size = [0] # hack so we can modify value in the funcs below
    buffer_info = {}
    buffer_data = np.empty (CACHE_SIZE, dtype=np.uint8)
    currec = 0

    def dump ():
        # Flush every buffered chunk to the file in ascending offset order.
        if not len (buffer_info):
            return

        pct = 100. * currec / nrecs
        msg = '   %3.1f%% (%d/%d) writing ...' % (pct, currec, nrecs)
        unbufout.write(msg.ljust (60).encode('utf8') + b'\r')

        offsets = sorted (iterkeys(buffer_info))
        curofs = None

        for offset in offsets:
            bofs, blen = buffer_info[offset]

            # Only seek when this chunk does not directly follow the last one.
            if curofs is None or offset != curofs:
                f.seek (offset)
            f.write (buffer_data[bofs:bofs+blen])

            curofs = offset + blen

        buffer_size[0] = 0
        buffer_info.clear ()

    def bufferview (offset, dtype, nelem):
        # Reserve room in the buffer for `nelem` items of `dtype` destined
        # for file position `offset`, and return a typed view to fill in.
        bofs = (buffer_size[0] + 7) & (~7) # align for safety
        blen = dtype ().nbytes * nelem

        if bofs + blen > CACHE_SIZE:
            dump ()
            bofs = 0

        # if paranoid, check that offset not already in buffer_data
        buffer_size[0] = bofs + blen
        buffer_info[offset] = (bofs, blen)
        return buffer_data[bofs:bofs+blen].view (dtype)

    # Pass 2: write data. Set up some stuff for progress reporting.
    # NOTE: we're going to keep on rewriting uvw once for each spw

    print ('pass 2 ...')

    unbufout = os.fdopen (os.dup (1), 'wb', 0)
    tstart = time.time ()
    tlastprint = 0
    nvis = 0
    seenany = np.zeros (nbp, dtype=bool)
    meanuvw = np.zeros ((nbp, 3), dtype=np.double)
    muvwcounts = np.zeros (nbp, dtype=int)

    datacol = transpose_args.get ('datacol', 'data')
    colnames = b([datacol] +
                 'time antenna1 antenna2 data_desc_id flag uvw sigma'.split ())
    maxrows = CACHE_SIZE // (2 * maxnchan * 16) # 128 bits per viz.; factor of 2 safety margin
    ms.iterinit (maxrows=maxrows)
    ms.iterorigin ()

    while True:
        cols = ms.getdata (items=colnames)
        # flag and data are (npol, nchan, chunksize)
        # uvw is (3, chunksize)
        # sigma is (npol, chunksize)
        # rest are scalars, shape (chunksize)
        # data is complex128!!! converting is super slow and sad :-(

        data = cols[datacol]
        flags = cols['flag']

        for i in range (cols['time'].size):
            t = cols['time'][i] / 86400. + 2400000.5 # CASA to miriad timesystems
            tidx = timemap[datatimes.searchsorted (t)]
            ddid = cols['data_desc_id'][i]
            pi = ddprods[ddid]
            npol = len (pi)
            a1 = cols['antenna1'][i] + 1 # 0-based -> 1-based
            a2 = cols['antenna2'][i] + 1
            freqidx0, nchan, step = ddfreqmap[ddid]

            # Progress report: checked every 100 records, printed at most
            # once per second.
            if currec % 100 == 0 and currec:
                now = time.time ()

                if now - tlastprint > 1:
                    pct = 100. * currec / nrecs
                    elapsed = now - tstart
                    total = 1. * elapsed * nrecs / currec
                    eta = total - elapsed

                    msg = '   %3.1f%% (%d/%d) elapsed %s ETA %s total %s' % \
                        (pct, currec, nrecs, _sfmt (elapsed), _sfmt (eta), _sfmt (total))
                    unbufout.write(msg.ljust (60).encode('utf8') + b'\r')
                    tlastprint = now

            nvis += npol * nchan

            for j in range (npol):
                currec += 1
                pbp = mtutil.bpToPBP32 (mtutil.aap2bp (a1, a2, pi[j]))
                bpidx = pbps.searchsorted (pbp)

                uvww = bufferview (uvww_offset (bpidx, tidx), np.double, 4)
                uvww[:3] = cols['uvw'][:,i] * casautil.INVERSE_C_MNS
                uvww[3] = cols['sigma'][j,i]**-2
                muvwcounts[bpidx] += 1
                meanuvw[bpidx] += uvww[:3]

                corrdata = bufferview (corr_offset (bpidx, tidx, freqidx0),
                                       np.complex64, nchan)
                corrdata[:] = data[j,::step,i] # copy and convert

                flagdata = bufferview (flag_offset (bpidx, tidx, freqidx0),
                                       np.uint8, nchan)
                # Stored flag bytes are the logical inverse of the MS flags.
                np.logical_not (flags[j,::step,i], flagdata)

                if flagdata.any ():
                    seenany[bpidx] = 1

        if not ms.iternext ():
            break

    dump ()

    tfinish = time.time ()
    elapsed = tfinish - tstart
    print ('   100%% (%d/%d) elapsed %s ETA 0s total %s   ' %
           (currec, nrecs, _sfmt (elapsed), _sfmt (elapsed)))
    unbufout.close ()

    # Finally, write out variables

    f.seek (vars_offset)
    savevariable (f, 'vispath', np.fromstring (b(vpath), dtype=np.byte))
    savevariable (f, 'basepols', pbps)
    savevariable (f, 'antnames', antnames)
    # Basepols for which no stored flag byte was ever nonzero.
    flaggedbps = pbps[np.where (seenany == 0)]
    savevariable (f, 'flaggedbps', flaggedbps)
    s = ' '.join ('%s=%s' % t for t in iteritems(transpose_args))
    savevariable (f, 'transargs', np.fromstring (b(s), dtype=np.byte))

    # Avoid dividing by zero for basepols with no records; their means are
    # reset to zero afterwards.
    wbad = np.where (muvwcounts == 0)
    muvwcounts[wbad] = 1
    meanuvw[:,0] /= muvwcounts # see _mir_transpose ()
    meanuvw[:,1] /= muvwcounts
    meanuvw[:,2] /= muvwcounts
    meanuvw[wbad] = 0
    meanuvw *= (freq0 + 0.5 * sdf * nf) / freq0
    savevariable (f, 'meanuvws', meanuvw)

    f.close ()
    ms.close ()
    return currec, nvis, data_size