Example #1
def main():
    """Main function for calling summarize_clsim_table as a script"""
    t0 = time()
    args = parse_args()
    kwargs = vars(args)
    table_fpaths = []
    for fpath in kwargs.pop('table-fpaths'):
        table_fpaths.extend(glob(expand(fpath)))
    for fpath in table_fpaths:
        kwargs['table_fpath'] = fpath
        summarize_clsim_table(**kwargs)
    total_time = time() - t0
    if len(table_fpaths) > 1:
        avg = np.round(total_time / len(table_fpaths), 3)
        wstderr('Average time to summarize tables: {} s/table\n'.format(avg))
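`parse_args` is not shown above; the following is a hypothetical sketch of what it might look like, assuming an argparse-based CLI. Note that argparse keeps a positional argument's name verbatim as its dest, which is why `main` pops the key 'table-fpaths' (with a hyphen) from `vars(args)`.

def parse_args(description=__doc__):
    """Hypothetical sketch of the argument parser used by `main`."""
    import argparse
    parser = argparse.ArgumentParser(description=description)
    # Positional arg: its dest stays 'table-fpaths', hyphen included
    parser.add_argument(
        'table-fpaths', nargs='+',
        help='Path(s) to CLSim table(s); glob patterns are expanded',
    )
    # Remaining options mirror `summarize_clsim_table` kwargs; these flag
    # names are assumptions, not taken from the source
    parser.add_argument('--outdir', default=None)
    parser.add_argument(
        '--no-save-summary', dest='save_summary', action='store_false',
    )
    return parser.parse_args()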
Example #2
def summarize_clsim_table(table_fpath,
                          table=None,
                          save_summary=True,
                          outdir=None):
    """
    Parameters
    ----------
    table_fpath : string
        Path to table (or just the table's filename if `outdir` is specified)

    table : mapping, optional
        If the table has already been loaded, it can be passed here to avoid
        re-loading the table.

    save_summary : bool
        Whether to save the table summary to disk.

    outdir : string, optional
        If `save_summary` is True, write the summary to this directory. If
        `outdir` is not specified and `save_summary` is True, the summary will
        be written to the same directory that contains `table_fpath`.

    Returns
    -------
    table
        See `load_clsim_table` for details of the data structure

    summary : OrderedDict
        Summary of the table's metadata and marginal distributions.

    """
    t_start = time()
    # Note: `from_json` is needed below whenever an existing metadata file is
    # found, even if `save_summary` is False, so import unconditionally
    from pisa.utils.jsons import from_json, to_json

    table_fpath = expand(table_fpath)
    srcdir, clsim_fname = dirname(table_fpath), basename(table_fpath)
    invalid_fname = False
    try:
        fname_info = interpret_clsim_table_fname(clsim_fname)
    except ValueError:
        invalid_fname = True
        fname_info = {}

    if outdir is None:
        outdir = srcdir
    outdir = expand(outdir)
    mkdir(outdir)

    if invalid_fname:
        metapath = None
    else:
        metaname = (CLSIM_TABLE_METANAME_PROTO[-1].format(
            hash_val=fname_info['hash_val']))
        metapath = join(outdir, metaname)
    if metapath and isfile(metapath):
        meta = from_json(metapath)
    else:
        meta = dict()

    if table is None:
        table = load_clsim_table(table_fpath)

    summary = OrderedDict()
    for key in table.keys():
        if key == 'table':
            continue
        summary[key] = table[key]
    if fname_info:
        for key in ('hash_val', 'string', 'depth_idx', 'seed'):
            summary[key] = fname_info[key]
    # TODO: Add hole ice info when added to tray_kw_to_hash
    if meta:
        summary['n_events'] = meta['tray_kw_to_hash']['NEvents']
        summary['ice_model'] = meta['tray_kw_to_hash']['IceModel']
        summary['tilt'] = not meta['tray_kw_to_hash']['DisableTilt']
        for key, val in meta.items():
            if key.endswith('_binning_kw'):
                summary[key] = val
    elif 'fname_version' in fname_info and fname_info['fname_version'] == 1:
        summary['n_events'] = fname_info['n_events']
        summary['ice_model'] = 'spice_mie'
        summary['tilt'] = False
        summary['r_binning_kw'] = dict(min=0.0, max=400.0, n_bins=200, power=2)
        summary['costheta_binning_kw'] = dict(min=-1, max=1, n_bins=40)
        summary['t_binning_kw'] = dict(min=0.0, max=3000.0, n_bins=300)
        summary['costhetadir_binning_kw'] = dict(min=-1, max=1, n_bins=20)
        summary['deltaphidir_binning_kw'] = dict(min=0.0, max=np.pi, n_bins=20)

    # Save marginal distributions and info to file
    norm = (
        1 / table['n_photons'] /
        (SPEED_OF_LIGHT_M_PER_NS / table['phase_refractive_index'] *
         np.mean(np.diff(table['t_bin_edges'])))
        #* table['angular_acceptance_fract']
        * (len(table['costheta_bin_edges']) - 1))
    summary['norm'] = norm
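    # A rough reading of `norm` above (not a derivation): photon counts are
    # divided by the number of simulated photons and by the mean time-bin
    # width converted to a track length via the phase velocity
    # (SPEED_OF_LIGHT_M_PER_NS / phase_refractive_index), then multiplied by
    # the number of costheta bins; the angular-acceptance factor is
    # deliberately left commented out of the expression.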

    dim_names = ('r', 'costheta', 't', 'costhetadir', 'deltaphidir')
    n_dims = len(table['table_shape'])
    assert n_dims == len(dim_names)

    # Apply norm to underflow and overflow so magnitudes can be compared
    # relative to plotted marginal distributions
    for flow, idx in product(('underflow', 'overflow'), range(n_dims)):
        summary[flow][idx] = summary[flow][idx] * norm

    wstderr('Finding marginal distributions...\n')
    wstderr('    masking off zeros in table...')
    t0 = time()
    nonzero_table = np.ma.masked_equal(table['table'], 0)
    wstderr(' ({} ms)\n'.format(np.round((time() - t0) * 1e3, 3)))

    t0_marg = time()
    summary['dimensions'] = OrderedDict()
    for keep_axis, ax_name in zip(tuple(range(n_dims)), dim_names):
        remove_axes = list(range(n_dims))
        remove_axes.pop(keep_axis)
        remove_axes = tuple(remove_axes)
        axis = OrderedDict()

        wstderr('    mean across non-{} axes...'.format(ax_name))
        t0 = time()
        axis['mean'] = norm * np.asarray(
            np.mean(table['table'], axis=remove_axes))
        wstderr(' ({} s)\n'.format(np.round(time() - t0, 3)))

        wstderr('    median across non-{} axes...'.format(ax_name))
        t0 = time()
        axis['median'] = norm * np.asarray(
            np.ma.median(nonzero_table, axis=remove_axes))
        wstderr(' ({} s)\n'.format(np.round(time() - t0, 3)))

        wstderr('    max across non-{} axes...'.format(ax_name))
        t0 = time()
        axis['max'] = norm * np.asarray(
            np.max(table['table'], axis=remove_axes))
        wstderr(' ({} s)\n'.format(np.round(time() - t0, 3)))
        summary['dimensions'][ax_name] = axis
    wstderr('  Total time to find marginal distributions: {} s\n'.format(
        np.round(time() - t0_marg, 3)))

    if save_summary:
        ext = None
        base_fname = clsim_fname
        while ext not in ('', '.fits'):
            base_fname, ext = splitext(base_fname)
            ext = ext.lower()
        outfpath = join(outdir, base_fname + '_summary.json.bz2')
        to_json(summary, outfpath)
        print('saved summary to "{}"'.format(outfpath))

    wstderr('Time to summarize table: {} s\n'.format(
        np.round(time() - t_start, 3)))

    return table, summary
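A minimal usage sketch (the file name and values are hypothetical):

table, summary = summarize_clsim_table(
    'clsim_table_set_abc123_string_1_depth_0_seed_0.fits',  # hypothetical
    save_summary=True,
    outdir='/tmp/table_summaries',
)
print(summary['n_photons'], summary['norm'])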
Example #3
def load_ckv_table_compr(fpath, mmap):
    """Load a Cherenkov table from disk.

    Parameters
    ----------
    fpath : string
        Path to directory containing the table's .npy files.

    mmap : bool
        Whether to memory map the table (if it's stored in a directory
        containing .npy files).

    Returns
    -------
    table : OrderedDict
        Items are
        - 'n_photons' :
        - 'group_refractive_index' :
        - 'phase_refractive_index' :
        - 'r_bin_edges' :
        - 'costheta_bin_edges' :
        - 't_bin_edges' :
        - 'costhetadir_bin_edges' :
        - 'deltaphidir_bin_edges' :
        - 'ckv_template_map' : np.ndarray
        - 't_indep_ckv_table' : np.ndarray (if available)

    """
    fpath = expand(fpath)
    table = OrderedDict()

    if mmap:
        mmap_mode = 'r'
    else:
        mmap_mode = None

    if DEBUG:
        wstderr('Loading ckv table from {} ...\n'.format(fpath))

    if isfile(fpath):
        assert basename(fpath) == 'ckv_table.npy'
        fpath = dirname(fpath)

    t0 = time()
    indir = fpath

    for key in CKV_TABLE_KEYS + ['t_indep_ckv_table']:
        fpath = join(indir, key + '.npy')
        if DEBUG:
            wstderr('    loading {} from "{}" ...'.format(key, fpath))

        t1 = time()
        if isfile(fpath):
            table[key] = np.load(fpath, mmap_mode=mmap_mode)

        elif key != 't_indep_ckv_table':
            raise ValueError(
                'Could not find file "{}" for loading table key "{}"'
                .format(fpath, key)
            )

        if DEBUG:
            wstderr(' ({} ms)\n'.format(np.round((time() - t1)*1e3, 3)))

    if DEBUG:
        wstderr('  Total time to load: {} s\n'.format(np.round(time() - t0, 3)))

    return table
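A usage sketch (the directory path is hypothetical); note that `mmap` is a required positional parameter here, unlike in the loaders below:

ckv_table = load_ckv_table_compr('/data/ckv_tables/abc123', mmap=True)
r_bin_edges = ckv_table['r_bin_edges']
templates = ckv_table['ckv_template_map']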
Example #4
def load_clsim_table_minimal(fpath, mmap=False, include_overflow=False):
    """Load a CLSim table from disk (optionally compressed with zstd).

    Similar to the `load_clsim_table` function but the full table, including
    under/overflow bins, is kept and no normalization or further processing is
    performed on the table data besides populating the output OrderedDict.

    Parameters
    ----------
    fpath : string
        Path to file to be loaded. If the file has extension 'zst', 'zstd', or
        'zstandard', the file will be decompressed using the `python-zstandard`
        Python library before passing to `fits` for interpreting.

    mmap : bool, optional
        Whether to memory map the table

    include_overflow : bool, optional
        By default, underflow and overflow bins (if present) are removed

    Returns
    -------
    table : OrderedDict

    """
    t0 = time()

    table = OrderedDict()
    fpath = expand(fpath)

    if DEBUG:
        wstderr('Loading table from {} ...\n'.format(fpath))

    if isdir(fpath):
        indir = fpath
        if mmap:
            mmap_mode = 'r'
        else:
            mmap_mode = None

        for rel_fpath in listdir(indir):
            key, ext = splitext(rel_fpath)
            abs_fpath = join(indir, rel_fpath)

            if not (isfile(abs_fpath) and ext == '.npy'):
                continue

            if DEBUG:
                wstderr('    loading {} from "{}" ...'.format(key, abs_fpath))

            t1 = time()
            val = np.load(abs_fpath, mmap_mode=mmap_mode)

            # Pull "small" things (less than 10 MiB) into memory so we don't
            # have too many file handles open due to memory mapping
            if mmap and val.nbytes < 10 * 1024**2:
                val = np.copy(val)

            table[key] = val

            if DEBUG:
                wstderr(' ({} ms)\n'.format(np.round((time() - t1) * 1e3, 3)))

    elif isfile(fpath):
        from astropy.io import fits
        fobj = get_decompressd_fobj(fpath)
        pf_table = None
        try:
            pf_table = fits.open(fobj, mode='readonly', memmap=mmap)

            header = pf_table[0].header  # pylint: disable=no-member
            table['table_shape'] = np.array(pf_table[0].data.shape, dtype=int)  # pylint: disable=no-member
            table['group_refractive_index'] = set_explicit_dtype(
                force_little_endian(header['_i3_n_group']))
            table['phase_refractive_index'] = set_explicit_dtype(
                force_little_endian(header['_i3_n_phase']))

            n_dims = len(table['table_shape'])

            new_style = False
            axnames = [None] * n_dims
            binning = [None] * n_dims
            for key in header.keys():
                if not key.startswith('_i3_ax_'):
                    continue
                new_style = True
                axnum = header[key]
                axname = key[len('_i3_ax_'):]
                be0 = header['_i3_{}_min'.format(axname)]
                be1 = header['_i3_{}_max'.format(axname)]
                n_bins = header['_i3_{}_n_bins'.format(axname)]
                power = header.get('_i3_{}_power'.format(axname), 1)
                bin_edges = force_little_endian(pf_table[axnum + 1].data)  # pylint: disable=no-member
                assert np.isclose(bin_edges[0],
                                  be0), '%f .. %f' % (be0, bin_edges[0])
                assert np.isclose(bin_edges[-1],
                                  be1), '%f .. %f' % (be1, bin_edges[-1])
                assert len(bin_edges) == n_bins + 1, '%d vs. %d' % (
                    len(bin_edges), n_bins + 1)
                assert np.allclose(
                    bin_edges,
                    powerspace(start=be0,
                               stop=be1,
                               num=n_bins + 1,
                               power=power),
                )
                axnames[axnum] = axname
                binning[axnum] = bin_edges

            if not new_style:
                if n_dims == 5:
                    axnames = [
                        'r', 'costheta', 't', 'costhetadir', 'deltaphidir'
                    ]
                elif n_dims == 6:
                    axnames = [
                        'r', 'costheta', 'phi', 't', 'costhetadir',
                        'deltaphidir'
                    ]
                else:
                    raise NotImplementedError(
                        '{}-dimensional table not handled for old-style CLSim'
                        ' tables'.format(n_dims))
                binning = [
                    force_little_endian(pf_table[i + 1].data).flat
                    for i in range(len(axnames))
                ]  # pylint: disable=no-member

            for axnum, (axname, bin_edges) in enumerate(zip(axnames, binning)):
                assert axname is not None, 'missing axis %d name' % axnum
                assert bin_edges is not None, 'missing axis %d binning' % axnum

            dtype = np.dtype([(axname, np.float64, dim.size)
                              for axname, dim in zip(axnames, binning)])
            table['binning'] = np.array(tuple(binning), dtype=dtype)

            for keyroot in GENERIC_KEYS:
                keyname = '_i3_' + keyroot
                if keyname in header:
                    val = force_little_endian(header[keyname])
                    if keyroot in (
                            't_is_residual_time',
                            'disable_tilt',
                            'disable_anisotropy',
                    ):
                        val = np.bool8(val)
                    else:
                        val = set_explicit_dtype(val)
                    table[keyroot] = val

            # Get string values that were stored as part of the key name
            # itself (I3 software had trouble saving strings as values in the
            # header "dict", so the workaround was to append the string value
            # to a known key prefix)
            for infix in INFIX_KEYS:
                keyroot = '_i3_' + infix + '_'
                for keyname in header.keys():
                    if not keyname.startswith(keyroot):
                        continue
                    val = keyname[len(keyroot):]
                    table[infix] = np.string0(val)

            if include_overflow:
                slicer = (slice(None), ) * n_dims
            else:
                slicer = (slice(1, -1), ) * n_dims
            table['table'] = force_little_endian(pf_table[0].data[slicer])  # pylint: disable=no-member

            wstderr('    (load took {} s)\n'.format(np.round(time() - t0, 3)))

        except:
            wstderr('ERROR: Failed to load "{}"\n'.format(fpath))
            raise

        finally:
            del pf_table
            if hasattr(fobj, 'close'):
                fobj.close()
            del fobj

    else:  # fpath is neither dir nor file
        raise ValueError('Table does not exist at path "{}"'.format(fpath))

    if 'step_length' not in table:
        table['step_length'] = 1

    if 't_is_residual_time' not in table:
        table['t_is_residual_time'] = True

    if DEBUG:
        wstderr('  Total time to load: {} s\n'.format(np.round(time() - t0,
                                                               3)))

    return table
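`force_little_endian` and `set_explicit_dtype` are helpers defined elsewhere. A minimal sketch of the former, assuming its job is simply to byte-swap the big-endian arrays that FITS files store into native little-endian order:

def force_little_endian(x):
    """Hypothetical sketch: return `x` with big-endian data byte-swapped
    to little-endian; non-big-endian input is returned unchanged."""
    x = np.asarray(x)
    if x.dtype.byteorder == '>':
        x = x.byteswap().newbyteorder()
    return x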
Example #5
def load_clsim_table(fpath, angular_acceptance_fract, quantum_efficiency):
    """Load a CLSim table from disk (optionally compressed with zstd).

    Parameters
    ----------
    fpath : string
        Path to file to be loaded. If the file has extension 'zst', 'zstd', or
        'zstandard', the file will be decompressed using the `python-zstandard`
        Python library before passing to `fits` for interpreting.

    angular_acceptance_fract : float
        Average DOM angular-acceptance fraction; used to compute the table
        normalization.

    quantum_efficiency : float
        DOM quantum efficiency; used to compute the table normalization.

    Returns
    -------
    table : OrderedDict
        Items include
        - 'table_shape' : tuple of int
        - 'table' : np.ndarray
        - 't_indep_table' : np.ndarray
        - 'n_photons' :
        - 'group_refractive_index' :
        - 'phase_refractive_index' :

        If the table is 5D, items also include
        - 'r_bin_edges' :
        - 'costheta_bin_edges' :
        - 't_bin_edges' :
        - 'costhetadir_bin_edges' :
        - 'deltaphidir_bin_edges' :
        - 'table_norm'

    """
    table = OrderedDict()

    assert isfile(fpath)

    table = load_clsim_table_minimal(fpath=fpath, include_overflow=True)
    if 'is_normed' not in table:
        table['is_normed'] = False
    is_normed = table['is_normed']
    if not is_normed:
        table['table_norm'] = get_table_norm(
            angular_acceptance_fract=angular_acceptance_fract,
            quantum_efficiency=quantum_efficiency,
            step_length=table['step_length'],
            **{k: table[k]
               for k in TABLE_NORM_KEYS if k != 'step_length'})
        table['t_indep_table_norm'] = (
            quantum_efficiency * angular_acceptance_fract
        )

    wstderr('Interpreting table...\n')
    t0 = time()
    n_dims = len(table['table_shape'])

    # Cut off first and last bin in each dimension (underflow and
    # overflow bins)
    slice_wo_overflow = (slice(1, -1), ) * n_dims
    wstderr('    slicing to remove underflow/overflow bins...')
    t0 = time()
    table_wo_overflow = table['table'][slice_wo_overflow]
    wstderr(' ({} ms)\n'.format(np.round((time() - t0) * 1e3)))

    wstderr('    slicing and summarizing underflow and overflow...')
    t0 = time()
    underflow, overflow = [], []
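    # Each slice below keeps the central (non-flow) bins on every axis except
    # axis `n`, where it picks the underflow (0) or overflow (-1) bin; e.g.
    # for n_dims = 3 and n = 1, the underflow slice is
    # (slice(1, -1), 0, slice(1, -1))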
    for n in range(n_dims):
        sl = tuple([slice(1, -1)] * n + [0] + [slice(1, -1)] *
                   (n_dims - 1 - n))
        underflow.append(table['table'][sl].sum())

        sl = tuple([slice(1, -1)] * n + [-1] + [slice(1, -1)] *
                   (n_dims - 1 - n))
        overflow.append(table['table'][sl].sum())
    wstderr(' ({} ms)\n'.format(np.round((time() - t0) * 1e3)))

    table['table'] = table_wo_overflow
    table['underflow'] = np.array(underflow)
    table['overflow'] = np.array(overflow)

    return table
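A usage sketch; the file name and normalization values are illustrative only:

table = load_clsim_table(
    'retro_nevts1000_string_1_depth_0.fits',  # hypothetical file name
    angular_acceptance_fract=0.338,           # illustrative value
    quantum_efficiency=0.25,                  # illustrative value
)
print(table['table'].shape, table['underflow'], table['overflow'])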
Example #6
def get_all_stats(
    outdir,
    min_pulses_per_event,
    overwrite=False,
    only_sets=None,
    processes=None,
    verbosity=0,
):
    """Get stats for all data and MC sets.

    Parameters
    ----------
    outdir : string

    min_pulses_per_event : int >= 0

    overwrite : bool, optional
        Whether to overwrite any existing stats files

    only_sets : string, iterable thereof, or None, optional
        If specified, string(s) must be keys of `MC_NAME_DIRINFOS` and/or
        `DATA_NAME_DIRINFOS` dicts.

    processes : None or int > 0, optional

    verbosity : int >= 0, optional

    Returns
    -------
    stats : OrderedDict
        Keys are dataset names and values are OrderedDicts containing the stats
        for the corresponding datasets.

    """
    outdir = expand(outdir)

    if isinstance(only_sets, string_types):
        only_sets = [only_sets]

    to_process = chain.from_iterable(
        [MC_NAME_DIRINFOS.items(),
         DATA_NAME_DIRINFOS.items()])
    if only_sets is not None:
        only_sets = [s.split("/") for s in only_sets]
        new_to_process = []
        for set_name, subsets_list in to_process:
            new_subsets_list = []
            for only_set in only_sets:
                if set_name != only_set[0]:
                    continue
                if len(only_set) == 1:
                    new_subsets_list = subsets_list
                    break
                else:
                    for subset in subsets_list:
                        if subset["id"] == only_set[1]:
                            new_subsets_list.append(subset)
            if len(new_subsets_list) > 0:
                new_to_process.append((set_name, new_subsets_list))
        to_process = new_to_process

    mkdir(outdir)
    stats = OrderedDict()
    for name, dirinfos in to_process:
        t0 = time.time()

        this_stats = OrderedDict()
        for dirinfo in dirinfos:
            augmented_name = "{}.{}".format(name, dirinfo["id"])
            outfile = join(outdir, "stats_{}.npz".format(augmented_name))
            if isfile(outfile) and not overwrite:
                contents = OrderedDict([(k, v)
                                        for k, v in np.load(outfile).items()])
                if verbosity >= 1:
                    wstderr(
                        'loaded stats for set "{}" from file "{}" ({} sec)\n'.
                        format(augmented_name, outfile,
                               time.time() - t0))
            else:
                contents = get_stats(
                    min_pulses_per_event=min_pulses_per_event,
                    dirinfo=dirinfo,
                    processes=processes,
                    verbosity=verbosity,
                )
                #np.savez_compressed(outfile, **contents)
                np.savez(outfile, **contents)
                if verbosity >= 1:
                    wstderr('saved stats for set "{}" to file "{}" ({} sec)\n'.
                            format(name, outfile,
                                   time.time() - t0))

            if name == "data":
                stats[dirinfo["id"]] = contents
            else:
                for key, vals in contents.items():
                    if key not in this_stats:
                        this_stats[key] = []
                    this_stats[key].append(vals)

            del contents

        if name != "data":
            stats[name] = OrderedDict([(k, np.concatenate(v))
                                       for k, v in this_stats.items()])

    return stats
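A usage sketch. The `only_sets` entries follow the "name" or "name/id" convention implied by the parsing above; the specific set names and paths are hypothetical:

stats = get_all_stats(
    outdir='/tmp/stats',
    min_pulses_per_event=8,
    overwrite=False,
    only_sets=['data', 'muongun/139011'],  # hypothetical set names
    processes=4,
    verbosity=1,
)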
Example #7
def combine_tdi_tiles(
    source_dir,
    dest_dir,
    table_hash,
    gcd,
    bin_edges_file,
    tile_spec_file,
):
    """Combine individual time-independent tiles (one produced per DOM) into a single
    TDI table.

    Parameters
    ----------
    source_dir : str
        Directory containing the per-DOM tile .fits files.

    dest_dir : str
        Directory into which the combined TDI table is written.

    table_hash : str
        Hash identifying the table set; used to locate tile files and to
        validate each tile's metadata.

    gcd : str
        Path to the GCD file (passed to `extract_gcd`).

    bin_edges_file : str
        Pickle file containing the x, y, z, costhetadir, and phidir bin edges.

    tile_spec_file : str
        Text file with one whitespace-separated tile specification per line.

    """
    source_dir = expand(source_dir)
    dest_dir = expand(dest_dir)
    gcd = expand(gcd)
    bin_edges_file = expand(bin_edges_file)
    tile_spec_file = expand(tile_spec_file)
    mkdir(dest_dir)
    assert isdir(source_dir)
    assert isfile(bin_edges_file)
    assert isfile(tile_spec_file)

    gcd = extract_gcd(gcd)

    bin_edges = load_pickle(bin_edges_file)
    x_edges = bin_edges['x']
    y_edges = bin_edges['y']
    z_edges = bin_edges['z']
    ctdir_edges = bin_edges['costhetadir']
    phidir_edges = bin_edges['phidir']

    n_x = len(x_edges) - 1
    n_y = len(y_edges) - 1
    n_z = len(z_edges) - 1
    n_ctdir = len(ctdir_edges) - 1
    n_phidir = len(phidir_edges) - 1

    n_dir_bins = n_ctdir * n_phidir

    x_bw = (x_edges.max() - x_edges.min()) / n_x
    y_bw = (y_edges.max() - y_edges.min()) / n_y
    z_bw = (z_edges.max() - z_edges.min()) / n_z
    bin_vol = x_bw * y_bw * z_bw

    ctdir_min = ctdir_edges.min()
    ctdir_max = ctdir_edges.max()

    phidir_min = phidir_edges.min()
    phidir_max = phidir_edges.max()

    with open(tile_spec_file, 'r') as f:
        tile_specs = [line.strip() for line in f]

    table = np.zeros(shape=(n_x, n_y, n_z, n_ctdir, n_phidir),
                     dtype=np.float32)

    # Slice all table dimensions to exclude {under,over}flow bins
    central_slice = (slice(1, -1), ) * 5

    angsens_model = None
    ice_model = None
    disable_tilt = None
    disable_anisotropy = None
    n_phase = None
    n_group = None

    tiles_info = []

    for tile_spec in tile_specs:
        info = None
        try:
            fields = tile_spec.split()

            info = OrderedDict()

            info['tbl_idx'] = int(fields[0])
            info['string'] = int(fields[1])
            info['dom'] = int(fields[2])
            info['seed'] = int(fields[3])
            info['n_events'] = int(fields[4])

            info['x_min'] = float(fields[5])
            info['x_max'] = float(fields[6])
            info['n_x'] = int(fields[7])

            info['y_min'] = float(fields[8])
            info['y_max'] = float(fields[9])
            info['n_y'] = int(fields[10])

            info['z_min'] = float(fields[11])
            info['z_max'] = float(fields[12])
            info['n_z'] = int(fields[13])

            info['n_ctdir'] = int(fields[14])
            info['n_phidir'] = int(fields[15])

            tiles_info.append(info)

            tile_fpath = glob(
                join(
                    source_dir, 'clsim_table_set'
                    '_{table_hash}'
                    '_tile_{tbl_idx}'
                    '_string_{string}'
                    '_dom_{dom}'
                    '_seed_{seed}'
                    '_n_{n_events}'
                    '.fits'.format(table_hash=table_hash, **info)))[0]
            try:
                fits_table = fits.open(tile_fpath,
                                       mode='readonly',
                                       memmap=True)
            except:
                wstderr('Failed on tile_fpath "{}"'.format(tile_fpath))
                raise

            primary = fits_table[0]

            header = primary.header  # pylint: disable=no-member
            keys = header.keys()

            this_gcd_i3_md5 = extract_meta_from_keys(keys, '_i3_gcd_i3_md5_')
            assert this_gcd_i3_md5 == gcd['source_gcd_i3_md5'], \
                    'this: {}, ref: {}'.format(this_gcd_i3_md5, gcd['source_gcd_i3_md5'])

            this_angsens_model = extract_meta_from_keys(keys, '_i3_angsens_')
            if angsens_model is None:
                angsens_model = this_angsens_model
                _, avg_angsens = load_angsens_model(angsens_model)
            else:
                assert this_angsens_model == angsens_model

            this_table_hash = extract_meta_from_keys(keys, '_i3_hash_')
            assert this_table_hash == table_hash

            this_ice_model = extract_meta_from_keys(keys, '_i3_ice_')
            if ice_model is None:
                ice_model = this_ice_model
            else:
                assert this_ice_model == ice_model

            this_disable_anisotropy = header['_i3_disable_anisotropy']
            if disable_anisotropy is None:
                disable_anisotropy = this_disable_anisotropy
            else:
                assert this_disable_anisotropy == disable_anisotropy

            this_disable_tilt = header['_i3_disable_tilt']
            if disable_tilt is None:
                disable_tilt = this_disable_tilt
            else:
                assert this_disable_tilt == disable_tilt

            this_n_phase = header['_i3_n_phase']
            if n_phase is None:
                n_phase = this_n_phase
            else:
                assert this_n_phase == n_phase

            this_n_group = header['_i3_n_group']
            if n_group is None:
                n_group = this_n_group
            else:
                assert this_n_group == n_group

            assert info['n_ctdir'] == n_ctdir
            assert info['n_phidir'] == n_phidir

            assert np.isclose(header['_i3_costhetadir_min'], ctdir_min)
            assert np.isclose(header['_i3_costhetadir_max'], ctdir_max)

            assert np.isclose(header['_i3_phidir_min'], phidir_min)
            assert np.isclose(header['_i3_phidir_max'], phidir_max)

            n_photons = header['_i3_n_photons']
            n_dir_bins = info['n_ctdir'] * info['n_phidir']

            this_x_bw = (info['x_max'] - info['x_min']) / info['n_x']
            this_y_bw = (info['y_max'] - info['y_min']) / info['n_y']
            this_z_bw = (info['z_max'] - info['z_min']) / info['n_z']

            assert this_x_bw == x_bw
            assert this_y_bw == y_bw
            assert this_z_bw == z_bw

            assert np.any(np.isclose(info['x_min'], x_edges))
            assert np.any(np.isclose(info['x_max'], x_edges))

            assert np.any(np.isclose(info['y_min'], y_edges))
            assert np.any(np.isclose(info['y_max'], y_edges))

            assert np.any(np.isclose(info['z_min'], z_edges))
            assert np.any(np.isclose(info['z_max'], z_edges))

            quantum_efficiency = 0.25 * gcd['rde'][info['string'] - 1,
                                                   info['dom'] - 1]
            norm = n_dir_bins * quantum_efficiency * avg_angsens / (n_photons *
                                                                    bin_vol)
            if np.isnan(norm):
                print('\nTile {} norm is nan!'.format(info['tbl_idx']))
                print('    quantum_efficiency = {}, n_photons = {}'.format(
                    quantum_efficiency, n_photons))
            elif norm == 0:
                print('\nTile {} norm is 0'.format(info['tbl_idx']))

            x_start = np.digitize(info['x_min'] + x_bw / 2, x_edges) - 1
            x_stop = np.digitize(info['x_max'] - x_bw / 2, x_edges)

            y_start = np.digitize(info['y_min'] + y_bw / 2, y_edges) - 1
            y_stop = np.digitize(info['y_max'] - y_bw / 2, y_edges)

            z_start = np.digitize(info['z_min'] + z_bw / 2, z_edges) - 1
            z_stop = np.digitize(info['z_max'] - z_bw / 2, z_edges)

            # NOTE: comparison excludes norm = 0 _and_ norm = NaN
            if norm > 0:
                assert not np.isnan(norm)
                table[x_start:x_stop, y_start:y_stop,
                      z_start:z_stop, :, :] += (
                          norm * primary.data[central_slice]  # pylint: disable=no-member
                      )
        except:
            wstderr('Failed on tile_spec {}'.format(tile_spec))
            if info is not None:
                wstderr('Info:\n{}'.format(info))
            raise
        wstderr('.')

    wstderr('\n')

    metadata = OrderedDict()
    metadata['table_hash'] = table_hash
    metadata['disable_tilt'] = disable_tilt
    metadata['disable_anisotropy'] = disable_anisotropy
    metadata['gcd'] = gcd
    metadata['angsens_model'] = angsens_model
    metadata['ice_model'] = ice_model
    metadata['n_phase'] = n_phase
    metadata['n_group'] = n_group
    metadata['tiles_info'] = tiles_info

    outdir = join(
        dest_dir, 'tdi_table_{}_tilt_{}_anisotropy_{}'.format(
            table_hash,
            'off' if disable_tilt else 'on',
            'off' if disable_anisotropy else 'on',
        ))
    mkdir(outdir)

    name = 'tdi_table.npy'
    outfpath = join(outdir, name)
    wstdout('saving table to "{}"\n'.format(outfpath))
    np.save(outfpath, table)

    outfpath = join(outdir, 'tdi_bin_edges.pkl')
    wstdout('saving bin edges to "{}"\n'.format(outfpath))
    pickle.dump(
        bin_edges,
        open(outfpath, 'wb'),
        protocol=pickle.HIGHEST_PROTOCOL,
    )

    outfpath = join(outdir, 'tdi_metadata.pkl')
    wstdout('saving metadata to "{}"\n'.format(outfpath))
    pickle.dump(
        metadata,
        open(outfpath, 'wb'),
        protocol=pickle.HIGHEST_PROTOCOL,
    )
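`extract_meta_from_keys` is not shown; a minimal sketch of its likely behavior, given the workaround (described in Example #4) of embedding string values in header key names:

def extract_meta_from_keys(keys, prefix):
    """Hypothetical sketch: return the remainder of the first key that
    starts with `prefix` (the string value is embedded in the key name)."""
    for key in keys:
        if key.startswith(prefix):
            return key[len(prefix):]
    raise ValueError('no key starts with "{}"'.format(prefix))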
Example #8
def get_stats(dirinfo, min_pulses_per_event, processes=None, verbosity=0):
    """
    Parameters
    ----------
    dirinfo : dict
        Must contain keys / vals
            "id" : string
            "path" : string
            "n_files" : int

    min_pulses_per_event : int >= 0

    processes : None or int > 0, optional

    verbosity : int >= 0

    Returns
    -------
    stats : OrderedDict
        Keys are stat names from `STATS_PROTO`; values are numpy arrays of the
        concatenated per-directory results.

    """
    if isinstance(dirinfo, string_types):
        dirinfo = [dirinfo]
    elif isinstance(dirinfo, Mapping):
        dirinfo = [dirinfo]

    pulses_filter = pulse_integration_filter

    emax = np.inf

    pool = Pool(processes=processes)
    results = []
    for root_dirinfo in dirinfo:
        root_dir = expand(root_dirinfo["path"])
        n_files = root_dirinfo["n_files"]
        for dirpath, dirs_, files in walk(root_dir, followlinks=True):
            dirs_.sort(key=nsort_key_func)
            if "events.npy" in files:
                results.append(
                    pool.apply_async(
                        process_dir,
                        tuple(),
                        dict(
                            dirpath=dirpath,
                            n_files=n_files,
                            pulses_filter=pulses_filter,
                            emax=emax,
                            min_pulses_per_event=min_pulses_per_event,
                            verbosity=verbosity,
                        ),
                    ))
    pool.close()
    pool.join()

    stats = deepcopy(STATS_PROTO)
    for result in results:
        result = result.get()
        for key in result.keys():
            stats[key].extend(result[key])

    # Concatenate and cull
    new_stats = OrderedDict()
    for stat_name in stats.keys():
        vals = stats[stat_name]
        if len(vals) == 0:
            if verbosity >= 1:
                wstderr('Not using stat "{}" for dirs {}\n'.format(
                    stat_name, dirinfo))
        elif np.isscalar(vals[0]):
            new_stats[stat_name] = np.array(vals)
        else:
            new_stats[stat_name] = np.concatenate(vals)
    stats = new_stats

    return stats
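A usage sketch built from the keys the docstring requires; the id, path, and counts are hypothetical:

stats = get_stats(
    dirinfo={
        'id': '139011',              # hypothetical
        'path': '/data/sim/139011',  # hypothetical
        'n_files': 1000,             # hypothetical
    },
    min_pulses_per_event=8,
    processes=4,
    verbosity=1,
)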
Example #9
def process_dir(
    dirpath,
    n_files,
    min_pulses_per_event,
    pulses_filter,
    emax,
    verbosity=0,
):
    """
    Parameters
    ----------
    dirpath : string
    n_files : int > 0
    min_pulses_per_event : int >= 0
    pulses_filter : None or callable
    emax : 0 <= scalar <= np.inf
    verbosity : int >= 0

    Returns
    -------
    stats : OrderedDict
        Keys are taken from STATS_PROTO, values are numpy arrays

    """
    stats = deepcopy(STATS_PROTO)

    events = np.load(join(dirpath, "events.npy"), mmap_mode="r")
    if len(events) == 0:
        return stats
    mask_vals = deepcopy(events["L5_oscNext_bool"])
    if np.count_nonzero(mask_vals) == 0:
        return stats

    if verbosity >= 2:
        wstderr(".")

    if isfile(join(dirpath, "truth.npy")):
        truth = np.load(join(dirpath, "truth.npy"), mmap_mode="r")
        weights = truth["weight"]
        use_weights = True
    else:
        weights = np.ones(shape=len(events))
        use_weights = False

    if np.isfinite(emax) and emax > 0:
        recos = np.load(
            join(dirpath, "recos", "retro_crs_prefit.npy"),
            mmap_mode="r",
        )
        with np.errstate(invalid='ignore'):
            mask_vals &= recos["energy"]["median"] <= emax
        if np.count_nonzero(mask_vals) == 0:
            return stats

    pulses = load_pickle(
        join(dirpath, "pulses", "{}.pkl".format(PULSE_SERIES_NAME)))

    for mask_val, event_pulses, weight in zip(mask_vals, pulses, weights):
        if not mask_val:
            continue

        if callable(pulses_filter):
            event_pulses = pulses_filter(event_pulses)
            if len(event_pulses) == 0:
                continue

        if use_weights:
            normed_weight = weight / n_files

        # qtot is sum of charge of all hits on all DOMs
        event_pulses_ = []
        tmp_hits_per_dom = []
        tmp_charge_per_dom = []
        tmp_time_diffs_within_dom = []
        tmp_weight_per_dom = []
        for omkey, dom_pulses in event_pulses:
            event_pulses_.append(dom_pulses)
            tmp_hits_per_dom.append(len(dom_pulses))
            tmp_charge_per_dom.append(dom_pulses["charge"].sum())
            #stats["time_diffs_between_hits"].append(
            #    np.concatenate([[0.], np.diff(np.sort(dom_pulses["time"]))])
            #)
            tmp_time_diffs_within_dom.append(dom_pulses["time"] -
                                             dom_pulses["time"].min())
            if use_weights:
                tmp_weight_per_dom.append(normed_weight)

        event_pulses = np.concatenate(event_pulses_)

        # TODO: move min_pulses_per_event before qmin processing
        # TODO: small-pulse agglomeration filter
        if len(event_pulses) < min_pulses_per_event:
            continue

        stats["doms_per_event"].append(len(event_pulses))

        stats["hits_per_dom"].extend(tmp_hits_per_dom)
        stats["charge_per_dom"].extend(tmp_charge_per_dom)
        stats["time_diffs_within_dom"].extend(tmp_time_diffs_within_dom)
        if use_weights:
            stats["weight_per_dom"].extend(tmp_weight_per_dom)

        charge = event_pulses["charge"]
        stats["charge_per_hit"].append(charge)
        stats["charge_per_event"].append(charge.sum())
        stats["hits_per_event"].append(len(event_pulses))
        stats["time_diffs_within_event"].append(event_pulses["time"] -
                                                event_pulses["time"].min())
        if use_weights:
            stats["weight_per_event"].append(normed_weight)
            stats["weight_per_hit"].append(
                np.full(shape=len(event_pulses), fill_value=normed_weight))

    return stats
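`pulse_integration_filter` and the other filters are defined elsewhere. A minimal sketch of the interface a `pulses_filter` callable must satisfy, inferred from how it is called above (it takes and returns a sequence of `(omkey, dom_pulses)` pairs):

def min_charge_filter(event_pulses, min_charge=0.3):
    """Hypothetical filter: drop DOMs whose summed pulse charge is below
    `min_charge`; input and output share the (omkey, dom_pulses) layout."""
    return [
        (omkey, dom_pulses)
        for omkey, dom_pulses in event_pulses
        if dom_pulses['charge'].sum() >= min_charge
    ]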
Example #10
def combine_tables(table_fpaths, outdir=None, overwrite=False):
    """Combine multiple tables together into a single table.

    All tables specified must have the same binnings defined. Tables should
    also be produced using different random seeds (if all else besides
    n_photons is equal); if corresponding metadata files can be found in the
    same directories as the CLSim tables, this will be enforced prior to
    loading and combining the actual tables together.

    Parameters
    ----------
    table_fpaths : string or iterable thereof
        Each string is glob-expanded

    outdir : string, optional
        Directory to which to save the combined table; if not specified, the
        resulting table will be returned but not saved to disk.

    overwrite : bool
        Overwrite an existing table. If a table is found at the output path and
        `overwrite` is False, the function simply returns without raising an
        exception.

    Returns
    -------
    combined_table

    """
    t_start = time()

    # Get all input table filepaths, including glob expansion

    orig_table_fpaths = deepcopy(table_fpaths)
    if isinstance(table_fpaths, string_types):
        table_fpaths = [table_fpaths]
    table_fpaths_tmp = []
    for fpath in table_fpaths:
        table_fpaths_tmp.extend(glob(expand(fpath)))
    table_fpaths = sorted(table_fpaths_tmp, key=nsort_key_func)

    if not table_fpaths:
        raise ValueError(
            "Found no tables given `table_fpaths` = {}".format(orig_table_fpaths)
        )

    wstderr(
        'Found {} tables to combine:\n  {}\n'.format(
            len(table_fpaths), '\n  '.join(table_fpaths)
        )
    )

    # Create the output directory

    if outdir is not None:
        outdir = expand(outdir)
        mkdir(outdir)

    # Combine the tables

    combined_table = None
    table_keys = None
    for fpath in table_fpaths:
        table = load_clsim_table_minimal(fpath, mmap=True)

        base = basename(fpath)
        rootname, ext = splitext(base)
        if ext.lstrip('.') in COMPR_EXTENSIONS:
            base = rootname
        if 'source_tables' not in table:
            table['source_tables'] = np.array([base], dtype=np.string0)

        if combined_table is None:
            combined_table = table
            table_keys = set(table.keys())

            # Formulate output file paths and check if they exist (do on first
            # table to avoid finding out we are going to overwrite a file
            # before loading all the source tables)
            if outdir is not None:
                output_fpaths = OrderedDict(
                    (
                        (k, join(outdir, k + '.npy'))
                        for k in sorted(table_keys.difference(NO_WRITE_KEYS))
                    )
                )
                if not overwrite:
                    for fp in output_fpaths.values():
                        if isfile(fp):
                            raise IOError(
                                'File at {} already exists, NOT overwriting'.format(fp)
                            )
                wstderr(
                    'Output files will be written to:\n  {}\n'.format(
                        '\n  '.join(output_fpaths.values())
                    )
                )

            continue

        # Make sure keys are the same

        new_table_keys = set(table.keys())
        missing_keys = sorted(
            table_keys
            .difference(new_table_keys)
            .difference(NO_VALIDATE_KEYS)
        )
        additional_keys = sorted(
            new_table_keys
            .difference(table_keys)
            .difference(NO_VALIDATE_KEYS)
        )
        if missing_keys or additional_keys:
            raise ValueError(
                'Table is missing keys {} and/or has additional keys {}'.format(
                    missing_keys, additional_keys
                )
            )

        # Validate keys that should be equal

        for key in sorted(table_keys.difference(NO_VALIDATE_KEYS)):
            if not np.array_equal(table[key], combined_table[key]):
                raise ValueError('Unequal "{}" in file {}'.format(key, fpath))

        # Add values from keys that should be summed

        for key in SUM_KEYS:
            if key not in table:
                continue
            combined_table[key] += table[key]

        # Concatenate and sort new source table(s) in source_tables array

        combined_table['source_tables'] = np.sort(
            np.concatenate([combined_table['source_tables'], table['source_tables']])
        )

        # Make sure to clear table from memory since these can be quite large

        del table

    # Save the data to npy files on disk (in a sub-directory for all of this
    # table's files)
    if outdir is not None:
        wstderr('Writing files:\n')

        len_longest_fpath = np.max([len(p) for p in output_fpaths.values()])
        for key in sorted(table_keys.difference(NO_WRITE_KEYS)):
            fpath = output_fpaths[key]
            wstderr('  {} ...'.format(fpath.ljust(len_longest_fpath)))
            t0 = time()
            np.save(fpath, combined_table[key])
            wstderr(' ({:12.3f} s)\n'.format(time() - t0))

    wstderr(
        'Total time to combine tables: {} s\n'.format(np.round(time() - t_start, 3))
    )

    return combined_table
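A usage sketch (the glob pattern and output directory are hypothetical):

combined = combine_tables(
    table_fpaths='/data/tables/clsim_table_set_*_seed_*.fits.zst',
    outdir='/data/tables/combined',
    overwrite=False,
)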
Example #11
def load_clsim_table_minimal(fpath, step_length=None, mmap=False):
    """Load a CLSim table from disk (optionally compressed with zstd).

    Similar to the `load_clsim_table` function but the full table, including
    under/overflow bins, is kept and no normalization or further processing is
    performed on the table data besides populating the output OrderedDict.

    Parameters
    ----------
    fpath : string
        Path to file to be loaded. If the file has extension 'zst', 'zstd', or
        'zstandard', the file will be decompressed using the `python-zstandard`
        Python library before passing to `pyfits` for interpreting.

    step_length : float, optional
        Geometric step length, in meters, used when the table was generated.
        If given, it is checked against any 'step_length' already stored in
        the table (directory format) or recorded in the output table (FITS
        format).

    mmap : bool, optional
        Whether to memory map the table (if it's stored in a directory
        containing .npy files).

    Returns
    -------
    table : OrderedDict
        Items include
        - 'table_shape' : tuple of int
        - 'table' : np.ndarray
        - 't_indep_table' : np.ndarray (if available)
        - 'n_photons' :
        - 'phase_refractive_index' :
        - 'r_bin_edges' :
        - 'costheta_bin_edges' :
        - 't_bin_edges' :
        - 'costhetadir_bin_edges' :
        - 'deltaphidir_bin_edges' :

    """
    table = OrderedDict()

    fpath = expand(fpath)

    if DEBUG:
        wstderr('Loading table from {} ...\n'.format(fpath))

    if isdir(fpath):
        t0 = time()
        indir = fpath
        if mmap:
            mmap_mode = 'r'
        else:
            mmap_mode = None
        for key in MY_CLSIM_TABLE_KEYS + ['t_indep_table']:
            fpath = join(indir, key + '.npy')
            if DEBUG:
                wstderr('    loading {} from "{}" ...'.format(key, fpath))
            t1 = time()
            if isfile(fpath):
                table[key] = np.load(fpath, mmap_mode=mmap_mode)
            elif key != 't_indep_table':
                raise ValueError(
                    'Could not find file "{}" for loading table key "{}"'
                    .format(fpath, key)
                )
            if DEBUG:
                wstderr(' ({} ms)\n'.format(np.round((time() - t1)*1e3, 3)))
        if step_length is not None and 'step_length' in table:
            assert step_length == table['step_length']
        if DEBUG:
            wstderr('  Total time to load: {} s\n'.format(np.round(time() - t0, 3)))
        return table

    if not isfile(fpath):
        raise ValueError('Table does not exist at path "{}"'.format(fpath))

    if mmap:
        print('WARNING: Cannot memory map a fits or compressed fits file;'
              ' ignoring `mmap=True`.')

    import pyfits
    t0 = time()
    fobj = get_decompressd_fobj(fpath)
    try:
        pf_table = pyfits.open(fobj)

        table['table_shape'] = pf_table[0].data.shape # pylint: disable=no-member
        table['n_photons'] = force_little_endian(
            pf_table[0].header['_i3_n_photons'] # pylint: disable=no-member
        )
        table['group_refractive_index'] = force_little_endian(
            pf_table[0].header['_i3_n_group'] # pylint: disable=no-member
        )
        table['phase_refractive_index'] = force_little_endian(
            pf_table[0].header['_i3_n_phase'] # pylint: disable=no-member
        )
        if step_length is not None:
            table['step_length'] = step_length

        n_dims = len(table['table_shape'])
        if n_dims == 5:
            # Space-time dimensions
            table['r_bin_edges'] = force_little_endian(
                pf_table[1].data # meters # pylint: disable=no-member
            )
            table['costheta_bin_edges'] = force_little_endian(
                pf_table[2].data # pylint: disable=no-member
            )
            table['t_bin_edges'] = force_little_endian(
                pf_table[3].data # nanoseconds # pylint: disable=no-member
            )

            # Photon directionality
            table['costhetadir_bin_edges'] = force_little_endian(
                pf_table[4].data # pylint: disable=no-member
            )
            table['deltaphidir_bin_edges'] = force_little_endian(
                pf_table[5].data # pylint: disable=no-member
            )

        else:
            raise NotImplementedError(
                '{}-dimensional table not handled'.format(n_dims)
            )

        table['table'] = force_little_endian(pf_table[0].data) # pylint: disable=no-member

        wstderr('    (load took {} s)\n'.format(np.round(time() - t0, 3)))

    finally:
        del pf_table
        if hasattr(fobj, 'close'):
            fobj.close()
        del fobj

    return table
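`get_decompressd_fobj` (spelling as in the source) is defined elsewhere; a sketch of its likely behavior, assuming the `python-zstandard` library mentioned in the docstring:

def get_decompressd_fobj(fpath):
    """Hypothetical sketch: return a file-like object with the file's
    contents, decompressing first if the extension indicates zstandard."""
    from io import BytesIO
    if fpath.endswith(('.zst', '.zstd', '.zstandard')):
        import zstandard
        with open(fpath, 'rb') as fobj:
            # decompressobj handles frames that don't record their
            # decompressed size
            data = zstandard.ZstdDecompressor().decompressobj().decompress(
                fobj.read()
            )
        return BytesIO(data)
    return open(fpath, 'rb')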
Example #12
def combine_clsim_tables(table_fpaths,
                         outdir=None,
                         overwrite=False,
                         step_length=1.0):
    """Combine multiple CLSim-produced tables together into a single table.

    All tables specified must have the same binnings defined. Tables should
    also be produced using different random seeds; if corresponding metadata
    files can be found in the same directories as the CLSim tables, this will
    be enforced prior to loading and combining the actual tables together.

    Parameters
    ----------
    table_fpaths : string or iterable thereof
        Each string is glob-expanded

    outdir : string, optional
        Directory to which to save the combined table; if not specified, the
        resulting table will be returned but not saved to disk.

    overwrite : bool
        Overwrite an existing table. If a table is found at the output path and
        `overwrite` is False, the function simply returns.

    step_length : float > 0 in units of meters
        Needed for computing the normalization to apply to the `table` in order
        to generate the `t_indep_table` (if the latter doesn't already exist).
        Note that normalization constants due to `n_photons`,
        `quantum_efficiency`, and `angular_acceptance_fract` as well as
        normalization depending (only) upon radial bin (i.e., 1/r^2 geometric
        factor) are _not_ applied to the tables. The _only_ normalization
        applied (and _only_ to `t_indep_table`) is the multiple-counting factor
        that is a function of `step_length` and whichever of the time or radial
        bin dimensions is smaller.

    Returns
    -------
    combined_table

    """
    t_start = time()

    # Get all input table filepaths, including glob expansion

    if isinstance(table_fpaths, basestring):
        table_fpaths = [table_fpaths]
    table_fpaths_tmp = []
    for fpath in table_fpaths:
        table_fpaths_tmp.extend(glob(expand(fpath)))
    table_fpaths = sorted(table_fpaths_tmp)

    wstderr('Found {} tables to combine:\n  {}\n'.format(
        len(table_fpaths), '\n  '.join(table_fpaths)))

    # Formulate output filenames and check if they exist

    output_fpaths = None
    if outdir is not None:
        outdir = expand(outdir)
        mkdir(outdir)
        output_fpaths = OrderedDict(
            ((k, join(outdir, k + '.npy')) for k in ALL_KEYS))
        output_fpaths['source_tables'] = join(outdir, 'source_tables.txt')
        if not overwrite:
            for fpath in output_fpaths.values():
                if isfile(fpath):
                    raise IOError('File {} exists'.format(fpath))
        wstderr('Output files will be written to:\n  {}\n'.format('\n  '.join(
            output_fpaths.values())))

    # Combine the tables

    combined_table = None
    for fpath in table_fpaths:
        table = load_clsim_table_minimal(fpath,
                                         step_length=step_length,
                                         mmap=True)

        if combined_table is None:
            combined_table = table
            continue

        if set(table.keys()) != set(SUM_KEYS + VALIDATE_KEYS):
            raise ValueError(
                'Table keys {} do not match expected keys {}'.format(
                    sorted(table.keys()), sorted(SUM_KEYS + VALIDATE_KEYS)))

        for key in VALIDATE_KEYS:
            if not np.array_equal(table[key], combined_table[key]):
                raise ValueError('Unequal {} in file {}'.format(key, fpath))

        for key in SUM_KEYS:
            combined_table[key] += table[key]

        del table

    # Force quantum_efficiency and angular_acceptance_fract to 1 (these should
    # be handled by the user at the time the table is used to represent a
    # particular or subgroup of DOMs)
    # Note: use `combined_table` here; the per-file `table` was deleted at
    # the end of the loop above
    t_indep_table, _ = generate_time_indep_table(table=combined_table,
                                                 quantum_efficiency=1,
                                                 angular_acceptance_fract=1)
    combined_table['t_indep_table'] = t_indep_table

    # Save the data to npy files on disk (in a sub-directory for all of this
    # table's files)
    if outdir is not None:
        basenames = []
        for fpath in table_fpaths:
            base = basename(fpath)
            rootname, ext = splitext(base)
            if ext.lstrip('.') in COMPR_EXTENSIONS:
                base = rootname
            basenames.append(base)

        wstderr('Writing files:\n')

        for key in ALL_KEYS:
            fpath = output_fpaths[key]
            wstderr('  {} ...'.format(fpath))
            t0 = time()
            np.save(fpath, combined_table[key])
            wstderr(' ({} ms)\n'.format(np.round((time() - t0) * 1e3, 3)))

        fpath = output_fpaths['source_tables']
        wstderr('  {} ...'.format(fpath))
        t0 = time()
        with open(fpath, 'w') as fobj:
            fobj.write('\n'.join(sorted(basenames)))
        wstderr(' ({} ms)\n'.format(np.round((time() - t0) * 1e3, 3)))

    wstderr('Total time to combine tables: {} s\n'.format(
        np.round(time() - t_start, 3)))

    return combined_table
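A usage sketch, analogous to `combine_tables` above (the paths are hypothetical):

combined = combine_clsim_tables(
    table_fpaths='/data/tables/retro_nevts1000_*.fits',
    outdir='/data/tables/combined',
    step_length=1.0,
)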