Example #1
def test_generate_digitizer():
    """Test the functions that `generate_digitizer` produces."""
    # TODO: use local file for this test
    meta = load_pickle(
        '/home/icecube/retro/tables/'
        'large_5d_notilt_combined/stacked/stacked_ckv_template_map_meta.pkl')
    binning = meta['binning']

    for dim, edges in binning.items():
        assert np.all(np.diff(edges) > 0)
        num_bins = len(edges) - 1
        digitize = generate_digitizer(edges)
        digitize_overflow = generate_digitizer(edges, clip=False)
        rand = np.random.RandomState(0)

        # Check lots of values within the valid range of the binning
        vals = rand.uniform(low=edges[0], high=edges[-1], size=int(1e5))
        test = np.array([digitize(v) for v in vals])
        ref = np.digitize(vals, bins=edges, right=False) - 1
        assert np.all(test == ref), dim

        # Check edge cases
        assert digitize(edges[0]) == 0, dim
        assert digitize(edges[0] - 1e-8) == 0, dim
        assert digitize_overflow(edges[0] - 1e-8) < 0, dim
        assert digitize(edges[-1]) == num_bins - 1, dim
        assert digitize(edges[-1] + 1e-8) == num_bins - 1, dim
        assert digitize_overflow(edges[-1] + 1e-8) == num_bins, dim

    print('<< PASS : test_generate_digitizer >>')
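
The test above pins down the digitizer's contract: in-range values must agree with `np.digitize(..., right=False) - 1`, and out-of-range values are either clipped into `[0, num_bins)` (the default) or allowed to spill over (`clip=False`). A minimal sketch of a closure satisfying that contract, assuming `np.searchsorted` semantics (the real `generate_digitizer` is not shown here and may be implemented differently, e.g. compiled with numba):

import numpy as np

def generate_digitizer_sketch(edges, clip=True):
    """Return a scalar digitizer for monotonically increasing bin `edges`."""
    edges = np.asarray(edges)
    num_bins = len(edges) - 1

    def digitize(val):
        # side='right' reproduces np.digitize(val, edges, right=False) - 1
        idx = np.searchsorted(edges, val, side='right') - 1
        if clip:
            idx = min(max(idx, 0), num_bins - 1)
        return idx

    return digitize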
Example #2
def setup_tdi_tables(tdi=None, mmap=False):
    """Load and instantiate (Cherenkov) TDI tables.

    Parameters
    ----------
    tdi : sequence of strings, optional
        Path to TDI tables' `ckv_tdi_table.npy` files, or paths to
        directories containing those files; one entry per TDI table

    mmap : bool
        Whether to memory-map the table files (read-only) instead of loading
        them entirely into memory

    Returns
    -------
    tdi_tables : tuple of 0 or more numpy arrays
    tdi_metas : tuple of 0 or more OrderedDicts

    """
    if tdi is None:
        return (), ()

    mmap_mode = 'r' if mmap else None

    tdi_tables = []
    tdi_metas = []
    for tdi_ in tdi:
        if tdi_ is None:
            continue
        tdi_ = expand(tdi_)
        if isdir(tdi_):
            tdi_ = join(tdi_, 'ckv_tdi_table.npy')

        print('Loading and instantiating TDI table at "{}"'.format(tdi_))

        be = load_pickle(join(dirname(tdi_), 'tdi_bin_edges.pkl'))
        meta = load_pickle(join(dirname(tdi_), 'tdi_metadata.pkl'))
        meta['bin_edges'] = be
        tdi_table = np.load(tdi_, mmap_mode=mmap_mode)

        tdi_metas.append(meta)
        tdi_tables.append(tdi_table)

    return tuple(tdi_tables), tuple(tdi_metas)
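
A hypothetical invocation (paths illustrative, not from the source); `mmap=True` keeps each potentially large `ckv_tdi_table.npy` on disk and reads pages on demand:

tdi_tables, tdi_metas = setup_tdi_tables(
    tdi=['~/retro/tables/tdi_0', '~/retro/tables/tdi_1'],  # hypothetical dirs
    mmap=True,
)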
Example #3
def find_problematic_pulses(indir, pulse_series):
    """Find missing, bad, or old extracted pulse series and print the paths of
    the corresponding events directories.

    Parameters
    ----------
    indir : str
    pulse_series : str or iterable thereof

    """
    if isinstance(pulse_series, str):
        pulse_series = [pulse_series]
    indir = expand(indir)

    for dirpath, dirs_, files in walk(indir, followlinks=True):
        if "events.npy" in files:
            dirs_.clear()
        else:
            dirs_.sort(key=nsort_key_func)
            files.sort(key=nsort_key_func)

            for fname in files:
                match = OSCNEXT_FNAME_RE.match(fname)
                if not match:
                    continue

                i3f_dname = join(dirpath, match.groupdict()["basename"])
                if isdir(i3f_dname):
                    if not isfile(join(i3f_dname, "events.npy")):
                        print(i3f_dname)
                else:
                    print(i3f_dname)

            continue

        sys.stderr.write(".")
        sys.stderr.flush()

        # If any one of the named pulse series is missing or bad, record
        # the path and move on without checking the other pulse series
        for ps_name in pulse_series:
            pulses_fpath = join(dirpath, "pulses", ps_name + ".pkl")
            if not isfile(pulses_fpath):
                print(dirpath)
                break
            try:
                pulses = load_pickle(pulses_fpath)
                if len(pulses) > 0 and "flags" not in pulses[0][0][1].dtype.names:
                    print(dirpath)
                    break
            except Exception:
                print(dirpath)
                break
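
A sketch of how this might be invoked; both arguments are hypothetical, and the pulse-series name must match the basename of an extracted `pulses/<name>.pkl` file:

find_problematic_pulses(
    indir='~/oscNext/extracted',          # hypothetical root directory
    pulse_series='SRTTWOfflinePulsesDC',  # hypothetical pulse-series name
)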
Example #4
    def load_stacked_tables(
        self,
        stacked_tables_meta_fpath,
        stacked_tables_fpath,
        stacked_t_indep_tables_fpath,
        mmap_tables=False,
        mmap_t_indep=False,
    ):
        if self.is_stacked is not None:
            assert self.is_stacked

        stacked_tables_meta_fpath = expand(stacked_tables_meta_fpath)
        stacked_tables_fpath = expand(stacked_tables_fpath)
        stacked_t_indep_tables_fpath = expand(stacked_t_indep_tables_fpath)

        tables_mmap_mode = 'r' if mmap_tables else None
        t_indep_mmap_mode = 'r' if mmap_t_indep else None

        self.table_meta = load_pickle(stacked_tables_meta_fpath)
        self.tables = np.load(stacked_tables_fpath, mmap_mode=tables_mmap_mode)
        self.tables.setflags(write=False, align=True, uic=False)
        num_tables = self.tables.shape[0]

        self.t_is_residual_time = bool(
            self.table_meta.get('t_is_residual_time', False))

        self.t_indep_tables = np.load(stacked_t_indep_tables_fpath,
                                      mmap_mode=t_indep_mmap_mode)
        self.t_indep_tables.setflags(write=False, align=True, uic=False)
        assert self.t_indep_tables.shape[0] == num_tables

        self.sd_idx_table_indexer = deepcopy(
            self.table_meta['sd_idx_table_indexer'])
        self.sd_idx_table_indexer.setflags(write=False, align=True, uic=False)

        self.loaded_sd_indices = np.where(self.sd_idx_table_indexer >= 0)[0]
        self.n_photons_per_table = self.table_meta['n_photons_per_table']

        # Note that in creating the stacked tables, each individual table
        # is scaled such that the effective number of photons used to generate
        # the table is one (to avoid different norms across the tables if
        # different numbers of photons were used originally to create each).
        self.table_norm, self.t_indep_table_norm = get_table_norm(
            avg_angsens=self.avg_angsens,
            quantum_efficiency=1,
            norm_version=self.norm_version,
            **{k: self.table_meta[k]
               for k in TABLE_NORM_KEYS})

        self.table_norms = [self.table_norm] * num_tables
        self.t_indep_table_norms = [self.t_indep_table_norm] * num_tables

        self.is_stacked = True
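
A hedged usage sketch, assuming `tables` is an instance of the (unshown) class this method belongs to; the meta filename matches the one in Example #1, while the other two file names are hypothetical stand-ins:

tables.load_stacked_tables(
    stacked_tables_meta_fpath='stacked/stacked_ckv_template_map_meta.pkl',
    stacked_tables_fpath='stacked/stacked_ckv_template_map.npy',            # hypothetical
    stacked_t_indep_tables_fpath='stacked/stacked_t_indep_template_map.npy',  # hypothetical
    mmap_tables=True,    # large; read pages on demand
    mmap_t_indep=False,  # smaller; load fully into memory
)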
Example #5
def iterate_file(fpath, start=None, stop=None, step=None, mmap_mode=None):
    """Iterate through the elements in a pickle (.pkl) or numpy (.npy) file. If
    a pickle file, structure must be a sequence of objects, one object per
    event. If a numpy file, it must be a one-dimensional structured array where
    each "entry" in the array contains the information from one event.

    Parameters
    ----------
    fpath : string
    start, stop, step : optional
        Arguments passed to `slice` for extracting select events from the
        file.
    mmap_mode : None or string in {"r", "r+", "w+", "c"}
        Only applicable if `fpath` is a numpy .npy file; see help for
        `numpy.memmap` for more information on each mode. Note that memory
        mapping a file is useful both for limiting memory consumption and for
        allowing multiple processes (presumably each working on different
        events) to simultaneously write to the same reco output file, BUT too
        many open file handles can result in an exception. Default is `None`
        (the file is not memory mapped; instead the entire file is read into
        memory).

    Returns
    -------
    num_events_in_file : int
        Total number of events in the file, before slicing
    indices : sequence of int
        Indices (within the original file) of the selected events
    sliced_events : sequence
        The selected events

    """
    slicer = slice(start, stop, step)
    _, ext = splitext(fpath)
    if ext == '.pkl':
        events = load_pickle(fpath)
    elif ext == '.npy':
        try:
            events = np.load(fpath, mmap_mode=mmap_mode)
        except Exception:
            sys.stderr.write('failed to load "{}"\n'.format(fpath))
            raise
    else:
        raise ValueError(fpath)

    num_events_in_file = len(events)
    indices = range(num_events_in_file)[slicer]  # pylint: disable=range-builtin-not-iterating
    sliced_events = events[slicer]

    return num_events_in_file, indices, sliced_events
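
Despite its name, the function returns everything at once rather than yielding event by event (see the corrected Returns section above). A caller might then iterate the selected events like this (`process` is a hypothetical per-event handler):

num_events, indices, events = iterate_file('events.npy', start=0, stop=100)
for file_idx, event in zip(indices, events):
    # file_idx is the event's index within the original, unsliced file
    process(event)  # hypothetical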
Example #6
def generate_binmap(r_max, r_power, n_rbins, n_costhetabins, n_phibins,
                    cart_binwidth, oversample, antialias, tables_dir,
                    recompute):
    """Generate mapping from polar binning (assumed to be symmetric about
    Z-axis) to Cartesian 3D binning.

    The heart of the functionality is implemented in
    `retro.sphbin2cartbin.sphbin2cartbin`, while this function implements
    loading already-computed mappings and storing the results to disk.

    Parameters
    ----------
    r_max : float > 0
    r_power : float != 0
    n_rbins, n_costhetabins, n_phibins : int >= 1
    cart_binwidth : float > 0
    oversample : int >= 1
    antialias : int between 1 and 50
    tables_dir : string
    recompute : bool

    Returns
    -------
    ind_arrays
    vol_arrays
    meta
        Output of `generate_binmap_meta`

    """
    assert isdir(tables_dir)
    r_edges = powerspace(0, r_max, n_rbins + 1, r_power)
    theta_edges = np.arccos(np.linspace(1, -1, n_costhetabins + 1))

    r_mesh, theta_mesh = np.meshgrid(r_edges, theta_edges, indexing='ij')
    exact_vols = []
    for ri in range(n_rbins):
        sub_exact_vols = []
        for ti in range(int(np.ceil(n_costhetabins / 2.0))):
            rs = r_mesh[ri:ri+2, ti:ti+2]
            ts = theta_mesh[ri:ri+2, ti:ti+2]
            dcostheta = np.abs(np.diff(np.cos([ts.max(), ts.min()])))
            exact_vol = spherical_volume(rmin=rs.max(), rmax=rs.min(),
                                         dcostheta=dcostheta, dphi=np.pi/2)
            sub_exact_vols.append(exact_vol)
        exact_vols.append(sub_exact_vols)
    exact_vols = np.array(exact_vols)

    meta = generate_binmap_meta(
        r_max=r_max, r_power=r_power,
        n_rbins=n_rbins, n_costhetabins=n_costhetabins, n_phibins=n_phibins,
        cart_binwidth=cart_binwidth, oversample=oversample, antialias=antialias
    )
    fpath = join(tables_dir, meta['fname'])

    print('Binmap kwargs:', meta['kwargs'])

    if not recompute and isfile(fpath):
        sys.stdout.write('Loading binmap from file\n  "%s"\n' % fpath)
        sys.stdout.flush()

        t0 = time.time()
        data = load_pickle(fpath)
        ind_arrays = data['ind_arrays']
        vol_arrays = data['vol_arrays']
        t1 = time.time()
        print('  Time to load bin mapping from pickle: {} ms'
              .format(np.round((t1 - t0)*1000, 3)))

    else:
        sys.stdout.write('  Computing bin mapping...\n')
        sys.stdout.flush()

        t0 = time.time()
        ind_arrays, vol_arrays = sphbin2cartbin(**meta['kwargs'])
        t1 = time.time()
        print('    Time to compute bin mapping: {} ms'
              .format(np.round((t1 - t0)*1000, 3)))

        print('  Writing bin mapping to pickle file\n  "%s"' % fpath)
        data = OrderedDict([
            ('kwargs', meta['kwargs']),
            ('ind_arrays', ind_arrays),
            ('vol_arrays', vol_arrays)
        ])
        pickle.dump(data, open(fpath, 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
        t2 = time.time()
        print('    Time to pickle bin mapping: {} ms'
              .format(np.round((t2 - t1)*1000, 3)))

    print('')

    binned_vol = np.sum([va.sum() for va in vol_arrays])
    exact_vol = spherical_volume(rmin=0, rmax=r_max, dcostheta=-1, dphi=np.pi/2)
    print('  Exact vol = %f, binned vol = %f (%e fract error)'
          % (exact_vol, binned_vol, (binned_vol-exact_vol)/exact_vol))

    ind_bin_vols = np.array([va.sum() for va in vol_arrays])
    fract_err = ind_bin_vols/exact_vols.flat - 1
    abs_fract_err = np.abs(fract_err)
    worst_abs_fract_err = np.max(abs_fract_err)
    flat_idx = np.where(abs_fract_err == worst_abs_fract_err)[0][0]
    r_idx, costheta_idx = divmod(flat_idx, int(np.ceil(n_costhetabins/2)))
    print('  Worst single-bin fract err: %e; '
          'r_idx=%d, costheta_idx=%d; '
          'binned vol=%e, exact vol=%e'
          % (worst_abs_fract_err, r_idx, costheta_idx, ind_bin_vols[flat_idx],
             exact_vols[r_idx, costheta_idx]))

    return ind_arrays, vol_arrays, meta
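
`powerspace` (imported from retro, not shown here) appears to generate r bin edges that are uniform in r**(1/r_power); a minimal sketch of such a helper, under that assumption:

import numpy as np

def powerspace_sketch(start, stop, num, power):
    """Return `num` edges between `start` and `stop`, spaced uniformly in
    x**(1/power)."""
    inv_power = 1.0 / power
    return np.linspace(start**inv_power, stop**inv_power, num) ** power

# e.g. powerspace_sketch(0, 200, 6, 2) makes bins narrow near r=0, where
# spherical shells would otherwise have tiny volumes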
Example #7
def combine_tdi_tiles(
    source_dir,
    dest_dir,
    table_hash,
    gcd,
    bin_edges_file,
    tile_spec_file,
):
    """Combine individual time-independent tiles (one produced per DOM) into a single
    TDI table.

    Parameters
    ----------
    source_dir : str
    dest_dir : str
    table_hash : str
    gcd : str
    bin_edges_file : str
    tile_spec_file : str

    """
    source_dir = expand(source_dir)
    dest_dir = expand(dest_dir)
    gcd = expand(gcd)
    bin_edges_file = expand(bin_edges_file)
    tile_spec_file = expand(tile_spec_file)
    mkdir(dest_dir)
    assert isdir(source_dir)
    assert isfile(bin_edges_file)
    assert isfile(tile_spec_file)

    gcd = extract_gcd(gcd)

    bin_edges = load_pickle(bin_edges_file)
    x_edges = bin_edges['x']
    y_edges = bin_edges['y']
    z_edges = bin_edges['z']
    ctdir_edges = bin_edges['costhetadir']
    phidir_edges = bin_edges['phidir']

    n_x = len(x_edges) - 1
    n_y = len(y_edges) - 1
    n_z = len(z_edges) - 1
    n_ctdir = len(ctdir_edges) - 1
    n_phidir = len(phidir_edges) - 1

    n_dir_bins = n_ctdir * n_phidir

    x_bw = (x_edges.max() - x_edges.min()) / n_x
    y_bw = (y_edges.max() - y_edges.min()) / n_y
    z_bw = (z_edges.max() - z_edges.min()) / n_z
    bin_vol = x_bw * y_bw * z_bw

    ctdir_min = ctdir_edges.min()
    ctdir_max = ctdir_edges.max()

    phidir_min = phidir_edges.min()
    phidir_max = phidir_edges.max()

    with open(tile_spec_file, 'r') as f:
        tile_specs = [l.strip() for l in f.readlines()]

    table = np.zeros(shape=(n_x, n_y, n_z, n_ctdir, n_phidir),
                     dtype=np.float32)

    # Slice all table dimensions to exclude {under,over}flow bins
    central_slice = (slice(1, -1), ) * 5

    angsens_model = None
    ice_model = None
    disable_tilt = None
    disable_anisotropy = None
    n_phase = None
    n_group = None

    tiles_info = []

    for tile_spec in tile_specs:
        info = None
        try:
            fields = tile_spec.split()

            info = OrderedDict()

            info['tbl_idx'] = int(fields[0])
            info['string'] = int(fields[1])
            info['dom'] = int(fields[2])
            info['seed'] = int(fields[3])
            info['n_events'] = int(fields[4])

            info['x_min'] = float(fields[5])
            info['x_max'] = float(fields[6])
            info['n_x'] = int(fields[7])

            info['y_min'] = float(fields[8])
            info['y_max'] = float(fields[9])
            info['n_y'] = int(fields[10])

            info['z_min'] = float(fields[11])
            info['z_max'] = float(fields[12])
            info['n_z'] = int(fields[13])

            info['n_ctdir'] = int(fields[14])
            info['n_phidir'] = int(fields[15])

            tiles_info.append(info)

            tile_fpath = glob(
                join(
                    source_dir, 'clsim_table_set'
                    '_{table_hash}'
                    '_tile_{tbl_idx}'
                    '_string_{string}'
                    '_dom_{dom}'
                    '_seed_{seed}'
                    '_n_{n_events}'
                    '.fits'.format(table_hash=table_hash, **info)))[0]
            try:
                fits_table = fits.open(tile_fpath,
                                       mode='readonly',
                                       memmap=True)
            except Exception:
                wstderr('Failed on tile_fpath "{}"'.format(tile_fpath))
                raise

            primary = fits_table[0]

            header = primary.header  # pylint: disable=no-member
            keys = header.keys()

            this_gcd_i3_md5 = extract_meta_from_keys(keys, '_i3_gcd_i3_md5_')
            assert this_gcd_i3_md5 == gcd['source_gcd_i3_md5'], \
                    'this: {}, ref: {}'.format(this_gcd_i3_md5, gcd['source_gcd_i3_md5'])

            this_angsens_model = extract_meta_from_keys(keys, '_i3_angsens_')
            if angsens_model is None:
                angsens_model = this_angsens_model
                _, avg_angsens = load_angsens_model(angsens_model)
            else:
                assert this_angsens_model == angsens_model

            this_table_hash = extract_meta_from_keys(keys, '_i3_hash_')
            assert this_table_hash == table_hash

            this_ice_model = extract_meta_from_keys(keys, '_i3_ice_')
            if ice_model is None:
                ice_model = this_ice_model
            else:
                assert this_ice_model == ice_model

            this_disable_anisotropy = header['_i3_disable_anisotropy']
            if disable_anisotropy is None:
                disable_anisotropy = this_disable_anisotropy
            else:
                assert this_disable_anisotropy == disable_anisotropy

            this_disable_tilt = header['_i3_disable_tilt']
            if disable_tilt is None:
                disable_tilt = this_disable_tilt
            else:
                assert this_disable_tilt == disable_tilt

            this_n_phase = header['_i3_n_phase']
            if n_phase is None:
                n_phase = this_n_phase
            else:
                assert this_n_phase == n_phase

            this_n_group = header['_i3_n_group']
            if n_group is None:
                n_group = this_n_group
            else:
                assert this_n_group == n_group

            assert info['n_ctdir'] == n_ctdir
            assert info['n_phidir'] == n_phidir

            assert np.isclose(header['_i3_costhetadir_min'], ctdir_min)
            assert np.isclose(header['_i3_costhetadir_max'], ctdir_max)

            assert np.isclose(header['_i3_phidir_min'], phidir_min)
            assert np.isclose(header['_i3_phidir_max'], phidir_max)

            n_photons = header['_i3_n_photons']
            n_dir_bins = info['n_ctdir'] * info['n_phidir']

            this_x_bw = (info['x_max'] - info['x_min']) / info['n_x']
            this_y_bw = (info['y_max'] - info['y_min']) / info['n_y']
            this_z_bw = (info['z_max'] - info['z_min']) / info['n_z']

            assert this_x_bw == x_bw
            assert this_y_bw == y_bw
            assert this_z_bw == z_bw

            assert np.any(np.isclose(info['x_min'], x_edges))
            assert np.any(np.isclose(info['x_max'], x_edges))

            assert np.any(np.isclose(info['y_min'], y_edges))
            assert np.any(np.isclose(info['y_max'], y_edges))

            assert np.any(np.isclose(info['z_min'], z_edges))
            assert np.any(np.isclose(info['z_max'], z_edges))

            quantum_efficiency = 0.25 * gcd['rde'][info['string'] - 1,
                                                   info['dom'] - 1]
            norm = n_dir_bins * quantum_efficiency * avg_angsens / (n_photons *
                                                                    bin_vol)
            if np.isnan(norm):
                print('\nTile {} norm is nan!'.format(info['tbl_idx']))
                print('    quantum_efficiency = {}, n_photons = {}'.format(
                    quantum_efficiency, n_photons))
            elif norm == 0:
                print('\nTile {} norm is 0'.format(info['tbl_idx']))

            x_start = np.digitize(info['x_min'] + x_bw / 2, x_edges) - 1
            x_stop = np.digitize(info['x_max'] - x_bw / 2, x_edges)

            y_start = np.digitize(info['y_min'] + y_bw / 2, y_edges) - 1
            y_stop = np.digitize(info['y_max'] - y_bw / 2, y_edges)

            z_start = np.digitize(info['z_min'] + z_bw / 2, z_edges) - 1
            z_stop = np.digitize(info['z_max'] - z_bw / 2, z_edges)

            # NOTE: comparison excludes norm = 0 _and_ norm = NaN
            if norm > 0:
                assert not np.isnan(norm)
                table[x_start:x_stop, y_start:y_stop,
                      z_start:z_stop, :, :] += (
                          norm * primary.data[central_slice]  # pylint: disable=no-member
                      )
        except Exception:
            wstderr('Failed on tile_spec {}'.format(tile_spec))
            if info is not None:
                wstderr('Info:\n{}'.format(info))
            raise
        wstderr('.')

    wstderr('\n')

    metadata = OrderedDict()
    metadata['table_hash'] = table_hash
    metadata['disable_tilt'] = disable_tilt
    metadata['disable_anisotropy'] = disable_anisotropy
    metadata['gcd'] = gcd
    metadata['angsens_model'] = angsens_model
    metadata['ice_model'] = ice_model
    metadata['n_phase'] = n_phase
    metadata['n_group'] = n_group
    metadata['tiles_info'] = tiles_info

    outdir = join(
        dest_dir, 'tdi_table_{}_tilt_{}_anisotropy_{}'.format(
            table_hash,
            'off' if disable_tilt else 'on',
            'off' if disable_anisotropy else 'on',
        ))
    mkdir(outdir)

    name = 'tdi_table.npy'
    outfpath = join(outdir, name)
    wstdout('saving table to "{}"\n'.format(outfpath))
    np.save(outfpath, table)

    #outfpath = join(outdir, 'tdi_bin_edges.json')
    #wstdout('saving bin edges to "{}"\n'.format(outfpath))
    #json.dump(
    #    bin_edges,
    #    file(outfpath, 'w'),
    #    sort_keys=False,
    #    indent=2,
    #)
    outfpath = join(outdir, 'tdi_bin_edges.pkl')
    wstdout('saving bin edges to "{}"\n'.format(outfpath))
    pickle.dump(
        bin_edges,
        open(outfpath, 'wb'),
        protocol=pickle.HIGHEST_PROTOCOL,
    )

    #outfpath = join(outdir, 'tdi_metadata.json')
    #wstdout('saving metadata to "{}"\n'.format(outfpath))
    #json.dump(
    #    metadata,
    #    file(outfpath, 'w'),
    #    sort_keys=False,
    #    indent=2,
    #)
    outfpath = join(outdir, 'tdi_metadata.pkl')
    wstdout('saving metadata to "{}"\n'.format(outfpath))
    pickle.dump(
        metadata,
        open(outfpath, 'wb'),
        protocol=pickle.HIGHEST_PROTOCOL,
    )
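
A hypothetical invocation; every argument here is illustrative, and the tile-spec file is assumed to contain one whitespace-separated spec per line in the field order parsed above:

combine_tdi_tiles(
    source_dir='~/retro/tiles',      # hypothetical: per-DOM .fits tiles
    dest_dir='~/retro/tables',       # hypothetical: combined table output
    table_hash='0123abcd',           # hypothetical CLSim table-set hash
    gcd='~/gcd/GeoCalibDetectorStatus.i3.bz2',      # hypothetical GCD file
    bin_edges_file='~/retro/tiles/tdi_bin_edges.pkl',   # hypothetical
    tile_spec_file='~/retro/tiles/tile_specs.txt',      # hypothetical
)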
Example #8
def process_dir(
    dirpath,
    n_files,
    min_pulses_per_event,
    pulses_filter,
    emax,
    verbosity=0,
):
    """
    Parameters
    ----------
    dirpath : string
    n_files : int > 0
    min_pulses_per_event : int >= 0
    pulses_filter : None or callable, optional
    emax : 0 <= scalar <= np.inf
    verbosity : int >= 0

    Returns
    -------
    stats : OrderedDict
        Keys are taken from STATS_PROTO, values are numpy arrays

    """
    stats = deepcopy(STATS_PROTO)

    events = np.load(join(dirpath, "events.npy"), mmap_mode="r")
    if len(events) == 0:
        return stats
    mask_vals = deepcopy(events["L5_oscNext_bool"])
    if np.count_nonzero(mask_vals) == 0:
        return stats

    if verbosity >= 2:
        wstderr(".")

    if isfile(join(dirpath, "truth.npy")):
        truth = np.load(join(dirpath, "truth.npy"), mmap_mode="r")
        weights = truth["weight"]
        use_weights = True
    else:
        weights = np.ones(shape=len(events))
        use_weights = False

    if np.isfinite(emax) and emax > 0:
        recos = np.load(
            join(dirpath, "recos", "retro_crs_prefit.npy"),
            mmap_mode="r",
        )
        with np.errstate(invalid='ignore'):
            mask_vals &= recos["energy"]["median"] <= emax
        if np.count_nonzero(mask_vals) == 0:
            return stats

    pulses = load_pickle(
        join(dirpath, "pulses", "{}.pkl".format(PULSE_SERIES_NAME)))

    for mask_val, event_pulses, weight in zip(mask_vals, pulses, weights):
        if not mask_val:
            continue

        if callable(pulses_filter):
            event_pulses = pulses_filter(event_pulses)
            if len(event_pulses) == 0:
                continue

        if use_weights:
            normed_weight = weight / n_files

        # qtot is sum of charge of all hits on all DOMs
        event_pulses_ = []
        tmp_hits_per_dom = []
        tmp_charge_per_dom = []
        tmp_time_diffs_within_dom = []
        tmp_weight_per_dom = []
        for omkey, dom_pulses in event_pulses:
            event_pulses_.append(dom_pulses)
            tmp_hits_per_dom.append(len(dom_pulses))
            tmp_charge_per_dom.append(dom_pulses["charge"].sum())
            #stats["time_diffs_between_hits"].append(
            #    np.concatenate([[0.], np.diff(np.sort(dom_pulses["time"]))])
            #)
            tmp_time_diffs_within_dom.append(dom_pulses["time"] -
                                             dom_pulses["time"].min())
            if use_weights:
                tmp_weight_per_dom.append(normed_weight)

        event_pulses = np.concatenate(event_pulses_)

        # TODO: move min_pulses_per_event before qmin processing
        # TODO: small-pulse agglomeration filter
        if len(event_pulses) < min_pulses_per_event:
            continue

        stats["doms_per_event"].append(len(event_pulses))

        stats["hits_per_dom"].extend(tmp_hits_per_dom)
        stats["charge_per_dom"].extend(tmp_charge_per_dom)
        stats["time_diffs_within_dom"].extend(tmp_time_diffs_within_dom)
        if use_weights:
            stats["weight_per_dom"].extend(tmp_weight_per_dom)

        charge = event_pulses["charge"]
        stats["charge_per_hit"].append(charge)
        stats["charge_per_event"].append(charge.sum())
        stats["hits_per_event"].append(len(event_pulses))
        stats["time_diffs_within_event"].append(event_pulses["time"] -
                                                event_pulses["time"].min())
        if use_weights:
            stats["weight_per_event"].append(normed_weight)
            stats["weight_per_hit"].append(
                np.full(shape=len(event_pulses), fill_value=normed_weight))

    return stats
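
Calling this for a single directory might look like the following (path, file count, and thresholds are hypothetical); a driver would typically merge the returned `stats` dicts across many directories:

stats = process_dir(
    dirpath='~/oscNext/extracted/run_00001',  # hypothetical events directory
    n_files=100,             # files in the dataset, for per-file weight norm
    min_pulses_per_event=8,
    pulses_filter=None,      # or a callable(event_pulses) -> filtered pulses
    emax=100.0,              # keep events with median reco energy <= 100 GeV
    verbosity=1,
)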
Example #9
def process_events_dir(events_dirpath, pulse_series):
    """
    Parameters
    ----------
    events_dirpath : string
    pulse_series : string

    Returns
    -------
    events_array : numpy ndarray
        ndarray dtype is `DATA_DOMS_IDX_T` if the events are actual detector
        data, otherwise `MC_DOMS_IDX_T`

    doms_array : numpy ndarray of dtype `DOM_PULSES_IDX_T`

    pulses_array : numpy ndarray of dtype `PULSE_T`

    """
    try:
        events_dirpath = expand(events_dirpath)
        basedir = basename(events_dirpath)
        events = np.load(join(events_dirpath, "events.npy"), mmap_mode="r")
        if len(events) == 0:
            return None

        mask_vals = events["L5_oscNext_bool"]
        valid_event_indices = np.argwhere(mask_vals).flatten()
        num_valid_events = len(valid_event_indices)
        if num_valid_events == 0:
            return None

        truth = None
        weights = None
        is_noise = False
        if isfile(join(events_dirpath,
                       "truth.npy")):  # is Monte Carlo simulation
            is_data = False
            truth = np.load(join(events_dirpath, "truth.npy"), mmap_mode="r")
            weights = truth["weight"]
            events_dtype = MC_DOMS_IDX_T
            is_noise = "pdg_encoding" not in truth.dtype.names
            match = MC_DIRPATH_META_RE.match(basedir)
            if not match:
                raise ValueError(events_dirpath)
            finfo_d = match.groupdict()
            finfo_d["dataset"] = int(finfo_d["dataset"])
            finfo_d["file_id"] = int(finfo_d["file_id"])
        else:  # is actual detector data
            is_data = True
            events_dtype = DATA_DOMS_IDX_T
            match = DATA_DIRPATH_META_RE.match(basename(events_dirpath))
            if not match:
                raise ValueError(events_dirpath)
            finfo_d = match.groupdict()
            finfo_d["season"] = int(finfo_d["season"])
            finfo_d["sub_run_id"] = int(finfo_d["sub_run_id"])

        events_array = np.empty(shape=num_valid_events, dtype=events_dtype)

        doms_arrays = []
        pulses_arrays = []

        dom_idx0 = 0
        pulses_idx0 = 0

        pulses = load_pickle(
            join(events_dirpath, "pulses", "{}.pkl".format(pulse_series)))
        linefit_dc = np.load(join(events_dirpath, "recos", "LineFit_DC.npy"))

        for rel_idx, valid_idx in enumerate(valid_event_indices):
            events_array[rel_idx:rel_idx +
                         1][COPY_ID_FIELDS] = events[valid_idx][COPY_ID_FIELDS]
            events_array[rel_idx]["dom_idx0"] = dom_idx0

            if is_data:
                events_array[rel_idx:rel_idx + 1][COPY_TIME_FIELDS] = (
                    events[valid_idx]["start_time"][COPY_TIME_FIELDS])
                events_array[rel_idx]["season"] = finfo_d["season"]
                events_array[rel_idx]["actual_sub_run_id"] = finfo_d[
                    "sub_run_id"]
            else:
                events_array[rel_idx]["dataset"] = finfo_d["dataset"]
                events_array[rel_idx]["file_id"] = finfo_d["file_id"]

                events_array[rel_idx]["weight"] = weights[valid_idx]
                if is_noise:
                    true_pdg = 0
                    true_energy = np.nan
                    true_time = np.nan
                else:
                    true_pdg = truth[valid_idx]["pdg_encoding"]
                    true_energy = truth[valid_idx]["energy"]
                    true_time = truth[valid_idx]["time"]
                events_array[rel_idx]["true_pdg"] = true_pdg
                #if abs(true_pdg) >= 128:
                #    print("true_pdg =", true_pdg)
                #    raise ValueError("true_pdg = {}".format(true_pdg))
                events_array[rel_idx]["true_energy"] = true_energy
                events_array[rel_idx]["true_time"] = true_time

                if true_pdg in NEUTRINOS:
                    events_array[rel_idx]["true_int"] = truth[valid_idx][
                        "InteractionType"]
                else:
                    events_array[rel_idx]["true_int"] = 0

            event_pulses = pulses[valid_idx]

            events_array[rel_idx]["num_hit_doms"] = num_hit_doms = len(
                event_pulses)
            doms_array = np.empty(shape=num_hit_doms, dtype=DOM_PULSES_IDX_T)

            event_num_pulses = 0
            event_charge = 0.
            for dom_rel_idx, (omkey, dom_pulses) in enumerate(event_pulses):
                dom_num_pulses = len(dom_pulses)
                if dom_num_pulses >= 2**8:
                    print("dom_num_pulses =", dom_num_pulses)
                    raise ValueError(
                        "dom_num_pulses = {}".format(dom_num_pulses))

                dom_charge = np.sum(dom_pulses["charge"])

                event_num_pulses += dom_num_pulses
                event_charge += dom_charge

                doms_array[dom_rel_idx]["string"] = omkey[0]
                doms_array[dom_rel_idx]["om"] = omkey[1]
                doms_array[dom_rel_idx]["pulses_idx0"] = pulses_idx0
                doms_array[dom_rel_idx]["num_pulses"] = dom_num_pulses
                doms_array[dom_rel_idx]["charge"] = dom_charge

                simple_dom_pulses = np.empty(shape=dom_num_pulses,
                                             dtype=PULSE_T)
                simple_dom_pulses["time"] = dom_pulses["time"]
                simple_dom_pulses["charge"] = dom_pulses["charge"]
                simple_dom_pulses["width"] = dom_pulses["width"]
                simple_dom_pulses["flags"] = dom_pulses["flags"]

                pulses_arrays.append(simple_dom_pulses)
                pulses_idx0 += dom_num_pulses

            if event_num_pulses >= 2**32:
                print("event_num_pulses =", event_num_pulses)
                raise ValueError(
                    "event_num_pulses = {}".format(event_num_pulses))

            events_array[rel_idx]["num_pulses"] = event_num_pulses
            events_array[rel_idx]["charge"] = event_charge

            doms_arrays.append(doms_array)
            dom_idx0 += num_hit_doms

        events_array[COPY_LINEFIT_DC_DST_FIELDS] = (
            linefit_dc[valid_event_indices][COPY_LINEFIT_DC_SRC_FIELDS])

        doms_array = np.concatenate(doms_arrays)
        pulses_array = np.concatenate(pulses_arrays)

    except Exception:
        print('Failed on events_dirpath = "{}", pulse_series = "{}"'.format(
            events_dirpath, pulse_series))
        raise

    return events_array, doms_array, pulses_array
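
A sketch of a call and of how the three returned flat arrays index into one another (path and pulse-series name hypothetical):

events_array, doms_array, pulses_array = process_events_dir(
    events_dirpath='~/oscNext/extracted/run_00001',  # hypothetical
    pulse_series='SRTTWOfflinePulsesDC',             # hypothetical
)

# Each event records where its DOMs start, and each DOM records where its
# pulses start, so the hierarchy is recoverable from the flat arrays:
evt = events_array[0]
evt_doms = doms_array[evt['dom_idx0']:evt['dom_idx0'] + evt['num_hit_doms']]
dom = evt_doms[0]
dom_pulses = pulses_array[dom['pulses_idx0']:dom['pulses_idx0'] + dom['num_pulses']]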
Example #10
def generate_histos(
    photons,
    hole_ice_model,
    t_max,
    num_bins,
    gcd=None,
    include_rde=True,
    include_noise=True,
    outfile=None,
):
    """Generate time histograms from photons extracted from CLSim (repated)
    forward event simulations.

    Parameters
    ----------
    photons : string or mapping

    hole_ice_model : string
        Raw CLSim does not (currently) incorporate a hole ice model; this is a
        modification to the angular acceptance of the photons that CLSim
        returns, so it must be specified (and applied) post-hoc (e.g., in this
        function).

    t_max : float
        Last edge in time binning (first edge is at 0), in units of ns.

    num_bins : int
        Number of time bins, which span from 0 to t_max.

    gcd : str or None, optional
        Path to GCD i3 or pkl file to get DOM coordinates, rde, and noise
        (where the latter two only have an effect if `include_rde` and/or
        `include_noise` are True). Regardless if this is specified, the code
        will attempt to automatically figure out the GCD file used to produce
        the table. If this succeeds and `gcd` is specified by the user, the
        user's value is checked against that found in the data. If the user
        does not specify `gcd`, the value found in the data is used. If neither
        `gcd` is provided nor one can be found in the data, an error is raised.

    include_rde : bool, optional
        Whether to use relative DOM efficiencies (RDE) to scale the results per
        DOM. RDE is included by default.

    include_noise : bool, optional
        Whether to add the noise floor for each DOM to the results. Noise is
        included by default.

    outfile : str or None, optional
        If a string is specified, save the histos to a pickle file by the name
        `outfile`. If not specified (or `None`), `histos` will not be written
        to a file.


    Returns
    -------
    histos : OrderedDict


    Raises
    ------
    ValueError
        If `gcd` is specified but does not match a GCD file found in the data

    ValueError
        If `gcd` is not specified and no GCD can be found in the data


    See also
    --------
    i3processing.sim
        Perform the repeated simulation to get photons at DOMs. Generates an i3
        file.

    i3processing.extract_photon_info
        Extract photon info (and pertinent metadata) from the i3 file produced
        from the above.

    retro_dom_pdfs
        Produce distributions corresponding to the histograms made here, but
        using Retro reco.

    """
    photons_file_name = None
    if isinstance(photons, string_types):
        photons_file_name = photons
        photons = load_pickle(photons_file_name)
    dom_info = photons['doms']

    bin_edges = np.linspace(0, t_max, num_bins + 1)
    bin_widths = np.diff(bin_edges)

    gcd_info = None
    if isinstance(gcd, string_types):
        exp_gcd = expanduser(expandvars(gcd))
        if exp_gcd.endswith('.pkl'):
            gcd_info = load_pickle(exp_gcd)
        elif '.i3' in exp_gcd:
            gcd_info = extract_gcd(exp_gcd)
        else:
            raise ValueError('No idea how to handle GCD file "{}"'.format(gcd))

    if photons['gcd']:
        try:
            gcd_from_data = expanduser(expandvars(photons['gcd']))
            if gcd_from_data.endswith('.pkl'):
                gcd_info_from_data = load_pickle(gcd_from_data)
            else:
                gcd_info_from_data = extract_gcd(gcd_from_data)
        except (AttributeError, KeyError, ValueError):
            raise
            #assert gcd_info is not None
        else:
            if gcd_info is None:
                gcd_info = gcd_info_from_data
            else:
                pass
                #if not np.all(gcd_info == gcd_info_from_data):
                #    print('WARNING: Using different GCD from the one used'
                #          ' during simulation!')

    if gcd_info is None:
        photons_err = ''
        if photons_file_name is not None:
            photons_err = ' filename "{}"'.format(photons_file_name)
        raise ValueError(
            'No GCD info could be found from arg `gcd`={} or in `photons`'
            '{}'.format(gcd, photons_err))

    rde = gcd_info['rde']
    noise_rate_hz = gcd_info['noise']
    mask = (rde == 0) | np.isnan(rde) | np.isinf(rde)
    operational_doms = ~mask
    rde = np.ma.masked_where(mask, rde)
    quantum_efficiency = rde

    histos = OrderedDict()
    keep_gcd_keys = ['source_gcd_name', 'source_gcd_md5', 'source_gcd_i3_md5']
    histos['gcd_info'] = OrderedDict([(k, gcd_info[k]) for k in keep_gcd_keys])
    histos['include_rde'] = include_rde
    histos['include_noise'] = include_noise
    histos['bin_edges'] = bin_edges
    histos['binning_spec'] = OrderedDict([('domain', (0, t_max)),
                                          ('num_bins', num_bins),
                                          ('spacing', 'linear'),
                                          ('units', 'ns')])

    # Note the first number in the file is approximately equal to (but greater
    # than) the peak in the distribution, so is useless for us.
    possible_paths = [
        hole_ice_model,
        '$I3_SRC/ice-models/resources/models/angsens/' + hole_ice_model,
        '$I3_SRC/ice-models/resources/models/angsens/as.' + hole_ice_model,
        '$I3_SRC/ice-models/resources/models/angsens_flasher/' +
        hole_ice_model,
        '$I3_SRC/ice-models/resources/models/angsens_flasher/as.' +
        hole_ice_model,
    ]
    coeffs_loaded = False
    for path in possible_paths:
        path = expanduser(expandvars(path))
        if not isfile(path):
            continue
        try:
            poly_coeffs = np.loadtxt(path)[1:]
        except Exception:
            pass
        else:
            coeffs_loaded = True
            break

    if not coeffs_loaded:
        raise ValueError('Could not load hole ice model at any of\n{}'.format(
            possible_paths))

    # We want coszen = -1 to correspond to upgoing particles, but angular
    # sensitivity is given w.r.t. the DOM axis (which points "down" towards
    # earth, and therefore is rotated 180 deg). So rotate the coszen
    # polynomial about cz=0 by negating the odd coefficients (coeffs are in
    # ascending powers of "x").
    flipped_coeffs = np.empty_like(poly_coeffs)
    flipped_coeffs[0::2] = poly_coeffs[0::2]
    flipped_coeffs[1::2] = -poly_coeffs[1::2]
    angsens_poly = np.polynomial.Polynomial(flipped_coeffs, domain=(-1, 1))

    # Attach the weights to the data
    num_sims = photons['num_sims']
    for data_dict in photons['doms'].values():
        cz = data_dict['coszen']
        try:
            # Note that angular sensitivity will modify the total number of
            # photons detected, and the poly is normalized as such already, so no
            # normalization should be applied here.
            angsens_wt = angsens_poly(cz)
        except Exception:
            print(np.min(cz), np.max(cz))
            raise

        data_dict['weight'] = angsens_wt / num_sims

        for k, array in data_dict.items():
            data_dict[k] = array.astype(np.float32)

    histos['results'] = results = OrderedDict()
    for (string, dom), data in dom_info.items():
        string_idx, dom_idx = string - 1, dom - 1
        if not operational_doms[string_idx, dom_idx]:
            continue

        hist, _ = np.histogram(data['time'],
                               bins=bin_edges,
                               weights=data['weight'],
                               density=False)
        if include_rde:
            hist *= quantum_efficiency[string_idx, dom_idx]
        if include_noise:
            hist += (noise_rate_hz[string_idx, dom_idx] / 1e9) * bin_widths
        results[(string, dom)] = hist

    if outfile is not None:
        outfile = expanduser(expandvars(outfile))
        print('Writing histos to\n"{}"'.format(outfile))
        pickle.dump(histos,
                    open(outfile, 'wb'),
                    protocol=pickle.HIGHEST_PROTOCOL)

    return histos, dom_info
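
The coefficient flip inside `generate_histos` relies on the identity p(-x) = sum_k c_k (-1)**k x**k: negating the odd-power coefficients mirrors the polynomial about x = 0. A quick self-contained check with made-up coefficients:

import numpy as np

coeffs = np.array([0.5, 0.3, -0.2, 0.1])  # arbitrary ascending-power coeffs
flipped = coeffs.copy()
flipped[1::2] *= -1  # negate odd powers, as in generate_histos

p = np.polynomial.Polynomial(coeffs)
p_flipped = np.polynomial.Polynomial(flipped)

x = np.linspace(-1, 1, 11)
assert np.allclose(p_flipped(x), p(-x))  # mirrored about x = 0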
Example #11
def plot_run_info(files,
                  labels,
                  outdir,
                  fwd_hists=None,
                  data_or_sim_label=None,
                  paired=False,
                  gradient=False,
                  plot=True):
    """Plot `files` using `labels` (one for each file).

    Parameters
    ----------
    files : string or iterable thereof
    labels : string or iterable thereof
    outdir : string
    fwd_hists : string, optional
    data_or_sim_label : string, optional
    paired : bool, optional
    gradient : bool, optional
    plot : bool, optional

    """
    if isinstance(files, string_types):
        files = [files]
    if isinstance(labels, string_types):
        labels = [labels]

    outdir = expand(outdir)

    if fwd_hists is not None:
        fwd_hists = load_pickle(fwd_hists)
        if 'binning' in fwd_hists:
            t_min = fwd_hists['binning']['t_min']
            t_max = fwd_hists['binning']['t_max']
            t_window = t_max - t_min
            num_bins = fwd_hists['binning']['num_bins']
            spacing = fwd_hists['binning']['spacing']
            assert spacing == 'linear', spacing
            fwd_hists_binning = np.linspace(t_min, t_max, num_bins + 1)
        elif 'bin_edges' in fwd_hists:
            fwd_hists_binning = fwd_hists['bin_edges']
            t_window = np.max(fwd_hists_binning) - np.min(fwd_hists_binning)
        else:
            raise ValueError(
                'Need "binning" or "bin_edges" in fwd_hists; keys are {}'.
                format(fwd_hists.keys()))
        hist_bin_widths = np.diff(fwd_hists_binning)
        if 'results' in fwd_hists:
            fwd_hists = fwd_hists['results']
        else:
            raise ValueError('Could not find key "results" in fwd hists!')
    else:
        raise NotImplementedError('Need fwd hists for now.')

    if not isdir(outdir):
        makedirs(outdir)

    run_infos = []
    all_string_dom_pairs = set()
    mc_true_params = None
    for filepath in files:
        filepath = expand(filepath)
        if isdir(filepath):
            filepath = join(filepath, 'run_info.pkl')
        run_info = load_pickle(filepath)
        run_infos.append(run_info)
        pairs = []
        for sd_idx in run_info['sd_indices']:
            pairs.append(get_string_om_pair(sd_idx))
        all_string_dom_pairs.update(pairs)
        if data_or_sim_label is None:
            data_or_sim_label = (
                'Simulation: ' +
                run_info['sim_to_test'].replace('_', ' ').capitalize())

        if mc_true_params is None:
            if 'sim' in run_info:
                mc_true_params = run_info['sim']['mc_true_params']
            else:
                print('mc_true_params not in run_info', filepath)

    params_label = None
    if mc_true_params is not None:
        params_label = []
        for plab, pval in mc_true_params.items():
            units = ''

            if plab == 't':
                pval = format(int(pval), 'd')
                #plab = r'{}'.format(plab)
                units = r'\, \rm{ ns}'

            elif plab in 'x y z'.split():
                pval = format(pval, '0.1f')
                #plab = r'${}$'.format(plab)
                units = r'\, \rm{ m}'

            elif plab in 'track_energy cascade_energy'.split():
                pval = format(int(pval), 'd')
                plab = r'E_{\rm %s}' % plab.split('_')[0]
                units = r'\, \rm{ GeV}'

            elif plab in 'track_azimuth track_zenith cascade_azimuth cascade_zenith'.split(
            ):
                pval = format(pval / np.pi, '.2f')
                if 'azimuth' in plab:
                    ltr = r'\phi'
                elif 'zenith' in plab:
                    ltr = r'\theta'
                plab = ltr + r'_{\rm %s}' % plab.split('_')[0]
                units = r'\, \pi'

            params_label.append('{}={}{}'.format(plab, pval, units))
        params_label = '$' + r',\;'.join(params_label) + '$'

    if plot:
        fig, ax = plt.subplots(1, 1, figsize=(10, 8), dpi=72)

    t_indep_tots = []
    tots_incl_noise = []
    tots_excl_noise = []
    kss = []
    ref_tots_incl_noise = []
    ref_tots_excl_noise = []
    ref_areas_incl_noise = []
    for string, dom in reversed(sorted(all_string_dom_pairs)):
        if plot:
            ax.clear()
        all_zeros = True
        xmin = np.inf
        xmax = -np.inf
        ref_y = None
        if fwd_hists:
            if (string, dom) in fwd_hists:
                # Hit rate per nanosecond in each bin (includes noise hit rate)
                ref_y = fwd_hists[(string, dom)] / hist_bin_widths

                # Duplicate first element for plotting via `plt.step`
                ref_y = np.array([ref_y[0]] + ref_y.tolist())

                # Figure out "meaningful" range
                nonzero_mask = ref_y != 0  #~np.isclose(ref_y, 0)
                if np.any(nonzero_mask):
                    all_zeros = False
                    #ref_y_all_zeros = False
                    min_mask = (ref_y - ref_y.min()) >= 0.01 * (ref_y.max() -
                                                                ref_y.min())
                    xmin = min(xmin, fwd_hists_binning[min_mask].min())
                    xmax = max(xmax, fwd_hists_binning[min_mask].max())
            else:
                ref_y = np.zeros_like(fwd_hists_binning)

            ref_y_areas = ref_y[1:] * hist_bin_widths
            ref_y_area = np.sum(ref_y_areas)

            ref_tots_incl_noise.append(ref_y_area)

            # Following only works if our time window is large enough s.t. exp
            # hits from event is zero somewhere, and then it'll only be noise
            # contributing at that time...
            ref_tots_excl_noise.append(np.sum(ref_y_areas - ref_y_areas.min()))
            ref_areas_incl_noise.append(ref_y_area)

            if plot:
                ax.step(
                    fwd_hists_binning,
                    ref_y,
                    lw=1,
                    label=(r'Fwd: $\Sigma \lambda_q \Delta t$={}'.format(
                        num_fmt(ref_y_area))),
                    clip_on=True,
                    #color='C0'
                )

        colors = ['C%d' % i for i in range(1, 10)]
        linestyles = ['-', '--']
        linewidths = [5, 3, 2, 2, 2, 2, 2]

        for plt_i, (label, run_info) in enumerate(zip(labels, run_infos)):
            sample_hit_times = run_info['hit_times']
            if len(tots_incl_noise) <= plt_i:
                tots_incl_noise.append([])
                tots_excl_noise.append([])
                t_indep_tots.append([])
                kss.append([])

            results = run_info['results']
            y_ti = np.nan  # defined even when t-indep expectation is absent
            if (string, dom) in pairs:
                rslt = results[get_sd_idx(string, dom)]
                if 'exp_p_at_hit_times' in rslt:
                    y = rslt['exp_p_at_hit_times']
                    y_ti = rslt['exp_p_at_all_times']
                    t_indep_tots[plt_i].append(y_ti)
                else:
                    y = rslt['pexp_at_hit_times']

                nonzero_mask = y != y[0]  #~np.isclose(y, 0)
                if np.any(nonzero_mask):
                    all_zeros = False
                    min_mask = y >= 0.01 * max(y)
                    xmin = min(xmin, sample_hit_times[min_mask].min())
                    xmax = max(xmax, sample_hit_times[min_mask].max())
            else:
                y = np.zeros_like(sample_hit_times)

            #y_area = np.sum(

            masked_y = np.ma.masked_invalid(y * hist_bin_widths)
            tot_excl_noise = np.sum(masked_y - masked_y.min())
            tot_incl_noise = masked_y.sum()
            if tot_excl_noise != 0:
                tots_excl_noise[plt_i].append(tot_excl_noise)
                tots_incl_noise[plt_i].append(tot_incl_noise)
            else:
                tots_excl_noise[plt_i].append(0)
                tots_incl_noise[plt_i].append(0)
            kss[plt_i].append(ks_test(y, ref_y[1:]))

            #kl_div = None
            custom_label = r'{:3s}: $\Sigma \lambda_q \Delta t$={}, ti={}'.format(
                label, num_fmt(tots_incl_noise[plt_i][-1]), num_fmt(y_ti))
            #if ref_y is not None: # and not ref_y_all_zeros:
            #    abs_mean_diff = np.abs(np.mean(y - ref_y[1:]))
            #    #rel_abs_mean_diff = abs_mean_diff / np.sum(ref_y[1:])

            #    mask = ref_y[1:] > 0
            #    kl_ref_vals = ref_y[1:][mask]
            #    kl_ref_vals /= np.sum(kl_ref_vals)

            #    y_prob_vals = y[mask]
            #    y_prob_vals /= np.sum(y_prob_vals)

            #    with np.errstate(divide='ignore'):
            #        kl_div = -np.sum(kl_ref_vals * np.log(y_prob_vals / kl_ref_vals))
            #    custom_label = format(rel_abs_mean_diff, '9.6f') + '  ' + label

            if paired:
                c_idx, ls_idx = divmod(plt_i, 2)
                color = colors[c_idx]
                linestyle = linestyles[ls_idx]
            else:
                color = None
                linestyle = None

            if plot:
                ax.plot(sample_hit_times,
                        y,
                        label=custom_label,
                        color=color,
                        linestyle=linestyle,
                        linewidth=linewidths[plt_i],
                        clip_on=True)

        if all_zeros:
            continue

        if xmin == xmax:
            xmin = np.min(fwd_hists_binning)
            xmax = np.max(fwd_hists_binning)

        if plot:
            ax.set_xlim(xmin, xmax)
            ax.set_ylim(0, ax.get_ylim()[1])

            for pos in 'bottom left top right'.split():
                ax.spines[pos].set_visible(False)

            ax.xaxis.set_ticks_position('none')
            ax.yaxis.set_ticks_position('none')

            ax.xaxis.tick_bottom()
            ax.yaxis.tick_left()

            #if kl_div is not None:
            #title = ' '*6 + 'Abs diff'.ljust(8) + '  ' + 'Simulation'
            #else:
            title = 'Code'

            leg = ax.legend(
                #title=title,
                #loc='best',
                loc='upper right',
                #frameon=False,
                framealpha=0.7,
                prop=dict(family='monospace', size=12))
            plt.setp(leg.get_title(), family='monospace', fontsize=12)
            #if kl_div is not None:
            #leg._legend_box.align = "left"
            leg.get_frame().set_linewidth(0)
            ax.set_xlabel('Time from event vertex (ns)', fontsize=14)

            if data_or_sim_label is not None:
                plt.text(0.5,
                         1.1,
                         data_or_sim_label,
                         ha='center',
                         va='bottom',
                         transform=ax.transAxes,
                         fontsize=16)
            if params_label is not None:
                plt.text(0.5,
                         1.05,
                         params_label,
                         ha='center',
                         va='bottom',
                         transform=ax.transAxes,
                         fontsize=12)

            ax.text(0.5,
                    1.0,
                    'String {}, DOM {}'.format(string, dom),
                    ha='center',
                    va='bottom',
                    transform=ax.transAxes,
                    fontsize=14)

            fbasename = 'string_{}_dom_{}'.format(string, dom)
            fig.savefig(join(outdir, fbasename + '.png'))
            sys.stdout.write('({}, {}) '.format(string, dom))
            sys.stdout.flush()
    sys.stdout.write('\n\n')
    sys.stdout.flush()

    ref_tots_incl_noise = np.array(ref_tots_incl_noise)
    ref_tots_excl_noise = np.array(ref_tots_excl_noise)
    ref_areas_incl_noise = np.array(ref_areas_incl_noise)

    ref_tot_incl_noise = np.sum(ref_tots_incl_noise)
    ref_tot_excl_noise = np.sum(ref_tots_excl_noise)
    ref_area_incl_noise = np.sum(ref_areas_incl_noise)

    print('{:9s}  {:9s}  {:16s}  {:16s}  {:16s}  {}'.format(
        'wtd KS'.rjust(9), 'avg KS'.rjust(9), 'Ratio incl noise'.rjust(16),
        'Ratio excl noise'.rjust(16), 't-indep ratio'.rjust(16), 'Label'))
    for label, ks, tot_incl_noise, tot_excl_noise, ti_tot in zip(
            labels, kss, tots_incl_noise, tots_excl_noise, t_indep_tots):
        ks = np.array(ks)
        mask = ~np.isnan(ks)
        ks_avg = np.mean(ks[mask])
        ks_wtd_avg = (np.sum(ks[mask] * ref_tots_excl_noise[mask]) /
                      np.sum(ref_tots_excl_noise[mask]))
        print('{:9s}  {:9s}  {:16s}  {:16s}  {:16s}  {}'.format(
            format(ks_wtd_avg, '.7f').rjust(9),
            format(ks_avg, '.7f').rjust(9),
            format(np.sum(tot_excl_noise) / ref_tot_excl_noise,
                   '.12f').rjust(16),
            format(np.sum(tot_incl_noise) / ref_tot_incl_noise,
                   '.12f').rjust(16),
            format(np.sum(ti_tot) / ref_area_incl_noise, '.12f').rjust(16),
            label))
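
A hypothetical call comparing two Retro runs against the same forward-simulation histograms (paths and labels illustrative; `fwd_hists` is a pickle such as the one written by `generate_histos` above):

plot_run_info(
    files=['run_a/run_info.pkl', 'run_b/run_info.pkl'],  # hypothetical
    labels=['baseline', 'new tables'],
    outdir='~/retro/plots',
    fwd_hists='fwd_sim_histos.pkl',  # hypothetical
    paired=True,  # runs 2i and 2i+1 share a color, differ by linestyle
)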
Example #12
def plot_run_info2(
    fpath,
    only_string,
    subtract_noisefloor=True,
    plot_ref=True,
    normalize=False,
    scalefact=None,
    axes=None,
):
    """Plot information from `run_info.pkl` file as produced by
    `retro_dom_pdfs.py` script.

    Parameters
    ----------
    fpath : str
        Full path to `run_info.pkl` file
    only_string : int in [1, 86]
        String to plot
    subtract_noisefloor : bool, optional
        Whether to subtract the minimum value from each distribution, which
        (usually but not always) is the noise floor
    plot_ref : bool, optional
        Plot the forward-simulation distribution
    normalize : bool, optional
        Scale each Retro distribution to match the total charge of the
        corresponding forward-simulation distribution
    scalefact : float, optional
        If not specified, a scale factor will be derived from the ratio between
        the forward-simulation and Retro distributions
    axes : length-3 sequence of matplotlib.axis, optional
        Provide the axes on which to plot the distributions; otherwise, a new
        figure with 3 axes will be created

    Returns
    -------
    fig : matplotlib.figure
    axes : length-3 list of matplotlib.axis

    """
    if axes is None:
        fig, axes = plt.subplots(3, 1, figsize=(16, 24), dpi=120)
    else:
        assert len(axes) == 3
        fig = axes[0].get_figure()

    subtract_noisefloor = 1 if subtract_noisefloor else 0

    # -- Extract info from files -- #

    fpath = expand(fpath)
    if isdir(fpath):
        fpath = join(fpath, 'run_info.pkl')
    info = load_pickle(fpath)

    sd_indices = info['sd_indices']
    hit_times = info['hit_times']
    dom_exp = info['dom_exp']
    hit_exp = info['hit_exp']
    #dt = np.diff(hit_times)

    fwd = load_pickle(info['sim']['fwd_sim_histo_file'])
    bin_edges = fwd['bin_edges']
    fwd_results = fwd['results']
    # why?
    dt = np.diff(bin_edges)
    dt = np.ones_like(dt)

    # -- Figure out how many lines are to be plotted -- #

    total_num_lines = 0
    for idx, sd_idx in enumerate(sd_indices):
        he = hit_exp[idx, :]
        string, dom = get_string_om_pair(sd_idx)
        if string != only_string or np.sum(he) == 0:
            continue
        total_num_lines += 1

    # -- Get info from all distributions -- #

    weights = []
    rats = []
    xmin = np.inf
    ymax = -np.inf
    ymin_at_3k = np.inf
    absdiff3k = np.abs(hit_times - 3000)
    idx_at_3k = np.where(absdiff3k == np.min(absdiff3k))[0][0]
    for idx, sd_idx in enumerate(sd_indices):
        he = hit_exp[idx, :]
        de = dom_exp[idx]
        he -= np.min(he)
        string, dom = get_string_om_pair(sd_idx)
        if np.sum(he) == 0 or (string, dom) not in fwd_results:
            continue
        ref = fwd_results[(string, dom)]
        ref_tot = np.sum(ref)
        he_tot = np.sum(he)
        #print('ratio clsim vs. retro %.2f for (%s, %s)'%(ref_tot/de, string, dom))
        ref -= np.min(ref)
        mask = (he > 1e-12) & (ref >= 1e-12)
        rats.append(np.sum((ref[mask] / he[mask]) * ref[mask]))
        weights.append(np.sum(ref[mask]))
        if string != only_string:
            continue
        xmin_idx = np.where(ref > 0)[0][0]
        xmin = min(xmin, hit_times[xmin_idx])
        ymax = max(ymax, np.max(ref))
        ymin_at_3k = min(ymin_at_3k, ref[idx_at_3k])
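    # Ref-weighted average of the per-bin ratio ref/he: serves as a global
    # scale factor between forward-simulation and Retro distributions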
    wtdavg_rat = np.sum(rats) / np.sum(weights)
    xmin -= 50
    if ymin_at_3k == 0:
        ymin_at_3k = ymax / 1e6

    if scalefact is None:
        print('wtdavg_rat:', wtdavg_rat, '(using as scalefact)')
        scalefact = wtdavg_rat
    else:
        print('wtdavg_rat:', wtdavg_rat,
              '(but using {} as scalefact)'.format(scalefact))

    def innerplot(ax):  # pylint: disable=missing-docstring
        for idx, sd_idx in enumerate(sd_indices):
            string, dom = get_string_om_pair(sd_idx)
            he = hit_exp[idx, :]
            de = dom_exp[idx]
            # Skip DOMs not on the requested string or with no expected hits
            if string != only_string or np.sum(he) == 0:
                continue
            norm = de / np.sum(he)
            if (string, dom) in fwd_results:
                ref = fwd_results[(string, dom)]
            else:
                ref = he
            if normalize:
                mask = (he > 1e-12) & (ref >= 1e-12)
                tot_ref = np.sum(ref[mask] / dt[mask])
                tot_he = np.sum(he[mask])
                scale = tot_ref / tot_he if tot_he != 0. else 1.
            else:
                scale = scalefact
            line, = ax.plot(
                hit_times,
                scale * (he * norm - subtract_noisefloor * np.min(he * norm)),
                '-',
                lw=1,
                label='({}, {})'.format(string, dom))
            if not plot_ref or (string, dom) not in fwd_results:
                continue
            ax.plot(hit_times,
                    ref - subtract_noisefloor * np.min(ref),
                    linestyle='--',
                    lw=0.5,
                    color=line.get_color())

    # -- Plot overview of distributions -- #

    ax = axes[0]
    num_lines = total_num_lines
    cm = plt.cm.gist_rainbow
    ax.set_prop_cycle('color',
                      [cm(1. * i / num_lines) for i in range(num_lines)])
    innerplot(ax)
    ax.set_ylim(ymin_at_3k, ymax * 2)
    ax.set_xlim(xmin, min(xmin + 2000, 3000))
    ax.legend(loc='best', fontsize=8, ncol=4, frameon=False)

    # -- Zoom on peaks -- #

    ax = axes[1]
    num_lines = 20
    cm = plt.cm.tab20
    ax.set_prop_cycle('color',
                      [cm(1. * i / num_lines) for i in range(num_lines)])
    innerplot(ax)
    ax.set_ylim(ymax / 5e3, ymax * 3)
    ax.set_xlim(xmin + 25, xmin + 750)
    ax.legend(loc='best', fontsize=7, ncol=14, frameon=False)

    # -- Zoom on tails -- #

    ax = axes[2]
    num_lines = 20
    cm = plt.cm.tab20
    ax.set_prop_cycle('color',
                      [cm(1. * i / num_lines) for i in range(num_lines)])
    innerplot(ax)
    ax.set_xlim(xmin + 750, 3000)
    ax.set_ylim(ymin_at_3k / 2, ymin_at_3k * 1e3)
    ax.legend(loc='best', fontsize=7, ncol=6, frameon=False)

    # -- Set common plot things -- #

    axes[0].set_title(info['sim_to_test'])
    axes[-1].set_xlabel('Time (ns)')
    for ax in axes:
        ax.set_ylabel('Charge (PE)')
        ax.set_yscale('log')
    fig.tight_layout()

    return fig, axes
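
A minimal usage sketch; the `run_info.pkl` path and string number below are
hypothetical, chosen only to illustrate the call:

# Hypothetical path and string number, for illustration only
fig, axes = plot_run_info2(
    fpath='~/retro/runs/my_run/run_info.pkl',  # a directory containing it also works
    only_string=36,
    subtract_noisefloor=True,
    plot_ref=True,
)
fig.savefig('run_info_string36.png', dpi=120)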
Exemplo n.º 13
0
def extract_gcd(gcd_file, outdir=None):
    """Extract info from a GCD in i3 format, optionally saving to a simple
    Python pickle file.

    Parameters
    ----------
    gcd_file : str
    outdir : str, optional
        If provided, the GCD info is saved to a .pkl file with the same name
        as `gcd_file`, just with the extension replaced.

    Returns
    -------
    gcd_info : OrderedDict
        'source_gcd_name': basename of the `gcd_file` provided
        'source_gcd_md5': direct md5sum of `gcd_file` (possibly compressed)
        'source_gcd_i3_md5': md5sum of `gcd_file` after decompressing to .i3
        'geo': (86, 60, 3) array of DOM x, y, z coordinates, in meters,
            relative to the IceCube coordinate system
        'rde': (86, 60) array of relative DOM efficiencies
        'noise': (86, 60) array of noise rates, in Hz, one per DOM

    """
    gcd_file = expanduser(expandvars(gcd_file))
    src_gcd_dir, src_gcd_basename = split(gcd_file)

    # Strip all recognized extensions to find the base file name's "stem,"
    # then attach a ".pkl" extension to that
    src_gcd_stripped = src_gcd_basename
    while True:
        src_gcd_stripped, ext = splitext(src_gcd_stripped)
        if ext.lower().lstrip('.') not in ['i3', 'pkl', 'bz2', 'gz', 'zst']:
            # Reattach the unknown "extension"; presumably it's actually part
            # of the filename and not an extension at all (or it's an
            # extension we don't care about, or an empty string if no dot
            # remains in the name)
            src_gcd_stripped += ext
            break
    pkl_outfname = src_gcd_stripped + '.pkl'
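    # For example, a hypothetical "GeoCalibDetectorStatus_XYZ.i3.gz" yields
    # pkl_outfname "GeoCalibDetectorStatus_XYZ.pkl"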

    pkl_outfpath = None
    if outdir is not None:
        outdir = expanduser(expandvars(outdir))
        mkdir(outdir)
        pkl_outfpath = join(outdir, pkl_outfname)
        if isfile(pkl_outfpath):
            return load_pickle(pkl_outfpath)

    def save_pickle_if_appropriate(gcd_info):
        if pkl_outfpath is not None:
            with open(pkl_outfpath, 'wb') as fobj:
                pickle.dump(gcd_info, fobj, protocol=pickle.HIGHEST_PROTOCOL)

    # Look for existing extracted (pkl) version in choice directories
    look_in_dirs = []
    if src_gcd_dir:
        look_in_dirs.append(src_gcd_dir)
    look_in_dirs += ['.', DATA_DIR]
    if 'I3_DATA' in os.environ:
        look_in_dirs.append('$I3_DATA/GCD')
    look_in_dirs = [expanduser(expandvars(d)) for d in look_in_dirs]

    for look_in_dir in look_in_dirs:
        uncompr_pkl_fpath = join(look_in_dir, pkl_outfname)
        if isfile(uncompr_pkl_fpath):
            gcd_info = load_pickle(uncompr_pkl_fpath)
            save_pickle_if_appropriate(gcd_info)
            return gcd_info

    # If we couldn't find the already-extracted file, find the source file
    # (if user doesn't specify a full path to the file, try in several possible
    # directories)
    if src_gcd_dir:
        look_in_dirs = [src_gcd_dir]
    else:
        look_in_dirs = ['.', DATA_DIR]
        if 'I3_DATA' in os.environ:
            look_in_dirs.append('$I3_DATA/GCD')
    look_in_dirs = [expanduser(expandvars(d)) for d in look_in_dirs]

    src_fpath = None
    for look_in_dir in look_in_dirs:
        fpath = join(look_in_dir, src_gcd_basename)
        if isfile(fpath):
            src_fpath = fpath
            break

    if src_fpath is None:
        raise IOError('Cannot find file "{}" in dir(s) {}'.format(
            src_gcd_basename, look_in_dirs))

    # Figure out what compression algorithms are used on the file; final state
    # will have `ext_lower` containing either "i3" or "pkl" indicating the
    # basic type of file we have
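    # E.g., a name like "foo.i3.bz2" yields compression=['bz2'] and
    # ext_lower='i3'; outer compression layers appear first in the list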
    compression = []
    src_gcd_stripped = src_gcd_basename
    while True:
        src_gcd_stripped, ext = splitext(src_gcd_stripped)
        ext_lower = ext.lower().lstrip('.')
        if ext_lower in ['gz', 'bz2', 'zst']:
            compression.append(ext_lower)
        elif ext_lower in ['i3', 'pkl']:
            break
        else:
            if ext:
                raise IOError(
                    'Unhandled extension "{}" found in GCD file "{}"'.format(
                        ext, gcd_file))
            raise IOError(
                'Illegal filename "{}"; must have either ".i3" or ".pkl" extension,'
                ' optionally followed by compression extension(s)'.format(
                    gcd_file))

    with open(src_fpath, 'rb') as fobj:
        decompressed = fobj.read()

    # Hash the file as read from disk (i.e., possibly still compressed).
    # Don't hash a pickle file: all we care about is the hash of the original
    # i3 file, which is a value already stored inside the pickle file
    if ext_lower == 'i3':
        source_gcd_md5 = hashlib.md5(decompressed).hexdigest()

    for comp_alg in compression:
        if comp_alg == 'gz':
            decompressed = gzip.GzipFile(fileobj=BytesIO(decompressed)).read()
        elif comp_alg == 'bz2':
            decompressed = bz2.decompress(decompressed)
        elif comp_alg == 'zst':
            decompressor = zstandard.ZstdDecompressor()
            decompressed = decompressor.decompress(decompressed,
                                                   max_output_size=100000000)

    if ext_lower == 'pkl':
        if PY2:
            gcd_info = pickle.loads(decompressed)
        else:
            gcd_info = pickle.loads(decompressed, encoding='latin1')
        save_pickle_if_appropriate(gcd_info)
        return gcd_info

    # -- If we get here, we have an i3 file -- #

    decompressed_gcd_md5 = hashlib.md5(decompressed).hexdigest()

    from I3Tray import I3Units, OMKey  # pylint: disable=import-error
    from icecube import dataclasses, dataio  # pylint: disable=import-error, unused-variable, unused-import

    # Open the file where it was actually found (`gcd_file` as passed in
    # might be a bare filename that was resolved via `look_in_dirs`)
    gcd = dataio.I3File(src_fpath)  # pylint: disable=no-member

    # Scan frames until both geometry and calibration have been found
    omgeo, dom_cal = None, None
    while gcd.more() and (omgeo is None or dom_cal is None):
        frame = gcd.pop_frame()
        if 'I3Geometry' in frame:
            omgeo = frame['I3Geometry'].omgeo
        if 'I3Calibration' in frame:
            dom_cal = frame['I3Calibration'].dom_cal

    assert omgeo is not None
    assert dom_cal is not None

    # create output dict
    gcd_info = OrderedDict()
    gcd_info['source_gcd_name'] = src_gcd_basename
    gcd_info['source_gcd_md5'] = source_gcd_md5
    gcd_info['source_gcd_i3_md5'] = decompressed_gcd_md5
    gcd_info['geo'] = np.full(shape=(N_STRINGS, N_DOMS, 3), fill_value=np.nan)
    gcd_info['noise'] = np.full(shape=(N_STRINGS, N_DOMS), fill_value=np.nan)
    gcd_info['rde'] = np.full(shape=(N_STRINGS, N_DOMS), fill_value=np.nan)

    for string_idx in range(N_STRINGS):
        for dom_idx in range(N_DOMS):
            omkey = OMKey(string_idx + 1, dom_idx + 1)
            om = omgeo.get(omkey)
            gcd_info['geo'][string_idx, dom_idx, 0] = om.position.x
            gcd_info['geo'][string_idx, dom_idx, 1] = om.position.y
            gcd_info['geo'][string_idx, dom_idx, 2] = om.position.z
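            # DOMs missing from the calibration get noise rate and relative
            # efficiency of 0.0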
            try:
                gcd_info['noise'][string_idx,
                                  dom_idx] = (dom_cal[omkey].dom_noise_rate /
                                              I3Units.hertz)
            except KeyError:
                gcd_info['noise'][string_idx, dom_idx] = 0.0

            try:
                gcd_info['rde'][string_idx,
                                dom_idx] = dom_cal[omkey].relative_dom_eff
            except KeyError:
                gcd_info['rde'][string_idx, dom_idx] = 0.0

    save_pickle_if_appropriate(gcd_info)

    return gcd_info
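
A minimal usage sketch, assuming an i3-format GCD file is available; the
filename and output directory below are hypothetical:

# Hypothetical filename and output directory, for illustration only
gcd_info = extract_gcd(
    'GeoCalibDetectorStatus_XYZ.i3.gz',  # searched for in ., DATA_DIR, $I3_DATA/GCD
    outdir='~/retro/gcd_cache',          # caches the extracted .pkl here
)
print(gcd_info['geo'].shape)    # (86, 60, 3): x, y, z per DOM, in meters
print(gcd_info['noise'].shape)  # (86, 60): per-DOM noise rate, in Hz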