Example #1
def apply_ratio_scale(orig_maps, key1, key2, ratio_scale, is_flux_scale, int_type = None):
    '''
    Scales the ratio of the entries of two maps, conserving the total.
    '''

    if is_flux_scale: log_str = 'flux'
    else: log_str = 'event rate (%s)'%int_type

    if not is_flux_scale:
        # we have maps of event counts of a certain interaction type
        orig_sum = orig_maps[key1][int_type]['map'] + orig_maps[key2][int_type]['map']
        orig_total1 = orig_maps[key1][int_type]['map'].sum()
        orig_total2 = orig_maps[key2][int_type]['map'].sum()
        orig_ratio = orig_maps[key1][int_type]['map'] / orig_maps[key2][int_type]['map']
    else:
        # we have flux_maps
        orig_sum = orig_maps[key1]['map'] + orig_maps[key2]['map']
        orig_total1 = orig_maps[key1]['map'].sum()
        orig_total2 = orig_maps[key2]['map'].sum()
        orig_ratio = orig_maps[key1]['map'] / orig_maps[key2]['map']

    # conserved total:
    scaled_map2 = orig_sum / (1 + ratio_scale*orig_ratio)
    scaled_map1 = ratio_scale*orig_ratio*scaled_map2

    logging.trace(' %s / %s %s ratio before scaling: %.3f'%(key1, key2, log_str,
                    orig_total1/orig_total2))
    logging.trace(' %s / %s %s ratio after scaling with %.2f: %.3f'%(key1, key2, log_str,
                    ratio_scale, scaled_map1.sum()/scaled_map2.sum()))

    return scaled_map1, scaled_map2
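A minimal standalone sketch (plain NumPy, not part of PISA) illustrating that the two formulas above conserve the per-bin total while rescaling the map1/map2 ratio:

import numpy as np

map1 = np.array([[4.0, 6.0], [2.0, 8.0]])
map2 = np.array([[2.0, 3.0], [1.0, 4.0]])
ratio_scale = 1.2

orig_sum = map1 + map2
orig_ratio = map1 / map2

# same algebra as apply_ratio_scale
scaled_map2 = orig_sum / (1 + ratio_scale * orig_ratio)
scaled_map1 = ratio_scale * orig_ratio * scaled_map2

assert np.allclose(scaled_map1 + scaled_map2, orig_sum)                   # total conserved
assert np.allclose(scaled_map1 / scaled_map2, ratio_scale * orig_ratio)   # ratio rescaled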
Example #2
    def apply_function(self):

        for container in self.data:
            # update uncertainty first, before the weights are changed. This step is skipped in event mode
            if self.error_method == "sumw2":

                # If computing uncertainties in events mode, warn that
                # hs error propagation will be skipped
                if self.data.representation == 'events':
                    logging.trace(
                        'WARNING: running stage in events mode. Hypersurface error propagation will be IGNORED.'
                    )

                elif self.propagate_uncertainty:
                    container["errors"] = container["weights"] * container[
                        "hs_scales_uncertainty"]

                else:
                    container["errors"] *= container["hs_scales"]
                    container.mark_changed('errors')

            # Update weights according to hypersurfaces
            container["weights"] = np.clip(container["weights"] *
                                           container["hs_scales"],
                                           a_min=0,
                                           a_max=np.inf)
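A toy NumPy-only sketch (container objects replaced by plain arrays) of the two operations performed per container: propagate the hypersurface uncertainty into the errors, then scale and clip the weights:

import numpy as np

weights = np.array([1.0, 2.0, 0.5])
hs_scales = np.array([1.1, 0.9, -0.2])              # hypersurface scale factors
hs_scales_uncertainty = np.array([0.05, 0.04, 0.10])

errors = weights * hs_scales_uncertainty             # sumw2-style error propagation
weights = np.clip(weights * hs_scales, a_min=0, a_max=np.inf)  # forbid negative weights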
Example #3
def apply_ratio_scale(orig_maps, key1, key2, ratio_scale, is_flux_scale, int_type=None):
    """
    Scales the ratio of the entries of two maps, conserving the total.
    """

    if is_flux_scale: log_str = 'flux'
    else: log_str = 'event rate (%s)'%int_type

    if not is_flux_scale:
        # we have maps of event counts of a certain interaction type
        orig_sum = orig_maps[key1][int_type]['map'] + orig_maps[key2][int_type]['map']
        orig_total1 = orig_maps[key1][int_type]['map'].sum()
        orig_total2 = orig_maps[key2][int_type]['map'].sum()
        orig_ratio = orig_maps[key1][int_type]['map'] / orig_maps[key2][int_type]['map']
    else:
        # we have flux_maps
        orig_sum = orig_maps[key1]['map'] + orig_maps[key2]['map']
        orig_total1 = orig_maps[key1]['map'].sum()
        orig_total2 = orig_maps[key2]['map'].sum()
        orig_ratio = orig_maps[key1]['map'] / orig_maps[key2]['map']

    # conserved total:
    scaled_map2 = orig_sum / (1 + ratio_scale*orig_ratio)
    scaled_map1 = ratio_scale*orig_ratio*scaled_map2

    logging.trace(' %s / %s %s ratio before scaling: %.3f'%(key1, key2, log_str,
                    orig_total1/orig_total2))
    logging.trace(' %s / %s %s ratio after scaling with %.2f: %.3f'%(key1, key2, log_str,
                    ratio_scale, scaled_map1.sum()/scaled_map2.sum()))

    return scaled_map1, scaled_map2
Example #4
def flatten_map(template, channel='all'):
    """
    Takes a final level true (expected) template of trck/cscd, and returns a
    single flattened map of trck appended to cscd, with all zero bins
    removed.
    """

    logging.trace("Getting flattened map of channel: %s"%channel)

    if channel == 'all':
        cscd = template['cscd']['map'].flatten()
        trck = template['trck']['map'].flatten()
        fmap = np.append(cscd, trck)
    elif channel == 'trck':
        trck = template[channel]['map'].flatten()
        fmap = np.array(trck)
        #fmap = np.array(fmap)[np.nonzero(fmap)]
    elif channel == 'cscd':
        cscd = template[channel]['map'].flatten()
        fmap = np.array(cscd)
        #fmap = np.array(fmap)[np.nonzero(fmap)]
    elif channel == 'no_pid':
        cscd = template['cscd']['map'].flatten()
        trck = template['trck']['map'].flatten()
        fmap = cscd + trck
        #fmap = np.array(fmap)[np.nonzero(fmap)]
    else:
        raise ValueError(
            "channel: '%s' not implemented! Allowed: ['all', 'trck', 'cscd', 'no_pid']"
            %channel)

    
    fmap = np.array(fmap)[np.nonzero(fmap)]
    return fmap
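A self-contained sketch (plain NumPy; the template structure is mocked) of the channel='all' behaviour, appending the flattened trck map to the flattened cscd map and dropping zero bins:

import numpy as np

template = {
    'cscd': {'map': np.array([[1.0, 0.0], [2.0, 3.0]])},
    'trck': {'map': np.array([[0.0, 4.0], [5.0, 0.0]])},
}

cscd = template['cscd']['map'].flatten()
trck = template['trck']['map'].flatten()
fmap = np.append(cscd, trck)        # cscd bins first, then trck bins
fmap = fmap[np.nonzero(fmap)]       # remove all zero bins

print(fmap)                         # [1. 2. 3. 4. 5.]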
Example #5
    def next(self):
        """Iterate through lines in the file(s).

        Returns
        -------
        record : dict
            Record for the file currently being read, with keys including
            'line' (the line just read), 'fpname' (the name of that file),
            and 'lineno' (the line number within it).

        """
        if not self._iter_stack:
            self._cleanup()
            raise StopIteration
        try:
            record = self._iter_stack[-1]
            record['line'] = next(record['fp'])
            record['lineno'] += 1
            return record
        except StopIteration:
            record = self._iter_stack.pop()
            logging.trace(('Finished processing "{fpname:s}" with {lineno:d}'
                           ' line(s)').format(**record))
            return next(self)
        except:
            self._cleanup()
            raise
Example #6
    def get_inv_eff(self, signal_data=None, gen_data=None):
        this_hash = hash_obj(
            [self.true_binning.hash, self.output_str, 'inv_eff'],
            full_hash=self.full_hash)
        assert len(set([signal_data is None, gen_data is None])) == 1
        if signal_data is None and gen_data is None:
            if self.inv_eff_hash == this_hash:
                logging.trace('Loading inv eff from mem cache')
                return self._inv_eff
            if this_hash in self.disk_cache:
                logging.debug('Loading inv eff histogram from disk cache.')
                inv_eff = self.disk_cache[this_hash]
            else:
                raise ValueError(
                    'inverse efficiency histogram with correct hash not found '
                    'in disk_cache')
        else:
            this_hash = hash_obj([this_hash, self.fit_hash],
                                 full_hash=self.full_hash)
            if self.inv_eff_hash == this_hash:
                logging.trace('Loading inv eff from mem cache')
                return self._inv_eff
            inv_eff = self._get_inv_eff(signal_data, gen_data,
                                        self.true_binning, self.output_str)

            if self.disk_cache is not None:
                if this_hash not in self.disk_cache:
                    logging.debug('Caching inv eff histogram to disk.')
                    self.disk_cache[this_hash] = inv_eff

        self.inv_eff_hash = this_hash
        self._inv_eff = inv_eff
        return inv_eff
Example #7
def flatten_map(template,chan='all'):
    '''
    Takes a final level true (expected) template of trck/cscd, and returns a
    single flattened map of trck appended to cscd, with all zero bins
    removed.
    '''

    logging.trace("Getting flattened map of chan: %s"%chan)
    if chan == 'all':
        cscd = template['cscd']['map'].flatten()
        trck = template['trck']['map'].flatten()
        fmap = np.append(cscd,trck)
    elif chan == 'trck':
        trck = template[chan]['map'].flatten()
        fmap = np.array(trck)
        #fmap = np.array(fmap)[np.nonzero(fmap)]
    elif chan == 'cscd':
        cscd = template[chan]['map'].flatten()
        fmap = np.array(cscd)
        #fmap = np.array(fmap)[np.nonzero(fmap)]
    elif chan == 'no_pid':
        cscd = template['cscd']['map'].flatten()
        trck = template['trck']['map'].flatten()
        fmap = cscd + trck
        #fmap = np.array(fmap)[np.nonzero(fmap)]
    else:
        raise ValueError("chan: '%s' not implemented! Allowed: ['all', 'trck', 'cscd','no_pid']")

    fmap = np.array(fmap)[np.nonzero(fmap)]
    return fmap
Example #8
    def _compute_outputs(self, inputs=None):
        """Apply basic cuts and compute histograms for output channels."""

        logging.debug('Entering sample._compute_outputs')

        self.config = from_file(self.params['data_sample_config'].value)

        name = self.config.get('general', 'name')
        logging.trace('{0} sample sample_hash = '
                      '{1}'.format(name, self.sample_hash))
        self.load_sample_events()

        if self.params['keep_criteria'].value is not None:
            # TODO(shivesh)
            raise NotImplementedError(
                'needs check to make sure this works in a DistributionMaker'
            )
            self._data.applyCut(self.params['keep_criteria'].value)
            self._data.update_hash()

        if self.output_events:
            return self._data

        outputs = []
        if self.neutrinos:
            trans_nu_data = self._data.transform_groups(
                self._output_nu_groups
            )
            for fig in trans_nu_data.keys():
                outputs.append(trans_nu_data.histogram(
                    kinds       = fig,
                    binning     = self.output_binning,
                    weights_col = 'pisa_weight',
                    errors      = True,
                    name        = str(NuFlavIntGroup(fig)),
                ))

        if self.muons:
            outputs.append(self._data.histogram(
                kinds       = 'muons',
                binning     = self.output_binning,
                weights_col = 'pisa_weight',
                errors      = True,
                name        = 'muons',
                tex         = r'\rm{muons}'
            ))

        if self.noise:
            outputs.append(self._data.histogram(
                kinds       = 'noise',
                binning     = self.output_binning,
                weights_col = 'pisa_weight',
                errors      = True,
                name        = 'noise',
                tex         = r'\rm{noise}'
            ))

        name = self.config.get('general', 'name')
        return MapSet(maps=outputs, name=name)
Example #9
    def apply_function(self):

        for container in self.data:
            # update uncertainty first, before the weights are changed. This step is skipped in event mode
            if self.error_method == "sumw2":

                # If computing uncertainties in events mode, warn that
                # hs error propagation will be skipped
                if self.data.data_specs == 'events':
                    logging.trace(
                        'WARNING: running stage in events mode. Hypersurface error propagation will be IGNORED.'
                    )

                elif self.propagate_uncertainty:
                    calc_uncertainty(
                        container["weights"].get(WHERE),
                        container["hs_scales_uncertainty"].get(WHERE),
                        container["errors"].get(WHERE),
                    )
                    container['errors'].mark_changed()

                else:
                    vectorizer.imul(container["hs_scales"],
                                    out=container["errors"])
                    container['errors'].mark_changed()

            # Update weights according to hypersurfaces
            propagate_hs_scales(container["weights"].get(WHERE),
                                container["hs_scales"].get(WHERE),
                                container["weights"].get(WHERE))

            container['weights'].mark_changed()
Example #10
    def link_containers(self, key, names):
        """Link containers together. When containers are linked, they are
        treated as a single (virtual) container for binned data

        Parameters
        ----------
        key : str
            name of linked object

        names : list
            name of containers to be linked under the given key

        """
        # intersection of names for linking and available names

        link_names = set(names) & set(self.names)
        if len(link_names) < len(names):
            logging.warning(
                "Skipping containers %s in linking, as those are not present" %
                (set(names) - set(self.names)))

        containers = [self.__getitem__(name) for name in link_names]
        logging.trace('Linking containers %s into %s' % (link_names, key))
        new_container = VirtualContainer(key, containers)
        self.linked_containers.append(new_container)
Example #11
    def create_response(self,
                        reco_norm_data=None,
                        true_norm_data=None,
                        data=None):
        """Create the response object from the signal data."""
        unfold_bg = self.params['unfold_bg'].value
        unfold_eff = self.params['unfold_eff'].value
        unfold_unweighted = self.params['unfold_unweighted'].value
        this_hash = hash_obj([
            self.reco_binning.hash, self.true_binning.hash, unfold_bg,
            unfold_eff, unfold_unweighted, self.output_str, 'response'
        ],
                             full_hash=self.full_hash)
        assert len(
            set([reco_norm_data is None, true_norm_data is None,
                 data is None])) == 1
        if reco_norm_data is None and true_norm_data is None and data is None:
            if self.response_hash == this_hash:
                logging.trace('Loading response from mem cache')
                return self._response
            else:
                try:
                    del self._response
                except:
                    pass
            if this_hash in self.disk_cache:
                logging.debug('Loading response from disk cache.')
                response = self.disk_cache[this_hash]
            else:
                raise ValueError(
                    'response object with correct hash not found in disk_cache'
                )
        else:
            this_hash = hash_obj([this_hash, self.fit_hash] +
                                 list(self.params.values),
                                 full_hash=self.full_hash)
            if self.response_hash == this_hash:
                logging.debug('Loading response from mem cache')
                return self._response
            else:
                try:
                    del self._response
                    del self.t_th1d
                except:
                    pass

            # Truth histogram also gets returned if response matrix is created
            response, self.t_th1d = self._create_response(
                reco_norm_data, true_norm_data, data, self.reco_binning,
                self.true_binning)

            if self.disk_cache is not None:
                if this_hash not in self.disk_cache:
                    logging.debug('Caching response object to disk.')
                    self.disk_cache[this_hash] = response

        self.response_hash = this_hash
        self._response = response
        return response
Example #12
def test_container():
    """Unit tests for Container class."""

    # NOTE: Right now the numbers are tuned so that the weights are identical
    # per bin. If you change binning that's likely not the case anymore and you
    # inevitably end up with averaged values over bins, which are then not
    # equal to the individual weights anymore when those are not identical per
    # bin

    n_evts = 10000
    x = np.linspace(0, 100, n_evts, dtype=FTYPE)
    y = np.linspace(0, 100, n_evts, dtype=FTYPE)
    w = np.tile(np.arange(100, dtype=FTYPE) + 0.5, (100, 1)).T.ravel()

    container = Container('test', 'events')
    container['x'] = x
    container['y'] = y
    container['w'] = w

    binning_x = OneDimBinning(name='x',
                              num_bins=100,
                              is_lin=True,
                              domain=[0, 100])
    binning_y = OneDimBinning(name='y',
                              num_bins=100,
                              is_lin=True,
                              domain=[0, 100])
    binning = MultiDimBinning([binning_x, binning_y])

    logging.trace('Testing container and translation methods')

    container.representation = binning
    bx = container['x']
    m = np.meshgrid(binning.midpoints[0].m, binning.midpoints[1].m)[1].ravel()
    assert np.allclose(bx, m, **ALLCLOSE_KW), f'test:\n{bx}\n!= ref:\n{m}'

    # array repr
    container.representation = 'events'
    array_weights = container['w']
    assert np.allclose(array_weights, w,
                       **ALLCLOSE_KW), f'test:\n{array_weights}\n!= ref:\n{w}'

    # binned repr
    container.representation = binning
    diag = np.diag(np.arange(100) + 0.5)
    bd = container['w']
    h = container.get_hist('w')

    assert np.allclose(bd, diag.ravel(),
                       **ALLCLOSE_KW), f'test:\n{bd}\n!= ref:\n{diag.ravel()}'
    assert np.allclose(h[0], diag,
                       **ALLCLOSE_KW), f'test:\n{h[0]}\n!= ref:\n{diag}'
    assert h[1] == binning, f'test:\n{h[1]}\n!= ref:\n{binning}'

    # augment to array repr again
    container.representation = 'events'
    a = container['w']

    assert np.allclose(a, w, **ALLCLOSE_KW), f'test:\n{a}\n!= ref:\n{w}'
Example #13
    def _compute_outputs(self, inputs=None):
        """Compute histograms for output channels."""
        logging.debug('Entering fit._compute_outputs')
        if not isinstance(inputs, Data):
            raise AssertionError('inputs is not a Data object, instead is '
                                 'type {0}'.format(type(inputs)))
        self.weight_hash = deepcopy(inputs.metadata['weight_hash'])
        logging.trace('{0} fit weight_hash = '
                      '{1}'.format(inputs.metadata['name'], self.weight_hash))
        logging.trace('{0} fit fit_hash = '
                      '{1}'.format(inputs.metadata['name'], self.fit_hash))
        self._data = inputs
        self.reweight()

        if self.output_events:
            return self._data

        outputs = []
        if self.neutrinos:
            trans_nu_data = self._data.transform_groups(
                self._output_nu_groups
            )
            for fig in trans_nu_data.keys():
                outputs.append(
                    trans_nu_data.histogram(
                        kinds=fig,
                        binning=self.output_binning,
                        weights_col='pisa_weight',
                        errors=True,
                        name=str(NuFlavIntGroup(fig)),
                    )
                )

        if self.muons:
            outputs.append(
                self._data.histogram(
                    kinds='muons',
                    binning=self.output_binning,
                    weights_col='pisa_weight',
                    errors=True,
                    name='muons',
                    tex=text2tex('muons')
                )
            )

        if self.noise:
            outputs.append(
                self._data.histogram(
                    kinds='noise',
                    binning=self.output_binning,
                    weights_col='pisa_weight',
                    errors=True,
                    name='noise',
                    tex=text2tex('noise')
                )
            )

        return MapSet(maps=outputs, name=self._data.metadata['name'])
Example #14
def get_flux_maps(flux_service, ebins, czbins, nue_numu_ratio, nu_nubar_ratio,
                  energy_scale, atm_delta_index, **kwargs):
    """
    Get a set of flux maps for the different primaries.

    \params:
      * flux_service -
      * ebins/czbins - energy/coszenith bins to calculate flux
      * nue_numu_ratio - systematic to be a proxy for the realistic
        Flux_nue/Flux_numu and Flux_nuebar/Flux_numubar ratios,
        keeping both the total flux from neutrinos and antineutrinos
        constant. The adjusted ratios are given by
        "nue_numu_ratio * original ratio".
      * nu_nubar_ratio - systematic to be a proxy for the
        neutrino/anti-neutrino production/cross section ratio.
      * energy_scale - factor to scale energy bin centers by
      * atm_delta_index  - change in spectral index from fiducial
    """

    # Be verbose on input
    params = get_params()
    report_params(params, units=[''])

    # Initialize return dict
    maps = {'params': params}

    for prim in primaries:

        # Get the flux for this primary
        maps[prim] = {
            'ebins': ebins,
            'czbins': czbins,
            'map': flux_service.get_flux(ebins * energy_scale, czbins, prim)
        }

        # be a bit verbose
        logging.trace("Total flux of %s is %u [s^-1 m^-2]" %
                      (prim, maps[prim]['map'].sum()))

    # now scale the nue(bar) / numu(bar) flux ratios, keeping the total
    # Flux (nue + numu, nue_bar + numu_bar) constant, or return unscaled maps:
    scaled_maps = apply_nue_numu_ratio(
        maps, nue_numu_ratio) if nue_numu_ratio != 1.0 else maps

    # now scale the nu(e/mu) / nu(e/mu)bar event count ratios, keeping the total
    # (nue + nuebar etc.) constant
    if nu_nubar_ratio != 1.:
        scaled_maps = apply_nu_nubar_ratio(scaled_maps, nu_nubar_ratio)

    median_energy = get_median_energy(maps['numu'])
    if atm_delta_index != 0.0:
        scaled_maps = apply_delta_index(scaled_maps, atm_delta_index,
                                        median_energy)

    return scaled_maps
Example #15
    def __init__(self, ebins, czbins):
        """
        Parameters needed to instantiate any oscillation service:
        * ebins: Energy bin edges
        * czbins: cos(zenith) bin edges
        If further member variables are needed, extend this method.
        """
        logging.trace('Instantiating %s' % self.__class__.__name__)
        self.ebins = np.array(ebins)
        self.czbins = np.array(czbins)
        for ax in [self.ebins, self.czbins]:
            if len(np.shape(ax)) != 1:
                raise IndexError('Axes must be 1d! ' + str(np.shape(ax)))
Example #16
    def __init__(self, ebins, czbins):
        """
        Parameters needed to instantiate any oscillation service:
        * ebins: Energy bin edges
        * czbins: cos(zenith) bin edges
        If further member variables are needed, extend this method.
        """
        logging.trace('Instantiating %s'%self.__class__.__name__)
        self.ebins = np.array(ebins)
        self.czbins = np.array(czbins)
        for ax in [self.ebins, self.czbins]:
            if len(np.shape(ax)) != 1:
                raise IndexError('Axes must be 1d! '+str(np.shape(ax)))
Example #17
def get_flux_maps(flux_service, ebins, czbins, nue_numu_ratio, nu_nubar_ratio,
                  energy_scale, atm_delta_index,**kwargs):
    """
    Get a set of flux maps for the different primaries.

    \params:
      * flux_service -
      * ebins/czbins - energy/coszenith bins to calculate flux
      * nue_numu_ratio - systematic to be a proxy for the realistic
        Flux_nue/Flux_numu and Flux_nuebar/Flux_numubar ratios,
        keeping both the total flux from neutrinos and antineutrinos
        constant. The adjusted ratios are given by
        "nue_numu_ratio * original ratio".
      * nu_nubar_ratio - systematic to be a proxy for the
        neutrino/anti-neutrino production/cross section ratio.
      * energy_scale - factor to scale energy bin centers by
      * atm_delta_index  - change in spectral index from fiducial
    """

    # Be verbose on input
    params = get_params()
    report_params(params, units = [''])

    # Initialize return dict
    maps = {'params': params}

    for prim in primaries:

        # Get the flux for this primary
        maps[prim] = {'ebins': ebins,
                      'czbins': czbins,
                      'map': flux_service.get_flux(ebins*energy_scale,czbins,prim)}

        # be a bit verbose
        logging.trace("Total flux of %s is %u [s^-1 m^-2]"%
                      (prim,maps[prim]['map'].sum()))

    # now scale the nue(bar) / numu(bar) flux ratios, keeping the total
    # Flux (nue + numu, nue_bar + numu_bar) constant, or return unscaled maps:
    scaled_maps = apply_nue_numu_ratio(maps, nue_numu_ratio) if nue_numu_ratio != 1.0 else maps

    # now scale the nu(e/mu) / nu(e/mu)bar event count ratios, keeping the total
    # (nue + nuebar etc.) constant
    if nu_nubar_ratio != 1.:
        scaled_maps = apply_nu_nubar_ratio(scaled_maps, nu_nubar_ratio)

    median_energy = get_median_energy(maps['numu'])
    if atm_delta_index != 0.0:
        scaled_maps = apply_delta_index(scaled_maps, atm_delta_index, median_energy)

    return scaled_maps
Example #18
    def check_reco_dist_consistency(self, dist_list):
        """Enforces correct normalisation of resolution functions."""
        logging.trace(
            " Verifying correct normalisation of resolution function.")
        # Obtain list of all distributions. The sum of their relative weights
        # should yield 1.
        frac_sum = np.zeros_like(dist_list[0]['fraction'])
        for dist_dict in dist_list:
            frac_sum += dist_dict['fraction']
        if not recursiveEquality(frac_sum, np.ones_like(frac_sum)):
            err_msg = ("Total normalisation of resolution function is off"
                       " (fractions do not add up to 1).")
            raise ValueError(err_msg)
        return True
Example #19
    def array_to_binned(self, key, src_representation, dest_representation):
        """Histogram data array into binned data
        Parameters
        ----------
        key : str
        src_representation : str
        dest_representation : MultiDimBinning
        #averaged : bool
        #    if True, the histogram entries are averages of the numbers that
        #    end up in a given bin. This for example must be used when oscillation
        #    probabilities are translated.....otherwise we end up with probability*count
        #    per bin
        Notes
        -----
        right now, CPU-only
        """
        # TODO: make work for n-dim
        logging.trace('Transforming %s array to binned data' % (key))

        assert src_representation in self.array_representations
        assert isinstance(dest_representation, MultiDimBinning)

        if not dest_representation.is_irregular:
            sample = []
            dimensions = []
            for d in dest_representation:
                if d.is_log:
                    self.representation = "log_events"
                    sample.append(self[d.name])
                    dimensions.append(
                        OneDimBinning(d.name,
                                      domain=np.log(d.domain.m),
                                      num_bins=d.num_bins))
                else:
                    self.representation = "events"
                    sample.append(self[d.name])
                    dimensions.append(d)
            hist_binning = MultiDimBinning(dimensions)
        else:
            self.representation = src_representation
            sample = [self[name] for name in dest_representation.names]
            hist_binning = dest_representation

        self.representation = src_representation
        weights = self[key]

        hist = histogram(sample, weights, hist_binning, averaged=True)

        return hist
Example #20
def saveDict(data,fh):
    """
    Saves a data dictionary to filehandle (or group), fh, as long as the
    group to be written to the hdf5 file is one of:
      [np.ndarray, list, float, int, bool, str, None]
    """

    for k,v in data.items():
        if type(v) == dict:
            group = fh.create_group(k)
            logging.trace("  creating group %s"%k)
            saveDict(v,group)
        else:
            saveNonDict(k, v, fh)

    return
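A hedged usage sketch (h5py only; `saveNonDict` reduced to a bare `create_dataset` call) of the recursive group-writing pattern used by `saveDict`:

import h5py
import numpy as np

data = {'run': {'livetime': 1234.5, 'weights': np.arange(5)}, 'name': 'toy'}

def save_dict(d, fh):
    for k, v in d.items():
        if isinstance(v, dict):
            save_dict(v, fh.create_group(k))   # recurse into a new HDF5 group
        else:
            fh.create_dataset(k, data=v)       # leaves become datasets

with h5py.File('toy.hdf5', 'w') as f:
    save_dict(data, f)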
Example #21
    def compute(self):

        # simplest caching algorithm: don't compute if params didn't change
        new_param_hash = self.params.values_hash
        if new_param_hash == self.param_hash:
            logging.trace("cached output")
            return

        if self.calc_mode is not None:
            self.data.representation = self.calc_mode

        if self.profile:
            start_t = time()
            self.compute_function()
            end_t = time()
            self.calc_times.append(end_t - start_t)
        else:
            self.compute_function()
        self.param_hash = new_param_hash
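The caching idea in a minimal, PISA-free sketch (hypothetical class and names, not the Stage API): skip the computation whenever the hash of the current parameter values matches the hash stored from the previous call:

class CachedCompute:
    """Recompute only when the parameter values change (toy stand-in for Stage.compute)."""

    def __init__(self, compute_function):
        self.compute_function = compute_function
        self.param_hash = None

    def compute(self, params):
        new_param_hash = hash(tuple(sorted(params.items())))
        if new_param_hash == self.param_hash:
            return                      # cached output: params did not change
        self.compute_function(params)
        self.param_hash = new_param_hash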
Example #22
    def compute(self):

        if len(self.params) == 0 and len(self.output_calc_keys) == 0:
            return

        # simplest caching algorithm: don't compute if params didn't change
        new_param_hash = self.params.values_hash
        if new_param_hash == self.param_hash:
            logging.trace("cached output")
            return

        self.data.data_specs = self.input_specs
        # convert any inputs if necessary:
        if self.mode[:2] == "EB":
            for container in self.data:
                for key in self.input_calc_keys:
                    container.array_to_binned(key, self.calc_specs)

        elif self.mode == "EBE":
            for container in self.data:
                for key in self.input_calc_keys:
                    container.binned_to_array(key)

        #elif self.mode == "BBE":
        #    for container in self.data:
        #        for key in self.input_calc_keys:
        #            container.binned_to_array(key)

        self.data.data_specs = self.calc_specs
        self.compute_function()
        self.param_hash = new_param_hash

        # convert any outputs if necessary:
        if self.mode[1:] == "EB":
            for container in self.data:
                for key in self.output_calc_keys:
                    container.array_to_binned(key, self.output_specs)

        elif self.mode[1:] == "BE":
            for container in self.data:
                for key in self.output_calc_keys:
                    container.binned_to_array(key)
Example #23
    def extend_binning_for_coszen(self, ext_low=-3., ext_high=+3.):
        """
        Check whether `coszen_flipback` can be applied to the stage's
        coszen output binning and return an extended binning spanning [-3, +3]
        if that is the case.
        """
        logging.trace("Preparing binning for flipback of reco kernel at"
                      " coszen boundaries of physical range.")

        cz_edges_out = self.output_binning['reco_coszen'].bin_edges.magnitude
        coszen_range = self.output_binning['reco_coszen'].range.magnitude
        n_cz_out = self.output_binning['reco_coszen'].size
        coszen_step = coszen_range / n_cz_out
        # we need to check for possible contributions from (-3, -1) and
        # (1, 3) in coszen
        assert ext_high > ext_low
        ext_range = ext_high - ext_low
        extended = np.linspace(ext_low, ext_high,
                               int(ext_range / coszen_step) + 1)

        # We cannot flipback if we don't have -1 & +1 as (part of extended)
        # bin edges. This could happen if 1 is a multiple of the output bin
        # size, but the original edges themselves are not a multiple of that
        # size.
        for bound in (-1., +1.):
            comp = [recursiveEquality(bound, e) for e in extended]
            assert np.any(comp)

        # Perform one final check: original edges subset of extended ones?
        for coszen in cz_edges_out:
            comp = [recursiveEquality(coszen, e) for e in extended]
            assert np.any(comp)

        # Binning seems fine - we can proceed
        ext_cent = (extended[1:] + extended[:-1]) / 2.
        flipback_mask = ((ext_cent < -1.) | (ext_cent > +1.))
        keep = np.where((ext_cent > cz_edges_out[0])
                        & (ext_cent < cz_edges_out[-1]))[0]
        cz_edges_out = extended
        logging.trace("  -> temporary coszen bin edges:\n%s" % cz_edges_out)

        return cz_edges_out, flipback_mask, keep
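A NumPy-only toy version of the binning extension: four output bins spanning [-1, +1] are extended to [-3, +3] with the same bin width, and a mask flags the unphysical bins whose content is later flipped back into the physical range:

import numpy as np

cz_edges_out = np.linspace(-1, 1, 5)                   # 4 output bins, width 0.5
coszen_step = (cz_edges_out[-1] - cz_edges_out[0]) / (len(cz_edges_out) - 1)
ext_low, ext_high = -3.0, +3.0
extended = np.linspace(ext_low, ext_high,
                       int((ext_high - ext_low) / coszen_step) + 1)

ext_cent = (extended[1:] + extended[:-1]) / 2.0
flipback_mask = (ext_cent < -1.0) | (ext_cent > +1.0)  # bins outside physical range
keep = np.where((ext_cent > cz_edges_out[0]) & (ext_cent < cz_edges_out[-1]))[0]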
Example #24
def apply_delta_index(flux_maps, delta_index, egy_med):
    """
    Applies the spectral index systematic to the flux maps by scaling
    each bin with (egy_cen/egy_med)^(-delta_index), preserving the total
    integral flux. Note that only the numu/numu_bar are scaled, because
    the nue_numu_ratio will handle the systematic on the nue flux.
    """

    for flav in ['numu','numu_bar']:
        ecen = get_bin_centers(flux_maps[flav]['ebins'])
        scale = np.power((ecen/egy_med),delta_index)
        flux_map = flux_maps[flav]['map']
        total_flux = flux_map.sum()
        logging.trace("flav: %s, total counts before scale: %f"%(flav,total_flux))
        scaled_flux = (flux_map.T*scale).T
        scaled_flux *= (total_flux/scaled_flux.sum())
        flux_maps[flav]['map'] = scaled_flux
        logging.trace("flav: %s, total counts after scale: %f"%
                      (flav,flux_maps[flav]['map'].sum()))

    return flux_maps
Example #25
    def select_params(self, selections, error_on_missing=False):
        """Apply the `selections` to contained ParamSet.

        Parameters
        ----------
        selections : string or iterable
        error_on_missing : bool

        """
        try:
            self._param_selector.select_params(selections, error_on_missing=True)
        except KeyError:
            msg = "Not all of the selections %s found in this stage." % (selections,)
            if error_on_missing:
                # logging.error(msg)
                raise
            logging.trace(msg)
        else:
            logging.trace(
                "`selections` = %s yielded `params` = %s" % (selections, self.params)
            )
Example #26
    def binned_to_array(self, key, src_representation, dest_representation):
        """Augmented binned data to array data"""

        logging.trace('Transforming %s binned to array data' % (key))

        self.representation = src_representation
        weights = self[key]

        if not src_representation.is_irregular:
            logging.trace(
                f"Container `{self.name}`: regularized lookup for {key}")
            sample = []
            dimensions = []
            for d in src_representation:
                if d.is_log:
                    self.representation = "log_events"
                    sample.append(self[d.name])
                    dimensions.append(
                        OneDimBinning(d.name,
                                      domain=np.log(d.domain.m),
                                      num_bins=d.num_bins))
                else:
                    self.representation = "events"
                    sample.append(self[d.name])
                    dimensions.append(d)
            hist_binning = MultiDimBinning(dimensions)
        else:
            logging.trace(
                f"Container `{self.name}`: irregular lookup for {key}")
            self.representation = dest_representation
            sample = [self[name] for name in src_representation.names]
            hist_binning = src_representation

        return lookup(sample, weights, hist_binning)
Example #27
def apply_delta_index(flux_maps, delta_index, egy_med):
    """
    Applies the spectral index systematic to the flux maps by scaling
    each bin with (egy_cen/egy_med)^(-delta_index), preserving the total
    integral flux. Note that only the numu/numu_bar are scaled, because
    the nue_numu_ratio will handle the systematic on the nue flux.
    """

    for flav in ['numu', 'numu_bar']:
        ecen = get_bin_centers(flux_maps[flav]['ebins'])
        scale = np.power((ecen / egy_med), delta_index)
        flux_map = flux_maps[flav]['map']
        total_flux = flux_map.sum()
        logging.trace("flav: %s, total counts before scale: %f" %
                      (flav, total_flux))
        scaled_flux = (flux_map.T * scale).T
        scaled_flux *= (total_flux / scaled_flux.sum())
        flux_maps[flav]['map'] = scaled_flux
        logging.trace("flav: %s, total counts after scale: %f" %
                      (flav, flux_maps[flav]['map'].sum()))

    return flux_maps
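A standalone NumPy sketch (toy map and binning) of the renormalised spectral tilt: each energy row is scaled by (E/E_med)^delta_index, as in the code above, and the map is then rescaled so the total flux is unchanged:

import numpy as np

ebins = np.logspace(0, 2, 6)                    # toy energy bin edges [GeV]
ecen = np.sqrt(ebins[:-1] * ebins[1:])          # geometric-mean bin centers
egy_med = np.median(ecen)
delta_index = 0.1

flux_map = np.ones((len(ecen), 3))              # toy (energy, coszen) flux map
total_flux = flux_map.sum()

scale = np.power(ecen / egy_med, delta_index)
scaled_flux = (flux_map.T * scale).T            # tilt the energy spectrum
scaled_flux *= total_flux / scaled_flux.sum()   # restore the original total

assert np.isclose(scaled_flux.sum(), total_flux)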
Example #28
    def _histogram(events, binning, weights=None, errors=False, **kwargs):
        """Histogram the events given the input binning."""
        logging.trace('Histogramming')

        bin_names = binning.names
        bin_edges = [edges.m for edges in binning.bin_edges]
        for name in bin_names:
            if name not in events:
                raise AssertionError('Input events object does not have '
                                     'key {0}'.format(name))

        sample = [events[colname] for colname in bin_names]
        hist, edges = np.histogramdd(sample=sample,
                                     weights=weights,
                                     bins=bin_edges)
        if errors:
            hist2, edges = np.histogramdd(sample=sample,
                                          weights=np.square(weights),
                                          bins=bin_edges)
            hist = unp.uarray(hist, np.sqrt(hist2))

        return Map(hist=hist, binning=binning, **kwargs)
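A self-contained sketch (NumPy plus the `uncertainties` package; no PISA Map or binning objects) of the sum-of-squared-weights error treatment used above:

import numpy as np
from uncertainties import unumpy as unp

rng = np.random.default_rng(0)
sample = [rng.uniform(0, 1, 1000), rng.uniform(0, 1, 1000)]
weights = rng.uniform(0.5, 1.5, 1000)
bin_edges = [np.linspace(0, 1, 6), np.linspace(0, 1, 6)]

hist, _ = np.histogramdd(sample=sample, weights=weights, bins=bin_edges)
hist2, _ = np.histogramdd(sample=sample, weights=np.square(weights), bins=bin_edges)

# per-bin error = sqrt(sum of squared weights) in that bin
hist = unp.uarray(hist, np.sqrt(hist2))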
Example #29
    def _derive_transforms_hash(self, nominal_transforms_hash=None):
        """Compute a hash that uniquely identifies the transforms that will be
        produced from the current configuration. Note that this hash needs only
        to be valid for this run (i.e., it is a volatile hash).

        This implementation returns a hash from the current parameters' values.

        """
        id_objects = []
        h = self.params.values_hash
        logging.trace("self.params.values_hash = %s" % h)
        id_objects.append(h)

        # Grab any provided nominal transforms hash, or derive it again
        if nominal_transforms_hash is None:
            nominal_transforms_hash = self._derive_nominal_transforms_hash()
        # If a valid hash has been gotten, include it
        if nominal_transforms_hash is not None:
            id_objects.append(nominal_transforms_hash)

        for attr in sorted(self._attrs_to_hash):
            val = getattr(self, attr)
            if hasattr(val, "hash"):
                attr_hash = val.hash
            elif self.full_hash:
                norm_val = normQuant(val)
                attr_hash = hash_obj(norm_val, full_hash=self.full_hash)
            else:
                attr_hash = hash_obj(val, full_hash=self.full_hash)
            id_objects.append(attr_hash)

        # If any hashes are missing (i.e, None), invalidate the entire hash
        if any([(h is None) for h in id_objects]):
            transforms_hash = None
        else:
            transforms_hash = hash_obj(id_objects, full_hash=self.full_hash)

        return transforms_hash, nominal_transforms_hash
Example #30
def get_gradients(param, hypo_maker, test_vals):
    """Use the template maker to create all the templates needed
    to obtain the gradients in a given parameter.

    Parameters
    ----------
    param : str
        Name of parameter w.r.t. which we are calculating binwise
        template changes
    hypo_maker : DistributionMaker
        Needs to hold the parameter `param` in its `ParamSet`
    test_vals :  sequence with units
        Values of the parameter `param` to probe, i.e., generate templates for

    Returns
    -------
    pmaps : dict
        Dictionary with `test_vals` as keys and the resulting templates'
        'total' nominal values as values
    gradient_map : sequence
        As returned by `get_derivative_map`

    """
    logging.trace("Working on parameter %s." % param)

    pmaps = {}

    # generate one template for each value of the parameter in question
    # and store in pmaps
    for param_value in test_vals:
        hypo_maker.params[param].value = param_value
        # make the template corresponding to the current value of the parameter
        hypo_asimov_dist = hypo_maker.get_outputs(return_sum=True)
        pmaps[param_value] = hypo_asimov_dist.nominal_values['total']

    gradient_map = get_derivative_map(hypo_maps=pmaps)

    return pmaps, gradient_map
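A schematic, PISA-free sketch of the same loop: probe a toy template at several parameter values and extract a per-bin slope (a crude stand-in for `get_derivative_map`, which is not shown here):

import numpy as np

def toy_template(param_value):
    # stand-in for hypo_maker.get_outputs(return_sum=True): 3 bins, linear in the parameter
    return np.array([10.0, 20.0, 30.0]) * (1.0 + 0.1 * param_value)

test_vals = [-1.0, 0.0, 1.0]
pmaps = {v: toy_template(v) for v in test_vals}

vals = np.array(test_vals)
templates = np.stack([pmaps[v] for v in test_vals])
gradient_map = np.polyfit(vals, templates, deg=1)[0]   # per-bin slope w.r.t. the parameter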
Example #31
def saveNonDict(key, data, fh):

    logging.trace("    key: %s is of type: %s"%(key,type(data)))
    if type(data) in [np.ndarray,list]:
        logging.trace("    >>saving to dataset: %s"%key)
        fh.create_dataset(key,data=data)
    elif type(data) in [float,int,bool]:
        logging.trace("   >>saving '%s' to dataset: %s"%(str(data),key))
        fh.create_dataset(key,data=data)
    elif type(data) == str:
        logging.trace("   >>saving '%s' to dataset: %s"%(data,key))
        dtype = h5py.special_dtype(vlen=str)
        fh.create_dataset(key,data=data,dtype=dtype)
    elif data is None:
        # NOTE: we convert it to the boolean false here, since
        # I can't find a good way to store the 'None' type in h5py
        data = False
        logging.trace("   >>saving '%s' to dataset: %s"%(data,key))
        fh.create_dataset(key,data=data)
    else:
        raise ValueError("Key: '%s' is unrecognized type: '%s'"%(key,type(data)))

    return
Example #32
def get_fisher_matrix(hypo_maker, test_vals, counter):
    """Compute Fisher matrices at fiducial hypothesis given data.
    """
    from pisa.utils.pull_method import get_gradients
    hypo_params = hypo_maker.params.free

    #fisher = {'total': {}}
    fid_hypo_asimov_dist = hypo_maker.get_outputs(return_sum=True)
    counter += 1

    pmaps = {'total': {}}
    gradient_maps = {'total': {}}

    for pname in hypo_params.names:
        logging.trace("Computing binwise gradients for parameter '%s'." %
                      pname)
        tpm, gm = get_gradients(
            param=pname,
            hypo_maker=hypo_maker,
            test_vals=test_vals[pname],
        )
        counter += len(test_vals[pname])
        # the maps corresponding to variations of
        # a single param are not flattened
        pmaps['total'][pname] = tpm
        # these are flattened, which is also what the
        # method below assumes
        gradient_maps['total'][pname] = gm

    # hypo param values are not at nominal anymore,
    # but we don't use their values here

    fisher, nonempty = build_fisher_matrix(
        gradient_hist_flat_d=gradient_maps['total'],
        fiducial_hist=fid_hypo_asimov_dist,
        fiducial_params=hypo_params)
    return fisher, gradient_maps, fid_hypo_asimov_dist, nonempty
Example #33
    def get_bg_hist(self, bg_data=None):
        """Histogram the bg data unless using real data, in which case load
        the bg hist from disk cache."""
        this_hash = hash_obj(
            [self.reco_binning.hash, self.output_str, 'bg_hist'],
            full_hash=self.full_hash)
        if bg_data is None:
            if self.bg_hist_hash == this_hash:
                logging.trace('Loading bg hist from mem cache')
                return self._bg_hist
            if this_hash in self.disk_cache:
                logging.debug('Loading bg hist from disk cache.')
                bg_hist = self.disk_cache[this_hash]
            else:
                raise ValueError(
                    'bg hist object with correct hash not found in disk_cache')
        else:
            this_hash = hash_obj([this_hash, self.fit_hash],
                                 full_hash=self.full_hash)
            if self.bg_hist_hash == this_hash:
                logging.trace('Loading bg hist from mem cache')
                return self._bg_hist
            bg_hist = self._histogram(events=bg_data,
                                      binning=self.reco_binning,
                                      weights=bg_data['pisa_weight'],
                                      errors=True,
                                      name='background',
                                      tex=r'\rm{background}')

            if self.disk_cache is not None:
                if this_hash not in self.disk_cache:
                    logging.debug('Caching bg hist to disk.')
                    self.disk_cache[this_hash] = bg_hist

        self.bg_hist_hash = this_hash
        self._bg_hist = bg_hist
        return bg_hist
Example #34
def get_flux_maps(flux_service, ebins, czbins, nue_numu_ratio, energy_scale, **kwargs):
    '''
    Get a set of flux maps for the different primaries.

    \params:
      * flux_service -
      * ebins/czbins - energy/coszenith bins to calculate flux
      * nue_numu_ratio - systematic to be a proxy for the realistic
        Flux_nue/Flux_numu and Flux_nuebar/Flux_numubar ratios,
        keeping both the total flux from neutrinos and antineutrinos
        constant. The adjusted ratios are given by
        "nue_numu_ratio * original ratio".
      * energy_scale - factor to scale energy bin centers by
    '''

    #Be verbose on input
    params = get_params()
    report_params(params, units = [''])

    #Initialize return dict
    maps = {'params': params}

    for prim in primaries:

        #Get the flux for this primary
        maps[prim] = {'ebins': ebins,
                      'czbins': czbins,
                      'map': flux_service.get_flux(ebins*energy_scale,czbins,prim)}

        #be a bit verbose
        logging.trace("Total flux of %s is %u [s^-1 m^-2]"%
                      (prim,maps[prim]['map'].sum()))

    # now scale the nue(bar) / numu(bar) flux ratios, keeping the total
    # flux (nue + numu, nue_bar + numu_bar) constant, or return unscaled maps:
    return apply_nue_numu_ratio(maps, nue_numu_ratio) if nue_numu_ratio != 1.0 else maps
Example #35
def test_MutableMultiFileIterator():
    """Unit test for class `MutableMultiFileIterator`"""
    import shutil
    import tempfile

    prefixes = ['a', 'b', 'c']
    file_len = 4

    reference_lines = [
        # start in file a
        'a0', 'a1',
        # switch to file b after second line of a
        'b0', 'b1',
        # switch to file c after second line of b
        'c0', 'c1', 'c2', 'c3',
        # switch back to b after exhausting c
        'b2', 'b3',
        # switch back to a after exhausting b
        'a2', 'a3'
    ]

    tempdir = tempfile.mkdtemp()
    try:
        # Create test files
        paths = [join(tempdir, prefix) for prefix in prefixes]
        for prefix, path in zip(prefixes, paths):
            with open(path, 'w') as f:
                for i in range(file_len):
                    f.write('%s%d\n' % (prefix, i))
            logging.trace(path)

        actual_lines = []
        with open(paths[0]) as fp:
            file_iter = MutableMultiFileIterator(fp=fp, fpname=paths[0])

            remaining_paths = paths[1:]

            for record in file_iter:
                actual_lines.append(record['line'].strip())
                logging.trace(str(record))
                if record['line'][1:].strip() == '1':
                    if remaining_paths:
                        path = remaining_paths.pop(0)
                        file_iter.switch_to_file(fpname=path)
                    else:
                        for l in str(file_iter.location).split('\n'):
                            logging.trace(l)
    except:
        shutil.rmtree(tempdir)
        raise

    if actual_lines != reference_lines:
        raise ValueError('<< FAIL : test_MutableMultiFileIterator >>')

    logging.info('<< PASS : test_MutableMultiFileIterator >>')
Example #36
    def _combine_xs(self, flavintgroup):
        """Combine all cross sections specified by the flavints in
        `flavintgroup`. All CC and NC interactions are separately grouped
        together and averaged, then the average of each interaction type
        is added to the other.

        If CC and NC interactions are present, they *must* be from the same
        flavor(s). I.e., it doesn't make sense (and so causes an exception) if
        you combine numu CC with numubar NC. It does make sense if you combine
        numu and numubar CC with numu and numubar NC, though, and this is
        allowed.

        Notes
        -----
        Does not yet implement *Ngen/spectrum-weighted* averages, which are
        necessary when combining cross sections of disparate flavor/interaction
        types from different Monte Carlo simulation runs.
        """
        flavintgroup = NuFlavIntGroup(flavintgroup)
        # Trivial case: nothing to combine
        if len(flavintgroup.flavints) == 1:
            return self[flavintgroup.flavints[0]]

        cc_flavints = flavintgroup.cc_flavints
        nc_flavints = flavintgroup.nc_flavints
        if cc_flavints and nc_flavints:
            assert flavintgroup.cc_flavs == flavintgroup.nc_flavs, \
                    'Combining CC and NC but CC flavors do not match NC flavors'
        cc_avg_xs = 0
        if cc_flavints:
            logging.trace('cc_flavints = %s' % (cc_flavints,))
            cc_avg_xs = np.sum([self[k] for k in cc_flavints], axis=0) \
                    / len(cc_flavints)
        nc_avg_xs = 0
        if nc_flavints:
            logging.trace('nc_flavints = %s' % (nc_flavints,))
            nc_avg_xs = np.sum([self[k] for k in nc_flavints], axis=0) \
                    / len(nc_flavints)
        tot_xs = cc_avg_xs + nc_avg_xs
        logging.trace('mean(tot_xs) = %s' % (np.mean(tot_xs),))
        return tot_xs
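A toy NumPy sketch of the combination rule: average within the CC group, average within the NC group, then add the two averages (Ngen/spectrum-weighted averaging, as noted above, is not implemented):

import numpy as np

# toy cross sections (one array per flavint), two CC and two NC members
cc_xs = [np.array([1.0, 2.0]), np.array([3.0, 4.0])]
nc_xs = [np.array([0.5, 0.5]), np.array([1.5, 1.5])]

cc_avg_xs = np.sum(cc_xs, axis=0) / len(cc_xs)   # unweighted average over CC flavints
nc_avg_xs = np.sum(nc_xs, axis=0) / len(nc_xs)   # unweighted average over NC flavints
tot_xs = cc_avg_xs + nc_avg_xs                   # [3.0, 4.0]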
Example #37
def find_resource(filename, fail = True):
    '''
    Try to find the resource given by directory/filename. Will first check if
    filename is an absolute path, then relative to the $PISA
    environment variable if set. Otherwise will look in the resources directory
    of the pisa installation. Will return the file handle or throw an Exception
    if the file is not found.
    '''

    #First check for absolute path
    fpath = os.path.expanduser(os.path.expandvars(filename))
    logging.trace("Checking if %s is a file..."%fpath)
    if os.path.isfile(fpath):
        logging.debug('Found %s'%(fpath))
        return fpath
    
    #Next check if $PISA is set in environment
    logging.trace("Checking environment for $PISA...")
    if 'PISA' in os.environ:
        rpath = os.path.expanduser(os.path.expandvars(os.environ['PISA']))
        logging.debug('Searching resource path PISA=%s'%rpath)

        fpath = os.path.join(rpath,filename)
        if os.path.isfile(fpath):
            logging.debug('Found %s at %s'%(filename,fpath))
            return fpath

    #Not in the resource path, so look inside the package
    logging.trace('Searching package resources...')
    fpath = resource_filename(__name__,filename)
    if os.path.isfile(fpath):
        logging.debug('Found %s at %s'%(filename,fpath))
        return fpath

    #Nowhere to be found
    if fail:
        raise IOError('Could not find resource "%s"'%filename)
    else:
        logging.debug('Could not find resource "%s"'%filename)
        return None
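A hedged usage example (the file name is purely illustrative): the search order is absolute/expanded path first, then $PISA, then the package's bundled resources; with fail=False a missing resource simply returns None:

path = find_resource('settings/example_settings.cfg', fail=False)
if path is None:
    print('resource not found on any search path')
else:
    print('using resource at %s' % path)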
Example #38
def test_nsi_parameterization():
    """Unit test for Hvac-like NSI parameterization."""
    rand = np.random.RandomState(0)
    alpha1, alpha2, deltansi = rand.rand(3) * 2. * np.pi
    phi12, phi13, phi23 = rand.rand(3) * 2*np.pi - np.pi
    eps_max_abs = 10.0
    eps_scale, eps_prime = rand.rand(2) * 2 * eps_max_abs - eps_max_abs
    nsi_params = VacuumLikeNSIParams()
    nsi_params.eps_scale = eps_scale
    nsi_params.eps_prime = eps_prime
    nsi_params.phi12 = phi12
    nsi_params.phi13 = phi13
    nsi_params.phi23 = phi23
    nsi_params.alpha1 = alpha1
    nsi_params.alpha2 = alpha2
    nsi_params.deltansi = deltansi

    logging.trace('Checking agreement between numerical & analytical NSI matrix...')

    eps_mat_numerical = nsi_params.eps_matrix
    eps_mat_analytical = nsi_params.eps_matrix_analytical

    try:
        close = np.isclose(eps_mat_numerical, eps_mat_analytical, **ALLCLOSE_KW)
        if not np.all(close):
            logging.debug(
                "Numerical NSI matrix:\n%s",
                np.array2string(eps_mat_numerical, **ARY2STR_KW)
            )
            logging.debug(
                "Analytical expansion (by hand):\n%s",
                np.array2string(eps_mat_analytical, **ARY2STR_KW)
            )
            raise ValueError(
                'Evaluating analytical expressions for NSI matrix elements'
                ' does not give agreement with numerical calculation!'
                ' Elementwise agreement:\n%s'
                % close
            )
    except ValueError as err:
        logging.warning(
            "%s\nThis is expected."
            " Going ahead with numerical calculation for now.", err
        )

    logging.trace('Now checking agreement with sympy calculation...')

    eps_mat_sympy = nsi_sympy_mat_mult(
        eps_scale_val=eps_scale,
        eps_prime_val=eps_prime,
        phi12_val=phi12,
        phi13_val=phi13,
        phi23_val=phi23,
        alpha1_val=alpha1,
        alpha2_val=alpha2,
        deltansi_val=deltansi
    )

    logging.trace('ALLCLOSE_KW = {}'.format(ALLCLOSE_KW))
    close = np.isclose(eps_mat_numerical, eps_mat_sympy, **ALLCLOSE_KW)
    if not np.all(close):
        logging.error(
            'Numerical NSI matrix:\n%s',
            np.array2string(eps_mat_numerical, **ARY2STR_KW)
        )
        logging.error(
            'Sympy NSI matrix:\n%s', np.array2string(eps_mat_sympy, **ARY2STR_KW)
        )
        raise ValueError(
            'Sympy and numerical calculations disagree! Elementwise agreement:\n'
            '%s' % close
        )
Example #39
    def single_kernel_set(self, e_true, cz_true, e_reco, cz_reco):
        """Construct a 4D kernel set from MC events using VBWKDE.

        Given a set of MC events and each of their {energy{true, reco},
        coszen{true, reco}}, generate a 4D NumPy array that maps a 2D true-flux
        histogram onto the corresponding 2D reco-flux histogram.

        The resulting 4D array can be indexed logically using
          kernel4d[e_true_i, cz_true_j][e_reco_k, cz_reco_l]
        where the 4 indices point from a single MC-true histogram bin (i,j) to
        a single reco histogram bin (k,l).

        Binning of both MC-true and reco histograms is the same and is given by
        the values in self.ebins and self.czbins which define the bin *edges*
        (not the bin centers; hence, len(self.ebins) is one greater than the
        number of bins, etc.).

        NOTE: Actual limits in energy used to group events into a single "true"
        bin may be extended beyond the bin edges defined by self.ebins in order
        to gather enough events to successfully apply VBWKDE.

        Parameters
        ----------
        e_true : sequence
            MC-true neutrino energies, one per event
        cz_true : sequence
            MC-true neutrino coszen, one per event
        e_reco : sequence
            Reconstructed neutrino energies, one per event
        cz_reco : sequence
            Reconstructed neutrino coszen, one per event

        Returns
        -------
        kernel4d : 4D array of float
            Mapping from the number of events in each bin of the 2D
            MC-true-events histogram to the number of events reconstructed in
            each bin of the 2D reconstructed-events histogram. Dimensions are
              len(self.ebins)-1 x len(self.czbins)-1 x len(self.ebins)-1 x
              len(self.czbins)-1
            since ebins and czbins define the histograms' bin edges.
        """
        OVERFIT_FACTOR = 1.0

        assert np.min(np.diff(self.ebins)) > 0, \
            "Energy bin edges not monotonically increasing."
        assert np.min(np.diff(self.czbins)) > 0, \
            "coszen bin edges not monotonically increasing."

        # NOTE: below defines bin centers on linear scale; other logic
        # in this method assumes this to be the case, so
        # **DO NOT USE** utils.utils.get_bin_centers in this method, which
        # may return logarithmically-defined centers instead.

        ebin_edges = np.array(self.ebins)
        left_ebin_edges = ebin_edges[0:-1]
        right_ebin_edges = ebin_edges[1:]
        ebin_centers = (left_ebin_edges+right_ebin_edges)/2.0
        n_ebins = len(ebin_centers)

        czbin_edges = np.array(self.czbins)
        left_czbin_edges = czbin_edges[0:-1]
        right_czbin_edges = czbin_edges[1:]
        czbin_centers = (left_czbin_edges+right_czbin_edges)/2.0
        n_czbins = len(czbin_centers)

        n_events = len(e_true)

        if self.MIN_NUM_EVENTS > n_events:
            self.MIN_NUM_EVENTS = n_events
        if self.TGT_NUM_EVENTS > n_events:
            self.TGT_NUM_EVENTS = n_events

        # Object with which to store the 4D kernels: np 4D array
        kernel4d = np.zeros((n_ebins, n_czbins, n_ebins, n_czbins))

        # Object with which to store the 2D "aggregate_map": the total number
        # of events reconstructed into a given (E, CZ) bin, used for sanity
        # checks
        aggregate_map = np.zeros((n_ebins, n_czbins))
        for ebin_n in range(n_ebins):
            ebin_min = left_ebin_edges[ebin_n]
            ebin_max = right_ebin_edges[ebin_n]
            ebin_mid = (ebin_min+ebin_max)/2.0
            ebin_wid = ebin_max-ebin_min
            
            logging.trace(
                '  processing true-energy bin_n=' + str(ebin_n) + ' of ' +
                str(n_ebins-1) + ', E_{nu,true} in ' +
                '[' + str(ebin_min) + ', ' + str(ebin_max) + '] ...'
            )

            # Absolute distance from these events' true energies to the center
            # of this energy bin; a sorted copy is used below to pick the
            # threshold distances, while the unsorted array preserves event
            # ordering for the selection further down
            abs_enu_dist = np.abs(e_true - ebin_mid)
            sorted_abs_enu_dist = np.sort(abs_enu_dist)

            # Grab the distance the number-"TGT_NUM_EVENTS" event is from the
            # bin center
            tgt_thresh_enu_dist = sorted_abs_enu_dist[self.TGT_NUM_EVENTS-1]

            # Grab the distance the number-"MIN_NUM_EVENTS" event is from the
            # bin center
            min_thresh_enu_dist = sorted_abs_enu_dist[self.MIN_NUM_EVENTS-1]

            # TODO: revisit the below algorithm with proper testing

            # Make threshold distance (which is half the total width) no more
            # than 4x the true-energy-bin width in order to capture the
            # "target" number of points (TGT_NUM_EVENTS) but no less than half
            # the bin width (i.e., the bin should be at least be as wide as the
            # pre-defined bin width).
            #
            # HOWEVER, allow the threshold distance (bin half-width) to expand
            # to as much as 4x the original bin full-width in order to capture
            # the "minimum" number of points (MIN_NUM_EVENTS).
            thresh_enu_dist = \
                    max(min(max(tgt_thresh_enu_dist, ebin_wid/2),
                            4*ebin_wid),
                        min_thresh_enu_dist)
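            # For concreteness (illustrative numbers only, not taken from the
            # original code): with ebin_wid = 1.0 GeV, tgt_thresh_enu_dist =
            # 0.3 and min_thresh_enu_dist = 0.1, this evaluates to
            # max(min(max(0.3, 0.5), 4.0), 0.1) = 0.5, i.e. the half-width of
            # the selection window is never smaller than half the bin width.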

            # Grab all events within the threshold distance
            in_ebin_ind = np.where(abs_enu_dist <= thresh_enu_dist)[0]
            n_in_bin = len(in_ebin_ind)

            # Extract just the neutrino-energy/coszen error columns' values for
            # succinctness
            enu_err = e_reco[in_ebin_ind] - e_true[in_ebin_ind]
            cz_err = cz_reco[in_ebin_ind] - cz_true[in_ebin_ind]

            #==================================================================
            # Neutrino energy resolutions
            #==================================================================
            dmin = min(enu_err)
            dmax = max(enu_err)
            drange = dmax-dmin

            e_lowerlim = min(self.ENERGY_RANGE[0]-ebin_mid*1.5, dmin-drange*0.5)
            e_upperlim = max((np.max(ebin_edges)-ebin_mid)*1.5, dmax+drange*0.5)
            egy_kde_lims = np.array([e_lowerlim, e_upperlim])

            # Use at least min_num_pts points, or the next-highest
            # integer-power-of-two that allows for at least
            # min_pts_smallest_bin points in the smallest energy bin,
            # whichever is larger
            min_num_pts = 2**12
            min_bin_width = np.min(np.diff(ebin_edges))
            min_pts_smallest_bin = 10.0
            kde_range = np.diff(egy_kde_lims)
            num_pts0 = kde_range/(min_bin_width/min_pts_smallest_bin)
            kde_num_pts = int(max(min_num_pts, 2**np.ceil(np.log2(num_pts0))))
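            # As a worked illustration (assumed numbers, not from the original
            # code): an 80 GeV wide KDE range with a 1 GeV smallest bin and
            # 10 points per bin gives num_pts0 = 800, so kde_num_pts =
            # max(2**12, 2**10) = 4096.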
            logging.debug(
                ' Nevts=' + str(n_in_bin) + ' taken from [' +
                str(ebin_mid-thresh_enu_dist) + ', ' +
                str(ebin_mid+thresh_enu_dist) + ']' + ', KDE lims=' +
                str(egy_kde_lims) + ', KDE_N: ' + str(kde_num_pts)
            )

            # Compute variable-bandwidth KDEs
            enu_bw, enu_mesh, enu_pdf = kde.vbw_kde(
                data           = enu_err,
                overfit_factor = OVERFIT_FACTOR,
                MIN            = egy_kde_lims[0],
                MAX            = egy_kde_lims[1],
                N              = kde_num_pts
            )

            if np.min(enu_pdf) < 0:
                # Only issue warning if the most-negative value is negative
                # beyond specified acceptable-numerical-precision threshold
                # (EPSILON)
                if np.min(enu_pdf) <= -self.EPSILON:
                    logging.warn(
                        "np.min(enu_pdf) < 0: Minimum value is " +
                        str(np.min(enu_pdf)) +
                        "; forcing all negative values to 0."
                    )
                # Otherwise, just quietly clip any negative values at 0
                enu_pdf = np.clip(a=enu_pdf, a_min=0, a_max=np.inf)

            assert np.min(enu_pdf) >= 0, str(np.min(enu_pdf))

            # Re-center distribution at the center of the energy bin for which
            # errors were computed
            offset_enu_mesh = enu_mesh+ebin_mid
            offset_enu_pdf = enu_pdf

            # Get reference area under the PDF, for checking after interpolated
            # values are added.
            #
            # NOTE There should be NO normalization because any events lost due
            # to cutting off tails outside the binned region are actually going
            # to be lost, and so should penalize the total area.
            int_val0 = np.trapz(y=offset_enu_pdf,
                                x=offset_enu_mesh)

            # Create linear interpolator for the PDF
            interp = interpolate.interp1d(
                x             = offset_enu_mesh,
                y             = offset_enu_pdf,
                kind          = 'linear',
                copy          = True,
                bounds_error  = True,
                fill_value    = np.nan
            )

            # Insert all bin edges' exact locations into the mesh (For accurate
            # accounting of area in each bin, must include values out to bin
            # edges)
            edge_locs = [be for be in
                         np.concatenate((left_ebin_edges, right_ebin_edges))
                         if not(be in offset_enu_mesh)]
            edge_locs.sort()
            edge_pdfs = interp(edge_locs)
            insert_ind = np.searchsorted(offset_enu_mesh, edge_locs)
            offset_enu_mesh = np.insert(offset_enu_mesh, insert_ind, edge_locs)
            offset_enu_pdf = np.insert(offset_enu_pdf, insert_ind, edge_pdfs)

            int_val = np.trapz(y=offset_enu_pdf, x=offset_enu_mesh)

            assert np.abs(int_val - int_val0) < self.EPSILON

            # Chop off distribution at extrema of energy bins
            valid_ind = np.where(
                (offset_enu_mesh >= np.min(ebin_edges)) &
                (offset_enu_mesh <= np.max(ebin_edges))
            )[0]
            offset_enu_mesh = offset_enu_mesh[valid_ind]
            offset_enu_pdf = offset_enu_pdf[valid_ind]

            # Check that there are no negative density values (after inserts)
            assert np.min(offset_enu_pdf) > 0-self.EPSILON, \
                str(np.min(offset_enu_pdf))

            # Record the integrated area after removing parts outside binned
            # range
            tot_ebin_area0 = np.trapz(y=offset_enu_pdf,
                                      x=offset_enu_mesh)

            # Check that it integrates to <= 1, sanity check
            assert tot_ebin_area0 < 1+self.EPSILON, str(tot_ebin_area0)

            # Identify indices encapsulating the defined energy bins' ranges,
            # and find the area of each bin
            lbinds = np.searchsorted(offset_enu_mesh, left_ebin_edges)
            rbinds = np.searchsorted(offset_enu_mesh, right_ebin_edges)
            bininds = zip(lbinds, rbinds)
            ebin_areas = [np.trapz(y=offset_enu_pdf[l:r+1],
                                   x=offset_enu_mesh[l:r+1])
                          for (l, r) in bininds]

            # Check that no bins have negative areas
            assert np.min(ebin_areas) >= 0

            # Sum the individual bins' areas
            tot_ebin_area = np.sum(ebin_areas)

            # Check that this total of all the bins is equal to the total area
            # under the curve (i.e., make sure there is no overlap or gaps
            # between bins)
            assert np.abs(tot_ebin_area-tot_ebin_area0) < self.EPSILON, \
                    'tot_ebin_area=' + str(tot_ebin_area) + \
                    ' should equal tot_ebin_area0=' + str(tot_ebin_area0)

            #==================================================================
            # Neutrino coszen resolutions
            #==================================================================
            dmin = min(cz_err)
            dmax = max(cz_err)
            drange = dmax-dmin

            # NOTE the limits are 1 less than / 1 greater than the limits that
            # the error will actually take on, so as to allow for any smooth
            # roll-off at edges of data. The calculation of areas below
            # captures all of the area, though, by reflecting bins defined in
            # [-1, 1] about the points -1 and 1, thereby capturing any
            # densities in the range [-3, +3]. This is not necessarily
            # accurate, but it's better than throwing that info out entirely.
            #
            # NOTE also that, since reco events are currently only in the
            # range -1 to 0, there are "gaps" in the capture range; these are
            # due to densities lying in the upper hemisphere, which we
            # intentionally ignore, rather than to the code here failing to
            # take them into account. Normalization is based upon *all*
            # events, whether or not they fall within a bin specified above.

            # Number of points in the mesh used for VBWKDE; must be large
            # enough to capture fast changes in the data but the larger the
            # number, the longer it takes to compute the densities at all the
            # points. Here, just choosing a fixed number regardless of the data
            # or binning
            N_cz_mesh = 2**10

            # Data range for VBWKDE to consider
            cz_gaus_kde_min = -3
            cz_gaus_kde_max = +2

            cz_gaus_kde_failed = False
            previous_fail = False
            for n in xrange(3):
                # TODO: only catch specific exception
                try:
                    cz_bw, cz_mesh, cz_pdf = kde.vbw_kde(
                        data           = cz_err,
                        overfit_factor = OVERFIT_FACTOR,
                        MIN            = cz_gaus_kde_min,
                        MAX            = cz_gaus_kde_max,
                        N              = N_cz_mesh
                    )
                except:
                    cz_gaus_kde_failed = True
                    if n == 0:
                        logging.trace('(cz vbwkde ')
                    logging.trace('fail, ')
                    # If failure occurred in vbw_kde, expand the data range it
                    # takes into account; this usually helps
                    cz_gaus_kde_min -= 1
                    cz_gaus_kde_max += 1
                else:
                    if cz_gaus_kde_failed:
                        previous_fail = True
                        logging.trace('success!')
                    cz_gaus_kde_failed = False
                finally:
                    if previous_fail:
                        logging.trace(')')
                    previous_fail = False
                    if not cz_gaus_kde_failed:
                        break

            if cz_gaus_kde_failed:
                logging.warn('Failed to fit VBWKDE!')
                continue

            if np.min(cz_pdf) < 0:
                logging.warn("np.min(cz_pdf) < 0: Minimum value is " +
                             str(np.min(cz_pdf)) +
                             "; forcing all negative values to 0.")
                cz_pdf = np.clip(a=cz_pdf, a_min=0, a_max=np.inf)

            assert np.min(cz_pdf) >= -self.EPSILON, \
                str(np.min(cz_pdf))

            for czbin_n in range(n_czbins):
                czbin_mid = czbin_centers[czbin_n]

                # Re-center distribution at the center of the current cz bin
                offset_cz_mesh = cz_mesh + czbin_mid

                # Create interpolation object, used to fill in bin edge values
                interp = interpolate.interp1d(
                    x             = offset_cz_mesh,
                    y             = cz_pdf,
                    kind          = 'linear',
                    copy          = True,
                    bounds_error  = False,
                    fill_value    = 0
                )

                # Figure out where all bin edges lie in this re-centered
                # distribution (some bins may be repeated, since bins lie in
                # [-1, 0] while errors lie in [-2, 1]):
                #
                # 1. Find limits of mesh values...
                mmin = offset_cz_mesh[0]
                mmax = offset_cz_mesh[-1]

                # 2. Map all bin edges into the full mesh-value range,
                # reflecting about -1 and +1. If the reflected edge is outside
                # the mesh range, use the exceeded limit of the mesh range as
                # the bin edge instead.
                #
                # This maps every bin edge {i} to one new edge per reflection
                # plus the original (with 4 reflections per side, 9 in all),
                # indexed new_edges[i][{0,1,...}]. Bins are formed by adjacent
                # indices at the same sub-index, so what started as, e.g.,
                # bin 3 is now described by (left, right) edges at
                #   (new_edges[3][0], new_edges[4][0]),
                #   (new_edges[3][1], new_edges[4][1]), etc.
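                # As a concrete illustration (assuming reflect1d(x, p) mirrors
                # x about p, i.e. returns 2*p - x, which is not shown in this
                # snippet): the edge at +0.5 reflected about -1 lands at -2.5,
                # and reflected about +1 lands at +1.5.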

                # NOTE / TODO: It's tempting to dynamically set the number of
                # reflections to minimize computation time, but I think it
                # breaks the code. Just set to a reasonably large number for
                # now and accept the performance penalty. ALSO: if you change
                # the parity of the number of reflections, the code below that
                # has either (wrap_n % 2 == 0) or (wrap_n+1 % 2 == 0) must be
                # swapped!!!
                n_left_reflections = 4
                n_right_reflections = 4

                new_czbin_edges = []
                for edge in czbin_edges:
                    edges_refl_left = []
                    for n in xrange(n_left_reflections):
                        edge_refl_left = reflect1d(edge, -1-(2*n))
                        if edge_refl_left < mmin:
                            edge_refl_left = mmin
                        edges_refl_left.append(edge_refl_left)
                    edges_refl_right = []
                    for n in xrange(n_right_reflections):
                        edge_refl_right = reflect1d(edge, +1+(2*n))
                        if edge_refl_right > mmax:
                            edge_refl_right = mmax
                        edges_refl_right.append(edge_refl_right)
                    # Include all left-reflected versions of this bin edge, in
                    # increasing-x order + this bin edge + right-reflected
                    # versions of this bin edge
                    new_czbin_edges.append(edges_refl_left[::-1] + [edge]
                                           + edges_refl_right)

                # Record all unique bin edges
                edge_locs = set()
                [edge_locs.update(edges) for edges in new_czbin_edges]

                # Throw away bin edges that are already in the mesh
                [edge_locs.remove(edge) for edge in list(edge_locs)
                 if edge in offset_cz_mesh]

                # Make into sorted list
                edge_locs = sorted(edge_locs)

                # Record the total area under the curve
                int_val0 = np.trapz(y=cz_pdf, x=offset_cz_mesh)

                # Insert the missing bin edge locations & pdf-values into
                # the mesh & pdf, respectively
                edge_pdfs = interp(edge_locs)
                insert_ind = np.searchsorted(offset_cz_mesh, edge_locs)
                offset_cz_mesh = np.insert(offset_cz_mesh, insert_ind,
                                           edge_locs)
                offset_cz_pdf = np.insert(cz_pdf, insert_ind, edge_pdfs)
                assert np.min(offset_cz_pdf) > -self.EPSILON

                # Check that the re-meshed PDF still integrates to ~1 (the
                # VBWKDE is normalized over its full mesh, so inserting the
                # bin-edge locations should not change the total area)
                int_val = np.trapz(y=offset_cz_pdf, x=offset_cz_mesh)
                assert np.abs(int_val-1) < self.EPSILON

                # Renormalize if it's not exactly 1
                if int_val != 1.0:
                    offset_cz_pdf = offset_cz_pdf / int_val

                # Add up the area in the bin and areas that are "reflected"
                # into this bin
                new_czbin_edges = np.array(new_czbin_edges)
                czbin_areas = np.zeros(np.shape(new_czbin_edges)[0]-1)
                for wrap_n in range(np.shape(new_czbin_edges)[1]):
                    bin_edge_inds = np.searchsorted(offset_cz_mesh,
                                                    new_czbin_edges[:,wrap_n])
                    lbinds = bin_edge_inds[0:-1]
                    rbinds = bin_edge_inds[1:]
                    # Make sure indices that appear first are less than indices
                    # that appear second in a pair of bin indices
                    if (wrap_n+1) % 2 == 0:
                        bininds = zip(rbinds, lbinds)
                    else:
                        bininds = zip(lbinds, rbinds)
                    tmp_areas = []
                    for (binind_left_edge, binind_right_edge) in bininds:
                        if binind_left_edge == binind_right_edge:
                            tmp_areas.append(0)
                            continue
                        this_bin_area = np.array(np.trapz(
                            y=offset_cz_pdf[binind_left_edge:binind_right_edge+1],
                            x=offset_cz_mesh[binind_left_edge:binind_right_edge+1]
                        ))
                        tmp_areas.append(this_bin_area)
                    czbin_areas += np.array(tmp_areas)

                assert np.min(czbin_areas) > -self.EPSILON

                tot_czbin_area = np.sum(czbin_areas)
                assert tot_czbin_area < int_val + self.EPSILON

                kernel4d[ebin_n, czbin_n] = np.outer(ebin_areas, czbin_areas)
                assert (np.sum(kernel4d[ebin_n, czbin_n]) -
                        tot_ebin_area*tot_czbin_area) < self.EPSILON

        check_areas = kernel4d.sum(axis=(2,3))

        assert np.max(check_areas) < 1 + self.EPSILON, str(np.max(check_areas))
        assert np.min(check_areas) > 0 - self.EPSILON, str(np.min(check_areas))

        return kernel4d
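A minimal sketch of how a kernel returned by this method would typically be applied, folding a 2D true-space histogram into reco space with plain NumPy (the names `true_hist` and `fold_true_to_reco` are hypothetical, not part of the original class):

import numpy as np

def fold_true_to_reco(true_hist, kernel4d):
    # Contract the two true-space axes of the kernel with the 2D true-space
    # histogram, leaving the 2D reco-space histogram:
    #   reco[k, l] = sum_{i, j} true[i, j] * kernel4d[i, j, k, l]
    return np.tensordot(true_hist, kernel4d, axes=([0, 1], [0, 1]))

# Since each kernel4d[i, j] integrates to at most 1 (events reconstructing
# outside the binned region are dropped), the folded total can only shrink:
#   reco_hist = fold_true_to_reco(true_hist, kernel4d)
#   assert reco_hist.sum() <= true_hist.sum() + 1e-9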
Example #40
0
 def store_recursively(fhandle, node, path=None, node_hashes=None):
     if path is None:
         path = []
     if node_hashes is None:
         node_hashes = {}
     full_path = '/' + '/'.join(path)
     if isinstance(node, dict):
         logging.trace("  creating Group `%s`" % full_path)
         try:
             fhandle.create_group(full_path)
         except ValueError:
             pass
         for key in sorted(node.iterkeys()):
             key_str = str(key)
             if not isinstance(key, str):
                 logging.warn('Stringifying key `' + key_str +
                              '` for use as name in HDF5 file')
             val = node[key]
             new_path = path + [key_str]
             store_recursively(fhandle=fhandle, node=val, path=new_path,
                               node_hashes=node_hashes)
     else:
         # Check for existing node
         node_hash = utils.hash_obj(node)
         if node_hash in node_hashes:
             logging.trace("  creating hardlink for Dataset: `%s` -> `%s`" %
                           (full_path, node_hashes[node_hash]))
             # Hardlink the matching existing dataset
             fhandle[full_path] = fhandle[node_hashes[node_hash]]
             return
         # For now, convert None to np.nan since h5py appears to not handle None
         if node is None:
             node = np.nan
             logging.warn("  encountered `None` at node `%s`; converting to"
                          " np.nan" % full_path)
         # "Scalar datasets don't support chunk/filter options". Shuffling
         # is a good idea otherwise since subsequent compression will
         # generally benefit; shuffling requires chunking. Compression is
         # not done here since it is slow.
         if np.isscalar(node):
             shuffle = False
             chunks = None
         else:
             shuffle = True
             chunks = True
             # Store the node_hash for linking to later if this is more than
             # a scalar datatype. It is assumed that any `None` values were
             # already converted to np.nan above.
             node_hashes[node_hash] = full_path
         # TODO: Treat strings as follows? Would this break compatibility
         # with pytables/Pandas? What are benefits? Leaving out for now.
         # if isinstance(node, basestr):
         #     dtype = h5py.special_dtype(vlen=str)
         #     fh.create_dataset(k,data=v,dtype=dtype)
         logging.trace("  creating dataset at node `%s`" % full_path)
         try:
             fhandle.create_dataset(name=full_path, data=node,
                                    chunks=chunks, compression=None,
                                    shuffle=shuffle, fletcher32=False)
         except TypeError:
             try:
                 shuffle = False
                 chunks = None
                 fhandle.create_dataset(name=full_path, data=node,
                                        chunks=chunks, compression=None,
                                        shuffle=shuffle, fletcher32=False)
             except:
                 logging.error('  full_path: ' + full_path)
                 logging.error('  chunks   : ' + str(chunks))
                 logging.error('  shuffle  : ' + str(shuffle))
                 logging.error('  node     : ' + str(node))
                 raise
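A minimal usage sketch for the function above (assuming h5py is installed and that the function, together with the `utils.hash_obj` helper it calls, is importable; note the snippet itself is Python 2 era code, e.g. `iterkeys`). The data layout is purely illustrative:

import numpy as np
import h5py

events = {
    'reco': {'energy': np.linspace(1.0, 80.0, 100),
             'coszen': np.linspace(-1.0, 0.0, 100)},
    # identical array contents should hash equal and be stored as a hardlink
    'truth': {'energy': np.linspace(1.0, 80.0, 100)},
}

with h5py.File('events.hdf5', 'w') as fhandle:
    store_recursively(fhandle=fhandle, node=events)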
Example #41
0
    def switch_to_file(self, fp=None, fpname=None):
        """Switch iterator to a new resource location to continue processing.

        Parameters
        ----------
        fp : None or file-like object
            If `fp` is specified, this takes precedence over `fpname`.

        fpname : None or string
            Path of the file or resource to read from. This resource will be
            located and opened if `fp` is None.

        """
        fpath = None
        if fp is None:
            assert fpname
            resource = find_resource(fpname)
            if isfile(resource):
                fpath = abspath(expanduser(expandvars(resource)))
                if fpath in self.fpaths_processed:
                    self._cleanup()
                    raise ValueError(
                        'Circular reference; already processed "%s" at path'
                        ' "%s"' % (fpname, fpath))
            else:
                self._cleanup()
                raise ValueError('`fpname` "%s" is not a file' % fpname)
            fp_ = c_open(fpath, encoding=None)
        else:
            fp_ = fp
            if fpname is None:
                if hasattr(fp_, 'name'):
                    fpname = fp_.name
                else:
                    fpname = ''
            try:
                resource = find_resource(fpname)
            except IOError:
                pass
            else:
                if isfile(resource):
                    fpath = resource
            if fp in self.fps_processed:
                self._cleanup()
                raise ValueError(
                    'Circular reference; already processed file pointer "%s"'
                    ' at path "%s"' % (fp_, fpname))

        if fpath is not None:
            if fpath in self.fpaths_processed:
                self._cleanup()
                raise ValueError(
                    'Circular reference; already processed "%s" at path'
                    ' "%s"' % (fpname, fpath))
            self.fpaths_processed.append(fpath)

        self.fps_processed.append(fp)

        logging.trace('Switching to "%s" at path "%s"' % (fpname, fpath))

        record = dict(fp=fp_, fpname=fpname, fpath=fpath, lineno=0, line='')
        self._iter_stack.append(record)
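The circular-reference protection above amounts to remembering every absolute path already visited and refusing to revisit one. A stripped-down, standalone sketch of just that guard (a hypothetical `IncludeGuard` helper, not this iterator's actual API):

import os

class IncludeGuard(object):
    def __init__(self):
        self.fpaths_processed = []

    def check_and_record(self, fpname):
        # Normalize to an absolute path so the same file referenced two
        # different ways is still recognized as already processed
        fpath = os.path.abspath(os.path.expanduser(os.path.expandvars(fpname)))
        if fpath in self.fpaths_processed:
            raise ValueError('Circular reference; already processed "%s" at'
                             ' path "%s"' % (fpname, fpath))
        self.fpaths_processed.append(fpath)
        return fpath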
Example #42
0
    def single_kernel_set(self, e_true, cz_true, e_reco, cz_reco,
                          flav, int_type, make_plots=False, out_dir=None):
        """Construct a 4D kernel set from MC events using VBWKDE.

        Given a set of MC events and each of their {energy{true, reco},
        coszen{true, reco}}, generate a 4D NumPy array that maps a 2D true-flux
        histogram onto the corresponding 2D reco-flux histogram.

        The resulting 4D array can be indexed logically using
          kernel4d[e_true_i, cz_true_j][e_reco_k, cz_reco_l]
        where the 4 indices point from a single MC-true histogram bin (i,j) to
        a single reco histogram bin (k,l).

        Binning of both MC-true and reco histograms is the same and is given by
        the values in self.ebins and self.czbins which define the bin *edges*
        (not the bin centers; hence, len(self.ebins) is one greater than the
        number of bins, etc.).

        NOTE: Actual limits in energy used to group events into a single "true"
        bin may be extended beyond the bin edges defined by self.ebins in order
        to gather enough events to successfully apply VBWKDE.

        Parameters
        ----------
        e_true : sequence
            MC-true neutrino energies, one per event
        cz_true : sequence
            MC-true neutrino coszen, one per event
        e_reco : sequence
            Reconstructed neutrino energies, one per event
        cz_reco : sequence
            Reconstructed neutrino coszen, one per event
        flav : str
        int_type : str
        make_plots : bool
        out_dir : str or None
            path to directory into which to save plots. ``None`` (default)
            saves to PWD.

        Returns
        -------
        kernel4d : 4D array of float
            Mapping from the number of events in each bin of the 2D
            MC-true-events histogram to the number of events reconstructed in
            each bin of the 2D reconstructed-events histogram. Dimensions are
              len(self.ebins)-1 x len(self.czbins)-1 x len(self.ebins)-1 x
              len(self.czbins)-1
            since ebins and czbins define the histograms' bin edges.
        """
        OVERFIT_FACTOR = 1.0

        if make_plots:
            import matplotlib as mpl
            import matplotlib.pyplot as plt
            from matplotlib.backends.backend_pdf import PdfPages
            from matplotlib.patches import Rectangle
            plt.close(1)
            plt.close(2)
            plt.close(3)
            def rugplot(a, y0, dy, ax, **kwargs):
                return ax.plot([a,a], [y0, y0+dy], **kwargs)
            plot_fname = '_'.join(['resolutions', 'vbwkde', flav, int_type]) + '.pdf'
            if out_dir is not None:
                plot_fname = os.path.join(out_dir, plot_fname)
            TOP = 0.925
            BOTTOM = 0.05
            RIGHT = 0.97
            LEFT = 0.07
            HSPACE = 0.12
            LABELPAD = 0.058
            AXISBG = (0.5, 0.5, 0.5)
            DARK_RED =  (0.7, 0.0, 0.0)
            HIST_PP = dict(
                facecolor=(1,0.5,0.5), edgecolor=DARK_RED,
                histtype='stepfilled', alpha=0.7, linewidth=2.0,
                label=r'$\mathrm{Histogram}$'
            )
            N_HBINS = 25
            DIFFUS_PP = dict(
                color=(0.0, 0.0, 0.0), linestyle='-', marker=None, alpha=0.6,
                linewidth=2.0, label=r'$\mathrm{VBWKDE}$'
            )
            RUG_PP = dict(color=(1.0, 1.0, 1.0), linewidth=0.4, alpha=0.5)
            RUG_LAB =r'$\mathrm{Rug\,plot}$'
            LEGFNTCOL = (1,1,1)
            LEGFACECOL = (0.2,0.2,0.2)
            GRIDCOL = (0.4, 0.4, 0.4)
            pdfpgs = PdfPages(plot_fname)

        assert np.min(np.diff(self.ebins)) > 0, \
            "Energy bin edges not monotonically increasing."
        assert np.min(np.diff(self.czbins)) > 0, \
            "coszen bin edges not monotonically increasing."

        # NOTE: below defines bin centers on linear scale; other logic
        # in this method assumes this to be the case, so
        # **DO NOT USE** utils.utils.get_bin_centers in this method, which
        # may return logarithmically-defined centers instead.

        ebin_edges = np.array(self.ebins)
        left_ebin_edges = ebin_edges[0:-1]
        right_ebin_edges = ebin_edges[1:]
        ebin_centers = (left_ebin_edges+right_ebin_edges)/2.0
        ebin_range = ebin_edges[-1] - ebin_edges[0]
        n_ebins = len(ebin_centers)

        czbin_edges = np.array(self.czbins)
        left_czbin_edges = czbin_edges[0:-1]
        right_czbin_edges = czbin_edges[1:]
        czbin_centers = (left_czbin_edges+right_czbin_edges)/2.0
        n_czbins = len(czbin_centers)

        n_events = len(e_true)

        if self.MIN_NUM_EVENTS > n_events:
            self.MIN_NUM_EVENTS = n_events
        if self.TGT_NUM_EVENTS > n_events:
            self.TGT_NUM_EVENTS = n_events

        # Object with which to store the 4D kernels: np 4D array
        kernel4d = np.zeros((n_ebins, n_czbins, n_ebins, n_czbins))

        # Object with which to store the 2D "aggregate_map": the total number
        # of events reconstructed into a given (E, CZ) bin, used for sanity
        # checks
        aggregate_map = np.zeros((n_ebins, n_czbins))
        for ebin_n in range(n_ebins):
            ebin_min = left_ebin_edges[ebin_n]
            ebin_max = right_ebin_edges[ebin_n]
            ebin_mid = (ebin_min+ebin_max)/2.0
            ebin_wid = ebin_max-ebin_min

            logging.debug(
                'Processing true-energy bin_n=' + format(ebin_n, 'd') + ' of ' +
                format(n_ebins-1, 'd') + ', E_{nu,true} in ' +
                '[' + format(ebin_min, '0.3f') + ', ' +
                format(ebin_max, '0.3f') + '] ...'
            )

            # Absolute distance from these events' true energies to the center
            # of this energy bin; a sorted copy is used for finding threshold
            # distances
            abs_enu_dist = np.abs(e_true - ebin_mid)
            sorted_abs_enu_dist = np.sort(abs_enu_dist)

            # Grab the distance the number-"TGT_NUM_EVENTS" event is from the
            # bin center
            tgt_thresh_enu_dist = sorted_abs_enu_dist[self.TGT_NUM_EVENTS-1]

            # Grab the distance the number-"MIN_NUM_EVENTS" event is from the
            # bin center
            min_thresh_enu_dist = sorted_abs_enu_dist[self.MIN_NUM_EVENTS-1]

            # TODO: revisit the below algorithm with proper testing

            # Make threshold distance (which is half the total width) no more
            # than 4x the true-energy-bin width in order to capture the
            # "target" number of points (TGT_NUM_EVENTS) but no less than half
            # the bin width (i.e., the bin should be at least be as wide as the
            # pre-defined bin width).
            #
            # HOWEVER, allow the threshold distance (bin half-width) to expand
            # to as much as 4x the original bin full-width in order to capture
            # the "minimum" number of points (MIN_NUM_EVENTS).
            thresh_enu_dist = \
                    max(min(max(tgt_thresh_enu_dist, ebin_wid/2),
                            4*ebin_wid),
                        min_thresh_enu_dist)

            # Grab all events within the threshold distance
            in_ebin_ind = np.where(abs_enu_dist <= thresh_enu_dist)[0]
            #print '** IN EBIN FIRST, LAST ENERGY:', e_reco[in_ebin_ind[0]], e_reco[in_ebin_ind[-1]]
            n_in_bin = len(in_ebin_ind)

            # Record lowest/highest energies that are included in the bin
            actual_left_ebin_edge = min(ebin_min, min(e_true[in_ebin_ind])) #max(min(ebins), ebin_mid-thresh_enu_dist)
            actual_right_ebin_edge = max(ebin_max, max(e_true[in_ebin_ind])) #(max(ebins), ebin_mid+thresh_enu_dist)

            # Extract just the neutrino-energy/coszen error columns' values for
            # succinctness
            enu_err = e_reco[in_ebin_ind] - e_true[in_ebin_ind]
            cz_err = cz_reco[in_ebin_ind] - cz_true[in_ebin_ind]

            #==================================================================
            # Neutrino energy resolutions
            #==================================================================
            dmin = min(enu_err)
            dmax = max(enu_err)
            drange = dmax-dmin

            e_lowerlim = min(self.ENERGY_RANGE[0]-ebin_mid*1.5, dmin-drange*0.5)
            e_upperlim = max((np.max(ebin_edges)-ebin_mid)*1.5, dmax+drange*0.5)
            egy_kde_lims = np.array([e_lowerlim, e_upperlim])

            # Use at least min_num_pts points, or the next-highest
            # integer-power-of-two that allows for at least
            # min_pts_smallest_bin points in the smallest energy bin,
            # whichever is larger
            min_num_pts = 2**12
            min_bin_width = np.min(ebin_edges[1:]-ebin_edges[:-1])
            min_pts_smallest_bin = 5.0
            kde_range = np.diff(egy_kde_lims)
            num_pts0 = kde_range/(min_bin_width/min_pts_smallest_bin)
            kde_num_pts = int(max(min_num_pts, 2**np.ceil(np.log2(num_pts0))))
            logging.debug(
                '  N_evts=' + str(n_in_bin) + ', taken from [' +
                format(actual_left_ebin_edge, '0.3f') + ', ' +
                format(actual_right_ebin_edge, '0.3f') + ']' + ', VBWKDE lims=' +
                str(egy_kde_lims) + ', VBWKDE_N: ' + str(kde_num_pts)
            )

            # Compute variable-bandwidth KDEs
            enu_bw, enu_mesh, enu_pdf = kde.vbw_kde(
                data           = enu_err,
                overfit_factor = OVERFIT_FACTOR,
                MIN            = egy_kde_lims[0],
                MAX            = egy_kde_lims[1],
                N              = kde_num_pts
            )

            if np.min(enu_pdf) < 0:
                # Only issue warning if the most-negative value is negative
                # beyond specified acceptable-numerical-precision threshold
                # (EPSILON)
                if np.min(enu_pdf) <= -self.EPSILON:
                    logging.warn(
                        "np.min(enu_pdf) < 0: Minimum value is " +
                        str(np.min(enu_pdf)) +
                        "; forcing all negative values to 0."
                    )
                # Otherwise, just quietly clip any negative values at 0
                enu_pdf = np.clip(a=enu_pdf, a_min=0, a_max=np.inf)

            assert np.min(enu_pdf) >= 0, str(np.min(enu_pdf))

            # Re-center distribution at the center of the energy bin for which
            # errors were computed
            offset_enu_mesh = enu_mesh+ebin_mid
            offset_enu_pdf = enu_pdf

            # Get reference area under the PDF, for checking after interpolated
            # values are added.
            #
            # NOTE There should be NO normalization because any events lost due
            # to cutting off tails outside the binned region are actually going
            # to be lost, and so should penalize the total area.
            int_val0 = np.trapz(y=offset_enu_pdf,
                                x=offset_enu_mesh)

            # Create linear interpolator for the PDF
            interp = interpolate.interp1d(
                x             = offset_enu_mesh,
                y             = offset_enu_pdf,
                kind          = 'linear',
                copy          = True,
                bounds_error  = True,
                fill_value    = np.nan
            )

            # Insert all bin edges' exact locations into the mesh (For accurate
            # accounting of area in each bin, must include values out to bin
            # edges)
            edge_locs = [be for be in
                         np.concatenate((left_ebin_edges, right_ebin_edges))
                         if not(be in offset_enu_mesh)]
            edge_locs.sort()
            edge_pdfs = interp(edge_locs)
            insert_ind = np.searchsorted(offset_enu_mesh, edge_locs)
            offset_enu_mesh = np.insert(offset_enu_mesh, insert_ind, edge_locs)
            offset_enu_pdf = np.insert(offset_enu_pdf, insert_ind, edge_pdfs)

            int_val = np.trapz(y=offset_enu_pdf, x=offset_enu_mesh)

            assert np.abs(int_val - int_val0) < self.EPSILON

            # Chop off distribution at extrema of energy bins
            valid_ind = np.where(
                (offset_enu_mesh >= np.min(ebin_edges)) &
                (offset_enu_mesh <= np.max(ebin_edges))
            )[0]
            offset_enu_mesh = offset_enu_mesh[valid_ind]
            offset_enu_pdf = offset_enu_pdf[valid_ind]

            # Check that there are no negative density values (after inserts)
            assert np.min(offset_enu_pdf) > 0-self.EPSILON, \
                str(np.min(offset_enu_pdf))

            # Record the integrated area after removing parts outside binned
            # range
            tot_ebin_area0 = np.trapz(y=offset_enu_pdf,
                                      x=offset_enu_mesh)

            # Check that it integrates to <= 1, sanity check
            assert tot_ebin_area0 < 1+self.EPSILON, str(tot_ebin_area0)

            # Identify indices encapsulating the defined energy bins' ranges,
            # and find the area of each bin
            lbinds = np.searchsorted(offset_enu_mesh, left_ebin_edges)
            rbinds = np.searchsorted(offset_enu_mesh, right_ebin_edges)
            bininds = zip(lbinds, rbinds)
            ebin_areas = [np.trapz(y=offset_enu_pdf[l:r+1],
                                   x=offset_enu_mesh[l:r+1])
                          for (l, r) in bininds]

            # Check that no bins have negative areas
            assert np.min(ebin_areas) >= 0

            # Sum the individual bins' areas
            tot_ebin_area = np.sum(ebin_areas)

            # Check that this total of all the bins is equal to the total area
            # under the curve (i.e., make sure there is no overlap or gaps
            # between bins)
            assert np.abs(tot_ebin_area-tot_ebin_area0) < self.EPSILON, \
                    'tot_ebin_area=' + str(tot_ebin_area) + \
                    ' should equal tot_ebin_area0=' + str(tot_ebin_area0)

            if make_plots:
                fig1 = plt.figure(1, figsize=(8,10), dpi=90)
                fig1.clf()
                ax1 = fig1.add_subplot(211, axisbg=AXISBG)

                # Retrieve region where VBWKDE lives
                ml_ci = confInterval.MLConfInterval(x=enu_mesh, y=enu_pdf)
                #for conf in np.logspace(np.log10(0.999), np.log10(0.95), 50):
                #    try:
                #        lb, ub, yopt, r = ml_ci.findCI_lin(conf=conf)
                #    except:
                #        pass
                #    else:
                #        break
                #xlims = (min(-ebin_mid*1.5, lb),
                #         max(min(ub, 6*ebin_mid),2*ebin_mid))
                lb, ub, yopt, r = ml_ci.findCI_lin(conf=0.98)
                xlims = (lb, #min(-ebin_mid*1.5, lb),
                         max(min(ub, 6*ebin_mid),2*ebin_wid))

                #xlims = (
                #    -ebin_wid*1.5,
                #    ebin_wid*1.5
                #)
                #    min(ebin_mid*2, ebin_edges[-1]+(ebin_edges[-1]-ebin_edges[0])*0.1)
                #)

                # Histogram of events' reco error
                hbins = np.linspace(dmin-0.02*drange, dmax+0.02*drange,
                                    N_HBINS*np.round(drange/ebin_centers[ebin_n]))
                hvals, hbins, hpatches = ax1.hist(enu_err,
                                                  bins=hbins,
                                                  normed=True,
                                                  **HIST_PP)

                # Plot the VBWKDE
                ax1.plot(enu_mesh, enu_pdf, **DIFFUS_PP)
                axlims = ax1.axis('tight')
                ax1.set_xlim(xlims)
                ymax = axlims[3]*1.05
                ax1.set_ylim(0, ymax)

                # Grey-out regions outside binned region, so it's clear what
                # part of tail(s) will be thrown away
                width = -ebin_mid+ebin_edges[0]-xlims[0]
                unbinned_region_tex = r'$\mathrm{Unbinned}$'
                if width > 0:
                    ax1.add_patch(Rectangle((xlims[0],0), width, ymax, #zorder=-1,
                                            alpha=0.30, facecolor=(0.0 ,0.0, 0.0), fill=True,
                                            ec='none'))
                    ax1.text(xlims[0]+(xlims[1]-xlims[0])/40., ymax/10.,
                             unbinned_region_tex, fontsize=14, ha='left',
                             va='bottom', rotation=90, color='k')
                
                width = xlims[1] - (ebin_edges[-1]-ebin_mid)
                if width > 0:
                    ax1.add_patch(Rectangle((xlims[1]-width,0), width, ymax,
                                            alpha=0.30, facecolor=(0, 0, 0),
                                            fill=True, ec='none'))
                    ax1.text(xlims[1]-(xlims[1]-xlims[0])/40., ymax/10.,
                             unbinned_region_tex, fontsize=14, ha='right',
                             va='bottom', rotation=90, color='k')

                # Rug plot of events' reco energy errors
                ylim = ax1.get_ylim()
                dy = ylim[1] - ylim[0]
                ruglines = rugplot(enu_err, y0=ylim[1], dy=-dy/40., ax=ax1,
                                   **RUG_PP)
                ruglines[-1].set_label(RUG_LAB)

                # Legend
                leg_title_tex = r'$\mathrm{Normalized}\,E_\nu\mathrm{-err.\,distr.}$'
                x1lab = ax1.set_xlabel(
                    r'$E_{\nu,\mathrm{reco}}-E_{\nu,\mathrm{true}}\;' +
                    r'(\mathrm{GeV})$', labelpad=LABELPAD
                )
                leg = ax1.legend(loc='upper right', title=leg_title_tex,
                                 frameon=True, framealpha=0.8,
                                 fancybox=True, bbox_to_anchor=[1,0.975])

                # Other plot details
                ax1.xaxis.set_label_coords(0.9, -LABELPAD)
                ax1.xaxis.grid(color=GRIDCOL)
                ax1.yaxis.grid(color=GRIDCOL)
                leg.get_title().set_fontsize(16)
                leg.get_title().set_color(LEGFNTCOL)
                [t.set_color(LEGFNTCOL) for t in leg.get_texts()]
                frame = leg.get_frame()
                frame.set_facecolor(LEGFACECOL)
                frame.set_edgecolor(None)

            #==================================================================
            # Neutrino coszen resolution for events in this energy bin
            #==================================================================
            dmin = min(cz_err)
            dmax = max(cz_err)
            drange = dmax-dmin

            # NOTE the limits are 1 less than / 1 greater than the limits that
            # the error will actually take on, so as to allow for any smooth
            # roll-off at edges of data. The calculation of areas below
            # captures all of the area, though, by reflecting bins defined in
            # [-1, 1] about the points -1 and 1, thereby capturing any
            # densities in the range [-3, +3]. This is not necessarily
            # accurate, but it's better than throwing that info out entirely.
            #
            # NOTE also that, since reco events are currently only in the
            # range -1 to 0, there are "gaps" in the capture range; these are
            # due to densities lying in the upper hemisphere, which we
            # intentionally ignore, rather than to the code here failing to
            # take them into account. Normalization is based upon *all*
            # events, whether or not they fall within a bin specified above.

            # Number of points in the mesh used for VBWKDE; must be large
            # enough to capture fast changes in the data but the larger the
            # number, the longer it takes to compute the densities at all the
            # points. Here, just choosing a fixed number regardless of the data
            # or binning
            N_cz_mesh = 2**10

            # Data range for VBWKDE to consider
            cz_kde_min = -3
            cz_kde_max = +2

            cz_kde_failed = False
            previous_fail = False
            for n in xrange(3):
                # TODO: only catch specific exception
                try:
                    cz_bw, cz_mesh, cz_pdf = kde.vbw_kde(
                        data           = cz_err,
                        overfit_factor = OVERFIT_FACTOR,
                        MIN            = cz_kde_min,
                        MAX            = cz_kde_max,
                        N              = N_cz_mesh
                    )
                except:
                    cz_kde_failed = True
                    if n == 0:
                        logging.trace('(cz vbwkde ')
                    logging.trace('fail, ')
                    # If failure occurred in vbw_kde, expand the data range it
                    # takes into account; this usually helps
                    cz_kde_min -= 1
                    cz_kde_max += 1
                else:
                    if cz_kde_failed:
                        previous_fail = True
                        logging.trace('success!')
                    cz_kde_failed = False
                finally:
                    if previous_fail:
                        logging.trace(')')
                    previous_fail = False
                    if not cz_kde_failed:
                        break

            if cz_kde_failed:
                logging.warn('Failed to fit VBWKDE!')
                continue

            if np.min(cz_pdf) < 0:
                logging.warn("np.min(cz_pdf) < 0: Minimum value is " +
                             str(np.min(cz_pdf)) +
                             "; forcing all negative values to 0.")
                cz_pdf = np.clip(a=cz_pdf, a_min=0, a_max=np.inf)

            assert np.min(cz_pdf) >= -self.EPSILON, \
                str(np.min(cz_pdf))

            # TODO: test and/or visualize the shifting & re-binning process
            for czbin_n in range(n_czbins):
                czbin_mid = czbin_centers[czbin_n]

                # Re-center distribution at the center of the current cz bin
                offset_cz_mesh = cz_mesh + czbin_mid

                # Create interpolation object, used to fill in bin edge values
                interp = interpolate.interp1d(
                    x             = offset_cz_mesh,
                    y             = cz_pdf,
                    kind          = 'linear',
                    copy          = True,
                    bounds_error  = False,
                    fill_value    = 0
                )

                # Figure out where all bin edges lie in this re-centered
                # distribution (some bins may be repeated, since bins lie in
                # [-1, 0] while errors lie in [-2, 1]):
                #
                # 1. Find limits of mesh values...
                mmin = offset_cz_mesh[0]
                mmax = offset_cz_mesh[-1]

                # 2. Map all bin edges into the full mesh-value range,
                # reflecting about -1 and +1. If the reflected edge is outside
                # the mesh range, use the exceeded limit of the mesh range as
                # the bin edge instead.
                #
                # This maps every bin edge {i} to one new edge per reflection
                # plus the original (with 4 reflections per side, 9 in all),
                # indexed new_edges[i][{0,1,...}]. Bins are formed by adjacent
                # indices at the same sub-index, so what started as, e.g.,
                # bin 3 is now described by (left, right) edges at
                #   (new_edges[3][0], new_edges[4][0]),
                #   (new_edges[3][1], new_edges[4][1]), etc.

                # NOTE / TODO: It's tempting to dynamically set the number of
                # reflections to minimize computation time, but I think it
                # breaks the code. Just set to a reasonably large number for
                # now and accept the performance penalty. ALSO: if you change
                # the parity of the number of reflections, the code below that
                # has either (wrap_n % 2 == 0) or (wrap_n+1 % 2 == 0) must be
                # swapped!!!
                n_left_reflections = 4
                n_right_reflections = 4

                new_czbin_edges = []
                for edge in czbin_edges:
                    edges_refl_left = []
                    for n in xrange(n_left_reflections):
                        edge_refl_left = reflect1d(edge, -1-(2*n))
                        if edge_refl_left < mmin:
                            edge_refl_left = mmin
                        edges_refl_left.append(edge_refl_left)
                    edges_refl_right = []
                    for n in xrange(n_right_reflections):
                        edge_refl_right = reflect1d(edge, +1+(2*n))
                        if edge_refl_right > mmax:
                            edge_refl_right = mmax
                        edges_refl_right.append(edge_refl_right)
                    # Include all left-reflected versions of this bin edge, in
                    # increasing-x order + this bin edge + right-reflected
                    # versions of this bin edge
                    new_czbin_edges.append(edges_refl_left[::-1] + [edge]
                                           + edges_refl_right)

                # Record all unique bin edges
                edge_locs = set()
                [edge_locs.update(edges) for edges in new_czbin_edges]

                # Throw away bin edges that are already in the mesh
                [edge_locs.remove(edge) for edge in list(edge_locs)
                 if edge in offset_cz_mesh]

                # Make into sorted list
                edge_locs = sorted(edge_locs)

                # Record the total area under the curve
                int_val0 = np.trapz(y=cz_pdf, x=offset_cz_mesh)

                # Insert the missing bin edge locations & pdf-values into
                # the mesh & pdf, respectively
                edge_pdfs = interp(edge_locs)
                insert_ind = np.searchsorted(offset_cz_mesh, edge_locs)
                offset_cz_mesh = np.insert(offset_cz_mesh, insert_ind,
                                           edge_locs)
                offset_cz_pdf = np.insert(cz_pdf, insert_ind, edge_pdfs)
                assert np.min(offset_cz_pdf) > -self.EPSILON

                # Check that the re-meshed PDF still integrates to ~1 (the
                # VBWKDE is normalized over its full mesh, so inserting the
                # bin-edge locations should not change the total area)
                int_val = np.trapz(y=offset_cz_pdf, x=offset_cz_mesh)
                assert np.abs(int_val-1) < self.EPSILON

                # Renormalize if it's not exactly 1
                if int_val != 1.0:
                    offset_cz_pdf = offset_cz_pdf / int_val

                # Add up the area in the bin and areas that are "reflected"
                # into this bin
                new_czbin_edges = np.array(new_czbin_edges)
                czbin_areas = np.zeros(np.shape(new_czbin_edges)[0]-1)
                for wrap_n in range(np.shape(new_czbin_edges)[1]):
                    bin_edge_inds = np.searchsorted(offset_cz_mesh,
                                                    new_czbin_edges[:,wrap_n])
                    lbinds = bin_edge_inds[0:-1]
                    rbinds = bin_edge_inds[1:]
                    # Make sure indices that appear first are less than indices
                    # that appear second in a pair of bin indices
                    if (wrap_n+1) % 2 == 0:
                        bininds = zip(rbinds, lbinds)
                    else:
                        bininds = zip(lbinds, rbinds)
                    tmp_areas = []
                    for (binind_left_edge, binind_right_edge) in bininds:
                        if binind_left_edge == binind_right_edge:
                            tmp_areas.append(0)
                            continue
                        this_bin_area = np.array(np.trapz(
                            y=offset_cz_pdf[binind_left_edge:binind_right_edge+1],
                            x=offset_cz_mesh[binind_left_edge:binind_right_edge+1]
                        ))
                        tmp_areas.append(this_bin_area)
                    czbin_areas += np.array(tmp_areas)

                assert np.min(czbin_areas) > -self.EPSILON

                tot_czbin_area = np.sum(czbin_areas)
                assert tot_czbin_area < int_val + self.EPSILON

                kernel4d[ebin_n, czbin_n] = np.outer(ebin_areas, czbin_areas)
                assert (np.sum(kernel4d[ebin_n, czbin_n]) -
                        tot_ebin_area*tot_czbin_area) < self.EPSILON

            if make_plots:
                ax2 = fig1.add_subplot(212, axisbg=AXISBG)
                hbins = np.linspace(dmin-0.02*drange, dmax+0.02*drange, N_HBINS*3)
                hvals, hbins, hpatches = ax2.hist(cz_err, bins=hbins,
                                                  normed=True, **HIST_PP)
                ax2.plot(cz_mesh, cz_pdf, **DIFFUS_PP)
                fci = confInterval.MLConfInterval(x=cz_mesh,
                                                  y=cz_pdf)
                lb, ub, yopt, r = fci.findCI_lin(conf=0.995)
                axlims = ax2.axis('tight')
                ax2.set_xlim(lb, ub)
                ax2.set_ylim(0, axlims[3]*1.05)

                ylim = ax2.get_ylim()
                dy = ylim[1] - ylim[0]
                ruglines = rugplot(cz_err, y0=ylim[1], dy=-dy/40., ax=ax2, **RUG_PP)
                ruglines[-1].set_label(r'$\mathrm{Rug\,plot}$')

                x2lab = ax2.set_xlabel(
                    r'$\cos\vartheta_{\mathrm{track,reco}}-\cos\vartheta_{\nu,\mathrm{true}}$',
                    labelpad=LABELPAD
                )
                ax2.xaxis.set_label_coords(0.9, -LABELPAD)
                ax2.xaxis.grid(color=GRIDCOL)
                ax2.yaxis.grid(color=GRIDCOL)
                leg_title_tex = r'$\mathrm{Normalized}\,\cos\vartheta\mathrm{-err.\,distr.}$'
                leg = ax2.legend(loc='upper right', title=leg_title_tex,
                                 frameon=True, framealpha=0.8, fancybox=True,
                                 bbox_to_anchor=[1,0.975])
                leg.get_title().set_fontsize(16)
                leg.get_title().set_color(LEGFNTCOL)
                [t.set_color(LEGFNTCOL) for t in leg.get_texts()]
                frame = leg.get_frame()
                frame.set_facecolor(LEGFACECOL)
                frame.set_edgecolor(None)

                actual_bin_tex = ''
                if (actual_left_ebin_edge != ebin_min) or (actual_right_ebin_edge != ebin_max):
                    actual_bin_tex = r'E_{\nu,\mathrm{true}}\in [' + \
                            format(actual_left_ebin_edge, '0.2f') + r',\,' + \
                            format(actual_right_ebin_edge, '0.2f') + r'] \mapsto '
                stt = r'$\mathrm{Resolutions,\,' + flav_tex(flav) + r'\,' + \
                        int_tex(int_type) + r'}$' + '\n' + \
                        r'$' + actual_bin_tex + r'\mathrm{Bin}_{' + format(ebin_n, 'd') + r'}\equiv E_{\nu,\mathrm{true}}\in [' + format(ebin_min, '0.2f') + \
                        r',\,' + format(ebin_max, '0.2f') + r']\,\mathrm{GeV}' + \
                        r',\,N_\mathrm{events}=' + format(n_in_bin, 'd') + r'$'
                
                fig1.subplots_adjust(top=TOP, bottom=BOTTOM, left=LEFT, right=RIGHT, hspace=HSPACE)
                suptitle = fig1.suptitle(stt)
                suptitle.set_fontsize(16)
                suptitle.set_position((0.5,0.98))
                fig1.savefig(pdfpgs, format='pdf')

        check_areas = kernel4d.sum(axis=(2,3))

        assert np.max(check_areas) < 1 + self.EPSILON, str(np.max(check_areas))
        assert np.min(check_areas) > 0 - self.EPSILON, str(np.min(check_areas))

        if make_plots:
            fig2 = plt.figure(2, figsize=(8,10), dpi=90)
            fig2.clf()
            ax = fig2.add_subplot(111)
            X, Y = np.meshgrid(range(n_czbins), range(n_ebins))
            cm = mpl.cm.Paired_r
            cm.set_over((1,1,1), 1)
            cm.set_under((0,0,0), 1)
            plt.pcolor(X, Y, check_areas, vmin=0+self.EPSILON, vmax=1.0,
                       shading='faceted', cmap=cm)
            plt.colorbar(ticks=np.arange(0, 1.05, 0.05))
            ax.grid(0)
            ax.axis('tight')
            ax.set_xlabel(r'$\cos\vartheta_\mathrm{true}\mathrm{\,bin\,num.}$')
            ax.set_ylabel(r'$E_{\nu,\mathrm{true}}\mathrm{\,bin\,num.}$')
            ax.set_title(r'$\mathrm{Fract\,of\,evts\,starting\,in\,each}\,(E_{\nu,\mathrm{true}},\,\cos\vartheta_\mathrm{true})\,\mathrm{bin\,that\,reco\,in\,bounds}$'+
                 '\n'+r'$\mathrm{None\,should\,be\,>1\,(shown\,white);\,no-event\,bins\,are\,black;\,avg.}=' + format(np.mean(check_areas),'0.3f') + r'$')
            fig2.tight_layout()
            fig2.savefig(pdfpgs, format='pdf')

            check_areas2 = kernel4d.sum(axis=(0,1))
            fig3 = plt.figure(3, figsize=(8,10), dpi=90)
            fig3.clf()
            ax = fig3.add_subplot(111)
            X, Y = np.meshgrid(range(n_czbins), range(n_ebins))
            cm = mpl.cm.Paired_r
            cm.set_over((1,1,1), 1)
            cm.set_under((0,0,0), 1)
            plt.pcolor(X, Y, check_areas2, vmin=0+self.EPSILON,# vmax=1.0,
                       shading='faceted', cmap=cm)
            plt.colorbar(ticks=np.arange(0, 0.1+np.ceil(10.*np.max(check_areas2))/10., 0.05))
            ax.grid(0)
            ax.axis('tight')
            ax.set_xlabel(r'$\cos\vartheta_\mathrm{reco}\mathrm{\,bin\,num.}$')
            ax.set_ylabel(r'$E_{\nu,\mathrm{reco}}\mathrm{\,bin\,num.}$')
            ax.set_title(r'$\mathrm{Normed\,num\,events\,reconstructing\,into\,each}\,(E_{\nu,\mathrm{reco}},\,\cos\vartheta_\mathrm{reco})\,\mathrm{bin}$'+
                 '\n'+r'$\mathrm{No-event\,bins\,are\,black;\,avg.}=' + format(np.mean(check_areas2),'0.3f') + r'$')
            fig3.tight_layout()
            fig3.savefig(pdfpgs, format='pdf')

            pdfpgs.close()

        return kernel4d
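
The 4-D array returned here maps each (E_true, cos-zenith_true) bin onto the reco grid, so folding a true-space histogram into reco space is a single tensor contraction. As a rough illustration only (the array names and bin counts below are placeholders, not taken from the example):

import numpy as np

# Hypothetical shapes: kernel4d[e_true, cz_true, e_reco, cz_reco]
n_ebins, n_czbins = 39, 20
true_map = np.random.rand(n_ebins, n_czbins)                     # counts per true bin
kernel4d = np.random.rand(n_ebins, n_czbins, n_ebins, n_czbins)  # smearing kernel

# Sum over the true-bin axes to obtain the reconstructed-space map
reco_map = np.tensordot(true_map, kernel4d, axes=([0, 1], [0, 1]))
assert reco_map.shape == (n_ebins, n_czbins)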
Example #43
0
#
# author: Sebastian Boeser
#         [email protected]
#
# date:   2014-01-27

import os
import sys
import numpy as np
from pisa.utils.log import logging

# Try and get the much faster simplejson if we can
try:
    import simplejson as json
    from simplejson import JSONDecodeError
    logging.trace("Using simplejson")
except ImportError:
    import json
    # No DecodeError in default json, dummy one
    class JSONDecodeError(ValueError):
        pass
    logging.trace("Using json")


def json_string(string):
    """Decode a json string"""
    return json.loads(string)


def from_json(filename):
    """Open a file in JSON format and parse the content"""
    # The body is truncated in this excerpt; a minimal sketch of the likely
    # behaviour, assuming only the standard json/simplejson API:
    with open(os.path.expandvars(filename)) as json_file:
        return json.load(json_file)
Example #44
0
    def initialize_kernel(self):
        '''
        Initializes: 1) the grid_propagator class, 2) the device arrays
        that will be passed to the propagateGrid() kernel and 3) the
        kernel module.
        '''

        self.grid_prop  = GridPropagator(self.earth_model,self.czcen_fine)
        self.maxLayers  = self.grid_prop.GetMaxLayers()
        nczbins_fine    = len(self.czcen_fine)
        numLayers       = np.zeros(nczbins_fine,dtype=np.int32)
        densityInLayer  = np.zeros((nczbins_fine*self.maxLayers),dtype=self.FTYPE)
        distanceInLayer = np.zeros((nczbins_fine*self.maxLayers),dtype=self.FTYPE)

        self.grid_prop.GetNumberOfLayers(numLayers)
        self.grid_prop.GetDensityInLayer(densityInLayer)
        self.grid_prop.GetDistanceInLayer(distanceInLayer)

        # Copy all these earth info arrays to device:
        self.d_numLayers       = cuda.mem_alloc(numLayers.nbytes)
        self.d_densityInLayer  = cuda.mem_alloc(densityInLayer.nbytes)
        self.d_distanceInLayer = cuda.mem_alloc(distanceInLayer.nbytes)
        cuda.memcpy_htod(self.d_numLayers,numLayers)
        cuda.memcpy_htod(self.d_densityInLayer,densityInLayer)
        cuda.memcpy_htod(self.d_distanceInLayer,distanceInLayer)

        self.d_ecen_fine = cuda.mem_alloc(self.ecen_fine.nbytes)
        self.d_czcen_fine = cuda.mem_alloc(self.czcen_fine.nbytes)
        #cuda.memcpy_htod(self.d_ecen_fine,self.ecen_fine)
        cuda.memcpy_htod(self.d_czcen_fine,self.czcen_fine)

        ###############################################
        ###### DEFINE KERNEL
        ###############################################
        kernel_template = """
          #include "mosc.cu"
          #include "mosc3.cu"
          #include "utils.h"
          #include <stdio.h>

          __global__ void propagateGrid(double* d_smooth_maps,
                                        double d_dm[3][3], double d_mix[3][3][2],
                                        const double* const d_ecen_fine,
                                        const double* const d_czcen_fine,
                                        const int nebins_fine, const int nczbins_fine,
                                        const int nebins, const int nczbins, const int maxLayers,
                                        const int* const d_numberOfLayers,
                                        const double* const d_densityInLayer,
                                        const double* const d_distanceInLayer)
          {

            const int2 thread_2D_pos = make_int2(blockIdx.x*blockDim.x + threadIdx.x,
                                                 blockIdx.y*blockDim.y + threadIdx.y);

            // ensure we don't access memory outside of bounds!
            if(thread_2D_pos.x >= nczbins_fine || thread_2D_pos.y >= nebins_fine) return;
            const int thread_1D_pos = thread_2D_pos.y*nczbins_fine + thread_2D_pos.x;

            int eidx = thread_2D_pos.y;
            int czidx = thread_2D_pos.x;

            int kNuBar;
            if(blockIdx.z == 0) kNuBar = 1;
            else kNuBar=-1;

            bool kUseMassEstates = false;

            double TransitionMatrix[3][3][2];
            double TransitionProduct[3][3][2];
            double TransitionTemp[3][3][2];
            double RawInputPsi[3][2];
            double OutputPsi[3][2];
            double Probability[3][3];

            clear_complex_matrix( TransitionMatrix );
            clear_complex_matrix( TransitionProduct );
            clear_complex_matrix( TransitionTemp );
            clear_probabilities( Probability );

            int layers = *(d_numberOfLayers + czidx);

            double energy = d_ecen_fine[eidx];
            //double coszen = d_czcen_fine[czidx];
            for( int i=0; i<layers; i++) {
              double density = *(d_densityInLayer + czidx*maxLayers + i);
              double distance = *(d_distanceInLayer + czidx*maxLayers + i);

              get_transition_matrix( kNuBar,
                                     energy,
                                     density,
                                     distance,
                                     TransitionMatrix,
                                     0.0,
                                     d_mix,
                                     d_dm);

              if(i==0) { copy_complex_matrix(TransitionMatrix, TransitionProduct);
              } else {
                clear_complex_matrix( TransitionTemp );
                multiply_complex_matrix( TransitionMatrix, TransitionProduct, TransitionTemp );
                copy_complex_matrix( TransitionTemp, TransitionProduct );
              }
            } // end layer loop

            // loop on neutrino types, and compute probability for neutrino i:
            // We actually don't care about nutau -> anything since the flux there is zero!
            for( unsigned i=0; i<2; i++) {
              for ( unsigned j = 0; j < 3; j++ ) {
                RawInputPsi[j][0] = 0.0;
                RawInputPsi[j][1] = 0.0;
              }

              if( kUseMassEstates ) convert_from_mass_eigenstate(i+1,kNuBar,RawInputPsi,d_mix);
              else RawInputPsi[i][0] = 1.0;

              multiply_complex_matvec( TransitionProduct, RawInputPsi, OutputPsi );
              Probability[i][0] +=OutputPsi[0][0]*OutputPsi[0][0]+OutputPsi[0][1]*OutputPsi[0][1];
              Probability[i][1] +=OutputPsi[1][0]*OutputPsi[1][0]+OutputPsi[1][1]*OutputPsi[1][1];
              Probability[i][2] +=OutputPsi[2][0]*OutputPsi[2][0]+OutputPsi[2][1]*OutputPsi[2][1];

            }//end of neutrino loop

            int efctr = nebins_fine/nebins;
            int czfctr = nczbins_fine/nczbins;
            int eidx_smooth = eidx/efctr;
            int czidx_smooth = czidx/czfctr;
            double scale = double(efctr*czfctr);
            for (int i=0;i<2;i++) {
              int iMap = 0;
              if (kNuBar == 1) iMap = i*3;
              else iMap = 6 + i*3;

              for (unsigned to_nu=0; to_nu<3; to_nu++) {
                int k = (iMap+to_nu);
                double prob = Probability[i][to_nu];
                atomicAdd((d_smooth_maps + k*nczbins*nebins + eidx_smooth*nczbins +
                           czidx_smooth),prob/scale);
              }
            }

          }
        """

        include_path = os.path.expandvars('$PISA/pisa/oscillations/grid_propagator/')
        #cache_dir=os.path.expandvars('$PISA/pisa/oscillations/'+'.cache_dir')
        logging.trace("  pycuda INC PATH: %s"%include_path)
        #logging.trace("  pycuda cache_dir: %s"%cache_dir)
        logging.trace("  pycuda FLAGS: %s"%pycuda.compiler.DEFAULT_NVCC_FLAGS)
        self.module = SourceModule(kernel_template,
                                   include_dirs=[include_path],
                                   #cache_dir=cache_dir,
                                   keep=True)
        self.propGrid = self.module.get_function("propagateGrid")

        return
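
For orientation only: once compiled, the kernel is retrieved as a pycuda Function, which can be launched with one thread per fine (energy, cos-zenith) cell and blockIdx.z selecting neutrinos vs. antineutrinos. The buffer names, bin counts, and block size below are placeholders for objects prepared elsewhere, not values taken from this excerpt:

# Hypothetical launch of the compiled propagateGrid kernel
block = (16, 16, 1)
grid = ((nczbins_fine + block[0] - 1) // block[0],   # x: cos-zenith cells
        (nebins_fine + block[1] - 1) // block[1],    # y: energy cells
        2)                                           # z: nu / nubar
self.propGrid(d_smooth_maps, d_dm, d_mix,
              self.d_ecen_fine, self.d_czcen_fine,
              np.int32(nebins_fine), np.int32(nczbins_fine),
              np.int32(nebins), np.int32(nczbins), np.int32(self.maxLayers),
              self.d_numLayers, self.d_densityInLayer, self.d_distanceInLayer,
              block=block, grid=grid)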