Example #1
    def select_params(self, selections, error_on_missing=True):
        successes = 0
        if selections is not None:
            for pipeline in self:
                try:
                    pipeline.select_params(selections, error_on_missing=True)
                except KeyError:
                    pass
                else:
                    successes += 1

            if error_on_missing and successes == 0:
                raise KeyError(
                    'None of the stages from any pipeline in this distribution'
                    ' maker has all of the selections %s available.'
                    %(selections,)
                )
        else:
            for pipeline in self:
                possible_selections = pipeline.param_selections
                if possible_selections:
                    logging.warn("Although you didn't make a parameter "
                                 "selection, the following were available: %s."
                                 " This may cause issues.",
                                 possible_selections)
Example #2
File: utils.py Project: lkijmj/pisa
def oversample_binning(coarse_bins, factor):
    """
    Oversample bin edges (coarse_bins) by the given factor
    """

    if is_linear(coarse_bins):
        logging.info('Oversampling linear output binning by factor %i.'
                %factor)
        fine_bins = np.linspace(coarse_bins[0], coarse_bins[-1],
                                factor*(len(coarse_bins)-1)+1)
    elif is_logarithmic(coarse_bins):
        logging.info('Oversampling logarithmic output binning by factor %i.'
                %factor)
        fine_bins = np.logspace(np.log10(coarse_bins[0]),
                                np.log10(coarse_bins[-1]),
                                factor*(len(coarse_bins)-1)+1)
    else:
        logging.warn('Irregular binning detected! Evenly oversampling '
                     'by factor %i'%factor)
        fine_bins = np.array([])
        for i, upper_edge in enumerate(coarse_bins[1:]):
            fine_bins = np.append(fine_bins,
                                  np.linspace(coarse_bins[i], upper_edge,
                                              factor, endpoint=False))

    return fine_bins
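For reference, a minimal worked sketch of what the linear branch above produces (plain NumPy; `is_linear`/`is_logarithmic` belong to the surrounding module and are not needed for this illustration):

import numpy as np

# Oversampling the coarse edges [0, 1, 2, 3] by a factor of 2 yields
# factor*(len(coarse_bins)-1)+1 = 7 fine edges.
coarse_bins = np.array([0., 1., 2., 3.])
factor = 2
fine_bins = np.linspace(coarse_bins[0], coarse_bins[-1],
                        factor * (len(coarse_bins) - 1) + 1)
print(fine_bins)  # [0.  0.5 1.  1.5 2.  2.5 3. ]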
Example #3
def get_reco_arrays(data,cuts,files_per_run,reco_string=None,
                    mcnu='MCNeutrino'):
    '''
    Forms arrays of reco events for true/reco energy/coszen from the
    data_files
    '''

    logging.warn('Getting reconstructions from: %s'%reco_string)

    nfiles = len(set(data.root.I3EventHeader.col('Run')))*files_per_run
    sim_weight = ((2.0*data.root.I3MCWeightDict.col('OneWeight')[cuts]*CMSQ_TO_MSQ)/
                  (data.root.I3MCWeightDict.col('NEvents')[cuts]*nfiles))

    try:
        true_cz = np.cos(data.root.__getattr__(mcnu).col('zenith'))[cuts]
        true_egy = data.root.__getattr__(mcnu).col('energy')[cuts]
        reco_cz = np.cos(data.root.__getattr__(reco_string).col('zenith'))[cuts]
        reco_egy = data.root.__getattr__(reco_string).col('energy')[cuts]
    except:
        true_cz = np.cos(data.root.__getattribute__(mcnu).col('zenith'))[cuts]
        true_egy = data.root.__getattribute__(mcnu).col('energy')[cuts]
        reco_cz = np.cos(data.root.__getattribute__(reco_string).col('zenith'))[cuts]
        reco_egy = data.root.__getattribute__(reco_string).col('energy')[cuts]

    arrays = [true_egy,true_cz,reco_egy,reco_cz,sim_weight]

    return arrays
Example #4
    def compute_function(self):

        self.data.data_specs = self.calc_specs

        # Link containers
        if self.links is not None:
            for key, val in self.links.items():
                self.data.link_containers(key, val)

        # Format the params dict that will be passed to `Hypersurface.evaluate`
        #TODO checks on param units
        param_values = {
            sys_param_name: self.params[sys_param_name].m
            for sys_param_name in self.hypersurface_param_names
        }
        if self.interpolated:
            osc_params = {
                name: self.params[name]
                for name in self.inter_params
            }
        # Evaluate the hypersurfaces
        for container in self.data:
            if self.interpolated:
                # in the case of interpolated hypersurfaces, the actual hypersurface
                # must be generated for the given oscillation parameters first
                container_hs = self.hypersurfaces[
                    container.name].get_hypersurface(**osc_params)
            else:
                container_hs = self.hypersurfaces[container.name]
            # Get the hypersurface scale factors (reshape to 1D array)
            if self.propagate_uncertainty:
                scales, uncertainties = container_hs.evaluate(
                    param_values, return_uncertainty=True)
                scales = scales.reshape(container.size)
                uncertainties = uncertainties.reshape(container.size)
            else:
                scales = container_hs.evaluate(param_values).reshape(
                    container.size)

            # Where there are no scales (e.g. empty bins), set scale factor to 1
            empty_bins_mask = ~np.isfinite(scales)
            num_empty_bins = np.sum(empty_bins_mask)
            if num_empty_bins > 0. and not self.warning_issued:
                logging.warn("%i empty bins found in hypersurface" %
                             num_empty_bins)
                self.warning_issued = True
            scales[empty_bins_mask] = 1.
            if self.propagate_uncertainty:
                uncertainties[empty_bins_mask] = 0.

            # Add to container
            np.copyto(src=scales, dst=container["hs_scales"].get('host'))
            container["hs_scales"].mark_changed()
            if self.propagate_uncertainty:
                np.copyto(src=uncertainties,
                          dst=container["hs_scales_uncertainty"].get('host'))
                container["hs_scales_uncertainty"].mark_changed()

        # Unlink the containers again
        self.data.unlink_containers()
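The scale factors written to `hs_scales` above are typically multiplied into event weights by a later stage, which is not part of this snippet. A purely illustrative sketch of that pattern (the helper name and arrays are hypothetical, not from the original):

import numpy as np

def apply_hs_scales(weights, hs_scales):
    # Hypothetical helper: multiply per-bin/per-event weights by the
    # hypersurface scale factors computed above. Empty bins carry a
    # scale of 1 and therefore leave the weights unchanged.
    return np.asarray(weights) * np.asarray(hs_scales)

print(apply_hs_scales([2.0, 3.0, 0.0], [1.1, 0.9, 1.0]))  # ~[2.2 2.7 0. ]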
Example #5
def oversample_binning(coarse_bins, factor):
    """
    Oversample bin edges (coarse_bins) by the given factor
    """

    if is_linear(coarse_bins):
        logging.info('Oversampling linear output binning by factor %i.' %
                     factor)
        fine_bins = np.linspace(coarse_bins[0], coarse_bins[-1],
                                factor * (len(coarse_bins) - 1) + 1)
    elif is_logarithmic(coarse_bins):
        logging.info('Oversampling logarithmic output binning by factor %i.' %
                     factor)
        fine_bins = np.logspace(np.log10(coarse_bins[0]),
                                np.log10(coarse_bins[-1]),
                                factor * (len(coarse_bins) - 1) + 1)
    else:
        logging.warn('Irregular binning detected! Evenly oversampling '
                     'by factor %i' % factor)
        fine_bins = np.array([])
        for i, upper_edge in enumerate(coarse_bins[1:]):
            fine_bins = np.append(
                fine_bins,
                np.linspace(coarse_bins[i], upper_edge, factor,
                            endpoint=False))

    return fine_bins
Example #6
def get_reco_arrays(data, cuts, reco_string=None, mcnu='MCNeutrino'):
    '''
    Forms arrays of reco events for true/reco energy/coszen from the
    data_files
    '''

    logging.warn('Getting reconstructions from: %s' % reco_string)

    #true_egy = data.root.MCNeutrino.col('energy')[cuts]
    #true_cz = np.cos(data.root.MCNeutrino.col('zenith'))[cuts]

    try:
        true_egy = data.root.__getattr__(mcnu).col('energy')[cuts]
        true_cz = np.cos(data.root.__getattr__(mcnu).col('zenith'))[cuts]
        reco_cz = np.cos(
            data.root.__getattr__(reco_string).col('zenith'))[cuts]
        reco_egy = data.root.__getattr__(reco_string).col('energy')[cuts]
    except:
        true_egy = data.root.__getattribute__(mcnu).col('energy')[cuts]
        true_cz = np.cos(data.root.__getattribute__(mcnu).col('zenith'))[cuts]
        reco_cz = np.cos(
            data.root.__getattribute__(reco_string).col('zenith'))[cuts]
        reco_egy = data.root.__getattribute__(reco_string).col('energy')[cuts]

    arrays = [true_egy, true_cz, reco_egy, reco_cz]

    return arrays
Example #7
def get_reco_arrays(data,cuts,reco_string=None,mcnu='MCNeutrino'):
    '''
    Forms arrays of reco events for true/reco energy/coszen from the
    data_files
    '''

    logging.warn('Getting reconstructions from: %s'%reco_string)

    #true_egy = data.root.MCNeutrino.col('energy')[cuts]
    #true_cz = np.cos(data.root.MCNeutrino.col('zenith'))[cuts]

    try:
        true_egy = data.root.__getattr__(mcnu).col('energy')[cuts]
        true_cz = np.cos(data.root.__getattr__(mcnu).col('zenith'))[cuts]
        reco_cz = np.cos(data.root.__getattr__(reco_string).col('zenith'))[cuts]
        reco_egy = data.root.__getattr__(reco_string).col('energy')[cuts]
    except:
        true_egy = data.root.__getattribute__(mcnu).col('energy')[cuts]
        true_cz = np.cos(data.root.__getattribute__(mcnu).col('zenith'))[cuts]
        reco_cz = np.cos(data.root.__getattribute__(reco_string).col('zenith'))[cuts]
        reco_egy = data.root.__getattribute__(reco_string).col('energy')[cuts]

    arrays = [true_egy,true_cz,reco_egy,reco_cz]

    return arrays
Example #8
 def setup_function(self):
     '''
     Check the range of the axial mass parameters used in the analysis
     and issue a warning if these extend beyond +/- 2 sigma.
     '''
     if self.params['Genie_Ma_QE'].range[0] < -2. or self.params['Genie_Ma_QE'].range[1] > 2.:
         logging.warn('Genie_Ma_QE parameter bounds have been set larger than '
                      'the range used to produce the interpolation points '
                      '([-2., 2.]). This will void the warranty...')
     if self.params['Genie_Ma_RES'].range[0] < -2. or self.params['Genie_Ma_RES'].range[1] > 2.:
         logging.warn('Genie_Ma_RES parameter bounds have been set larger than '
                      'the range used to produce the interpolation points '
                      '([-2., 2.]). This will void the warranty...')
Example #9
def check_scipy_version(minimizer_settings):
    #Workaround for old scipy versions
    import scipy
    if scipy.__version__ < '0.12.0':
        logging.warn('Detected scipy version %s < 0.12.0'%scipy.__version__)
        if 'maxiter' in minimizer_settings:
            logging.warn('Optimizer settings for \"maxiter\" will be ignored')
            minimizer_settings.pop('maxiter')
    return
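Note that the lexicographic string comparison above can misorder releases (for instance '0.9.1' compares greater than '0.12.0' as a string, so the workaround would not trigger). A more robust variant, sketched here with the version parsed into an integer tuple (the helper name is illustrative, not from the original):

import scipy

def scipy_older_than(target='0.12.0'):
    # Compare numeric version tuples instead of raw strings so that,
    # e.g., 0.9.x correctly sorts before 0.12.0.
    def parse(version):
        return tuple(int(p) for p in version.split('.')[:3] if p.isdigit())
    return parse(scipy.__version__) < parse(target)

print(scipy_older_than('0.12.0'))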
Example #10
def check_scipy_version(minimizer_settings):
    #Workaround for old scipy versions
    import scipy
    if scipy.__version__ < '0.12.0':
        logging.warn('Detected scipy version %s < 0.12.0'%scipy.__version__)
        if 'maxiter' in minimizer_settings:
            logging.warn('Optimizer settings for \"maxiter\" will be ignored')
            minimizer_settings.pop('maxiter')
    return
Example #11
def test_nsi_parameterization():
    """Unit test for Hvac-like NSI parameterization."""
    alpha1, alpha2, deltansi = np.random.rand(3) * 2. * np.pi
    phi12, phi13, phi23 = np.random.rand(3) * 2 * np.pi - np.pi
    eps_max_abs = 10.0
    eps_scale, eps_prime = np.random.rand(2) * 2 * eps_max_abs - eps_max_abs
    nsi_params = VacuumLikeNSIParams()
    nsi_params.eps_scale = eps_scale
    nsi_params.eps_prime = eps_prime
    nsi_params.phi12 = phi12
    nsi_params.phi13 = phi13
    nsi_params.phi23 = phi23
    nsi_params.alpha1 = alpha1
    nsi_params.alpha2 = alpha2
    nsi_params.deltansi = deltansi

    logging.trace(
        'Checking agreement between numerical & analytical NSI matrix...')

    eps_mat_numerical = nsi_params.eps_matrix
    eps_mat_analytical = nsi_params.eps_matrix_analytical

    logging.trace("Numerical NSI matrix:\n%s" % eps_mat_numerical)
    logging.trace("Analytical expansion (by hand):\n%s" % eps_mat_analytical)
    try:
        close = np.isclose(eps_mat_numerical, eps_mat_analytical,
                           **ALLCLOSE_KW)
        if not np.all(close):
            raise ValueError(
                'Evaluating analytical expressions for NSI matrix elements'
                ' does not give agreement with numerical calculation!'
                ' Elementwise agreement:\n%s' % close)
    except ValueError as e:
        logging.warn(
            str(e) + "...\nThis is expected."
            " Going ahead with numerical calculation for now.")

    logging.trace('Now checking agreement with sympy calculation...')

    eps_mat_sympy = nsi_sympy_mat_mult(eps_scale_val=eps_scale,
                                       eps_prime_val=eps_prime,
                                       phi12_val=phi12,
                                       phi13_val=phi13,
                                       phi23_val=phi23,
                                       alpha1_val=alpha1,
                                       alpha2_val=alpha2,
                                       deltansi_val=deltansi)
    logging.trace('Numerical NSI matrix:\n%s' % eps_mat_numerical)
    logging.trace('Sympy NSI matrix:\n%s' % eps_mat_sympy)
    close = np.isclose(eps_mat_numerical, eps_mat_sympy, **ALLCLOSE_KW)
    if not np.all(close):
        raise ValueError(
            'Sympy and numerical calculations disagree! Elementwise agreement:\n'
            '%s' % close)
Example #12
    def get_reco_kernels(self, **kwargs):
        """
        Wrapper around _get_reco_kernels() intended for external use;
        ensures that the reco kernels have the correct shape and are normalized.
        """
        kernels = self._get_reco_kernels(**kwargs)
        if kernels is None:
            logging.warn("No kernels defined yet...")
            return kernels

        if self.check_kernels(kernels):
            return kernels
Example #13
    def get_reco_kernels(self, **kwargs):
        """
        Wrapper around _get_reco_kernels() intended for external use;
        ensures that the reco kernels have the correct shape and are normalized.
        """
        kernels = self._get_reco_kernels(**kwargs)
        if kernels is None:
            logging.warn("No kernels defined yet...")
            return kernels

        if self.check_kernels(kernels):
            return kernels
Example #14
File: hdf.py Project: mamday/pisa
 def store_recursively(fhandle, node, path=[], node_hashes={}):
     full_path = '/' + '/'.join(path)
     if isinstance(node, dict):
         try:
             fhandle.create_group(full_path)
         except ValueError:
             pass
         for key in sorted(node.iterkeys()):
             key_str = str(key)
             if not isinstance(key, str):
                 logging.warn('Stringifying key "' + key_str +
                              '" for use as name in HDF5 file')
             val = node[key]
             new_path = path + [key_str]
             store_recursively(fhandle=fhandle,
                               node=val,
                               path=new_path,
                               node_hashes=node_hashes)
     else:
         # Check for existing node
         node_hash = utils.utils.hash_obj(node)
         if node_hash in node_hashes:
             # Hardlink the matching existing dataset
             fhandle[full_path] = fhandle[node_hashes[node_hash]]
             return
         node_hashes[node_hash] = full_path
         # "Scalar datasets don't support chunk/filter options"; extra
         # checking that a sequence isn't a string, also. Shuffling is
         # a good idea since subsequent compression will generally benefit;
         # shuffling requires chunking. Compression is not done here
         # since it is slow.
         if hasattr(node, '__iter__') and not isinstance(node, basestring):
             shuffle = True
             chunks = True
         else:
             shuffle = False
             chunks = None
         fhandle.create_dataset(name=full_path,
                                data=node,
                                chunks=chunks,
                                compression=None,
                                shuffle=shuffle,
                                fletcher32=False)
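A minimal usage sketch for the helper above (assuming an h5py file handle and the original's Python 2 environment, since the function relies on `iterkeys` and `basestring`; the file name is illustrative):

import h5py
import numpy as np

data = {'reco': {'energy': np.arange(5.), 'coszen': np.linspace(-1., 0., 5)},
        'livetime': 2.5}

# store_recursively() walks the nested dict, creating one group per dict and
# one dataset per leaf, hardlinking duplicate leaves. Passing fresh path/
# node_hashes explicitly avoids reusing the mutable default arguments.
with h5py.File('example_output.hdf5', 'w') as fhandle:
    store_recursively(fhandle=fhandle, node=data, path=[], node_hashes={})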
Example #15
 def get_earth_model(self, model):
     """
     Check whether the specified Earth density profile has a correct
     NuCraft preface. If not, create a temporary file that does.
     """
     logging.debug('Trying to construct Earth model from "%s"'%model)
     try:
         resource_path = find_resource(model)
         self.earth_model = EarthModel(resource_path)
         logging.info('Loaded Earth model from %s'%model)
     except SyntaxError:
         #Probably the file is lacking the correct preamble
         logging.warn('Failed to construct NuCraft Earth model from '
                      '%s! Adding default preamble...'%resource_path)
         #Generate tempfile with preamble
         with open(resource_path, 'r') as infile:
             profile_lines = infile.readlines()
         preamble = ['# nuCraft Earth model with PREM density '
                      'values for use as template; keep structure '
                      'of the first six lines unmodified!\n',
                     '(0.5, 0.5, 0.5)   # tuple of (relative) '
                      'electron numbers for mantle, outer core, '
                      'and inner core\n',
                     '6371.    # radius of the Earth\n',
                     '3480.    # radius of the outer core\n',
                     '1121.5   # radius of the inner core\n',
                     '# two-columned list of radii and corresponding '
                      'matter density values in km and kg/dm^3; '
                      'add, remove or modify lines as necessary\n']
         tfile = NamedTemporaryFile()
         tfile.writelines(preamble+profile_lines)
         tfile.flush()
         try:
             self.earth_model = EarthModel(tfile.name)
         except:
             logging.error('Could not construct Earth model from %s: %s'
                           %(model, sys.exc_info()[1]))
             sys.exit(1)
         logging.info('Successfully constructed Earth model')
         tfile.close()
     except IOError:
         logging.info('Using NuCraft built-in Earth model "%s"'%model)
         self.earth_model = EarthModel(model)
Example #16
def mkdir(d, mode=0o2777, group=None, warn=True):
    """Only set mode and group for dirs created by this function"""
    d = expand(d)
    gid = None
    if group is not None:
        gid = get_gid(group)

    if warn and os.path.isdir(d):
        logging.warn('Directory already exists: "%s"', d)
        return

    dirs = path_components(d)
    fullpath = ''
    for d in dirs:
        fullpath = os.path.join(fullpath, d)
        if os.path.isdir(fullpath):
            continue
        os.mkdir(fullpath, mode)
        if gid is not None:
            os.chown(fullpath, -1, gid)
Example #17
File: hdf.py Project: lkijmj/pisa
 def store_recursively(fhandle, node, path=[], node_hashes={}):
     full_path = '/' + '/'.join(path)
     if isinstance(node, dict):
         try:
             fhandle.create_group(full_path)
         except ValueError:
             pass
         for key in sorted(node.iterkeys()):
             key_str = str(key)
             if not isinstance(key, str):
                 logging.warn('Stringifying key "' + key_str +
                              '" for use as name in HDF5 file')
             val = node[key]
             new_path = path + [key_str]
             store_recursively(fhandle=fhandle, node=val, path=new_path,
                               node_hashes=node_hashes)
     else:
         # Check for existing node
         node_hash = utils.utils.hash_obj(node)
         if node_hash in node_hashes:
             # Hardlink the matching existing dataset
             fhandle[full_path] = fhandle[node_hashes[node_hash]]
             return
         node_hashes[node_hash] = full_path
         # "Scalar datasets don't support chunk/filter options"; extra
         # checking that a sequence isn't a string, also. Shuffling is
         # a good idea since subsequent compression will generally benefit;
         # shuffling requires chunking. Compression is not done here
         # since it is slow.
         if hasattr(node, '__iter__') and not isinstance(node, basestring):
             shuffle = True
             chunks = True
         else:
             shuffle = False
             chunks = None
         fhandle.create_dataset(name=full_path, data=node, chunks=chunks,
                           compression=None, shuffle=shuffle,
                           fletcher32=False)
Example #18
    def single_kernel_set(self,
                          e_true,
                          cz_true,
                          e_reco,
                          cz_reco,
                          flav,
                          int_type,
                          make_plots=False,
                          out_dir=None):
        """Construct a 4D kernel set from MC events using VBWKDE.

        Given a set of MC events and each of their {energy{true, reco},
        coszen{true, reco}}, generate a 4D NumPy array that maps a 2D true-flux
        histogram onto the corresponding 2D reco-flux histogram.

        The resulting 4D array can be indexed logically using
          kernel4d[e_true_i, cz_true_j][e_reco_k, cz_reco_l]
        where the 4 indices point from a single MC-true histogram bin (i,j) to
        a single reco histogram bin (k,l).

        Binning of both MC-true and reco histograms is the same and is given by
        the values in self.ebins and self.czbins which define the bin *edges*
        (not the bin centers; hence, len(self.ebins) is one greater than the
        number of bins, etc.).

        NOTE: Actual limits in energy used to group events into a single "true"
        bin may be extended beyond the bin edges defined by self.ebins in order
        to gather enough events to successfully apply VBWKDE.

        Parameters
        ----------
        e_true : sequence
            MC-true neutrino energies, one per event
        cz_true : sequence
            MC-true neutrino coszen, one per event
        e_reco : sequence
            Reconstructed neutrino energies, one per event
        cz_reco : sequence
            Reconstructed neutrino coszen, one per event
        flav : str
        int_type : str
        make_plots : bool
        out_dir : str or None
            path to directory into which to save plots. ``None`` (default)
            saves to PWD.

        Returns
        -------
        kernel4d : 4D array of float
            Mapping from the number of events in each bin of the 2D
            MC-true-events histogram to the number of events reconstructed in
            each bin of the 2D reconstructed-events histogram. Dimensions are
              len(self.ebins)-1 x len(self.czbins)-1 x len(self.ebins)-1 x
              len(self.czbins)-1
            since ebins and czbins define the histograms' bin edges.
        """
        OVERFIT_FACTOR = 1.0

        if make_plots:
            import matplotlib as mpl
            import matplotlib.pyplot as plt
            from matplotlib.backends.backend_pdf import PdfPages
            from matplotlib.patches import Rectangle
            plt.close(1)
            plt.close(2)
            plt.close(3)

            def rugplot(a, y0, dy, ax, **kwargs):
                return ax.plot([a, a], [y0, y0 + dy], **kwargs)

            plot_fname = '_'.join(['resolutions', 'vbwkde', flav, int_type
                                   ]) + '.pdf'
            if out_dir is not None:
                plot_fname = os.path.join(out_dir, plot_fname)
            TOP = 0.925
            BOTTOM = 0.05
            RIGHT = 0.97
            LEFT = 0.07
            HSPACE = 0.12
            LABELPAD = 0.058
            AXISBG = (0.5, 0.5, 0.5)
            DARK_RED = (0.7, 0.0, 0.0)
            HIST_PP = dict(facecolor=(1, 0.5, 0.5),
                           edgecolor=DARK_RED,
                           histtype='stepfilled',
                           alpha=0.7,
                           linewidth=2.0,
                           label=r'$\mathrm{Histogram}$')
            N_HBINS = 25
            DIFFUS_PP = dict(color=(0.0, 0.0, 0.0),
                             linestyle='-',
                             marker=None,
                             alpha=0.6,
                             linewidth=2.0,
                             label=r'$\mathrm{VBWKDE}$')
            RUG_PP = dict(color=(1.0, 1.0, 1.0), linewidth=0.4, alpha=0.5)
            RUG_LAB = r'$\mathrm{Rug\,plot}$'
            LEGFNTCOL = (1, 1, 1)
            LEGFACECOL = (0.2, 0.2, 0.2)
            GRIDCOL = (0.4, 0.4, 0.4)
            pdfpgs = PdfPages(plot_fname)

        assert np.min(np.diff(self.ebins)) > 0, \
            "Energy bin edges not monotonically increasing."
        assert np.min(np.diff(self.czbins)) > 0, \
            "coszen bin edges not monotonically increasing."

        # NOTE: below defines bin centers on linear scale; other logic
        # in this method assumes this to be the case, so
        # **DO NOT USE** utils.utils.get_bin_centers in this method, which
        # may return logarithmically-defined centers instead.

        ebin_edges = np.array(self.ebins)
        left_ebin_edges = ebin_edges[0:-1]
        right_ebin_edges = ebin_edges[1:]
        ebin_centers = (left_ebin_edges + right_ebin_edges) / 2.0
        ebin_range = ebin_edges[-1] - ebin_edges[0]
        n_ebins = len(ebin_centers)

        czbin_edges = np.array(self.czbins)
        left_czbin_edges = czbin_edges[0:-1]
        right_czbin_edges = czbin_edges[1:]
        czbin_centers = (left_czbin_edges + right_czbin_edges) / 2.0
        n_czbins = len(czbin_centers)

        n_events = len(e_true)

        if self.MIN_NUM_EVENTS > n_events:
            self.MIN_NUM_EVENTS = n_events
        if self.TGT_NUM_EVENTS > n_events:
            self.TGT_NUM_EVENTS = n_events

        # Object with which to store the 4D kernels: np 4D array
        kernel4d = np.zeros((n_ebins, n_czbins, n_ebins, n_czbins))

        # Object with which to store the 2D "aggregate_map": the total number
        # of events reconstructed into a given (E, CZ) bin, used for sanity
        # checks
        aggregate_map = np.zeros((n_ebins, n_czbins))
        for ebin_n in range(n_ebins):
            ebin_min = left_ebin_edges[ebin_n]
            ebin_max = right_ebin_edges[ebin_n]
            ebin_mid = (ebin_min + ebin_max) / 2.0
            ebin_wid = ebin_max - ebin_min

            logging.debug('Processing true-energy bin_n=' +
                          format(ebin_n, 'd') + ' of ' +
                          format(n_ebins - 1, 'd') + ', E_{nu,true} in ' +
                          '[' + format(ebin_min, '0.3f') + ', ' +
                          format(ebin_max, '0.3f') + '] ...')

            # Absolute distance from these events' re-centered reco energies to
            # the center of this energy bin; sort in ascending-distance order
            abs_enu_dist = np.abs(e_true - ebin_mid)
            sorted_abs_enu_dist = np.sort(abs_enu_dist)

            # Grab the distance the number-"TGT_NUM_EVENTS" event is from the
            # bin center
            tgt_thresh_enu_dist = sorted_abs_enu_dist[self.TGT_NUM_EVENTS - 1]

            # Grab the distance the number-"MIN_NUM_EVENTS" event is from the
            # bin center
            min_thresh_enu_dist = sorted_abs_enu_dist[self.MIN_NUM_EVENTS - 1]

            # TODO: revisit the below algorithm with proper testing

            # Make threshold distance (which is half the total width) no more
            # than 4x the true-energy-bin width in order to capture the
            # "target" number of points (TGT_NUM_EVENTS) but no less than half
            # the bin width (i.e., the bin should be at least be as wide as the
            # pre-defined bin width).
            #
            # HOWEVER, allow the threshold distance (bin half-width) to expand
            # to as much as 4x the original bin full-width in order to capture
            # the "minimum" number of points (MIN_NUM_EVENTS).
            thresh_enu_dist = \
                    max(min(max(tgt_thresh_enu_dist, ebin_wid/2),
                            4*ebin_wid),
                        min_thresh_enu_dist)

            # Grab all events within the threshold distance
            in_ebin_ind = np.where(abs_enu_dist <= thresh_enu_dist)[0]
            #print '** IN EBIN FIRST, LAST ENERGY:', e_reco[in_ebin_ind[0]], e_reco[in_ebin_ind[-1]]
            n_in_bin = len(in_ebin_ind)

            # Record lowest/highest energies that are included in the bin
            actual_left_ebin_edge = min(
                ebin_min, min(e_true[in_ebin_ind]
                              ))  #max(min(ebins), ebin_mid-thresh_enu_dist)
            actual_right_ebin_edge = max(ebin_max, max(
                e_true[in_ebin_ind]))  #(max(ebins), ebin_mid+thresh_enu_dist)

            # Extract just the neutrino-energy/coszen error columns' values for
            # succinctness
            enu_err = e_reco[in_ebin_ind] - e_true[in_ebin_ind]
            cz_err = cz_reco[in_ebin_ind] - cz_true[in_ebin_ind]

            #==================================================================
            # Neutrino energy resolutions
            #==================================================================
            dmin = min(enu_err)
            dmax = max(enu_err)
            drange = dmax - dmin

            e_lowerlim = min(self.ENERGY_RANGE[0] - ebin_mid * 1.5,
                             dmin - drange * 0.5)
            e_upperlim = max((np.max(ebin_edges) - ebin_mid) * 1.5,
                             dmax + drange * 0.5)
            egy_kde_lims = np.array([e_lowerlim, e_upperlim])

            # Use at least min_num_pts points and at most the next-highest
            # integer-power-of-two that allows for at least 10 points in the
            # smallest energy bin
            min_num_pts = 2**12
            min_bin_width = np.min(ebin_edges[1:] - ebin_edges[:-1])
            min_pts_smallest_bin = 5.0
            kde_range = np.diff(egy_kde_lims)
            num_pts0 = kde_range / (min_bin_width / min_pts_smallest_bin)
            kde_num_pts = int(max(min_num_pts, 2**np.ceil(np.log2(num_pts0))))
            logging.debug('  N_evts=' + str(n_in_bin) + ', taken from [' +
                          format(actual_left_ebin_edge, '0.3f') + ', ' +
                          format(actual_right_ebin_edge, '0.3f') + ']' +
                          ', VBWKDE lims=' + str(egy_kde_lims) +
                          ', VBWKDE_N: ' + str(kde_num_pts))

            # Compute variable-bandwidth KDEs
            enu_bw, enu_mesh, enu_pdf = kde.vbw_kde(
                data=enu_err,
                overfit_factor=OVERFIT_FACTOR,
                MIN=egy_kde_lims[0],
                MAX=egy_kde_lims[1],
                N=kde_num_pts)

            if np.min(enu_pdf) < 0:
                # Only issue warning if the most-negative value is negative
                # beyond specified acceptable-numerical-precision threshold
                # (EPSILON)
                if np.min(enu_pdf) <= -self.EPSILON:
                    logging.warn("np.min(enu_pdf) < 0: Minimum value is " +
                                 str(np.min(enu_pdf)) +
                                 "; forcing all negative values to 0.")
                # Otherwise, just quietly clip any negative values at 0
                enu_pdf = np.clip(a=enu_pdf, a_min=0, a_max=np.inf)

            assert np.min(enu_pdf) >= 0, str(np.min(enu_pdf))

            # Re-center distribution at the center of the energy bin for which
            # errors were computed
            offset_enu_mesh = enu_mesh + ebin_mid
            offset_enu_pdf = enu_pdf

            # Get reference area under the PDF, for checking after interpolated
            # values are added.
            #
            # NOTE There should be NO normalization because any events lost due
            # to cutting off tails outside the binned region are actually going
            # to be lost, and so should penalize the total area.
            int_val0 = np.trapz(y=offset_enu_pdf, x=offset_enu_mesh)

            # Create linear interpolator for the PDF
            interp = interpolate.interp1d(x=offset_enu_mesh,
                                          y=offset_enu_pdf,
                                          kind='linear',
                                          copy=True,
                                          bounds_error=True,
                                          fill_value=np.nan)

            # Insert all bin edges' exact locations into the mesh (For accurate
            # accounting of area in each bin, must include values out to bin
            # edges)
            edge_locs = [
                be
                for be in np.concatenate((left_ebin_edges, right_ebin_edges))
                if not (be in offset_enu_mesh)
            ]
            edge_locs.sort()
            edge_pdfs = interp(edge_locs)
            insert_ind = np.searchsorted(offset_enu_mesh, edge_locs)
            offset_enu_mesh = np.insert(offset_enu_mesh, insert_ind, edge_locs)
            offset_enu_pdf = np.insert(offset_enu_pdf, insert_ind, edge_pdfs)

            int_val = np.trapz(y=offset_enu_pdf, x=offset_enu_mesh)

            assert np.abs(int_val - int_val0) < self.EPSILON

            # Chop off distribution at extrema of energy bins
            valid_ind = np.where((offset_enu_mesh >= np.min(ebin_edges))
                                 & (offset_enu_mesh <= np.max(ebin_edges)))[0]
            offset_enu_mesh = offset_enu_mesh[valid_ind]
            offset_enu_pdf = offset_enu_pdf[valid_ind]

            # Check that there are no negative density values (after inserts)
            assert np.min(offset_enu_pdf) > 0-self.EPSILON, \
                str(np.min(offset_enu_pdf))

            # Record the integrated area after removing parts outside binned
            # range
            tot_ebin_area0 = np.trapz(y=offset_enu_pdf, x=offset_enu_mesh)

            # Check that it integrates to <= 1, sanity check
            assert tot_ebin_area0 < 1 + self.EPSILON, str(tot_ebin_area0)

            # Identify indices encapsulating the defined energy bins' ranges,
            # and find the area of each bin
            lbinds = np.searchsorted(offset_enu_mesh, left_ebin_edges)
            rbinds = np.searchsorted(offset_enu_mesh, right_ebin_edges)
            bininds = zip(lbinds, rbinds)
            ebin_areas = [
                np.trapz(y=offset_enu_pdf[l:r + 1], x=offset_enu_mesh[l:r + 1])
                for (l, r) in bininds
            ]

            # Check that no bins have negative areas
            assert np.min(ebin_areas) >= 0

            # Sum the individual bins' areas
            tot_ebin_area = np.sum(ebin_areas)

            # Check that this total of all the bins is equal to the total area
            # under the curve (i.e., make sure there is no overlap or gaps
            # between bins)
            assert np.abs(tot_ebin_area-tot_ebin_area0) < self.EPSILON, \
                    'tot_ebin_area=' + str(tot_ebin_area) + \
                    ' should equal tot_ebin_area0=' + str(tot_ebin_area0)

            if make_plots:
                fig1 = plt.figure(1, figsize=(8, 10), dpi=90)
                fig1.clf()
                ax1 = fig1.add_subplot(211, axisbg=AXISBG)

                # Retrieve region where VBWKDE lives
                ml_ci = confInterval.MLConfInterval(x=enu_mesh, y=enu_pdf)
                #for conf in np.logspace(np.log10(0.999), np.log10(0.95), 50):
                #    try:
                #        lb, ub, yopt, r = ml_ci.findCI_lin(conf=conf)
                #    except:
                #        pass
                #    else:
                #        break
                #xlims = (min(-ebin_mid*1.5, lb),
                #         max(min(ub, 6*ebin_mid),2*ebin_mid))
                lb, ub, yopt, r = ml_ci.findCI_lin(conf=0.98)
                xlims = (
                    lb,  #min(-ebin_mid*1.5, lb),
                    max(min(ub, 6 * ebin_mid), 2 * ebin_wid))

                #xlims = (
                #    -ebin_wid*1.5,
                #    ebin_wid*1.5
                #)
                #    min(ebin_mid*2, ebin_edges[-1]+(ebin_edges[-1]-ebin_edges[0])*0.1)
                #)

                # Histogram of events' reco error
                hbins = np.linspace(
                    dmin - 0.02 * drange, dmax + 0.02 * drange,
                    N_HBINS * np.round(drange / ebin_centers[ebin_n]))
                hvals, hbins, hpatches = ax1.hist(enu_err,
                                                  bins=hbins,
                                                  normed=True,
                                                  **HIST_PP)

                # Plot the VBWKDE
                ax1.plot(enu_mesh, enu_pdf, **DIFFUS_PP)
                axlims = ax1.axis('tight')
                ax1.set_xlim(xlims)
                ymax = axlims[3] * 1.05
                ax1.set_ylim(0, ymax)

                # Grey-out regions outside binned region, so it's clear what
                # part of tail(s) will be thrown away
                width = -ebin_mid + ebin_edges[0] - xlims[0]
                unbinned_region_tex = r'$\mathrm{Unbinned}$'
                if width > 0:
                    ax1.add_patch(
                        Rectangle(
                            (xlims[0], 0),
                            width,
                            ymax,  #zorder=-1,
                            alpha=0.30,
                            facecolor=(0.0, 0.0, 0.0),
                            fill=True,
                            ec='none'))
                    ax1.text(xlims[0] + (xlims[1] - xlims[0]) / 40.,
                             ymax / 10.,
                             unbinned_region_tex,
                             fontsize=14,
                             ha='left',
                             va='bottom',
                             rotation=90,
                             color='k')

                width = xlims[1] - (ebin_edges[-1] - ebin_mid)
                if width > 0:
                    ax1.add_patch(
                        Rectangle((xlims[1] - width, 0),
                                  width,
                                  ymax,
                                  alpha=0.30,
                                  facecolor=(0, 0, 0),
                                  fill=True,
                                  ec='none'))
                    ax1.text(xlims[1] - (xlims[1] - xlims[0]) / 40.,
                             ymax / 10.,
                             unbinned_region_tex,
                             fontsize=14,
                             ha='right',
                             va='bottom',
                             rotation=90,
                             color='k')

                # Rug plot of events' reco energy errors
                ylim = ax1.get_ylim()
                dy = ylim[1] - ylim[0]
                ruglines = rugplot(enu_err,
                                   y0=ylim[1],
                                   dy=-dy / 40.,
                                   ax=ax1,
                                   **RUG_PP)
                ruglines[-1].set_label(RUG_LAB)

                # Legend
                leg_title_tex = r'$\mathrm{Normalized}\,E_\nu\mathrm{-err.\,distr.}$'
                x1lab = ax1.set_xlabel(
                    r'$E_{\nu,\mathrm{reco}}-E_{\nu,\mathrm{true}}\;' +
                    r'(\mathrm{GeV})$',
                    labelpad=LABELPAD)
                leg = ax1.legend(loc='upper right',
                                 title=leg_title_tex,
                                 frameon=True,
                                 framealpha=0.8,
                                 fancybox=True,
                                 bbox_to_anchor=[1, 0.975])

                # Other plot details
                ax1.xaxis.set_label_coords(0.9, -LABELPAD)
                ax1.xaxis.grid(color=GRIDCOL)
                ax1.yaxis.grid(color=GRIDCOL)
                leg.get_title().set_fontsize(16)
                leg.get_title().set_color(LEGFNTCOL)
                [t.set_color(LEGFNTCOL) for t in leg.get_texts()]
                frame = leg.get_frame()
                frame.set_facecolor(LEGFACECOL)
                frame.set_edgecolor(None)

            #==================================================================
            # Neutrino coszen resolution for events in this energy bin
            #==================================================================
            dmin = min(cz_err)
            dmax = max(cz_err)
            drange = dmax - dmin

            # NOTE the limits are 1 less than / 1 greater than the limits that
            # the error will actually take on, so as to allow for any smooth
            # roll-off at edges of data. The calculation of areas below
            # captures all of the area, though, by reflecting bins defined in
            # [-1, 1] about the points -1 and 1, thereby capturing any
            # densities in the range [-3, +3]. This is not necessarily
            # accurate, but it's better than throwing that info out entirely.
            #
            # NOTE also that since reco events as of now are only in range -1
            # to 0, though, that there are "gaps" in the capture range, but
            # this is due to densities being in the upper-hemisphere which we
            # are intentionally ignoring, rather than the code here not taking
            # them into account. Normalization is based upon *all* events,
            # whether or not they fall within a bin specified above.

            # Number of points in the mesh used for VBWKDE; must be large
            # enough to capture fast changes in the data but the larger the
            # number, the longer it takes to compute the densities at all the
            # points. Here, just choosing a fixed number regardless of the data
            # or binning
            N_cz_mesh = 2**10

            # Data range for VBWKDE to consider
            cz_kde_min = -3
            cz_kde_max = +2

            cz_kde_failed = False
            previous_fail = False
            for n in xrange(3):
                # TODO: only catch specific exception
                try:
                    cz_bw, cz_mesh, cz_pdf = kde.vbw_kde(
                        data=cz_err,
                        overfit_factor=OVERFIT_FACTOR,
                        MIN=cz_kde_min,
                        MAX=cz_kde_max,
                        N=N_cz_mesh)
                except:
                    cz_kde_failed = True
                    if n == 0:
                        logging.trace('(cz vbwkde ')
                    logging.trace('fail, ')
                    # If failure occurred in vbw_kde, expand the data range it
                    # takes into account; this usually helps
                    cz_kde_min -= 1
                    cz_kde_max += 1
                else:
                    if cz_kde_failed:
                        previous_fail = True
                        logging.trace('success!')
                    cz_kde_failed = False
                finally:
                    if previous_fail:
                        logging.trace(')')
                    previous_fail = False
                    if not cz_kde_failed:
                        break

            if cz_kde_failed:
                logging.warn('Failed to fit VBWKDE!')
                continue

            if np.min(cz_pdf) < 0:
                logging.warn("np.min(cz_pdf) < 0: Minimum value is " +
                             str(np.min(cz_pdf)) +
                             "; forcing all negative values to 0.")
                # Clip the PDF (not the mesh) and keep the result
                cz_pdf = np.clip(a=cz_pdf, a_min=0, a_max=np.inf)

            assert np.min(cz_pdf) >= -self.EPSILON, \
                str(np.min(cz_pdf))

            # TODO: test and/or visualize the shifting & re-binning process
            for czbin_n in range(n_czbins):
                czbin_mid = czbin_centers[czbin_n]

                # Re-center distribution at the center of the current cz bin
                offset_cz_mesh = cz_mesh + czbin_mid

                # Create interpolation object, used to fill in bin edge values
                interp = interpolate.interp1d(x=offset_cz_mesh,
                                              y=cz_pdf,
                                              kind='linear',
                                              copy=True,
                                              bounds_error=False,
                                              fill_value=0)

                # Figure out where all bin edges lie in this re-centered
                # distribution (some bins may be repeated since bins in [-1,0]
                # and err in [-2,1]:
                #
                # 1. Find limits of mesh values..
                mmin = offset_cz_mesh[0]
                mmax = offset_cz_mesh[-1]

                # 2. Map all bin edges into the full mesh-value range,
                # reflecting about -1 and +1. If the reflected edge is outside
                # the mesh range, use the exceeded limit of the mesh range as
                # the bin edge instead.
                #
                # This maps every bin edge {i} to 3 new edges, indexed
                # new_edges[i][{0,1,2}]. Bins are formed by adjacent indices
                # and same-subindices, so what started as, e.g., bin 3 now is
                # described by (left, right) edges at
                #   (new_edges[3][0], new_edges[4][0]),
                #   (new_edges[3][1], new_edges[4][1]), and
                #   (new_edges[3][2], new_edges[4][2]).

                # NOTE / TODO: It's tempting to dynamically set the number of
                # reflections to minimize computation time, but I think it
                # breaks the code. Just set to a reasonably large number for
                # now and accept the performance penalty. ALSO: if you change
                # the parity of the number of reflections, the code below that
                # has either (wrap_n % 2 == 0) or (wrap_n+1 % 2 == 0) must be
                # swapped!!!
                n_left_reflections = 4
                n_right_reflections = 4

                new_czbin_edges = []
                for edge in czbin_edges:
                    edges_refl_left = []
                    for n in xrange(n_left_reflections):
                        edge_refl_left = reflect1d(edge, -1 - (2 * n))
                        if edge_refl_left < mmin:
                            edge_refl_left = mmin
                        edges_refl_left.append(edge_refl_left)
                    edges_refl_right = []
                    for n in xrange(n_right_reflections):
                        edge_refl_right = reflect1d(edge, +1 + (2 * n))
                        if edge_refl_right > mmax:
                            edge_refl_right = mmax
                        edges_refl_right.append(edge_refl_right)
                    # Include all left-reflected versions of this bin edge, in
                    # increasing-x order + this bin edge + right-reflected
                    # versions of this bin edge
                    new_czbin_edges.append(edges_refl_left[::-1] + [edge] +
                                           edges_refl_right)

                # Record all unique bin edges
                edge_locs = set()
                [edge_locs.update(edges) for edges in new_czbin_edges]

                # Throw away bin edges that are already in the mesh
                [
                    edge_locs.remove(edge) for edge in list(edge_locs)
                    if edge in offset_cz_mesh
                ]

                # Make into sorted list
                edge_locs = sorted(edge_locs)

                # Record the total area under the curve
                int_val0 = np.trapz(y=cz_pdf, x=offset_cz_mesh)

                # Insert the missing bin edge locations & pdf-values into
                # the mesh & pdf, respectively
                edge_pdfs = interp(edge_locs)
                insert_ind = np.searchsorted(offset_cz_mesh, edge_locs)
                offset_cz_mesh = np.insert(offset_cz_mesh, insert_ind,
                                           edge_locs)
                offset_cz_pdf = np.insert(cz_pdf, insert_ind, edge_pdfs)
                assert np.min(offset_cz_pdf) > -self.EPSILON

                # Check that this total of all the bins is equal to the total
                # area under the curve (i.e., check there is no overlap between
                # or gaps between bins)
                int_val = np.trapz(y=offset_cz_pdf, x=offset_cz_mesh)
                assert np.abs(int_val - 1) < self.EPSILON

                # Renormalize if it's not exactly 1
                if int_val != 1.0:
                    offset_cz_pdf = offset_cz_pdf / int_val

                # Add up the area in the bin and areas that are "reflected"
                # into this bin
                new_czbin_edges = np.array(new_czbin_edges)
                czbin_areas = np.zeros(np.shape(new_czbin_edges)[0] - 1)
                for wrap_n in range(np.shape(new_czbin_edges)[1]):
                    bin_edge_inds = np.searchsorted(offset_cz_mesh,
                                                    new_czbin_edges[:, wrap_n])
                    lbinds = bin_edge_inds[0:-1]
                    rbinds = bin_edge_inds[1:]
                    # Make sure indices that appear first are less than indices
                    # that appear second in a pair of bin indices
                    if (wrap_n + 1) % 2 == 0:
                        bininds = zip(rbinds, lbinds)
                    else:
                        bininds = zip(lbinds, rbinds)
                    tmp_areas = []
                    for (binind_left_edge, binind_right_edge) in bininds:
                        if binind_left_edge == binind_right_edge:
                            tmp_areas.append(0)
                            continue
                        this_bin_area = np.array(
                            np.trapz(
                                y=offset_cz_pdf[
                                    binind_left_edge:binind_right_edge + 1],
                                x=offset_cz_mesh[
                                    binind_left_edge:binind_right_edge + 1]))
                        tmp_areas.append(this_bin_area)
                    czbin_areas += np.array(tmp_areas)

                assert np.min(czbin_areas) > -self.EPSILON

                tot_czbin_area = np.sum(czbin_areas)
                assert tot_czbin_area < int_val + self.EPSILON

                kernel4d[ebin_n, czbin_n] = np.outer(ebin_areas, czbin_areas)
                assert (np.sum(kernel4d[ebin_n, czbin_n]) -
                        tot_ebin_area * tot_czbin_area) < self.EPSILON

            if make_plots:
                ax2 = fig1.add_subplot(212, axisbg=AXISBG)
                hbins = np.linspace(dmin - 0.02 * drange, dmax + 0.02 * drange,
                                    N_HBINS * 3)
                hvals, hbins, hpatches = ax2.hist(cz_err,
                                                  bins=hbins,
                                                  normed=True,
                                                  **HIST_PP)
                ax2.plot(cz_mesh, cz_pdf, **DIFFUS_PP)
                fci = confInterval.MLConfInterval(x=cz_mesh, y=cz_pdf)
                lb, ub, yopt, r = fci.findCI_lin(conf=0.995)
                axlims = ax2.axis('tight')
                ax2.set_xlim(lb, ub)
                ax2.set_ylim(0, axlims[3] * 1.05)

                ylim = ax2.get_ylim()
                dy = ylim[1] - ylim[0]
                ruglines = rugplot(cz_err,
                                   y0=ylim[1],
                                   dy=-dy / 40.,
                                   ax=ax2,
                                   **RUG_PP)
                ruglines[-1].set_label(r'$\mathrm{Rug\,plot}$')

                x2lab = ax2.set_xlabel(
                    r'$\cos\vartheta_{\mathrm{track,reco}}-\cos\vartheta_{\nu,\mathrm{true}}$',
                    labelpad=LABELPAD)
                ax2.xaxis.set_label_coords(0.9, -LABELPAD)
                ax2.xaxis.grid(color=GRIDCOL)
                ax2.yaxis.grid(color=GRIDCOL)
                leg_title_tex = r'$\mathrm{Normalized}\,\cos\vartheta\mathrm{-err.\,distr.}$'
                leg = ax2.legend(loc='upper right',
                                 title=leg_title_tex,
                                 frameon=True,
                                 framealpha=0.8,
                                 fancybox=True,
                                 bbox_to_anchor=[1, 0.975])
                leg.get_title().set_fontsize(16)
                leg.get_title().set_color(LEGFNTCOL)
                [t.set_color(LEGFNTCOL) for t in leg.get_texts()]
                frame = leg.get_frame()
                frame.set_facecolor(LEGFACECOL)
                frame.set_edgecolor(None)

                actual_bin_tex = ''
                if (actual_left_ebin_edge !=
                        ebin_min) or (actual_right_ebin_edge != ebin_max):
                    actual_bin_tex = r'E_{\nu,\mathrm{true}}\in [' + \
                            format(actual_left_ebin_edge, '0.2f') + r',\,' + \
                            format(actual_right_ebin_edge, '0.2f') + r'] \mapsto '
                stt = r'$\mathrm{Resolutions,\,' + flav_tex(flav) + r'\,' + \
                        int_tex(int_type) + r'}$' + '\n' + \
                        r'$' + actual_bin_tex + r'\mathrm{Bin}_{' + format(ebin_n, 'd') + r'}\equiv E_{\nu,\mathrm{true}}\in [' + format(ebin_min, '0.2f') + \
                        r',\,' + format(ebin_max, '0.2f') + r']\,\mathrm{GeV}' + \
                        r',\,N_\mathrm{events}=' + format(n_in_bin, 'd') + r'$'

                fig1.subplots_adjust(top=TOP,
                                     bottom=BOTTOM,
                                     left=LEFT,
                                     right=RIGHT,
                                     hspace=HSPACE)
                suptitle = fig1.suptitle(stt)
                suptitle.set_fontsize(16)
                suptitle.set_position((0.5, 0.98))
                fig1.savefig(pdfpgs, format='pdf')

        check_areas = kernel4d.sum(axis=(2, 3))

        assert np.max(check_areas) < 1 + self.EPSILON, str(np.max(check_areas))
        assert np.min(check_areas) > 0 - self.EPSILON, str(np.min(check_areas))

        if make_plots:
            fig2 = plt.figure(2, figsize=(8, 10), dpi=90)
            fig2.clf()
            ax = fig2.add_subplot(111)
            X, Y = np.meshgrid(range(n_czbins), range(n_ebins))
            cm = mpl.cm.Paired_r
            cm.set_over((1, 1, 1), 1)
            cm.set_under((0, 0, 0), 1)
            plt.pcolor(X,
                       Y,
                       check_areas,
                       vmin=0 + self.EPSILON,
                       vmax=1.0,
                       shading='faceted',
                       cmap=cm)
            plt.colorbar(ticks=np.arange(0, 1.05, 0.05))
            ax.grid(0)
            ax.axis('tight')
            ax.set_xlabel(r'$\cos\vartheta_\mathrm{true}\mathrm{\,bin\,num.}$')
            ax.set_ylabel(r'$E_{\nu,\mathrm{true}}\mathrm{\,bin\,num.}$')
            ax.set_title(
                r'$\mathrm{Fract\,of\,evts\,starting\,in\,each}\,(E_{\nu,\mathrm{true}},\,\cos\vartheta_\mathrm{true})\,\mathrm{bin\,that\,reco\,in\,bounds}$'
                + '\n' +
                r'$\mathrm{None\,should\,be\,>1\,(shown\,white);\,no-event\,bins\,are\,black;\,avg.}='
                + format(np.mean(check_areas), '0.3f') + r'$')
            fig2.tight_layout()
            fig2.savefig(pdfpgs, format='pdf')

            check_areas2 = kernel4d.sum(axis=(0, 1))
            fig3 = plt.figure(2, figsize=(8, 10), dpi=90)
            fig3.clf()
            ax = fig3.add_subplot(111)
            X, Y = np.meshgrid(range(n_czbins), range(n_ebins))
            cm = mpl.cm.Paired_r
            cm.set_over((1, 1, 1), 1)
            cm.set_under((0, 0, 0), 1)
            plt.pcolor(
                X,
                Y,
                check_areas2,
                vmin=0 + self.EPSILON,  # vmax=1.0,
                shading='faceted',
                cmap=cm)
            plt.colorbar(ticks=np.arange(
                0, 0.1 + np.ceil(10. * np.max(check_areas2)) / 10., 0.05))
            ax.grid(0)
            ax.axis('tight')
            ax.set_xlabel(r'$\cos\vartheta_\mathrm{reco}\mathrm{\,bin\,num.}$')
            ax.set_ylabel(r'$E_{\nu,\mathrm{reco}}\mathrm{\,bin\,num.}$')
            ax.set_title(
                r'$\mathrm{Normed\,num\,events\,reconstructing\,into\,each}\,(E_{\nu,\mathrm{reco}},\,\cos\vartheta_\mathrm{reco})\,\mathrm{bin}$'
                + '\n' + r'$\mathrm{No-event\,bins\,are\,black;\,avg.}=' +
                format(np.mean(check_areas2), '0.3f') + r'$')
            fig3.tight_layout()
            fig3.savefig(pdfpgs, format='pdf')

            pdfpgs.close()

        return kernel4d
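For reference, the 4D kernel returned here maps a 2D histogram of true (E, coszen) event counts onto the corresponding reconstructed histogram (see the docstring of single_kernel_set below). A minimal sketch of applying it, where `true_hist` is a hypothetical (n_ebins, n_czbins) array:

import numpy as np

# reco_hist[k, l] = sum over (i, j) of true_hist[i, j] * kernel4d[i, j, k, l]
reco_hist = np.tensordot(true_hist, kernel4d, axes=([0, 1], [0, 1]))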
Exemplo n.º 19
    def _compute_nominal_transforms(self):
        """Compute new PID transforms."""
        logging.debug('Updating pid.hist PID histograms...')

        # TODO(shivesh): As of now, events do not have units as far as PISA
        # is concerned

        self.load_events(self.params.pid_events)
        self.cut_events(self.params.transform_events_keep_criteria)

        # TODO: in future, the events file will not have these combined
        # already, and it should be done here (or in a nominal transform,
        # etc.). See below about taking this step when we move to directly
        # using the I3-HDF5 files.
        #events_file_combined_flavints = tuple([
        #    NuFlavIntGroup(s)
        #    for s in self.events.metadata['flavints_joined']
        #])

        # TODO: take events object as an input instead of as a param that
        # specifies a file? Or handle both cases?

        pid_spec = OrderedDict(eval(self.params.pid_spec.value))
        if set(pid_spec.keys()) != set(self.output_channels):
            msg = 'PID criteria in `pid_spec` {0} do not match output_channels {1}'
            raise ValueError(msg.format(pid_spec.keys(), self.output_channels))

        # TODO: add importance weights, error computation

        logging.debug("Separating events by PID...")
        separated_events = OrderedDict()
        for sig in self.output_channels:
            this_sig_events = self.events.applyCut(pid_spec[sig])
            separated_events[sig] = this_sig_events

        # Derive transforms by combining flavints that behave similarly, but
        # apply the derived transforms to the input flavints separately
        # (leaving combining these together to later)
        transforms = []
        for flavint_group in self.transform_groups:
            logging.debug("Working on %s PID", flavint_group)

            repr_flavint = flavint_group[0]

            # TODO(shivesh): errors
            # TODO(shivesh): total histo check?
            sig_histograms = {}
            total_histo = np.zeros(self.output_binning.shape)
            for repr_flavint in flavint_group:
                histo = self.events.histogram(
                    kinds=repr_flavint,
                    binning=self.output_binning,
                    weights_col=self.params.pid_weights_name.value,
                    errors=None).hist
                total_histo += histo

            for sig in self.output_channels:
                sig_histograms[sig] = np.zeros(self.output_binning.shape)
                for repr_flavint in flavint_group:
                    this_sig_histo = separated_events[sig].histogram(
                        kinds=repr_flavint,
                        binning=self.output_binning,
                        weights_col=self.params.pid_weights_name.value,
                        errors=None).hist
                    sig_histograms[sig] += this_sig_histo

            for sig in self.output_channels:
                with np.errstate(divide='ignore', invalid='ignore'):
                    xform_array = sig_histograms[sig] / total_histo

                num_invalid = np.sum(~np.isfinite(xform_array))
                if num_invalid > 0:
                    logging.warn(
                        'Group "%s", PID signature "%s" has %d bins with no'
                        ' events (and hence the ability to separate events'
                        ' by PID cannot be ascertained). These are being'
                        ' masked off from any further computations.',
                        flavint_group, sig, num_invalid)
                    # TODO: this caused buggy event propagation for some
                    # reason; check and re-introduce the masked-array idea
                    # when this is fixed. For now, replicating the behavior
                    # from PISA 2.
                    #xform_array = np.ma.masked_invalid(xform_array)

                # Double check that no NaN remain
                #assert not np.any(np.isnan(xform_array))

                # Copy this transform to use for each input in the group
                for input_name in self.input_names:
                    if input_name not in flavint_group:
                        continue
                    xform = BinnedTensorTransform(
                        input_names=input_name,
                        output_name=self.suffix_channel(input_name, sig),
                        input_binning=self.input_binning,
                        output_binning=self.output_binning,
                        xform_array=xform_array)
                    transforms.append(xform)

        return TransformSet(transforms=transforms)
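For context, `pid_spec` is eval'd from a string parameter into an OrderedDict mapping each output channel to a cut string applied via `applyCut`, and each resulting `xform_array` holds the per-bin fraction of events assigned to that channel. A hypothetical illustration of such a parameter value (the channel names, the `pid` variable, and the threshold are illustrative, not taken from the source):

from collections import OrderedDict

pid_spec_value = "[('trck', 'pid >= 0.55'), ('cscd', 'pid < 0.55')]"
pid_spec = OrderedDict(eval(pid_spec_value))
# -> OrderedDict([('trck', 'pid >= 0.55'), ('cscd', 'pid < 0.55')])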
Exemplo n.º 20
    'nue': {'filename': args.nue,'nfiles': args.nfiles_nue},
    'numu': {'filename': args.numu,'nfiles': args.nfiles_numu},
    'nutau': {'filename': args.nutau,'nfiles': args.nfiles_nutau}}

logging.info("input files:\n%s"%data_files)

# Ensure overwrite of existing filename...
outfilename = args.outfile
fh = h5py.File(outfilename,'w')
fh.close()
logging.info("Writing to file: %s",outfilename)

# Define V3, V4, or V5 cuts:
cut_list = []
if args.V3cuts:
    logging.warn("Using cuts V3...")
    cut_list.append(('NewestBgRejCutsStep1','value',True))
    cut_list.append(('NewestBgRejCutsStep2','value',True))
elif args.V4cuts:
    logging.warn("Using cuts V4...")
    cut_list.append(('Cuts_V4_Step1','value',True))
    cut_list.append(('Cuts_V4_Step2','value',True))
elif args.V5cuts:
    logging.warn("Using cuts V5...")
    cut_list.append(('Cuts_V5_Step1','value',True))
    cut_list.append(('Cuts_V5_Step2','value',True))
elif args.nocuts:
    logging.warn("Using NO S1/S2 selection CUTS")
    cut_list = []
elif args.custom:
    logging.warn("Using CUSTOM cuts: %s..."%args.custom_str)
Exemplo n.º 21
                    help='set verbosity level')
args = parser.parse_args()

set_verbosity(args.verbose)

#Read in the settings
template_settings = from_json(args.template_settings)
minimizer_settings = from_json(args.minimizer_settings)
pseudo_data_settings = from_json(
    args.pseudo_data_settings
) if args.pseudo_data_settings is not None else template_settings

#Workaround for old scipy versions
import scipy
if scipy.__version__ < '0.12.0':
    logging.warn('Detected scipy version %s < 0.12.0' % scipy.__version__)
    if 'maxiter' in minimizer_settings:
        logging.warn('Optimizer settings for \"maxiter\" will be ignored')
        minimizer_settings.pop('maxiter')
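Note that comparing version strings lexicographically can misorder releases (e.g. the string '0.9.1' sorts after '0.12.0', so an old scipy would skip this workaround). A sketch of a more robust check for code of this era, using distutils' LooseVersion and reusing the `logging` object already imported above:

from distutils.version import LooseVersion
import scipy

if LooseVersion(scipy.__version__) < LooseVersion('0.12.0'):
    logging.warn('Detected scipy version %s < 0.12.0' % scipy.__version__)
    # drop the unsupported option if present
    minimizer_settings.pop('maxiter', None)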

# make sure that both pseudo data and template are using the same
# channel. Raise Exception and quit otherwise
channel = template_settings['params']['channel']['value']
if channel != pseudo_data_settings['params']['channel']['value']:
    error_msg = "Both template and pseudo data must have same channel!\n"
    error_msg += " pseudo_data_settings chan: '%s', template chan: '%s' " % (
        pseudo_data_settings['params']['channel']['value'], channel)
    raise ValueError(error_msg)

if args.gpu_id is not None:
    template_settings['params']['gpu_id'] = {}
Exemplo n.º 22
def plot_map_comparisons(ref_map,
                         new_map,
                         ref_abv,
                         new_abv,
                         outdir,
                         subdir,
                         name,
                         texname,
                         stagename,
                         servicename,
                         shorttitles=False,
                         ftype='png'):
    """Plot comparisons between two identically-binned PISA 3 style maps"""
    path = [outdir]

    if subdir is None:
        subdir = stagename.lower()
    path.append(subdir)

    if outdir is not None:
        mkdir(os.path.join(*path), warn=False)

    if stagename is not None:
        fname = [
            '%s_%s_comparisons' % (ref_abv.lower(), new_abv.lower()),
            'stage_' + stagename
        ]
    else:
        fname = ['%s_%s_comparisons' % (ref_abv.lower(), new_abv.lower())]
    if servicename is not None:
        fname.append('service_' + servicename)
    if name is not None:
        fname.append(name.lower())
    fname = '__'.join(fname) + '.' + ftype

    path.append(fname)

    basetitle = []
    if stagename is not None:
        basetitle.append('%s' % stagename)
    if texname is not None:
        basetitle.append(r'$%s$' % texname)
    basetitle = ' '.join(basetitle)

    validate_map_objs(new_map, ref_map)
    with np.errstate(divide='ignore', invalid='ignore'):
        ratio_map = new_map / ref_map
    diff_map = new_map - ref_map
    with np.errstate(divide='ignore', invalid='ignore'):
        diff_ratio_map = diff_map / ref_map

    max_diff_ratio = np.nanmax(np.abs(diff_ratio_map.hist))

    # Handle cases where ratio returns infinite
    # This isn't necessarily a failure, since all it means is that the
    # reference was zero. If the new value is sufficiently close to zero then
    # it's still fine.
    if max_diff_ratio == float('inf'):
        logging.warn('Infinite value found in ratio tests. Difference tests '
                     'now also being calculated')
        # First find all the finite elements
        finite_map = np.isfinite(diff_ratio_map.hist)
        # Then find the nanmax of this, will be our new test value
        max_diff_ratio = np.nanmax(np.abs(diff_ratio_map.hist[finite_map]))
        # Also find all the infinite elements
        infinite_map = np.logical_not(finite_map)
        # This will be a second test value
        max_diff = np.nanmax(np.abs(diff_map.hist[infinite_map]))
    else:
        # Without any infinite elements we can ignore this second test
        max_diff = 0.0

    if outdir is not None:
        gridspec_kw = dict(left=0.03, right=0.968, wspace=0.32)
        fig, axes = plt.subplots(nrows=1,
                                 ncols=5,
                                 gridspec_kw=gridspec_kw,
                                 sharex=False,
                                 sharey=False,
                                 figsize=(20, 5))
        if shorttitles:
            ref_map.plot(fig=fig,
                         ax=axes[0],
                         title=basetitle + ' ' + ref_abv + ' (A)',
                         cmap=plt.cm.afmhot)
            new_map.plot(fig=fig,
                         ax=axes[1],
                         title=basetitle + ' ' + new_abv + ' (B)',
                         cmap=plt.cm.afmhot)
            ratio_map.plot(fig=fig,
                           ax=axes[2],
                           title='A/B',
                           cmap=plt.cm.afmhot)
            diff_map.plot(fig=fig,
                          ax=axes[3],
                          title='A-B',
                          symm=True,
                          cmap=plt.cm.seismic)
            diff_ratio_map.plot(fig=fig,
                                ax=axes[4],
                                title='(A-B)/A',
                                symm=True,
                                cmap=plt.cm.seismic)
        else:
            ref_map.plot(fig=fig,
                         ax=axes[0],
                         title=basetitle + ' ' + ref_abv,
                         cmap=plt.cm.afmhot)
            new_map.plot(fig=fig,
                         ax=axes[1],
                         title=basetitle + ' ' + new_abv,
                         cmap=plt.cm.afmhot)
            ratio_map.plot(fig=fig,
                           ax=axes[2],
                           title=basetitle + ' %s/%s' % (new_abv, ref_abv),
                           cmap=plt.cm.afmhot)
            diff_map.plot(fig=fig,
                          ax=axes[3],
                          title=basetitle + ' %s-%s' % (new_abv, ref_abv),
                          symm=True,
                          cmap=plt.cm.seismic)
            diff_ratio_map.plot(fig=fig,
                                ax=axes[4],
                                title=basetitle + ' (%s-%s)/%s' %
                                (new_abv, ref_abv, ref_abv),
                                symm=True,
                                cmap=plt.cm.seismic)
        logging.debug('>>>> Plot for inspection saved at %s' %
                      os.path.join(*path))
        fig.savefig(os.path.join(*path))
        plt.close(fig.number)

    return max_diff_ratio, max_diff
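A minimal usage sketch for the function above, assuming `ref_map` and `new_map` are two identically-binned PISA Map objects already in hand (all other values are illustrative):

max_diff_ratio, max_diff = plot_map_comparisons(
    ref_map=ref_map,
    new_map=new_map,
    ref_abv='V2',
    new_abv='V3',
    outdir='/tmp/comparisons',
    subdir=None,
    name='nue_cc',
    texname=r'\nu_e\,\mathrm{CC}',
    stagename='reco',
    servicename='hist',
    shorttitles=True,
    ftype='png',
)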
Exemplo n.º 23
                    help='set verbosity level')

args = parser.parse_args()
set_verbosity(args.verbose)

print "FILE NORMALIZATION: "
print "  >> nue: ",args.ne
print "  >> numu: ",args.nmu
print "  >> nutau: ",args.ntau

ebins = (np.linspace(args.emin, args.emax, args.nebins) if args.elin
         else np.logspace(np.log10(args.emin), np.log10(args.emax),
                          args.nebins))

# Cut definitions:
s1_s2_cuts = []
if args.v4cuts:
    logging.warn("Using cuts V4!")
    s1_s2_cuts = [("Cuts_V4_Step1",'value',True),("Cuts_V4_Step2",'value',True)]
elif args.v3cuts:
    logging.warn("Using cuts V3!")
    s1_s2_cuts = [('NewestBgRejCutsStep1','value',True), ('NewestBgRejCutsStep2','value',True)]
elif args.v5truth:
    logging.warn("USING V5 TRUTH information")
    s1_s2_cuts = [('Cuts_V5_Step2_upgoing_Truth','value',True)]
elif args.nocuts:
    logging.warn("Using no selection cuts!")
    s1_s2_cuts = []
else:
    logging.warn("Using cuts V5!")
    s1_s2_cuts= [("Cuts_V5_Step1",'value',True),("Cuts_V5_Step2",'value',True)]

Exemplo n.º 24
    def __init__(
        self,
        earth_model=None,
        detector_depth=None,
        prop_height=None,
        prop_height_min=None,
        YeI=None,
        YeO=None,
        YeM=None,
        rel_err=None,
        abs_err=None,
        prop_lowpass_cutoff=None,
        prop_lowpass_frac=None,
        eval_lowpass_cutoff=None,
        eval_lowpass_frac=None,
        node_mode=None,
        use_decoherence=False,
        num_decoherence_gamma=1,
        use_nsi=False,
        num_neutrinos=3,
        exact_mode=False,
        **std_kwargs,
    ):

        if use_nsi:
            raise NotImplementedError("NSI not implemented")
        if use_decoherence:
            raise NotImplementedError("Decoherence not implemented")
        if type(prop_height) is not ureg.Quantity:
            raise NotImplementedError(
                "Getting propagation heights from containers is "
                "not yet implemented")
        self.num_neutrinos = int(num_neutrinos)
        assert self.num_neutrinos < 5, "currently only supports up to 4 flavor oscillations"
        self.use_nsi = use_nsi
        self.use_decoherence = use_decoherence
        self.num_decoherence_gamma = num_decoherence_gamma
        self.node_mode = node_mode

        self.earth_model = earth_model
        self.YeI = YeI.m_as("dimensionless")
        self.YeO = YeO.m_as("dimensionless")
        self.YeM = YeM.m_as("dimensionless")
        self.detector_depth = detector_depth.m_as("km")
        self.prop_height = prop_height.m_as("km")
        self.avg_height = False
        self.prop_height_min = None
        if prop_height_min is not None:  # this is optional
            self.prop_height_min = prop_height_min.m_as("km")
            self.avg_height = True

        self.layers = None

        self.rel_err = rel_err.m_as(
            "dimensionless") if rel_err is not None else 1.0e-10
        self.abs_err = abs_err.m_as(
            "dimensionless") if abs_err is not None else 1.0e-10
        self.prop_lowpass_cutoff = (prop_lowpass_cutoff.m_as("1/km")
                                    if prop_lowpass_cutoff is not None else 0.)
        self.prop_lowpass_frac = (prop_lowpass_frac.m_as("dimensionless")
                                  if prop_lowpass_frac is not None else 0.)
        self.eval_lowpass_cutoff = (eval_lowpass_cutoff.m_as("1/km")
                                    if eval_lowpass_cutoff is not None else 0.)
        self.eval_lowpass_frac = (eval_lowpass_frac.m_as("dimensionless")
                                  if eval_lowpass_frac is not None else 0.)

        if self.prop_lowpass_frac > 1. or self.eval_lowpass_frac > 1.:
            raise ValueError(
                "lowpass filter fraction cannot be greater than one")

        if self.prop_lowpass_frac < 0. or self.eval_lowpass_frac < 0.:
            raise ValueError(
                "lowpass filter fraction cannot be smaller than zero")

        self.nus_layer = None
        self.nus_layerbar = None

        # Define standard params
        expected_params = [
            "theta12",
            "theta13",
            "theta23",
            "deltam21",
            "deltam31",
            "deltacp",
        ]

        # Add decoherence parameters
        assert self.num_decoherence_gamma in [
            1, 3
        ], ("Must choose either 1 or 3 "
            "decoherence gamma parameters")
        if self.use_decoherence:
            if self.num_decoherence_gamma == 1:
                expected_params.extend(["gamma"])
            elif self.num_decoherence_gamma == 3:
                expected_params.extend(["gamma21", "gamma31", "gamma32"])
            expected_params.extend(["n_energy"])

        # We may want to reparametrize this with the difference between deltacp14 and
        # deltacp24, as the absolute value seems to play a small role (see
        # https://arxiv.org/pdf/2010.06321.pdf)
        if self.num_neutrinos == 4:
            expected_params.extend([
                "theta14",
                "theta24",
                "theta34",
                "deltam41",
                "deltacp14",
                "deltacp24",
            ])

        # init base class
        super().__init__(
            expected_params=expected_params,
            **std_kwargs,
        )

        # This is special: We have an additional "binning" to account for. It is in
        # principle possible to work in event mode even for the nodes, which would mean
        # that the full oscillation problem is solved for all events individually.
        # Together with the constant oscillation mode, this can be used to calculate
        # probabilities in exact mode in a time that is reasonable at least for
        # generating pseudodata.

        assert not (self.use_nsi and self.use_decoherence), (
            "NSI and decoherence not "
            "suported together, must use one or the other")

        self.exact_mode = exact_mode

        if exact_mode:
            # No interpolation is happening in exact mode so any passed node_mode
            # will be ignored. Probabilities are calculated at calc_specs.
            if self.node_mode is not None:
                logging.warn(
                    "nuSQuIDS is configured in exact mode, the passed "
                    f"`node_mode`\n({self.node_mode})\n will be ignored!")
            if self.prop_lowpass_cutoff > 0 or self.eval_lowpass_cutoff > 0:
                logging.warn(
                    "nuSQuIDS is configured in exact mode, low-pass filters "
                    "will be ignored")
        else:
            if isinstance(self.calc_mode, MultiDimBinning):
                assert isinstance(self.node_mode, MultiDimBinning), (
                    "cannot use "
                    "event-wise nodes with binned calculation")

        self.e_node_mode = None
        self.e_mesh = None
        self.coszen_node_mode = None
        self.cosz_mesh = None
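The constructor above expects dimensionful arguments as pint Quantity objects, since it converts them with `.m_as(...)`. A minimal sketch of building such values (the numbers are purely illustrative; in practice PISA's shared unit registry should be used rather than a fresh one):

import pint

ureg = pint.UnitRegistry()  # stand-in for PISA's shared registry

detector_depth = 2.0 * ureg.km              # .m_as('km') -> 2.0
prop_height = 20.0 * ureg.km
YeI = 0.4656 * ureg.dimensionless
rel_err = 1.0e-9 * ureg.dimensionless       # falls back to 1.0e-10 if None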
Exemplo n.º 25
    def __init__(
        self,
        fit_results_file,
        data=None,
        params=None,
        input_names=None,
        output_names=None,
        debug_mode=None,
        error_method=None,
        input_specs=None,
        calc_specs=None,
        output_specs=None,
        links=None,
    ):
        # -- Read fit_results_file and extract necessary info -- #

        fit_results = from_file(fit_results_file)

        # handle backwards compatibility for old style fit results files
        if "hyperplanes" in fit_results:
            using_old_fit_file = False
        elif "sys_list" in fit_results:
            using_old_fit_file = True
        else:
            raise ValueError("Unrecognised format for input fit file")

        # get list of systematic parameter names fitted; need to conserve order here!
        if using_old_fit_file:
            fit_param_names = fit_results["sys_list"]
        else:
            fit_param_names = fit_results["param_names"]

        if "param_units" in fit_results:
            fit_param_units = fit_results["param_units"]
        else:
            fit_param_units = ["dimensionless" for _ in fit_param_names]
        fit_param_units = [ureg.Unit(u) for u in fit_param_units]

        # Prefer to have the actual binning, so we can compare bin edges to
        # "reasonable" precision to make sure the hyperplane fits are applicable to the
        # current binning.
        #
        # If there is no binning in the hyperplane fit results file, look for a hash
        # value; barring that, just ensure that the dimensionality & number of bins
        # match.
        binning_spec = fit_results.get("binning", None)
        if binning_spec is not None:
            fit_binning = MultiDimBinning(**binning_spec)
        else:
            fit_binning = None

        if fit_binning is not None:
            fit_binning_hash = fit_binning.hash
        else:
            fit_binning_hash = fit_results.get("binning_hash", None)

        if fit_binning_hash is None:
            logging.warn("Cannot determine the hash of the binning employed"
                         " for the hyperplane fits. Correct application of"
                         " fits is not guaranteed!")

        # -- Expected input / output names -- #

        input_names = ()
        output_names = ()

        # -- Which keys are added or altered for the outputs during `apply` -- #

        input_calc_keys = ()
        output_calc_keys = ("hyperplane_scalefactors", )

        if error_method == "sumw2":
            output_apply_keys = ("weights", "errors")
            input_apply_keys = output_apply_keys
        else:
            output_apply_keys = ("weights", )
            input_apply_keys = output_apply_keys

        # -- Initialize base class -- #

        super(pi_hyperplanes, self).__init__(
            data=data,
            params=params,
            expected_params=fit_param_names,
            input_names=input_names,
            output_names=output_names,
            debug_mode=debug_mode,
            error_method=error_method,
            input_specs=input_specs,
            calc_specs=calc_specs,
            output_specs=output_specs,
            input_calc_keys=input_calc_keys,
            output_calc_keys=output_calc_keys,
            input_apply_keys=input_apply_keys,
            output_apply_keys=output_apply_keys,
        )

        # -- Only allowed/implemented modes -- #

        assert self.input_mode is not None
        assert self.calc_mode == "binned"
        assert self.output_mode is not None

        self.links = ast.literal_eval(links)

        # -- Add attrs to `self` specific to `pi_hyperplanes` -- #

        self.fit_results_file = fit_results_file
        """str : path to hyperplane fit results file"""

        self.using_old_fit_file = using_old_fit_file
        """bool : whether the hyperplane fit file is in the "old" format"""

        self.fit_results = fit_results
        """OrderedDict : parsed hyperplane fit file"""

        self.fit_param_names = fit_param_names
        """list : param names used in hyperplane fit, in order they appear in file"""

        self.fit_param_units = fit_param_units
        """list : param untis used in hyperplane fit, in order they appear in file"""

        self.fit_binning = fit_binning
        """MultiDimBinning : binning used for hyperplane fits; one hyperplane per bin"""

        self.fit_binning_hash = fit_binning_hash
        """str : hash of the binning used for hyperplane fits"""
Exemplo n.º 26
parser.add_argument('-v', '--verbose', action='count', default=None,
                    help='''set verbosity level''')
args = parser.parse_args()

set_verbosity(args.verbose)

#Read in the settings
template_settings = from_json(args.template_settings)
minimizer_settings  = from_json(args.minimizer_settings)
grid_settings = from_json(args.grid_settings)

channel = template_settings['params']['channel']['value']
#Workaround for old scipy versions
import scipy
if scipy.__version__ < '0.12.0':
    logging.warn('Detected scipy version %s < 0.12.0'%scipy.__version__)
    if 'maxiter' in minimizer_settings:
        logging.warn('Optimizer settings for \"maxiter\" will be ignored')
        minimizer_settings.pop('maxiter')

#Get the parameters
params = template_settings['params']

# Make sure that atmospheric parameters are fixed:
logging.warn("Ensuring that atmospheric parameters are fixed for this analysis")
params = fix_atm_params(params)
#print "params: ",params.items()

with Timer() as t:
    template_maker = TemplateMaker(get_values(params),**template_settings['binning'])
profile.info("==> elapsed time to initialize templates: %s sec"%t.secs)
Exemplo n.º 27
llh_data = from_hdf(args.llh_file)
df_true_h, df_false_h = get_llr_data_frames(llh_data)
template_params = llh_data['template_settings']['params']

if args.verbose > 1: show_frame(df_true_h)

print "\n  columns: ",df_true_h[0].columns

################################################################
### 1) Plot LLR Distributions
################################################################

# df_true_h MUST be filled, but df_false_h is allowed to be empty
llr_dict_true_h = get_llh_ratios(df_true_h)
if (len(df_false_h) == 0 or args.no_false_h):
    logging.warn("No false hierarchy best fit llr distributions...")
    fig = make_llr_only_true_h(llr_dict_true_h, args.nbins, args.xlim)
else:
    logging.warn("Making llr distributions with false hierarchy best fit.")
    llr_dict_false_h = get_llh_ratios(df_false_h)
    fig = make_llr_with_false_h(llr_dict_true_h, llr_dict_false_h, args.nbins, args.xlim)


################################################################
### 2) Plot Posterior Distributions
################################################################

if args.params:

    df = df_true_h if args.true_h else df_false_h
Exemplo n.º 28
    def single_kernel_set(self, e_true, cz_true, e_reco, cz_reco):
        """Construct a 4D kernel set from MC events using VBWKDE.

        Given a set of MC events and each of their {energy{true, reco},
        coszen{true, reco}}, generate a 4D NumPy array that maps a 2D true-flux
        histogram onto the corresponding 2D reco-flux histogram.

        The resulting 4D array can be indexed logically using
          kernel4d[e_true_i, cz_true_j][e_reco_k, cz_reco_l]
        where the 4 indices point from a single MC-true histogram bin (i,j) to
        a single reco histogram bin (k,l).

        Binning of both MC-true and reco histograms is the same and is given by
        the values in self.ebins and self.czbins which define the bin *edges*
        (not the bin centers; hence, len(self.ebins) is one greater than the
        number of bins, etc.).

        NOTE: Actual limits in energy used to group events into a single "true"
        bin may be extended beyond the bin edges defined by self.ebins in order
        to gather enough events to successfully apply VBWKDE.

        Parameters
        ----------
        e_true : sequence
            MC-true neutrino energies, one per event
        cz_true : sequence
            MC-true neutrino coszen, one per event
        e_reco : sequence
            Reconstructed neutrino energies, one per event
        cz_reco : sequence
            Reconstructed neutrino coszen, one per event

        Returns
        -------
        kernel4d : 4D array of float
            Mapping from the number of events in each bin of the 2D
            MC-true-events histogram to the number of events reconstructed in
            each bin of the 2D reconstructed-events histogram. Dimensions are
              len(self.ebins)-1 x len(self.czbins)-1 x len(self.ebins)-1 x
              len(self.czbins)-1
            since ebins and czbins define the histograms' bin edges.
        """
        OVERFIT_FACTOR = 1.0

        assert np.min(np.diff(self.ebins)) > 0, \
            "Energy bin edges not monotonically increasing."
        assert np.min(np.diff(self.czbins)) > 0, \
            "coszen bin edges not monotonically increasing."

        # NOTE: below defines bin centers on linear scale; other logic
        # in this method assumes this to be the case, so
        # **DO NOT USE** utils.utils.get_bin_centers in this method, which
        # may return logarithmically-defined centers instead.

        ebin_edges = np.array(self.ebins)
        left_ebin_edges = ebin_edges[0:-1]
        right_ebin_edges = ebin_edges[1:]
        ebin_centers = (left_ebin_edges+right_ebin_edges)/2.0
        n_ebins = len(ebin_centers)

        czbin_edges = np.array(self.czbins)
        left_czbin_edges = czbin_edges[0:-1]
        right_czbin_edges = czbin_edges[1:]
        czbin_centers = (left_czbin_edges+right_czbin_edges)/2.0
        n_czbins = len(czbin_centers)

        n_events = len(e_true)

        if self.MIN_NUM_EVENTS > n_events:
            self.MIN_NUM_EVENTS = n_events
        if self.TGT_NUM_EVENTS > n_events:
            self.TGT_NUM_EVENTS = n_events

        # Object with which to store the 4D kernels: np 4D array
        kernel4d = np.zeros((n_ebins, n_czbins, n_ebins, n_czbins))

        # Object with which to store the 2D "aggregate_map": the total number
        # of events reconstructed into a given (E, CZ) bin, used for sanity
        # checks
        aggregate_map = np.zeros((n_ebins, n_czbins))
        for ebin_n in range(n_ebins):
            ebin_min = left_ebin_edges[ebin_n]
            ebin_max = right_ebin_edges[ebin_n]
            ebin_mid = (ebin_min+ebin_max)/2.0
            ebin_wid = ebin_max-ebin_min
            
            logging.trace(
                '  processing true-energy bin_n=' + str(ebin_n) + ' of ' +
                str(n_ebins-1) + ', E_{nu,true} in ' +
                '[' + str(ebin_min) + ', ' + str(ebin_max) + '] ...'
            )

            # Absolute distance from each event's true energy to the center of
            # this energy bin; sort in ascending-distance order
            abs_enu_dist = sorted(np.abs(e_true - ebin_mid))

            # Grab the distance the number-"TGT_NUM_EVENTS" event is from the
            # bin center
            tgt_thresh_enu_dist = abs_enu_dist[self.TGT_NUM_EVENTS-1]

            # Grab the distance the number-"MIN_NUM_EVENTS" event is from the
            # bin center
            min_thresh_enu_dist = abs_enu_dist[self.MIN_NUM_EVENTS-1]

            # TODO: revisit the below algorithm with proper testing

            # Make threshold distance (which is half the total width) no more
            # than 4x the true-energy-bin width in order to capture the
            # "target" number of points (TGT_NUM_EVENTS) but no less than half
            # the bin width (i.e., the bin should be at least be as wide as the
            # pre-defined bin width).
            #
            # HOWEVER, allow the threshold distance (bin half-width) to expand
            # to as much as 4x the original bin full-width in order to capture
            # the "minimum" number of points (MIN_NUM_EVENTS).
            thresh_enu_dist = \
                    max(min(max(tgt_thresh_enu_dist, ebin_wid/2),
                            4*ebin_wid),
                        min_thresh_enu_dist)

            # Grab all events within the threshold distance
            in_ebin_ind = np.where(abs_enu_dist <= thresh_enu_dist)[0]
            n_in_bin = len(in_ebin_ind)

            # Extract just the neutrino-energy/coszen error columns' values for
            # succinctness
            enu_err = e_reco[in_ebin_ind] - e_true[in_ebin_ind]
            cz_err = cz_reco[in_ebin_ind] - cz_true[in_ebin_ind]

            #==================================================================
            # Neutrino energy resolutions
            #==================================================================
            dmin = min(enu_err)
            dmax = max(enu_err)
            drange = dmax-dmin

            e_lowerlim = min(self.ENERGY_RANGE[0]-ebin_mid*1.5, dmin-drange*0.5)
            e_upperlim = max((np.max(ebin_edges)-ebin_mid)*1.5, dmax+drange*0.5)
            egy_kde_lims = np.array([e_lowerlim, e_upperlim])

            # Use at least 2**10 mesh points, or the next-higher integer power
            # of two that makes the mesh spacing no larger than one tenth of
            # the lowest energy-bin edge
            min_num_pts = 2**12
            min_bin_width = np.min(ebin_edges)
            min_pts_smallest_bin = 10.0
            kde_range = np.diff(egy_kde_lims)
            num_pts0 = kde_range/(min_bin_width/min_pts_smallest_bin)
            kde_num_pts = int(max(2**10, 2**np.ceil(np.log2(num_pts0))))
            logging.debug(
                ' Nevts=' + str(n_in_bin) + ' taken from [' +
                str(ebin_mid-thresh_enu_dist) + ', ' +
                str(ebin_mid+thresh_enu_dist) + ']' + ', KDE lims=' +
                str(kde_range) + ', KDE_N: ' + str(kde_num_pts)
            )

            # Compute variable-bandwidth KDEs
            enu_bw, enu_mesh, enu_pdf = kde.vbw_kde(
                data           = enu_err,
                overfit_factor = OVERFIT_FACTOR,
                MIN            = egy_kde_lims[0],
                MAX            = egy_kde_lims[1],
                N              = kde_num_pts
            )

            if np.min(enu_pdf) < 0:
                # Only issue warning if the most-negative value is negative
                # beyond specified acceptable-numerical-precision threshold
                # (EPSILON)
                if np.min(enu_pdf) <= -self.EPSILON:
                    logging.warn(
                        "np.min(enu_pdf) < 0: Minimum value is " +
                        str(np.min(enu_pdf)) +
                        "; forcing all negative values to 0."
                    )
                # Otherwise, just quietly clip any negative values at 0
                enu_pdf = np.clip(a=enu_pdf, a_min=0, a_max=np.inf)

            assert np.min(enu_pdf) >= 0, str(np.min(enu_pdf))

            # Re-center distribution at the center of the energy bin for which
            # errors were computed
            offset_enu_mesh = enu_mesh+ebin_mid
            offset_enu_pdf = enu_pdf

            # Get reference area under the PDF, for checking after interpolated
            # values are added.
            #
            # NOTE There should be NO normalization because any events lost due
            # to cutting off tails outside the binned region are actually going
            # to be lost, and so should penalize the total area.
            int_val0 = np.trapz(y=offset_enu_pdf,
                                x=offset_enu_mesh)

            # Create linear interpolator for the PDF
            interp = interpolate.interp1d(
                x             = offset_enu_mesh,
                y             = offset_enu_pdf,
                kind          = 'linear',
                copy          = True,
                bounds_error  = True,
                fill_value    = np.nan
            )

            # Insert all bin edges' exact locations into the mesh (For accurate
            # accounting of area in each bin, must include values out to bin
            # edges)
            edge_locs = [be for be in
                         np.concatenate((left_ebin_edges, right_ebin_edges))
                         if not(be in offset_enu_mesh)]
            edge_locs.sort()
            edge_pdfs = interp(edge_locs)
            insert_ind = np.searchsorted(offset_enu_mesh, edge_locs)
            offset_enu_mesh = np.insert(offset_enu_mesh, insert_ind, edge_locs)
            offset_enu_pdf = np.insert(offset_enu_pdf, insert_ind, edge_pdfs)

            int_val = np.trapz(y=offset_enu_pdf, x=offset_enu_mesh)

            assert np.abs(int_val - int_val0) < self.EPSILON

            # Chop off distribution at extrema of energy bins
            valid_ind = np.where(
                (offset_enu_mesh >= np.min(ebin_edges)) &
                (offset_enu_mesh <= np.max(ebin_edges))
            )[0]
            offset_enu_mesh = offset_enu_mesh[valid_ind]
            offset_enu_pdf = offset_enu_pdf[valid_ind]

            # Check that there are no negative density values (after inserts)
            assert np.min(offset_enu_pdf) > 0-self.EPSILON, \
                str(np.min(offset_enu_pdf))

            # Record the integrated area after removing parts outside binned
            # range
            tot_ebin_area0 = np.trapz(y=offset_enu_pdf,
                                      x=offset_enu_mesh)

            # Check that it integrates to <= 1, sanity check
            assert tot_ebin_area0 < 1+self.EPSILON, str(tot_ebin_area0)

            # Identify indices encapsulating the defined energy bins' ranges,
            # and find the area of each bin
            lbinds = np.searchsorted(offset_enu_mesh, left_ebin_edges)
            rbinds = np.searchsorted(offset_enu_mesh, right_ebin_edges)
            bininds = zip(lbinds, rbinds)
            ebin_areas = [np.trapz(y=offset_enu_pdf[l:r+1],
                                   x=offset_enu_mesh[l:r+1])
                          for (l, r) in bininds]

            # Check that no bins have negative areas
            assert np.min(ebin_areas) >= 0

            # Sum the individual bins' areas
            tot_ebin_area = np.sum(ebin_areas)

            # Check that this total of all the bins is equal to the total area
            # under the curve (i.e., make sure there is no overlap or gaps
            # between bins)
            assert np.abs(tot_ebin_area-tot_ebin_area0) < self.EPSILON, \
                    'tot_ebin_area=' + str(tot_ebin_area) + \
                    ' should equal tot_ebin_area0=' + str(tot_ebin_area0)

            #==================================================================
            # Neutrino coszen resolutions
            #==================================================================
            dmin = min(cz_err)
            dmax = max(cz_err)
            drange = dmax-dmin

            # NOTE the limits are 1 less than / 1 greater than the limits that
            # the error will actually take on, so as to allow for any smooth
            # roll-off at edges of data. The calculation of areas below
            # captures all of the area, though, by reflecting bins defined in
            # [-1, 1] about the points -1 and 1, thereby capturing any
            # densities in the range [-3, +3]. This is not necessarily
            # accurate, but it's better than throwing that info out entirely.
            #
            # NOTE also that reco events currently lie only in the range -1 to
            # 0, so there are "gaps" in the capture range; this is because
            # densities in the upper hemisphere are intentionally ignored, not
            # because the code here fails to take them into account.
            # Normalization is based upon *all* events, whether or not they
            # fall within a bin specified above.

            # Number of points in the mesh used for VBWKDE; must be large
            # enough to capture fast changes in the data but the larger the
            # number, the longer it takes to compute the densities at all the
            # points. Here, just choosing a fixed number regardless of the data
            # or binning
            N_cz_mesh = 2**10

            # Data range for VBWKDE to consider
            cz_gaus_kde_min = -3
            cz_gaus_kde_max = +2

            cz_gaus_kde_failed = False
            previous_fail = False
            for n in xrange(3):
                # TODO: only catch specific exception
                try:
                    cz_bw, cz_mesh, cz_pdf = kde.vbw_kde(
                        data           = cz_err,
                        overfit_factor = OVERFIT_FACTOR,
                        MIN            = cz_gaus_kde_min,
                        MAX            = cz_gaus_kde_max,
                        N              = N_cz_mesh
                    )
                except:
                    cz_gaus_kde_failed = True
                    if n == 0:
                        logging.trace('(cz vbwkde ')
                    logging.trace('fail, ')
                    # If failure occurred in vbw_kde, expand the data range it
                    # takes into account; this usually helps
                    cz_gaus_kde_min -= 1
                    cz_gaus_kde_max += 1
                else:
                    if cz_gaus_kde_failed:
                        previous_fail = True
                        logging.trace('success!')
                    cz_gaus_kde_failed = False
                finally:
                    if previous_fail:
                        logging.trace(')')
                    previous_fail = False
                    if not cz_gaus_kde_failed:
                        break

            if cz_gaus_kde_failed:
                logging.warn('Failed to fit VBWKDE!')
                continue

            if np.min(cz_pdf) < 0:
                logging.warn("np.min(cz_pdf) < 0: Minimum value is " +
                             str(np.min(cz_pdf)) +
                             "; forcing all negative values to 0.")
                # Clip the PDF itself (not the mesh) and keep the result
                cz_pdf = np.clip(a=cz_pdf, a_min=0, a_max=np.inf)

            assert np.min(cz_pdf) >= -self.EPSILON, \
                str(np.min(cz_pdf))

            for czbin_n in range(n_czbins):
                czbin_mid = czbin_centers[czbin_n]

                # Re-center distribution at the center of the current cz bin
                offset_cz_mesh = cz_mesh + czbin_mid

                # Create interpolation object, used to fill in bin edge values
                interp = interpolate.interp1d(
                    x             = offset_cz_mesh,
                    y             = cz_pdf,
                    kind          = 'linear',
                    copy          = True,
                    bounds_error  = False,
                    fill_value    = 0
                )

                # Figure out where all bin edges lie in this re-centered
                # distribution (some bins may be repeated since bins in [-1,0]
                # and err in [-2,1]:
                #
                # 1. Find limits of mesh values..
                mmin = offset_cz_mesh[0]
                mmax = offset_cz_mesh[-1]

                # 2. Map all bin edges into the full mesh-value range,
                # reflecting about -1 and +1. If the reflected edge is outside
                # the mesh range, use the exceeded limit of the mesh range as
                # the bin edge instead.
                #
                # This maps every bin edge {i} to 3 new edges, indexed
                # new_edges[i][{0,1,2}]. Bins are formed by adjacent indices
                # and same-subindices, so what started as, e.g., bin 3 now is
                # described by (left, right) edges at
                #   (new_edges[3][0], new_edges[4][0]),
                #   (new_edges[3][1], new_edges[4][1]), and
                #   (new_edges[3][2], new_edges[4][2]).

                # NOTE / TODO: It's tempting to dynamically set the number of
                # reflections to minimize computation time, but I think it
                # breaks the code. Just set to a reasonably large number for
                # now and accept the performance penalty. ALSO: if you change
                # the parity of the number of reflections, the code below that
                # has either (wrap_n % 2 == 0) or (wrap_n+1 % 2 == 0) must be
                # swapped!!!
                n_left_reflections = 4
                n_right_reflections = 4

                new_czbin_edges = []
                for edge in czbin_edges:
                    edges_refl_left = []
                    for n in xrange(n_left_reflections):
                        edge_refl_left = reflect1d(edge, -1-(2*n))
                        if edge_refl_left < mmin:
                            edge_refl_left = mmin
                        edges_refl_left.append(edge_refl_left)
                    edges_refl_right = []
                    for n in xrange(n_right_reflections):
                        edge_refl_right = reflect1d(edge, +1+(2*n))
                        if edge_refl_right > mmax:
                            edge_refl_right = mmax
                        edges_refl_right.append(edge_refl_right)
                    # Include all left-reflected versions of this bin edge, in
                    # increasing-x order + this bin edge + right-reflected
                    # versions of this bin edge
                    new_czbin_edges.append(edges_refl_left[::-1] + [edge]
                                           + edges_refl_right)

                # Record all unique bin edges
                edge_locs = set()
                [edge_locs.update(edges) for edges in new_czbin_edges]

                # Throw away bin edges that are already in the mesh
                [edge_locs.remove(edge) for edge in list(edge_locs)
                 if edge in offset_cz_mesh]

                # Make into sorted list
                edge_locs = sorted(edge_locs)

                # Record the total area under the curve
                int_val0 = np.trapz(y=cz_pdf, x=offset_cz_mesh)

                # Insert the missing bin edge locations & pdf-values into
                # the mesh & pdf, respectively
                edge_pdfs = interp(edge_locs)
                insert_ind = np.searchsorted(offset_cz_mesh, edge_locs)
                offset_cz_mesh = np.insert(offset_cz_mesh, insert_ind,
                                           edge_locs)
                offset_cz_pdf = np.insert(cz_pdf, insert_ind, edge_pdfs)
                assert np.min(offset_cz_pdf) > -self.EPSILON

                # Check that the full PDF (after inserting the edge points)
                # still integrates to approximately 1
                int_val = np.trapz(y=offset_cz_pdf, x=offset_cz_mesh)
                assert np.abs(int_val-1) < self.EPSILON

                # Renormalize if it's not exactly 1
                if int_val != 1.0:
                    offset_cz_pdf = offset_cz_pdf / int_val

                # Add up the area in the bin and areas that are "reflected"
                # into this bin
                new_czbin_edges = np.array(new_czbin_edges)
                czbin_areas = np.zeros(np.shape(new_czbin_edges)[0]-1)
                for wrap_n in range(np.shape(new_czbin_edges)[1]):
                    bin_edge_inds = np.searchsorted(offset_cz_mesh,
                                                    new_czbin_edges[:,wrap_n])
                    lbinds = bin_edge_inds[0:-1]
                    rbinds = bin_edge_inds[1:]
                    # Make sure indices that appear first are less than indices
                    # that appear second in a pair of bin indices
                    if (wrap_n+1) % 2 == 0:
                        bininds = zip(rbinds, lbinds)
                    else:
                        bininds = zip(lbinds, rbinds)
                    tmp_areas = []
                    for (binind_left_edge, binind_right_edge) in bininds:
                        if binind_left_edge == binind_right_edge:
                            tmp_areas.append(0)
                            continue
                        this_bin_area = np.array(np.trapz(
                            y=offset_cz_pdf[binind_left_edge:binind_right_edge+1],
                            x=offset_cz_mesh[binind_left_edge:binind_right_edge+1]
                        ))
                        tmp_areas.append(this_bin_area)
                    czbin_areas += np.array(tmp_areas)

                assert np.min(czbin_areas) > -self.EPSILON

                tot_czbin_area = np.sum(czbin_areas)
                assert tot_czbin_area < int_val + self.EPSILON

                kernel4d[ebin_n, czbin_n] = np.outer(ebin_areas, czbin_areas)
                assert (np.sum(kernel4d[ebin_n, czbin_n]) -
                        tot_ebin_area*tot_czbin_area) < self.EPSILON

        check_areas = kernel4d.sum(axis=(2,3))

        assert np.max(check_areas) < 1 + self.EPSILON, str(np.max(check_areas))
        assert np.min(check_areas) > 0 - self.EPSILON, str(np.min(check_areas))

        return kernel4d
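The coszen bin areas above are accumulated by reflecting each bin edge about the points -1 and +1 (and further odd points) so that density spilling past the physical boundaries is folded back in. Assuming `reflect1d(x, p)` mirrors x about the point p, i.e. returns 2*p - x (this definition is an assumption, not taken from the source), a short worked example for an edge at -0.8:

def reflect1d(x, p):
    """Assumed behavior: mirror x about the point p."""
    return 2.0 * p - x

edge = -0.8
reflect1d(edge, -1.0)   # -> -1.2  (first reflection about -1)
reflect1d(edge, -3.0)   # -> -5.2  (second reflection, about -1 - 2)
reflect1d(edge, +1.0)   # ->  2.8  (first reflection about +1)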
Exemplo n.º 29
        asimov_data_set = get_asimov_fmap(template_maker,
                                          asimov_params,
                                          channel=asimov_params['channel'])

        # Store injected true values in result:
        for key in free_params.keys():
            if 'theta23' in key: continue
            result['true_' + key].append(asimov_params[key])
        result['true_theta23'].append(step)

        result['asimov_data'].append(asimov_data_set)

        # now get fitted values of opposite hierarchy:
        hypo_normal = False if true_normal else True
        hypo_tag = 'hypo_IMH' if true_normal else 'hypo_NMH'
        llh_data = find_alt_hierarchy_fit(asimov_data_set,
                                          template_maker,
                                          params,
                                          hypo_normal,
                                          minimizer_settings,
                                          only_atm_params=False,
                                          check_octant=args.check_octant)

        for key in free_params.keys():
            result['fit_' + key].append(llh_data[key][-1])

    results[true_tag] = result

logging.warn("FINISHED. Saving to file: %s" % args.outfile)
to_json(results, args.outfile)
Exemplo n.º 30
def plot_cmp(new,
             ref,
             new_label,
             ref_label,
             plot_label,
             file_label,
             outdir,
             ftype='png'):
    """Plot comparisons between two (identically-binned) maps or map sets.

    Parameters
    ----------
    new : Map or MapSet
    ref : Map or MapSet
    new_label : str
    ref_label : str
    plot_label : str
    file_label : str
    outdir : str
    ftype : str

    """
    path = [outdir]

    if isinstance(ref, Map):
        assert isinstance(new, Map)
        ref_maps = [ref]
        new_maps = [new]
    else:
        # Assume a MapSet (or any iterable of Maps); iterate over its maps
        ref_maps = ref
        new_maps = new

    if outdir is not None:
        mkdir(os.path.join(*path), warn=False)

    for ref, new in zip(ref_maps, new_maps):
        assert ref.binning == new.binning
        fname = get_valid_filename('__'.join([
            get_valid_filename(file_label),
            '%s_vs_%s' % (get_valid_filename(new_label.lower()),
                          get_valid_filename(ref_label.lower()))
        ]) + '.' + ftype)
        path.append(fname)

        ratio = new / ref
        diff = new - ref
        fract_diff = diff / ref

        finite_ratio = ratio.hist[np.isfinite(ratio.hist)]
        ratio_mean = np.mean(finite_ratio)
        ratio_median = np.median(finite_ratio)

        finite_diff = diff.hist[np.isfinite(diff.hist)]
        diff_mean = np.mean(finite_diff)
        diff_median = np.median(finite_diff)

        finite_fract_diff = fract_diff.hist[np.isfinite(fract_diff.hist)]
        fract_diff_mean = np.mean(finite_fract_diff)
        fract_diff_median = np.median(finite_fract_diff)

        max_diff_ratio = np.nanmax(fract_diff.hist)

        # Handle cases where ratio returns infinite
        # This isn't necessarily a failure, since all it means is that the
        # reference was zero. If the new value is sufficiently close to zero
        # then it's still fine.
        if max_diff_ratio == np.inf:
            logging.warn(
                'Infinite value found in ratio tests. Difference tests'
                ' now also being calculated')
            # First find all the finite elements
            finite_mask = np.isfinite(fract_diff.hist)
            # Then find the nanmax of this, will be our new test value
            max_diff_ratio = np.nanmax(fract_diff.hist[finite_mask])
            # Also find all the infinite elements; compute a second test value
            max_diff = np.nanmax(diff.hist[~finite_mask])
        else:
            # Without any infinite elements we can ignore this second test
            max_diff = 0.0

        if outdir is not None:
            if new.binning.num_dims == 2:
                n_dims = 2
                n_third_dim_bins = 1
            elif new.binning.num_dims == 3:
                n_dims = 3
                odd_dim_idx = new.binning.shape.index(np.min(
                    new.binning.shape))
                logging.debug('odd_dim_idx: %s', odd_dim_idx)
                n_third_dim_bins = new.binning.shape[odd_dim_idx]

            gridspec_kw = dict(left=0.03, right=0.968, wspace=0.32)
            fig, axes = plt.subplots(nrows=n_third_dim_bins,
                                     ncols=5,
                                     gridspec_kw=gridspec_kw,
                                     squeeze=False,
                                     sharex=False,
                                     sharey=False,
                                     figsize=(20, 5))

            refslice = ref
            newslice = new
            bin_names = None
            if n_dims == 3:
                if odd_dim_idx != 0:
                    refslice = np.moveaxis(ref,
                                           source=odd_dim_idx,
                                           destination=0)
                    newslice = np.moveaxis(new,
                                           source=odd_dim_idx,
                                           destination=0)
                bin_names = new.binning.dims[odd_dim_idx].bin_names

            for odd_bin_idx in range(n_third_dim_bins):
                if n_dims == 2:
                    thisbin_ref = refslice
                    thisbin_new = newslice
                    tmp_ref_label = ref_label
                    tmp_new_label = new_label

                elif n_dims == 3:
                    thisbin_ref = refslice[odd_bin_idx, ...].squeeze()
                    thisbin_new = newslice[odd_bin_idx, ...].squeeze()

                    if bin_names is not None:
                        suffix = bin_names[odd_bin_idx]
                    else:
                        suffix = format(odd_bin_idx, 'd')
                    tmp_new_label = new_label + ' ' + suffix
                    tmp_ref_label = ref_label + ' ' + suffix

                    ratio = thisbin_new / thisbin_ref
                    diff = thisbin_new - thisbin_ref
                    fract_diff = diff / thisbin_ref

                refmax = np.nanmax(thisbin_ref.hist)
                newmax = np.nanmax(thisbin_new.hist)
                vmax = refmax if refmax > newmax else newmax

                baseplot2(map=thisbin_new,
                          title=tmp_new_label,
                          vmax=vmax,
                          evtrate=True,
                          ax=axes[odd_bin_idx][0])

                baseplot2(map=thisbin_ref,
                          title=tmp_ref_label,
                          vmax=vmax,
                          evtrate=True,
                          ax=axes[odd_bin_idx][1])

                ax, _, _ = baseplot2(map=ratio,
                                     title='%s/%s' %
                                     (tmp_new_label, tmp_ref_label),
                                     ax=axes[odd_bin_idx][2])
                ax.text(0.95,
                        0.95,
                        "Mean: %.6f" % ratio_mean,
                        horizontalalignment='right',
                        transform=ax.transAxes,
                        color=(0, 0.8, 0.8))
                ax.text(0.95,
                        0.91,
                        "Median: %.6f" % ratio_median,
                        horizontalalignment='right',
                        transform=ax.transAxes,
                        color=(0, 0.8, 0.8))

                ax, _, _ = baseplot2(map=diff,
                                     title='%s-%s' %
                                     (tmp_new_label, tmp_ref_label),
                                     symm=True,
                                     ax=axes[odd_bin_idx][3])
                ax.text(0.95,
                        0.95,
                        "Mean: %.6f" % diff_mean,
                        horizontalalignment='right',
                        transform=ax.transAxes)
                ax.text(0.95,
                        0.91,
                        "Median: %.6f" % diff_median,
                        horizontalalignment='right',
                        transform=ax.transAxes)

                ax, _, _ = baseplot2(
                    map=fract_diff,
                    title='(%s-%s)/%s' %
                    (tmp_new_label, tmp_ref_label, tmp_ref_label),
                    symm=True,
                    ax=axes[odd_bin_idx][4])
                ax.text(0.95,
                        0.95,
                        "Mean: %.6f" % fract_diff_mean,
                        horizontalalignment='right',
                        transform=ax.transAxes)
                ax.text(0.95,
                        0.91,
                        "Median: %.6f" % fract_diff_median,
                        horizontalalignment='right',
                        transform=ax.transAxes)

            fig_path = os.path.join(*path)
            logging.debug('>>>> Plot for inspection saved at %s', fig_path)
            fig.savefig(fig_path)
            plt.close(fig.number)

        return max_diff_ratio, max_diff
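
The panels drawn above are built from three element-wise comparisons between the new and reference maps (ratio, difference, and fractional difference). A minimal, self-contained sketch of those statistics on plain NumPy arrays is given below; the function and the `new_hist`/`ref_hist` names are illustrative and not part of the original code.

# Illustrative sketch (not from the original source): the element-wise
# comparison statistics underlying the ratio/diff/fract_diff panels above,
# computed for two plain NumPy histograms.
import numpy as np

def compare_hists(new_hist, ref_hist):
    ratio = new_hist / ref_hist
    diff = new_hist - ref_hist
    fract_diff = diff / ref_hist
    return {
        'ratio_mean': np.nanmean(ratio),
        'ratio_median': np.nanmedian(ratio),
        'max_abs_diff': np.nanmax(np.abs(diff)),
        'max_abs_fract_diff': np.nanmax(np.abs(fract_diff)),
    }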
Exemplo n.º 31
0
set_verbosity(args.verbose)

data_files = {'nue': args.nue, 'numu': args.numu, 'nutau': args.nutau}

logging.info("Input files:\n%s", data_files)

# Ensure overwrite of any existing output file...
outfilename = args.outfile
fh = h5py.File(outfilename, 'w')
fh.close()
logging.info("Writing to file: %s", outfilename)

# Define V3, V4, or V5 cuts:
cut_list = []
if args.cutsV3:
    logging.warn("Using cuts V3...")
    cut_list.append(('NewestBgRejCutsStep1','value',True))
    cut_list.append(('NewestBgRejCutsStep2','value',True))
elif args.cutsV4:
    logging.warn("Using cuts V4...")
    cut_list.append(('Cuts_V4_Step1','value',True))
    cut_list.append(('Cuts_V4_Step2','value',True))
elif args.cutsV5:
    logging.warn("Using cuts V5...")
    cut_list.append(('Cuts_V5_Step1','value',True))
    cut_list.append(('Cuts_V5_Step2','value',True))
elif args.nocuts:
    logging.warn("Using no selection cuts!")
    cut_list = []
elif args.custom:
    logging.warn("Using CUSTOM cuts: %s..."%args.custom_str)
Exemplo n.º 32
0
    def get_hypersurface(self, **param_kw):
        """
        Get a Hypersurface object with interpolated coefficients.

        Parameters
        ----------
        **param_kw
            Parameters are given as keyword arguments, where the names
            of the arguments must match the names of the parameters over
            which the hypersurfaces are interpolated. The values
            are given as :obj:`Quantity` objects with units.
        """
        assert set(param_kw.keys()) == set(
            self.interp_param_spec.keys()), "invalid parameters"
        # getting param magnitudes in the same units as the parameter specification
        x = np.array([
            param_kw[p].m_as(self.interp_param_spec[p]["values"][0].u)
            # we have checked that this is an OrderedDict so that the order of x is not
            # ambiguous here
            for p in self.interp_param_spec.keys()
        ])
        assert len(x) == len(self.param_bounds)
        for i, bounds in enumerate(self.param_bounds):
            x[i] = np.clip(x[i], *bounds)
        # if a parameter scales as log, we have to take the log here again
        for i, param_name in enumerate(self.interpolation_param_names):
            if self.interp_param_spec[param_name]["scales_log"]:
                # We must be strict with raising errors here, because otherwise
                # the Hypersurface will suddenly have NaNs everywhere! This shouldn't
                # happen because we clip values into the valid parameter range.
                if x[i] <= 0:
                    raise RuntimeError(
                        "A log-scaling parameter cannot become zero "
                        "or negative!")
                x[i] = np.log10(x[i])

        state = copy.deepcopy(self._reference_state)
        # fit covariance matrices are stored directly in the state while fit coeffts
        # must be assigned with the setter method...
        # need squeeze here because the RegularGridInterpolator always puts another
        # dimension around the output
        state["fit_cov_mat"] = np.squeeze(self.covars(x))
        assert state["fit_cov_mat"].shape == self.covars_shape
        if not self.ignore_nan:
            for idx in np.ndindex(state['fit_cov_mat'].shape):
                assert np.isfinite(state['fit_cov_mat'][idx]), (
                    "invalid cov matrix element encountered "
                    f"at {param_kw} in loc {idx}")
        # check covariance matrices for symmetry, positive semi-definiteness
        for bin_idx in np.ndindex(state['fit_cov_mat'].shape[:-2]):
            m = state['fit_cov_mat'][bin_idx]
            if self.ignore_nan and np.any(~np.isfinite(m)):
                state['fit_cov_mat'][bin_idx] = np.identity(m.shape[0])
                m = state['fit_cov_mat'][bin_idx]
            assert np.allclose(
                m, m.T, rtol=ALLCLOSE_KW['rtol'] *
                10.), f'cov matrix not symmetric in bin {bin_idx}'
            if not matrix.is_psd(m):
                state['fit_cov_mat'][bin_idx] = matrix.fronebius_nearest_psd(m)
                if bin_idx not in self.covar_bins_warning_issued:
                    logging.warn(
                        f'Invalid covariance matrix fixed in bin: {bin_idx}')
                    self.covar_bins_warning_issued.append(bin_idx)
        hypersurface = Hypersurface.from_state(state)
        coeffts = np.squeeze(self.coefficients(x))  # calls interpolator
        assert coeffts.shape == self.coeff_shape
        # check that coefficients exist and if not replace with default values
        for idx in np.ndindex(self.coeff_shape):
            if self.ignore_nan and not np.isfinite(coeffts[idx]):
                # set intercept to 1, slopes to 0
                coeffts[idx] = 1 if idx[-1] == 0 else 0
            assert np.isfinite(coeffts[idx]), ("invalid coeff encountered at "
                                               f"{param_kw} in loc {idx}")
        # the setter method defined in the Hypersurface class takes care of
        # putting the coefficients in the right place in their respective parameters
        hypersurface.fit_coeffts = coeffts
        return hypersurface
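
As the docstring above describes, the interpolated hypersurface is requested by passing one keyword argument per interpolation parameter, each value being a Pint Quantity with units. A hedged usage sketch follows; the `interp_hs` object and the `dom_eff` parameter name are hypothetical, and `ureg` is assumed to be the Pint unit registry exposed by PISA.

# Hypothetical usage sketch: `interp_hs` stands in for an object providing
# the get_hypersurface() method above; `dom_eff` stands in for whatever
# parameter(s) the hypersurfaces were actually interpolated over.
from pisa import ureg

hs = interp_hs.get_hypersurface(dom_eff=1.05 * ureg.dimensionless)
# `hs` is a Hypersurface with interpolated fit coefficients and covariance,
# with the requested parameter values clipped to the valid bounds as above.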
Exemplo n.º 33
0
    def single_kernel_set(self, e_true, cz_true, e_reco, cz_reco,
                          flav, int_type, make_plots=False, out_dir=None):
        """Construct a 4D kernel set from MC events using VBWKDE.

        Given a set of MC events and each of their {energy{true, reco},
        coszen{true, reco}}, generate a 4D NumPy array that maps a 2D true-flux
        histogram onto the corresponding 2D reco-flux histogram.

        The resulting 4D array can be indexed logically using
          kernel4d[e_true_i, cz_true_j][e_reco_k, cz_reco_l]
        where the 4 indices point from a single MC-true histogram bin (i,j) to
        a single reco histogram bin (k,l).

        Binning of both MC-true and reco histograms is the same and is given by
        the values in self.ebins and self.czbins which define the bin *edges*
        (not the bin centers; hence, len(self.ebins) is one greater than the
        number of bins, etc.).

        NOTE: Actual limits in energy used to group events into a single "true"
        bin may be extended beyond the bin edges defined by self.ebins in order
        to gather enough events to successfully apply VBWKDE.

        Parameters
        ----------
        e_true : sequence
            MC-true neutrino energies, one per event
        cz_true : sequence
            MC-true neutrino coszen, one per event
        e_reco : sequence
            Reconstructed neutrino energies, one per event
        cz_reco : sequence
            Reconstructed neutrino coszen, one per event
        flav : str
        int_type : str
        make_plots : bool
        out_dir : str or None
            path to directory into which to save plots. ``None`` (default)
            saves to PWD.

        Returns
        -------
        kernel4d : 4D array of float
            Mapping from the number of events in each bin of the 2D
            MC-true-events histogram to the number of events reconstructed in
            each bin of the 2D reconstructed-events histogram. Dimensions are
              len(self.ebins)-1 x len(self.czbins)-1 x len(self.ebins)-1 x
              len(self.czbins)-1
            since ebins and czbins define the histograms' bin edges.
        """
        OVERFIT_FACTOR = 1.0

        if make_plots:
            import matplotlib as mpl
            import matplotlib.pyplot as plt
            from matplotlib.backends.backend_pdf import PdfPages
            from matplotlib.patches import Rectangle
            plt.close(1)
            plt.close(2)
            plt.close(3)
            def rugplot(a, y0, dy, ax, **kwargs):
                return ax.plot([a,a], [y0, y0+dy], **kwargs)
            plot_fname = '_'.join(['resolutions', 'vbwkde', flav, int_type]) + '.pdf'
            if out_dir is not None:
                plot_fname = os.path.join(out_dir, plot_fname)
            TOP = 0.925
            BOTTOM = 0.05
            RIGHT = 0.97
            LEFT = 0.07
            HSPACE = 0.12
            LABELPAD = 0.058
            AXISBG = (0.5, 0.5, 0.5)
            DARK_RED =  (0.7, 0.0, 0.0)
            HIST_PP = dict(
                facecolor=(1,0.5,0.5), edgecolor=DARK_RED,
                histtype='stepfilled', alpha=0.7, linewidth=2.0,
                label=r'$\mathrm{Histogram}$'
            )
            N_HBINS = 25
            DIFFUS_PP = dict(
                color=(0.0, 0.0, 0.0), linestyle='-', marker=None, alpha=0.6,
                linewidth=2.0, label=r'$\mathrm{VBWKDE}$'
            )
            RUG_PP = dict(color=(1.0, 1.0, 1.0), linewidth=0.4, alpha=0.5)
            RUG_LAB =r'$\mathrm{Rug\,plot}$'
            LEGFNTCOL = (1,1,1)
            LEGFACECOL = (0.2,0.2,0.2)
            GRIDCOL = (0.4, 0.4, 0.4)
            pdfpgs = PdfPages(plot_fname)

        assert np.min(np.diff(self.ebins)) > 0, \
            "Energy bin edges not monotonically increasing."
        assert np.min(np.diff(self.czbins)) > 0, \
            "coszen bin edges not monotonically increasing."

        # NOTE: below defines bin centers on linear scale; other logic
        # in this method assumes this to be the case, so
        # **DO NOT USE** utils.utils.get_bin_centers in this method, which
        # may return logarithmically-defined centers instead.

        ebin_edges = np.array(self.ebins)
        left_ebin_edges = ebin_edges[0:-1]
        right_ebin_edges = ebin_edges[1:]
        ebin_centers = (left_ebin_edges+right_ebin_edges)/2.0
        ebin_range = ebin_edges[-1] - ebin_edges[0]
        n_ebins = len(ebin_centers)

        czbin_edges = np.array(self.czbins)
        left_czbin_edges = czbin_edges[0:-1]
        right_czbin_edges = czbin_edges[1:]
        czbin_centers = (left_czbin_edges+right_czbin_edges)/2.0
        n_czbins = len(czbin_centers)

        n_events = len(e_true)

        if self.MIN_NUM_EVENTS > n_events:
            self.MIN_NUM_EVENTS = n_events
        if self.TGT_NUM_EVENTS > n_events:
            self.TGT_NUM_EVENTS = n_events

        # Object with which to store the 4D kernels: np 4D array
        kernel4d = np.zeros((n_ebins, n_czbins, n_ebins, n_czbins))

        # Object with which to store the 2D "aggregate_map": the total number
        # of events reconstructed into a given (E, CZ) bin, used for sanity
        # checks
        aggregate_map = np.zeros((n_ebins, n_czbins))
        for ebin_n in range(n_ebins):
            ebin_min = left_ebin_edges[ebin_n]
            ebin_max = right_ebin_edges[ebin_n]
            ebin_mid = (ebin_min+ebin_max)/2.0
            ebin_wid = ebin_max-ebin_min

            logging.debug(
                'Processing true-energy bin_n=' + format(ebin_n, 'd') + ' of ' +
                format(n_ebins-1, 'd') + ', E_{nu,true} in ' +
                '[' + format(ebin_min, '0.3f') + ', ' +
                format(ebin_max, '0.3f') + '] ...'
            )

            # Absolute distance from these events' true energies to the center
            # of this energy bin; sort in ascending-distance order
            abs_enu_dist = np.abs(e_true - ebin_mid)
            sorted_abs_enu_dist = np.sort(abs_enu_dist)

            # Grab the distance the number-"TGT_NUM_EVENTS" event is from the
            # bin center
            tgt_thresh_enu_dist = sorted_abs_enu_dist[self.TGT_NUM_EVENTS-1]

            # Grab the distance the number-"MIN_NUM_EVENTS" event is from the
            # bin center
            min_thresh_enu_dist = sorted_abs_enu_dist[self.MIN_NUM_EVENTS-1]

            # TODO: revisit the below algorithm with proper testing

            # Make threshold distance (which is half the total width) no more
            # than 4x the true-energy-bin width in order to capture the
            # "target" number of points (TGT_NUM_EVENTS) but no less than half
            # the bin width (i.e., the bin should be at least be as wide as the
            # pre-defined bin width).
            #
            # HOWEVER, allow the threshold distance (bin half-width) to expand
            # to as much as 4x the original bin full-width in order to capture
            # the "minimum" number of points (MIN_NUM_EVENTS).
            thresh_enu_dist = \
                    max(min(max(tgt_thresh_enu_dist, ebin_wid/2),
                            4*ebin_wid),
                        min_thresh_enu_dist)

            # Grab all events within the threshold distance
            in_ebin_ind = np.where(abs_enu_dist <= thresh_enu_dist)[0]
            #print '** IN EBIN FIRST, LAST ENERGY:', e_reco[in_ebin_ind[0]], e_reco[in_ebin_ind[-1]]
            n_in_bin = len(in_ebin_ind)

            # Record lowest/highest energies that are included in the bin
            actual_left_ebin_edge = min(ebin_min, min(e_true[in_ebin_ind])) #max(min(ebins), ebin_mid-thresh_enu_dist)
            actual_right_ebin_edge = max(ebin_max, max(e_true[in_ebin_ind])) #(max(ebins), ebin_mid+thresh_enu_dist)

            # Extract just the neutrino-energy/coszen error columns' values for
            # succinctness
            enu_err = e_reco[in_ebin_ind] - e_true[in_ebin_ind]
            cz_err = cz_reco[in_ebin_ind] - cz_true[in_ebin_ind]

            #==================================================================
            # Neutrino energy resolutions
            #==================================================================
            dmin = min(enu_err)
            dmax = max(enu_err)
            drange = dmax-dmin

            e_lowerlim = min(self.ENERGY_RANGE[0]-ebin_mid*1.5, dmin-drange*0.5)
            e_upperlim = max((np.max(ebin_edges)-ebin_mid)*1.5, dmax+drange*0.5)
            egy_kde_lims = np.array([e_lowerlim, e_upperlim])

            # Use at least min_num_pts points; if that is not enough to give
            # min_pts_smallest_bin points in the smallest energy bin, use the
            # next-highest integer power of two that is
            min_num_pts = 2**12
            min_bin_width = np.min(ebin_edges[1:]-ebin_edges[:-1])
            min_pts_smallest_bin = 5.0
            kde_range = np.diff(egy_kde_lims)
            num_pts0 = kde_range/(min_bin_width/min_pts_smallest_bin)
            kde_num_pts = int(max(min_num_pts, 2**np.ceil(np.log2(num_pts0))))
            logging.debug(
                '  N_evts=' + str(n_in_bin) + ', taken from [' +
                format(actual_left_ebin_edge, '0.3f') + ', ' +
                format(actual_right_ebin_edge, '0.3f') + ']' + ', VBWKDE lims=' +
                str(egy_kde_lims) + ', VBWKDE_N: ' + str(kde_num_pts)
            )

            # Compute variable-bandwidth KDEs
            enu_bw, enu_mesh, enu_pdf = kde.vbw_kde(
                data           = enu_err,
                overfit_factor = OVERFIT_FACTOR,
                MIN            = egy_kde_lims[0],
                MAX            = egy_kde_lims[1],
                N              = kde_num_pts
            )

            if np.min(enu_pdf) < 0:
                # Only issue warning if the most-negative value is negative
                # beyond specified acceptable-numerical-precision threshold
                # (EPSILON)
                if np.min(enu_pdf) <= -self.EPSILON:
                    logging.warn(
                        "np.min(enu_pdf) < 0: Minimum value is " +
                        str(np.min(enu_pdf)) +
                        "; forcing all negative values to 0."
                    )
                # Otherwise, just quietly clip any negative values at 0
                enu_pdf = np.clip(a=enu_pdf, a_min=0, a_max=np.inf)

            assert np.min(enu_pdf) >= 0, str(np.min(enu_pdf))

            # Re-center distribution at the center of the energy bin for which
            # errors were computed
            offset_enu_mesh = enu_mesh+ebin_mid
            offset_enu_pdf = enu_pdf

            # Get reference area under the PDF, for checking after interpolated
            # values are added.
            #
            # NOTE There should be NO normalization because any events lost due
            # to cutting off tails outside the binned region are actually going
            # to be lost, and so should penalize the total area.
            int_val0 = np.trapz(y=offset_enu_pdf,
                                x=offset_enu_mesh)

            # Create linear interpolator for the PDF
            interp = interpolate.interp1d(
                x             = offset_enu_mesh,
                y             = offset_enu_pdf,
                kind          = 'linear',
                copy          = True,
                bounds_error  = True,
                fill_value    = np.nan
            )

            # Insert all bin edges' exact locations into the mesh (For accurate
            # accounting of area in each bin, must include values out to bin
            # edges)
            edge_locs = [be for be in
                         np.concatenate((left_ebin_edges, right_ebin_edges))
                         if be not in offset_enu_mesh]
            edge_locs.sort()
            edge_pdfs = interp(edge_locs)
            insert_ind = np.searchsorted(offset_enu_mesh, edge_locs)
            offset_enu_mesh = np.insert(offset_enu_mesh, insert_ind, edge_locs)
            offset_enu_pdf = np.insert(offset_enu_pdf, insert_ind, edge_pdfs)

            int_val = np.trapz(y=offset_enu_pdf, x=offset_enu_mesh)

            assert np.abs(int_val - int_val0) < self.EPSILON

            # Chop off distribution at extrema of energy bins
            valid_ind = np.where(
                (offset_enu_mesh >= np.min(ebin_edges)) &
                (offset_enu_mesh <= np.max(ebin_edges))
            )[0]
            offset_enu_mesh = offset_enu_mesh[valid_ind]
            offset_enu_pdf = offset_enu_pdf[valid_ind]

            # Check that there are no negative density values (after inserts)
            assert np.min(offset_enu_pdf) > 0-self.EPSILON, \
                str(np.min(offset_enu_pdf))

            # Record the integrated area after removing parts outside binned
            # range
            tot_ebin_area0 = np.trapz(y=offset_enu_pdf,
                                      x=offset_enu_mesh)

            # Check that it integrates to <= 1, sanity check
            assert tot_ebin_area0 < 1+self.EPSILON, str(tot_ebin_area0)

            # Identify indices encapsulating the defined energy bins' ranges,
            # and find the area of each bin
            lbinds = np.searchsorted(offset_enu_mesh, left_ebin_edges)
            rbinds = np.searchsorted(offset_enu_mesh, right_ebin_edges)
            bininds = zip(lbinds, rbinds)
            ebin_areas = [np.trapz(y=offset_enu_pdf[l:r+1],
                                   x=offset_enu_mesh[l:r+1])
                          for (l, r) in bininds]

            # Check that no bins have negative areas
            assert np.min(ebin_areas) >= 0

            # Sum the individual bins' areas
            tot_ebin_area = np.sum(ebin_areas)

            # Check that this total of all the bins is equal to the total area
            # under the curve (i.e., make sure there is no overlap or gaps
            # between bins)
            assert np.abs(tot_ebin_area-tot_ebin_area0) < self.EPSILON, \
                    'tot_ebin_area=' + str(tot_ebin_area) + \
                    ' should equal tot_ebin_area0=' + str(tot_ebin_area0)

            if make_plots:
                fig1 = plt.figure(1, figsize=(8,10), dpi=90)
                fig1.clf()
                ax1 = fig1.add_subplot(211, axisbg=AXISBG)

                # Retrieve region where VBWKDE lives
                ml_ci = confInterval.MLConfInterval(x=enu_mesh, y=enu_pdf)
                #for conf in np.logspace(np.log10(0.999), np.log10(0.95), 50):
                #    try:
                #        lb, ub, yopt, r = ml_ci.findCI_lin(conf=conf)
                #    except:
                #        pass
                #    else:
                #        break
                #xlims = (min(-ebin_mid*1.5, lb),
                #         max(min(ub, 6*ebin_mid),2*ebin_mid))
                lb, ub, yopt, r = ml_ci.findCI_lin(conf=0.98)
                xlims = (lb, #min(-ebin_mid*1.5, lb),
                         max(min(ub, 6*ebin_mid),2*ebin_wid))

                #xlims = (
                #    -ebin_wid*1.5,
                #    ebin_wid*1.5
                #)
                #    min(ebin_mid*2, ebin_edges[-1]+(ebin_edges[-1]-ebin_edges[0])*0.1)
                #)

                # Histogram of events' reco error
                hbins = np.linspace(
                    dmin-0.02*drange, dmax+0.02*drange,
                    int(N_HBINS*np.round(drange/ebin_centers[ebin_n])))
                hvals, hbins, hpatches = ax1.hist(enu_err,
                                                  bins=hbins,
                                                  normed=True,
                                                  **HIST_PP)

                # Plot the VBWKDE
                ax1.plot(enu_mesh, enu_pdf, **DIFFUS_PP)
                axlims = ax1.axis('tight')
                ax1.set_xlim(xlims)
                ymax = axlims[3]*1.05
                ax1.set_ylim(0, ymax)

                # Grey-out regions outside binned region, so it's clear what
                # part of tail(s) will be thrown away
                width = -ebin_mid+ebin_edges[0]-xlims[0]
                unbinned_region_tex = r'$\mathrm{Unbinned}$'
                if width > 0:
                    ax1.add_patch(Rectangle((xlims[0],0), width, ymax, #zorder=-1,
                                            alpha=0.30, facecolor=(0.0 ,0.0, 0.0), fill=True,
                                            ec='none'))
                    ax1.text(xlims[0]+(xlims[1]-xlims[0])/40., ymax/10.,
                             unbinned_region_tex, fontsize=14, ha='left',
                             va='bottom', rotation=90, color='k')
                
                width = xlims[1] - (ebin_edges[-1]-ebin_mid)
                if width > 0:
                    ax1.add_patch(Rectangle((xlims[1]-width,0), width, ymax,
                                            alpha=0.30, facecolor=(0, 0, 0),
                                            fill=True, ec='none'))
                    ax1.text(xlims[1]-(xlims[1]-xlims[0])/40., ymax/10.,
                             unbinned_region_tex, fontsize=14, ha='right',
                             va='bottom', rotation=90, color='k')

                # Rug plot of events' reco energy errors
                ylim = ax1.get_ylim()
                dy = ylim[1] - ylim[0]
                ruglines = rugplot(enu_err, y0=ylim[1], dy=-dy/40., ax=ax1,
                                   **RUG_PP)
                ruglines[-1].set_label(RUG_LAB)

                # Legend
                leg_title_tex = r'$\mathrm{Normalized}\,E_\nu\mathrm{-err.\,distr.}$'
                x1lab = ax1.set_xlabel(
                    r'$E_{\nu,\mathrm{reco}}-E_{\nu,\mathrm{true}}\;' +
                    r'(\mathrm{GeV})$', labelpad=LABELPAD
                )
                leg = ax1.legend(loc='upper right', title=leg_title_tex,
                                 frameon=True, framealpha=0.8,
                                 fancybox=True, bbox_to_anchor=[1,0.975])

                # Other plot details
                ax1.xaxis.set_label_coords(0.9, -LABELPAD)
                ax1.xaxis.grid(color=GRIDCOL)
                ax1.yaxis.grid(color=GRIDCOL)
                leg.get_title().set_fontsize(16)
                leg.get_title().set_color(LEGFNTCOL)
                [t.set_color(LEGFNTCOL) for t in leg.get_texts()]
                frame = leg.get_frame()
                frame.set_facecolor(LEGFACECOL)
                frame.set_edgecolor(None)

            #==================================================================
            # Neutrino coszen resolution for events in this energy bin
            #==================================================================
            dmin = min(cz_err)
            dmax = max(cz_err)
            drange = dmax-dmin

            # NOTE the limits are 1 less than / 1 greater than the limits that
            # the error will actually take on, so as to allow for any smooth
            # roll-off at edges of data. The calculation of areas below
            # captures all of the area, though, by reflecting bins defined in
            # [-1, 1] about the points -1 and 1, thereby capturing any
            # densities in the range [-3, +3]. This is not necessarily
            # accurate, but it's better than throwing that info out entirely.
            #
            # NOTE also that since reco events as of now are only in range -1
            # to 0, though, that there are "gaps" in the capture range, but
            # this is due to densities being in the upper-hemisphere which we
            # are intentionally ignoring, rather than the code here not taking
            # them into account. Normalization is based upon *all* events,
            # whether or not they fall within a bin specified above.

            # Number of points in the mesh used for VBWKDE; must be large
            # enough to capture fast changes in the data but the larger the
            # number, the longer it takes to compute the densities at all the
            # points. Here, just choosing a fixed number regardless of the data
            # or binning
            N_cz_mesh = 2**10

            # Data range for VBWKDE to consider
            cz_kde_min = -3
            cz_kde_max = +2

            cz_kde_failed = False
            previous_fail = False
            for n in xrange(3):
                # TODO: only catch specific exception
                try:
                    cz_bw, cz_mesh, cz_pdf = kde.vbw_kde(
                        data           = cz_err,
                        overfit_factor = OVERFIT_FACTOR,
                        MIN            = cz_kde_min,
                        MAX            = cz_kde_max,
                        N              = N_cz_mesh
                    )
                except:
                    cz_kde_failed = True
                    if n == 0:
                        logging.trace('(cz vbwkde ')
                    logging.trace('fail, ')
                    # If failure occurred in vbw_kde, expand the data range it
                    # takes into account; this usually helps
                    cz_kde_min -= 1
                    cz_kde_max += 1
                else:
                    if cz_kde_failed:
                        previous_fail = True
                        logging.trace('success!')
                    cz_kde_failed = False
                finally:
                    if previous_fail:
                        logging.trace(')')
                    previous_fail = False
                    if not cz_kde_failed:
                        break

            if cz_kde_failed:
                logging.warn('Failed to fit VBWKDE!')
                continue

            if np.min(cz_pdf) < 0:
                logging.warn("np.min(cz_pdf) < 0: Minimum value is " +
                             str(np.min(cz_pdf)) +
                             "; forcing all negative values to 0.")
                cz_pdf = np.clip(a=cz_pdf, a_min=0, a_max=np.inf)

            assert np.min(cz_pdf) >= -self.EPSILON, \
                str(np.min(cz_pdf))

            # TODO: test and/or visualize the shifting & re-binning process
            for czbin_n in range(n_czbins):
                czbin_mid = czbin_centers[czbin_n]

                # Re-center distribution at the center of the current cz bin
                offset_cz_mesh = cz_mesh + czbin_mid

                # Create interpolation object, used to fill in bin edge values
                interp = interpolate.interp1d(
                    x             = offset_cz_mesh,
                    y             = cz_pdf,
                    kind          = 'linear',
                    copy          = True,
                    bounds_error  = False,
                    fill_value    = 0
                )

                # Figure out where all bin edges lie in this re-centered
                # distribution (some bins may be repeated since bins in [-1,0]
                # and err in [-2,1]:
                #
                # 1. Find limits of mesh values..
                mmin = offset_cz_mesh[0]
                mmax = offset_cz_mesh[-1]

                # 2. Map all bin edges into the full mesh-value range,
                # reflecting about -1 and +1. If the reflected edge is outside
                # the mesh range, use the exceeded limit of the mesh range as
                # the bin edge instead.
                #
                # This maps every bin edge {i} to 3 new edges, indexed
                # new_edges[i][{0,1,2}]. Bins are formed by adjacent indices
                # and same-subindices, so what started as, e.g., bin 3 now is
                # described by (left, right) edges at
                #   (new_edges[3][0], new_edges[4][0]),
                #   (new_edges[3][1], new_edges[4][1]), and
                #   (new_edges[3][2], new_edges[4][2]).

                # NOTE / TODO: It's tempting to dynamically set the number of
                # reflections to minimize computation time, but I think it
                # breaks the code. Just set to a reasonably large number for
                # now and accept the performance penalty. ALSO: if you change
                # the parity of the number of reflections, the code below that
                # has either (wrap_n % 2 == 0) or (wrap_n+1 % 2 == 0) must be
                # swapped!!!
                n_left_reflections = 4
                n_right_reflections = 4

                new_czbin_edges = []
                for edge in czbin_edges:
                    edges_refl_left = []
                    for n in xrange(n_left_reflections):
                        edge_refl_left = reflect1d(edge, -1-(2*n))
                        if edge_refl_left < mmin:
                            edge_refl_left = mmin
                        edges_refl_left.append(edge_refl_left)
                    edges_refl_right = []
                    for n in xrange(n_right_reflections):
                        edge_refl_right = reflect1d(edge, +1+(2*n))
                        if edge_refl_right > mmax:
                            edge_refl_right = mmax
                        edges_refl_right.append(edge_refl_right)
                    # Include all left-reflected versions of this bin edge, in
                    # increasing-x order + this bin edge + right-reflected
                    # versions of this bin edge
                    new_czbin_edges.append(edges_refl_left[::-1] + [edge]
                                           + edges_refl_right)

                # Record all unique bin edges
                edge_locs = set()
                [edge_locs.update(edges) for edges in new_czbin_edges]

                # Throw away bin edges that are already in the mesh
                [edge_locs.remove(edge) for edge in list(edge_locs)
                 if edge in offset_cz_mesh]

                # Make into sorted list
                edge_locs = sorted(edge_locs)

                # Record the total area under the curve
                int_val0 = np.trapz(y=cz_pdf, x=offset_cz_mesh)

                # Insert the missing bin edge locations & pdf-values into
                # the mesh & pdf, respectively
                edge_pdfs = interp(edge_locs)
                insert_ind = np.searchsorted(offset_cz_mesh, edge_locs)
                offset_cz_mesh = np.insert(offset_cz_mesh, insert_ind,
                                           edge_locs)
                offset_cz_pdf = np.insert(cz_pdf, insert_ind, edge_pdfs)
                assert np.min(offset_cz_pdf) > -self.EPSILON

                # Check that the total area under the curve is (still) unity,
                # to within numerical precision
                int_val = np.trapz(y=offset_cz_pdf, x=offset_cz_mesh)
                assert np.abs(int_val-1) < self.EPSILON

                # Renormalize if it's not exactly 1
                if int_val != 1.0:
                    offset_cz_pdf = offset_cz_pdf / int_val

                # Add up the area in the bin and areas that are "reflected"
                # into this bin
                new_czbin_edges = np.array(new_czbin_edges)
                czbin_areas = np.zeros(np.shape(new_czbin_edges)[0]-1)
                for wrap_n in range(np.shape(new_czbin_edges)[1]):
                    bin_edge_inds = np.searchsorted(offset_cz_mesh,
                                                    new_czbin_edges[:,wrap_n])
                    lbinds = bin_edge_inds[0:-1]
                    rbinds = bin_edge_inds[1:]
                    # Make sure indices that appear first are less than indices
                    # that appear second in a pair of bin indices
                    if (wrap_n+1) % 2 == 0:
                        bininds = zip(rbinds, lbinds)
                    else:
                        bininds = zip(lbinds, rbinds)
                    tmp_areas = []
                    for (binind_left_edge, binind_right_edge) in bininds:
                        if binind_left_edge == binind_right_edge:
                            tmp_areas.append(0)
                            continue
                        this_bin_area = np.array(np.trapz(
                            y=offset_cz_pdf[binind_left_edge:binind_right_edge+1],
                            x=offset_cz_mesh[binind_left_edge:binind_right_edge+1]
                        ))
                        tmp_areas.append(this_bin_area)
                    czbin_areas += np.array(tmp_areas)

                assert np.min(czbin_areas) > -self.EPSILON

                tot_czbin_area = np.sum(czbin_areas)
                assert tot_czbin_area < int_val + self.EPSILON

                kernel4d[ebin_n, czbin_n] = np.outer(ebin_areas, czbin_areas)
                assert (np.sum(kernel4d[ebin_n, czbin_n]) -
                        tot_ebin_area*tot_czbin_area) < self.EPSILON

            if make_plots:
                ax2 = fig1.add_subplot(212, axisbg=AXISBG)
                hbins = np.linspace(dmin-0.02*drange, dmax+0.02*drange, N_HBINS*3)
                hvals, hbins, hpatches = ax2.hist(cz_err, bins=hbins,
                                                  normed=True, **HIST_PP)
                ax2.plot(cz_mesh, cz_pdf, **DIFFUS_PP)
                fci = confInterval.MLConfInterval(x=cz_mesh,
                                                  y=cz_pdf)
                lb, ub, yopt, r = fci.findCI_lin(conf=0.995)
                axlims = ax2.axis('tight')
                ax2.set_xlim(lb, ub)
                ax2.set_ylim(0, axlims[3]*1.05)

                ylim = ax2.get_ylim()
                dy = ylim[1] - ylim[0]
                ruglines = rugplot(cz_err, y0=ylim[1], dy=-dy/40., ax=ax2, **RUG_PP)
                ruglines[-1].set_label(r'$\mathrm{Rug\,plot}$')

                x2lab = ax2.set_xlabel(
                    r'$\cos\vartheta_{\mathrm{track,reco}}-\cos\vartheta_{\nu,\mathrm{true}}$',
                    labelpad=LABELPAD
                )
                ax2.xaxis.set_label_coords(0.9, -LABELPAD)
                ax2.xaxis.grid(color=GRIDCOL)
                ax2.yaxis.grid(color=GRIDCOL)
                leg_title_tex = r'$\mathrm{Normalized}\,\cos\vartheta\mathrm{-err.\,distr.}$'
                leg = ax2.legend(loc='upper right', title=leg_title_tex,
                                 frameon=True, framealpha=0.8, fancybox=True,
                                 bbox_to_anchor=[1,0.975])
                leg.get_title().set_fontsize(16)
                leg.get_title().set_color(LEGFNTCOL)
                [t.set_color(LEGFNTCOL) for t in leg.get_texts()]
                frame = leg.get_frame()
                frame.set_facecolor(LEGFACECOL)
                frame.set_edgecolor(None)

                actual_bin_tex = ''
                if (actual_left_ebin_edge != ebin_min) or (actual_right_ebin_edge != ebin_max):
                    actual_bin_tex = r'E_{\nu,\mathrm{true}}\in [' + \
                            format(actual_left_ebin_edge, '0.2f') + r',\,' + \
                            format(actual_right_ebin_edge, '0.2f') + r'] \mapsto '
                stt = r'$\mathrm{Resolutions,\,' + flav_tex(flav) + r'\,' + \
                        int_tex(int_type) + r'}$' + '\n' + \
                        r'$' + actual_bin_tex + r'\mathrm{Bin}_{' + format(ebin_n, 'd') + r'}\equiv E_{\nu,\mathrm{true}}\in [' + format(ebin_min, '0.2f') + \
                        r',\,' + format(ebin_max, '0.2f') + r']\,\mathrm{GeV}' + \
                        r',\,N_\mathrm{events}=' + format(n_in_bin, 'd') + r'$'
                
                fig1.subplots_adjust(top=TOP, bottom=BOTTOM, left=LEFT, right=RIGHT, hspace=HSPACE)
                suptitle = fig1.suptitle(stt)
                suptitle.set_fontsize(16)
                suptitle.set_position((0.5,0.98))
                fig1.savefig(pdfpgs, format='pdf')

        check_areas = kernel4d.sum(axis=(2,3))

        assert np.max(check_areas) < 1 + self.EPSILON, str(np.max(check_areas))
        assert np.min(check_areas) > 0 - self.EPSILON, str(np.min(check_areas))

        if make_plots:
            fig2 = plt.figure(2, figsize=(8,10), dpi=90)
            fig2.clf()
            ax = fig2.add_subplot(111)
            X, Y = np.meshgrid(range(n_czbins), range(n_ebins))
            cm = mpl.cm.Paired_r
            cm.set_over((1,1,1), 1)
            cm.set_under((0,0,0), 1)
            plt.pcolor(X, Y, check_areas, vmin=0+self.EPSILON, vmax=1.0,
                       shading='faceted', cmap=cm)
            plt.colorbar(ticks=np.arange(0, 1.05, 0.05))
            ax.grid(0)
            ax.axis('tight')
            ax.set_xlabel(r'$\cos\vartheta_\mathrm{true}\mathrm{\,bin\,num.}$')
            ax.set_ylabel(r'$E_{\nu,\mathrm{true}}\mathrm{\,bin\,num.}$')
            ax.set_title(r'$\mathrm{Fract\,of\,evts\,starting\,in\,each}\,(E_{\nu,\mathrm{true}},\,\cos\vartheta_\mathrm{true})\,\mathrm{bin\,that\,reco\,in\,bounds}$'+
                 '\n'+r'$\mathrm{None\,should\,be\,>1\,(shown\,white);\,no-event\,bins\,are\,black;\,avg.}=' + format(np.mean(check_areas),'0.3f') + r'$')
            fig2.tight_layout()
            fig2.savefig(pdfpgs, format='pdf')

            check_areas2 = kernel4d.sum(axis=(0,1))
            fig3 = plt.figure(3, figsize=(8,10), dpi=90)
            fig3.clf()
            ax = fig3.add_subplot(111)
            X, Y = np.meshgrid(range(n_czbins), range(n_ebins))
            cm = mpl.cm.Paired_r
            cm.set_over((1,1,1), 1)
            cm.set_under((0,0,0), 1)
            plt.pcolor(X, Y, check_areas2, vmin=0+self.EPSILON,# vmax=1.0,
                       shading='faceted', cmap=cm)
            plt.colorbar(ticks=np.arange(0, 0.1+np.ceil(10.*np.max(check_areas2))/10., 0.05))
            ax.grid(0)
            ax.axis('tight')
            ax.set_xlabel(r'$\cos\vartheta_\mathrm{reco}\mathrm{\,bin\,num.}$')
            ax.set_ylabel(r'$E_{\nu,\mathrm{reco}}\mathrm{\,bin\,num.}$')
            ax.set_title(r'$\mathrm{Normed\,num\,events\,reconstructing\,into\,each}\,(E_{\nu,\mathrm{reco}},\,\cos\vartheta_\mathrm{reco})\,\mathrm{bin}$'+
                 '\n'+r'$\mathrm{No-event\,bins\,are\,black;\,avg.}=' + format(np.mean(check_areas2),'0.3f') + r'$')
            fig3.tight_layout()
            fig3.savefig(pdfpgs, format='pdf')

            pdfpgs.close()

        return kernel4d
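
The kernel returned above maps a 2D true-variable histogram onto a 2D reco-variable histogram: each reco bin receives the sum over all true bins of the true-bin content weighted by that bin's kernel. A short sketch of that application is below; the array names and shapes are illustrative, and the indexing convention follows the docstring.

# Illustrative sketch (not from the original source): apply a 4D smearing
# kernel, indexed as kernel4d[e_true, cz_true, e_reco, cz_reco], to a
# true-variable histogram to obtain the reco-variable histogram.
import numpy as np

n_ebins, n_czbins = 10, 8
kernel4d = np.random.random((n_ebins, n_czbins, n_ebins, n_czbins))
true_hist = np.random.random((n_ebins, n_czbins))

# Sum over the true-bin axes, weighting each kernel slice by the true counts
reco_hist = np.einsum('ijkl,ij->kl', kernel4d, true_hist)
assert reco_hist.shape == (n_ebins, n_czbins)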
Exemplo n.º 34
0
    # the asimov data set:
    for step in steplist:

        print("Running at Asimov parameters: %s" % step)
        asimov_params = get_values(getAsimovParams(params,true_normal,step))
        asimov_data_set = get_asimov_fmap(
            template_maker, asimov_params,
            chan=asimov_params['channel'])

        # Store injected true values in result:
        for key in free_params.keys():
            if 'theta23' in key: continue
            result['true_'+key].append(asimov_params[key])
        result['true_theta23'].append(step)

        result['asimov_data'].append(asimov_data_set)

        # now get fitted values of opposite hierarchy:
        hypo_normal = not true_normal
        hypo_tag = 'hypo_IMH' if true_normal else 'hypo_NMH'
        llh_data = find_alt_hierarchy_fit(
            asimov_data_set,template_maker, params, hypo_normal,
            minimizer_settings, only_atm_params=False, check_octant=args.check_octant)

        for key in free_params.keys(): result['fit_'+key].append(llh_data[key][-1])

    results[true_tag] = result

logging.warn("FINISHED. Saving to file: %s"%args.outfile)
to_json(results,args.outfile)
Exemplo n.º 35
0
def make_toy_events(outdir, num_events, energy_range, spectral_index,
                    coszen_range, num_sets, first_set, aeff_energy_param,
                    aeff_coszen_param, reco_param, pid_param, pid_dist):
    """Make toy events and store to a file.

    Parameters
    ----------
    outdir : string
    num_events : int
    energy_range : 2-tuple of floats
    spectral_index : float
    coszen_range : 2-tuple of floats
    num_sets : int
    first_set : int
    aeff_energy_param : string
    aeff_coszen_param : string
    reco_param : string
    pid_param : string
    pid_dist : string

    Returns
    -------
    events : :class:`pisa.core.events.Events`

    """
    energy_range = sorted(energy_range)
    coszen_range = sorted(coszen_range)

    # Validation of args
    assert energy_range[0] > 0 and energy_range[1] < 1e9
    assert coszen_range[0] >= -1 and coszen_range[1] <= 1
    assert np.diff(energy_range)[0] > 0, str(energy_range)
    assert np.diff(coszen_range)[0] > 0, str(coszen_range)
    assert spectral_index >= 0, str(spectral_index)
    assert first_set >= 0, str(first_set)
    assert num_sets >= 1, str(num_sets)

    # Make sure resources specified actually exist
    for arg in [aeff_energy_param, aeff_coszen_param, reco_param, pid_param]:
        find_resource(arg)

    mkdir(outdir, warn=False)

    set_indices = list(range(first_set, first_set + num_sets))

    # The following loop is for validation only
    for num, index in product(num_events, set_indices):
        mcgen_random_state(num_events=num, set_index=index)

    for num, set_index in product(num_events, set_indices):
        mcevts_fname = FNAME_TEMPLATE.format(
            file_type='events',
            detector='vlvnt',
            e_min=format_num(energy_range[0]),
            e_max=format_num(energy_range[1]),
            spectral_index=format_num(spectral_index,
                                      sigfigs=2,
                                      trailing_zeros=True),
            cz_min=format_num(coszen_range[0]),
            cz_max=format_num(coszen_range[1]),
            num_events=format_num(num, sigfigs=3, sci_thresh=(1, -1)),
            set_index=format_num(set_index, sci_thresh=(10, -10)),
            extension='hdf5')
        mcevts_fpath = os.path.join(outdir, mcevts_fname)
        if os.path.isfile(mcevts_fpath):
            logging.warn('File already exists, skipping: "%s"', mcevts_fpath)
            continue

        logging.info('Working on set "%s"', mcevts_fname)

        # TODO: pass filepaths / resource locations via command line args

        # Create a single random state object to pass from function to function
        random_state = mcgen_random_state(num_events=num, set_index=set_index)

        mc_events = generate_mc_events(
            num_events=num,
            energy_range=energy_range,
            coszen_range=coszen_range,
            spec_ind=spectral_index,
            aeff_energy_param_source=aeff_energy_param,
            aeff_coszen_param_source=aeff_coszen_param,
            random_state=random_state)
        populate_reco_observables(mc_events=mc_events,
                                  param_source=reco_param,
                                  random_state=random_state)
        populate_pid(mc_events=mc_events,
                     param_source=pid_param,
                     random_state=random_state,
                     dist=pid_dist)

        to_file(mc_events, mcevts_fpath)

    return mc_events
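
A hedged invocation sketch for the function above; every argument value below is an illustrative placeholder (in particular the parameterization file names and the PID distribution name), not an actual PISA resource.

# Hypothetical usage sketch for make_toy_events(); paths, parameter-file
# names, and the pid_dist value are placeholders.
events = make_toy_events(
    outdir='toy_events',
    num_events=[int(1e5)],          # one entry per event count to generate
    energy_range=(1.0, 80.0),       # GeV
    spectral_index=2.0,
    coszen_range=(-1.0, 1.0),
    num_sets=2,
    first_set=0,
    aeff_energy_param='aeff_energy_param.json',
    aeff_coszen_param='aeff_coszen_param.json',
    reco_param='reco_param.json',
    pid_param='pid_param.json',
    pid_dist='uniform',             # distribution name (placeholder)
)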
Exemplo n.º 36
0
                    help="Output filename.")
parser.add_argument('-v', '--verbose', action='count', default=None,
                    help='set verbosity level')
args = parser.parse_args()

set_verbosity(args.verbose)

#Read in the settings
template_settings = from_json(args.template_settings)
minimizer_settings  = from_json(args.minimizer_settings)
pseudo_data_settings = from_json(args.pseudo_data_settings) if args.pseudo_data_settings is not None else template_settings

#Workaround for old scipy versions
import scipy
if scipy.__version__ < '0.12.0':
    logging.warn('Detected scipy version %s < 0.12.0' % scipy.__version__)
    if 'maxiter' in minimizer_settings:
        logging.warn('Optimizer settings for "maxiter" will be ignored')
        minimizer_settings.pop('maxiter')


# make sure that both pseudo data and template are using the same
# channel. Raise Exception and quit otherwise
channel = template_settings['params']['channel']['value']
if channel != pseudo_data_settings['params']['channel']['value']:
    error_msg = "Both template and pseudo data must have same channel!\n"
    error_msg += " pseudo_data_settings chan: '%s', template chan: '%s' "%(pseudo_data_settings['params']['channel']['value'],channel)
    raise ValueError(error_msg)

if args.gpu_id is not None:
    template_settings['params']['gpu_id'] = {}
Exemplo n.º 37
0
def parse_fit_config(fit_cfg):
    """Perform sanity checks on and parse fit configuration file.

    Parameters
    ----------
    fit_cfg : str
        path to a fit configuration file

    Returns
    -------
    fit_cfg : PISAConfigParser
        parsed fit configuration
    sys_list : list of str
        parsed names of systematic parameters
    units_list : list of str
        units corresponding to each discrete systematic
    combine_regex : list of str
        each string is a regular expression for combining pipeline outputs; see
        :func:`pisa.core.map.MapSet.combine_regex` for details.

    """
    fit_cfg = from_file(fit_cfg)
    no_ws_section_map = {s.strip(): s for s in fit_cfg.sections()}

    if GENERAL_SECTION_NAME not in no_ws_section_map.values():
        raise KeyError('Fit config is missing the "%s" section!' %
                       GENERAL_SECTION_NAME)

    general_section = fit_cfg[GENERAL_SECTION_NAME]
    if SYS_LIST_OPTION not in general_section:
        raise KeyError(
            "Fit config has to specify systematic parameters as"
            ' "%s" option in "%s" section (comma-separated list of names).' %
            (SYS_LIST_OPTION, GENERAL_SECTION_NAME))

    sys_list = [s.strip() for s in general_section[SYS_LIST_OPTION].split(",")]

    if UNITS_OPTION in general_section:
        units_list = []
        units_specs = (general_section[UNITS_OPTION].replace(
            UNITS_SPECIFIER, "").split(","))
        for units_spec in units_specs:
            # Make sure units are interpretable by Pint
            try:
                ureg.Unit(units_spec)
            except Exception:
                logging.error(
                    'Unit "%s" specified by "%s" option in "general" section'
                    " is not interpretable by Pint",
                    units_spec,
                    UNITS_OPTION,
                )
                raise
            units_list.append(units_spec)
    else:
        units_list = ["dimensionless" for s in sys_list]
        logging.warn(
            "No %s option found in %s section; assuming systematic parameters are"
            " dimensionless",
            UNITS_OPTION,
            GENERAL_SECTION_NAME,
        )

    if len(units_list) != len(sys_list):
        raise ValueError(
            '{} units specified by "{}" option but {} systematics specified by'
            ' "{}" option; must be same number of each.'.format(
                len(units_list), UNITS_OPTION, len(sys_list), SYS_LIST_OPTION))

    logging.info(
        "Found systematic parameters %s",
        ["{} ({})".format(s, u) for s, u in zip(sys_list, units_list)],
    )

    combine_regex = general_section.get(COMBINE_REGEX_OPTION, None)
    if combine_regex:
        try:
            combine_regex = literal_eval(combine_regex)
        except (SyntaxError, ValueError):
            logging.warn(
                'Deprecated syntax for "combine_re" (make into a Python-evaluatable'
                " sequence of strings instead) :: combine_regex = %s",
                combine_regex,
            )
            combine_regex = [r.strip() for r in combine_regex.split(",")]

    if APPLY_ALL_SECTION_NAME in no_ws_section_map:
        apply_all_section = fit_cfg[no_ws_section_map[APPLY_ALL_SECTION_NAME]]
        for no_ws_sname, sname in no_ws_section_map.items():
            if not (no_ws_sname.startswith(NOMINAL_SET_PFX)
                    or no_ws_sname.startswith(SYS_SET_PFX)):
                continue
            sys_set_section = fit_cfg[sname]
            for option, val in apply_all_section.items():
                sys_set_section[option] = val

    return fit_cfg, sys_list, units_list, combine_regex
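
A short, hedged sketch of how the parser above can be called; the config path is a placeholder.

# Hypothetical usage sketch: parse a fit config and report what was found.
parsed_cfg, sys_list, units_list, combine_regex = parse_fit_config(
    'my_fit_config.cfg')
for name, units in zip(sys_list, units_list):
    print('systematic %s will be interpreted in units of %s' % (name, units))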
Exemplo n.º 38
0
 def store_recursively(fhandle, node, path=None, node_hashes=None):
     if path is None:
         path = []
     if node_hashes is None:
         node_hashes = {}
     full_path = '/' + '/'.join(path)
     if isinstance(node, dict):
         logging.trace("  creating Group `%s`" % full_path)
         try:
             fhandle.create_group(full_path)
         except ValueError:
             pass
         for key in sorted(node.iterkeys()):
             key_str = str(key)
             if not isinstance(key, str):
                 logging.warn('Stringifying key `' + key_str +
                              '` for use as name in HDF5 file')
             val = node[key]
             new_path = path + [key_str]
             store_recursively(fhandle=fhandle,
                               node=val,
                               path=new_path,
                               node_hashes=node_hashes)
     else:
         # Check for existing node
         node_hash = utils.hash_obj(node)
         if node_hash in node_hashes:
             logging.trace("  creating hardlink for Dataset: `%s` -> `%s`" %
                           (full_path, node_hashes[node_hash]))
             # Hardlink the matching existing dataset
             fhandle[full_path] = fhandle[node_hashes[node_hash]]
             return
         # For now, convert None to np.nan since h5py appears to not handle None
         if node is None:
             node = np.nan
             logging.warn("  encountered `None` at node `%s`; converting to"
                          " np.nan" % full_path)
         # "Scalar datasets don't support chunk/filter options". Shuffling
         # is a good idea otherwise since subsequent compression will
         # generally benefit; shuffling requires chunking. Compression is
         # not done here since it is slow.
         if np.isscalar(node):
             shuffle = False
             chunks = None
         else:
             shuffle = True
             chunks = True
             # Store the node_hash for linking to later if this is more than
             # a scalar datatype. Assumed that "None" has already been
             # converted to np.nan above.
             node_hashes[node_hash] = full_path
         # TODO: Treat strings as follows? Would this break compatibility
         # with pytables/Pandas? What are benefits? Leaving out for now.
         # if isinstance(node, basestr):
         #     dtype = h5py.special_dtype(vlen=str)
         #     fh.create_dataset(k,data=v,dtype=dtype)
         logging.trace("  creating dataset at node `%s`" % full_path)
         try:
             fhandle.create_dataset(name=full_path,
                                    data=node,
                                    chunks=chunks,
                                    compression=None,
                                    shuffle=shuffle,
                                    fletcher32=False)
         except TypeError:
             try:
                 shuffle = False
                 chunks = None
                 fhandle.create_dataset(name=full_path,
                                        data=node,
                                        chunks=chunks,
                                        compression=None,
                                        shuffle=shuffle,
                                        fletcher32=False)
             except:
                 logging.error('  full_path: ' + full_path)
                 logging.error('  chunks   : ' + str(chunks))
                 logging.error('  shuffle  : ' + str(shuffle))
                 logging.error('  node     : ' + str(node))
                 raise
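
A hedged usage sketch for the helper above: write a nested dict of arrays to an HDF5 file, letting identical array contents be hard-linked via their hashes. The file name and dict contents are illustrative, and `store_recursively` is assumed to be in scope (note that, as written above, it relies on Python 2 idioms such as `iterkeys`).

# Illustrative sketch (assumes h5py is available and store_recursively is in
# scope). Nested dicts become HDF5 groups; leaves become datasets.
import h5py
import numpy as np

data = {
    'truth': {'energy': np.arange(10.0), 'coszen': np.linspace(-1, 1, 10)},
    'reco': {'energy': np.arange(10.0)},  # identical content may be hard-linked
}
with h5py.File('example_output.hdf5', 'w') as fhandle:
    store_recursively(fhandle=fhandle, node=data)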
Exemplo n.º 39
0
def make_discrete_sys_distributions(fit_cfg, set_params=None):
    """Generate and store mapsets for different discrete systematics sets
    (with a single set characterised by a dedicated pipeline configuration)

    Parameters
    ----------
    fit_cfg : string
        Path to a fit config file
    set_params : mapping of str to :obj:`Quantity`, optional
        Parameter values to set in each distribution maker before its maps
        are produced; keys are parameter names and values must be Quantities
        with appropriate units

    Returns
    -------
    input_data : OrderedDict
        Container with the processed input data including MapSets
        resulting from each input pipelines

    """
    # check optional `set_params`
    if set_params is not None:
        if not isinstance(set_params, Mapping):
            raise TypeError("`set_params` must be dict-like")
        for param_name, param_value in set_params.items():
            if not isinstance(param_name, basestring):
                raise TypeError(
                    "`set_params` keys must be strings (parameter name)")
            if not isinstance(param_value, ureg.Quantity):
                raise TypeError("`set_params` values must be Quantities")

    parsed_fit_cfg, sys_list, units_list, combine_regex = parse_fit_config(
        fit_cfg)
    fit_cfg_txt_buf = StringIO()
    parsed_fit_cfg.write(fit_cfg_txt_buf)
    fit_cfg_txt = fit_cfg_txt_buf.getvalue()

    # prepare the data container
    input_data = OrderedDict()
    input_data["fit_cfg_path"] = fit_cfg
    input_data["fit_cfg_txt"] = fit_cfg_txt
    input_data["param_names"] = sys_list
    input_data["param_units"] = units_list
    input_data["datasets"] = []

    # -- Load systematics sets -- #

    found_nominal = False
    sys_sets_info = OrderedDict()

    for section in parsed_fit_cfg.sections():
        no_ws_section = section.strip()

        section_pfx = no_ws_section.split(":")[0].strip()
        is_nominal = section_pfx == NOMINAL_SET_PFX
        is_sys_set = is_nominal or section_pfx == SYS_SET_PFX

        if is_nominal:
            if found_nominal:
                raise ValueError(
                    "Found multiple nominal sets in fit cfg! There must be"
                    " exactly one.")
            found_nominal = True

        if is_sys_set:
            # Parse the list of systematics parameter values from the section name
            sys_param_point = tuple(
                float(x) for x in section.split(":")[1].split(","))

            if len(sys_param_point) != len(sys_list):
                raise ValueError(
                    "Section heading [{}] specifies {:d} systematic"
                    " parameter values, but there are {:d} systematics".format(
                        section, len(sys_param_point), len(sys_list)))

            parsed_pipeline_cfg, pipeline_cfg_path = load_and_modify_pipeline_cfg(
                fit_cfg=parsed_fit_cfg, section=section)

            pipeline_cfg_txt_buf = StringIO()
            parsed_pipeline_cfg.write(pipeline_cfg_txt_buf)
            pipeline_cfg_txt = pipeline_cfg_txt_buf.getvalue()

            sys_sets_info[sys_param_point] = dict(
                is_nominal=is_nominal,
                parsed_pipeline_cfgs=[parsed_pipeline_cfg],
                pipeline_cfg_paths=[pipeline_cfg_path],
                pipeline_cfg_txts=[pipeline_cfg_txt],
            )

        # In this loop, nothing to do for general & apply_to_all_sets sections
        elif no_ws_section in (GENERAL_SECTION_NAME, APPLY_ALL_SECTION_NAME):
            pass

        # Do not allow any other sections in the config
        else:
            raise ValueError("Invalid section in fit config file: [%s]" %
                             section)

    if not found_nominal:
        raise ValueError(
            "Could not find a nominal discrete systematics set in fit cfg."
            " There must be exactly one.")

    nsets = len(sys_sets_info)
    nsys = len(sys_list)
    if nsets <= nsys:
        logging.warn(
            "Fit will either fail or be unreliable since the number of"
            " systematics sets to be fit is small (%d <= %d).",
            nsets,
            nsys,
        )

    for sys_param_point, info in sys_sets_info.items():
        point_str = " | ".join(
            ["%s=%.2f" % (p, v) for p, v in zip(sys_list, sys_param_point)])

        logging.info(
            "Generating maps for discrete systematics point: %s. Using"
            ' pipeline config(s) at "%s"',
            point_str,
            info["pipeline_cfg_paths"],
        )

        # make a dedicated distribution maker for each systematics set
        distribution_maker = DistributionMaker(info["parsed_pipeline_cfgs"])

        # update params if requested
        if set_params is not None:
            for pname, pval in set_params.items():
                if pname not in distribution_maker.params.names:
                    raise ValueError("Unknown param '%s' in `set_params`" %
                                     pname)
                if (pval.dimensionality !=
                        distribution_maker.params[pname].dimensionality):
                    raise ValueError(
                        'Incorrect units for param "%s" in `set_params`' %
                        pname)
                distribution_maker.params[pname].value = pval
                logging.info("Changed param '%s' to %s", pname, pval)

        distribution_maker_param_values = OrderedDict()
        for dmpname in sorted(distribution_maker.params.names):
            dmpval = distribution_maker.params[dmpname].value
            distribution_maker_param_values[dmpname] = dmpval

        # run the distribution maker to get the mapset
        # TODO This assumes only one pipeline, either make more general or enforce
        mapset = distribution_maker.get_outputs(return_sum=False)[0]

        if combine_regex:
            logging.info(
                "Combining maps according to regular expression(s) %s",
                combine_regex)
            mapset = mapset.combine_re(combine_regex)

        # Store the info
        dataset = OrderedDict()
        dataset["pipeline_cfg_paths"] = info["pipeline_cfg_paths"]
        dataset["pipeline_cfg_txts"] = info["pipeline_cfg_txts"]
        dataset[
            "distribution_maker_param_values"] = distribution_maker_param_values
        dataset["param_values"] = sys_param_point
        dataset["mapset"] = mapset
        dataset["nominal"] = info["is_nominal"]
        input_data["datasets"].append(dataset)

    return input_data
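
A minimal usage sketch follows (the fit-config path and the "aeff_scale" parameter name are placeholders, and it assumes make_discrete_sys_distributions and PISA's top-level ureg are importable in the current scope):

from collections import OrderedDict

from pisa import ureg

# Optionally pin parameters to fixed values for every systematics set;
# "aeff_scale" is an assumed parameter name, not taken from the original source.
set_params = OrderedDict()
set_params["aeff_scale"] = 1.0 * ureg.dimensionless

input_data = make_discrete_sys_distributions(
    fit_cfg="fit_config.cfg",  # placeholder path
    set_params=set_params,
)

# One dataset entry per nominal/systematics-set section found in the fit config
for dataset in input_data["datasets"]:
    print(dataset["param_values"], dataset["nominal"], dataset["mapset"].names)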
Exemplo n.º 40
0
args = parser.parse_args()
set_verbosity(args.verbose)

llhfiles = glob(os.path.join(args.data_dir, "llh_data*"))

if args.log_dir is not None:
    logfiles = glob(os.path.join(args.log_dir, "log*"))
    # These MUST have the same number initialized if we are using the logging
    # information. Otherwise, perhaps one of the directories is incorrect.
    # Sometimes there are fewer llh files, since they crash before writing out.
    assert len(llhfiles) <= len(logfiles), "Data and log directories don't match?"

# Output to save to hdf5 file:
output_data = {"minimizer_settings": {}, "template_settings": {}, "true_NMH": {}, "true_IMH": {}}

logging.warn("Processing {0:d} files".format(len(llhfiles)))

mod = len(llhfiles) // 20
start = time.time()
for i, filename in enumerate(llhfiles):

    if (mod > 0) and (i % mod == 0):
        logging.info("  >> {0:d} files done...".format(i))

    try:
        data = from_json(filename)
    except Exception as inst:
        # print(inst)
        print("Skipping file: ", filename)
        continue
Exemplo n.º 41
0
    def __init__(
        self,
        earth_model=None,
        detector_depth=None,
        prop_height=None,
        prop_height_range=None,
        YeI=None,
        YeO=None,
        YeM=None,
        rel_err=None,
        abs_err=None,
        prop_lowpass_cutoff=None,
        prop_lowpass_frac=None,
        eval_lowpass_cutoff=None,
        eval_lowpass_frac=None,
        apply_lowpass_above_hor=True,
        apply_height_avg_below_hor=True,
        suppress_interpolation_warning=False,
        node_mode=None,
        use_decoherence=False,
        num_decoherence_gamma=1,
        use_nsi=False,
        num_neutrinos=3,
        use_taus=False,
        exact_mode=False,
        vacuum=False,
        **std_kwargs,
    ):

        # Checks
        if use_nsi:
            raise NotImplementedError("NSI not implemented")
        if type(prop_height) is not ureg.Quantity:
            raise NotImplementedError(
                "Getting propagation heights from containers is "
                "not yet implemented, saw {} type".format(type(prop_height))
            )

        # Store args
        self.num_neutrinos = int(num_neutrinos)
        assert (
            self.num_neutrinos < 5
        ), "currently only supports up to 4 flavor oscillations"
        self.use_nsi = use_nsi
        self.use_decoherence = use_decoherence
        self.num_decoherence_gamma = num_decoherence_gamma
        self.node_mode = node_mode
        self.vacuum = vacuum
        self.use_taus = use_taus
        self.earth_model = earth_model
        self.YeI = YeI.m_as("dimensionless")
        self.YeO = YeO.m_as("dimensionless")
        self.YeM = YeM.m_as("dimensionless")
        self.detector_depth = detector_depth.m_as("km")
        self.prop_height = prop_height.m_as("km")
        self.avg_height = False
        self.concurrent_threads = PISA_NUM_THREADS if TARGET == "parallel" else 1
        self.prop_height_range = None
        self.apply_height_avg_below_hor = apply_height_avg_below_hor
        if prop_height_range is not None:  # this is optional
            self.prop_height_range = prop_height_range.m_as("km")
            self.avg_height = True

        self.layers = None

        self.rel_err = rel_err.m_as("dimensionless") if rel_err is not None else 1.0e-10
        self.abs_err = abs_err.m_as("dimensionless") if abs_err is not None else 1.0e-10
        self.prop_lowpass_cutoff = (
            prop_lowpass_cutoff.m_as("1/km") if prop_lowpass_cutoff is not None else 0.0
        )
        self.prop_lowpass_frac = (
            prop_lowpass_frac.m_as("dimensionless")
            if prop_lowpass_frac is not None
            else 0.0
        )
        self.eval_lowpass_cutoff = (
            eval_lowpass_cutoff.m_as("1/km") if eval_lowpass_cutoff is not None else 0.0
        )
        self.eval_lowpass_frac = (
            eval_lowpass_frac.m_as("dimensionless")
            if eval_lowpass_frac is not None
            else 0.0
        )

        if self.prop_lowpass_frac > 1.0 or self.eval_lowpass_frac > 1.0:
            raise ValueError("lowpass filter fraction cannot be greater than one")

        if self.prop_lowpass_frac < 0.0 or self.eval_lowpass_frac < 0.0:
            raise ValueError("lowpass filter fraction cannot be smaller than zero")

        self.apply_lowpass_above_hor = apply_lowpass_above_hor

        self.nus_layer = None
        self.nus_layerbar = None

        # Define the layers class
        self.nusquids_layers_class = nsq.nuSQUIDSLayers

        # Define standard params
        expected_params = [
            "theta12",
            "theta13",
            "theta23",
            "deltam21",
            "deltam31",
            "deltacp",
        ]

        # Add decoherence parameters
        if self.use_decoherence:
            # Use derived nuSQuIDS classes
            import nuSQUIDSDecohPy

            self.nusquids_layers_class = nuSQUIDSDecohPy.nuSQUIDSDecohLayers
            # Checks
            assert (
                self.num_neutrinos == 3
            ), "Decoherence only supports 3 neutrinos currently"
            # Add decoherence params
            expected_params.extend(["gamma0"])
            expected_params.extend(["n"])
            expected_params.extend(["E0"])

        # We may want to reparametrize this with the difference between deltacp14 and
        # deltacp24, as the absolute value seems to play a small role (see
        # https://arxiv.org/pdf/2010.06321.pdf)
        if self.num_neutrinos == 4:
            expected_params.extend(
                [
                    "theta14",
                    "theta24",
                    "theta34",
                    "deltam41",
                    "deltacp14",
                    "deltacp24",
                ]
            )

        # init base class
        super().__init__(
            expected_params=expected_params,
            **std_kwargs,
        )

        # This is special: We have an additional "binning" to account for. It is in
        # principle possible to work in event mode even for the nodes, which would mean
        # that the full oscillation problem is solved for all events individually.
        # Together with the constant oscillation mode, this can be used to calculate
        # probabilities in exact mode in a time that is reasonable at least for
        # generating pseudodata.

        assert not (self.use_nsi and self.use_decoherence), (
            "NSI and decoherence not supported together, must use one or the other"
        )

        self.exact_mode = exact_mode

        if exact_mode:
            # No interpolation is happening in exact mode so any passed node_mode
            # will be ignored. Probabilities are calculated at calc_specs.
            if self.node_mode is not None:
                logging.warn(
                    "nuSQuIDS is configured in exact mode, the passed "
                    f"`node_mode`\n({self.node_mode})\n will be ignored!"
                )
            if self.prop_lowpass_cutoff > 0 or self.eval_lowpass_cutoff > 0:
                logging.warn(
                    "nuSQuIDS is configured in exact mode, low-pass filters "
                    "will be ignored"
                )
        else:
            if isinstance(self.calc_mode, MultiDimBinning):
                assert isinstance(self.node_mode, MultiDimBinning), (
                    "cannot use " "event-wise nodes with binned calculation"
                )

        self.e_node_mode = None
        self.e_mesh = None
        self.coszen_node_mode = None
        self.cosz_mesh = None

        # We don't want to spam the user with repeated warnings about the same issue.
        self.interpolation_warning_issued = suppress_interpolation_warning
    def apply_function(self):
        '''
        Computes the main inputs to the generalized likelihood
        function on every iteration of the minimizer
        '''
        N_bins = self.output_specs.tot_num_bins

        #
        # Step 4: Apply the empty bin strategy and mean adjustment
        #    Compute the alphas and betas that go into the
        #    poisson-gamma mixture of the llh
        #
        for container in self.data:

            self.data.data_specs = 'events'

            #
            # Step 3: Find the maximum weight across all events
            #         of each MC set. The value of that weight defines
            #         the value of the pseudo-weight that will be included
            #         in empty bins

            # for this part we are in events mode
            # Find the mean weight of an entire MC set
            #
            # We only consider weights below the 90th percentile,
            # to avoid the extreme high weights that muongun
            # often gives
            #
            all_container_weights = container['weights'].get('host')

            if self.with_pseudo_weight:
                percentile90 = np.percentile(all_container_weights, 90)
                pseudo_weight = np.mean(all_container_weights[
                    all_container_weights <= percentile90])
                #pseudo_weight = np.amin(all_container_weights[all_container_weights>0])
                container.add_scalar_data(key='pseudo_weight',
                                          data=pseudo_weight)

            old_weight_sum = np.zeros(N_bins)
            new_weight_sum = np.zeros(N_bins)
            alphas_vector = np.zeros(N_bins)
            betas_vector = np.zeros(N_bins)

            #
            # Load the pseudo_weight and mean displacement values
            #
            if self.with_mean_adjust:
                mean_adjustment = container.scalar_data['mean_adjustment']

            for index in range(N_bins):

                index_mask = container['bin_{}_mask'.format(index)].get('host')
                if 'kfold_mask' in container:
                    index_mask *= container['kfold_mask'].get('host')
                current_weights = all_container_weights[index_mask]

                old_weight_sum[index] += np.sum(current_weights)

                assert np.all(current_weights >= 0), 'SOME WEIGHTS BELOW ZERO'
                n_weights = current_weights.shape[0]

                # If no weights exist here but other datasets have some, include
                # a pseudo weight. Bins with no MC events in any set will be
                # ignored in the likelihood later
                #
                # apply the empty-bin treatment here
                if n_weights <= 0 and self.with_pseudo_weight:
                    current_weights = np.array([pseudo_weight])
                    n_weights = 1

                # write the new weight distribution down
                new_weight_sum[index] += np.sum(current_weights)

                # Mean of the current weight distribution
                mean_w = np.mean(current_weights)

                # variance of the current weight
                var_of_weights = (
                    (current_weights - mean_w)**2).sum() / (float(n_weights))

                #  Variance of the poisson-gamma distributed variable
                var_z = (var_of_weights + mean_w**2)

                if var_z < 0:
                    logging.warn('var_z is less than zero in container %s: %s',
                                 container.name, var_z)
                    raise Exception('var_z is less than zero')

                # if the weights present have a mean of zero, default to an
                # alpha value of PSEUDO_WEIGHT and beta = 1.0, which mimics
                # a narrow PDF close to 0.0
                beta = np.divide(mean_w,
                                 var_z,
                                 out=np.ones(1),
                                 where=var_z != 0)
                trad_alpha = np.divide(mean_w**2,
                                       var_z,
                                       out=np.ones(1) * np.NaN,
                                       where=var_z != 0)

                if self.with_mean_adjust:
                    alpha = (n_weights + mean_adjustment) * trad_alpha
                else:
                    alpha = n_weights * trad_alpha

                alphas_vector[index] = alpha
                betas_vector[index] = beta

            # Calculate alphas and betas
            self.data.data_specs = self.output_specs
            np.copyto(src=alphas_vector,
                      dst=container['llh_alphas'].get('host'))
            np.copyto(src=betas_vector, dst=container['llh_betas'].get('host'))

            #only change the weights if they were modified
            if self.with_pseudo_weight or self.with_mean_adjust:
                np.copyto(src=new_weight_sum,
                          dst=container['weights'].get('host'))
                container['weights'].mark_changed()

            np.copyto(src=old_weight_sum, dst=container['old_sum'].get('host'))
            container['llh_alphas'].mark_changed()
            container['llh_betas'].mark_changed()
            container['old_sum'].mark_changed()
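
For a single bin, the alpha/beta computation above reduces to the following standalone sketch (toy weights, without the optional mean adjustment):

import numpy as np

weights = np.array([0.2, 0.5, 0.3])           # toy MC weights falling in one bin
n = weights.size
mean_w = weights.mean()
var_w = ((weights - mean_w) ** 2).sum() / n   # biased variance, as in the stage
var_z = var_w + mean_w ** 2                   # variance of the poisson-gamma variable

beta = mean_w / var_z
alpha = n * mean_w ** 2 / var_z               # mean adjustment omitted here
print(alpha, beta)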
Exemplo n.º 43
0
    def compute_function_interpolated(self):
        """
        Version of the compute function that does use interpolation between nodes.
        """
        nsq_units = nsq.Const()
        # We need to make two evolutions, one for numu and the other for nue.
        # These produce neutrino and antineutrino states at the same time thanks to
        # the "both" neutrino mode of nuSQuIDS.
        self.apply_prop_settings(self.nus_layer)
        self.set_osc_parameters(self.nus_layer)

        ini_state_nue = np.array([1, 0, 0] + [0] * (self.num_neutrinos - 3))
        ini_state_numu = np.array([0, 1, 0] + [0] * (self.num_neutrinos - 3))
        ini_state_nutau = np.array([0, 0, 1] + [0] * (self.num_neutrinos - 3))

        self.nus_layer.Set_initial_state(ini_state_nue, nsq.Basis.flavor)
        if not self.vacuum:
            self.nus_layer.EvolveState()
        evolved_states_nue = self.nus_layer.GetStates(0)
        evolved_states_nuebar = self.nus_layer.GetStates(1)

        self.nus_layer.Set_initial_state(ini_state_numu, nsq.Basis.flavor)
        if not self.vacuum:
            self.nus_layer.EvolveState()
        evolved_states_numu = self.nus_layer.GetStates(0)
        evolved_states_numubar = self.nus_layer.GetStates(1)

        if self.use_taus:
            self.nus_layer.Set_initial_state(ini_state_nutau, nsq.Basis.flavor)
            if not self.vacuum:
                self.nus_layer.EvolveState()
            evolved_states_nutau = self.nus_layer.GetStates(0)
            evolved_states_nutaubar = self.nus_layer.GetStates(1)

        # Now comes the step where we interpolate the interaction picture states
        # and project out oscillation probabilities. This can be done in either events
        # or binned mode.
        if isinstance(self.calc_mode, MultiDimBinning):
            self.data.link_containers(
                "nu", ["nue_cc", "numu_cc", "nutau_cc", "nue_nc", "numu_nc", "nutau_nc"]
            )
            self.data.link_containers(
                "nubar",
                [
                    "nuebar_cc",
                    "numubar_cc",
                    "nutaubar_cc",
                    "nuebar_nc",
                    "numubar_nc",
                    "nutaubar_nc",
                ],
            )
        for container in self.data:
            nubar = container["nubar"] < 0
            container["interp_states_e"] = self.calc_interpolated_states(
                evolved_states_nuebar if nubar else evolved_states_nue,
                container["true_energy"] * nsq_units.GeV,
                container["true_coszen"],
            )
            container["interp_states_mu"] = self.calc_interpolated_states(
                evolved_states_numubar if nubar else evolved_states_numu,
                container["true_energy"] * nsq_units.GeV,
                container["true_coszen"],
            )
            if self.use_taus:
                container["interp_states_tau"] = self.calc_interpolated_states(
                    evolved_states_nutaubar if nubar else evolved_states_nutau,
                    container["true_energy"] * nsq_units.GeV,
                    container["true_coszen"],
                )
        self.data.unlink_containers()

        if isinstance(self.calc_mode, MultiDimBinning):
            self.data.link_containers("nue", ["nue_cc", "nue_nc"])
            self.data.link_containers("numu", ["numu_cc", "numu_nc"])
            self.data.link_containers("nutau", ["nutau_cc", "nutau_nc"])
            self.data.link_containers("nuebar", ["nuebar_cc", "nuebar_nc"])
            self.data.link_containers("numubar", ["numubar_cc", "numubar_nc"])
            self.data.link_containers("nutaubar", ["nutaubar_cc", "nutaubar_nc"])

        for container in self.data:

            nubar = container["nubar"] < 0
            flav_out = container["flav"]
            input_flavs = ["e", "mu", "tau"] if self.use_taus else ["e", "mu"]

            for flav_in in input_flavs:
                container["prob_" + flav_in] = self.calc_probs_interp(
                    flav_out=flav_out,
                    nubar=nubar,
                    interp_states=container["interp_states_" + flav_in],
                    out_distances=container["tot_distances"] * nsq_units.km,
                    e_out=container["true_energy"] * nsq_units.GeV,
                    avg_ranges=container["avg_ranges"] * nsq_units.km,
                    lowpass_cutoff=container["lowpass_cutoff"] / nsq_units.km,
                )

                # It is possible to get slightly negative probabilities from imperfect
                # state interpolation between nodes.
                # It's impractical to avoid any probability dipping below zero in every
                # conceivable situation because that would require very dense node
                # spacing. We get around this by flooring the probability at zero.
                # However, dipping below zero by more than 1% may indicate that nodes
                # aren't spaced tightly enough to achieve an acceptable accuracy, so we
                # issue a warning.
                if (
                    np.any(container["prob_" + flav_in] < -0.01)
                    and not self.interpolation_warning_issued
                ):
                    mask = container["prob_" + flav_in] < -0.01
                    en_med = np.median(container["true_energy"][mask])
                    cz_med = np.median(container["true_coszen"][mask])
                    logging.warn(
                        f"Some probabilities in nu_{flav_in} -> {container.name} dip "
                        "below zero by more than 1%! This may indicate too few nodes "
                        f"in the problematic region. Median energy: {en_med}, median "
                        f"coszen: {cz_med}. This warning is only issued once."
                    )
                    self.interpolation_warning_issued = True
                container["prob_" + flav_in][container["prob_" + flav_in] < 0] = 0.0
            container.mark_changed("prob_e")
            container.mark_changed("prob_mu")
            if self.use_taus:
                container.mark_changed("prob_tau")
        self.data.unlink_containers()
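
The flooring of slightly negative interpolated probabilities can be illustrated in isolation (toy values; in the stage above a warning is additionally issued, once, when the dip exceeds 1%):

import numpy as np

prob = np.array([0.30, -0.002, -0.05, 0.90])  # toy interpolated probabilities
if np.any(prob < -0.01):
    print("some probabilities dip below zero by more than 1%")
prob[prob < 0] = 0.0  # floor at zero, as done in the stage above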
Exemplo n.º 44
0
                    help='set verbosity level')

args = parser.parse_args()
set_verbosity(args.verbose)

print "FILE NORMALIZATION: "
print "  >> nue: ",args.nfiles_nue
print "  >> numu: ",args.nfiles_numu
print "  >> nutau: ",args.nfiles_nutau

ebins = np.linspace(args.emin,args.emax,args.nebins) if args.elin else np.logspace(np.log10(args.emin), np.log10(args.emax), args.nebins)

# Cut definitions:
s1_s2_cuts = []
if args.v4cuts:
    logging.warn("Using cuts V4!")
    s1_s2_cuts = [("Cuts_V4_Step1",'value',True),("Cuts_V4_Step2",'value',True)]
elif args.v3cuts:
    logging.warn("Using cuts V3!")
    s1_s2_cuts = [('NewestBgRejCutsStep1','value',True), ('NewestBgRejCutsStep2','value',True)]
elif args.v5truth:
    logging.warn("USING V5 TRUTH information")
    s1_s2_cuts = [('Cuts_V5_Step2_upgoing_Truth','value',True)]
elif args.nocuts:
    logging.warn("Using no selection cuts!")
    s1_s2_cuts = []
else:
    logging.warn("Using cuts V5!")
    s1_s2_cuts= [("Cuts_V5_Step1",'value',True),("Cuts_V5_Step2",'value',True)]

Exemplo n.º 45
0
Arquivo: kde.py Projeto: mamday/pisa
def vbw_kde(data, N=None, MIN=None, MAX=None, evaluate_dens=True,
            evaluate_at=None, overfit_factor=1.0):
    '''
    Parameters
    ----------
    data            The data points for which the density estimate is sought

    N               Number of points with which to form regular mesh, from MIN
                      to MAX; this gets DCT'd, so N should be a power of two.
                      -> Default: 2**14 (16384)

    MIN             Minimum of range over which to compute density.
                      -> Default: min(data) - range(data)/10

    MAX             Maximum of range over which to compute density.
                      -> Default: max(data) + range(data)/10

    evaluate_dens   Whether to evaluate the density either at the mesh points
                      defined by N, MIN, and MAX, or at the points specified by
                      the argument evaluate_at. If False, only the gaussians'
                      bandwidths and the mesh locations (no density) are
                      returned. Evaluating the density is a large fraction of
                      total execution time, so setting this to False saves time
                      if only the bandwidths are desired.
                      -> Default: True

    evaluate_at     Points at which to evaluate the density. If None is
                      specified, evaluates at points on the mesh defined by
                      MIN, MAX, and N.
                      -> Default: None

    overfit_factor  EXPERIMENTAL: For the first part of the algorithm, the
                      improved-Sheather-Jones fixed-bandwidth (ISJ-FBW) bit,
                      the density can be overfit by specifying overfit_factor >
                      1.0 and underfit using a value < 1.0.
                      -> Default: 1.0

    Returns
    -------
    kernel_bandwidths The gaussian bandwidths, one for each data point

    evaluate_at       Locations at which the density is evaluated

    vbw_dens_est      Density estimates at the mesh points, or None if
                        evaluate_dens is False

    Notes
    -----
    Specifying the range:

        The specification of MIN and MAX is critical for obtaining a
    reasonable density estimate. If the true underlying density slowly decays
    to zero on one side or the other, like a gaussian, specifying too small a
    range will distort the edge the VBW-KDE finds. On the other hand, an abrupt
    cut-off in the distribution should be accompanied by a similar cutoff in
    the computational range (MIN and/or MAX). The algorithm here will
    approximate such a sharp cut-off with roughly the same performance as the
    reflection method for standard KDEs (as the fixed-BW portion uses a DCT of
    the data), but note that this will not perform as well as polynomial-edge
    or other modifications that have been proposed in the literature.

    Specifying overfit_factor; other tweaks:

        I've seen no improvement by changing this parameter, but it remains for
    experimental purposes. Other avenues to explore include changing the
    "normalization" of the variable-bandwidth bit, which forces the bandwidth
    at the peak to match that found by the ISJ-FBW part.
    '''
    # Parameters to set up the mesh on which to calculate
    if N is None:
        N = 2**14 #if N is None else int(2**np.ceil(np.log2(N)))
    if MIN is None or MAX is None:
        minimum = min(data)
        maximum = max(data)
        Range = maximum - minimum
        if Range == 0:
            logging.warn('Range of data is 0; there are ' + str(len(data)) +
                          ' data points.')
        MIN = minimum - Range/10 if MIN is None else MIN
        MAX = maximum + Range/10 if MAX is None else MAX

    # Range for computation
    R = MAX-MIN

    # Histogram the data to get a crude first approximation of the density
    M = len(data)
    DataHist, bins = np.histogram(data, bins=N, range=(MIN, MAX))
    DataHist = DataHist/M

    DCTData = fftpack.dct(DataHist, norm=None)

    I = np.arange(1, N, dtype=np.float64)**2
    SqDCTData = np.float64((DCTData[1:]/2.0)**2)

    # The fixed point calculation finds the bandwidth = t_star
    failure = True
    for guess in np.logspace(-1, 2, 20):
        try:
            t_star = optimize.brentq(fixed_point,
                                     0, guess,
                                     args=(np.float64(M), I, SqDCTData))
            failure = False
            break
        except ValueError:
            failure = True

    if failure:
        raise ValueError('Initial root-finding failed.')

    # Smooth the DCT-transformed data using t_star divided by an overfitting
    # param that allows a sub-optimal, but "sharper", fit to the features
    SmDCTData = DCTData*np.exp(-np.arange(N)**2*pisq*t_star/(2*overfit_factor))

    # Inverse DCT to get density
    fbw_dens_on_mesh = fftpack.idct(SmDCTData, norm=None)*N/R

    # Start by defining the mesh as the bins' centers
    mesh = (bins[0:-1]+bins[1:])/2.
    # Normalize the fixed-bandwidth density estimate to unit area over the mesh
    fbw_dens_on_mesh = fbw_dens_on_mesh/np.trapz(fbw_dens_on_mesh, mesh)
    isj_bandwidth = np.sqrt(t_star)*R

    # Create linear interpolator for this new density then find density est. at
    # the original data points' locations; call this fbw_dens_at_datapoints
    interp = interpolate.interp1d(x             = mesh,
                                  y             = fbw_dens_on_mesh,
                                  kind          = 'linear',
                                  copy          = False,
                                  bounds_error  = True,
                                  fill_value    = np.nan)
    fbw_dens_at_datapoints = interp(data)

    # Note below diverges from the published Abramson method, by forcing the
    # bandwidth at the max of the density distribution to be exactly the
    # bandwidth found above with the improved Sheather-Jones BW selection
    # technique. Refs:
    #   I.S. Abramson, On bandwidth variation in kernel estimates - A square
    #       root law, Annals of Stat. Vol. 10, No. 4, 1217-1223 1982
    #   P. Hall, T. C. Hu, J. S. Marron, Improved Variable Window Kernel
    #       Estimates of Probability Densities, Annals of Statistics Vol. 23,
    #       No. 1, 1-10, 1995
    root_pknorm_fbw_dens_est = np.sqrt(fbw_dens_at_datapoints /
                                       np.max(fbw_dens_at_datapoints))
    kernel_bandwidths = isj_bandwidth/root_pknorm_fbw_dens_est

    if evaluate_at is None:
        evaluate_at = mesh

    if not evaluate_dens:
        return kernel_bandwidths, evaluate_at, None
    vbw_dens_est = np.zeros_like(evaluate_at, dtype=np.double)
    gaussians(outbuf  = vbw_dens_est,
              x       = evaluate_at.astype(np.double),
              mu      = data.astype(np.double),
              sigma   = kernel_bandwidths.astype(np.double),
              threads = int(openmp_num_threads))

    # Normalize distribution to have area of 1
    vbw_dens_est = vbw_dens_est/np.trapz(y=vbw_dens_est, x=evaluate_at)

    return kernel_bandwidths, evaluate_at, vbw_dens_est
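
A minimal usage sketch; the import path is an assumption based on the "Arquivo"/"Projeto" line above and may differ between PISA versions:

import numpy as np
from pisa.utils.kde import vbw_kde  # assumed module path

np.random.seed(0)
data = np.random.normal(loc=0.0, scale=1.0, size=1000)

bandwidths, x, dens = vbw_kde(data, N=2**10, MIN=-5.0, MAX=5.0)
print(len(bandwidths) == len(data))  # one bandwidth per data point
print(np.trapz(dens, x))             # density integrates to ~1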
Exemplo n.º 46
0
parser.add_argument('-o','--outfile',type=str,default='llh_data.json',metavar='JSONFILE',
                    help="Output filename.")
parser.add_argument('-v', '--verbose', action='count', default=None,
                    help='set verbosity level')
args = parser.parse_args()

set_verbosity(args.verbose)

#Read in the settings
template_settings = from_json(args.template_settings)
minimizer_settings  = from_json(args.minimizer_settings)

#Workaround for old scipy versions
import scipy
if scipy.__version__ < '0.12.0':
    logging.warn('Detected scipy version %s < 0.12.0'%scipy.__version__)
    if 'maxiter' in minimizer_settings:
      logging.warn('Optimizer settings for \"maxiter\" will be ignored')
      minimizer_settings.pop('maxiter')

#Get the parameters
params = template_settings['params']

#store results from all the trials
trials = []

template_maker = TemplateMaker(get_values(params),**template_settings['binning'])

for itrial in xrange(1,args.ntrials+1):
    profile.info("start trial %d"%itrial)
    logging.info(">"*10 + "Running trial: %05d"%itrial + "<"*10)
Exemplo n.º 47
0
    'nue': {'filename': args.nue,'nfiles': args.nfiles_nue},
    'numu': {'filename': args.numu,'nfiles': args.nfiles_numu},
    'nutau': {'filename': args.nutau,'nfiles': args.nfiles_nutau}}

logging.info("input files:\n%s"%data_files)

# Ensure overwrite of existing filename...
outfilename = args.outfile
fh = h5py.File(outfilename,'w')
fh.close()
logging.info("Writing to file: %s",outfilename)

# Define V3, V4, or V5 cuts:
cut_list = []
if args.cutsV3:
    logging.warn("Using cuts V3...")
    cut_list.append(('NewestBgRejCutsStep1','value',True))
    cut_list.append(('NewestBgRejCutsStep2','value',True))
elif args.cutsV4:
    logging.warn("Using cuts V4...")
    cut_list.append(('Cuts_V4_Step1','value',True))
    cut_list.append(('Cuts_V4_Step2','value',True))
elif args.cutsV5:
    logging.warn("Using cuts V5...")
    cut_list.append(('Cuts_V5_Step1','value',True))
    cut_list.append(('Cuts_V5_Step2','value',True))


nuDict = {}
if args.old_pid:
    nuDict = {'nue':66,'numu':68,'nutau':133,'nue_bar':67,'numu_bar':69,'nutau_bar':134}
Exemplo n.º 48
0
def find_max_llh_bfgs(fmap, template_maker, params, bfgs_settings, save_steps=False,
                      normal_hierarchy=None, check_octant=False):
    """
    Finds the template (and free systematic params) that maximize
    likelihood that the data came from the chosen template of true
    params, using the limited memory BFGS algorithm subject to bounds
    (l_bfgs_b).

    returns a dictionary of llh data and best fit params, in the format:
      {'llh': [...],
       'param1': [...],
       'param2': [...],
       ...}
    where 'param1', 'param2', ... are the free params varied by
    optimizer, and they hold a list of all the values tested in
    optimizer algorithm, unless save_steps is False, in which case
    they are one element in length: the best-fit params and best-fit llh.
    """

    # Get params dict which will be optimized (free_params) and which
    # won't be (fixed_params) but are still needed for get_template()
    fixed_params = get_fixed_params(select_hierarchy(params,normal_hierarchy))
    free_params = get_free_params(select_hierarchy(params,normal_hierarchy))

    if len(free_params) == 0:
        logging.warn("NO FREE PARAMS, returning LLH")
        true_template = template_maker.get_template(get_values(fixed_params))
        channel = params['channel']['value']
        true_fmap = flatten_map(true_template,chan=channel)
        return {'llh': [-get_binwise_llh(fmap,true_fmap)]}

    init_vals = get_param_values(free_params)
    scales = get_param_scales(free_params)
    bounds = get_param_bounds(free_params)
    priors = get_param_priors(free_params)
    names  = sorted(free_params.keys())

    # Scale init-vals and bounds to work with bfgs opt:
    init_vals = np.array(init_vals)*np.array(scales)
    bounds = [bounds[i]*scales[i] for i in range(len(bounds))]

    opt_steps_dict = {key:[] for key in names}
    opt_steps_dict['llh'] = []

    const_args = (names,scales,fmap,fixed_params,template_maker,opt_steps_dict,priors)

    display_optimizer_settings(free_params, names, init_vals, bounds, priors,
                               bfgs_settings)

    best_fit_vals,llh,dict_flags = opt.fmin_l_bfgs_b(
        llh_bfgs, init_vals, args=const_args, approx_grad=True, iprint=0,
        bounds=bounds, **get_values(bfgs_settings))

    # If needed, run optimizer again, checking for second octant solution:
    if check_octant and ('theta23' in free_params.keys()):
        physics.info("Checking alternative octant solution")
        old_th23_val = free_params['theta23']['value']
        # Mirror theta23 about pi/4 to seed the fit in the opposite octant
        delta = np.pi/4 - old_th23_val
        free_params['theta23']['value'] = np.pi/4 + delta
        init_vals = get_param_values(free_params)

        const_args = (names,scales,fmap,fixed_params,template_maker,opt_steps_dict,priors)
        display_optimizer_settings(free_params, names, init_vals, bounds, priors,
                                   bfgs_settings)
        alt_fit_vals,alt_llh,alt_dict_flags = opt.fmin_l_bfgs_b(
            llh_bfgs, init_vals, args=const_args, approx_grad=True, iprint=0,
            bounds=bounds, **get_values(bfgs_settings))

        # Alternative octant solution is optimal:
        if alt_llh < llh:
            best_fit_vals = alt_fit_vals
            llh = alt_llh
            dict_flags = alt_dict_flags


    best_fit_params = { name: value for name, value in zip(names, best_fit_vals) }

    #Report best fit
    physics.info('Found best LLH = %.2f in %d calls at:'
        %(llh,dict_flags['funcalls']))
    for name, val in best_fit_params.items():
        physics.info('  %20s = %6.4f'%(name,val))

    #Report any warnings if there are
    lvl = logging.WARN if (dict_flags['warnflag'] != 0) else logging.DEBUG
    for name, val in dict_flags.items():
        physics.log(lvl," %s : %s"%(name,val))

    if not save_steps:
        # Do not store the extra history of opt steps:
        for key in opt_steps_dict.keys():
            opt_steps_dict[key] = [opt_steps_dict[key][-1]]

    return opt_steps_dict
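
A sketch of how the returned dictionary might be consumed; fmap, template_maker, params, and bfgs_settings are assumed to exist as in the surrounding analysis, and 'theta23' stands in for whichever parameters are free:

results = find_max_llh_bfgs(fmap, template_maker, params, bfgs_settings,
                            save_steps=False, normal_hierarchy=True,
                            check_octant=True)

best_llh = results['llh'][0]          # single entry per key when save_steps is False
best_theta23 = results['theta23'][0]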
Exemplo n.º 49
0
def find_max_llh_bfgs(fmap,template_maker,params,bfgs_settings,save_steps=False,
                      normal_hierarchy=None):
    '''
    Finds the template (and free systematic params) that maximize
    likelihood that the data came from the chosen template of true
    params, using the limited memory BFGS algorithm subject to bounds
    (l_bfgs_b).

    returns a dictionary of llh data and best fit params, in the format:
      {'llh': [...],
       'param1': [...],
       'param2': [...],
       ...}
    where 'param1', 'param2', ... are the free params varied by
    optimizer, and they hold a list of all the values tested in
    optimizer algorithm, unless save_steps is False, in which case
    they are one element in length: the best-fit params and best-fit llh.
    '''

    # Get params dict which will be optimized (free_params) and which
    # won't be (fixed_params) but are still needed for get_template()
    fixed_params = get_fixed_params(select_hierarchy(params,normal_hierarchy))
    free_params = get_free_params(select_hierarchy(params,normal_hierarchy))

    if len(free_params) == 0:
        logging.warn("NO FREE PARAMS, returning LLH")
        true_template = template_maker.get_template(get_values(fixed_params))
        channel = params['channel']['value']
        true_fmap = flatten_map(true_template,chan=channel)
        return {'llh': [-get_binwise_llh(fmap,true_fmap)]}

    init_vals = get_param_values(free_params)
    scales = get_param_scales(free_params)
    bounds = get_param_bounds(free_params)
    priors = get_param_priors(free_params)
    names  = sorted(free_params.keys())

    # Scale init-vals and bounds to work with bfgs opt:
    init_vals = np.array(init_vals)*np.array(scales)
    bounds = [bounds[i]*scales[i] for i in range(len(bounds))]

    opt_steps_dict = {key:[] for key in names}
    opt_steps_dict['llh'] = []

    const_args = (names,scales,fmap,fixed_params,template_maker,opt_steps_dict,priors)

    physics.info('%d parameters to be optimized'%len(free_params))
    for name,init,(down,up),(prior, best) in zip(names, init_vals, bounds, priors):
        physics.info(('%20s : init = %6.4f, bounds = [%6.4f,%6.4f], '
                     'best = %6.4f, prior = '+
                     ('%6.4f' if prior else "%s"))%
                     (name, init, down, up, best, prior))

    physics.debug("Optimizer settings:")
    for key,item in bfgs_settings.items():
        physics.debug("  %s -> `%s` = %.2e"%(item['desc'],key,item['value']))

    best_fit_vals,llh,dict_flags = opt.fmin_l_bfgs_b(llh_bfgs,
                                                     init_vals,
                                                     args=const_args,
                                                     approx_grad=True,
                                                     iprint=0,
                                                     bounds=bounds,
                                                     **get_values(bfgs_settings))

    best_fit_params = { name: value for name, value in zip(names, best_fit_vals) }

    #Report best fit
    physics.info('Found best LLH = %.2f in %d calls at:'
        %(llh,dict_flags['funcalls']))
    for name, val in best_fit_params.items():
        physics.info('  %20s = %6.4f'%(name,val))

    #Report any warnings if there are
    lvl = logging.WARN if (dict_flags['warnflag'] != 0) else logging.DEBUG
    for name, val in dict_flags.items():
        physics.log(lvl," %s : %s"%(name,val))

    if not save_steps:
        # Do not store the extra history of opt steps:
        for key in opt_steps_dict.keys():
            opt_steps_dict[key] = [opt_steps_dict[key][-1]]

    return opt_steps_dict
Exemplo n.º 50
0
def load_and_modify_pipeline_cfg(fit_cfg, section):
    """Load and modify the pipeline config file as specified in that section of the fit
    config.

    Parameters
    ----------
    fit_cfg : pisa.utils.config_parser.PISAConfigParser
        any subclass of :class:`configparser.RawConfigParser` should work as well

    section : str
        name of the section to extract from the `fit_cfg`

    Returns
    -------
    pipeline_cfg : pisa.utils.config_parser.PISAConfigParser
        pipeline config

    pipeline_cfg_path : str
        path to the pipeline config as it is specified in the fit config

    """
    pipeline_cfg_path = fit_cfg.get(section, SYS_SET_OPTION)
    other_options = fit_cfg.options(section)
    other_options.remove(SYS_SET_OPTION)

    pipeline_cfg = from_file(pipeline_cfg_path)

    # Get a no-whitespace version of the section names
    section_map = {s.strip(): s for s in pipeline_cfg.sections()}

    for option in other_options:
        set_match = SET_OPTION_RE.match(option)
        remove_match = REMOVE_OPTION_RE.match(
            option) if not set_match else None
        if set_match:
            section_spec, set_option = set_match.groups()
            no_ws_section_spec = section_spec.strip()
            set_option = set_option.strip()
            if no_ws_section_spec not in section_map:
                logging.debug(
                    'Adding section [%s] to in-memory copy of pipeline config "%s"',
                    section_spec,
                    pipeline_cfg_path,
                )
                pipeline_cfg.add_section(section_spec)
                section_map[no_ws_section_spec] = section_spec
            if set_option:
                set_value = fit_cfg.get(section, option).strip()
                logging.debug(
                    'Setting section [%s] option "%s = %s" in in-memory'
                    ' copy of pipeline config "%s"',
                    section_spec,
                    set_option,
                    set_value,
                    pipeline_cfg_path,
                )
                pipeline_cfg.set(section_map[no_ws_section_spec], set_option,
                                 set_value)
        elif remove_match:
            section_spec, remove_option = remove_match.groups()
            no_ws_section_spec = section_spec.strip()
            remove_option = remove_option.strip()
            if no_ws_section_spec in section_map:
                if remove_option:
                    logging.debug(
                        'Removing section [%s] option "%s" from in-memory copy of'
                        ' pipeline config "%s"',
                        section_spec,
                        remove_option,
                        pipeline_cfg_path,
                    )
                    pipeline_cfg.remove_option(section_map[no_ws_section_spec],
                                               remove_option)
                else:
                    logging.debug(
                        "Removing section [%s] from in-memory copy of pipeline config"
                        ' "%s"',
                        section_spec,
                        pipeline_cfg_path,
                    )
                    pipeline_cfg.remove_section(
                        section_map[no_ws_section_spec])
            else:
                logging.warn(
                    "Told to remove section [%s] but section does not exist in"
                    ' pipline config "%s"',
                    section_spec,
                    pipeline_cfg_path,
                )
        else:
            raise ValueError(
                "Unhandled option in fit config: {}".format(option))

    return pipeline_cfg, pipeline_cfg_path
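
A hedged usage sketch; the config path is a placeholder, and parse_fit_config, NOMINAL_SET_PFX, and SYS_SET_PFX are the module-level helpers/constants already used in Exemplo n.º 39:

parsed_fit_cfg, sys_list, units_list, combine_regex = parse_fit_config("fit_config.cfg")

for section in parsed_fit_cfg.sections():
    prefix = section.strip().split(":")[0].strip()
    if prefix not in (NOMINAL_SET_PFX, SYS_SET_PFX):
        continue  # only nominal/systematics-set sections point to pipeline configs
    pipeline_cfg, pipeline_cfg_path = load_and_modify_pipeline_cfg(
        fit_cfg=parsed_fit_cfg, section=section)
    print(section, "->", pipeline_cfg_path)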
Exemplo n.º 51
0
Arquivo: hdf.py Projeto: gkrueckl/pisa
 def store_recursively(fhandle, node, path=None, node_hashes=None):
     if path is None:
         path = []
     if node_hashes is None:
         node_hashes = {}
     full_path = '/' + '/'.join(path)
     if isinstance(node, dict):
         logging.trace("  creating Group `%s`" % full_path)
         try:
             fhandle.create_group(full_path)
         except ValueError:
             pass
         for key in sorted(node.iterkeys()):
             key_str = str(key)
             if not isinstance(key, str):
                 logging.warn('Stringifying key `' + key_str +
                              '` for use as name in HDF5 file')
             val = node[key]
             new_path = path + [key_str]
             store_recursively(fhandle=fhandle, node=val, path=new_path,
                               node_hashes=node_hashes)
     else:
         # Check for existing node
         node_hash = utils.hash_obj(node)
         if node_hash in node_hashes:
             logging.trace("  creating hardlink for Dataset: `%s` -> `%s`" %
                           (full_path, node_hashes[node_hash]))
             # Hardlink the matching existing dataset
             fhandle[full_path] = fhandle[node_hashes[node_hash]]
             return
         # For now, convert None to np.nan since h5py appears to not handle None
         if node is None:
             node = np.nan
             logging.warn("  encountered `None` at node `%s`; converting to"
                          " np.nan" % full_path)
         # "Scalar datasets don't support chunk/filter options". Shuffling
         # is a good idea otherwise since subsequent compression will
         # generally benefit; shuffling requires chunking. Compression is
         # not done here since it is slow.
         if np.isscalar(node):
             shuffle = False
             chunks = None
         else:
             shuffle = True
             chunks = True
             # Store the node_hash for linking to later if this is more than
             # a scalar datatype. Assumed that "None" has 
             node_hashes[node_hash] = full_path
         # TODO: Treat strings as follows? Would this break compatibility
         # with pytables/Pandas? What are benefits? Leaving out for now.
         # if isinstance(node, basestr):
         #     dtype = h5py.special_dtype(vlen=str)
         #     fh.create_dataset(k,data=v,dtype=dtype)
         logging.trace("  creating dataset at node `%s`" % full_path)
         try:
             fhandle.create_dataset(name=full_path, data=node,
                                    chunks=chunks, compression=None,
                                    shuffle=shuffle, fletcher32=False)
         except TypeError:
             try:
                 shuffle = False
                 chunks = None
                 fhandle.create_dataset(name=full_path, data=node,
                                        chunks=chunks, compression=None,
                                        shuffle=shuffle, fletcher32=False)
             except:
                 logging.error('  full_path: ' + full_path)
                 logging.error('  chunks   : ' + str(chunks))
                 logging.error('  shuffle  : ' + str(shuffle))
                 logging.error('  node     : ' + str(node))
                 raise
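
A small usage sketch, assuming the helper's module context (its numpy, h5py, and utils imports) is available:

import h5py
import numpy as np

data = {
    'reco': {'energy': np.linspace(1.0, 80.0, 10),
             'coszen': np.linspace(-1.0, 0.0, 10)},
    'livetime': 2.5,
    'missing': None,   # will be stored as np.nan, with a warning
}

fh = h5py.File('example.hdf5', 'w')
try:
    store_recursively(fhandle=fh, node=data)
finally:
    fh.close()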
Exemplo n.º 52
0
def fit_discrete_sys_distributions(input_data, p0=None, fit_method=None):
    """Fits a hyperplane to MapSets generated at given systematics parameters
    values.

    Parameters
    ----------
    input_data : OrderedDict
        The data container returned by `make_discrete_sys_distributions`
        and modified by `norm_sys_distributions`.
    p0 : list or dict
        Initial guess list (same initial guess for all maps) or dictionary
        (keys have to correspond to event groups/channels in maps)
        with one offset and len(sys_list) slopes. Default is list of ones.
    fit_method : None or string
        `method` arg to pass to `curve_fit` (see curve_fit docs).
        If None, will default to `trf` (this method supports covariance matrix
        calculation in the dimensionality we're dealing with).

    Returns
    -------
    fit_results : OrderedDict
        Container of the hyperplane fit results + supporting data

    """
    #
    # Prepare a few things before fitting
    #

    # Set a default fit method for curve_fit
    if fit_method is None:
        fit_method = "trf"  # lm, trf, dogbox
    # TODO Store in output data

    # prepare an output data container
    fit_results = OrderedDict()
    fit_results["hyperplanes"] = OrderedDict()

    # store info from the input data in the fit results
    fit_results["datasets"] = input_data["datasets"]
    fit_results["param_names"] = input_data["param_names"]
    fit_results["fit_cfg_path"] = input_data["fit_cfg_path"]
    fit_results["fit_cfg_txt"] = input_data["fit_cfg_txt"]

    # get number of systematic parameters and datasets
    n_sys_params = len(fit_results["param_names"])
    n_datasets = len(fit_results["datasets"])

    # get number of params in hyperplane fit
    # this is one slope per systematic, plus a single intercept
    n_fit_params = 1 + len(fit_results["param_names"])

    # get binning info
    binning = fit_results["datasets"][0]["mapset"][0].binning
    binning_shape = list(binning.shape)

    # normalise the systematics variations to the nominal distribution
    # with error propagation
    norm_sys_distributions(input_data)

    # re-organise normalised maps to be stored per event type (a list for each dataset)
    norm_sys_maps = OrderedDict()
    for map_name in input_data["datasets"][0]["norm_mapset"].names:
        norm_sys_maps[map_name] = [
            dataset_dict["norm_mapset"][map_name]
            for dataset_dict in input_data["datasets"]
        ]

    # get an array of the systematic parameter points sampled across all datasets
    # transpose to get format compatible with scipy.optimize.curve_fit
    sys_param_points = np.asarray([
        dataset_dict["param_values"]
        for dataset_dict in fit_results["datasets"]
    ])  # [datasets, params]
    sys_param_points_T = sys_param_points.T
    assert sys_param_points_T.shape[0] == n_sys_params
    assert sys_param_points_T.shape[1] == n_datasets

    # store some of this stuff
    fit_results["sys_param_points"] = sys_param_points
    fit_results["binning"] = binning
    fit_results["binning_hash"] = binning.hash

    #
    # Prepare initial parameter guesses
    #

    if p0:
        if isinstance(p0, Mapping):
            p0_keys = sorted(p0.keys())
            map_keys = sorted(norm_sys_maps.keys())
            if not p0_keys == map_keys:
                raise KeyError(
                    "Initial guess mapping contains keys %s which are not the"
                    " same as %s in maps." % (p0_keys, map_keys))
            for ini_guess in p0.values():
                assert len(ini_guess) == n_fit_params
        elif isinstance(p0, Sequence):
            assert len(p0) == n_fit_params
            p0 = {map_name: p0 for map_name in norm_sys_maps.keys()}
        else:
            raise TypeError(
                "Initial guess must be a mapping or a sequence. Found %s." %
                type(p0))
    else:
        p0 = {
            map_name: np.ones(n_fit_params)
            for map_name in norm_sys_maps.keys()
        }

    fit_results["p0"] = p0

    #
    # Loop over event types
    #

    for map_name, chan_norm_sys_maps in norm_sys_maps.items():
        logging.info('Fitting "%s" maps with initial guess %s.', map_name,
                     p0[map_name])

        # create a container for fit results for this event type
        fit_results["hyperplanes"][map_name] = OrderedDict()

        # initialise data arrays with NaNs
        fit_results["hyperplanes"][map_name]["fit_params"] = np.full(
            shape=binning_shape +
            [n_fit_params],  # [bins..., hyperplane params]
            fill_value=np.nan,
        )
        fit_results["hyperplanes"][map_name]["chi2s"] = np.full(
            shape=binning_shape + [n_datasets],
            fill_value=np.nan  # [bins..., datasets]
        )
        fit_results["hyperplanes"][map_name]["cov_matrices"] = np.full(
            shape=binning_shape + [
                n_fit_params,
                n_fit_params,
            ],  # [bins..., hyperplane params, hyperplane params]
            fill_value=np.nan,
        )
        fit_results["hyperplanes"][map_name]["finite_mask"] = np.full(
            shape=binning_shape + [n_datasets],
            fill_value=np.nan  # [bins..., datasets]
        )

        #
        # loop over bins
        #

        for idx in np.ndindex(*binning_shape):
            # Get the bin content, including uncertainty and a mask indicating
            # whether the bin is finite. Treat the bin content as y values in
            # the fit, e.g. y(x0,...,xN) where N is the number of parameters;
            # each of these 1D arrays has one element per input dataset
            y = np.asarray([m.hist[idx] for m in chan_norm_sys_maps])
            y_values = unp.nominal_values(y)
            y_sigma = unp.std_devs(y)
            finite_mask = np.isfinite(y_values) & np.isfinite(y_sigma)

            # Empty bins have sigma=0, which causes the hyperplane fit to fail
            # (silently); replace with sigma=inf (i.e. we know nothing about this bin)
            empty_bin_mask = np.isclose(y_values, 0.0)
            if np.any(empty_bin_mask):
                empty_bin_zero_sigma_mask = empty_bin_mask & np.isclose(
                    y_sigma, 0.0)
                if np.any(empty_bin_zero_sigma_mask):
                    y_sigma[empty_bin_zero_sigma_mask] = np.inf

            # check no zero sigma values remaining
            if np.any(np.isclose(y_sigma, 0.0)):
                raise ValueError(
                    "Found histogram sigma values that are 0., which is unphysical"
                )

            #
            # Perform hyperplane fit in this bin
            #

            # case 1: uncertainties are available in the bins (ideal case)
            if np.any(y_sigma[finite_mask]):

                # fit
                popt, pcov = curve_fit(
                    hyperplane_fun,
                    sys_param_points_T[:, finite_mask],
                    y_values[finite_mask],
                    sigma=y_sigma[finite_mask],
                    p0=p0[map_name],
                    absolute_sigma=True,  # TODO Should we use this?
                    method=fit_method,
                )

                # Calculate chi-square values comparing the input data and the
                # fit results at each data point (e.g. per dataset, and of
                # course in each bin)
                for point_idx in range(n_datasets):  # Loop over datasets
                    # Get param values for this dataset
                    point = sys_param_points[point_idx, :]
                    # Predict counts in this bin according to the hyperplane
                    # for this dataset
                    predicted = hyperplane_fun(point, *popt)
                    observed = y_values[point_idx]
                    sigma = y_sigma[point_idx]
                    # TODO Is chi2 computation correct?
                    chi2 = ((predicted - observed) / sigma)**2
                    chi2_idx = tuple(list(idx) + [point_idx])
                    fit_results["hyperplanes"][map_name]["chi2s"][
                        chi2_idx] = chi2

            else:
                # if here, no uncertainties are available for this bin
                # note that cannot calculate chi2 without uncertainties

                # case 2: there are at least central values in the bins
                if np.any(y_values[finite_mask]):

                    # without error estimates each point has the same weight
                    # and we cannot get chi-square values (but can still fit)
                    logging.warn(
                        "No uncertainties for any of the normalised counts in bin"
                        ' %s ("%s") found. Fit is performed unweighted and no'
                        " chisquare values will be available.",
                        idx,
                        map_name,
                    )

                    # fit (unweighted, since no uncertainties are available)
                    popt, pcov = curve_fit(
                        hyperplane_fun,
                        sys_param_points_T[:, finite_mask],
                        y_values[finite_mask],
                        p0=p0[map_name],
                        method=fit_method,
                    )

                # case 3: no data in this bin
                # this is the worst case, where there are no central values or
                # errors. Most likely this came about because this bin is
                # empty, which is not necessarily an error.
                else:

                    # Store NaN for fit params and chi2
                    popt = np.full_like(p0[map_name], np.NaN)
                    pcov = np.NaN  # TODO Shape?

            # store the results for this bin
            # note that chi2 is already stored above
            fit_results["hyperplanes"][map_name]["fit_params"][idx] = popt
            fit_results["hyperplanes"][map_name]["cov_matrices"][idx] = pcov
            fit_results["hyperplanes"][map_name]["finite_mask"][
                idx] = finite_mask

    return fit_results
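
The fit relies on a hyperplane_fun defined elsewhere in this module; given the parameterisation described above (one intercept plus one slope per systematic), it presumably takes a form like the following sketch (named differently to make clear it is not the original definition):

import numpy as np

def hyperplane_fun_sketch(x, *p):
    """x: array of shape [n_sys_params, n_points]; p: (intercept, slope_1, ..., slope_N)."""
    return p[0] + np.dot(np.asarray(p[1:]), np.asarray(x))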
Exemplo n.º 53
0
 )
 parser.add_argument(
     'infiles',
     nargs='*',
     help="input llh files to combine into one output hdf5 file."
 )
 parser.add_argument(
     '-v', '--verbose',
     action='count',
     default=None,
     help="set verbosity level"
 )
 args = parser.parse_args()
 set_verbosity(args.verbose)
 
 logging.warn("processing " + str(len(args.infiles)) + " files...")
 logging.warn("Saving to file: %s"%args.outfile)
 
 mod_num = len(args.infiles)/20
 
 start_time = datetime.now()
 
 minimizer_settings = {}
 template_settings = {}
 pseudo_data_settings = {}
 trials = {}
 for i,filename in enumerate(args.infiles):
     if mod_num > 0:
         if i%mod_num == 0: print "  >> %d files done..."%i
     try:
         data = from_json(filename)