Example #1
    def __init__(self, params, output_names, events_file, output_binning=None,
                 output_events=True, error_method=None, debug_mode=None,
                 disk_cache=None, memcache_deepcopy=True,
                 transforms_cache_depth=20, outputs_cache_depth=20):

        self.sample_hash = None

        expected_params = ('dataset', 'keep_criteria')  # TODO -> kwargs???

        self.events_file = events_file

        self.neutrinos = False
        self.muons = False
        self.noise = False

        output_names = output_names.replace(' ', '').split(',')
        clean_outnames = []
        self._output_nu_groups = []
        for name in output_names:
            if 'muons' in name:
                self.muons = True
                clean_outnames.append(name)
            elif 'noise' in name:
                self.noise = True
                clean_outnames.append(name)
            elif 'all_nu' in name:
                self.neutrinos = True
                self._output_nu_groups = \
                    [NuFlavIntGroup(f) for f in ALL_NUFLAVINTS]
            else:
                self.neutrinos = True
                self._output_nu_groups.append(NuFlavIntGroup(name))

        if self.neutrinos:
            clean_outnames += [str(f) for f in self._output_nu_groups]

        if not isinstance(output_events, bool):
            raise AssertionError(
                'output_events must be of type bool; got type {0}'
                ' instead'.format(type(output_events))
            )
        if output_events:  # TODO: Implement MapSet option or remove
            output_binning = None
        self.output_events = output_events

        super(events_to_data, self).__init__(
            use_transforms=False,
            params=params,
            expected_params=expected_params,
            output_names=clean_outnames,
            error_method=error_method,
            debug_mode=debug_mode,
            disk_cache=disk_cache,
            memcache_deepcopy=memcache_deepcopy,
            outputs_cache_depth=outputs_cache_depth,
            transforms_cache_depth=transforms_cache_depth,
            output_binning=output_binning
        )

        self._compute_outputs()
Example #2
    def get_xs_ratio_value(self, flavintgroup0, flavintgroup1, energy,
                           gamma=0):
        """Get ratio of combined cross sections for `flavintgroup0` to combined
        cross sections for `flavintgroup1`, weighted by E^{-`gamma`}.

        Parameters
        ----------
        flavintgroup0, flavintgroup1 : NuFlavIntGroup or convertible thereto
        energy : numeric or sequence thereof
            Energy (or energies) at which to evaluate total cross section, in
            units of GeV

        Returns
        -------
        Ratio of combined cross sections flavintgroup0 / flavintgroup1
        evaluated at each energy. Shape of returned value matches that of
        passed `energy` parameter.
        """
        flavintgroup0 = NuFlavIntGroup(flavintgroup0)
        flavintgroup1 = NuFlavIntGroup(flavintgroup1)

        self._define_interpolant(flavintgroup=flavintgroup0)
        self._define_interpolant(flavintgroup=flavintgroup1)

        xs_ratio_vals = (self._interpolants[flavintgroup0](energy)
                         / self._interpolants[flavintgroup1](energy))
        # Special case to avoid multiplying by array of ones
        if gamma == 0:
            return xs_ratio_vals
        return xs_ratio_vals * energy**(-gamma)
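A hedged usage sketch of this method, assuming a `CrossSections` instance built from PISA's bundled cross-section file as in the unit test in Example #14 (the import path here is an assumption, not confirmed by the excerpt):

import numpy as np
from pisa.utils.crossSections import CrossSections  # assumed import path

xs = CrossSections(ver='genie_2.6.4',
                   xsec='cross_sections/cross_sections.json')
energies = np.array([1.0, 10.0, 100.0])  # GeV
# Unweighted (gamma=0) ratio of numu CC to numu NC cross sections
ratio = xs.get_xs_ratio_value('numu_cc', 'numu_nc', energies)
# `ratio` has the same shape as `energies`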
Example #3
    def joined_string(self):
        """Concise string identifying _only_ joined flavints"""
        joined_groups = sorted(
            [NuFlavIntGroup(j) for j in self.metadata['flavints_joined']])
        if len(joined_groups) == 0:
            return 'unjoined'
        return 'joined_G_' + '_G_'.join([str(g) for g in joined_groups])
Example #4
    def _compute_outputs(self, inputs=None):
        """Apply basic cuts and compute histograms for output channels."""
        logging.debug('Entering sample._compute_outputs')

        self.config = from_file(self.params['data_sample_config'].value)

        name = self.config.get('general', 'name')
        logging.trace('{0} sample sample_hash = '
                      '{1}'.format(name, self.sample_hash))
        self.load_sample_events()

        if self.params['keep_criteria'].value is not None:
            # TODO(shivesh)
            raise NotImplementedError(
                'needs check to make sure this works in a DistributionMaker'
            )
            # NOTE: unreachable until the above check is implemented
            self._data.applyCut(self.params['keep_criteria'].value)
            self._data.update_hash()

        if self.output_events:
            return self._data

        outputs = []
        if self.neutrinos:
            trans_nu_data = self._data.transform_groups(
                self._output_nu_groups
            )
            for fig in trans_nu_data.keys():
                outputs.append(trans_nu_data.histogram(
                    kinds       = fig,
                    binning     = self.output_binning,
                    weights_col = 'pisa_weight',
                    errors      = True,
                    name        = str(NuFlavIntGroup(fig)),
                ))

        if self.muons:
            outputs.append(self._data.histogram(
                kinds       = 'muons',
                binning     = self.output_binning,
                weights_col = 'pisa_weight',
                errors      = True,
                name        = 'muons',
                tex         = r'\rm{muons}'
            ))

        if self.noise:
            outputs.append(self._data.histogram(
                kinds       = 'noise',
                binning     = self.output_binning,
                weights_col = 'pisa_weight',
                errors      = True,
                name        = 'noise',
                tex         = r'\rm{noise}'
            ))

        name = self.config.get('general', 'name')
        return MapSet(maps=outputs, name=name)
Example #5
    def __init__(self,
                 params,
                 input_binning,
                 output_binning,
                 input_names,
                 transform_groups,
                 error_method=None,
                 debug_mode=None,
                 disk_cache=None,
                 transforms_cache_depth=20,
                 outputs_cache_depth=20):
        self.xsec = None

        self.xsec_hash = None
        """Hash of GENIE spline file"""

        expected_params = ('xsec_file', 'livetime', 'ice_p', 'fid_vol',
                           'mr_h20', 'x_energy_scale')

        def suffix_channel(sign, suf):
            return '%s_%s' % (sign, suf)

        if isinstance(input_names, basestring):
            input_names = (''.join(input_names.split(' '))).split(',')

        self.output_channels = ('cc', 'nc')
        all_names = [
            suffix_channel(in_name, out_chan)
            for in_name, out_chan in product(input_names, self.output_channels)
        ]

        if transform_groups is None:
            output_names = all_names
        else:
            transform_groups = flavintGroupsFromString(transform_groups)
            output_names = []
            for grp in transform_groups:
                flavints = [str(g) for g in grp.flavints]
                if set(flavints).intersection(all_names) \
                   and str(grp) not in output_names:
                    output_names.append(str(grp))
        self.transform_groups = [
            NuFlavIntGroup(flavint) for flavint in output_names
        ]

        super(genie, self).__init__(
            use_transforms=True,
            params=params,
            expected_params=expected_params,
            input_names=input_names,
            output_names=output_names,
            error_method=error_method,
            disk_cache=disk_cache,
            outputs_cache_depth=outputs_cache_depth,
            transforms_cache_depth=transforms_cache_depth,
            input_binning=input_binning,
            output_binning=output_binning,
            debug_mode=debug_mode
        )

        self.include_attrs_for_hashes('transform_groups')
Example #6
def parse_event_type_names(names, return_flags=False):

    # Split into a list if this has not already been done
    if isinstance(names, str):
        names = split(names)

    # Parse the names
    parsed_names = []
    for name in names:
        if 'all_nu' in name:
            parsed_names.extend([str(NuFlavIntGroup(f)) for f in ALL_NUFLAVINTS])
        else:
            parsed_names.append(name)
    parsed_names = [n.lower() for n in parsed_names]

    # Set some flags
    muons = False
    noise = False
    neutrinos = False
    for name in parsed_names:
        if 'muons' in name:
            muons = True
        elif 'noise' in name:
            noise = True
        elif name.startswith("nu"):
            neutrinos = True
        else:
            raise ValueError("Unrecognised event type '%s' found" % name)

    if return_flags:
        return parsed_names, muons, noise, neutrinos
    return parsed_names
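A minimal usage sketch, assuming `NuFlavIntGroup` and `ALL_NUFLAVINTS` come from `pisa.utils.flavInt` and supplying a stand-in for the comma-splitting `split` helper the function relies on:

from pisa.utils.flavInt import NuFlavIntGroup, ALL_NUFLAVINTS

def split(s):
    # stand-in for the pisa.utils helper assumed by the function above
    return s.replace(' ', '').split(',')

names, muons, noise, neutrinos = parse_event_type_names(
    'all_nu,muons', return_flags=True)
# names -> lower-cased strings for every neutrino flav/int plus 'muons'
# flags -> muons=True, noise=False, neutrinos=True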
Example #7
    def _compute_outputs(self, inputs=None):
        """Compute histograms for output channels."""
        logging.debug('Entering fit._compute_outputs')
        if not isinstance(inputs, Data):
            raise AssertionError('inputs is not a Data object, instead is '
                                 'type {0}'.format(type(inputs)))
        self.weight_hash = deepcopy(inputs.metadata['weight_hash'])
        logging.trace('{0} fit weight_hash = '
                      '{1}'.format(inputs.metadata['name'], self.weight_hash))
        logging.trace('{0} fit fit_hash = '
                      '{1}'.format(inputs.metadata['name'], self.fit_hash))
        self._data = inputs
        self.reweight()

        if self.output_events:
            return self._data

        outputs = []
        if self.neutrinos:
            trans_nu_data = self._data.transform_groups(
                self._output_nu_groups
            )
            for fig in trans_nu_data.iterkeys():
                outputs.append(
                    trans_nu_data.histogram(
                        kinds=fig,
                        binning=self.output_binning,
                        weights_col='pisa_weight',
                        errors=True,
                        name=str(NuFlavIntGroup(fig)),
                    )
                )

        if self.muons:
            outputs.append(
                self._data.histogram(
                    kinds='muons',
                    binning=self.output_binning,
                    weights_col='pisa_weight',
                    errors=True,
                    name='muons',
                    tex=text2tex('muons')
                )
            )

        if self.noise:
            outputs.append(
                self._data.histogram(
                    kinds='noise',
                    binning=self.output_binning,
                    weights_col='pisa_weight',
                    errors=True,
                    name='noise',
                    tex=text2tex('noise')
                )
            )

        return MapSet(maps=outputs, name=self._data.metadata['name'])
Example #8
    def _define_interpolant(self, flavintgroup=None):
        """If `flavintgroup` is None, compute all (separate) flavint
        interpolants; otherwise, compute interpolant for specified
        `flavintgroup`. Do not re-compute if already present.
        """
        if flavintgroup is None:
            flavintgroups = [NuFlavIntGroup(fi) for fi in self.flavints]
        else:
            flavintgroups = [NuFlavIntGroup(flavintgroup)]

        for fig in flavintgroups:
            if fig in self._interpolants:
                continue
            combined_xs = self._combine_xs(fig)
            self._interpolants[fig] = interp1d(
                x=self.energy, y=combined_xs, kind='linear', copy=False,
                bounds_error=True, fill_value=0
            )
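The interpolants are plain `scipy.interpolate.interp1d` objects over the stored energy grid. A standalone sketch of the equivalent construction with toy numbers (note that with `bounds_error=True` the `fill_value` is never actually used):

import numpy as np
from scipy.interpolate import interp1d

energy = np.array([1.0, 10.0, 100.0])          # GeV grid
combined_xs = np.array([1e-43, 1e-42, 1e-41])  # toy cross sections
interpolant = interp1d(x=energy, y=combined_xs, kind='linear', copy=False,
                       bounds_error=True, fill_value=0)
val = interpolant(5.0)  # linear interpolation between the 1 and 10 GeV points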
Example #9
    def validate_params(self, params):
        # do some checks on the parameters

        # Check type of pid_events
        assert isinstance(params.pid_events.value, (basestring, Events))

        # Check the groupings of the pid_events file
        events = Events(params.pid_events.value)
        should_be_joined = sorted([
            NuFlavIntGroup('nue_cc + nuebar_cc'),
            NuFlavIntGroup('numu_cc + numubar_cc'),
            NuFlavIntGroup('nutau_cc + nutaubar_cc'),
            NuFlavIntGroup('nuall_nc + nuallbar_nc'),
        ])
        are_joined = sorted(
            [NuFlavIntGroup(s) for s in events.metadata['flavints_joined']])
        if are_joined != should_be_joined:
            raise ValueError('Events passed have %s joined groupings but'
                             ' it is required to have %s joined groupings.' %
                             (are_joined, should_be_joined))
Example #10
def load_pid_energy_param(source):
    """Load pid energy-dependent parameterisation from file or dictionary.

    Parameters
    ----------
    source : string or mapping
        If a string, interpret it as the resource location of a file to load;
        if a mapping, use it directly.

    Returns
    -------
    pid_energy_param_dict : OrderedDict
        Keys are `NuFlavIntGroup`s and values are callables of one arg.

    """
    # Get the original dict
    if isinstance(source, str):
        orig_dict = from_file(source)
    elif isinstance(source, Mapping):
        orig_dict = source
    else:
        raise TypeError('`source` must either be string or mapping; got %s'
                        ' instead.' % type(source))

    # Build dict with flavintgroups as keys; subdict with signatures as keys
    # and callables as values
    pid_energy_param_dict = OrderedDict()

    for flavintgroup_str, subdict in orig_dict.items():
        flavintgroup = NuFlavIntGroup(flavintgroup_str)

        pid_energy_param_dict[flavintgroup] = OrderedDict()

        for signature, sig_param_spec in subdict.items():
            if isinstance(sig_param_spec, str):
                sig_param_func = eval(sig_param_spec)
                if not callable(sig_param_func):
                    raise ValueError(
                        'Group %s PID signature %s param spec "%s" does'
                        ' not evaluate to a callable.'
                        % (flavintgroup_str, signature, sig_param_spec)
                    )
            elif callable(sig_param_spec):
                sig_param_func = sig_param_spec
            else:
                raise TypeError(
                    'Group %s PID signature %s parameterization is a "%s"'
                    ' but must be a string or callable.'
                    % (flavintgroup_str, signature, type(sig_param_spec))
                )

            pid_energy_param_dict[flavintgroup][signature] = sig_param_func

    return pid_energy_param_dict
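A short sketch of calling this loader with a mapping directly; the parameterisation strings here are illustrative only, not taken from any real PISA resource:

source = {
    'nue_cc+nuebar_cc': {
        'track':   'lambda E: 0.1 + 0.005*E',  # hypothetical spec
        'cascade': 'lambda E: 0.9 - 0.005*E',  # hypothetical spec
    },
}
pid_params = load_pid_energy_param(source)
group = list(pid_params.keys())[0]            # a NuFlavIntGroup
track_pid = pid_params[group]['track'](10.0)  # evaluate at 10 GeV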
Example #11
    def __init__(self,
                 params,
                 output_binning,
                 output_names,
                 output_events=True,
                 error_method=None,
                 debug_mode=None,
                 disk_cache=None,
                 memcache_deepcopy=True,
                 transforms_cache_depth=20,
                 outputs_cache_depth=20,
                 fix_truth_variable_names=False):
        self.sample_hash = None
        """Hash of event sample"""

        expected_params = (
            'data_sample_config',
            'dataset',
            'keep_criteria',
        )

        output_names, self.muons, self.noise, self.neutrinos = parse_event_type_names(
            output_names, return_flags=True)
        self._output_nu_groups = [
            NuFlavIntGroup(name) for name in output_names
        ]

        if not isinstance(output_events, bool):
            raise AssertionError(
                'output_events must be of type bool; got type {0}'
                ' instead'.format(type(output_events)))
        if output_events:
            output_binning = None
        self.output_events = output_events

        super(sample, self).__init__(
            use_transforms=False,
            params=params,
            expected_params=expected_params,
            output_names=output_names,
            error_method=error_method,
            debug_mode=debug_mode,
            disk_cache=disk_cache,
            memcache_deepcopy=memcache_deepcopy,
            outputs_cache_depth=outputs_cache_depth,
            transforms_cache_depth=transforms_cache_depth,
            output_binning=output_binning)

        # User can specify that truth variables have their names prefixed with "true_"
        self.fix_truth_variable_names = fix_truth_variable_names
        self.truth_variables = ["energy", "coszen"]
        self.truth_variable_prefix = "true_"

        self._compute_outputs()
Example #12
    def _combine_xs(self, flavintgroup):
        """Combine all cross sections specified by the flavints in
        `flavintgroup`. All CC and NC interactions are separately grouped
        together and averaged, then the average of each interaction type
        is added to the other.

        If CC and NC interactions are present, they *must* be from the same
        flavor(s). I.e., it doesn't make sense (and so causes an exception) if
        you combine numu CC with numubar NC. It does make sense if you combine
        numu and numubar CC with numu and numubar NC, though, and this is
        allowed.

        Notes
        -----
        Does not yet implement *Ngen/spectrum-weighted* averages, which are
        necessary when combining cross sections of disparate flavor/interaction
        types from different Monte Carlo simulation runs.
        """
        flavintgroup = NuFlavIntGroup(flavintgroup)
        # Trivial case: nothing to combine
        if len(flavintgroup.flavints) == 1:
            return self[flavintgroup.flavints[0]]

        cc_flavints = flavintgroup.cc_flavints
        nc_flavints = flavintgroup.nc_flavints
        if cc_flavints and nc_flavints:
            assert flavintgroup.cc_flavs == flavintgroup.nc_flavs, \
                    'Combining CC and NC but CC flavors do not match NC flavors'
        cc_avg_xs = 0
        if cc_flavints:
            logging.trace('cc_flavints = %s' % (cc_flavints,))
            cc_avg_xs = np.sum([self[k] for k in cc_flavints], axis=0) \
                    / len(cc_flavints)
        nc_avg_xs = 0
        if nc_flavints:
            logging.trace('nc_flavints = %s' % (nc_flavints,))
            nc_avg_xs = np.sum([self[k] for k in nc_flavints], axis=0) \
                    / len(nc_flavints)
        tot_xs = cc_avg_xs + nc_avg_xs
        logging.trace('mean(tot_xs) = %s' % (np.mean(tot_xs),))
        return tot_xs
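The combination rule reduces to: average the CC members, average the NC members, then add the two averages. A self-contained numpy sketch of that arithmetic with toy numbers:

import numpy as np

# Toy per-flavint cross sections on a common energy grid
numu_cc, numubar_cc = np.array([2.0, 4.0]), np.array([1.0, 3.0])
numu_nc, numubar_nc = np.array([0.8, 1.6]), np.array([0.4, 1.2])

cc_avg_xs = (numu_cc + numubar_cc) / 2  # mean over CC flavints
nc_avg_xs = (numu_nc + numubar_nc) / 2  # mean over NC flavints
tot_xs = cc_avg_xs + nc_avg_xs          # combined cross section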
Example #13
    def get_xs_value(self, flavintgroup, energy):
        """Get (combined) cross section value (in units of m^2) for
        `flavintgroup` at `energy` (in units of GeV).

        Parameters
        ----------
        flavintgroup : NuFlavIntGroup or convertible thereto
        energy : numeric or sequence thereof
            Energy (or energies) at which to evaluate total cross section, in
            units of GeV

        Returns
        -------
        Combined cross section for flavor/interaction types in units of
        m^2, evaluated at each energy. Shape of returned value matches that of
        passed `energy` parameter.
        """
        flavintgroup = NuFlavIntGroup(flavintgroup)
        if flavintgroup not in self._interpolants:
            self._define_interpolant(flavintgroup=flavintgroup)
        return self._interpolants[flavintgroup](energy)
Example #14
def test_CrossSections(outdir=None):
    """Unit tests for CrossSections class"""
    from shutil import rmtree
    from tempfile import mkdtemp

    remove_dir = False
    if outdir is None:
        remove_dir = True
        outdir = mkdtemp()

    try:
        # "Standard" location of cross sections file in PISA; retrieve 2.6.4 for
        # testing purposes
        pisa_xs_file = 'cross_sections/cross_sections.json'
        xs = CrossSections(ver='genie_2.6.4', xsec=pisa_xs_file)

        # Location of the root file to use (not included in PISA at the moment)
        test_dir = expand(os.path.join('/tmp', 'pisa_tests', 'cross_sections'))
        #root_xs_file = os.path.join(test_dir, 'genie_2.6.4_simplified.root')
        root_xs_file = find_resource(os.path.join(
            #'tests', 'data', 'xsec', 'genie_2.6.4_simplified.root'
            'cross_sections', 'genie_xsec_H2O.root'
        ))

        # Make sure that the XS newly-imported from ROOT match those stored in
        # PISA
        if os.path.isfile(root_xs_file):
            xs_from_root = CrossSections.new_from_root(root_xs_file,
                                                       ver='genie_2.6.4')
            logging.info('Found and loaded ROOT source cross sections file %s',
                         root_xs_file)
            #assert xs_from_root.allclose(xs, rtol=1e-7)

        # Check XS ratio for numu_cc to numu_cc + numu_nc (user must inspect)
        kg0 = NuFlavIntGroup('numu_cc')
        kg1 = NuFlavIntGroup('numu_nc')
        logging.info(
            r'\int_1^80 xs(numu_cc) E^{-1} dE = %e',
            xs.get_xs_ratio_integral(kg0, None, e_range=[1, 80], gamma=1)
        )
        logging.info(
            '(int E^{-gamma} * (sigma_numu_cc)/int(sigma_(numu_cc+numu_nc)) dE)'
            ' / (int E^{-gamma} dE) = %e',
            xs.get_xs_ratio_integral(kg0, kg0+kg1, e_range=[1, 80], gamma=1,
                                     average=True)
        )
        # Check that XS ratio for numu_cc+numu_nc to the same is 1.0
        int_val = xs.get_xs_ratio_integral(kg0+kg1, kg0+kg1, e_range=[1, 80],
                                           gamma=1, average=True)
        if not recursiveEquality(int_val, 1):
            raise ValueError('Integral of nc + cc should be 1.0; got %e'
                             ' instead.' % int_val)

        # Check via plot that the cross sections look reasonable (user must inspect)

        # Plot all cross sections stored in PISA xs file
        try:
            alldata = from_file(pisa_xs_file)
            xs_versions = alldata.keys()
            for ver in xs_versions:
                xs = CrossSections(ver=ver, xsec=pisa_xs_file)
                xs.plot(save=os.path.join(
                    outdir, 'pisa_' + ver + '_nuxCCNC_H2O_cross_sections.pdf'
                ))
        except ImportError as exc:
            logging.debug('Could not plot; possible that matplotlib is not'
                          ' installed. ImportError: %s', exc)

    finally:
        if remove_dir:
            rmtree(outdir)
Example #15
    def get_xs_ratio_integral(self, flavintgroup0, flavintgroup1, e_range,
                              gamma=0, average=False):
        """Energy-spectrum-weighted integral of (possibly a ratio of)
        (possibly-combined) flavor/interaction type cross sections.

        Parameters
        ----------
        flavintgroup0 : NuFlavIntGroup or convertible thereto
            Flavor(s)/interaction type(s) for which to combine cross sections
            for numerator of ratio
        flavintgroup1 : None, NuFlavIntGroup or convertible thereto
            Flavor(s)/interaction type(s) for which to combine cross sections
            for denominator of ratio. If None is passed, the denominator of
            the "ratio" is effectively 1.
        e_range : sequence of scalars
            Range of energy over which to integrate (GeV)
        gamma : float >= 0
            Power law spectral index used for weighting the integral,
            E^{-`gamma`}. Note that `gamma` should be >= 0.
        average : bool
            If True, return the average of the cross section (ratio)
            If False, return the integral of the cross section (ratio)

        See also
        --------
        See _combine_xs for details on how flavints are combined.
        """
        e_min = min(e_range)
        e_max = max(e_range)

        assert e_min > 0, '`e_range` must lie strictly above 0'
        assert e_max > e_min, \
                'max(`e_range`) must be strictly larger than min(`e_range`)'
        assert gamma >= 0, '`gamma` must be >= 0'

        if flavintgroup1 is None:
            flavintgroups = [NuFlavIntGroup(flavintgroup0)]
        else:
            flavintgroups = [NuFlavIntGroup(flavintgroup0),
                             NuFlavIntGroup(flavintgroup1)]

        # Create interpolant(s) (to get xs at energy range's endpoints)
        for fg in flavintgroups:
            self._define_interpolant(flavintgroup=fg)

        all_energy = self._interpolants[flavintgroups[0]].x
        xs_data = [self._interpolants[fg].y for fg in flavintgroups]

        for xd in xs_data:
            logging.trace('mean(xs_data) = %e' % np.mean(xd))

        # Get indices of data points within the specified energy range
        idx = (all_energy > e_min) & (all_energy < e_max)

        # Get xsec at endpoints
        xs_endpoints = [self._interpolants[fg]((e_min, e_max))
                        for fg in flavintgroups]

        for ep in xs_endpoints:
            logging.trace('xs_emin = %e, xs_emax = %e' % (ep[0], ep[1]))

        # Attach endpoints
        energy = np.concatenate([[e_min], all_energy[idx], [e_max]])
        xs = [np.concatenate([[ep[0]], xsd[idx], [ep[1]]])
              for ep, xsd in zip(xs_endpoints, xs_data)]

        if len(xs) == 1:
            xs = xs[0]
        else:
            xs = xs[0] / xs[1]

        # Weight xsec (or ratio) by energy spectrum
        if gamma == 0:
            wtd_xs = xs
        else:
            wtd_xs = xs*energy**(-gamma)

        logging.trace('mean(wtd_xs) = %e' % np.mean(wtd_xs))

        # Integrate via trapezoidal rule
        wtd_xs_integral = np.trapz(y=wtd_xs, x=energy)

        logging.trace('wtd_xs_integral = %e' % wtd_xs_integral)

        # Need to divide by integral of the weight function (over the same
        # energy interval as wtd_xs integral was computed) to get the average
        if average:
            if gamma == 0:
                # Trivial case
                xs_average = wtd_xs_integral / (e_max - e_min)
            else:
                # Otherwise use trapezoidal rule to approximate integral
                xs_average = wtd_xs_integral / \
                        np.trapz(y=energy**(-gamma), x=energy) #* (e_max-e_min)
            logging.trace('xs_average = %e' %(xs_average))
            return xs_average

        return wtd_xs_integral
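With `average=True` this computes the spectrum-weighted average, the integral of xs(E) * E^{-gamma} divided by the integral of E^{-gamma}, via the trapezoidal rule. A standalone numpy check of that formula on toy data:

import numpy as np

energy = np.linspace(1.0, 80.0, 500)
xs = 1e-42 * energy             # toy, linearly rising cross section
gamma = 1.0

wtd_xs = xs * energy**(-gamma)  # constant 1e-42 for this toy case
avg = np.trapz(wtd_xs, energy) / np.trapz(energy**(-gamma), energy)
# Analytically: 1e-42 * (80 - 1) / ln(80) for this xs and gamma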
Example #16
def load_reco_param(source):
    """Load reco parameterisation (energy-dependent) from file or dictionary.

    Parameters
    ----------
    source : string or mapping
        Source of the parameterization. If string, treat as file path or
        resource location and load from the file; this must yield a mapping. If
        `source` is a mapping, it is used directly. See notes below on format.

    Returns
    -------
    reco_params : OrderedDict
        Keys are stringified flavintgroups and values are dicts of strings
        representing the different reco dimensions and lists of distribution
        properties. These latter have a 'fraction', a 'dist' and a 'kwargs' key.
        The former two hold callables, while the latter holds a dict of
        key-callable pairs ('loc', 'scale'), which can be evaluated at the desired
        energies and passed into the respective `scipy.stats` distribution.
        The distributions for a given dimension will be superimposed according
        to their relative weights to form the reco kernels (via integration)
        when called with energy values (parameterisations are functions of
        energy only!).

    Notes
    -----
    The mapping passed via `source` or loaded therefrom must have the format:
        {
            <flavintgroup_string>:
                {
                    <dimension_string>:[
                        {
                            "dist": dist_id,
                            "fraction": val,
                            "kwargs": {
                                "loc": val,
                                "scale": val,
                                ...
                            }
                        },
                    ...
                    ]
                },
            <flavintgroup_string>:
                ...
        }

    `flavintgroup_string`s must be parsable by
    pisa.utils.flavInt.NuFlavIntGroup. Note that the `transform_groups` defined
    in a pipeline config file using this must match the groupings defined
    above.

    `dimension_string`s denote the observables/dimensions whose reco error
    distribution is parameterised (`"energy"` or `"coszen"`).

    `dist_id` needs to be a string identifying a probability distribution/statistical
    function provided by `scipy.stats`. No implicit assumptions about the
    distribution will be made if the `"dist"` key is missing.

    `"fraction"` holds the relative weight of the distribution. For a given
    dimension, the sum of all fractions present must be 1.

    Valid kwargs for distributions must at least include `"loc"` and `"scale"` -
    these will be passed into the respective `scipy.stats` function.

    `val`s can be one of the following:
        - Callable with one argument
        - String such that `eval(val)` yields a callable with one argument
    """
    if not (source is None or isinstance(source, (basestring, Mapping))):
        raise TypeError('`source` must be string, mapping, or None')

    if isinstance(source, basestring):
        orig_dict = from_file(source)

    elif isinstance(source, Mapping):
        orig_dict = source

    else:
        # only `source` = None reaches this branch, given the check above
        raise TypeError('Cannot load reco parameterizations from a %s' %
                        type(source))

    valid_dimensions = ('coszen', 'energy')
    required_keys = ('dist', 'fraction', 'kwargs')

    # Build dict of parameterizations (each a callable) per flavintgroup
    reco_params = OrderedDict()
    for flavint_key, dim_dict in orig_dict.iteritems():
        flavintgroup = NuFlavIntGroup(flavint_key)
        reco_params[flavintgroup] = {}
        for dimension in dim_dict.iterkeys():
            dim_dist_list = []

            if not isinstance(dimension, basestring):
                raise TypeError("The dimension needs to be given as a string!"
                                " Allowed: %s." % valid_dimensions)

            if dimension not in valid_dimensions:
                raise ValueError("Dimension '%s' not recognised!" % dimension)

            for dist_dict in dim_dict[dimension]:
                dist_spec_dict = {}

                # Require all property keys up front; the transform
                # computation hard-codes "loc" and "scale", so a missing
                # "kwargs" entry would fail later anyway
                for required in required_keys:
                    if required not in dist_dict:
                        raise ValueError("Found distribution property dict "
                                         "without required '%s' key for "
                                         "%s - %s!" %
                                         (required, flavintgroup, dimension))

                for k in dist_dict.iterkeys():
                    if k not in required_keys:
                        logging.warn("Unrecognised key in distribution"
                                     " property dict: '%s'" % k)

                dist_spec = dist_dict['dist']

                if not isinstance(dist_spec, basestring):
                    raise TypeError(" The resolution function needs to be"
                                    " given as a string!")

                if not dist_spec:
                    raise ValueError("Empty string found for resolution"
                                     " function!")

                try:
                    dist = getattr(stats, dist_spec.lower())
                except AttributeError:
                    try:
                        import scipy
                        sp_ver_str = scipy.__version__
                    except:
                        sp_ver_str = "N/A"
                    raise AttributeError("'%s' is not a valid distribution"
                                         " from scipy.stats (your scipy"
                                         " version: '%s')." %
                                         (dist_spec.lower(), sp_ver_str))
                logging.debug("Found %s - %s resolution function: '%s'" %
                              (flavintgroup, dimension, dist.name))

                dist_spec_dict['dist'] = dist

                frac = dist_dict['fraction']

                if isinstance(frac, basestring):
                    frac_func = eval(frac)

                elif callable(frac):
                    frac_func = frac

                else:
                    raise TypeError(
                        "Expected 'fraction' to be either a string"
                        " that can be interpreted by eval or a callable."
                        " Got '%s'." % type(frac))

                dist_spec_dict['fraction'] = frac_func

                kwargs = dist_dict['kwargs']

                if not isinstance(kwargs, dict):
                    raise TypeError(
                        "'kwargs' must hold a dictionary. Got '%s' instead." %
                        type(kwargs))

                dist_spec_dict['kwargs'] = kwargs
                for kwarg, kwarg_spec in kwargs.iteritems():

                    if isinstance(kwarg_spec, basestring):
                        kwarg_eval = eval(kwarg_spec)

                    elif callable(kwarg_spec) or isscalar(kwarg_spec):
                        kwarg_eval = kwarg_spec

                    else:
                        raise TypeError(
                            "Expected kwarg '%s' spec to be either a string"
                            " that can be interpreted by eval, a callable or"
                            " a scalar. Got '%s'." % type(kwarg_spec))

                    dist_spec_dict['kwargs'][kwarg] = kwarg_eval

                dim_dist_list.append(dist_spec_dict)

            reco_params[flavintgroup][dimension] = dim_dist_list

    return reco_params
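To make the expected format concrete, here is a minimal, hypothetical `source` mapping that satisfies the checks above (a single Gaussian per dimension; all numbers are illustrative, and the eval'd lambdas assume numpy is available as `np` in the module, alongside the Python 2 environment the code above implies):

source = {
    'nue_cc+nuebar_cc': {
        'energy': [
            {
                'dist': 'norm',  # resolved to scipy.stats.norm
                'fraction': 'lambda E: np.ones_like(E)',  # single component
                'kwargs': {
                    'loc':   'lambda E: 0.0 * E',  # no bias
                    'scale': 'lambda E: 0.2 * E',  # 20% energy resolution
                },
            },
        ],
    },
}
reco_params = load_reco_param(source)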
Example #17
    def __init__(self, params, particles, transform_groups,
                 sum_grouped_flavints, input_binning, output_binning,
                 memcache_deepcopy, transforms_cache_depth,
                 outputs_cache_depth, input_names=None, error_method=None,
                 debug_mode=None):
        assert particles in ['neutrinos', 'muons']
        self.particles = particles
        """Whether stage is instantiated to process neutrinos or muons"""

        self.transform_groups = flavintGroupsFromString(transform_groups)
        """Particle/interaction types to group for computing transforms"""

        assert isinstance(sum_grouped_flavints, bool)
        self.sum_grouped_flavints = sum_grouped_flavints

        # All of the following params (and no more) must be passed via the
        # `params` argument.
        expected_params = [
            'aeff_energy_paramfile', 'aeff_coszen_paramfile',
            'livetime', 'aeff_scale'
        ]
        if particles == 'neutrinos':
            expected_params.append('nutau_cc_norm')

        if isinstance(input_names, str):
            input_names = input_names.replace(' ', '').split(',')
        elif input_names is None:
            if particles == 'neutrinos':
                input_names = ('nue', 'nuebar', 'numu', 'numubar', 'nutau',
                               'nutaubar')

        if self.particles == 'neutrinos':
            # TODO: if sum_grouped_flavints, then the output names should be
            # e.g. 'nue_cc_nuebar_cc' and 'nue_nc_nuebar_nc' if nue and nuebar
            # are grouped... (?)
            if self.sum_grouped_flavints:
                output_names = [str(g) for g in self.transform_groups]
            else:
                input_flavints = NuFlavIntGroup(input_names)
                output_names = [str(fi) for fi in input_flavints]
        elif self.particles == 'muons':
            raise NotImplementedError
        else:
            raise ValueError('Particle type `%s` is not valid'
                             % self.particles)

        logging.trace('transform_groups = %s', self.transform_groups)
        logging.trace('output_names = %s', ' :: '.join(output_names))

        super().__init__(
            use_transforms=True,
            params=params,
            expected_params=expected_params,
            input_names=input_names,
            output_names=output_names,
            error_method=error_method,
            memcache_deepcopy=memcache_deepcopy,
            outputs_cache_depth=outputs_cache_depth,
            transforms_cache_depth=transforms_cache_depth,
            input_binning=input_binning,
            output_binning=output_binning,
            debug_mode=debug_mode
        )

        self.include_attrs_for_hashes('particles')
        self.include_attrs_for_hashes('transform_groups')

        self.ecen = self.input_binning.true_energy.weighted_centers.magnitude
        """input energy-binning weighted centers"""

        self.has_cz = False
        """Whether the stage has true_coszen input binning"""

        self.czcen = None
        """input coszen-binning weighted centers (or None if no coszen dim)"""

        if 'true_coszen' in self.input_binning.names:
            self.has_cz = True
            self.czcen = self.input_binning.true_coszen.weighted_centers.m_as('dimensionless')

        self._param_hashes = dict(energy=None, coszen=None)
        self.aeff_params = dict(energy=dict())
        if self.has_cz:
            self.aeff_params['coszen'] = None
Example #18
def compute_transforms(service):
    """Compute effective area transforms, taking aeff systematics into account.

    Systematics are: `aeff_scale`, `livetime`, and `nutau_cc_norm`

    """
    aeff_scale = service.params.aeff_scale.m_as('dimensionless')
    livetime_s = service.params.livetime.m_as('sec')
    base_scale = aeff_scale * livetime_s

    logging.trace('livetime = %s --> %s sec',
                  service.params.livetime.value, livetime_s)

    if service.particles == 'neutrinos':
        if not hasattr(service, 'nutau_cc_norm_must_be_one'):
            service.nutau_cc_norm_must_be_one = False
            """If any flav/ints besides nutau_cc and nutaubar_cc are grouped
            with one or both of those for transforms, then a
            `nutau_cc_norm` != 1 cannot be applied."""

            nutaucc_and_nutaubarcc = set(NuFlavIntGroup('nutau_cc+nutaubar_cc'))
            for group in service.transform_groups:
                # If nutau_cc, nutaubar_cc, or both are in the group and
                # other flavors are present, nutau_cc_norm must be one!
                group_set = set(group)
                if group_set.intersection(nutaucc_and_nutaubarcc) and \
                        group_set.difference(nutaucc_and_nutaubarcc):
                    service.nutau_cc_norm_must_be_one = True

        nutau_cc_norm = service.params.nutau_cc_norm.m_as('dimensionless')
        if nutau_cc_norm != 1 and service.nutau_cc_norm_must_be_one:
            raise ValueError(
                '`nutau_cc_norm` = %e but can only be != 1 if nutau CC and'
                ' nutaubar CC are separated from other flav/ints.'
                ' Transform groups are: %s'
                % (nutau_cc_norm, service.transform_groups)
            )

    if hasattr(service, 'sum_grouped_flavints'):
        sum_grouped_flavints = service.sum_grouped_flavints
    else:
        sum_grouped_flavints = False

    new_transforms = []
    for transform in service.nominal_transforms:
        this_scale = base_scale
        if service.particles == 'neutrinos':
            out_nfig = NuFlavIntGroup(transform.output_name)
            if 'nutau_cc' in out_nfig or 'nutaubar_cc' in out_nfig:
                this_scale *= nutau_cc_norm

        if this_scale != 1:
            aeff_transform = transform.xform_array * this_scale
        else:
            aeff_transform = transform.xform_array

        new_xform = BinnedTensorTransform(
            input_names=transform.input_names,
            output_name=transform.output_name,
            input_binning=transform.input_binning,
            output_binning=transform.output_binning,
            xform_array=aeff_transform,
            sum_inputs=sum_grouped_flavints
        )
        new_transforms.append(new_xform)

    return TransformSet(new_transforms)
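The grouping constraint is a pure set check: a transform group may be scaled by `nutau_cc_norm` only if it contains nothing besides nutau CC flav/ints. A standalone sketch of that logic using plain string sets in place of NuFlavIntGroup objects:

NUTAU_CC = {'nutau_cc', 'nutaubar_cc'}

def norm_must_be_one(transform_groups):
    # True if any group mixes nutau CC flav/ints with anything else
    return any(set(g) & NUTAU_CC and set(g) - NUTAU_CC
               for g in transform_groups)

norm_must_be_one([['nutau_cc', 'nutaubar_cc'], ['numu_cc']])  # -> False
norm_must_be_one([['nutau_cc', 'numu_cc']])                   # -> True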
Example #19
    def _compute_transforms(self):
        """Generate reconstruction "smearing kernels" by histogramming true and
        reconstructed variables from a Monte Carlo events file.

        The resulting transform is a 2N-dimensional histogram, where N is the
        dimensionality of the input binning. The transform maps the truth bin
        counts to the reconstructed bin counts.

        I.e., for the case of 1D input binning, the ith element of the
        reconstruction kernel will be a map showing the distribution of events
        over all the reco space from truth bin i. This will be normalised to
        the total number of events in truth bin i.

        Notes
        -----
        In the current implementation these histograms are made
        **UN**weighted. This is probably quite wrong...

        """
        e_res_scale = self.params.e_res_scale.value.m_as('dimensionless')
        cz_res_scale = self.params.cz_res_scale.value.m_as('dimensionless')
        e_reco_bias = self.params.e_reco_bias.value.m_as('GeV')
        cz_reco_bias = self.params.cz_reco_bias.value.m_as('dimensionless')
        res_scale_ref = self.params.res_scale_ref.value.strip().lower()
        assert res_scale_ref in ['zero']  # TODO: , 'mean', 'median']

        self.load_events(self.params.reco_events)
        self.cut_events(self.params.transform_events_keep_criteria)

        # Computational units must be the following for compatibility with
        # events file
        comp_units = dict(true_energy='GeV',
                          true_coszen=None,
                          true_azimuth='rad',
                          reco_energy='GeV',
                          reco_coszen=None,
                          reco_azimuth='rad',
                          pid=None)

        # Select only the units in the input/output binning for conversion
        # (can't pass more than what's actually there)
        in_units = {
            dim: unit
            for dim, unit in comp_units.items() if dim in self.input_binning
        }
        out_units = {
            dim: unit
            for dim, unit in comp_units.items() if dim in self.output_binning
        }

        # These binnings will be in the computational units defined above
        input_binning = self.input_binning.to(**in_units)
        output_binning = self.output_binning.to(**out_units)

        xforms = []
        for xform_flavints in self.transform_groups:
            logging.debug("Working on %s reco kernels" % xform_flavints)

            repr_flavint = xform_flavints[0]

            true_energy = self.events[repr_flavint]['true_energy']
            true_coszen = self.events[repr_flavint]['true_coszen']
            reco_energy = self.events[repr_flavint]['reco_energy']
            reco_coszen = self.events[repr_flavint]['reco_coszen']
            e_reco_err = reco_energy - true_energy
            cz_reco_err = reco_coszen - true_coszen

            if self.params.res_scale_ref.value.strip().lower() == 'zero':
                self.events[repr_flavint]['reco_energy'] = (
                    true_energy + e_reco_err * e_res_scale + e_reco_bias)
                self.events[repr_flavint]['reco_coszen'] = (
                    true_coszen + cz_reco_err * cz_res_scale + cz_reco_bias)

            # True (input) + reco {+ PID} (output)-dimensional histogram
            # is the basis for the transformation
            reco_kernel = self.events.histogram(
                kinds=xform_flavints,
                binning=input_binning * output_binning,
                weights_col=self.params.reco_weights_name.value,
                errors=(self.error_method not in [None, False]))
            # Extract just the numpy array to work with
            reco_kernel = reco_kernel.hist

            # This takes into account the correct kernel normalization:
            # What this means is that we have to normalise the reco map
            # to the number of events in the truth bin.
            #
            # I.e., we have N events from the truth bin which then become
            # spread out over the whole map due to reconstruction.
            # The normalisation is dividing this map by N.
            #
            # Previously this was hard-coded for 2 dimensions, but I have tried
            # to generalise it to arbitrary dimensionality.

            # Truth-only (N-dimensional) histogram will be used for
            # normalization (so transform is in terms of fraction-of-events in
            # input--i.e. truth--bin). Sum over the input dimensions.
            true_event_counts = self.events.histogram(
                kinds=xform_flavints,
                binning=input_binning,
                weights_col=self.params.reco_weights_name.value,
                errors=(self.error_method not in [None, False]))
            # Extract just the numpy array to work with
            true_event_counts = true_event_counts.hist

            # If there weren't any events in the input (true_*) bin, make this
            # bin have no effect -- i.e., populate all output bins
            # corresponding to the input bin with zeros via `nan_to_num`.
            with np.errstate(divide='ignore', invalid='ignore'):
                true_event_counts[true_event_counts == 0] = np.nan
                norm_factors = 1.0 / true_event_counts
                norm_factors = np.nan_to_num(norm_factors)

            # Numpy broadcasts lower-dimensional things to higher dimensions
            # from last dimension to first; if we simply mult the reco_kernel
            # by norm_factors, this will apply the normalization to the
            # __output__ dimensions rather than the input dimensions. Add
            # "dummy" dimensions to norm_factors where we want the "extra
            # dimensions": at the end.
            for dim in self.output_binning:
                norm_factors = np.expand_dims(norm_factors, axis=-1)

            # Apply the normalization to the kernels
            reco_kernel *= norm_factors

            assert np.all(reco_kernel >= 0), \
                    'number of elements less than 0 = %d' \
                    % np.sum(reco_kernel < 0)
            sum_over_axes = tuple(range(-len(self.output_binning), 0))
            totals = np.sum(reco_kernel, axis=sum_over_axes)
            assert np.all(
                totals <= 1 + 1e-14), 'max = ' + str(np.max(totals) - 1)

            # Now populate this transform to each input for which it applies.

            if self.sum_grouped_flavints:
                xform_input_names = []
                for input_name in self.input_names:
                    input_flavs = NuFlavIntGroup(input_name)
                    if len(set(xform_flavints).intersection(input_flavs)) > 0:
                        xform_input_names.append(input_name)

                for output_name in self.output_names:
                    if output_name not in xform_flavints:
                        continue
                    xform = BinnedTensorTransform(
                        input_names=xform_input_names,
                        output_name=output_name,
                        input_binning=self.input_binning,
                        output_binning=self.output_binning,
                        xform_array=reco_kernel,
                        sum_inputs=self.sum_grouped_flavints)
                    xforms.append(xform)
            else:
                # NOTES:
                # * Output name is same as input name
                # * Use `self.input_binning` and `self.output_binning` so maps
                #   are returned in user-defined units (rather than
                #   computational units, which are attached to the non-`self`
                #   versions of these binnings).
                for input_name in self.input_names:
                    if input_name not in xform_flavints:
                        continue
                    xform = BinnedTensorTransform(
                        input_names=input_name,
                        output_name=input_name,
                        input_binning=self.input_binning,
                        output_binning=self.output_binning,
                        xform_array=reco_kernel,
                    )
                    xforms.append(xform)

        return TransformSet(transforms=xforms)
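The `expand_dims` step exists because numpy broadcasts from the trailing axes: a (3, 4) factor multiplied into a (3, 4, 3, 4) kernel would scale the trailing reco axes, not the leading truth axes. A tiny standalone demonstration:

import numpy as np

reco_kernel = np.random.rand(3, 4, 3, 4)  # (true_E, true_cz, reco_E, reco_cz)
true_counts = np.random.rand(3, 4) + 1.0  # events per truth bin

norm_factors = 1.0 / true_counts          # shape (3, 4)
for _ in range(2):                        # one dummy axis per output dim
    norm_factors = np.expand_dims(norm_factors, axis=-1)  # -> (3, 4, 1, 1)

normed = reco_kernel * norm_factors       # scales along the *truth* axes
# normed[i, j].sum() == reco_kernel[i, j].sum() / true_counts[i, j]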
Example #20
    @staticmethod
    def load_neutrino_events(config, dataset):
        nu_data = []
        if dataset == 'neutrinos%sgen_lvl' % SEP:
            gen_cfg      = from_file(config.get(dataset, 'gen_cfg_file'))
            name         = gen_cfg.get('general', 'name')
            datadir      = gen_cfg.get('general', 'datadir')
            event_types  = split(gen_cfg.get('general', 'event_type'))
            weights      = split(gen_cfg.get('general', 'weights'))
            weight_units = gen_cfg.get('general', 'weight_units')
            keep_keys    = split(gen_cfg.get('general', 'keep_keys'))
            aliases      = gen_cfg.items('aliases')
            logging.info('Extracting neutrino dataset "{0}" from generator '
                         'level sample "{1}"'.format(dataset, name))

            for idx, flav in enumerate(event_types):
                fig = NuFlavIntGroup(flav)
                all_flavints = fig.flavints
                events_file = datadir + gen_cfg.get(flav, 'filename')

                flav_fidg = sample.load_from_nu_file(
                    events_file, all_flavints, weights[idx], weight_units,
                    keep_keys, aliases
                )
                nu_data.append(flav_fidg)
        else:

            name         = config.get('general', 'name')
            flavours     = split(config.get('neutrinos', 'flavours'))
            weights      = split(config.get('neutrinos', 'weights'))
            weight_units = config.get('neutrinos', 'weight_units')
            sys_list     = split(config.get('neutrinos', 'sys_list'))
            base_prefix  = config.get('neutrinos', 'baseprefix')
            keep_keys    = split(config.get('neutrinos', 'keep_keys'))
            aliases      = config.items('neutrinos%saliases' % SEP)
            logging.info('Extracting neutrino dataset "{0}" from sample '
                         '"{1}"'.format(dataset, name))
            if base_prefix == 'None':
                base_prefix = ''

            for idx, flav in enumerate(flavours):
                f = int(flav)
                all_flavints = NuFlavIntGroup(f, -f).flavints
                if dataset == 'nominal':
                    prefixes = []
                    for sys in sys_list:
                        ev_sys = 'neutrinos%s%s' % (SEP, sys)
                        nominal = config.get(ev_sys, 'nominal')
                        ev_sys_nom = ev_sys + SEP + nominal
                        prefixes.append(config.get(ev_sys_nom, 'file_prefix'))
                    if len(set(prefixes)) > 1:
                        raise AssertionError(
                            'Choice of nominal file is ambiguous. Nominal '
                            'choice of systematic parameters must coincide '
                            'with one and only one file. Options found are: '
                            '{0}'.format(prefixes)
                        )
                    file_prefix = flav + prefixes[0]
                else:
                    file_prefix = flav + config.get(dataset, 'file_prefix')
                events_file = path.join(config.get('general', 'datadir'),
                                        base_prefix + file_prefix)

                flav_fidg = sample.load_from_nu_file(
                    events_file, all_flavints, weights[idx], weight_units,
                    keep_keys, aliases
                )
                nu_data.append(flav_fidg)
        nu_data = Data(
            reduce(add, nu_data),
            metadata={'name': name, 'sample': dataset}
        )

        return nu_data
Example #21
    def __init__(self,
                 params,
                 particles,
                 transform_groups,
                 sum_grouped_flavints,
                 input_binning,
                 output_binning,
                 memcache_deepcopy,
                 transforms_cache_depth,
                 outputs_cache_depth,
                 input_names=None,
                 error_method=None,
                 debug_mode=None):
        assert particles in ['neutrinos', 'muons']
        self.particles = particles
        """Whether stage is instantiated to process neutrinos or muons"""

        self.transform_groups = flavintGroupsFromString(transform_groups)
        """Particle/interaction types to group for computing transforms"""

        self.sum_grouped_flavints = sum_grouped_flavints

        # All of the following params (and no more) must be passed via the
        # `params` argument.
        expected_params = [
            'aeff_events',
            'livetime',
            'aeff_scale',
            'aeff_e_smooth_factor',
            'aeff_cz_smooth_factor',
            'transform_events_keep_criteria',
        ]
        if particles == 'neutrinos':
            expected_params.append('nutau_cc_norm')

        if isinstance(input_names, str):
            input_names = input_names.replace(' ', '').split(',')
        elif input_names is None:
            if particles == 'neutrinos':
                input_names = ('nue', 'nuebar', 'numu', 'numubar', 'nutau',
                               'nutaubar')

        # Define the names of objects expected in inputs and produced as
        # outputs
        if self.particles == 'neutrinos':
            if self.sum_grouped_flavints:
                output_names = [str(g) for g in self.transform_groups]
            else:
                input_flavints = NuFlavIntGroup(input_names)
                output_names = [str(fi) for fi in input_flavints]
        elif self.particles == 'muons':
            raise NotImplementedError
        else:
            raise ValueError('Particle type `%s` is not valid' %
                             self.particles)

        logging.trace('transform_groups = %s' % self.transform_groups)
        logging.trace('output_names = %s' % ' :: '.join(output_names))

        # Invoke the init method from the parent class, which does a lot of
        # work for you.
        super().__init__(use_transforms=True,
                         params=params,
                         expected_params=expected_params,
                         input_names=input_names,
                         output_names=output_names,
                         error_method=error_method,
                         memcache_deepcopy=memcache_deepcopy,
                         outputs_cache_depth=outputs_cache_depth,
                         transforms_cache_depth=transforms_cache_depth,
                         input_binning=input_binning,
                         output_binning=output_binning,
                         debug_mode=debug_mode)
        # Can do these now that binning has been set up in call to Stage's init
        self.include_attrs_for_hashes('particles')
        self.include_attrs_for_hashes('transform_groups')
Example #22
def load_aeff_param(source):
    """Load aeff parameterisation (energy- or coszen-dependent) from file
    or dictionary.

    Parameters
    ----------
    source : string or mapping
        Source of the parameterization. If string, treat as file path or
        resource location and load from the file; this must yield a mapping. If
        `source` is a mapping, it is used directly. See notes below on format.

    Returns
    -------
    aeff_params : OrderedDict
        Keys are stringified flavintgroups and values are the callables that
        produce aeff when called with energy or coszen values.

    Notes
    -----
    The mapping passed via `source` or loaded therefrom must have the format:
        {
            <flavintgroup_string>: val,
            <flavintgroup_string>: val,
            ...
        }

    `flavintgroup_string`s must be parsable by
    pisa.utils.flavInt.NuFlavIntGroup. Note that the `transform_groups` defined
    in a pipeline config file using this must match the groupings defined
    above.

    `val`s can be one of the following:
        - Callable with one argument
        - String such that `eval(val)` yields a callable with one argument
        - Mapping with the format:
            {
                <"energy" or "coszen">: [sequence of values],
                "aeff": [sequence of values]A
            }
          the two sequences are used to form a linear interpolant callable that
          maps energy or coszen values to aeff values..

    """
    if isinstance(source, str):
        orig_dict = from_file(source)
    elif isinstance(source, Mapping):
        orig_dict = source
    else:
        raise TypeError('`source` must be a string or mapping; cannot load'
                        ' aeff parameterizations from a %s' % type(source))

    # Build dict of parameterizations (each a callable) per flavintgroup

    aeff_params = OrderedDict()
    for flavint_key, param_spec in orig_dict.items():
        flavintgroup = NuFlavIntGroup(flavint_key)

        if isinstance(param_spec, str):
            param_func = eval(param_spec)

        elif callable(param_spec):
            param_func = param_spec

        elif isinstance(param_spec, Mapping):
            is_energy = 'energy' in param_spec
            is_coszen = 'coszen' in param_spec

            valid = True
            if 'aeff' not in param_spec:
                valid = False
            elif not (is_energy or is_coszen):
                valid = False
            if not valid:
                raise ValueError(
                    'Expected keys of "aeff" and either "energy" or'
                    ' "coszen" to construct a spline. Got %s instead.'
                    ' Aeff param spec source: %s, flavintgroup %s'
                    % (param_spec.keys(), source, flavintgroup)
                )

            var = 'energy' if is_energy else 'coszen'
            x_vals = param_spec[var]
            aeff_vals = param_spec['aeff']

            # TODO: Could potentially add interp1d options to config
            param_func = interp1d(x_vals, aeff_vals, kind='linear',
                                  bounds_error=False, fill_value=0)

        else:
            raise TypeError(
                'Expected parameterization spec to be either a string that'
                ' can be interpreted by eval or a mapping of values'
                ' from which to construct a spline. Got "%s".'
                % type(param_spec)
            )

        aeff_params[flavintgroup] = param_func

    return aeff_params
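
A hedged usage sketch for load_aeff_param, assuming the imports the function relies on (from_file, NuFlavIntGroup, interp1d, OrderedDict) are in scope; the flavintgroup keys and numbers below are purely illustrative:

example_source = {
    # String spec: eval() must yield a one-argument callable
    'nue_cc+nuebar_cc': 'lambda E: 1e-4 * E**2',
    # Mapping spec: turned into a linear interpolant aeff(energy)
    'numu_cc+numubar_cc': {
        'energy': [1.0, 10.0, 100.0],
        'aeff': [1e-5, 1e-3, 1e-1],
    },
}
aeff_params = load_aeff_param(example_source)
for flavintgroup, param_func in aeff_params.items():
    print(flavintgroup, param_func(10.0))  # evaluate each at 10 GeV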
Example #23
    def histogram_set(self,
                      binning,
                      nu_weights_col,
                      mu_weights_col,
                      noise_weights_col,
                      mapset_name,
                      errors=False):
        """Uses the above histogram function but returns the set of all of them
        for everything in the Data object.

        Parameters
        ----------
        binning : OneDimBinning, MultiDimBinning
            The definition of the binning for the histograms.
        nu_weights_col : None or string
            The column in the Data object by which to weight the neutrino
            histograms. Specify None for unweighted histograms.
        mu_weights_col : None or string
            The column in the Data object by which to weight the muon
            histograms. Specify None for unweighted histograms.
        noise_weights_col : None or string
            The column in the Data object by which to weight the noise
            histograms. Specify None for unweighted histograms.
        mapset_name : string
            The name by which the resulting MapSet will be identified.
        errors : boolean
            A flag for whether to calculate errors on the histograms or not.
            This defaults to False.

        Returns
        -------
        MapSet : A MapSet containing one Map for each component (neutrino
                 flavint group, muons, noise) present in this Data object.

        """
        if not isinstance(binning, MultiDimBinning):
            if not isinstance(binning, OneDimBinning):
                raise TypeError('binning should be either MultiDimBinning or '
                                'OneDimBinning object. Got %s.' %
                                type(binning))
        if nu_weights_col is not None:
            if not isinstance(nu_weights_col, str):
                raise TypeError('nu_weights_col should be a string. Got %s' %
                                type(nu_weights_col))
        if mu_weights_col is not None:
            if not isinstance(mu_weights_col, str):
                raise TypeError('mu_weights_col should be a string. Got %s' %
                                type(mu_weights_col))
        if not isinstance(errors, bool):
            raise TypeError('flag for whether to calculate errors or not '
                            'should be a boolean. Got %s.' % type(errors))
        outputs = []
        if self.contains_neutrinos:
            for fig in self.keys():
                outputs.append(
                    self.histogram(kinds=fig,
                                   binning=binning,
                                   weights_col=nu_weights_col,
                                   errors=errors,
                                   name=str(NuFlavIntGroup(fig))))
        if self.contains_muons:
            outputs.append(
                self.histogram(kinds='muons',
                               binning=binning,
                               weights_col=mu_weights_col,
                               errors=errors,
                               name='muons',
                               tex=r'\rm{muons}'))
        if self.contains_noise:
            outputs.append(
                self.histogram(kinds='noise',
                               binning=binning,
                               weights_col=noise_weights_col,
                               errors=errors,
                               name='noise',
                               tex=r'\rm{noise}'))
        return MapSet(maps=outputs, name=mapset_name)
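
A hedged usage sketch (the `data` and `reco_binning` names are hypothetical stand-ins for a Data object and a MultiDimBinning defined elsewhere): build one MapSet holding histograms of every component, weighting neutrinos by 'pisa_weight' and leaving muons and noise unweighted:

mapset = data.histogram_set(
    binning=reco_binning,          # a MultiDimBinning defined elsewhere
    nu_weights_col='pisa_weight',
    mu_weights_col=None,           # unweighted muon histogram
    noise_weights_col=None,        # unweighted noise histogram
    mapset_name='all_components',
    errors=True,
)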
Example #24
    def histogram(self,
                  kinds,
                  binning,
                  binning_cols=None,
                  weights_col=None,
                  errors=False,
                  name=None,
                  tex=None):
        """Histogram the events of all `kinds` specified, with `binning` and
        optionally applying `weights`.

        Parameters
        ----------
        kinds : string, sequence of NuFlavInt, or NuFlavIntGroup
        binning : OneDimBinning, MultiDimBinning or sequence of arrays (one array per binning dimension)
        binning_cols : string or sequence of strings
            Bin only these dimensions, ignoring other dimensions in `binning`
        weights_col : None or string
            Column to use for weighting the events
        errors : bool
            Whether to attach errors to the resulting Map
        name : None or string
            Name to give to resulting Map. If None, a default is derived from
            `kinds` and `weights_col`.
        tex : None or string
            TeX label to give to the resulting Map. If None, default is
            derived from the `name` specified (or its value derived from
            `kinds` and `weights_col`).

        Returns
        -------
        Map : Map whose histogram has as many dimensions as specified by the
            `binning` argument

        """
        # TODO: make able to take integer for `binning` and--in combination
        # with units in the Events columns--generate an appropriate
        # MultiDimBinning object, attach this and return the package as a Map.

        if not isinstance(kinds, NuFlavIntGroup):
            kinds = NuFlavIntGroup(kinds)
        if isinstance(binning_cols, str):
            binning_cols = [binning_cols]
        assert weights_col is None or isinstance(weights_col, str)

        # TODO: units of columns, and convert bin edges if necessary
        if isinstance(binning, OneDimBinning):
            binning = MultiDimBinning([binning])
        elif isinstance(binning, MultiDimBinning):
            pass
        elif (isinstance(binning, Iterable)
              and not isinstance(binning, Sequence)):
            binning = list(binning)
        elif isinstance(binning, Sequence):
            pass
        else:
            raise TypeError('Unhandled type %s for `binning`.' % type(binning))

        if isinstance(binning, Sequence):
            raise NotImplementedError(
                'Simple sequences are not handled at this time. Please'
                ' specify a OneDimBinning or MultiDimBinning object for'
                ' `binning`.')
            #assert len(binning_cols) == len(binning)
            #bin_edges = binning

        # TODO: units support for Events will mean we can do `m_as(...)` here!
        bin_edges = [edges.magnitude for edges in binning.bin_edges]
        if binning_cols is None:
            binning_cols = binning.names
        else:
            assert set(binning_cols).issubset(set(binning.names))

        # Extract the columns' data into a list of array(s) for histogramming
        repr_flavint = kinds[0]
        sample = [self[repr_flavint][colname] for colname in binning_cols]
        err_weights = None
        hist_weights = None
        if weights_col is not None:
            hist_weights = self[repr_flavint][weights_col]
            if errors:
                err_weights = np.square(hist_weights)

        hist, edges = np.histogramdd(sample=sample,
                                     weights=hist_weights,
                                     bins=bin_edges)
        if errors:
            sumw2, edges = np.histogramdd(sample=sample,
                                          weights=err_weights,
                                          bins=bin_edges)
            hist = unp.uarray(hist, np.sqrt(sumw2))

        if name is None:
            if tex is None:
                tex = kinds.tex
                if weights_col is not None:
                    tex += r', \; {\rm weights=' + text2tex(weights_col) + r'}'

            name = str(kinds)
            if weights_col is not None:
                name += ', weights=' + weights_col

        if tex is None:
            tex = text2tex(name)

        return Map(name=name, hist=hist, binning=binning, tex=tex)
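
The error handling above follows the standard sum-of-squared-weights prescription: the uncertainty on a weighted bin count is sqrt(sum of w_i^2) over the events in that bin. A self-contained numpy sketch with synthetic data:

import numpy as np

rng = np.random.default_rng(0)
energies = rng.uniform(1.0, 100.0, size=1000)   # synthetic event energies
weights = rng.uniform(0.5, 1.5, size=1000)      # synthetic per-event weights
bin_edges = np.linspace(1.0, 100.0, 11)

# Weighted histogram and the per-bin sum of squared weights
hist, _ = np.histogram(energies, bins=bin_edges, weights=weights)
sumw2, _ = np.histogram(energies, bins=bin_edges, weights=weights**2)
errors = np.sqrt(sumw2)                          # per-bin uncertainties
print(hist, errors)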
Example #25
    def _compute_nominal_transforms(self):
        """Compute new PID transforms."""
        logging.debug('Updating pid.param PID histograms...')

        self.load_pid_energy_param(self.params.pid_energy_paramfile.value)

        nominal_transforms = []
        for xform_flavints in self.transform_groups:
            logging.debug('Working on %s PID', xform_flavints)

            xform_array = np.empty(self.transform_output_binning.shape)

            subdict = self.pid_energy_param_dict[xform_flavints]
            for signature, sig_param_func in subdict.items():
                # Get the PID probabilities vs. energy at the energy bins'
                # (weighted) centers
                pid1d = sig_param_func(self.ebin_centers)

                # Broadcast this 1d array across the reco_coszen dimension
                # since it's independent of reco_coszen
                broadcasted_pid = self.transform_output_binning.broadcast(
                    pid1d, from_dim='reco_energy', to_dims='reco_coszen')

                pid_indexer = (self.transform_output_binning.indexer(
                    pid=signature))

                # Assign the broadcasted array to the correct PID bin
                xform_array[pid_indexer] = broadcasted_pid

            if self.sum_grouped_flavints:
                xform_input_names = []
                for input_name in self.input_names:
                    input_flavs = NuFlavIntGroup(input_name)
                    if set(xform_flavints).intersection(input_flavs):
                        xform_input_names.append(input_name)

                for output_name in self.output_names:
                    if output_name not in xform_flavints:
                        continue
                    xform = BinnedTensorTransform(
                        input_names=xform_input_names,
                        output_name=str(xform_flavints),
                        input_binning=self.input_binning,
                        output_binning=self.transform_output_binning,
                        xform_array=xform_array,
                        sum_inputs=self.sum_grouped_flavints)
                    nominal_transforms.append(xform)

            else:
                for input_name in self.input_names:
                    if input_name not in xform_flavints:
                        continue
                    xform = BinnedTensorTransform(
                        input_names=input_name,
                        output_name=input_name,
                        input_binning=self.input_binning,
                        output_binning=self.transform_output_binning,
                        xform_array=xform_array,
                    )
                    nominal_transforms.append(xform)

        return TransformSet(transforms=nominal_transforms)
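
The `broadcast` call above replicates a 1D, energy-dependent PID curve across the reco_coszen dimension. A numpy equivalent of just that step, with illustrative bin counts:

import numpy as np

n_e, n_cz = 10, 8                      # illustrative bin counts
pid1d = np.linspace(0.1, 0.9, n_e)     # PID probability vs energy-bin center
# Replicate across the coszen axis, since PID is independent of coszen here
broadcasted_pid = np.repeat(pid1d[:, np.newaxis], n_cz, axis=1)
assert broadcasted_pid.shape == (n_e, n_cz)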
Example #26
    def _calculate_fit_coeffs(data,
                              params,
                              fit_binning,
                              nu_params=None,
                              mu_params=None):
        """
        Calculate the polynomial fit coefficients for each systematic,
        flavint, and bin.
        """
        logging.debug('Calculating fit coefficients')

        config = from_file(params['discr_sys_sample_config'].value)

        degree = int(params['poly_degree'].value)
        force_through_nominal = params['force_through_nominal'].value

        if force_through_nominal:

            def fit_func(vals, *poly_coeffs):
                return np.polynomial.polynomial.polyval(
                    vals, [1.] + list(poly_coeffs))
        else:

            def fit_func(vals, *poly_coeffs):
                return np.polynomial.polynomial.polyval(
                    vals, list(poly_coeffs))

            # add free param for constant term
            degree += 1

        template_maker = Pipeline(params['pipeline_config'].value)
        dataset_param = template_maker.params['dataset']

        def parse(string):
            return string.replace(' ', '').split(',')

        sys_fit_coeffs = OrderedDict()
        if nu_params is not None:
            sys_list = parse(config.get('neutrinos', 'sys_list'))
            nu_params = [param[3:] for param in nu_params]  # strip 'nu_' prefix

            if set(nu_params) != set(sys_list):
                raise AssertionError(
                    'Systematics list in the sample config file does not '
                    'match the params in the pipeline config file\n{0} '
                    '!= {1}'.format(set(nu_params), set(sys_list)))

            for sys in sys_list:
                ev_sys = 'neutrinos|' + sys
                runs = parse(config.get(ev_sys, 'runs')[1:-1])
                nominal = config.get(ev_sys, 'nominal')

                mapset_dict = OrderedDict()
                flavint_groups = None
                for run in runs:
                    logging.info('Loading run {0} of systematic '
                                 '{1}'.format(run, sys))
                    dataset_param.value = ev_sys + '|' + run
                    template_maker.update_params(dataset_param)
                    template = template_maker.get_outputs(
                        idx=int(params['stop_after_stage'].m))
                    if not isinstance(template, Data):
                        raise AssertionError(
                            'Template output is not a Data object, instead is '
                            'type {0}'.format(type(template)))
                    if flavint_groups is None:
                        flavint_groups = template.flavint_groups
                    else:
                        if set(flavint_groups) != set(template.flavint_groups):
                            raise AssertionError(
                                'Mismatch of flavint_groups - ({0}) does not '
                                'match flavint_groups '
                                '({1})'.format(flavint_groups,
                                               template.flavint_groups))

                    outputs = []
                    for fig in template.keys():
                        outputs.append(
                            template.histogram(kinds=fig,
                                               binning=fit_binning,
                                               weights_col='pisa_weight',
                                               errors=False,
                                               name=str(NuFlavIntGroup(fig))))
                    mapset_dict[run] = MapSet(outputs, name=run)

                nom_mapset = mapset_dict[nominal]
                fracdiff_mapset_dict = OrderedDict()
                for run in runs:
                    mapset = []
                    for flavintg_map in mapset_dict[run]:
                        # TODO(shivesh): error propagation?
                        flavintg = flavintg_map.name
                        mask = ~(nom_mapset[flavintg].hist == 0.)
                        div = np.zeros(flavintg_map.shape)
                        with np.errstate(divide='ignore', invalid='ignore'):
                            div[mask] = \
                                unp.nominal_values(flavintg_map.hist[mask]) /\
                                unp.nominal_values(nom_mapset[flavintg].hist[mask])
                        mapset.append(
                            Map(name=flavintg,
                                binning=flavintg_map.binning,
                                hist=div))
                    fracdiff_mapset_dict[run] = MapSet(mapset)

                delta_runs = np.array([float(x)
                                       for x in runs]) - float(nominal)

                coeff_binning = OneDimBinning(name='coeff',
                                              num_bins=degree,
                                              is_lin=True,
                                              domain=[-1, 1])
                combined_binning = fit_binning + coeff_binning

                params_mapset = []
                for fig in template.keys():
                    # np.empty requires a concrete shape tuple (map() returns
                    # an iterator in Python 3); dtype=object since each entry
                    # holds an array of fit coefficients
                    pvals_hist = np.empty(tuple(map(int, combined_binning.shape)),
                                          dtype=object)
                    hists = [
                        fracdiff_mapset_dict[run][fig].hist for run in runs
                    ]
                    zip_hists = np.dstack(hists)
                    for idx in np.ndindex(fit_binning.shape):
                        y_values = []
                        y_sigma = []
                        for run in fracdiff_mapset_dict:
                            y_values.append(
                                unp.nominal_values(
                                    fracdiff_mapset_dict[run][fig].hist[idx]))
                            y_sigma.append(
                                unp.std_devs(
                                    fracdiff_mapset_dict[run][fig].hist[idx]))

                        if np.any(y_sigma):
                            popt, pcov = curve_fit(fit_func,
                                                   delta_runs,
                                                   y_values,
                                                   sigma=y_sigma,
                                                   p0=np.ones(degree))
                        else:
                            popt, pcov = curve_fit(fit_func,
                                                   delta_runs,
                                                   y_values,
                                                   p0=np.ones(degree))
                        # perr = np.sqrt(np.diag(pcov))
                        # pvals = unp.uarray(popt, perr)
                        pvals_hist[idx] = popt
                    pvals_hist = np.array(pvals_hist.tolist())
                    params_mapset.append(
                        Map(name=fig,
                            binning=combined_binning,
                            hist=pvals_hist))
                params_mapset = MapSet(params_mapset, name=sys)

                if sys in sys_fit_coeffs:
                    sys_fit_coeffs[sys] = MapSet(
                        [sys_fit_coeffs[sys], params_mapset])
                else:
                    sys_fit_coeffs[sys] = params_mapset

        if mu_params is not None:
            sys_list = parse(config.get('muons', 'sys_list'))
            mu_params = [param[3:] for param in mu_params]  # strip 'mu_' prefix

            if set(mu_params) != set(sys_list):
                raise AssertionError(
                    'Systematics list in the sample config file does not '
                    'match the params in the pipeline config file\n{0} '
                    '!= {1}'.format(set(mu_params), set(sys_list)))

            for sys in sys_list:
                ev_sys = 'muons|' + sys
                runs = parse(config.get(ev_sys, 'runs')[1:-1])
                nominal = config.get(ev_sys, 'nominal')

                map_dict = OrderedDict()
                flavint_groups = None
                for run in runs:
                    logging.info('Loading run {0} of systematic '
                                 '{1}'.format(run, sys))
                    dataset_param.value = ev_sys + '|' + run
                    template_maker.update_params(dataset_param)
                    template = template_maker.get_outputs(
                        idx=int(params['stop_after_stage'].m))
                    if not isinstance(template, Data):
                        raise AssertionError(
                            'Template output is not a Data object, instead is '
                            'type {0}'.format(type(template)))
                    if not template.contains_muons:
                        raise AssertionError(
                            'Template output does not contain muons')

                    output = template.histogram(
                        kinds='muons',
                        binning=fit_binning,
                        # NOTE: weights cancel in fraction
                        weights_col=None,
                        errors=False,
                        name='muons')
                    map_dict[run] = output

                nom_map = map_dict[nominal]
                fracdiff_map_dict = OrderedDict()
                for run in runs:
                    mask = ~(nom_map.hist == 0.)
                    div = np.zeros(nom_map.shape)
                    with np.errstate(divide='ignore', invalid='ignore'):
                        div[mask] = \
                            unp.nominal_values(map_dict[run].hist[mask]) /\
                            unp.nominal_values(nom_map.hist[mask])
                    fracdiff_map_dict[run] = Map(name='muons',
                                                 binning=nom_map.binning,
                                                 hist=div)

                delta_runs = np.array([float(x)
                                       for x in runs]) - float(nominal)

                coeff_binning = OneDimBinning(name='coeff',
                                              num_bins=degree,
                                              is_lin=True,
                                              domain=[-1, 1])
                combined_binning = fit_binning + coeff_binning

                pvals_hist = np.empty(tuple(map(int, combined_binning.shape)),
                                      dtype=object)
                hists = [fracdiff_map_dict[run].hist for run in runs]
                zip_hists = np.dstack(hists)
                for idx in np.ndindex(fit_binning.shape):
                    y_values = []
                    y_sigma = []
                    for run in fracdiff_map_dict:
                        y_values.append(
                            unp.nominal_values(
                                fracdiff_map_dict[run].hist[idx]))
                        y_sigma.append(
                            unp.std_devs(
                                fracdiff_map_dict[run].hist[idx]))
                    if np.any(y_sigma):
                        popt, pcov = curve_fit(fit_func,
                                               delta_runs,
                                               y_values,
                                               sigma=y_sigma,
                                               p0=np.ones(degree))
                    else:
                        popt, pcov = curve_fit(fit_func,
                                               delta_runs,
                                               y_values,
                                               p0=np.ones(degree))
                    # perr = np.sqrt(np.diag(pcov))
                    # pvals = unp.uarray(popt, perr)
                    pvals_hist[idx] = popt
                pvals_hist = np.array(pvals_hist.tolist())
                params_map = Map(name='muons',
                                 binning=combined_binning,
                                 hist=pvals_hist)
                if sys in sys_fit_coeffs:
                    sys_fit_coeffs[sys] = MapSet(
                        [sys_fit_coeffs[sys], params_map])
                else:
                    sys_fit_coeffs[sys] = params_map

        return sys_fit_coeffs
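
The heart of the fit above is a polynomial in the systematic offset whose constant term is pinned to 1 when force_through_nominal is set, so the curve passes exactly through the nominal dataset at offset zero. A standalone sketch with synthetic points:

import numpy as np
from scipy.optimize import curve_fit

def fit_func(vals, *poly_coeffs):
    # Constant term fixed to 1 -> curve passes through nominal at offset 0
    return np.polynomial.polynomial.polyval(vals, [1.] + list(poly_coeffs))

delta_runs = np.array([-1.0, -0.5, 0.0, 0.5, 1.0])     # offsets from nominal
y_values = 1.0 + 0.3*delta_runs + 0.1*delta_runs**2    # synthetic frac. diffs
degree = 2

popt, pcov = curve_fit(fit_func, delta_runs, y_values, p0=np.ones(degree))
print(popt)                   # approximately [0.3, 0.1]
print(fit_func(0., *popt))    # exactly 1.0 at the nominal point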
Example #27
def makeEventsFile(data_files,
                   detector,
                   proc_ver,
                   cut,
                   outdir,
                   run_settings=None,
                   data_proc_params=None,
                   join=None,
                   cust_cuts=None,
                   extract_fields=EXTRACT_FIELDS,
                   output_fields=OUTPUT_FIELDS):
    r"""Take the simulated and reconstructed HDF5 file(s) (as converted from I3
    by icecube.hdfwriter.I3HDFTableService) as input and write out a simplified
    PISA-standard-format HDF5 file for use in aeff, reco, and/or PID stages.

    Parameters
    ----------
    data_files : dict
        File paths for finding data files for each run, formatted as:
            {
                <string run>: <list of file paths>,
                <string run>: <list of file paths>,
                ...
                <string run>: <list of file paths>,
            }

    detector : string
        Name of the detector (e.g. IceCube, DeepCore, PINGU, etc.) as found in
        e.g. mc_sim_run_settings.json and data_proc_params.json files.

    proc_ver
        Version of processing applied to the events, as found in e.g.
        data_proc_params.json.

    cut
        Name of a standard cut to use; must be specified in the relevant
        detector/processing version node of the data processing parameters
        (file from which the data_proc_params object was instantiated)

    outdir
        Directory path in which to store resulting files; will be generated if
        it does not already exist (including any parent directories that do not
        exist)

    run_settings : string or MCSimRunSettings
        Resource location of mc_sim_run_settings.json or an MCSimRunSettings
        object instantiated therefrom.

    data_proc_params : string or DataProcParams
        Resource location of data_proc_params.json or a DataProcParams object
        instantiated therefrom.

    join
        String specifying any flavor/interaction types (flavInts) to join
        together. Separate flavInts with commas (',') and separate groups
        with semicolons (';'). E.g. an acceptable string is:
            'numucc+numubarcc; nuall bar NC, nuall NC'

    cust_cuts
        dict with a single DataProcParams cut specification or list of same
        (see help for DataProcParams for detailed description of cut spec)

    extract_fields : None or iterable of strings
        Field names to extract from source HDF5 file. If None, extract all
        fields.

    output_fields : None or iterable of strings
        Fields to include in the generated PISA-standard-format events HDF5
        file; note that if 'weighted_aeff' is not present, effective area will
        not be computed. If None, all fields will be written.

    Notes
    -----
    Compute "weighted_aeff" field:

    Within each int type (CC or NC), ngen should be added together;
    events recorded of that int type then get their one_weight divided by the
    total *for that int type only* to obtain the "weighted_aeff" for that
    event (even if int types are being grouped/joined together).

    This has the effect that within a group, ...
      ... and within an interaction type, effective area is a weighted
      average of that of the flavors being combined. E.g. for CC,

                     \sum_{run x}\sum_{flav y} (Aeff_{x,y} * ngen_{x,y})
          Aeff_CC = ----------------------------------------------------- ,
                          \sum_{run x}\sum_{flav y} (ngen_{x,y})

      ... and then across interaction types, the results of the above for
      each int type need to be summed together, i.e.:

          Aeff_total = Aeff_CC + Aeff_NC

    Note that each grouping of flavors is calculated with the above math
    completely independently from other flavor groupings specified.

    See Justin Lanfranchi's presentation on the PINGU Analysis call,
    2015-10-21, for more details:
      https://wikispaces.psu.edu/download/attachments/282040606/meff_report_jllanfranchi_v05_2015-10-21.pdf

    """
    if isinstance(run_settings, str):
        run_settings = DetMCSimRunsSettings(find_resource(run_settings),
                                            detector=detector)
    assert isinstance(run_settings, DetMCSimRunsSettings)
    assert run_settings.detector == detector

    if isinstance(data_proc_params, str):
        data_proc_params = DataProcParams(
            detector=detector,
            proc_ver=proc_ver,
            data_proc_params=find_resource(data_proc_params))
    assert data_proc_params.detector == detector
    assert data_proc_params.proc_ver == proc_ver

    runs = sorted(data_files.keys())

    all_flavs = []
    flavs_by_run = {}
    run_norm_factors = {}
    bin_edges = set()

    runs_by_flavint = FlavIntData()
    for flavint in runs_by_flavint.flavints:
        runs_by_flavint[flavint] = []

    #ngen_flavint_by_run = {run:FlavIntData() for run in runs}
    ##ngen_per_flav_by_run = {run:FlavIntData() for run in runs}
    #eint_per_flav_by_run = {run:FlavIntData() for run in runs}
    #for run in runs:
    #    flavints_in_run = run_settings.get_flavints(run=run)
    #    e_range = run_settings.get_energy_range(run)
    #    gamma = run_settings.get_spectral_index(run)
    #    for flavint in flavints_in_run:
    #        runs_by_flavint[flavint].append(run)
    #        ngen_flav = run_settings.get_num_gen(
    #            run=run, flav_or_flavint=flavint, include_physical_fract=True
    #        )
    #        #runs_by_flavint[flavint].append(run)
    #        #this_flav = flavint.
    #        #xsec_fract_en_wtd_avg[run][flavint] = \
    #        ngen_flavint_by_run[run][flavint] = \
    #                xsec.get_xs_ratio_integral(
    #                    flavintgrp0=flavint,
    #                    flavintgrp1=flavint.flav,
    #                    e_range=e_range,
    #                    gamma=gamma,
    #                    average=True
    #                )
    #    xsec_ver = run_settings.get_xsec_version(run=run)
    #    if xsec_ver_ref is None:
    #        xsec_ver_ref = xsec_ver
    #    # An assumption of below logic is that all MC is generated using the
    #    # same cross sections version.
    #    #
    #    # TODO / NOTE:
    #    # It would be possible to combine runs with different cross sections so
    #    # long as each (flavor, interaction type) cross sections are
    #    # weighted-averaged together using weights
    #    #   N_gen_{n,flav+inttype} * E_x^{-gamma_n} /
    #    #       ( \int_{E_min_n}^{E_max_n} E^{-\gamma_n} dE )
    #    # where E_x are the energy sample points specified in the cross
    #    # sections (and hence these must also be identical across all cross
    #    # sections that get combined, unless interpolation is performed).
    #    assert xsec_ver == xsec_ver_ref
    #    #ngen_weighted_energy_integral[str(run)] = powerLawIntegral(
    #    #flavs_by_run[run] = run_settings.flavs(run)
    ##flavs_present =

    detector_geom = run_settings[runs[0]]['geom']

    # Create Events object to store data
    evts = Events()
    evts.metadata.update({
        'detector': run_settings.detector,
        'proc_ver': data_proc_params.proc_ver,
        'geom': detector_geom,
        'runs': runs,
    })

    cuts = []
    if isinstance(cust_cuts, dict):
        cust_cuts = [cust_cuts]
    if cut is not None:
        evts.metadata['cuts'].append(cut)
        cuts.append(cut)
    if cust_cuts is not None:
        for ccut in cust_cuts:
            evts.metadata['cuts'].append('custom: ' + ccut['pass_if'])
            cuts.append(ccut)

    orig_outdir = outdir
    outdir = expand(outdir)
    logging.info('Output dir spec\'d: %s', orig_outdir)
    if outdir != orig_outdir:
        logging.info('Output dir expands to: %s', outdir)
    mkdir(outdir)

    detector_label = str(data_proc_params.detector)
    proc_label = 'proc_' + str(data_proc_params.proc_ver)

    # What flavints to group together
    if join is None or join == '':
        grouped = []
        ungrouped = [NuFlavIntGroup(k) for k in ALL_NUFLAVINTS]
        groups_label = 'unjoined'
        logging.info('Events in the following groups will be joined together:'
                     ' (none)')
    else:
        grouped, ungrouped = xlateGroupsStr(join)
        evts.metadata['flavints_joined'] = [str(g) for g in grouped]
        groups_label = 'joined_G_' + '_G_'.join([str(g) for g in grouped])
        logging.info(
            'Events in the following groups will be joined together: ' +
            '; '.join([str(g) for g in grouped]))

    # Find any flavints not included in the above groupings
    flavint_groupings = grouped + ungrouped
    if len(ungrouped) == 0:
        ungrouped = ['(none)']
    logging.info('Events of the following flavints will NOT be joined'
                 ' together: ' + '; '.join([str(k) for k in ungrouped]))

    # Enforce that flavints composing groups are mutually exclusive
    for grp_n, flavintgrp0 in enumerate(flavint_groupings[:-1]):
        for flavintgrp1 in flavint_groupings[grp_n + 1:]:
            assert len(set(flavintgrp0).intersection(set(flavintgrp1))) == 0

    flavintgrp_names = [str(flavintgrp) for flavintgrp in flavint_groupings]

    # Instantiate storage for all intermediate destination fields;
    # The data structure looks like:
    #   extracted_data[group #][interaction type][field name] = list of data
    if extract_fields is None:
        extracted_data = [{inttype: {}
                           for inttype in ALL_NUINT_TYPES}
                          for _ in flavintgrp_names]
    else:
        extracted_data = [{
            inttype: {field: []
                      for field in extract_fields}
            for inttype in ALL_NUINT_TYPES
        } for _ in flavintgrp_names]

    # Instantiate generated-event counts for destination fields; count
    # CC separately from NC because aeff's for CC & NC add, whereas
    # aeffs intra-CC should be weighted-averaged (as for intra-NC)
    ngen = [{inttype: {}
             for inttype in ALL_NUINT_TYPES} for _ in flavintgrp_names]

    # Loop through all of the files, retrieving the events, filtering,
    # and recording the number of generated events pertinent to
    # calculating aeff
    filecount = {}
    detector_geom = None
    bad_files = []
    for run, fnames in data_files.items():
        file_count = 0
        for fname in fnames:
            # Retrieve data from all nodes specified in the processing
            # settings file
            logging.trace('Trying to get data from file %s', fname)
            try:
                data = data_proc_params.get_data(fname,
                                                 run_settings=run_settings)
            except (ValueError, KeyError, IOError):
                logging.warning('Bad file encountered: %s', fname)
                bad_files.append(fname)
                continue

            file_count += 1

            # Check to make sure only one run is present in the data
            runs_in_data = set(data['run'])
            assert len(runs_in_data) == 1, 'Must be just one run in data'

            #run = int(data['run'][0])
            if run not in filecount:
                filecount[run] = 0
            filecount[run] += 1
            rs_run = run_settings[run]

            # Record geom; check that geom is consistent with other runs
            if detector_geom is None:
                detector_geom = rs_run['geom']
            assert rs_run['geom'] == detector_geom, \
                    'All runs\' geometries must match!'

            # Loop through all flavints spec'd for run
            for run_flavint in rs_run['flavints']:
                barnobar = run_flavint.bar_code
                int_type = run_flavint.intType

                # Retrieve this-interaction-type- & this-barnobar-only events
                # that also pass cuts. (note that cut names are strings)
                intonly_cut_data = data_proc_params.apply_cuts(
                    data,
                    cuts=cuts + [str(int_type), str(barnobar)],
                    return_fields=extract_fields)

                # Record the generated count and data for this run/flavor for
                # each group to which it's applicable
                for grp_n, flavint_group in enumerate(flavint_groupings):
                    if run_flavint not in flavint_group:
                        continue

                    # Instantiate a field for particles and antiparticles,
                    # keyed by the output of the bar_code property for each
                    if run not in ngen[grp_n][int_type]:
                        ngen[grp_n][int_type][run] = {
                            NuFlav(12).bar_code: 0,
                            NuFlav(-12).bar_code: 0,
                        }

                    # Record count only if it hasn't already been recorded
                    if ngen[grp_n][int_type][run][barnobar] == 0:
                        # Note that one_weight includes cc/nc:total fraction,
                        # so DO NOT specify the full flavint here, only flav
                        # (since one_weight does NOT take bar/nobar fraction,
                        # it must be included here in the ngen computation)
                        flav_ngen = run_settings.get_num_gen(run=run,
                                                             barnobar=barnobar)
                        ngen[grp_n][int_type][run][barnobar] = flav_ngen

                    # Append the data. Note that extracted_data is:
                    # extracted_data[group n][int_type][extract field name] =
                    #   list
                    if extract_fields is None:
                        for f in intonly_cut_data.keys():
                            if f not in extracted_data[grp_n][int_type]:
                                extracted_data[grp_n][int_type][f] = []
                            extracted_data[grp_n][int_type][f].extend(
                                intonly_cut_data[f])
                    else:
                        for f in extract_fields:
                            extracted_data[grp_n][int_type][f].extend(
                                intonly_cut_data[f])
        logging.info('File count for run %s: %d', run, file_count)
    to_file(bad_files, '/tmp/bad_files.json')

    if ((output_fields is None and
         (extract_fields is None or 'one_weight' in extract_fields))
            or (output_fields is not None
                and 'weighted_aeff' in output_fields)):
        fmtfields = (' ' * 12 + 'flavint_group', 'int type', '     run',
                     'part/anti', 'part/anti count', 'aggregate count')
        fmt_n = [len(f) for f in fmtfields]
        fmt = '  '.join([r'%' + str(n) + r's' for n in fmt_n])
        lines = '  '.join(['-' * n for n in fmt_n])
        logging.info(fmt, *fmtfields)
        logging.info(lines)
        for grp_n, flavint_group in enumerate(flavint_groupings):
            for int_type in set([fi.intType for fi in flavint_group.flavints]):
                ngen_it_tot = 0
                for run, run_counts in ngen[grp_n][int_type].items():
                    for barnobar, barnobar_counts in run_counts.items():
                        ngen_it_tot += barnobar_counts
                        logging.info(fmt, flavint_group.simple_str(), int_type,
                                     str(run), barnobar, int(barnobar_counts),
                                     int(ngen_it_tot))
                # Convert data to numpy array
                if extract_fields is None:
                    for field in extracted_data[grp_n][int_type].keys():
                        extracted_data[grp_n][int_type][field] = \
                                np.array(extracted_data[grp_n][int_type][field])
                else:
                    for field in extract_fields:
                        extracted_data[grp_n][int_type][field] = \
                                np.array(extracted_data[grp_n][int_type][field])
                # Generate weighted_aeff field for this group / int type's data
                extracted_data[grp_n][int_type]['weighted_aeff'] = \
                        extracted_data[grp_n][int_type]['one_weight'] \
                        / ngen_it_tot * CMSQ_TO_MSQ

    # Report file count per run
    for run, count in filecount.items():
        logging.info('Files read, run %s: %d', run, count)
        ref_num_i3_files = run_settings[run]['num_i3_files']
        if count != ref_num_i3_files:
            logging.warning(
                'Run %s, Number of files read (%d) != number of '
                'source I3 files (%d), which may indicate an error.', run,
                count, ref_num_i3_files)

    # Generate output data
    for flavint in ALL_NUFLAVINTS:
        int_type = flavint.intType
        for grp_n, flavint_group in enumerate(flavint_groupings):
            if flavint not in flavint_group:
                logging.trace('flavint %s not in flavint_group %s, passing.',
                              flavint, flavint_group)
                continue
            else:
                logging.trace(
                    'flavint %s **IS** in flavint_group %s, storing.', flavint,
                    flavint_group)
            if output_fields is None:
                evts[flavint] = extracted_data[grp_n][int_type]
            else:
                evts[flavint] = {
                    f: extracted_data[grp_n][int_type][f]
                    for f in output_fields
                }

    # Generate file name
    numerical_runs = []
    alphanumerical_runs = []
    for run in runs:
        try:
            int(run)
            numerical_runs.append(int(run))
        except ValueError:
            alphanumerical_runs.append(str(run))
    run_labels = []
    if len(numerical_runs) > 0:
        run_labels.append(list2hrlist(numerical_runs))
    if len(alphanumerical_runs) > 0:
        run_labels += sorted(alphanumerical_runs)
    run_label = 'runs_' + ','.join(run_labels)
    geom_label = '' + detector_geom
    fname = 'events__' + '__'.join([
        detector_label,
        geom_label,
        run_label,
        proc_label,
        groups_label,
    ]) + '.hdf5'

    outfpath = os.path.join(outdir, fname)
    logging.info('Writing events to %s', outfpath)

    # Save data to output file
    evts.save(outfpath)
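
The weighted_aeff recipe from the Notes reduces to a single division once ngen has been summed per interaction type. A worked numpy sketch with illustrative numbers (CMSQ_TO_MSQ converts OneWeight's cm^2 to m^2):

import numpy as np

CMSQ_TO_MSQ = 1.0e-4                             # 1 cm^2 = 1e-4 m^2
one_weight = np.array([2.5e4, 1.1e4, 3.3e4])     # illustrative OneWeight values
ngen = {'run_a': {'+': 1.0e6, '-': 1.0e6},       # generated counts by run and
        'run_b': {'+': 2.0e6, '-': 2.0e6}}       # particle/antiparticle code

# Sum ngen over runs and bar/nobar *within one interaction type only*
ngen_it_tot = sum(sum(counts.values()) for counts in ngen.values())
weighted_aeff = one_weight / ngen_it_tot * CMSQ_TO_MSQ
print(weighted_aeff)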
Example #28
def populate_transforms(service, xform_flavints, xform_array):
    """General function for populating a BinnedTensorTransform with a single
    aeff transform array, taking into account e.g. sum_grouped_flavints etc.

    Any rebinning is assumed to be performed outside of the transform, so the
    transform's `output_binning` is the same as its `input_binning`. This does
    _not_ mean that the stage's output binning needs to match its input
    binning, though, since a rebinning can occur after the transform is
    applied but before the maps are emitted from the stage.

    Note that, as certain assumptions (like the above) are made about input
    and output names and binning, this function should _only_ be applied to
    aeff services (unless very carefully considered).

    Parameters
    ----------
    service : Stage
        The aeff service

    xform_flavints : NuFlavIntGroup
        Flavor/interaction types to which the transform array applies

    xform_array : numpy.ndarray
        Raw transform array

    Returns
    -------
    transforms : list of BinnedTensorTransform

    """
    transforms = []

    # If combining grouped flavints:
    # Create a single transform for each group and assign all inputs
    # that contribute to the group as the single transform's inputs.
    # The actual sum of the input event rate maps will be performed by
    # the BinnedTensorTransform object upon invocation of the `apply`
    # method.
    if service.sum_grouped_flavints:
        xform_input_names = []
        for input_name in service.input_names:
            if set(NuFlavIntGroup(input_name)).isdisjoint(xform_flavints):
                continue
            xform_input_names.append(input_name)

        for output_name in service.output_names:
            if output_name not in xform_flavints:
                continue

            logging.trace('  inputs: %s, output: %s, xform: %s',
                          xform_input_names, output_name, xform_flavints)

            xform = BinnedTensorTransform(
                input_names=xform_input_names,
                output_name=output_name,
                input_binning=service.input_binning,
                output_binning=service.input_binning,
                xform_array=xform_array,
                sum_inputs=service.sum_grouped_flavints
            )
            transforms.append(xform)

    # If *not* combining grouped flavints:
    # Copy the transform for each input flavor, regardless if the
    # transform is computed from a combination of flavors.
    else:
        for input_name in service.input_names:
            # Since aeff "splits" neutrino flavors into
            # flavor+interaction types, need to check if the output
            # flavints are encapsulated by the input flavor(s).
            if set(NuFlavIntGroup(input_name)).isdisjoint(xform_flavints):
                continue

            for output_name in service.output_names:
                if (output_name not in NuFlavIntGroup(input_name)
                        or output_name not in xform_flavints):
                    continue

                logging.trace('  input: %s, output: %s, xform: %s',
                              input_name, output_name, xform_flavints)

                xform = BinnedTensorTransform(
                    input_names=input_name,
                    output_name=output_name,
                    input_binning=service.input_binning,
                    output_binning=service.input_binning,
                    xform_array=xform_array,
                    sum_inputs=service.sum_grouped_flavints
                )
                transforms.append(xform)

    return transforms
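
The two wiring branches above can be mimicked without PISA by treating each name as a set of member flavints. A minimal sketch with hypothetical names (plain sets stand in for NuFlavIntGroup membership tests):

def wiring_sketch(input_names, output_names, group, flavints_of, sum_grouped):
    """Return the (inputs, output) pairs that receive a transform;
    `flavints_of` maps each name to its set of member flavints."""
    pairs = []
    if sum_grouped:
        # One transform per group output; all contributing inputs get summed
        contributing = [n for n in input_names if flavints_of[n] & group]
        for out in output_names:
            if flavints_of[out] <= group:
                pairs.append((contributing, out))
    else:
        # Copy the transform for each input flavor it was computed from
        for inp in input_names:
            if not flavints_of[inp] & group:
                continue
            for out in output_names:
                if (flavints_of[out] <= flavints_of[inp]
                        and flavints_of[out] <= group):
                    pairs.append(([inp], out))
    return pairs

group = {'nue_cc', 'nuebar_cc'}
flavints_of = {
    'nue': {'nue_cc', 'nue_nc'}, 'nuebar': {'nuebar_cc', 'nuebar_nc'},
    'nue_cc': {'nue_cc'}, 'nuebar_cc': {'nuebar_cc'},
    'nue_cc+nuebar_cc': {'nue_cc', 'nuebar_cc'},
}
print(wiring_sketch(['nue', 'nuebar'], ['nue_cc+nuebar_cc'],
                    group, flavints_of, True))
# -> [(['nue', 'nuebar'], 'nue_cc+nuebar_cc')]
print(wiring_sketch(['nue', 'nuebar'], ['nue_cc', 'nuebar_cc'],
                    group, flavints_of, False))
# -> [(['nue'], 'nue_cc'), (['nuebar'], 'nuebar_cc')]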
Example #29
    def __init__(self,
                 params,
                 output_binning,
                 input_names,
                 output_names,
                 output_events=True,
                 error_method=None,
                 debug_mode=None,
                 disk_cache=None,
                 memcache_deepcopy=True,
                 outputs_cache_depth=20):
        self.sample_hash = None
        """Hash of input event sample."""
        self.weight_hash = None
        """Hash of event sample."""
        self.fit_hash = None
        """Hash of fit sample."""
        self.fitcoeffs_hash = None
        """Hash of fit coefficients."""
        self.fitcoeffs_cache_hash = None
        """Hash of cached fit coefficients."""

        self.fit_params = ('pipeline_config', 'discr_sys_sample_config',
                           'stop_after_stage', 'poly_degree',
                           'force_through_nominal', 'smoothing')

        self.nu_params = ('nu_dom_eff', 'nu_hole_ice')

        self.mu_params = ('mu_dom_eff', 'mu_hole_ice')

        self.other_params = ('cache_fit', )

        expected_params = self.fit_params + self.other_params
        if ('all_nu' in input_names) or ('neutrinos' in input_names):
            expected_params += self.nu_params
        if 'muons' in input_names:
            expected_params += self.mu_params

        self.neutrinos = False
        self.muons = False
        self.noise = False

        if input_names != output_names:
            raise AssertionError(
                'Input names must match output names for this stage: '
                '{0} (input names) != {1} (output names)'.format(
                    input_names, output_names))

        output_names = output_names.replace(' ', '').split(',')
        clean_outnames = []
        self._output_nu_groups = []
        for name in output_names:
            if 'muons' in name:
                self.muons = True
                clean_outnames.append(name)
            elif 'noise' in name:
                self.noise = True
                clean_outnames.append(name)
            elif 'all_nu' in name:
                self.neutrinos = True
                self._output_nu_groups = \
                    [NuFlavIntGroup(f) for f in ALL_NUFLAVINTS]
            else:
                self.neutrinos = True
                self._output_nu_groups.append(NuFlavIntGroup(name))

        if self.neutrinos:
            clean_outnames += [str(f) for f in self._output_nu_groups]

        if not isinstance(output_events, bool):
            raise AssertionError(
                'output_events must be of type bool, instead it is supplied '
                'with type {0}'.format(type(output_events)))
        self.fit_binning = deepcopy(output_binning)
        if output_events:
            output_binning = None
        self.output_events = output_events

        super().__init__(use_transforms=False,
                         params=params,
                         expected_params=expected_params,
                         input_names=clean_outnames,
                         output_names=clean_outnames,
                         error_method=error_method,
                         debug_mode=debug_mode,
                         disk_cache=disk_cache,
                         memcache_deepcopy=memcache_deepcopy,
                         outputs_cache_depth=outputs_cache_depth,
                         output_binning=output_binning)

        if self.params['smoothing'].value is not None:
            if self.params['smoothing'].value != 'gauss':
                raise AssertionError(
                    'Parameter "smoothing" accepts "none" or "gauss" as '
                    'input, instead got {0} as '
                    'input'.format(self.params['smoothing'].value))

        self.include_attrs_for_hashes('sample_hash')
Example #30
    def _compute_transforms(self):
        """
        Generate reconstruction "smearing kernels" by reading in a set of
        parameterisation functions from a JSON file. These should have the
        same dimensionality as the input binning, i.e. if you have energy and
        coszen input binning then the kernels provided should have both
        energy and coszen resolution functions.

        Any superposition of distributions from scipy.stats is supported.
        """
        res_scale_ref = self.params.res_scale_ref.value.strip().lower()
        assert res_scale_ref in ['zero']  # TODO: , 'mean', 'median']

        reco_param_source = self.params.reco_paramfile.value

        if reco_param_source is None:
            raise ValueError(
                'A non-None reco parameterization must be provided via'
                ' params.reco_paramfile')

        reco_param_hash = hash_obj(reco_param_source)

        if (self._reco_param_hash is None
                or reco_param_hash != self._reco_param_hash):
            reco_param = load_reco_param(reco_param_source)

            # Transform groups are implicitly defined by the contents of the
            # reco paramfile's keys
            implicit_transform_groups = reco_param.keys()

            # Make sure these match transform groups specified for the stage
            if set(implicit_transform_groups) != set(self.transform_groups):
                raise ValueError(
                    'Transform groups (%s) defined implicitly by'
                    ' %s reco parameterizations do not match those'
                    ' defined as the stage\'s `transform_groups` (%s).' %
                    (implicit_transform_groups, reco_param_source,
                     self.transform_groups))

            self.param_dict = reco_param
            self._reco_param_hash = reco_param_hash

            self.eval_dict = self.evaluate_reco_param()
            self.reco_scales_and_biases_applicable()

        # everything seems to be fine, so rescale and shift distributions
        eval_dict = self.scale_and_shift_reco_dists()

        # Computational units must be the following for compatibility with
        # events file
        comp_units = dict(true_energy='GeV',
                          true_coszen=None,
                          true_azimuth='rad',
                          reco_energy='GeV',
                          reco_coszen=None,
                          reco_azimuth='rad',
                          pid=None)

        # Select only the units in the input/output binning for conversion
        # (can't pass more than what's actually there)
        in_units = {
            dim: unit
            for dim, unit in comp_units.items() if dim in self.input_binning
        }
        out_units = {
            dim: unit
            for dim, unit in comp_units.items() if dim in self.output_binning
        }

        # These binnings will be in the computational units defined above
        input_binning = self.input_binning.to(**in_units)
        output_binning = self.output_binning.to(**out_units)
        en_centers_in = self.input_binning[
            'true_energy'].weighted_centers.magnitude
        en_edges_in = self.input_binning['true_energy'].bin_edges.magnitude
        cz_centers_in = self.input_binning[
            'true_coszen'].weighted_centers.magnitude
        cz_edges_in = self.input_binning['true_coszen'].bin_edges.magnitude
        en_edges_out = self.output_binning['reco_energy'].bin_edges.magnitude
        cz_edges_out = self.output_binning['reco_coszen'].bin_edges.magnitude

        n_e_in = len(en_centers_in)
        n_cz_in = len(cz_centers_in)
        n_e_out = len(en_edges_out) - 1
        n_cz_out = len(cz_edges_out) - 1

        if self.coszen_flipback:
            cz_edges_out, flipback_mask, keep = \
                self.extend_binning_for_coszen(ext_low=-3., ext_high=+3.)

        xforms = []
        for xform_flavints in self.transform_groups:
            logging.debug("Working on %s reco kernel..." % xform_flavints)

            this_params = eval_dict[xform_flavints]
            reco_kernel = np.zeros((n_e_in, n_cz_in, n_e_out, n_cz_out))

            for (i, j) in itertools.product(range(n_e_in), range(n_cz_in)):
                e_kern_cdf = self.make_cdf(bin_edges=en_edges_out,
                                           enval=en_centers_in[i],
                                           enindex=i,
                                           czval=None,
                                           czindex=j,
                                           dist_params=this_params['energy'])
                cz_kern_cdf = self.make_cdf(bin_edges=cz_edges_out,
                                            enval=en_centers_in[i],
                                            enindex=i,
                                            czval=cz_centers_in[j],
                                            czindex=j,
                                            dist_params=this_params['coszen'])

                if self.coszen_flipback:
                    cz_kern_cdf = perform_coszen_flipback(
                        cz_kern_cdf, flipback_mask, keep)

                reco_kernel[i, j] = np.outer(e_kern_cdf, cz_kern_cdf)
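                # `e_kern_cdf` and `cz_kern_cdf` hold, per output bin, the
                # probability mass assigned by the parameterized resolution
                # functions (CDF differences across the output bin edges);
                # their outer product is the joint probability for this true
                # (E, cz) bin to be reconstructed into each output (E, cz)
                # bin. E.g. (values illustrative): e_kern_cdf == [0.7, 0.3]
                # and cz_kern_cdf == [0.9, 0.1] assign 0.63 to output bin
                # (0, 0).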

            # Sanity check of reco kernels - intolerable negative values?
            logging.trace(" Ensuring reco kernel sanity...")
            kern_neg_invalid = reco_kernel < -EQUALITY_PREC
            if np.any(kern_neg_invalid):
                raise ValueError("Detected intolerable negative entries in"
                                 " reco kernel! Min.: %.15e" %
                                 np.min(reco_kernel))

            # Set entries numerically compatible with zero to exactly zero
            reco_kernel[np.abs(reco_kernel) < EQUALITY_PREC] = 0.
            sum_over_axes = tuple(range(-len(self.output_binning), 0))
            totals = np.sum(reco_kernel, axis=sum_over_axes)
            totals_large = totals > (1 + EQUALITY_PREC)
            if np.any(totals_large):
                raise ValueError("Detected overflow in reco kernel! Max.:"
                                 " %0.15e" % (np.max(totals)))

            if self.input_binning.basenames[0] == "coszen":
                # The reconstruction kernel has been set up with energy as its
                # first dimension, so swap axes if it is applied to an input
                # binning where 'coszen' is the first
                logging.trace(" Swapping kernel dimensions since 'coszen' has"
                              " been requested as the first.")
                reco_kernel = np.swapaxes(reco_kernel, 0, 1)
                reco_kernel = np.swapaxes(reco_kernel, 2, 3)
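                # E.g. (shapes illustrative): a kernel of shape
                # (n_e_in, n_cz_in, n_e_out, n_cz_out) becomes
                # (n_cz_in, n_e_in, n_cz_out, n_e_out) after the two swaps,
                # matching a binning with 'coszen' first.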

            if self.sum_grouped_flavints:
                xform_input_names = []
                for input_name in self.input_names:
                    if set(NuFlavIntGroup(input_name)).isdisjoint(
                            xform_flavints):
                        continue
                    xform_input_names.append(input_name)

                for output_name in self.output_names:
                    if output_name not in xform_flavints:
                        continue
                    xform = BinnedTensorTransform(
                        input_names=xform_input_names,
                        output_name=output_name,
                        input_binning=self.input_binning,
                        output_binning=self.output_binning,
                        xform_array=reco_kernel,
                        sum_inputs=self.sum_grouped_flavints)
                    xforms.append(xform)
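                # Illustrative example (group/input names assumed): if
                # `xform_flavints` is numu_cc+numubar_cc and the stage's
                # inputs include 'numu_cc' and 'numubar_cc', a single
                # transform per output is created whose listed inputs are
                # summed before the shared kernel is applied.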
            # If *not* combining grouped flavints:
            # Copy the transform for each input flavor, regardless of whether
            # the transform was computed from a combination of flavors.
            else:
                for input_name in self.input_names:
                    if set(NuFlavIntGroup(input_name)).isdisjoint(
                            xform_flavints):
                        continue
                    for output_name in self.output_names:
                        if (output_name not in NuFlavIntGroup(input_name)
                                or output_name not in xform_flavints):
                            continue
                        logging.trace('  input: %s, output: %s, xform: %s',
                                      input_name, output_name, xform_flavints)

                        xform = BinnedTensorTransform(
                            input_names=input_name,
                            output_name=output_name,
                            input_binning=self.input_binning,
                            output_binning=self.output_binning,
                            xform_array=reco_kernel,
                            sum_inputs=self.sum_grouped_flavints)
                        xforms.append(xform)

        return TransformSet(transforms=xforms)
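
A minimal, self-contained sketch of the outer-product kernel construction
used above. Gaussian resolutions stand in for the stage's parameterized
`make_cdf`; all names and values below are assumptions for illustration, not
part of the stage itself:

import numpy as np
from scipy.stats import norm

def toy_reco_kernel(en_centers_in, cz_centers_in, en_edges_out, cz_edges_out,
                    e_res=0.2, cz_res=0.1):
    """Toy 4-D reco kernel mapping each true (E, cz) bin to reco (E, cz)
    bins, assuming Gaussian resolutions instead of parameterized CDFs."""
    kernel = np.zeros((len(en_centers_in), len(cz_centers_in),
                       len(en_edges_out) - 1, len(cz_edges_out) - 1))
    for i, e_true in enumerate(en_centers_in):
        # Per-bin probability mass = CDF differences across the output edges
        e_cdf = np.diff(norm.cdf(en_edges_out, loc=e_true,
                                 scale=e_res * e_true))
        for j, cz_true in enumerate(cz_centers_in):
            cz_cdf = np.diff(norm.cdf(cz_edges_out, loc=cz_true,
                                      scale=cz_res))
            kernel[i, j] = np.outer(e_cdf, cz_cdf)
    return kernel

# Usage (values illustrative): each kernel[i, j] sums to <= 1, since some
# probability mass may fall outside the output binning
kern = toy_reco_kernel(en_centers_in=[5., 10.], cz_centers_in=[-0.5, 0.5],
                       en_edges_out=np.logspace(0, 2, 11),
                       cz_edges_out=np.linspace(-1., 1., 11))
print(kern.shape)  # -> (2, 2, 10, 10)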