Пример #1
0
    def load_gen_data(self):
        """Return generator-level data for `self.output_str`, using a cache.

        The ``unfold_pipeline_cfg`` parameter is either a path to a pipeline
        config file (str) or an already instantiated `Pipeline`. A hash built
        from the generator-level config, the pipeline (config or state hash)
        and `self.output_str` decides whether the cached `self._gen_data`
        can be returned without re-running the pipeline.

        Returns
        -------
        gen_data
            The `self.output_str` entry of the pipeline output after
            `transform_groups(self.output_str)`.

        Raises
        ------
        TypeError
            If ``unfold_pipeline_cfg`` is neither a str nor a `Pipeline`.
        AssertionError
            If the pipeline output is not a `Data` object.

        """
        logging.debug('Loading generator level sample')
        unfold_pipeline_cfg = self.params['unfold_pipeline_cfg'].value
        if isinstance(unfold_pipeline_cfg, str):
            pipeline_cfg = from_file(unfold_pipeline_cfg)
            # No pipeline exists yet, so the parsed config itself is what
            # gets hashed
            pipeline_hash = pipeline_cfg
            sa_cfg = from_file(
                pipeline_cfg.get('stage.data', 'param.data_sample_config'))
            template_maker = Pipeline(pipeline_cfg)
        elif isinstance(unfold_pipeline_cfg, Pipeline):
            pipeline_hash = unfold_pipeline_cfg.state_hash
            sa_cfg = from_file(
                unfold_pipeline_cfg.params['data_sample_config'].value)
            template_maker = unfold_pipeline_cfg
        else:
            # Without this branch the names used below would be unbound and
            # the failure would surface as an opaque UnboundLocalError
            raise TypeError(
                '`unfold_pipeline_cfg` must be a str or a Pipeline; got '
                '{0} instead.'.format(type(unfold_pipeline_cfg)))

        gen_cfg = from_file(sa_cfg.get('neutrinos|gen_lvl', 'gen_cfg_file'))
        this_hash = hash_obj([gen_cfg, pipeline_hash, self.output_str],
                             full_hash=self.full_hash)
        # Cache hit: nothing relevant changed since the last call
        if self.gen_data_hash == this_hash:
            return self._gen_data

        full_gen_data = template_maker.get_outputs()
        if not isinstance(full_gen_data, Data):
            raise AssertionError(
                'Output of pipeline is not a Data object, instead is type '
                '{0}'.format(type(full_gen_data)))
        trans_data = full_gen_data.transform_groups(self.output_str)
        gen_data = trans_data[self.output_str]

        # Refresh the cache together with the hash that validates it
        self._gen_data = gen_data
        self.gen_data_hash = this_hash
        return gen_data
Пример #2
0
 def __load(self, fname):
     """Load `fname` via `from_file`, returning attributes when available.

     `from_file` is first called with ``return_attrs=True``. If that raises
     a TypeError, the file is read again without that argument and the
     metadata is reported as None.

     Returns
     -------
     data, meta : tuple
         The loaded contents and the accompanying attributes (or None).

     """
     try:
         contents, attrs = from_file(fname, return_attrs=True)
     except TypeError:
         contents, attrs = from_file(fname), None
     return contents, attrs
Пример #3
0
 def load_discr_sys(self, sys_list):
     """Load the fit results file and check it against this stage's inputs.

     The file named by the ``fit_results_file`` parameter is stored in
     `self.fit_results`. For every input name missing from the file's
     ``map_names``, a substitute is used if exactly one map name is a
     substring of the input name; otherwise an error is logged. Finally
     `sys_list` must match the file's ``sys_list`` as a set.

     Parameters
     ----------
     sys_list : iterable
         Systematics expected to be present in the fit results.

     Raises
     ------
     AssertionError
         If `sys_list` and the file's ``sys_list`` differ as sets.

     """
     self.fit_results = from_file(self.params['fit_results_file'].value)
     if set(self.input_names) != set(self.fit_results['map_names']):
         for name in self.input_names:
             if name in self.fit_results['map_names']:
                 continue
             # One flag per known map name: is it contained in `name`?
             matches = [
                 mapname in name
                 for mapname in self.fit_results['map_names']
             ]
             if sum(matches) != 1:
                 # Zero or several candidates: no unique substitute
                 logging.error('No compatible map for %s found!' % name)
                 continue
             substitute = self.fit_results['map_names'][matches.index(True)]
             self.fit_results[name] = self.fit_results[substitute]
             logging.warning(
                 'Substituting hyperplane parameterization %s for %s'
                 % (substitute, name))
     assert set(sys_list) == set(self.fit_results['sys_list'])
     self.sys_list = self.fit_results['sys_list']
Пример #4
0
    def _compute_outputs(self, inputs=None):
        """Apply basic cuts and compute histograms for output channels.

        The data sample config is (re)loaded from the ``data_sample_config``
        parameter and the sample events are loaded. A non-None
        ``keep_criteria`` parameter is currently rejected.

        Parameters
        ----------
        inputs
            Not used by this method.

        Returns
        -------
        `self._data` if `self.output_events` is set; otherwise a `MapSet`
        with one histogram per neutrino group (if `self.neutrinos`), plus
        'muons' and 'noise' histograms when those are enabled.

        Raises
        ------
        NotImplementedError
            If the ``keep_criteria`` parameter is not None.

        """
        logging.debug('Entering sample._compute_outputs')

        self.config = from_file(self.params['data_sample_config'].value)

        sample_name = self.config.get('general', 'name')
        logging.trace('{0} sample sample_hash = '
                      '{1}'.format(sample_name, self.sample_hash))
        self.load_sample_events()

        if self.params['keep_criteria'].value is not None:
            # TODO(shivesh)
            raise NotImplementedError(
                'needs check to make sure this works in a DistributionMaker'
            )
            # Unreachable until the TODO above is resolved
            self._data.applyCut(self.params['keep_criteria'].value)
            self._data.update_hash()

        if self.output_events:
            return self._data

        outputs = []
        if self.neutrinos:
            trans_nu_data = self._data.transform_groups(
                self._output_nu_groups
            )
            for fig in trans_nu_data.keys():
                nu_hist = trans_nu_data.histogram(
                    kinds=fig,
                    binning=self.output_binning,
                    weights_col='pisa_weight',
                    errors=True,
                    name=str(NuFlavIntGroup(fig)),
                )
                outputs.append(nu_hist)

        # Muons and noise are histogrammed identically; the switch attribute
        # carries the same name as the kind it enables
        for kind in ('muons', 'noise'):
            if not getattr(self, kind):
                continue
            outputs.append(self._data.histogram(
                kinds=kind,
                binning=self.output_binning,
                weights_col='pisa_weight',
                errors=True,
                name=kind,
                tex=r'\rm{' + kind + '}'
            ))

        return MapSet(maps=outputs, name=self.config.get('general', 'name'))
Пример #5
0
    def __init__(self, prem_file, detector_depth=1., prop_height=2.):
        """Set up the layer geometry, optionally from an Earth model file.

        Parameters
        ----------
        prem_file : str or None
            Passed to `from_file(..., as_array=True)`; column 0 is read as
            radius and column 1 as density, both stored in reversed file
            order. If None, no Earth model is used and an Earth radius of
            6371.0 is assumed.
        detector_depth : float
            Subtracted from the Earth radius to obtain `self.r_detector`.
        prop_height : float
            Stored as `self.prop_height`.

        """
        self.using_earth_model = prem_file is not None
        if self.using_earth_model:
            prem = from_file(prem_file, as_array=True)
            self.rhos = prem[..., 1][::-1].astype(FTYPE)
            self.radii = prem[..., 0][::-1].astype(FTYPE)
            # The last row of the file supplies the Earth radius
            r_earth = prem[-1][0]
            self.default_elec_frac = 0.5
            self.max_layers = 2 * (len(self.radii) - 1) + 1
        else:
            # If no Earth model provided, use a standard Earth radius value
            r_earth = 6371.0

        self.r_detector = r_earth - detector_depth
        self.prop_height = prop_height
        self.detector_depth = detector_depth
        self.min_detector_depth = 1.0e-3  # [km] so min is ~ 1 m

        # Everything below only applies when an Earth model was loaded
        if not self.using_earth_model:
            return

        # Change outermost radius to a bit underground, where the detector is
        if self.detector_depth >= self.min_detector_depth:
            self.radii[0] -= detector_depth
            self.max_layers += 1

        # Compute coszen limit
        self.computeMinLengthToLayers()
Пример #6
0
def sampleHypercube(n_dim, n_samp, rand_set_id=0, crit='m', iterations=5,
                    rdata_dir='~/cowen/data/random'):
    """Load (if the file exists) or generate Latin hypercube samples.

    The file name is derived from the sampling parameters via
    `samplesFilename`, and the file is looked up in `rdata_dir` (with `~`
    and environment variables expanded). If it is missing, new samples are
    drawn with `pyDOE.lhs` and written to that location.

    Requires pyDOE to generate new samples.

    Returns
    -------
    samps
        The samples loaded from file or freshly generated.

    """
    fname = samplesFilename(n_dim=n_dim, n_samp=n_samp,
                            rand_set_id=rand_set_id, crit=crit,
                            iterations=iterations)
    rdata_dir = os.path.expandvars(os.path.expanduser(rdata_dir))
    fpath = os.path.join(rdata_dir, fname)

    # Reuse a previously stored sample set when there is one
    if os.path.exists(fpath):
        return fileio.from_file(fpath)

    logging.info('File not found. Generating new set of samples & saving'
                 ' result to "%s"', fpath)
    # Imported lazily: only needed when new samples must be generated
    import pyDOE
    mkdir(rdata_dir)
    # Set a deterministic random state based upon the critical hypercube
    # sampling parameters specified
    n_bad_seeds(n_dim, n_samp, rand_set_id)
    samps = pyDOE.lhs(n=n_dim, samples=n_samp, criterion=crit,
                      iterations=iterations)
    fileio.to_file(samps, fpath)
    return samps
Пример #7
0
    def save(self, fpath, ver=None, **kwargs):
        """Save cross sections (and the energy specification) to `fpath`.

        Data already present in the file is loaded, validated by
        instantiating a `CrossSections` from each stored version, and kept;
        the entry for `ver` is added or overwritten.

        Parameters
        ----------
        fpath : str
            Destination; resolved through `find_resource` when possible,
            then `~` and environment variables are expanded.
        ver : str or None
            Version key to store under. If None, `self._ver` is used;
            otherwise it must equal `self._ver`.
        **kwargs
            Forwarded to `to_file`.

        Raises
        ------
        ValueError
            If neither `ver` nor `self._ver` is set.
        AssertionError
            If `ver` is given but differs from `self._ver`.

        """
        if ver is not None:
            assert ver == self._ver
        elif self._ver is None:
            raise ValueError(
                'Either a ver must be specified in call to `save` or it '
                'must have been set prior to the invocation of `save`.'
            )
        else:
            ver = self._ver

        try:
            fpath = find_resource(fpath)
        except IOError:
            # Not an existing resource; fall back to the path as given
            pass
        fpath = os.path.expandvars(os.path.expanduser(fpath))

        # Get any existing data from file
        all_xs = from_file(fpath) if os.path.exists(fpath) else {}

        # Validate existing data by instantiating objects from each
        for stored_ver, stored in all_xs.items():
            CrossSections(ver=stored_ver, energy=stored['energy'],
                          xsec=stored['xsec'])

        if ver in all_xs:
            logging.warning('Overwriting existing version "' + ver +
                            '" in file ' + fpath)
        all_xs[ver] = {'xsec':self, 'energy':self.energy}
        to_file(all_xs, fpath, **kwargs)
Пример #8
0
    def _get_reco_kernels(self, kernelfile=None, **kwargs):
        """Return reconstruction kernels loaded from a kernel file.

        Parameters
        ----------
        kernelfile : str or None
            Resource location of the kernel file. None or a value equal to
            `self.kernelfile` selects the default kernels, which are loaded
            once and cached in `self.kernels`. Any other value is loaded
            and returned without touching the cache.
        **kwargs
            Only ``e_reco_scale`` and ``cz_reco_scale`` are inspected; each
            must equal 1 if present.

        Returns
        -------
        kernels
            Whatever `fileio.from_file` returns for the selected file.

        Raises
        ------
        ValueError
            If a reco scale other than 1 is requested.

        """
        for reco_scale in ['e_reco_scale', 'cz_reco_scale']:
            if reco_scale in kwargs and not kwargs[reco_scale] == 1:
                raise ValueError('%s = %.2f not valid for RecoServiceKernelFile!'
                                 %(reco_scale, kwargs[reco_scale]))

        if kernelfile not in [self.kernelfile, None]:
            logging.info('Reconstruction from non-default kernel file %s!'%kernelfile)
            return fileio.from_file(find_resource(kernelfile))

        if not hasattr(self, 'kernels'):
            # `kernelfile` may be None on this path, so always load the
            # default file recorded on the instance
            logging.info('Using file %s for default reconstruction'%(self.kernelfile))
            self.kernels = fileio.from_file(find_resource(self.kernelfile))

        return self.kernels
Пример #9
0
 def load(fpath, ver=None, **kwargs):
     """Load cross sections from a file locatable and readable by the PISA
     from_file command.

     Parameters
     ----------
     fpath : str
         Passed to `from_file`.
     ver
         Key used to index into the top level of the loaded dictionary.
         The lookup is performed even for the default None, so a ValueError
         is raised unless the file has such a key.
     **kwargs
         Forwarded to `from_file`.

     Returns
     -------
     energy, xsec : tuple
         The 'energy' and 'xsec' entries stored under `ver`.

     Raises
     ------
     ValueError
         If `ver` is not a top-level key of the loaded data.

     """
     all_xsec = from_file(fpath, **kwargs)
     if ver not in all_xsec:
         # The trailing space after "file" keeps the path separated from it
         raise ValueError('Version "%s" not found. Valid versions in file '
                          '"%s" are: %s'
                          % (ver, fpath, list(all_xsec.keys())))
     entry = all_xsec[ver]
     return entry['energy'], entry['xsec']
Пример #10
0
    def load_noise_events(config, dataset):
        """Load one noise dataset described by `config` into a `Data` object.

        Parameters
        ----------
        config
            Config object providing `get(section, option)` and
            `items(section)`; the 'general' and 'noise' sections are read.
        dataset : str
            'nominal' selects the file shared by the nominal choice of every
            systematic in the noise ``sys_list``; any other value is used as
            the section name whose ``file_path`` is loaded.

        Returns
        -------
        Data
            Holds the events under the key 'noise', with metadata entries
            'name' and 'noise_sample'.

        Raises
        ------
        AssertionError
            If the nominal systematics point to more than one file.

        """
        name = config.get('general', 'name')
        weight = config.get('noise', 'weight')
        weight_units = config.get('noise', 'weight_units')
        sys_list = split(config.get('noise', 'sys_list'))
        base_prefix = config.get('noise', 'baseprefix')
        keep_keys = split(config.get('noise', 'keep_keys'))
        aliases = config.items('noise%saliases' % SEP)
        if base_prefix == 'None':
            base_prefix = ''

        if dataset != 'nominal':
            file_path = config.get(dataset, 'file_path')
        else:
            # Every systematic names its nominal set; all must share a file
            paths = []
            for sys in sys_list:
                sys_section = 'noise%s%s' % (SEP, sys)
                nominal_section = (sys_section + SEP
                                   + config.get(sys_section, 'nominal'))
                paths.append(config.get(nominal_section, 'file_path'))
            if len(set(paths)) > 1:
                raise AssertionError(
                    'Choice of nominal file is ambigous. Nominal '
                    'choice of systematic parameters must coincide '
                    'with one and only one file. Options found are: '
                    '{0}'.format(paths))
            file_path = paths[0]
        logging.info('Extracting noise dataset "{0}" from sample '
                     '"{1}"'.format(dataset, name))

        noise = from_file(file_path)
        sample.strip_keys(keep_keys, noise)

        # 'None'/'1' -> unit weights, '0' -> zero weights, anything else
        # names the column that holds the weights
        if weight in ('None', '1'):
            noise['sample_weight'] = np.ones(noise['weights'].shape)
        elif weight == '0':
            noise['sample_weight'] = np.zeros(noise['weights'].shape)
        else:
            noise['sample_weight'] = noise[weight] * ureg(weight_units)
        noise['pisa_weight'] = deepcopy(noise['sample_weight'])

        for alias, expr in aliases:
            if alias in noise:
                logging.warning(
                    'Overwriting Data key {0} with aliased expression '
                    '{1}'.format(alias, expr))
            # NOTE(review): `eval` runs text taken from the config file, with
            # `<key>` rewritten to `noise['key']`; only use trusted configs
            noise[alias] = eval(re.sub(r'\<(.*?)\>', r"noise['\1']", expr))

        return Data({'noise': noise},
                    metadata={
                        'name': name,
                        'noise_sample': dataset
                    })
Пример #11
0
def load_pid_energy_param(source):
    """Load pid energy-dependent parameterisation from file or dictionary.

    Parameters
    ----------
    source : string or mapping
        If string, interpret as resource location of the file and load it
        with `from_file`; if mapping, use directly.

    Returns
    -------
    pid_energy_param_dict : OrderedDict
        Keys are `NuFlavIntGroup`s; each value is an OrderedDict mapping a
        PID signature to a callable.

    Raises
    ------
    TypeError
        If `source` is neither a string nor a mapping, or if a param spec
        is neither a string nor a callable.
    ValueError
        If a string param spec does not evaluate to a callable.

    """
    if not isinstance(source, (str, Mapping)):
        raise TypeError('`source` must either be string or mapping; got %s'
                        ' instead.' % type(source))
    raw_spec = from_file(source) if isinstance(source, str) else source

    pid_energy_param_dict = OrderedDict()
    for group_str, signature_specs in raw_spec.items():
        per_signature = OrderedDict()
        pid_energy_param_dict[NuFlavIntGroup(group_str)] = per_signature

        for signature, spec in signature_specs.items():
            if isinstance(spec, str):
                # NOTE(review): `eval` executes the spec text as Python;
                # only load parameterisations from trusted sources
                param_func = eval(spec)
                if not callable(param_func):
                    raise ValueError(
                        'Group %s PID signature %s param spec "%s" does'
                        ' not evaluate to a callable.'
                        % (group_str, signature, spec)
                    )
            elif callable(spec):
                param_func = spec
            else:
                raise TypeError(
                    'Group %s PID signature %s parameterization is a "%s"'
                    ' but must be a string or callable.'
                    % (group_str, signature, type(spec))
                )
            per_signature[signature] = param_func

    return pid_energy_param_dict
Пример #12
0
 def load_discr_sys(self, pnames):
     """Load the fit results of every parameter and check map names.

     For each name in `pnames`, the file given by the ``<pname>_file``
     parameter is loaded into `self.fit_results[pname]`, and its
     ``map_names`` must match `self.input_names` (compared after sorting).

     Parameters
     ----------
     pnames : iterable of str
         Parameter names; also stored as `self.pnames`.

     Raises
     ------
     ValueError
         If a file's ``map_names`` differ from `self.input_names`.

     """
     self.fit_results = {}
     for pname in pnames:
         results = from_file(self.params[pname + '_file'].value)
         self.fit_results[pname] = results
         if sorted(self.input_names) == sorted(results['map_names']):
             continue
         raise ValueError(
             "Expected input names - %s. Got %s." %
             (self.input_names, results['map_names']))
     self.pnames = pnames
Пример #13
0
def earth_model(YeI, YeO, YeM, PREM_file='osc/nuSQuIDS_PREM.dat'):  # pylint: disable=invalid-name
    """Return a `nuSQUIDSpy.EarthAtm` object with
    user-defined electron fractions. Note that a
    temporary Earth model file is produced (over-
    written) each time this function is executed.

    Parameters
    ----------
    YeI, YeO, YeM : float
        electron fractions in Earth's inner core,
        outer core, and mantle
        (defined by spherical shells with radii of
         1121.5, 3480.0, and 6371.0 km)
    PREM_file : str
        path to nuSQuIDS PREM Earth Model file whose
        electron fractions will be modified

    Returns
    -------
    earth_atm : nuSQUIDSpy.EarthAtm
        can be passed to `Set_EarthModel` method of
        a nuSQuIDs propagator object
    """
    logging.debug("Regenerating nuSQuIDS Earth Model with electron"
                  " fractions: YeI=%s, YeO=%s, YeM=%s" % (YeI, YeO, YeM))
    earth_radius = 6371.0 # km
    # radii at which main transitions occur according to PREM
    transition_radii = np.array([1121.5, 3480.0, earth_radius]) # km
    inner_core_r, outer_core_r, mantle_r = transition_radii

    fname_tmp = os.path.join(CACHE_DIR, "nuSQuIDS_PREM_TMP.dat")
    PREM_file = from_file(fname=PREM_file, as_array=True)
    for row_idx, (frac_radius, _, _) in enumerate(PREM_file):
        # first column is the fraction of the total radius
        radius_km = frac_radius*earth_radius
        # NOTE(review): no branch covers radius_km > earth_radius; such a
        # row reuses the previous row's value (or fails on the first row)
        if radius_km <= inner_core_r:
            Ye_new = YeI
        elif radius_km <= outer_core_r:
            Ye_new = YeO
        elif radius_km <= mantle_r:
            Ye_new = YeM
        # third column holds the electron fraction
        PREM_file[row_idx][2] = Ye_new

    # write the modified model to the (shared) temporary file and build the
    # Earth model from it
    np.savetxt(fname=fname_tmp, X=PREM_file)
    return nsq.EarthAtm(fname_tmp)
Пример #14
0
def stability_test(func, func_kw, ref_path, ignore_fails=False, define_as_ref=False):
    """Basic stability test of a Numba CPUDispatcher function (i.e., function
    compiled via @jit / @njit).

    Parameters
    ----------
    func
        Function to test; its name is read from `func.py_func.__name__`.
    func_kw
        Passed together with `func` to `execute_func`.
    ref_path : str
        Location of the reference file (run through `expand`).
    ignore_fails : bool
        Forwarded to `check`.
    define_as_ref : bool
        If True, the freshly computed result is first written to `ref_path`.

    Returns
    -------
    test, ref : tuple
        The computed result and the reference loaded from `ref_path`.

    """
    func_name = func.py_func.__name__
    logging.info("stability testing `%s`", func_name)
    ref_path = expand(ref_path)

    result = execute_func(func=func, func_kw=func_kw)
    if define_as_ref:
        to_file(result, ref_path)

    # The reference is always read back from file, even when it was just
    # written, so that the to/from file round trip is exercised as well
    reference = from_file(ref_path)

    check(test=result, ref=reference, label=func_name,
          ignore_fails=ignore_fails)
    return result, reference
Пример #15
0
    def __init__(self, prem_file, detector_depth=1., prop_height=2.):
        """Set up the layer geometry, optionally from an Earth model file.

        Parameters
        ----------
        prem_file : str or None
            Passed to `from_file(..., as_array=True)`; column 0 is read as
            radius and column 1 as density. If None, no Earth model is used
            and an Earth radius of 6371.0 is assumed.
        detector_depth : float
            Must be > 0 and no larger than the Earth radius.
        prop_height : float
            Must be >= 0; added to the Earth radius to form the outermost
            layer when an Earth model is used.

        Raises
        ------
        AssertionError
            If `detector_depth` or `prop_height` is out of range.

        """
        self.using_earth_model = prem_file is not None
        if self.using_earth_model:
            prem = from_file(prem_file, as_array=True)
            r_earth = prem[-1][0]
            self.default_elec_frac = 0.5

            # Radii and densities are taken in reverse order w.r.t. the
            # file: the first elements correspond to the Earth's surface
            # and the following ones go deeper toward the core
            file_rhos = prem[..., 1][::-1].astype(FTYPE)
            file_radii = prem[..., 0][::-1].astype(FTYPE)

            # Prepend an external layer corresponding to the atmosphere /
            # production boundary
            outer_radius = np.array([r_earth + prop_height])
            self.radii = np.concatenate((outer_radius, file_radii))
            self.rhos = np.concatenate((np.ones(1, dtype=FTYPE), file_rhos))
            self.max_layers = 2 * (len(self.radii))
        else:
            # If no Earth model provided, use a standard Earth radius value
            r_earth = 6371.0

        # Make some checks about the input production height and detector
        # depth
        assert detector_depth > 0, 'ERROR: detector depth must be a positive value'
        assert detector_depth <= r_earth, 'ERROR: detector depth is deeper than one Earth radius!'
        assert prop_height >= 0, 'ERROR: neutrino production height must be positive'

        self.r_detector = r_earth - detector_depth
        self.prop_height = prop_height
        self.detector_depth = detector_depth

        if self.using_earth_model:
            # Compute the coszen_limits
            self.computeMinLengthToLayers()
Пример #16
0
    def __init__(self, run_settings, detector=None):
        """Populate this dict-like object with per-run MC settings.

        Parameters
        ----------
        run_settings : str or dict
            Resource location of a run settings file, or the already loaded
            dict. The dict is either two levels deep, ``{RUN: {...}}``, or
            three levels deep, ``{DET: {RUN: {...}}}``; a run's settings are
            recognised by containing the key 'flavints'.
        detector : str or None
            Which detector to select from a three-level dict. May be omitted
            if the dict contains exactly one detector.

        Raises
        ------
        TypeError
            If `run_settings` is neither a str nor a dict.
        ValueError
            If several detectors are present and none was specified.
        Exception
            If the dict does not have one of the two supported layouts.

        """
        super().__init__()
        if isinstance(run_settings, str):
            rsd = fileio.from_file(resources.find_resource(run_settings))
        elif isinstance(run_settings, dict):
            rsd = run_settings
        else:
            # str() is required: a type cannot be concatenated to a str
            raise TypeError('Unhandled run_settings type passed in arg: ' +
                            str(type(run_settings)))

        if detector:
            detector = str(detector).strip()
        self.detector = detector

        # Determine how deeply nested runs are in the dict to allow for
        # user to specify a dict that has multiple detectors in it OR
        # a dict with just a single detector in it. Dict views cannot be
        # indexed on Python 3, hence next(iter(...)); the {} defaults make
        # empty input fall through to the layout error below.
        first_entry = next(iter(rsd.values()), {})
        if 'flavints' in first_entry:
            runs_d = rsd
        elif 'flavints' in next(iter(first_entry.values()), {}):
            if self.detector is None:
                if len(rsd) == 1:
                    runs_d = first_entry
                else:
                    raise ValueError('Must specify which detector; detectors '
                                     'found: ' + str(list(rsd.keys())))
            else:
                runs_d = rsd[self.detector.strip()]
        else:
            raise Exception('dict must either be 3 levels: '
                            '{DET:{RUN:{...}}}; or 2 levels: {RUN:{...}}')

        # Force run numbers to be strings (JSON files cannot have an int as
        # a key, so it is a string upon import, and it's safest to keep it as
        # a string considering how non-standardized naming is in IceCube) and
        # convert actual run settings dict to MCSimRunSettings instances
        runs_d = {str(k): MCSimRunSettings(v) for k, v in runs_d.items()}

        # Save the runs_d to this object instance, which behaves like a dict
        self.update(runs_d)
Пример #17
0
    def load_from_nu_file(events_file, all_flavints, weight, weight_units,
                          keep_keys, aliases):
        """Load neutrino events from a file and split them by flavour and
        interaction type.

        Parameters
        ----------
        events_file : str
            Passed to `from_file`; the result must provide the 'ptype' and
            'interaction' entries.
        all_flavints : iterable
            Flavour/interaction types to fill; each must expose `cc` and
            `particle` attributes.
        weight : str
            'None' or '1' gives unit weights, '0' gives zero weights, any
            other value names the events entry holding the weights.
        weight_units : str
            Units applied (via `ureg`) to a named weight entry.
        keep_keys
            Passed to `sample.strip_keys` together with the events.
        aliases : iterable of (alias, expr) pairs
            Extra entries to compute; ``<key>`` in `expr` refers to
            ``events['key']``.

        Returns
        -------
        FlavIntDataGroup
            One dict of masked event entries per element of `all_flavints`.

        """
        flav_fidg = FlavIntDataGroup(flavint_groups=all_flavints)

        events = from_file(events_file)
        sample.strip_keys(keep_keys, events)

        # Sign of `ptype` separates particles from antiparticles;
        # `interaction` 1 is treated as CC and 2 as NC
        nu_mask = events['ptype'] > 0
        nubar_mask = events['ptype'] < 0
        cc_mask = events['interaction'] == 1
        nc_mask = events['interaction'] == 2

        if weight == 'None' or weight == '1':
            events['sample_weight'] = \
                np.ones(events['ptype'].shape) * ureg.dimensionless
        elif weight == '0':
            events['sample_weight'] = \
                np.zeros(events['ptype'].shape) * ureg.dimensionless
        else:
            events['sample_weight'] = events[weight] * \
                ureg(weight_units)
        events['pisa_weight'] = deepcopy(events['sample_weight'])

        for alias, expr in aliases:
            if alias in events:
                logging.warning(
                    'Overwriting Data key {0} with aliased expression '
                    '{1}'.format(alias, expr))
            # NOTE(review): `eval` runs text supplied by the caller/config;
            # only use trusted alias expressions
            events[alias] = eval(re.sub(r'\<(.*?)\>', r"events['\1']", expr))

        for flavint in all_flavints:
            i_mask = cc_mask if flavint.cc else nc_mask
            t_mask = nu_mask if flavint.particle else nubar_mask
            selection = i_mask & t_mask

            # Iterate the mapping directly: `iterkeys()` does not exist on
            # Python 3 dicts, while plain iteration works on 2 and 3
            flav_fidg[flavint] = {var: events[var][selection]
                                  for var in events}
        return flav_fidg
Пример #18
0
def parse_fit_config(fit_cfg):
    """Perform sanity checks on and parse fit configuration file.

    Parameters
    ----------
    fit_cfg : str
        path to a fit configuration file

    Returns
    -------
    fit_cfg : PISAConfigParser
        parsed fit configuration
    sys_list : list of str
        parsed names of systematic parameters
    units_list : list of str
        units corresponding to each discrete systematic
    combine_regex : list of str
        each string is a regular expression for combining pipeline outputs; see
        :func:`pisa.core.map.MapSet.combine_regex` for details.

    Raises
    ------
    KeyError
        If the general section or its systematics list option is missing.
    ValueError
        If the number of units differs from the number of systematics.

    """
    fit_cfg = from_file(fit_cfg)
    # Map whitespace-stripped section names to the names as written
    no_ws_section_map = {s.strip(): s for s in fit_cfg.sections()}

    if GENERAL_SECTION_NAME not in no_ws_section_map.values():
        raise KeyError('Fit config is missing the "%s" section!' %
                       GENERAL_SECTION_NAME)

    general_section = fit_cfg[GENERAL_SECTION_NAME]
    if SYS_LIST_OPTION not in general_section:
        raise KeyError(
            "Fit config has to specify systematic parameters as"
            ' "%s" option in "%s" section (comma-separated list of names).' %
            (SYS_LIST_OPTION, GENERAL_SECTION_NAME))

    sys_list = [s.strip() for s in general_section[SYS_LIST_OPTION].split(",")]

    if UNITS_OPTION in general_section:
        units_list = []
        units_specs = (general_section[UNITS_OPTION].replace(
            UNITS_SPECIFIER, "").split(","))
        for units_spec in units_specs:
            # Make sure units are interpret-able by Pint
            try:
                ureg.Unit(units_spec)
            except Exception:
                # Log which unit failed, then let the original error through
                logging.error(
                    'Unit "%s" specified by "%s" option in "general" section is not'
                    " interpret-able by Pint",
                    units_spec,
                    UNITS_OPTION,
                )
                raise
            units_list.append(units_spec)
    else:
        units_list = ["dimensionless"] * len(sys_list)
        logging.warning(
            "No %s option found in %s section; assuming systematic parameters are"
            " dimensionless",
            UNITS_OPTION,
            GENERAL_SECTION_NAME,
        )

    if len(units_list) != len(sys_list):
        raise ValueError(
            '{} units specified by "{}" option but {} systematics specified by "{}"'
            " option; must be same number of each.".format(
                len(units_list), UNITS_OPTION, len(sys_list), SYS_LIST_OPTION))

    logging.info(
        "Found systematic parameters %s",
        ["{} ({})".format(s, u) for s, u in zip(sys_list, units_list)],
    )

    combine_regex = general_section.get(COMBINE_REGEX_OPTION, None)
    if combine_regex:
        try:
            combine_regex = literal_eval(combine_regex)
        except (SyntaxError, ValueError):
            # Fall back to the old comma-separated syntax
            logging.warning(
                'Deprecated syntax for "combine_re" (make into a Python-evaluatable'
                " sequence of strings instead) :: combine_regex = %s",
                combine_regex,
            )
            combine_regex = [r.strip() for r in combine_regex.split(",")]

    # Copy every option of the "apply all" section into each nominal and
    # systematic set section
    if APPLY_ALL_SECTION_NAME in no_ws_section_map:
        apply_all_section = fit_cfg[no_ws_section_map[APPLY_ALL_SECTION_NAME]]
        for no_ws_sname, sname in no_ws_section_map.items():
            if not no_ws_sname.startswith((NOMINAL_SET_PFX, SYS_SET_PFX)):
                continue
            sys_set_section = fit_cfg[sname]
            for option, val in apply_all_section.items():
                sys_set_section[option] = val

    return fit_cfg, sys_list, units_list, combine_regex
Пример #19
0
def parse_args(command, description):
    """Parse command line args.

    The set of arguments exposed depends on `command`: arguments that only
    make sense for the discrete hypothesis test (e.g. separate h0/h1/data
    pipelines, fluctuation and trial settings, blinding) are not added for the
    injected-parameter scan or the systematics tests, which instead get a
    single ``--pipeline`` argument plus their own specific options.

    Arguments are parsed from ``sys.argv[2:]``.

    Parameters
    ----------
    command : one of `discrete_hypo_test`, `inj_param_scan`, `systematics_tests`
        Selects which arguments are made available.

    description : string
        Description text passed on to the argument parser.

    Returns
    -------
    init_args_d : dict
        The parsed arguments, post-processed as follows:

        * ``v`` is removed and passed to `set_verbosity`
        * ``min_settings``, ``min_method`` and ``min_opt`` are removed and
          merged into a single ``minimizer_settings`` dict
        * ``no_octant_check`` / ``ordering_check`` are replaced by
          ``check_octant`` / ``check_ordering``
        * ``data_is_mc`` is removed and ``data_is_data`` is set accordingly
          (for commands other than `discrete_hypo_test`, ``data_is_data``,
          ``fluctuate_data`` and ``fluctuate_fid`` are all forced to False)
        * ``no_min_history`` is replaced by ``store_minimizer_history``
        * ``other_metric`` is replaced by ``other_metrics``, which is always
          present and is None if there are no other metrics to evaluate

    Raises
    ------
    SystemExit
        Raised by the argument parser on invalid command line input,
        including when neither --min-settings nor --min-method is given.

    ValueError
        If a --min-opt entry is not of the form OPTION:VALUE.

    """
    assert command in [discrete_hypo_test, inj_param_scan, systematics_tests]

    parser = ArgumentParser(
        description=description,
        formatter_class=ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        '-d',
        '--logdir',
        required=True,
        metavar='DIR',
        type=str,
        help='Directory into which to store results and metadata.')
    parser.add_argument('--min-settings',
                        type=str,
                        metavar='MINIMIZER_CFG',
                        default=None,
                        help='''Minimizer settings config file.''')
    parser.add_argument(
        '--min-method',
        type=str,
        default=None,
        choices=('l-bfgs-b', 'slsqp'),
        help='''Name of minimizer to use. Note that this takes precedence over
        the minimizer method specified via the --min-settings config
        file.''')
    parser.add_argument(
        '--min-opt',
        type=str,
        metavar='OPTION:VALUE',
        nargs='+',
        default=None,
        help='''Minimizer option:value pair(s) (can specify multiple).
        Values specified here override any of the same name in the config file
        specified by --min-settings''')
    parser.add_argument(
        '--no-octant-check',
        action='store_true',
        help='''Disable fitting hypotheses in theta23 octant opposite initial
        octant.''')
    parser.add_argument(
        '--ordering-check',
        action='store_true',
        help='''Fit both ordering hypotheses. This should only be flagged if
        the ordering is NOT the discrete hypothesis being tested''')
    parser.add_argument(
        '--shared-params',
        type=str,
        default=None,
        action='append',
        help='''Shared parameters for multi detector analysis (repeat for 
        multiple). The values of these parameters are kept the same in all 
        detectors that contain the param.''')

    if command == discrete_hypo_test:
        # Data cannot be data for MC studies e.g. injected parameter scans so
        # these arguments are redundant there.
        group = parser.add_mutually_exclusive_group(required=True)
        group.add_argument(
            '--data-is-data',
            action='store_true',
            help='''Data pipeline is based upon actual, measured data. The
            naming scheme for stored results is chosen accordingly.''')
        group.add_argument(
            '--data-is-mc',
            action='store_true',
            help='''Data pipeline is based upon Monte Carlo simulation, and not
            actual data. The naming scheme for stored results is chosen
            accordingly. If this is selected, --fluctuate-data is forced off.'''
        )

        # For the MC tests (injected parameter scan, systematic tests etc.) you
        # must have the same pipeline for h0, h1 and data. So this argument is
        # instead replaced with a generic pipeline argument.
        parser.add_argument(
            '--h0-pipeline',
            required=True,
            type=str,
            action='append',
            metavar='PIPELINE_CFG',
            help='''Settings for the generation of hypothesis h0
            distributions; repeat this argument to specify multiple
            pipelines.''')

    else:
        assert command in [inj_param_scan, systematics_tests]
        parser.add_argument(
            '--pipeline',
            required=True,
            type=str,
            action='append',
            metavar='PIPELINE_CFG',
            help='''Settings for the generation of h0, h1 and data
            distributions; repeat this argument to specify multiple
            pipelines.''')

    parser.add_argument(
        '--h0-param-selections',
        type=str,
        default=None,
        metavar='PARAM_SELECTOR_LIST',
        help='''Comma-separated (no spaces) list of param selectors to apply to
        hypothesis h0's distribution maker's pipelines.''')
    parser.add_argument('--h0-name',
                        type=str,
                        metavar='NAME',
                        default=None,
                        help='''Name for hypothesis h0. E.g., "NO" for normal
        ordering in the neutrino mass ordering analysis. Note that the name
        here has no bearing on the actual process, so it's important that you
        be careful to use a name that appropriately identifies the
        hypothesis.''')
    # For the MC tests (injected parameter scan, systematic tests etc.) you
    # must have the same pipeline for h0, h1 and data. So this argument is
    # hidden.
    if command not in (inj_param_scan, systematics_tests):
        parser.add_argument(
            '--h1-pipeline',
            type=str,
            action='append',
            default=None,
            metavar='PIPELINE_CFG',
            help='''Settings for the generation of hypothesis h1 distributions;
            repeat this argument to specify multiple pipelines. If omitted, the
            same settings as specified for --h0-pipeline are used to generate
            hypothesis h1 distributions (and so you have to use the
            --h1-param-selections argument to generate a hypotheses distinct
            from hypothesis h0 but still use h0's distribution maker).''')
    parser.add_argument(
        '--h1-param-selections',
        type=str,
        default=None,
        metavar='PARAM_SELECTOR_LIST',
        help='''Comma-separated (no spaces) list of param selectors to apply to
        hypothesis h1 distribution maker's pipelines.''')
    parser.add_argument('--h1-name',
                        type=str,
                        metavar='NAME',
                        default=None,
                        help='''Name for hypothesis h1. E.g., "IO" for inverted
        ordering in the neutrino mass ordering analysis. Note that the name
        here has no bearing on the actual process, so it's important that you
        be careful to use a name that appropriately identifies the
        hypothesis.''')
    # For the MC tests (injected parameter scan, systematic tests etc.) you
    # must have the same pipeline for h0, h1 and data. So this argument is
    # hidden.
    if command not in (inj_param_scan, systematics_tests):
        parser.add_argument(
            '--data-pipeline',
            type=str,
            action='append',
            default=None,
            metavar='PIPELINE_CFG',
            help='''Settings for the generation of "data" distributions; repeat
            this argument to specify multiple pipelines. If omitted, the same
            settings as specified for --h0-pipeline are used to generate data
            distributions (i.e., data is assumed to come from hypothesis h0.'''
        )
    parser.add_argument(
        '--data-param-selections',
        type=str,
        default=None,
        metavar='PARAM_SELECTOR_LIST',
        help='''Comma-separated list of param selectors to apply to the data
        distribution maker's pipelines. If neither --data-pipeline nor
        --data-param-selections are specified, *both* are copied from
        --h0-pipeline and --h0-param-selections, respectively. However,
        if --data-pipeline is specified while --data-param-selections is not,
        then the param selections in the pipeline config file(s) specified are
        used to produce data distributions.''')
    parser.add_argument(
        '--data-name',
        type=str,
        metavar='NAME',
        default=None,
        help='''Name for the data. E.g., "NO" for normal ordering in the
        neutrino mass ordering analysis. Note that the name here has no bearing
        on the actual process, so it's important that you be careful to use a
        name that appropriately identifies the hypothesis.''')
    # For the injected parameter scan and systematic studies, only the Asimov
    # analysis should be used, so these arguments are not needed.
    if command not in (inj_param_scan, systematics_tests):
        parser.add_argument(
            '--fluctuate-data',
            action='store_true',
            help='''Apply fluctuations to the data distribution. This should
            *not* be set for analyzing "real" (measured) data, and it is common
            to not use this feature even for Monte Carlo analysis. Note that if
            this is not set, --num-data-trials and --data-start-ind are forced
            to 1 and 0, respectively.''')
        parser.add_argument(
            '--fluctuate-fid',
            action='store_true',
            help='''Apply fluctuations to the fiducaial distributions. If this
            flag is not set, --num-fid-trials and --fid-start-ind are forced to
            1 and 0, respectively.''')
    parser.add_argument(
        '--metric',
        type=str,
        required=True,
        metavar='METRIC',
        action='append',
        choices=sorted(ALL_METRICS),
        help='''Name of metric(s) to use for optimizing the fit. Must be one of
        %s. Repeat this argument if you want to use different metrics for
        different detectors. If only one metric is specified, all detectors will
        use the same. Otherwise you have to specify one metric for each detector
        (even if two use the same) and pay attention to the order.''' %
        (ALL_METRICS, ))
    parser.add_argument(
        '--other-metric',
        type=str,
        default=None,
        metavar='METRIC',
        action='append',
        choices=['all'] + sorted(ALL_METRICS),
        help='''Name of another metric to evaluate at the best-fit point. Must
        be either 'all' or one of %s. Repeat this argument (or use 'all') to
        specify multiple metrics.''' % (ALL_METRICS, ))
    if command not in (inj_param_scan, systematics_tests):
        parser.add_argument(
            '--num-data-trials',
            type=int,
            default=1,
            help='''When performing Monte Carlo analysis, set to > 1 to produce
            multiple pseudodata distributions from the data distribution maker's
            Asimov distribution. This is overridden if --fluctuate-data is not
            set (since each data distribution will be identical if it is not
            fluctuated). This is typically left at 1 (i.e., the Asimov
            distribution is assumed to be representative.''')
        parser.add_argument('--data-start-ind',
                            type=int,
                            default=0,
                            help='''Fluctated data set index.''')
        parser.add_argument(
            '--num-fid-trials',
            type=int,
            default=1,
            help='''Number of fiducial pseudodata trials to run. In our
            experience, it takes ~10^3-10^5 fiducial psuedodata trials to
            achieve low uncertainties on the resulting significance, though
            that exact number will vary based upon the details of an
            analysis.''')
        parser.add_argument('--fid-start-ind',
                            type=int,
                            default=0,
                            help='''Fluctated fiducial data index.''')
    # A blind analysis only makes sense when the possibility of actually
    # analysing data is available.
    if command not in (inj_param_scan, systematics_tests):
        parser.add_argument(
            '--blind',
            action='store_true',
            help='''Blinded analysis. Do not show parameter values or store to
            logfiles.''')
    parser.add_argument(
        '--allow-dirty',
        action='store_true',
        help='''Warning: Use with caution. (Allow for run despite dirty
        repository.)''')
    parser.add_argument(
        '--allow-no-git-info',
        action='store_true',
        help='''*** DANGER! Use with extreme caution! (Allow for run despite
        complete inability to track provenance of code.)''')
    parser.add_argument(
        '--no-min-history',
        action='store_true',
        help='''Do not store minimizer history (steps). This behavior is also
        enforced if --blind is specified.''')
    # Add in the arguments specific to the injected parameter scan.
    if command == inj_param_scan:
        parser.add_argument(
            '--param_name',
            type=str,
            metavar='NAME',
            required=True,
            help='''Name of param to scan over. This must be in the config
            files defined above. One exception is that you can define this as
            `sin2theta23` and it will be interpreted not as theta23 values but
            as the square of the sine of theta23 values instead.''')
        parser.add_argument(
            '--inj_vals',
            type=str,
            required=True,
            help='''List of values to inject as true points in the parameter
            defined above. Must be something that numpy can interpret. In this
            script, numpy is imported as np so please use np in your string. An
            example would be np.linspace(0.35,0.65,31).''')
        parser.add_argument(
            '--inj_units',
            type=str,
            required=True,
            help='''A string to be able to deal with the units in the parameter
            scan and make sure that they match those in the config files. Even
            if the parameter is dimensionless this must be stated.''')
        parser.add_argument(
            '--use-inj-prior',
            action='store_true',
            help='''Generally, one should not use a prior on the parameter of
            interest here since the Asimov analysis breaks down with the use of
            non-central prior i.e. injecting a truth that differs from the
            centre of the prior. Flag this to force the prior to be left on.'''
        )
    # Add in the arguments specific to the systematic tests.
    if command == systematics_tests:
        parser.add_argument(
            '--inject_wrong',
            action='store_true',
            help='''Inject a parameter to some systematically wrong value.
            This will be either +/- 1 sigma or +/- 10%% if such a definition
            is impossible. By default this parameter will be fixed unless
            the fit_wrong argument is also flagged.''')
        parser.add_argument(
            '--fit_wrong',
            action='store_true',
            help='''In the case of injecting a systematically wrong hypothesis
            setting this argument will get the minimiser to try correct for it.
            If inject_wrong is set to false then this must also be set to
            false or else the script will fail.''')
        parser.add_argument(
            '--only_syst',
            default=None,
            type=str,
            action='append',
            metavar='PARAM_NAME',
            help='''Specify the name of one of the systematics in the file to
            run the test for this systematic. Repeat this argument to specify
            multiple systematics. If none are provided, the test will be run
            over all systematics in the pipeline.''')
        parser.add_argument(
            '--skip_baseline',
            action='store_true',
            help='''Skip the baseline systematic test i.e. the one where none
            of them are fixed and/or modified. In most cases you will want this
            for comparison but if you are only interested in the effect of
            shifting certain systematics then this step can be skipped.''')
    parser.add_argument(
        '--pprint',
        action='store_true',
        help='''Live-updating one-line vew of metric and parameter values. (The
        latter are not displayed if --blind is specified.)''')
    parser.add_argument('-v',
                        action='count',
                        default=None,
                        help='set verbosity level')
    args = parser.parse_args(sys.argv[2:])

    # A minimizer must come from somewhere. Report this the same way argparse
    # reports every other invalid command line (an `assert` would be silently
    # skipped when Python runs with -O).
    if args.min_settings is None and args.min_method is None:
        parser.error(
            'at least one of --min-settings or --min-method must be specified'
        )
    init_args_d = vars(args)

    set_verbosity(init_args_d.pop('v'))

    min_settings_from_file = init_args_d.pop('min_settings')
    minimizer = init_args_d.pop('min_method')
    min_opt = init_args_d.pop('min_opt')

    # TODO: put this datastructure remnant from PISA 2 out of its misery...
    minimizer_settings = dict(method=dict(value='', desc='no desc'),
                              options=dict(value=dict(), desc=dict()))

    # Precedence (lowest to highest): settings file, --min-method, --min-opt
    if min_settings_from_file is not None:
        minimizer_settings.update(from_file(min_settings_from_file))

    if minimizer is not None:
        minimizer_settings['method'] = dict(value=minimizer, desc='no desc')

    if min_opt is not None:
        for opt_val_str in min_opt:
            # Split on the first colon only, so that values which themselves
            # contain a colon are kept intact
            opt, sep, val_str = opt_val_str.partition(':')
            if not sep:
                raise ValueError(
                    'Invalid --min-opt specification "%s"; expected format is'
                    ' OPTION:VALUE' % opt_val_str
                )
            opt = opt.strip()
            val_str = val_str.strip()
            # Interpret the value as int if possible, else float, else leave
            # it as a string
            try:
                val = int(val_str)
            except ValueError:
                try:
                    val = float(val_str)
                except ValueError:
                    val = val_str
            minimizer_settings['options']['value'][opt] = val
            minimizer_settings['options']['desc'][opt] = 'no desc'
    init_args_d['minimizer_settings'] = minimizer_settings

    init_args_d['check_octant'] = not init_args_d.pop('no_octant_check')
    init_args_d['check_ordering'] = init_args_d.pop('ordering_check')

    if command not in (inj_param_scan, systematics_tests):
        # --data-is-data and --data-is-mc are mutually exclusive and one of
        # them is required, so either one fully determines the other
        init_args_d['data_is_data'] = not init_args_d.pop('data_is_mc')
    else:
        init_args_d['data_is_data'] = False
        init_args_d['fluctuate_data'] = False
        init_args_d['fluctuate_fid'] = False

    init_args_d['store_minimizer_history'] = (
        not init_args_d.pop('no_min_history'))

    other_metrics = init_args_d.pop('other_metric')
    if other_metrics is not None:
        other_metrics = [s.strip().lower() for s in other_metrics]
        if 'all' in other_metrics:
            other_metrics = sorted(ALL_METRICS)
        # No point in re-evaluating the metric(s) already used for the fit
        for m in init_args_d['metric']:
            if m in other_metrics:
                other_metrics.remove(m)
        if not other_metrics:
            other_metrics = None
        else:
            logging.info('Will evaluate other metrics %s', other_metrics)
    # Always store the key so that the returned dict has the same set of keys
    # whether or not --other-metric was specified
    init_args_d['other_metrics'] = other_metrics

    return init_args_d
Пример #20
0
def parse_pipeline_config(config):
    """Parse pipeline config.

    Parameters
    ----------
    config : string or PISAConfigParser
        If a string, it is loaded via `from_file`.

    Returns
    -------
    stage_dicts : OrderedDict
        Keys are (stage_name, service_name) tuples and values are OrderedDicts
        with keys the argnames and values the arguments' values. Some known arg
        values are parsed out fully into Python objects, while the rest remain
        as strings that must be used or parsed elsewhere.

        In addition to the per-stage entries, there is one entry with the
        string key ``'detector_name'`` whose value is the ``detector_name``
        option of the "pipeline" section, or None if that option is absent.

    Raises
    ------
    TypeError
        If `config` is neither a string nor a PISAConfigParser.

    NoSectionError
        If the config has no "binning" section.

    IOError
        If no section is found for a stage/service listed in the pipeline's
        ``order`` option.

    ValueError
        If a param that already exists in a previous stage is given further
        specs in a later stage's section.

    """
    # Note: imports placed here to avoid circular imports
    from pisa.core.binning import MultiDimBinning, OneDimBinning
    from pisa.core.param import ParamSelector

    # NOTE(review): `basestring` only exists as a builtin in Python 2; confirm
    # that this module provides the name when run under Python 3.
    if isinstance(config, basestring):
        config = from_file(config)
    elif isinstance(config, PISAConfigParser):
        pass
    else:
        raise TypeError(
            '`config` must either be a string or PISAConfigParser. Got %s '
            'instead.' % type(config))

    if not config.has_section('binning'):
        raise NoSectionError(
            "Could not find 'binning'. Only found sections: %s" %
            config.sections())

    # Create binning objects. Each binning is declared by a `<name>.order`
    # option listing its dimensions; each dimension is in turn defined by a
    # `<name>.<dimension>` option.
    binning_dict = {}
    for name, _ in config['binning'].items():
        if name.endswith('.order'):
            order = split(config.get('binning', name))
            binning, _ = split(name, sep='.')
            bins = []
            for bin_name in order:
                try:
                    def_raw = config.get('binning', binning + '.' + bin_name)
                except Exception:
                    dims_defined = [
                        split(dim, sep='.')[1]
                        for dim in config['binning'].keys()
                        if dim.startswith(binning +
                                          '.') and not dim.endswith('.order')
                    ]
                    logging.error(
                        "Failed to find definition of '%s' dimension of '%s'"
                        " binning entry. Only found definition(s) of: %s",
                        bin_name, binning, dims_defined)
                    raise
                # NOTE: the definition string from the config file is
                # evaluated as Python code; config files must be trusted.
                try:
                    kwargs = eval(def_raw)  # pylint: disable=eval-used
                except Exception:
                    logging.error(
                        "Failed to evaluate definition of '%s' dimension of"
                        " '%s' binning entry:\n'%s'", bin_name, binning,
                        def_raw)
                    raise
                try:
                    bins.append(OneDimBinning(bin_name, **kwargs))
                except Exception:
                    logging.error(
                        "Failed to instantiate new `OneDimBinning` from '%s'"
                        " dimension of '%s' binning entry with definition:\n"
                        "'%s'\n", bin_name, binning, kwargs)
                    raise
            binning_dict[binning] = MultiDimBinning(bins)

    # Pipeline section
    section = 'pipeline'

    # Get and parse the order of the stages (and which services implement them)
    order = [split(x, STAGE_SEP) for x in split(config.get(section, 'order'))]

    param_selections = []
    if config.has_option(section, 'param_selections'):
        param_selections = split(config.get(section, 'param_selections'))

    detector_name = None
    if config.has_option(section, 'detector_name'):
        detector_name = config.get(section, 'detector_name')

    # Parse [stage.<stage_name>] sections and store to stage_dicts
    stage_dicts = OrderedDict()
    for stage, service in order:
        old_section_header = 'stage%s%s' % (STAGE_SEP, stage)
        new_section_header = '%s%s%s' % (stage, STAGE_SEP, service)
        if config.has_section(old_section_header):
            logging.warning(
                '"%s" is an old-style section header, in the future use "%s"',
                old_section_header, new_section_header)
            section = old_section_header
        elif config.has_section(new_section_header):
            section = new_section_header
        else:
            raise IOError(
                'missing section in cfg for stage "%s" service "%s"' %
                (stage, service))

        # Instantiate dict to store args to pass to this stage
        service_kwargs = OrderedDict()

        param_selector = ParamSelector(selections=param_selections)
        service_kwargs['params'] = param_selector

        n_params = 0
        for fullname in config.options(section):
            try:
                value = config.get(section, fullname)
            except Exception:
                logging.error(
                    'Unable to obtain value of option "%s" in section "%s".',
                    fullname, section)
                raise
            # See if this matches a param specification
            param_match = PARAM_RE.match(fullname)
            if param_match is not None:
                n_params += 1

                param_match_dict = param_match.groupdict()
                param_subfields = param_match_dict['subfields'].split('.')

                # Figure out what the dotted fields represent...
                infodict = interpret_param_subfields(subfields=param_subfields)

                # If field is an attr, skip since these are located manually
                if infodict['attr'] is not None:
                    continue

                # Check if this param already exists in a previous stage; if
                # so, make sure there are no specs for this param, but just a
                # link to the previous param object that is already
                # instantiated.
                for kw in stage_dicts.values():
                    # Stage did not get a `params` argument from config
                    if 'params' not in kw:
                        continue

                    # Retrieve the param from the ParamSelector
                    try:
                        param = kw['params'].get(name=infodict['pname'],
                                                 selector=infodict['selector'])
                    except KeyError:
                        continue

                    # Make sure there are no other specs (in this section) for
                    # the param defined in a previous section
                    for a in PARAM_ATTRS:
                        if config.has_option(section, '%s.%s' % (fullname, a)):
                            raise ValueError("Parameter spec. '%s' of '%s' "
                                             "found in section '%s', but "
                                             "parameter exists in previous "
                                             "stage!" % (a, fullname, section))

                    break

                # Param *not* found in a previous stage (i.e., no explicit
                # `break` encountered in `for` loop above); therefore must
                # instantiate it.
                else:
                    param = parse_param(config=config,
                                        section=section,
                                        selector=infodict['selector'],
                                        fullname=fullname,
                                        pname=infodict['pname'],
                                        value=value)

                param_selector.update(param, selector=infodict['selector'])

            # If it's not a param spec but contains 'binning', assume it's a
            # binning spec for CAKE stages
            elif 'binning' in fullname:
                service_kwargs[fullname] = binning_dict[value]

            # it's gonna be a PI stage
            elif '_specs' in fullname:
                value = parse_string_literal(value)
                # is it None?
                if value is None:
                    service_kwargs[fullname] = value
                # is it evts?
                elif value in ['evnts', 'events']:
                    service_kwargs[fullname] = 'events'
                # so it gotta be a binning
                else:
                    service_kwargs[fullname] = binning_dict[value]

            # it's a list of in/output names
            elif fullname.endswith('_names'):
                value = split(value)
                service_kwargs[fullname] = value
            # Otherwise it's some other stage instantiation argument; identify
            # this by its full name and try to interpret and instantiate a
            # Python object using the string
            else:
                try:
                    value = parse_quantity(value)
                    value = value.nominal_value * value.units
                except ValueError:
                    value = parse_string_literal(value)
                service_kwargs[fullname] = value

        # If no params actually specified in config, remove 'params' from the
        # service's keyword args
        if n_params == 0:
            service_kwargs.pop('params')

        # Store the service's kwargs to the stage_dicts
        stage_dicts[(stage, service)] = service_kwargs

    stage_dicts['detector_name'] = detector_name
    return stage_dicts
Пример #21
0
def parse_param(config, section, selector, fullname, pname, value):
    """Parse a param specification from a PISA config file.

    Note that if the param specification does not include ``fixed``,
    ``prior``, and/or ``range``, the defaults for these are:
    ``fixed = True``, ``prior = None``, and ``range = None``.

    If a prior is specified explicitly via ``.prior``, this takes precedence,
    but if no ``.prior`` is specified and the param's value is parsed to be a
    :class:`uncertainties.AffineScalarFunc` (i.e. have `std_dev` attribute), a
    Gaussian prior is constructed from that. In either case only the nominal
    value (times units) is used as the param's value.

    Parameters
    ----------
    config : pisa.utils.config_parser.PISAConfigParser
    section : string
    selector : string or None
    fullname : string
    pname : string
    value : string

    Returns
    -------
    param : pisa.core.param.Param

    Raises
    ------
    ValueError
        If the ``.prior`` option names an unknown prior type, uses the old
        Gaussian-prior syntax, or requests a Jeffreys prior without a
        ``.range`` having been specified.

    """
    # Note: imports placed here to avoid circular imports
    from pisa.core.param import Param
    from pisa.core.prior import Prior
    kwargs = dict(name=pname, is_fixed=True, prior=None, range=None)
    try:
        value = parse_quantity(value)
        # Only the nominal value goes into the param; any uncertainty on
        # `value` is used further below to build a prior
        kwargs['value'] = value.nominal_value * value.units
    except ValueError:
        value = parse_string_literal(value)
        kwargs['value'] = value

    # Search for explicit attr specifications
    if config.has_option(section, fullname + '.fixed'):
        kwargs['is_fixed'] = config.getboolean(section, fullname + '.fixed')

    if config.has_option(section, fullname + '.unique_id'):
        kwargs['unique_id'] = config.get(section, fullname + '.unique_id')

    if config.has_option(section, fullname + '.range'):
        range_ = config.get(section, fullname + '.range')
        # Note: `nominal` and `sigma` are called out in the `range_` string;
        # they must exist as locals with exactly these names since the `eval`
        # below looks them up by name
        if 'nominal' in range_:
            nominal = value.n * value.units  # pylint: disable=unused-variable
        if 'sigma' in range_:
            sigma = value.s * value.units  # pylint: disable=unused-variable
        range_ = range_.replace('[', 'np.array([')
        range_ = range_.replace(']', '])')
        # NOTE: the range string from the config file is evaluated as Python
        # code; config files must be trusted.
        kwargs['range'] = eval(range_).to(value.units)  # pylint: disable=eval-used

    if config.has_option(section, fullname + '.prior'):
        prior = str(config.get(section, fullname + '.prior')).strip().lower()
        if prior == 'uniform':
            kwargs['prior'] = Prior(kind='uniform')
        elif prior == 'jeffreys':
            # The Jeffreys prior is bounded by the param's range, so fail
            # with a clear message if none was given
            if kwargs['range'] is None:
                raise ValueError(
                    "Jeffreys prior specified for '%s' in section '%s', but"
                    " no range is specified for this param"
                    % (fullname, section)
                )
            kwargs['prior'] = Prior(kind='jeffreys',
                                    A=kwargs['range'][0],
                                    B=kwargs['range'][1])
        elif prior == 'spline':
            priorname = pname
            if selector is not None:
                priorname += '_' + selector
            data = config.get(section, fullname + '.prior.data')
            data = from_file(data)
            data = data[priorname]
            knots = ureg.Quantity(np.asarray(data['knots']), data['units'])
            knots = knots.to(value.units)
            coeffs = np.asarray(data['coeffs'])
            deg = data['deg']
            kwargs['prior'] = Prior(kind='spline',
                                    knots=knots,
                                    coeffs=coeffs,
                                    deg=deg)
        elif prior == 'none':
            kwargs['prior'] = None
        elif 'gauss' in prior:
            raise ValueError('Please use new style +/- notation for gaussian'
                             ' priors in config')
        else:
            raise ValueError(
                "Prior type unknown: '%s' (specified for '%s' in section"
                " '%s')" % (prior, fullname, section)
            )

    # No explicit ``.prior``: derive a Gaussian prior from the uncertainty
    # attached to the value, if there is a non-zero one
    elif hasattr(value, 'std_dev') and value.std_dev != 0:
        kwargs['prior'] = Prior(kind='gaussian',
                                mean=value.nominal_value * value.units,
                                stddev=value.std_dev * value.units)

    try:
        param = Param(**kwargs)
    except Exception:
        logging.error('Failed to instantiate new Param object with kwargs %s',
                      kwargs)
        raise

    return param
Пример #22
0
            p_name, value = args.set_param_data.split("=")
            print("set param ", p_name, "to  ", value, "for data")
            value = parse_quantity(value)
            data_fixed_param = {p_name: value.n}
            value = value.n * value.units
            prm = data_maker.params[p_name]
            prm.value = value
            data_maker.update_params(prm)
            data_maker.params.fix(p_name)

        analysis = Analysis(data_maker=data_maker,
                            template_maker=template_maker,
                            metric=args.metric,
                            blind=args.blind)

        analysis.minimizer_settings = from_file(args.minimizer_settings)
        analysis.pseudodata_method = args.pseudo_data

        #analysis.randomize_free_params()

        results = []

        for i in range(args.num_trials):
            logging.info('Running trial %i' % i)
            np.random.seed()
            analysis.generate_psudodata()

            if args.function == 'profile':
                if args.mode == 'H0':
                    results.append(
                        analysis.profile(args.var, [0.] * ureg.dimensionless,
Пример #23
0
def test_CrossSections(outdir=None):
    """Unit tests for CrossSections class.

    Parameters
    ----------
    outdir : str, optional
        Directory into which the cross-section plots are saved. If None, a
        temporary directory is created with `mkdtemp` and removed again once
        the test has finished (whether it passed or failed).

    Raises
    ------
    ValueError
        If the averaged ratio of (numu_cc + numu_nc) to itself is not 1.

    """
    from shutil import rmtree
    from tempfile import mkdtemp

    # Only clean up the output directory if we created it ourselves
    remove_dir = outdir is None
    if remove_dir:
        outdir = mkdtemp()

    try:
        # "Standard" location of cross sections file in PISA; retrieve 2.6.4 for
        # testing purposes
        pisa_xs_file = 'cross_sections/cross_sections.json'
        xs = CrossSections(ver='genie_2.6.4', xsec=pisa_xs_file)

        # Location of the root file to use (not included in PISA at the moment)
        root_xs_file = find_resource(os.path.join(
            'cross_sections', 'genie_xsec_H2O.root'
        ))

        # Exercise the ROOT import path if the file is available.
        # TODO: the comparison of the ROOT-imported cross sections against
        # those stored in PISA (`allclose(xs, rtol=1e-7)`) is currently
        # disabled, so the result of the import is not checked.
        if os.path.isfile(root_xs_file):
            CrossSections.new_from_root(root_xs_file, ver='genie_2.6.4')
            logging.info('Found and loaded ROOT source cross sections file %s',
                         root_xs_file)

        # Check XS ratio for numu_cc to numu_cc + numu_nc (user must inspect)
        kg0 = NuFlavIntGroup('numu_cc')
        kg1 = NuFlavIntGroup('numu_nc')
        logging.info(
            r'\int_1^80 xs(numu_cc) E^{-1} dE = %e',
            xs.get_xs_ratio_integral(kg0, None, e_range=[1, 80], gamma=1)
        )
        logging.info(
            '(int E^{-gamma} * (sigma_numu_cc)/int(sigma_(numu_cc+numu_nc)) dE)'
            ' / (int E^{-gamma} dE) = %e',
            xs.get_xs_ratio_integral(kg0, kg0+kg1, e_range=[1, 80], gamma=1,
                                     average=True)
        )
        # Check that XS ratio for numu_cc+numu_nc to the same is 1.0
        int_val = xs.get_xs_ratio_integral(kg0+kg1, kg0+kg1, e_range=[1, 80],
                                           gamma=1, average=True)
        if not recursiveEquality(int_val, 1):
            raise ValueError('Integral of nc + cc should be 1.0; get %e'
                             ' instead.' % int_val)

        # Plot all cross sections stored in PISA xs file. Plotting is
        # best-effort: an ImportError is logged and otherwise ignored.
        try:
            alldata = from_file(pisa_xs_file)
            for ver in alldata.keys():
                ver_xs = CrossSections(ver=ver, xsec=pisa_xs_file)
                ver_xs.plot(save=os.path.join(
                    outdir, 'pisa_' + ver + '_nuxCCNC_H2O_cross_sections.pdf'
                ))
        except ImportError as exc:
            logging.debug('Could not plot; possible that matplotlib not'
                          ' installed. ImportError: %s', exc)

    finally:
        if remove_dir:
            rmtree(outdir)
Пример #24
0
def load_interpolated_hypersurfaces(input_file):
    '''
    Load a set of interpolated hypersurfaces from a file.

    Analogously to "load_hypersurfaces", this function returns a
    collection with a HypersurfaceInterpolator object for each Map.

    Parameters
    ----------
    input_file : str
        A JSON input file as produced by fit_hypersurfaces if interpolation params
        were given. It has the form::
            {
                'interpolation_param_spec': {
                    'param1': {"values": [val1_1, val1_2, ...], "scales_log": True/False}
                    'param2': {"values": [val2_1, val2_2, ...], "scales_log": True/False}
                    ...
                    'paramN': {"values": [valN_1, valN_2, ...], "scales_log": True/False}
                },
                'hs_fits': [
                    <list of dicts, each with a "param_values" entry and an
                    "hs_fit" entry; the latter maps map names such as 'nue_cc'
                    to hypersurface states>
                ]
            }

    Returns
    -------
    collections.OrderedDict
        dictionary with a :obj:`HypersurfaceInterpolator` for each map

    Raises
    ------
    AssertionError
        If `input_file` is not a string, if one of the required top-level keys
        is missing, or if the fits do not all contain the same set of maps.
    ValueError
        If the file contains no fits at all (empty 'hs_fits').
    '''
    assert isinstance(input_file, str)

    logging.info(f"Loading interpolated hypersurfaces from file: {input_file}")

    # Load the data from the file
    input_data = from_file(input_file)

    # check the file contents
    assert {'interpolation_param_spec',
            'hs_fits'}.issubset(set(input_data.keys())), 'missing keys'

    all_fits = input_data['hs_fits']
    # Without at least one fit the map names cannot be determined; fail with a
    # clear message instead of an opaque error further down.
    if len(all_fits) == 0:
        raise ValueError(
            f"No hypersurface fits found under 'hs_fits' in file: {input_file}"
        )

    # input_data['hs_fits'] is a list of dicts, each dict contains "param_values"
    # and "hs_fit"
    map_names = None
    logging.info("Reading file complete, generating hypersurfaces...")
    for hs_fit_dict in all_fits:
        # this is still not the actual Hypersurface, but a dict with the (linked)
        # maps and the HS fit for the map...
        hs_state_maps = hs_fit_dict["hs_fit"]
        if map_names is None:
            # The first fit defines the reference set (and order) of map names
            map_names = list(hs_state_maps.keys())
        else:
            assert set(map_names) == set(
                hs_state_maps.keys()), "inconsistent maps"
        # When data is recovered from JSON, the object states are not automatically
        # converted to the corresponding objects, so we need to do it manually here.
        # Note that this replaces the states inside `input_data` in place.
        for map_name in map_names:
            hs_state_maps[map_name] = Hypersurface.from_state(
                hs_state_maps[map_name])

    logging.info(f"Read hypersurface maps: {map_names}")

    # Now we have a list of dicts where the map names are on the lower level.
    # We need to convert this into a dict of HypersurfaceInterpolator objects.
    output = collections.OrderedDict()
    for m in map_names:
        hs_fits = [{
            "param_values": fd["param_values"],
            "hs_fit": fd['hs_fit'][m]
        } for fd in all_fits]
        output[m] = HypersurfaceInterpolator(
            input_data['interpolation_param_spec'], hs_fits)

    return output
Пример #25
0
def load_and_modify_pipeline_cfg(fit_cfg, section):
    """Load and modify the pipeline config file as specified in that section of the fit
    config.

    Every option in `section` other than the one naming the pipeline config
    (``SYS_SET_OPTION``) must match either ``SET_OPTION_RE`` (add a section
    and/or set an option) or ``REMOVE_OPTION_RE`` (remove an option or a whole
    section). The modifications are applied only to the in-memory config
    object returned by this function.

    Parameters
    ----------
    fit_cfg : pisa.utils.config_parser.PISAConfigParser
        any subclass of :class:`configparser.RawConfigParser` should work as well

    section : str
        name of the section to extract from the `fit_cfg`

    Returns
    -------
    pipeline_cfg : pisa.utils.config_parser.PISAConfigParser
        pipeline config

    pipeline_cfg_path : str
        path to the pipeline config as it is specified in the fit config

    Raises
    ------
    ValueError
        If an option in `section` matches neither the "set" nor the "remove"
        pattern.

    """
    pipeline_cfg_path = fit_cfg.get(section, SYS_SET_OPTION)
    other_options = fit_cfg.options(section)
    other_options.remove(SYS_SET_OPTION)

    pipeline_cfg = from_file(pipeline_cfg_path)

    # Get a no-whitespace version of the section names; maps the stripped name
    # to the name as it actually appears in the pipeline config
    section_map = {s.strip(): s for s in pipeline_cfg.sections()}

    for option in other_options:
        set_match = SET_OPTION_RE.match(option)
        remove_match = REMOVE_OPTION_RE.match(
            option) if not set_match else None
        if set_match:
            section_spec, set_option = set_match.groups()
            no_ws_section_spec = section_spec.strip()
            set_option = set_option.strip()
            if no_ws_section_spec not in section_map:
                logging.debug(
                    'Adding section [%s] to in-memory copy of pipeline config "%s"',
                    section_spec,
                    pipeline_cfg_path,
                )
                pipeline_cfg.add_section(section_spec)
                section_map[no_ws_section_spec] = section_spec
            if set_option:
                set_value = fit_cfg.get(section, option).strip()
                logging.debug(
                    'Setting section [%s] option "%s = %s" in in-memory'
                    ' copy of pipeline config "%s"',
                    section_spec,
                    set_option,
                    set_value,
                    pipeline_cfg_path,
                )
                pipeline_cfg.set(section_map[no_ws_section_spec], set_option,
                                 set_value)
        elif remove_match:
            section_spec, remove_option = remove_match.groups()
            no_ws_section_spec = section_spec.strip()
            remove_option = remove_option.strip()
            if no_ws_section_spec in section_map:
                if remove_option:
                    logging.debug(
                        'Removing section [%s] option "%s" from in-memory copy of'
                        ' pipeline config "%s"',
                        section_spec,
                        remove_option,
                        pipeline_cfg_path,
                    )
                    pipeline_cfg.remove_option(section_map[no_ws_section_spec],
                                               remove_option)
                else:
                    logging.debug(
                        "Removing section [%s] from in-memory copy of pipeline config"
                        ' "%s"',
                        section_spec,
                        pipeline_cfg_path,
                    )
                    # Drop the bookkeeping entry together with the section so
                    # that a later "set" on the same section re-creates it
                    # instead of writing to a section that no longer exists.
                    pipeline_cfg.remove_section(
                        section_map.pop(no_ws_section_spec))
            else:
                logging.warning(
                    "Told to remove section [%s] but section does not exist in"
                    ' pipeline config "%s"',
                    section_spec,
                    pipeline_cfg_path,
                )
        else:
            raise ValueError(
                "Unhandled option in fit config: {}".format(option))

    return pipeline_cfg, pipeline_cfg_path
Пример #26
0
    def load_neutrino_events(config, dataset):
        """Load the neutrino events of `dataset` and combine them into a Data
        object.

        Parameters
        ----------
        config : config parser object
            Sample configuration; read via its `get` and `items` methods.

        dataset : str
            Name of the dataset to load. The generator-level dataset name
            ('neutrinos' + SEP + 'gen_lvl') is read via a separate generator
            config file; 'nominal' resolves the file prefix from the nominal
            choice of each systematic; any other name is used directly as the
            config section holding 'file_prefix'.

        Returns
        -------
        Data
            Sum of the per-flavour event collections, with metadata entries
            'name' and 'sample'.

        Raises
        ------
        AssertionError
            If, for the 'nominal' dataset, the systematics do not all point to
            the same file prefix.

        """
        per_flavour = []
        gen_lvl_dataset = 'neutrinos%sgen_lvl' % SEP

        if dataset == gen_lvl_dataset:
            # Generator-level sample: settings live in a dedicated config file
            gen_cfg = from_file(config.get(dataset, 'gen_cfg_file'))
            name = gen_cfg.get('general', 'name')
            datadir = gen_cfg.get('general', 'datadir')
            event_types = split(gen_cfg.get('general', 'event_type'))
            weights = split(gen_cfg.get('general', 'weights'))
            weight_units = gen_cfg.get('general', 'weight_units')
            keep_keys = split(gen_cfg.get('general', 'keep_keys'))
            aliases = gen_cfg.items('aliases')
            logging.info('Extracting neutrino dataset "{0}" from generator '
                         'level sample "{1}"'.format(dataset, name))

            for position, event_type in enumerate(event_types):
                flavints = NuFlavIntGroup(event_type).flavints
                events_file = datadir + gen_cfg.get(event_type, 'filename')
                per_flavour.append(
                    sample.load_from_nu_file(
                        events_file, flavints, weights[position],
                        weight_units, keep_keys, aliases
                    )
                )
        else:
            name = config.get('general', 'name')
            flavours = split(config.get('neutrinos', 'flavours'))
            weights = split(config.get('neutrinos', 'weights'))
            weight_units = config.get('neutrinos', 'weight_units')
            sys_list = split(config.get('neutrinos', 'sys_list'))
            base_prefix = config.get('neutrinos', 'baseprefix')
            keep_keys = split(config.get('neutrinos', 'keep_keys'))
            aliases = config.items('neutrinos%saliases' % SEP)
            logging.info('Extracting neutrino dataset "{0}" from sample '
                         '"{1}"'.format(dataset, name))
            # The config spells "no prefix" as the literal string 'None'
            if base_prefix == 'None':
                base_prefix = ''

            for position, flavour in enumerate(flavours):
                flav_code = int(flavour)
                # Particle and anti-particle are loaded together
                flavints = NuFlavIntGroup(flav_code, -flav_code).flavints

                if dataset != 'nominal':
                    file_prefix = flavour + config.get(dataset, 'file_prefix')
                else:
                    # Collect the file prefix belonging to the nominal value
                    # of every systematic; they must all agree.
                    prefixes = []
                    for sys_name in sys_list:
                        sys_section = 'neutrinos%s%s' % (SEP, sys_name)
                        nominal_value = config.get(sys_section, 'nominal')
                        nominal_section = sys_section + SEP + nominal_value
                        prefixes.append(
                            config.get(nominal_section, 'file_prefix')
                        )
                    if len(set(prefixes)) > 1:
                        raise AssertionError(
                            'Choice of nominal file is ambigous. Nominal '
                            'choice of systematic parameters must coincide '
                            'with one and only one file. Options found are: '
                            '{0}'.format(prefixes)
                        )
                    file_prefix = flavour + prefixes[0]

                events_file = path.join(
                    config.get('general', 'datadir'),
                    base_prefix + file_prefix
                )
                per_flavour.append(
                    sample.load_from_nu_file(
                        events_file, flavints, weights[position],
                        weight_units, keep_keys, aliases
                    )
                )

        combined = reduce(add, per_flavour)
        return Data(combined, metadata={'name': name, 'sample': dataset})
Пример #27
0
def main():
    """Fit a polynomial to each bin's response to discrete systematic sets.

    For every systematic listed under ``general/sys_list`` in the fit settings
    file, templates are produced for each configured run, divided by the
    nominal template, and a polynomial in (run value - nominal) is fitted per
    bin with `curve_fit`. The fitted coefficients are optionally smoothed and
    written to ``<out_dir>/<sys>_sysfits_<tag>_<smooth>.json``; if plotting is
    requested, one set of 2D maps per polynomial coefficient is produced.

    Raises
    ------
    ValueError
        If a systematic has no runs configured.

    """
    args = parse_args()
    set_verbosity(args.v)

    if args.plot:
        import matplotlib as mpl
        mpl.use('pdf')
        import matplotlib.pyplot as plt
        from pisa.utils.plotter import Plotter

    cfg = from_file(args.fit_settings)
    sys_list = cfg.get('general', 'sys_list').replace(' ', '').split(',')
    stop_idx = cfg.getint('general', 'stop_after_stage')

    for sys_name in sys_list:
        # Parse info for given systematic
        nominal = cfg.getfloat(sys_name, 'nominal')
        degree = cfg.getint(sys_name, 'degree')
        force_through_nominal = cfg.getboolean(sys_name,
                                               'force_through_nominal')
        # NOTE(review): `eval` executes whatever expression the fit settings
        # file contains; only use trusted settings files. Kept as `eval`
        # because existing configs may rely on expressions rather than plain
        # literals -- confirm before replacing with `ast.literal_eval`.
        runs = eval(cfg.get(sys_name, 'runs'))
        smooth = cfg.get(sys_name, 'smooth')

        if len(runs) == 0:
            raise ValueError(
                'No runs specified for systematic "%s"' % sys_name
            )

        # Force a float array: the in-place shift by the (float) nominal value
        # further down fails with a casting error on an integer array.
        x_values = np.array(sorted(runs), dtype=float)

        # Build fit function; the source string is stored alongside the fit
        # results, so it is kept as text and evaluated here.
        if force_through_nominal:
            function = "lambda x, *p: np.polynomial.polynomial.polyval(x, [1.] + list(p))"
        else:
            function = "lambda x, *p: np.polynomial.polynomial.polyval(x, list(p))"
            # Add free parameter for constant term
            degree += 1
        fit_fun = eval(function)

        # Instantiate template maker
        template_maker = Pipeline(args.template_settings)

        # Apply user-specified parameter overrides (skipped when the option is
        # unset or empty)
        if args.set_param:
            for one_set_param in args.set_param:
                p_name, value = one_set_param.split("=")
                value = parse_quantity(value)
                value = value.n * value.units
                param = template_maker.params[p_name]
                param.value = value
                template_maker.update_params(param)

        inputs = {}
        map_names = None
        # Get sys templates
        for run in runs:
            for key, val in cfg.items('%s:%s'%(sys_name, run)):
                if key.startswith('param.'):
                    # Split only on the first dot so that the remainder is
                    # taken as the parameter name as-is
                    _, pname = key.split('.', 1)
                    param = template_maker.params[pname]
                    try:
                        value = parse_quantity(val)
                        param.value = value.n * value.units
                    except ValueError:
                        # Not a quantity; fall back to a string literal
                        value = parse_string_literal(val)
                        param.value = value
                    param.set_nominal_to_current_value()
                    template_maker.update_params(param)
            # Retrieve maps
            template = template_maker.get_outputs(idx=stop_idx)
            if map_names is None:
                map_names = [m.name for m in template]
            inputs[run] = {}
            for m in template:
                inputs[run][m.name] = m.hist

        # Numpy acrobatics: for each map, stack the ratio to nominal of every
        # run and move the run axis to the last position
        arrays = {}
        for name in map_names:
            arrays[name] = []
            for x in x_values:
                arrays[name].append(
                    inputs[x][name] / unp.nominal_values(inputs[nominal][name])
                )
            a = np.array(arrays[name])
            arrays[name] = np.rollaxis(a, 0, len(a.shape))

        # Shift to get deltas
        x_values -= nominal

        # Binning object (assuming they're all the same)
        binning = template.maps[0].binning

        shape = [d.num_bins for d in binning] + [degree]
        shape_small = [d.num_bins for d in binning]

        outputs = {}
        errors = {}
        for name in map_names:
            # Now actually perform some fits
            outputs[name] = np.ones(shape)
            errors[name] = np.ones(shape)

            for idx in np.ndindex(*shape_small):
                y_values = unp.nominal_values(arrays[name][idx])
                y_sigma = unp.std_devs(arrays[name][idx])
                # Only pass uncertainties to the fit if any are non-zero
                if np.any(y_sigma):
                    popt, pcov = curve_fit(fit_fun, x_values, y_values,
                                           sigma=y_sigma, p0=np.ones(degree))
                else:
                    popt, pcov = curve_fit(fit_fun, x_values, y_values,
                                           p0=np.ones(degree))
                perr = np.sqrt(np.diag(pcov))
                for k, p in enumerate(popt):
                    outputs[name][idx][k] = p
                    errors[name][idx][k] = perr[k]

                # TODO(philippeller): per-bin plotting of the individual fits
                # was disabled here because the old implementation no longer
                # works; re-implement if needed.

        if smooth == 'gauss':
            for name in map_names:
                for d in range(degree):
                    outputs[name][...,d] = gaussian_filter(outputs[name][...,d],sigma=1)

        if smooth == 'gauss_pid':
            for name in map_names:
                split_idx = binning.names.index('pid')
                tot = len(binning)-1
                for d in range(degree):
                    for p in range(len(binning['pid'])):
                        outputs[name][...,p,d] = gaussian_filter(
                            np.swapaxes(outputs[name], split_idx, tot)[...,p,d],
                            sigma=1
                        )
                outputs[name] = np.swapaxes(outputs[name], split_idx, tot)

        # Save the raw ones anyway
        outputs['pname'] = sys_name
        outputs['nominal'] = nominal
        outputs['function'] = function
        outputs['map_names'] = map_names
        outputs['binning_hash'] = binning.hash
        to_file(outputs, '%s/%s_sysfits_%s_%s.json'%(args.out_dir, sys_name,
                                                     args.tag, smooth))

        if args.plot:
            # One set of 2D maps per polynomial coefficient
            for d in range(degree):
                maps = []
                for name in map_names:
                    maps.append(Map(name='%s_raw'%name, hist=outputs[name][...,d],
                                    binning=binning))
                maps = MapSet(maps)
                my_plotter = Plotter(
                    stamp='',
                    outdir=args.out_dir,
                    fmt='pdf',
                    log=False,
                    label=''
                )
                my_plotter.plot_2d_array(
                    maps,
                    fname='%s_%s_%s_%s'%(sys_name, args.tag, d, smooth),
                )
Пример #28
0
    def load_events_file(self, events_file, variable_mapping=None):
        """Fill this events container from an input HDF5 file filled with event
        data. Optionally can provide a variable mapping to select a subset of
        variables, rename them, etc.

        Parameters
        ----------
        events_file : string or mapping
            If string, interpret as a path and load file at that path; the
            loaded object should be a mapping. If already a mapping, take and
            interpret events from that.

        variable_mapping : mapping, optional
            If specified, should be a mapping where the keys are the
            destination variable names and the items are either the source
            variable names or an iterable of source variables names. In the
            latter case, each of the specified source variables will become a
            column vector in the destination array.

        Raises
        ------
        TypeError
            If `events_file`, `variable_mapping`, the loaded file contents or
            the metadata have an unexpected type.
        ValueError
            If a category in the input has already been added to this
            container.
        KeyError
            If a requested source variable is missing from a category.

        """
        # Validate `events_file`
        if not isinstance(events_file, (str, Mapping)):
            raise TypeError(
                "`events_file` must be either string or mapping; got (%s)"
                % type(events_file)
            )

        # Validate `variable_mapping`
        if variable_mapping is not None:
            if not isinstance(variable_mapping, Mapping):
                raise TypeError("'variable_mapping' must be a mapping (e.g., dict)")
            for dst, src in variable_mapping.items():
                if not isinstance(dst, str):
                    raise TypeError("`variable_mapping` 'dst' (key) must be a string")

                if isinstance(src, str):
                    pass  # Nothing to do
                elif isinstance(src, Iterable):
                    for v in src:
                        if not isinstance(v, str):
                            raise TypeError(
                                "`variable_mapping` 'src' (value) has at least"
                                " one element that is not a string"
                            )
                else:
                    raise TypeError(
                        "`variable_mapping` 'src' (value) must be a string or"
                        " an iterable of strings"
                    )

        if isinstance(events_file, str):
            input_data = from_file(events_file)
            if not isinstance(input_data, Mapping):
                raise TypeError(
                    'Contents loaded from "%s" must be a mapping; got: %s'
                    % (events_file, type(input_data))
                )
        else:  # isinstance(events_file, Mapping)
            input_data = events_file

        # Events and EventsPi objects have attr `metadata`
        metadata = getattr(input_data, 'metadata', None)

        # HDF files have attr `attrs` attached, if present (see pisa.utils.hdf)
        if not metadata:
            metadata = getattr(input_data, 'attrs', None)

        if metadata:
            if not isinstance(metadata, Mapping):
                raise TypeError(
                    "metadata or attrs expected to be a Mapping, but got {}".format(
                        type(metadata)
                    )
                )
            # TODO: events.py calls `tolist` method on all values that have
            # that method (e.g., convert numpy arrays to lists). Why? Is this
            # necessary? Should we do that here, too?
            self.metadata.update(metadata)

        #
        # Re-format inputs
        #

        # The following is intended to re-format input data into the desired
        # format. This is required to handle various input cases and to ensure
        # backwards compatibility with older input file formats.

        # Convert to the required event keys, e.g. "numu_cc", "nutaubar_nc", etc.
        if self.neutrinos:
            input_data = split_nu_events_by_flavor_and_interaction(input_data)

        # The value for each category should itself be a dict of the event
        # variables, where each entry has a variable name as the key and an
        # np.array filled once per event as the value.
        #
        # For backwards compatibility, convert to this format from known older
        # formats first
        if self.neutrinos:
            for key, cat_dict in input_data.items():
                if not isinstance(cat_dict, Mapping):
                    raise Exception(
                        "'%s' input data is not a mapping, unknown format (%s)"
                        % (key, type(cat_dict))
                    )
                for var_key, var_data in cat_dict.items():
                    if not isinstance(var_data, np.ndarray):
                        raise Exception(
                            "'%s/%s' input data is not a numpy array, unknown"
                            " format (%s)" % (key, var_key, type(var_data))
                        )

        # Ensure backwards compatibility with the old style "oppo" flux
        # variables
        if self.neutrinos:
            fix_oppo_flux(input_data)

        #
        # Load the event data
        #

        # Should be organised under a single layer of keys, each representing
        # some category of input data

        # Loop over the input types
        for data_key in input_data.keys():
            if data_key in self:
                raise ValueError(
                    "Key '%s' has already been added to this data structure"
                    % data_key
                )

            self[data_key] = OrderedDict()

            # Loop through variable mapping
            # If none provided, just use all variables and keep the input names
            if variable_mapping is None:
                variable_mapping_to_use = tuple(
                    zip(input_data[data_key].keys(), input_data[data_key].keys())
                )
            else:
                variable_mapping_to_use = variable_mapping.items()

            # Get the array data (stacking if multiple input variables defined)
            # and check the variable exists in the input data
            for var_dst, var_src in variable_mapping_to_use:
                # TODO What about non-float data? Use dtype...
                if isinstance(var_src, str):
                    var_src = [var_src]

                array_data_to_stack = []
                for var in var_src:
                    if var in input_data[data_key]:
                        array_data_to_stack.append(
                            input_data[data_key][var].astype(FTYPE)
                        )
                    else:
                        raise KeyError(
                            "Variable '%s' cannot be found for '%s' events"
                            % (var, data_key)
                        )

                # Note `squeeze` removes the extraneous 2nd dim in case of a
                # single `src`
                array_data = np.squeeze(np.stack(array_data_to_stack, axis=1))

                # Down sample events if required
                # TODO Memory copies?
                if self.fraction_events_to_keep is not None:
                    # A freshly seeded generator is created for every variable
                    # to enforce the same sample each time
                    rand = np.random.RandomState(123456)
                    num_events_to_keep = int(np.round(self.fraction_events_to_keep*float(array_data.size)))
                    array_data = rand.choice(array_data, size=num_events_to_keep, replace=False)

                # Add each array to the event
                self[data_key][var_dst] = array_data
Пример #29
0
    def reweight(self):
        """Main reweighting function.

        Computes the 'fit_weight' of every event by multiplying its
        'weight_weight' with a per-bin polynomial evaluated at the current
        value of each discrete systematic, and copies the result to
        'pisa_weight'. Nothing is done if neither the input weights nor the
        parameter values have changed since the last call (as judged by the
        stored hash).

        Events whose bin index lies above the binning range get a weight of
        zero.
        """
        this_hash = hash_obj([self.weight_hash, self.params.values_hash],
                             full_hash=self.full_hash)
        if this_hash == self.fit_hash:
            return

        fit_coeffs = self.calculate_fit_coeffs()

        sample_config = from_file(self.params['discr_sys_sample_config'].value)
        degree = int(self.params['poly_degree'].value)
        force_through_nominal = self.params['force_through_nominal'].value

        if force_through_nominal:

            def fit_func(vals, *poly_coeffs):
                # Constant term fixed to 1 so the polynomial is 1 at zero
                return np.polynomial.polynomial.polyval(
                    vals, [1.] + list(poly_coeffs))
        else:

            def fit_func(vals, *poly_coeffs):
                return np.polynomial.polynomial.polyval(
                    vals, list(poly_coeffs))

            # add free param for constant term
            degree += 1

        def parse(string):
            return string.replace(' ', '').split(',')

        def smooth_coeffs(fit_map):
            """Gaussian-smooth each coefficient map in place if requested."""
            if self.params['smoothing'].value == 'gauss':
                # TODO(shivesh): new MapSet functions?
                for d in range(degree):
                    fit_map[..., d] = gaussian_filter(fit_map[..., d],
                                                      sigma=1)

        def apply_transform(weights, hist_idxs, transform, shape):
            """Multiply each event weight in place by its bin's transform.

            The weights are visited through a read-write `np.nditer` so that
            the in-place multiplication is written back to `weights`; plain
            iteration over the array would only rebind the loop variable.
            """
            # TODO(shivesh): speedup this
            for idx, wght in enumerate(
                    np.nditer(weights, op_flags=['readwrite'])):
                idx_slice = tuple(hist_idxs[idx])
                if shape[0] == 0 or shape[1] == 0 or \
                   idx_slice[0] > shape[0] or idx_slice[1] > shape[1]:
                    # Outside binning range
                    wght *= 0
                else:
                    # NOTE(review): bin indices are shifted down by one here,
                    # so an index of 0 would wrap around to the last bin --
                    # confirm what `digitize` returns for underflow events.
                    wght *= transform[tuple([x - 1 for x in idx_slice])]

        if self.neutrinos:
            sys_list = parse(sample_config.get('neutrinos', 'sys_list'))

            for fig in self._data.keys():
                self._data[fig]['fit_weight'] = \
                    deepcopy(self._data[fig]['weight_weight'])

            for sys in sys_list:
                nominal = sample_config.get('neutrinos|' + sys, 'nominal')
                for fig in self._data.keys():
                    fit_map = unp.nominal_values(fit_coeffs[sys][fig].hist)
                    smooth_coeffs(fit_map)

                    shape = self.fit_binning.shape
                    transform = np.ones(shape)
                    # Polynomial is evaluated at the offset from nominal
                    sys_offset = self.params['nu_' +
                                             sys].value.m - float(nominal)
                    for idx in np.ndindex(shape):
                        transform[idx] *= fit_func(sys_offset, *fit_map[idx])

                    hist_idxs = self._data.digitize(
                        kinds=fig,
                        binning=self.fit_binning,
                    )

                    # Discrete systematics reweighting
                    apply_transform(self._data[fig]['fit_weight'], hist_idxs,
                                    transform, shape)

            for fig in self._data.keys():
                self._data[fig]['pisa_weight'] = \
                    deepcopy(self._data[fig]['fit_weight'])

        if self.muons:
            sys_list = parse(sample_config.get('muons', 'sys_list'))

            self._data['muons']['fit_weight'] = \
                deepcopy(self._data['muons']['weight_weight'])

            for sys in sys_list:
                fit_map = unp.nominal_values(fit_coeffs[sys]['muons'].hist)
                smooth_coeffs(fit_map)

                shape = self.fit_binning.shape
                transform = np.ones(shape)
                # NOTE(review): unlike the neutrino case, the parameter value
                # is used as-is here (no `.m`, no subtraction of a nominal
                # value) -- confirm this is intended.
                for idx in np.ndindex(shape):
                    transform[idx] *= fit_func(self.params['mu_' + sys].value,
                                               *fit_map[idx])

                hist_idxs = self._data.digitize(
                    kinds='muons',
                    binning=self.fit_binning,
                )

                # Discrete systematics reweighting
                apply_transform(self._data['muons']['fit_weight'], hist_idxs,
                                transform, shape)

            self._data['muons']['pisa_weight'] = \
                deepcopy(self._data['muons']['fit_weight'])

        self.fit_hash = this_hash
        self._data.metadata['fit_hash'] = self.fit_hash
        self._data.update_hash()
Пример #30
0
    def fromFile(cls, filename):
        """
        Load a Fisher matrix from a json file.

        Whatever `from_file(filename)` returns is unpacked as keyword
        arguments to the class constructor, and the resulting instance is
        returned.
        """
        init_kwargs = from_file(filename)
        return cls(**init_kwargs)
Пример #31
0
    def _calculate_fit_coeffs(data,
                              params,
                              fit_binning,
                              nu_params=None,
                              mu_params=None):
        """
        Calculate the fit coefficients for each systematic, flavint,
        bin for a polynomial.

        For every systematic listed in the sample config, the pipeline is run
        once per systematic run and the output is histogrammed on
        `fit_binning`. Each run's histogram is divided by the nominal run's
        histogram (bins where the nominal is zero are set to 0), and a
        polynomial in ``run value - nominal value`` is fitted to those ratios
        independently in every bin.

        If ``params['force_through_nominal']`` is true, the constant term of
        the polynomial is fixed to 1 and ``poly_degree`` coefficients are
        fitted; otherwise the constant term is free as well, giving
        ``poly_degree + 1`` fitted coefficients.

        Parameters
        ----------
        data
            Not used by this function.
        params
            Indexable by name; the entries 'discr_sys_sample_config',
            'poly_degree', 'force_through_nominal', 'pipeline_config' and
            'stop_after_stage' are read.
        fit_binning
            Binning on which the templates are histogrammed and fitted.
        nu_params : iterable of str, optional
            Neutrino systematic parameter names. The first three characters
            of each name are stripped before comparing against the
            'sys_list' entry of the 'neutrinos' config section. If None, no
            neutrino coefficients are calculated.
        mu_params : iterable of str, optional
            Same as `nu_params`, compared against the 'sys_list' entry of
            the 'muons' config section.

        Returns
        -------
        OrderedDict
            Maps each systematic name to a MapSet (neutrinos; one Map per
            key of the template) or a Map (muons). The hist of every Map is
            binned in `fit_binning` plus a trailing 'coeff' dimension that
            holds the fitted coefficients.

        Raises
        ------
        AssertionError
            If the stripped parameter names do not match the systematics
            listed in the sample config, if the pipeline output is not a
            `Data` object, if the flavint groups differ between runs, or if
            a muon template contains no muons.
        """
        logging.debug('Calculating fit coefficients')

        config = from_file(params['discr_sys_sample_config'].value)

        degree = int(params['poly_degree'].value)
        force_through_nominal = params['force_through_nominal'].value

        if force_through_nominal:

            def fit_func(vals, *poly_coeffs):
                return np.polynomial.polynomial.polyval(
                    vals, [1.] + list(poly_coeffs))
        else:

            def fit_func(vals, *poly_coeffs):
                return np.polynomial.polynomial.polyval(
                    vals, list(poly_coeffs))

            # add free param for constant term
            degree += 1

        template_maker = Pipeline(params['pipeline_config'].value)
        dataset_param = template_maker.params['dataset']

        def parse(string):
            """Split a comma-separated string, ignoring spaces."""
            return string.replace(' ', '').split(',')

        def check_sys_list(param_names, sys_list):
            """Raise if the params and the config list different systematics."""
            if set(param_names) != set(sys_list):
                raise AssertionError(
                    'Systematics list listed in the sample config file does '
                    'not match the params in the pipeline config file\n {0} '
                    '!= {1}'.format(set(param_names), set(sys_list)))

        def load_template(ev_sys, sys_name, run):
            """Point the pipeline at one systematic run and return its output."""
            logging.info('Loading run {0} of systematic '
                         '{1}'.format(run, sys_name))
            dataset_param.value = ev_sys + '|' + run
            template_maker.update_params(dataset_param)
            template = template_maker.get_outputs(
                idx=int(params['stop_after_stage'].m))
            if not isinstance(template, Data):
                raise AssertionError(
                    'Template output is not a Data object, instead is '
                    'type {0}'.format(type(template)))
            return template

        def frac_diff(run_map, nom_map):
            """Return run / nominal per bin, 0 where the nominal is zero."""
            # TODO(shivesh): error propagation?
            mask = ~(nom_map.hist == 0.)
            div = np.zeros(run_map.shape)
            with np.errstate(divide='ignore', invalid='ignore'):
                div[mask] = \
                    unp.nominal_values(run_map.hist[mask]) /\
                    unp.nominal_values(nom_map.hist[mask])
            return div

        def make_combined_binning():
            """Return `fit_binning` extended by one bin per coefficient."""
            coeff_binning = OneDimBinning(name='coeff',
                                          num_bins=degree,
                                          is_lin=True,
                                          domain=[-1, 1])
            return fit_binning + coeff_binning

        def fit_bins(run_hists, delta_runs, combined_binning):
            """Fit `fit_func` in every bin and return the coefficient array.

            `run_hists` holds one fractional-difference histogram per run,
            in the same order as `delta_runs`.
            """
            # Explicit list of ints: works whether or not `map` is lazy.
            pvals_hist = np.empty([int(n) for n in combined_binning.shape],
                                  dtype=object)
            for idx in np.ndindex(fit_binning.shape):
                y_values = [unp.nominal_values(h[idx]) for h in run_hists]
                y_sigma = [unp.std_devs(h[idx]) for h in run_hists]

                fit_kwargs = {'p0': np.ones(degree)}
                # Only weight the fit when there are non-zero uncertainties
                if np.any(y_sigma):
                    fit_kwargs['sigma'] = y_sigma
                popt, _ = curve_fit(fit_func, delta_runs, y_values,
                                    **fit_kwargs)
                # perr = np.sqrt(np.diag(pcov))
                # pvals = unp.uarray(popt, perr)
                pvals_hist[idx] = popt
            # Convert the object array of coefficient vectors to a plain array
            return np.array(pvals_hist.tolist())

        sys_fit_coeffs = OrderedDict()
        if nu_params is not None:
            sys_list = parse(config.get('neutrinos', 'sys_list'))
            check_sys_list([name[3:] for name in nu_params], sys_list)

            for sys_name in sys_list:
                ev_sys = 'neutrinos|' + sys_name
                runs = parse(config.get(ev_sys, 'runs')[1:-1])
                nominal = config.get(ev_sys, 'nominal')

                mapset_dict = OrderedDict()
                flavint_groups = None
                for run in runs:
                    template = load_template(ev_sys, sys_name, run)
                    if flavint_groups is None:
                        flavint_groups = template.flavint_groups
                    elif set(flavint_groups) != set(template.flavint_groups):
                        raise AssertionError(
                            'Mismatch of flavint_groups - ({0}) does not '
                            'match flavint_groups '
                            '({1})'.format(flavint_groups,
                                           template.flavint_groups))

                    outputs = []
                    for fig in template.keys():
                        outputs.append(
                            template.histogram(kinds=fig,
                                               binning=fit_binning,
                                               weights_col='pisa_weight',
                                               errors=False,
                                               name=str(NuFlavIntGroup(fig))))
                    mapset_dict[run] = MapSet(outputs, name=run)

                nom_mapset = mapset_dict[nominal]
                fracdiff_mapset_dict = OrderedDict()
                for run in runs:
                    ratio_maps = []
                    for flavintg_map in mapset_dict[run]:
                        flavintg = flavintg_map.name
                        ratio_maps.append(
                            Map(name=flavintg,
                                binning=flavintg_map.binning,
                                hist=frac_diff(flavintg_map,
                                               nom_mapset[flavintg])))
                    fracdiff_mapset_dict[run] = MapSet(ratio_maps)

                delta_runs = np.array([float(x)
                                       for x in runs]) - float(nominal)
                combined_binning = make_combined_binning()

                params_maps = []
                # `template` is the output of the last run loaded above
                for fig in template.keys():
                    run_hists = [
                        fracdiff_mapset_dict[run][fig].hist for run in runs
                    ]
                    params_maps.append(
                        Map(name=fig,
                            binning=combined_binning,
                            hist=fit_bins(run_hists, delta_runs,
                                          combined_binning)))
                params_mapset = MapSet(params_maps, name=sys_name)

                if sys_name in sys_fit_coeffs:
                    sys_fit_coeffs[sys_name] = MapSet(
                        [sys_fit_coeffs[sys_name], params_mapset])
                else:
                    sys_fit_coeffs[sys_name] = params_mapset

        if mu_params is not None:
            sys_list = parse(config.get('muons', 'sys_list'))
            check_sys_list([name[3:] for name in mu_params], sys_list)

            for sys_name in sys_list:
                ev_sys = 'muons|' + sys_name
                runs = parse(config.get(ev_sys, 'runs')[1:-1])
                nominal = config.get(ev_sys, 'nominal')

                map_dict = OrderedDict()
                for run in runs:
                    template = load_template(ev_sys, sys_name, run)
                    if not template.contains_muons:
                        raise AssertionError(
                            'Template output does not contain muons')

                    map_dict[run] = template.histogram(
                        kinds='muons',
                        binning=fit_binning,
                        # NOTE: weights cancel in fraction
                        weights_col=None,
                        errors=False,
                        name='muons')

                nom_map = map_dict[nominal]
                fracdiff_map_dict = OrderedDict()
                for run in runs:
                    fracdiff_map_dict[run] = Map(
                        name='muons',
                        binning=nom_map.binning,
                        hist=frac_diff(map_dict[run], nom_map))

                delta_runs = np.array([float(x)
                                       for x in runs]) - float(nominal)
                combined_binning = make_combined_binning()

                # Fit the muon fractional differences (not the neutrino
                # ones, which may not even exist when nu_params is None)
                run_hists = [fracdiff_map_dict[run].hist for run in runs]
                params_map = Map(name='muons',
                                 binning=combined_binning,
                                 hist=fit_bins(run_hists, delta_runs,
                                               combined_binning))
                if sys_name in sys_fit_coeffs:
                    sys_fit_coeffs[sys_name] = MapSet(
                        [sys_fit_coeffs[sys_name], params_map])
                else:
                    sys_fit_coeffs[sys_name] = params_map

        return sys_fit_coeffs
    '--infile',
    type=str,
    required=True
)
# Mandatory path that the converted settings are written to.
parser.add_argument('--outfile', type=str, required=True)
args = parser.parse_args()

import sys, os, re, traceback, time, warnings, itertools
import copy
#from pisa.utils import utils as putils
from pisa.utils.fileio import from_file, to_file
from pisa.utils import params as ppars
from pisa.utils import utils as putils

# Load the input settings and work on a deep copy so that the loaded object
# is left untouched while params are being replaced.
ts0 = from_file(args.infile)
ts1 = copy.deepcopy(ts0)
# Params are visited in sorted name order so the log output is deterministic.
# `.items()` and the parenthesised single-argument `print` behave the same on
# Python 2 and Python 3.
for paramname, param in sorted(ts0['params'].items()):
    new_prior = ppars.Prior.from_param(param)
    # `from_param` returned no prior object: leave this param unchanged.
    if new_prior is None:
        continue
    print('Converting prior for param `' + paramname + '`')
    # Merge the prior's dict representation into a copy of the param.
    new_param = copy.deepcopy(param)
    new_param.update(new_prior.build_dict())
    ts1['params'][paramname] = new_param

# Write the converted settings to the requested output file.
to_file(ts1, args.outfile)