def fix_oppo_flux(input_data):
    """Rename legacy "oppo" flux keys to explicit nominal flux keys, in place.

    Older nominal-flux calculations stored the flux of the "opposite"
    species under ``neutrino_oppo_*`` keys, so the meaning of
    ``neutrino_nue_flux`` depended on whether the container was a neutrino
    or an antineutrino one. Every container that still carries
    ``neutrino_oppo_nue_flux`` gets its four flux entries moved to
    ``nominal_{nue,numu,nuebar,numubar}_flux``.

    Parameters
    ----------
    input_data : mapping
        Maps container names to dict-like containers. A container whose
        name contains ``"bar"`` is treated as an antineutrino container.
    """
    for name, container in input_data.items():
        # Containers without the legacy marker key are left untouched
        if "neutrino_oppo_nue_flux" not in container:
            continue

        logging.warning(
            'renaming the outdated "oppo" flux keys in "%s", in the future do'
            " not use those anymore",
            name,
        )

        # (new key, old key) pairs; the "oppo" keys swap roles for nubar
        if "bar" in name:
            renames = (
                ("nominal_nue_flux", "neutrino_oppo_nue_flux"),
                ("nominal_numu_flux", "neutrino_oppo_numu_flux"),
                ("nominal_nuebar_flux", "neutrino_nue_flux"),
                ("nominal_numubar_flux", "neutrino_numu_flux"),
            )
        else:
            renames = (
                ("nominal_nue_flux", "neutrino_nue_flux"),
                ("nominal_numu_flux", "neutrino_numu_flux"),
                ("nominal_nuebar_flux", "neutrino_oppo_nue_flux"),
                ("nominal_numubar_flux", "neutrino_oppo_numu_flux"),
            )

        for new_key, old_key in renames:
            container[new_key] = container.pop(old_key)
def select_params(self, selections, error_on_missing=True):
    """Apply parameter selections to every pipeline iterated over by `self`.

    Parameters
    ----------
    selections : object or None
        Passed unchanged to each pipeline's `select_params`. If None, no
        selection is made; pipelines that expose non-empty
        `param_selections` only trigger a warning.
    error_on_missing : bool, optional
        If True, raise when not a single pipeline accepts `selections`.

    Raises
    ------
    KeyError
        `error_on_missing` is True and every pipeline raised KeyError.
    """
    if selections is None:
        for pipeline in self:
            available = pipeline.param_selections
            if available:
                logging.warning(
                    "Although you didn't make a parameter "
                    "selection, the following were available: %s."
                    " This may cause issues.",
                    available
                )
        return

    # Each pipeline is asked strictly (error_on_missing=True) so that a
    # KeyError tells us that this pipeline lacks the selections.
    n_accepted = 0
    for pipeline in self:
        try:
            pipeline.select_params(selections, error_on_missing=True)
        except KeyError:
            continue
        n_accepted += 1

    if error_on_missing and n_accepted == 0:
        raise KeyError(
            'None of the stages from any pipeline in this distribution'
            ' maker has all of the selections %s available.'
            %(selections,)
        )
def load_discr_sys(self, sys_list):
    """Load the fit results from file and check them for compatibility.

    The results are read from the path held by the ``fit_results_file``
    param and stored in `self.fit_results`. For every input name that has
    no entry in ``fit_results['map_names']``, a substitute is used if
    exactly one map name is a substring of the input name; otherwise an
    error is logged and processing continues.

    Parameters
    ----------
    sys_list : iterable
        Systematics expected by the caller; must match the ``sys_list``
        stored in the fit results as a set.

    Raises
    ------
    AssertionError
        `sys_list` and the stored ``sys_list`` differ.
    """
    self.fit_results = from_file(self.params['fit_results_file'].value)

    if set(self.input_names) != set(self.fit_results['map_names']):
        for input_name in self.input_names:
            if input_name in self.fit_results['map_names']:
                continue

            # check if there is something uniquely compatible
            matches = [
                mapname in input_name
                for mapname in self.fit_results['map_names']
            ]
            if sum(matches) != 1:
                logging.error('No compatible map for %s found!' % input_name)
                continue

            # exactly one map name is contained in the input name
            substitute = self.fit_results['map_names'][matches.index(True)]
            self.fit_results[input_name] = self.fit_results[substitute]
            logging.warning(
                'Substituting hyperplane parameterization %s for %s'
                % (substitute, input_name))

    assert set(sys_list) == set(self.fit_results['sys_list'])
    self.sys_list = self.fit_results['sys_list']
def save(self, fpath, ver=None, **kwargs):
    """Save cross sections (and the energy specification) to a file at
    `fpath`.

    Versions already present in the file are loaded, validated by
    instantiating a `CrossSections` object from each, and written back
    together with this object's entry. An entry with the same version is
    overwritten (a warning is logged).

    Parameters
    ----------
    fpath : str
        Target path. Resolved via `find_resource` if possible; user and
        environment variables are expanded.
    ver : optional
        Version key to store under. Falls back to `self._ver`; if given,
        it must equal `self._ver`.
    **kwargs
        Passed on to `to_file`.

    Raises
    ------
    ValueError
        Neither `ver` nor `self._ver` is set.
    AssertionError
        `ver` is given but differs from `self._ver`.
    """
    if ver is not None:
        assert ver == self._ver
    elif self._ver is None:
        raise ValueError(
            'Either a ver must be specified in call to `save` or it '
            'must have been set prior to the invocation of `save`.'
        )
    else:
        ver = self._ver

    # A path that cannot be resolved as a resource is used as given
    try:
        fpath = find_resource(fpath)
    except IOError:
        pass
    fpath = os.path.expandvars(os.path.expanduser(fpath))

    all_xs = {}
    if os.path.exists(fpath):
        # Get any existing data from file
        all_xs = from_file(fpath)

        # Validate existing data by instantiating objects from each
        for stored_ver, stored in all_xs.items():
            CrossSections(ver=stored_ver, energy=stored['energy'],
                          xsec=stored['xsec'])

        if ver in all_xs:
            logging.warning('Overwriting existing version "' + ver +
                            '" in file ' + fpath)

    all_xs[ver] = {'xsec':self, 'energy':self.energy}
    to_file(all_xs, fpath, **kwargs)
def link_containers(self, key, names):
    """Link containers together.

    When containers are linked, they are treated as a single (virtual)
    container for binned data.

    Requested names that are not among `self.names` are skipped with a
    warning. The remaining containers are linked in the order in which
    they first appear in `names`, so the result does not depend on set
    iteration order; repeated names are linked only once.

    Parameters
    ----------
    key : str
        name of linked object
    names : list
        name of containers to be linked under the given key
    """
    available = set(self.names)

    # De-duplicate while keeping the caller's ordering
    seen = set()
    requested = []
    for name in names:
        if name not in seen:
            seen.add(name)
            requested.append(name)

    link_names = [name for name in requested if name in available]
    missing = [name for name in requested if name not in available]
    if missing:
        logging.warning(
            "Skipping containers %s in linking, as those are not present",
            missing,
        )

    containers = [self[name] for name in link_names]
    logging.trace('Linking containers %s into %s' % (link_names, key))
    new_container = VirtualContainer(key, containers)
    self.linked_containers.append(new_container)
def plot_2d_array(self, map_set, n_rows=None, n_cols=None, fname=None,
                  **kwargs):
    """Plot all maps or transforms in a single plot.

    Three-dimensional maps are split into two-dimensional ones first,
    along `split_axis` if given, otherwise along the dimension with the
    fewest bins.

    Parameters
    ----------
    map_set : iterable of maps
        Each map must have 2 or 3 binning dimensions.
    n_rows, n_cols : optional
        Passed on to `self.plot_array`.
    fname : str, optional
        Name handed to `self.dump`; defaults to `map_set.name`.
    **kwargs
        `split_axis` (optional) names the axis along which 3d maps are
        split; all remaining entries are passed to `self.plot_array`.

    Raises
    ------
    Exception
        A map is neither two- nor three-dimensional.
    """
    if fname is None:
        fname = map_set.name

    # if dimensionality is 3, then still define a split axis automatically
    new_maps = []
    split_axis = kwargs.pop('split_axis', None)
    for map_ in map_set:
        n_dims = len(map_.binning)
        if n_dims == 3:
            if split_axis is None:
                # Find shortest dimension
                num_bins = map_.binning.num_bins
                idx = num_bins.index(min(num_bins))
                split_axis_ = map_.binning.names[idx]
                logging.warning(
                    'Plotter automatically splitting map %s along %s axis',
                    map_.name, split_axis_)
            else:
                # Honor the axis requested by the caller; previously this
                # case left `split_axis_` unbound.
                split_axis_ = split_axis
            new_maps.extend(map_.split(split_axis_))
        elif n_dims == 2:
            new_maps.append(map_)
        else:
            raise Exception('Cannot plot %i dimensional map in 2d' % n_dims)

    map_set = MapSet(new_maps)
    self.plot_array(map_set, 'plot_2d_map', n_rows=n_rows, n_cols=n_cols,
                    **kwargs)
    self.dump(fname)
def compare_numeric(test, ref, label=None, ac_kw=deepcopy(AC_KW),
                    ignore_fails=False):
    """Compare scalars or numpy ndarrays.

    On mismatch, a message with both values, their difference and (unless
    `ref` is 1 everywhere) the fractional difference is logged.

    Parameters
    ----------
    test : scalar or numpy.ndarray
    ref : scalar or numpy.ndarray
    label : str or None, optional
        Prefix for the logged message.
    ac_kw : mapping, optional
        Keyword args to pass via **ac_kw to `numpy.isclose` /
        `numpy.allclose`
    ignore_fails : bool, optional
        Log mismatches as warnings instead of errors.

    Returns
    -------
    rslt : bool
        True if `test` and `ref` agree within tolerance, False otherwise.

    """
    pfx = f"{label} :: " if label else ""
    log_failure = logging.warning if ignore_fails else logging.error

    with np.printoptions(**PRINTOPTS):
        if np.isscalar(test):
            if np.isclose(test, ref, **ac_kw):
                return True
            log_failure(f"{pfx}test: {test} != ref: {ref}")
            return False

        # Arrays
        if np.allclose(test, ref, **ac_kw):
            return True

        diff = test - ref
        msg = f"{pfx}test:" f"\n{(test)}\n!= ref:\n{(ref)}" f"\ndiff:\n{(diff)}"
        if not np.all(ref == 1):
            # Build the fractional difference in a floating (or complex)
            # array of the broadcast shape. Reusing `ref`'s own dtype and
            # shape fails for integer references (NaN cannot be stored)
            # and for scalar or broadcast references (mask shape mismatch).
            diff_arr = np.asarray(diff)
            ref_arr = np.broadcast_to(np.asarray(ref), diff_arr.shape)
            nzmask = ref_arr != 0
            fdiff = np.full(
                diff_arr.shape,
                np.nan,
                dtype=np.result_type(diff_arr.dtype, np.float64),
            )
            # Entries where ref == 0 stay NaN
            fdiff[nzmask] = diff_arr[nzmask] / ref_arr[nzmask]
            msg += f"\nfractdiff:\n{(fdiff)}"

        log_failure(msg)
        return False
def load_noise_events(config, dataset):
    """Load one noise dataset described by `config` and wrap it in `Data`.

    Parameters
    ----------
    config : config parser
        Must provide the ``general`` and ``noise`` sections read below as
        well as the alias section and the per-dataset ``file_path``.
    dataset : str
        Section name of the dataset to load, or ``'nominal'`` to use the
        file that all systematics in ``sys_list`` declare as nominal.

    Returns
    -------
    Data
        Holds the events under the key ``'noise'``, with metadata entries
        ``name`` and ``noise_sample``.

    Raises
    ------
    AssertionError
        ``dataset == 'nominal'`` but the systematics point to more than
        one nominal file.

    Notes
    -----
    Alias expressions from the config are passed to `eval`, which sees
    this function's local names (``<key>`` is rewritten to
    ``noise['key']``). The config must therefore come from a trusted
    source, and local names here must not be renamed carelessly.
    """
    name = config.get('general', 'name')
    weight = config.get('noise', 'weight')
    weight_units = config.get('noise', 'weight_units')
    sys_list = split(config.get('noise', 'sys_list'))
    base_prefix = config.get('noise', 'baseprefix')
    keep_keys = split(config.get('noise', 'keep_keys'))
    aliases = config.items('noise%saliases' % SEP)
    if base_prefix == 'None':
        base_prefix = ''

    if dataset != 'nominal':
        file_path = config.get(dataset, 'file_path')
    else:
        # Every systematic names its nominal set; all must share one file
        paths = []
        for sys in sys_list:
            ev_sys = 'noise%s%s' % (SEP, sys)
            nominal = config.get(ev_sys, 'nominal')
            ev_sys_nom = ev_sys + SEP + nominal
            paths.append(config.get(ev_sys_nom, 'file_path'))
        if len(set(paths)) > 1:
            raise AssertionError(
                'Choice of nominal file is ambigous. Nominal '
                'choice of systematic parameters must coincide '
                'with one and only one file. Options found are: '
                '{0}'.format(paths))
        file_path = paths[0]

    logging.info('Extracting noise dataset "{0}" from sample '
                 '"{1}"'.format(dataset, name))

    noise = from_file(file_path)
    sample.strip_keys(keep_keys, noise)

    if weight in ('None', '1'):
        noise['sample_weight'] = np.ones(noise['weights'].shape)
    elif weight == '0':
        noise['sample_weight'] = np.zeros(noise['weights'].shape)
    else:
        noise['sample_weight'] = noise[weight] * ureg(weight_units)
    noise['pisa_weight'] = deepcopy(noise['sample_weight'])

    for alias, expr in aliases:
        if alias in noise:
            logging.warning(
                'Overwriting Data key {0} with aliased expression '
                '{1}'.format(alias, expr))
        noise[alias] = eval(re.sub(r'\<(.*?)\>', r"noise['\1']", expr))

    return Data(
        {'noise': noise},
        metadata={'name': name, 'noise_sample': dataset},
    )
def find_best_fit(self, check_octant=True, pprint=True, skip=False):
    """Find the best fit point for the free parameters.

    The free parameters are reset, the minimizer is run, and its result
    is collected. If ``theta23`` is free (and neither `skip` nor a
    disabled `check_octant` prevents it), the fit is repeated starting
    from theta23 mirrored about 45 degrees, and the result with the
    smaller metric value is kept.

    Parameters
    ----------
    check_octant : bool, optional
        Whether to also fit starting from the other theta23 octant.
    pprint : bool, optional
        Passed on to `self.run_minimizer`.
    skip : bool, optional
        Passed on to the first `self.run_minimizer` call; if True, the
        octant check is skipped as well.

    Returns
    -------
    best_fit : dict
        Metric value (keyed by `self.metric`), minimizer flags,
        ``all_metrics`` and, unless `self.blind`, the values of all free
        parameters.
    """
    # Reset free parameters to nominal values
    logging.info('resetting params')
    self.template_maker.params.reset_free()

    if not check_octant:
        logging.warning('Skipping octant check in fit!')

    _, metric_val, all_metrics, dict_flags = self.run_minimizer(
        pprint=pprint, skip=skip)

    best_fit = {
        self.metric: metric_val,
        'warnflag': dict_flags['warnflag'],
        'avg_tmp_time': dict_flags['avg_tmp_time'],
        'n_minimizer_calls': dict_flags['n_minimizer_calls'],
        'funcalls': dict_flags['funcalls'],
        'all_metrics': all_metrics,
    }
    if not self.blind:
        for pname in self.template_maker.params.free.names:
            best_fit[pname] = self.template_maker.params[pname].value

    # decide whether fit for second octant is necessary
    theta23_fitted = (
        'theta23' in self.template_maker.params.free.names and not skip
    )
    if not (theta23_fitted and check_octant):
        return best_fit

    logging.info('checking other octant of theta23')
    self.template_maker.params.reset_free()

    # changing to other octant: mirror the value about 45 degrees
    theta23 = self.template_maker.params['theta23']
    inflection_point = 45 * ureg.degree
    theta23.value = (
        2 * inflection_point.to(theta23.value.units) - theta23.value
    )
    self.template_maker.update_params(theta23)

    _, metric_val, all_metrics, dict_flags = self.run_minimizer(
        pprint=pprint)

    # compare both results, take the one with the lower metric value
    if metric_val < best_fit[self.metric]:
        logging.info('Accepting other octant fit')
        best_fit.update({
            self.metric: metric_val,
            'warnflag': dict_flags['warnflag'],
            'all_metrics': all_metrics,
        })
        if not self.blind:
            for pname in self.template_maker.params.free.names:
                best_fit[pname] = self.template_maker.params[pname].value
    else:
        logging.info('Accepting initial octant fit')

    return best_fit
def get_binned_data(self, key, out_binning=None):
    """Get a data array from the binned data.

    Parameters
    ----------
    key : str
        Either the name of a dimension of `out_binning` (the unrolled
        binning is returned) or a key of `self.binned_data`.
    out_binning : optional
        Binning the result should correspond to. If the stored binning
        differs from it, the data is resampled (a warning is logged).

    Returns
    -------
    The unrolled binning dimension, the stored data, or the stored data
    resampled onto `out_binning`.
    """
    if out_binning is None:
        _, stored = self.binned_data[key]
        return stored

    # check if key is binning dimension
    if key in out_binning.names:
        return self.unroll_binning(key, out_binning)

    binning, stored = self.binned_data[key]
    if binning == out_binning:
        return stored

    logging.warning('Automatically re-beinning data %s'%key)
    old_coords = [
        SmartArray(self.unroll_binning(dim, binning))
        for dim in binning.names
    ]
    new_coords = [
        SmartArray(self.unroll_binning(dim, out_binning))
        for dim in out_binning.names
    ]
    return resample(stored, old_coords, binning, new_coords, out_binning)
def fix_oppo_flux(input_data):
    """Rename legacy "oppo" flux keys in place, using the module's mappings.

    Older nominal-flux calculations stored fluxes under ``oppo`` keys
    whose meaning depended on whether the container held neutrinos or
    antineutrinos. Containers that still carry ``neutrino_oppo_nue_flux``
    are renamed according to `OPPO_FLUX_LEGACY_FIX_MAPPING_NUBAR` (name
    contains ``"bar"``) or `OPPO_FLUX_LEGACY_FIX_MAPPING_NU` (otherwise),
    both of which map new key -> old key.

    Parameters
    ----------
    input_data : mapping
        Maps container names to dict-like containers.
    """
    for name, container in input_data.items():
        if "neutrino_oppo_nue_flux" not in container:
            continue

        logging.warning(
            'renaming the outdated "oppo" flux keys in "%s", in the future do'
            " not use those anymore",
            name,
        )

        rename_map = (
            OPPO_FLUX_LEGACY_FIX_MAPPING_NUBAR
            if "bar" in name
            else OPPO_FLUX_LEGACY_FIX_MAPPING_NU
        )
        for new_key, old_key in rename_map.items():
            container[new_key] = container.pop(old_key)
def get_outputs(self):
    """Get the outputs of the PISA stage.

    Depending on `self.output_mode` and `self.output_apply_keys`, this is
    a mapset built from `self.data`, the event container `self.data`
    itself, or None (with a warning) if neither applies. The result is
    also stored in `self.outputs`.

    Returns
    -------
    The value stored in `self.outputs`.
    """
    if self.output_mode == 'binned' and len(self.output_apply_keys) == 1:
        (only_key,) = self.output_apply_keys[:1]
        self.outputs = self.data.get_mapset(only_key)
        return self.outputs

    if len(self.output_apply_keys) == 2 and 'errors' in self.output_apply_keys:
        # The key that is not 'errors' holds the values
        value_keys = [
            key for key in self.output_apply_keys if not key == 'errors'
        ]
        self.outputs = self.data.get_mapset(value_keys[0], error='errors')
        return self.outputs

    if self.output_mode == "events":
        self.outputs = self.data
        return self.outputs

    self.outputs = None
    logging.warning('Cannot create CAKE style output mapset')
    return self.outputs
def load_from_nu_file(events_file, all_flavints, weight, weight_units,
                      keep_keys, aliases):
    """Load neutrino events from file and split them by flavour/interaction.

    Parameters
    ----------
    events_file : str
        Path handed to `from_file`.
    all_flavints : iterable
        Flavour/interaction objects (each exposing `cc` and `particle`)
        used both to set up the `FlavIntDataGroup` and as its keys.
    weight : str
        ``'None'`` or ``'1'`` for unit weights, ``'0'`` for zero weights,
        otherwise the name of the event field holding the weights.
    weight_units : str
        Units applied (via `ureg`) when `weight` names an event field.
    keep_keys
        Passed to `sample.strip_keys` together with the loaded events.
    aliases : iterable of (alias, expression) pairs
        Each expression is evaluated with ``<key>`` rewritten to
        ``events['key']`` and stored under `alias`.

    Returns
    -------
    flav_fidg : FlavIntDataGroup
        For each entry of `all_flavints`, a dict of all event fields
        restricted to the matching events.

    Notes
    -----
    Alias expressions are passed to `eval` and see this function's local
    names, so they must come from a trusted configuration.
    """
    flav_fidg = FlavIntDataGroup(flavint_groups=all_flavints)

    events = from_file(events_file)
    sample.strip_keys(keep_keys, events)

    # ptype sign separates particles from antiparticles;
    # interaction is 1 for CC and 2 for NC
    nu_mask = events['ptype'] > 0
    nubar_mask = events['ptype'] < 0
    cc_mask = events['interaction'] == 1
    nc_mask = events['interaction'] == 2

    if weight == 'None' or weight == '1':
        events['sample_weight'] = \
            np.ones(events['ptype'].shape) * ureg.dimensionless
    elif weight == '0':
        events['sample_weight'] = \
            np.zeros(events['ptype'].shape) * ureg.dimensionless
    else:
        events['sample_weight'] = events[weight] * \
            ureg(weight_units)
    events['pisa_weight'] = deepcopy(events['sample_weight'])

    for alias, expr in aliases:
        if alias in events:
            logging.warning(
                'Overwriting Data key {0} with aliased expression '
                '{1}'.format(alias, expr))
        events[alias] = eval(re.sub(r'\<(.*?)\>', r"events['\1']", expr))

    for flavint in all_flavints:
        i_mask = cc_mask if flavint.cc else nc_mask
        t_mask = nu_mask if flavint.particle else nubar_mask
        selection = i_mask & t_mask
        # Iterate the mapping directly: `dict.iterkeys()` only exists on
        # Python 2, while plain iteration yields the keys on 2 and 3.
        flav_fidg[flavint] = {var: events[var][selection] for var in events}

    return flav_fidg
def __init__(self, pipelines, label=None, set_livetime_from_data=True):
    """Set up the object from one or more pipelines (or pipeline specs).

    Parameters
    ----------
    pipelines : str, PISAConfigParser, OrderedDict, Pipeline, or iterable
        A single value of one of the listed types is wrapped in a list.
        Every entry that is not already a `Pipeline` is converted via
        `Pipeline(entry)`.
    label : optional
        Stored unchanged as `self.label`.
    set_livetime_from_data : bool, optional
        If True, search the stages of all pipelines for a ``"livetime"``
        entry in a `metadata` mapping. The value found is stored in
        `self.metadata['livetime']` and, if not None, written (multiplied
        by `ureg.sec`) to the `livetime` param of every pipeline that has
        one; that param is also set to fixed.

    Raises
    ------
    ValueError
        Two stages carry different ``"livetime"`` metadata.
    NameError
        Consecutive pipelines have different `_detector_name` values.
    """
    self.label = label
    self._source_code_hash = None
    self.metadata = OrderedDict()

    self._pipelines = []
    # A single pipeline specification is promoted to a one-element list
    if isinstance(pipelines, (str, PISAConfigParser, OrderedDict,
                              Pipeline)):
        pipelines = [pipelines]

    for pipeline in pipelines:
        if not isinstance(pipeline, Pipeline):
            pipeline = Pipeline(pipeline)
        self._pipelines.append(pipeline)

    data_run_livetime = None
    if set_livetime_from_data:
        #
        # Get livetime metadata if defined in any stage in any pipeline
        #
        # NOTE(review): iterating `self` relies on an `__iter__` defined
        # outside this block; presumably it yields `self._pipelines`.
        for pipeline_idx, pipeline in enumerate(self):
            for stage_idx, stage in enumerate(pipeline):
                # Only stages with a mapping-type `metadata` that contains
                # "livetime" take part in the check
                if not (hasattr(stage, "metadata") and
                        isinstance(stage.metadata, Mapping) and
                        "livetime" in stage.metadata):
                    continue

                # The first livetime found becomes the reference value
                if data_run_livetime is None:
                    data_run_livetime = stage.metadata["livetime"]

                if stage.metadata["livetime"] != data_run_livetime:
                    raise ValueError(
                        "Pipeline index {}, stage index {} has data"
                        " livetime = {}, in disagreement with"
                        " previously-found livetime = {}".format(
                            pipeline_idx,
                            stage_idx,
                            stage.metadata["livetime"],
                            data_run_livetime,
                        ))

        # Save the last livetime found inside the pipeline's metadata
        # TODO: implement metadata in the pipeline class instead
        self.metadata['livetime'] = data_run_livetime

        #
        # Set param `params.livetime` for any pipelines that have it
        #
        if data_run_livetime is not None:
            # From here on the local value carries `ureg.sec`; the value
            # stored in `self.metadata` above was saved before this step.
            data_run_livetime *= ureg.sec

            for pipeline_idx, pipeline in enumerate(self):
                if "livetime" not in pipeline.params.names:
                    continue

                pipeline.params.livetime.is_fixed = True

                if pipeline.params.livetime != data_run_livetime:
                    logging.warning(
                        "Pipeline index %d has params.livetime = %s, in"
                        " disagreement with data livetime = %s defined by"
                        " data. The pipeline's livetime param will be"
                        " reset to the latter value and set to be fixed"
                        " (if it is not alredy).",
                        pipeline_idx,
                        pipeline.params.livetime.value,
                        data_run_livetime,
                    )
                pipeline.params.livetime = data_run_livetime

    #for pipeline in self:
    #    pipeline.select_params(self.param_selections,
    #                           error_on_missing=False)

    # Make sure that all the pipelines have the same detector name (or None)
    # 'no_name' is a sentinel meaning "no pipeline inspected yet"; each
    # pipeline's name is compared with that of the previous pipeline.
    self._detector_name = 'no_name'
    for p in self._pipelines:
        name = p._detector_name
        if name != self._detector_name and self._detector_name != 'no_name':
            raise NameError(
                'Different detector names in distribution_maker pipelines')
        self._detector_name = name
def main(return_outputs=False):
    """Run unit tests if `pipeline.py` is called as a script.

    Parses the command line, builds a `Pipeline` from the given config
    (after applying any ``section arg=val`` overrides), runs either the
    whole pipeline up to an optional stop stage or a single stage, and
    writes JSON output and plots to `args.outdir` if that is set.

    Parameters
    ----------
    return_outputs : bool, optional
        If True, return the pipeline and the outputs.

    Returns
    -------
    (pipeline, outputs) if `return_outputs` is True, otherwise None.

    Raises
    ------
    ValueError
        Images were requested without an output directory, or a
        command-line override has fewer than two parts.
    NoSectionError
        A command-line override names a section missing from the config
        (re-raised after logging).
    """
    from pisa.utils.plotter import Plotter

    args = parse_args()
    set_verbosity(args.v)

    # Even if user specifies an integer on command line, it comes in as a
    # string. Try to convert to int (e.g. if `'1'` is passed to indicate the
    # second stage), and -- if successful -- use this as `args.only_stage`.
    # Otherwise, the string value passed will be used (e.g. `'osc'` could be
    # passed).
    try:
        only_stage_int = int(args.only_stage)
    except (ValueError, TypeError):
        pass
    else:
        args.only_stage = only_stage_int

    if args.outdir:
        mkdir(args.outdir)
    else:
        if args.pdf or args.png:
            raise ValueError("No --outdir provided, so cannot save images.")

    # Most basic parsing of the pipeline config (parsing only to this level
    # allows for simple strings to be specified as args for updating)
    bcp = PISAConfigParser()
    bcp.read(args.pipeline)

    # Update the config with any args specified on command line
    if args.arg is not None:
        for arg_list in args.arg:
            if len(arg_list) < 2:
                raise ValueError(
                    'Args must be formatted as: "section arg=val". Got "%s"'
                    " instead." % " ".join(arg_list))
            section = arg_list[0]
            # Everything after the section is re-joined, then split at the
            # first "=": left part is the option name, the rest its value
            remainder = " ".join(arg_list[1:])
            eq_split = remainder.split("=")
            newarg = eq_split[0].strip()
            value = ("=".join(eq_split[1:])).strip()
            logging.debug('Setting config section "%s" arg "%s" = "%s"',
                          section, newarg, value)
            try:
                bcp.set(section, newarg, value)
            except NoSectionError:
                logging.error(
                    'Invalid section "%s" specified. Must be one of %s',
                    section,
                    bcp.sections(),
                )
                raise

    # Instantiate the pipeline
    pipeline = Pipeline(bcp)  # pylint: disable=redefined-outer-name

    if args.select is not None:
        pipeline.select_params(args.select, error_on_missing=True)

    if args.only_stage is None:
        # Run from the first stage up to (and including) the stop stage,
        # which may be given as an index or as a name
        stop_idx = args.stop_after_stage
        try:
            stop_idx = int(stop_idx)
        except (TypeError, ValueError):
            pass
        if isinstance(stop_idx, str):
            stop_idx = pipeline.index(stop_idx)
        outputs = pipeline.get_outputs(idx=stop_idx)  # pylint: disable=redefined-outer-name
        # Convert the inclusive stop index into an exclusive slice bound
        if stop_idx is not None:
            stop_idx += 1
        indices = slice(0, stop_idx)
    else:
        assert args.stop_after_stage is None
        idx = pipeline.index(args.only_stage)
        stage = pipeline[idx]
        indices = slice(idx, idx + 1)

        # Create dummy inputs if necessary
        inputs = None
        if hasattr(stage, "input_binning"):
            logging.warning(
                "Stage requires input, so building dummy"
                " inputs of random numbers, with random state set to the input"
                " index according to alphabetical ordering of input names and"
                " filled in alphabetical ordering of dimension names.")
            input_maps = []
            tmp = deepcopy(stage.input_binning)
            alphabetical_binning = tmp.reorder_dimensions(sorted(tmp.names))
            for input_num, input_name in enumerate(sorted(stage.input_names)):
                # Create a new map with all 3's; name according to the input
                hist = np.full(shape=alphabetical_binning.shape,
                               fill_value=3.0)
                input_map = Map(name=input_name,
                                binning=alphabetical_binning,
                                hist=hist)

                # Apply Poisson fluctuations to randomize the values in the map
                input_map.fluctuate(method="poisson", random_state=input_num)

                # Reorder dimensions according to user's original binning spec
                input_map.reorder_dimensions(stage.input_binning)
                input_maps.append(input_map)
            inputs = MapSet(maps=input_maps, name="ones", hash=1)

        outputs = stage.run(inputs=inputs)

    for stage in pipeline[indices]:
        # Nothing is written without an output directory
        if not args.outdir:
            break
        stg_svc = stage.stage_name + "__" + stage.service_name
        fbase = os.path.join(args.outdir, stg_svc)
        if args.intermediate or stage == pipeline[indices][-1]:
            stage.outputs.to_json(fbase + "__output.json.bz2")

        # also only plot if args intermediate or last stage
        if args.intermediate or stage == pipeline[indices][-1]:
            formats = OrderedDict(png=args.png, pdf=args.pdf)
            if isinstance(stage.outputs, Data):
                # TODO(shivesh): plots made here will use the most recent
                # "pisa_weight" column and so all stages will have identical plots
                # (one workaround is to turn on "memcache_deepcopy")
                # TODO(shivesh): intermediate stages have no output binning
                if stage.output_binning is None:
                    logging.debug("Skipping plot of intermediate stage %s",
                                  stage)
                    continue
                outputs = stage.outputs.histogram_set(
                    binning=stage.output_binning,
                    nu_weights_col="pisa_weight",
                    mu_weights_col="pisa_weight",
                    noise_weights_col="pisa_weight",
                    mapset_name=stg_svc,
                    errors=True,
                )

            # NOTE(review): when `stage.outputs` is not a `Data` instance,
            # `outputs` still holds the value assigned before this loop,
            # so that is what gets plotted for every stage -- confirm this
            # is intended.
            try:
                for fmt, enabled in formats.items():
                    if not enabled:
                        continue
                    my_plotter = Plotter(
                        stamp="Event rate",
                        outdir=args.outdir,
                        fmt=fmt,
                        log=False,
                        annotate=args.annotate,
                    )
                    my_plotter.ratio = True
                    my_plotter.plot_2d_array(outputs,
                                             fname=stg_svc + "__output",
                                             cmap="RdBu")
            except ValueError as exc:
                # A plotting failure is logged but does not abort the run
                logging.error(
                    "Failed to save plot to format %s. See exception"
                    " message below",
                    fmt,
                )
                # NOTE(review): the string returned by format_exc() is
                # discarded here, so this call has no visible effect.
                traceback.format_exc()
                logging.exception(exc)
                logging.warning("I can't go on, I'll go on.")

    if return_outputs:
        return pipeline, outputs
def _init_stages(self):
    """Stage factory: Instantiate stages specified by self.config.

    Conventions required for this to work:
        * Stage and service names must be lower-case
        * Service implementations must be found at Python path
          `pisa.stages.<stage_name>.<service_name>`
        * `service` cannot be an instantiation argument for a service

    Every config entry except the one keyed ``'pipeline'`` must be keyed
    by a ``(stage_name, service_name)`` pair. A legacy ``pi_`` prefix on
    the service name is stripped (with a warning). If the service module
    is not importable from `pisa.stages`, an import from
    ``<stage_name>.<service_name>`` is attempted instead.

    Raises
    ------
    TypeError
        The instantiated service is not a `Stage`.
    Exception
        Any error raised while importing or instantiating a service is
        logged together with the stage number and re-raised.
    """
    legacy_prefix = 'pi_'
    stages = []
    for stage_num, item in enumerate(self.config.items()):
        # Bind the names used by the error handler below up front, so a
        # failure while unpacking cannot hide the real exception behind
        # an UnboundLocalError (or report names of the previous stage).
        stage_name = service_name = None
        try:
            name, settings = item

            if isinstance(name, str):
                if name == 'pipeline':
                    continue

            stage_name, service_name = name

            # old cfgs compatibility: strip only the leading prefix, not
            # later occurrences of the same characters
            if service_name.startswith(legacy_prefix):
                new_service_name = service_name[len(legacy_prefix):]
                logging.warning(
                    f"Old stage name `{service_name}` is automatically renamed to `{new_service_name}`. " +
                    "Please change your config in the future!")
                service_name = new_service_name

            logging.debug("instantiating stage %s / service %s",
                          stage_name, service_name)

            # Import service's module
            logging.trace(
                f"Importing service module: {stage_name}.{service_name}")
            try:
                module_path = f"pisa.stages.{stage_name}.{service_name}"
                module = import_module(module_path)
            except Exception:
                # Not a bare `except`: KeyboardInterrupt / SystemExit must
                # not trigger the fallback import.
                logging.debug(
                    f"Module {stage_name}.{service_name} not found in PISA, trying "
                    "to import from external definition.")
                module_path = f"{stage_name}.{service_name}"
                module = import_module(module_path)

            # Get service class from module
            service_cls = getattr(module, service_name)

            # Instantiate service
            logging.trace(
                "initializing stage.service %s.%s with settings %s"
                % (stage_name, service_name, settings))
            try:
                service = service_cls(**settings, profile=self._profile)
            except Exception:
                logging.error(
                    "Failed to instantiate stage.service %s.%s with settings %s",
                    stage_name,
                    service_name,
                    settings.keys(),
                )
                raise

            if not isinstance(service, Stage):
                raise TypeError(
                    'Trying to create service "%s" for stage #%d (%s),'
                    " but object %s instantiated from class %s is not a"
                    " PISA Stage type but instead is of type %s."
                    % (
                        service_name,
                        stage_num,
                        stage_name,
                        service,
                        service_cls,
                        type(service),
                    ))

            stages.append(service)

        except Exception:
            logging.error(
                "Failed to initialize stage #%d (stage=%s, service=%s).",
                stage_num,
                stage_name,
                service_name,
            )
            raise

    # set parameters with an identical name to the same object
    # otherwise we get inconsistent behaviour when setting repeated params
    # See Isues #566 and #648
    all_parans = self.params
    self.update_params(all_parans, existing_must_match=True, extend=False)

    # Apply the union of all services' selections to every stage
    param_selections = set()
    for service in stages:
        param_selections.update(service.param_selections)
    param_selections = sorted(param_selections)

    for stage in stages:
        stage.select_params(param_selections, error_on_missing=False)

    self._stages = stages

    self.setup()
def test_example_pipelines(ignore_gpu=False, ignore_root=False,
                           ignore_missing_data=False):
    """Run example pipelines.

    Every ``*example*.cfg`` file in the ``settings/pipeline`` resource
    directory is instantiated as a `Pipeline` and its outputs are
    computed. Failures are logged with their traceback and counted;
    selected error classes can be counted as "skipped" instead.

    Parameters
    ----------
    ignore_gpu : bool
        Do not count errors initializing a GPU as failures
    ignore_root : bool
        Do not count errors importing ROOT as failures
    ignore_missing_data : bool
        Do not count errors due to missing data files as failures

    Raises
    ------
    Exception
        At least one example pipeline failed.

    """
    # Set up the lists of strings needed to search the error messages for
    # things to ignore e.g. cuda stuff and ROOT stuff
    root_err_strings = ['ROOT', 'Roo', 'root', 'roo']
    cuda_err_strings = ['cuda']
    missing_data_string = ('Could not find resource "(.*)" in'
                           ' filesystem OR in PISA package.')

    example_directory = find_resource('settings/pipeline')
    settings_files = glob.glob(example_directory + '/*example*.cfg')

    num_configs = len(settings_files)
    failure_count = 0
    skip_count = 0

    for settings_file in settings_files:
        allow_error = False
        msg = ''
        try:
            logging.info('Instantiating pipeline from file "%s" ...',
                         settings_file)
            pipeline = Pipeline(settings_file)
            logging.info(' retrieving outputs...')
            _ = pipeline.get_outputs()

        except ImportError as err:
            exc = sys.exc_info()
            # `str(err)` rather than `err.message`: the latter attribute
            # does not exist on Python 3 exceptions.
            err_text = str(err)
            if any(errstr in err_text for errstr in root_err_strings) and \
              ignore_root:
                skip_count += 1
                allow_error = True
                msg = (' Skipping pipeline, %s, as it has ROOT dependencies'
                       ' (ROOT cannot be imported)' % settings_file)
            elif any(errstr in err_text for errstr in cuda_err_strings) and \
              ignore_gpu:
                skip_count += 1
                allow_error = True
                msg = (' Skipping pipeline, %s, as it has cuda dependencies'
                       ' (pycuda cannot be imported)' % settings_file)
            else:
                failure_count += 1

        except IOError as err:
            exc = sys.exc_info()
            match = re.match(missing_data_string, str(err), re.M | re.I)
            if match is not None and ignore_missing_data:
                skip_count += 1
                allow_error = True
                msg = (' Skipping pipeline, %s, as it has data that cannot'
                       ' be found in the local PISA environment'
                       % settings_file)
            else:
                failure_count += 1

        except: # pylint: disable=bare-except
            # Deliberately broad: any other problem counts as a failure of
            # this config, and the remaining configs are still tested.
            exc = sys.exc_info()
            failure_count += 1

        else:
            exc = None

        finally:
            if exc is not None:
                if allow_error:
                    logging.warning(msg)
                else:
                    logging.error(
                        ' FAILURE! %s failed to run. Please review the'
                        ' error message below and fix the problem. Continuing'
                        ' with any other configs now...', settings_file)
                    for line in format_exception(*exc):
                        for sub_line in line.splitlines():
                            logging.error(' ' * 4 + sub_line)
            else:
                logging.info(' Seems fine!')

    if skip_count > 0:
        logging.warning('%d of %d example pipeline config files were skipped',
                        skip_count, num_configs)

    if failure_count > 0:
        msg = ('<< FAIL : test_example_pipelines : (%d of %d EXAMPLE PIPELINE'
               ' CONFIG FILES FAILED) >>' % (failure_count, num_configs))
        logging.error(msg)
        raise Exception(msg)

    logging.info('<< PASS : test_example_pipelines >>')
def parse_pipeline_config(config):
    """Parse pipeline config.

    Parameters
    ----------
    config : string or ConfigParser

    Returns
    -------
    stage_dicts : OrderedDict
        Keys are (stage_name, service_name) tuples and values are
        OrderedDicts with keys the argnames and values the arguments'
        values. Some known arg values are parsed out fully into Python
        objects, while the rest remain as strings that must be used or
        parsed elsewhere. One additional entry, keyed ``'detector_name'``,
        holds the ``detector_name`` option of the ``pipeline`` section
        (None if absent).

    Raises
    ------
    TypeError
        `config` is neither a string nor a PISAConfigParser.
    NoSectionError
        The config has no ``binning`` section.
    IOError
        No section is found for a stage/service listed in the order.
    ValueError
        A param that already exists in a previous stage is given
        additional specifications in a later section.

    Notes
    -----
    Binning dimension definitions are passed to `eval`, so the config
    must come from a trusted source.

    """
    # Note: imports placed here to avoid circular imports
    from pisa.core.binning import MultiDimBinning, OneDimBinning
    from pisa.core.param import ParamSelector

    # NOTE(review): `basestring` is a Python 2 builtin; on Python 3 it has
    # to be provided by the module's imports -- confirm.
    if isinstance(config, basestring):
        config = from_file(config)
    elif isinstance(config, PISAConfigParser):
        pass
    else:
        raise TypeError(
            '`config` must either be a string or PISAConfigParser. Got %s '
            'instead.' % type(config))

    if not config.has_section('binning'):
        raise NoSectionError(
            "Could not find 'binning'. Only found sections: %s"
            % config.sections())

    # Create binning objects
    binning_dict = {}
    for name, value in config['binning'].items():
        # Each "<binning>.order" option lists the dimensions of one
        # binning; every dimension is defined in "<binning>.<dimension>"
        if name.endswith('.order'):
            order = split(config.get('binning', name))
            binning, _ = split(name, sep='.')
            bins = []
            for bin_name in order:
                try:
                    def_raw = config.get('binning', binning + '.' + bin_name)
                except:
                    dims_defined = [
                        split(dim, sep='.')[1] for dim in
                        config['binning'].keys() if
                        dim.startswith(binning + '.') and not
                        dim.endswith('.order')
                    ]
                    logging.error(
                        "Failed to find definition of '%s' dimension of '%s'"
                        " binning entry. Only found definition(s) of: %s",
                        bin_name, binning, dims_defined)
                    del dims_defined
                    raise
                try:
                    kwargs = eval(def_raw) # pylint: disable=eval-used
                except:
                    logging.error(
                        "Failed to evaluate definition of '%s' dimension of"
                        " '%s' binning entry:\n'%s'",
                        bin_name, binning, def_raw)
                    raise
                try:
                    bins.append(OneDimBinning(bin_name, **kwargs))
                except:
                    logging.error(
                        "Failed to instantiate new `OneDimBinning` from '%s'"
                        " dimension of '%s' binning entry with definition:\n"
                        "'%s'\n", bin_name, binning, kwargs)
                    raise
            binning_dict[binning] = MultiDimBinning(bins)

    # Pipeline section
    section = 'pipeline'

    # Get and parse the order of the stages (and which services implement them)
    order = [split(x, STAGE_SEP) for x in split(config.get(section, 'order'))]

    param_selections = []
    if config.has_option(section, 'param_selections'):
        param_selections = split(config.get(section, 'param_selections'))

    detector_name = None
    if config.has_option(section, 'detector_name'):
        detector_name = config.get(section, 'detector_name')

    # Parse [stage.<stage_name>] sections and store to stage_dicts
    stage_dicts = OrderedDict()
    for stage, service in order:
        old_section_header = 'stage%s%s' % (STAGE_SEP, stage)
        new_section_header = '%s%s%s' % (stage, STAGE_SEP, service)
        if config.has_section(old_section_header):
            logging.warning(
                '"%s" is an old-style section header, in the future use "%s"'
                % (old_section_header, new_section_header))
            section = old_section_header
        elif config.has_section(new_section_header):
            section = new_section_header
        else:
            raise IOError(
                'missing section in cfg for stage "%s" service "%s"'
                % (stage, service))

        # Instantiate dict to store args to pass to this stage
        service_kwargs = OrderedDict()

        param_selector = ParamSelector(selections=param_selections)
        service_kwargs['params'] = param_selector

        n_params = 0
        for fullname in config.options(section):
            try:
                value = config.get(section, fullname)
            except:
                logging.error(
                    'Unable to obtain value of option "%s" in section "%s".'
                    % (fullname, section))
                raise

            # See if this matches a param specification
            param_match = PARAM_RE.match(fullname)
            if param_match is not None:
                n_params += 1

                param_match_dict = param_match.groupdict()
                param_subfields = param_match_dict['subfields'].split('.')

                # Figure out what the dotted fields represent...
                infodict = interpret_param_subfields(subfields=param_subfields)

                # If field is an attr, skip since these are located manually
                if infodict['attr'] is not None:
                    continue

                # Check if this param already exists in a previous stage; if
                # so, make sure there are no specs for this param, but just a
                # link to previous the param object that is already
                # instantiated.
                for kw in stage_dicts.values():
                    # Stage did not get a `params` argument from config
                    # (`in` replaces the Python-2-only `dict.has_key`)
                    if 'params' not in kw:
                        continue

                    # Retrieve the param from the ParamSelector
                    try:
                        param = kw['params'].get(
                            name=infodict['pname'],
                            selector=infodict['selector'])
                    except KeyError:
                        continue

                    # Make sure there are no other specs (in this section) for
                    # the param defined defined in previous section
                    for a in PARAM_ATTRS:
                        if config.has_option(section, '%s.%s' % (fullname, a)):
                            raise ValueError("Parameter spec. '%s' of '%s' "
                                             "found in section '%s', but "
                                             "parameter exists in previous "
                                             "stage!" % (a, fullname, section))

                    break

                # Param *not* found in a previous stage (i.e., no explicit
                # `break` encountered in `for` loop above); therefore must
                # instantiate it.
                else:
                    param = parse_param(config=config,
                                        section=section,
                                        selector=infodict['selector'],
                                        fullname=fullname,
                                        pname=infodict['pname'],
                                        value=value)

                param_selector.update(param, selector=infodict['selector'])

            # If it's not a param spec but contains 'binning', assume it's a
            # binning spec for CAKE stages
            elif 'binning' in fullname:
                service_kwargs[fullname] = binning_dict[value]

            # it's gonna be a PI stage
            elif '_specs' in fullname:
                value = parse_string_literal(value)
                # is it None?
                if value is None:
                    service_kwargs[fullname] = value
                # is it evts?
                elif value in ['evnts', 'events']:
                    service_kwargs[fullname] = 'events'
                # so it gotta be a binning
                else:
                    service_kwargs[fullname] = binning_dict[value]

            # it's a list on in/output names list
            elif fullname.endswith('_names'):
                value = split(value)
                service_kwargs[fullname] = value

            # Otherwise it's some other stage instantiation argument; identify
            # this by its full name and try to interpret and instantiate a
            # Python object using the string
            else:
                try:
                    value = parse_quantity(value)
                    value = value.nominal_value * value.units
                except ValueError:
                    value = parse_string_literal(value)
                service_kwargs[fullname] = value

        # If no params actually specified in config, remove 'params' from the
        # service's keyword args
        if n_params == 0:
            service_kwargs.pop('params')

        # Store the service's kwargs to the stage_dicts
        stage_dicts[(stage, service)] = service_kwargs

    stage_dicts['detector_name'] = detector_name

    return stage_dicts
def main():
    """Plot the asymmetry between the outputs of the h0 and h1 hypotheses.

    Command-line arguments come from `parse_args`. The script-only arguments
    are stripped, the remainder is used to instantiate `HypoTesting`, and the
    outputs of its h0 and h1 distribution makers are handed to
    `plot_asymmetry`. Depending on how PID shows up in the outputs, plots are
    made per track/cascade channel, per PID bin, or once per output map.

    Raises
    ------
    ValueError
        If PID is a separate stage while summed outputs were requested, if
        the h0 and h1 output names differ, or if the PID bin names of h0 and
        h1 maps differ.

    """
    args = parse_args()
    init_args_d = vars(args)

    # Arguments consumed by this script are popped so that they are not
    # forwarded to the `HypoTesting` constructor further below.
    set_verbosity(init_args_d.pop('v'))
    detector = init_args_d.pop('detector')
    selection = init_args_d.pop('selection')
    atype = init_args_d.pop('atype')
    return_total = not init_args_d.pop('return_bits')

    # `*_pipeline` file names are normalized and stored under `*_maker`, the
    # naming that the `HypoTesting` constructor accepts; the comma-separated
    # param-selection strings are turned into lists.
    for hypo in ('h0', 'h1'):
        pipeline_files = init_args_d.pop(hypo + '_pipeline')
        if pipeline_files is not None:
            pipeline_files = sorted(
                [normcheckpath(fname) for fname in pipeline_files]
            )
        init_args_d[hypo + '_maker'] = pipeline_files

        selections_key = hypo + '_param_selections'
        selections_str = init_args_d[selections_key]
        if selections_str is None:
            init_args_d[selections_key] = None
        else:
            init_args_d[selections_key] = [
                item.strip().lower() for item in selections_str.split(',')
            ]

    # Placeholder values for arguments that are irrelevant for these plots
    init_args_d.update({
        'minimizer_settings': {},
        'data_is_data': None,
        'fluctuate_data': None,
        'fluctuate_fid': None,
        'metric': 'chi2',
    })

    # Without a dedicated h1 pipeline, h1 re-uses the h0 pipeline files
    if init_args_d['h1_maker'] is None:
        init_args_d['h1_maker'] = init_args_d['h0_maker']

    init_args_d['h0_maker'] = DistributionMaker(init_args_d['h0_maker'])
    init_args_d['h1_maker'] = DistributionMaker(init_args_d['h1_maker'])
    init_args_d['h1_maker'].select_params(init_args_d['h1_param_selections'])

    hypo_testing = HypoTesting(**init_args_d)

    def select_and_get_maps(maker, param_selections):
        """Select params on `maker` and return the outputs to be plotted."""
        maker.select_params(param_selections)
        for pipeline in maker.pipelines:
            if 'pid' not in pipeline.stage_names:
                return_sum = return_total
                continue
            # PID is its own stage here, which is incompatible with summing
            if return_total:
                raise ValueError(
                    "PID is a separate stage but you have requested"
                    " return_total in the arguments to this script."
                )
            return_sum = False
        maps = maker.get_outputs(return_sum=return_sum)
        # Un-summed outputs come back as a sequence; only the first entry is
        # used, i.e. a single pipeline is assumed.
        if not return_sum:
            maps = maps[0]
        return maps

    h0_maps = select_and_get_maps(
        hypo_testing.h0_maker, init_args_d['h0_param_selections']
    )
    h1_maps = select_and_get_maps(
        hypo_testing.h1_maker, init_args_d['h1_param_selections']
    )

    h0_sorted_names = sorted(h0_maps.names)
    h1_sorted_names = sorted(h1_maps.names)
    if h0_sorted_names != h1_sorted_names:
        raise ValueError(
            "The output names of your h0 and h1 pipelines "
            "do not agree - %s and %s."%(
                h0_sorted_names, h1_sorted_names
            )
        )

    # Labels built from the (optional) detector and selection names
    det_sel = [
        part for part in (detector.strip(), selection.strip()) if part != ''
    ]
    det_sel_label = ' '.join(det_sel)
    if det_sel_label != '':
        det_sel_plot_label = det_sel_label + ', '
        det_sel_file_label = (det_sel_label + '_').replace(' ', '_')
    else:
        det_sel_plot_label = ''
        det_sel_file_label = ''

    h0_name = '%s' % args.h0_name
    h1_name = '%s' % args.h1_name

    if fnmatch(''.join(h0_maps.names), '*_tr*ck*'):
        # Output names carry a track/cascade suffix: one plot per channel
        h0_trck_map = h0_maps.combine_wildcard('*_tr*ck')
        h1_trck_map = h1_maps.combine_wildcard('*_tr*ck')
        h0_cscd_map = h0_maps.combine_wildcard('*_c*sc*d*')
        h1_cscd_map = h1_maps.combine_wildcard('*_c*sc*d*')
        channels = (
            (h0_trck_map, h1_trck_map, 'track', 'trck'),
            (h0_cscd_map, h1_cscd_map, 'cascade', 'cscd'),
        )
        for h0_map, h1_map, channel_title, channel_tag in channels:
            plot_asymmetry(
                h0_map=h0_map,
                h1_map=h1_map,
                h0_name=h0_name,
                h1_name=h1_name,
                fulltitle=('%sevents identified as ' % det_sel_plot_label
                           + channel_title),
                savename='%s' % det_sel_file_label + channel_tag,
                outdir=args.logdir,
                atype=atype
            )

    elif 'pid' in h0_maps[h0_maps.names[0]].binning.names:
        # PID is a binning dimension: one plot per map and per PID bin
        for map_name in h0_maps.names:
            h0_map = h0_maps[map_name]
            h0_map.set_errors(error_hist=None)
            h1_map = h1_maps[map_name]
            h1_map.set_errors(error_hist=None)

            pid_names = h0_map.binning['pid'].bin_names
            if pid_names != h1_map.binning['pid'].bin_names:
                raise ValueError(
                    "h0 and h1 maps must have same PID bin names"
                    " in order to make the asymmetry plots"
                )
            if pid_names is None:
                logging.warning(
                    "There are no names given for the PID bins, thus "
                    "they will just be numbered in both the the plot "
                    "save names and titles."
                )
                pid_names = list(range(0, h0_map.binning['pid'].num_bins))

            for pid_name in pid_names:
                h0_to_plot = h0_map.split(dim='pid', bin=pid_name)
                h1_to_plot = h1_map.split(dim='pid', bin=pid_name)
                if isinstance(pid_name, int):
                    pid_label = 'PID Bin %i' % (pid_name)
                else:
                    pid_label = pid_name
                plot_asymmetry(
                    h0_map=h0_to_plot,
                    h1_map=h1_to_plot,
                    h0_name=h0_name,
                    h1_name=h1_name,
                    fulltitle=('%sevents identified as %s'
                               % (det_sel_plot_label, pid_label)),
                    savename=('%s_%s%s' % (map_name,
                                           det_sel_file_label,
                                           pid_label)),
                    outdir=args.logdir,
                    atype=atype
                )

    else:
        # No PID information found: one plot per output map
        for map_name in h0_maps.names:
            h0_map = h0_maps[map_name]
            h0_map.set_errors(error_hist=None)
            h1_map = h1_maps[map_name]
            h1_map.set_errors(error_hist=None)
            plot_asymmetry(
                h0_map=h0_map,
                h1_map=h1_map,
                h0_name=h0_name,
                h1_name=h1_name,
                fulltitle=('%sevents'%(det_sel_plot_label)),
                savename=('%s_%s' % (map_name, det_sel_file_label)),
                outdir=args.logdir,
                atype=atype
            )
def test_kde_histogramdd():
    """Unit test for `kde_histogramdd`.

    Toy data in (coszen, energy, pid) are histogrammed once with
    `np.histogramdd` and once with `kde_histogramdd`; both results, their
    ratio and their difference are plotted into a temporary directory. The
    directory is removed if anything fails and is otherwise left in place for
    manual inspection.
    """
    from argparse import ArgumentParser
    from shutil import rmtree
    from tempfile import mkdtemp
    from pisa import ureg
    from pisa.core.map import Map, MapSet
    from pisa.utils.log import logging, set_verbosity
    from pisa.utils.plotter import Plotter

    arg_parser = ArgumentParser()
    arg_parser.add_argument(
        "-v", action="count", default=None, help="set verbosity level"
    )
    cli_args = arg_parser.parse_args()
    set_verbosity(cli_args.v)

    temp_dir = mkdtemp()

    try:
        plotter = Plotter(
            stamp="",
            outdir=temp_dir,
            fmt="pdf",
            log=False,
            annotate=False,
            symmetric=False,
            ratio=True,
        )

        coszen_binning = OneDimBinning(
            name="coszen", num_bins=20, is_lin=True, domain=[-1, 1],
            tex=r"\cos(\theta)"
        )
        energy_binning = OneDimBinning(
            name="energy", num_bins=10, is_log=True,
            domain=[1, 80] * ureg.GeV, tex=r"E"
        )
        pid_binning = OneDimBinning(
            name="pid", num_bins=2, bin_edges=[0, 1, 2], tex=r"pid"
        )
        binning = coszen_binning * energy_binning * pid_binning

        # Toy sample; coszen values outside [-1, 1] are discarded before the
        # other two variables are drawn
        n_samples = 100000
        coszen = np.random.normal(1, 1.2, n_samples)
        in_range = (coszen >= -1) & (coszen <= 1)
        coszen = coszen[in_range]
        energy = np.random.normal(30, 20, len(coszen))
        pid = np.random.uniform(0, 2, len(coszen))
        sample = np.array([coszen, energy, pid]).T

        # Plain numpy histogram as the reference
        edges = [unp.nominal_values(dim.bin_edges) for dim in binning]
        raw_hist, _ = np.histogramdd(sample, bins=edges)

        # The histogram under test
        kde_hist = kde_histogramdd(
            sample,
            binning,
            bw_method="silverman",
            coszen_name="coszen",
            oversample=10,
            use_cuda=True,
            stack_pid=True,
        )

        kde_map = Map(name="KDE", hist=kde_hist, binning=binning)
        raw_map = Map(name="raw", hist=raw_hist, binning=binning)

        # Empty KDE bins would otherwise trigger divide/invalid warnings
        with np.errstate(divide="ignore", invalid="ignore"):
            ratio_map = raw_map / kde_map
        ratio_map.name = "hist/KDE"
        ratio_map.tex = ratio_map.name

        diff_map = kde_map - raw_map
        diff_map.name = "KDE - hist"
        diff_map.tex = diff_map.name

        all_maps = MapSet([kde_map, raw_map, ratio_map, diff_map])
        plotter.plot_2d_array(all_maps, fname="test_kde", cmap="summer")
    except BaseException:
        # Nothing worth inspecting after a failure (of any kind)
        rmtree(temp_dir)
        raise

    logging.warning("Inspect and manually clean up output(s) saved to %s"
                    % temp_dir)
def store_recursively(fhandle, node, path=None, attrs=None,
                      node_hashes=None):
    """Recursively write `node` into the open HDF5 handle `fhandle`.

    A Mapping becomes a group and each of its items is stored (recursively)
    below it; anything else is stored as a dataset. Non-scalar values that
    hash identically (via `hash_obj`) to one already written are hardlinked
    to the existing dataset instead of being written again.

    Parameters
    ----------
    fhandle
        Object providing `create_group`, `create_dataset` and item
        assignment/lookup by path (presumably an open h5py file; confirm
        against the caller).
    node
        Mapping or value to store at `path`.
    path : None or list of str
        Path components of `node` within the file; None means the root.
    attrs : None or Mapping
        Attributes attached to the group/dataset created for `node`. They
        are not propagated to children. Keys are written in sorted order
        unless `attrs` is an OrderedDict.
    node_hashes : None or OrderedDict
        Maps hashes of already-stored non-scalar nodes to their paths;
        shared across the whole recursion.

    """
    path = [] if path is None else path
    full_path = '/' + '/'.join(path)
    node_hashes = OrderedDict() if node_hashes is None else node_hashes

    if attrs is None:
        sorted_attr_keys = []
    elif isinstance(attrs, OrderedDict):
        sorted_attr_keys = attrs.keys()
    else:
        sorted_attr_keys = sorted(attrs.keys())

    if isinstance(node, Mapping):
        logging.trace(' creating Group "%s"', full_path)
        try:
            dset = fhandle.create_group(full_path)
            for key in sorted_attr_keys:
                dset.attrs[key] = attrs[key]
        except ValueError:
            # Deliberately best-effort. NOTE(review): presumably raised when
            # the group (e.g. the root) already exists -- confirm.
            pass

        for key in sorted(node.keys()):
            if isinstance(key, str):
                key_str = key
            else:
                key_str = str(key)
                logging.warning(
                    'Making string from key "%s", %s for use as'
                    ' name in HDF5 file', key_str, type(key)
                )
            val = node[key]
            new_path = path + [key_str]
            store_recursively(fhandle=fhandle, node=val, path=new_path,
                              node_hashes=node_hashes)
        return

    # Check for an existing, identical node
    node_hash = hash_obj(node)
    if node_hash in node_hashes:
        logging.trace(' creating hardlink for Dataset: "%s" -> "%s"',
                      full_path, node_hashes[node_hash])
        # Hardlink the matching existing dataset
        fhandle[full_path] = fhandle[node_hashes[node_hash]]
        return

    # For now, convert None to np.nan since h5py appears to not handle None
    if node is None:
        node = np.nan
        logging.warning(
            ' encountered `None` at node "%s"; converting to'
            ' np.nan', full_path
        )

    # "Scalar datasets don't support chunk/filter options". Shuffling is a
    # good idea otherwise since subsequent compression will generally
    # benefit; shuffling requires chunking. Compression is not done here
    # since it is slow, but can be done by post-processing the generated
    # file(s).
    if np.isscalar(node):
        shuffle = False
        chunks = None
    else:
        shuffle = True
        chunks = True
        # Only non-scalar nodes are remembered as hardlink targets
        node_hashes[node_hash] = full_path

    # -- Handle special types -- #

    # See the h5py docs at
    #   https://docs.h5py.org/en/stable/strings.html#how-to-store-text-strings
    # where `bytes` objects are deemed the most compatible way to encode
    # strings; for boolean support see
    #   https://docs.h5py.org/en/stable/faq.html#faq
    #
    # `np.bytes_` / `np.bool_` are used rather than the `np.string_` /
    # `np.bool` aliases: the former was removed in NumPy 2.0 and the latter
    # does not exist in NumPy 1.24 - 1.26.
    #
    # TODO: make written hdf5 files compatible with pytables
    # see docs at https://www.pytables.org/usersguide/datatypes.html
    if isinstance(node, string_types):
        node = np.bytes_(node)
    elif isinstance(node, bool):
        node = np.bool_(node)
    elif isinstance(node, np.ndarray):
        if issubclass(node.dtype.type, string_types):
            node = node.astype(np.bytes_)
        elif node.dtype.type in (bool, np.bool_):
            node = node.astype(np.bool_)

    logging.trace(' creating dataset at path "%s", hash %s',
                  full_path, node_hash)
    try:
        dset = fhandle.create_dataset(
            name=full_path, data=node, chunks=chunks, compression=None,
            shuffle=shuffle, fletcher32=False
        )
    except TypeError:
        # Retry without chunking/shuffling before giving up
        try:
            shuffle = False
            chunks = None
            dset = fhandle.create_dataset(
                name=full_path, data=node, chunks=chunks,
                compression=None, shuffle=shuffle, fletcher32=False
            )
        except Exception:
            logging.error(' full_path: "%s"', full_path)
            logging.error(' chunks : %s', str(chunks))
            logging.error(' shuffle : %s', str(shuffle))
            logging.error(' node : "%s"', str(node))
            raise

    for key in sorted_attr_keys:
        dset.attrs[key] = attrs[key]
def run_unit_tests(path=PISA_PATH, allow_missing=OPTIONAL_MODULES,
                   verbosity=Levels.WARN):
    """Run all tests found at `path` (or recursively below if `path` is a
    directory).

    Each module is imported and each test function is run initially with
    `set_verbosity(verbosity)`, but if an exception is caught, the module is
    re-imported or the test function is re-run with
    `set_verbosity(Levels.TRACE)`, then the traceback from the (original)
    exception emitted is displayed.

    Parameters
    ----------
    path : str
        Path to file or directory

    allow_missing : None or sequence of str
        Names of modules whose absence is tolerated: an ImportError naming
        one of these is counted as "failed but ok to ignore".

    verbosity : int in pisa.utils.log.Levels

    Raises
    ------
    Exception
        If any import or test fails not in `allow_missing`

    """
    set_verbosity(verbosity)
    logging.info("%sPlatform information:", PFX)
    logging.info("%s HOSTNAME = %s", PFX, socket.gethostname())
    logging.info("%s FQDN = %s", PFX, socket.getfqdn())
    logging.info("%s OS = %s %s", PFX, platform.system(), platform.release())
    for key, val in cpuinfo.get_cpu_info().items():
        logging.info("%s %s = %s", PFX, key, val)
    logging.info(PFX)
    logging.info("%sModule versions:", PFX)
    for module_name in REQUIRED_MODULES + OPTIONAL_MODULES:
        try:
            module = import_module(module_name)
        except ImportError:
            if module_name in REQUIRED_MODULES:
                raise
            ver = "optional module not installed or not import-able"
        else:
            if hasattr(module, "__version__"):
                ver = module.__version__
            else:
                ver = "?"
        logging.info("%s %s : %s", PFX, module_name, ver)
    logging.info(PFX)

    path = expand(path, absolute=True, resolve_symlinks=True)
    if allow_missing is None:
        allow_missing = []
    elif isinstance(allow_missing, str):
        allow_missing = [allow_missing]

    tests = find_unit_tests(path)

    module_pypaths_succeeded = []
    module_pypaths_failed = []
    module_pypaths_failed_ignored = []
    test_pypaths_succeeded = []
    test_pypaths_failed = []
    test_pypaths_failed_ignored = []

    for rel_file_path, test_func_names in tests.items():
        # Translate the relative file path (minus its 3-character extension)
        # into a dotted module path below the `pisa` package
        pypath = ["pisa"] + rel_file_path[:-3].split("/")
        parent_pypath = ".".join(pypath[:-1])
        module_name = pypath[-1].replace(".", "_")
        module_pypath = f"{parent_pypath}.{module_name}"

        try:
            set_verbosity(verbosity)
            logging.info(PFX + f"importing {module_pypath}")

            set_verbosity(Levels.WARN)
            module = import_module(module_pypath, package=parent_pypath)

        except Exception as err:
            if (
                isinstance(err, ImportError)
                and hasattr(err, "name")
                and err.name in allow_missing  # pylint: disable=no-member
            ):
                err_name = err.name  # pylint: disable=no-member
                module_pypaths_failed_ignored.append(module_pypath)
                logging.warning(
                    f"{PFX}module {err_name} failed to import wile importing"
                    f" {module_pypath}, but ok to ignore"
                )
                continue

            module_pypaths_failed.append(module_pypath)

            set_verbosity(verbosity)
            msg = f"<< FAILURE IMPORTING : {module_pypath} >>"
            logging.error(PFX + "=" * len(msg))
            logging.error(PFX + msg)
            logging.error(PFX + "=" * len(msg))

            # Reproduce the failure with full output. The full dotted path
            # must be used here: importing the bare `module_name` would look
            # for an unrelated top-level module and fail for another reason.
            set_verbosity(Levels.TRACE)
            try:
                import_module(module_pypath, package=parent_pypath)
            except Exception:
                pass

            set_verbosity(Levels.TRACE)
            logging.exception(err)

            set_verbosity(verbosity)
            logging.error(PFX + "#" * len(msg))

            continue

        else:
            module_pypaths_succeeded.append(module_pypath)

        for test_func_name in test_func_names:
            test_pypath = f"{module_pypath}.{test_func_name}"
            try:
                set_verbosity(verbosity)
                logging.debug(PFX + f"getattr({module}, {test_func_name})")

                set_verbosity(Levels.WARN)
                test_func = getattr(module, test_func_name)

                # Run the test function
                set_verbosity(verbosity)
                logging.info(PFX + f"{test_pypath}()")

                set_verbosity(Levels.WARN)
                test_func()

            except Exception as err:
                if (
                    isinstance(err, ImportError)
                    and hasattr(err, "name")
                    and err.name in allow_missing  # pylint: disable=no-member
                ):
                    err_name = err.name  # pylint: disable=no-member
                    # Record the test (not its module) so that the summary
                    # counts ignored *tests*
                    test_pypaths_failed_ignored.append(test_pypath)
                    logging.warning(
                        PFX
                        + f"{test_pypath} failed because module {err_name} failed to"
                        + f" load, but ok to ignore"
                    )
                    continue

                test_pypaths_failed.append(test_pypath)

                set_verbosity(verbosity)
                msg = f"<< FAILURE RUNNING : {test_pypath} >>"
                logging.error(PFX + "=" * len(msg))
                logging.error(PFX + msg)
                logging.error(PFX + "=" * len(msg))

                # Reproduce the error with full output
                set_verbosity(Levels.TRACE)
                try:
                    test_func = getattr(module, test_func_name)
                    with np.printoptions(
                        precision=np.finfo(pisa.FTYPE).precision + 2,
                        floatmode="fixed",
                        sign=" ",
                        linewidth=200,
                    ):
                        test_func()
                except Exception:
                    pass

                set_verbosity(Levels.TRACE)
                logging.exception(err)

                set_verbosity(verbosity)
                logging.error(PFX + "#" * len(msg))

            else:
                test_pypaths_succeeded.append(test_pypath)

            finally:
                # remove references to the test function, e.g. to remove refs
                # to pycuda / numba.cuda contexts so these can be closed
                try:
                    del test_func
                except NameError:
                    pass

        # NOTE: Until we get all GPU code into Numba, need to unload pycuda
        # and/or numba.cuda contexts before a module requiring the other one
        # is to be imported.
        # NOTE: the following causes a traceback to be emitted at the very end
        # of the script, regardless of the exception catching here.
        if (
            pisa.TARGET == "cuda"
            and pycuda is not None
            and hasattr(pycuda, "autoinit")
            and hasattr(pycuda.autoinit, "context")
        ):
            try:
                pycuda.autoinit.context.detach()
            except Exception:
                pass

        # Attempt to unload the imported module
        # TODO: pipeline, etc. fail as isinstance(service, (Stage, PiStage)) is False
        #if module_pypath in sys.modules and module_pypath != "pisa":
        #    del sys.modules[module_pypath]
        #del module

        # TODO: crashes program; subseqeunt calls in same shell crash(!?!?)
        # if pisa.TARGET == 'cuda' and nbcuda is not None:
        #     try:
        #         nbcuda.close()
        #     except Exception:
        #         pass

    # Summarize results

    n_import_successes = len(module_pypaths_succeeded)
    n_import_failures = len(module_pypaths_failed)
    n_import_failures_ignored = len(module_pypaths_failed_ignored)
    n_test_successes = len(test_pypaths_succeeded)
    n_test_failures = len(test_pypaths_failed)
    n_test_failures_ignored = len(test_pypaths_failed_ignored)

    set_verbosity(verbosity)
    logging.info(
        PFX + f"<< IMPORT TESTS : {n_import_successes} imported,"
        f" {n_import_failures} failed,"
        f" {n_import_failures_ignored} failed to import but ok to ignore >>"
    )
    logging.info(
        PFX + f"<< UNIT TESTS : {n_test_successes} succeeded,"
        f" {n_test_failures} failed,"
        f" {n_test_failures_ignored} failed but ok to ignore >>"
    )

    # Exit with error if any failures (import or unit test)

    if module_pypaths_failed or test_pypaths_failed:
        msgs = []
        if module_pypaths_failed:
            msgs.append(
                f"{n_import_failures} module(s) failed to import:\n "
                + ", ".join(module_pypaths_failed)
            )
        if test_pypaths_failed:
            msgs.append(
                f"{n_test_failures} unit test(s) failed:\n "
                + ", ".join(test_pypaths_failed)
            )

        # Note the extra newlines before the exception to make it stand out;
        # and newlines after the exception are due to the pycuda error message
        # that is emitted when we call pycuda.autoinit.context.detach()
        sys.stdout.flush()
        sys.stderr.write("\n\n\n")
        raise Exception("\n".join(msgs) + "\n\n\n")
def run_minimizer(self, pprint=True, skip=False):
    """Minimize the metric over the free parameters of the template maker.

    Parameters
    ----------
    pprint : bool
        Passed on to `self._minimizer_callable`; if True, an empty line is
        printed once the minimizer has finished.
    skip : bool
        If True, the minimizer is not run: the metric is evaluated once at
        the initial (rescaled) parameter values and those values are
        reported as the best fit.

    Returns
    -------
    best_fit_vals
        Rescaled parameter values at the minimum (the start values if
        `skip`).
    metric_val
        Value returned by `self._minimizer_callable` at `best_fit_vals`.
    all_metrics : dict
        'llh' and 'chi2' of the pseudodata w.r.t. the current template, each
        including the priors penalty.
    dict_flags : dict
        Status information about the minimizer run.

    """
    # Get initial values
    x0 = self.template_maker.params.free._rescaled_values

    # bfgs steps outside of given bounds by 1 epsilon to evaluate gradients;
    # the step size may be configured under either of two option names
    minimizer_options = self.minimizer_settings['options']['value']
    try:
        epsilon = minimizer_options['eps']
    except KeyError:
        epsilon = minimizer_options['epsilon']
    bounds = [(0 + epsilon, 1 - epsilon)] * len(x0)
    logging.info('running the %s optimizer',
                 self.minimizer_settings['method']['value'])

    # Using scipy.opt.minimize allows a whole host of minimisers to be used
    # This set by the method value in your minimiser settings file
    self.n_minimizer_calls = 0

    if skip:
        best_fit_vals = x0
        metric_val = self._minimizer_callable(x0, False)
        dict_flags = {
            'warnflag': 0,
            'task': 'skip',
            'funcalls': 0,
            'nit': 0,
            'avg_tmp_time': 0,
            'n_minimizer_calls': 0
        }
    else:
        start_t = time.time()
        minim_result = opt.minimize(
            fun=self._minimizer_callable,
            x0=x0,
            args=(pprint, ),
            bounds=bounds,
            method=self.minimizer_settings['method']['value'],
            options=minimizer_options)
        end_t = time.time()

        # Additional metrics
        avg_tmp_time = (end_t - start_t) * 1000. / self.n_minimizer_calls
        if pprint:
            # clear the line
            print('')
        print(
            '\naverage template generation time during minimizer run: %.4f ms'
            % avg_tmp_time)

        best_fit_vals = minim_result.x
        metric_val = minim_result.fun
        template = self.template_maker.get_outputs()
        template = [t.combine_wildcard('*') for t in template]
        template[0].name = 'total'
        dict_flags = {}
        # The priors penalty comes from this object's own template maker
        mod_chi2_val = (
            self.pseudodata.metric_total(expected_values=template,
                                         metric='mod_chi2')
            + self.template_maker.params.priors_penalty(metric='mod_chi2'))
        dict_flags['agreement_mod_chi2'] = mod_chi2_val
        dict_flags['warnflag'] = minim_result.status
        dict_flags['task'] = minim_result.message
        if 'jac' in minim_result:
            dict_flags['grad'] = minim_result.jac
        dict_flags['funcalls'] = minim_result.nfev
        dict_flags['nit'] = minim_result.nit
        dict_flags['avg_tmp_time'] = avg_tmp_time
        dict_flags['n_minimizer_calls'] = self.n_minimizer_calls

    if dict_flags['warnflag'] > 0:
        logging.warning(str(dict_flags))

    all_metrics = {}
    template = self.template_maker.get_outputs()
    template = [t.combine_wildcard('*') for t in template]
    template[0].name = 'total'
    #for metric in ['llh', 'conv_llh', 'barlow_llh','chi2', 'mod_chi2']:
    for metric in ['llh', 'chi2']:
        all_metrics[metric] = (
            self.pseudodata.metric_total(expected_values=template,
                                         metric=metric)
            + self.template_maker.params.priors_penalty(metric=metric))

    return best_fit_vals, metric_val, all_metrics, dict_flags
def plot_map_comparisons(ref_map, new_map, ref_abv, new_abv, outdir, subdir,
                         name, texname, stagename, servicename,
                         shorttitles=False, ftype='png'):
    """Plot comparisons between two identically-binned PISA 3 style maps.

    Five panels are drawn: the reference map, the new map, their ratio
    (new/ref), their difference (new-ref) and the fractional difference
    ((new-ref)/ref).

    Parameters
    ----------
    ref_map, new_map
        Maps to compare; checked with `validate_map_objs`.
    ref_abv, new_abv : str
        Abbreviations for the two maps, used in titles and the file name.
    outdir : str or None
        Base output directory; if None, nothing is plotted or written.
    subdir : str or None
        Sub-directory of `outdir`; defaults to `stagename.lower()` when
        `stagename` is given, otherwise no sub-directory is used.
    name, texname, stagename, servicename : str or None
        Optional components of the file name and/or plot titles.
    shorttitles : bool
        Label the maps "(A)" (reference) and "(B)" (new) and title the
        derived panels accordingly, instead of spelling everything out.
    ftype : str
        File extension of the saved figure.

    Returns
    -------
    max_diff_ratio : float
        Largest finite absolute fractional difference.
    max_diff : float
        Largest absolute difference among bins whose fractional difference
        is not finite; 0.0 if the fractional difference is finite
        everywhere.

    """
    path = [outdir]

    # Only fall back onto the stage name if there is one
    if subdir is None and stagename is not None:
        subdir = stagename.lower()
    if subdir is not None:
        path.append(subdir)

    if outdir is not None:
        mkdir(os.path.join(*path), warn=False)

    if stagename is not None:
        fname = ['%s_%s_comparisons' %(ref_abv.lower(), new_abv.lower()),
                 'stage_'+stagename]
    else:
        fname = ['%s_%s_comparisons' %(ref_abv.lower(), new_abv.lower())]
    if servicename is not None:
        fname.append('service_'+servicename)
    if name is not None:
        fname.append(name.lower())
    fname = '__'.join(fname) + '.' + ftype

    path.append(fname)

    basetitle = []
    if stagename is not None:
        basetitle.append('%s' % stagename)
    if texname is not None:
        basetitle.append(r'$%s$' % texname)
    basetitle = ' '.join(basetitle)

    validate_map_objs(new_map, ref_map)
    with np.errstate(divide='ignore', invalid='ignore'):
        ratio_map = new_map/ref_map
    diff_map = new_map - ref_map
    with np.errstate(divide='ignore', invalid='ignore'):
        diff_ratio_map = diff_map/ref_map
    max_diff_ratio = np.nanmax(np.abs(diff_ratio_map.hist))

    # Handle cases where ratio returns infinite
    # This isn't necessarily a fail, since all it means is the reference was
    # zero. If the new value is sufficiently close to zero then it's still
    # fine.
    if max_diff_ratio == float('inf'):
        logging.warning(
            'Infinite value found in ratio tests. Difference tests '
            'now also being calculated'
        )
        # First find all the finite elements
        finite_map = np.isfinite(diff_ratio_map.hist)
        # Then find the nanmax of this, will be our new test value
        max_diff_ratio = np.nanmax(np.abs(diff_ratio_map.hist[finite_map]))
        # Also find all the infinite elements
        infinite_map = np.logical_not(finite_map)
        # This will be a second test value
        max_diff = np.nanmax(np.abs(diff_map.hist[infinite_map]))
    else:
        # Without any infinite elements we can ignore this second test
        max_diff = 0.0

    if outdir is not None:
        gridspec_kw = dict(left=0.03, right=0.968, wspace=0.32)
        fig, axes = plt.subplots(nrows=1, ncols=5, gridspec_kw=gridspec_kw,
                                 sharex=False, sharey=False, figsize=(20, 5))
        if shorttitles:
            # A is the reference and B the new map; the derived maps are
            # new/ref, new-ref and (new-ref)/ref, i.e. B/A, B-A and (B-A)/A
            ref_title = basetitle+' '+ref_abv+' (A)'
            new_title = basetitle+' '+new_abv+' (B)'
            ratio_title = 'B/A'
            diff_title = 'B-A'
            diff_ratio_title = '(B-A)/A'
        else:
            ref_title = basetitle+' '+ref_abv
            new_title = basetitle+' '+new_abv
            ratio_title = basetitle+' %s/%s' %(new_abv, ref_abv)
            diff_title = basetitle+' %s-%s' %(new_abv, ref_abv)
            diff_ratio_title = basetitle+' (%s-%s)/%s' %(new_abv, ref_abv,
                                                         ref_abv)

        ref_map.plot(fig=fig, ax=axes[0], title=ref_title,
                     cmap=plt.cm.afmhot)
        new_map.plot(fig=fig, ax=axes[1], title=new_title,
                     cmap=plt.cm.afmhot)
        ratio_map.plot(fig=fig, ax=axes[2], title=ratio_title,
                       cmap=plt.cm.afmhot)
        diff_map.plot(fig=fig, ax=axes[3], title=diff_title, symm=True,
                      cmap=plt.cm.seismic)
        diff_ratio_map.plot(fig=fig, ax=axes[4], title=diff_ratio_title,
                            symm=True, cmap=plt.cm.seismic)

        logging.debug('>>>> Plot for inspection saved at %s',
                      os.path.join(*path))
        fig.savefig(os.path.join(*path))
        plt.close(fig.number)

    return max_diff_ratio, max_diff
def get_outputs(self, output_mode=None, force_standard_output=True):
    """Produce, store in `self.outputs` and return the outputs of the stage.

    Parameters
    ----------
    output_mode : None, 'binned' or 'events'
        Overrides `self.output_mode` if given.
    force_standard_output : bool
        Only relevant in 'binned' mode: if True, a single mapset is
        produced; otherwise one mapset per entry of
        `self.output_apply_keys` is produced and collected in an
        OrderedDict.

    Returns
    -------
    outputs
        Mapset(s) in 'binned' mode, `self.data` itself in 'events' mode, and
        None (after logging a warning) for any other mode.

    """
    if output_mode is None:
        # No user preference: fall back onto the stage's own mode
        output_mode = self.output_mode
    else:
        assert output_mode in ('binned', 'events'), 'ERROR: user-specified output mode is unrecognized'

    if output_mode == 'events':
        self.outputs = self.data
        return self.outputs

    if output_mode != 'binned':
        self.outputs = None
        logging.warning(
            'pi_stage.py: Cannot create CAKE style output mapset')
        return self.outputs

    apply_keys = self.output_apply_keys

    if not force_standard_output:
        # One mapset per requested output key
        self.outputs = OrderedDict()
        for apply_key in apply_keys:
            self.outputs[apply_key] = self.data.get_mapset(apply_key)
        return self.outputs

    if self.map_output_key:
        # Map (and its error) are explicitly named via `map_output_key` and
        # `map_output_error_key`
        self.outputs = self.data.get_mapset(
            self.map_output_key,
            error=self.map_output_error_key,
        )
    elif len(apply_keys) == 2 and 'errors' in apply_keys:
        # Compatibility case: exactly two keys, one of which is 'errors'
        value_key = [k for k in apply_keys if k != 'errors'][0]
        self.outputs = self.data.get_mapset(value_key, error='errors')
    else:
        # The first key is the map; errors are attached only if 'errors' is
        # among the keys
        error_kwargs = {'error': 'errors'} if 'errors' in apply_keys else {}
        self.outputs = self.data.get_mapset(apply_keys[0], **error_kwargs)

    return self.outputs
def plot_cmp(new, ref, new_label, ref_label, plot_label, file_label, outdir,
             ftype='png'):
    """Plot comparisons between two (identically-binned) maps or map sets.

    Parameters
    ----------
    new : Map or MapSet
    ref : Map or MapSet
        If not a Map, `ref` is treated as a map set: maps are paired with
        those of `new` by name (via `ref.names` and indexing by name).
    new_label : str
    ref_label : str
    plot_label : str
        Currently unused.
    file_label : str
    outdir : str or None
        Output directory; if None, nothing is plotted or written.
    ftype : str

    Returns
    -------
    max_diff_ratio : float
        Largest finite fractional difference (new-ref)/ref.
    max_diff : float
        Largest difference among bins where the fractional difference is not
        finite; 0.0 if there are none.

    For map sets, the largest value over all map pairs is returned for each
    of the two quantities.

    Raises
    ------
    ValueError
        If there is nothing to compare, or if plots are requested for maps
        that are neither 2- nor 3-dimensional.

    """
    # NOTE(review): the fractional difference is not made absolute here,
    # unlike in `plot_map_comparisons`; left as-is to keep returned values
    # unchanged -- confirm whether that is intended.
    if isinstance(ref, Map):
        assert isinstance(new, Map)
        map_pairs = [(ref, new)]
    else:
        map_pairs = [(ref[map_name], new[map_name]) for map_name in ref.names]
    if not map_pairs:
        raise ValueError('No maps to compare.')

    if outdir is not None:
        mkdir(os.path.join(outdir), warn=False)

    max_diff_ratios = []
    max_diffs = []
    for ref_map, new_map in map_pairs:
        name_parts = [get_valid_filename(file_label)]
        if len(map_pairs) > 1:
            # Keep the plots of different maps from overwriting one another
            name_parts.append(get_valid_filename(ref_map.name))
        name_parts.append(
            '%s_vs_%s' %(get_valid_filename(new_label.lower()),
                         get_valid_filename(ref_label.lower()))
        )
        fname = get_valid_filename('__'.join(name_parts) + '.' + ftype)
        # The path is rebuilt for every map rather than accumulated
        fpath = None if outdir is None else os.path.join(outdir, fname)

        this_max_diff_ratio, this_max_diff = _plot_cmp_single(
            new=new_map, ref=ref_map, new_label=new_label,
            ref_label=ref_label, fpath=fpath,
        )
        max_diff_ratios.append(this_max_diff_ratio)
        max_diffs.append(this_max_diff)

    if len(map_pairs) == 1:
        return max_diff_ratios[0], max_diffs[0]
    return np.nanmax(max_diff_ratios), np.nanmax(max_diffs)


def _plot_cmp_single(new, ref, new_label, ref_label, fpath):
    """Compare one pair of maps; plot to `fpath` unless it is None."""
    assert ref.binning == new.binning

    ratio = new / ref
    diff = new - ref
    fract_diff = diff / ref

    # Summary statistics (over finite bins only) annotated on the plots
    finite_ratio = ratio.hist[np.isfinite(ratio.hist)]
    ratio_mean = np.mean(finite_ratio)
    ratio_median = np.median(finite_ratio)

    finite_diff = diff.hist[np.isfinite(diff.hist)]
    diff_mean = np.mean(finite_diff)
    diff_median = np.median(finite_diff)

    finite_fract_diff = fract_diff.hist[np.isfinite(fract_diff.hist)]
    fract_diff_mean = np.mean(finite_fract_diff)
    fract_diff_median = np.median(finite_fract_diff)

    max_diff_ratio = np.nanmax(fract_diff.hist)

    # Handle cases where ratio returns infinite
    # This isn't necessarily a fail, since all it means is the reference was
    # zero. If the new value is sufficiently close to zero then it's still
    # fine.
    if max_diff_ratio == np.inf:
        logging.warning(
            'Infinite value found in ratio tests. Difference tests'
            ' now also being calculated'
        )
        # First find all the finite elements
        finite_mask = np.isfinite(fract_diff.hist)
        # Then find the nanmax of this, will be our new test value
        max_diff_ratio = np.nanmax(fract_diff.hist[finite_mask])
        # Also find all the infinite elements; compute a second test value
        max_diff = np.nanmax(diff.hist[~finite_mask])
    else:
        # Without any infinite elements we can ignore this second test
        max_diff = 0.0

    if fpath is None:
        return max_diff_ratio, max_diff

    n_dims = new.binning.num_dims
    if n_dims == 2:
        odd_dim_idx = None
        n_third_dim_bins = 1
    elif n_dims == 3:
        # The dimension with the fewest bins gets one row of plots per bin
        odd_dim_idx = new.binning.shape.index(np.min(new.binning.shape))
        logging.debug('odd_dim_idx: %s', odd_dim_idx)
        n_third_dim_bins = new.binning.shape[odd_dim_idx]
    else:
        raise ValueError(
            'Can only plot 2- or 3-dimensional maps; got %s dimension(s).'
            % n_dims
        )

    gridspec_kw = dict(left=0.03, right=0.968, wspace=0.32)
    fig, axes = plt.subplots(nrows=n_third_dim_bins, ncols=5,
                             gridspec_kw=gridspec_kw, squeeze=False,
                             sharex=False, sharey=False, figsize=(20, 5))

    refslice = ref
    newslice = new
    bin_names = None
    if n_dims == 3:
        if odd_dim_idx != 0:
            refslice = np.moveaxis(ref, source=odd_dim_idx, destination=0)
            newslice = np.moveaxis(new, source=odd_dim_idx, destination=0)
        bin_names = new.binning.dims[odd_dim_idx].bin_names

    for odd_bin_idx in range(n_third_dim_bins):
        if n_dims == 2:
            thisbin_ref = refslice
            thisbin_new = newslice
            tmp_ref_label = ref_label
            tmp_new_label = new_label
        else:
            thisbin_ref = refslice[odd_bin_idx, ...].squeeze()
            thisbin_new = newslice[odd_bin_idx, ...].squeeze()
            if bin_names is not None:
                suffix = bin_names[odd_bin_idx]
            else:
                suffix = format(odd_bin_idx, 'd')
            tmp_new_label = new_label + ' ' + suffix
            tmp_ref_label = ref_label + ' ' + suffix

        ratio = thisbin_new / thisbin_ref
        diff = thisbin_new - thisbin_ref
        fract_diff = diff / thisbin_ref

        # Common color scale for the two event-rate panels
        refmax = np.nanmax(thisbin_ref.hist)
        newmax = np.nanmax(thisbin_new.hist)
        vmax = refmax if refmax > newmax else newmax

        baseplot2(map=thisbin_new,
                  title=tmp_new_label,
                  vmax=vmax,
                  evtrate=True,
                  ax=axes[odd_bin_idx][0])

        baseplot2(map=thisbin_ref,
                  title=tmp_ref_label,
                  vmax=vmax,
                  evtrate=True,
                  ax=axes[odd_bin_idx][1])

        ax, _, _ = baseplot2(map=ratio,
                             title='%s/%s' %(tmp_new_label, tmp_ref_label),
                             ax=axes[odd_bin_idx][2])
        ax.text(0.95, 0.95, "Mean: %.6f"%ratio_mean,
                horizontalalignment='right',
                transform=ax.transAxes, color=(0, 0.8, 0.8))
        ax.text(0.95, 0.91, "Median: %.6f"%ratio_median,
                horizontalalignment='right',
                transform=ax.transAxes, color=(0, 0.8, 0.8))

        ax, _, _ = baseplot2(map=diff,
                             title='%s-%s' %(tmp_new_label, tmp_ref_label),
                             symm=True, ax=axes[odd_bin_idx][3])
        ax.text(0.95, 0.95, "Mean: %.6f"%diff_mean,
                horizontalalignment='right',
                transform=ax.transAxes)
        ax.text(0.95, 0.91, "Median: %.6f"%diff_median,
                horizontalalignment='right',
                transform=ax.transAxes)

        ax, _, _ = baseplot2(map=fract_diff,
                             title='(%s-%s)/%s' %(tmp_new_label,
                                                  tmp_ref_label,
                                                  tmp_ref_label),
                             symm=True,
                             ax=axes[odd_bin_idx][4])
        ax.text(0.95, 0.95, "Mean: %.6f"%fract_diff_mean,
                horizontalalignment='right',
                transform=ax.transAxes)
        ax.text(0.95, 0.91, "Median: %.6f"%fract_diff_median,
                horizontalalignment='right',
                transform=ax.transAxes)

    logging.debug('>>>> Plot for inspection saved at %s', fpath)
    fig.savefig(fpath)
    plt.close(fig.number)

    return max_diff_ratio, max_diff
def __init__(
    self,
    use_transforms,
    params=None,
    expected_params=None,
    input_names=None,
    output_names=None,
    error_method=None,
    disk_cache=None,
    memcache_deepcopy=True,
    transforms_cache_depth=10,
    outputs_cache_depth=0,
    input_binning=None,
    output_binning=None,
    debug_mode=None,
):
    """Set up binning, the base class, caches and bookkeeping attributes.

    Parameters
    ----------
    use_transforms : bool
        Whether or not the stage uses transforms.
    params, expected_params, input_names, output_names, error_method, \
debug_mode
        Passed on unchanged to the base-class constructor.
    disk_cache
        Stored as `self.disk_cache`; `self.instantiate_disk_cache()` is
        called at the end of initialization.
    memcache_deepcopy : bool
        `deepcopy` setting of all memory caches created here.
    transforms_cache_depth : int
        Depth of the transforms and nominal-transforms memory caches.
    outputs_cache_depth : int
        Depth of the outputs memory cache; no such cache is created unless
        this is greater than 0.
    input_binning, output_binning
        Stored on the instance and checked via `self.validate_binning()`.

    """
    logging.warning('This is a cake-style PISA stage, which is DEPRECATED!')

    self.use_transforms = use_transforms
    """Whether or not stage uses transforms"""

    self._events_hash = None

    self.input_binning = input_binning
    self.output_binning = output_binning
    self.validate_binning()

    # init base class!
    super(Stage, self).__init__(
        params=params,
        expected_params=expected_params,
        input_names=input_names,
        output_names=output_names,
        debug_mode=debug_mode,
        error_method=error_method,
    )

    # Storage of latest transforms and outputs; default to empty
    # TransformSet and None, respectively.

    self.transforms = TransformSet([])
    """A stage that takes to-be-transformed inputs and has had these
    transforms computed stores them here. Before computation,
    `transforms` is an empty TransformSet; a stage that does not make use
    of these (such as a no-input stage) has an empty TransformSet."""

    self.memcache_deepcopy = memcache_deepcopy

    self.transforms_cache_depth = int(transforms_cache_depth)

    self.transforms_cache = None
    """Memory cache object for storing transforms"""

    self.nominal_transforms_cache = None
    """Memory cache object for storing nominal transforms"""

    self.full_hash = True
    """Whether to do full hashing if true, otherwise do fast hashing"""

    self.transforms_cache = MemoryCache(
        max_depth=self.transforms_cache_depth,
        is_lru=True,
        deepcopy=self.memcache_deepcopy,
    )
    self.nominal_transforms_cache = MemoryCache(
        max_depth=self.transforms_cache_depth,
        is_lru=True,
        deepcopy=self.memcache_deepcopy,
    )

    self.outputs_cache_depth = int(outputs_cache_depth)

    self.outputs_cache = None
    """Memory cache object for storing outputs (excludes sideband
    objects)."""

    # An outputs cache is only created for a positive cache depth
    if self.outputs_cache_depth > 0:
        self.outputs_cache = MemoryCache(
            max_depth=self.outputs_cache_depth,
            is_lru=True,
            deepcopy=self.memcache_deepcopy,
        )

    self.disk_cache = disk_cache
    """Disk cache object"""

    self.disk_cache_path = None
    """Path to disk cache file for this stage/service (or None)."""

    # Include each attribute here for hashing if it is defined and its
    # value is not None
    default_attrs_to_hash = [
        "input_names",
        "output_names",
        "input_binning",
        "output_binning",
    ]
    self._attrs_to_hash = set()
    for attr in default_attrs_to_hash:
        if not hasattr(self, attr):
            continue
        val = getattr(self, attr)
        if val is None:
            continue
        try:
            self.include_attrs_for_hashes(attr)
        except ValueError:
            # The exception *class* must be named here: an instance as the
            # `except` target makes Python raise a TypeError instead of
            # swallowing the ValueError.
            pass

    self.events = None
    self.nominal_transforms = None

    # Define useful flags and values for debugging behavior after running

    self.nominal_transforms_loaded_from_cache = None
    """Records which cache nominal transforms were loaded from, or None."""

    self.nominal_transforms_computed = False
    """Records whether nominal transforms were (re)computed."""

    self.transforms_loaded_from_cache = None
    """Records which cache transforms were loaded from, or None."""

    self.transforms_computed = False
    """Records whether transforms were (re)computed."""

    self.nominal_outputs_computed = False
    """Records whether nominal outputs were (re)computed."""

    self.outputs_loaded_from_cache = None
    """Records which cache outputs were loaded from, or None."""

    self.outputs_computed = False
    """Records whether outputs were (re)computed."""

    self.nominal_transforms_hash = None
    self.transforms_hash = None
    self.nominal_outputs_hash = None
    self.outputs_hash = None
    self.instantiate_disk_cache()
def inj_param_scan(return_outputs=False):
    """Load the HypoTesting class and use it to do an Asimov test across the
    space of one of the injected parameters.

    The user will define the parameter and pass a numpy-interpretable string to
    set the range of values. For example, one could scan over the space of
    theta23 by using a string such as `"numpy.linspace(0.35, 0.65, 31)"` which
    will then be evaluated to figure out a space of theta23 to inject and run
    Asimov tests.
    """
    # NOTE: the docstring above doubles as the command-line description (it is
    # passed to `parse_args` below), so the API notes are kept in this comment.
    #
    # Parameters
    #   return_outputs : bool
    #       If True, return whatever `HypoTesting.asimov_inj_param_scan`
    #       returns; otherwise the function returns None.
    #
    # Raises
    #   ValueError
    #       * if `param_name` is 'deltam31' (not implemented),
    #       * if `param_name` is 'deltam3l' and the injected values contain a
    #         zero or are not all of the same sign,
    #       * if keeping the injected prior was requested but the scanned
    #         parameter has no prior.

    # NOTE: import here to avoid circular refs
    from pisa.scripts.analysis import parse_args
    init_args_d = parse_args(description=inj_param_scan.__doc__,
                             command=inj_param_scan)

    # Normalize and convert `*_pipeline` filenames; store to `*_maker`
    # (which is argument naming convention that HypoTesting init accepts).
    # For this test, pipeline is required so we don't need the try arguments
    # or the checks on it being None
    filenames = init_args_d.pop('pipeline')
    filenames = sorted([normcheckpath(fname) for fname in filenames])
    init_args_d['h0_maker'] = filenames
    # However, we do need them for the selections, since they can be different
    for maker in ['h0', 'h1', 'data']:
        ps_name = maker + '_param_selections'
        ps_str = init_args_d[ps_name]
        if ps_str is None:
            ps_list = None
        else:
            ps_list = [x.strip().lower() for x in ps_str.split(',')]
        init_args_d[ps_name] = ps_list

    # All three makers are built from the same pipeline file list; only the
    # parameter selections differ between them.
    init_args_d['data_maker'] = init_args_d['h0_maker']
    init_args_d['h1_maker'] = init_args_d['h0_maker']
    init_args_d['h0_maker'] = DistributionMaker(init_args_d['h0_maker'])
    init_args_d['h1_maker'] = DistributionMaker(init_args_d['h1_maker'])
    init_args_d['h1_maker'].select_params(init_args_d['h1_param_selections'])
    init_args_d['data_maker'] = DistributionMaker(init_args_d['data_maker'])
    if init_args_d['data_param_selections'] is None:
        init_args_d['data_param_selections'] = \
            init_args_d['h0_param_selections']
        init_args_d['data_name'] = init_args_d['h0_name']
    init_args_d['data_maker'].select_params(
        init_args_d['data_param_selections'])

    # Remove final parameters that don't want to be passed to HypoTesting
    param_name = init_args_d.pop('param_name')
    # SECURITY NOTE(review): `inj_vals` is a user-supplied string and `eval`
    # executes it as arbitrary Python. Only run this with trusted input;
    # consider replacing it with a restricted parser.
    inj_vals = eval(init_args_d.pop('inj_vals'))
    inj_units = init_args_d.pop('inj_units')
    force_prior = init_args_d.pop('use_inj_prior')

    # Instantiate the analysis object
    hypo_testing = HypoTesting(**init_args_d)

    logging.info('Scanning over %s between %.4f and %.4f with %i vals',
                 param_name, min(inj_vals), max(inj_vals), len(inj_vals))
    # Modify parameters if necessary
    if param_name == 'sin2theta23':
        requested_vals = inj_vals
        inj_vals = np.arcsin(np.sqrt(inj_vals))
        logging.info(
            'Converting to theta23 values. Equivalent range is %.4f to %.4f'
            ' radians, or %.4f to %.4f degrees',
            min(inj_vals), max(inj_vals),
            min(inj_vals) * 180 / np.pi, max(inj_vals) * 180 / np.pi)
        test_name = 'theta23'
        inj_units = 'radians'

    elif param_name == 'deltam31':
        raise ValueError('Need to implement a test where it ensures the sign '
                         'of the requested values matches those in truth and '
                         'the hypo makers (else it makes no sense). For now, '
                         'please select deltam3l instead.')

    elif param_name == 'deltam3l':
        # Ensure all values are the same (non-zero) sign, else it doesn't make
        # any sense. Testing the truthiness of the signs would only reject
        # zeros (-1 and +1 are both truthy), so compare every sign against the
        # first one explicitly.
        inj_signs = np.sign(inj_vals)
        if np.any(inj_signs == 0) or np.any(inj_signs != inj_signs[0]):
            raise ValueError("Not all requested values to inject are the same "
                             "sign. This doesn't make any sense given that you"
                             " have requested to inject different values of "
                             "deltam3l.")
        logging.info('Parameter requested was deltam3l - will convert assuming'
                     ' that this is always the largest of the two splittings '
                     'i.e. deltam3l = deltam31 for deltam3l > 0 and deltam3l '
                     '= deltam32 for deltam3l < 0.')
        inj_sign = inj_signs[0]
        requested_vals = inj_vals
        test_name = 'deltam31'
        deltam21_val = hypo_testing.data_maker.params['deltam21'].value.to(
            inj_units).magnitude
        # `np.asarray` keeps the arithmetic working when the evaluated string
        # yields a plain Python sequence rather than a numpy array.
        if inj_sign == 1:
            no_inj_vals = requested_vals
            io_inj_vals = (np.asarray(requested_vals) - deltam21_val) * -1.0
        else:
            io_inj_vals = requested_vals
            no_inj_vals = (np.asarray(requested_vals) * -1.0) + deltam21_val
        # One dict per scan point, holding the value to inject for each
        # ordering selection ('nh' / 'ih').
        inj_vals = []
        for no_inj_val, io_inj_val in zip(no_inj_vals, io_inj_vals):
            o_vals = {}
            o_vals['nh'] = no_inj_val
            o_vals['ih'] = io_inj_val
            inj_vals.append(o_vals)

    else:
        test_name = param_name
        requested_vals = inj_vals

    # Attach units to every value that will be injected
    unit_inj_vals = []
    for inj_val in inj_vals:
        if isinstance(inj_val, dict):
            o_vals = {}
            for ivkey in inj_val.keys():
                o_vals[ivkey] = inj_val[ivkey] * ureg(inj_units)
            unit_inj_vals.append(o_vals)
        else:
            unit_inj_vals.append(inj_val * ureg(inj_units))
    inj_vals = unit_inj_vals

    # Extend the ranges of the distribution makers so that they reflect the
    # range of the scan. This is a pain if there are different values depending
    # on the ordering. Need to extend the ranges of both values in the
    # hypothesis maker since the hypotheses may minimise over the ordering,
    # and could then go out of range.

    # Also, some parameters CANNOT go negative or else things won't work.
    # To account for this, check if parameters lower value was positive and,
    # if so, enforce that it is positive now.
    if isinstance(inj_vals[0], dict):
        # Calculate ranges for both parameters
        norangediff = max(no_inj_vals) - min(no_inj_vals)
        norangediff = norangediff * ureg(inj_units)
        norangetuple = (min(no_inj_vals) * ureg(inj_units) - 0.5 * norangediff,
                        max(no_inj_vals) * ureg(inj_units) + 0.5 * norangediff)
        iorangediff = max(io_inj_vals) - min(io_inj_vals)
        iorangediff = iorangediff * ureg(inj_units)
        iorangetuple = (min(io_inj_vals) * ureg(inj_units) - 0.5 * iorangediff,
                        max(io_inj_vals) * ureg(inj_units) + 0.5 * iorangediff)
        # Do it for both hierarchies
        for hierarchy, rangetuple in zip(['nh', 'ih'],
                                         [norangetuple, iorangetuple]):
            hypo_testing.set_param_ranges(selection=hierarchy,
                                          test_name=test_name,
                                          rangetuple=rangetuple,
                                          inj_units=inj_units)
        # Select the proper params again
        hypo_testing.h0_maker.select_params(init_args_d['h0_param_selections'])
        hypo_testing.h1_maker.select_params(init_args_d['h1_param_selections'])
    # Otherwise it's way simpler...
    else:
        rangediff = max(inj_vals) - min(inj_vals)
        rangetuple = (min(inj_vals) - 0.5 * rangediff,
                      max(inj_vals) + 0.5 * rangediff)
        hypo_testing.set_param_ranges(selection=None,
                                      test_name=test_name,
                                      rangetuple=rangetuple,
                                      inj_units=inj_units)

    # Decide what to do with the prior on the scanned parameter
    if hypo_testing.data_maker.params[test_name].prior is not None:
        if hypo_testing.data_maker.params[test_name].prior.kind != 'uniform':
            if force_prior:
                logging.warning(
                    'Parameter to be scanned, %s, has a %s prior that you have'
                    ' requested to be left on. This will likely make the'
                    ' results wrong.', test_name,
                    hypo_testing.data_maker.params[test_name].prior.kind)
            else:
                logging.info(
                    'Parameter to be scanned, %s, has a %s prior.This will be'
                    ' changed to a uniform prior (i.e. no prior) for this'
                    ' test.', test_name,
                    hypo_testing.data_maker.params[test_name].prior.kind)
                uniformprior = Prior(kind='uniform')
                hypo_testing.h0_maker.params[test_name].prior = uniformprior
                hypo_testing.h1_maker.params[test_name].prior = uniformprior
    else:
        if force_prior:
            raise ValueError('Parameter to be scanned, %s, does not have a'
                             ' prior but you have requested to force one to be'
                             ' left on. Something is potentially wrong.'
                             % test_name)
        else:
            logging.info(
                'Parameter to be scanned, %s, does not have a prior.'
                ' So nothing needs to be done.', test_name)

    # Everything is set up. Now do the scan.
    outputs = hypo_testing.asimov_inj_param_scan(  # pylint: disable=redefined-outer-name
        param_name=param_name,
        test_name=test_name,
        inj_vals=inj_vals,
        requested_vals=requested_vals,
        h0_name=init_args_d['h0_name'],
        h1_name=init_args_d['h1_name'],
        data_name=init_args_d['data_name'])

    if return_outputs:
        return outputs
def test_nsi_parameterization():
    """Unit test for the Hvac-like (vacuum-like) NSI parameterization.

    A parameter point is drawn from a fixed-seed random state and loaded into
    a `VacuumLikeNSIParams` instance. The numerically evaluated NSI matrix is
    then compared against

    1. the analytical expansion: a mismatch is only reported as a warning,
    2. the sympy matrix multiplication: a mismatch raises.

    Raises
    ------
    ValueError
        If the numerical and the sympy NSI matrices are not elementwise close.

    """
    rng = np.random.RandomState(0)
    # The order of the draws below fixes which random numbers each parameter
    # receives, so it must not be changed.
    alpha1, alpha2, deltansi = rng.rand(3) * 2. * np.pi
    phi12, phi13, phi23 = rng.rand(3) * 2*np.pi - np.pi
    eps_max_abs = 10.0
    eps_scale, eps_prime = rng.rand(2) * 2 * eps_max_abs - eps_max_abs

    # (attribute name, value) pairs, in the order they get assigned
    param_point = (
        ('eps_scale', eps_scale),
        ('eps_prime', eps_prime),
        ('phi12', phi12),
        ('phi13', phi13),
        ('phi23', phi23),
        ('alpha1', alpha1),
        ('alpha2', alpha2),
        ('deltansi', deltansi),
    )

    nsi_params = VacuumLikeNSIParams()
    for attr_name, attr_val in param_point:
        setattr(nsi_params, attr_name, attr_val)

    def report(log_func, fmt, matrix):
        """Send `matrix`, rendered as a string, to `log_func` using `fmt`."""
        log_func(fmt, np.array2string(matrix, **ARY2STR_KW))

    logging.trace('Checking agreement between numerical & analytical NSI matrix...')

    numerical = nsi_params.eps_matrix
    analytical = nsi_params.eps_matrix_analytical

    try:
        agreement = np.isclose(numerical, analytical, **ALLCLOSE_KW)
        if not np.all(agreement):
            report(logging.debug, "Numerical NSI matrix:\n%s", numerical)
            report(
                logging.debug, "Analytical expansion (by hand):\n%s", analytical
            )
            raise ValueError(
                'Evaluating analytical expressions for NSI matrix elements'
                ' does not give agreement with numerical calculation!'
                ' Elementwise agreement:\n%s' % agreement
            )
    except ValueError as err:
        # Disagreement with the analytical form is tolerated; only warn.
        logging.warning(
            "%s\nThis is expected."
            " Going ahead with numerical calculation for now.", err
        )

    logging.trace('Now checking agreement with sympy calculation...')

    # The sympy routine takes every parameter as a `<name>_val` keyword
    sympy_kwargs = {name + '_val': val for name, val in param_point}
    from_sympy = nsi_sympy_mat_mult(**sympy_kwargs)

    logging.trace('ALLCLOSE_KW = {}'.format(ALLCLOSE_KW))
    agreement = np.isclose(numerical, from_sympy, **ALLCLOSE_KW)
    if np.all(agreement):
        return

    report(logging.error, 'Numerical NSI matrix:\n%s', numerical)
    report(logging.error, 'Sympy NSI matrix:\n%s', from_sympy)
    raise ValueError(
        'Sympy and numerical calculations disagree! Elementwise agreement:\n'
        '%s' % agreement
    )
def plot_1d_ratio(self, maps, plot_axis, **kwargs):
    """Make a ratio plot for a 1d projection.

    Every map in `maps` is projected onto `plot_axis` via `self.project_1d`
    and divided bin by bin by the projection of the first map, `maps[0]`.
    A map whose `tex` attribute equals 'data' is drawn as points with error
    bars; any other map is drawn as a step histogram with a shaded
    uncertainty band. Drawing happens on the current matplotlib axes.

    Bins where the reference is zero are special-cased: if the other map is
    zero as well the ratio is set to 1, otherwise it is set to 0 and a
    warning is logged. In both cases the ratio error is set to 1.

    Parameters
    ----------
    maps : sequence
        Maps to plot; `maps[0]` is the reference (denominator).
    plot_axis
        Dimension to project onto; forwarded to `self.project_1d` and used
        to look up the binning of the reference map.
    **kwargs
        `r_vmin` and `r_vmax` are read from here. If both are given, the
        y-axis limits are `(1 - r_vmin, 1 + r_vmax)`; otherwise the limits
        are chosen symmetrically around 1 from the computed ratios.

    """
    r_vmin = kwargs.pop('r_vmin', None)
    r_vmax = kwargs.pop('r_vmax', None)
    axis = plt.gca()
    map0 = maps[0]
    plt_binning = map0.binning[plot_axis]
    hist = self.project_1d(map0, plot_axis)
    hist0 = unp.nominal_values(hist)
    # TODO: the uncertainty of the reference map is not propagated into the
    # ratio error below; should it be used somewhere?
    axis.set_xlim(
        inf2finite(plt_binning.bin_edges.m)[0],
        inf2finite(plt_binning.bin_edges.m)[-1])
    # Extreme ratios seen so far; used for the automatic y-axis scale
    maximum = 1.0
    minimum = 1.0
    self.reset_colors()
    # NOTE: the loop variable is deliberately not called `map`, which would
    # shadow the builtin.
    for map_ in maps:
        self.next_color()
        hist = self.project_1d(map_, plot_axis)
        hist1 = unp.nominal_values(hist)
        err1 = unp.std_devs(hist)
        ratio = np.zeros_like(hist0)
        ratio_error = np.zeros_like(hist0)
        for i, hist0i in enumerate(hist0):
            if hist1[i] == 0 and hist0i == 0:
                ratio[i] = 1.
                ratio_error[i] = 1.
            elif hist1[i] != 0 and hist0i == 0:
                logging.warning('deviding non 0 by 0 for ratio')
                ratio[i] = 0.
                ratio_error[i] = 1.
            else:
                ratio[i] = hist1[i] / hist0i
                ratio_error[i] = err1[i] / hist0i
                minimum = min(minimum, ratio[i])
                maximum = max(maximum, ratio[i])

        if map_.tex == 'data':
            axis.errorbar(plt_binning.weighted_centers.m,
                          ratio, yerr=ratio_error, fmt='o',
                          markersize='4',
                          label=tex_dollars(text2tex('data')),
                          color='k', ecolor='k', mec='k')
        else:
            _ = axis.hist(inf2finite(plt_binning.weighted_centers.m),
                          weights=ratio,
                          bins=inf2finite(plt_binning.bin_edges.m),
                          histtype='step', lw=1.5,
                          label=tex_dollars(text2tex(map_.tex)),
                          color=self.color)
            # The x positions are the *left* bin edges, so the bars must be
            # edge-aligned; the default alignment ('center' since
            # matplotlib 2.0) would shift the band by half a bin.
            axis.bar(plt_binning.bin_edges.m[:-1], 2 * ratio_error,
                     bottom=ratio - ratio_error,
                     width=plt_binning.bin_widths.m,
                     align='edge',
                     alpha=0.25, linewidth=0, color=self.color)

    if self.grid:
        plt.grid(True, which="both", ls='-', alpha=0.2)
    self.fig.subplots_adjust(hspace=0)
    axis.set_ylabel(tex_dollars(text2tex('ratio')))
    axis.set_xlabel(tex_dollars(plt_binning.label))
    # Calculate nice scale:
    if r_vmin is not None and r_vmax is not None:
        axis.set_ylim(1 - r_vmin, 1 + r_vmax)
    else:
        # Symmetric around 1, with 20% headroom over the largest deviation
        off = max(maximum - 1, 1 - minimum)
        axis.set_ylim(1 - 1.2 * off, 1 + 1.2 * off)