def _compute_transforms(self):
    """Build one stacked transform per combine group, applying per-input
    normalization scales."""
    dims = self.input_binning.names
    transforms = []
    for group, in_names in self.combine_groups.items():
        xform_shape = [len(in_names)] + [self.input_binning[d].num_bins
                                         for d in dims]
        xform = np.ones(xform_shape)
        for i, name in enumerate(in_names):
            scale = 1.
            if '_nc' in name:
                scale *= self.params.nu_nc_norm.value.m_as('dimensionless')
            #if 'nutau' in name:
            #    scale *= self.params.nutau_norm.value.m_as('dimensionless')
            #if name in ['nutau_cc', 'nutaubar_cc']:
            #    scale *= self.params.nutau_cc_norm.value.m_as('dimensionless')
            if scale != 1:
                xform[i] *= scale

        transforms.append(
            BinnedTensorTransform(
                input_names=in_names,
                output_name=group,
                input_binning=self.input_binning,
                output_binning=self.output_binning,
                xform_array=xform,
            )
        )

    return TransformSet(transforms=transforms)
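# A minimal, self-contained sketch (not PISA API) of the stacked transform
# built above: one slice of ones per input name, with per-input scale factors
# applied in place. The names and the 1.2 scale are illustrative only.
import numpy as np

in_names = ['nue_nc', 'nuebar_nc', 'numu_nc']
bin_shape = (10, 20)                      # stand-in for the input binning
xform = np.ones((len(in_names),) + bin_shape)
for i, name in enumerate(in_names):
    scale = 1.
    if '_nc' in name:
        scale *= 1.2                      # stand-in for nu_nc_norm
    if scale != 1:
        xform[i] *= scale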
def _compute_transforms(self): """For the current parameter values, evaluate the fit function and write the resulting scaling into an x-form array""" # TODO: use iterators to collapse nested loops transforms = [] for input_name in self.input_names: transform = None sys_values = [] for sys in self.sys_list: sys_values.append(self.params[sys].magnitude) fit_params = self.fit_results[input_name] shape = fit_params.shape[:-1] if transform is None: transform = np.ones(shape) for idx in np.ndindex(*shape): # At every point evaluate the function transform[idx] *= fit_fun(sys_values, *fit_params[idx]) xform = BinnedTensorTransform( input_names=(input_name), output_name=input_name, input_binning=self.input_binning, output_binning=self.output_binning, xform_array=transform, error_method=self.error_method, ) transforms.append(xform) return TransformSet(transforms)
def _compute_transforms(self): """Compute new oscillation transforms.""" # The seed is created from parameter values to produce different sets # of transforms for different sets of parameters seed = hash_obj(self.params.values, hash_to='int') % (2**32 - 1) np.random.seed(seed) # Read parameters in in the units used for computation, e.g. theta23 = self.params.theta23.m_as('rad') transforms = [] for out_idx, output_name in enumerate(self.output_names): if out_idx < 3: # neutrinos (-> input names are neutrinos) input_names = self.input_names[0:2] else: # anti-neutrinos (-> input names are anti-neutrinos) input_names = self.input_names[2:4] # generate the "oscillation probabilities" xform = self.create_dummy_osc_probs() # create object of type `BinnedTensorTransform` and attach # to list of transforms with correct set of input names for the # output name in question transforms.append( BinnedTensorTransform( input_names=input_names, output_name=output_name, # we have already made sure that input and output binnings # are identical input_binning=self.input_binning, output_binning=self.output_binning, xform_array=xform)) return TransformSet(transforms=transforms)
def _compute_transforms(self):  # pylint: disable=no-self-use
    """Stages that apply transforms to inputs should override this method
    for deriving the transform. No-input stages should leave this as-is."""
    return TransformSet([])
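# Hedged sketch of how a transforming service might override the hook above:
# one identity transform per input/output pair. The service is hypothetical,
# and the import paths are assumptions about the PISA layout.
import numpy as np
from pisa.core.stage import Stage
from pisa.core.transform import BinnedTensorTransform, TransformSet

class IdentityService(Stage):
    def _compute_transforms(self):
        transforms = []
        for in_name, out_name in zip(self.input_names, self.output_names):
            transforms.append(BinnedTensorTransform(
                input_names=in_name,
                output_name=out_name,
                input_binning=self.input_binning,
                output_binning=self.output_binning,
                # identity: every bin passes through unscaled
                xform_array=np.ones(self.input_binning.shape),
            ))
        return TransformSet(transforms=transforms)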
class Stage(BaseStage):
    """
    PISA stage base class. Should encompass all behaviors common to (almost)
    all stages.

    Specialization should be done via subclasses.

    Parameters
    ----------
    use_transforms : bool (required)
        Whether or not this stage takes inputs to be transformed (and hence
        implements transforms).

    input_names : None or list of strings

    output_names : None or list of strings

    disk_cache : None, bool, string, or DiskCache
        * If None or False, no disk cache is available.
        * If True, a disk cache is generated at the path
          `CACHE_DIR/<stage_name>/<service_name>.sqlite` where CACHE_DIR is
          defined in pisa.__init__
        * If string, this is interpreted as a path. If an absolute path is
          provided (e.g. "/home/myuser/mycache.sqlite"), this locates the
          disk cache file exactly, while a relative path (e.g.,
          "relative/dir/mycache.sqlite") is taken relative to the CACHE_DIR;
          the aforementioned example will be turned into
          `CACHE_DIR/relative/dir/mycache.sqlite`.
        * If a DiskCache object is passed, it will be used directly

    memcache_deepcopy : bool
        Whether to deepcopy objects prior to storing to the memory cache and
        upon loading these objects from the memory cache. Setting to True
        ensures no modification of mutable objects stored to a memory cache
        will affect other logic relying on that object remaining unchanged.
        However, this comes at the cost of more memory used and slower
        operations.

    outputs_cache_depth : int >= 0

    transforms_cache_depth : int >= 0

    input_binning : None or interpretable as MultiDimBinning

    output_binning : None or interpretable as MultiDimBinning

    Notes
    -----
    The following methods can be overridden in derived classes where
    applicable:

    _derive_nominal_transforms_hash
    _derive_transforms_hash
    _derive_nominal_outputs_hash
    _derive_outputs_hash

    _compute_nominal_transforms
        This is called during initialization to compute what are termed
        "nominal" transforms -- i.e., transforms with all systematic
        parameters set to their nominal values, such that they have no
        effect on the transform. It is optional to use this stage, but if it
        *is* used, then the result will be cached to memory (and optionally
        to disk cache, if one is provided) for future use. A nominal
        transform is useful when systematic parameters merely have the
        effect of modifying the nominal transform, rather than requiring a
        complete recomputation of the transform.

    _compute_nominal_outputs
        Same as nominal transforms, but for outputs (e.g. used for no-input
        stages)

    _compute_transforms
        Do the actual work to produce the stage's transforms. For stages
        that specify use_transforms=False, this method is never called.

    _compute_outputs
        Do the actual work to compute the stage's output. Default
        implementation is to call self.transforms.apply(inputs); override if
        no transforms are present or if more needs to be done to compute
        outputs than this.

    validate_params
        Perform validation on any parameters.
""" def __init__( self, use_transforms, params=None, expected_params=None, input_names=None, output_names=None, error_method=None, disk_cache=None, memcache_deepcopy=True, transforms_cache_depth=10, outputs_cache_depth=0, input_binning=None, output_binning=None, debug_mode=None, ): # Allow for string inputs, but have to populate into lists for # consistent interfacing to one or multiple of these things logging.warning('This is a cake-style PISA stage, which is DEPRECATED!') self.use_transforms = use_transforms """Whether or not stage uses transforms""" self._events_hash = None self.input_binning = input_binning self.output_binning = output_binning self.validate_binning() # init base class! super(Stage, self).__init__( params=params, expected_params=expected_params, input_names=input_names, output_names=output_names, debug_mode=debug_mode, error_method=error_method, ) # Storage of latest transforms and outputs; default to empty # TransformSet and None, respectively. self.transforms = TransformSet([]) """A stage that takes to-be-transformed inputs and has had these transforms computed stores them here. Before computation, `transforms` is an empty TransformSet; a stage that does not make use of these (such as a no-input stage) has an empty TransformSet.""" self.memcache_deepcopy = memcache_deepcopy self.transforms_cache_depth = int(transforms_cache_depth) self.transforms_cache = None """Memory cache object for storing transforms""" self.nominal_transforms_cache = None """Memory cache object for storing nominal transforms""" self.full_hash = True """Whether to do full hashing if true, otherwise do fast hashing""" self.transforms_cache = MemoryCache( max_depth=self.transforms_cache_depth, is_lru=True, deepcopy=self.memcache_deepcopy, ) self.nominal_transforms_cache = MemoryCache( max_depth=self.transforms_cache_depth, is_lru=True, deepcopy=self.memcache_deepcopy, ) self.outputs_cache_depth = int(outputs_cache_depth) self.outputs_cache = None """Memory cache object for storing outputs (excludes sideband objects).""" self.outputs_cache = None if self.outputs_cache_depth > 0: self.outputs_cache = MemoryCache( max_depth=self.outputs_cache_depth, is_lru=True, deepcopy=self.memcache_deepcopy, ) self.disk_cache = disk_cache """Disk cache object""" self.disk_cache_path = None """Path to disk cache file for this stage/service (or None).""" # Include each attribute here for hashing if it is defined and its # value is not None default_attrs_to_hash = [ "input_names", "output_names", "input_binning", "output_binning", ] self._attrs_to_hash = set([]) for attr in default_attrs_to_hash: if not hasattr(self, attr): continue val = getattr(self, attr) if val is None: continue try: self.include_attrs_for_hashes(attr) except ValueError(): pass self.events = None self.nominal_transforms = None # Define useful flags and values for debugging behavior after running self.nominal_transforms_loaded_from_cache = None """Records which cache nominal transforms were loaded from, or None.""" self.nominal_transforms_computed = False """Records whether nominal transforms were (re)computed.""" self.transforms_loaded_from_cache = None """Records which cache transforms were loaded from, or None.""" self.transforms_computed = False """Records whether transforms were (re)computed.""" self.nominal_outputs_computed = False """Records whether nominal outputs were (re)computed.""" self.outputs_loaded_from_cache = None """Records which cache outputs were loaded from, or None.""" self.outputs_computed = False """Records whether outputs 
were (re)computed.""" self.nominal_transforms_hash = None self.transforms_hash = None self.nominal_outputs_hash = None self.outputs_hash = None self.instantiate_disk_cache() @profile def get_nominal_transforms(self, nominal_transforms_hash): """Load a cached transform from the nominal transform memory cache (which is backed by a disk cache, if one is specified) if the nominal transform is in the cache, or else recompute it and store to the cache(s). This method calls the `_compute_nominal_transforms` method, which by default does nothing. However, if you want to use the nominal transforms feature, override the `_compute_nominal_transforms` method and fill in the logic there. Deciding whether to invoke the `_compute_nominal_transforms` method or to load the nominal transforms from cache is done here, so you needn't think about any of this within the `_compute_nominal_transforms` method. Returns ------- nominal_transforms, hash """ # Reset flags self.nominal_transforms_loaded_from_cache = None self.nominal_transforms_computed = False if nominal_transforms_hash is None: nominal_transforms_hash = self._derive_nominal_transforms_hash() nominal_transforms = None # Quick way to avoid further logic is if hash value is None if nominal_transforms_hash is None: self.nominal_transforms_hash = None self.nominal_transforms = None return self.nominal_transforms, self.nominal_transforms_hash recompute = True # If hash found in memory cache, load nominal transforms from there if ( nominal_transforms_hash in self.nominal_transforms_cache and self.debug_mode is None ): nominal_transforms = self.nominal_transforms_cache[nominal_transforms_hash] self.nominal_transforms_loaded_from_cache = "memory" recompute = False # Otherwise try to load from an extant disk cache elif self.disk_cache is not None and self.debug_mode is None: try: nominal_transforms = self.disk_cache[nominal_transforms_hash] except KeyError: pass else: self.nominal_transforms_loaded_from_cache = "disk" recompute = False # Save to memory cache self.nominal_transforms_cache[ nominal_transforms_hash ] = nominal_transforms if recompute: self.nominal_transforms_computed = True nominal_transforms = self._compute_nominal_transforms() if nominal_transforms is None: # Invalidate hash value since found transforms nominal_transforms_hash = None else: nominal_transforms.hash = nominal_transforms_hash self.nominal_transforms_cache[ nominal_transforms_hash ] = nominal_transforms if self.disk_cache is not None: self.disk_cache[nominal_transforms_hash] = nominal_transforms self.nominal_transforms = nominal_transforms self.nominal_transforms_hash = nominal_transforms_hash return nominal_transforms, nominal_transforms_hash @profile def get_transforms(self, transforms_hash=None, nominal_transforms_hash=None): """Load a cached transform (keyed on hash of parameter values) if it is in the cache, or else compute a new transform from currently-set parameter values and store this new transform to the cache. This calls the private method _compute_transforms (which must be implemented in subclasses if the nominal transform feature is desired) to generate a new transform if the nominal transform is not found in the nominal transform cache. Notes ----- The hash used here is only meant to be valid within the scope of a session; a hash on the full parameter set used to generate the transform *and* the version of the generating software is required for non-volatile storage. 
""" # Reset flags self.transforms_loaded_from_cache = None self.transforms_computed = False # TODO: store nominal transforms to the transforms cache as well, but # derive the hash value the same way as it is done for transforms, # to avoid needing to apply no systematics to the nominal transforms # to get the (identical) transforms? # Problem: assumes the nominal transform is the same as the transforms # that will result, which *might* not be true (though it seems it will # usually be so) # Compute nominal transforms; if feature is not used, this doesn't # actually do much of anything. To do more than this, override the # `_compute_nominal_transforms` method. _, nominal_transforms_hash = self.get_nominal_transforms( nominal_transforms_hash=nominal_transforms_hash ) # Generate hash from param values if transforms_hash is None: transforms_hash = self._derive_transforms_hash( nominal_transforms_hash=nominal_transforms_hash ) logging.trace("transforms_hash: %s" % str(transforms_hash)) # Load and return existing transforms if in the cache if ( self.transforms_cache is not None and transforms_hash in self.transforms_cache and self.debug_mode is None ): self.transforms_loaded_from_cache = "memory" logging.trace("loading transforms from cache.") transforms = self.transforms_cache[transforms_hash] # Otherwise: compute transforms, set hash, and store to cache else: self.transforms_computed = True logging.trace("computing transforms.") transforms = self._compute_transforms() transforms.hash = transforms_hash if self.transforms_cache is not None: self.transforms_cache[transforms_hash] = transforms self.check_transforms(transforms) self.transforms = transforms return transforms @profile def get_nominal_outputs(self, nominal_outputs_hash): """Load a cached output from the nominal outputs memory cache (which is backed by a disk cache, if one is specified) if the nominal outout is in the cache, or else recompute it and store to the cache(s). This method calls the `_compute_nominal_outputs` method, which by default does nothing. However, if you want to use the nominal outputs feature, override the `_compute_nominal_outputs` method and fill in the logic there. Deciding whether to invoke the `_compute_nominal_outputs` method or to load the nominal outputs from cache is done here, so you needn't think about any of this within the `_compute_nominal_outputs` method. Returns ------- nominal_outputs, hash """ if nominal_outputs_hash is None: nominal_outputs_hash = self._derive_nominal_outputs_hash() if ( self.nominal_outputs_hash is None or self.nominal_outputs_hash != nominal_outputs_hash ): self._compute_nominal_outputs() self.nominal_outputs_hash = nominal_outputs_hash # for PI compatibility def run(self, inputs=None): return self.get_outputs(inputs=inputs) @profile def get_outputs(self, inputs=None): """Top-level function for computing outputs. Use this method to get outputs if you live outside this stage/service. Caching is handled here, so if the output hash returned by `_derive_outputs_hash` is in `outputs_cache`, it is simply returned. Otherwise, the `_compute_outputs` private method is invoked to do the actual work of computing outputs. Parameters ---------- inputs : None or Mapping Any inputs to be transformed, plus any sideband objects that are to be passed on (untransformed) to subsequent stages. 
        See also
        --------
        Overloadable methods called directly from this:
            _derive_outputs_hash
            _compute_outputs

        """
        # Reset flags
        self.outputs_loaded_from_cache = None
        self.outputs_computed = False

        # TODO: store nominal outputs to the outputs cache as well, but
        # derive the hash value the same way as it is done for outputs, to
        # avoid needing to apply no systematics to the nominal outputs to
        # get the (identical) outputs?
        # Problem: assumes the nominal transform is the same as the outputs
        # that will result, which *might* not be true (though it seems it
        # will usually be so)

        # Keep inputs for internal use and for inspection later
        self.inputs = inputs

        outputs_hash, transforms_hash, nominal_transforms_hash = (
            self._derive_outputs_hash()
        )

        # Compute nominal outputs; if feature is not used, this doesn't
        # actually do much of anything. To do more than this, override the
        # `_compute_nominal_outputs` method.
        self.get_nominal_outputs(nominal_outputs_hash=nominal_transforms_hash)

        logging.trace("outputs_hash: %s" % outputs_hash)

        if (
            self.outputs_cache is not None
            and outputs_hash is not None
            and outputs_hash in self.outputs_cache
            and self.debug_mode is None
        ):
            self.outputs_loaded_from_cache = "memory"
            logging.trace("Loading outputs from cache.")
            outputs = self.outputs_cache[outputs_hash]
        else:
            logging.trace("Need to compute outputs...")

            if self.use_transforms:
                self.get_transforms(
                    transforms_hash=transforms_hash,
                    nominal_transforms_hash=nominal_transforms_hash,
                )

            logging.trace("... now computing outputs.")
            outputs = self._compute_outputs(inputs=self.inputs)
            self.check_outputs(outputs)

            if isinstance(outputs, (Map, MapSet)):
                outputs = outputs.rebin(self.output_binning)

            outputs.hash = outputs_hash
            self.outputs_computed = True

            # Store output to cache
            if self.outputs_cache is not None and outputs_hash is not None:
                self.outputs_cache[outputs_hash] = outputs

        # Keep outputs for inspection later
        self.outputs = outputs

        # Attach sideband objects (i.e., inputs not specified in
        # `self.input_names`) to the "augmented" output object
        if self.inputs is None:
            names_in_inputs = set()
        else:
            names_in_inputs = set(self.inputs.names)
        unused_input_names = names_in_inputs.difference(self.input_names)

        if len(unused_input_names) == 0:
            return outputs

        # TODO: update logic for Data object, generic sideband objects

        # Create a new output container different from `outputs` but copying
        # the contents, for purposes of attaching the sideband objects found.
        if isinstance(outputs, MapSet):
            augmented_outputs = MapSet(outputs)
            for name in unused_input_names:
                augmented_outputs.append(inputs[name])
            return augmented_outputs

        raise TypeError(
            "Outputs are %s, but must currently be a MapSet in the case"
            " that the input includes sideband objects." % type(outputs)
        )

    def check_transforms(self, transforms):
        """Check that transforms' inputs and outputs match those specified
        for this service.
        Parameters
        ----------
        transforms

        Raises
        ------
        ValueError if transforms' inputs/outputs don't match stage spec

        """
        assert set(transforms.input_names) == set(self.input_names), (
            "Transforms' inputs: " + str(transforms.input_names)
            + "\nStage inputs: " + str(self.input_names)
        )
        assert set(transforms.output_names) == set(self.output_names), (
            "Transforms' outputs: " + str(transforms.output_names)
            + "\nStage outputs: " + str(self.output_names)
        )

    def check_outputs(self, outputs):
        """Check that the output names are those expected"""
        if set(outputs.names) != set(self.output_names):
            raise ValueError(
                "'{}' : Outputs found do not match expected outputs for"
                " this stage:\n"
                "  Outputs found: {}\n"
                "  Expected stage outputs: {}".format(
                    self.stage_name, outputs.names, self.output_names
                )
            )

    def load_events(self, events):
        """Load events from path given by `events`. Stored as `self.events`.

        Parameters
        ----------
        events : string or Events object
            If string, load events from that location. If Events object,
            deepcopy to obtain `self.events`

        """
        if isinstance(events, Param):
            events = events.value
        elif isinstance(events, basestring):
            events = find_resource(events)
        this_hash = hash_obj(events, full_hash=self.full_hash)
        if self._events_hash is not None and this_hash == self._events_hash:
            return
        logging.debug("Extracting events from Events obj or file: %s", events)
        events_obj = Events(events)
        events_hash = this_hash

        self.events = events_obj
        self._events_hash = events_hash

    def cut_events(self, keep_criteria):
        """Apply a cut to `self.events`, keeping only events that pass
        `keep_criteria`.

        Parameters
        ----------
        keep_criteria : string
            See pisa.core.Events.applyCut for more info on specifying this.

        """
        if isinstance(keep_criteria, Param):
            keep_criteria = keep_criteria.value

        if keep_criteria is not None:
            events = self.events.applyCut(keep_criteria=keep_criteria)
            events_hash = hash_obj(events, full_hash=self.full_hash)

            self.events = events
            self._events_hash = events_hash

    def instantiate_disk_cache(self):
        """Instantiate a disk cache for use by the stage."""
        if isinstance(self.disk_cache, DiskCache):
            self.disk_cache_path = self.disk_cache.path
            return

        if self.disk_cache is False or self.disk_cache is None:
            self.disk_cache = None
            self.disk_cache_path = None
            return

        if isinstance(self.disk_cache, basestring):
            dirpath, filename = os.path.split(
                os.path.expandvars(os.path.expanduser(self.disk_cache))
            )
            if os.path.isabs(dirpath):
                self.disk_cache_path = os.path.join(dirpath, filename)
            else:
                self.disk_cache_path = os.path.join(CACHE_DIR, dirpath, filename)
        elif self.disk_cache is True:
            dirs = [CACHE_DIR, self.stage_name]
            dirpath = os.path.expandvars(os.path.expanduser(os.path.join(*dirs)))
            if self.service_name is not None and self.service_name != "":
                filename = self.service_name + ".sqlite"
            else:
                filename = "generic.sqlite"
            mkdir(dirpath, warn=False)
            self.disk_cache_path = os.path.join(dirpath, filename)
        else:
            raise ValueError(
                "Don't know what to do with a %s." % type(self.disk_cache)
            )

        self.disk_cache = DiskCache(self.disk_cache_path, max_depth=10, is_lru=False)

    def _derive_outputs_hash(self):
        """Derive a hash value that uniquely identifies the outputs that
        will be generated based upon the current state of the stage.
        This implementation hashes together:
        * Input and output binning objects' hash values (if either input or
          output binning is not None)
        * Current params' values hash
        * Hashes from any input objects with names in `self.input_names`

        If any of the above objects is specified but returns None for its
        hash value, the entire output hash is invalidated, and None is
        returned.

        """
        id_objects = []

        # If stage uses inputs, grab hash from the inputs container object
        if self.outputs_cache is not None and len(self.input_names) > 0:
            inhash = self.inputs.hash
            logging.trace("inputs.hash = %s" % inhash)
            id_objects.append(inhash)

        # If stage uses transforms, get hash from the transforms
        transforms_hash = None
        if self.use_transforms:
            transforms_hash, nominal_transforms_hash = self._derive_transforms_hash()
            id_objects.append(transforms_hash)
            logging.trace("derived transforms hash = %s" % id_objects[-1])

        # Otherwise, generate sub-hash on binning and param values here
        else:
            transforms_hash, nominal_transforms_hash = None, None

            if self.outputs_cache is not None:
                id_subobjects = []

                # Include all parameter values
                id_subobjects.append(self.params.values_hash)

                # Include additional attributes of this object
                for attr in sorted(self._attrs_to_hash):
                    val = getattr(self, attr)
                    if hasattr(val, "hash"):
                        attr_hash = val.hash
                    elif self.full_hash:
                        norm_val = normQuant(val)
                        attr_hash = hash_obj(norm_val, full_hash=self.full_hash)
                    else:
                        attr_hash = hash_obj(val, full_hash=self.full_hash)
                    id_subobjects.append(attr_hash)

                # Generate the "sub-hash"
                if any([(h is None) for h in id_subobjects]):
                    sub_hash = None
                else:
                    sub_hash = hash_obj(id_subobjects, full_hash=self.full_hash)
                id_objects.append(sub_hash)

        # If any hashes are missing (i.e., None), invalidate the entire hash
        if self.outputs_cache is None or any([(h is None) for h in id_objects]):
            outputs_hash = None
        else:
            outputs_hash = hash_obj(id_objects, full_hash=self.full_hash)

        return outputs_hash, transforms_hash, nominal_transforms_hash

    def _derive_transforms_hash(self, nominal_transforms_hash=None):
        """Compute a hash that uniquely identifies the transforms that will
        be produced from the current configuration. Note that this hash
        needs only to be valid for this run (i.e., it is a volatile hash).

        This implementation returns a hash from the current parameters'
        values.

        """
        id_objects = []
        h = self.params.values_hash
        logging.trace("self.params.values_hash = %s" % h)
        id_objects.append(h)

        # Grab any provided nominal transforms hash, or derive it again
        if nominal_transforms_hash is None:
            nominal_transforms_hash = self._derive_nominal_transforms_hash()
        # If a valid hash has been gotten, include it
        if nominal_transforms_hash is not None:
            id_objects.append(nominal_transforms_hash)

        for attr in sorted(self._attrs_to_hash):
            val = getattr(self, attr)
            if hasattr(val, "hash"):
                attr_hash = val.hash
            elif self.full_hash:
                norm_val = normQuant(val)
                attr_hash = hash_obj(norm_val, full_hash=self.full_hash)
            else:
                attr_hash = hash_obj(val, full_hash=self.full_hash)
            id_objects.append(attr_hash)

        # If any hashes are missing (i.e., None), invalidate the entire hash
        if any([(h is None) for h in id_objects]):
            transforms_hash = None
        else:
            transforms_hash = hash_obj(id_objects, full_hash=self.full_hash)

        return transforms_hash, nominal_transforms_hash

    def _derive_nominal_transforms_hash(self):
        """Derive a hash to uniquely identify the nominal transform.
        This should be unique across processes and invocations because the
        nominal transforms can be non-volatile (cached to disk) and must
        still be valid given their hash value upon loading from disk in the
        future.

        This implementation uses the nominal parameter values' hash combined
        with the source code hash to generate the final nominal transforms
        hash.

        Notes
        -----
        The hashing scheme implemented here might be sufficiently unique for
        many cases, but override this method in services according to the
        following guidelines:

        * Stages that use a nominal transform should override this method if
          the hash is more accurately computed differently from here.

        * Stages that use transforms but do not use nominal transforms can
          override this method with a simpler version that simply returns
          None to save computation time (if this method is found to be a
          significant performance hit). (This method is called each time an
          output is computed if `self.use_transforms == True`.)

        * Stages that use no transforms (i.e., `self.use_transforms ==
          False`) will not call any built-in methods related to transforms,
          so overriding this method is irrelevant to such stages.

        If this method *is* overridden (and not just to return None), since
        the nominal transform may be stored to a disk cache, make sure that
        `self.source_code_hash` is included in the objects used to compute
        the final hash value. Even if all parameters are the same, a nominal
        transform stored to disk is ***invalid if the source code
        changes***, and `_derive_nominal_transforms_hash` must reflect this.

        """
        id_objects = []
        id_objects.append(self.params.nominal_values_hash)
        for attr in sorted(self._attrs_to_hash):
            val = getattr(self, attr)
            if hasattr(val, "hash"):
                attr_hash = val.hash
            elif self.full_hash:
                norm_val = normQuant(val)
                attr_hash = hash_obj(norm_val, full_hash=self.full_hash)
            else:
                attr_hash = hash_obj(val, full_hash=self.full_hash)
            id_objects.append(attr_hash)
        id_objects.append(self.source_code_hash)

        # If any hashes are missing (i.e., None), invalidate the entire hash
        if any([(h is None) for h in id_objects]):
            nominal_transforms_hash = None
        else:
            nominal_transforms_hash = hash_obj(id_objects, full_hash=self.full_hash)

        return nominal_transforms_hash

    def _derive_nominal_outputs_hash(self):
        return self._derive_nominal_transforms_hash()

    def _compute_nominal_transforms(self):  # pylint: disable=no-self-use
        """Stages that start with a nominal transform and use systematic
        parameters to modify the nominal transform in order to obtain the
        final transforms should override this method for deriving the
        nominal transform."""
        return None

    def _compute_transforms(self):  # pylint: disable=no-self-use
        """Stages that apply transforms to inputs should override this
        method for deriving the transform. No-input stages should leave this
        as-is."""
        return TransformSet([])

    def _compute_nominal_outputs(self):  # pylint: disable=no-self-use
        return None

    @profile
    def _compute_outputs(self, inputs):
        """Override this method for no-input stages which do not use
        transforms. Input stages that compute a TransformSet needn't
        override this, as the work for computing outputs is done by the
        TransformSet below."""
        return self.transforms.apply(inputs)

    def validate_binning(self):  # pylint: disable=no-self-use
        """Override this method to test if the input and output binning
        (e.g., dimensionality, domains, separately or in combination)
        conform to the transform applied by the stage."""
        return
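# A minimal, self-contained sketch of the volatile hash-and-cache pattern
# that `get_transforms`/`get_outputs` implement above, with a plain dict
# standing in for MemoryCache; the names here are illustrative, not PISA API.
_cache = {}

def get_or_compute(key_objects, compute):
    key = hash(tuple(key_objects))  # volatile: only valid within this session
    if key not in _cache:
        _cache[key] = compute()
    return _cache[key]

value = get_or_compute(('theta23', 0.78), lambda: 0.78 ** 2)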
def _compute_nominal_transforms(self): """Compute new PID transforms.""" logging.debug('Updating pid.hist PID histograms...') # TODO(shivesh): As of now, events do not have units as far as PISA # is concerned self.load_events(self.params.pid_events) self.cut_events(self.params.transform_events_keep_criteria) # TODO: in future, the events file will not have these combined # already, and it should be done here (or in a nominal transform, # etc.). See below about taking this step when we move to directly # using the I3-HDF5 files. #events_file_combined_flavints = tuple([ # NuFlavIntGroup(s) # for s in self.events.metadata['flavints_joined'] #]) # TODO: take events object as an input instead of as a param that # specifies a file? Or handle both cases? pid_spec = OrderedDict(eval(self.params.pid_spec.value)) if set(pid_spec.keys()) != set(self.output_channels): msg = 'PID criteria from `pid_spec` {0} does not match {1}' raise ValueError(msg.format(pid_spec.keys(), self.output_channels)) # TODO: add importance weights, error computation logging.debug("Separating events by PID...") separated_events = OrderedDict() for sig in self.output_channels: this_sig_events = self.events.applyCut(pid_spec[sig]) separated_events[sig] = this_sig_events # Derive transforms by combining flavints that behave similarly, but # apply the derived transforms to the input flavints separately # (leaving combining these together to later) transforms = [] for flavint_group in self.transform_groups: logging.debug("Working on %s PID", flavint_group) repr_flavint = flavint_group[0] # TODO(shivesh): errors # TODO(shivesh): total histo check? sig_histograms = {} total_histo = np.zeros(self.output_binning.shape) for repr_flavint in flavint_group: histo = self.events.histogram( kinds=repr_flavint, binning=self.output_binning, weights_col=self.params.pid_weights_name.value, errors=None).hist total_histo += histo for sig in self.output_channels: sig_histograms[sig] = np.zeros(self.output_binning.shape) for repr_flavint in flavint_group: this_sig_histo = separated_events[sig].histogram( kinds=repr_flavint, binning=self.output_binning, weights_col=self.params.pid_weights_name.value, errors=None).hist sig_histograms[sig] += this_sig_histo for sig in self.output_channels: with np.errstate(divide='ignore', invalid='ignore'): xform_array = sig_histograms[sig] / total_histo num_invalid = np.sum(~np.isfinite(xform_array)) if num_invalid > 0: logging.warn( 'Group "%s", PID signature "%s" has %d bins with no' ' events (and hence the ability to separate events' ' by PID cannot be ascertained). These are being' ' masked off from any further computations.', flavint_group, sig, num_invalid) # TODO: this caused buggy event propagation for some # reason; check and re-introduced the masked array idea # when this is fixed. For now, replicating the behavior # from PISA 2. #xform_array = np.ma.masked_invalid(xform_array) # Double check that no NaN remain #assert not np.any(np.isnan(xform_array)) # Copy this transform to use for each input in the group for input_name in self.input_names: if input_name not in flavint_group: continue xform = BinnedTensorTransform( input_names=input_name, output_name=self.suffix_channel(input_name, sig), input_binning=self.input_binning, output_binning=self.output_binning, xform_array=xform_array) transforms.append(xform) return TransformSet(transforms=transforms)
def _compute_nominal_transforms(self):
    """Compute effective-area transforms from weighted MC events, then
    smooth them."""
    self.load_events(self.params.aeff_events)
    self.cut_events(self.params.transform_events_keep_criteria)

    # Units must be the following for correctly converting a sum-of-
    # OneWeights-in-bin to an average effective area across the bin.
    comp_units = dict(true_energy='GeV', true_coszen=None,
                      true_azimuth='rad')

    # Select only the units in the input/output binning for conversion
    # (can't pass more than what's actually there)
    in_units = {dim: unit for dim, unit in comp_units.items()
                if dim in self.input_binning}
    #out_units = {dim: unit for dim, unit in comp_units.items()
    #             if dim in self.output_binning}

    # These will be in the computational units
    input_binning = self.input_binning.to(**in_units)

    # Account for "missing" dimension(s) (dimensions OneWeight expects for
    # computation of bin volume), and accommodate with a factor equal to
    # the full range. See IceCube wiki/documentation for OneWeight for
    # more info.
    missing_dims_vol = 1
    # TODO: currently, azimuth required to *not* be part of input binning
    if 'true_azimuth' not in input_binning:
        missing_dims_vol *= 2 * np.pi
    # TODO: Following is currently never the case, handle?
    if 'true_coszen' not in input_binning:
        missing_dims_vol *= 2

    nominal_transforms = []
    for xform_flavints in self.transform_groups:
        logging.info("Working on %s effective areas xform", xform_flavints)

        raw_hist = self.events.histogram(kinds=xform_flavints,
                                         binning=input_binning,
                                         weights_col='weighted_aeff',
                                         errors=True)
        raw_transform = unp.nominal_values(raw_hist.hist)
        raw_errors = unp.std_devs(raw_hist.hist)

        # Divide histogram by
        #   (energy bin width x coszen bin width x azimuth bin width)
        # volumes to convert from sums-of-OneWeights-in-bins to effective
        # areas. Note that the volume correction factor for missing
        # dimensions is applied here.
        bin_volumes = input_binning.bin_volumes(attach_units=False)
        raw_transform /= (bin_volumes * missing_dims_vol)
        raw_errors /= (bin_volumes * missing_dims_vol)

        e_idx = input_binning.index('true_energy')
        if e_idx == 1:
            # transpose
            raw_transform = raw_transform.T
            raw_errors = raw_errors.T

        # Do the smoothing
        smooth_transform = self.smooth(raw_transform, raw_errors,
                                       input_binning['true_energy'],
                                       input_binning['true_coszen'])

        if e_idx == 1:
            # transpose back
            smooth_transform = smooth_transform.T

        nominal_transforms.extend(
            populate_transforms(service=self,
                                xform_flavints=xform_flavints,
                                xform_array=smooth_transform))

    return TransformSet(transforms=nominal_transforms)
def _compute_nominal_transforms(self):
    """Compute effective-area transforms by histogramming weighted MC
    events."""
    self.load_events(self.params.aeff_events)
    self.cut_events(self.params.transform_events_keep_criteria)

    # Units must be the following for correctly converting a sum-of-
    # OneWeights-in-bin to an average effective area across the bin.
    comp_units = dict(true_energy='GeV', true_coszen=None,
                      true_azimuth='rad')

    # Select only the units in the input/output binning for conversion
    # (can't pass more than what's actually there)
    in_units = {dim: unit for dim, unit in comp_units.items()
                if dim in self.input_binning}

    # TODO: use out_units for some kind of conversion?
    #out_units = {dim: unit for dim, unit in comp_units.items()
    #             if dim in self.output_binning}

    # These will be in the computational units
    input_binning = self.input_binning.to(**in_units)

    # Account for "missing" dimension(s) (dimensions OneWeight expects for
    # computation of bin volume), and accommodate with a factor equal to
    # the full range. See IceCube wiki/documentation for OneWeight for
    # more info.
    missing_dims_vol = 1
    if 'true_azimuth' not in input_binning:
        missing_dims_vol *= 2*np.pi
    if 'true_coszen' not in input_binning:
        missing_dims_vol *= 2

    if bool(self.debug_mode):
        outdir = os.path.join(find_resource('debug'),
                              self.stage_name, self.service_name)
        mkdir(outdir)
        #hex_hash = hash2hex(kde_hash)

    bin_volumes = input_binning.bin_volumes(attach_units=False)
    norm_volumes = bin_volumes * missing_dims_vol

    nominal_transforms = []
    for xform_flavints in self.transform_groups:
        logging.debug('Working on %s effective areas xform', xform_flavints)

        aeff_transform = self.events.histogram(
            kinds=xform_flavints,
            binning=input_binning,
            weights_col='weighted_aeff',
            errors=(self.error_method not in [None, False])
        )
        aeff_transform = aeff_transform.hist

        # Divide histogram by
        #   (energy bin width x coszen bin width x azimuth bin width)
        # volumes to convert from sums-of-OneWeights-in-bins to effective
        # areas. Note that the volume correction factor for missing
        # dimensions is applied here.
        aeff_transform /= norm_volumes

        if self.debug_mode:
            outfile = os.path.join(
                outdir, 'aeff_' + str(xform_flavints) + '.pkl'
            )
            to_file(aeff_transform, outfile)

        nominal_transforms.extend(
            populate_transforms(
                service=self,
                xform_flavints=xform_flavints,
                xform_array=aeff_transform
            )
        )

    return TransformSet(transforms=nominal_transforms)
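# Minimal numpy sketch of the OneWeight-to-Aeff normalization above: divide
# the summed weights in each bin by the bin volume, times the full-range
# factor for each dimension missing from the binning. Numbers are synthetic.
import numpy as np

e_edges = np.logspace(0, 2, 11)              # 10 true_energy bins [GeV]
cz_edges = np.linspace(-1, 1, 21)            # 20 true_coszen bins
bin_volumes = np.outer(np.diff(e_edges), np.diff(cz_edges))
missing_dims_vol = 2 * np.pi                 # azimuth not binned -> full range

summed_oneweights = np.random.rand(10, 20)   # stand-in histogram
aeff = summed_oneweights / (bin_volumes * missing_dims_vol)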
def compute_transforms(service):
    """Compute effective area transforms, taking aeff systematics into
    account. Systematics are: `aeff_scale`, `livetime`, and `nutau_cc_norm`.
    """
    aeff_scale = service.params.aeff_scale.m_as('dimensionless')
    livetime_s = service.params.livetime.m_as('sec')
    base_scale = aeff_scale * livetime_s

    logging.trace('livetime = %s --> %s sec',
                  service.params.livetime.value, livetime_s)

    if service.particles == 'neutrinos':
        if not hasattr(service, 'nutau_cc_norm_must_be_one'):
            service.nutau_cc_norm_must_be_one = False
            """If any flav/ints besides nutau_cc and nutaubar_cc are grouped
            with one or both of those for transforms, then a `nutau_cc_norm`
            != 1 cannot be applied."""
            nutaucc_and_nutaubarcc = set(NuFlavIntGroup('nutau_cc+nutaubar_cc'))
            for group in service.transform_groups:
                # If nutau_cc, nutaubar_cc, or both are in the group and
                # other flavors are present, nutau_cc_norm must be one!
                group_set = set(group)
                if group_set.intersection(nutaucc_and_nutaubarcc) and \
                        group_set.difference(nutaucc_and_nutaubarcc):
                    service.nutau_cc_norm_must_be_one = True

        nutau_cc_norm = service.params.nutau_cc_norm.m_as('dimensionless')
        if nutau_cc_norm != 1 and service.nutau_cc_norm_must_be_one:
            raise ValueError(
                '`nutau_cc_norm` = %e but can only be != 1 if nutau CC and'
                ' nutaubar CC are separated from other flav/ints.'
                ' Transform groups are: %s'
                % (nutau_cc_norm, service.transform_groups)
            )

    if hasattr(service, 'sum_grouped_flavints'):
        sum_grouped_flavints = service.sum_grouped_flavints
    else:
        sum_grouped_flavints = False

    new_transforms = []
    for transform in service.nominal_transforms:
        this_scale = base_scale
        if service.particles == 'neutrinos':
            out_nfig = NuFlavIntGroup(transform.output_name)
            if 'nutau_cc' in out_nfig or 'nutaubar_cc' in out_nfig:
                this_scale *= nutau_cc_norm

        if this_scale != 1:
            aeff_transform = transform.xform_array * this_scale
        else:
            aeff_transform = transform.xform_array

        new_xform = BinnedTensorTransform(
            input_names=transform.input_names,
            output_name=transform.output_name,
            input_binning=transform.input_binning,
            output_binning=transform.output_binning,
            xform_array=aeff_transform,
            sum_inputs=sum_grouped_flavints
        )
        new_transforms.append(new_xform)

    return TransformSet(new_transforms)
def _compute_nominal_transforms(self): """Compute cross-section transforms.""" logging.info('Updating xsec.genie cross-section histograms...') self.load_xsec_splines() livetime = self._ev_param(self.params['livetime'].value) ice_p = self._ev_param(self.params['ice_p'].value) fid_vol = self._ev_param(self.params['fid_vol'].value) mr_h20 = self._ev_param(self.params['mr_h20'].value) x_energy_scale = self.params['x_energy_scale'].value input_binning = self.input_binning ebins = input_binning.true_energy for idx, name in enumerate(input_binning.names): if 'true_energy' in name: e_idx = idx xsec_transforms = {} for flav in self.input_names: for int_ in ALL_NUINT_TYPES: flavint = flav + '_' + str(int_) logging.debug('Obtaining cross-sections for %s', flavint) xsec_map = self.xsec.get_map(flavint, MultiDimBinning([ebins]), x_energy_scale=x_energy_scale) def func(idx): if idx == e_idx: return xsec_map.hist return tuple(range(input_binning.shape[idx])) num_dims = input_binning.num_dims xsec_trns = np.meshgrid(*map(func, range(num_dims)), indexing='ij')[e_idx] xsec_trns *= (livetime * fid_vol * (ice_p / mr_h20) * (6.022140857e+23 / ureg.mol)) xsec_transforms[NuFlavInt(flavint)] = xsec_trns nominal_transforms = [] for flavint_group in self.transform_groups: flav_names = [str(flav) for flav in flavint_group.flavs] for input_name in self.input_names: if input_name not in flav_names: continue xform_array = [] for flavint in flavint_group.flavints: if flavint in xsec_transforms: xform_array.append(xsec_transforms[flavint]) xform_array = reduce(add, xform_array) xform = BinnedTensorTransform( input_names=input_name, output_name=str(flavint_group), input_binning=input_binning, output_binning=self.output_binning, xform_array=xform_array) nominal_transforms.append(xform) return TransformSet(transforms=nominal_transforms)
def _compute_transforms(self): """Generate reconstruction "smearing kernels" by histogramming true and reconstructed variables from a Monte Carlo events file. The resulting transform is a 2N-dimensional histogram, where N is the dimensionality of the input binning. The transform maps the truth bin counts to the reconstructed bin counts. I.e., for the case of 1D input binning, the ith element of the reconstruction kernel will be a map showing the distribution of events over all the reco space from truth bin i. This will be normalised to the total number of events in truth bin i. Notes ----- In the current implementation these histograms are made **UN**weighted. This is probably quite wrong... """ e_res_scale = self.params.e_res_scale.value.m_as('dimensionless') cz_res_scale = self.params.cz_res_scale.value.m_as('dimensionless') e_reco_bias = self.params.e_reco_bias.value.m_as('GeV') cz_reco_bias = self.params.cz_reco_bias.value.m_as('dimensionless') res_scale_ref = self.params.res_scale_ref.value.strip().lower() assert res_scale_ref in ['zero'] # TODO: , 'mean', 'median'] self.load_events(self.params.reco_events) self.cut_events(self.params.transform_events_keep_criteria) # Computational units must be the following for compatibility with # events file comp_units = dict(true_energy='GeV', true_coszen=None, true_azimuth='rad', reco_energy='GeV', reco_coszen=None, reco_azimuth='rad', pid=None) # Select only the units in the input/output binning for conversion # (can't pass more than what's actually there) in_units = { dim: unit for dim, unit in comp_units.items() if dim in self.input_binning } out_units = { dim: unit for dim, unit in comp_units.items() if dim in self.output_binning } # These binnings will be in the computational units defined above input_binning = self.input_binning.to(**in_units) output_binning = self.output_binning.to(**out_units) xforms = [] for xform_flavints in self.transform_groups: logging.debug("Working on %s reco kernels" % xform_flavints) repr_flavint = xform_flavints[0] true_energy = self.events[repr_flavint]['true_energy'] true_coszen = self.events[repr_flavint]['true_coszen'] reco_energy = self.events[repr_flavint]['reco_energy'] reco_coszen = self.events[repr_flavint]['reco_coszen'] e_reco_err = reco_energy - true_energy cz_reco_err = reco_coszen - true_coszen if self.params.res_scale_ref.value.strip().lower() == 'zero': self.events[repr_flavint]['reco_energy'] = ( true_energy + e_reco_err * e_res_scale + e_reco_bias) self.events[repr_flavint]['reco_coszen'] = ( true_coszen + cz_reco_err * cz_res_scale + cz_reco_bias) # True (input) + reco {+ PID} (output)-dimensional histogram # is the basis for the transformation reco_kernel = self.events.histogram( kinds=xform_flavints, binning=input_binning * output_binning, weights_col=self.params.reco_weights_name.value, errors=(self.error_method not in [None, False])) # Extract just the numpy array to work with reco_kernel = reco_kernel.hist # This takes into account the correct kernel normalization: # What this means is that we have to normalise the reco map # to the number of events in the truth bin. # # I.e., we have N events from the truth bin which then become # spread out over the whole map due to reconstruction. # The normalisation is dividing this map by N. # # Previously this was hard-coded for 2 dimensions, but I have tried # to generalise it to arbitrary dimensionality. # Truth-only (N-dimensional) histogram will be used for # normalization (so transform is in terms of fraction-of-events in # input--i.e. 
truth--bin). Sum over the input dimensions. true_event_counts = self.events.histogram( kinds=xform_flavints, binning=input_binning, weights_col=self.params.reco_weights_name.value, errors=(self.error_method not in [None, False])) # Extract just the numpy array to work with true_event_counts = true_event_counts.hist # If there weren't any events in the input (true_*) bin, make this # bin have no effect -- i.e., populate all output bins # corresponding to the input bin with zeros via `nan_to_num`. with np.errstate(divide='ignore', invalid='ignore'): true_event_counts[true_event_counts == 0] = np.nan norm_factors = 1.0 / true_event_counts norm_factors = np.nan_to_num(norm_factors) # Numpy broadcasts lower-dimensional things to higher dimensions # from last dimension to first; if we simply mult the reco_kernel # by norm_factors, this will apply the normalization to the # __output__ dimensions rather than the input dimensions. Add # "dummy" dimensions to norm_factors where we want the "extra # dimensions": at the end. for dim in self.output_binning: norm_factors = np.expand_dims(norm_factors, axis=-1) # Apply the normalization to the kernels reco_kernel *= norm_factors assert np.all(reco_kernel >= 0), \ 'number of elements less than 0 = %d' \ % np.sum(reco_kernel < 0) sum_over_axes = tuple(range(-len(self.output_binning), 0)) totals = np.sum(reco_kernel, axis=sum_over_axes) assert np.all( totals <= 1 + 1e-14), 'max = ' + str(np.max(totals) - 1) # Now populate this transform to each input for which it applies. if self.sum_grouped_flavints: xform_input_names = [] for input_name in self.input_names: input_flavs = NuFlavIntGroup(input_name) if len(set(xform_flavints).intersection(input_flavs)) > 0: xform_input_names.append(input_name) for output_name in self.output_names: if output_name not in xform_flavints: continue xform = BinnedTensorTransform( input_names=xform_input_names, output_name=output_name, input_binning=self.input_binning, output_binning=self.output_binning, xform_array=reco_kernel, sum_inputs=self.sum_grouped_flavints) xforms.append(xform) else: # NOTES: # * Output name is same as input name # * Use `self.input_binning` and `self.output_binning` so maps # are returned in user-defined units (rather than # computational units, which are attached to the non-`self` # versions of these binnings). for input_name in self.input_names: if input_name not in xform_flavints: continue xform = BinnedTensorTransform( input_names=input_name, output_name=input_name, input_binning=self.input_binning, output_binning=self.output_binning, xform_array=reco_kernel, ) xforms.append(xform) return TransformSet(transforms=xforms)
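# Self-contained sketch of the kernel normalization above: divide a
# (true x reco)-dimensional histogram by the per-truth-bin totals, adding
# trailing dummy axes so numpy broadcasts over the *input* dimensions.
import numpy as np

reco_kernel = np.random.rand(10, 20, 8, 16)   # (E_true, cz_true, E_reco, cz_reco)
true_counts = reco_kernel.sum(axis=(-2, -1))  # one total per truth bin

with np.errstate(divide='ignore', invalid='ignore'):
    norm = np.nan_to_num(1.0 / true_counts)
norm = norm[..., np.newaxis, np.newaxis]      # align with the two reco dims
reco_kernel *= norm
assert np.allclose(reco_kernel.sum(axis=(-2, -1)), 1.0)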
def _compute_transforms(self): """ Generate reconstruction "smearing kernels" by reading in a set of parameterisation functions from a json file. This should have the same dimensionality as the input binning i.e. if you have energy and coszenith input binning then the kernels provided should have both energy and coszenith resolution functions. Any superposition of distributions from scipy.stats is supported. """ res_scale_ref = self.params.res_scale_ref.value.strip().lower() assert res_scale_ref in ['zero'] # TODO: , 'mean', 'median'] reco_param_source = self.params.reco_paramfile.value if reco_param_source is None: raise ValueError( 'non-None reco parameterization params.reco_paramfile' ' must be provided') reco_param_hash = hash_obj(reco_param_source) if (self._reco_param_hash is None or reco_param_hash != self._reco_param_hash): reco_param = load_reco_param(reco_param_source) # Transform groups are implicitly defined by the contents of the # reco paramfile's keys implicit_transform_groups = reco_param.keys() # Make sure these match transform groups specified for the stage if set(implicit_transform_groups) != set(self.transform_groups): raise ValueError( 'Transform groups (%s) defined implicitly by' ' %s reco parameterizations do not match those' ' defined as the stage\'s `transform_groups` (%s).' % (implicit_transform_groups, reco_param_source, self.transform_groups)) self.param_dict = reco_param self._reco_param_hash = reco_param_hash self.eval_dict = self.evaluate_reco_param() self.reco_scales_and_biases_applicable() # everything seems to be fine, so rescale and shift distributions eval_dict = self.scale_and_shift_reco_dists() # Computational units must be the following for compatibility with # events file comp_units = dict(true_energy='GeV', true_coszen=None, true_azimuth='rad', reco_energy='GeV', reco_coszen=None, reco_azimuth='rad', pid=None) # Select only the units in the input/output binning for conversion # (can't pass more than what's actually there) in_units = { dim: unit for dim, unit in comp_units.items() if dim in self.input_binning } out_units = { dim: unit for dim, unit in comp_units.items() if dim in self.output_binning } # These binnings will be in the computational units defined above input_binning = self.input_binning.to(**in_units) output_binning = self.output_binning.to(**out_units) en_centers_in = self.input_binning[ 'true_energy'].weighted_centers.magnitude en_edges_in = self.input_binning['true_energy'].bin_edges.magnitude cz_centers_in = self.input_binning[ 'true_coszen'].weighted_centers.magnitude cz_edges_in = self.input_binning['true_coszen'].bin_edges.magnitude en_edges_out = self.output_binning['reco_energy'].bin_edges.magnitude cz_edges_out = self.output_binning['reco_coszen'].bin_edges.magnitude n_e_in = len(en_centers_in) n_cz_in = len(cz_centers_in) n_e_out = len(en_edges_out) - 1 n_cz_out = len(cz_edges_out) - 1 if self.coszen_flipback: cz_edges_out, flipback_mask, keep = \ self.extend_binning_for_coszen(ext_low=-3., ext_high=+3.) xforms = [] for xform_flavints in self.transform_groups: logging.debug("Working on %s reco kernel..." 
% xform_flavints) this_params = eval_dict[xform_flavints] reco_kernel = np.zeros((n_e_in, n_cz_in, n_e_out, n_cz_out)) for (i, j) in itertools.product(range(n_e_in), range(n_cz_in)): e_kern_cdf = self.make_cdf(bin_edges=en_edges_out, enval=en_centers_in[i], enindex=i, czval=None, czindex=j, dist_params=this_params['energy']) cz_kern_cdf = self.make_cdf(bin_edges=cz_edges_out, enval=en_centers_in[i], enindex=i, czval=cz_centers_in[j], czindex=j, dist_params=this_params['coszen']) if self.coszen_flipback: cz_kern_cdf = perform_coszen_flipback( cz_kern_cdf, flipback_mask, keep) reco_kernel[i, j] = np.outer(e_kern_cdf, cz_kern_cdf) # Sanity check of reco kernels - intolerable negative values? logging.trace(" Ensuring reco kernel sanity...") kern_neg_invalid = reco_kernel < -EQUALITY_PREC if np.any(kern_neg_invalid): raise ValueError("Detected intolerable negative entries in" " reco kernel! Min.: %.15e" % np.min(reco_kernel)) # Set values numerically compatible with zero to zero np.where((np.abs(reco_kernel) < EQUALITY_PREC), reco_kernel, 0) sum_over_axes = tuple(range(-len(self.output_binning), 0)) totals = np.sum(reco_kernel, axis=sum_over_axes) totals_large = totals > (1 + EQUALITY_PREC) if np.any(totals_large): raise ValueError("Detected overflow in reco kernel! Max.:" " %0.15e" % (np.max(totals))) if self.input_binning.basenames[0] == "coszen": # The reconstruction kernel has been set up with energy as its # first dimension, so swap axes if it is applied to an input # binning where 'coszen' is the first logging.trace(" Swapping kernel dimensions since 'coszen' has" " been requested as the first.") reco_kernel = np.swapaxes(reco_kernel, 0, 1) reco_kernel = np.swapaxes(reco_kernel, 2, 3) if self.sum_grouped_flavints: xform_input_names = [] for input_name in self.input_names: if set(NuFlavIntGroup(input_name)).isdisjoint( xform_flavints): continue xform_input_names.append(input_name) for output_name in self.output_names: if output_name not in xform_flavints: continue xform = BinnedTensorTransform( input_names=xform_input_names, output_name=output_name, input_binning=self.input_binning, output_binning=self.output_binning, xform_array=reco_kernel, sum_inputs=self.sum_grouped_flavints) xforms.append(xform) # If *not* combining grouped flavints: # Copy the transform for each input flavor, regardless if the # transform is computed from a combination of flavors. else: for input_name in self.input_names: if set(NuFlavIntGroup(input_name)).isdisjoint( xform_flavints): continue for output_name in self.output_names: if (output_name not in NuFlavIntGroup(input_name) or output_name not in xform_flavints): continue logging.trace(' input: %s, output: %s, xform: %s', input_name, output_name, xform_flavints) xform = BinnedTensorTransform( input_names=input_name, output_name=output_name, input_binning=self.input_binning, output_binning=self.output_binning, xform_array=reco_kernel, sum_inputs=self.sum_grouped_flavints) xforms.append(xform) return TransformSet(transforms=xforms)
def _compute_nominal_transforms(self): """Compute new PID transforms.""" logging.debug('Updating pid.param PID histograms...') self.load_pid_energy_param(self.params.pid_energy_paramfile.value) nominal_transforms = [] for xform_flavints in self.transform_groups: logging.debug('Working on %s PID', xform_flavints) xform_array = np.empty(self.transform_output_binning.shape) subdict = self.pid_energy_param_dict[xform_flavints] for signature, sig_param_func in subdict.items(): # Get the PID probabilities vs. energy at the energy bins' # (weighted) centers pid1d = sig_param_func(self.ebin_centers) # Broadcast this 1d array across the reco_coszen dimension # since it's independent of reco_coszen broadcasted_pid = self.transform_output_binning.broadcast( pid1d, from_dim='reco_energy', to_dims='reco_coszen') pid_indexer = (self.transform_output_binning.indexer( pid=signature)) # Assign the broadcasted array to the correct PID bin xform_array[pid_indexer] = broadcasted_pid if self.sum_grouped_flavints: xform_input_names = [] for input_name in self.input_names: input_flavs = NuFlavIntGroup(input_name) if set(xform_flavints).intersection(input_flavs): xform_input_names.append(input_name) for output_name in self.output_names: if output_name not in xform_flavints: continue xform = BinnedTensorTransform( input_names=xform_input_names, output_name=str(xform_flavints), input_binning=self.input_binning, output_binning=self.transform_output_binning, xform_array=xform_array, sum_inputs=self.sum_grouped_flavints) nominal_transforms.append(xform) else: for input_name in self.input_names: if input_name not in xform_flavints: continue xform = BinnedTensorTransform( input_names=input_name, output_name=input_name, input_binning=self.input_binning, output_binning=self.transform_output_binning, xform_array=xform_array, ) nominal_transforms.append(xform) return TransformSet(transforms=nominal_transforms)
def _compute_nominal_transforms(self): """Compute parameterised effective area transforms""" energy_param_source = self.params.aeff_energy_paramfile.value coszen_param_source = self.params.aeff_coszen_paramfile.value energy_param_hash = hash_obj(energy_param_source) coszen_param_hash = hash_obj(coszen_param_source) load_energy = False load_coszen = False if (self._param_hashes['energy'] is None or energy_param_hash != self._param_hashes['energy']): load_energy = True if (self.has_cz and (self._param_hashes['coszen'] is None or energy_param_hash != self._param_hashes)): load_coszen = True if energy_param_source is None: raise ValueError( 'non-None energy parameterization params.aeff_energy_paramfile' ' must be provided' ) if not self.has_cz and coszen_param_source is not None: raise ValueError( 'true_coszen dimension was not found in the binning but a' ' coszen parameterisation file has been provided by' ' `params.aeff_coszen_paramfile`.' ) if not (load_energy or load_coszen): return dims = ['energy', 'coszen'] loads = [load_energy, load_coszen] sources = [energy_param_source, coszen_param_source] hashes = [energy_param_hash, coszen_param_hash] for dim, load, source, hash_ in zip(dims, loads, sources, hashes): if not load: continue self._param_hashes[dim] = None self.aeff_params[dim] = None params = load_aeff_param(source) # Transform groups are implicitly defined by the contents of the # `pid_energy_paramfile`'s keys implicit_transform_groups = params.keys() # Make sure these match transform groups specified for the stage if set(implicit_transform_groups) != set(self.transform_groups): raise ValueError( 'Transform groups (%s) defined implicitly by' ' %s aeff parameterizations "%s" do not match those' ' defined as the stage\'s `transform_groups` (%s).' % (implicit_transform_groups, dim, source, self.transform_groups) ) self.aeff_params[dim] = params self._param_hashes[dim] = hash_ nominal_transforms = [] for xform_flavints in self.transform_groups: logging.debug('Working on %s effective areas xform', xform_flavints) energy_param_func = self.aeff_params['energy'][xform_flavints] coszen_param_func = None if self.aeff_params['coszen'] is not None: coszen_param_func = self.aeff_params['coszen'][xform_flavints] # Now calculate the 1D aeff along energy aeff_vs_e = energy_param_func(self.ecen) # NOTE/TODO: Below is taken from the PISA 2 implementation of this. # Almost certainly comes from the fact that the highest knot there # was 79.5 GeV with the upper energy bin edge being 80 GeV. There's # probably something better that could be done here... # Correct for final energy bin, since interpolation does not # extend to JUST right outside the final bin if aeff_vs_e[-1] == 0: aeff_vs_e[-1] = aeff_vs_e[-2] if self.has_cz: aeff_vs_e = self.input_binning.broadcast( aeff_vs_e, from_dim='true_energy', to_dims='true_coszen' ) if coszen_param_func is not None: aeff_vs_cz = coszen_param_func(self.czcen) # Normalize aeff_vs_cz *= len(aeff_vs_cz) / np.sum(aeff_vs_cz) else: aeff_vs_cz = np.ones(shape=len(self.czcen)) cz_broadcasted = self.input_binning.broadcast( aeff_vs_cz, from_dim='true_coszen', to_dims='true_energy' ) aeff_transform = aeff_vs_e * cz_broadcasted else: aeff_transform = aeff_vs_e nominal_transforms.extend( populate_transforms( service=self, xform_flavints=xform_flavints, xform_array=aeff_transform ) ) return TransformSet(transforms=nominal_transforms)