def hashable_state(self):
    """OrderedDict : State of the object that can be used for hashing.

    Takes the hashable state provided by the parent class and adds two
    entries to it, each passed through `normQuant` with `HASH_SIGFIGS`
    significant figures:

    * ``'xform_array'`` : derived from `self.nominal_values`
    * ``'error_array'`` : derived from `self.std_devs`
    """
    hashable = super(BinnedTensorTransform, self).hashable_state
    # (key in the state, attribute that supplies the array), in insertion
    # order; each attribute is read only right before it is normalized.
    for key, attr_name in (
        ('xform_array', 'nominal_values'),
        ('error_array', 'std_devs'),
    ):
        hashable[key] = normQuant(getattr(self, attr_name),
                                  sigfigs=HASH_SIGFIGS)
    return hashable
def _derive_nominal_transforms_hash(self):
    """Derive a hash that uniquely identifies the nominal transform.

    The hash should be unique across processes and invocations because
    nominal transforms can be non-volatile (cached to disk) and must still
    be valid, given their hash value, when loaded from disk in the future.

    This implementation combines, in order:

    * the nominal parameter values' hash
      (`self.params.nominal_values_hash`),
    * a hash for each attribute named in `self._attrs_to_hash` (visited in
      sorted order), and
    * the source code hash (`self.source_code_hash`).

    Returns
    -------
    nominal_transforms_hash
        Result of `hash_obj` applied to the collected hashes, or None if
        any one of the collected hashes is None.

    Notes
    -----
    The hashing scheme implemented here might be sufficiently unique for
    many cases, but override this method in services according to the
    following guidelines:

    * Stages that use a nominal transform should override this method if
      the hash is more accurately computed differently from here.

    * Stages that use transforms but do not use nominal transforms can
      override this method with a simpler version that simply returns
      None to save computation time (if this method is found to be a
      significant performance hit). (This method is called each time an
      output is computed if `self.use_transforms == True`.)

    * Stages that use no transforms (i.e., `self.use_transforms == False`)
      will not call any built-in methods related to transforms, so
      overriding this method is irrelevant to such stages.

    If this method *is* overridden (and not just to return None), since
    the nominal transform may be stored to a disk cache, make sure that
    `self.source_code_hash` is included in the objects used to compute the
    final hash value. Even if all parameters are the same, a nominal
    transform stored to disk is ***invalid if the source code changes***,
    and `_derive_nominal_transforms_hash` must reflect this.

    """
    components = [self.params.nominal_values_hash]

    for name in sorted(self._attrs_to_hash):
        value = getattr(self, name)
        if hasattr(value, "hash"):
            # Object already knows its own hash; use it as-is
            components.append(value.hash)
            continue
        if self.full_hash:
            # Normalize the value before hashing when full hashes are used
            value = normQuant(value)
        components.append(hash_obj(value, full_hash=self.full_hash))

    components.append(self.source_code_hash)

    # A single missing (None) hash invalidates the entire hash
    if any(component is None for component in components):
        return None
    return hash_obj(components, full_hash=self.full_hash)
def _derive_transforms_hash(self, nominal_transforms_hash=None):
    """Compute a hash that uniquely identifies the transforms that will be
    produced from the current configuration.

    This hash needs only to be valid for this run (i.e., it is a volatile
    hash). It is built from the current parameter values' hash
    (`self.params.values_hash`), the nominal transforms hash (only if that
    is not None), and a hash for each attribute named in
    `self._attrs_to_hash` (visited in sorted order).

    Parameters
    ----------
    nominal_transforms_hash : optional
        Previously derived nominal transforms hash. If None, it is derived
        here via `self._derive_nominal_transforms_hash()`.

    Returns
    -------
    transforms_hash
        Result of `hash_obj` applied to the collected hashes, or None if
        any one of the collected hashes is None.
    nominal_transforms_hash
        The nominal transforms hash that was passed in or derived here
        (may be None).

    """
    params_hash = self.params.values_hash
    logging.trace("self.params.values_hash = %s" % params_hash)
    components = [params_hash]

    # Use the nominal transforms hash handed in, else derive it now
    if nominal_transforms_hash is None:
        nominal_transforms_hash = self._derive_nominal_transforms_hash()

    # A missing nominal hash is simply left out; it does not by itself
    # invalidate the transforms hash
    if nominal_transforms_hash is not None:
        components.append(nominal_transforms_hash)

    for name in sorted(self._attrs_to_hash):
        value = getattr(self, name)
        if hasattr(value, "hash"):
            components.append(value.hash)
            continue
        if self.full_hash:
            value = normQuant(value)
        components.append(hash_obj(value, full_hash=self.full_hash))

    # A single missing (None) hash invalidates the entire hash
    if any(component is None for component in components):
        transforms_hash = None
    else:
        transforms_hash = hash_obj(components, full_hash=self.full_hash)

    return transforms_hash, nominal_transforms_hash
def _derive_outputs_hash(self):
    """Derive a hash value that uniquely identifies the outputs that will
    be generated based upon the current state of the stage.

    Depending on the stage's configuration, this implementation hashes
    together:

    * The hash of the inputs container (`self.inputs.hash`), if an outputs
      cache exists and `self.input_names` is non-empty
    * If `self.use_transforms` is set: the transforms hash obtained from
      `self._derive_transforms_hash()`
    * Otherwise, and only if an outputs cache exists: a "sub-hash" built
      from the current params' values hash and a hash for each attribute
      named in `self._attrs_to_hash`

    If any of the above objects is included but its hash is None, the
    entire outputs hash is invalidated. The outputs hash is also None
    whenever `self.outputs_cache` is None.

    Returns
    -------
    outputs_hash
        Combined hash, or None if invalidated (see above).
    transforms_hash
        Transforms hash if `self.use_transforms`, otherwise None.
    nominal_transforms_hash
        Nominal transforms hash if `self.use_transforms`, otherwise None.

    """
    components = []

    # Stages that take inputs contribute the inputs container's hash
    if self.outputs_cache is not None and len(self.input_names) > 0:
        inputs_hash = self.inputs.hash
        logging.trace("inputs.hash = %s" % inputs_hash)
        components.append(inputs_hash)

    if self.use_transforms:
        # Stages that use transforms contribute the transforms hash
        (transforms_hash,
         nominal_transforms_hash) = self._derive_transforms_hash()
        components.append(transforms_hash)
        logging.trace("derived transforms hash = %s" % transforms_hash)
    else:
        # No transforms: build a sub-hash from params and attributes here
        transforms_hash = nominal_transforms_hash = None
        if self.outputs_cache is not None:
            # Start with the hash of all parameter values...
            sub_components = [self.params.values_hash]

            # ... then add the additional attributes of this object
            for name in sorted(self._attrs_to_hash):
                value = getattr(self, name)
                if hasattr(value, "hash"):
                    sub_components.append(value.hash)
                    continue
                if self.full_hash:
                    value = normQuant(value)
                sub_components.append(
                    hash_obj(value, full_hash=self.full_hash)
                )

            # Generate the "sub-hash"; any missing piece invalidates it
            if any(piece is None for piece in sub_components):
                sub_hash = None
            else:
                sub_hash = hash_obj(sub_components,
                                    full_hash=self.full_hash)
            components.append(sub_hash)

    # Without an outputs cache, or with any hash missing (i.e., None), the
    # entire outputs hash is invalid
    if (self.outputs_cache is None
            or any(piece is None for piece in components)):
        outputs_hash = None
    else:
        outputs_hash = hash_obj(components, full_hash=self.full_hash)

    return outputs_hash, transforms_hash, nominal_transforms_hash
def update_hash(self):
    """Update the cached hash value.

    Passes `self.metadata` through `normQuant`, hashes the result with
    `hash_obj`, and stores that hash in `self._hash`.
    """
    normalized_metadata = normQuant(self.metadata)
    self._hash = hash_obj(normalized_metadata)