def conn_hash(self):
    """Return the sum of ``smvhash`` over every attribute value that is set.

    Attribute names come from ``self.attributes()``; attributes whose value
    is ``None`` do not contribute to the result.
    """
    values = (getattr(self, name) for name in self.attributes())
    return sum(smvhash(value) for value in values if value is not None)
def _sourceHash(module):
    """Return the ``smvhash`` of ``module``'s source text with comments stripped."""
    # The hash is deliberately taken over the source text rather than the
    # compiled byte code: byte code doesn't change when constant values are
    # changed (e.g. "a = 5" and "a = 6" compile to the same byte code).
    return smvhash(_stripComments(inspect.getsource(module)))
def _config_hash(self):
    """Integer value representing the SMV config's contribution to the dataset hash

    Only the keys declared in requiresConfig are considered.
    """
    # Sorting the (key, value) pairs keeps the hash independent of the order
    # in which requiresConfig lists its keys.
    ordered_pairs = sorted(
        (key, self.smvGetRunConfig(key)) for key in self.requiresConfig()
    )
    # repr gives a string form that changes iff the sorted pairs change.
    return smvhash(repr(ordered_pairs))
def _file_hash(self, path, msg):
    """Return the hash of ``path`` plus the file's modification time.

    ``msg`` is only used to label the debug log lines. When looking up the
    modification time raises ``Py4JJavaError`` (e.g. the file doesn't exist),
    the modification time is taken to be 0.
    """
    path_hash = smvhash(path)
    smv.logger.debug(
        "{} {} file path hash: {}".format(self.fqn(), msg, path_hash)
    )

    try:
        mtime = self.smvApp._jvm.SmvHDFS.modificationTime(path)
    except Py4JJavaError:
        # It is possible that the file doesn't exist
        mtime = 0
    smv.logger.debug("{} {} file mtime: {}".format(self.fqn(), msg, mtime))

    return path_hash + mtime
def instanceValHash(self):
    """Hash of a file with schema.

    Combines the data file hash (path and mtime) with the schema hash, which
    comes from userSchema when one is provided and from the schema file
    otherwise.
    """
    data_hash = self._file_hash(self._full_path(), "data")
    smv.logger.debug("{} data file hash: {}".format(self.fqn(), data_hash))

    if self.userSchema() is None:
        # No user-provided schema: fall back to the schema file's hash.
        schema_hash = self._file_hash(self._full_schema_path(), "schema")
    else:
        schema_hash = smvhash(self.userSchema())
    smv.logger.debug("{} schema hash: {}".format(self.fqn(), schema_hash))

    return data_hash + schema_hash
def _sourceCodeHash(self):
    """Hash computed based on the source code, config and lib usage.

    Starts from the base class's source code hash and adds:

    * the hash of the config key/values listed in requiresConfig,
    * the source hash of every lib returned by requiresLib (0 when the
      source is not available),
    * the hashes of the keys of the historical validators, if the class
      declares ``_smvHistoricalValidatorsList``.

    Returns:
        The sum of all the contributions above.
    """
    res = super(SmvProcessModule, self)._sourceCodeHash()

    cls = self.__class__

    # incorporate hash of KVs for config keys listed in requiresConfig
    config_hash = self._config_hash()
    smv.logger.debug("{} config_hash: {}".format(self.fqn(), config_hash))
    res += config_hash

    # iterate through libs/modules that this DataSet depends on and use
    # their source towards hash as well
    for lib in self.requiresLib():
        # It is possible that inspect.getsource raises
        # (Python 2) "IOError: source code not available"
        # (Python 3) "TypeError"
        # for c-lib, e.g. time
        try:
            lib_src_hash = _sourceHash(lib)
        except (IOError, TypeError):
            lib_src_hash = 0

        smv.logger.debug("{} sourceHash: {}".format(
            lib.__name__, lib_src_hash))
        res += lib_src_hash

    # if module has high order historical validation rules, add their hash
    # to sum. The _key() of a validator should change if its parameters
    # change.
    if hasattr(cls, "_smvHistoricalValidatorsList"):
        historical_keys_hash = sum(
            smvhash(v._key()) for v in cls._smvHistoricalValidatorsList
        )
        # The template has two placeholders; both arguments must be given,
        # otherwise str.format raises IndexError.
        smv.logger.debug("{} historical keys hash: {}".format(
            self.fqn(), historical_keys_hash))
        res += historical_keys_hash

    return res
def _sourceCodeHash(self):
    """Hash computed based on the source code, config and lib usage.

    Adds the config hash, the source hashes of the required libs and the
    historical validator key hashes to the base class's source code hash.

    Returns:
        The base class's ``_sourceCodeHash`` plus: the ``_config_hash``
        value, the ``_sourceHash`` of each lib from ``requiresLib`` (0 for
        libs whose source cannot be read), and, when the class has
        ``_smvHistoricalValidatorsList``, the ``smvhash`` of each
        validator's ``_key()``.
    """
    res = super(SmvProcessModule, self)._sourceCodeHash()
    cls = self.__class__

    # incorporate hash of KVs for config keys listed in requiresConfig
    config_hash = self._config_hash()
    smv.logger.debug("{} config_hash: {}".format(self.fqn(), config_hash))
    res += config_hash

    # iterate through libs/modules that this DataSet depends on and use
    # their source towards hash as well
    for lib in self.requiresLib():
        try:
            lib_src_hash = _sourceHash(lib)
        except (IOError, TypeError):
            # inspect.getsource can raise for c-libs such as time:
            # (Python 2) "IOError: source code not available"
            # (Python 3) "TypeError"
            lib_src_hash = 0
        smv.logger.debug("{} sourceHash: {}".format(lib.__name__, lib_src_hash))
        res += lib_src_hash

    # if module has high order historical validation rules, add their hash
    # to sum. The _key() of a validator should change if its parameters
    # change.
    if hasattr(cls, "_smvHistoricalValidatorsList"):
        keys_hash = [smvhash(v._key()) for v in cls._smvHistoricalValidatorsList]
        historical_keys_hash = sum(keys_hash)
        # Both placeholders need an argument; passing only the hash makes
        # str.format raise IndexError.
        smv.logger.debug(
            "{} historical keys hash: {}".format(self.fqn(), historical_keys_hash)
        )
        res += historical_keys_hash

    return res
def fileNameHash(self):
    """Return the ``smvhash`` of the value returned by ``self.fileName()``."""
    return smvhash(self.fileName())
def tableNameHash(self):
    """Return the ``smvhash`` of the value returned by ``self.tableName()``."""
    return smvhash(self.tableName())