async def model_fingerprint(file_importer: "TrainingDataImporter") -> Fingerprint:
    """Build a model fingerprint from the project's configuration and training data.

    Args:
        file_importer: Importer that supplies the domain, the stories hash, the
            NLU data and the NLU/Core configurations.

    Returns:
        A dictionary keyed by the ``FINGERPRINT_*`` constants. NLU config and
        NLU data entries are themselves dictionaries keyed by ``lang``.
    """
    from rasa.core.domain import Domain
    import rasa
    import time

    # bf mod
    domain = await file_importer.get_domain()
    stories_hash = await file_importer.get_stories_hash()
    nlu_data = await file_importer.get_nlu_data()
    nlu_config = await file_importer.get_nlu_config()
    core_config = await file_importer.get_core_config()

    # Pull the responses out of the serialized domain so that they get their
    # own fingerprint entry, then rebuild a domain from what is left.
    serialized_domain = domain.as_dict()
    responses = serialized_domain.pop("responses")
    domain_sans_responses = Domain.from_dict(serialized_domain)

    config_hash = _get_hash_of_config(core_config, exclude_keys=CONFIG_MANDATORY_KEYS)
    core_config_hash = _get_hash_of_config(
        core_config, include_keys=CONFIG_MANDATORY_KEYS_CORE
    )

    nlu_config_hashes = {}
    for lang, lang_config in nlu_config.items():
        nlu_config_hashes[lang] = _get_hash_of_config(
            lang_config, include_keys=CONFIG_MANDATORY_KEYS_NLU
        )

    domain_hash = hash(domain_sans_responses)
    responses_hash = get_dict_hash(responses)

    nlu_data_hashes = {}
    for lang in nlu_data:
        nlu_data_hashes[lang] = hash(nlu_data[lang])

    return {
        FINGERPRINT_CONFIG_KEY: config_hash,
        FINGERPRINT_CONFIG_CORE_KEY: core_config_hash,
        FINGERPRINT_CONFIG_NLU_KEY: nlu_config_hashes,
        FINGERPRINT_DOMAIN_WITHOUT_NLG_KEY: domain_hash,
        FINGERPRINT_NLG_KEY: responses_hash,
        FINGERPRINT_NLU_DATA_KEY: nlu_data_hashes,
        FINGERPRINT_STORIES_KEY: stories_hash,
        FINGERPRINT_TRAINED_AT_KEY: time.time(),
        FINGERPRINT_RASA_VERSION_KEY: rasa.__version__,
    }
async def model_fingerprint(file_importer: "TrainingDataImporter") -> Fingerprint:
    """Build a model fingerprint from the project's configuration and training data.

    Args:
        file_importer: Importer that supplies the config, the domain, the stories
            hash, the NLU data and the NLU configuration.

    Returns:
        A dictionary keyed by the ``FINGERPRINT_*`` constants.
    """
    import time

    # bf mod
    config = await file_importer.get_config()
    original_domain = await file_importer.get_domain()
    stories_hash = await file_importer.get_stories_hash()
    nlu_data = await file_importer.get_nlu_data()
    nlu_config = await file_importer.get_nlu_config()

    responses = original_domain.templates

    # Work on a shallow copy so the importer's domain keeps its responses.
    # The response texts are fingerprinted separately, so they are blanked
    # out on the copy before the domain itself is hashed.
    domain_sans_responses = copy.copy(original_domain)
    domain_sans_responses.templates = []

    fingerprint = {}
    fingerprint[FINGERPRINT_CONFIG_KEY] = _get_hash_of_config(
        config, exclude_keys=CONFIG_KEYS
    )
    fingerprint[FINGERPRINT_CONFIG_CORE_KEY] = _get_hash_of_config(
        config, include_keys=CONFIG_KEYS_CORE
    )

    if len(nlu_config):
        nlu_config_hashes = {}
        for lang, lang_config in nlu_config.items():
            nlu_config_hashes[lang] = _get_hash_of_config(
                lang_config, include_keys=CONFIG_KEYS_NLU
            )
    else:
        nlu_config_hashes = ""
    fingerprint[FINGERPRINT_CONFIG_NLU_KEY] = nlu_config_hashes

    fingerprint[FINGERPRINT_DOMAIN_WITHOUT_NLG_KEY] = hash(domain_sans_responses)
    fingerprint[FINGERPRINT_NLG_KEY] = get_dict_hash(responses)
    fingerprint[FINGERPRINT_PROJECT] = project_fingerprint()

    nlu_data_hashes = {}
    for lang in nlu_data:
        nlu_data_hashes[lang] = hash(nlu_data[lang])
    fingerprint[FINGERPRINT_NLU_DATA_KEY] = nlu_data_hashes

    fingerprint[FINGERPRINT_STORIES_KEY] = stories_hash
    fingerprint[FINGERPRINT_TRAINED_AT_KEY] = time.time()
    fingerprint[FINGERPRINT_RASA_VERSION_KEY] = (
        rasa.__version__  # pytype: disable=module-attr
    )
    return fingerprint
def _get_hash_of_config(
    config: Optional[Dict],
    include_keys: Optional[List[Text]] = None,
    exclude_keys: Optional[List[Text]] = None,
) -> Text:
    """Hash a selected subset of a configuration dictionary.

    Args:
        config: Configuration dictionary. ``None`` or an empty dictionary
            yields ``""``.
        include_keys: If non-empty, only these keys (when present in
            ``config``) are hashed and ``exclude_keys`` is ignored.
        exclude_keys: Keys to leave out when ``include_keys`` is empty or
            ``None``. ``None`` means that nothing is excluded.

    Returns:
        ``""`` for a missing/empty config, otherwise the result of
        ``get_dict_hash`` applied to the selected sub-dictionary.
    """
    if not config:
        return ""

    if include_keys:
        keys = include_keys
    else:
        # `exclude_keys` defaults to None; testing membership against None
        # would raise TypeError, so fall back to "exclude nothing".
        excluded = exclude_keys or ()
        keys = [key for key in config if key not in excluded]

    sub_config = {key: config[key] for key in keys if key in config}
    return get_dict_hash(sub_config)
async def model_fingerprint(file_importer: "TrainingDataImporter") -> Fingerprint:
    """Create a model fingerprint from its used configuration and training data.

    Args:
        file_importer: File importer which provides the training data and
            model config.

    Returns:
        The fingerprint: a dictionary keyed by the ``FINGERPRINT_*`` constants.
    """
    import copy
    import time

    config = await file_importer.get_config()
    domain = await file_importer.get_domain()
    stories = await file_importer.get_stories()
    nlu_data = await file_importer.get_nlu_data()

    responses = domain.templates

    # Blank the responses on a shallow copy rather than on the object handed
    # out by the importer; assigning to `domain.templates` directly would
    # strip the responses from the domain for every later user of it.
    domain = copy.copy(domain)
    # Don't include the response texts in the domain fingerprint.
    # Their fingerprint is separate.
    domain.templates = []

    return {
        FINGERPRINT_CONFIG_KEY: _get_hash_of_config(config, exclude_keys=CONFIG_KEYS),
        FINGERPRINT_CONFIG_CORE_KEY: _get_hash_of_config(
            config, include_keys=CONFIG_KEYS_CORE
        ),
        FINGERPRINT_CONFIG_NLU_KEY: _get_hash_of_config(
            config, include_keys=CONFIG_KEYS_NLU
        ),
        FINGERPRINT_DOMAIN_WITHOUT_NLG_KEY: hash(domain),
        FINGERPRINT_NLG_KEY: get_dict_hash(responses),
        FINGERPRINT_PROJECT: project_fingerprint(),
        FINGERPRINT_NLU_DATA_KEY: hash(nlu_data),
        FINGERPRINT_STORIES_KEY: hash(stories),
        FINGERPRINT_TRAINED_AT_KEY: time.time(),
        FINGERPRINT_RASA_VERSION_KEY: rasa.__version__,  # pytype: disable=module-attr
    }
def _get_hash_of_config(
    config_path: Text,
    include_keys: Optional[List[Text]] = None,
    exclude_keys: Optional[List[Text]] = None,
) -> Text:
    """Hash a selected subset of the configuration stored at ``config_path``.

    Args:
        config_path: Path of the configuration file. An empty/``None`` path or
            a path that does not exist yields ``""``.
        include_keys: If non-empty, only these keys (when present in the file)
            are hashed and ``exclude_keys`` is ignored.
        exclude_keys: Keys to leave out when ``include_keys`` is empty or
            ``None``. ``None`` means that nothing is excluded.

    Returns:
        ``""`` when the file is missing or fails to parse as YAML, otherwise
        the result of ``get_dict_hash`` applied to the selected sub-dictionary.
    """
    if not config_path or not os.path.exists(config_path):
        return ""

    # Only the read can raise the parser error, so keep the `try` body minimal.
    try:
        config_dict = rasa.utils.io.read_config_file(config_path)
    except yaml.parser.ParserError as e:
        logger.debug(
            "Failed to read config file '{}'. Error: {}".format(config_path, e)
        )
        return ""

    if include_keys:
        keys = include_keys
    else:
        # `exclude_keys` defaults to None; testing membership against None
        # would raise TypeError, so fall back to "exclude nothing".
        excluded = exclude_keys or ()
        keys = [key for key in config_dict.keys() if key not in excluded]

    sub_config = {key: config_dict[key] for key in keys if key in config_dict}
    return get_dict_hash(sub_config)
async def test_create_fingerprint_from_invalid_paths(project, project_files):
    """Check the fingerprint computed for the given ``project_files``.

    The expected fingerprint has empty config hashes, the hash and responses
    of an empty domain, ``0`` for stories and ``{}`` for the NLU data. The
    training timestamp only has to be present and is left out of the
    comparison.
    """
    # Not referenced below; kept so the test's import behaviour is unchanged.
    from rasa.nlu.training_data import TrainingData
    from rasa.core.training.structures import StoryGraph

    importer = _project_files(project, *project_files)

    expected_fingerprint = _fingerprint(
        config="",
        config_nlu="",
        config_core="",
        domain=hash(Domain.empty()),
        nlg=get_dict_hash(Domain.empty().templates),
        stories=0,
        nlu={},  # bf
        rasa_version=rasa.__version__,
    )

    actual_fingerprint = await model_fingerprint(importer)

    # The timestamp differs on every run: require it to be set, then drop it
    # from both sides before comparing the remaining entries.
    trained_at = actual_fingerprint.pop(FINGERPRINT_TRAINED_AT_KEY)
    assert trained_at is not None
    del expected_fingerprint[FINGERPRINT_TRAINED_AT_KEY]

    assert actual_fingerprint == expected_fingerprint