def test_resource_with_model_storage(
    default_model_storage: ModelStorage, tmp_path: Path, temp_cache: TrainingCache
):
    """Caching a resource and restoring it into a fresh storage keeps its content."""
    node_name = "some node"
    resource = Resource(node_name)
    test_filename = "persisted_model.json"
    test_content = {"epochs": 500}

    # Persist some content for the resource in the original model storage.
    with default_model_storage.write_to(resource) as temporary_directory:
        rasa.shared.utils.io.dump_obj_as_json_to_file(
            temporary_directory / test_filename, test_content
        )

    test_fingerprint_key = uuid.uuid4().hex
    test_output_fingerprint_key = uuid.uuid4().hex
    temp_cache.cache_output(
        test_fingerprint_key,
        resource,
        test_output_fingerprint_key,
        default_model_storage,
    )

    # Restore the cached resource into a brand-new, empty model storage.
    new_model_storage_location = tmp_path / "new_model_storage"
    new_model_storage_location.mkdir()
    new_model_storage = LocalModelStorage(new_model_storage_location)
    restored_resource = temp_cache.get_cached_result(
        test_output_fingerprint_key, node_name, new_model_storage
    )

    assert isinstance(restored_resource, Resource)
    # Fixed: the original asserted `restored_resource == restored_resource`,
    # a tautology that can never fail. Compare against the original resource.
    assert restored_resource == resource

    # The restored resource must expose the same persisted content.
    with new_model_storage.read_from(restored_resource) as temporary_directory:
        cached_content = rasa.shared.utils.io.read_json_file(
            temporary_directory / test_filename
        )
        assert cached_content == test_content
def test_write_to_and_read(default_model_storage: ModelStorage):
    """Content written for a resource is readable back, including nested files."""
    top_level_name = "file.txt"
    top_level_content = "hi"
    nested_name = "sub_file"
    nested_dir_name = "sub_directory"
    nested_content = "sub file"

    resource = Resource("some_node123")

    # Populate the model storage for this resource: one top-level file and
    # one file inside a subdirectory.
    with default_model_storage.write_to(resource) as target:
        (target / top_level_name).write_text(top_level_content)
        nested_dir = target / nested_dir_name
        nested_dir.mkdir()
        (nested_dir / nested_name).write_text(nested_content)

    # Reading the resource back must yield exactly the content written above.
    with default_model_storage.read_from(resource) as target:
        assert (target / top_level_name).read_text() == top_level_content
        assert (target / nested_dir_name / nested_name).read_text() == nested_content
def test_loading_from_resource_eager(default_model_storage: ModelStorage):
    """An eager `GraphNode` loads persisted state from its `Resource` on construction."""
    previous_resource = Resource("previous resource")
    expected = {"test": "test value"}

    # Simulate an earlier training run that persisted state for this resource.
    with default_model_storage.write_to(previous_resource) as directory:
        rasa.shared.utils.io.dump_obj_as_json_to_file(
            directory / "test.json", expected
        )

    node_name = "some_name"
    node = GraphNode(
        node_name=node_name,
        component_class=PersistableTestComponent,
        constructor_name="load",
        component_config={},
        fn_name="run_inference",
        inputs={},
        eager=True,
        model_storage=default_model_storage,
        # The `GraphComponent` should load from this resource
        resource=previous_resource,
        execution_context=ExecutionContext(GraphSchema({}), "123"),
    )

    returned_name, returned_value = node()

    assert returned_name == node_name
    assert returned_value == expected
def from_cache(
    cls, node_name: Text, directory: Path, model_storage: ModelStorage
) -> Resource:
    """Loads a `Resource` from the cache.

    This automatically loads the persisted resource into the given `ModelStorage`.

    Args:
        node_name: The node name of the `Resource`.
        directory: The directory with the cached `Resource`.
        model_storage: The `ModelStorage` which the cached `Resource` will be added
            to so that the `Resource` is accessible for other graph nodes.

    Returns:
        The ready-to-use and accessible `Resource`.
    """
    logger.debug(f"Loading resource '{node_name}' from cache.")

    restored = Resource(node_name)

    # Copy the cached files into the model storage so other graph nodes
    # can read the resource from there.
    with model_storage.write_to(restored) as storage_directory:
        rasa.utils.common.copy_directory(directory, storage_directory)

    logger.debug(f"Successfully initialized resource '{node_name}' from cache.")
    return restored
def test_read_from_not_existing_resource(default_model_storage: ModelStorage):
    """Reading a resource that was never written raises a `ValueError`."""
    # Write some unrelated resource so the storage itself is non-empty.
    with default_model_storage.write_to(Resource("resource1")) as directory:
        (directory / "file.txt").write_text("test")

    # Requesting a resource the storage has never seen must fail.
    with pytest.raises(ValueError):
        with default_model_storage.read_from(Resource("a different resource")) as _:
            pass
def from_cache(
    cls,
    node_name: Text,
    directory: Path,
    model_storage: ModelStorage,
    output_fingerprint: Text,
) -> Resource:
    """Loads a `Resource` from the cache.

    This automatically loads the persisted resource into the given `ModelStorage`.

    Args:
        node_name: The node name of the `Resource`.
        directory: The directory with the cached `Resource`.
        model_storage: The `ModelStorage` which the cached `Resource` will be added
            to so that the `Resource` is accessible for other graph nodes.
        output_fingerprint: The fingerprint of the cached `Resource`.

    Returns:
        The ready-to-use and accessible `Resource`.
    """
    logger.debug(f"Loading resource '{node_name}' from cache.")

    resource = Resource(node_name, output_fingerprint=output_fingerprint)
    # An empty cache directory means there is nothing to copy into the model
    # storage; the bare resource (with its fingerprint) is still returned.
    if not any(directory.glob("*")):
        logger.debug(f"Cached resource for '{node_name}' was empty.")
        return resource

    try:
        with model_storage.write_to(resource) as resource_directory:
            rasa.utils.common.copy_directory(directory, resource_directory)
    except ValueError:
        # This might happen during finetuning as in this case the model storage
        # is already filled
        # NOTE(review): `resource_directory` is only bound if `write_to.__enter__`
        # succeeded before the error was raised (i.e. the `ValueError` came from
        # `copy_directory` into a non-empty target). If `write_to` itself raised,
        # this line would fail with `NameError` — confirm which call can raise here.
        if not rasa.utils.io.are_directories_equal(directory, resource_directory):
            # We skip caching in case we see the cached output and output
            # from the model which we want to finetune are not the same
            raise

    logger.debug(f"Successfully initialized resource '{node_name}' from cache.")
    return resource
def create(
    cls,
    config: Dict[Text, Any],
    model_storage: ModelStorage,
    resource: Resource,
    execution_context: ExecutionContext,
) -> RuleOnlyDataProvider:
    """Creates component (see parent class for docstring).

    Args:
        config: The component configuration (unused here).
        model_storage: Storage holding the trained `RulePolicy` output.
        resource: The resource under which the rule-only data was persisted.
        execution_context: The current execution context (unused here).

    Returns:
        A provider wrapping the persisted rule-only data, or empty data if
        no trained `RulePolicy` output is available.
    """
    rule_only_data = {}
    try:
        # Fixed: this loads previously persisted data, so the storage must be
        # opened with `read_from`, not `write_to`. `read_from` also raises the
        # `ValueError` the handler below expects when the resource does not
        # exist (a missing file under `write_to` would surface as
        # `FileNotFoundError` and escape the handler).
        with model_storage.read_from(resource) as directory:
            rule_only_data = rasa.shared.utils.io.read_json_file(
                directory / "rule_only_data.json"
            )
    except ValueError:
        # Best-effort: a model trained without `RulePolicy` has no rule-only
        # data; fall back to an empty mapping instead of failing.
        logger.debug(
            "Failed to load rule-only data from a trained 'RulePolicy'. "
            "Providing empty rule-only data instead."
        )

    return cls(rule_only_data)
def test_load_from_untrained_but_with_resource_existing(
    default_model_storage: ModelStorage,
    default_execution_context: ExecutionContext,
    mitie_model: MitieModel,
):
    """Loading from an empty resource directory falls back to a no-op classifier."""
    resource = Resource("some_resource")

    # This makes sure the directory exists but the model file itself doesn't
    with default_model_storage.write_to(resource):
        pass

    classifier = MitieIntentClassifierGraphComponent.load(
        MitieIntentClassifierGraphComponent.get_default_config(),
        default_model_storage,
        resource,
        default_execution_context,
    )

    message = Message({TEXT: "hi"})
    MitieTokenizer().process(message)
    classifier.process([message], mitie_model)

    # Without a trained model file the classifier predicts nothing.
    assert message.data[INTENT] == {"name": None, "confidence": 0.0}