def _resolve_memory(
    step_uuid: str, consumer: Optional[str] = None
) -> Dict[str, Any]:
    """Returns information of the most recent write to memory.

    Looks up the metadata of the plasma store object that `step_uuid`
    maps to and reads the timestamp, serialization and name from it.
    It also sets the arguments to call the :func:`_get_output_memory`
    function with; the data itself is not retrieved here.

    Args:
        step_uuid: The UUID of the step to resolve its most recent write
            to memory.
        consumer: The consumer of the output data. It is not used here,
            only passed along as keyword argument for
            :func:`_get_output_memory`, where it is put inside the
            metadata of an empty object to trigger a notification in
            the plasma store, which is then used to manage eviction of
            objects.

    Returns:
        Dictionary containing the information of the function to be
        called to get the most recent data from the step. Additionally,
        returns fill-in arguments for the function and metadata related
        to the data that would be retrieved. The keys are
        ``"method_to_call"``, ``"method_args"``, ``"method_kwargs"`` and
        ``"metadata"`` (with ``"timestamp"``, ``"serialization"`` and
        ``"name"``).

    Raises:
        MemoryOutputNotFoundError: If output from `step_uuid` cannot be
            found.
        OrchestNetworkError: Could not connect to the
            ``Config.STORE_SOCKET_NAME``, because it does not exist.
            Which might be because the specified value was wrong or the
            store died.

    """
    client = _PlasmaConnector().client
    obj_id = _convert_uuid_to_object_id(step_uuid)

    # Get metadata of the object if it exists. Only one ID is requested,
    # so the single result is the first element of the returned list. A
    # `None` entry is treated as "no output for this step in the store".
    metadata = client.get_metadata([obj_id], timeout_ms=0)[0]
    if metadata is None:
        raise error.MemoryOutputNotFoundError(
            f'Output from incoming step "{step_uuid}" cannot be found. '
            "Try rerunning it."
        )

    # This is a pyarrow.Buffer, so it has to be turned into Python bytes
    # before it can be decoded. Not much overhead given that this is
    # just metadata.
    raw_metadata = metadata.to_pybytes().decode("utf-8")
    timestamp, serialization, name = _interpret_metadata(raw_metadata)

    return {
        "method_to_call": _get_output_memory,
        "method_args": (step_uuid,),
        "method_kwargs": {"consumer": consumer},
        "metadata": {
            "timestamp": timestamp,
            "serialization": serialization,
            "name": name,
        },
    }
def _get_output_memory(step_uuid: str, consumer: Optional[str] = None) -> Any:
    """Gets data from memory.

    After the data has been read successfully, and only if the
    ``ORCHEST_MEMORY_EVICTION`` environment variable is set, an empty
    object carrying eviction metadata is written to the store.

    Args:
        step_uuid: The UUID of the step to get output data from.
        consumer: The consumer of the output data. This is put inside
            the metadata of an empty object to trigger a notification in
            the plasma store, which is then used to manage eviction of
            objects.

    Returns:
        Data from step identified by `step_uuid`.

    Raises:
        DeserializationError: If the data could not be deserialized.
        MemoryOutputNotFoundError: If output from `step_uuid` cannot be
            found.
        OrchestNetworkError: Could not connect to the
            ``Config.STORE_SOCKET_NAME``, because it does not exist.
            Which might be because the specified value was wrong or the
            store died.
        MemoryError: Possibly, although very unlikely, when the empty
            eviction object is written to the store (according to the
            original author's note; not verifiable from this function
            alone).

    """
    client = _PlasmaConnector().client
    obj_id = _convert_uuid_to_object_id(step_uuid)

    try:
        obj = _deserialize_output_memory(obj_id, client)
    except error.ObjectNotFoundError as exc:
        # Chain explicitly so the low-level cause stays attached to the
        # user-facing error.
        raise error.MemoryOutputNotFoundError(
            f'Output from incoming step "{step_uuid}" cannot be found. '
            "Try rerunning it."
        ) from exc
    # IOError is to try to catch pyarrow deserialization errors.
    except (pickle.UnpicklingError, IOError) as exc:
        raise error.DeserializationError(
            f'Output from incoming step "{step_uuid}" could not be deserialized.'
        ) from exc

    # Both handlers above raise, so from here on the read has succeeded.

    # NOTE: receiving data is now actually also outputting data (the
    # empty eviction object below), which is why a MemoryError is
    # theoretically possible here.
    # NOTE: the "ORCHEST_MEMORY_EVICTION" ENV variable is set in the
    # orchest-api. Now we always know when we are running inside a
    # jupyter kernel interactively. And in that case we never want
    # to do eviction.
    if os.getenv("ORCHEST_MEMORY_EVICTION") is not None:
        empty_obj, _ = _serialize("")
        msg = f"{Config.IDENTIFIER_EVICTION};{step_uuid},{consumer}"
        metadata = bytes(msg, "utf-8")
        _output_to_memory(empty_obj, client, metadata=metadata)

    return obj