示例#1
0
def _resolve_memory(
    step_uuid: str, consumer: Optional[str] = None
) -> Dict[str, Any]:
    """Returns information of the most recent write to memory.

    Looks up the metadata stored alongside the step's object in the
    plasma store and interprets it to obtain the timestamp, the
    serialization and the name of the output. It also sets the
    arguments to call :func:`_get_output_memory` with, so that the
    caller can retrieve the actual data later on.

    Args:
        step_uuid: The UUID of the step to resolve its most recent write
            to memory.
        consumer: The consumer of the output data. This is put inside
            the metadata of an empty object to trigger a notification in
            the plasma store, which is then used to manage eviction of
            objects. It is not used here directly, only forwarded as a
            keyword argument for :func:`_get_output_memory`.

    Returns:
        Dictionary containing the information of the function to be
        called to get the most recent data from the step. Additionally,
        returns fill-in arguments for the function and metadata
        related to the data that would be retrieved. The keys are
        ``"method_to_call"``, ``"method_args"``, ``"method_kwargs"`` and
        ``"metadata"`` (with ``"timestamp"``, ``"serialization"`` and
        ``"name"``).

    Raises:
        MemoryOutputNotFoundError: If output from `step_uuid` cannot be
            found.
        OrchestNetworkError: Could not connect to the
            ``Config.STORE_SOCKET_NAME``, because it does not exist.
            Which might be because the specified value was wrong or the
            store died.
    """
    client = _PlasmaConnector().client
    obj_id = _convert_uuid_to_object_id(step_uuid)

    # Only a single object ID is requested, so only the first entry of
    # the result is relevant. A missing object shows up as `None`.
    # NOTE(review): `timeout_ms=0` presumably makes the lookup return
    # immediately instead of waiting for the object to appear.
    metadata_buffer = client.get_metadata([obj_id], timeout_ms=0)[0]
    if metadata_buffer is None:
        raise error.MemoryOutputNotFoundError(
            f'Output from incoming step "{step_uuid}" cannot be found. '
            "Try rerunning it.")

    # This is a pyarrow.Buffer, so it has to be turned into Python bytes
    # before it can be decoded. Not much overhead given that this is
    # just metadata.
    raw_metadata = metadata_buffer.to_pybytes().decode("utf-8")
    timestamp, serialization, name = _interpret_metadata(raw_metadata)

    return {
        "method_to_call": _get_output_memory,
        "method_args": (step_uuid, ),
        "method_kwargs": {
            "consumer": consumer
        },
        "metadata": {
            "timestamp": timestamp,
            "serialization": serialization,
            "name": name,
        },
    }
示例#2
0
def _get_output_memory(step_uuid: str, consumer: Optional[str] = None) -> Any:
    """Gets data from memory.

    After the data has been retrieved successfully, and only if the
    "ORCHEST_MEMORY_EVICTION" environment variable is set, an empty
    object is written to the store whose metadata identifies the
    `step_uuid` and the `consumer`.

    Args:
        step_uuid: The UUID of the step to get output data from.
        consumer: The consumer of the output data. This is put inside
            the metadata of an empty object to trigger a notification in
            the plasma store, which is then used to manage eviction of
            objects.

    Returns:
        Data from step identified by `step_uuid`.

    Raises:
        DeserializationError: If the data could not be deserialized.
        MemoryOutputNotFoundError: If output from `step_uuid` cannot be
            found.
        OrchestNetworkError: Could not connect to the
            ``Config.STORE_SOCKET_NAME``, because it does not exist.
            Which might be because the specified value was wrong or the
            store died.
        MemoryError: Possibly, although very unlikely, because
            retrieving data may also write the (empty) eviction object
            to the store.
    """
    client = _PlasmaConnector().client
    obj_id = _convert_uuid_to_object_id(step_uuid)

    try:
        obj = _deserialize_output_memory(obj_id, client)
    except error.ObjectNotFoundError as exc:
        # Chain the original exception so the root cause stays visible
        # in the traceback.
        raise error.MemoryOutputNotFoundError(
            f'Output from incoming step "{step_uuid}" cannot be found. '
            "Try rerunning it.") from exc
    # IOError is to try to catch pyarrow deserialization errors.
    except (pickle.UnpicklingError, IOError) as exc:
        raise error.DeserializationError(
            f'Output from incoming step "{step_uuid}" could not be deserialized.'
        ) from exc

    # NOTE: the "ORCHEST_MEMORY_EVICTION" ENV variable is set in the
    # orchest-api. Now we always know when we are running inside a
    # jupyter kernel interactively. And in that case we never want
    # to do eviction.
    if os.getenv("ORCHEST_MEMORY_EVICTION") is not None:
        # The payload itself is irrelevant; the metadata of this empty
        # object carries the eviction message for the store.
        empty_obj, _ = _serialize("")
        msg = f"{Config.IDENTIFIER_EVICTION};{step_uuid},{consumer}"
        _output_to_memory(empty_obj, client, metadata=msg.encode("utf-8"))

    return obj