Example #1
    def _is_eligible_previous_execution(
            self, current_execution: metadata_store_pb2.Execution,
            target_execution: metadata_store_pb2.Execution) -> bool:
        """Compare if the previous execution is same as current execution.

    This method will ignore ID and time related fields.

    Args:
      current_execution: the current execution.
      target_execution: the previous execution to be compared with.

    Returns:
      whether the previous and current executions are the same.
    """
        current_execution.properties['run_id'].string_value = ''
        target_execution.properties['run_id'].string_value = ''
        current_execution.id = target_execution.id
        # Skip comparing time-sensitive fields.
        # The execution might not have the create_time_since_epoch or
        # last_update_time_since_epoch field if the execution was created by an
        # old version before these fields were introduced.
        if hasattr(current_execution, 'create_time_since_epoch'):
            current_execution.ClearField('create_time_since_epoch')
        if hasattr(target_execution, 'create_time_since_epoch'):
            target_execution.ClearField('create_time_since_epoch')
        if hasattr(current_execution, 'last_update_time_since_epoch'):
            current_execution.ClearField('last_update_time_since_epoch')
        if hasattr(target_execution, 'last_update_time_since_epoch'):
            target_execution.ClearField('last_update_time_since_epoch')
        return current_execution == target_execution
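
The method above mutates both of its arguments, so callers are expected to pass copies. Below is a minimal sketch of how it might be exercised; the handler object and the property values are hypothetical, not taken from the source:

import copy

from ml_metadata.proto import metadata_store_pb2

current = metadata_store_pb2.Execution()
current.properties['run_id'].string_value = 'run-2024-01-02'
current.properties['component_id'].string_value = 'Trainer'

previous = metadata_store_pb2.Execution()
previous.id = 42  # Assigned by MLMD when the execution was first written.
previous.properties['run_id'].string_value = 'run-2024-01-01'
previous.properties['component_id'].string_value = 'Trainer'

# run_id and id differ, but every other property matches, so the previous
# execution counts as "the same" for cache-eligibility purposes.
if handler._is_eligible_previous_execution(copy.deepcopy(current),
                                           copy.deepcopy(previous)):
    print('Cache hit: reuse outputs of execution', previous.id)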
Example #2
    def _update_execution_proto(
        self,
        execution: metadata_store_pb2.Execution,
        pipeline_info: Optional[data_types.PipelineInfo] = None,
        component_info: Optional[data_types.ComponentInfo] = None,
        state: Optional[Text] = None,
        exec_properties: Optional[Dict[Text, Any]] = None,
    ) -> metadata_store_pb2.Execution:
        """Updates the execution proto with given type and state."""
        if state is not None:
            execution.properties[_EXECUTION_TYPE_KEY_STATE].string_value = (
                tf.compat.as_text(state))
        # Forward-compatible change to leverage built-in schema to track states.
        if state == EXECUTION_STATE_CACHED:
            execution.last_known_state = metadata_store_pb2.Execution.CACHED
        elif state == EXECUTION_STATE_COMPLETE:
            execution.last_known_state = metadata_store_pb2.Execution.COMPLETE
        elif state == EXECUTION_STATE_NEW:
            execution.last_known_state = metadata_store_pb2.Execution.RUNNING

        exec_properties = exec_properties or {}
        # TODO(ruoyu): Enforce a formal rule for execution schema change.
        for k, v in exec_properties.items():
            # We always convert execution properties to unicode.
            execution.properties[k].string_value = tf.compat.as_text(
                tf.compat.as_str_any(v))
        # We also need to checksum the UDF file to detect when a different
        # binary is being used. Is there a better way to checksum a file than
        # hashlib.md5?
        # TODO(ruoyu): Find a better place / solution to the checksum logic.
        # TODO(ruoyu): SHA instead of MD5.
        module_file = exec_properties.get('module_file')
        if module_file and fileio.exists(module_file):
            contents = file_io.read_file_to_string(module_file)
            execution.properties['checksum_md5'].string_value = (
                tf.compat.as_text(
                    tf.compat.as_str_any(
                        hashlib.md5(tf.compat.as_bytes(contents)).hexdigest())))
        if pipeline_info:
            execution.properties['pipeline_name'].string_value = (
                pipeline_info.pipeline_name)
            execution.properties['pipeline_root'].string_value = (
                pipeline_info.pipeline_root)
            if pipeline_info.run_id:
                execution.properties['run_id'].string_value = (
                    pipeline_info.run_id)
        if component_info:
            execution.properties['component_id'].string_value = (
                component_info.component_id)
        return execution
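
A minimal usage sketch for _update_execution_proto (the handler, pipeline_info, component_info, and the EXECUTION_STATE_COMPLETE constant are assumed to be in scope; the property values are hypothetical):

execution = metadata_store_pb2.Execution()
execution = handler._update_execution_proto(
    execution,
    pipeline_info=pipeline_info,      # supplies pipeline_name/root/run_id
    component_info=component_info,    # supplies component_id
    state=EXECUTION_STATE_COMPLETE,
    exec_properties={'train_steps': 1000})

# Every execution property is stored as unicode text, so numeric values
# round-trip as strings.
assert execution.properties['train_steps'].string_value == '1000'
assert execution.last_known_state == metadata_store_pb2.Execution.COMPLETE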
Example #3
    def _is_eligible_previous_execution(
            self, current_execution: metadata_store_pb2.Execution,
            target_execution: metadata_store_pb2.Execution) -> bool:
        """Compares whether the previous execution is the same as the current one."""
        current_execution.properties['run_id'].string_value = ''
        target_execution.properties['run_id'].string_value = ''
        current_execution.id = target_execution.id
        return current_execution == target_execution
Example #4
    def __init__(self,
                 port: int,
                 mlmd_connection: metadata.Metadata,
                 execution: metadata_store_pb2.Execution,
                 address: Optional[str] = None,
                 creds: Optional[grpc.ServerCredentials] = None):
        """Initializes the gRPC server.

    Args:
      port: Which port the service will be using.
      mlmd_connection: ML metadata connection.
      execution: The MLMD Execution to keep track of.
      address: Remote address used to contact the server. Should be formatted as
               an ipv4 or ipv6 address in the format `address:port`. If left as
               None, server will use local address.
      creds: gRPC server credentials. If left as None, server will use an
             insecure port.
    """
        super().__init__()
        self._port = port
        self._address = address
        self._creds = creds
        self._mlmd_connection = mlmd_connection
        self._server = self._create_server()
        if not execution.HasField('id'):
            raise ValueError(
                'execution id must be set to be tracked by ExecutionWatcher.')
        self._execution = execution
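
A construction sketch for the initializer above (hedged: the enclosing class is assumed to be the ExecutionWatcher server named in the ValueError, and mlmd_connection is assumed to already exist):

execution = metadata_store_pb2.Execution()
execution.id = 7  # Must be set, or __init__ raises ValueError.

watcher = ExecutionWatcher(
    port=50051,
    mlmd_connection=mlmd_connection,
    execution=execution)
# address=None -> the server uses the local address;
# creds=None -> the server uses an insecure port.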
Example #5
def put_execution(
    metadata_handler: metadata.Metadata,
    execution: metadata_store_pb2.Execution,
    contexts: Sequence[metadata_store_pb2.Context],
    input_artifacts: Optional[typing_utils.ArtifactMultiMap] = None,
    output_artifacts: Optional[typing_utils.ArtifactMultiMap] = None,
    input_event_type: metadata_store_pb2.Event.Type = (
        metadata_store_pb2.Event.INPUT),
    output_event_type: metadata_store_pb2.Event.Type = (
        metadata_store_pb2.Event.OUTPUT),
) -> metadata_store_pb2.Execution:
    """Writes an execution-centric subgraph to MLMD.

  This function mainly leverages metadata.put_execution() method to write the
  execution centric subgraph to MLMD.

  Args:
    metadata_handler: A handler to access MLMD.
    execution: The execution to be written to MLMD.
    contexts: MLMD contexts to associated with the execution.
    input_artifacts: Input artifacts of the execution. Each artifact will be
      linked with the execution through an event with type input_event_type.
      Each artifact will also be linked with every context in the `contexts`
      argument.
    output_artifacts: Output artifacts of the execution. Each artifact will be
      linked with the execution through an event with type output_event_type.
      Each artifact will also be linked with every context in the `contexts`
      argument.
    input_event_type: The type of the input event, default to be INPUT.
    output_event_type: The type of the output event, default to be OUTPUT.

  Returns:
    An MLMD execution that is written to MLMD, with id pupulated.
  """
    artifact_and_events = []
    if input_artifacts:
        artifact_and_events.extend(
            _create_artifact_and_event_pairs(metadata_handler=metadata_handler,
                                             artifact_dict=input_artifacts,
                                             event_type=input_event_type))
    if output_artifacts:
        artifact_and_events.extend(
            _create_artifact_and_event_pairs(metadata_handler=metadata_handler,
                                             artifact_dict=output_artifacts,
                                             event_type=output_event_type))
    execution_id, artifact_ids, contexts_ids = (
        metadata_handler.store.put_execution(
            execution=execution,
            artifact_and_events=artifact_and_events,
            contexts=contexts,
            reuse_context_if_already_exist=True))
    execution.id = execution_id
    for artifact_and_event, a_id in zip(artifact_and_events, artifact_ids):
        artifact, _ = artifact_and_event
        artifact.id = a_id
    for context, c_id in zip(contexts, contexts_ids):
        context.id = c_id

    return execution
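
A minimal usage sketch for put_execution (the connection config, context, artifacts, and type id below are hypothetical placeholders):

with metadata.Metadata(connection_config) as m:
    execution = metadata_store_pb2.Execution(type_id=trainer_type_id)
    execution = put_execution(
        metadata_handler=m,
        execution=execution,
        contexts=[pipeline_context],
        input_artifacts={'examples': [examples_artifact]},
        output_artifacts={'model': [model_artifact]})
    # Ids are populated in place on the execution, the artifacts, and the
    # contexts after the write.
    print('Execution id:', execution.id)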
Example #6
    def update_execution(
            self,
            execution: metadata_store_pb2.Execution,
            component_info: data_types.ComponentInfo,
            input_artifacts: Optional[Dict[Text, List[Artifact]]] = None,
            output_artifacts: Optional[Dict[Text, List[Artifact]]] = None,
            exec_properties: Optional[Dict[Text, Any]] = None,
            execution_state: Optional[Text] = None,
            artifact_state: Optional[Text] = None,
            contexts: Optional[List[metadata_store_pb2.Context]] = None
    ) -> None:
        """Updates the given execution in MLMD based on given information.

    All artifacts provided will be registered if not already. Registered id will
    be reflected inline.

    Args:
      execution: the execution to be updated. It is required that the execution
        passed in has an id.
      component_info: the information of the current running component
      input_artifacts: artifacts to be declared as inputs of the execution
      output_artifacts: artifacts to be declared as outputs of the execution
      exec_properties: execution properties of the execution
      execution_state: state the execution to be updated to
      artifact_state: state the artifacts to be updated to
      contexts: a list of contexts the execution and artifacts to be linked to

    Raises:
      RuntimeError: if the execution to be updated has no id.
    """
        if not execution.id:
            raise RuntimeError(
                'No id attached to the execution to be updated.')
        events = self.store.get_events_by_execution_ids([execution.id])
        registered_input_artifact_ids = set(
            e.artifact_id for e in events
            if e.type == metadata_store_pb2.Event.INPUT)
        registered_output_artifact_ids = set(
            e.artifact_id for e in events
            if e.type == metadata_store_pb2.Event.OUTPUT)
        artifacts_and_events = []
        if input_artifacts:
            artifacts_and_events.extend(
                self._artifact_and_event_pairs(
                    artifact_dict=input_artifacts,
                    event_type=metadata_store_pb2.Event.INPUT,
                    new_state=artifact_state,
                    registered_artifacts_ids=registered_input_artifact_ids))
        if output_artifacts:
            artifacts_and_events.extend(
                self._artifact_and_event_pairs(
                    artifact_dict=output_artifacts,
                    event_type=metadata_store_pb2.Event.OUTPUT,
                    new_state=artifact_state,
                    registered_artifacts_ids=registered_output_artifact_ids))
        # If execution properties change, we potentially need to update the
        # execution schema as well.
        if exec_properties:
            execution.type_id = self._prepare_execution_type(
                component_info.component_type, exec_properties)
        if exec_properties or execution_state:
            self._update_execution_proto(
                execution=execution,
                exec_properties=exec_properties,
                state=execution_state,
                pipeline_info=component_info.pipeline_info,
                component_info=component_info)
        _, a_ids, _ = self.store.put_execution(execution, artifacts_and_events,
                                               contexts or [])
        for artifact_and_event, a_id in zip(artifacts_and_events, a_ids):
            artifact_and_event[0].id = a_id
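
A minimal usage sketch for update_execution (the handler m, execution, component_info, artifact, and context are assumed to exist; EXECUTION_STATE_COMPLETE is the constant from Example #2, and the artifact state string is an assumption, not confirmed by the source):

m.update_execution(
    execution=execution,              # must already carry an MLMD id
    component_info=component_info,
    output_artifacts={'model': [model_artifact]},
    exec_properties={'train_steps': 1000},
    execution_state=EXECUTION_STATE_COMPLETE,
    artifact_state='published',       # assumed state label
    contexts=[pipeline_context])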