def _is_eligible_previous_execution(
    self, current_execution: metadata_store_pb2.Execution,
    target_execution: metadata_store_pb2.Execution) -> bool:
  """Compares a previous execution against the current one for cache reuse.

  ID and time-related fields are ignored for the purpose of the comparison.
  NOTE: both argument protos are mutated in place (run_id is blanked, ids are
  aligned, and time fields are cleared) before comparing.

  Args:
    current_execution: the current execution.
    target_execution: the previous execution to be compared with.

  Returns:
    Whether the previous and current executions are the same.
  """
  # Neutralize fields that legitimately differ between runs.
  current_execution.properties['run_id'].string_value = ''
  target_execution.properties['run_id'].string_value = ''
  current_execution.id = target_execution.id
  # Skip comparing time-sensitive fields. Executions created by an MLMD
  # version that predates these fields may not carry them, hence the
  # hasattr guard before ClearField.
  for execution in (current_execution, target_execution):
    for time_field in ('create_time_since_epoch',
                       'last_update_time_since_epoch'):
      if hasattr(execution, time_field):
        execution.ClearField(time_field)
  return current_execution == target_execution
def _update_execution_proto(
    self,
    execution: metadata_store_pb2.Execution,
    pipeline_info: Optional[data_types.PipelineInfo] = None,
    component_info: Optional[data_types.ComponentInfo] = None,
    state: Optional[Text] = None,
    exec_properties: Optional[Dict[Text, Any]] = None,
) -> metadata_store_pb2.Execution:
  """Updates the execution proto with the given type and state.

  Args:
    execution: the execution proto to update in place.
    pipeline_info: if set, stamps pipeline name / root / run id properties.
    component_info: if set, stamps the component id property.
    state: if set, recorded both as a custom property and (for known states)
      as the built-in `last_known_state`.
    exec_properties: execution properties to record; values are stored as
      unicode strings.

  Returns:
    The same (mutated) execution proto, for convenience.
  """

  def _set_unicode_property(key: Text, value: Any) -> None:
    # We always convert execution properties to unicode.
    execution.properties[key].string_value = tf.compat.as_text(
        tf.compat.as_str_any(value))

  if state is not None:
    execution.properties[_EXECUTION_TYPE_KEY_STATE].string_value = (
        tf.compat.as_text(state))
    # Forward-compatible change to leverage built-in schema to track states.
    builtin_states = {
        EXECUTION_STATE_CACHED: metadata_store_pb2.Execution.CACHED,
        EXECUTION_STATE_COMPLETE: metadata_store_pb2.Execution.COMPLETE,
        # A freshly created execution is considered to be running.
        EXECUTION_STATE_NEW: metadata_store_pb2.Execution.RUNNING,
    }
    if state in builtin_states:
      execution.last_known_state = builtin_states[state]

  properties = exec_properties or {}
  # TODO(ruoyu): Enforce a formal rule for execution schema change.
  for key, value in properties.items():
    _set_unicode_property(key, value)

  # We also need to checksum the UDF file to identify different binaries
  # being used.
  # TODO(ruoyu): Find a better place / solution to the checksum logic.
  # TODO(ruoyu): SHA instead of MD5.
  module_file = properties.get('module_file')
  if module_file and fileio.exists(module_file):
    contents = file_io.read_file_to_string(module_file)
    _set_unicode_property(
        'checksum_md5',
        hashlib.md5(tf.compat.as_bytes(contents)).hexdigest())

  if pipeline_info:
    execution.properties['pipeline_name'].string_value = (
        pipeline_info.pipeline_name)
    execution.properties['pipeline_root'].string_value = (
        pipeline_info.pipeline_root)
    if pipeline_info.run_id:
      execution.properties['run_id'].string_value = pipeline_info.run_id
  if component_info:
    execution.properties['component_id'].string_value = (
        component_info.component_id)
  return execution
def _is_eligible_previous_execution(
    self, current_execution: metadata_store_pb2.Execution,
    target_execution: metadata_store_pb2.Execution) -> bool:
  """Compares a previous execution against the current one for cache reuse.

  ID and time-related fields are ignored for the purpose of the comparison.
  NOTE: both argument protos are mutated in place before comparing.

  Args:
    current_execution: the current execution.
    target_execution: the previous execution to be compared with.

  Returns:
    Whether the previous and current executions are the same.
  """
  # Fixed misspelled parameter name ('currrent_execution').
  current_execution.properties['run_id'].string_value = ''
  target_execution.properties['run_id'].string_value = ''
  current_execution.id = target_execution.id
  # Also skip time-sensitive fields (consistent with the sibling
  # implementation above): otherwise an identical previous execution with a
  # different timestamp would never match. The hasattr guard covers
  # executions created by an MLMD version that predates these fields.
  for execution in (current_execution, target_execution):
    for time_field in ('create_time_since_epoch',
                       'last_update_time_since_epoch'):
      if hasattr(execution, time_field):
        execution.ClearField(time_field)
  return current_execution == target_execution
def __init__(self,
             port: int,
             mlmd_connection: metadata.Metadata,
             execution: metadata_store_pb2.Execution,
             address: Optional[str] = None,
             creds: Optional[grpc.ServerCredentials] = None):
  """Initializes the gRPC server.

  Args:
    port: Which port the service will be using.
    mlmd_connection: ML metadata connection.
    execution: The MLMD Execution to keep track of.
    address: Remote address used to contact the server. Should be formatted
      as an ipv4 or ipv6 address in the format `address:port`. If left as
      None, server will use local address.
    creds: gRPC server credentials. If left as None, server will use an
      insecure port.

  Raises:
    ValueError: If the execution to be tracked has no id.
  """
  super().__init__()
  # Validate before any resource is acquired: previously the check ran after
  # _create_server(), leaking a constructed gRPC server on invalid input.
  if not execution.HasField('id'):
    raise ValueError(
        'execution id must be set to be tracked by ExecutionWatcher.')
  self._port = port
  self._address = address
  self._creds = creds
  self._mlmd_connection = mlmd_connection
  self._execution = execution
  self._server = self._create_server()
def put_execution(
    metadata_handler: metadata.Metadata,
    execution: metadata_store_pb2.Execution,
    contexts: Sequence[metadata_store_pb2.Context],
    input_artifacts: Optional[typing_utils.ArtifactMultiMap] = None,
    output_artifacts: Optional[typing_utils.ArtifactMultiMap] = None,
    input_event_type: metadata_store_pb2.Event.Type = metadata_store_pb2.Event
    .INPUT,
    output_event_type: metadata_store_pb2.Event.Type = metadata_store_pb2
    .Event.OUTPUT
) -> metadata_store_pb2.Execution:
  """Writes an execution-centric subgraph to MLMD.

  This function mainly leverages metadata.put_execution() method to write the
  execution-centric subgraph to MLMD.

  Args:
    metadata_handler: A handler to access MLMD.
    execution: The execution to be written to MLMD.
    contexts: MLMD contexts to be associated with the execution.
    input_artifacts: Input artifacts of the execution. Each artifact will be
      linked with the execution through an event with type input_event_type.
      Each artifact will also be linked with every context in the `contexts`
      argument.
    output_artifacts: Output artifacts of the execution. Each artifact will
      be linked with the execution through an event with type
      output_event_type. Each artifact will also be linked with every context
      in the `contexts` argument.
    input_event_type: The type of the input event, default to be INPUT.
    output_event_type: The type of the output event, default to be OUTPUT.

  Returns:
    An MLMD execution that is written to MLMD, with id populated.
  """
  artifact_and_events = []
  # Inputs and outputs are handled identically apart from the event type.
  for artifact_dict, event_type in ((input_artifacts, input_event_type),
                                    (output_artifacts, output_event_type)):
    if artifact_dict:
      artifact_and_events.extend(
          _create_artifact_and_event_pairs(
              metadata_handler=metadata_handler,
              artifact_dict=artifact_dict,
              event_type=event_type))
  execution_id, artifact_ids, context_ids = (
      metadata_handler.store.put_execution(
          execution=execution,
          artifact_and_events=artifact_and_events,
          contexts=contexts,
          reuse_context_if_already_exist=True))
  # Reflect the ids assigned by MLMD back onto the in-memory protos.
  execution.id = execution_id
  for (artifact, _), assigned_id in zip(artifact_and_events, artifact_ids):
    artifact.id = assigned_id
  for context, assigned_id in zip(contexts, context_ids):
    context.id = assigned_id
  return execution
def update_execution(
    self,
    execution: metadata_store_pb2.Execution,
    component_info: data_types.ComponentInfo,
    input_artifacts: Optional[Dict[Text, List[Artifact]]] = None,
    output_artifacts: Optional[Dict[Text, List[Artifact]]] = None,
    exec_properties: Optional[Dict[Text, Any]] = None,
    execution_state: Optional[Text] = None,
    artifact_state: Optional[Text] = None,
    contexts: Optional[List[metadata_store_pb2.Context]] = None) -> None:
  """Updates the given execution in MLMD based on given information.

  All artifacts provided will be registered if not already. Registered id
  will be reflected inline.

  Args:
    execution: the execution to be updated. It is required that the execution
      passed in has an id.
    component_info: the information of the current running component
    input_artifacts: artifacts to be declared as inputs of the execution
    output_artifacts: artifacts to be declared as outputs of the execution
    exec_properties: execution properties of the execution
    execution_state: state the execution to be updated to
    artifact_state: state the artifacts to be updated to
    contexts: a list of contexts the execution and artifacts to be linked to

  Raises:
    RuntimeError: if the execution to be updated has no id.
  """
  if not execution.id:
    raise RuntimeError('No id attached to the execution to be updated.')
  events = self.store.get_events_by_execution_ids([execution.id])
  # Artifact ids already linked to this execution, split by event direction.
  registered_inputs = {
      e.artifact_id
      for e in events
      if e.type == metadata_store_pb2.Event.INPUT
  }
  registered_outputs = {
      e.artifact_id
      for e in events
      if e.type == metadata_store_pb2.Event.OUTPUT
  }
  artifacts_and_events = []
  # Inputs and outputs follow the same registration path.
  for artifact_dict, event_type, registered_ids in (
      (input_artifacts, metadata_store_pb2.Event.INPUT, registered_inputs),
      (output_artifacts, metadata_store_pb2.Event.OUTPUT,
       registered_outputs)):
    if artifact_dict:
      artifacts_and_events.extend(
          self._artifact_and_event_pairs(
              artifact_dict=artifact_dict,
              event_type=event_type,
              new_state=artifact_state,
              registered_artifacts_ids=registered_ids))
  # If execution properties change, we need to potentially update execution
  # schema.
  if exec_properties:
    execution.type_id = self._prepare_execution_type(
        component_info.component_type, exec_properties)
  if exec_properties or execution_state:
    self._update_execution_proto(
        execution=execution,
        exec_properties=exec_properties,
        state=execution_state,
        pipeline_info=component_info.pipeline_info,
        component_info=component_info)
  _, artifact_ids, _ = self.store.put_execution(execution,
                                                artifacts_and_events,
                                                contexts or [])
  # Reflect the ids assigned by MLMD back onto the artifact protos.
  for pair, assigned_id in zip(artifacts_and_events, artifact_ids):
    pair[0].id = assigned_id