def _save_pipeline(self, pipeline_args: Dict[str, Any]) -> None:
  """Creates/updates pipeline folder in the handler directory.

  Args:
    pipeline_args: Pipeline details obtained from DSL.
  """
  pipeline_name = pipeline_args[labels.PIPELINE_NAME]
  handler_pipeline_path = self._get_pipeline_info_path(pipeline_name)

  # If updating pipeline, first delete pipeline directory.
  if fileio.exists(handler_pipeline_path):
    io_utils.delete_dir(handler_pipeline_path)

  # Dump pipeline_args to handler pipeline folder as json.
  fileio.makedirs(handler_pipeline_path)
  with open(os.path.join(handler_pipeline_path, 'pipeline_args.json'),
            'w') as f:
    json.dump(pipeline_args, f)

  # Copy dsl to pipeline folder
  pipeline_dsl_path = self.flags_dict[labels.PIPELINE_DSL_PATH]
  io_utils.copy_file(
      pipeline_dsl_path,
      os.path.join(handler_pipeline_path, os.path.basename(pipeline_dsl_path)))

def _save_pipeline(self, pipeline_args: Dict[Text, Any]) -> None:
  """Creates/updates pipeline folder in the handler directory.

  Args:
    pipeline_args: Pipeline details obtained from DSL.
  """
  # Path to pipeline folder in Airflow.
  handler_pipeline_path = os.path.join(
      self._handler_home_dir, pipeline_args[labels.PIPELINE_NAME], '')

  # If updating pipeline, first delete pipeline directory.
  if tf.io.gfile.exists(handler_pipeline_path):
    io_utils.delete_dir(handler_pipeline_path)

  # Dump pipeline_args to handler pipeline folder as json.
  tf.io.gfile.makedirs(handler_pipeline_path)
  with open(os.path.join(handler_pipeline_path, 'pipeline_args.json'),
            'w') as f:
    json.dump(pipeline_args, f)

  # Copy dsl to pipeline folder
  pipeline_dsl_path = self.flags_dict[labels.PIPELINE_DSL_PATH]
  io_utils.copy_file(
      pipeline_dsl_path,
      os.path.join(handler_pipeline_path, os.path.basename(pipeline_dsl_path)))

def _save_pipeline(self, pipeline_args) -> None:
  """Creates/updates pipeline folder in the handler directory."""
  # Path to pipeline folder in airflow.
  handler_pipeline_path = self._get_handler_pipeline_path(
      pipeline_args[labels.PIPELINE_NAME])

  # If updating pipeline, first delete pipeline directory.
  if tf.io.gfile.exists(handler_pipeline_path):
    io_utils.delete_dir(handler_pipeline_path)

  # Dump pipeline_args to handler pipeline folder as json.
  tf.io.gfile.makedirs(handler_pipeline_path)
  with open(os.path.join(handler_pipeline_path, 'pipeline_args.json'),
            'w') as f:
    json.dump(pipeline_args, f)

  # Copy dsl to pipeline folder
  io_utils.copy_file(
      self.flags_dict[labels.PIPELINE_DSL_PATH],
      os.path.join(
          handler_pipeline_path,
          os.path.basename(self.flags_dict[labels.PIPELINE_DSL_PATH])))

def delete_pipeline(self) -> None:
  """Delete pipeline in Kubeflow."""
  try:
    # Check if pipeline exists.
    pipeline_id = self._get_pipeline_id(self.flags_dict[labels.PIPELINE_NAME])
    self._client._pipelines_api.get_pipeline(pipeline_id)  # pylint: disable=protected-access

    # Delete pipeline for kfp server.
    self._client._pipelines_api.delete_pipeline(id=pipeline_id)  # pylint: disable=protected-access

    # Delete experiment from server.
    experiment_id = self._client.get_experiment(
        experiment_name=self.flags_dict[labels.PIPELINE_NAME]).id
    self._client._experiment_api.delete_experiment(experiment_id)  # pylint: disable=protected-access
  except kfp_server_api.rest.ApiException as err:
    sys.exit(self._print_error(err))

  # Path to pipeline folder.
  handler_pipeline_path = self._get_handler_pipeline_path(
      self.flags_dict[labels.PIPELINE_NAME])

  # Delete pipeline for home directory.
  io_utils.delete_dir(handler_pipeline_path)

  click.echo('Pipeline ' + self.flags_dict[labels.PIPELINE_NAME] +
             ' deleted successfully.')

def _extract_pipeline_args(self) -> Dict[Text, Any]:
  """Get pipeline args from the DSL."""
  if os.path.isdir(self.flags_dict[labels.PIPELINE_DSL_PATH]):
    sys.exit('Provide dsl file path.')

  # Create an environment for subprocess.
  temp_env = os.environ.copy()

  # Create temp file to store pipeline_args from pipeline dsl.
  temp_file = tempfile.mkstemp(prefix='cli_tmp_', suffix='_pipeline_args')[1]

  # Store temp_file path in temp_env.
  temp_env[labels.TFX_JSON_EXPORT_PIPELINE_ARGS_PATH] = temp_file

  # Run dsl with mock environment to store pipeline args in temp_file.
  subprocess.call(['python', self.flags_dict[labels.PIPELINE_DSL_PATH]],
                  env=temp_env)
  if os.stat(temp_file).st_size != 0:
    # Load pipeline_args from temp_file for TFX pipelines
    with open(temp_file, 'r') as f:
      pipeline_args = json.load(f)
  else:
    # For non-TFX pipelines, extract pipeline name from the dsl filename.
    pipeline_args = {
        labels.PIPELINE_NAME:
            os.path.basename(
                self.flags_dict[labels.PIPELINE_DSL_PATH]).split('.')[0]
    }

  # Delete temp file
  io_utils.delete_dir(temp_file)

  return pipeline_args

def testDeleteDir(self):
  self.createFiles({
      'dir': {
          'file.txt': 'testing'
      }
  })

  io_utils.delete_dir(self.relpath('dir'))

  self.assertDirectoryEqual(self._base_dir, {})

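For reference, here is a minimal sketch of what the `io_utils.delete_dir` helper exercised above and throughout these snippets might look like. This is an assumption, not the actual TFX implementation: the real helper lives in TFX's io_utils module, and the snippets suggest it is backed by `tf.io.gfile` in older code and by `fileio` in newer code.

import tensorflow as tf


def delete_dir(path: str) -> None:
  """Recursively deletes the directory at `path` if it exists (sketch only)."""
  # Assumes a tf.io.gfile-backed filesystem, which also covers gs:// URIs
  # like the ones used by _delete_pipeline_data further below.
  if tf.io.gfile.isdir(path):
    tf.io.gfile.rmtree(path)
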
def delete_pipeline(self) -> None:
  """Delete pipeline in the environment."""
  pipeline_name = self.flags_dict[labels.PIPELINE_NAME]
  self._check_pipeline_existence(pipeline_name)

  io_utils.delete_dir(os.path.join(self._handler_home_dir, pipeline_name))

  click.echo('Pipeline ' + pipeline_name + ' deleted successfully.')

def delete_pipeline(self) -> None:
  """Delete pipeline in Airflow."""
  pipeline_name = self.flags_dict[labels.PIPELINE_NAME]

  # Check if pipeline exists.
  self._check_pipeline_existence(pipeline_name)

  # Delete pipeline folder.
  io_utils.delete_dir(self._get_pipeline_info_path(pipeline_name))

  click.echo('Pipeline "{}" deleted successfully.'.format(pipeline_name))

def delete_pipeline(self) -> None:
  """Delete pipeline in Airflow."""
  # Path to pipeline folder in airflow.
  handler_pipeline_path = self._get_handler_pipeline_path(
      self.flags_dict[labels.PIPELINE_NAME])

  # Check if pipeline exists.
  if not tf.io.gfile.exists(handler_pipeline_path):
    sys.exit('Pipeline {} does not exist.'.format(
        self.flags_dict[labels.PIPELINE_NAME]))

  # Delete pipeline folder.
  io_utils.delete_dir(handler_pipeline_path)

def _save_pipeline(self, pipeline_args: Dict[Text, Any]) -> None:
  """Creates/updates pipeline folder in the handler directory."""
  pipeline_name = pipeline_args[labels.PIPELINE_NAME]

  # Path to pipeline folder.
  handler_pipeline_path = os.path.join(self._handler_home_dir, pipeline_name,
                                       '')

  # When updating pipeline delete pipeline from server and home dir.
  if tf.io.gfile.exists(handler_pipeline_path):
    # Delete pipeline for kfp server.
    pipeline_id = self._get_pipeline_id(pipeline_name)
    try:
      self._client._pipelines_api.delete_pipeline(id=pipeline_id)  # pylint: disable=protected-access
    except kfp_server_api.rest.ApiException as err:
      sys.exit(self._print_error(err))

    # Delete pipeline for home directory.
    io_utils.delete_dir(handler_pipeline_path)

  pipeline_package_path = self.flags_dict[labels.PIPELINE_PACKAGE_PATH]
  try:
    # Now upload pipeline to server.
    upload_response = self._client.upload_pipeline(
        pipeline_package_path=pipeline_package_path,
        pipeline_name=pipeline_name)
    click.echo(upload_response)

    # Create experiment with pipeline name as experiment name.
    experiment_name = pipeline_name
    experiment_id = self._client.create_experiment(experiment_name).id
  except kfp_server_api.rest.ApiException as err:
    sys.exit(self._print_error(err))

  # Add pipeline details to pipeline_args.
  pipeline_args[labels.PIPELINE_NAME] = upload_response.name
  pipeline_args[labels.PIPELINE_ID] = upload_response.id
  pipeline_args[labels.PIPELINE_PACKAGE_PATH] = pipeline_package_path
  pipeline_args[labels.EXPERIMENT_ID] = experiment_id

  # Path to pipeline_args.json .
  pipeline_args_path = os.path.join(handler_pipeline_path,
                                    'pipeline_args.json')

  # Copy pipeline_args to pipeline folder.
  tf.io.gfile.makedirs(handler_pipeline_path)
  with open(pipeline_args_path, 'w') as f:
    json.dump(pipeline_args, f)

def _save_pipeline(self, pipeline_args: Dict[Text, Any]) -> None:
  """Creates/updates pipeline folder in the handler directory."""
  pipeline_name = pipeline_args[labels.PIPELINE_NAME]

  # Path to pipeline folder.
  handler_pipeline_path = os.path.join(self._handler_home_dir, pipeline_name,
                                       '')

  # When updating pipeline delete pipeline from server and home dir.
  if tf.io.gfile.exists(handler_pipeline_path):
    # Delete pipeline for kfp server.
    pipeline_id = self._get_pipeline_id(pipeline_name)
    self._client._pipelines_api.delete_pipeline(id=pipeline_id)  # pylint: disable=protected-access

    # Delete pipeline for home directory.
    io_utils.delete_dir(handler_pipeline_path)

  pipeline_package_path = self.flags_dict[labels.PIPELINE_PACKAGE_PATH]

  # Now upload pipeline to server.
  upload_response = self._client.upload_pipeline(
      pipeline_package_path=pipeline_package_path,
      pipeline_name=pipeline_name)

  # Display the link to the pipeline detail page in KFP UI.
  click.echo(upload_response)
  click.echo('Please access the pipeline detail page at '
             '{prefix}/#/pipelines/details/{pipeline_id}'.format(
                 prefix=self._client._get_url_prefix(),  # pylint: disable=protected-access
                 pipeline_id=upload_response.id))

  # Create experiment with pipeline name as experiment name.
  experiment_name = pipeline_name
  experiment_id = self._client.create_experiment(experiment_name).id

  # Add pipeline details to pipeline_args.
  pipeline_args[labels.PIPELINE_NAME] = upload_response.name
  pipeline_args[labels.PIPELINE_ID] = upload_response.id
  pipeline_args[labels.PIPELINE_PACKAGE_PATH] = pipeline_package_path
  pipeline_args[labels.EXPERIMENT_ID] = experiment_id

  # Path to pipeline_args.json .
  pipeline_args_path = os.path.join(handler_pipeline_path,
                                    'pipeline_args.json')

  # Copy pipeline_args to pipeline folder.
  tf.io.gfile.makedirs(handler_pipeline_path)
  with open(pipeline_args_path, 'w') as f:
    json.dump(pipeline_args, f)

def delete_pipeline(self) -> None:
  """Delete pipeline in Airflow."""
  pipeline_name = self.flags_dict[labels.PIPELINE_NAME]

  # Path to pipeline folder.
  handler_pipeline_path = os.path.join(self._handler_home_dir, pipeline_name,
                                       '')

  # Check if pipeline exists.
  self._check_pipeline_existence(pipeline_name)

  # Delete pipeline folder.
  io_utils.delete_dir(handler_pipeline_path)

  click.echo('Pipeline "{}" deleted successfully.'.format(pipeline_name))

def _prepare_pipeline_dir(self, pipeline_name: str, required: bool) -> str:
  """Create a directory for pipeline definition in the handler directory."""
  self._check_pipeline_existence(pipeline_name, required)

  handler_pipeline_path = self._get_pipeline_dir(pipeline_name)

  # If updating pipeline, first delete the pipeline directory.
  if fileio.exists(handler_pipeline_path):
    io_utils.delete_dir(handler_pipeline_path)

  fileio.makedirs(handler_pipeline_path)

  # pipeline.json will be stored in KubeflowV2DagRunner.run().
  return handler_pipeline_path

def delete_pipeline(self) -> None:
  """Delete pipeline in the environment."""
  pipeline_name = self.flags_dict[labels.PIPELINE_NAME]

  # Path to pipeline folder.
  handler_pipeline_path = os.path.join(self._handler_home_dir, pipeline_name,
                                       '')

  # Check if pipeline exists.
  self._check_pipeline_existence(pipeline_name)

  # Delete pipeline for home directory.
  io_utils.delete_dir(handler_pipeline_path)

  click.echo('Pipeline ' + pipeline_name + ' deleted successfully.')

def delete_pipeline(self) -> None:
  """Deletes pipeline in Beam."""
  # Path to pipeline folder.
  handler_pipeline_path = self._get_handler_pipeline_path(
      self.flags_dict[labels.PIPELINE_NAME])

  # Check if pipeline exists.
  if not tf.io.gfile.exists(handler_pipeline_path):
    sys.exit('Pipeline "{}" does not exist.'.format(
        self.flags_dict[labels.PIPELINE_NAME]))

  # Delete pipeline folder.
  io_utils.delete_dir(handler_pipeline_path)

  click.echo('Pipeline "{}" deleted successfully.'.format(
      self.flags_dict[labels.PIPELINE_NAME]))

def _extract_pipeline_args(self) -> Dict[Text, Any]:
  """Get pipeline args from the DSL.

  Returns:
    Python dictionary with pipeline details extracted from DSL.
  """
  # TODO(b/157599419): Consider using a better way to extract pipeline info:
  # e.g. pipeline name/root. Currently we rely on consulting an env var when
  # creating the Pipeline object, which is brittle.
  pipeline_dsl_path = self.flags_dict[labels.PIPELINE_DSL_PATH]
  if os.path.isdir(pipeline_dsl_path):
    sys.exit('Provide dsl file path.')

  # Create an environment for subprocess.
  temp_env = os.environ.copy()

  # Create temp file to store pipeline_args from pipeline dsl.
  temp_file = tempfile.mkstemp(prefix='cli_tmp_', suffix='_pipeline_args')[1]

  # Store temp_file path in temp_env.
  # LINT.IfChange
  temp_env[labels.TFX_JSON_EXPORT_PIPELINE_ARGS_PATH] = temp_file
  # LINT.ThenChange(
  #   ../../../orchestration/beam/beam_dag_runner.py,
  #   ../../../orchestration/local/local_dag_runner.py,
  #   ../../../orchestration/portable/beam_dag_runner.py,
  # )

  # Run dsl with mock environment to store pipeline args in temp_file.
  self._subprocess_call([sys.executable, pipeline_dsl_path], env=temp_env)
  if os.stat(temp_file).st_size != 0:
    # Load pipeline_args from temp_file for TFX pipelines
    with open(temp_file, 'r') as f:
      pipeline_args = json.load(f)
  else:
    # For non-TFX pipelines, extract pipeline name from the dsl filename.
    pipeline_args = {
        labels.PIPELINE_NAME:
            os.path.basename(pipeline_dsl_path).split('.')[0]
    }

  # Delete temp file
  io_utils.delete_dir(temp_file)

  return pipeline_args

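The handlers above only consume the temp file; the producer side is the pipeline DSL process they spawn. A hedged sketch of that side follows. The function name and the exported keys are illustrative assumptions, and it presumes `labels.TFX_JSON_EXPORT_PIPELINE_ARGS_PATH` resolves to the environment variable name used below; in TFX the actual export is performed inside the runner/pipeline code.

import json
import os


def _maybe_export_pipeline_args(pipeline_name: str, pipeline_root: str) -> None:
  """Writes pipeline args as JSON if a CLI handler requested it (sketch only)."""
  # Hypothetical helper: the env var is set by _extract_pipeline_args above.
  export_path = os.environ.get('TFX_JSON_EXPORT_PIPELINE_ARGS_PATH')
  if not export_path:
    return  # Not launched by the CLI handler; nothing to export.
  with open(export_path, 'w') as f:
    # Keys are illustrative; the handler only needs a JSON dict it can read
    # back as pipeline_args.
    json.dump({'pipeline_name': pipeline_name,
               'pipeline_root': pipeline_root}, f)
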
def _save_pipeline(self, pipeline_args: Dict[Text, Any]) -> None:
  """Creates/updates pipeline folder in the handler directory."""
  # Add pipeline dsl path to pipeline args.
  pipeline_args[labels.PIPELINE_DSL_PATH] = self.flags_dict[
      labels.PIPELINE_DSL_PATH]

  # Path to pipeline folder in beam.
  handler_pipeline_path = os.path.join(
      self._handler_home_dir, pipeline_args[labels.PIPELINE_NAME], '')

  # If updating pipeline, first delete pipeline directory.
  if tf.io.gfile.exists(handler_pipeline_path):
    io_utils.delete_dir(handler_pipeline_path)

  # Dump pipeline_args to handler pipeline folder as json.
  tf.io.gfile.makedirs(handler_pipeline_path)
  with open(os.path.join(handler_pipeline_path, 'pipeline_args.json'),
            'w') as f:
    json.dump(pipeline_args, f)

def _extract_pipeline_args(self) -> Dict[Text, Any]:
  """Get pipeline args from the DSL.

  Returns:
    Python dictionary with pipeline details extracted from DSL.
  """
  pipeline_dsl_path = self.flags_dict[labels.PIPELINE_DSL_PATH]
  if os.path.isdir(pipeline_dsl_path):
    sys.exit('Provide dsl file path.')

  # Create an environment for subprocess.
  temp_env = os.environ.copy()

  # Create temp file to store pipeline_args from pipeline dsl.
  temp_file = tempfile.mkstemp(prefix='cli_tmp_', suffix='_pipeline_args')[1]

  # Store temp_file path in temp_env.
  temp_env[labels.TFX_JSON_EXPORT_PIPELINE_ARGS_PATH] = temp_file

  # Mark the SDK environment if not in a template.
  if 'pipelines.kubeflow.org/pipeline-sdk-type' not in temp_env:
    temp_env['pipelines.kubeflow.org/pipeline-sdk-type'] = 'tfx-cli'

  # Run dsl with mock environment to store pipeline args in temp_file.
  self._subprocess_call([sys.executable, pipeline_dsl_path], env=temp_env)
  if os.stat(temp_file).st_size != 0:
    # Load pipeline_args from temp_file for TFX pipelines
    with open(temp_file, 'r') as f:
      pipeline_args = json.load(f)
  else:
    # For non-TFX pipelines, extract pipeline name from the dsl filename.
    pipeline_args = {
        labels.PIPELINE_NAME:
            os.path.basename(pipeline_dsl_path).split('.')[0]
    }

  # Delete temp file
  io_utils.delete_dir(temp_file)

  return pipeline_args

def _save_pipeline(self, pipeline_args: Dict[Text, Any]) -> None:
  """Creates/updates pipeline folder in the handler directory."""
  # Add pipeline dsl path to pipeline args.
  pipeline_args[labels.PIPELINE_DSL_PATH] = self.flags_dict[
      labels.PIPELINE_DSL_PATH]

  # Path to pipeline folder in beam.
  handler_pipeline_path = os.path.join(
      self._handler_home_dir, pipeline_args[labels.PIPELINE_NAME])

  # If updating pipeline, first delete the pipeline directory.
  if fileio.exists(handler_pipeline_path):
    io_utils.delete_dir(handler_pipeline_path)

  # TODO(b/157599419): Consider deprecating PipelineArgs.
  # Dump pipeline_args to handler pipeline folder as json.
  fileio.makedirs(handler_pipeline_path)
  with open(os.path.join(handler_pipeline_path, _PIPELINE_ARG_FILE),
            'w') as f:
    json.dump(pipeline_args, f)

def _save_pipeline(self, pipeline_args: Dict[Text, Any]) -> None:
  """Creates/updates pipeline folder in the handler directory."""
  # Path to pipeline folder in Kubeflow.
  handler_pipeline_path = self._get_handler_pipeline_path(
      pipeline_args[labels.PIPELINE_NAME])

  # Path to pipeline_args.json .
  pipeline_args_path = os.path.join(handler_pipeline_path,
                                    'pipeline_args.json')

  # When updating pipeline delete pipeline from server and home dir.
  if tf.io.gfile.exists(handler_pipeline_path):
    # Get pipeline_id from pipeline_args.json
    with open(pipeline_args_path, 'r') as f:
      pipeline_args = json.load(f)
    pipeline_id = pipeline_args[labels.PIPELINE_ID]

    # Delete pipeline for home directory.
    io_utils.delete_dir(handler_pipeline_path)

    # Delete pipeline for kfp server.
    self._client._pipelines_api.delete_pipeline(id=pipeline_id)  # pylint: disable=protected-access

  # Now upload pipeline to server.
  upload_response = self._client.upload_pipeline(
      pipeline_package_path=self.flags_dict[labels.PIPELINE_PACKAGE_PATH],
      pipeline_name=pipeline_args[labels.PIPELINE_NAME])
  click.echo(upload_response)

  # Add pipeline_id and pipeline_name to pipeline_args.
  pipeline_args[labels.PIPELINE_NAME] = upload_response.name
  pipeline_args[labels.PIPELINE_ID] = upload_response.id

  # Copy pipeline_args to pipeline folder.
  tf.io.gfile.makedirs(handler_pipeline_path)
  with open(pipeline_args_path, 'w') as f:
    json.dump(pipeline_args, f)

def delete_pipeline(self) -> None:
  """Delete pipeline in Kubeflow."""
  pipeline_name = self.flags_dict[labels.PIPELINE_NAME]

  # Check if pipeline exists on server.
  pipeline_id, experiment_id = self._get_pipeline_id_and_experiment_id(
      pipeline_name)
  self._client._pipelines_api.get_pipeline(pipeline_id)  # pylint: disable=protected-access

  # Delete pipeline for kfp server.
  self._client._pipelines_api.delete_pipeline(id=pipeline_id)  # pylint: disable=protected-access

  # Delete experiment from server.
  self._client._experiment_api.delete_experiment(experiment_id)  # pylint: disable=protected-access

  # Path to pipeline folder.
  handler_pipeline_path = os.path.join(self._handler_home_dir, pipeline_name,
                                       '')

  # Delete pipeline for home directory.
  io_utils.delete_dir(handler_pipeline_path)

  click.echo('Pipeline ' + pipeline_name + ' deleted successfully.')

def _delete_pipeline_output(self, pipeline_name: str):
  """Deletes output produced by the named pipeline."""
  io_utils.delete_dir(self._pipeline_root(pipeline_name))

def Do(self, input_dict: Dict[Text, List[types.Artifact]],
       output_dict: Dict[Text, List[types.Artifact]],
       exec_properties: Dict[Text, Any]) -> None:
  """Validate current model against last blessed model.

  Args:
    input_dict: Input dict from input key to a list of Artifacts.
      - examples: examples for evaluating the model.
      - model: current model for validation.
    output_dict: Output dict from output key to a list of Artifacts.
      - blessing: model blessing result.
    exec_properties: A dict of execution properties.
      - blessed_model: last blessed model for validation.
      - blessed_model_id: last blessed model id.

  Returns:
    None
  """
  self._log_startup(input_dict, output_dict, exec_properties)

  self._temp_path = self._get_tmp_dir()
  absl.logging.info('Using temp path {} for tft.beam'.format(self._temp_path))

  eval_examples_uri = artifact_utils.get_split_uri(
      input_dict[constants.EXAMPLES_KEY], 'eval')
  blessing = artifact_utils.get_single_instance(
      output_dict[constants.BLESSING_KEY])

  # Current model to be validated.
  current_model = artifact_utils.get_single_instance(
      input_dict[constants.MODEL_KEY])
  absl.logging.info('Using {} as current model.'.format(current_model.uri))
  blessing.set_string_custom_property(
      constants.ARTIFACT_PROPERTY_CURRENT_MODEL_URI_KEY, current_model.uri)
  blessing.set_int_custom_property(
      constants.ARTIFACT_PROPERTY_CURRENT_MODEL_ID_KEY, current_model.id)

  # Denote model component_name.
  component_id = exec_properties['current_component_id']
  blessing.set_string_custom_property('component_id', component_id)

  # Previous blessed model to be validated against.
  blessed_model_dir = exec_properties['blessed_model']
  blessed_model_id = exec_properties['blessed_model_id']
  absl.logging.info('Using {} as blessed model.'.format(blessed_model_dir))
  if blessed_model_dir:
    blessing.set_string_custom_property(
        constants.ARTIFACT_PROPERTY_BLESSED_MODEL_URI_KEY, blessed_model_dir)
    blessing.set_int_custom_property(
        constants.ARTIFACT_PROPERTY_BLESSED_MODEL_ID_KEY, blessed_model_id)

  absl.logging.info('Validating model.')
  # TODO(b/125853306): support customized slice spec.
  blessed = self._generate_blessing_result(
      eval_examples_uri=eval_examples_uri,
      slice_spec=[tfma.slicer.SingleSliceSpec()],
      current_model_dir=current_model.uri,
      blessed_model_dir=blessed_model_dir)

  if blessed:
    io_utils.write_string_file(
        os.path.join(blessing.uri, constants.BLESSED_FILE_NAME), '')
    blessing.set_int_custom_property(constants.ARTIFACT_PROPERTY_BLESSED_KEY,
                                     constants.BLESSED_VALUE)
  else:
    io_utils.write_string_file(
        os.path.join(blessing.uri, constants.NOT_BLESSED_FILE_NAME), '')
    blessing.set_int_custom_property(constants.ARTIFACT_PROPERTY_BLESSED_KEY,
                                     constants.NOT_BLESSED_VALUE)
  absl.logging.info('Blessing result {} written to {}.'.format(
      blessed, blessing.uri))

  io_utils.delete_dir(self._temp_path)
  absl.logging.info('Cleaned up temp path {} on executor success.'.format(
      self._temp_path))

def Do(self, input_dict: Dict[Text, List[types.Artifact]],
       output_dict: Dict[Text, List[types.Artifact]],
       exec_properties: Dict[Text, Any]) -> None:
  """Validate current model against last blessed model.

  Args:
    input_dict: Input dict from input key to a list of Artifacts.
      - examples: examples for evaluating the model.
      - model: current model for validation.
    output_dict: Output dict from output key to a list of Artifacts.
      - blessing: model blessing result.
    exec_properties: A dict of execution properties.
      - blessed_model: last blessed model for validation.
      - blessed_model_id: last blessed model id.

  Returns:
    None
  """
  self._log_startup(input_dict, output_dict, exec_properties)

  self._temp_path = self._get_tmp_dir()
  tf.logging.info('Using temp path {} for tft.beam'.format(self._temp_path))

  eval_examples_uri = artifact_utils.get_split_uri(input_dict['examples'],
                                                   'eval')
  blessing = artifact_utils.get_single_instance(output_dict['blessing'])

  # Current model.
  current_model = artifact_utils.get_single_instance(input_dict['model'])
  tf.logging.info('Using {} as current model.'.format(current_model.uri))
  blessing.set_string_custom_property('current_model', current_model.uri)
  blessing.set_int_custom_property('current_model_id', current_model.id)

  # Denote model component_name.
  component_id = exec_properties['component_id']
  blessing.set_string_custom_property('component_id', component_id)

  # Blessed model.
  blessed_model_dir = exec_properties['blessed_model']
  blessed_model_id = exec_properties['blessed_model_id']
  tf.logging.info('Using {} as blessed model.'.format(blessed_model_dir))
  if blessed_model_dir:
    blessing.set_string_custom_property('blessed_model', blessed_model_dir)
    blessing.set_int_custom_property('blessed_model_id', blessed_model_id)

  tf.logging.info('Validating model.')
  # TODO(b/125853306): support customized slice spec.
  blessed = self._generate_blessing_result(
      eval_examples_uri=eval_examples_uri,
      slice_spec=[tfma.slicer.slicer.SingleSliceSpec()],
      current_model_dir=current_model.uri,
      blessed_model_dir=blessed_model_dir)

  if blessed:
    io_utils.write_string_file(os.path.join(blessing.uri, 'BLESSED'), '')
    blessing.set_int_custom_property('blessed', 1)
  else:
    io_utils.write_string_file(os.path.join(blessing.uri, 'NOT_BLESSED'), '')
    blessing.set_int_custom_property('blessed', 0)
  tf.logging.info('Blessing result {} written to {}.'.format(
      blessed, blessing.uri))

  io_utils.delete_dir(self._temp_path)
  tf.logging.info('Cleaned up temp path {} on executor success.'.format(
      self._temp_path))

def testDeleteDir(self):
  file_path = os.path.join(self._base_dir, 'file', 'path')
  io_utils.write_string_file(file_path, 'testing')
  self.assertTrue(tf.gfile.Exists(file_path))

  io_utils.delete_dir(os.path.dirname(file_path))

  self.assertFalse(tf.gfile.Exists(file_path))

def Do(self, input_dict: Dict[Text, List[types.Artifact]],
       output_dict: Dict[Text, List[types.Artifact]],
       exec_properties: Dict[Text, Any]) -> None:
  """TensorFlow Transform executor entrypoint.

  This implements BaseExecutor.Do() and is invoked by orchestration systems.
  This is not intended for manual usage or further customization. Please use
  the Transform() function which takes an input format with no artifact
  dependency.

  Args:
    input_dict: Input dict from input key to a list of artifacts, including:
      - input_data: A list of 'ExamplesPath' type which should contain two
        splits 'train' and 'eval'.
      - schema: A list of 'SchemaPath' type which should contain a single
        schema artifact.
    output_dict: Output dict from key to a list of artifacts, including:
      - transform_output: Output of 'tf.Transform', which includes an exported
        Tensorflow graph suitable for both training and serving;
      - transformed_examples: Materialized transformed examples, which
        includes both 'train' and 'eval' splits.
    exec_properties: A dict of execution properties, including:
      - module_file: The file path to a python module file, from which the
        'preprocessing_fn' function will be loaded.

  Returns:
    None
  """
  self._log_startup(input_dict, output_dict, exec_properties)

  train_data_uri = artifact_utils.get_split_uri(input_dict['input_data'],
                                                'train')
  eval_data_uri = artifact_utils.get_split_uri(input_dict['input_data'],
                                               'eval')
  schema_file = io_utils.get_only_uri_in_dir(
      artifact_utils.get_single_uri(input_dict['schema']))
  transform_output = artifact_utils.get_single_uri(
      output_dict['transform_output'])
  transformed_train_output = artifact_utils.get_split_uri(
      output_dict['transformed_examples'], 'train')
  transformed_eval_output = artifact_utils.get_split_uri(
      output_dict['transformed_examples'], 'eval')
  temp_path = os.path.join(transform_output, _TEMP_DIR_IN_TRANSFORM_OUTPUT)
  tf.logging.debug('Using temp path %s for tft.beam', temp_path)

  def _GetCachePath(label, params_dict):
    if label not in params_dict:
      return None
    else:
      return artifact_utils.get_single_uri(params_dict[label])

  label_inputs = {
      labels.COMPUTE_STATISTICS_LABEL: False,
      labels.SCHEMA_PATH_LABEL: schema_file,
      labels.EXAMPLES_DATA_FORMAT_LABEL: labels.FORMAT_TF_EXAMPLE,
      labels.ANALYZE_AND_TRANSFORM_DATA_PATHS_LABEL:
          io_utils.all_files_pattern(train_data_uri),
      labels.TRANSFORM_ONLY_DATA_PATHS_LABEL:
          io_utils.all_files_pattern(eval_data_uri),
      labels.TFT_STATISTICS_USE_TFDV_LABEL: True,
      labels.PREPROCESSING_FN: exec_properties['module_file'],
  }
  cache_input = _GetCachePath('cache_input_path', input_dict)
  if cache_input is not None:
    label_inputs[labels.CACHE_INPUT_PATH_LABEL] = cache_input

  label_outputs = {
      labels.TRANSFORM_METADATA_OUTPUT_PATH_LABEL: transform_output,
      labels.TRANSFORM_MATERIALIZE_OUTPUT_PATHS_LABEL: [
          os.path.join(transformed_train_output,
                       _DEFAULT_TRANSFORMED_EXAMPLES_PREFIX),
          os.path.join(transformed_eval_output,
                       _DEFAULT_TRANSFORMED_EXAMPLES_PREFIX),
      ],
      labels.TEMP_OUTPUT_LABEL: str(temp_path),
  }
  cache_output = _GetCachePath('cache_output_path', output_dict)
  if cache_output is not None:
    label_outputs[labels.CACHE_OUTPUT_PATH_LABEL] = cache_output

  status_file = 'status_file'  # Unused
  self.Transform(label_inputs, label_outputs, status_file)
  tf.logging.info('Cleaning up temp path %s on executor success', temp_path)
  io_utils.delete_dir(temp_path)

def Do(self, input_dict: Dict[Text, List[types.Artifact]],
       output_dict: Dict[Text, List[types.Artifact]],
       exec_properties: Dict[Text, Any]) -> None:
  """Contract for running InfraValidator Executor.

  Args:
    input_dict:
      - `model`: Single `Model` artifact that we're validating.
      - `examples`: `Examples` artifacts to be used for test requests.
    output_dict:
      - `blessing`: Single `InfraBlessing` artifact containing the validated
        result. It is an empty file with the name either of INFRA_BLESSED or
        INFRA_NOT_BLESSED.
    exec_properties:
      - `serving_spec`: Serialized `ServingSpec` configuration.
      - `validation_spec`: Serialized `ValidationSpec` configuration.
      - `request_spec`: Serialized `RequestSpec` configuration.
  """
  self._log_startup(input_dict, output_dict, exec_properties)

  model = artifact_utils.get_single_instance(input_dict['model'])
  blessing = artifact_utils.get_single_instance(output_dict['blessing'])

  serving_spec = infra_validator_pb2.ServingSpec()
  json_format.Parse(exec_properties['serving_spec'], serving_spec)
  if not serving_spec.model_name:
    serving_spec.model_name = _DEFAULT_MODEL_NAME

  validation_spec = infra_validator_pb2.ValidationSpec()
  if 'validation_spec' in exec_properties:
    json_format.Parse(exec_properties['validation_spec'], validation_spec)
  if not validation_spec.num_tries:
    validation_spec.num_tries = _DEFAULT_NUM_TRIES
  if not validation_spec.max_loading_time_seconds:
    validation_spec.max_loading_time_seconds = _DEFAULT_MAX_LOADING_TIME_SEC

  if _is_query_mode(input_dict, exec_properties):
    logging.info('InfraValidator will be run in LOAD_AND_QUERY mode.')
    request_spec = infra_validator_pb2.RequestSpec()
    json_format.Parse(exec_properties['request_spec'], request_spec)
    examples = artifact_utils.get_single_instance(input_dict['examples'])
    requests = request_builder.build_requests(
        model_name=os.path.basename(
            os.path.dirname(path_utils.serving_model_path(model.uri))),
        examples=examples,
        request_spec=request_spec)
  else:
    logging.info('InfraValidator will be run in LOAD_ONLY mode.')
    requests = []

  model_path = self._PrepareModelPath(model.uri, serving_spec)
  try:
    # TODO(jjong): Make logic parallel.
    all_passed = True
    for serving_binary in serving_bins.parse_serving_binaries(serving_spec):
      all_passed &= self._ValidateWithRetry(
          model_path=model_path,
          serving_binary=serving_binary,
          serving_spec=serving_spec,
          validation_spec=validation_spec,
          requests=requests)
  finally:
    io_utils.delete_dir(self._get_tmp_dir())

  if all_passed:
    _mark_blessed(blessing)
  else:
    _mark_not_blessed(blessing)

def Do(self, input_dict: Dict[Text, List[types.Artifact]],
       output_dict: Dict[Text, List[types.Artifact]],
       exec_properties: Dict[Text, Any]) -> None:
  """TensorFlow Transform executor entrypoint.

  This implements BaseExecutor.Do() and is invoked by orchestration systems.
  This is not intended for manual usage or further customization. Please use
  the Transform() function which takes an input format with no artifact
  dependency.

  Args:
    input_dict: Input dict from input key to a list of artifacts, including:
      - input_data: A list of type `standard_artifacts.Examples` which should
        contain two splits 'train' and 'eval'.
      - schema: A list of type `standard_artifacts.Schema` which should
        contain a single schema artifact.
    output_dict: Output dict from key to a list of artifacts, including:
      - transform_output: Output of 'tf.Transform', which includes an exported
        Tensorflow graph suitable for both training and serving;
      - transformed_examples: Materialized transformed examples, which
        includes both 'train' and 'eval' splits.
    exec_properties: A dict of execution properties, including either one of:
      - module_file: The file path to a python module file, from which the
        'preprocessing_fn' function will be loaded.
      - preprocessing_fn: The module path to a python function that implements
        'preprocessing_fn'.

  Returns:
    None
  """
  self._log_startup(input_dict, output_dict, exec_properties)

  train_data_uri = artifact_utils.get_split_uri(input_dict[EXAMPLES_KEY],
                                                'train')
  eval_data_uri = artifact_utils.get_split_uri(input_dict[EXAMPLES_KEY],
                                               'eval')
  payload_format, data_view_uri = (
      tfxio_utils.resolve_payload_format_and_data_view_uri(
          input_dict[EXAMPLES_KEY]))
  schema_file = io_utils.get_only_uri_in_dir(
      artifact_utils.get_single_uri(input_dict[SCHEMA_KEY]))
  transform_graph_uri = artifact_utils.get_single_uri(
      input_dict[TRANSFORM_GRAPH_KEY])
  transform_output = artifact_utils.get_single_uri(
      output_dict[TRANSFORM_OUTPUT_KEY])

  temp_path = os.path.join(transform_output, _TEMP_DIR_IN_TRANSFORM_OUTPUT)
  absl.logging.debug('Using temp path %s for tft.beam', temp_path)

  materialize_output_paths = []
  if output_dict.get(TRANSFORMED_EXAMPLES_KEY) is not None:
    transformed_example_artifact = artifact_utils.get_single_instance(
        output_dict[TRANSFORMED_EXAMPLES_KEY])
    # TODO(b/161490287): move the split_names setting to executor for all
    # components.
    transformed_example_artifact.split_names = (
        artifact_utils.encode_split_names(artifact.DEFAULT_EXAMPLE_SPLITS))
    transformed_train_output = artifact_utils.get_split_uri(
        output_dict[TRANSFORMED_EXAMPLES_KEY], 'train')
    transformed_eval_output = artifact_utils.get_split_uri(
        output_dict[TRANSFORMED_EXAMPLES_KEY], 'eval')
    materialize_output_paths = [
        os.path.join(transformed_train_output,
                     _DEFAULT_TRANSFORMED_EXAMPLES_PREFIX),
        os.path.join(transformed_eval_output,
                     _DEFAULT_TRANSFORMED_EXAMPLES_PREFIX)
    ]

  def _GetCachePath(label, params_dict):
    if label not in params_dict:
      return None
    else:
      return artifact_utils.get_single_uri(params_dict[label])

  label_inputs = {
      'transform_graph_uri': transform_graph_uri,
      labels.COMPUTE_STATISTICS_LABEL: False,
      labels.SCHEMA_PATH_LABEL: schema_file,
      labels.EXAMPLES_DATA_FORMAT_LABEL: payload_format,
      labels.DATA_VIEW_LABEL: data_view_uri,
      labels.ANALYZE_DATA_PATHS_LABEL:
          io_utils.all_files_pattern(train_data_uri),
      labels.ANALYZE_PATHS_FILE_FORMATS_LABEL: labels.FORMAT_TFRECORD,
      labels.TRANSFORM_DATA_PATHS_LABEL: [
          io_utils.all_files_pattern(train_data_uri),
          io_utils.all_files_pattern(eval_data_uri)
      ],
      labels.TRANSFORM_PATHS_FILE_FORMATS_LABEL:
          [labels.FORMAT_TFRECORD, labels.FORMAT_TFRECORD],
      labels.CUSTOM_CONFIG: exec_properties.get('custom_config', None),
  }
  cache_input = _GetCachePath('cache_input_path', input_dict)
  if cache_input is not None:
    label_inputs[labels.CACHE_INPUT_PATH_LABEL] = cache_input

  label_outputs = {
      labels.TRANSFORM_METADATA_OUTPUT_PATH_LABEL: transform_output,
      labels.TRANSFORM_MATERIALIZE_OUTPUT_PATHS_LABEL:
          materialize_output_paths,
      labels.TEMP_OUTPUT_LABEL: str(temp_path),
  }
  cache_output = _GetCachePath('cache_output_path', output_dict)
  if cache_output is not None:
    label_outputs[labels.CACHE_OUTPUT_PATH_LABEL] = cache_output

  status_file = 'status_file'  # Unused
  self.Transform(label_inputs, label_outputs, status_file)
  absl.logging.debug('Cleaning up temp path %s on executor success',
                     temp_path)
  io_utils.delete_dir(temp_path)

def Do(self, input_dict, output_dict, exec_properties):
  """TensorFlow Transform executor entrypoint.

  This implements BaseExecutor.Do() and is invoked by orchestration systems.
  This is not intended for manual usage or further customization. Please use
  the Transform() function which takes an input format with no artifact
  dependency.

  Args:
    input_dict: Input dict from input key to a list of artifacts, including:
      - input_data: A list of 'ExamplesPath' type which should contain two
        splits 'train' and 'eval'.
      - schema: A list of 'SchemaPath' type which should contain a single
        schema artifact.
    output_dict: Output dict from key to a list of artifacts, including:
      - transform_output: Output of 'tf.Transform', which includes an exported
        Tensorflow graph suitable for both training and serving;
      - transformed_examples: Materialized transformed examples, which
        includes both 'train' and 'eval' splits.
    exec_properties: A dict of execution properties, including:
      - module_file: The file path to a python module file, from which the
        'preprocessing_fn' function will be loaded.

  Returns:
    None
  """
  self._log_startup(input_dict, output_dict, exec_properties)

  train_data_uri = types.get_split_uri(input_dict['input_data'], 'train')
  eval_data_uri = types.get_split_uri(input_dict['input_data'], 'eval')
  schema_file = io_utils.get_only_uri_in_dir(
      types.get_single_uri(input_dict['schema']))

  transform_output = types.get_single_uri(output_dict['transform_output'])
  if tf.gfile.Exists(transform_output):
    io_utils.delete_dir(transform_output)

  transformed_train_output = types.get_split_uri(
      output_dict['transformed_examples'], 'train')
  if tf.gfile.Exists(transformed_train_output):
    io_utils.delete_dir(transformed_train_output)

  transformed_eval_output = types.get_split_uri(
      output_dict['transformed_examples'], 'eval')
  if tf.gfile.Exists(transformed_eval_output):
    io_utils.delete_dir(transformed_eval_output)

  temp_path = os.path.join(transform_output, _TEMP_DIR_IN_TRANSFORM_OUTPUT)
  tf.logging.debug('Using temp path %s for tft.beam', temp_path)

  label_inputs = {
      labels.COMPUTE_STATISTICS_LABEL: False,
      labels.SCHEMA_PATH_LABEL: schema_file,
      labels.EXAMPLES_DATA_FORMAT_LABEL: labels.FORMAT_TF_EXAMPLE,
      labels.ANALYZE_AND_TRANSFORM_DATA_PATHS_LABEL:
          io_utils.all_files_pattern(train_data_uri),
      labels.TRANSFORM_ONLY_DATA_PATHS_LABEL:
          io_utils.all_files_pattern(eval_data_uri),
      labels.TFT_STATISTICS_USE_TFDV_LABEL: True,
      labels.PREPROCESSING_FN: exec_properties['module_file'],
  }

  label_outputs = {
      labels.TRANSFORM_METADATA_OUTPUT_PATH_LABEL: transform_output,
      labels.TRANSFORM_MATERIALIZE_OUTPUT_PATHS_LABEL: [
          os.path.join(transformed_train_output,
                       _DEFAULT_TRANSFORMED_EXAMPLES_PREFIX),
          os.path.join(transformed_eval_output,
                       _DEFAULT_TRANSFORMED_EXAMPLES_PREFIX),
      ],
      labels.TEMP_OUTPUT_LABEL: str(temp_path),
  }

  status_file = 'status_file'  # Unused
  self.Transform(label_inputs, label_outputs, status_file)
  tf.logging.info('Cleaning up temp path %s on executor success', temp_path)
  io_utils.delete_dir(temp_path)

def Do(self, input_dict: Dict[Text, List[Artifact]],
       output_dict: Dict[Text, List[Artifact]],
       exec_properties: Dict[Text, Any]) -> None:
  split_uris: List[Text] = []
  for artifact in input_dict[executor.EXAMPLES_KEY]:
    for split in artifact_utils.decode_split_names(artifact.split_names):
      split_uris.append(split)

  self._log_startup(input_dict, output_dict, exec_properties)

  data_uris = []
  for split in split_uris:
    data_uris.append(
        artifact_utils.get_split_uri(input_dict[executor.EXAMPLES_KEY], split))

  schema_file = io_utils.get_only_uri_in_dir(
      artifact_utils.get_single_uri(input_dict[executor.SCHEMA_KEY]))
  transform_output = artifact_utils.get_single_uri(
      output_dict[executor.TRANSFORM_GRAPH_KEY])

  transformed_data_uris = []
  for split in split_uris:
    transformed_data_uris.append(
        artifact_utils.get_split_uri(
            output_dict[executor.TRANSFORMED_EXAMPLES_KEY], split))

  temp_path = os.path.join(transform_output,
                           executor._TEMP_DIR_IN_TRANSFORM_OUTPUT)
  logging.debug('Using temp path %s for tft.beam', temp_path)

  def _GetCachePath(label, params_dict):
    if label not in params_dict:
      return None
    else:
      return artifact_utils.get_single_uri(params_dict[label])

  label_inputs = {
      labels.COMPUTE_STATISTICS_LABEL: False,
      labels.SCHEMA_PATH_LABEL: schema_file,
      labels.EXAMPLES_DATA_FORMAT_LABEL: labels.FORMAT_TF_EXAMPLE,
      labels.ANALYZE_DATA_PATHS_LABEL:
          io_utils.all_files_pattern(data_uris[0]),
      labels.ANALYZE_PATHS_FILE_FORMATS_LABEL: labels.FORMAT_TFRECORD,
      labels.TRANSFORM_DATA_PATHS_LABEL:
          [io_utils.all_files_pattern(uri) for uri in data_uris],
      labels.TRANSFORM_PATHS_FILE_FORMATS_LABEL:
          [labels.FORMAT_TFRECORD for uri in data_uris],
      labels.TFT_STATISTICS_USE_TFDV_LABEL: True,
      labels.MODULE_FILE: exec_properties.get('module_file', None),
      labels.PREPROCESSING_FN: exec_properties.get('preprocessing_fn', None),
      # TODO(b/149754658): switch to True once the TFXIO integration is
      # complete.
      labels.USE_TFXIO_LABEL: False,
  }
  cache_input = _GetCachePath('cache_input_path', input_dict)
  if cache_input is not None:
    label_inputs[labels.CACHE_INPUT_PATH_LABEL] = cache_input

  label_outputs = {
      labels.TRANSFORM_METADATA_OUTPUT_PATH_LABEL: transform_output,
      labels.TRANSFORM_MATERIALIZE_OUTPUT_PATHS_LABEL: [
          os.path.join(uri, executor._DEFAULT_TRANSFORMED_EXAMPLES_PREFIX)
          for uri in transformed_data_uris
      ],
      labels.TEMP_OUTPUT_LABEL: str(temp_path),
  }
  cache_output = _GetCachePath('cache_output_path', output_dict)
  if cache_output is not None:
    label_outputs[labels.CACHE_OUTPUT_PATH_LABEL] = cache_output

  status_file = 'status_file'  # Unused
  self.Transform(label_inputs, label_outputs, status_file)
  logging.debug('Cleaning up temp path %s on executor success', temp_path)
  io_utils.delete_dir(temp_path)

def _delete_pipeline_data(self):
  path = f'gs://{self._BUCKET_NAME}/tfx_pipeline_output/{self._pipeline_name}'
  io_utils.delete_dir(path)
  path = (f'gs://{self._BUCKET_NAME}/{self._DATA_DIRECTORY_NAME}/'
          f'{self._pipeline_name}')
  io_utils.delete_dir(path)