def setUp(self):
  """Builds the executor invocation fixtures and enters a temp working dir.

  Populates an in-memory ExecutorInput, loads the serialized invocation and
  the expected output metadata from the `testdata` directory next to this
  file, and then changes the current working directory to a freshly created
  temporary directory.
  """
  super().setUp()
  self._test_dir = tempfile.mkdtemp()

  # In-memory executor invocation used by the tests.
  self._executor_invocation = pipeline_pb2.ExecutorInput()
  self._executor_invocation.outputs.output_file = _TEST_OUTPUT_METADATA_JSON
  self._executor_invocation.inputs.parameters[
      'input_base_uri'].string_value = _TEST_INPUT_DIR
  self._executor_invocation.inputs.parameters[
      'input_config'].string_value = json_format.MessageToJson(
          example_gen_pb2.Input(splits=[
              example_gen_pb2.Input.Split(
                  name='s1', pattern='span{SPAN}/split1/*'),
              example_gen_pb2.Input.Split(
                  name='s2', pattern='span{SPAN}/split2/*')
          ]))
  self._executor_invocation.outputs.artifacts['examples'].artifacts.append(
      pipeline_pb2.RuntimeArtifact(
          type=pipeline_pb2.ArtifactTypeSchema(
              instance_schema=compiler_utils.get_artifact_schema(
                  standard_artifacts.Examples()))))

  # Read the testdata files before changing cwd, since the paths are derived
  # from __file__. Context managers make sure the handles are closed instead
  # of being leaked until garbage collection.
  testdata_dir = os.path.join(os.path.dirname(__file__), 'testdata')
  with fileio.open(
      os.path.join(testdata_dir, 'executor_invocation.json'), 'r') as f:
    self._executor_invocation_from_file = f.read()
  with fileio.open(
      os.path.join(testdata_dir, 'expected_output_metadata.json'), 'r') as f:
    self._expected_result_from_file = f.read()

  # Remember the original cwd; presumably restored during teardown (not
  # visible in this block).
  self._olddir = os.getcwd()
  os.chdir(self._test_dir)
  fileio.makedirs(os.path.dirname(_TEST_OUTPUT_METADATA_JSON))
  fileio.makedirs(os.path.dirname(_TEST_INPUT_DIR))
def setUp(self):
  """Builds the executor invocation fixtures, then runs the base setUp.

  Populates an in-memory ExecutorInput and loads the serialized invocation
  and the expected output metadata from the `testdata` directory next to
  this file. The base class setUp is intentionally invoked only after the
  testdata has been read (see the comment below).
  """
  # In-memory executor invocation used by the tests.
  self._executor_invocation = pipeline_pb2.ExecutorInput()
  self._executor_invocation.outputs.output_file = _TEST_OUTPUT_METADATA_JSON
  self._executor_invocation.inputs.parameters[
      'input_base_uri'].string_value = _TEST_INPUT_DIR
  self._executor_invocation.inputs.parameters[
      'input_config'].string_value = json_format.MessageToJson(
          example_gen_pb2.Input(splits=[
              example_gen_pb2.Input.Split(
                  name='s1', pattern='span{SPAN}/split1/*'),
              example_gen_pb2.Input.Split(
                  name='s2', pattern='span{SPAN}/split2/*')
          ]))
  self._executor_invocation.outputs.artifacts['examples'].artifacts.append(
      pipeline_pb2.RuntimeArtifact(
          type=pipeline_pb2.ArtifactTypeSchema(
              instance_schema=compiler_utils.get_artifact_schema(
                  standard_artifacts.Examples()))))

  # Read testdata through context managers so the file handles are closed
  # deterministically rather than leaked.
  testdata_dir = os.path.join(os.path.dirname(__file__), 'testdata')
  with fileio.open(
      os.path.join(testdata_dir, 'executor_invocation.json'), 'r') as f:
    self._executor_invocation_from_file = f.read()
  logging.debug('Executor invocation under test: %s',
                self._executor_invocation_from_file)
  with fileio.open(
      os.path.join(testdata_dir, 'expected_output_metadata.json'), 'r') as f:
    self._expected_result_from_file = f.read()
  logging.debug('Expecting output metadata JSON: %s',
                self._expected_result_from_file)

  # The initialization of TempWorkingDirTestCase has to be called after all
  # the testdata files have been read. Otherwise the original testdata files
  # are not accessible after cwd is changed.
  super().setUp()

  fileio.makedirs(os.path.dirname(_TEST_OUTPUT_METADATA_JSON))
  fileio.makedirs(os.path.dirname(_TEST_INPUT_DIR))
def setUp(self):
  """Builds the executor invocation fixtures and switches to the temp dir.

  Populates an in-memory ExecutorInput, loads the serialized invocation and
  the expected output metadata from the `testdata` directory next to this
  file, and then enters a working-directory context rooted at
  `self.tmp_dir`.
  """
  super().setUp()

  # In-memory executor invocation used by the tests.
  self._executor_invocation = pipeline_pb2.ExecutorInput()
  self._executor_invocation.outputs.output_file = _TEST_OUTPUT_METADATA_JSON
  self._executor_invocation.inputs.parameters[
      'input_base_uri'].string_value = _TEST_INPUT_DIR
  self._executor_invocation.inputs.parameters[
      'input_config'].string_value = json_format.MessageToJson(
          example_gen_pb2.Input(splits=[
              example_gen_pb2.Input.Split(
                  name='s1', pattern='span{SPAN}/split1/*'),
              example_gen_pb2.Input.Split(
                  name='s2', pattern='span{SPAN}/split2/*')
          ]))
  self._executor_invocation.outputs.artifacts['examples'].artifacts.append(
      pipeline_pb2.RuntimeArtifact(
          type=pipeline_pb2.ArtifactTypeSchema(
              instance_schema=compiler_utils.get_artifact_schema(
                  standard_artifacts.Examples()))))

  # Read testdata through context managers so the file handles are closed
  # deterministically rather than leaked.
  testdata_dir = os.path.join(os.path.dirname(__file__), 'testdata')
  with fileio.open(
      os.path.join(testdata_dir, 'executor_invocation.json'), 'r') as f:
    self._executor_invocation_from_file = f.read()
  logging.debug('Executor invocation under test: %s',
                self._executor_invocation_from_file)
  with fileio.open(
      os.path.join(testdata_dir, 'expected_output_metadata.json'), 'r') as f:
    self._expected_result_from_file = f.read()
  logging.debug('Expecting output metadata JSON: %s',
                self._expected_result_from_file)

  # Change working directory after all the testdata files have been read.
  self.enter_context(test_case_utils.change_working_dir(self.tmp_dir))

  fileio.makedirs(os.path.dirname(_TEST_INPUT_DIR))
def to_runtime_artifact(
    artifact_instance: artifact.Artifact,
    name_from_id: Mapping[int, str]) -> pipeline_pb2.RuntimeArtifact:
  """Builds a RuntimeArtifact proto message from a TFX artifact instance.

  Args:
    artifact_instance: The TFX artifact to convert.
    name_from_id: Lookup table from artifact ID to its runtime name.

  Returns:
    A RuntimeArtifact carrying the artifact's uri and JSON-derived metadata.
    Its `name` is set only when the artifact's ID is present in
    `name_from_id`; otherwise a warning is logged and `name` is left unset.
  """
  metadata_struct = struct_pb2.Struct()
  json_format.ParseDict(
      _get_json_metadata_mapping(artifact_instance), metadata_struct)
  runtime_artifact = pipeline_pb2.RuntimeArtifact(
      uri=artifact_instance.uri, metadata=metadata_struct)

  # TODO(b/135056715): Change to a unified getter/setter of Artifact type
  # once it's ready.
  # Attempt to map the TFX artifact id to its string-typed name. This should
  # succeed when running in an environment where the metadata access layer is
  # not running in user space.
  artifact_id = getattr(artifact_instance, 'id', None)
  if artifact_id is None or artifact_id not in name_from_id:
    logging.warning(
        'Cannot convert ID back to runtime name for artifact %s',
        artifact_instance)
    return runtime_artifact

  runtime_artifact.name = name_from_id[artifact_id]
  return runtime_artifact