Exemplo n.º 1
0
    def setUp(self):
        """Builds the executor invocation fixtures and enters a temp directory.

        Creates an `ExecutorInput` proto with an output metadata file, an input
        base URI, a two-split input config and one `Examples` output artifact.
        Then loads the reference invocation and the expected output metadata
        from the `testdata` directory next to this file, and finally changes
        the working directory to a freshly created temp directory.
        """
        super().setUp()
        self._test_dir = tempfile.mkdtemp()

        self._executor_invocation = pipeline_pb2.ExecutorInput()
        self._executor_invocation.outputs.output_file = _TEST_OUTPUT_METADATA_JSON
        self._executor_invocation.inputs.parameters[
            'input_base_uri'].string_value = _TEST_INPUT_DIR
        self._executor_invocation.inputs.parameters[
            'input_config'].string_value = json_format.MessageToJson(
                example_gen_pb2.Input(splits=[
                    example_gen_pb2.Input.Split(name='s1',
                                                pattern='span{SPAN}/split1/*'),
                    example_gen_pb2.Input.Split(name='s2',
                                                pattern='span{SPAN}/split2/*')
                ]))
        self._executor_invocation.outputs.artifacts[
            'examples'].artifacts.append(
                pipeline_pb2.RuntimeArtifact(
                    type=pipeline_pb2.ArtifactTypeSchema(
                        instance_schema=compiler_utils.get_artifact_schema(
                            standard_artifacts.Examples()))))

        # Read the testdata inside `with` blocks so the file handles are closed
        # right away instead of being left open until garbage collection.
        # These reads happen before the os.chdir() call below.
        testdata_dir = os.path.join(os.path.dirname(__file__), 'testdata')
        with fileio.open(
                os.path.join(testdata_dir, 'executor_invocation.json'),
                'r') as invocation_file:
            self._executor_invocation_from_file = invocation_file.read()
        with fileio.open(
                os.path.join(testdata_dir, 'expected_output_metadata.json'),
                'r') as expected_file:
            self._expected_result_from_file = expected_file.read()

        # Remember the original working directory before switching to the
        # temp directory (presumably restored in tearDown -- not visible here).
        self._olddir = os.getcwd()
        os.chdir(self._test_dir)
        fileio.makedirs(os.path.dirname(_TEST_OUTPUT_METADATA_JSON))
        fileio.makedirs(os.path.dirname(_TEST_INPUT_DIR))
Exemplo n.º 2
0
  def setUp(self):
    """Builds the executor invocation fixtures and loads the testdata files.

    Creates an `ExecutorInput` proto with an output metadata file, an input
    base URI, a two-split input config and one `Examples` output artifact, then
    loads the reference invocation and the expected output metadata from the
    `testdata` directory next to this file. The parent class `setUp` runs only
    after those reads.
    """
    self._executor_invocation = pipeline_pb2.ExecutorInput()
    self._executor_invocation.outputs.output_file = _TEST_OUTPUT_METADATA_JSON
    self._executor_invocation.inputs.parameters[
        'input_base_uri'].string_value = _TEST_INPUT_DIR
    self._executor_invocation.inputs.parameters[
        'input_config'].string_value = json_format.MessageToJson(
            example_gen_pb2.Input(splits=[
                example_gen_pb2.Input.Split(
                    name='s1', pattern='span{SPAN}/split1/*'),
                example_gen_pb2.Input.Split(
                    name='s2', pattern='span{SPAN}/split2/*')
            ]))
    self._executor_invocation.outputs.artifacts['examples'].artifacts.append(
        pipeline_pb2.RuntimeArtifact(
            type=pipeline_pb2.ArtifactTypeSchema(
                instance_schema=compiler_utils.get_artifact_schema(
                    standard_artifacts.Examples()))))

    # Read the testdata inside `with` blocks so the file handles are closed
    # right away instead of being left open until garbage collection.
    testdata_dir = os.path.join(os.path.dirname(__file__), 'testdata')
    with fileio.open(
        os.path.join(testdata_dir, 'executor_invocation.json'),
        'r') as invocation_file:
      self._executor_invocation_from_file = invocation_file.read()

    logging.debug('Executor invocation under test: %s',
                  self._executor_invocation_from_file)
    with fileio.open(
        os.path.join(testdata_dir, 'expected_output_metadata.json'),
        'r') as expected_file:
      self._expected_result_from_file = expected_file.read()
    logging.debug('Expecting output metadata JSON: %s',
                  self._expected_result_from_file)

    # The initialization of TempWorkingDirTestCase has to be called after all
    # the testdata files have been read. Otherwise the original testdata files
    # are not accessible after cwd is changed.
    super().setUp()

    fileio.makedirs(os.path.dirname(_TEST_OUTPUT_METADATA_JSON))
    fileio.makedirs(os.path.dirname(_TEST_INPUT_DIR))
Exemplo n.º 3
0
  def setUp(self):
    """Builds the executor invocation fixtures and enters the temp directory.

    Creates an `ExecutorInput` proto with an output metadata file, an input
    base URI, a two-split input config and one `Examples` output artifact.
    Then loads the reference invocation and the expected output metadata from
    the `testdata` directory next to this file, and only afterwards changes
    the working directory to `self.tmp_dir`.
    """
    super().setUp()

    self._executor_invocation = pipeline_pb2.ExecutorInput()
    self._executor_invocation.outputs.output_file = _TEST_OUTPUT_METADATA_JSON
    self._executor_invocation.inputs.parameters[
        'input_base_uri'].string_value = _TEST_INPUT_DIR
    self._executor_invocation.inputs.parameters[
        'input_config'].string_value = json_format.MessageToJson(
            example_gen_pb2.Input(splits=[
                example_gen_pb2.Input.Split(
                    name='s1', pattern='span{SPAN}/split1/*'),
                example_gen_pb2.Input.Split(
                    name='s2', pattern='span{SPAN}/split2/*')
            ]))
    self._executor_invocation.outputs.artifacts['examples'].artifacts.append(
        pipeline_pb2.RuntimeArtifact(
            type=pipeline_pb2.ArtifactTypeSchema(
                instance_schema=compiler_utils.get_artifact_schema(
                    standard_artifacts.Examples()))))

    # Read the testdata inside `with` blocks so the file handles are closed
    # right away instead of being left open until garbage collection.
    testdata_dir = os.path.join(os.path.dirname(__file__), 'testdata')
    with fileio.open(
        os.path.join(testdata_dir, 'executor_invocation.json'),
        'r') as invocation_file:
      self._executor_invocation_from_file = invocation_file.read()

    logging.debug('Executor invocation under test: %s',
                  self._executor_invocation_from_file)
    with fileio.open(
        os.path.join(testdata_dir, 'expected_output_metadata.json'),
        'r') as expected_file:
      self._expected_result_from_file = expected_file.read()
    logging.debug('Expecting output metadata JSON: %s',
                  self._expected_result_from_file)

    # Change working directory after all the testdata files have been read.
    self.enter_context(test_case_utils.change_working_dir(self.tmp_dir))

    fileio.makedirs(os.path.dirname(_TEST_INPUT_DIR))
Exemplo n.º 4
0
def to_runtime_artifact(
        artifact_instance: artifact.Artifact,
        name_from_id: Mapping[int, str]) -> pipeline_pb2.RuntimeArtifact:
    """Builds a RuntimeArtifact proto message from a TFX artifact instance.

    Args:
        artifact_instance: The TFX artifact to convert. Its `uri` is copied
            and its JSON metadata mapping is parsed into the proto's
            `metadata` struct.
        name_from_id: Mapping from artifact ID to the runtime name to assign.

    Returns:
        The populated RuntimeArtifact. `name` is set only when the artifact
        has a non-None `id` that is a key of `name_from_id`; otherwise a
        warning is logged and `name` is left unset.
    """
    runtime_metadata = struct_pb2.Struct()
    json_format.ParseDict(
        _get_json_metadata_mapping(artifact_instance), runtime_metadata)
    runtime_artifact = pipeline_pb2.RuntimeArtifact(
        uri=artifact_instance.uri, metadata=runtime_metadata)

    # TODO(b/135056715): Change to a unified getter/setter of Artifact type
    # once it's ready.
    # Try to map the TFX artifact ID back to its string-typed name. This
    # should be the case when running in an environment where the metadata
    # access layer is not running in user space.
    artifact_id = getattr(artifact_instance, 'id', None)
    if artifact_id is None or artifact_id not in name_from_id:
        logging.warning(
            'Cannot convert ID back to runtime name for artifact %s',
            artifact_instance)
        return runtime_artifact

    runtime_artifact.name = name_from_id[artifact_id]
    return runtime_artifact