Пример #1
0
    def _testDo(self, exec_properties):
        """Run the import example gen executor and verify its split outputs.

        The executor is invoked with an empty input dict and a single
        ``Examples`` output artifact (stored on ``self.examples``) whose split
        names are pre-set to ``train`` and ``eval``. The test then asserts
        that one gzipped TFRecord shard exists for each split and that the
        train shard is larger than the eval shard.

        Args:
            exec_properties: Execution properties passed unchanged to
                ``executor.Executor().Do``.
        """
        output_data_dir = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
            self._testMethodName)

        # Create output dict.
        self.examples = standard_artifacts.Examples()
        self.examples.uri = output_data_dir
        self.examples.split_names = artifact_utils.encode_split_names(
            ['train', 'eval'])
        output_dict = {utils.EXAMPLES_KEY: [self.examples]}

        # Run executor.
        import_example_gen = executor.Executor()
        import_example_gen.Do({}, output_dict, exec_properties)

        # Check import_example_gen outputs: each split writes its shard into
        # a sub-directory of the artifact URI named after the split.
        shard_name = 'data_tfrecord-00000-of-00001.gz'
        train_output_file = os.path.join(self.examples.uri, 'train',
                                         shard_name)
        eval_output_file = os.path.join(self.examples.uri, 'eval', shard_name)
        self.assertTrue(tf.io.gfile.exists(train_output_file))
        self.assertTrue(tf.io.gfile.exists(eval_output_file))
        self.assertGreater(
            tf.io.gfile.GFile(train_output_file).size(),
            tf.io.gfile.GFile(eval_output_file).size())
Пример #2
0
    def testDo(self):
        """Run the executor with per-split artifacts and check its outputs.

        Builds one ``ExamplesPath`` artifact for each of the ``train`` and
        ``eval`` splits, runs the executor on ``self._input_dict`` with a
        2:1 train/eval hash-bucket configuration, and asserts that both
        output shards exist and that the train shard is the larger one.
        """
        test_root = os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR',
                                   self.get_temp_dir())
        output_data_dir = os.path.join(test_root, self._testMethodName)

        # One output artifact per split, each rooted in its own directory.
        train_artifact = types.TfxArtifact(type_name='ExamplesPath',
                                           split='train')
        train_artifact.uri = os.path.join(output_data_dir, 'train')
        eval_artifact = types.TfxArtifact(type_name='ExamplesPath',
                                          split='eval')
        eval_artifact.uri = os.path.join(output_data_dir, 'eval')

        # Execution properties: the input and output configs are passed to
        # the executor as JSON-serialized protos.
        input_proto = example_gen_pb2.Input(splits=[
            example_gen_pb2.Input.Split(name='tfrecord',
                                        pattern='tfrecord/*'),
        ])
        output_proto = example_gen_pb2.Output(
            split_config=example_gen_pb2.SplitConfig(splits=[
                example_gen_pb2.SplitConfig.Split(name='train',
                                                  hash_buckets=2),
                example_gen_pb2.SplitConfig.Split(name='eval',
                                                  hash_buckets=1),
            ]))
        exec_properties = {
            'input': json_format.MessageToJson(input_proto),
            'output': json_format.MessageToJson(output_proto),
        }

        # Run the executor under test.
        executor.Executor().Do(
            self._input_dict,
            {'examples': [train_artifact, eval_artifact]},
            exec_properties)

        # Verify that a shard was written for each split.
        shard_name = 'data_tfrecord-00000-of-00001.gz'
        train_shard = os.path.join(train_artifact.uri, shard_name)
        eval_shard = os.path.join(eval_artifact.uri, shard_name)
        self.assertTrue(tf.gfile.Exists(train_shard))
        self.assertTrue(tf.gfile.Exists(eval_shard))

        # Train is configured with more hash buckets than eval, which is
        # presumably why its shard is expected to be larger.
        train_size = tf.gfile.GFile(train_shard).size()
        eval_size = tf.gfile.GFile(eval_shard).size()
        self.assertGreater(train_size, eval_size)
Пример #3
0
    def testDo(self):
        """Run the executor with a single Examples artifact and check outputs.

        Builds one ``Examples`` artifact whose split names are ``train`` and
        ``eval``, runs the executor on ``self._input_dict`` with a 2:1
        train/eval hash-bucket configuration, and asserts that both split
        shards exist and that the train shard is the larger one.
        """
        test_root = os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR',
                                   self.get_temp_dir())

        # A single output artifact holds both splits.
        examples_artifact = standard_artifacts.Examples()
        examples_artifact.uri = os.path.join(test_root, self._testMethodName)
        examples_artifact.split_names = artifact_utils.encode_split_names(
            ['train', 'eval'])

        # Execution properties: configs are JSON-serialized protos that keep
        # the original proto field names.
        input_proto = example_gen_pb2.Input(splits=[
            example_gen_pb2.Input.Split(name='tfrecord',
                                        pattern='tfrecord/*'),
        ])
        output_proto = example_gen_pb2.Output(
            split_config=example_gen_pb2.SplitConfig(splits=[
                example_gen_pb2.SplitConfig.Split(name='train',
                                                  hash_buckets=2),
                example_gen_pb2.SplitConfig.Split(name='eval',
                                                  hash_buckets=1),
            ]))
        exec_properties = {
            'input_config':
            json_format.MessageToJson(input_proto,
                                      preserving_proto_field_name=True),
            'output_config':
            json_format.MessageToJson(output_proto,
                                      preserving_proto_field_name=True),
        }

        # Run the executor under test.
        executor.Executor().Do(self._input_dict,
                               {'examples': [examples_artifact]},
                               exec_properties)

        # Verify that a shard was written under each split sub-directory.
        shard_name = 'data_tfrecord-00000-of-00001.gz'
        train_shard = os.path.join(examples_artifact.uri, 'train', shard_name)
        eval_shard = os.path.join(examples_artifact.uri, 'eval', shard_name)
        self.assertTrue(tf.io.gfile.exists(train_shard))
        self.assertTrue(tf.io.gfile.exists(eval_shard))

        # Train is configured with more hash buckets than eval, which is
        # presumably why its shard is expected to be larger.
        train_size = tf.io.gfile.GFile(train_shard).size()
        eval_size = tf.io.gfile.GFile(eval_shard).size()
        self.assertGreater(train_size, eval_size)
Пример #4
0
  def _testDo(self, payload_format):
    """Run the import example gen executor for one output payload format.

    Execution properties are assembled from ``self._input_data_dir``,
    ``self._input_config`` and ``self._output_config`` plus the given
    payload format. The executor is run with an empty input dict and a single
    ``Examples`` output artifact (stored on ``self.examples``). The test then
    asserts that the artifact's split names are ``train`` and ``eval``, that
    one gzipped TFRecord shard exists for each split, and that the train
    shard is larger than the eval shard.

    Args:
      payload_format: Value stored under ``utils.OUTPUT_DATA_FORMAT_KEY`` in
        the execution properties.
    """
    exec_properties = {
        utils.INPUT_BASE_KEY: self._input_data_dir,
        utils.INPUT_CONFIG_KEY: self._input_config,
        utils.OUTPUT_CONFIG_KEY: self._output_config,
        utils.OUTPUT_DATA_FORMAT_KEY: payload_format,
    }

    output_data_dir = os.path.join(
        os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
        self._testMethodName)

    # Create output dict.
    self.examples = standard_artifacts.Examples()
    self.examples.uri = output_data_dir
    output_dict = {utils.EXAMPLES_KEY: [self.examples]}

    # Run executor.
    import_example_gen = executor.Executor()
    import_example_gen.Do({}, output_dict, exec_properties)

    # split_names is not set before the run, so this checks the value present
    # on the artifact after Do() returns.
    self.assertEqual(
        artifact_utils.encode_split_names(['train', 'eval']),
        self.examples.split_names)

    # Check import_example_gen outputs: each split writes its shard into a
    # sub-directory of the artifact URI named after the split.
    shard_name = 'data_tfrecord-00000-of-00001.gz'
    train_output_file = os.path.join(self.examples.uri, 'train', shard_name)
    eval_output_file = os.path.join(self.examples.uri, 'eval', shard_name)
    self.assertTrue(fileio.exists(train_output_file))
    self.assertTrue(fileio.exists(eval_output_file))
    self.assertGreater(
        fileio.open(train_output_file).size(),
        fileio.open(eval_output_file).size())