Пример #1
0
    def testDo(self):
        """Runs the executor on 'train' statistics and expects schema output.

        The statistics artifact points at ``testdata/statistics_gen`` and
        declares a single 'train' split. The executor is invoked with
        ``infer_feature_shape`` set to False, and the test passes when the
        schema output URI lists at least one entry afterwards.
        """
        # Input: statistics artifact rooted in the shared testdata directory.
        testdata_root = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'testdata')
        stats = standard_artifacts.ExampleStatistics()
        stats.uri = os.path.join(testdata_root, 'statistics_gen')
        stats.split_names = artifact_utils.encode_split_names(['train'])

        # Output: schema artifact under a directory named after this test.
        output_root = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
            self._testMethodName)
        schema = standard_artifacts.Schema()
        schema.uri = os.path.join(output_root, 'schema_output')

        inputs = {'stats': [stats]}
        outputs = {'output': [schema]}
        properties = {'infer_feature_shape': False}

        executor.Executor().Do(inputs, outputs, properties)

        produced = tf.io.gfile.listdir(schema.uri)
        self.assertNotEqual(0, len(produced))
Пример #2
0
 def testDoWithStatistics(self):
     """Runs the executor on the fixture inputs and checks the schema.

     Uses the ``input_dict``, ``output_dict`` and ``exec_properties`` held
     on the test instance. Passes when the schema output URI is non-empty
     and ``_assertSchemaEqual`` accepts it against ``expected_schema``.
     """
     gen = executor.Executor()
     gen.Do(self.input_dict, self.output_dict, self.exec_properties)

     written = tf.io.gfile.listdir(self.schema_output.uri)
     self.assertNotEqual(0, len(written))
     self._assertSchemaEqual(self.expected_schema, self.schema_output)
Пример #3
0
  def test_do(self):
    """Runs the executor on the 'train' statistics and expects schema output.

    The statistics artifact points at ``testdata/statistics_gen/train/`` and
    the executor is invoked with empty execution properties. The test passes
    when the schema output URI lists at least one entry afterwards.
    """
    source_data_dir = os.path.join(
        os.path.dirname(os.path.dirname(__file__)), 'testdata')

    # Input: statistics artifact for the 'train' split.
    train_stats_artifact = types.Artifact('ExampleStatsPath', split='train')
    train_stats_artifact.uri = os.path.join(source_data_dir,
                                            'statistics_gen/train/')

    # Output: schema artifact under a directory named after this test.
    output_data_dir = os.path.join(
        os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
        self._testMethodName)

    schema_output = standard_artifacts.Schema()
    schema_output.uri = os.path.join(output_data_dir, 'schema_output')

    input_dict = {
        'stats': [train_stats_artifact],
    }
    output_dict = {
        'output': [schema_output],
    }

    exec_properties = {}

    schema_gen_executor = executor.Executor()
    schema_gen_executor.Do(input_dict, output_dict, exec_properties)

    # tf.gfile.ListDirectory is a TF1-only alias that TF2 removed; use the
    # tf.io.gfile spelling instead.
    self.assertNotEqual(0, len(tf.io.gfile.listdir(schema_output.uri)))
Пример #4
0
  def testDo(self):
    """Runs the executor with the 'test' split excluded; expects schema output.

    The statistics artifact points at ``testdata/statistics_gen`` and declares
    the 'train', 'eval' and 'test' splits. The exclude-splits execution
    property carries the serialized list ``['test']``. The test passes when
    the schema output URI lists at least one entry afterwards.
    """
    # Input: statistics artifact rooted in the shared testdata directory.
    testdata_root = os.path.join(
        os.path.dirname(os.path.dirname(__file__)), 'testdata')
    stats = standard_artifacts.ExampleStatistics()
    stats.uri = os.path.join(testdata_root, 'statistics_gen')
    stats.split_names = artifact_utils.encode_split_names(
        ['train', 'eval', 'test'])

    # Output: schema artifact under a directory named after this test.
    output_root = os.path.join(
        os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
        self._testMethodName)
    schema = standard_artifacts.Schema()
    schema.uri = os.path.join(output_root, 'schema_output')

    inputs = {standard_component_specs.STATISTICS_KEY: [stats]}

    # The list of excluded splits is passed to Do in serialized form.
    excluded_splits = json_utils.dumps(['test'])
    properties = {standard_component_specs.EXCLUDE_SPLITS_KEY: excluded_splits}

    outputs = {standard_component_specs.SCHEMA_KEY: [schema]}

    executor.Executor().Do(inputs, outputs, properties)

    produced = fileio.listdir(schema.uri)
    self.assertNotEqual(0, len(produced))
Пример #5
0
 def testDoWithSchema(self):
     """Runs the executor with a schema input in place of statistics.

     Adds ``self.schema`` under the 'schema' key and removes the 'stats'
     entry from the fixture inputs. Passes when the schema output URI is
     non-empty and ``_assertSchemaEqual`` accepts it against ``self.schema``.
     """
     # Swap the statistics input for the schema input.
     inputs = self.input_dict
     inputs['schema'] = [self.schema]
     inputs.pop('stats')

     gen = executor.Executor()
     gen.Do(inputs, self.output_dict, self.exec_properties)

     written = tf.io.gfile.listdir(self.schema_output.uri)
     self.assertNotEqual(0, len(written))
     self._assertSchemaEqual(self.schema, self.schema_output)
Пример #6
0
    def testDoWithNonExistentSchema(self):
        """Expects ``Do`` to raise ValueError for an unusable schema input.

        The schema artifact's URI is set to '/path/to/non_existent/schema'
        and replaces the 'stats' entry in the fixture inputs.
        """
        non_existent_schema = standard_artifacts.Schema()
        non_existent_schema.uri = '/path/to/non_existent/schema'

        # Swap the statistics input for the schema input.
        self.input_dict['schema'] = [non_existent_schema]
        self.input_dict.pop('stats')

        # Build the executor outside the assertRaises block so that only a
        # ValueError raised by Do() itself can satisfy the assertion.
        schema_gen_executor = executor.Executor()
        with self.assertRaises(ValueError):
            schema_gen_executor.Do(self.input_dict, self.output_dict,
                                   self.exec_properties)