Exemplo n.º 1
0
 def test_construct(self):
     """Verify the outputs of a BigQueryExampleGen built from a query.

     Checks the type name of the ``examples`` output, then that its first
     two artifacts carry the 'train' and 'eval' splits, in that order.
     """
     example_gen = component.BigQueryExampleGen(query='query')
     examples_channel = example_gen.outputs.examples
     self.assertEqual('ExamplesPath', examples_channel.type_name)
     artifacts = examples_channel.get()
     for position, expected_split in enumerate(('train', 'eval')):
         self.assertEqual(expected_split, artifacts[position].split)
Exemplo n.º 2
0
 def testConstruct(self):
     """Verify the outputs of a BigQueryExampleGen built from a query.

     The ``examples`` output must report the Examples artifact type name,
     and its first two artifacts must be the 'train' and 'eval' splits.
     """
     example_gen = component.BigQueryExampleGen(query='query')
     examples_channel = example_gen.outputs['examples']
     self.assertEqual(standard_artifacts.Examples.TYPE_NAME,
                      examples_channel.type_name)
     artifacts = examples_channel.get()
     for position, expected_split in enumerate(('train', 'eval')):
         self.assertEqual(expected_split, artifacts[position].split)
Exemplo n.º 3
0
 def testConstruct(self):
     """Verify the single Examples artifact produced for a query.

     The ``examples`` output must report the Examples artifact type name,
     hold exactly one artifact, and that artifact's encoded split names
     must decode to ['train', 'eval'].
     """
     example_gen = component.BigQueryExampleGen(query='query')
     examples_channel = example_gen.outputs['examples']
     self.assertEqual(standard_artifacts.Examples.TYPE_NAME,
                      examples_channel.type_name)
     artifacts = examples_channel.get()
     self.assertEqual(1, len(artifacts))
     decoded_splits = artifact_utils.decode_split_names(
         artifacts[0].split_names)
     self.assertEqual(['train', 'eval'], decoded_splits)
Exemplo n.º 4
0
def _two_step_pipeline() -> tfx_pipeline.Pipeline:
    """Build a pipeline of BigQueryExampleGen followed by StatisticsGen.

    Returns:
        A ``tfx_pipeline.Pipeline`` named 'two_step_pipeline' whose
        StatisticsGen consumes the example generator's ``examples`` output.
    """
    bq_examples = big_query_example_gen_component.BigQueryExampleGen(
        query='SELECT * FROM TABLE')
    stats = statistics_gen_component.StatisticsGen(
        input_data=bq_examples.outputs.examples)
    steps = [bq_examples, stats]
    return tfx_pipeline.Pipeline(
        pipeline_name='two_step_pipeline',
        pipeline_root='pipeline_root',
        components=steps,
    )
Exemplo n.º 5
0
def _two_step_pipeline() -> tfx_pipeline.Pipeline:
  """Build a pipeline of BigQueryExampleGen followed by StatisticsGen.

  Returns:
    A ``tfx_pipeline.Pipeline`` named 'two_step_pipeline', configured with a
    default-constructed metadata connection config, whose StatisticsGen
    consumes the example generator's 'examples' output.
  """
  bq_examples = big_query_example_gen_component.BigQueryExampleGen(
      query='SELECT * FROM TABLE')
  stats = statistics_gen_component.StatisticsGen(
      examples=bq_examples.outputs['examples'])
  connection_config = metadata_store_pb2.ConnectionConfig()
  steps = [bq_examples, stats]
  return tfx_pipeline.Pipeline(
      pipeline_name='two_step_pipeline',
      pipeline_root='pipeline_root',
      metadata_connection_config=connection_config,
      components=steps,
  )
Exemplo n.º 6
0
 def testConstructWithInputConfig(self):
     """Verify outputs when the splits come from an explicit input config.

     Three input splits ('train', 'eval', 'test') each get their own
     pattern. The first three artifacts of the ``examples`` output are then
     checked against those split names, in order.
     """
     split_patterns = (('train', 'query1'), ('eval', 'query2'),
                       ('test', 'query3'))
     input_splits = [
         example_gen_pb2.Input.Split(name=split_name, pattern=pattern)
         for split_name, pattern in split_patterns
     ]
     example_gen = component.BigQueryExampleGen(
         input_config=example_gen_pb2.Input(splits=input_splits))
     examples_channel = example_gen.outputs['examples']
     self.assertEqual(standard_artifacts.Examples.TYPE_NAME,
                      examples_channel.type_name)
     artifacts = examples_channel.get()
     for position, (split_name, _) in enumerate(split_patterns):
         self.assertEqual(split_name, artifacts[position].split)
Exemplo n.º 7
0
 def test_construct_with_input_config(self):
     """Verify outputs when the splits come from an explicit input config.

     Three input splits ('train', 'eval', 'test') each get their own
     pattern. The ``examples`` output must report the 'ExamplesPath' type
     name, and its first three artifacts must match those split names.
     """
     split_patterns = (('train', 'query1'), ('eval', 'query2'),
                       ('test', 'query3'))
     input_splits = [
         example_gen_pb2.Input.Split(name=split_name, pattern=pattern)
         for split_name, pattern in split_patterns
     ]
     example_gen = component.BigQueryExampleGen(
         input_config=example_gen_pb2.Input(splits=input_splits))
     examples_channel = example_gen.outputs.examples
     self.assertEqual('ExamplesPath', examples_channel.type_name)
     artifacts = examples_channel.get()
     for position, (split_name, _) in enumerate(split_patterns):
         self.assertEqual(split_name, artifacts[position].split)
Exemplo n.º 8
0
def _two_step_pipeline() -> tfx_pipeline.Pipeline:
  """Build a two-component pipeline whose query embeds a runtime parameter.

  The table name is declared as a ``RuntimeParameter`` and its string form
  is interpolated into the SQL text passed to BigQueryExampleGen.

  Returns:
    A ``tfx_pipeline.Pipeline`` named 'two_step_pipeline' containing the
    example generator and a StatisticsGen reading its 'examples' output.
  """
  table_param = data_types.RuntimeParameter(
      name='table-name', ptype=Text, default='default-table')
  # Keep the explicit str() + %-formatting: the parameter's string form is
  # what ends up in the query text.
  query_text = 'SELECT * FROM %s' % str(table_param)
  bq_examples = big_query_example_gen_component.BigQueryExampleGen(
      query=query_text)
  stats = statistics_gen_component.StatisticsGen(
      examples=bq_examples.outputs['examples'])
  connection_config = metadata_store_pb2.ConnectionConfig()
  return tfx_pipeline.Pipeline(
      pipeline_name='two_step_pipeline',
      pipeline_root='pipeline_root',
      metadata_connection_config=connection_config,
      components=[bq_examples, stats],
  )
Exemplo n.º 9
0
 def test_construct_with_output_config(self):
   """Verify outputs when the splits come from an explicit output config.

   The split config declares 'train' (2 hash buckets), 'eval' (1) and
   'test' (1). The ``examples`` output must report the 'ExamplesPath' type
   name, and its first three artifacts must match those split names.
   """
   split_buckets = (('train', 2), ('eval', 1), ('test', 1))
   output_splits = [
       example_gen_pb2.SplitConfig.Split(name=split_name, hash_buckets=buckets)
       for split_name, buckets in split_buckets
   ]
   split_config = example_gen_pb2.SplitConfig(splits=output_splits)
   example_gen = component.BigQueryExampleGen(
       query='',
       output_config=example_gen_pb2.Output(split_config=split_config))
   examples_channel = example_gen.outputs.examples
   self.assertEqual('ExamplesPath', examples_channel.type_name)
   artifacts = examples_channel.get()
   for position, (split_name, _) in enumerate(split_buckets):
     self.assertEqual(split_name, artifacts[position].split)
Exemplo n.º 10
0
 def testConstructWithOutputConfig(self):
     """Verify outputs when the splits come from an explicit output config.

     The split config declares 'train' (2 hash buckets), 'eval' (1) and
     'test' (1). The ``examples`` output must report the Examples artifact
     type name, and its first three artifacts must match those split names.
     """
     split_buckets = (('train', 2), ('eval', 1), ('test', 1))
     output_splits = [
         example_gen_pb2.SplitConfig.Split(name=split_name,
                                           hash_buckets=buckets)
         for split_name, buckets in split_buckets
     ]
     split_config = example_gen_pb2.SplitConfig(splits=output_splits)
     example_gen = component.BigQueryExampleGen(
         query='query',
         output_config=example_gen_pb2.Output(split_config=split_config))
     examples_channel = example_gen.outputs['examples']
     self.assertEqual(standard_artifacts.Examples.TYPE_NAME,
                      examples_channel.type_name)
     artifacts = examples_channel.get()
     for position, (split_name, _) in enumerate(split_buckets):
         self.assertEqual(split_name, artifacts[position].split)
 def __init__(self, args):
   """Initialize the runner with a BigQueryExampleGen built from ``args``.

   Args:
     args: Object exposing a ``query`` attribute, which is passed to
       BigQueryExampleGen. ``args`` itself is also forwarded to the parent
       initializer together with the constructed component.
   """
   example_gen = big_query_example_gen_component.BigQueryExampleGen(args.query)
   super(BigQueryExampleGenRunner, self).__init__(args, example_gen)
Exemplo n.º 12
0
 def __init__(self, query: dsl.PipelineParam):
     """Initialize with a placeholder component and a pipeline-param query.

     The BigQueryExampleGen is built with an empty query string; the given
     ``query`` is handed to the parent initializer in a mapping under the
     'query' key.
     NOTE(review): presumably the parent substitutes the pipeline parameter
     for the empty query at run time; confirm against the base class.

     Args:
       query: Pipeline parameter forwarded to the parent under 'query'.
     """
     example_gen = big_query_example_gen_component.BigQueryExampleGen('')
     params = {"query": query}
     super().__init__(example_gen, params)
Exemplo n.º 13
0
def _two_step_pipeline():
    """Build the two components of a BigQuery-to-statistics pipeline.

    Returns:
        A list ``[example_gen, statistics_gen]`` in which the StatisticsGen
        consumes the BigQueryExampleGen's ``examples`` output.
    """
    bq_examples = big_query_example_gen_component.BigQueryExampleGen(
        query='SELECT * FROM TABLE')
    stats = statistics_gen_component.StatisticsGen(
        input_data=bq_examples.outputs.examples)
    steps = [bq_examples, stats]
    return steps
Exemplo n.º 14
0
 def test_construct(self):
     """Verify the ``examples`` output type for an empty-query component."""
     example_gen = component.BigQueryExampleGen(query='')
     examples_type_name = example_gen.outputs.examples.type_name
     self.assertEqual('ExamplesPath', examples_type_name)
 def __init__(self, query: str):
     """Initialize with a BigQueryExampleGen built from ``query``.

     Args:
       query: Query string passed positionally to BigQueryExampleGen; the
         resulting component is handed to the parent initializer.
     """
     example_gen = big_query_example_gen_component.BigQueryExampleGen(query)
     super().__init__(example_gen)
Exemplo n.º 16
0
 def testEnableCache(self):
   """Verify how ``enable_cache`` is exposed on the component.

   When ``enable_cache`` is not passed the attribute must equal None; when
   it is passed as True the attribute must equal True.
   """
   default_gen = component.BigQueryExampleGen(query='query')
   default_setting = default_gen.enable_cache
   self.assertEqual(None, default_setting)
   cached_gen = component.BigQueryExampleGen(query='query', enable_cache=True)
   cached_setting = cached_gen.enable_cache
   self.assertEqual(True, cached_setting)