Пример #1
0
    def testBadConstruction(self):
        """Check that PrestoExampleGen rejects incomplete connection configs.

        Constructing the component with an empty query and either a config
        with no fields set or a config with only the port set is expected to
        raise RuntimeError.
        """
        # Config with no fields set at all.
        blank_config = presto_config_pb2.PrestoConnConfig()
        with self.assertRaises(RuntimeError):
            component.PrestoExampleGen(conn_config=blank_config, query='')

        # Config with only the port set.
        port_only = presto_config_pb2.PrestoConnConfig(port=8080)
        with self.assertRaises(RuntimeError):
            component.PrestoExampleGen(conn_config=port_only, query='')
Пример #2
0
    def _extract_conn_config(self, custom_config):
        """Recover the PrestoConnConfig packed inside a serialized custom config.

        Args:
            custom_config: value handed to proto_utils.json_to_proto to fill an
                example_gen_pb2.CustomConfig message.

        Returns:
            The presto_config_pb2.PrestoConnConfig unpacked from the
            `custom_config` field of that message.
        """
        wrapper = example_gen_pb2.CustomConfig()
        presto_conn = presto_config_pb2.PrestoConnConfig()

        # Fill the wrapper first, then unpack its payload into the target.
        proto_utils.json_to_proto(custom_config, wrapper)
        wrapper.custom_config.Unpack(presto_conn)
        return presto_conn
Пример #3
0
  def _extract_conn_config(self, custom_config):
    """Parse a custom config and return the PrestoConnConfig it carries.

    Args:
      custom_config: text parsed by json_format.Parse into an
        example_gen_pb2.CustomConfig message.

    Returns:
      The presto_config_pb2.PrestoConnConfig unpacked from the parsed
      message's `custom_config` field.
    """
    parsed = example_gen_pb2.CustomConfig()
    json_format.Parse(custom_config, parsed)

    result = presto_config_pb2.PrestoConnConfig()
    payload = parsed.custom_config
    payload.Unpack(result)
    return result
Пример #4
0
  def testDeserializeConnConfig(self):
    """Check _deserialize_conn_config against a direct prestodb connection.

    Host and max_attempts are set explicitly on both sides; port and auth are
    left unset so the default values of both connections are compared.
    """
    proto_config = presto_config_pb2.PrestoConnConfig(
        host='presto.localhost', max_attempts=10)

    actual = executor._deserialize_conn_config(proto_config)
    expected = prestodb.dbapi.connect('presto.localhost', max_attempts=10)

    # Compared in order; the first mismatch fails the test.
    for field in ('host', 'port', 'auth', 'max_attempts'):
      self.assertEqual(getattr(expected, field), getattr(actual, field))
Пример #5
0
def _PrestoToExample(  # pylint: disable=invalid-name
        pipeline: beam.Pipeline, exec_properties: Dict[Text, Any],
        split_pattern: Text) -> beam.pvalue.PCollection:
    """Query Presto and convert the results to TF examples.

    Args:
        pipeline: beam pipeline.
        exec_properties: A dict of execution properties; its 'custom_config'
            entry is parsed into a CustomConfig carrying a packed
            PrestoConnConfig.
        split_pattern: Split.pattern in Input config, a Presto sql string.

    Returns:
        PCollection of TF examples.
    """
    # Decode the connection settings from the execution properties.
    custom_config = example_gen_pb2.CustomConfig()
    json_format.Parse(exec_properties['custom_config'], custom_config)
    presto_conn_config = presto_config_pb2.PrestoConnConfig()
    custom_config.custom_config.Unpack(presto_conn_config)
    presto_client = _deserialize_conn_config(presto_conn_config)

    # Build the pipeline stage by stage: query string -> read -> examples.
    queries = pipeline | 'Query' >> beam.Create([split_pattern])
    rows = queries | 'QueryTable' >> beam.ParDo(_ReadPrestoDoFn(presto_client))
    return rows | 'ToTFExample' >> beam.Map(_row_to_example)
Пример #6
0
from tfx.orchestration import metadata
from tfx.orchestration import pipeline
from tfx.orchestration.beam.beam_dag_runner import BeamDagRunner
from tfx.proto import evaluator_pb2
from tfx.proto import pusher_pb2
from tfx.proto import trainer_pb2

# Pipeline name; also used as the final path component of the serving model
# directory below.
_pipeline_name = 'chicago_taxi_presto'

# Root directory of the example, resolved as $HOME/taxi.  The taxi data is
# assumed to be in ~/taxi/data and the taxi utility function in ~/taxi;
# customize as needed.
_taxi_root = os.path.join(os.environ['HOME'], 'taxi')

# Presto connection settings matching the tutorial in README.md.
_presto_config = presto_config_pb2.PrestoConnConfig(
    host='localhost',
    port=8080,
    user='******',
    catalog='hive',
    schema='default',
)

# Query that pulls the Chicago taxi examples out of Presto, following the
# setup described in README.md.
_query = 'SELECT * FROM chicago_taxi_trips_parquet'

# Python module file that injects customized logic into the TFX components.
# Transform and Trainer both need user-defined functions to run successfully.
_module_file = os.path.join(_taxi_root, 'taxi_utils.py')

# Path the model server can listen to; Pusher writes the trained model here.
_serving_model_dir = os.path.join(_taxi_root, 'serving_model', _pipeline_name)

# Directory and data locations.  This example assumes all of the chicago taxi
# example code and metadata library is relative to $HOME, but you can store
# these files anywhere on your local filesystem.
from tfx.examples.custom_components.presto_example_gen.proto import presto_config_pb2
from tfx.examples.custom_components.presto_example_gen.presto_component.component import PrestoExampleGen

# Query handed to PrestoExampleGen; the bracketed names are placeholders to be
# replaced with a real table.
query = """
    SELECT * FROM `<project_id>.<database>.<table_name>`
"""

# Connection settings pointing at localhost:8080.
presto_config = presto_config_pb2.PrestoConnConfig(host='localhost', port=8080)

# Component built from the connection settings and the query above.
example_gen = PrestoExampleGen(presto_config, query=query)
Пример #8
0
 def setUp(self):
     """Run the base-class setUp and store a localhost:8080 Presto config."""
     super().setUp()
     localhost_config = presto_config_pb2.PrestoConnConfig(
         host='localhost', port=8080)
     self.conn_config = localhost_config