def __init__(self, component_name, input_dict, output_dict, exec_properties,
             driver_args, driver_class, executor_class,
             additional_pipeline_args, metadata_connection_config,
             logger_config):
    """Constructs an AirflowAdaptor.

    Args:
      component_name: Name of the component in the dag.
      input_dict: a dict from key name to a list of TfxArtifact artifacts.
        Entries whose value is falsy (e.g. an empty list) are not kept.
      output_dict: a dict from key name to a list of TfxArtifact artifacts.
      exec_properties: a dict of execution properties.
      driver_args: an instance of orchestration.data_types.DriverArgs to
        serve as additional args to driver.
      driver_class: Python class of driver.
      executor_class: Python class of executor.
      additional_pipeline_args: a dict of additional pipeline args. Currently
        supporting following keys: beam_pipeline_args. A falsy value is
        stored as an empty dict.
      metadata_connection_config: configuration for how to connect to
        metadata.
      logger_config: dict of logging parameters for configuring the logger.
    """
    # Only inputs that actually carry a value are retained.
    self._input_dict = {
        key: artifacts for key, artifacts in input_dict.items() if artifacts
    }
    self._output_dict = output_dict

    self._component_name = component_name
    self._exec_properties = exec_properties
    self._driver_args = driver_args
    self._driver_class = driver_class
    self._executor_class = executor_class
    self._metadata_connection_config = metadata_connection_config
    self._additional_pipeline_args = additional_pipeline_args or {}
    # No execution id is known at construction time.
    self._execution_id = None

    self._logger = logging_utils.get_logger(logger_config)

    # Resolve the sources of both artifact dicts up front so later steps do
    # not need to look at the artifact dicts themselves.
    self._input_source_dict = self._make_source_dict(self._input_dict)
    self._output_source_dict = self._make_source_dict(self._output_dict)
def __init__(self, component_name, input_dict, output_dict, exec_properties,
             driver_options, driver_class, executor_class,
             additional_pipeline_args, metadata_connection_config):
    """Constructs an AirflowAdaptor.

    Args:
      component_name: Name of the component in the dag.
      input_dict: a dict from key name to a list of TfxType artifacts.
        Entries whose value is falsy (e.g. an empty list) are not kept.
      output_dict: a dict from key name to list of TfxType artifacts.
      exec_properties: a dict of execution properties. Its 'log_root' entry
        is read here to set up the logger.
      driver_options: an instance of base_driver.DriverOptions to
        communicate with driver.
      driver_class: Python class of driver.
      executor_class: Python class of executor.
      additional_pipeline_args: a dict of additional pipeline args. Currently
        supporting following keys: beam_pipeline_args. A falsy value is
        stored as an empty dict.
      metadata_connection_config: configuration for how to connect to
        metadata.
    """
    # Only inputs that actually carry a value are retained.
    self._input_dict = {
        key: artifacts for key, artifacts in input_dict.items() if artifacts
    }
    self._output_dict = output_dict

    self._component_name = component_name
    self._exec_properties = exec_properties
    self._driver_options = driver_options
    self._driver_class = driver_class
    self._executor_class = executor_class
    self._metadata_connection_config = metadata_connection_config
    self._additional_pipeline_args = additional_pipeline_args or {}
    # No execution id is known at construction time.
    self._execution_id = None

    log_root = exec_properties['log_root']
    self._logger = logging_utils.get_logger(log_root, 'comp')

    # Resolve the sources of both artifact dicts up front so later steps do
    # not need to look at the artifact dicts themselves.
    self._input_source_dict = self._make_source_dict(self._input_dict)
    self._output_source_dict = self._make_source_dict(self._output_dict)
def __init__(self, pipeline_name, start_date, schedule_interval, pipeline_root,
             metadata_db_root, metadata_connection_config=None,
             additional_pipeline_args=None, docker_operator_cfg=None,
             enable_cache=False):
    """Constructs an AirflowPipeline.

    Args:
      pipeline_name: name of the pipeline; forwarded to the parent
        initializer as `dag_id` and used as the last component of
        `project_path`.
      start_date: forwarded to the parent initializer.
      schedule_interval: forwarded to the parent initializer.
      pipeline_root: directory under which `project_path` is built.
      metadata_db_root: passed to _get_default_metadata_connection_config
        when no `metadata_connection_config` is supplied.
      metadata_connection_config: optional metadata connection
        configuration; a falsy value selects the default one.
      additional_pipeline_args: optional dict of additional pipeline args.
        When it has a 'logger_args' entry, that entry is applied on top of
        the default logger configuration.
      docker_operator_cfg: stored as-is on the instance.
      enable_cache: stored as-is on the instance.
    """
    super(AirflowPipeline, self).__init__(
        dag_id=pipeline_name,
        schedule_interval=schedule_interval,
        start_date=start_date)

    self.project_path = os.path.join(pipeline_root, pipeline_name)
    # Stored exactly as received, so this attribute may be None.
    self.additional_pipeline_args = additional_pipeline_args
    self.docker_operator_cfg = docker_operator_cfg
    self.enable_cache = enable_cache

    # Local, never-None view of the args used for the lookups below.
    pipeline_args = additional_pipeline_args
    if pipeline_args is None:
        pipeline_args = {}

    # Configure logging: start from the defaults for this pipeline and let
    # caller-provided logger args override them.
    self.logger_config = logging_utils.LoggerConfig(
        pipeline_name=pipeline_name)
    if 'logger_args' in pipeline_args:
        self.logger_config.update(pipeline_args.get('logger_args'))
    self._logger = logging_utils.get_logger(self.logger_config)

    if not metadata_connection_config:
        metadata_connection_config = _get_default_metadata_connection_config(
            metadata_db_root, pipeline_name)
    self.metadata_connection_config = metadata_connection_config

    self._producer_map = {}
    self._consumer_map = {}
    self._upstreams_map = collections.defaultdict(set)
def setUp(self):
    """Builds the artifacts, driver options and logger shared by the tests."""

    def _test_dir(leaf):
        # Per-test scratch directory; TEST_TMP_DIR wins over the temp dir
        # supplied by the test case when it is set.
        return os.path.join(
            os.environ.get('TEST_TMP_DIR', self.get_temp_dir()),
            self._testMethodName, leaf)

    self._mock_metadata = tf.test.mock.Mock()
    self._input_dict = {
        'input_data': [types.TfxArtifact(type_name='InputType')],
    }
    input_dir = _test_dir('input_dir')

    # Valid input artifacts must have a uri pointing to an existing
    # directory, so each one gets an id (1-based) and a directory on disk.
    for key, input_list in self._input_dict.items():
        for artifact_id, artifact in enumerate(input_list, 1):
            artifact.id = artifact_id
            # The trailing '' makes the uri end with a path separator.
            artifact.uri = os.path.join(input_dir, key, str(artifact.id), '')
            tf.gfile.MakeDirs(artifact.uri)

    self._output_dict = {
        'output_data': [types.TfxArtifact(type_name='OutputType')],
    }
    self._exec_properties = {
        'key': 'value',
    }
    self._base_output_dir = _test_dir('base_output_dir')
    self._driver_options = base_driver.DriverOptions(
        worker_name='worker_name',
        base_output_dir=self._base_output_dir,
        enable_cache=True)
    self._execution_id = 100

    logger_config = logging_utils.LoggerConfig(
        log_root=os.path.join(self._base_output_dir, 'log_dir'))
    self._logger = logging_utils.get_logger(logger_config)
def __init__(self, executor_cls, name, input_dict, outputs, exec_properties):
    """Instantiates the executor and stores what is needed to run it.

    Args:
      executor_cls: executor class; instantiated with the list of beam
        pipeline args.
      name: component name; stored in snake case.
      input_dict: stored as-is as the executor inputs.
      outputs: parsed with types.parse_tfx_type_dict into the output dict.
      exec_properties: dict of execution properties. 'beam_pipeline_args'
        (optional) and 'log_root' (required) are read here.
    """
    # Beam expects str types for its pipeline args, so unicode values are
    # encoded to ASCII (characters that cannot be encoded are dropped).
    # NOTE: `unicode` is a Python 2 builtin.
    beam_pipeline_args = [
        arg.encode('ascii', 'ignore') if isinstance(arg, unicode) else arg
        for arg in exec_properties.get('beam_pipeline_args', [])
    ]

    # setup.py is looked up in the directory that contains the tfx package.
    tfx_package_dir = os.path.dirname(tfx.__file__)
    setup_file = os.path.join(os.path.dirname(tfx_package_dir), 'setup.py')
    beam_pipeline_args.append('--setup_file={}'.format(setup_file))

    self._executor = executor_cls(beam_pipeline_args)
    self._input_dict = input_dict
    self._output_dict = types.parse_tfx_type_dict(outputs)
    self._exec_properties = exec_properties
    self._component_name = to_snake_case(name)

    log_name = self._component_name + '_driver.logs'
    self._logger = logging_utils.get_logger(
        exec_properties['log_root'], log_name)
def test_logging(self):
    """Ensures a logged message is written to the log file as formatted."""
    logger = logging_utils.get_logger(self._log_dir, 'test')
    logger.info('Test')

    log_file_path = os.path.join(self._log_dir, 'test')
    # Read through a context manager so the handle is always released, even
    # if reading fails.
    with file_io.FileIO(log_file_path, mode='r') as f:
        log_contents = f.read()

    # The source line number of the logger.info() call is matched with \d+
    # rather than a literal, so edits to this test file do not break it.
    self.assertRegexpMatches(
        log_contents,
        r'^\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d,\d\d\d - '
        r'logging_utils_test\.py:\d+ - INFO: Test$')
def testLogging(self):
    """Ensure a logged string actually appears in the log file."""
    logger = logging_utils.get_logger(self._logger_config)
    logger.info('Test')

    log_file_path = os.path.join(self._log_root, 'tfx.log')
    # Read through a context manager so the handle is always released, even
    # if reading fails.
    with file_io.FileIO(log_file_path, mode='r') as f:
        log_contents = f.read()

    # The source line number of the logger.info() call is matched with \d+
    # so the test keeps passing wherever that call ends up in the file.
    self.assertRegexpMatches(
        log_contents,
        r'^\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d,\d\d\d - : '
        r'\(logging_utils_test.py:\d+\) - INFO: Test$')
def test_fetch_warm_starting_model(self):
    """Checks which model uri the trainer driver's _fetch_latest_model returns.

    The mocked metadata returns three model artifacts with spans 3, 2 and 1
    (in that order); the driver is expected to return the uri of the span-3
    one.
    """

    def _model_artifact(span):
        # Builds a 'ModelExportPath' artifact whose uri encodes its span.
        model = types.TfxArtifact(type_name='ModelExportPath')
        model.span = span
        model.uri = 'uri-%d' % span
        return model.artifact

    mock_metadata = tf.test.mock.Mock()
    mock_metadata.get_all_artifacts.return_value = [
        _model_artifact(span) for span in (3, 2, 1)
    ]

    output_data_dir = os.path.join(
        os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
        self._testMethodName)
    logger_config = logging_utils.LoggerConfig(
        log_root=os.path.join(output_data_dir, 'log_dir'))
    logger = logging_utils.get_logger(logger_config)

    trainer_driver = driver.Driver(logger, mock_metadata)
    self.assertEqual('uri-3', trainer_driver._fetch_latest_model())
def __init__(self, executor_cls, name, input_dict, outputs, exec_properties):
    """Creates the executor instance together with its inputs and logger.

    Args:
      executor_cls: executor class; called with the beam pipeline args list.
      name: component name; kept in snake case.
      input_dict: kept as-is as the executor inputs.
      outputs: converted with types.parse_tfx_type_dict into the output dict.
      exec_properties: dict of execution properties. The optional
        'beam_pipeline_args' entry and the required 'log_root' entry are
        read here.
    """

    def _as_str(arg):
        # Beam expects str types for its pipeline args; unicode values are
        # encoded to ASCII, ignoring characters that cannot be encoded.
        # NOTE: `unicode` is a Python 2 builtin.
        if isinstance(arg, unicode):
            return arg.encode('ascii', 'ignore')
        return arg

    raw_args = exec_properties.get('beam_pipeline_args', [])
    beam_pipeline_args = [_as_str(arg) for arg in raw_args]

    # Point Beam at the setup.py that sits next to the tfx package directory.
    module_dir = os.path.dirname(os.path.dirname(tfx.__file__))
    beam_pipeline_args.append(
        '--setup_file={}'.format(os.path.join(module_dir, 'setup.py')))

    self._executor = executor_cls(beam_pipeline_args)
    self._input_dict = input_dict
    self._output_dict = types.parse_tfx_type_dict(outputs)
    self._exec_properties = exec_properties
    self._component_name = to_snake_case(name)
    self._logger = logging_utils.get_logger(
        exec_properties['log_root'],
        self._component_name + '_driver.logs')
def start_cmle_training(input_dict, output_dict, exec_properties,
                        training_inputs):
    """Start a trainer job on CMLE and wait until it reaches a terminal state.

    Args:
      input_dict: input artifacts; serialized with
        types.jsonify_tfx_type_dict into the job's '--inputs' argument.
      output_dict: output artifacts; serialized the same way into the job's
        '--outputs' argument.
      exec_properties: dict of execution properties. Must contain 'log_root'
        and 'custom_config'. NOTE: it is modified in place, because
        'cmle_training_args' is popped from exec_properties['custom_config'].
      training_inputs: dict used as the job's 'trainingInput'. It is
        shallow-copied before being modified. Must contain 'project', and
        must contain 'jobDir' when 'packageUris' is absent.

    Raises:
      RuntimeError: if the job ends in any terminal state other than
        SUCCEEDED (i.e. FAILED or CANCELLED).
    """
    # Work on a copy so the caller's top-level dict is left untouched.
    training_inputs = training_inputs.copy()
    logger = logging_utils.get_logger(exec_properties['log_root'], 'exec')

    # Remove cmle_args from exec_properties so CMLE trainer doesn't call
    # itself.
    exec_properties['custom_config'].pop('cmle_training_args')

    json_inputs = types.jsonify_tfx_type_dict(input_dict)
    logger.info('json_inputs=\'%s\'.', json_inputs)
    json_outputs = types.jsonify_tfx_type_dict(output_dict)
    logger.info('json_outputs=\'%s\'.', json_outputs)
    json_exec_properties = json.dumps(exec_properties)
    logger.info('json_exec_properties=\'%s\'.', json_exec_properties)

    # Configure CMLE job.
    api_client = discovery.build('ml', 'v1')
    job_args = [
        '--executor', 'Trainer',
        '--inputs', json_inputs,
        '--outputs', json_outputs,
        '--exec-properties', json_exec_properties,
    ]
    training_inputs['args'] = job_args
    training_inputs['pythonModule'] = 'tfx.scripts.run_executor'

    # Pop project so CMLE doesn't complain about an unexpected parameter; it
    # is only needed here to build the parent resource name.
    project = training_inputs.pop('project')
    project_id = 'projects/{}'.format(project)

    if 'packageUris' not in training_inputs:
        # Create TFX dist, copy it next to the job dir and add it to
        # training_inputs.
        local_package = io_utils.build_package()
        cloud_package = os.path.join(training_inputs['jobDir'],
                                     os.path.basename(local_package))
        io_utils.copy_file(local_package, cloud_package, True)
        training_inputs['packageUris'] = [cloud_package]

    job_name = 'tfx_' + datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    job_spec = {'jobId': job_name, 'trainingInput': training_inputs}

    # Submit job to CMLE.
    logger.info('Submitting job=\'{}\', project=\'{}\' to CMLE.'.format(
        job_name, project))
    request = api_client.projects().jobs().create(
        body=job_spec, parent=project_id)
    request.execute()

    # Wait for CMLE job to finish. CANCELLED is terminal too: without it a
    # cancelled job would keep this loop polling forever.
    terminal_states = ('SUCCEEDED', 'FAILED', 'CANCELLED')
    job_id = '{}/jobs/{}'.format(project_id, job_name)
    request = api_client.projects().jobs().get(name=job_id)
    response = request.execute()
    while response['state'] not in terminal_states:
        time.sleep(_POLLING_INTERVAL_IN_SECONDS)
        response = request.execute()

    if response['state'] != 'SUCCEEDED':
        err_msg = 'Job \'{}\' did not succeed.  Detailed response {}.'.format(
            job_name, response)
        logger.error(err_msg)
        raise RuntimeError(err_msg)

    # CMLE training complete.
    logger.info('Job \'{}\' successful.'.format(job_name))
def deploy_model_for_serving(serving_path, model_version, cmle_serving_args,
                             log_root):
    """Deploys a model for serving with CMLE.

    Creates the model (an HTTP 409 response on creation is treated as "already
    exists"), then creates a new version of it and polls the returned
    operation until it is done.

    Args:
      serving_path: The path to the model. Must be a GCS URI.
      model_version: Version of the model being deployed. Must be different
        from what is currently being served.
      cmle_serving_args: Dictionary containing arguments for pushing to CMLE.
        The keys 'model_name', 'project_id' and 'runtime_version' are read.
      log_root: Logging root directory.

    Raises:
      RuntimeError: if an error is encountered when trying to push, i.e. the
        model creation fails with a status other than 409, or the version
        creation operation reports an error.
    """
    logger = logging_utils.get_logger(log_root, 'exec')
    logger.info(
        'Deploying to model with version {} to CMLE for serving: {}'.format(
            model_version, cmle_serving_args))

    model_name = cmle_serving_args['model_name']
    project_id = cmle_serving_args['project_id']
    runtime_version = cmle_serving_args['runtime_version']

    api = discovery.build('ml', 'v1')
    body = {'name': model_name}
    parent = 'projects/{}'.format(project_id)
    try:
        api.projects().models().create(body=body, parent=parent).execute()
    except errors.HttpError as e:
        # If the error is to create an already existing model, it's ok to
        # ignore.
        if e.resp.status == 409:
            logger.warning('Model {} already exists'.format(model_name))
        else:
            raise RuntimeError('CMLE Push failed: {}'.format(e))

    body = {
        'name': 'v{}'.format(model_version),
        'deployment_uri': serving_path,
        'runtime_version': runtime_version,
    }

    # Push to CMLE, and record the operation name so we can poll for its
    # state.
    model_name = 'projects/{}/models/{}'.format(project_id, model_name)
    response = api.projects().models().versions().create(
        body=body, parent=model_name).execute()
    op_name = response['name']

    while True:
        deploy_status = api.projects().operations().get(name=op_name).execute()
        # Check for an error BEFORE checking 'done': an operation that failed
        # is also marked as done, so testing 'done' first would report a
        # failed deployment as a success.
        if 'error' in deploy_status:
            # The operation completed with an error.
            logger.error(deploy_status['error'])
            raise RuntimeError(
                'Failed to deploy model to CMLE for serving: {}'.format(
                    deploy_status['error']))
        if deploy_status.get('done'):
            break

        time.sleep(_POLLING_INTERVAL_IN_SECONDS)
        logger.info('Model still being deployed...')

    logger.info(
        'Successfully deployed model {} with version {}, serving from {}'.format(
            model_name, model_version, serving_path))
def setUp(self):
    """Prepares a metadata connection config and a logger for the tests."""
    # Connection config with its `sqlite` field explicitly marked as set.
    connection_config = metadata_store_pb2.ConnectionConfig()
    connection_config.sqlite.SetInParent()
    self._connection_config = connection_config

    # Logs go to a 'log_dir' folder inside the test's temp directory.
    logger_config = logging_utils.LoggerConfig(
        log_root=os.path.join(self.get_temp_dir(), 'log_dir'))
    self._logger = logging_utils.get_logger(logger_config)
def __init__(self, log_root, metadata_handler):
    """Stores the metadata handler and sets up the 'driver' logger.

    Args:
      log_root: passed to logging_utils.get_logger as the first argument.
      metadata_handler: kept on the instance as `_metadata_handler`.
    """
    self._logger = logging_utils.get_logger(log_root, 'driver')
    self._metadata_handler = metadata_handler
def deploy_model_for_serving(serving_path, model_version, cmle_serving_args,
                             log_root):
    """Deploys a model for serving with CMLE.

    Creates the model (an HTTP 409 response on creation is treated as "already
    exists"), then creates a new version of it and polls the returned
    operation until it is done.

    Args:
      serving_path: The path to the model. Must be a GCS URI.
      model_version: Version of the model being deployed. Must be different
        from what is currently being served.
      cmle_serving_args: Dictionary containing arguments for pushing to CMLE.
        The keys 'model_name', 'project_id' and 'runtime_version' are read.
      log_root: Logging root directory.

    Raises:
      RuntimeError: if an error is encountered when trying to push, i.e. the
        model creation fails with a status other than 409, or the version
        creation operation reports an error.
    """
    logger = logging_utils.get_logger(log_root, 'exec')
    logger.info(
        'Deploying to model with version {} to CMLE for serving: {}'.format(
            model_version, cmle_serving_args))

    model_name = cmle_serving_args['model_name']
    project_id = cmle_serving_args['project_id']
    runtime_version = cmle_serving_args['runtime_version']

    api = discovery.build('ml', 'v1')
    body = {'name': model_name}
    parent = 'projects/{}'.format(project_id)
    try:
        api.projects().models().create(body=body, parent=parent).execute()
    except errors.HttpError as e:
        # If the error is to create an already existing model, it's ok to
        # ignore.
        if e.resp.status == 409:
            logger.warning('Model {} already exists'.format(model_name))
        else:
            raise RuntimeError('CMLE Push failed: {}'.format(e))

    body = {
        'name': 'v{}'.format(model_version),
        'deployment_uri': serving_path,
        'runtime_version': runtime_version,
    }

    # Push to CMLE, and record the operation name so we can poll for its
    # state.
    model_name = 'projects/{}/models/{}'.format(project_id, model_name)
    response = api.projects().models().versions().create(
        body=body, parent=model_name).execute()
    op_name = response['name']

    while True:
        deploy_status = api.projects().operations().get(name=op_name).execute()
        # Check for an error BEFORE checking 'done': an operation that failed
        # is also marked as done, so testing 'done' first would report a
        # failed deployment as a success.
        if 'error' in deploy_status:
            # The operation completed with an error.
            logger.error(deploy_status['error'])
            raise RuntimeError(
                'Failed to deploy model to CMLE for serving: {}'.format(
                    deploy_status['error']))
        if deploy_status.get('done'):
            break

        time.sleep(_POLLING_INTERVAL_IN_SECONDS)
        logger.info('Model still being deployed...')

    logger.info(
        'Successfully deployed model {} with version {}, serving from {}'.format(
            model_name, model_version, serving_path))
def start_cmle_training(input_dict, output_dict, exec_properties,
                        training_inputs):
    """Start a trainer job on CMLE and wait until it reaches a terminal state.

    Args:
      input_dict: input artifacts; serialized with
        types.jsonify_tfx_type_dict into the job's '--inputs' argument.
      output_dict: output artifacts; serialized the same way into the job's
        '--outputs' argument.
      exec_properties: dict of execution properties. Must contain 'log_root'
        and 'custom_config'. NOTE: it is modified in place, because
        'cmle_training_args' is popped from exec_properties['custom_config'].
      training_inputs: dict used as the job's 'trainingInput'. It is
        shallow-copied before being modified. Must contain 'project', and
        must contain 'jobDir' when 'packageUris' is absent.

    Raises:
      RuntimeError: if the job ends in any terminal state other than
        SUCCEEDED (i.e. FAILED or CANCELLED).
    """
    # Work on a copy so the caller's top-level dict is left untouched.
    training_inputs = training_inputs.copy()
    logger = logging_utils.get_logger(exec_properties['log_root'], 'exec')

    # Remove cmle_args from exec_properties so CMLE trainer doesn't call
    # itself.
    exec_properties['custom_config'].pop('cmle_training_args')

    json_inputs = types.jsonify_tfx_type_dict(input_dict)
    logger.info('json_inputs=\'%s\'.', json_inputs)
    json_outputs = types.jsonify_tfx_type_dict(output_dict)
    logger.info('json_outputs=\'%s\'.', json_outputs)
    json_exec_properties = json.dumps(exec_properties)
    logger.info('json_exec_properties=\'%s\'.', json_exec_properties)

    # Configure CMLE job.
    api_client = discovery.build('ml', 'v1')
    job_args = [
        '--executor', 'Trainer',
        '--inputs', json_inputs,
        '--outputs', json_outputs,
        '--exec-properties', json_exec_properties,
    ]
    training_inputs['args'] = job_args
    training_inputs['pythonModule'] = 'tfx.scripts.run_executor'

    # Pop project so CMLE doesn't complain about an unexpected parameter; it
    # is only needed here to build the parent resource name.
    project = training_inputs.pop('project')
    project_id = 'projects/{}'.format(project)

    if 'packageUris' not in training_inputs:
        # Create TFX dist, copy it next to the job dir and add it to
        # training_inputs.
        local_package = io_utils.build_package()
        cloud_package = os.path.join(training_inputs['jobDir'],
                                     os.path.basename(local_package))
        io_utils.copy_file(local_package, cloud_package, True)
        training_inputs['packageUris'] = [cloud_package]

    job_name = 'tfx_' + datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    job_spec = {'jobId': job_name, 'trainingInput': training_inputs}

    # Submit job to CMLE.
    logger.info('Submitting job=\'{}\', project=\'{}\' to CMLE.'.format(
        job_name, project))
    request = api_client.projects().jobs().create(body=job_spec,
                                                  parent=project_id)
    request.execute()

    # Wait for CMLE job to finish. CANCELLED is terminal too: without it a
    # cancelled job would keep this loop polling forever.
    terminal_states = ('SUCCEEDED', 'FAILED', 'CANCELLED')
    job_id = '{}/jobs/{}'.format(project_id, job_name)
    request = api_client.projects().jobs().get(name=job_id)
    response = request.execute()
    while response['state'] not in terminal_states:
        time.sleep(_POLLING_INTERVAL_IN_SECONDS)
        response = request.execute()

    if response['state'] != 'SUCCEEDED':
        err_msg = 'Job \'{}\' did not succeed.  Detailed response {}.'.format(
            job_name, response)
        logger.error(err_msg)
        raise RuntimeError(err_msg)

    # CMLE training complete.
    logger.info('Job \'{}\' successful.'.format(job_name))