def testStartAIPTrainingWithUserContainer(self, mock_discovery):
  """Checks that a user-supplied master image reaches the create request.

  The discovery client is replaced by `self._mock_api_client`, whose job
  `create` and `get` endpoints are stubbed locally. After calling
  `runner.start_aip_training`, the body passed to `create` is inspected.
  """
  self._training_inputs['masterConfig'] = {'imageUri': 'my-custom-image'}
  mock_discovery.build.return_value = self._mock_api_client

  # Replace the create endpoint with a mock so its call arguments can be read.
  create_stub = mock.Mock()
  self._mock_api_client.projects().jobs().create = create_stub

  # The get request returns a SUCCEEDED state when executed.
  get_request = mock.Mock()
  self._mock_api_client.projects().jobs().get.return_value = get_request
  get_request.execute.return_value = {'state': 'SUCCEEDED'}

  class_path = 'foo.bar.class'
  runner.start_aip_training(
      self._inputs,
      self._outputs,
      self._exec_properties,
      class_path,
      self._training_inputs)

  expected_parent = 'projects/{}'.format(self._project_id)
  create_stub.assert_called_with(body=mock.ANY, parent=expected_parent)

  # call_args is an (args, kwargs) pair; the request body is a keyword arg.
  request_body = create_stub.call_args[1]['body']

  expected_args = [
      '--executor_class_path', class_path,
      '--inputs', '{}',
      '--outputs', '{}',
      '--exec-properties', '{"custom_config": {}}',
  ]
  expected_training_input = {
      'masterConfig': {'imageUri': 'my-custom-image'},
      'args': expected_args,
  }
  self.assertDictContainsSubset(expected_training_input,
                                request_body['trainingInput'])
  self.assertStartsWith(request_body['jobId'], 'tfx_')
  get_request.execute.assert_called_with()
def testStartAIPTraining(self, mock_discovery):
  """Checks the create request built when no image or job id is supplied.

  Exec properties come from `self._serialize_custom_config_under_test()`.
  The test expects the master config to carry the image derived from
  `version.__version__` plus a container command ending in the executor flags.
  """
  mock_discovery.build.return_value = self._mock_api_client
  self._setUpTrainingMocks()

  class_path = 'foo.bar.class'
  serialized_exec_properties = self._serialize_custom_config_under_test()
  runner.start_aip_training(
      self._inputs,
      self._outputs,
      serialized_exec_properties,
      class_path,
      self._training_inputs,
      None)

  expected_parent = 'projects/{}'.format(self._project_id)
  self._mock_create.assert_called_with(body=mock.ANY, parent=expected_parent)

  # call_args is an (args, kwargs) pair; the request body is a keyword arg.
  request_body = self._mock_create.call_args[1]['body']

  default_image = 'gcr.io/tfx-oss-public/tfx:{}'.format(version.__version__)
  # custom_config is embedded as an escaped JSON string in the expected flag.
  exec_properties_json = (
      '{"custom_config": "{\\"ai_platform_training_args\\": '
      '{\\"project\\": \\"12345\\"}}"}')
  expected_command = runner._CONTAINER_COMMAND + [
      '--executor_class_path', class_path,
      '--inputs', '{}',
      '--outputs', '{}',
      '--exec-properties', exec_properties_json,
  ]
  expected_training_input = {
      'masterConfig': {
          'imageUri': default_image,
          'containerCommand': expected_command,
      },
  }
  self.assertDictContainsSubset(expected_training_input,
                                request_body['trainingInput'])
  self.assertStartsWith(request_body['jobId'], 'tfx_')
  self._mock_get.execute.assert_called_with()
def testStartAIPTraining(self, mock_discovery):
  """Checks the create request built when no image or job id is supplied.

  Exec properties are passed as `self._exec_properties`. The test expects the
  master config to carry the image derived from `version.__version__` and the
  executor flags to appear under `args`.
  """
  mock_discovery.build.return_value = self._mock_api_client
  self._setUpTrainingMocks()

  class_path = 'foo.bar.class'
  runner.start_aip_training(
      self._inputs,
      self._outputs,
      self._exec_properties,
      class_path,
      self._training_inputs,
      None)

  expected_parent = 'projects/{}'.format(self._project_id)
  self._mock_create.assert_called_with(body=mock.ANY, parent=expected_parent)

  # call_args is an (args, kwargs) pair; the request body is a keyword arg.
  request_body = self._mock_create.call_args[1]['body']

  default_image = 'gcr.io/tfx-oss-public/tfx:%s' % (version.__version__)
  exec_properties_json = (
      '{"custom_config": {"ai_platform_training_args": '
      '{"project": "12345"}}}')
  expected_training_input = {
      'masterConfig': {'imageUri': default_image},
      'args': [
          '--executor_class_path', class_path,
          '--inputs', '{}',
          '--outputs', '{}',
          '--exec-properties', exec_properties_json,
      ],
  }
  self.assertDictContainsSubset(expected_training_input,
                                request_body['trainingInput'])
  self.assertStartsWith(request_body['jobId'], 'tfx_')
  self._mock_get.execute.assert_called_with()
def testStartAIPTrainingWithUserContainer(self, mock_discovery):
  """Checks that a user image and an explicit job id reach the create request.

  A custom `masterConfig` image is placed in the training inputs and
  `self._job_id` is stored in custom_config and passed to the runner. The
  request body must keep the image and use 'my_jobid' as `jobId`.
  """
  mock_discovery.build.return_value = self._mock_api_client
  self._setUpTrainingMocks()

  class_path = 'foo.bar.class'
  self._training_inputs['masterConfig'] = {'imageUri': 'my-custom-image'}
  custom_config = self._exec_properties['custom_config']
  custom_config[executor.JOB_ID_KEY] = self._job_id

  runner.start_aip_training(
      self._inputs,
      self._outputs,
      self._exec_properties,
      class_path,
      self._training_inputs,
      self._job_id)

  expected_parent = 'projects/{}'.format(self._project_id)
  self._mock_create.assert_called_with(body=mock.ANY, parent=expected_parent)

  # call_args is an (args, kwargs) pair; the request body is a keyword arg.
  request_body = self._mock_create.call_args[1]['body']

  exec_properties_json = (
      '{"custom_config": {"ai_platform_training_args": '
      '{"masterConfig": {"imageUri": "my-custom-image"}, "project": "12345"}, '
      '"ai_platform_training_job_id": "my_jobid"}}')
  expected_training_input = {
      'masterConfig': {'imageUri': 'my-custom-image'},
      'args': [
          '--executor_class_path', class_path,
          '--inputs', '{}',
          '--outputs', '{}',
          '--exec-properties', exec_properties_json,
      ],
  }
  self.assertDictContainsSubset(expected_training_input,
                                request_body['trainingInput'])
  self.assertEqual(request_body['jobId'], 'my_jobid')
  self._mock_get.execute.assert_called_with()
def testStartAIPTrainingWithUserContainer_uCAIP(self, mock_gapic):
  """Checks the custom job built when a user container image is supplied.

  `mock_gapic.JobServiceClient` returns `self._mock_api_client`. The runner is
  called with a job id, a positional `True` (presumably the uCAIP switch;
  confirm against the runner signature) and a region. The test then inspects
  the `custom_job` keyword handed to the create mock.
  """
  mock_gapic.JobServiceClient.return_value = self._mock_api_client
  self._setUpUcaipTrainingMocks()

  class_path = 'foo.bar.class'
  self._training_inputs['worker_pool_specs'] = [
      {'container_spec': {'image_uri': 'my-custom-image'}},
  ]
  custom_config = self._exec_properties['custom_config']
  custom_config[executor.JOB_ID_KEY] = self._job_id
  region = 'us-central2'

  serialized_exec_properties = self._serialize_custom_config_under_test()
  runner.start_aip_training(
      self._inputs,
      self._outputs,
      serialized_exec_properties,
      class_path,
      self._training_inputs,
      self._job_id,
      True,
      region)

  expected_parent = 'projects/{}/locations/{}'.format(self._project_id, region)
  self._mock_create.assert_called_with(
      parent=expected_parent, custom_job=mock.ANY)

  # call_args is an (args, kwargs) pair; the job is passed as a keyword arg.
  custom_job = self._mock_create.call_args[1]['custom_job']

  # custom_config is embedded as an escaped JSON string in the expected flag.
  exec_properties_json = (
      '{"custom_config": "{\\"ai_platform_training_args\\": '
      '{\\"project\\": \\"12345\\", \\"worker_pool_specs\\": '
      '[{\\"container_spec\\": {\\"image_uri\\": \\"my-custom-image\\"}}]}, '
      '\\"ai_platform_training_job_id\\": \\"my_jobid\\"}"}')
  expected_command = runner._CONTAINER_COMMAND + [
      '--executor_class_path', class_path,
      '--inputs', '{}',
      '--outputs', '{}',
      '--exec-properties', exec_properties_json,
  ]
  expected_job_spec = {
      'worker_pool_specs': [
          {
              'container_spec': {
                  'image_uri': 'my-custom-image',
                  'command': expected_command,
              },
          },
      ],
  }
  self.assertDictContainsSubset(expected_job_spec, custom_job['job_spec'])
  self.assertEqual(custom_job['display_name'], 'my_jobid')
  self._mock_get.assert_called_with(name='ucaip_job_study_id')
def Do(self, input_dict: Dict[Text, List[types.Artifact]],
       output_dict: Dict[Text, List[types.Artifact]],
       exec_properties: Dict[Text, Any]) -> None:
  """Starts a Tuner component as a job on Google Cloud AI Platform.

  Args:
    input_dict: Input artifacts, forwarded unchanged to the runner.
    output_dict: Output artifacts, forwarded unchanged to the runner.
    exec_properties: Execution properties. The custom_config entry is parsed
      with `json_utils.loads`; the training args and optional job id are read
      from the parsed result.

  Returns:
    Whatever `runner.start_aip_training` returns.

  Raises:
    ValueError: if custom_config is absent, or the training args entry is
      missing from it.
    TypeError: if the parsed custom_config is not a dict.
  """
  self._log_startup(input_dict, output_dict, exec_properties)

  serialized_config = exec_properties.get(constants.CUSTOM_CONFIG_KEY, 'null')
  custom_config = json_utils.loads(serialized_config)
  if custom_config is None:
    raise ValueError('custom_config is not provided')
  if not isinstance(custom_config, Dict):
    raise TypeError(
        'custom_config in execution properties must be a dict, but received %s'
        % type(custom_config))

  training_inputs = custom_config.get(
      ai_platform_trainer_executor.TRAINING_ARGS_KEY)
  if training_inputs is None:
    err_msg = ('\'%s\' not found in custom_config.' %
               ai_platform_trainer_executor.TRAINING_ARGS_KEY)
    logging.error(err_msg)
    raise ValueError(err_msg)

  tune_args = tuner_executor.get_tune_args(exec_properties)
  num_parallel_trials = tune_args.num_parallel_trials if tune_args else 1

  if num_parallel_trials > 1:
    # The chief node also runs the tuning loop, so one fewer worker is needed.
    desired_worker_count = num_parallel_trials - 1

    if training_inputs.get('workerCount') != desired_worker_count:
      logging.warning('workerCount is overridden with %s',
                      desired_worker_count)
      training_inputs['workerCount'] = desired_worker_count

    training_inputs['scaleTier'] = 'CUSTOM'
    # Keep user-provided machine types; fall back to 'standard' otherwise.
    for machine_key in ('masterType', 'workerType'):
      training_inputs[machine_key] = (
          training_inputs.get(machine_key) or 'standard')

  # 'tfx_tuner_YYYYmmddHHMMSS' is the default job ID if not specified.
  job_id = custom_config.get(ai_platform_trainer_executor.JOB_ID_KEY)
  if not job_id:
    timestamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    job_id = 'tfx_tuner_{}'.format(timestamp)

  # TODO(b/160059039): Factor out label creation to a utility function.
  executor_class = _WorkerExecutor
  executor_class_path = '%s.%s' % (executor_class.__module__,
                                   executor_class.__name__)

  # NOTE(review): exec_properties is forwarded untouched, so its custom_config
  # entry is still whatever was handed to json_utils.loads above. Confirm the
  # runner expects that form.
  return runner.start_aip_training(input_dict, output_dict, exec_properties,
                                   executor_class_path, training_inputs,
                                   job_id)
def testStartAIPTraining_uCAIP(self, mock_gapic):
  """Checks the custom job built when no image or job id is supplied.

  `mock_gapic.JobServiceClient` returns `self._mock_api_client`. The runner is
  called with no job id, a positional `True` (presumably the uCAIP switch;
  confirm against the runner signature) and a region. The expected image tag
  comes from `version_utils.get_image_version()`.
  """
  mock_gapic.JobServiceClient.return_value = self._mock_api_client
  self._setUpUcaipTrainingMocks()

  class_path = 'foo.bar.class'
  region = 'us-central1'

  serialized_exec_properties = self._serialize_custom_config_under_test()
  runner.start_aip_training(
      self._inputs,
      self._outputs,
      serialized_exec_properties,
      class_path,
      self._training_inputs,
      None,
      True,
      region)

  expected_parent = 'projects/{}/locations/{}'.format(self._project_id, region)
  self._mock_create.assert_called_with(
      parent=expected_parent, custom_job=mock.ANY)

  # call_args is an (args, kwargs) pair; the job is passed as a keyword arg.
  custom_job = self._mock_create.call_args[1]['custom_job']

  default_image = 'gcr.io/tfx-oss-public/tfx:{}'.format(
      version_utils.get_image_version())
  # custom_config is embedded as an escaped JSON string in the expected flag.
  exec_properties_json = (
      '{"custom_config": "{\\"ai_platform_training_args\\": '
      '{\\"project\\": \\"12345\\"}}"}')
  expected_command = runner._CONTAINER_COMMAND + [
      '--executor_class_path', class_path,
      '--inputs', '{}',
      '--outputs', '{}',
      '--exec-properties', exec_properties_json,
  ]
  expected_job_spec = {
      'worker_pool_specs': [
          {
              'container_spec': {
                  'image_uri': default_image,
                  'command': expected_command,
              },
          },
      ],
  }
  self.assertDictContainsSubset(expected_job_spec, custom_job['job_spec'])
  self.assertStartsWith(custom_job['display_name'], 'tfx_')
  self._mock_get.assert_called_with(name='ucaip_job_study_id')
def testStartAIPTrainingWithUserContainer(self, mock_discovery):
  """Checks that a user image and an explicit job id reach the create request.

  Exec properties come from `self._serialize_custom_config_under_test()`.
  The request body must keep the user image, carry the container command with
  the executor flags, and use 'my_jobid' as `jobId`. Both the get request and
  the create request must have been executed.
  """
  mock_discovery.build.return_value = self._mock_api_client
  self._setUpTrainingMocks()

  class_path = 'foo.bar.class'
  self._training_inputs['masterConfig'] = {'imageUri': 'my-custom-image'}
  custom_config = self._exec_properties['custom_config']
  custom_config[executor.JOB_ID_KEY] = self._job_id

  serialized_exec_properties = self._serialize_custom_config_under_test()
  runner.start_aip_training(
      self._inputs,
      self._outputs,
      serialized_exec_properties,
      class_path,
      self._training_inputs,
      self._job_id)

  expected_parent = 'projects/{}'.format(self._project_id)
  self._mock_create.assert_called_with(body=mock.ANY, parent=expected_parent)

  # call_args is an (args, kwargs) pair; the request body is a keyword arg.
  request_body = self._mock_create.call_args[1]['body']

  # custom_config is embedded as an escaped JSON string in the expected flag.
  exec_properties_json = (
      '{"custom_config": "{\\"ai_platform_training_args\\": '
      '{\\"masterConfig\\": {\\"imageUri\\": \\"my-custom-image\\"}, '
      '\\"project\\": \\"12345\\"}, '
      '\\"ai_platform_training_job_id\\": \\"my_jobid\\"}"}')
  expected_command = runner._CONTAINER_COMMAND + [
      '--executor_class_path', class_path,
      '--inputs', '{}',
      '--outputs', '{}',
      '--exec-properties', exec_properties_json,
  ]
  expected_training_input = {
      'masterConfig': {
          'imageUri': 'my-custom-image',
          'containerCommand': expected_command,
      },
  }
  self.assertDictContainsSubset(expected_training_input,
                                request_body['trainingInput'])
  self.assertEqual(request_body['jobId'], 'my_jobid')
  self._mock_get.execute.assert_called_with()
  self._mock_create_request.execute.assert_called_with()
def Do(self, input_dict: Dict[Text, List[types.Artifact]],
       output_dict: Dict[Text, List[types.Artifact]],
       exec_properties: Dict[Text, Any]):
  """Starts a trainer job on Google Cloud AI Platform.

  Args:
    input_dict: Passthrough input dict for tfx.components.Trainer.executor.
    output_dict: Passthrough input dict for tfx.components.Trainer.executor.
    exec_properties: Mostly a passthrough input dict for
      tfx.components.Trainer.executor. Its custom_config entry is parsed with
      json_utils.loads; custom_config.ai_platform_training_args and
      custom_config.ai_platform_training_job_id, plus the entries keyed by
      ENABLE_UCAIP_KEY and UCAIP_REGION_KEY, are consumed by this class. For
      the full set of parameters supported by Google Cloud AI Platform, refer
      to
      https://cloud.google.com/ml-engine/docs/tensorflow/training-jobs#configuring_the_job

  Returns:
    The value returned by runner.start_aip_training.

  Raises:
    ValueError: if custom_config is not provided, is not a dict, or does not
      contain ai_platform_training_args.
    RuntimeError: if the Google Cloud AI Platform training job failed (raised
      by the runner, as stated in this method's earlier documentation).
  """
  self._log_startup(input_dict, output_dict, exec_properties)

  custom_config = json_utils.loads(
      exec_properties.get(standard_component_specs.CUSTOM_CONFIG_KEY, 'null'))
  # A missing custom_config parses to None. Reject it here with the documented
  # ValueError instead of failing later with AttributeError on `.get`.
  if custom_config is None:
    err_msg = 'custom_config is not provided'
    absl.logging.error(err_msg)
    raise ValueError(err_msg)
  if not isinstance(custom_config, Dict):
    raise ValueError('custom_config in execution properties needs to be a '
                     'dict.')

  training_inputs = custom_config.get(TRAINING_ARGS_KEY)
  if training_inputs is None:
    err_msg = '\'%s\' not found in custom_config.' % TRAINING_ARGS_KEY
    absl.logging.error(err_msg)
    raise ValueError(err_msg)

  job_id = custom_config.get(JOB_ID_KEY)
  enable_ucaip = custom_config.get(ENABLE_UCAIP_KEY, False)
  ucaip_region = custom_config.get(UCAIP_REGION_KEY)

  executor_class = self._GetExecutorClass()
  executor_class_path = '%s.%s' % (executor_class.__module__,
                                   executor_class.__name__)

  # exec_properties is forwarded untouched, so its custom_config entry is
  # still the serialized value that was parsed above, not the parsed dict.
  return runner.start_aip_training(input_dict, output_dict, exec_properties,
                                   executor_class_path, training_inputs,
                                   job_id, enable_ucaip, ucaip_region)
def Do(self, input_dict: Dict[Text, List[types.Artifact]],
       output_dict: Dict[Text, List[types.Artifact]],
       exec_properties: Dict[Text, Any]):
  """Starts a trainer job on Google Cloud AI Platform.

  Args:
    input_dict: Passthrough input dict for tfx.components.Trainer.executor.
    output_dict: Passthrough input dict for tfx.components.Trainer.executor.
    exec_properties: Mostly a passthrough input dict for
      tfx.components.Trainer.executor. custom_config.ai_platform_training_args
      and custom_config.ai_platform_training_job_id are consumed by this
      class. For the full set of parameters supported by Google Cloud AI
      Platform, refer to
      https://cloud.google.com/ml-engine/docs/tensorflow/training-jobs#configuring_the_job

  Returns:
    The value returned by runner.start_aip_training.

  Raises:
    ValueError: if ai_platform_training_args is not in
      exec_properties.custom_config (including when custom_config is missing
      or None).
    RuntimeError: if the Google Cloud AI Platform training job failed (raised
      by the runner, as stated in this method's earlier documentation).
  """
  self._log_startup(input_dict, output_dict, exec_properties)

  # dict.get only applies its default when the key is absent. An explicit
  # None value must also be treated as "no config" so the documented
  # ValueError is raised below rather than AttributeError.
  custom_config = exec_properties.get('custom_config') or {}

  training_inputs = custom_config.get(TRAINING_ARGS_KEY)
  if training_inputs is None:
    err_msg = '\'%s\' not found in custom_config.' % TRAINING_ARGS_KEY
    absl.logging.error(err_msg)
    raise ValueError(err_msg)

  job_id = custom_config.get(JOB_ID_KEY)

  executor_class = self._GetExecutorClass()
  executor_class_path = '%s.%s' % (executor_class.__module__,
                                   executor_class.__name__)

  return runner.start_aip_training(input_dict, output_dict, exec_properties,
                                   executor_class_path, training_inputs,
                                   job_id)