def test_overwritten_values(self):
    """Checks that a model_getter given to JobOptions is kept as-is."""
    io_stub = stub_io.ModelIoStub()
    expected_getter = io_stub.get_models_ptransform

    job_opts = job_options.JobOptions(model_getter=expected_getter)

    self.assertIs(job_opts.model_getter, expected_getter)
def test_run_with_empty_model_getter(self):
    """Checks that the audit job's run() raises when model_getter is None."""
    pipeline = test_pipeline.TestPipeline(
        runner=runners.DirectRunner(),
        options=job_options.JobOptions(model_getter=None))
    # The job is built outside of the assertion so that only run() is
    # expected to raise.
    job = audit_jobs.AuditAllStorageModelsJob(pipeline)

    with self.assertRaisesRegexp(
            ValueError, 'JobOptions.model_getter must not be None'):
        job.run()
def run_job(
        job_class: Type[base_jobs.JobBase],
        sync: bool,
        namespace: Optional[str] = None,
        pipeline: Optional[beam.Pipeline] = None
) -> beam_job_models.BeamJobRunModel:
    """Runs the specified job, either synchronously or asynchronously.

    When sync is True, the function waits for the pipeline to finish before
    returning and marks the run as DONE. Otherwise, the run model records the
    job ID and state reported by the pipeline's run result.

    Args:
        job_class: type(base_jobs.JobBase). The type of job to run.
        sync: bool. Whether to run the job synchronously.
        namespace: str. The namespace in which models should be created.
        pipeline: Pipeline. The pipeline to run the job upon. If omitted, then
            a new pipeline will be used instead (DirectRunner when sync is
            True, DataflowRunner otherwise).

    Returns:
        BeamJobRunModel. Contains metadata related to the execution status of
        the job.
    """
    if pipeline is None:
        if sync:
            runner = runners.DirectRunner()
        else:
            runner = runners.DataflowRunner()
        pipeline = beam.Pipeline(
            runner=runner,
            options=job_options.JobOptions(namespace=namespace))

    job = job_class(pipeline)
    job_name = job_class.__name__

    # NOTE: Exceptions raised within this context are logged and suppressed.
    with _job_bookkeeping_context(job_name) as run_model:
        unused_pdone = job.run() | job_io.PutResults(run_model.id)

        run_result = pipeline.run()

        if sync:
            run_result.wait_until_finish()
            run_model.latest_job_state = (
                beam_job_models.BeamJobState.DONE.value)
        elif not run_result.has_job:
            # Raised inside the bookkeeping context on purpose, so that the
            # failure is handled by it rather than propagated to the caller.
            raise RuntimeError(
                'Failed to deploy %s to the Dataflow service. Please try again '
                'after a few minutes.' % job_name)
        else:
            run_model.dataflow_job_id = run_result.job_id()
            run_model.latest_job_state = run_result.state

    return run_model
def test_async_job(self) -> None:
    """Checks that an async run records the job ID and state it is given."""
    fake_run_result = mock.Mock(has_job=True, state='PENDING')
    fake_run_result.job_id.return_value = '123'

    pipeline = beam.Pipeline(
        runner=runners.DirectRunner(),
        options=job_options.JobOptions(namespace=self.namespace))

    run_swap = self.swap_to_always_return(
        pipeline, 'run', value=fake_run_result)
    with run_swap:
        run = jobs_manager.run_job(WorkingJob, False, pipeline=pipeline)

    self.assertEqual(run.dataflow_job_id, '123')
    self.assertEqual(run.latest_job_state, 'PENDING')
def test_async_job_that_does_not_start(self) -> None:
    """Checks that a run whose result has no job is recorded as FAILED."""
    fake_run_result = mock.Mock(has_job=False, state='UNKNOWN')
    fake_run_result.job_id.return_value = None

    pipeline = beam.Pipeline(
        runner=runners.DirectRunner(),
        options=job_options.JobOptions(namespace=self.namespace))

    run_swap = self.swap_to_always_return(
        pipeline, 'run', value=fake_run_result)
    with run_swap:
        run = jobs_manager.run_job(WorkingJob, False, pipeline=pipeline)

    self.assertIsNone(run.dataflow_job_id)
    self.assertEqual(run.latest_job_state, 'FAILED')

    # The deployment failure message is expected in the stored stderr.
    run_result = beam_job_services.get_beam_job_run_result(run.id)
    self.assertIn('Failed to deploy WorkingJob', run_result.stderr)
def run_job_sync(
        job_name: str,
        job_args: List[str],
        namespace: Optional[str] = None
) -> beam_job_domain.BeamJobRun:
    """Runs the named job on a DirectRunner pipeline and records its outcome.

    The pipeline is executed inside of a `with` block, so the function does
    not return until that block has exited.

    Args:
        job_name: str. The name of the job to run.
        job_args: list(str). The arguments to the job's run() method.
        namespace: str. The namespace in which models should be created.

    Returns:
        BeamJobRun. Contains metadata related to the execution status of the
        job.
    """
    pipeline = beam.Pipeline(
        runner=runners.DirectRunner(),
        options=job_options.JobOptions(namespace=namespace))
    job = registry.get_job_class_by_name(job_name)(pipeline)

    run_model = beam_job_services.create_beam_job_run_model(job_name, job_args)

    try:
        with pipeline:
            _ = job.run(*job_args) | job_io.PutResults(run_model.id)
    except Exception as error:
        run_model.latest_job_state = beam_job_models.BeamJobState.FAILED.value
        # If the pipeline fails to put the results into storage, then we'll
        # explicitly write them to storage by using the caught exception.
        error_message = python_utils.UNICODE(error)
        failure_result_model = (
            beam_job_services.create_beam_job_run_result_model(
                run_model.id, '', error_message))
        failure_result_model.put()
    else:
        run_model.latest_job_state = beam_job_models.BeamJobState.DONE.value
    finally:
        # The run model is stored regardless of how the pipeline finished.
        run_model.put()

    return beam_job_services.get_beam_job_run_from_model(run_model)
def test_unsupported_values(self):
    """Checks that unknown keyword options are rejected with ValueError."""
    expected_message = re.escape('Unsupported option(s): a, b')

    with self.assertRaisesRegexp(ValueError, expected_message):
        job_options.JobOptions(a=1, b=2)
def test_overwritten_values(self):
    """Checks that a datastoreio_stub given to JobOptions is kept as-is."""
    expected_stub = stub_io.DatastoreioStub()

    job_opts = job_options.JobOptions(datastoreio_stub=expected_stub)

    self.assertIs(job_opts.datastoreio_stub, expected_stub)
def test_default_values(self):
    """Checks that datastoreio_stub is None when no options are given."""
    self.assertIsNone(job_options.JobOptions().datastoreio_stub)
def test_overwritten_values(self):
    """Checks that a namespace given to JobOptions can be read back."""
    options = job_options.JobOptions(namespace='abc')
    # Strings are compared by value. An identity check would only pass when
    # the interpreter happens to reuse the same str object for both values.
    self.assertEqual(options.namespace, 'abc')
def test_default_values(self):
    """Checks that namespace is None when no options are given."""
    self.assertIsNone(job_options.JobOptions().namespace)
def test_unsupported_values(self):
    """Checks that unknown keyword options are rejected with ValueError."""
    with self.assertRaisesRegexp(
            ValueError, r'Unsupported option\(s\): a, b'):
        job_options.JobOptions(a=1, b=2)
def test_default_values(self):
    """Checks that model_getter is None when no options are given."""
    self.assertIsNone(job_options.JobOptions().model_getter)
def __init__(self, *args, **kwargs):
    """Sets up a DirectRunner test pipeline scoped to self.namespace."""
    super(PipelinedTestBase, self).__init__(*args, **kwargs)

    direct_runner = runners.DirectRunner()
    pipeline_options = job_options.JobOptions(namespace=self.namespace)
    self.pipeline = test_pipeline.TestPipeline(
        runner=direct_runner, options=pipeline_options)

    self._pipeline_context_stack = None
def test_unsupported_values(self) -> None:
    """Checks that unknown keyword options are rejected with ValueError."""
    # Only the message prefix is matched here, not the listed option names.
    with self.assertRaisesRegexp(
        ValueError, r'Unsupported option\(s\)'): # type: ignore[no-untyped-call]
        job_options.JobOptions(a='a', b='b')
def test_overwritten_values(self) -> None:
    """Checks that a namespace given to JobOptions can be read back."""
    job_opts = job_options.JobOptions(namespace='abc')

    self.assertEqual(job_opts.namespace, 'abc')