def test_run_beam_job_using_job_class(self) -> None:
    """Verifies that run_beam_job() accepts a job class directly."""
    run_model = beam_job_services.create_beam_job_run_model('NoOpJob')

    # Stub out the job runner so no real pipeline is launched.
    with self.swap_to_always_return(jobs_manager, 'run_job', value=run_model):
        returned_run = beam_job_services.run_beam_job(job_class=NoOpJob)

    expected_dict = (
        beam_job_services.get_beam_job_run_from_model(run_model).to_dict())
    self.assertEqual(expected_dict, returned_run.to_dict())
def test_create_beam_job_run_model(self) -> None:
    """Verifies that a freshly created run model is returned by queries."""
    run_model = beam_job_services.create_beam_job_run_model(
        'FooJob', dataflow_job_id='123')
    run_model.put()

    runs = beam_job_services.get_beam_job_runs(refresh=False)

    self.assertEqual(len(runs), 1)
    fetched_run = runs[0]
    self.assertEqual(fetched_run.job_name, 'FooJob')
    self.assertFalse(fetched_run.job_is_synchronous)
def test_cancel_beam_job_which_has_no_dataflow_job_id_raises_an_error(
    self
) -> None:
    """Verifies that cancelling a run without a Dataflow job id fails."""
    run_model = beam_job_services.create_beam_job_run_model(
        'NoOpJob', dataflow_job_id=None)
    run_model.put()

    with self.swap_to_always_return(jobs_manager, 'cancel_job'):
        # Context-manager form of the same assertion the original made
        # via a lambda callable.
        with self.assertRaisesRegex(  # type: ignore[no-untyped-call]
                ValueError, 'cannot be cancelled'):
            beam_job_services.cancel_beam_job(run_model.id)
def test_create_beam_job_run_model_with_job_arguments(self) -> None:
    """Verifies that job arguments are recorded on the created run model.

    NOTE(review): This test previously reused the name
    test_create_beam_job_run_model, which is also defined elsewhere in
    this file; in a single test class the later definition silently
    shadows the earlier one, so only one of the two ever ran. Renamed
    (and annotated with -> None, matching the sibling tests) so both
    are collected.
    """
    model = beam_job_services.create_beam_job_run_model(
        'FooJob', ['num_foos'], dataflow_job_id='123')
    model.put()
    all_runs = beam_job_services.get_beam_job_runs()
    self.assertEqual(len(all_runs), 1)
    run = all_runs[0]
    self.assertEqual(run.job_name, 'FooJob')
    self.assertEqual(run.job_arguments, ['num_foos'])
    self.assertFalse(run.job_is_synchronous)
def test_cancel_beam_job(self) -> None:
    """Verifies that cancel_beam_job() returns the updated run metadata."""
    run_model = beam_job_services.create_beam_job_run_model(
        'NoOpJob', dataflow_job_id='123')
    run_model.put()

    # Stub out the real cancellation RPC.
    with self.swap_to_always_return(jobs_manager, 'cancel_job'):
        cancelled_run = beam_job_services.cancel_beam_job(run_model.id)

    expected_dict = (
        beam_job_services.get_beam_job_run_from_model(run_model).to_dict())
    self.assertEqual(cancelled_run.to_dict(), expected_dict)
def test_put_starts_new_job(self) -> None:
    """Verifies that a PUT request launches a new job and echoes its run."""
    run_model = beam_job_services.create_beam_job_run_model('FooJob')
    csrf_token = self.get_new_csrf_token()  # type: ignore[no-untyped-call]

    # Stub out the job runner so no real pipeline is launched.
    with self.swap_to_always_return(jobs_manager, 'run_job', value=run_model):
        response = self.put_json(  # type: ignore[no-untyped-call]
            '/beam_job_run', {'job_name': 'FooJob'}, csrf_token=csrf_token)

    expected_dict = (
        beam_job_services.get_beam_job_run_from_model(run_model).to_dict())
    self.assertEqual(response, expected_dict)
def test_get_returns_job_output(self) -> None:
    """Verifies that GET returns the stored stdout/stderr of a job run."""
    run_model = beam_job_services.create_beam_job_run_model('WorkingJob')
    run_model.put()

    result_model = beam_job_services.create_beam_job_run_result_model(
        run_model.id, 'o', '')
    result_model.put()

    response = self.get_json(
        '/beam_job_run_result?job_id=%s' % run_model.id)

    self.assertEqual(response, {'stdout': 'o', 'stderr': ''})
def test_run_beam_job(self) -> None:
    """Verifies that run_beam_job() launches a job by name."""
    run_model = beam_job_services.create_beam_job_run_model('WorkingJob')

    # Stub both the class lookup and the runner so nothing real executes.
    registry_swap = self.swap_to_always_return(
        jobs_registry, 'get_job_class_by_name')
    runner_swap = self.swap_to_always_return(
        jobs_manager, 'run_job', value=run_model)

    with registry_swap, runner_swap:
        returned_run = beam_job_services.run_beam_job('WorkingJob')

    expected_dict = (
        beam_job_services.get_beam_job_run_from_model(run_model).to_dict())
    self.assertEqual(expected_dict, returned_run.to_dict())
def test_delete_cancels_job(self) -> None:
    """Verifies that a DELETE request cancels the job and returns it."""
    run_model = beam_job_services.create_beam_job_run_model('FooJob')
    run_model.put()

    now = datetime.datetime.utcnow()
    expected_run = beam_job_domain.BeamJobRun(
        run_model.id, 'FooJob', 'CANCELLING', now,
        datetime.datetime.utcnow(), False)

    # Stub the cancellation service so the handler's response is known.
    with self.swap_to_always_return(
            beam_job_services, 'cancel_beam_job', value=expected_run):
        response = self.delete_json(  # type: ignore[no-untyped-call]
            '/beam_job_run', {'job_id': run_model.id})

    self.assertEqual(response, expected_run.to_dict())
def test_get_returns_all_runs(self) -> None:
    """Verifies that GET lists every stored job run."""
    # Create three identical runs to list.
    for _ in range(3):
        beam_job_services.create_beam_job_run_model('FooJob').put()

    response = self.get_json('/beam_job_run')

    self.assertIn('runs', response)
    runs = response['runs']
    self.assertEqual(len(runs), 3)
    self.assertCountEqual(
        [run['job_name'] for run in runs], ['FooJob'] * 3)
def setUp(self) -> None:
    """Prepares a run model and a mocked Dataflow client for each test."""
    super().setUp()

    self.run_model = beam_job_services.create_beam_job_run_model(
        'WorkingJob', dataflow_job_id='123')

    # Canned Dataflow job that the mocked client will hand back.
    self.dataflow_job = dataflow.Job(
        id='123',
        project_id=feconf.OPPIA_PROJECT_ID,
        location=feconf.GOOGLE_APP_ENGINE_REGION,
        current_state=dataflow.JobState.JOB_STATE_CANCELLING,
        current_state_time=datetime.datetime.utcnow())

    mock_client = mock.Mock()
    mock_client.update_job.return_value = self.dataflow_job
    self.dataflow_client_mock = mock_client

    # Keep the client patched for the whole test; the stack is expected
    # to be closed elsewhere (e.g. tearDown, not visible in this chunk).
    self.exit_stack = contextlib.ExitStack()
    self.exit_stack.enter_context(self.swap_to_always_return(
        dataflow, 'JobsV1Beta3Client', value=mock_client))
def run_job_sync(
    job_name: str,
    job_args: List[str],
    namespace: Optional[str] = None
) -> beam_job_domain.BeamJobRun:
    """Runs the specified job synchronously. In other words, the function
    will wait for the job to finish running before returning a value.

    Args:
        job_name: str. The name of the job to run.
        job_args: list(str). The arguments to the job's run() method.
        namespace: str. The namespace in which models should be created.

    Returns:
        BeamJobRun. Contains metadata related to the execution status of
        the job.
    """
    job_pipeline = beam.Pipeline(
        runner=runners.DirectRunner(),
        options=job_options.JobOptions(namespace=namespace))
    job_class = registry.get_job_class_by_name(job_name)

    job = job_class(job_pipeline)
    run_model = beam_job_services.create_beam_job_run_model(job_name, job_args)

    try:
        with job_pipeline:
            unused_pdone = job.run(*job_args) | job_io.PutResults(run_model.id)
    except Exception as exception:
        run_model.latest_job_state = beam_job_models.BeamJobState.FAILED.value
        # If the pipeline fails to put the results into storage, then we'll
        # explicitly write them to storage by using the caught exception.
        # NOTE: str() replaces the legacy python_utils.UNICODE() helper,
        # matching how _job_bookkeeping_context stringifies exceptions.
        result_model = beam_job_services.create_beam_job_run_result_model(
            run_model.id, '', str(exception))
        result_model.put()
    else:
        run_model.latest_job_state = beam_job_models.BeamJobState.DONE.value
    finally:
        # Always persist the final job state, whether it succeeded or failed.
        run_model.put()

    return beam_job_services.get_beam_job_run_from_model(run_model)
def _job_bookkeeping_context(
    job_name: str) -> Iterator[beam_job_models.BeamJobRunModel]:
    """Returns a context manager which commits failure details if an
    exception occurs.

    NOTE(review): This is a generator, so it presumably carries a
    @contextlib.contextmanager decorator not visible in this chunk —
    confirm at the definition site.

    Args:
        job_name: str. The name of the job.

    Yields:
        BeamJobRunModel. The bookkeeping model used to record execution
        details.
    """
    run_model = beam_job_services.create_beam_job_run_model(job_name)
    try:
        # Hand the model to the with-block; exceptions raised there are
        # re-delivered at this yield point.
        yield run_model
    except Exception as exception:
        # Record the failure state and capture the exception text as the
        # job's stderr before letting the exception propagate.
        run_model.latest_job_state = beam_job_models.BeamJobState.FAILED.value
        _put_job_stderr(run_model.id, str(exception))
    finally:
        # Always persist whatever state the model ended up in.
        run_model.put()