def test_check_concurrency(self):
    # Operation without concurrency defaults to infinite concurrency
    operation = OperationFactory()
    operation_run = OperationRunFactory(operation=operation)
    assert operation_run.check_concurrency() is True

    # Operation with a concurrency limit of 2
    operation.concurrency = 2
    operation.save()
    # No running operation runs
    assert operation_run.check_concurrency() is True
    # One operation run
    operation_run1 = OperationRunFactory(operation=operation)
    assert operation_run.check_concurrency() is True
    # One operation run with RUNNING status
    OperationRunStatus.objects.create(status=OperationStatuses.RUNNING,
                                      operation_run=operation_run1)
    assert operation_run.check_concurrency() is True
    # Second operation run
    operation_run2 = OperationRunFactory(operation=operation)
    assert operation_run.check_concurrency() is True
    # Second operation run with RUNNING status
    OperationRunStatus.objects.create(status=OperationStatuses.RUNNING,
                                      operation_run=operation_run2)
    assert operation_run.check_concurrency() is False
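
# The assertions above pin down the concurrency semantics this suite relies
# on: no limit means always schedulable; otherwise the operation's active
# runs must stay below the limit (the operation-concurrency scheduling test
# further down suggests SCHEDULED runs occupy a slot too). A minimal sketch
# of that logic, not the actual model method; the `runs` reverse relation
# and the `last_status` property are assumptions:
def _sketch_operation_check_concurrency(operation_run):
    limit = operation_run.operation.concurrency
    if limit is None:
        return True  # no concurrency limit configured
    active = {OperationStatuses.SCHEDULED, OperationStatuses.RUNNING}
    busy = sum(1 for run in operation_run.operation.runs.exclude(
        id=operation_run.id) if run.last_status in active)
    return busy < limit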
def test_check_concurrency(self):
    # Pipeline without concurrency defaults to infinite concurrency
    pipeline = PipelineFactory()
    pipeline_run = PipelineRunFactory(pipeline=pipeline)
    assert pipeline_run.check_concurrency() is True

    # Pipeline with a concurrency limit of 2
    pipeline.concurrency = 2
    pipeline.save()
    # No running operation runs
    assert pipeline_run.check_concurrency() is True
    # One operation run
    operation_run1 = OperationRunFactory(pipeline_run=pipeline_run)
    assert pipeline_run.check_concurrency() is True
    # One operation run with RUNNING status
    OperationRunStatus.objects.create(status=OperationStatuses.RUNNING,
                                      operation_run=operation_run1)
    assert pipeline_run.check_concurrency() is True
    # Second operation run
    operation_run2 = OperationRunFactory(pipeline_run=pipeline_run)
    assert pipeline_run.check_concurrency() is True
    # Second operation run with RUNNING status
    OperationRunStatus.objects.create(status=OperationStatuses.RUNNING,
                                      operation_run=operation_run2)
    assert pipeline_run.check_concurrency() is False
def test_dag_property(self):
    pipeline_run = PipelineRunFactory()
    operation_runs = [
        OperationRunFactory(pipeline_run=pipeline_run) for _ in range(4)
    ]
    operation_runs[0].upstream_runs.set(operation_runs[2:])
    operation_runs[1].upstream_runs.set(operation_runs[2:])
    operation_by_ids = {op.id: op for op in operation_runs}
    assert pipeline_run.dag == (
        {
            operation_runs[0].id: set(),
            operation_runs[1].id: set(),
            operation_runs[2].id: {operation_runs[0].id,
                                   operation_runs[1].id},
            operation_runs[3].id: {operation_runs[0].id,
                                   operation_runs[1].id},
        },
        operation_by_ids)

    # Add operation runs outside the pipeline run's dag
    operation_run1 = OperationRunFactory()
    operation_run1.downstream_runs.set(
        [operation_runs[1], operation_runs[2], operation_runs[3]])
    operation_run2 = OperationRunFactory()
    operation_run2.upstream_runs.set(
        [operation_runs[0], operation_runs[2]])
    assert pipeline_run.dag == (
        {
            operation_runs[0].id: {operation_run2.id},
            operation_runs[1].id: set(),
            operation_runs[2].id: {operation_runs[0].id,
                                   operation_runs[1].id,
                                   operation_run2.id},
            operation_runs[3].id: {operation_runs[0].id,
                                   operation_runs[1].id},
        },
        operation_by_ids)
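
# The expected values above imply that `dag` returns a tuple of an
# adjacency map (operation-run id -> set of downstream run ids, which may
# include runs outside this pipeline run) and a dict indexing this
# pipeline run's own runs by id. A sketch of how such a pair could be
# built (hypothetical; the `operation_runs` reverse relation name is an
# assumption):
def _sketch_dag(pipeline_run):
    runs = list(pipeline_run.operation_runs.all())
    dag = {}
    for run in runs:
        # Downstream edges can point at runs belonging to other pipelines
        dag[run.id] = set(run.downstream_runs.values_list('id', flat=True))
    return dag, {run.id: run for run in runs}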
def test_scheduling_operation_run_sets_pipeline_run_to_scheduled(self):
    operation_run = OperationRunFactory()
    assert operation_run.last_status == OperationStatuses.CREATED
    assert operation_run.statuses.count() == 1
    pipeline_run = operation_run.pipeline_run
    assert pipeline_run.last_status == PipelineStatuses.CREATED
    assert pipeline_run.statuses.count() == 1

    operation_run.on_scheduled()
    pipeline_run.refresh_from_db()
    assert operation_run.last_status == OperationStatuses.SCHEDULED
    assert operation_run.statuses.count() == 2
    assert pipeline_run.last_status == PipelineStatuses.SCHEDULED
    assert pipeline_run.statuses.count() == 2
def test_failed_upstream_operation_runs_sets_pipeline_run_to_finished(self):
    operation_run = OperationRunFactory()
    assert operation_run.last_status == OperationStatuses.CREATED
    assert operation_run.statuses.count() == 1
    pipeline_run = operation_run.pipeline_run
    assert pipeline_run.last_status == PipelineStatuses.CREATED
    assert pipeline_run.statuses.count() == 1

    # Failing the upstream marks the only operation run as UPSTREAM_FAILED,
    # which finishes the pipeline run
    operation_run.on_upstream_failed()
    pipeline_run.refresh_from_db()
    assert operation_run.last_status == OperationStatuses.UPSTREAM_FAILED
    assert operation_run.statuses.count() == 2
    assert pipeline_run.last_status == PipelineStatuses.FINISHED
    assert pipeline_run.statuses.count() == 2
def test_operation_run_creation_sets_created_status(self):
    assert OperationRunStatus.objects.count() == 0
    # Assert `new_pipeline_run_status` task is also called
    operation_run = OperationRunFactory()
    assert OperationRunStatus.objects.filter(
        operation_run=operation_run).count() == 1
    assert operation_run.last_status == OperationStatuses.CREATED
def test_schedule_start_works_with_pipeline_concurrency(self):
    operation_run = OperationRunFactory()
    operation_run.operation.trigger_policy = TriggerPolicy.ONE_DONE
    operation_run.operation.save()
    pipeline_run = operation_run.pipeline_run
    # Set pipeline concurrency to 1
    pipeline_run.pipeline.concurrency = 1
    pipeline_run.pipeline.save()
    # Add upstream runs: one will fail, one will keep running
    upstream_run1 = OperationRunFactory(pipeline_run=pipeline_run)
    upstream_run2 = OperationRunFactory(pipeline_run=pipeline_run)
    operation_run.upstream_runs.set([upstream_run1, upstream_run2])
    with patch('pipelines.tasks.start_operation_run.delay') as start_operation_run:
        OperationRunStatus.objects.create(status=OperationStatuses.FAILED,
                                          operation_run=upstream_run1)
        OperationRunStatus.objects.create(status=OperationStatuses.RUNNING,
                                          operation_run=upstream_run2)
    assert start_operation_run.call_count == 1

    # The pipeline's concurrency slot is taken by the running upstream,
    # so scheduling asks to be retried and the run stays CREATED
    with patch('pipelines.models.OperationRun.start') as start_operation_run:
        assert operation_run.schedule_start() is True
    assert start_operation_run.call_count == 0
    operation_run.refresh_from_db()
    assert operation_run.last_status == OperationStatuses.CREATED
def test_is_upstream_done(self):
    operation_run = OperationRunFactory()
    # No upstream
    assert operation_run.is_upstream_done is True

    # Add a non-done upstream
    upstream_run1 = OperationRunFactory()
    operation_run.upstream_runs.set([upstream_run1])
    assert operation_run.is_upstream_done is False
    # A running upstream
    OperationRunStatus.objects.create(status=OperationStatuses.RUNNING,
                                      operation_run=upstream_run1)
    assert operation_run.is_upstream_done is False
    # A failed upstream
    OperationRunStatus.objects.create(status=OperationStatuses.FAILED,
                                      operation_run=upstream_run1)
    assert operation_run.is_upstream_done is True

    # A skipped upstream
    upstream_run2 = OperationRunFactory()
    operation_run.upstream_runs.set([upstream_run2])
    OperationRunStatus.objects.create(status=OperationStatuses.SKIPPED,
                                      operation_run=upstream_run2)
    assert operation_run.is_upstream_done is True

    # A succeeded upstream
    upstream_run3 = OperationRunFactory()
    operation_run.upstream_runs.set([upstream_run3])
    OperationRunStatus.objects.create(status=OperationStatuses.SUCCEEDED,
                                      operation_run=upstream_run3)
    assert operation_run.is_upstream_done is True

    # Many done upstreams
    operation_run.upstream_runs.set(
        [upstream_run1, upstream_run2, upstream_run3])
    assert operation_run.is_upstream_done is True

    # Add another upstream that is not done yet
    upstream_run4 = OperationRunFactory()
    operation_run.upstream_runs.add(upstream_run4)
    assert operation_run.is_upstream_done is False
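
# The assertions above treat FAILED, SKIPPED and SUCCEEDED as "done"
# states (the real status class may also include STOPPED or
# UPSTREAM_FAILED, which this test does not exercise). A sketch of the
# property the test implies, not the actual model code:
def _sketch_is_upstream_done(operation_run):
    done = {OperationStatuses.FAILED, OperationStatuses.SKIPPED,
            OperationStatuses.SUCCEEDED}
    # Every upstream run must have reached a terminal status
    return all(run.last_status in done
               for run in operation_run.upstream_runs.all())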
def test_succeeded_operation_runs_sets_pipeline_run_to_finished(self):
    operation_run = OperationRunFactory()
    assert operation_run.last_status == OperationStatuses.CREATED
    assert operation_run.statuses.count() == 1
    pipeline_run = operation_run.pipeline_run
    assert pipeline_run.last_status == PipelineStatuses.CREATED
    assert pipeline_run.statuses.count() == 1

    # Running the only operation to success finishes the pipeline
    operation_run.on_scheduled()
    operation_run.on_run()
    operation_run.on_success()
    pipeline_run.refresh_from_db()
    assert operation_run.last_status == OperationStatuses.SUCCEEDED
    assert operation_run.statuses.count() == 4
    assert pipeline_run.last_status == PipelineStatuses.FINISHED
    assert pipeline_run.statuses.count() == 4
def test_schedule_start_with_already_done_or_scheduled_operation_run(self):
    # A done (failed) operation run cannot be scheduled
    operation_run = OperationRunFactory()
    OperationRunStatus.objects.create(operation_run=operation_run,
                                      status=OperationStatuses.FAILED)
    assert operation_run.schedule_start() is False

    # An already scheduled operation run cannot be scheduled again
    operation_run = OperationRunFactory()
    OperationRunStatus.objects.create(operation_run=operation_run,
                                      status=OperationStatuses.SCHEDULED)
    assert operation_run.schedule_start() is False
def test_stopping_pipeline_run_stops_operation_runs(self):
    pipeline_run = PipelineRunFactory()
    [OperationRunFactory(pipeline_run=pipeline_run) for _ in range(2)]
    assert pipeline_run.statuses.count() == 1
    assert pipeline_run.last_status == PipelineStatuses.CREATED
    assert OperationRunStatus.objects.filter().count() == 2
    assert set(OperationRunStatus.objects.values_list(
        'status', flat=True)) == {OperationStatuses.CREATED, }

    # Set the pipeline run to stopped
    pipeline_run.on_stop()
    assert pipeline_run.statuses.count() == 2
    assert pipeline_run.last_status == PipelineStatuses.STOPPED
    # The operation runs are stopped as well
    assert OperationRunStatus.objects.filter().count() == 4
    assert set(OperationRunStatus.objects.values_list(
        'status', flat=True)) == {OperationStatuses.CREATED,
                                  OperationStatuses.STOPPED}
def test_skipping_all_operation_runs_sets_pipeline_run_to_finished(self):
    operation_run = OperationRunFactory()
    assert operation_run.last_status == OperationStatuses.CREATED
    assert operation_run.statuses.count() == 1
    pipeline_run = operation_run.pipeline_run
    assert pipeline_run.last_status == PipelineStatuses.CREATED
    assert pipeline_run.statuses.count() == 1
    # Create another operation run for this pipeline_run
    operation_run2 = OperationRunFactory(pipeline_run=pipeline_run)

    # Skipping the first operation does not finish the pipeline
    operation_run.on_skip()
    pipeline_run.refresh_from_db()
    assert operation_run.last_status == OperationStatuses.SKIPPED
    assert operation_run.statuses.count() == 2
    assert pipeline_run.last_status == PipelineStatuses.CREATED
    assert pipeline_run.statuses.count() == 1

    # Skipping the second operation finishes the pipeline
    operation_run2.on_skip()
    pipeline_run.refresh_from_db()
    assert pipeline_run.last_status == PipelineStatuses.FINISHED
    assert pipeline_run.statuses.count() == 2
def test_running_operation_run_sets_pipeline_run_to_running(self):
    operation_run = OperationRunFactory()
    assert operation_run.last_status == OperationStatuses.CREATED
    assert operation_run.statuses.count() == 1
    pipeline_run = operation_run.pipeline_run
    assert pipeline_run.last_status == PipelineStatuses.CREATED
    assert pipeline_run.statuses.count() == 1
    # Create another operation run for this pipeline_run
    OperationRunFactory(pipeline_run=pipeline_run)

    operation_run.on_scheduled()
    pipeline_run.refresh_from_db()
    assert operation_run.last_status == OperationStatuses.SCHEDULED
    assert operation_run.statuses.count() == 2
    assert pipeline_run.last_status == PipelineStatuses.SCHEDULED
    assert pipeline_run.statuses.count() == 2

    operation_run.on_run()
    pipeline_run.refresh_from_db()
    assert operation_run.last_status == OperationStatuses.RUNNING
    assert operation_run.statuses.count() == 3
    assert pipeline_run.last_status == PipelineStatuses.RUNNING
    assert pipeline_run.statuses.count() == 3
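
# The scheduled/running transitions above mirror each operation-run status
# change onto the pipeline run. One plausible shape for that propagation,
# purely illustrative (the project may use Django signals, and the
# `set_status` helper here is hypothetical):
def _sketch_propagate_status(pipeline_run, operation_status):
    mapping = {
        OperationStatuses.SCHEDULED: PipelineStatuses.SCHEDULED,
        OperationStatuses.RUNNING: PipelineStatuses.RUNNING,
    }
    new_status = mapping.get(operation_status)
    if new_status is not None and pipeline_run.last_status != new_status:
        pipeline_run.set_status(new_status)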
def test_schedule_start_with_failed_upstream(self):
    operation_run = OperationRunFactory()
    operation_run.operation.trigger_policy = TriggerPolicy.ALL_SUCCEEDED
    operation_run.operation.save()
    # Add a failed upstream
    upstream_run1 = OperationRunFactory()
    operation_run.upstream_runs.set([upstream_run1])
    with patch('pipelines.tasks.start_operation_run.delay') as start_operation_run:
        OperationRunStatus.objects.create(status=OperationStatuses.FAILED,
                                          operation_run=upstream_run1)
    assert start_operation_run.call_count == 1

    assert operation_run.schedule_start() is False
    # Check also that the operation run is marked as UPSTREAM_FAILED,
    # since this operation cannot be started anymore
    operation_run.refresh_from_db()
    assert operation_run.last_status == OperationStatuses.UPSTREAM_FAILED
def test_schedule_start_works_when_conditions_are_met(self):
    operation_run = OperationRunFactory()
    operation_run.operation.trigger_policy = TriggerPolicy.ONE_DONE
    operation_run.operation.save()
    pipeline_run = operation_run.pipeline_run
    # Add a failed upstream
    upstream_run1 = OperationRunFactory(pipeline_run=pipeline_run)
    operation_run.upstream_runs.set([upstream_run1])
    with patch('pipelines.tasks.start_operation_run.delay') as start_operation_run:
        OperationRunStatus.objects.create(status=OperationStatuses.FAILED,
                                          operation_run=upstream_run1)
    assert start_operation_run.call_count == 1

    # With ONE_DONE satisfied and no concurrency limit, the run is
    # scheduled and started
    with patch('pipelines.models.OperationRun.start') as start_operation_run:
        assert operation_run.schedule_start() is False
    assert start_operation_run.call_count == 1
    operation_run.refresh_from_db()
    assert operation_run.last_status == OperationStatuses.SCHEDULED
def test_trigger_policy_all_failed(self):
    operation_run = OperationRunFactory()
    operation = operation_run.operation
    operation.trigger_policy = TriggerPolicy.ALL_FAILED
    operation.save()
    # No upstream
    assert operation_run.check_upstream_trigger() is True

    # Add a non-done upstream
    upstream_run1 = OperationRunFactory()
    operation_run.upstream_runs.set([upstream_run1])
    assert operation_run.check_upstream_trigger() is False
    # A running upstream
    OperationRunStatus.objects.create(status=OperationStatuses.RUNNING,
                                      operation_run=upstream_run1)
    assert operation_run.check_upstream_trigger() is False
    # A failed upstream
    OperationRunStatus.objects.create(status=OperationStatuses.FAILED,
                                      operation_run=upstream_run1)
    assert operation_run.check_upstream_trigger() is True

    # A skipped upstream
    upstream_run2 = OperationRunFactory()
    operation_run.upstream_runs.set([upstream_run2])
    OperationRunStatus.objects.create(status=OperationStatuses.SKIPPED,
                                      operation_run=upstream_run2)
    assert operation_run.check_upstream_trigger() is False

    # A succeeded upstream
    upstream_run3 = OperationRunFactory()
    operation_run.upstream_runs.set([upstream_run3])
    OperationRunStatus.objects.create(status=OperationStatuses.SUCCEEDED,
                                      operation_run=upstream_run3)
    assert operation_run.check_upstream_trigger() is False

    # Multiple failed upstreams
    upstream_run4 = OperationRunFactory()
    operation_run.upstream_runs.set([upstream_run1, upstream_run4])
    OperationRunStatus.objects.create(status=OperationStatuses.FAILED,
                                      operation_run=upstream_run4)
    assert operation_run.check_upstream_trigger() is True

    # Many done upstreams with mixed statuses
    operation_run.upstream_runs.set(
        [upstream_run1, upstream_run2, upstream_run3])
    assert operation_run.check_upstream_trigger() is False
def test_trigger_policy_one_done(self):
    operation_run = OperationRunFactory()
    operation = operation_run.operation
    operation.trigger_policy = TriggerPolicy.ONE_DONE
    operation.save()
    # No upstream
    assert operation_run.check_upstream_trigger() is False

    # Add a non-done upstream
    upstream_run1 = OperationRunFactory()
    operation_run.upstream_runs.set([upstream_run1])
    assert operation_run.check_upstream_trigger() is False
    # A running upstream
    OperationRunStatus.objects.create(status=OperationStatuses.RUNNING,
                                      operation_run=upstream_run1)
    assert operation_run.check_upstream_trigger() is False
    # A failed upstream
    OperationRunStatus.objects.create(status=OperationStatuses.FAILED,
                                      operation_run=upstream_run1)
    assert operation_run.check_upstream_trigger() is True

    # A skipped upstream
    upstream_run2 = OperationRunFactory()
    operation_run.upstream_runs.set([upstream_run2])
    OperationRunStatus.objects.create(status=OperationStatuses.SKIPPED,
                                      operation_run=upstream_run2)
    assert operation_run.check_upstream_trigger() is True

    # A succeeded upstream
    upstream_run3 = OperationRunFactory()
    operation_run.upstream_runs.set([upstream_run3])
    OperationRunStatus.objects.create(status=OperationStatuses.SUCCEEDED,
                                      operation_run=upstream_run3)
    assert operation_run.check_upstream_trigger() is True

    # Adding another, not-done upstream keeps the trigger satisfied,
    # since one upstream is already done
    upstream_run4 = OperationRunFactory()
    operation_run.upstream_runs.add(upstream_run4)
    assert operation_run.check_upstream_trigger() is True
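
# Taken together, the two trigger-policy tests read as a dispatch on the
# operation's policy over the upstream runs' last statuses. A sketch
# covering only the two policies exercised here (hypothetical, not the
# model's actual dispatch):
def _sketch_check_upstream_trigger(operation_run):
    statuses = [run.last_status
                for run in operation_run.upstream_runs.all()]
    policy = operation_run.operation.trigger_policy
    done = {OperationStatuses.FAILED, OperationStatuses.SKIPPED,
            OperationStatuses.SUCCEEDED}
    if policy == TriggerPolicy.ALL_FAILED:
        # Vacuously satisfied with no upstreams
        return all(status == OperationStatuses.FAILED
                   for status in statuses)
    if policy == TriggerPolicy.ONE_DONE:
        # Needs at least one upstream in a terminal state
        return any(status in done for status in statuses)
    raise ValueError('Trigger policy not covered by this sketch: '
                     '{}'.format(policy))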
def test_schedule_start_works_with_operation_concurrency(self):
    operation_run = OperationRunFactory()
    operation_run.operation.trigger_policy = TriggerPolicy.ONE_DONE
    operation_run.operation.save()
    pipeline_run = operation_run.pipeline_run
    # Set operation concurrency to 1
    operation_run.operation.concurrency = 1
    operation_run.operation.save()
    # Add upstream runs: one will fail, one will keep running
    upstream_run1 = OperationRunFactory(pipeline_run=pipeline_run)
    upstream_run2 = OperationRunFactory(pipeline_run=pipeline_run)
    operation_run.upstream_runs.set([upstream_run1, upstream_run2])
    with patch('pipelines.tasks.start_operation_run.delay') as start_operation_run:
        OperationRunStatus.objects.create(status=OperationStatuses.FAILED,
                                          operation_run=upstream_run1)
        OperationRunStatus.objects.create(status=OperationStatuses.RUNNING,
                                          operation_run=upstream_run2)
    assert start_operation_run.call_count == 1
    # Add another operation run for the same operation
    new_operation_run = OperationRunFactory(
        operation=operation_run.operation)
    new_operation_run.upstream_runs.set([upstream_run1, upstream_run2])

    # The first run gets scheduled and started
    with patch('pipelines.models.OperationRun.start') as start_operation_run:
        assert operation_run.schedule_start() is False
    assert start_operation_run.call_count == 1
    operation_run.refresh_from_db()
    assert operation_run.last_status == OperationStatuses.SCHEDULED

    # The operation concurrency is now used up, so the second run cannot
    # start and stays CREATED
    new_operation_run.refresh_from_db()
    assert new_operation_run.last_status == OperationStatuses.CREATED
    with patch('pipelines.models.OperationRun.start') as start_operation_run:
        assert new_operation_run.schedule_start() is True
    assert start_operation_run.call_count == 0
    new_operation_run.refresh_from_db()
    assert new_operation_run.last_status == OperationStatuses.CREATED
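
# Across the schedule_start tests, the return value reads as a "needs
# retry" flag: False when the run reached a final decision (scheduled and
# started, already done, or marked UPSTREAM_FAILED), True when concurrency
# is exhausted and the caller should try again later. A sketch of that
# control flow under those assumptions, not the actual model method:
def _sketch_schedule_start(operation_run):
    if operation_run.last_status != OperationStatuses.CREATED:
        return False  # already scheduled or done: nothing to do
    if not operation_run.check_upstream_trigger():
        if operation_run.is_upstream_done:
            # The trigger can never be satisfied anymore
            operation_run.on_upstream_failed()
        return False
    if not (operation_run.pipeline_run.check_concurrency()
            and operation_run.check_concurrency()):
        return True  # ask the caller to retry once a slot frees up
    operation_run.on_scheduled()
    operation_run.start()
    return False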
class TestOperationTask(BaseTest):
    def setUp(self):
        self.operation_run = OperationRunFactory()
        self.pipeline_run = self.operation_run.pipeline_run
        # Manually set status to scheduled
        self.operation_run.on_scheduled()
        return super().setUp()

    def test_task_without_operation_run_raises(self):
        @celery_app.task(base=OperationTask, shared=False)
        def dummy_task():
            return

        with self.assertRaises(TypeError):
            dummy_task.apply_async()

    def test_task_with_operation_run_succeeds(self):
        @celery_app.task(base=OperationTask, shared=False)
        def dummy_task(operation_run_id):
            return

        kwargs = {'operation_run_id': self.operation_run.id}
        dummy_task.apply_async(kwargs=kwargs)
        self.operation_run.refresh_from_db()
        assert self.operation_run.succeeded is True
        assert set(self.operation_run.statuses.values_list(
            'status', flat=True)) == {
                OperationStatuses.CREATED,
                OperationStatuses.SCHEDULED,
                OperationStatuses.RUNNING,
                OperationStatuses.SUCCEEDED,
            }
        self.pipeline_run.refresh_from_db()
        assert self.operation_run.pipeline_run.last_status == PipelineStatuses.FINISHED
        assert set(self.operation_run.pipeline_run.statuses.values_list(
            'status', flat=True)) == {
                PipelineStatuses.CREATED,
                PipelineStatuses.SCHEDULED,
                PipelineStatuses.RUNNING,
                PipelineStatuses.FINISHED,
            }

    def test_task_with_error_fails(self):
        @celery_app.task(base=OperationTask, shared=False)
        def raising_task(operation_run_id):
            raise KeyError

        kwargs = {'operation_run_id': self.operation_run.id}
        raising_task.apply_async(kwargs=kwargs)
        self.operation_run.refresh_from_db()
        assert self.operation_run.failed is True
        assert set(self.operation_run.statuses.values_list(
            'status', flat=True)) == {
                OperationStatuses.CREATED,
                OperationStatuses.SCHEDULED,
                OperationStatuses.RUNNING,
                OperationStatuses.FAILED,
            }
        self.pipeline_run.refresh_from_db()
        assert self.operation_run.pipeline_run.last_status == PipelineStatuses.FINISHED
        assert set(self.operation_run.pipeline_run.statuses.values_list(
            'status', flat=True)) == {
                PipelineStatuses.CREATED,
                PipelineStatuses.SCHEDULED,
                PipelineStatuses.RUNNING,
                PipelineStatuses.FINISHED,
            }

    def test_task_retries_for_specified_exception(self):
        class RetryTask(ClassBasedTask):
            retry_for = (KeyError, )

            @staticmethod
            def _run(task_bind, *args, **kwargs):
                raise KeyError

        @celery_app.task(base=OperationTask, bind=True, shared=False)
        def retry_task(task_bind, operation_run_id):
            assert task_bind.max_retries == 2
            assert task_bind.countdown == 0
            RetryTask.run(task_bind=task_bind,
                          operation_run_id=operation_run_id)

        # Add max retries and retry delay to the operation
        self.operation_run.operation.max_retries = 2
        self.operation_run.operation.retry_delay = 0
        self.operation_run.operation.save()
        kwargs = {'operation_run_id': self.operation_run.id}
        retry_task.apply_async(kwargs=kwargs)
        self.operation_run.refresh_from_db()
        assert self.operation_run.last_status == OperationStatuses.RETRYING
        assert set(self.operation_run.statuses.values_list(
            'status', flat=True)) == {
                OperationStatuses.CREATED,
                OperationStatuses.SCHEDULED,
                OperationStatuses.RUNNING,
                OperationStatuses.RETRYING,
            }
        self.pipeline_run.refresh_from_db()
        assert self.operation_run.pipeline_run.last_status == PipelineStatuses.RUNNING
        assert set(self.operation_run.pipeline_run.statuses.values_list(
            'status', flat=True)) == {
                PipelineStatuses.CREATED,
                PipelineStatuses.SCHEDULED,
                PipelineStatuses.RUNNING,
            }
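
# The OperationTask behavior exercised above (status transitions around
# execution, plus failure handling) maps naturally onto Celery's base-Task
# hooks. A stripped-down illustration of the pattern, not the project's
# actual class; the hook signatures are standard Celery API:
from celery import Task

class SketchOperationTask(Task):
    """Base task that records run state around execution (sketch)."""

    def __call__(self, *args, **kwargs):
        if kwargs.get('operation_run_id') is None:
            # Mirrors test_task_without_operation_run_raises
            raise TypeError('An operation_run_id kwarg is required.')
        # A real implementation would load the run here and call on_run()
        return super().__call__(*args, **kwargs)

    def on_success(self, retval, task_id, args, kwargs):
        # A real implementation would mark the operation run SUCCEEDED
        pass

    def on_failure(self, exc, task_id, args, kwargs, einfo):
        # A real implementation would mark the operation run FAILED,
        # or RETRYING when the exception is listed in `retry_for`
        pass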