Пример #1
0
    def test_check_concurrency(self):
        """Exercise `check_concurrency` on an operation run as sibling runs start.

        The check is expected to pass while the operation has no concurrency
        set and, once the limit is 2, to keep passing until two runs of the
        operation carry a RUNNING status.
        """

        def mark_running(run):
            # Record a RUNNING status entry for the given operation run.
            OperationRunStatus.objects.create(operation_run=run,
                                              status=OperationStatuses.RUNNING)

        # No concurrency configured on the operation: the check passes.
        op = OperationFactory()
        checked_run = OperationRunFactory(operation=op)
        assert checked_run.check_concurrency() is True

        # Limit the operation to two concurrent runs; nothing is running yet.
        op.concurrency = 2
        op.save()
        assert checked_run.check_concurrency() is True

        # A sibling run without a RUNNING status does not count.
        first_sibling = OperationRunFactory(operation=op)
        assert checked_run.check_concurrency() is True

        # One RUNNING sibling is still below the limit.
        mark_running(first_sibling)
        assert checked_run.check_concurrency() is True

        # A second sibling that is not running yet changes nothing.
        second_sibling = OperationRunFactory(operation=op)
        assert checked_run.check_concurrency() is True

        # Two RUNNING siblings reach the limit: the check now fails.
        mark_running(second_sibling)
        assert checked_run.check_concurrency() is False
    def setUp(self):
        """Create an operation run, keep its pipeline run, and mark it scheduled."""
        run = OperationRunFactory()
        self.operation_run = run
        self.pipeline_run = run.pipeline_run
        # Move the run to the scheduled state by hand before the base setUp runs.
        run.on_scheduled()
        return super().setUp()
Пример #3
0
    def test_check_concurrency(self):
        """Exercise `check_concurrency` on a pipeline run as its operations start.

        The check is expected to pass while the pipeline has no concurrency
        set and, once the limit is 2, to keep passing until two operation runs
        of the pipeline run carry a RUNNING status.
        """

        def mark_running(run):
            # Record a RUNNING status entry for the given operation run.
            OperationRunStatus.objects.create(operation_run=run,
                                              status=OperationStatuses.RUNNING)

        # No concurrency configured on the pipeline: the check passes.
        parent_pipeline = PipelineFactory()
        checked_run = PipelineRunFactory(pipeline=parent_pipeline)
        assert checked_run.check_concurrency() is True

        # Limit the pipeline to two concurrent operation runs; none exist yet.
        parent_pipeline.concurrency = 2
        parent_pipeline.save()
        assert checked_run.check_concurrency() is True

        # An operation run without a RUNNING status does not count.
        first_op_run = OperationRunFactory(pipeline_run=checked_run)
        assert checked_run.check_concurrency() is True

        # One RUNNING operation run is still below the limit.
        mark_running(first_op_run)
        assert checked_run.check_concurrency() is True

        # A second operation run that is not running yet changes nothing.
        second_op_run = OperationRunFactory(pipeline_run=checked_run)
        assert checked_run.check_concurrency() is True

        # Two RUNNING operation runs reach the limit: the check now fails.
        mark_running(second_op_run)
        assert checked_run.check_concurrency() is False
Пример #4
0
    def test_dag_property(self):
        """Verify the `(dag, runs-by-id)` pair exposed by `PipelineRun.dag`.

        The dag is compared as a mapping from each operation run id of the
        pipeline run to the set of ids of the runs that have it as upstream.
        """
        pipeline_run = PipelineRunFactory()
        runs = []
        for _ in range(4):
            runs.append(OperationRunFactory(pipeline_run=pipeline_run))
        run_a, run_b, run_c, run_d = runs

        # run_c and run_d are upstream of both run_a and run_b.
        run_a.upstream_runs.set([run_c, run_d])
        run_b.upstream_runs.set([run_c, run_d])
        runs_by_id = {run.id: run for run in runs}

        expected_dag = {
            run_a.id: set(),
            run_b.id: set(),
            run_c.id: {run_a.id, run_b.id},
            run_d.id: {run_a.id, run_b.id},
        }
        assert pipeline_run.dag == (expected_dag, runs_by_id)

        # Wire in runs that were not created for this pipeline run: one placed
        # upstream of run_b/run_c/run_d, one placed downstream of run_a/run_c.
        outside_upstream = OperationRunFactory()
        outside_upstream.downstream_runs.set([run_b, run_c, run_d])

        outside_downstream = OperationRunFactory()
        outside_downstream.upstream_runs.set([run_a, run_c])

        # Outside runs never become keys (nor entries of the by-id mapping);
        # only the outside downstream run shows up, as a value.
        expected_dag = {
            run_a.id: {outside_downstream.id},
            run_b.id: set(),
            run_c.id: {run_a.id, run_b.id, outside_downstream.id},
            run_d.id: {run_a.id, run_b.id},
        }
        assert pipeline_run.dag == (expected_dag, runs_by_id)
Пример #5
0
    def test_scheduling_operation_run_sets_pipeline_run_to_scheduled(self):
        """Scheduling an operation run should move its pipeline run to SCHEDULED."""

        def expect(entity, status, history_size):
            # Check both the latest status and the number of recorded statuses.
            assert entity.last_status == status
            assert entity.statuses.count() == history_size

        run = OperationRunFactory()
        expect(run, OperationStatuses.CREATED, 1)
        parent = run.pipeline_run
        expect(parent, PipelineStatuses.CREATED, 1)

        run.on_scheduled()
        parent.refresh_from_db()
        expect(run, OperationStatuses.SCHEDULED, 2)
        expect(parent, PipelineStatuses.SCHEDULED, 2)
Пример #6
0
    def test_failed_upstream_operation_runs_sets_pipeline_run_to_finished(
            self):
        """An UPSTREAM_FAILED operation run should finish its pipeline run."""

        def expect(entity, status, history_size):
            # Check both the latest status and the number of recorded statuses.
            assert entity.last_status == status
            assert entity.statuses.count() == history_size

        run = OperationRunFactory()
        expect(run, OperationStatuses.CREATED, 1)
        parent = run.pipeline_run
        expect(parent, PipelineStatuses.CREATED, 1)

        # The only operation run of the pipeline run is marked as upstream
        # failed, which is expected to move the pipeline run to FINISHED.
        run.on_upstream_failed()
        parent.refresh_from_db()
        expect(run, OperationStatuses.UPSTREAM_FAILED, 2)
        expect(parent, PipelineStatuses.FINISHED, 2)
Пример #7
0
    def test_operation_run_creation_sets_created_status(self):
        """Creating an operation run should record exactly one CREATED status."""
        assert OperationRunStatus.objects.count() == 0

        # NOTE(review): the original comment mentioned asserting that the
        # `new_pipeline_run_status` task is called; nothing here checks that.
        run = OperationRunFactory()
        statuses_for_run = OperationRunStatus.objects.filter(operation_run=run)
        assert statuses_for_run.count() == 1
        assert run.last_status == OperationStatuses.CREATED
Пример #8
0
    def test_schedule_start_works_with_pipeline_concurrency(self):
        """`schedule_start` must not start a run when the pipeline is at its limit.

        With a pipeline concurrency of 1 and one upstream already RUNNING,
        `schedule_start` is expected to return True without calling `start`
        (presumably True asks the caller to retry later - TODO confirm) and
        the run stays in CREATED.
        """

        def record(run, status):
            # Append a status entry to the given operation run.
            OperationRunStatus.objects.create(operation_run=run, status=status)

        run = OperationRunFactory()
        op = run.operation
        op.trigger_policy = TriggerPolicy.ONE_DONE
        op.save()
        parent = run.pipeline_run
        # Allow a single concurrent operation run in the pipeline.
        pipeline = parent.pipeline
        pipeline.concurrency = 1
        pipeline.save()

        # Two upstream runs: one ends up FAILED, the other RUNNING.
        failed_upstream = OperationRunFactory(pipeline_run=parent)
        running_upstream = OperationRunFactory(pipeline_run=parent)
        run.upstream_runs.set([failed_upstream, running_upstream])
        with patch('pipelines.tasks.start_operation_run.delay') as delay_mock:
            record(failed_upstream, OperationStatuses.FAILED)
            record(running_upstream, OperationStatuses.RUNNING)

        assert delay_mock.call_count == 1

        with patch('pipelines.models.OperationRun.start') as start_mock:
            assert run.schedule_start() is True

        assert start_mock.call_count == 0

        run.refresh_from_db()
        assert run.last_status == OperationStatuses.CREATED
Пример #9
0
    def test_is_upstream_done(self):
        """Check `is_upstream_done` for various upstream status combinations."""

        def record(run, status):
            # Append a status entry to the given operation run.
            OperationRunStatus.objects.create(operation_run=run, status=status)

        run = OperationRunFactory()

        # Without any upstream the property is True.
        assert run.is_upstream_done is True

        # A freshly created upstream is not done.
        created_then_failed = OperationRunFactory()
        run.upstream_runs.set([created_then_failed])
        assert run.is_upstream_done is False

        # Still not done while that upstream is RUNNING.
        record(created_then_failed, OperationStatuses.RUNNING)
        assert run.is_upstream_done is False

        # FAILED counts as done.
        record(created_then_failed, OperationStatuses.FAILED)
        assert run.is_upstream_done is True

        # SKIPPED counts as done (this upstream replaces the previous one).
        skipped = OperationRunFactory()
        run.upstream_runs.set([skipped])
        record(skipped, OperationStatuses.SKIPPED)
        assert run.is_upstream_done is True

        # SUCCEEDED counts as done (again replacing the previous upstream).
        succeeded = OperationRunFactory()
        run.upstream_runs.set([succeeded])
        record(succeeded, OperationStatuses.SUCCEEDED)
        assert run.is_upstream_done is True

        # Several done upstreams together are still done.
        run.upstream_runs.set([created_then_failed, skipped, succeeded])
        assert run.is_upstream_done is True

        # One extra upstream that has only been created breaks the condition.
        pending = OperationRunFactory()
        run.upstream_runs.add(pending)
        assert run.is_upstream_done is False
Пример #10
0
    def test_succeeded_operation_runs_sets_pipeline_run_to_finished(self):
        """A succeeded (single) operation run should finish its pipeline run."""

        def expect(entity, status, history_size):
            # Check both the latest status and the number of recorded statuses.
            assert entity.last_status == status
            assert entity.statuses.count() == history_size

        run = OperationRunFactory()
        expect(run, OperationStatuses.CREATED, 1)
        parent = run.pipeline_run
        expect(parent, PipelineStatuses.CREATED, 1)

        # Walk the only operation run through scheduled -> running -> success;
        # each transition is expected to add one status on both sides.
        run.on_scheduled()
        run.on_run()
        run.on_success()
        parent.refresh_from_db()
        expect(run, OperationStatuses.SUCCEEDED, 4)
        expect(parent, PipelineStatuses.FINISHED, 4)
Пример #11
0
    def test_schedule_start_with_operation_run_already_scheduled_operation_run(
            self):
        """`schedule_start` returns False for runs already FAILED or SCHEDULED."""
        non_startable = (OperationStatuses.FAILED, OperationStatuses.SCHEDULED)
        for status in non_startable:
            # A fresh run is created for each status under test.
            run = OperationRunFactory()
            OperationRunStatus.objects.create(operation_run=run,
                                              status=status)
            assert run.schedule_start() is False
Пример #12
0
 def test_stopping_pipeline_run_stops_operation_runs(self):
     """Stopping a pipeline run should also record STOPPED on its operation runs."""

     def recorded_statuses():
         # Distinct status values over every operation run status row.
         return set(OperationRunStatus.objects.values_list(
             'status', flat=True))

     pipeline_run = PipelineRunFactory()
     for _ in range(2):
         OperationRunFactory(pipeline_run=pipeline_run)

     assert pipeline_run.statuses.count() == 1
     assert pipeline_run.last_status == PipelineStatuses.CREATED
     # One CREATED row per operation run so far.
     assert OperationRunStatus.objects.filter().count() == 2
     assert recorded_statuses() == {OperationStatuses.CREATED}

     # Stop the pipeline run.
     pipeline_run.on_stop()
     assert pipeline_run.statuses.count() == 2
     assert pipeline_run.last_status == PipelineStatuses.STOPPED

     # Each operation run gained one more row, and STOPPED now appears.
     assert OperationRunStatus.objects.filter().count() == 4
     assert recorded_statuses() == {
         OperationStatuses.CREATED, OperationStatuses.STOPPED
     }
Пример #13
0
    def test_skipping_all_operation_runs_sets_pipeline_run_to_finished(self):
        """The pipeline run finishes only once all its operation runs are skipped."""

        def expect(entity, status, history_size):
            # Check both the latest status and the number of recorded statuses.
            assert entity.last_status == status
            assert entity.statuses.count() == history_size

        first_run = OperationRunFactory()
        expect(first_run, OperationStatuses.CREATED, 1)
        parent = first_run.pipeline_run
        expect(parent, PipelineStatuses.CREATED, 1)

        # A second operation run attached to the same pipeline run.
        second_run = OperationRunFactory(pipeline_run=parent)

        # Skipping only the first operation run leaves the pipeline run as is.
        first_run.on_skip()
        parent.refresh_from_db()
        expect(first_run, OperationStatuses.SKIPPED, 2)
        expect(parent, PipelineStatuses.CREATED, 1)

        # Skipping the second one as well finishes the pipeline run.
        second_run.on_skip()
        parent.refresh_from_db()
        expect(parent, PipelineStatuses.FINISHED, 2)
Пример #14
0
    def test_running_operation_run_sets_pipeline_run_to_running(self):
        """Scheduling then running an operation run is mirrored on the pipeline run."""

        def expect(entity, status, history_size):
            # Check both the latest status and the number of recorded statuses.
            assert entity.last_status == status
            assert entity.statuses.count() == history_size

        run = OperationRunFactory()
        expect(run, OperationStatuses.CREATED, 1)
        parent = run.pipeline_run
        expect(parent, PipelineStatuses.CREATED, 1)

        # A second operation run in the same pipeline run, left untouched.
        OperationRunFactory(pipeline_run=parent)

        run.on_scheduled()
        parent.refresh_from_db()
        expect(run, OperationStatuses.SCHEDULED, 2)
        expect(parent, PipelineStatuses.SCHEDULED, 2)

        run.on_run()
        parent.refresh_from_db()
        expect(run, OperationStatuses.RUNNING, 3)
        expect(parent, PipelineStatuses.RUNNING, 3)
Пример #15
0
    def test_schedule_start_with_failed_upstream(self):
        """With ALL_SUCCEEDED, a failed upstream makes the run UPSTREAM_FAILED."""
        run = OperationRunFactory()
        op = run.operation
        op.trigger_policy = TriggerPolicy.ALL_SUCCEEDED
        op.save()

        # Attach a single upstream and fail it while the delay task is mocked.
        failed_upstream = OperationRunFactory()
        run.upstream_runs.set([failed_upstream])
        with patch('pipelines.tasks.start_operation_run.delay') as delay_mock:
            OperationRunStatus.objects.create(operation_run=failed_upstream,
                                              status=OperationStatuses.FAILED)

        assert delay_mock.call_count == 1

        assert run.schedule_start() is False

        # The run can no longer be started, so it is expected to be flagged
        # as UPSTREAM_FAILED.
        run.refresh_from_db()
        assert run.last_status == OperationStatuses.UPSTREAM_FAILED
Пример #16
0
    def test_schedule_start_works_when_conditions_are_met(self):
        """With ONE_DONE and a failed upstream, `schedule_start` starts the run.

        `schedule_start` is expected to return False, call `start` once and
        leave the run in SCHEDULED.
        """
        run = OperationRunFactory()
        op = run.operation
        op.trigger_policy = TriggerPolicy.ONE_DONE
        op.save()
        parent = run.pipeline_run

        # Attach a single upstream and fail it while the delay task is mocked.
        failed_upstream = OperationRunFactory(pipeline_run=parent)
        run.upstream_runs.set([failed_upstream])
        with patch('pipelines.tasks.start_operation_run.delay') as delay_mock:
            OperationRunStatus.objects.create(operation_run=failed_upstream,
                                              status=OperationStatuses.FAILED)

        assert delay_mock.call_count == 1

        with patch('pipelines.models.OperationRun.start') as start_mock:
            assert run.schedule_start() is False

        assert start_mock.call_count == 1

        run.refresh_from_db()
        assert run.last_status == OperationStatuses.SCHEDULED
Пример #17
0
    def test_trigger_policy_all_failed(self):
        """Check `check_upstream_trigger` under the ALL_FAILED trigger policy."""

        def record(upstream, status):
            # Append a status entry to the given operation run.
            OperationRunStatus.objects.create(operation_run=upstream,
                                              status=status)

        run = OperationRunFactory()
        op = run.operation
        op.trigger_policy = TriggerPolicy.ALL_FAILED
        op.save()

        # Without any upstream the trigger check passes.
        assert run.check_upstream_trigger() is True

        # An upstream that has only been created does not satisfy it.
        first_failed = OperationRunFactory()
        run.upstream_runs.set([first_failed])
        assert run.check_upstream_trigger() is False

        # Neither does a RUNNING upstream.
        record(first_failed, OperationStatuses.RUNNING)
        assert run.check_upstream_trigger() is False

        # A FAILED upstream does.
        record(first_failed, OperationStatuses.FAILED)
        assert run.check_upstream_trigger() is True

        # A SKIPPED upstream (replacing the previous one) does not.
        skipped = OperationRunFactory()
        run.upstream_runs.set([skipped])
        record(skipped, OperationStatuses.SKIPPED)
        assert run.check_upstream_trigger() is False

        # A SUCCEEDED upstream (replacing the previous one) does not either.
        succeeded = OperationRunFactory()
        run.upstream_runs.set([succeeded])
        record(succeeded, OperationStatuses.SUCCEEDED)
        assert run.check_upstream_trigger() is False

        # Two FAILED upstreams together satisfy the policy.
        second_failed = OperationRunFactory()
        run.upstream_runs.set([first_failed, second_failed])
        record(second_failed, OperationStatuses.FAILED)
        assert run.check_upstream_trigger() is True

        # A mix of failed, skipped and succeeded upstreams does not.
        run.upstream_runs.set([first_failed, skipped, succeeded])
        assert run.check_upstream_trigger() is False
Пример #18
0
    def test_trigger_policy_one_done(self):
        """Check `check_upstream_trigger` under the ONE_DONE trigger policy."""

        def record(upstream, status):
            # Append a status entry to the given operation run.
            OperationRunStatus.objects.create(operation_run=upstream,
                                              status=status)

        run = OperationRunFactory()
        op = run.operation
        op.trigger_policy = TriggerPolicy.ONE_DONE
        op.save()

        # Without any upstream nothing can be done yet: the check fails.
        assert run.check_upstream_trigger() is False

        # An upstream that has only been created is not done.
        failed = OperationRunFactory()
        run.upstream_runs.set([failed])
        assert run.check_upstream_trigger() is False

        # Neither is a RUNNING upstream.
        record(failed, OperationStatuses.RUNNING)
        assert run.check_upstream_trigger() is False

        # A FAILED upstream counts as done.
        record(failed, OperationStatuses.FAILED)
        assert run.check_upstream_trigger() is True

        # So does a SKIPPED upstream (replacing the previous one).
        skipped = OperationRunFactory()
        run.upstream_runs.set([skipped])
        record(skipped, OperationStatuses.SKIPPED)
        assert run.check_upstream_trigger() is True

        # And a SUCCEEDED upstream (replacing the previous one).
        succeeded = OperationRunFactory()
        run.upstream_runs.set([succeeded])
        record(succeeded, OperationStatuses.SUCCEEDED)
        assert run.check_upstream_trigger() is True

        # Adding an upstream that is not done keeps the check passing.
        pending = OperationRunFactory()
        run.upstream_runs.add(pending)
        assert run.check_upstream_trigger() is True
Пример #19
0
    def test_schedule_start_works_with_operation_concurrency(self):
        """`schedule_start` honours the operation's concurrency limit.

        With an operation concurrency of 1, the first run gets scheduled
        (`schedule_start` returns False and `start` is called once), while a
        second run of the same operation is not started: `schedule_start`
        returns True and the run stays in CREATED.
        """

        def record(upstream, status):
            # Append a status entry to the given operation run.
            OperationRunStatus.objects.create(operation_run=upstream,
                                              status=status)

        first_run = OperationRunFactory()
        op = first_run.operation
        op.trigger_policy = TriggerPolicy.ONE_DONE
        op.save()
        parent = first_run.pipeline_run
        # Allow a single concurrent run of this operation.
        op.concurrency = 1
        op.save()

        # Two upstream runs: one ends up FAILED, the other RUNNING.
        failed_upstream = OperationRunFactory(pipeline_run=parent)
        running_upstream = OperationRunFactory(pipeline_run=parent)
        upstreams = [failed_upstream, running_upstream]
        first_run.upstream_runs.set(upstreams)
        with patch('pipelines.tasks.start_operation_run.delay') as delay_mock:
            record(failed_upstream, OperationStatuses.FAILED)
            record(running_upstream, OperationStatuses.RUNNING)

        assert delay_mock.call_count == 1

        # A second run of the same operation, sharing the same upstreams.
        second_run = OperationRunFactory(operation=op)
        second_run.upstream_runs.set(upstreams)

        with patch('pipelines.models.OperationRun.start') as start_mock:
            assert first_run.schedule_start() is False

        assert start_mock.call_count == 1

        first_run.refresh_from_db()
        assert first_run.last_status == OperationStatuses.SCHEDULED

        # The second run is untouched so far.
        second_run.refresh_from_db()
        assert second_run.last_status == OperationStatuses.CREATED

        # Trying to schedule it hits the concurrency limit: no start call.
        with patch('pipelines.models.OperationRun.start') as start_mock:
            assert second_run.schedule_start() is True

        assert start_mock.call_count == 0

        second_run.refresh_from_db()
        assert second_run.last_status == OperationStatuses.CREATED
Пример #20
0
class TestOperationTask(BaseTest):
    """Tests for celery tasks declared with `OperationTask` as their base.

    Each test applies a throwaway task for a scheduled operation run and then
    checks the status history recorded on the operation run and on its
    pipeline run.

    NOTE(review): the assertions made right after `apply_async` rely on the
    task having already run; presumably celery executes tasks eagerly in the
    test settings - confirm.
    """

    def setUp(self):
        """Create an operation run, keep its pipeline run, and mark it scheduled."""

        self.operation_run = OperationRunFactory()
        self.pipeline_run = self.operation_run.pipeline_run
        # Manually set status to scheduled
        self.operation_run.on_scheduled()
        return super().setUp()

    def test_task_without_operation_run_raises(self):
        """Applying an `OperationTask` without `operation_run_id` raises TypeError."""
        @celery_app.task(base=OperationTask, shared=False)
        def dummy_task():
            return

        with self.assertRaises(TypeError):
            dummy_task.apply_async()

    def test_task_with_operation_run_succeeds(self):
        """A task that returns normally marks the run SUCCEEDED and the pipeline FINISHED."""
        @celery_app.task(base=OperationTask, shared=False)
        def dummy_task(operation_run_id):
            return

        kwargs = {'operation_run_id': self.operation_run.id}
        dummy_task.apply_async(kwargs=kwargs)
        self.operation_run.refresh_from_db()
        assert self.operation_run.succeeded is True
        # Compared as a set: only the distinct statuses seen, not their order.
        assert set(self.operation_run.statuses.values_list(
            'status', flat=True)) == {
                OperationStatuses.CREATED,
                OperationStatuses.SCHEDULED,
                OperationStatuses.RUNNING,
                OperationStatuses.SUCCEEDED,
            }
        self.pipeline_run.refresh_from_db()
        assert self.operation_run.pipeline_run.last_status == PipelineStatuses.FINISHED
        assert set(
            self.operation_run.pipeline_run.statuses.values_list(
                'status', flat=True)) == {
                    PipelineStatuses.CREATED,
                    PipelineStatuses.SCHEDULED,
                    PipelineStatuses.RUNNING,
                    PipelineStatuses.FINISHED,
                }

    def test_task_with_error_fails(self):
        """A task that raises marks the run FAILED and the pipeline FINISHED."""
        @celery_app.task(base=OperationTask, shared=False)
        def raising_task(operation_run_id):
            raise KeyError

        kwargs = {'operation_run_id': self.operation_run.id}
        raising_task.apply_async(kwargs=kwargs)
        self.operation_run.refresh_from_db()
        assert self.operation_run.failed is True
        # Compared as a set: only the distinct statuses seen, not their order.
        assert set(self.operation_run.statuses.values_list(
            'status', flat=True)) == {
                OperationStatuses.CREATED,
                OperationStatuses.SCHEDULED,
                OperationStatuses.RUNNING,
                OperationStatuses.FAILED,
            }
        self.pipeline_run.refresh_from_db()
        assert self.operation_run.pipeline_run.last_status == PipelineStatuses.FINISHED
        assert set(
            self.operation_run.pipeline_run.statuses.values_list(
                'status', flat=True)) == {
                    PipelineStatuses.CREATED,
                    PipelineStatuses.SCHEDULED,
                    PipelineStatuses.RUNNING,
                    PipelineStatuses.FINISHED,
                }

    def test_task_retries_for_specified_exception(self):
        """An exception listed in `retry_for` leaves the run in RETRYING.

        The operation is configured with `max_retries = 2` and
        `retry_delay = 0`; the bound task checks that these show up as
        `max_retries` and `countdown` on the task instance.
        """
        class RetryTask(ClassBasedTask):
            # KeyError is declared as the exception to retry for.
            retry_for = (KeyError, )

            @staticmethod
            def _run(task_bind, *args, **kwargs):
                raise KeyError

        @celery_app.task(base=OperationTask, bind=True, shared=False)
        def retry_task(task_bind, operation_run_id):
            # These assertions execute inside the task body.
            assert task_bind.max_retries == 2
            assert task_bind.countdown == 0
            RetryTask.run(task_bind=task_bind,
                          operation_run_id=operation_run_id)

        # Add retries and count to the operation
        self.operation_run.operation.max_retries = 2
        self.operation_run.operation.retry_delay = 0
        self.operation_run.operation.save()

        kwargs = {'operation_run_id': self.operation_run.id}
        retry_task.apply_async(kwargs=kwargs)
        self.operation_run.refresh_from_db()
        assert self.operation_run.last_status == OperationStatuses.RETRYING
        assert set(self.operation_run.statuses.values_list(
            'status', flat=True)) == {
                OperationStatuses.CREATED,
                OperationStatuses.SCHEDULED,
                OperationStatuses.RUNNING,
                OperationStatuses.RETRYING,
            }
        # The pipeline run is expected to stay RUNNING while the run retries.
        self.pipeline_run.refresh_from_db()
        assert self.operation_run.pipeline_run.last_status == PipelineStatuses.RUNNING
        assert set(
            self.operation_run.pipeline_run.statuses.values_list(
                'status', flat=True)) == {
                    PipelineStatuses.CREATED,
                    PipelineStatuses.SCHEDULED,
                    PipelineStatuses.RUNNING,
                }