Exemplo n.º 1
0
def pipelines_start_operation(operation_run_id: int) -> None:
    """Start the operation run identified by ``operation_run_id``.

    The run is looked up with ``get_valid_operation_run``. When the lookup
    yields nothing, the fact is logged and the task stops; otherwise the run
    is handed to ``start_operation_run``.

    Args:
        operation_run_id: Identifier of the operation run to start.
    """
    operation_run = get_valid_operation_run(operation_run_id=operation_run_id)
    if not operation_run:
        _logger.info('Operation `%s` does not exist any more.',
                     operation_run_id)
        # Nothing to start: stop here instead of passing a missing run on.
        return

    start_operation_run(operation_run)
Exemplo n.º 2
0
    def test_skipping_all_operation_runs_sets_pipeline_run_to_finished(self):
        """Pipeline run becomes DONE only after every operation run is skipped.

        Skipping the first of two operation runs leaves the pipeline run in
        CREATED; skipping the second one moves it to DONE.
        """
        first_run = OperationRunFactory()
        start_operation_run(first_run)
        first_run.refresh_from_db()
        assert first_run.last_status == OperationStatuses.CREATED
        parent_run = first_run.pipeline_run
        assert parent_run.last_status == PipelineLifeCycle.CREATED
        assert parent_run.statuses.count() == 1

        # A sibling operation run attached to the same pipeline run.
        second_run = OperationRunFactory(pipeline_run=parent_run)
        start_operation_run(second_run)
        second_run.refresh_from_db()

        # Skipping only the first run must leave the pipeline run untouched.
        with patch('scheduler.tasks.jobs.jobs_stop.apply_async') as stop_mock:
            skip_operation_run(first_run)
        assert stop_mock.call_count == 1
        first_run.refresh_from_db()
        parent_run.refresh_from_db()
        assert first_run.last_status == OperationStatuses.SKIPPED
        assert parent_run.last_status == PipelineLifeCycle.CREATED
        assert parent_run.statuses.count() == 1

        # Once the remaining run is skipped too, the pipeline run is DONE.
        with patch('scheduler.tasks.jobs.jobs_stop.apply_async') as stop_mock:
            skip_operation_run(second_run)
        assert stop_mock.call_count == 1
        parent_run.refresh_from_db()
        assert parent_run.last_status == PipelineLifeCycle.DONE
        assert parent_run.statuses.count() == 2
Exemplo n.º 3
0
    def test_running_operation_run_sets_pipeline_run_to_running(self):
        """A running operation run moves its pipeline run to RUNNING.

        The pipeline run follows the first operation run through SCHEDULED
        and RUNNING. Scheduling a second operation run afterwards must
        neither add a pipeline status nor move the pipeline run away from
        RUNNING.
        """
        operation_run = OperationRunFactory()
        start_operation_run(operation_run)
        operation_run.refresh_from_db()
        assert operation_run.last_status == OperationStatuses.CREATED
        pipeline_run = operation_run.pipeline_run
        assert pipeline_run.last_status == PipelineLifeCycle.CREATED
        assert pipeline_run.statuses.count() == 1

        # Create another operation run for this pipeline_run
        operation_run2 = OperationRunFactory(pipeline_run=pipeline_run)
        start_operation_run(operation_run2)
        operation_run2.refresh_from_db()

        operation_run.set_status(OperationStatuses.SCHEDULED)
        pipeline_run.refresh_from_db()
        operation_run.refresh_from_db()
        assert operation_run.last_status == OperationStatuses.SCHEDULED
        assert pipeline_run.last_status == PipelineLifeCycle.SCHEDULED
        assert pipeline_run.statuses.count() == 2

        operation_run.set_status(OperationStatuses.RUNNING)
        pipeline_run.refresh_from_db()
        operation_run.refresh_from_db()
        assert operation_run.last_status == OperationStatuses.RUNNING
        assert pipeline_run.last_status == PipelineLifeCycle.RUNNING
        assert pipeline_run.statuses.count() == 3

        operation_run2.set_status(OperationStatuses.SCHEDULED)
        # Reload before asserting, as in the earlier steps; otherwise the
        # check below reads the state loaded before this status change.
        pipeline_run.refresh_from_db()
        assert pipeline_run.last_status == PipelineLifeCycle.RUNNING
        assert pipeline_run.statuses.count() == 3
Exemplo n.º 4
0
 def test_operation_run_creation_sets_created_status(self):
     """Starting a fresh operation run records one CREATED job status."""
     assert JobStatus.objects.count() == 0
     # NOTE(review): the original comment says the
     # `new_pipeline_run_status` task is also called at this point;
     # nothing in this test asserts it.
     created_run = OperationRunFactory()
     start_operation_run(created_run)
     created_run.refresh_from_db()
     assert JobStatus.objects.count() == 1
     # Both the run and the entity it wraps must report CREATED.
     assert created_run.last_status == OperationStatuses.CREATED
     assert created_run.entity.last_status == OperationStatuses.CREATED
Exemplo n.º 5
0
    def test_schedule_start_with_operation_run_already_scheduled_operation_run(
            self):
        """`start_operation_run` returns False for FAILED or SCHEDULED runs.

        Each status is exercised on its own freshly created operation run.
        """
        for preset_status in (OperationStatuses.FAILED,
                              OperationStatuses.SCHEDULED):
            preset_run = OperationRunFactory()
            preset_run.status = preset_status
            preset_run.save()
            assert start_operation_run(preset_run) is False
Exemplo n.º 6
0
    def test_scheduling_operation_run_sets_pipeline_run_to_scheduled(self):
        """Scheduling an operation run moves its pipeline run to SCHEDULED."""
        scheduled_run = OperationRunFactory()
        start_operation_run(scheduled_run)
        scheduled_run.refresh_from_db()
        assert scheduled_run.last_status == OperationStatuses.CREATED
        parent_run = scheduled_run.pipeline_run
        assert parent_run.last_status == PipelineLifeCycle.CREATED
        assert parent_run.statuses.count() == 1

        # The status change adds a second status to the pipeline run.
        scheduled_run.set_status(OperationStatuses.SCHEDULED)
        scheduled_run.refresh_from_db()
        parent_run.refresh_from_db()
        assert scheduled_run.last_status == OperationStatuses.SCHEDULED
        assert parent_run.last_status == PipelineLifeCycle.SCHEDULED
        assert parent_run.statuses.count() == 2
Exemplo n.º 7
0
def pipelines_start(self: 'celery_app.task', pipeline_run_id: int) -> None:
    """Schedule a pipeline run and try to start its pending operation runs.

    The pipeline run is looked up with ``get_valid_pipeline_run``; when the
    lookup yields nothing, the fact is logged and the task stops. Otherwise
    the run is marked as scheduled and its operation runs still in
    ``CREATED`` are collected in topological order of the DAG. Runs are then
    passed to ``start_operation_run`` until the list is empty or the
    concurrency budget is used up. If runs are left over, or any call
    returned a truthy value, the task retries itself after
    ``Intervals.PIPELINES_SCHEDULER``.

    Args:
        self: The bound task instance; only ``self.retry`` is used.
        pipeline_run_id: Identifier of the pipeline run to start.
    """
    pipeline_run = get_valid_pipeline_run(pipeline_run_id=pipeline_run_id)
    if not pipeline_run:
        _logger.info('Pipeline `%s` does not exist any more.', pipeline_run_id)
        # Nothing to schedule: stop instead of dereferencing a missing run.
        return

    pipeline_run.on_schedule()
    dag, op_runs = pipeline_run.dag
    sorted_ops = dags.sort_topologically(dag=dag)
    op_runs_to_start = [
        op_runs[op_run_id] for op_run_id in sorted_ops
        if op_runs[op_run_id].last_status == OperationStatuses.CREATED
    ]
    concurrency = pipeline_run.pipeline.n_operation_runs_to_start
    future_check = False
    # NOTE(review): `pop()` takes runs from the END of the topologically
    # sorted list — confirm that this order is intended.
    while op_runs_to_start and concurrency > 0:
        op_run = op_runs_to_start.pop()
        if start_operation_run(op_run):
            # A truthy result is treated as "check this run again later";
            # it does not use up one of the concurrency slots.
            future_check = True
        else:
            concurrency -= 1

    if op_runs_to_start or future_check:
        # Schedule another task
        self.retry(countdown=Intervals.PIPELINES_SCHEDULER)
Exemplo n.º 8
0
    def test_failed_upstream_operation_runs_sets_pipeline_run_to_finished(
            self):
        """An UPSTREAM_FAILED operation run finishes its pipeline run."""
        blocked_run = OperationRunFactory()
        start_operation_run(blocked_run)
        blocked_run.refresh_from_db()
        assert blocked_run.last_status == OperationStatuses.CREATED
        parent_run = blocked_run.pipeline_run
        assert parent_run.last_status == PipelineLifeCycle.CREATED
        assert parent_run.statuses.count() == 1

        # The only operation run is flagged as upstream-failed, so the
        # pipeline run is expected to end up DONE with a second status.
        blocked_run.on_upstream_failed()
        blocked_run.refresh_from_db()
        parent_run.refresh_from_db()
        assert blocked_run.last_status == OperationStatuses.UPSTREAM_FAILED
        assert parent_run.last_status == PipelineLifeCycle.DONE
        assert parent_run.statuses.count() == 2
Exemplo n.º 9
0
    def test_failed_operation_runs_sets_pipeline_run_to_finished(self):
        """A FAILED operation run finishes its pipeline run.

        The pipeline run collects four statuses on the way: the initial
        CREATED one plus one per operation transition (SCHEDULED, RUNNING,
        failure).
        """
        failing_run = OperationRunFactory()
        start_operation_run(failing_run)
        failing_run.refresh_from_db()
        assert failing_run.last_status == OperationStatuses.CREATED
        parent_run = failing_run.pipeline_run
        assert parent_run.last_status == PipelineLifeCycle.CREATED
        assert parent_run.statuses.count() == 1

        # Walk the single operation run up to RUNNING, then fail it.
        for step in (OperationStatuses.SCHEDULED, OperationStatuses.RUNNING):
            failing_run.set_status(step)
        failing_run.on_failure()
        parent_run.refresh_from_db()
        failing_run.refresh_from_db()
        assert failing_run.last_status == OperationStatuses.FAILED
        assert parent_run.last_status == PipelineLifeCycle.DONE
        assert parent_run.statuses.count() == 4
Exemplo n.º 10
0
    def test_trigger_policy_one_done(self):
        """Check `check_upstream_trigger` under the ONE_DONE policy.

        No upstream, a status-less upstream and a RUNNING upstream do not
        trigger; a FAILED, SKIPPED or SUCCEEDED upstream does, and adding a
        further status-less upstream keeps the trigger satisfied.
        """
        target_run = OperationRunFactory()
        start_operation_run(target_run)
        target_run.refresh_from_db()
        target_operation = target_run.operation
        target_operation.trigger_policy = TriggerPolicy.ONE_DONE
        target_operation.save()

        # Without any upstream run there is nothing to trigger on.
        assert target_run.check_upstream_trigger() is False

        # A freshly created upstream run does not count.
        first_upstream = OperationRunFactory()
        target_run.upstream_runs.set([first_upstream])
        assert target_run.check_upstream_trigger() is False

        # Neither does that upstream once it is running.
        first_upstream.status = OperationStatuses.RUNNING
        first_upstream.save()
        assert target_run.check_upstream_trigger() is False

        # A failed upstream satisfies the policy.
        first_upstream.status = OperationStatuses.FAILED
        first_upstream.save()
        assert target_run.check_upstream_trigger() is True

        # Replace the upstream set with a single skipped run.
        skipped_upstream = OperationRunFactory()
        target_run.upstream_runs.set([skipped_upstream])
        skipped_upstream.status = OperationStatuses.SKIPPED
        skipped_upstream.save()
        assert target_run.check_upstream_trigger() is True

        # Replace the upstream set with a single succeeded run.
        succeeded_upstream = OperationRunFactory()
        target_run.upstream_runs.set([succeeded_upstream])
        succeeded_upstream.status = OperationStatuses.SUCCEEDED
        succeeded_upstream.save()
        assert target_run.check_upstream_trigger() is True

        # An extra upstream next to the succeeded one changes nothing.
        extra_upstream = OperationRunFactory()
        target_run.upstream_runs.add(extra_upstream)
        assert target_run.check_upstream_trigger() is True
Exemplo n.º 11
0
    def test_schedule_start_works_with_pipeline_concurrency(self):
        """Exercise `start_operation_run` with pipeline concurrency set to 1.

        Two upstream runs share the pipeline run with a ONE_DONE operation
        run. The assertions pin the return value of `start_operation_run`
        and the resulting `last_status` at each step, with the
        `pipelines_start_operation` task patched out where noted.
        """
        operation_run = OperationRunFactory()
        operation_run.operation.trigger_policy = TriggerPolicy.ONE_DONE
        operation_run.operation.save()
        pipeline_run = operation_run.pipeline_run
        # Set pipeline concurrency to 1
        pipeline_run.pipeline.concurrency = 1
        pipeline_run.pipeline.save()

        # Create two upstream runs in the same pipeline run.
        # NOTE(review): the True returned for the second run presumably
        # means it was held back by the concurrency limit of 1 - confirm
        # against `start_operation_run`.
        upstream_run1 = OperationRunFactory(pipeline_run=pipeline_run)
        upstream_run2 = OperationRunFactory(pipeline_run=pipeline_run)
        assert start_operation_run(upstream_run1) is False
        assert start_operation_run(upstream_run2) is True
        upstream_run1.refresh_from_db()
        upstream_run2.refresh_from_db()
        operation_run.upstream_runs.set([upstream_run1, upstream_run2])
        # Fail the first upstream while the follow-up task is patched out.
        with patch('pipelines.tasks.pipelines_start_operation.apply_async'
                   ) as mock_fct:
            upstream_run1.set_status(OperationStatuses.FAILED)

        # Exactly one follow-up task was requested, but since it is mocked
        # neither run has received a status.
        assert mock_fct.call_count == 1
        operation_run.refresh_from_db()
        assert operation_run.last_status is None
        upstream_run2.refresh_from_db()
        assert upstream_run2.last_status is None  # Should be started but we mocked the process

        # Start the second upstream by hand and move it to RUNNING; no
        # follow-up task is expected from these two steps.
        with patch('pipelines.tasks.pipelines_start_operation.apply_async'
                   ) as mock_fct:
            assert start_operation_run(upstream_run2) is False
            upstream_run2.refresh_from_db()
            upstream_run2.set_status(OperationStatuses.RUNNING)

        assert mock_fct.call_count == 0

        # While the second upstream is running, the downstream run is
        # expected to return True and keep no status.
        assert start_operation_run(operation_run) is True

        assert operation_run.last_status is None

        # This time the follow-up task is NOT patched out.
        upstream_run2.set_status(OperationStatuses.SUCCEEDED)

        operation_run.refresh_from_db()
        assert operation_run.last_status == OperationStatuses.CREATED
Exemplo n.º 12
0
    def test_schedule_start_works_with_operation_concurrency(self):
        """Exercise `start_operation_run` with operation concurrency set to 1.

        A first run of the operation is started and ends up CREATED; a second
        run of the same operation is then expected to return True from
        `start_operation_run` and to stay without a status.
        """
        operation_run = OperationRunFactory()
        operation_run.operation.trigger_policy = TriggerPolicy.ONE_DONE
        operation_run.operation.save()
        pipeline_run = operation_run.pipeline_run
        # Set operation concurrency to 1
        operation_run.operation.concurrency = 1
        operation_run.operation.save()

        # Create and start two upstream runs in the same pipeline run.
        upstream_run1 = OperationRunFactory(pipeline_run=pipeline_run)
        upstream_run2 = OperationRunFactory(pipeline_run=pipeline_run)
        assert start_operation_run(upstream_run1) is False
        assert start_operation_run(upstream_run2) is False
        upstream_run1.refresh_from_db()
        upstream_run2.refresh_from_db()
        operation_run.upstream_runs.set([upstream_run1, upstream_run2])
        # Fail one upstream and set the other to RUNNING while the follow-up
        # task is patched out.
        with patch('pipelines.tasks.pipelines_start_operation.apply_async'
                   ) as mock_fct:
            upstream_run1.set_status(OperationStatuses.FAILED)
            upstream_run2.set_status(OperationStatuses.RUNNING)

        # Only one of the two status changes requested a follow-up task;
        # because it is mocked the downstream run still has no status.
        assert mock_fct.call_count == 1
        operation_run.refresh_from_db()
        assert operation_run.last_status is None
        # Starting it by hand works and leaves it in CREATED.
        assert start_operation_run(operation_run) is False
        operation_run.refresh_from_db()
        assert operation_run.last_status == OperationStatuses.CREATED

        # Create a second run of the same operation with the same upstreams
        new_operation_run = OperationRunFactory(
            operation=operation_run.operation)
        new_operation_run.upstream_runs.set([upstream_run1, upstream_run2])
        assert new_operation_run.status is None

        # Check if we can start another instance
        # NOTE(review): the True result presumably reflects the operation
        # concurrency of 1 being taken by the first run - confirm against
        # `start_operation_run`.
        new_operation_run.refresh_from_db()
        assert start_operation_run(new_operation_run) is True
        new_operation_run.refresh_from_db()
        assert new_operation_run.last_status is None
Exemplo n.º 13
0
    def test_schedule_start_works_when_conditions_are_met_manual(self):
        """Start a ONE_DONE run by hand after its upstream has failed.

        The follow-up task is patched out, so the downstream run stays
        without a status until `start_operation_run` is called explicitly.
        """
        downstream_run = OperationRunFactory()
        downstream_run.operation.trigger_policy = TriggerPolicy.ONE_DONE
        downstream_run.operation.save()
        shared_pipeline_run = downstream_run.pipeline_run

        # Start one upstream run in the same pipeline run and wire it in.
        failed_upstream = OperationRunFactory(pipeline_run=shared_pipeline_run)
        assert start_operation_run(failed_upstream) is False
        failed_upstream.refresh_from_db()
        downstream_run.upstream_runs.set([failed_upstream])

        # Fail the upstream while the follow-up task is mocked.
        start_task = 'pipelines.tasks.pipelines_start_operation.apply_async'
        with patch(start_task) as start_mock:
            failed_upstream.set_status(OperationStatuses.FAILED)

        assert start_mock.call_count == 1
        downstream_run.refresh_from_db()
        assert downstream_run.last_status is None

        # The manual start now goes through.
        assert start_operation_run(downstream_run) is False
        downstream_run.refresh_from_db()
        assert downstream_run.last_status == OperationStatuses.CREATED
Exemplo n.º 14
0
    def test_schedule_start_with_failed_upstream(self):
        """An ALL_SUCCEEDED run with a failed upstream is UPSTREAM_FAILED."""
        downstream_run = OperationRunFactory()
        downstream_run.operation.trigger_policy = TriggerPolicy.ALL_SUCCEEDED
        downstream_run.operation.save()

        # Start one upstream run and make it the only upstream.
        failed_upstream = OperationRunFactory()
        assert start_operation_run(failed_upstream) is False
        failed_upstream.refresh_from_db()
        downstream_run.upstream_runs.set([failed_upstream])

        # Fail the upstream while the follow-up task is mocked.
        start_task = 'pipelines.tasks.pipelines_start_operation.apply_async'
        with patch(start_task) as start_mock:
            failed_upstream.set_status(OperationStatuses.FAILED)

        assert start_mock.call_count == 1

        assert start_operation_run(downstream_run) is False

        # The downstream run can no longer be started, so it has to be
        # marked as UPSTREAM_FAILED.
        downstream_run.refresh_from_db()
        assert downstream_run.last_status == OperationStatuses.UPSTREAM_FAILED
Exemplo n.º 15
0
    def test_schedule_start_works_when_conditions_are_met_auto(self):
        """A ONE_DONE run reaches CREATED once its upstream has failed.

        Nothing is patched here and `start_operation_run` is never called
        on the downstream run directly.
        """
        downstream_run = OperationRunFactory()
        downstream_run.operation.trigger_policy = TriggerPolicy.ONE_DONE
        downstream_run.operation.save()
        shared_pipeline_run = downstream_run.pipeline_run

        # Start one upstream run in the same pipeline run, wire it in and
        # then fail it.
        failed_upstream = OperationRunFactory(pipeline_run=shared_pipeline_run)
        assert start_operation_run(failed_upstream) is False
        failed_upstream.refresh_from_db()
        downstream_run.upstream_runs.set([failed_upstream])
        failed_upstream.set_status(OperationStatuses.FAILED)

        downstream_run.refresh_from_db()
        assert downstream_run.last_status == OperationStatuses.CREATED
Exemplo n.º 16
0
 def test_skipping_pipeline_run_stops_operation_runs(self):
     """Skipping a pipeline run also marks its operation runs as skipped."""
     skipped_pipeline_run = PipelineRunFactory()
     for _ in range(2):
         started_run = OperationRunFactory(pipeline_run=skipped_pipeline_run)
         assert start_operation_run(started_run) is False
     assert skipped_pipeline_run.statuses.count() == 1
     assert skipped_pipeline_run.last_status == PipelineLifeCycle.CREATED
     assert JobStatus.objects.filter().count() == 2
     statuses_before = set(JobStatus.objects.values_list('status', flat=True))
     assert statuses_before == {OperationStatuses.CREATED}

     # Skip the pipeline run while the job-stop task is mocked.
     with patch('scheduler.tasks.jobs.jobs_stop.apply_async') as stop_mock:
         skipped_pipeline_run.on_skip()
     assert skipped_pipeline_run.statuses.count() == 2
     assert skipped_pipeline_run.last_status == PipelineLifeCycle.SKIPPED

     # The job status rows and the stop-task calls add up to six, and the
     # rows now hold both CREATED and SKIPPED statuses.
     status_rows = JobStatus.objects.filter().count()
     assert status_rows + stop_mock.call_count == 6
     statuses_after = set(JobStatus.objects.values_list('status', flat=True))
     assert statuses_after == {
         OperationStatuses.CREATED, OperationStatuses.SKIPPED
     }