Пример #1
0
def test_create_execution_plan_snapshot():
    """Check that an executed run carries the id of its execution plan snapshot.

    The expected id is derived from a plan snapshot built independently of the
    run, then compared with the id stored on the run fetched from the instance.
    """

    @solid
    def noop_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        noop_solid()

    instance = DagsterInstance.local_temp()

    # Build the reference snapshot and its id without executing anything.
    plan = create_execution_plan(noop_pipeline)
    pipeline_snapshot_id = noop_pipeline.get_pipeline_snapshot_id()
    expected_snapshot = snapshot_from_execution_plan(plan, pipeline_snapshot_id)
    expected_id = create_execution_plan_snapshot_id(expected_snapshot)

    result = execute_pipeline(noop_pipeline, instance=instance)
    assert result.success

    stored_run = instance.get_run_by_id(result.run_id)
    actual_id = stored_run.execution_plan_snapshot_id

    assert actual_id == expected_id
    # Recompute the id from the same snapshot and compare again.
    recomputed_id = create_execution_plan_snapshot_id(expected_snapshot)
    assert stored_run.execution_plan_snapshot_id == recomputed_id
Пример #2
0
 def add_execution_plan_snapshot(self, execution_plan_snapshot):
     """Store an execution plan snapshot in ``self._ep_snapshots`` under its id.

     The argument must be an ``ExecutionPlanSnapshot``. The id is computed with
     ``create_execution_plan_snapshot_id`` and returned to the caller.
     """
     check.inst_param(
         execution_plan_snapshot,
         'execution_plan_snapshot',
         ExecutionPlanSnapshot,
     )

     snapshot_id = create_execution_plan_snapshot_id(execution_plan_snapshot)
     self._ep_snapshots[snapshot_id] = execution_plan_snapshot
     return snapshot_id
Пример #3
0
 def add_execution_plan_snapshot(self, execution_plan_snapshot):
     """Persist an execution plan snapshot through ``self._add_snapshot``.

     The argument must be an ``ExecutionPlanSnapshot``. Its id is computed with
     ``create_execution_plan_snapshot_id`` and passed along with the snapshot
     and ``SnapshotType.EXECUTION_PLAN``; the result of ``_add_snapshot`` is
     returned unchanged.
     """
     check.inst_param(
         execution_plan_snapshot,
         'execution_plan_snapshot',
         ExecutionPlanSnapshot,
     )

     snapshot_id = create_execution_plan_snapshot_id(execution_plan_snapshot)
     return self._add_snapshot(
         snapshot_id=snapshot_id,
         snapshot_obj=execution_plan_snapshot,
         snapshot_type=SnapshotType.EXECUTION_PLAN,
     )
Пример #4
0
    def _ensure_persisted_execution_plan_snapshot(
            self, execution_plan_snapshot, pipeline_snapshot_id,
            step_keys_to_execute):
        """Validate an execution plan snapshot and store it if run storage lacks it.

        Checks that the snapshot refers to ``pipeline_snapshot_id`` and that its
        ``step_keys_to_execute`` agree with the ``step_keys_to_execute``
        argument (or, when that argument is empty or None, that the snapshot
        covers every step in the plan). Returns the execution plan snapshot id.
        """
        from dagster.core.snap.execution_plan_snapshot import (
            ExecutionPlanSnapshot,
            create_execution_plan_snapshot_id,
        )

        check.inst_param(
            execution_plan_snapshot,
            'execution_plan_snapshot',
            ExecutionPlanSnapshot,
        )
        check.str_param(pipeline_snapshot_id, 'pipeline_snapshot_id')
        check.opt_list_param(
            step_keys_to_execute, 'step_keys_to_execute', of_type=str)

        # The plan snapshot must point at the pipeline snapshot we were handed.
        plan_pipeline_snapshot_id = execution_plan_snapshot.pipeline_snapshot_id
        mismatch_message = (
            'Snapshot mismatch: Snapshot ID in execution plan snapshot is '
            '"{ep_pipeline_snapshot_id}" and snapshot_id created in memory is '
            '"{pipeline_snapshot_id}"'
        ).format(
            ep_pipeline_snapshot_id=plan_pipeline_snapshot_id,
            pipeline_snapshot_id=pipeline_snapshot_id,
        )
        check.invariant(
            plan_pipeline_snapshot_id == pipeline_snapshot_id,
            mismatch_message,
        )

        # Compare as sets: only membership matters, not ordering.
        plan_step_keys = set(execution_plan_snapshot.step_keys_to_execute)
        if step_keys_to_execute:
            step_keys_consistent = set(step_keys_to_execute) == plan_step_keys
        else:
            all_step_keys = {step.key for step in execution_plan_snapshot.steps}
            step_keys_consistent = plan_step_keys == all_step_keys

        check.invariant(
            step_keys_consistent,
            'We encode step_keys_to_execute twice in our stack, unfortunately. This check '
            'ensures that they are consistent. We check that step_keys_to_execute in the plan '
            'matches the step_keys_to_execute params if it is set. If it is not, this indicates '
            'a full execution plan, and so we verify that.',
        )

        snapshot_id = create_execution_plan_snapshot_id(execution_plan_snapshot)

        # Already stored: nothing to write.
        if self._run_storage.has_execution_plan_snapshot(snapshot_id):
            return snapshot_id

        stored_id = self._run_storage.add_execution_plan_snapshot(
            execution_plan_snapshot)
        # Storage must agree with the id computed locally.
        check.invariant(snapshot_id == stored_id)

        return snapshot_id
Пример #5
0
    def get_or_create_run(
        self,
        pipeline_name=None,
        run_id=None,
        environment_dict=None,
        mode=None,
        selector=None,
        step_keys_to_execute=None,
        status=None,
        tags=None,
        root_run_id=None,
        parent_run_id=None,
        pipeline_snapshot=None,
        execution_plan_snapshot=None,
    ):
        """Return the stored run with this run id, or add a new run to run storage.

        A ``PipelineRun`` is built from the arguments. If ``pipeline_snapshot``
        and/or ``execution_plan_snapshot`` are given, each is added to run
        storage when not already present and its id is attached to the run.

        If a run with the same ``run_id`` already exists, it is compared with
        the newly built run via ``_check_run_equality``; the existing run is
        returned when no differences are reported. Otherwise the new run is
        passed to ``self._run_storage.add_run`` and that call's result is
        returned.

        Note: when ``tags`` contains ``IS_AIRFLOW_INGEST_PIPELINE_STR`` but not
        ``AIRFLOW_EXECUTION_DATE_STR``, the latter is written into the caller's
        ``tags`` dict in place.

        Raises:
            DagsterRunConflict: an existing run with the same id differs from
                the run described by the arguments.

        A ``check.invariant`` also fails when ``execution_plan_snapshot`` is
        given without ``pipeline_snapshot``, when the two snapshots do not
        refer to the same pipeline snapshot id, or when run storage returns a
        snapshot id different from the locally computed one.
        """

        if tags and IS_AIRFLOW_INGEST_PIPELINE_STR in tags:
            if AIRFLOW_EXECUTION_DATE_STR not in tags:
                # Mutates the caller-supplied dict (see docstring note).
                tags[AIRFLOW_EXECUTION_DATE_STR] = get_current_datetime_in_utc(
                ).isoformat()

        pipeline_run = PipelineRun(
            pipeline_name=pipeline_name,
            run_id=run_id,
            environment_dict=environment_dict,
            mode=mode,
            selector=selector,
            step_keys_to_execute=step_keys_to_execute,
            status=status,
            tags=tags,
            root_run_id=root_run_id,
            parent_run_id=parent_run_id,
        )

        # Bound up front so the execution-plan branch below can always read it;
        # previously that branch hit an UnboundLocalError when no
        # pipeline_snapshot was supplied.
        pipeline_snapshot_id = None

        if pipeline_snapshot is not None:
            from dagster.core.snap.pipeline_snapshot import create_pipeline_snapshot_id

            pipeline_snapshot_id = create_pipeline_snapshot_id(
                pipeline_snapshot)

            if not self._run_storage.has_pipeline_snapshot(
                    pipeline_snapshot_id):
                returned_pipeline_snapshot_id = self._run_storage.add_pipeline_snapshot(
                    pipeline_snapshot)

                # Storage must agree with the id computed locally.
                check.invariant(
                    pipeline_snapshot_id == returned_pipeline_snapshot_id)

            pipeline_run = pipeline_run.with_pipeline_snapshot_id(
                pipeline_snapshot_id)

        if execution_plan_snapshot is not None:
            from dagster.core.snap.execution_plan_snapshot import create_execution_plan_snapshot_id

            # The plan snapshot is validated against the pipeline snapshot id,
            # so the pipeline snapshot has to be supplied alongside it.
            check.invariant(
                pipeline_snapshot is not None,
                'execution_plan_snapshot was provided without the '
                'pipeline_snapshot it refers to.',
            )

            check.invariant(execution_plan_snapshot.pipeline_snapshot_id ==
                            pipeline_snapshot_id)

            execution_plan_snapshot_id = create_execution_plan_snapshot_id(
                execution_plan_snapshot)

            if not self._run_storage.has_execution_plan_snapshot(
                    execution_plan_snapshot_id):
                returned_execution_plan_snapshot_id = self._run_storage.add_execution_plan_snapshot(
                    execution_plan_snapshot)

                # Storage must agree with the id computed locally.
                check.invariant(execution_plan_snapshot_id ==
                                returned_execution_plan_snapshot_id)

            pipeline_run = pipeline_run.with_execution_plan_snapshot_id(
                execution_plan_snapshot_id)

        if self.has_run(pipeline_run.run_id):
            candidate_run = self.get_run_by_id(pipeline_run.run_id)

            field_diff = _check_run_equality(pipeline_run, candidate_run)

            if field_diff:
                raise DagsterRunConflict(
                    'Found conflicting existing run with same id {run_id}. Runs differ in:'
                    '\n{field_diff}'.format(
                        run_id=pipeline_run.run_id,
                        field_diff=_format_field_diff(field_diff),
                    ), )
            # Same id and no reported differences: reuse the stored run.
            return candidate_run

        return self._run_storage.add_run(pipeline_run)