Example #1
    def _ensure_persisted_pipeline_snapshot(self, pipeline_snapshot, parent_pipeline_snapshot):
        from dagster.core.snap import create_pipeline_snapshot_id, PipelineSnapshot

        check.inst_param(pipeline_snapshot, "pipeline_snapshot", PipelineSnapshot)
        check.opt_inst_param(parent_pipeline_snapshot, "parent_pipeline_snapshot", PipelineSnapshot)

        if pipeline_snapshot.lineage_snapshot:
            if not self._run_storage.has_pipeline_snapshot(
                pipeline_snapshot.lineage_snapshot.parent_snapshot_id
            ):
                check.invariant(
                    create_pipeline_snapshot_id(parent_pipeline_snapshot)
                    == pipeline_snapshot.lineage_snapshot.parent_snapshot_id,
                    "Parent pipeline snapshot id out of sync with passed parent pipeline snapshot",
                )

                returned_pipeline_snapshot_id = self._run_storage.add_pipeline_snapshot(
                    parent_pipeline_snapshot
                )
                check.invariant(
                    pipeline_snapshot.lineage_snapshot.parent_snapshot_id
                    == returned_pipeline_snapshot_id
                )

        pipeline_snapshot_id = create_pipeline_snapshot_id(pipeline_snapshot)
        if not self._run_storage.has_pipeline_snapshot(pipeline_snapshot_id):
            returned_pipeline_snapshot_id = self._run_storage.add_pipeline_snapshot(
                pipeline_snapshot
            )
            check.invariant(pipeline_snapshot_id == returned_pipeline_snapshot_id)

        return pipeline_snapshot_id
Example #2
    def __init__(self, pipeline_snapshot, parent_pipeline_snapshot):
        self.pipeline_snapshot = check.inst_param(pipeline_snapshot,
                                                  'pipeline_snapshot',
                                                  PipelineSnapshot)
        self.parent_pipeline_snapshot = check.opt_inst_param(
            parent_pipeline_snapshot, 'parent_pipeline_snapshot',
            PipelineSnapshot)

        if self.pipeline_snapshot.lineage_snapshot:
            check.invariant(
                self.parent_pipeline_snapshot is not None,
                'Can not create PipelineIndex for pipeline_snapshot with lineage without parent_pipeline_snapshot',
            )
            parent_id = create_pipeline_snapshot_id(
                self.parent_pipeline_snapshot)
            check.invariant(
                pipeline_snapshot.lineage_snapshot.parent_snapshot_id ==
                parent_id,
                'Mismatch in IDs between pipeline_snapshot lineage and parent_pipeline_snapshot',
            )

        self._solid_defs_snaps_index = {
            sd.name: sd
            for sd in
            pipeline_snapshot.solid_definitions_snapshot.solid_def_snaps +
            pipeline_snapshot.solid_definitions_snapshot.
            composite_solid_def_snaps
        }

        self._dagster_type_snaps_by_name_index = {
            dagster_type_snap.name: dagster_type_snap
            for dagster_type_snap in
            pipeline_snapshot.dagster_type_namespace_snapshot.
            all_dagster_type_snaps_by_key.values() if dagster_type_snap.name
        }

        self.dep_structure_index = DependencyStructureIndex(
            pipeline_snapshot.dep_structure_snapshot)

        self._comp_dep_structures = {
            comp_snap.name:
            DependencyStructureIndex(comp_snap.dep_structure_snapshot)
            for comp_snap in pipeline_snapshot.solid_definitions_snapshot.
            composite_solid_def_snaps
        }

        self.pipeline_snapshot_id = create_pipeline_snapshot_id(
            pipeline_snapshot)
Example #3
def test_run_created_in_0_7_9_snapshot_id_change():
    test_dir = file_relative_path(
        __file__, 'snapshot_0_7_9_shapshot_id_creation_change/sqlite')
    with restore_directory(test_dir):

        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))
        # run_id = 'e297fa70-49e8-43f8-abfe-1634f02644f6'

        old_pipeline_snapshot_id = '88528edde2ed64da3c39cca0da8ba2f7586c1a5d'
        old_execution_plan_snapshot_id = '2246f8e5a10d21e15fbfa3773d7b2d0bc1fa9d3d'

        pipeline_snapshot = instance.get_pipeline_snapshot(
            old_pipeline_snapshot_id)
        ep_snapshot = instance.get_execution_plan_snapshot(
            old_execution_plan_snapshot_id)

        # It is the pipeline snapshot that changed
        # Verify that snapshot ids are not equal. This changed in 0.7.10
        assert create_pipeline_snapshot_id(
            pipeline_snapshot) != old_pipeline_snapshot_id

        # We also changed execution plan schema in 0.7.11.post1
        assert create_execution_plan_snapshot_id(
            ep_snapshot) != old_execution_plan_snapshot_id

        # This previously failed with a check error
        assert ExecutionPlanIndex(ep_snapshot,
                                  PipelineIndex(pipeline_snapshot))
Example #4
    def test_single_write_read_with_snapshot(self, storage):
        run_with_snapshot_id = "lkasjdflkjasdf"
        pipeline_def = PipelineDefinition(name="some_pipeline", solid_defs=[])

        pipeline_snapshot = pipeline_def.get_pipeline_snapshot()

        pipeline_snapshot_id = create_pipeline_snapshot_id(pipeline_snapshot)

        run_with_snapshot = PipelineRun(
            run_id=run_with_snapshot_id,
            pipeline_name=pipeline_def.name,
            pipeline_snapshot_id=pipeline_snapshot_id,
        )

        assert not storage.has_pipeline_snapshot(pipeline_snapshot_id)

        assert storage.add_pipeline_snapshot(
            pipeline_snapshot) == pipeline_snapshot_id

        assert serialize_pp(storage.get_pipeline_snapshot(
            pipeline_snapshot_id)) == serialize_pp(pipeline_snapshot)

        storage.add_run(run_with_snapshot)

        assert storage.get_run_by_id(run_with_snapshot_id) == run_with_snapshot

        storage.wipe()

        assert not storage.has_pipeline_snapshot(pipeline_snapshot_id)
        assert not storage.has_run(run_with_snapshot_id)
Example #5
    def __init__(self, pipeline_snapshot):
        self.pipeline_snapshot = check.inst_param(pipeline_snapshot,
                                                  'pipeline_snapshot',
                                                  PipelineSnapshot)

        self._solid_defs_snaps_index = {
            sd.name: sd
            for sd in
            pipeline_snapshot.solid_definitions_snapshot.solid_def_snaps +
            pipeline_snapshot.solid_definitions_snapshot.
            composite_solid_def_snaps
        }

        self._dagster_type_snaps_by_name_index = {
            dagster_type_snap.name: dagster_type_snap
            for dagster_type_snap in
            pipeline_snapshot.dagster_type_namespace_snapshot.
            all_dagster_type_snaps_by_key.values() if dagster_type_snap.name
        }

        self.dep_structure_index = DependencyStructureIndex(
            pipeline_snapshot.dep_structure_snapshot)

        self._comp_dep_structures = {
            comp_snap.name:
            DependencyStructureIndex(comp_snap.dep_structure_snapshot)
            for comp_snap in pipeline_snapshot.solid_definitions_snapshot.
            composite_solid_def_snaps
        }

        self.pipeline_snapshot_id = create_pipeline_snapshot_id(
            pipeline_snapshot)
Example #6
    def add_pipeline_snapshot(self, pipeline_snapshot):
        check.inst_param(pipeline_snapshot, 'pipeline_snapshot', PipelineSnapshot)
        return self._add_snapshot(
            snapshot_id=create_pipeline_snapshot_id(pipeline_snapshot),
            snapshot_obj=pipeline_snapshot,
            snapshot_type=SnapshotType.PIPELINE,
        )
Example #7
def test_run_created_in_0_7_9_snapshot_id_change():
    test_dir = file_relative_path(
        __file__, 'snapshot_0_7_9_shapshot_id_creation_change/sqlite')
    with restore_directory(test_dir):

        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))
        # run_id = 'e297fa70-49e8-43f8-abfe-1634f02644f6'

        old_pipeline_snapshot_id = '88528edde2ed64da3c39cca0da8ba2f7586c1a5d'
        old_execution_plan_snapshot_id = '2246f8e5a10d21e15fbfa3773d7b2d0bc1fa9d3d'
        with pytest.warns(
                UserWarning,
                match=re.escape(
                    '"input_hydration_schema_key" is deprecated and will be removed in 0.10.0, use '
                    '"loader_schema_key" instead.'),
        ):
            historical_pipeline = instance.get_historical_pipeline(
                old_pipeline_snapshot_id)
        pipeline_snapshot = historical_pipeline.pipeline_snapshot
        ep_snapshot = instance.get_execution_plan_snapshot(
            old_execution_plan_snapshot_id)

        # It is the pipeline snapshot that changed
        # Verify that snapshot ids are not equal. This changed in 0.7.10
        assert create_pipeline_snapshot_id(
            pipeline_snapshot) != old_pipeline_snapshot_id

        # We also changed execution plan schema in 0.7.11.post1
        assert create_execution_plan_snapshot_id(
            ep_snapshot) != old_execution_plan_snapshot_id

        # This previously failed with a check error
        assert ExternalExecutionPlan(ep_snapshot, historical_pipeline)
Example #8
    def test_debug_snapshot_import(self, storage):
        from dagster.core.execution.api import create_execution_plan
        from dagster.core.snap import (
            snapshot_from_execution_plan,
            create_execution_plan_snapshot_id,
        )

        run_id = make_new_run_id()
        run_to_add = TestRunStorage.build_run(pipeline_name="pipeline_name",
                                              run_id=run_id)
        storage.add_run(run_to_add)

        pipeline_def = PipelineDefinition(name="some_pipeline", solid_defs=[])

        pipeline_snapshot = pipeline_def.get_pipeline_snapshot()
        pipeline_snapshot_id = create_pipeline_snapshot_id(pipeline_snapshot)
        new_pipeline_snapshot_id = f"{pipeline_snapshot_id}-new-snapshot"

        storage.add_snapshot(pipeline_snapshot,
                             snapshot_id=new_pipeline_snapshot_id)
        assert not storage.has_snapshot(pipeline_snapshot_id)
        assert storage.has_snapshot(new_pipeline_snapshot_id)

        execution_plan = create_execution_plan(pipeline_def)
        ep_snapshot = snapshot_from_execution_plan(execution_plan,
                                                   new_pipeline_snapshot_id)
        ep_snapshot_id = create_execution_plan_snapshot_id(ep_snapshot)
        new_ep_snapshot_id = f"{ep_snapshot_id}-new-snapshot"

        storage.add_snapshot(ep_snapshot, snapshot_id=new_ep_snapshot_id)
        assert not storage.has_snapshot(ep_snapshot_id)
        assert storage.has_snapshot(new_ep_snapshot_id)
Example #9
def test_run_created_in_0_7_9_snapshot_id_change():
    src_dir = file_relative_path(
        __file__, "snapshot_0_7_9_shapshot_id_creation_change/sqlite")
    with copy_directory(src_dir) as test_dir:

        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))
        # run_id = 'e297fa70-49e8-43f8-abfe-1634f02644f6'

        old_pipeline_snapshot_id = "88528edde2ed64da3c39cca0da8ba2f7586c1a5d"
        old_execution_plan_snapshot_id = "2246f8e5a10d21e15fbfa3773d7b2d0bc1fa9d3d"

        historical_pipeline = instance.get_historical_pipeline(
            old_pipeline_snapshot_id)
        pipeline_snapshot = historical_pipeline.pipeline_snapshot
        ep_snapshot = instance.get_execution_plan_snapshot(
            old_execution_plan_snapshot_id)

        # It is the pipeline snapshot that changed
        # Verify that snapshot ids are not equal. This changed in 0.7.10
        created_snapshot_id = create_pipeline_snapshot_id(pipeline_snapshot)
        assert created_snapshot_id != old_pipeline_snapshot_id

        # verify that both are accessible off of the historical pipeline
        assert historical_pipeline.computed_pipeline_snapshot_id == created_snapshot_id
        assert historical_pipeline.identifying_pipeline_snapshot_id == old_pipeline_snapshot_id

        # We also changed execution plan schema in 0.7.11.post1
        assert create_execution_plan_snapshot_id(
            ep_snapshot) != old_execution_plan_snapshot_id

        # This previously failed with a check error
        assert ExternalExecutionPlan(ep_snapshot)
Example #10
    def add_pipeline_snapshot(self,
                              pipeline_snapshot: PipelineSnapshot,
                              snapshot_id: Optional[str] = None) -> str:
        check.inst_param(pipeline_snapshot, "pipeline_snapshot",
                         PipelineSnapshot)
        check.opt_str_param(snapshot_id, "snapshot_id")
        if not snapshot_id:
            snapshot_id = create_pipeline_snapshot_id(pipeline_snapshot)
        self._pipeline_snapshots[snapshot_id] = pipeline_snapshot
        return snapshot_id
Example #11
    def test_fetch_by_snapshot_id(self, storage):
        assert storage
        pipeline_def_a = PipelineDefinition(name="some_pipeline",
                                            solid_defs=[])
        pipeline_def_b = PipelineDefinition(name="some_other_pipeline",
                                            solid_defs=[])
        pipeline_snapshot_a = pipeline_def_a.get_pipeline_snapshot()
        pipeline_snapshot_b = pipeline_def_b.get_pipeline_snapshot()
        pipeline_snapshot_a_id = create_pipeline_snapshot_id(
            pipeline_snapshot_a)
        pipeline_snapshot_b_id = create_pipeline_snapshot_id(
            pipeline_snapshot_b)

        assert storage.add_pipeline_snapshot(
            pipeline_snapshot_a) == pipeline_snapshot_a_id
        assert storage.add_pipeline_snapshot(
            pipeline_snapshot_b) == pipeline_snapshot_b_id

        one = make_new_run_id()
        two = make_new_run_id()
        storage.add_run(
            TestRunStorage.build_run(
                run_id=one,
                pipeline_name="some_pipeline",
                pipeline_snapshot_id=pipeline_snapshot_a_id,
            ))
        storage.add_run(
            TestRunStorage.build_run(
                run_id=two,
                pipeline_name="some_other_pipeline",
                pipeline_snapshot_id=pipeline_snapshot_b_id,
            ))
        assert len(storage.get_runs()) == 2
        runs_a = storage.get_runs(
            PipelineRunsFilter(snapshot_id=pipeline_snapshot_a_id))
        assert len(runs_a) == 1
        assert runs_a[0].run_id == one

        runs_b = storage.get_runs(
            PipelineRunsFilter(snapshot_id=pipeline_snapshot_b_id))
        assert len(runs_b) == 1
        assert runs_b[0].run_id == two
Example #12
def test_empty_pipeline_snap_props(snapshot):

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(get_noop_pipeline())

    assert pipeline_snapshot.name == 'noop_pipeline'
    assert pipeline_snapshot.description is None
    assert pipeline_snapshot.tags == {}

    assert pipeline_snapshot == serialize_rt(pipeline_snapshot)

    snapshot.assert_match(serialize_pp(pipeline_snapshot))
    snapshot.assert_match(create_pipeline_snapshot_id(pipeline_snapshot))
Example #13
    def add_pipeline_snapshot(
        self, pipeline_snapshot: PipelineSnapshot, snapshot_id: Optional[str] = None
    ) -> str:
        check.inst_param(pipeline_snapshot, "pipeline_snapshot", PipelineSnapshot)
        check.opt_str_param(snapshot_id, "snapshot_id")

        if not snapshot_id:
            snapshot_id = create_pipeline_snapshot_id(pipeline_snapshot)

        return self._add_snapshot(
            snapshot_id=snapshot_id,
            snapshot_obj=pipeline_snapshot,
            snapshot_type=SnapshotType.PIPELINE,
        )
Example #14
    def test_add_get_snapshot(self, storage):
        pipeline_def = PipelineDefinition(name="some_pipeline", solid_defs=[])
        pipeline_snapshot = pipeline_def.get_pipeline_snapshot()
        pipeline_snapshot_id = create_pipeline_snapshot_id(pipeline_snapshot)

        assert storage.add_pipeline_snapshot(pipeline_snapshot) == pipeline_snapshot_id
        fetched_pipeline_snapshot = storage.get_pipeline_snapshot(pipeline_snapshot_id)
        assert fetched_pipeline_snapshot
        assert serialize_pp(fetched_pipeline_snapshot) == serialize_pp(pipeline_snapshot)
        assert storage.has_pipeline_snapshot(pipeline_snapshot_id)
        assert not storage.has_pipeline_snapshot("nope")

        storage.wipe()

        assert not storage.has_pipeline_snapshot(pipeline_snapshot_id)
Example #15
def test_create_pipeline_snapshot():
    @solid
    def noop_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        noop_solid()

    with instance_for_test() as instance:
        result = execute_pipeline(noop_pipeline, instance=instance)
        assert result.success

        run = instance.get_run_by_id(result.run_id)

        assert run.pipeline_snapshot_id == create_pipeline_snapshot_id(
            noop_pipeline.get_pipeline_snapshot())
Example #16
def test_create_noop_execution_plan_with_tags(snapshot):
    @solid(tags={'foo': 'bar', 'bar': 'baaz'})
    def noop_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        noop_solid()

    execution_plan = create_execution_plan(noop_pipeline)

    snapshot.assert_match(
        serialize_pp(
            snapshot_from_execution_plan(
                execution_plan,
                create_pipeline_snapshot_id(
                    noop_pipeline.get_pipeline_snapshot()))))
Example #17
def test_create_noop_execution_plan(snapshot):
    @solid
    def noop_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        noop_solid()

    execution_plan = create_execution_plan(noop_pipeline)

    snapshot.assert_match(
        serialize_pp(
            snapshot_from_execution_plan(
                execution_plan,
                create_pipeline_snapshot_id(
                    noop_pipeline.get_pipeline_snapshot()))))
Example #18
def test_create_pipeline_snapshot():
    @solid
    def noop_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        noop_solid()

    instance = DagsterInstance.local_temp()

    result = execute_pipeline(noop_pipeline, instance=instance)
    assert result.success

    run = instance.get_run_by_id(result.run_id)

    assert run.pipeline_snapshot_id == create_pipeline_snapshot_id(
        noop_pipeline.get_pipeline_snapshot())
Example #19
def test_basic_dep_fan_out(snapshot):
    @solid
    def return_one(_):
        return 1

    @solid(input_defs=[InputDefinition("value", int)])
    def passthrough(_, value):
        return value

    @pipeline
    def single_dep_pipeline():
        return_one_result = return_one()
        passthrough.alias("passone")(return_one_result)
        passthrough.alias("passtwo")(return_one_result)

    dep_structure_snapshot = build_dep_structure_snapshot_from_icontains_solids(
        single_dep_pipeline.graph
    )
    index = DependencyStructureIndex(dep_structure_snapshot)

    assert index.get_invocation("return_one")
    assert index.get_invocation("passone")
    assert index.get_invocation("passtwo")

    assert index.get_upstream_output("passone", "value") == OutputHandleSnap("return_one", "result")
    assert index.get_upstream_output("passtwo", "value") == OutputHandleSnap("return_one", "result")

    assert set(index.get_downstream_inputs("return_one", "result")) == set(
        [
            InputHandle("passthrough", "passone", "value"),
            InputHandle("passthrough", "passtwo", "value"),
        ]
    )

    assert (
        deserialize_json_to_dagster_namedtuple(serialize_dagster_namedtuple(dep_structure_snapshot))
        == dep_structure_snapshot
    )

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(single_dep_pipeline)
    assert pipeline_snapshot == serialize_rt(pipeline_snapshot)

    snapshot.assert_match(serialize_pp(pipeline_snapshot))
    snapshot.assert_match(create_pipeline_snapshot_id(pipeline_snapshot))
Example #20
    def __init__(self, pipeline_snapshot, parent_pipeline_snapshot, is_historical=False):
        self.pipeline_snapshot = check.inst_param(
            pipeline_snapshot, "pipeline_snapshot", PipelineSnapshot
        )
        self.parent_pipeline_snapshot = check.opt_inst_param(
            parent_pipeline_snapshot, "parent_pipeline_snapshot", PipelineSnapshot
        )

        if self.pipeline_snapshot.lineage_snapshot:
            check.invariant(
                self.parent_pipeline_snapshot is not None,
                "Can not create PipelineIndex for pipeline_snapshot with lineage without parent_pipeline_snapshot",
            )

        self._node_defs_snaps_index = {
            sd.name: sd
            for sd in pipeline_snapshot.solid_definitions_snapshot.solid_def_snaps
            + pipeline_snapshot.solid_definitions_snapshot.composite_solid_def_snaps
        }

        self._dagster_type_snaps_by_name_index = {
            dagster_type_snap.name: dagster_type_snap
            for dagster_type_snap in pipeline_snapshot.dagster_type_namespace_snapshot.all_dagster_type_snaps_by_key.values()
            if dagster_type_snap.name
        }

        self.dep_structure_index = DependencyStructureIndex(
            pipeline_snapshot.dep_structure_snapshot
        )

        self._comp_dep_structures = {
            comp_snap.name: DependencyStructureIndex(comp_snap.dep_structure_snapshot)
            for comp_snap in pipeline_snapshot.solid_definitions_snapshot.composite_solid_def_snaps
        }

        if is_historical:
            # defer the pipeline snapshot calculation for historical pipelines.  This tends to be an
            # expensive operation, so we want to avoid it unless we need it.  Also, because this is
            # a historical pipeline, we already have an identifying pipeline snapshot id (which may
            # or may not be the same as this calculated snapshot id). The identifying pipeline
            # snapshot id is the calculated snapshot id at the time that the run was created.
            self._pipeline_snapshot_id = None
        else:
            self._pipeline_snapshot_id = create_pipeline_snapshot_id(pipeline_snapshot)
Example #21
def test_pipeline_snap_all_props(snapshot):
    @solid
    def noop_solid(_):
        pass

    @pipeline(description='desc', tags={'key': 'value'})
    def noop_pipeline():
        noop_solid()

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(noop_pipeline)

    assert pipeline_snapshot.name == 'noop_pipeline'
    assert pipeline_snapshot.description == 'desc'
    assert pipeline_snapshot.tags == {'key': 'value'}

    assert pipeline_snapshot == serialize_rt(pipeline_snapshot)

    snapshot.assert_match(serialize_pp(pipeline_snapshot))
    snapshot.assert_match(create_pipeline_snapshot_id(pipeline_snapshot))
Example #22
def test_pipeline_snap_all_props(snapshot):
    @solid
    def noop_solid(_):
        pass

    @pipeline(description="desc", tags={"key": "value"})
    def noop_pipeline():
        noop_solid()

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(noop_pipeline)

    assert pipeline_snapshot.name == "noop_pipeline"
    assert pipeline_snapshot.description == "desc"
    assert pipeline_snapshot.tags == {"key": "value"}

    assert pipeline_snapshot == serialize_rt(pipeline_snapshot)

    snapshot.assert_match(serialize_pp(pipeline_snapshot))
    snapshot.assert_match(create_pipeline_snapshot_id(pipeline_snapshot))
Example #23
def test_two_invocations_deps_snap(snapshot):
    @solid
    def noop_solid(_):
        pass

    @pipeline
    def two_solid_pipeline():
        noop_solid.alias('one')()
        noop_solid.alias('two')()

    index = DependencyStructureIndex(
        build_dep_structure_snapshot_from_icontains_solids(two_solid_pipeline))
    assert index.get_invocation('one')
    assert index.get_invocation('two')

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(two_solid_pipeline)
    assert pipeline_snapshot == serialize_rt(pipeline_snapshot)

    snapshot.assert_match(serialize_pp(pipeline_snapshot))
    snapshot.assert_match(create_pipeline_snapshot_id(pipeline_snapshot))
Example #24
def test_create_execution_plan_with_dep(snapshot):
    @solid
    def solid_one(_):
        return 1

    @solid
    def solid_two(_, num):
        return num + 1

    @pipeline
    def noop_pipeline():
        solid_two(solid_one())

    execution_plan = create_execution_plan(noop_pipeline)

    snapshot.assert_match(
        serialize_pp(
            snapshot_from_execution_plan(
                execution_plan,
                create_pipeline_snapshot_id(
                    noop_pipeline.get_pipeline_snapshot()))))
Example #25
def test_create_with_composite(snapshot):
    @solid(output_defs=[OutputDefinition(name='out_num', dagster_type=int)])
    def return_one(_):
        return 1

    @solid(
        input_defs=[InputDefinition(name='num', dagster_type=int)],
        output_defs=[OutputDefinition(int)],
    )
    def add_one(_, num):
        return num + 1

    @composite_solid(
        output_defs=[OutputDefinition(name='named_output', dagster_type=int)])
    def comp_1():
        return add_one(return_one())

    @composite_solid(
        output_defs=[OutputDefinition(name='named_output', dagster_type=int)])
    def comp_2():
        return add_one(return_one())

    @solid
    def add(_, num_one, num_two):
        return num_one + num_two

    @pipeline
    def do_comps():
        add(num_one=comp_1(), num_two=comp_2())

    execution_plan = create_execution_plan(do_comps)

    snapshot.assert_match(
        serialize_pp(
            snapshot_from_execution_plan(
                execution_plan,
                create_pipeline_snapshot_id(
                    do_comps.get_pipeline_snapshot()))))
Example #26
def test_basic_fan_in(snapshot):
    @solid(output_defs=[OutputDefinition(Nothing)])
    def return_nothing(_):
        return None

    @solid(input_defs=[InputDefinition('nothing', Nothing)])
    def take_nothings(_):
        return None

    @pipeline
    def fan_in_test():
        take_nothings([
            return_nothing.alias('nothing_one')(),
            return_nothing.alias('nothing_two')()
        ])

    dep_structure_snapshot = build_dep_structure_snapshot_from_icontains_solids(
        fan_in_test)
    index = DependencyStructureIndex(dep_structure_snapshot)

    assert index.get_invocation('nothing_one')
    assert index.get_invocation('take_nothings')

    assert index.get_upstream_outputs('take_nothings', 'nothing') == [
        OutputHandleSnap('nothing_one', 'result'),
        OutputHandleSnap('nothing_two', 'result'),
    ]

    assert (deserialize_json_to_dagster_namedtuple(
        serialize_dagster_namedtuple(dep_structure_snapshot)) ==
            dep_structure_snapshot)

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(fan_in_test)
    assert pipeline_snapshot == serialize_rt(pipeline_snapshot)

    snapshot.assert_match(serialize_pp(pipeline_snapshot))
    snapshot.assert_match(create_pipeline_snapshot_id(pipeline_snapshot))
Example #27
    def _construct_run_with_snapshots(
        self,
        pipeline_name,
        run_id,
        environment_dict,
        mode,
        solid_subset,
        step_keys_to_execute,
        status,
        tags,
        root_run_id,
        parent_run_id,
        pipeline_snapshot,
        execution_plan_snapshot,
        parent_pipeline_snapshot,
    ):

        # https://github.com/dagster-io/dagster/issues/2403
        if tags and IS_AIRFLOW_INGEST_PIPELINE_STR in tags:
            if AIRFLOW_EXECUTION_DATE_STR not in tags:
                tags[AIRFLOW_EXECUTION_DATE_STR] = get_current_datetime_in_utc(
                ).isoformat()

        pipeline_run = PipelineRun(
            pipeline_name=pipeline_name,
            run_id=run_id,
            environment_dict=environment_dict,
            mode=mode,
            solid_subset=solid_subset,
            step_keys_to_execute=step_keys_to_execute,
            status=status,
            tags=tags,
            root_run_id=root_run_id,
            parent_run_id=parent_run_id,
        )

        if pipeline_snapshot is not None:
            from dagster.core.snap import create_pipeline_snapshot_id

            if pipeline_snapshot.lineage_snapshot:
                if not self._run_storage.has_pipeline_snapshot(
                        pipeline_snapshot.lineage_snapshot.parent_snapshot_id):
                    check.invariant(
                        create_pipeline_snapshot_id(
                            parent_pipeline_snapshot) ==
                        pipeline_snapshot.lineage_snapshot.parent_snapshot_id,
                        'Parent pipeline snapshot id out of sync with passed parent pipeline snapshot',
                    )

                    returned_pipeline_snapshot_id = self._run_storage.add_pipeline_snapshot(
                        parent_pipeline_snapshot)
                    check.invariant(
                        pipeline_snapshot.lineage_snapshot.parent_snapshot_id
                        == returned_pipeline_snapshot_id)

            pipeline_snapshot_id = create_pipeline_snapshot_id(
                pipeline_snapshot)
            if not self._run_storage.has_pipeline_snapshot(
                    pipeline_snapshot_id):
                returned_pipeline_snapshot_id = self._run_storage.add_pipeline_snapshot(
                    pipeline_snapshot)
                check.invariant(
                    pipeline_snapshot_id == returned_pipeline_snapshot_id)

            pipeline_run = pipeline_run.with_pipeline_snapshot_id(
                pipeline_snapshot_id)

        if execution_plan_snapshot is not None:
            from dagster.core.snap import create_execution_plan_snapshot_id

            check.invariant(execution_plan_snapshot.pipeline_snapshot_id ==
                            pipeline_snapshot_id)

            check.invariant(
                set(step_keys_to_execute) == set(
                    execution_plan_snapshot.step_keys_to_execute)
                if step_keys_to_execute else set(
                    execution_plan_snapshot.step_keys_to_execute) == set(
                        [step.key for step in execution_plan_snapshot.steps]),
                'We encode step_keys_to_execute twice in our stack, unfortunately. This check '
                'ensures that they are consistent. We check that step_keys_to_execute in the plan '
                'matches the step_keys_to_execute params if it is set. If it is not, this indicates '
                'a full execution plan, and so we verify that.',
            )

            execution_plan_snapshot_id = create_execution_plan_snapshot_id(
                execution_plan_snapshot)

            if not self._run_storage.has_execution_plan_snapshot(
                    execution_plan_snapshot_id):
                returned_execution_plan_snapshot_id = self._run_storage.add_execution_plan_snapshot(
                    execution_plan_snapshot)

                check.invariant(execution_plan_snapshot_id ==
                                returned_execution_plan_snapshot_id)

            pipeline_run = pipeline_run.with_execution_plan_snapshot_id(
                execution_plan_snapshot_id)

        return pipeline_run
Example #28
File: in_memory.py Project: drat/dagster
    def add_pipeline_snapshot(self, pipeline_snapshot):
        check.inst_param(pipeline_snapshot, "pipeline_snapshot",
                         PipelineSnapshot)
        pipeline_snapshot_id = create_pipeline_snapshot_id(pipeline_snapshot)
        self._pipeline_snapshots[pipeline_snapshot_id] = pipeline_snapshot
        return pipeline_snapshot_id
Example #29
    def _construct_run_with_snapshots(
        self,
        pipeline_name=None,
        run_id=None,
        environment_dict=None,
        mode=None,
        solid_subset=None,
        step_keys_to_execute=None,
        status=None,
        tags=None,
        root_run_id=None,
        parent_run_id=None,
        pipeline_snapshot=None,
        execution_plan_snapshot=None,
    ):

        if tags and IS_AIRFLOW_INGEST_PIPELINE_STR in tags:
            if AIRFLOW_EXECUTION_DATE_STR not in tags:
                tags[AIRFLOW_EXECUTION_DATE_STR] = get_current_datetime_in_utc(
                ).isoformat()

        pipeline_run = PipelineRun(
            pipeline_name=pipeline_name,
            run_id=run_id,
            environment_dict=environment_dict,
            mode=mode,
            solid_subset=solid_subset,
            step_keys_to_execute=step_keys_to_execute,
            status=status,
            tags=tags,
            root_run_id=root_run_id,
            parent_run_id=parent_run_id,
        )

        if pipeline_snapshot is not None:
            from dagster.core.snap import create_pipeline_snapshot_id

            pipeline_snapshot_id = create_pipeline_snapshot_id(
                pipeline_snapshot)

            if not self._run_storage.has_pipeline_snapshot(
                    pipeline_snapshot_id):
                returned_pipeline_snapshot_id = self._run_storage.add_pipeline_snapshot(
                    pipeline_snapshot)

                check.invariant(
                    pipeline_snapshot_id == returned_pipeline_snapshot_id)

            pipeline_run = pipeline_run.with_pipeline_snapshot_id(
                pipeline_snapshot_id)

        if execution_plan_snapshot is not None:
            from dagster.core.snap import create_execution_plan_snapshot_id

            check.invariant(execution_plan_snapshot.pipeline_snapshot_id ==
                            pipeline_snapshot_id)

            execution_plan_snapshot_id = create_execution_plan_snapshot_id(
                execution_plan_snapshot)

            if not self._run_storage.has_execution_plan_snapshot(
                    execution_plan_snapshot_id):
                returned_execution_plan_snapshot_id = self._run_storage.add_execution_plan_snapshot(
                    execution_plan_snapshot)

                check.invariant(execution_plan_snapshot_id ==
                                returned_execution_plan_snapshot_id)

            pipeline_run = pipeline_run.with_execution_plan_snapshot_id(
                execution_plan_snapshot_id)

        return pipeline_run
Example #30
def pid(pipeline_def):
    return create_pipeline_snapshot_id(
        PipelineSnapshot.from_pipeline_def(pipeline_def))