Example #1
    def test_debug_snapshot_import(self, storage):
        from dagster.core.execution.api import create_execution_plan
        from dagster.core.snap import (
            snapshot_from_execution_plan,
            create_execution_plan_snapshot_id,
        )

        run_id = make_new_run_id()
        run_to_add = TestRunStorage.build_run(pipeline_name="pipeline_name",
                                              run_id=run_id)
        storage.add_run(run_to_add)

        pipeline_def = PipelineDefinition(name="some_pipeline", solid_defs=[])

        pipeline_snapshot = pipeline_def.get_pipeline_snapshot()
        pipeline_snapshot_id = create_pipeline_snapshot_id(pipeline_snapshot)
        new_pipeline_snapshot_id = f"{pipeline_snapshot_id}-new-snapshot"

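        # Store under an explicit, non-content-derived id (as a debug import
        # would), then verify lookups key off the stored id, not the content hash.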
        storage.add_snapshot(pipeline_snapshot,
                             snapshot_id=new_pipeline_snapshot_id)
        assert not storage.has_snapshot(pipeline_snapshot_id)
        assert storage.has_snapshot(new_pipeline_snapshot_id)

        execution_plan = create_execution_plan(pipeline_def)
        ep_snapshot = snapshot_from_execution_plan(execution_plan,
                                                   new_pipeline_snapshot_id)
        ep_snapshot_id = create_execution_plan_snapshot_id(ep_snapshot)
        new_ep_snapshot_id = f"{ep_snapshot_id}-new-snapshot"

        storage.add_snapshot(ep_snapshot, snapshot_id=new_ep_snapshot_id)
        assert not storage.has_snapshot(ep_snapshot_id)
        assert storage.has_snapshot(new_ep_snapshot_id)
Example #2
    def test_single_write_read_with_snapshot(self, storage):
        run_with_snapshot_id = "lkasjdflkjasdf"
        pipeline_def = PipelineDefinition(name="some_pipeline", solid_defs=[])

        pipeline_snapshot = pipeline_def.get_pipeline_snapshot()

        pipeline_snapshot_id = create_pipeline_snapshot_id(pipeline_snapshot)

        run_with_snapshot = PipelineRun(
            run_id=run_with_snapshot_id,
            pipeline_name=pipeline_def.name,
            pipeline_snapshot_id=pipeline_snapshot_id,
        )

        assert not storage.has_pipeline_snapshot(pipeline_snapshot_id)

        assert storage.add_pipeline_snapshot(
            pipeline_snapshot) == pipeline_snapshot_id

        assert serialize_pp(storage.get_pipeline_snapshot(
            pipeline_snapshot_id)) == serialize_pp(pipeline_snapshot)

        storage.add_run(run_with_snapshot)

        assert storage.get_run_by_id(run_with_snapshot_id) == run_with_snapshot

        storage.wipe()

        assert not storage.has_pipeline_snapshot(pipeline_snapshot_id)
        assert not storage.has_run(run_with_snapshot_id)
Example #3
    def test_single_write_read_with_snapshot(self, storage):
        if not isinstance(storage, InMemoryRunStorage):
            pytest.skip()

        run_with_snapshot_id = 'lkasjdflkjasdf'
        pipeline_def = PipelineDefinition(name='some_pipeline', solid_defs=[])

        pipeline_snapshot = pipeline_def.get_pipeline_snapshot()

        pipeline_snapshot_id = create_pipeline_snapshot_id(pipeline_snapshot)

        run_with_snapshot = PipelineRun.create_empty_run(
            run_id=run_with_snapshot_id,
            pipeline_name=pipeline_def.name,
            pipeline_snapshot_id=pipeline_snapshot_id,
        )

        assert not storage.has_pipeline_snapshot(pipeline_snapshot_id)

        assert storage.add_pipeline_snapshot(pipeline_snapshot) == pipeline_snapshot_id

        assert storage.get_pipeline_snapshot(pipeline_snapshot_id) == pipeline_snapshot

        storage.add_run(run_with_snapshot)

        assert storage.get_run_by_id(run_with_snapshot_id) == run_with_snapshot

        storage.wipe()

        assert not storage.has_pipeline_snapshot(pipeline_snapshot_id)
        assert not storage.has_run(run_with_snapshot_id)
Example #4
def external_pipeline_data_from_def(
        pipeline_def: PipelineDefinition) -> ExternalPipelineData:
    check.inst_param(pipeline_def, "pipeline_def", PipelineDefinition)
    return ExternalPipelineData(
        name=pipeline_def.name,
        pipeline_snapshot=pipeline_def.get_pipeline_snapshot(),
        parent_pipeline_snapshot=pipeline_def.get_parent_pipeline_snapshot(),
        active_presets=sorted(
            list(map(external_preset_data_from_def, pipeline_def.preset_defs)),
            key=lambda pd: pd.name,
        ),
        is_job=isinstance(pipeline_def, JobDefinition),
    )
Example #5
    def test_add_get_snapshot(self, storage):
        pipeline_def = PipelineDefinition(name="some_pipeline", solid_defs=[])
        pipeline_snapshot = pipeline_def.get_pipeline_snapshot()
        pipeline_snapshot_id = create_pipeline_snapshot_id(pipeline_snapshot)

        assert storage.add_pipeline_snapshot(pipeline_snapshot) == pipeline_snapshot_id
        fetched_pipeline_snapshot = storage.get_pipeline_snapshot(pipeline_snapshot_id)
        assert fetched_pipeline_snapshot
        assert serialize_pp(fetched_pipeline_snapshot) == serialize_pp(pipeline_snapshot)
        assert storage.has_pipeline_snapshot(pipeline_snapshot_id)
        assert not storage.has_pipeline_snapshot("nope")

        storage.wipe()

        assert not storage.has_pipeline_snapshot(pipeline_snapshot_id)
Example #6
def validate_run_config(
    pipeline_def: PipelineDefinition,
    run_config: Optional[Dict[str, Any]] = None,
    mode: Optional[str] = None,
) -> Dict[str, Any]:
    """Function to validate a provided run config blob against a given pipeline and mode.

    If validation is successful, this function will return a dictionary representation of the
    validated config actually used during execution.

    Args:
        pipeline_def (PipelineDefinition): The pipeline definition to validate run config against
        run_config (Optional[Dict[str, Any]]): The run config to validate
        mode (Optional[str]): The mode of the pipeline to validate against
            (different modes may require different config)

    Returns:
        Dict[str, Any]: A dictionary representation of the validated config.
    """

    pipeline_def = check.inst_param(pipeline_def, "pipeline_def",
                                    PipelineDefinition)
    run_config = check.opt_dict_param(run_config, "run_config", key_type=str)
    mode = check.opt_str_param(mode,
                               "mode",
                               default=pipeline_def.get_default_mode_name())

    return ResolvedRunConfig.build(pipeline_def, run_config,
                                   mode=mode).to_dict()
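A minimal usage sketch (hypothetical, not taken from the project): an empty
pipeline requires no config, so an empty run_config should validate and the
function should return the fully resolved default config dictionary.

empty_pipeline = PipelineDefinition(name="noop_pipeline", solid_defs=[])
# No solids and no required resources, so {} validates against the default mode.
validated = validate_run_config(empty_pipeline, run_config={})
assert isinstance(validated, dict)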
Example #7
    def test_write_conflicting_run_id(self, storage):
        double_run_id = "double_run_id"
        pipeline_def = PipelineDefinition(name="some_pipeline", solid_defs=[])

        run = DagsterRun(run_id=double_run_id, pipeline_name=pipeline_def.name)

        assert storage.add_run(run)
        with pytest.raises(DagsterRunAlreadyExists):
            storage.add_run(run)
Example #8
    def test_add_get_execution_snapshot(self, storage):
        from dagster.core.execution.api import create_execution_plan
        from dagster.core.snap import snapshot_from_execution_plan

        pipeline_def = PipelineDefinition(name="some_pipeline", solid_defs=[])
        execution_plan = create_execution_plan(pipeline_def)
        ep_snapshot = snapshot_from_execution_plan(
            execution_plan, pipeline_def.get_pipeline_snapshot_id())

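        # add_execution_plan_snapshot returns the content-addressed snapshot id.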
        snapshot_id = storage.add_execution_plan_snapshot(ep_snapshot)
        fetched_ep_snapshot = storage.get_execution_plan_snapshot(snapshot_id)
        assert fetched_ep_snapshot
        assert serialize_pp(fetched_ep_snapshot) == serialize_pp(ep_snapshot)
        assert storage.has_execution_plan_snapshot(snapshot_id)
        assert not storage.has_execution_plan_snapshot("nope")

        storage.wipe()

        assert not storage.has_execution_plan_snapshot(snapshot_id)
Example #9
    def get_associated_input_def(
            self, pipeline_def: PipelineDefinition) -> InputDefinition:
        """
        Returns the InputDefinition along the potential composition InputMapping chain
        that the config was provided at.
        """
        if self.solid_handle:
            return pipeline_def.get_solid(self.solid_handle).input_def_named(
                self.input_name)
        else:
            return pipeline_def.graph.input_def_named(self.input_name)
Example #10
File: inputs.py Project: keyz/dagster
    def compute_version(
        self,
        step_versions: Dict[str, Optional[str]],
        pipeline_def: PipelineDefinition,
        resolved_run_config: ResolvedRunConfig,
    ) -> Optional[str]:
        solid_config = resolved_run_config.solids.get(str(self.solid_handle))
        config_data = solid_config.inputs.get(self.input_name) if solid_config else None

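        # The input's type loader derives a version from the config it will load.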
        solid_def = pipeline_def.get_solid(self.solid_handle)
        dagster_type = solid_def.input_def_named(self.input_name).dagster_type
        return dagster_type.loader.compute_loaded_input_version(config_data)
Example #11
    def test_single_write_with_missing_snapshot(self, storage):

        run_with_snapshot_id = "lkasjdflkjasdf"
        pipeline_def = PipelineDefinition(name="some_pipeline", solid_defs=[])

        run_with_missing_snapshot = PipelineRun(
            run_id=run_with_snapshot_id,
            pipeline_name=pipeline_def.name,
            pipeline_snapshot_id="nope",
        )

        with pytest.raises(DagsterSnapshotDoesNotExist):
            storage.add_run(run_with_missing_snapshot)
Example #12
    def do_test_single_write_with_missing_snapshot(self, storage):
        if not isinstance(storage, InMemoryRunStorage):
            pytest.skip()

        run_with_snapshot_id = 'lkasjdflkjasdf'
        pipeline_def = PipelineDefinition(name='some_pipeline', solid_defs=[])

        run_with_missing_snapshot = PipelineRun.create_empty_run(
            run_id=run_with_snapshot_id,
            pipeline_name=pipeline_def.name,
            pipeline_snapshot_id='nope',
        )

        with pytest.raises(DagsterSnapshotDoesNotExist):
            storage.add_run(run_with_missing_snapshot)
Example #13
    def test_fetch_by_snapshot_id(self, storage):
        assert storage
        pipeline_def_a = PipelineDefinition(name="some_pipeline",
                                            solid_defs=[])
        pipeline_def_b = PipelineDefinition(name="some_other_pipeline",
                                            solid_defs=[])
        pipeline_snapshot_a = pipeline_def_a.get_pipeline_snapshot()
        pipeline_snapshot_b = pipeline_def_b.get_pipeline_snapshot()
        pipeline_snapshot_a_id = create_pipeline_snapshot_id(
            pipeline_snapshot_a)
        pipeline_snapshot_b_id = create_pipeline_snapshot_id(
            pipeline_snapshot_b)

        assert storage.add_pipeline_snapshot(
            pipeline_snapshot_a) == pipeline_snapshot_a_id
        assert storage.add_pipeline_snapshot(
            pipeline_snapshot_b) == pipeline_snapshot_b_id

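        # Create one run against each snapshot so the filter can tell them apart.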
        one = make_new_run_id()
        two = make_new_run_id()
        storage.add_run(
            TestRunStorage.build_run(
                run_id=one,
                pipeline_name="some_pipeline",
                pipeline_snapshot_id=pipeline_snapshot_a_id,
            ))
        storage.add_run(
            TestRunStorage.build_run(
                run_id=two,
                pipeline_name="some_other_pipeline",
                pipeline_snapshot_id=pipeline_snapshot_b_id,
            ))
        assert len(storage.get_runs()) == 2
        runs_a = storage.get_runs(
            PipelineRunsFilter(snapshot_id=pipeline_snapshot_a_id))
        assert len(runs_a) == 1
        assert runs_a[0].run_id == one

        runs_b = storage.get_runs(
            PipelineRunsFilter(snapshot_id=pipeline_snapshot_b_id))
        assert len(runs_b) == 1
        assert runs_b[0].run_id == two
Example #14
    def get_input_def(self,
                      pipeline_def: PipelineDefinition) -> InputDefinition:
        return pipeline_def.get_solid(self.solid_handle).input_def_named(
            self.input_name)
Example #15
File: execute.py Project: xjhc/dagster
def execute_in_process(
    node: NodeDefinition,
    run_config: Optional[dict] = None,
    resources: Optional[Dict[str, ResourceDefinition]] = None,
    loggers: Optional[Dict[str, LoggerDefinition]] = None,
    input_values: Optional[Dict[str, Any]] = None,
    instance: Optional[DagsterInstance] = None,
    output_capturing_enabled: bool = True,
) -> NodeExecutionResult:
    node = check.inst_param(node, "node", NodeDefinition)
    resources = check.opt_dict_param(resources,
                                     "resources",
                                     key_type=str,
                                     value_type=ResourceDefinition)
    loggers = check.opt_dict_param(loggers,
                                   "logger",
                                   key_type=str,
                                   value_type=LoggerDefinition)
    run_config = check.opt_dict_param(run_config, "run_config", key_type=str)
    input_values = check.opt_dict_param(input_values,
                                        "input_values",
                                        key_type=str)

    node_defs = [node]

    dependencies: Dict[str, Dict[str,
                                 DependencyDefinition]] = defaultdict(dict)

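    # Wire each provided input value into the node through a generated source solid.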
    for input_name, input_value in input_values.items():
        dependencies[node.name][input_name] = DependencyDefinition(input_name)
        node_defs.append(_create_value_solid(input_name, input_value))

    mode_def = ModeDefinition(
        "created",
        resource_defs=merge_dicts(resources,
                                  {EPHEMERAL_IO_MANAGER_KEY: mem_io_manager}),
        logger_defs=loggers,
    )

    pipeline_def = PipelineDefinition(
        node_defs,
        name=f"ephemeral_{node.name}_node_pipeline",
        mode_defs=[mode_def],
        dependencies=dependencies,
    )

    pipeline = InMemoryPipeline(pipeline_def)

    execution_plan = create_execution_plan(pipeline,
                                           run_config=run_config,
                                           mode=mode_def.name)

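    # Captures step outputs in memory when output capturing is enabled.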
    recorder: Dict[StepOutputHandle, Any] = {}

    with ephemeral_instance_if_missing(instance) as execute_instance:
        pipeline_run = execute_instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            run_config=run_config,
            mode=mode_def.name,
        )

        _execute_run_iterable = ExecuteRunWithPlanIterable(
            execution_plan=execution_plan,
            iterator=pipeline_execution_iterator,
            execution_context_manager=PipelineExecutionContextManager(
                pipeline=pipeline,
                execution_plan=execution_plan,
                pipeline_run=pipeline_run,
                instance=execute_instance,
                run_config=run_config,
                output_capture=recorder if output_capturing_enabled else None,
            ),
        )
        event_list = list(_execute_run_iterable)

    top_level_node_handle = SolidHandle.from_string(node.name)

    event_list_for_top_lvl_node = [
        event for event in event_list if event.solid_handle
        and event.solid_handle.is_or_descends_from(top_level_node_handle)
    ]

    if isinstance(node, SolidDefinition):
        return InProcessSolidResult(node, SolidHandle(node.name, None),
                                    event_list_for_top_lvl_node, recorder)
    else:
        return InProcessGraphResult(node, SolidHandle(node.name, None),
                                    event_list_for_top_lvl_node, recorder)
Example #16
def execute_in_process(
    node: NodeDefinition,
    run_config: Optional[dict] = None,
    resources: Optional[Dict[str, ResourceDefinition]] = None,
    loggers: Optional[Dict[str, LoggerDefinition]] = None,
    instance: Optional[DagsterInstance] = None,
) -> ExecutionResult:
    node = check.inst_param(node, "node", NodeDefinition)
    resources = check.opt_dict_param(resources,
                                     "resources",
                                     key_type=str,
                                     value_type=ResourceDefinition)
    loggers = check.opt_dict_param(loggers,
                                   "logger",
                                   key_type=str,
                                   value_type=LoggerDefinition)
    run_config = check.opt_dict_param(run_config, "run_config", key_type=str)

    node_defs = [node]

    mode_def = ModeDefinition(
        "created",
        resource_defs=resources,
        logger_defs=loggers,
    )

    pipeline_def = PipelineDefinition(
        node_defs,
        name=f"ephemeral_{node.name}_node_pipeline",
        mode_defs=[mode_def],
    )

    pipeline = InMemoryPipeline(pipeline_def)

    execution_plan = create_execution_plan(pipeline,
                                           run_config=run_config,
                                           mode=mode_def.name)

    with ephemeral_instance_if_missing(instance) as execute_instance:
        pipeline_run = execute_instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            run_config=run_config,
            mode=mode_def.name,
        )

        _execute_run_iterable = ExecuteRunWithPlanIterable(
            execution_plan=execution_plan,
            iterator=pipeline_execution_iterator,
            execution_context_manager=PipelineExecutionContextManager(
                execution_plan=execution_plan,
                pipeline_run=pipeline_run,
                instance=execute_instance,
                run_config=run_config,
            ),
        )
        event_list = list(_execute_run_iterable)

    top_level_node_handle = SolidHandle.from_string(node.name)

    event_list_for_top_lvl_node = [
        event for event in event_list if event.solid_handle
        and event.solid_handle.is_or_descends_from(top_level_node_handle)
    ]

    return ExecutionResult(node, event_list_for_top_lvl_node)
Example #17
    def required_resource_keys(self,
                               pipeline_def: PipelineDefinition) -> Set[str]:
        input_def = pipeline_def.get_solid(self.solid_handle).input_def_named(
            self.input_name)

        return {input_def.root_manager_key}
Example #18
def define_foo_pipeline():
    @lambda_solid
    def do_something():
        return 1

    return PipelineDefinition(name='foo', solid_defs=[do_something])
Example #19
    def _load_value(self, pipeline_def: PipelineDefinition):
        return pipeline_def.get_solid(
            self.solid_handle).definition.default_value_for_input(
                self.input_name)
Example #20
def execute_in_process(
    node: NodeDefinition,
    run_config: Optional[dict] = None,
    resources: Optional[Dict[str, Any]] = None,
    loggers: Optional[Dict[str, LoggerDefinition]] = None,
    input_values: Optional[Dict[str, Any]] = None,
    instance: Optional[DagsterInstance] = None,
    output_capturing_enabled: bool = True,
) -> NodeExecutionResult:
    node = check.inst_param(node, "node", NodeDefinition)
    loggers = check.opt_dict_param(loggers,
                                   "logger",
                                   key_type=str,
                                   value_type=LoggerDefinition)
    run_config = check.opt_dict_param(run_config, "run_config", key_type=str)
    input_values = check.opt_dict_param(input_values,
                                        "input_values",
                                        key_type=str)
    resources = check.opt_dict_param(resources, "resources", key_type=str)

    resource_defs = {}
    # Wrap instantiated resource values in a resource definition.
    # If an instantiated IO manager is provided, wrap it in an IO manager definition.
    for resource_key, resource in resources.items():
        if isinstance(resource, ResourceDefinition):
            resource_defs[resource_key] = resource
        elif isinstance(resource, IOManager):
            resource_defs[
                resource_key] = IOManagerDefinition.hardcoded_io_manager(
                    resource)
        else:
            resource_defs[
                resource_key] = ResourceDefinition.hardcoded_resource(resource)

    node_defs = [node]

    dependencies: Dict[Union[str, SolidInvocation],
                       Dict[str, IDependencyDefinition]] = defaultdict(dict)

    for input_name, input_value in input_values.items():
        dependencies[node.name][input_name] = DependencyDefinition(input_name)
        node_defs.append(_create_value_solid(input_name, input_value))

    mode_def = ModeDefinition(
        "created",
        resource_defs=merge_dicts(resource_defs,
                                  {EPHEMERAL_IO_MANAGER_KEY: mem_io_manager}),
        logger_defs=loggers,
    )

    pipeline_def = PipelineDefinition(
        node_defs,
        name=f"ephemeral_{node.name}_node_pipeline",
        mode_defs=[mode_def],
        dependencies=dependencies,
    )

    return core_execute_in_process(
        node=node,
        ephemeral_pipeline=pipeline_def,
        run_config=run_config,
        instance=instance,
        output_capturing_enabled=output_capturing_enabled,
    )
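A hedged usage sketch of the raw-resource wrapping above (the solid and the
"constant" resource key are hypothetical):

@solid(required_resource_keys={"constant"})
def reads_constant(context):
    return context.resources.constant

# A plain value is accepted for "constant": execute_in_process wraps it with
# ResourceDefinition.hardcoded_resource before building the ephemeral pipeline.
result = execute_in_process(reads_constant, resources={"constant": 42})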