Пример #1
0
    def _root_manager(input_context: InputContext) -> Any:
        """Load the value of a source asset via the IO manager configured for it.

        The source asset is looked up by ``input_context.asset_key``. An output
        context is built for it around a placeholder op, and that context is
        passed as the upstream output of a new input context, which is handed
        to the IO manager named by the source asset's ``io_manager_key``.

        Args:
            input_context: The input context for the input being loaded.

        Returns:
            Whatever the selected IO manager's ``load_input`` returns.
        """
        asset_key = cast(AssetKey, input_context.asset_key)
        asset = source_assets_by_key[asset_key]
        # The last path component is used both as the output name and the context name.
        output_name = asset_key.path[-1]

        # Placeholder op: it only exists to carry an output bound to the asset key.
        @op(out={output_name: Out(asset_key=asset_key)})
        def _op():
            pass

        upstream_context = build_output_context(
            name=output_name,
            step_key="none",
            solid_def=_op,
            metadata=asset.metadata,
        )
        load_context = build_input_context(
            name=input_context.name,
            metadata=input_context.metadata,
            config=input_context.config,
            dagster_type=input_context.dagster_type,
            upstream_output=upstream_context,
            op_def=input_context.op_def,
        )

        resources = cast(Any, input_context.resources)
        manager = getattr(resources, asset.io_manager_key)
        return manager.load_input(load_context)
Пример #2
0
    def _t_fn(step_context, inputs):
        """Run the notebook through papermill and yield its outputs and events.

        The notebook at ``notebook_path`` is loaded, its parameters are replaced,
        and it is executed with the "dagstermill" papermill engine inside a
        temporary directory. Afterwards the executed notebook is either yielded
        as a binary output (when ``output_notebook_name`` is set) or written via
        ``step_context.resources.file_manager`` and reported as an
        ``AssetMaterialization``. Finally, outputs and events recorded in the
        executed notebook's scraps are yielded.

        ``notebook_path``, ``dagster_factory_name``, ``name``,
        ``output_notebook_name``, ``output_notebook`` and ``asset_key_prefix``
        come from the enclosing scope, which is not visible here.

        Args:
            step_context: A ``SolidExecutionContext`` whose ``run_config`` is a dict.
            inputs: Input values forwarded to ``get_papermill_parameters``.

        Yields:
            ``Output`` and ``AssetMaterialization`` objects, plus any events
            unpickled from "event-" scraps.

        Raises:
            Exception: Any error raised while executing the notebook is logged
                and re-raised.
            Failure, RetryRequested: When such an event was recorded in the
                notebook's scraps, it is raised instead of yielded.
        """
        check.inst_param(step_context, "step_context", SolidExecutionContext)
        check.param_invariant(
            isinstance(step_context.run_config, dict),
            "step_context",
            "StepExecutionContext must have valid run_config",
        )

        step_execution_context = step_context.get_step_execution_context()

        with tempfile.TemporaryDirectory() as output_notebook_dir:
            with safe_tempfile_path() as output_log_path:

                prefix = str(uuid.uuid4())
                parameterized_notebook_path = os.path.join(
                    output_notebook_dir, f"{prefix}-inter.ipynb"
                )

                executed_notebook_path = os.path.join(
                    output_notebook_dir, f"{prefix}-out.ipynb"
                )

                # Scaffold the registration here
                nb = load_notebook_node(notebook_path)
                compute_descriptor = (
                    "solid"
                    if dagster_factory_name == "define_dagstermill_solid"
                    else "op"
                )
                nb_no_parameters = replace_parameters(
                    step_execution_context,
                    nb,
                    get_papermill_parameters(
                        step_execution_context,
                        inputs,
                        output_log_path,
                        compute_descriptor,
                    ),
                )
                write_ipynb(nb_no_parameters, parameterized_notebook_path)

                try:
                    papermill_engines.register("dagstermill", DagstermillEngine)
                    papermill.execute_notebook(
                        input_path=parameterized_notebook_path,
                        output_path=executed_notebook_path,
                        engine_name="dagstermill",
                        log_output=True,
                    )

                except Exception as ex:
                    error_info = serializable_error_info_from_exc_info(sys.exc_info())
                    step_execution_context.log.warning(
                        f"Error when attempting to materialize executed notebook: {error_info!s}"
                    )
                    # pylint: disable=no-member
                    # compat:
                    if isinstance(ex, ExecutionError) and ex.ename in (
                        "RetryRequested",
                        "Failure",
                    ):
                        step_execution_context.log.warning(
                            f"Encountered raised {ex.ename} in notebook. Use dagstermill.yield_event "
                            "with RetryRequested or Failure to trigger their behavior."
                        )

                    raise

            step_execution_context.log.debug(
                f"Notebook execution complete for {name} at {executed_notebook_path}."
            )
            if output_notebook_name is not None:
                # yield output notebook binary stream as a solid output; read it fully
                # first so the file is closed before the generator is suspended
                with open(executed_notebook_path, "rb") as fd:
                    executed_notebook_bytes = fd.read()
                yield Output(executed_notebook_bytes, output_notebook_name)

            else:
                # backcompat
                executed_notebook_file_handle = None
                try:
                    # use binary mode when moving the file since certain file_managers such as S3
                    # may try to hash the contents
                    with open(executed_notebook_path, "rb") as fd:
                        executed_notebook_file_handle = (
                            step_context.resources.file_manager.write(
                                fd, mode="wb", ext="ipynb"
                            )
                        )
                        executed_notebook_materialization_path = (
                            executed_notebook_file_handle.path_desc
                        )

                    yield AssetMaterialization(
                        asset_key=(asset_key_prefix + [f"{name}_output_notebook"]),
                        description="Location of output notebook in file manager",
                        metadata_entries=[
                            MetadataEntry.fspath(executed_notebook_materialization_path)
                        ],
                    )

                except Exception:
                    # if file manager writing errors, e.g. file manager is not provided, we throw a warning
                    # and fall back to the previously stored temp executed notebook.
                    step_context.log.warning(
                        "Error when attempting to materialize executed notebook using file manager: "
                        f"{str(serializable_error_info_from_exc_info(sys.exc_info()))}"
                        f"\nNow falling back to local: notebook execution was temporarily materialized at {executed_notebook_path}"
                        "\nIf you have supplied a file manager and expect to use it for materializing the "
                        'notebook, please include "file_manager" in the `required_resource_keys` argument '
                        f"to `{dagster_factory_name}`"
                    )

                if output_notebook is not None:
                    # The handle is still None here if the file manager write above failed.
                    yield Output(executed_notebook_file_handle, output_notebook)

            # deferred import for perf
            import scrapbook

            output_nb = scrapbook.read_notebook(executed_notebook_path)

            # Read once; the notebook is not modified while outputs are yielded.
            data_dict = output_nb.scraps.data_dict
            for output_name in step_execution_context.solid_def.output_dict:
                if output_name in data_dict:
                    # read outputs that were passed out of process via io manager from `yield_result`
                    step_output_handle = StepOutputHandle(
                        step_key=step_execution_context.step.key,
                        output_name=output_name,
                    )
                    output_context = step_execution_context.get_output_context(
                        step_output_handle
                    )
                    io_manager = step_execution_context.get_io_manager(
                        step_output_handle
                    )
                    value = io_manager.load_input(
                        build_input_context(upstream_output=output_context)
                    )

                    yield Output(value, output_name)

            for key, value in output_nb.scraps.items():
                if key.startswith("event-"):
                    # NOTE(review): pickle.loads executes arbitrary code on load; this assumes
                    # the scrap file was written by the notebook run above and is trusted.
                    with open(value.data, "rb") as fd:
                        event = pickle.loads(fd.read())
                    # Raise/yield only after the file is closed so no handle is held
                    # while the generator is suspended.
                    if isinstance(event, (Failure, RetryRequested)):
                        raise event
                    yield event