Example #1
    def __init__(
        self,
        local_path: typing.Optional[os.PathLike] = None,
        remote_path: typing.Optional[str] = None,
        supported_mode: SchemaOpenMode = SchemaOpenMode.WRITE,
        downloader: typing.Optional[typing.Callable[[str, os.PathLike], None]] = None,
    ):

        if supported_mode == SchemaOpenMode.READ and remote_path is None:
            raise ValueError(
                "To create a FlyteSchema in read mode, remote_path is required"
            )
        if (supported_mode == SchemaOpenMode.WRITE and local_path is None
                and FlyteContext.current_context().file_access is None):
            raise ValueError(
                "To create a FlyteSchema in write mode, local_path is required"
            )

        if local_path is None:
            local_path = FlyteContext.current_context().file_access.get_random_local_directory()
        self._local_path = local_path
        self._remote_path = remote_path
        self._supported_mode = supported_mode
        # This is a special attribute that indicates if the data was either downloaded or uploaded
        self._downloaded = False
        self._downloader = downloader
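
A minimal usage sketch of the constructor above (the remote URI is illustrative; FlyteSchema is the class whose __init__ is shown):

# Write mode: local_path may be omitted; a random local directory is generated.
s_w = FlyteSchema(supported_mode=SchemaOpenMode.WRITE)

# Read mode: remote_path is mandatory, per the check in __init__ above.
s_r = FlyteSchema(
    remote_path="s3://my-bucket/schemas/demo",  # illustrative URI
    supported_mode=SchemaOpenMode.READ,
)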
Example #2
    def _flyte_schema(self, dataset: FlyteSchema) -> str:
        if not self._local_file_path:
            raise ValueError("local_file_path is missing!")

        # copy parquet file to user-given directory
        FlyteContext.current_context().file_access.get_data(
            dataset.remote_path, self._local_file_path, is_multipart=True)
        return os.path.basename(self._local_file_path)
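
The file_access provider used above is flytekit's general mover between remote and local storage. A hedged sketch of the surrounding calls (paths are illustrative; put_data is assumed to mirror get_data):

fa = FlyteContext.current_context().file_access
local_dir = fa.get_random_local_directory()
# Download a multipart (directory-like) blob, then upload a copy elsewhere.
fa.get_data("s3://my-bucket/dataset/", local_dir, is_multipart=True)
fa.put_data(local_dir, "s3://my-bucket/dataset-copy/", is_multipart=True)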
Example #3
    def add_entity(self, entity: PythonAutoContainerTask, **kwargs) -> Node:
        """
        Anytime you add an entity, all the inputs to the entity must be bound.
        """
        # circular import
        from flytekit.core.node_creation import create_node

        ctx = FlyteContext.current_context()
        if ctx.compilation_state is not None:
            raise Exception("Can't already be compiling")
        with FlyteContextManager.with_context(ctx.with_compilation_state(self.compilation_state)) as ctx:
            n = create_node(entity=entity, **kwargs)

            def get_input_values(input_value):
                if isinstance(input_value, list):
                    input_promises = []
                    for x in input_value:
                        input_promises.extend(get_input_values(x))
                    return input_promises
                if isinstance(input_value, dict):
                    input_promises = []
                    for _, v in input_value.items():
                        input_promises.extend(get_input_values(v))
                    return input_promises
                else:
                    return [input_value]

            # Every time an entity is added, mark it as used.
            for input_value in get_input_values(kwargs):
                if input_value in self._unbound_inputs:
                    self._unbound_inputs.remove(input_value)
            return n
Example #4
    def add_workflow_output(
        self, output_name: str, p: Union[Promise, List[Promise], Dict[str, Promise]], python_type: Optional[Type] = None
    ):
        """
        Add an output with the given name from the given node output.
        """
        if output_name in self._python_interface.outputs:
            raise FlyteValidationException(f"Output {output_name} already exists in workflow {self.name}")

        if python_type is None:
            if type(p) == list or type(p) == dict:
                raise FlyteValidationException(
                    f"If specifying a list or dict of Promises, you must specify the python_type for {output_name}"
                    f" starting with the container type (e.g. List[int])"
                )
            python_type = p.ref.node.flyte_entity.python_interface.outputs[p.var]
            logger.debug(f"Inferring python type for wf output {output_name} from Promise provided {python_type}")

        flyte_type = TypeEngine.to_literal_type(python_type=python_type)

        ctx = FlyteContext.current_context()
        if ctx.compilation_state is not None:
            raise Exception("Can't already be compiling")
        with FlyteContextManager.with_context(ctx.with_compilation_state(self.compilation_state)) as ctx:
            b = binding_from_python_std(
                ctx, output_name, expected_literal_type=flyte_type, t_value=p, t_value_type=python_type
            )
            self._output_bindings.append(b)
            self._python_interface = self._python_interface.with_outputs(extra_outputs={output_name: python_type})
            self._interface = transform_interface_to_typed_interface(self._python_interface)
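
For context, a hedged sketch of the imperative workflow pattern that add_entity and add_workflow_output serve (this assumes flytekit exposes the imperative workflow class as flytekit.Workflow; the task is illustrative):

from flytekit import Workflow, task

@task
def double(a: int) -> int:
    return a * 2

wf = Workflow(name="my.imperative.wf")
wf.add_workflow_input("a", int)
node = wf.add_entity(double, a=wf.inputs["a"])
# python_type is inferred from the Promise, per the code above
wf.add_workflow_output("out", node.outputs["o0"])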
Example #5
    def __call__(self, *args, **kwargs):
        # When a Task is () aka __called__, there are three things we may do:
        #  a. Plain execution Mode - just run the execute function. If not overridden, we should raise an exception
        #  b. Compilation Mode - this happens when the function is called as part of a workflow (potentially
        #     dynamic task). Produce promise objects and create a node.
        #  c. Workflow Execution Mode - when a workflow is being run locally. Even though workflows are functions
        #     and everything should be able to be passed through naturally, we'll want to wrap output values of the
        #     function into objects, so that potential .with_cpu or other ancillary functions can be attached to do
        #     nothing. Subsequent tasks will have to know how to unwrap these. If by chance a non-Flyte task uses a
        #     task output as an input, things probably will fail pretty obviously.
        #     Since this is a reference entity, it still needs to be mocked otherwise an exception will be raised.
        if len(args) > 0:
            raise _user_exceptions.FlyteAssertion(
                f"Cannot call reference entity with args - detected {len(args)} positional args {args}"
            )

        ctx = FlyteContext.current_context()
        if ctx.compilation_state is not None and ctx.compilation_state.mode == 1:
            return self.compile(ctx, *args, **kwargs)
        elif (ctx.execution_state is not None and ctx.execution_state.mode
              == ExecutionState.Mode.LOCAL_WORKFLOW_EXECUTION):
            if ctx.execution_state.branch_eval_mode == BranchEvalMode.BRANCH_SKIPPED:
                return
            return self._local_execute(ctx, **kwargs)
        else:
            logger.debug("Reference entity - running raw execute")
            return self.execute(**kwargs)
Example #6
    def dynamic_execute(self, task_function: Callable, **kwargs) -> Any:
        """
        By the time this function is invoked, the _local_execute function should have unwrapped the Promises and Flyte
        literal wrappers so that the kwargs we are working with here are now Python native literal values. This
        function is also expected to return Python native literal values.

        Since the user code within a dynamic task constitutes a workflow, we have to first compile the workflow, and
        then execute that workflow.

        When running for real in production, the task would stop after the compilation step, and then create a file
        representing that newly generated workflow, instead of executing it.
        """
        ctx = FlyteContext.current_context()

        if ctx.execution_state and ctx.execution_state.mode == ExecutionState.Mode.LOCAL_WORKFLOW_EXECUTION:
            with ctx.new_execution_context(ExecutionState.Mode.TASK_EXECUTION):
                logger.info("Executing Dynamic workflow, using raw inputs")
                return task_function(**kwargs)

        if ctx.execution_state and ctx.execution_state.mode == ExecutionState.Mode.TASK_EXECUTION:
            is_fast_execution = bool(
                ctx.execution_state and ctx.execution_state.additional_context
                and ctx.execution_state.additional_context.get(
                    "dynamic_addl_distro"))
            return self.compile_into_workflow(ctx, is_fast_execution,
                                              task_function, **kwargs)
Example #7
    def create(
        cls,
        name: str,
        workflow: _annotated_workflow.WorkflowBase,
        default_inputs: Optional[Dict[str, Any]] = None,
        fixed_inputs: Optional[Dict[str, Any]] = None,
        schedule: Optional[_schedule_model.Schedule] = None,
        notifications: Optional[List[_common_models.Notification]] = None,
        auth_role: Optional[_common_models.AuthRole] = None,
    ) -> LaunchPlan:
        ctx = FlyteContext.current_context()
        default_inputs = default_inputs or {}
        fixed_inputs = fixed_inputs or {}
        # Default inputs come from two places, the original signature of the workflow function, and the default_inputs
        # argument to this function. We'll take the latter as having higher precedence.
        wf_signature_parameters = transform_inputs_to_parameters(
            ctx, workflow.python_interface)

        # Construct a new Interface object with just the default inputs given to get Parameters, maybe there's an
        # easier way to do this, think about it later.
        temp_inputs = {}
        for k, v in default_inputs.items():
            temp_inputs[k] = (workflow.python_interface.inputs[k], v)
        temp_interface = Interface(inputs=temp_inputs, outputs={})
        temp_signature = transform_inputs_to_parameters(ctx, temp_interface)
        wf_signature_parameters._parameters.update(temp_signature.parameters)

        # These are fixed inputs that cannot change at launch time. If the same argument is also in default inputs,
        # it'll be taken out from defaults in the LaunchPlan constructor
        fixed_literals = translate_inputs_to_literals(
            ctx,
            incoming_values=fixed_inputs,
            flyte_interface_types=workflow.interface.inputs,
            native_types=workflow.python_interface.inputs,
        )
        fixed_lm = _literal_models.LiteralMap(literals=fixed_literals)

        lp = cls(
            name=name,
            workflow=workflow,
            parameters=wf_signature_parameters,
            fixed_inputs=fixed_lm,
            schedule=schedule,
            notifications=notifications,
            auth_role=auth_role,
        )

        # This is just a convenience - we'll need the fixed inputs LiteralMap for when serializing the Launch Plan out
        # to protobuf, but for local execution and such, why not save the original Python native values as well so
        # we don't have to reverse it back every time.
        default_inputs.update(fixed_inputs)
        lp._saved_inputs = default_inputs

        if name in cls.CACHE:
            raise AssertionError(
                f"Launch plan named {name} was already created! Make sure your names are unique."
            )
        cls.CACHE[name] = lp
        return lp
Example #8
def test_zero_floats():
    ctx = FlyteContext.current_context()

    l0 = Literal(scalar=Scalar(primitive=Primitive(integer=0)))
    l1 = Literal(scalar=Scalar(primitive=Primitive(float_value=0.0)))

    assert TypeEngine.to_python_value(ctx, l0, float) == 0
    assert TypeEngine.to_python_value(ctx, l1, float) == 0
Example #9
def test_list_transformer():
    l0 = Literal(scalar=Scalar(primitive=Primitive(integer=3)))
    l1 = Literal(scalar=Scalar(primitive=Primitive(integer=4)))
    lc = LiteralCollection(literals=[l0, l1])
    lit = Literal(collection=lc)

    ctx = FlyteContext.current_context()
    xx = TypeEngine.to_python_value(ctx, lit, typing.List[int])
    assert xx == [3, 4]
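
The conversion also runs in the other direction; a brief round-trip sketch using the same TypeEngine calls:

def test_list_round_trip():
    ctx = FlyteContext.current_context()
    lt = TypeEngine.to_literal_type(typing.List[int])
    lit = TypeEngine.to_literal(ctx, [3, 4], typing.List[int], lt)
    # Converting back should recover the original Python value.
    assert TypeEngine.to_python_value(ctx, lit, typing.List[int]) == [3, 4]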
Example #10
File: deck.py Project: flyteorg/flytekit
def _output_deck(task_name: str, new_user_params: ExecutionParameters):
    ctx = FlyteContext.current_context()
    if ctx.execution_state.mode == ExecutionState.Mode.TASK_EXECUTION:
        output_dir = ctx.execution_state.engine_dir
    else:
        output_dir = ctx.file_access.get_random_local_directory()
    deck_path = os.path.join(output_dir, DECK_FILE_NAME)
    with open(deck_path, "w") as f:
        f.write(_get_deck(new_user_params))
    logger.info(
        f"{task_name} task creates flyte deck html to file://{deck_path}")
Example #11
    def start_branch(self, c: Case, last_case: bool = False) -> Case:
        """
        At the start of an execution of every branch this method should be called.
        :param c: -> the case that represents this branch
        :param last_case: -> a boolean that indicates if this is the last branch in the ifelseblock
        """
        self._last_case = last_case
        self._cases.append(c)
        ctx = FlyteContext.current_context()
        # In the case of local workflow execution, we actually evaluate the expression and, based on the
        # result, make the branch active using the `take_branch` method
        if ctx.execution_state and ctx.execution_state.mode == ExecutionState.Mode.LOCAL_WORKFLOW_EXECUTION:
            # This is a short-circuit for the case when the branch was taken
            # and we already have a candidate case selected
            if self._selected_case is None:
                if c.expr is None or c.expr.eval() or last_case:
                    # Bind the execution state to its own name rather than shadowing `ctx`
                    exec_state = FlyteContext.current_context().execution_state
                    exec_state.take_branch()
                    self._selected_case = self._cases[-1]
        return self._cases[-1]
Example #12
    def end_branch(self) -> Union[Condition, Promise]:
        """
        This should be invoked after every branch has been visited
        """
        ctx = FlyteContext.current_context()
        if ctx.execution_state and ctx.execution_state.mode == ExecutionState.Mode.LOCAL_WORKFLOW_EXECUTION:
            """
            In case of Local workflow execution, we should first mark the branch as complete, then
            Then we first check for if this is the last case,
            In case this is the last case, we return the output from the selected case - A case should always
            be selected (see start_branch)
            If this is not the last case, we should return the condition so that further chaining can be done
            """
            # Let us mark the execution state as complete
            ctx.execution_state.branch_complete()
            if self._last_case:
                ctx.execution_state.exit_conditional_section()
                if self._selected_case.output_promise is None and self._selected_case.err is None:
                    raise AssertionError("Bad conditional statements, did not resolve in a promise")
                elif self._selected_case.output_promise is not None:
                    return self._selected_case.output_promise
                raise ValueError(self._selected_case.err)
            return self._condition
        elif ctx.compilation_state:
            ########
            # COMPILATION MODE
            """
            In case this is not local workflow execution then, we should check if this is the last case.
            If so then return the promise, else return the condition
            """
            if self._last_case:
                ctx.compilation_state.exit_conditional_section()
                # branch_nodes = ctx.compilation_state.nodes
                node, promises = to_branch_node(self._name, self)
                # Verify branch_nodes == nodes in bn
                bindings: typing.List[Binding] = []
                upstream_nodes = set()
                for p in promises:
                    if not p.is_ready:
                        bindings.append(Binding(var=p.var, binding=BindingData(promise=p.ref)))
                        upstream_nodes.add(p.ref.node)

                n = Node(
                    id=f"{ctx.compilation_state.prefix}node-{len(ctx.compilation_state.nodes)}",
                    metadata=_core_wf.NodeMetadata(self._name, timeout=datetime.timedelta(), retries=RetryStrategy(0)),
                    bindings=sorted(bindings, key=lambda b: b.var),
                    upstream_nodes=list(upstream_nodes),  # type: ignore
                    flyte_entity=node,
                )
                ctx.compilation_state.add_node(n)
                return self._compute_outputs(n)
            return self._condition

        raise AssertionError("Branches can only be invoked within a workflow context!")
Example #13
    def _localize_feature_view(self, feature_view: FeatureView):
        """
        This function ensures that the `FeatureView` object points to files on the local disk
        """
        if not isinstance(feature_view.batch_source, FileSource):
            return

        # Copy parquet file to a local file
        file_source: FileSource = feature_view.batch_source
        random_local_path = (
            FlyteContext.current_context().file_access.get_random_local_path(
                file_source.path))
        FlyteContext.current_context().file_access.get_data(
            file_source.path,
            random_local_path,
            is_multipart=True,
        )
        feature_view.batch_source = FileSource(
            path=random_local_path,
            event_timestamp_column=file_source.event_timestamp_column,
        )
Example #14
    def get_type_for_output_var(self, k: str, v: Any) -> Optional[Type[Any]]:
        """
        We override this method from the flytekit.core.base_task Task because the dispatch_execute method uses this
        interface to construct outputs. Each instance of a container_array task will however produce outputs
        according to the underlying run_task interface, and the array plugin handler will actually create a collection
        from these individual outputs as the final output value.
        """
        ctx = FlyteContext.current_context()
        if ctx.execution_state is not None and ctx.execution_state.mode == ExecutionState.Mode.LOCAL_WORKFLOW_EXECUTION:
            # In workflow execution mode we actually need to use the parent (mapper) task output interface.
            return self._python_interface.outputs[k]
        return self._run_task._python_interface.outputs[k]
Example #15
    def load_task(self, loader_args: List[str]) -> ExecutableTemplateShimTask:
        logger.info(f"Task template loader args: {loader_args}")
        ctx = FlyteContext.current_context()
        task_template_local_path = os.path.join(
            ctx.execution_state.working_dir, "task_template.pb")
        ctx.file_access.get_data(loader_args[0], task_template_local_path)
        task_template_proto = common_utils.load_proto_from_file(
            _tasks_pb2.TaskTemplate, task_template_local_path)
        task_template_model = _task_model.TaskTemplate.from_flyte_idl(
            task_template_proto)

        executor_class = load_object_from_module(loader_args[1])
        return ExecutableTemplateShimTask(task_template_model, executor_class)
Example #16
    def _outputs_interface(self) -> Dict[Any, Variable]:
        """
        We override this method from PythonTask because the dispatch_execute method uses this
        interface to construct outputs. Each instance of a container_array task will however produce outputs
        according to the underlying run_task interface, and the array plugin handler will actually create a collection
        from these individual outputs as the final output value.
        """

        ctx = FlyteContext.current_context()
        if ctx.execution_state is not None and ctx.execution_state.mode == ExecutionState.Mode.LOCAL_WORKFLOW_EXECUTION:
            # In workflow execution mode we actually need to use the parent (mapper) task output interface.
            return self.interface.outputs
        return self._run_task.interface.outputs
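
The overrides in Examples #14 and #16 exist because a map task wraps a single run_task; a hedged sketch of the map-task usage they support (assuming flytekit exports map_task):

from typing import List
from flytekit import map_task, task, workflow

@task
def stringify(a: int) -> str:
    return str(a)

@workflow
def my_wf(xs: List[int]) -> List[str]:
    # Each mapped instance produces one str; the array plugin collects them into a list.
    return map_task(stringify)(a=xs)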
Example #17
    def get_or_create(
        cls,
        workflow: _annotated_workflow.WorkflowBase,
        name: Optional[str] = None,
        default_inputs: Optional[Dict[str, Any]] = None,
        fixed_inputs: Optional[Dict[str, Any]] = None,
        schedule: Optional[_schedule_model.Schedule] = None,
        notifications: Optional[List[_common_models.Notification]] = None,
        auth_role: Optional[_common_models.AuthRole] = None,
    ) -> LaunchPlan:
        """
        This function offers a friendlier interface for creating launch plans. If the name for the launch plan is not
        supplied, this assumes you are looking for the default launch plan for the workflow. If it is specified, it
        will be used. If creating the default launch plan, none of the other arguments may be specified.

        The resulting launch plan is also cached; if this function is called again with the same name, the
        cached version is returned.

        :param workflow: The Workflow to create a launch plan for.
        :param name: If you supply a name, keep in mind that it needs to be unique. That is, project, domain, version,
          and this name form a primary key. If you do not supply a name, this function will assume you want the default
          launch plan for the given workflow.
        :param default_inputs: Default inputs, expressed as Python values.
        :param fixed_inputs: Fixed inputs, expressed as Python values. At call time, these cannot be changed.
        :param schedule: Optional schedule to run on.
        :param notifications: Notifications to send.
        :param auth_role: Add an auth role if necessary.
        """
        if name is None and (default_inputs is not None or fixed_inputs
                             is not None or schedule is not None
                             or notifications is not None
                             or auth_role is not None):
            raise ValueError(
                "Only named launchplans can be created that have other properties. Drop the name if you want to create a default launchplan. Default launchplans cannot have any other associations"
            )

        if name is not None and name in LaunchPlan.CACHE:
            # TODO: Add checking of the other arguments (default_inputs, fixed_inputs, etc.) to make sure they match
            return LaunchPlan.CACHE[name]
        elif name is None and workflow.name in LaunchPlan.CACHE:
            return LaunchPlan.CACHE[workflow.name]

        # Otherwise, handle the default launch plan case
        if name is None:
            ctx = FlyteContext.current_context()
            lp = cls.get_default_launch_plan(ctx, workflow)
        else:
            lp = cls.create(name, workflow, default_inputs, fixed_inputs,
                            schedule, notifications, auth_role)
        LaunchPlan.CACHE[name or workflow.name] = lp
        return lp
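
A short usage sketch for get_or_create (the workflow and input values are illustrative):

from flytekit import LaunchPlan, task, workflow

@task
def greet(name: str) -> str:
    return f"hello {name}"

@workflow
def wf(name: str = "world") -> str:
    return greet(name=name)

# Default launch plan: no other arguments may be passed.
default_lp = LaunchPlan.get_or_create(workflow=wf)

# Named launch plan with inputs fixed at creation time.
named_lp = LaunchPlan.get_or_create(
    workflow=wf,
    name="wf_fixed",
    fixed_inputs={"name": "flyte"},
)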
Example #18
def current_context() -> ExecutionParameters:
    """
    Use this method to get a handle of specific parameters available in a flyte task.

    Usage

    .. code-block::

        flytekit.current_context().logging.info(...)

    Available params are documented in :py:class:`flytekit.core.context_manager.ExecutionParams`.
    There are some special params, that should be available
    """
    return FlyteContext.current_context().user_space_params
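
A hedged sketch of using the returned ExecutionParameters inside a task (attribute names follow the flytekit docs and should be treated as assumptions):

import flytekit
from flytekit import task

@task
def show_context() -> str:
    params = flytekit.current_context()
    params.logging.info(f"execution id: {params.execution_id}")
    # working_directory is a task-scoped scratch location (assumed attribute)
    return str(params.working_directory)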
Example #19
 def __init__(self, lhs: Union["Promise", Any], op: ComparisonOps,
              rhs: Union["Promise", Any]):
     self._op = op
     self._lhs = None
     self._rhs = None
     if isinstance(lhs, Promise):
         self._lhs = lhs
         if lhs.is_ready:
             if lhs.val.scalar is None or lhs.val.scalar.primitive is None:
                 raise ValueError(
                     "Only primitive values can be used in comparison")
     if isinstance(rhs, Promise):
         self._rhs = rhs
         if rhs.is_ready:
             if rhs.val.scalar is None or rhs.val.scalar.primitive is None:
                 raise ValueError(
                     "Only primitive values can be used in comparison")
     if self._lhs is None:
         self._lhs = type_engine.TypeEngine.to_literal(
             FlyteContext.current_context(), lhs, type(lhs), None)
     if self._rhs is None:
         self._rhs = type_engine.TypeEngine.to_literal(
             FlyteContext.current_context(), rhs, type(rhs), None)
Example #20
def test_file_format_getting_python_value():
    transformer = TypeEngine.get_transformer(FlyteFile)

    ctx = FlyteContext.current_context()

    # This file probably won't exist, but it's okay. It won't be downloaded unless we try to read the thing returned
    lv = Literal(
        scalar=Scalar(
            blob=Blob(
                metadata=BlobMetadata(type=BlobType(format="txt", dimensionality=0)),
                uri="file:///tmp/test",
            )
        )
    )

    pv = transformer.to_python_value(ctx, lv, expected_python_type=FlyteFile["txt"])
    assert isinstance(pv, FlyteFile)
    assert pv.extension() == "txt"
Example #21
    def __call__(self, *args, **kwargs):
        if len(args) > 0:
            raise AssertionError("Only Keyword Arguments are supported for launch plan executions")

        ctx = FlyteContext.current_context()
        if ctx.compilation_state is not None:
            inputs = self.saved_inputs
            inputs.update(kwargs)
            return create_and_link_node(ctx, entity=self, interface=self.workflow._native_interface, **inputs)
        else:
            # Calling a launch plan should just forward the call to the workflow, nothing more. But let's add in the
            # saved inputs.
            inputs = self.saved_inputs
            inputs.update(kwargs)
            return self.workflow(*args, **inputs)
Example #22
def test_protos():
    ctx = FlyteContext.current_context()

    pb = errors_pb2.ContainerError(code="code", message="message")
    lt = TypeEngine.to_literal_type(errors_pb2.ContainerError)
    assert lt.simple == SimpleType.STRUCT
    assert lt.metadata["pb_type"] == "flyteidl.core.errors_pb2.ContainerError"

    lit = TypeEngine.to_literal(ctx, pb, errors_pb2.ContainerError, lt)
    new_python_val = TypeEngine.to_python_value(ctx, lit, errors_pb2.ContainerError)
    assert new_python_val == pb

    # Test error
    l0 = Literal(scalar=Scalar(primitive=Primitive(integer=4)))
    with pytest.raises(AssertionError):
        TypeEngine.to_python_value(ctx, l0, errors_pb2.ContainerError)
Example #23
def test_additional_context():
    with FlyteContext.current_context() as ctx:
        with ctx.new_execution_context(mode=ExecutionState.Mode.TASK_EXECUTION,
                                       additional_context={
                                           1: "outer",
                                           2: "foo"
                                       }) as exec_ctx_outer:
            with exec_ctx_outer.new_execution_context(
                    mode=ExecutionState.Mode.TASK_EXECUTION,
                    additional_context={
                        1: "inner",
                        3: "baz"
                    }) as exec_ctx_inner:
                assert exec_ctx_inner.execution_state.additional_context == {
                    1: "inner",
                    2: "foo",
                    3: "baz"
                }
Example #24
    def __call__(self, *args, **kwargs):
        # When a Task is () aka __called__, there are three things we may do:
        #  a. Task Execution Mode - just run the Python function as Python normally would. Flyte steps completely
        #     out of the way.
        #  b. Compilation Mode - this happens when the function is called as part of a workflow (potentially
        #     dynamic task?). Instead of running the user function, produce promise objects and create a node.
        #  c. Workflow Execution Mode - when a workflow is being run locally. Even though workflows are functions
        #     and everything should be able to be passed through naturally, we'll want to wrap output values of the
        #     function into objects, so that potential .with_cpu or other ancillary functions can be attached to do
        #     nothing. Subsequent tasks will have to know how to unwrap these. If by chance a non-Flyte task uses a
        #     task output as an input, things probably will fail pretty obviously.
        if len(args) > 0:
            raise _user_exceptions.FlyteAssertion(
                f"When calling tasks, only keyword args are supported. "
                f"Aborting execution as detected {len(args)} positional args {args}"
            )

        ctx = FlyteContext.current_context()
        if ctx.compilation_state is not None and ctx.compilation_state.mode == 1:
            return self.compile(ctx, *args, **kwargs)
        elif (ctx.execution_state is not None and ctx.execution_state.mode
              == ExecutionState.Mode.LOCAL_WORKFLOW_EXECUTION):
            if ctx.execution_state.branch_eval_mode == BranchEvalMode.BRANCH_SKIPPED:
                if self.python_interface and self.python_interface.output_tuple_name:
                    variables = [
                        k for k in self.python_interface.outputs.keys()
                    ]
                    output_tuple = collections.namedtuple(
                        self.python_interface.output_tuple_name, variables)
                    nones = [
                        None for _ in self.python_interface.outputs.keys()
                    ]
                    return output_tuple(*nones)
                else:
                    # Should we return multiple None's here?
                    return None
            return self._local_execute(ctx, **kwargs)
        else:
            logger.warning("task run without context - executing raw function")
            new_user_params = self.pre_execute(ctx.user_space_params)
            with ctx.new_execution_context(
                    mode=ExecutionState.Mode.LOCAL_TASK_EXECUTION,
                    execution_params=new_user_params):
                return self.execute(**kwargs)
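
Mode (a) above means that a task called outside of any Flyte context simply runs its function; a quick sketch:

from flytekit import task

@task
def add(a: int, b: int) -> int:
    return a + b

# No workflow and no compilation state: the raw function executes locally.
assert add(a=1, b=2) == 3

# Positional arguments are rejected by the check at the top of __call__:
# add(1, 2)  # raises FlyteAssertion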
Example #25
    def if_(self, expr: bool) -> Case:
        ctx = FlyteContext.current_context()
        if ctx.execution_state:
            if ctx.execution_state.branch_eval_mode is not None:
                """
                TODO implement nested branches
                """
                raise NotImplementedError("Nested branches are not yet supported")
            if ctx.execution_state.mode == ExecutionState.Mode.LOCAL_WORKFLOW_EXECUTION:
                """
                In case of local workflow execution we should ensure a conditional section
                is created so that skipped branches result in tasks not being executed
                """
                ctx.execution_state.enter_conditional_section()
        elif ctx.compilation_state:
            if ctx.compilation_state.is_in_a_branch():
                """
                TODO implement nested branches
                """
                raise NotImplementedError("Nested branches are not yet supported")
            ctx.compilation_state.enter_conditional_section()
        return self._condition._if(expr)
Example #26
def test_additional_context():
    ctx = FlyteContext.current_context()
    with FlyteContextManager.with_context(
            ctx.with_execution_state(ctx.new_execution_state().with_params(
                mode=ExecutionState.Mode.TASK_EXECUTION,
                additional_context={
                    1: "outer",
                    2: "foo"
                }))) as exec_ctx_outer:
        with FlyteContextManager.with_context(
                ctx.with_execution_state(
                    exec_ctx_outer.execution_state.with_params(
                        mode=ExecutionState.Mode.TASK_EXECUTION,
                        additional_context={
                            1: "inner",
                            3: "baz"
                        }))) as exec_ctx_inner:
            assert exec_ctx_inner.execution_state.additional_context == {
                1: "inner",
                2: "foo",
                3: "baz"
            }
Example #27
    def add_entity(self, entity: Union[PythonTask, LaunchPlan, WorkflowBase],
                   **kwargs) -> Node:
        """
        Anytime you add an entity, all the inputs to the entity must be bound.
        """
        # circular import
        from flytekit.core.node_creation import create_node

        ctx = FlyteContext.current_context()
        if ctx.compilation_state is not None:
            raise Exception("Can't already be compiling")
        with FlyteContextManager.with_context(
                ctx.with_compilation_state(self.compilation_state)) as ctx:
            n = create_node(entity=entity, **kwargs)

            def get_input_values(input_value):
                if isinstance(input_value, list):
                    input_promises = []
                    for x in input_value:
                        input_promises.extend(get_input_values(x))
                    return input_promises
                if isinstance(input_value, dict):
                    input_promises = []
                    for _, v in input_value.items():
                        input_promises.extend(get_input_values(v))
                    return input_promises
                else:
                    return [input_value]

            # Every time an entity is added, mark it as used. The function above gathers all the input
            # values, but we're only interested in the ones that are Promises, so let's filter for those.
            # There's probably a way to clean this up, maybe key off of the name instead of the value?
            all_input_values = get_input_values(kwargs)
            for input_value in filter(lambda x: isinstance(x, Promise),
                                      all_input_values):
                if input_value in self._unbound_inputs:
                    self._unbound_inputs.remove(input_value)
            return n
Example #28
def create_node(
        entity: Union[PythonTask, LaunchPlan, WorkflowBase,
                      RemoteEntity], *args,
        **kwargs) -> Union[Node, VoidPromise, Type[collections.namedtuple]]:
    """
    This is the function you want to call if you need to specify dependencies between tasks that don't consume and/or
    don't produce outputs. For example, if you have t1() and t2(), both of which do not take in nor produce any
    outputs, how do you specify that t2 should run before t1?

        t1_node = create_node(t1)
        t2_node = create_node(t2)

        t2_node.runs_before(t1_node)
        # OR
        t2_node >> t1_node

    This works for tasks that take inputs as well, say a ``t3(in1: int)``

        t3_node = create_node(t3, in1=some_int)  # basically calling t3(in1=some_int)

    You can still use this method to handle setting certain overrides

        t3_node = create_node(t3, in1=some_int).with_overrides(...)

    Outputs, if there are any, will be accessible. For a ``t4() -> (int, str)``

        t4_node = create_node(t4)

        # In compilation, node.o0 holds the promise:
        t5(in1=t4_node.o0)

        # In local workflow execution, the same attribute access works on the returned wrapper:
        t5(in1=t4_node.o0)

    @workflow
    def wf():
        create_node(sub_wf)
        create_node(wf2)

    @dynamic
    def sub_wf():
        create_node(other_sub)
        create_node(task)

    If t1 produces only one output, note that in local execution, you still get a wrapper object that
    needs to be dereferenced by the output name.

        t1_node = create_node(t1)
        t2(t1_node.o0)

    """
    from flytekit.remote.remote_callable import RemoteEntity

    if len(args) > 0:
        raise _user_exceptions.FlyteAssertion(
            f"Only keyword args are supported to pass inputs to workflows and tasks."
            f"Aborting execution as detected {len(args)} positional args {args}"
        )

    if (not isinstance(entity, PythonTask)
            and not isinstance(entity, WorkflowBase)
            and not isinstance(entity, LaunchPlan)
            and not isinstance(entity, RemoteEntity)):
        raise AssertionError(
            f"Should be a callable Flyte entity (either local or fetched) but is {type(entity)}"
        )

    # This function is only called from inside workflows and dynamic tasks.
    # That means there are two scenarios we need to take care of, compilation and local workflow execution.

    # When compiling, calling the entity will create a node.
    ctx = FlyteContext.current_context()
    if ctx.compilation_state is not None and ctx.compilation_state.mode == 1:
        outputs = entity(**kwargs)
        # This is always the output of create_and_link_node which returns create_task_output, which can be
        # VoidPromise, Promise, or our custom namedtuple of Promises.
        node = ctx.compilation_state.nodes[-1]

        # In addition to storing the outputs on the object itself, we also want to set them in a map. When used by
        # the imperative workflow patterns, users will probably find themselves doing things like
        #   n = create_node(...)  # then
        #   output_name = "o0"
        #   n.outputs[output_name]  # rather than
        #   n.o0
        # That is, they'll likely have the name of the output stored as a string variable, and dicts provide cleaner
        # access than getattr
        node._outputs = {}

        # If a VoidPromise, just return the node.
        if isinstance(outputs, VoidPromise):
            return node

        # If a Promise or custom namedtuple of Promises, we need to attach each output as an attribute to the node.
        # todo: fix the noqas below somehow... can't add abstract property to RemoteEntity because it has to come
        #  before the model Template classes in FlyteTask/Workflow/LaunchPlan
        if entity.interface.outputs:  # noqa
            if isinstance(outputs, tuple):
                for output_name in entity.interface.outputs.keys():  # noqa
                    attr = getattr(outputs, output_name)
                    if attr is None:
                        raise _user_exceptions.FlyteAssertion(
                            f"Output {output_name} in outputs when calling {entity.name} is empty {attr}."
                        )
                    if hasattr(node, output_name):
                        raise _user_exceptions.FlyteAssertion(
                            f"Node {node} already has attribute {output_name}, change the name of output."
                        )
                    setattr(node, output_name, attr)
                    node.outputs[output_name] = attr
            else:
                output_names = [k for k in entity.interface.outputs.keys()]  # noqa
                if len(output_names) != 1:
                    raise _user_exceptions.FlyteAssertion(
                        f"Output of length 1 expected but {len(output_names)} found"
                    )

                if hasattr(node, output_names[0]):
                    raise _user_exceptions.FlyteAssertion(
                        f"Node {node} already has attribute {output_names[0]}, change the name of output."
                    )

                setattr(node, output_names[0],
                        outputs)  # This should be a singular Promise
                node.outputs[output_names[0]] = outputs

        return node

    # Handling local execution
    elif ctx.execution_state is not None and ctx.execution_state.mode == ExecutionState.Mode.LOCAL_WORKFLOW_EXECUTION:
        if isinstance(entity, RemoteEntity):
            raise AssertionError(
                f"Remote entities are not yet runnable locally {entity.name}")

        if ctx.execution_state.branch_eval_mode == BranchEvalMode.BRANCH_SKIPPED:
            logger.warning(
                f"Manual node creation cannot be used in branch logic {entity.name}"
            )
            raise Exception(
                "Being more restrictive for now and disallowing manual node creation in branch logic"
            )

        # This is the output of __call__ under local execute conditions, which means it is the output of local_execute,
        # which means it is the output of create_task_output with Promises containing values (or a VoidPromise)
        results = entity(**kwargs)

        # If it's a VoidPromise, let's just return it, it shouldn't get used anywhere and if it does, we want an error
        # The reason we return it if it's a tuple is to handle the case where the task returns a typing.NamedTuple.
        # In that case, it's already a tuple and we don't need to further tupletize.
        if isinstance(results, VoidPromise) or isinstance(results, tuple):
            return results

        output_names = entity.python_interface.output_names

        if not output_names:
            raise Exception(
                f"Non-VoidPromise received {results} but interface for {entity.name} doesn't have outputs"
            )

        if len(output_names) == 1:
            # See explanation above for why we still tupletize a single element.
            return entity.python_interface.output_tuple(results)

        return entity.python_interface.output_tuple(*results)

    else:
        raise Exception(
            f"Cannot use explicit run to call Flyte entities {entity.name}")
Example #29
def test_default():
    ctx = FlyteContext.current_context()
    assert ctx.file_access is not None
Example #30
def test_dict_transformer():
    d = DictTransformer()

    def assert_struct(lit: LiteralType):
        assert lit is not None
        assert lit.simple == SimpleType.STRUCT

    def recursive_assert(lit: LiteralType,
                         expected: LiteralType,
                         expected_depth: int = 1,
                         curr_depth: int = 0):
        assert curr_depth <= expected_depth
        assert lit is not None
        if lit.map_value_type is None:
            assert lit == expected
            return
        recursive_assert(lit.map_value_type, expected, expected_depth,
                         curr_depth + 1)

    # Type inference
    assert_struct(d.get_literal_type(dict))
    assert_struct(d.get_literal_type(typing.Dict[int, int]))
    recursive_assert(d.get_literal_type(typing.Dict[str, str]),
                     LiteralType(simple=SimpleType.STRING))
    recursive_assert(d.get_literal_type(typing.Dict[str, int]),
                     LiteralType(simple=SimpleType.INTEGER))
    recursive_assert(d.get_literal_type(typing.Dict[str, datetime.datetime]),
                     LiteralType(simple=SimpleType.DATETIME))
    recursive_assert(d.get_literal_type(typing.Dict[str, datetime.timedelta]),
                     LiteralType(simple=SimpleType.DURATION))
    recursive_assert(d.get_literal_type(typing.Dict[str, dict]),
                     LiteralType(simple=SimpleType.STRUCT))
    recursive_assert(
        d.get_literal_type(typing.Dict[str, typing.Dict[str, str]]),
        LiteralType(simple=SimpleType.STRING),
        expected_depth=2,
    )
    recursive_assert(
        d.get_literal_type(typing.Dict[str, typing.Dict[int, str]]),
        LiteralType(simple=SimpleType.STRUCT),
        expected_depth=2,
    )
    recursive_assert(
        d.get_literal_type(typing.Dict[str, typing.Dict[str, typing.Dict[str, str]]]),
        LiteralType(simple=SimpleType.STRING),
        expected_depth=3,
    )
    recursive_assert(
        d.get_literal_type(typing.Dict[str, typing.Dict[str, typing.Dict[str, dict]]]),
        LiteralType(simple=SimpleType.STRUCT),
        expected_depth=3,
    )
    recursive_assert(
        d.get_literal_type(typing.Dict[str, typing.Dict[str, typing.Dict[int, dict]]]),
        LiteralType(simple=SimpleType.STRUCT),
        expected_depth=2,
    )

    ctx = FlyteContext.current_context()

    lit = d.to_literal(ctx, {}, typing.Dict, LiteralType(SimpleType.STRUCT))
    pv = d.to_python_value(ctx, lit, typing.Dict)
    assert pv == {}

    # Literal to python
    with pytest.raises(TypeError):
        d.to_python_value(
            ctx, Literal(scalar=Scalar(primitive=Primitive(integer=10))), dict)
    with pytest.raises(TypeError):
        d.to_python_value(ctx, Literal(), dict)
    with pytest.raises(TypeError):
        d.to_python_value(ctx, Literal(map=LiteralMap(literals={"x": None})),
                          dict)
    with pytest.raises(TypeError):
        d.to_python_value(ctx, Literal(map=LiteralMap(literals={"x": None})),
                          typing.Dict[int, str])

    d.to_python_value(
        ctx,
        Literal(map=LiteralMap(
            literals={
                "x": Literal(scalar=Scalar(primitive=Primitive(integer=1)))
            })),
        typing.Dict[str, int],
    )