Exemplo n.º 1
0
    def _local_execute(self, ctx: FlyteContext, **kwargs) -> Union[Tuple[Promise], Promise, VoidPromise]:
        """
        Run this entity locally and return its results wrapped as Promise objects.

        The keyword arguments are converted to Flyte literals, executed through
        ``unwrap_literal_map_and_execute`` as a LiteralMap, and the returned literals are re-wrapped
        under the output names declared in ``python_interface.outputs``.

        :param ctx: The FlyteContext forwarded to the translation and execution calls
        :param kwargs: Inputs for this entity, either Promises or native values (see the note below)
        :raises AssertionError: If the number of literals returned differs from the number of declared outputs
        :return: A VoidPromise when no outputs are declared, otherwise the result of create_task_output
        """
        # Inputs arrive in two flavours during local execution: Promises (outputs of earlier
        # executions) and native constants bound directly to this call, possibly nested inside
        # lists or dictionaries. Translating them gives a uniform dict of literals.
        literal_inputs = translate_inputs_to_literals(
            ctx,
            incoming_values=kwargs,
            flyte_interface_types=self.interface.inputs,
            native_types=self.python_interface.inputs,
        )
        executed = self.unwrap_literal_map_and_execute(ctx, _literal_models.LiteralMap(literals=literal_inputs))

        outputs_literals = executed.literals
        output_names = list(self.python_interface.outputs.keys())

        # The executed literal map must line up one-to-one with the declared outputs.
        if len(output_names) != len(outputs_literals):
            raise AssertionError(f"Length difference {len(output_names)} {len(outputs_literals)}")

        # Nothing declared means nothing to wrap; callers still receive a VoidPromise.
        if not output_names:
            return VoidPromise(self.name)

        # Wrap every literal back into a Promise, in declaration order.
        wrapped = []
        for name in output_names:
            wrapped.append(Promise(name, outputs_literals[name]))
        return create_task_output(wrapped, self.python_interface)
Exemplo n.º 2
0
    def _local_execute(self, ctx: FlyteContext, **kwargs) -> Union[Tuple[Promise], Promise, VoidPromise]:
        """
        Translate the given inputs to literals, run ``dispatch_execute`` and wrap the outputs in Promises.

        This path is only used when dispatch_execute has to be called with outputs from a previous
        node; for regular execution dispatch_execute is invoked directly.

        :param ctx: The FlyteContext forwarded to the translation and dispatch calls
        :param kwargs: Inputs for this task, either Promises or native values (see the note below)
        :raises AssertionError: If the number of literals returned differs from the number of declared outputs
        :return: A VoidPromise when the interface declares no outputs, otherwise the result of
            create_task_output
        """
        # Local-execute inputs are either Promises (results of earlier task executions) or native
        # constants bound to this task, and may also be lists/dicts of those. Translating them
        # gives a plain dict of literals suitable for a LiteralMap.
        translated = translate_inputs_to_literals(
            ctx,
            input_kwargs=kwargs,
            interface=self.interface,
            native_input_types=self.get_input_types(),
        )
        literal_inputs = _literal_models.LiteralMap(literals=translated)
        outputs_literals = self.dispatch_execute(ctx, literal_inputs).literals

        # TODO maybe this is the part that should be done for local execution, we pass the outputs to some special
        #    location, otherwise we dont really need to right? The higher level execute could just handle literalMap
        output_names = list(self.interface.outputs.keys())
        if len(output_names) != len(outputs_literals):
            # Declared and produced outputs must match one-to-one.
            raise AssertionError(f"Length difference {len(output_names)} {len(outputs_literals)}")

        if output_names:
            # Re-wrap each produced literal in a Promise keyed by its declared name.
            return create_task_output(
                [Promise(name, outputs_literals[name]) for name in output_names],
                self.python_interface,
            )

        # Tasks without outputs still hand back a VoidPromise.
        return VoidPromise(self.name)
Exemplo n.º 3
0
def test_translate_inputs_to_literals_with_wrong_types():
    """
    Check that translate_inputs_to_literals rejects values that do not fit a task's union-typed input.

    Three cases are exercised, each inside its own ``pytest.raises`` context together with the
    definition of the task whose interface is used:
      - a dict value for an input typed ``Union[float, List[int]]``
      - a list value for an input typed ``Union[float, Dict[str, int]]``
      - a VoidPromise passed as an input value
    """
    ctx = context_manager.FlyteContext.current_context()
    # Case 1: a dict is supplied, but the union has no map variant.
    with pytest.raises(TypeError, match="Not a map type union_type"):

        @task
        def t1(a: typing.Union[float, typing.List[int]]):
            print(a)

        translate_inputs_to_literals(ctx, {"a": {"a": 3}}, t1.interface.inputs, t1.python_interface.inputs)

    # Case 2: a list is supplied, but the union has no collection variant.
    with pytest.raises(TypeError, match="Not a collection type union_type"):

        @task
        def t1(a: typing.Union[float, typing.Dict[str, int]]):
            print(a)

        translate_inputs_to_literals(ctx, {"a": [1, 2, 3]}, t1.interface.inputs, t1.python_interface.inputs)

    # Case 3: a VoidPromise (named "n0" here) is not an acceptable input value.
    with pytest.raises(
        AssertionError, match="Outputs of a non-output producing task n0 cannot be passed to another task"
    ):

        @task
        def t1(a: typing.Union[float, typing.Dict[str, int]]):
            print(a)

        translate_inputs_to_literals(ctx, {"a": VoidPromise("n0")}, t1.interface.inputs, t1.python_interface.inputs)
Exemplo n.º 4
0
    def _local_execute(self, ctx: FlyteContext, **kwargs) -> Union[Tuple[Promise], Promise, VoidPromise]:
        """
        Execute this workflow locally and repackage what it returns as Promises named after its own outputs.

        :param ctx: The FlyteContext forwarded to the type engine and to the literal translation
        :param kwargs: Workflow inputs; any value that is not already a Promise is converted into one
        :raises FlyteValueException: If the function returns nothing although outputs are declared, or
            returns something although no outputs are declared
        :return: A VoidPromise for an output-less workflow, otherwise the result of create_task_output
        """
        # Workflow local executions always work with Promise objects holding Flyte literal values,
        # yet a user may call a (sub-)workflow with a Python native value. Wrap those up front.
        for name in list(kwargs):
            value = kwargs[name]
            if isinstance(value, Promise):
                continue
            native_type = self.python_interface.inputs[name]
            kwargs[name] = Promise(
                var=name,
                val=TypeEngine.to_literal(ctx, value, native_type, self.interface.inputs[name].type),
            )

        # What comes back is a combination of Python native values and Promises containing Flyte literals.
        function_outputs = self.execute(**kwargs)

        # Empty-return handling. A workflow function may return an output-less task's result
        #   def wf():
        #       return t1()
        # which yields that task's VoidPromise, or it may not return at all
        #   def wf():
        #       t1()
        # which yields None.
        returned_nothing = isinstance(function_outputs, VoidPromise) or function_outputs is None
        declared_count = len(self.python_interface.outputs)
        if returned_nothing:
            if declared_count:
                raise FlyteValueException(
                    function_outputs,
                    f"{function_outputs} received but interface has {len(self.python_interface.outputs)} outputs.",
                )
            return VoidPromise(self.name)

        # A real value came back; that is only legal if the interface declares outputs.
        if not declared_count:
            raise FlyteValueException(
                function_outputs, f"{function_outputs} received but should've been VoidPromise or None."
            )

        expected_output_names = list(self.python_interface.outputs.keys())
        if len(expected_output_names) != 1:
            wf_outputs_as_map = {}
            for position, _ in enumerate(function_outputs):
                wf_outputs_as_map[expected_output_names[position]] = function_outputs[position]
        elif self.python_interface.output_tuple_name and isinstance(function_outputs, tuple):
            # A workflow declared with a typing.NamedTuple of length one hands back a one-element
            # tuple; output_tuple_name is used as the proxy for detecting that declaration.
            wf_outputs_as_map = {expected_output_names[0]: function_outputs[0]}
        else:
            wf_outputs_as_map = {expected_output_names[0]: function_outputs}

        # Promises coming from the tasks are repackaged to match the workflow's interface: pull
        # out the literals first, then build new Promises under the workflow's output names.
        literals_by_name = translate_inputs_to_literals(
            ctx,
            wf_outputs_as_map,
            flyte_interface_types=self.interface.outputs,
            native_types=self.python_interface.outputs,
        )
        repackaged = []
        for name in expected_output_names:
            repackaged.append(Promise(name, literals_by_name[name]))

        return create_task_output(repackaged, self.python_interface)
Exemplo n.º 5
0
 def _compute_outputs(
         self,
         n: Node) -> Optional[Union[Promise, Tuple[Promise], VoidPromise]]:
     """
     Build the output promises for node ``n`` from the variables reported by compute_output_vars.

     :param n: The node that every created NodeOutput refers to
     :return: A VoidPromise (carrying a "placeholder" NodeOutput) when there are no output
         variables, otherwise the result of create_task_output
     """
     output_vars = self.compute_output_vars()
     if output_vars is not None and len(output_vars) != 0:
         node_promises = []
         for name in output_vars:
             node_promises.append(Promise(var=name, val=NodeOutput(node=n, var=name)))
         # TODO: Is there a way to add the Python interface here? Currently, it's an optional arg.
         return create_task_output(node_promises)
     # No variables at all: hand back a void promise bound to this node.
     return VoidPromise(n.id, NodeOutput(node=n, var="placeholder"))
Exemplo n.º 6
0
 def end_branch(
     self
 ) -> Optional[Union[Condition, Tuple[Promise], Promise, VoidPromise]]:
     """
     Close the current branch; this should be invoked after every branch has been visited.

     :return: The stored condition while this is not the last case. For the last case the
         context is popped and either a VoidPromise (no output variables) or the result of
         create_task_output over value-less Promises is returned.
     """
     if not self._last_case:
         return self._condition

     # Last case: leave the context before computing the outputs.
     FlyteContextManager.pop_context()
     output_vars = self.compute_output_vars()
     if output_vars is None:
         return VoidPromise(self.name)
     return create_task_output([Promise(var=name, val=None) for name in output_vars])
Exemplo n.º 7
0
 def _compute_outputs(
     self, selected_output_promise
 ) -> Optional[Union[Tuple[Promise], Promise, VoidPromise]]:
     """
     For the local execution case only returns the least common set of outputs.

     :param selected_output_promise: A single promise or a tuple of promises; each one's ``val``
         is copied onto a new Promise named after the matching output variable
     :return: A VoidPromise when compute_output_vars yields None, otherwise the result of
         create_task_output
     """
     output_vars = self.compute_output_vars()
     if output_vars is None:
         return VoidPromise(self.name)

     # Treat a lone promise like a one-element tuple so both shapes pair up the same way.
     if isinstance(selected_output_promise, tuple):
         chosen = selected_output_promise
     else:
         chosen = (selected_output_promise, )

     repackaged = []
     for name, source in zip(output_vars, chosen):
         repackaged.append(Promise(var=name, val=source.val))
     return create_task_output(repackaged)
Exemplo n.º 8
0
 def _compute_outputs(self, n: Node) -> Union[Promise, Tuple[Promise], VoidPromise]:
     """
     Build the output promises of this conditional for node ``n``.

     Only the output variables shared by every output-producing case are exposed. They are
     emitted in the order in which the first such case declares them, so the result no longer
     depends on set iteration order (which is not stable across interpreter runs for strings).

     :param n: The node whose id names a VoidPromise and which backs every created NodeOutput
     :return: A VoidPromise when some case has neither an output promise nor an error, or when no
         case carries an output promise at all; otherwise the result of create_task_output
         over the shared variables
     """
     common_vars: typing.Optional[typing.List[str]] = None
     for c in self._cases:
         if c.output_promise is None:
             if c.err is None:
                 # One node returns a void output and no error, we will default to a
                 # Void output
                 return VoidPromise(n.id)
             # An error-only case contributes no output variables.
             continue

         if isinstance(c.output_promise, tuple):
             case_promises = c.output_promise
         else:
             case_promises = (c.output_promise, )

         if common_vars is None:
             # First output-producing case: remember its variables in declaration order,
             # dropping repeats just like the former set-based bookkeeping did.
             common_vars = []
             for p in case_promises:
                 if p.var not in common_vars:
                     common_vars.append(p.var)
         else:
             # Later cases only narrow the selection; the established order is kept.
             case_vars = {p.var for p in case_promises}
             common_vars = [v for v in common_vars if v in case_vars]

     if common_vars is None:
         # No case carried an output promise (this used to fail with an IndexError on an empty
         # list); there is nothing to expose, so fall back to a void output.
         return VoidPromise(n.id)

     promises = [Promise(var=x, val=NodeOutput(node=n, var=x)) for x in common_vars]
     # TODO: Is there a way to add the Python interface here? Currently, it's an optional arg.
     return create_task_output(promises)
Exemplo n.º 9
0
    def _local_execute(
            self, ctx: FlyteContext,
            **kwargs) -> Union[Tuple[Promise], Promise, VoidPromise]:
        """
        Performs local execution of a workflow. kwargs are expected to be Promises for the most part (unless,
        someone has hardcoded in my_wf(input_1=5) or something).

        :param ctx: The FlyteContext
        :param kwargs: parameters for the workflow itself
        :return: A VoidPromise when the workflow function yields nothing or no outputs are declared,
            otherwise the result of create_task_output
        """
        logger.info(
            f"Executing Workflow {self._name}, ctx{ctx.execution_state.Mode}")

        # Workflow local executions always work with Promise objects holding Flyte literal values,
        # but a user can still call a sub-workflow with a Python native value; wrap those here.
        for name in list(kwargs):
            value = kwargs[name]
            if isinstance(value, Promise):
                continue
            native_type = self._native_interface.inputs[name]
            kwargs[name] = Promise(
                var=name,
                val=TypeEngine.to_literal(ctx, value, native_type, self.interface.inputs[name].type),
            )

        function_outputs = self.execute(**kwargs)

        # A workflow function may return a task that doesn't return anything
        #   def wf():
        #       return t1()
        # (we then get that task's VoidPromise), or it may not return at all
        #   def wf():
        #       t1()
        # (we then get None).
        if isinstance(function_outputs, VoidPromise) or function_outputs is None:
            return VoidPromise(self.name)
        # An interface without outputs is treated the same way.
        if len(self.python_interface.outputs) == 0:
            return VoidPromise(self.name)

        # TODO: Can we refactor the task code to be similar to what's in this function?
        wrapped = _workflow_fn_outputs_to_promise(
            ctx,
            self._native_interface.outputs,
            self.interface.outputs,
            function_outputs,
        )
        # TODO: With the native interface, create_task_output should be able to derive the typed interface, and it
        #   should be able to do the conversion of the output of the execute() call directly.
        return create_task_output(wrapped, self._native_interface)
Exemplo n.º 10
0
def test_dispatch_execute_void(mock_write_to_file, mock_upload_dir,
                               mock_get_data, mock_load_proto):
    """
    When the task's dispatch_execute returns a VoidPromise, _dispatch_execute must write exactly one
    file whose first argument equals an empty LiteralMap in its IDL form.
    """
    # These two are only stubbed so that nothing real happens.
    mock_upload_dir.return_value = True
    mock_get_data.return_value = True

    outer_ctx = context_manager.FlyteContext.current_context()
    task_mode = context_manager.ExecutionState.Mode.TASK_EXECUTION
    with outer_ctx.new_execution_context(mode=task_mode) as ctx:

        void_task = mock.MagicMock()
        void_task.dispatch_execute.return_value = VoidPromise("testing")

        # The same empty map is handed out as the loaded inputs and expected as the written output.
        expected_idl = _literal_models.LiteralMap({}).to_flyte_idl()
        mock_load_proto.return_value = expected_idl

        def check_written(*args, **kwargs):
            assert args[0] == expected_idl

        mock_write_to_file.side_effect = check_written
        _dispatch_execute(ctx, void_task, "inputs path", "outputs prefix")
        assert mock_write_to_file.call_count == 1
Exemplo n.º 11
0
    def execute(self, **kwargs):
        """
        Called by local_execute. This is how local execution for imperative workflows runs.

        Entities added through add_entity must already have all their inputs declared, so the nodes
        are in effect stored in a topological order and can simply be run one after another. A map
        from node to that node's named outputs is seeded with the workflow inputs (under the global
        start node) and extended as each node runs. Once all nodes have run, the workflow-level
        outputs are resolved from the same map via the workflow's output bindings.

        :param kwargs: The workflow inputs; local_execute should have ensured they are all Promises
        :raises FlyteValidationException: If the workflow is not ready
        :raises FlyteValueException: If an entity returns a value although it declares no outputs, or
            returns a different number of values than it declares
        :return: A VoidPromise for an output-less workflow, a single promise, a one-element tuple
            (one-element named tuple case) or a tuple of promises
        """
        if not self.ready():
            raise FlyteValidationException(
                f"Workflow not ready, wf is currently {self}")

        # Outputs of every node, seeded with the outputs of the global input node, i.e. the
        # inputs to the workflow.
        intermediate_node_outputs = {
            GLOBAL_START_NODE: dict(kwargs)
        }  # type: Dict[Node, Dict[str, Promise]]

        # Walk the nodes in their stored order.
        for node in self.compilation_state.nodes:
            node_outputs = intermediate_node_outputs.setdefault(node, {})

            # Look up the promises this node's bindings require in the tracker map and call the
            # node's entity with them.
            entity = node.flyte_entity
            results = entity(**get_promise_map(node.bindings, intermediate_node_outputs))
            expected_output_names = list(
                entity.python_interface.outputs.keys())

            if isinstance(results, VoidPromise) or results is None:
                continue  # pragma: no cover # Move along, nothing to assign

            # A real result from an entity without declared outputs is an error.
            if not expected_output_names:
                raise FlyteValueException(
                    results,
                    "Interface output should've been VoidPromise or None.")

            if len(expected_output_names) != 1:
                # Several outputs: they must pair up one-to-one with the declared names.
                if len(results) != len(expected_output_names):
                    raise FlyteValueException(
                        results,
                        f"Different lengths {results} {expected_output_names}")
                for output_name, value in zip(expected_output_names, results):
                    node_outputs[output_name] = value
                continue

            # Exactly one output: unwrap a one-element named tuple, otherwise store as is.
            sole_name = expected_output_names[0]
            if entity.python_interface.output_tuple_name and isinstance(
                    results, tuple):
                node_outputs[sole_name] = results[0]
            else:
                node_outputs[sole_name] = results

        # Same idea as above, now for the workflow as a whole rather than one node at a time.
        if len(self.python_interface.outputs) == 0:
            return VoidPromise(self.name)

        # The returned values are pulled by fulfilling the workflow's output bindings. The return
        # style has to match 1) what the workflow would have returned had it been declared
        # functionally, and 2) what a user would return in a mock function: a tuple stays a tuple,
        # a one-element named tuple becomes a one-element plain tuple, a single element stays single.
        if len(self.output_bindings) != 1:
            return tuple(
                get_promise(b.binding, intermediate_node_outputs)
                for b in self.output_bindings)

        # Presence of output_tuple_name signals the one-element named tuple case.
        is_named_tuple = self.python_interface.output_tuple_name
        sole_promise = get_promise(self.output_bindings[0].binding,
                                   intermediate_node_outputs)
        if is_named_tuple:
            return (sole_promise, )
        return sole_promise
Exemplo n.º 12
0
    def local_execute(self, ctx: FlyteContext,
                      **kwargs) -> Union[Tuple[Promise], Promise, VoidPromise]:
        """
        Run this task on the local execution path by calling dispatch_execute.

        Use this when calling a task with native values (or Promises containing Flyte literals
        derived from Python native values). When ``metadata.cache`` is set, LocalTaskCache is
        consulted first and dispatch_execute only runs on a miss.

        :param ctx: The FlyteContext used for translating inputs and dispatching the task
        :param kwargs: Task inputs, either Promises or native values (see the note below)
        :raises AssertionError: If the number of literals returned differs from the number of declared outputs
        :return: A VoidPromise when the interface declares no outputs, otherwise the result of
            create_task_output
        """
        # During local execute an input is either a Promise (output of a previous task execution)
        # or a native constant bound to this task (e.g. a default value); lists and dictionaries
        # of either may occur too. After translation we essentially have a LiteralMap.
        translated = translate_inputs_to_literals(
            ctx,
            incoming_values=kwargs,
            flyte_interface_types=self.interface.inputs,  # type: ignore
            native_types=self.get_input_types(),
        )
        input_literal_map = _literal_models.LiteralMap(literals=translated)

        if not self.metadata.cache:
            # Uncached path: run with a context whose user space params were rebuilt from
            # with_task_sandbox().
            es = ctx.execution_state
            sandbox_builder = es.user_space_params.with_task_sandbox()
            base_ctx = ctx.current_context()
            sandboxed_state = es.with_params(user_space_params=sandbox_builder.build())
            ctx = base_ctx.with_execution_state(sandboxed_state).build()
            outputs_literal_map = self.dispatch_execute(ctx, input_literal_map)
        else:
            # Cached path: look for a memoized result first.
            # TODO: how to get a nice `native_inputs` here?
            logger.info(
                f"Checking cache for task named {self.name}, cache version {self.metadata.cache_version} "
                f"and inputs: {input_literal_map}")
            outputs_literal_map = LocalTaskCache.get(
                self.name, self.metadata.cache_version, input_literal_map)
            # The cache returns None iff the key does not exist in the cache
            if outputs_literal_map is not None:
                logger.info("Cache hit")
            else:
                logger.info("Cache miss, task will be executed now")
                outputs_literal_map = self.dispatch_execute(
                    ctx, input_literal_map)
                # TODO: need `native_inputs`
                LocalTaskCache.set(self.name, self.metadata.cache_version,
                                   input_literal_map, outputs_literal_map)
                logger.info(
                    f"Cache set for task named {self.name}, cache version {self.metadata.cache_version} "
                    f"and inputs: {input_literal_map}")
        outputs_literals = outputs_literal_map.literals

        # TODO maybe this is the part that should be done for local execution, we pass the outputs to some special
        #    location, otherwise we dont really need to right? The higher level execute could just handle literalMap
        output_names = list(self.interface.outputs.keys())  # type: ignore
        if len(output_names) != len(outputs_literals):
            # Declared and produced outputs must match one-to-one.
            raise AssertionError(
                f"Length difference {len(output_names)} {len(outputs_literals)}"
            )

        if output_names:
            # Wrap every produced literal back into a Promise, in declaration order.
            wrapped = []
            for name in output_names:
                wrapped.append(Promise(name, outputs_literals[name]))
            return create_task_output(wrapped, self.python_interface)

        # Tasks that don't return anything still return a VoidPromise
        return VoidPromise(self.name)