Example #1
    def test_viz_reflects_multiple_mapping_if_flow_state_provided(self):
        ipython = MagicMock(
            get_ipython=lambda: MagicMock(config=dict(IPKernelApp=True)))
        add = AddTask(name="a_nice_task")
        list_task = Task(name="a_list_task")

        map_state1 = Mapped(map_states=[Success(), TriggerFailed()])
        map_state2 = Mapped(map_states=[Success(), Failed()])

        with patch.dict("sys.modules", IPython=ipython):
            with Flow(name="test") as f:
                first_res = add.map(x=list_task, y=8)
                with pytest.warns(
                        UserWarning
                ):  # making a copy of a task with dependencies
                    res = first_res.map(x=first_res, y=9)
            graph = f.visualize(flow_state=Success(
                result={
                    res: map_state1,
                    list_task: Success(),
                    first_res: map_state2,
                }))

        assert "{first} -> {second} [label=x style=dashed]".format(
            first=str(id(first_res)) + "0", second=str(id(res)) + "0")
        assert "{first} -> {second} [label=x style=dashed]".format(
            first=str(id(first_res)) + "1", second=str(id(res)) + "1")
Example #2
    def test_state_load_result_loads_map_states(self):
        """
        This test ensures that loading state results also loads mapped children results.
        See https://github.com/PrefectHQ/prefect/pull/2952
        """
        class MyResult(Result):
            def read(self, *args, **kwargs):
                new = self.copy()
                new.value = kwargs.get("location", args[0])
                return new

        state = Mapped(map_states=[
            None,
            Success("1", result=MyResult(location="foo")),
            Success("2", result=MyResult(location="bar")),
        ])
        assert state.message is None
        assert state.result is None
        assert [getattr(s, "result", None)
                for s in state.map_states] == [None] * 3

        new_state = state.load_result(MyResult(location=""))
        assert new_state.result == [None, "foo", "bar"]
        assert not new_state._result.location
        assert [getattr(s, "result", None) for s in state.map_states] == [
            None,
            "foo",
            "bar",
        ]
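A note on what the assertions above exercise: `load_result` on a `Mapped` state reads each non-None child state's result and then collects the child values, in map order, into the parent's `result` list. A minimal sketch of that aggregation with plain data (hypothetical stand-ins, not the Prefect implementation):

# Hypothetical per-child locations; None stands in for the missing child state.
children = [None, {"location": "foo"}, {"location": "bar"}]

def load_child(child):
    # mimic MyResult.read() above: the loaded value is just the stored location
    return None if child is None else child["location"]

# The parent Mapped result is simply the per-child values in map order.
parent_result = [load_child(c) for c in children]
assert parent_result == [None, "foo", "bar"]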
Example #3
def test_n_map_states():
    state = Mapped(map_states=[1, 2])
    assert state.n_map_states == 2

    state = Mapped(n_map_states=4)
    assert state.n_map_states == 4

    state = Mapped(map_states=[1, 2], n_map_states=4)
    assert state.n_map_states == 4
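The precedence this test exercises can be stated directly: an explicit `n_map_states` always wins, and otherwise the count falls back to the length of `map_states`. A minimal sketch of that rule as a standalone helper (`resolve_n_map_states` is hypothetical, not a Prefect API):

from typing import List, Optional

def resolve_n_map_states(map_states: Optional[List] = None,
                         n_map_states: Optional[int] = None) -> int:
    # an explicit count takes precedence; otherwise derive it from map_states
    if n_map_states is not None:
        return n_map_states
    return len(map_states or [])

assert resolve_n_map_states(map_states=[1, 2]) == 2
assert resolve_n_map_states(n_map_states=4) == 4
assert resolve_n_map_states(map_states=[1, 2], n_map_states=4) == 4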
Example #4
    def test_viz_reflects_mapping_if_flow_state_provided(self):
        ipython = MagicMock(
            get_ipython=lambda: MagicMock(config=dict(IPKernelApp=True)))
        add = AddTask(name="a_nice_task")
        list_task = Task(name="a_list_task")

        map_state = Mapped(map_states=[Success(), Failed()])
        with patch.dict("sys.modules", IPython=ipython):
            with Flow(name="test") as f:
                res = add.map(x=list_task, y=8)
            graph = f.visualize(flow_state=Success(result={
                res: map_state,
                list_task: Success()
            }))

        # one colored node for each mapped result
        assert 'label="a_nice_task <map>" color="#00800080"' in graph.source
        assert 'label="a_nice_task <map>" color="#FF000080"' in graph.source
        assert 'label=a_list_task color="#00800080"' in graph.source
        assert 'label=8 color="#00000080"' in graph.source

        # two edges for each input to add()
        for var in ["x", "y"]:
            for index in [0, 1]:
                assert "{0} [label={1} style=dashed]".format(
                    index, var) in graph.source
Example #5
def test_task_map_with_no_upstream_results_and_a_mapped_state(executor):
    """
    This test makes sure that mapped tasks properly generate child tasks even when
    run multiple times and without available upstream results. In this test, we run the pipeline
    from a variety of starting points, ensuring that some upstream results are unavailable and
    checking that child pipelines are properly regenerated.

    Note that upstream results will be hydrated from remote locations when running with a Cloud TaskRunner.
    """
    @prefect.task
    def numbers():
        return [1, 2, 3]

    @prefect.task
    def identity(x):
        return x

    with Flow(name="test") as f:
        n = numbers()
        x = identity.map(n)

    # first run with a missing result from `n` but map_states for `x`
    state = FlowRunner(flow=f).run(
        executor=executor,
        task_states={
            n: Success(),
            x: Mapped(map_states=[Pending() for i in range(1, 4)]),
        },
        return_tasks=f.tasks,
    )

    assert state.is_successful()
    assert state.result[x].result == [None] * 3
Example #6
def test_mapped_retries_regenerate_child_pipelines():
    """
    This test sets up a situation analogous to one found in Cloud: if a reduce task fails and a user
    retries it in the future, we want to make sure that the mapped child pipelines are correctly
    regenerated.  When run against Cloud, these child tasks will correctly query for their states and
    the run will proceed with the correct data.

    This test mimics that scenario by running the flow with a provided set of states that contain only
    metadata about the runs and no actual data to reference.  The child runs should still be produced
    based only on the n_map_states attribute of the parent.
    """
    idt = IdTask()
    ll = ListTask()
    with Flow("test") as flow:
        mapped = idt.map(ll)
        reduced = idt(mapped)

    flow_state = flow.run()
    assert flow_state.is_successful()
    assert flow_state.result[mapped].is_mapped()
    assert flow_state.result[reduced].is_successful()
    assert flow_state.result[reduced].result == [1, 2, 3]

    second_pass_states = {
        mapped: Mapped(n_map_states=3),
        ll: Success(result=Result())
    }

    new_state = flow.run(task_states=second_pass_states)
    assert new_state.is_successful()
    assert new_state.result[mapped].is_mapped()
    assert new_state.result[reduced].is_successful()
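The regeneration described in the docstring shows up concretely in the extended version of `check_task_ready_to_map` shown below: when a `Mapped` parent arrives with an empty `map_states` list but a positive `n_map_states`, placeholder children are created from the count alone. A minimal sketch of that expansion, using a hypothetical stand-in object rather than a real Prefect state:

class FakeMapped:
    # hypothetical stand-in exposing only the two attributes used by the check
    def __init__(self, n_map_states, map_states=None):
        self.n_map_states = n_map_states
        self.map_states = map_states or []

parent = FakeMapped(n_map_states=3)

# regenerate placeholder children purely from the recorded count
if len(parent.map_states) == 0 and parent.n_map_states > 0:
    parent.map_states = [None] * parent.n_map_states

assert parent.map_states == [None, None, None]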
Example #7
    def check_task_ready_to_map(self, state: State,
                                upstream_states: Dict[Edge, State]) -> State:
        """
        Checks if the parent task is ready to proceed with mapping.

        Args:
            - state (State): the current state of this task
            - upstream_states (Dict[Edge, Union[State, List[State]]]): the upstream states

        Raises:
            - ENDRUN: either way, we don't continue past this point
        """
        if state.is_mapped():
            raise ENDRUN(state)

        # we can't map if there are no success states with iterables upstream
        if upstream_states and not any([
                edge.mapped and state.is_successful()
                for edge, state in upstream_states.items()
        ]):
            new_state = Failed(
                "No upstream states can be mapped over.")  # type: State
            raise ENDRUN(new_state)
        elif not all([
                hasattr(state.result, "__getitem__")
                for edge, state in upstream_states.items() if
                state.is_successful() and not state.is_mapped() and edge.mapped
        ]):
            new_state = Failed(
                "At least one upstream state has an unmappable result.")
            raise ENDRUN(new_state)
        else:
            new_state = Mapped("Ready to proceed with mapping.")
            raise ENDRUN(new_state)
Example #8
    def check_task_ready_to_map(self, state: State,
                                upstream_states: Dict[Edge, State]) -> State:
        """
        Checks if the parent task is ready to proceed with mapping.

        Args:
            - state (State): the current state of this task
            - upstream_states (Dict[Edge, Union[State, List[State]]]): the upstream states

        Raises:
            - ENDRUN: either way, we don't continue past this point
        """
        if state.is_mapped():
            # this indicates we are executing a re-run of a mapped pipeline;
            # in this case, we populate both `map_states` and `cached_inputs`
            # to ensure the flow runner can properly regenerate the child tasks,
            # regardless of whether we mapped over an exchanged piece of data
            # or a non-data-exchanging upstream dependency
            if (len(state.map_states) == 0
                    and state.n_map_states > 0):  # type: ignore
                state.map_states = [None] * state.n_map_states  # type: ignore
            state.cached_inputs = {
                edge.key: state._result  # type: ignore
                for edge, state in upstream_states.items() if edge.key
            }
            raise ENDRUN(state)

        # we can't map if there are no success states with iterables upstream
        if upstream_states and not any([
                edge.mapped and state.is_successful()
                for edge, state in upstream_states.items()
        ]):
            new_state = Failed(
                "No upstream states can be mapped over.")  # type: State
            raise ENDRUN(new_state)
        elif not all([
                hasattr(state.result, "__getitem__")
                for edge, state in upstream_states.items() if
                state.is_successful() and not state.is_mapped() and edge.mapped
        ]):
            new_state = Failed(
                "At least one upstream state has an unmappable result.")
            raise ENDRUN(new_state)
        else:
            # compute and set n_map_states
            n_map_states = min(
                [
                    len(s.result) for e, s in upstream_states.items()
                    if e.mapped and s.is_successful() and not s.is_mapped()
                ] + [
                    s.n_map_states  # type: ignore
                    for e, s in upstream_states.items()
                    if e.mapped and s.is_mapped()
                ],
                default=0,
            )
            new_state = Mapped("Ready to proceed with mapping.",
                               n_map_states=n_map_states)
            raise ENDRUN(new_state)
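The `min(..., default=0)` at the end of this version of `check_task_ready_to_map` is what enforces map's shortest-iterable semantics: the parent maps over as many children as the shortest mapped upstream provides, whether that upstream is a plain iterable result or an already-mapped task. A small worked sketch with hypothetical counts:

# len(s.result) for mapped edges whose upstream state is a plain Success
plain_result_lengths = [3, 5]
# s.n_map_states for mapped edges whose upstream state is itself Mapped
mapped_upstream_counts = [4]

n_map_states = min(plain_result_lengths + mapped_upstream_counts, default=0)
assert n_map_states == 3  # bounded by the shortest mapped upstream

# with no mapped upstreams at all, default=0 yields an empty map
assert min([], default=0) == 0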
Example #9
    def test_mapped_will_use_existing_map_states_if_available(self, executor):

        with Flow(name="test") as flow:
            res = ReturnTask().map([0, 1])

        state = FlowRunner(flow=flow).run(
            return_tasks=[res],
            executor=executor,
            task_states={res: Mapped(map_states=[Success(), Success(result=100)])},
        )
        assert state.is_successful()
        assert state.result[res].map_states[1].is_successful()
        assert state.result[res].map_states[1].result == 100
Example #10
    def run_mapped_task(
        self,
        state: State,
        upstream_states: Dict[Edge, State],
        context: Dict[str, Any],
        executor: "prefect.engine.executors.Executor",
    ) -> State:
        """
        If the task is being mapped, submits child tasks for execution. Returns a `Mapped` state.

        Args:
            - state (State): the current task state
            - upstream_states (Dict[Edge, State]): the upstream states
            - context (dict, optional): prefect Context to use for execution
            - executor (Executor): executor to use when performing computation

        Returns:
            - State: the state of the task after running the check

        Raises:
            - ENDRUN: if the current state is not `Running`
        """

        map_upstream_states = []

        # we don't know how long the iterables are, but we want to iterate until we reach
        # the end of the shortest one
        counter = itertools.count()

        # infinite loop, if upstream_states has any entries
        while True and upstream_states:
            i = next(counter)
            states = {}

            try:

                for edge, upstream_state in upstream_states.items():

                    # if the edge is not mapped over, then we take its state
                    if not edge.mapped:
                        states[edge] = upstream_state

                    # if the edge is mapped and the upstream state is Mapped, then we are mapping
                    # over a mapped task. In this case, we take the appropriately-indexed upstream
                    # state from the upstream task's `Mapped.map_states` array.
                    # Note that these "states" might actually be futures at this time; we aren't
                    # blocking until they finish.
                    elif edge.mapped and upstream_state.is_mapped():
                        states[edge] = upstream_state.map_states[i]  # type: ignore

                    # Otherwise, we are mapping over the result of a "vanilla" task. In this
                    # case, we create a copy of the upstream state but set the result to the
                    # appropriately-indexed item from the upstream task's `State.result`
                    # array.
                    else:
                        states[edge] = copy.copy(upstream_state)

                        # if the current state is already Mapped, then we might be executing
                        # a re-run of the mapping pipeline. In that case, the upstream states
                        # might not have `result` attributes (as any required results could be
                        # in the `cached_inputs` attribute of one of the child states).
                        # Therefore, we only try to get a result if EITHER this task's
                        # state is not already mapped OR the upstream result is not None.
                        if not state.is_mapped() or upstream_state._result != NoResult:
                            upstream_result = Result(
                                upstream_state.result[i],
                                result_handler=upstream_state._result.result_handler,  # type: ignore
                            )
                            states[edge].result = upstream_result
                        elif state.is_mapped():
                            if i >= len(state.map_states):  # type: ignore
                                raise IndexError()

                # only add this iteration if we made it through all iterables
                map_upstream_states.append(states)

            # index error means we reached the end of the shortest iterable
            except IndexError:
                break

        def run_fn(
            state: State, map_index: int, upstream_states: Dict[Edge, State]
        ) -> State:
            map_context = context.copy()
            map_context.update(map_index=map_index)
            with prefect.context(self.context):
                return self.run(
                    upstream_states=upstream_states,
                    # if we set the state here, then it will not be processed by `initialize_run()`
                    state=state,
                    context=map_context,
                    executor=executor,
                )

        # generate initial states, if available
        if isinstance(state, Mapped):
            initial_states = list(state.map_states)  # type: List[Optional[State]]
        else:
            initial_states = []
        initial_states.extend([None] * (len(map_upstream_states) - len(initial_states)))

        current_state = Mapped(
            message="Preparing to submit {} mapped tasks.".format(len(initial_states)),
            map_states=initial_states,  # type: ignore
        )
        state = self.handle_state_change(old_state=state, new_state=current_state)
        if state is not current_state:
            return state

        # map over the initial states, a counter representing the map_index, and also the mapped upstream states
        map_states = executor.map(
            run_fn, initial_states, range(len(map_upstream_states)), map_upstream_states
        )

        self.logger.debug(
            "{} mapped tasks submitted for execution.".format(len(map_states))
        )
        new_state = Mapped(
            message="Mapped tasks submitted for execution.", map_states=map_states
        )
        return self.handle_state_change(old_state=state, new_state=new_state)
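The `while`/`itertools.count` loop above builds one dict of upstream states per map index and relies on an `IndexError` from the shortest mapped upstream to stop. The same pattern can be seen in isolation with plain lists (hypothetical data, no Prefect objects):

import itertools

# two mapped inputs of unequal length; iteration stops at the shorter one
mapped_inputs = {"x": [10, 20, 30], "y": ["a", "b"]}

per_index_inputs = []
counter = itertools.count()
while True:
    i = next(counter)
    try:
        per_index_inputs.append(
            {key: values[i] for key, values in mapped_inputs.items()})
    except IndexError:
        # reaching the end of the shortest iterable ends the loop
        break

assert per_index_inputs == [{"x": 10, "y": "a"}, {"x": 20, "y": "b"}]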
Example #11
def test_task_map_with_no_upstream_results_and_a_mapped_state(executor):
    """
    This test makes sure that mapped tasks properly generate child tasks even when
    run multiple times and without available upstream results. In this test, we run the pipeline
    from a variety of starting points, ensuring that some upstream results are unavailable and
    checking that child pipelines are properly regenerated.
    """
    @prefect.task
    def numbers():
        return [1, 2, 3]

    @prefect.task
    def plus_one(x):
        return x + 1

    @prefect.task
    def get_sum(x):
        return sum(x)

    with Flow(name="test") as f:
        n = numbers()
        x = plus_one.map(n)
        y = plus_one.map(x)
        s = get_sum(y)

    # first run with a missing result from `n` but map_states for `x`
    state = FlowRunner(flow=f).run(
        executor=executor,
        task_states={
            n: Success(),
            x: Mapped(map_states=[
                Pending(cached_inputs={"x": Result(i)}) for i in range(1, 4)
            ]),
        },
        return_tasks=f.tasks,
    )

    assert state.is_successful()
    assert state.result[s].result == 12

    # next run with missing results for n and x
    state = FlowRunner(flow=f).run(
        executor=executor,
        task_states={
            n: Success(),
            x: Mapped(map_states=[Success(), Success(), Success()]),
            y: Mapped(map_states=[
                Success(result=3),
                Success(result=4),
                Retrying(cached_inputs={"x": Result(4)}),
            ]),
        },
        return_tasks=f.tasks,
    )

    assert state.is_successful()
    assert state.result[s].result == 12

    # next run with missing results for n, x, and y
    state = FlowRunner(flow=f).run(
        executor=executor,
        task_states={
            n: Success(),
            x: Mapped(map_states=[Success(), Success(), Success()]),
            y: Mapped(map_states=[
                Success(result=3),
                Success(result=4),
                Success(result=5)
            ]),
        },
        return_tasks=f.tasks,
    )

    assert state.is_successful()
    assert state.result[s].result == 12
Example #12
    def test_trigger_failed_is_failed(self):
        assert issubclass(TriggerFailed, Failed)


@pytest.mark.parametrize(
    "state_check",
    [
        dict(state=Cancelled(), assert_true={"is_finished"}),
        dict(state=Cached(),
             assert_true={"is_cached", "is_finished", "is_successful"}),
        dict(state=ClientFailed(), assert_true={"is_meta_state"}),
        dict(state=Failed(), assert_true={"is_finished", "is_failed"}),
        dict(state=Finished(), assert_true={"is_finished"}),
        dict(state=Looped(), assert_true={"is_finished", "is_looped"}),
        dict(state=Mapped(),
             assert_true={"is_finished", "is_mapped", "is_successful"}),
        dict(state=Paused(), assert_true={"is_pending", "is_scheduled"}),
        dict(state=Pending(), assert_true={"is_pending"}),
        dict(state=Queued(), assert_true={"is_meta_state", "is_queued"}),
        dict(state=Resume(), assert_true={"is_pending", "is_scheduled"}),
        dict(state=Retrying(),
             assert_true={"is_pending", "is_scheduled", "is_retrying"}),
        dict(state=Running(), assert_true={"is_running"}),
        dict(state=Scheduled(), assert_true={"is_pending", "is_scheduled"}),
        dict(state=Skipped(),
             assert_true={"is_finished", "is_successful", "is_skipped"}),
        dict(state=Submitted(), assert_true={"is_meta_state", "is_submitted"}),
        dict(state=Success(), assert_true={"is_finished", "is_successful"}),
        dict(state=TimedOut(), assert_true={"is_finished", "is_failed"}),
        dict(state=TriggerFailed(), assert_true={"is_finished", "is_failed"}),
Example #13
        assert issubclass(TimedOut, Failed)

    def test_trigger_failed_is_failed(self):
        assert issubclass(TriggerFailed, Failed)


@pytest.mark.parametrize(
    "state_check",
    [
        dict(state=Cancelled(), assert_true={"is_finished", "is_failed"}),
        dict(state=Cached(), assert_true={"is_cached", "is_finished", "is_successful"}),
        dict(state=ClientFailed(), assert_true={"is_meta_state"}),
        dict(state=Failed(), assert_true={"is_finished", "is_failed"}),
        dict(state=Finished(), assert_true={"is_finished"}),
        dict(state=Looped(), assert_true={"is_finished", "is_looped"}),
        dict(state=Mapped(), assert_true={"is_finished", "is_mapped", "is_successful"}),
        dict(state=Paused(), assert_true={"is_pending", "is_scheduled"}),
        dict(state=Pending(), assert_true={"is_pending"}),
        dict(state=Queued(), assert_true={"is_meta_state", "is_queued"}),
        dict(state=Resume(), assert_true={"is_pending", "is_scheduled"}),
        dict(
            state=Retrying(), assert_true={"is_pending", "is_scheduled", "is_retrying"}
        ),
        dict(state=Running(), assert_true={"is_running"}),
        dict(state=Scheduled(), assert_true={"is_pending", "is_scheduled"}),
        dict(
            state=Skipped(), assert_true={"is_finished", "is_successful", "is_skipped"}
        ),
        dict(state=Submitted(), assert_true={"is_meta_state", "is_submitted"}),
        dict(state=Success(), assert_true={"is_finished", "is_successful"}),
        dict(state=TimedOut(), assert_true={"is_finished", "is_failed"}),
Example #14
    def test_state_type_methods_with_mapped_state(self):
        state = Mapped()
        assert not state.is_cached()
        assert not state.is_pending()
        assert not state.is_retrying()
        assert not state.is_running()
        assert state.is_finished()
        assert not state.is_skipped()
        assert not state.is_scheduled()
        assert state.is_successful()
        assert not state.is_failed()
        assert state.is_mapped()
        assert not state.is_meta_state()
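The combination checked here (finished, successful, and mapped all true at once) follows from where `Mapped` sits in the state hierarchy; a quick hedged check, assuming the class relationships implied by the assertions above:

from prefect.engine.state import Finished, Mapped, Success

# Mapped reports is_finished() and is_successful() because it subclasses Success,
# which in turn subclasses Finished (consistent with the method checks above).
assert issubclass(Mapped, Success) and issubclass(Success, Finished)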