Пример #1
0
def test_workflow_copying_eopatches():
    """Check the outputs of two workflow branches that remove different features.

    A single node initializes two data features, after which two sibling nodes
    each remove one of them. Each output is expected to still hold the feature
    that was removed only in the other branch.
    """
    array_shape = (2, 4, 4, 3)
    first_feature = (FeatureType.DATA, "data1")
    second_feature = (FeatureType.DATA, "data2")

    create_node = EONode(CreateEOPatchTask())
    init_task = InitializeFeatureTask([first_feature, second_feature],
                                      shape=array_shape,
                                      init_value=1)
    init_node = EONode(init_task, inputs=[create_node])

    # Both removal branches start from the same initialization node.
    drop_first = EONode(RemoveFeatureTask([first_feature]), inputs=[init_node])
    drop_second = EONode(RemoveFeatureTask([second_feature]), inputs=[init_node])
    out_first = EONode(OutputTask(name="out1"), inputs=[drop_first])
    out_second = EONode(OutputTask(name="out2"), inputs=[drop_second])

    nodes = [create_node, init_node, drop_first, drop_second, out_first, out_second]
    results = EOWorkflow(nodes).execute()

    patch_without_first = results.outputs["out1"]
    patch_without_second = results.outputs["out2"]

    expected_without_first = EOPatch(data={"data2": np.ones(array_shape, dtype=np.uint8)})
    expected_without_second = EOPatch(data={"data1": np.ones(array_shape, dtype=np.uint8)})

    assert patch_without_first == expected_without_first
    assert patch_without_second == expected_without_second
Пример #2
0
def test_get_nodes():
    """Check ``EOWorkflow.get_nodes`` on a linear five-node workflow.

    The nodes returned by the workflow must equal the original nodes in their
    original order. Running the returned nodes' tasks by hand, feeding each
    task the previous task's result, must give the same value as the workflow's
    ``"out"`` output.
    """
    in_node = EONode(InputTask())
    inc_node0 = EONode(IncTask(), inputs=[in_node])
    inc_node1 = EONode(IncTask(), inputs=[inc_node0])
    inc_node2 = EONode(IncTask(), inputs=[inc_node1])
    output_node = EONode(OutputTask(name="out"), inputs=[inc_node2])

    original_nodes = [in_node, inc_node0, inc_node1, inc_node2, output_node]
    eow = EOWorkflow(original_nodes)

    returned_nodes = eow.get_nodes()

    assert original_nodes == returned_nodes, "Returned nodes differ from original nodes"

    arguments_dict = {in_node: {"val": 2}, inc_node0: {"d": 2}}
    workflow_res = eow.execute(arguments_dict)

    # Chain the tasks manually: the first task gets no positional input, every
    # later task receives the single result of its predecessor.
    manual_res = []
    for node in returned_nodes:
        node_kwargs = arguments_dict.get(node, {})
        manual_res = [node.task.execute(*manual_res, **node_kwargs)]

    assert workflow_res.outputs["out"] == manual_res[
        0], "Manually running returned nodes produces different results."
Пример #3
0
def test_exception_handling():
    """Check how a workflow reports a failing node, with and without ``raise_errors``.

    When errors are raised, the task's ``CustomException`` must reach the caller.
    With ``raise_errors=False`` the results must have no outputs, must name the
    failing node, and must hold stats for the two nodes that were started.
    """
    input_node = EONode(InputTask(), name="xyz")
    exception_node = EONode(ExceptionTask(), inputs=[input_node])
    increase_node = EONode(IncTask(), inputs=[exception_node])
    workflow = EOWorkflow([input_node, exception_node, increase_node])

    with pytest.raises(CustomException):
        workflow.execute()

    results = workflow.execute(raise_errors=False)

    assert results.outputs == {}
    assert results.error_node_uid == exception_node.uid
    assert len(results.stats) == 2

    for node in (input_node, exception_node):
        stats = results.stats[node.uid]

        assert stats.node_uid == node.uid
        assert stats.node_name == node.name

        if node is not exception_node:
            # The node that succeeded carries no exception information.
            assert stats.exception is None
            assert stats.exception_traceback is None
            continue

        assert isinstance(stats.exception, CustomException)
        assert stats.exception_traceback.startswith("Traceback")
Пример #4
0
def test_workflow_arguments():
    """Check that per-node execution arguments reach the right tasks.

    The workflow divides the value of the first input node by the value of the
    second. It is run many times through a process pool and then three times
    directly. Two nodes deliberately share the name ``"some name"``.
    """
    input_node1 = EONode(InputTask())
    input_node2 = EONode(InputTask(), name="some name")
    divide_node = EONode(DivideTask(), inputs=(input_node1, input_node2), name="some name")
    output_node = EONode(OutputTask(name="output"), inputs=[divide_node])

    workflow = EOWorkflow([input_node1, input_node2, divide_node, output_node])

    def input_arguments(first_value, second_value):
        """Build the execution arguments for the two input nodes."""
        return {input_node1: {"val": first_value}, input_node2: {"val": second_value}}

    with concurrent.futures.ProcessPoolExecutor(max_workers=5) as executor:
        k2future = {}
        for k in range(2, 100):
            k2future[k] = executor.submit(workflow.execute, input_arguments(k**3, k**2))
        executor.shutdown()
        # k**3 divided by k**2 must give back k.
        for k, future in k2future.items():
            assert future.result().outputs["output"] == k

    result1 = workflow.execute(input_arguments(15, 3))
    assert result1.outputs["output"] == 5

    result2 = workflow.execute(input_arguments(6, 3))
    assert result2.outputs["output"] == 2

    arguments_with_z = input_arguments(6, 3)
    arguments_with_z[divide_node] = {"z": 1}
    result3 = workflow.execute(arguments_with_z)
    assert result3.outputs[output_node.task.name] == 3
def test_output_task_in_workflow(test_eopatch_path, test_eopatch):
    """Check that only the ``OutputTask`` node contributes to the workflow outputs.

    The workflow loads an EOPatch and passes it to both an output node and a
    dummy node. Exactly one output, stored under ``"result-name"``, is expected.
    """
    load_node = EONode(LoadTask(test_eopatch_path))
    output_node = EONode(OutputTask(name="result-name"), inputs=[load_node])
    dummy_node = EONode(DummyTask(), inputs=[load_node])

    workflow = EOWorkflow([load_node, output_node, dummy_node])
    outputs = workflow.execute().outputs

    assert len(outputs) == 1
    assert outputs["result-name"] == test_eopatch
Пример #6
0
    def setUp(self):
        """Create ``self.workflow``: two ``InputTask`` instances feeding one ``DivideTask``."""
        first_input = InputTask()
        second_input = InputTask()
        divider = DivideTask()

        dependencies = [
            Dependency(task=first_input, inputs=[]),
            Dependency(task=second_input, inputs=[]),
            Dependency(task=divider, inputs=[first_input, second_input]),
        ]
        self.workflow = EOWorkflow(dependencies=dependencies)
    def setUp(self):
        """Create ``self.workflow``: two independent ``FooTask`` instances feeding a third."""
        first_source = FooTask()
        second_source = FooTask()
        sink = FooTask()

        dependencies = [
            Dependency(task=first_source, inputs=[]),
            Dependency(task=second_source, inputs=[]),
            Dependency(task=sink, inputs=[first_source, second_source]),
        ]
        self.workflow = EOWorkflow(dependencies=dependencies)
Пример #8
0
def test_multiedge_workflow():
    """Check a workflow in which one node is listed twice as input of another.

    The divide node receives the same increment node as both of its inputs, and
    the nodes are handed to the workflow out of dependency order.
    """
    in_node = EONode(InputTask())
    inc_node = EONode(IncTask(), inputs=[in_node])
    # The same upstream node is used for both inputs of the division.
    div_node = EONode(DivideTask(), inputs=[inc_node, inc_node])
    output_node = EONode(OutputTask(name="out"), inputs=[div_node])

    unordered_nodes = [in_node, output_node, inc_node, div_node]
    workflow = EOWorkflow(unordered_nodes)

    outputs = workflow.execute({in_node: {"val": 2}}).outputs

    assert outputs["out"] == 1
Пример #9
0
 def test_resolve_dependencies(self, edges):
     """Check ``EOWorkflow._schedule_dependencies`` on the graph built from ``edges``.

     For a cyclic graph, scheduling must raise ``CyclicDependencyError``.
     Otherwise every edge ``(u, v)`` must have ``u`` placed before ``v`` in the
     returned ordering.

     :param edges: Iterable of ``(u, v)`` vertex pairs describing directed edges.
     """
     dag = DirectedGraph.from_edges(edges)
     if DirectedGraph._is_cyclic(dag):
         with self.assertRaises(CyclicDependencyError):
             EOWorkflow._schedule_dependencies(dag)
         return

     ordering = EOWorkflow._schedule_dependencies(dag)
     ver2pos = {vertex: position for position, vertex in enumerate(ordering)}
     # all() is true for an empty edge list, whereas functools.reduce without
     # an initial value raises TypeError on an empty sequence.
     self.assertTrue(all(ver2pos[u] < ver2pos[v] for u, v in edges))
    def process(self, arguments):
        """Execute the callback given in ``arguments["process"]`` as a workflow.

        :param arguments: Process arguments; ``"data"`` and ``"process"`` are both
            required, and ``"data"`` must be an ``xr.DataArray``.
        :return: The entry of the workflow results that belongs to the result
            task returned by ``generate_workflow_dependencies``.
        """
        # "data" is only validated here; its value is not used directly below.
        self.validate_parameter(arguments, "data", required=True, allowed_types=[xr.DataArray])
        process_definition = self.validate_parameter(arguments, "process", required=True)

        callback = process_definition["callback"]
        dependencies, result_task = self.generate_workflow_dependencies(callback, arguments)

        workflow_results = EOWorkflow(dependencies).execute({})
        return workflow_results[result_task]
def _execute_process_graph(process_graph, job_id, variables):
    """Build an ``EOWorkflow`` from a process graph, run it and return the result.

    For every graph node with process id ``<pid>`` the class ``<pid>EOTask`` is
    looked up in the submodule ``<pid>`` of this module's ``process`` attribute
    and instantiated. The tasks are then wired together using each task's
    ``depends_on()`` and executed as a workflow, conceptually::

        tasks = [
            (loadco1, [], "Node name: load1"),
            (ndvi1, [loadco1], "Node name: ndvi1"),
            (reduce1, [ndvi1], "Node name: reduce1"),
        ]
        EOWorkflow(tasks).execute({})

    :param process_graph: Mapping of node name to node definition; a definition
        provides ``'process_id'`` and ``'arguments'`` and may set ``'result'``.
    :param job_id: Passed to every task and used in the debug log messages.
    :param variables: Passed unchanged to every task.
    :return: The ``results`` attribute of the task whose node is flagged as result.
    :raises process.VariableValueMissing: If the node flagged as result is not a
        ``save_result`` process.
    """
    # Pass 1: instantiate every task first, so that dependencies can be
    # resolved by node name regardless of the order of the graph entries.
    tasks_by_name = {}
    result_task = None
    for node_name, node_definition in process_graph.items():
        process_id = node_definition['process_id']
        module_name = '{process_id}'.format(process_id=process_id)
        class_name = '{process_id}EOTask'.format(process_id=process_id)
        task_module = getattr(sys.modules[__name__].process, module_name)
        task_class = getattr(task_module, class_name)
        task = task_class(node_definition['arguments'], job_id, logger, variables, node_name)
        tasks_by_name[node_name] = task

        if not node_definition.get('result', False):
            continue

        result_task = task
        if process_id != 'save_result':
            raise process.VariableValueMissing(
                "No value specified for process graph variable 'save_result'."
            )

    # Pass 2: describe each task together with the tasks it depends on.
    tasks = [
        (task, [tasks_by_name[name] for name in task.depends_on()], 'Node name: ' + node_name)
        for node_name, task in tasks_by_name.items()
    ]

    workflow = EOWorkflow(tasks)

    logger.debug("[{}]: executing workflow...".format(job_id))
    workflow.execute({})
    logger.debug("[{}]: workflow executed.".format(job_id))

    return result_task.results
Пример #12
0
    def process(self, arguments):
        """Reduce ``data`` along ``dimension``, optionally with a reducer callback.

        Without a reducer the dimension is squeezed out, which is only allowed
        if it holds at most one value. With a reducer, the dimension is recorded
        in the ``"reduce_by"`` attribute of ``arguments["data"]``, the reducer
        callback is executed as a workflow, and the last ``"reduce_by"`` entry
        is popped from the result's attributes.

        :param arguments: Process arguments: ``"data"`` (required ``xr.DataArray``),
            ``"dimension"`` (required ``str``), ``"reducer"`` (optional),
            ``"target_dimension"`` (optional ``str`` or ``None``) and ``"binary"``
            (optional ``bool``).
        :return: The squeezed data, or the result of the reducer workflow.
        :raises ProcessArgumentInvalid: If ``dimension`` is not a dimension of the
            data, or if no reducer is given and the dimension has more than one value.
        """
        data = self.validate_parameter(
            arguments, "data", required=True, allowed_types=[xr.DataArray])
        dimension = self.validate_parameter(
            arguments, "dimension", required=True, allowed_types=[str])
        reducer = self.validate_parameter(arguments, "reducer", default=None)
        target_dimension = self.validate_parameter(
            arguments, "target_dimension", default=None, allowed_types=[str, type(None)])
        # "binary" is only validated; its value is not used below.
        self.validate_parameter(arguments, "binary", default=False, allowed_types=[bool])

        if dimension not in data.dims:
            raise ProcessArgumentInvalid(
                "The argument 'dimension' in process 'reduce' is invalid: Dimension '{}' does not exist in data."
                .format(dimension))

        if reducer is None:
            if data[dimension].size > 1:
                raise ProcessArgumentInvalid(
                    "The argument 'dimension' in process 'reduce' is invalid: Dimension '{}' has more than one value, but reducer is not specified."
                    .format(dimension))
            return data.squeeze(dimension, drop=True)

        # Record the dimension being reduced on the input data's attributes.
        if data.attrs.get("reduce_by"):
            arguments["data"].attrs["reduce_by"].append(dimension)
        else:
            arguments["data"].attrs["reduce_by"] = [dimension]

        callback = reducer["callback"]
        dependencies, result_task = self.generate_workflow_dependencies(callback, arguments)
        workflow_results = EOWorkflow(dependencies).execute({})
        reduced = workflow_results[result_task]

        # Remove the most recently recorded dimension from the result again.
        reduced.attrs["reduce_by"].pop()

        if target_dimension:
            reduced = xr.concat(reduced, dim=target_dimension)

        return reduced
 def to_workflow(self):
     """Return an ``EOWorkflow`` made of a placeholder load step plus ``self.tasks(...)``."""
     # Dummy load task, present only so that the graph is shown correctly.
     placeholder_load = LoadTask(".")
     dependencies = [(placeholder_load, [], "Download bands")]
     dependencies.extend(self.tasks(placeholder_load))
     return EOWorkflow(dependencies)
Пример #14
0
    def test_trivial_workflow(self):
        """Check the result container of a workflow with a single ``DummyTask``.

        The result must be a ``WorkflowResults`` with exactly one entry, keyed by
        an ``EOTask``, holding the value 42 and also reachable through the
        dependency object itself.
        """
        task = DummyTask()
        dep = Dependency(task, [])
        workflow = EOWorkflow([dep])

        result = workflow.execute()

        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)).
        self.assertIsInstance(result, WorkflowResults)
        self.assertEqual(len(result), 1)
        self.assertEqual(len(result.keys()), 1)
        self.assertEqual(len(result.values()), 1)
        items = list(result.items())
        self.assertEqual(len(items), 1)
        key, value = items[0]
        self.assertIsInstance(key, EOTask)
        self.assertEqual(value, 42)
        self.assertEqual(result[dep], 42)
Пример #15
0
    def test_workflow_arguments(self):
        """Check that per-task execution arguments reach the right tasks.

        The workflow divides the value of the first input task by the value of
        the second. It is run many times through a process pool and then three
        times directly.
        """
        input_task1 = InputTask()
        input_task2 = InputTask()
        divide_task = DivideTask()

        dependencies = [
            Dependency(task=input_task1, inputs=[]),
            Dependency(task=input_task2, inputs=[]),
            Dependency(task=divide_task, inputs=[input_task1, input_task2]),
        ]
        workflow = EOWorkflow(dependencies=dependencies)

        def input_arguments(first_value, second_value):
            """Build the execution arguments for the two input tasks."""
            return {input_task1: {'val': first_value}, input_task2: {'val': second_value}}

        with concurrent.futures.ProcessPoolExecutor(max_workers=5) as executor:
            k2future = {}
            for k in range(2, 100):
                k2future[k] = executor.submit(workflow.execute, input_arguments(k ** 3, k ** 2))
            executor.shutdown()
            # k**3 divided by k**2 must give back k.
            for k, future in k2future.items():
                self.assertEqual(future.result()[divide_task], k)

        result1 = workflow.execute(input_arguments(15, 3))
        self.assertEqual(result1[divide_task], 5)

        result2 = workflow.execute(input_arguments(6, 3))
        self.assertEqual(result2[divide_task], 2)

        arguments_with_z = input_arguments(6, 3)
        arguments_with_z[divide_task] = {'z': 1}
        result3 = workflow.execute(arguments_with_z)
        self.assertEqual(result3[divide_task], 3)
class TestGraph(unittest.TestCase):
    """Tests for the graph representations of an ``EOWorkflow``."""

    def setUp(self):
        """Create a workflow of two independent ``FooTask`` instances feeding a third."""
        task1 = FooTask()
        task2 = FooTask()
        task3 = FooTask()

        self.workflow = EOWorkflow(dependencies=[
            Dependency(task=task1, inputs=[]),
            Dependency(task=task2, inputs=[]),
            Dependency(task=task3, inputs=[task1, task2])
        ])

    def test_graph_nodes_and_edges(self):
        """Check that ``get_dot`` and ``dependency_graph`` both return a ``Digraph``."""
        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)).
        dot = self.workflow.get_dot()
        self.assertIsInstance(dot, Digraph)

        digraph = self.workflow.dependency_graph()
        self.assertIsInstance(digraph, Digraph)
Пример #17
0
def test_keyboard_interrupt(simple_cluster):
    """Check that ``RayExecutor.run`` raises a Ray error for ``KeyboardExceptionTask``.

    Ten executions of a single-node workflow are submitted. Either
    ``TaskCancelledError`` or ``RayTaskError`` is accepted.
    """
    exception_node = EONode(KeyboardExceptionTask())
    workflow = EOWorkflow([exception_node])
    execution_kwargs = [{exception_node: {"arg1": 1}} for _ in range(10)]

    accepted_errors = (ray.exceptions.TaskCancelledError, ray.exceptions.RayTaskError)
    with pytest.raises(accepted_errors):
        RayExecutor(workflow, execution_kwargs).run()
Пример #18
0
class TestGraph(unittest.TestCase):
    """Tests for the graph representations of an ``EOWorkflow``."""

    def setUp(self):
        """Create a workflow of two ``InputTask`` instances feeding one ``DivideTask``."""
        first_input = InputTask()
        second_input = InputTask()
        divider = DivideTask()

        dependencies = [
            Dependency(task=first_input, inputs=[]),
            Dependency(task=second_input, inputs=[]),
            Dependency(task=divider, inputs=[first_input, second_input]),
        ]
        self.workflow = EOWorkflow(dependencies=dependencies)

    def test_graph_nodes_and_edges(self):
        """Produce the dot source and the dependency graph of the workflow.

        NOTE(review): the visible code makes no assertions about either object;
        the test only passes if both calls complete without raising.
        """
        dot_graph = self.workflow.get_dot()
        # Copy the dot source into an in-memory file and rewind it for reading.
        source_buffer = StringIO()
        source_buffer.write(dot_graph.source)
        source_buffer.seek(0)

        dependency_digraph = self.workflow.dependency_graph()
Пример #19
0
def test_keyboard_interrupt():
    """Check that ``EOExecutor.run`` lets ``KeyboardInterrupt`` through.

    The same ten executions are run with one worker, with three workers in
    multiprocess mode and with three workers without multiprocessing.
    """
    exception_node = EONode(KeyboardExceptionTask())
    workflow = EOWorkflow([exception_node])
    execution_kwargs = [{exception_node: {"arg1": 1}} for _ in range(10)]

    run_configurations = (
        {"workers": 1},
        {"workers": 3, "multiprocess": True},
        {"workers": 3, "multiprocess": False},
    )
    for run_kwargs in run_configurations:
        with pytest.raises(KeyboardInterrupt):
            EOExecutor(workflow, execution_kwargs).run(**run_kwargs)
Пример #20
0
    def setUpClass(cls):
        """Create the shared workflow and the list of execution arguments.

        The workflow has one ``ExampleTask`` that feeds a second ``ExampleTask``
        twice. The first dependency is given as a plain tuple, the second as a
        ``Dependency`` object.
        """
        task = ExampleTask()
        dependencies = [
            (task, []),
            Dependency(task=ExampleTask(), inputs=[task, task]),
        ]
        cls.workflow = EOWorkflow(dependencies)

        # None marks the execution that passes no arguments at all.
        per_run_kwargs = [{'arg1': 1}, None, {'arg1': 3, 'arg3': 10}, {'arg1': None}]
        cls.execution_args = [
            {} if kwargs is None else {task: kwargs} for kwargs in per_run_kwargs
        ]
Пример #21
0
    def setUpClass(cls):
        """Create the shared tasks, workflow and list of execution arguments.

        ``cls.task`` (an ``ExampleTask``) feeds ``cls.final_task`` (a ``FooTask``)
        twice. The first dependency is given as a plain tuple, the second as a
        ``Dependency`` object.
        """
        cls.task = ExampleTask()
        cls.final_task = FooTask()
        dependencies = [
            (cls.task, []),
            Dependency(task=cls.final_task, inputs=[cls.task, cls.task]),
        ]
        cls.workflow = EOWorkflow(dependencies)

        # None marks the execution that passes no arguments at all.
        per_run_kwargs = [{'arg1': 1}, None, {'arg1': 3, 'arg3': 10}, {'arg1': None}]
        cls.execution_args = [
            {} if kwargs is None else {cls.task: kwargs} for kwargs in per_run_kwargs
        ]
Пример #22
0
def test_bad_structure_exceptions():
    """Check that ``EOWorkflow`` rejects malformed node lists with ``ValueError``.

    A valid linear chain must be accepted. A duplicated node, a missing
    intermediate node and a cyclic dependency must each be rejected.
    """
    in_node = EONode(InputTask())
    inc_node0 = EONode(IncTask(), inputs=[in_node])
    inc_node1 = EONode(IncTask(), inputs=[inc_node0])
    inc_node2 = EONode(IncTask(), inputs=[inc_node1])
    output_node = EONode(OutputTask(name="out"), inputs=[inc_node2])

    complete_chain = [in_node, inc_node0, inc_node1, inc_node2, output_node]

    # The well-formed chain must be accepted.
    EOWorkflow(complete_chain)

    # inc_node0 appears twice.
    with_duplicate = [in_node, inc_node0, inc_node0, inc_node1, inc_node2, output_node]
    with pytest.raises(ValueError):
        EOWorkflow(with_duplicate)

    # inc_node1 is left out although inc_node2 depends on it.
    with_gap = [in_node, inc_node0, inc_node2, output_node]
    with pytest.raises(ValueError):
        EOWorkflow(with_gap)

    # Create a cycle by overwriting the inputs through the parent class's
    # __setattr__ instead of EONode's own.
    super(EONode, inc_node0).__setattr__("inputs", (inc_node1, ))
    with pytest.raises(ValueError):
        EOWorkflow([in_node, inc_node0, inc_node1, inc_node2, output_node])
Пример #23
0
def test_workflow_results():
    """Check the contents of ``WorkflowResults`` for a two-node workflow.

    The checks cover the outputs, ``drop_outputs`` returning a separate object
    without outputs, the start and end timestamps, and the per-node stats.
    """
    input_node = EONode(InputTask())
    output_node = EONode(OutputTask(name="out"), inputs=[input_node])
    workflow = EOWorkflow([input_node, output_node])

    results = workflow.execute({input_node: {"val": 10}})

    assert isinstance(results, WorkflowResults)
    assert results.outputs == {"out": 10}

    # Dropping outputs must give a new object, not modify the original.
    stripped_results = results.drop_outputs()
    assert stripped_results.outputs == {}
    assert stripped_results is not results

    assert isinstance(results.start_time, dt.datetime)
    assert isinstance(results.end_time, dt.datetime)
    assert results.start_time < results.end_time < dt.datetime.now()

    assert isinstance(results.stats, dict)
    assert len(results.stats) == 2
    for node in (input_node, output_node):
        assert isinstance(results.stats.get(node.uid), NodeStats)
Пример #24
0
    def test_report_creation(self):
        """Check that ``make_report`` returns a value after a one-task workflow has run."""
        example_task = ExampleTask()
        dependencies = [Dependency(task=example_task, inputs=[])]
        workflow = EOWorkflow(dependencies=dependencies)

        execution_args = [{'arg1': 1}]

        # The temporary directory is passed to the executor as its file_path.
        with tempfile.TemporaryDirectory() as tmpdirname:
            executor = EOExecutor(workflow, execution_args, file_path=tmpdirname)
            executor.run()

            report = executor.make_report()
            self.assertIsNotNone(report)
Пример #25
0
    def test_execution_errors(self):
        """Check that a failing task is recorded in the executor's execution stats.

        After a workflow with a single ``RaiserErrorTask`` has run, the stats of
        the first execution must contain an ``'error'`` entry.
        """
        task = RaiserErrorTask()

        workflow = EOWorkflow(dependencies=[
            Dependency(task=task, inputs=[]),
        ])

        execution_args = [
            {'arg1': 1}
        ]

        with tempfile.TemporaryDirectory() as tmpdirname:
            executor = EOExecutor(workflow, execution_args, file_path=tmpdirname)
            executor.run()

            # assertIn reports the container's contents on failure, unlike
            # assertTrue(... in ...).
            self.assertIn('error', executor.execution_stats[0])
Пример #26
0
def test_run_after_interrupt(workflow, execution_kwargs, simple_cluster):
    """Check that a ``RayExecutor`` gives the same outputs before and after another executor fails.

    A regular executor is run, then a separate executor whose workflow contains
    ``KeyboardExceptionTask`` is run and expected to fail, then the regular
    executor is run again.
    """
    foo_node = EONode(FooTask())
    exception_node = EONode(KeyboardExceptionTask(), inputs=[foo_node])
    failing_workflow = EOWorkflow([foo_node, exception_node])
    failing_executor = RayExecutor(failing_workflow, [{}])

    # The last entry of the kwargs is left out (original note: "removes args for exception").
    regular_kwargs = execution_kwargs[:-1]
    executor = RayExecutor(workflow, regular_kwargs)

    accepted_errors = (ray.exceptions.TaskCancelledError, ray.exceptions.RayTaskError)

    results_before = executor.run()
    with pytest.raises(accepted_errors):
        failing_executor.run()
    results_after = executor.run()

    outputs_before = [result.outputs for result in results_before]
    outputs_after = [result.outputs for result in results_after]
    assert outputs_before == outputs_after
Пример #27
0
    def test_execution_stats(self):
        """Check that the executor records one stats entry per execution."""
        example_task = ExampleTask()
        dependencies = [Dependency(task=example_task, inputs=[])]
        workflow = EOWorkflow(dependencies=dependencies)

        execution_args = [{'arg1': value} for value in (1, 2)]

        # The temporary directory is passed to the executor as its file_path.
        with tempfile.TemporaryDirectory() as tmpdirname:
            executor = EOExecutor(workflow, execution_args, file_path=tmpdirname)
            executor.run()

            recorded_stats = executor.execution_stats
            self.assertEqual(len(recorded_stats), 2)
Пример #28
0
 def test_linear_workflow(self):
     """Check ``EOWorkflow.make_linear_workflow`` on an input -> ``Inc`` -> ``Pow`` chain.

     With ``val=2``, ``d=2`` and ``n=3`` the last task's result is expected to
     be ``(2 + 2)**3``.
     """
     source_task = InputTask()
     increment_task = Inc()
     power_task = Pow()
     linear_workflow = EOWorkflow.make_linear_workflow(source_task, increment_task, power_task)

     execution_arguments = {
         source_task: {'val': 2},
         increment_task: {'d': 2},
         power_task: {'n': 3},
     }
     results = linear_workflow.execute(execution_arguments)

     self.assertEqual(results[power_task], (2 + 2)**3)
Пример #29
0
def test_get_node_with_uid():
    """Check ``EOWorkflow.get_node_with_uid`` for known and unknown uids.

    Each node must be found by its own uid. An unknown uid must give ``None``
    by default and raise ``KeyError`` when ``fail_if_missing=True``.
    """
    in_node = EONode(InputTask())
    inc_node = EONode(IncTask(), inputs=[in_node])
    output_node = EONode(OutputTask(name="out"), inputs=[inc_node])
    workflow_nodes = (in_node, inc_node, output_node)

    eow = EOWorkflow([in_node, inc_node, output_node])

    for node in workflow_nodes:
        assert node == eow.get_node_with_uid(node.uid)

    assert eow.get_node_with_uid("nonexsitant") is None
    with pytest.raises(KeyError):
        eow.get_node_with_uid("nonexsitant", fail_if_missing=True)
Пример #30
0
def test_workflows_reusing_nodes():
    """Check that two workflows built from the same nodes give the same output."""
    in_node = EONode(InputTask())
    node1 = EONode(IncTask(), inputs=[in_node])
    node2 = EONode(IncTask(), inputs=[node1])
    out_node = EONode(OutputTask(name="out"), inputs=[node2])
    input_args = {in_node: {"val": 2}, node2: {"d": 2}}

    shared_nodes = [in_node, node1, node2, out_node]
    original = EOWorkflow(shared_nodes)
    # Passed as a fresh list with the very same node objects.
    node_reuse = EOWorkflow(list(shared_nodes))

    original_output = original.execute(input_args).outputs["out"]
    reused_output = node_reuse.execute(input_args).outputs["out"]
    assert original_output == reused_output