def test_unordered_flow_stringy(self):
    # An empty flow renders with a length of zero.
    empty = uf.Flow('test')
    self.assertEqual('unordered_flow.Flow: test(len=0)', str(empty))

    # Once three tasks are added the rendered length becomes three.
    names = ('task1', 'task2', 'task3')
    populated = uf.Flow('test')
    populated.add(*[_task(name=n) for n in names])
    self.assertEqual('unordered_flow.Flow: test(len=3)', str(populated))
def test_retry_in_unordered_flow_with_tasks(self):
    # Compile an unordered flow that owns a retry controller and two
    # tasks, then verify the shape of the resulting graph.
    controller = retry.AlwaysRevert("c")
    a, b = test_utils.make_many(2)
    flo = uf.Flow("test", controller).add(a, b)

    compilation = compiler.PatternCompiler(flo).compile()
    g = _replicate_graph_with_names(compilation)
    self.assertEqual(5, len(g))

    expected_edges = [
        ('test', 'c', {'invariant': True}),
        ('c', 'a', {'invariant': True, 'retry': True}),
        ('c', 'b', {'invariant': True, 'retry': True}),
        ('b', 'test[$]', {'invariant': True}),
        ('a', 'test[$]', {'invariant': True}),
    ]
    self.assertItemsEqual(g.edges(data=True), expected_edges)

    # The flow node is the only entry point and 'test[$]' the only exit.
    self.assertItemsEqual(['test'], list(g.no_predecessors_iter()))
    self.assertItemsEqual(['test[$]'], list(g.no_successors_iter()))

    # Both task nodes must reference the very same retry controller.
    for task_name in ('a', 'b'):
        self.assertIs(controller, g.node[task_name]['retry'])
def test_unordered_flow_two_tasks_reverse_order(self):
    # The consuming task is added before the producing one; 'a' is then
    # expected to show up in both the requires and the provides sets.
    producer = _task(name='task1', provides=['a'])
    consumer = _task(name='task2', requires=['a'])
    f = uf.Flow('test').add(consumer)
    f = f.add(producer)

    self.assertEqual(2, len(f))
    self.assertEqual({'a'}, f.requires)
    self.assertEqual({'a'}, f.provides)
def test_iter_links(self):
    # Two tasks are added to an unordered flow; the flow is expected to
    # expose no links between them.
    task1 = _task(name='task1', provides=['a', 'b'])
    task2 = _task(name='task2', provides=['a', 'c'])
    f = uf.Flow('test')
    f.add(task2, task1)
    # Use the same assertion form as the sibling tests so that a failure
    # reports the unexpected links instead of a bare message.
    self.assertEqual([], list(f.iter_links()))
def test_nested_flows_requirements(self):
    # A linear flow and an unordered flow are nested inside an outer
    # unordered flow; the outer flow's requires/provides are then checked.
    linear_part = lf.Flow('lf').add(
        utils.TaskOneArgOneReturn('task1', rebind=['a'], provides=['x']),
        utils.TaskOneArgOneReturn('task2', provides=['y']))
    unordered_part = uf.Flow('uf').add(
        utils.TaskOneArgOneReturn('task3', rebind=['b'], provides=['z']),
        utils.TaskOneArgOneReturn('task4', rebind=['c'], provides=['q']))
    flow = uf.Flow('uf').add(linear_part, unordered_part)

    self.assertEqual({'a', 'b', 'c'}, flow.requires)
    self.assertEqual({'x', 'y', 'z', 'q'}, flow.provides)
def test_unordered_flow_provides_required_value_other_call(self):
    # The two tasks are added through separate add() calls; 'x' is
    # expected in both the provides and the requires sets afterwards.
    consumer = utils.TaskOneArg('task2')
    producer = utils.TaskOneReturn('task1', provides='x')
    flow = uf.Flow('uf')
    flow.add(consumer)
    flow.add(producer)

    self.assertEqual(2, len(flow))
    self.assertEqual({'x'}, flow.provides)
    self.assertEqual({'x'}, flow.requires)
def test_unordered_flow_with_retry_fully_satisfies(self):
    # The retry controller provides 'a', which is the only thing the
    # task requires, so the flow is expected to require nothing.
    controller = retry.AlwaysRevert(provides=['b', 'a'])
    f = uf.Flow('test', controller)
    f.add(_task(name='task1', requires=['a']))

    self.assertIs(f.retry, controller)
    self.assertEqual('test_retry', controller.name)
    self.assertEqual(set(), f.requires)
    self.assertEqual({'b', 'a'}, f.provides)
def test_unordered_flow_two_tasks(self):
    # Both tasks become members of the flow and no links join them.
    first = _task(name='task1')
    second = _task(name='task2')
    f = uf.Flow('test').add(first, second)

    self.assertEqual(2, len(f))
    self.assertEqual({first, second}, set(f))
    self.assertEqual([], list(f.iter_links()))
def test_unordered_flow_multi_provides_and_requires_values(self):
    # One task has its arguments rebound, the other does not; the flow's
    # requires and provides are expected to be the union over both tasks.
    rebound = utils.TaskMultiArgMultiReturn('task1',
                                            rebind=['a', 'b', 'c'],
                                            provides=['d', 'e', 'f'])
    plain = utils.TaskMultiArgMultiReturn('task2',
                                          provides=['i', 'j', 'k'])
    flow = uf.Flow('uf').add(rebound, plain)

    self.assertEqual({'a', 'b', 'c', 'x', 'y', 'z'}, flow.requires)
    self.assertEqual({'d', 'e', 'f', 'i', 'j', 'k'}, flow.provides)
def test_unordered_flow_with_retry(self):
    # A flow holding only a retry controller is expected to expose that
    # controller's requires and provides as its own.
    controller = retry.AlwaysRevert(requires=['a'], provides=['b'])
    f = uf.Flow('test', controller)

    self.assertIs(f.retry, controller)
    self.assertEqual('test_retry', controller.name)
    self.assertEqual({'a'}, f.requires)
    self.assertEqual({'b'}, f.provides)
def calculate(engine_conf):
    # Split the image into CHUNK_COUNT bands of rows, ask one task per
    # band to calculate it, and afterwards stitch the bands back together
    # into a flat list of ((x, y), color) points.
    #
    # An unordered flow is used here since the mandelbrot calculation is
    # an example of an embarrassingly parallel computation that can be
    # scattered across as many workers as possible.
    flow = uf.Flow("mandelbrot")

    # These symbols are automatically given to tasks as input to their
    # execute method; here they are the constants used in the mandelbrot
    # calculation.
    store = {
        'mandelbrot_config': [-2.0, 1.0, -1.0, 1.0, MAX_ITERATIONS],
        'image_config': {
            'size': IMAGE_SIZE,
        }
    }

    # The task names are recorded in creation order so that the results
    # can later be extracted in that same order (execution order does not
    # matter).
    task_names = []

    height, _width = IMAGE_SIZE
    chunk_size = int(math.ceil(height / float(CHUNK_COUNT)))
    for i in compat_range(0, CHUNK_COUNT):
        chunk_name = 'chunk_%s' % i
        task_name = "calculation_%s" % i
        # Each chunk covers chunk_size rows starting at this offset.
        first_row = i * chunk_size
        rows = [first_row, first_row + chunk_size]
        # The rebind sends the storage symbol named by chunk_name into
        # the task's local symbol 'chunk'; this is how every calculator
        # receives its own sequence of rows to work on.
        calculator = MandelCalculator(task_name,
                                      rebind={'chunk': chunk_name})
        flow.add(calculator)
        store[chunk_name] = rows
        task_names.append(task_name)

    # Now execute it.
    eng = engines.load(flow, store=store, engine_conf=engine_conf)
    eng.run()

    # Gather all the results, in task creation order.
    gather = []
    for name in task_names:
        gather.extend(eng.storage.get(name))

    return [((x, y), color)
            for y, row in enumerate(gather)
            for x, color in enumerate(row)]
def test_iter_nodes(self):
    # Every node yielded by iter_nodes() must be one of the added tasks
    # and must carry an empty metadata dictionary.
    task1 = _task(name='task1', provides=['a', 'b'])
    task2 = _task(name='task2', provides=['a', 'c'])
    tasks = {task1, task2}
    f = uf.Flow('test')
    f.add(task2, task1)
    for (node, data) in f.iter_nodes():
        # assertIn reports the offending node and the expected set when
        # it fails, unlike assertTrue(node in tasks).
        self.assertIn(node, tasks)
        self.assertDictEqual({}, data)
def test_unordered_flow_starts_as_empty(self):
    # A freshly created flow has no members, links, requires or provides.
    fresh = uf.Flow('test')

    self.assertEqual(0, len(fresh))
    for listing in (list(fresh), list(fresh.iter_links())):
        self.assertEqual([], listing)
    self.assertEqual(set(), fresh.requires)
    self.assertEqual(set(), fresh.provides)
def test_no_visible(self):
    # Ten tasks share one unordered flow; after compilation each of them
    # is expected to have no scopes at all.
    atoms = [test_utils.TaskOneReturn("root.%s" % i) for i in range(10)]
    r = uf.Flow("root")
    r.add(*atoms)

    c = compiler.PatternCompiler(r).compile()
    for atom in atoms:
        self.assertEqual([], _get_scopes(c, atom))
def test_unordered_flow_retry_and_task(self):
    # The retry controller provides 'a', so 'a' is not expected among
    # the flow's requirements even though the task is rebound to it.
    controller = retry.AlwaysRevert('rt',
                                    requires=['x', 'y'],
                                    provides=['a', 'b'])
    flow = uf.Flow('uf', controller)
    worker = utils.TaskMultiArgOneReturn(rebind=['a', 'x', 'c'],
                                         provides=['z'])
    flow.add(worker)

    self.assertEqual({'x', 'y', 'c'}, flow.requires)
    self.assertEqual({'a', 'b', 'z'}, flow.provides)
def test_unordered_flow_one_task(self):
    # add() hands back the flow itself, and a single-task flow mirrors
    # that task's requires and provides.
    f = uf.Flow('test')
    only_task = _task(name='task1',
                      requires=['a', 'b'],
                      provides=['c', 'd'])
    returned = f.add(only_task)

    self.assertIs(f, returned)
    self.assertEqual(1, len(f))
    self.assertEqual([only_task], list(f))
    self.assertEqual([], list(f.iter_links()))
    self.assertEqual({'a', 'b'}, f.requires)
    self.assertEqual({'c', 'd'}, f.provides)
def make_flow(table):
    # Build one RowMultiplier task per row of the table and place them
    # all in a single unordered flow.
    #
    # This creation will allow for parallel computation (since the flow
    # here is specifically unordered; and when things are unordered they
    # have no dependencies and when things have no dependencies they can
    # just be ran at the same time, limited in concurrency by the executor
    # or max workers of that executor...)
    multipliers = [RowMultiplier("m-%s" % index, index, row, MULTIPLER)
                   for index, row in enumerate(table)]
    f = uf.Flow("root")
    for multiplier in multipliers:
        f.add(multiplier)
    # NOTE(harlowja): at this point nothing has ran, the above only
    # defines what should be done (without actually doing it) along with
    # any ordering dependencies that should be enforced (the flow pattern
    # used forces this); the engine in the later main() function will
    # actually perform this work...
    return f
def test_unordered_nested_in_linear(self):
    # An unordered flow (b, c) sits between a and d inside a linear flow;
    # the compiled graph must fan out from 'ut' and rejoin at 'ut[$]'.
    a, b, c, d = test_utils.make_many(4)
    inner_flo = uf.Flow('ut').add(b, c)
    flo = lf.Flow('lt').add(a, inner_flo, d)

    compilation = compiler.PatternCompiler(flo).compile()
    g = _replicate_graph_with_names(compilation)
    self.assertEqual(8, len(g))

    expected_edges = [
        ('lt', 'a'),
        ('a', 'ut'),
        ('ut', 'b'),
        ('ut', 'c'),
        ('b', 'ut[$]'),
        ('c', 'ut[$]'),
        ('ut[$]', 'd'),
        ('d', 'lt[$]'),
    ]
    self.assertItemsEqual(g.edges(), expected_edges)
def test_unordered(self):
    # Four tasks in one unordered flow: each task hangs directly off the
    # flow node and leads directly to the flow's terminal node.
    a, b, c, d = test_utils.make_many(4)
    flo = uf.Flow("test")
    flo.add(a, b, c, d)

    compilation = compiler.PatternCompiler(flo).compile()
    g = _replicate_graph_with_names(compilation)
    self.assertEqual(6, len(g))

    expected_edges = [
        ('test', 'a'),
        ('test', 'b'),
        ('test', 'c'),
        ('test', 'd'),
        ('a', 'test[$]'),
        ('b', 'test[$]'),
        ('c', 'test[$]'),
        ('d', 'test[$]'),
    ]
    self.assertItemsEqual(g.edges(), expected_edges)
    self.assertEqual({'test'}, set(g.no_predecessors_iter()))
def run(**store):
    # Creates a flow whose tasks examine the kwargs passed in here and
    # behave differently while executing based on them; this lets the
    # calling code show different usages of the failure catching and
    # handling mechanism.
    flow = uf.Flow('flow').add(FirstTask(), SecondTask())

    # Failure types that are reported, paired with their message format.
    recognized = (
        (FirstException, "Got FirstException: %s"),
        (SecondException, "Got SecondException: %s"),
    )
    try:
        with utils.wrap_all_failures():
            zag.engines.run(flow, store=store, engine='parallel')
    except exceptions.WrappedFailure as ex:
        unknown_failures = []
        for a_failure in ex:
            for exc_type, template in recognized:
                if a_failure.check(exc_type):
                    print(template % a_failure.exception_str)
                    break
            else:
                # None of the recognized types matched; report it and
                # keep it so that it can be re-raised below.
                print("Unknown failure: %s" % a_failure)
                unknown_failures.append(a_failure)
        failure.Failure.reraise_if_any(unknown_failures)
def test_unordered_nested(self):
    # A linear flow (c, d) is nested inside an unordered flow alongside
    # a and b; c -> d must stay ordered while a and b remain independent.
    a, b, c, d = test_utils.make_many(4)
    flo = uf.Flow("test")
    flo.add(a, b)
    flo2 = lf.Flow("test2")
    flo2.add(c, d)
    flo.add(flo2)

    compilation = compiler.PatternCompiler(flo).compile()
    g = _replicate_graph_with_names(compilation)
    self.assertEqual(8, len(g))

    expected_edges = [
        ('test', 'a'),
        ('test', 'b'),
        ('test', 'test2'),
        ('test2', 'c'),
        ('c', 'd'),
        ('d', 'test2[$]'),
        ('test2[$]', 'test[$]'),
        ('a', 'test[$]'),
        ('b', 'test[$]'),
    ]
    self.assertItemsEqual(g.edges(), expected_edges)
def test_linear_unordered_scope(self):
    # Layout: root.1, root.2, an unordered sub-flow of five atoms, then
    # root.3 -- all inside one linear flow.
    r = lf.Flow("root")
    r_1 = test_utils.TaskOneReturn("root.1")
    r_2 = test_utils.TaskOneReturn("root.2")
    r.add(r_1, r_2)

    atoms = [test_utils.TaskOneReturn("subroot.%s" % i) for i in range(5)]
    u = uf.Flow("subroot")
    u.add(*atoms)
    r.add(u)

    r_3 = test_utils.TaskOneReturn("root.3")
    r.add(r_3)

    c = compiler.PatternCompiler(r).compile()

    self.assertEqual([], _get_scopes(c, r_1))
    self.assertEqual([['root.1']], _get_scopes(c, r_2))
    for atom in atoms:
        self.assertEqual([[], ['root.2', 'root.1']], _get_scopes(c, atom))

    scope = _get_scopes(c, r_3)
    self.assertEqual(1, len(scope))
    visible = scope[0]

    def first_index(prefix):
        # Position of the first name starting with prefix (0 if none).
        matches = (pos for pos, name in enumerate(visible)
                   if name.startswith(prefix))
        return next(matches, 0)

    first_root = first_index('root.')
    first_subroot = first_index('subroot.')
    self.assertGreater(first_subroot, first_root)
    self.assertEqual(['root.2', 'root.1'], visible[-2:])
def test_linear_nested(self):
    # An unordered flow (c, d) is appended to a linear flow (a, b).
    a, b, c, d = test_utils.make_many(4)
    flo = lf.Flow("test")
    flo.add(a, b)
    inner_flo = uf.Flow("test2")
    inner_flo.add(c, d)
    flo.add(inner_flo)

    compilation = compiler.PatternCompiler(flo).compile()
    g = _replicate_graph_with_names(compilation)
    self.assertEqual(8, len(g))

    # Within the linear part a precedes b, and never the reverse.
    linear_part = g.subgraph(['a', 'b'])
    self.assertFalse(linear_part.has_edge('b', 'a'))
    self.assertTrue(linear_part.has_edge('a', 'b'))
    self.assertEqual({'invariant': True},
                     linear_part.get_edge_data("a", "b"))

    # Within the unordered part c and d are not connected to each other.
    unordered_part = g.subgraph(['c', 'd'])
    self.assertEqual(0, unordered_part.number_of_edges())

    # This ensures that c and d do not start executing until after b.
    for source, target in (('b', 'test2'),
                           ('test2', 'c'),
                           ('test2', 'd')):
        self.assertTrue(g.has_edge(source, target))
def execute(self, output):
    # Print the output, prefixed with this task's name when requested.
    if not self._show_name:
        print(output)
    else:
        print("%s: %s" % (self.name, output))


# This will be the work that we want done, which for this example is just
# to print 'hello world' (like a song) using different tasks and different
# execution models.
song = lf.Flow("beats")

# Unordered flows can be ran in parallel; and a chorus is everyone singing
# at once of course!
hi_chorus = uf.Flow('hello')
world_chorus = uf.Flow('world')
singers = [
    ('bob', 'hello', 'world'),
    ('joe', 'hellooo', 'worllllld'),
    ('sue', "helloooooo!", 'wooorllld!'),
]
for (name, hello, world) in singers:
    # The injected value shows up in the execute() method of the task as
    # the argument named 'output' (which allows us to print the text we
    # want).
    hello_singer = PrinterTask("%s@hello" % name, inject={'output': hello})
    hi_chorus.add(hello_singer)
    world_singer = PrinterTask("%s@world" % name, inject={'output': world})
    world_chorus.add(world_singer)

# The composition starts with the conductor and then runs in sequence with
# the chorus running in parallel, but no matter what the 'hello' chorus must
# always run before the 'world' chorus (otherwise the world will fall apart).
def test_unordered_flow_retry_two_tasks_provide_same_value(self):
    # Two tasks both provide 'x' while the retry provides 'y'; the flow's
    # provides set is expected to contain each name once.
    controller = retry.AlwaysRevert('rt', provides=['y'])
    flow = uf.Flow('uf', controller)
    providers = [utils.TaskOneReturn(name, provides=['x'])
                 for name in ('t1', 't2')]
    flow.add(*providers)

    self.assertEqual({'x', 'y'}, flow.provides)
def test_unordered_flow_without_dependencies(self):
    # Tasks that neither require nor return anything leave the flow with
    # empty requires and provides sets.
    first = utils.TaskNoRequiresNoReturns('task1')
    second = utils.TaskNoRequiresNoReturns('task2')
    flow = uf.Flow('uf').add(first, second)

    self.assertEqual(set(), flow.requires)
    self.assertEqual(set(), flow.provides)
def test_unordered_flow_two_task_same_provide(self):
    # Both tasks provide 'a'; adding them together must still succeed and
    # leave two members in the flow.
    first = _task(name='task1', provides=['a', 'b'])
    second = _task(name='task2', provides=['a', 'c'])
    f = uf.Flow('test')
    f.add(second, first)

    self.assertEqual(2, len(f))
# Upper bound of numbers to sum for example purposes...
UPPER_BOUND = 10000

# How many mappers we want to have.
SPLIT = 10

# How big of a chunk we want to give each mapper.
CHUNK_SIZE = UPPER_BOUND // SPLIT

# This will be the workflow we will compose and run.
w = linear_flow.Flow("root")

# The mappers will run in parallel.
store = {}
provided = []
mappers = unordered_flow.Flow('map')
for i, chunk in enumerate(chunk_iter(CHUNK_SIZE, UPPER_BOUND)):
    mapper_name = 'mapper_%s' % i
    # Give that mapper some information to compute.
    store[mapper_name] = chunk
    # The reducer uses all of the outputs of the mappers, so each output
    # name is recorded in order to hand the whole list to the reducer.
    provided.append("reduction_%s" % i)
    mapper = SumMapper(name=mapper_name,
                       rebind={'inputs': mapper_name},
                       provides=provided[-1])
    mappers.add(mapper)
w.add(mappers)

# The reducer will run last (after all the mappers).
w.add(TotalReducer('reducer', requires=provided))
def test_retry_in_unordered_flow(self):
    # A flow holding only a retry controller is expected to compile into
    # a graph of three nodes joined by two edges.
    flo = uf.Flow("test", retry.AlwaysRevert("c"))
    compilation = compiler.PatternCompiler(flo).compile()
    graph = compilation.execution_graph

    self.assertEqual(3, len(graph))
    self.assertEqual(2, graph.number_of_edges())
def test_unordered_flow_add_nothing(self):
    # Calling add() without arguments returns the flow itself and leaves
    # it empty.
    f = uf.Flow('test')
    returned = f.add()

    self.assertIs(f, returned)
    self.assertEqual(0, len(f))