def test_checks_for_dups_globally(self):
    """Compiling fails when atoms in different sub-flows share a name."""
    root = gf.Flow("test")
    first = gf.Flow("int1").add(test_utils.DummyTask(name="a"))
    second = gf.Flow("int2").add(test_utils.DummyTask(name="a"))
    root.add(first, second)
    engine = engines.load(root)
    # The duplicate is only detected once the whole flow gets compiled.
    with self.assertRaisesRegex(exc.Duplicate,
                                '^Atoms with duplicate names'):
        engine.compile()
def test_formatted_via_listener(self, mock_format_node):
    """A failing run under the dynamic listener formats the failed node."""
    mock_format_node.return_value = 'A node'
    engine = engines.load(self._make_test_flow())
    with logging_listener.DynamicLoggingListener(engine):
        with self.assertRaises(RuntimeError):
            engine.run()
    self.assertTrue(mock_format_node.called)
def calculate(engine_conf):
    """Calculate the mandelbrot image points using an unordered flow.

    The image rows are split into ``CHUNK_COUNT`` chunks, one calculation
    task is created per chunk, the flow is executed and the per-task
    results are gathered back in chunk order.

    :param engine_conf: engine configuration handed to ``engines.load``
    :returns: list of ``((x, y), color)`` tuples, one per calculated point
    """
    # An unordered flow is used here since the mandelbrot calculation is an
    # example of an embarrassingly parallel computation that we can scatter
    # across as many workers as possible.
    flow = uf.Flow("mandelbrot")

    # These symbols will be automatically given to tasks as input to their
    # execute method, in this case these are constants used in the mandelbrot
    # calculation.
    store = {
        'mandelbrot_config': [-2.0, 1.0, -1.0, 1.0, MAX_ITERATIONS],
        'image_config': {
            'size': IMAGE_SIZE,
        }
    }

    # We need the task names to be in the right order so that we can extract
    # the final results in the right order (we don't care about the order
    # when executing).
    task_names = []

    # Compose our workflow.
    height, _width = IMAGE_SIZE
    chunk_size = int(math.ceil(height / float(CHUNK_COUNT)))
    for i in compat_range(0, CHUNK_COUNT):
        chunk_name = 'chunk_%s' % i
        task_name = "calculation_%s" % i
        # Break the calculation up into chunk size pieces. Since the chunk
        # size is rounded up, the trailing chunks could otherwise reach past
        # the last row of the image, so both bounds are clamped to the
        # image height (a chunk may end up being empty).
        # NOTE(review): assumes the calculator treats this pair as a
        # half-open [first, last) row range -- confirm.
        first_row = min(i * chunk_size, height)
        last_row = min(first_row + chunk_size, height)
        rows = [first_row, last_row]
        flow.add(
            MandelCalculator(
                task_name,
                # This ensures the storage symbol with name
                # 'chunk_name' is sent into the tasks local
                # symbol 'chunk'. This is how we give each
                # calculator its own correct sequence of rows
                # to work on.
                rebind={'chunk': chunk_name}))
        store[chunk_name] = rows
        task_names.append(task_name)

    # Now execute it.
    eng = engines.load(flow, store=store, engine_conf=engine_conf)
    eng.run()

    # Gather all the results and order them for further processing.
    gather = []
    for name in task_names:
        gather.extend(eng.storage.get(name))
    points = []
    for y, row in enumerate(gather):
        for x, color in enumerate(row):
            points.append(((x, y), color))
    return points
def run(engine_options):
    """Run a two task linear flow on a worker-based engine.

    :param engine_options: extra keyword arguments for ``engines.load``
    :returns: everything the engine storage holds after the run
    """
    flow = lf.Flow('simple-linear')
    flow.add(utils.TaskOneArgOneReturn(provides='result1'),
             utils.TaskMultiArgOneReturn(provides='result2'))
    # Initial values made available to the tasks through the storage.
    initial_store = {'x': 111, 'y': 222, 'z': 333}
    engine = engines.load(flow, store=initial_store,
                          engine='worker-based', **engine_options)
    engine.run()
    return engine.storage.fetch_all()
def test_exc_info_format(self):
    """Formatting the 'Broken' failure gives exc_info and empty details."""
    engine = engines.load(self._make_test_flow())
    with self.assertRaises(RuntimeError):
        engine.run()
    failures = engine.storage.get_execute_failures()
    self.assertEqual(1, len(failures))
    self.assertIn('Broken', failures)
    formatter = formatters.FailureFormatter(engine)
    exc_info, details = formatter.format(failures['Broken'],
                                         self._broken_atom_matcher)
    self.assertEqual(3, len(exc_info))
    self.assertEqual("", details)
def main():
    """Run the flow built from a table and check a result exists per row.

    The table is read from the csv file named by the only command line
    argument (empty cells become ``0.0``); without exactly one argument a
    random table with 1 to 100 rows and 1 to 100 columns is generated.

    :returns: ``0`` when one result was gathered for every table row,
              ``1`` otherwise (meant to be used as the process exit code)
    """
    if len(sys.argv) == 2:
        # The csv module wants a binary file on python 2 but a text file
        # opened with newline='' on python 3 (iterating a binary file there
        # makes csv.reader fail since it yields bytes instead of strings).
        if sys.version_info[0] < 3:
            open_kwargs = {'mode': 'rb'}
        else:
            open_kwargs = {'mode': 'r', 'newline': ''}
        with open(sys.argv[1], **open_kwargs) as fh:
            tbl = [[float(r) if r else 0.0 for r in row]
                   for row in csv.reader(fh)]
    else:
        # Make some random table out of thin air...
        cols = random.randint(1, 100)
        rows = random.randint(1, 100)
        tbl = [[random.random() for _j in compat_range(0, cols)]
               for _i in compat_range(0, rows)]

    # Generate the work to be done.
    f = make_flow(tbl)

    # Now run it (using the specified executor)...
    try:
        executor = futurist.GreenThreadPoolExecutor(max_workers=5)
    except RuntimeError:
        # No eventlet currently active, use real threads instead.
        executor = futurist.ThreadPoolExecutor(max_workers=5)
    try:
        e = engines.load(f, engine='parallel', executor=executor)
        for st in e.run_iter():
            print(st)
    finally:
        executor.shutdown()

    # Find the old rows and put them into place...
    #
    # TODO(harlowja): probably easier just to sort instead of search...
    computed_tbl = []
    for i in compat_range(0, len(tbl)):
        for t in f:
            if t.index == i:
                computed_tbl.append(e.storage.get(t.name))

    # Do some basic validation (which causes the return code of this process
    # to be different if things were not as expected...)
    return 1 if len(computed_tbl) != len(tbl) else 0
def main():
    """Time (or profile) every stage of running a flow of dummy tasks.

    The parsed command line options pick the context manager used around
    each stage (``ProfileIt`` with ``--profile``, ``TimeIt`` otherwise) and
    how many dummy tasks are added; they are also handed to that context
    manager.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('--profile', "-p",
                        dest='profile', action='store_true',
                        default=False,
                        help='profile instead of gather timing'
                             ' (default: False)')
    parser.add_argument('--dummies', "-d",
                        dest='dummies', action='store', type=int,
                        default=100, metavar="<number>",
                        help='how many dummy/no-op tasks to inject'
                             ' (default: 100)')
    parser.add_argument('--limit', '-l',
                        dest='limit', action='store', type=float,
                        default=100.0, metavar="<number>",
                        help='percentage of profiling output to show'
                             ' (default: 100%%)')
    args = parser.parse_args()
    ctx_manager = ProfileIt if args.profile else TimeIt
    # Negative task counts are treated as zero.
    dummy_am = max(0, args.dummies)
    with ctx_manager("Building linear flow with %s tasks" % dummy_am, args):
        f = lf.Flow("root")
        for i in compat_range(0, dummy_am):
            f.add(DummyTask(name="dummy_%s" % i))
    with ctx_manager("Loading", args):
        e = engines.load(f)
    # The remaining stages are plain engine method calls, measured one by
    # one and in this order.
    stages = (
        ("Compiling", "compile"),
        ("Preparing", "prepare"),
        ("Validating", "validate"),
        ("Running", "run"),
    )
    for title, method_name in stages:
        with ctx_manager(title, args):
            getattr(e, method_name)()
def test_exc_info_with_details_format(self, mock_format_node):
    """Formatting with details enabled formats the failed node."""
    mock_format_node.return_value = 'A node'
    engine = engines.load(self._make_test_flow())
    with self.assertRaises(RuntimeError):
        engine.run()
    failures = engine.storage.get_execute_failures()
    self.assertEqual(1, len(failures))
    self.assertIn('Broken', failures)
    failure = failures['Broken']
    # Switching the intention to EXECUTE is what lets the details show up.
    engine.storage.set_atom_intention("Broken", states.EXECUTE)
    formatter = formatters.FailureFormatter(engine)
    exc_info, details = formatter.format(failure,
                                         self._broken_atom_matcher)
    self.assertEqual(3, len(exc_info))
    self.assertTrue(mock_format_node.called)
def test_exc_info_with_details_format_hidden(self, mock_get_execute):
    """Atoms listed as hidden never have their execute data fetched."""
    engine = engines.load(self._make_test_flow())
    with self.assertRaises(RuntimeError):
        engine.run()
    failures = engine.storage.get_execute_failures()
    self.assertEqual(1, len(failures))
    self.assertIn('Broken', failures)
    failure = failures['Broken']
    # Switching the intention to EXECUTE is what lets the details show up.
    engine.storage.set_atom_intention("Broken", states.EXECUTE)
    hidden = ['Broken', "Happy-1", "Happy-2"]
    formatter = formatters.FailureFormatter(
        engine, hide_inputs_outputs_of=hidden)
    exc_info, details = formatter.format(failure,
                                         self._broken_atom_matcher)
    self.assertEqual(3, len(exc_info))
    self.assertFalse(mock_get_execute.called)
from zag import engines
from zag.listeners import timing
from zag.patterns import linear_flow as lf
from zag import task

# INTRO: in this example we will attach a listener to an engine
# and have variable run time tasks run and show how the listener will print
# out how long those tasks took (when they started and when they finished).
#
# This shows how timing metrics can be gathered (or attached onto an engine)
# after a workflow has been constructed, making it easy to gather metrics
# dynamically for situations where this kind of information is applicable (or
# even adding this information on at a later point in the future when your
# application starts to slow down).


class VariableTask(task.Task):
    """Task that sleeps for a random amount of time when executed.

    The duration (``random.random()`` seconds) is picked once, when the
    task is constructed.
    """

    def __init__(self, name):
        super(VariableTask, self).__init__(name)
        self._sleepy_time = random.random()

    def execute(self):
        time.sleep(self._sleepy_time)


# Three variable duration tasks, run one after the other.
f = lf.Flow('root').add(VariableTask('a'),
                        VariableTask('b'),
                        VariableTask('c'))

e = engines.load(f)
# The listener is only attached for the duration of the with block.
with timing.PrintingDurationListener(e):
    e.run()
song.add(PrinterTask("conductor@begin",
                     show_name=False, inject={'output': "*ding*"}),
         hi_chorus,
         world_chorus,
         PrinterTask("conductor@end",
                     show_name=False, inject={'output': "*dong*"}))

# Each entry is the banner to print followed by the executor kind to use,
# listed in the order they are run in.
_runs = []
try:
    import eventlet as _eventlet  # noqa
except ImportError:
    # No eventlet currently active, skip running with it...
    pass
else:
    # Green threads (only when eventlet can be imported).
    _runs.append(("-- Running in parallel using eventlet --",
                  'greenthreaded'))
# Real threads.
_runs.append(("-- Running in parallel using threads --", 'threaded'))
# External processes.
_runs.append(("-- Running in parallel using processes --", 'processes'))

for _banner, _executor_kind in _runs:
    print(_banner)
    e = engines.load(song, executor=_executor_kind, engine='parallel',
                     max_workers=1)
    e.run()
def run(engine_options):
    """Run a single event reporting task on a worker-based engine.

    :param engine_options: extra keyword arguments for ``engines.load``
    """
    emitter = EventReporter()
    # Have every event the task emits delivered to the receiver.
    emitter.notifier.register(ANY, event_receiver)
    flow = lf.Flow('event-reporter')
    flow.add(emitter)
    engines.load(flow, engine='worker-based', **engine_options).run()
mappers = unordered_flow.Flow('map')
for i, chunk in enumerate(chunk_iter(CHUNK_SIZE, UPPER_BOUND)):
    mapper_name = 'mapper_%s' % i
    # Name under which this mapper's output gets stored; the reducer needs
    # access to all of them so each name is recorded.
    result_name = "reduction_%s" % i
    # Give that mapper some information to compute.
    store[mapper_name] = chunk
    provided.append(result_name)
    mappers.add(SumMapper(name=mapper_name,
                          rebind={'inputs': mapper_name},
                          provides=result_name))
w.add(mappers)

# The reducer is added after the mappers and requires all of their outputs.
w.add(TotalReducer('reducer', requires=provided))

# Now go!
e = engines.load(w, engine='parallel', store=store, max_workers=4)
print("Running a parallel engine with options: %s" % e.options)
e.run()

# Fetch what the reducer produced.
total = e.storage.get('reducer')
print("Calculated result = %s" % total)

# Compute the same sum directly and exit non-zero on a mismatch.
calc_total = sum(range(0, UPPER_BOUND))
if total != calc_total:
    sys.exit(1)
# Build the executor by hand so that the engine can be told to use it.
#
# TODO(harlowja): this will be fixed in a future version (so that instead
# of doing this an entrypoint can be requested instead).
executor_id = uuidutils.generate_uuid()
ex = executor.WorkerTaskExecutor(
    executor_id,
    exchange,
    finder_factory,
    transport=transport,
    # Seconds to wait to find a worker before the request(s) time out.
    transition_timeout=60,
)

# Now create some work and run it in a worker engine!
f = lf.Flow('dummy').add(OneTask('1'), TwoTask('2'))

print("Running...")
flow_detail = pu.create_flow_detail(f)
eng = engines.load(f, flow_detail, executor=ex, engine='workers')
# Print every state yielded while the engine runs.
for st in eng.run_iter():
    print(" -> %s" % st)
print('Done!!')

print("Results: %s" % eng.storage.fetch_all())

print("Shutting down the worker...")
w.stop()
w_runner.join()
print('Goodbye!!')
backend_uri = "sqlite:///%s" % (persist_path) else: persist_path = os.path.join(tempfile.gettempdir(), "persisting") backend_uri = "file:///%s" % (persist_path) if os.path.exists(persist_path): blowup = False else: blowup = True with eu.get_backend(backend_uri) as backend: # Make a flow that will blow up if the file didn't exist previously, if it # did exist, assume we won't blow up (and therefore this shows the undo # and redo that a flow will go through). book = models.LogBook("my-test") flow = make_flow(blowup=blowup) eu.print_wrapped("Running") try: eng = engines.load(flow, engine='serial', backend=backend, book=book) eng.run() if not blowup: eu.rm_path(persist_path) except Exception: # NOTE(harlowja): don't exit with non-zero status code, so that we can # print the book contents, as well as avoiding exiting also makes the # unit tests (which also runs these examples) pass. traceback.print_exc(file=sys.stdout) eu.print_wrapped("Book contents") print(book.pformat())
# stored) and creating a flow detail (where flow and task state is # stored). The combination of these 2 objects unique ids (uuids) allows # the users of zag to reassociate the workflows that were # potentially running (and which may have partially completed) back # with zag so that those workflows can be resumed (or reverted) # after a process/thread/engine has failed in someway. book = models.LogBook('resume-volume-create') flow_detail = models.FlowDetail("root", uuid=uuidutils.generate_uuid()) book.add(flow_detail) with contextlib.closing(backend.get_connection()) as conn: conn.save_logbook(book) print("!! Your tracking id is: '%s+%s'" % (book.uuid, flow_detail.uuid)) print("!! Please submit this on later runs for tracking purposes") else: flow_detail = find_flow_detail(backend, book_id, flow_id) # Load and run. engine = engines.load(flow, flow_detail=flow_detail, backend=backend, engine='serial') engine.run() # How to use. # # 1. $ python me.py "sqlite:////tmp/cinder.db" # 2. ctrl-c before this finishes # 3. Find the tracking id (search for 'Your tracking id is') # 4. $ python me.py "sqlite:////tmp/cinder.db" "$tracking_id" # 5. Profit!
default_provides = 'a' def execute(self): print("Executing '%s'" % (self.name)) return 'a' class TaskB(task.Task): def execute(self, a): print("Executing '%s'" % (self.name)) print("Got input '%s'" % (a)) print("Constructing...") wf = linear_flow.Flow("pass-from-to") wf.add(TaskA('a'), TaskB('b')) print("Loading...") e = engines.load(wf) print("Compiling...") e.compile() print("Preparing...") e.prepare() print("Running...") e.run() print("Done...")
def execute(self): print("Running '%s' in thread '%s'" % (self.name, tu.get_ident())) time.sleep(self._wait_for) f1 = uf.Flow("f1") f1.add(DelayedTask("f1-1")) f1.add(DelayedTask("f1-2")) f2 = uf.Flow("f2") f2.add(DelayedTask("f2-1")) f2.add(DelayedTask("f2-2")) # Run them all using the same futures (thread-pool based) executor... with futurist.ThreadPoolExecutor() as ex: e1 = engines.load(f1, engine='parallel', executor=ex) e2 = engines.load(f2, engine='parallel', executor=ex) iters = [e1.run_iter(), e2.run_iter()] # Iterate over a copy (so we can remove from the source list). cloned_iters = list(iters) while iters: # Run a single 'step' of each iterator, forcing each engine to perform # some work, then yield, and repeat until each iterator is consumed # and there is no more engine work to be done. for it in cloned_iters: try: six.next(it) except StopIteration: try: iters.remove(it) except ValueError:
# initial 0% and 100% are triggered automatically by the engine when # a task is started and finished (so that's why those are not emitted # here). _PROGRESS_PARTS = [fractions.Fraction("%s/5" % x) for x in range(1, 5)] def execute(self): for p in self._PROGRESS_PARTS: self.update_progress(p) time.sleep(self._DELAY) print("Constructing...") soup = linear_flow.Flow("alphabet-soup") for letter in string.ascii_lowercase: abc = AlphabetTask(letter) abc.notifier.register(task.EVENT_UPDATE_PROGRESS, functools.partial(progress_printer, abc)) soup.add(abc) try: print("Loading...") e = engines.load(soup, engine='parallel', executor='processes') print("Compiling...") e.compile() print("Preparing...") e.prepare() print("Running...") e.run() print("Done: %s" % e.statistics) except exceptions.NotImplementedError as e: print(e)
def allow(history):
    """Decider that prints the history it is given and always says no."""
    print(history)
    return False


# Declare our work to be done: two tasks joined by a link that is guarded
# by the decider above.
r = gf.Flow("root")
r_a = DummyTask('r-a')
r_b = DummyTask('r-b')
r.add(r_a, r_b)
r.link(r_a, r_b, decider=allow)

# Setup and run the engine layer.
e = engines.load(r)
e.compile()
e.prepare()
e.run()

print("---------")
print("After run")
print("---------")

# Collect the paths directly under the root of the backend's memory.
backend = e.storage.backend
entries = [
    os.path.join(backend.memory.root_path, child)
    for child in backend.memory.ls(backend.memory.root_path)
]
while entries:
    path = entries.pop()
    value = backend.memory[path]
# Resources (db handles and similar) of course can *not* be persisted so we
# need to make sure that we pass this resource fetcher to the task
# constructors so that the tasks have access to any needed resources (the
# resources are lazily loaded so that they are only created when they are
# used).
resources = ResourceFetcher()

# 1. First we extract the api request into a usable format.
# 2. Then we go ahead and make a database entry for our request.
flow = lf.Flow("initialize-me").add(ExtractInputRequest(resources),
                                    MakeDBEntry(resources))

# 3. Then we activate our payment method and finally declare success.
sub_flow = gf.Flow("after-initialize").add(ActivateDriver(resources),
                                           DeclareSuccess())
flow.add(sub_flow)

# Initially populate the storage with the following request object,
# prepopulating this allows the tasks that depend on the 'request' variable
# to start processing (in this case this is the ExtractInputRequest task).
store = dict(request=DummyUser(user="******", id_="1.35"))
eng = engines.load(flow, engine='serial', store=store)

# This context manager automatically adds (and automatically removes) a
# helpful set of state transition notification printing helper utilities
# that show you exactly what transitions the engine is going through
# while running the various billing related tasks.
with printing.PrintingListener(eng):
    eng.run()
# task that is created). A name based off the volume id that is to be # created is more easily tied back to the original task so that the # volume create can be resumed/revert, and is much easier to use for # audit and tracking purposes. base_name = reflection.get_callable_name(self) super(VolumeCreator, self).__init__(name="%s-%s" % (base_name, volume_id)) self._volume_id = volume_id def execute(self): print("Making volume %s" % (self._volume_id)) time.sleep(random.random() * MAX_CREATE_TIME) print("Finished making volume %s" % (self._volume_id)) # Assume there is no ordering dependency between volumes. flow = uf.Flow("volume-maker") for i in range(0, VOLUME_COUNT): flow.add(VolumeCreator(volume_id="vol-%s" % (i))) # Show how much time the overall engine loading and running takes. with show_time(name=flow.name.title()): eng = engines.load(flow, engine=engine) # This context manager automatically adds (and automatically removes) a # helpful set of state transition notification printing helper utilities # that show you exactly what transitions the engine is going through # while running the various volume create tasks. with printing.PrintingListener(eng): eng.run()