def _get_task_parents(self, taskid):
    '''
    Return the IDs of the roster tasks that hand control over to `taskid`.

    A task counts as a parent when its stored record lists `taskid` among
    its 'consumers'. Roster entries without a stored record, or without a
    'consumers' list, are treated as having no consumers.
    '''
    parents = []
    for tid in self._tasks:
        # A roster entry can outlive its stored record, so a missing
        # record must not blow up the membership test below.
        record = storage.get(tid) or {}
        if taskid in (record.get('consumers') or []):
            parents.append(tid)
    return parents
def start(self):
    '''
    Kick off every task of this hive that nothing else hands control to.

    The hive roster stores no "requires" / "depends on" data. Each task
    record carries a list of 'consumers' instead - the task IDs that take
    over once that task is done - so a finished task knows who to talk to
    next without the whole task tree being rescanned. For the first run
    the opposite view is needed: tasks that are nobody's consumer. Those
    are run here, each one receiving its stored record.

    The Task.REQUIRE list burned into each task is not consulted here.
    '''
    logging.debug(HELLO + "Initing TaskHive %s" % self.id)

    inbound_links = {}
    roster = storage.get(self.id).get('tasks', [])

    # First pass: watch every roster task for storage events and count how
    # many times each task ID is named as somebody's consumer.
    for producer_id in roster:
        self._add_to_watched_tasks(producer_id)
        producer_record = storage.get(producer_id, {})
        for consumer_id in producer_record.get('consumers', []):
            inbound_links[consumer_id] = inbound_links.get(consumer_id, 0) + 1

    running_ids = self._running_tasks_map.keys()

    # Second pass: run only tasks that have a stored record, have no
    # parents and are not already running.
    for candidate_id in roster:
        candidate_record = storage.get(candidate_id)
        if not candidate_record:
            continue
        if inbound_links.get(candidate_id):
            continue
        if candidate_id in running_ids:
            continue
        self.run_task(candidate_id, candidate_record)
def stop(self):
    '''
    Ask every running task to stop.

    For each truthy callback key in `self._running_tasks`, the task type of
    the stored task is looked up and, when it exposes a `stop` attribute,
    called with the stored keyword arguments plus `callback`. The stored
    keyword dict is updated in place with the 'callback' key.

    An exception raised by one task's stop() is logged at debug level and
    does not prevent the remaining tasks from being stopped.
    '''
    # Iterate over a snapshot of the keys so the loop is unaffected if
    # entries are added or removed while tasks are being stopped.
    for callback in list(self._running_tasks.keys()):
        if not callback:
            continue
        task_id, kw = self._running_tasks.get(callback, ['', {}])
        tasktype = self.get_tasktypes().get(
            storage.get(task_id, {}).get('tasktype', 'not to be found'))
        if not (tasktype and hasattr(tasktype, 'stop')):
            continue
        kw['callback'] = callback
        try:
            tasktype.stop(**kw)
        except Exception as ex:
            # The message needs one placeholder per value; without them the
            # '%' operator itself raises TypeError inside this handler.
            logging.debug(
                HELLO + "Stopping task '%s' errored out with '%s'" % (tasktype.ID, ex))
def start(self):
    '''
    Kick off every task of this hive that nothing else hands control to.

    The hive roster stores no "requires" / "depends on" data. Each task
    record carries a list of 'consumers' instead - the task IDs that take
    over once that task is done - so a finished task knows who to talk to
    next without the whole task tree being rescanned. For the first run
    the opposite view is needed: tasks that are nobody's consumer. Those
    are run here, each one receiving its stored record.

    The Task.REQUIRE list burned into each task is not consulted here.
    '''
    logging.debug(HELLO + "Initing TaskHive %s" % self.id)

    inbound_links = {}
    roster = storage.get(self.id).get('tasks', [])

    # First pass: watch every roster task for storage events and count how
    # many times each task ID is named as somebody's consumer.
    for producer_id in roster:
        self._add_to_watched_tasks(producer_id)
        producer_record = storage.get(producer_id, {})
        for consumer_id in producer_record.get('consumers', []):
            inbound_links[consumer_id] = inbound_links.get(consumer_id, 0) + 1

    running_ids = self._running_tasks_map.keys()

    # Second pass: run only tasks that have a stored record, have no
    # parents and are not already running.
    for candidate_id in roster:
        candidate_record = storage.get(candidate_id)
        if not candidate_record:
            continue
        if inbound_links.get(candidate_id):
            continue
        if candidate_id in running_ids:
            continue
        self.run_task(candidate_id, candidate_record)
def _hive_tasks_change_handler(self):
    '''
    React to tasks being added to or removed from this hive's roster.

    A local copy of the roster is kept between handler runs so that repeat
    or snowballing calls do not restart tasks needlessly. Only additions
    and removals are handled here; changes to an individual task's state
    are handled by other code.

    Removed tasks lose their watcher entry and are stopped. Added tasks are
    watched and, when they have a stored record, no parents and are not
    paused, run with that record.
    '''
    oldtasks = self._tasks
    self._tasks = newtasks = set(storage.get(self.id).get('tasks', []))

    removed = oldtasks.difference(newtasks)
    added = newtasks.difference(oldtasks)

    for taskid in removed:
        with self._taskswatchers_lock:
            # Dropping the entry makes the callback go away on the other end.
            self._taskswatchers.pop(taskid, None)
        self.stop_task(taskid)

    for taskid in added:
        self._add_to_watched_tasks(taskid)
        if self._get_task_parents(taskid):
            continue
        # Read the record once so the pause check and the data handed to
        # run_task() agree, and skip tasks that have no record at all.
        task_data = storage.get(taskid)
        if task_data and not task_data.get('paused'):
            self.run_task(taskid, task_data)
def _hive_tasks_change_handler(self):
    '''
    React to tasks being added to or removed from this hive's roster.

    A local copy of the roster is kept between handler runs so that repeat
    or snowballing calls do not restart tasks needlessly. Only additions
    and removals are handled here; changes to an individual task's state
    are handled by other code.

    Removed tasks lose their watcher entry and are stopped. Added tasks are
    watched and, when they have a stored record, no parents and are not
    paused, run with that record.
    '''
    oldtasks = self._tasks
    self._tasks = newtasks = set(storage.get(self.id).get('tasks', []))

    removed = oldtasks.difference(newtasks)
    added = newtasks.difference(oldtasks)

    for taskid in removed:
        with self._taskswatchers_lock:
            # Dropping the entry makes the callback go away on the other end.
            self._taskswatchers.pop(taskid, None)
        self.stop_task(taskid)

    for taskid in added:
        self._add_to_watched_tasks(taskid)
        if self._get_task_parents(taskid):
            continue
        # Read the record once so the pause check and the data handed to
        # run_task() agree, and skip tasks that have no record at all.
        task_data = storage.get(taskid)
        if task_data and not task_data.get('paused'):
            self.run_task(taskid, task_data)
def stop(self):
    '''
    Ask every running task to stop.

    For each truthy callback key in `self._running_tasks`, the task type of
    the stored task is looked up and, when it exposes a `stop` attribute,
    called with the stored keyword arguments plus `callback`. The stored
    keyword dict is updated in place with the 'callback' key.

    An exception raised by one task's stop() is logged at debug level and
    does not prevent the remaining tasks from being stopped.
    '''
    # Iterate over a snapshot of the keys so the loop is unaffected if
    # entries are added or removed while tasks are being stopped.
    for callback in list(self._running_tasks.keys()):
        if not callback:
            continue
        task_id, kw = self._running_tasks.get(callback, ['', {}])
        tasktype = self.get_tasktypes().get(
            storage.get(task_id, {}).get('tasktype', 'not to be found'))
        if not (tasktype and hasattr(tasktype, 'stop')):
            continue
        kw['callback'] = callback
        try:
            tasktype.stop(**kw)
        except Exception as ex:
            # The message needs one placeholder per value; without them the
            # '%' operator itself raises TypeError inside this handler.
            logging.debug(
                HELLO + "Stopping task '%s' errored out with '%s'" % (tasktype.ID, ex))
def stop_task(self, taskid):
    '''
    Ask a single running task to stop.

    The callback registered for `taskid` is used to find the stored task ID
    and keyword arguments; the matching task type's stop() is then called
    with those arguments plus `callback`. The stored keyword dict is
    updated in place with the 'callback' key.

    Returns True only when stop() was called and returned without raising.
    Returns False when the task has no registered callback, its task type
    is unknown or has no `stop` attribute, or stop() raised (the error is
    logged at debug level).
    '''
    callback = self._running_tasks_map.get(taskid)
    if not callback:
        return False

    stored_taskid, kw = self._running_tasks.get(callback, ['', {}])
    tasktype = self.get_tasktypes().get(
        storage.get(stored_taskid, {}).get('tasktype', 'not to be found'))
    if not (tasktype and hasattr(tasktype, 'stop')):
        return False

    kw['callback'] = callback
    try:
        tasktype.stop(**kw)
    except Exception as ex:
        # The message needs one placeholder per value; without them the
        # '%' operator itself raises TypeError inside this handler.
        logging.debug(
            HELLO + "Stopping task '%s' errored out with '%s'" % (tasktype.ID, ex))
        return False
    return True
def stop_task(self, taskid):
    '''
    Ask a single running task to stop.

    The callback registered for `taskid` is used to find the stored task ID
    and keyword arguments; the matching task type's stop() is then called
    with those arguments plus `callback`. The stored keyword dict is
    updated in place with the 'callback' key.

    Returns True only when stop() was called and returned without raising.
    Returns False when the task has no registered callback, its task type
    is unknown or has no `stop` attribute, or stop() raised (the error is
    logged at debug level).
    '''
    callback = self._running_tasks_map.get(taskid)
    if not callback:
        return False

    stored_taskid, kw = self._running_tasks.get(callback, ['', {}])
    tasktype = self.get_tasktypes().get(
        storage.get(stored_taskid, {}).get('tasktype', 'not to be found'))
    if not (tasktype and hasattr(tasktype, 'stop')):
        return False

    kw['callback'] = callback
    try:
        tasktype.stop(**kw)
    except Exception as ex:
        # The message needs one placeholder per value; without them the
        # '%' operator itself raises TypeError inside this handler.
        logging.debug(
            HELLO + "Stopping task '%s' errored out with '%s'" % (tasktype.ID, ex))
        return False
    return True
def is_paused(self):
    '''
    Tell whether this hive's stored record has a truthy 'paused' value.
    '''
    hive_record = storage.get(self.id)
    paused_flag = hive_record.get('paused')
    return bool(paused_flag)
def process_callback(self, task_id, *args, **kw):
    '''
    Turn the output of a finished task into input for its consumers.

    The values the done task pushed to its callback (positional `args`
    and named `kw`) are matched against the task's declared 'outputs' and
    collected into a name -> value dict plus a name -> type-name dict.
    Every consumer that has a stored record is then run with those.

    Each 'outputs' entry is [default value, type, name, description].
    Named values are collected first; positional values are matched to
    the declarations by position and win over a named value of the same
    name.

    An output named 'metadataupdate' of type 'MetadataUpdate' is not
    forwarded: it is written back to the done task's own record (e.g. a
    web server task reporting the port number it auto-picked). This is
    applied even when the task has no consumers.

    Positional outputs are converted to named arguments, so a Task run()
    declaration must NOT splat-collect ("*args, **kw") its arguments.

    Does nothing when `task_id` has no stored record, which may happen if
    the task was removed from the roster while it was running.
    '''
    donetask = storage.get(task_id)
    if not donetask:
        return

    consumers = [[consumer_id, storage.get(consumer_id)]
                 for consumer_id in donetask.get('consumers', [])]

    incoming_args_def = donetask.get('outputs', [])
    incoming_name_type_map = {}
    incoming_args = {}

    for arg_def in incoming_args_def:
        _default, typename, name = arg_def[:3]
        if name in kw:
            incoming_args[name] = kw[name]
            incoming_name_type_map[name] = typename

    # zip() stops at the shorter sequence, so surplus positional values or
    # surplus declarations are ignored.
    for value, arg_def in zip(args, incoming_args_def):
        _default, typename, name = arg_def[:3]
        incoming_args[name] = value
        incoming_name_type_map[name] = typename

    # Handle the pushed-back record update before looking at consumers, so
    # a task without consumers does not silently lose its update.
    updateargtype = 'MetadataUpdate'
    updateargname = updateargtype.lower()
    if updateargname in incoming_args and \
            incoming_name_type_map[updateargname] == updateargtype:
        # NOTE(review): the record is otherwise used as a mapping; fall
        # back to task_id in case it has no `id` attribute - confirm.
        storage.change(getattr(donetask, 'id', task_id),
                       incoming_args[updateargname])
        del incoming_args[updateargname]

    for consumer_id, consumer_data in consumers:
        if consumer_data:
            self.run_task(consumer_id, consumer_data, task_id,
                          incoming_args, incoming_name_type_map)
def process_callback(self, task_id, *args, **kw):
    '''
    Turn the output of a finished task into input for its consumers.

    The values the done task pushed to its callback (positional `args`
    and named `kw`) are matched against the task's declared 'outputs' and
    collected into a name -> value dict plus a name -> type-name dict.
    Every consumer that has a stored record is then run with those.

    Each 'outputs' entry is [default value, type, name, description].
    Named values are collected first; positional values are matched to
    the declarations by position and win over a named value of the same
    name.

    An output named 'metadataupdate' of type 'MetadataUpdate' is not
    forwarded: it is written back to the done task's own record (e.g. a
    web server task reporting the port number it auto-picked). This is
    applied even when the task has no consumers.

    Positional outputs are converted to named arguments, so a Task run()
    declaration must NOT splat-collect ("*args, **kw") its arguments.

    Does nothing when `task_id` has no stored record, which may happen if
    the task was removed from the roster while it was running.
    '''
    donetask = storage.get(task_id)
    if not donetask:
        return

    consumers = [[consumer_id, storage.get(consumer_id)]
                 for consumer_id in donetask.get('consumers', [])]

    incoming_args_def = donetask.get('outputs', [])
    incoming_name_type_map = {}
    incoming_args = {}

    for arg_def in incoming_args_def:
        _default, typename, name = arg_def[:3]
        if name in kw:
            incoming_args[name] = kw[name]
            incoming_name_type_map[name] = typename

    # zip() stops at the shorter sequence, so surplus positional values or
    # surplus declarations are ignored.
    for value, arg_def in zip(args, incoming_args_def):
        _default, typename, name = arg_def[:3]
        incoming_args[name] = value
        incoming_name_type_map[name] = typename

    # Handle the pushed-back record update before looking at consumers, so
    # a task without consumers does not silently lose its update.
    updateargtype = 'MetadataUpdate'
    updateargname = updateargtype.lower()
    if updateargname in incoming_args and \
            incoming_name_type_map[updateargname] == updateargtype:
        # NOTE(review): the record is otherwise used as a mapping; fall
        # back to task_id in case it has no `id` attribute - confirm.
        storage.change(getattr(donetask, 'id', task_id),
                       incoming_args[updateargname])
        del incoming_args[updateargname]

    for consumer_id, consumer_data in consumers:
        if consumer_data:
            self.run_task(consumer_id, consumer_data, task_id,
                          incoming_args, incoming_name_type_map)