def run_task(self, task_id, task_data, incoming_task_id='no source task id',
             incoming_args=None, incoming_name_type_map=None):
    '''
    Assemble the keyword arguments for one task and start its runner
    (tasktype.run) in a daemon thread.

    Nothing is started when the task record is flagged 'paused'. When the
    record's 'tasktype' is not found in self.get_tasktypes(), the record is
    flagged as paused in storage and nothing is started.

    task_id -- storage id of the task to run.
    task_data -- task record. Keys read here: 'paused', 'tasktype', 'inputs'.
        Each 'inputs' entry is [default value, type, name, description].
    incoming_task_id -- id of the task whose output triggered this run.
        Only used for the 'from' label of boxed Args.
    incoming_args -- {name: value} handed over by the upstream task.
    incoming_name_type_map -- {name: type name} for incoming_args.

    Neither incoming dict (nor the 'kw' / 'kw_types' dicts of a boxed Args
    value inside them) is modified: the same objects are handed to every
    consumer of a finished task, so all work is done on private copies.

    Returns None.
    '''
    if task_data.get('paused'):
        return

    tasktype = self.get_tasktypes().get(
        task_data.get('tasktype', 'not to be found'))
    if not tasktype:
        # no runner is known for this task type: flag the record as paused.
        storage.change(task_id, {'paused': True})
        return

    # Private shallow copies. This also replaces the former mutable ({})
    # default values of the two parameters.
    incoming_args = dict(incoming_args or {})
    incoming_name_type_map = dict(incoming_name_type_map or {})

    # We may have boxed args. Need to unpack these.
    # A boxed Args value is something like this:
    # {
    #     'kw': {}        # all incoming args are converted to kw because
    #                     # we know the declaration.
    #     'kw_types': {}
    #     'from': 'long_id_of_task_runner_instance'
    #     'to': 'long_id_of_task_runner_instance'
    # }
    # Args are always unpacked and repacked at each hand-over point,
    # so there would not be a double-wrapping.
    # Iterate over a snapshot of the names because the maps are replaced
    # inside the loop.
    for name in list(incoming_name_type_map):
        if incoming_name_type_map.get(name) != 'Args':
            continue
        box = incoming_args.pop(name, None) or {}
        del incoming_name_type_map[name]
        unboxed_args = dict(box.get('kw') or {})
        unboxed_types = dict(box.get('kw_types') or {})
        # args passed next to the box win over the args inside the box.
        unboxed_args.update(incoming_args)
        unboxed_types.update(incoming_name_type_map)
        incoming_args = unboxed_args
        incoming_name_type_map = unboxed_types

    outgoing_args = {}
    callback = None
    for arg in task_data.get('inputs', []):
        # arg is [default value, type, name, description]
        # the rules of args migration:
        #  if consumer's arg definition has non-Null value, it wins
        #  if it's Null, we look for same name + type in incoming args
        #  and use that
        # Future TODO: if name + type did not match we try matching on
        #  "non-native" types (those not including String, Number, Object,
        #  Array, Int). If even that did not yield any matches, we use Null
        value, typename, name = arg[:3]
        if typename == 'Callback':
            callback = outgoing_args[name] = bind(self.process_callback, task_id)
        elif typename == 'Args':
            # we have to box args (same layout as described above).
            # long id is a combination of TaskHive ID + Task Run record ID.
            outgoing_args[name] = {
                'kw': incoming_args,
                'kw_types': incoming_name_type_map,
                'from': incoming_task_id + '@' + self.id,
                'to': task_id + '@' + self.id,
            }
        elif value is None and typename == incoming_name_type_map.get(name, 'not to be matched'):
            # nice! arg name+type matches to incoming
            outgoing_args[name] = incoming_args.get(name)
        else:
            # either it's non-null, which is a proper outcome,
            # or it's null, but we are too lazy to do better matching and
            # settle on default Null. Either way, using default value.
            outgoing_args[name] = value

    # this is used for shutting long-running tasks down.
    # `callback is not None` guards against an input that is merely *named*
    # 'callback' without being of 'Callback' type; there is no callback
    # object to register in that case.
    if hasattr(tasktype, 'stop') and 'callback' in outgoing_args and callback is not None:
        args_passed = outgoing_args.copy()
        del args_passed['callback']
        # _running... is Weakref dictionary. When last hard ref to
        # callback goes, so goes the _running.. entry.
        self._running_tasks[callback] = [task_id, args_passed]
        self._running_tasks_map[task_id] = callback

    logging.debug(HELLO + "Running '%s' task's runner for TaskHive '%s'" % (task_id, self.id))

    th = threading.Thread(target=tasktype.run, kwargs=outgoing_args)
    th.daemon = True
    th.start()
def run_task(self, task_id, task_data, incoming_task_id='no source task id',
             incoming_args=None, incoming_name_type_map=None):
    '''
    Assemble the keyword arguments for one task and start its runner
    (tasktype.run) in a daemon thread.

    Nothing is started when the task record is flagged 'paused'. When the
    record's 'tasktype' is not found in self.get_tasktypes(), the record is
    flagged as paused in storage and nothing is started.

    task_id -- storage id of the task to run.
    task_data -- task record. Keys read here: 'paused', 'tasktype', 'inputs'.
        Each 'inputs' entry is [default value, type, name, description].
    incoming_task_id -- id of the task whose output triggered this run.
        Only used for the 'from' label of boxed Args.
    incoming_args -- {name: value} handed over by the upstream task.
    incoming_name_type_map -- {name: type name} for incoming_args.

    Neither incoming dict (nor the 'kw' / 'kw_types' dicts of a boxed Args
    value inside them) is modified: the same objects are handed to every
    consumer of a finished task, so all work is done on private copies.

    Returns None.
    '''
    if task_data.get('paused'):
        return

    tasktype = self.get_tasktypes().get(
        task_data.get('tasktype', 'not to be found'))
    if not tasktype:
        # no runner is known for this task type: flag the record as paused.
        storage.change(task_id, {'paused': True})
        return

    # Private shallow copies. This also replaces the former mutable ({})
    # default values of the two parameters.
    incoming_args = dict(incoming_args or {})
    incoming_name_type_map = dict(incoming_name_type_map or {})

    # We may have boxed args. Need to unpack these.
    # A boxed Args value is something like this:
    # {
    #     'kw': {}        # all incoming args are converted to kw because
    #                     # we know the declaration.
    #     'kw_types': {}
    #     'from': 'long_id_of_task_runner_instance'
    #     'to': 'long_id_of_task_runner_instance'
    # }
    # Args are always unpacked and repacked at each hand-over point,
    # so there would not be a double-wrapping.
    # Iterate over a snapshot of the names because the maps are replaced
    # inside the loop.
    for name in list(incoming_name_type_map):
        if incoming_name_type_map.get(name) != 'Args':
            continue
        box = incoming_args.pop(name, None) or {}
        del incoming_name_type_map[name]
        unboxed_args = dict(box.get('kw') or {})
        unboxed_types = dict(box.get('kw_types') or {})
        # args passed next to the box win over the args inside the box.
        unboxed_args.update(incoming_args)
        unboxed_types.update(incoming_name_type_map)
        incoming_args = unboxed_args
        incoming_name_type_map = unboxed_types

    outgoing_args = {}
    callback = None
    for arg in task_data.get('inputs', []):
        # arg is [default value, type, name, description]
        # the rules of args migration:
        #  if consumer's arg definition has non-Null value, it wins
        #  if it's Null, we look for same name + type in incoming args
        #  and use that
        # Future TODO: if name + type did not match we try matching on
        #  "non-native" types (those not including String, Number, Object,
        #  Array, Int). If even that did not yield any matches, we use Null
        value, typename, name = arg[:3]
        if typename == 'Callback':
            callback = outgoing_args[name] = bind(self.process_callback, task_id)
        elif typename == 'Args':
            # we have to box args (same layout as described above).
            # long id is a combination of TaskHive ID + Task Run record ID.
            outgoing_args[name] = {
                'kw': incoming_args,
                'kw_types': incoming_name_type_map,
                'from': incoming_task_id + '@' + self.id,
                'to': task_id + '@' + self.id,
            }
        elif value is None and typename == incoming_name_type_map.get(name, 'not to be matched'):
            # nice! arg name+type matches to incoming
            outgoing_args[name] = incoming_args.get(name)
        else:
            # either it's non-null, which is a proper outcome,
            # or it's null, but we are too lazy to do better matching and
            # settle on default Null. Either way, using default value.
            outgoing_args[name] = value

    # this is used for shutting long-running tasks down.
    # `callback is not None` guards against an input that is merely *named*
    # 'callback' without being of 'Callback' type; there is no callback
    # object to register in that case.
    if hasattr(tasktype, 'stop') and 'callback' in outgoing_args and callback is not None:
        args_passed = outgoing_args.copy()
        del args_passed['callback']
        # _running... is Weakref dictionary. When last hard ref to
        # callback goes, so goes the _running.. entry.
        self._running_tasks[callback] = [task_id, args_passed]
        self._running_tasks_map[task_id] = callback

    logging.debug(HELLO + "Running '%s' task's runner for TaskHive '%s'" % (task_id, self.id))

    th = threading.Thread(target=tasktype.run, kwargs=outgoing_args)
    th.daemon = True
    th.start()
def process_callback(self, task_id, *args, **kw):
    '''
    This converts output arguments (those the done task pushed to the
    callback) into input arguments for each of the to-be-called-next task,
    then calls self.run_task() once per consumer that still has a record
    in storage.

    task_id -- storage id of the task that has just called back.
    *args, **kw -- the values the finished task passed to its callback.
        They are matched against the task record's 'outputs' declaration,
        a list of [default value, type, name, description] entries.

    An output named 'metadataupdate' of type 'MetadataUpdate' is not
    forwarded to consumers; it is applied to the finished task's own
    record through storage.change().

    Returns None.
    '''
    donetask = storage.get(task_id)
    if not donetask:
        # which may happen if task was removed from the roster while it
        # was running
        return

    consumers = [
        (consumer_id, storage.get(consumer_id))
        for consumer_id in donetask.get('consumers', [])
    ]
    if not consumers:
        # NOTE(review): a 'metadataupdate' pushed by a task that has no
        # consumers is dropped by this early return - confirm this is
        # intended.
        return

    # Preassembling input args.
    # in python, as long as you don't splat-collect args in the function,
    # all positional args can be pulled into dictionary and
    # applied to the function as named args.
    # we rely on that here.
    # in other words, DO NOT USE SPLAT ("*args, **kw") in Task run()
    # declaration.
    incoming_args_def = donetask.get('outputs', [])
    incoming_args = {}
    incoming_name_type_map = {}

    # every declared output is typed; only those passed by keyword get a
    # value at this point.
    for arg_def in incoming_args_def:
        _default, typename, name = arg_def[:3]
        if name in kw:
            incoming_args[name] = kw[name]
        incoming_name_type_map[name] = typename

    # positional values are matched to the declaration by position. zip()
    # stops at the shorter of the two, so surplus positional values are
    # ignored. A positional value wins over a same-named keyword value.
    for arg_def, arg_value in zip(incoming_args_def, args):
        _default, typename, name = arg_def[:3]
        incoming_args[name] = arg_value
        incoming_name_type_map[name] = typename

    # we allow task runners to push back updates to task records.
    # web server task can push back update to task label with
    # port number that was autopicked by the server. etc.
    updateargtype = 'MetadataUpdate'
    updateargname = updateargtype.lower()
    if updateargname in incoming_args and \
            incoming_name_type_map[updateargname] == updateargtype:
        # the record is read as a mapping above, so it may not expose an
        # `id` attribute; fall back to the id it was fetched with.
        record_id = getattr(donetask, 'id', task_id)
        storage.change(record_id, incoming_args[updateargname])
        del incoming_args[updateargname]

    for consumer_id, consumer_data in consumers:
        if not consumer_data:
            # consumer record is gone from storage; nothing to run.
            continue
        # each consumer gets its own shallow copy so that one run cannot
        # alter the arguments seen by the next one.
        self.run_task(
            consumer_id,
            consumer_data,
            task_id,
            dict(incoming_args),
            dict(incoming_name_type_map)
        )
def process_callback(self, task_id, *args, **kw):
    '''
    This converts output arguments (those the done task pushed to the
    callback) into input arguments for each of the to-be-called-next task,
    then calls self.run_task() once per consumer that still has a record
    in storage.

    task_id -- storage id of the task that has just called back.
    *args, **kw -- the values the finished task passed to its callback.
        They are matched against the task record's 'outputs' declaration,
        a list of [default value, type, name, description] entries.

    An output named 'metadataupdate' of type 'MetadataUpdate' is not
    forwarded to consumers; it is applied to the finished task's own
    record through storage.change().

    Returns None.
    '''
    donetask = storage.get(task_id)
    if not donetask:
        # which may happen if task was removed from the roster while it
        # was running
        return

    consumers = [
        (consumer_id, storage.get(consumer_id))
        for consumer_id in donetask.get('consumers', [])
    ]
    if not consumers:
        # NOTE(review): a 'metadataupdate' pushed by a task that has no
        # consumers is dropped by this early return - confirm this is
        # intended.
        return

    # Preassembling input args.
    # in python, as long as you don't splat-collect args in the function,
    # all positional args can be pulled into dictionary and
    # applied to the function as named args.
    # we rely on that here.
    # in other words, DO NOT USE SPLAT ("*args, **kw") in Task run()
    # declaration.
    incoming_args_def = donetask.get('outputs', [])
    incoming_args = {}
    incoming_name_type_map = {}

    # every declared output is typed; only those passed by keyword get a
    # value at this point.
    for arg_def in incoming_args_def:
        _default, typename, name = arg_def[:3]
        if name in kw:
            incoming_args[name] = kw[name]
        incoming_name_type_map[name] = typename

    # positional values are matched to the declaration by position. zip()
    # stops at the shorter of the two, so surplus positional values are
    # ignored. A positional value wins over a same-named keyword value.
    for arg_def, arg_value in zip(incoming_args_def, args):
        _default, typename, name = arg_def[:3]
        incoming_args[name] = arg_value
        incoming_name_type_map[name] = typename

    # we allow task runners to push back updates to task records.
    # web server task can push back update to task label with
    # port number that was autopicked by the server. etc.
    updateargtype = 'MetadataUpdate'
    updateargname = updateargtype.lower()
    if updateargname in incoming_args and \
            incoming_name_type_map[updateargname] == updateargtype:
        # the record is read as a mapping above, so it may not expose an
        # `id` attribute; fall back to the id it was fetched with.
        record_id = getattr(donetask, 'id', task_id)
        storage.change(record_id, incoming_args[updateargname])
        del incoming_args[updateargname]

    for consumer_id, consumer_data in consumers:
        if not consumer_data:
            # consumer record is gone from storage; nothing to run.
            continue
        # each consumer gets its own shallow copy so that one run cannot
        # alter the arguments seen by the next one.
        self.run_task(
            consumer_id,
            consumer_data,
            task_id,
            dict(incoming_args),
            dict(incoming_name_type_map)
        )