示例#1
0
	def _get_task_parents(self, taskid):
		'''
		Return the ids of tasks on the local roster (self._tasks) whose
		stored record lists `taskid` among its 'consumers'.

		Roster entries that have no record in storage, or whose record has
		no 'consumers', are skipped instead of raising.
		'''
		parents = []
		for tid in self._tasks:
			# storage may have nothing for an id that is still on our
			# local roster; treat that the same as "no consumers".
			parenttask = storage.get(tid) or {}
			if taskid in (parenttask.get('consumers') or []):
				parents.append(tid)
		return parents
示例#2
0
 def _get_task_parents(self, taskid):
     '''
     Return the ids of tasks on the local roster (self._tasks) whose
     stored record lists `taskid` among its 'consumers'.

     Roster entries that have no record in storage, or whose record has
     no 'consumers', are skipped instead of raising.
     '''
     parents = []
     for tid in self._tasks:
         # storage may have nothing for an id that is still on our
         # local roster; treat that the same as "no consumers".
         parenttask = storage.get(tid) or {}
         if taskid in (parenttask.get('consumers') or []):
             parents.append(tid)
     return parents
示例#3
0
    def start(self):
        '''
        Set up watches for every task on this hive's roster and run the
        tasks that no rostered task names as a consumer.

        Roster records carry no "require" / "depends on" list. What is
        stored with each task id is its 'consumers' - the task ids it hands
        control to once it is done - so that a finished long-running task
        knows whom to talk to next without the whole task tree being
        rescanned.

        The Task.REQUIRE list is deliberately ignored here: it names
        components a task needs to be around in general, which are assumed
        to have been initialised at server start.

        For the first run the opposite data is needed: which tasks have no
        parents. Those are all started together.
        '''
        logging.debug(HELLO + "Initing TaskHive %s" % self.id)

        roster = storage.get(self.id).get('tasks', [])

        # Pass 1: add each task to the watched set and tally, per task id,
        # how many rostered tasks list it as a consumer.
        inbound = {}
        for producer_id in roster:
            self._add_to_watched_tasks(producer_id)

            producer_record = storage.get(producer_id, {})
            for consumer_id in producer_record.get('consumers', []):
                inbound[consumer_id] = 1 + inbound.get(consumer_id, 0)

        # Pass 2: run whatever has a stored record, has no parents and is
        # not already running.
        running = self._running_tasks_map.keys()
        for candidate_id in roster:
            record = storage.get(candidate_id)
            if not record:
                continue
            if inbound.get(candidate_id):
                continue
            if candidate_id in running:
                continue
            self.run_task(candidate_id, record)
示例#4
0
	def stop(self):
		'''
		Call stop() on the task type of every running task that has one.

		An exception raised by one task type's stop() is logged and does
		not keep the remaining tasks from being stopped.
		'''
		# iterate over a snapshot of the keys so the loop stays valid even
		# if self._running_tasks changes while tasks are being stopped.
		for callback in list(self._running_tasks.keys()):
			if not callback:
				continue
			task_id, kw = self._running_tasks.get(callback, ['', {}])
			tasktype_name = storage.get(task_id, {}).get('tasktype', 'not to be found')
			tasktype = self.get_tasktypes().get(tasktype_name)
			if tasktype and hasattr(tasktype, 'stop'):
				kw['callback'] = callback
				try:
					tasktype.stop(**kw)
				except Exception as ex:
					# The message needs one placeholder per value; without
					# them the "%" itself raised TypeError from inside this
					# handler. Formatting `ex` directly avoids the
					# Python-2-only `ex.message` attribute.
					logging.debug(HELLO + "Stopping task '%s' errored out with '%s'" % (tasktype.ID, ex))
示例#5
0
	def start(self):
		'''
		Set up watches for every task on this hive's roster and run the
		tasks that no rostered task names as a consumer.

		Roster records carry no "require" / "depends on" list. What is
		stored with each task id is its 'consumers' - the task ids it hands
		control to once it is done - so that a finished long-running task
		knows whom to talk to next without the whole task tree being
		rescanned.

		The Task.REQUIRE list is deliberately ignored here: it names
		components a task needs to be around in general, which are assumed
		to have been initialised at server start.

		For the first run the opposite data is needed: which tasks have no
		parents. Those are all started together.
		'''
		logging.debug(HELLO + "Initing TaskHive %s" % self.id)

		roster = storage.get(self.id).get('tasks', [])

		# Pass 1: add each task to the watched set and tally, per task id,
		# how many rostered tasks list it as a consumer.
		inbound = {}
		for producer_id in roster:
			self._add_to_watched_tasks(producer_id)

			producer_record = storage.get(producer_id, {})
			for consumer_id in producer_record.get('consumers', []):
				inbound[consumer_id] = 1 + inbound.get(consumer_id, 0)

		# Pass 2: run whatever has a stored record, has no parents and is
		# not already running.
		running = self._running_tasks_map.keys()
		for candidate_id in roster:
			record = storage.get(candidate_id)
			if not record:
				continue
			if inbound.get(candidate_id):
				continue
			if candidate_id in running:
				continue
			self.run_task(candidate_id, record)
示例#6
0
	def _hive_tasks_change_handler(self):
		'''
		Reconcile the locally cached task roster with the one in storage.

		A local copy of the roster is kept between handler runs so that
		repeated or snowballing calls do not restart tasks needlessly.
		Only additions and removals of tasks are handled here; individual
		task state changes are handled by other code.
		'''
		previous = self._tasks
		current = set(storage.get(self.id).get('tasks', []))
		self._tasks = current

		dropped = previous - current
		introduced = current - previous

		for dropped_id in dropped:
			with self._taskswatchers_lock:
				# letting go of the watcher; callback goes poof on the other end.
				self._taskswatchers.pop(dropped_id, None)
			self.stop_task(dropped_id)

		for new_id in introduced:
			self._add_to_watched_tasks(new_id)
			# tasks that have parents are not started from here
			if self._get_task_parents(new_id):
				continue
			# neither are tasks flagged as paused
			if storage.get(new_id, {}).get('paused'):
				continue
			self.run_task(new_id, storage.get(new_id))
示例#7
0
    def _hive_tasks_change_handler(self):
        '''
        Reconcile the locally cached task roster with the one in storage.

        A local copy of the roster is kept between handler runs so that
        repeated or snowballing calls do not restart tasks needlessly.
        Only additions and removals of tasks are handled here; individual
        task state changes are handled by other code.
        '''
        previous = self._tasks
        current = set(storage.get(self.id).get('tasks', []))
        self._tasks = current

        dropped = previous - current
        introduced = current - previous

        for dropped_id in dropped:
            with self._taskswatchers_lock:
                # letting go of the watcher; callback goes poof on the other end.
                self._taskswatchers.pop(dropped_id, None)
            self.stop_task(dropped_id)

        for new_id in introduced:
            self._add_to_watched_tasks(new_id)
            # tasks that have parents are not started from here
            if self._get_task_parents(new_id):
                continue
            # neither are tasks flagged as paused
            if storage.get(new_id, {}).get('paused'):
                continue
            self.run_task(new_id, storage.get(new_id))
示例#8
0
 def stop(self):
     '''
     Call stop() on the task type of every running task that has one.

     An exception raised by one task type's stop() is logged and does
     not keep the remaining tasks from being stopped.
     '''
     # iterate over a snapshot of the keys so the loop stays valid even
     # if self._running_tasks changes while tasks are being stopped.
     for callback in list(self._running_tasks.keys()):
         if not callback:
             continue
         task_id, kw = self._running_tasks.get(callback, ['', {}])
         tasktype_name = storage.get(task_id, {}).get('tasktype',
                                                      'not to be found')
         tasktype = self.get_tasktypes().get(tasktype_name)
         if tasktype and hasattr(tasktype, 'stop'):
             kw['callback'] = callback
             try:
                 tasktype.stop(**kw)
             except Exception as ex:
                 # The message needs one placeholder per value; without
                 # them the "%" itself raised TypeError from inside this
                 # handler. Formatting `ex` directly avoids the
                 # Python-2-only `ex.message` attribute.
                 logging.debug(HELLO +
                               "Stopping task '%s' errored out with '%s'" %
                               (tasktype.ID, ex))
示例#9
0
	def stop_task(self, taskid):
		'''
		Call stop() on the task type of the running task `taskid`.

		Returns True only when the task type's stop() was called and
		returned without raising. Returns False when the task has no
		callback in self._running_tasks_map, when its task type is unknown
		or has no stop(), or when stop() raised (the error is logged).
		'''
		callback = self._running_tasks_map.get(taskid)
		if not callback:
			return False

		stored_taskid, kw = self._running_tasks.get(callback, ['', {}])
		tasktype_name = storage.get(stored_taskid, {}).get('tasktype', 'not to be found')
		tasktype = self.get_tasktypes().get(tasktype_name)
		if not (tasktype and hasattr(tasktype, 'stop')):
			return False

		kw['callback'] = callback
		try:
			tasktype.stop(**kw)
		except Exception as ex:
			# The message needs one placeholder per value; without them the
			# "%" itself raised TypeError from inside this handler.
			# Formatting `ex` directly avoids the Python-2-only
			# `ex.message` attribute.
			logging.debug(HELLO + "Stopping task '%s' errored out with '%s'" % (tasktype.ID, ex))
			return False

		return True
示例#10
0
    def stop_task(self, taskid):
        '''
        Call stop() on the task type of the running task `taskid`.

        Returns True only when the task type's stop() was called and
        returned without raising. Returns False when the task has no
        callback in self._running_tasks_map, when its task type is unknown
        or has no stop(), or when stop() raised (the error is logged).
        '''
        callback = self._running_tasks_map.get(taskid)
        if not callback:
            return False

        stored_taskid, kw = self._running_tasks.get(callback, ['', {}])
        tasktype_name = storage.get(stored_taskid, {}).get('tasktype',
                                                           'not to be found')
        tasktype = self.get_tasktypes().get(tasktype_name)
        if not (tasktype and hasattr(tasktype, 'stop')):
            return False

        kw['callback'] = callback
        try:
            tasktype.stop(**kw)
        except Exception as ex:
            # The message needs one placeholder per value; without them the
            # "%" itself raised TypeError from inside this handler.
            # Formatting `ex` directly avoids the Python-2-only
            # `ex.message` attribute.
            logging.debug(HELLO +
                          "Stopping task '%s' errored out with '%s'" %
                          (tasktype.ID, ex))
            return False

        return True
示例#11
0
	def is_paused(self):
		'''
		Report whether this hive's own storage record has a truthy
		'paused' value.
		'''
		hive_record = storage.get(self.id)
		paused_flag = hive_record.get('paused')
		return bool(paused_flag)
示例#12
0
	def process_callback(self, task_id, *args, **kw):
		'''
		Convert the output arguments a finished task pushed to its callback
		into input arguments, and run each of its consumers with them.

		task_id -- id of the task that is done.
		args    -- positional outputs; matched by position against the
		           'outputs' definitions on the done task's record.
		kw      -- named outputs; matched by name against the same
		           definitions. A positional value overrides a named value
		           for the same output name.

		Returns None. Nothing happens when the done task has no record in
		storage or lists no consumers.

		In python, as long as you don't splat-collect args in the function,
		all positional args can be pulled into a dictionary and applied to
		the function as named args. We rely on that here. In other words,
		DO NOT USE SPLAT ("*args, **kw") in Task run() declaration.
		'''
		donetask = storage.get(task_id)
		if not donetask:
			# which may happen if task was removed from the roster while it was running
			return

		consumers = [[consumer_id, storage.get(consumer_id)] for consumer_id in donetask.get('consumers', [])]
		if not consumers:
			return

		# Preassembling input args.
		# each output definition is [default value, type, name, description]
		incoming_args_def = donetask.get('outputs', [])
		incoming_name_type_map = {}
		incoming_args = {}

		# named outputs first ...
		for definition in incoming_args_def:
			typename, name = definition[1:3]
			if name in kw:
				incoming_args[name] = kw[name]
				incoming_name_type_map[name] = typename

		# ... then positional ones, which take precedence. zip() stops at
		# the shorter of the two sequences, so surplus positional values or
		# surplus definitions are ignored.
		for definition, positional_value in zip(incoming_args_def, args):
			typename, name = definition[1:3]
			incoming_args[name] = positional_value
			incoming_name_type_map[name] = typename

		# we allow task runners to push back updates to task records.
		# web server task can push back update to task label with
		# port number that was autopicked by the server. etc.
		updateargtype = 'MetadataUpdate'
		updateargname = updateargtype.lower()
		if incoming_name_type_map.get(updateargname) == updateargtype:
			# The record is addressed by the id we were called with. It is
			# accessed dict-style everywhere else in this method, so an
			# `.id` attribute on it cannot be relied on.
			storage.change(task_id, incoming_args.pop(updateargname))

		for consumer_id, consumer_record in consumers:
			# consumers whose record is missing from storage are skipped
			if consumer_record:
				self.run_task(
					consumer_id
					, consumer_record
					, task_id
					, incoming_args
					, incoming_name_type_map
				)
示例#13
0
 def is_paused(self):
     '''
     Report whether this hive's own storage record has a truthy
     'paused' value.
     '''
     hive_record = storage.get(self.id)
     paused_flag = hive_record.get('paused')
     return bool(paused_flag)
示例#14
0
    def process_callback(self, task_id, *args, **kw):
        '''
        Convert the output arguments a finished task pushed to its callback
        into input arguments, and run each of its consumers with them.

        task_id -- id of the task that is done.
        args    -- positional outputs; matched by position against the
                   'outputs' definitions on the done task's record.
        kw      -- named outputs; matched by name against the same
                   definitions. A positional value overrides a named value
                   for the same output name.

        Returns None. Nothing happens when the done task has no record in
        storage or lists no consumers.

        In python, as long as you don't splat-collect args in the function,
        all positional args can be pulled into a dictionary and applied to
        the function as named args. We rely on that here. In other words,
        DO NOT USE SPLAT ("*args, **kw") in Task run() declaration.
        '''
        donetask = storage.get(task_id)
        if not donetask:
            # which may happen if task was removed from the roster while it was running
            return

        consumers = [[consumer_id, storage.get(consumer_id)]
                     for consumer_id in donetask.get('consumers', [])]
        if not consumers:
            return

        # Preassembling input args.
        # each output definition is [default value, type, name, description]
        incoming_args_def = donetask.get('outputs', [])
        incoming_name_type_map = {}
        incoming_args = {}

        # named outputs first ...
        for definition in incoming_args_def:
            typename, name = definition[1:3]
            if name in kw:
                incoming_args[name] = kw[name]
                incoming_name_type_map[name] = typename

        # ... then positional ones, which take precedence. zip() stops at
        # the shorter of the two sequences, so surplus positional values or
        # surplus definitions are ignored.
        for definition, positional_value in zip(incoming_args_def, args):
            typename, name = definition[1:3]
            incoming_args[name] = positional_value
            incoming_name_type_map[name] = typename

        # we allow task runners to push back updates to task records.
        # web server task can push back update to task label with
        # port number that was autopicked by the server. etc.
        updateargtype = 'MetadataUpdate'
        updateargname = updateargtype.lower()
        if incoming_name_type_map.get(updateargname) == updateargtype:
            # The record is addressed by the id we were called with. It is
            # accessed dict-style everywhere else in this method, so an
            # `.id` attribute on it cannot be relied on.
            storage.change(task_id, incoming_args.pop(updateargname))

        for consumer_id, consumer_record in consumers:
            # consumers whose record is missing from storage are skipped
            if consumer_record:
                self.run_task(consumer_id, consumer_record, task_id,
                              incoming_args, incoming_name_type_map)