Example #1
 def _handle_terminating_task(self, task_id):
     keyt = 'task:%s' % task_id
     _, service = self._get_service(keyt=keyt)
     data = self._redis.hget(keyt, 'job')
     nxpus = Capacity(self._redis.hget(keyt, 'ngpus'),
                      self._redis.hget(keyt, 'ncpus'))
     if data is not None:
         container_id = self._redis.hget(keyt, 'container_id')
         data = json.loads(data)
         data['container_id'] = container_id
         self._logger.info('%s: terminating task (job: %s)', task_id,
                           json.dumps(data))
         try:
             service.terminate(data)
             self._logger.info('%s: terminated', task_id)
         except Exception:
             self._logger.warning('%s: failed to terminate', task_id)
             self._logger.info(traceback.format_exc())
     else:
         self._logger.info('%s: terminating task (on error)', task_id)
     resource = self._redis.hget(keyt, 'alloc_resource')
     if resource:
         self._release_resource(service, resource, task_id, nxpus)
     task.set_status(self._redis, keyt, 'stopped')
     task.disable(self._redis, task_id)
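These snippets pass GPU/CPU counts around as a `Capacity` value, which the excerpts never define. A minimal sketch consistent with how the examples use it (construction from Redis strings, arithmetic, increment helpers, an element-wise fits-into test) might look as follows; the class name comes from the source, the implementation is an assumption:

    # Minimal sketch (assumption) of the Capacity helper used by these examples;
    # only the operations the snippets rely on are implemented.
    class Capacity:
        def __init__(self, ngpus=0, ncpus=0):
            # Redis returns strings (or None); normalize to ints
            self.ngpus = int(ngpus or 0)
            self.ncpus = int(ncpus or 0)

        def incr_ngpus(self, n):
            self.ngpus += n

        def incr_ncpus(self, n):
            self.ncpus += n

        def __add__(self, other):
            return Capacity(self.ngpus + other.ngpus, self.ncpus + other.ncpus)

        def __sub__(self, other):
            return Capacity(self.ngpus - other.ngpus, self.ncpus - other.ncpus)

        def inf_or_eq(self, other):
            # element-wise "fits into" test used when matching tasks to machines
            return self.ngpus <= other.ngpus and self.ncpus <= other.ncpus

        def __str__(self):
            return '(%d gpus, %d cpus)' % (self.ngpus, self.ncpus)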
Example #2
def reorganize_tasks():
    logger.debug(f"[{service}-{pid}]: Reorganizing tasks")
    # On startup, add all active tasks in the work queue or service queue
    for task_id in task.list_active(redis, service):
        task_key = f'task:{task_id}'
        with redis.acquire_lock(task_id):
            status = redis.hget(task_key, 'status')
            if status in ['queued', 'allocated']:
                task.service_queue(redis, task_id, service)
                task.set_status(redis, task_key, 'queued')
            else:
                task.work_queue(redis, task_id, service)
        # check integrity of tasks
        if redis.hget(task_key, 'priority') is None:
            redis.hset(task_key, 'priority', 0)
        if redis.hget(task_key, 'queued_time') is None:
            redis.hset(task_key, 'queued_time', time.time())
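Example #2 relies on `task.service_queue` and `task.work_queue` helpers that are not shown. Judging only by the keys the other excerpts read back (Example #5 consumes `queued:<service>` with `llen`/`lindex`/`lrem`), a hypothetical sketch of their semantics could be:

    import time

    # Hypothetical sketch of the queue helpers; key names other than
    # 'queued:<service>' are assumptions, not from the source.
    def service_queue(redis, task_id, service):
        # park the task in the per-service waiting queue
        redis.lpush('queued:%s' % service, task_id)

    def work_queue(redis, task_id, service, delay=0):
        # schedule the task for processing by the worker, possibly delayed
        if delay == 0:
            redis.lpush('work:%s' % service, task_id)
        else:
            redis.zadd('work_delayed:%s' % service,
                       {task_id: time.time() + delay})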
Example #3
    def _advance_task(self, task_id):
        """Tries to advance the task to the next status. If it can, re-queue it immediately
        to process the next stage. Otherwise, re-queue it after some delay to try again.
        """
        keyt = 'task:%s' % task_id
        with self._redis.acquire_lock(keyt, acquire_timeout=1, expire_time=600):
            status = self._redis.hget(keyt, 'status')
            if status == 'stopped':
                return

            service_name = self._redis.hget(keyt, 'service')
            if service_name not in self._services:
                raise ValueError('unknown service %s' % service_name)
            service = self._services[service_name]

            self._logger.info('%s: trying to advance from status %s', task_id, status)

            if status == 'queued':
                resource = self._redis.hget(keyt, 'resource')
                parent = self._redis.hget(keyt, 'parent')
                if parent:
                    keyp = 'task:%s' % parent
                    # if the parent task is in the database, check for dependencies
                    if self._redis.exists(keyp):
                        status = self._redis.hget(keyp, 'status')
                        if status == 'stopped':
                            if self._redis.hget(keyp, 'message') != 'completed':
                                task.terminate(self._redis, task_id, phase='dependency_error')
                                return
                        else:
                            self._logger.warning('%s: depending on other task, waiting', task_id)
                            task.service_queue(self._redis, task_id, service.name)
                            return
                ngpus = int(self._redis.hget(keyt, 'ngpus'))
                resource, available_gpus = self._allocate_resource(task_id, resource, service, ngpus)
                if resource is not None:
                    self._logger.info('%s: resource %s reserved (%d/%d)',
                                      task_id, resource, available_gpus, ngpus)
                    self._redis.hset(keyt, 'alloc_resource', resource)
                    if ngpus == available_gpus:
                        task.set_status(self._redis, keyt, 'allocated')
                    else:
                        task.set_status(self._redis, keyt, 'allocating')
                    task.work_queue(self._redis, task_id, service_name)
                else:
                    self._logger.warning('%s: no resources available, waiting', task_id)
                    task.service_queue(self._redis, task_id, service.name)
            elif status == 'allocating':
                resource = self._redis.hget(keyt, 'alloc_resource')
                keyr = 'resource:%s:%s' % (service.name, resource)
                ngpus = int(self._redis.hget(keyt, 'ngpus'))
                already_allocated_gpus = 0
                for k, v in six.iteritems(self._redis.hgetall(keyr)):
                    if v == task_id:
                        already_allocated_gpus += 1
                capacity = service.list_resources()[resource]
                available_gpus, remaining_gpus = self._reserve_resource(service, resource,
                                                                        capacity, task_id,
                                                                        ngpus - already_allocated_gpus,
                                                                        0, -1, True)
                self._logger.warning('task: %s - resource: %s (capacity %d) - already %d - available %d',
                                     task_id, resource, capacity, already_allocated_gpus, available_gpus)
                if available_gpus == ngpus - already_allocated_gpus:
                    task.set_status(self._redis, keyt, 'allocated')
                    key_reserved = 'reserved:%s:%s' % (service.name, resource)
                    self._redis.delete(key_reserved)
                    task.work_queue(self._redis, task_id, service.name)
                else:
                    task.work_queue(self._redis, task_id, service.name,
                                    delay=service.is_notifying_activity and 120 or 30)
            elif status == 'allocated':
                content = json.loads(self._redis.hget(keyt, 'content'))
                resource = self._redis.hget(keyt, 'alloc_resource')
                self._logger.info('%s: launching on %s', task_id, service.name)
                try:
                    keyr = 'resource:%s:%s' % (service.name, resource)
                    lgpu = []
                    for k, v in six.iteritems(self._redis.hgetall(keyr)):
                        if v == task_id:
                            lgpu.append(k)
                    self._redis.hset(keyt, 'alloc_lgpu', ",".join(lgpu))
                    data = service.launch(
                        task_id,
                        content['options'],
                        lgpu,
                        resource,
                        content['docker']['registry'],
                        content['docker']['image'],
                        content['docker']['tag'],
                        content['docker']['command'],
                        task.file_list(self._redis, task_id),
                        content['wait_after_launch'])
                except EnvironmentError as e:
                    # the resource is not available and will be set busy
                    self._block_resource(resource, service, str(e))
                    # set the task as queued again
                    self._redis.hdel(keyt, 'alloc_resource')
                    self._release_resource(service, resource, task_id)
                    task.set_status(self._redis, keyt, 'queued')
                    task.service_queue(self._redis, task_id, service.name)
                    self._logger.info('could not launch [%s] %s on %s: blocking resource', str(e), task_id, resource)
                    return
                except Exception as e:
                    # all other errors make the task fail
                    task.append_log(self._redis, task_id, str(e))
                    task.terminate(self._redis, task_id, phase='launch_error')
                    return
                self._logger.info('%s: task started on %s', task_id, service.name)
                self._redis.hset(keyt, 'job', json.dumps(data))
                task.set_status(self._redis, keyt, 'running')
                # For services that do not notify their activity, we should
                # poll the task status more regularly.
                task.work_queue(self._redis, task_id, service.name,
                                delay=service.is_notifying_activity and 120 or 30)

            elif status == 'running':
                self._logger.debug('- checking activity of task: %s', task_id)
                data = json.loads(self._redis.hget(keyt, 'job'))
                status = service.status(task_id, data)
                if status == 'dead':
                    self._logger.info('%s: task no longer running on %s, request termination',
                                      task_id, service.name)
                    task.terminate(self._redis, task_id, phase='exited')
                else:
                    task.work_queue(self._redis, task_id, service.name,
                                    delay=service.is_notifying_activity and 120 or 30)

            elif status == 'terminating':
                data = self._redis.hget(keyt, 'job')
                if data is not None:
                    container_id = self._redis.hget(keyt, 'container_id')
                    data = json.loads(data)
                    data['container_id'] = container_id
                    self._logger.info('%s: terminating task (%s)', task_id, json.dumps(data))
                    try:
                        service.terminate(data)
                        self._logger.info('%s: terminated', task_id)
                    except Exception:
                        self._logger.warning('%s: failed to terminate', task_id)
                resource = self._redis.hget(keyt, 'alloc_resource')
                self._release_resource(service, resource, task_id)
                task.set_status(self._redis, keyt, 'stopped')
                task.disable(self._redis, task_id)
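Taken together, the branches of `_advance_task` in Example #3 imply a linear task lifecycle; the summary below is an inference from the code above, not a structure from the source:

    # Inferred task lifecycle (from the branches of _advance_task):
    #   queued -> allocating -> allocated -> running -> terminating -> stopped
    TASK_TRANSITIONS = {
        'queued': ('allocated', 'allocating'),  # full or partial GPU reservation
        'allocating': ('allocated',),           # remaining GPUs reserved
        'allocated': ('running', 'queued'),     # launched, or re-queued on EnvironmentError
        'running': ('terminating',),            # service reports the job dead
        'terminating': ('stopped',),            # job terminated, resources released
    }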
Example #4
    # remove busy state from resources
    for key in redis.keys('busy:%s:*' % service):
        redis.delete(key)
    # remove reserved state from resources
    for key in redis.keys('reserved:%s:*' % service):
        redis.delete(key)
    # remove queued tasks on service
    for key in redis.keys('queued:%s' % service):
        redis.delete(key)

    # On startup, add all active tasks in the work queue or service queue
    for task_id in task.list_active(redis, service):
        with redis.acquire_lock(task_id):
            status = redis.hget('task:'+task_id, 'status')
            if status in ('queued', 'allocating', 'allocated'):
                task.service_queue(redis, task_id, redis.hget('task:'+task_id, 'service'))
                task.set_status(redis, 'task:'+task_id, 'queued')
            else:
                task.work_queue(redis, task_id, service)
        # check integrity of tasks
        task_key = 'task:' + task_id
        if redis.hget(task_key, 'priority') is None:
            redis.hset(task_key, 'priority', 0)
        if redis.hget(task_key, 'queued_time') is None:
            redis.hset(task_key, 'queued_time', time.time())

    # Deallocate all resources that are no longer associated with a running task
    resources = services[service].list_resources()

    for resource in resources:
        keyr = 'resource:%s:%s' % (service, resource)
        running_tasks = redis.hgetall(keyr)
        for g, task_id in six.iteritems(running_tasks):
            # the source snippet is truncated here; a plausible completion
            # (assumption) frees the slot when its task is no longer active
            with redis.acquire_lock(task_id):
                status = redis.hget('task:' + task_id, 'status')
                if status is None or status == 'stopped':
                    redis.hdel(keyr, g)
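One caveat with the cleanup loops at the top of Example #4: `redis.keys()` scans the whole keyspace in one blocking call. With redis-py, `scan_iter()` does the same job incrementally, for example:

    # non-blocking variant of the startup cleanup (same key patterns as above)
    for pattern in ('busy:%s:*' % service, 'reserved:%s:*' % service,
                    'queued:%s' % service):
        for key in redis.scan_iter(match=pattern):
            redis.delete(key)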
Example #5
    def _select_best_task_to_process(self, service):
        """find the best next task to push to the work queue
        """
        class EntityUsage:
            def __init__(self, current_usage, entity_name, usage_coeff):
                self._entity = entity_name
                self._current_usage_capacity = current_usage if current_usage else Capacity()
                self._usage_coeff = usage_coeff

            def __str__(self):
                return 'EntityUsage(%s, absolute usage: %s, weighted usage: %s, weight: %f)' % (
                    self._entity, self._current_usage_capacity,
                    self._weighted_usage, self._usage_coeff)

            @property
            def _weighted_usage(self):
                return self._current_usage_capacity.ncpus * self._usage_coeff,\
                       self._current_usage_capacity.ngpus * self._usage_coeff

            def add_current_usage(self, current_usage):
                self._current_usage_capacity += current_usage

            def __eq__(self, other):
                return self._weighted_usage[0] == other._weighted_usage[0] and \
                       self._weighted_usage[1] == other._weighted_usage[1]

            def __lt__(self, other):
                return self._weighted_usage[1] < other._weighted_usage[1] or \
                       (self._weighted_usage[1] == other._weighted_usage[1] and
                        self._weighted_usage[0] < other._weighted_usage[0])

            def __le__(self, other):
                return self == other or self < other

            @staticmethod
            def initialize_entities_usage(mongo_client, service_name):
                entity_usage_weights = config.get_entities_limit_rate(
                    mongo_client, service_name)
                weight_sum = float(
                    sum(w for w in entity_usage_weights.values() if w > 0))
                entities_usage = {
                    e: EntityUsage(None, e,
                                   weight_sum / r if r > 0 else 0)
                    for e, r in six.iteritems(entity_usage_weights)
                }
                return entities_usage

        class CandidateTask:
            def __init__(self, task_id, task_entity, redis, task_capacity,
                         entity_usage, logger):
                assert task_id
                self._task_id = task_id
                self._entity = task_entity
                self._redis_key = 'task:%s' % task_id
                self._priority = int(redis.hget(self._redis_key, 'priority'))
                self._launched_time = float(
                    redis.hget(self._redis_key, 'launched_time'))
                self._runnable_machines = set()
                self._capacity = task_capacity
                self._entity_usage = entity_usage
                self._logger = logger

            def __str__(self):
                return "Task ( %s / %s ; %s ; Priority:%d)" % (
                    self._task_id, self._capacity, self._entity_usage,
                    self._priority)

            def __gt__(self, other):
                return self.is_higher_priority(other)

            def __ge__(self, other):
                return self.is_higher_priority(other)

            def _already_on_node(self):
                result = self._task_id in resource_mgr.preallocated_task_resource
                return result

            def _is_more_respectful_usage(self, other):
                if self._entity == other._entity:  # same entity, go for highest priority
                    is_more_prio = self._priority > other._priority or (
                        self._priority == other._priority
                        and self._launched_time < other._launched_time)
                    return is_more_prio
                my_entity_usage = resource_mgr.entities_usage[self._entity]
                other_entity_usage = resource_mgr.entities_usage[other._entity]
                if my_entity_usage == other_entity_usage:
                    return self._launched_time < other._launched_time

                result = my_entity_usage < other_entity_usage
                self._logger.debug(
                    "AZ-COMPUSE: my: %s.Other: %s . Result = %s",
                    my_entity_usage, other_entity_usage, result)
                return result

            def is_higher_priority(self, other_task):
                # Decision tree for the most priority task
                if not other_task:
                    return True

                # go for already allocated resource task
                if self._already_on_node():
                    if not other_task._already_on_node():
                        return True

                    return self._is_more_respectful_usage(other_task)
                if other_task._already_on_node():
                    return False
                return self._is_more_respectful_usage(other_task)

            @staticmethod
            def try_create(next_task_id):
                next_keyt = 'task:%s' % next_task_id
                parent = self._redis.hget(next_keyt, 'parent')
                task_entity = task.get_owner_entity(self._redis, next_task_id)

                if task_entity not in resource_mgr.entities_usage:
                    self._logger.error(
                        "\t[Task %s] entity %s - without usage limit !",
                        next_task_id, task_entity)
                    return None

                # check parent dependency
                if parent:
                    keyp = 'task:%s' % parent
                    if self._redis.exists(keyp):
                        # if the parent task is in the database, check for dependencies
                        parent_status = self._redis.hget(keyp, 'status')
                        if parent_status != 'stopped':
                            if parent_status == 'running':
                                # parent is still running so update queued time to be as close
                                # as possible to terminate time of parent task
                                self._redis.hset(next_keyt, "queued_time",
                                                 time.time())
                            return None

                        if self._redis.hget(keyp, 'message') != 'completed':
                            task.terminate(self._redis,
                                           next_task_id,
                                           phase='dependency_error')
                            return None

                task_capacity = Capacity(self._redis.hget(next_keyt, 'ngpus'),
                                         self._redis.hget(next_keyt, 'ncpus'))
                candidate_task = CandidateTask(
                    next_task_id, task_entity, self._redis, task_capacity,
                    resource_mgr.entities_usage[task_entity], self._logger)
                # check now the task has a chance to be processed by any machine
                for _, machine in six.iteritems(resource_mgr._machines):
                    can_be_processed = machine._is_authorized(candidate_task._entity, candidate_task._capacity) \
                                       and candidate_task._capacity.inf_or_eq(machine._init_capacity)
                    if can_be_processed:
                        return candidate_task

                return None

        class ResourceManager:
            def __init__(self, worker):
                self.preallocated_task_resource = {}
                resources = service.list_resources()
                self._machines = {
                    res:
                    Worker.Machine(service, res, resources[res],
                                   worker._logger,
                                   service.get_server_detail(res, "priority"))
                    for res in resources
                }
                self.entities_usage = {}
                self.worker = worker

            def __str__(self):
                msg = " - ".join(str(m) for m in self._machines.values())
                return "ResourceManager ( %s )." % msg

            def load_machines(self, service_name):
                self.entities_usage = EntityUsage.initialize_entities_usage(
                    self.worker._mongo_client, service_name)
                for resource, machine in six.iteritems(self._machines):
                    current_xpu_usage = Capacity()
                    keygr = 'gpu_resource:%s:%s' % (self.worker._service,
                                                    resource)
                    keycr = 'cpu_resource:%s:%s' % (self.worker._service,
                                                    resource)

                    gpu_tasks = self.worker._redis.hgetall(keygr)
                    cpu_tasks = self.worker._redis.hgetall(keycr)

                    # cannot launch multiple tasks on a service without multi-tasking (ec2),
                    # nor on a service with hybrid task mode and dynamic resource mode (nova)
                    if not _is_resource_multitask(
                            service, resource) and (gpu_tasks or cpu_tasks):
                        continue
                    tmp_tasks = {}
                    for _, v in six.iteritems(gpu_tasks):
                        if v not in tmp_tasks:
                            task_entity = task.get_owner_entity(
                                self.worker._redis, v)
                            tmp_tasks[v] = task_entity
                        else:
                            task_entity = tmp_tasks[v]

                        if v not in self.preallocated_task_resource:
                            self.preallocated_task_resource[v] = resource
                        self._machines[resource].add_task(
                            v, self.worker._redis)
                        current_xpu_usage.incr_ngpus(1)
                        self.entities_usage[task_entity].add_current_usage(
                            Capacity(ngpus=1))

                    for _, v in six.iteritems(cpu_tasks):
                        if v not in tmp_tasks:
                            task_entity = task.get_owner_entity(
                                self.worker._redis, v)
                            tmp_tasks[v] = task_entity
                        else:
                            task_entity = tmp_tasks[v]

                        if v not in self.preallocated_task_resource:
                            self.preallocated_task_resource[v] = resource

                        self._machines[resource].add_task(
                            v, self.worker._redis)
                        current_xpu_usage.incr_ncpus(1)
                        self.entities_usage[task_entity].add_current_usage(
                            Capacity(ncpus=1))

                    available_xpus = machine._init_capacity - current_xpu_usage
                    self._machines[resource].set_available(available_xpus)
                    self.worker._logger.debug("\tresource %s: - free %s",
                                              resource, available_xpus)

                return len(self._machines) > 0

        with self._redis.acquire_lock('service:' + service.name):
            queue = 'queued:%s' % service.name
            count = self._redis.llen(queue)
            if count == 0:
                return

            resource_mgr = ResourceManager(self)
            if not resource_mgr.load_machines(service.name):
                return

            runnable_tasks = []
            for e in resource_mgr.entities_usage.values():
                self._logger.debug("[AZ-USE] %s", e)
            while count > 0:
                count -= 1
                next_task_id = self._redis.lindex(queue, count)
                candidate_task = CandidateTask.try_create(next_task_id)
                if candidate_task:
                    runnable_tasks.append(candidate_task)
            num_of_runnable_tasks = len(runnable_tasks)
            self._logger.info('Runnable task count: %d', num_of_runnable_tasks)
            if num_of_runnable_tasks > 0:
                sorted_runnable_tasks = sorted(runnable_tasks, reverse=True)
                for runnable_task in sorted_runnable_tasks:
                    task_id = runnable_task._task_id
                    nxpus = runnable_task._capacity
                    keyt = 'task:%s' % task_id
                    request_resource = self._redis.hget(keyt, 'resource')
                    allocated_resource = self._allocate_resource(
                        task_id, request_resource, service, nxpus)
                    if allocated_resource is not None:
                        self._logger.info('%s: resource %s reserved %s',
                                          task_id, allocated_resource, nxpus)
                        self._redis.hset(keyt, 'alloc_resource',
                                         allocated_resource)
                        task.set_status(self._redis, keyt, 'allocated')
                        task.work_queue(self._redis, task_id, service.name)
                        self._redis.lrem(queue, 0, task_id)
                        self._logger.info(
                            '[AZ-SELECTED] %s to be launched on %s', task_id,
                            service.name)
                        break
                    self._logger.info(
                        '[AZ-SELECTED] %s to be launched on %s, but not able to allocate resource',
                        task_id, service.name)
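In `EntityUsage.initialize_entities_usage` above, each entity gets the coefficient `weight_sum / rate`, so an entity entitled to a larger share multiplies its usage by a smaller factor and stays attractive longer. A small worked example with illustrative numbers:

    # Illustrative numbers: entity A is entitled to twice the share of B.
    entity_usage_weights = {'A': 2.0, 'B': 1.0}
    weight_sum = float(sum(w for w in entity_usage_weights.values() if w > 0))  # 3.0
    coeffs = {e: weight_sum / r for e, r in entity_usage_weights.items()}
    # coeffs == {'A': 1.5, 'B': 3.0}
    # With 2 GPUs in use each, A's weighted usage is 3.0 against B's 6.0,
    # so EntityUsage.__lt__ ranks A lower and A's tasks win the comparison
    # in _is_more_respectful_usage.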
Example #6
 def _handle_allocated_task(self, task_id):
     keyt = 'task:%s' % task_id
     _, service = self._get_service(keyt=keyt)
     content = json.loads(self._redis.hget(keyt, 'content'))
     resource = self._redis.hget(keyt, 'alloc_resource')
     self._logger.info('%s: launching on %s', task_id, service.name)
     try:
         entity_config = self._get_current_config(task_id)
         keygr = 'gpu_resource:%s:%s' % (service.name, resource)
         lgpu = []
         for k, v in six.iteritems(self._redis.hgetall(keygr)):
             if v == task_id:
                 lgpu.append(k)
         self._redis.hset(keyt, 'alloc_lgpu', ",".join(lgpu))
         keycr = 'cpu_resource:%s:%s' % (service.name, resource)
         lcpu = []
         for k, v in six.iteritems(self._redis.hgetall(keycr)):
             if v == task_id:
                 lcpu.append(k)
         self._redis.hset(keyt, 'alloc_lcpu', ",".join(lcpu))
         data = service.launch(
             task_id, content['options'], (lgpu, lcpu), resource,
             entity_config["storages"], entity_config["docker"],
             content['docker']['registry'], content['docker']['image'],
             content['docker']['tag'], content['docker']['command'],
             task.file_list(self._taskfile_dir,
                            task_id), content['wait_after_launch'],
             self._redis.hget(keyt, 'token'),
             content.get('support_statistics'))
     except EnvironmentError as e:
         # the resource is not available and will be set busy
         self._block_resource(resource, service, str(e))
         self._redis.hdel(keyt, 'alloc_resource')
         # set the task as queued again
         self._release_resource(
             service, resource, task_id,
             Capacity(self._redis.hget(keyt, 'ngpus'),
                      self._redis.hget(keyt, 'ncpus')))
         status = self._redis.hget(keyt, 'status')
         if status == 'terminating':
             return None
         task.set_status(self._redis, keyt, 'queued')
         task.service_queue(self._redis, task_id, service.name)
         self._logger.info(
             'could not launch [%s] %s on %s: blocking resource', str(e),
             task_id, resource)
         self._logger.info(traceback.format_exc())
         return None
     except Exception as e:
         # all other errors make the task fail
         self._logger.info('fail task [%s] - %s', task_id, str(e))
         self._logger.info(traceback.format_exc())
         task.append_log(self._taskfile_dir, task_id, str(e))
         auth_token = self._redis.hget(keyt, 'token')
         callback_url = service._config.get('callback_url')
         if auth_token:
             callback_url = callback_url.replace("://",
                                                 "://" + auth_token + ":x@")
         r = requests.get(os.path.join(callback_url, "task/terminate",
                                       task_id),
                          params={'phase': 'launch_error'})
         if r.status_code != 200:
             raise RuntimeError(
                 'incorrect result from \'task/terminate\' service: %s' %
                 r.text) from e
         task.terminate(self._redis, task_id, phase='launch_error')
         self._logger.info(traceback.format_exc())
         return None
     self._logger.info('%s: task started on %s', task_id, service.name)
     self._redis.hset(keyt, 'job', json.dumps(data))
     status = self._redis.hget(keyt, 'status')
     if status == 'terminating':
         return None
     task.set_status(self._redis, keyt, 'running')
     # For services that do not notify their activity, we should
     # poll the task status more regularly.
     task.work_queue(self._redis,
                     task_id,
                     service.name,
                     delay=service.is_notifying_activity and 120 or 30)
     return None
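The error handler in Example #6 smuggles the auth token into the callback URL as basic-auth userinfo by rewriting the scheme separator. In isolation, with hypothetical values:

    # hypothetical values showing the rewrite used in the handler above
    callback_url = 'https://launcher.example.com'
    auth_token = 'SECRET'
    callback_url = callback_url.replace("://", "://" + auth_token + ":x@")
    # -> 'https://SECRET:x@launcher.example.com'
    # requests treats the userinfo part of the URL as basic-auth credentials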
Example #7
    def _advance_task(self, task_id):
        """Tries to advance the task to the next status. If it can, re-queue it immediately
        to process the next stage. Otherwise, re-queue it after some delay to try again.
        """
        keyt = 'task:%s' % task_id
        with self._redis.acquire_lock(keyt, acquire_timeout=1,
                                      expire_time=600):
            status = self._redis.hget(keyt, 'status')
            if status == 'stopped':
                return

            service_name = self._redis.hget(keyt, 'service')
            if service_name not in self._services:
                raise ValueError('unknown service %s' % service_name)
            service = self._services[service_name]

            self._logger.info('%s: trying to advance from status %s', task_id,
                              status)

            if status == 'queued':
                resource = self._redis.hget(keyt, 'resource')
                resource = self._allocate_resource(task_id, resource, service)
                if resource is not None:
                    self._logger.info('%s: resource %s reserved', task_id,
                                      resource)
                    self._redis.hset(keyt, 'resource', resource)
                    task.set_status(self._redis, keyt, 'allocated')
                    task.queue(self._redis, task_id)
                else:
                    self._logger.warning('%s: no resources available, waiting',
                                         task_id)
                    self._wait_for_resource(service, task_id)

            elif status == 'allocated':
                content = json.loads(self._redis.hget(keyt, 'content'))
                resource = self._redis.hget(keyt, 'resource')
                self._logger.info('%s: launching on %s', task_id, service.name)
                data = service.launch(task_id, content['options'], resource,
                                      content['docker']['registry'],
                                      content['docker']['image'],
                                      content['docker']['tag'],
                                      content['docker']['command'],
                                      task.file_list(self._redis, task_id),
                                      content['wait_after_launch'])
                self._logger.info('%s: task started on %s', task_id,
                                  service.name)
                self._redis.hset(keyt, 'job', json.dumps(data))
                task.set_status(self._redis, keyt, 'running')
                # For services that do not notify their activity, we should
                # poll the task status more regularly.
                task.queue(self._redis,
                           task_id,
                           delay=service.is_notifying_activity and 120 or 30)

            elif status == 'running':
                data = json.loads(self._redis.hget(keyt, 'job'))
                status = service.status(data)
                if status == 'dead':
                    self._logger.info(
                        '%s: task no longer running on %s, request termination',
                        task_id, service.name)
                    task.terminate(self._redis, task_id, phase='exited')
                else:
                    task.queue(self._redis,
                               task_id,
                               delay=service.is_notifying_activity and 120
                               or 30)

            elif status == 'terminating':
                data = self._redis.hget(keyt, 'job')
                if data is not None:
                    data = json.loads(data)
                    self._logger.info('%s: terminating task', task_id)
                    try:
                        service.terminate(data)
                        self._logger.info('%s: terminated', task_id)
                    except Exception:
                        self._logger.warning('%s: failed to terminate',
                                             task_id)
                resource = self._redis.hget(keyt, 'resource')
                self._release_resource(service, resource, task_id)
                task.set_status(self._redis, keyt, 'stopped')
                task.disable(self._redis, task_id)
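A final note on the `delay=service.is_notifying_activity and 120 or 30` expression that recurs in these examples: it is the pre-2.5 ternary idiom, which only works because 120 is truthy. The conditional expression form says the same thing more safely:

    def polling_delay(is_notifying_activity):
        # equivalent to "is_notifying_activity and 120 or 30"; the old idiom
        # would break if the 'true' value were falsy (e.g. 0)
        return 120 if is_notifying_activity else 30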