import json
import time
from collections import Counter

import six

# `Capacity`, `EntityUsage`, `task` and `_is_resource_multitask` are assumed
# to be provided by the surrounding package; only standard-library and six
# imports are listed here.


def load_machines(self, service_name):
    """Load the machines of a service and rebuild their current usage from
    the gpu/cpu allocation hashes stored in Redis."""
    self.entities_usage = EntityUsage.initialize_entities_usage(
        self.worker._mongo_client, service_name)
    # Resolve the service object from the worker (assumes the worker keeps a
    # `_services` map, as `_advance_task` below does).
    service = self.worker._services[service_name]
    for resource, machine in six.iteritems(self._machines):
        current_xpu_usage = Capacity()
        keygr = 'gpu_resource:%s:%s' % (self.worker._service, resource)
        keycr = 'cpu_resource:%s:%s' % (self.worker._service, resource)
        gpu_tasks = self.worker._redis.hgetall(keygr)
        cpu_tasks = self.worker._redis.hgetall(keycr)
        # Cannot launch multiple tasks on a service without multi-tasking (ec2),
        # nor on a service with hybrid task mode and dynamic resource mode (nova).
        if not _is_resource_multitask(service, resource) and \
                (gpu_tasks or cpu_tasks):
            continue
        tmp_tasks = {}
        for _, v in six.iteritems(gpu_tasks):
            if v not in tmp_tasks:
                task_entity = task.get_owner_entity(self.worker._redis, v)
                tmp_tasks[v] = task_entity
            else:
                task_entity = tmp_tasks[v]
            if v not in self.preallocated_task_resource:
                self.preallocated_task_resource[v] = resource
            self._machines[resource].add_task(v, self.worker._redis)
            current_xpu_usage.incr_ngpus(1)
            self.entities_usage[task_entity].add_current_usage(
                Capacity(ngpus=1))
        for _, v in six.iteritems(cpu_tasks):
            if v not in tmp_tasks:
                task_entity = task.get_owner_entity(self.worker._redis, v)
                tmp_tasks[v] = task_entity
            else:
                task_entity = tmp_tasks[v]
            if v not in self.preallocated_task_resource:
                self.preallocated_task_resource[v] = resource
            self._machines[resource].add_task(v, self.worker._redis)
            current_xpu_usage.incr_ncpus(1)
            self.entities_usage[task_entity].add_current_usage(
                Capacity(ncpus=1))
        available_xpus = machine._init_capacity - current_xpu_usage
        self._machines[resource].set_available(available_xpus)
        self.worker._logger.debug("\tresource %s: - free %s",
                                  resource, available_xpus)
    return len(self._machines) > 0
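

# The bookkeeping above leans on the project's `Capacity` value type. The real
# class lives elsewhere in the package; the sketch below (hypothetical name
# `_CapacitySketch`, chosen to avoid shadowing the real import) only records
# the interface this section assumes: gpu/cpu counters, increment helpers,
# arithmetic, equality and truthiness.
class _CapacitySketch(object):
    def __init__(self, ngpus=0, ncpus=0):
        # Redis returns strings (or None), so coerce defensively.
        self.ngpus = int(ngpus or 0)
        self.ncpus = int(ncpus or 0)

    def incr_ngpus(self, n):
        self.ngpus += n

    def incr_ncpus(self, n):
        self.ncpus += n

    def __add__(self, other):
        return _CapacitySketch(self.ngpus + other.ngpus,
                               self.ncpus + other.ncpus)

    def __sub__(self, other):
        return _CapacitySketch(self.ngpus - other.ngpus,
                               self.ncpus - other.ncpus)

    def __eq__(self, other):
        return self.ngpus == other.ngpus and self.ncpus == other.ncpus

    def __bool__(self):
        return self.ngpus > 0 or self.ncpus > 0

    def __str__(self):
        return "(%d gpus, %d cpus)" % (self.ngpus, self.ncpus)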


def _usagecapacity(service):
    """Calculate the current usage of the service."""
    usage_xpu = Capacity()
    capacity_xpus = Capacity()
    busy = 0
    detail = {}
    resources = service.list_resources()
    for resource in resources:
        detail[resource] = {'busy': '', 'reserved': ''}
        r_capacity = resources[resource]
        detail[resource]['capacity'] = r_capacity
        capacity_xpus += r_capacity
        # `redis` is the module-level client shared by these helpers.
        reserved = redis.get("reserved:%s:%s" % (service.name, resource))
        if reserved:
            detail[resource]['reserved'] = reserved
        count_map_gpu = Counter()
        count_map_cpu = Counter()
        task_type = {}
        count_used_xpus = Capacity()
        r_usage_gpu = redis.hgetall(
            "gpu_resource:%s:%s" % (service.name, resource)).values()
        for t in r_usage_gpu:
            if t not in task_type:
                task_type[t] = redis.hget("task:%s" % t, "type")
            count_map_gpu[t] += 1
            count_used_xpus.incr_ngpus(1)
        r_usage_cpu = redis.hgetall(
            "cpu_resource:%s:%s" % (service.name, resource)).values()
        for t in r_usage_cpu:
            if t not in task_type:
                task_type[t] = redis.hget("task:%s" % t, "type")
            count_map_cpu[t] += 1
            count_used_xpus.incr_ncpus(1)
        detail[resource]['usage'] = [
            "%s %s: %d (%d)" % (task_type[t], t,
                                count_map_gpu[t], count_map_cpu[t])
            for t in task_type]
        detail[resource]['avail_gpus'] = \
            r_capacity.ngpus - count_used_xpus.ngpus
        detail[resource]['avail_cpus'] = \
            r_capacity.ncpus - count_used_xpus.ncpus
        err = redis.get("busy:%s:%s" % (service.name, resource))
        if err:
            detail[resource]['busy'] = err
            busy = busy + 1
        usage_xpu += count_used_xpus
    queued = redis.llen("queued:" + service.name)
    return ("%d (%d)" % (usage_xpu.ngpus, usage_xpu.ncpus),
            queued,
            "%d (%d)" % (capacity_xpus.ngpus, capacity_xpus.ncpus),
            busy,
            detail)
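

# For reference, the Redis layout this section reads and writes, as inferred
# from the calls above (the schema is not documented elsewhere in this section):
#
#   gpu_resource:<service>:<resource>   hash: gpu index -> task_id
#   cpu_resource:<service>:<resource>   hash: cpu index -> task_id
#   reserved:<service>:<resource>       string: task_id holding a reservation
#   busy:<service>:<resource>           string: error message if the node is blocked
#   queued:<service>                    list: task_ids waiting for resources
#   task:<task_id>                      hash: status, type, ngpus, ncpus, priority,
#                                       queued_time, parent, content, job, ...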


def _service_unqueue(self, service):
    """Find the best next task to push to the work queue."""
    with self._redis.acquire_lock('service:' + service.name):
        queue = 'queued:%s' % service.name
        count = self._redis.llen(queue)
        preallocated_task_count = {}
        preallocated_task_resource = {}
        avail_resource = {}
        resources = service.list_resources()
        reserved = {}
        # list free cpus/gpus on each node
        for resource in resources:
            current_xpu_usage = Capacity()
            capacity = resources[resource]
            keygr = 'gpu_resource:%s:%s' % (self._service, resource)
            keycr = 'cpu_resource:%s:%s' % (self._service, resource)
            key_reserved = 'reserved:%s:%s' % (service.name, resource)
            gpu_tasks = self._redis.hgetall(keygr)
            cpu_tasks = self._redis.hgetall(keycr)
            task_reserved = self._redis.get(key_reserved)
            # cannot launch multiple tasks on a service with no multi-tasking (ec2)
            if not service.resource_multitask and \
                    not task_reserved and \
                    (gpu_tasks or cpu_tasks):
                continue
            for k, v in six.iteritems(gpu_tasks):
                if v in preallocated_task_count:
                    preallocated_task_count[v].incr_ngpus(1)
                else:
                    preallocated_task_count[v] = Capacity(ngpus=1)
                    preallocated_task_resource[v] = resource
                current_xpu_usage.incr_ngpus(1)
            for k, v in six.iteritems(cpu_tasks):
                if v in preallocated_task_count:
                    preallocated_task_count[v].incr_ncpus(1)
                else:
                    preallocated_task_count[v] = Capacity(ncpus=1)
                    preallocated_task_resource[v] = resource
                current_xpu_usage.incr_ncpus(1)
            available_xpus = capacity - current_xpu_usage
            avail_resource[resource] = available_xpus
            reserved[resource] = task_reserved
            self._logger.debug("\tresource %s - reserved: %s - free %s",
                               resource, task_reserved or "False",
                               available_xpus)
        if len(avail_resource) == 0:
            return
        # Go through the tasks, find if there are tasks that can be launched
        # and queue the best one.
        best_task_id = None
        best_task_priority = -10000
        best_task_queued_time = 0
        while count > 0:
            count -= 1
            next_task_id = self._redis.lindex(queue, count)
            if next_task_id is not None:
                next_keyt = 'task:%s' % next_task_id
                # self._logger.debug("\tcheck task: %s", next_task_id)
                parent = self._redis.hget(next_keyt, 'parent')
                # check parent dependency
                if parent:
                    keyp = 'task:%s' % parent
                    if self._redis.exists(keyp):
                        # if the parent task is in the database, check for dependencies
                        parent_status = self._redis.hget(keyp, 'status')
                        if parent_status != 'stopped':
                            if parent_status == 'running':
                                # parent is still running so update queued time to be
                                # as close as possible to terminate time of parent task
                                self._redis.hset(next_keyt, "queued_time",
                                                 time.time())
                            continue
                        if self._redis.hget(keyp, 'message') != 'completed':
                            task.terminate(self._redis, next_task_id,
                                           phase='dependency_error')
                            continue
                nxpus = Capacity(self._redis.hget(next_keyt, 'ngpus'),
                                 self._redis.hget(next_keyt, 'ncpus'))
                foundResource = False
                if next_task_id in preallocated_task_count:
                    # if task is pre-allocated, it can only continue on the same node
                    r = preallocated_task_resource[next_task_id]
                    nxpus -= preallocated_task_count[next_task_id]
                    avail_r = avail_resource[r]
                    foundResource = (nxpus.ngpus == 0 and avail_r.ncpus != 0) or \
                                    (nxpus.ngpus != 0 and avail_r.ngpus != 0)
                else:
                    # can the task be launched on any node?
                    for r, v in six.iteritems(avail_resource):
                        # cannot launch a new task on a reserved node
                        if reserved[r]:
                            continue
                        # a cpu-only task still needs a free cpu on the node
                        if ((nxpus.ngpus > 0 and
                                resources[r].ngpus >= nxpus.ngpus and
                                v.ngpus > 0) or
                                (nxpus.ngpus == 0 and v.ncpus > 0)):
                            foundResource = True
                            break
                if not foundResource:
                    continue
                priority = int(self._redis.hget(next_keyt, 'priority'))
                queued_time = float(self._redis.hget(next_keyt, 'queued_time'))
                # highest priority first; on ties, the oldest queued task wins
                if priority > best_task_priority or (
                        priority == best_task_priority and
                        best_task_queued_time > queued_time):
                    best_task_priority = priority
                    best_task_id = next_task_id
                    best_task_queued_time = queued_time
        if best_task_id:
            self._logger.info('selected %s to be launched on %s',
                              best_task_id, service.name)
            task.work_queue(self._redis, best_task_id, service.name)
            self._redis.lrem(queue, 0, best_task_id)
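

# The selection loop above reduces to a simple rule: highest priority wins,
# and among equal priorities the oldest queued_time wins. A standalone sketch
# of that rule (hypothetical helper, not part of the worker):
def _select_best_task(candidates):
    """candidates: iterable of (task_id, priority, queued_time) tuples for
    tasks that already passed the dependency and resource checks."""
    best = None
    for task_id, priority, queued_time in candidates:
        if best is None or priority > best[1] or \
                (priority == best[1] and queued_time < best[2]):
            best = (task_id, priority, queued_time)
    return best and best[0]

# _select_best_task([("a", 0, 100.0), ("b", 5, 200.0), ("c", 5, 150.0)])
# returns "c": priority 5 beats 0, and "c" was queued before "b".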


def _advance_task(self, task_id):
    """Tries to advance the task to the next status. If it can, re-queue it
    immediately to process the next stage. Otherwise, re-queue it after some
    delay to try again.
    """
    keyt = 'task:%s' % task_id
    with self._redis.acquire_lock(keyt, acquire_timeout=1, expire_time=600):
        status = self._redis.hget(keyt, 'status')
        if status == 'stopped':
            return
        service_name = self._redis.hget(keyt, 'service')
        if service_name not in self._services:
            raise ValueError('unknown service %s' % service_name)
        service = self._services[service_name]
        self._logger.info('%s: trying to advance from status %s',
                          task_id, status)
        if status == 'queued':
            resource = self._redis.hget(keyt, 'resource')
            parent = self._redis.hget(keyt, 'parent')
            if parent:
                keyp = 'task:%s' % parent
                # if the parent task is in the database, check for dependencies
                if self._redis.exists(keyp):
                    status = self._redis.hget(keyp, 'status')
                    if status == 'stopped':
                        if self._redis.hget(keyp, 'message') != 'completed':
                            task.terminate(self._redis, task_id,
                                           phase='dependency_error')
                            return
                    else:
                        self._logger.warning(
                            '%s: depending on other task, waiting', task_id)
                        task.service_queue(self._redis, task_id, service.name)
                        return
            nxpus = Capacity(self._redis.hget(keyt, 'ngpus'),
                             self._redis.hget(keyt, 'ncpus'))
            resource, available_xpus = self._allocate_resource(
                task_id, resource, service, nxpus)
            if resource is not None:
                self._logger.info('%s: resource %s reserved %s/%s',
                                  task_id, resource, available_xpus, nxpus)
                self._redis.hset(keyt, 'alloc_resource', resource)
                if nxpus == available_xpus:
                    task.set_status(self._redis, keyt, 'allocated')
                else:
                    task.set_status(self._redis, keyt, 'allocating')
                task.work_queue(self._redis, task_id, service_name)
            else:
                self._logger.warning('%s: no resources available, waiting',
                                     task_id)
                task.service_queue(self._redis, task_id, service.name)
        elif status == 'allocating':
            resource = self._redis.hget(keyt, 'alloc_resource')
            nxpus = Capacity(self._redis.hget(keyt, 'ngpus'),
                             self._redis.hget(keyt, 'ncpus'))
            # count the gpus/cpus already reserved for this task on the node
            already_allocated_xpus = Capacity()
            keygr = 'gpu_resource:%s:%s' % (service.name, resource)
            for k, v in six.iteritems(self._redis.hgetall(keygr)):
                if v == task_id:
                    already_allocated_xpus.incr_ngpus(1)
            keycr = 'cpu_resource:%s:%s' % (service.name, resource)
            for k, v in six.iteritems(self._redis.hgetall(keycr)):
                if v == task_id:
                    already_allocated_xpus.incr_ncpus(1)
            capacity = service.list_resources()[resource]
            available_xpus, remaining_xpus = self._reserve_resource(
                service, resource, capacity, task_id,
                nxpus - already_allocated_xpus, Capacity(),
                Capacity(-1, -1), True)
            self._logger.info(
                'task: %s - resource: %s (capacity %s) - already %s - available %s',
                task_id, resource, capacity, already_allocated_xpus,
                available_xpus)
            if available_xpus and \
                    available_xpus == nxpus - already_allocated_xpus:
                task.set_status(self._redis, keyt, 'allocated')
                key_reserved = 'reserved:%s:%s' % (service.name, resource)
                self._redis.delete(key_reserved)
                task.work_queue(self._redis, task_id, service.name)
            else:
                task.work_queue(self._redis, task_id, service.name, delay=20)
        elif status == 'allocated':
            content = json.loads(self._redis.hget(keyt, 'content'))
            resource = self._redis.hget(keyt, 'alloc_resource')
            self._logger.info('%s: launching on %s', task_id, service.name)
            try:
                keygr = 'gpu_resource:%s:%s' % (service.name, resource)
                lgpu = []
                for k, v in six.iteritems(self._redis.hgetall(keygr)):
                    if v == task_id:
                        lgpu.append(k)
                self._redis.hset(keyt, 'alloc_lgpu', ",".join(lgpu))
                keycr = 'cpu_resource:%s:%s' % (service.name, resource)
                lcpu = []
                for k, v in six.iteritems(self._redis.hgetall(keycr)):
                    if v == task_id:
                        lcpu.append(k)
                self._redis.hset(keyt, 'alloc_lcpu', ",".join(lcpu))
                data = service.launch(
                    task_id,
                    content['options'],
                    (lgpu, lcpu),
                    resource,
                    content['docker']['registry'],
                    content['docker']['image'],
                    content['docker']['tag'],
                    content['docker']['command'],
                    task.file_list(self._redis, self._taskfile_dir, task_id),
                    content['wait_after_launch'],
                    self._redis.hget(keyt, 'token'),
                    content.get('support_statistics'))
            except EnvironmentError as e:
                # the resource is not available and will be set busy
                self._block_resource(resource, service, str(e))
                self._redis.hdel(keyt, 'alloc_resource')
                # set the task as queued again
                self._release_resource(
                    service, resource, task_id,
                    Capacity(self._redis.hget(keyt, 'ngpus'),
                             self._redis.hget(keyt, 'ncpus')))
                task.set_status(self._redis, keyt, 'queued')
                task.service_queue(self._redis, task_id, service.name)
                self._logger.info(
                    'could not launch [%s] %s on %s: blocking resource',
                    str(e), task_id, resource)
                return
            except Exception as e:
                # all other errors make the task fail
                self._logger.info('fail task [%s] - %s', task_id, str(e))
                task.append_log(self._redis, self._taskfile_dir, task_id,
                                str(e))
                task.terminate(self._redis, task_id, phase='launch_error')
                return
            self._logger.info('%s: task started on %s', task_id, service.name)
            self._redis.hset(keyt, 'job', json.dumps(data))
            task.set_status(self._redis, keyt, 'running')
            # For services that do not notify their activity, we should
            # poll the task status more regularly.
            task.work_queue(self._redis, task_id, service.name,
                            delay=service.is_notifying_activity and 120 or 30)
        elif status == 'running':
            self._logger.debug('- checking activity of task: %s', task_id)
            data = json.loads(self._redis.hget(keyt, 'job'))
            try:
                status = service.status(task_id, data)
            except Exception as e:
                self._logger.info('cannot get status for [%s] - %s',
                                  task_id, str(e))
                self._redis.hincrby(keyt, 'status_fail', 1)
                # redis returns the field as a string; coerce before comparing
                if int(self._redis.hget(keyt, 'status_fail')) > 4:
                    task.terminate(self._redis, task_id,
                                   phase='lost_connection')
                    return
            else:
                self._redis.hdel(keyt, 'status_fail')
            if status == 'dead':
                self._logger.info(
                    '%s: task no longer running on %s, request termination',
                    task_id, service.name)
                task.terminate(self._redis, task_id, phase='exited')
            else:
                task.work_queue(self._redis, task_id, service.name,
                                delay=service.is_notifying_activity
                                and 600 or 120)
        elif status == 'terminating':
            data = self._redis.hget(keyt, 'job')
            nxpus = Capacity(self._redis.hget(keyt, 'ngpus'),
                             self._redis.hget(keyt, 'ncpus'))
            if data is not None:
                container_id = self._redis.hget(keyt, 'container_id')
                data = json.loads(data)
                data['container_id'] = container_id
                self._logger.info('%s: terminating task (job: %s)',
                                  task_id, json.dumps(data))
                try:
                    service.terminate(data)
                    self._logger.info('%s: terminated', task_id)
                except Exception:
                    self._logger.warning('%s: failed to terminate', task_id)
            else:
                self._logger.info('%s: terminating task (on error)', task_id)
            resource = self._redis.hget(keyt, 'alloc_resource')
            if resource:
                self._release_resource(service, resource, task_id, nxpus)
            task.set_status(self._redis, keyt, 'stopped')
            task.disable(self._redis, task_id)
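

# Taken together, _advance_task drives each task through the lifecycle below
# (one hop per work-queue pass; 'stopped' is terminal). The transition table
# is a reference sketch only (hypothetical, not used by the worker):
_TASK_TRANSITIONS = {
    'queued':      ('allocated', 'allocating'),  # full vs. partial allocation
    'allocating':  ('allocated',),   # once the remaining gpus/cpus are reserved
    'allocated':   ('running', 'queued'),  # launch succeeded / resource blocked
    'running':     ('terminating',),  # task.terminate on exit or lost connection
    'terminating': ('stopped',),
}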