def update_job_info(self, schedule):
    for item in schedule:
        job_col = mg.get_col(self.db, item[0])
        job_filter = 'job_info.tasks.%s.container_name' % item[1]

        # add node field
        target = 'job_info.tasks.%s.node' % item[1]
        mg.update_doc(job_col, job_filter, item[1], target, item[2])

        # add cpuset_cpus field
        target = 'job_info.tasks.%s.cpuset_cpus' % item[1]
        mg.update_doc(job_col, job_filter, item[1], target, ','.join(item[3]))
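# Example (hypothetical data; the tuple layout is inferred from the indexing
# above): each schedule item is assumed to be
# (job_name, task_name, worker_hostname, assigned_core_ids), e.g.
#
#   self.update_job_info([('job1', 'job1_task1', 'worker-0', ['0', '1'])])
#
# would set job_info.tasks.job1_task1.node = 'worker-0' and
# job_info.tasks.job1_task1.cpuset_cpus = '0,1' in the job1 collection.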
def reset_db(self):
    all_cols = mg.get_all_cols(self.__db)

    if SystemConstants.WorkersResourceInfo in all_cols:
        # Drop worker resource info collection
        mg.drop_col(self.__db_client, SystemConstants.MONGODB_NAME,
                    SystemConstants.WorkersResourceInfo)

    if SystemConstants.WorkersInfo in all_cols:
        # Reset worker info collection
        workers_info_col = mg.get_col(self.__db, SystemConstants.WorkersInfo)
        workers_info_data = mg.find_col(workers_info_col)
        for index, worker in enumerate(workers_info_data[:]):
            # mark every core of this worker as free again
            for cpu in worker['CPUs']:
                workers_info_data[index]['CPUs'][cpu] = False
            mg.update_doc(col=workers_info_col,
                          filter_key='hostname',
                          filter_value=worker['hostname'],
                          target_key='CPUs',
                          target_value=workers_info_data[index]['CPUs'])

    print('Reset MongoDB.')
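# Example (assumed document shape, inferred from the loop above): a
# WorkersInfo document maps core IDs to an "in use" flag, so reset_db()
# turns, e.g.,
#
#   {'hostname': 'worker-0', 'CPUs': {'0': True, '1': False}}
# into
#   {'hostname': 'worker-0', 'CPUs': {'0': False, '1': False}}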
def request_new_job():
    data = request.get_json()

    # create the overlay network if it doesn't exist yet
    if docker.verify_network(dockerClient, data['job_info']['network']['name']):
        create_overlay_network(network=data['job_info']['network']['name'],
                               driver=data['job_info']['network']['driver'],
                               subnet=data['job_info']['network']['subnet'])
    try:
        # make directory for nfs
        nfs_master_path = '/var/nfs/RESTfulSwarm/%s' % data['job_name']
        os.mkdir(path=nfs_master_path)

        # attach every task to the job's overlay network
        for _task in data['job_info']['tasks']:
            data['job_info']['tasks'][_task].update(
                {'network': data['job_info']['network']['name']})

        # deploy job
        for _task in list(data['job_info']['tasks'].values()):
            new_container(_task)

        # update job status
        mg.update_doc(db[data['job_name']], 'job_name', data['job_name'],
                      'status', 'Deployed')
        mg.update_doc(db[data['job_name']], 'job_name', data['job_name'],
                      'start_time', time.time())

        # update task status
        for task in data['job_info']['tasks'].keys():
            filter_key = 'job_info.tasks.%s.container_name' % task
            target_key = 'job_info.tasks.%s.status' % task
            mg.update_doc(db[data['job_name']], filter_key, task,
                          target_key, 'Deployed')

        job_buffer.append(data['job_name'])
        return 'OK', 200
    except Exception as ex:
        traceback.print_exc(file=sys.stderr)
        return str(ex), 400
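# Example (hypothetical request body; only the fields read by the handler
# above are grounded in the code, "image" is an illustrative extra):
#
#   {
#     "job_name": "job1",
#     "job_info": {
#       "network": {"name": "job1-net", "driver": "overlay",
#                   "subnet": "10.0.0.0/24"},
#       "tasks": {
#         "job1_task1": {"container_name": "job1_task1",
#                        "image": "ubuntu:16.04"}
#       }
#     }
#   }
#
# On success the handler answers ('OK', 200) and marks the job and all of
# its tasks as 'Deployed' in MongoDB.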
def process_cores_scheduling_result(self, schedule, core_request,
                                    mem_request_arr, available_workers):
    job_index = 0
    result = []
    waiting_plan = []
    global_task_index = 0
    next_job = False
    task_index = 0
    temp_result = []
    # print(schedule)
    for index, item in enumerate(schedule):
        if next_job is False:
            global_task_index += len(core_request[job_index][1].items())
            next_job = True
        # item = (task_index, assigned_worker_index); the worker index is -1
        # when the task could not be scheduled
        if item[1] != -1:
            # take the first n free cores of the assigned worker, since the
            # worker may have more free cores than the task requested
            cores = []
            for j in range(list(core_request[job_index][1].values())[task_index]):
                cores.append(list(available_workers.values())[item[1]][0])
                # remove the claimed core from the worker's free list
                key = list(available_workers.keys())[item[1]]
                available_workers[key].pop(0)
            result_item = (core_request[job_index][0],
                           list(core_request[job_index][1].keys())[task_index],
                           list(available_workers.keys())[item[1]],
                           cores)
            temp_result.append(result_item)
            # update free memory
            worker_info = list(self.workers_col.find(
                {'hostname': list(available_workers.keys())[item[1]]}))[0]
            new_free_mem = utl.memory_size_translator(worker_info['MemFree'])
            request_mem = utl.memory_size_translator(mem_request_arr[index])
            new_free_mem -= request_mem
            new_free_mem = str(new_free_mem) + 'm'
            mg.update_doc(self.workers_col, 'hostname',
                          list(available_workers.keys())[item[1]],
                          'MemFree', new_free_mem)
        else:
            # not enough resources, so the whole job goes to the waiting list
            waiting_plan.append(core_request[job_index][0])
        # advance to the next job once all of its tasks have been consumed
        if index == global_task_index - 1:
            # keep the job only if every one of its tasks was placed
            if len(temp_result) == len(core_request[job_index][1]):
                result.extend(temp_result)
            else:
                waiting_plan.append(core_request[job_index][0])
            job_index += 1
            next_job = False
            task_index = 0
            temp_result = []
        else:
            task_index += 1
    waiting_plan = list(set(waiting_plan))
    return result, waiting_plan
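# Example (hypothetical data; shapes inferred from the indexing above, and
# assuming self.workers_col holds a matching worker document):
#
#   core_request      = [('job1', {'job1_task1': 2})]   # job -> task core counts
#   mem_request_arr   = ['100m']                        # per schedule item
#   available_workers = {'worker-0': ['0', '1', '2']}   # host -> free core IDs
#   schedule          = [(0, 0)]   # task 0 placed on worker index 0
#
#   result, waiting = self.process_cores_scheduling_result(
#       schedule, core_request, mem_request_arr, available_workers)
#   # result  -> [('job1', 'job1_task1', 'worker-0', ['0', '1'])]
#   # waiting -> []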
def update_db(msg):
    worker_host = msg.split()[0]
    msg = msg.split()[1]
    task_name = msg
    if task_name not in deployed_tasks:
        deployed_tasks.append(task_name)
        job_name = msg.split('_')[0]
        job_col = mg.get_col(self.__db, job_name)

        # update job collection -- task status
        filter_key = 'job_info.tasks.%s.container_name' % task_name
        target_key = 'job_info.tasks.%s.status' % task_name
        mg.update_doc(job_col, filter_key, task_name, target_key, 'Down')

        job_details = mg.find_col(job_col)[0]

        # update job status if all tasks are down
        flag = True
        for task in job_details['job_info']['tasks']:
            if job_details['job_info']['tasks'][task]['status'] != 'Down':
                flag = False
        if flag:
            mg.update_doc(job_col, 'job_name', job_name, 'status', 'Down')
            mg.update_doc(job_col, 'job_name', job_name, 'end_time',
                          time.time())
        self.__logger.info('Updating Job collection %s.' % job_name)

        # get the resource utilization of the 'Down' container
        cores = job_details['job_info']['tasks'][task_name]['cpuset_cpus']
        cores = cores.split(',')
        memory = job_details['job_info']['tasks'][task_name]['mem_limit']
        self.__logger.info('Collecting resources from down containers.')

        # update WorkersInfo collection
        # update cores info
        def update_cores(core):
            _target_key = 'CPUs.%s' % core
            mg.update_doc(self.__workers_info, 'hostname', worker_host,
                          _target_key, False)
            self.__logger.info('Released core %s in worker %s.'
                               % (core, worker_host))

        for core in cores:
            update_cores(core)

        # update memory info
        worker_info = mg.filter_col(self.__workers_info, 'hostname',
                                    worker_host)
        free_memory = worker_info['MemFree']
        memory = float(memory.split('m')[0])
        free_memory = float(free_memory.split('m')[0])
        updated_memory = memory + free_memory
        updated_memory = str(updated_memory) + 'm'
        mg.update_doc(self.__workers_info, 'hostname', worker_host,
                      'MemFree', updated_memory)
        self.__logger.info('Updating memory resources in WorkersInfo '
                           'collection.')

        # update worker resource collection
        mg.update_workers_resource_col(self.__workers_info, worker_host,
                                       self.__workers_resource_info)
        self.__logger.info('Updated WorkersResourceInfo collection, because '
                           'some cores were released.')

        # update job collection -- cpuset_cpus
        target_key = 'job_info.tasks.%s.cpuset_cpus' % task_name
        mg.update_doc(job_col, filter_key, task_name, target_key, '')
        self.__logger.info('Updated Job collection. Released used cores.')

        # update job collection -- mem_limit
        target_key = 'job_info.tasks.%s.mem_limit' % task_name
        mg.update_doc(job_col, filter_key, task_name, target_key, '')
        self.__logger.info('Updated Job collection. Released used memory.')
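# Example (assumed message format, inferred from the parsing above): the
# notification is '<worker_host> <task_name>' and the job name is the prefix
# of the task name before the first underscore, e.g.
#
#   update_db('worker-0 job1_task1')
#
# marks job_info.tasks.job1_task1.status = 'Down' in the job1 collection,
# returns the task's cores and memory to worker-0 in WorkersInfo, and, once
# every task of the job is 'Down', stamps the job's status and end_time.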
def update_workers_info(self, schedule):
    for item in schedule:
        # mark every core assigned to this task as occupied on its worker
        for core in item[3]:
            target = 'CPUs.%s' % str(core)
            mg.update_doc(self.workers_col, 'hostname', item[2], target, True)
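# Example (same assumed schedule tuples as update_job_info above):
#
#   self.update_workers_info([('job1', 'job1_task1', 'worker-0', ['0', '1'])])
#
# sets CPUs.0 = True and CPUs.1 = True on the WorkersInfo document whose
# hostname is 'worker-0', marking those cores as busy.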