def find_resource_n_h_local(itvs, hy, rqts, top, h, h_bottom):
    """Take ``rqts[h + 1]`` resources from one single block of ``top``.

    Every entry of ``top`` is intersected with ``itvs``. The resulting blocks
    are visited from the smallest available size to the largest, and the
    first block holding at least the requested number of resources provides
    the result.

    :param itvs: available resources, as a list of inclusive
        ``(begin, end)`` intervals.
    :param hy: not used by this function.
    :param rqts: indexable of requested counts; only ``rqts[h + 1]`` is read.
    :param top: iterable of interval lists, one per candidate block.
    :param h: index such that ``rqts[h + 1]`` is the number of resources
        to take.
    :param h_bottom: not used by this function.
    :returns: a list of intervals taken from one block and covering the
        requested number of resources, or ``[]`` when no block is large
        enough.
    """
    n = rqts[h + 1]

    avail_bks = [intersec(top_itvs, itvs) for top_itvs in top]
    size_bks = [itvs_size(avail_itvs) for avail_itvs in avail_bks]

    # Smallest suitable block first.
    for idx in sorted(range(len(avail_bks)), key=lambda j: size_bks[j]):
        # The size must be read with the block's own index, not with its
        # rank in the sorted order.
        if size_bks[idx] < n:
            continue

        res_itvs = []
        k = 0  # number of resources already taken from this block
        for itv in avail_bks[idx]:
            size_itv = itv[1] - itv[0] + 1
            if (k + size_itv) < n:
                res_itvs.append(itv)
                k += size_itv
            else:
                # Last interval: keep only what is still missing.
                res_itvs.append((itv[0], itv[0] + (n - k - 1)))
                return res_itvs

    return []
def update(self, job, prev_nb_res=0, prev_duration=0):
    """Add ``job`` to every quota counter it contributes to.

    For each type ``t`` of ``quotas_job_types`` that is ``'*'`` or listed in
    ``job.types``, the eight ``(queue, project, t, user)`` combinations where
    queue, project and user are either the job's value or ``'*'`` are
    updated. Each counter entry is indexable: ``[0]`` receives the number of
    resources, ``[1]`` is incremented by one job and ``[2]`` receives
    resources * duration.

    :param job: job whose ``queue_name``, ``project``, ``user`` and ``types``
        are read; ``res_set`` and ``walltime`` are used when present.
    :param prev_nb_res: resource count used when ``job`` has no ``res_set``.
    :param prev_duration: duration used when ``job`` has no ``walltime``.
    """
    queue = job.queue_name
    project = job.project
    user = job.user

    # TOREMOVE ?
    # NOTE(review): the inner test looks at ``self`` although the attribute
    # is stored on ``job``; if ``self`` ever carries ``nb_res`` then
    # ``nb_resources`` stays unbound below. Confirm the intent.
    if not hasattr(job, 'res_set'):
        nb_resources = prev_nb_res
    elif not hasattr(self, 'nb_res'):
        job.nb_res = itvs_size(intersec(job.res_set, rs.default_resource_itvs))
        nb_resources = job.nb_res

    duration = job.walltime if hasattr(job, 'walltime') else prev_duration

    for t in quotas_job_types:
        if t != '*' and t not in job.types:
            continue

        keys = (
            ('*', '*', t, '*'),
            ('*', '*', t, user),
            ('*', project, t, '*'),
            (queue, '*', t, '*'),
            (queue, project, t, user),
            (queue, project, t, '*'),
            (queue, '*', t, user),
            ('*', project, t, user),
        )

        # Update the number of used resources
        for key in keys:
            self.counters[key][0] += nb_resources

        # Update the number of running jobs
        for key in keys:
            self.counters[key][1] += 1

        # Update the resource * second
        for key in keys:
            self.counters[key][2] += nb_resources * duration
def check_reservation_jobs(plt, resource_set, queue_name, all_slot_sets, current_time_sec):
    """Processing of new Advance Reservations.

    Asks ``plt.get_waiting_jobs`` for the jobs of ``queue_name`` with the
    ``'toSchedule'`` selector, then tries to place each of them at its
    requested start time in the ``'default'`` slot set. Only the first
    moldable request of each job is considered.

    For every job:

    * if ``current_time_sec`` is at or past ``start_time + walltime`` the job
      is set to ``'toError'`` and skipped;
    * a start time in the past is moved to ``current_time_sec``;
    * if resources are found (and quotas, when enabled in ``config``, are
      respected) the job gets ``moldable_id`` and ``res_set`` assigned and is
      set to ``'toAckReservation'``; otherwise it is set to ``'toError'``.

    Jobs that could be placed are handed to ``save_assigns``.

    :param plt: object providing ``get_waiting_jobs`` and ``get_data_jobs``.
    :param resource_set: object whose ``hierarchy`` and
        ``default_resource_itvs`` attributes are read.
    :param queue_name: name of the queue, used for lookup and log messages.
    :param all_slot_sets: mapping holding at least a ``'default'`` entry
        with a ``slots`` attribute.
    :param current_time_sec: current time, compared with job start times.
    :returns: None
    """
    logger.debug("Queue " + queue_name + ": begin processing of new reservations")

    ar_jobs_scheduled = {}

    ar_jobs, ar_jids, nb_ar_jobs = plt.get_waiting_jobs(queue_name, 'toSchedule')
    logger.debug("nb_ar_jobs:" + str(nb_ar_jobs))

    if nb_ar_jobs > 0:
        job_security_time = int(config['SCHEDULER_JOB_SECURITY_TIME'])
        plt.get_data_jobs(ar_jobs, ar_jids, resource_set, job_security_time)

        logger.debug("Try and schedule new reservations")
        for jid in ar_jids:
            job = ar_jobs[jid]
            logger.debug("Find resource for Advance Reservation job:" + str(job.id))

            # It is a reservation, we take care only of the first moldable job
            moldable_id, walltime, hy_res_rqts = job.mld_res_rqts[0]

            # test if reservation is too old
            if current_time_sec >= (job.start_time + walltime):
                logger.warn("[" + str(job.id) + "] Canceling job: reservation is too old")
                set_job_message(job.id, "Reservation too old")
                set_job_state(job.id, 'toError')
                continue
            else:
                if job.start_time < current_time_sec:
                    # TODO update to DB ????
                    job.start_time = current_time_sec

            ss_name = 'default'

            # TODO container
            # if 'inner' in job.types:
            #    ss_name = job.types['inner']

            # TODO: test if container is an AR job

            slots = all_slot_sets[ss_name].slots

            t_e = job.start_time + walltime - job_security_time
            sid_left, sid_right = get_encompassing_slots(slots, job.start_time, t_e)

            if job.ts or (job.ph == ALLOW):
                itvs_avail = intersec_ts_ph_itvs_slots(slots, sid_left, sid_right, job)
            else:
                itvs_avail = intersec_itvs_slots(slots, sid_left, sid_right)

            itvs = find_resource_hierarchies_job(itvs_avail, hy_res_rqts,
                                                 resource_set.hierarchy)

            if ('QUOTAS' in config) and (config['QUOTAS'] == 'yes'):
                nb_res = itvs_size(intersec(itvs, resource_set.default_resource_itvs))
                res = check_slots_quotas(slots, sid_left, sid_right, job, nb_res, walltime)
                (quotas_ok, quotas_msg, rule, value) = res
                if not quotas_ok:
                    # NOTE(review): emptying itvs makes the generic
                    # "not enough resources" branch below run as well, so
                    # set_job_message is called a second time with the
                    # generic text. Confirm this is intended.
                    itvs = []
                    logger.info("Quotas limitaion reached, job:" + str(job.id) +
                                ", " + quotas_msg + ", rule: " + str(rule) +
                                ", value: " + str(value))
                    set_job_state(job.id, 'toError')
                    set_job_message(job.id,
                                    "This advance reservation cannot run due to quotas")

            if itvs == []:
                # not enough resource available
                logger.warn("[" + str(job.id) +
                            "] advance reservation cannot be validated, not enough resources")
                set_job_state(job.id, 'toError')
                set_job_message(job.id, "This advance reservation cannot run")
            else:
                # The reservation can be scheduled
                logger.debug("[" + str(job.id) + "] advance reservation is validated")
                job.moldable_id = moldable_id
                job.res_set = itvs
                ar_jobs_scheduled[job.id] = job
                # if 'container' in job.types
                #    slot = Slot(1, 0, 0, job.res_set[:], job.start_time,
                #                job.start_time + job.walltime - job_security_time)
                # slot.show()
                #    slots_sets[job.id] = SlotSet(slot)

                set_job_state(job.id, 'toAckReservation')

            set_job_resa_state(job.id, 'Scheduled')

    # ar_jobs_scheduled is a dict: comparing it with [] is always true, so
    # test emptiness directly and only save when something was scheduled.
    if ar_jobs_scheduled:
        logger.debug("Save AR jobs' assignements in database")
        save_assigns(ar_jobs_scheduled, resource_set)

    logger.debug("Queue " + queue_name + ": end processing of new reservations")
def handle_waiting_reservation_jobs(queue_name, resource_set, job_security_time, current_time_sec):
    """Handle accepted Advance Reservations that have not been launched yet.

    The jobs come from ``get_waiting_scheduled_AR_jobs``. For each of them:

    * if ``current_time_sec`` is past ``start_time + walltime`` the job is
      set to ``'Error'``;
    * if its start time is in the past and none of its resources intersects
      ``resource_set.roid_itvs``, its gantt start time is pushed to
      ``current_time_sec + 1``;
    * if its start time is in the past and ``reservation_waiting_timeout``
      has not elapsed, the start is pushed the same way while some resources
      are missing;
    * once the timeout has elapsed, the missing resources are removed from
      the job, an event is recorded and the ``R=<n>`` part of the job
      message is rewritten with the remaining resource count.

    :param queue_name: name of the queue, used for lookup and log messages.
    :param resource_set: object whose ``roid_itvs`` attribute is read.
    :param job_security_time: forwarded to ``get_waiting_scheduled_AR_jobs``.
    :param current_time_sec: current time, compared with job start times.
    :returns: None
    """
    logger.debug("Queue " + queue_name +
                 ": begin processing accepted Advance Reservations")

    ar_jobs = get_waiting_scheduled_AR_jobs(queue_name, resource_set,
                                            job_security_time, current_time_sec)

    for job in ar_jobs:
        moldable_id = job.moldable_id
        walltime = job.walltime

        # Test if AR job is expired and handle it
        if current_time_sec > (job.start_time + walltime):
            logger.warn("[" + str(job.id) +
                        "] set job state to Error: avdance reservation expired "
                        "and couldn't be started")
            set_job_state(job.id, 'Error')
            set_job_message(job.id, "Reservation expired and couldn't be started.")
            continue

        # Determine current available resources
        avail_res = intersec(resource_set.roid_itvs, job.res_set)

        # Test if the AR job is waiting to be launched due to nodes' unavailabilities
        if (avail_res == []) and (job.start_time < current_time_sec):
            logger.warn("[%s] advance reservation is waiting because no resource is present"
                        % str(job.id))
            # Delay launching time
            set_gantt_job_start_time(moldable_id, current_time_sec + 1)

        elif job.start_time < current_time_sec:
            if (job.start_time + reservation_waiting_timeout) > current_time_sec:
                if not equal_itvs(avail_res, job.res_set):
                    # The expected resources are not all available,
                    # wait the specified timeout
                    # (adjacent literals instead of a backslash inside the
                    # string, so no source indentation leaks into the log)
                    logger.warn("[" + str(job.id) +
                                "] advance reservation is waiting because not all "
                                "resources are available yet")
                    set_gantt_job_start_time(moldable_id, current_time_sec + 1)
            else:
                # It's time to launch the AR job, remove missing resources
                missing_resources_itvs = sub_intervals(job.res_set, avail_res)
                remove_gantt_resource_job(moldable_id, missing_resources_itvs,
                                          resource_set)
                logger.warn("[" + str(job.id) +
                            "] remove some resources assigned to this advance reservation, "
                            "because there are not Alive")

                add_new_event('SCHEDULER_REDUCE_NB_RESSOURCES_FOR_ADVANCE_RESERVATION',
                              job.id,
                              "[MetaSched] Reduce the number of resources for the job " +
                              str(job.id))

                nb_res = itvs_size(job.res_set) - itvs_size(missing_resources_itvs)
                new_message = re.sub(r'R=\d+', 'R=' + str(nb_res), job.message)
                if new_message != job.message:
                    set_job_message(job.id, new_message)

    logger.debug("Queue " + queue_name +
                 ": end processing of reservations with missing resources")
def find_first_suitable_contiguous_slots(slots_set, job, res_rqt, hy, min_start_time):
    '''Find the first run of contiguous slots able to host one moldable request.

    Starting from a left slot, the right slot is extended until the covered
    span (``slot_e - slot_b + 1``) reaches ``walltime``; the resources
    available over that run are then searched for the requested hierarchy.
    When nothing fits (or quotas, if enabled in ``config``, refuse the
    placement) the left slot moves to its ``next`` and the search repeats.

    :param slots_set: object whose ``slots``, ``cache`` and ``begin``
        attributes are read; ``cache`` may be updated.
    :param job: job to place; ``id``, ``key_cache``, ``ts``, ``ph``, ``find``
        and, when ``find`` is set, ``find_func`` / ``find_args`` /
        ``find_kwargs`` are read.
    :param res_rqt: tuple ``(mld_id, walltime, hy_res_rqts)``.
    :param hy: hierarchy description forwarded to the resource finder.
    :param min_start_time: when negative, the search starts from the cached
        slot id (if any) or from slot 1; otherwise slots whose ``b`` is
        lower than this value are skipped first.
    :returns: ``(itvs, sid_left, sid_right)`` on success, ``([], -1, -1)``
        when a slot id of 0 is reached on either side.
    '''
    (mld_id, walltime, hy_res_rqts) = res_rqt

    itvs = []

    slots = slots_set.slots
    cache = slots_set.cache

    # flag to control cache update for considered entry
    no_cache = False

    # updated_cache = False
    sid_left = 1
    if min_start_time < 0:
        # to not always begin by the first slots ( O(n^2) )
        # TODO cache_by_container/inner + moldable + time_sharing(?)
        if job.key_cache and (job.key_cache[mld_id] in cache):
            sid_left = cache[job.key_cache[mld_id]]
            # print("cache hit...... ", sid_left)
        # else:
        #    print("cache miss :(")
    else:
        # satisfy job dependencies converted in min start_time
        while slots[sid_left].b < min_start_time:
            sid_left = slots[sid_left].next

    # sid_left = 1 # TODO no cache

    sid_right = sid_left
    slot_e = slots[sid_right].e

    # print('first sid_left', sid_left)

    while True:
        # find next contiguous slots_time
        # print("A: job.id:", job.id, "sid_left:", sid_left, "sid_right:",)
        # sid_right

        # A slot id of 0 is treated as "no slot": the chain is exhausted.
        if sid_left != 0 and sid_right != 0:
            slot_b = slots[sid_left].b
        else:
            # TODO error
            # print("TODO error can't schedule job.id:", job.id)
            logger.info(
                "can't schedule job with id:" + str(job.id) + ", due resources")
            return ([], -1, -1)

        # Extend the run to the right until it spans at least walltime.
        # sid_right and slot_e are kept from the previous iteration, so the
        # run is only ever extended, never rebuilt from sid_left.
        while ((slot_e - slot_b + 1) < walltime):
            sid_right = slots[sid_right].next
            if sid_right != 0:
                slot_e = slots[sid_right].e
            else:
                logger.info(
                    "can't schedule job with id:" + str(job.id) + ", due time")
                return ([], -1, -1)

        #        if not updated_cache and (slots[sid_left].itvs != []):
        #            cache[walltime] = sid_left
        #            updated_cache = True

        # Resources available over the whole run [sid_left, sid_right].
        if job.ts or (job.ph == ALLOW):
            itvs_avail = intersec_ts_ph_itvs_slots(
                slots, sid_left, sid_right, job)
        else:
            itvs_avail = intersec_itvs_slots(slots, sid_left, sid_right)
        # print("itvs_avail", itvs_avail, "h_res_req", hy_res_rqts, "hy", hy)

        if job.find:
            # True only when the run starts at slot 1 and slot 1 starts at
            # the beginning of the slot set.
            beginning_slotset = True if (sid_left == 1) and (slots_set.begin == slots[1].b) else False
            # Use specialized find resource function
            itvs = job.find_func(itvs_avail, hy_res_rqts, hy, beginning_slotset,
                                 *job.find_args, **job.find_kwargs)
        else:
            itvs = find_resource_hierarchies_job(itvs_avail, hy_res_rqts, hy)

        if itvs != []:
            if ('QUOTAS' in config) and (config['QUOTAS'] == 'yes'):
                nb_res = itvs_size(intersec(itvs, rs.default_resource_itvs))
                res = check_slots_quotas(slots, sid_left, sid_right, job, nb_res, walltime)
                (quotas_ok, quotas_msg, rule, value) = res
                if not quotas_ok:
                    logger.info("Quotas limitaion reached, job:" + str(job.id) +
                                ", " + quotas_msg + ", rule: " + str(rule) +
                                ", value: " + str(value))
                    # quotas limitation trigger therefore disable cache update for this entry
                    no_cache = True
                else:
                    break
            else:
                break

        # No placement on this run: retry from the next left slot.
        sid_left = slots[sid_left].next

    # Remember where the search ended so that a later call with the same
    # cache key can start from there instead of from slot 1.
    if job.key_cache and (min_start_time < 0) and (not no_cache):  # and (not job.deps):
        cache[job.key_cache[mld_id]] = sid_left
        # print("cache: update entry ", job.key_cache[mld_id], " with ", sid_left)
    # else:
    #   print("cache: not updated ", job.key_cache, min_start_time, job.deps)

    return (itvs, sid_left, sid_right)
def test_itvs_size():
    """Check that itvs_size reports 5 for the intervals (1, 2), (4, 5), (10, 10)."""
    intervals = [(1, 2), (4, 5), (10, 10)]
    expected_size = 5
    assert itvs_size(intervals) == expected_size
def estimate_job_nb_resources(resource_request, j_properties):
    '''Estimate the number of resources usable by each moldable request.

    For every ``(resource_desc, walltime)`` entry of ``resource_request`` the
    resources matching the SQL property constraints are looked up, and
    ``find_resource_hierarchies_scattered`` is asked for the requested
    hierarchy levels and counts. A missing walltime is replaced by
    ``default_job_walltime``.

    When no moldable request can be satisfied an error is printed and
    ``sub_exit(-5)`` is called.

    :param resource_request: iterable of ``(resource_desc, walltime)`` where
        ``resource_desc`` is a list of dicts with ``'property'`` and
        ``'resources'`` keys; each item of ``'resources'`` is a dict with
        ``'resource'`` and ``'value'`` keys.
    :param j_properties: SQL constraint applying to the whole job, or a
        falsy value when there is none.
    :returns: ``(resource_available, [(nb_resources, walltime), ...])`` with
        one pair per moldable request.
    '''
    estimated_nb_resources = []
    resource_available = False
    resource_set = ResourceSet()
    resources_itvs = resource_set.roid_itvs

    for mld_idx, mld_resource_request in enumerate(resource_request):
        resource_desc, walltime = mld_resource_request

        if not walltime:
            walltime = default_job_walltime

        result = []

        for prop_res in resource_desc:
            jrg_grp_property = prop_res['property']
            resource_value_lst = prop_res['resources']

            #
            # determine resource constraints
            #
            if (not j_properties) and (not jrg_grp_property or
                                       (jrg_grp_property == "type = 'default'")):
                constraints = deepcopy(resource_set.roid_itvs)
            else:
                if not j_properties or not jrg_grp_property:
                    and_sql = ""
                else:
                    and_sql = " AND "

                # Either side may be None rather than an empty string;
                # normalise so the concatenation cannot raise TypeError.
                # NOTE(review): the constraint text is passed to the
                # database as raw SQL — confirm it is validated upstream.
                sql_constraints = (j_properties or "") + and_sql + (jrg_grp_property or "")

                try:
                    request_constraints = db.query(Resource.id)\
                                            .filter(text(sql_constraints)).all()
                except exc.SQLAlchemyError as err:
                    print_error('Bad resource SQL constraints request:', sql_constraints)
                    # Report the caught exception, not the ``exc`` module.
                    print_error('SQLAlchemyError: ', err)
                    result = []
                    break

                roids = [resource_set.rid_i2o[int(y[0])] for y in request_constraints]
                constraints = unordered_ids2itvs(roids)

            hy_levels = []
            hy_nbs = []
            for resource_value in resource_value_lst:
                res_name = resource_value['resource']
                value = resource_value['value']
                hy_levels.append(resource_set.hierarchy[res_name])
                hy_nbs.append(int(value))

            cts_resources_itvs = intersec(constraints, resources_itvs)
            res_itvs = find_resource_hierarchies_scattered(cts_resources_itvs,
                                                           hy_levels, hy_nbs)
            if res_itvs:
                result.extend(res_itvs)
            else:
                # One unsatisfied group invalidates the whole moldable request.
                result = []
                break

        if result:
            resource_available = True

        estimated_nb_res = itvs_size(result)
        estimated_nb_resources.append((estimated_nb_res, walltime))
        print_info('Moldable instance: ', mld_idx,
                   ' Estimated nb resources: ', estimated_nb_res,
                   ' Walltime: ', walltime)

    if not resource_available:
        print_error("There are not enough resources for your request")
        sub_exit(-5)

    return (resource_available, estimated_nb_resources)