def frag_job(jid):
    """Record a frag (kill) request for job *jid*.

    The requesting user is taken from the OARDO_USER environment variable
    (falling back to USER). Returns 0 when the request is recorded, -2 when
    a frag request already exists for this job, and -1 when the job is
    unknown or the user is not allowed to frag it.
    """
    # Effective requester: OARDO_USER takes precedence over USER.
    if 'OARDO_USER' in os.environ:
        luser = os.environ['OARDO_USER']
    else:
        luser = os.environ['USER']

    job = get_job(jid)

    # Only the job owner, 'oar' or 'root' may frag a job.
    if (job is None) or (luser not in (job.user, 'oar', 'root')):
        return -1

    existing = db.query(FragJob).filter(FragJob.job_id == jid).all()
    if existing:
        # Job already killed
        return -2

    db.add(FragJob(job_id=jid, date=tools.get_date()))
    db.commit()
    add_new_event("FRAG_JOB_REQUEST", jid,
                  "User %s requested to frag the job %s" % (luser, str(jid)))
    return 0
def check_besteffort_jobs_to_kill(jobs_to_launch, rid2jid_to_launch, current_time_sec,
                                  besteffort_rid2job, resource_set):
    '''Detect besteffort jobs occupying resources needed by jobs to launch.

    For each resource needed by a job to launch and currently held by a
    besteffort job, either let both live (timesharing-compatible), send a
    checkpoint signal (when the besteffort job requested checkpointing and
    its grace period has not elapsed), or frag the besteffort job.

    Return 1 if at least one besteffort job must be freed, otherwise 0.
    '''
    return_code = 0
    logger.debug("Begin processing of besteffort jobs to kill")
    fragged_jobs = []

    for rid, job_id in iteritems(rid2jid_to_launch):
        if rid in besteffort_rid2job:
            be_job = besteffort_rid2job[rid]
            job_to_launch = jobs_to_launch[job_id]

            if is_timesharing_for_two_jobs(be_job, job_to_launch):
                logger.debug("Resource " + str(rid) + " is needed for job " +
                             str(job_id) + ", but besteffort job " + str(be_job.id) +
                             " can live, because timesharing compatible")
            else:
                # Only handle each besteffort job once per pass.
                if be_job.id not in fragged_jobs:
                    skip_kill = 0
                    checkpoint_first_date = sys.maxsize
                    # Check if we must checkpoint the besteffort job instead
                    # of killing it right away.
                    if be_job.checkpoint > 0:
                        # Find the earliest CHECKPOINT event already sent.
                        for ev in get_job_events(be_job.id):
                            if ev.type == 'CHECKPOINT':
                                if checkpoint_first_date > ev.date:
                                    checkpoint_first_date = ev.date
                        # Within the checkpoint grace period (or no signal
                        # sent yet): send/resend the signal and skip the kill.
                        if (checkpoint_first_date == sys.maxsize) or\
                           (current_time_sec <= (checkpoint_first_date + be_job.checkpoint)):
                            skip_kill = 1
                            send_checkpoint_signal(be_job)
                            logger.debug("Send checkpoint signal to the job " +
                                         str(be_job.id))

                    if not skip_kill:
                        # BUGFIX: message previously lacked a space after the
                        # resource id and had the two job ids swapped.
                        logger.debug("Resource " + str(rid) +
                                     " need to be freed for job " +
                                     str(job_to_launch.id) +
                                     ": killing besteffort job " + str(be_job.id))
                        add_new_event('BESTEFFORT_KILL', be_job.id,
                                      "kill the besteffort job " + str(be_job.id))
                        frag_job(be_job.id)
                        fragged_jobs.append(be_job.id)
                    return_code = 1

    # BUGFIX: "precessing" -> "processing" in the closing debug message.
    logger.debug("End processing of besteffort jobs to kill\n")
    return return_code
def handle_jobs_to_launch(jobs_to_launch_lst, current_time_sec, current_time_sql):
    """Move schedulable jobs to the toLaunch state.

    For advance reservations that start late, the walltime is reduced by the
    delay, the gantt start time is moved to now, and the job message's
    ``W=hh:mm:ss`` field is updated accordingly.

    Return 1 if at least one job was processed, otherwise 0.
    """
    # BUGFIX: the debug message previously never closed its parenthesis.
    logger.debug("Begin processing jobs to launch (start time <= " +
                 current_time_sql + ")")
    return_code = 0
    for job in jobs_to_launch_lst:
        return_code = 1
        logger.debug("Set job " + str(job.id) + " state to toLaunch at " +
                     current_time_sql)
        #
        # Advance Reservation: a reservation starting in the past gets its
        # walltime shrunk by the time already elapsed since its start.
        #
        walltime = job.walltime
        if ((job.reservation == 'Scheduled') and (job.start_time < current_time_sec)):
            max_time = walltime - (current_time_sec - job.start_time)
            set_moldable_job_max_time(job.moldable_id, max_time)
            set_gantt_job_start_time(job.moldable_id, current_time_sec)
            # BUGFIX: added the missing spaces around "to" and "(was".
            logger.warn("Reduce walltime of job " + str(job.id) +
                        " to " + str(max_time) + " (was " + str(walltime) + ")")
            add_new_event('REDUCE_RESERVATION_WALLTIME', job.id,
                          "Change walltime from " + str(walltime) + " to " +
                          str(max_time))
            # Reflect the new walltime in the job message (W=hh:mm:ss).
            w_max_time = duration_to_sql(max_time)
            new_message = re.sub(r'W=\d+:\d+:\d+', 'W=' + w_max_time, job.message)
            if new_message != job.message:
                set_job_message(job.id, new_message)

        prepare_job_to_be_launched(job, current_time_sec)

    logger.debug("End processing of jobs to launch")
    return return_code
def meta_schedule(mode='internal', plt=Platform()):
    """Run one pass of the meta-scheduler.

    Schedules each active queue (internal or external scheduler depending on
    *mode*), launches ready jobs, manages energy saving (node halt/wake-up),
    and processes jobs in the Resuming, toError, toAckReservation and
    toLaunch states.

    Returns an exit code for Almighty: 0 (nothing special), 1 (a reservation
    is about to start), 2 (besteffort jobs were fragged or an oarsub could
    not be notified).

    NOTE(review): *plt* uses a mutable default (`Platform()`), evaluated once
    at import time — callers sharing the default share one Platform instance;
    confirm this is intended before changing it.
    """
    exit_code = 0

    job_security_time = int(config['SCHEDULER_JOB_SECURITY_TIME'])

    # Optional quotas support, driven by a JSON rules file.
    if ('QUOTAS' in config) and (config['QUOTAS'] == 'yes'):
        if 'QUOTAS_FILE' not in config:
            config['QUOTAS_FILE'] = './quotas_conf.json'
        load_quotas_rules()

    tools.init_judas_notify_user()
    tools.create_almighty_socket()

    logger.debug(
        "Retrieve information for already scheduled reservations from \
database before flush (keep assign resources)")

    # reservation ??.

    initial_time_sec = tools.get_date()  # time.time()
    initial_time_sql = local_to_sql(initial_time_sec)

    current_time_sec = initial_time_sec
    current_time_sql = initial_time_sql

    # Seed the gantt with already running jobs; also collects the mapping of
    # resource id -> besteffort job currently using it.
    gantt_init_results = gantt_init_with_running_jobs(plt, initial_time_sec,
                                                      job_security_time)
    all_slot_sets, scheduled_jobs, besteffort_rid2jid = gantt_init_results
    resource_set = plt.resource_set()

    # Path for user of external schedulers
    if 'OARDIR' in os.environ:
        binpath = os.environ['OARDIR'] + '/'
    else:
        binpath = '/usr/local/lib/oar/'
        logger.warning("OARDIR env variable must be defined, " +
                       binpath + " is used by default")

    # Schedule each active queue, by decreasing priority.
    for queue in db.query(Queue).order_by(text('priority DESC')).all():
        if queue.state == 'Active':
            logger.debug("Queue " + queue.name + ": Launching scheduler " +
                         queue.scheduler_policy + " at time " + initial_time_sql)

            if mode == 'external':  # pragma: no cover
                call_external_scheduler(binpath, scheduled_jobs, all_slot_sets,
                                        resource_set, job_security_time, queue,
                                        initial_time_sec, initial_time_sql)
            else:
                call_internal_scheduler(plt, scheduled_jobs, all_slot_sets,
                                        job_security_time, queue,
                                        initial_time_sec)

            handle_waiting_reservation_jobs(queue.name, resource_set,
                                            job_security_time,
                                            current_time_sec)
            # handle_new_AR_jobs
            check_reservation_jobs(
                plt, resource_set, queue.name, all_slot_sets, current_time_sec)

    jobs_to_launch, jobs_to_launch_lst, rid2jid_to_launch = \
        get_gantt_jobs_to_launch(resource_set, job_security_time,
                                 current_time_sec)

    if check_besteffort_jobs_to_kill(jobs_to_launch, rid2jid_to_launch,
                                     current_time_sec, besteffort_rid2jid,
                                     resource_set) == 1:
        # We must kill some besteffort jobs
        tools.notify_almighty('ChState')
        exit_code = 2
    elif handle_jobs_to_launch(jobs_to_launch_lst, current_time_sec,
                               current_time_sql) == 1:
        exit_code = 0

    # Update visu gantt tables
    update_gantt_visualization()

    # Manage dynamic node feature (energy saving: halt idle nodes).
    flag_hulot = False
    timeout_cmd = int(config['SCHEDULER_TIMEOUT'])

    if ((('SCHEDULER_NODE_MANAGER_SLEEP_CMD' in config) or
         ((config['ENERGY_SAVING_INTERNAL'] == 'yes') and
          ('ENERGY_SAVING_NODE_MANAGER_SLEEP_CMD' in config))) and
        (('SCHEDULER_NODE_MANAGER_SLEEP_TIME' in config) and
         ('SCHEDULER_NODE_MANAGER_IDLE_TIME' in config))):

        # Look at nodes that are unused for a duration
        idle_duration = int(config['SCHEDULER_NODE_MANAGER_IDLE_TIME'])
        sleep_duration = int(config['SCHEDULER_NODE_MANAGER_SLEEP_TIME'])

        idle_nodes = search_idle_nodes(current_time_sec)
        tmp_time = current_time_sec - idle_duration

        node_halt = []

        for node, idle_duration in iteritems(idle_nodes):
            if idle_duration < tmp_time:
                # Search if the node has enough time to sleep
                tmp = get_next_job_date_on_node(node)
                if (tmp is None) or (tmp - sleep_duration > current_time_sec):
                    # Search if node has not been woken up recently
                    wakeup_date = get_last_wake_up_date_of_node(node)
                    if (wakeup_date is None) or (wakeup_date < tmp_time):
                        node_halt.append(node)

        if node_halt != []:
            logger.debug("Powering off some nodes (energy saving): " +
                         str(node_halt))
            # Using the built-in energy saving module to shut down nodes
            if config['ENERGY_SAVING_INTERNAL'] == 'yes':
                if kao_tools.send_to_hulot('HALT', ' '.join(node_halt)):
                    logger.error("Communication problem with the energy saving module (Hulot)\n")
                    # Remember the failure so no CHECK is sent below.
                    flag_hulot = 1
            else:
                # Not using the built-in energy saving module to shut down nodes
                cmd = config['SCHEDULER_NODE_MANAGER_SLEEP_CMD']
                if kao_tools.fork_and_feed_stdin(cmd, timeout_cmd, node_halt):
                    logger.error("Command " + cmd + "timeouted (" +
                                 str(timeout_cmd) +
                                 "s) while trying to poweroff some nodes")

    if (('SCHEDULER_NODE_MANAGER_SLEEP_CMD' in config) or
        ((config['ENERGY_SAVING_INTERNAL'] == 'yes') and
         ('ENERGY_SAVING_NODE_MANAGER_SLEEP_CMD' in config))):
        # Get nodes which the scheduler wants to schedule jobs to,
        # but which are in the Absent state, to wake them up
        wakeup_time = int(config['SCHEDULER_NODE_MANAGER_WAKEUP_TIME'])
        nodes = get_gantt_hostname_to_wake_up(current_time_sec, wakeup_time)

        if nodes != []:
            logger.debug("Awaking some nodes: " + str(nodes))
            # Using the built-in energy saving module to wake up nodes
            if config['ENERGY_SAVING_INTERNAL'] == 'yes':
                if kao_tools.send_to_hulot('WAKEUP', ' '.join(nodes)):
                    logger.error("Communication problem with the energy saving module (Hulot)")
                    # Remember the failure so no CHECK is sent below.
                    flag_hulot = 1
            else:
                # Not using the built-in energy saving module to wake up nodes
                cmd = config['SCHEDULER_NODE_MANAGER_WAKE_UP_CMD']
                if kao_tools.fork_and_feed_stdin(cmd, timeout_cmd, nodes):
                    logger.error("Command " + cmd + "timeouted (" +
                                 str(timeout_cmd) +
                                 "s) while trying to wake-up some nodes ")

    # Send CHECK signal to Hulot if needed
    if not flag_hulot and (config['ENERGY_SAVING_INTERNAL'] == 'yes'):
        if kao_tools.send_to_hulot('CHECK', []):
            logger.error("Communication problem with the energy saving module (Hulot)")

    # Retrieve jobs according to their state and excluding job in 'Waiting' state.
    jobs_by_state = get_current_not_waiting_jobs()

    #
    # Search jobs to resume
    #

    #
    # TODO: TOFINISH
    #
    if 'Resuming' in jobs_by_state:
        logger.warn("Resuming job is NOT ENTIRELY IMPLEMENTED")
        for job in jobs_by_state['Resuming']:
            other_jobs = get_jobs_on_resuming_job_resources(job.id)
            # TODO : look for timesharing other jobs. What do we do?????
            if other_jobs == []:
                # We can resume the job
                logger.debug("[" + str(job.id) + "] Resuming job")
                if 'noop' in job.types:
                    resume_job_action(job.id)
                    logger.debug("[" + str(job.id) + "] Resume NOOP job OK")
                else:
                    # Run the site-defined pre-resume script, with a timeout.
                    script = config['JUST_BEFORE_RESUME_EXEC_FILE']
                    timeout = int(config['SUSPEND_RESUME_SCRIPT_TIMEOUT'])
                    if timeout is None:
                        timeout = kao_tools.get_default_suspend_resume_script_timeout()
                    skip = 0
                    logger.debug("[" + str(job.id) +
                                 "] Running post suspend script: `" +
                                 script + " " + str(job.id) + "'")
                    cmd_str = script + str(job.id)
                    return_code = -1
                    try:
                        return_code = call(cmd_str, shell=True, timeout=timeout)
                    except TimeoutExpired as e:
                        logger.error(str(e) + "[" + str(job.id) +
                                     "] Suspend script timeouted")
                        add_new_event('RESUME_SCRIPT_ERROR', job.id,
                                      "Suspend script timeouted")
                    if return_code != 0:
                        # Script failure: frag the job instead of resuming it.
                        str_error = "[" + str(job.id) + "] Suspend script error, return code = "\
                                    + str(return_code)
                        logger.error(str_error)
                        add_new_event('RESUME_SCRIPT_ERROR', job.id, str_error)
                        frag_job(job.id)
                        tools.notify_almighty('Qdel')
                        skip = 1

                    cpuset_nodes = None
                    if 'JOB_RESOURCE_MANAGER_PROPERTY_DB_FIELD' in config:
                        cpuset_field = config['JOB_RESOURCE_MANAGER_PROPERTY_DB_FIELD']
                    else:
                        cpuset_field = ""

                    if cpuset_field and (skip == 0):
                        # TODO
                        cpuset_name = job.user + "_" + str(job.id)
                        cpuset_nodes = get_cpuset_values(cpuset_field,
                                                         job.assigned_moldable_id)
                        # TODO
                        suspend_data_hash = {'name': cpuset_name,
                                             'job_id': job.id,
                                             'oarexec_pid_file':
                                             kao_tools.get_oar_pid_file_name(job.id)}
                    if cpuset_nodes:
                        # TODO
                        taktuk_cmd = config['TAKTUK_CMD']
                        if 'SUSPEND_RESUME_FILE' in config:
                            suspend_file = config['SUSPEND_RESUME_FILE']
                        else:
                            # TODO
                            suspend_file = kao_tools.get_default_suspend_resume_file()

    #
    # TODO: TOFINISH
    #

    # Notify oarsub -I when they will be launched
    for j_info in get_gantt_waiting_interactive_prediction_date():
        job_id, job_info_type, job_start_time, job_message = j_info
        addr, port = job_info_type.split(':')
        new_start_prediction = local_to_sql(job_start_time)
        logger.debug("[" + str(job_id) +
                     "] Notifying user of the start prediction: " +
                     new_start_prediction + "(" + job_message + ")")
        tools.notify_tcp_socket(addr, port, "[" + initial_time_sql +
                                "] Start prediction: " + new_start_prediction +
                                " (" + job_message + ")")

    # Run the decisions
    # Process "toError" jobs
    if 'toError' in jobs_by_state:
        for job in jobs_by_state['toError']:
            addr, port = job.info_type.split(':')
            if job.type == 'INTERACTIVE' or\
               (job.type == 'PASSIVE' and job.reservation == 'Scheduled'):
                logger.debug("Notify oarsub job (num:" + str(job.id) +
                             ") in error; jobInfo=" + job.info_type)
                nb_sent1 = tools.notify_tcp_socket(addr, port, job.message + '\n')
                nb_sent2 = tools.notify_tcp_socket(addr, port, 'BAD JOB' + '\n')
                if (nb_sent1 == 0) or (nb_sent2 == 0):
                    logger.warn(
                        "Cannot open connection to oarsub client for" +
                        str(job.id))
            logger.debug("Set job " + str(job.id) + " to state Error")
            set_job_state(job.id, 'Error')

    # Process toAckReservation jobs
    if 'toAckReservation' in jobs_by_state:
        for job in jobs_by_state['toAckReservation']:
            addr, port = job.info_type.split(':')
            logger.debug(
                "Treate job" + str(job.id) + " in toAckReservation state")
            nb_sent = tools.notify_tcp_socket(addr, port, 'GOOD RESERVATION' + '\n')
            if nb_sent == 0:
                # oarsub unreachable: cancel the reservation.
                logger.warn(
                    "Frag job " + str(job.id) +
                    ", I cannot notify oarsub for the reservation")
                add_new_event('CANNOT_NOTIFY_OARSUB', str(job.id),
                              "Can not notify oarsub for the job " + str(job.id))
                # TODO ???
                # OAR::IO::lock_table / OAR::IO::unlock_table($base)
                frag_job(job.id)
                exit_code = 2
            else:
                logger.debug("Notify oarsub for a RESERVATION (idJob=" +
                             str(job.id) + ") --> OK; jobInfo=" + job.info_type)
                set_job_state(job.id, 'Waiting')
                if ((job.start_time - 1) <= current_time_sec) and (exit_code == 0):
                    exit_code = 1

    # Process toLaunch jobs
    if 'toLaunch' in jobs_by_state:
        for job in jobs_by_state['toLaunch']:
            notify_to_run_job(job.id)

    logger.debug("End of Meta Scheduler")
    return exit_code
def handle_waiting_reservation_jobs(queue_name, resource_set, job_security_time,
                                    current_time_sec):
    """Process accepted Advance Reservation jobs waiting in *queue_name*.

    Expired reservations are set to Error. Late reservations are delayed while
    resources are missing; once the waiting timeout elapses, the missing
    resources are removed from the reservation and the job message's ``R=n``
    field is updated so the job can start on what is available.
    """
    logger.debug("Queue " + queue_name +
                 ": begin processing accepted Advance Reservations")
    ar_jobs = get_waiting_scheduled_AR_jobs(queue_name, resource_set,
                                            job_security_time, current_time_sec)
    for job in ar_jobs:
        moldable_id = job.moldable_id
        walltime = job.walltime

        # Test if AR job is expired and handle it
        if (current_time_sec > (job.start_time + walltime)):
            # BUGFIX: "avdance" -> "advance" in the warning message.
            logger.warn("[" + str(job.id) +
                        "] set job state to Error: advance reservation expired and couldn't be started")
            set_job_state(job.id, 'Error')
            set_job_message(job.id, "Reservation expired and couldn't be started.")
        else:
            # Determine currently available resources for this reservation.
            avail_res = intersec(resource_set.roid_itvs, job.res_set)

            # Test if the AR job is waiting to be launched due to nodes'
            # unavailabilities.
            if (avail_res == []) and (job.start_time < current_time_sec):
                logger.warn("[%s] advance reservation is waiting because no resource is present"
                            % str(job.id))
                # Delay launching time
                set_gantt_job_start_time(moldable_id, current_time_sec + 1)
            elif (job.start_time < current_time_sec):
                if (job.start_time + reservation_waiting_timeout) > current_time_sec:
                    if not equal_itvs(avail_res, job.res_set):
                        # The expected resources are not all available yet:
                        # wait (delay by one second) until the timeout expires.
                        # BUGFIX: the original backslash-continued literal
                        # embedded a run of indentation spaces in the message.
                        logger.warn("[" + str(job.id) +
                                    "] advance reservation is waiting because not all "
                                    "resources are available yet")
                        set_gantt_job_start_time(moldable_id, current_time_sec + 1)
                else:
                    # It's time to launch the AR job: remove missing resources.
                    missing_resources_itvs = sub_intervals(job.res_set, avail_res)
                    remove_gantt_resource_job(moldable_id, missing_resources_itvs,
                                              resource_set)
                    # BUGFIX: restored the missing "] " after the job id and
                    # fixed "there are not Alive" -> "they are not Alive".
                    logger.warn("[" + str(job.id) +
                                "] remove some resources assigned to this advance reservation, "
                                "because they are not Alive")
                    add_new_event('SCHEDULER_REDUCE_NB_RESSOURCES_FOR_ADVANCE_RESERVATION',
                                  job.id,
                                  "[MetaSched] Reduce the number of resources for the job " +
                                  str(job.id))
                    # Reflect the reduced resource count in the job message (R=n).
                    nb_res = itvs_size(job.res_set) - itvs_size(missing_resources_itvs)
                    new_message = re.sub(r'R=\d+', 'R=' + str(nb_res), job.message)
                    if new_message != job.message:
                        set_job_message(job.id, new_message)

    logger.debug("Queue " + queue_name +
                 ": end processing of reservations with missing resources")