Example #1
File: job.py Project: oar-team/oar3
def frag_job(jid):
    """Record a frag (kill) request for job `jid`, provided the requesting
    user owns the job or is oar/root.
    Return 0 on success, -2 if the job is already fragged, -1 otherwise."""

    if 'OARDO_USER' in os.environ:
        luser = os.environ['OARDO_USER']
    else:
        luser = os.environ['USER']

    job = get_job(jid)

    if (job is not None) and ((luser == job.user)
                              or (luser == 'oar')
                              or (luser == 'root')):
        res = db.query(FragJob).filter(FragJob.job_id == jid).all()

        if len(res) == 0:

            date = tools.get_date()
            frajob = FragJob(job_id=jid, date=date)
            db.add(frajob)
            db.commit()
            add_new_event("FRAG_JOB_REQUEST",
                          jid, "User %s requested to frag the job %s"
                          % (luser, str(jid)))
            return 0
        else:
            # A frag request already exists for this job
            return -2
    else:
        return -1
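
A minimal usage sketch (hypothetical job id; assumes the surrounding oar modules are importable). The return codes map to: 0 = frag request recorded, -2 = job already fragged, -1 = unknown job or insufficient permissions:

# Hypothetical caller: request that job 4242 be fragged (killed).
# frag_job() performs the permission check itself, so the caller
# only has to interpret the return code.
result = frag_job(4242)
if result == 0:
    print("frag request recorded")
elif result == -2:
    print("job already has a pending frag request")
else:  # -1
    print("unknown job or insufficient permissions")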
Example #2
def check_besteffort_jobs_to_kill(jobs_to_launch, rid2jid_to_launch, current_time_sec,
                                  besteffort_rid2job, resource_set):
    '''Detect whether there are besteffort jobs to kill.
    Return 1 if there is at least one job to frag, otherwise 0.
    '''

    return_code = 0

    logger.debug("Begin processing of besteffort jobs to kill")

    fragged_jobs = []

    for rid, job_id in iteritems(rid2jid_to_launch):
        if rid in besteffort_rid2job:
            be_job = besteffort_rid2job[rid]
            job_to_launch = jobs_to_launch[job_id]

            if is_timesharing_for_two_jobs(be_job, job_to_launch):
                logger.debug("Resource " + str(rid) +
                             " is needed for  job " + str(job_id) +
                             ", but besteffort job  " + str(be_job.id) +
                             " can live, because timesharing compatible")
            else:
                if be_job.id not in fragged_jobs:
                    skip_kill = 0
                    checkpoint_first_date = sys.maxsize
                    # Check if we must checkpoint the besteffort job
                    if be_job.checkpoint > 0:
                        for ev in get_job_events(be_job.id):
                            if ev.type == 'CHECKPOINT':
                                if checkpoint_first_date > ev.date:
                                    checkpoint_first_date = ev.date

                        if (checkpoint_first_date == sys.maxsize) or\
                           (current_time_sec <= (checkpoint_first_date + be_job.checkpoint)):
                            skip_kill = 1
                            send_checkpoint_signal(be_job)

                            logger.debug("Send checkpoint signal to the job " + str(be_job.id))

                    if not skip_kill:
                        logger.debug("Resource " + str(rid) +
                                     "need to be freed for job " + str(be_job.id) +
                                     ": killing besteffort job " + str(job_to_launch.id))

                        add_new_event('BESTEFFORT_KILL', be_job.id,
                                      "kill the besteffort job " + str(be_job.id))
                        frag_job(be_job.id)

                    fragged_jobs.append(be_job.id)
                    return_code = 1

    logger.debug("End precessing of besteffort jobs to kill\n")

    return return_code
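
The checkpoint branch above reduces to a small pure decision: send a CHECKPOINT signal instead of killing, either because the job was never checkpointed or because its grace window has not elapsed. A standalone sketch of that rule (function and argument names here are illustrative, not part of the OAR API):

import sys

def should_send_checkpoint(checkpoint_delay, first_checkpoint_date, now):
    # A besteffort job that asked for checkpointing (delay > 0) gets a
    # CHECKPOINT signal rather than an immediate kill if it was never
    # checkpointed (no event found, date left at sys.maxsize) or if the
    # delay since the first CHECKPOINT event has not yet elapsed.
    if checkpoint_delay <= 0:
        return False  # job did not ask for checkpointing: kill directly
    never_checkpointed = first_checkpoint_date == sys.maxsize
    return never_checkpointed or now <= first_checkpoint_date + checkpoint_delay

# Delay of 600s, first CHECKPOINT event at t=1000: window ends at t=1600.
assert should_send_checkpoint(600, 1000, 1500) is True
assert should_send_checkpoint(600, 1000, 1601) is False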
Example #3
def handle_jobs_to_launch(jobs_to_launch_lst, current_time_sec, current_time_sql):
    logger.debug(
        "Begin processing jobs to launch (start time <= " + current_time_sql)

    return_code = 0

    for job in jobs_to_launch_lst:
        return_code = 1
        logger.debug("Set job " + str(job.id) + " state to toLaunch at " + current_time_sql)

        #
        # Advance Reservation
        #
        walltime = job.walltime
        if ((job.reservation == 'Scheduled') and (job.start_time < current_time_sec)):
            max_time = walltime - (current_time_sec - job.start_time)

            set_moldable_job_max_time(job.moldable_id, max_time)
            set_gantt_job_start_time(job.moldable_id, current_time_sec)
            logger.warn("Reduce walltime of job " + str(job.id) +
                        "to " + str(max_time) + "(was  " + str(walltime) + " )")

            add_new_event('REDUCE_RESERVATION_WALLTIME', job.id,
                          "Change walltime from " + str(walltime) + " to "
                          + str(max_time))

            w_max_time = duration_to_sql(max_time)
            new_message = re.sub(r'W=\d+:\d+:\d+', 'W=' + w_max_time, job.message)

            if new_message != job.message:
                set_job_message(job.id, new_message)

        prepare_job_to_be_launched(job, current_time_sec)

    logger.debug("End processing of jobs to launch")

    return return_code
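
The advance-reservation branch above is plain arithmetic plus a regex rewrite of the job message; a self-contained sketch of just that step (to_hms() is an illustrative stand-in for duration_to_sql()):

import re

def to_hms(seconds):
    # stand-in for duration_to_sql(): seconds -> "H:MM:SS"
    h, rest = divmod(seconds, 3600)
    m, s = divmod(rest, 60)
    return "%d:%02d:%02d" % (h, m, s)

start_time, now, walltime = 1000, 1300, 3600  # reservation started 300s late
max_time = walltime - (now - start_time)      # 3300s of walltime remain
message = "R=4,W=1:00:00,J=B"
print(re.sub(r'W=\d+:\d+:\d+', 'W=' + to_hms(max_time), message))
# -> R=4,W=0:55:00,J=B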
Example #4
def meta_schedule(mode='internal', plt=Platform()):

    exit_code = 0

    job_security_time = int(config['SCHEDULER_JOB_SECURITY_TIME'])

    if ('QUOTAS' in config) and (config['QUOTAS'] == 'yes'):
        if 'QUOTAS_FILE' not in config:
            config['QUOTAS_FILE'] = './quotas_conf.json'
        load_quotas_rules()

    tools.init_judas_notify_user()
    tools.create_almighty_socket()

    logger.debug(
        "Retrieve information for already scheduled reservations from "
        "database before flush (keep assigned resources)")


    initial_time_sec = tools.get_date()  # time.time()
    initial_time_sql = local_to_sql(initial_time_sec)

    current_time_sec = initial_time_sec
    current_time_sql = initial_time_sql

    gantt_init_results = gantt_init_with_running_jobs(plt, initial_time_sec,
                                                      job_security_time)
    all_slot_sets, scheduled_jobs, besteffort_rid2jid = gantt_init_results
    resource_set = plt.resource_set()

    # Path for user of external schedulers
    if 'OARDIR' in os.environ:
        binpath = os.environ['OARDIR'] + '/'
    else:
        binpath = '/usr/local/lib/oar/'
        logger.warning(
            "OARDIR env variable is not defined, " + binpath + " is used by default")

    for queue in db.query(Queue).order_by(text('priority DESC')).all():

        if queue.state == 'Active':
            logger.debug("Queue " + queue.name + ": Launching scheduler " +
                         queue.scheduler_policy + " at time " + initial_time_sql)

            if mode == 'external':  # pragma: no cover
                call_external_scheduler(binpath, scheduled_jobs, all_slot_sets,
                                        resource_set, job_security_time, queue,
                                        initial_time_sec, initial_time_sql)
            else:
                call_internal_scheduler(plt, scheduled_jobs, all_slot_sets,
                                        job_security_time, queue, initial_time_sec)

            handle_waiting_reservation_jobs(queue.name, resource_set,
                                            job_security_time, current_time_sec)

            # handle_new_AR_jobs
            check_reservation_jobs(
                plt, resource_set, queue.name, all_slot_sets, current_time_sec)

    jobs_to_launch, jobs_to_launch_lst, rid2jid_to_launch = get_gantt_jobs_to_launch(resource_set,
                                                                                     job_security_time,
                                                                                     current_time_sec)

    if check_besteffort_jobs_to_kill(jobs_to_launch, rid2jid_to_launch,
                                     current_time_sec, besteffort_rid2jid,
                                     resource_set) == 1:
        # We must kill some besteffort jobs
        tools.notify_almighty('ChState')
        exit_code = 2
    elif handle_jobs_to_launch(jobs_to_launch_lst, current_time_sec, current_time_sql) == 1:
        exit_code = 0

    # Update visu gantt tables
    update_gantt_visualization()

    # Manage dynamic node feature
    flag_hulot = False
    timeout_cmd = int(config['SCHEDULER_TIMEOUT'])

    if ((('SCHEDULER_NODE_MANAGER_SLEEP_CMD' in config) or
         ((config['ENERGY_SAVING_INTERNAL'] == 'yes') and
          ('ENERGY_SAVING_NODE_MANAGER_SLEEP_CMD' in config))) and
        (('SCHEDULER_NODE_MANAGER_SLEEP_TIME' in config)
         and ('SCHEDULER_NODE_MANAGER_IDLE_TIME' in config))):

        # Look at nodes that are unused for a duration
        idle_duration = int(config['SCHEDULER_NODE_MANAGER_IDLE_TIME'])
        sleep_duration = int(config['SCHEDULER_NODE_MANAGER_SLEEP_TIME'])

        idle_nodes = search_idle_nodes(current_time_sec)
        tmp_time = current_time_sec - idle_duration

        node_halt = []

        # idle_nodes maps each hostname to the date it was last seen busy;
        # use a distinct name so it does not shadow idle_duration above
        for node, last_busy_date in iteritems(idle_nodes):
            if last_busy_date < tmp_time:
                # Search if the node has enough time to sleep
                tmp = get_next_job_date_on_node(node)
                if (tmp is None) or (tmp - sleep_duration > current_time_sec):
                    # Search if node has not been woken up recently
                    wakeup_date = get_last_wake_up_date_of_node(node)
                    if (wakeup_date is None) or (wakeup_date < tmp_time):
                        node_halt.append(node)

        if node_halt != []:
            logger.debug("Powering off some nodes (energy saving): " + str(node_halt))
            # Using the built-in energy saving module to shut down nodes
            if config['ENERGY_SAVING_INTERNAL'] == 'yes':
                if kao_tools.send_to_hulot('HALT', ' '.join(node_halt)):
                    logger.error("Communication problem with the energy saving module (Hulot)\n")
                flag_hulot = True
            else:
                # Not using the built-in energy saving module to shut down nodes
                cmd = config['SCHEDULER_NODE_MANAGER_SLEEP_CMD']
                if kao_tools.fork_and_feed_stdin(cmd, timeout_cmd, node_halt):
                    logger.error("Command " + cmd + "timeouted (" + str(timeout_cmd)
                                 + "s) while trying to  poweroff some nodes")

    if (('SCHEDULER_NODE_MANAGER_SLEEP_CMD' in config) or
        ((config['ENERGY_SAVING_INTERNAL'] == 'yes') and
         ('ENERGY_SAVING_NODE_MANAGER_SLEEP_CMD' in config))):
        # Get nodes which the scheduler wants to schedule jobs to,
        # but which are in the Absent state, to wake them up
        wakeup_time = int(config['SCHEDULER_NODE_MANAGER_WAKEUP_TIME'])
        nodes = get_gantt_hostname_to_wake_up(current_time_sec, wakeup_time)

        if nodes != []:
            logger.debug("Awaking some nodes: " + str(nodes))
            # Using the built-in energy saving module to wake up nodes
            if config['ENERGY_SAVING_INTERNAL'] == 'yes':
                if kao_tools.send_to_hulot('WAKEUP', ' '.join(nodes)):
                    logger.error("Communication problem with the energy saving module (Hulot)")
                flag_hulot = True
            else:
                # Not using the built-in energy saving module to wake up nodes
                cmd = config['SCHEDULER_NODE_MANAGER_WAKE_UP_CMD']
                if kao_tools.fork_and_feed_stdin(cmd, timeout_cmd, nodes):
                    logger.error("Command " + cmd + "timeouted (" + str(timeout_cmd)
                                 + "s) while trying to wake-up some nodes ")

    # Send CHECK signal to Hulot if needed
    if not flag_hulot and (config['ENERGY_SAVING_INTERNAL'] == 'yes'):
        if kao_tools.send_to_hulot('CHECK', []):
            logger.error("Communication problem with the energy saving module (Hulot)")

    # Retrieve jobs grouped by state, excluding jobs in the 'Waiting' state.
    jobs_by_state = get_current_not_waiting_jobs()

    #
    # Search jobs to resume
    #

    #
    # TODO: TOFINISH
    #
    if 'Resuming' in jobs_by_state:
        logger.warn("Resuming job is NOT ENTIRELY IMPLEMENTED")
        for job in jobs_by_state['Resuming']:
            other_jobs = get_jobs_on_resuming_job_resources(job.id)
            # TODO : look for timesharing other jobs. What do we do?????
            if other_jobs == []:
                # We can resume the job
                logger.debug("[" + str(job.id) + "] Resuming job")
                if 'noop' in job.types:
                    resume_job_action(job.id)
                    logger.debug("[" + str(job.id) + "] Resume NOOP job OK")
                else:
                    script = config['JUST_BEFORE_RESUME_EXEC_FILE']
                    if 'SUSPEND_RESUME_SCRIPT_TIMEOUT' in config:
                        timeout = int(config['SUSPEND_RESUME_SCRIPT_TIMEOUT'])
                    else:
                        timeout = kao_tools.get_default_suspend_resume_script_timeout()
                    skip = 0
                    logger.debug("[" + str(job.id) + "] Running post suspend script: `" +
                                 script + " " + str(job.id) + "'")
                    cmd_str = script + " " + str(job.id)
                    return_code = -1
                    try:
                        return_code = call(cmd_str, shell=True, timeout=timeout)
                    except TimeoutExpired as e:
                        logger.error(str(e) + "[" + str(job.id) + "] Suspend script timeouted")
                        add_new_event('RESUME_SCRIPT_ERROR', job.id, "Suspend script timeouted")
                    if return_code != 0:
                        str_error = "[" + str(job.id) + "] Suspend script error, return code = "\
                                    + str(return_code)
                        logger.error(str_error)
                        add_new_event('RESUME_SCRIPT_ERROR', job.id, str_error)
                        frag_job(job.id)
                        tools.notify_almighty('Qdel')
                        skip = 1

                cpuset_nodes = None
                if 'JOB_RESOURCE_MANAGER_PROPERTY_DB_FIELD' in config:
                    cpuset_field = config['JOB_RESOURCE_MANAGER_PROPERTY_DB_FIELD']
                else:
                    cpuset_field = ""
                if cpuset_field and (skip == 0):
                    # TODO
                    cpuset_name = job.user + "_" + str(job.id)
                    cpuset_nodes = get_cpuset_values(cpuset_field,
                                                     job.assigned_moldable_id)
                    # TODO
                    suspend_data_hash = {'name': cpuset_name,
                                         'job_id': job.id,
                                         'oarexec_pid_file':
                                         kao_tools.get_oar_pid_file_name(job.id)}
                if cpuset_nodes:
                    # TODO
                    taktuk_cmd = config['TAKTUK_CMD']
                    if 'SUSPEND_RESUME_FILE' in config:
                        suspend_file = config['SUSPEND_RESUME_FILE']
                    else:
                        # TODO
                        suspend_file = kao_tools.get_default_suspend_resume_file()

    #
    # TODO: TOFINISH
    #

    # Notify oarsub -I when they will be launched
    for j_info in get_gantt_waiting_interactive_prediction_date():
        job_id, job_info_type, job_start_time, job_message = j_info
        addr, port = job_info_type.split(':')
        new_start_prediction = local_to_sql(job_start_time)
        logger.debug("[" + str(job_id) + "] Notifying user of the start prediction: " +
                     new_start_prediction + "(" + job_message + ")")
        tools.notify_tcp_socket(addr, port, "[" + initial_time_sql + "] Start prediction: " +
                                new_start_prediction + " (" + job_message + ")")

    # Run the decisions
    # Process "toError" jobs
    if 'toError' in jobs_by_state:
        for job in jobs_by_state['toError']:
            addr, port = job.info_type.split(':')
            if job.type == 'INTERACTIVE' or\
               (job.type == 'PASSIVE' and job.reservation == 'Scheduled'):
                logger.debug("Notify oarsub job (num:" + str(job.id) + ") in error; jobInfo=" +
                             job.info_type)

                nb_sent1 = tools.notify_tcp_socket(addr, port, job.message + '\n')
                nb_sent2 = tools.notify_tcp_socket(addr, port, 'BAD JOB' + '\n')
                if (nb_sent1 == 0) or (nb_sent2 == 0):
                    logger.warn(
                        "Cannot open connection to oarsub client for " + str(job.id))
            logger.debug("Set job " + str(job.id) + " to state Error")
            set_job_state(job.id, 'Error')

    # Process toAckReservation jobs
    if 'toAckReservation' in jobs_by_state:
        for job in jobs_by_state['toAckReservation']:
            addr, port = job.info_type.split(':')
            logger.debug(
                "Treating job " + str(job.id) + " in toAckReservation state")

            nb_sent = tools.notify_tcp_socket(addr, port, 'GOOD RESERVATION' + '\n')

            if nb_sent == 0:
                logger.warn(
                    "Frag job " + str(job.id) + ", I cannot notify oarsub for the reservation")
                add_new_event('CANNOT_NOTIFY_OARSUB', job.id,
                              "Cannot notify oarsub for the job " + str(job.id))

                # TODO ???
                # OAR::IO::lock_table / OAR::IO::unlock_table($base)
                frag_job(job.id)

                exit_code = 2
            else:
                logger.debug("Notify oarsub for a RESERVATION (idJob=" +
                             str(job.id) + ") --> OK; jobInfo=" + job.info_type)
                set_job_state(job.id, 'Waiting')
                if ((job.start_time - 1) <= current_time_sec) and (exit_code == 0):
                    exit_code = 1

    # Process toLaunch jobs
    if 'toLaunch' in jobs_by_state:
        for job in jobs_by_state['toLaunch']:
            notify_to_run_job(job.id)

    logger.debug("End of Meta Scheduler")

    return exit_code
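
The three conditions guarding node_halt.append() in the energy-saving section (idle long enough, enough time before the next job for sleeping to pay off, not woken up recently) can be condensed into one predicate. A sketch where the arguments stand in for the database lookups (names are illustrative, not part of the OAR API):

def should_halt_node(now, idle_time, sleep_time,
                     last_busy_date, next_job_date, last_wakeup_date):
    # None means "no next job scheduled" / "never woken up", mirroring
    # get_next_job_date_on_node / get_last_wake_up_date_of_node above.
    if last_busy_date >= now - idle_time:
        return False  # not idle long enough
    if next_job_date is not None and next_job_date - sleep_time <= now:
        return False  # next job too close: sleeping would not pay off
    if last_wakeup_date is not None and last_wakeup_date >= now - idle_time:
        return False  # woken up too recently
    return True

# Idle since t=0, nothing scheduled, never woken up: halt it.
assert should_halt_node(10000, 3600, 600, 0, None, None) is True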
Example #5
def handle_waiting_reservation_jobs(queue_name, resource_set, job_security_time, current_time_sec):

    logger.debug("Queue " + queue_name +
                 ": begin processing accepted Advance Reservations")

    ar_jobs = get_waiting_scheduled_AR_jobs(queue_name, resource_set, job_security_time, current_time_sec)

    for job in ar_jobs:

        moldable_id = job.moldable_id
        walltime = job.walltime

        # Test if AR job is expired and handle it
        if (current_time_sec > (job.start_time + walltime)):
            logger.warn("[" + str(job.id) +
                        "] set job state to Error: avdance reservation expired and couldn't be started")
            set_job_state(job.id, 'Error')
            set_job_message(job.id, "Reservation expired and couldn't be started.")
        else:

            # Determine currently available resources
            avail_res = intersec(resource_set.roid_itvs, job.res_set)

            # Test if the AR job is waiting to be launched due to node unavailability
            if (avail_res == []) and (job.start_time < current_time_sec):
                logger.warn("[%s] advance reservation is waiting because no resource is present"
                            % str(job.id))

                # Delay launching time
                set_gantt_job_start_time(moldable_id, current_time_sec + 1)
            elif (job.start_time < current_time_sec):

                if (job.start_time + reservation_waiting_timeout) > current_time_sec:
                    if not equal_itvs(avail_res, job.res_set):
                        # The expected resources are not all available,
                        # wait until the specified timeout expires
                        logger.warn("[" + str(job.id) +
                                    "] advance reservation is waiting because not all "
                                    "resources are available yet")
                        set_gantt_job_start_time(moldable_id, current_time_sec + 1)
                else:
                    # It's time to launch the AR job, remove missing resources
                    missing_resources_itvs = sub_intervals(job.res_set, avail_res)
                    remove_gantt_resource_job(moldable_id, missing_resources_itvs,
                                              resource_set)
                    logger.warn("[" + str(job.id) +
                                "remove some resources assigned to this advance reservation, \
                                because there are not Alive")

                    add_new_event('SCHEDULER_REDUCE_NB_RESSOURCES_FOR_ADVANCE_RESERVATION',
                                  job.id,
                                  "[MetaSched] Reduce the number of resources for the job "
                                  + str(job.id))

                    nb_res = itvs_size(job.res_set) - itvs_size(missing_resources_itvs)
                    new_message = re.sub(r'R=\d+', 'R=' + str(nb_res), job.message)
                    if new_message != job.message:
                        set_job_message(job.id, new_message)

    logger.debug("Queue " + queue_name +
                 ": end processing of reservations with missing resources")
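
The final message rewrite follows the same pattern as the walltime reduction in example #3: recompute the resource count and patch the R= field. A sketch with plain sets standing in for OAR's interval sets (itvs_size() and sub_intervals() are the real helpers):

import re

assigned = {1, 2, 3, 4}         # resource ids the reservation was given
available = {1, 3}              # resources still Alive right now
missing = assigned - available  # sub_intervals(job.res_set, avail_res)
nb_res = len(assigned) - len(missing)

message = "R=4,W=2:00:00,J=B"
print(re.sub(r'R=\d+', 'R=' + str(nb_res), message))
# -> R=2,W=2:00:00,J=B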