示例#1
0
文件: hierarchy.py 项目: fr0uty/oartm
 def __init__(self, hy=None, hy_rid=None, ):
     """Store a hierarchy, either ready-made or built from id lists.

     :param hy: already-built hierarchy; stored unchanged when ``hy_rid``
         is missing or empty.
     :param hy_rid: mapping ``label -> mapping`` whose values are id
         lists; every id list is converted with ``ordered_ids2itvs``.
         Takes precedence over ``hy`` when truthy.
     :raises Exception: when neither ``hy_rid`` nor ``hy`` is truthy.
     """
     if not hy_rid:
         # No id-based description: fall back on the prebuilt hierarchy.
         if not hy:
             raise Exception("Hierarchy description must be provided")
         self.hy = hy
         return

     self.hy = {}
     for label, level_roids in iteritems(hy_rid):
         itvs_per_value = []
         # Only the id lists matter here, the inner keys are ignored.
         for _, roids in iteritems(level_roids):
             itvs_per_value.append(ordered_ids2itvs(roids))
         self.hy[label] = itvs_per_value
示例#2
0
文件: simsim.py 项目: fr0uty/oartm
    def sched(self):
        """Generator driving the simulated scheduling loop.

        Every iteration yields an ``AnyOf`` over the running-job events plus
        the next job-arrival event (when there is one), then records the
        arrivals and terminations it is resumed with, runs ``schedule_cycle``
        and starts each assigned job whose ``start_time`` equals the current
        simulation time. ``self.env.exit()`` is called once there is no
        upcoming arrival and nothing is waiting or running.
        """
        next_job_arrival = self.job_arrival()

        while True:

            print('Wait for job arrivals or job endings', self.env.now)

            pending = list(self.evt_running_jobs)
            if next_job_arrival is not None:
                print("append next_job_arrival evt")
                pending.append(next_job_arrival)
            fired = yield AnyOf(self.env, pending)

            for event, value in iteritems(fired.todict()):
                if event == next_job_arrival:
                    # The arrival event carries the ids of the new jobs.
                    print("job arrives !", value)
                    for jid in value:
                        self.waiting_jids.add(jid)
                    next_job_arrival = self.job_arrival()
                    continue

                # Any other event is the end of a running job; its value is
                # the job id given to env.timeout() at launch time.
                print("job endings !", event, value)
                self.evt_running_jobs.remove(event)
                self.jobs[value].state = "Terminated"
                self.platform.completed_jids.append(value)
                self.platform.running_jids.remove(value)

            now = self.env.now

            nothing_left = (next_job_arrival is None
                            and not self.waiting_jids
                            and not self.evt_running_jobs)
            if nothing_left:
                print("All job submitted, no more waiting or running jobs ...", now)
                self.env.exit()

            print("call schedule_cycle.... ", now)

            schedule_cycle(self.platform, now, "test")

            # Start every job the scheduler placed at the current time.
            for jid, job in iteritems(self.platform.assigned_jobs):
                if job.start_time != now:
                    continue
                self.waiting_jids.remove(jid)
                job.state = "Running"
                print("launch:", jid)
                end_of_job = self.env.timeout(job.run_time, jid)
                self.evt_running_jobs.add(end_of_job)

                self.platform.running_jids.append(jid)
示例#3
0
文件: utils.py 项目: fr0uty/oartm
 def parse(self):
     """Parses the request arguments.

     For every ``(argname, argobj)`` pair of ``self.argmap`` the value
     returned by ``self.parse_arg`` is converted with ``self.convert`` to
     ``argobj.type`` (or replaced by ``argobj.default`` when it is
     ``self.MISSING``) and stored under ``argobj.dest``, falling back on
     the argument name when ``dest`` is ``None``.

     A conversion failure triggers ``abort(400)``; the exception raised by
     ``abort`` gets the explanatory message attached as ``data`` before
     being re-raised.

     :returns: ``(parsed_kwargs, raw_kwargs)``; ``raw_kwargs`` holds, per
         argument name, the non-``None`` results of ``argobj.raw_value``.
     """
     parsed_kwargs, raw_kwargs = {}, {}
     for argname, argobj in iteritems(self.argmap):
         dest = argname if argobj.dest is None else argobj.dest
         value = self.parse_arg(argname, argobj)
         if value is self.MISSING:
             parsed_kwargs[dest] = argobj.default
         else:
             try:
                 parsed_kwargs[dest] = self.convert(value, argobj.type)
             except Exception as e:
                 msg = ("The parameter '%s' specified in the request "
                        "URI is not supported. %s" % (argname, e))
                 try:
                     abort(400)
                 except:
                     # Catch-all on purpose: whatever abort() raised is
                     # decorated with the message and re-raised right away.
                     exc_type, exc_value, tb = sys.exc_info()
                     exc_value.data = msg
                     reraise(exc_type, exc_value, tb.tb_next)
         raw = argobj.raw_value(parsed_kwargs[dest])
         if raw is not None:
             raw_kwargs[argname] = raw
     return parsed_kwargs, raw_kwargs
示例#4
0
文件: slot.py 项目: fr0uty/oartm
 def __str__(self):
     """Return a framed, one-line-per-slot dump of the slot set.

     Each entry of ``self.slots`` is rendered as ``[<key>] <slot>``. The
     dump is preceded by a `` SlotSet `` title centred in dashes and
     followed by a dashed rule, both sized on the longest slot line.
     An empty slot set yields the bare title and an empty rule instead of
     raising.
     """
     lines = ["[%s] %s" % (i, slot) for i, slot in iteritems(self.slots)]
     # max() raises ValueError on an empty sequence; fall back on width 0.
     max_length = max([len(line) for line in lines] or [0])
     header = ('{:-^%d}' % max_length).format(' SlotSet ')
     footer = "-" * max_length
     return '\n'.join([header] + lines + [footer])
示例#5
0
文件: helpers.py 项目: fr0uty/oartm
def plot_slots_and_job(slots_set, jobs, nb_res, t_max):
    """Draw slots and jobs as translucent rectangles and show the figure.

    The x axis spans ``0..t_max`` and the y axis ``0..nb_res``. Each
    interval of a slot (``slot.itvs``) or of a job (``job.res_set``) becomes
    one rectangle; only the first rectangle of each slot/job is annotated
    (``s<sid>`` / ``j<jid>``).

    :param slots_set: object exposing a ``slots`` mapping, or a falsy value
        to skip slot drawing.
    :param jobs: mapping ``jid -> job``, or a falsy value to skip job drawing.
    :param nb_res: upper bound of the y axis.
    :param t_max: upper bound of the x axis.
    """
    import matplotlib.pyplot as plt
    import matplotlib.patches as mpatch
    _fig, ax = plt.subplots()

    if slots_set:
        for sid, slot in iteritems(slots_set.slots):
            # Alternate colours so that consecutive slot ids differ.
            col = "red" if (sid % 2) else "blue"
            for i, itv in enumerate(slot.itvs):
                (y0, y1) = itv
                rect = mpatch.Rectangle((slot.b, y0 - 0.4), slot.e - slot.b,
                                        y1 - y0 + 0.9, alpha=0.1, color=col)
                if i == 0:
                    annotate(ax, rect, 's' + str(sid))
                ax.add_artist(rect)

    if jobs:
        for jid, job in iteritems(jobs):
            col = RGB_tuples[random.randint(0, NB_COLORS - 1)]
            # Prefer the effective run time over the walltime when known.
            duration = job.walltime
            if hasattr(job, 'run_time'):
                duration = job.run_time
            for i, itv in enumerate(job.res_set):
                (y0, y1) = itv
                rect = mpatch.Rectangle((job.start_time, y0 - 0.4), duration,
                                        y1 - y0, alpha=0.2, color=col)
                if i == 0:
                    annotate(ax, rect, 'j' + str(jid))
                ax.add_artist(rect)

    ax.set_xlim((0, t_max))
    ax.set_ylim((0, nb_res))
    ax.grid(True)
    mng = plt.get_current_fig_manager()
    try:
        mng.resize(*mng.window.maxsize())
    except Exception:
        # Maximising the window is cosmetic and the figure manager may not
        # expose window.maxsize(); ignore failures, but let
        # KeyboardInterrupt / SystemExit propagate.
        pass
    plt.show()
示例#6
0
文件: platform.py 项目: fr0uty/oartm
    def save_assigns_simu(self, jobs, resource_set):
        """Translate each job's resource set and keep the jobs as assigned.

        Every id obtained from ``job.res_set`` through ``itvs2ids`` is looked
        up in ``resource_set.rid_o2i``; the resulting ids are turned back
        into intervals with ``unordered_ids2itvs`` and written to
        ``job.res_set``. ``jobs`` is then stored in ``self.assigned_jobs``.
        """
        print("save_assigns_simu")

        for jid, job in iteritems(jobs):
            print("job.res_set before", jid, job.res_set)
            translated_ids = []
            for roid in itvs2ids(job.res_set):
                translated_ids.append(resource_set.rid_o2i[roid])
            job.res_set = unordered_ids2itvs(translated_ids)
        self.assigned_jobs = jobs
示例#7
0
def check_besteffort_jobs_to_kill(jobs_to_launch, rid2jid_to_launch, current_time_sec,
                                  besteffort_rid2job, resource_set):
    '''Detect if there are besteffort jobs to kill.

    For every resource needed by a job to launch that is currently held by a
    besteffort job, the besteffort job is either left alone (timesharing
    compatible), sent a checkpoint signal (when ``be_job.checkpoint > 0`` and
    no CHECKPOINT event exists yet, or the current time has not gone past
    the first one plus ``be_job.checkpoint``), or fragged.

    :param jobs_to_launch: mapping job id -> job to launch.
    :param rid2jid_to_launch: mapping resource id -> id of the job to launch
        on that resource.
    :param current_time_sec: current time, compared with checkpoint dates.
    :param besteffort_rid2job: mapping resource id -> besteffort job using it.
    :param resource_set: not used by this function.
    :returns: 1 if at least one besteffort job was checkpointed or fragged,
        otherwise 0.
    '''

    return_code = 0

    logger.debug("Begin processing of besteffort jobs to kill")

    # Ids of besteffort jobs already handled, so that a job holding several
    # needed resources is signalled/fragged only once.
    fragged_jobs = set()

    for rid, job_id in iteritems(rid2jid_to_launch):
        if rid not in besteffort_rid2job:
            continue

        be_job = besteffort_rid2job[rid]
        job_to_launch = jobs_to_launch[job_id]

        if is_timesharing_for_two_jobs(be_job, job_to_launch):
            logger.debug("Resource " + str(rid) +
                         " is needed for  job " + str(job_id) +
                         ", but besteffort job  " + str(be_job.id) +
                         " can live, because timesharing compatible")
            continue

        if be_job.id in fragged_jobs:
            continue

        skip_kill = False
        # Check if we must checkpoint the besteffort job
        if be_job.checkpoint > 0:
            # sys.maxsize means "no CHECKPOINT event recorded yet".
            checkpoint_first_date = sys.maxsize
            for ev in get_job_events(be_job.id):
                if ev.type == 'CHECKPOINT' and checkpoint_first_date > ev.date:
                    checkpoint_first_date = ev.date

            if (checkpoint_first_date == sys.maxsize) or\
               (current_time_sec <= (checkpoint_first_date + be_job.checkpoint)):
                skip_kill = True
                send_checkpoint_signal(be_job)

                logger.debug("Send checkpoint signal to the job " + str(be_job.id))

        if not skip_kill:
            # The resource is freed *for* the job to launch by killing the
            # besteffort job (the ids were swapped in the former message).
            logger.debug("Resource " + str(rid) +
                         " need to be freed for job " + str(job_to_launch.id) +
                         ": killing besteffort job " + str(be_job.id))

            add_new_event('BESTEFFORT_KILL', be_job.id,
                          "kill the besteffort job " + str(be_job.id))
            frag_job(be_job.id)

        fragged_jobs.add(be_job.id)
        return_code = 1

    logger.debug("End precessing of besteffort jobs to kill\n")

    return return_code
示例#8
0
文件: bataar.py 项目: oar-team/oar3
    def scheduleJobs(self):
        """Run one scheduling round and hand the jobs to launch to ``self.bs``.

        With ``platform_model == "simu"`` the round is ``schedule_cycle`` and
        the jobs to launch are the assigned jobs that start at
        ``self.env.now`` and are still "Waiting". Otherwise ``meta_schedule``
        is called and the jobs to launch are the database jobs in state
        "toLaunch". Launched jobs are moved from the waiting set to the
        platform's running list, time is consumed on ``self.bs`` (measured
        duration when ``self.sched_delay == -1``, else ``self.sched_delay``)
        and ``self.bs.start_jobs`` is called when there is something to start.
        """
        print("Sheduling Round")
        real_time = time.time()
        # Initialised before branching: both branches append to it and the
        # code after them reads it (it used to be unbound outside "simu").
        jids_to_launch = []
        if self.platform_model == "simu":
            schedule_cycle(self.platform, self.env.now, "default")

            # retrieve jobs to launch
            for jid, job in iteritems(self.platform.assigned_jobs):
                print("job.start_time %s" % job.start_time)
                if (job.start_time == self.env.now) and (job.state == "Waiting"):
                    self.waiting_jids.remove(jid)
                    jids_to_launch.append(jid)
                    job.state = "Running"
                    print("tolaunch: %s" % jid)
                    self.platform.running_jids.append(jid)

        else:
            print("call meta_schedule('internal')")
            meta_schedule("internal", plt)

            result = db.query(Job).filter(Job.state == "toLaunch").order_by(Job.id).all()

            for job_db in result:
                set_job_state(job_db.id, "Running")
                # map the database job id to the simulator job id
                jid = self.db_jid2s_jid[job_db.id]
                self.waiting_jids.remove(jid)
                jids_to_launch.append(jid)
                self.jobs[jid].state = "Running"
                print("_tolaunch: %s" % jid)
                self.platform.running_jids.append(jid)

        print("Ids of jobs to launch: ", *jids_to_launch)
        print("Time befort scheduling round: ", self.bs._current_time, self.sched_delay)
        # update time
        real_sched_time = time.time() - real_time
        if self.sched_delay == -1:
            self.bs.consume_time(real_sched_time)  # TODO
        else:
            self.bs.consume_time(self.sched_delay)

        self.env.now = self.bs._current_time

        print("Time after scheduling round: ", self.bs._current_time)
        # send to uds
        if jids_to_launch:
            scheduled_jobs = []
            jobs_res = {}
            for jid in jids_to_launch:
                ds_job = self.jobs[jid].ds_job
                res = itvs2batsim_str0(self.jobs[jid].res_set)
                scheduled_jobs.append(ds_job)
                jobs_res[ds_job.id] = res

            self.bs.start_jobs(scheduled_jobs, jobs_res)
示例#9
0
文件: quotas.py 项目: fr0uty/oartm
 def check(self, job):
     """Check the counters against every quota rule that applies to ``job``.

     A rule ``(queue, project, job_type, user)`` is paired with a counter
     key of the same shape when, for each field, either both are ``'*'``,
     or they are equal and match the job's own value, or the rule field is
     ``'/'`` (the ``'/'`` form is not accepted for the job type). For each
     such pair a limit greater than -1 fails as soon as it is lower than
     the counter.

     :returns: ``(True, 'quotas ok', '', 0)`` when nothing fails, otherwise
         ``(False, <message>, <rule fields>, <limit that was exceeded>)``.
     """
     global quotas_rules
     for rl_fields, rl_quotas in iteritems(quotas_rules):
         rl_queue, rl_project, rl_job_type, rl_user = rl_fields
         rl_nb_resources, rl_nb_jobs, rl_resources_time = rl_quotas
         for fields, counters in iteritems(self.counters):
             queue, project, job_type, user = fields
             nb_resources, nb_jobs, resources_time = counters

             queue_ok = (((rl_queue == '*') and (queue == '*')) or
                         ((rl_queue == queue) and (job.queue_name == queue)) or
                         (rl_queue == '/'))
             if not queue_ok:
                 continue

             project_ok = (((rl_project == '*') and (project == '*')) or
                           ((rl_project == project) and (job.project == project)) or
                           (rl_project == '/'))
             if not project_ok:
                 continue

             job_type_ok = (((rl_job_type == '*') and (job_type == '*')) or
                            ((rl_job_type == job_type) and (job_type in job.types)))
             if not job_type_ok:
                 continue

             user_ok = (((rl_user == '*') and (user == '*')) or
                        ((rl_user == user) and (job.user == user)) or
                        (rl_user == '/'))
             if not user_ok:
                 continue

             # The rule applies to this counter: compare each enabled limit
             # (-1 disables a limit) with the counter value.
             if -1 < rl_nb_resources < nb_resources:
                 return (False, 'nb resources quotas failed',
                         rl_fields, rl_nb_resources)
             if -1 < rl_nb_jobs < nb_jobs:
                 return (False, 'nb jobs quotas failed',
                         rl_fields, rl_nb_jobs)
             if -1 < rl_resources_time < resources_time:
                 return (False, 'resources hours quotas failed',
                         rl_fields, rl_resources_time)
     return (True, 'quotas ok', '', 0)
示例#10
0
def set_slots_with_prev_scheduled_jobs(slots_sets, jobs, job_security_time,
                                       now=0,
                                       filter_besteffort=True,
                                       only_besteffort=False):
    """Insert already scheduled jobs into their slot sets.

    Jobs are first selected: besteffort jobs are kept only when
    ``filter_besteffort`` is false, the other jobs only when
    ``only_besteffort`` is false. For each kept job:

    * a "container" job gets (or creates) the slot set named after the
      container type value, or after the job id when that value is empty,
      and a pseudo job covering the container is split into it with
      ``split_slots_jobs([j], False)``;
    * the job itself is queued for the slot set named by its "inner" type,
      or for ``'default'``.

    Finally every slot set that has queued jobs receives them through
    ``split_slots_jobs``.

    :param slots_sets: mapping name -> slot set, modified in place.
    :param jobs: iterable of jobs to place.
    :param job_security_time: subtracted from the walltime of container
        pseudo jobs.
    :param now: container pseudo jobs never start before this time.
    :param filter_besteffort: when true, besteffort jobs are ignored.
    :param only_besteffort: when true, non-besteffort jobs are ignored.
    """

    jobs_slotsets = {'default': []}

    for job in jobs:
        logger.debug("job.id:" + str(job.id))
        is_besteffort = "besteffort" in job.types
        if ((not filter_besteffort) and is_besteffort) or\
           ((not only_besteffort) and (not is_besteffort)):
            if "container" in job.types:
                if job.types["container"] != "":
                    ss_name = job.types["container"]
                else:
                    ss_name = str(job.id)

                logger.debug("container:" + ss_name)

                if ss_name not in slots_sets:
                    slots_sets[ss_name] = SlotSet(([], 1))

                # A container already started is inserted from `now` on.
                start_time = max(job.start_time, now)

                # The pseudo job carries the container's own timesharing and
                # placeholder settings (ph used to be fed from job.ts).
                j = JobPseudo(id=0, start_time=start_time,
                              walltime=job.walltime - job_security_time,
                              res_set=job.res_set,
                              ts=job.ts, ph=job.ph)

                slots_sets[ss_name].split_slots_jobs([j], False)  # add job's resources

            ss_name = 'default'
            if "inner" in job.types:
                ss_name = job.types["inner"]

            if ss_name not in jobs_slotsets:
                jobs_slotsets[ss_name] = []

            jobs_slotsets[ss_name].append(job)

    for ss_name, slot_set in iteritems(slots_sets):
        logger.debug(" slots_sets.iteritems():" + ss_name)
        if ss_name in jobs_slotsets:
            slot_set.split_slots_jobs(jobs_slotsets[ss_name])
示例#11
0
文件: platform.py 项目: fr0uty/oartm
    def save_assigns_simu_and_default(self, jobs, resource_set):
        """Mirror the assignments on the simulated jobs, then save them.

        For each ``jid`` of ``jobs`` the simulator job ``self.jobs[sid]``
        (``sid`` taken from ``self.db_jid2s_jid``) receives the job's start
        time, walltime and resource set, the latter translated id by id
        through ``resource_set.rid_o2i``.

        :returns: the result of ``save_assigns(jobs, resource_set)``.
        """
        print("save_assigns_simu_and_default........................")
        for jid, job in iteritems(jobs):
            sid = self.db_jid2s_jid[jid]
            sim_job = self.jobs[sid]
            translated_ids = []
            for roid in itvs2ids(job.res_set):
                translated_ids.append(resource_set.rid_o2i[roid])
            sim_job.res_set = unordered_ids2itvs(translated_ids)
            print("save assign jid, sid, res_set: ", jid, " ", sid, " ", sim_job.res_set)
            sim_job.start_time = job.start_time
            sim_job.walltime = job.walltime

        return save_assigns(jobs, resource_set)
示例#12
0
文件: job.py 项目: oar-team/oar3
def set_jobs_cache_keys(jobs):
    """
    Fill ``job.key_cache`` with the keys used by the slot_set cache, which
    speeds up the search for suitable slots.

    Only jobs without timesharing (``job.ts`` falsy) and without placeholder
    (``job.ph == NO_PLACEHOLDER``) get keys. For each of their moldable
    requests the key is ``str(walltime) + str(hy_res_rqts)``, stored under
    ``int(moldable_id)``.

    According to the original notes, jobs with dependencies are not suitable
    for this cache either and do not update its entries, while jobs in a
    container may benefit from it because a container is linked to a
    particular slot_set; neither case is handled in this function.
    """
    for _, job in iteritems(jobs):
        if job.ts or job.ph != NO_PLACEHOLDER:
            continue
        for moldable_id, walltime, hy_res_rqts in job.mld_res_rqts:
            cache_key = str(walltime) + str(hy_res_rqts)
            job.key_cache[int(moldable_id)] = cache_key
示例#13
0
文件: quotas.py 项目: fr0uty/oartm
def load_quotas_rules():
    """Load the quota rules from the JSON file named by config['QUOTAS_FILE'].

    Expected file layout::

        {
            "quotas": {
                "*,*,*,*": [120, -1, -1],
                "*,*,*,john": [150, -1, -1]
            },
            "quotas_job_types": ["besteffort", "deploy", "console"]
        }

    Every key of "quotas" is split on commas into a tuple used as key of the
    module-level ``quotas_rules``; the value keeps its first two numbers and
    stores the third one multiplied by 3600 and truncated to an int
    (presumably hours converted to seconds). The optional "quotas_job_types"
    list is appended to the module-level ``quotas_job_types``.
    """
    global quotas_rules
    global quotas_job_types

    quotas_rules_filename = config['QUOTAS_FILE']
    # The file is only needed for parsing; close it before processing.
    with open(quotas_rules_filename) as json_file:
        json_quotas = json.load(json_file)

    for k, v in iteritems(json_quotas['quotas']):
        quotas_rules[tuple(k.split(','))] = [v[0], v[1], int(3600 * v[2])]
    if 'quotas_job_types' in json_quotas:
        quotas_job_types.extend(json_quotas['quotas_job_types'])
示例#14
0
文件: coorm.py 项目: fr0uty/oartm
    def assign_resources(self, *proxy_args, **proxy_kwargs):
        """Proxy entry point: delegate a resource assignment to ``self.app``.

        Positional arguments, as read by this method:

        * ``proxy_args[0]``: pickled slot set;
        * ``proxy_args[1]``: job description, wrapped in ``SimpleNamespace``;
        * ``proxy_args[2]``: mapping label -> list of intervals;
        * ``proxy_args[3:]``: forwarded untouched to
          ``self.app.assign_resources``.

        The slot set is logged before and after the call.

        :returns: ``(prev_sid_left, prev_sid_right, dict(job))``.
        """
        self.app.logger.info("┳ OAR ask to assign resources")
        # NOTE(review): pickle.loads can execute arbitrary code; this is only
        # acceptable if the peer sending proxy_args is trusted.
        slots_set = pickle.loads(proxy_args[0])
        job_dict = proxy_args[1]
        job = SimpleNamespace(job_dict)
        hy = {}
        # iteritems() yields (label, intervals) pairs; the former loop used
        # the whole pair as a key and failed on any non-empty hierarchy.
        for res_label, res_itvs in iteritems(proxy_args[2]):
            hy[res_label] = [tuple(i) for i in res_itvs]
        # NOTE(review): `hy` is rebuilt but not forwarded to the application
        # below; confirm whether it should be passed along.

        self.app.logger.debug("┃ Before COORM scheduling")
        for line in ("%s" % slots_set).split('\n'):
            self.app.logger.debug("┃ %s" % line)

        prev_sid_left, prev_sid_right, job = \
            self.app.assign_resources(slots_set, job, *proxy_args[3:])

        self.app.logger.debug("┃ After COORM scheduling")
        for line in ("%s" % slots_set).split('\n'):
            self.app.logger.debug("┃ %s" % line)

        self.app.logger.info("┻ Returns : [%s, %s]" %
                             (prev_sid_left, prev_sid_right))
        self.app.logger.debug("JOBRET: %s %s %s" % (str(job.id), str(job.res_set), str(job.start_time)))
        return prev_sid_left, prev_sid_right, dict(job)
示例#15
0
文件: quotas.py 项目: fr0uty/oartm
 def show_counters(self, msg=''):  # pragma: no cover
     """Print ``msg`` followed by one ``key  =  value`` line per counter."""
     print('show_counters:', msg)
     for fields, values in iteritems(self.counters):
         print(fields, ' = ', values)
示例#16
0
文件: bataar.py 项目: oar-team/oar3
    def sched_loop(self):
        """Exchange messages on ``self.sock`` until ``self.nb_jobs`` jobs completed.

        Each turn reads a message (time, submitted jobs, completed jobs),
        updates the waiting/running/completed bookkeeping (and the database
        job states when ``platform_model == "batsim-db"``), runs one
        scheduling round and sends back the ids of the jobs to launch with a
        time advanced by the measured scheduling duration, or by
        ``self.sched_delay`` when it is not -1.

        ``jobs_completed`` is not defined here; it is presumably a
        module-level list.
        """
        nb_completed_jobs = 0
        while nb_completed_jobs < self.nb_jobs:

            now_float, jobs_submitted, new_jobs_completed = read_bat_msg(self.sock)

            db_backed = self.platform_model == "batsim-db"

            for jid in (jobs_submitted or ()):
                self.waiting_jids.add(jid)
                if db_backed:
                    print('set_job_state("Waiting"):', self.jobs[jid].db_jid)
                    set_job_state(self.jobs[jid].db_jid, "Waiting")

            nb_completed_jobs += len(new_jobs_completed)

            print("new job completed: %s" % new_jobs_completed)

            for jid in new_jobs_completed:
                jobs_completed.append(jid)
                if jid in self.platform.running_jids:
                    self.platform.running_jids.remove(jid)
                if db_backed:
                    set_job_state(self.jobs[jid].db_jid, "Terminated")

            now = int(now_float)
            self.env.now = now  # TODO can be remove ???
            sched_started = time.time()

            print("jobs running: %s" % self.platform.running_jids)
            print("jobs waiting: %s" % self.waiting_jids)
            print("jobs completed: %s" % jobs_completed)

            jids_to_launch = []

            if self.platform_model == "simu":
                print("call schedule_cycle.... %s" % now)
                schedule_cycle(self.platform, now, "default")

                # Jobs to launch: assigned to start now and still waiting.
                for jid, job in iteritems(self.platform.assigned_jobs):
                    print("job.start_time %s" % job.start_time)
                    if (job.start_time == now) and (job.state == "Waiting"):
                        self.waiting_jids.remove(jid)
                        jids_to_launch.append(jid)
                        job.state = "Running"
                        print("tolaunch: %s" % jid)
                        self.platform.running_jids.append(jid)

            else:
                print("call meta_schedule('internal')")
                meta_schedule("internal", plt)

                # Launching phase: fetch the jobs flagged "toLaunch".
                to_launch_query = db.query(Job).filter(Job.state == "toLaunch")
                result = to_launch_query.order_by(Job.id).all()

                for job_db in result:
                    set_job_state(job_db.id, "Running")
                    jid = self.db_jid2s_jid[job_db.id]
                    self.waiting_jids.remove(jid)
                    jids_to_launch.append(jid)
                    self.jobs[jid].state = "Running"
                    print("_tolaunch: %s" % jid)
                    self.platform.running_jids.append(jid)

            real_sched_time = time.time() - sched_started
            if self.sched_delay == -1:
                now_float += real_sched_time
            else:
                now_float += self.sched_delay
            send_bat_msg(self.sock, now_float, jids_to_launch, self.jobs)
示例#17
0
文件: resource.py 项目: fr0uty/oartm
    def __init__(self):
        """Load the resources from the database and build the lookup structures.

        Resources are read in the order given by
        ``config["SCHEDULER_RESOURCE_ORDER"]``; the position of a resource in
        that result is its ordered id (``roid``). Only resources whose state
        is "Alive" or "Absent" are registered. The constructor fills:

        * ``rid_i2o`` / ``rid_o2i``: resource id <-> ordered id arrays;
        * ``roid_itvs``: intervals of all registered ordered ids;
        * ``hierarchy``: built by ``Hierarchy`` from the labels listed in
          ``config["HIERARCHY_LABELS"]`` (default
          "resource_id,network_address");
        * ``available_upto``: mapping available_upto value -> intervals;
        * ``suspendable_roid_itvs``: resources whose type is listed in
          ``config["SCHEDULER_AVAILABLE_SUSPENDED_RESOURCE_TYPE"]``;
        * ``default_resource_itvs``: resources of type 'default', also
          published in the module-level ``default_resource_itvs``.
        """
        # Without this declaration the final assignment would only bind a
        # local name and the module-level variable would never be updated.
        global default_resource_itvs

        # prepare resource order/indirection stuff
        order_by_clause = config["SCHEDULER_RESOURCE_ORDER"]
        self.rid_i2o = array("i", [0] * MAX_NB_RESOURCES)
        self.rid_o2i = array("i", [0] * MAX_NB_RESOURCES)

        # suspend
        suspendable_roids = []
        if "SCHEDULER_AVAILABLE_SUSPENDED_RESOURCE_TYPE" not in config:
            config["SCHEDULER_AVAILABLE_SUSPENDED_RESOURCE_TYPE"] = "default"

        res_suspend_types = (
            config["SCHEDULER_AVAILABLE_SUSPENDED_RESOURCE_TYPE"]).split()

        # prepare hierarchy stuff
        # "HIERARCHY_LABELS" = "resource_id,network_address"
        conf_hy_labels = config[
            "HIERARCHY_LABELS"] if "HIERARCHY_LABELS" in config else "resource_id,network_address"

        hy_labels = conf_hy_labels.split(",")
        # the "resource_id" label is read from the `id` attribute of a resource
        hy_labels_w_id = ["id" if v == "resource_id" else v for v in hy_labels]

        hy_roid = {}
        for hy_label in hy_labels_w_id:
            hy_roid[hy_label] = OrderedDict()

        # available_upto for pseudo job in slot
        available_upto = {}
        self.available_upto = {}

        roids = []

        default_rids = []

        # retrieve resources in order from DB
        self.resources_db = db.query(Resource).order_by(text(order_by_clause)).all()

        # fill the different structures
        for roid, r in enumerate(self.resources_db):
            if (r.state == "Alive") or (r.state == "Absent"):
                rid = int(r.id)
                roids.append(roid)
                if r.type == 'default':
                    default_rids.append(rid)

                self.rid_i2o[rid] = roid
                self.rid_o2i[roid] = rid

                # fill hy_rid structure
                for hy_label in hy_labels_w_id:
                    v = getattr(r, hy_label)
                    hy_roid[hy_label].setdefault(v, []).append(roid)

                # fill available_upto structure
                available_upto.setdefault(r.available_upto, []).append(roid)

                # fill resource available for suspended job
                if r.type in res_suspend_types:
                    suspendable_roids.append(roid)

        # global ordered resources intervals
        self.roid_itvs = ordered_ids2itvs(roids)

        # expose the `id` based level under its configured label name
        if "id" in hy_roid:
            hy_roid["resource_id"] = hy_roid["id"]
            del hy_roid["id"]

        # create hierarchy
        self.hierarchy = Hierarchy(hy_rid=hy_roid).hy

        # transform available_upto
        for k, v in iteritems(available_upto):
            self.available_upto[k] = ordered_ids2itvs(v)

        #
        self.suspendable_roid_itvs = ordered_ids2itvs(suspendable_roids)

        default_roids = [self.rid_i2o[i] for i in default_rids]
        self.default_resource_itvs = unordered_ids2itvs(default_roids)
        # update global variable
        default_resource_itvs = self.default_resource_itvs
示例#18
0
def meta_schedule(mode='internal', plt=Platform()):
    """Run one meta-scheduling pass and return an exit code.

    The pass performs, in order:

    1. optional loading of the quotas rules and initialisation of the gantt
       from the running jobs;
    2. for every ``Active`` queue (by decreasing priority): a scheduler run,
       then handling of waiting and new reservation jobs;
    3. computation of the jobs to launch and of the besteffort jobs that must
       be killed to make room for them;
    4. energy-saving management (halting idle nodes, waking up needed ones);
    5. processing of the jobs in the ``Resuming`` (unfinished), ``toError``,
       ``toAckReservation`` and ``toLaunch`` states, plus start-prediction
       notifications for waiting interactive jobs.

    :param mode: ``'external'`` runs each queue through
        ``call_external_scheduler``; any other value uses
        ``call_internal_scheduler``.
    :param plt: platform object; it provides ``resource_set()`` and is handed
        to the gantt initialisation, the internal scheduler and the
        reservation check. NOTE: the default instance is created once, when
        this function is defined, and is shared by every call relying on it.
    :return: ``0`` by default; ``2`` when besteffort jobs must be killed or
        when a reservation could not be acknowledged to ``oarsub``; ``1``
        when an acknowledged reservation has to start now and no other code
        was set before.
    """

    exit_code = 0

    job_security_time = int(config['SCHEDULER_JOB_SECURITY_TIME'])

    if ('QUOTAS' in config) and (config['QUOTAS'] == 'yes'):
        if 'QUOTAS_FILE' not in config:
            config['QUOTAS_FILE'] = './quotas_conf.json'
        load_quotas_rules()

    tools.init_judas_notify_user()
    tools.create_almighty_socket()

    # Implicit string concatenation: a backslash continuation inside the
    # literal would embed the indentation whitespace in the message.
    logger.debug(
        "Retrieve information for already scheduled reservations from "
        "database before flush (keep assign resources)")

    # reservation ??.

    initial_time_sec = tools.get_date()  # time.time()
    initial_time_sql = local_to_sql(initial_time_sec)

    current_time_sec = initial_time_sec
    current_time_sql = initial_time_sql

    gantt_init_results = gantt_init_with_running_jobs(plt, initial_time_sec,
                                                      job_security_time)
    all_slot_sets, scheduled_jobs, besteffort_rid2jid = gantt_init_results
    resource_set = plt.resource_set()

    # Path for user of external schedulers
    if 'OARDIR' in os.environ:
        binpath = os.environ['OARDIR'] + '/'
    else:
        binpath = '/usr/local/lib/oar/'
        logger.warning(
            "OARDIR env variable must be defined, " + binpath + " is used by default")

    for queue in db.query(Queue).order_by(text('priority DESC')).all():

        if queue.state == 'Active':
            logger.debug("Queue " + queue.name + ": Launching scheduler " +
                         queue.scheduler_policy + " at time " + initial_time_sql)

            if mode == 'external':  # pragma: no cover
                call_external_scheduler(binpath, scheduled_jobs, all_slot_sets,
                                        resource_set, job_security_time, queue,
                                        initial_time_sec, initial_time_sql)
            else:
                call_internal_scheduler(plt, scheduled_jobs, all_slot_sets,
                                        job_security_time, queue, initial_time_sec)

            handle_waiting_reservation_jobs(queue.name, resource_set,
                                            job_security_time, current_time_sec)

            # handle_new_AR_jobs
            check_reservation_jobs(
                plt, resource_set, queue.name, all_slot_sets, current_time_sec)

    jobs_to_launch, jobs_to_launch_lst, rid2jid_to_launch = get_gantt_jobs_to_launch(
        resource_set, job_security_time, current_time_sec)

    if check_besteffort_jobs_to_kill(jobs_to_launch, rid2jid_to_launch,
                                     current_time_sec, besteffort_rid2jid,
                                     resource_set) == 1:
        # We must kill some besteffort jobs
        tools.notify_almighty('ChState')
        exit_code = 2
    elif handle_jobs_to_launch(jobs_to_launch_lst, current_time_sec, current_time_sql) == 1:
        exit_code = 0

    # Update visu gantt tables
    update_gantt_visualization()

    # Manage dynamic node feature
    flag_hulot = False
    timeout_cmd = int(config['SCHEDULER_TIMEOUT'])

    # Node management is on when a sleep command is configured, either for the
    # scheduler itself or for the internal energy saving module.
    node_manager_enabled = (
        ('SCHEDULER_NODE_MANAGER_SLEEP_CMD' in config) or
        ((config['ENERGY_SAVING_INTERNAL'] == 'yes') and
         ('ENERGY_SAVING_NODE_MANAGER_SLEEP_CMD' in config)))

    if (node_manager_enabled and
            ('SCHEDULER_NODE_MANAGER_SLEEP_TIME' in config) and
            ('SCHEDULER_NODE_MANAGER_IDLE_TIME' in config)):

        # Look at nodes that are unused for a duration
        idle_duration = int(config['SCHEDULER_NODE_MANAGER_IDLE_TIME'])
        sleep_duration = int(config['SCHEDULER_NODE_MANAGER_SLEEP_TIME'])

        idle_nodes = search_idle_nodes(current_time_sec)
        tmp_time = current_time_sec - idle_duration

        node_halt = []

        # The per-node value is compared with a date threshold, so it gets its
        # own name instead of shadowing the configured idle duration.
        for node, idle_date in iteritems(idle_nodes):
            if idle_date < tmp_time:
                # Search if the node has enough time to sleep
                tmp = get_next_job_date_on_node(node)
                if (tmp is None) or (tmp - sleep_duration > current_time_sec):
                    # Search if node has not been woken up recently
                    wakeup_date = get_last_wake_up_date_of_node(node)
                    if (wakeup_date is None) or (wakeup_date < tmp_time):
                        node_halt.append(node)

        if node_halt:
            logger.debug("Powering off some nodes (energy saving): " + str(node_halt))
            # Using the built-in energy saving module to shut down nodes
            if config['ENERGY_SAVING_INTERNAL'] == 'yes':
                if kao_tools.send_to_hulot('HALT', ' '.join(node_halt)):
                    logger.error("Communication problem with the energy saving module (Hulot)\n")
                flag_hulot = True
            else:
                # Not using the built-in energy saving module to shut down nodes
                cmd = config['SCHEDULER_NODE_MANAGER_SLEEP_CMD']
                if kao_tools.fork_and_feed_stdin(cmd, timeout_cmd, node_halt):
                    logger.error("Command " + cmd + " timeouted (" + str(timeout_cmd)
                                 + "s) while trying to  poweroff some nodes")

    if node_manager_enabled:
        # Get nodes which the scheduler wants to schedule jobs to,
        # but which are in the Absent state, to wake them up
        wakeup_time = int(config['SCHEDULER_NODE_MANAGER_WAKEUP_TIME'])
        nodes = get_gantt_hostname_to_wake_up(current_time_sec, wakeup_time)

        if nodes != []:
            logger.debug("Awaking some nodes: " + str(nodes))
            # Using the built-in energy saving module to wake up nodes
            if config['ENERGY_SAVING_INTERNAL'] == 'yes':
                if kao_tools.send_to_hulot('WAKEUP', ' '.join(nodes)):
                    logger.error("Communication problem with the energy saving module (Hulot)")
                flag_hulot = True
            else:
                # Not using the built-in energy saving module to wake up nodes
                cmd = config['SCHEDULER_NODE_MANAGER_WAKE_UP_CMD']
                if kao_tools.fork_and_feed_stdin(cmd, timeout_cmd, nodes):
                    logger.error("Command " + cmd + " timeouted (" + str(timeout_cmd)
                                 + "s) while trying to wake-up some nodes ")

    # Send CHECK signal to Hulot if needed
    if not flag_hulot and (config['ENERGY_SAVING_INTERNAL'] == 'yes'):
        if kao_tools.send_to_hulot('CHECK', []):
            logger.error("Communication problem with the energy saving module (Hulot)")

    # Retrieve jobs according to their state and excluding job in 'Waiting' state.
    jobs_by_state = get_current_not_waiting_jobs()

    #
    # Search jobs to resume
    #

    #
    # TODO: TOFINISH
    #
    if 'Resuming' in jobs_by_state:
        logger.warning("Resuming job is NOT ENTIRELY IMPLEMENTED")
        for job in jobs_by_state['Resuming']:
            other_jobs = get_jobs_on_resuming_job_resources(job.id)
            # TODO : look for timesharing other jobs. What do we do?????
            if other_jobs == []:
                # We can resume the job
                logger.debug("[" + str(job.id) + "] Resuming job")
                # skip == 1 disables the (unfinished) cpuset handling below.
                # It is bound before branching so that it is always defined.
                skip = 0
                if 'noop' in job.types:
                    resume_job_action(job.id)
                    logger.debug("[" + str(job.id) + "] Resume NOOP job OK")
                    # The job is already resumed: no cpuset handling.
                    skip = 1
                else:
                    script = config['JUST_BEFORE_RESUME_EXEC_FILE']
                    # Fall back to the default timeout when none is configured
                    if 'SUSPEND_RESUME_SCRIPT_TIMEOUT' in config:
                        timeout = config['SUSPEND_RESUME_SCRIPT_TIMEOUT']
                    else:
                        timeout = None
                    if timeout is None:
                        timeout = kao_tools.get_default_suspend_resume_script_timeout()
                    else:
                        timeout = int(timeout)
                    logger.debug("[" + str(job.id) + "] Running post suspend script: `" +
                                 script + " " + str(job.id) + "'")
                    # The job id is passed as the script's argument
                    cmd_str = script + " " + str(job.id)
                    # Stays at -1 (i.e. an error) if the script times out
                    return_code = -1
                    try:
                        return_code = call(cmd_str, shell=True, timeout=timeout)
                    except TimeoutExpired as e:
                        logger.error(str(e) + "[" + str(job.id) + "] Suspend script timeouted")
                        add_new_event('RESUME_SCRIPT_ERROR', job.id, "Suspend script timeouted")
                    if return_code != 0:
                        str_error = "[" + str(job.id) + "] Suspend script error, return code = "\
                                    + str(return_code)
                        logger.error(str_error)
                        add_new_event('RESUME_SCRIPT_ERROR', job.id, str_error)
                        frag_job(job.id)
                        tools.notify_almighty('Qdel')
                    # NOTE(review): set unconditionally, so the unfinished
                    # cpuset handling below never runs; confirm whether it
                    # should only be skipped when the script failed.
                    skip = 1

                cpuset_nodes = None
                if 'JOB_RESOURCE_MANAGER_PROPERTY_DB_FIELD' in config:
                    cpuset_field = config['JOB_RESOURCE_MANAGER_PROPERTY_DB_FIELD']
                else:
                    cpuset_field = ""
                if cpuset_field and (skip == 0):
                    # TODO
                    cpuset_name = job.user + "_" + str(job.id)
                    cpuset_nodes = get_cpuset_values(cpuset_field,
                                                     job.assigned_moldable_id)
                    # TODO
                    suspend_data_hash = {'name': cpuset_name,
                                         'job_id': job.id,
                                         'oarexec_pid_file':
                                         kao_tools.get_oar_pid_file_name(job.id)}
                if cpuset_nodes:
                    # TODO
                    taktuk_cmd = config['TAKTUK_CMD']
                    if 'SUSPEND_RESUME_FILE' in config:
                        suspend_file = config['SUSPEND_RESUME_FILE']
                    else:
                        # TODO
                        suspend_file = kao_tools.get_default_suspend_resume_file()

    #
    # TODO: TOFINISH
    #

    # Notify oarsub -I when they will be launched
    for j_info in get_gantt_waiting_interactive_prediction_date():
        job_id, job_info_type, job_start_time, job_message = j_info
        addr, port = job_info_type.split(':')
        new_start_prediction = local_to_sql(job_start_time)
        logger.debug("[" + str(job_id) + "] Notifying user of the start prediction: " +
                     new_start_prediction + "(" + job_message + ")")
        tools.notify_tcp_socket(addr, port, "[" + initial_time_sql + "] Start prediction: " +
                                new_start_prediction + " (" + job_message + ")")

    # Run the decisions
    # Process "toError" jobs
    if 'toError' in jobs_by_state:
        for job in jobs_by_state['toError']:
            if job.type == 'INTERACTIVE' or\
               (job.type == 'PASSIVE' and job.reservation == 'Scheduled'):
                # The client address is only parsed for jobs that are actually
                # notified, so other jobs still reach the Error state whatever
                # their info_type contains.
                addr, port = job.info_type.split(':')
                logger.debug("Notify oarsub job (num:" + str(job.id) + ") in error; jobInfo=" +
                             job.info_type)

                nb_sent1 = tools.notify_tcp_socket(addr, port, job.message + '\n')
                nb_sent2 = tools.notify_tcp_socket(addr, port, 'BAD JOB' + '\n')
                if (nb_sent1 == 0) or (nb_sent2 == 0):
                    logger.warning(
                        "Cannot open connection to oarsub client for " + str(job.id))
            logger.debug("Set job " + str(job.id) + " to state Error")
            set_job_state(job.id, 'Error')

    # Process toAckReservation jobs
    if 'toAckReservation' in jobs_by_state:
        for job in jobs_by_state['toAckReservation']:
            addr, port = job.info_type.split(':')
            logger.debug(
                "Treate job " + str(job.id) + " in toAckReservation state")

            nb_sent = tools.notify_tcp_socket(addr, port, 'GOOD RESERVATION' + '\n')

            if nb_sent == 0:
                logger.warning(
                    "Frag job " + str(job.id) + ", I cannot notify oarsub for the reservation")
                add_new_event('CANNOT_NOTIFY_OARSUB', str(
                    job.id), "Can not notify oarsub for the job " + str(job.id))

                # TODO ???
                # OAR::IO::lock_table / OAR::IO::unlock_table($base)
                frag_job(job.id)

                exit_code = 2
            else:
                logger.debug("Notify oarsub for a RESERVATION (idJob=" +
                             str(job.id) + ") --> OK; jobInfo=" + job.info_type)
                set_job_state(job.id, 'Waiting')
                # The reservation starts now: report it through the exit code,
                # unless another code was already set.
                if ((job.start_time - 1) <= current_time_sec) and (exit_code == 0):
                    exit_code = 1

    # Process toLaunch jobs
    if 'toLaunch' in jobs_by_state:
        for job in jobs_by_state['toLaunch']:
            notify_to_run_job(job.id)

    logger.debug("End of Meta Scheduler")

    return exit_code
示例#19
0
文件: job.py 项目: oar-team/oar3
 def __init__(self, **kwargs):
     """Build the object from arbitrary keyword arguments.

     ``mld_res_rqts`` starts as an empty list; every keyword argument is then
     stored as an attribute of the same name, so a ``mld_res_rqts`` keyword
     replaces that default.
     """
     self.mld_res_rqts = []
     for attr_name, attr_value in iteritems(kwargs):
         setattr(self, attr_name, attr_value)
示例#20
0
文件: quotas.py 项目: fr0uty/oartm
 def combine(self, quotas):
     """Merge the counters of ``quotas`` into this object's counters.

     For every key of ``quotas.counters``, the first two counter slots keep
     the larger of both values and the third slot accumulates the other
     object's value.
     """
     # self.show_counters('combine before')
     for rule, theirs in iteritems(quotas.counters):
         ours = self.counters[rule]
         ours[0] = max(ours[0], theirs[0])
         ours[1] = max(ours[1], theirs[1])
         ours[2] += theirs[2]
示例#21
0
文件: job.py 项目: oar-team/oar3
def insert_job(**kwargs):
    """Insert a job and its resource request in the database.

    Keyword arguments handled specifically (all optional):

    * ``res``: list of ``(walltime, [(resource_hierarchy, properties), ...])``
      tuples, one per moldable description. Each resource hierarchy is a
      ``/``-separated list of ``type=value`` items. Defaults to
      ``[(60, [('resource_id=1', "")])]``.
    * ``types``: list of job types. Defaults to ``[]``.
    * ``user``: stored in the ``job_user`` column.
    * ``queue_name``: defaults to ``'default'``.

    ``launching_directory``, ``checkpoint_signal`` and ``properties`` default
    to ``""``, ``0`` and ``""``. Every remaining keyword argument is used as a
    column value of the inserted ``Job`` row.

    The request written as

        "{ sql1 }/prop1=1/prop2=3+{sql2}/prop3=2/prop4=1/prop5=1+...,walltime=60"

        res = "/switch=2/nodes=10+{lic_type = 'mathlab'}/licence=20" types="besteffort, container"

    is expressed as::

        insert_job(
            res=[
                (60, [("switch=2/nodes=20", ""), ("licence=20", "lic_type = 'mathlab'")])],
            types=["besteffort", "container"],
            user="")

    :return: the primary key of the inserted job.
    """

    for column, default in (('launching_directory', ""),
                            ('checkpoint_signal', 0),
                            ('properties', "")):
        kwargs.setdefault(column, default)

    res = kwargs.pop('res', [(60, [('resource_id=1', "")])])
    types = kwargs.pop('types', [])

    kwargs.setdefault('queue_name', 'default')

    if 'user' in kwargs:
        kwargs['job_user'] = kwargs.pop('user')

    ins = Job.__table__.insert().values(**kwargs)
    result = db.session.execute(ins)
    job_id = result.inserted_primary_key[0]

    # One moldable description per (walltime, resource groups) tuple
    mld_jid_walltimes = [{'moldable_job_id': job_id, 'moldable_walltime': walltime}
                         for walltime, _ in res]
    res_grps = [res_grp for _, res_grp in res]

    result = db.session.execute(MoldableJobDescription.__table__.insert(),
                                mld_jid_walltimes)

    if len(mld_jid_walltimes) == 1:
        mld_ids = [result.inserted_primary_key[0]]
    else:
        # The ids are read back after a multi-row insert and matched to
        # res_grps by position: order them explicitly, since a query without
        # ORDER BY returns rows in an unspecified order.
        rows = db.query(MoldableJobDescription.id)\
                 .filter(MoldableJobDescription.job_id == job_id)\
                 .order_by(MoldableJobDescription.id).all()
        mld_ids = [row[0] for row in rows]

    for mld_idx, res_grp in enumerate(res_grps):
        # job_resource_groups
        moldable_id = mld_ids[mld_idx]

        mld_id_property = [{'res_group_moldable_id': moldable_id,
                            'res_group_property': properties}
                           for _, properties in res_grp]
        res_hys = [res_hy for res_hy, _ in res_grp]

        result = db.session.execute(JobResourceGroup.__table__.insert(),
                                    mld_id_property)

        if len(mld_id_property) == 1:
            grp_ids = [result.inserted_primary_key[0]]
        else:
            # Same positional matching as above, hence the explicit ordering
            rows = db.query(JobResourceGroup.id)\
                     .filter(JobResourceGroup.moldable_id == moldable_id)\
                     .order_by(JobResourceGroup.id).all()
            grp_ids = [row[0] for row in rows]

        # job_resource_descriptions
        for grp_idx, res_hy in enumerate(res_hys):
            res_description = []
            # Each "type=value" item becomes one row; its position in the
            # hierarchy is kept in res_job_order.
            for idx, val in enumerate(res_hy.split('/')):
                tv = val.split('=')
                res_description.append({'res_job_group_id': grp_ids[grp_idx],
                                        'res_job_resource_type': tv[0],
                                        'res_job_value': tv[1],
                                        'res_job_order': idx})

            db.session.execute(JobResourceDescription.__table__.insert(),
                               res_description)

    if types:
        ins = [{'job_id': job_id, 'type': typ} for typ in types]
        db.session.execute(JobType.__table__.insert(), ins)

    return job_id
示例#22
0
文件: simsim.py 项目: fr0uty/oartm
 def __init__(self, **kwargs):
     """Store every keyword argument as an instance attribute of the same name."""
     for attr_name, attr_value in iteritems(kwargs):
         setattr(self, attr_name, attr_value)