Пример #1
0
def test_db_all_in_one_quotas_2(monkeypatch):
    """
    Check predicted start offsets of two jobs submitted after an Advance
    Reservation, with a quota rule limiting the number of running jobs.

    Rule layout:
    quotas[queue, project, job_type, user] = [int, int, float];
                                               |    |     |
              maximum used resources ----------+    |     |
              maximum number of running jobs -------+     |
              maximum resources times (hours) ------------+
    """
    create_quotas_rules_file('{"quotas": {"*,*,*,/": [-1, 1, -1]}}')

    # The Advance Reservation is submitted and scheduled first
    ar_origin = get_date()
    insert_and_sched_ar(ar_origin + 100)

    # Then two regular jobs with walltimes 100 and 200
    for walltime in (100, 200):
        insert_job(res=[(walltime, [('resource_id=1', "")])], properties="", user="******")

    sched_origin = get_date()
    meta_schedule('internal')

    # Start times are compared relative to the date taken just before scheduling
    offsets = []
    for prediction in db['GanttJobsPrediction'].query.all():
        offset = prediction.start_time - sched_origin
        print("moldable_id: ", prediction.moldable_id, ' start_time: ', offset)
        offsets.append(offset)

    assert (offsets[1] - offsets[0]) == 120
    assert (offsets[2] - offsets[0]) == 280
Пример #2
0
def test_db_all_in_one_ar_1(monkeypatch):
    """An AR requested for get_date() + 10 must end up Waiting and Scheduled."""
    ar_job = insert_and_sched_ar(get_date() + 10)
    print(ar_job.state, ' ', ar_job.reservation)

    assert (ar_job.state, ar_job.reservation) == ('Waiting', 'Scheduled')
Пример #3
0
def test_db_all_in_one_quotas_1(monkeypatch):
    """
    Check predicted start offsets of three jobs scheduled under two quota rules.

    Rule layout:
    quotas[queue, project, job_type, user] = [int, int, float];
                                               |    |     |
              maximum used resources ----------+    |     |
              maximum number of running jobs -------+     |
              maximum resources times (hours) ------------+
    """
    create_quotas_rules_file('{"quotas": {"*,*,*,/": [-1, 1, -1], "/,*,*,*": [-1, -1, 0.55]}}')

    # Three jobs on the same resource request, walltimes 100, 200 and 200
    for walltime in (100, 200, 200):
        insert_job(res=[(walltime, [('resource_id=1', "")])], properties="", user="******")

    sched_origin = get_date()
    meta_schedule('internal')

    # Ordered by moldable id so the offsets line up with submission order
    predictions = db['GanttJobsPrediction'].query\
        .order_by(GanttJobsPrediction.moldable_id).all()

    offsets = []
    for prediction in predictions:
        offset = prediction.start_time - sched_origin
        print("moldable_id: ", prediction.moldable_id, ' start_time: ', offset)
        offsets.append(offset)

    assert offsets == [0, 160, 420]
Пример #4
0
def frag_job(jid):
    """
    Record a frag request for job `jid` on behalf of the calling user.

    The caller's name is read from the OARDO_USER environment variable when
    it is set, otherwise from USER.

    :param jid: id of the job to frag
    :returns: 0 when a FragJob row and a FRAG_JOB_REQUEST event were added,
              -1 when the job does not exist or the caller is neither the
              job's user, 'oar' nor 'root',
              -2 when a FragJob row already exists for this job
    """
    requester = (os.environ['OARDO_USER'] if 'OARDO_USER' in os.environ
                 else os.environ['USER'])

    job = get_job(jid)

    # Unknown job or caller not allowed to frag it
    if job is None or requester not in (job.user, 'oar', 'root'):
        return -1

    # A frag request is already registered for this job
    if db.query(FragJob).filter(FragJob.job_id == jid).all():
        return -2

    request_date = tools.get_date()
    db.add(FragJob(job_id=jid, date=request_date))
    db.commit()
    add_new_event("FRAG_JOB_REQUEST",
                  jid, "User %s requested to frag the job %s"
                  % (requester, str(jid)))
    return 0
Пример #5
0
def test_db_all_in_one_quotas_AR(monkeypatch):
    """An AR scheduled under the quota rule [1, -1, -1] must end in Error."""
    create_quotas_rules_file('{"quotas": {"*,*,*,*": [1, -1, -1]}}')

    ar_start = get_date() + 10
    ar_job = insert_and_sched_ar(ar_start)
    print(ar_job.state, ' ', ar_job.reservation)

    assert ar_job.state == 'Error'
Пример #6
0
def test_db_all_in_one_AR_7(monkeypatch):
    """A timesharing reservation starting at now + 10 gets Waiting/Scheduled."""
    ar_start = get_date() + 10
    request = [(60, [('resource_id=4', "")])]
    insert_job(res=request,
               reservation='toSchedule', start_time=ar_start,
               info_type='localhost:4242', types=["timesharing=*,*"])

    meta_schedule('internal')

    # Exactly one job is expected in the table
    scheduled = db['Job'].query.one()
    assert (scheduled.state, scheduled.reservation) == ('Waiting', 'Scheduled')
Пример #7
0
def test_db_all_in_one_wakeup_node_energy_saving_internal_1(monkeypatch):
    """With ENERGY_SAVING_INTERNAL on and all resources Absent, the job stays Waiting."""
    config['ENERGY_SAVING_INTERNAL'] = 'yes'
    insert_job(res=[(60, [('resource_id=4', "")])], properties="")

    # Put every resource in the Absent state with available_upto = now + 1000
    absent_until = get_date() + 1000
    suspended_values = {Resource.state: 'Absent',
                        Resource.available_upto: absent_until}
    db.query(Resource).update(suspended_values, synchronize_session=False)
    db.commit()
    meta_schedule('internal')

    waiting_job = db['Job'].query.one()
    print(waiting_job.state)
    print(node_list)
    assert waiting_job.state == 'Waiting'
Пример #8
0
def test_db_moldable_2(monkeypatch):
    """A plain job and a two-alternative moldable job get the same predicted start."""
    origin = get_date()
    single_request = [(60, [('resource_id=3', "")])]
    moldable_request = [(60, [('resource_id=4', "")]), (70, [('resource_id=2', "")])]
    insert_job(res=single_request, properties="")
    insert_job(res=moldable_request, properties="")
    meta_schedule('internal')

    for job in db['Job'].query.all():
        print(job.state)

    offsets = []
    for prediction in db['GanttJobsPrediction'].query.all():
        offset = prediction.start_time - origin
        print("moldable_id: ", prediction.moldable_id, ' start_time: ', offset)
        offsets.append(offset)

    assert offsets[0] == offsets[1]
Пример #9
0
def test_db_placeholder_2(monkeypatch):
    """A placeholder=yop job and an allow=poy job must not start at the same time."""
    origin = get_date()
    for job_type in ("placeholder=yop", "allow=poy"):
        insert_job(res=[(60, [('resource_id=4', "")])], properties="", types=[job_type])
    meta_schedule('internal')

    for job in db['Job'].query.all():
        print(job.state)

    offsets = []
    for prediction in db['GanttJobsPrediction'].query.all():
        offset = prediction.start_time - origin
        print("moldable_id: ", prediction.moldable_id, ' start_time: ', offset)
        offsets.append(offset)

    assert offsets[0] != offsets[1]
Пример #10
0
def test_db_all_in_one_AR_4(monkeypatch):
    """An AR whose predicted start is moved 20 before now must become toLaunch."""
    origin = get_date()
    insert_and_sched_ar(origin + 10)

    # Rewrite every gantt prediction so the start date is already past
    past_start = origin - 20
    db.query(GanttJobsPrediction).update(
        {GanttJobsPrediction.start_time: past_start},
        synchronize_session=False)
    db.commit()

    meta_schedule('internal')

    # Reload the job to observe the state set by the scheduler
    job = db['Job'].query.one()
    print('\n', job.id, job.state, ' ', job.reservation, job.start_time)

    assert job.state == 'toLaunch'
Пример #11
0
def test_db_metasched_ar_1(monkeypatch):
    """A reservation submitted for now + 10 is Waiting/Scheduled after meta_schedule()."""
    ar_start = get_date() + 10

    # Single reservation job, scheduled with meta_schedule's default mode
    insert_job(res=[(60, [('resource_id=4', "")])], properties="",
               reservation='toSchedule', start_time=ar_start,
               info_type='localhost:4242')

    meta_schedule()

    scheduled = db['Job'].query.one()
    print(scheduled.state, ' ', scheduled.reservation)

    assert (scheduled.state, scheduled.reservation) == ('Waiting', 'Scheduled')
Пример #12
0
def test_db_all_in_one_wakeup_node_1(monkeypatch):
    """With all resources Absent the job stays Waiting and two nodes are listed."""
    insert_job(res=[(60, [('resource_id=4', "")])], properties="")

    # Put every resource in the Absent state with available_upto = now + 1000
    absent_until = get_date() + 1000
    suspended_values = {Resource.state: 'Absent',
                        Resource.available_upto: absent_until}
    db.query(Resource).update(suspended_values, synchronize_session=False)
    db.commit()
    meta_schedule('internal')

    waiting_job = db['Job'].query.one()
    print(waiting_job.state)
    print(node_list)
    assert waiting_job.state == 'Waiting'
    assert node_list == [u'localhost0', u'localhost1']
Пример #13
0
def test_db_timesharing_2(monkeypatch):
    """Two timesharing=user,* jobs on the same request must get different starts."""
    origin = get_date()
    # Two identical submissions
    for _ in range(2):
        insert_job(res=[(60, [('resource_id=4', "")])], properties="",
                   types=["timesharing=user,*"], user='******')
    meta_schedule('internal')

    for job in db['Job'].query.all():
        print(job.state)

    offsets = []
    for prediction in db['GanttJobsPrediction'].query.all():
        offset = prediction.start_time - origin
        print("moldable_id: ", prediction.moldable_id, ' start_time: ', offset)
        offsets.append(offset)

    assert offsets[0] != offsets[1]
Пример #14
0
def test_db_all_in_one_AR_6(monkeypatch):
    """An AR (second argument 600) with its start moved 350 before now stays Waiting."""
    origin = get_date()
    ar_job = insert_and_sched_ar(origin + 10, 600)
    past_start = origin - 350

    # Move both the job start time and the gantt predictions into the past
    set_jobs_start_time((ar_job.id,), past_start)
    db.query(GanttJobsPrediction).update(
        {GanttJobsPrediction.start_time: past_start},
        synchronize_session=False)

    meta_schedule('internal')

    # Reload the job to observe the state set by the scheduler
    reloaded = db['Job'].query.one()
    print('\n', reloaded.id, reloaded.state, ' ', reloaded.reservation, reloaded.start_time)

    assert reloaded.state == 'Waiting'
Пример #15
0
def test_db_all_in_one_sleep_node_1(monkeypatch):
    """The job is launched and localhost1/localhost2 end up in node_list (any order)."""
    origin = get_date()

    insert_job(res=[(60, [('resource_id=1', "")])], properties="")

    # Give every resource an available_upto of now + 50000
    db.query(Resource).update({Resource.available_upto: origin + 50000},
                              synchronize_session=False)
    db.commit()
    meta_schedule('internal')

    launched = db['Job'].query.one()
    print(launched.state)
    print(node_list)
    assert launched.state == 'toLaunch'

    accepted_orders = ([u'localhost2', u'localhost1'],
                       [u'localhost1', u'localhost2'])
    assert node_list in accepted_orders
Пример #16
0
def add_micheline_subjob(job_vars,
                         ssh_private_key, ssh_public_key,
                         array_id, array_index,
                         array_commands,
                         properties_applied_after_validation):
    """
    Insert one job, and every row that depends on it, into the database.

    Rows are written for: the job itself, its challenge / ssh keys, one
    moldable description per entry of the resource request, the resource
    groups and resource descriptions of each moldable, the job types, the
    job dependencies and the initial job state log.

    :param job_vars: dict of submission parameters. Keys read here:
        'properties', 'resource_request', 'name', 'stdout', 'stderr',
        'types', 'dependencies' and 'hold'. The 'properties', 'stdout' and
        'stderr' entries are rewritten in place before the job is inserted.
        'resource_request' is iterated as (resource_desc, walltime) pairs,
        where resource_desc is a sequence of dicts with the keys 'property'
        and 'resources', and 'resources' is a sequence of dicts with the
        keys 'resource' and 'value'.
    :param ssh_private_key: stored in the Challenge row of the job
    :param ssh_public_key: stored in the Challenge row of the job
    :param array_id: array the job belongs to; when <= 0 the job's own id
        is used as its array id
    :param array_index: index of the job inside its array
    :param array_commands: sequence of commands; only the first one is used
    :param properties_applied_after_validation: extra properties ANDed to
        the user's properties (used alone when the user gave none)
    :returns: the tuple (0, job_id)
    """
    # Estimate_job_nb_resources and incidentally test if properties and
    # resources request are coherent against available resources.
    # The returned values are not used here; the call is kept for that check.
    date = get_date()
    properties = job_vars['properties']
    resource_request = job_vars['resource_request']
    estimate_job_nb_resources(resource_request, properties)

    # Add admin properties to the job
    if properties_applied_after_validation:
        if properties:
            properties = '(' + properties + ') AND ' + properties_applied_after_validation
        else:
            properties = properties_applied_after_validation
    job_vars['properties'] = properties
    # TODO Verify the content of the ssh keys

    # TODO format job message
    # my $job_message = format_job_message_text($job_name,$estimated_nb_resources, $estimated_walltime,
    # $jobType, $reservationField, $queue_name, $project, $type_list, '');

    # TODO  job_group

    name = job_vars['name']
    # Literal replacement text: a missing name becomes an empty string, and
    # str.replace is used so backslashes in the name are not interpreted.
    name_for_substitution = name or ''

    stdout = job_vars['stdout']
    if not stdout:
        stdout = 'OAR'
        if name:
            stdout += '.' + name
        stdout += ".%jobid%.stdout"
    else:
        stdout = stdout.replace('%jobname%', name_for_substitution)
    job_vars['stdout'] = stdout

    stderr = job_vars['stderr']
    if not stderr:
        stderr = 'OAR'
        if name:
            stderr += '.' + name
        stderr += '.%jobid%.stderr'
    else:
        stderr = stderr.replace('%jobname%', name_for_substitution)
    # Store the computed name back, as done for stdout, so that job_kwargs
    # sees it
    job_vars['stderr'] = stderr

    # Insert job
    kwargs = job_kwargs(job_vars, array_commands[0], date)
    kwargs['message'] = ''  # TODO message
    kwargs['array_index'] = array_index

    if array_id > 0:
        kwargs['array_id'] = array_id

    ins = Job.__table__.insert().values(**kwargs)
    result = db.session.execute(ins)
    job_id = result.inserted_primary_key[0]

    if array_id <= 0:
        # No array was given: the job's own id becomes its array id
        db.query(Job).filter(Job.id == job_id).update({Job.array_id: job_id})
        db.commit()

    random_number = random.randint(1, 1000000000000)
    ins = Challenge.__table__.insert().values(
        {'job_id': job_id, 'challenge': random_number,
         'ssh_private_key': ssh_private_key, 'ssh_public_key': ssh_public_key})
    db.session.execute(ins)

    # Insert resources request in DB
    mld_jid_walltimes = []
    resource_desc_lst = []
    for moldable_instance in resource_request:
        resource_desc, walltime = moldable_instance
        if not walltime:
            # TODO add nullable=True in the model ?
            walltime = 0
        mld_jid_walltimes.append(
            {'moldable_job_id': job_id, 'moldable_walltime': walltime})
        resource_desc_lst.append(resource_desc)

    # Insert MoldableJobDescription job_id and walltime
    mld_result = db.session.execute(MoldableJobDescription.__table__.insert(),
                                    mld_jid_walltimes)

    # Retrieve MoldableJobDescription.ids
    if len(mld_jid_walltimes) == 1:
        mld_ids = [mld_result.inserted_primary_key[0]]
    else:
        # Ids are matched to resource_desc_lst by position, so the order
        # must be explicit
        r = db.query(MoldableJobDescription.id)\
              .filter(MoldableJobDescription.job_id == job_id)\
              .order_by(MoldableJobDescription.id).all()
        mld_ids = [e[0] for e in r]

    for mld_idx, resource_desc in enumerate(resource_desc_lst):
        # job_resource_groups
        mld_id_property = []
        res_lst = []

        moldable_id = mld_ids[mld_idx]

        for prop_res in resource_desc:
            prop = prop_res['property']
            res = prop_res['resources']

            mld_id_property.append({'res_group_moldable_id': moldable_id,
                                    'res_group_property': prop})

            res_lst.append(res)

        # Insert property for moldable; keep this insert's own result so the
        # group id is not read from the earlier moldable insert
        grp_result = db.session.execute(JobResourceGroup.__table__.insert(),
                                        mld_id_property)

        if len(mld_id_property) == 1:
            grp_ids = [grp_result.inserted_primary_key[0]]
        else:
            # Ids are matched to res_lst by position, so the order must be
            # explicit
            r = db.query(JobResourceGroup.id)\
                  .filter(JobResourceGroup.moldable_id == moldable_id)\
                  .order_by(JobResourceGroup.id).all()
            grp_ids = [e[0] for e in r]

        # Insert job_resource_descriptions
        for grp_idx, res in enumerate(res_lst):
            res_description = [
                {'res_job_group_id': grp_ids[grp_idx],
                 'res_job_resource_type': res_value['resource'],
                 'res_job_value': res_value['value'],
                 'res_job_order': idx}
                for idx, res_value in enumerate(res)]
            db.session.execute(JobResourceDescription.__table__.insert(),
                               res_description)

    # types of job
    types = job_vars['types']
    if types:
        ins = [{'job_id': job_id, 'type': typ} for typ in types]
        db.session.execute(JobType.__table__.insert(), ins)

    # TODO dependencies with min_start_shift and max_start_shift
    dependencies = job_vars['dependencies']
    if dependencies:
        ins = [{'job_id': job_id, 'job_id_required': dep} for dep in dependencies]
        db.session.execute(JobDependencie.__table__.insert(), ins)
    #    foreach my $a (@{$anterior_ref}){
    #    if (my ($j,$min,$max) = $a =~ /^(\d+)(?:,([\[\]][-+]?\d+)?(?:,([\[\]][-+]?\d+)?)?)?$/) {
    #        $dbh->do("  INSERT INTO job_dependencies (job_id,job_id_required,min_start_shift,max_start_shift)
    #                    VALUES ($job_id,$j,'".(defined($min)?$min:"")."','".(defined($max)?$max:"")."')

    if not job_vars['hold']:
        req = db.insert(JobStateLog).values(
            {'job_id': job_id, 'job_state': 'Waiting', 'date_start': date})
        db.session.execute(req)
        db.commit()

        db.query(Job).filter(Job.id == job_id).update({Job.state: 'Waiting'})
        db.commit()
    else:
        # Held job: only the state log is written, the job row is not updated
        req = db.insert(JobStateLog).values(
            {'job_id': job_id, 'job_state': 'Hold', 'date_start': date})
        db.session.execute(req)
        db.commit()

    return (0, job_id)
Пример #17
0
def set_job_state(jid, state):
    """
    Move job `jid` to `state` and run the actions attached to that state.

    The job row is updated only when its current state is neither 'Error',
    'Terminated' nor already `state`. When the update touches exactly one
    row, the open JobStateLog entry of the job is closed, a new one is
    opened, and then, depending on `state`:

    - 'Suspended', 'Resuming', 'Running': the user is notified;
    - 'toLaunch': the scheduler priority is updated ("+2", "START");
    - 'Terminated', 'Error': stop_time is raised to start_time if it was
      lower, the resources' last job date is updated, the user is notified,
      the scheduler priority is updated ("-2", "STOP"), the job is logged
      and almighty is notified with "ChState".

    Otherwise only a warning is logged.

    :param jid: id of the job
    :param state: wanted state (string)
    """
    # TODO
    # TODO Later: notify_user
    # TODO Later: update_current_scheduler_priority

    result = db.query(Job).filter(Job.id == jid)\
                          .filter(Job.state != 'Error')\
                          .filter(Job.state != 'Terminated')\
                          .filter(Job.state != state)\
                          .update({Job.state: state})
    db.commit()

    if result != 1:  # OK for sqlite
        logger.warning("Job is already termindated or in error or wanted state, job_id: " +
                       str(jid) + ", wanted state: " + state)
        return

    logger.debug(
        "Job state updated, job_id: " + str(jid) + ", wanted state: " + state)

    date = tools.get_date()

    # TODO: optimize job log
    # Close the currently open state log entry, then open the new one
    db.query(JobStateLog).filter(JobStateLog.date_stop == 0)\
                         .filter(JobStateLog.job_id == jid)\
                         .update({JobStateLog.date_stop: date})
    db.commit()
    req = db.insert(JobStateLog).values(
        {'job_id': jid, 'job_state': state, 'date_start': date})
    db.session.execute(req)

    if state not in ("Terminated", "Error", "toLaunch",
                     "Running", "Suspended", "Resuming"):
        return

    job = db.query(Job).filter(Job.id == jid).one()
    # The test must match the "Suspended" value accepted above; a mismatched
    # spelling would send suspended jobs down the Terminated/Error path.
    if state == "Suspended":
        tools.notify_user(job, "SUSPENDED", "Job is suspended.")
    elif state == "Resuming":
        tools.notify_user(job, "RESUMING", "Job is resuming.")
    elif state == "Running":
        tools.notify_user(job, "RUNNING", "Job is running.")
    elif state == "toLaunch":
        update_current_scheduler_priority(job, "+2", "START")
    else:  # job is "Terminated" or "Error"
        if job.stop_time < job.start_time:
            db.query(Job).filter(Job.id == jid)\
                         .update({Job.stop_time: job.start_time})
            db.commit()

        if job.assigned_moldable_job != "0":
            # Update last_job_date field for resources used
            update_scheduler_last_job_date(
                date, int(job.assigned_moldable_job))

        if state == "Terminated":
            tools.notify_user(job, "END", "Job stopped normally.")
        else:
            # Verify if the job was suspended and if the resource
            # property suspended is updated
            if job.suspended == "YES":
                r = get_current_resources_with_suspended_job()

                if r != ():
                    # Clear the flag on every resource not listed in r
                    db.query(Resource).filter(~Resource.id.in_(r))\
                                      .update({Resource.suspended_jobs: 'NO'})

                else:
                    db.query(Resource).update(
                        {Resource.suspended_jobs: 'NO'})
                db.commit()

            tools.notify_user(
                job, "ERROR", "Job stopped abnormally or an OAR error occured.")

        update_current_scheduler_priority(job, "-2", "STOP")

        # Here we must not be asynchronously with the scheduler
        log_job(job)
        nb_sent = tools.notify_almighty("ChState")
        if nb_sent == 0:
            logger.warning("Not able to notify almighty to launch the job " +
                           str(job.id) + " (socket error)")
Пример #18
0
def meta_schedule(mode='internal', plt=Platform()):
    """
    Run one meta-scheduling pass over every active queue and apply the
    resulting decisions.

    Steps, in order: load quota rules when enabled, initialise the gantt
    with running jobs, call the (internal or external) scheduler for each
    active queue by decreasing priority, handle reservations, determine the
    jobs to launch and the besteffort jobs to kill, update the gantt
    visualisation tables, halt / wake up nodes when node management is
    configured, (partially) handle 'Resuming' jobs, notify waiting
    interactive submissions of their predicted start, and finally process
    the 'toError', 'toAckReservation' and 'toLaunch' jobs.

    :param mode: 'external' to call the external scheduler, anything else
        calls the internal one
    :param plt: platform object used to build the gantt and the resource
        set. NOTE: the default instance is created once, when the function
        is defined, and shared by all calls relying on the default.
    :returns: exit code; 0 by default, 2 when besteffort jobs must be
        killed or a reservation could not be acknowledged to its client,
        1 when an acknowledged reservation has start_time - 1 <= now and
        the code was still 0
    """
    exit_code = 0

    job_security_time = int(config['SCHEDULER_JOB_SECURITY_TIME'])

    if ('QUOTAS' in config) and (config['QUOTAS'] == 'yes'):
        if 'QUOTAS_FILE' not in config:
            config['QUOTAS_FILE'] = './quotas_conf.json'
        load_quotas_rules()

    tools.init_judas_notify_user()
    tools.create_almighty_socket()

    logger.debug(
        "Retrieve information for already scheduled reservations from \
        database before flush (keep assign resources)")

    # reservation ??.

    initial_time_sec = tools.get_date()  # time.time()
    initial_time_sql = local_to_sql(initial_time_sec)

    current_time_sec = initial_time_sec
    current_time_sql = initial_time_sql

    gantt_init_results = gantt_init_with_running_jobs(plt, initial_time_sec,
                                                      job_security_time)
    all_slot_sets, scheduled_jobs, besteffort_rid2jid = gantt_init_results
    resource_set = plt.resource_set()

    # Path for user of external schedulers
    if 'OARDIR' in os.environ:
        binpath = os.environ['OARDIR'] + '/'
    else:
        binpath = '/usr/local/lib/oar/'
        logger.warning(
            "OARDIR env variable must be defined, " + binpath + " is used by default")

    for queue in db.query(Queue).order_by(text('priority DESC')).all():

        if queue.state == 'Active':
            logger.debug("Queue " + queue.name + ": Launching scheduler " +
                         queue.scheduler_policy + " at time " + initial_time_sql)

            if mode == 'external':  # pragma: no cover
                call_external_scheduler(binpath, scheduled_jobs, all_slot_sets,
                                        resource_set, job_security_time, queue,
                                        initial_time_sec, initial_time_sql)
            else:
                call_internal_scheduler(plt, scheduled_jobs, all_slot_sets,
                                        job_security_time, queue, initial_time_sec)

            handle_waiting_reservation_jobs(queue.name, resource_set,
                                            job_security_time, current_time_sec)

            # handle_new_AR_jobs
            check_reservation_jobs(
                plt, resource_set, queue.name, all_slot_sets, current_time_sec)

    jobs_to_launch, jobs_to_launch_lst, rid2jid_to_launch = get_gantt_jobs_to_launch(resource_set,
                                                                                     job_security_time,
                                                                                     current_time_sec)

    if check_besteffort_jobs_to_kill(jobs_to_launch, rid2jid_to_launch,
                                     current_time_sec, besteffort_rid2jid,
                                     resource_set) == 1:
        # We must kill some besteffort jobs
        tools.notify_almighty('ChState')
        exit_code = 2
    elif handle_jobs_to_launch(jobs_to_launch_lst, current_time_sec, current_time_sql) == 1:
        exit_code = 0

    # Update visu gantt tables
    update_gantt_visualization()

    # Manage dynamic node feature
    flag_hulot = False
    timeout_cmd = int(config['SCHEDULER_TIMEOUT'])

    if ((('SCHEDULER_NODE_MANAGER_SLEEP_CMD' in config) or
         ((config['ENERGY_SAVING_INTERNAL'] == 'yes') and
          ('ENERGY_SAVING_NODE_MANAGER_SLEEP_CMD' in config))) and
        (('SCHEDULER_NODE_MANAGER_SLEEP_TIME' in config)
         and ('SCHEDULER_NODE_MANAGER_IDLE_TIME' in config))):

        # Look at nodes that are unused for a duration
        idle_duration = int(config['SCHEDULER_NODE_MANAGER_IDLE_TIME'])
        sleep_duration = int(config['SCHEDULER_NODE_MANAGER_SLEEP_TIME'])

        idle_nodes = search_idle_nodes(current_time_sec)
        tmp_time = current_time_sec - idle_duration

        node_halt = []

        # The per-node value is compared with a date, so it is presumably the
        # date since which the node is idle -- TODO confirm against
        # search_idle_nodes. Named distinctly so it does not shadow the
        # configured idle_duration.
        for node, node_idle_date in iteritems(idle_nodes):
            if node_idle_date < tmp_time:
                # Search if the node has enough time to sleep
                tmp = get_next_job_date_on_node(node)
                if (tmp is None) or (tmp - sleep_duration > current_time_sec):
                    # Search if node has not been woken up recently
                    wakeup_date = get_last_wake_up_date_of_node(node)
                    if (wakeup_date is None) or (wakeup_date < tmp_time):
                        node_halt.append(node)

        if node_halt != []:
            logger.debug("Powering off some nodes (energy saving): " + str(node_halt))
            # Using the built-in energy saving module to shut down nodes
            if config['ENERGY_SAVING_INTERNAL'] == 'yes':
                if kao_tools.send_to_hulot('HALT', ' '.join(node_halt)):
                    logger.error("Communication problem with the energy saving module (Hulot)\n")
                flag_hulot = 1
            else:
                # Not using the built-in energy saving module to shut down nodes
                cmd = config['SCHEDULER_NODE_MANAGER_SLEEP_CMD']
                if kao_tools.fork_and_feed_stdin(cmd, timeout_cmd, node_halt):
                    logger.error("Command " + cmd + "timeouted (" + str(timeout_cmd)
                                 + "s) while trying to  poweroff some nodes")

    if (('SCHEDULER_NODE_MANAGER_SLEEP_CMD' in config) or
        ((config['ENERGY_SAVING_INTERNAL'] == 'yes') and
         ('ENERGY_SAVING_NODE_MANAGER_SLEEP_CMD' in config))):
        # Get nodes which the scheduler wants to schedule jobs to,
        # but which are in the Absent state, to wake them up
        wakeup_time = int(config['SCHEDULER_NODE_MANAGER_WAKEUP_TIME'])
        nodes = get_gantt_hostname_to_wake_up(current_time_sec, wakeup_time)

        if nodes != []:
            logger.debug("Awaking some nodes: " + str(nodes))
            # Using the built-in energy saving module to wake up nodes
            if config['ENERGY_SAVING_INTERNAL'] == 'yes':
                if kao_tools.send_to_hulot('WAKEUP', ' '.join(nodes)):
                    logger.error("Communication problem with the energy saving module (Hulot)")
                flag_hulot = 1
            else:
                # Not using the built-in energy saving module to wake up nodes
                cmd = config['SCHEDULER_NODE_MANAGER_WAKE_UP_CMD']
                if kao_tools.fork_and_feed_stdin(cmd, timeout_cmd, nodes):
                    logger.error("Command " + cmd + "timeouted (" + str(timeout_cmd)
                                 + "s) while trying to wake-up some nodes ")

    # Send CHECK signal to Hulot if needed
    if not flag_hulot and (config['ENERGY_SAVING_INTERNAL'] == 'yes'):
        if kao_tools.send_to_hulot('CHECK', []):
            logger.error("Communication problem with the energy saving module (Hulot)")

    # Retrieve jobs according to their state and excluding job in 'Waiting' state.
    jobs_by_state = get_current_not_waiting_jobs()

    #
    # Search jobs to resume
    #

    #
    # TODO: TOFINISH
    #
    if 'Resuming' in jobs_by_state:
        logger.warning("Resuming job is NOT ENTIRELY IMPLEMENTED")
        for job in jobs_by_state['Resuming']:
            other_jobs = get_jobs_on_resuming_job_resources(job.id)
            # TODO : look for timesharing other jobs. What do we do?????
            if other_jobs == []:
                # We can resume the job
                logger.debug("[" + str(job.id) + "] Resuming job")
                if 'noop' in job.types:
                    resume_job_action(job.id)
                    logger.debug("[" + str(job.id) + "] Resume NOOP job OK")
                else:
                    script = config['JUST_BEFORE_RESUME_EXEC_FILE']
                    # Fall back to the default timeout when none is configured
                    if 'SUSPEND_RESUME_SCRIPT_TIMEOUT' in config:
                        timeout = int(config['SUSPEND_RESUME_SCRIPT_TIMEOUT'])
                    else:
                        timeout = kao_tools.get_default_suspend_resume_script_timeout()
                    skip = 0
                    logger.debug("[" + str(job.id) + "] Running post suspend script: `" +
                                 script + " " + str(job.id) + "'")
                    # The job id is passed as a separate argument of the
                    # script, as shown in the log line above
                    cmd_str = script + " " + str(job.id)
                    return_code = -1
                    try:
                        return_code = call(cmd_str, shell=True, timeout=timeout)
                    except TimeoutExpired as e:
                        logger.error(str(e) + "[" + str(job.id) + "] Suspend script timeouted")
                        add_new_event('RESUME_SCRIPT_ERROR', job.id, "Suspend script timeouted")
                    if return_code != 0:
                        str_error = "[" + str(job.id) + "] Suspend script error, return code = "\
                                    + str(return_code)
                        logger.error(str_error)
                        add_new_event('RESUME_SCRIPT_ERROR', job.id, str_error)
                        frag_job(job.id)
                        tools.notify_almighty('Qdel')
                    # NOTE(review): skip is set even when the script succeeded,
                    # so the cpuset branch below only runs for noop jobs --
                    # confirm whether this belongs in the error branch.
                    skip = 1

                cpuset_nodes = None
                if 'JOB_RESOURCE_MANAGER_PROPERTY_DB_FIELD' in config:
                    cpuset_field = config['JOB_RESOURCE_MANAGER_PROPERTY_DB_FIELD']
                else:
                    cpuset_field = ""
                if cpuset_field and (skip == 0):
                    # TODO
                    cpuset_name = job.user + "_" + str(job.id)
                    cpuset_nodes = get_cpuset_values(cpuset_field,
                                                     job.assigned_moldable_id)
                    # TODO
                    suspend_data_hash = {'name': cpuset_name,
                                         'job_id': job.id,
                                         'oarexec_pid_file':
                                         kao_tools.get_oar_pid_file_name(job.id)}
                if cpuset_nodes:
                    # TODO
                    taktuk_cmd = config['TAKTUK_CMD']
                    if 'SUSPEND_RESUME_FILE' in config:
                        suspend_file = config['SUSPEND_RESUME_FILE']
                    else:
                        # TODO
                        suspend_file = kao_tools.get_default_suspend_resume_file()

    #
    # TODO: TOFINISH
    #

    # Notify oarsub -I when they will be launched
    for j_info in get_gantt_waiting_interactive_prediction_date():
        job_id, job_info_type, job_start_time, job_message = j_info
        addr, port = job_info_type.split(':')
        new_start_prediction = local_to_sql(job_start_time)
        logger.debug("[" + str(job_id) + "] Notifying user of the start prediction: " +
                     new_start_prediction + "(" + job_message + ")")
        tools.notify_tcp_socket(addr, port, "[" + initial_time_sql + "] Start prediction: " +
                                new_start_prediction + " (" + job_message + ")")

    # Run the decisions
    # Process "toError" jobs
    if 'toError' in jobs_by_state:
        for job in jobs_by_state['toError']:
            addr, port = job.info_type.split(':')
            if job.type == 'INTERACTIVE' or\
               (job.type == 'PASSIVE' and job.reservation == 'Scheduled'):
                logger.debug("Notify oarsub job (num:" + str(job.id) + ") in error; jobInfo=" +
                             job.info_type)

                nb_sent1 = tools.notify_tcp_socket(addr, port, job.message + '\n')
                nb_sent2 = tools.notify_tcp_socket(addr, port, 'BAD JOB' + '\n')
                if (nb_sent1 == 0) or (nb_sent2 == 0):
                    logger.warning(
                        "Cannot open connection to oarsub client for" + str(job.id))
            logger.debug("Set job " + str(job.id) + " to state Error")
            set_job_state(job.id, 'Error')

    # Process toAckReservation jobs
    if 'toAckReservation' in jobs_by_state:
        for job in jobs_by_state['toAckReservation']:
            addr, port = job.info_type.split(':')
            logger.debug(
                "Treate job" + str(job.id) + " in toAckReservation state")

            nb_sent = tools.notify_tcp_socket(addr, port, 'GOOD RESERVATION' + '\n')

            if nb_sent == 0:
                logger.warning(
                    "Frag job " + str(job.id) + ", I cannot notify oarsub for the reservation")
                add_new_event('CANNOT_NOTIFY_OARSUB', str(
                    job.id), "Can not notify oarsub for the job " + str(job.id))

                # TODO ???
                # OAR::IO::lock_table / OAR::IO::unlock_table($base)
                frag_job(job.id)

                exit_code = 2
            else:
                logger.debug("Notify oarsub for a RESERVATION (idJob=" +
                             str(job.id) + ") --> OK; jobInfo=" + job.info_type)
                set_job_state(job.id, 'Waiting')
                if ((job.start_time - 1) <= current_time_sec) and (exit_code == 0):
                    exit_code = 1

    # Process toLaunch jobs
    if 'toLaunch' in jobs_by_state:
        for job in jobs_by_state['toLaunch']:
            notify_to_run_job(job.id)

    logger.debug("End of Meta Scheduler")

    return exit_code
Пример #19
0
def test_db_all_in_one_AR_2(monkeypatch):
    """An advance reservation requested for a past date must end in Error.

    The date handed to ``insert_and_sched_ar`` is 1000 units before the
    current date returned by ``get_date()``.
    """
    past_date = get_date() - 1000
    job = insert_and_sched_ar(past_date)

    # Kept for debugging output when the test is run with captured stdout shown
    print(job.state, ' ', job.reservation)

    assert job.state == 'Error'
Пример #20
0
def _array_job_output_pattern(pattern, name, extension):
    """Return the stdout/stderr file name pattern of a job.

    When ``pattern`` is empty, build ``OAR[.<name>].%jobid%.<extension>``;
    otherwise substitute every ``%jobname%`` placeholder with ``name``.
    """
    if not pattern:
        prefix = 'OAR.' + name if name else 'OAR'
        return prefix + '.%jobid%.' + extension
    # Plain string replacement: the job name must be inserted literally, not
    # interpreted as a regex replacement template (backslashes, group refs).
    return pattern.replace('%jobname%', name or '')


def add_micheline_simple_array_job(job_vars,
                                   ssh_private_key, ssh_public_key,
                                   array_id, array_index,
                                   array_commands,
                                   properties_applied_after_validation):
    """Insert a non-moldable array job: one job row per command plus related rows.

    Rows are written to the Job, Challenge, MoldableJobDescription,
    JobResourceGroup, JobResourceDescription, JobType and JobStateLog tables.

    :param job_vars: dict of job settings. Keys read here: ``resource_request``,
        ``properties``, ``name``, ``stdout``, ``stderr``, ``types``, ``hold``.
        ``properties``, ``stdout`` and ``stderr`` are updated in place.
        ``resource_request[0]`` is indexed as ``(resource_desc_list, walltime)``;
        each resource description provides ``'property'`` and ``'resources'``
        (a list of dicts with ``'resource'`` and ``'value'``).
    :param ssh_private_key: not used by this function.
    :param ssh_public_key: not used by this function.
    :param array_id: ignored; the id of the first inserted job is used as the
        array id of every job of the array.
    :param array_index: value stored in ``array_index`` for the inserted jobs.
    :param array_commands: list of commands, one job is created per command
        (must contain at least one command).
    :param properties_applied_after_validation: extra properties AND-ed to the
        user properties after the resource request has been checked.
    :return: tuple ``(0, job_id_list)`` with the ids of the jobs of the array.
    """
    date = get_date()

    # Check the jobs are not moldable
    resource_request = job_vars['resource_request']
    if len(resource_request) > 1:
        print_error('array jobs cannot be moldable')
        sub_exit(-30)

    # Estimate_job_nb_resources and incidentally test if properties and resources
    # request are coherent against available resources.
    # The returned values are not used yet (see the job message TODO below).
    properties = job_vars['properties']
    resource_available, estimated_nb_resources = estimate_job_nb_resources(resource_request, properties)

    # Add admin properties to the job
    if properties_applied_after_validation:
        if properties:
            properties = '(' + properties + ') AND ' + properties_applied_after_validation
        else:
            properties = properties_applied_after_validation
    job_vars['properties'] = properties
    # TODO format job message

    # my $job_message = format_job_message_text($job_name,$estimated_nb_resources, $estimated_walltime,
    # $jobType, $reservationField, $queue_name, $project, $type_list, '');

    name = job_vars['name']
    job_vars['stdout'] = _array_job_output_pattern(job_vars['stdout'], name, 'stdout')
    # Store the computed stderr pattern back, as is done for stdout
    job_vars['stderr'] = _array_job_output_pattern(job_vars['stderr'], name, 'stderr')

    # Insert the first job of the array
    kwargs = job_kwargs(job_vars, array_commands[0], date)
    kwargs['message'] = ''  # TODO message
    kwargs['array_index'] = array_index

    ins = Job.__table__.insert().values(**kwargs)
    result = db.session.execute(ins)
    first_job_id = result.inserted_primary_key[0]

    # The id of the first job becomes the array id
    array_id = first_job_id
    db.query(Job).filter(Job.id == first_job_id).update({Job.array_id: array_id})
    db.commit()

    # Insert remaining array jobs with array_id
    # NOTE(review): every job of the array gets the same array_index - confirm
    # this is intended.
    kwargs['array_id'] = array_id
    jobs_data = [dict(kwargs, command=command) for command in array_commands[1:]]

    # Skip the bulk insert for a single-command array: executing an INSERT with
    # an empty parameter list is presumably not a no-op on every SQLAlchemy
    # version - verify before removing this guard.
    if jobs_data:
        db.session.execute(Job.__table__.insert(), jobs_data)
        db.commit()

    # Retrieve job_ids thanks to array_id value
    result = db.query(Job.id).filter(Job.array_id == array_id).all()
    job_id_list = [r[0] for r in result]

    # TODO Populate challenges and moldable_job_descriptions tables
    walltime = resource_request[0][1]
    if not walltime:
        walltime = default_job_walltime

    # NOTE(review): if the challenge is meant to be an unguessable secret, the
    # `secrets` module would be a better source than `random` - confirm usage.
    challenges = [{'job_id': job_id, 'challenge': random.randint(1, 1000000000000)}
                  for job_id in job_id_list]
    moldable_job_descriptions = [{'moldable_job_id': job_id, 'moldable_walltime': walltime}
                                 for job_id in job_id_list]

    db.session.execute(Challenge.__table__.insert(), challenges)
    db.session.execute(MoldableJobDescription.__table__.insert(), moldable_job_descriptions)
    db.commit()

    # Retrieve moldable_ids thanks to job_ids
    result = db.query(MoldableJobDescription.id)\
               .filter(MoldableJobDescription.job_id.in_(tuple(job_id_list)))\
               .order_by(MoldableJobDescription.id).all()
    moldable_ids = [r[0] for r in result]

    # Populate job_resource_groups table: one group per (moldable, resource description)
    resource_desc_lst = resource_request[0][0]
    job_resource_groups = [{'res_group_moldable_id': moldable_id,
                            'res_group_property': resource_desc['property']}
                           for moldable_id in moldable_ids
                           for resource_desc in resource_desc_lst]

    db.session.execute(JobResourceGroup.__table__.insert(), job_resource_groups)
    db.commit()

    # Retrieve res_group_ids thanks to moldable_ids
    result = db.query(JobResourceGroup.id)\
               .filter(JobResourceGroup.moldable_id.in_(tuple(moldable_ids)))\
               .order_by(JobResourceGroup.id).all()
    res_group_ids = [r[0] for r in result]

    # Populate job_resource_descriptions table.
    # Groups were inserted job by job, description by description, and are read
    # back ordered by id, so a running index walks them in the same order.
    job_resource_descriptions = []
    group_idx = 0
    for _ in array_commands:  # Nb jobs
        for resource_desc in resource_desc_lst:
            for order, res_val in enumerate(resource_desc['resources']):
                job_resource_descriptions.append({'res_job_group_id': res_group_ids[group_idx],
                                                  'res_job_resource_type': res_val['resource'],
                                                  'res_job_value': res_val['value'],
                                                  'res_job_order': order})
            group_idx += 1

    db.session.execute(JobResourceDescription.__table__.insert(), job_resource_descriptions)
    db.commit()

    # Populate job_types table
    types = job_vars['types']
    if types:
        job_types = [{'job_id': job_id, 'type': typ}
                     for job_id in job_id_list
                     for typ in types]
        db.session.execute(JobType.__table__.insert(), job_types)
        db.commit()

    # TODO Anterior job setting

    # Hold/Waiting management, job_state_log setting
    # Job is inserted with hold state first; it is released to Waiting unless
    # the submitter asked for the job to be held.
    state_log = 'Hold'
    if not job_vars['hold']:
        state_log = 'Waiting'
        db.query(Job).filter(Job.array_id == array_id).update({Job.state: state_log})
        db.commit()

    # Record the initial state of every job of the array in job_state_log
    job_state_logs = [{'job_id': job_id, 'job_state': state_log, 'date_start': date}
                      for job_id in job_id_list]
    db.session.execute(JobStateLog.__table__.insert(), job_state_logs)
    db.commit()

    return (0, job_id_list)