def main(): config['LOG_FILE'] = '/tmp/oar_kamelot.log' logger = get_logger("oar.kamelot_fifo", forward_stderr=True) config.setdefault_config(DEFAULT_CONFIG) plt = Platform() schedule_fifo_cycle(plt, "default") logger.info("That's all folks")
def cli(job, full, state, user, array, compact, gantt, events, properties,
        accounting, sql, format, json, yaml, version):
    config.setdefault_config(DEFAULT_CONFIG)
    oarapi = OarApi()
    if not job:
        answer = oarapi.get('jobs/details.json')
        print_jobs(True, answer)
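# Usage sketch (illustrative; the command name is assumed to be oarstat from
# the option set above): invoked with no job id, the CLI fetches every job
# from the REST API and renders it as a table.
#   $ oarstat
# maps to GET jobs/details.json followed by print_jobs(True, answer).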
def send_to_hulot(cmd, data):
    config.setdefault_config({"FIFO_HULOT": "/tmp/oar_hulot_pipe"})
    fifoname = config["FIFO_HULOT"]
    try:
        with open(fifoname, 'w') as fifo:
            # Send the command and its payload over Hulot's named pipe
            # (the original hardcoded 'HALT' and ignored the cmd argument).
            fifo.write('%s:%s\n' % (cmd, data))
            fifo.flush()
    except IOError as e:
        e.strerror = 'Unable to communicate with Hulot: %s (%s)' % (fifoname, e.strerror)
        logger.error(e.strerror)
        return 1
    return 0
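# Hedged usage sketch: how a caller might ask the energy-saving module to
# power nodes off. The "HALT" command string matches the protocol written to
# the FIFO above; the space-separated node-list format and the helper name
# are assumptions for illustration only.
def request_halt_example(nodes):
    """Illustrative only; not part of the OAR code base."""
    if send_to_hulot("HALT", " ".join(nodes)) != 0:
        logger.warning("Hulot is unreachable; nodes left powered on")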
def create_app(**kwargs):
    """Return the OAR API application instance."""
    app = Flask(__name__)
    app.wsgi_app = WSGIProxyFix(app.wsgi_app)
    config.setdefault_config(default_config)
    app.config.update(config)
    db.query_class = APIQuery
    db.query_collection_class = APIQueryCollection
    register_error_handlers(app)
    register_hooks(app)
    register_extensions(app)
    register_blueprints(app)
    register_proxy(app, **kwargs)
    return app
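# Usage sketch: create_app() returns a plain Flask application, so it can be
# served by Flask's development server or any WSGI server. Host, port, and
# the module path given to gunicorn are illustrative assumptions, not values
# taken from the OAR configuration.
#
#   app = create_app()
#   app.run(host="127.0.0.1", port=5000)        # development server only
#
#   # or with a WSGI server (assumed module path):
#   # gunicorn "oar.rest_api.app:create_app()"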
def main():
    config['LOG_FILE'] = '/tmp/oar_kamelot.log'
    logger = get_logger("oar.kamelot", forward_stderr=True)
    config.setdefault_config(DEFAULT_CONFIG)
    plt = Platform()

    logger.debug("argv..." + str(sys.argv))

    # Optional CLI arguments: argv[1] is the queue name, argv[2] the
    # scheduling time.
    if len(sys.argv) > 2:
        schedule_cycle(plt, int(float(sys.argv[2])), sys.argv[1])
    elif len(sys.argv) == 2:
        schedule_cycle(plt, plt.get_time(), sys.argv[1])
    else:
        schedule_cycle(plt, plt.get_time())

    logger.info("That's all folks")
    from oar.lib import db
    db.commit()
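# Invocation sketch, inferred from the argv handling above (the timestamp
# unit, seconds since epoch, is an assumption):
#   kamelot                       # schedule with the default queue selection, now
#   kamelot default               # schedule queue "default" at the current time
#   kamelot default 1700000000    # schedule queue "default" at a given time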
from pwd import getpwnam

# Set undefined config values to default ones
DEFAULT_CONFIG = {
    'META_SCHED_CMD': 'kao',
    'SERVER_HOSTNAME': 'localhost',
    'APPENDICE_SERVER_PORT': '6668',  # new endpoint which replaces appendice
    'SCHEDULER_MIN_TIME_BETWEEN_2_CALLS': '1',
    'FINAUD_FREQUENCY': '300',
    'LOG_FILE': '/var/log/oar.log',
    'ENERGY_SAVING_INTERNAL': 'no'
}
config.setdefault_config(DEFAULT_CONFIG)

# Retrieve the umask and set a new one
old_umask = os.umask(0o022)

# TODO
# my $oldfh = select(STDERR); $| = 1; select($oldfh);
# $oldfh = select(STDOUT); $| = 1; select($oldfh);

# Everything is run by the oar user (the real uid of this process).
# os.environ['OARDO_UID'] = str(os.geteuid())

# TODO
# my $Redirect_STD_process = OAR::Modules::Judas::redirect_everything();
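# Semantics sketch for setdefault_config(), matching the "set undefined
# config value to default one" comment above: only keys missing from the
# loaded configuration are filled in, so values read from oar.conf always
# win over DEFAULT_CONFIG. Illustrative example:
#
#   config['LOG_FILE'] = '/var/log/custom.log'   # e.g. loaded from oar.conf
#   config.setdefault_config(DEFAULT_CONFIG)
#   assert config['LOG_FILE'] == '/var/log/custom.log'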
from copy import deepcopy

import oar.kao.scheduling
from oar.lib import config
from oar.lib.interval import (intersec, itvs_size,
                              extract_n_scattered_block_itv, aggregate_itvs)
import pickle

try:
    import zerorpc
except ImportError:
    zerorpc = None

# Set undefined config value to default one
config.setdefault_config({
    'COORM_DEFAULT_TIMEOUT': 10,
})


def find_default(itvs_avail, hy_res_rqts, hy, beginning, *find_args, **find_kwargs):
    """Simple wrapper around the default find function, for test purposes."""
    return oar.kao.scheduling.find_resource_hierarchies_job(itvs_avail,
                                                            hy_res_rqts, hy)


def assign_default(slots_set, job, hy, min_start_time, *assign_args, **assign_kwargs):
    """Simple wrapper around the default assign function, for test purposes."""
    return oar.kao.scheduling.assign_resources_mld_job_split_slots(slots_set, job,
                                                                   hy, min_start_time)


def find_begin(itvs_avail, hy_res_rqts, hy, beginning, *find_args, **find_kwargs):
    """Simple function to test the beginning value, which is set to True if
    the slot begins the slotset (slotset.begin == slots[1].b).
    """
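# Hedged sketch of a custom find_* hook in the same shape as the wrappers
# above: delegate to the default hierarchy search, but only when the slot
# starts the slotset. The body of find_begin is not shown in the original,
# so this is an assumed illustration of the hook contract (return the
# selected resource intervals, or an empty list when nothing fits).
def find_begin_example(itvs_avail, hy_res_rqts, hy, beginning, *args, **kwargs):
    if not beginning:
        return []
    return oar.kao.scheduling.find_resource_hierarchies_job(itvs_avail,
                                                            hy_res_rqts, hy)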
def test_simsim_1():
    # Set undefined config value to default one
    DEFAULT_CONFIG = {
        'HIERARCHY_LABELS': 'resource_id,network_address',
        'SCHEDULER_RESOURCE_ORDER': "resource_id ASC",
        'SCHEDULER_JOB_SECURITY_TIME': '60',
        'SCHEDULER_AVAILABLE_SUSPENDED_RESOURCE_TYPE': 'default',
        'FAIRSHARING_ENABLED': 'no',
        'SCHEDULER_FAIRSHARING_MAX_JOB_PER_USER': '******',
        'QUOTAS': 'no'
    }
    config.setdefault_config(DEFAULT_CONFIG)

    nb_res = 32

    #
    # generate ResourceSet
    #
    hy_resource_id = [[(i, i)] for i in range(1, nb_res + 1)]
    res_set = ResourceSetSimu(
        rid_i2o=range(nb_res + 1),
        rid_o2i=range(nb_res + 1),
        roid_itvs=[(1, nb_res)],
        hierarchy={'resource_id': hy_resource_id},
        available_upto={2147483600: [(1, nb_res)]}
    )

    #
    # generate jobs
    #
    nb_jobs = 4
    jobs = {}
    submission_time_jids = []
    for i in range(1, nb_jobs + 1):
        jobs[i] = JobSimu(id=i,
                          state="Waiting",
                          queue="test",
                          start_time=0,
                          walltime=0,
                          types={},
                          res_set=[],
                          moldable_id=0,
                          mld_res_rqts=[
                              (i, 60, [([("resource_id", 15)], [(0, nb_res - 1)])])],
                          run_time=20 * i,
                          deps=[],
                          key_cache={},
                          ts=False,
                          ph=0,
                          assign=False,
                          find=False)
        submission_time_jids.append((10, [i]))

    # submission_time_jids = [(10, [1, 2, 3, 4])]
    # submission_time_jids = [(10, [1, 2]), (10, [3])]
    print(submission_time_jids)

    simsched = SimSched(res_set, jobs, submission_time_jids)
    simsched.run()

    plt = simsched.platform
    print("Number of completed jobs:", len(plt.completed_jids))
    print("Completed job ids:", plt.completed_jids)
    print(jobs)

    assert len(plt.completed_jids) == nb_jobs
def karma_jobs_sorting(queue, now, jids, jobs, plt):
    # if "SCHEDULER_FAIRSHARING_MAX_JOB_PER_USER" in config:
    #     fairsharing_nb_job_limit = config["SCHEDULER_FAIRSHARING_MAX_JOB_PER_USER"]

    # TODO NOT USED
    # fairsharing_nb_job_limit = 100000
    karma_window_size = 3600 * 30 * 24  # TODO: move to configuration?

    # Set undefined config values to default ones
    default_config = {
        "SCHEDULER_FAIRSHARING_PROJECT_TARGETS": "{default => 21.0}",
        "SCHEDULER_FAIRSHARING_USER_TARGETS": "{default => 22.0}",
        "SCHEDULER_FAIRSHARING_COEF_PROJECT": "0",
        "SCHEDULER_FAIRSHARING_COEF_USER": "******",
        "SCHEDULER_FAIRSHARING_COEF_USER_ASK": "1"
    }
    config.setdefault_config(default_config)

    # get fairsharing config if any
    karma_proj_targets = perl_hash_2_dict(
        config["SCHEDULER_FAIRSHARING_PROJECT_TARGETS"])
    karma_user_targets = perl_hash_2_dict(
        config["SCHEDULER_FAIRSHARING_USER_TARGETS"])
    karma_coeff_proj_consumption = float(
        config["SCHEDULER_FAIRSHARING_COEF_PROJECT"])
    karma_coeff_user_consumption = float(
        config["SCHEDULER_FAIRSHARING_COEF_USER"])
    karma_coeff_user_asked_consumption = float(
        config["SCHEDULER_FAIRSHARING_COEF_USER_ASK"])

    #
    # Sort jobs according to their karma value (fairsharing)
    #
    window_start = now - karma_window_size
    window_stop = now

    karma_sum_time_asked, karma_sum_time_used = plt.get_sum_accounting_window(
        queue, window_start, window_stop)
    karma_projects_asked, karma_projects_used = plt.get_sum_accounting_by_project(
        queue, window_start, window_stop)
    karma_users_asked, karma_users_used = plt.get_sum_accounting_by_user(
        queue, window_start, window_stop)

    #
    # compute karma for each job
    #
    for job in itervalues(jobs):
        karma_proj_used_j = karma_projects_used.get(job.project, 0.0)
        karma_user_used_j = karma_users_used.get(job.user, 0.0)
        karma_user_asked_j = karma_users_asked.get(job.user, 0.0)
        karma_proj_target = karma_proj_targets.get(job.project, 0.0)
        if job.user in karma_user_targets:
            karma_user_target = karma_user_targets[job.user] / 100.0
        else:
            karma_user_target = 0.0

        job.karma = (karma_coeff_proj_consumption *
                     ((karma_proj_used_j / karma_sum_time_used) -
                      (karma_proj_target / 100.0)) +
                     karma_coeff_user_consumption *
                     ((karma_user_used_j / karma_sum_time_used) -
                      (karma_user_target / 100.0)) +
                     karma_coeff_user_asked_consumption *
                     ((karma_user_asked_j / karma_sum_time_asked) -
                      (karma_user_target / 100.0)))

    # sort jids according to the jobs' karma values
    karma_ordered_jids = sorted(jids, key=lambda jid: jobs[jid].karma)

    return karma_ordered_jids
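# Worked example with illustrative numbers (not taken from the code base):
# one user, coefficients proj=0, user=2, user_ask=1, user target 22. Note
# that the user target is divided by 100 at lookup and again in the formula
# above, so 22 becomes 0.0022 here.
#
#   karma_sum_time_used  = 1000.0   # total time used in the window
#   karma_sum_time_asked = 2000.0   # total time asked in the window
#   karma_user_used_j    = 400.0    # this user's usage
#   karma_user_asked_j   = 500.0    # this user's requests
#
#   karma = 2 * (400/1000 - 0.0022) + 1 * (500/2000 - 0.0022) ≈ 1.04
#
# sorted() is ascending, so jobs with lower karma values come first.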
def main(): logger = get_logger("oar.kamelot_basic", forward_stderr=True) config.setdefault_config(DEFAULT_CONFIG) plt = Platform() schedule_cycle(plt) logger.info("That's all folks")
def cli(
    command,
    interactive,
    queue,
    resource,
    reservation,
    connect,
    type,
    checkpoint,
    property,
    resubmit,
    scanscript,
    project,
    signal,
    directory,
    name,
    after,
    notify,
    array,
    array_param_file,
    use_job_key,
    import_job_key_from_file,
    import_job_key_inline,
    export_job_key_to_file,
    stdout,
    stderr,
    hold,
    version,
):
    """Submit a job to the OAR batch scheduler."""
    config.setdefault_config(DEFAULT_CONFIG)

    # Default walltime applied when a job does not define one
    default_job_walltime = str(config["DEFAULT_JOB_WALLTIME"])

    log_warning = ""  # TODO
    log_error = ""
    log_info = ""
    log_std = ""

    remote_host = config["SERVER_HOSTNAME"]
    remote_port = int(config["SERVER_PORT"])

    if "OARSUB_DEFAULT_RESOURCES" in config:
        default_resources = config["OARSUB_DEFAULT_RESOURCES"]
    else:
        default_resources = "/resource_id=1"

    if "OARSUB_NODES_RESOURCES" in config:
        nodes_resources = config["OARSUB_NODES_RESOURCES"]
    else:
        nodes_resources = "resource_id"

    # TODO Deploy_hostname / Cosystem_hostname
    # $Deploy_hostname = get_conf("DEPLOY_HOSTNAME");
    # if (!defined($Deploy_hostname)){
    #     $Deploy_hostname = $remote_host;
    # }
    # $Cosystem_hostname = get_conf("COSYSTEM_HOSTNAME");
    # if (!defined($Cosystem_hostname)){
    #     $Cosystem_hostname = $remote_host;
    # }

    cpuset_field = config["JOB_RESOURCE_MANAGER_PROPERTY_DB_FIELD"]
    cpuset_path = config["CPUSET_PATH"]

    if "OAR_RUNTIME_DIRECTORY" in config:
        pass
    # if (is_conf("OAR_RUNTIME_DIRECTORY")){
    #     OAR::Sub::set_default_oarexec_directory(get_conf("OAR_RUNTIME_DIRECTORY"));
    # }
    # my $default_oar_dir = OAR::Sub::get_default_oarexec_directory();
    # if (!(((-d $default_oar_dir) and (-O $default_oar_dir)) or (mkdir($default_oar_dir)))){
    #     die("# Error: failed to create the OAR directory $default_oar_dir, or bad permissions.\n");
    # }

    binpath = ""
    if "OARDIR" in os.environ:
        binpath = os.environ["OARDIR"] + "/"
    else:
        print_error("OARDIR environment variable is not defined.")
        sub_exit(1)

    openssh_cmd = config["OPENSSH_CMD"]
    ssh_timeout = int(config["OAR_SSH_CONNECTION_TIMEOUT"])
    # if (is_conf("OAR_SSH_CONNECTION_TIMEOUT")){
    #     OAR::Sub::set_ssh_timeout(get_conf("OAR_SSH_CONNECTION_TIMEOUT"));
    # }

    # OAR version
    # TODO: OAR is now a set of composition...

    types = type
    properties = lstrip_none(property)
    if not directory:
        launching_directory = ""
    else:
        launching_directory = lstrip_none(directory)

    initial_request = " ".join(sys.argv[1:])
    queue_name = lstrip_none(queue)

    reservation_date = lstrip_none(reservation)
    if reservation_date:
        m = re.search(r"^\s*(\d{4}\-\d{1,2}\-\d{1,2})\s+(\d{1,2}:\d{1,2}:\d{1,2})\s*$",
                      reservation)
        if m:
            reservation_date = sql_to_local(m.group(1) + " " + m.group(2))
        else:
            print_error(
                'syntax error for the advance reservation start date '
                'specification. Expected format is: "YYYY-MM-DD hh:mm:ss"'
            )
            sub_exit(7)

    if array:
        array_nb = array
    else:
        array_nb = 1

    # Check the default name of the key if we have to generate it
    if ("OARSUB_FORCE_JOB_KEY" in config) and config["OARSUB_FORCE_JOB_KEY"] == "yes":
        use_job_key = True
    else:
        use_job_key = False

    # TODO ssh_private_key, ssh_public_key
    # ssh_private_key = ''
    # ssh_public_key = ''

    # TODO import_job_key_file, export_job_key_file
    import_job_key_file = ""
    export_job_key_file = ""

    if resubmit:
        print("# Resubmitting job ", resubmit, "...")
        ret = resubmit_job(resubmit)
        if ret > 0:
            job_id = ret
            print(" done.\n")
            print("OAR_JOB_ID=" + str(job_id))
            if signal_almighty(remote_host, remote_port, "Qsub") > 0:
                print_error(
                    "cannot connect to executor "
                    + str(remote_host) + ":" + str(remote_port)
                    + ". OAR server might be down."
                )
OAR server might be down." ) sub_exit(3) else: sub_exit(0) else: print(" error.") if ret == -1: print_error("interactive jobs and advance reservations cannot be resubmitted.") elif ret == -2: print_error("only jobs in the Error or Terminated state can be resubmitted.") elif ret == -3: print_error("resubmitted job user mismatch.") elif ret == -4: print_error("another active job is using the same job key.") else: print_error("unknown error.") sub_exit(4) if not command and not interactive and not reservation and not connect: usage() sub_exit(5) if interactive and reservation: print_error("an advance reservation cannot be interactive.") usage() sub_exit(7) if interactive and any(re.match(r"^desktop_computing$", t) for t in type): print_error(" a desktop computing job cannot be interactive") usage() sub_exit(17) if any(re.match(r"^noop$", t) for t in type): if interactive: print_error("a NOOP job cannot be interactive.") sub_exit(17) elif connect: print_error("a NOOP job does not have a shell to connect to.") sub_exit(17) # notify : check insecure character if notify and re.match(r"^.*exec\s*:.+$"): m = re.search(r".*exec\s*:([a-zA-Z0-9_.\/ -]+)$", notify) if not m: print_error( "insecure characters found in the notification method \ (the allowed regexp is: [a-zA-Z0-9_.\/ -]+)." ) sub_exit(16) # TODO Connect to a reservation # Connect to a reservation # if (defined($connect_job)){ # Do not kill the job if the user close the window # $SIG{HUP} = 'DEFAULT'; # OAR::Sub::close_db_connection(); exit(connect_job($connect_job,0,$Openssh_cmd)); # } if not project: project = DEFAULT_VALUE["project"] if not signal: signal = DEFAULT_VALUE["signal"] if not directory: directory = DEFAULT_VALUE["directory"] resource_request = parse_resource_descriptions(resource, default_resources, nodes_resources) job_vars = { "job_type": None, "resource_request": resource_request, "command": command, "info_type": None, "queue_name": queue_name, "properties": properties, "checkpoint": checkpoint, "signal": signal, "notify": notify, "name": name, "types": types, "launching_directory": launching_directory, "dependencies": after, "stdout": stdout, "stderr": stderr, "hold": hold, "project": project, "initial_request": initial_request, "user": os.environ["OARDO_USER"], "array_id": 0, "start_time": "0", "reservation_field": None, } if not interactive and command: cmd_executor = "Qsub" if scanscript: # TODO scanscript pass array_params = [] if array_param_file: pass # TODO # $array_params_ref = OAR::Sub::read_array_param_file($array_param_file); # $array_nb = scalar @{$array_params_ref}; if array_nb == 0: print_error("an array of job must have a number of sub-jobs greater than 0.") usage() sub_exit(6) job_vars["info_type"] = "$Host:$server_port" # TODO "$Host:$server_port" job_vars["job_type"] = "PASSIVE" (err, job_id_lst) = add_micheline_jobs( job_vars, reservation_date, use_job_key, import_job_key_inline, import_job_key_file, export_job_key_file, initial_request, array_nb, array_params, ) else: # TODO interactive if command: print_warning("asking for an interactive job (-I), so ignoring arguments: " + command + " .") cmd_executor = "Qsub -I" if array_param_file: print_error("a array job with parameters given in a file cannot be interactive.") usage() sub_exit(9) if array_nb != 1: print_error("an array job cannot be interactive.") usage() sub_exit(8) if reservation: # Test if this job is a reservation and the syntax is right # TODO Pass pass socket_server = init_tcp_server() (server, server_port) = socket_server.getsockname() 
job_vars["info_type"] = server + ":" + str(server_port) job_vars["job_type"] = "INTERACTIVE" (err, job_id_lst) = add_micheline_jobs( job_vars, reservation_date, use_job_key, import_job_key_inline, import_job_key_file, export_job_key_file, initial_request, array_nb, array_params, ) # pdb.set_trace() if err != 0: print_error("command failed, please verify your syntax.") sub_exit(err, "") oar_array_id = 0 # Print job_id list if len(job_id_lst) == 1: print("OAR_JOB_ID=", job_id_lst[0]) else: job = db["Job"].query.filter(Job.id == job_id_lst[0]).one() oar_array_id = job.array_id for job_id in job_id_lst: print("OAR_JOB_ID=", job_id) result = (job_id_lst, oar_array_id) # Notify Almigthy tools.create_almighty_socket() tools.notify_almighty(cmd_executor) if reservation: # Reservation mode print_info("advance reservation request: waiting for approval from the scheduler...") (conn, address) = socket_server.accept() answer = conn.recv(1024) if answer[:-1] == "GOOD RESERVATION": print_info("advance reservation is GRANTED.") else: print_info("advance reservation is REJECTED ", answer[:-1]) sub_exit(10) elif interactive: # Interactive mode print_info("interactive mode: waiting...") prev_str = "" while True: (conn, address) = socket_server.accept() answer = conn.recv(1024) answer = answer[:-1] m = re.search(r"\](.*)$", answer) if m and m.group(1) != prev_str: print_info(answer) prev_str = m.group(1) elif answer != "GOOD JOB": print_info(answer) if ( (answer == "GOOD JOB") or (answer == "BAD JOB") or (answer == "JOB KILLE") or re.match(r"^ERROR", answer) ): break if answer == "GOOD JOB": # TODO exit(connect_job($Job_id_list_ref->[0],1,$Openssh_cmd)); pass else: sub_exit(11) sub_exit(0, result)