def execute_delay_db_updater():
    global g_delay_jobs, g_delay_jobs_lock
    while True:
        ready_jobs = {}
        with g_delay_jobs_lock:
            for jobid in g_delay_jobs.keys():
                # Check whether this job is submitted from antilles
                find_in_ui_jobs = False
                with g_jobop_lock:
                    if jobid in g_ui_jobs:
                        find_in_ui_jobs = True
                        if g_delay_jobs[jobid]['status'] == 'C':
                            del g_ui_jobs[jobid]
                        ready_jobs[jobid] = g_delay_jobs[jobid]
                # If still not found in ui jobs, check the timeout.
                # The default timeout is one minute.
                if not find_in_ui_jobs and \
                        time.time() - g_delay_jobs[jobid]['time'] > 60:
                    ready_jobs[jobid] = g_delay_jobs[jobid]
                    ready_jobs[jobid]['cmd'] = True
            # Remove ready jobs from delay jobs here; deleting them while
            # iterating over delay jobs above would be unsafe.
            for jobid in ready_jobs.keys():
                if ready_jobs[jobid]['status'] == 'C':
                    del g_delay_jobs[jobid]
        # Store ready jobs into the DB.
        for jobid in ready_jobs.keys():
            # Parse the job scan result into a job object.
            job = JobManager().get_job(jobid, ready_jobs[jobid])
            if job is not None:
                if 'cmd' in ready_jobs[jobid]:
                    job.type = 'cmd'
                update_job_to_db(job)
            else:
                logger.debug('Invalid job scan result.')
        # Sleep 2 seconds so that newly submitted jobids have time to be
        # stored into g_ui_jobs.
        time.sleep(2)
def _look_for_jobid_by_name_and_args(job_name, job_submit_args):
    current_jobs = JobManager().get_alljob_status_info()
    find_jobid = ''
    for id in current_jobs:
        temp_job = JobManager().get_job(id, current_jobs[id])
        if temp_job is not None and temp_job.jobname == job_name \
                and temp_job.submit_args == job_submit_args:
            find_jobid = temp_job.jobid
    return find_jobid
def execute_job_monitor():
    logger.debug("start one update cycle")
    try:
        current_jobs = JobManager().get_alljob_status_info()
        _clear_invalid_job(current_jobs)
        global g_prev_jobs, g_need_update_jobs
        for jobid in current_jobs:
            if jobid not in g_prev_jobs or current_jobs[jobid][
                    'status'] != g_prev_jobs[jobid]['status']:
                if jobid in g_prev_jobs:
                    logger.debug('Job {} changed status from {} to {}'.format(
                        jobid, g_prev_jobs[jobid]['status'],
                        current_jobs[jobid]['status']))
                else:
                    logger.debug('Found new job {}'.format(jobid))
                dump_job = {jobid: copy.deepcopy(current_jobs[jobid])}
                g_need_update_jobs.put(dump_job)
        # Jobs that were in the previous scan result but are missing from the
        # current one can be considered to have stopped between the two scans.
        disappeared_jobs = set(g_prev_jobs) - set(current_jobs)
        for jobid in disappeared_jobs:
            if g_prev_jobs[jobid]['status'] != 'C':
                dump_job = {jobid: copy.deepcopy(g_prev_jobs[jobid])}
                dump_job[jobid]['status'] = 'C'
                g_need_update_jobs.put(dump_job)
        # Store the current scan result.
        g_prev_jobs = copy.deepcopy(current_jobs)
    except Exception:
        logger.error(traceback.format_exc())
def execute_db_updater():
    global g_need_update_jobs, g_delay_jobs, g_delay_jobs_lock
    while True:
        try:
            job = g_need_update_jobs.get(block=True, timeout=5)
            jobid = list(job.keys())[0]
            # Check whether this job is submitted from antilles
            find_in_ui_jobs = False
            with g_jobop_lock:
                if jobid in g_ui_jobs:
                    find_in_ui_jobs = True
                    if job[jobid]['status'] == 'C':
                        del g_ui_jobs[jobid]
            # Jobs that cannot yet be identified as UI jobs are added to
            # g_delay_jobs and handled later by execute_delay_db_updater.
            if find_in_ui_jobs:
                ui_job = JobManager().get_job(jobid, job[jobid])
                update_job_to_db(ui_job)
            else:
                with g_delay_jobs_lock:
                    if jobid not in g_delay_jobs:
                        g_delay_jobs[jobid] = {
                            'info': job[jobid]['info'],
                            'status': job[jobid]['status'],
                            'time': time.time()
                        }
                    else:
                        g_delay_jobs[jobid]['info'] = job[jobid]['info']
                        g_delay_jobs[jobid]['status'] = job[jobid]['status']
        except Queue.Empty:
            continue
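# A minimal sketch of the module-level state the three loops above rely on.
# The names (g_prev_jobs, g_need_update_jobs, g_delay_jobs, g_ui_jobs and the
# two locks) appear in the functions, but their initialization and the way the
# loops are started are not shown in this excerpt, so the wiring below is an
# assumption for illustration only.
import copy
import threading
import time
import traceback
import Queue  # Python 2; on Python 3 use "import queue as Queue"

g_prev_jobs = {}                    # last scheduler scan result, keyed by jobid
g_need_update_jobs = Queue.Queue()  # status changes waiting to be written to DB
g_delay_jobs = {}                   # jobs whose origin (UI or cmd) is undecided
g_delay_jobs_lock = threading.Lock()
g_ui_jobs = {}                      # jobids submitted through the UI -> state
g_jobop_lock = threading.Lock()


def _start_job_monitoring(scan_interval=30):
    """Hypothetical wiring: scan periodically, run the two updaters forever."""
    def _monitor_loop():
        while True:
            execute_job_monitor()  # diff the scans, enqueue changed jobs
            time.sleep(scan_interval)

    for target in (_monitor_loop, execute_db_updater,
                   execute_delay_db_updater):
        t = threading.Thread(target=target)
        t.daemon = True
        t.start()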
def _submit_filejob(job):
    logger.debug("_submit_filejob entry")
    bcreate = True
    if job is None or not hasattr(job, 'jobfilename') \
            or not hasattr(job, 'workspace'):
        return False
    abs_job_file_path = JobUtil.get_real_path(job.jobfilename, job.workspace)
    abs_working_dir = JobUtil.get_real_path(job.workingdir, job.workspace)
    submiter = job.submiter
    jobname = job.jobname
    jobid = JobManager().submit_job(abs_working_dir, abs_job_file_path,
                                    submiter, jobname)
    if jobid:
        job.jobid = jobid
        with g_jobop_lock:
            g_ui_jobs[jobid] = "Q"
    else:
        bcreate = False
    submit_ret = True
    if not bcreate:
        logger.error("_submit_filejob: can not get the jobid, "
                     "try to look it up by name and submit args")
        temp_jobid = JobMqOperations._look_for_jobid_by_name_and_args(
            jobname, abs_job_file_path)
        logger.info("Found jobid %s from name and args" % temp_jobid)
        if temp_jobid == '':
            raise Exception("_submit_filejob submit job failed "
                            "without jobid returned")
        jobid = temp_jobid
        job.jobid = jobid
        submit_ret = False
    jobinfo = JobManager().get_job(jobid)
    if not jobinfo:
        logger.error("_submit_filejob: failed to get job info "
                     "for the created job")
        raise Exception("_submit_filejob get job info failed!")
    if hasattr(jobinfo, 'jobname') and jobinfo.jobname != "":
        job.jobname = jobinfo.jobname
    if hasattr(jobinfo, 'queue') and jobinfo.queue != "":
        job.queue = jobinfo.queue
    return submit_ret
def post(self, request):
    username = request.user.username
    action = request.data.get('action')
    node_list = request.data.get('node_list', '')
    state = JobManager().scheduler.update_node_state(
        node_list, action, username)
    if not state:
        raise NodeStateException
    else:
        return Response(status=HTTP_200_OK)
def get(self, request):
    if 'node_list' in request.GET:
        node_list = request.GET['node_list']
    else:
        return Response([], status=HTTP_200_OK)
    data = JobManager().scheduler.get_node_detail(node_list)
    if data is None:
        raise NodeNotExistException
    else:
        return Response(data, status=HTTP_200_OK)
def get(self, request):
    if not settings.SCHEDULER_QUEUE_AUTO_GET:
        queues = settings.SCHEDULER_QUEUES
    else:
        usergroup = request.user.group.gr_name
        is_admin = request.user.is_admin
        queues = JobManager().get_allqueues(is_admin, usergroup)
    return Response(
        data=[
            dict({'id': index}, **queue)
            for index, queue in enumerate(queues)
        ]
    )
def _create_job_op(args):
    logger.debug("_create_job_op entry")
    type = args["type"]
    submit_ret = True
    try:
        if type == "file":
            logger.debug("_create_job_op file")
            job = JobMqOperations._parse_args_for_filejob(args)
            submit_ret = JobMqOperations._submit_filejob(job)
        else:
            logger.debug("_create_job_op %r" % type)
            job = JobMqOperations._parse_args_for_commonjob(args)
            JobManager().generate_commonjobfile(job)
            submit_ret = JobMqOperations._submit_filejob(job)
    except Exception:
        logger.error("_create_job_op failed with "
                     "exception: " + traceback.format_exc())
        return False, None
    return submit_ret, job
def post(self, request):
    data = JobManager().scheduler.create_queues_info(request.data)
    if data is None:
        raise CreateQueueException
    return Response(status=HTTP_200_OK)
def get(self, request):
    data = JobManager().scheduler.get_queues_info()
    if data is None:
        raise QueueInfoException
    return Response(data, status=HTTP_200_OK)
def put(self, request, queue_name):
    action = request.data['action']
    data = JobManager().scheduler.update_queues_state(queue_name, action)
    if data is None:
        raise QueueStateException
    return Response(status=HTTP_200_OK)
def delete(self, request, queue_name):
    data = JobManager().scheduler.delete_queues_detail(queue_name)
    if data is None:
        raise DeleteQueueException
    return Response(status=HTTP_200_OK)
def put(self, request, queue_name):
    data = JobManager().scheduler.update_queues_detail(
        queue_name, request.data)
    if data is None:
        raise QueueInfoException
    return Response(status=HTTP_200_OK)
def schedule_summaries():
    name = settings.DOMAIN
    point = {
        "cluster_scheduler_workable": JobManager().is_scheduler_working()
    }
    schedule_summary(name=name, point=point)
def _cancel_job_op(args):
    logger.debug("canceljob entry")
    jobid = args["jobid"]
    user = args["user"]
    ret = JobManager().cancel_job(jobid, user)
    return ret
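# A small illustrative sketch (not part of the original module) of the
# argument shape the two MQ operations above expect, inferred from the keys
# they read; the concrete values and field names beyond "type", "jobid" and
# "user" are hypothetical.
create_args = {
    "type": "file",  # any other value takes the common-job path
    # plus whatever _parse_args_for_filejob() expects, e.g. the job file name,
    # workspace and submitter fields (not shown in this excerpt)
}
cancel_args = {
    "jobid": "1234",
    "user": "demo_user",
}

submit_ret, job = JobMqOperations._create_job_op(create_args)
cancel_ret = JobMqOperations._cancel_job_op(cancel_args)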