def delete_job_from_db(self, job, results):
    """Free the job's testbed and move the job to ``completed_jobs``.

    The job is archived with status ``'deleted'`` and the caller-supplied
    ``results`` unless ``self.check_reports()`` returns a report, in which
    case the status becomes ``'completed'`` and the report replaces
    ``results``.

    Args:
        job: row object providing ``testbed`` and ``submit_id``; used to
            free the testbed and to delete the ``submitted_jobs`` row.
        results: text stored in the ``results`` column when no report
            is available.

    Raises:
        SystemExit: with code -1 when any database step fails.
    """
    status = "deleted"
    now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    report = self.check_reports()
    if report is not None:
        status = 'completed'
        results = report

    # Pre-bind so the ``finally`` clause is safe when DBConnect() itself
    # raises; otherwise an UnboundLocalError would replace the SystemExit.
    dbcon = None
    try:
        dbcon = DBConnect()

        # free testbed
        tbstatus_cmd = """ update testbeds set status='free' where testbed='{0}'; """.format(
            job.testbed)
        dbcon.execute(tbstatus_cmd)

        # remove job from submitted job
        delete_cmd = """ delete from submitted_jobs where submit_id={0} """.format(
            job.submit_id)
        dbcon.execute(delete_cmd)

        # Archive the job.
        # NOTE(review): the archived row is built from ``self.job`` while the
        # two statements above use the ``job`` argument -- presumably both
        # describe the same job; confirm with callers.
        # NOTE(review): values are interpolated directly into the SQL text, so
        # a quote character in e.g. ``results`` breaks the statement.
        insert_sql_str = (
            " INSERT INTO completed_jobs (submit_id, submit_date, submitter,"
            " build, testsuite, asic, testbed, priority, eta, status,"
            " status_date, start_time, end_time, results, logs, flags,"
            " final_image_path, final_issu_image_path, scheduled_testsuite)"
            " VALUES ( {submit_id}, '{submit_date}', '{submitter}', '{build}',"
            " '{testsuite}', '{asic}', '{testbed}', {priority}, {eta},"
            " '{status}', '{status_date}', '{start_time}', '{end_time}',"
            " '{results}', '{logs}', '{flags}', '{final_image_path}',"
            " '{final_issu_image_path}', '{scheduled_testsuite}'); "
        )
        insert_sql_str = insert_sql_str.format(
            submit_id=self.job.submit_id,
            submit_date=self.job.submit_date,
            submitter=self.job.submitter,
            build=self.job.build,
            testsuite=self.job.testsuite,
            asic=self.job.asic,
            testbed=self.job.testbed,
            priority=self.job.priority,
            eta=self.job.eta,
            status=status,
            status_date=now,
            start_time=self.job.start_time,
            end_time=now,
            results=results,
            logs=self.job_log_dir,
            flags=self.job.flags,
            final_image_path=self.job.final_image_path,
            final_issu_image_path=self.job.final_issu_image_path,
            scheduled_testsuite=self.job.scheduled_testsuite)
        dbcon.execute(insert_sql_str)
    except Exception as e:
        print("HealthMonitor failed:" + repr(e))
        sys.exit(-1)
    finally:
        if dbcon is not None:
            dbcon.close_connection()
def __init__(self):
    """Open the DB connection and load the pending jobs.

    Pending jobs are fetched ordered by priority and then submit date and
    stored on ``self.pending_jobs``. The currently free, unlocked testbeds
    are fetched only to be logged. Any failure is logged and the process
    exits with code -1.
    """
    pending_sql = "select * from submitted_jobs where status='pending' order by priority, submit_date;"
    free_sql = """select testbed, asic from testbeds where status='free' and lock_status='unlocked' """
    try:
        self.dbcon = DBConnect()
        self.pending_jobs = self.dbcon.fetch(pending_sql, type="_tuple")
        available = self.dbcon.fetch(free_sql, type="_tuple")
        log.info("Free testbeds: " + repr(available))
    except Exception as err:
        log.error("Scheduler failed:" + repr(err))
        sys.exit(-1)
def abort_submission(self, reason=None):
    """Abort a submission and archive it as ``'aborted'``.

    Deletes the row for ``self.params["submit_id"]`` from
    ``submitted_jobs`` and inserts a matching row into ``completed_jobs``
    with status ``'aborted'`` and ``reason`` as its results text.

    Args:
        reason: text stored in the ``results`` column; a generic message
            is used when omitted.

    Raises:
        SystemExit: with code -1 when any step fails (including a missing
            key in ``self.params``).
    """
    if reason is None:
        reason = "Unexpected error occurred in submit job stage"

    # Pre-bind so the ``finally`` clause is safe when DBConnect() itself
    # raises; otherwise an UnboundLocalError would replace the SystemExit.
    dbcon = None
    try:
        dbcon = DBConnect()
        abort_cmd = """ delete from submitted_jobs where submit_id={0} """.format(
            self.params["submit_id"])
        dbcon.execute(abort_cmd)

        # Archive the aborted job.
        # NOTE(review): values are interpolated directly into the SQL text, so
        # a quote character in ``reason`` or a parameter breaks the statement.
        insert_sql_str = (
            " INSERT INTO completed_jobs (submit_id, submit_date, submitter,"
            " build, testsuite, asic, testbed, priority, status, status_date,"
            " results, flags) VALUES ( {submit_id}, '{submit_date}',"
            " '{submitter}', '{build}', '{testsuite}', '{asic}', '{testbed}',"
            " {priority}, '{status}', '{status_date}', '{results}',"
            " '{flags}'); "
        )
        insert_sql_str = insert_sql_str.format(
            submit_id=self.params["submit_id"],
            submit_date=self.params["now"],
            submitter=self.params["submitter"],
            build=self.params["build"],
            testsuite=self.params["testsuite"],
            asic=self.params["asic"],
            testbed=self.params["testbed"],
            priority=self.params["priority"],
            status='aborted',
            status_date=self.params["now"],
            results=reason,
            flags=self.params["flags"])
        dbcon.execute(insert_sql_str)
    except Exception as e:
        log.error("error occured in submit job:abort_submission" + repr(e))
        sys.exit(-1)
    finally:
        if dbcon is not None:
            dbcon.close_connection()
def accept_job(self):
    """Accept a validated job: record its ETA and handle download-only jobs.

    Looks up the ETA of ``self.params["testsuite"]`` in ``testsuites``,
    stores it in ``self.params["eta"]`` and on the job's ``submitted_jobs``
    row. When ``self.params["download_only"]`` equals
    ``constants.DOWNLOAD_ONLY`` a ``'completed'`` row is also inserted into
    ``completed_jobs``. On any failure the error is logged and
    ``self.abort_submission()`` is called.
    """
    # Pre-bind so the ``finally`` clause is safe when DBConnect() itself
    # raises; otherwise an UnboundLocalError would mask the real outcome.
    dbcon = None
    try:
        dbcon = DBConnect()

        # get eta from testsuites table
        suite_data_cmd = "select eta from testsuites where testsuite='{0}';".format(
            self.params["testsuite"])
        suite_data = dbcon.fetch(suite_data_cmd)
        # An unknown testsuite yields no rows; the IndexError lands in the
        # handler below and aborts the submission.
        self.params["eta"] = suite_data[0][0]

        update_eta = """ update submitted_jobs set eta='{eta}' where submit_id='{submit_id}' """
        update_eta = update_eta.format(eta=self.params["eta"],
                                       submit_id=self.params["submit_id"])
        dbcon.execute(update_eta)

        # Mark job as completed if download_only flag set
        if self.params["download_only"] == constants.DOWNLOAD_ONLY:
            results = "download_only flag is set. Files were downloaded and job was marked complete"
            insert_sql_str = (
                " INSERT INTO completed_jobs (submit_id, submit_date,"
                " submitter, build, testsuite, asic, testbed, priority,"
                " status, status_date, results, flags) VALUES ( {submit_id},"
                " '{submit_date}', '{submitter}', '{build}', '{testsuite}',"
                " '{asic}', '{testbed}', {priority}, '{status}',"
                " '{status_date}', '{results}', '{flags}'); "
            )
            insert_sql_str = insert_sql_str.format(
                submit_id=self.params["submit_id"],
                submit_date=self.params["now"],
                submitter=self.params["submitter"],
                build=self.params["build"],
                testsuite=self.params["testsuite"],
                asic=self.params["asic"],
                testbed=self.params["testbed"],
                priority=self.params["priority"],
                status='completed',
                status_date=self.params["now"],
                results=results,
                flags=self.params["flags"])
            dbcon.execute(insert_sql_str)

        log.info("Job submitted successfully, submit_id is {0}".format(
            self.params["submit_id"]))
    except Exception as e:
        log.error("error occured in submit job: accept_validated_job" + repr(e))
        self.abort_submission()
    finally:
        if dbcon is not None:
            dbcon.close_connection()
def __init__(self, submit_id=None, email=None):
    """Load the job to monitor and the testbed it is assigned to.

    Sets ``self.job`` (first ``submitted_jobs`` row for ``submit_id``),
    ``self.testbed`` (first ``testbeds`` row for the job's testbed),
    ``self.job_log_dir`` and ``self.email``.

    Args:
        submit_id: id of the submitted job; required.
        email: notification address; defaults to the job's submitter.

    Raises:
        SystemExit: with code -1 when ``submit_id`` is None or when the
            job/testbed cannot be loaded.
    """
    if submit_id is None:
        log.error("Submit id is None")
        sys.exit(-1)

    # Pre-bind so the ``finally`` clause is safe when DBConnect() itself
    # raises; otherwise an UnboundLocalError would replace the SystemExit.
    dbcon = None
    try:
        dbcon = DBConnect()

        job_cmd = """ select * from submitted_jobs where submit_id={0}""".format(
            submit_id)
        complete_job_data = dbcon.fetch(job_cmd, type="_tuple")
        # No matching row raises IndexError, handled below.
        self.job = complete_job_data[0]
        log.info("Monitoring the job: " + repr(self.job))

        testbed_cmd = """select * from testbeds where testbed='{0}'""".format(
            self.job.testbed)
        testbed_cmd_data = dbcon.fetch(testbed_cmd, type="_tuple")
        self.testbed = testbed_cmd_data[0]

        self.job_log_dir = """/vol/eor-qa/sanity/logs/{0}/{1}""".format(
            self.job.submitter, self.job.submit_id)

        self.email = self.job.submitter if email is None else email
    except Exception as e:
        log.error("Monitor job failed:" + repr(e))
        sys.exit(-1)
    finally:
        if dbcon is not None:
            dbcon.close_connection()
def record_job_submission(self):
    """Record the job submission as a row in ``submitted_jobs``.

    Fills in ``self.params["now"]``, ``["priority"]``, ``["flags"]`` and,
    after the insert, ``["submit_id"]`` (read back with
    ``LAST_INSERT_ID()`` on the same connection). On any failure the error
    is logged and ``self.abort_submission()`` is called.
    """
    # Pre-bind so the ``finally`` clause is safe when DBConnect() itself
    # raises; otherwise an UnboundLocalError would mask the real outcome.
    dbcon = None
    try:
        self.params["now"] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        dbcon = DBConnect()

        # Default priority is 6; the "snoopy" submitter gets priority 1.
        self.params["priority"] = 6
        if self.params["submitter"] == "snoopy":
            self.params["priority"] = 1

        get_flag_details = " select * from flag_details;"
        flag_details = dbcon.fetch(get_flag_details, type="_tuple")

        # Encode flags as "<id>=<value>," entries. Each entry is prepended,
        # so the result is in reverse row order and ends with a comma; the
        # format is kept as-is because it is stored in the database.
        submitted_flags = ""
        for flag_detail in flag_details:
            flag_id = flag_detail.flag_id
            flag_value = self.params[flag_detail.flag_name]
            submitted_flags = "{0}={1},{2}".format(flag_id, flag_value,
                                                   submitted_flags)
        self.params["flags"] = submitted_flags

        # NOTE(review): values are interpolated directly into the SQL text, so
        # a quote character in a parameter breaks the statement.
        insert_sql_str = (
            " INSERT INTO submitted_jobs (submit_date, submitter, build,"
            " testsuite, asic, testbed, priority, status_date, flags,"
            " send_updates) VALUES ( '{submit_date}', '{submitter}',"
            " '{build}', '{testsuite}', '{asic}', '{testbed}', {priority},"
            " '{status_date}', '{flags}', {send_updates}); "
        )
        insert_sql_str = insert_sql_str.format(
            submit_date=self.params["now"],
            submitter=self.params["submitter"],
            build=self.params["build"],
            testsuite=self.params["testsuite"],
            asic=self.params["asic"],
            testbed=self.params["testbed"],
            priority=self.params["priority"],
            status_date=self.params["now"],
            flags=self.params["flags"],
            send_updates=self.params["send_updates"])
        dbcon.execute(insert_sql_str)

        # get the submit id from last insert
        submit_id = dbcon.fetch("SELECT LAST_INSERT_ID();")[0][0]
        self.params["submit_id"] = submit_id
        log.debug(
            "Job was recorded into submitted_jobs table with submit_id:{0}"
            .format(submit_id))
    except Exception as e:
        log.error("error occured in submit job:record_job_submission" + repr(e))
        self.abort_submission()
    finally:
        if dbcon is not None:
            dbcon.close_connection()
def update_job_details(self, column, value):
    """Set one column of this job's row in ``submitted_jobs``.

    Args:
        column: name of the column to update; inserted into the SQL text
            verbatim, so it must come from trusted code.
        value: new value, written as a quoted SQL literal.

    Raises:
        SystemExit: with code -1 when the update fails.
    """
    # Pre-bind so the ``finally`` clause is safe when DBConnect() itself
    # raises; otherwise an UnboundLocalError would replace the SystemExit.
    dbcon = None
    try:
        dbcon = DBConnect()
        update_flags = """update submitted_jobs set {column}='{value}' where submit_id={submit_id} """
        # NOTE(review): ``self.job`` is indexed by key here -- presumably a
        # mapping in this class; confirm it is not an attribute-style row.
        update_flags = update_flags.format(column=column,
                                           value=value,
                                           submit_id=self.job["submit_id"])
        dbcon.execute(update_flags)
    except Exception as e:
        log.error("update_job_details failed:" + repr(e))
        sys.exit(-1)
    finally:
        if dbcon is not None:
            dbcon.close_connection()
def validate(self):
    """Validate the submission parameters in ``self.args``.

    Checks, in order: submitter, testsuite (must exist), asic (must exist
    and not be listed as unsupported for the testsuite), build /
    download_image, ISSU source directory and testbed.

    Returns:
        tuple: ``(True, "Params valid")`` on success, otherwise
        ``(False, reason)`` for the first failed check.

    Raises:
        SystemExit: when an unexpected exception occurs while validating.
    """
    # Pre-bind so the ``finally`` clause is safe when DBConnect() itself
    # raises; otherwise an UnboundLocalError would replace the SystemExit.
    dbcon = None
    try:
        dbcon = DBConnect()

        # submitter
        if self.args["submitter"] is None:
            return False, "Submitter is None"

        # testsuite
        if self.args["testsuite"] is None:
            return False, "Testsuite is None"
        # check if testsuite name is valid
        testsuite_cmd = "select exists (select * from testsuites where testsuite='{0}');".format(
            self.args["testsuite"])
        suite_exists = dbcon.fetch(testsuite_cmd)[0][0]
        if suite_exists == 0:
            return False, "Testsuite: {0} does not exist".format(
                self.args["testsuite"])

        # asic
        if self.args["asic"] != "":
            # check if asic name is valid
            asic_exists_cmd = " select exists (select * from asics where asic='{0}'); ".format(
                self.args["asic"])
            asic_exists = dbcon.fetch(asic_exists_cmd)[0][0]
            if asic_exists == 0:
                return False, "Asic: {0} does not exist".format(
                    self.args["asic"])

            # check if asic is unsupported for testsuite
            asic_testbed_cmd = "select unsupported_asics from testsuites" \
                               " where testsuite='{0}';".format(self.args["testsuite"])
            asics = dbcon.fetch(asic_testbed_cmd)[0][0]
            # The column holds a comma-separated list and may be NULL.
            if asics is not None and self.args["asic"] in asics.split(","):
                reason = "The asic:{asic} doesn't support the testsuite:{testsuite}"
                reason = reason.format(
                    asic=self.args["asic"],
                    testsuite=self.args["testsuite"])
                return False, reason

        # build
        if self.args["build"] == "":
            if self.args["download_image"] == "":
                # download image value can only be validated at Sanity Server.
                return False, "Values for build, and download_image are all None"
        elif not os.path.exists(self.args["build"]):
            return False, "Build file/folder does not exist"
        elif os.path.isdir(self.args["build"]):
            # Case 1: when build set to /auto/ins-bld-tools/.../REL.x.x.x.xxx
            build_path = "/{0}/build/images/final".format(
                self.args["build"].strip("/"))
            path_res = glob.glob("{0}/nxos.*.bin".format(build_path))
            if len(path_res) > 1:
                return False, "more than 1 nxos.*.bin file exists in {0}".format(
                    build_path)
            if len(path_res) < 1:
                # Case 2: when build set to /.../build/images/final
                build_path = "/{0}".format(
                    self.args["build"].strip("/"))
                path_res = glob.glob(
                    "{0}/nxos.*.bin".format(build_path))
                if len(path_res) < 1:
                    return False, "nxos.*.bin file doesn't exists in {0}".format(
                        build_path)
                if len(path_res) > 1:
                    return False, "more than 1 nxos.*.bin file exists in {0}".format(
                        build_path)
        elif not self.args["build"].endswith('.bin'):
            # build is a file
            return False, "build is not a .bin file"

        # check ISSU params
        if self.args["issu_build_srcdir"] != "":
            if not os.path.isdir(self.args["issu_build_srcdir"]):
                return False, "issu_build_srcdir is not a directory"

        # testbed
        if self.args["testbed"] != "":
            testbed_cmd = "select exists (select * from tb_status where testbed = '{0}');".format(
                self.args["testbed"])
            exists = dbcon.fetch(testbed_cmd)[0][0]
            if exists == 0:
                return False, "Testbed '{}' does not exist".format(
                    self.args["testbed"])

        return True, "Params valid"
    except Exception as e:
        log.error("Validator failed:" + repr(e))
        sys.exit("Exception caught in Validator")
    finally:
        if dbcon is not None:
            dbcon.close_connection()
class Scheduler:
    """Assign pending jobs to free, unlocked testbeds.

    Jobs are processed in the order they were fetched (priority, then
    submit date). A job that names a testbed is only scheduled on that
    testbed; otherwise any free testbed with a suitable asic is used.
    """

    # Rows fetched from submitted_jobs with status 'pending'.
    pending_jobs = None
    # DB connection shared by all methods; closed in __del__.
    dbcon = None

    def __init__(self):
        """Open the DB connection and load all pending jobs.

        Exits the process with code -1 when the database cannot be read.
        """
        try:
            self.dbcon = DBConnect()
            get_jobs_cmd = "select * from submitted_jobs where status='pending' order by priority, submit_date;"
            self.pending_jobs = self.dbcon.fetch(get_jobs_cmd, type="_tuple")
            # Fetched for logging only.
            free_testbed_cmd = """select testbed, asic from testbeds where status='free' and lock_status='unlocked' """
            free_tbs = self.dbcon.fetch(free_testbed_cmd, type="_tuple")
            log.info("Free testbeds: {0}".format(repr(free_tbs)))
        except Exception as e:
            log.error("Scheduler failed:" + repr(e))
            sys.exit(-1)

    def __del__(self):
        """Log the shutdown and close the DB connection if one was opened."""
        try:
            log.info("Scheduler exiting!")
        except Exception:
            # __del__ may run during interpreter shutdown, when module
            # globals such as the logger are already gone; the connection
            # must still be closed.
            pass
        if self.dbcon is not None:
            self.dbcon.close_connection()

    def abort_job(self, job=None, msg=None):
        """Log that ``job`` could not be started (no other action is taken)."""
        # TODO: only logs; the testbed and job rows updated by
        # schedule_job_on are not reverted here.
        submit_id = getattr(job, "submit_id", None)
        log.error("abort job " + repr(submit_id) + " reason=" + repr(msg))

    def get_an_available_testbed(self, job=None):
        """Return one free, unlocked testbed suitable for ``job``.

        When the job names an asic, a testbed of that asic is selected.
        Otherwise any testbed is selected whose asic is not listed in the
        testsuite's ``unsupported_asics``.

        Returns:
            The first matching testbed row, or None when none is available
            or the lookup fails.
        """
        if job is None:
            log.error("get_available_testbeds: job can't be None")
            sys.exit(-1)
        asic = job.asic
        try:
            if asic == "":
                # filter out using unsupported asics
                asic_cmd = "select unsupported_asics from testsuites" \
                           " where testsuite='{0}';".format(job.testsuite)
                asic_data = self.dbcon.fetch(asic_cmd, type="_tuple")
                unsupported = asic_data[0].unsupported_asics
                if unsupported is None:
                    # NULL means nothing is excluded. Calling .split() on it
                    # used to raise, which was swallowed below, so such jobs
                    # could never be scheduled.
                    free_testbed_cmd = """select * from testbeds where status='free' and lock_status='unlocked' limit 1 """
                else:
                    asic_list = ', '.join(
                        "'{0}'".format(w) for w in unsupported.split(","))
                    free_testbed_cmd = """select * from testbeds where asic not in ({0}) and status='free' and lock_status='unlocked' limit 1 """.format(asic_list)
            else:
                # get a free testbed of asic type
                free_testbed_cmd = """select * from testbeds where asic='{0}' and status='free' and lock_status='unlocked' limit 1 """.format(
                    asic)
            testbeds = self.dbcon.fetch(free_testbed_cmd, type="_tuple")
            if len(testbeds) == 0:
                # no free testbeds available
                return None
            log.info("Available testbeds are: " + repr(testbeds))
            return testbeds[0]
        except Exception as e:
            log.error("Scheduler failed in get_an_available_testbed:" + repr(e))
            return None

    def get_testbed_if_free(self, testbed=None):
        """Return the row for ``testbed`` if it is free and unlocked, else None.

        Exits the process with code -1 when ``testbed`` is None or the
        lookup fails (including when no such testbed row exists).
        """
        if testbed is None:
            log.error("is_testbed_free: testbed can't be None")
            sys.exit(-1)
        log.info("Checking to see if testbed {0} is free.".format(testbed))
        try:
            testbed_cmd = "select * from testbeds where testbed='{0}'".format(
                testbed)
            rows = self.dbcon.fetch(testbed_cmd, type="_tuple")
            row = rows[0]
            if row.status != "free" or row.lock_status != "unlocked":
                return None
            return row
        except Exception as e:
            log.error("Scheduler failed in get_testbed_if_free:" + repr(e))
            sys.exit(-1)

    def schedule_job_on(self, job=None, testbed=None):
        """Mark ``testbed`` busy, mark ``job`` running on it, then start it.

        Exits the process with code -1 on invalid arguments and -123 when
        a database update or the job start raises.
        """
        if job is None or testbed is None:
            log.error(
                "Unable to schedule, invalid params. job/testbed is None")
            sys.exit(-1)
        log.info("scheduling job {0} on {1}".format(job.submit_id,
                                                    testbed.testbed))
        now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        try:
            # update tb_status
            tbstatus_cmd = """ update testbeds set status='busy',current_job={0} where testbed='{1}' """.format(job.submit_id, testbed.testbed)
            self.dbcon.execute(tbstatus_cmd)

            # update status in accepted jobs
            update_job_cmd = """ update submitted_jobs set testbed='{0}', status='{1}', status_date='{2}', start_time='{2}', asic='{3}' where submit_id={4} """.format(testbed.testbed, "running", now, testbed.asic, job.submit_id)
            self.dbcon.execute(update_job_cmd)

            result, msg = self.start_job(job)
            if not result:
                self.abort_job(job, msg)
            else:
                log.info("job {0} was sent to sanity server".format(
                    job.submit_id))
        except Exception as e:
            log.error("Scheduler failed in schedule_job_on:" + repr(e))
            sys.exit(-123)

    def start_job(self, job=None):
        """Start the invoke_job stage for ``job`` and return its result.

        Returns whatever ``jenkins_start_invoke_job`` returns;
        schedule_job_on unpacks it as ``(result, msg)``.
        """
        if job is None:
            log.error("Unable to start job, invalid params")
            sys.exit(-123)
        return jenkins_start_invoke_job(job.submit_id)

    def schedule_jobs(self):
        """Schedule pending jobs based on priority and FCFS policy."""
        for job in self.pending_jobs:
            log.info("Trying to schedule job: {0}".format(job.submit_id))
            testbed = job.testbed
            if testbed != "":
                # The job is pinned to a testbed: use that one or none.
                free_tb = self.get_testbed_if_free(testbed=testbed)
            else:
                free_tb = self.get_an_available_testbed(job=job)
            if free_tb is not None:
                self.schedule_job_on(job=job, testbed=free_tb)