def init_sar_iostat_top():
    """Agent process invokes this method on startup.

    Spawns 4 threads for system metrics collection:
      1. top_gather         - TOP output collection
      2. iostat_gather      - iostat output collection
      3. sar_gather         - SAR data collection
      4. docker_stat_gather - docker stat of all active containers
    """
    global sys_logger
    # Debug log for the gather threads lives in the agent's cwd.
    logger_file = os.getcwd() + "/system_metrics_gather_debug.out"
    sys_logger = loggersetup(logger_file)
    sys_logger.debug("Starting system metrics gather threads")
    # The four collectors are spawned identically; a table keeps it DRY
    # (the original repeated the same three lines per collector).
    collectors = [
        ("top gather", top_gather),
        ("iostat gather", iostat_gather),
        ("SAR gather", sar_gather),
        ("docker stat gather", docker_stat_gather),
    ]
    for label, collector in collectors:
        sys_logger.debug("Starting " + label)
        thread = common.FuncThread(collector, True)
        thread.start()
def perf_strace_gather(testid, perf_config=None, strace_config=None):
    """Agent invokes this procedure on test startup for configuring the
    profilers (perf / strace) described in the test details.

    testid        -- id of the test being started (logged and passed to
                     the gather threads)
    perf_config   -- dict with keys 'delay' and 'duration', optionally
                     'process'; None means perf is not configured
    strace_config -- dict with keys 'delay', 'duration' and 'process';
                     None means strace is not configured
    """
    sys_logger.debug("Starting Profilers setup for test ID : " + str(testid))
    if perf_config is not None:
        sys_logger.debug("Perf configuration details")
        # 'process' is optional in the perf configuration.
        if "process" in perf_config:
            sys_logger.debug("Delay - " + str(perf_config['delay']) +
                             " Duration - " + str(perf_config['duration']) +
                             " Process - " + str(perf_config['process']))
        else:
            sys_logger.debug("Delay - " + str(perf_config['delay']) +
                             " Duration - " + str(perf_config['duration']))
        t1 = common.FuncThread(perf_gather, True, testid, perf_config)
        t1.start()
    else:
        # BUGFIX: perf_config defaults to None but was dereferenced
        # unconditionally, crashing with TypeError when perf was absent.
        sys_logger.debug("Perf not configured ")
    if strace_config is not None:
        sys_logger.debug("Strace configuration details")
        sys_logger.debug("Delay - " + str(strace_config['delay']) +
                         " Duration - " + str(strace_config['duration']) +
                         " Process - " + str(strace_config['process']))
        t2 = common.FuncThread(strace_gather, True, testid, strace_config)
        t2.start()
    else:
        sys_logger.debug("Strace not configured ")
def __init__(self, db, cfg, lctx):
    """Initialise the scheduler: capture the DB handle, configuration and
    logger, wire up the messaging client/envelope, record the network
    endpoints and pre-build the worker threads (not started here)."""
    # Collaborators handed in by the caller.
    self.dbinstance = db
    self.cfg = cfg
    self.lctx = lctx
    # Per-framework waiting-test map maintained on the DB object.
    self.testmap = db.tests_to_run
    # Messaging helpers used to talk to remote agents.
    self.cl = client.TCPClient(LOG.getLogger("clientlog", "DH"))
    self.ev = envelope.DaytonaEnvelope()
    # Local endpoint plus the ports taken from configuration.
    self.HOST = common.get_local_ip()
    self.PORT = cfg.DHPORT
    self.CPORT = cfg.CPORT
    # Worker threads bound to the dispatch and testmon loops.
    self.scheduler_thread = common.FuncThread(self.dispatch, True)
    self.testmon_thread = common.FuncThread(self.testmon, True)
def docker_stat_gather(self):
    """Worker loop that periodically spawns a collect_docker_stats thread.

    First verifies that docker is installed and at least version 10.x by
    parsing the `docker_version` command output; on any failure the
    thread simply returns (nothing for it to collect).
    """
    # Checking docker version
    try:
        p1 = subprocess.Popen(docker_version, stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)
        version = p1.communicate()[0].strip()
        # Raw string for the regex (the old "\d+\.\d+" relied on Python
        # not treating \d as an escape); grab the leading major.minor.
        version = re.findall(r"\d+\.\d+", version)[0]
        version = float(version)
        if version < 10.0:
            # Docker version less than 10 is not supported
            sys_logger.error("Docker version less than 10, not supported !! ")
            sys_logger.error("Aborting docker stat gather thread !! ")
            # BUGFIX: return instead of quit() — quit() is the
            # site.Quitter interactive helper, not a control-flow tool;
            # returning ends this worker thread cleanly.
            return
    except Exception:
        # Docker is not installed, abort this thread
        sys_logger.error("Docker not installed !! ")
        sys_logger.error("Aborting docker stat gather thread !! ")
        return
    # Starting docker stats
    # Spawning different thread for collecting docker stat as it takes
    # some time to collect the stats; this loop only keeps the cadence.
    while True:
        thread = common.FuncThread(collect_docker_stats, True)
        thread.start()
        time.sleep(float(system_metrics_interval))
def __init__(self, db, cfg, lctx):
    """Scheduler class constructor which initializes class variables and
    builds (without starting) the dispatch and testmon threads."""
    self.dbinstance = db
    self.cfg = cfg
    self.lctx = lctx
    # Waiting tests per framework, maintained by the DB layer.
    self.testmap = db.tests_to_run
    # TCP client + message envelope for agent communication.
    self.cl = client.TCPClient(LOG.getLogger("clientlog", "DH"))
    self.ev = envelope.DaytonaEnvelope()
    # Network identity of this scheduler host and the two config ports.
    self.HOST = common.get_local_ip()
    self.PORT, self.CPORT = cfg.DHPORT, cfg.CPORT
    # Threads for the dispatch loop and the running-test monitor.
    self.scheduler_thread = common.FuncThread(self.dispatch, True)
    self.testmon_thread = common.FuncThread(self.testmon, True)
def init_sar_iostat_top():
    """Agent startup hook: set up the metrics debug logger and spawn the
    four system-metrics gather threads (top, iostat, SAR, docker stat)."""
    global sys_logger
    # Debug log for the gather threads lives in the agent's cwd.
    logger_file = os.getcwd() + "/system_metrics_gather_debug.out"
    sys_logger = loggersetup(logger_file)
    sys_logger.debug("Starting system metrics gather threads")
    sys_logger.debug("Starting top gather")
    t1 = common.FuncThread(top_gather, True)
    t1.start()
    sys_logger.debug("Starting iostat gather")
    t2 = common.FuncThread(iostat_gather, True)
    t2.start()
    sys_logger.debug("Starting SAR gather")
    t3 = common.FuncThread(sar_gather, True)
    t3.start()
    sys_logger.debug("Starting docker stat gather")
    t4 = common.FuncThread(docker_stat_gather, True)
    t4.start()
def execute(self, command, paramcsv, actionID):
    """Resolve the handler mapped to *command* in self.conf.actionMap (a
    "module.function.syncflag" string) and run it.

    command  -- daytona command name (key into actionMap)
    paramcsv -- comma separated parameters; when it is a bare testid the
                matching running test's tobj is attached to the async entry
    actionID -- identifier echoed back so the caller can stream responses
    Returns an "actionID=..." response string; SYNC actions embed the
    handler's return value, ASYNC actions return immediately with SUCCESS.
    """
    # based on SYNCFLAG release from here
    # send actionID for currently being executed action based on this we can stream resp
    # keep exec details over time in a buffer with actionID mapped
    # send actionID NULL and hold return till exec is complete
    mapping = self.conf.actionMap[command.strip()]
    module = mapping.split(".")[0]
    function = mapping.split(".")[1]
    sync = mapping.split(".")[2]
    t2 = testobj.testDefn()
    try:
        param = int(paramcsv)
        action.action_lock.acquire()
        try:
            t2 = action.running_tests[param].tobj
        finally:
            # BUGFIX: release in a finally block — previously a missing
            # testid raised KeyError between acquire() and release(),
            # the swallowing except below hid it, and action_lock stayed
            # held forever, deadlocking every later action.
            action.action_lock.release()
    except Exception:
        # paramcsv is not a plain testid / test not running: keep the
        # empty testDefn.
        pass
    m = __import__(module)
    f = getattr(m, function)
    if sync == "T":
        # wait for func to complete and return the ret
        self.lctx.debug("Executing SYNC ACTION for " + command.strip() + " : " +
                        self.conf.actionMap[command.strip()] + ":" + str(actionID))
        ret = f(self, self, command, paramcsv, actionID, sync)
        self.lctx.debug("ACTION completed for " + command.strip() + " : " +
                        self.conf.actionMap[command.strip()] + ":" + str(actionID))
        if command == "DAYTONA_CLI":
            # CLI responses use '%' separators since the payload itself
            # may contain commas.
            return "actionID=" + str(actionID) + "%" + ret + "%" + "SYNC EXEC"
        else:
            return "actionID=" + str(actionID) + "," + ret + "," + "SYNC EXEC"
    else:
        # callback will be called after completion
        self.lctx.debug("Executing ASYNC ACTION for " + command.strip() + " : " +
                        self.conf.actionMap[command.strip()] + ":" + str(actionID))
        t1 = common.FuncThread(f, True, self, command, paramcsv, actionID, sync)
        x = (t1, actionID, t2, time.time())
        self.lock.acquire()
        self.async_actions.append(x)
        self.lctx.debug("async_actions size :" + str(len(self.async_actions)))
        self.lock.release()
        t1.start()
        self.lctx.debug("Executing ACTION for " + command.strip() + " : " +
                        self.conf.actionMap[command.strip()] + ":" + str(actionID))
        return "actionID=" + str(actionID) + "," + "SUCCESS," + "ASYNC EXEC"
def execute(self, command, paramcsv, actionID):
    """Map *command* to its handler via self.conf.actionMap (a
    "module.function.syncflag" string) and execute it, synchronously or
    in a FuncThread depending on the sync flag. Returns an
    "actionID=..." response string."""
    # based on SYNCFLAG release from here
    # send actionID for currently being executed action based on this we can stream resp
    # keep exec details over time in a buffer with actionID mapped
    # send actionID NULL and hold return till exec is complete
    module = self.conf.actionMap[command.strip()].split(".")[0]
    function = self.conf.actionMap[command.strip()].split(".")[1]
    sync = self.conf.actionMap[command.strip()].split(".")[2]
    self.lctx.debug(command)
    self.lctx.debug(paramcsv)
    self.lctx.debug(actionID)
    t2 = testobj.testDefn()
    tst = ""
    # The first CSV field normally carries the serialized test object;
    # DAYTONA_FILE_DOWNLOAD carries it in the fourth field instead.
    if paramcsv != "":
        p = paramcsv.split(",")
        tst = p[0]
        if command == "DAYTONA_FILE_DOWNLOAD":
            tst = p[3]
    if tst != "":
        t2.deserialize(tst)
    m = __import__(module)
    f = getattr(m, function)
    if sync == "T":
        # wait for func to complete and return the ret
        self.lctx.debug("Executing SYNC ACTION for " + command.strip() + " : " +
                        self.conf.actionMap[command.strip()] + ":" + str(actionID))
        ret = f(self, self, command, paramcsv, actionID, sync)
        self.lctx.debug("ACTION completed for " + command.strip() + " : " +
                        self.conf.actionMap[command.strip()] + ":" + str(actionID))
        return "actionID=" + str(actionID) + "," + ret + "," + "SYNC EXEC"
    else:
        # callback will be called after completion
        # actionID = uuid.uuid4()
        self.lctx.debug("Executing ASYNC ACTION for " + command.strip() + " : " +
                        self.conf.actionMap[command.strip()] + ":" + str(actionID))
        t1 = common.FuncThread(f, True, self, command, paramcsv, actionID, sync)
        # Track (thread, actionID, test, start-time) so async actions can
        # be inspected/cleaned up later.
        x = (t1, actionID, t2, time.time())
        self.lock.acquire()
        self.async_actions.append(x)
        self.lctx.debug("async_actions size :" + str(len(self.async_actions)))
        self.lock.release()
        t1.start()
        self.lctx.debug("Executing ACTION for " + command.strip() + " : " +
                        self.conf.actionMap[command.strip()] + ":" + str(actionID))
        return "actionID=" + str(actionID) + "," + "SUCCESS," + "ASYNC EXEC"
def execute(self, command, paramcsv, actionID):
    """Look up the handler for *command* in self.conf.actionMap
    ("module.function.syncflag") and run it synchronously or in a
    FuncThread depending on the sync flag.

    For DAYTONA_START_TEST, paramcsv is "<testid>,<hosttype>,..." and the
    running test object (if found) is attached to the async bookkeeping.
    Returns an "actionID=..." response string.
    """
    # based on SYNCFLAG release from here
    # send actionID for currently being executed action based on this we can stream resp
    # keep exec details over time in a buffer with actionID mapped
    # send actionID NULL and hold return till exec is complete
    module = self.conf.actionMap[command.strip()].split(".")[0]
    function = self.conf.actionMap[command.strip()].split(".")[1]
    sync = self.conf.actionMap[command.strip()].split(".")[2]
    t2 = testobj.testDefn()
    # BUGFIX: hosttype must exist for every command — it was previously
    # assigned only inside the DAYTONA_START_TEST branch, so any other
    # async command crashed with NameError at `if hosttype == "EXEC"`.
    hosttype = None
    if command == "DAYTONA_START_TEST":
        testid = int(paramcsv.split(",")[0])
        hosttype = paramcsv.split(",")[1]
        current_test = action.get_test(testid)
        if current_test:
            t2 = current_test.tobj
    m = __import__(module)
    f = getattr(m, function)
    if sync == "T":
        # wait for func to complete and return the ret
        self.lctx.debug("Executing SYNC ACTION for " + command.strip() + " : " +
                        self.conf.actionMap[command.strip()] + ":" + str(actionID))
        ret = f(self, self, command, paramcsv, actionID, sync)
        self.lctx.debug("ACTION completed for " + command.strip() + " : " +
                        self.conf.actionMap[command.strip()] + ":" + str(actionID))
        if command == "DAYTONA_CLI":
            # CLI payloads may contain commas, hence '%' separators.
            return "actionID=" + str(actionID) + "%" + ret + "%" + "SYNC EXEC"
        else:
            return "actionID=" + str(actionID) + "," + ret + "," + "SYNC EXEC"
    else:
        self.lctx.debug("Executing ASYNC ACTION for " + command.strip() + " : " +
                        self.conf.actionMap[command.strip()] + ":" + str(actionID))
        t1 = common.FuncThread(f, True, self, command, paramcsv, actionID, sync)
        # Only test executions on the EXEC host are tracked in async_actions.
        if hosttype == "EXEC":
            x = (t1, actionID, t2, time.time())
            self.lock.acquire()
            self.async_actions.append(x)
            self.lctx.debug("async_actions size :" + str(len(self.async_actions)))
            self.lock.release()
        t1.start()
        self.lctx.debug("Executing ACTION for " + command.strip() + " : " +
                        self.conf.actionMap[command.strip()] + ":" + str(actionID))
        return "actionID=" + str(actionID) + "," + "SUCCESS," + "ASYNC EXEC"
def testmon(self, *mon):
    """Monitor loop: every 2s poll each running test's exec host for its
    status and react — hand completed/timed-out tests to process_results,
    abort errored tests on their remote hosts, and drop finished or
    failed tests from the running queue."""
    # process_results worker threads, keyed by testid (keeps refs alive).
    process_results_threads = defaultdict()
    while True:
        d = "TSMON [R] : |"
        # remove: just drop the test from running_tests.
        # error: abort the test on its remote hosts first, then drop it.
        remove = False
        error = False
        for k in self.running_tests:
            if (self.running_tests[k] != None):
                t = self.running_tests[k]
                # Sanity check: round-trip the test object through
                # serialize/deserialize and compare ids.
                serialize_str = t.serialize()
                t2 = testobj.testDefn()
                t2.deserialize(serialize_str)
                if t.testobj.TestInputData.testid != t2.testobj.TestInputData.testid:
                    lctx.error("testobj not same")
                    t.updateStatus("running", "failed")
                    remove = True
                    break  # out of for loop
                try:
                    # Ask the exec host for this test's current status.
                    ret = self.cl.send(
                        t.testobj.TestInputData.exechostname, self.CPORT,
                        self.ev.construct("DAYTONA_GET_STATUS",
                                          str(t2.testobj.TestInputData.testid)))
                    status = ret.split(",")[1]
                    lctx.debug(status)
                except Exception as e:
                    lctx.debug(e)
                    t.updateStatus("running", "failed")
                    error = True
                    break  # out of for loop
                if status in ["RUNNING", "INIT", "SETUP", "MONITOR_ON", "MONITOR_OFF"]:
                    # Still in progress on the agent; verify it has not
                    # been terminated out-of-band (checkTestRunning).
                    found = checkTestRunning(t.testobj.TestInputData.testid)
                    if not found:
                        error = True
                        break
                    d = d + str(self.running_tests[k].testobj.TestInputData.testid) + "|"
                elif status in ["TESTEND", "TIMEOUT"]:
                    d = d + "*" + str(self.running_tests[k].testobj.TestInputData.testid) + "*|"
                    if t.testobj.TestInputData.end_status == "running":
                        lctx.debug(t.testobj.TestInputData.end_status)
                    if t.testobj.TestInputData.end_status == "running":
                        if status == "TIMEOUT":
                            t.testobj.TestInputData.timeout_flag = True
                            t.updateStatus("running", "timeout")
                        else:
                            t.updateStatus("running", "completed")
                        # Hand off result collection/cleanup to a worker.
                        pt = common.FuncThread(self.process_results, True, t,
                                               t.testobj.TestInputData.end_status)
                        process_results_threads[t.testobj.TestInputData.testid] = (pt, t)
                        pt.start()
                    elif t.testobj.TestInputData.end_status == "collating" or t.testobj.TestInputData.end_status == "completed" or t.testobj.TestInputData.end_status == "finished clean":
                        # Results already being (or done being) processed.
                        d = d + "*" + str(self.running_tests[k].testobj.TestInputData.testid) + "*|"
                    else:
                        remove = True
                        t.updateStatus("running", "failed")
                        lctx.error("ERROR : Unknown test status for : " +
                                   str(t.testobj.TestInputData.testid) + ":" + str(status))
                        break  # out of for loop
                elif status.strip() == "FINISHED":
                    d = "TSMON [F] : |*" + str(self.running_tests[k].testobj.TestInputData.testid) + "*|"
                    remove = True
                    break
                elif status.strip() in ["FAILED", "ABORT", "TESTNA"]:
                    # FAILED triggers the remote abort path; ABORT/TESTNA
                    # only need local cleanup.
                    if status.strip() == "FAILED":
                        error = True
                    elif status.strip() in ["ABORT", "TESTNA"]:
                        remove = True
                    t.updateStatus("", "failed")
                    lctx.error("TEST " + status.strip() + " : Cleaning test from running queue")
                    break  # out of for loop
                else:
                    remove = True
                    t.updateStatus("running", "failed")
                    lctx.error("ERROR : Unknown test status for : " +
                               str(t.testobj.TestInputData.testid) + ":" + str(status))
                    break  # out of for loop
        lctx.info(d)
        d = ""
        if error:
            # Abort path: if the exec host still answers a heartbeat, stop
            # its monitor and abort the test there, then do the same on
            # every stat host, and finally drop the test locally.
            # NOTE(review): 't' is the test that set error inside the loop
            # above; error is never set when running_tests is empty.
            retsend = None
            ip = t.testobj.TestInputData.exechostname
            try:
                retsend = self.cl.send(ip, self.CPORT,
                                       self.ev.construct("DAYTONA_HEARTBEAT", ""))
            except:
                pass
            if retsend and retsend.split(",")[1] == "ALIVE":
                retsend = self.cl.send(ip, self.CPORT,
                                       self.ev.construct("DAYTONA_STOP_MONITOR",
                                                         str(t.testobj.TestInputData.testid)))
                retsend = self.cl.send(ip, self.CPORT,
                                       self.ev.construct("DAYTONA_ABORT_TEST",
                                                         str(t.testobj.TestInputData.testid)))
            for s in t.testobj.TestInputData.stathostname.split(','):
                # An empty entry ends the stat-host list.
                if len(s.strip()) == 0:
                    break
                try:
                    retsend = self.cl.send(s.strip(), self.CPORT,
                                           self.ev.construct("DAYTONA_HEARTBEAT", ""))
                except:
                    pass
                if retsend and retsend.split(",")[1] == "ALIVE":
                    retsend = self.cl.send(s.strip(), self.CPORT,
                                           self.ev.construct("DAYTONA_STOP_MONITOR",
                                                             str(t.testobj.TestInputData.testid)))
                    retsend = self.cl.send(s.strip(), self.CPORT,
                                           self.ev.construct("DAYTONA_ABORT_TEST",
                                                             str(t.testobj.TestInputData.testid)))
            self.lock.acquire()
            for k in self.running_tests:
                if self.running_tests[k].testobj.TestInputData.testid == t.testobj.TestInputData.testid:
                    lctx.debug("removing entry for this test")
                    rt = self.running_tests.pop(k)
                    break
            if k in self.running_tests:
                del self.running_tests[k]
            self.lock.release()
        if remove:
            # Removal path: only drop the bookkeeping entry under the lock.
            self.lock.acquire()
            for k in self.running_tests:
                if self.running_tests[k].testobj.TestInputData.testid == t.testobj.TestInputData.testid:
                    lctx.debug("removing entry for this test")
                    rt = self.running_tests.pop(k)
                    break
            if k in self.running_tests:
                del self.running_tests[k]
            self.lock.release()
        time.sleep(2)
def dispatch(self, *args):
    """Dispatch loop: for every framework with a free running slot, take
    the next waiting test from testmap, verify its exec host is alive
    (heartbeat + handshake), move it into running_tests and hand it to a
    trigger thread for setup and execution."""
    # Trigger threads by testid (keeps thread references alive).
    dispatch_threads = defaultdict()
    while True:
        for k in self.testmap:
            found = False
            try:
                # An existing entry for framework k means its single
                # running slot is occupied.
                if (self.running_tests[k]):
                    found = True
            except KeyError:
                lctx.debug("Found spot for test")
            if found == True:
                continue
            try:
                tmp_t = self.testmap[k][0]
            except Exception as e:
                lctx.debug("No test object found in map")
                continue
            if tmp_t == None:
                continue
            alive = False
            h = tmp_t.testobj.TestInputData.exechostname
            try:
                # Heartbeat the exec host before committing the test.
                ret = self.cl.send(h, self.CPORT,
                                   self.ev.construct("DAYTONA_HEARTBEAT", ""))
                status = ""
                st = ret.split(",")
                if len(st) > 2:
                    status = st[1]
                else:
                    raise Exception("Remove host not avaliable - No Heartbeat ",
                                    tmp_t.testobj.TestInputData.testid)
                if "ALIVE" == status:
                    # Handshake proves two-way connectivity and registers
                    # this scheduler with the agent.
                    ret = self.cl.send(h, self.CPORT,
                                       self.ev.construct("DAYTONA_HANDSHAKE",
                                                         self.HOST + "," + str(self.PORT) + "," +
                                                         str(tmp_t.testobj.TestInputData.testid) + "," + h))
                    if ret == "SUCCESS":
                        alive = True
                        lctx.debug("Handshake successful in scheduler, adding ip/hostname to reg hosts")
                        server.serv.registered_hosts[h] = h
                        addr = socket.gethostbyname(h)
                        lctx.debug(addr)
                        server.serv.registered_hosts[addr] = addr
                    else:
                        raise Exception("Unable to handshake with agent:" + h)
            except Exception as e:
                # Host unreachable or handshake failed: mark the test
                # failed and drop it from the waiting map.
                lctx.error(e)
                alive = False
                found = False
                # pause the dbmon here as we dont want the same test to be picked again after we pop
                self.dbinstance.mon_thread[0].pause()
                self.dbinstance.lock.acquire()
                t = self.testmap[k].pop(0)
                t.updateStatus("waiting", "failed")
                self.dbinstance.lock.release()
                lctx.debug("Removed test from map : " + str(t.testobj.TestInputData.testid))
                self.dbinstance.mon_thread[0].resume()
                continue
            # todo : add host to reg list if handshake successful
            if alive == True and found == False:
                # for each framework pick one and move it to running, iff running has an empty slot.
                lctx.debug("-------Found empty slot in running Q-------")
                # pause the dbmon here as we dont want the same test to be picked again after we pop
                self.dbinstance.mon_thread[0].pause()
                self.dbinstance.lock.acquire()
                t = self.testmap[k].pop(0)
                self.dbinstance.lock.release()
                lctx.info("< %s" % (t.testobj.TestInputData.testid))
                self.lock.acquire()
                self.running_tests[k] = t
                self.lock.release()
                t.updateStatus("waiting", "setup")
                self.dbinstance.mon_thread[0].resume()
                try:
                    # Each dispatched test gets its own trigger thread.
                    trigger_thread = common.FuncThread(self.trigger, True, t)
                    dispatch_threads[t.testobj.TestInputData.testid] = (trigger_thread, t)
                    trigger_thread.start()
                except Exception as e:
                    lctx.error("Trigger error : " + str(t.testobj.TestInputData.testid))
                    # todo : remove testid from running tests
                    lctx.debug(e)
        try:
            # Log the ids currently occupying running slots.
            d = "DISPATCH [S/R] : "
            for k in self.running_tests:
                d = d + " |" + str(self.running_tests[k].testobj.TestInputData.testid)
        except:
            lctx.error("ERROR : Dispatch Q empty")
        lctx.debug(d)
        d = ""
        time.sleep(2)
def testmon(self, *mon):
    """Monitor loop (older agent-status variant): every 2s poll each
    running test's exec host with DAYTONA_GET_STATUS, kick off
    process_results when a test ends, and drop finished or failed tests
    from the running queue."""
    # process_results worker threads, keyed by testid.
    process_results_threads = defaultdict()
    while True:
        d = "TSMON [R] : |"
        remove = False
        for k in self.running_tests:
            if (self.running_tests[k] != None):
                t = self.running_tests[k]
                # Sanity check the stored test object via a round trip.
                serialize_str = t.serialize()
                t2 = testobj.testDefn()
                t2.deserialize(serialize_str)
                if (t.testobj.TestInputData.testid != t2.testobj.TestInputData.testid):
                    lctx.error("testobj not same")
                    t.updateStatus("running", "failed")
                    remove = True
                    break  # out of for loop
                try:
                    # This variant sends the whole serialized test, not
                    # just the testid.
                    ret = self.cl.send(t.testobj.TestInputData.exechostname,
                                       self.CPORT,
                                       self.ev.construct("DAYTONA_GET_STATUS", serialize_str))
                    status = ret.split(",")[1]
                    lctx.debug(status)
                except Exception as e:
                    lctx.debug(e)
                    t.updateStatus("running", "failed")
                    remove = True
                    break  # out of for loop
                if "TESTRUNNING" == status or "TESTSETUP" == status:
                    d = d + str(self.running_tests[k].testobj.TestInputData.testid) + "|"
                elif "TESTEND" == status.strip():
                    d = d + "*" + str(self.running_tests[k].testobj.TestInputData.testid) + "*|"
                    if t.testobj.TestInputData.end_status == "running":
                        lctx.debug(t.testobj.TestInputData.end_status)
                    if t.testobj.TestInputData.end_status == "running":
                        lctx.debug("Updating status to completed in DB")
                        t.updateStatus("running", "completed")
                        # Hand off result collection to a worker thread.
                        pt = common.FuncThread(self.process_results, True, t,
                                               t.testobj.TestInputData.end_status)
                        process_results_threads[t.testobj.TestInputData.testid] = (pt, t)
                        pt.start()
                    elif t.testobj.TestInputData.end_status == "collating" or t.testobj.TestInputData.end_status == "completed" or t.testobj.TestInputData.end_status == "finished clean":
                        # Results already in flight or done.
                        d = d + "*" + str(self.running_tests[k].testobj.TestInputData.testid) + "*|"
                    else:
                        remove = True
                        t.updateStatus("running", "failed")
                        lctx.error("ERROR : Unknown test status for : " +
                                   str(t.testobj.TestInputData.testid) + ":" + str(status))
                        break  # out of for loop
                elif "TESTFINISHED" == status.strip():
                    d = "TSMON [F] : |*" + str(self.running_tests[k].testobj.TestInputData.testid) + "*|"
                    remove = True
                    break
                else:
                    remove = True
                    t.updateStatus("running", "failed")
                    lctx.error("ERROR : Unknown test status for : " +
                               str(t.testobj.TestInputData.testid) + ":" + str(status))
                    break  # out of for loop
        lctx.info(d)
        d = ""
        if (remove == True):
            # Drop the flagged test ('t' from the break above) from the
            # running queue under the lock.
            self.lock.acquire()
            for k in self.running_tests:
                if self.running_tests[k].testobj.TestInputData.testid == t.testobj.TestInputData.testid:
                    lctx.debug("removing entry for this test")
                    rt = self.running_tests.pop(k)
                    break
            if k in self.running_tests:
                del self.running_tests[k]
            self.lock.release()
        time.sleep(2)
def startMon(self):
    """Create the monitor thread around self.mon, record it in
    self.mon_thread and start it."""
    # Removed the dead "mthread = None" pre-assignment that was
    # immediately overwritten by the FuncThread construction.
    mthread = common.FuncThread(self.mon, True)
    self.mon_thread.append(mthread)
    mthread.start()
def execute(self, command, paramcsv, actionID):
    """
    This function maps daytona command with actual procedure which need to executed upon receiving a particular
    message. This mapping is saved in action.map file and procedures are implemented in action.py
    Upon mapping with actual procedure this routine spawns a new thread for executing that procedure seperately
    Below are some other action performed in this procedure :
    # based on SYNCFLAG release from here
    # send actionID for currently being executed action based on this we can stream resp
    # keep exec details over time in a buffer with actionID mapped
    # send actionID NULL and hold return till exec is complete
    """
    cmd = command.strip()
    mapping = self.conf.actionMap[cmd]
    # actionMap values look like "module.function.syncflag".
    pieces = mapping.split(".")
    module, function, sync = pieces[0], pieces[1], pieces[2]

    t2 = testobj.testDefn()
    hosttype = None
    if command == "DAYTONA_START_TEST":
        # paramcsv is "<testid>,<hosttype>,..." for test starts.
        fields = paramcsv.split(",")
        testid = int(fields[0])
        hosttype = fields[1]
        current_test = action.get_test(testid)
        if current_test:
            t2 = current_test.tobj

    handler = getattr(__import__(module), function)

    if sync == "T":
        # wait for func to complete and return the ret
        self.lctx.debug("Executing SYNC ACTION for " + cmd + " : " +
                        mapping + ":" + str(actionID))
        ret = handler(self, self, command, paramcsv, actionID, sync)
        self.lctx.debug("ACTION completed for " + cmd + " : " +
                        mapping + ":" + str(actionID))
        # CLI responses use '%' separators, everything else uses ','.
        sep = "%" if command == "DAYTONA_CLI" else ","
        return "actionID=" + str(actionID) + sep + ret + sep + "SYNC EXEC"

    self.lctx.debug("Executing ASYNC ACTION for " + cmd + " : " +
                    mapping + ":" + str(actionID))
    worker = common.FuncThread(handler, True, self, command, paramcsv,
                               actionID, sync)
    # Only EXEC-host test actions are tracked in the async buffer.
    if hosttype == "EXEC":
        self.lock.acquire()
        self.async_actions.append((worker, actionID, t2, time.time()))
        self.lctx.debug("async_actions size :" + str(len(self.async_actions)))
        self.lock.release()
    worker.start()
    self.lctx.debug("Executing ACTION for " + cmd + " : " +
                    mapping + ":" + str(actionID))
    return "actionID=" + str(actionID) + "," + "SUCCESS," + "ASYNC EXEC"
def testmon(self, *mon):
    """
    Testmon continuously monitors all the running tests. It keeps checking the
    test status on the exec host where the execution script is running. If
    anything goes wrong with the test execution, this thread triggers
    termination actions for the test. It also triggers graceful test
    termination and logs collection when a test finishes on the exec host.
    """
    # process_results worker threads, keyed by testid.
    process_results_threads = defaultdict()
    while True:
        d = "TSMON [R] : |"
        # remove: just drop the test from the running queue.
        # error: abort/cleanup the test on its remote hosts first.
        remove = False
        error = False
        # Continuously iterate over running test list for checking test status
        for k in self.running_tests:
            if self.running_tests[k] is not None:
                t = self.running_tests[k]
                serialize_str = t.serialize()
                t2 = testobj.testDefn()
                t2.deserialize(serialize_str)
                # Initiating test logger for capturing test life cycle on scheduler,
                # all logs are logged in file <testid>.log
                test_logger = LOG.gettestlogger(t2, "EXEC")
                if t.testobj.TestInputData.testid != t2.testobj.TestInputData.testid:
                    lctx.error("testobj not same")
                    t.updateStatus("running", "failed")
                    remove = True
                    break  # out of for loop
                try:
                    # Send DAYTONA_GET_STATUS message on exec host mentioned in test
                    # for checking test status
                    ret = self.cl.send(
                        t.testobj.TestInputData.exechostname, self.CPORT,
                        self.ev.construct("DAYTONA_GET_STATUS",
                                          str(t2.testobj.TestInputData.testid)))
                    status = ret.split(",")[1]
                    lctx.debug(status)
                    test_logger.info("Test status : " + status)
                except Exception as e:
                    lctx.debug(e)
                    t.updateStatus("running", "failed")
                    error = True
                    break  # out of for loop
                if status == "RUNNING":
                    # If the test is in running state, then we need to verify that
                    # user hasn't terminated this test from UI. If user has terminated
                    # then testmon will stop test execution on all exec/stat hosts.
                    found = checkTestRunning(t.testobj.TestInputData.testid)
                    if not found:
                        error = True
                        break
                    d = d + str(self.running_tests[k].testobj.TestInputData.testid) + "|"
                elif status in ["TESTEND", "TIMEOUT"]:
                    # If test ends on exec host or a test timeout occurs then trigger
                    # graceful shutdown: a new thread downloads logs and cleans up.
                    d = d + "*" + str(self.running_tests[k].testobj.TestInputData.testid) + "*|"
                    if t.testobj.TestInputData.end_status == "running":
                        lctx.debug(t.testobj.TestInputData.end_status)
                        if status == "TIMEOUT":
                            t.testobj.TestInputData.timeout_flag = True
                            t.updateStatus("running", "timeout")
                        else:
                            t.updateStatus("running", "completed")
                        # process_results downloads log files and performs cleanup
                        # on all other hosts
                        pt = common.FuncThread(self.process_results, True, t,
                                               t.testobj.TestInputData.end_status)
                        process_results_threads[t.testobj.TestInputData.testid] = (pt, t)
                        pt.start()
                        remove = True
                        break
                    elif t.testobj.TestInputData.end_status == "collating" or t.testobj.TestInputData.end_status == "completed" or t.testobj.TestInputData.end_status == "finished clean":
                        d = d + "*" + str(self.running_tests[k].testobj.TestInputData.testid) + "*|"
                    else:
                        remove = True
                        t.updateStatus("running", "failed")
                        lctx.error("ERROR : Unknown test status for : " +
                                   str(t.testobj.TestInputData.testid) + ":" + str(status))
                        break  # out of for loop
                elif status.strip() in ["FAILED", "TESTNA"]:
                    # Test termination if test fails or test is not even running on
                    # the host. NOTE(review): the inner "ABORT" check is dead — the
                    # outer condition only admits FAILED and TESTNA.
                    if status.strip() == "FAILED":
                        error = True
                    elif status.strip() in ["ABORT", "TESTNA"]:
                        remove = True
                    t.updateStatus("", "failed")
                    lctx.error("TEST " + status.strip() + " : Cleaning test from running queue")
                    break  # out of for loop
                else:
                    # Test termination on receiving any unknown test state
                    remove = True
                    t.updateStatus("running", "failed")
                    lctx.error("ERROR : Unknown test status for : " +
                               str(t.testobj.TestInputData.testid) + ":" + str(status))
                    break  # out of for loop
        lctx.info(d)
        d = ""
        # Two modes of test termination:
        if error:
            # If error is set then testmon will perform below steps:
            # 1. Send test ABORT on exec host if it is alive; this stops the
            #    execution script, performs logs cleanup and test termination
            # 2. Send test cleanup on all other stat hosts for logs cleanup
            #    and test termination
            # 3. Remove test from the scheduler running queue
            retsend = None
            test_logger.error("Bad test status " + status + " - Terminating test")
            ip = t.testobj.TestInputData.exechostname
            try:
                retsend = self.cl.send(ip, self.CPORT,
                                       self.ev.construct("DAYTONA_HEARTBEAT", ""))
            except:
                pass
            if retsend and retsend.split(",")[1] == "ALIVE":
                retsend = self.cl.send(ip, self.CPORT,
                                       self.ev.construct("DAYTONA_ABORT_TEST",
                                                         str(t.testobj.TestInputData.testid)))
                test_logger.error("Test Aborted on exec host " + ip)
            for s in t.testobj.TestInputData.stathostname.split(','):
                # Empty entry marks the end of the stat-host list.
                if len(s.strip()) == 0:
                    break
                try:
                    retsend = self.cl.send(s.strip(), self.CPORT,
                                           self.ev.construct("DAYTONA_HEARTBEAT", ""))
                except:
                    pass
                if retsend and retsend.split(",")[1] == "ALIVE":
                    retsend = self.cl.send(s.strip(), self.CPORT,
                                           self.ev.construct("DAYTONA_CLEANUP_TEST",
                                                             str(t.testobj.TestInputData.testid)))
                    test_logger.error("Test Aborted on stat host " + s)
            self.lock.acquire()
            for k in self.running_tests:
                if self.running_tests[k].testobj.TestInputData.testid == t.testobj.TestInputData.testid:
                    lctx.debug("removing entry for this test")
                    rt = self.running_tests.pop(k)
                    break
            if k in self.running_tests:
                del self.running_tests[k]
            self.lock.release()
        if remove:
            # If remove flag is set, then testmon will only delete this test from
            # the running queue of scheduler
            self.lock.acquire()
            for k in self.running_tests:
                if self.running_tests[k].testobj.TestInputData.testid == t.testobj.TestInputData.testid:
                    lctx.debug("removing entry for this test")
                    rt = self.running_tests.pop(k)
                    break
            if k in self.running_tests:
                del self.running_tests[k]
            self.lock.release()
        time.sleep(2)
def dispatch(self, *args):
    """
    This is the dispatch queue of the scheduler where tests from different
    frameworks wait for the scheduler to bind them to a trigger thread. It
    continuously iterates over the testmap populated by DBMon with tests
    started from UI or CLI, tracks all running tests, and allows one test per
    framework. When an open spot is found for a framework's test, the test is
    popped from testmap, placed in the dispatch queue and handed to a trigger
    thread which starts test setup and then execution.
    """
    # Trigger threads by testid (keeps references alive).
    dispatch_threads = defaultdict()
    while True:
        # Continuously iterate on testmap for initiating any test execution
        for k in self.testmap:
            # iterating for all frameworkid k in testmap which contains list of
            # waiting tests for a particular framework
            found = False
            # If a test for this framework is already in the dispatch or running
            # queue then the new test must wait for it to finish.
            if k in self.dispatch_queue or k in self.running_tests:
                found = True
            else:
                lctx.debug("Found spot for test")
            if found:
                continue
            # Proceed if spot is available for executing test for this framework
            try:
                tmp_t = self.testmap[k][0]
            except Exception as e:
                lctx.debug("No test object found in map")
                continue
            if tmp_t is None:
                continue
            alive = False
            h = tmp_t.testobj.TestInputData.exechostname
            # Initiating test logger for capturing test life cycle on scheduler,
            # all logs are logged in file <testid>.log
            test_logger = LOG.init_testlogger(tmp_t, "EXEC")
            test_logger.info("Test execution starts")
            try:
                # Sending heartbeat on exec host to check if the agent is up
                retsend = self.cl.send(h, self.CPORT,
                                       self.ev.construct("DAYTONA_HEARTBEAT", ""))
                if retsend and len(retsend.split(",")) > 2:
                    status = retsend.split(",")[1]
                else:
                    raise Exception("Execution host not avaliable - No Heartbeat ",
                                    tmp_t.testobj.TestInputData.testid)
                if "ALIVE" == status:
                    test_logger.info("HeartBeat received from execution host " + h)
                    # Sending DAYTONA_HANDSHAKE for verifying connectivity between
                    # scheduler and agent on exec host using custom daytona ports
                    ret = self.cl.send(h, self.CPORT,
                                       self.ev.construct("DAYTONA_HANDSHAKE",
                                                         "handshake1," + self.HOST + "," +
                                                         str(self.PORT) + "," +
                                                         str(tmp_t.testobj.TestInputData.testid) + "," + h))
                    if ret == "SUCCESS":
                        alive = True
                        test_logger.info("Handshake successful with execution host " + h)
                        lctx.debug("Handshake successful in scheduler, adding ip/hostname to reg hosts")
                        server.serv.registered_hosts[h] = h
                        addr = socket.gethostbyname(h)
                        lctx.debug(addr)
                        server.serv.registered_hosts[addr] = addr
                    else:
                        raise Exception("Unable to handshake with agent on executuion host " + h)
            except Exception as e:
                # Host unreachable or handshake failed: fail the test and
                # drop it from the waiting map.
                lctx.error(e)
                test_logger.error(e)
                # pause the dbmon here as we dont want the same test to be picked
                # again after we pop
                self.dbinstance.mon_thread[0].pause()
                self.dbinstance.lock.acquire()
                t = self.testmap[k].pop(0)
                t.updateStatus("waiting", "failed")
                self.dbinstance.lock.release()
                lctx.debug("Removed test from map : " + str(t.testobj.TestInputData.testid))
                self.dbinstance.mon_thread[0].resume()
                LOG.removeLogger(tmp_t)
                continue
            if alive == True and found == False:
                # for each framework pick one and move it to running, iff running
                # has an empty slot.
                lctx.debug("-------Found empty slot in dispatch and running Q-------")
                # pause the dbmon here as we dont want the same test to be picked
                # again after we pop
                self.dbinstance.mon_thread[0].pause()
                self.dbinstance.lock.acquire()
                t = self.testmap[k].pop(0)
                self.dbinstance.lock.release()
                lctx.info("< %s" % t.testobj.TestInputData.testid)
                # put the test in dispatch queue
                self.dispatchQ__lock.acquire()
                self.dispatch_queue[k] = t
                self.dispatchQ__lock.release()
                t.updateStatus("waiting", "setup")
                self.dbinstance.mon_thread[0].resume()
                try:
                    # Bind a seperate trigger thread for this test to start test
                    # execution
                    trigger_thread = common.FuncThread(self.trigger, True, t)
                    dispatch_threads[t.testobj.TestInputData.testid] = (trigger_thread, t)
                    trigger_thread.start()
                except Exception as e:
                    lctx.error("Trigger error : " + str(t.testobj.TestInputData.testid))
                    test_logger.error("Test setup failed " + str(t.testobj.TestInputData.testid))
                    LOG.removeLogger(tmp_t)
                    self.dispatchQ__lock.acquire()
                    del self.dispatch_queue[k]
                    self.dispatchQ__lock.release()
                    lctx.debug(e)
        try:
            # Log list of tests currently present in dispatch queue in scheduler
            # debug file
            d = "DISPATCH [S/R] : "
            for k in self.dispatch_queue:
                d = d + " |" + str(self.dispatch_queue[k].testobj.TestInputData.testid)
        except:
            lctx.debug("ERROR : Dispatch Q empty")
        lctx.debug(d)
        d = ""
        time.sleep(2)