def testProcessDetailBasic(self):
    """
    Build a ProcessDetail on top of the current process and check that
    its attributes and getDetails() output agree with os / psutil.

    """
    pid = os.getpid()
    name = inspect.stack()[0][3]  # test name
    pd = ProcessDetail(pid, name)
    self.assertEqual(pd.pid, pid)
    self.assertEqual(pd.name, name)
    self.assertEqual(pd.proc.pid, pid)
    # psutil renamed get_children() to children(); support both spellings
    # so the test does not depend on the installed psutil release
    proc = psutil.Process(pid)
    if hasattr(proc, "children"):
        numChildren = len(proc.children())
    else:
        numChildren = len(proc.get_children())
    self.assertEqual(len(pd.children), numChildren)
    # allProcs is expected to hold the main process plus its children
    self.assertEqual(len(pd.allProcs), 1 + numChildren)
    d = pd.getDetails()
    self.assertEqual(d["pid"], pid)
    self.assertEqual(d["component"], name)
    self.assertEqual(d["numChildrenProcesses"], numChildren)
def testProcessDetailBasic(self):
    """
    Check ProcessDetail against a helper process obtained from
    utils.getProcess() called without any children requested.

    """
    helperProc = utils.getProcess()
    self.testProcesses.append(helperProc)
    compName = "mytestprocess"
    detail = ProcessDetail(helperProc.pid, compName)

    # attributes set up by the constructor
    self.assertEqual(detail.pid, helperProc.pid)
    self.assertEqual(detail.name, compName)
    self.assertEqual(detail.proc.pid, helperProc.pid)
    self.assertEqual(len(detail.children), 0)
    self.assertEqual(len(detail.allProcs), 1)

    # getDetails() is queried only after the helper process is terminated
    utils.terminateProcesses(self.testProcesses)
    summary = detail.getDetails()
    self.assertEqual(summary["pid"], helperProc.pid)
    self.assertEqual(summary["component"], compName)
    self.assertEqual(summary["numChildrenProcesses"], 0)
def testProcessDetailBasic(self):
    """
    Build a ProcessDetail on top of the current process, check that its
    attributes and getDetails() output agree with os / psutil and make
    sure refresh() can be called on it.

    """
    pid = os.getpid()
    name = inspect.stack()[0][3]  # test name
    pd = ProcessDetail(pid, name)
    self.assertEqual(pd.pid, pid)
    self.assertEqual(pd.name, name)
    self.assertEqual(pd.proc.pid, pid)
    # psutil renamed get_children() to children(); support both spellings
    # so the test does not depend on the installed psutil release
    proc = psutil.Process(pid)
    if hasattr(proc, "children"):
        numChildren = len(proc.children())
    else:
        numChildren = len(proc.get_children())
    self.assertEqual(len(pd.children), numChildren)
    # allProcs is expected to hold the main process plus its children
    self.assertEqual(len(pd.allProcs), 1 + numChildren)
    d = pd.getDetails()
    self.assertEqual(d["pid"], pid)
    self.assertEqual(d["component"], name)
    self.assertEqual(d["numChildrenProcesses"], numChildren)
    # refresh() must not raise on a live process
    pd.refresh()
def testProcessMemoryPollerBasic(self):
    """
    ProcessMemoryPoller.sample() run on the current process
    must hand back a float.

    """
    detail = ProcessDetail(os.getpid(), "mytestprocess")
    sampled = ProcessMemoryPoller().sample(detail)
    self.assertTrue(isinstance(sampled, float))
def _updateComponentsInfo(self):
    """
    The method is called at each individual polling cycle.
    This handles:
        1) a particular component may have been restarted.
        2) some components were started after starting / initializing
            AlertGenerator itself so further running components (processes)
            may only be known later.

    self._components and self._compMeasurements are parallel lists; they
    are rebuilt together here so that they can never get out of step.
    Every component found in the fresh components info is taken out of it,
    so whatever is left over afterwards is a genuinely new component.

    """
    myName = self.__class__.__name__
    # dictionary[componentName] = componentPID
    componentsInfo = self._getComponentsInfo()
    keptComponents = []
    keptMeasurements = []
    for processDetail, measurements in zip(self._components,
                                           self._compMeasurements):
        # only the lookup itself may signal a vanished component
        try:
            newPID = componentsInfo.pop(processDetail.name)
        except KeyError:
            m = ("Component %s seems not running anymore, removed from polling."
                 % processDetail.name)
            logging.warning(m)
            continue

        if int(newPID) == processDetail.pid:
            # ok, component still runs under the same PID
            # update list of child processes (some may have (dis)appeared)
            logging.debug("Component %s runs under the same PID, refreshing"
                          " list of child processes ..."
                          % (processDetail.getDetails(),))
            try:
                processDetail.refresh()
            except psutil.error.NoSuchProcess as ex:
                # the component is kept even if its children list is stale
                logging.error("Could not update list of children processes "
                              "for %s, reason: %s"
                              % (processDetail.getDetails(), ex))
            keptComponents.append(processDetail)
            keptMeasurements.append(measurements)
            continue

        logging.warning("Component %s seems to have been restarted "
                        "(different PID:%s, was:%s)."
                        % (processDetail.name, newPID, processDetail.pid))
        try:
            pd = ProcessDetail(newPID, processDetail.name)
        except (psutil.error.NoSuchProcess,
                psutil.error.AccessDenied) as ex:
            logging.error("%s: component %s ignored, reason: %s"
                          % (myName, processDetail.name, ex))
            continue
        # measurements collected for the old PID say nothing about the new one
        measurements.clear()
        keptComponents.append(pd)
        keptMeasurements.append(measurements)

    # replace the content in place, other references to the lists stay valid
    self._components[:] = keptComponents
    self._compMeasurements[:] = keptMeasurements

    if componentsInfo:
        logging.info("Some new components appeared since last check ...")
        for compName, compPID in componentsInfo.items():
            self._setUpProcessDetailAndMeasurements(compPID, compName)
def testProcessDetailChildren(self):
    """
    Start a helper process with several children and check that
    ProcessDetail reports the main process together with all of them.

    """
    numSubProcesses = 3
    p = utils.getProcess(numChildren=numSubProcesses)
    self.testProcesses.append(p)
    # psutil renamed get_children() to children(); support both spellings
    proc = psutil.Process(p.pid)
    if hasattr(proc, "children"):
        listChildren = proc.children
    else:
        listChildren = proc.get_children
    # wait until all desired processes are running, but fail the test
    # rather than hang forever if they never show up
    maxWait = 60  # seconds
    deadline = time.time() + maxWait
    while len(listChildren()) < numSubProcesses:
        if time.time() > deadline:
            self.fail("Children processes did not start within %s seconds."
                      % maxWait)
        print("waiting for children processes to start")
        time.sleep(0.5)
    name = "mytestprocess2"
    pd = ProcessDetail(p.pid, name)
    self.assertEqual(pd.proc.pid, p.pid)
    self.assertEqual(len(pd.children), numSubProcesses)
    self.assertEqual(len(pd.allProcs), numSubProcesses + 1)
    # getDetails() is queried only after the processes are terminated
    utils.terminateProcesses(self.testProcesses)
    d = pd.getDetails()
    self.assertEqual(d["pid"], p.pid)
    self.assertEqual(d["numChildrenProcesses"], numSubProcesses)
def _setUp(self):
    """
    Look up the PID of the main database process and prepare the
    ProcessDetail and Measurements instances the poller works with.

    """
    self._dbProcessDetail = ProcessDetail(self._getProcessPID(), "MySQL")
    # number of samples fitting into one evaluation period
    samplesPerPeriod = self.config.period / self.config.pollInterval
    self._measurements = Measurements(round(samplesPerPeriod, 0))
def testProcessDetailBasic(self):
    """
    ProcessDetail created for the current process has to agree with
    os / psutil on the PID and the number of children; refresh() is
    called at the end as well.

    """
    ownPid = os.getpid()
    testName = inspect.stack()[0][3]  # test name
    detail = ProcessDetail(ownPid, testName)
    self.assertEqual(detail.pid, ownPid)
    self.assertEqual(detail.name, testName)
    self.assertEqual(detail.proc.pid, ownPid)

    try:
        # psutil 3.1.1
        childCount = len(psutil.Process(ownPid).children())
    except AttributeError:
        # psutil 0.6.1
        childCount = len(psutil.Process(ownPid).get_children())

    self.assertEqual(len(detail.children), childCount)
    self.assertEqual(len(detail.allProcs), 1 + childCount)

    summary = detail.getDetails()
    self.assertEqual(summary["pid"], ownPid)
    self.assertEqual(summary["component"], testName)
    self.assertEqual(summary["numChildrenProcesses"], childCount)
    detail.refresh()
def _setUpProcessDetailAndMeasurements(self, compPID, compName):
    """
    Based on input, create instances of ProcessDetail and Measurements
    and register them in the parallel lists self._components and
    self._compMeasurements.

    A component whose process can't be inspected (psutil NoSuchProcess or
    AccessDenied) is logged and ignored, nothing is registered for it.

    """
    myName = self.__class__.__name__
    try:
        pd = ProcessDetail(compPID, compName)
    except (psutil.error.NoSuchProcess, psutil.error.AccessDenied) as ex:
        logging.error("%s: component %s ignored, reason: %s" %
                      (myName, compName, ex))
        return
    # create both objects before touching either list so that the two
    # lists are always extended together
    measurements = Measurements(self.numOfMeasurements)
    self._components.append(pd)
    self._compMeasurements.append(measurements)
    m = ("%s: loaded process information on %s:%s" %
         (myName, compName, compPID))
    logging.info(m)
def _getKilledProcessDetail(self):
    """
    Create a process to have a valid pid, then kill it.
    Returns the ProcessDetail instance prepared on that (by then dead)
    process.

    """
    proc = subprocess.Popen(["sleep", "300"])
    name = "mytestkilledprocess"
    try:
        pd = ProcessDetail(proc.pid, name)
    finally:
        # kill and reap the helper even if ProcessDetail could not be
        # created, otherwise the sleep process would be left behind
        os.kill(proc.pid, signal.SIGKILL)
        # poll() is necessary, otherwise it'll never end/return
        while proc.poll() is None:
            time.sleep(0.2)
            print("waiting")
    return pd
def testPeriodPollerOnRealProcess(self):
    """
    Run PeriodPoller.check() for one complete measurement period, using
    CPU samples of the test's own process as input data.

    """
    config = getConfig("/tmp")

    # AlertProcessor section
    config.component_("AlertProcessor")
    config.AlertProcessor.section_("critical")
    config.AlertProcessor.section_("soft")
    config.AlertProcessor.critical.level = 5
    config.AlertProcessor.soft.level = 0

    # AlertGenerator section
    config.component_("AlertGenerator")
    config.AlertGenerator.section_("bogusPoller")
    config.AlertGenerator.bogusPoller.soft = 5  # [percent]
    config.AlertGenerator.bogusPoller.critical = 50  # [percent]
    config.AlertGenerator.bogusPoller.pollInterval = 0.2  # [second]
    # measurements are gathered over this period before they are
    # evaluated for possible alert triggering
    config.AlertGenerator.bogusPoller.period = 1

    generator = utils.AlertGeneratorMock(config)
    poller = PeriodPoller(config.AlertGenerator.bogusPoller, generator)
    poller.sender = utils.SenderMock()

    # PeriodPoller is a higher-level class and has no sampling method of
    # its own; CPU usage percentage is taken the same way as for a real
    # component, i.e. via the appropriate poller's method
    def sampleCPU(processDetail):
        return ComponentsCPUPoller.sample(processDetail)

    poller.sample = sampleCPU

    # the test process itself is the one being measured
    ownPid = os.getpid()
    testName = inspect.stack()[0][3]  # test name
    detail = ProcessDetail(ownPid, testName)

    # sampling has to be repeated the required number of times
    numOfMeasurements = int(config.AlertGenerator.bogusPoller.period /
                            config.AlertGenerator.bogusPoller.pollInterval)
    measurements = Measurements(numOfMeasurements)
    self.assertEqual(len(measurements), 0)
    for _ in range(measurements._numOfMeasurements):
        poller.check(detail, measurements)

    # since the whole measurement cycle was done, values should have been nulled
    self.assertEqual(len(measurements), 0)
handler, receiver = setUpReceiver( ti.testCase.generator.config.Alert.address, ti.testCase.generator.config.Alert.controlAddr) pid = os.getpid() numMeasurements = ti.config.period / ti.config.pollInterval # inject own input sample data provider # there is in fact input argument in this case which needs be ignored poller.sample = lambda proc_: random.randint( ti.thresholdToTest, ti.thresholdToTest + ti.thresholdDiff) # the process to run upon is fooled as well here poller._dbProcessDetail = ProcessDetail(pid, "TestProcess") poller._measurements = Measurements(numMeasurements) poller.start() ti.testCase.assertTrue(poller.is_alive()) if ti.expected != 0: # beware - if the alert is not correctly generated, the test # will hang here and will be waiting for it # #2238 AlertGenerator test can take 1 hour+ (and fail) # fail 2mins anyway if alert is not received timeLimitExceeded = False startTime = datetime.datetime.now() limitTime = 2 * 60 # seconds while len(handler.queue) == 0: time.sleep(ti.config.pollInterval / 5) if (datetime.datetime.now() - startTime).seconds > limitTime:
def doGenericPeriodAndProcessPolling(ti):
    """
    ti - Test Input instance (all variables on input to this test)

    The function is easier to reuse from here than from other test class.
    This helper function is also used for generic period polling.

    The poller is fed with random sample values from the interval
    <ti.thresholdToTest, ti.thresholdToTest + ti.thresholdDiff> and the
    alerts arriving at the receiver are checked against ti.expected and
    ti.level. The poller and the receiver are shut down even when the test
    fails half way through.

    """
    try:
        poller = ti.pollerClass(ti.config, ti.testCase.generator)
    except Exception as ex:
        ti.testCase.fail("%s: exception: %s" % (ti.testCase.testName, ex))

    handler, receiver = setUpReceiver(
        ti.testCase.generator.config.Alert.address,
        ti.testCase.generator.config.Alert.controlAddr)

    timeLimitExceeded = False
    limitTime = 2 * 60  # seconds
    pollerStarted = False
    try:
        pid = os.getpid()
        numMeasurements = ti.config.period / ti.config.pollInterval

        # inject own input sample data provider
        # there is in fact input argument in this case which needs be ignored
        poller.sample = lambda proc_: random.randint(
            ti.thresholdToTest, ti.thresholdToTest + ti.thresholdDiff)

        # the process to run upon is fooled as well here
        poller._dbProcessDetail = ProcessDetail(pid, "TestProcess")
        poller._measurements = Measurements(numMeasurements)
        poller.start()
        pollerStarted = True
        ti.testCase.assertTrue(poller.is_alive())

        if ti.expected != 0:
            # beware - if the alert is not correctly generated, the test
            # would hang here waiting for it
            # #2238 AlertGenerator test can take 1 hour+ (and fail)
            # give up after limitTime if no alert is received
            startTime = time.time()
            while len(handler.queue) == 0:
                time.sleep(ti.config.pollInterval / 5)
                if time.time() - startTime > limitTime:
                    timeLimitExceeded = True
                    break
        else:
            time.sleep(ti.config.period * 2)
    finally:
        # always release the poller and the receiver, a failed assertion
        # above must not leave them running
        if pollerStarted:
            poller.terminate()
        receiver.shutdown()

    ti.testCase.assertFalse(poller.is_alive())

    if ti.expected != 0:
        # #2238 AlertGenerator test can take 1 hour+ (and fail)
        # temporary measure from above loop:
        if timeLimitExceeded:
            ti.testCase.fail("No alert received in %s seconds." % limitTime)
        # the poller is terminated as soon as the first alert arrives,
        # so the number of received alerts has to match ti.expected
        ti.testCase.assertEqual(len(handler.queue), ti.expected)
        a = handler.queue[0]
        # the alert has to carry the level of the tested threshold
        ti.testCase.assertEqual(a["Level"], ti.level)
        ti.testCase.assertEqual(a["Component"],
                                ti.testCase.generator.__class__.__name__)
        ti.testCase.assertEqual(a["Source"], poller.__class__.__name__)
    else:
        ti.testCase.assertEqual(len(handler.queue), 0)