def testAFailedJobMonitoring(self):
    """
    _testAFailedJobMonitoring_

    Walk a job whose report was created with outcome=1 through the
    jobStart / stepStart / stepEnd / jobEnd sequence of DashboardInfo
    and verify the packet returned at every stage, expecting non-zero
    exit codes for both the step and the job.
    """
    # Fixtures: job, task and a report built with outcome=1
    jobName = 'testB'
    testJob = self.createTestJob()
    testWorkload = self.createWorkload()
    procTask = testWorkload.getTask(taskName="DataProcessing")
    failedReport = self.createReport(outcome=1)

    # The job environment has to exist before DashboardInfo is created
    self.setupJobEnvironment(name=jobName)

    dashboard = DashboardInfo(job=testJob, task=procTask,
                              dashboardUrl='127.0.0.1:8884')

    # --- job start packet ---
    startPacket = dashboard.jobStart()
    expectedStartFields = (
        ('MessageType', 'JobStatus'),
        ('StatusValue', 'running'),
        ('StatusDestination', "T1_US_FNAL"),
        ('taskId', 'wmagent_Tier1ReReco'),
    )
    for fieldName, expectedValue in expectedStartFields:
        self.assertEqual(startPacket[fieldName], expectedValue)

    # --- first (and only) step ---
    cmsRunStep = procTask.getStep(stepName="cmsRun1")

    # Step start: being the first step, it must carry the jobStart section
    stepStartPacket = dashboard.stepStart(step=cmsRunStep.data)
    self.assertNotEqual(stepStartPacket['jobStart'], None)
    jobStartSection = stepStartPacket['jobStart']
    self.assertEqual(jobStartSection['ExeStart'], cmsRunStep.name())
    self.assertEqual(jobStartSection['WNHostName'], socket.gethostname())
    self.assertEqual(stepStartPacket['1_ExeStart'], cmsRunStep.name())

    # Step end: the exit code must reflect the failure
    stepEndPacket = dashboard.stepEnd(step=cmsRunStep.data,
                                      stepReport=failedReport)
    self.assertEqual(stepEndPacket['1_ExeEnd'], cmsRunStep.name())
    self.assertNotEqual(stepEndPacket['1_ExeExitCode'], 0)
    self.assertTrue(stepEndPacket['1_ExeWCTime'] >= 0)
    self.assertEqual(failedReport.retrieveStep("cmsRun1").counter, 1)

    # --- job end packet ---
    endPacket = dashboard.jobEnd()
    self.assertEqual(endPacket['ExeEnd'], "cmsRun1")
    self.assertNotEqual(endPacket['JobExitCode'], 0)
    self.assertEqual(endPacket['WrapperCPUTime'], 0)
    self.assertTrue(endPacket['WrapperWCTime'] >= 0)
    # The exit reason has to mention the step that failed
    self.assertNotEqual(endPacket['JobExitReason'].find('cmsRun1'), -1)
def initMonitor(self, task, job, logPath, args=None):
    """
    Handles the monitor initiation

    Stores the task and job on the monitor and creates the DashboardInfo
    object, pointing it at the '<host>:<port>' URL built from the
    'destinationHost' and 'destinationPort' entries of args.

    :param task: task object, stored on self and passed to DashboardInfo
    :param job: job object, stored on self and passed to DashboardInfo
    :param logPath: not used by this method
    :param args: optional mapping holding 'destinationHost' and
                 'destinationPort'; None is treated as an empty mapping
    """
    logging.info("In DashboardMonitor.initMonitor")

    # None instead of a literal {} default: a dict default is created once
    # and shared by every call that omits the argument.
    if args is None:
        args = {}

    self.task = task
    self.job = job

    destHost = args.get('destinationHost', None)
    destPort = args.get('destinationPort', None)
    if destHost is None or destPort is None:
        # The URL below is still built (as before) from whatever is
        # available, but make the incomplete configuration visible.
        # NOTE(review): how DashboardInfo treats a 'None:None' URL is not
        # visible from here - confirm.
        logging.warning("Incomplete dashboard destination: host=%s port=%s",
                        destHost, destPort)

    # %s already applies str() to its operands
    dashboardUrl = '%s:%s' % (destHost, destPort)

    self.dashboardInfo = DashboardInfo(task=task, job=job,
                                       dashboardUrl=dashboardUrl)
def testMultithreadedApplication(self):
    """
    _testMultithreadedApplication_

    Set the number of cores on the cmsRun1 step to 8 and verify that the
    jobStart, stepEnd and jobEnd packets all report that value as NCores.
    """
    expectedCores = 8

    # Fixtures
    jobName = 'testMT'
    testJob = self.createTestJob()
    testWorkload = self.createWorkload()
    procTask = testWorkload.getTask(taskName="DataProcessing")
    stepReport = self.createReport()

    # The job environment has to exist before DashboardInfo is created
    self.setupJobEnvironment(name=jobName)

    dashboard = DashboardInfo(job=testJob, task=procTask,
                              dashboardUrl='127.0.0.1:8884')

    # Configure the step as multicore before any packet is produced
    cmsRunStep = procTask.getStep(stepName="cmsRun1")
    stepHelper = cmsRunStep.getTypeHelper()
    stepHelper.setNumberOfCores(8)

    # Job start packet
    startPacket = dashboard.jobStart()
    self.assertEqual(startPacket['NCores'], expectedCores)

    # Fetch the step again, as done before running it
    cmsRunStep = procTask.getStep(stepName="cmsRun1")

    # Step start: the returned packet is not inspected in this test
    dashboard.stepStart(step=cmsRunStep.data)

    # Step end packet carries the per-step core count
    stepEndPacket = dashboard.stepEnd(step=cmsRunStep.data,
                                      stepReport=stepReport)
    self.assertEqual(stepEndPacket['1_NCores'], expectedCores)
    self.assertEqual(stepReport.retrieveStep("cmsRun1").counter, 1)

    # Job end packet must report the same core count
    endPacket = dashboard.jobEnd()
    self.assertEqual(endPacket['NCores'], expectedCores)
def initMonitor(self, task, job, logPath, args=None):
    """
    Handles the monitor initiation

    Stores the task and job on the monitor, creates the DashboardInfo
    object for them and, when both a destination host and a destination
    port are supplied, registers that destination with it.

    :param task: task object, stored on self and passed to DashboardInfo
    :param job: job object, stored on self and passed to DashboardInfo
    :param logPath: not used by this method
    :param args: optional mapping holding 'destinationHost' and
                 'destinationPort'; None is treated as an empty mapping
    """
    logging.info("In DashboardMonitor.initMonitor")

    # None instead of a literal {} default: a dict default is created once
    # and shared by every call that omits the argument.
    if args is None:
        args = {}

    self.task = task
    self.job = job

    destHost = args.get('destinationHost', None)
    destPort = args.get('destinationPort', None)

    self.dashboardInfo = DashboardInfo(task=task, job=job)

    # Only add a destination when both pieces are present (and truthy)
    if destHost and destPort:
        # Let logging do the formatting so it is skipped when INFO is off
        logging.info("About to set destination to %s:%s", destHost, destPort)
        self.dashboardInfo.addDestination(host=destHost, port=destPort)
def testASuccessfulJobMonitoring(self):
    """
    _testASuccessfulJobMonitoring_

    Run the jobStart / stepStart / stepEnd / jobEnd sequence of
    DashboardInfo for a job with a default report, executing the cmsRun1
    step twice, and verify the packet returned at every stage.
    """
    # Fixtures
    jobName = 'testA'
    testJob = self.createTestJob()
    testWorkload = self.createWorkload()
    procTask = testWorkload.getTask(taskName="DataProcessing")
    stepReport = self.createReport()

    # The job environment has to exist before DashboardInfo is created
    self.setupJobEnvironment(name=jobName)

    dashboard = DashboardInfo(job=testJob, task=procTask)
    dashboard.addDestination('127.0.0.1', 8884)

    # --- job start packet ---
    startPacket = dashboard.jobStart()
    expectedStartFields = (
        ('MessageType', 'JobStatus'),
        ('StatusValue', 'running'),
        ('StatusDestination', "T1_US_FNAL"),
        ('taskId', 'wmagent_Tier1ReReco'),
    )
    for fieldName, expectedValue in expectedStartFields:
        self.assertEqual(startPacket[fieldName], expectedValue)

    # --- first pass over the step ---
    cmsRunStep = procTask.getStep(stepName="cmsRun1")

    # Being the first step, its start packet carries the jobStart section
    firstStart = dashboard.stepStart(step=cmsRunStep.data)
    self.assertNotEqual(firstStart['jobStart'], None)
    jobStartSection = firstStart['jobStart']
    self.assertEqual(jobStartSection['ExeStart'], cmsRunStep.name())
    self.assertEqual(jobStartSection['WNHostName'], socket.gethostname())
    self.assertEqual(firstStart['1_ExeStart'], cmsRunStep.name())

    firstEnd = dashboard.stepEnd(step=cmsRunStep.data, stepReport=stepReport)
    self.assertEqual(firstEnd['1_ExeEnd'], cmsRunStep.name())
    self.assertEqual(firstEnd['1_ExeExitCode'], 0)
    self.assertTrue(firstEnd['1_ExeWCTime'] >= 0)
    self.assertEqual(stepReport.retrieveStep("cmsRun1").counter, 1)

    # --- second pass over the same step ---
    cmsRunStep = procTask.getStep(stepName="cmsRun1")

    # Not the first step any more: no jobStart section, keys prefixed "2_"
    secondStart = dashboard.stepStart(step=cmsRunStep.data)
    self.assertEqual(secondStart['jobStart'], None)
    self.assertEqual(secondStart['2_ExeStart'], cmsRunStep.name())

    secondEnd = dashboard.stepEnd(step=cmsRunStep.data, stepReport=stepReport)
    self.assertEqual(secondEnd['2_ExeEnd'], cmsRunStep.name())
    self.assertEqual(secondEnd['2_ExeExitCode'], 0)
    self.assertTrue(secondEnd['2_ExeWCTime'] >= 0)
    self.assertEqual(stepReport.retrieveStep("cmsRun1").counter, 2)

    # --- job end packet ---
    endPacket = dashboard.jobEnd()
    self.assertEqual(endPacket['ExeEnd'], "cmsRun1")
    self.assertEqual(endPacket['JobExitCode'], 0)
    self.assertEqual(endPacket['WrapperCPUTime'], 0)
    self.assertTrue(endPacket['WrapperWCTime'] >= 0)
    self.assertNotEqual(endPacket['JobExitReason'], "")