Example #1
0
    def testC_Profile(self):
        """
        _Profile_

        DON'T RUN THIS!
        """

        return

        import cProfile, pstats

        myThread = threading.currentThread()

        name    = makeUUID()

        config = self.getConfig()

        jobList = self.createGiantJobSet(name = name, config = config,
                                         nSubs = 10, nJobs = 1000, nFiles = 10)

        cleanCouch = CleanCouchPoller(config = config)
        cleanCouch.setup()

        cProfile.runctx("cleanCouch.algorithm()", globals(), locals(), filename = "testStats.stat")

        p = pstats.Stats('testStats.stat')
        p.sort_stats('cumulative')
        p.print_stats()
        return
Example #2
0
    def testC_Profile(self):
        """
        _Profile_

        DON'T RUN THIS!
        """
        import cProfile
        import pstats

        name = makeUUID()

        config = self.getConfig()

        jobList = self.createGiantJobSet(name=name,
                                         config=config,
                                         nSubs=10,
                                         nJobs=1000,
                                         nFiles=10)

        cleanCouch = CleanCouchPoller(config=config)
        cleanCouch.setup()

        cProfile.runctx("cleanCouch.algorithm()",
                        globals(),
                        locals(),
                        filename="testStats.stat")

        p = pstats.Stats('testStats.stat')
        p.sort_stats('cumulative')
        p.print_stats()
        return
Example #3
0
    def preInitialization(self):
        """
        Register the component's two pollers with the worker thread manager:
        the TaskArchiverPoller (task archiving) and the CleanCouchPoller
        (cleanup of old couch documents), each at its configured interval.
        """
        # print() form works on both Python 2 and 3 for a single argument
        print("TaskArchiver.preInitialization")

        # Add event loop to worker manager
        myThread = threading.currentThread()

        pollInterval = self.config.TaskArchiver.pollInterval
        # Lazy %-style logging args: formatting is skipped if INFO is disabled
        logging.info("Setting task archiver poll interval to %s seconds",
                     pollInterval)
        myThread.workerThreadManager.addWorker(TaskArchiverPoller(self.config),
                                               pollInterval)

        couchInterval = self.config.TaskArchiver.cleanCouchInterval
        logging.info(
            "Setting poll interval for cleanup old couch doc to %s seconds",
            couchInterval)
        myThread.workerThreadManager.addWorker(CleanCouchPoller(self.config),
                                               couchInterval)

        return
Example #4
0
    def testB_testErrors(self):
        """
        _testErrors_

        Test with a failed FWJR

        Builds a workload whose jobs carry an error FWJR plus a second
        LogCollect job group, runs TaskArchiverPoller followed by
        CleanCouchPoller, then checks the error bookkeeping in the resulting
        workload-summary couch document: failure time, the '99999' exit code,
        failed run/lumi info, and the per-site failure/error histograms.
        """

        # NOTE(review): myThread is never used below -- kept as-is
        myThread = threading.currentThread()

        config = self.getConfig()
        workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl')
        workload     = self.createWorkload(workloadName = workloadPath)
        # First job group: regular ReReco jobs flagged as failed (error = True)
        testJobGroup = self.createTestJobGroup(config = config,
                                               name = workload.name(),
                                               specLocation = workloadPath,
                                               error = True)
        # Create second workload
        # (second job group on the same workload: LogCollect task/type)
        testJobGroup2 = self.createTestJobGroup(config = config,
                                                name = workload.name(),
                                                filesetName = "TestFileset_2",
                                                specLocation = workloadPath,
                                                task = "/TestWorkload/ReReco/LogCollect", 
                                                type = "LogCollect")

        # The archiver expects the job cache directory for the task to exist
        cachePath = os.path.join(config.JobCreator.jobCacheDir,
                                 "TestWorkload", "ReReco")
        os.makedirs(cachePath)
        self.assertTrue(os.path.exists(cachePath))

        couchdb      = CouchServer(config.JobStateMachine.couchurl)
        jobdb        = couchdb.connectDatabase("%s/jobs" % self.databaseName)
        fwjrdb       = couchdb.connectDatabase("%s/fwjrs" % self.databaseName)
        # Prime the couch views so the documents are indexed before the
        # pollers run (return values deliberately discarded)
        jobdb.loadView("JobDump", "jobsByWorkflowName",
                        options = {"startkey": [workload.name()],
                                   "endkey": [workload.name(), {}]})['rows']
        fwjrdb.loadView("FWJRDump", "fwjrsByWorkflowName",
                        options = {"startkey": [workload.name()],
                                   "endkey": [workload.name(), {}]})['rows']

        # Mark the workflow complete, then archive and clean up couch
        self.populateWorkflowWithCompleteStatus()
        testTaskArchiver = TaskArchiverPoller(config = config)
        testTaskArchiver.algorithm()

        cleanCouch = CleanCouchPoller(config = config)
        cleanCouch.setup()
        cleanCouch.algorithm()

        dbname       = getattr(config.JobStateMachine, "couchDBName")
        workdatabase = couchdb.connectDatabase("%s/workloadsummary" % dbname)

        workloadSummary = workdatabase.document(id = workload.name())

        # Error summary: failure time and exit code 99999 for step cmsRun1
        self.assertEqual(workloadSummary['errors']['/TestWorkload/ReReco']['failureTime'], 500)
        self.assertTrue('99999' in workloadSummary['errors']['/TestWorkload/ReReco']['cmsRun1'])

        # Failed run/lumi bookkeeping: run 10, single lumi range [12312, 12312]
        failedRunInfo = workloadSummary['errors']['/TestWorkload/ReReco']['cmsRun1']['99999']['runs']
        self.assertEqual(failedRunInfo, {'10' : [[12312, 12312]]},
                          "Wrong lumi information in the summary for failed jobs")

        # Check the failures by site histograms
        # All failures land on a single site (T1_IT_CNAF), so the per-site
        # counts equal the averages and the standard deviations are zero
        self.assertEqual(workloadSummary['histograms']['workflowLevel']['failuresBySite']['data']['T1_IT_CNAF']['Failed Jobs'], 10)
        self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['data']['T1_IT_CNAF']['99999'], 10)
        self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['data']['T1_IT_CNAF']['8020'], 10)
        self.assertEqual(workloadSummary['histograms']['workflowLevel']['failuresBySite']['average']['Failed Jobs'], 10)
        self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['average']['99999'], 10)
        self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['average']['8020'], 10)
        self.assertEqual(workloadSummary['histograms']['workflowLevel']['failuresBySite']['stdDev']['Failed Jobs'], 0)
        self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['stdDev']['99999'], 0)
        self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['stdDev']['8020'], 0)
        return
Example #5
0
    def testA_BasicFunctionTest(self):
        """
        _BasicFunctionTest_

        Tests the components, by seeing if they can process a simple set of closeouts

        End-to-end: builds a workload with two job groups (ReReco and
        LogCollect), runs TaskArchiverPoller + CleanCouchPoller, then checks
        that all WMBS tables were emptied, the job cache directories were
        removed, and the workload-summary couch document has the expected
        output, performance, retry and log-archive contents.
        """

        myThread = threading.currentThread()

        config = self.getConfig()
        workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl')
        workload     = self.createWorkload(workloadName = workloadPath)
        # First job group: successful ReReco jobs (error = False)
        testJobGroup = self.createTestJobGroup(config = config,
                                               name = workload.name(),
                                               specLocation = workloadPath,
                                               error = False)

        # Create second workload
        # (second job group on the same workload: LogCollect task/type)
        testJobGroup2 = self.createTestJobGroup(config = config,
                                                name = workload.name(),
                                                filesetName = "TestFileset_2",
                                                specLocation = workloadPath,
                                                task = "/TestWorkload/ReReco/LogCollect", 
                                                type = "LogCollect")

        # Job cache directories must exist up front; the archiver is expected
        # to delete them (asserted further down)
        cachePath = os.path.join(config.JobCreator.jobCacheDir,
                                 "TestWorkload", "ReReco")
        os.makedirs(cachePath)
        self.assertTrue(os.path.exists(cachePath))

        cachePath2 = os.path.join(config.JobCreator.jobCacheDir,
                                 "TestWorkload", "LogCollect")
        os.makedirs(cachePath2)
        self.assertTrue(os.path.exists(cachePath2))

        # Sanity check: both subscriptions exist before archiving
        result = myThread.dbi.processData("SELECT * FROM wmbs_subscription")[0].fetchall()
        self.assertEqual(len(result), 2)

        workflowName = "TestWorkload"
        dbname       = config.TaskArchiver.workloadSummaryCouchDBName
        couchdb      = CouchServer(config.JobStateMachine.couchurl)
        workdatabase = couchdb.connectDatabase(dbname)
        jobdb        = couchdb.connectDatabase("%s/jobs" % self.databaseName)
        fwjrdb       = couchdb.connectDatabase("%s/fwjrs" % self.databaseName)
        # Prime the couch views; 'jobs' is also used to verify the job count
        jobs = jobdb.loadView("JobDump", "jobsByWorkflowName",
                              options = {"startkey": [workflowName],
                                         "endkey": [workflowName, {}]})['rows']
        fwjrdb.loadView("FWJRDump", "fwjrsByWorkflowName",
                        options = {"startkey": [workflowName],
                                   "endkey": [workflowName, {}]})['rows']

        # Two job groups of nJobs each
        self.assertEqual(len(jobs), 2*self.nJobs)

        from WMCore.WMBS.CreateWMBSBase import CreateWMBSBase
        create = CreateWMBSBase()
        tables = []
        for x in create.requiredTables:
            tables.append(x[2:])
        # NOTE(review): 'tables' is built but never used below

        # Mark the workflow complete, then archive and clean up couch
        self.populateWorkflowWithCompleteStatus()
        testTaskArchiver = TaskArchiverPoller(config = config)
        testTaskArchiver.algorithm()

        cleanCouch = CleanCouchPoller(config = config)
        cleanCouch.setup()
        cleanCouch.algorithm()

        # After archiving, every WMBS table for this workflow must be empty
        result = myThread.dbi.processData("SELECT * FROM wmbs_job")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_subscription")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_jobgroup")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_fileset")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_file_details")[0].fetchall()
        self.assertEqual(len(result), 0)

        # Make sure we deleted the directory
        self.assertFalse(os.path.exists(cachePath))
        self.assertFalse(os.path.exists(os.path.join(self.testDir, 'workloadTest/TestWorkload')))

        testWMBSFileset = Fileset(id = 1)
        self.assertEqual(testWMBSFileset.exists(), False)



        workloadSummary = workdatabase.document(id = "TestWorkload")
        # Check ACDC
        self.assertEqual(workloadSummary['ACDCServer'], sanitizeURL(config.ACDC.couchurl)['url'])

        # Check the output
        self.assertEqual(workloadSummary['output'].keys(), ['/Electron/MorePenguins-v0/RECO'])
        self.assertEqual(sorted(workloadSummary['output']['/Electron/MorePenguins-v0/RECO']['tasks']),
                        ['/TestWorkload/ReReco', '/TestWorkload/ReReco/LogCollect'])
        # Check performance
        # Check histograms
        self.assertAlmostEquals(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['AvgEventTime']['histogram'][0]['average'],
                                0.89405199999999996, places = 2)
        self.assertEqual(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['AvgEventTime']['histogram'][0]['nEvents'],
                         10)

        # Check standard performance
        self.assertAlmostEquals(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['TotalJobCPU']['average'], 17.786300000000001,
                                places = 2)
        self.assertAlmostEquals(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['TotalJobCPU']['stdDev'], 0.0,
                                places = 2)

        # Check worstOffenders
        self.assertEqual(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['AvgEventTime']['worstOffenders'],
                         [{'logCollect': None, 'log': None, 'value': '0.894052', 'jobID': 1},
                          {'logCollect': None, 'log': None, 'value': '0.894052', 'jobID': 1},
                          {'logCollect': None, 'log': None, 'value': '0.894052', 'jobID': 2}])

        # Check retryData
        self.assertEqual(workloadSummary['retryData']['/TestWorkload/ReReco'], {'1': 10})
        logCollectPFN = 'srm://srm-cms.cern.ch:8443/srm/managerv2?SFN=/castor/cern.ch/cms/store/logs/prod/2012/11/WMAgent/Run206446-MinimumBias-Run2012D-v1-Tier1PromptReco-4af7e658-23a4-11e2-96c7-842b2b4671d8/Run206446-MinimumBias-Run2012D-v1-Tier1PromptReco-4af7e658-23a4-11e2-96c7-842b2b4671d8-AlcaSkimLogCollect-1-logs.tar'
        self.assertEqual(workloadSummary['logArchives'], {'/TestWorkload/ReReco/LogCollect' : [logCollectPFN for _ in range(10)]})

        # LogCollect task is made out of identical FWJRs
        # assert that it is identical
        for x in workloadSummary['performance']['/TestWorkload/ReReco/LogCollect']['cmsRun1'].keys():
            if x in config.TaskArchiver.histogramKeys:
                continue
            for y in ['average', 'stdDev']:
                self.assertAlmostEquals(workloadSummary['performance']['/TestWorkload/ReReco/LogCollect']['cmsRun1'][x][y],
                                        workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1'][x][y],
                                        places = 2)

        return
Example #6
0
    def testB_testErrors(self):
        """
        _testErrors_

        Test with a failed FWJR

        Builds a workload whose jobs report errors plus a second LogCollect
        job group, runs TaskArchiverPoller followed by CleanCouchPoller, and
        checks the error bookkeeping in the resulting workload-summary couch
        document: failure time, the '99999' exit code, failed run/lumi info,
        and the per-site failure/error histograms.
        """

        config = self.getConfig()
        workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl')
        workload = self.createWorkload(workloadName=workloadPath)
        # First job group: regular ReReco jobs flagged as failed (error=True)
        testJobGroup = self.createTestJobGroup(config=config,
                                               name=workload.name(),
                                               specLocation=workloadPath,
                                               error=True)
        # Create second workload
        # (second job group on the same workload: LogCollect task)
        testJobGroup2 = self.createTestJobGroup(
            config=config,
            name=workload.name(),
            filesetName="TestFileset_2",
            specLocation=workloadPath,
            task="/TestWorkload/ReReco/LogCollect",
            jobType="LogCollect")

        # The archiver expects the job cache directory for the task to exist
        cachePath = os.path.join(config.JobCreator.jobCacheDir, "TestWorkload",
                                 "ReReco")
        os.makedirs(cachePath)
        self.assertTrue(os.path.exists(cachePath))

        couchdb = CouchServer(config.JobStateMachine.couchurl)
        jobdb = couchdb.connectDatabase("%s/jobs" % self.databaseName)
        fwjrdb = couchdb.connectDatabase("%s/fwjrs" % self.databaseName)
        # Prime the couch views so the documents are indexed before the
        # pollers run (return values deliberately discarded)
        jobdb.loadView("JobDump",
                       "jobsByWorkflowName",
                       options={
                           "startkey": [workload.name()],
                           "endkey": [workload.name(), {}]
                       })['rows']
        fwjrdb.loadView("FWJRDump",
                        "fwjrsByWorkflowName",
                        options={
                            "startkey": [workload.name()],
                            "endkey": [workload.name(), {}]
                        })['rows']

        # Mark the workflow complete, then archive and clean up couch
        self.populateWorkflowWithCompleteStatus()
        testTaskArchiver = TaskArchiverPoller(config=config)
        testTaskArchiver.algorithm()

        cleanCouch = CleanCouchPoller(config=config)
        cleanCouch.setup()
        cleanCouch.algorithm()

        dbname = getattr(config.JobStateMachine, "couchDBName")
        workdatabase = couchdb.connectDatabase("%s/workloadsummary" % dbname)

        workloadSummary = workdatabase.document(id=workload.name())

        # Error summary: failure time and exit code 99999 for step cmsRun1
        self.assertEqual(
            workloadSummary['errors']['/TestWorkload/ReReco']['failureTime'],
            500)
        self.assertTrue('99999' in workloadSummary['errors']
                        ['/TestWorkload/ReReco']['cmsRun1'])

        # Failed run/lumi bookkeeping: run 10, single lumi range [12312, 12312]
        failedRunInfo = workloadSummary['errors']['/TestWorkload/ReReco'][
            'cmsRun1']['99999']['runs']
        self.assertEqual(
            failedRunInfo, {'10': [[12312, 12312]]},
            "Wrong lumi information in the summary for failed jobs")

        # Check the failures by site histograms
        # All failures land on a single site (T1_IT_CNAF), so the per-site
        # counts equal the averages and the standard deviations are zero
        self.assertEqual(
            workloadSummary['histograms']['workflowLevel']['failuresBySite']
            ['data']['T1_IT_CNAF']['Failed Jobs'], 10)
        self.assertEqual(
            workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']
            ['cmsRun1']['errorsBySite']['data']['T1_IT_CNAF']['99999'], 10)
        self.assertEqual(
            workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']
            ['cmsRun1']['errorsBySite']['data']['T1_IT_CNAF']['8020'], 10)
        self.assertEqual(
            workloadSummary['histograms']['workflowLevel']['failuresBySite']
            ['average']['Failed Jobs'], 10)
        self.assertEqual(
            workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']
            ['cmsRun1']['errorsBySite']['average']['99999'], 10)
        self.assertEqual(
            workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']
            ['cmsRun1']['errorsBySite']['average']['8020'], 10)
        self.assertEqual(
            workloadSummary['histograms']['workflowLevel']['failuresBySite']
            ['stdDev']['Failed Jobs'], 0)
        self.assertEqual(
            workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']
            ['cmsRun1']['errorsBySite']['stdDev']['99999'], 0)
        self.assertEqual(
            workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']
            ['cmsRun1']['errorsBySite']['stdDev']['8020'], 0)
        return
Example #7
0
    def testA_BasicFunctionTest(self):
        """
        _BasicFunctionTest_

        Tests the components, by seeing if they can process a simple set of closeouts

        End-to-end: builds a workload with two job groups (ReReco and
        LogCollect), runs TaskArchiverPoller + CleanCouchPoller, then checks
        that all WMBS tables were emptied, the job cache directories were
        removed, and the workload-summary couch document has the expected
        output, performance, retry and log-archive contents.
        """

        myThread = threading.currentThread()

        config = self.getConfig()
        workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl')
        workload = self.createWorkload(workloadName=workloadPath)
        # First job group: successful ReReco jobs (error=False)
        testJobGroup = self.createTestJobGroup(config=config,
                                               name=workload.name(),
                                               specLocation=workloadPath,
                                               error=False)

        # Create second workload
        # (second job group on the same workload: LogCollect task)
        testJobGroup2 = self.createTestJobGroup(
            config=config,
            name=workload.name(),
            filesetName="TestFileset_2",
            specLocation=workloadPath,
            task="/TestWorkload/ReReco/LogCollect",
            jobType="LogCollect")

        # Job cache directories must exist up front; the archiver is expected
        # to delete them (asserted further down)
        cachePath = os.path.join(config.JobCreator.jobCacheDir, "TestWorkload",
                                 "ReReco")
        os.makedirs(cachePath)
        self.assertTrue(os.path.exists(cachePath))

        cachePath2 = os.path.join(config.JobCreator.jobCacheDir,
                                  "TestWorkload", "LogCollect")
        os.makedirs(cachePath2)
        self.assertTrue(os.path.exists(cachePath2))

        # Sanity check: both subscriptions exist before archiving
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_subscription")[0].fetchall()
        self.assertEqual(len(result), 2)

        workflowName = "TestWorkload"
        dbname = config.TaskArchiver.workloadSummaryCouchDBName
        couchdb = CouchServer(config.JobStateMachine.couchurl)
        workdatabase = couchdb.connectDatabase(dbname)
        jobdb = couchdb.connectDatabase("%s/jobs" % self.databaseName)
        fwjrdb = couchdb.connectDatabase("%s/fwjrs" % self.databaseName)
        # Prime the couch views; 'jobs' is also used to verify the job count
        jobs = jobdb.loadView("JobDump",
                              "jobsByWorkflowName",
                              options={
                                  "startkey": [workflowName],
                                  "endkey": [workflowName, {}]
                              })['rows']
        fwjrdb.loadView("FWJRDump",
                        "fwjrsByWorkflowName",
                        options={
                            "startkey": [workflowName],
                            "endkey": [workflowName, {}]
                        })['rows']

        # Two job groups of nJobs each
        self.assertEqual(len(jobs), 2 * self.nJobs)

        from WMCore.WMBS.CreateWMBSBase import CreateWMBSBase
        create = CreateWMBSBase()
        tables = []
        for x in create.requiredTables:
            tables.append(x[2:])
        # NOTE(review): 'tables' is built but never used below

        # Mark the workflow complete, then archive and clean up couch
        self.populateWorkflowWithCompleteStatus()
        testTaskArchiver = TaskArchiverPoller(config=config)
        testTaskArchiver.algorithm()

        cleanCouch = CleanCouchPoller(config=config)
        cleanCouch.setup()
        cleanCouch.algorithm()

        # After archiving, every WMBS table for this workflow must be empty
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_job")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_subscription")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_jobgroup")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_fileset")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_file_details")[0].fetchall()
        self.assertEqual(len(result), 0)

        # Make sure we deleted the directory
        self.assertFalse(os.path.exists(cachePath))
        self.assertFalse(
            os.path.exists(
                os.path.join(self.testDir, 'workloadTest/TestWorkload')))

        testWMBSFileset = Fileset(id=1)
        self.assertEqual(testWMBSFileset.exists(), False)

        workloadSummary = workdatabase.document(id="TestWorkload")
        # Check ACDC
        self.assertEqual(workloadSummary['ACDCServer'],
                         sanitizeURL(config.ACDC.couchurl)['url'])

        # Check the output
        # NOTE(review): comparing .keys() to a list -- works on Python 2;
        # under Python 3 dict views would not equal a list
        self.assertEqual(workloadSummary['output'].keys(),
                         ['/Electron/MorePenguins-v0/RECO'])
        self.assertEqual(
            sorted(workloadSummary['output']['/Electron/MorePenguins-v0/RECO']
                   ['tasks']),
            ['/TestWorkload/ReReco', '/TestWorkload/ReReco/LogCollect'])
        # Check performance
        # Check histograms
        self.assertAlmostEqual(
            workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']
            ['AvgEventTime']['histogram'][0]['average'],
            0.89405199999999996,
            places=2)
        self.assertEqual(
            workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']
            ['AvgEventTime']['histogram'][0]['nEvents'], 10)

        # Check standard performance
        self.assertAlmostEqual(
            workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']
            ['TotalJobCPU']['average'],
            17.786300000000001,
            places=2)
        self.assertAlmostEqual(
            workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']
            ['TotalJobCPU']['stdDev'],
            0.0,
            places=2)

        # Check worstOffenders
        self.assertEqual(
            workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']
            ['AvgEventTime']['worstOffenders'], [{
                'logCollect': None,
                'log': None,
                'value': '0.894052',
                'jobID': 1
            }, {
                'logCollect': None,
                'log': None,
                'value': '0.894052',
                'jobID': 1
            }, {
                'logCollect': None,
                'log': None,
                'value': '0.894052',
                'jobID': 2
            }])

        # Check retryData
        self.assertEqual(workloadSummary['retryData']['/TestWorkload/ReReco'],
                         {'1': 10})
        logCollectPFN = 'srm://srm-cms.cern.ch:8443/srm/managerv2?SFN=/castor/cern.ch/cms/store/logs/prod/2012/11/WMAgent/Run206446-MinimumBias-Run2012D-v1-Tier1PromptReco-4af7e658-23a4-11e2-96c7-842b2b4671d8/Run206446-MinimumBias-Run2012D-v1-Tier1PromptReco-4af7e658-23a4-11e2-96c7-842b2b4671d8-AlcaSkimLogCollect-1-logs.tar'
        self.assertEqual(workloadSummary['logArchives'], {
            '/TestWorkload/ReReco/LogCollect':
            [logCollectPFN for _ in range(10)]
        })

        # LogCollect task is made out of identical FWJRs
        # assert that it is identical
        for x in workloadSummary['performance'][
                '/TestWorkload/ReReco/LogCollect']['cmsRun1'].keys():
            if x in config.TaskArchiver.histogramKeys:
                continue
            for y in ['average', 'stdDev']:
                self.assertAlmostEqual(
                    workloadSummary['performance']
                    ['/TestWorkload/ReReco/LogCollect']['cmsRun1'][x][y],
                    workloadSummary['performance']['/TestWorkload/ReReco']
                    ['cmsRun1'][x][y],
                    places=2)

        return