Пример #1
0
    def testF_LinearAlgo(self):
        """
        _testLinearAlgo_

        Run the RetryManager with the LinearAlgo plugin configured for the
        'Processing' job type and a cooloff time of 10 for each stage.

        The jobs are driven into submitcooloff twice.  The test then checks
        that they are still in SubmitCooloff when their state time is 5
        seconds in the past, and that they are back in Created once the
        state time is 12 seconds in the past.
        """
        jobGroup = self.createTestJobGroup(nJobs=self.nJobs)

        config = self.getConfig()
        config.RetryManager.plugins = {'Processing': 'LinearAlgo'}
        config.RetryManager.section_("LinearAlgo")
        config.RetryManager.LinearAlgo.section_("Processing")
        config.RetryManager.LinearAlgo.Processing.coolOffTime = dict(
            create=10, submit=10, job=10)

        # (new state, old state) pairs, applied in this exact order
        transitions = (
            ('created', 'new'),
            ('submitfailed', 'created'),
            ('submitcooloff', 'submitfailed'),
            ('created', 'submitcooloff'),
            ('submitfailed', 'created'),
            ('submitcooloff', 'submitfailed'),
        )
        changer = ChangeState(config)
        for newState, oldState in transitions:
            changer.propagate(jobGroup.jobs, newState, oldState)

        def countJobs(state):
            """Number of job ids the GetAllJobs DAO reports for state."""
            return len(self.getJobs.execute(state=state))

        def ageJobs(seconds):
            """Move the state time of every test job into the past."""
            for job in jobGroup.jobs:
                self.setJobTime.execute(jobID=job["id"],
                                        stateTime=int(time.time()) - seconds)

        self.assertEqual(countJobs('SubmitCooloff'), self.nJobs)

        testRetryManager = RetryManagerPoller(config)
        testRetryManager.setup(None)

        # 5 seconds old: nothing may leave cooloff yet
        ageJobs(5)
        testRetryManager.algorithm(None)
        self.assertEqual(countJobs('SubmitCooloff'), self.nJobs)

        # 12 seconds old: every job must have been released
        ageJobs(12)
        testRetryManager.algorithm(None)
        self.assertEqual(countJobs('SubmitCooloff'), 0)
        self.assertEqual(countJobs('Created'), self.nJobs)

        return
Пример #2
0
    def __init__(self, config):
        """
        Set up the JobCreator worker thread.

        Creates the WMBS DAOs, reads the tunables from the JobCreator and
        Agent configuration sections, prepares the job cache directory and
        finally builds the ChangeState helper.

        A WMException raised while preparing the cache directory is passed
        through untouched; any other exception is logged, sent as an alert
        and re-raised as JobCreatorException.
        """

        BaseWorkerThread.__init__(self)

        currentThread = threading.currentThread()

        # DAO factory for WMBS objects
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=logging,
                                     dbinterface=currentThread.dbi)

        self.setBulkCache = self.daoFactory(classname="Jobs.SetCache")
        self.countJobs = self.daoFactory(classname="Jobs.GetNumberOfJobsPerWorkflow")
        self.subscriptionList = self.daoFactory(classname="Subscriptions.ListIncomplete")
        self.setFWJRPath = self.daoFactory(classname="Jobs.SetFWJRPath")

        # Keep the full configuration around
        self.config = config

        # Tunables; everything except defaultJobType has a fallback value
        creatorConfig = config.JobCreator
        agentConfig = config.Agent
        self.defaultJobType = creatorConfig.defaultJobType
        self.limit = getattr(creatorConfig, 'fileLoadLimit', 500)
        self.agentNumber = int(getattr(agentConfig, 'agentNumber', 0))
        self.agentName = getattr(agentConfig, 'hostName', '')
        self.glideinLimits = getattr(creatorConfig, 'GlideInRestriction', None)

        # Initialise the alert framework; the error path below relies on
        # self.sendAlert being available afterwards
        self.initAlerts(compName="JobCreator")

        try:
            # The default path is built eagerly, so componentDir is read
            # even when jobCacheDir is configured explicitly
            fallbackCacheDir = os.path.join(config.JobCreator.componentDir,
                                            'jobCacheDir')
            self.jobCacheDir = getattr(config.JobCreator, 'jobCacheDir',
                                       fallbackCacheDir)
            # self.check() is defined elsewhere in the class
            self.check()
        except WMException:
            raise
        except Exception as ex:
            msg = "Unhandled exception while setting up jobCacheDir!\n%s" % str(ex)
            logging.error(msg)
            self.sendAlert(6, msg=msg)
            raise JobCreatorException(msg)

        self.changeState = ChangeState(self.config)

        return
Пример #3
0
    def testD_Exhausted(self):
        """
        _testExhausted_

        Jobs created with retry_count=5 are failed while the ErrorHandler is
        configured with maxRetries = 1.  After one polling cycle every job
        must be in Exhausted (none left in JobFailed or JobCooloff) and the
        two files of the subscription must have moved from Acquired to
        Failed.
        """
        workloadName = 'TestWorkload'
        self.createWorkload(workloadName=workloadName)
        workloadPath = os.path.join(self.testDir, 'workloadTest',
                                    workloadName, 'WMSandbox',
                                    'WMWorkload.pkl')

        testJobGroup = self.createTestJobGroup(nJobs=self.nJobs,
                                               retry_count=5,
                                               workloadPath=workloadPath)

        config = self.getConfig()
        config.ErrorHandler.maxRetries = 1

        # (new state, old state) pairs that take the jobs to jobfailed
        changer = ChangeState(config)
        for newState, oldState in (('created', 'new'),
                                   ('executing', 'created'),
                                   ('complete', 'executing'),
                                   ('jobfailed', 'complete')):
            changer.propagate(testJobGroup.jobs, newState, oldState)

        # There should be exactly one subscription
        testSubscription = Subscription(id=1)
        testSubscription.load()
        testSubscription.loadData()

        # The files start out acquired
        self.assertEqual(len(testSubscription.filesOfStatus("Acquired")), 2)

        testErrorHandler = ErrorHandlerPoller(config)
        # No ReqMgr auxiliary DB is wanted in the test
        testErrorHandler.reqAuxDB = None
        testErrorHandler.setup(None)
        testErrorHandler.algorithm(None)

        expectedCounts = (('JobFailed', 0),
                          ('JobCooloff', 0),
                          ('Exhausted', self.nJobs))
        for state, expected in expectedCounts:
            idList = self.getJobs.execute(state=state)
            self.assertEqual(len(idList), expected)

        # The files must now be failed instead of acquired
        self.assertEqual(len(testSubscription.filesOfStatus("Acquired")), 0)
        self.assertEqual(len(testSubscription.filesOfStatus("Failed")), 2)
Пример #4
0
    def setUp(self):
        """
        _setUp_

        Prepare logging, the database connection and the WMBS schema, then
        build the fixtures: three resource-control site entries, a fileset
        with eleven files located at 'SomeSE', a workflow and a Harvesting
        subscription that uses the "Harvest" split algorithm.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(customModules=["WMCore.WMBS"])

        self.splitterFactory = SplitterFactory(package="WMCore.JobSplitting")

        self.myThread = threading.currentThread()
        self.WMBSFactory = DAOFactory(package="WMCore.WMBS",
                                      logger=logging,
                                      dbinterface=self.myThread.dbi)

        self.changer = ChangeState(self.getConfig())

        resourceControl = ResourceControl()
        siteEntries = (
            ("SomeSite", 10, 20, "SomeSE", "SomeCE"),
            ("SomeSite", 10, 20, "SomeSE2", "SomeCE"),
            ("SomeSite2", 10, 20, "SomeSE3", "SomeCE2"),
        )
        for siteArgs in siteEntries:
            resourceControl.insertSite(*siteArgs)

        self.fileset1 = Fileset(name="TestFileset1")
        for fileIndex in range(11):
            lfn = "/some/file/name%d" % fileIndex
            newFile = File(lfn, size=1000, events=100)
            newFile.addRun(Run(1, 1))
            newFile.setLocation('SomeSE')
            self.fileset1.addFile(newFile)

        self.fileset1.create()

        workflow1 = Workflow(spec="spec.xml", owner="hufnagel",
                             name="TestWorkflow1", task="Test")
        workflow1.create()

        self.subscription1 = Subscription(fileset=self.fileset1,
                                          workflow=workflow1,
                                          split_algo="Harvest",
                                          type="Harvesting")
        self.subscription1.create()

        self.configFile = EmulatorSetup.setupWMAgentConfig()

        return
Пример #5
0
    def testF_WMSMode(self):
        """
        _WMSMode_

        Submit jobs with the PyCondorPlugin and submitWMSMode switched on,
        without assigning a site.  The test requires that the user has no
        condor jobs beforehand, expects nSubs * nJobs jobs afterwards and
        finally kills the jobs BossAir reports as Idle.
        """
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0, "User currently has %i running jobs.  Test will not continue" % (nRunning))

        config = self.getConfig()
        config.BossAir.pluginName = 'PyCondorPlugin'
        config.BossAir.submitWMSMode = True

        baAPI = BossAirAPI(config=config)
        workload = self.createTestWorkload()
        workloadName = "basicWorkload"
        changeState = ChangeState(config)

        nSubs, nJobs = 5, 10

        # Not used further; kept as in the original test
        dummycacheDir = os.path.join(self.testDir, 'CacheDir')

        workloadSpec = os.path.join(self.testDir, 'workloadTest', workloadName)
        jobGroupList = self.createJobGroups(nSubs=nSubs,
                                            nJobs=nJobs,
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=workloadSpec,
                                            site=None)
        for jobGroup in jobGroupList:
            changeState.propagate(jobGroup.jobs, 'created', 'new')

        jobSubmitter = JobSubmitterPoller(config=config)
        jobSubmitter.algorithm()

        # Every created job should now show up in condor
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, nSubs * nJobs)

        # Clean up: refresh the job status and kill whatever is idle
        baAPI.track()
        idleJobs = baAPI._loadByStatus(status='Idle')
        baAPI.kill(jobs=idleJobs)

        del jobSubmitter

        return
Пример #6
0
    def testF_PollerProfileTest(self):
        """
        _testF_PollerProfileTest_

        Create 100 x 100 jobs twice (once with the default task type, once
        as 'Merge') for T1_US_FNAL and profile one JobSubmitterPoller
        cycle.  The wall-clock time and the cumulative profile statistics
        are printed; nothing is asserted.
        """
        workloadName = "basicWorkload"
        workload = self.createTestWorkload()
        # NOTE: this local must stay named 'config'; the profiled statement
        # below looks it up through locals()
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs = 100
        nJobs = 100
        site = "T1_US_FNAL"

        self.setResourceThresholds(
            site,
            pendingSlots=20000,
            runningSlots=-1,
            tasks=['Processing', 'Merge'],
            Processing={'pendingSlots': 10000, 'runningSlots': -1},
            Merge={'pendingSlots': 10000, 'runningSlots': -1, 'priority': 5})

        # Always initialize the submitter after setting the sites, flaky!
        JobSubmitterPoller(config=config)

        workloadSpec = os.path.join(self.testDir, 'workloadTest', workloadName)

        jobGroupList = self.createJobGroups(nSubs=nSubs,
                                            nJobs=nJobs,
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=workloadSpec,
                                            site=site)
        mergeGroups = self.createJobGroups(nSubs=nSubs,
                                           nJobs=nJobs,
                                           task=workload.getTask("ReReco"),
                                           workloadSpec=workloadSpec,
                                           site=site,
                                           taskType='Merge')
        jobGroupList.extend(mergeGroups)

        for jobGroup in jobGroupList:
            changeState.propagate(jobGroup.jobs, 'created', 'new')

        # Profile a fresh poller running a single cycle
        startTime = time.time()
        cProfile.runctx("JobSubmitterPoller(config=config).algorithm()",
                        globals(), locals(), filename="testStats.stat")
        stopTime = time.time()

        print("Job took %f seconds to complete" % (stopTime - startTime))

        stats = pstats.Stats('testStats.stat')
        stats.sort_stats('cumulative')
        stats.print_stats()

        return
Пример #7
0
    def testZ_Profile(self):
        """
        _testProfile_

        Fail 100 jobs in the job stage, profile a single ErrorHandlerPoller
        cycle and check that every job ended up in JobCooloff.  The elapsed
        time and the top 20% of the cumulative profile are printed.
        """
        nJobs = 100
        workloadName = 'TestWorkload'
        self.createWorkload(workloadName=workloadName)
        workloadPath = os.path.join(self.testDir, 'workloadTest',
                                    workloadName, 'WMSandbox',
                                    'WMWorkload.pkl')

        testJobGroup = self.createTestJobGroup(nJobs=nJobs,
                                               workloadPath=workloadPath)

        config = self.getConfig()
        changer = ChangeState(config)
        # (new state, old state) pairs that take the jobs to jobfailed
        for newState, oldState in (('created', 'new'),
                                   ('executing', 'created'),
                                   ('complete', 'executing'),
                                   ('jobfailed', 'complete')):
            changer.propagate(testJobGroup.jobs, newState, oldState)

        self.assertEqual(len(self.getJobs.execute(state='JobFailed')), nJobs)

        # NOTE: the name 'testErrorHandler' is referenced by the profiled
        # statement through locals(), so it must not be renamed
        testErrorHandler = ErrorHandlerPoller(config)
        testErrorHandler.setup(None)

        startTime = time.time()
        cProfile.runctx("testErrorHandler.algorithm()", globals(), locals(),
                        filename="profStats.stat")
        stopTime = time.time()

        for state, expected in (('CreateFailed', 0),
                                ('JobFailed', 0),
                                ('JobCooloff', nJobs)):
            idList = self.getJobs.execute(state=state)
            self.assertEqual(len(idList), expected)

        print("Took %f seconds to run polling algo" % (stopTime - startTime))

        stats = pstats.Stats('profStats.stat')
        stats.sort_stats('cumulative')
        stats.print_stats(0.2)

        return
Пример #8
0
    def __init__(self, config, insertStates=False):
        """
        __init__

        Build the BossAir API object from a standard WMAgent configuration.

        Reads pluginDir (required) and the optional newState, checkProxy
        and cert settings from config.BossAir, creates the ChangeState
        helper, the plugin factory and the BossAir DAOs, and finally calls
        self.loadPlugin(insertStates).
        """

        WMConnectionBase.__init__(self, daoPackage="WMCore.BossAir")

        currentThread = threading.currentThread()

        self.config = config
        self.plugins = {}
        self.states = []
        self.jobs = []

        bossAirConfig = config.BossAir
        self.pluginDir = bossAirConfig.pluginDir
        # State that newly created jobs are put in
        self.newState = getattr(bossAirConfig, 'newState', 'New')

        # Optional proxy settings
        self.checkProxy = getattr(bossAirConfig, 'checkProxy', False)
        self.cert = getattr(bossAirConfig, 'cert', None)

        self.stateMachine = ChangeState(self.config)

        # Factory used to load the plugins
        self.pluginFactory = WMFactory("plugins", self.pluginDir)

        self.daoFactory = DAOFactory(package="WMCore.BossAir",
                                     logger=currentThread.logger,
                                     dbinterface=currentThread.dbi)

        # attribute name -> DAO class name, created in this order
        daoTable = (
            ("deleteDAO", "DeleteJobs"),
            ("stateDAO", "NewState"),
            ("loadByWMBSDAO", "LoadByWMBSID"),
            ("updateDAO", "UpdateJobs"),
            ("newJobDAO", "NewJobs"),
            ("runningJobDAO", "LoadRunning"),
            ("completeJobDAO", "LoadComplete"),
            ("loadJobsDAO", "LoadByStatus"),
            ("completeDAO", "CompleteJob"),
            ("monitorDAO", "JobStatusForMonitoring"),
        )
        for attrName, daoClass in daoTable:
            setattr(self, attrName, self.daoFactory(classname=daoClass))

        # NOTE(review): states was initialised to [] above and is reset to
        # None here, right before loadPlugin runs - confirm which of the
        # two values loadPlugin actually expects
        self.states = None
        self.loadPlugin(insertStates)

        return
Пример #9
0
    def testUpdateFailedDoc(self):
        """
        _testUpdateFailedDoc_

        Verify that the update function will work correctly and not throw a 500
        error if the doc didn't make it into the database for some reason.

        A single job is given a couch_record equal to its id and is then
        propagated from "none" to "new".  The document fetched back from the
        jobs database must contain a "states" entry holding the key "1".
        """
        change = ChangeState(self.config, "changestate_t")

        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute("site1", seName="somese.cern.ch")

        testWorkflow = Workflow(spec="spec.xml",
                                owner="Steve",
                                name="wf001",
                                task=self.taskName)
        testWorkflow.create()
        testFileset = Fileset(name="TestFileset")
        testFileset.create()
        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow,
                                        split_algo="FileBased")
        testSubscription.create()

        testFileA = File(lfn="SomeLFNA",
                         events=1024,
                         size=2048,
                         locations=set(["somese.cern.ch"]))
        testFileA.create()
        testFileset.addFile(testFileA)
        testFileset.commit()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        jobGroup = jobFactory(files_per_job=1)[0]

        testJobA = jobGroup.jobs[0]
        testJobA["user"] = "******"
        testJobA["group"] = "DMWM"
        testJobA["taskType"] = "Merge"
        # Point the job at a couch record before any state was recorded
        testJobA["couch_record"] = str(testJobA["id"])

        change.propagate([testJobA], "new", "none")
        testJobADoc = change.jobsdatabase.document(testJobA["couch_record"])

        # Use the 'in' operator: dict.has_key() only exists on Python 2
        self.assertTrue("states" in testJobADoc)
        self.assertTrue("1" in testJobADoc["states"])
        return
Пример #10
0
    def testT_updateJobInfo(self):
        """
        _updateJobInfo_

        Test the updateSiteInformation method from PyCondorPlugin.py

        Two jobs are submitted for site "se.T2_US_UCSD" in WMS mode.  Once
        condor reports them, updateSiteInformation is called for
        "T2_US_UCSD" with True and any jobs it returns are killed with
        error code 71301.
        """

        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0, "User currently has %i running jobs.  Test will not continue" % (nRunning))

        config = self.getConfig()
        config.BossAir.pluginName = 'PyCondorPlugin'
        config.BossAir.submitWMSMode = True

        baAPI = BossAirAPI(config=config)
        workload = self.createTestWorkload()
        workloadName = "basicWorkload"
        changeState = ChangeState(config)

        nSubs = 1
        nJobs = 2
        jobGroupList = self.createJobGroups(nSubs=nSubs, nJobs=nJobs,
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=os.path.join(self.testDir,
                                                                      'workloadTest',
                                                                      workloadName),
                                            site="se.T2_US_UCSD")
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')
        jobSubmitter = JobSubmitterPoller(config=config)
        jobSubmitter.algorithm()
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, nSubs * nJobs)

        baAPI.track()
        idleJobs = baAPI._loadByStatus(status='Idle')

        ##
        # Make one of the sites in the sitelist to be True for ABORTED/DRAINING/DOWN
        # updateSiteInformation() method should edit the classAd for all the jobs
        # that are bound for the site
        # Check the Q manually using condor_q -l <job id>
        #
        jtok = baAPI.updateSiteInformation(idleJobs, "T2_US_UCSD", True)
        # Identity check: None is a singleton and must not be compared with !=
        if jtok is not None:
            baAPI.kill(jtok, errorCode=71301)  # errorCode can be either 71301/71302/71303 (Aborted/Draining/Down)

        return
Пример #11
0
    def __init__(self, config):
        """
        Initialise class members

        Creates the ChangeState helper and the WMBS DAO used to load jobs,
        reads the JobArchiver tunables, makes sure the log directory exists
        and tries to build the work queue from the configuration.

        :param config: agent configuration; the JobArchiver section is read
        :raises JobArchiverPollerException: if the log directory cannot be
            determined or created
        """
        BaseWorkerThread.__init__(self)
        self.config = config
        self.changeState = ChangeState(self.config)

        myThread = threading.currentThread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)
        self.loadAction = self.daoFactory(
            classname="Jobs.LoadFromIDWithWorkflow")

        # Variables
        self.numberOfJobsToCluster = getattr(self.config.JobArchiver,
                                             "numberOfJobsToCluster", 1000)
        self.numberOfJobsToArchive = getattr(self.config.JobArchiver,
                                             "numberOfJobsToArchive", 10000)

        # initialize the alert framework (if available)
        self.initAlerts(compName="JobArchiver")

        try:
            # The default is built eagerly, so componentDir is read even
            # when logDir is configured explicitly
            self.logDir = getattr(
                config.JobArchiver, 'logDir',
                os.path.join(config.JobArchiver.componentDir, 'logDir'))
            if not os.path.isdir(self.logDir):
                os.makedirs(self.logDir)
        except Exception as ex:
            msg = "Unhandled exception while setting up logDir!\n"
            msg += str(ex)
            logging.exception(msg)
            raise JobArchiverPollerException(msg)

        try:
            self.workQueue = queueFromConfig(self.config)
        except Exception as ex:
            # Deliberately non-fatal: the failure is only logged, which
            # leaves self.workQueue unset on this path.
            # The separator keeps the exception text from being glued to
            # the fixed part of the message.
            msg = "Could not load workQueue: %s" % str(ex)
            logging.error(msg)
            # raise JobArchiverPollerException(msg)

        self.handleWorkflowInjection = getattr(self.config.JobArchiver,
                                               'handleInjected', True)

        return
Пример #12
0
    def testB_SpeedTest(self):
        """
        _SpeedTest_

        Tests the components, as in sees if they load.
        Otherwise does nothing.
        """
        return
        myThread = threading.currentThread()

        config = self.getConfig()

        self.nJobs = 2000

        testJobGroup = self.createTestJobGroup()

        changer = ChangeState(config)

        cacheDir = os.path.join(self.testDir, 'test')

        for job in testJobGroup.jobs:
            job["outcome"] = "success"
            job.save()
            path = os.path.join(cacheDir, job['name'])
            os.makedirs(path)
            f = open('%s/%s.out' %(path, job['name']),'w')
            f.write(job['name'])
            f.close()
            job.setCache(path)

        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'complete', 'executing')
        changer.propagate(testJobGroup.jobs, 'success', 'complete')




        testJobArchiver = JobArchiverPoller(config = config)
        cProfile.runctx("testJobArchiver.algorithm()", globals(), locals(), filename = "testStats.stat")


        p = pstats.Stats('testStats.stat')
        p.sort_stats('cumulative')
        p.print_stats(.2)

        return
Пример #13
0
    def __init__(self, config):
        """
        Set up the poller: keep the configuration, build the ChangeState
        and BossAirAPI helpers and create the WMBS DAOs for listing jobs
        and setting the FWJR path.
        """

        BaseWorkerThread.__init__(self)
        self.config = config

        currentThread = threading.currentThread()
        self.changeState = ChangeState(self.config)
        self.bossAir = BossAirAPI(config=config)

        # DAO factory bound to the logger and DB interface of this thread
        self.daoFactory = DAOFactory(
            package="WMCore.WMBS",
            logger=currentThread.logger,
            dbinterface=currentThread.dbi,
        )

        self.jobListAction = self.daoFactory(classname="Jobs.GetAllJobs")
        self.setFWJRAction = self.daoFactory(classname="Jobs.SetFWJRPath")
Пример #14
0
    def testCheck(self):
        """
        Test ChangeState.check(): every transition listed in
        self.transitions must be accepted, while a move from any known
        state to a made-up state must raise AssertionError.
        """
        change = ChangeState(self.config, "changestate_t")

        # All allowed transitions must pass without raising
        for oldState, allowedTargets in self.transitions.items():
            for newState in allowedTargets:
                change.check(newState, oldState)

        bogusStates = ['dummy1', 'dummy2', 'dummy3', 'dummy4']

        # Transitions into states that do not exist must be rejected
        for oldState in self.transitions:
            for newState in bogusStates:
                self.assertRaises(AssertionError, change.check, newState, oldState)
        return
Пример #15
0
    def __init__(self, config):
        """
        Initialise class members

        Creates the WMBS DAOs, the ChangeState helper and the ACDC data
        collection service, and reads the ErrorHandler tunables.

        maxRetries may be a single value or a dict keyed by job type; a
        single value is wrapped as {'default': value}.

        :param config: agent configuration; the ErrorHandler and ACDC
                       sections are read
        :raises ErrorHandlerException: if maxRetries is a dict without a
                                       'default' entry
        """
        BaseWorkerThread.__init__(self)
        self.config = config

        myThread = threading.currentThread()

        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)
        self.changeState = ChangeState(self.config)

        self.maxRetries = self.config.ErrorHandler.maxRetries
        # isinstance also accepts dict subclasses, which an exact type
        # comparison would wrongly wrap into {'default': <dict>}
        if not isinstance(self.maxRetries, dict):
            self.maxRetries = {'default': self.maxRetries}
        if 'default' not in self.maxRetries:
            raise ErrorHandlerException(
                'Max retries for the default job type must be specified')

        self.maxProcessSize = getattr(self.config.ErrorHandler,
                                      'maxProcessSize', 250)
        # Work on a copy: the list is extended below and the original
        # belongs to the shared configuration object
        self.exitCodes = list(getattr(self.config.ErrorHandler,
                                      'failureExitCodes', []))
        self.maxFailTime = getattr(self.config.ErrorHandler, 'maxFailTime',
                                   32 * 3600)
        self.readFWJR = getattr(self.config.ErrorHandler, 'readFWJR', False)
        self.passCodes = getattr(self.config.ErrorHandler, 'passExitCodes', [])

        self.getJobs = self.daoFactory(classname="Jobs.GetAllJobs")
        self.idLoad = self.daoFactory(classname="Jobs.LoadFromIDWithType")
        self.loadAction = self.daoFactory(classname="Jobs.LoadForErrorHandler")

        self.dataCollection = DataCollectionService(
            url=config.ACDC.couchurl, database=config.ACDC.database)

        # initialize the alert framework (if available - config.Alert present)
        #    self.sendAlert will be then be available
        self.initAlerts(compName="ErrorHandler")

        # Some exit codes imply an immediate failure, non-configurable
        self.exitCodes.extend(WMJobPermanentSystemErrors)

        return
Пример #16
0
    def __init__(self, config):
        """
        Set up the ErrorHandler poller.

        maxRetries is taken from config.ErrorHandler when the configuration
        has a Tier0Feeder section; otherwise it is read from the
        "MaxRetries" entry of the agent configuration that ReqMgrAux
        returns for config.Agent.hostName.  A value that is not a dict is
        wrapped as {'default': value}; a dict without a 'default' key
        raises ErrorHandlerException.
        """
        BaseWorkerThread.__init__(self)
        self.config = config

        currentThread = threading.currentThread()

        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=currentThread.logger,
                                     dbinterface=currentThread.dbi)
        self.changeState = ChangeState(self.config)

        if not hasattr(self.config, "Tier0Feeder"):
            # Regular agent: the retry limits come from the ReqMgr aux DB
            self.reqAuxDB = ReqMgrAux(self.config.General.ReqMgr2ServiceURL)
            agentConfig = self.reqAuxDB.getWMAgentConfig(self.config.Agent.hostName)
            self.maxRetries = agentConfig.get("MaxRetries")
        else:
            # Tier0: no aux DB, limits come from the local configuration
            self.reqAuxDB = None
            self.maxRetries = self.config.ErrorHandler.maxRetries

        if not isinstance(self.maxRetries, dict):
            self.maxRetries = dict(default=self.maxRetries)
        if 'default' not in self.maxRetries:
            raise ErrorHandlerException('Max retries for the default job type must be specified')

        errorConfig = self.config.ErrorHandler
        self.exitCodesNoRetry = []
        self.maxProcessSize = getattr(errorConfig, 'maxProcessSize', 250)
        self.maxFailTime = getattr(errorConfig, 'maxFailTime', 32 * 3600)
        self.readFWJR = getattr(errorConfig, 'readFWJR', False)
        self.passCodes = getattr(errorConfig, 'passExitCodes', [])

        self.getJobs = self.daoFactory(classname="Jobs.GetAllJobs")
        self.idLoad = self.daoFactory(classname="Jobs.LoadFromIDWithType")
        self.loadAction = self.daoFactory(classname="Jobs.LoadForErrorHandler")

        self.dataCollection = DataCollectionService(url=config.ACDC.couchurl,
                                                    database=config.ACDC.database)

        return
Пример #17
0
    def __init__(self, config):
        """
        Initialise class members

        Creates the ChangeState helper and the WMBS DAO used to load jobs,
        reads numberOfJobsToCluster and makes sure both the log directory
        and the upload/publish directory exist.

        :param config: agent configuration; the JobArchiver section is read
        :raises JobArchiverPollerException: if either directory cannot be
            determined or created
        """
        BaseWorkerThread.__init__(self)
        self.config = config
        self.changeState = ChangeState(self.config)

        myThread = threading.currentThread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)
        self.loadAction = self.daoFactory(
            classname="Jobs.LoadFromIDWithWorkflow")

        # Variables
        self.numberOfJobsToCluster = getattr(self.config.JobArchiver,
                                             "numberOfJobsToCluster", 1000)

        # initialize the alert framework (if available)
        self.initAlerts(compName="JobArchiver")

        try:
            self.uploadPublishDir = getattr(
                self.config.JobArchiver, 'uploadPublishDir',
                os.path.join(config.JobArchiver.componentDir, 'publishDir'))
            self.logDir = getattr(
                config.JobArchiver, 'logDir',
                os.path.join(config.JobArchiver.componentDir, 'logDir'))
            if not os.path.isdir(self.logDir):
                os.makedirs(self.logDir)
            if not os.path.isdir(self.uploadPublishDir):
                os.makedirs(self.uploadPublishDir)
        # 'except X as name' is valid on Python 2.6+ and Python 3; the old
        # comma form is a syntax error on Python 3
        except Exception as ex:
            msg = "Unhandled exception while setting up logDir and/or uploadPublishDir!\n"
            msg += str(ex)
            logging.error(msg)
            self.sendAlert(6, msg=msg)
            try:
                # Best-effort diagnostics: self.logDir may not have been
                # assigned yet if the failure happened early
                logging.debug("Directory: %s" % self.logDir)
                logging.debug("Config: %s" % config)
            except Exception:
                # Never let the extra debug output mask the real error
                pass
            raise JobArchiverPollerException(msg)
Пример #18
0
    def __init__(self, config):
        """
        Set up the RetryManager poller.

        Creates the WMBS DAO factory, the plugin factory, the ChangeState
        helper and the GetAllJobs DAO, then loads every plugin named in the
        job-type -> plugin mapping of config.RetryManager.plugins.  A
        'default' entry pointing at 'DefaultRetryAlgo' is added to that
        mapping when it has none.

        A plugin that fails to load is logged, reported as an alert and
        turned into a RetryManagerException.
        """
        BaseWorkerThread.__init__(self)
        self.config = config

        currentThread = threading.currentThread()

        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=currentThread.logger,
                                     dbinterface=currentThread.dbi)

        pluginPath = getattr(self.config.RetryManager,
                             "pluginPath",
                             "WMComponent.RetryManager.PlugIns")
        self.pluginFactory = WMFactory("plugins", pluginPath)

        self.changeState = ChangeState(self.config)
        self.getJobs = self.daoFactory(classname="Jobs.GetAllJobs")

        # Alert framework first, so that self.sendAlert() can be used below
        self.initAlerts(compName="RetryManager")

        # Loaded plugin objects, keyed by plugin name
        self.plugins = {}

        self.typePluginsAssoc = getattr(self.config.RetryManager, 'plugins', {})
        self.typePluginsAssoc.setdefault('default', 'DefaultRetryAlgo')

        for pluginName in self.typePluginsAssoc.values():
            try:
                self.plugins[pluginName] = self.pluginFactory.loadObject(
                    classname=pluginName, args=config)
            except Exception as ex:
                msg = "Error loading plugin %s on path %s\n%s" % (
                    pluginName, pluginPath, str(ex))
                logging.error(msg)
                self.sendAlert(6, msg=msg)
                raise RetryManagerException(msg)

        return
Пример #19
0
    def testC_Job(self):
        """
        WMComponent_t.RetryManager_t.RetryManager_t:testJob()

        Mimics creation of the component and tests jobs that failed in the
        job stage: the jobs are driven into jobcooloff and must stay there
        while their state time is 50 seconds old, but be back in Created
        once it is 150 seconds old.
        """
        jobGroup = self.createTestJobGroup(nJobs=self.nJobs)

        config = self.getConfig()
        changer = ChangeState(config)
        # (new state, old state) pairs, applied in this exact order
        for newState, oldState in (('created', 'new'),
                                   ('executing', 'created'),
                                   ('complete', 'executing'),
                                   ('jobfailed', 'complete'),
                                   ('jobcooloff', 'jobfailed')):
            changer.propagate(jobGroup.jobs, newState, oldState)

        def countJobs(state):
            """Number of job ids the GetAllJobs DAO reports for state."""
            return len(self.getJobs.execute(state=state))

        def ageJobs(seconds):
            """Move the state time of every test job into the past."""
            for job in jobGroup.jobs:
                self.setJobTime.execute(jobID=job["id"],
                                        stateTime=int(time.time()) - seconds)

        self.assertEqual(countJobs('JobCooloff'), self.nJobs)

        testRetryManager = RetryManagerPoller(config)
        testRetryManager.setup(None)

        # 50 seconds old: the jobs must remain in cooloff
        ageJobs(50)
        testRetryManager.algorithm(None)
        self.assertEqual(countJobs('JobCooloff'), self.nJobs)

        # 150 seconds old: the jobs must have been released
        ageJobs(150)
        testRetryManager.algorithm(None)
        self.assertEqual(countJobs('JobCooloff'), 0)
        self.assertEqual(countJobs('Created'), self.nJobs)
        return
Пример #20
0
    def testZ_Profile(self):
        """
        _testProfile_

        Do a full profile of the poller
        """

        return

        import cProfile, pstats

        nJobs = 1000

        testJobGroup = self.createTestJobGroup(nJobs=nJobs)

        config = self.getConfig()
        changer = ChangeState(config)
        changer.propagate(testJobGroup.jobs, 'createfailed', 'new')

        idList = self.getJobs.execute(state='CreateFailed')
        self.assertEqual(len(idList), nJobs)

        testErrorHandler = ErrorHandlerPoller(config)
        testErrorHandler.setup(None)
        startTime = time.time()
        #cProfile.runctx("testErrorHandler.algorithm()", globals(), locals(), filename = "profStats.stat")
        testErrorHandler.algorithm()
        stopTime = time.time()

        idList = self.getJobs.execute(state='CreateFailed')
        self.assertEqual(len(idList), 0)

        idList = self.getJobs.execute(state='CreateCooloff')
        self.assertEqual(len(idList), nJobs)

        print("Took %f seconds to run polling algo" % (stopTime - startTime))

        p = pstats.Stats('profStats.stat')
        p.sort_stats('cumulative')
        p.print_stats(0.2)

        return
Пример #21
0
    def __init__(self, config):
        """
        Set up the JobTracker poller: keep the configuration, build the
        ChangeState and BossAirAPI helpers, create the WMBS DAO that lists
        jobs and initialise the alert framework.
        """

        BaseWorkerThread.__init__(self)
        self.config = config

        currentThread = threading.currentThread()

        self.changeState = ChangeState(self.config)
        self.bossAir = BossAirAPI(config=config)

        # DAO factory bound to the logger and DB interface of this thread
        self.daoFactory = DAOFactory(
            package="WMCore.WMBS",
            logger=currentThread.logger,
            dbinterface=currentThread.dbi,
        )

        self.jobListAction = self.daoFactory(classname="Jobs.GetAllJobs")

        # Alert framework, if it is available
        self.initAlerts(compName="JobTracker")
Пример #22
0
def thrashCouch(iterations=500):
    """
    _thrashCouch_

    Repeatedly record a full chain of job state transitions through
    ChangeState.recordInCouch().

    Each iteration obtains a fresh batch of jobs from createJobs() and
    records, in order: new -> created -> executing -> complete -> success
    -> cleanout. The framework job report loaded from LoadTest00.pkl is
    attached to every job just before the 'success' transition and removed
    again before 'cleanout'.

    :param iterations: number of batches to push through the chain
        (defaults to 500, the value that used to be hard-coded).

    The configuration is loaded from the file named by the WMAGENT_CONFIG
    environment variable; a KeyError is raised if it is not set.
    """
    config = loadConfigurationFile(os.environ["WMAGENT_CONFIG"])
    changeState = ChangeState(config)

    myReport = Report()
    myReport.unpersist(
        os.path.join(
            getWMBASE(),
            "test/python/WMComponent_t/JobAccountant_t/fwjrs/LoadTest00.pkl"))

    for _ in range(iterations):
        jobs = createJobs()
        changeState.recordInCouch(jobs, "created", "new")
        changeState.recordInCouch(jobs, "executing", "created")
        changeState.recordInCouch(jobs, "complete", "executing")

        # The report only accompanies the 'success' transition
        for job in jobs:
            job["fwjr"] = myReport

        changeState.recordInCouch(jobs, "success", "complete")

        # Detach the report again before the final transition
        for job in jobs:
            job["fwjr"] = None

        changeState.recordInCouch(jobs, "cleanout", "success")
    return
Пример #23
0
    def testG_monitoringDAO(self):
        """
        _monitoringDAO_

        Because I need a test for the monitoring DAO
        """

        return

        myThread = threading.currentThread()

        config = self.getConfig()

        changeState = ChangeState(config)

        baAPI = BossAirAPI(config=config)

        # Create some jobs
        nJobs = 10

        jobDummies = self.createDummyJobs(nJobs=nJobs)

        # Prior to building the job, each job must have a plugin
        # and user assigned
        for job in jobDummies:
            job['plugin'] = 'TestPlugin'
            job['owner'] = 'tapas'
            job['location'] = 'T2_US_UCSD'
            job.save()

        baAPI.submit(jobs=jobDummies)

        results = baAPI.monitor()

        self.assertEqual(len(results), nJobs)
        for job in results:
            self.assertEqual(job['plugin'], 'CondorPlugin')

        return
Пример #24
0
    def __init__(self, config):
        """
        Initialise class members

        :param config: component configuration object. This constructor
            reads config.ErrorHandler (maxProcessSize, maxFailTime,
            readFWJR, passExitCodes and, when a Tier0Feeder section exists,
            maxRetries), config.ACDC (couchurl, database) and, when no
            Tier0Feeder section exists, config.General.ReqMgr2ServiceURL.
        """
        BaseWorkerThread.__init__(self)
        self.config = config

        # threading.currentThread() is a deprecated alias of current_thread();
        # both return the same thread object.
        myThread = threading.current_thread()

        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)
        self.changeState = ChangeState(self.config)

        if hasattr(self.config, "Tier0Feeder"):
            # With a Tier0Feeder section no ReqMgr auxiliary DB is used and
            # maxRetries is taken straight from the ErrorHandler section.
            self.reqAuxDB = None
            self.maxRetries = self.config.ErrorHandler.maxRetries
        else:
            # NOTE(review): maxRetries is not set on this path; presumably
            # setupComponentParam() provides it - confirm.
            self.reqAuxDB = ReqMgrAux(self.config.General.ReqMgr2ServiceURL)

        self.exitCodesNoRetry = []
        # Optional ErrorHandler settings with their fallback values
        self.maxProcessSize = getattr(self.config.ErrorHandler,
                                      'maxProcessSize', 250)
        self.maxFailTime = getattr(self.config.ErrorHandler, 'maxFailTime',
                                   32 * 3600)
        self.readFWJR = getattr(self.config.ErrorHandler, 'readFWJR', False)
        self.passCodes = getattr(self.config.ErrorHandler, 'passExitCodes', [])

        self.getJobs = self.daoFactory(classname="Jobs.GetAllJobs")
        self.idLoad = self.daoFactory(classname="Jobs.LoadFromIDWithType")
        self.loadAction = self.daoFactory(classname="Jobs.LoadForErrorHandler")

        self.dataCollection = DataCollectionService(
            url=config.ACDC.couchurl, database=config.ACDC.database)

        self.setupComponentParam()

        return
Пример #25
0
    def setUp(self):
        """
        Prepare the test environment: logging, database connection and WMBS
        schema, the couch databases, a ChangeState instance and a work
        directory.
        """
        # The unused current-thread lookup that used to open this method
        # has been removed.
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=["WMCore.WMBS"],
                                useDefault=False)
        self.databaseName = "couchapp_t_0"

        # Setup config for couch connections
        config = self.testInit.getConfiguration()

        # Database names are needed both for setup and for connecting
        jobsDBName = "%s/jobs" % config.JobStateMachine.couchDBName
        fwjrsDBName = "%s/fwjrs" % config.JobStateMachine.couchDBName
        summaryDBName = config.JobStateMachine.summaryStatsDBName

        self.testInit.setupCouch(self.databaseName, "WorkloadSummary")
        self.testInit.setupCouch(jobsDBName, "JobDump")
        self.testInit.setupCouch(fwjrsDBName, "FWJRDump")
        self.testInit.setupCouch(summaryDBName, "SummaryStats")

        # Create couch server and connect to databases
        self.couchdb = CouchServer(config.JobStateMachine.couchurl)
        self.jobsdatabase = self.couchdb.connectDatabase(jobsDBName)
        self.fwjrdatabase = self.couchdb.connectDatabase(fwjrsDBName)
        self.statsumdatabase = self.couchdb.connectDatabase(summaryDBName)

        # Create changeState
        self.changeState = ChangeState(config)
        self.config = config

        # Create testDir
        self.testDir = self.testInit.generateWorkDir()

        return
Пример #26
0
    def testC_Jobs(self):
        """
        WMComponent_t.ErrorHandler_t.ErrorHandler_t.testJobs()

        Walk a job group through to 'jobfailed', run one ErrorHandlerPoller
        cycle and check that no job is left in 'JobFailed' while all of them
        are in 'JobCooloff'.
        """
        nameOfWorkload = 'TestWorkload'
        self.createWorkload(workloadName=nameOfWorkload)

        pickledWorkload = os.path.join(self.testDir, 'workloadTest',
                                       nameOfWorkload, 'WMSandbox',
                                       'WMWorkload.pkl')
        jobGroup = self.createTestJobGroup(nJobs=self.nJobs,
                                           workloadPath=pickledWorkload)

        cfg = self.getConfig()
        stateChanger = ChangeState(cfg)

        # (new state, old state) pairs, applied in order
        transitions = (('created', 'new'),
                       ('executing', 'created'),
                       ('complete', 'executing'),
                       ('jobfailed', 'complete'))
        for newState, oldState in transitions:
            stateChanger.propagate(jobGroup.jobs, newState, oldState)

        failedIds = self.getJobs.execute(state='JobFailed')
        self.assertEqual(len(failedIds), self.nJobs)

        poller = ErrorHandlerPoller(cfg)
        # the test runs without a ReqMgr auxiliary DB
        poller.reqAuxDB = None
        poller.setup(None)
        poller.algorithm(None)

        stillFailed = self.getJobs.execute(state='JobFailed')
        self.assertEqual(len(stillFailed), 0)

        coolingOff = self.getJobs.execute(state='JobCooloff')
        self.assertEqual(len(coolingOff), self.nJobs)
        return
Пример #27
0
    def __init__(self, **configDict):
        """
        init jobCreator

        Required keyword arguments (a missing one raises KeyError):
          couchURL, defaultRetries, couchDBName - copied into the
              JobStateMachine section of a new Configuration
          jobCacheDir, defaultJobType - stored on the instance

        Optional keyword arguments:
          fileLoadLimit - stored as self.limit, defaults to 500
        """

        # threading.currentThread() is a deprecated alias of current_thread();
        # both return the same thread object.
        myThread = threading.current_thread()

        self.transaction = myThread.transaction

        # DAO factory for WMBS objects
        self.daoFactory = DAOFactory(package="WMCore.WMBS", logger=logging,
                                     dbinterface=myThread.dbi)

        # WMCore splitter factory for splitting up jobs.
        self.splitterFactory = SplitterFactory()

        # Minimal configuration holding only what ChangeState is given below
        config = Configuration()
        config.section_("JobStateMachine")
        config.JobStateMachine.couchurl = configDict["couchURL"]
        config.JobStateMachine.couch_retries = configDict["defaultRetries"]
        config.JobStateMachine.couchDBName = configDict["couchDBName"]

        self.config = config

        # Variables
        self.jobCacheDir = configDict['jobCacheDir']
        self.defaultJobType = configDict['defaultJobType']
        self.limit = configDict.get('fileLoadLimit', 500)

        self.createWorkArea = CreateWorkArea()

        self.changeState = ChangeState(self.config)

        return
Пример #28
0
    def testB_PluginTest(self):
        """
        _PluginTest_

        Check that submit, track, getComplete and removeComplete work when
        called through the BossAir API with a plugin rather than directly.

        Ten dummy jobs are moved to 'executing', submitted with the
        'TestPlugin' plugin, tracked once, and finally removed; the
        bl_runjob table is inspected before and after the removal.
        """

        # threading.currentThread() is a deprecated alias of current_thread();
        # both return the same thread object.
        myThread = threading.current_thread()

        config = self.getConfig()

        baAPI = BossAirAPI(config=config)

        # Create some jobs
        nJobs = 10

        jobDummies = self.createDummyJobs(nJobs=nJobs, location='Xanadu')
        changeState = ChangeState(config)
        changeState.propagate(jobDummies, 'created', 'new')
        changeState.propagate(jobDummies, 'executing', 'created')

        # Prior to building the job, each job must have a plugin
        # and user assigned
        for job in jobDummies:
            job['plugin'] = 'TestPlugin'
            job['owner'] = 'tapas'

        baAPI.submit(jobs=jobDummies)

        newJobs = baAPI._loadByStatus(status='New')
        self.assertEqual(len(newJobs), nJobs)

        # Right after submission every job should be listed as running
        runningJobs = baAPI._listRunJobs()
        self.assertEqual(len(runningJobs), nJobs)

        # Test Plugin should complete all jobs
        baAPI.track()

        # Should be no more running jobs
        runningJobs = baAPI._listRunJobs()
        self.assertEqual(len(runningJobs), 0)

        # Check if they're complete
        completeJobs = baAPI.getComplete()
        self.assertEqual(len(completeJobs), nJobs)

        # Do this test because BossAir is specifically built
        # to keep it from finding completed jobs
        runJobQuery = "SELECT id FROM bl_runjob"
        result = myThread.dbi.processData(runJobQuery)[0].fetchall()
        self.assertEqual(len(result), nJobs)

        baAPI.removeComplete(jobs=jobDummies)

        # After removal the table should be empty
        result = myThread.dbi.processData(runJobQuery)[0].fetchall()
        self.assertEqual(len(result), 0)

        return
Пример #29
0
    def testY_MultipleIterations(self):
        """
        _MultipleIterations_

        Paranoia based check to see if I'm saving class instances correctly

        The same RetryManagerPoller instance is run over two successive job
        groups. After both runs no job may remain in 'SubmitCooloff' and
        'Created' must hold the jobs of both groups.
        """

        testJobGroup = self.createTestJobGroup(nJobs=self.nJobs)

        config = self.getConfig()
        changer = ChangeState(config)
        # State names are written in lowercase here, as in the second run
        # below (this call used to pass 'Created').
        changer.propagate(testJobGroup.jobs, 'submitfailed', 'created')
        changer.propagate(testJobGroup.jobs, 'submitcooloff', 'submitfailed')

        idList = self.getJobs.execute(state='SubmitCooloff')
        self.assertEqual(len(idList), self.nJobs)

        testRetryManager = RetryManagerPoller(config)
        testRetryManager.setup(None)

        # State time 50 seconds in the past: the assertion below expects
        # the jobs to stay in cooloff
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID=job["id"],
                                    stateTime=int(time.time()) - 50)

        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state='SubmitCooloff')
        self.assertEqual(len(idList), self.nJobs)

        # State time 150 seconds in the past: the jobs are expected to
        # leave cooloff
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID=job["id"],
                                    stateTime=int(time.time()) - 150)

        testRetryManager.algorithm(None)

        idList = self.getJobs.execute(state='SubmitCooloff')
        self.assertEqual(len(idList), 0)

        idList = self.getJobs.execute(state='Created')
        self.assertEqual(len(idList), self.nJobs)

        # Make a new jobGroup for a second run
        testJobGroup = self.createTestJobGroup(nJobs=self.nJobs)

        # Set job state
        changer.propagate(testJobGroup.jobs, 'submitfailed', 'created')
        changer.propagate(testJobGroup.jobs, 'submitcooloff', 'submitfailed')

        # Set them to go off
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID=job["id"],
                                    stateTime=int(time.time()) - 200)

        testRetryManager.algorithm(None)

        idList = self.getJobs.execute(state='SubmitCooloff')
        self.assertEqual(len(idList), 0)

        # Jobs from both groups are now counted
        idList = self.getJobs.execute(state='Created')
        self.assertEqual(len(idList), self.nJobs * 2)

        return
Пример #30
0
    def testI_MultipleJobTypes(self):
        """
        _testI_MultipleJobTypes_

        Check that we can configure different retry algorithms for different
        job types, including a default for nonspecified types.
        Also check that two job types can share the same retry algorithm
        but with different parameters
        """

        # Let's create 4 job groups
        processingJobGroup = self.createTestJobGroup(nJobs=10, retryOnce=True)
        productionJobGroup = self.createTestJobGroup(nJobs=15,
                                                     subType="Production",
                                                     retryOnce=True)
        mergeJobGroup = self.createTestJobGroup(nJobs=20,
                                                subType="Merge",
                                                retryOnce=True)
        skimJobGroup = self.createTestJobGroup(nJobs=5,
                                               subType="Skim",
                                               retryOnce=True)

        # Set an adequate config
        # Processing jobs get the PauseAlgo with pauseCount 4
        # Production jobs get the ExponentialAlgo
        # Merge jobs get the PauseAlgo but with pauseCount 2 which is the default
        # Skim jobs are not configured, so they get the default SquaredAlgo
        config = self.getConfig()
        config.RetryManager.plugins = {
            'Processing': 'PauseAlgo',
            'Production': 'ExponentialAlgo',
            'Merge': 'PauseAlgo',
            'default': 'SquaredAlgo'
        }
        config.RetryManager.section_("PauseAlgo")
        config.RetryManager.PauseAlgo.section_("Processing")
        config.RetryManager.PauseAlgo.Processing.coolOffTime = {
            'create': 30,
            'submit': 30,
            'job': 30
        }
        config.RetryManager.PauseAlgo.Processing.pauseCount = 4
        config.RetryManager.PauseAlgo.section_("default")
        config.RetryManager.PauseAlgo.default.coolOffTime = {
            'create': 60,
            'submit': 60,
            'job': 60
        }
        config.RetryManager.PauseAlgo.default.pauseCount = 2
        config.RetryManager.section_("ExponentialAlgo")
        config.RetryManager.ExponentialAlgo.section_("Production")
        config.RetryManager.ExponentialAlgo.Production.coolOffTime = {
            'create': 30,
            'submit': 30,
            'job': 30
        }
        config.RetryManager.ExponentialAlgo.section_("default")
        config.RetryManager.ExponentialAlgo.default.coolOffTime = {
            'create': 60,
            'submit': 60,
            'job': 60
        }
        config.RetryManager.section_("SquaredAlgo")
        config.RetryManager.SquaredAlgo.section_("Skim")
        config.RetryManager.SquaredAlgo.Skim.coolOffTime = {
            'create': 30,
            'submit': 30,
            'job': 30
        }
        config.RetryManager.SquaredAlgo.section_("default")
        config.RetryManager.SquaredAlgo.default.coolOffTime = {
            'create': 60,
            'submit': 60,
            'job': 60
        }

        # Start the state changer and RetryManager
        changer = ChangeState(config)
        testRetryManager = RetryManagerPoller(config)
        testRetryManager.setup(None)

        # Create the jobs for the first time
        changer.propagate(processingJobGroup.jobs, 'created', 'new')

        # Let's start with the processing jobs and the pauseAlgo
        for count in range(1, 5):
            # Fail the jobs
            changer.propagate(processingJobGroup.jobs, 'executing', 'created')
            changer.propagate(processingJobGroup.jobs, 'jobfailed',
                              'executing')
            changer.propagate(processingJobGroup.jobs, 'jobcooloff',
                              'jobfailed')

            # Check  that the cooloff time is strictly enforced
            # First a job time just below the cooloff time
            for job in processingJobGroup.jobs:
                self.setJobTime.execute(jobID=job["id"],
                                        stateTime=int(time.time()) -
                                        30 * pow(count, 2) + 5)
            testRetryManager.algorithm(None)
            idList = self.getJobs.execute(state='JobCoolOff')
            self.assertEqual(
                len(idList), len(processingJobGroup.jobs),
                "Jobs went into cooloff without the proper timing")

            # Now above the cooloff time
            for job in processingJobGroup.jobs:
                self.setJobTime.execute(jobID=job["id"],
                                        stateTime=int(time.time()) -
                                        30 * pow(count, 2) - 5)
            testRetryManager.algorithm(None)

            # Make sure the jobs get created again or go to paused
            if count < 4:
                idList = self.getJobs.execute(state='created')
            else:
                idList = self.getJobs.execute(state='jobpaused')
            self.assertEqual(len(idList), len(processingJobGroup.jobs),
                             "Jobs didn't change state correctly")

        # Unpause them so they don't interfere with subsequent tests
        changer.propagate(processingJobGroup.jobs, 'created', 'jobpaused')
        changer.propagate(processingJobGroup.jobs, 'executing', 'created')

        # Now the production jobs and the exponential algo
        changer.propagate(productionJobGroup.jobs, 'created', 'new')

        for count in range(1, 3):
            changer.propagate(productionJobGroup.jobs, 'executing', 'created')
            changer.propagate(productionJobGroup.jobs, 'jobfailed',
                              'executing')
            changer.propagate(productionJobGroup.jobs, 'jobcooloff',
                              'jobfailed')

            for job in productionJobGroup.jobs:
                self.setJobTime.execute(jobID=job["id"],
                                        stateTime=int(time.time()) -
                                        pow(30, count) + 5)
            testRetryManager.algorithm(None)
            idList = self.getJobs.execute(state='JobCoolOff')
            self.assertEqual(
                len(idList), len(productionJobGroup.jobs),
                "Jobs went into cooloff without the proper timing")
            for job in productionJobGroup.jobs:
                self.setJobTime.execute(jobID=job["id"],
                                        stateTime=int(time.time()) -
                                        pow(30, count) - 5)
            testRetryManager.algorithm(None)

            idList = self.getJobs.execute(state='created')
            self.assertEqual(len(idList), len(productionJobGroup.jobs),
                             "Jobs didn't change state correctly")

        # Send them to executing
        changer.propagate(productionJobGroup.jobs, 'executing', 'created')

        # Now the merge jobs and the paused algo with different parameters
        changer.propagate(mergeJobGroup.jobs, 'created', 'new')

        for count in range(1, 3):
            changer.propagate(mergeJobGroup.jobs, 'executing', 'created')
            changer.propagate(mergeJobGroup.jobs, 'jobfailed', 'executing')
            changer.propagate(mergeJobGroup.jobs, 'jobcooloff', 'jobfailed')

            for job in mergeJobGroup.jobs:
                self.setJobTime.execute(jobID=job["id"],
                                        stateTime=int(time.time()) -
                                        30 * pow(count, 2) - 5)
            testRetryManager.algorithm(None)
            idList = self.getJobs.execute(state='JobCoolOff')
            self.assertEqual(
                len(idList), len(mergeJobGroup.jobs),
                "Jobs went into cooloff without the proper timing")

            for job in mergeJobGroup.jobs:
                self.setJobTime.execute(jobID=job["id"],
                                        stateTime=int(time.time()) -
                                        60 * pow(count, 2) - 5)
            testRetryManager.algorithm(None)

            if count < 2:
                idList = self.getJobs.execute(state='created')
            else:
                idList = self.getJobs.execute(state='jobpaused')
            self.assertEqual(len(idList), len(mergeJobGroup.jobs),
                             "Jobs didn't change state correctly")

        # Send them to executing
        changer.propagate(mergeJobGroup.jobs, 'created', 'jobpaused')
        changer.propagate(mergeJobGroup.jobs, 'executing', 'created')

        # Now the skim jobs and the squared algo
        changer.propagate(skimJobGroup.jobs, 'created', 'new')

        for count in range(1, 3):
            changer.propagate(skimJobGroup.jobs, 'executing', 'created')
            changer.propagate(skimJobGroup.jobs, 'jobfailed', 'executing')
            changer.propagate(skimJobGroup.jobs, 'jobcooloff', 'jobfailed')

            for job in skimJobGroup.jobs:
                self.setJobTime.execute(jobID=job["id"],
                                        stateTime=int(time.time()) -
                                        30 * pow(count, 2) + 5)
            testRetryManager.algorithm(None)
            idList = self.getJobs.execute(state='JobCoolOff')
            self.assertEqual(
                len(idList), len(skimJobGroup.jobs),
                "Jobs went into cooloff without the proper timing")
            for job in skimJobGroup.jobs:
                self.setJobTime.execute(jobID=job["id"],
                                        stateTime=int(time.time()) -
                                        30 * pow(count, 2) - 5)
            testRetryManager.algorithm(None)

            idList = self.getJobs.execute(state='created')
            self.assertEqual(len(idList), len(skimJobGroup.jobs),
                             "Jobs didn't change state correctly")