示例#1
0
class WorkQueueTest(WorkQueueTestCase):
    """
    _WorkQueueTest_
    
    """
    def setUp(self):
        """
        Prepare the emulators, workload specs, queues and sites used by the tests.

        Steps, in order:
          * switch on the PhEDEx, DBS and SiteDB emulators (requestMgr is
            passed as False) and write a WMAgent config file;
          * run WorkQueueTestCase.setUp;
          * build and save one MonteCarlo spec and four ReReco specs (plain,
            parent processing, blacklist, whitelist) under self.workDir;
          * create the global queue, two local queues whose parent is the
            global queue, and a standalone queue;
          * insert every site from the standalone queue's SiteDB mapping
            into ResourceControl and add it through the WMBS
            "Locations.New" DAO.
        """
        EmulatorHelper.setEmulators(phedex = True, dbs = True,
                                    siteDB = True, requestMgr = False)
        # set up WMAgent config file for couchdb
        self.configFile = EmulatorSetup.setupWMAgentConfig()

        WorkQueueTestCase.setUp(self)

        # Basic production Spec
        self.spec = monteCarloWorkload('testProduction', mcArgs)
        getFirstTask(self.spec).setSiteWhitelist(['T2_XX_SiteA', 'T2_XX_SiteB'])
        getFirstTask(self.spec).addProduction(totalevents = 10000)
        self.spec.setSpecUrl(os.path.join(self.workDir, 'testworkflow.spec'))
        self.spec.save(self.spec.specUrl())

        # Sample Tier1 ReReco spec
        self.processingSpec = rerecoWorkload('testProcessing', rerecoArgs)
        self.processingSpec.setSpecUrl(os.path.join(self.workDir,
                                                    'testProcessing.spec'))
        self.processingSpec.save(self.processingSpec.specUrl())

        # ReReco spec built from the parent-processing arguments
        self.parentProcSpec = rerecoWorkload('testParentProcessing', parentProcArgs)
        self.parentProcSpec.setSpecUrl(os.path.join(self.workDir,
                                                    'testParentProcessing.spec'))
        self.parentProcSpec.save(self.parentProcSpec.specUrl())

        # ReReco spec with blacklist
        self.blacklistSpec = rerecoWorkload('blacklistSpec', rerecoArgs)
        self.blacklistSpec.setSpecUrl(os.path.join(self.workDir,
                                                    'testBlacklist.spec'))
        getFirstTask(self.blacklistSpec).data.constraints.sites.blacklist = ['T2_XX_SiteA']
        self.blacklistSpec.save(self.blacklistSpec.specUrl())

        # ReReco spec with whitelist
        self.whitelistSpec = rerecoWorkload('whitelistlistSpec', rerecoArgs)
        self.whitelistSpec.setSpecUrl(os.path.join(self.workDir,
                                                    'testWhitelist.spec'))
        getFirstTask(self.whitelistSpec).data.constraints.sites.whitelist = ['T2_XX_SiteB']
        self.whitelistSpec.save(self.whitelistSpec.specUrl())

        # Dataset path of the processing spec's input, used by tests that
        # look up blocks in the mock DBS / PhEDEx
        inputDataset = getFirstTask(self.processingSpec).inputDataset()
        self.dataset = "/%s/%s/%s" % (inputDataset.primary,
                                     inputDataset.processed,
                                     inputDataset.tier)

        # Create queues
        globalCouchUrl = "%s/%s" % (self.testInit.couchUrl, self.globalQDB)
        self.globalQueue = globalQueue(DbName = self.globalQDB,
                                       InboxDbName = self.globalQInboxDB,
                                       QueueURL = globalCouchUrl)
        # A mid-level queue is deliberately not created here: it causes
        # database duplication.

        # copy jobStateMachine couchDB configuration here since we don't
        # want/need to pass whole configuration
        jobCouchConfig = Configuration()
        jobCouchConfig.section_("JobStateMachine")
        jobCouchConfig.JobStateMachine.couchurl = os.environ["COUCHURL"]
        jobCouchConfig.JobStateMachine.couchDBName = "testcouchdb"
        # copy bossAir configuration here since we don't want/need to pass
        # whole configuration
        bossAirConfig = Configuration()
        bossAirConfig.section_("BossAir")
        bossAirConfig.BossAir.pluginDir = "WMCore.BossAir.Plugins"
        bossAirConfig.BossAir.pluginNames = ["CondorPlugin"]
        bossAirConfig.section_("Agent")
        bossAirConfig.Agent.agentName = "TestAgent"

        self.localQueue = localQueue(DbName = self.localQDB,
                                     InboxDbName = self.localQInboxDB,
                                     ParentQueueCouchUrl = globalCouchUrl,
                                     JobDumpConfig = jobCouchConfig,
                                     BossAirConfig = bossAirConfig,
                                     CacheDir = self.workDir)

        self.localQueue2 = localQueue(DbName = self.localQDB2,
                                      InboxDbName = self.localQInboxDB2,
                                      ParentQueueCouchUrl = globalCouchUrl,
                                      JobDumpConfig = jobCouchConfig,
                                      BossAirConfig = bossAirConfig,
                                      CacheDir = self.workDir)

        # Configuration for the Alerts messaging framework: the work
        # (alerts) and control channel addresses. These are the destination
        # addresses where AlertProcessor:Receiver listens.
        config = Configuration()
        config.section_("Alert")
        config.Alert.address = "tcp://127.0.0.1:5557"
        config.Alert.controlAddr = "tcp://127.0.0.1:5559"

        # standalone queue for unit tests
        self.queue = WorkQueue(JobDumpConfig = jobCouchConfig,
                               BossAirConfig = bossAirConfig,
                               DbName = self.queueDB,
                               InboxDbName = self.queueInboxDB,
                               CacheDir = self.workDir,
                               config = config)

        # create relevant sites in wmbs
        rc = ResourceControl()
        # The DAO factory and the "Locations.New" DAO do not depend on the
        # site, so build them once instead of once per loop iteration.
        currentThread = threading.currentThread()
        daofactory = DAOFactory(package = "WMCore.WMBS",
                                logger = currentThread.logger,
                                dbinterface = currentThread.dbi)
        addLocation = daofactory(classname = "Locations.New")
        for site, se in self.queue.SiteDB.mapping.items():
            rc.insertSite(site, 100, se, cmsName = site)
            addLocation.execute(siteName = site, seName = se)


    def tearDown(self):
        """
        Undo setUp: run the base-class tearDown, delete the WMAgent config
        file and reset the emulators.

        The config removal and the emulator reset sit in ``finally`` blocks
        so that a failure in an earlier cleanup step does not leave the
        config file or the emulator switches behind for the next test.
        """
        try:
            WorkQueueTestCase.tearDown(self)
        finally:
            try:
                # Delete WMBSAgent config file
                EmulatorSetup.deleteConfig(self.configFile)
            finally:
                EmulatorHelper.resetEmulators()
        
    
    def createResubmitSpec(self, serverUrl, couchDB):
        """
        _createResubmitSpec_

        Build a bogus resubmit workload.

        Creates "TestWorkload" with a single "reco" processing task, reports
        one failed job carrying two input files to the DataCollectionService
        at serverUrl/couchDB, truncates the workload to
        "Resubmit_TestWorkload" at its first top level task and returns it.
        Sets self.site as a side effect.
        """
        self.site = "cmssrm.fnal.gov"
        owner, group = "evansde77", "DMWM"

        workload = WMWorkloadHelper(WMWorkload("TestWorkload"))
        recoTask = workload.newTask("reco")
        workload.setOwnerDetails(name=owner, group=group)

        # the reco task reads the input dataset and is split by file
        recoTask.addInputDataset(primary="PRIMARY", processed="processed-v1", tier="TIER1")
        recoTask.data.input.splitting.algorithm = "File"
        recoTask.setTaskType("Processing")
        recoStep = recoTask.makeStep("cmsRun1")
        recoStep.setStepType("CMSSW")
        recoTask.applyTemplates()
        recoStep.getTypeHelper().addOutputModule("outputRECO",
                                                 primaryDataset="PRIMARY",
                                                 processedDataset="processed-v2",
                                                 dataTier="TIER2",
                                                 lfnBase="/store/dunkindonuts",
                                                 mergedLFNBase="/store/kfc")

        collectionService = DataCollectionService(url=serverUrl, database=couchDB)

        # two input files at self.site, both in run 1 with different lumis
        inputFiles = []
        for lumis in ((1, 2), (3, 4)):
            wmFile = WMFile(lfn=makeUUID(), size=1024, events=1024)
            wmFile.setLocation([self.site])
            wmFile.addRun(Run(1, *lumis))
            inputFiles.append(wmFile)

        # the failed job that the resubmission is built from
        failedJob = Job()
        failedJob["task"] = workload.getTask("reco").getPathName()
        failedJob["workflow"] = workload.name()
        failedJob["location"] = self.site
        failedJob["owner"] = owner
        failedJob["group"] = group
        for wmFile in inputFiles:
            failedJob.addFile(wmFile)

        collectionService.failedJobs([failedJob])
        firstTopTask = workload.getTopLevelTask()[0]
        workload.truncate("Resubmit_TestWorkload", firstTopTask.getPathName(),
                          serverUrl, couchDB)

        return workload

    def testProduction(self):
        """
        Queue the production spec in the standalone queue and acquire it.
        """
        specUrl = self.spec.specUrl()
        unitCount = 1
        slotsPerUnit = [10] * unitCount  # job slots offered per unit
        allSlots = sum(slotsPerUnit)

        for _unused in range(unitCount):
            self.queue.queueWork(specUrl)
        self.assertEqual(unitCount, len(self.queue))

        # a site outside the whitelist gets nothing
        rejected = self.queue.getWork({'SiteDoesNotExist' : slotsPerUnit[0]})
        self.assertEqual([], rejected)

        # a whitelisted site with zero slots gets nothing either
        rejected = self.queue.getWork({'T2_XX_SiteA' : 0})
        self.assertEqual([], rejected)

        # with slots available the single element is handed out
        acquired = self.queue.getWork({'T2_XX_SiteA' : slotsPerUnit[0]})
        self.assertEqual(len(acquired), 1)

        # no more work available
        leftover = self.queue.getWork({'T2_XX_SiteA' : allSlots})
        self.assertEqual(0, len(leftover))


    def testProductionMultiQueue(self):
        """Production work in the global queue is pulled by only one local queue."""
        specUrl = self.spec.specUrl()
        unitCount = 1
        slotsPerUnit = [10] * unitCount  # job slots offered per unit
        allSlots = sum(slotsPerUnit)

        self.globalQueue.queueWork(specUrl)
        self.assertEqual(unitCount, len(self.globalQueue))

        # localQueue2 pulls first and takes the work, so localQueue gets none
        pulledBySecond = self.localQueue2.pullWork({'T2_XX_SiteA' : allSlots})
        self.assertEqual(unitCount, pulledBySecond)
        pulledByFirst = self.localQueue.pullWork({'T2_XX_SiteA' : allSlots})
        self.assertEqual(0, pulledByFirst)

        for childQueue in (self.localQueue, self.localQueue2):
            syncQueues(childQueue)

        self.assertEqual(unitCount,
                         len(self.localQueue2.status(status='Available')))
        self.assertEqual(0, len(self.localQueue.status(status='Available')))
        self.assertEqual(unitCount,
                         len(self.globalQueue.status(status='Acquired')))

        # the global element records localQueue2 as its child queue
        secondQueueUrl = sanitizeURL(self.localQueue2.params['QueueURL'])['url']
        firstGlobalElement = self.globalQueue.status()[0]
        self.assertEqual(secondQueueUrl, firstGlobalElement['ChildQueueUrl'])

#        curr_event = 1
#        for unit in work:
#            with open(unit['mask_url']) as mask_file:
#                mask = pickle.load(mask_file)
#                self.assertEqual(curr_event, mask['FirstEvent'])
#                curr_event = mask['LastEvent'] + 1
#        self.assertEqual(curr_event - 1, 10000)


    def testPriority(self):
        """
        Changing a request's priority updates its queued elements.
        """
        slots = 10
        expectedElements = 1

        self.queue.queueWork(self.spec.specUrl())
        self.queue.processInboundWork()

        # change the priority of the queued request
        self.queue.setPriority(50, self.spec.name())

        # every element of the request now reports the new priority
        priorities = []
        for element in self.queue.status(RequestName=self.spec.name()):
            priorities.append(element['Priority'])
        self.assertEqual(priorities, [50] * expectedElements)

        # an unknown request name is rejected
        self.assertRaises(RuntimeError, self.queue.setPriority, 50, 'blahhhhh')

        # claim all work
        claimed = self.queue.getWork({'T2_XX_SiteA' : slots})
        self.assertEqual(len(claimed), expectedElements)

        # no more work available
        leftover = self.queue.getWork({'T2_XX_SiteA' : slots})
        self.assertEqual(0, len(leftover))


    def testProcessing(self):
        """
        Queue the processing spec in the standalone queue and acquire its
        elements one site at a time.
        """
        specUrl = self.processingSpec.specUrl()
        jobsPerBlock = [5, 10]  # array of jobs per block
        allSlots = sum(jobsPerBlock)

        # Queue Work & check accepted: one element per block
        self.queue.queueWork(specUrl)
        self.queue.processInboundWork()
        self.assertEqual(len(jobsPerBlock), len(self.queue))

        self.queue.updateLocationInfo()

        # No resources offered -> no work
        nothing = self.queue.getWork({})
        self.assertEqual(len(nothing), 0)
        nothing = self.queue.getWork({'T2_XX_SiteA' : 0,
                                      'T2_XX_SiteB' : 0})
        self.assertEqual(len(nothing), 0)

        # Only 1 block at SiteB - get 1 work element when any resources free
        atSiteB = self.queue.getWork({'T2_XX_SiteB' : 1})
        self.assertEqual(len(atSiteB), 1)
        filesAdded = atSiteB[0]["NumOfFilesAdded"]
        self.assertEqual(filesAdded, GlobalParams.numOfFilesPerBlock())

        # claim remaining work
        remaining = self.queue.getWork({'T2_XX_SiteA' : allSlots,
                                        'T2_XX_SiteB' : allSlots})
        self.assertEqual(len(remaining), 1)
        filesAdded = remaining[0]["NumOfFilesAdded"]
        self.assertEqual(filesAdded, GlobalParams.numOfFilesPerBlock())

        # no more work available
        leftover = self.queue.getWork({'T2_XX_SiteA' : allSlots})
        self.assertEqual(0, len(leftover))


    def testBlackList(self):
        """
        Site blacklist and whitelist are honoured when handing out work.
        """
        jobsPerBlock = [5, 10]  # array of jobs per block
        blockCount = len(jobsPerBlock)
        allSlots = sum(jobsPerBlock)

        # ---- blacklist: T2_XX_SiteA is blacklisted in this spec ----
        # Queue Work & check accepted
        self.queue.queueWork(self.blacklistSpec.specUrl())
        self.queue.processInboundWork()
        self.assertEqual(blockCount, len(self.queue))
        self.queue.updateLocationInfo()

        # the blacklisted site gets nothing
        atSiteA = self.queue.getWork({'T2_XX_SiteA' : allSlots})
        self.assertEqual(len(atSiteA), 0)

        # copy block over to SiteB (all dbsHelpers point to same instance)
        newSites = ('T2_XX_SiteA', 'T2_XX_SiteB', 'T2_XX_SiteAA')
        allBlocks = DataBlockGenerator().getBlocks(self.dataset)
        blockLocations = dict((entry['Name'], list(newSites))
                              for entry in allBlocks
                              if entry['Name'].endswith('1'))

        Globals.moveBlock(blockLocations)
        self.queue.updateLocationInfo()

        # T2_XX_SiteA still blacklisted for all blocks
        atSiteA = self.queue.getWork({'T2_XX_SiteA' : allSlots})
        self.assertEqual(len(atSiteA), 0)
        # SiteB can run all blocks now
        atSiteB = self.queue.getWork({'T2_XX_SiteB' : allSlots})
        self.assertEqual(len(atSiteB), 2)

        # ---- whitelist: only T2_XX_SiteB is whitelisted in this spec ----
        whitelistUrl = self.whitelistSpec.specUrl()
        jobsPerBlock = [5, 10]  # array of jobs per block
        blockCount = len(jobsPerBlock)
        allSlots = sum(jobsPerBlock)

        self.queue.updateLocationInfo()

        # Queue Work & check accepted
        self.queue.queueWork(whitelistUrl)
        self.queue.processInboundWork()
        self.assertEqual(blockCount, len(self.queue))

        # SiteA is not in the whitelist
        atSiteA = self.queue.getWork({'T2_XX_SiteA' : allSlots})
        self.assertEqual(len(atSiteA), 0)

        # Site B can run
        self.queue.updateLocationInfo()
        allowed = self.queue.getWork({'T2_XX_SiteB' : allSlots,
                                      'T2_XX_SiteAA' : allSlots})
        self.assertEqual(len(allowed), 2)


    def testQueueChaining(self):
        """
        Chain WorkQueues, pull work down and verify splitting.

        Queues the processing spec in the global queue, checks that a site
        not offered by the data ('T2_XX_SiteC') receives nothing, pulls both
        elements into the local queue, acquires them there and checks each
        one carries a positive SubscriptionId before marking them Done.
        """
        self.assertEqual(0, len(self.globalQueue))
        # check no work in local queue
        self.assertEqual(0, len(self.localQueue.getWork({'T2_XX_SiteA' : 1000})))
        # Add work to top most queue
        self.globalQueue.queueWork(self.processingSpec.specUrl())
        self.assertEqual(2, len(self.globalQueue))

        # check work isn't passed down to site without subscription
        self.assertEqual(self.localQueue.pullWork({'T2_XX_SiteC' : 1000}), 0)

        # put at correct site
        self.globalQueue.updateLocationInfo()

        # check work isn't passed down to the wrong agent
        work = self.localQueue.getWork({'T2_XX_SiteC' : 1000}) # Not in subscription
        self.assertEqual(0, len(work))
        self.assertEqual(2, len(self.globalQueue))

        # pull work down to the lowest queue
        self.assertEqual(self.localQueue.pullWork({'T2_XX_SiteA' : 1000}), 2)
        syncQueues(self.localQueue)
        self.assertEqual(len(self.localQueue), 2)

        # releasing on block so need to update locations
        self.localQueue.updateLocationInfo()
        work = self.localQueue.getWork({'T2_XX_SiteA' : 1000})
        self.assertEqual(0, len(self.localQueue))
        self.assertEqual(2, len(work))

        # check work in local and subscription made.
        # Plain loops with assertTrue: assert_ is a deprecated alias, and a
        # list comprehension should not be used only for its side effects.
        for element in work:
            self.assertTrue(element['SubscriptionId'] > 0)
        for element in self.localQueue.status():
            self.assertTrue(element['SubscriptionId'] > 0)

        # mark work done in the local queue
        self.localQueue.setStatus('Done', [x.id for x in work])


    def testQueueChainingStatusUpdates(self):
        """
        Chain workQueues, pass work down and verify lifecycle.

        Follows two elements from the global queue through the local queue
        (Acquired -> Running -> Done) and checks that the status and the
        PercentComplete / PercentSuccess values reach the global elements
        and the global inbox element.
        """
        self.assertEqual(0, len(self.globalQueue))
        self.assertEqual(0, len(self.localQueue.getWork({'T2_XX_SiteA' : 1000})))

        # Add work to top most queue
        self.globalQueue.queueWork(self.processingSpec.specUrl())
        self.globalQueue.processInboundWork()
        self.assertEqual(2, len(self.globalQueue))

        # pull to local queue
        self.globalQueue.updateLocationInfo()
        self.assertEqual(self.localQueue.pullWork({'T2_XX_SiteA' : 1000}), 2)
        syncQueues(self.localQueue) # Tell parent local has acquired
        self.assertEqual(len(self.globalQueue.status('Acquired')), 2)
        self.assertEqual(len(self.localQueue.status('Available')), 2)

        # run work
        self.globalQueue.updateLocationInfo()
        work = self.localQueue.getWork({'T2_XX_SiteA' : 1000})
        self.assertEqual(len(work), 2)

        # resend info
        syncQueues(self.localQueue)
        self.assertEqual(len(self.globalQueue.status('Running')), 2)
        self.assertEqual(len(self.localQueue.status('Running')), 2)

        # finish work locally and propagate to global
        self.localQueue.doneWork([x.id for x in work])
        # explicit loop: the calls are made for their side effect only
        for element in work:
            self.localQueue.backend.updateElements(element.id,
                                                   PercentComplete = 100,
                                                   PercentSuccess = 99)
        elements = self.localQueue.status('Done')
        self.assertEqual(len(elements), len(work))
        self.assertEqual([x['PercentComplete'] for x in elements],
                         [100] * len(work))
        self.assertEqual([x['PercentSuccess'] for x in elements],
                         [99] * len(work))

        self.localQueue.performQueueCleanupActions(skipWMBS = True) # will delete elements from local
        syncQueues(self.localQueue)

        elements = self.globalQueue.status('Done')
        self.assertEqual(len(elements), 2)
        self.assertEqual([x['PercentComplete'] for x in elements], [100,100])
        self.assertEqual([x['PercentSuccess'] for x in elements], [99, 99])

        # after cleanup only the single inbox element remains in global
        self.globalQueue.performQueueCleanupActions()
        self.assertEqual(0, len(self.globalQueue.status()))
        elements = self.globalQueue.backend.getInboxElements('Done')
        self.assertEqual(len(elements), 1)
        self.assertEqual([x['PercentComplete'] for x in elements], [100])
        self.assertEqual([x['PercentSuccess'] for x in elements], [99])



    def testMultiTaskProduction(self):
        """
        Test Multi top level task production spec.
        multiTaskProduction spec consist 2 top level tasks each task has event size 1000 and 2000
        respectively

        The spec file written for this test is removed in a ``finally``
        block, so it is cleaned up even when an assertion fails.
        """
        #TODO: needs more rigorous test on each element per task
        # Basic production Spec
        spec = MultiTaskProductionWorkload
        spec.setSpecUrl(os.path.join(self.workDir, 'multiTaskProduction.spec'))
        spec.setOwnerDetails("evansde77", "DMWM", {'dn': 'MyDN'})
        spec.save(spec.specUrl())

        specfile = spec.specUrl()
        try:
            numElements = 3
            njobs = [10] * numElements # array of jobs per block
            total = sum(njobs)

            # Queue Work & check accepted
            self.queue.queueWork(specfile)
            self.assertEqual(2, len(self.queue))

            # try to get work
            work = self.queue.getWork({'T2_XX_SiteA' : 0})
            self.assertEqual([], work)
            work = self.queue.getWork({'T2_XX_SiteA' : total, 'T2_XX_SiteB' : total})
            self.assertEqual(len(work), 2)
            self.assertEqual(sum([x['Jobs'] for x in self.queue.status(status = 'Running')]),
                             total)

            #no more work available
            self.assertEqual(0, len(self.queue.getWork({'T2_XX_SiteA' : total})))
        finally:
            # Best-effort removal of the spec file; an OSError here (for
            # example the file is already gone) must not fail the test.
            try:
                os.unlink(specfile)
            except OSError:
                pass


    def testTeams(self):
        """
        Work queued for a team is only pulled by a local queue of that team.
        """
        self.globalQueue.queueWork(self.spec.specUrl(), team='The A-Team')
        self.globalQueue.processInboundWork()
        self.assertEqual(1, len(self.globalQueue))
        siteSlots = {'T2_XX_SiteA' : 1000, 'T2_XX_SiteB' : 1000}

        # Can't get work for wrong team
        self.localQueue.params['Teams'] = ['other']
        pulled = self.localQueue.pullWork(siteSlots)
        self.assertEqual(pulled, 0)

        # and with correct team name
        self.localQueue.params['Teams'] = ['The A-Team']
        pulled = self.localQueue.pullWork(siteSlots)
        self.assertEqual(pulled, 1)
        syncQueues(self.localQueue)

        # when work leaves the queue in the agent it doesn't care about teams
        self.localQueue.params['Teams'] = ['other']
        released = self.localQueue.getWork(siteSlots)
        self.assertEqual(len(released), 1)
        self.assertEqual(0, len(self.globalQueue))

    def testMultipleTeams(self):
        """A local queue serving two teams pulls the work queued for both."""
        siteSlots = {'T2_XX_SiteA' : 1000, 'T2_XX_SiteB' : 1000}

        # one spec per team in the global queue
        self.globalQueue.queueWork(self.spec.specUrl(), team='The B-Team')
        self.globalQueue.queueWork(self.processingSpec.specUrl(), team='The C-Team')
        self.globalQueue.processInboundWork()
        self.globalQueue.updateLocationInfo()

        # the local queue belongs to both teams, so it pulls all 3 elements
        self.localQueue.params['Teams'] = ['The B-Team', 'The C-Team']
        pulled = self.localQueue.pullWork(siteSlots)
        self.assertEqual(pulled, 3)
        syncQueues(self.localQueue)

        released = self.localQueue.getWork(siteSlots)
        self.assertEqual(len(released), 3)


    def testGlobalBlockSplitting(self):
        """Block splitting at global level"""
        # force global queue to split work on block, whatever the start policy
        for policy in ('DatasetBlock', 'Block', 'Dataset'):
            self.globalQueue.params['SplittingMapping'][policy]['name'] = 'Block'

        # queue work, globally for block, pass down, report back -> complete
        specCount = 1
        blockCount = specCount * 2
        self.assertEqual(0, len(self.globalQueue))
        for _unused in range(specCount):
            self.globalQueue.queueWork(self.processingSpec.specUrl())
        self.globalQueue.processInboundWork()
        self.assertEqual(blockCount, len(self.globalQueue))

        # both blocks in global belong to same parent, but have different inputs
        globalElements = self.globalQueue.status()
        firstElement, secondElement = globalElements[0], globalElements[1]
        self.assertEqual(firstElement['ParentQueueId'],
                         secondElement['ParentQueueId'])
        self.assertNotEqual(firstElement['Inputs'], secondElement['Inputs'])

        # pull to local; no updateLocationInfo() call is made on the global
        # queue before pulling in this test
        pulled = self.localQueue.pullWork({'T2_XX_SiteA' : 1000})
        self.assertEqual(pulled, blockCount)
        syncQueues(self.localQueue)
        availableLocally = self.localQueue.status(status='Available')
        self.assertEqual(len(availableLocally), blockCount)  # 2 in local

        released = self.localQueue.getWork({'T2_XX_SiteA' : 1000,
                                            'T2_XX_SiteB' : 1000})
        self.assertEqual(len(released), blockCount)
        # both refer to same wmspec
        self.assertEqual(released[0]['RequestName'], released[1]['RequestName'])

        doneIds = [str(element.id) for element in released]
        self.localQueue.doneWork(doneIds)
        # before the sync the Done elements are still present in local
        self.assertEqual(len(self.localQueue.status(status='Done')), blockCount)

        syncQueues(self.localQueue)
        # after the sync local has no Done elements, only global ones are left
        self.assertEqual(len(self.localQueue.status(status='Done')), 0)
        self.assertEqual(len(self.globalQueue.status(status='Done')), blockCount)

    def testGlobalDatasetSplitting(self):
        """Dataset splitting at global level"""

        # force global queue to split work on dataset, whatever the start policy
        for policy in ('DatasetBlock', 'Block', 'Dataset'):
            self.globalQueue.params['SplittingMapping'][policy]['name'] = 'Dataset'

        # queue work, globally for dataset, pass down, report back -> complete
        specCount = 1
        blockCount = specCount * 2
        self.assertEqual(0, len(self.globalQueue))
        for _unused in range(specCount):
            self.globalQueue.queueWork(self.processingSpec.specUrl())
        self.globalQueue.processInboundWork()
        # one global element per spec
        self.assertEqual(specCount, len(self.globalQueue))

        # pull to local
        self.globalQueue.updateLocationInfo()
        pulled = self.localQueue.pullWork({'T2_XX_SiteA' : 1000})
        self.assertEqual(pulled, specCount)
        syncQueues(self.localQueue)
        availableLocally = self.localQueue.status(status='Available')
        self.assertEqual(len(availableLocally), blockCount)  # 2 in local

        self.localQueue.updateLocationInfo()
        released = self.localQueue.getWork({'T2_XX_SiteA' : 1000,
                                            'T2_XX_SiteB' : 1000})
        self.assertEqual(len(released), blockCount)
        # both refer to same wmspec but have different inputs
        self.assertEqual(released[0]['RequestName'], released[1]['RequestName'])
        self.assertNotEqual(released[0]['Inputs'], released[1]['Inputs'])

        doneIds = [str(element.id) for element in released]
        self.localQueue.doneWork(doneIds)
        # before the sync the Done elements are still present in local
        self.assertEqual(len(self.localQueue.status(status='Done')), blockCount)

        syncQueues(self.localQueue)
        # after the sync local has no Done elements, only global ones are left
        self.assertEqual(len(self.localQueue.status(status='Done')), 0)
        self.assertEqual(len(self.globalQueue.status(status='Done')), specCount)

    def testResetWork(self):
        """Reset work in global to different child queue"""
        #TODO: This test sometimes fails - i suspect a race condition (maybe conflict in couch)
        # Cancel code needs reworking so this will hopefully be fixed then
        blockCount = 2

        # queue in global, pull to the first local queue and start running
        self.globalQueue.queueWork(self.processingSpec.specUrl())
        self.globalQueue.updateLocationInfo()
        pulled = self.localQueue.pullWork({'T2_XX_SiteA' : 1000})
        self.assertEqual(pulled, blockCount)
        syncQueues(self.localQueue)

        released = self.localQueue.getWork({'T2_XX_SiteA' : 1000,
                                            'T2_XX_SiteB' : 1000})
        self.assertEqual(len(released), blockCount)
        self.assertEqual(len(self.localQueue.status(status='Running')), 2)
        syncQueues(self.localQueue)
        self.assertEqual(len(self.globalQueue.status(status='Running')), 2)

        # Re-assign work in global
        runningIds = [element.id
                      for element in self.globalQueue.status(status='Running')]
        self.globalQueue.resetWork(runningIds)

        # global must no longer list running work at the first local queue
        #TODO: Note the work in local will be orphaned but not canceled
        syncQueues(self.localQueue)
        stillAtLocal = []
        for element in self.globalQueue.status(status='Running'):
            if (element['ChildQueueUrl'] ==
                    sanitizeURL(self.localQueue.params['QueueURL'])['url']):
                stillAtLocal.append(element)
        self.assertEqual(len(stillAtLocal), 0)

        # now 2nd queue calls and acquires work
        pulled = self.localQueue2.pullWork({'T2_XX_SiteA' : 1000})
        self.assertEqual(pulled, blockCount)
        syncQueues(self.localQueue2)

        # check work in global assigned to local2
        availableAtLocal2 = self.localQueue2.status(status='Available')
        self.assertEqual(len(availableAtLocal2), 2)  # work in local2
        acquiredByLocal2 = []
        for element in self.globalQueue.status(status='Acquired'):
            if (element['ChildQueueUrl'] ==
                    sanitizeURL(self.localQueue2.params['QueueURL'])['url']):
                acquiredByLocal2.append(element)
        self.assertEqual(len(acquiredByLocal2), 2)


    def testCancelWork(self):
        """Cancel running work, first by element id and then by workflow name"""
        # --- cancel by element ids ---
        self.queue.queueWork(self.processingSpec.specUrl())
        queuedCount = len(self.queue)
        self.queue.updateLocationInfo()
        running = self.queue.getWork({'T2_XX_SiteA' : 1000, 'T2_XX_SiteB' : 1000})
        self.assertEqual(len(self.queue), 0)
        self.assertEqual(len(self.queue.status(status='Running')), queuedCount)

        runningIds = [element.id for element in running]
        canceledIds = self.queue.cancelWork(runningIds)
        self.assertEqual(sorted(canceledIds), sorted(runningIds))
        self.assertEqual(len(self.queue), 0)
        self.assertEqual(len(self.queue.status()), 0)
        self.assertEqual(len(self.queue.statusInbox(status='Canceled')), 1)

        # --- now cancel a request by its workflow name ---
        self.queue.queueWork(self.spec.specUrl())
        queuedCount = len(self.queue)
        running = self.queue.getWork({'T2_XX_SiteA' : 1000, 'T2_XX_SiteB' : 1000})
        self.assertEqual(len(self.queue), 0)
        self.assertEqual(len(self.queue.status(status='Running')), queuedCount)

        runningIds = [element.id for element in running]
        canceledIds = self.queue.cancelWork(WorkflowName=['testProduction'])
        self.assertEqual(canceledIds, runningIds)
        self.assertEqual(len(self.queue), 0)


    def testCancelWorkGlobal(self):
        """Cancel work in global queue"""
        # queue to global & pull to local
        self.globalQueue.queueWork(self.processingSpec.specUrl())
        self.globalQueue.updateLocationInfo()
        pulled = self.localQueue.pullWork({'T2_XX_SiteA' : 1000})
        self.assertEqual(pulled, 2)
        syncQueues(self.localQueue)
        released = self.localQueue.getWork({'T2_XX_SiteA' : 1000,
                                            'T2_XX_SiteB' : 1000})
        self.assertEqual(len(released), 2)
        syncQueues(self.localQueue)

        # cancel in global through the service, and propagate down to local
        service = WorkQueueService(self.localQueue.backend.parentCouchUrlWithAuth)
        service.cancelWorkflow(self.processingSpec.name())
        self.globalQueue.performQueueCleanupActions()

        inboxCancelRequested = self.globalQueue.statusInbox(status='CancelRequested')
        self.assertEqual(len(inboxCancelRequested), 1)
        elementsCancelRequested = self.globalQueue.status(status='CancelRequested')
        self.assertEqual(len(elementsCancelRequested), 2)

        syncQueues(self.localQueue)
        localInboxCanceled = self.localQueue.statusInbox(status='Canceled')
        self.assertEqual(len(localInboxCanceled), 2)
        self.assertEqual(len(self.localQueue.status()), 0)

        # check cancel propagated back to global
        syncQueues(self.localQueue)
        globalCanceled = self.globalQueue.status(status='Canceled')
        self.assertEqual(len(globalCanceled), 2)
        self.globalQueue.performQueueCleanupActions()
        syncQueues(self.localQueue)
        self.assertEqual(len(self.localQueue.statusInbox()), 0)
        globalInboxCanceled = self.globalQueue.statusInbox(status='Canceled')
        self.assertEqual(len(globalInboxCanceled), 1)
        self.assertEqual(len(self.globalQueue.status()), 0)
        self.globalQueue.deleteWorkflows(self.processingSpec.name())

        # cancel work in global before it reaches a local queue
        self.globalQueue.queueWork(self.spec.specUrl())
        globalAvailable = self.globalQueue.status(status='Available')
        self.assertEqual(len(globalAvailable), 1)
        service.cancelWorkflow(self.spec.name())
        self.globalQueue.performQueueCleanupActions()
        self.assertEqual(len(self.globalQueue.status()), 0)
        globalInboxCanceled = self.globalQueue.statusInbox(status='Canceled')
        self.assertEqual(len(globalInboxCanceled), 1)
        self.globalQueue.deleteWorkflows(self.spec.name())

    def testInvalidSpecs(self):
        """
        Reject WMSpecs that cannot be turned into work.

        Every section builds a spec that is broken in a single way, saves it
        and checks which WorkQueue exception queueWork raises for it.
        """
        def freshProcessingSpec():
            # every ReReco case below starts from a new workload that reuses
            # the same workflow name and the same spec file
            workload = rerecoWorkload('testProcessingInvalid', rerecoArgs)
            workload.setSpecUrl(os.path.join(self.workDir,
                                             'testProcessingInvalid.spec'))
            return workload

        # request name differs from the workflow name
        self.assertRaises(WorkQueueWMSpecError, self.queue.queueWork,
                          self.processingSpec.specUrl(), request='fail_this')

        # invalid white list
        productionSpec = monteCarloWorkload('testProductionInvalid', mcArgs)
        getFirstTask(productionSpec).setSiteWhitelist('ThisIsInvalid')
        productionSpec.setSpecUrl(os.path.join(self.workDir,
                                               'testProductionInvalid.spec'))
        productionSpec.save(productionSpec.specUrl())
        self.assertRaises(WorkQueueWMSpecError, self.queue.queueWork,
                          productionSpec.specUrl())
        # reset the whitelist before moving on to the next check
        getFirstTask(productionSpec).setSiteWhitelist([])

        # production request asking for 0 events
        getFirstTask(productionSpec).addProduction(totalevents=0)
        productionSpec.save(productionSpec.specUrl())
        self.assertRaises(WorkQueueNoWorkError, self.queue.queueWork,
                          productionSpec.specUrl())

        # no input dataset
        reprocSpec = freshProcessingSpec()
        reprocSpec.save(reprocSpec.specUrl())
        getFirstTask(reprocSpec).data.input.dataset = None
        reprocSpec.save(reprocSpec.specUrl())
        self.assertRaises(WorkQueueWMSpecError, self.queue.queueWork,
                          reprocSpec.specUrl())

        # invalid dbs url
        reprocSpec = freshProcessingSpec()
        getFirstTask(reprocSpec).data.input.dataset.dbsurl = 'wrongprot://dbs.example.com'
        reprocSpec.save(reprocSpec.specUrl())
        self.assertRaises(WorkQueueWMSpecError, self.queue.queueWork,
                          reprocSpec.specUrl())

        # invalid dataset name
        reprocSpec = freshProcessingSpec()
        getFirstTask(reprocSpec).data.input.dataset.primary = Globals.NOT_EXIST_DATASET
        reprocSpec.save(reprocSpec.specUrl())
        self.assertRaises(WorkQueueNoWorkError, self.queue.queueWork,
                          reprocSpec.specUrl())

        # a slash is not allowed in the primary dataset name, so validation
        # should fail (same spec object as the previous case)
        getFirstTask(reprocSpec).data.input.dataset.primary = 'a/b'
        reprocSpec.save(reprocSpec.specUrl())
        self.assertRaises(WorkQueueWMSpecError, self.queue.queueWork,
                          reprocSpec.specUrl())

        # dataset splitting, then block splitting, each with a run whitelist
        # that selects a run which is not in this dataset
        for startPolicy in ('Dataset', 'Block'):
            reprocSpec = freshProcessingSpec()
            reprocSpec.setStartPolicy(startPolicy)
            reprocSpec.setRunWhitelist([666])
            reprocSpec.save(reprocSpec.specUrl())
            self.assertRaises(WorkQueueNoWorkError, self.queue.queueWork,
                              reprocSpec.specUrl())

    def testIgnoreDuplicates(self):
        """Queueing the same spec a second time must not add another element"""
        specUrl = self.spec.specUrl()

        # first pass queues the work, second pass queues the identical spec
        # again; the global queue has to hold exactly one element both times
        for _ in range(2):
            self.globalQueue.queueWork(specUrl)
            self.assertEqual(1, len(self.globalQueue))


    def testConflicts(self):
        """
        Resolve conflicts between global & local queue

        The same elements are updated independently on both sides (inbox copy
        marked Done in the local queue, element marked Canceled in the global
        queue); after fixConflicts and a sync both changes must be visible.
        """
        globalQueue = self.globalQueue
        localQueue = self.localQueue

        globalQueue.queueWork(self.spec.specUrl())
        localQueue.pullWork({'T2_XX_SiteA': 10000})
        localQueue.getWork({'T2_XX_SiteA': 10000})
        syncQueues(localQueue)

        elementIds = [element.id for element in globalQueue.status()]

        # diverging updates of the same documents on each side
        localQueue.backend.updateInboxElements(*elementIds,
                                               Status='Done',
                                               PercentComplete=69)
        globalQueue.backend.updateElements(*elementIds, Status='Canceled')

        # replicate, repair, replicate again
        localQueue.backend.forceQueueSync()
        localQueue.backend.fixConflicts()
        localQueue.backend.forceQueueSync()

        statuses = [element['Status']
                    for element in globalQueue.status(elementIDs=elementIds)]
        self.assertEqual(statuses, ['Canceled'])

        progress = [element['PercentComplete']
                    for element in globalQueue.status(elementIDs=elementIds)]
        self.assertEqual(progress, [69])

        # local inbox and global queue must agree element for element
        localInbox = list(localQueue.statusInbox())
        globalElements = list(globalQueue.status())
        self.assertEqual(localInbox, globalElements)

    def testDeleteWork(self):
        """
        Delete finished work

        Follows one workflow from queueing through Done and checks at which
        point its elements disappear from each queue and inbox.
        """
        workflow = self.spec.name()

        self.globalQueue.queueWork(self.spec.specUrl())
        pulled = self.localQueue.pullWork({'T2_XX_SiteA': 10000})
        self.assertEqual(pulled, 1)
        syncQueues(self.localQueue)

        acquired = self.localQueue.getWork({'T2_XX_SiteA': 10000})
        self.assertEqual(len(acquired), 1)
        syncQueues(self.localQueue)

        self.localQueue.doneWork(WorkflowName=workflow)
        syncQueues(self.localQueue)

        # local element is deleted once the inbox has been updated
        localElements = self.localQueue.status(WorkflowName=workflow)
        self.assertEqual(len(localElements), 0)
        globalElements = self.globalQueue.status(WorkflowName=workflow)
        self.assertEqual('Done', globalElements[0]['Status'])

        self.globalQueue.performQueueCleanupActions()
        globalInbox = self.globalQueue.statusInbox(WorkflowName=workflow)
        self.assertEqual('Done', globalInbox[0]['Status'])
        # global element is deleted once the inbox has been updated
        globalElements = self.globalQueue.status(WorkflowName=workflow)
        self.assertEqual(len(globalElements), 0)

        # explicit deletion clears the global inbox ...
        self.globalQueue.deleteWorkflows(workflow)
        globalInbox = self.globalQueue.statusInbox(WorkflowName=workflow)
        self.assertEqual(len(globalInbox), 0)

        # ... and after a sync the local inbox as well
        syncQueues(self.localQueue)
        localInbox = self.localQueue.statusInbox(WorkflowName=workflow)
        self.assertEqual(len(localInbox), 0)
    
    def testResubmissionWorkflow(self):
        """
        Test workflow resubmission via ACDC

        Queues a resubmission spec for team 'cmsdataops', then pulls and
        acquires it in the local queue. There are no assertions: the test
        only passes or fails on whether these calls raise.
        """
        acdcDbName = "workqueue_t_acdc"
        self.testInit.setupCouch(acdcDbName, "GroupUser", "ACDC")

        resubmitSpec = self.createResubmitSpec(self.testInit.couchUrl, acdcDbName)
        specPath = os.path.join(self.workDir, 'resubmissionWorkflow.spec')
        resubmitSpec.setSpecUrl(specPath)
        resubmitSpec.save(resubmitSpec.specUrl())

        # the local queue is set to the same team the work is queued for
        self.localQueue.params['Teams'] = ['cmsdataops']
        self.globalQueue.queueWork(resubmitSpec.specUrl(),
                                   "Resubmit_TestWorkload",
                                   team="cmsdataops")

        self.localQueue.pullWork({"T1_US_FNAL": 100})
        syncQueues(self.localQueue)
        self.localQueue.getWork({"T1_US_FNAL": 100})


    def testThrottling(self):
        """Pull work only if all previous work processed in child"""
        self.globalQueue.queueWork(self.processingSpec.specUrl())
        self.assertEqual(2, len(self.globalQueue))

        firstPull = self.localQueue.pullWork({'T2_XX_SiteA': 1})
        self.assertEqual(firstPull, 1)

        # A further pull has to come back empty until the first element has
        # been replicated to the child. This relies on couch replication not
        # running before the manual sync below.
        blockedPull = self.localQueue.pullWork({'T2_XX_SiteA': 1})
        self.assertEqual(blockedPull, 0)
        self.assertEqual(1, len(self.globalQueue))
        self.assertEqual(0, len(self.localQueue))

        syncQueues(self.localQueue)
        self.assertEqual(1, len(self.localQueue))

        # once the child has caught up, pulling works again
        nextPull = self.localQueue.pullWork({'T2_XX_SiteA': 1})
        self.assertEqual(nextPull, 1)
        
    def testSitesFromResourceControl(self):
        """
        Test sites from resource control

        Most tests pull work for explicitly named sites to stay in control of
        the outcome. In reality the site list comes from resource control, so
        this is a simple check that pullWork also works without arguments.
        """
        self.globalQueue.queueWork(self.spec.specUrl())

        pulled = self.localQueue.pullWork()
        self.assertEqual(pulled, 1)

        syncQueues(self.localQueue)
        localElements = self.localQueue.status()
        self.assertEqual(len(localElements), 1)

    def testParentProcessing(self):
        """
        Enqueue and get work for the parent-processing WMSpec.

        Checks how many elements are handed out for different resource
        offers and the NumOfFilesAdded value of each acquired element.
        """
        jobsPerBlock = [5, 10]
        totalJobs = sum(jobsPerBlock)
        expectedFiles = GlobalParams.numOfFilesPerBlock() * 2

        # queue the work and check that one element per block was accepted
        self.queue.queueWork(self.parentProcSpec.specUrl())
        self.queue.processInboundWork()
        self.assertEqual(len(jobsPerBlock), len(self.queue))

        self.queue.updateLocationInfo()

        # nothing is handed out without resources ...
        acquired = self.queue.getWork({})
        self.assertEqual(len(acquired), 0)
        # ... or when every site offers zero slots
        acquired = self.queue.getWork({'T2_XX_SiteA': 0,
                                       'T2_XX_SiteB': 0})
        self.assertEqual(len(acquired), 0)

        # only 1 block at SiteB - one element as soon as any resource is free
        acquired = self.queue.getWork({'T2_XX_SiteB': 1})
        self.assertEqual(len(acquired), 1)
        self.assertEqual(acquired[0]["NumOfFilesAdded"], expectedFiles)

        # claim the remaining work
        acquired = self.queue.getWork({'T2_XX_SiteA': totalJobs,
                                       'T2_XX_SiteB': totalJobs})
        self.assertEqual(len(acquired), 1)
        self.assertEqual(acquired[0]["NumOfFilesAdded"], expectedFiles)

        # the queue is now exhausted
        leftover = self.queue.getWork({'T2_XX_SiteA': totalJobs})
        self.assertEqual(0, len(leftover))

    def testDrainMode(self):
        """A local queue with DrainMode set must not pull any work"""
        self.localQueue.params['DrainMode'] = True

        self.globalQueue.queueWork(self.spec.specUrl())
        self.assertEqual(1, len(self.globalQueue))

        # work is available upstream, yet nothing may be pulled
        pulled = self.localQueue.pullWork({'T2_XX_SiteA': 1000,
                                           'T2_XX_SiteB': 1000})
        self.assertEqual(pulled, 0)

    def testWMBSInjectionStatus(self):
        """
        Check getWMBSInjectionStatus on the global and the local queue.

        The status of both workflows is expected to stay False until the
        local queue has run its cleanup actions, and asking for an unknown
        workflow is expected to raise ValueError.
        """
        workflow = self.spec.name()
        notInjected = [{'testProcessing': False}, {'testProduction': False}]
        injected = [{'testProcessing': True}, {'testProduction': True}]

        self.globalQueue.queueWork(self.spec.specUrl())
        self.globalQueue.queueWork(self.processingSpec.specUrl())

        # global queue status (no parent queue case)
        self.assertEqual(self.globalQueue.getWMBSInjectionStatus(), notInjected)
        self.assertEqual(self.globalQueue.getWMBSInjectionStatus(workflow), False)

        pulled = self.localQueue.pullWork()
        self.assertEqual(pulled, 3)

        # local queue status with parents (globalQueue is not synced yet)
        self.assertEqual(self.localQueue.getWMBSInjectionStatus(), notInjected)
        self.assertEqual(self.localQueue.getWMBSInjectionStatus(workflow), False)

        # acquiring the work does not change the reported status
        self.localQueue.processInboundWork()
        self.localQueue.updateLocationInfo()
        self.localQueue.getWork({'T2_XX_SiteA': 1000})
        self.assertEqual(self.localQueue.getWMBSInjectionStatus(), notInjected)
        self.assertEqual(self.localQueue.getWMBSInjectionStatus(workflow), False)

        # update parents status
        self.localQueue.performQueueCleanupActions()
        self.assertEqual(self.localQueue.getWMBSInjectionStatus(), injected)
        self.assertEqual(self.localQueue.getWMBSInjectionStatus(workflow), True)

        # a workflow that does not exist
        self.assertRaises(ValueError,
                          self.localQueue.getWMBSInjectionStatus,
                          "NotExistWorkflow")

    def testEndPolicyNegotiating(self):
        """
        Test end policy processing of request before splitting

        Pulls two elements into the local queue but only runs the inbound
        processing for the first one, then runs the cleanup actions and
        checks that the second inbox entry still reports 'Negotiating'.
        """
        # queueWork's return value is compared with 2 here
        # (presumably the number of elements created - TODO confirm)
        work = self.globalQueue.queueWork(self.processingSpec.specUrl())
        self.assertEqual(work, 2)
        # first element: pull it, process it locally and report back upstream
        self.assertEqual(self.localQueue.pullWork({'T2_XX_SiteA' : 1}), 1)
        self.localQueue.backend.pullFromParent() # pull work into inbox (Negotiating state)
        self.localQueue.processInboundWork()
        self.localQueue.backend.sendToParent()
        # second element: pulled, but no pullFromParent/processInboundWork
        # call follows before the cleanup below
        self.assertEqual(self.localQueue.pullWork({'T2_XX_SiteA' : 1}), 1)
        # should print message but not raise an error
        self.localQueue.performQueueCleanupActions(skipWMBS = True)
        # the second inbox entry is still 'Negotiating' and only one element
        # is counted in the local queue itself
        self.assertEqual(self.localQueue.statusInbox()[1]['Status'], 'Negotiating')
        self.assertEqual(len(self.localQueue), 1)