def testCreateDeleteExistsNoFiles(self): """ _testCreateDeleteExistsNoFiles_ Create and then delete a job but don't add any input files to it. Use the job class's exists() method to determine if the job has been written to the database before it is created, after it has been created and after it has been deleted. """ testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test") testWorkflow.create() testWMBSFileset = Fileset(name="TestFileset") testWMBSFileset.create() testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow) testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() testJob = Job(name="TestJob") assert testJob.exists() == False, "ERROR: Job exists before it was created" testJob.create(group=testJobGroup) assert testJob.exists() >= 0, "ERROR: Job does not exist after it was created" testJob.delete() assert testJob.exists() == False, "ERROR: Job exists after it was delete" return
def test_AutoIncrementCheck(self): """ _AutoIncrementCheck_ Test and see whether we can find and set the auto_increment values """ myThread = threading.currentThread() if not myThread.dialect.lower() == "mysql": return testWorkflow = Workflow(spec="spec.xml", owner="Steve", name="wf001", task="Test") testWorkflow.create() testFileset = Fileset(name="TestFileset") testFileset.create() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow) testSubscription.create() testFileA = File(lfn=makeUUID(), locations="test.site.ch") testFileB = File(lfn=makeUUID(), locations="test.site.ch") testFileA.create() testFileB.create() testFileset.addFile([testFileA, testFileB]) testFileset.commit() testSubscription.acquireFiles([testFileA, testFileB]) testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() incrementDAO = self.daoFactory(classname="Jobs.AutoIncrementCheck") incrementDAO.execute() testJob = Job() testJob.create(group=testJobGroup) self.assertEqual(testJob.exists(), 1) incrementDAO.execute() testJob = Job() testJob.create(group=testJobGroup) self.assertEqual(testJob.exists(), 2) incrementDAO.execute(input=10) testJob = Job() testJob.create(group=testJobGroup) self.assertEqual(testJob.exists(), 11) incrementDAO.execute(input=5) testJob = Job() testJob.create(group=testJobGroup) self.assertEqual(testJob.exists(), 12) return
def createLargeFileBlock(self): """ _createLargeFileBlock_ Creates a large group of files for testing """ testFileset = Fileset(name = "TestFilesetX") testFileset.create() for i in range(5000): newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["somese.cern.ch"])) newFile.create() testFileset.addFile(newFile) testFileset.commit() testWorkflow = Workflow(spec = "spec.xml", owner = "mnorman", name = "wf003", task="Test" ) testWorkflow.create() largeSubscription = Subscription(fileset = testFileset, workflow = testWorkflow, split_algo = "FileBased", type = "Processing") largeSubscription.create() return largeSubscription
def testStoreResults(self): """ _testStoreResults_ Create a StoreResults workflow and verify it installs into WMBS correctly. """ arguments = StoreResultsWorkloadFactory.getTestArguments() arguments.update({'CmsPath': "/uscmst1/prod/sw/cms"}) factory = StoreResultsWorkloadFactory() testWorkload = factory.factoryWorkloadConstruction("TestWorkload", arguments) testWMBSHelper = WMBSHelper(testWorkload, "StoreResults", "SomeBlock", cachepath=self.testDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset) testWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/StoreResults") testWorkflow.load() self.assertEqual(len(testWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") goldenOutputMods = ["Merged"] for goldenOutputMod in goldenOutputMods: mergedOutput = testWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"] unmergedOutput = testWorkflow.outputMap[goldenOutputMod][0]["output_fileset"] mergedOutput.loadData() unmergedOutput.loadData() self.assertEqual(mergedOutput.name, "/TestWorkload/StoreResults/merged-%s" % goldenOutputMod, "Error: Merged output fileset is wrong: %s" % mergedOutput.name) self.assertEqual(unmergedOutput.name, "/TestWorkload/StoreResults/merged-%s" % goldenOutputMod, "Error: Unmerged output fileset is wrong: %s." % unmergedOutput.name) logArchOutput = testWorkflow.outputMap["logArchive"][0]["merged_output_fileset"] unmergedLogArchOutput = testWorkflow.outputMap["logArchive"][0]["output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual(logArchOutput.name, "/TestWorkload/StoreResults/merged-logArchive", "Error: LogArchive output fileset is wrong.") self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/StoreResults/merged-logArchive", "Error: LogArchive output fileset is wrong.") topLevelFileset = Fileset(name="TestWorkload-StoreResults-SomeBlock") topLevelFileset.loadData() procSubscription = Subscription(fileset=topLevelFileset, workflow=testWorkflow) procSubscription.loadData() self.assertEqual(procSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(procSubscription["split_algo"], "ParentlessMergeBySize", "Error: Wrong split algo.") return
def testFilesWithoutOtherSubscriptions(self): """ _testFilesWithoutOtherSubscriptions_ Test the case where files only in the delete subscription can happen if cleanup of the other subscriptions is fast """ testWorkflowA = Workflow(spec="specA.xml", owner="Steve", name="wfA", task="Test") testWorkflowA.create() testFileset = Fileset(name="TestFileset") testFileset.create() allFiles = [] for i in range(500): testFile = File(str(i), size=1000, events=100, locations=set(["somese.cern.ch"])) testFile.create() allFiles.append(testFile) testFileset.addFile(testFile) testFileset.commit() testSubscriptionA = Subscription( fileset=testFileset, workflow=testWorkflowA, split_algo="SiblingProcessingBased", type="Processing" ) testSubscriptionA.create() splitter = SplitterFactory() deleteFactoryA = splitter(package="WMCore.WMBS", subscription=testSubscriptionA) result = deleteFactoryA(files_per_job=50) self.assertEqual(len(result), 1, "Error: Wrong number of job groups returned.") self.assertEqual(len(result[0].jobs), 10, "Error: Wrong number of jobs returned.") return
def testByRunHarvesting(self): """ _testByRunHarvesting_ Provided a fileset with a couple of files and 4 different runs, create one single job per run and location. The multiRun baggage should be false in this case. """ multipleFilesFileset = createCommonFileset() self.assertEqual(multipleFilesFileset.open, True, "Fileset should be open!") harvestingWorkflow = Workflow(spec="spec.xml", owner="amaltaro", name="TestWorkflow", task="Test") harvestingWorkflow.create() harvestSub = Subscription(fileset=multipleFilesFileset, workflow=harvestingWorkflow, split_algo="Harvest", type="Harvesting") harvestSub.create() multipleFilesFileset.markOpen(False) self.assertEqual(multipleFilesFileset.open, False, "Fileset should now be closed") jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=harvestSub) jobGroups = jobFactory() self.assertEqual(len(jobGroups), 1, "Should have created 1 job group") for jobGroup in jobGroups: self.assertEqual(len(jobGroup.jobs), 6, "Should have created 6 jobs") for job in jobGroup.jobs: baggage = job.getBaggage() self.assertFalse(getattr(baggage, "multiRun", False), "It's supposed to be a byRun job")
def testMask(self): """ _testMask_ Test the new mask setup """ testWorkflow = Workflow(spec="spec.xml", owner="Steve", name="wf001", task="Test") testWorkflow.create() testFileset = Fileset(name="TestFileset") testFileset.create() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow) testSubscription.create() testFileA = File(lfn=makeUUID(), locations="test.site.ch") testFileB = File(lfn=makeUUID(), locations="test.site.ch") testFileA.create() testFileB.create() testFileset.addFile([testFileA, testFileB]) testFileset.commit() testSubscription.acquireFiles([testFileA, testFileB]) testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() testJob = Job() testJob['mask'].addRunAndLumis(run=100, lumis=[101, 102]) testJob['mask'].addRunAndLumis(run=200, lumis=[201, 202]) testJob.create(group=testJobGroup) loadJob = Job(id=testJob.exists()) loadJob.loadData() runs = loadJob['mask'].getRunAndLumis() self.assertEqual(len(runs), 2) self.assertEqual(runs[100], [[101, 102]]) self.assertEqual(runs[200], [[201, 202]]) bigRun = Run(100, *[101, 102, 103, 104]) badRun = Run(300, *[1001, 1002]) result = loadJob['mask'].filterRunLumisByMask([bigRun, badRun]) self.assertEqual(len(result), 1) alteredRun = result.pop() self.assertEqual(alteredRun.run, 100) self.assertEqual(alteredRun.lumis, [101, 102]) run0 = Run(300, *[1001, 1002]) run1 = Run(300, *[1001, 1002]) loadJob['mask'].filterRunLumisByMask([run0, run1]) return
def testListRunningJobs(self): """ _testListRunningJobs_ Test the ListRunningJobs DAO. """ testWorkflow = Workflow(spec = makeUUID(), owner = "Steve", name = makeUUID(), task="Test") testWorkflow.create() testFileset = Fileset(name = "TestFileset") testFileset.create() testSubscription = Subscription(fileset = testFileset, workflow = testWorkflow, type = "Processing") testSubscription.create() testJobGroup = JobGroup(subscription = testSubscription) testJobGroup.create() testJobA = Job(name = makeUUID(), files = []) testJobA["couch_record"] = makeUUID() testJobA.create(group = testJobGroup) testJobA["state"] = "executing" testJobB = Job(name = makeUUID(), files = []) testJobB["couch_record"] = makeUUID() testJobB.create(group = testJobGroup) testJobB["state"] = "complete" testJobC = Job(name = makeUUID(), files = []) testJobC["couch_record"] = makeUUID() testJobC.create(group = testJobGroup) testJobC["state"] = "new" changeStateAction = self.daoFactory(classname = "Jobs.ChangeState") changeStateAction.execute(jobs = [testJobA, testJobB, testJobC]) runningJobsAction = self.daoFactory(classname = "Monitoring.ListRunningJobs") runningJobs = runningJobsAction.execute() assert len(runningJobs) == 2, \ "Error: Wrong number of running jobs returned." for runningJob in runningJobs: if runningJob["job_name"] == testJobA["name"]: assert runningJob["state"] == testJobA["state"], \ "Error: Running job has wrong state." assert runningJob["couch_record"] == testJobA["couch_record"], \ "Error: Running job has wrong couch record." else: assert runningJob["job_name"] == testJobC["name"], \ "Error: Running job has wrong name." assert runningJob["state"] == testJobC["state"], \ "Error: Running job has wrong state." assert runningJob["couch_record"] == testJobC["couch_record"], \ "Error: Running job has wrong couch record." return
def testCreateDeleteExists(self): """ _testCreateDeleteExists_ Create a JobGroup and then delete it. Use the JobGroup's exists() method to determine if it exists before it is created, after it is created and after it is deleted. """ testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test") testWorkflow.create() testFileset = WMBSFileset(name="TestFileset") testFileset.create() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow) testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) self.assertFalse(testJobGroup.exists()) testJobGroup.create() self.assertTrue(testJobGroup.exists()) testJobGroup.delete() self.assertFalse(testJobGroup.exists()) testSubscription.delete() testFileset.delete() testWorkflow.delete() return
def testCreateTransaction(self): """ _testCreateTransaction_ Create a JobGroup and commit it to the database. Rollback the database transaction and verify that the JobGroup is no longer in the database. """ testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test") testWorkflow.create() testFileset = WMBSFileset(name="TestFileset") testFileset.create() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow) testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) assert testJobGroup.exists() == False, "ERROR: Job group exists before it was created" myThread = threading.currentThread() myThread.transaction.begin() testJobGroup.create() assert testJobGroup.exists() >= 0, "ERROR: Job group does not exist after it was created" myThread.transaction.rollback() assert testJobGroup.exists() == False, "ERROR: Job group exists after transaction was rolled back." testSubscription.delete() testFileset.delete() testWorkflow.delete() return
def createSingleJobWorkflow(self): """ Create a workflow with one jobs and two files and store the results in instance variables """ self.testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test") self.testWorkflow.create() testWMBSFileset = Fileset(name="TestFileset") testWMBSFileset.create() testSubscription = Subscription(fileset=testWMBSFileset, workflow=self.testWorkflow) testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() self.testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10) self.testFileA.addRun(Run(1, *[45])) self.testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10) self.testFileB.addRun(Run(1, *[46])) self.testFileA.create() self.testFileB.create() self.testJob = Job(name="TestJob", files=[self.testFileA, self.testFileB]) self.testJob.create(group=testJobGroup) self.testJob.associateFiles()
def generateFakeMCFile(self, numEvents = 100, firstEvent = 1, lastEvent = 100, firstLumi = 1, lastLumi = 10, index = 1): #MC comes with only one MCFakeFile singleMCFileset = Fileset(name = "MCTestFileset %i" % index) singleMCFileset.create() newFile = File("MCFakeFileTest %i" % index, size = 1000, events = numEvents, locations = set(["somese.cern.ch"])) newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1))) newFile["first_event"] = firstEvent newFile["last_event"] = lastEvent newFile.create() singleMCFileset.addFile(newFile) singleMCFileset.commit() testWorkflow = Workflow(spec = "spec.xml", owner = "Steve", name = "wf001", task="Test") testWorkflow.create() singleMCFileSubscription = Subscription(fileset = singleMCFileset, workflow = testWorkflow, split_algo = "EventBased", type = "Production") singleMCFileSubscription.create() return singleMCFileSubscription
def createTestSubscription(self, nFiles, nSites=1, closeFileset=False): """ _createTestSubscription_ Create a set of test subscriptions for testing purposes. """ if nSites > self.nSites: nSites = self.nSites testFileset = Fileset(name="TestFileset") testFileset.create() # Create a testWorkflow testWorkflow = Workflow(spec="spec.xml", owner="Steve", name="wf001", task="Test") testWorkflow.create() # Create the files for each site for s in range(nSites): for i in range(nFiles): newFile = File(makeUUID(), size=1024, events=100, locations=set(["site%i.cern.ch" % s])) newFile.create() testFileset.addFile(newFile) testFileset.commit() testSubscription = Subscription( fileset=testFileset, workflow=testWorkflow, split_algo="MinFileBased", type="Processing" ) testSubscription.create() # Close the fileset if closeFileset: testFileset.markOpen(isOpen=False) return testSubscription
def subscribeWMBS(self, task): """ Create a subscription for each task """ workFlow, fileSet = self.createWorkflow(task) workFlow.load() fileSet.load() subType = '' if task.name() == 'Processing' or task.name() == 'Production': subType = 'Processing' elif task.name() == 'Merge': subType = 'Merge' newSub = Subscription(fileset=fileSet, workflow=workFlow, split_algo='FileBased', type=subType) newSub.create() # Add subscription to dictionary self.subDict[task.name()] = newSub # Add subscription id to task setattr(task.data.input.WMBS, 'Subscription', newSub['id']) if not newSub.exists() >= 0: raise Exception("ERROR: Subscription does not exist after it was created") logging.info('Created subscription for task %s' % (task.name())) return
def subscribeWMBS(self, task): """ Create a subscription for each task """ workFlow, fileSet = self.createWorkflow(task) workFlow.load() fileSet.load() subType = "" if task.name() == "Processing" or task.name() == "Production": subType = "Processing" elif task.name() == "Merge": subType = "Merge" newSub = Subscription(fileset=fileSet, workflow=workFlow, split_algo="FileBased", type=subType) newSub.create() # Add subscription to dictionary self.subDict[task.name()] = newSub # Add subscription id to task setattr(task.data.input.WMBS, "Subscription", newSub["id"]) if not newSub.exists() >= 0: raise Exception("ERROR: Subscription does not exist after it was created") logging.info("Created subscription for task %s" % (task.name())) return
def testByRunAndRunBlacklist(self): """ _testByRunAndRunWhitelist_ Create harvesting jobs by run for the runs provided in the RunWhitelist """ multipleFilesFileset = createCommonFileset() self.assertEqual(multipleFilesFileset.open, True) harvestingWorkflow = Workflow(spec="spec.xml", owner="amaltaro", name="TestWorkflow", task="Test") harvestingWorkflow.create() harvestSub = Subscription(fileset=multipleFilesFileset, workflow=harvestingWorkflow, split_algo="Harvest", type="Harvesting") harvestSub.create() multipleFilesFileset.markOpen(False) self.assertEqual(multipleFilesFileset.open, False, "Fileset should now be closed") jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=harvestSub) jobGroups = jobFactory(runWhitelist=[1, 2, 3, 4, 5], runBlacklist=[1, 3]) self.assertEqual(len(jobGroups), 1, "One jobgroup per location") for jobGroup in jobGroups: self.assertEqual(len(jobGroup.jobs), 3)
def createSubscription(self, nFiles, lumisPerFile, twoSites=False, nEventsPerFile=100): """ _createSubscription_ Create a subscription for testing """ baseName = makeUUID() testFileset = Fileset(name=baseName) testFileset.create() for i in range(nFiles): newFile = self.createFile("%s_%i" % (baseName, i), nEventsPerFile, i, lumisPerFile, "somese.cern.ch") newFile.create() testFileset.addFile(newFile) if twoSites: for i in range(nFiles): newFile = self.createFile("%s_%i_2" % (baseName, i), nEventsPerFile, i, lumisPerFile, "otherse.cern.ch") newFile.create() testFileset.addFile(newFile) testFileset.commit() testSubscription = Subscription( fileset=testFileset, workflow=self.testWorkflow, split_algo="EventAwareLumiBased", type="Processing" ) testSubscription.create() return testSubscription
def createSubscription(self, nFiles, lumisPerFile, twoSites=False, nEventsPerFile=100): """ _createSubscription_ Create a subscription for testing """ baseName = makeUUID() testWorkflow = Workflow(spec="spec.xml", owner="dmwm", name="testWorkflow_%s" % baseName[:4], task="Test") testWorkflow.create() testFileset = Fileset(name=baseName) for i in range(nFiles): newFile = self.createFile('%s_%i' % (baseName, i), nEventsPerFile, i, lumisPerFile, 'T1_US_FNAL_Disk') testFileset.addFile(newFile) if twoSites: for i in range(nFiles): newFile = self.createFile('%s_%i_2' % (baseName, i), nEventsPerFile, i, lumisPerFile, 'T2_CH_CERN') testFileset.addFile(newFile) testFileset.create() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow, split_algo="EventAwareLumiByWork", type="Processing") testSubscription.create() return testSubscription
def testMultiRunHarvesting(self): """ _testMultiRunHarvesting_ Provided a fileset with a couple of files and different runs, create a single job for all the runs at a specific location, which also adds a baggage to the job (True) which is later on looked up by SetupCMSSWPSet. """ multipleFilesFileset = createCommonFileset() self.assertEqual(multipleFilesFileset.open, True) harvestingWorkflow = Workflow(spec="spec.xml", owner="amaltaro", name="TestWorkflow", task="Test") harvestingWorkflow.create() harvestSub = Subscription(fileset=multipleFilesFileset, workflow=harvestingWorkflow, split_algo="Harvest", type="Harvesting") harvestSub.create() multipleFilesFileset.markOpen(False) self.assertEqual(multipleFilesFileset.open, False, "Fileset should now be closed") jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=harvestSub) jobGroups = jobFactory(dqmHarvestUnit="multiRun") self.assertEqual(len(jobGroups), 1) for jobGroup in jobGroups: self.assertEqual(len(jobGroup.jobs), 1)
def createTestJob(subscriptionType="Merge"): """ _createTestJob_ Create a test job with two files as input. This will also create the appropriate workflow, jobgroup and subscription. """ testWorkflow = Workflow(spec=makeUUID(), owner="Simon", name=makeUUID(), task="Test") testWorkflow.create() testWMBSFileset = Fileset(name="TestFileset") testWMBSFileset.create() testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow, type=subscriptionType) testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10) testFileA.addRun(Run(1, *[45])) testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10) testFileB.addRun(Run(1, *[46])) testFileA.create() testFileB.create() testJob = Job(name=makeUUID(), files=[testFileA, testFileB]) testJob["couch_record"] = "somecouchrecord" testJob["location"] = "test.site.ch" testJob.create(group=testJobGroup) testJob.associateFiles() return testJob
def testD_NonContinuousLumis(self): """ _NonContinuousLumis_ Test and see if LumiBased can work when the lumis are non continuous """ baseName = makeUUID() nFiles = 10 testFileset = Fileset(name = baseName) testFileset.create() for i in range(nFiles): newFile = File(lfn = '%s_%i' % (baseName, i), size = 1000, events = 100, locations = "somese.cern.ch") # Set to two non-continuous lumi numbers lumis = [100 + i, 200 + i] newFile.addRun(Run(i, *lumis)) newFile.create() testFileset.addFile(newFile) testFileset.commit() testSubscription = Subscription(fileset = testFileset, workflow = self.testWorkflow, split_algo = "LumiBased", type = "Processing") testSubscription.create() splitter = SplitterFactory() jobFactory = splitter(package = "WMCore.WMBS", subscription = testSubscription) jobGroups = jobFactory(lumis_per_job = 2, halt_job_on_file_boundaries = False, splitOnRun = False, performance = self.performanceParams) self.assertEqual(len(jobGroups), 1) jobs = jobGroups[0].jobs self.assertEqual(len(jobs), 10) for j in jobs: runs = j['mask'].getRunAndLumis() for r in runs.keys(): self.assertEqual(len(runs[r]), 2) for l in runs[r]: # Each run should have two lumis # Each lumi should be of form [x, x] # meaning that the first and last lumis are the same self.assertEqual(len(l), 2) self.assertEqual(l[0], l[1]) self.assertEqual(j['estimatedJobTime'], 100 * 12) self.assertEqual(j['estimatedDiskUsage'], 100 * 400) self.assertEqual(j['estimatedMemoryUsage'], 2300) return
def _createThisSubscription(self, initialCounter=1): """ Private function to create a fileset and subscription with different fileset and file names :param initialCounter: just a simple integer to be appended to files :return: an splitter instance (jobFactory) """ splitter = SplitterFactory() # Create 3 files with 100 events per lumi: # - file1 with 1 run of 8 lumis # - file2 with 2 runs of 2 lumis each # - file3 with 1 run of 5 lumis testFileset = Fileset(name='Fileset%s' % initialCounter) fileA = File(lfn="/this/is/file%s" % initialCounter, size=1000, events=800) lumiListA = [] for lumi in range(8): lumiListA.append(10 + lumi) fileA.addRun(Run(1, *lumiListA)) fileA.setLocation("T1_US_FNAL_Disk") initialCounter = int(initialCounter) + 1 fileB = File(lfn="/this/is/file%s" % initialCounter, size=1000, events=400) lumiListB1 = [] lumiListB2 = [] for lumi in range(2): lumiListB1.append(20 + lumi) lumiListB2.append(30 + lumi) fileB.addRun(Run(2, *lumiListB1)) fileB.addRun(Run(3, *lumiListB2)) fileB.setLocation("T1_US_FNAL_Disk") initialCounter = int(initialCounter) + 1 fileC = File(lfn="/this/is/file%s" % initialCounter, size=1000, events=500) lumiListC = [] for lumi in range(5): lumiListC.append(40 + lumi) fileC.addRun(Run(4, *lumiListC)) fileC.setLocation("T1_US_FNAL_Disk") testFileset.addFile(fileA) testFileset.addFile(fileB) testFileset.addFile(fileC) testFileset.create() testSubscription = Subscription(fileset=testFileset, workflow=self.testWorkflow, split_algo="EventAwareLumiByWork", type="Processing") testSubscription.create() jobFactory = splitter(package="WMCore.WMBS", subscription=testSubscription) return jobFactory
def setupRepackWorkflow(self): """ _setupRepackWorkflow_ Populate WMBS with a repack-like workflow, every subscription must be unfinished at first """ workflowName = 'Repack_Run481516_StreamZ' mergeTasks = ['RepackMergewrite_QuadElectron_RAW', 'RepackMergewrite_TriPhoton_RAW', 'RepackMergewrite_SingleNeutrino_RAW'] self.stateMap = {'Merge': [], 'Processing Done': []} self.orderedStates = ['Merge', 'Processing Done'] # Populate WMStats self.requestDBWriter.insertGenericRequest({'RequestName': workflowName}) self.requestDBWriter.updateRequestStatus(workflowName, 'Closed') # Create a wmspec in disk workload = newWorkload(workflowName) repackTask = workload.newTask('Repack') for task in mergeTasks: repackTask.addTask(task) repackTask.addTask('RepackCleanupUnmergedwrite_QuadElectron_RAW') specPath = os.path.join(self.testDir, 'Repack.pkl') workload.save(specPath) # Populate WMBS topFileset = Fileset(name='TestStreamerFileset') topFileset.create() options = {'spec': specPath, 'owner': 'ItsAMeMario', 'name': workflowName, 'wfType': 'tier0'} topLevelWorkflow = Workflow(task='/%s/Repack' % workflowName, **options) topLevelWorkflow.create() topLevelSub = Subscription(topFileset, topLevelWorkflow) topLevelSub.create() self.stateMap['Merge'].append(topFileset) for task in mergeTasks: mergeWorkflow = Workflow(task='/%s/Repack/%s' % (workflowName, task), **options) mergeWorkflow.create() unmergedFileset = Fileset(name='TestUnmergedFileset%s' % task) unmergedFileset.create() mergeSub = Subscription(unmergedFileset, mergeWorkflow) mergeSub.create() self.stateMap['Processing Done'].append(unmergedFileset) cleanupWorkflow = Workflow(task='/Repack_Run481516_StreamZ/Repack/RepackCleanupUnmergedwrite_QuadElectron_RAW', **options) cleanupWorkflow.create() unmergedFileset = Fileset(name='TestUnmergedFilesetToCleanup') unmergedFileset.create() cleanupSub = Subscription(unmergedFileset, cleanupWorkflow) cleanupSub.create() return
def testParentageByJob(self): """ _testParentageByJob_ Tests the DAO that assigns parentage by Job """ testWorkflow = Workflow(spec = 'hello', owner = "mnorman", name = "wf001", task="basicWorkload/Production") testWorkflow.create() testFileset = Fileset(name = "TestFileset") testFileset.create() testSubscription = Subscription(fileset = testFileset, workflow = testWorkflow, type = "Processing", split_algo = "FileBased") testSubscription.create() testJobGroup = JobGroup(subscription = testSubscription) testJobGroup.create() testFileParentA = File(lfn = "/this/is/a/parent/lfnA", size = 1024, events = 20, checksums = {'cksum': 1}) testFileParentA.addRun(Run( 1, *[45])) testFileParentB = File(lfn = "/this/is/a/parent/lfnB", size = 1024, events = 20, checksums = {'cksum': 1}) testFileParentB.addRun(Run( 1, *[45])) testFileParentA.create() testFileParentB.create() testFileA = File(lfn = "/this/is/a/lfn", size = 1024, events = 10, checksums = {'cksum':1}) testFileA.addRun(Run( 1, *[45])) testFileA.create() testJobA = Job() testJobA.create(group = testJobGroup) testJobA.addFile(testFileParentA) testJobA.addFile(testFileParentB) testJobA.associateFiles() parentAction = self.daofactory(classname = "Files.SetParentageByJob") parentAction.execute(binds = {'jobid': testJobA.exists(), 'child': testFileA['lfn']}) testFileB = File(id = testFileA["id"]) testFileB.loadData(parentage = 1) goldenFiles = [testFileParentA, testFileParentB] for parentFile in testFileB["parents"]: self.assertEqual(parentFile in goldenFiles, True, "ERROR: Unknown parent file") goldenFiles.remove(parentFile) self.assertEqual(len(goldenFiles), 0, "ERROR: Some parents are missing")
def createJobGroups(self, nSubs, nJobs, task, workloadSpec, site, taskType='Processing', name=None, wfPrio=1, changeState=None): """ _createJobGroups_ Creates a series of jobGroups for submissions changeState is an instance of the ChangeState class to make job status changes """ jobGroupList = [] if name is None: name = makeUUID() testWorkflow = Workflow(spec=workloadSpec, owner="tapas", name=name, task="basicWorkload/Production", priority=wfPrio) testWorkflow.create() # Create subscriptions for _ in range(nSubs): name = makeUUID() # Create Fileset, Subscription, jobGroup testFileset = Fileset(name=name) testFileset.create() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow, type=taskType, split_algo="FileBased") testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() # Create jobs self.makeNJobs(name=name, task=task, nJobs=nJobs, jobGroup=testJobGroup, fileset=testFileset, sub=testSubscription.exists(), site=site) testFileset.commit() testJobGroup.commit() jobGroupList.append(testJobGroup) if changeState: for group in jobGroupList: changeState.propagate(group.jobs, 'created', 'new') return jobGroupList
def createJobGroups(self, nSubs, nJobs, task, workloadSpec, site=None, bl=[], wl=[]): """ Creates a series of jobGroups for submissions """ jobGroupList = [] testWorkflow = Workflow( spec=workloadSpec, owner="tapas", name=makeUUID(), task="basicWorkload/Production", owner_vogroup="phgroup", owner_vorole="cmsrole", ) testWorkflow.create() # Create subscriptions for i in range(nSubs): name = makeUUID() # Create Fileset, Subscription, jobGroup testFileset = Fileset(name=name) testFileset.create() testSubscription = Subscription( fileset=testFileset, workflow=testWorkflow, type="Processing", split_algo="FileBased" ) testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() # Create jobs self.makeNJobs( name=name, task=task, nJobs=nJobs, jobGroup=testJobGroup, fileset=testFileset, sub=testSubscription.exists(), site=site, bl=bl, wl=wl, ) testFileset.commit() testJobGroup.commit() jobGroupList.append(testJobGroup) return jobGroupList
def testDeleteTransaction(self): """ _testDeleteTransaction_ Create a new job and commit it to the database. Start a new transaction and delete the file from the database. Verify that the file has been deleted. After that, roll back the transaction and verify that the job is once again in the database. """ testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test") testWorkflow.create() testWMBSFileset = Fileset(name="TestFileset") testWMBSFileset.create() testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow) testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10) testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10) testFileA.create() testFileB.create() testJob = Job(name="TestJob", files=[testFileA, testFileB]) assert testJob.exists() is False, \ "ERROR: Job exists before it was created" testJob.create(group=testJobGroup) assert testJob.exists() >= 0, \ "ERROR: Job does not exist after it was created" myThread = threading.currentThread() myThread.transaction.begin() testJob.delete() assert testJob.exists() is False, \ "ERROR: Job exists after it was delete" myThread.transaction.rollback() assert testJob.exists() >= 0, \ "ERROR: Job does not exist after transaction was rolled back." return
def stuffWMBS(self): """ _stuffWMBS_ Stuff WMBS with workflows """ workflow = Workflow(spec = 'spec.xml', name = 'ReRecoTest_v0Emulator', task = '/ReRecoTest_v0Emulator/Test', priority = 10) workflow.create() inputFileset = Fileset(name = 'TestFileset') inputFileset.create() subscription = Subscription(inputFileset, workflow) subscription.create()
def createSubscription(self, nFiles, lumisPerFile, twoSites = False, rand = False): """ _createSubscription_ Create a subscription for testing """ baseName = makeUUID() testFileset = Fileset(name = baseName) testFileset.create() parentFile = File('%s_parent' % (baseName), size = 1000, events = 100, locations = set(["somese.cern.ch"])) parentFile.create() for i in range(nFiles): newFile = File(lfn = '%s_%i' % (baseName, i), size = 1000, events = 100, locations = "somese.cern.ch") lumis = [] for lumi in range(lumisPerFile): if rand: lumis.append(random.randint(1000 * i, 1000 * (i + 1))) else: lumis.append((100 * i) + lumi) newFile.addRun(Run(i, *lumis)) newFile.create() newFile.addParent(parentFile['lfn']) testFileset.addFile(newFile) if twoSites: for i in range(nFiles): newFile = File(lfn = '%s_%i_2' % (baseName, i), size = 1000, events = 100, locations = "otherse.cern.ch") lumis = [] for lumi in range(lumisPerFile): if rand: lumis.append(random.randint(1000 * i, 1000 * (i + 1))) else: lumis.append((100 * i) + lumi) newFile.addRun(Run(i, *lumis)) newFile.create() newFile.addParent(parentFile['lfn']) testFileset.addFile(newFile) testFileset.commit() testSubscription = Subscription(fileset = testFileset, workflow = self.testWorkflow, split_algo = "LumiBased", type = "Processing") testSubscription.create() return testSubscription
def createTestJobs(self, nJobs, cacheDir): """ _createTestJobs_ Create several jobs """ testWorkflow = Workflow(spec = "spec.xml", owner = "Simon", name = "wf001", task="Test") testWorkflow.create() testWMBSFileset = Fileset(name = "TestFileset") testWMBSFileset.create() testSubscription = Subscription(fileset = testWMBSFileset, workflow = testWorkflow, type = "Processing", split_algo = "FileBased") testSubscription.create() testJobGroup = JobGroup(subscription = testSubscription) testJobGroup.create() # Create a file testFileA = File(lfn = "/this/is/a/lfnA", size = 1024, events = 10) testFileA.addRun(Run(10, *[12312])) testFileA.setLocation('malpaquet') testFileA.create() baseName = makeUUID() # Now create a job for i in range(nJobs): testJob = Job(name = '%s-%i' % (baseName, i)) testJob.addFile(testFileA) testJob['location'] = 'malpaquet' testJob['retry_count'] = 1 testJob['retry_max'] = 10 testJob.create(testJobGroup) testJob.save() testJobGroup.add(testJob) testJobGroup.commit() # Set test job caches for job in testJobGroup.jobs: job.setCache(cacheDir) return testJobGroup
class SiblingProcessingBasedTest(unittest.TestCase): """ _SiblingProcessingBasedTest_ Test SiblingProcessing job splitting. """ def setUp(self): """ _setUp_ Setup the database connections and schema. """ self.testInit = TestInit(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setSchema(customModules = ["WMCore.WMBS"], useDefault = False) myThread = threading.currentThread() daofactory = DAOFactory(package = "WMCore.WMBS", logger = myThread.logger, dbinterface = myThread.dbi) locationAction = daofactory(classname = "Locations.New") locationAction.execute("site1", seName = "somese.cern.ch") locationAction.execute("site2", seName = "somese2.cern.ch") self.testFilesetA = Fileset(name = "FilesetA") self.testFilesetA.create() self.testFilesetB = Fileset(name = "FilesetB") self.testFilesetB.create() self.testFileA = File("testFileA", size = 1000, events = 100, locations = set(["somese.cern.ch"])) self.testFileA.create() self.testFileB = File("testFileB", size = 1000, events = 100, locations = set(["somese.cern.ch"])) self.testFileB.create() self.testFileC = File("testFileC", size = 1000, events = 100, locations = set(["somese.cern.ch"])) self.testFileC.create() self.testFilesetA.addFile(self.testFileA) self.testFilesetA.addFile(self.testFileB) self.testFilesetA.addFile(self.testFileC) self.testFilesetA.commit() self.testFileD = File("testFileD", size = 1000, events = 100, locations = set(["somese.cern.ch"])) self.testFileD.create() self.testFileE = File("testFileE", size = 1000, events = 100, locations = set(["somese.cern.ch"])) self.testFileE.create() self.testFileF = File("testFileF", size = 1000, events = 100, locations = set(["somese.cern.ch"])) self.testFileF.create() self.testFilesetB.addFile(self.testFileD) self.testFilesetB.addFile(self.testFileE) self.testFilesetB.addFile(self.testFileF) self.testFilesetB.commit() testWorkflowA = Workflow(spec = "specA.xml", owner = "Steve", name = "wfA", task = "Test") testWorkflowA.create() testWorkflowB = Workflow(spec = "specB.xml", owner = "Steve", name = "wfB", task = "Test") testWorkflowB.create() testWorkflowC = Workflow(spec = "specC.xml", owner = "Steve", name = "wfC", task = "Test") testWorkflowC.create() testWorkflowD = Workflow(spec = "specD.xml", owner = "Steve", name = "wfD", task = "Test") testWorkflowD.create() self.testSubscriptionA = Subscription(fileset = self.testFilesetA, workflow = testWorkflowA, split_algo = "FileBased", type = "Processing") self.testSubscriptionA.create() self.testSubscriptionB = Subscription(fileset = self.testFilesetB, workflow = testWorkflowB, split_algo = "FileBased", type = "Processing") self.testSubscriptionB.create() self.testSubscriptionC = Subscription(fileset = self.testFilesetB, workflow = testWorkflowC, split_algo = "FileBased", type = "Processing") self.testSubscriptionC.create() self.testSubscriptionD = Subscription(fileset = self.testFilesetB, workflow = testWorkflowD, split_algo = "FileBased", type = "Processing") self.testSubscriptionD.create() deleteWorkflow = Workflow(spec = "specE.xml", owner = "Steve", name = "wfE", task = "Test") deleteWorkflow.create() self.deleteSubscriptionA = Subscription(fileset = self.testFilesetA, workflow = deleteWorkflow, split_algo = "SiblingProcessingBased", type = "Cleanup") self.deleteSubscriptionA.create() self.deleteSubscriptionB = Subscription(fileset = self.testFilesetB, workflow = deleteWorkflow, split_algo = "SiblingProcessingBased", type = "Cleanup") self.deleteSubscriptionB.create() return def tearDown(self): """ _tearDown_ Clear out WMBS. """ self.testInit.clearDatabase() return def testSiblingProcessing(self): """ _testSiblingProcessing_ Verify that the sibling processing split works correctly dealing with failed files and acquiring files correctly. """ splitter = SplitterFactory() deleteFactoryA = splitter(package = "WMCore.WMBS", subscription = self.deleteSubscriptionA) deleteFactoryB = splitter(package = "WMCore.WMBS", subscription = self.deleteSubscriptionB) result = deleteFactoryA() assert len(result) == 0, \ "Error: No jobs should be returned." result = deleteFactoryB() assert len(result) == 0, \ "Error: No jobs should be returned." self.testSubscriptionA.completeFiles(self.testFileA) result = deleteFactoryA(files_per_job = 1) assert len(result) == 1, \ "Error: Only one jobgroup should be returned." assert len(result[0].jobs) == 1, \ "Error: There should only be one job in the jobgroup." assert len(result[0].jobs[0]["input_files"]) == 1, \ "Error: Job should only have one input file." assert result[0].jobs[0]["input_files"][0]["lfn"] == "testFileA", \ "Error: Input file for job is wrong." assert list(result[0].jobs[0]["input_files"][0]["locations"]) == ["somese.cern.ch"], \ "Error: File location is wrong." result = deleteFactoryB(files_per_job = 1) assert len(result) == 0, \ "Error: Second subscription should have no jobs." result = deleteFactoryA(files_per_job = 1) assert len(result) == 0, \ "Error: No jobs should have been created." self.testSubscriptionB.completeFiles(self.testFileD) self.testSubscriptionC.failFiles(self.testFileD) result = deleteFactoryA(files_per_job = 1) assert len(result) == 0, \ "Error: No jobs should have been created." result = deleteFactoryB(files_per_job = 1) assert len(result) == 0, \ "Error: No jobs should have been created." self.testSubscriptionD.failFiles(self.testFileD) result = deleteFactoryA(files_per_job = 1) assert len(result) == 0, \ "Error: No jobs should have been created." result = deleteFactoryB(files_per_job = 1) assert len(result) == 0, \ "Error: No job groups should have been created." self.testSubscriptionB.completeFiles([self.testFileE, self.testFileF]) self.testSubscriptionC.completeFiles([self.testFileE, self.testFileF]) self.testSubscriptionD.completeFiles([self.testFileE, self.testFileF]) result = deleteFactoryB(files_per_job = 10) assert len(result) == 0, \ "Error: No jobs should have been created." self.testFilesetB.markOpen(False) result = deleteFactoryB(files_per_job = 10) assert len(result) == 1, \ "Error: One jobgroup should have been returned." assert len(result[0].jobs) == 1, \ "Error: There should only be one job in the jobgroup." assert len(result[0].jobs[0]["input_files"]) == 2, \ "Error: Job should only have one input file." lfns = [result[0].jobs[0]["input_files"][0]["lfn"], result[0].jobs[0]["input_files"][1]["lfn"]] assert "testFileE" in lfns, \ "Error: TestFileE missing from job input." assert "testFileF" in lfns, \ "Error: TestFileF missing from job input." self.assertEqual(len(self.deleteSubscriptionB.availableFiles()), 0, "Error: There should be no available files.") completeFiles = self.deleteSubscriptionB.filesOfStatus("Completed") self.assertEqual(len(completeFiles), 1, "Error: There should only be one complete file.") self.assertEqual(list(completeFiles)[0]["lfn"], "testFileD", "Error: Test file D should be complete.") return def testMultipleLocations(self): """ _testMultipleLocations_ Verify that the sibling processing based algorithm doesn't create jobs that run over files at multiple sites. """ testFile1 = File("testFile1", size = 1000, events = 100, locations = set(["somese2.cern.ch"])) testFile1.create() testFile2 = File("testFile2", size = 1000, events = 100, locations = set(["somese2.cern.ch"])) testFile2.create() testFile3 = File("testFile3", size = 1000, events = 100, locations = set(["somese2.cern.ch"])) testFile3.create() self.testFilesetA.addFile(testFile1) self.testFilesetA.addFile(testFile2) self.testFilesetA.addFile(testFile3) self.testFilesetA.commit() self.testFilesetA.markOpen(False) self.testSubscriptionA.completeFiles([testFile1, testFile2, testFile3]) self.testSubscriptionA.completeFiles([self.testFileA, self.testFileB, self.testFileC]) splitter = SplitterFactory() deleteFactoryA = splitter(package = "WMCore.WMBS", subscription = self.deleteSubscriptionA) result = deleteFactoryA(files_per_job = 50) assert len(result) == 2, \ "Error: Wrong number of jobgroups returned." goldenFilesA = ["testFileA", "testFileB", "testFileC"] goldenFilesB = ["testFile1", "testFile2", "testFile3"] locations = {"testFileA": "somese.cern.ch", "testFileB": "somese.cern.ch", "testFileC": "somese.cern.ch", "testFile1": "somese2.cern.ch", "testFile2": "somese2.cern.ch", "testFile3": "somese2.cern.ch"} for jobGroup in result: assert len(jobGroup.jobs) == 1, \ "Error: Wrong number of jobs in jobgroup." assert len(jobGroup.jobs[0]["input_files"]) == 3, \ "Error: Wrong number of input files in job." jobSite = list(jobGroup.jobs[0]["input_files"][0]["locations"])[0] if jobSite == "somese.cern.ch": goldenFiles = goldenFilesA else: goldenFiles = goldenFilesB for jobFile in jobGroup.jobs[0]["input_files"]: assert list(jobFile["locations"])[0] == locations[jobFile["lfn"]], \ "Error: Wrong site for file." goldenFiles.remove(jobFile["lfn"]) assert len(goldenFiles) == 0, \ "Error: Files are missing." return def testLargeNumberOfFiles(self): """ _testLargeNumberOfFiles_ Setup a subscription with 500 files and verify that the splitting algo works correctly. """ testWorkflowA = Workflow(spec = "specA.xml", owner = "Steve", name = "wfA", task = "Test") testWorkflowA.create() testWorkflowB = Workflow(spec = "specB.xml", owner = "Steve", name = "wfB", task = "Test") testWorkflowB.create() testFileset = Fileset(name = "TestFileset") testFileset.create() allFiles = [] for i in range(500): testFile = File(str(i), size = 1000, events = 100, locations = set(["somese.cern.ch"])) testFile.create() allFiles.append(testFile) testFileset.addFile(testFile) testFileset.commit() testSubscriptionA = Subscription(fileset = testFileset, workflow = testWorkflowA, split_algo = "FileBased", type = "Processing") testSubscriptionA.create() testSubscriptionB = Subscription(fileset = testFileset, workflow = testWorkflowB, split_algo = "SiblingProcessingBased", type = "Processing") testSubscriptionB.create() testSubscriptionA.completeFiles(allFiles) splitter = SplitterFactory() deleteFactoryA = splitter(package = "WMCore.WMBS", subscription = testSubscriptionB) result = deleteFactoryA(files_per_job = 50) self.assertEqual(len(result), 1, "Error: Wrong number of job groups returned.") self.assertEqual(len(result[0].jobs), 10, "Error: Wrong number of jobs returned.") return def testFilesWithoutOtherSubscriptions(self): """ _testFilesWithoutOtherSubscriptions_ Test the case where files only in the delete subscription can happen if cleanup of the other subscriptions is fast """ testWorkflowA = Workflow(spec = "specA.xml", owner = "Steve", name = "wfA", task = "Test") testWorkflowA.create() testFileset = Fileset(name = "TestFileset") testFileset.create() allFiles = [] for i in range(500): testFile = File(str(i), size = 1000, events = 100, locations = set(["somese.cern.ch"])) testFile.create() allFiles.append(testFile) testFileset.addFile(testFile) testFileset.commit() testSubscriptionA = Subscription(fileset = testFileset, workflow = testWorkflowA, split_algo = "SiblingProcessingBased", type = "Processing") testSubscriptionA.create() splitter = SplitterFactory() deleteFactoryA = splitter(package = "WMCore.WMBS", subscription = testSubscriptionA) result = deleteFactoryA(files_per_job = 50) self.assertEqual(len(result), 1, "Error: Wrong number of job groups returned.") self.assertEqual(len(result[0].jobs), 10, "Error: Wrong number of jobs returned.") return
def testPersist(self): """ _testPersist_ This is the test class for function Propagate from module ChangeState """ change = ChangeState(self.config, "changestate_t") locationAction = self.daoFactory(classname="Locations.New") locationAction.execute("site1", pnn="T2_CH_CERN") testWorkflow = Workflow(spec=self.specUrl, owner="Steve", name="wf001", task=self.taskName) testWorkflow.create() testFileset = Fileset(name="TestFileset") testFileset.create() for i in range(4): newFile = File(lfn="File%s" % i, locations=set(["T2_CH_CERN"])) newFile.create() testFileset.addFile(newFile) testFileset.commit() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow, split_algo="FileBased") testSubscription.create() splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=testSubscription) jobGroup = jobFactory(files_per_job=1)[0] assert len(jobGroup.jobs) == 4, \ "Error: Splitting should have created four jobs." testJobA = jobGroup.jobs[0] testJobA["user"] = "******" testJobA["group"] = "DMWM" testJobA["taskType"] = "Processing" testJobB = jobGroup.jobs[1] testJobB["user"] = "******" testJobB["group"] = "DMWM" testJobB["taskType"] = "Processing" testJobC = jobGroup.jobs[2] testJobC["user"] = "******" testJobC["group"] = "DMWM" testJobC["taskType"] = "Processing" testJobD = jobGroup.jobs[3] testJobD["user"] = "******" testJobD["group"] = "DMWM" testJobD["taskType"] = "Processing" change.persist([testJobA, testJobB], "created", "new") change.persist([testJobC, testJobD], "new", "none") stateDAO = self.daoFactory(classname="Jobs.GetState") jobAState = stateDAO.execute(id=testJobA["id"]) jobBState = stateDAO.execute(id=testJobB["id"]) jobCState = stateDAO.execute(id=testJobC["id"]) jobDState = stateDAO.execute(id=testJobD["id"]) assert jobAState == "created" and jobBState == "created" and \ jobCState == "new" and jobDState == "new", \ "Error: Jobs didn't change state correctly." return
def setUp(self): """ _setUp_ Setup the database connections and schema. """ self.testInit = TestInit(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setSchema(customModules = ["WMCore.WMBS"], useDefault = False) myThread = threading.currentThread() daofactory = DAOFactory(package = "WMCore.WMBS", logger = myThread.logger, dbinterface = myThread.dbi) locationAction = daofactory(classname = "Locations.New") locationAction.execute("site1", seName = "somese.cern.ch") locationAction.execute("site2", seName = "somese2.cern.ch") self.testFilesetA = Fileset(name = "FilesetA") self.testFilesetA.create() self.testFilesetB = Fileset(name = "FilesetB") self.testFilesetB.create() self.testFileA = File("testFileA", size = 1000, events = 100, locations = set(["somese.cern.ch"])) self.testFileA.create() self.testFileB = File("testFileB", size = 1000, events = 100, locations = set(["somese.cern.ch"])) self.testFileB.create() self.testFileC = File("testFileC", size = 1000, events = 100, locations = set(["somese.cern.ch"])) self.testFileC.create() self.testFilesetA.addFile(self.testFileA) self.testFilesetA.addFile(self.testFileB) self.testFilesetA.addFile(self.testFileC) self.testFilesetA.commit() self.testFileD = File("testFileD", size = 1000, events = 100, locations = set(["somese.cern.ch"])) self.testFileD.create() self.testFileE = File("testFileE", size = 1000, events = 100, locations = set(["somese.cern.ch"])) self.testFileE.create() self.testFileF = File("testFileF", size = 1000, events = 100, locations = set(["somese.cern.ch"])) self.testFileF.create() self.testFilesetB.addFile(self.testFileD) self.testFilesetB.addFile(self.testFileE) self.testFilesetB.addFile(self.testFileF) self.testFilesetB.commit() testWorkflowA = Workflow(spec = "specA.xml", owner = "Steve", name = "wfA", task = "Test") testWorkflowA.create() testWorkflowB = Workflow(spec = "specB.xml", owner = "Steve", name = "wfB", task = "Test") testWorkflowB.create() testWorkflowC = Workflow(spec = "specC.xml", owner = "Steve", name = "wfC", task = "Test") testWorkflowC.create() testWorkflowD = Workflow(spec = "specD.xml", owner = "Steve", name = "wfD", task = "Test") testWorkflowD.create() self.testSubscriptionA = Subscription(fileset = self.testFilesetA, workflow = testWorkflowA, split_algo = "FileBased", type = "Processing") self.testSubscriptionA.create() self.testSubscriptionB = Subscription(fileset = self.testFilesetB, workflow = testWorkflowB, split_algo = "FileBased", type = "Processing") self.testSubscriptionB.create() self.testSubscriptionC = Subscription(fileset = self.testFilesetB, workflow = testWorkflowC, split_algo = "FileBased", type = "Processing") self.testSubscriptionC.create() self.testSubscriptionD = Subscription(fileset = self.testFilesetB, workflow = testWorkflowD, split_algo = "FileBased", type = "Processing") self.testSubscriptionD.create() deleteWorkflow = Workflow(spec = "specE.xml", owner = "Steve", name = "wfE", task = "Test") deleteWorkflow.create() self.deleteSubscriptionA = Subscription(fileset = self.testFilesetA, workflow = deleteWorkflow, split_algo = "SiblingProcessingBased", type = "Cleanup") self.deleteSubscriptionA.create() self.deleteSubscriptionB = Subscription(fileset = self.testFilesetB, workflow = deleteWorkflow, split_algo = "SiblingProcessingBased", type = "Cleanup") self.deleteSubscriptionB.create() return
def testUpdateLocation(self): """ _testUpdateLocation_ Check that we can update the location of a job through the state machine. """ change = ChangeState(self.config, "changestate_t") locationAction = self.daoFactory(classname="Locations.New") locationAction.execute("site1", pnn="T2_CH_CERN") locationAction.execute("site2", pnn="T1_US_FNAL_Disk") testWorkflow = Workflow(spec=self.specUrl, owner="Steve", name="wf001", task=self.taskName) testWorkflow.create() testFileset = Fileset(name="TestFileset") testFileset.create() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow, split_algo="FileBased") testSubscription.create() testFileA = File(lfn="SomeLFNA", events=1024, size=2048, locations=set(["T2_CH_CERN", "T1_US_FNAL_Disk"])) testFileB = File(lfn="SomeLFNB", events=1025, size=2049, locations=set(["T2_CH_CERN", "T1_US_FNAL_Disk"])) testFileA.create() testFileB.create() testFileset.addFile(testFileA) testFileset.addFile(testFileB) testFileset.commit() splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=testSubscription) jobGroup = jobFactory(files_per_job=1)[0] assert len(jobGroup.jobs) == 2, \ "Error: Splitting should have created two jobs." testJobA = jobGroup.jobs[0] testJobA["user"] = "******" testJobA["group"] = "DMWM" testJobA["taskType"] = "Merge" testJobA["site_cms_name"] = "site1" testJobB = jobGroup.jobs[1] testJobB["user"] = "******" testJobB["group"] = "DMWM" testJobB["taskType"] = "Processing" testJobB["site_cms_name"] = "site2" change.propagate([testJobA, testJobB], "new", "none") change.propagate([testJobA, testJobB], "created", "new") change.propagate([testJobA, testJobB], "executing", "created") testJobADoc = change.jobsdatabase.document(testJobA["couch_record"]) maxKey = max(testJobADoc["states"].keys()) transition = testJobADoc["states"][maxKey] self.assertEqual(transition["location"], "site1") testJobBDoc = change.jobsdatabase.document(testJobB["couch_record"]) maxKey = max(testJobBDoc["states"].keys()) transition = testJobBDoc["states"][maxKey] self.assertEqual(transition["location"], "site2") jobs = [{'jobid': 1, 'location': 'site2'}] change.recordLocationChange(jobs) testJobADoc = change.jobsdatabase.document(testJobA["couch_record"]) maxKey = max(testJobADoc["states"].keys()) transition = testJobADoc["states"][maxKey] self.assertEqual(transition["location"], "site2") listJobsDAO = self.daoFactory(classname="Jobs.GetLocation") jobid = [{'jobid': 1}, {'jobid': 2}] jobsLocation = listJobsDAO.execute(jobid) for job in jobsLocation: self.assertEqual(job['site_name'], 'site2') return
def testRetryCount(self): """ _testRetryCount_ Verify that the retry count is incremented when we move out of the submitcooloff or jobcooloff state. """ change = ChangeState(self.config, "changestate_t") locationAction = self.daoFactory(classname="Locations.New") locationAction.execute("site1", pnn="T2_CH_CERN") testWorkflow = Workflow(spec=self.specUrl, owner="Steve", name="wf001", task=self.taskName) testWorkflow.create() testFileset = Fileset(name="TestFileset") testFileset.create() for i in range(4): newFile = File(lfn="File%s" % i, locations=set(["T2_CH_CERN"])) newFile.create() testFileset.addFile(newFile) testFileset.commit() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow, split_algo="FileBased") testSubscription.create() splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=testSubscription) jobGroup = jobFactory(files_per_job=1)[0] assert len(jobGroup.jobs) == 4, \ "Error: Splitting should have created four jobs." testJobA = jobGroup.jobs[0] testJobA["user"] = "******" testJobA["group"] = "DMWM" testJobA["taskType"] = "Processing" testJobB = jobGroup.jobs[1] testJobB["user"] = "******" testJobB["group"] = "DMWM" testJobB["taskType"] = "Processing" testJobC = jobGroup.jobs[2] testJobC["user"] = "******" testJobC["group"] = "DMWM" testJobC["taskType"] = "Processing" testJobD = jobGroup.jobs[3] testJobD["user"] = "******" testJobD["group"] = "DMWM" testJobD["taskType"] = "Processing" change.persist([testJobA], "created", "submitcooloff") change.persist([testJobB], "created", "jobcooloff") change.persist([testJobC, testJobD], "new", "none") testJobA.load() testJobB.load() testJobC.load() testJobD.load() assert testJobA["retry_count"] == 1, \ "Error: Retry count is wrong." assert testJobB["retry_count"] == 1, \ "Error: Retry count is wrong." assert testJobC["retry_count"] == 0, \ "Error: Retry count is wrong." assert testJobD["retry_count"] == 0, \ "Error: Retry count is wrong." return
def testReRecoDroppingRECO(self): """ _testReRecoDroppingRECO_ Verify that ReReco workflows can be created and inserted into WMBS correctly. The ReReco workflow is just a DataProcessing workflow with skims tacked on. This tests run on unmerged RECO output """ skimConfig = self.injectSkimConfig() recoConfig = self.injectReRecoConfig() dataProcArguments = ReRecoWorkloadFactory.getTestArguments() dataProcArguments["ProcessingString"] = "ProcString" dataProcArguments["ConfigCacheID"] = recoConfig dataProcArguments.update({ "SkimName1": "SomeSkim", "SkimInput1": "RECOoutput", "Skim1ConfigCacheID": skimConfig }) dataProcArguments["CouchURL"] = os.environ["COUCHURL"] dataProcArguments["CouchDBName"] = "rereco_t" dataProcArguments["TransientOutputModules"] = ["RECOoutput"] dataProcArguments["EnableHarvesting"] = True dataProcArguments["DQMConfigCacheID"] = self.injectDQMHarvestConfig() factory = ReRecoWorkloadFactory() testWorkload = factory.factoryWorkloadConstruction( "TestWorkload", dataProcArguments) self.assertEqual(testWorkload.data.tasks.DataProcessing.tree.children. \ SomeSkim.tree.children.SomeSkimMergeSkimB.steps.cmsRun1.output.modules. \ Merged.mergedLFNBase, '/store/data/FAKE/MinimumBias/USER/SkimBFilter-ProcString-v1') testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing", "SomeBlock", cachepath=self.testDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS( testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset) skimWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/DataProcessing/SomeSkim") skimWorkflow.load() self.assertEqual(len(skimWorkflow.outputMap), 3, "Error: Wrong number of WF outputs.") goldenOutputMods = {"SkimA": "RAW-RECO", "SkimB": "USER"} for goldenOutputMod, tier in viewitems(goldenOutputMods): fset = goldenOutputMod + tier mergedOutput = skimWorkflow.outputMap[fset][0][ "merged_output_fileset"] unmergedOutput = skimWorkflow.outputMap[fset][0]["output_fileset"] mergedOutput.loadData() unmergedOutput.loadData() self.assertEqual( mergedOutput.name, "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged%s" % (goldenOutputMod, tier), "Error: Merged output fileset is wrong: %s" % mergedOutput.name) self.assertEqual( unmergedOutput.name, "/TestWorkload/DataProcessing/SomeSkim/unmerged-%s" % fset, "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name) logArchOutput = skimWorkflow.outputMap["logArchive"][0][ "merged_output_fileset"] unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0][ "output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual( logArchOutput.name, "/TestWorkload/DataProcessing/SomeSkim/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") self.assertEqual( unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/SomeSkim/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") for goldenOutputMod, tier in viewitems(goldenOutputMods): mergeWorkflow = Workflow( name="TestWorkload", task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s" % goldenOutputMod) mergeWorkflow.load() self.assertEqual(len(mergeWorkflow.outputMap), 2, "Error: Wrong number of WF outputs.") mergedMergeOutput = mergeWorkflow.outputMap[ "Merged%s" % tier][0]["merged_output_fileset"] unmergedMergeOutput = mergeWorkflow.outputMap[ "Merged%s" % tier][0]["output_fileset"] mergedMergeOutput.loadData() unmergedMergeOutput.loadData() self.assertEqual( mergedMergeOutput.name, "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged%s" % (goldenOutputMod, tier), "Error: Merged output fileset is wrong.") self.assertEqual( unmergedMergeOutput.name, "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged%s" % (goldenOutputMod, tier), "Error: Unmerged output fileset is wrong.") logArchOutput = mergeWorkflow.outputMap["logArchive"][0][ "merged_output_fileset"] unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0][ "output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual( logArchOutput.name, "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name) self.assertEqual( unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong.") topLevelFileset = Fileset( name="/TestWorkload/DataProcessing/unmerged-RECOoutputRECO") topLevelFileset.loadData() skimSubscription = Subscription(fileset=topLevelFileset, workflow=skimWorkflow) skimSubscription.loadData() self.assertEqual(skimSubscription["type"], "Skim", "Error: Wrong subscription type.") self.assertEqual(skimSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") for skimOutput, tier in viewitems(goldenOutputMods): fset = skimOutput + tier unmerged = Fileset( name="/TestWorkload/DataProcessing/SomeSkim/unmerged-%s" % fset) unmerged.loadData() mergeWorkflow = Workflow( name="TestWorkload", task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s" % skimOutput) mergeWorkflow.load() mergeSubscription = Subscription(fileset=unmerged, workflow=mergeWorkflow) mergeSubscription.loadData() self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize", "Error: Wrong split algo.") for skimOutput, tier in viewitems(goldenOutputMods): fset = skimOutput + tier unmerged = Fileset( name="/TestWorkload/DataProcessing/SomeSkim/unmerged-%s" % fset) unmerged.loadData() cleanupWorkflow = Workflow( name="TestWorkload", task= "/TestWorkload/DataProcessing/SomeSkim/SomeSkimCleanupUnmerged%s" % skimOutput) cleanupWorkflow.load() cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupWorkflow) cleanupSubscription.loadData() self.assertEqual(cleanupSubscription["type"], "Cleanup", "Error: Wrong subscription type.") self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased", "Error: Wrong split algo.") for skimOutput in goldenOutputMods: skimMergeLogCollect = Fileset( name= "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive" % skimOutput) skimMergeLogCollect.loadData() skimMergeLogCollectWorkflow = Workflow( name="TestWorkload", task= "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/SomeSkim%sMergeLogCollect" % (skimOutput, skimOutput)) skimMergeLogCollectWorkflow.load() logCollectSub = Subscription(fileset=skimMergeLogCollect, workflow=skimMergeLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algo.") return
class FixedDelayTest(unittest.TestCase): def setUp(self): """ _setUp_ Create two subscriptions: One that contains a single file and one that contains multiple files. """ self.testInit = TestInit(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setSchema(customModules = ["WMCore.WMBS"], useDefault = False) myThread = threading.currentThread() daofactory = DAOFactory(package = "WMCore.WMBS", logger = myThread.logger, dbinterface = myThread.dbi) locationAction = daofactory(classname = "Locations.New") locationAction.execute(siteName = "site1", pnn = "T2_CH_CERN") self.multipleFileFileset = Fileset(name = "TestFileset1") self.multipleFileFileset.create() for i in range(10): newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["T2_CH_CERN"])) newFile.addRun(Run(i, *[45+i])) newFile.create() self.multipleFileFileset.addFile(newFile) self.multipleFileFileset.commit() self.singleFileFileset = Fileset(name = "TestFileset2") self.singleFileFileset.create() newFile = File("/some/file/name", size = 1000, events = 100, locations = set(["T2_CH_CERN"])) newFile.addRun(Run(1, *[45])) newFile.create() self.singleFileFileset.addFile(newFile) self.singleFileFileset.commit() self.multipleFileLumiset = Fileset(name = "TestFileset3") self.multipleFileLumiset.create() for i in range(10): newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["T2_CH_CERN"])) newFile.addRun(Run(1, *[45+i/3])) newFile.create() self.multipleFileLumiset.addFile(newFile) self.multipleFileLumiset.commit() self.singleLumiFileset = Fileset(name = "TestFileset4") self.singleLumiFileset.create() for i in range(10): newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["T2_CH_CERN"])) newFile.addRun(Run(1, *[45])) newFile.create() self.singleLumiFileset.addFile(newFile) self.singleLumiFileset.commit() testWorkflow = Workflow(spec = "spec.xml", owner = "mnorman", name = "wf001", task="Test") testWorkflow.create() self.multipleFileSubscription = Subscription(fileset = self.multipleFileFileset, workflow = testWorkflow, split_algo = "FixedDelay", type = "Processing") self.singleFileSubscription = Subscription(fileset = self.singleFileFileset, workflow = testWorkflow, split_algo = "FixedDelay", type = "Processing") self.multipleLumiSubscription = Subscription(fileset = self.multipleFileLumiset, workflow = testWorkflow, split_algo = "FixedDelay", type = "Processing") self.singleLumiSubscription = Subscription(fileset = self.singleLumiFileset, workflow = testWorkflow, split_algo = "FixedDelay", type = "Processing") self.multipleFileSubscription.create() self.singleFileSubscription.create() self.multipleLumiSubscription.create() self.singleLumiSubscription.create() return def tearDown(self): """ _tearDown_ Nothing to do... """ self.testInit.clearDatabase() return def testNone(self): """ _testNone_ Since the subscriptions are open, we shouldn't get any jobs back """ splitter = SplitterFactory() jobFactory = splitter(self.singleFileSubscription) jobGroups = jobFactory(trigger_time = int(time.time())*2) self.assertEqual(jobGroups, [], "Should have returned a null set") jobFactory = splitter(self.multipleFileSubscription) jobGroups = jobFactory(trigger_time = int(time.time())*2) self.assertEqual(jobGroups, [], "Should have returned a null set") jobFactory = splitter(self.multipleLumiSubscription) jobGroups = jobFactory(trigger_time = int(time.time())*2) self.assertEqual(jobGroups, [], "Should have returned a null set") jobFactory = splitter(self.singleLumiSubscription) jobGroups = jobFactory(trigger_time = int(time.time())*2) self.assertEqual(jobGroups, [], "Should have returned a null set") return def testClosed(self): """ _testClosed_ Since the subscriptions are closed and none of the files have been acquired, all of the files should show up """ splitter = SplitterFactory() self.singleFileSubscription.getFileset().markOpen(False) jobFactory = splitter(self.singleFileSubscription) jobGroups = jobFactory(trigger_time = 1) assert len(jobGroups) == 1, \ "ERROR: JobFactory didn't return one JobGroup." assert len(jobGroups[0].jobs) == 1, \ "ERROR: JobFactory didn't create a single job." job = jobGroups[0].jobs.pop() assert job.getFiles(type = "lfn") == ["/some/file/name"], \ "ERROR: Job contains unknown files." self.multipleFileSubscription.getFileset().markOpen(False) jobFactory = splitter(self.multipleFileSubscription) jobGroups = jobFactory(trigger_time = 1) self.assertEqual(len(jobGroups), 1) self.assertEqual(len(jobGroups[0].jobs),1) myfiles = jobGroups[0].jobs[0].getFiles() self.assertEqual(len(myfiles), 10) self.multipleLumiSubscription.getFileset().markOpen(False) jobFactory = splitter(self.multipleLumiSubscription) jobGroups = jobFactory(trigger_time = 1) self.assertEqual(len(jobGroups), 1) self.assertEqual(len(jobGroups[0].jobs),1) myfiles = jobGroups[0].jobs[0].getFiles() self.assertEqual(len(myfiles), 10) #self.assertEqual(jobGroups, [], "Should have returned a null set") self.singleLumiSubscription.getFileset().markOpen(False) jobFactory = splitter(self.singleLumiSubscription) jobGroups = jobFactory(trigger_time = 1) assert len(jobGroups) == 1, \ "ERROR: JobFactory didn't return one JobGroup." assert len(jobGroups[0].jobs) == 1, \ "ERROR: JobFactory didn't create a single job." myfiles = jobGroups[0].jobs[0].getFiles() self.assertEqual(len(myfiles), 10) def testAllAcquired(self): """ _testAllAcquired_ should all return no job groups """ splitter = SplitterFactory() self.singleFileSubscription.acquireFiles( self.singleFileSubscription.availableFiles()) jobFactory = splitter(self.singleFileSubscription) jobGroups = jobFactory(trigger_time = 1) self.assertEqual(jobGroups, [], "Should have returned a null set") self.multipleFileSubscription.acquireFiles( self.multipleFileSubscription.availableFiles()) jobFactory = splitter(self.multipleFileSubscription) jobGroups = jobFactory(trigger_time = 1) self.assertEqual(jobGroups, [], "Should have returned a null set") self.multipleLumiSubscription.acquireFiles( self.multipleLumiSubscription.availableFiles()) jobFactory = splitter(self.multipleLumiSubscription) jobGroups = jobFactory(trigger_time = 1) self.assertEqual(jobGroups, [], "Should have returned a null set") self.singleLumiSubscription.acquireFiles( self.singleLumiSubscription.availableFiles()) jobFactory = splitter(self.singleLumiSubscription) jobGroups = jobFactory(trigger_time = 1) self.assertEqual(jobGroups, [], "Should have returned a null set") def testClosedSomeAcquired(self): """ _testClosedSomeAcquired_ since the subscriptions are closed and none of the files ahve been acquired, all of the files should show up """ splitter = SplitterFactory() self.multipleFileSubscription.getFileset().markOpen(False) self.singleFileSubscription.acquireFiles( [self.singleFileSubscription.availableFiles().pop()]) jobFactory = splitter(self.singleFileSubscription) jobGroups = jobFactory(trigger_time = 1) self.assertEqual(jobGroups, [], "Should have returned a null set") self.multipleFileSubscription.getFileset().markOpen(False) self.multipleFileSubscription.acquireFiles( [self.multipleFileSubscription.availableFiles().pop()]) jobFactory = splitter(package = "WMCore.WMBS", subscription =self.multipleFileSubscription) jobGroups = jobFactory(trigger_time = 1) self.assertEqual(len(jobGroups),1, "Should have gotten one jobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, \ "JobFactory should have made one job") myfiles = jobGroups[0].jobs[0].getFiles() self.assertEqual(len(myfiles), 9, \ "JobFactory should have provides us with 9 files") self.multipleLumiSubscription.getFileset().markOpen(False) self.multipleLumiSubscription.acquireFiles( [self.multipleLumiSubscription.availableFiles().pop()]) jobFactory = splitter(self.multipleLumiSubscription) jobGroups = jobFactory(trigger_time = 1) self.assertEqual(len(jobGroups),1, "Should have gotten one jobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, \ "JobFactory should have made one job") myfiles = jobGroups[0].jobs[0].getFiles() self.assertEqual(len(myfiles), 9, \ "JobFactory should have provides us with 9 files") self.singleLumiSubscription.getFileset().markOpen(False) self.singleLumiSubscription.acquireFiles( [self.singleLumiSubscription.availableFiles().pop()]) jobFactory = splitter(self.singleLumiSubscription) jobGroups = jobFactory(trigger_time = 1) self.assertEqual(len(jobGroups),1, "Should have gotten one jobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, \ "JobFactory should have made one job") myfiles = jobGroups[0].jobs[0].getFiles() self.assertEqual(len(myfiles), 9, \ "JobFactory should have provides us with 9 files") self.assertEqual(len(myfiles), 9)
def _createSubscriptionsInWMBS(self, task, fileset, alternativeFilesetClose=False): """ __createSubscriptionsInWMBS_ Create subscriptions in WMBS for all the tasks in the spec. This includes filesets, workflows and the output map for each task. """ # create runtime sandbox for workflow self.createSandbox() # FIXME: Let workflow put in values if spec is missing them workflow = Workflow( spec=self.wmSpec.specUrl(), owner=self.wmSpec.getOwner()["name"], dn=self.wmSpec.getOwner().get("dn", "unknown"), group=self.wmSpec.getOwner().get("group", "unknown"), owner_vogroup=self.wmSpec.getOwner().get("vogroup", "DEFAULT"), owner_vorole=self.wmSpec.getOwner().get("vorole", "DEFAULT"), name=self.wmSpec.name(), task=task.getPathName(), wfType=self.wmSpec.getDashboardActivity(), alternativeFilesetClose=alternativeFilesetClose, priority=self.wmSpec.priority()) workflow.create() subscription = Subscription(fileset=fileset, workflow=workflow, split_algo=task.jobSplittingAlgorithm(), type=task.getPrimarySubType()) subscription.create() ### FIXME: I'm pretty sure we can improve how we handle this site white/black list for site in task.siteWhitelist(): subscription.addWhiteBlackList([{ "site_name": site, "valid": True }]) for site in task.siteBlacklist(): subscription.addWhiteBlackList([{ "site_name": site, "valid": False }]) if self.topLevelSubscription is None: self.topLevelSubscription = subscription logging.info("Top level subscription %s created for %s", subscription["id"], self.wmSpec.name()) else: logging.info("Child subscription %s created for %s", subscription["id"], self.wmSpec.name()) outputModules = task.getOutputModulesForTask() ignoredOutputModules = task.getIgnoredOutputModulesForTask() for outputModule in outputModules: for outputModuleName in outputModule.listSections_(): if outputModuleName in ignoredOutputModules: msg = "%s has %s as IgnoredOutputModule, skipping fileset creation." logging.info(msg, task.getPathName(), outputModuleName) continue dataTier = getattr(getattr(outputModule, outputModuleName), "dataTier", '') filesetName = self.outputFilesetName(task, outputModuleName, dataTier) outputFileset = Fileset(filesetName) outputFileset.create() outputFileset.markOpen(True) mergedOutputFileset = None for childTask in task.childTaskIterator(): if childTask.data.input.outputModule == outputModuleName: childDatatier = getattr(childTask.data.input, 'dataTier', '') if childTask.taskType() in [ "Cleanup", "Merge" ] and childDatatier != dataTier: continue elif childTask.taskType( ) == "Merge" and childDatatier == dataTier: filesetName = self.outputFilesetName( childTask, "Merged", dataTier) mergedOutputFileset = Fileset(filesetName) mergedOutputFileset.create() mergedOutputFileset.markOpen(True) primaryDataset = getattr( getattr(outputModule, outputModuleName), "primaryDataset", None) if primaryDataset is not None: self.mergeOutputMapping[ mergedOutputFileset.id] = primaryDataset self._createSubscriptionsInWMBS( childTask, outputFileset, alternativeFilesetClose) if mergedOutputFileset is None: workflow.addOutput(outputModuleName + dataTier, outputFileset, outputFileset) else: workflow.addOutput(outputModuleName + dataTier, outputFileset, mergedOutputFileset) return
def testMonteCarloFromGEN(self): """ _testMonteCarloFromGEN_ Create a MonteCarloFromGEN workflow and verify it installs into WMBS correctly. """ arguments = MonteCarloFromGENWorkloadFactory.getTestArguments() arguments["ConfigCacheID"] = self.injectConfig() arguments["CouchDBName"] = "mclhe_t" arguments["PrimaryDataset"] = "WaitThisIsNotMinimumBias" factory = MonteCarloFromGENWorkloadFactory() testWorkload = factory.factoryWorkloadConstruction( "TestWorkload", arguments) outputDatasets = testWorkload.listOutputDatasets() self.assertEqual(len(outputDatasets), 2) self.assertTrue("/WaitThisIsNotMinimumBias/FAKE-FilterRECO-v1/RECO" in outputDatasets) self.assertTrue( "/WaitThisIsNotMinimumBias/FAKE-FilterALCARECO-v1/ALCARECO" in outputDatasets) testWMBSHelper = WMBSHelper(testWorkload, "MonteCarloFromGEN", "SomeBlock", cachepath=self.testDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS( testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset) procWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/MonteCarloFromGEN") procWorkflow.load() self.assertEqual(len(procWorkflow.outputMap.keys()), 3, "Error: Wrong number of WF outputs.") self.assertEqual(procWorkflow.wfType, 'production') goldenOutputMods = ["outputRECORECO", "outputALCARECOALCARECO"] for goldenOutputMod in goldenOutputMods: mergedOutput = procWorkflow.outputMap[goldenOutputMod][0][ "merged_output_fileset"] unmergedOutput = procWorkflow.outputMap[goldenOutputMod][0][ "output_fileset"] mergedOutput.loadData() unmergedOutput.loadData() self.assertEqual( mergedOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod, "Error: Merged output fileset is wrong: %s" % mergedOutput.name) self.assertEqual( unmergedOutput.name, "/TestWorkload/MonteCarloFromGEN/unmerged-%s" % goldenOutputMod, "Error: Unmerged output fileset is wrong.") logArchOutput = procWorkflow.outputMap["logArchive"][0][ "merged_output_fileset"] unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0][ "output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual( logArchOutput.name, "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") self.assertEqual( unmergedLogArchOutput.name, "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") for goldenOutputMod in goldenOutputMods: mergeWorkflow = Workflow( name="TestWorkload", task="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s" % goldenOutputMod) mergeWorkflow.load() self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][ "merged_output_fileset"] unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][ "output_fileset"] mergedMergeOutput.loadData() unmergedMergeOutput.loadData() self.assertEqual( mergedMergeOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod, "Error: Merged output fileset is wrong.") self.assertEqual( unmergedMergeOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod, "Error: Unmerged output fileset is wrong.") logArchOutput = mergeWorkflow.outputMap["logArchive"][0][ "merged_output_fileset"] unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0][ "output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual( logArchOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name) self.assertEqual( unmergedLogArchOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong.") topLevelFileset = Fileset( name="TestWorkload-MonteCarloFromGEN-SomeBlock") topLevelFileset.loadData() procSubscription = Subscription(fileset=topLevelFileset, workflow=procWorkflow) procSubscription.loadData() self.assertEqual( procSubscription["type"], "Production", "Error: Wrong subscription type: %s" % procSubscription["type"]) self.assertEqual(procSubscription["split_algo"], "EventAwareLumiBased", "Error: Wrong split algo.") unmergedReco = Fileset( name="/TestWorkload/MonteCarloFromGEN/unmerged-outputRECORECO") unmergedReco.loadData() recoMergeWorkflow = Workflow( name="TestWorkload", task= "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO" ) recoMergeWorkflow.load() mergeSubscription = Subscription(fileset=unmergedReco, workflow=recoMergeWorkflow) mergeSubscription.loadData() self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual( mergeSubscription["split_algo"], "ParentlessMergeBySize", "Error: Wrong split algo: %s" % mergeSubscription["split_algo"]) unmergedAlca = Fileset( name= "/TestWorkload/MonteCarloFromGEN/unmerged-outputALCARECOALCARECO") unmergedAlca.loadData() alcaMergeWorkflow = Workflow( name="TestWorkload", task= "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO" ) alcaMergeWorkflow.load() mergeSubscription = Subscription(fileset=unmergedAlca, workflow=alcaMergeWorkflow) mergeSubscription.loadData() self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual( mergeSubscription["split_algo"], "ParentlessMergeBySize", "Error: Wrong split algo: %s" % mergeSubscription["split_algo"]) for procOutput in ["outputRECORECO", "outputALCARECOALCARECO"]: unmerged = Fileset( name="/TestWorkload/MonteCarloFromGEN/unmerged-%s" % procOutput) unmerged.loadData() cleanupWorkflow = Workflow( name="TestWorkload", task= "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENCleanupUnmerged%s" % procOutput) cleanupWorkflow.load() cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupWorkflow) cleanupSubscription.loadData() self.assertEqual(cleanupSubscription["type"], "Cleanup", "Error: Wrong subscription type.") self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased", "Error: Wrong split algo.") procLogCollect = Fileset( name="/TestWorkload/MonteCarloFromGEN/unmerged-logArchive") procLogCollect.loadData() procLogCollectWorkflow = Workflow( name="TestWorkload", task="/TestWorkload/MonteCarloFromGEN/LogCollect") procLogCollectWorkflow.load() logCollectSub = Subscription(fileset=procLogCollect, workflow=procLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algo.") procLogCollect = Fileset( name= "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO/merged-logArchive" ) procLogCollect.loadData() procLogCollectWorkflow = Workflow( name="TestWorkload", task= "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO/MonteCarloFromGENoutputRECORECOMergeLogCollect" ) procLogCollectWorkflow.load() logCollectSub = Subscription(fileset=procLogCollect, workflow=procLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algo.") procLogCollect = Fileset( name= "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO/merged-logArchive" ) procLogCollect.loadData() procLogCollectWorkflow = Workflow( name="TestWorkload", task= "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO/MonteCarloFromGENoutputALCARECOALCARECOMergeLogCollect" ) procLogCollectWorkflow.load() logCollectSub = Subscription(fileset=procLogCollect, workflow=procLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algo.") return
def setUp(self): """ _setUp_ Create two subscriptions: One that contains a single file and one that contains multiple files. """ self.testInit = TestInit(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setSchema(customModules=["WMCore.WMBS"], useDefault=False) myThread = threading.currentThread() daofactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger, dbinterface=myThread.dbi) locationAction = daofactory(classname="Locations.New") locationAction.execute(siteName="site1", seName="somese.cern.ch") locationAction.execute(siteName="site2", seName="otherse.cern.ch") self.multipleFileFileset = Fileset(name="TestFileset1") self.multipleFileFileset.create() parentFile = File('/parent/lfn/', size=1000, events=100, locations=set(["somese.cern.ch"])) parentFile.create() for i in range(10): newFile = File(makeUUID(), size=1000, events=100, locations=set(["somese.cern.ch"])) newFile.addRun(Run(i, *[45])) newFile.create() newFile.addParent(lfn=parentFile['lfn']) self.multipleFileFileset.addFile(newFile) self.multipleFileFileset.commit() self.singleFileFileset = Fileset(name="TestFileset2") self.singleFileFileset.create() newFile = File("/some/file/name", size=1000, events=100, locations=set(["somese.cern.ch"])) newFile.create() self.singleFileFileset.addFile(newFile) self.singleFileFileset.commit() self.multipleSiteFileset = Fileset(name="TestFileset3") self.multipleSiteFileset.create() for i in range(5): newFile = File(makeUUID(), size=1000, events=100, locations=set(["somese.cern.ch"])) newFile.create() self.multipleSiteFileset.addFile(newFile) for i in range(5): newFile = File(makeUUID(), size=1000, events=100, locations=set(["otherse.cern.ch", "somese.cern.ch"])) newFile.create() self.multipleSiteFileset.addFile(newFile) self.multipleSiteFileset.commit() testWorkflow = Workflow(spec="spec.xml", owner="Steve", name="wf001", task="Test") testWorkflow.create() self.multipleFileSubscription = Subscription( fileset=self.multipleFileFileset, workflow=testWorkflow, split_algo="FileBased", type="Processing") self.multipleFileSubscription.create() self.singleFileSubscription = Subscription( fileset=self.singleFileFileset, workflow=testWorkflow, split_algo="FileBased", type="Processing") self.singleFileSubscription.create() self.multipleSiteSubscription = Subscription( fileset=self.multipleSiteFileset, workflow=testWorkflow, split_algo="FileBased", type="Processing") self.multipleSiteSubscription.create() self.performanceParams = { 'timePerEvent': 12, 'memoryRequirement': 2300, 'sizePerEvent': 400 } return
class FileBasedTest(unittest.TestCase): """ _FileBasedTest_ Test file based job splitting. """ def setUp(self): """ _setUp_ Create two subscriptions: One that contains a single file and one that contains multiple files. """ self.testInit = TestInit(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setSchema(customModules=["WMCore.WMBS"], useDefault=False) myThread = threading.currentThread() daofactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger, dbinterface=myThread.dbi) locationAction = daofactory(classname="Locations.New") locationAction.execute(siteName="site1", seName="somese.cern.ch") locationAction.execute(siteName="site2", seName="otherse.cern.ch") self.multipleFileFileset = Fileset(name="TestFileset1") self.multipleFileFileset.create() parentFile = File('/parent/lfn/', size=1000, events=100, locations=set(["somese.cern.ch"])) parentFile.create() for i in range(10): newFile = File(makeUUID(), size=1000, events=100, locations=set(["somese.cern.ch"])) newFile.addRun(Run(i, *[45])) newFile.create() newFile.addParent(lfn=parentFile['lfn']) self.multipleFileFileset.addFile(newFile) self.multipleFileFileset.commit() self.singleFileFileset = Fileset(name="TestFileset2") self.singleFileFileset.create() newFile = File("/some/file/name", size=1000, events=100, locations=set(["somese.cern.ch"])) newFile.create() self.singleFileFileset.addFile(newFile) self.singleFileFileset.commit() self.multipleSiteFileset = Fileset(name="TestFileset3") self.multipleSiteFileset.create() for i in range(5): newFile = File(makeUUID(), size=1000, events=100, locations=set(["somese.cern.ch"])) newFile.create() self.multipleSiteFileset.addFile(newFile) for i in range(5): newFile = File(makeUUID(), size=1000, events=100, locations=set(["otherse.cern.ch", "somese.cern.ch"])) newFile.create() self.multipleSiteFileset.addFile(newFile) self.multipleSiteFileset.commit() testWorkflow = Workflow(spec="spec.xml", owner="Steve", name="wf001", task="Test") testWorkflow.create() self.multipleFileSubscription = Subscription( fileset=self.multipleFileFileset, workflow=testWorkflow, split_algo="FileBased", type="Processing") self.multipleFileSubscription.create() self.singleFileSubscription = Subscription( fileset=self.singleFileFileset, workflow=testWorkflow, split_algo="FileBased", type="Processing") self.singleFileSubscription.create() self.multipleSiteSubscription = Subscription( fileset=self.multipleSiteFileset, workflow=testWorkflow, split_algo="FileBased", type="Processing") self.multipleSiteSubscription.create() self.performanceParams = { 'timePerEvent': 12, 'memoryRequirement': 2300, 'sizePerEvent': 400 } return def tearDown(self): """ _tearDown_ Clear out WMBS. """ self.testInit.clearDatabase() return def createLargeFileBlock(self): """ _createLargeFileBlock_ Creates a large group of files for testing """ testFileset = Fileset(name="TestFilesetX") testFileset.create() for i in range(5000): newFile = File(makeUUID(), size=1000, events=100, locations=set(["somese.cern.ch"])) newFile.create() testFileset.addFile(newFile) testFileset.commit() testWorkflow = Workflow(spec="spec.xml", owner="mnorman", name="wf003", task="Test") testWorkflow.create() largeSubscription = Subscription(fileset=testFileset, workflow=testWorkflow, split_algo="FileBased", type="Processing") largeSubscription.create() return largeSubscription def testExactFiles(self): """ _testExactFiles_ Test file based job splitting when the number of files per job is exactly the same as the number of files in the input fileset. """ splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=self.singleFileSubscription) jobGroups = jobFactory(files_per_job=1, performance=self.performanceParams) self.assertEqual(len(jobGroups), 1) self.assertEqual(len(jobGroups[0].jobs), 1) job = jobGroups[0].jobs.pop() self.assertEqual(job.getFiles(type="lfn"), ["/some/file/name"]) self.assertEqual(job["estimatedMemoryUsage"], 2300) self.assertEqual(job["estimatedDiskUsage"], 400 * 100) self.assertEqual(job["estimatedJobTime"], 12 * 100) return def testMoreFiles(self): """ _testMoreFiles_ Test file based job splitting when the number of files per job is greater than the number of files in the input fileset. """ splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=self.singleFileSubscription) jobGroups = jobFactory(files_per_job=10, performance=self.performanceParams) self.assertEqual(len(jobGroups), 1) self.assertEqual(len(jobGroups[0].jobs), 1) job = jobGroups[0].jobs.pop() self.assertEqual(job.getFiles(type="lfn"), ["/some/file/name"]) self.assertEqual(job["estimatedMemoryUsage"], 2300) self.assertEqual(job["estimatedDiskUsage"], 400 * 100) self.assertEqual(job["estimatedJobTime"], 12 * 100) return def test2FileSplit(self): """ _test2FileSplit_ Test file based job splitting when the number of files per job is 2, this should result in five jobs. """ splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=self.multipleFileSubscription) jobGroups = jobFactory(files_per_job=2, performance=self.performanceParams) self.assertEqual(len(jobGroups), 1) self.assertEqual(len(jobGroups[0].jobs), 5) fileList = [] for job in jobGroups[0].jobs: self.assertEqual(len(job.getFiles()), 2) for file in job.getFiles(type="lfn"): fileList.append(file) self.assertEqual(job["estimatedMemoryUsage"], 2300) self.assertEqual(job["estimatedDiskUsage"], 400 * 100 * 2) self.assertEqual(job["estimatedJobTime"], 12 * 100 * 2) self.assertEqual(len(fileList), 10) return def test3FileSplit(self): """ _test3FileSplit_ Test file based job splitting when the number of files per job is 3, this should result in four jobs. """ splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=self.multipleFileSubscription) jobGroups = jobFactory(files_per_job=3, performance=self.performanceParams) self.assertEqual(len(jobGroups), 1) self.assertEqual(len(jobGroups[0].jobs), 4) fileList = [] for job in jobGroups[0].jobs: assert len(job.getFiles()) in [ 3, 1 ], "ERROR: Job contains incorrect number of files" for file in job.getFiles(type="lfn"): fileList.append(file) if len(job.getFiles()) == 3: self.assertEqual(job["estimatedMemoryUsage"], 2300) self.assertEqual(job["estimatedDiskUsage"], 400 * 100 * 3) self.assertEqual(job["estimatedJobTime"], 12 * 100 * 3) elif len(job.getFiles()) == 1: self.assertEqual(job["estimatedMemoryUsage"], 2300) self.assertEqual(job["estimatedDiskUsage"], 400 * 100) self.assertEqual(job["estimatedJobTime"], 12 * 100) else: self.fail("Unexpected splitting reached") self.assertEqual(len(fileList), 10) return def testLocationSplit(self): """ _testLocationSplit_ This should test whether or not the FileBased algorithm understands that files at seperate sites cannot be in the same jobGroup (this is the current standard). """ myThread = threading.currentThread() splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=self.multipleSiteSubscription) jobGroups = jobFactory(files_per_job=10, performance=self.performanceParams) self.assertEqual(len(jobGroups), 2) self.assertEqual(len(jobGroups[0].jobs), 1) self.assertEqual(jobGroups[0].jobs[0]["estimatedMemoryUsage"], 2300) self.assertEqual(jobGroups[0].jobs[0]["estimatedDiskUsage"], 100 * 400 * 5) self.assertEqual(jobGroups[0].jobs[0]["estimatedJobTime"], 100 * 12 * 5) self.assertEqual(len(jobGroups[1].jobs[0].getFiles()), 5) self.assertEqual(jobGroups[1].jobs[0]["estimatedMemoryUsage"], 2300) self.assertEqual(jobGroups[1].jobs[0]["estimatedDiskUsage"], 100 * 400 * 5) self.assertEqual(jobGroups[1].jobs[0]["estimatedJobTime"], 100 * 12 * 5) return def testLimit(self): """ _testLimit_ Test what happens when you limit the number of files. This should run each separate file in a separate loop, creating one jobGroups with one job with one file (The limit argument tells it what to do) """ splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=self.multipleFileSubscription) jobGroups = jobFactory(files_per_job=10, limit_file_loading=True, file_load_limit=1, performance=self.performanceParams) self.assertEqual(len(jobGroups), 1) self.assertEqual(len(jobGroups[0].jobs), 1) return def testRespectRunBoundaries(self): """ _testRespectRunBoundaries_ Test whether or not this thing will respect run boundaries """ splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=self.multipleFileSubscription) jobGroups = jobFactory(files_per_job=10, respect_run_boundaries=True, performance=self.performanceParams) self.assertEqual(len(jobGroups), 1) self.assertEqual(len(jobGroups[0].jobs), 10) return def test_getParents(self): """ _getParents_ Check that we can do the same as the TwoFileBased """ splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=self.multipleFileSubscription) jobGroups = jobFactory(files_per_job=2, include_parents=True, performance=self.performanceParams) self.assertEqual(len(jobGroups), 1) self.assertEqual(len(jobGroups[0].jobs), 5) fileList = [] for job in jobGroups[0].jobs: self.assertEqual(len(job.getFiles()), 2) for file in job.getFiles(type="lfn"): fileList.append(file) self.assertEqual(len(fileList), 10) for j in jobGroups[0].jobs: for f in j['input_files']: self.assertEqual(len(f['parents']), 1) self.assertEqual(list(f['parents'])[0]['lfn'], '/parent/lfn/') return def testZ_randomCrapForGenerators(self): """ Either this works, and all other tests are obsolete, or it doesn't and they aren't. Either way, don't screw around with this. """ def runCode(self, jobFactory): func = self.crazyAssFunction(jobFactory=jobFactory, file_load_limit=500) startTime = time.time() goFlag = True while goFlag: try: res = func.next() self.jobGroups.extend(res) except StopIteration: goFlag = False stopTime = time.time() return jobGroups splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=self.multipleSiteSubscription) jobFactory.open() jobGroups = [] a = self.crazyAssFunction(jobFactory=jobFactory, file_load_limit=2) for x in range(7): try: res = a.next() jobGroups.extend(res) except StopIteration: continue jobFactory.close() self.assertEqual(len(jobGroups), 6) for group in jobGroups: self.assertTrue(len(group.jobs) in [1, 2]) for job in group.jobs: self.assertTrue(len(job['input_files']) in (1, 2)) for file in job['input_files']: self.assertTrue(file['locations'] in [ set(['somese.cern.ch']), set(['otherse.cern.ch', 'somese.cern.ch']) ]) self.jobGroups = [] subscript = self.createLargeFileBlock() splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=subscript) jobFactory.open() runCode(self, jobFactory) #cProfile.runctx("runCode(self, jobFactory)", globals(), locals(), "coroutine.stats") jobGroups = self.jobGroups self.assertEqual(len(jobGroups), 10) for group in jobGroups: self.assertEqual(len(group.jobs), 500) self.assertTrue(group.exists() > 0) jobFactory.close() return def crazyAssFunction(self, jobFactory, file_load_limit=1): groups = ['test'] while groups != []: groups = jobFactory(files_per_job=1, file_load_limit=file_load_limit, performance=self.performanceParams) yield groups
def createTestJobGroup(self, name="TestWorkthrough", specLocation="spec.xml", error=False, task="/TestWorkload/ReReco", nJobs=10): """ _createTestJobGroup_ Generate a test WMBS JobGroup with real FWJRs """ myThread = threading.currentThread() testWorkflow = Workflow(spec=specLocation, owner="Simon", name=name, task=task) testWorkflow.create() testWMBSFileset = Fileset(name=name) testWMBSFileset.create() testFileA = File(lfn=makeUUID(), size=1024, events=10) testFileA.addRun(Run(10, *[12312])) testFileA.setLocation('malpaquet') testFileB = File(lfn=makeUUID(), size=1024, events=10) testFileB.addRun(Run(10, *[12312])) testFileB.setLocation('malpaquet') testFileA.create() testFileB.create() testWMBSFileset.addFile(testFileA) testWMBSFileset.addFile(testFileB) testWMBSFileset.commit() testWMBSFileset.markOpen(0) testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow) testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() for i in range(0, nJobs): testJob = Job(name=makeUUID()) testJob.addFile(testFileA) testJob.addFile(testFileB) testJob['retry_count'] = 1 testJob['retry_max'] = 10 testJob['mask'].addRunAndLumis(run=10, lumis=[12312, 12313]) testJobGroup.add(testJob) testJobGroup.commit() report = Report() if error: path = os.path.join(WMCore.WMBase.getTestBase(), "WMComponent_t/JobAccountant_t/fwjrs", "badBackfillJobReport.pkl") else: path = os.path.join(WMCore.WMBase.getTestBase(), "WMComponent_t/JobAccountant_t/fwjrs", "PerformanceReport2.pkl") report.load(filename=path) self.changeState.propagate(testJobGroup.jobs, 'created', 'new') self.changeState.propagate(testJobGroup.jobs, 'executing', 'created') self.changeState.propagate(testJobGroup.jobs, 'complete', 'executing') for job in testJobGroup.jobs: job['fwjr'] = report self.changeState.propagate(testJobGroup.jobs, 'jobfailed', 'complete') self.changeState.propagate(testJobGroup.jobs, 'retrydone', 'jobfailed') self.changeState.propagate(testJobGroup.jobs, 'exhausted', 'retrydone') self.changeState.propagate(testJobGroup.jobs, 'cleanout', 'exhausted') testSubscription.completeFiles([testFileA, testFileB]) return testJobGroup
def databaseWork(self): """ Queries DB for all watched filesets, if a filesets matches become available, create the subscriptions """ # Get all watched workflows availableWorkflows = self.getUnsubscribedWorkflows.execute() logging.debug("Found %s unsubscribed managed workflows" \ % len(availableWorkflows)) # Get all filesets to check if they match a wrokflow availableFilesets = self.getAllFilesets.execute() logging.debug("Found %s filesets" % len(availableFilesets)) # Loop on unsubscribed workflows to match filesets for managedWorkflow in availableWorkflows: # Workflow object cache to pass into Subscription constructor wfObj = None for fileset in availableFilesets: # Fileset object cache fsObj = None # Load the location information #whitelist = Set() #blacklist = Set() # Location is only caf #locations = self.queries.getLocations(managedWorkflow['id']) #for location in locations: # if bool(int(location['valid'])) == True: # whitelist.add(location['site_name']) # else: # blacklist.add(location['site_name']) # Attempt to match workflows to filesets if re.match(managedWorkflow['fileset_match'], fileset['name']): # Log in debug msg = "Creating subscription for %s to workflow id %s" msg %= (fileset['name'], managedWorkflow['workflow']) logging.debug(msg) # Match found - Load the fileset if not already loaded if not fsObj: fsObj = Fileset(id=fileset['id']) fsObj.load() # Load the workflow if not already loaded if not wfObj: wfObj = Workflow(id=managedWorkflow['workflow']) wfObj.load() # Create the subscription newSub = Subscription(fileset = fsObj, \ workflow = wfObj, \ #whitelist = whitelist, \ #blacklist = blacklist, \ split_algo = managedWorkflow['split_algo'], type = managedWorkflow['type']) newSub.create() managedWorkflows = self.getManagedWorkflows.execute() logging.debug("Found %s managed workflows" \ % len(managedWorkflows)) unsubscribedFilesets = self.getUnsubscribedFilesets.execute() logging.debug("Found %s unsubscribed filesets" % \ len(unsubscribedFilesets)) # Loop on unsubscribed filesets to match workflows for unsubscribedFileset in unsubscribedFilesets: # Workflow object cache to pass into Subscription constructor # FIXME wfObj = None for managedWork in managedWorkflows: logging.debug("The workflow %s" % managedWork['workflow']) # Fileset object cache wfObj = None fsObj = None # Load the location information #whitelist = Set() #blacklist = Set() # Location is only caf #locations = self.queries.getLocations(managedWorkflow['id']) #for location in locations: # if bool(int(location['valid'])) == True: # whitelist.add(location['site_name']) # else: # blacklist.add(location['site_name']) # Attempt to match workflows to filesets if re.match(managedWork['fileset_match'], \ unsubscribedFileset['name']): # Log in debug msg = "Creating subscription for %s to workflow id %s" msg %= (unsubscribedFileset['name'], \ managedWork['workflow']) logging.debug(msg) # Match found - Load the fileset if not already loaded if not fsObj: fsObj = Fileset(id=unsubscribedFileset['id']) fsObj.load() # Load the workflow if not already loaded if not wfObj: wfObj = Workflow(id=managedWork['workflow']) wfObj.load() # Create the subscription newSub = Subscription(fileset = fsObj, \ workflow = wfObj, \ #whitelist = whitelist, \ #blacklist = blacklist, \ split_algo = managedWork['split_algo'], type = managedWork['type']) newSub.create() newSub.load()
def createTestJobGroup(self, nJobs=10, retry_count=1, workloadPath='test', fwjrPath=None, workloadName=makeUUID(), fileModifier=''): """ Creates a group of several jobs """ myThread = threading.currentThread() myThread.transaction.begin() testWorkflow = Workflow(spec=workloadPath, owner="cmsdataops", group="cmsdataops", name=workloadName, task="/TestWorkload/ReReco") testWorkflow.create() testWMBSFileset = Fileset(name="TestFileset") testWMBSFileset.create() testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow) testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() testFile0 = File(lfn="/this/is/a/parent%s" % fileModifier, size=1024, events=10) testFile0.addRun(Run(10, *[12312])) testFile0.setLocation('T2_CH_CERN') testFileA = File(lfn="/this/is/a/lfnA%s" % fileModifier, size=1024, events=10, first_event=88) testFileA.addRun(Run(10, *[12312, 12313])) testFileA.setLocation('T2_CH_CERN') testFileB = File(lfn="/this/is/a/lfnB%s" % fileModifier, size=1024, events=10, first_event=88) testFileB.addRun(Run(10, *[12314, 12315, 12316])) testFileB.setLocation('T2_CH_CERN') testFile0.create() testFileA.create() testFileB.create() testFileA.addParent(lfn="/this/is/a/parent%s" % fileModifier) testFileB.addParent(lfn="/this/is/a/parent%s" % fileModifier) for i in range(0, nJobs): testJob = Job(name=makeUUID()) testJob['retry_count'] = retry_count testJob['retry_max'] = 10 testJob['mask'].addRunAndLumis(run=10, lumis=[12312]) testJob['mask'].addRunAndLumis(run=10, lumis=[12314, 12316]) testJob['cache_dir'] = os.path.join(self.testDir, testJob['name']) testJob['fwjr_path'] = fwjrPath os.mkdir(testJob['cache_dir']) testJobGroup.add(testJob) testJob.create(group=testJobGroup) testJob.addFile(testFileA) testJob.addFile(testFileB) testJob.save() testJobGroup.commit() testSubscription.acquireFiles(files=[testFileA, testFileB]) testSubscription.save() myThread.transaction.commit() return testJobGroup
def testTruncatedWFInsertion(self): """ _testTruncatedWFInsertion_ """ resourceControl = ResourceControl() resourceControl.insertSite(siteName='site1', seName='goodse.cern.ch', ceName='site1', plugin="TestPlugin") resourceControl.insertSite(siteName='site2', seName='goodse2.cern.ch', ceName='site2', plugin="TestPlugin") testWorkload = self.createTestWMSpec() testTopLevelTask = getFirstTask(testWorkload) testWMBSHelper = WMBSHelper(testWorkload, testTopLevelTask.name(), "SomeBlock", cachepath=self.workDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper.createSubscription(testTopLevelTask, testWMBSHelper.topLevelFileset) testWorkload.truncate("ResubmitTestWorkload", "/TestWorkload/ProcessingTask/MergeTask", "someserver", "somedatabase") # create the subscription for multiple top task (MergeTask and CleanupTask for the same block) for task in testWorkload.getTopLevelTask(): testResubmitWMBSHelper = WMBSHelper(testWorkload, task.name(), "SomeBlock2", cachepath=self.workDir) testResubmitWMBSHelper.createTopLevelFileset() testResubmitWMBSHelper.createSubscription( task, testResubmitWMBSHelper.topLevelFileset) mergeWorkflow = Workflow(name="ResubmitTestWorkload", task="/ResubmitTestWorkload/MergeTask") mergeWorkflow.load() self.assertEqual(mergeWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual( mergeWorkflow.spec, os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1, "Error: Wrong number of WF outputs.") cleanupWorkflow = Workflow(name="ResubmitTestWorkload", task="/ResubmitTestWorkload/CleanupTask") cleanupWorkflow.load() self.assertEqual(cleanupWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual( cleanupWorkflow.spec, os.path.join(self.workDir, cleanupWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0, "Error: Wrong number of WF outputs.") unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][ "output_fileset"] unmergedMergeOutput.loadData() self.assertEqual(unmergedMergeOutput.name, "/ResubmitTestWorkload/MergeTask/merged-Merged", "Error: Unmerged output fileset is wrong.") skimWorkflow = Workflow( name="ResubmitTestWorkload", task="/ResubmitTestWorkload/MergeTask/SkimTask") skimWorkflow.load() self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual( skimWorkflow.spec, os.path.join(self.workDir, skimWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(skimWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][ "merged_output_fileset"] unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][ "output_fileset"] mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][ "merged_output_fileset"] unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][ "output_fileset"] mergedSkimOutputA.loadData() mergedSkimOutputB.loadData() unmergedSkimOutputA.loadData() unmergedSkimOutputB.loadData() self.assertEqual( mergedSkimOutputA.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name) self.assertEqual( unmergedSkimOutputA.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Unmerged output fileset is wrong.") self.assertEqual( mergedSkimOutputB.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Merged output fileset is wrong.") self.assertEqual( unmergedSkimOutputB.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Unmerged output fileset is wrong.") topLevelFileset = Fileset( name="ResubmitTestWorkload-MergeTask-SomeBlock2") topLevelFileset.loadData() mergeSubscription = Subscription(fileset=topLevelFileset, workflow=mergeWorkflow) mergeSubscription.loadData() self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0, "Error: Wrong white/black list for merge sub.") self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize", "Error: Wrong split algo.") skimSubscription = Subscription(fileset=unmergedMergeOutput, workflow=skimWorkflow) skimSubscription.loadData() self.assertEqual(skimSubscription["type"], "Skim", "Error: Wrong subscription type.") self.assertEqual(skimSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") return
def stuffWMBS(self, workflowURL, name): """ _stuffWMBS_ Insert some dummy jobs, jobgroups, filesets, files and subscriptions into WMBS to test job creation. Three completed job groups each containing several files are injected. Another incomplete job group is also injected. Also files are added to the "Mergeable" subscription as well as to the output fileset for their jobgroups. """ locationAction = self.daoFactory(classname="Locations.New") locationAction.execute(siteName="s1", pnn="somese.cern.ch") mergeFileset = Fileset(name="mergeFileset") mergeFileset.create() bogusFileset = Fileset(name="bogusFileset") bogusFileset.create() mergeWorkflow = Workflow(spec=workflowURL, owner="mnorman", name=name, task="/TestWorkload/ReReco") mergeWorkflow.create() mergeSubscription = Subscription(fileset=mergeFileset, workflow=mergeWorkflow, split_algo="ParentlessMergeBySize") mergeSubscription.create() dummySubscription = Subscription(fileset=bogusFileset, workflow=mergeWorkflow, split_algo="ParentlessMergeBySize") file1 = File(lfn="file1", size=1024, events=1024, first_event=0, locations={"somese.cern.ch"}) file1.addRun(Run(1, *[45])) file1.create() file2 = File(lfn="file2", size=1024, events=1024, first_event=1024, locations={"somese.cern.ch"}) file2.addRun(Run(1, *[45])) file2.create() file3 = File(lfn="file3", size=1024, events=1024, first_event=2048, locations={"somese.cern.ch"}) file3.addRun(Run(1, *[45])) file3.create() file4 = File(lfn="file4", size=1024, events=1024, first_event=3072, locations={"somese.cern.ch"}) file4.addRun(Run(1, *[45])) file4.create() fileA = File(lfn="fileA", size=1024, events=1024, first_event=0, locations={"somese.cern.ch"}) fileA.addRun(Run(1, *[46])) fileA.create() fileB = File(lfn="fileB", size=1024, events=1024, first_event=1024, locations={"somese.cern.ch"}) fileB.addRun(Run(1, *[46])) fileB.create() fileC = File(lfn="fileC", size=1024, events=1024, first_event=2048, locations={"somese.cern.ch"}) fileC.addRun(Run(1, *[46])) fileC.create() fileI = File(lfn="fileI", size=1024, events=1024, first_event=0, locations={"somese.cern.ch"}) fileI.addRun(Run(2, *[46])) fileI.create() fileII = File(lfn="fileII", size=1024, events=1024, first_event=1024, locations={"somese.cern.ch"}) fileII.addRun(Run(2, *[46])) fileII.create() fileIII = File(lfn="fileIII", size=1024, events=1024, first_event=2048, locations={"somese.cern.ch"}) fileIII.addRun(Run(2, *[46])) fileIII.create() fileIV = File(lfn="fileIV", size=1024 * 1000000, events=1024, first_event=3072, locations={"somese.cern.ch"}) fileIV.addRun(Run(2, *[46])) fileIV.create() for fileObj in [file1, file2, file3, file4, fileA, fileB, fileC, fileI, fileII, fileIII, fileIV]: mergeFileset.addFile(fileObj) bogusFileset.addFile(fileObj) mergeFileset.commit() bogusFileset.commit() return
class WMBSHelperTest(unittest.TestCase): def setUp(self): """ _setUp_ """ self.testInit = TestInitCouchApp(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setupCouch("wmbshelper_t/jobs", "JobDump") self.testInit.setupCouch("wmbshelper_t/fwjrs", "FWJRDump") self.testInit.setupCouch("config_test", "GroupUser", "ConfigCache") os.environ["COUCHDB"] = "wmbshelper_t" self.testInit.setSchema(customModules=[ "WMCore.WMBS", "WMComponent.DBS3Buffer", "WMCore.BossAir", "WMCore.ResourceControl" ], useDefault=False) self.workDir = self.testInit.generateWorkDir() self.wmspec = self.createWMSpec() self.topLevelTask = getFirstTask(self.wmspec) self.inputDataset = self.topLevelTask.inputDataset() self.dataset = self.topLevelTask.getInputDatasetPath() self.dbs = MockDBSReader(self.inputDataset.dbsurl) self.daoFactory = DAOFactory(package="WMCore.WMBS", logger=threading.currentThread().logger, dbinterface=threading.currentThread().dbi) return def tearDown(self): """ _tearDown_ Clear out the database. """ self.testInit.clearDatabase() self.testInit.tearDownCouch() self.testInit.delWorkDir() return def setupForKillTest(self, baAPI=None): """ _setupForKillTest_ Inject a workflow into WMBS that has a processing task, a merge task and a cleanup task. Inject files into the various tasks at various processing states (acquired, complete, available...). Also create jobs for each subscription in various states. """ myThread = threading.currentThread() daoFactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger, dbinterface=myThread.dbi) locationAction = daoFactory(classname="Locations.New") changeStateAction = daoFactory(classname="Jobs.ChangeState") resourceControl = ResourceControl() resourceControl.insertSite(siteName='site1', seName='goodse.cern.ch', ceName='site1', plugin="TestPlugin") resourceControl.insertThreshold(siteName = 'site1', taskType = 'Processing', \ maxSlots = 10000, pendingSlots = 10000) userDN = 'someDN' userAction = daoFactory(classname="Users.New") userAction.execute(dn=userDN, group_name='DEFAULT', role_name='DEFAULT') inputFileset = Fileset("input") inputFileset.create() inputFileA = File("lfnA", locations="goodse.cern.ch") inputFileB = File("lfnB", locations="goodse.cern.ch") inputFileC = File("lfnC", locations="goodse.cern.ch") inputFileA.create() inputFileB.create() inputFileC.create() inputFileset.addFile(inputFileA) inputFileset.addFile(inputFileB) inputFileset.addFile(inputFileC) inputFileset.commit() unmergedOutputFileset = Fileset("unmerged") unmergedOutputFileset.create() unmergedFileA = File("ulfnA", locations="goodse.cern.ch") unmergedFileB = File("ulfnB", locations="goodse.cern.ch") unmergedFileC = File("ulfnC", locations="goodse.cern.ch") unmergedFileA.create() unmergedFileB.create() unmergedFileC.create() unmergedOutputFileset.addFile(unmergedFileA) unmergedOutputFileset.addFile(unmergedFileB) unmergedOutputFileset.addFile(unmergedFileC) unmergedOutputFileset.commit() mainProcWorkflow = Workflow(spec="spec1", owner="Steve", name="Main", task="Proc") mainProcWorkflow.create() mainProcMergeWorkflow = Workflow(spec="spec1", owner="Steve", name="Main", task="ProcMerge") mainProcMergeWorkflow.create() mainCleanupWorkflow = Workflow(spec="spec1", owner="Steve", name="Main", task="Cleanup") mainCleanupWorkflow.create() self.mainProcSub = Subscription(fileset=inputFileset, workflow=mainProcWorkflow, type="Processing") self.mainProcSub.create() self.mainProcSub.acquireFiles(inputFileA) self.mainProcSub.completeFiles(inputFileB) procJobGroup = JobGroup(subscription=self.mainProcSub) procJobGroup.create() self.procJobA = Job(name="ProcJobA") self.procJobA["state"] = "new" self.procJobA["location"] = "site1" self.procJobB = Job(name="ProcJobB") self.procJobB["state"] = "executing" self.procJobB["location"] = "site1" self.procJobC = Job(name="ProcJobC") self.procJobC["state"] = "complete" self.procJobC["location"] = "site1" self.procJobA.create(procJobGroup) self.procJobB.create(procJobGroup) self.procJobC.create(procJobGroup) self.mainMergeSub = Subscription(fileset=unmergedOutputFileset, workflow=mainProcMergeWorkflow, type="Merge") self.mainMergeSub.create() self.mainMergeSub.acquireFiles(unmergedFileA) self.mainMergeSub.failFiles(unmergedFileB) mergeJobGroup = JobGroup(subscription=self.mainMergeSub) mergeJobGroup.create() self.mergeJobA = Job(name="MergeJobA") self.mergeJobA["state"] = "exhausted" self.mergeJobA["location"] = "site1" self.mergeJobB = Job(name="MergeJobB") self.mergeJobB["state"] = "cleanout" self.mergeJobB["location"] = "site1" self.mergeJobC = Job(name="MergeJobC") self.mergeJobC["state"] = "new" self.mergeJobC["location"] = "site1" self.mergeJobA.create(mergeJobGroup) self.mergeJobB.create(mergeJobGroup) self.mergeJobC.create(mergeJobGroup) self.mainCleanupSub = Subscription(fileset=unmergedOutputFileset, workflow=mainCleanupWorkflow, type="Cleanup") self.mainCleanupSub.create() self.mainCleanupSub.acquireFiles(unmergedFileA) self.mainCleanupSub.completeFiles(unmergedFileB) cleanupJobGroup = JobGroup(subscription=self.mainCleanupSub) cleanupJobGroup.create() self.cleanupJobA = Job(name="CleanupJobA") self.cleanupJobA["state"] = "new" self.cleanupJobA["location"] = "site1" self.cleanupJobB = Job(name="CleanupJobB") self.cleanupJobB["state"] = "executing" self.cleanupJobB["location"] = "site1" self.cleanupJobC = Job(name="CleanupJobC") self.cleanupJobC["state"] = "complete" self.cleanupJobC["location"] = "site1" self.cleanupJobA.create(cleanupJobGroup) self.cleanupJobB.create(cleanupJobGroup) self.cleanupJobC.create(cleanupJobGroup) jobList = [ self.procJobA, self.procJobB, self.procJobC, self.mergeJobA, self.mergeJobB, self.mergeJobC, self.cleanupJobA, self.cleanupJobB, self.cleanupJobC ] changeStateAction.execute(jobList) if baAPI: for job in jobList: job['plugin'] = 'TestPlugin' job['userdn'] = userDN job['usergroup'] = 'DEFAULT' job['userrole'] = 'DEFAULT' job['custom']['location'] = 'site1' baAPI.createNewJobs(wmbsJobs=jobList) # We'll create an unrelated workflow to verify that it isn't affected # by the killing code. bogusFileset = Fileset("dontkillme") bogusFileset.create() bogusFileA = File("bogus/lfnA", locations="goodse.cern.ch") bogusFileA.create() bogusFileset.addFile(bogusFileA) bogusFileset.commit() bogusWorkflow = Workflow(spec="spec2", owner="Steve", name="Bogus", task="Proc") bogusWorkflow.create() self.bogusSub = Subscription(fileset=bogusFileset, workflow=bogusWorkflow, type="Processing") self.bogusSub.create() self.bogusSub.acquireFiles(bogusFileA) return def verifyFileKillStatus(self): """ _verifyFileKillStatus_ Verify that all files were killed correctly. The status of files in Cleanup and LogCollect subscriptions isn't modified. Status of already completed and failed files is not modified. Also verify that the bogus subscription is untouched. """ failedFiles = self.mainProcSub.filesOfStatus("Failed") acquiredFiles = self.mainProcSub.filesOfStatus("Acquired") completedFiles = self.mainProcSub.filesOfStatus("Completed") availableFiles = self.mainProcSub.filesOfStatus("Available") bogusAcquiredFiles = self.bogusSub.filesOfStatus("Acquired") self.assertEqual(len(availableFiles), 0, \ "Error: There should be no available files.") self.assertEqual(len(acquiredFiles), 0, \ "Error: There should be no acquired files.") self.assertEqual(len(bogusAcquiredFiles), 1, \ "Error: There should be one acquired file.") self.assertEqual(len(completedFiles), 3, \ "Error: There should be only one completed file.") goldenLFNs = ["lfnA", "lfnB", "lfnC"] for completedFile in completedFiles: self.assertTrue(completedFile["lfn"] in goldenLFNs, \ "Error: Extra completed file.") goldenLFNs.remove(completedFile["lfn"]) self.assertEqual(len(failedFiles), 0, \ "Error: There should be no failed files.") self.assertEqual(len(goldenLFNs), 0, \ "Error: Missing LFN") failedFiles = self.mainMergeSub.filesOfStatus("Failed") acquiredFiles = self.mainMergeSub.filesOfStatus("Acquired") completedFiles = self.mainMergeSub.filesOfStatus("Completed") availableFiles = self.mainMergeSub.filesOfStatus("Available") self.assertEqual(len(acquiredFiles), 0, \ "Error: Merge subscription should have 0 acq files.") self.assertEqual(len(availableFiles), 0, \ "Error: Merge subscription should have 0 avail files.") self.assertEqual(len(failedFiles), 1, \ "Error: Merge subscription should have 1 failed files.") self.assertEqual( list(failedFiles)[0]["lfn"], "ulfnB", "Error: Wrong failed file.") self.assertEqual(len(completedFiles), 2, \ "Error: Merge subscription should have 2 compl files.") goldenLFNs = ["ulfnA", "ulfnC"] for completedFile in completedFiles: self.assertTrue(completedFile["lfn"] in goldenLFNs, \ "Error: Extra complete file.") goldenLFNs.remove(completedFile["lfn"]) self.assertEqual(len(goldenLFNs), 0, \ "Error: Missing LFN") failedFiles = self.mainCleanupSub.filesOfStatus("Failed") acquiredFiles = self.mainCleanupSub.filesOfStatus("Acquired") completedFiles = self.mainCleanupSub.filesOfStatus("Completed") availableFiles = self.mainCleanupSub.filesOfStatus("Available") self.assertEqual(len(failedFiles), 0, \ "Error: Cleanup subscription should have 0 fai files.") self.assertEqual(len(acquiredFiles), 1, \ "Error: There should be only one acquired file.") self.assertEqual(list(acquiredFiles)[0]["lfn"], "ulfnA", \ "Error: Wrong acquired LFN.") self.assertEqual(len(completedFiles), 1, \ "Error: There should be only one completed file.") self.assertEqual(list(completedFiles)[0]["lfn"], "ulfnB", \ "Error: Wrong completed LFN.") self.assertEqual(len(availableFiles), 1, \ "Error: There should be only one available file.") self.assertEqual(list(availableFiles)[0]["lfn"], "ulfnC", \ "Error: Wrong completed LFN.") return def verifyJobKillStatus(self): """ _verifyJobKillStatus_ Verify that jobs are killed correctly. Jobs belonging to Cleanup and LogCollect subscriptions are not killed. The status of jobs that have already finished running is not changed. """ self.procJobA.load() self.procJobB.load() self.procJobC.load() self.assertEqual(self.procJobA["state"], "killed", \ "Error: Proc job A should be killed.") self.assertEqual(self.procJobB["state"], "killed", \ "Error: Proc job B should be killed.") self.assertEqual(self.procJobC["state"], "complete", \ "Error: Proc job C should be complete.") self.mergeJobA.load() self.mergeJobB.load() self.mergeJobC.load() self.assertEqual(self.mergeJobA["state"], "exhausted", \ "Error: Merge job A should be exhausted.") self.assertEqual(self.mergeJobB["state"], "cleanout", \ "Error: Merge job B should be cleanout.") self.assertEqual(self.mergeJobC["state"], "killed", \ "Error: Merge job C should be killed.") self.cleanupJobA.load() self.cleanupJobB.load() self.cleanupJobC.load() self.assertEqual(self.cleanupJobA["state"], "new", \ "Error: Cleanup job A should be new.") self.assertEqual(self.cleanupJobB["state"], "executing", \ "Error: Cleanup job B should be executing.") self.assertEqual(self.cleanupJobC["state"], "complete", \ "Error: Cleanup job C should be complete.") return def createTestWMSpec(self): """ _createTestWMSpec_ Create a WMSpec that has a processing, merge, cleanup and skims tasks that can be used by the subscription creation test. """ testWorkload = WMWorkloadHelper(WMWorkload("TestWorkload")) testWorkload.setDashboardActivity("TestReReco") testWorkload.setSpecUrl("/path/to/workload") testWorkload.setOwnerDetails("sfoulkes", "DMWM", {'dn': 'MyDN'}) procTask = testWorkload.newTask("ProcessingTask") procTask.setTaskType("Processing") procTask.setSplittingAlgorithm("FileBased", files_per_job=1) procTaskCMSSW = procTask.makeStep("cmsRun1") procTaskCMSSW.setStepType("CMSSW") procTaskCMSSWHelper = procTaskCMSSW.getTypeHelper() procTask.setTaskType("Processing") procTask.setSiteWhitelist(["site1"]) procTask.setSiteBlacklist(["site2"]) procTask.applyTemplates() procTaskCMSSWHelper.addOutputModule("OutputA", primaryDataset="bogusPrimary", processedDataset="bogusProcessed", dataTier="DataTierA", lfnBase="bogusUnmerged", mergedLFNBase="bogusMerged", filterName=None) mergeTask = procTask.addTask("MergeTask") mergeTask.setInputReference(procTaskCMSSW, outputModule="OutputA") mergeTask.setTaskType("Merge") mergeTask.setSplittingAlgorithm("WMBSMergeBySize", min_merge_size=1, max_merge_size=2, max_merge_events=3) mergeTaskCMSSW = mergeTask.makeStep("cmsRun1") mergeTaskCMSSW.setStepType("CMSSW") mergeTaskCMSSWHelper = mergeTaskCMSSW.getTypeHelper() mergeTask.setTaskType("Merge") mergeTask.applyTemplates() mergeTaskCMSSWHelper.addOutputModule("Merged", primaryDataset="bogusPrimary", processedDataset="bogusProcessed", dataTier="DataTierA", lfnBase="bogusUnmerged", mergedLFNBase="bogusMerged", filterName=None) cleanupTask = procTask.addTask("CleanupTask") cleanupTask.setInputReference(procTaskCMSSW, outputModule="OutputA") cleanupTask.setTaskType("Merge") cleanupTask.setSplittingAlgorithm("SiblingProcessingBase", files_per_job=50) cleanupTaskCMSSW = cleanupTask.makeStep("cmsRun1") cleanupTaskCMSSW.setStepType("CMSSW") cleanupTaskCMSSWHelper = cleanupTaskCMSSW.getTypeHelper() cleanupTask.setTaskType("Cleanup") cleanupTask.applyTemplates() skimTask = mergeTask.addTask("SkimTask") skimTask.setTaskType("Skim") skimTask.setInputReference(mergeTaskCMSSW, outputModule="Merged") skimTask.setSplittingAlgorithm("FileBased", files_per_job=1, include_parents=True) skimTaskCMSSW = skimTask.makeStep("cmsRun1") skimTaskCMSSW.setStepType("CMSSW") skimTaskCMSSWHelper = skimTaskCMSSW.getTypeHelper() skimTask.setTaskType("Skim") skimTask.applyTemplates() skimTaskCMSSWHelper.addOutputModule("SkimOutputA", primaryDataset="bogusPrimary", processedDataset="bogusProcessed", dataTier="DataTierA", lfnBase="bogusUnmerged", mergedLFNBase="bogusMerged", filterName=None) skimTaskCMSSWHelper.addOutputModule("SkimOutputB", primaryDataset="bogusPrimary", processedDataset="bogusProcessed", dataTier="DataTierA", lfnBase="bogusUnmerged", mergedLFNBase="bogusMerged", filterName=None) return testWorkload def setupMCWMSpec(self): """Setup MC workflow""" self.wmspec = self.createMCWMSpec() self.topLevelTask = getFirstTask(self.wmspec) self.inputDataset = self.topLevelTask.inputDataset() self.dataset = self.topLevelTask.getInputDatasetPath() self.dbs = None self.siteDB = fakeSiteDB() # add sites that would normally be added by operator via resource_control locationDAO = self.daoFactory(classname="Locations.New") self.ses = [] for site in ['T2_XX_SiteA', 'T2_XX_SiteB']: locationDAO.execute(siteName=site, seName=self.siteDB.cmsNametoSE(site)) self.ses.append(self.siteDB.cmsNametoSE(site)) def createWMSpec(self, name='ReRecoWorkload'): wmspec = rerecoWorkload(name, rerecoArgs) wmspec.setSpecUrl("/path/to/workload") return wmspec def createMCWMSpec(self, name='MonteCarloWorkload'): wmspec = monteCarloWorkload(name, mcArgs) wmspec.setSpecUrl("/path/to/workload") getFirstTask(wmspec).addProduction(totalevents=10000) return wmspec def getDBS(self, wmspec): topLevelTask = getFirstTask(wmspec) inputDataset = topLevelTask.inputDataset() dbs = MockDBSReader(inputDataset.dbsurl) #dbsDict = {self.inputDataset.dbsurl : self.dbs} return dbs def createWMBSHelperWithTopTask(self, wmspec, block, mask=None, parentFlag=False, detail=False): topLevelTask = getFirstTask(wmspec) wmbs = WMBSHelper(wmspec, topLevelTask.name(), block, mask, cachepath=self.workDir) if block: if parentFlag: block = self.dbs.getFileBlockWithParents(block)[block] else: block = self.dbs.getFileBlock(block)[block] sub, files = wmbs.createSubscriptionAndAddFiles(block=block) if detail: return wmbs, sub, files else: return wmbs def testKillWorkflow(self): """ _testKillWorkflow_ Verify that workflow killing works correctly. """ configFile = EmulatorSetup.setupWMAgentConfig() config = loadConfigurationFile(configFile) baAPI = BossAirAPI(config=config) # Create nine jobs self.setupForKillTest(baAPI=baAPI) self.assertEqual(len(baAPI._listRunJobs()), 9) killWorkflow("Main", config, config) self.verifyFileKillStatus() self.verifyJobKillStatus() self.assertEqual(len(baAPI._listRunJobs()), 8) EmulatorSetup.deleteConfig(configFile) return def testCreateSubscription(self): """ _testCreateSubscription_ Verify that the subscription creation code works correctly. """ resourceControl = ResourceControl() resourceControl.insertSite(siteName='site1', seName='goodse.cern.ch', ceName='site1', plugin="TestPlugin") resourceControl.insertSite(siteName='site2', seName='goodse2.cern.ch', ceName='site2', plugin="TestPlugin") testWorkload = self.createTestWMSpec() testTopLevelTask = getFirstTask(testWorkload) testWMBSHelper = WMBSHelper(testWorkload, testTopLevelTask.name(), "SomeBlock", cachepath=self.workDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper.createSubscription(testTopLevelTask, testWMBSHelper.topLevelFileset) procWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/ProcessingTask") procWorkflow.load() self.assertEqual(procWorkflow.owner, "sfoulkes", "Error: Wrong owner: %s" % procWorkflow.owner) self.assertEqual(procWorkflow.group, "DMWM", "Error: Wrong group: %s" % procWorkflow.group) self.assertEqual(procWorkflow.wfType, "TestReReco", "Error: Wrong type.") self.assertEqual( procWorkflow.spec, os.path.join(self.workDir, procWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(procWorkflow.outputMap.keys()), 1, "Error: Wrong number of WF outputs.") mergedProcOutput = procWorkflow.outputMap["OutputA"][0][ "merged_output_fileset"] unmergedProcOutput = procWorkflow.outputMap["OutputA"][0][ "output_fileset"] mergedProcOutput.loadData() unmergedProcOutput.loadData() self.assertEqual( mergedProcOutput.name, "/TestWorkload/ProcessingTask/MergeTask/merged-Merged", "Error: Merged output fileset is wrong.") self.assertEqual(unmergedProcOutput.name, "/TestWorkload/ProcessingTask/unmerged-OutputA", "Error: Unmerged output fileset is wrong.") mergeWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/ProcessingTask/MergeTask") mergeWorkflow.load() self.assertEqual(mergeWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual( mergeWorkflow.spec, os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1, "Error: Wrong number of WF outputs.") cleanupWorkflow = Workflow( name="TestWorkload", task="/TestWorkload/ProcessingTask/CleanupTask") cleanupWorkflow.load() self.assertEqual(cleanupWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual( cleanupWorkflow.spec, os.path.join(self.workDir, cleanupWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0, "Error: Wrong number of WF outputs.") unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][ "output_fileset"] unmergedMergeOutput.loadData() self.assertEqual( unmergedMergeOutput.name, "/TestWorkload/ProcessingTask/MergeTask/merged-Merged", "Error: Unmerged output fileset is wrong.") skimWorkflow = Workflow( name="TestWorkload", task="/TestWorkload/ProcessingTask/MergeTask/SkimTask") skimWorkflow.load() self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual( skimWorkflow.spec, os.path.join(self.workDir, skimWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(skimWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][ "merged_output_fileset"] unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][ "output_fileset"] mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][ "merged_output_fileset"] unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][ "output_fileset"] mergedSkimOutputA.loadData() mergedSkimOutputB.loadData() unmergedSkimOutputA.loadData() unmergedSkimOutputB.loadData() self.assertEqual( mergedSkimOutputA.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name) self.assertEqual( unmergedSkimOutputA.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Unmerged output fileset is wrong.") self.assertEqual( mergedSkimOutputB.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Merged output fileset is wrong.") self.assertEqual( unmergedSkimOutputB.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Unmerged output fileset is wrong.") topLevelFileset = Fileset(name="TestWorkload-ProcessingTask-SomeBlock") topLevelFileset.loadData() procSubscription = Subscription(fileset=topLevelFileset, workflow=procWorkflow) procSubscription.loadData() self.assertEqual(len(procSubscription.getWhiteBlackList()), 2, "Error: Wrong site white/black list for proc sub.") for site in procSubscription.getWhiteBlackList(): if site["site_name"] == "site1": self.assertEqual(site["valid"], 1, "Error: Site should be white listed.") else: self.assertEqual(site["valid"], 0, "Error: Site should be black listed.") self.assertEqual(procSubscription["type"], "Processing", "Error: Wrong subscription type.") self.assertEqual(procSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") mergeSubscription = Subscription(fileset=unmergedProcOutput, workflow=mergeWorkflow) mergeSubscription.loadData() self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0, "Error: Wrong white/black list for merge sub.") self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize", "Error: Wrong split algo.") skimSubscription = Subscription(fileset=unmergedMergeOutput, workflow=skimWorkflow) skimSubscription.loadData() self.assertEqual(skimSubscription["type"], "Skim", "Error: Wrong subscription type.") self.assertEqual(skimSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") return def testTruncatedWFInsertion(self): """ _testTruncatedWFInsertion_ """ resourceControl = ResourceControl() resourceControl.insertSite(siteName='site1', seName='goodse.cern.ch', ceName='site1', plugin="TestPlugin") resourceControl.insertSite(siteName='site2', seName='goodse2.cern.ch', ceName='site2', plugin="TestPlugin") testWorkload = self.createTestWMSpec() testTopLevelTask = getFirstTask(testWorkload) testWMBSHelper = WMBSHelper(testWorkload, testTopLevelTask.name(), "SomeBlock", cachepath=self.workDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper.createSubscription(testTopLevelTask, testWMBSHelper.topLevelFileset) testWorkload.truncate("ResubmitTestWorkload", "/TestWorkload/ProcessingTask/MergeTask", "someserver", "somedatabase") # create the subscription for multiple top task (MergeTask and CleanupTask for the same block) for task in testWorkload.getTopLevelTask(): testResubmitWMBSHelper = WMBSHelper(testWorkload, task.name(), "SomeBlock2", cachepath=self.workDir) testResubmitWMBSHelper.createTopLevelFileset() testResubmitWMBSHelper.createSubscription( task, testResubmitWMBSHelper.topLevelFileset) mergeWorkflow = Workflow(name="ResubmitTestWorkload", task="/ResubmitTestWorkload/MergeTask") mergeWorkflow.load() self.assertEqual(mergeWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual( mergeWorkflow.spec, os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1, "Error: Wrong number of WF outputs.") cleanupWorkflow = Workflow(name="ResubmitTestWorkload", task="/ResubmitTestWorkload/CleanupTask") cleanupWorkflow.load() self.assertEqual(cleanupWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual( cleanupWorkflow.spec, os.path.join(self.workDir, cleanupWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0, "Error: Wrong number of WF outputs.") unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][ "output_fileset"] unmergedMergeOutput.loadData() self.assertEqual(unmergedMergeOutput.name, "/ResubmitTestWorkload/MergeTask/merged-Merged", "Error: Unmerged output fileset is wrong.") skimWorkflow = Workflow( name="ResubmitTestWorkload", task="/ResubmitTestWorkload/MergeTask/SkimTask") skimWorkflow.load() self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual( skimWorkflow.spec, os.path.join(self.workDir, skimWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(skimWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][ "merged_output_fileset"] unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][ "output_fileset"] mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][ "merged_output_fileset"] unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][ "output_fileset"] mergedSkimOutputA.loadData() mergedSkimOutputB.loadData() unmergedSkimOutputA.loadData() unmergedSkimOutputB.loadData() self.assertEqual( mergedSkimOutputA.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name) self.assertEqual( unmergedSkimOutputA.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Unmerged output fileset is wrong.") self.assertEqual( mergedSkimOutputB.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Merged output fileset is wrong.") self.assertEqual( unmergedSkimOutputB.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Unmerged output fileset is wrong.") topLevelFileset = Fileset( name="ResubmitTestWorkload-MergeTask-SomeBlock2") topLevelFileset.loadData() mergeSubscription = Subscription(fileset=topLevelFileset, workflow=mergeWorkflow) mergeSubscription.loadData() self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0, "Error: Wrong white/black list for merge sub.") self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize", "Error: Wrong split algo.") skimSubscription = Subscription(fileset=unmergedMergeOutput, workflow=skimWorkflow) skimSubscription.loadData() self.assertEqual(skimSubscription["type"], "Skim", "Error: Wrong subscription type.") self.assertEqual(skimSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") return # def testProduction(self): # """Production workflow""" # pass def testReReco(self): """ReReco workflow""" # create workflow block = self.dataset + "#1" wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)) self.assertEqual(len(files), 1) def testReRecoBlackRunRestriction(self): """ReReco workflow with Run restrictions""" block = self.dataset + "#2" #add run blacklist self.topLevelTask.setInputRunBlacklist([1, 2, 3, 4]) wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files']) self.assertEqual(len(files), 0) def testReRecoWhiteRunRestriction(self): block = self.dataset + "#2" # Run Whitelist self.topLevelTask.setInputRunWhitelist([2]) wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files']) self.assertEqual(len(files), GlobalParams.numOfFilesPerBlock()) def testLumiMaskRestrictionsOK(self): block = self.dataset + "#1" self.wmspec.getTopLevelTask()[0].data.input.splitting.runs = ['1'] self.wmspec.getTopLevelTask()[0].data.input.splitting.lumis = ['1,1'] wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files']) self.assertEqual(len(files), GlobalParams.numOfFilesPerBlock()) def testLumiMaskRestrictionsKO(self): block = self.dataset + "#1" self.wmspec.getTopLevelTask()[0].data.input.splitting.runs = [ '123454321' ] self.wmspec.getTopLevelTask()[0].data.input.splitting.lumis = [ '123,123' ] wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files']) self.assertEqual(len(files), 0) def testDuplicateFileInsert(self): # using default wmspec block = self.dataset + "#1" wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) wmbs.topLevelFileset.loadData() numOfFiles = len(wmbs.topLevelFileset.files) # check initially inserted files. dbsFiles = self.dbs.getFileBlock(block)[block]['Files'] self.assertEqual(numOfFiles, len(dbsFiles)) firstFileset = wmbs.topLevelFileset wmbsDao = wmbs.daofactory(classname="Files.InFileset") numOfFiles = len(wmbsDao.execute(firstFileset.id)) self.assertEqual(numOfFiles, len(dbsFiles)) # use the new spec with same inputdataset block = self.dataset + "#1" wmspec = self.createWMSpec("TestSpec1") dbs = self.getDBS(wmspec) wmbs = self.createWMBSHelperWithTopTask(wmspec, block) # check duplicate insert dbsFiles = dbs.getFileBlock(block)[block]['Files'] numOfFiles = wmbs.addFiles(dbs.getFileBlock(block)[block]) self.assertEqual(numOfFiles, 0) secondFileset = wmbs.topLevelFileset wmbsDao = wmbs.daofactory(classname="Files.InFileset") numOfFiles = len(wmbsDao.execute(secondFileset.id)) self.assertEqual(numOfFiles, len(dbsFiles)) self.assertNotEqual(firstFileset.id, secondFileset.id) def testDuplicateSubscription(self): """Can't duplicate subscriptions""" # using default wmspec block = self.dataset + "#1" wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) wmbs.topLevelFileset.loadData() numOfFiles = len(wmbs.topLevelFileset.files) filesetId = wmbs.topLevelFileset.id subId = wmbs.topLevelSubscription['id'] # check initially inserted files. dbsFiles = self.dbs.getFileBlock(block)[block]['Files'] self.assertEqual(numOfFiles, len(dbsFiles)) firstFileset = wmbs.topLevelFileset self.assertEqual(numOfFiles, len(dbsFiles)) # reinsert subscription - shouldn't create anything new wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) wmbs.topLevelFileset.loadData() self.assertEqual(numOfFiles, len(wmbs.topLevelFileset.files)) self.assertEqual(filesetId, wmbs.topLevelFileset.id) self.assertEqual(subId, wmbs.topLevelSubscription['id']) # now do a montecarlo workflow self.setupMCWMSpec() mask = Mask(FirstRun=12, FirstLumi=1234, FirstEvent=12345, LastEvent=999995, LastLumi=12345, LastRun=12) wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask) wmbs.topLevelFileset.loadData() numOfFiles = len(wmbs.topLevelFileset.files) filesetId = wmbs.topLevelFileset.id subId = wmbs.topLevelSubscription['id'] # check initially inserted files. numDbsFiles = 1 self.assertEqual(numOfFiles, numDbsFiles) firstFileset = wmbs.topLevelFileset self.assertEqual(numOfFiles, numDbsFiles) # reinsert subscription - shouldn't create anything new wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask) wmbs.topLevelFileset.loadData() self.assertEqual(numOfFiles, len(wmbs.topLevelFileset.files)) self.assertEqual(filesetId, wmbs.topLevelFileset.id) self.assertEqual(subId, wmbs.topLevelSubscription['id']) def testParentage(self): """ 1. check whether parent files are created in wmbs. 2. check parent files are associated to child. 3. When 2 specs with the same input data (one with parent processing, one without it) is inserted, if one without parent processing inserted first then the other with parent processing insert, it still needs to create parent files although child files are duplicate """ block = self.dataset + "#1" wmbs, sub, numFiles = self.createWMBSHelperWithTopTask( self.wmspec, block, parentFlag=False, detail=True) # file creation without parents self.assertEqual(GlobalParams.numOfFilesPerBlock(), numFiles) wmbs.topLevelFileset.loadData() for child in wmbs.topLevelFileset.files: # no parent per child self.assertEqual(len(child["parents"]), 0) wmbs, sub, numFiles = self.createWMBSHelperWithTopTask(self.wmspec, block, parentFlag=True, detail=True) self.assertEqual(GlobalParams.numOfFilesPerBlock(), numFiles) wmbs.topLevelFileset.loadData() for child in wmbs.topLevelFileset.files: # one parent per child self.assertEqual(len(child["parents"]), 1) def testMCFakeFileInjection(self): """Inject fake Monte Carlo files into WMBS""" self.setupMCWMSpec() mask = Mask(FirstRun=12, FirstLumi=1234, FirstEvent=12345, LastEvent=999995, LastLumi=12345, LastRun=12) wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask) subscription = wmbs.topLevelSubscription self.assertEqual(1, subscription.exists()) fileset = subscription['fileset'] self.assertEqual(1, fileset.exists()) fileset.loadData() # need to refresh from database self.assertEqual(len(fileset.files), 1) self.assertEqual(len(fileset.parents), 0) self.assertFalse(fileset.open) file = list(fileset.files)[0] self.assertEqual(file['events'], mask['LastEvent'] - mask['FirstEvent'] + 1) # inclusive range self.assertEqual(file['merged'], False) # merged files get added to dbs self.assertEqual(len(file['parents']), 0) #file.loadData() self.assertEqual(sorted(file['locations']), sorted(self.ses)) self.assertEqual(len(file.getParentLFNs()), 0) self.assertEqual(len(file.getRuns()), 1) run = file.getRuns()[0] self.assertEqual(run.run, mask['FirstRun']) self.assertEqual(run.lumis[0], mask['FirstLumi']) self.assertEqual(run.lumis[-1], mask['LastLumi']) self.assertEqual(len(run.lumis), mask['LastLumi'] - mask['FirstLumi'] + 1)
def testJobSummary(self): """ _testJobSummary_ verify that job summary for jobs with fwjr are correctly created and that status is updated when updatesummary flag is enabled """ change = ChangeState(self.config, "changestate_t") locationAction = self.daoFactory(classname="Locations.New") locationAction.execute("site1", pnn="T2_CH_CERN") testWorkflow = Workflow(spec=self.specUrl, owner="Steve", name="wf001", task=self.taskName) testWorkflow.create() testFileset = Fileset(name="TestFileset") testFileset.create() testFile = File(lfn="SomeLFNC", locations=set(["T2_CH_CERN"])) testFile.create() testFileset.addFile(testFile) testFileset.commit() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow) testSubscription.create() splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=testSubscription) jobGroup = jobFactory(files_per_job=1)[0] assert len(jobGroup.jobs) == 1, \ "Error: Splitting should have created one job." testJobA = jobGroup.jobs[0] testJobA["user"] = "******" testJobA["group"] = "DMWM" testJobA["taskType"] = "Production" change.propagate([testJobA], 'created', 'new') myReport = Report() reportPath = os.path.join(getTestBase(), "WMCore_t/JobStateMachine_t/Report.pkl") myReport.unpersist(reportPath) change.propagate([testJobA], 'executing', 'created') testJobA["fwjr"] = myReport change.propagate([testJobA], 'jobfailed', 'executing') changeStateDB = self.couchServer.connectDatabase( dbname=self.config.JobStateMachine.jobSummaryDBName) allDocs = changeStateDB.document("_all_docs") self.assertEqual(len(allDocs["rows"]), 2, "Error: Wrong number of documents") fwjrDoc = {'state': None} for resultRow in allDocs["rows"]: if resultRow["id"] != "_design/WMStats": fwjrDoc = changeStateDB.document(resultRow["id"]) break self.assertEqual(fwjrDoc['state'], 'jobfailed', "Error: summary doesn't have the expected job state") del testJobA["fwjr"] change.propagate([testJobA], 'jobcooloff', 'jobfailed', updatesummary=True) return
def testIndexConflict(self): """ _testIndexConflict_ Verify that in case of conflict in the job index we discard the old document and replace with a new one. Only works for MySQL backend """ change = ChangeState(self.config, "changestate_t") locationAction = self.daoFactory(classname="Locations.New") locationAction.execute("site1", pnn="T2_CH_CERN") testWorkflow = Workflow(spec=self.specUrl, owner="Steve", name="wf001", task=self.taskName) testWorkflow.create() testFileset = Fileset(name="TestFileset") testFileset.create() testFile = File(lfn="SomeLFNC", locations=set(["T2_CH_CERN"])) testFile.create() testFileset.addFile(testFile) testFileset.commit() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow) testSubscription.create() splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=testSubscription) jobGroup = jobFactory(files_per_job=1)[0] assert len(jobGroup.jobs) == 1, \ "Error: Splitting should have created one job." testJobA = jobGroup.jobs[0] testJobA["user"] = "******" testJobA["group"] = "CompOps" testJobA["taskType"] = "Processing" myReport = Report() reportPath = os.path.join(getTestBase(), "WMCore_t/JobStateMachine_t/Report.pkl") myReport.unpersist(reportPath) testJobA["fwjr"] = myReport change.propagate([testJobA], 'created', 'new') jobdatabase = self.couchServer.connectDatabase('changestate_t/jobs', False) fwjrdatabase = self.couchServer.connectDatabase( 'changestate_t/fwjrs', False) jobDoc = jobdatabase.document("1") fwjrDoc = fwjrdatabase.document("1-0") self.assertEqual(jobDoc["workflow"], "wf001", "Wrong workflow in couch job document") self.assertEqual(fwjrDoc["fwjr"]["task"], self.taskName, "Wrong task in fwjr couch document") testJobA.delete() myThread = threading.currentThread() myThread.dbi.processData("ALTER TABLE wmbs_job AUTO_INCREMENT = 1") testWorkflow = Workflow(spec=self.specUrl, owner="Steve", name="wf002", task="/TestWorkflow/Test2") testWorkflow.create() testFileset = Fileset(name="TestFilesetB") testFileset.create() testFile = File(lfn="SomeLFNB", locations=set(["T2_CH_CERN"])) testFile.create() testFileset.addFile(testFile) testFileset.commit() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow) testSubscription.create() splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=testSubscription) jobGroup = jobFactory(files_per_job=1)[0] testJobB = jobGroup.jobs[0] testJobB["user"] = "******" testJobB["group"] = "CompOps" testJobB["taskType"] = "Processing" testJobB["fwjr"] = myReport change.propagate([testJobB], 'created', 'new') jobDoc = jobdatabase.document("1") fwjrDoc = fwjrdatabase.document("1-0") self.assertEqual(jobDoc["workflow"], "wf002", "Job document was not overwritten") self.assertEqual(fwjrDoc["fwjr"]["task"], "/TestWorkflow/Test2", "FWJR document was not overwritten") return
def setupForKillTest(self, baAPI=None): """ _setupForKillTest_ Inject a workflow into WMBS that has a processing task, a merge task and a cleanup task. Inject files into the various tasks at various processing states (acquired, complete, available...). Also create jobs for each subscription in various states. """ myThread = threading.currentThread() daoFactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger, dbinterface=myThread.dbi) locationAction = daoFactory(classname="Locations.New") changeStateAction = daoFactory(classname="Jobs.ChangeState") resourceControl = ResourceControl() resourceControl.insertSite(siteName='site1', seName='goodse.cern.ch', ceName='site1', plugin="TestPlugin") resourceControl.insertThreshold(siteName = 'site1', taskType = 'Processing', \ maxSlots = 10000, pendingSlots = 10000) userDN = 'someDN' userAction = daoFactory(classname="Users.New") userAction.execute(dn=userDN, group_name='DEFAULT', role_name='DEFAULT') inputFileset = Fileset("input") inputFileset.create() inputFileA = File("lfnA", locations="goodse.cern.ch") inputFileB = File("lfnB", locations="goodse.cern.ch") inputFileC = File("lfnC", locations="goodse.cern.ch") inputFileA.create() inputFileB.create() inputFileC.create() inputFileset.addFile(inputFileA) inputFileset.addFile(inputFileB) inputFileset.addFile(inputFileC) inputFileset.commit() unmergedOutputFileset = Fileset("unmerged") unmergedOutputFileset.create() unmergedFileA = File("ulfnA", locations="goodse.cern.ch") unmergedFileB = File("ulfnB", locations="goodse.cern.ch") unmergedFileC = File("ulfnC", locations="goodse.cern.ch") unmergedFileA.create() unmergedFileB.create() unmergedFileC.create() unmergedOutputFileset.addFile(unmergedFileA) unmergedOutputFileset.addFile(unmergedFileB) unmergedOutputFileset.addFile(unmergedFileC) unmergedOutputFileset.commit() mainProcWorkflow = Workflow(spec="spec1", owner="Steve", name="Main", task="Proc") mainProcWorkflow.create() mainProcMergeWorkflow = Workflow(spec="spec1", owner="Steve", name="Main", task="ProcMerge") mainProcMergeWorkflow.create() mainCleanupWorkflow = Workflow(spec="spec1", owner="Steve", name="Main", task="Cleanup") mainCleanupWorkflow.create() self.mainProcSub = Subscription(fileset=inputFileset, workflow=mainProcWorkflow, type="Processing") self.mainProcSub.create() self.mainProcSub.acquireFiles(inputFileA) self.mainProcSub.completeFiles(inputFileB) procJobGroup = JobGroup(subscription=self.mainProcSub) procJobGroup.create() self.procJobA = Job(name="ProcJobA") self.procJobA["state"] = "new" self.procJobA["location"] = "site1" self.procJobB = Job(name="ProcJobB") self.procJobB["state"] = "executing" self.procJobB["location"] = "site1" self.procJobC = Job(name="ProcJobC") self.procJobC["state"] = "complete" self.procJobC["location"] = "site1" self.procJobA.create(procJobGroup) self.procJobB.create(procJobGroup) self.procJobC.create(procJobGroup) self.mainMergeSub = Subscription(fileset=unmergedOutputFileset, workflow=mainProcMergeWorkflow, type="Merge") self.mainMergeSub.create() self.mainMergeSub.acquireFiles(unmergedFileA) self.mainMergeSub.failFiles(unmergedFileB) mergeJobGroup = JobGroup(subscription=self.mainMergeSub) mergeJobGroup.create() self.mergeJobA = Job(name="MergeJobA") self.mergeJobA["state"] = "exhausted" self.mergeJobA["location"] = "site1" self.mergeJobB = Job(name="MergeJobB") self.mergeJobB["state"] = "cleanout" self.mergeJobB["location"] = "site1" self.mergeJobC = Job(name="MergeJobC") self.mergeJobC["state"] = "new" self.mergeJobC["location"] = "site1" self.mergeJobA.create(mergeJobGroup) self.mergeJobB.create(mergeJobGroup) self.mergeJobC.create(mergeJobGroup) self.mainCleanupSub = Subscription(fileset=unmergedOutputFileset, workflow=mainCleanupWorkflow, type="Cleanup") self.mainCleanupSub.create() self.mainCleanupSub.acquireFiles(unmergedFileA) self.mainCleanupSub.completeFiles(unmergedFileB) cleanupJobGroup = JobGroup(subscription=self.mainCleanupSub) cleanupJobGroup.create() self.cleanupJobA = Job(name="CleanupJobA") self.cleanupJobA["state"] = "new" self.cleanupJobA["location"] = "site1" self.cleanupJobB = Job(name="CleanupJobB") self.cleanupJobB["state"] = "executing" self.cleanupJobB["location"] = "site1" self.cleanupJobC = Job(name="CleanupJobC") self.cleanupJobC["state"] = "complete" self.cleanupJobC["location"] = "site1" self.cleanupJobA.create(cleanupJobGroup) self.cleanupJobB.create(cleanupJobGroup) self.cleanupJobC.create(cleanupJobGroup) jobList = [ self.procJobA, self.procJobB, self.procJobC, self.mergeJobA, self.mergeJobB, self.mergeJobC, self.cleanupJobA, self.cleanupJobB, self.cleanupJobC ] changeStateAction.execute(jobList) if baAPI: for job in jobList: job['plugin'] = 'TestPlugin' job['userdn'] = userDN job['usergroup'] = 'DEFAULT' job['userrole'] = 'DEFAULT' job['custom']['location'] = 'site1' baAPI.createNewJobs(wmbsJobs=jobList) # We'll create an unrelated workflow to verify that it isn't affected # by the killing code. bogusFileset = Fileset("dontkillme") bogusFileset.create() bogusFileA = File("bogus/lfnA", locations="goodse.cern.ch") bogusFileA.create() bogusFileset.addFile(bogusFileA) bogusFileset.commit() bogusWorkflow = Workflow(spec="spec2", owner="Steve", name="Bogus", task="Proc") bogusWorkflow.create() self.bogusSub = Subscription(fileset=bogusFileset, workflow=bogusWorkflow, type="Processing") self.bogusSub.create() self.bogusSub.acquireFiles(bogusFileA) return
def testFWJRInputFileTruncation(self): """ _testFWJRInputFileTruncation_ Test and see whether the ChangeState code can be used to automatically truncate the number of input files in a FWJR Code stolen from the serialization test """ self.config.JobStateMachine.maxFWJRInputFiles = 0 change = ChangeState(self.config, "changestate_t") locationAction = self.daoFactory(classname="Locations.New") locationAction.execute("site1", pnn="T2_CH_CERN") testWorkflow = Workflow(spec=self.specUrl, owner="Steve", name="wf001", task=self.taskName) testWorkflow.create() testFileset = Fileset(name="TestFileset") testFileset.create() testFile = File(lfn="SomeLFNC", locations=set(["T2_CH_CERN"])) testFile.create() testFileset.addFile(testFile) testFileset.commit() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow) testSubscription.create() splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=testSubscription) jobGroup = jobFactory(files_per_job=1)[0] self.assertEqual(len(jobGroup.jobs), 1, "Error: Splitting should have created one job.") testJobA = jobGroup.jobs[0] testJobA["user"] = "******" testJobA["group"] = "DMWM" testJobA["taskType"] = "Processing" change.propagate([testJobA], 'created', 'new') myReport = Report() reportPath = os.path.join(getTestBase(), "WMCore_t/JobStateMachine_t/Report.pkl") myReport.unpersist(reportPath) testJobA["fwjr"] = myReport change.propagate([testJobA], 'executing', 'created') changeStateDB = self.couchServer.connectDatabase( dbname="changestate_t/fwjrs") allDocs = changeStateDB.document("_all_docs") self.assertEqual(len(allDocs["rows"]), 2, "Error: Wrong number of documents") result = changeStateDB.loadView("FWJRDump", "fwjrsByWorkflowName") self.assertEqual(len(result["rows"]), 1, "Error: Wrong number of rows.") for row in result["rows"]: couchJobDoc = changeStateDB.document(row["value"]["id"]) self.assertEqual(couchJobDoc["_rev"], row["value"]["rev"], "Error: Rev is wrong.") for resultRow in allDocs["rows"]: if resultRow["id"] != "_design/FWJRDump": fwjrDoc = changeStateDB.document(resultRow["id"]) break self.assertEqual( fwjrDoc["fwjr"]["steps"]['cmsRun1']['input']['source'], []) return
def testJobKilling(self): """ _testJobKilling_ Test that we can successfully set jobs to the killed state """ change = ChangeState(self.config, "changestate_t") locationAction = self.daoFactory(classname="Locations.New") locationAction.execute("site1", pnn="T2_CH_CERN") testWorkflow = Workflow(spec=self.specUrl, owner="Steve", name="wf001", task=self.taskName) testWorkflow.create() testFileset = Fileset(name="TestFileset") testFileset.create() for i in range(4): newFile = File(lfn="File%s" % i, locations=set(["T2_CH_CERN"])) newFile.create() testFileset.addFile(newFile) testFileset.commit() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow, split_algo="FileBased") testSubscription.create() splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=testSubscription) jobGroup = jobFactory(files_per_job=1)[0] assert len(jobGroup.jobs) == 4, \ "Error: Splitting should have created four jobs." testJobA = jobGroup.jobs[0] testJobA["user"] = "******" testJobA["group"] = "DMWM" testJobA["taskType"] = "Processing" testJobB = jobGroup.jobs[1] testJobB["user"] = "******" testJobB["group"] = "DMWM" testJobB["taskType"] = "Processing" testJobC = jobGroup.jobs[2] testJobC["user"] = "******" testJobC["group"] = "DMWM" testJobC["taskType"] = "Processing" testJobD = jobGroup.jobs[3] testJobD["user"] = "******" testJobD["group"] = "DMWM" testJobD["taskType"] = "Processing" change.persist([testJobA], "created", "new") change.persist([testJobB], "jobfailed", "executing") change.persist([testJobC, testJobD], "executing", "created") change.persist([testJobA], "killed", "created") change.persist([testJobB], "killed", "jobfailed") change.persist([testJobC, testJobD], "killed", "executing") for job in [testJobA, testJobB, testJobC, testJobD]: job.load() self.assertEqual(job['retry_count'], 99999) self.assertEqual(job['state'], 'killed') return
def testDuplicateJobReports(self): """ _testDuplicateJobReports_ Verify that everything works correctly if a job report is added to the database more than once. """ change = ChangeState(self.config, "changestate_t") locationAction = self.daoFactory(classname="Locations.New") locationAction.execute("site1", pnn="T2_CH_CERN") testWorkflow = Workflow(spec=self.specUrl, owner="Steve", name="wf001", task=self.taskName) testWorkflow.create() testFileset = Fileset(name="TestFileset") testFileset.create() testFile = File(lfn="SomeLFNC", locations=set(["T2_CH_CERN"])) testFile.create() testFileset.addFile(testFile) testFileset.commit() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow) testSubscription.create() splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=testSubscription) jobGroup = jobFactory(files_per_job=1)[0] assert len(jobGroup.jobs) == 1, \ "Error: Splitting should have created one job." testJobA = jobGroup.jobs[0] testJobA["user"] = "******" testJobA["group"] = "DMWM" testJobA["taskType"] = "Processing" change.propagate([testJobA], 'created', 'new') myReport = Report() reportPath = os.path.join(getTestBase(), "WMCore_t/JobStateMachine_t/Report.pkl") myReport.unpersist(reportPath) testJobA["fwjr"] = myReport change.propagate([testJobA], 'executing', 'created') change.propagate([testJobA], 'executing', 'created') changeStateDB = self.couchServer.connectDatabase( dbname="changestate_t/fwjrs") allDocs = changeStateDB.document("_all_docs") self.assertEqual(len(allDocs["rows"]), 2, "Error: Wrong number of documents") for resultRow in allDocs["rows"]: if resultRow["id"] != "_design/FWJRDump": changeStateDB.document(resultRow["id"]) break return
def testCreateSubscription(self): """ _testCreateSubscription_ Verify that the subscription creation code works correctly. """ resourceControl = ResourceControl() resourceControl.insertSite(siteName='site1', seName='goodse.cern.ch', ceName='site1', plugin="TestPlugin") resourceControl.insertSite(siteName='site2', seName='goodse2.cern.ch', ceName='site2', plugin="TestPlugin") testWorkload = self.createTestWMSpec() testTopLevelTask = getFirstTask(testWorkload) testWMBSHelper = WMBSHelper(testWorkload, testTopLevelTask.name(), "SomeBlock", cachepath=self.workDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper.createSubscription(testTopLevelTask, testWMBSHelper.topLevelFileset) procWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/ProcessingTask") procWorkflow.load() self.assertEqual(procWorkflow.owner, "sfoulkes", "Error: Wrong owner: %s" % procWorkflow.owner) self.assertEqual(procWorkflow.group, "DMWM", "Error: Wrong group: %s" % procWorkflow.group) self.assertEqual(procWorkflow.wfType, "TestReReco", "Error: Wrong type.") self.assertEqual( procWorkflow.spec, os.path.join(self.workDir, procWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(procWorkflow.outputMap.keys()), 1, "Error: Wrong number of WF outputs.") mergedProcOutput = procWorkflow.outputMap["OutputA"][0][ "merged_output_fileset"] unmergedProcOutput = procWorkflow.outputMap["OutputA"][0][ "output_fileset"] mergedProcOutput.loadData() unmergedProcOutput.loadData() self.assertEqual( mergedProcOutput.name, "/TestWorkload/ProcessingTask/MergeTask/merged-Merged", "Error: Merged output fileset is wrong.") self.assertEqual(unmergedProcOutput.name, "/TestWorkload/ProcessingTask/unmerged-OutputA", "Error: Unmerged output fileset is wrong.") mergeWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/ProcessingTask/MergeTask") mergeWorkflow.load() self.assertEqual(mergeWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual( mergeWorkflow.spec, os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1, "Error: Wrong number of WF outputs.") cleanupWorkflow = Workflow( name="TestWorkload", task="/TestWorkload/ProcessingTask/CleanupTask") cleanupWorkflow.load() self.assertEqual(cleanupWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual( cleanupWorkflow.spec, os.path.join(self.workDir, cleanupWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0, "Error: Wrong number of WF outputs.") unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][ "output_fileset"] unmergedMergeOutput.loadData() self.assertEqual( unmergedMergeOutput.name, "/TestWorkload/ProcessingTask/MergeTask/merged-Merged", "Error: Unmerged output fileset is wrong.") skimWorkflow = Workflow( name="TestWorkload", task="/TestWorkload/ProcessingTask/MergeTask/SkimTask") skimWorkflow.load() self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual( skimWorkflow.spec, os.path.join(self.workDir, skimWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(skimWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][ "merged_output_fileset"] unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][ "output_fileset"] mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][ "merged_output_fileset"] unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][ "output_fileset"] mergedSkimOutputA.loadData() mergedSkimOutputB.loadData() unmergedSkimOutputA.loadData() unmergedSkimOutputB.loadData() self.assertEqual( mergedSkimOutputA.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name) self.assertEqual( unmergedSkimOutputA.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Unmerged output fileset is wrong.") self.assertEqual( mergedSkimOutputB.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Merged output fileset is wrong.") self.assertEqual( unmergedSkimOutputB.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Unmerged output fileset is wrong.") topLevelFileset = Fileset(name="TestWorkload-ProcessingTask-SomeBlock") topLevelFileset.loadData() procSubscription = Subscription(fileset=topLevelFileset, workflow=procWorkflow) procSubscription.loadData() self.assertEqual(len(procSubscription.getWhiteBlackList()), 2, "Error: Wrong site white/black list for proc sub.") for site in procSubscription.getWhiteBlackList(): if site["site_name"] == "site1": self.assertEqual(site["valid"], 1, "Error: Site should be white listed.") else: self.assertEqual(site["valid"], 0, "Error: Site should be black listed.") self.assertEqual(procSubscription["type"], "Processing", "Error: Wrong subscription type.") self.assertEqual(procSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") mergeSubscription = Subscription(fileset=unmergedProcOutput, workflow=mergeWorkflow) mergeSubscription.loadData() self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0, "Error: Wrong white/black list for merge sub.") self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize", "Error: Wrong split algo.") skimSubscription = Subscription(fileset=unmergedMergeOutput, workflow=skimWorkflow) skimSubscription.loadData() self.assertEqual(skimSubscription["type"], "Skim", "Error: Wrong subscription type.") self.assertEqual(skimSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") return
def testJobSerialization(self): """ _testJobSerialization_ Verify that serialization of a job works when adding a FWJR. """ change = ChangeState(self.config, "changestate_t") locationAction = self.daoFactory(classname="Locations.New") locationAction.execute("site1", pnn="T2_CH_CERN") testWorkflow = Workflow(spec=self.specUrl, owner="Steve", name="wf001", task=self.taskName) testWorkflow.create() testFileset = Fileset(name="TestFileset") testFileset.create() testFile = File(lfn="SomeLFNC", locations=set(["T2_CH_CERN"])) testFile.create() testFileset.addFile(testFile) testFileset.commit() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow) testSubscription.create() splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=testSubscription) jobGroup = jobFactory(files_per_job=1)[0] assert len(jobGroup.jobs) == 1, \ "Error: Splitting should have created one job." testJobA = jobGroup.jobs[0] testJobA["user"] = "******" testJobA["group"] = "DMWM" testJobA["taskType"] = "Processing" change.propagate([testJobA], 'created', 'new') myReport = Report() reportPath = os.path.join(getTestBase(), "WMCore_t/JobStateMachine_t/Report.pkl") myReport.unpersist(reportPath) testJobA["fwjr"] = myReport change.propagate([testJobA], 'executing', 'created') changeStateDB = self.couchServer.connectDatabase( dbname="changestate_t/fwjrs") allDocs = changeStateDB.document("_all_docs") self.assertEqual(len(allDocs["rows"]), 2, "Error: Wrong number of documents") result = changeStateDB.loadView("FWJRDump", "fwjrsByWorkflowName") self.assertEqual(len(result["rows"]), 1, "Error: Wrong number of rows.") for row in result["rows"]: couchJobDoc = changeStateDB.document(row["value"]["id"]) self.assertEqual(couchJobDoc["_rev"], row["value"]["rev"], "Error: Rev is wrong.") for resultRow in allDocs["rows"]: if resultRow["id"] != "_design/FWJRDump": fwjrDoc = changeStateDB.document(resultRow["id"]) break assert fwjrDoc["retrycount"] == 0, \ "Error: Retry count is wrong." assert len(fwjrDoc["fwjr"]["steps"].keys()) == 2, \ "Error: Wrong number of steps in FWJR." assert "cmsRun1" in fwjrDoc["fwjr"]["steps"].keys(), \ "Error: cmsRun1 step is missing from FWJR." assert "stageOut1" in fwjrDoc["fwjr"]["steps"].keys(), \ "Error: stageOut1 step is missing from FWJR." return
def testListRunningJobs(self): """ _testListRunningJobs_ Test the ListRunningJobs DAO. """ testWorkflow = Workflow(spec=makeUUID(), owner="Steve", name=makeUUID(), task="Test") testWorkflow.create() testFileset = Fileset(name="TestFileset") testFileset.create() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow, type="Processing") testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() testJobA = Job(name=makeUUID(), files=[]) testJobA["couch_record"] = makeUUID() testJobA.create(group=testJobGroup) testJobA["state"] = "executing" testJobB = Job(name=makeUUID(), files=[]) testJobB["couch_record"] = makeUUID() testJobB.create(group=testJobGroup) testJobB["state"] = "complete" testJobC = Job(name=makeUUID(), files=[]) testJobC["couch_record"] = makeUUID() testJobC.create(group=testJobGroup) testJobC["state"] = "new" changeStateAction = self.daoFactory(classname="Jobs.ChangeState") changeStateAction.execute(jobs=[testJobA, testJobB, testJobC]) runningJobsAction = self.daoFactory( classname="Monitoring.ListRunningJobs") runningJobs = runningJobsAction.execute() assert len(runningJobs) == 2, \ "Error: Wrong number of running jobs returned." for runningJob in runningJobs: if runningJob["job_name"] == testJobA["name"]: assert runningJob["state"] == testJobA["state"], \ "Error: Running job has wrong state." assert runningJob["couch_record"] == testJobA["couch_record"], \ "Error: Running job has wrong couch record." else: assert runningJob["job_name"] == testJobC["name"], \ "Error: Running job has wrong name." assert runningJob["state"] == testJobC["state"], \ "Error: Running job has wrong state." assert runningJob["couch_record"] == testJobC["couch_record"], \ "Error: Running job has wrong couch record." return
def testRecordInCouch(self): """ _testRecordInCouch_ Verify that jobs, state transitions and fwjrs are recorded correctly. """ change = ChangeState(self.config, "changestate_t") locationAction = self.daoFactory(classname="Locations.New") locationAction.execute("site1", pnn="T2_CH_CERN") testWorkflow = Workflow(spec=self.specUrl, owner="Steve", name="wf001", task=self.taskName) testWorkflow.create() testFileset = Fileset(name="TestFileset") testFileset.create() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow, split_algo="FileBased") testSubscription.create() testFileA = File(lfn="SomeLFNA", events=1024, size=2048, locations=set(["T2_CH_CERN"])) testFileB = File(lfn="SomeLFNB", events=1025, size=2049, locations=set(["T2_CH_CERN"])) testFileA.create() testFileB.create() testFileset.addFile(testFileA) testFileset.addFile(testFileB) testFileset.commit() splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=testSubscription) jobGroup = jobFactory(files_per_job=1)[0] assert len(jobGroup.jobs) == 2, \ "Error: Splitting should have created two jobs." testJobA = jobGroup.jobs[0] testJobA["user"] = "******" testJobA["group"] = "DMWM" testJobA["taskType"] = "Merge" testJobB = jobGroup.jobs[1] testJobB["user"] = "******" testJobB["group"] = "DMWM" testJobB["taskType"] = "Processing" change.propagate([testJobA, testJobB], "new", "none") change.propagate([testJobA, testJobB], "created", "new") change.propagate([testJobA, testJobB], "executing", "created") testJobADoc = change.jobsdatabase.document(testJobA["couch_record"]) for transition in testJobADoc["states"].itervalues(): self.assertTrue( isinstance(transition["timestamp"], int) or isinstance(transition["timestamp"], long)) self.assertEqual(testJobADoc["jobid"], testJobA["id"], "Error: ID parameter is incorrect.") assert testJobADoc["name"] == testJobA["name"], \ "Error: Name parameter is incorrect." assert testJobADoc["jobgroup"] == testJobA["jobgroup"], \ "Error: Jobgroup parameter is incorrect." assert testJobADoc["workflow"] == testJobA["workflow"], \ "Error: Workflow parameter is incorrect." assert testJobADoc["task"] == testJobA["task"], \ "Error: Task parameter is incorrect." assert testJobADoc["owner"] == testJobA["owner"], \ "Error: Owner parameter is incorrect." assert testJobADoc["mask"]["FirstEvent"] == testJobA["mask"]["FirstEvent"], \ "Error: First event in mask is incorrect." assert testJobADoc["mask"]["LastEvent"] == testJobA["mask"]["LastEvent"], \ "Error: Last event in mask is incorrect." assert testJobADoc["mask"]["FirstLumi"] == testJobA["mask"]["FirstLumi"], \ "Error: First lumi in mask is incorrect." assert testJobADoc["mask"]["LastLumi"] == testJobA["mask"]["LastLumi"], \ "Error: First lumi in mask is incorrect." assert testJobADoc["mask"]["FirstRun"] == testJobA["mask"]["FirstRun"], \ "Error: First run in mask is incorrect." assert testJobADoc["mask"]["LastEvent"] == testJobA["mask"]["LastRun"], \ "Error: First event in mask is incorrect." assert len(testJobADoc["inputfiles"]) == 1, \ "Error: Input files parameter is incorrect." testJobBDoc = change.jobsdatabase.document(testJobB["couch_record"]) assert testJobBDoc["jobid"] == testJobB["id"], \ "Error: ID parameter is incorrect." assert testJobBDoc["name"] == testJobB["name"], \ "Error: Name parameter is incorrect." assert testJobBDoc["jobgroup"] == testJobB["jobgroup"], \ "Error: Jobgroup parameter is incorrect." assert testJobBDoc["mask"]["FirstEvent"] == testJobB["mask"]["FirstEvent"], \ "Error: First event in mask is incorrect." assert testJobBDoc["mask"]["LastEvent"] == testJobB["mask"]["LastEvent"], \ "Error: Last event in mask is incorrect." assert testJobBDoc["mask"]["FirstLumi"] == testJobB["mask"]["FirstLumi"], \ "Error: First lumi in mask is incorrect." assert testJobBDoc["mask"]["LastLumi"] == testJobB["mask"]["LastLumi"], \ "Error: First lumi in mask is incorrect." assert testJobBDoc["mask"]["FirstRun"] == testJobB["mask"]["FirstRun"], \ "Error: First run in mask is incorrect." assert testJobBDoc["mask"]["LastEvent"] == testJobB["mask"]["LastRun"], \ "Error: First event in mask is incorrect." assert len(testJobBDoc["inputfiles"]) == 1, \ "Error: Input files parameter is incorrect." changeStateDB = self.couchServer.connectDatabase( dbname="changestate_t/jobs") allDocs = changeStateDB.document("_all_docs") self.assertEqual(len(allDocs["rows"]), 3, "Error: Wrong number of documents.") couchJobDoc = changeStateDB.document("1") assert couchJobDoc["name"] == testJobA["name"], \ "Error: Name is wrong" assert len(couchJobDoc["inputfiles"]) == 1, \ "Error: Wrong number of input files." result = changeStateDB.loadView("JobDump", "jobsByWorkflowName") self.assertEqual(len(result["rows"]), 2, "Error: Wrong number of rows.") for row in result["rows"]: couchJobDoc = changeStateDB.document(row["value"]["id"]) self.assertEqual(couchJobDoc["_rev"], row["value"]["rev"], "Error: Rev is wrong.") return
def setUp(self): """ _setUp_ Create two subscriptions: One that contains a single file and one that contains multiple files. """ self.testInit = TestInit(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setSchema(customModules = ["WMCore.WMBS"], useDefault = False) myThread = threading.currentThread() daofactory = DAOFactory(package = "WMCore.WMBS", logger = myThread.logger, dbinterface = myThread.dbi) locationAction = daofactory(classname = "Locations.New") locationAction.execute(siteName = "site1", pnn = "T2_CH_CERN") self.multipleFileFileset = Fileset(name = "TestFileset1") self.multipleFileFileset.create() for i in range(10): newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["T2_CH_CERN"])) newFile.addRun(Run(i, *[45+i])) newFile.create() self.multipleFileFileset.addFile(newFile) self.multipleFileFileset.commit() self.singleFileFileset = Fileset(name = "TestFileset2") self.singleFileFileset.create() newFile = File("/some/file/name", size = 1000, events = 100, locations = set(["T2_CH_CERN"])) newFile.addRun(Run(1, *[45])) newFile.create() self.singleFileFileset.addFile(newFile) self.singleFileFileset.commit() self.multipleFileLumiset = Fileset(name = "TestFileset3") self.multipleFileLumiset.create() for i in range(10): newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["T2_CH_CERN"])) newFile.addRun(Run(1, *[45+i/3])) newFile.create() self.multipleFileLumiset.addFile(newFile) self.multipleFileLumiset.commit() self.singleLumiFileset = Fileset(name = "TestFileset4") self.singleLumiFileset.create() for i in range(10): newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["T2_CH_CERN"])) newFile.addRun(Run(1, *[45])) newFile.create() self.singleLumiFileset.addFile(newFile) self.singleLumiFileset.commit() testWorkflow = Workflow(spec = "spec.xml", owner = "mnorman", name = "wf001", task="Test") testWorkflow.create() self.multipleFileSubscription = Subscription(fileset = self.multipleFileFileset, workflow = testWorkflow, split_algo = "FixedDelay", type = "Processing") self.singleFileSubscription = Subscription(fileset = self.singleFileFileset, workflow = testWorkflow, split_algo = "FixedDelay", type = "Processing") self.multipleLumiSubscription = Subscription(fileset = self.multipleFileLumiset, workflow = testWorkflow, split_algo = "FixedDelay", type = "Processing") self.singleLumiSubscription = Subscription(fileset = self.singleLumiFileset, workflow = testWorkflow, split_algo = "FixedDelay", type = "Processing") self.multipleFileSubscription.create() self.singleFileSubscription.create() self.multipleLumiSubscription.create() self.singleLumiSubscription.create() return
def createJobs(self): """ _createJobs_ Create test jobs in WMBS and BossAir """ testWorkflow = Workflow(spec=makeUUID(), owner="tapas", name=makeUUID(), task="Test") testWorkflow.create() testFilesetA = Fileset(name="TestFilesetA") testFilesetA.create() testFilesetB = Fileset(name="TestFilesetB") testFilesetB.create() testFilesetC = Fileset(name="TestFilesetC") testFilesetC.create() testFileA = File(lfn="testFileA", locations=set(["testSE1", "testSE2"])) testFileA.create() testFilesetA.addFile(testFileA) testFilesetA.commit() testFilesetB.addFile(testFileA) testFilesetB.commit() testFilesetC.addFile(testFileA) testFilesetC.commit() testSubscriptionA = Subscription(fileset=testFilesetA, workflow=testWorkflow, type="Processing") testSubscriptionA.create() testSubscriptionA.addWhiteBlackList([{ "site_name": "testSite1", "valid": True }]) testSubscriptionB = Subscription(fileset=testFilesetB, workflow=testWorkflow, type="Processing") testSubscriptionB.create() testSubscriptionB.addWhiteBlackList([{ "site_name": "testSite1", "valid": False }]) testSubscriptionC = Subscription(fileset=testFilesetC, workflow=testWorkflow, type="Merge") testSubscriptionC.create() testJobGroupA = JobGroup(subscription=testSubscriptionA) testJobGroupA.create() testJobGroupB = JobGroup(subscription=testSubscriptionB) testJobGroupB.create() testJobGroupC = JobGroup(subscription=testSubscriptionC) testJobGroupC.create() # Site1, Has been assigned a location and is complete. testJobA = Job(name="testJobA", files=[testFileA]) testJobA["couch_record"] = makeUUID() testJobA.create(group=testJobGroupA) testJobA["state"] = "success" # Site 1, Has been assigned a location and is incomplete. testJobB = Job(name="testJobB", files=[testFileA]) testJobB["couch_record"] = makeUUID() testJobB["cache_dir"] = self.tempDir testJobB.create(group=testJobGroupA) testJobB["state"] = "executing" runJobB = RunJob() runJobB.buildFromJob(testJobB) runJobB["status"] = "PEND" # Does not have a location, white listed to site 1 testJobC = Job(name="testJobC", files=[testFileA]) testJobC["couch_record"] = makeUUID() testJobC.create(group=testJobGroupA) testJobC["state"] = "new" # Site 2, Has been assigned a location and is complete. testJobD = Job(name="testJobD", files=[testFileA]) testJobD["couch_record"] = makeUUID() testJobD.create(group=testJobGroupB) testJobD["state"] = "success" # Site 2, Has been assigned a location and is incomplete. testJobE = Job(name="testJobE", files=[testFileA]) testJobE["couch_record"] = makeUUID() testJobE.create(group=testJobGroupB) testJobE["state"] = "executing" runJobE = RunJob() runJobE.buildFromJob(testJobE) runJobE["status"] = "RUN" # Does not have a location, site 1 is blacklisted. testJobF = Job(name="testJobF", files=[testFileA]) testJobF["couch_record"] = makeUUID() testJobF.create(group=testJobGroupB) testJobF["state"] = "new" # Site 3, Has been assigned a location and is complete. testJobG = Job(name="testJobG", files=[testFileA]) testJobG["couch_record"] = makeUUID() testJobG.create(group=testJobGroupC) testJobG["state"] = "cleanout" # Site 3, Has been assigned a location and is incomplete. testJobH = Job(name="testJobH", files=[testFileA]) testJobH["couch_record"] = makeUUID() testJobH.create(group=testJobGroupC) testJobH["state"] = "new" # Site 3, Does not have a location. testJobI = Job(name="testJobI", files=[testFileA]) testJobI["couch_record"] = makeUUID() testJobI.create(group=testJobGroupC) testJobI["state"] = "new" # Site 3, Does not have a location and is in cleanout. testJobJ = Job(name="testJobJ", files=[testFileA]) testJobJ["couch_record"] = makeUUID() testJobJ.create(group=testJobGroupC) testJobJ["state"] = "cleanout" changeStateAction = self.daoFactory(classname="Jobs.ChangeState") changeStateAction.execute(jobs=[ testJobA, testJobB, testJobC, testJobD, testJobE, testJobF, testJobG, testJobH, testJobI, testJobJ ]) self.insertRunJob.execute([runJobB, runJobE]) setLocationAction = self.daoFactory(classname="Jobs.SetLocation") setLocationAction.execute(testJobA["id"], "testSite1") setLocationAction.execute(testJobB["id"], "testSite1") setLocationAction.execute(testJobD["id"], "testSite1") setLocationAction.execute(testJobE["id"], "testSite2") setLocationAction.execute(testJobG["id"], "testSite1") setLocationAction.execute(testJobH["id"], "testSite1") return