示例#1
0
    def createTestSubscription(self, nFiles, nSites=1, closeFileset=False):
        """
        _createTestSubscription_

        Build a single "Processing" subscription (split algorithm
        "MinFileBased") on a new fileset named "TestFileset" and return it.

        nFiles files are created for each site, every one located at
        "site<index>.cern.ch". The number of sites is capped at self.nSites.
        When closeFileset is true, the fileset is marked closed after the
        subscription has been created.
        """
        # Never use more sites than this test case knows about
        siteCount = min(nSites, self.nSites)

        inputFileset = Fileset(name="TestFileset")
        inputFileset.create()

        workflow = Workflow(spec="spec.xml", owner="Steve", name="wf001", task="Test")
        workflow.create()

        # nFiles files per site, each with a freshly generated LFN
        for siteIndex in range(siteCount):
            siteName = "site%i.cern.ch" % siteIndex
            for _ in range(nFiles):
                wmbsFile = File(makeUUID(), size=1024, events=100, locations={siteName})
                wmbsFile.create()
                inputFileset.addFile(wmbsFile)
        inputFileset.commit()

        subscription = Subscription(
            fileset=inputFileset,
            workflow=workflow,
            split_algo="MinFileBased",
            type="Processing",
        )
        subscription.create()

        if closeFileset:
            inputFileset.markOpen(isOpen=False)

        return subscription
示例#2
0
    def createFileCollection(self,
                             name,
                             nSubs,
                             nFiles,
                             workflowURL='test',
                             site=None):
        """
        _createFileCollection_

        Create a collection of files for splitting into jobs.

        One workflow called `name` is created, followed by nSubs filesets
        named "<name>-<index>". Each fileset receives nFiles files, is marked
        as not open, and then gets its own "Processing" subscription using
        the "FileBased" splitting algorithm.

        name        -- workflow name and prefix of the fileset names
        nSubs       -- number of filesets/subscriptions to create
        nFiles      -- number of files added to each fileset
        workflowURL -- passed to Workflow as its spec
        site        -- when set, every file is located at "se.<site>";
                       otherwise a site is drawn at random from self.sites
                       for each file

        Returns None.
        """
        testWorkflow = Workflow(spec=workflowURL,
                                owner="mnorman",
                                name=name,
                                task="/TestWorkload/ReReco")
        testWorkflow.create()

        for sub in range(nSubs):
            nameStr = '%s-%i' % (name, sub)

            testFileset = Fileset(name=nameStr)
            testFileset.create()

            for f in range(nFiles):
                # Use the requested site, or pick a random one for this file
                tmpSite = 'se.%s' % (site if site else random.choice(self.sites))
                testFile = File(lfn="/lfn/%s/%i" % (nameStr, f),
                                size=1024,
                                events=10)
                testFile.setLocation(tmpSite)
                testFile.create()
                testFileset.addFile(testFile)

            testFileset.commit()
            # The fileset is closed before the subscription is attached to it
            testFileset.markOpen(isOpen=0)
            testSubscription = Subscription(fileset=testFileset,
                                            workflow=testWorkflow,
                                            type="Processing",
                                            split_algo="FileBased")
            testSubscription.create()
示例#3
0
    def createTestSubscription(self, nFiles, nSites=1, closeFileset=False):
        """
        _createTestSubscription_

        Create one test subscription and return it.

        A fileset named "TestFileset" is filled with nFiles files per site,
        each located at "T2_CH_CERN_<index>", and a "Processing" subscription
        with the "MinFileBased" split algorithm is created on it. At most
        self.nSites sites are used. If closeFileset is true the fileset is
        marked closed once the subscription exists.
        """
        # Cap the requested number of sites at what the test case provides
        usedSites = min(nSites, self.nSites)

        fileset = Fileset(name="TestFileset")
        fileset.create()

        workflow = Workflow(spec="spec.xml", owner="Steve",
                            name="wf001", task="Test")
        workflow.create()

        # Populate the fileset site by site
        for siteNumber in range(usedSites):
            location = "T2_CH_CERN_%i" % siteNumber
            for _ in range(nFiles):
                siteFile = File(makeUUID(), size=1024, events=100,
                                locations={location})
                siteFile.create()
                fileset.addFile(siteFile)
        fileset.commit()

        subscription = Subscription(fileset=fileset, workflow=workflow,
                                    split_algo="MinFileBased",
                                    type="Processing")
        subscription.create()

        if closeFileset:
            fileset.markOpen(isOpen=False)

        return subscription
示例#4
0
    def pollForClosable(self):
        """
        _pollForClosable_

        Search WMBS for filesets that can be closed and mark them as closed.

        The "Fileset.ListClosable" DAO supplies the ids of the closable
        filesets. Each one is loaded and marked as not open. The lookup and
        all updates run between transaction.begin() and transaction.commit()
        on the current thread's transaction.
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()

        closableFilesetDAO = self.daoFactory(classname="Fileset.ListClosable")
        closableFilesets = closableFilesetDAO.execute()

        for closableFileset in closableFilesets:
            openFileset = Fileset(id=closableFileset)
            openFileset.load()

            # Pass the name as a logging argument so the message is only
            # formatted when DEBUG logging is actually enabled
            logging.debug("Closing fileset %s", openFileset.name)
            openFileset.markOpen(False)

        myThread.transaction.commit()
    def pollForClosable(self):
        """
        _pollForClosable_

        Close every fileset that WMBS reports as closable.

        The ids come from the "Fileset.ListClosable" DAO. Each fileset is
        loaded and marked as not open, all inside one transaction on the
        current thread.
        """
        currentThread = threading.currentThread()
        currentThread.transaction.begin()

        listClosable = self.daoFactory(classname="Fileset.ListClosable")

        for filesetId in listClosable.execute():
            fileset = Fileset(id=filesetId)
            fileset.load()

            logging.debug("Closing fileset %s", fileset.name)
            fileset.markOpen(False)

        currentThread.transaction.commit()
示例#6
0
    def createFileCollection(self, name, nSubs, nFiles, workflowURL = 'test', site = None):
        """
        _createFileCollection_

        Create a collection of files for splitting into jobs.

        A single workflow called `name` is created. For each of the nSubs
        subscriptions a fileset named "<name>-<index>" is created, filled
        with nFiles files, marked as not open and subscribed to the workflow
        ("Processing" type, "FileBased" split algorithm).

        name        -- workflow name and prefix of the fileset names
        nSubs       -- number of filesets/subscriptions to create
        nFiles      -- number of files added to each fileset
        workflowURL -- passed to Workflow as its spec
        site        -- when set, every file is located at "se.<site>";
                       otherwise a site is drawn at random from self.sites
                       for each file

        Returns None.
        """
        testWorkflow = Workflow(spec = workflowURL, owner = "mnorman",
                                name = name, task="/TestWorkload/ReReco")
        testWorkflow.create()

        for sub in range(nSubs):
            nameStr = '%s-%i' % (name, sub)

            testFileset = Fileset(name = nameStr)
            testFileset.create()

            for f in range(nFiles):
                # Use the requested site, or pick a random one for this file
                tmpSite = 'se.%s' % (site if site else random.choice(self.sites))
                testFile = File(lfn = "/lfn/%s/%i" % (nameStr, f), size = 1024, events = 10)
                testFile.setLocation(tmpSite)
                testFile.create()
                testFileset.addFile(testFile)

            testFileset.commit()
            # The fileset is closed before the subscription is attached to it
            testFileset.markOpen(isOpen = 0)
            testSubscription = Subscription(fileset = testFileset,
                                            workflow = testWorkflow,
                                            type = "Processing",
                                            split_algo = "FileBased")
            testSubscription.create()
示例#7
0
    def createGiantJobSet(self, name, config, nSubs = 10, nJobs = 10,
                          nFiles = 1, spec = "spec.xml"):
        """
        Creates a massive set of jobs

        For each of the nSubs subscriptions a workflow, fileset, subscription
        and job group named "<name>-<index>" are created. Every job group
        gets nJobs jobs, each with one input file, and nFiles output files
        are added to the group's output fileset per job. The jobs are then
        propagated through the states new -> created -> executing ->
        complete -> success -> cleanout, the input fileset is closed and the
        input files are marked as completed on the subscription.

        Returns the list of all jobs created, across all subscriptions.
        """
        # (new state, old state) pairs, in the order they are applied
        transitions = (('created', 'new'),
                       ('executing', 'created'),
                       ('complete', 'executing'),
                       ('success', 'complete'),
                       ('cleanout', 'success'))

        allJobs = []

        for subIndex in range(nSubs):
            subName = '%s-%i' % (name, subIndex)

            workflow = Workflow(spec = spec, owner = self.OWNERDN,
                                name = subName, task = "Test",
                                owner_vogroup = "", owner_vorole = "")
            workflow.create()

            inputFileset = Fileset(name = subName)
            inputFileset.create()

            subscription = Subscription(fileset = inputFileset,
                                        workflow = workflow)
            subscription.create()

            jobGroup = JobGroup(subscription = subscription)
            jobGroup.create()

            inputFiles = []

            for jobIndex in range(nJobs):
                jobName = '%s-%i' % (subName, jobIndex)

                # One input file per job, in run 10 with lumis 11..40
                inputFile = File(lfn = "%s-lfnA" % jobName, size = 1024, events = 10)
                inputFile.addRun(Run(10, *range(11, 41)))
                inputFile.setLocation('malpaquet')
                inputFile.create()

                inputFileset.addFile(inputFile)
                inputFileset.commit()
                inputFiles.append(inputFile)

                job = Job(name = jobName)
                job.addFile(inputFile)
                job['retry_count'] = 1
                job['retry_max'] = 10
                jobGroup.add(job)
                allJobs.append(job)

                # NOTE(review): the output LFN does not include the job index,
                # so every job of a subscription uses the same output LFNs -
                # confirm this is intended.
                for outputIndex in range(nFiles):
                    outputFile = File(lfn = "%s-%i-output" % (subName, outputIndex),
                                      size = 1024, events = 10)
                    outputFile.addRun(Run(10, 12312))
                    outputFile.setLocation('malpaquet')
                    outputFile.create()

                    jobGroup.output.addFile(outputFile)

                jobGroup.output.commit()

            jobGroup.commit()

            changer = ChangeState(config)
            for newState, oldState in transitions:
                changer.propagate(jobGroup.jobs, newState, oldState)

            inputFileset.markOpen(0)

            subscription.completeFiles(inputFiles)

        return allJobs
示例#8
0
class ConditionTest(unittest.TestCase):
    """
    _ConditionTest_

    Test for the Condition job splitter
    """

    def setUp(self):
        """
        _setUp_

        Install the T0.WMBS schema and insert the records the test needs:
        a location 'SomeSite' with SE name 'SomeSE', run 1 with lumi 1, the
        "Express" stream and its stream fileset, a streamer file
        "/streamer" and a prompt calibration record for run 1 / "Express".

        A fileset "TestFileset1" with a "Condition" subscription is created.
        The files "/alcareco", "/alcaprompt" and "/sqlite" are created with
        the parentage chain /streamer -> /alcareco -> /alcaprompt -> /sqlite.
        Only "/sqlite" is added to the fileset.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(customModules = ["T0.WMBS"])

        self.splitterFactory = SplitterFactory(package = "T0.JobSplitting")

        myThread = threading.currentThread()
        daoFactory = DAOFactory(package = "T0.WMBS",
                                logger = logging,
                                dbinterface = myThread.dbi)

        wmbsDaoFactory = DAOFactory(package = "WMCore.WMBS",
                                    logger = logging,
                                    dbinterface = myThread.dbi)

        myThread.dbi.processData("""INSERT INTO wmbs_location
                                    (id, site_name, state)
                                    VALUES (1, 'SomeSite', 1)
                                    """, transaction = False)
        myThread.dbi.processData("""INSERT INTO wmbs_location_senames
                                    (location, se_name)
                                    VALUES (1, 'SomeSE')
                                    """, transaction = False)

        insertRunDAO = daoFactory(classname = "RunConfig.InsertRun")
        insertRunDAO.execute(binds = { 'RUN' : 1,
                                       'TIME' : int(time.time()),
                                       'HLTKEY' : "someHLTKey" },
                             transaction = False)

        insertLumiDAO = daoFactory(classname = "RunConfig.InsertLumiSection")
        insertLumiDAO.execute(binds = { 'RUN' : 1,
                                        'LUMI' : 1 },
                              transaction = False)

        insertStreamDAO = daoFactory(classname = "RunConfig.InsertStream")
        insertStreamDAO.execute(binds = { 'STREAM' : "Express" },
                                transaction = False)

        insertStreamFilesetDAO = daoFactory(classname = "RunConfig.InsertStreamFileset")
        insertStreamFilesetDAO.execute(1, "Express", "TestFileset1")

        insertStreamerDAO = daoFactory(classname = "RunConfig.InsertStreamer")
        insertStreamerDAO.execute(binds = { 'RUN' : 1,
                                            'LUMI' : 1,
                                            'STREAM' : "Express",
                                            'TIME' : int(time.time()),
                                            'LFN' : "/streamer",
                                            'FILESIZE' : 0,
                                            'EVENTS' : 0 },
                                  transaction = False)

        insertPromptCalibrationDAO = daoFactory(classname = "RunConfig.InsertPromptCalibration")
        insertPromptCalibrationDAO.execute( { 'RUN' : 1,
                                              'STREAM' : "Express" },
                                            transaction = False)

        self.fileset1 = Fileset(name = "TestFileset1")
        self.fileset1.create()

        workflow1 = Workflow(spec = "spec.xml", owner = "hufnagel", name = "TestWorkflow1", task="Test")
        workflow1.create()

        self.subscription1  = Subscription(fileset = self.fileset1,
                                           workflow = workflow1,
                                           split_algo = "Condition",
                                           type = "Condition")
        self.subscription1.create()

        # set parentage chain and sqlite fileset
        alcaRecoFile = File("/alcareco", size = 0, events = 0)
        alcaRecoFile.addRun(Run(1, *[1]))
        alcaRecoFile.setLocation("SomeSE", immediateSave = False)
        alcaRecoFile.create()
        alcaPromptFile = File("/alcaprompt", size = 0, events = 0)
        alcaPromptFile.addRun(Run(1, *[1]))
        alcaPromptFile.setLocation("SomeSE", immediateSave = False)
        alcaPromptFile.create()
        sqliteFile = File("/sqlite", size = 0, events = 0)
        sqliteFile.create()
        self.fileset1.addFile(sqliteFile)
        self.fileset1.commit()

        setParentageDAO = wmbsDaoFactory(classname = "Files.SetParentage")
        setParentageDAO.execute(binds = [ { 'parent' : "/streamer",
                                            'child' : "/alcareco" },
                                          { 'parent' : "/alcareco",
                                            'child' : "/alcaprompt" },
                                          { 'parent' : "/alcaprompt",
                                            'child' : "/sqlite" } ],
                                transaction = False)

        # default split parameters
        self.splitArgs = {}
        self.splitArgs['runNumber'] = 1
        self.splitArgs['streamName'] = "Express"

        return

    def tearDown(self):
        """
        _tearDown_

        Clear the database set up for the test.
        """
        self.testInit.clearDatabase()

        return

    def isPromptCalibFinished(self):
        """
        _isPromptCalibFinished_

        Return the `finished` value of the first row in prompt_calib.
        """
        myThread = threading.currentThread()

        result = myThread.dbi.processData("""SELECT finished
                                             FROM prompt_calib
                                             """,
                                          transaction = False)[0].fetchall()[0][0]

        return result

    def countPromptCalibFiles(self):
        """
        _countPromptCalibFiles_

        Return the number of rows in prompt_calib_file.
        """
        myThread = threading.currentThread()

        result = myThread.dbi.processData("""SELECT COUNT(*)
                                             FROM prompt_calib_file
                                             """,
                                          transaction = False)[0].fetchall()[0][0]

        return result

    def test00(self):
        """
        _test00_

        Make sure the job splitter behaves correctly.

        Before the splitter runs, prompt_calib must not be finished and
        prompt_calib_file must be empty. After running the splitter on the
        open fileset there must be one prompt_calib_file row while
        prompt_calib is still not finished. After the fileset is closed and
        the splitter runs again, it must return no job groups, prompt_calib
        must be finished and there must still be exactly one
        prompt_calib_file row.

        """
        mySplitArgs = self.splitArgs.copy()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription1)

        self.assertEqual(self.isPromptCalibFinished(), 0,
                         "ERROR: prompt_calib should not be finished")

        self.assertEqual(self.countPromptCalibFiles(), 0,
                         "ERROR: there should be no prompt_calib_file")

        # First pass with the fileset still open; only the database state
        # is checked, the returned job groups are not inspected
        jobFactory(**mySplitArgs)

        self.assertEqual(self.isPromptCalibFinished(), 0,
                         "ERROR: prompt_calib should not be finished")

        self.assertEqual(self.countPromptCalibFiles(), 1,
                         "ERROR: there should be one prompt_calib_file")

        self.fileset1.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.assertEqual(self.isPromptCalibFinished(), 1,
                         "ERROR: prompt_calib should be finished")

        self.assertEqual(self.countPromptCalibFiles(), 1,
                         "ERROR: there should be one prompt_calib_file")

        return
示例#9
0
    def _createSubscriptionsInWMBS(self,
                                   task,
                                   fileset,
                                   alternativeFilesetClose=False):
        """
        _createSubscriptionsInWMBS_

        Create the WMBS workflow and subscription for `task` on `fileset`,
        then create an output fileset for every output module of the task
        that is not ignored, and recurse into the child tasks that read from
        that output module.

        task                    -- task to create workflow/subscription for
        fileset                 -- input fileset of the new subscription
        alternativeFilesetClose -- forwarded to Workflow and to the
                                   recursive calls for the child tasks

        The first subscription created through this helper is kept in
        self.topLevelSubscription. Merged output filesets whose output
        module defines a primaryDataset are recorded in
        self.mergeOutputMapping, keyed by fileset id.
        """
        # create runtime sandbox for workflow
        self.createSandbox()

        owner = self.wmSpec.getOwner()

        # FIXME: Let workflow put in values if spec is missing them
        workflow = Workflow(spec=self.wmSpec.specUrl(),
                            owner=owner["name"],
                            dn=owner.get("dn", "unknown"),
                            group=owner.get("group", "unknown"),
                            owner_vogroup=owner.get("vogroup", "DEFAULT"),
                            owner_vorole=owner.get("vorole", "DEFAULT"),
                            name=self.wmSpec.name(),
                            task=task.getPathName(),
                            wfType=self.wmSpec.getDashboardActivity(),
                            alternativeFilesetClose=alternativeFilesetClose,
                            priority=self.wmSpec.priority())
        workflow.create()

        subscription = Subscription(fileset=fileset,
                                    workflow=workflow,
                                    split_algo=task.jobSplittingAlgorithm(),
                                    type=task.getPrimarySubType())
        subscription.create()

        ### FIXME: I'm pretty sure we can improve how we handle this site white/black list
        for allowedSite in task.siteWhitelist():
            subscription.addWhiteBlackList([{"site_name": allowedSite, "valid": True}])

        for bannedSite in task.siteBlacklist():
            subscription.addWhiteBlackList([{"site_name": bannedSite, "valid": False}])

        # The first subscription created becomes the top level one
        isTopLevel = self.topLevelSubscription is None
        if isTopLevel:
            self.topLevelSubscription = subscription
            createdMsg = "Top level subscription %s created for %s"
        else:
            createdMsg = "Child subscription %s created for %s"
        logging.info(createdMsg, subscription["id"], self.wmSpec.name())

        outputModules = task.getOutputModulesForTask()
        ignoredModules = task.getIgnoredOutputModulesForTask()
        for moduleSection in outputModules:
            for moduleName in moduleSection.listSections_():
                if moduleName in ignoredModules:
                    logging.info("%s has %s as IgnoredOutputModule, skipping fileset creation.",
                                 task.getPathName(), moduleName)
                    continue

                moduleConfig = getattr(moduleSection, moduleName)
                dataTier = getattr(moduleConfig, "dataTier", '')

                unmergedFileset = Fileset(
                    self.outputFilesetName(task, moduleName, dataTier))
                unmergedFileset.create()
                unmergedFileset.markOpen(True)
                mergedFileset = None

                for child in task.childTaskIterator():
                    # Only children reading from this output module matter
                    if child.data.input.outputModule != moduleName:
                        continue

                    childTier = getattr(child.data.input, 'dataTier', '')
                    childType = child.taskType()

                    if childTier != dataTier:
                        # Cleanup/Merge children for another data tier get
                        # no subscription on this fileset
                        if childType in ("Cleanup", "Merge"):
                            continue
                    elif childType == "Merge":
                        mergedFileset = Fileset(
                            self.outputFilesetName(child, "Merged", dataTier))
                        mergedFileset.create()
                        mergedFileset.markOpen(True)

                        primaryDataset = getattr(moduleConfig, "primaryDataset", None)
                        if primaryDataset is not None:
                            self.mergeOutputMapping[mergedFileset.id] = primaryDataset

                    self._createSubscriptionsInWMBS(child, unmergedFileset,
                                                    alternativeFilesetClose)

                # Without a merge task the unmerged fileset doubles as the
                # merged output
                if mergedFileset is None:
                    finalFileset = unmergedFileset
                else:
                    finalFileset = mergedFileset
                workflow.addOutput(moduleName + dataTier, unmergedFileset,
                                   finalFileset)
    def __call__(self, parameters):
        """
        Perform the work required with the given parameters

        Reads "dataset", "FeederType", "FileType" and "StartRun" from
        self.messageArgs and works on the fileset named
        "<dataset>:<FeederType>:<FileType>:<StartRun>":

        * If the fileset does not exist, it is created empty with last
          update 0. The feeder id for the feeder type is taken from
          myThread.runningFeeders, or looked up in (or first added to) the
          database, and the fileset is handed to that feeder.
        * If the fileset exists but is not open, it is reopened, the feeder
          id is read from the database and the fileset is handed to the
          feeder again.

        All of this runs while holding myThread.runningFeedersLock. The
        lock is released in a finally clause, so an exception no longer
        leaves it held. The message service is finished only after a
        successful run.
        """
        DefaultSlave.__call__(self, parameters)

        # Handle the message
        message = self.messageArgs

        # Lock on the running feeders list
        myThread = threading.currentThread()
        myThread.runningFeedersLock.acquire()
        try:
            # Create empty fileset if fileset.name doesn't exist
            filesetName = message["dataset"]
            feederType = message["FeederType"]
            fileType = message["FileType"]
            startRun = message["StartRun"]

            logging.debug("Dataset %s arrived", filesetName)

            fileset = Fileset(name = ':'.join([filesetName, feederType,
                                               fileType, startRun]))

            # Check if the fileset is already there
            # NOTE(review): kept as an explicit comparison because the return
            # type of exists() is not visible here
            if fileset.exists() == False:

                # Empty fileset creation
                fileset.create()
                fileset.setLastUpdate(0)

                logging.info("Fileset %s whith id %s is added",
                             fileset.name, fileset.id)

                # Check if there is a running feeder
                if feederType in myThread.runningFeeders:
                    logging.info("HAVE FEEDER %s RUNNING", feederType)
                    logging.info(myThread.runningFeeders[feederType])

                else:
                    logging.info("NO FEEDER %s RUNNING", feederType)

                    # Check if we have a feeder in DB
                    if self.queries.checkFeeder(feederType):
                        # Have feeder, get info
                        logging.info("Getting Feeder from DB")
                    else:
                        # Create feeder
                        logging.info("Adding Feeder to DB")
                        self.queries.addFeeder(feederType, "StatePath")

                    feederId = self.queries.getFeederId(feederType)
                    logging.info(feederId)
                    myThread.runningFeeders[feederType] = feederId

                # Fileset/Feeder association
                self.queries.addFilesetToManage(fileset.id,
                                                myThread.runningFeeders[feederType])
                logging.info("Fileset %s is added to feeder %s",
                             fileset.id, myThread.runningFeeders[feederType])
            else:

                # If fileset already exist a new subscription
                # will be created for its workflow
                logging.info("Fileset exists: Subscription will be created for it")

                # Open it if close
                fileset.load()
                if fileset.open == False:

                    fileset.markOpen(True)

                    logging.info("Getting Feeder from DB")
                    feederId = self.queries.getFeederId(feederType)
                    logging.info(feederId)
                    myThread.runningFeeders[feederType] = feederId

                    self.queries.addFilesetToManage(fileset.id,
                                                    myThread.runningFeeders[feederType])
                    logging.info("Fileset %s is added to feeder %s",
                                 fileset.id, myThread.runningFeeders[feederType])
        finally:
            # Always give the lock back, even if something above failed
            myThread.runningFeedersLock.release()

        myThread.msgService.finish()
示例#11
0
    def createGiantJobSet(self,
                          name,
                          config,
                          nSubs=10,
                          nJobs=10,
                          nFiles=1,
                          spec="spec.xml"):
        """
        Creates a massive set of jobs

        nSubs workflow/fileset/subscription/job-group sets are created, named
        "<name>-<index>". Each job group receives nJobs jobs with one input
        file apiece, and per job nFiles output files are added to the
        group's output fileset. Every job is then moved through the states
        new -> created -> executing -> complete -> success -> cleanout. The
        input fileset is then closed and its files are completed on the
        subscription.

        Returns a list holding every job that was created.
        """
        # Lumi sections 11..40 attached to every input file (run 10)
        inputLumis = list(range(11, 41))

        createdJobs = []

        for subNumber in range(nSubs):
            # Make a bunch of subscriptions
            groupName = '%s-%i' % (name, subNumber)

            workflow = Workflow(spec=spec, owner=self.OWNERDN, name=groupName,
                                task="Test", owner_vogroup="", owner_vorole="")
            workflow.create()

            wmbsFileset = Fileset(name=groupName)
            wmbsFileset.create()

            subscription = Subscription(fileset=wmbsFileset, workflow=workflow)
            subscription.create()

            jobGroup = JobGroup(subscription=subscription)
            jobGroup.create()

            completedInputs = []

            for jobNumber in range(nJobs):
                # Create jobs for each subscription
                inputFile = File(lfn="%s-%i-lfnA" % (groupName, jobNumber),
                                 size=1024, events=10)
                inputFile.addRun(Run(10, *inputLumis))
                inputFile.setLocation('malpaquet')
                inputFile.create()

                wmbsFileset.addFile(inputFile)
                wmbsFileset.commit()

                completedInputs.append(inputFile)

                job = Job(name='%s-%i' % (groupName, jobNumber))
                job.addFile(inputFile)
                job['retry_count'] = 1
                job['retry_max'] = 10
                jobGroup.add(job)
                createdJobs.append(job)

                # Create output files
                # NOTE(review): the output LFN is built from the group name
                # and output index only, not the job index - confirm the
                # repeated LFNs across jobs are intended.
                for outputNumber in range(nFiles):
                    outputFile = File(lfn="%s-%i-output" % (groupName, outputNumber),
                                      size=1024, events=10)
                    outputFile.addRun(Run(10, 12312))
                    outputFile.setLocation('malpaquet')
                    outputFile.create()

                    jobGroup.output.addFile(outputFile)

                jobGroup.output.commit()

            jobGroup.commit()

            # Walk the jobs through the state machine, one step at a time
            changer = ChangeState(config)
            stateChain = ['new', 'created', 'executing', 'complete',
                          'success', 'cleanout']
            for oldState, newState in zip(stateChain, stateChain[1:]):
                changer.propagate(jobGroup.jobs, newState, oldState)

            wmbsFileset.markOpen(0)

            subscription.completeFiles(completedInputs)

        return createdJobs
示例#12
0
class WMBSHelper(WMConnectionBase):
    """
    _WMBSHelper_

    Interface between the WorkQueue and WMBS.
    """
    def __init__(self,
                 wmSpec,
                 taskName,
                 blockName=None,
                 mask=None,
                 cachepath='.'):
        """
        _init_

        Remember what is being injected (workload spec, top level task,
        optional block name and mask, sandbox cache path) and build every DAO
        the helper uses against WMBS and DBSBuffer.
        """
        # Description of the work being injected
        self.block = blockName
        self.mask = mask
        self.wmSpec = wmSpec
        self.topLevelTask = wmSpec.getTask(taskName)
        self.cachepath = cachepath
        self.isDBS = True

        # Filled in while the fileset and subscriptions are created
        self.topLevelFileset = None
        self.topLevelSubscription = None
        self.topLevelTaskDBSBufferId = None
        self.mergeOutputMapping = {}

        # The base class provides self.daofactory for the WMCore.WMBS package;
        # DBSBuffer needs a factory of its own.
        WMConnectionBase.__init__(self, "WMCore.WMBS")
        myThread = threading.currentThread()
        self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                        logger=myThread.logger,
                                        dbinterface=myThread.dbi)

        # DAOs from WMBS for file commit: (attribute name, DAO class name)
        wmbsDaos = (
            ("setParentage", "Files.SetParentage"),
            ("setFileRunLumi", "Files.AddRunLumi"),
            ("setFileLocation", "Files.SetLocationForWorkQueue"),
            ("setFileAddChecksum", "Files.AddChecksumByLFN"),
            ("addFileAction", "Files.Add"),
            ("addToFileset", "Files.AddDupsToFileset"),
            ("getLocations", "Locations.ListSites"),
            ("getLocationInfo", "Locations.GetSiteInfo"),
        )
        for attrName, daoClass in wmbsDaos:
            setattr(self, attrName, self.daofactory(classname=daoClass))

        # DAOs from DBSBuffer: (attribute name, DAO class name)
        dbsBufferDaos = (
            ("dbsCreateFiles", "DBSBufferFiles.Add"),
            ("dbsSetLocation", "DBSBufferFiles.SetLocationByLFN"),
            ("dbsInsertLocation", "DBSBufferFiles.AddLocation"),
            ("dbsSetChecksum", "DBSBufferFiles.AddChecksumByLFN"),
            ("dbsInsertWorkflow", "InsertWorkflow"),
        )
        for attrName, daoClass in dbsBufferDaos:
            setattr(self, attrName, self.dbsDaoFactory(classname=daoClass))

        # Added for file creation bookkeeping
        self.dbsFilesToCreate = []
        self.addedLocations = []
        self.wmbsFilesToCreate = []
        self.insertedBogusDataset = -1

    def createSandbox(self):
        """Make the runtime sandbox, handing the cache path and spec to SandboxCreator."""
        SandboxCreator().makeSandbox(self.cachepath, self.wmSpec)

    def createTopLevelFileset(self, topLevelFilesetName=None):
        """
        _createTopLevelFileset_

        Create the top level fileset for the workflow and keep it in
        self.topLevelFileset.

        When topLevelFilesetName is None a name is generated as
        "<workflow name>-<first top level task name>", followed by
        "-<block name>" if a block is set and by "-<md5 hex digest>" of the
        sorted "key=value" mask entries if a mask is set. Otherwise the given
        name is used unchanged.
        """
        if topLevelFilesetName is None:
            filesetName = "%s-%s" % (self.wmSpec.name(),
                                     self.wmSpec.getTopLevelTask()[0].name())
            if self.block:
                filesetName += "-%s" % self.block
            if self.mask:
                from hashlib import md5
                maskString = ",".join(
                    "%s=%s" % (key, self.mask[key]) for key in sorted(self.mask))
                # hashlib only hashes bytes: a text string raises TypeError on
                # Python 3. Python 2 byte strings are left untouched here.
                if not isinstance(maskString, bytes):
                    maskString = maskString.encode("utf-8")
                filesetName += "-%s" % md5(maskString).hexdigest()
        else:
            filesetName = topLevelFilesetName

        self.topLevelFileset = Fileset(filesetName)
        self.topLevelFileset.create()
        return

    def outputFilesetName(self, task, outputModuleName):
        """
        _outputFilesetName_

        Build the output fileset name for one output module of a task:
        "<task path>/merged-<module>" when the task type is "Merge" and
        "<task path>/unmerged-<module>" for any other task type.
        """
        flavour = "merged" if task.taskType() == "Merge" else "unmerged"
        return "%s/%s-%s" % (task.getPathName(), flavour, outputModuleName)

    def createSubscription(self, task, fileset, alternativeFilesetClose=False):
        """
        _createSubscription_

        Create everything the given task and the tasks below it need in the
        database: the WMBS subscriptions first, then the DBSBuffer workflows
        and finally the DBSBuffer dataset subscriptions.

        Returns the value produced by the WMBS subscription step.
        """
        topSubscription = self._createSubscriptionsInWMBS(
            task, fileset, alternativeFilesetClose)

        # DBSBuffer bookkeeping only happens once WMBS has been populated
        self._createWorkflowsInDBSBuffer()
        self._createDatasetSubscriptionsInDBSBuffer()

        return topSubscription

    def _createSubscriptionsInWMBS(self,
                                   task,
                                   fileset,
                                   alternativeFilesetClose=False):
        """
        _createSubscriptionsInWMBS_

        Create subscriptions in WMBS for the given task and, recursively, for
        its child tasks.  This includes filesets, workflows and the output map
        for each task.

        task is the task to subscribe, fileset is the input fileset of its
        subscription and alternativeFilesetClose is handed unchanged to every
        Workflow that gets created.

        Returns self.topLevelSubscription, i.e. the subscription made by the
        first call on this helper, not necessarily the one for `task`.
        """
        # create runtime sandbox for workflow
        # NOTE: this runs on every call, including the recursive ones below
        self.createSandbox()

        # FIXME: Let workflow put in values if spec is missing them
        # Owner fields other than "name" fall back to a default when absent
        workflow = Workflow(
            spec=self.wmSpec.specUrl(),
            owner=self.wmSpec.getOwner()["name"],
            dn=self.wmSpec.getOwner().get("dn", "unknown"),
            group=self.wmSpec.getOwner().get("group", "unknown"),
            owner_vogroup=self.wmSpec.getOwner().get("vogroup", "DEFAULT"),
            owner_vorole=self.wmSpec.getOwner().get("vorole", "DEFAULT"),
            name=self.wmSpec.name(),
            task=task.getPathName(),
            wfType=self.wmSpec.getDashboardActivity(),
            alternativeFilesetClose=alternativeFilesetClose,
            priority=self.wmSpec.priority())
        workflow.create()
        subscription = Subscription(fileset=fileset,
                                    workflow=workflow,
                                    split_algo=task.jobSplittingAlgorithm(),
                                    type=task.getPrimarySubType())
        # Reuse an existing subscription instead of creating a duplicate
        if subscription.exists():
            subscription.load()
            msg = "Subscription %s already exists for %s (you may ignore file insertion messages below, existing files wont be duplicated)"
            self.logger.info(msg % (subscription['id'], task.getPathName()))
        else:
            subscription.create()
        # Whitelisted sites are recorded as valid, blacklisted ones as invalid
        for site in task.siteWhitelist():
            subscription.addWhiteBlackList([{
                "site_name": site,
                "valid": True
            }])

        for site in task.siteBlacklist():
            subscription.addWhiteBlackList([{
                "site_name": site,
                "valid": False
            }])

        # The first subscription created through this helper is the top level one
        if self.topLevelSubscription is None:
            self.topLevelSubscription = subscription
            logging.info("Top level subscription created: %s",
                         subscription["id"])
        else:
            logging.info("Child subscription created: %s", subscription["id"])

        outputModules = task.getOutputModulesForTask()
        ignoredOutputModules = task.getIgnoredOutputModulesForTask()
        for outputModule in outputModules:
            for outputModuleName in outputModule.listSections_():
                if outputModuleName in ignoredOutputModules:
                    logging.info(
                        "IgnoredOutputModule set for %s, skipping fileset creation.",
                        outputModuleName)
                    continue
                # Fileset receiving this output module's files, left open
                outputFileset = Fileset(
                    self.outputFilesetName(task, outputModuleName))
                outputFileset.create()
                outputFileset.markOpen(True)
                mergedOutputFileset = None

                # Only children that read from this output module are handled here
                for childTask in task.childTaskIterator():
                    if childTask.data.input.outputModule == outputModuleName:
                        if childTask.taskType() == "Merge":
                            # A Merge child gets its own merged output fileset
                            mergedOutputFileset = Fileset(
                                self.outputFilesetName(childTask, "Merged"))
                            mergedOutputFileset.create()
                            mergedOutputFileset.markOpen(True)

                            # Remember which primary dataset the merged
                            # fileset belongs to, keyed by fileset id
                            primaryDataset = getattr(
                                getattr(outputModule, outputModuleName),
                                "primaryDataset", None)
                            if primaryDataset != None:
                                self.mergeOutputMapping[
                                    mergedOutputFileset.id] = primaryDataset

                        # Recurse: the child subscribes to this output fileset
                        self._createSubscriptionsInWMBS(
                            childTask, outputFileset, alternativeFilesetClose)

                # Without a Merge child the output fileset is registered as
                # both arguments of the output map entry
                if mergedOutputFileset is None:
                    workflow.addOutput(outputModuleName, outputFileset,
                                       outputFileset)
                else:
                    workflow.addOutput(outputModuleName, outputFileset,
                                       mergedOutputFileset)

        return self.topLevelSubscription

    def addMCFakeFile(self):
        """
        Add a fake file for WMBS to run production over.

        The file takes its event range, run and lumis from the mask, is
        located at the PNN of every site for which site information is found,
        and is added in bulk to the top level fileset, which is then marked
        closed.

        Returns the value reported by addFilesToWMBSInBulk.
        Raises WorkQueueWMBSException when a mask is set but lacks one of the
        first/last event, lumi or run values, or when no location is found.
        """
        requiredKeys = ('FirstEvent', 'FirstLumi', 'FirstRun',
                        'LastEvent', 'LastLumi', 'LastRun')
        for maskKey in requiredKeys:
            if not self.mask:
                continue
            if self.mask.get(maskKey) is None:
                raise WorkQueueWMBSException(
                    'Invalid value "%s" for %s' % (self.mask.get(maskKey), maskKey))

        pnns = set()
        knownSites = self.getLocations.execute(
            conn=self.getDBConn(), transaction=self.existingTransaction())
        for siteName in knownSites:
            # Best effort: a site that cannot be resolved is logged and skipped
            try:
                siteDetails = self.getLocationInfo.execute(
                    siteName,
                    conn=self.getDBConn(),
                    transaction=self.existingTransaction())
                if siteDetails:
                    pnns.add(siteDetails[0]['pnn'])
                else:
                    self.logger.info(
                        'Skipping MonteCarlo injection to site "%s" as unknown to wmbs'
                        % siteName)
            except Exception as ex:
                self.logger.error(
                    'Error getting storage element for "%s": %s' %
                    (siteName, str(ex)))

        if not pnns:
            raise WorkQueueWMBSException(
                'No locations to inject Monte Carlo work to, unable to proceed')

        fakeLfn = ("MCFakeFile-%s" % self.topLevelFileset.name).encode(
            'ascii', 'ignore')
        firstEvent = self.mask['FirstEvent']
        lastEvent = self.mask['LastEvent']
        wmbsFile = File(
            lfn=fakeLfn,
            first_event=firstEvent,
            last_event=lastEvent,
            events=lastEvent - firstEvent + 1,  # inclusive range
            locations=pnns,
            merged=False,  # merged causes dbs parentage relation
        )

        if self.mask:
            # inclusive lumi range, run number assumed static
            lumiRange = range(self.mask['FirstLumi'], self.mask['LastLumi'] + 1)
            fakeRun = Run(self.mask['FirstRun'], *lumiRange)
        else:
            fakeRun = Run(1, 1)
        wmbsFile.addRun(fakeRun)

        wmbsFile['inFileset'] = True  # file is not a parent

        logging.info("WMBS File: %s on Location: %s", wmbsFile['lfn'],
                     wmbsFile['newlocations'])

        self.wmbsFilesToCreate.append(wmbsFile)

        injected = self.topLevelFileset.addFilesToWMBSInBulk(
            self.wmbsFilesToCreate, self.wmSpec.name(), isDBS=self.isDBS)

        self.topLevelFileset.markOpen(False)
        return injected

    def createSubscriptionAndAddFiles(self, block):
        """
        _createSubscriptionAndAddFiles_

        Create the subscription and add files at one time to
        put everything in one transaction.

        block is the block description whose 'Files' are injected, or None
        for Monte Carlo work, in which case a fake file built from the mask
        is injected instead.

        Returns a (subscription, addedFiles) tuple. If creating the
        subscription fails, the thread transaction is rolled back, the
        traceback is logged and the original exception is re-raised.
        """
        self.beginTransaction()

        self.createTopLevelFileset()
        try:
            sub = self.createSubscription(self.topLevelTask,
                                          self.topLevelFileset)
        except Exception:
            myThread = threading.current_thread()
            myThread.transaction.rollback()
            logging.error("Failed to create subscription %s",
                          traceback.format_exc())
            # A bare raise keeps the original traceback; "raise ex" would
            # restart it from this line on Python 2.
            raise

        if block is not None:
            logging.info('"%s" Injecting block %s (%d files) into wmbs',
                         self.wmSpec.name(), self.block, len(block['Files']))
            addedFiles = self.addFiles(block)
        else:
            # For MC case
            logging.info(
                '"%s" Injecting production %s:%s:%s - %s:%s:%s (run:lumi:event) into wmbs',
                self.wmSpec.name(), self.mask['FirstRun'],
                self.mask['FirstLumi'], self.mask['FirstEvent'],
                self.mask['LastRun'], self.mask['LastLumi'],
                self.mask['LastEvent'])
            addedFiles = self.addMCFakeFile()

        self.commitTransaction(existingTransaction=False)

        return sub, addedFiles

    def addFiles(self, block):
        """
        _addFiles_

        Turn the files of the given block into WMBS files (including their
        run/lumi information) and inject them.

        Files are taken as ACDC files when the top level task has an ACDC
        input and as DBS files otherwise; self.isDBS records which. Only files
        accepted by validFiles are converted. The converted files are added in
        bulk to the top level fileset, the queued DBSBuffer files are created,
        and the fileset is left open only if the block says 'IsOpen'.

        Returns the value reported by addFilesToWMBSInBulk.
        """
        fromACDC = bool(self.topLevelTask.getInputACDC())
        self.isDBS = not fromACDC

        accepted = self.validFiles(block['Files'])
        if fromACDC:
            for acdcFile in accepted:
                self._addACDCFileToWMBSFile(acdcFile)
        else:
            for dbsFile in accepted:
                # Looked up per file so a block without files needs no node list
                self._addDBSFileToWMBSFile(dbsFile, block['PhEDExNodeNames'])

        # WMBS first, DBSBuffer second
        injected = self.topLevelFileset.addFilesToWMBSInBulk(
            self.wmbsFilesToCreate, self.wmSpec.name(), isDBS=self.isDBS)
        self._createFilesInDBSBuffer()

        self.topLevelFileset.markOpen(block.get('IsOpen', False))
        return injected

    def getMergeOutputMapping(self):
        """
        _getMergeOutputMapping_

        Return the mapping from merged output fileset id to primary dataset
        that was collected for the Merge tasks while creating subscriptions.
        The stored dictionary itself is returned, not a copy.
        """
        mergeMapping = self.mergeOutputMapping
        return mergeMapping

    def _createWorkflowsInDBSBuffer(self):
        """
        _createWorkflowsInDBSBuffer_

        Register workflow information and settings in dbsbuffer for all
        tasks that will potentially produce any output in this spec.
        """

        for task in self.wmSpec.listOutputProducingTasks():
            workflow_id = self.dbsInsertWorkflow.execute(
                self.wmSpec.name(),
                task,
                self.wmSpec.getBlockCloseMaxWaitTime(),
                self.wmSpec.getBlockCloseMaxFiles(),
                self.wmSpec.getBlockCloseMaxEvents(),
                self.wmSpec.getBlockCloseMaxSize(),
                conn=self.getDBConn(),
                transaction=self.existingTransaction())
            if task == self.topLevelTask.getPathName():
                self.topLevelTaskDBSBufferId = workflow_id

    def _createDatasetSubscriptionsInDBSBuffer(self):
        """
        _createDatasetSubscriptionsInDBSBuffer_

        For every dataset in the workload's subscription information, create
        the DBSBuffer dataset and attach the subscription settings the
        workload defines for it.
        """
        subscriptionInfo = self.wmSpec.getSubscriptionInformation()
        for datasetPath in subscriptionInfo:
            bufferDataset = DBSBufferDataset(path=datasetPath)
            bufferDataset.create()
            bufferDataset.addSubscription(subscriptionInfo[datasetPath])

    def _createFilesInDBSBuffer(self):
        """
        _createFilesInDBSBuffer_

        It does the actual job of creating things in DBSBuffer

        """
        if len(self.dbsFilesToCreate) == 0:
            # Whoops, nothing to do!
            return

        dbsFileTuples = []
        dbsFileLoc = []
        dbsCksumBinds = []
        locationsToAdd = []
        selfChecksums = None

        # The first thing we need to do is add the datasetAlgo
        # Assume all files in a pass come from one datasetAlgo?
        if self.insertedBogusDataset == -1:
            self.insertedBogusDataset = self.dbsFilesToCreate[
                0].insertDatasetAlgo()

        for dbsFile in self.dbsFilesToCreate:
            # Append a tuple in the format specified by DBSBufferFiles.Add
            # Also run insertDatasetAlgo

            lfn = dbsFile['lfn']
            selfChecksums = dbsFile['checksums']

            newTuple = (lfn, dbsFile['size'], dbsFile['events'],
                        self.insertedBogusDataset, dbsFile['status'],
                        self.topLevelTaskDBSBufferId)

            if newTuple not in dbsFileTuples:
                dbsFileTuples.append(newTuple)

            if len(dbsFile['newlocations']) < 1:
                msg = ''
                msg += "File created without any locations!\n"
                msg += "File lfn: %s\n" % (lfn)
                msg += "Rejecting this group of files in DBS!\n"
                logging.error(msg)
                raise WorkQueueWMBSException(msg)

            for jobLocation in dbsFile['newlocations']:
                if jobLocation not in self.addedLocations:
                    # If we don't have it, try and add it
                    locationsToAdd.append(jobLocation)
                    self.addedLocations.append(jobLocation)
                dbsFileLoc.append({'lfn': lfn, 'pnn': jobLocation})

            if selfChecksums:
                # If we have checksums we have to create a bind
                # For each different checksum
                for entry in selfChecksums.keys():
                    dbsCksumBinds.append({
                        'lfn': lfn,
                        'cksum': selfChecksums[entry],
                        'cktype': entry
                    })

        for jobLocation in locationsToAdd:
            self.dbsInsertLocation.execute(
                siteName=jobLocation,
                conn=self.getDBConn(),
                transaction=self.existingTransaction())

        self.dbsCreateFiles.execute(files=dbsFileTuples,
                                    conn=self.getDBConn(),
                                    transaction=self.existingTransaction())

        self.dbsSetLocation.execute(binds=dbsFileLoc,
                                    conn=self.getDBConn(),
                                    transaction=self.existingTransaction())

        if len(dbsCksumBinds) > 0:
            self.dbsSetChecksum.execute(bulkList=dbsCksumBinds,
                                        conn=self.getDBConn(),
                                        transaction=self.existingTransaction())

        # Now that we've created those files, clear the list
        self.dbsFilesToCreate = []
        return

    def _addToDBSBuffer(self, dbsFile, checksums, locations):
        """
        Queue a DBSBuffer record for an input file.

        A DBSBufferFile in "GLOBAL" status is built from the DBS style file
        dictionary, given a placeholder dataset path and algorithm, and
        appended to self.dbsFilesToCreate unless it already exists.

        Per the original note this is a performance measure, so that the
        Accountant does not have to check the parentage.
        """
        bufferFile = DBSBufferFile(lfn=dbsFile["LogicalFileName"],
                                   size=dbsFile["FileSize"],
                                   events=dbsFile["NumberOfEvents"],
                                   checksums=checksums,
                                   locations=locations,
                                   status="GLOBAL")
        bufferFile.setDatasetPath('bogus')
        bufferFile.setAlgorithm(appName="cmsRun",
                                appVer="Unknown",
                                appFam="Unknown",
                                psetHash="Unknown",
                                configContent="Unknown")

        if bufferFile.exists():
            return
        # Not created here: the queue is written out by _createFilesInDBSBuffer
        self.dbsFilesToCreate.append(bufferFile)

    def _addDBSFileToWMBSFile(self, dbsFile, storageElements, inFileset=True):
        """
        Convert a DBS file dictionary, parents first, into a WMBS File, queue
        it in self.wmbsFilesToCreate and return it. A matching DBSBuffer
        record is queued as well.

        There are two assumptions made to make this method behave properly:
        1. DBS returns only one level of ParentList. Deeper parentage is still
           handled by the recursion, but that might not be what is wanted; in
           that case restrict DBS to one level.
        2. Parent files are in the same location as the child files. This is
           not true in general, but the work queue should only select work
           where child and parent files share a location.

        inFileset is stored on the file as a boolean; parents are converted
        with inFileset=False.
        """
        # A missing parent list is added to the input dictionary as empty
        dbsFile.setdefault("ParentList", [])
        wmbsParents = [
            self._addDBSFileToWMBSFile(parentFile,
                                       storageElements,
                                       inFileset=False)
            for parentFile in dbsFile["ParentList"]
        ]

        # Only checksums that are present and non-empty are carried over
        checksums = {}
        for checksumType, dbsKey in (('cksum', 'Checksum'),
                                     ('adler32', 'Adler32')):
            if dbsFile.get(dbsKey):
                checksums[checksumType] = dbsFile[dbsKey]

        wmbsFile = File(
            lfn=dbsFile["LogicalFileName"],
            size=dbsFile["FileSize"],
            events=dbsFile["NumberOfEvents"],
            checksums=checksums,
            # TODO: need to get list of parent lfn
            parents=wmbsParents,
            locations=set(storageElements))

        for lumiInfo in dbsFile['LumiList']:
            lumiNumbers = lumiInfo['LumiSectionNumber']
            # A single lumi number is treated like a one element list
            if not isinstance(lumiNumbers, list):
                lumiNumbers = [lumiNumbers]
            wmbsFile.addRun(Run(lumiInfo['RunNumber'], *lumiNumbers))

        self._addToDBSBuffer(dbsFile, checksums, storageElements)

        logging.info("WMBS File: %s\n on Location: %s", wmbsFile['lfn'],
                     wmbsFile['newlocations'])

        wmbsFile['inFileset'] = bool(inFileset)
        self.wmbsFilesToCreate.append(wmbsFile)

        return wmbsFile

    def _convertACDCFileToDBSFile(self, acdcFile):
        """
        convert ACDCFiles to dbs file format
        """
        dbsFile = {}
        dbsFile["LogicalFileName"] = acdcFile["lfn"]
        dbsFile["FileSize"] = acdcFile["size"]
        dbsFile["NumberOfEvents"] = acdcFile["events"]
        return dbsFile

    def _addACDCFileToWMBSFile(self, acdcFile, inFileset=True):
        """
        Convert an ACDC file, parents first, into a WMBS File, queue it in
        self.wmbsFilesToCreate and return it. A matching DBSBuffer record is
        queued as well.

        Each parent lfn is wrapped in a DatastructFile placed at the locations
        of the child and converted with inFileset=False.
        """
        wmbsParents = [
            self._addACDCFileToWMBSFile(
                DatastructFile(lfn=parentLfn, locations=acdcFile["locations"]),
                inFileset=False)
            for parentLfn in acdcFile["parents"]
        ]

        # pass empty check sum since it won't be updated to dbs anyway
        noChecksums = {}
        wmbsFile = File(lfn=str(acdcFile["lfn"]),
                        size=acdcFile["size"],
                        events=acdcFile["events"],
                        first_event=acdcFile.get('first_event', 0),
                        last_event=acdcFile.get('last_event', 0),
                        checksums=noChecksums,
                        parents=wmbsParents,
                        locations=acdcFile["locations"],
                        merged=acdcFile.get('merged', True))

        ## TODO need to get the lumi lists
        for acdcRun in acdcFile['runs']:
            wmbsFile.addRun(acdcRun)

        self._addToDBSBuffer(self._convertACDCFileToDBSFile(acdcFile),
                             noChecksums, acdcFile["locations"])

        logging.info("WMBS File: %s\n on Location: %s", wmbsFile['lfn'],
                     wmbsFile['newlocations'])

        wmbsFile['inFileset'] = bool(inFileset)
        self.wmbsFilesToCreate.append(wmbsFile)

        return wmbsFile

    def validFiles(self, files):
        """
        Apply lumi mask and or run white/black list and return files which have
        one or more of the requested lumis.

        files is an iterable of file entries. Entries that are plain strings
        or that carry no "LumiList" key cannot be checked and are always kept.
        The other entries are kept when at least one of their lumis is
        selected by the mask, or, with only a run blacklist, when at least one
        lumi survives the blacklist. Without any mask every entry is kept.

        The input entries are never modified. Returns a new list holding the
        accepted entries in their original order.
        """
        # basestring only exists on Python 2
        try:
            stringTypes = basestring
        except NameError:
            stringTypes = (str, bytes)

        runWhiteList = self.topLevelTask.inputRunWhitelist()
        runBlackList = self.topLevelTask.inputRunBlacklist()
        lumiMask = self.topLevelTask.getLumiMask()

        blackMask = None
        if lumiMask:  # We have a lumiMask, so use it and modify with run white/black list
            if runWhiteList:
                lumiMask.selectRuns(runWhiteList)
            if runBlackList:
                lumiMask.removeRuns(runBlackList)
        elif runWhiteList:  # We have a run whitelist, subtract off blacklist
            lumiMask = LumiList(runs=runWhiteList)
            if runBlackList:
                lumiMask.removeRuns(runBlackList)
        else:
            lumiMask = None
            if runBlackList:  # We only have a blacklist, so make a black mask out of it instead
                blackMask = LumiList(runs=runBlackList)

        results = []
        for f in files:
            if isinstance(f, stringTypes) or "LumiList" not in f:
                results.append(f)
                continue

            if not lumiMask and not blackMask:
                # There is effectively no mask: no need to build a LumiList
                results.append(f)
                continue

            # Create a LumiList from the WMBS info. Lumis are collected into
            # fresh lists so the lists owned by the file entry are not extended.
            runLumis = {}
            for x in f['LumiList']:
                lumis = x['LumiSectionNumber']
                if not isinstance(lumis, (list, tuple)):
                    # a single lumi number instead of a list of them
                    lumis = [lumis]
                runLumis.setdefault(x['RunNumber'], []).extend(lumis)
            fileLumiList = LumiList(runsAndLumis=runLumis)

            if lumiMask:
                if fileLumiList & lumiMask:  # At least one lumi from file is in lumiMask
                    results.append(f)
            elif fileLumiList - blackMask:  # At least one lumi from file is not in blackMask
                results.append(f)

        return results
示例#13
0
    def setupExpressWorkflow(self):
        """
        _setupExpressWorkflow_

        Populate WMStats, an on-disk spec and WMBS with an express-like
        workflow. Every task gets a subscription on one shared, closed
        fileset, and each subscription is filed in self.stateMap under
        'Merge', 'Harvesting' or 'Processing Done'.

        As the original note puts it, every subscription must be unfinished
        at first.
        """
        workflowName = 'Express_Run481516_StreamZFast'
        alcaSkimTask = 'ExpressAlcaSkimwrite_StreamZFast_ALCARECO'
        dqmMergeTask = 'ExpressMergewrite_StreamZFast_DQM'
        secondLevelTasks = [
            dqmMergeTask,
            'ExpressMergewrite_ExpressPhysics_FEVT',
            alcaSkimTask,
            'ExpressCleanupUnmergedwrite_StreamZFast_DQM',
            'ExpressCleanupUnmergedwrite_ExpressPhysics_FEVT',
            'ExpressCleanupUnmergedwrite_StreamZFast_ALCARECO'
        ]
        alcaHarvestTask = 'ExpressAlcaSkimwrite_StreamZFast_ALCARECOAlcaHarvestALCARECOStreamPromptCalibProd'
        dqmHarvestTask = 'ExpressMergewrite_StreamZFast_DQMEndOfRunDQMHarvestMerged'
        # Second level task -> harvesting task hanging below it
        harvestChildren = {alcaSkimTask: alcaHarvestTask,
                           dqmMergeTask: dqmHarvestTask}

        self.stateMap = {'Merge': [], 'Harvesting': [], 'Processing Done': []}
        self.orderedStates = ['Merge', 'Harvesting', 'Processing Done']

        # Populate WMStats
        self.requestDBWriter.insertGenericRequest(
            {'RequestName': workflowName})
        self.requestDBWriter.updateRequestStatus(workflowName, 'Closed')

        # Create a wmspec in disk
        workload = newWorkload(workflowName)
        expressTask = workload.newTask('Express')
        for taskName in secondLevelTasks:
            addedTask = expressTask.addTask(taskName)
            if taskName in harvestChildren:
                addedTask.addTask(harvestChildren[taskName])

        specPath = os.path.join(self.testDir, 'Express.pkl')
        workload.save(specPath)

        # Populate WMBS
        sharedFileset = Fileset(name='TestFileset')
        sharedFileset.create()
        sharedFileset.markOpen(False)

        options = dict(spec=specPath,
                       owner='ItsAMeMario',
                       name=workflowName,
                       wfType='tier0')

        def subscribe(taskPath):
            """Create a workflow for taskPath and subscribe it to the shared fileset."""
            taskWorkflow = Workflow(task=taskPath, **options)
            taskWorkflow.create()
            taskSub = Subscription(sharedFileset, taskWorkflow)
            taskSub.create()
            return taskSub

        self.stateMap['Merge'].append(
            subscribe('/%s/Express' % workflowName))

        # Cleanup tasks get no subscription
        for taskName in secondLevelTasks:
            if 'CleanupUnmerged' in taskName:
                continue
            self.stateMap['Harvesting'].append(
                subscribe('/%s/Express/%s' % (workflowName, taskName)))

        for parentName, childName in ((alcaSkimTask, alcaHarvestTask),
                                      (dqmMergeTask, dqmHarvestTask)):
            self.stateMap['Processing Done'].append(
                subscribe('/%s/Express/%s/%s' %
                          (workflowName, parentName, childName)))
示例#14
0
class RepackMergeTest(unittest.TestCase):
    """
    _RepackMergeTest_
    Test for RepackMerge job splitter
    """

    def setUp(self):
        """
        _setUp_

        Prepare the database and the WMBS objects shared by every test.

        Installs the WMComponent.DBS3Buffer and T0.WMBS schemas, inserts one
        location ('SomeSite') linked to one PNN ('SomePNN'), registers run 1
        with lumi sections 1-5, stream "A", CMSSW_4_2_7 and one streamer
        file, then sets up two filesets, two workflows and two subscriptions
        (Repack on TestFileset1, RepackMerge on TestFileset2).  Finally the
        DAOs and the default split parameters used by the tests are stored
        on the instance.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(customModules=["WMComponent.DBS3Buffer", "T0.WMBS"])

        self.splitterFactory = SplitterFactory(package="T0.JobSplitting")

        myThread = threading.currentThread()

        daoFactory = DAOFactory(package="T0.WMBS",
                                logger=logging,
                                dbinterface=myThread.dbi)

        wmbsDaoFactory = DAOFactory(package="WMCore.WMBS",
                                    logger=logging,
                                    dbinterface=myThread.dbi)

        # Location 1 ('SomeSite') is mapped to PNN 2 ('SomePNN'); the test
        # methods of this class place their files on 'SomePNN'.
        myThread.dbi.processData("""INSERT INTO wmbs_location
                                    (id, site_name, state, state_time)
                                    VALUES (1, 'SomeSite', 1, 1)
                                    """, transaction=False)
        myThread.dbi.processData("""INSERT INTO wmbs_pnns
                                    (id, pnn)
                                    VALUES (2, 'SomePNN')
                                    """, transaction=False)

        myThread.dbi.processData("""INSERT INTO wmbs_location_pnns
                                    (location, pnn)
                                    VALUES (1, 2)
                                    """, transaction=False)

        insertRunDAO = daoFactory(classname="RunConfig.InsertRun")
        insertRunDAO.execute(binds={'RUN': 1,
                                    'HLTKEY': "someHLTKey"},
                             transaction=False)

        # Register lumi sections 1 through 5 for run 1.
        insertLumiDAO = daoFactory(classname="RunConfig.InsertLumiSection")
        for lumi in range(1, 6):
            insertLumiDAO.execute(binds={'RUN': 1,
                                         'LUMI': lumi},
                                  transaction=False)

        insertStreamDAO = daoFactory(classname="RunConfig.InsertStream")
        insertStreamDAO.execute(binds={'STREAM': "A"},
                                transaction=False)

        insertCMSSVersionDAO = daoFactory(classname="RunConfig.InsertCMSSWVersion")
        insertCMSSVersionDAO.execute(binds={'VERSION': "CMSSW_4_2_7"},
                                     transaction=False)

        insertStreamCMSSWVersionDAO = daoFactory(classname="RunConfig.InsertStreamCMSSWVersion")
        insertStreamCMSSWVersionDAO.execute(binds={'RUN': 1,
                                                   'STREAM': 'A',
                                                   'VERSION': "CMSSW_4_2_7"},
                                            transaction=False)

        insertStreamerDAO = daoFactory(classname="RunConfig.InsertStreamer")
        insertStreamerDAO.execute(streamerPNN="SomePNN",
                                  binds={'RUN': 1,
                                         'P5_ID': 1,
                                         'LUMI': 4,
                                         'STREAM': "A",
                                         'LFN': "/testLFN/A",
                                         'FILESIZE': 100,
                                         'EVENTS': 100,
                                         'TIME': int(time.time())},
                                  transaction=False)

        insertStreamFilesetDAO = daoFactory(classname="RunConfig.InsertStreamFileset")
        insertStreamFilesetDAO.execute(1, "A", "TestFileset1")

        # NOTE(review): TestFileset1 is loaded rather than created; presumably
        # the InsertStreamFileset DAO above creates it - confirm.
        self.fileset1 = Fileset(name="TestFileset1")
        self.fileset2 = Fileset(name="TestFileset2")
        self.fileset1.load()
        self.fileset2.create()

        workflow1 = Workflow(spec="spec.xml", owner="hufnagel", name="TestWorkflow1", task="Test")
        workflow2 = Workflow(spec="spec.xml", owner="hufnagel", name="TestWorkflow2", task="Test")
        workflow1.create()
        workflow2.create()

        self.subscription1 = Subscription(fileset=self.fileset1,
                                          workflow=workflow1,
                                          split_algo="Repack",
                                          type="Repack")
        self.subscription2 = Subscription(fileset=self.fileset2,
                                          workflow=workflow2,
                                          split_algo="RepackMerge",
                                          type="RepackMerge")
        self.subscription1.create()
        self.subscription2.create()

        # Declare TestFileset2 as an output fileset of the first workflow.
        myThread.dbi.processData("""INSERT INTO wmbs_workflow_output
                                    (WORKFLOW_ID, OUTPUT_IDENTIFIER, OUTPUT_FILESET)
                                    VALUES (%d, 'SOMEOUTPUT', %d)
                                    """ % (workflow1.id, self.fileset2.id),
                                 transaction=False)

        # keep for later
        self.insertSplitLumisDAO = daoFactory(classname="JobSplitting.InsertSplitLumis")
        self.insertClosedLumiDAO = daoFactory(classname="RunLumiCloseout.InsertClosedLumi")
        self.feedStreamersDAO = daoFactory(classname="Tier0Feeder.FeedStreamers")
        self.acquireFilesDAO = wmbsDaoFactory(classname="Subscriptions.AcquireFiles")
        self.completeFilesDAO = wmbsDaoFactory(classname="Subscriptions.CompleteFiles")
        self.currentTime = int(time.time())

        # Default split parameters.  Every key is stored in self.splitArgs;
        # the original wrote 'maxLatency' to the undefined self.SplitArgs,
        # which raised AttributeError and aborted setUp.
        self.splitArgs = {
            'minInputSize': 2.1 * 1024 * 1024 * 1024,
            'maxInputSize': 4.0 * 1024 * 1024 * 1024,
            'maxInputEvents': 100000000,
            'maxInputFiles': 1000,
            'maxEdmSize': 20 * 1024 * 1024 * 1024,
            'maxOverSize': 10 * 1024 * 1024 * 1024,
            'maxLatency': 50000,
        }

        return

    def tearDown(self):
        """
        _tearDown_

        Clear the test database through the TestInit helper.
        """
        self.testInit.clearDatabase()

    def deleteSplitLumis(self):
        """
        _deleteSplitLumis_

        Delete every row from the lumi_section_split_active table.
        """
        purgeSql = """DELETE FROM lumi_section_split_active
                                    """
        dbInterface = threading.currentThread().dbi
        dbInterface.processData(purgeSql, transaction=False)

    def test00(self):
        """
        _test00_

        Check the job name prefix and the max edm size threshold for a
        single lumi.

        Lumi 1 gets two files of size 1000 and lumi 2 gets four files of
        size 4000.  With the default split arguments no job group is
        expected.  Once maxEdmSize is lowered to 13000, one job group with
        three jobs is expected, holding 2, 3 and 1 files respectively, and
        the first job name must start with "RepackMerge-".
        """
        splitParams = dict(self.splitArgs)

        for lumiNumber in (1, 2):
            fileSize = 1000 * lumiNumber * lumiNumber
            for _ in range(2 * lumiNumber):
                mergeInput = File(makeUUID(), size=fileSize, events=100)
                mergeInput.addRun(Run(1, lumiNumber))
                mergeInput.setLocation("SomePNN", immediateSave=False)
                mergeInput.create()
                self.fileset2.addFile(mergeInput)
        self.fileset2.commit()

        splitter = self.splitterFactory(package="WMCore.WMBS",
                                        subscription=self.subscription2)

        # default thresholds: nothing should be released yet
        groups = splitter(**splitParams)
        self.assertEqual(len(groups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        splitParams['maxEdmSize'] = 13000
        groups = splitter(**splitParams)
        self.assertEqual(len(groups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        jobs = groups[0].jobs
        self.assertEqual(len(jobs), 3,
                         "ERROR: JobFactory didn't create three jobs")

        firstJob = jobs[0]
        self.assertTrue(firstJob['name'].startswith("RepackMerge-"),
                        "ERROR: Job has wrong name")
        self.assertEqual(len(firstJob.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        self.assertEqual(len(jobs[1].getFiles()), 3,
                         "ERROR: Job does not process 3 files")

        self.assertEqual(len(jobs[2].getFiles()), 1,
                         "ERROR: Job does not process 1 file")

    def test01(self):
        """
        _test01_

        Max input size threshold for a single lumi.

        Lumis 1 and 2 each get two files, with file size 1000 * lumi.
        With the default split arguments no job group is expected.  After
        lowering maxInputSize to 3000, one job group with two jobs of two
        files each is expected.
        """
        splitParams = dict(self.splitArgs)

        for lumiNumber in (1, 2):
            for _ in range(2):
                mergeInput = File(makeUUID(), size=1000 * lumiNumber, events=100)
                mergeInput.addRun(Run(1, lumiNumber))
                mergeInput.setLocation("SomePNN", immediateSave=False)
                mergeInput.create()
                self.fileset2.addFile(mergeInput)
        self.fileset2.commit()

        splitter = self.splitterFactory(package="WMCore.WMBS",
                                        subscription=self.subscription2)

        groups = splitter(**splitParams)
        self.assertEqual(len(groups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        splitParams['maxInputSize'] = 3000
        groups = splitter(**splitParams)
        self.assertEqual(len(groups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        jobs = groups[0].jobs
        self.assertEqual(len(jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        self.assertEqual(len(jobs[0].getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        self.assertEqual(len(jobs[1].getFiles()), 2,
                         "ERROR: Job does not process 2 files")

    def test02(self):
        """
        _test02_

        Max input events threshold for a single lumi.

        Lumis 1 and 2 each get two files of size 1000, with 100 * lumi
        events per file.  With the default split arguments no job group is
        expected.  After lowering maxInputEvents to 300, one job group with
        two jobs of two files each is expected.
        """
        splitParams = dict(self.splitArgs)

        for lumiNumber in (1, 2):
            for _ in range(2):
                mergeInput = File(makeUUID(), size=1000, events=100 * lumiNumber)
                mergeInput.addRun(Run(1, lumiNumber))
                mergeInput.setLocation("SomePNN", immediateSave=False)
                mergeInput.create()
                self.fileset2.addFile(mergeInput)
        self.fileset2.commit()

        splitter = self.splitterFactory(package="WMCore.WMBS",
                                        subscription=self.subscription2)

        groups = splitter(**splitParams)
        self.assertEqual(len(groups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        splitParams['maxInputEvents'] = 300
        groups = splitter(**splitParams)
        self.assertEqual(len(groups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        jobs = groups[0].jobs
        self.assertEqual(len(jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        self.assertEqual(len(jobs[0].getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        self.assertEqual(len(jobs[1].getFiles()), 2,
                         "ERROR: Job does not process 2 files")

    def test03(self):
        """
        _test03_

        Max input files threshold for a single lumi.

        Lumi 1 gets two files and lumi 2 gets four files, all of size 1000
        with 100 events.  With the default split arguments no job group is
        expected.  After lowering maxInputFiles to 3, one job group with two
        jobs is expected: the first with 2 files, the second with 4 files.
        """
        splitParams = dict(self.splitArgs)

        for lumiNumber in (1, 2):
            for _ in range(lumiNumber * 2):
                mergeInput = File(makeUUID(), size=1000, events=100)
                mergeInput.addRun(Run(1, lumiNumber))
                mergeInput.setLocation("SomePNN", immediateSave=False)
                mergeInput.create()
                self.fileset2.addFile(mergeInput)
        self.fileset2.commit()

        splitter = self.splitterFactory(package="WMCore.WMBS",
                                        subscription=self.subscription2)

        groups = splitter(**splitParams)
        self.assertEqual(len(groups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        splitParams['maxInputFiles'] = 3
        groups = splitter(**splitParams)
        self.assertEqual(len(groups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        jobs = groups[0].jobs
        self.assertEqual(len(jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        self.assertEqual(len(jobs[0].getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        self.assertEqual(len(jobs[1].getFiles()), 4,
                         "ERROR: Job does not process 4 files")

    def test04(self):
        """
        _test04_

        Max input size threshold for multiple lumis of equal size.

        Lumis 1, 2 and 3 each get two files of size 1000.  With
        minInputSize set to 3000 no job group is expected.  After also
        setting maxInputSize to 5000, one job with 4 files is expected.
        Once the fileset is marked closed, a further call is expected to
        yield one job with the remaining 2 files.
        """
        splitParams = dict(self.splitArgs)

        for lumiNumber in (1, 2, 3):
            for _ in range(2):
                mergeInput = File(makeUUID(), size=1000, events=100)
                mergeInput.addRun(Run(1, lumiNumber))
                mergeInput.setLocation("SomePNN", immediateSave=False)
                mergeInput.create()
                self.fileset2.addFile(mergeInput)
        self.fileset2.commit()

        splitter = self.splitterFactory(package="WMCore.WMBS",
                                        subscription=self.subscription2)

        splitParams['minInputSize'] = 3000
        groups = splitter(**splitParams)
        self.assertEqual(len(groups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        splitParams['maxInputSize'] = 5000
        groups = splitter(**splitParams)
        self.assertEqual(len(groups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        jobs = groups[0].jobs
        self.assertEqual(len(jobs), 1,
                         "ERROR: JobFactory didn't create one job")
        self.assertEqual(len(jobs[0].getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        # close the fileset and split again
        self.fileset2.markOpen(False)

        groups = splitter(**splitParams)
        self.assertEqual(len(groups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        jobs = groups[0].jobs
        self.assertEqual(len(jobs), 1,
                         "ERROR: JobFactory didn't create one job")
        self.assertEqual(len(jobs[0].getFiles()), 2,
                         "ERROR: Job does not process 2 files")

    def test05(self):
        """
        _test05_

        Max input events threshold for multiple lumis of equal size.

        Lumis 1, 2 and 3 each get two files of size 1000 with 100 events.
        With minInputSize set to 3000 no job group is expected.  After also
        setting maxInputEvents to 500, one job with 4 files is expected.
        Once the fileset is marked closed, a further call is expected to
        yield one job with the remaining 2 files.
        """
        splitParams = dict(self.splitArgs)

        for lumiNumber in (1, 2, 3):
            for _ in range(2):
                mergeInput = File(makeUUID(), size=1000, events=100)
                mergeInput.addRun(Run(1, lumiNumber))
                mergeInput.setLocation("SomePNN", immediateSave=False)
                mergeInput.create()
                self.fileset2.addFile(mergeInput)
        self.fileset2.commit()

        splitter = self.splitterFactory(package="WMCore.WMBS",
                                        subscription=self.subscription2)

        splitParams['minInputSize'] = 3000
        groups = splitter(**splitParams)
        self.assertEqual(len(groups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        splitParams['maxInputEvents'] = 500
        groups = splitter(**splitParams)
        self.assertEqual(len(groups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        jobs = groups[0].jobs
        self.assertEqual(len(jobs), 1,
                         "ERROR: JobFactory didn't create one job")
        self.assertEqual(len(jobs[0].getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        # close the fileset and split again
        self.fileset2.markOpen(False)

        groups = splitter(**splitParams)
        self.assertEqual(len(groups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        jobs = groups[0].jobs
        self.assertEqual(len(jobs), 1,
                         "ERROR: JobFactory didn't create one job")
        self.assertEqual(len(jobs[0].getFiles()), 2,
                         "ERROR: Job does not process 2 files")

    def test06(self):
        """
        _test06_

        Max input files threshold for multiple lumis of equal size.

        Lumis 1, 2 and 3 each get two files of size 1000 with 100 events.
        With minInputSize set to 3000 no job group is expected.  After also
        setting maxInputFiles to 5, one job with 4 files is expected.
        Once the fileset is marked closed, a further call is expected to
        yield one job with the remaining 2 files.
        """
        splitParams = dict(self.splitArgs)

        for lumiNumber in (1, 2, 3):
            for _ in range(2):
                mergeInput = File(makeUUID(), size=1000, events=100)
                mergeInput.addRun(Run(1, lumiNumber))
                mergeInput.setLocation("SomePNN", immediateSave=False)
                mergeInput.create()
                self.fileset2.addFile(mergeInput)
        self.fileset2.commit()

        splitter = self.splitterFactory(package="WMCore.WMBS",
                                        subscription=self.subscription2)

        splitParams['minInputSize'] = 3000
        groups = splitter(**splitParams)
        self.assertEqual(len(groups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        splitParams['maxInputFiles'] = 5
        groups = splitter(**splitParams)
        self.assertEqual(len(groups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        jobs = groups[0].jobs
        self.assertEqual(len(jobs), 1,
                         "ERROR: JobFactory didn't create one job")
        self.assertEqual(len(jobs[0].getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        # close the fileset and split again
        self.fileset2.markOpen(False)

        groups = splitter(**splitParams)
        self.assertEqual(len(groups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        jobs = groups[0].jobs
        self.assertEqual(len(jobs), 1,
                         "ERROR: JobFactory didn't create one job")
        self.assertEqual(len(jobs[0].getFiles()), 2,
                         "ERROR: Job does not process 2 files")

    def test07(self):
        """
        _test07_

        Over merge.

        Lumi 1 gets two files of size 1000 and lumi 2 gets two files of
        size 4000.  With minInputSize 3000 and maxInputSize 9000 (and the
        default maxOverSize), a single job containing all 4 files is
        expected.
        """
        splitParams = dict(self.splitArgs)

        for lumiNumber in (1, 2):
            fileSize = 1000 * lumiNumber * lumiNumber
            for _ in range(2):
                mergeInput = File(makeUUID(), size=fileSize, events=100)
                mergeInput.addRun(Run(1, lumiNumber))
                mergeInput.setLocation("SomePNN", immediateSave=False)
                mergeInput.create()
                self.fileset2.addFile(mergeInput)
        self.fileset2.commit()

        splitter = self.splitterFactory(package="WMCore.WMBS",
                                        subscription=self.subscription2)

        splitParams['minInputSize'] = 3000
        splitParams['maxInputSize'] = 9000
        groups = splitter(**splitParams)
        self.assertEqual(len(groups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        jobs = groups[0].jobs
        self.assertEqual(len(jobs), 1,
                         "ERROR: JobFactory didn't create one job")
        self.assertEqual(len(jobs[0].getFiles()), 4,
                         "ERROR: Job does not process 4 files")

    def test08(self):
        """
        _test08_

        Under merge caused by the over-merge size threshold.

        Lumi 1 gets two files of size 1000 and lumi 2 gets two files of
        size 4000.  With minInputSize 3000, maxInputSize 9000 and
        maxOverSize 9500, the first call is expected to yield one job with
        2 files.  After the fileset is marked closed, a second call is
        expected to yield another job with 2 files.
        """
        splitParams = dict(self.splitArgs)

        for lumiNumber in (1, 2):
            fileSize = 1000 * lumiNumber * lumiNumber
            for _ in range(2):
                mergeInput = File(makeUUID(), size=fileSize, events=100)
                mergeInput.addRun(Run(1, lumiNumber))
                mergeInput.setLocation("SomePNN", immediateSave=False)
                mergeInput.create()
                self.fileset2.addFile(mergeInput)
        self.fileset2.commit()

        splitter = self.splitterFactory(package="WMCore.WMBS",
                                        subscription=self.subscription2)

        splitParams['minInputSize'] = 3000
        splitParams['maxInputSize'] = 9000
        splitParams['maxOverSize'] = 9500
        groups = splitter(**splitParams)
        self.assertEqual(len(groups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        jobs = groups[0].jobs
        self.assertEqual(len(jobs), 1,
                         "ERROR: JobFactory didn't create one job")
        self.assertEqual(len(jobs[0].getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        # close the fileset and split again
        self.fileset2.markOpen(False)

        groups = splitter(**splitParams)
        self.assertEqual(len(groups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        jobs = groups[0].jobs
        self.assertEqual(len(jobs), 1,
                         "ERROR: JobFactory didn't create one job")
        self.assertEqual(len(jobs[0].getFiles()), 2,
                         "ERROR: Job does not process 2 files")

    def test09(self):
        """
        _test09_

        Under merge with a lower minimum input size.

        Lumi 1 gets two files of size 1000 and lumi 2 gets two files of
        size 4000.  With minInputSize 1500, maxInputSize 9000 and
        maxOverSize 9500, the first call is expected to yield one job with
        2 files.  After the fileset is marked closed, a second call is
        expected to yield another job with 2 files.

        The test was changed because maxInputEvents is no longer used.
        """
        splitParams = dict(self.splitArgs)

        for lumiNumber in (1, 2):
            fileSize = 1000 * lumiNumber * lumiNumber
            for _ in range(2):
                mergeInput = File(makeUUID(), size=fileSize, events=100)
                mergeInput.addRun(Run(1, lumiNumber))
                mergeInput.setLocation("SomePNN", immediateSave=False)
                mergeInput.create()
                self.fileset2.addFile(mergeInput)
        self.fileset2.commit()

        splitter = self.splitterFactory(package="WMCore.WMBS",
                                        subscription=self.subscription2)

        splitParams['minInputSize'] = 1500
        splitParams['maxInputSize'] = 9000
        splitParams['maxOverSize'] = 9500
        groups = splitter(**splitParams)
        self.assertEqual(len(groups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        jobs = groups[0].jobs
        self.assertEqual(len(jobs), 1,
                         "ERROR: JobFactory didn't create one job")
        self.assertEqual(len(jobs[0].getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        # close the fileset and split again
        self.fileset2.markOpen(False)

        groups = splitter(**splitParams)
        self.assertEqual(len(groups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        jobs = groups[0].jobs
        self.assertEqual(len(jobs), 1,
                         "ERROR: JobFactory didn't create one job")
        self.assertEqual(len(jobs[0].getFiles()), 2,
                         "ERROR: Job does not process 2 files")

    def test10(self):
        """
        _test10_

        Merging of multiple lumis with a hole in the lumi sequence.

        Lumis 1, 2 and 4 each get two files of size 1000; lumi 3 has no
        files.  With minInputSize 100000 and maxInputSize 200000 no job
        group is expected at first.  After lumi 3 is recorded as closed
        with a file count of 0, one job with 4 files is expected, even
        though the merged input is smaller than minInputSize.

        The test only covers a single hole.  It was changed because
        maxInputEvents is no longer used.
        """
        splitParams = dict(self.splitArgs)

        for lumiNumber in (1, 2, 4):
            for _ in range(2):
                mergeInput = File(makeUUID(), size=1000, events=100)
                mergeInput.addRun(Run(1, lumiNumber))
                mergeInput.setLocation("SomePNN", immediateSave=False)
                mergeInput.create()
                self.fileset2.addFile(mergeInput)
        self.fileset2.commit()

        splitter = self.splitterFactory(package="WMCore.WMBS",
                                        subscription=self.subscription2)

        splitParams['minInputSize'] = 100000
        splitParams['maxInputSize'] = 200000
        groups = splitter(**splitParams)
        self.assertEqual(len(groups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        # record the missing lumi 3 as closed with no files
        closedLumiBinds = {
            'RUN': 1,
            'LUMI': 3,
            'STREAM': "A",
            'FILECOUNT': 0,
            'INSERT_TIME': self.currentTime,
            'CLOSE_TIME': self.currentTime,
        }
        self.insertClosedLumiDAO.execute(binds=closedLumiBinds,
                                         transaction=False)

        groups = splitter(**splitParams)
        self.assertEqual(len(groups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        jobs = groups[0].jobs
        self.assertEqual(len(jobs), 1,
                         "ERROR: JobFactory didn't create one job")
        self.assertEqual(len(jobs[0].getFiles()), 4,
                         "ERROR: Job does not process 4 files")
示例#15
0
class SiblingProcessingBasedTest(unittest.TestCase):
    """
    _SiblingProcessingBasedTest_

    Test SiblingProcessing job splitting.

    The fixture built in setUp consists of two filesets (A and B) holding
    three files each, one "FileBased" processing subscription on fileset A,
    three on fileset B, and one "SiblingProcessingBased" cleanup
    subscription per fileset.  The tests drive the processing subscriptions
    through complete/fail transitions and check which jobs the cleanup
    subscriptions produce.

    All checks use the unittest assertion methods rather than bare
    ``assert`` statements so that they are still evaluated when Python runs
    with ``-O`` and so that failures report the offending values.
    """

    def _createFile(self, lfn, pnn="T2_CH_CERN"):
        """
        __createFile_

        Create a 1000 byte / 100 event file with the given LFN located at
        the given PNN, insert it into WMBS and return it.
        """
        newFile = File(lfn, size=1000, events=100, locations=set([pnn]))
        newFile.create()
        return newFile

    def _createWorkflow(self, label):
        """
        __createWorkflow_

        Create the workflow "wf<label>" (spec "spec<label>.xml") owned by
        Steve with the task "Test", insert it into WMBS and return it.
        """
        workflow = Workflow(spec="spec%s.xml" % label,
                            owner="Steve",
                            name="wf%s" % label,
                            task="Test")
        workflow.create()
        return workflow

    def _createSubscription(self, fileset, workflow,
                            splitAlgo="FileBased", subType="Processing"):
        """
        __createSubscription_

        Create a subscription of the workflow to the fileset, insert it
        into WMBS and return it.
        """
        subscription = Subscription(fileset=fileset,
                                    workflow=workflow,
                                    split_algo=splitAlgo,
                                    type=subType)
        subscription.create()
        return subscription

    def setUp(self):
        """
        _setUp_

        Setup the database connections and schema, then populate WMBS with
        the locations, filesets, files, workflows and subscriptions shared
        by the tests.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=["WMCore.WMBS"],
                                useDefault=False)

        myThread = threading.currentThread()
        daofactory = DAOFactory(package="WMCore.WMBS",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)

        # Two sites; note that the FNAL site name differs from its PNN.
        locationAction = daofactory(classname="Locations.New")
        locationAction.execute("T2_CH_CERN", pnn="T2_CH_CERN")
        locationAction.execute("T1_US_FNAL", pnn="T1_US_FNAL_Disk")

        self.testFilesetA = Fileset(name="FilesetA")
        self.testFilesetA.create()
        self.testFilesetB = Fileset(name="FilesetB")
        self.testFilesetB.create()

        # Fileset A: files A, B and C.
        self.testFileA = self._createFile("testFileA")
        self.testFileB = self._createFile("testFileB")
        self.testFileC = self._createFile("testFileC")

        self.testFilesetA.addFile(self.testFileA)
        self.testFilesetA.addFile(self.testFileB)
        self.testFilesetA.addFile(self.testFileC)
        self.testFilesetA.commit()

        # Fileset B: files D, E and F.
        self.testFileD = self._createFile("testFileD")
        self.testFileE = self._createFile("testFileE")
        self.testFileF = self._createFile("testFileF")

        self.testFilesetB.addFile(self.testFileD)
        self.testFilesetB.addFile(self.testFileE)
        self.testFilesetB.addFile(self.testFileF)
        self.testFilesetB.commit()

        testWorkflowA = self._createWorkflow("A")
        testWorkflowB = self._createWorkflow("B")
        testWorkflowC = self._createWorkflow("C")
        testWorkflowD = self._createWorkflow("D")

        # One processing subscription on fileset A, three on fileset B.
        self.testSubscriptionA = self._createSubscription(self.testFilesetA,
                                                          testWorkflowA)
        self.testSubscriptionB = self._createSubscription(self.testFilesetB,
                                                          testWorkflowB)
        self.testSubscriptionC = self._createSubscription(self.testFilesetB,
                                                          testWorkflowC)
        self.testSubscriptionD = self._createSubscription(self.testFilesetB,
                                                          testWorkflowD)

        # The subscriptions under test: one cleanup subscription per
        # fileset, both belonging to the same workflow.
        deleteWorkflow = self._createWorkflow("E")

        self.deleteSubscriptionA = self._createSubscription(
            self.testFilesetA, deleteWorkflow,
            splitAlgo="SiblingProcessingBased", subType="Cleanup")
        self.deleteSubscriptionB = self._createSubscription(
            self.testFilesetB, deleteWorkflow,
            splitAlgo="SiblingProcessingBased", subType="Cleanup")
        return

    def tearDown(self):
        """
        _tearDown_

        Clear out WMBS.
        """
        self.testInit.clearDatabase()
        return

    def testSiblingProcessing(self):
        """
        _testSiblingProcessing_

        Verify that the sibling processing split works correctly dealing with
        failed files and acquiring files correctly.
        """
        splitter = SplitterFactory()
        deleteFactoryA = splitter(package="WMCore.WMBS",
                                  subscription=self.deleteSubscriptionA)
        deleteFactoryB = splitter(package="WMCore.WMBS",
                                  subscription=self.deleteSubscriptionB)

        # Nothing has been processed yet.
        result = deleteFactoryA()
        self.assertEqual(len(result), 0,
                         "Error: No jobs should be returned.")

        result = deleteFactoryB()
        self.assertEqual(len(result), 0,
                         "Error: No jobs should be returned.")

        # File A is completed by the only sibling subscription of fileset A.
        self.testSubscriptionA.completeFiles(self.testFileA)

        result = deleteFactoryA(files_per_job=1)
        self.assertEqual(len(result), 1,
                         "Error: Only one jobgroup should be returned.")
        self.assertEqual(len(result[0].jobs), 1,
                         "Error: There should only be one job in the jobgroup.")
        firstJob = result[0].jobs[0]
        self.assertEqual(firstJob["possiblePSN"], set(["T2_CH_CERN"]),
                         "Error: possiblePSN is wrong.")
        self.assertEqual(len(firstJob["input_files"]), 1,
                         "Error: Job should only have one input file.")
        self.assertEqual(firstJob["input_files"][0]["lfn"], "testFileA",
                         "Error: Input file for job is wrong.")

        result = deleteFactoryB(files_per_job=1)
        self.assertEqual(len(result), 0,
                         "Error: Second subscription should have no jobs.")

        # File A has already been acquired, so a second pass yields nothing.
        result = deleteFactoryA(files_per_job=1)
        self.assertEqual(len(result), 0,
                         "Error: No jobs should have been created.")

        # File D: completed by B, failed by C, still pending in D.
        self.testSubscriptionB.completeFiles(self.testFileD)
        self.testSubscriptionC.failFiles(self.testFileD)

        result = deleteFactoryA(files_per_job=1)
        self.assertEqual(len(result), 0,
                         "Error: No jobs should have been created.")

        result = deleteFactoryB(files_per_job=1)
        self.assertEqual(len(result), 0,
                         "Error: No jobs should have been created.")

        # File D is now finished in every sibling, but with failures.
        self.testSubscriptionD.failFiles(self.testFileD)

        result = deleteFactoryA(files_per_job=1)
        self.assertEqual(len(result), 0,
                         "Error: No jobs should have been created.")

        result = deleteFactoryB(files_per_job=1)
        self.assertEqual(len(result), 0,
                         "Error: No job groups should have been created.")

        # Files E and F are completed by all three siblings.
        self.testSubscriptionB.completeFiles([self.testFileE, self.testFileF])
        self.testSubscriptionC.completeFiles([self.testFileE, self.testFileF])
        self.testSubscriptionD.completeFiles([self.testFileE, self.testFileF])

        # Two files with files_per_job=10 on an open fileset: no job yet.
        result = deleteFactoryB(files_per_job=10)
        self.assertEqual(len(result), 0,
                         "Error: No jobs should have been created.")

        # Once the fileset is closed the partial job is released.
        self.testFilesetB.markOpen(False)

        result = deleteFactoryB(files_per_job=10)
        self.assertEqual(len(result), 1,
                         "Error: One jobgroup should have been returned.")
        self.assertEqual(len(result[0].jobs), 1,
                         "Error: There should only be one job in the jobgroup.")
        inputFiles = result[0].jobs[0]["input_files"]
        self.assertEqual(len(inputFiles), 2,
                         "Error: Job should have two input files.")

        lfns = [inputFiles[0]["lfn"], inputFiles[1]["lfn"]]

        self.assertTrue("testFileE" in lfns,
                        "Error: TestFileE missing from job input.")
        self.assertTrue("testFileF" in lfns,
                        "Error: TestFileF missing from job input.")

        self.assertEqual(len(self.deleteSubscriptionB.availableFiles()), 0,
                         "Error: There should be no available files.")

        # The file that failed in its siblings ends up Completed in the
        # cleanup subscription without ever having been put into a job.
        completeFiles = self.deleteSubscriptionB.filesOfStatus("Completed")
        self.assertEqual(len(completeFiles), 1,
                         "Error: There should only be one complete file.")
        self.assertEqual(
            list(completeFiles)[0]["lfn"], "testFileD",
            "Error: Test file D should be complete.")

        return

    def testMultipleLocations(self):
        """
        _testMultipleLocations_

        Verify that the sibling processing based algorithm doesn't create jobs
        that run over files at multiple sites.
        """
        testFile1 = self._createFile("testFile1", pnn="T1_US_FNAL_Disk")
        testFile2 = self._createFile("testFile2", pnn="T1_US_FNAL_Disk")
        testFile3 = self._createFile("testFile3", pnn="T1_US_FNAL_Disk")

        self.testFilesetA.addFile(testFile1)
        self.testFilesetA.addFile(testFile2)
        self.testFilesetA.addFile(testFile3)
        self.testFilesetA.commit()
        self.testFilesetA.markOpen(False)

        self.testSubscriptionA.completeFiles([testFile1, testFile2, testFile3])
        self.testSubscriptionA.completeFiles(
            [self.testFileA, self.testFileB, self.testFileC])

        splitter = SplitterFactory()
        deleteFactoryA = splitter(package="WMCore.WMBS",
                                  subscription=self.deleteSubscriptionA)

        result = deleteFactoryA(files_per_job=50)

        self.assertEqual(len(result), 2,
                         "Error: Wrong number of jobgroups returned.")

        # Expected (sorted) LFNs per job site.  Entries are popped as they
        # are matched so that two job groups claiming the same site fail.
        expectedBySite = {
            frozenset(["T2_CH_CERN"]): ["testFileA", "testFileB", "testFileC"],
            frozenset(["T1_US_FNAL"]): ["testFile1", "testFile2", "testFile3"],
        }

        for jobGroup in result:
            self.assertEqual(len(jobGroup.jobs), 1,
                             "Error: Wrong number of jobs in jobgroup.")
            inputFiles = jobGroup.jobs[0]["input_files"]
            self.assertEqual(len(inputFiles), 3,
                             "Error: Wrong number of input files in job.")

            jobSite = frozenset(jobGroup.jobs[0]["possiblePSN"])
            self.assertTrue(jobSite in expectedBySite,
                            "Error: Wrong site for job.")
            goldenFiles = expectedBySite.pop(jobSite)

            jobLFNs = sorted([jobFile["lfn"] for jobFile in inputFiles])
            self.assertEqual(jobLFNs, goldenFiles,
                             "Error: Files are missing.")

        return

    def testLargeNumberOfFiles(self):
        """
        _testLargeNumberOfFiles_

        Setup a subscription with 500 files and verify that the splitting algo
        works correctly.
        """
        testWorkflowA = self._createWorkflow("A")
        testWorkflowB = self._createWorkflow("B")

        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        allFiles = []
        for i in range(500):
            testFile = self._createFile(str(i))
            allFiles.append(testFile)
            testFileset.addFile(testFile)
        testFileset.commit()

        testSubscriptionA = self._createSubscription(testFileset,
                                                     testWorkflowA)
        testSubscriptionB = self._createSubscription(
            testFileset, testWorkflowB, splitAlgo="SiblingProcessingBased")

        # Every file is finished by the one sibling subscription.
        testSubscriptionA.completeFiles(allFiles)

        splitter = SplitterFactory()
        deleteFactoryA = splitter(package="WMCore.WMBS",
                                  subscription=testSubscriptionB)

        # 500 files at 50 files per job.
        result = deleteFactoryA(files_per_job=50)
        self.assertEqual(len(result), 1,
                         "Error: Wrong number of job groups returned.")
        self.assertEqual(len(result[0].jobs), 10,
                         "Error: Wrong number of jobs returned.")

        return

    def testFilesWithoutOtherSubscriptions(self):
        """
        _testFilesWithoutOtherSubscriptions_

        Test the case where the files are only in the delete subscription.
        This can happen if cleanup of the other subscriptions is fast.
        """
        testWorkflowA = self._createWorkflow("A")

        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        for i in range(500):
            testFileset.addFile(self._createFile(str(i)))
        testFileset.commit()

        # The sibling processing subscription is the only one on the fileset.
        testSubscriptionA = self._createSubscription(
            testFileset, testWorkflowA, splitAlgo="SiblingProcessingBased")

        splitter = SplitterFactory()
        deleteFactoryA = splitter(package="WMCore.WMBS",
                                  subscription=testSubscriptionA)

        # 500 files at 50 files per job.
        result = deleteFactoryA(files_per_job=50)
        self.assertEqual(len(result), 1,
                         "Error: Wrong number of job groups returned.")
        self.assertEqual(len(result[0].jobs), 10,
                         "Error: Wrong number of jobs returned.")

        return
示例#16
0
    def __call__(self, filesetToProcess):
        """
        The algorithm itself

        Query the Tier-0 data service for the files that appeared since the
        last update of filesetToProcess, register each of them in a per-run
        fileset, close the per-run filesets of runs the service reports as
        finished and finally purge filesetToProcess itself when it has not
        been updated for longer than self.purgeTime.

        The fileset name is parsed as colon separated fields; field 0 is a
        '/'-separated dataset path (primary dataset, processed dataset and
        data tier), field 2 the file type and field 3 the first run to
        consider (the literal string 'None' when there is none).

        Returns None.  The method gives up silently when the file listing
        query failed self.maxRetries times.
        """
        global LOCK

        # NOTE(review): host of the Tier-0 data service is hard-coded.
        t0ServiceHost = "vocms52.cern.ch:8889"

        # Get configuration
        initObj = WMInit()
        initObj.setLogging()
        initObj.setDatabaseConnection(os.getenv("DATABASE"),
                                      os.getenv('DIALECT'),
                                      os.getenv("DBSOCK"))

        myThread = threading.currentThread()

        daofactory = DAOFactory(package="WMCore.WMBS",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)

        locationNew = daofactory(classname="Locations.New")
        getFileLoc = daofactory(classname="Files.GetLocation")

        filesetName = filesetToProcess.name
        nameFields = filesetName.split(":")
        datasetPath = nameFields[0]

        logging.debug("the T0Feeder is processing %s", filesetName)
        logging.debug("the fileset name %s", datasetPath)

        startRun = nameFields[3]
        fileType = nameFields[2]

        # url builder
        pathParts = datasetPath.split('/')
        primaryDataset = pathParts[1]
        processedDataset = pathParts[2]
        dataTier = pathParts[3].split('-')[0]
        nameSuffix = ":".join(nameFields[1:])

        def runFilesetName(run):
            """Name of the per-run fileset derived from filesetToProcess."""
            # ToDo: Distinguish between
            # filestA-RunX and filesetA-Run[0-9]*
            return (pathParts[0] + '/' + primaryDataset + '/' +
                    processedDataset + '/' + dataTier + '-' + 'Run' +
                    str(run) + ":" + nameSuffix)

        # First call to T0 db for this fileset
        # Here add test for the closed fileset
        LASTIME = filesetToProcess.lastUpdate

        url = "/tier0/listfilesoverinterval/%s/%s/%s/%s/%s" % \
              (fileType, LASTIME, primaryDataset, processedDataset, dataTier)

        tries = 1
        while True:

            try:
                myRequester = JSONRequests(url=t0ServiceHost)
                requestResult = myRequester.get(
                    url + "/" + "?return_type=text/json%2Bdas")
                newFilesList = requestResult[0]["results"]

            except Exception:
                # Only ordinary errors are retried; KeyboardInterrupt and
                # SystemExit must be able to stop the retry loop.
                logging.debug("T0Reader call error...")
                if tries == self.maxRetries:
                    return
                tries += 1
                continue

            logging.debug("T0ASTRun queries done ...")
            now = time.time()
            # NOTE(review): this sets 'last_update' while the rest of the
            # method reads 'lastUpdate' - confirm which one is intended.
            filesetToProcess.last_update = now
            LASTIME = int(newFilesList['end_time']) + 1

            break

        # process all files
        if len(newFilesList['files']):

            LOCK.acquire()

            # Whatever goes wrong below, the lock has to be handed back,
            # otherwise every other holder of LOCK blocks forever.
            try:
                try:
                    locationNew.execute(siteName="caf.cern.ch",
                                        seName="caf.cern.ch")
                except Exception as e:
                    logging.debug("Error when adding new location...")
                    logging.debug(e)
                    logging.debug(format_exc())

                for fileInfo in newFilesList['files']:

                    # Assume parents aren't asked
                    newfile = File(str(fileInfo['lfn']),
                                   size=fileInfo['file_size'],
                                   events=fileInfo['events'])

                    try:
                        if newfile.exists() == False:
                            newfile.create()
                        else:
                            newfile.loadData()

                        # Add run test if already exist
                        for run in fileInfo['runs']:

                            if startRun != 'None' and \
                                    int(startRun) <= int(run):

                                filesetRun = Fileset(name=runFilesetName(run))

                                if filesetRun.exists() == False:
                                    filesetRun.create()
                                else:
                                    filesetRun.loadData()

                                # Add test runs already there
                                # (for growing dataset) -
                                # to support file with different runs and lumi
                                if not newfile['runs']:

                                    runSet = set()
                                    runSet.add(
                                        Run(run, *fileInfo['runs'][run]))
                                    newfile.addRunSet(runSet)

                                fileLoc = getFileLoc.execute(
                                    file=fileInfo['lfn'])

                                if 'caf.cern.ch' not in fileLoc:
                                    newfile.setLocation("caf.cern.ch")

                                filesetRun.addFile(newfile)
                                logging.debug(
                                    "new file created/loaded added by T0ASTRun...")
                                filesetRun.commit()

                    except Exception as e:

                        logging.debug(
                            "Error when adding new files in T0ASTRun...")
                        logging.debug(e)
                        logging.debug(format_exc())

                    filesetToProcess.setLastUpdate(
                        int(newFilesList['end_time']) + 1)
                    filesetToProcess.commit()

            finally:
                LOCK.release()

        else:

            logging.debug("nothing to do...")
            # For re-opened fileset or empty, try until the purge time
            if (int(now) / 3600 - LASTIME / 3600) > self.reopenTime:

                filesetToProcess.setLastUpdate(time.time())
                filesetToProcess.commit()

        if LASTIME:

            myRequester = JSONRequests(url=t0ServiceHost)
            requestResult = myRequester.get("/tier0/runs")

            closedStatuses = ('CloseOutExport', 'Complete',
                              'CloseOutT1Skimming')

            for listRun in requestResult[0]:

                # Same guard as in the file loop above: the start run can
                # be the literal 'None', which int() cannot convert.
                if startRun == 'None' or \
                        int(startRun) > int(listRun['run']):
                    continue

                if listRun['status'] not in closedStatuses:
                    continue

                closeFileset = Fileset(
                    name=runFilesetName(listRun['run']))
                closeFilesetId = closeFileset.exists()

                if closeFilesetId != False:

                    closeFileset = Fileset(id=closeFilesetId)
                    closeFileset.loadData()

                    if closeFileset.open == True:
                        closeFileset.markOpen(False)

        # Commit the fileset
        filesetToProcess.commit()

        # Purge the fileset when it has not been updated for too long
        logging.debug("Test purge in T0ASTRun ...")
        filesetToProcess.load()
        LASTIME = filesetToProcess.lastUpdate

        if (int(now) / 3600 - LASTIME / 3600) > self.purgeTime:

            filesetToProcess.markOpen(False)
            logging.debug("Purge Done...")

        filesetToProcess.commit()
示例#17
0
class WMBSHelper(WMConnectionBase):
    """
    _WMBSHelper_

    Interface between the WorkQueue and WMBS.
    """

    def __init__(self, wmSpec, taskName, blockName=None, mask=None, cachepath='.'):
        """
        _init_

        Remember the workload context (spec, top level task, block, mask and
        cache path), open the WMBS connection through WMConnectionBase and
        instantiate the WMBS and DBSBuffer DAOs used when committing files.
        """
        # Workload context
        self.block = blockName
        self.mask = mask
        self.wmSpec = wmSpec
        self.topLevelTask = wmSpec.getTask(taskName)
        self.cachepath = cachepath
        self.isDBS = True

        # Filled in later, while filesets and subscriptions get created
        self.topLevelFileset = None
        self.topLevelSubscription = None
        self.topLevelTaskDBSBufferId = None

        # Populated in _createSubscriptionsInWMBS: merged output fileset id
        # mapped to the primary dataset of the output module
        self.mergeOutputMapping = {}

        # The base class provides self.daofactory for the WMBS package;
        # DBSBuffer lives in another package and needs its own factory
        WMConnectionBase.__init__(self, "WMCore.WMBS")
        currentThread = threading.currentThread()
        self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                        logger=currentThread.logger,
                                        dbinterface=currentThread.dbi)

        # WMBS DAOs used for the file commit
        wmbsDao = self.daofactory
        self.setParentage = wmbsDao(classname="Files.SetParentage")
        self.setFileRunLumi = wmbsDao(classname="Files.AddRunLumi")
        self.setFileLocation = wmbsDao(classname="Files.SetLocationForWorkQueue")
        self.setFileAddChecksum = wmbsDao(classname="Files.AddChecksumByLFN")
        self.addFileAction = wmbsDao(classname="Files.Add")
        self.addToFileset = wmbsDao(classname="Files.AddDupsToFileset")
        self.getLocations = wmbsDao(classname="Locations.ListSites")
        self.getLocationInfo = wmbsDao(classname="Locations.GetSiteInfo")

        # DBSBuffer DAOs
        dbsDao = self.dbsDaoFactory
        self.dbsCreateFiles = dbsDao(classname="DBSBufferFiles.Add")
        self.dbsSetLocation = dbsDao(classname="DBSBufferFiles.SetLocationByLFN")
        self.dbsInsertLocation = dbsDao(classname="DBSBufferFiles.AddLocation")
        self.dbsSetChecksum = dbsDao(classname="DBSBufferFiles.AddChecksumByLFN")
        self.dbsInsertWorkflow = dbsDao(classname="InsertWorkflow")

        # Bookkeeping for the file creation
        self.dbsFilesToCreate = []
        self.addedLocations = []
        self.wmbsFilesToCreate = []
        self.insertedBogusDataset = -1

        return

    def createSandbox(self):
        """
        _createSandbox_

        Build the runtime sandbox for the workload spec inside the cache path.
        """
        SandboxCreator().makeSandbox(self.cachepath, self.wmSpec)

    def createTopLevelFileset(self, topLevelFilesetName=None):
        """
        _createTopLevelFileset_

        Create the top level fileset for the workflow.  If the name of the top
        level fileset is not given create one.

        A generated name has the form "<workflow>-<top level task>", followed
        by "-<block>" when a block is set and by "-<md5 of the mask>" when a
        mask is set.  The mask is serialised as "key=value" pairs in sorted
        key order, so equal masks always produce the same name.

        The created fileset is stored in self.topLevelFileset.
        """
        if topLevelFilesetName is None:
            filesetName = ("%s-%s" % (self.wmSpec.name(),
                                      self.wmSpec.getTopLevelTask()[0].name()))
            if self.block:
                filesetName += "-%s" % self.block
            if self.mask:
                from hashlib import md5
                mask_string = ",".join(["%s=%s" % (x, self.mask[x]) for x in sorted(self.mask)])
                # hashlib only digests bytes; a text string raises TypeError
                # on Python 3.  Byte strings are passed through untouched.
                if not isinstance(mask_string, bytes):
                    mask_string = mask_string.encode("utf-8")
                filesetName += "-%s" % md5(mask_string).hexdigest()
        else:
            filesetName = topLevelFilesetName

        self.topLevelFileset = Fileset(filesetName)
        self.topLevelFileset.create()
        return

    def outputFilesetName(self, task, outputModuleName):
        """
        _outputFilesetName_

        Build the name of the output fileset of an output module of a task:
        "<task path>/merged-<module>" for tasks of type "Merge" and
        "<task path>/unmerged-<module>" for every other task type.
        """
        mergeState = "merged" if task.taskType() == "Merge" else "unmerged"
        return "%s/%s-%s" % (task.getPathName(), mergeState, outputModuleName)

    def createSubscription(self, task, fileset, alternativeFilesetClose=False):
        """
        _createSubscription_

        Entry point for setting up a task tree in the database.

        First the WMBS side is created for the given task (delegated to
        _createSubscriptionsInWMBS), then the workflows and finally the
        dataset subscriptions are registered in DBSBuffer.

        Returns whatever _createSubscriptionsInWMBS returned.
        """
        topLevelSub = self._createSubscriptionsInWMBS(task, fileset, alternativeFilesetClose)

        # DBSBuffer bookkeeping comes after the WMBS objects exist
        self._createWorkflowsInDBSBuffer()
        self._createDatasetSubscriptionsInDBSBuffer()

        return topLevelSub

    def _createSubscriptionsInWMBS(self, task, fileset, alternativeFilesetClose=False):
        """
        __createSubscriptionsInWMBS_

        Create the workflow and the subscription of the given task on the
        given fileset, then walk the output modules of the task: every
        module that is not ignored gets an output fileset, and each child
        task reading from that module is handled recursively with the output
        fileset as its input.

        The first subscription handled by this object becomes
        self.topLevelSubscription, which is also the return value of every
        call, including the recursive ones.
        """
        # create runtime sandbox for workflow
        self.createSandbox()

        # FIXME: Let workflow put in values if spec is missing them
        wmbsWorkflow = Workflow(
            spec=self.wmSpec.specUrl(),
            owner=self.wmSpec.getOwner()["name"],
            dn=self.wmSpec.getOwner().get("dn", "unknown"),
            group=self.wmSpec.getOwner().get("group", "unknown"),
            owner_vogroup=self.wmSpec.getOwner().get("vogroup", "DEFAULT"),
            owner_vorole=self.wmSpec.getOwner().get("vorole", "DEFAULT"),
            name=self.wmSpec.name(),
            task=task.getPathName(),
            wfType=self.wmSpec.getDashboardActivity(),
            alternativeFilesetClose=alternativeFilesetClose,
            priority=self.wmSpec.priority())
        wmbsWorkflow.create()

        taskSubscription = Subscription(fileset=fileset,
                                        workflow=wmbsWorkflow,
                                        split_algo=task.jobSplittingAlgorithm(),
                                        type=task.getPrimarySubType())
        if not taskSubscription.exists():
            taskSubscription.create()
        else:
            # Reuse the subscription that is already in the database
            taskSubscription.load()
            msg = "Subscription %s already exists for %s (you may ignore file insertion messages below, existing files wont be duplicated)"
            self.logger.info(msg % (taskSubscription['id'], task.getPathName()))

        # White list entries are flagged valid, black list entries invalid.
        # The site lists are fetched lazily, white list first.
        for isValid, listSites in ((True, task.siteWhitelist),
                                   (False, task.siteBlacklist)):
            for site in listSites():
                taskSubscription.addWhiteBlackList([{"site_name": site, "valid": isValid}])

        if self.topLevelSubscription is not None:
            logging.info("Child subscription created: %s", taskSubscription["id"])
        else:
            self.topLevelSubscription = taskSubscription
            logging.info("Top level subscription created: %s", taskSubscription["id"])

        outputModules = task.getOutputModulesForTask()
        ignoredOutputModules = task.getIgnoredOutputModulesForTask()
        for outputModule in outputModules:
            for moduleName in outputModule.listSections_():
                if moduleName in ignoredOutputModules:
                    logging.info("IgnoredOutputModule set for %s, skipping fileset creation.", moduleName)
                    continue

                unmergedFileset = Fileset(self.outputFilesetName(task, moduleName))
                unmergedFileset.create()
                unmergedFileset.markOpen(True)
                mergedFileset = None

                for childTask in task.childTaskIterator():
                    # Only children fed by this output module are relevant
                    if childTask.data.input.outputModule != moduleName:
                        continue

                    if childTask.taskType() == "Merge":
                        mergedFileset = Fileset(self.outputFilesetName(childTask, "Merged"))
                        mergedFileset.create()
                        mergedFileset.markOpen(True)

                        moduleConfig = getattr(outputModule, moduleName)
                        primaryDataset = getattr(moduleConfig, "primaryDataset", None)
                        if primaryDataset is not None:
                            self.mergeOutputMapping[mergedFileset.id] = primaryDataset

                    self._createSubscriptionsInWMBS(childTask, unmergedFileset, alternativeFilesetClose)

                # Without a merge child the output fileset doubles as the
                # merged output of the module
                if mergedFileset is not None:
                    wmbsWorkflow.addOutput(moduleName, unmergedFileset,
                                           mergedFileset)
                else:
                    wmbsWorkflow.addOutput(moduleName, unmergedFileset,
                                           unmergedFileset)

        return self.topLevelSubscription

    def addMCFakeFile(self):
        """
        Add a fake file for wmbs to run production over.

        The file is named after the top level fileset, covers the event
        range taken from self.mask and is located at every PNN that could be
        resolved for the sites returned by self.getLocations.

        Returns the value of addFilesToWMBSInBulk for the top level fileset,
        which is closed afterwards.

        Raises WorkQueueWMBSException when a required mask entry is None or
        when no location could be resolved.
        """
        for maskKey in ('FirstEvent', 'FirstLumi', 'FirstRun',
                        'LastEvent', 'LastLumi', 'LastRun'):
            # An empty/absent mask is not validated at all
            if not self.mask:
                continue
            if self.mask.get(maskKey) is not None:
                continue
            raise WorkQueueWMBSException('Invalid value "%s" for %s' % (self.mask.get(maskKey), maskKey))

        pnns = set()
        knownSites = self.getLocations.execute(conn=self.getDBConn(),
                                               transaction=self.existingTransaction())
        for site in knownSites:
            # Best effort: a failure for one site is logged and the site skipped
            try:
                info = self.getLocationInfo.execute(site, conn=self.getDBConn(),
                                                    transaction=self.existingTransaction())
                if info:
                    pnns.add(info[0]['pnn'])
                else:
                    self.logger.info('Skipping MonteCarlo injection to site "%s" as unknown to wmbs' % site)
            except Exception as err:
                self.logger.error('Error getting storage element for "%s": %s' % (site, str(err)))

        if not pnns:
            raise WorkQueueWMBSException('No locations to inject Monte Carlo work to, unable to proceed')

        fakeLFN = ("MCFakeFile-%s" % self.topLevelFileset.name).encode('ascii', 'ignore')
        # NOTE(review): the mask is indexed here unconditionally, so the
        # "no mask" branch further down looks unreachable - confirm.
        firstEvent = self.mask['FirstEvent']
        lastEvent = self.mask['LastEvent']
        fakeFile = File(lfn=fakeLFN,
                        first_event=firstEvent,
                        last_event=lastEvent,
                        events=lastEvent - firstEvent + 1,  # inclusive range
                        locations=pnns,
                        merged=False,  # merged causes dbs parentage relation
                       )

        if not self.mask:
            fakeFile.addRun(Run(1, 1))
        else:
            # inclusive lumi range; the run number is assumed to be static
            lumiRange = range(self.mask['FirstLumi'], self.mask['LastLumi'] + 1)
            fakeFile.addRun(Run(self.mask['FirstRun'], *lumiRange))

        fakeFile['inFileset'] = True  # file is not a parent

        logging.info("WMBS File: %s on Location: %s", fakeFile['lfn'], fakeFile['newlocations'])

        self.wmbsFilesToCreate.append(fakeFile)

        injected = self.topLevelFileset.addFilesToWMBSInBulk(self.wmbsFilesToCreate,
                                                             self.wmSpec.name(),
                                                             isDBS=self.isDBS)

        self.topLevelFileset.markOpen(False)
        return injected

    def createSubscriptionAndAddFiles(self, block):
        """
        _createSubscriptionAndAddFiles_

        Create the subscription and add files at one time to
        put everything in one transaction.

        :param block: block dictionary to inject (its 'Files' entry is
                      counted for the log message and the whole dictionary
                      is handed to addFiles), or None for the Monte Carlo
                      case where a fake file built from self.mask is
                      injected instead.
        :returns: tuple (subscription, addedFiles) where addedFiles is the
                  value returned by addFiles or addMCFakeFile.
        :raises: any exception raised by createSubscription; the thread
                 transaction is rolled back before it is re-raised.
        """
        self.beginTransaction()

        self.createTopLevelFileset()
        try:
            sub = self.createSubscription(self.topLevelTask, self.topLevelFileset)
        except Exception:
            myThread = threading.current_thread()
            myThread.transaction.rollback()
            msg = traceback.format_exc()
            logging.error("Failed to create subscription %s", msg)
            # Bare raise re-raises the active exception with its original
            # traceback ("raise ex" would restart the traceback here on py2)
            raise

        if block is not None:
            logging.info('"%s" Injecting block %s (%d files) into wmbs', self.wmSpec.name(),
                         self.block,
                         len(block['Files']))
            addedFiles = self.addFiles(block)
        # For MC case
        else:
            logging.info(
                '"%s" Injecting production %s:%s:%s - %s:%s:%s (run:lumi:event) into wmbs', self.wmSpec.name(),
                self.mask['FirstRun'],
                self.mask['FirstLumi'],
                self.mask['FirstEvent'],
                self.mask['LastRun'],
                self.mask['LastLumi'],
                self.mask['LastEvent'])
            addedFiles = self.addMCFakeFile()

        self.commitTransaction(existingTransaction=False)

        return sub, addedFiles

    def addFiles(self, block):
        """
        _addFiles_

        Turn the files of the given block into WMBS files and inject them.

        Files that survive validFiles are converted either as ACDC files
        (when the top level task has ACDC input) or as DBS files located at
        block['PhEDExNodeNames']; self.isDBS records which case applied.
        Afterwards the collected files are bulk-added to the top level
        fileset, the pending DBSBuffer records are created and the fileset
        open flag is set from block['IsOpen'] (closed when absent).

        Returns the value of addFilesToWMBSInBulk.
        """
        if not self.topLevelTask.getInputACDC():
            self.isDBS = True
            for dbsEntry in self.validFiles(block['Files']):
                self._addDBSFileToWMBSFile(dbsEntry, block['PhEDExNodeNames'])
        else:
            self.isDBS = False
            for acdcEntry in self.validFiles(block['Files']):
                self._addACDCFileToWMBSFile(acdcEntry)

        # WMBS first ...
        injected = self.topLevelFileset.addFilesToWMBSInBulk(self.wmbsFilesToCreate,
                                                             self.wmSpec.name(),
                                                             isDBS=self.isDBS)
        # ... then DBSBuffer
        self._createFilesInDBSBuffer()

        stillOpen = block.get('IsOpen', False)
        self.topLevelFileset.markOpen(stillOpen)
        return injected

    def getMergeOutputMapping(self):
        """
        _getMergeOutputMapping_

        Return self.mergeOutputMapping, the relationship between merge
        output fileset ids and primary datasets recorded for the merge
        tasks created. The stored object itself is returned, not a copy.
        """
        mapping = self.mergeOutputMapping
        return mapping

    def _createWorkflowsInDBSBuffer(self):
        """
        _createWorkflowsInDBSBuffer_

        Insert one dbsbuffer workflow entry, carrying the block close
        settings of the spec, for every output producing task of the spec.
        The id returned for the top level task is remembered in
        self.topLevelTaskDBSBufferId.
        """
        for taskPath in self.wmSpec.listOutputProducingTasks():
            # Positional arguments of the insert DAO, in the order it expects
            insertArgs = (self.wmSpec.name(), taskPath,
                          self.wmSpec.getBlockCloseMaxWaitTime(),
                          self.wmSpec.getBlockCloseMaxFiles(),
                          self.wmSpec.getBlockCloseMaxEvents(),
                          self.wmSpec.getBlockCloseMaxSize())
            dbsWorkflowId = self.dbsInsertWorkflow.execute(*insertArgs,
                                                           conn=self.getDBConn(),
                                                           transaction=self.existingTransaction())
            if taskPath == self.topLevelTask.getPathName():
                self.topLevelTaskDBSBufferId = dbsWorkflowId

    def _createDatasetSubscriptionsInDBSBuffer(self):
        """
        _createDatasetSubscriptionsInDBSBuffer_

        For every dataset listed in the subscription information of the
        workload, create the DBSBuffer dataset and attach the subscription
        options defined for it.
        """
        subscriptionsByDataset = self.wmSpec.getSubscriptionInformation()
        for datasetPath in subscriptionsByDataset:
            bufferDataset = DBSBufferDataset(path=datasetPath)
            bufferDataset.create()
            bufferDataset.addSubscription(subscriptionsByDataset[datasetPath])

    def _createFilesInDBSBuffer(self):
        """
        _createFilesInDBSBuffer_

        It does the actual job of creating things in DBSBuffer: flushes
        self.dbsFilesToCreate by bulk inserting the file records, any
        locations not inserted before, the file/location links and the
        checksums. Does nothing when there are no pending files; the pending
        list is emptied on success.

        Raises WorkQueueWMBSException when a pending file has no locations.
        In that case nothing has been inserted yet and self.addedLocations
        is left untouched.
        """
        if not self.dbsFilesToCreate:
            # Whoops, nothing to do!
            return

        dbsFileTuples = []
        dbsFileLoc = []
        dbsCksumBinds = []
        locationsToAdd = []

        # The first thing we need to do is add the datasetAlgo
        # Assume all files in a pass come from one datasetAlgo?
        if self.insertedBogusDataset == -1:
            self.insertedBogusDataset = self.dbsFilesToCreate[0].insertDatasetAlgo()

        for dbsFile in self.dbsFilesToCreate:
            lfn = dbsFile['lfn']
            checksums = dbsFile['checksums']

            # Tuple in the format specified by DBSBufferFiles.Add
            newTuple = (lfn, dbsFile['size'],
                        dbsFile['events'], self.insertedBogusDataset,
                        dbsFile['status'], self.topLevelTaskDBSBufferId)

            if newTuple not in dbsFileTuples:
                dbsFileTuples.append(newTuple)

            if not dbsFile['newlocations']:
                msg = ("File created without any locations!\n"
                       "File lfn: %s\n" % (lfn) +
                       "Rejecting this group of files in DBS!\n")
                logging.error(msg)
                raise WorkQueueWMBSException(msg)

            for jobLocation in dbsFile['newlocations']:
                # Only queue the location here; it is recorded in
                # self.addedLocations once it has really been inserted, so
                # a failure later in this loop cannot leave it marked as
                # added without ever reaching the database.
                if jobLocation not in self.addedLocations and jobLocation not in locationsToAdd:
                    locationsToAdd.append(jobLocation)
                dbsFileLoc.append({'lfn': lfn, 'pnn': jobLocation})

            if checksums:
                # One bind per checksum type
                for cktype, cksum in checksums.items():
                    dbsCksumBinds.append({'lfn': lfn, 'cksum': cksum,
                                          'cktype': cktype})

        for jobLocation in locationsToAdd:
            self.dbsInsertLocation.execute(siteName=jobLocation,
                                           conn=self.getDBConn(),
                                           transaction=self.existingTransaction())
            self.addedLocations.append(jobLocation)

        self.dbsCreateFiles.execute(files=dbsFileTuples,
                                    conn=self.getDBConn(),
                                    transaction=self.existingTransaction())

        self.dbsSetLocation.execute(binds=dbsFileLoc,
                                    conn=self.getDBConn(),
                                    transaction=self.existingTransaction())

        if dbsCksumBinds:
            self.dbsSetChecksum.execute(bulkList=dbsCksumBinds,
                                        conn=self.getDBConn(),
                                        transaction=self.existingTransaction())

        # Now that we've created those files, clear the list
        self.dbsFilesToCreate = []
        return

    def _addToDBSBuffer(self, dbsFile, checksums, locations):
        """
        Queue a DBSBuffer record for an input file.

        A DBSBufferFile with status "GLOBAL", the 'bogus' dataset path and a
        placeholder cmsRun algorithm is built from the DBS-style dictionary
        and appended to self.dbsFilesToCreate unless it already exists.
        According to the original note this is only done for performance,
        so that the Accountant does not need to check the parentage.
        """
        bufferFile = DBSBufferFile(lfn=dbsFile["LogicalFileName"],
                                   size=dbsFile["FileSize"],
                                   events=dbsFile["NumberOfEvents"],
                                   checksums=checksums,
                                   locations=locations,
                                   status="GLOBAL")
        bufferFile.setDatasetPath('bogus')
        bufferFile.setAlgorithm(appName="cmsRun", appVer="Unknown",
                                appFam="Unknown", psetHash="Unknown",
                                configContent="Unknown")

        if bufferFile.exists():
            return
        # Creation is deferred to the bulk insert of the pending list
        self.dbsFilesToCreate.append(bufferFile)
        return

    def _addDBSFileToWMBSFile(self, dbsFile, storageElements, inFileset=True):
        """
        Build the WMBS file (and, recursively, its parents) for a DBS file
        dictionary, queue it in self.wmbsFilesToCreate and return it.

        Two assumptions are made for this method to behave properly:
        1. DBS returns only one level of ParentList.
           Deeper parentage is still processed, but that might not be what
           is wanted; in that case restrict the input to one level.
        2. Parent files are at the same location as their child files.
           This is not true in general, but the workqueue should only select
           work where child and parent files are at the same location.

        Parents are created with inFileset=False. Note that a missing
        "ParentList" key is added to dbsFile as an empty list.
        """
        parentFiles = [self._addDBSFileToWMBSFile(parentInfo, storageElements, inFileset=False)
                       for parentInfo in dbsFile.setdefault("ParentList", [])]

        # Translate the DBS checksum keys into the names used on the file
        checksums = {}
        for dbsKey, cksumType in (('Checksum', 'cksum'), ('Adler32', 'adler32')):
            if dbsFile.get(dbsKey):
                checksums[cksumType] = dbsFile[dbsKey]

        wmbsFile = File(lfn=dbsFile["LogicalFileName"],
                        size=dbsFile["FileSize"],
                        events=dbsFile["NumberOfEvents"],
                        checksums=checksums,
                        # TODO: need to get list of parent lfn
                        parents=parentFiles,
                        locations=set(storageElements))

        for lumiInfo in dbsFile['LumiList']:
            # A lumi entry carries either a list of sections or a single one
            lumiSections = lumiInfo['LumiSectionNumber']
            if not isinstance(lumiSections, list):
                lumiSections = [lumiSections]
            wmbsFile.addRun(Run(lumiInfo['RunNumber'], *lumiSections))

        self._addToDBSBuffer(dbsFile, checksums, storageElements)

        logging.info("WMBS File: %s\n on Location: %s", wmbsFile['lfn'], wmbsFile['newlocations'])

        wmbsFile['inFileset'] = bool(inFileset)
        self.wmbsFilesToCreate.append(wmbsFile)

        return wmbsFile

    def _convertACDCFileToDBSFile(self, acdcFile):
        """
        Return a new DBS-style dictionary (LogicalFileName, FileSize,
        NumberOfEvents) filled from the lfn, size and events of an ACDC file.
        """
        return {
            "LogicalFileName": acdcFile["lfn"],
            "FileSize": acdcFile["size"],
            "NumberOfEvents": acdcFile["events"],
        }

    def _addACDCFileToWMBSFile(self, acdcFile, inFileset=True):
        """
        Build the WMBS file for an ACDC file, queue it in
        self.wmbsFilesToCreate (and its DBSBuffer record via
        _addToDBSBuffer) and return it.

        Each parent LFN is first turned into a file of its own at the
        child's locations and added with inFileset=False.
        """
        parentFiles = [
            self._addACDCFileToWMBSFile(DatastructFile(lfn=parentLFN,
                                                       locations=acdcFile["locations"]),
                                        inFileset=False)
            for parentLFN in acdcFile["parents"]
        ]

        # pass empty check sum since it won't be updated to dbs anyway
        noChecksums = {}
        wmbsFile = File(lfn=str(acdcFile["lfn"]),
                        size=acdcFile["size"],
                        events=acdcFile["events"],
                        first_event=acdcFile.get('first_event', 0),
                        last_event=acdcFile.get('last_event', 0),
                        checksums=noChecksums,
                        parents=parentFiles,
                        locations=acdcFile["locations"],
                        merged=acdcFile.get('merged', True))

        ## TODO need to get the lumi lists
        for acdcRun in acdcFile['runs']:
            wmbsFile.addRun(acdcRun)

        self._addToDBSBuffer(self._convertACDCFileToDBSFile(acdcFile),
                             noChecksums, acdcFile["locations"])

        logging.info("WMBS File: %s\n on Location: %s", wmbsFile['lfn'], wmbsFile['newlocations'])

        wmbsFile['inFileset'] = bool(inFileset)

        self.wmbsFilesToCreate.append(wmbsFile)

        return wmbsFile

    def validFiles(self, files):
        """
        Apply lumi mask and or run white/black list and return files which have
        one or more of the requested lumis

        :param files: iterable of file entries. Plain strings and entries
                      without a "LumiList" key are always kept. The other
                      entries are expected to carry a "LumiList" of
                      {'RunNumber': ..., 'LumiSectionNumber': ...} items.
        :returns: list of the accepted entries, in input order. The entries
                  themselves are not modified.
        """
        runWhiteList = self.topLevelTask.inputRunWhitelist()
        runBlackList = self.topLevelTask.inputRunBlacklist()
        lumiMask = self.topLevelTask.getLumiMask()

        blackMask = None
        if lumiMask:  # We have a lumiMask, so use it and modify with run white/black list
            if runWhiteList:
                lumiMask.selectRuns(runWhiteList)
            if runBlackList:
                lumiMask.removeRuns(runBlackList)
        elif runWhiteList:  # We have a run whitelist, subtract off blacklist
            lumiMask = LumiList(runs=runWhiteList)
            if runBlackList:
                lumiMask.removeRuns(runBlackList)
        else:
            lumiMask = None
            if runBlackList:  # We only have a blacklist, so make a black mask out of it instead
                blackMask = LumiList(runs=runBlackList)

        # NOTE(review): a mask that ends up empty (e.g. whitelist fully
        # blacklisted) is falsy and therefore treated like "no mask", which
        # accepts every file - confirm this is intended.
        noMask = not lumiMask and not blackMask

        try:
            stringTypes = basestring  # Python 2
        except NameError:
            stringTypes = str  # Python 3

        results = []
        for f in files:
            if isinstance(f, stringTypes) or "LumiList" not in f:
                results.append(f)
                continue

            if noMask:  # There is effectively no mask, no need to inspect the lumis
                results.append(f)
                continue

            # Create a LumiList from the WMBS info. The lumis are copied into
            # fresh lists so that the caller's data is never extended, and a
            # single lumi number is accepted as well as a list of them.
            runLumis = {}
            for x in f['LumiList']:
                lumis = x['LumiSectionNumber']
                if not isinstance(lumis, (list, tuple, set)):
                    lumis = [lumis]
                runLumis.setdefault(x['RunNumber'], []).extend(lumis)
            fileLumiList = LumiList(runsAndLumis=runLumis)

            if lumiMask:
                if fileLumiList & lumiMask:  # At least one lumi from file is in lumiMask
                    results.append(f)
            elif fileLumiList - blackMask:  # At least one lumi from file is not in blackMask
                results.append(f)

        return results
示例#18
0
文件: Feeder.py 项目: lucacopa/WMCore
    def __call__(self, filesetToProcess):
        """
        The algorithm itself

        Ask the Tier-0 data service for the files registered since the last
        update of filesetToProcess, attach every file to a per-run fileset
        (created on demand), close the per-run filesets of runs whose status
        is CloseOutExport, Complete or CloseOutT1Skimming and finally close
        filesetToProcess itself once the purge interval has elapsed.

        The fileset name is read as colon separated fields: field 0 is a
        dataset path ("/primary/processed/tier-..."), field 2 the file type
        and field 3 the first run to consider (the literal string 'None'
        when there is none, in which case no per-run fileset is touched).

        Returns None. Gives up without raising when the data service call
        keeps failing for self.maxRetries attempts.
        """
        global LOCK  # module level lock, defined outside this method

        # Get configuration
        initObj = WMInit()
        initObj.setLogging()
        initObj.setDatabaseConnection(os.getenv("DATABASE"),
                                      os.getenv('DIALECT'),
                                      os.getenv("DBSOCK"))

        myThread = threading.currentThread()

        daofactory = DAOFactory(package="WMCore.WMBS",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)

        locationNew = daofactory(classname="Locations.New")
        getFileLoc = daofactory(classname="Files.GetLocation")

        # Split the fileset name once instead of re-splitting it at every use
        nameFields = filesetToProcess.name.split(":")
        pathFields = nameFields[0].split('/')

        logging.debug("the T0Feeder is processing %s", filesetToProcess.name)
        logging.debug("the fileset name %s", nameFields[0])

        startRun = nameFields[3]
        fileType = nameFields[2]

        # url builder
        primaryDataset = pathFields[1]
        processedDataset = pathFields[2]
        dataTier = pathFields[3].split('-')[0]

        def runFilesetName(run):
            """Name of the per-run fileset derived from filesetToProcess."""
            # ToDo: Distinguish between
            # filestA-RunX and filesetA-Run[0-9]*
            return (pathFields[0] + '/' + primaryDataset + '/' +
                    processedDataset + '/' + dataTier + '-' + 'Run' +
                    str(run) + ":" + ":".join(nameFields[1:]))

        # First call to T0 db for this fileset
        # Here add test for the closed fileset
        LASTIME = filesetToProcess.lastUpdate

        url = "/tier0/listfilesoverinterval/%s/%s/%s/%s/%s" % \
              (fileType, LASTIME, primaryDataset, processedDataset, dataTier)

        tries = 1
        while True:

            try:
                myRequester = JSONRequests(url="vocms52.cern.ch:8889")
                requestResult = myRequester.get(url + "/" + "?return_type=text/json%2Bdas")
                newFilesList = requestResult[0]["results"]

            except Exception:
                # Not a bare except: KeyboardInterrupt/SystemExit must not
                # be swallowed and retried
                logging.debug("T0Reader call error...")
                # ">=" so that a maxRetries below 1 cannot loop forever
                if tries >= self.maxRetries:
                    return
                tries += 1
                continue

            logging.debug("T0ASTRun queries done ...")
            now = time.time()
            # NOTE(review): this sets "last_update" while the rest of the
            # method reads "lastUpdate" / calls setLastUpdate - confirm the
            # attribute name is intended.
            filesetToProcess.last_update = now
            LASTIME = int(newFilesList['end_time']) + 1

            break

        # process all files
        if len(newFilesList['files']):

            LOCK.acquire()
            # Release the lock on every exit path; setLastUpdate/commit
            # below are not protected by the inner try blocks
            try:

                try:
                    locationNew.execute(siteName="caf.cern.ch", seName="caf.cern.ch")
                except Exception as e:
                    logging.debug("Error when adding new location...")
                    logging.debug(e)
                    logging.debug(format_exc())

                for files in newFilesList['files']:

                    # Assume parents aren't asked
                    newfile = File(str(files['lfn']),
                                   size=files['file_size'], events=files['events'])

                    try:
                        if newfile.exists() == False:
                            newfile.create()
                        else:
                            newfile.loadData()

                        # Add run test if already exist
                        for run in files['runs']:

                            if startRun != 'None' and int(startRun) <= int(run):

                                filesetRun = Fileset(name=runFilesetName(run))

                                if filesetRun.exists() == False:
                                    filesetRun.create()
                                else:
                                    filesetRun.loadData()

                                # Add test runs already there
                                # (for growing dataset) -
                                # to support file with different runs and lumi
                                if not newfile['runs']:

                                    runSet = set()
                                    runSet.add(Run(run, *files['runs'][run]))
                                    newfile.addRunSet(runSet)

                                fileLoc = getFileLoc.execute(file=files['lfn'])

                                if 'caf.cern.ch' not in fileLoc:
                                    newfile.setLocation("caf.cern.ch")

                                filesetRun.addFile(newfile)
                                logging.debug("new file created/loaded added by T0ASTRun...")
                                filesetRun.commit()

                    except Exception as e:

                        logging.debug("Error when adding new files in T0ASTRun...")
                        logging.debug(e)
                        logging.debug(format_exc())

                    filesetToProcess.setLastUpdate(int(newFilesList['end_time']) + 1)
                    filesetToProcess.commit()

            finally:
                LOCK.release()

        else:

            logging.debug("nothing to do...")
            # For re-opned fileset or empty, try until the purge time
            if (int(now)/3600 - LASTIME/3600) > self.reopenTime:

                filesetToProcess.setLastUpdate(time.time())
                filesetToProcess.commit()

        if LASTIME:

            myRequester = JSONRequests(url="vocms52.cern.ch:8889")
            requestResult = myRequester.get("/tier0/runs")

            for listRun in requestResult[0]:

                # Same guard as in the file loop: int('None') would raise
                if startRun != 'None' and int(startRun) <= int(listRun['run']):

                    if listRun['status'] in ('CloseOutExport', 'Complete',
                                             'CloseOutT1Skimming'):

                        closeFileset = Fileset(name=runFilesetName(listRun['run']))

                        # Query once and reuse the answer as the fileset id
                        existingId = closeFileset.exists()
                        if existingId != False:

                            closeFileset = Fileset(id=existingId)
                            closeFileset.loadData()

                            if closeFileset.open == True:
                                closeFileset.markOpen(False)

        # Commit the fileset
        filesetToProcess.commit()

        # Reload the fileset and test whether it has to be purged
        logging.debug("Test purge in T0ASTRun ...")
        filesetToProcess.load()
        LASTIME = filesetToProcess.lastUpdate

        # presumably both values are epoch seconds, so this compares hours
        # - TODO confirm the unit of lastUpdate
        if (int(now)/3600 - LASTIME/3600) > self.purgeTime:

            filesetToProcess.markOpen(False)
            logging.debug("Purge Done...")

        filesetToProcess.commit()
示例#19
0
文件: Feeder.py 项目: ticoann/WMCore
                        closeFileset = Fileset( name = (((\
      filesetToProcess.name).split(':')[0]).split('/')[0])+'/'+\
     (((filesetToProcess.name).split(':')[0]).split('/')[1]\
     )+'/'+(((filesetToProcess.name).split(':')[0]).split('/')\
     [2])+'/'+((((filesetToProcess.name).split(':')[0]).split\
     ('/')[3]).split('-')[0])+'-'+'Run'+str(listRun['run'])\
     +":"+":".join((filesetToProcess.name).split(':')[1:] ) )

                        if closeFileset.exists() != False:

                            closeFileset = Fileset(id=closeFileset.exists())
                            closeFileset.loadData()

                            if closeFileset.open == True:
                                closeFileset.markOpen(False)

        # Commit the fileset
        filesetToProcess.commit()

        # Commit the fileset
        logging.debug("Test purge in T0ASTRun ...")
        filesetToProcess.load()
        LASTIME = filesetToProcess.lastUpdate

        if (int(now) / 3600 - LASTIME / 3600) > self.purgeTime:

            filesetToProcess.markOpen(False)
            logging.debug("Purge Done...")

        filesetToProcess.commit()
示例#20
0
    def createSubscription(self, topLevelFilesetName = None, task = None,
                           fileset = None):
        """
        _createSubscription_

        Create subscriptions in WMBS for all the tasks in the spec.  This
        includes filesets, workflows and the output map for each task.

        Called without a task or without a fileset, the top level fileset
        is created from topLevelFilesetName and the method is invoked again
        for every top level task of the spec; the result of the last of
        those calls is returned (None when the spec has no top level task).

        Called with both a task and a fileset, the workflow and subscription
        for that task are created, the site white/black lists are applied,
        an output fileset is created per output module and the method
        recurses into the child tasks reading from that module. In this
        case self.topLevelSubscription is returned.
        """
        if task is None or fileset is None:
            self.createTopLevelFileset(topLevelFilesetName)
            sub = None
            for topLevelTask in self.wmSpec.getTopLevelTask():
                sub = self.createSubscription(topLevelFilesetName,
                                              topLevelTask,
                                              self.topLevelFileset)
            return sub

        # create runtime sandbox for workflow
        self.createSandbox()

        # Fetch the owner information once rather than once per field
        owner = self.wmSpec.getOwner()
        workflow = Workflow(spec=self.wmSpec.specUrl(), owner=owner["name"],
                            dn=owner.get("dn", None),
                            group=owner.get("group", None),
                            owner_vogroup=owner.get("vogroup", ''),
                            owner_vorole=owner.get("vorole", ''),
                            name=self.wmSpec.name(), task=task.getPathName(),
                            wfType=self.wmSpec.getDashboardActivity())
        workflow.create()
        subscription = Subscription(fileset=fileset, workflow=workflow,
                                    split_algo=task.jobSplittingAlgorithm(),
                                    type=task.taskType())
        subscription.create()
        for site in task.siteWhitelist():
            subscription.addWhiteBlackList([{"site_name": site, "valid": True}])

        for site in task.siteBlacklist():
            subscription.addWhiteBlackList([{"site_name": site, "valid": False}])

        # The first subscription created is the top level one
        if self.topLevelSubscription is None:
            self.topLevelSubscription = subscription
            logging.info("Top level subscription created: %s", subscription["id"])
        else:
            logging.info("Child subscription created: %s", subscription["id"])

        outputModules = task.getOutputModulesForTask()
        for outputModule in outputModules:
            for outputModuleName in outputModule.listSections_():
                outputFileset = Fileset(self.outputFilesetName(task, outputModuleName))
                outputFileset.create()
                outputFileset.markOpen(True)
                mergedOutputFileset = None

                for childTask in task.childTaskIterator():
                    if childTask.data.input.outputModule == outputModuleName:
                        if childTask.taskType() == "Merge":
                            mergedOutputFileset = Fileset(self.outputFilesetName(childTask, "Merged"))
                            mergedOutputFileset.create()
                            mergedOutputFileset.markOpen(True)

                        self.createSubscription(topLevelFilesetName, childTask, outputFileset)

                # Without a merge child the unmerged fileset doubles as the
                # merged output of the module
                if mergedOutputFileset is None:
                    workflow.addOutput(outputModuleName, outputFileset,
                                       outputFileset)
                else:
                    workflow.addOutput(outputModuleName, outputFileset,
                                       mergedOutputFileset)

        return self.topLevelSubscription
示例#21
0
class WMBSMergeBySize(unittest.TestCase):
    def setUp(self):
        """
        _setUp_

        Prepare the database for a test: configure logging, open the database
        connection, install the WMCore.WMBS schema and build the DAO factory
        the tests use to reach the WMBS DAOs.
        """
        self.testInit = TestInit(__file__)
        initializer = self.testInit
        initializer.setLogging()
        initializer.setDatabaseConnection()
        initializer.setSchema(customModules=["WMCore.WMBS"], useDefault=False)

        # The logger and database interface are read from the current thread
        # object once the connection has been set up.
        currentThread = threading.currentThread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=currentThread.logger,
                                     dbinterface=currentThread.dbi)

    def tearDown(self):
        """
        _tearDown_

        Clear out WMBS.
        """
        self.testInit.clearDatabase()
        return

    def stuffWMBS(self, injected=True):
        """
        _stuffWMBS_

        Insert the dummy locations, filesets, workflows, subscriptions, job
        groups, jobs and files that the merge tests run against.

        Two job groups are created on the "inputWorkflow" subscription and one
        on the "bogusInputWorkflow" subscription.  Every job is moved to the
        "cleanout" state with either a "success" or a "failure" outcome.  The
        output files are attached to the output filesets of the first two job
        groups and are also added to both the "mergeFileset" and the
        "bogusFileset".

        The injected argument is handed to the Workflow.MarkInjectedWorkflows
        DAO for the merge workflow.
        """
        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute(siteName="T2_CH_CERN", pnn="T2_CH_CERN")
        locationAction.execute(siteName="T1_US_FNAL", pnn="T2_CH_CERN")

        changeStateDAO = self.daoFactory(classname="Jobs.ChangeState")

        def finishedJob(inputFile, jobGroup, outcome):
            """Create a job over inputFile in jobGroup and move it to cleanout."""
            newJob = Job()
            newJob.addFile(inputFile)
            newJob.create(jobGroup)
            newJob["state"] = "cleanout"
            newJob["oldstate"] = "new"
            newJob["couch_record"] = "somejive"
            newJob["retry_count"] = 0
            newJob["outcome"] = outcome
            newJob.save()
            changeStateDAO.execute([newJob])
            return newJob

        def outputFile(lfn, firstEvent, run, lumi, parent, size=1024,
                       events=1024):
            """Create a file at T2_CH_CERN with one run/lumi and one parent."""
            newFile = File(lfn=lfn, size=size, events=events,
                           first_event=firstEvent, locations={"T2_CH_CERN"})
            newFile.addRun(Run(run, *[lumi]))
            newFile.create()
            newFile.addParent(parent["lfn"])
            return newFile

        self.mergeFileset = Fileset(name="mergeFileset")
        self.mergeFileset.create()
        self.bogusFileset = Fileset(name="bogusFileset")
        self.bogusFileset.create()

        self.mergeMergedFileset = Fileset(name="mergeMergedFileset")
        self.mergeMergedFileset.create()
        self.bogusMergedFileset = Fileset(name="bogusMergedFileset")
        self.bogusMergedFileset.create()

        mergeWorkflow = Workflow(name="mergeWorkflow", spec="bunk2",
                                 owner="Steve", task="Test")
        mergeWorkflow.create()
        markWorkflow = self.daoFactory(classname="Workflow.MarkInjectedWorkflows")
        markWorkflow.execute(names=[mergeWorkflow.name], injected=injected)

        self.mergeSubscription = Subscription(fileset=self.mergeFileset,
                                              workflow=mergeWorkflow,
                                              split_algo="WMBSMergeBySize")
        self.mergeSubscription.create()
        # NOTE(review): bogusSubscription is only instantiated, create() is
        # never called on it here -- confirm that this is intentional.
        self.bogusSubscription = Subscription(fileset=self.bogusFileset,
                                              workflow=mergeWorkflow,
                                              split_algo="WMBSMergeBySize")

        inputFileset = Fileset(name="inputFileset")
        inputFileset.create()

        inputWorkflow = Workflow(name="inputWorkflow", spec="input",
                                 owner="Steve", task="Test")
        inputWorkflow.create()
        inputWorkflow.addOutput("output", self.mergeFileset,
                                self.mergeMergedFileset)
        inputWorkflow.addOutput("output2", self.bogusFileset,
                                self.bogusMergedFileset)
        bogusInputWorkflow = Workflow(name="bogusInputWorkflow", spec="input",
                                      owner="Steve", task="Test")
        bogusInputWorkflow.create()

        inputSubscription = Subscription(fileset=inputFileset,
                                         workflow=inputWorkflow)
        inputSubscription.create()
        bogusInputSubscription = Subscription(fileset=inputFileset,
                                              workflow=bogusInputWorkflow)
        bogusInputSubscription.create()

        parentFile1 = File(lfn="parentFile1")
        parentFile1.create()
        parentFile2 = File(lfn="parentFile2")
        parentFile2.create()
        parentFile3 = File(lfn="parentFile3")
        parentFile3.create()
        parentFile4 = File(lfn="parentFile4")
        parentFile4.create()
        self.parentFileSite2 = File(lfn="parentFileSite2")
        self.parentFileSite2.create()

        jobGroup1 = JobGroup(subscription=inputSubscription)
        jobGroup1.create()
        jobGroup2 = JobGroup(subscription=inputSubscription)
        jobGroup2.create()
        jobGroup3 = JobGroup(subscription=bogusInputSubscription)
        jobGroup3.create()

        # The jobs are created in the same order as before so that the
        # records are inserted into the database in the same sequence.
        finishedJob(parentFile1, jobGroup1, "success")
        finishedJob(parentFile1, jobGroup3, "failure")
        finishedJob(parentFile2, jobGroup1, "success")
        finishedJob(parentFile3, jobGroup2, "success")
        finishedJob(parentFile4, jobGroup2, "failure")

        # We'll simulate a failed split by event job that the merger should
        # ignore.
        parentFile5 = File(lfn="parentFile5")
        parentFile5.create()

        finishedJob(parentFile5, jobGroup2, "success")
        finishedJob(parentFile5, jobGroup2, "failure")
        finishedJob(self.parentFileSite2, jobGroup2, "success")

        badFile1 = outputFile("badFile1", 0, 1, 45, parentFile5,
                              size=10241024, events=10241024)

        # Each tuple is (lfn, first event); every file holds 1024 events.
        filesFromParent1 = [outputFile(lfn, firstEvent, 1, 45, parentFile1)
                            for lfn, firstEvent in [("file1", 0),
                                                    ("file2", 1024),
                                                    ("file3", 2048),
                                                    ("file4", 3072)]]
        filesFromParent2 = [outputFile(lfn, firstEvent, 1, 46, parentFile2)
                            for lfn, firstEvent in [("fileA", 0),
                                                    ("fileB", 1024),
                                                    ("fileC", 2048)]]
        filesFromParent3 = [outputFile(lfn, firstEvent, 2, 46, parentFile3)
                            for lfn, firstEvent in [("fileI", 0),
                                                    ("fileII", 1024),
                                                    ("fileIII", 2048),
                                                    ("fileIV", 3072)]]
        # Children of parentFile4, whose only job has a "failure" outcome.
        filesFromParent4 = [outputFile(lfn, firstEvent, 1, 47, parentFile4)
                            for lfn, firstEvent in [("badFileA", 0),
                                                    ("badFileB", 1024),
                                                    ("badFileC", 2048)]]

        groupOneOutput = filesFromParent1 + filesFromParent2
        for fileObj in groupOneOutput:
            jobGroup1.output.addFile(fileObj)
        jobGroup1.output.commit()

        groupTwoOutput = filesFromParent3 + filesFromParent4 + [badFile1]
        for fileObj in groupTwoOutput:
            jobGroup2.output.addFile(fileObj)
        jobGroup2.output.commit()

        for fileObj in groupOneOutput + groupTwoOutput:
            self.mergeFileset.addFile(fileObj)
            self.bogusFileset.addFile(fileObj)

        self.mergeFileset.commit()
        self.bogusFileset.commit()

        return

    def testMinMergeSize1(self):
        """
        _testMinMergeSize1_

        Run the merge splitter over the still-open merge fileset with a
        minimum merge size of 20,000 bytes and verify that no job groups come
        back.
        """
        self.stuffWMBS()

        jobFactory = SplitterFactory()(package="WMCore.WMBS",
                                       subscription=self.mergeSubscription)

        mergeLimits = {"min_merge_size": 20000,
                       "max_merge_size": 2000000000,
                       "max_merge_events": 200000000}
        jobGroups = jobFactory(**mergeLimits)

        assert len(jobGroups) == 0, "ERROR: No job groups should be returned."

    def testMinMergeSize1a(self):
        """
        _testMinMergeSize1a_

        Set the minimum merge size to 20,000 bytes and mark the merge fileset
        as closed.  Verify that one job group holding two merge jobs is pushed
        out, that each job only contains files from the expected list, and
        that the files inside each job are ordered by run, lumi and first
        event.
        """
        self.stuffWMBS()
        self.mergeFileset.markOpen(False)

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=20000, max_merge_size=200000,
                            max_merge_events=20000)

        assert len(result) == 1, \
            "ERROR: More than one JobGroup returned: %s" % len(result)

        assert len(result[0].jobs) == 2, \
            "Error: Two jobs should have been returned."

        goldenFilesA = ["file1", "file2", "file3", "file4", "fileA", "fileB",
                        "fileC"]
        goldenFilesB = ["fileI", "fileII", "fileIII", "fileIV"]

        for job in result[0].jobs:

            self.assertEqual(job["possiblePSN"], {"T1_US_FNAL", "T2_CH_CERN"})

            jobFiles = job.getFiles()

            # The two expected jobs are told apart by their number of files.
            if len(jobFiles) == len(goldenFilesA):
                self.assertEqual(job["estimatedDiskUsage"], 7)
                goldenFiles = goldenFilesA
            else:
                self.assertEqual(job["estimatedDiskUsage"], 4)
                goldenFiles = goldenFilesB

            currentRun = 0
            currentLumi = 0
            currentEvent = 0
            for fileObj in jobFiles:
                fileObj.loadData()
                assert fileObj["lfn"] in goldenFiles, \
                    "Error: Unknown file: %s" % fileObj["lfn"]
                goldenFiles.remove(fileObj["lfn"])

                fileRun = list(fileObj["runs"])[0].run
                fileLumi = min(list(fileObj["runs"])[0])
                fileEvent = fileObj["first_event"]

                # The first file of a job has nothing to be compared against.
                if currentRun == 0:
                    currentRun = fileRun
                    currentLumi = fileLumi
                    currentEvent = fileEvent
                    continue

                assert fileRun >= currentRun, \
                    "ERROR: Files not sorted by run."

                if fileRun == currentRun:
                    assert fileLumi >= currentLumi, \
                        "ERROR: Files not ordered by lumi"

                    # First events are only comparable within the same run
                    # and lumi, so this check is nested like in the other
                    # tests of this class.
                    if fileLumi == currentLumi:
                        assert fileEvent >= currentEvent, \
                            "ERROR: Files not ordered by first event"

                currentRun = fileRun
                currentLumi = fileLumi
                currentEvent = fileEvent

        return

    def testMinMergeSize2(self):
        """
        _testMinMergeSize2_

        Use a minimum merge size of 7,167 bytes, one byte below the combined
        size of the seven 1024 byte files expected in the merge job.  Verify
        that exactly one job is produced, that it holds exactly those seven
        files and that they are ordered by run, lumi and first event.
        """
        self.stuffWMBS()

        jobFactory = SplitterFactory()(package="WMCore.WMBS",
                                       subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=7167, max_merge_size=20000,
                            max_merge_events=20000)

        assert len(result) == 1, \
            "ERROR: More than one JobGroup returned: %d" % len(result)

        jobGroup = result[0]
        assert len(jobGroup.jobs) == 1, \
            "ERROR: One job should have been returned."

        mergeJob = jobGroup.jobs[0]
        self.assertEqual(mergeJob["estimatedDiskUsage"], 7)
        self.assertEqual(mergeJob["possiblePSN"], {"T1_US_FNAL", "T2_CH_CERN"})

        jobFiles = list(jobGroup.jobs)[0].getFiles()

        goldenFiles = ["file1", "file2", "file3", "file4", "fileA", "fileB",
                       "fileC"]

        assert len(jobFiles) == len(goldenFiles), \
            "ERROR: Merge job should contain %d files." % len(goldenFiles)

        # (run, lumi, first event) of the previous file; a run of 0 means
        # that no file has been seen yet.
        previous = (0, 0, 0)
        for fileObj in jobFiles:
            lfn = fileObj["lfn"]
            assert lfn in goldenFiles, "Error: Unknown file: %s" % lfn
            goldenFiles.remove(lfn)

            firstRun = list(fileObj["runs"])[0]
            present = (firstRun.run, min(firstRun), fileObj["first_event"])

            if previous[0] != 0:
                assert present[0] >= previous[0], \
                    "ERROR: Files not sorted by run."

                if present[0] == previous[0]:
                    assert present[1] >= previous[1], \
                        "ERROR: Files not ordered by lumi"

                    if present[1] == previous[1]:
                        assert present[2] >= previous[2], \
                            "ERROR: Files not ordered by first event"

            previous = present

    def testMaxMergeSize1(self):
        """
        _testMaxMergeSize1_

        Set the maximum merge size to be two bytes.  Verify that one job group
        with three merge jobs is created, that each merge job contains the
        expected files, and that the files inside each job are ordered by run,
        lumi and first event.
        """
        self.stuffWMBS()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=1, max_merge_size=2,
                            max_merge_events=20000)

        assert len(result) == 1, \
            "ERROR: More than one JobGroup returned: %s" % result

        assert len(result[0].jobs) == 3, \
            "ERROR: Three jobs should have been returned."

        self.assertEqual(result[0].jobs[0]["possiblePSN"], {"T1_US_FNAL", "T2_CH_CERN"})

        goldenFilesA = ["file1", "file2", "file3", "file4"]
        goldenFilesB = ["fileA", "fileB", "fileC"]
        goldenFilesC = ["fileI", "fileII", "fileIII", "fileIV"]

        for job in result[0].jobs:
            jobFiles = job.getFiles()

            # The first file of the job decides which expected list applies.
            if jobFiles[0]["lfn"] in goldenFilesA:
                self.assertEqual(job["estimatedDiskUsage"], 4)
                goldenFiles = goldenFilesA
            elif jobFiles[0]["lfn"] in goldenFilesB:
                self.assertEqual(job["estimatedDiskUsage"], 3)
                goldenFiles = goldenFilesB
            else:
                self.assertEqual(job["estimatedDiskUsage"], 4)
                goldenFiles = goldenFilesC

            currentRun = 0
            currentLumi = 0
            currentEvent = 0
            for fileObj in jobFiles:
                assert fileObj["lfn"] in goldenFiles, \
                    "Error: Unknown file in merge jobs."
                goldenFiles.remove(fileObj["lfn"])

                fileRun = list(fileObj["runs"])[0].run
                fileLumi = min(list(fileObj["runs"])[0])
                fileEvent = fileObj["first_event"]

                # Remember the first file so that the following ones have
                # something to be compared against.  Without this the
                # ordering assertions below would never be reached.
                if currentRun == 0:
                    currentRun = fileRun
                    currentLumi = fileLumi
                    currentEvent = fileEvent
                    continue

                assert fileRun >= currentRun, \
                    "ERROR: Files not sorted by run."

                if fileRun == currentRun:
                    assert fileLumi >= currentLumi, \
                        "ERROR: Files not ordered by lumi"

                    if fileLumi == currentLumi:
                        assert fileEvent >= currentEvent, \
                            "ERROR: Files not ordered by first event"

                currentRun = fileRun
                currentLumi = fileLumi
                currentEvent = fileEvent

        assert len(goldenFilesA) == 0 and len(goldenFilesB) == 0 and \
               len(goldenFilesC) == 0, \
            "ERROR: Files missing from merge jobs."

        return

    def testMaxMergeSize2(self):
        """
        _testMaxMergeSize2_

        Use a minimum merge size of 4,097 bytes and a maximum merge size of
        7,169 bytes.  Verify that a single merge job is produced, that its
        files are ordered by run, lumi and first event, and that it consumed
        all of the "fileA".."fileC" group together with all of either the
        "file1".."file4" or the "fileI".."fileIV" group.
        """
        self.stuffWMBS()

        jobFactory = SplitterFactory()(package="WMCore.WMBS",
                                       subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=4097, max_merge_size=7169,
                            max_merge_events=20000)

        assert len(result) == 1, "ERROR: More than one JobGroup returned."

        jobGroup = result[0]
        assert len(jobGroup.jobs) == 1, \
            "ERROR: One job should have been returned."

        goldenFilesA = ["file1", "file2", "file3", "file4"]
        goldenFilesB = ["fileA", "fileB", "fileC"]
        goldenFilesC = ["fileI", "fileII", "fileIII", "fileIV"]

        mergeJob = jobGroup.jobs[0]
        self.assertEqual(mergeJob["estimatedDiskUsage"], 7)
        self.assertEqual(mergeJob["possiblePSN"], {"T1_US_FNAL", "T2_CH_CERN"})

        jobFiles = list(jobGroup.jobs)[0].getFiles()

        # (run, lumi, first event) of the previous file; a run of 0 means
        # that no file has been seen yet.
        previous = (0, 0, 0)
        for fileObj in jobFiles:
            lfn = fileObj["lfn"]

            # Tick the file off in the first expected group that lists it.
            for golden in (goldenFilesA, goldenFilesB, goldenFilesC):
                if lfn in golden:
                    golden.remove(lfn)
                    break

            firstRun = list(fileObj["runs"])[0]
            present = (firstRun.run, min(firstRun), fileObj["first_event"])

            if previous[0] != 0:
                assert present[0] >= previous[0], \
                    "ERROR: Files not sorted by run."

                if present[0] == previous[0]:
                    assert present[1] >= previous[1], \
                        "ERROR: Files not ordered by lumi"

                    if present[1] == previous[1]:
                        assert present[2] >= previous[2], \
                            "ERROR: Files not ordered by first event"

            previous = present

        assert not goldenFilesB and (not goldenFilesA or not goldenFilesC), \
            "ERROR: Files not allocated to jobs correctly."

    def testMaxEvents1(self):
        """
        _testMaxEvents1_

        Limit merge jobs to a single event.  Verify that one job group with
        three merge jobs comes back, that every job holds exactly the files of
        one expected group, and that the files of each job are ordered by run,
        lumi and first event.
        """
        self.stuffWMBS()

        jobFactory = SplitterFactory()(package="WMCore.WMBS",
                                       subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=1, max_merge_size=20000,
                            max_merge_events=1)

        assert len(result) == 1, \
            "ERROR: More than one JobGroup returned: %s" % result

        mergeJobs = result[0].jobs
        assert len(mergeJobs) == 3, \
            "ERROR: Three jobs should have been returned: %s" % len(mergeJobs)

        goldenFilesA = ["file1", "file2", "file3", "file4"]
        goldenFilesB = ["fileA", "fileB", "fileC"]
        goldenFilesC = ["fileI", "fileII", "fileIII", "fileIV"]

        for job in result[0].jobs:
            self.assertEqual(job["possiblePSN"], {"T1_US_FNAL", "T2_CH_CERN"})

            jobFiles = job.getFiles()

            # The first file of the job decides which expected group, and
            # therefore which disk usage estimate, applies.
            leadingLfn = jobFiles[0]["lfn"]
            if leadingLfn in goldenFilesA:
                expectedDiskUsage, goldenFiles = 4, goldenFilesA
            elif leadingLfn in goldenFilesB:
                expectedDiskUsage, goldenFiles = 3, goldenFilesB
            else:
                expectedDiskUsage, goldenFiles = 4, goldenFilesC
            self.assertEqual(job["estimatedDiskUsage"], expectedDiskUsage)

            # (run, lumi, first event) of the previous file; a run of 0
            # means that no file has been seen yet.
            previous = (0, 0, 0)
            for fileObj in jobFiles:
                lfn = fileObj["lfn"]
                assert lfn in goldenFiles, "Error: Unknown file in merge jobs."
                goldenFiles.remove(lfn)

                firstRun = list(fileObj["runs"])[0]
                present = (firstRun.run, min(firstRun), fileObj["first_event"])

                if previous[0] != 0:
                    assert present[0] >= previous[0], \
                        "ERROR: Files not sorted by run: %s, %s" % (present[0], previous[0])

                    if present[0] == previous[0]:
                        assert present[1] >= previous[1], \
                            "ERROR: Files not ordered by lumi"

                        if present[1] == previous[1]:
                            assert present[2] >= previous[2], \
                                "ERROR: Files not ordered by first event"

                previous = present

        assert not goldenFilesA and not goldenFilesB and not goldenFilesC, \
            "ERROR: Files missing from merge jobs."

    def testMaxEvents2(self):
        """
        _testMaxEvents2_

        Use a minimum merge size of 4,097 bytes and allow at most 7,169
        events per merge job.  Verify that a single merge job is produced,
        that its files are ordered by run, lumi and first event, and that it
        consumed all of the "fileA".."fileC" group together with all of either
        the "file1".."file4" or the "fileI".."fileIV" group.
        """
        self.stuffWMBS()

        jobFactory = SplitterFactory()(package="WMCore.WMBS",
                                       subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=4097, max_merge_size=20000,
                            max_merge_events=7169)

        assert len(result) == 1, "ERROR: More than one JobGroup returned."

        jobGroup = result[0]
        assert len(jobGroup.jobs) == 1, \
            "ERROR: One job should have been returned."

        mergeJob = jobGroup.jobs[0]
        self.assertEqual(mergeJob["estimatedDiskUsage"], 7)
        self.assertEqual(mergeJob["possiblePSN"], {"T1_US_FNAL", "T2_CH_CERN"})

        goldenFilesA = ["file1", "file2", "file3", "file4"]
        goldenFilesB = ["fileA", "fileB", "fileC"]
        goldenFilesC = ["fileI", "fileII", "fileIII", "fileIV"]

        jobFiles = list(jobGroup.jobs)[0].getFiles()

        # (run, lumi, first event) of the previous file; a run of 0 means
        # that no file has been seen yet.
        previous = (0, 0, 0)
        for fileObj in jobFiles:
            lfn = fileObj["lfn"]

            # Tick the file off in the first expected group that lists it.
            for golden in (goldenFilesA, goldenFilesB, goldenFilesC):
                if lfn in golden:
                    golden.remove(lfn)
                    break

            firstRun = list(fileObj["runs"])[0]
            present = (firstRun.run, min(firstRun), fileObj["first_event"])

            if previous[0] != 0:
                assert present[0] >= previous[0], \
                    "ERROR: Files not sorted by run."

                if present[0] == previous[0]:
                    assert present[1] >= previous[1], \
                        "ERROR: Files not ordered by lumi"

                    if present[1] == previous[1]:
                        assert present[2] >= previous[2], \
                            "ERROR: Files not ordered by first event"

            previous = present

        assert not goldenFilesB and (not goldenFilesA or not goldenFilesC), \
            "ERROR: Files not allocated to jobs correctly."

    def testParallelProcessing(self):
        """
        _testParallelProcessing_

        Verify that merging works correctly when multiple processing
        subscriptions are run over the same input files.  The merging algorithm
        should ignore processing jobs that feed into different merge
        subscriptions.
        """
        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute(siteName="T2_CH_CERN", pnn="T2_CH_CERN")
        locationAction.execute(siteName="T1_US_FNAL", pnn="T2_CH_CERN")

        mergeFilesetA = Fileset(name="mergeFilesetA")
        mergeFilesetB = Fileset(name="mergeFilesetB")
        mergeFilesetA.create()
        mergeFilesetB.create()

        mergeMergedFilesetA = Fileset(name="mergeMergedFilesetA")
        mergeMergedFilesetB = Fileset(name="mergeMergedFilesetB")
        mergeMergedFilesetA.create()
        mergeMergedFilesetB.create()

        mergeWorkflow = Workflow(name="mergeWorkflow", spec="bogus",
                                 owner="Steve", task="Test")
        mergeWorkflow.create()

        mergeSubscriptionA = Subscription(fileset=mergeFilesetA,
                                          workflow=mergeWorkflow,
                                          split_algo="WMBSMergeBySize")
        mergeSubscriptionB = Subscription(fileset=mergeFilesetB,
                                          workflow=mergeWorkflow,
                                          split_algo="WMBSMergeBySize")
        mergeSubscriptionA.create()
        mergeSubscriptionB.create()

        inputFileset = Fileset(name="inputFileset")
        inputFileset.create()

        inputFileA = File(lfn="inputLFNA")
        inputFileB = File(lfn="inputLFNB")
        inputFileA.create()
        inputFileB.create()

        procWorkflowA = Workflow(name="procWorkflowA", spec="bunk2",
                                 owner="Steve", task="Test")
        procWorkflowA.create()
        procWorkflowA.addOutput("output", mergeFilesetA, mergeMergedFilesetA)
        procWorkflowB = Workflow(name="procWorkflowB", spec="bunk3",
                                 owner="Steve", task="Test2")
        procWorkflowB.create()
        procWorkflowB.addOutput("output", mergeFilesetB, mergeMergedFilesetB)

        procSubscriptionA = Subscription(fileset=inputFileset,
                                         workflow=procWorkflowA,
                                         split_algo="EventBased")
        procSubscriptionA.create()
        procSubscriptionB = Subscription(fileset=inputFileset,
                                         workflow=procWorkflowB,
                                         split_algo="EventBased")
        procSubscriptionB.create()

        jobGroupA = JobGroup(subscription=procSubscriptionA)
        jobGroupA.create()
        jobGroupB = JobGroup(subscription=procSubscriptionB)
        jobGroupB.create()

        changeStateDAO = self.daoFactory(classname="Jobs.ChangeState")

        testJobA = Job()
        testJobA.addFile(inputFileA)
        testJobA.create(jobGroupA)
        testJobA["state"] = "cleanout"
        testJobA["oldstate"] = "new"
        testJobA["couch_record"] = "somejive"
        testJobA["retry_count"] = 0
        testJobA["outcome"] = "success"
        testJobA.save()

        testJobB = Job()
        testJobB.addFile(inputFileB)
        testJobB.create(jobGroupA)
        testJobB["state"] = "cleanout"
        testJobB["oldstate"] = "new"
        testJobB["couch_record"] = "somejive"
        testJobB["retry_count"] = 0
        testJobB["outcome"] = "success"
        testJobB.save()

        testJobC = Job()
        testJobC.addFile(inputFileA)
        testJobC.create(jobGroupB)
        testJobC["state"] = "cleanout"
        testJobC["oldstate"] = "new"
        testJobC["couch_record"] = "somejive"
        testJobC["retry_count"] = 0
        testJobC["outcome"] = "success"
        testJobC.save()

        testJobD = Job()
        testJobD.addFile(inputFileA)
        testJobD.create(jobGroupB)
        testJobD["state"] = "cleanout"
        testJobD["oldstate"] = "new"
        testJobD["couch_record"] = "somejive"
        testJobD["retry_count"] = 0
        testJobD["outcome"] = "failure"
        testJobD.save()

        testJobE = Job()
        testJobE.addFile(inputFileB)
        testJobE.create(jobGroupB)
        testJobE["state"] = "cleanout"
        testJobE["oldstate"] = "new"
        testJobE["couch_record"] = "somejive"
        testJobE["retry_count"] = 0
        testJobE["outcome"] = "success"
        testJobE.save()

        testJobF = Job()
        testJobF.addFile(inputFileB)
        testJobF.create(jobGroupB)
        testJobF["state"] = "cleanout"
        testJobF["oldstate"] = "new"
        testJobF["couch_record"] = "somejive"
        testJobF["retry_count"] = 0
        testJobF["outcome"] = "failure"
        testJobF.save()

        changeStateDAO.execute([testJobA, testJobB, testJobC, testJobD,
                                testJobE, testJobF])

        fileA = File(lfn="fileA", size=1024, events=1024, first_event=0,
                     locations={"T2_CH_CERN"})
        fileA.addRun(Run(1, *[45]))
        fileA.create()
        fileA.addParent(inputFileA["lfn"])
        fileB = File(lfn="fileB", size=1024, events=1024, first_event=0,
                     locations={"T2_CH_CERN"})
        fileB.addRun(Run(1, *[45]))
        fileB.create()
        fileB.addParent(inputFileB["lfn"])

        jobGroupA.output.addFile(fileA)
        jobGroupA.output.addFile(fileB)
        jobGroupA.output.commit()

        mergeFilesetA.addFile(fileA)
        mergeFilesetA.addFile(fileB)
        mergeFilesetA.commit()

        fileC = File(lfn="fileC", size=1024, events=1024, first_event=0,
                     locations={"T2_CH_CERN"})
        fileC.addRun(Run(1, *[45]))
        fileC.create()
        fileC.addParent(inputFileA["lfn"])
        fileD = File(lfn="fileD", size=1024, events=1024, first_event=0,
                     locations={"T2_CH_CERN"})
        fileD.addRun(Run(1, *[45]))
        fileD.create()
        fileD.addParent(inputFileB["lfn"])

        jobGroupB.output.addFile(fileC)
        jobGroupB.output.addFile(fileD)

        mergeFilesetB.addFile(fileC)
        mergeFilesetB.addFile(fileD)
        mergeFilesetB.commit()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=mergeSubscriptionB)

        result = jobFactory(min_merge_size=1, max_merge_size=20000,
                            max_merge_events=7169)

        assert len(result) == 0, \
            "Error: No merge jobs should have been created."

        fileE = File(lfn="fileE", size=1024, events=1024, first_event=0,
                     locations={"T2_CH_CERN"})
        fileE.addRun(Run(1, *[45]))
        fileE.create()
        fileE.addParent(inputFileA["lfn"])
        fileF = File(lfn="fileF", size=1024, events=1024, first_event=0,
                     locations={"T2_CH_CERN"})
        fileF.addRun(Run(1, *[45]))
        fileF.create()
        fileF.addParent(inputFileB["lfn"])

        jobGroupB.output.addFile(fileE)
        jobGroupB.output.addFile(fileF)

        mergeFilesetB.addFile(fileE)
        mergeFilesetB.addFile(fileF)
        mergeFilesetB.commit()

        testJobD["outcome"] = "success"
        testJobD.save()
        testJobF["outcome"] = "success"
        testJobF.save()

        changeStateDAO.execute([testJobD, testJobF])

        result = jobFactory(min_merge_size=1, max_merge_size=20000,
                            max_merge_events=7169)

        assert len(result) == 1, \
            "Error: One merge job should have been created: %s" % len(result)

        return

    def testLocationMerging(self):
        """
        _testLocationMerging_

        Add one extra mergeable file located at T1_UK_RAL_Disk and verify
        that files residing at different locations are not merged together
        in the same job.
        """
        self.stuffWMBS()

        # Register the additional site before creating a file located there.
        self.daoFactory(classname="Locations.New").execute(
            siteName="T1_UK_RAL", pnn="T1_UK_RAL_Disk")

        ralFile = File(lfn="fileSite2", size=4098, events=1024,
                       first_event=0, locations={"T1_UK_RAL_Disk"})
        ralFile.addRun(Run(1, 46))
        ralFile.create()
        ralFile.addParent(self.parentFileSite2["lfn"])

        self.mergeFileset.addFile(ralFile)
        self.mergeFileset.commit()

        jobFactory = SplitterFactory()(package="WMCore.WMBS",
                                       subscription=self.mergeSubscription)
        jobGroups = jobFactory(min_merge_size=4097, max_merge_size=99999999,
                               max_merge_events=999999999)

        assert len(jobGroups) == 1, \
            "ERROR: More than one JobGroup returned."

        mergeJobs = jobGroups[0].jobs
        assert len(mergeJobs) == 2, \
            "ERROR: Two jobs should have been returned."

        # Count the jobs according to the set of sites they may run at.
        ralCount = 0
        fnalCernCount = 0
        for mergeJob in mergeJobs:
            candidateSites = mergeJob["possiblePSN"]
            if candidateSites == {"T1_UK_RAL"}:
                ralCount += 1
            elif candidateSites == {"T1_US_FNAL", "T2_CH_CERN"}:
                fnalCernCount += 1

        self.assertEqual(ralCount, 1)
        self.assertEqual(fnalCernCount, 1)

    def testFilesetCloseout(self):
        """
        _testFilesetCloseout_

        Verify that the merge algorithm works correctly when its input
        fileset is closed.  After the regular merge jobs have been created
        only the four "bad" files may remain available.  Once the fileset is
        closed the splitter must not create further jobs for them; instead
        those orphaned files (output of a split by lumi / split by event
        where one parent processing job failed while others succeeded) have
        to end up as failed so that the fileset closing works.
        """
        self.stuffWMBS()

        jobFactory = SplitterFactory()(package="WMCore.WMBS",
                                       subscription=self.mergeSubscription)
        mergeArgs = {"min_merge_size": 1,
                     "max_merge_size": 999999999999,
                     "max_merge_events": 999999999}

        # Get all the good merge jobs out of the way; the returned job
        # groups themselves are of no interest here.
        jobFactory(**mergeArgs)

        subscriptionId = self.mergeSubscription["id"]

        # Only the bad files may still be "available" at this point.
        availableAction = self.daoFactory(
            classname="Subscriptions.GetAvailableFilesMeta")
        orphans = availableAction.execute(subscriptionId)

        assert len(orphans) == 4, \
            "Error: Wrong number of available files."

        expectedLfns = ["badFile1", "badFileA", "badFileB", "badFileC"]
        for orphan in orphans:
            assert orphan["lfn"] in expectedLfns, \
                "Error: Extra file is available."

            expectedLfns.remove(orphan["lfn"])

        # Close the fileset and run the splitter once more.
        self.mergeFileset.markOpen(False)
        afterClose = jobFactory(**mergeArgs)

        assert len(afterClose) == 0, \
            "Error: Merging should have returned zero jobs."

        self.mergeFileset.markOpen(False)

        stillAvailable = availableAction.execute(subscriptionId)

        assert len(stillAvailable) == 0, \
            "Error: There should be no more available files."

        failedAction = self.daoFactory(
            classname="Subscriptions.GetFailedFiles")
        failedFiles = failedAction.execute(subscriptionId)

        assert len(failedFiles) == 4, \
            "Error: Wrong number of failed files: %s" % failedFiles

        # Every failed file must be one of the previously available orphans.
        orphanIds = [orphan["id"] for orphan in orphans]
        for failedFile in failedFiles:
            assert failedFile["file"] in orphanIds, \
                "Error: Extra failed file."

    def testFilesetCloseout2(self):
        """
        _testFilesetCloseout2_

        Close the merge fileset before running the splitter and check that a
        single job group with two jobs comes back while exactly four files
        are reported as failed for the merge subscription, i.e. the orphan
        handling does not fail files that have failed for other workflows.
        """
        self.stuffWMBS()
        self.mergeFileset.markOpen(False)

        jobFactory = SplitterFactory()(package="WMCore.WMBS",
                                       subscription=self.mergeSubscription)

        # Get all the good merge jobs out of the way.
        jobGroups = jobFactory(min_merge_size=1,
                               max_merge_size=999999999999,
                               max_merge_events=999999999)

        self.assertEqual(len(jobGroups), 1,
                         "Error: Wrong number of job groups.")
        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "Error: Wrong number of jobs.")

        failedFiles = self.daoFactory(
            classname="Subscriptions.GetFailedFiles"
        ).execute(self.mergeSubscription["id"])

        self.assertEqual(len(failedFiles), 4,
                         "Error: Wrong number of failed files: %s" % failedFiles)

    def testForcedMerge(self):
        """
        _testForcedMerge_

        Same parameters as testMinMergeSize1a, but the fixture is created
        with injected=False.  With the merge fileset closed the splitter is
        expected to return no job groups at all.
        """
        self.stuffWMBS(injected=False)
        self.mergeFileset.markOpen(False)

        jobFactory = SplitterFactory()(package="WMCore.WMBS",
                                       subscription=self.mergeSubscription)
        jobGroups = jobFactory(min_merge_size=20000,
                               max_merge_size=200000,
                               max_merge_events=20000)

        self.assertEqual(len(jobGroups), 0)
示例#22
0
class WMBSMergeBySize(unittest.TestCase):
    def setUp(self):
        """
        _setUp_

        Boilerplate DB setup: configure logging and the database connection
        through TestInit, install the WMCore.WMBS schema and build the DAO
        factory used by the tests from the current thread's logger and
        database interface.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules = ["WMCore.WMBS"],
                                useDefault = False)

        # current_thread() is the supported spelling; the camelCase alias
        # currentThread() is deprecated since Python 3.10.
        myThread = threading.current_thread()
        self.daoFactory = DAOFactory(package = "WMCore.WMBS",
                                     logger = myThread.logger,
                                     dbinterface = myThread.dbi)
        return

    def tearDown(self):
        """
        _tearDown_

        Clear the database again once a test has finished.
        """
        self.testInit.clearDatabase()

    def stuffWMBS(self, injected = True):
        """
        _stuffWMBS_

        Insert the dummy filesets, workflows, subscriptions, job groups, jobs
        and files that the merge tests run against.

        Jobs created (all of them in the "cleanout" state):
          * parentFile1:     success in jobGroup1, failure in jobGroup3 (the
                             job group of the bogus input subscription)
          * parentFile2:     success in jobGroup1
          * parentFile3:     success in jobGroup2
          * parentFile4:     failure in jobGroup2
          * parentFile5:     one success and one failure in jobGroup2
          * parentFileSite2: success in jobGroup2

        Every output file is added to the output fileset of its job group and
        to both the merge fileset and the bogus fileset.

        The injected flag is handed to the Workflow.MarkInjectedWorkflows DAO
        for the merge workflow.
        """
        locationAction = self.daoFactory(classname = "Locations.New")
        locationAction.execute(siteName = "s1", seName = "somese.cern.ch")

        changeStateDAO = self.daoFactory(classname = "Jobs.ChangeState")

        def makeJob(parentFile, jobGroup, outcome):
            """Create a job in "cleanout" over parentFile with the given outcome."""
            newJob = Job()
            newJob.addFile(parentFile)
            newJob.create(jobGroup)
            newJob["state"] = "cleanout"
            newJob["oldstate"] = "new"
            newJob["couch_record"] = "somejive"
            newJob["retry_count"] = 0
            newJob["outcome"] = outcome
            newJob.save()
            changeStateDAO.execute([newJob])
            return newJob

        def makeFile(lfn, firstEvent, run, lumi, parentFile,
                     size = 1024, events = 1024):
            """Create an output file at somese.cern.ch parented to parentFile."""
            newFile = File(lfn = lfn, size = size, events = events,
                           first_event = firstEvent,
                           locations = {"somese.cern.ch"})
            newFile.addRun(Run(run, lumi))
            newFile.create()
            newFile.addParent(parentFile["lfn"])
            return newFile

        self.mergeFileset = Fileset(name = "mergeFileset")
        self.mergeFileset.create()
        self.bogusFileset = Fileset(name = "bogusFileset")
        self.bogusFileset.create()

        self.mergeMergedFileset = Fileset(name = "mergeMergedFileset")
        self.mergeMergedFileset.create()
        self.bogusMergedFileset = Fileset(name = "bogusMergedFileset")
        self.bogusMergedFileset.create()

        mergeWorkflow = Workflow(name = "mergeWorkflow", spec = "bunk2",
                                 owner = "Steve", task="Test")
        mergeWorkflow.create()
        markWorkflow = self.daoFactory(classname = "Workflow.MarkInjectedWorkflows")
        markWorkflow.execute(names = [mergeWorkflow.name], injected = injected)

        self.mergeSubscription = Subscription(fileset = self.mergeFileset,
                                              workflow = mergeWorkflow,
                                              split_algo = "WMBSMergeBySize")
        self.mergeSubscription.create()
        # NOTE(review): the bogus subscription is only instantiated here,
        # create() is never called on it -- confirm that this is intended.
        self.bogusSubscription = Subscription(fileset = self.bogusFileset,
                                              workflow = mergeWorkflow,
                                              split_algo = "WMBSMergeBySize")

        inputFileset = Fileset(name = "inputFileset")
        inputFileset.create()

        inputWorkflow = Workflow(name = "inputWorkflow", spec = "input",
                                 owner = "Steve", task = "Test")
        inputWorkflow.create()
        inputWorkflow.addOutput("output", self.mergeFileset,
                                self.mergeMergedFileset)
        inputWorkflow.addOutput("output2", self.bogusFileset,
                                self.bogusMergedFileset)
        bogusInputWorkflow = Workflow(name = "bogusInputWorkflow", spec = "input",
                                      owner = "Steve", task = "Test")
        bogusInputWorkflow.create()

        inputSubscription = Subscription(fileset = inputFileset,
                                         workflow = inputWorkflow)
        inputSubscription.create()
        bogusInputSubscription = Subscription(fileset = inputFileset,
                                              workflow = bogusInputWorkflow)
        bogusInputSubscription.create()

        parentFile1 = File(lfn = "parentFile1")
        parentFile1.create()
        parentFile2 = File(lfn = "parentFile2")
        parentFile2.create()
        parentFile3 = File(lfn = "parentFile3")
        parentFile3.create()
        parentFile4 = File(lfn = "parentFile4")
        parentFile4.create()
        self.parentFileSite2 = File(lfn = "parentFileSite2")
        self.parentFileSite2.create()

        jobGroup1 = JobGroup(subscription = inputSubscription)
        jobGroup1.create()
        jobGroup2 = JobGroup(subscription = inputSubscription)
        jobGroup2.create()
        jobGroup3 = JobGroup(subscription = bogusInputSubscription)
        jobGroup3.create()

        makeJob(parentFile1, jobGroup1, "success")
        makeJob(parentFile1, jobGroup3, "failure")
        makeJob(parentFile2, jobGroup1, "success")
        makeJob(parentFile3, jobGroup2, "success")
        makeJob(parentFile4, jobGroup2, "failure")

        # We'll simulate a failed split by event job that the merger should
        # ignore.
        parentFile5 = File(lfn = "parentFile5")
        parentFile5.create()

        makeJob(parentFile5, jobGroup2, "success")
        makeJob(parentFile5, jobGroup2, "failure")
        makeJob(self.parentFileSite2, jobGroup2, "success")

        # The files are created in the same order as before (badFile1 first)
        # so that the database assigns them the same sequence of IDs.
        badFile1 = makeFile("badFile1", 0, 1, 45, parentFile5,
                            size = 10241024, events = 10241024)

        # (lfn, first event, run, lumi, parent file)
        groupOneSpecs = [("file1", 0, 1, 45, parentFile1),
                         ("file2", 1024, 1, 45, parentFile1),
                         ("file3", 2048, 1, 45, parentFile1),
                         ("file4", 3072, 1, 45, parentFile1),
                         ("fileA", 0, 1, 46, parentFile2),
                         ("fileB", 1024, 1, 46, parentFile2),
                         ("fileC", 2048, 1, 46, parentFile2)]
        groupTwoSpecs = [("fileI", 0, 2, 46, parentFile3),
                         ("fileII", 1024, 2, 46, parentFile3),
                         ("fileIII", 2048, 2, 46, parentFile3),
                         ("fileIV", 3072, 2, 46, parentFile3),
                         ("badFileA", 0, 1, 47, parentFile4),
                         ("badFileB", 1024, 1, 47, parentFile4),
                         ("badFileC", 2048, 1, 47, parentFile4)]

        groupOneFiles = [makeFile(*spec) for spec in groupOneSpecs]
        groupTwoFiles = [makeFile(*spec) for spec in groupTwoSpecs]
        groupTwoFiles.append(badFile1)

        for outputFile in groupOneFiles:
            jobGroup1.output.addFile(outputFile)
        jobGroup1.output.commit()

        for outputFile in groupTwoFiles:
            jobGroup2.output.addFile(outputFile)
        jobGroup2.output.commit()

        for mergeableFile in groupOneFiles + groupTwoFiles:
            self.mergeFileset.addFile(mergeableFile)
            self.bogusFileset.addFile(mergeableFile)

        self.mergeFileset.commit()
        self.bogusFileset.commit()

        return

    def testMinMergeSize1(self):
        """
        _testMinMergeSize1_

        Run the splitter with a minimum merge size of 20,000 bytes while the
        merge fileset is still open and verify that no job groups are
        returned.
        """
        self.stuffWMBS()

        jobFactory = SplitterFactory()(package="WMCore.WMBS",
                                       subscription=self.mergeSubscription)
        jobGroups = jobFactory(min_merge_size=20000,
                               max_merge_size=2000000000,
                               max_merge_events=200000000)

        assert len(jobGroups) == 0, \
            "ERROR: No job groups should be returned."

    def testMinMergeSize1a(self):
        """
        _testMinMergeSize1a_

        Set the minimum merge size to 20,000 bytes and mark the merge fileset
        as closed.  Verify that a single job group holding two jobs is pushed
        out, that the jobs only contain the expected files and that the files
        of each job are ordered by run, then lumi, then first event.
        """
        self.stuffWMBS()
        self.mergeFileset.markOpen(False)

        splitter = SplitterFactory()
        jobFactory = splitter(package = "WMCore.WMBS",
                              subscription = self.mergeSubscription)

        result = jobFactory(min_merge_size = 20000, max_merge_size = 200000,
                            max_merge_events = 20000)

        assert len(result) == 1, \
               "ERROR: More than one JobGroup returned: %s" % len(result)

        assert len(result[0].jobs) == 2, \
               "Error: Two jobs should have been returned."

        goldenFilesA = ["file1", "file2", "file3", "file4", "fileA", "fileB",
                        "fileC"]
        goldenFilesB = ["fileI", "fileII", "fileIII", "fileIV"]

        for job in result[0].jobs:
            jobFiles = job.getFiles()

            # The two expected jobs differ in size, so the number of files
            # tells which golden list applies.
            if len(jobFiles) == len(goldenFilesA):
                goldenFiles = goldenFilesA
            else:
                goldenFiles = goldenFilesB

            currentRun = 0
            currentLumi = 0
            currentEvent = 0
            for mergedFile in jobFiles:
                mergedFile.loadData()
                assert mergedFile["lfn"] in goldenFiles, \
                       "Error: Unknown file: %s" % mergedFile["lfn"]
                assert mergedFile["locations"] == set(["somese.cern.ch"]), \
                       "Error: File is missing a location."
                goldenFiles.remove(mergedFile["lfn"])

                fileRun = list(mergedFile["runs"])[0].run
                fileLumi = min(list(mergedFile["runs"])[0])
                fileEvent = mergedFile["first_event"]

                # A run of 0 marks "no previous file seen yet".
                if currentRun == 0:
                    currentRun = fileRun
                    currentLumi = fileLumi
                    currentEvent = fileEvent
                    continue

                assert fileRun >= currentRun, \
                       "ERROR: Files not sorted by run."

                if fileRun == currentRun:
                    assert fileLumi >= currentLumi, \
                           "ERROR: Files not ordered by lumi"

                    # First events are only comparable within the same run
                    # AND lumi, hence the nesting (as in testMinMergeSize2).
                    if fileLumi == currentLumi:
                        assert fileEvent >= currentEvent, \
                               "ERROR: Files not ordered by first event"

                currentRun = fileRun
                currentLumi = fileLumi
                currentEvent = fileEvent

        return

    def testMinMergeSize2(self):
        """
        _testMinMergeSize2_

        Use a minimum merge size of 7,167 bytes, one byte less than the
        combined size of the seven 1024 byte files expected in the job.
        Verify that exactly one merge job holding those seven files is
        produced and that its files are ordered by run, lumi and first event.
        """
        self.stuffWMBS()

        jobFactory = SplitterFactory()(package="WMCore.WMBS",
                                       subscription=self.mergeSubscription)
        jobGroups = jobFactory(min_merge_size=7167, max_merge_size=20000,
                               max_merge_events=20000)

        assert len(jobGroups) == 1, \
            "ERROR: More than one JobGroup returned: %d" % len(jobGroups)

        assert len(jobGroups[0].jobs) == 1, \
            "ERROR: One job should have been returned."

        mergeJob = list(jobGroups[0].jobs)[0]
        mergedFiles = mergeJob.getFiles()

        expectedLfns = ["file1", "file2", "file3", "file4", "fileA", "fileB",
                        "fileC"]

        assert len(mergedFiles) == len(expectedLfns), \
            "ERROR: Merge job should contain %d files." % len(expectedLfns)

        # (run, lumi, first event) of the previous file; a run of 0 means
        # that no file has been looked at yet.
        lastRun, lastLumi, lastEvent = 0, 0, 0
        for mergedFile in mergedFiles:
            assert mergedFile["lfn"] in expectedLfns, \
                "Error: Unknown file: %s" % mergedFile["lfn"]
            assert mergedFile["locations"] == set(["somese.cern.ch"]), \
                "Error: File is missing a location."
            expectedLfns.remove(mergedFile["lfn"])

            firstRun = list(mergedFile["runs"])[0]
            thisRun = firstRun.run
            thisLumi = min(firstRun)
            thisEvent = mergedFile["first_event"]

            if lastRun != 0:
                assert thisRun >= lastRun, \
                    "ERROR: Files not sorted by run."

                if thisRun == lastRun:
                    assert thisLumi >= lastLumi, \
                        "ERROR: Files not ordered by lumi"

                    if thisLumi == lastLumi:
                        assert thisEvent >= lastEvent, \
                            "ERROR: Files not ordered by first event"

            lastRun, lastLumi, lastEvent = thisRun, thisLumi, thisEvent

    def testMaxMergeSize1(self):
        """
        _testMaxMergeSize1_

        Set the maximum merge size to be two bytes.  Verify that one job
        group with three merge jobs is created, that each merge job contains
        exactly one of the expected sets of files and that the files inside
        every job are ordered by run, lumi and first event.
        """
        self.stuffWMBS()

        splitter = SplitterFactory()
        jobFactory = splitter(package = "WMCore.WMBS",
                              subscription = self.mergeSubscription)

        result = jobFactory(min_merge_size = 1, max_merge_size = 2,
                            max_merge_events = 20000)

        assert len(result) == 1, \
               "ERROR: More than one JobGroup returned: %s" % result

        assert len(result[0].jobs) == 3, \
               "ERROR: Three jobs should have been returned."

        goldenFilesA = ["file1", "file2", "file3", "file4"]
        goldenFilesB = ["fileA", "fileB", "fileC"]
        goldenFilesC = ["fileI", "fileII", "fileIII", "fileIV"]

        for job in result[0].jobs:
            jobFiles = job.getFiles()

            # The first file of a job decides which golden list the whole
            # job is checked against.
            if jobFiles[0]["lfn"] in goldenFilesA:
                goldenFiles = goldenFilesA
            elif jobFiles[0]["lfn"] in goldenFilesB:
                goldenFiles = goldenFilesB
            else:
                goldenFiles = goldenFilesC

            currentRun = 0
            currentLumi = 0
            currentEvent = 0
            for mergedFile in jobFiles:
                assert mergedFile["lfn"] in goldenFiles, \
                       "Error: Unknown file in merge jobs."
                assert mergedFile["locations"] == set(["somese.cern.ch"]), \
                       "Error: File is missing a location."

                goldenFiles.remove(mergedFile["lfn"])

                # The ordering checks have to run for every file of the job.
                # Previously they sat outside this loop, only ever saw the
                # last file and therefore never asserted anything.
                fileRun = list(mergedFile["runs"])[0].run
                fileLumi = min(list(mergedFile["runs"])[0])
                fileEvent = mergedFile["first_event"]

                # A run of 0 marks "no previous file seen yet".
                if currentRun == 0:
                    currentRun = fileRun
                    currentLumi = fileLumi
                    currentEvent = fileEvent
                    continue

                assert fileRun >= currentRun, \
                       "ERROR: Files not sorted by run."

                if fileRun == currentRun:
                    assert fileLumi >= currentLumi, \
                           "ERROR: Files not ordered by lumi"

                    if fileLumi == currentLumi:
                        assert fileEvent >= currentEvent, \
                               "ERROR: Files not ordered by first event"

                currentRun = fileRun
                currentLumi = fileLumi
                currentEvent = fileEvent

        assert len(goldenFilesA) == 0 and len(goldenFilesB) == 0 and \
               len(goldenFilesC) == 0, \
               "ERROR: Files missing from merge jobs."

        return

    def testMaxMergeSize2(self):
        """
        _testMaxMergeSize2_

        Use a minimum merge size of 4,097 bytes (one byte more than four of
        the 1024 byte files) and a maximum merge size of 7,169 bytes (one
        byte more than seven of them).  Verify that a single merge job is
        produced which contains all of fileA-C plus either all of file1-4 or
        all of fileI-IV, with the files ordered by run, lumi and first event.
        """
        self.stuffWMBS()

        jobFactory = SplitterFactory()(package="WMCore.WMBS",
                                       subscription=self.mergeSubscription)
        jobGroups = jobFactory(min_merge_size=4097, max_merge_size=7169,
                               max_merge_events=20000)

        assert len(jobGroups) == 1, \
            "ERROR: More than one JobGroup returned."

        assert len(jobGroups[0].jobs) == 1, \
            "ERROR: One job should have been returned."

        goldenFilesA = ["file1", "file2", "file3", "file4"]
        goldenFilesB = ["fileA", "fileB", "fileC"]
        goldenFilesC = ["fileI", "fileII", "fileIII", "fileIV"]

        mergedFiles = list(jobGroups[0].jobs)[0].getFiles()

        # (run, lumi, first event) of the previous file; a run of 0 means
        # that no file has been looked at yet.
        lastRun, lastLumi, lastEvent = 0, 0, 0
        for mergedFile in mergedFiles:
            assert mergedFile["locations"] == set(["somese.cern.ch"]), \
                "Error: File is missing a location."

            # Tick the file off in the first golden list that knows it.
            lfn = mergedFile["lfn"]
            for goldenList in (goldenFilesA, goldenFilesB, goldenFilesC):
                if lfn in goldenList:
                    goldenList.remove(lfn)
                    break

            firstRun = list(mergedFile["runs"])[0]
            thisRun = firstRun.run
            thisLumi = min(firstRun)
            thisEvent = mergedFile["first_event"]

            if lastRun != 0:
                assert thisRun >= lastRun, \
                    "ERROR: Files not sorted by run."

                if thisRun == lastRun:
                    assert thisLumi >= lastLumi, \
                        "ERROR: Files not ordered by lumi"

                    if thisLumi == lastLumi:
                        assert thisEvent >= lastEvent, \
                            "ERROR: Files not ordered by first event"

            lastRun, lastLumi, lastEvent = thisRun, thisLumi, thisEvent

        assert not goldenFilesB and \
            (not goldenFilesA or not goldenFilesC), \
            "ERROR: Files not allocated to jobs correctly."

    def testMaxEvents1(self):
        """
        _testMaxEvents1_

        Set the maximum number of events per merge job to 1.

        Three jobs are expected in a single job group.  Each job must only
        contain files from one of the three expected groups, all located at
        "somese.cern.ch" and sorted by run, then lumi, then first event.
        Between them the jobs must use up every expected file.
        """
        self.stuffWMBS()

        splitter = SplitterFactory()
        jobFactory = splitter(package = "WMCore.WMBS",
                              subscription = self.mergeSubscription)

        result = jobFactory(min_merge_size = 1, max_merge_size = 20000,
                            max_merge_events = 1)

        # unittest assertions are used rather than bare asserts so that the
        # checks still run when the interpreter is started with -O.
        self.assertEqual(len(result), 1,
                         "ERROR: More than one JobGroup returned: %s" % (result,))
        self.assertEqual(len(result[0].jobs), 3,
                         "ERROR: Three jobs should have been returned: %s" % len(result[0].jobs))

        goldenFilesA = ["file1", "file2", "file3", "file4"]
        goldenFilesB = ["fileA", "fileB", "fileC"]
        goldenFilesC = ["fileI", "fileII", "fileIII", "fileIV"]

        for job in result[0].jobs:
            jobFiles = job.getFiles()

            # The first file decides which expected group this job is
            # checked against.
            firstLfn = jobFiles[0]["lfn"]
            if firstLfn in goldenFilesA:
                goldenFiles = goldenFilesA
            elif firstLfn in goldenFilesB:
                goldenFiles = goldenFilesB
            else:
                goldenFiles = goldenFilesC

            # currentRun == 0 doubles as the "no previous file yet" marker.
            currentRun = 0
            currentLumi = 0
            currentEvent = 0
            for mergedFile in jobFiles:
                self.assertTrue(mergedFile["lfn"] in goldenFiles,
                                "Error: Unknown file in merge jobs.")
                self.assertEqual(mergedFile["locations"], set(["somese.cern.ch"]),
                                 "Error: File is missing a location: %s" % (mergedFile["locations"],))

                goldenFiles.remove(mergedFile["lfn"])

                firstRun = list(mergedFile["runs"])[0]
                fileRun = firstRun.run
                fileLumi = min(firstRun)
                fileEvent = mergedFile["first_event"]

                if currentRun == 0:
                    currentRun = fileRun
                    currentLumi = fileLumi
                    currentEvent = fileEvent
                    continue

                self.assertTrue(fileRun >= currentRun,
                                "ERROR: Files not sorted by run: %s, %s" % (fileRun, currentRun))

                if fileRun == currentRun:
                    self.assertTrue(fileLumi >= currentLumi,
                                    "ERROR: Files not ordered by lumi")

                    if fileLumi == currentLumi:
                        self.assertTrue(fileEvent >= currentEvent,
                                        "ERROR: Files not ordered by first event")

                currentRun = fileRun
                currentLumi = fileLumi
                currentEvent = fileEvent

        self.assertTrue(len(goldenFilesA) == 0 and len(goldenFilesB) == 0 and
                        len(goldenFilesC) == 0,
                        "ERROR: Files missing from merge jobs.")

        return

    def testMaxEvents2(self):
        """
        _testMaxEvents2_

        Set the minimum merge size to be one byte larger than the largest job
        group in the WMBS instance and the max events to be one event larger
        than the total events in two of the groups.  Verify that one merge job
        is produced with two of the job groups in it.

        The test passes when every file of group B, plus every file of either
        group A or group C, shows up in the single job and the job's files
        are sorted by run, then lumi, then first event.
        """
        self.stuffWMBS()

        splitter = SplitterFactory()
        jobFactory = splitter(package = "WMCore.WMBS",
                              subscription = self.mergeSubscription)

        result = jobFactory(min_merge_size = 4097, max_merge_size = 20000,
                            max_merge_events = 7169)

        # unittest assertions are used rather than bare asserts so that the
        # checks still run when the interpreter is started with -O.
        self.assertEqual(len(result), 1,
                         "ERROR: More than one JobGroup returned.")
        self.assertEqual(len(result[0].jobs), 1,
                         "ERROR: One job should have been returned.")

        goldenFilesA = ["file1", "file2", "file3", "file4"]
        goldenFilesB = ["fileA", "fileB", "fileC"]
        goldenFilesC = ["fileI", "fileII", "fileIII", "fileIV"]

        jobFiles = list(result[0].jobs)[0].getFiles()

        # currentRun == 0 doubles as the "no previous file seen yet" marker.
        currentRun = 0
        currentLumi = 0
        currentEvent = 0
        for mergedFile in jobFiles:
            self.assertEqual(mergedFile["locations"], set(["somese.cern.ch"]),
                             "Error: File is missing a location.")

            # Tick the file off whichever expected group lists it.
            lfn = mergedFile["lfn"]
            if lfn in goldenFilesA:
                goldenFilesA.remove(lfn)
            elif lfn in goldenFilesB:
                goldenFilesB.remove(lfn)
            elif lfn in goldenFilesC:
                goldenFilesC.remove(lfn)

            # Sort key: run number and smallest entry of the first run
            # object, followed by the file's first event.
            firstRun = list(mergedFile["runs"])[0]
            fileRun = firstRun.run
            fileLumi = min(firstRun)
            fileEvent = mergedFile["first_event"]

            if currentRun == 0:
                currentRun = fileRun
                currentLumi = fileLumi
                currentEvent = fileEvent
                continue

            self.assertTrue(fileRun >= currentRun,
                            "ERROR: Files not sorted by run.")

            if fileRun == currentRun:
                self.assertTrue(fileLumi >= currentLumi,
                                "ERROR: Files not ordered by lumi")

                if fileLumi == currentLumi:
                    self.assertTrue(fileEvent >= currentEvent,
                                    "ERROR: Files not ordered by first event")

            currentRun = fileRun
            currentLumi = fileLumi
            currentEvent = fileEvent

        self.assertTrue(len(goldenFilesB) == 0 and
                        (len(goldenFilesA) == 0 or len(goldenFilesC) == 0),
                        "ERROR: Files not allocated to jobs correctly.")

        return

    def testParallelProcessing(self):
        """
        _testParallelProcessing_

        Verify that merging works correctly when multiple processing
        subscriptions are run over the same input files.  The merging algorithm
        should ignore processing jobs that feed into different merge
        subscriptions.

        Two processing workflows (A and B) read the same input fileset and
        write into separate merge filesets.  Two of workflow B's jobs are
        first saved with outcome "failure", and the splitter run on merge
        subscription B must return nothing.  Once those two jobs are re-saved
        as "success" the splitter must return exactly one result.
        """
        def makeCleanoutJob(inputFile, jobGroup, outcome):
            """Create a job on inputFile in jobGroup, saved in "cleanout"."""
            job = Job()
            job.addFile(inputFile)
            job.create(jobGroup)
            job["state"] = "cleanout"
            job["oldstate"] = "new"
            job["couch_record"] = "somejive"
            job["retry_count"] = 0
            job["outcome"] = outcome
            job.save()
            return job

        def makeOutputFile(lfn, parentFile):
            """Create a 1024 byte / 1024 event file parented to parentFile."""
            outputFile = File(lfn = lfn, size = 1024, events = 1024,
                              first_event = 0,
                              locations = set(["somese.cern.ch"]))
            outputFile.addRun(Run(1, *[45]))
            outputFile.create()
            outputFile.addParent(parentFile["lfn"])
            return outputFile

        locationAction = self.daoFactory(classname = "Locations.New")
        locationAction.execute(siteName = "s1", seName = "somese.cern.ch")

        mergeFilesetA = Fileset(name = "mergeFilesetA")
        mergeFilesetB = Fileset(name = "mergeFilesetB")
        mergeFilesetA.create()
        mergeFilesetB.create()

        mergeMergedFilesetA = Fileset(name = "mergeMergedFilesetA")
        mergeMergedFilesetB = Fileset(name = "mergeMergedFilesetB")
        mergeMergedFilesetA.create()
        mergeMergedFilesetB.create()

        mergeWorkflow = Workflow(name = "mergeWorkflow", spec = "bogus",
                                 owner = "Steve", task = "Test")
        mergeWorkflow.create()

        mergeSubscriptionA = Subscription(fileset = mergeFilesetA,
                                          workflow = mergeWorkflow,
                                          split_algo = "WMBSMergeBySize")
        mergeSubscriptionB = Subscription(fileset = mergeFilesetB,
                                          workflow = mergeWorkflow,
                                          split_algo = "WMBSMergeBySize")
        mergeSubscriptionA.create()
        mergeSubscriptionB.create()

        inputFileset = Fileset(name = "inputFileset")
        inputFileset.create()

        inputFileA = File(lfn = "inputLFNA")
        inputFileB = File(lfn = "inputLFNB")
        inputFileA.create()
        inputFileB.create()

        procWorkflowA = Workflow(name = "procWorkflowA", spec = "bunk2",
                                 owner = "Steve", task = "Test")
        procWorkflowA.create()
        procWorkflowA.addOutput("output", mergeFilesetA, mergeMergedFilesetA)
        procWorkflowB = Workflow(name = "procWorkflowB", spec = "bunk3",
                                 owner = "Steve", task = "Test2")
        procWorkflowB.create()
        procWorkflowB.addOutput("output", mergeFilesetB, mergeMergedFilesetB)

        procSubscriptionA = Subscription(fileset = inputFileset,
                                         workflow = procWorkflowA,
                                         split_algo = "EventBased")
        procSubscriptionA.create()
        procSubscriptionB = Subscription(fileset = inputFileset,
                                         workflow = procWorkflowB,
                                         split_algo = "EventBased")
        procSubscriptionB.create()

        jobGroupA = JobGroup(subscription = procSubscriptionA)
        jobGroupA.create()
        jobGroupB = JobGroup(subscription = procSubscriptionB)
        jobGroupB.create()

        changeStateDAO = self.daoFactory(classname = "Jobs.ChangeState")

        # Workflow A: one successful job per input file.
        testJobA = makeCleanoutJob(inputFileA, jobGroupA, "success")
        testJobB = makeCleanoutJob(inputFileB, jobGroupA, "success")
        # Workflow B: one successful and one failed job per input file.
        testJobC = makeCleanoutJob(inputFileA, jobGroupB, "success")
        testJobD = makeCleanoutJob(inputFileA, jobGroupB, "failure")
        testJobE = makeCleanoutJob(inputFileB, jobGroupB, "success")
        testJobF = makeCleanoutJob(inputFileB, jobGroupB, "failure")

        changeStateDAO.execute([testJobA, testJobB, testJobC, testJobD,
                                testJobE, testJobF])

        fileA = makeOutputFile("fileA", inputFileA)
        fileB = makeOutputFile("fileB", inputFileB)

        jobGroupA.output.addFile(fileA)
        jobGroupA.output.addFile(fileB)
        jobGroupA.output.commit()

        mergeFilesetA.addFile(fileA)
        mergeFilesetA.addFile(fileB)
        mergeFilesetA.commit()

        fileC = makeOutputFile("fileC", inputFileA)
        fileD = makeOutputFile("fileD", inputFileB)

        # NOTE(review): unlike jobGroupA.output, jobGroupB.output is never
        # committed in this test -- confirm that this is intentional.
        jobGroupB.output.addFile(fileC)
        jobGroupB.output.addFile(fileD)

        mergeFilesetB.addFile(fileC)
        mergeFilesetB.addFile(fileD)
        mergeFilesetB.commit()

        splitter = SplitterFactory()
        jobFactory = splitter(package = "WMCore.WMBS",
                              subscription = mergeSubscriptionB)

        result = jobFactory(min_merge_size = 1, max_merge_size = 20000,
                            max_merge_events = 7169)

        # unittest assertions are used rather than bare asserts so that the
        # checks still run when the interpreter is started with -O.
        self.assertEqual(len(result), 0,
                         "Error: No merge jobs should have been created.")

        fileE = makeOutputFile("fileE", inputFileA)
        fileF = makeOutputFile("fileF", inputFileB)

        jobGroupB.output.addFile(fileE)
        jobGroupB.output.addFile(fileF)

        mergeFilesetB.addFile(fileE)
        mergeFilesetB.addFile(fileF)
        mergeFilesetB.commit()

        # Flip the two failed workflow-B jobs to success and try again.
        testJobD["outcome"] = "success"
        testJobD.save()
        testJobF["outcome"] = "success"
        testJobF.save()

        changeStateDAO.execute([testJobD, testJobF])

        result = jobFactory(min_merge_size = 1, max_merge_size = 20000,
                            max_merge_events = 7169)

        self.assertEqual(len(result), 1,
                         "Error: One merge job should have been created: %s" % len(result))

        return

    def testLocationMerging(self):
        """
        _testLocationMerging_

        Verify that files residing on different SEs are not merged together in
        the same job.

        A file located at "somese2.cern.ch" is added to the merge fileset on
        top of what stuffWMBS() provides.  The splitter must then return one
        job group holding two jobs, and within each job every input file must
        have exactly one location, identical to that of the job's first file.
        """
        self.stuffWMBS()

        locationAction = self.daoFactory(classname = "Locations.New")
        locationAction.execute(siteName = "s2", seName = "somese2.cern.ch")

        fileSite2 = File(lfn = "fileSite2", size = 4098, events = 1024,
                         first_event = 0, locations = set(["somese2.cern.ch"]))
        fileSite2.addRun(Run(1, *[46]))
        fileSite2.create()
        fileSite2.addParent(self.parentFileSite2["lfn"])

        self.mergeFileset.addFile(fileSite2)
        self.mergeFileset.commit()

        splitter = SplitterFactory()
        jobFactory = splitter(package = "WMCore.WMBS",
                              subscription = self.mergeSubscription)

        result = jobFactory(min_merge_size = 4097, max_merge_size = 99999999,
                            max_merge_events = 999999999)

        # unittest assertions are used rather than bare asserts so that the
        # checks still run when the interpreter is started with -O.
        self.assertEqual(len(result), 1,
                         "ERROR: More than one JobGroup returned.")
        self.assertEqual(len(result[0].jobs), 2,
                         "ERROR: Two jobs should have been returned.")

        for job in result[0].jobs:
            jobFiles = job.getFiles()
            # The first file's location is the reference for the whole job.
            baseLocation = list(jobFiles[0]["locations"])[0]

            for inputFile in jobFiles:
                self.assertEqual(len(inputFile["locations"]), 1,
                                 "Error: Wrong number of locations")
                self.assertEqual(list(inputFile["locations"])[0], baseLocation,
                                 "Error: Wrong location.")

        return

    def testFilesetCloseout(self):
        """
        _testFilesetCloseout_

        Verify that the merge algorithm works correctly when its input fileset
        is closed.  The split algorithm should create merge jobs for all files
        regardless of size and then mark any orphaned files (files that are the
        result of a split by lumi / split by event where one of the parent
        processing jobs has failed while others have succeeded) as failed so
        that the fileset closing works.

        After a first splitter pass only the four "bad" files may remain
        available.  Once the fileset is closed, a second pass must return
        nothing, leave no files available and report exactly those four files
        as failed.
        """
        self.stuffWMBS()

        splitter = SplitterFactory()
        jobFactory = splitter(package = "WMCore.WMBS",
                              subscription = self.mergeSubscription)

        # Get all the good merge jobs out of the way; what the splitter
        # returns here is not inspected.
        jobFactory(min_merge_size = 1, max_merge_size = 999999999999,
                   max_merge_events = 999999999)

        # Verify that the bad files are the only "available" files
        availableAction = self.daoFactory(classname = "Subscriptions.GetAvailableFilesMeta")
        availFiles = availableAction.execute(self.mergeSubscription["id"])

        # unittest assertions are used rather than bare asserts so that the
        # checks still run when the interpreter is started with -O.
        self.assertEqual(len(availFiles), 4,
                         "Error: Wrong number of available files.")

        goldenFiles = ["badFile1", "badFileA", "badFileB", "badFileC"]
        for availFile in availFiles:
            self.assertTrue(availFile["lfn"] in goldenFiles,
                            "Error: Extra file is available.")

            goldenFiles.remove(availFile["lfn"])

        self.mergeFileset.markOpen(False)
        result = jobFactory(min_merge_size = 1, max_merge_size = 999999999999,
                            max_merge_events = 999999999)

        self.assertEqual(len(result), 0,
                         "Error: Merging should have returned zero jobs.")

        # NOTE(review): markOpen(False) was already called before the second
        # splitter pass; this repeat looks redundant -- confirm before
        # removing it.
        self.mergeFileset.markOpen(False)

        availFiles2 = availableAction.execute(self.mergeSubscription["id"])

        self.assertEqual(len(availFiles2), 0,
                         "Error: There should be no more available files.")

        failedAction = self.daoFactory(classname = "Subscriptions.GetFailedFiles")
        failedFiles = failedAction.execute(self.mergeSubscription["id"])

        self.assertEqual(len(failedFiles), 4,
                         "Error: Wrong number of failed files: %s" % failedFiles)

        # Every failed file must be one of the files that were still
        # available before the fileset was closed.
        goldenIDs = [availFile["id"] for availFile in availFiles]

        for failedFile in failedFiles:
            self.assertTrue(failedFile["file"] in goldenIDs,
                            "Error: Extra failed file.")

        return

    def testFilesetCloseout2(self):
        """
        _testFilesetCloseout2_

        Make sure the code that fails orphaned files leaves alone files that
        have failed for other workflows.  With the merge fileset marked as
        not open before splitting, a single splitter pass is expected to
        give one job group with two jobs and four failed files.
        """
        self.stuffWMBS()
        self.mergeFileset.markOpen(False)

        mergeFactory = SplitterFactory()(package = "WMCore.WMBS",
                                         subscription = self.mergeSubscription)

        # Limits large enough that every good file can be merged.
        mergeLimits = {"min_merge_size": 1,
                       "max_merge_size": 999999999999,
                       "max_merge_events": 999999999}
        jobGroups = mergeFactory(**mergeLimits)

        self.assertEqual(len(jobGroups), 1,
                         "Error: Wrong number of job groups.")
        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "Error: Wrong number of jobs.")

        getFailed = self.daoFactory(classname = "Subscriptions.GetFailedFiles")
        failed = getFailed.execute(self.mergeSubscription["id"])

        failureMessage = "Error: Wrong number of failed files: %s" % failed
        self.assertEqual(len(failed), 4, failureMessage)

    def testForcedMerge(self):
        """
        _testForcedMerge_

        Same scenario as testMinMergeSize1a, except that WMBS is filled with
        non-injected files; in that case the splitter must not create any
        job groups.
        """
        self.stuffWMBS(injected = False)
        self.mergeFileset.markOpen(False)

        mergeFactory = SplitterFactory()(package = "WMCore.WMBS",
                                         subscription = self.mergeSubscription)

        mergeLimits = {"min_merge_size": 20000,
                       "max_merge_size": 200000,
                       "max_merge_events": 20000}
        jobGroups = mergeFactory(**mergeLimits)

        self.assertEqual(len(jobGroups), 0)
示例#23
0
    def createTestJobGroup(
        self, config, name="TestWorkthrough", specLocation="spec.xml", error=False, task="/TestWorkload/ReReco"
    ):
        """
        Creates a group of self.nJobs jobs and walks them to "cleanout".

        A workflow and a fileset, both called ``name``, are created.  The
        fileset receives two files and is passed to markOpen(0).  Every job
        gets both files.  The first half of the jobs is given one report as
        its "fwjr", the remainder a second one, before the jobs are propagated
        through the "jobfailed", "exhausted" and "cleanout" states.

        Arguments:
            config       -- configuration handed to ChangeState
            name         -- name of the workflow and of the fileset
            specLocation -- spec of the workflow
            error        -- when true both reports are loaded from
                            badBackfillJobReport.pkl, otherwise from
                            mergeReport1.pkl and mergeReport2.pkl
            task         -- task of the workflow

        Returns the committed JobGroup.
        """
        testWorkflow = Workflow(spec=specLocation, owner="Simon", name=name, task=task)
        testWorkflow.create()

        testWMBSFileset = Fileset(name=name)
        testWMBSFileset.create()

        testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
        testFileA.addRun(Run(10, *[12312]))
        testFileA.setLocation("malpaquet")

        testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
        testFileB.addRun(Run(10, *[12312]))
        testFileB.setLocation("malpaquet")

        testFileA.create()
        testFileB.create()

        testWMBSFileset.addFile(testFileA)
        testWMBSFileset.addFile(testFileB)
        testWMBSFileset.commit()
        testWMBSFileset.markOpen(0)

        testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        for _ in range(0, self.nJobs):
            testJob = Job(name=makeUUID())
            testJob.addFile(testFileA)
            testJob.addFile(testFileB)
            testJob["retry_count"] = 1
            testJob["retry_max"] = 10
            testJob["mask"].addRunAndLumis(run=10, lumis=[12312, 12313])
            testJobGroup.add(testJob)

        testJobGroup.commit()

        changer = ChangeState(config)

        report1 = Report()
        report2 = Report()
        if error:
            path1 = os.path.join(
                WMCore.WMInit.getWMBASE(), "test/python/WMComponent_t/JobAccountant_t/fwjrs", "badBackfillJobReport.pkl"
            )
            path2 = path1
        else:
            path1 = os.path.join(
                WMCore.WMInit.getWMBASE(), "test/python/WMComponent_t/TaskArchiver_t/fwjrs", "mergeReport1.pkl"
            )
            path2 = os.path.join(
                WMCore.WMInit.getWMBASE(), "test/python/WMComponent_t/TaskArchiver_t/fwjrs", "mergeReport2.pkl"
            )
        report1.load(filename=path1)
        report2.load(filename=path2)

        changer.propagate(testJobGroup.jobs, "created", "new")
        changer.propagate(testJobGroup.jobs, "executing", "created")
        changer.propagate(testJobGroup.jobs, "complete", "executing")
        # Attach the reports only after the jobs have reached "complete":
        # first half of the jobs gets report1, the rest report2.
        for i in range(self.nJobs):
            if i < self.nJobs / 2:
                testJobGroup.jobs[i]["fwjr"] = report1
            else:
                testJobGroup.jobs[i]["fwjr"] = report2
        changer.propagate(testJobGroup.jobs, "jobfailed", "complete")
        changer.propagate(testJobGroup.jobs, "exhausted", "jobfailed")
        changer.propagate(testJobGroup.jobs, "cleanout", "exhausted")

        testSubscription.completeFiles([testFileA, testFileB])

        return testJobGroup
示例#24
0
    def createTestJobGroup(self,
                           config,
                           name="TestWorkthrough",
                           filesetName="TestFileset",
                           specLocation="spec.xml",
                           error=False,
                           task="/TestWorkload/ReReco",
                           multicore=False):
        """
        Creates a group of self.nJobs jobs and walks them to 'cleanout'.

        A workflow called ``name`` is created and passed to self.inject with
        injected=True.  An input fileset called ``filesetName`` receives two
        files, and an output fileset called ``filesetName`` + 'Output'
        receives one file and is registered as the workflow's 'output'.
        Every job gets both input files.  The first half of the jobs is given
        one report as its 'fwjr', the remainder a second one.  The jobs are
        then failed, sent through 'jobcooloff' for a second attempt, failed
        again and finally propagated to 'exhausted' and 'cleanout'.

        Arguments:
            config       -- configuration handed to ChangeState
            name         -- name of the workflow
            filesetName  -- name of the input fileset and prefix of the
                            output fileset name
            specLocation -- spec of the workflow
            error        -- when true the first report is loaded from
                            badBackfillJobReport.pkl
            multicore    -- when true (and error is false) both reports are
                            loaded from MulticoreReport.pkl
            task         -- task of the workflow

        Returns the committed JobGroup.
        """
        testWorkflow = Workflow(spec=specLocation,
                                owner=self.OWNERDN,
                                name=name,
                                task=task,
                                owner_vogroup="",
                                owner_vorole="")
        testWorkflow.create()
        self.inject.execute(names=[name], injected=True)

        testWMBSFileset = Fileset(name=filesetName)
        testWMBSFileset.create()

        testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
        testFileA.addRun(Run(10, *[12312]))
        testFileA.setLocation('malpaquet')

        testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
        testFileB.addRun(Run(10, *[12314]))
        testFileB.setLocation('malpaquet')

        testFileA.create()
        testFileB.create()

        testWMBSFileset.addFile(testFileA)
        testWMBSFileset.addFile(testFileB)
        testWMBSFileset.commit()
        testWMBSFileset.markOpen(0)

        outputWMBSFileset = Fileset(name='%sOutput' % filesetName)
        outputWMBSFileset.create()
        testFileC = File(lfn="/this/is/a/lfnC", size=1024, events=10)
        testFileC.addRun(Run(10, *[12312]))
        testFileC.setLocation('malpaquet')
        testFileC.create()
        outputWMBSFileset.addFile(testFileC)
        outputWMBSFileset.commit()
        outputWMBSFileset.markOpen(0)

        testWorkflow.addOutput('output', outputWMBSFileset)

        testSubscription = Subscription(fileset=testWMBSFileset,
                                        workflow=testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        for _ in range(0, self.nJobs):
            testJob = Job(name=makeUUID())
            testJob.addFile(testFileA)
            testJob.addFile(testFileB)
            testJob['retry_count'] = 1
            testJob['retry_max'] = 10
            testJob['mask'].addRunAndLumis(run=10, lumis=[12312, 12313])
            testJobGroup.add(testJob)

        testJobGroup.commit()

        changer = ChangeState(config)

        report1 = Report()
        report2 = Report()
        if error:
            path1 = os.path.join(WMCore.WMBase.getTestBase(),
                                 "WMComponent_t/JobAccountant_t/fwjrs",
                                 "badBackfillJobReport.pkl")
            path2 = os.path.join(WMCore.WMBase.getTestBase(),
                                 'WMComponent_t/TaskArchiver_t/fwjrs',
                                 'logCollectReport2.pkl')
        elif multicore:
            path1 = os.path.join(
                WMCore.WMBase.getTestBase(),
                "WMCore_t/FwkJobReport_t/MulticoreReport.pkl")
            path2 = path1
        else:
            path1 = os.path.join(WMCore.WMBase.getTestBase(),
                                 'WMComponent_t/TaskArchiver_t/fwjrs',
                                 'mergeReport1.pkl')
            path2 = os.path.join(WMCore.WMBase.getTestBase(),
                                 'WMComponent_t/TaskArchiver_t/fwjrs',
                                 'logCollectReport2.pkl')
        report1.load(filename=path1)
        report2.load(filename=path2)

        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'complete', 'executing')
        # Attach the reports only after the jobs have reached 'complete':
        # first half of the jobs gets report1, the rest report2.
        for i in range(self.nJobs):
            if i < self.nJobs / 2:
                testJobGroup.jobs[i]['fwjr'] = report1
            else:
                testJobGroup.jobs[i]['fwjr'] = report2
        # First failure, cool-off and a second full attempt ...
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'complete')
        changer.propagate(testJobGroup.jobs, 'jobcooloff', 'jobfailed')
        changer.propagate(testJobGroup.jobs, 'created', 'jobcooloff')
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'complete', 'executing')
        # ... which fails again and is then exhausted and cleaned out.
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'complete')
        changer.propagate(testJobGroup.jobs, 'exhausted', 'jobfailed')
        changer.propagate(testJobGroup.jobs, 'cleanout', 'exhausted')

        testSubscription.completeFiles([testFileA, testFileB])

        return testJobGroup
示例#25
0
    def testMultipleRunHarvesting(self):
        """
        _testMultipleRunHarvesting_

        Add some files with multiple runs in each, make sure the jobs
        are created by location and run. Verify each job mask afterwards.
        Note that in this test runs are split between sites; in real life
        that MUST NOT happen, as it is not supported yet.

        The test runs three splitter passes: an initial periodic harvest, a
        second periodic harvest after a third file has been added, and a
        sibling harvest on a second subscription over the same fileset.
        """
        def checkJobMasks(jobGroup, possibleLumiPairs):
            """Check each job covers one run with only expected lumi pairs."""
            for job in jobGroup.getJobs():
                runs = job['mask'].getRunAndLumis()
                self.assertEqual(len(runs), 1, "Job has more than one run configured")
                # list() is required on Python 3, where keys() returns a
                # view that cannot be indexed.
                run = list(runs.keys())[0]
                for lumiPair in runs[run]:
                    self.assertTrue(lumiPair in possibleLumiPairs[run], "Strange lumi pair in the job mask")

        multipleFilesFileset = Fileset(name = "TestFileset")

        newFile = File("/some/file/test1", size = 1000, events = 100)
        newFile.addRun(Run(1,*[1,3,4,5,6,7]))
        newFile.addRun(Run(2,*[1,2,4,5,6,7]))
        newFile.setLocation('SomeSE')
        multipleFilesFileset.addFile(newFile)
        newFile = File("/some/file/test2", size = 1000, events = 100)
        newFile.addRun(Run(1,*[2,8]))
        newFile.addRun(Run(2,*[3,8]))
        newFile.setLocation('SomeSE3')
        multipleFilesFileset.addFile(newFile)
        multipleFilesFileset.create()

        harvestingWorkflow = Workflow(spec = "spec.xml",
                                      owner = "hufnagel",
                                      name = "TestWorkflow",
                                      task="Test")
        harvestingWorkflow.create()

        harvestSub  = Subscription(fileset = multipleFilesFileset,
                                   workflow = harvestingWorkflow,
                                   split_algo = "Harvest",
                                   type = "Harvesting")
        harvestSub.create()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = harvestSub)
        jobGroups = jobFactory(periodic_harvest_interval = 2)
        self.assertEqual(len(jobGroups), 1, "A single job group was not created")
        self.assertEqual(len(jobGroups[0].getJobs()), 4,
                             "Four jobs were not created")

        checkJobMasks(jobGroups[0], {1 : [[1,1],[3,7],[2,2],[8,8]],
                                     2 : [[1,2],[4,7],[3,3],[8,8]]})

        self.finishJobs(jobGroups, harvestSub)

        # A third file extends run 1 with lumis 9-14.
        newFile = File("/some/file/test3", size = 1000, events = 100)
        newFile.addRun(Run(1,*range(9,15)))
        newFile.setLocation('SomeSE3')
        multipleFilesFileset.addFile(newFile)
        multipleFilesFileset.commit()

        # Presumably lets the 2-second periodic harvest interval elapse
        # before splitting again -- TODO confirm against the Harvest algo.
        time.sleep(2)

        jobGroups = jobFactory(periodic_harvest_interval = 2)
        self.assertEqual(len(jobGroups), 1, "A single job group was not created")
        self.assertEqual(len(jobGroups[0].getJobs()), 4, "Four jobs were not created")

        checkJobMasks(jobGroups[0], {1 : [[1,1],[3,7],[2,2],[8,8],[9,14]],
                                     2 : [[1,2],[4,7],[3,3],[8,8]]})

        harvestingWorkflowSib = Workflow(spec = "spec.xml",
                                         owner = "hufnagel",
                                         name = "TestWorkflowSib",
                                         task="TestSib")
        harvestingWorkflowSib.create()

        harvestSubSib  = Subscription(fileset = multipleFilesFileset,
                                      workflow = harvestingWorkflowSib,
                                      split_algo = "Harvest",
                                      type = "Harvesting")
        harvestSubSib.create()

        jobFactorySib = self.splitterFactory(package = "WMCore.WMBS", subscription = harvestSubSib)

        multipleFilesFileset.markOpen(False)

        # The sibling subscription is expected to produce nothing until
        # finishJobs() has been called for the first subscription.
        jobGroups = jobFactorySib(periodic_harvest_sibling = True)
        self.assertEqual(len(jobGroups), 0, "A single job group was created")

        self.finishJobs(jobGroups, harvestSub)

        jobGroups = jobFactorySib(periodic_harvest_sibling = True)
        self.assertEqual(len(jobGroups), 1, "A single job group was not created")
        self.assertEqual(len(jobGroups[0].getJobs()), 4, "Four jobs were not created")

        checkJobMasks(jobGroups[0], {1 : [[1,1],[3,7],[2,2],[8,8],[9,14]],
                                     2 : [[1,2],[4,7],[3,3],[8,8]]})
示例#26
0
class ParentlessMergeBySizeTest(unittest.TestCase):
    def setUp(self):
        """
        _setUp_

        Run the TestInit logging, database connection and schema steps
        (custom module WMCore.WMBS, useDefault=False), then build the
        DAOFactory the tests use to look up WMBS DAOs.
        """
        initialiser = TestInit(__file__)
        self.testInit = initialiser
        initialiser.setLogging()
        initialiser.setDatabaseConnection()
        initialiser.setSchema(customModules=["WMCore.WMBS"], useDefault=False)

        # The DAO factory is bound to the logger and DB interface that are
        # attached to the current thread.
        currentThread = threading.currentThread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=currentThread.logger,
                                     dbinterface=currentThread.dbi)

    def tearDown(self):
        """
        _tearDown_

        Clear out WMBS.
        """
        self.testInit.clearDatabase()
        return

    def stuffWMBS(self):
        """
        _stuffWMBS_

        Populate WMBS with the fixtures shared by the tests:

        - site "s1" registered with the SE names "somese.cern.ch" and
          "somese2.cern.ch";
        - the filesets "mergeFileset" and "bogusFileset";
        - the workflow "mergeWorkflow", marked as injected;
        - a created "ParentlessMergeBySize" subscription on the merge fileset
          plus a second subscription object on the bogus fileset, which is
          built but never created;
        - eleven files located at "somese.cern.ch", added to both filesets.

        Every file is 1024 bytes with 1024 events, except fileIII (102400
        events) and fileIV (102400 bytes).
        """
        newLocation = self.daoFactory(classname="Locations.New")
        for seName in ("somese.cern.ch", "somese2.cern.ch"):
            newLocation.execute(siteName="s1", seName=seName)

        # This DAO is instantiated but not used by the rest of the method.
        self.daoFactory(classname="Jobs.ChangeState")

        self.mergeFileset = Fileset(name="mergeFileset")
        self.mergeFileset.create()
        self.bogusFileset = Fileset(name="bogusFileset")
        self.bogusFileset.create()

        mergeWorkflow = Workflow(name="mergeWorkflow", spec="bunk2",
                                 owner="Steve", task="Test")
        mergeWorkflow.create()
        injectedMarker = self.daoFactory(
            classname="Workflow.MarkInjectedWorkflows")
        injectedMarker.execute(names=[mergeWorkflow.name], injected=True)

        self.mergeSubscription = Subscription(
            fileset=self.mergeFileset,
            workflow=mergeWorkflow,
            split_algo="ParentlessMergeBySize")
        self.mergeSubscription.create()
        # Note: the bogus subscription is only instantiated, not created.
        self.bogusSubscription = Subscription(
            fileset=self.bogusFileset,
            workflow=mergeWorkflow,
            split_algo="ParentlessMergeBySize")

        # One row per file: (lfn, size, events, first_event, run, lumi).
        fileLayout = [
            ("file1", 1024, 1024, 0, 1, 45),
            ("file2", 1024, 1024, 1024, 1, 45),
            ("file3", 1024, 1024, 2048, 1, 45),
            ("file4", 1024, 1024, 3072, 1, 45),
            ("fileA", 1024, 1024, 0, 1, 46),
            ("fileB", 1024, 1024, 1024, 1, 46),
            ("fileC", 1024, 1024, 2048, 1, 46),
            ("fileI", 1024, 1024, 0, 2, 46),
            ("fileII", 1024, 1024, 1024, 2, 46),
            ("fileIII", 1024, 102400, 2048, 2, 46),
            ("fileIV", 102400, 1024, 3072, 2, 46),
        ]

        # First pass: create every file, in the order listed above.
        createdFiles = []
        for lfn, size, events, firstEvent, run, lumi in fileLayout:
            newFile = File(lfn=lfn,
                           size=size,
                           events=events,
                           first_event=firstEvent,
                           locations=set(["somese.cern.ch"]))
            newFile.addRun(Run(run, lumi))
            newFile.create()
            createdFiles.append(newFile)

        # Second pass: attach the files to both filesets and commit them.
        for createdFile in createdFiles:
            self.mergeFileset.addFile(createdFile)
            self.bogusFileset.addFile(createdFile)

        self.mergeFileset.commit()
        self.bogusFileset.commit()

    def testMinMergeSize1(self):
        """
        _testMinMergeSize1_

        Set the minimum merge size to 200,000 bytes, which is more than the
        sum of all file sizes in the WMBS instance (112,640 bytes), without
        closing the fileset.  Verify that no merge jobs are produced.
        """
        # This method used to be defined twice with identical bodies; the
        # second definition simply replaced the first, so one copy is kept.
        self.stuffWMBS()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=200000,
                            max_merge_size=2000000000,
                            max_merge_events=200000000)

        self.assertEqual(len(result), 0,
                         "ERROR: No job groups should be returned.")

        return

    def testMinMergeSize1a(self):
        """
        _testMinMergeSize1a_

        Set the minimum merge size to 200,000 bytes, which is more than the
        sum of all file sizes in the WMBS instance, and mark the fileset as
        closed.  Verify that one job containing all files is pushed out and
        that its files are ordered by run, then lumi, then first event.
        """
        self.stuffWMBS()
        self.mergeFileset.markOpen(False)

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=200000,
                            max_merge_size=2000000,
                            max_merge_events=2000000)

        self.assertEqual(
            len(result), 1,
            "ERROR: More than one JobGroup returned: %s" % len(result))

        self.assertEqual(
            len(result[0].jobs), 1,
            "Error: One job should have been returned: %s" %
            len(result[0].jobs))

        goldenFiles = [
            "file1", "file2", "file3", "file4", "fileA", "fileB", "fileC",
            "fileI", "fileII", "fileIII", "fileIV"
        ]

        jobFiles = result[0].jobs[0].getFiles()

        # A run number of 0 means "no file seen yet".
        currentRun = 0
        currentLumi = 0
        currentEvent = 0
        for jobFile in jobFiles:
            jobFile.loadData()
            self.assertTrue(jobFile["lfn"] in goldenFiles,
                            "Error: Unknown file: %s" % jobFile["lfn"])
            self.assertEqual(
                jobFile["locations"],
                set(["somese.cern.ch", "somese2.cern.ch"]),
                "Error: File is missing a location.")
            goldenFiles.remove(jobFile["lfn"])

            fileRun = list(jobFile["runs"])[0].run
            fileLumi = min(list(jobFile["runs"])[0])
            fileEvent = jobFile["first_event"]

            if currentRun == 0:
                currentRun = fileRun
                currentLumi = fileLumi
                currentEvent = fileEvent
                continue

            self.assertTrue(fileRun >= currentRun,
                            "ERROR: Files not sorted by run.")

            if fileRun == currentRun:
                self.assertTrue(fileLumi >= currentLumi,
                                "ERROR: Files not ordered by lumi")

                # The first event is only comparable inside the same run and
                # lumi: this job merges across runs, and run 1 and run 2 both
                # use lumi 46 with first events restarting at 0.
                if fileLumi == currentLumi:
                    self.assertTrue(
                        fileEvent >= currentEvent,
                        "ERROR: Files not ordered by first event")

            currentRun = fileRun
            currentLumi = fileLumi
            currentEvent = fileEvent

        # Every expected file must have shown up in the single merge job.
        self.assertEqual(len(goldenFiles), 0,
                         "ERROR: Files missing from merge jobs.")

        return

    def testMaxMergeSize(self):
        """
        _testMaxMergeSize_

        Set the maximum merge size to be 100000 bytes.  Verify that two merge
        jobs are created, one for the one large file and another for the rest
        of the files.  Verify that each merge job contains the expected files,
        that we merge across runs and that the files inside each job are
        ordered by run, then lumi, then first event.
        """
        self.stuffWMBS()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=1,
                            max_merge_size=100000,
                            max_merge_events=200000)

        self.assertEqual(len(result), 1,
                         "ERROR: More than one JobGroup returned: %s" % result)

        self.assertEqual(len(result[0].jobs), 2,
                         "ERROR: Two jobs should have been returned.")

        goldenFilesA = [
            "file1", "file2", "file3", "file4", "fileA", "fileB", "fileC",
            "fileI", "fileII", "fileIII"
        ]
        goldenFilesB = ["fileIV"]

        for job in result[0].jobs:
            jobFiles = job.getFiles()

            # The first file of the job decides which golden list applies.
            firstLFN = jobFiles[0]["lfn"]
            if firstLFN in goldenFilesA:
                goldenFiles = goldenFilesA
            elif firstLFN in goldenFilesB:
                goldenFiles = goldenFilesB
            else:
                self.fail("Error: Unknown file in merge jobs.")

            # A run number of 0 means "no file seen yet" for this job.
            currentRun = 0
            currentLumi = 0
            currentEvent = 0
            for jobFile in jobFiles:
                self.assertTrue(jobFile["lfn"] in goldenFiles,
                                "Error: Unknown file in merge jobs.")
                self.assertEqual(jobFile["locations"],
                                 set(["somese.cern.ch"]),
                                 "Error: File is missing a location.")

                goldenFiles.remove(jobFile["lfn"])

                # The ordering checks must run for every file.  They used to
                # sit one level out, where they only saw the last file of each
                # job and never got past the "first file" branch.
                fileRun = list(jobFile["runs"])[0].run
                fileLumi = min(list(jobFile["runs"])[0])
                fileEvent = jobFile["first_event"]

                if currentRun == 0:
                    currentRun = fileRun
                    currentLumi = fileLumi
                    currentEvent = fileEvent
                    continue

                self.assertTrue(fileRun >= currentRun,
                                "ERROR: Files not sorted by run.")

                if fileRun == currentRun:
                    self.assertTrue(fileLumi >= currentLumi,
                                    "ERROR: Files not ordered by lumi")

                    if fileLumi == currentLumi:
                        self.assertTrue(
                            fileEvent >= currentEvent,
                            "ERROR: Files not ordered by first event")

                currentRun = fileRun
                currentLumi = fileLumi
                currentEvent = fileEvent

        self.assertTrue(len(goldenFilesA) == 0 and len(goldenFilesB) == 0,
                        "ERROR: Files missing from merge jobs.")

        return

    def testMaxEvents(self):
        """
        _testMaxEvents_

        Verify that the max_merge_events parameter works and that we correctly
        merge across runs.  With a limit of 100000 events two jobs are
        expected: fileIII (102400 events) on its own and every other file
        together, each job ordered by run, then lumi, then first event.
        """
        self.stuffWMBS()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=1,
                            max_merge_size=20000000,
                            max_merge_events=100000)

        self.assertEqual(len(result), 1,
                         "ERROR: More than one JobGroup returned: %s" % result)

        self.assertEqual(
            len(result[0].jobs), 2,
            "ERROR: Two jobs should have been returned: %s" %
            len(result[0].jobs))

        goldenFilesA = [
            "file1", "file2", "file3", "file4", "fileA", "fileB", "fileC",
            "fileI", "fileII", "fileIV"
        ]
        goldenFilesB = ["fileIII"]

        for job in result[0].jobs:
            jobFiles = job.getFiles()

            # The first file of the job decides which golden list applies.
            firstLFN = jobFiles[0]["lfn"]
            if firstLFN in goldenFilesA:
                goldenFiles = goldenFilesA
            elif firstLFN in goldenFilesB:
                goldenFiles = goldenFilesB
            else:
                self.fail("Error: Unknown file in merge jobs.")

            # A run number of 0 means "no file seen yet" for this job.
            currentRun = 0
            currentLumi = 0
            currentEvent = 0
            for jobFile in jobFiles:
                self.assertTrue(jobFile["lfn"] in goldenFiles,
                                "Error: Unknown file in merge jobs.")
                self.assertEqual(
                    jobFile["locations"], set(["somese.cern.ch"]),
                    "Error: File is missing a location: %s" %
                    jobFile["locations"])

                goldenFiles.remove(jobFile["lfn"])

                fileRun = list(jobFile["runs"])[0].run
                fileLumi = min(list(jobFile["runs"])[0])
                fileEvent = jobFile["first_event"]

                if currentRun == 0:
                    currentRun = fileRun
                    currentLumi = fileLumi
                    currentEvent = fileEvent
                    continue

                self.assertTrue(
                    fileRun >= currentRun,
                    "ERROR: Files not sorted by run: %s, %s" %
                    (fileRun, currentRun))

                if fileRun == currentRun:
                    self.assertTrue(fileLumi >= currentLumi,
                                    "ERROR: Files not ordered by lumi")

                    if fileLumi == currentLumi:
                        self.assertTrue(
                            fileEvent >= currentEvent,
                            "ERROR: Files not ordered by first event")

                currentRun = fileRun
                currentLumi = fileLumi
                currentEvent = fileEvent

        # The message is passed as the assertion message; it used to be
        # and-ed into the condition, so it was never reported on failure.
        self.assertTrue(len(goldenFilesA) == 0 and len(goldenFilesB) == 0,
                        "ERROR: Files missing from merge jobs.")

        return

    def testMinMergeSize1aNoRunMerge(self):
        """
        _testMinMergeSize1aNoRunMerge_

        Set the minimum merge size to 200,000 bytes, which is more than the
        sum of all file sizes in the WMBS instance, and mark the fileset as
        closed.  Verify that two jobs are pushed out, that we don't merge
        across run boundaries and that the files of each job are ordered by
        run, then lumi, then first event.
        """
        self.stuffWMBS()
        self.mergeFileset.markOpen(False)

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=200000,
                            max_merge_size=2000000,
                            max_merge_events=2000000,
                            merge_across_runs=False)

        self.assertEqual(
            len(result), 1,
            "ERROR: More than one JobGroup returned: %s" % len(result))

        self.assertEqual(
            len(result[0].jobs), 2,
            "Error: Two jobs should have been returned: %s" %
            len(result[0].jobs))

        # Expected LFNs per job, sorted so they can be compared as lists.
        goldenFilesA = sorted([
            "file1", "file2", "file3", "file4", "fileA", "fileB", "fileC"
        ])
        goldenFilesB = sorted(["fileI", "fileII", "fileIII", "fileIV"])

        for job in result[0].jobs:
            # A run number of 0 means "no file seen yet" for this job.
            currentRun = 0
            currentLumi = 0
            currentEvent = 0
            jobLFNs = []

            for jobFile in job.getFiles():
                jobFile.loadData()
                jobLFNs.append(jobFile["lfn"])
                self.assertEqual(
                    jobFile["locations"],
                    set(["somese.cern.ch", "somese2.cern.ch"]),
                    "Error: File is missing a location.")

                fileRun = list(jobFile["runs"])[0].run
                fileLumi = min(list(jobFile["runs"])[0])
                fileEvent = jobFile["first_event"]

                if currentRun == 0:
                    currentRun = fileRun
                    currentLumi = fileLumi
                    currentEvent = fileEvent
                    continue

                self.assertTrue(fileRun >= currentRun,
                                "ERROR: Files not sorted by run.")

                if fileRun == currentRun:
                    self.assertTrue(fileLumi >= currentLumi,
                                    "ERROR: Files not ordered by lumi")

                    # First events are only comparable within the same run
                    # and lumi, as in the other ordering checks of this class.
                    if fileLumi == currentLumi:
                        self.assertTrue(
                            fileEvent >= currentEvent,
                            "ERROR: Files not ordered by first event")

                currentRun = fileRun
                currentLumi = fileLumi
                currentEvent = fileEvent

            # Each golden list may be matched by one job only; a matched list
            # is emptied so a second job with the same files fails below.
            jobLFNs.sort()
            if jobLFNs == goldenFilesA:
                goldenFilesA = []
            else:
                self.assertEqual(jobLFNs, goldenFilesB,
                                 "Error: LFNs do not match.")
                goldenFilesB = []

        return

    def testMaxMergeSizeNoRunMerge(self):
        """
        _testMaxMergeSizeNoRunMerge_

        Set the maximum merge size to be 100000 bytes with merging across
        runs disabled.  Verify that three merge jobs are created: one for the
        run 1 files, one for the small run 2 files and one for the one large
        file.  Verify that each merge job contains the expected files, that
        we don't merge across run boundaries and that the files of each job
        are ordered by run, then lumi, then first event.
        """
        self.stuffWMBS()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=1,
                            max_merge_size=100000,
                            max_merge_events=200000,
                            merge_across_runs=False)

        self.assertEqual(len(result), 1,
                         "ERROR: More than one JobGroup returned: %s" % result)

        self.assertEqual(len(result[0].jobs), 3,
                         "ERROR: Three jobs should have been returned.")

        goldenFilesA = [
            "file1", "file2", "file3", "file4", "fileA", "fileB", "fileC"
        ]
        goldenFilesB = ["fileI", "fileII", "fileIII"]
        goldenFilesC = ["fileIV"]

        for job in result[0].jobs:
            jobFiles = job.getFiles()

            # The first file of the job decides which golden list applies.
            firstLFN = jobFiles[0]["lfn"]
            if firstLFN in goldenFilesA:
                goldenFiles = goldenFilesA
            elif firstLFN in goldenFilesB:
                goldenFiles = goldenFilesB
            else:
                goldenFiles = goldenFilesC

            # A run number of 0 means "no file seen yet" for this job.
            currentRun = 0
            currentLumi = 0
            currentEvent = 0
            for jobFile in jobFiles:
                self.assertTrue(jobFile["lfn"] in goldenFiles,
                                "Error: Unknown file in merge jobs.")
                self.assertTrue(
                    jobFile["locations"] == set(["somese.cern.ch"]),
                    "Error: File is missing a location.")

                goldenFiles.remove(jobFile["lfn"])

                # The ordering checks must run for every file.  They used to
                # sit one level out, where they only saw the last file of each
                # job and never got past the "first file" branch.
                fileRun = list(jobFile["runs"])[0].run
                fileLumi = min(list(jobFile["runs"])[0])
                fileEvent = jobFile["first_event"]

                if currentRun == 0:
                    currentRun = fileRun
                    currentLumi = fileLumi
                    currentEvent = fileEvent
                    continue

                self.assertTrue(fileRun >= currentRun,
                                "ERROR: Files not sorted by run.")
                if fileRun == currentRun:
                    self.assertTrue(fileLumi >= currentLumi,
                                    "ERROR: Files not ordered by lumi")
                    if fileLumi == currentLumi:
                        self.assertTrue(
                            fileEvent >= currentEvent,
                            "ERROR: Files not ordered by first event")

                currentRun = fileRun
                currentLumi = fileLumi
                currentEvent = fileEvent

        # All three golden lists must be consumed; the third one was
        # previously left out of this check.
        self.assertTrue(
            len(goldenFilesA) == 0 and len(goldenFilesB) == 0
            and len(goldenFilesC) == 0,
            "ERROR: Files missing from merge jobs.")

        return

    def testMaxEventsNoRunMerge(self):
        """
        _testMaxEventsNoRunMerge_

        Run the splitter with a limit of 100000 events per merge job and with
        merging across runs switched off.  Three jobs are expected: the run 1
        files, the run 2 files other than fileIII, and fileIII on its own.
        Inside each job the files must be ordered by run, then lumi, then
        first event.
        """
        self.stuffWMBS()

        jobFactory = SplitterFactory()(package="WMCore.WMBS",
                                       subscription=self.mergeSubscription)
        jobGroups = jobFactory(min_merge_size=1,
                               max_merge_size=20000000,
                               max_merge_events=100000,
                               merge_across_runs=False)

        self.assertTrue(
            len(jobGroups) == 1,
            "ERROR: More than one JobGroup returned: %s" % jobGroups)

        mergeJobs = jobGroups[0].jobs
        self.assertTrue(
            len(mergeJobs) == 3,
            "ERROR: Three jobs should have been returned: %s" % len(mergeJobs))

        expectedRunOne = ["file1", "file2", "file3", "file4",
                          "fileA", "fileB", "fileC"]
        expectedRunTwo = ["fileI", "fileII", "fileIV"]
        expectedOverLimit = ["fileIII"]

        for mergeJob in mergeJobs:
            inputFiles = mergeJob.getFiles()

            # The first file of the job selects the list it is checked
            # against; anything not in the first two lists falls through to
            # the last one.
            leadLFN = inputFiles[0]["lfn"]
            if leadLFN in expectedRunOne:
                expected = expectedRunOne
            elif leadLFN in expectedRunTwo:
                expected = expectedRunTwo
            else:
                expected = expectedOverLimit

            # A run number of 0 means "no file seen yet" for this job.
            lastRun, lastLumi, lastEvent = 0, 0, 0
            for inputFile in inputFiles:
                self.assertTrue(inputFile["lfn"] in expected,
                                "Error: Unknown file in merge jobs.")
                self.assertTrue(
                    inputFile["locations"] == set(["somese.cern.ch"]),
                    "Error: File is missing a location: %s" %
                    inputFile["locations"])

                expected.remove(inputFile["lfn"])

                firstRun = list(inputFile["runs"])[0]
                fileRun = firstRun.run
                fileLumi = min(firstRun)
                fileEvent = inputFile["first_event"]

                # Only compare against a previous file when there is one.
                if lastRun != 0:
                    self.assertTrue(
                        fileRun >= lastRun,
                        "ERROR: Files not sorted by run: %s, %s" %
                        (fileRun, lastRun))
                    if fileRun == lastRun:
                        self.assertTrue(fileLumi >= lastLumi,
                                        "ERROR: Files not ordered by lumi")
                        if fileLumi == lastLumi:
                            self.assertTrue(
                                fileEvent >= lastEvent,
                                "ERROR: Files not ordered by first event")

                lastRun, lastLumi, lastEvent = fileRun, fileLumi, fileEvent

        everythingSeen = (len(expectedRunOne) == 0
                          and len(expectedRunTwo) == 0
                          and len(expectedOverLimit) == 0)
        self.assertTrue(everythingSeen,
                        "ERROR: Files missing from merge jobs.")

    def testLocationMerging(self):
        """
        _testLocationMerging_

        Add one extra file located at "somese2.cern.ch" to the merge fileset
        and check that every job produced by the splitter only holds files
        with exactly one location, the same one for the whole job.
        """
        self.stuffWMBS()

        newLocation = self.daoFactory(classname="Locations.New")
        newLocation.execute(siteName="s2", seName="somese2.cern.ch")

        remoteFile = File(lfn="fileSite2", size=4098, events=1024,
                          first_event=0,
                          locations=set(["somese2.cern.ch"]))
        remoteFile.addRun(Run(1, 46))
        remoteFile.create()

        self.mergeFileset.addFile(remoteFile)
        self.mergeFileset.commit()

        jobFactory = SplitterFactory()(package="WMCore.WMBS",
                                       subscription=self.mergeSubscription)
        jobGroups = jobFactory(min_merge_size=4097,
                               max_merge_size=99999999,
                               max_merge_events=999999999,
                               merge_across_runs=False)

        assert len(jobGroups) == 1, \
               "ERROR: More than one JobGroup returned."

        assert len(jobGroups[0].jobs) == 3, \
               "ERROR: Three jobs should have been returned."

        for mergeJob in jobGroups[0].jobs:
            inputFiles = mergeJob.getFiles()
            # The location of the job's first file is the reference for all
            # files of that job.
            expectedLocation = list(inputFiles[0]["locations"])[0]

            for inputFile in inputFiles:
                fileLocations = inputFile["locations"]
                assert len(fileLocations) == 1, \
                       "Error: Wrong number of locations"

                assert list(fileLocations)[0] == expectedLocation, \
                       "Error: Wrong location."

    def testMaxWaitTime(self):
        """
        _testMaxWaitTime_

        Use the same size and event limits as testMinMergeSize1, which expects
        no job groups, but pass a negative max_wait_time.  That should force
        all files to be merged out immediately, in a single job.
        """
        self.stuffWMBS()

        jobFactory = SplitterFactory()(package="WMCore.WMBS",
                                       subscription=self.mergeSubscription)
        jobGroups = jobFactory(min_merge_size=200000,
                               max_merge_size=2000000000,
                               max_merge_events=200000000,
                               max_wait_time=-10)

        # Exactly one job group holding exactly one job ...
        self.assertEqual(len(jobGroups), 1)
        onlyGroup = jobGroups[0]
        self.assertEqual(len(onlyGroup.jobs), 1)

        # ... and that job carries all eleven files.
        onlyJob = onlyGroup.jobs[0]
        self.assertEqual(len(onlyJob.getFiles()), 11)

    def testDifferentSubscritionIDs(self):
        """
        _testDifferentSubscriptionIDs_

        Create an unrelated workflow with two subscriptions before the usual
        fixtures, so that the merge subscription's ID is not equal to its
        workflow's ID, and check that the merge splitting still runs and
        yields one job group with two jobs.
        """
        currentThread = threading.currentThread()
        currentThread.transaction.begin()

        dummyWorkflow = Workflow(name="dummyWorkflow", spec="bunk49",
                                 owner="Steve", task="Test2")
        dummyWorkflow.create()

        dummyFileset = Fileset(name="dummyFileset")
        dummyFileset.create()

        # Build both dummy subscriptions first, then create them in order.
        dummySubscriptions = [
            Subscription(fileset=dummyFileset,
                         workflow=dummyWorkflow,
                         split_algo="ParentlessMergeBySize")
            for _ in range(2)
        ]
        for dummySubscription in dummySubscriptions:
            dummySubscription.create()

        currentThread.transaction.commit()

        self.stuffWMBS()

        jobFactory = SplitterFactory()(package="WMCore.WMBS",
                                       subscription=self.mergeSubscription)
        jobGroups = jobFactory(min_merge_size=4097,
                               max_merge_size=99999999,
                               max_merge_events=999999999,
                               merge_across_runs=False)

        self.assertEqual(len(jobGroups), 1)
        onlyGroup = jobGroups[0]
        self.assertEqual(len(onlyGroup.jobs), 2)
示例#27
0
class ParentlessMergeBySizeTest(unittest.TestCase):
    def setUp(self):
        """
        _setUp_

        Set up logging, the database connection and the WMCore.WMBS schema
        through TestInit, then build the DAO factory used by the tests.
        """
        harness = TestInit(__file__)
        self.testInit = harness
        harness.setLogging()
        harness.setDatabaseConnection()
        harness.setSchema(customModules=["WMCore.WMBS"], useDefault=False)

        # The DAO factory uses the logger and DB interface attached to the
        # current thread.
        thread = threading.currentThread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=thread.logger,
                                     dbinterface=thread.dbi)

    def tearDown(self):
        """
        _tearDown_

        Ask the TestInit helper stored by setUp to clear the database.
        """
        self.testInit.clearDatabase()

    def stuffWMBS(self):
        """
        _stuffWMBS_

        Insert the fixtures used by the merge tests into WMBS: one location,
        the "mergeFileset" and "bogusFileset" filesets, a merge workflow that
        is marked as injected, a created subscription on the merge fileset, a
        second subscription object on the bogus fileset (instantiated but not
        created here) and eleven files.  All files sit at the same SE and are
        added to both filesets:

          file1..file4      run 1, lumi 45
          fileA..fileC      run 1, lumi 46
          fileI..fileIV     run 2, lumi 46 (fileIII has 102400 events,
                            fileIV is 102400 bytes; everything else is 1024)
        """
        seName = "somese.cern.ch"

        newLocation = self.daoFactory(classname="Locations.New")
        newLocation.execute(siteName="s1", seName=seName)

        # This DAO is instantiated but not used anywhere else in this method.
        changeStateDAO = self.daoFactory(classname="Jobs.ChangeState")

        self.mergeFileset = Fileset(name="mergeFileset")
        self.mergeFileset.create()
        self.bogusFileset = Fileset(name="bogusFileset")
        self.bogusFileset.create()

        mergeWorkflow = Workflow(name="mergeWorkflow", spec="bunk2",
                                 owner="Steve", task="Test")
        mergeWorkflow.create()
        injectedMarker = self.daoFactory(
            classname="Workflow.MarkInjectedWorkflows")
        injectedMarker.execute(names=[mergeWorkflow.name], injected=True)

        self.mergeSubscription = Subscription(
            fileset=self.mergeFileset,
            workflow=mergeWorkflow,
            split_algo="ParentlessMergeBySize")
        self.mergeSubscription.create()
        self.bogusSubscription = Subscription(
            fileset=self.bogusFileset,
            workflow=mergeWorkflow,
            split_algo="ParentlessMergeBySize")

        # (lfn, size, events, first_event, run, lumi)
        fileSpecs = [
            ("file1", 1024, 1024, 0, 1, 45),
            ("file2", 1024, 1024, 1024, 1, 45),
            ("file3", 1024, 1024, 2048, 1, 45),
            ("file4", 1024, 1024, 3072, 1, 45),
            ("fileA", 1024, 1024, 0, 1, 46),
            ("fileB", 1024, 1024, 1024, 1, 46),
            ("fileC", 1024, 1024, 2048, 1, 46),
            ("fileI", 1024, 1024, 0, 2, 46),
            ("fileII", 1024, 1024, 1024, 2, 46),
            ("fileIII", 1024, 102400, 2048, 2, 46),
            ("fileIV", 102400, 1024, 3072, 2, 46),
        ]

        # Create every file first; they are attached to the filesets only
        # once all of them exist.
        wmbsFiles = []
        for lfn, size, events, firstEvent, runNumber, lumi in fileSpecs:
            wmbsFile = File(lfn=lfn, size=size, events=events,
                            first_event=firstEvent,
                            locations=set([seName]))
            wmbsFile.addRun(Run(runNumber, lumi))
            wmbsFile.create()
            wmbsFiles.append(wmbsFile)

        for wmbsFile in wmbsFiles:
            self.mergeFileset.addFile(wmbsFile)
            self.bogusFileset.addFile(wmbsFile)

        self.mergeFileset.commit()
        self.bogusFileset.commit()

        return

    def testMinMergeSize1(self):
        """
        _testMinMergeSize1_

        Set the minimum merge size to be 200,000 bytes, which is more than the
        sum of all file sizes inserted by stuffWMBS (10 * 1024 + 102400 =
        112,640 bytes).  The merge fileset is not marked closed in this test.
        Verify that no merge jobs will be produced.
        """
        self.stuffWMBS()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=200000,
                            max_merge_size=2000000000,
                            max_merge_events=200000000)

        # unittest assertion instead of a bare assert so the check is not
        # stripped when Python runs with -O.
        self.assertEqual(len(result), 0,
                         "ERROR: No job groups should be returned.")

        return

    def testMinMergeSize1a(self):
        """
        _testMinMergeSize1a_

        Set the minimum merge size to be 200,000 bytes, which is more than the
        sum of all file sizes inserted by stuffWMBS (112,640 bytes), and mark
        the merge fileset as closed.  Verify that one job containing all files
        is pushed out and that its files are ordered by run, then lumi, then
        first event.
        """
        self.stuffWMBS()
        self.mergeFileset.markOpen(False)

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=200000, max_merge_size=2000000,
                            max_merge_events=2000000)

        self.assertEqual(len(result), 1,
                         "ERROR: More than one JobGroup returned: %s" % len(result))
        self.assertEqual(len(result[0].jobs), 1,
                         "Error: One job should have been returned: %s" % len(result[0].jobs))

        goldenFiles = ["file1", "file2", "file3", "file4", "fileA", "fileB",
                       "fileC", "fileI", "fileII", "fileIII", "fileIV"]

        jobFiles = result[0].jobs[0].getFiles()

        # A run number of 0 marks "no previous file seen yet".
        currentRun = 0
        currentLumi = 0
        currentEvent = 0
        for jobFile in jobFiles:
            jobFile.loadData()
            self.assertTrue(jobFile["lfn"] in goldenFiles,
                            "Error: Unknown file: %s" % jobFile["lfn"])
            self.assertEqual(jobFile["locations"], set(["somese.cern.ch"]),
                             "Error: File is missing a location.")
            goldenFiles.remove(jobFile["lfn"])

            firstRun = list(jobFile["runs"])[0]
            fileRun = firstRun.run
            fileLumi = min(firstRun)
            fileEvent = jobFile["first_event"]

            if currentRun == 0:
                currentRun = fileRun
                currentLumi = fileLumi
                currentEvent = fileEvent
                continue

            self.assertTrue(fileRun >= currentRun,
                            "ERROR: Files not sorted by run.")

            if fileRun == currentRun:
                self.assertTrue(fileLumi >= currentLumi,
                                "ERROR: Files not ordered by lumi")

                # First events are only comparable inside the same run and
                # lumi; the same lumi number can show up in different runs.
                if fileLumi == currentLumi:
                    self.assertTrue(fileEvent >= currentEvent,
                                    "ERROR: Files not ordered by first event")

            currentRun = fileRun
            currentLumi = fileLumi
            currentEvent = fileEvent

        # Every expected file must have been seen in the single job.
        self.assertEqual(goldenFiles, [],
                         "ERROR: Files missing from merge job: %s" % goldenFiles)

        return

    def testMaxMergeSize(self):
        """
        _testMaxMergeSize_

        Set the maximum merge size to be 100000 bytes.  Verify that two merge
        jobs are created, one for the one large file and another for the rest
        of the files.  Verify that each merge job contains the expected files
        and that the files of every job are ordered by run, lumi and first
        event.  The larger expected job mixes files from run 1 and run 2, i.e.
        the test expects merging across runs.
        """
        self.stuffWMBS()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=1, max_merge_size=100000,
                            max_merge_events=200000)

        self.assertEqual(len(result), 1,
                         "ERROR: More than one JobGroup returned: %s" % result)
        self.assertEqual(len(result[0].jobs), 2,
                         "ERROR: Two jobs should have been returned.")

        goldenFilesA = ["file1", "file2", "file3", "file4", "fileA", "fileB",
                        "fileC", "fileI", "fileII", "fileIII"]
        goldenFilesB = ["fileIV"]

        for job in result[0].jobs:
            jobFiles = job.getFiles()

            # The first file of the job decides which expected list applies.
            if jobFiles[0]["lfn"] in goldenFilesA:
                goldenFiles = goldenFilesA
            elif jobFiles[0]["lfn"] in goldenFilesB:
                goldenFiles = goldenFilesB
            else:
                self.fail("Error: Unknown file in merge jobs.")

            # A run number of 0 marks "no previous file seen yet".
            currentRun = 0
            currentLumi = 0
            currentEvent = 0
            for jobFile in jobFiles:
                self.assertTrue(jobFile["lfn"] in goldenFiles,
                                "Error: Unknown file in merge jobs.")
                self.assertEqual(jobFile["locations"], set(["somese.cern.ch"]),
                                 "Error: File is missing a location.")

                goldenFiles.remove(jobFile["lfn"])

                # The ordering checks must run once per file, inside this
                # loop; outside of it they never executed.
                firstRun = list(jobFile["runs"])[0]
                fileRun = firstRun.run
                fileLumi = min(firstRun)
                fileEvent = jobFile["first_event"]

                if currentRun == 0:
                    currentRun = fileRun
                    currentLumi = fileLumi
                    currentEvent = fileEvent
                    continue

                self.assertTrue(fileRun >= currentRun,
                                "ERROR: Files not sorted by run.")

                if fileRun == currentRun:
                    self.assertTrue(fileLumi >= currentLumi,
                                    "ERROR: Files not ordered by lumi")

                    if fileLumi == currentLumi:
                        self.assertTrue(fileEvent >= currentEvent,
                                        "ERROR: Files not ordered by first event")

                currentRun = fileRun
                currentLumi = fileLumi
                currentEvent = fileEvent

        self.assertTrue(len(goldenFilesA) == 0 and len(goldenFilesB) == 0,
                        "ERROR: Files missing from merge jobs.")

        return

    def testMaxEvents(self):
        """
        _testMaxEvents_

        Verify that the max_merge_events parameter works: with a limit of
        100000 events the one file holding 102400 events must end up in a job
        of its own while all other files share a second job.  The larger
        expected job mixes files from run 1 and run 2, i.e. the test expects
        merging across runs.  Files of every job must be ordered by run, lumi
        and first event.
        """
        self.stuffWMBS()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=1, max_merge_size=20000000,
                            max_merge_events=100000)

        self.assertEqual(len(result), 1,
                         "ERROR: More than one JobGroup returned: %s" % result)
        self.assertEqual(len(result[0].jobs), 2,
                         "ERROR: Two jobs should have been returned: %s" % len(result[0].jobs))

        goldenFilesA = ["file1", "file2", "file3", "file4", "fileA", "fileB",
                        "fileC", "fileI", "fileII", "fileIV"]
        goldenFilesB = ["fileIII"]

        for job in result[0].jobs:
            jobFiles = job.getFiles()

            # The first file of the job decides which expected list applies.
            if jobFiles[0]["lfn"] in goldenFilesA:
                goldenFiles = goldenFilesA
            elif jobFiles[0]["lfn"] in goldenFilesB:
                goldenFiles = goldenFilesB
            else:
                self.fail("Error: Unknown file in merge jobs.")

            # A run number of 0 marks "no previous file seen yet".
            currentRun = 0
            currentLumi = 0
            currentEvent = 0
            for jobFile in jobFiles:
                self.assertTrue(jobFile["lfn"] in goldenFiles,
                                "Error: Unknown file in merge jobs.")
                self.assertEqual(jobFile["locations"], set(["somese.cern.ch"]),
                                 "Error: File is missing a location: %s" % jobFile["locations"])

                goldenFiles.remove(jobFile["lfn"])

                firstRun = list(jobFile["runs"])[0]
                fileRun = firstRun.run
                fileLumi = min(firstRun)
                fileEvent = jobFile["first_event"]

                if currentRun == 0:
                    currentRun = fileRun
                    currentLumi = fileLumi
                    currentEvent = fileEvent
                    continue

                self.assertTrue(fileRun >= currentRun,
                                "ERROR: Files not sorted by run: %s, %s" % (fileRun, currentRun))

                if fileRun == currentRun:
                    self.assertTrue(fileLumi >= currentLumi,
                                    "ERROR: Files not ordered by lumi")

                    if fileLumi == currentLumi:
                        self.assertTrue(fileEvent >= currentEvent,
                                        "ERROR: Files not ordered by first event")

                currentRun = fileRun
                currentLumi = fileLumi
                currentEvent = fileEvent

        # The message is a separate argument here; it used to be and-ed into
        # the asserted condition, so it was never reported on failure.
        self.assertTrue(len(goldenFilesA) == 0 and len(goldenFilesB) == 0,
                        "ERROR: Files missing from merge jobs.")

        return

    def testMinMergeSize1aNoRunMerge(self):
        """
        _testMinMergeSize1aNoRunMerge_

        Set the minimum merge size to be 200,000 bytes, which is more than the
        sum of all file sizes inserted by stuffWMBS (112,640 bytes), and mark
        the merge fileset as closed.  With merge_across_runs disabled, verify
        that two jobs are pushed out, one per run, and that we don't merge
        across run boundaries.
        """
        self.stuffWMBS()
        self.mergeFileset.markOpen(False)

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=200000, max_merge_size=2000000,
                            max_merge_events=2000000, merge_across_runs=False)

        self.assertEqual(len(result), 1,
                         "ERROR: More than one JobGroup returned: %s" % len(result))
        self.assertEqual(len(result[0].jobs), 2,
                         "Error: Two jobs should have been returned: %s" % len(result[0].jobs))

        # Sorted so they can be compared against the sorted LFNs of a job.
        goldenFilesA = sorted(["file1", "file2", "file3", "file4", "fileA",
                               "fileB", "fileC"])
        goldenFilesB = sorted(["fileI", "fileII", "fileIII", "fileIV"])

        for job in result[0].jobs:
            # A run number of 0 marks "no previous file seen yet".
            currentRun = 0
            currentLumi = 0
            currentEvent = 0
            jobLFNs = []

            for jobFile in job.getFiles():
                jobFile.loadData()
                jobLFNs.append(jobFile["lfn"])
                self.assertEqual(jobFile["locations"], set(["somese.cern.ch"]),
                                 "Error: File is missing a location.")

                firstRun = list(jobFile["runs"])[0]
                fileRun = firstRun.run
                fileLumi = min(firstRun)
                fileEvent = jobFile["first_event"]

                if currentRun == 0:
                    currentRun = fileRun
                    currentLumi = fileLumi
                    currentEvent = fileEvent
                    continue

                self.assertTrue(fileRun >= currentRun,
                                "ERROR: Files not sorted by run.")

                if fileRun == currentRun:
                    self.assertTrue(fileLumi >= currentLumi,
                                    "ERROR: Files not ordered by lumi")

                    # First events are only comparable inside the same run
                    # and lumi.
                    if fileLumi == currentLumi:
                        self.assertTrue(fileEvent >= currentEvent,
                                        "ERROR: Files not ordered by first event")

                currentRun = fileRun
                currentLumi = fileLumi
                currentEvent = fileEvent

            jobLFNs.sort()
            if jobLFNs == goldenFilesA:
                goldenFilesA = []
            else:
                self.assertEqual(jobLFNs, goldenFilesB,
                                 "Error: LFNs do not match.")
                goldenFilesB = []

        # Both expected file lists must have been matched by some job.
        self.assertTrue(len(goldenFilesA) == 0 and len(goldenFilesB) == 0,
                        "ERROR: Files missing from merge jobs.")

        return

    def testMaxMergeSizeNoRunMerge(self):
        """
        _testMaxMergeSizeNoRunMerge_

        Set the maximum merge size to be 100000 bytes and disable merging
        across runs.  Verify that three merge jobs are created: one for the
        run 1 files, one for the small run 2 files and one for the single
        large file.  Verify that each merge job contains the expected files,
        ordered by run, lumi and first event, and that we don't merge across
        run boundaries.
        """
        self.stuffWMBS()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=1, max_merge_size=100000,
                            max_merge_events=200000, merge_across_runs=False)

        self.assertEqual(len(result), 1,
                         "ERROR: More than one JobGroup returned: %s" % result)
        self.assertEqual(len(result[0].jobs), 3,
                         "ERROR: Three jobs should have been returned.")

        goldenFilesA = ["file1", "file2", "file3", "file4", "fileA", "fileB",
                        "fileC"]
        goldenFilesB = ["fileI", "fileII", "fileIII"]
        goldenFilesC = ["fileIV"]

        for job in result[0].jobs:
            jobFiles = job.getFiles()

            # The first file of the job decides which expected list applies;
            # anything unknown falls through to list C and is rejected by the
            # membership assertion below.
            if jobFiles[0]["lfn"] in goldenFilesA:
                goldenFiles = goldenFilesA
            elif jobFiles[0]["lfn"] in goldenFilesB:
                goldenFiles = goldenFilesB
            else:
                goldenFiles = goldenFilesC

            # A run number of 0 marks "no previous file seen yet".
            currentRun = 0
            currentLumi = 0
            currentEvent = 0
            for jobFile in jobFiles:
                self.assertTrue(jobFile["lfn"] in goldenFiles,
                                "Error: Unknown file in merge jobs.")
                self.assertTrue(jobFile["locations"] == set(["somese.cern.ch"]),
                                "Error: File is missing a location.")

                goldenFiles.remove(jobFile["lfn"])

                # The ordering checks must run once per file, inside this
                # loop; outside of it they never executed.
                firstRun = list(jobFile["runs"])[0]
                fileRun = firstRun.run
                fileLumi = min(firstRun)
                fileEvent = jobFile["first_event"]

                if currentRun == 0:
                    currentRun = fileRun
                    currentLumi = fileLumi
                    currentEvent = fileEvent
                    continue

                self.assertTrue(fileRun >= currentRun,
                                "ERROR: Files not sorted by run.")
                if fileRun == currentRun:
                    self.assertTrue(fileLumi >= currentLumi,
                                    "ERROR: Files not ordered by lumi")
                    if fileLumi == currentLumi:
                        self.assertTrue(fileEvent >= currentEvent,
                                        "ERROR: Files not ordered by first event")

                currentRun = fileRun
                currentLumi = fileLumi
                currentEvent = fileEvent

        # All three expected lists, including C, must be fully consumed.
        self.assertTrue(len(goldenFilesA) == 0 and len(goldenFilesB) == 0
                        and len(goldenFilesC) == 0,
                        "ERROR: Files missing from merge jobs.")

        return

    def testMaxEventsNoRunMerge(self):
        """
        _testMaxEventsNoRunMerge_

        Split with max_merge_events set to 100000 and merge_across_runs
        disabled.  A single job group with three jobs is expected; the files
        of each job must match one of three expected LFN lists and be ordered
        by run, lumi and first event, so no job crosses a run boundary.
        """
        self.stuffWMBS()

        jobFactory = SplitterFactory()(package="WMCore.WMBS",
                                       subscription=self.mergeSubscription)
        jobGroups = jobFactory(min_merge_size=1,
                               max_merge_size=20000000,
                               max_merge_events=100000,
                               merge_across_runs=False)

        self.assertTrue(len(jobGroups) == 1,
                        "ERROR: More than one JobGroup returned: %s" % jobGroups)

        mergeJobs = jobGroups[0].jobs
        self.assertTrue(len(mergeJobs) == 3,
                        "ERROR: Three jobs should have been returned: %s" % len(mergeJobs))

        expectedA = ["file1", "file2", "file3", "file4", "fileA", "fileB",
                     "fileC"]
        expectedB = ["fileI", "fileII", "fileIV"]
        expectedC = ["fileIII"]

        for mergeJob in mergeJobs:
            jobFiles = mergeJob.getFiles()

            # The first file of the job picks the expected list to consume.
            leadLFN = jobFiles[0]["lfn"]
            if leadLFN in expectedA:
                remaining = expectedA
            elif leadLFN in expectedB:
                remaining = expectedB
            else:
                remaining = expectedC

            # A previous run of 0 means no file of this job was seen so far.
            lastRun, lastLumi, lastEvent = 0, 0, 0
            for mergeFile in jobFiles:
                self.assertTrue(mergeFile["lfn"] in remaining,
                                "Error: Unknown file in merge jobs.")
                self.assertTrue(mergeFile["locations"] == set(["somese.cern.ch"]),
                                "Error: File is missing a location: %s" % mergeFile["locations"])

                remaining.remove(mergeFile["lfn"])

                firstRun = list(mergeFile["runs"])[0]
                thisRun = firstRun.run
                thisLumi = min(firstRun)
                thisEvent = mergeFile["first_event"]

                if lastRun != 0:
                    self.assertTrue(thisRun >= lastRun,
                                    "ERROR: Files not sorted by run: %s, %s" % (thisRun, lastRun))
                    if thisRun == lastRun:
                        self.assertTrue(thisLumi >= lastLumi,
                                        "ERROR: Files not ordered by lumi")
                        if thisLumi == lastLumi:
                            self.assertTrue(thisEvent >= lastEvent,
                                            "ERROR: Files not ordered by first event")

                lastRun, lastLumi, lastEvent = thisRun, thisLumi, thisEvent

        self.assertTrue(not any((expectedA, expectedB, expectedC)),
                        "ERROR: Files missing from merge jobs.")

        return

    def testLocationMerging(self):
        """
        _testLocationMerging_

        Add one extra file that lives on a second SE to the merge fileset and
        check that three jobs come out, each of which only holds files that
        share one single, common location.
        """
        self.stuffWMBS()

        newLocation = self.daoFactory(classname="Locations.New")
        newLocation.execute(siteName="s2", seName="somese2.cern.ch")

        otherSiteFile = File(lfn="fileSite2", size=4098, events=1024,
                             first_event=0,
                             locations=set(["somese2.cern.ch"]))
        otherSiteFile.addRun(Run(1, 46))
        otherSiteFile.create()

        self.mergeFileset.addFile(otherSiteFile)
        self.mergeFileset.commit()

        jobFactory = SplitterFactory()(package="WMCore.WMBS",
                                       subscription=self.mergeSubscription)
        jobGroups = jobFactory(min_merge_size=4097,
                               max_merge_size=99999999,
                               max_merge_events=999999999,
                               merge_across_runs=False)

        assert len(jobGroups) == 1, \
            "ERROR: More than one JobGroup returned."

        assert len(jobGroups[0].jobs) == 3, \
            "ERROR: Three jobs should have been returned."

        for mergeJob in jobGroups[0].jobs:
            # The location of the first file is the reference for the job.
            expectedLocation = list(mergeJob.getFiles()[0]["locations"])[0]

            for mergeFile in mergeJob.getFiles():
                fileLocations = list(mergeFile["locations"])

                assert len(fileLocations) == 1, \
                    "Error: Wrong number of locations"

                assert fileLocations[0] == expectedLocation, \
                    "Error: Wrong location."

        return


    def testMaxWaitTime(self):
        """
        _testMaxWaitTime_

        Run the splitter with a negative max_wait_time (-10), which is meant
        to force all files to be merged out immediately.

        The remaining parameters are the ones used by the first minimum merge
        size test, which would normally produce no job groups.
        """
        self.stuffWMBS()

        jobFactory = SplitterFactory()(package="WMCore.WMBS",
                                       subscription=self.mergeSubscription)
        jobGroups = jobFactory(min_merge_size=200000,
                               max_merge_size=2000000000,
                               max_merge_events=200000000,
                               max_wait_time=-10)

        # Exactly one job group with exactly one job is expected ...
        self.assertEqual(len(jobGroups), 1)
        groupJobs = jobGroups[0].jobs
        self.assertEqual(len(groupJobs), 1)
        # ... and that job has to carry all eleven files.
        onlyJob = groupJobs[0]
        self.assertEqual(len(onlyJob.getFiles()), 11)

        return

    def testDifferentSubscritionIDs(self):
        """
        _testDifferentSubscriptionIDs_

        Inside one transaction, create an additional workflow, an additional
        fileset and two additional subscriptions before stuffWMBS inserts the
        regular fixtures.  The stated intent is to check that merge splitting
        still runs when the subscription ID is not equal to the workflow ID.
        The splitter must return one job group with two jobs.
        """
        thread = threading.currentThread()
        thread.transaction.begin()

        spareWorkflow = Workflow(name="dummyWorkflow", spec="bunk49",
                                 owner="Steve", task="Test2")
        spareWorkflow.create()

        spareFileset = Fileset(name="dummyFileset")
        spareFileset.create()

        # Instantiate both spare subscriptions first, then create them in the
        # same order.
        spareSubscriptions = [
            Subscription(fileset=spareFileset,
                         workflow=spareWorkflow,
                         split_algo="ParentlessMergeBySize")
            for _ in range(2)
        ]
        for spareSubscription in spareSubscriptions:
            spareSubscription.create()

        thread.transaction.commit()

        self.stuffWMBS()

        jobFactory = SplitterFactory()(package="WMCore.WMBS",
                                       subscription=self.mergeSubscription)
        jobGroups = jobFactory(min_merge_size=4097,
                               max_merge_size=99999999,
                               max_merge_events=999999999,
                               merge_across_runs=False)

        self.assertEqual(len(jobGroups), 1)
        self.assertEqual(len(jobGroups[0].jobs), 2)
        return
示例#28
0
    def _createSubscriptionsInWMBS(self,
                                   task,
                                   fileset,
                                   alternativeFilesetClose=False):
        """
        _createSubscriptionsInWMBS_

        Create the workflow and the subscription in WMBS for the given task
        and recurse into its child tasks, so that all tasks below it are
        covered.  This includes the output filesets and the workflow output
        map of each task.

        Arguments:
          task - task to create the workflow and subscription for
          fileset - input fileset of the task's subscription
          alternativeFilesetClose - handed to the Workflow and passed on
                                    unchanged to the recursive calls

        Returns self.topLevelSubscription, which is set to the first
        subscription handled while it is still None.
        """
        # create runtime sandbox for workflow
        self.createSandbox()

        # Fetch the owner information once instead of once per field.
        owner = self.wmSpec.getOwner()

        # FIXME: Let workflow put in values if spec is missing them
        workflow = Workflow(
            spec=self.wmSpec.specUrl(),
            owner=owner["name"],
            dn=owner.get("dn", "unknown"),
            group=owner.get("group", "unknown"),
            owner_vogroup=owner.get("vogroup", "DEFAULT"),
            owner_vorole=owner.get("vorole", "DEFAULT"),
            name=self.wmSpec.name(),
            task=task.getPathName(),
            wfType=self.wmSpec.getDashboardActivity(),
            alternativeFilesetClose=alternativeFilesetClose,
            priority=self.wmSpec.priority())
        workflow.create()
        subscription = Subscription(fileset=fileset,
                                    workflow=workflow,
                                    split_algo=task.jobSplittingAlgorithm(),
                                    type=task.getPrimarySubType())
        if subscription.exists():
            subscription.load()
            msg = "Subscription %s already exists for %s (you may ignore file insertion messages below, existing files wont be duplicated)"
            self.logger.info(msg % (subscription['id'], task.getPathName()))
        else:
            subscription.create()
        for site in task.siteWhitelist():
            subscription.addWhiteBlackList([{
                "site_name": site,
                "valid": True
            }])

        for site in task.siteBlacklist():
            subscription.addWhiteBlackList([{
                "site_name": site,
                "valid": False
            }])

        # The first subscription handled becomes the top level one; every
        # later (recursive) call only logs a child subscription.
        if self.topLevelSubscription is None:
            self.topLevelSubscription = subscription
            logging.info("Top level subscription created: %s",
                         subscription["id"])
        else:
            logging.info("Child subscription created: %s", subscription["id"])

        outputModules = task.getOutputModulesForTask()
        ignoredOutputModules = task.getIgnoredOutputModulesForTask()
        for outputModule in outputModules:
            for outputModuleName in outputModule.listSections_():
                if outputModuleName in ignoredOutputModules:
                    logging.info(
                        "IgnoredOutputModule set for %s, skipping fileset creation.",
                        outputModuleName)
                    continue
                outputFileset = Fileset(
                    self.outputFilesetName(task, outputModuleName))
                outputFileset.create()
                outputFileset.markOpen(True)
                mergedOutputFileset = None

                # Every child task fed by this output module gets its own
                # subscription on the output fileset; a Merge child also
                # provides the merged output fileset.
                for childTask in task.childTaskIterator():
                    if childTask.data.input.outputModule == outputModuleName:
                        if childTask.taskType() == "Merge":
                            mergedOutputFileset = Fileset(
                                self.outputFilesetName(childTask, "Merged"))
                            mergedOutputFileset.create()
                            mergedOutputFileset.markOpen(True)

                            primaryDataset = getattr(
                                getattr(outputModule, outputModuleName),
                                "primaryDataset", None)
                            if primaryDataset is not None:
                                self.mergeOutputMapping[
                                    mergedOutputFileset.id] = primaryDataset

                        self._createSubscriptionsInWMBS(
                            childTask, outputFileset, alternativeFilesetClose)

                # Without a Merge child the output fileset doubles as the
                # merged output fileset.
                if mergedOutputFileset is None:
                    workflow.addOutput(outputModuleName, outputFileset,
                                       outputFileset)
                else:
                    workflow.addOutput(outputModuleName, outputFileset,
                                       mergedOutputFileset)

        return self.topLevelSubscription
示例#29
0
class PeriodicTest(unittest.TestCase):
    """
    _PeriodicTest_

    Exercise the "Periodic" job splitting algorithm against a WMBS
    subscription.
    """

    def setUp(self):
        """
        _setUp_

        Create a single "Periodic" subscription over an initially empty
        fileset.  Files are added later through injectFile().
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=["WMCore.WMBS"],
                                useDefault=False)

        # current_thread() is the non-deprecated spelling of currentThread().
        myThread = threading.current_thread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)

        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute(siteName="site1", seName="somese.cern.ch")
        locationAction.execute(siteName="site2", seName="otherse.cern.ch")

        self.testFileset = Fileset(name="TestFileset1")
        self.testFileset.create()

        testWorkflow = Workflow(spec="spec.xml", owner="Steve",
                                name="wf001", task="Test")
        testWorkflow.create()
        self.testSubscription = Subscription(fileset=self.testFileset,
                                             workflow=testWorkflow,
                                             split_algo="Periodic",
                                             type="Processing")
        self.testSubscription.create()
        return

    def tearDown(self):
        """
        _tearDown_

        Clear out WMBS.
        """
        self.testInit.clearDatabase()
        return

    def injectFile(self):
        """
        _injectFile_

        Inject a file into the periodic splitting input fileset.
        """
        # The timestamp keeps the LFN distinct between successive injections.
        testFile = File(lfn="/this/is/a/lfn%s" % time.time(), size=1000,
                        events=100, locations=set(["somese.cern.ch"]))
        testFile.create()
        self.testFileset.addFile(testFile)
        self.testFileset.commit()

        return

    def verifyFiles(self, wmbsJob):
        """
        _verifyFiles_

        Verify that the input files for the job are the same as the files in the
        input fileset.
        """
        inputFiles = wmbsJob.getFiles()
        filesetFiles = self.testFileset.getFiles()

        for inputFile in inputFiles:
            self.assertTrue(inputFile in filesetFiles,
                            "ERROR: Unknown file: %s" % inputFile)
            # Remove matched files so that leftovers reveal files the job missed.
            filesetFiles.remove(inputFile)

        self.assertEqual(len(filesetFiles), 0,
                         "ERROR: Not all files included in job.")

        return

    def _completeJob(self, wmbsJob):
        """Move wmbsJob to the "cleanout" state through the Jobs.ChangeState DAO."""
        wmbsJob["state"] = "cleanout"
        wmbsJob["oldstate"] = "new"
        wmbsJob["couch_record"] = "somejive"
        wmbsJob["retry_count"] = 0
        changeStateDAO = self.daoFactory(classname="Jobs.ChangeState")
        changeStateDAO.execute([wmbsJob])
        return

    def testPeriodicSplitting(self):
        """
        _testPeriodicSplitting_

        Manipulate the splitting algorithm to test the corner cases.
        """
        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.testSubscription)

        # First pass: no jobs exist.  The algorithm should create a job
        # containing all available files.
        self.injectFile()
        jobGroups = jobFactory(job_period=99999999999)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: Wrong number of job groups returned: %s" % len(jobGroups))
        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: Jobgroup has wrong number of jobs: %s" % len(jobGroups[0].jobs))

        wmbsJob = jobGroups[0].jobs.pop()
        self.verifyFiles(wmbsJob)

        # Verify that no jobs are generated as the previously issued job has not
        # completed yet.
        time.sleep(5)
        self.injectFile()
        moreJobGroups = jobFactory(job_period=1)

        self.assertEqual(len(moreJobGroups), 0,
                         "ERROR: No jobgroups should be returned.")

        # Complete the job so that the splitting algorithm will generate
        # another job.
        self._completeJob(wmbsJob)

        # Verify that no jobs will be generated if the period has not yet
        # expired.
        self.injectFile()
        moreJobGroups = jobFactory(job_period=999999999999)

        self.assertEqual(len(moreJobGroups), 0,
                         "ERROR: No jobgroups should be returned.")

        # Verify that a job will be generated if the period has expired.
        time.sleep(5)
        self.injectFile()
        jobGroups = jobFactory(job_period=1)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: Wrong number of job groups returned: %s" % len(jobGroups))
        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: Jobgroup has wrong number of jobs: %s" % len(jobGroups[0].jobs))

        self.verifyFiles(jobGroups[0].jobs.pop())

        # Verify that no jobs will be generated in the case that a periodic job
        # is still running and the fileset has been closed.
        self.testFileset.markOpen(False)
        time.sleep(5)
        self.injectFile()
        jobGroups = jobFactory(job_period=1)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: Wrong number of job groups returned: %s" % len(jobGroups))

        # Complete the outstanding job.
        # NOTE(review): wmbsJob is still the *first* job; the second job popped
        # above is never moved to cleanout.  Behaviour kept as-is -- confirm
        # whether the second job was meant to be completed here.
        self._completeJob(wmbsJob)

        # Verify that when the input fileset is closed and all periodic jobs
        # are complete a job will not be generated.
        self.injectFile()
        jobGroups = jobFactory(job_period=99999999999)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: Wrong number of job groups returned: %s" % len(jobGroups))

        # Verify that after the final job is complete no more jobs are generated.
        self._completeJob(wmbsJob)

        time.sleep(5)
        self.injectFile()
        moreJobGroups = jobFactory(job_period=1)

        self.assertEqual(len(moreJobGroups), 0,
                         "ERROR: No jobgroups should be returned.")

        return
示例#30
0
class HarvestTest(unittest.TestCase):
    """
    _HarvestTest_

    Test for EndOfRun job splitter
    """

    def setUp(self):
        """
        _setUp_

        Set up the WMBS schema, register the test sites in resource control
        and create a "Harvest" subscription over an 11-file fileset.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(customModules=["WMCore.WMBS"])

        self.splitterFactory = SplitterFactory(package="WMCore.JobSplitting")

        # current_thread() is the non-deprecated spelling of currentThread().
        myThread = threading.current_thread()
        self.myThread = myThread
        daoFactory = DAOFactory(package="WMCore.WMBS",
                                logger=logging,
                                dbinterface=myThread.dbi)
        self.WMBSFactory = daoFactory

        config = self.getConfig()
        self.changer = ChangeState(config)

        myResourceControl = ResourceControl()
        myResourceControl.insertSite("SomeSite", 10, 20, "SomeSE", "SomeCE")
        myResourceControl.insertSite("SomeSite", 10, 20, "SomeSE2", "SomeCE")
        myResourceControl.insertSite("SomeSite2", 10, 20, "SomeSE3", "SomeCE2")

        # The files are attached before create(), so no explicit commit is
        # issued here.
        self.fileset1 = Fileset(name="TestFileset1")
        self._addFiles(range(11), 'SomeSE')

        self.fileset1.create()

        workflow1 = Workflow(spec="spec.xml", owner="hufnagel", name="TestWorkflow1", task="Test")
        workflow1.create()

        self.subscription1 = Subscription(fileset=self.fileset1,
                                          workflow=workflow1,
                                          split_algo="Harvest",
                                          type="Harvesting")

        self.subscription1.create()
        self.configFile = EmulatorSetup.setupWMAgentConfig()

        return

    def tearDown(self):
        """
        _tearDown_

        Clear the database and delete the emulator configuration file.
        """
        self.testInit.clearDatabase()
        EmulatorSetup.deleteConfig(self.configFile)

        return

    def getConfig(self):
        """
        _getConfig_

        Build the configuration handed to ChangeState: database settings are
        read from the DATABASE / DBSOCK environment variables and the couch
        URL from COUCHURL.
        """
        config = self.testInit.getConfiguration()
        self.testInit.generateWorkDir(config)

        config.section_("CoreDatabase")
        config.CoreDatabase.connectUrl = os.getenv("DATABASE")
        config.CoreDatabase.socket = os.getenv("DBSOCK")

        # JobStateMachine
        config.component_('JobStateMachine')
        config.JobStateMachine.couchurl = os.getenv('COUCHURL', None)
        config.JobStateMachine.couchDBName = 'wmagent_jobdump'

        return config

    def finishJobs(self, jobGroups, subscription=None):
        """
        _finishJobs_

        Mark every acquired file of the subscription (subscription1 when none
        is given) as completed and walk the jobs of each job group through
        created -> executing -> complete -> success -> cleanout.
        """
        if not subscription:
            subscription = self.subscription1
        for f in subscription.acquiredFiles():
            subscription.completeFiles(f)

        for jobGroup in jobGroups:
            self.changer.propagate(jobGroup.jobs, 'executing', 'created')
            self.changer.propagate(jobGroup.jobs, 'complete', 'executing')
            self.changer.propagate(jobGroup.jobs, 'success', 'complete')
            self.changer.propagate(jobGroup.jobs, 'cleanout', 'success')

        return

    def _addFiles(self, fileNumbers, location, run=1, lumi=1):
        """Attach one file per number to fileset1, at location, with a single run/lumi; the caller commits."""
        for fileNum in fileNumbers:
            newFile = File("/some/file/name%d" % fileNum, size=1000, events=100)
            newFile.addRun(Run(run, lumi))
            newFile.setLocation(location)
            self.fileset1.addFile(newFile)
        return

    def _verifyJobMasks(self, jobs, possibleLumiPairs):
        """Assert that each job mask holds one run whose lumi pairs all appear in possibleLumiPairs[run]."""
        for job in jobs:
            runs = job['mask'].getRunAndLumis()
            self.assertEqual(len(runs), 1, "Job has more than one run configured")
            # list(runs) rather than runs.keys()[0]: dict views cannot be
            # indexed on Python 3.
            run = list(runs)[0]
            for lumiPair in runs[run]:
                self.assertTrue(lumiPair in possibleLumiPairs[run], "Strange lumi pair in the job mask")
        return

    def testHarvestEndOfRunTrigger(self):
        """
        _testHarvestEndOfRunTrigger_

        Make sure that the basic splitting algo works: no job is created while
        the fileset is open and no periodic interval is configured, and a job
        group is created once the fileset has been closed.
        """
        self.assertEqual(self.fileset1.open, True, "Fileset is closed. Shouldn't")

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription1)

        jobGroups = jobFactory()

        self.assertEqual(len(jobGroups), 0, "We got 1 or more jobGroups with an open fileset and no periodic configuration")

        self.fileset1.markOpen(False)
        self.assertEqual(self.fileset1.open, False, "Fileset is opened, why?")

        # We should also check whether there are acquired files; if there are,
        # jobs are still running and we do not want to fire another one.
        # TODO : The above one we can do when all is done. Not priority

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription1)
        jobGroups = jobFactory()

        self.assertEqual(len(jobGroups), 1, "Harvest jobsplitter didn't create a single jobGroup after the fileset was closed")

        return

    def testPeriodicTrigger(self):
        """
        _testPeriodicTrigger_

        Drive the splitter with periodic_harvest_interval and check that jobs
        appear only once the interval has elapsed, and that they are split by
        location and run.
        """
        self.assertEqual(self.fileset1.open, True, "Fileset is not open, not testing periodic here")
        # There should be no acquired files at this point; if there were,
        # there should not be a job.
        #self.subscription1.acquireFiles(self.subscription1.availableFiles().pop())

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription1)
        jobGroups = jobFactory(periodic_harvest_interval=3)

        self.assertEqual(len(jobGroups), 1, "Didn't created the first periodic job when there were acquired files")

        # For the whole thing to work, faking the first job finishing, and putting the files as complete
        self.finishJobs(jobGroups)

        # Adding more of files, so we have new stuff to process
        self._addFiles(range(12, 24), 'SomeSE')
        self.fileset1.commit()

        # Testing that it doesn't create a job unless the delay is past
        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription1)
        jobGroups = jobFactory(periodic_harvest_interval=2)

        self.assertEqual(len(jobGroups), 0, "Created one or more job, when there were non-acquired file and the period is not passed by")

        time.sleep(2)

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription1)
        jobGroups = jobFactory(periodic_harvest_interval=2)

        self.assertEqual(len(jobGroups), 1, "Didn't created one or more job, and there weren't and the period is passed by")

        # Finishing out previous jobs
        self.finishJobs(jobGroups)

        # Adding more of files, so we have new stuff to process
        self._addFiles(range(26, 36), 'SomeSE')
        self.fileset1.commit()

        # Trying to create another job just afterwards, it shouldn't, because it should respect the configured delay
        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription1)
        jobGroups = jobFactory(periodic_harvest_interval=2)

        self.assertEqual(len(jobGroups), 0, "Created one or more job, there are new files, but the delay is not past")

        time.sleep(2)

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription1)
        jobGroups = jobFactory(periodic_harvest_interval=2)

        self.assertEqual(len(jobGroups), 1, "Didn't created one or more job, there are new files and the delay is past")

        # Last check is whether the job gets all the files or not

        numFilesJob = jobGroups[0].jobs[0].getFiles()
        numFilesFileset = self.fileset1.getFiles()
        self.assertEqual(numFilesJob, numFilesFileset, "Job didn't got all the files")

        # Finishing out previous jobs
        self.finishJobs(jobGroups)

        # Adding files for the first location
        self._addFiles(range(38, 48), 'SomeSE')
        self.fileset1.commit()
        # Then another location
        self._addFiles(range(50, 56), 'SomeSE3')
        self.fileset1.commit()

        # We should have jobs in both locations
        time.sleep(2)

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription1)
        jobGroups = jobFactory(periodic_harvest_interval=2)

        self.assertEqual(len(jobGroups[0].getJobs()), 2, "We didn't get 2 jobs for 2 locations")

        firstJobLocation = jobGroups[0].getJobs()[0].getFileLocations()[0]
        secondJobLocation = jobGroups[0].getJobs()[1].getFileLocations()[0]

        self.assertEqual(firstJobLocation, 'SomeSite', "First job location is not SomeSite")
        self.assertEqual(secondJobLocation, 'SomeSite2', "Second job location is not SomeSite2")

        self.finishJobs(jobGroups)

        self._addFiles(range(60, 65), 'SomeSE3', run=2, lumi=2)
        self.fileset1.commit()

        self._addFiles(range(70, 75), 'SomeSE3', run=3, lumi=3)
        self.fileset1.commit()

        time.sleep(2)

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription1)
        jobGroups = jobFactory(periodic_harvest_interval=2)

        # This is one of the most "complicated" tests so worth to comment, 4 jobs should be created
        # 1 - all previous files from SomeSE and run = 1 (a lot, like ~45)
        # 2 - Few files from SomeSE3, Run = 1
        # 3 - Few files from SomeSE3, Run = 2
        # 4 - Few files from SomeSE3, Run = 3
        self.assertEqual(len(jobGroups[0].getJobs()), 4, "We didn't get 4 jobs for adding 2 different runs to SomeSE3")

        return

    def testMultipleRunHarvesting(self):
        """
        _testMultipleRunHarvesting_

        Add some files with multiple runs in each, make sure the jobs
        are created by location and run. Verify each job mask afterwards.
        Note that in this test runs are split between sites; in real life
        that MUST NOT happen, as it is still not supported.
        """
        multipleFilesFileset = Fileset(name="TestFileset")

        newFile = File("/some/file/test1", size=1000, events=100)
        newFile.addRun(Run(1, *[1, 3, 4, 5, 6, 7]))
        newFile.addRun(Run(2, *[1, 2, 4, 5, 6, 7]))
        newFile.setLocation('SomeSE')
        multipleFilesFileset.addFile(newFile)
        newFile = File("/some/file/test2", size=1000, events=100)
        newFile.addRun(Run(1, *[2, 8]))
        newFile.addRun(Run(2, *[3, 8]))
        newFile.setLocation('SomeSE3')
        multipleFilesFileset.addFile(newFile)
        multipleFilesFileset.create()

        harvestingWorkflow = Workflow(spec="spec.xml",
                                      owner="hufnagel",
                                      name="TestWorkflow",
                                      task="Test")
        harvestingWorkflow.create()

        harvestSub = Subscription(fileset=multipleFilesFileset,
                                  workflow=harvestingWorkflow,
                                  split_algo="Harvest",
                                  type="Harvesting")
        harvestSub.create()

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=harvestSub)
        jobGroups = jobFactory(periodic_harvest_interval=2)
        self.assertEqual(len(jobGroups), 1, "A single job group was not created")
        self.assertEqual(len(jobGroups[0].getJobs()), 4,
                         "Four jobs were not created")

        self._verifyJobMasks(jobGroups[0].getJobs(),
                             {1: [[1, 1], [3, 7], [2, 2], [8, 8]],
                              2: [[1, 2], [4, 7], [3, 3], [8, 8]]})

        self.finishJobs(jobGroups, harvestSub)

        newFile = File("/some/file/test3", size=1000, events=100)
        newFile.addRun(Run(1, *range(9, 15)))
        newFile.setLocation('SomeSE3')
        multipleFilesFileset.addFile(newFile)
        multipleFilesFileset.commit()

        time.sleep(2)

        jobGroups = jobFactory(periodic_harvest_interval=2)
        self.assertEqual(len(jobGroups), 1, "A single job group was not created")
        self.assertEqual(len(jobGroups[0].getJobs()), 4, "Four jobs were not created")

        # Lumis 9-14 of run 1 come from the file added above.
        lumiPairsWithNewFile = {1: [[1, 1], [3, 7], [2, 2], [8, 8], [9, 14]],
                                2: [[1, 2], [4, 7], [3, 3], [8, 8]]}
        self._verifyJobMasks(jobGroups[0].getJobs(), lumiPairsWithNewFile)

        harvestingWorkflowSib = Workflow(spec="spec.xml",
                                         owner="hufnagel",
                                         name="TestWorkflowSib",
                                         task="TestSib")
        harvestingWorkflowSib.create()

        harvestSubSib = Subscription(fileset=multipleFilesFileset,
                                     workflow=harvestingWorkflowSib,
                                     split_algo="Harvest",
                                     type="Harvesting")
        harvestSubSib.create()

        jobFactorySib = self.splitterFactory(package="WMCore.WMBS", subscription=harvestSubSib)

        multipleFilesFileset.markOpen(False)

        jobGroups = jobFactorySib(periodic_harvest_sibling=True)
        self.assertEqual(len(jobGroups), 0, "A single job group was created")

        # jobGroups is empty at this point, so this call only completes the
        # files still acquired by the first subscription.
        self.finishJobs(jobGroups, harvestSub)

        jobGroups = jobFactorySib(periodic_harvest_sibling=True)
        self.assertEqual(len(jobGroups), 1, "A single job group was not created")
        self.assertEqual(len(jobGroups[0].getJobs()), 4, "Four jobs were not created")

        self._verifyJobMasks(jobGroups[0].getJobs(), lumiPairsWithNewFile)
示例#31
0
    def testMultipleRunHarvesting(self):
        """
        _testMultipleRunHarvesting_

        Add some files with multiple runs in each, make sure the jobs
        are created by location and run. Verify each job mask afterwards.
        Note that in this test runs are split between sites; in real life
        that MUST NOT happen, as it is still not supported.
        """

        def checkJobMasks(jobs, expectedLumis):
            """Assert each job mask holds one run and only lumis listed in expectedLumis."""
            allowed = LumiList(compactList=expectedLumis)
            for job in jobs:
                runs = job['mask'].getRunAndLumis()
                self.assertEqual(len(runs), 1, "Job has more than one run configured")
                # list(runs) rather than runs.keys()[0]: dict views cannot be
                # indexed on Python 3.
                run = list(runs)[0]
                for lumiPair in runs[run]:
                    for lumi in range(lumiPair[0], lumiPair[1] + 1):
                        # The run is always passed as a string, as the first
                        # check originally did (the later two passed it raw).
                        # NOTE(review): presumably LumiList treats both forms
                        # alike -- confirm.
                        self.assertTrue((str(run), lumi) in allowed,
                                        "All of %s not in %s" % (lumiPair, allowed))

        multipleFilesFileset = Fileset(name="TestFileset")

        newFile = File("/some/file/test1", size=1000, events=100)
        newFile.addRun(Run(1, *[1, 3, 4, 5, 6, 7]))
        newFile.addRun(Run(2, *[1, 2, 4, 5, 6, 7]))
        newFile.setLocation('T1_US_FNAL_Disk')
        multipleFilesFileset.addFile(newFile)
        newFile = File("/some/file/test2", size=1000, events=100)
        newFile.addRun(Run(1, *[2, 8]))
        newFile.addRun(Run(2, *[3, 8]))
        newFile.setLocation('T2_CH_CERN')
        multipleFilesFileset.addFile(newFile)
        multipleFilesFileset.create()

        harvestingWorkflow = Workflow(spec="spec.xml",
                                      owner="hufnagel",
                                      name="TestWorkflow",
                                      task="Test")
        harvestingWorkflow.create()

        harvestSub = Subscription(fileset=multipleFilesFileset,
                                  workflow=harvestingWorkflow,
                                  split_algo="Harvest",
                                  type="Harvesting")
        harvestSub.create()

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=harvestSub)
        jobGroups = jobFactory(periodic_harvest_interval=2)
        self.assertEqual(len(jobGroups), 1, "A single job group was not created")
        self.assertEqual(len(jobGroups[0].getJobs()), 4,
                         "Four jobs were not created")

        checkJobMasks(jobGroups[0].getJobs(),
                      {1: [[1, 1], [3, 7], [2, 2], [8, 8]],
                       2: [[1, 2], [4, 7], [3, 3], [8, 8]]})

        self.finishJobs(jobGroups, harvestSub)

        newFile = File("/some/file/test3", size=1000, events=100)
        newFile.addRun(Run(1, *range(9, 15)))
        newFile.setLocation('T2_CH_CERN')
        multipleFilesFileset.addFile(newFile)
        multipleFilesFileset.commit()

        time.sleep(2)

        jobGroups = jobFactory(periodic_harvest_interval=2)
        self.assertEqual(len(jobGroups), 1, "A single job group was not created")
        self.assertEqual(len(jobGroups[0].getJobs()), 4, "Four jobs were not created")

        # Lumis 9-14 of run 1 come from the file added above.
        lumisWithNewFile = {1: [[1, 1], [3, 7], [2, 2], [8, 8], [9, 14]],
                            2: [[1, 2], [4, 7], [3, 3], [8, 8]]}
        checkJobMasks(jobGroups[0].getJobs(), lumisWithNewFile)

        harvestingWorkflowSib = Workflow(spec="spec.xml",
                                         owner="hufnagel",
                                         name="TestWorkflowSib",
                                         task="TestSib")
        harvestingWorkflowSib.create()

        harvestSubSib = Subscription(fileset=multipleFilesFileset,
                                     workflow=harvestingWorkflowSib,
                                     split_algo="Harvest",
                                     type="Harvesting")
        harvestSubSib.create()

        jobFactorySib = self.splitterFactory(package="WMCore.WMBS", subscription=harvestSubSib)

        multipleFilesFileset.markOpen(False)

        jobGroups = jobFactorySib(periodic_harvest_sibling=True)
        self.assertEqual(len(jobGroups), 0, "A single job group was created")

        # jobGroups is empty at this point; the call is made for harvestSub.
        self.finishJobs(jobGroups, harvestSub)

        jobGroups = jobFactorySib(periodic_harvest_sibling=True)
        self.assertEqual(len(jobGroups), 1, "A single job group was not created")
        self.assertEqual(len(jobGroups[0].getJobs()), 4, "Four jobs were not created")

        checkJobMasks(jobGroups[0].getJobs(), lumisWithNewFile)
示例#32
0
    def setupExpressWorkflow(self):
        """
        _setupExpressWorkflow_

        Fill WMStats, the on-disk spec and WMBS with an Express-like
        workflow.  Every subscription starts out unfinished and is filed in
        self.stateMap under one of the keys listed in self.orderedStates.
        """
        wfName = 'Express_Run481516_StreamZFast'
        dqmMergeTask = 'ExpressMergewrite_StreamZFast_DQM'
        alcaSkimTask = 'ExpressAlcaSkimwrite_StreamZFast_ALCARECO'
        alcaHarvestTask = 'ExpressAlcaSkimwrite_StreamZFast_ALCARECOAlcaHarvestALCARECOStreamPromptCalibProd'
        dqmHarvestTask = 'ExpressMergewrite_StreamZFast_DQMEndOfRunDQMHarvestMerged'
        secondLevelTasks = [dqmMergeTask,
                            'ExpressMergewrite_ExpressPhysics_FEVT',
                            alcaSkimTask,
                            'ExpressCleanupUnmergedwrite_StreamZFast_DQM',
                            'ExpressCleanupUnmergedwrite_ExpressPhysics_FEVT',
                            'ExpressCleanupUnmergedwrite_StreamZFast_ALCARECO']
        # (parent, child) pairs, in the order their subscriptions are created.
        harvestPairs = [(alcaSkimTask, alcaHarvestTask),
                        (dqmMergeTask, dqmHarvestTask)]
        harvestChildOf = dict(harvestPairs)

        self.orderedStates = ['Merge', 'Harvesting', 'Processing Done']
        self.stateMap = {}
        for stateName in self.orderedStates:
            self.stateMap[stateName] = []

        # Populate WMStats
        self.requestDBWriter.insertGenericRequest({'RequestName': wfName})
        self.requestDBWriter.updateRequestStatus(wfName, 'Closed')

        # Create a wmspec in disk
        workload = newWorkload(wfName)
        expressTask = workload.newTask('Express')
        for taskName in secondLevelTasks:
            childTask = expressTask.addTask(taskName)
            harvestName = harvestChildOf.get(taskName)
            if harvestName is not None:
                childTask.addTask(harvestName)

        specPath = os.path.join(self.testDir, 'Express.pkl')
        workload.save(specPath)

        # Populate WMBS
        sharedFileset = Fileset(name='TestFileset')
        sharedFileset.create()
        sharedFileset.markOpen(False)

        # Keyword arguments shared by every Workflow created below.
        commonArgs = {'spec': specPath, 'owner': 'ItsAMeMario',
                      'name': wfName, 'wfType': 'tier0'}

        topLevelWorkflow = Workflow(task='/%s/Express' % wfName, **commonArgs)
        topLevelWorkflow.create()
        topLevelSub = Subscription(sharedFileset, topLevelWorkflow)
        topLevelSub.create()
        self.stateMap['Merge'].append(topLevelSub)

        # The cleanup tasks get no subscription of their own.
        for taskName in secondLevelTasks:
            if 'CleanupUnmerged' in taskName:
                continue
            taskPath = '/%s/Express/%s' % (wfName, taskName)
            secondLevelWorkflow = Workflow(task=taskPath, **commonArgs)
            secondLevelWorkflow.create()
            mergeSub = Subscription(sharedFileset, secondLevelWorkflow)
            mergeSub.create()
            self.stateMap['Harvesting'].append(mergeSub)

        for parentName, childName in harvestPairs:
            taskPath = '/%s/Express/%s/%s' % (wfName, parentName, childName)
            harvestingWorkflow = Workflow(task=taskPath, **commonArgs)
            harvestingWorkflow.create()
            harvestingSub = Subscription(sharedFileset, harvestingWorkflow)
            harvestingSub.create()
            self.stateMap['Processing Done'].append(harvestingSub)

        return
示例#33
0
class HarvestTest(unittest.TestCase):
    """
    _HarvestTest_

    Test for EndOfRun job splitter

    Exercises the "Harvest" splitting algorithm: the end-of-run trigger
    (closed fileset), the periodic trigger, sibling harvesting, multiRun
    versus byRun harvesting and the run white/black lists.
    """

    def setUp(self):
        """
        _setUp_

        Set up logging, the database connection and the WMBS schema, register
        the test sites in ResourceControl and create an (open) fileset holding
        11 files at T1_US_FNAL_Disk plus a "Harvest" subscription on it.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(customModules=["WMCore.WMBS"])

        self.splitterFactory = SplitterFactory(package="WMCore.JobSplitting")

        # current_thread() is the non-deprecated spelling of currentThread()
        myThread = threading.current_thread()
        self.myThread = myThread
        daoFactory = DAOFactory(package="WMCore.WMBS",
                                logger=logging,
                                dbinterface=myThread.dbi)
        self.WMBSFactory = daoFactory

        config = self.getConfig()
        self.changer = ChangeState(config)

        myResourceControl = ResourceControl()
        myResourceControl.insertSite("T1_US_FNAL", 10, 20, "T1_US_FNAL_Disk", "T1_US_FNAL")
        myResourceControl.insertSite("T1_US_FNAL", 10, 20, "T3_US_FNALLPC", "T1_US_FNAL")
        myResourceControl.insertSite("T2_CH_CERN", 10, 20, "T2_CH_CERN", "T2_CH_CERN")

        # The files are attached before the fileset is created
        self.fileset1 = Fileset(name="TestFileset1")
        self._addFiles(self.fileset1, range(11), 'T1_US_FNAL_Disk')
        self.fileset1.create()

        workflow1 = Workflow(spec="spec.xml", owner="hufnagel", name="TestWorkflow1", task="Test")
        workflow1.create()

        self.subscription1 = Subscription(fileset=self.fileset1,
                                          workflow=workflow1,
                                          split_algo="Harvest",
                                          type="Harvesting")

        self.subscription1.create()
        self.configFile = EmulatorSetup.setupWMAgentConfig()

        return

    def tearDown(self):
        """
        _tearDown_

        Clear the database and delete the emulator configuration written
        by setUp.
        """
        self.testInit.clearDatabase()
        EmulatorSetup.deleteConfig(self.configFile)

        return

    def getConfig(self):
        """
        _getConfig_

        Build the configuration handed to ChangeState: the database settings
        come from the DATABASE and DBSOCK environment variables, the couch
        URL from COUCHURL.
        """
        config = self.testInit.getConfiguration()
        self.testInit.generateWorkDir(config)

        config.section_("CoreDatabase")
        config.CoreDatabase.connectUrl = os.getenv("DATABASE")
        config.CoreDatabase.socket = os.getenv("DBSOCK")

        # JobStateMachine
        config.component_('JobStateMachine')
        config.JobStateMachine.couchurl = os.getenv('COUCHURL', None)
        config.JobStateMachine.couchDBName = 'wmagent_jobdump'

        return config

    def finishJobs(self, jobGroups, subscription=None):
        """
        _finishJobs_

        Fake the completion of previously created jobs: mark every acquired
        file of the subscription (self.subscription1 when none is given) as
        complete and walk the jobs of each job group through
        created -> executing -> complete -> success -> cleanout.
        """
        if not subscription:
            subscription = self.subscription1
        for f in subscription.acquiredFiles():
            subscription.completeFiles(f)

        for jobGroup in jobGroups:
            self.changer.propagate(jobGroup.jobs, 'executing', 'created')
            self.changer.propagate(jobGroup.jobs, 'complete', 'executing')
            self.changer.propagate(jobGroup.jobs, 'success', 'complete')
            self.changer.propagate(jobGroup.jobs, 'cleanout', 'success')

        return

    def _addFiles(self, fileset, fileNums, location, run=1, lumi=1):
        """
        Attach one file named /some/file/name<N> (size 1000, 100 events) per
        entry of fileNums to the fileset, each with the given run/lumi and
        location.  The caller is responsible for create()/commit().
        """
        for fileNum in fileNums:
            newFile = File("/some/file/name%d" % fileNum, size=1000, events=100)
            newFile.addRun(Run(run, lumi))
            newFile.setLocation(location)
            fileset.addFile(newFile)

    def _runSplitter(self, subscription, **splitArgs):
        """
        Build a fresh job factory for the subscription, invoke it with the
        given splitting arguments and return the resulting job groups.
        """
        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=subscription)
        return jobFactory(**splitArgs)

    def _assertJobMasksWithin(self, jobs, compactList):
        """
        Check that the mask of every job is configured for exactly one run
        and that each lumi of that mask is part of the LumiList built from
        compactList.
        """
        ll = LumiList(compactList=compactList)
        for job in jobs:
            runs = job['mask'].getRunAndLumis()
            self.assertEqual(len(runs), 1, "Job has more than one run configured")
            # dict views cannot be indexed on Python 3, so take the only key
            # through the iterator instead of runs.keys()[0]
            run = next(iter(runs))
            for lumiPair in runs[run]:
                for lumi in range(lumiPair[0], lumiPair[1] + 1):
                    self.assertTrue((str(run), lumi) in ll, "All of %s not in %s" % (lumiPair, ll))

    def _createClosedHarvestSubscription(self, openMsg=None):
        """
        Create a Harvest subscription on the fileset returned by
        createCommonFileset(), verify the fileset starts open, close it and
        return the subscription.  openMsg is the optional failure message of
        the "fileset is open" check.
        """
        multipleFilesFileset = createCommonFileset()
        self.assertEqual(multipleFilesFileset.open, True, openMsg)

        harvestingWorkflow = Workflow(spec="spec.xml",
                                      owner="amaltaro",
                                      name="TestWorkflow",
                                      task="Test")
        harvestingWorkflow.create()

        harvestSub = Subscription(fileset=multipleFilesFileset,
                                  workflow=harvestingWorkflow,
                                  split_algo="Harvest",
                                  type="Harvesting")
        harvestSub.create()

        multipleFilesFileset.markOpen(False)
        self.assertEqual(multipleFilesFileset.open, False, "Fileset should now be closed")

        return harvestSub

    def testHarvestEndOfRunTrigger(self):
        """
        _testDQMHarvestEndOfRunTrigger_

        Make sure that the basic splitting algo works: no job group while
        the fileset is open (and no periodic interval is configured), one
        job group once the fileset has been closed.
        """
        self.assertEqual(self.fileset1.open, True, "Fileset is closed. Shouldn't")

        jobGroups = self._runSplitter(self.subscription1)

        self.assertEqual(len(jobGroups), 0,
                         "We got 1 or more jobGroups with an open fileset and no periodic configuration")

        self.fileset1.markOpen(False)
        self.assertEqual(self.fileset1.open, False, "Fileset is opened, why?")

        # TODO: also check for acquired files; if there are any, jobs are
        # still running and no further job should be fired. Not a priority.

        jobGroups = self._runSplitter(self.subscription1)

        self.assertEqual(len(jobGroups), 1,
                         "Harvest jobsplitter didn't create a single jobGroup after the fileset was closed")

        return

    def testPeriodicTrigger(self):
        """
        _testPeriodicTrigger_

        With the fileset left open, check that jobs are only created once
        the periodic harvest interval has elapsed, and that they are split
        by location and by run.
        """
        self.assertEqual(self.fileset1.open, True, "Fileset is not open, not testing periodic here")

        # First periodic call (3 second interval): nothing was harvested
        # before, so a job group is expected straight away
        jobGroups = self._runSplitter(self.subscription1, periodic_harvest_interval=3)

        self.assertEqual(len(jobGroups), 1, "Didn't created the first periodic job when there were acquired files")

        # For the whole thing to work, fake the first job finishing and mark the files as complete
        self.finishJobs(jobGroups)

        # Add more files, so we have new stuff to process
        self._addFiles(self.fileset1, range(12, 24), 'T1_US_FNAL_Disk')
        self.fileset1.commit()

        # No job may be created unless the delay has passed
        jobGroups = self._runSplitter(self.subscription1, periodic_harvest_interval=2)

        self.assertEqual(len(jobGroups), 0,
                         "Created one or more job, when there were non-acquired file and the period is not passed by")

        time.sleep(2)

        jobGroups = self._runSplitter(self.subscription1, periodic_harvest_interval=2)

        self.assertEqual(len(jobGroups), 1,
                         "Didn't created one or more job, and there weren't and the period is passed by")

        # Finish the previous jobs
        self.finishJobs(jobGroups)

        # Add more files, so we have new stuff to process
        self._addFiles(self.fileset1, range(26, 36), 'T1_US_FNAL_Disk')
        self.fileset1.commit()

        # Asking again right away must not create a job: the configured delay has to be respected
        jobGroups = self._runSplitter(self.subscription1, periodic_harvest_interval=2)

        self.assertEqual(len(jobGroups), 0, "Created one or more job, there are new files, but the delay is not past")

        time.sleep(2)

        jobGroups = self._runSplitter(self.subscription1, periodic_harvest_interval=2)

        self.assertEqual(len(jobGroups), 1, "Didn't created one or more job, there are new files and the delay is past")

        # Last check is whether the job gets all the files or not
        filesInJob = jobGroups[0].jobs[0].getFiles()
        filesInFileset = self.fileset1.getFiles()
        self.assertEqual(filesInJob, filesInFileset, "Job didn't got all the files")

        # Finish the previous jobs
        self.finishJobs(jobGroups)

        # Add files for the first location
        self._addFiles(self.fileset1, range(38, 48), 'T1_US_FNAL_Disk')
        self.fileset1.commit()
        # Then for another location
        self._addFiles(self.fileset1, range(50, 56), 'T2_CH_CERN')
        self.fileset1.commit()

        # We should have jobs in both locations
        time.sleep(2)

        jobGroups = self._runSplitter(self.subscription1, periodic_harvest_interval=2)

        self.assertEqual(len(jobGroups[0].getJobs()), 2, "We didn't get 2 jobs for 2 locations")

        firstJobLocation = jobGroups[0].getJobs()[0].getFileLocations()[0]
        secondJobLocation = jobGroups[0].getJobs()[1].getFileLocations()[0]

        self.assertEqual(firstJobLocation, 'T2_CH_CERN')
        self.assertEqual(secondJobLocation, 'T1_US_FNAL')

        self.finishJobs(jobGroups)

        # Two new runs, both at T2_CH_CERN
        self._addFiles(self.fileset1, range(60, 65), 'T2_CH_CERN', run=2, lumi=2)
        self.fileset1.commit()

        self._addFiles(self.fileset1, range(70, 75), 'T2_CH_CERN', run=3, lumi=3)
        self.fileset1.commit()

        time.sleep(2)

        jobGroups = self._runSplitter(self.subscription1, periodic_harvest_interval=2)

        # This is one of the most "complicated" checks, so worth a comment.
        # 4 jobs should be created, one per (location, run) pair:
        # 1 - the run 1 files at T1_US_FNAL_Disk (by far the largest set)
        # 2 - the run 1 files at T2_CH_CERN
        # 3 - the run 2 files at T2_CH_CERN
        # 4 - the run 3 files at T2_CH_CERN
        self.assertEqual(len(jobGroups[0].getJobs()), 4, "We didn't get 4 jobs for adding 2 different runs to SomeSE3")

        return

    def testMultipleRunHarvesting(self):
        """
        _testMultipleRunHarvesting_

        Add some files with multiple runs in each, make sure the jobs
        are created by location and run. Verify each job mask afterwards.
        Note that in this test runs are split between sites; in real life
        that MUST NOT happen, it is still not supported.
        """
        multipleFilesFileset = Fileset(name="TestFileset")

        newFile = File("/some/file/test1", size=1000, events=100)
        newFile.addRun(Run(1, *[1, 3, 4, 5, 6, 7]))
        newFile.addRun(Run(2, *[1, 2, 4, 5, 6, 7]))
        newFile.setLocation('T1_US_FNAL_Disk')
        multipleFilesFileset.addFile(newFile)
        newFile = File("/some/file/test2", size=1000, events=100)
        newFile.addRun(Run(1, *[2, 8]))
        newFile.addRun(Run(2, *[3, 8]))
        newFile.setLocation('T2_CH_CERN')
        multipleFilesFileset.addFile(newFile)
        multipleFilesFileset.create()

        harvestingWorkflow = Workflow(spec="spec.xml",
                                      owner="hufnagel",
                                      name="TestWorkflow",
                                      task="Test")
        harvestingWorkflow.create()

        harvestSub = Subscription(fileset=multipleFilesFileset,
                                  workflow=harvestingWorkflow,
                                  split_algo="Harvest",
                                  type="Harvesting")
        harvestSub.create()

        # The same factory is deliberately reused for the second periodic call below
        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=harvestSub)
        jobGroups = jobFactory(periodic_harvest_interval=2)
        self.assertEqual(len(jobGroups), 1, "A single job group was not created")
        self.assertEqual(len(jobGroups[0].getJobs()), 4,
                         "Four jobs were not created")

        self._assertJobMasksWithin(jobGroups[0].getJobs(),
                                   {1: [[1, 1], [3, 7], [2, 2], [8, 8]],
                                    2: [[1, 2], [4, 7], [3, 3], [8, 8]]})

        self.finishJobs(jobGroups, harvestSub)

        # One more file for run 1, carrying lumis 9 to 14
        newFile = File("/some/file/test3", size=1000, events=100)
        newFile.addRun(Run(1, *range(9, 15)))
        newFile.setLocation('T2_CH_CERN')
        multipleFilesFileset.addFile(newFile)
        multipleFilesFileset.commit()

        time.sleep(2)

        jobGroups = jobFactory(periodic_harvest_interval=2)
        self.assertEqual(len(jobGroups), 1, "A single job group was not created")
        self.assertEqual(len(jobGroups[0].getJobs()), 4, "Four jobs were not created")

        lumisWithNewFile = {1: [[1, 1], [3, 7], [2, 2], [8, 8], [9, 14]],
                            2: [[1, 2], [4, 7], [3, 3], [8, 8]]}
        self._assertJobMasksWithin(jobGroups[0].getJobs(), lumisWithNewFile)

        # A second (sibling) subscription on the same fileset
        harvestingWorkflowSib = Workflow(spec="spec.xml",
                                         owner="hufnagel",
                                         name="TestWorkflowSib",
                                         task="TestSib")
        harvestingWorkflowSib.create()

        harvestSubSib = Subscription(fileset=multipleFilesFileset,
                                     workflow=harvestingWorkflowSib,
                                     split_algo="Harvest",
                                     type="Harvesting")
        harvestSubSib.create()

        jobFactorySib = self.splitterFactory(package="WMCore.WMBS", subscription=harvestSubSib)

        multipleFilesFileset.markOpen(False)

        # Nothing is expected for the sibling until the files acquired by
        # the first subscription have been completed
        jobGroups = jobFactorySib(periodic_harvest_sibling=True)
        self.assertEqual(len(jobGroups), 0, "A single job group was created")

        self.finishJobs(jobGroups, harvestSub)

        jobGroups = jobFactorySib(periodic_harvest_sibling=True)
        self.assertEqual(len(jobGroups), 1, "A single job group was not created")
        self.assertEqual(len(jobGroups[0].getJobs()), 4, "Four jobs were not created")

        self._assertJobMasksWithin(jobGroups[0].getJobs(), lumisWithNewFile)

    def testMultiRunHarvesting(self):
        """
        _testMultiRunHarvesting_

        Provided a fileset with a couple of files and different runs, create a
        single job for all the runs at a specific location, which also adds a
        baggage to the job (True) which is later on looked up by SetupCMSSWPSet.
        """
        harvestSub = self._createClosedHarvestSubscription()

        jobGroups = self._runSplitter(harvestSub, dqmHarvestUnit="multiRun")
        self.assertEqual(len(jobGroups), 1)

        for jobGroup in jobGroups:
            self.assertEqual(len(jobGroup.jobs), 1)
            for job in jobGroup.jobs:
                baggage = job.getBaggage()
                self.assertTrue(getattr(baggage, "multiRun", False), "It's supposed to be a multiRun job")
                self.assertEqual(getattr(baggage, "runLimits", ""), "-1-6")

    def testByRunHarvesting(self):
        """
        _testByRunHarvesting_

        Provided the common fileset, create one single job per run and
        location (6 jobs in one job group are expected).
        The multiRun baggage should be false in this case.
        """
        harvestSub = self._createClosedHarvestSubscription("Fileset should be open!")

        jobGroups = self._runSplitter(harvestSub)
        self.assertEqual(len(jobGroups), 1, "Should have created 1 job group")

        for jobGroup in jobGroups:
            self.assertEqual(len(jobGroup.jobs), 6, "Should have created 6 jobs")
            for job in jobGroup.jobs:
                baggage = job.getBaggage()
                self.assertFalse(getattr(baggage, "multiRun", False), "It's supposed to be a byRun job")

    def testByRunAndRunWhitelist(self):
        """
        _testByRunAndRunWhitelist_

        Create harvesting jobs by run for the runs provided in the RunWhitelist
        """
        harvestSub = self._createClosedHarvestSubscription()

        jobGroups = self._runSplitter(harvestSub, runWhitelist=[1, 3])
        self.assertEqual(len(jobGroups), 1, "One jobgroup per location")

        for jobGroup in jobGroups:
            self.assertEqual(len(jobGroup.jobs), 2)

    def testByRunAndRunBlacklist(self):
        """
        _testByRunAndRunBlacklist_

        Create harvesting jobs by run for the whitelisted runs that are not
        excluded by the RunBlacklist
        """
        harvestSub = self._createClosedHarvestSubscription()

        jobGroups = self._runSplitter(harvestSub, runWhitelist=[1, 2, 3, 4, 5], runBlacklist=[1, 3])
        self.assertEqual(len(jobGroups), 1, "One jobgroup per location")

        for jobGroup in jobGroups:
            self.assertEqual(len(jobGroup.jobs), 3)
示例#34
0
    def createTestJobGroup(self, config, name = "TestWorkthrough",
                           filesetName = "TestFileset",
                           specLocation = "spec.xml", error = False,
                           task = "/TestWorkload/ReReco",
                           type = "Processing"):
        """
        Creates a group of several jobs

        Builds a workflow (marked as injected), a closed input fileset with
        two files, a closed output fileset with one file and a subscription,
        then creates self.nJobs jobs on it.  The jobs are attached a
        framework job report and driven through a first failure, a cooloff
        retry and a second failure until they reach 'cleanout'.

        config       -- configuration handed to ChangeState
        name         -- workflow name
        filesetName  -- name of the input fileset; the output fileset is
                        called '<filesetName>Output'
        specLocation -- spec of the workflow
        error        -- when True the first half of the jobs gets
                        badBackfillJobReport.pkl instead of mergeReport1.pkl
        task         -- task path of the workflow
        type         -- subscription type (the name shadows the builtin but
                        is part of the signature callers use)

        Returns the committed job group.
        """
        testWorkflow = Workflow(spec = specLocation, owner = self.OWNERDN,
                                name = name, task = task, owner_vogroup="", owner_vorole="")
        testWorkflow.create()
        self.inject.execute(names = [name], injected = True)

        # Input fileset: two files, then closed
        testWMBSFileset = Fileset(name = filesetName)
        testWMBSFileset.create()

        testFileA = File(lfn = "/this/is/a/lfnA" , size = 1024, events = 10)
        testFileA.addRun(Run(10, *[12312]))
        testFileA.setLocation('malpaquet')

        testFileB = File(lfn = "/this/is/a/lfnB", size = 1024, events = 10)
        testFileB.addRun(Run(10, *[12314]))
        testFileB.setLocation('malpaquet')

        testFileA.create()
        testFileB.create()

        testWMBSFileset.addFile(testFileA)
        testWMBSFileset.addFile(testFileB)
        testWMBSFileset.commit()
        testWMBSFileset.markOpen(0)

        # Output fileset: one file, then closed
        outputWMBSFileset = Fileset(name = '%sOutput' % filesetName)
        outputWMBSFileset.create()
        testFileC = File(lfn = "/this/is/a/lfnC" , size = 1024, events = 10)
        testFileC.addRun(Run(10, *[12312]))
        testFileC.setLocation('malpaquet')
        testFileC.create()
        outputWMBSFileset.addFile(testFileC)
        outputWMBSFileset.commit()
        outputWMBSFileset.markOpen(0)

        testWorkflow.addOutput('output', outputWMBSFileset)

        testSubscription = Subscription(fileset = testWMBSFileset,
                                        workflow = testWorkflow,
                                        type = type)
        testSubscription.create()

        testJobGroup = JobGroup(subscription = testSubscription)
        testJobGroup.create()

        for _ in range(self.nJobs):
            testJob = Job(name = makeUUID())
            testJob.addFile(testFileA)
            testJob.addFile(testFileB)
            testJob['retry_count'] = 1
            testJob['retry_max'] = 10
            testJob['mask'].addRunAndLumis(run = 10, lumis = [12312, 12313])
            testJobGroup.add(testJob)

        testJobGroup.commit()

        changer = ChangeState(config)

        # Only the first report depends on the error flag, the second one
        # is the log collect report in both cases
        testBase = WMCore.WMBase.getTestBase()
        if error:
            path1 = os.path.join(testBase,
                                 "WMComponent_t/JobAccountant_t/fwjrs", "badBackfillJobReport.pkl")
        else:
            path1 = os.path.join(testBase,
                                 'WMComponent_t/TaskArchiver_t/fwjrs',
                                 'mergeReport1.pkl')
        path2 = os.path.join(testBase,
                             'WMComponent_t/TaskArchiver_t/fwjrs',
                             'logCollectReport2.pkl')

        report1 = Report()
        report2 = Report()
        report1.load(filename = path1)
        report2.load(filename = path2)

        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'complete', 'executing')

        # The reports are attached before the jobs are declared failed:
        # jobs below self.nJobs/2 get report1, the others report2
        for i in range(self.nJobs):
            if i < self.nJobs/2:
                testJobGroup.jobs[i]['fwjr'] = report1
            else:
                testJobGroup.jobs[i]['fwjr'] = report2

        # First failure, cooloff and retry, second failure, then cleanout;
        # each pair is (new state, previous state)
        transitions = [('jobfailed', 'complete'),
                       ('jobcooloff', 'jobfailed'),
                       ('created', 'jobcooloff'),
                       ('executing', 'created'),
                       ('complete', 'executing'),
                       ('jobfailed', 'complete'),
                       ('retrydone', 'jobfailed'),
                       ('exhausted', 'retrydone'),
                       ('cleanout', 'exhausted')]
        for newState, oldState in transitions:
            changer.propagate(testJobGroup.jobs, newState, oldState)

        testSubscription.completeFiles([testFileA, testFileB])

        return testJobGroup
    def __call__(self, parameters):
        """
        Perform the work required with the given parameters

        Builds the fileset name "<dataset>:<FeederType>:<FileType>:<StartRun>"
        from the message arguments.  A fileset that does not exist yet is
        created and handed to the feeder of the requested type; the feeder
        is looked up in (or added to) the database when this thread has no
        running feeder of that type.  An existing but closed fileset is
        re-opened and handed to the feeder again, an existing open fileset
        is left untouched.

        The running feeders lock is held for the whole operation and is
        released even when one of the steps raises.  The message service is
        only notified (finish) when everything succeeded.
        """
        DefaultSlave.__call__(self, parameters)

        # Handle the message
        message = self.messageArgs

        # Lock on the running feeders list
        myThread = threading.current_thread()
        myThread.runningFeedersLock.acquire()
        try:
            filesetName = message["dataset"]
            feederType = message["FeederType"]
            fileType = message["FileType"]
            startRun = message["StartRun"]

            logging.debug("Dataset " + filesetName + " arrived")

            fileset = Fileset(name = ':'.join([filesetName, feederType,
                                               fileType, startRun]))

            # Check if the fileset is already there.  The comparison with
            # False is kept explicit so that only False/0 counts as missing.
            if fileset.exists() == False:

                # Empty fileset creation
                fileset.create()
                fileset.setLastUpdate(0)

                logging.info("Fileset %s whith id %s is added" \
                                   %(fileset.name, str(fileset.id)))

                # Check if there is a running feeder
                if feederType in myThread.runningFeeders:
                    logging.info("HAVE FEEDER " + feederType + " RUNNING")
                    logging.info(myThread.runningFeeders[feederType])

                else:
                    logging.info("NO FEEDER " + feederType + " RUNNING")

                    # Check if we have a feeder in DB
                    if self.queries.checkFeeder(feederType):
                        # Have feeder, get info
                        logging.info("Getting Feeder from DB")
                    else:
                        # Create feeder
                        logging.info("Adding Feeder to DB")
                        self.queries.addFeeder(feederType, "StatePath")

                    # Either way the id now comes from the database
                    feederId = self.queries.getFeederId(feederType)
                    logging.info(feederId)
                    myThread.runningFeeders[feederType] = feederId

                # Fileset/Feeder association
                self.queries.addFilesetToManage(fileset.id, \
                              myThread.runningFeeders[feederType])
                logging.info("Fileset %s is added to feeder %s" %(fileset.id, \
                              myThread.runningFeeders[feederType]))
            else:

                # If fileset already exist a new subscription
                # will be created for its workflow
                logging.info("Fileset exists: Subscription will be created for it")

                # Open it if close
                fileset.load()
                if fileset.open == False:

                    fileset.markOpen(True)

                    logging.info("Getting Feeder from DB")
                    feederId = self.queries.getFeederId(feederType)
                    logging.info(feederId)
                    myThread.runningFeeders[feederType] = feederId

                    self.queries.addFilesetToManage(fileset.id, \
                                      myThread.runningFeeders[feederType])
                    logging.info("Fileset %s is added to feeder %s" %(fileset.id, \
                                      myThread.runningFeeders[feederType]))
        finally:
            # Never leave the lock held, otherwise a single failing message
            # would block every later access to the running feeders list
            myThread.runningFeedersLock.release()

        myThread.msgService.finish()
示例#36
0
    def _createSubscriptionsInWMBS(self, task, fileset, alternativeFilesetClose = False):
        """
        __createSubscriptionsInWMBS_

        Create subscriptions in WMBS for all the tasks in the spec.  This
        includes filesets, workflows and the output map for each task.

        task    -- task to create the workflow and subscription for
        fileset -- input fileset of the subscription
        alternativeFilesetClose -- passed on to every workflow created

        The method recurses into each child task whose input output module
        matches one of the task's output modules, using the corresponding
        output fileset as the child's input.  An existing subscription is
        loaded instead of being created again.

        Returns the top level subscription, i.e. the first subscription
        handled since self.topLevelSubscription was last None.
        """
        # create runtime sandbox for workflow
        self.createSandbox()

        # The owner information is read once and reused for every field
        owner = self.wmSpec.getOwner()

        #FIXME: Let workflow put in values if spec is missing them
        workflow = Workflow(spec = self.wmSpec.specUrl(), owner = owner["name"],
                            dn = owner.get("dn", "unknown"),
                            group = owner.get("group", "unknown"),
                            owner_vogroup = owner.get("vogroup", "DEFAULT"),
                            owner_vorole = owner.get("vorole", "DEFAULT"),
                            name = self.wmSpec.name(), task = task.getPathName(),
                            wfType = self.wmSpec.getDashboardActivity(),
                            alternativeFilesetClose = alternativeFilesetClose,
                            priority = self.wmSpec.priority())
        workflow.create()
        subscription = Subscription(fileset = fileset, workflow = workflow,
                                    split_algo = task.jobSplittingAlgorithm(),
                                    type = task.getPrimarySubType())
        if subscription.exists():
            subscription.load()
            msg = "Subscription %s already exists for %s (you may ignore file insertion messages below, existing files wont be duplicated)"
            self.logger.info(msg % (subscription['id'], task.getPathName()))
        else:
            subscription.create()

        for site in task.siteWhitelist():
            subscription.addWhiteBlackList([{"site_name": site, "valid": True}])

        for site in task.siteBlacklist():
            subscription.addWhiteBlackList([{"site_name": site, "valid": False}])

        # Identity checks against None: "== None" would go through any
        # __eq__ the WMBS objects define
        if self.topLevelSubscription is None:
            self.topLevelSubscription = subscription
            logging.info("Top level subscription created: %s" % subscription["id"])
        else:
            logging.info("Child subscription created: %s" % subscription["id"])

        outputModules = task.getOutputModulesForTask()
        for outputModule in outputModules:
            for outputModuleName in outputModule.listSections_():
                outputFileset = Fileset(self.outputFilesetName(task, outputModuleName))
                outputFileset.create()
                outputFileset.markOpen(True)
                mergedOutputFileset = None

                for childTask in task.childTaskIterator():
                    if childTask.data.input.outputModule != outputModuleName:
                        continue

                    if childTask.taskType() == "Merge":
                        mergedOutputFileset = Fileset(self.outputFilesetName(childTask, "Merged"))
                        mergedOutputFileset.create()
                        mergedOutputFileset.markOpen(True)

                        primaryDataset = getattr(getattr(outputModule, outputModuleName), "primaryDataset", None)
                        if primaryDataset is not None:
                            self.mergeOutputMapping[mergedOutputFileset.id] = primaryDataset

                    self._createSubscriptionsInWMBS(childTask, outputFileset, alternativeFilesetClose)

                # Without a merge task the output fileset doubles as the
                # merged output fileset
                if mergedOutputFileset is None:
                    mergedOutputFileset = outputFileset
                workflow.addOutput(outputModuleName, outputFileset,
                                   mergedOutputFileset)

        return self.topLevelSubscription
示例#37
0
                        # Build the name of the per-run fileset to look up.
                        # Taking the part of filesetToProcess.name before the
                        # first ':', the name is made of its first three
                        # '/'-separated components, the fourth component cut
                        # at its first '-', the suffix '-Run<run number>', and
                        # finally every remaining ':'-separated field of the
                        # original name.
                        closeFileset = Fileset( name = (((\
      filesetToProcess.name).split(':')[0]).split('/')[0])+'/'+\
     (((filesetToProcess.name).split(':')[0]).split('/')[1]\
     )+'/'+(((filesetToProcess.name).split(':')[0]).split('/')\
     [2])+'/'+((((filesetToProcess.name).split(':')[0]).split\
     ('/')[3]).split('-')[0])+'-'+'Run'+str(listRun['run'])\
     +":"+":".join((filesetToProcess.name).split(':')[1:] ) )

                        # The value returned by exists() is reused as the
                        # fileset id below (presumably it is False when the
                        # fileset is unknown -- confirm against Fileset.exists)
                        if closeFileset.exists() != False :

                            closeFileset = Fileset( id = closeFileset.exists())
                            closeFileset.loadData()

                            # Only touch the fileset if it is still open
                            if closeFileset.open == True:
                                closeFileset.markOpen(False)


        # Commit the fileset
        filesetToProcess.commit()


        # Purge check: reload the fileset and close it when it has not been
        # updated for longer than self.purgeTime
        logging.debug("Test purge in T0ASTRun ...")
        filesetToProcess.load()
        LASTIME = filesetToProcess.lastUpdate

        # Both timestamps are divided by 3600 before comparing
        # NOTE(review): looks like seconds converted to hours, so purgeTime
        # would be in hours -- confirm
        if (int(now)/3600 - LASTIME/3600) > self.purgeTime:

            filesetToProcess.markOpen(False)
            logging.debug("Purge Done...")
示例#38
0
    def createTestJobGroup(self, name = "TestWorkthrough",
                           specLocation = "spec.xml", error = False,
                           task = "/TestWorkload/ReReco", nJobs = 10):
        """
        _createTestJobGroup_

        Generate a test WMBS JobGroup with real FWJRs

        A workflow, a fileset holding two files (marked as not open once
        committed) and a subscription are created.  nJobs jobs, each reading
        both files, are added to a new job group.  Every job is given the
        same pickled framework job report and the group is propagated
        through the states created, executing, complete, jobfailed,
        exhausted and cleanout.  Finally both files are marked as completed
        on the subscription.

        Arguments:
          name         - name used for both the workflow and the fileset
          specLocation - spec value stored with the workflow
          error        - if True load badBackfillJobReport.pkl, otherwise
                         PerformanceReport2.pkl
          task         - task value stored with the workflow
          nJobs        - number of jobs added to the job group

        Returns the committed JobGroup.
        """
        testWorkflow = Workflow(spec = specLocation, owner = "Simon",
                                name = name, task = task)
        testWorkflow.create()

        testWMBSFileset = Fileset(name = name)
        testWMBSFileset.create()

        testFileA = File(lfn = makeUUID(), size = 1024, events = 10)
        testFileA.addRun(Run(10, *[12312]))
        testFileA.setLocation('malpaquet')

        testFileB = File(lfn = makeUUID(), size = 1024, events = 10)
        testFileB.addRun(Run(10, *[12312]))
        testFileB.setLocation('malpaquet')

        testFileA.create()
        testFileB.create()

        testWMBSFileset.addFile(testFileA)
        testWMBSFileset.addFile(testFileB)
        testWMBSFileset.commit()
        testWMBSFileset.markOpen(0)

        testSubscription = Subscription(fileset = testWMBSFileset,
                                        workflow = testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription = testSubscription)
        testJobGroup.create()

        for _ in range(0, nJobs):
            testJob = Job(name = makeUUID())
            testJob.addFile(testFileA)
            testJob.addFile(testFileB)
            testJob['retry_count'] = 1
            testJob['retry_max'] = 10
            testJob['mask'].addRunAndLumis(run = 10, lumis = [12312, 12313])
            testJobGroup.add(testJob)

        testJobGroup.commit()

        # Only the report file name depends on the error flag
        if error:
            reportName = "badBackfillJobReport.pkl"
        else:
            reportName = "PerformanceReport2.pkl"
        path = os.path.join(WMCore.WMBase.getTestBase(),
                            "WMComponent_t/JobAccountant_t/fwjrs", reportName)

        report = Report()
        report.load(filename = path)

        self.changeState.propagate(testJobGroup.jobs, 'created', 'new')
        self.changeState.propagate(testJobGroup.jobs, 'executing', 'created')
        self.changeState.propagate(testJobGroup.jobs, 'complete', 'executing')
        # The report is attached before the jobfailed transition
        for job in testJobGroup.jobs:
            job['fwjr'] = report
        self.changeState.propagate(testJobGroup.jobs, 'jobfailed', 'complete')
        self.changeState.propagate(testJobGroup.jobs, 'exhausted', 'jobfailed')
        self.changeState.propagate(testJobGroup.jobs, 'cleanout', 'exhausted')

        testSubscription.completeFiles([testFileA, testFileB])

        return testJobGroup
示例#39
0
文件: WMBSHelper.py 项目: dmwm/WMCore
    def _createSubscriptionsInWMBS(self, task, fileset, alternativeFilesetClose=False):
        """
        __createSubscriptionsInWMBS_

        Create the WMBS workflow and subscription for the given task on the
        given fileset, then walk the task's output modules: an output fileset
        is created for each one that is not ignored, matching child tasks are
        handled by calling this method recursively with that fileset as
        input, and the output is registered on the workflow.

        The first subscription created is stored as self.topLevelSubscription.
        Nothing is returned.
        """
        # create runtime sandbox for workflow
        self.createSandbox()

        # FIXME: Let workflow put in values if spec is missing them
        workflowArgs = {
            "spec": self.wmSpec.specUrl(),
            "owner": self.wmSpec.getOwner()["name"],
            "dn": self.wmSpec.getOwner().get("dn", "unknown"),
            "group": self.wmSpec.getOwner().get("group", "unknown"),
            "owner_vogroup": self.wmSpec.getOwner().get("vogroup", "DEFAULT"),
            "owner_vorole": self.wmSpec.getOwner().get("vorole", "DEFAULT"),
            "name": self.wmSpec.name(),
            "task": task.getPathName(),
            "wfType": self.wmSpec.getDashboardActivity(),
            "alternativeFilesetClose": alternativeFilesetClose,
            "priority": self.wmSpec.priority(),
        }
        workflow = Workflow(**workflowArgs)
        workflow.create()

        subscription = Subscription(fileset=fileset, workflow=workflow,
                                    split_algo=task.jobSplittingAlgorithm(),
                                    type=task.getPrimarySubType())
        subscription.create()

        ### FIXME: I'm pretty sure we can improve how we handle this site white/black list
        for allowedSite in task.siteWhitelist():
            subscription.addWhiteBlackList([{"site_name": allowedSite, "valid": True}])

        for bannedSite in task.siteBlacklist():
            subscription.addWhiteBlackList([{"site_name": bannedSite, "valid": False}])

        # The first subscription seen becomes the top level one
        if self.topLevelSubscription is None:
            self.topLevelSubscription = subscription
            createdMsg = "Top level subscription %s created for %s"
        else:
            createdMsg = "Child subscription %s created for %s"
        logging.info(createdMsg, subscription["id"], self.wmSpec.name())

        outputModules = task.getOutputModulesForTask()
        ignoredOutputModules = task.getIgnoredOutputModulesForTask()
        for outputModule in outputModules:
            for outputModuleName in outputModule.listSections_():
                if outputModuleName in ignoredOutputModules:
                    msg = "%s has %s as IgnoredOutputModule, skipping fileset creation."
                    logging.info(msg, task.getPathName(), outputModuleName)
                    continue

                moduleSection = getattr(outputModule, outputModuleName)
                dataTier = getattr(moduleSection, "dataTier", '')

                outputFileset = Fileset(self.outputFilesetName(task, outputModuleName, dataTier))
                outputFileset.create()
                outputFileset.markOpen(True)
                mergedOutputFileset = None

                for childTask in task.childTaskIterator():
                    # Only children fed by this output module are of interest
                    if childTask.data.input.outputModule != outputModuleName:
                        continue

                    childDatatier = getattr(childTask.data.input, 'dataTier', '')
                    # Cleanup and Merge children are skipped when their data tier differs
                    if childTask.taskType() in ("Cleanup", "Merge") and childDatatier != dataTier:
                        continue

                    if childTask.taskType() == "Merge" and childDatatier == dataTier:
                        mergedName = self.outputFilesetName(childTask, "Merged", dataTier)
                        mergedOutputFileset = Fileset(mergedName)
                        mergedOutputFileset.create()
                        mergedOutputFileset.markOpen(True)

                        primaryDataset = getattr(moduleSection, "primaryDataset", None)
                        if primaryDataset is not None:
                            self.mergeOutputMapping[mergedOutputFileset.id] = primaryDataset

                    self._createSubscriptionsInWMBS(childTask, outputFileset, alternativeFilesetClose)

                # Without a merge child the output fileset is registered as its own merged fileset
                if mergedOutputFileset is None:
                    finalFileset = outputFileset
                else:
                    finalFileset = mergedOutputFileset
                workflow.addOutput(outputModuleName + dataTier, outputFileset,
                                   finalFileset)

        return
class SiblingProcessingBasedTest(unittest.TestCase):
    """
    _SiblingProcessingBasedTest_

    Test SiblingProcessing job splitting.
    """
    def setUp(self):
        """
        _setUp_

        Setup the database connections and schema, then create the objects
        shared by the tests: two locations, FilesetA (files A-C) and
        FilesetB (files D-F), one FileBased processing subscription on
        FilesetA, three on FilesetB, and one SiblingProcessingBased cleanup
        subscription on each fileset.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules = ["WMCore.WMBS"],
                                useDefault = False)

        myThread = threading.currentThread()
        daofactory = DAOFactory(package = "WMCore.WMBS",
                                logger = myThread.logger,
                                dbinterface = myThread.dbi)

        locationAction = daofactory(classname = "Locations.New")
        locationAction.execute("T2_CH_CERN", pnn = "T2_CH_CERN")
        locationAction.execute("T1_US_FNAL", pnn = "T1_US_FNAL_Disk")

        self.testFilesetA = Fileset(name = "FilesetA")
        self.testFilesetA.create()
        self.testFilesetB = Fileset(name = "FilesetB")
        self.testFilesetB.create()

        self.testFileA = self._makeFile("testFileA")
        self.testFileB = self._makeFile("testFileB")
        self.testFileC = self._makeFile("testFileC")

        self.testFilesetA.addFile(self.testFileA)
        self.testFilesetA.addFile(self.testFileB)
        self.testFilesetA.addFile(self.testFileC)
        self.testFilesetA.commit()

        self.testFileD = self._makeFile("testFileD")
        self.testFileE = self._makeFile("testFileE")
        self.testFileF = self._makeFile("testFileF")

        self.testFilesetB.addFile(self.testFileD)
        self.testFilesetB.addFile(self.testFileE)
        self.testFilesetB.addFile(self.testFileF)
        self.testFilesetB.commit()

        testWorkflowA = self._makeWorkflow("A")
        testWorkflowB = self._makeWorkflow("B")
        testWorkflowC = self._makeWorkflow("C")
        testWorkflowD = self._makeWorkflow("D")

        self.testSubscriptionA = self._makeSubscription(self.testFilesetA,
                                                        testWorkflowA)
        self.testSubscriptionB = self._makeSubscription(self.testFilesetB,
                                                        testWorkflowB)
        self.testSubscriptionC = self._makeSubscription(self.testFilesetB,
                                                        testWorkflowC)
        self.testSubscriptionD = self._makeSubscription(self.testFilesetB,
                                                        testWorkflowD)

        deleteWorkflow = self._makeWorkflow("E")

        self.deleteSubscriptionA = self._makeSubscription(self.testFilesetA,
                                                          deleteWorkflow,
                                                          splitAlgo = "SiblingProcessingBased",
                                                          subType = "Cleanup")
        self.deleteSubscriptionB = self._makeSubscription(self.testFilesetB,
                                                          deleteWorkflow,
                                                          splitAlgo = "SiblingProcessingBased",
                                                          subType = "Cleanup")
        return

    def tearDown(self):
        """
        _tearDown_

        Clear out WMBS.
        """
        self.testInit.clearDatabase()
        return

    def _makeFile(self, lfn, location = "T2_CH_CERN"):
        """
        _makeFile_

        Create a file of size 1000 with 100 events at the given location and
        return it.
        """
        newFile = File(lfn, size = 1000, events = 100,
                       locations = set([location]))
        newFile.create()
        return newFile

    def _makeWorkflow(self, label):
        """
        _makeWorkflow_

        Create the workflow named "wf<label>" with spec "spec<label>.xml"
        and return it.
        """
        newWorkflow = Workflow(spec = "spec%s.xml" % label, owner = "Steve",
                               name = "wf%s" % label, task = "Test")
        newWorkflow.create()
        return newWorkflow

    def _makeSubscription(self, fileset, workflow, splitAlgo = "FileBased",
                          subType = "Processing"):
        """
        _makeSubscription_

        Create a subscription of the workflow to the fileset and return it.
        """
        newSubscription = Subscription(fileset = fileset,
                                       workflow = workflow,
                                       split_algo = splitAlgo,
                                       type = subType)
        newSubscription.create()
        return newSubscription

    def testSiblingProcessing(self):
        """
        _testSiblingProcessing_

        Verify that the sibling processing split works correctly dealing with
        failed files and acquiring files correctly.
        """
        splitter = SplitterFactory()
        deleteFactoryA = splitter(package = "WMCore.WMBS",
                                  subscription = self.deleteSubscriptionA)
        deleteFactoryB = splitter(package = "WMCore.WMBS",
                                  subscription = self.deleteSubscriptionB)

        # Nothing has been processed yet
        result = deleteFactoryA()
        self.assertEqual(len(result), 0,
                         "Error: No jobs should be returned.")

        result = deleteFactoryB()
        self.assertEqual(len(result), 0,
                         "Error: No jobs should be returned.")

        # File A is completed by the only sibling subscription on FilesetA
        self.testSubscriptionA.completeFiles(self.testFileA)

        result = deleteFactoryA(files_per_job = 1)

        self.assertEqual(len(result), 1,
                         "Error: Only one jobgroup should be returned.")
        self.assertEqual(len(result[0].jobs), 1,
                         "Error: There should only be one job in the jobgroup.")
        self.assertEqual(result[0].jobs[0]["possiblePSN"], set(["T2_CH_CERN"]),
                         "Error: possiblePSN is wrong.")
        self.assertEqual(len(result[0].jobs[0]["input_files"]), 1,
                         "Error: Job should only have one input file.")
        self.assertEqual(result[0].jobs[0]["input_files"][0]["lfn"], "testFileA",
                         "Error: Input file for job is wrong.")

        result = deleteFactoryB(files_per_job = 1)
        self.assertEqual(len(result), 0,
                         "Error: Second subscription should have no jobs.")

        result = deleteFactoryA(files_per_job = 1)
        self.assertEqual(len(result), 0,
                         "Error: No jobs should have been created.")

        # File D: completed by one sibling, failed by another, third pending
        self.testSubscriptionB.completeFiles(self.testFileD)
        self.testSubscriptionC.failFiles(self.testFileD)

        result = deleteFactoryA(files_per_job = 1)
        self.assertEqual(len(result), 0,
                         "Error: No jobs should have been created.")

        result = deleteFactoryB(files_per_job = 1)
        self.assertEqual(len(result), 0,
                         "Error: No jobs should have been created.")

        # File D is now failed by a second sibling as well
        self.testSubscriptionD.failFiles(self.testFileD)

        result = deleteFactoryA(files_per_job = 1)
        self.assertEqual(len(result), 0,
                         "Error: No jobs should have been created.")

        result = deleteFactoryB(files_per_job = 1)
        self.assertEqual(len(result), 0,
                         "Error: No job groups should have been created.")

        # Files E and F are completed by every sibling on FilesetB
        self.testSubscriptionB.completeFiles([self.testFileE, self.testFileF])
        self.testSubscriptionC.completeFiles([self.testFileE, self.testFileF])
        self.testSubscriptionD.completeFiles([self.testFileE, self.testFileF])

        # The fileset is still open, so no job is expected yet
        result = deleteFactoryB(files_per_job = 10)
        self.assertEqual(len(result), 0,
                         "Error: No jobs should have been created.")

        self.testFilesetB.markOpen(False)

        result = deleteFactoryB(files_per_job = 10)

        self.assertEqual(len(result), 1,
                         "Error: One jobgroup should have been returned.")
        self.assertEqual(len(result[0].jobs), 1,
                         "Error: There should only be one job in the jobgroup.")
        self.assertEqual(len(result[0].jobs[0]["input_files"]), 2,
                         "Error: Job should have two input files.")

        lfns = [inputFile["lfn"] for inputFile in result[0].jobs[0]["input_files"]]

        self.assertTrue("testFileE" in lfns,
                        "Error: TestFileE missing from job input.")
        self.assertTrue("testFileF" in lfns,
                        "Error: TestFileF missing from job input.")

        self.assertEqual(len(self.deleteSubscriptionB.availableFiles()), 0,
                         "Error: There should be no available files.")

        completeFiles = self.deleteSubscriptionB.filesOfStatus("Completed")
        self.assertEqual(len(completeFiles), 1,
                         "Error: There should only be one complete file.")
        self.assertEqual(list(completeFiles)[0]["lfn"], "testFileD",
                         "Error: Test file D should be complete.")

        return

    def testMultipleLocations(self):
        """
        _testMultipleLocations_

        Verify that the sibling processing based algorithm doesn't create jobs
        that run over files at multiple sites.
        """
        testFile1 = self._makeFile("testFile1", "T1_US_FNAL_Disk")
        testFile2 = self._makeFile("testFile2", "T1_US_FNAL_Disk")
        testFile3 = self._makeFile("testFile3", "T1_US_FNAL_Disk")

        self.testFilesetA.addFile(testFile1)
        self.testFilesetA.addFile(testFile2)
        self.testFilesetA.addFile(testFile3)
        self.testFilesetA.commit()
        self.testFilesetA.markOpen(False)

        self.testSubscriptionA.completeFiles([testFile1, testFile2, testFile3])
        self.testSubscriptionA.completeFiles([self.testFileA, self.testFileB, self.testFileC])

        splitter = SplitterFactory()
        deleteFactoryA = splitter(package = "WMCore.WMBS",
                                  subscription = self.deleteSubscriptionA)

        result = deleteFactoryA(files_per_job = 50)

        self.assertEqual(len(result), 2,
                         "Error: Wrong number of jobgroups returned.")

        cernSite = set(["T2_CH_CERN"])
        fnalSite = set(["T1_US_FNAL"])
        goldenFilesA = ["testFileA", "testFileB", "testFileC"]
        goldenFilesB = ["testFile1", "testFile2", "testFile3"]

        for jobGroup in result:
            self.assertEqual(len(jobGroup.jobs), 1,
                             "Error: Wrong number of jobs in jobgroup.")
            self.assertEqual(len(jobGroup.jobs[0]["input_files"]), 3,
                             "Error: Wrong number of input files in job.")

            jobSite = jobGroup.jobs[0]["possiblePSN"]

            self.assertTrue(jobSite == cernSite or jobSite == fnalSite,
                            "Error: Wrong site for job.")

            if jobSite == cernSite:
                goldenFiles = goldenFilesA
            else:
                goldenFiles = goldenFilesB

            # remove() raises ValueError for a file that is not expected at
            # this site (or that shows up twice)
            for jobFile in jobGroup.jobs[0]["input_files"]:
                goldenFiles.remove(jobFile["lfn"])

            self.assertEqual(len(goldenFiles), 0,
                             "Error: Files are missing.")

        return

    def testLargeNumberOfFiles(self):
        """
        _testLargeNumberOfFiles_

        Setup a subscription with 500 files and verify that the splitting algo
        works correctly.
        """
        testWorkflowA = self._makeWorkflow("A")
        testWorkflowB = self._makeWorkflow("B")

        testFileset = Fileset(name = "TestFileset")
        testFileset.create()

        allFiles = []
        for i in range(500):
            testFile = self._makeFile(str(i))
            allFiles.append(testFile)
            testFileset.addFile(testFile)
        testFileset.commit()

        testSubscriptionA = self._makeSubscription(testFileset, testWorkflowA)
        testSubscriptionB = self._makeSubscription(testFileset, testWorkflowB,
                                                   splitAlgo = "SiblingProcessingBased")

        testSubscriptionA.completeFiles(allFiles)

        splitter = SplitterFactory()
        deleteFactoryA = splitter(package = "WMCore.WMBS",
                                  subscription = testSubscriptionB)

        result = deleteFactoryA(files_per_job = 50)
        self.assertEqual(len(result), 1,
                         "Error: Wrong number of job groups returned.")
        self.assertEqual(len(result[0].jobs), 10,
                         "Error: Wrong number of jobs returned.")

        return

    def testFilesWithoutOtherSubscriptions(self):
        """
        _testFilesWithoutOtherSubscriptions_

        Test the case where the files are only in the delete subscription,
        which can happen if cleanup of the other subscriptions is fast.
        """
        testWorkflowA = self._makeWorkflow("A")

        testFileset = Fileset(name = "TestFileset")
        testFileset.create()

        for i in range(500):
            testFileset.addFile(self._makeFile(str(i)))
        testFileset.commit()

        testSubscriptionA = self._makeSubscription(testFileset, testWorkflowA,
                                                   splitAlgo = "SiblingProcessingBased")

        splitter = SplitterFactory()
        deleteFactoryA = splitter(package = "WMCore.WMBS",
                                  subscription = testSubscriptionA)

        result = deleteFactoryA(files_per_job = 50)
        self.assertEqual(len(result), 1,
                         "Error: Wrong number of job groups returned.")
        self.assertEqual(len(result[0].jobs), 10,
                         "Error: Wrong number of jobs returned.")

        return
示例#41
0
class RepackTest(unittest.TestCase):
    """
    _RepackTest_

    Test for Repack job splitter
    """

    def setUp(self):
        """
        _setUp_

        Connect to the database and install the T0.WMBS schema, then insert
        the rows the tests rely on: one location with two SE names, run 1
        with lumis 1 to 4, stream "A" and its stream fileset "TestFileset1".
        A Repack subscription is created on that fileset, and the closed
        lumi DAO, the current time and the default split arguments are kept
        on the instance.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(customModules = ["T0.WMBS"])

        self.splitterFactory = SplitterFactory(package = "T0.JobSplitting")

        dbi = threading.currentThread().dbi
        t0DaoFactory = DAOFactory(package = "T0.WMBS",
                                  logger = logging,
                                  dbinterface = dbi)

        locationSql = """INSERT INTO wmbs_location
                                    (id, site_name, state)
                                    VALUES (1, 'SomeSite', 1)
                                    """
        firstSeNameSql = """INSERT INTO wmbs_location_senames
                                    (location, se_name)
                                    VALUES (1, 'SomeSE')
                                    """
        secondSeNameSql = """INSERT INTO wmbs_location_senames
                                    (location, se_name)
                                    VALUES (1, 'SomeSE2')
                                    """
        for statement in (locationSql, firstSeNameSql, secondSeNameSql):
            dbi.processData(statement, transaction = False)

        runBinds = { 'RUN' : 1,
                     'TIME' : int(time.time()),
                     'HLTKEY' : "someHLTKey" }
        t0DaoFactory(classname = "RunConfig.InsertRun").execute(binds = runBinds,
                                                                transaction = False)

        lumiInserter = t0DaoFactory(classname = "RunConfig.InsertLumiSection")
        for lumi in range(1, 5):
            lumiInserter.execute(binds = { 'RUN' : 1, 'LUMI' : lumi },
                                 transaction = False)

        streamInserter = t0DaoFactory(classname = "RunConfig.InsertStream")
        streamInserter.execute(binds = { 'STREAM' : "A" },
                               transaction = False)

        streamFilesetInserter = t0DaoFactory(classname = "RunConfig.InsertStreamFileset")
        streamFilesetInserter.execute(1, "A", "TestFileset1")

        # The fileset is loaded by name rather than created here
        self.fileset1 = Fileset(name = "TestFileset1")
        self.fileset1.load()

        repackWorkflow = Workflow(spec = "spec.xml", owner = "hufnagel",
                                  name = "TestWorkflow1", task="Test")
        repackWorkflow.create()

        self.subscription1 = Subscription(fileset = self.fileset1,
                                          workflow = repackWorkflow,
                                          split_algo = "Repack",
                                          type = "Repack")
        self.subscription1.create()

        # keep for later
        self.insertClosedLumiDAO = t0DaoFactory(classname = "RunLumiCloseout.InsertClosedLumi")
        self.currentTime = int(time.time())

        # default split parameters
        self.splitArgs = {
            'maxSizeSingleLumi': 20*1024*1024*1024,
            'maxSizeMultiLumi': 10*1024*1024*1024,
            'maxInputEvents': 500000,
            'maxInputFiles': 1000,
        }

        return

    def tearDown(self):
        """
        _tearDown_

        Clear the database through the TestInit helper created in setUp.
        """
        testInit = self.testInit
        testInit.clearDatabase()

    def getNumActiveSplitLumis(self):
        """
        _getNumActiveSplitLumis_

        Helper that returns the row count of the lumi_section_split_active
        table, i.e. the number of active split lumis.
        """
        dbi = threading.currentThread().dbi

        countSql = """SELECT COUNT(*)
                                              FROM lumi_section_split_active
                                              """
        resultProxy = dbi.processData(countSql, transaction = False)[0]
        rows = resultProxy.fetchall()

        # First column of the first row holds the count
        return rows[0][0]

    def test00(self):
        """
        _test00_

        Test that the job name prefix feature works
        Test multi lumi size threshold
        Multi lumi input

        Two files are added for each of the lumis 1 to 4.  No job group is
        expected with the default thresholds; with maxSizeMultiLumi lowered
        to 5000 a single "Repack-" job with 4 files is expected, both before
        and after the fileset is closed, and no split lumis may be recorded.
        """
        def checkSingleRepackJob(groups):
            """Check for exactly one group with one Repack- job of 4 files."""
            self.assertEqual(len(groups), 1,
                             "ERROR: JobFactory didn't return one JobGroup")

            self.assertEqual(len(groups[0].jobs), 1,
                             "ERROR: JobFactory didn't create a single job")

            repackJob = groups[0].jobs[0]
            self.assertTrue(repackJob['name'].startswith("Repack-"),
                            "ERROR: Job has wrong name")

            self.assertEqual(len(repackJob.getFiles()), 4,
                             "ERROR: Job does not process 4 files")

        mySplitArgs = self.splitArgs.copy()

        filesPerLumi = 2
        for lumi in range(1, 5):
            for _ in range(filesPerLumi):
                lumiFile = File(makeUUID(), size = 1000, events = 100)
                lumiFile.addRun(Run(1, *[lumi]))
                lumiFile.setLocation("SomeSE", immediateSave = False)
                lumiFile.create()
                self.fileset1.addFile(lumiFile)

        self.fileset1.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription1)

        # Default threshold
        mySplitArgs['maxSizeMultiLumi'] = self.splitArgs['maxSizeMultiLumi']
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        # Lowered threshold
        mySplitArgs['maxSizeMultiLumi'] = 5000
        checkSingleRepackJob(jobFactory(**mySplitArgs))

        self.fileset1.markOpen(False)

        checkSingleRepackJob(jobFactory(**mySplitArgs))

        self.assertEqual(self.getNumActiveSplitLumis(), 0,
                         "ERROR: Split lumis were created")

        return

    def test01(self):
        """
        _test01_

        Test multi lumi event threshold
        Multi lumi input

        Two files are added for each of the lumis 1 to 4 and closed lumi
        records are inserted.  No job group is expected with the default
        thresholds; with maxInputEvents lowered to 500 a single job with 4
        files is expected, both before and after the fileset is closed, and
        no split lumis may be recorded.
        """
        def checkSingleJob(groups):
            """Check for exactly one group holding one job with 4 files."""
            self.assertEqual(len(groups), 1,
                             "ERROR: JobFactory didn't return one JobGroup")

            self.assertEqual(len(groups[0].jobs), 1,
                             "ERROR: JobFactory didn't create a single job")

            self.assertEqual(len(groups[0].jobs[0].getFiles()), 4,
                             "ERROR: Job does not process 4 files")

        mySplitArgs = self.splitArgs.copy()

        closedLumiBinds = []
        filesPerLumi = 2
        for lumi in range(1, 5):
            for _ in range(filesPerLumi):
                lumiFile = File(makeUUID(), size = 1000, events = 100)
                lumiFile.addRun(Run(1, *[lumi]))
                lumiFile.setLocation("SomeSE", immediateSave = False)
                lumiFile.create()
                self.fileset1.addFile(lumiFile)
                # NOTE(review): one bind is appended per file, so every lumi
                # appears filesPerLumi times in the list -- confirm intended
                closedLumiBinds.append( { 'RUN' : 1,
                                          'LUMI' : lumi,
                                          'STREAM' : "A",
                                          'FILECOUNT' : filesPerLumi,
                                          'INSERT_TIME' : self.currentTime,
                                          'CLOSE_TIME' : self.currentTime } )
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription1)

        self.insertClosedLumiDAO.execute(binds = closedLumiBinds,
                                         transaction = False)

        # Default threshold
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        # Lowered threshold
        mySplitArgs['maxInputEvents'] = 500
        checkSingleJob(jobFactory(**mySplitArgs))

        self.fileset1.markOpen(False)

        checkSingleJob(jobFactory(**mySplitArgs))

        self.assertEqual(self.getNumActiveSplitLumis(), 0,
                         "ERROR: Split lumis were created")

        return

    def test02(self):
        """
        _test02_

        Single lumi size threshold, single lumi input.

        Registers eight files of size 1000 in lumi 1.  Expected: no
        JobGroup with the default split arguments; with
        maxSizeSingleLumi lowered to 6500 one JobGroup with two jobs
        holding 6 and 2 files, and one active split lumi.

        """
        splitParams = self.splitArgs.copy()
        lumiNumber = 1
        filesInLumi = 8
        closedLumiRows = []

        for _ in range(filesInLumi):
            inputFile = File(makeUUID(), size=1000, events=100)
            inputFile.addRun(Run(1, lumiNumber))
            inputFile.setLocation("SomeSE", immediateSave=False)
            inputFile.create()
            self.fileset1.addFile(inputFile)
            # a bind row is queued for every file of the lumi
            closedLumiRows.append({'RUN': 1,
                                   'LUMI': lumiNumber,
                                   'STREAM': "A",
                                   'FILECOUNT': filesInLumi,
                                   'INSERT_TIME': self.currentTime,
                                   'CLOSE_TIME': self.currentTime})
        self.fileset1.commit()

        splitter = self.splitterFactory(package="WMCore.WMBS",
                                        subscription=self.subscription1)
        self.insertClosedLumiDAO.execute(binds=closedLumiRows,
                                         transaction=False)

        # default thresholds
        groups = splitter(**splitParams)
        self.assertEqual(len(groups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        # lowered single lumi size threshold
        splitParams['maxSizeSingleLumi'] = 6500
        groups = splitter(**splitParams)

        self.assertEqual(len(groups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")
        self.assertEqual(len(groups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        expectations = ((0, 6, "ERROR: Job does not process 6 files"),
                        (1, 2, "ERROR: Job does not process 2 files"))
        for jobIndex, nFiles, failMsg in expectations:
            job = groups[0].jobs[jobIndex]
            self.assertEqual(len(job.getFiles()), nFiles, failMsg)

        self.assertEqual(self.getNumActiveSplitLumis(), 1,
                         "ERROR: Split lumis were not created")

    def test03(self):
        """
        _test03_

        Single lumi event threshold, single lumi input.

        Registers eight 100-event files in lumi 1.  Expected: no
        JobGroup with the default split arguments; with maxInputEvents
        lowered to 650 one JobGroup with two jobs holding 6 and 2
        files, and one active split lumi.

        """
        splitParams = self.splitArgs.copy()
        lumiNumber = 1
        filesInLumi = 8
        closedLumiRows = []

        for _ in range(filesInLumi):
            inputFile = File(makeUUID(), size=1000, events=100)
            inputFile.addRun(Run(1, lumiNumber))
            inputFile.setLocation("SomeSE", immediateSave=False)
            inputFile.create()
            self.fileset1.addFile(inputFile)
            # a bind row is queued for every file of the lumi
            closedLumiRows.append({'RUN': 1,
                                   'LUMI': lumiNumber,
                                   'STREAM': "A",
                                   'FILECOUNT': filesInLumi,
                                   'INSERT_TIME': self.currentTime,
                                   'CLOSE_TIME': self.currentTime})
        self.fileset1.commit()

        splitter = self.splitterFactory(package="WMCore.WMBS",
                                        subscription=self.subscription1)
        self.insertClosedLumiDAO.execute(binds=closedLumiRows,
                                         transaction=False)

        # default thresholds
        groups = splitter(**splitParams)
        self.assertEqual(len(groups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        # lowered event threshold
        splitParams['maxInputEvents'] = 650
        groups = splitter(**splitParams)

        self.assertEqual(len(groups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")
        self.assertEqual(len(groups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        expectations = ((0, 6, "ERROR: Job does not process 6 files"),
                        (1, 2, "ERROR: Job does not process 2 files"))
        for jobIndex, nFiles, failMsg in expectations:
            job = groups[0].jobs[jobIndex]
            self.assertEqual(len(job.getFiles()), nFiles, failMsg)

        self.assertEqual(self.getNumActiveSplitLumis(), 1,
                         "ERROR: Split lumis were not created")

    def test04(self):
        """
        _test04_

        Streamer count threshold (only multi lumi), multi lumi input.

        Registers two files for each of the lumis 1 to 4.  Expected:
        no JobGroup with the default split arguments; with
        maxInputFiles lowered to 5 a single job holding 4 files, both
        before and after the fileset is marked closed; and no active
        split lumis at the end.

        """
        splitParams = self.splitArgs.copy()
        filesPerLumi = 2
        closedLumiRows = []

        for lumiNumber in (1, 2, 3, 4):
            for _ in range(filesPerLumi):
                inputFile = File(makeUUID(), size=1000, events=100)
                inputFile.addRun(Run(1, lumiNumber))
                inputFile.setLocation("SomeSE", immediateSave=False)
                inputFile.create()
                self.fileset1.addFile(inputFile)
                # a bind row is queued for every file, not once per lumi
                closedLumiRows.append({'RUN': 1,
                                       'LUMI': lumiNumber,
                                       'STREAM': "A",
                                       'FILECOUNT': filesPerLumi,
                                       'INSERT_TIME': self.currentTime,
                                       'CLOSE_TIME': self.currentTime})
        self.fileset1.commit()

        splitter = self.splitterFactory(package="WMCore.WMBS",
                                        subscription=self.subscription1)
        self.insertClosedLumiDAO.execute(binds=closedLumiRows,
                                         transaction=False)

        # default thresholds
        groups = splitter(**splitParams)
        self.assertEqual(len(groups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        splitParams['maxInputFiles'] = 5

        # same expectations with the fileset still open, then closed
        for closeFileset in (False, True):
            if closeFileset:
                self.fileset1.markOpen(False)

            groups = splitter(**splitParams)
            self.assertEqual(len(groups), 1,
                             "ERROR: JobFactory didn't return one JobGroup")
            self.assertEqual(len(groups[0].jobs), 1,
                             "ERROR: JobFactory didn't create a single job")

            onlyJob = groups[0].jobs[0]
            self.assertEqual(len(onlyJob.getFiles()), 4,
                             "ERROR: Job does not process 4 files")

        self.assertEqual(self.getNumActiveSplitLumis(), 0,
                         "ERROR: Split lumis were created")

    def test05(self):
        """
        _test05_

        Multi lumi input with a hole in the lumi sequence.

        Files are registered for lumis 1, 2 and 4 only.  With
        maxInputFiles set to 5 no JobGroup is expected at first; once
        lumi 3 is inserted as a closed lumi with FILECOUNT 0, one
        JobGroup with one job holding 4 files is expected.

        """
        splitParams = self.splitArgs.copy()
        filesPerLumi = 2
        closedLumiRows = []

        for lumiNumber in (1, 2, 4):
            for _ in range(filesPerLumi):
                inputFile = File(makeUUID(), size=1000, events=100)
                inputFile.addRun(Run(1, lumiNumber))
                inputFile.setLocation("SomeSE", immediateSave=False)
                inputFile.create()
                self.fileset1.addFile(inputFile)
                # a bind row is queued for every file, not once per lumi
                closedLumiRows.append({'RUN': 1,
                                       'LUMI': lumiNumber,
                                       'STREAM': "A",
                                       'FILECOUNT': filesPerLumi,
                                       'INSERT_TIME': self.currentTime,
                                       'CLOSE_TIME': self.currentTime})
        self.fileset1.commit()

        splitter = self.splitterFactory(package="WMCore.WMBS",
                                        subscription=self.subscription1)
        self.insertClosedLumiDAO.execute(binds=closedLumiRows,
                                         transaction=False)

        # lumi 3 is not yet known as closed
        splitParams['maxInputFiles'] = 5
        groups = splitter(**splitParams)
        self.assertEqual(len(groups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        # fill the hole: lumi 3 closed without any files
        emptyLumiRow = {'RUN': 1,
                        'LUMI': 3,
                        'STREAM': "A",
                        'FILECOUNT': 0,
                        'INSERT_TIME': self.currentTime,
                        'CLOSE_TIME': self.currentTime}
        self.insertClosedLumiDAO.execute(binds=emptyLumiRow,
                                         transaction=False)

        groups = splitter(**splitParams)
        self.assertEqual(len(groups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")
        self.assertEqual(len(groups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")
        self.assertEqual(len(groups[0].jobs[0].getFiles()), 4,
                         "ERROR: first job does not process 4 files")

    def test06(self):
        """
        _test06_

        Test repacking of 3 lumis
        2 small lumis (single job), followed by a big one (multiple jobs)

        files for lumi 1 and 2 are below multi-lumi thresholds
        files for lumi 3 are above single-lumi threshold

        Two files are registered per lumi: 100 events each for lumis 1
        and 2, 500 events each for lumi 3.  With maxInputEvents set to
        900 the test expects one JobGroup with three jobs holding 4, 1
        and 1 files.

        """
        mySplitArgs = self.splitArgs.copy()

        insertClosedLumiBinds = []
        for lumi in [1, 2, 3]:
            filecount = 2
            # only the last lumi gets the large files
            nevents = 500 if lumi == 3 else 100
            for _ in range(filecount):
                newFile = File(makeUUID(), size=1000, events=nevents)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomeSE", immediateSave=False)
                newFile.create()
                self.fileset1.addFile(newFile)
                # a bind row is queued for every file, not once per lumi
                insertClosedLumiBinds.append({'RUN': 1,
                                              'LUMI': lumi,
                                              'STREAM': "A",
                                              'FILECOUNT': filecount,
                                              'INSERT_TIME': self.currentTime,
                                              'CLOSE_TIME': self.currentTime})
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription1)

        self.insertClosedLumiDAO.execute(binds=insertClosedLumiBinds,
                                         transaction=False)

        mySplitArgs['maxInputEvents'] = 900
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 3,
                         "ERROR: JobFactory didn't create three jobs")

        self.assertEqual(len(jobGroups[0].jobs[0].getFiles()), 4,
                         "ERROR: first job does not process 4 files")

        # this statement used to be indented with a TAB, which Python 3
        # rejects (TabError) when mixed with space indentation
        self.assertEqual(len(jobGroups[0].jobs[1].getFiles()), 1,
                         "ERROR: second job does not process 1 file")

        self.assertEqual(len(jobGroups[0].jobs[2].getFiles()), 1,
                         "ERROR: third job does not process 1 file")

        return
示例#42
0
文件: Repack_t.py 项目: lucacopa/T0
class RepackTest(unittest.TestCase):
    """
    _RepackTest_

    Test for Repack job splitter
    """
    def setUp(self):
        """
        _setUp_

        Prepare the database and the objects shared by the tests.

        Sets up logging, the database connection and the schema
        (including the T0.WMBS custom module), then inserts one
        location ('SomeSite' with the se names 'SomeSE' and 'SomeSE2'),
        run 1 with lumis 1 to 4, stream "A" and the stream fileset
        "TestFileset1".  A workflow and a subscription using the
        "Repack" split algorithm are created on that fileset.

        Attributes kept for the tests: testInit, splitterFactory,
        fileset1, subscription1, insertClosedLumiDAO, currentTime and
        the default split parameters in splitArgs.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(customModules=["T0.WMBS"])

        self.splitterFactory = SplitterFactory(package="T0.JobSplitting")

        # current_thread() is the supported spelling; currentThread()
        # is a deprecated alias of the same function
        myThread = threading.current_thread()
        daoFactory = DAOFactory(package="T0.WMBS",
                                logger=logging,
                                dbinterface=myThread.dbi)

        # one site with two se names
        myThread.dbi.processData("""INSERT INTO wmbs_location
                                    (id, site_name, state)
                                    VALUES (1, 'SomeSite', 1)
                                    """,
                                 transaction=False)
        myThread.dbi.processData("""INSERT INTO wmbs_location_senames
                                    (location, se_name)
                                    VALUES (1, 'SomeSE')
                                    """,
                                 transaction=False)

        myThread.dbi.processData("""INSERT INTO wmbs_location_senames
                                    (location, se_name)
                                    VALUES (1, 'SomeSE2')
                                    """,
                                 transaction=False)

        # run 1 with lumi sections 1 to 4
        insertRunDAO = daoFactory(classname="RunConfig.InsertRun")
        insertRunDAO.execute(binds={'RUN': 1,
                                    'TIME': int(time.time()),
                                    'HLTKEY': "someHLTKey"},
                             transaction=False)

        insertLumiDAO = daoFactory(classname="RunConfig.InsertLumiSection")
        for lumi in [1, 2, 3, 4]:
            insertLumiDAO.execute(binds={'RUN': 1, 'LUMI': lumi},
                                  transaction=False)

        # stream "A" and the fileset registered for it
        insertStreamDAO = daoFactory(classname="RunConfig.InsertStream")
        insertStreamDAO.execute(binds={'STREAM': "A"}, transaction=False)

        insertStreamFilesetDAO = daoFactory(
            classname="RunConfig.InsertStreamFileset")
        insertStreamFilesetDAO.execute(1, "A", "TestFileset1")

        # the fileset is loaded, not created, after the DAO call above
        self.fileset1 = Fileset(name="TestFileset1")
        self.fileset1.load()

        workflow1 = Workflow(spec="spec.xml",
                             owner="hufnagel",
                             name="TestWorkflow1",
                             task="Test")
        workflow1.create()

        self.subscription1 = Subscription(fileset=self.fileset1,
                                          workflow=workflow1,
                                          split_algo="Repack",
                                          type="Repack")
        self.subscription1.create()

        # keep for later
        self.insertClosedLumiDAO = daoFactory(
            classname="RunLumiCloseout.InsertClosedLumi")
        self.currentTime = int(time.time())

        # default split parameters
        self.splitArgs = {}
        self.splitArgs['maxSizeSingleLumi'] = 20 * 1024 * 1024 * 1024
        self.splitArgs['maxSizeMultiLumi'] = 10 * 1024 * 1024 * 1024
        self.splitArgs['maxInputEvents'] = 500000
        self.splitArgs['maxInputFiles'] = 1000

        return

    def tearDown(self):
        """
        _tearDown_

        Clear the database through the TestInit helper stored by setUp.
        """
        self.testInit.clearDatabase()

    def getNumActiveSplitLumis(self):
        """
        _getNumActiveSplitLumis_

        Count the rows of the lumi_section_split_active table.

        Runs a SELECT COUNT(*) through the dbi object attached to the
        current thread and returns the first column of the first row
        of the first result set.
        """
        # current_thread() is the supported spelling; currentThread()
        # is a deprecated alias of the same function
        myThread = threading.current_thread()

        results = myThread.dbi.processData("""SELECT COUNT(*)
                                              FROM lumi_section_split_active
                                              """,
                                           transaction=False)[0].fetchall()

        return results[0][0]

    def test00(self):
        """
        _test00_

        Job name prefix and multi lumi size threshold, multi lumi input.

        Registers two files of size 1000 for each of the lumis 1 to 4.
        Expected: no JobGroup with the default maxSizeMultiLumi; with
        maxSizeMultiLumi lowered to 5000 a single job whose name starts
        with "Repack-" and which holds 4 files, both before and after
        the fileset is marked closed; and no active split lumis.

        """
        splitParams = self.splitArgs.copy()
        filesPerLumi = 2

        for lumiNumber in (1, 2, 3, 4):
            for _ in range(filesPerLumi):
                inputFile = File(makeUUID(), size=1000, events=100)
                inputFile.addRun(Run(1, lumiNumber))
                inputFile.setLocation("SomeSE", immediateSave=False)
                inputFile.create()
                self.fileset1.addFile(inputFile)

        self.fileset1.commit()

        splitter = self.splitterFactory(package="WMCore.WMBS",
                                        subscription=self.subscription1)

        # explicitly the default threshold
        defaultMultiLumiSize = self.splitArgs['maxSizeMultiLumi']
        splitParams['maxSizeMultiLumi'] = defaultMultiLumiSize
        groups = splitter(**splitParams)
        self.assertEqual(len(groups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        splitParams['maxSizeMultiLumi'] = 5000

        # same expectations with the fileset still open, then closed
        for closeFileset in (False, True):
            if closeFileset:
                self.fileset1.markOpen(False)

            groups = splitter(**splitParams)
            self.assertEqual(len(groups), 1,
                             "ERROR: JobFactory didn't return one JobGroup")
            self.assertEqual(len(groups[0].jobs), 1,
                             "ERROR: JobFactory didn't create a single job")

            onlyJob = groups[0].jobs[0]
            self.assertTrue(onlyJob['name'].startswith("Repack-"),
                            "ERROR: Job has wrong name")
            self.assertEqual(len(onlyJob.getFiles()), 4,
                             "ERROR: Job does not process 4 files")

        self.assertEqual(self.getNumActiveSplitLumis(), 0,
                         "ERROR: Split lumis were created")

    def test01(self):
        """
        _test01_

        Multi lumi event threshold, multi lumi input.

        Registers two 100-event files for each of the lumis 1 to 4.
        Expected: no JobGroup with the default split arguments; with
        maxInputEvents lowered to 500 a single job holding 4 files,
        both before and after the fileset is marked closed; and no
        active split lumis at the end.

        """
        splitParams = self.splitArgs.copy()
        filesPerLumi = 2
        closedLumiRows = []

        for lumiNumber in (1, 2, 3, 4):
            for _ in range(filesPerLumi):
                inputFile = File(makeUUID(), size=1000, events=100)
                inputFile.addRun(Run(1, lumiNumber))
                inputFile.setLocation("SomeSE", immediateSave=False)
                inputFile.create()
                self.fileset1.addFile(inputFile)
                # a bind row is queued for every file, not once per lumi
                closedLumiRows.append({'RUN': 1,
                                       'LUMI': lumiNumber,
                                       'STREAM': "A",
                                       'FILECOUNT': filesPerLumi,
                                       'INSERT_TIME': self.currentTime,
                                       'CLOSE_TIME': self.currentTime})
        self.fileset1.commit()

        splitter = self.splitterFactory(package="WMCore.WMBS",
                                        subscription=self.subscription1)
        self.insertClosedLumiDAO.execute(binds=closedLumiRows,
                                         transaction=False)

        # default thresholds
        groups = splitter(**splitParams)
        self.assertEqual(len(groups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        splitParams['maxInputEvents'] = 500

        # same expectations with the fileset still open, then closed
        for closeFileset in (False, True):
            if closeFileset:
                self.fileset1.markOpen(False)

            groups = splitter(**splitParams)
            self.assertEqual(len(groups), 1,
                             "ERROR: JobFactory didn't return one JobGroup")
            self.assertEqual(len(groups[0].jobs), 1,
                             "ERROR: JobFactory didn't create a single job")

            onlyJob = groups[0].jobs[0]
            self.assertEqual(len(onlyJob.getFiles()), 4,
                             "ERROR: Job does not process 4 files")

        self.assertEqual(self.getNumActiveSplitLumis(), 0,
                         "ERROR: Split lumis were created")

    def test02(self):
        """
        _test02_

        Single lumi size threshold, single lumi input.

        Registers eight files of size 1000 in lumi 1.  Expected: no
        JobGroup with the default split arguments; with
        maxSizeSingleLumi lowered to 6500 one JobGroup with two jobs
        holding 6 and 2 files, and one active split lumi.

        """
        splitParams = self.splitArgs.copy()
        lumiNumber = 1
        filesInLumi = 8
        closedLumiRows = []

        for _ in range(filesInLumi):
            inputFile = File(makeUUID(), size=1000, events=100)
            inputFile.addRun(Run(1, lumiNumber))
            inputFile.setLocation("SomeSE", immediateSave=False)
            inputFile.create()
            self.fileset1.addFile(inputFile)
            # a bind row is queued for every file of the lumi
            closedLumiRows.append({'RUN': 1,
                                   'LUMI': lumiNumber,
                                   'STREAM': "A",
                                   'FILECOUNT': filesInLumi,
                                   'INSERT_TIME': self.currentTime,
                                   'CLOSE_TIME': self.currentTime})
        self.fileset1.commit()

        splitter = self.splitterFactory(package="WMCore.WMBS",
                                        subscription=self.subscription1)
        self.insertClosedLumiDAO.execute(binds=closedLumiRows,
                                         transaction=False)

        # default thresholds
        groups = splitter(**splitParams)
        self.assertEqual(len(groups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        # lowered single lumi size threshold
        splitParams['maxSizeSingleLumi'] = 6500
        groups = splitter(**splitParams)

        self.assertEqual(len(groups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")
        self.assertEqual(len(groups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        expectations = ((0, 6, "ERROR: Job does not process 6 files"),
                        (1, 2, "ERROR: Job does not process 2 files"))
        for jobIndex, nFiles, failMsg in expectations:
            job = groups[0].jobs[jobIndex]
            self.assertEqual(len(job.getFiles()), nFiles, failMsg)

        self.assertEqual(self.getNumActiveSplitLumis(), 1,
                         "ERROR: Split lumis were not created")

    def test03(self):
        """
        _test03_

        Single lumi event threshold, single lumi input.

        Registers eight 100-event files in lumi 1.  Expected: no
        JobGroup with the default split arguments; with maxInputEvents
        lowered to 650 one JobGroup with two jobs holding 6 and 2
        files, and one active split lumi.

        """
        splitParams = self.splitArgs.copy()
        lumiNumber = 1
        filesInLumi = 8
        closedLumiRows = []

        for _ in range(filesInLumi):
            inputFile = File(makeUUID(), size=1000, events=100)
            inputFile.addRun(Run(1, lumiNumber))
            inputFile.setLocation("SomeSE", immediateSave=False)
            inputFile.create()
            self.fileset1.addFile(inputFile)
            # a bind row is queued for every file of the lumi
            closedLumiRows.append({'RUN': 1,
                                   'LUMI': lumiNumber,
                                   'STREAM': "A",
                                   'FILECOUNT': filesInLumi,
                                   'INSERT_TIME': self.currentTime,
                                   'CLOSE_TIME': self.currentTime})
        self.fileset1.commit()

        splitter = self.splitterFactory(package="WMCore.WMBS",
                                        subscription=self.subscription1)
        self.insertClosedLumiDAO.execute(binds=closedLumiRows,
                                         transaction=False)

        # default thresholds
        groups = splitter(**splitParams)
        self.assertEqual(len(groups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        # lowered event threshold
        splitParams['maxInputEvents'] = 650
        groups = splitter(**splitParams)

        self.assertEqual(len(groups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")
        self.assertEqual(len(groups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        expectations = ((0, 6, "ERROR: Job does not process 6 files"),
                        (1, 2, "ERROR: Job does not process 2 files"))
        for jobIndex, nFiles, failMsg in expectations:
            job = groups[0].jobs[jobIndex]
            self.assertEqual(len(job.getFiles()), nFiles, failMsg)

        self.assertEqual(self.getNumActiveSplitLumis(), 1,
                         "ERROR: Split lumis were not created")

    def test04(self):
        """
        _test04_

        Streamer count threshold (only multi lumi), multi lumi input.

        Registers two files for each of the lumis 1 to 4.  Expected:
        no JobGroup with the default split arguments; with
        maxInputFiles lowered to 5 a single job holding 4 files, both
        before and after the fileset is marked closed; and no active
        split lumis at the end.

        """
        splitParams = self.splitArgs.copy()
        filesPerLumi = 2
        closedLumiRows = []

        for lumiNumber in (1, 2, 3, 4):
            for _ in range(filesPerLumi):
                inputFile = File(makeUUID(), size=1000, events=100)
                inputFile.addRun(Run(1, lumiNumber))
                inputFile.setLocation("SomeSE", immediateSave=False)
                inputFile.create()
                self.fileset1.addFile(inputFile)
                # a bind row is queued for every file, not once per lumi
                closedLumiRows.append({'RUN': 1,
                                       'LUMI': lumiNumber,
                                       'STREAM': "A",
                                       'FILECOUNT': filesPerLumi,
                                       'INSERT_TIME': self.currentTime,
                                       'CLOSE_TIME': self.currentTime})
        self.fileset1.commit()

        splitter = self.splitterFactory(package="WMCore.WMBS",
                                        subscription=self.subscription1)
        self.insertClosedLumiDAO.execute(binds=closedLumiRows,
                                         transaction=False)

        # default thresholds
        groups = splitter(**splitParams)
        self.assertEqual(len(groups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        splitParams['maxInputFiles'] = 5

        # same expectations with the fileset still open, then closed
        for closeFileset in (False, True):
            if closeFileset:
                self.fileset1.markOpen(False)

            groups = splitter(**splitParams)
            self.assertEqual(len(groups), 1,
                             "ERROR: JobFactory didn't return one JobGroup")
            self.assertEqual(len(groups[0].jobs), 1,
                             "ERROR: JobFactory didn't create a single job")

            onlyJob = groups[0].jobs[0]
            self.assertEqual(len(onlyJob.getFiles()), 4,
                             "ERROR: Job does not process 4 files")

        self.assertEqual(self.getNumActiveSplitLumis(), 0,
                         "ERROR: Split lumis were created")

    def test05(self):
        """
        _test05_

        Multi lumi input with a hole in the lumi sequence.

        Files are registered for lumis 1, 2 and 4 only.  With
        maxInputFiles set to 5 no JobGroup is expected at first; once
        lumi 3 is inserted as a closed lumi with FILECOUNT 0, one
        JobGroup with one job holding 4 files is expected.

        """
        splitParams = self.splitArgs.copy()
        filesPerLumi = 2
        closedLumiRows = []

        for lumiNumber in (1, 2, 4):
            for _ in range(filesPerLumi):
                inputFile = File(makeUUID(), size=1000, events=100)
                inputFile.addRun(Run(1, lumiNumber))
                inputFile.setLocation("SomeSE", immediateSave=False)
                inputFile.create()
                self.fileset1.addFile(inputFile)
                # a bind row is queued for every file, not once per lumi
                closedLumiRows.append({'RUN': 1,
                                       'LUMI': lumiNumber,
                                       'STREAM': "A",
                                       'FILECOUNT': filesPerLumi,
                                       'INSERT_TIME': self.currentTime,
                                       'CLOSE_TIME': self.currentTime})
        self.fileset1.commit()

        splitter = self.splitterFactory(package="WMCore.WMBS",
                                        subscription=self.subscription1)
        self.insertClosedLumiDAO.execute(binds=closedLumiRows,
                                         transaction=False)

        # lumi 3 is not yet known as closed
        splitParams['maxInputFiles'] = 5
        groups = splitter(**splitParams)
        self.assertEqual(len(groups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        # fill the hole: lumi 3 closed without any files
        emptyLumiRow = {'RUN': 1,
                        'LUMI': 3,
                        'STREAM': "A",
                        'FILECOUNT': 0,
                        'INSERT_TIME': self.currentTime,
                        'CLOSE_TIME': self.currentTime}
        self.insertClosedLumiDAO.execute(binds=emptyLumiRow,
                                         transaction=False)

        groups = splitter(**splitParams)
        self.assertEqual(len(groups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")
        self.assertEqual(len(groups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")
        self.assertEqual(len(groups[0].jobs[0].getFiles()), 4,
                         "ERROR: first job does not process 4 files")

    def test06(self):
        """
        _test06_

        Repacking of 3 lumis: two small ones followed by a big one.

        Two files are registered per lumi, with 100 events each for
        lumis 1 and 2 and 500 events each for lumi 3.  With
        maxInputEvents set to 900 the test expects one JobGroup with
        three jobs holding 4, 1 and 1 files.

        """
        splitParams = self.splitArgs.copy()
        filesPerLumi = 2
        closedLumiRows = []

        for lumiNumber in (1, 2, 3):
            # only the last lumi gets the large files
            eventsPerFile = 500 if lumiNumber == 3 else 100
            for _ in range(filesPerLumi):
                inputFile = File(makeUUID(), size=1000, events=eventsPerFile)
                inputFile.addRun(Run(1, lumiNumber))
                inputFile.setLocation("SomeSE", immediateSave=False)
                inputFile.create()
                self.fileset1.addFile(inputFile)
                # a bind row is queued for every file, not once per lumi
                closedLumiRows.append({'RUN': 1,
                                       'LUMI': lumiNumber,
                                       'STREAM': "A",
                                       'FILECOUNT': filesPerLumi,
                                       'INSERT_TIME': self.currentTime,
                                       'CLOSE_TIME': self.currentTime})
        self.fileset1.commit()

        splitter = self.splitterFactory(package="WMCore.WMBS",
                                        subscription=self.subscription1)
        self.insertClosedLumiDAO.execute(binds=closedLumiRows,
                                         transaction=False)

        splitParams['maxInputEvents'] = 900
        groups = splitter(**splitParams)

        self.assertEqual(len(groups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")
        self.assertEqual(len(groups[0].jobs), 3,
                         "ERROR: JobFactory didn't create three jobs")

        expectations = ((0, 4, "ERROR: first job does not process 4 files"),
                        (1, 1, "ERROR: second job does not process 1 file"),
                        (2, 1, "ERROR: third job does not process 1 file"))
        for jobIndex, nFiles, failMsg in expectations:
            self.assertEqual(len(groups[0].jobs[jobIndex].getFiles()),
                             nFiles, failMsg)
示例#43
0
    def createTestJobGroup(self,
                           name="TestWorkthrough",
                           specLocation="spec.xml",
                           error=False,
                           task="/TestWorkload/ReReco",
                           nJobs=10):
        """
        _createTestJobGroup_

        Generate a test WMBS JobGroup with real FWJRs

        A workflow, a fileset with two files, a subscription and a job
        group are created; nJobs jobs (each using both files) are added
        to the group and driven through the state changes issued below,
        ending with the 'cleanout' call. Both files are finally marked
        complete on the subscription.

        Arguments:
          name         -- used as both the workflow name and the fileset name
          specLocation -- passed to the Workflow as its spec
          error        -- when true, load "badBackfillJobReport.pkl"
                          instead of "PerformanceReport2.pkl"
          task         -- passed to the Workflow as its task
          nJobs        -- number of jobs added to the job group

        Returns the created JobGroup.
        """

        testWorkflow = Workflow(spec=specLocation,
                                owner="Simon",
                                name=name,
                                task=task)
        testWorkflow.create()

        testWMBSFileset = Fileset(name=name)
        testWMBSFileset.create()

        testFileA = File(lfn=makeUUID(), size=1024, events=10)
        testFileA.addRun(Run(10, *[12312]))
        testFileA.setLocation('malpaquet')

        testFileB = File(lfn=makeUUID(), size=1024, events=10)
        testFileB.addRun(Run(10, *[12312]))
        testFileB.setLocation('malpaquet')

        testFileA.create()
        testFileB.create()

        testWMBSFileset.addFile(testFileA)
        testWMBSFileset.addFile(testFileB)
        testWMBSFileset.commit()
        testWMBSFileset.markOpen(0)

        testSubscription = Subscription(fileset=testWMBSFileset,
                                        workflow=testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        # Every job is given the same two input files and the same mask
        for _ in range(nJobs):
            testJob = Job(name=makeUUID())
            testJob.addFile(testFileA)
            testJob.addFile(testFileB)
            testJob['retry_count'] = 1
            testJob['retry_max'] = 10
            testJob['mask'].addRunAndLumis(run=10, lumis=[12312, 12313])
            testJobGroup.add(testJob)

        testJobGroup.commit()

        # Only the pickle file name depends on the error flag
        if error:
            reportName = "badBackfillJobReport.pkl"
        else:
            reportName = "PerformanceReport2.pkl"
        path = os.path.join(WMCore.WMBase.getTestBase(),
                            "WMComponent_t/JobAccountant_t/fwjrs",
                            reportName)
        report = Report()
        report.load(filename=path)

        self.changeState.propagate(testJobGroup.jobs, 'created', 'new')
        self.changeState.propagate(testJobGroup.jobs, 'executing', 'created')
        self.changeState.propagate(testJobGroup.jobs, 'complete', 'executing')
        # The single loaded report object is attached to every job before
        # the 'jobfailed' state change is issued
        for job in testJobGroup.jobs:
            job['fwjr'] = report
        self.changeState.propagate(testJobGroup.jobs, 'jobfailed', 'complete')
        self.changeState.propagate(testJobGroup.jobs, 'exhausted', 'jobfailed')
        self.changeState.propagate(testJobGroup.jobs, 'cleanout', 'exhausted')

        testSubscription.completeFiles([testFileA, testFileB])

        return testJobGroup