Пример #1
0
    def setUp(self):
        """
        _setUp_

        Create a default GeneratorManager and a dictionary of seed names in
        which every seed is initially mapped to None.
        """
        self.manager = GeneratorManager()

        # Names of the seeds; dict.fromkeys() maps each one to None
        seedNames = [
            "simMuonRPCDigis.initialSeed",
            "simEcalUnsuppressedDigis.initialSeed",
            "simCastorDigis.initialSeed",
            "generator.initialSeed",
            "simSiStripDigis.initialSeed",
            "LHCTransport.initialSeed",
            "mix.initialSeed",
            "simHcalUnsuppressedDigis.initialSeed",
            "theSource.initialSeed",
            "simMuonCSCDigis.initialSeed",
            "VtxSmeared.initialSeed",
            "g4SimHits.initialSeed",
            "simSiPixelDigis.initialSeed",
            "simMuonDTDigis.initialSeed",
            "evtgenproducer.initialSeed",
        ]
        self.seedlistForRandom = dict.fromkeys(seedNames)
Пример #2
0
    def setUp(self):
        """
        _setUp_

        Prepare a fresh GeneratorManager plus the seed dictionary, where each
        seed name starts out with the value None.
        """
        self.manager = GeneratorManager()

        seedKeys = (
            "simMuonRPCDigis.initialSeed",
            "simEcalUnsuppressedDigis.initialSeed",
            "simCastorDigis.initialSeed",
            "generator.initialSeed",
            "simSiStripDigis.initialSeed",
            "LHCTransport.initialSeed",
            "mix.initialSeed",
            "simHcalUnsuppressedDigis.initialSeed",
            "theSource.initialSeed",
            "simMuonCSCDigis.initialSeed",
            "VtxSmeared.initialSeed",
            "g4SimHits.initialSeed",
            "simSiPixelDigis.initialSeed",
            "simMuonDTDigis.initialSeed",
            "evtgenproducer.initialSeed",
        )
        # No seed has a preset value
        self.seedlistForRandom = {key: None for key in seedKeys}
Пример #3
0
    def testPresetSeeder(self):
        """
        _testPresetSeeder_

        Test whether the PresetSeeder works: the seed values configured on
        the task must be found unchanged in the baggage of every job.
        """

        task1 = makeWMTask("task2")

        seederDict = {
            "generator.initialSeed": 1001,
            "evtgenproducer.initialSeed": 1001
        }
        task1.addGenerator("PresetSeeder", **seederDict)

        manager = GeneratorManager(task=task1)

        jobs = self.oneHundredFiles()

        # Apply the task's generators to every job group first ...
        for jobGrp in jobs:
            manager(jobGrp)

        # ... then check what ended up in each job's baggage
        for jobGrp in jobs:
            for job in jobGrp.jobs:
                conf = job.getBaggage()
                self.assertEqual(conf.PresetSeeder.evtgenproducer.initialSeed,
                                 1001)
                self.assertEqual(conf.PresetSeeder.generator.initialSeed, 1001)

        return
Пример #4
0
    def testWMTask(self):
        """
        _testWMTask_

        Check that seeder parameters attached to a WMTask are picked up by
        the GeneratorManager. Exercises RandomSeeder and RunAndLumiSeeder.
        """
        wmTask = makeWMTask("task1")

        # Seeder settings are passed to the task as expanded keyword dicts
        wmTask.addGenerator("RandomSeeder",
                            **{"generator.initialSeed": None,
                               "evtgenproducer.initialSeed": None,
                               "MAXINT": 1})
        wmTask.addGenerator("RunAndLumiSeeder", **{"lumi_per_run": 5})

        seederManager = GeneratorManager(task=wmTask)
        jobGroups = self.oneHundredFiles()

        for group in jobGroups:
            seederManager(group)

        for group in jobGroups:
            # The index is only needed by the disabled run/lumi checks below
            for count, job in enumerate(group.jobs):
                baggage = job.getBaggage()
                hasEvtgenSeed = hasattr(baggage.RandomSeeder.evtgenproducer,
                                        'initialSeed')
                self.assertTrue(hasEvtgenSeed)
                hasGeneratorSeed = hasattr(baggage.RandomSeeder.generator,
                                           'initialSeed')
                self.assertTrue(hasGeneratorSeed)
                #self.assertEqual(job["mask"]["FirstLumi"], count%6)
                #self.assertEqual(job["mask"]["FirstRun"],  (count/6) + 1)
                seed = baggage.RandomSeeder.generator.initialSeed
                isPositive = seed > 0
                self.assertTrue(isPositive,
                                "ERROR: producing negative random numbers")
                # MAXINT was set to 1, so the seed may not exceed it
                failureText = (
                    "ERROR: MAXINT tag failed; producing bad random number %i"
                    % (seed))
                self.assertTrue(seed <= 1, failureText)

        return
Пример #5
0
    def pollSubscriptions(self):
        """
        Poller for looking in all active subscriptions for jobs that need to be made.

        For every subscription ID returned by self.subscriptionList the
        subscription, its workflow and the workload spec are loaded, the job
        splitter is run and, for each batch of job groups it yields, the
        jobs are created through creatorProcess, their cache directories
        are stored with self.setBulkCache and the groups are passed to
        self.advanceJobGroup.  Each batch is processed between a
        transaction begin() and commit() on the current thread.

        Subscriptions that cannot be loaded, that have no task or workload,
        or whose generators fail to load are logged, reported through
        self.sendAlert and skipped.

        Raises:
            WMException: re-raised unchanged when setting the bulk cache fails
                with one.
            JobCreatorException: for any other exception raised while setting
                the bulk cache.
        """
        logging.info("Beginning JobCreator.pollSubscriptions() cycle.")
        # current_thread() is the non-deprecated name of currentThread()
        myThread = threading.current_thread()

        # First, get list of Subscriptions
        subscriptions = self.subscriptionList.execute()

        # Okay, now we have a list of subscriptions
        for subscriptionID in subscriptions:
            wmbsSubscription = Subscription(id=subscriptionID)
            try:
                wmbsSubscription.load()
            except IndexError:
                # This happens when the subscription no longer exists
                # i.e., someone executed a kill() function on the database
                # while the JobCreator was in cycle
                # Ignore this subscription
                msg = "JobCreator cannot load subscription %i" % subscriptionID
                logging.error(msg)
                self.sendAlert(6, msg=msg)
                continue

            workflow = Workflow(id=wmbsSubscription["workflow"].id)
            workflow.load()
            wmbsSubscription['workflow'] = workflow
            wmWorkload = retrieveWMSpec(workflow=workflow)

            if not workflow.task or not wmWorkload:
                # Then we have a problem
                # We NEED a sandbox
                # Abort this subscription!
                # But do NOT fail
                # We have no way of marking a subscription as bad per se
                # We'll have to just keep skipping it
                msg = "Have no task for workflow %i\n" % (workflow.id)
                msg += "Aborting Subscription %i" % (subscriptionID)
                logging.error(msg)
                self.sendAlert(1, msg=msg)
                continue

            logging.debug("Have loaded subscription %i with workflow %i\n" % (subscriptionID, workflow.id))

            # Set task object
            wmTask = wmWorkload.getTaskByPath(workflow.task)

            # Get generators
            # If you fail to load the generators, pass on the job
            try:
                if hasattr(wmTask.data, 'generators'):
                    manager = GeneratorManager(wmTask)
                    seederList = manager.getGeneratorList()
                else:
                    seederList = []
            except Exception as ex:
                # "except ... as" parses on Python 2.6+ and Python 3 alike
                msg = "Had failure loading generators for subscription %i\n" % (subscriptionID)
                msg += "Exception: %s\n" % str(ex)
                msg += "Passing over this error.  It will reoccur next interation!\n"
                msg += "Please check or remove this subscription!\n"
                logging.error(msg)
                self.sendAlert(6, msg=msg)
                continue

            logging.debug("Going to call wmbsJobFactory for sub %i with limit %i" % (subscriptionID, self.limit))

            splitParams = retrieveJobSplitParams(wmWorkload, workflow.task)
            logging.debug("Split Params: %s" % splitParams)

            # My hope is that the job factory is smart enough only to split un-split jobs
            splitterFactory = SplitterFactory(splitParams.get('algo_package', "WMCore.JobSplitting"))
            wmbsJobFactory = splitterFactory(package="WMCore.WMBS",
                                             subscription=wmbsSubscription,
                                             generators=seederList,
                                             limit=self.limit)

            # Turn on the jobFactory
            # NOTE(review): no matching close() call is visible in this
            # method - confirm the factory is released elsewhere.
            wmbsJobFactory.open()

            # Create a function to hold it
            jobSplittingFunction = runSplitter(jobFactory=wmbsJobFactory,
                                               splitParams=splitParams)

            # Now we get to find out how many jobs there are.
            jobNumber = self.countJobs.execute(workflow=workflow.id,
                                               conn=myThread.transaction.conn,
                                               transaction=True)
            jobNumber += splitParams.get('initial_lfn_counter', 0)
            logging.debug("Have %i jobs for workflow %s already in database." % (jobNumber, workflow.name))

            continueSubscription = True
            while continueSubscription:
                # This loop runs over the jobFactory,
                # using yield statements and a pre-existing proxy to
                # generate and process new jobs

                # First we need the jobs.
                myThread.transaction.begin()
                try:
                    # next() builtin instead of the Python 2 only .next() method
                    wmbsJobGroups = next(jobSplittingFunction)
                    logging.info("Retrieved %i jobGroups from jobSplitter" % (len(wmbsJobGroups)))
                except StopIteration:
                    # If you receive a stopIteration, we're done
                    logging.info("Completed iteration over subscription %i" % (subscriptionID))
                    continueSubscription = False
                    myThread.transaction.commit()
                    break

                # If we have no jobGroups, we're done
                if len(wmbsJobGroups) == 0:
                    logging.info("Found end in iteration over subscription %i" % (subscriptionID))
                    continueSubscription = False
                    myThread.transaction.commit()
                    break

                # Assemble a dict of all the info
                processDict = {'workflow': workflow,
                               'wmWorkload': wmWorkload, 'wmTaskName': wmTask.getPathName(),
                               'jobNumber': jobNumber, 'sandbox': wmTask.data.input.sandbox,
                               'owner': wmWorkload.getOwner().get('name', None),
                               'ownerDN': wmWorkload.getOwner().get('dn', None),
                               'ownerGroup': wmWorkload.getOwner().get('vogroup', ''),
                               'ownerRole': wmWorkload.getOwner().get('vorole', '')}

                tempSubscription = Subscription(id=wmbsSubscription['id'])

                nameDictList = []
                for wmbsJobGroup in wmbsJobGroups:
                    # For each jobGroup, put a dictionary
                    # together and run it with creatorProcess
                    jobsInGroup = len(wmbsJobGroup.jobs)
                    wmbsJobGroup.subscription = tempSubscription
                    tempDict = {}
                    tempDict.update(processDict)
                    tempDict['jobGroup'] = wmbsJobGroup
                    tempDict['swVersion'] = wmTask.getSwVersion()
                    tempDict['scramArch'] = wmTask.getScramArch()
                    # Overrides the value copied from processDict with the
                    # running counter for this group
                    tempDict['jobNumber'] = jobNumber
                    tempDict['agentNumber'] = self.agentNumber

                    jobGroup = creatorProcess(work=tempDict,
                                              jobCacheDir=self.jobCacheDir)
                    jobNumber += jobsInGroup

                    # Set jobCache for group
                    for job in jobGroup.jobs:
                        nameDictList.append({'jobid': job['id'],
                                             'cacheDir': job['cache_dir']})
                        job["user"] = wmWorkload.getOwner()["name"]
                        job["group"] = wmWorkload.getOwner()["group"]
                # Set the caches in the database
                try:
                    if len(nameDictList) > 0:
                        self.setBulkCache.execute(jobDictList=nameDictList,
                                                  conn=myThread.transaction.conn,
                                                  transaction=True)
                except WMException:
                    raise
                except Exception as ex:
                    msg = "Unknown exception while setting the bulk cache:\n"
                    msg += str(ex)
                    logging.error(msg)
                    self.sendAlert(6, msg=msg)
                    logging.debug("Error while setting bulkCache with following values: %s\n" % nameDictList)
                    raise JobCreatorException(msg)

                # Advance the jobGroup in changeState
                for wmbsJobGroup in wmbsJobGroups:
                    self.advanceJobGroup(wmbsJobGroup=wmbsJobGroup)

                # Now end the transaction so that everything is wrapped
                # in a single rollback
                myThread.transaction.commit()
Пример #6
0
class SeederTest(unittest.TestCase):
    """
    Unit tests that run GeneratorManager over WMBS job groups and inspect
    what RandomSeeder, RunAndLumiSeeder and PresetSeeder leave on the jobs.
    """

    def setUp(self):
        """
        _setUp_

        Create a default GeneratorManager and the dictionary of seed names
        (all mapped to None) that oneHundredFiles() gives to the RandomSeeder.
        """

        self.manager = GeneratorManager()

        self.seedlistForRandom = {
            "simMuonRPCDigis.initialSeed": None,
            "simEcalUnsuppressedDigis.initialSeed": None,
            "simCastorDigis.initialSeed": None,
            "generator.initialSeed": None,
            "simSiStripDigis.initialSeed": None,
            "LHCTransport.initialSeed": None,
            "mix.initialSeed": None,
            "simHcalUnsuppressedDigis.initialSeed": None,
            "theSource.initialSeed": None,
            "simMuonCSCDigis.initialSeed": None,
            "VtxSmeared.initialSeed": None,
            "g4SimHits.initialSeed": None,
            "simSiPixelDigis.initialSeed": None,
            "simMuonDTDigis.initialSeed": None,
            "evtgenproducer.initialSeed": None
            }

        return

    def tearDown(self):
        """
        _tearDown_

        Nothing to clean up.
        """
        return

    def oneHundredFiles(self, splittingAlgo = "EventBased", jobType = "Processing"):
        """
        _oneHundredFiles_

        Generate a WMBS data stack representing 100 files for job splitter
        testing

        As a side effect the RandomSeeder (configured with
        self.seedlistForRandom) and the RunAndLumiSeeder are added to
        self.manager on every call.

        Returns whatever the job factory yields for events_per_job = 100;
        the tests iterate over it as a collection of job groups.
        """
        fileset1 = Fileset(name='EventBasedFiles1')
        for i in range(0, 100):
            f = File("/store/MultipleFileSplit%s.root" % i,  # lfn
                     1000,    # size
                     100,     # events
                     10 + i,  # run
                     12312    # lumi
                     )
            f['locations'].add("BULLSHIT")

            fileset1.addFile(f)

        work = Workflow()
        subscription1 = Subscription(
            fileset = fileset1,
            workflow = work,
            split_algo = splittingAlgo,
            type = jobType)
        splitter = SplitterFactory()
        jobfactory = splitter(subscription1)
        jobs = jobfactory(events_per_job = 100)

        self.manager.addGenerator("RandomSeeder", **self.seedlistForRandom)
        self.manager.addGenerator("RunAndLumiSeeder")

        return jobs

    def testSimpleFiles(self):
        """
        _testSimpleFiles_

        test using one hundred files that we can save the attributes in a job
        """
        jobs = self.oneHundredFiles()

        for jobGrp in jobs:
            self.manager(jobGrp)

        for jobGrp in jobs:
            count = 0
            for job in jobGrp.jobs:
                conf = job.getBaggage()
                self.assertTrue(hasattr(conf.RandomSeeder.evtgenproducer, 'initialSeed'))
                self.assertTrue(hasattr(conf.RandomSeeder.generator, 'initialSeed'))
                self.assertEqual(job["mask"]["FirstLumi"], count % 11)
                # Floor division keeps the expected run number an integer on
                # Python 3 as well ("/" would yield a float there)
                self.assertEqual(job["mask"]["FirstRun"], (count // 11) + 1)
                count += 1

        return

    def testWMTask(self):
        """
        _testWMTask_

        Test whether or not we can read the seeder parameters out of a WMTask.
        Also tests RandomSeeder and RunAndLumiSeeder
        """

        task1 = makeWMTask("task1")

        randomDict = {"generator.initialSeed": None, "evtgenproducer.initialSeed": None, "MAXINT": 1}
        lumiDict   = {"lumi_per_run": 5}

        task1.addGenerator("RandomSeeder", **randomDict)
        task1.addGenerator("RunAndLumiSeeder", **lumiDict)

        manager = GeneratorManager(task = task1)

        jobs = self.oneHundredFiles()

        for jobGrp in jobs:
            manager(jobGrp)

        for jobGrp in jobs:
            for job in jobGrp.jobs:
                conf = job.getBaggage()
                self.assertTrue(hasattr(conf.RandomSeeder.evtgenproducer, 'initialSeed'))
                self.assertTrue(hasattr(conf.RandomSeeder.generator, 'initialSeed'))
                x = conf.RandomSeeder.generator.initialSeed
                self.assertTrue(x > 0, "ERROR: producing negative random numbers")
                # MAXINT was set to 1 above, so the seed may not exceed it
                self.assertTrue(x <= 1, "ERROR: MAXINT tag failed; producing bad random number %i" % (x))

        return

    def testPresetSeeder(self):
        """
        _testPresetSeeder_

        Test whether the PresetSeeder works
        """

        task1 = makeWMTask("task2")

        seederDict = {"generator.initialSeed": 1001, "evtgenproducer.initialSeed": 1001}
        task1.addGenerator("PresetSeeder", **seederDict)

        manager = GeneratorManager(task = task1)

        jobs = self.oneHundredFiles()

        for jobGrp in jobs:
            manager(jobGrp)

        # Every job must carry exactly the preset seed values
        for jobGrp in jobs:
            for job in jobGrp.jobs:
                conf = job.getBaggage()
                self.assertEqual(conf.PresetSeeder.evtgenproducer.initialSeed, 1001)
                self.assertEqual(conf.PresetSeeder.generator.initialSeed, 1001)

        return
Пример #7
0
    def pollSubscriptions(self):
        """
        Poller for looking in all active subscriptions for jobs that need to be made.

        For every subscription ID returned by self.subscriptionList the
        subscription, its workflow and the workload spec are loaded, the job
        splitter is run and, for each batch of job groups it yields, the
        jobs are created through creatorProcess, their cache directories
        are stored with self.setBulkCache and the groups are passed to
        self.advanceJobGroup.  Each batch is processed between a
        transaction begin() and commit() on the current thread, and the job
        factory is closed once the splitter is exhausted.

        Subscriptions that cannot be loaded, that have no task or workload,
        or whose generators fail to load are logged and skipped.

        Raises:
            WMException: re-raised unchanged when setting the bulk cache fails
                with one.
            JobCreatorException: for any other exception raised while setting
                the bulk cache.
        """
        logging.info("Beginning JobCreator.pollSubscriptions() cycle.")
        # currentThread() is a deprecated alias of current_thread()
        myThread = threading.current_thread()

        # First, get list of Subscriptions
        subscriptions = self.subscriptionList.execute()

        # Okay, now we have a list of subscriptions
        for subscriptionID in subscriptions:
            wmbsSubscription = Subscription(id=subscriptionID)
            try:
                wmbsSubscription.load()
            except IndexError:
                # This happens when the subscription no longer exists
                # i.e., someone executed a kill() function on the database
                # while the JobCreator was in cycle
                # Ignore this subscription
                msg = "JobCreator cannot load subscription %i" % subscriptionID
                logging.error(msg)
                continue

            workflow = Workflow(id=wmbsSubscription["workflow"].id)
            workflow.load()
            wmbsSubscription['workflow'] = workflow
            wmWorkload = retrieveWMSpec(workflow=workflow)

            if not workflow.task or not wmWorkload:
                # Then we have a problem
                # We NEED a sandbox
                # Abort this subscription!
                # But do NOT fail
                # We have no way of marking a subscription as bad per se
                # We'll have to just keep skipping it
                msg = "Have no task for workflow %i\n" % (workflow.id)
                msg += "Aborting Subscription %i" % (subscriptionID)
                logging.error(msg)
                continue

            logging.debug("Have loaded subscription %i with workflow %i\n", subscriptionID, workflow.id)

            # retrieve information from the workload to propagate down to the job configuration
            allowOpport = wmWorkload.getAllowOpportunistic()

            # Set task object
            wmTask = wmWorkload.getTaskByPath(workflow.task)

            # Get generators
            # If you fail to load the generators, pass on the job
            try:
                if hasattr(wmTask.data, 'generators'):
                    manager = GeneratorManager(wmTask)
                    seederList = manager.getGeneratorList()
                else:
                    seederList = []
            except Exception as ex:
                msg = "Had failure loading generators for subscription %i\n" % (subscriptionID)
                msg += "Exception: %s\n" % str(ex)
                msg += "Passing over this error.  It will reoccur next interation!\n"
                msg += "Please check or remove this subscription!\n"
                logging.error(msg)
                continue

            logging.debug("Going to call wmbsJobFactory for sub %i with limit %i", subscriptionID, self.limit)

            splitParams = retrieveJobSplitParams(wmWorkload, workflow.task)
            logging.debug("Split Params: %s", splitParams)

            # Load the proper job splitting module
            splitterFactory = SplitterFactory(splitParams.get('algo_package', "WMCore.JobSplitting"))
            # and return an instance of the splitting algorithm
            wmbsJobFactory = splitterFactory(package="WMCore.WMBS",
                                             subscription=wmbsSubscription,
                                             generators=seederList,
                                             limit=self.limit)

            # Turn on the jobFactory --> get available files for that subscription, keep result proxies
            wmbsJobFactory.open()

            # Create a function to hold it, calling __call__ from the JobFactory
            # which then calls algorithm method of the job splitting algo instance
            jobSplittingFunction = runSplitter(jobFactory=wmbsJobFactory,
                                               splitParams=splitParams)

            # Now we get to find out how many jobs there are.
            jobNumber = self.countJobs.execute(workflow=workflow.id,
                                               conn=myThread.transaction.conn,
                                               transaction=True)
            jobNumber += splitParams.get('initial_lfn_counter', 0)
            logging.debug("Have %i jobs for workflow %s already in database.", jobNumber, workflow.name)

            continueSubscription = True
            while continueSubscription:
                # This loop runs over the jobFactory,
                # using yield statements and a pre-existing proxy to
                # generate and process new jobs

                # First we need the jobs.
                myThread.transaction.begin()
                try:
                    wmbsJobGroups = next(jobSplittingFunction)
                    logging.info("Retrieved %i jobGroups from jobSplitter", len(wmbsJobGroups))
                except StopIteration:
                    # If you receive a stopIteration, we're done
                    logging.info("Completed iteration over subscription %i", subscriptionID)
                    continueSubscription = False
                    myThread.transaction.commit()
                    break

                # If we have no jobGroups, we're done
                if len(wmbsJobGroups) == 0:
                    logging.info("Found end in iteration over subscription %i", subscriptionID)
                    continueSubscription = False
                    myThread.transaction.commit()
                    break

                # Assemble a dict of all the info
                processDict = {'workflow': workflow,
                               'wmWorkload': wmWorkload, 'wmTaskName': wmTask.getPathName(),
                               'jobNumber': jobNumber, 'sandbox': wmTask.data.input.sandbox,
                               'owner': wmWorkload.getOwner().get('name', None),
                               'ownerDN': wmWorkload.getOwner().get('dn', None),
                               'ownerGroup': wmWorkload.getOwner().get('vogroup', ''),
                               'ownerRole': wmWorkload.getOwner().get('vorole', ''),
                               'numberOfCores': 1,
                               'inputDataset': wmTask.getInputDatasetPath(),
                               'inputPileup': wmTask.getInputPileupDatasets()}
                # Use the largest core count requested by any step of the
                # task; the default of 1 stays if the settings are unreadable
                try:
                    maxCores = 1
                    stepNames = wmTask.listAllStepNames()
                    for stepName in stepNames:
                        sh = wmTask.getStep(stepName)
                        maxCores = max(maxCores, sh.getNumberOfCores())
                    processDict.update({'numberOfCores': maxCores})
                except AttributeError:
                    logging.info("Failed to read multicore settings from task %s", wmTask.getPathName())

                tempSubscription = Subscription(id=wmbsSubscription['id'])

                # if we have glideinWMS constraints, then adapt all jobs
                if self.glideinLimits:
                    capResourceEstimates(wmbsJobGroups, self.glideinLimits)

                nameDictList = []
                for wmbsJobGroup in wmbsJobGroups:
                    # For each jobGroup, put a dictionary
                    # together and run it with creatorProcess
                    jobsInGroup = len(wmbsJobGroup.jobs)
                    wmbsJobGroup.subscription = tempSubscription
                    tempDict = {}
                    tempDict.update(processDict)
                    tempDict['jobGroup'] = wmbsJobGroup
                    tempDict['swVersion'] = wmTask.getSwVersion(allSteps=True)
                    tempDict['scramArch'] = wmTask.getScramArch()
                    # Overrides the value copied from processDict with the
                    # running counter for this group
                    tempDict['jobNumber'] = jobNumber
                    tempDict['agentNumber'] = self.agentNumber
                    tempDict['agentName'] = self.agentName
                    tempDict['inputDatasetLocations'] = wmbsJobGroup.getLocationsForJobs()
                    tempDict['allowOpportunistic'] = allowOpport

                    jobGroup = creatorProcess(work=tempDict,
                                              jobCacheDir=self.jobCacheDir)
                    jobNumber += jobsInGroup

                    # Set jobCache for group
                    for job in jobGroup.jobs:
                        nameDictList.append({'jobid': job['id'],
                                             'cacheDir': job['cache_dir']})
                        job["user"] = wmWorkload.getOwner()["name"]
                        job["group"] = wmWorkload.getOwner()["group"]
                # Set the caches in the database
                try:
                    if len(nameDictList) > 0:
                        self.setBulkCache.execute(jobDictList=nameDictList,
                                                  conn=myThread.transaction.conn,
                                                  transaction=True)
                except WMException:
                    raise
                except Exception as ex:
                    msg = "Unknown exception while setting the bulk cache:\n"
                    msg += str(ex)
                    logging.error(msg)
                    logging.debug("Error while setting bulkCache with following values: %s\n", nameDictList)
                    raise JobCreatorException(msg)

                # Advance the jobGroup in changeState
                for wmbsJobGroup in wmbsJobGroups:
                    self.advanceJobGroup(wmbsJobGroup=wmbsJobGroup)

                # Now end the transaction so that everything is wrapped
                # in a single rollback
                myThread.transaction.commit()

            # END: While loop over jobFactory

            # Close the jobFactory
            wmbsJobFactory.close()

        return
Пример #8
0
class SeederTest(unittest.TestCase):
    """
    _SeederTest_

    Unit tests that run seeder generators over job groups through
    GeneratorManager and inspect the resulting job baggage and masks.
    """

    def setUp(self):
        """
        _setUp_

        Create the GeneratorManager shared by the tests and the keyword
        arguments handed to the "RandomSeeder" generator.
        """

        self.manager = GeneratorManager()

        # Every seed is left as None here; the tests only check that an
        # 'initialSeed' attribute shows up in the job baggage afterwards.
        self.seedlistForRandom = {
            "simMuonRPCDigis.initialSeed": None,
            "simEcalUnsuppressedDigis.initialSeed": None,
            "simCastorDigis.initialSeed": None,
            "generator.initialSeed": None,
            "simSiStripDigis.initialSeed": None,
            "LHCTransport.initialSeed": None,
            "mix.initialSeed": None,
            "simHcalUnsuppressedDigis.initialSeed": None,
            "theSource.initialSeed": None,
            "simMuonCSCDigis.initialSeed": None,
            "VtxSmeared.initialSeed": None,
            "g4SimHits.initialSeed": None,
            "simSiPixelDigis.initialSeed": None,
            "simMuonDTDigis.initialSeed": None,
            "evtgenproducer.initialSeed": None,
        }

        return

    def tearDown(self):
        """
        _tearDown_

        Nothing to clean up.
        """

        return

    def oneHundredFiles(self,
                        splittingAlgo="EventBased",
                        jobType="Processing"):
        """
        _oneHundredFiles_

        Generate a WMBS data stack representing 100 files for job splitter
        testing.

        As a side effect the "RandomSeeder" (configured with
        self.seedlistForRandom) and "RunAndLumiSeeder" generators are added
        to self.manager.

        :param splittingAlgo: value passed as the subscription's split_algo
        :param jobType: value passed as the subscription's type
        :returns: whatever the job factory returns when called with
            events_per_job=100; the tests iterate it as job groups
        """
        fileset1 = Fileset(name='EventBasedFiles1')
        for i in range(100):
            f = File(
                "/store/MultipleFileSplit%s.root" % i,  # lfn
                1000,  # size
                100,  # events
                10 + i,  # run
                12312  # lumi
            )
            f['locations'].add("BULLSHIT")

            fileset1.addFile(f)

        work = Workflow()
        subscription1 = Subscription(fileset=fileset1,
                                     workflow=work,
                                     split_algo=splittingAlgo,
                                     type=jobType)
        splitter = SplitterFactory()
        jobfactory = splitter(subscription1)
        jobs = jobfactory(events_per_job=100)

        self.manager.addGenerator("RandomSeeder", **self.seedlistForRandom)
        self.manager.addGenerator("RunAndLumiSeeder")

        return jobs

    def testSimpleFiles(self):
        """
        _testSimpleFiles_

        Test, using one hundred files, that the seeders save their
        attributes in each job.
        """
        jobs = self.oneHundredFiles()

        for jobGrp in jobs:
            self.manager(jobGrp)

        for jobGrp in jobs:
            for count, job in enumerate(jobGrp.jobs):
                conf = job.getBaggage()
                self.assertTrue(
                    hasattr(conf.RandomSeeder.evtgenproducer, 'initialSeed'))
                self.assertTrue(
                    hasattr(conf.RandomSeeder.generator, 'initialSeed'))
                self.assertEqual(job["mask"]["FirstLumi"], count % 11)
                # Floor division: with "/" Python 3 yields a float such as
                # 1.09, which can never equal the integer run number.
                self.assertEqual(job["mask"]["FirstRun"], (count // 11) + 1)

        return

    def testWMTask(self):
        """
        _testWMTask_

        Test whether or not we can read the seeder parameters out of a WMTask.
        Also tests RandomSeeder and RunAndLumiSeeder
        """

        task1 = makeWMTask("task1")

        randomDict = {
            "generator.initialSeed": None,
            "evtgenproducer.initialSeed": None,
            "MAXINT": 1
        }
        lumiDict = {"lumi_per_run": 5}

        task1.addGenerator("RandomSeeder", **randomDict)
        task1.addGenerator("RunAndLumiSeeder", **lumiDict)

        manager = GeneratorManager(task=task1)

        jobs = self.oneHundredFiles()

        for jobGrp in jobs:
            manager(jobGrp)

        for jobGrp in jobs:
            for job in jobGrp.jobs:
                conf = job.getBaggage()
                self.assertTrue(
                    hasattr(conf.RandomSeeder.evtgenproducer, 'initialSeed'))
                self.assertTrue(
                    hasattr(conf.RandomSeeder.generator, 'initialSeed'))
                # With MAXINT set to 1 the generated seed must fall in (0, 1].
                x = conf.RandomSeeder.generator.initialSeed
                self.assertTrue(x > 0,
                                "ERROR: producing negative random numbers")
                self.assertTrue(
                    x <= 1,
                    "ERROR: MAXINT tag failed; producing bad random number %i"
                    % (x))

        return

    def testPresetSeeder(self):
        """
        _testPresetSeeder_

        Test whether the PresetSeeder works
        """

        task1 = makeWMTask("task2")

        seederDict = {
            "generator.initialSeed": 1001,
            "evtgenproducer.initialSeed": 1001
        }
        task1.addGenerator("PresetSeeder", **seederDict)

        manager = GeneratorManager(task=task1)

        jobs = self.oneHundredFiles()

        for jobGrp in jobs:
            manager(jobGrp)

        # Every job must carry exactly the preset seed values.
        for jobGrp in jobs:
            for job in jobGrp.jobs:
                conf = job.getBaggage()
                self.assertEqual(conf.PresetSeeder.evtgenproducer.initialSeed,
                                 1001)
                self.assertEqual(conf.PresetSeeder.generator.initialSeed, 1001)

        return
Пример #9
0
    def __call__(self, parameters):
        """
        Poller for looking in all active subscriptions for jobs that need to be made.

        For every entry the subscription and its workflow are loaded, the
        workload spec is retrieved and a job factory is built through
        self.splitterFactory. Each batch of job groups produced by the
        splitter is then processed: a work area is created per group, every
        job is saved with a running job number and the group is advanced.

        :param parameters: iterable of dict-like entries; the subscription ID
            is read from each entry's 'subscription' key.
        :returns: ``parameters`` once every entry has been handled, or the
            offending subscription ID (None or a negative number) as soon as
            an invalid entry is encountered.
        """

        logging.info("In JobCreatorWorker.__call__")

        myThread = threading.current_thread()

        for entry in parameters:
            # This retrieves a single subscription
            subscriptionID = entry.get('subscription')

            # A missing key yields None; ordering None against an int raises
            # TypeError on Python 3, so it is checked for explicitly.
            if subscriptionID is None or subscriptionID < 0:
                logging.error("Got non-existant subscription")
                logging.error("Assuming parameters in error: returning")
                return subscriptionID

            myThread.transaction.begin()

            logging.info("About to call subscription %i", subscriptionID)

            wmbsSubscription = Subscription(id=subscriptionID)
            wmbsSubscription.load()
            wmbsSubscription["workflow"].load()
            workflow = wmbsSubscription["workflow"]

            wmWorkload = retrieveWMSpec(wmbsSubscription)

            if not workflow.task or not wmWorkload:
                # Then we have a problem
                # We have no sandbox
                # We NEED a sandbox
                # Abort this subscription!
                # But do NOT fail
                # We have no way of marking a subscription as bad per se
                # We'll have to just keep skipping it
                logging.error("Have no task for workflow %i", workflow.id)
                logging.error("Aborting Subscription %i", subscriptionID)
                # Only load/retrieve calls have run since begin(); close the
                # transaction here so it is not left open across the skip.
                myThread.transaction.commit()
                continue

            wmTask = wmWorkload.getTaskByPath(workflow.task)
            if hasattr(wmTask.data, 'seeders'):
                seederList = GeneratorManager(wmTask).getGeneratorList()
            else:
                seederList = []

            logging.info("About to enter JobFactory")
            logging.debug("Going to call wmbsJobFactory with limit %i",
                          self.limit)

            # My hope is that the job factory is smart enough only to split un-split jobs
            wmbsJobFactory = self.splitterFactory(
                package="WMCore.WMBS",
                subscription=wmbsSubscription,
                generators=seederList,
                limit=self.limit)
            splitParams = retrieveJobSplitParams(wmWorkload, workflow.task)
            logging.debug("Split Params: %s", splitParams)

            myThread.transaction.commit()

            # Turn on the jobFactory
            myThread.transaction.begin()
            wmbsJobFactory.open()

            # Create a function to hold it
            jobSplittingFunction = runSplitter(jobFactory=wmbsJobFactory,
                                               splitParams=splitParams)

            # This loop runs over the jobFactory,
            # using yield statements and a pre-existing proxy to
            # generate and process new jobs
            for wmbsJobGroups in jobSplittingFunction:
                logging.info("Retrieved %i jobGroups from jobSplitter",
                             len(wmbsJobGroups))

                # Now we get to find out what job they are.
                countJobs = self.daoFactory(
                    classname="Jobs.GetNumberOfJobsPerWorkflow")
                jobNumber = countJobs.execute(workflow=workflow.id,
                                              conn=myThread.transaction.conn,
                                              transaction=True)
                logging.debug("Have %i jobs for this workflow already",
                              jobNumber)

                for wmbsJobGroup in wmbsJobGroups:

                    logging.debug("Processing jobGroup %i",
                                  wmbsJobGroup.exists())
                    logging.debug("Processing %i jobs", len(wmbsJobGroup.jobs))

                    # Create a directory
                    self.createWorkArea.processJobs(
                        jobGroup=wmbsJobGroup,
                        startDir=self.jobCacheDir,
                        workflow=workflow,
                        wmWorkload=wmWorkload,
                        transaction=myThread.transaction,
                        conn=myThread.transaction.conn)

                    # Job numbers continue from the count already stored
                    # for this workflow.
                    for job in wmbsJobGroup.jobs:
                        jobNumber += 1
                        self.saveJob(job=job,
                                     workflow=workflow,
                                     wmTask=wmTask,
                                     jobNumber=jobNumber)

                    self.advanceJobGroup(wmbsJobGroup)

                    logging.debug("Finished call for jobGroup %i",
                                  wmbsJobGroup.exists())

            # The splitter is exhausted, we're done with this subscription
            logging.info("Completed iteration over subscription %i",
                         subscriptionID)

            # END: loop over jobSplitter
            myThread.transaction.commit()

            # About to reset everything: drop the references before forcing
            # a garbage collection pass
            wmbsJobGroups = None
            wmTask = None
            wmWorkload = None
            splitParams = None
            wmbsJobFactory = None
            gc.collect()

            # About to check memory
            doMemoryCheck(
                "About to get memory references: End of subscription loop")

        # Final memory check
        doMemoryCheck("About to get memory references: End of __call__()")

        logging.debug("About to return from JobCreatorWorker.__call__()")

        return parameters
Пример #10
0
    def __call__(self, parameters):
        """
        Poller for looking in all active subscriptions for jobs that need to be made.

        Each entry's subscription and workflow are loaded and the workload
        spec is retrieved; a job factory obtained from self.splitterFactory
        is then driven through runSplitter. For every job group it produces,
        a work area is created, each job is saved under a running job number
        and the group is advanced.

        :param parameters: iterable of dict-like entries; the subscription ID
            is read from each entry's 'subscription' key.
        :returns: ``parameters`` after all entries were handled, or the
            offending subscription ID (None or a negative number) when an
            invalid entry stops processing early.
        """

        logging.info("In JobCreatorWorker.__call__")

        myThread = threading.current_thread()

        for entry in parameters:
            # This retrieves a single subscription
            subscriptionID = entry.get('subscription')

            # entry.get() gives None for a missing key, and "None < 0" is a
            # TypeError on Python 3; treat None as an invalid ID as well.
            if subscriptionID is None or subscriptionID < 0:
                logging.error("Got non-existant subscription")
                logging.error("Assuming parameters in error: returning")
                return subscriptionID

            myThread.transaction.begin()

            logging.info("About to call subscription %i", subscriptionID)

            wmbsSubscription = Subscription(id=subscriptionID)
            wmbsSubscription.load()
            wmbsSubscription["workflow"].load()
            workflow = wmbsSubscription["workflow"]

            wmWorkload = retrieveWMSpec(wmbsSubscription)

            if not workflow.task or not wmWorkload:
                # Then we have a problem
                # We have no sandbox
                # We NEED a sandbox
                # Abort this subscription!
                # But do NOT fail
                # We have no way of marking a subscription as bad per se
                # We'll have to just keep skipping it
                logging.error("Have no task for workflow %i", workflow.id)
                logging.error("Aborting Subscription %i", subscriptionID)
                # Do not leave the transaction begun above open when
                # skipping; only load/retrieve calls ran inside it.
                myThread.transaction.commit()
                continue

            wmTask = wmWorkload.getTaskByPath(workflow.task)
            if hasattr(wmTask.data, 'seeders'):
                seederList = GeneratorManager(wmTask).getGeneratorList()
            else:
                seederList = []

            logging.info("About to enter JobFactory")
            logging.debug("Going to call wmbsJobFactory with limit %i", self.limit)

            # My hope is that the job factory is smart enough only to split un-split jobs
            wmbsJobFactory = self.splitterFactory(package="WMCore.WMBS",
                                                  subscription=wmbsSubscription,
                                                  generators=seederList,
                                                  limit=self.limit)
            splitParams = retrieveJobSplitParams(wmWorkload, workflow.task)
            logging.debug("Split Params: %s", splitParams)

            myThread.transaction.commit()

            # Turn on the jobFactory
            myThread.transaction.begin()
            wmbsJobFactory.open()

            # Create a function to hold it
            jobSplittingFunction = runSplitter(jobFactory=wmbsJobFactory,
                                               splitParams=splitParams)

            # This loop runs over the jobFactory,
            # using yield statements and a pre-existing proxy to
            # generate and process new jobs
            for wmbsJobGroups in jobSplittingFunction:
                logging.info("Retrieved %i jobGroups from jobSplitter", len(wmbsJobGroups))

                # Now we get to find out what job they are.
                countJobs = self.daoFactory(classname="Jobs.GetNumberOfJobsPerWorkflow")
                jobNumber = countJobs.execute(workflow=workflow.id,
                                              conn=myThread.transaction.conn,
                                              transaction=True)
                logging.debug("Have %i jobs for this workflow already", jobNumber)

                for wmbsJobGroup in wmbsJobGroups:

                    logging.debug("Processing jobGroup %i", wmbsJobGroup.exists())
                    logging.debug("Processing %i jobs", len(wmbsJobGroup.jobs))

                    # Create a directory
                    self.createWorkArea.processJobs(jobGroup=wmbsJobGroup,
                                                    startDir=self.jobCacheDir,
                                                    workflow=workflow,
                                                    wmWorkload=wmWorkload,
                                                    transaction=myThread.transaction,
                                                    conn=myThread.transaction.conn)

                    # Number the new jobs onwards from the stored count
                    for job in wmbsJobGroup.jobs:
                        jobNumber += 1
                        self.saveJob(job=job, workflow=workflow,
                                     wmTask=wmTask, jobNumber=jobNumber)

                    self.advanceJobGroup(wmbsJobGroup)

                    logging.debug("Finished call for jobGroup %i", wmbsJobGroup.exists())

            # Nothing more comes out of the splitter, we're done
            logging.info("Completed iteration over subscription %i", subscriptionID)

            # END: loop over jobSplitter
            myThread.transaction.commit()

            # About to reset everything: release the references so the
            # forced collection below can reclaim them
            wmbsJobGroups = None
            wmTask = None
            wmWorkload = None
            splitParams = None
            wmbsJobFactory = None
            gc.collect()

            # About to check memory
            doMemoryCheck("About to get memory references: End of subscription loop")

        # Final memory check
        doMemoryCheck("About to get memory references: End of __call__()")

        logging.debug("About to return from JobCreatorWorker.__call__()")

        return parameters
Пример #11
0
    def pollSubscriptions(self):
        """
        Poller for looking in all active subscriptions for jobs that need to be made.

        """
        logging.info("Beginning JobCreator.pollSubscriptions() cycle.")
        myThread = threading.currentThread()

        #First, get list of Subscriptions
        subscriptions = self.subscriptionList.execute()

        # Okay, now we have a list of subscriptions
        for subscriptionID in subscriptions:
            wmbsSubscription = Subscription(id=subscriptionID)
            try:
                wmbsSubscription.load()
            except IndexError:
                # This happens when the subscription no longer exists
                # i.e., someone executed a kill() function on the database
                # while the JobCreator was in cycle
                # Ignore this subscription
                msg = "JobCreator cannot load subscription %i" % subscriptionID
                logging.error(msg)
                self.sendAlert(6, msg=msg)
                continue

            workflow = Workflow(id=wmbsSubscription["workflow"].id)
            workflow.load()
            wmbsSubscription['workflow'] = workflow
            wmWorkload = retrieveWMSpec(workflow=workflow)

            if not workflow.task or not wmWorkload:
                # Then we have a problem
                # We NEED a sandbox
                # Abort this subscription!
                # But do NOT fail
                # We have no way of marking a subscription as bad per se
                # We'll have to just keep skipping it
                msg = "Have no task for workflow %i\n" % (workflow.id)
                msg += "Aborting Subscription %i" % (subscriptionID)
                logging.error(msg)
                self.sendAlert(1, msg=msg)
                continue

            logging.debug("Have loaded subscription %i with workflow %i\n" %
                          (subscriptionID, workflow.id))

            # Set task object
            wmTask = wmWorkload.getTaskByPath(workflow.task)

            # Get generators
            # If you fail to load the generators, pass on the job
            try:
                if hasattr(wmTask.data, 'generators'):
                    manager = GeneratorManager(wmTask)
                    seederList = manager.getGeneratorList()
                else:
                    seederList = []
            except Exception, ex:
                msg = "Had failure loading generators for subscription %i\n" % (
                    subscriptionID)
                msg += "Exception: %s\n" % str(ex)
                msg += "Passing over this error.  It will reoccur next interation!\n"
                msg += "Please check or remove this subscription!\n"
                logging.error(msg)
                self.sendAlert(6, msg=msg)
                continue

            logging.debug(
                "Going to call wmbsJobFactory for sub %i with limit %i" %
                (subscriptionID, self.limit))

            splitParams = retrieveJobSplitParams(wmWorkload, workflow.task)
            logging.debug("Split Params: %s" % splitParams)

            # My hope is that the job factory is smart enough only to split un-split jobs
            splitterFactory = SplitterFactory(
                splitParams.get('algo_package', "WMCore.JobSplitting"))
            wmbsJobFactory = splitterFactory(package="WMCore.WMBS",
                                             subscription=wmbsSubscription,
                                             generators=seederList,
                                             limit=self.limit)

            # Turn on the jobFactory
            wmbsJobFactory.open()

            # Create a function to hold it
            jobSplittingFunction = runSplitter(jobFactory=wmbsJobFactory,
                                               splitParams=splitParams)

            # Now we get to find out how many jobs there are.
            jobNumber = self.countJobs.execute(workflow=workflow.id,
                                               conn=myThread.transaction.conn,
                                               transaction=True)
            jobNumber += splitParams.get('initial_lfn_counter', 0)
            logging.debug("Have %i jobs for workflow %s already in database." %
                          (jobNumber, workflow.name))

            continueSubscription = True
            while continueSubscription:
                # This loop runs over the jobFactory,
                # using yield statements and a pre-existing proxy to
                # generate and process new jobs

                # First we need the jobs.
                myThread.transaction.begin()
                try:
                    wmbsJobGroups = jobSplittingFunction.next()
                    logging.info("Retrieved %i jobGroups from jobSplitter" %
                                 (len(wmbsJobGroups)))
                except StopIteration:
                    # If you receive a stopIteration, we're done
                    logging.info("Completed iteration over subscription %i" %
                                 (subscriptionID))
                    continueSubscription = False
                    myThread.transaction.commit()
                    break

                # If we have no jobGroups, we're done
                if len(wmbsJobGroups) == 0:
                    logging.info(
                        "Found end in iteration over subscription %i" %
                        (subscriptionID))
                    continueSubscription = False
                    myThread.transaction.commit()
                    break

                # Assemble a dict of all the info
                processDict = {
                    'workflow': workflow,
                    'wmWorkload': wmWorkload,
                    'wmTaskName': wmTask.getPathName(),
                    'jobNumber': jobNumber,
                    'sandbox': wmTask.data.input.sandbox,
                    'wmTaskPrio': wmTask.getTaskPriority(),
                    'owner': wmWorkload.getOwner().get('name', None),
                    'ownerDN': wmWorkload.getOwner().get('dn', None),
                    'ownerGroup': wmWorkload.getOwner().get('vogroup', ''),
                    'ownerRole': wmWorkload.getOwner().get('vorole', '')
                }

                tempSubscription = Subscription(id=wmbsSubscription['id'])

                nameDictList = []
                for wmbsJobGroup in wmbsJobGroups:
                    # For each jobGroup, put a dictionary
                    # together and run it with creatorProcess
                    jobsInGroup = len(wmbsJobGroup.jobs)
                    wmbsJobGroup.subscription = tempSubscription
                    tempDict = {}
                    tempDict.update(processDict)
                    tempDict['jobGroup'] = wmbsJobGroup
                    tempDict['swVersion'] = wmTask.getSwVersion()
                    tempDict['scramArch'] = wmTask.getScramArch()
                    tempDict['jobNumber'] = jobNumber
                    tempDict['agentNumber'] = self.agentNumber

                    jobGroup = creatorProcess(work=tempDict,
                                              jobCacheDir=self.jobCacheDir)
                    jobNumber += jobsInGroup

                    # Set jobCache for group
                    for job in jobGroup.jobs:
                        nameDictList.append({
                            'jobid': job['id'],
                            'cacheDir': job['cache_dir']
                        })
                        job["user"] = wmWorkload.getOwner()["name"]
                        job["group"] = wmWorkload.getOwner()["group"]
                # Set the caches in the database
                try:
                    if len(nameDictList) > 0:
                        self.setBulkCache.execute(
                            jobDictList=nameDictList,
                            conn=myThread.transaction.conn,
                            transaction=True)
                except WMException:
                    raise
                except Exception, ex:
                    msg = "Unknown exception while setting the bulk cache:\n"
                    msg += str(ex)
                    logging.error(msg)
                    self.sendAlert(6, msg=msg)
                    logging.debug(
                        "Error while setting bulkCache with following values: %s\n"
                        % nameDictList)
                    raise JobCreatorException(msg)

                # Advance the jobGroup in changeState
                for wmbsJobGroup in wmbsJobGroups:
                    self.advanceJobGroup(wmbsJobGroup=wmbsJobGroup)

                # Now end the transaction so that everything is wrapped
                # in a single rollback
                myThread.transaction.commit()