Exemplo n.º 1
0
    def testAlerts(self):
        """
        Tests only alerts triggered from JobTrackerPoller.

        """
        config = self.getConfig()
        jobTracker = JobTrackerPoller(config)
        jobTracker.sendAlert(6, msg = "test message")
Exemplo n.º 2
0
    def testAlerts(self):
        """
        Tests only alerts triggered from JobTrackerPoller.

        """
        config = self.getConfig()
        jobTracker = JobTrackerPoller(config)
        jobTracker.sendAlert(6, msg="test message")
Exemplo n.º 3
0
def killJobs(list, config):
    """
    Kill all the jobs in the batch system whose ID comes off of the list

    """

    jobList = []

    for job in list:
        jobList.append(job['id'])

    jobTracker = JobTrackerPoller(config)
    jobTracker.setup(None)
    jobTracker.killJobs(jobList = jobList)

    return list
Exemplo n.º 4
0
    def preInitialization(self):
        """
        Sets up the worker thread

        """
        logging.info("JobTracker.preInitialization")

        # Add event loop to worker manager
        myThread = threading.currentThread()

        pollInterval = self.config.JobTracker.pollInterval
        logging.info("Setting poll interval to %s seconds" % pollInterval)
        myThread.workerThreadManager.addWorker(JobTrackerPoller(self.config),
                                               pollInterval)

        return
Exemplo n.º 5
0
def killJobs(list, config):
    """
    Kill all the jobs in the batch system whose ID comes off of the list

    """

    jobList = []

    for job in list:
        jobList.append(job['id'])

    jobTracker = JobTrackerPoller(config)
    jobTracker.setup(None)
    jobTracker.killJobs(jobList=jobList)

    return list
Exemplo n.º 6
0
    def testA_StraightThrough(self):
        """
        _StraightThrough_

        Just run everything straight through without any variations
        """
        # Do pre-submit job check
        nRunning = getCondorRunningJobs()
        self.assertEqual(nRunning, 0, "User currently has %i running jobs.  Test will not continue" % (nRunning))

        myThread = threading.currentThread()
        workload = self.createTestWorkload()
        config   = self.getConfig()


        name         = 'WMAgent_Test1'
        site         = self.sites[0]
        nSubs        = 5
        nFiles       = 10
        workloadPath = os.path.join(self.testDir, 'workloadTest',
                                    'TestWorkload', 'WMSandbox',
                                    'WMWorkload.pkl')

        # Create a collection of files
        self.createFileCollection(name = name, nSubs = nSubs,
                                  nFiles = nFiles,
                                  workflowURL = workloadPath,
                                  site = site)



        ############################################################
        # Test the JobCreator


        config.Agent.componentName = 'JobCreator'
        testJobCreator = JobCreatorPoller(config = config)

        testJobCreator.algorithm()
        time.sleep(5)


        # Did all jobs get created?
        getJobsAction = self.daoFactory(classname = "Jobs.GetAllJobs")
        result = getJobsAction.execute(state = 'Created', jobType = "Processing")
        self.assertEqual(len(result), nSubs*nFiles)


        # Count database objects
        result = myThread.dbi.processData('SELECT * FROM wmbs_sub_files_acquired')[0].fetchall()
        self.assertEqual(len(result), nSubs * nFiles)


        # Find the test directory
        testDirectory = os.path.join(self.testDir, 'TestWorkload', 'ReReco')
        self.assertTrue('JobCollection_1_0' in os.listdir(testDirectory))
        self.assertTrue(len(os.listdir(testDirectory)) <= 20)

        groupDirectory = os.path.join(testDirectory, 'JobCollection_1_0')

        # First job should be in here
        self.assertTrue('job_1' in os.listdir(groupDirectory))
        jobFile = os.path.join(groupDirectory, 'job_1', 'job.pkl')
        self.assertTrue(os.path.isfile(jobFile))
        f = open(jobFile, 'r')
        job = cPickle.load(f)
        f.close()


        self.assertEqual(job['workflow'], name)
        self.assertEqual(len(job['input_files']), 1)
        self.assertEqual(os.path.basename(job['sandbox']), 'TestWorkload-Sandbox.tar.bz2')










        ###############################################################
        # Now test the JobSubmitter

        config.Agent.componentName = 'JobSubmitter'
        testJobSubmitter = JobSubmitterPoller(config = config)


        testJobSubmitter.algorithm()


        # Check that jobs are in the right state
        result = getJobsAction.execute(state = 'Created', jobType = "Processing")
        self.assertEqual(len(result), 0)
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nFiles)



        # Check assigned locations
        getLocationAction = self.daoFactory(classname = "Jobs.GetLocation")
        for id in result:
            loc = getLocationAction.execute(jobid = id)
            self.assertEqual(loc, [[site]])


        # Check to make sure we have running jobs
        nRunning = getCondorRunningJobs()
        self.assertEqual(nRunning, nFiles * nSubs)


        #################################################################
        # Now the JobTracker


        config.Agent.componentName = 'JobTracker'
        testJobTracker = JobTrackerPoller(config = config)
        testJobTracker.setup()

        testJobTracker.algorithm()

        # Running the algo without removing the jobs should do nothing
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nFiles)


        condorRM()
        time.sleep(1)

        # All jobs gone?
        nRunning = getCondorRunningJobs()
        self.assertEqual(nRunning, 0)


        testJobTracker.algorithm()
        time.sleep(5)

        # Running the algo without removing the jobs should do nothing
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), 0)
        result = getJobsAction.execute(state = 'Complete', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nFiles)




        #################################################################
        # Now the JobAccountant

        # First you need to load all jobs


        self.getFWJRAction = self.daoFactory(classname = "Jobs.GetFWJRByState")
        completeJobs       = self.getFWJRAction.execute(state = "complete")


        # Create reports for all jobs
        self.createReports(jobs = completeJobs, retryCount = 0)






        config.Agent.componentName = 'JobAccountant'
        testJobAccountant = JobAccountantPoller(config = config)
        testJobAccountant.setup()


        # It should do something with the jobs
        testJobAccountant.algorithm()


        # All the jobs should be done now
        result = getJobsAction.execute(state = 'Complete', jobType = "Processing")
        self.assertEqual(len(result), 0)
        result = getJobsAction.execute(state = 'Success', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nFiles)



        #######################################################################
        # Now the JobArchiver


        config.Agent.componentName = 'JobArchiver'
        testJobArchiver = JobArchiverPoller(config = config)


        testJobArchiver.algorithm()

        # All the jobs should be cleaned up
        result = getJobsAction.execute(state = 'Success', jobType = "Processing")
        self.assertEqual(len(result), 0)
        result = getJobsAction.execute(state = 'Cleanout', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nFiles)


        logDir = os.path.join(self.testDir, 'logs')

        for job in completeJobs:
            self.assertFalse(os.path.exists(job['fwjr_path']))
            jobFolder = 'JobCluster_%i' \
                    % (int(job['id']/config.JobArchiver.numberOfJobsToCluster))
            jobPath = os.path.join(logDir, jobFolder, 'Job_%i.tar' %(job['id']))
            self.assertTrue(os.path.isfile(jobPath))
            self.assertTrue(os.path.getsize(jobPath) > 0)




        ###########################################################################
        # Now the TaskAchiver


        config.Agent.componentName = 'TaskArchiver'
        testTaskArchiver = TaskArchiverPoller(config = config)


        testTaskArchiver.algorithm()


        result = getJobsAction.execute(state = 'Cleanout', jobType = "Processing")
        self.assertEqual(len(result), 0)


        for jdict in completeJobs:
            job = Job(id = jdict['id'])
            self.assertFalse(job.exists())





        if os.path.isdir('testDir'):
            shutil.rmtree('testDir')
        shutil.copytree('%s' %self.testDir, os.path.join(os.getcwd(), 'testDir'))




        return
Exemplo n.º 7
0
    def testD_PrototypeChain(self):
        """
        _PrototypeChain_

        Prototype the BossAir workflow
        """
        myThread = threading.currentThread()

        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0, "User currently has %i running jobs.  Test will not continue" % (nRunning))


        config = self.getConfig()
        config.BossAir.pluginName = 'CondorPlugin'

        baAPI  = BossAirAPI(config = config)

        workload = self.createTestWorkload()

        workloadName = "basicWorkload"

        changeState = ChangeState(config)

        nSubs = 5
        nJobs = 10

        cacheDir = os.path.join(self.testDir, 'CacheDir')

        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir,
                                                                        'workloadTest',
                                                                        workloadName),
                                            site = 'se.T2_US_UCSD')
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')


        jobSubmitter = JobSubmitterPoller(config = config)
        jobTracker   = JobTrackerPoller(config = config)
        statusPoller = StatusPoller(config = config)

        jobSubmitter.algorithm()

        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, nSubs * nJobs)

        newJobs = baAPI._loadByStatus(status = 'New')
        self.assertEqual(len(newJobs), nSubs * nJobs)

        # Check WMBS
        getJobsAction = self.daoFactory(classname = "Jobs.GetAllJobs")
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        statusPoller.algorithm()

        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, nSubs * nJobs)

        newJobs = baAPI._loadByStatus(status = 'New')
        self.assertEqual(len(newJobs), 0)

        newJobs = baAPI._loadByStatus(status = 'Idle')
        self.assertEqual(len(newJobs), nSubs * nJobs)


        # Tracker should do nothing
        jobTracker.algorithm()

        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nJobs)


        # Wait for jobs to timeout due to short Pending wait period
        time.sleep(12)


        statusPoller.algorithm()

        newJobs = baAPI._loadByStatus(status = 'Idle')
        self.assertEqual(len(newJobs), 0)

        newJobs = baAPI._loadByStatus(status = 'Timeout', complete = '0')
        self.assertEqual(len(newJobs), nSubs * nJobs)

        # Jobs should be gone
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0)


        # Check if they're complete
        completeJobs = baAPI.getComplete()
        self.assertEqual(len(completeJobs), nSubs * nJobs)


        # Because they timed out, they all should have failed
        jobTracker.algorithm()

        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), 0)

        result = getJobsAction.execute(state = 'JobFailed', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        return
Exemplo n.º 8
0
    def testB_ReallyLongTest(self):
        """
        _ReallyLongTest_

        Run a really long test using the condor plugin
        """

        return

        # This has to be run with an empty queue
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0, "User currently has %i running jobs.  Test will not continue" % (nRunning))



        myThread = threading.currentThread()

        # This has to be run with an empty queue
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0, "User currently has %i running jobs.  Test will not continue" % (nRunning))

        nJobs  = 500
        jobCE  = 'cmsosgce.fnal.gov/jobmanager-condor'

        # Create directories
        cacheDir  = os.path.join(self.testDir, 'CacheDir')
        submitDir = os.path.join(self.testDir, 'SubmitDir')

        if not os.path.isdir(cacheDir):
            os.makedirs(cacheDir)
        if not os.path.isdir(submitDir):
            os.makedirs(submitDir)


        # Get config
        config = self.getConfig()

        # Get jobGroup
        testJobGroup = self.createTestJobs(nJobs = nJobs, cacheDir = cacheDir)

        # Propogate jobs
        changer = ChangeState(config)
        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'executing', 'created')



        jobTracker = JobTrackerPoller(config)
        jobTracker.setup()


        # Now create some jobs
        for job in testJobGroup.jobs[:(nJobs/2)]:
            jdl = createJDL(id = job['id'], directory = submitDir, jobCE = jobCE)
            jdlFile = os.path.join(submitDir, 'condorJDL_%i.jdl' % (job['id']))
            handle = open(jdlFile, 'w')
            handle.writelines(jdl)
            handle.close()

            command = ["condor_submit", jdlFile]
            pipe = subprocess.Popen(command, stdout = subprocess.PIPE,
                                    stderr = subprocess.PIPE, shell = False)
            pipe.communicate()


        startTime = time.time()
        cProfile.runctx("jobTracker.algorithm()", globals(), locals(), filename = "testStats.stat")
        #jobTracker.algorithm()
        stopTime  = time.time()


        # Are jobs in the right state?
        result = self.getJobs.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nJobs/2)

        result = self.getJobs.execute(state = 'Complete', jobType = "Processing")
        self.assertEqual(len(result), nJobs/2)


        # Then we're done
        killList = [x['id'] for x in testJobGroup.jobs]
        jobTracker.killJobs(jobList = killList)

        # No jobs should be left
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0)

        print ("Process took %f seconds to process %i classAds" %((stopTime - startTime),
                                                                  nJobs/2))
        p = pstats.Stats('testStats.stat')
        p.sort_stats('cumulative')
        p.print_stats()
Exemplo n.º 9
0
    def testA_CondorTest(self):
        """
        _CondorTest_

        Because I don't want this test to be submitter dependent:
        Create a dummy condor job.
        Submit a dummy condor job.
        Track it.
        Kill it.
        Exit
        """

        myThread = threading.currentThread()

        # This has to be run with an empty queue
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0, "User currently has %i running jobs.  Test will not continue" % (nRunning))

        nJobs  = 10
        jobCE  = 'cmsosgce.fnal.gov/jobmanager-condor'

        # Create directories
        cacheDir  = os.path.join(self.testDir, 'CacheDir')
        submitDir = os.path.join(self.testDir, 'SubmitDir')

        if not os.path.isdir(cacheDir):
            os.makedirs(cacheDir)
        if not os.path.isdir(submitDir):
            os.makedirs(submitDir)


        # Get config
        config = self.getConfig()

        # Get jobGroup
        testJobGroup = self.createTestJobs(nJobs = nJobs, cacheDir = cacheDir)

        # Propogate jobs
        changer = ChangeState(config)
        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'executing', 'created')

        result = self.getJobs.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nJobs)

        jobTracker = JobTrackerPoller(config)
        jobTracker.setup()



        # First iteration
        # There are no jobs in the tracker,
        # The tracker should register the jobs as missing
        # This should tell it that they've finished
        # So the tracker should send them onwards
        jobTracker.algorithm()

        result = self.getJobs.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nJobs)


        result = self.getJobs.execute(state = 'complete', jobType = "Processing")
        self.assertEqual(len(result), 0)



        # Second iteration
        # Reset the jobs
        # This time submit them to the queue
        # The jobs should remain in holding
        changer.propagate(testJobGroup.jobs, 'executing', 'created')

        result = self.getJobs.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nJobs)

        # Create a submit script
        createSubmitScript(submitDir)


        jobPackage = os.path.join(self.testDir, 'JobPackage.pkl')
        f = open(jobPackage, 'w')
        f.write(' ')
        f.close()

        sandbox = os.path.join(self.testDir, 'sandbox.box')
        f = open(sandbox, 'w')
        f.write(' ')
        f.close()

        for job in testJobGroup.jobs:
            job['plugin']     = 'CondorPlugin'
            job['userdn']     = 'jchurchill'
            job['custom']     = {'location': 'malpaquet'}
            job['cache_dir']  = self.testDir
            job['sandbox']    = sandbox
            job['packageDir'] = self.testDir

        info = {}
        info['packageDir'] = self.testDir
        info['index']      = 0
        info['sandbox']    = sandbox

        jobTracker.bossAir.submit(jobs = testJobGroup.jobs, info = info)

        time.sleep(1)

        # All jobs should be running
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, nJobs)


        # Run the algorithm.  After this
        # all jobs should still be running
        jobTracker.algorithm()

        # Are jobs in the right state?
        result = self.getJobs.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nJobs)

        result = self.getJobs.execute(state = 'Complete', jobType = "Processing")
        self.assertEqual(len(result), 0)

        # Are jobs still in the condor_q
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, nJobs)


        # Then we're done
        jobTracker.bossAir.kill(jobs = testJobGroup.jobs)

        # No jobs should be left
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0)

        jobTracker.algorithm()

        # Are jobs in the right state?
        result = self.getJobs.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), 0)

        result = self.getJobs.execute(state = 'Complete', jobType = "Processing")
        self.assertEqual(len(result), nJobs)


        # This is optional if you want to look at what
        # files were actually created during running
        #if os.path.isdir('testDir'):
        #    shutil.rmtree('testDir')
        #shutil.copytree('%s' %self.testDir, os.path.join(os.getcwd(), 'testDir'))


        return
Exemplo n.º 10
0
    def testD_PrototypeChain(self):
        """
        _PrototypeChain_

        Prototype the BossAir workflow
        """
        dummymyThread = threading.currentThread()

        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(
            nRunning, 0,
            "User currently has %i running jobs.  Test will not continue" %
            (nRunning))

        config = self.getConfig()
        config.BossAir.pluginName = 'SimpleCondorPlugin'

        baAPI = BossAirAPI(config=config, insertStates=True)

        workload = self.createTestWorkload()

        workloadName = "basicWorkload"

        changeState = ChangeState(config)

        nSubs = 5
        nJobs = 10

        dummycacheDir = os.path.join(self.testDir, 'CacheDir')

        jobGroupList = self.createJobGroups(nSubs=nSubs,
                                            nJobs=nJobs,
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=os.path.join(
                                                self.testDir, 'workloadTest',
                                                workloadName),
                                            site='se.T2_US_UCSD')
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobSubmitter = JobSubmitterPoller(config=config)
        jobTracker = JobTrackerPoller(config=config)
        statusPoller = StatusPoller(config=config)

        jobSubmitter.algorithm()

        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, nSubs * nJobs)

        newJobs = baAPI._loadByStatus(status='New')
        self.assertEqual(len(newJobs), nSubs * nJobs)

        # Check WMBS
        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        statusPoller.algorithm()

        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, nSubs * nJobs)

        newJobs = baAPI._loadByStatus(status='New')
        self.assertEqual(len(newJobs), 0)

        newJobs = baAPI._loadByStatus(status='Idle')
        self.assertEqual(len(newJobs), nSubs * nJobs)

        # Tracker should do nothing
        jobTracker.algorithm()

        result = getJobsAction.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        # Wait for jobs to timeout due to short Pending wait period
        time.sleep(12)

        statusPoller.algorithm()

        newJobs = baAPI._loadByStatus(status='Idle')
        self.assertEqual(len(newJobs), 0)

        newJobs = baAPI._loadByStatus(status='Timeout', complete='0')
        self.assertEqual(len(newJobs), nSubs * nJobs)

        # Jobs should be gone
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0)

        # Check if they're complete
        completeJobs = baAPI.getComplete()
        self.assertEqual(len(completeJobs), nSubs * nJobs)

        # Because they timed out, they all should have failed
        jobTracker.algorithm()

        result = getJobsAction.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), 0)

        result = getJobsAction.execute(state='JobFailed', jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        return
Exemplo n.º 11
0
    def testB_ReallyLongTest(self):
        """
        _ReallyLongTest_

        Run a really long test using the condor plugin
        """

        # This has to be run with an empty queue
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(
            nRunning, 0,
            "User currently has %i running jobs.  Test will not continue" %
            (nRunning))

        # This has to be run with an empty queue
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(
            nRunning, 0,
            "User currently has %i running jobs.  Test will not continue" %
            (nRunning))

        nJobs = 500
        jobCE = 'cmsosgce.fnal.gov/jobmanager-condor'

        # Create directories
        cacheDir = os.path.join(self.testDir, 'CacheDir')
        submitDir = os.path.join(self.testDir, 'SubmitDir')

        if not os.path.isdir(cacheDir):
            os.makedirs(cacheDir)
        if not os.path.isdir(submitDir):
            os.makedirs(submitDir)

        # Get config
        config = self.getConfig()

        # Get jobGroup
        testJobGroup = self.createTestJobs(nJobs=nJobs, cacheDir=cacheDir)

        # Propogate jobs
        changer = ChangeState(config)
        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'executing', 'created')

        jobTracker = JobTrackerPoller(config)
        jobTracker.setup()

        # Now create some jobs
        for job in testJobGroup.jobs[:(nJobs // 2)]:
            jdl = createJDL(jobID=job['id'], directory=submitDir, jobCE=jobCE)
            jdlFile = os.path.join(submitDir, 'condorJDL_%i.jdl' % (job['id']))
            handle = open(jdlFile, 'w')
            handle.writelines(jdl)
            handle.close()

            command = ["condor_submit", jdlFile]
            pipe = subprocess.Popen(command,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE,
                                    shell=False)
            pipe.communicate()

        startTime = time.time()
        cProfile.runctx("jobTracker.algorithm()",
                        globals(),
                        locals(),
                        filename="testStats.stat")
        # jobTracker.algorithm()
        stopTime = time.time()

        # Are jobs in the right state?
        result = self.getJobs.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), nJobs // 2)

        result = self.getJobs.execute(state='Complete', jobType="Processing")
        self.assertEqual(len(result), nJobs // 2)

        # Then we're done
        killList = [x['id'] for x in testJobGroup.jobs]
        jobTracker.killJobs(jobList=killList)

        # No jobs should be left
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0)

        print("Process took %f seconds to process %i classAds" %
              ((stopTime - startTime), nJobs // 2))
        p = pstats.Stats('testStats.stat')
        p.sort_stats('cumulative')
        p.print_stats()
Exemplo n.º 12
0
    def testA_CondorTest(self):
        """
        _CondorTest_

        Because I don't want this test to be submitter dependent:
        Create a dummy condor job.
        Submit a dummy condor job.
        Track it.
        Kill it.
        Exit
        """

        # This has to be run with an empty queue
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(
            nRunning, 0,
            "User currently has %i running jobs.  Test will not continue" %
            (nRunning))

        nJobs = 10
        jobCE = 'cmsosgce.fnal.gov/jobmanager-condor'

        # Create directories
        cacheDir = os.path.join(self.testDir, 'CacheDir')
        submitDir = os.path.join(self.testDir, 'SubmitDir')

        if not os.path.isdir(cacheDir):
            os.makedirs(cacheDir)
        if not os.path.isdir(submitDir):
            os.makedirs(submitDir)

        # Get config
        config = self.getConfig()

        # Get jobGroup
        testJobGroup = self.createTestJobs(nJobs=nJobs, cacheDir=cacheDir)

        # Propogate jobs
        changer = ChangeState(config)
        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'executing', 'created')

        result = self.getJobs.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), nJobs)

        jobTracker = JobTrackerPoller(config)
        jobTracker.setup()

        # First iteration
        # There are no jobs in the tracker,
        # The tracker should register the jobs as missing
        # This should tell it that they've finished
        # So the tracker should send them onwards
        jobTracker.algorithm()

        result = self.getJobs.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), nJobs)

        result = self.getJobs.execute(state='complete', jobType="Processing")
        self.assertEqual(len(result), 0)

        # Second iteration
        # Reset the jobs
        # This time submit them to the queue
        # The jobs should remain in holding
        changer.propagate(testJobGroup.jobs, 'executing', 'created')

        result = self.getJobs.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), nJobs)

        # Create a submit script
        createSubmitScript(submitDir)

        jobPackage = os.path.join(self.testDir, 'JobPackage.pkl')
        f = open(jobPackage, 'w')
        f.write(' ')
        f.close()

        sandbox = os.path.join(self.testDir, 'sandbox.box')
        f = open(sandbox, 'w')
        f.write(' ')
        f.close()

        for job in testJobGroup.jobs:
            job['plugin'] = 'CondorPlugin'
            job['userdn'] = 'jchurchill'
            job['custom'] = {'location': 'malpaquet'}
            job['cache_dir'] = self.testDir
            job['sandbox'] = sandbox
            job['packageDir'] = self.testDir

        info = {}
        info['packageDir'] = self.testDir
        info['index'] = 0
        info['sandbox'] = sandbox

        jobTracker.bossAir.submit(jobs=testJobGroup.jobs, info=info)

        time.sleep(1)

        # All jobs should be running
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, nJobs)

        # Run the algorithm.  After this
        # all jobs should still be running
        jobTracker.algorithm()

        # Are jobs in the right state?
        result = self.getJobs.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), nJobs)

        result = self.getJobs.execute(state='Complete', jobType="Processing")
        self.assertEqual(len(result), 0)

        # Are jobs still in the condor_q
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, nJobs)

        # Then we're done
        jobTracker.bossAir.kill(jobs=testJobGroup.jobs)

        # No jobs should be left
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0)

        jobTracker.algorithm()

        # Are jobs in the right state?
        result = self.getJobs.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), 0)

        result = self.getJobs.execute(state='Complete', jobType="Processing")
        self.assertEqual(len(result), nJobs)

        # This is optional if you want to look at what
        # files were actually created during running
        # if os.path.isdir('testDir'):
        #    shutil.rmtree('testDir')
        # shutil.copytree('%s' %self.testDir, os.path.join(os.getcwd(), 'testDir'))

        return