Example #1
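# Excerpted from the WMCore JobCreator component; these snippets assume that
# module's own imports (logging, threading, os, gc, cPickle/pickle, plus the
# WMCore/WMComponent helpers referenced below) are already in scope.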
def creatorProcess(work, jobCacheDir):
    """
    _creatorProcess_

    Create work areas and pickle job objects
    """
    createWorkArea = CreateWorkArea()

    try:
        wmbsJobGroup = work.get('jobGroup')
        workflow = work.get('workflow')
        wmWorkload = work.get('wmWorkload')
        if work.get('ownerDN', None) is None:
            work['ownerDN'] = work.get('owner')
    except KeyError as ex:
        msg = "Could not find critical key-value in work input.\n"
        msg += str(ex)
        logging.error(msg)
        raise JobCreatorException(msg)
    except Exception as ex:
        msg = "Exception in opening work package. Error: %s" % str(ex)
        logging.exception(msg)
        raise JobCreatorException(msg)

    try:
        createWorkArea.processJobs(jobGroup=wmbsJobGroup,
                                   startDir=jobCacheDir,
                                   workflow=workflow,
                                   wmWorkload=wmWorkload,
                                   cache=False)

        thisJobNumber = work.get('jobNumber', 0)
        for job in wmbsJobGroup.jobs:
            thisJobNumber += 1
            saveJob(job, thisJobNumber, **work)
    except Exception as ex:
        msg = "Exception in processing wmbsJobGroup %i\n. Error: %s" % (
            wmbsJobGroup.id, str(ex))
        logging.exception(msg)
        raise JobCreatorException(msg)

    return wmbsJobGroup
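
# A minimal usage sketch (hypothetical values; in production the work package
# is assembled by the poller that owns the subscriptions):
#
#   work = {'jobGroup': wmbsJobGroup,     # a loaded WMBS JobGroup
#           'workflow': workflow,         # a loaded WMBS Workflow
#           'wmWorkload': wmWorkload,     # the workload spec helper
#           'owner': 'someuser',
#           'jobNumber': 0}
#   creatorProcess(work=work, jobCacheDir='/data/jobCache')
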
class JobCreatorWorker:
    """
    This is the ProcessPool worker function that actually
    runs the jobCreator
    """

    def __init__(self, **configDict):
        """
        init jobCreator
        """

        myThread = threading.currentThread()

        self.transaction = myThread.transaction

        # DAO factory for WMBS objects
        self.daoFactory = DAOFactory(package = "WMCore.WMBS", logger = logging,
                                     dbinterface = myThread.dbi)

        # WMCore splitter factory for splitting up jobs.
        self.splitterFactory = SplitterFactory()

        config = Configuration()
        config.section_("JobStateMachine")
        config.JobStateMachine.couchurl      = configDict["couchURL"]
        config.JobStateMachine.couch_retries = configDict["defaultRetries"]
        config.JobStateMachine.couchDBName   = configDict["couchDBName"]

        self.config = config

        # Variables
        self.jobCacheDir    = configDict['jobCacheDir']
        self.defaultJobType = configDict['defaultJobType']
        self.limit          = configDict.get('fileLoadLimit', 500)

        self.createWorkArea  = CreateWorkArea()

        self.changeState = ChangeState(self.config)

        return


    def __call__(self, parameters):
        """
        Poller for looking in all active subscriptions for jobs that need to be made.

        """

        logging.info("In JobCreatorWorker.__call__")

        myThread = threading.currentThread()

        for entry in parameters:
            # This retrieves a single subscription
            subscriptionID = entry.get('subscription')

            if subscriptionID is None or subscriptionID < 0:
                logging.error("Got non-existant subscription")
                logging.error("Assuming parameters in error: returning")
                return subscriptionID

            myThread.transaction.begin()

            logging.info("About to call subscription %i" %subscriptionID)

            wmbsSubscription = Subscription(id = subscriptionID)
            wmbsSubscription.load()
            wmbsSubscription["workflow"].load()
            workflow         = wmbsSubscription["workflow"]

            wmWorkload       = retrieveWMSpec(wmbsSubscription)

            if not workflow.task or not wmWorkload:
                # Then we have a problem
                # We have no sandbox
                # We NEED a sandbox
                # Abort this subscription!
                # But do NOT fail
                # We have no way of marking a subscription as bad per se
                # We'll have to just keep skipping it
                wmTask = None
                seederList = []
                logging.error("Have no task for workflow %i" % (workflow.id))
                logging.error("Aborting Subscription %i" % (subscriptionID))
                continue

            else:
                wmTask = wmWorkload.getTaskByPath(workflow.task)
                if hasattr(wmTask.data, 'seeders'):
                    manager    = SeederManager(wmTask)
                    seederList = manager.getSeederList()
                else:
                    seederList = []

            logging.info("About to enter JobFactory")
            logging.debug("Going to call wmbsJobFactory with limit %i" % (self.limit))

            # My hope is that the job factory is smart enough only to split un-split jobs
            wmbsJobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                                  subscription = wmbsSubscription,
                                                  generators=seederList,
                                                  limit = self.limit)
            splitParams = retrieveJobSplitParams(wmWorkload, workflow.task)
            logging.debug("Split Params: %s" % splitParams)

            continueSubscription = True
            myThread.transaction.commit()

            # Turn on the jobFactory
            myThread.transaction.begin()
            wmbsJobFactory.open()

            # Create a function to hold it
            jobSplittingFunction = runSplitter(jobFactory = wmbsJobFactory,
                                               splitParams = splitParams)
            while continueSubscription:
                # This loop runs over the jobFactory,
                # using yield statements and a pre-existing proxy to
                # generate and process new jobs

                # First we need the jobs.

                try:
                    wmbsJobGroups = next(jobSplittingFunction)
                    logging.info("Retrieved %i jobGroups from jobSplitter" % (len(wmbsJobGroups)))
                except StopIteration:
                    # If you receive a stopIteration, we're done
                    logging.info("Completed iteration over subscription %i" % (subscriptionID))
                    continueSubscription = False
                    continue

                # Now we get to find out what job they are.
                countJobs = self.daoFactory(classname = "Jobs.GetNumberOfJobsPerWorkflow")
                jobNumber = countJobs.execute(workflow = workflow.id,
                                              conn = myThread.transaction.conn,
                                              transaction = True)
                logging.debug("Have %i jobs for this workflow already" % (jobNumber))



                for wmbsJobGroup in wmbsJobGroups:

                    logging.debug("Processing jobGroup %i" % (wmbsJobGroup.exists()))
                    logging.debug("Processing %i jobs" % (len(wmbsJobGroup.jobs)) )

                    # Create a directory
                    self.createWorkArea.processJobs(jobGroup = wmbsJobGroup,
                                                    startDir = self.jobCacheDir,
                                                    workflow = workflow,
                                                    wmWorkload = wmWorkload,
                                                    transaction = myThread.transaction,
                                                    conn = myThread.transaction.conn)


                    for job in wmbsJobGroup.jobs:
                        jobNumber += 1
                        self.saveJob(job = job, workflow = workflow,
                                     wmTask = wmTask, jobNumber = jobNumber)


                    self.advanceJobGroup(wmbsJobGroup)

                    logging.debug("Finished call for jobGroup %i" \
                                 % (wmbsJobGroup.exists()))


            # END: while loop over jobSplitter
            myThread.transaction.commit()

            # About to reset everything
            wmbsJobGroups  = None
            wmTask         = None
            wmWorkload     = None
            splitParams    = None
            wmbsJobFactory = None
            gc.collect()

            # About to check memory
            doMemoryCheck("About to get memory references: End of subscription loop")


        # Final memory check
        doMemoryCheck("About to get memory references: End of __call__()")


        logging.debug("About to return from JobCreatorWorker.__call__()")

        return parameters


    def saveJob(self, job, workflow, wmTask = None, jobNumber = 0):
        """
        _saveJob_

        Actually do the mechanics of saving the job to a pickle file
        """
        priority = None

        if wmTask:
            # If we managed to load the task, the url should be valid
            job['spec']    = workflow.spec
            job['task']    = wmTask.getPathName()
            priority       = wmTask.getTaskPriority()
            if job.get('sandbox', None) is None:
                job['sandbox'] = wmTask.data.input.sandbox

        job['priority'] = priority
        job['counter']  = jobNumber
        cacheDir = job.getCache()
        job['cache_dir'] = cacheDir
        output = open(os.path.join(cacheDir, 'job.pkl'), 'wb')
        cPickle.dump(job, output, cPickle.HIGHEST_PROTOCOL)
        output.flush()
        os.fsync(output.fileno())
        output.close()


        return
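
        # Note: HIGHEST_PROTOCOL is a binary pickle protocol, hence the 'wb'
        # mode above; reading the job back is the mirror operation:
        #
        #   with open(os.path.join(cacheDir, 'job.pkl'), 'rb') as handle:
        #       job = cPickle.load(handle)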




    def advanceJobGroup(self, wmbsJobGroup):
        """
        Pass this on to the jobCreator, which actually does the work

        """

        # Propagate the jobs to the 'created' state
        self.changeState.propagate(wmbsJobGroup.jobs, 'created', 'new')

        logging.info("JobCreator has created jobGroup %i and is ending" \
                     % (wmbsJobGroup.id))

        return
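
Driving the worker is straightforward once it is constructed; a minimal
sketch (hypothetical CouchDB and path values, assuming the same WMCore
environment the class itself imports):

worker = JobCreatorWorker(couchURL='http://localhost:5984',
                          defaultRetries=5,
                          couchDBName='jobstatemachine',
                          jobCacheDir='/data/jobCache',
                          defaultJobType='Processing')
worker([{'subscription': 101}, {'subscription': 102}])  # hypothetical IDs
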
Example #3
def creatorProcess(work, jobCacheDir):
    """
    _creatorProcess_

    Create work areas and pickle job objects
    """
    createWorkArea = CreateWorkArea()

    try:
        wmbsJobGroup = work.get('jobGroup')
        workflow = work.get('workflow')
        wmWorkload = work.get('wmWorkload')
        wmTaskName = work.get('wmTaskName')
        sandbox = work.get('sandbox')
        owner = work.get('owner')
        ownerDN = work.get('ownerDN', None)
        ownerGroup = work.get('ownerGroup', '')
        ownerRole = work.get('ownerRole', '')
        scramArch = work.get('scramArch', None)
        swVersion = work.get('swVersion', None)
        agentNumber = work.get('agentNumber', 0)
        numberOfCores = work.get('numberOfCores', 1)
        inputDataset = work.get('inputDataset', None)
        inputDatasetLocations = work.get('inputDatasetLocations', None)
        inputPileup = work.get('inputPileup', None)
        allowOpportunistic = work.get('allowOpportunistic', False)
        agentName = work.get('agentName', '')

        if ownerDN is None:
            ownerDN = owner

        jobNumber = work.get('jobNumber', 0)
    except KeyError as ex:
        msg = "Could not find critical key-value in work input.\n"
        msg += str(ex)
        logging.error(msg)
        raise JobCreatorException(msg)
    except Exception as ex:
        msg = "Exception in opening work package. Error: %s" % str(ex)
        logging.exception(msg)
        raise JobCreatorException(msg)

    try:
        createWorkArea.processJobs(jobGroup=wmbsJobGroup,
                                   startDir=jobCacheDir,
                                   workflow=workflow,
                                   wmWorkload=wmWorkload,
                                   cache=False)

        for job in wmbsJobGroup.jobs:
            jobNumber += 1
            saveJob(job=job, workflow=workflow,
                    wmTask=wmTaskName,
                    jobNumber=jobNumber,
                    sandbox=sandbox,
                    owner=owner,
                    ownerDN=ownerDN,
                    ownerGroup=ownerGroup,
                    ownerRole=ownerRole,
                    scramArch=scramArch,
                    swVersion=swVersion,
                    agentNumber=agentNumber,
                    numberOfCores=numberOfCores,
                    inputDataset=inputDataset,
                    inputDatasetLocations=inputDatasetLocations,
                    inputPileup=inputPileup,
                    allowOpportunistic=allowOpportunistic,
                    agentName=agentName)

    except Exception as ex:
        msg = "Exception in processing wmbsJobGroup %i\n. Error: %s" % (wmbsJobGroup.id, str(ex))
        logging.exception(msg)
        raise JobCreatorException(msg)

    return wmbsJobGroup
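
This revision reads a much richer work package; a hedged sketch of one that
exercises the optional keys (every value below is hypothetical):

work = {'jobGroup': wmbsJobGroup,
        'workflow': workflow,
        'wmWorkload': wmWorkload,
        'wmTaskName': '/SomeRequest/SomeTask',
        'sandbox': '/data/sandboxes/SomeRequest-sandbox.tar.bz2',
        'owner': 'someuser',
        'scramArch': 'slc7_amd64_gcc820',
        'swVersion': 'CMSSW_10_6_0',
        'numberOfCores': 4,
        'inputDataset': '/SomePrimary/SomeProcessed/RECO',
        'allowOpportunistic': False,
        'agentName': 'vocms0123',
        'jobNumber': 0}
creatorProcess(work, '/data/jobCache')
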
Example #4
class JobCreatorWorker:
    """
    This is the ProcessPool worker function that actually
    runs the jobCreator
    """

    def __init__(self, **configDict):
        """
        init jobCreator
        """

        myThread = threading.currentThread()

        self.transaction = myThread.transaction

        # DAO factory for WMBS objects
        self.daoFactory = DAOFactory(package = "WMCore.WMBS", logger = logging,
                                     dbinterface = myThread.dbi)

        # WMCore splitter factory for splitting up jobs.
        self.splitterFactory = SplitterFactory()

        config = Configuration()
        config.section_("JobStateMachine")
        config.JobStateMachine.couchurl      = configDict["couchURL"]
        config.JobStateMachine.couch_retries = configDict["defaultRetries"]
        config.JobStateMachine.couchDBName   = configDict["couchDBName"]

        self.config = config

        # Variables
        self.jobCacheDir    = configDict['jobCacheDir']
        self.defaultJobType = configDict['defaultJobType']
        self.limit          = configDict.get('fileLoadLimit', 500)

        self.createWorkArea  = CreateWorkArea()

        self.changeState = ChangeState(self.config)

        return


    def __call__(self, parameters):
        """
        Poller for looking in all active subscriptions for jobs that need to be made.

        """

        logging.info("In JobCreatorWorker.__call__")

        myThread = threading.currentThread()

        for entry in parameters:
            # This retrieves a single subscription
            subscriptionID = entry.get('subscription')

            if subscriptionID is None or subscriptionID < 0:
                logging.error("Got non-existant subscription")
                logging.error("Assuming parameters in error: returning")
                return subscriptionID

            myThread.transaction.begin()

            logging.info("About to call subscription %i" %subscriptionID)

            wmbsSubscription = Subscription(id = subscriptionID)
            wmbsSubscription.load()
            wmbsSubscription["workflow"].load()
            workflow         = wmbsSubscription["workflow"]

            wmWorkload       = retrieveWMSpec(wmbsSubscription)

            if not workflow.task or not wmWorkload:
                # Then we have a problem
                # We have no sandbox
                # We NEED a sandbox
                # Abort this subscription!
                # But do NOT fail
                # We have no way of marking a subscription as bad per se
                # We'll have to just keep skipping it
                wmTask = None
                seederList = []
                logging.error("Have no task for workflow %i" % (workflow.id))
                logging.error("Aborting Subscription %i" % (subscriptionID))
                continue

            else:
                wmTask = wmWorkload.getTaskByPath(workflow.task)
                if hasattr(wmTask.data, 'seeders'):
                    manager    = SeederManager(wmTask)
                    seederList = manager.getSeederList()
                else:
                    seederList = []

            logging.info("About to enter JobFactory")
            logging.debug("Going to call wmbsJobFactory with limit %i" % (self.limit))

            # My hope is that the job factory is smart enough only to split un-split jobs
            wmbsJobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                                  subscription = wmbsSubscription,
                                                  generators=seederList,
                                                  limit = self.limit)
            splitParams = retrieveJobSplitParams(wmWorkload, workflow.task)
            logging.debug("Split Params: %s" % splitParams)

            continueSubscription = True
            myThread.transaction.commit()

            # Turn on the jobFactory
            myThread.transaction.begin()
            wmbsJobFactory.open()

            # Create a function to hold it
            jobSplittingFunction = runSplitter(jobFactory = wmbsJobFactory,
                                               splitParams = splitParams)
            while continueSubscription:
                # This loop runs over the jobFactory,
                # using yield statements and a pre-existing proxy to
                # generate and process new jobs

                # First we need the jobs.

                try:
                    wmbsJobGroups = next(jobSplittingFunction)
                    logging.info("Retrieved %i jobGroups from jobSplitter" % (len(wmbsJobGroups)))
                except StopIteration:
                    # If you receive a stopIteration, we're done
                    logging.info("Completed iteration over subscription %i" % (subscriptionID))
                    continueSubscription = False
                    continue

                # Now we get to find out what job they are.
                countJobs = self.daoFactory(classname = "Jobs.GetNumberOfJobsPerWorkflow")
                jobNumber = countJobs.execute(workflow = workflow.id,
                                              conn = myThread.transaction.conn,
                                              transaction = True)
                logging.debug("Have %i jobs for this workflow already" % (jobNumber))



                for wmbsJobGroup in wmbsJobGroups:

                    logging.debug("Processing jobGroup %i" % (wmbsJobGroup.exists()))
                    logging.debug("Processing %i jobs" % (len(wmbsJobGroup.jobs)) )

                    # Create a directory
                    self.createWorkArea.processJobs(jobGroup = wmbsJobGroup,
                                                    startDir = self.jobCacheDir,
                                                    workflow = workflow,
                                                    wmWorkload = wmWorkload,
                                                    transaction = myThread.transaction,
                                                    conn = myThread.transaction.conn)


                    for job in wmbsJobGroup.jobs:
                        jobNumber += 1
                        self.saveJob(job = job, workflow = workflow,
                                     wmTask = wmTask, jobNumber = jobNumber)


                    self.advanceJobGroup(wmbsJobGroup)

                    logging.debug("Finished call for jobGroup %i" \
                                 % (wmbsJobGroup.exists()))


            # END: while loop over jobSplitter
            myThread.transaction.commit()

            # About to reset everything
            wmbsJobGroups  = None
            wmTask         = None
            wmWorkload     = None
            splitParams    = None
            wmbsJobFactory = None
            gc.collect()

            # About to check memory
            doMemoryCheck("About to get memory references: End of subscription loop")


        # Final memory check
        doMemoryCheck("About to get memory references: End of __call__()")


        logging.debug("About to return from JobCreatorWorker.__call__()")

        return parameters


    def saveJob(self, job, workflow, wmTask = None, jobNumber = 0):
        """
        _saveJob_

        Actually do the mechanics of saving the job to a pickle file
        """
        priority = None

        if wmTask:
            # If we managed to load the task, the url should be valid
            job['spec']    = workflow.spec
            job['task']    = wmTask.getPathName()
            priority       = wmTask.getTaskPriority()
            if job.get('sandbox', None) is None:
                job['sandbox'] = wmTask.data.input.sandbox

        job['priority'] = priority
        job['counter']  = jobNumber
        cacheDir = job.getCache()
        job['cache_dir'] = cacheDir
        output = open(os.path.join(cacheDir, 'job.pkl'), 'wb')
        pickle.dump(job, output, pickle.HIGHEST_PROTOCOL)
        output.flush()
        os.fsync(output.fileno())
        output.close()


        return

    def advanceJobGroup(self, wmbsJobGroup):
        """
        Pass this on to the jobCreator, which actually does the work

        """

        # Propagate the jobs to the 'created' state
        self.changeState.propagate(wmbsJobGroup.jobs, 'created', 'new')

        logging.info("JobCreator has created jobGroup %i and is ending" \
                     % (wmbsJobGroup.id))

        return
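
The while loop above consumes runSplitter as a generator. A minimal sketch of
that shape (an assumption for illustration; the real runSplitter lives in the
same WMCore module and carries more bookkeeping):

def runSplitter(jobFactory, splitParams):
    # Pull jobGroups in batches until the factory is exhausted; returning
    # ends the generator, which surfaces as StopIteration in the caller.
    groups = jobFactory(**splitParams)
    while groups:
        yield groups
        groups = jobFactory(**splitParams)
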
Example #5
def creatorProcess(work, jobCacheDir):
    """
    _creatorProcess_

    Create work areas and pickle job objects
    """
    createWorkArea  = CreateWorkArea()

    try:
        wmbsJobGroup = work.get('jobGroup')
        workflow     = work.get('workflow')
        wmWorkload   = work.get('wmWorkload')
        wmTaskName   = work.get('wmTaskName')
        sandbox      = work.get('sandbox')
        owner        = work.get('owner')
        ownerDN      = work.get('ownerDN', None)
        ownerGroup   = work.get('ownerGroup', '')
        ownerRole    = work.get('ownerRole', '')
        scramArch    = work.get('scramArch', None)
        swVersion    = work.get('swVersion', None)
        agentNumber  = work.get('agentNumber', 0)
        numberOfCores = work.get('numberOfCores', 1)

        if ownerDN is None:
            ownerDN = owner

        jobNumber    = work.get('jobNumber', 0)
    except KeyError as ex:
        msg =  "Could not find critical key-value in work input.\n"
        msg += str(ex)
        logging.error(msg)
        raise JobCreatorException(msg)
    except Exception as ex:
        msg =  "Exception in opening work package.\n"
        msg += str(ex)
        msg += str(traceback.format_exc())
        logging.error(msg)
        raise JobCreatorException(msg)


    try:
        createWorkArea.processJobs(jobGroup = wmbsJobGroup,
                                   startDir = jobCacheDir,
                                   workflow = workflow,
                                   wmWorkload = wmWorkload,
                                   cache = False)

        for job in wmbsJobGroup.jobs:
            jobNumber += 1
            saveJob(job = job, workflow = workflow,
                    wmTask = wmTaskName,
                    jobNumber = jobNumber,
                    sandbox = sandbox,
                    owner = owner,
                    ownerDN = ownerDN,
                    ownerGroup = ownerGroup,
                    ownerRole = ownerRole,
                    scramArch = scramArch,
                    swVersion = swVersion,
                    agentNumber = agentNumber,
                    numberOfCores = numberOfCores)

    except Exception as ex:
        # Register as failure; move on
        msg =  "Exception in processing wmbsJobGroup %i\n" % wmbsJobGroup.id
        msg += str(ex)
        msg += str(traceback.format_exc())
        logging.error(msg)
        raise JobCreatorException(msg)

    return wmbsJobGroup
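
Every revision above raises JobCreatorException on failure. A minimal
stand-in (an assumption for illustration; the real class lives in the WMCore
code base and may subclass its WMException) is simply:

class JobCreatorException(Exception):
    """Raised when a work package cannot be unpacked or processed."""
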
Example #6
def creatorProcess(work, jobCacheDir):
    """
    _creatorProcess_

    Create work areas and pickle job objects
    """
    createWorkArea = CreateWorkArea()

    try:
        wmbsJobGroup = work.get('jobGroup')
        workflow = work.get('workflow')
        wmWorkload = work.get('wmWorkload')
        wmTaskName = work.get('wmTaskName')
        sandbox = work.get('sandbox')
        owner = work.get('owner')
        ownerDN = work.get('ownerDN', None)
        ownerGroup = work.get('ownerGroup', '')
        ownerRole = work.get('ownerRole', '')
        scramArch = work.get('scramArch', None)
        swVersion = work.get('swVersion', None)
        agentNumber = work.get('agentNumber', 0)
        numberOfCores = work.get('numberOfCores', 1)

        if ownerDN is None:
            ownerDN = owner

        jobNumber = work.get('jobNumber', 0)
    except KeyError as ex:
        msg = "Could not find critical key-value in work input.\n"
        msg += str(ex)
        logging.error(msg)
        raise JobCreatorException(msg)
    except Exception as ex:
        msg = "Exception in opening work package.\n"
        msg += str(ex)
        msg += str(traceback.format_exc())
        logging.error(msg)
        raise JobCreatorException(msg)

    try:
        createWorkArea.processJobs(jobGroup=wmbsJobGroup,
                                   startDir=jobCacheDir,
                                   workflow=workflow,
                                   wmWorkload=wmWorkload,
                                   cache=False)

        for job in wmbsJobGroup.jobs:
            jobNumber += 1
            saveJob(job=job,
                    workflow=workflow,
                    wmTask=wmTaskName,
                    jobNumber=jobNumber,
                    sandbox=sandbox,
                    owner=owner,
                    ownerDN=ownerDN,
                    ownerGroup=ownerGroup,
                    ownerRole=ownerRole,
                    scramArch=scramArch,
                    swVersion=swVersion,
                    agentNumber=agentNumber,
                    numberOfCores=numberOfCores)

    except Exception as ex:
        # Register as failure; move on
        msg = "Exception in processing wmbsJobGroup %i\n" % wmbsJobGroup.id
        msg += str(ex)
        msg += str(traceback.format_exc())
        logging.error(msg)
        raise JobCreatorException(msg)

    return wmbsJobGroup
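
Because creatorProcess is a plain module-level function that takes a
picklable work package, it fans out naturally over a process pool. A hedged
driver sketch (hypothetical names; the real component manages its own pool):

from multiprocessing import Pool

def createAllJobGroups(workPackages, jobCacheDir, nProcs=4):
    # Dispatch one creatorProcess call per work package; res.get()
    # re-raises any JobCreatorException from the worker process.
    with Pool(processes=nProcs) as pool:
        results = [pool.apply_async(creatorProcess, (work, jobCacheDir))
                   for work in workPackages]
        return [res.get() for res in results]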