def creatorProcess(work, jobCacheDir):
    """
    _creatorProcess_

    Create work areas and pickle job objects
    """
    createWorkArea = CreateWorkArea()

    try:
        wmbsJobGroup = work.get('jobGroup')
        workflow = work.get('workflow')
        wmWorkload = work.get('wmWorkload')
        work['ownerDN'] = work.get('owner') if work.get('ownerDN', None) is None else work.get('ownerDN')
    except KeyError as ex:
        msg = "Could not find critical key-value in work input.\n"
        msg += str(ex)
        logging.error(msg)
        raise JobCreatorException(msg)
    except Exception as ex:
        msg = "Exception in opening work package. Error: %s" % str(ex)
        logging.exception(msg)
        raise JobCreatorException(msg)

    try:
        createWorkArea.processJobs(jobGroup=wmbsJobGroup,
                                   startDir=jobCacheDir,
                                   workflow=workflow,
                                   wmWorkload=wmWorkload,
                                   cache=False)

        thisJobNumber = work.get('jobNumber', 0)
        for job in wmbsJobGroup.jobs:
            thisJobNumber += 1
            saveJob(job, thisJobNumber, **work)
    except Exception as ex:
        msg = "Exception in processing wmbsJobGroup %i. Error: %s" % (wmbsJobGroup.id, str(ex))
        logging.exception(msg)
        raise JobCreatorException(msg)

    return wmbsJobGroup
class JobCreatorWorker:
    """
    This is the ProcessPool worker function that actually
    runs the jobCreator
    """

    def __init__(self, **configDict):
        """
        init jobCreator
        """
        myThread = threading.currentThread()
        self.transaction = myThread.transaction

        # DAO factory for WMBS objects
        self.daoFactory = DAOFactory(package = "WMCore.WMBS",
                                     logger = logging,
                                     dbinterface = myThread.dbi)

        # WMCore splitter factory for splitting up jobs.
        self.splitterFactory = SplitterFactory()

        config = Configuration()
        config.section_("JobStateMachine")
        config.JobStateMachine.couchurl = configDict["couchURL"]
        config.JobStateMachine.couch_retries = configDict["defaultRetries"]
        config.JobStateMachine.couchDBName = configDict["couchDBName"]
        self.config = config

        # Variables
        self.jobCacheDir = configDict['jobCacheDir']
        self.defaultJobType = configDict['defaultJobType']
        self.limit = configDict.get('fileLoadLimit', 500)

        self.createWorkArea = CreateWorkArea()
        self.changeState = ChangeState(self.config)

        return

    def __call__(self, parameters):
        """
        Poller for looking in all active subscriptions for jobs that
        need to be made.
        """
        logging.info("In JobCreatorWorker.__call__")

        myThread = threading.currentThread()

        for entry in parameters:
            # This retrieves a single subscription
            subscriptionID = entry.get('subscription')

            if subscriptionID < 0:
                logging.error("Got non-existent subscription")
                logging.error("Assuming parameters in error: returning")
                return subscriptionID

            myThread.transaction.begin()

            logging.info("About to call subscription %i" % subscriptionID)

            wmbsSubscription = Subscription(id = subscriptionID)
            wmbsSubscription.load()
            wmbsSubscription["workflow"].load()
            workflow = wmbsSubscription["workflow"]

            wmWorkload = retrieveWMSpec(wmbsSubscription)

            if not workflow.task or not wmWorkload:
                # Then we have a problem: we have no sandbox, and we NEED a sandbox.
                # Abort this subscription, but do NOT fail.
                # We have no way of marking a subscription as bad per se,
                # so we'll have to just keep skipping it.
                wmTask = None
                seederList = []
                logging.error("Have no task for workflow %i" % (workflow.id))
                logging.error("Aborting Subscription %i" % (subscriptionID))
                continue
            else:
                wmTask = wmWorkload.getTaskByPath(workflow.task)
                if hasattr(wmTask.data, 'seeders'):
                    manager = SeederManager(wmTask)
                    seederList = manager.getSeederList()
                else:
                    seederList = []

            logging.info("About to enter JobFactory")
            logging.debug("Going to call wmbsJobFactory with limit %i" % (self.limit))

            # My hope is that the job factory is smart enough only to split un-split jobs
            wmbsJobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                                  subscription = wmbsSubscription,
                                                  generators = seederList,
                                                  limit = self.limit)
            splitParams = retrieveJobSplitParams(wmWorkload, workflow.task)
            logging.debug("Split Params: %s" % splitParams)

            continueSubscription = True
            myThread.transaction.commit()

            # Turn on the jobFactory
            myThread.transaction.begin()
            wmbsJobFactory.open()

            # Create a function to hold it
            jobSplittingFunction = runSplitter(jobFactory = wmbsJobFactory,
                                               splitParams = splitParams)

            while continueSubscription:
                # This loop runs over the jobFactory,
                # using yield statements and a pre-existing proxy to
                # generate and process new jobs.

                # First we need the jobs.
                try:
                    wmbsJobGroups = jobSplittingFunction.next()
                    logging.info("Retrieved %i jobGroups from jobSplitter" % (len(wmbsJobGroups)))
                except StopIteration:
                    # If you receive a StopIteration, we're done
                    logging.info("Completed iteration over subscription %i" % (subscriptionID))
                    continueSubscription = False
                    continue

                # Now we get to find out which jobs they are.
                countJobs = self.daoFactory(classname = "Jobs.GetNumberOfJobsPerWorkflow")
                jobNumber = countJobs.execute(workflow = workflow.id,
                                              conn = myThread.transaction.conn,
                                              transaction = True)
                logging.debug("Have %i jobs for this workflow already" % (jobNumber))

                for wmbsJobGroup in wmbsJobGroups:

                    logging.debug("Processing jobGroup %i" % (wmbsJobGroup.exists()))
                    logging.debug("Processing %i jobs" % (len(wmbsJobGroup.jobs)))

                    # Create a directory
                    self.createWorkArea.processJobs(jobGroup = wmbsJobGroup,
                                                    startDir = self.jobCacheDir,
                                                    workflow = workflow,
                                                    wmWorkload = wmWorkload,
                                                    transaction = myThread.transaction,
                                                    conn = myThread.transaction.conn)

                    for job in wmbsJobGroup.jobs:
                        jobNumber += 1
                        self.saveJob(job = job, workflow = workflow,
                                     wmTask = wmTask, jobNumber = jobNumber)

                    self.advanceJobGroup(wmbsJobGroup)

                    logging.debug("Finished call for jobGroup %i" % (wmbsJobGroup.exists()))

            # END: while loop over jobSplitter
            myThread.transaction.commit()

            # About to reset everything
            wmbsJobGroups = None
            wmTask = None
            wmWorkload = None
            splitParams = None
            wmbsJobFactory = None
            gc.collect()

            # About to check memory
            doMemoryCheck("About to get memory references: End of subscription loop")

        # Final memory check
        doMemoryCheck("About to get memory references: End of __call__()")

        logging.debug("About to return from JobCreatorWorker.__call__()")

        return parameters

    def saveJob(self, job, workflow, wmTask = None, jobNumber = 0):
        """
        _saveJob_

        Actually do the mechanics of saving the job to a pickle file
        """
        priority = None

        if wmTask:
            # If we managed to load the task, the spec URL should be valid
            job['spec'] = workflow.spec
            job['task'] = wmTask.getPathName()
            priority = wmTask.getTaskPriority()
            if job.get('sandbox', None) is None:
                job['sandbox'] = wmTask.data.input.sandbox

        job['priority'] = priority
        job['counter'] = jobNumber
        cacheDir = job.getCache()
        job['cache_dir'] = cacheDir
        output = open(os.path.join(cacheDir, 'job.pkl'), 'w')
        cPickle.dump(job, output, cPickle.HIGHEST_PROTOCOL)
        output.flush()
        os.fsync(output.fileno())
        output.close()

        return

    def advanceJobGroup(self, wmbsJobGroup):
        """
        Pass this on to the jobCreator, which actually does the work
        """
        # Create the job
        self.changeState.propagate(wmbsJobGroup.jobs, 'created', 'new')

        logging.info("JobCreator has created jobGroup %i and is ending" % (wmbsJobGroup.id))

        return
def creatorProcess(work, jobCacheDir):
    """
    _creatorProcess_

    Create work areas and pickle job objects
    """
    createWorkArea = CreateWorkArea()

    try:
        wmbsJobGroup = work.get('jobGroup')
        workflow = work.get('workflow')
        wmWorkload = work.get('wmWorkload')
        wmTaskName = work.get('wmTaskName')
        sandbox = work.get('sandbox')
        owner = work.get('owner')
        ownerDN = work.get('ownerDN', None)
        ownerGroup = work.get('ownerGroup', '')
        ownerRole = work.get('ownerRole', '')
        scramArch = work.get('scramArch', None)
        swVersion = work.get('swVersion', None)
        agentNumber = work.get('agentNumber', 0)
        numberOfCores = work.get('numberOfCores', 1)
        inputDataset = work.get('inputDataset', None)
        inputDatasetLocations = work.get('inputDatasetLocations', None)
        inputPileup = work.get('inputPileup', None)
        allowOpportunistic = work.get('allowOpportunistic', False)
        agentName = work.get('agentName', '')

        if ownerDN is None:
            ownerDN = owner

        jobNumber = work.get('jobNumber', 0)
    except KeyError as ex:
        msg = "Could not find critical key-value in work input.\n"
        msg += str(ex)
        logging.error(msg)
        raise JobCreatorException(msg)
    except Exception as ex:
        msg = "Exception in opening work package. Error: %s" % str(ex)
        logging.exception(msg)
        raise JobCreatorException(msg)

    try:
        createWorkArea.processJobs(jobGroup=wmbsJobGroup,
                                   startDir=jobCacheDir,
                                   workflow=workflow,
                                   wmWorkload=wmWorkload,
                                   cache=False)

        for job in wmbsJobGroup.jobs:
            jobNumber += 1
            saveJob(job=job, workflow=workflow, wmTask=wmTaskName,
                    jobNumber=jobNumber, sandbox=sandbox, owner=owner,
                    ownerDN=ownerDN, ownerGroup=ownerGroup, ownerRole=ownerRole,
                    scramArch=scramArch, swVersion=swVersion,
                    agentNumber=agentNumber, numberOfCores=numberOfCores,
                    inputDataset=inputDataset,
                    inputDatasetLocations=inputDatasetLocations,
                    inputPileup=inputPileup,
                    allowOpportunistic=allowOpportunistic,
                    agentName=agentName)
    except Exception as ex:
        msg = "Exception in processing wmbsJobGroup %i. Error: %s" % (wmbsJobGroup.id, str(ex))
        logging.exception(msg)
        raise JobCreatorException(msg)

    return wmbsJobGroup
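# Illustrative sketch only (not part of the original module): it shows the shape of the
# 'work' dict that creatorProcess() above reads via work.get(). Every literal value below
# is a placeholder, and the three object arguments are assumed to be an already-loaded
# WMBS JobGroup, WMBS Workflow and WMWorkload helper supplied by the caller.
def buildExampleWorkPackage(wmbsJobGroup, wmbsWorkflow, wmWorkloadHelper):
    """
    Assemble a minimal work package for creatorProcess(); placeholder values only.
    """
    return {'jobGroup': wmbsJobGroup,
            'workflow': wmbsWorkflow,
            'wmWorkload': wmWorkloadHelper,
            'wmTaskName': '/ExampleRequest/Production',  # placeholder task path
            'sandbox': '/data/jobCache/ExampleRequest-Sandbox.tar.bz2',  # placeholder
            'owner': 'exampleuser',
            'ownerDN': None,  # creatorProcess falls back to 'owner' when this is None
            'ownerGroup': '',
            'ownerRole': '',
            'scramArch': None,
            'swVersion': None,
            'agentNumber': 0,
            'numberOfCores': 1,
            'inputDataset': None,
            'inputDatasetLocations': None,
            'inputPileup': None,
            'allowOpportunistic': False,
            'agentName': 'example-agent',  # placeholder agent name
            'jobNumber': 0}

# Usage sketch, assuming group/wflow/workload are already loaded:
#   creatorProcess(work=buildExampleWorkPackage(group, wflow, workload),
#                  jobCacheDir='/data/jobCache')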
class JobCreatorWorker:
    """
    This is the ProcessPool worker function that actually
    runs the jobCreator
    """

    def __init__(self, **configDict):
        """
        init jobCreator
        """
        myThread = threading.currentThread()
        self.transaction = myThread.transaction

        # DAO factory for WMBS objects
        self.daoFactory = DAOFactory(package = "WMCore.WMBS",
                                     logger = logging,
                                     dbinterface = myThread.dbi)

        # WMCore splitter factory for splitting up jobs.
        self.splitterFactory = SplitterFactory()

        config = Configuration()
        config.section_("JobStateMachine")
        config.JobStateMachine.couchurl = configDict["couchURL"]
        config.JobStateMachine.couch_retries = configDict["defaultRetries"]
        config.JobStateMachine.couchDBName = configDict["couchDBName"]
        self.config = config

        # Variables
        self.jobCacheDir = configDict['jobCacheDir']
        self.defaultJobType = configDict['defaultJobType']
        self.limit = configDict.get('fileLoadLimit', 500)

        self.createWorkArea = CreateWorkArea()
        self.changeState = ChangeState(self.config)

        return

    def __call__(self, parameters):
        """
        Poller for looking in all active subscriptions for jobs that
        need to be made.
        """
        logging.info("In JobCreatorWorker.__call__")

        myThread = threading.currentThread()

        for entry in parameters:
            # This retrieves a single subscription
            subscriptionID = entry.get('subscription')

            if subscriptionID < 0:
                logging.error("Got non-existent subscription")
                logging.error("Assuming parameters in error: returning")
                return subscriptionID

            myThread.transaction.begin()

            logging.info("About to call subscription %i" % subscriptionID)

            wmbsSubscription = Subscription(id = subscriptionID)
            wmbsSubscription.load()
            wmbsSubscription["workflow"].load()
            workflow = wmbsSubscription["workflow"]

            wmWorkload = retrieveWMSpec(wmbsSubscription)

            if not workflow.task or not wmWorkload:
                # Then we have a problem: we have no sandbox, and we NEED a sandbox.
                # Abort this subscription, but do NOT fail.
                # We have no way of marking a subscription as bad per se,
                # so we'll have to just keep skipping it.
                wmTask = None
                seederList = []
                logging.error("Have no task for workflow %i" % (workflow.id))
                logging.error("Aborting Subscription %i" % (subscriptionID))
                continue
            else:
                wmTask = wmWorkload.getTaskByPath(workflow.task)
                if hasattr(wmTask.data, 'seeders'):
                    manager = SeederManager(wmTask)
                    seederList = manager.getSeederList()
                else:
                    seederList = []

            logging.info("About to enter JobFactory")
            logging.debug("Going to call wmbsJobFactory with limit %i" % (self.limit))

            # My hope is that the job factory is smart enough only to split un-split jobs
            wmbsJobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                                  subscription = wmbsSubscription,
                                                  generators = seederList,
                                                  limit = self.limit)
            splitParams = retrieveJobSplitParams(wmWorkload, workflow.task)
            logging.debug("Split Params: %s" % splitParams)

            continueSubscription = True
            myThread.transaction.commit()

            # Turn on the jobFactory
            myThread.transaction.begin()
            wmbsJobFactory.open()

            # Create a function to hold it
            jobSplittingFunction = runSplitter(jobFactory = wmbsJobFactory,
                                               splitParams = splitParams)

            while continueSubscription:
                # This loop runs over the jobFactory,
                # using yield statements and a pre-existing proxy to
                # generate and process new jobs.

                # First we need the jobs.
                try:
                    wmbsJobGroups = next(jobSplittingFunction)
                    logging.info("Retrieved %i jobGroups from jobSplitter" % (len(wmbsJobGroups)))
                except StopIteration:
                    # If you receive a StopIteration, we're done
                    logging.info("Completed iteration over subscription %i" % (subscriptionID))
                    continueSubscription = False
                    continue

                # Now we get to find out which jobs they are.
                countJobs = self.daoFactory(classname = "Jobs.GetNumberOfJobsPerWorkflow")
                jobNumber = countJobs.execute(workflow = workflow.id,
                                              conn = myThread.transaction.conn,
                                              transaction = True)
                logging.debug("Have %i jobs for this workflow already" % (jobNumber))

                for wmbsJobGroup in wmbsJobGroups:

                    logging.debug("Processing jobGroup %i" % (wmbsJobGroup.exists()))
                    logging.debug("Processing %i jobs" % (len(wmbsJobGroup.jobs)))

                    # Create a directory
                    self.createWorkArea.processJobs(jobGroup = wmbsJobGroup,
                                                    startDir = self.jobCacheDir,
                                                    workflow = workflow,
                                                    wmWorkload = wmWorkload,
                                                    transaction = myThread.transaction,
                                                    conn = myThread.transaction.conn)

                    for job in wmbsJobGroup.jobs:
                        jobNumber += 1
                        self.saveJob(job = job, workflow = workflow,
                                     wmTask = wmTask, jobNumber = jobNumber)

                    self.advanceJobGroup(wmbsJobGroup)

                    logging.debug("Finished call for jobGroup %i" % (wmbsJobGroup.exists()))

            # END: while loop over jobSplitter
            myThread.transaction.commit()

            # About to reset everything
            wmbsJobGroups = None
            wmTask = None
            wmWorkload = None
            splitParams = None
            wmbsJobFactory = None
            gc.collect()

            # About to check memory
            doMemoryCheck("About to get memory references: End of subscription loop")

        # Final memory check
        doMemoryCheck("About to get memory references: End of __call__()")

        logging.debug("About to return from JobCreatorWorker.__call__()")

        return parameters

    def saveJob(self, job, workflow, wmTask = None, jobNumber = 0):
        """
        _saveJob_

        Actually do the mechanics of saving the job to a pickle file
        """
        priority = None

        if wmTask:
            # If we managed to load the task, the spec URL should be valid
            job['spec'] = workflow.spec
            job['task'] = wmTask.getPathName()
            priority = wmTask.getTaskPriority()
            if job.get('sandbox', None) is None:
                job['sandbox'] = wmTask.data.input.sandbox

        job['priority'] = priority
        job['counter'] = jobNumber
        cacheDir = job.getCache()
        job['cache_dir'] = cacheDir
        # pickle.HIGHEST_PROTOCOL is a binary protocol, so the file must be opened in binary mode
        output = open(os.path.join(cacheDir, 'job.pkl'), 'wb')
        pickle.dump(job, output, pickle.HIGHEST_PROTOCOL)
        output.flush()
        os.fsync(output.fileno())
        output.close()

        return

    def advanceJobGroup(self, wmbsJobGroup):
        """
        Pass this on to the jobCreator, which actually does the work
        """
        # Create the job
        self.changeState.propagate(wmbsJobGroup.jobs, 'created', 'new')

        logging.info("JobCreator has created jobGroup %i and is ending" % (wmbsJobGroup.id))

        return
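# Illustrative sketch only (not from the original module): one way the worker above could
# be driven directly. The keyword arguments mirror the configDict keys read in
# JobCreatorWorker.__init__(); all values and the subscription id are placeholders. It is
# assumed the calling thread has already been initialised with the agent's database handle
# and transaction (myThread.dbi / myThread.transaction), as done by the component harness.
def runExampleWorker(subscriptionID):
    """
    Build a JobCreatorWorker with placeholder settings and feed it one subscription,
    mirroring the list-of-dicts shape that __call__ expects.
    """
    worker = JobCreatorWorker(couchURL='http://localhost:5984',  # placeholder CouchDB URL
                              defaultRetries=5,                  # placeholder retry count
                              couchDBName='example_jobdump',     # placeholder DB name
                              jobCacheDir='/data/jobCache',      # placeholder cache dir
                              defaultJobType='Processing',
                              fileLoadLimit=500)
    # __call__ iterates over a list of dicts, each carrying a 'subscription' id
    return worker([{'subscription': subscriptionID}])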
def creatorProcess(work, jobCacheDir):
    """
    _creatorProcess_

    Create work areas and pickle job objects
    """
    createWorkArea = CreateWorkArea()

    try:
        wmbsJobGroup = work.get('jobGroup')
        workflow = work.get('workflow')
        wmWorkload = work.get('wmWorkload')
        wmTaskName = work.get('wmTaskName')
        sandbox = work.get('sandbox')
        owner = work.get('owner')
        ownerDN = work.get('ownerDN', None)
        ownerGroup = work.get('ownerGroup', '')
        ownerRole = work.get('ownerRole', '')
        scramArch = work.get('scramArch', None)
        swVersion = work.get('swVersion', None)
        agentNumber = work.get('agentNumber', 0)
        numberOfCores = work.get('numberOfCores', 1)

        if ownerDN is None:
            ownerDN = owner

        jobNumber = work.get('jobNumber', 0)
    except KeyError as ex:
        msg = "Could not find critical key-value in work input.\n"
        msg += str(ex)
        logging.error(msg)
        raise JobCreatorException(msg)
    except Exception as ex:
        msg = "Exception in opening work package.\n"
        msg += str(ex)
        msg += str(traceback.format_exc())
        logging.error(msg)
        raise JobCreatorException(msg)

    try:
        createWorkArea.processJobs(jobGroup = wmbsJobGroup,
                                   startDir = jobCacheDir,
                                   workflow = workflow,
                                   wmWorkload = wmWorkload,
                                   cache = False)

        for job in wmbsJobGroup.jobs:
            jobNumber += 1
            saveJob(job = job, workflow = workflow, wmTask = wmTaskName,
                    jobNumber = jobNumber, sandbox = sandbox, owner = owner,
                    ownerDN = ownerDN, ownerGroup = ownerGroup, ownerRole = ownerRole,
                    scramArch = scramArch, swVersion = swVersion,
                    agentNumber = agentNumber, numberOfCores = numberOfCores)
    except Exception as ex:
        # Register as failure; move on
        msg = "Exception in processing wmbsJobGroup %i\n" % wmbsJobGroup.id
        msg += str(ex)
        msg += str(traceback.format_exc())
        logging.error(msg)
        raise JobCreatorException(msg)

    return wmbsJobGroup