def _extractRelValParameters(self, testInstance, relValPSet):
    """
    _extractRelValParameters_

    Pull the test parameters out of the ReleaseValidation PSet and store
    them in testInstance.

    Returns True on success, False when a required parameter is missing
    or malformed (the problem is logged before returning).
    """
    # primaryDatasetName is mandatory: it names the test everywhere below.
    # NOTE: must check for None BEFORE calling .value() on it.
    testName = getattr(relValPSet, "primaryDatasetName", None)
    if testName is None:
        msg = "No primaryDatasetName parameter in ReleaseValidation PSet\n"
        msg += "%s\n" % testInstance['PickleFile']
        logging.error(msg)
        return False
    testInstance['Name'] = testName.value()

    totalEvents = getattr(relValPSet, "totalNumberOfEvents", None)
    if totalEvents is None:
        msg = "No totalNumberOfEvents parameter in ReleaseValidation PSet\n"
        msg += "%s\n" % testInstance['PickleFile']
        logging.error(msg)
        return False
    testInstance['TotalEvents'] = totalEvents.value()

    # Job splitting: either an explicit eventsPerJob, or a speedCategory
    # that is looked up in the component configuration (self.args).
    eventsPerJob = getattr(relValPSet, "eventsPerJob", None)
    speedCat = getattr(relValPSet, "speedCategory", None)
    if (eventsPerJob is None) and (speedCat is None):
        msg = "ReleaseValidation PSet must contain one of either eventsPerJob or speedCategory\n"
        msg += "%s\n" % testInstance['PickleFile']
        logging.error(msg)
        return False

    if eventsPerJob is not None:
        testInstance['EventsPerJob'] = eventsPerJob.value()
    else:
        testInstance['SpeedCategory'] = speedCat.value()
        if testInstance['SpeedCategory'] not in self.args:
            msg = "Unknown Speed Category: %s\n" % testInstance['SpeedCategory']
            msg += "In file: %s\n" % testInstance['PickleFile']
            logging.error(msg)
            return False
        testInstance['EventsPerJob'] = self.args[testInstance['SpeedCategory']]

    # Optional input/pileup datasets
    pileupDataset = getattr(relValPSet, "pileupDatasetPath", None)
    if pileupDataset is not None:
        testInstance['PileupDataset'] = pileupDataset.value()
    inputDataset = getattr(relValPSet, "inputDatasetPath", None)
    if inputDataset is not None:
        testInstance['InputDataset'] = inputDataset.value()
    return True

def makeWorkflow(self, testInstance):
    """
    _makeWorkflow_

    Process a test, create a WorkflowSpec for it, generate job specs
    and add them to the test instance.

    Loads the appropriate CMSSW environment, unpickles the test's cfg,
    extracts the ReleaseValidation PSet parameters, and either reuses an
    already-created workflow for this test name or builds, saves,
    registers and publishes a new one.
    """
    loader = CMSSWAPILoader(testInstance['CMSSWArchitecture'],
                            testInstance['CMSSWVersion'],
                            testInstance['CMSPath'])
    loader.load()

    cfgWrapper = CMSSWConfig()
    # Read the pickled cms.Process; binary mode and an explicit close so
    # the handle is not leaked (the original never closed it).
    pickleHandle = open(testInstance['PickleFile'], 'rb')
    try:
        process = pickle.load(pickleHandle)
    finally:
        pickleHandle.close()
    cfgInt = cfgWrapper.loadConfiguration(process)
    cfgInt.validateForProduction()
    cfgAsString = process.dumpPython()

    # //
    # // Get release validation PSet from process
    # //
    relValPSet = getattr(process, "ReleaseValidation", None)
    if relValPSet is None:
        msg = "Unable to extract ReleaseValidation PSet from pickled cfg for \n"
        msg += "%s\n" % testInstance['PickleFile']
        logging.error(msg)
        loader.unload()  # restore environment on the error path too
        return

    if not self._extractRelValParameters(testInstance, relValPSet):
        loader.unload()  # restore environment on the error path too
        return

    msg = "Processing : %s\n" % testInstance['Name']
    msg += "From Pickle: %s\n" % testInstance['PickleFile']
    msg += "TotalEvents: %s\n" % testInstance['TotalEvents']
    msg += "EventsPerJob: %s\n" % testInstance['EventsPerJob']
    msg += "SpeedCategory: %s\n" % testInstance['SpeedCategory']
    logging.info(msg)

    # A workflow already exists for this test name: reuse it.
    if testInstance['Name'] in self.workflows:
        testInstance['WorkflowSpecId'] = self.workflows[testInstance['Name']]
        testInstance['WorkflowSpecFile'] = self.workflowFiles[testInstance['Name']]
        testInstance['WorkingDir'] = self.workingDirs[testInstance['Name']]
        loader.unload()
        return

    self.jobCounts[testInstance['Name']] = 1
    workingDir = os.path.join(self.args['ComponentDir'],
                              testInstance['CMSSWVersion'],
                              testInstance['Name'])
    if not os.path.exists(workingDir):
        os.makedirs(workingDir)
    loader.unload()

    maker = WorkflowMaker(str(self.timestamp),
                          testInstance['Name'],
                          'RelVal')
    maker.setCMSSWVersion(testInstance['CMSSWVersion'])
    maker.setPhysicsGroup("RelVal")
    maker.setConfiguration(cfgWrapper, Type = "instance")
    maker.setOriginalCfg(cfgAsString)
    psetHash = "NO_PSET_HASH"
    if 'PSetHash' in cfgWrapper.configMetadata:
        psetHash = cfgWrapper.configMetadata['PSetHash']
    maker.setPSetHash(psetHash)
    maker.changeCategory("relval")

    if testInstance['SelectionEfficiency'] is not None:
        selEff = float(testInstance['SelectionEfficiency'])
        maker.addSelectionEfficiency(selEff)

    if testInstance['PileupDataset'] is not None:
        maker.addPileupDataset(testInstance['PileupDataset'], 100)

    if testInstance['InputDataset'] is not None:
        maker.addInputDataset(testInstance['InputDataset'])
        maker.inputDataset["SplitType"] = "events"
        maker.inputDataset["SplitSize"] = testInstance['EventsPerJob']

    spec = maker.makeWorkflow()
    spec.parameters['OnlySites'] = testInstance['Site']
    spec.parameters['DBSURL'] = self.dbsUrl
    # os.path.join instead of the old "/%s/%s" pattern, which produced a
    # doubled leading slash and broke for a relative ComponentDir.
    specFile = os.path.join(workingDir,
                            "%s-Workflow.xml" % maker.workflowName)
    spec.save(specFile)

    # Cache the new workflow and record it on the test instance.
    self.workflows[testInstance['Name']] = str(maker.workflowName)
    self.workflowFiles[testInstance['Name']] = specFile
    self.workingDirs[testInstance['Name']] = workingDir
    testInstance['WorkflowSpecId'] = str(maker.workflowName)
    testInstance['WorkflowSpecFile'] = specFile
    testInstance['WorkingDir'] = workingDir

    msg = "Workflow created for test: %s" % testInstance['Name']
    logging.info(msg)

    msg = "Registering Workflow Entity: %s" % maker.workflowName
    logging.debug(msg)
    WEWorkflow.register(
        maker.workflowName,
        {"owner" : "RelValInjector",
         "workflow_spec_file" : specFile,
         })

    msg = "Publishing NewWorkflow/NewDataset for \n"
    msg += " %s\n "% specFile
    logging.debug(msg)
    self.ms.publish("NewWorkflow", specFile)
    self.ms.publish("NewDataset", specFile)
    self.ms.commit()
    return
modRef = imp.load_source( os.path.basename(cfgFile).replace(".py", ""), cfgFile) cmsCfg = modRef.process cfgWrapper = CMSSWConfig() cfgInt = cfgWrapper.loadConfiguration(cmsCfg) cfgInt.validateForProduction() if nodeNumber: try: inputModules = chainedInputs[nodeNumber-1] except IndexError: inputModules = [] maker.chainCmsRunNode(stageoutOutputs[nodeNumber-1], *inputModules) maker.setCMSSWVersion(versions[nodeNumber]) maker.setConfiguration(cfgWrapper, Type = "instance") maker.setOriginalCfg(file(cfgFile).read()) maker.setPSetHash(WorkflowTools.createPSetHash(cfgFile)) nodeNumber += 1 # // # // Pileup sample? #// if pileupDS != None: maker.addPileupDataset(pileupDS, pileupFilesPerJob) # // # // DataMix pileup sample? #// if dataMixDS:
def createHarvestingWorkflow(dataset, site, cmsPath, scramArch,
                             cmsswVersion, globalTag, configFile = None,
                             DQMServer = None, proxyLocation = None,
                             DQMCopyToCERN = None, runNumber = None,
                             doStageOut = None):
    """
    _createHarvestingWorkflow_

    Create a Harvesting workflow to extract DQM information from a dataset

    Enters an essentially empty process that will be updated
    at runtime to use the harvesting cfg from the release.

    Args:
        dataset: dataset path to harvest
        site: site(s) the workflow is restricted to (OnlySites parameter)
        cmsPath, scramArch, cmsswVersion: CMSSW release environment
        globalTag: conditions global tag passed through to the config
        configFile: optional harvesting cfg file; if None the config is
            built on the fly and a runtime setup script is attached
        DQMServer, proxyLocation, DQMCopyToCERN, doStageOut: optional
            workflow parameters, set only when provided
        runNumber: if given, produce a run-scoped (Run<N>) workflow
            instead of a dataset-scoped one

    Returns the saved-ready WorkflowSpec instance.
    """
    datasetPieces = DatasetConventions.parseDatasetPath(dataset)

    physicsGroup = "OfflineDQM"
    category = "DQM"

    if runNumber is None:
        # Dataset-scoped harvesting: the label carries the dataset identity
        requestId = "OfflineDQM"
        label = "%s-%s-%s" % (datasetPieces['Primary'],
                              datasetPieces['Processed'],
                              datasetPieces['DataTier'])
        channel = "DQMHarvest"
    else:
        # Run-scoped harvesting: the channel carries the run number
        requestId = "%s-%s" % (datasetPieces["Primary"],
                               datasetPieces["DataTier"])
        label = "DQMHarvesting"
        channel = "Run%s" % runNumber

    # Lazy %-args: the string is only built if debug logging is enabled
    logging.debug("path, arch, ver: %s, %s, %s",
                  cmsPath, scramArch, cmsswVersion)

    if configFile is not None:
        cfgWrapper = configFromFile(cmsPath, scramArch, cmsswVersion,
                                    configFile)
    else:
        cfgWrapper = configOnFly(cmsPath, scramArch, cmsswVersion)

    # //
    # // Pass in global tag
    # //
    cfgWrapper.conditionsTag = globalTag

    maker = WorkflowMaker(requestId, channel, label)
    maker.setCMSSWVersion(cmsswVersion)
    maker.setPhysicsGroup(physicsGroup)
    maker.setConfiguration(cfgWrapper, Type = "instance")
    maker.changeCategory(category)
    maker.setPSetHash("NO_HASH")
    maker.addInputDataset(dataset)
    maker.setActivity('harvesting')

    spec = maker.makeWorkflow()
    spec.parameters['WorkflowType'] = "Harvesting"
    spec.parameters['DBSURL'] = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
    spec.parameters['OnlySites'] = site

    # Optional parameters are only recorded when explicitly supplied
    if DQMServer is not None:
        spec.parameters['DQMServer'] = DQMServer
    if proxyLocation is not None:
        spec.parameters['proxyLocation'] = proxyLocation
    if DQMCopyToCERN is not None:
        spec.parameters['DQMCopyToCERN'] = DQMCopyToCERN
    if doStageOut is not None:
        spec.parameters['DoStageOut'] = doStageOut

    spec.payload.scriptControls['PostTask'].append(
        "JobCreator.RuntimeTools.RuntimeOfflineDQM")

    if configFile is None:
        # On-the-fly config: the job must build the harvesting cfg from
        # the release at execution time, so attach the setup script
        preExecScript = spec.payload.scriptControls["PreExe"]
        preExecScript.append("JobCreator.RuntimeTools.RuntimeOfflineDQMSetup")

    return spec