def setConfiguration(self, cfgFile, **args):
    """
    _setConfiguration_

    Provide the CMSSW configuration to be used.

    By default cfgFile is assumed to be a CMSSWConfig instance.
    The format can be overridden with the keyword argument:

    - Type : one of "file", "string" or "instance"
      * "file"     -- cfgFile is a path; its contents are read and
                      unpacked into a CMSSWConfig
      * "string"   -- cfgFile is packed CMSSWConfig data
      * "instance" -- cfgFile is already a CMSSWConfig instance

    Raises RuntimeError for any other Type value.
    """
    cfgType = args.get("Type", "instance")
    if cfgType not in ("file", "string", "instance"):
        msg = "Illegal Type for cfg file: %s\n" % cfgType
        # BUG FIX: the old message omitted the accepted "instance" value
        msg += "Should be \"file\" or \"string\" or \"instance\"\n"
        raise RuntimeError(msg)

    cfgContent = cfgFile
    if cfgType == "file":
        # Read the packed data from disk, then fall through to the
        # string-unpacking branch below.
        cfgContent = open(cfgFile).read()
        cfgType = "string"

    if cfgType == "string":
        cfgData = cfgContent
        cfgContent = CMSSWConfig()
        cfgContent.unpack(cfgData)

    # Attach the configuration to the cmsRun node and keep a record
    # of every configuration handed to this maker.
    self.cmsRunNode.cfgInterface = cfgContent
    self.configurations.append(cfgContent)
    return
def checkcreateWorkflow(cfgFile):
    """
    _checkcreateWorkflow_

    Dry-run check that a workflow request can be built from the given
    CMSSW configuration file.  The configuration type is derived from
    the file extension: ".py" is treated as a python configuration
    module, anything else as a legacy .cfg include.

    Any failure is caught, reported to stdout, and the function
    returns None in all cases.
    """
    try:
        # //
        # // Set CMSSW_SEARCH_PATH
        #//
        pwd = os.getenv("PWD")  # kept for the alternative search path below
        oldCmsswSearchPath = os.getenv("CMSSW_SEARCH_PATH")
        if not oldCmsswSearchPath:
            msg = "CMSSW_SEARCH_PATH not set....you need to set CMSSW environment "
            raise RuntimeError(msg)
        #os.environ["CMSSW_SEARCH_PATH"] = "%s:%s" % (pwd, oldCmsswSearchPath)
        os.environ["CMSSW_SEARCH_PATH"] = "/:%s" % (oldCmsswSearchPath)
        # //
        # // convert cfg
        #//
        # BUG FIX: cfgType was referenced without ever being assigned
        # (NameError, silently swallowed by the except below); derive
        # it from the file extension instead.
        if cfgFile.endswith(".py"):
            cfgType = "python"
        else:
            cfgType = "cfg"
        print(">>> Checking cfg %s" % cfgFile)
        if cfgType == "cfg":
            from FWCore.ParameterSet.Config import include
            cmsCfg = include(cfgFile)
        else:
            # BUG FIX: imp.find_module returns a (file, pathname,
            # description) tuple, not a module, so ".process" could
            # never work; load_source actually imports the module.
            import imp
            modRef = imp.load_source(
                os.path.basename(cfgFile).replace(".py", ""), cfgFile)
            cmsCfg = modRef.process
        cfgWrapper = CMSSWConfig()
        cfgWrapper.originalCfg = open(cfgFile).read()
        cfgInt = cfgWrapper.loadConfiguration(cmsCfg)
        cfgInt.validateForProduction()
    except Exception as e:
        print("Unable to create request: %s" % e)
maker.changeCategory(category) # loop over cfg's provided and add to workflow # first cmsRun node created implicitly by WorkflowMaker nodeNumber = 0 for cmsRunCfg in cmsRunCfgs: if cfgTypes[nodeNumber] == "cfg": from FWCore.ParameterSet.Config import include cmsCfg = include(cmsRunCfg) else: import imp modRef = imp.load_source( os.path.basename(cmsRunCfg).replace(".py", ""), cmsRunCfg) cmsCfg = modRef.process cfgWrapper = CMSSWConfig() #cfgWrapper.originalCfg = file(cmsRunCfg).read() cfgInt = cfgWrapper.loadConfiguration(cmsCfg) cfgInt.validateForProduction() if nodeNumber: try: inputModules = chainedInputs[nodeNumber-1] except IndexError: inputModules = [] maker.chainCmsRunNode(stageoutOutputs[nodeNumber-1], *inputModules) maker.setConfiguration(cfgWrapper, Type = "instance") maker.setCMSSWVersion(versions[nodeNumber]) maker.setOriginalCfg(file(cmsRunCfg).read()) maker.setPSetHash(WorkflowTools.createPSetHash(cmsRunCfg))
# NOTE(review): fragment of a method body -- the enclosing def and the
# body of loadProcessFromFramework (declared at the end) are outside
# this chunk.
# Configure the storage adaptor according to the lazy-download flag.
if self.useLazyDownload == True:
    logging.debug("Lazy downloads ENABLED.")
    cmsCfg.AdaptorConfig = cms.Service("AdaptorConfig",
        cacheHint = cms.untracked.string("lazy-download"),
        readHint = cms.untracked.string("auto-detect"))
else:
    logging.debug("Lazy downloads DISABLED.")
    cmsCfg.AdaptorConfig = cms.Service("AdaptorConfig",
        cacheHint = cms.untracked.string("application-only"),
        readHint = cms.untracked.string("direct-unbuffered"))

# Disable fast cloning on every output module of the process.
for outputModuleName in cmsCfg.outputModules:
    outputModule = getattr(cmsCfg, outputModuleName)
    outputModule.fastCloning = cms.untracked.bool(False)

# Wrap the tweaked process and attach it to the workflow payload.
cfgWrapper = CMSSWConfig()
cfgWrapper.originalCfg = file(self.configFile).read()
cfgInt = cfgWrapper.loadConfiguration(cmsCfg)
cfgInt.validateForProduction()
self.workflow.payload.cfgInterface = cfgWrapper

# Register each output module's dataset (keyed by its data tier).
for outputModuleName in cmsCfg.outputModules:
    outputModule = getattr(cmsCfg, outputModuleName)
    self.setupOutputModule(outputModuleName,
                           outputModule.dataset.dataTier.value())
loader.unload()
return

def loadProcessFromFramework(self):
def unpackPayloadNodeData(self, improvNode):
    """
    _unpackPayloadNodeData_

    Populate this instance from the IMProv node provided: identity
    attributes, application settings, script controls, the dataset
    lists, input links and the embedded configuration payload.
    """
    self.name = str(improvNode.attrs["Name"])
    self.type = str(improvNode.attrs["Type"])
    workflowName = improvNode.attrs.get('Workflow', None)
    if workflowName is not None:
        self.workflow = str(workflowName)

    # All queries are rooted at this (sub)class's own element name.
    cls = self.__class__.__name__

    # Application details: one entry per child node.
    appData = IMProvQuery("/%s/Application" % cls)(improvNode)[0]
    for appField in appData.children:
        self.application[str(appField.name)] = str(appField.attrs['Value'])

    # Application control settings.
    for ctrlField in IMProvQuery("/%s/ApplicationControl/*" % cls)(improvNode):
        self.applicationControls[str(ctrlField.name)] = str(ctrlField.attrs['Value'])

    # Script controls: named lists of script entries; unnamed lists and
    # valueless entries are skipped.
    for scriptList in IMProvQuery("/%s/ScriptControls/ScriptList" % cls)(improvNode):
        listName = scriptList.attrs.get("Name", None)
        if listName is None:
            continue
        listName = str(listName)
        for script in scriptList.children:
            scriptName = script.attrs.get("Value", None)
            if scriptName is None:
                continue
            self.scriptControls[listName].append(str(scriptName))

    # Input datasets.
    for item in IMProvQuery("/%s/InputDatasets/DatasetInfo" % cls)(improvNode):
        loaded = DatasetInfo()
        loaded.load(item)
        self._InputDatasets.append(loaded)

    # Output datasets.
    for item in IMProvQuery("/%s/OutputDatasets/DatasetInfo" % cls)(improvNode):
        loaded = DatasetInfo()
        loaded.load(item)
        self._OutputDatasets.append(loaded)

    # Pileup datasets.
    for item in IMProvQuery("/%s/PileupDatasets/DatasetInfo" % cls)(improvNode):
        loaded = DatasetInfo()
        loaded.load(item)
        self._PileupDatasets.append(loaded)

    # Input links to other nodes.
    for ilink in IMProvQuery("/%s/InputLinks/InputLink" % cls)(improvNode):
        newLink = InputLink()
        newLink.load(ilink)
        self._InputLinks.append(newLink)

    # Legacy base64-encoded configuration blob, when present.
    configNodes = IMProvQuery("/%s/Configuration" % cls)(improvNode)
    if configNodes:
        self.configuration = base64.decodestring(str(configNodes[0].chardata))

    # Structured CMSSWConfig payload, when present.
    cfgNodes = IMProvQuery("/%s/CMSSWConfig" % cls)(improvNode)
    if cfgNodes:
        self.cfgInterface = CMSSWConfig()
        self.cfgInterface.load(cfgNodes[0])

    # User sandbox: the last entry wins.
    sandboxNodes = IMProvQuery("/%s/UserSandbox" % cls)(improvNode)
    if sandboxNodes:
        self.userSandbox = str(sandboxNodes[-1].chardata)
    return
class PayloadNode:
    """
    _PayloadNode_

    Abstract Application entry in a tree like workflow model

    Each node carries application settings, dataset lists and an
    (optionally structured) configuration, and can be serialised
    to / deserialised from IMProv XML nodes.
    """
    def __init__(self, name=None):
        # Tree structure: ordered children plus a back-reference to parent.
        self.children = []
        self.parent = None
        self.name = None
        self.workflow = None
        if name != None:
            self.name = name
        self.type = None
        # Application details: which project/version/executable runs here.
        self.application = {}
        self.application.setdefault("Project", None)
        self.application.setdefault("Version", None)
        self.application.setdefault("Architecture", None)
        self.application.setdefault("Executable", None)
        self.applicationControls = {}
        self.applicationControls.setdefault("EventMultiplier", None)
        self.applicationControls.setdefault("SelectionEfficiency", None)
        self.applicationControls.setdefault("PerRunFraction", None)
        # //
        # // These lists are deprecated and are maintained here
        #//  for backwards compatibility for short term
        self.inputDatasets = []
        self.outputDatasets = []
        self.scriptControls = {}
        self.scriptControls.setdefault("PreTask", [])
        self.scriptControls.setdefault("PreExe", [])
        self.scriptControls.setdefault("PostExe", [])
        self.scriptControls.setdefault("PostTask", [])
        # //
        # // Dataset information is stored as DatasetInfo objects
        #//
        self._InputDatasets = []
        self._OutputDatasets = []
        self._PileupDatasets = []
        self.configuration = ""
        self.cfgInterface = None
        self.userSandbox = None
        # //
        # // Input Links to other nodes
        #//
        self._InputLinks = []

    def newNode(self, name):
        """
        _newNode_

        Create a new PayloadNode that is a child to this node
        and return it so that it can be configured.

        New Node name must be unique within the tree or it will barf
        """
        newNode = PayloadNode()
        newNode.name = name
        self.addNode(newNode)
        return newNode

    def addInputDataset(self, primaryDS, processedDS):
        """
        _addInputDataset_

        Add a new Input Dataset to this Node.
        Arguments should be:

        - *primaryDS* : The Primary Dataset name of the input dataset
        - *processedDS* : The Processed Dataset name of the input dataset

        The DatasetInfo object is returned by reference for more
        information to be added to it

        InputModuleName should be the mainInputSource of the PSet for
        the main input dataset. At present this is set elsewhere
        """
        newDataset = DatasetInfo()
        newDataset['PrimaryDataset'] = primaryDS
        newDataset['ProcessedDataset'] = processedDS
        self._InputDatasets.append(newDataset)
        return newDataset

    def addPileupDataset(self, primary, tier, processed):
        """
        _addPileupDataset_

        Add a pileup dataset to this node
        """
        newDataset = DatasetInfo()
        newDataset['PrimaryDataset'] = primary
        newDataset['DataTier'] = tier
        newDataset['ProcessedDataset'] = processed
        self._PileupDatasets.append(newDataset)
        return newDataset

    def addOutputDataset(self, primaryDS, processedDS, outputModuleName):
        """
        _addOutputDataset_

        Add a new Output Dataset, specifying the Primary and Processed
        Dataset names and the name of the output module in the PSet
        responsible for writing out files for that dataset
        """
        newDataset = DatasetInfo()
        newDataset['PrimaryDataset'] = primaryDS
        newDataset['ProcessedDataset'] = processedDS
        newDataset['OutputModuleName'] = outputModuleName
        self._OutputDatasets.append(newDataset)
        return newDataset

    def addInputLink(self, nodeName, nodeOutputModName,
                     thisNodeSourceName=None, AppearStandalone=False,
                     skipCfgCheck=False):
        """
        _addInputLink_

        Add an input link between this node and another node above it
        in the tree. This means that output from the named output
        module of the node will be linked to the source on this node.
        If a source name is not provided, the main source will be used
        """
        # //
        # // Safety checks
        #//  1. Node name must exist
        if nodeName not in listAllNames(self):
            msg = "Error adding input link: Node named %s " % nodeName
            msg += "Does not exist in the node tree"
            raise RuntimeError, msg
        # //
        # // 2. Must be above this node. IE not in nodes descended from
        #//    this node
        if nodeName in self.listDescendantNames():
            msg = "Error adding input link: Node named %s \n" % nodeName
            msg += "Is below node %s in the tree\n" % self.name
            msg += "%s will run before %s\n" % (self.name, nodeName)
            raise RuntimeError, msg
        # //
        # // TODO: Check if named source is present
        #//
        link = InputLink(InputNode=nodeName,
                         InputSource=thisNodeSourceName,
                         OutputModule=nodeOutputModName,
                         AppearStandalone=AppearStandalone)
        self._InputLinks.append(link)
        return

    def addNode(self, nodeInstance):
        """
        _addNode_

        Add a child node to this node
        nodeInstance must be an instance of PayloadNode
        """
        if not isinstance(nodeInstance, PayloadNode):
            msg = "Argument supplied to addNode is not a PayloadNode instance"
            raise RuntimeError, msg
        # Reject names that already appear anywhere in either tree.
        dupes = intersection(listAllNames(self), listAllNames(nodeInstance))
        if len(dupes) > 0:
            msg = "Duplicate Names already exist in parent tree:\n"
            msg += "The following names already exist in the parent tree:\n"
            for dupe in dupes:
                msg += " %s\n" % dupe
            msg += "Each PayloadNode within the tree must "
            msg += "have a unique name\n"
            raise RuntimeError, msg
        self.children.append(nodeInstance)
        nodeInstance.workflow = self.workflow
        nodeInstance.parent = self
        return

    def listDescendantNames(self, result=None):
        """
        _listDescendantNames_

        return a list of all names of nodes below this node
        recursively traversing children

        NOTE: the returned list also includes this node's own name.
        """
        if result == None:
            result = []
        result.append(self.name)
        for child in self.children:
            result = child.listDescendantNames(result)
        return result

    def makeIMProv(self):
        """
        _makeIMProv_

        Serialise self and children into an XML DOM friendly node
        structure
        """
        node = IMProvNode(self.__class__.__name__, None,
                          Name=str(self.name),
                          Type=str(self.type),
                          Workflow=str(self.workflow))
        appNode = IMProvNode("Application")
        for key, val in self.application.items():
            appNode.addNode(IMProvNode(key, None, Value=val))
        appConNode = IMProvNode("ApplicationControl")
        for key, val in self.applicationControls.items():
            # Unset controls are omitted from the serialised form.
            if val == None:
                continue
            appConNode.addNode(IMProvNode(key, None, Value=val))
        inputNode = IMProvNode("InputDatasets")
        for inpDS in self._InputDatasets:
            inputNode.addNode(inpDS.save())
        outputNode = IMProvNode("OutputDatasets")
        for outDS in self._OutputDatasets:
            outputNode.addNode(outDS.save())
        pileupNode = IMProvNode("PileupDatasets")
        for puDS in self._PileupDatasets:
            pileupNode.addNode(puDS.save())
        inpLinksNode = IMProvNode("InputLinks")
        for iLink in self._InputLinks:
            inpLinksNode.addNode(iLink.save())
        scriptsNode = IMProvNode("ScriptControls")
        for key, scriptList in self.scriptControls.items():
            scriptListNode = IMProvNode("ScriptList", None, Name=key)
            [ scriptListNode.addNode(IMProvNode("Script", None, Value=x))
              for x in scriptList ]
            scriptsNode.addNode(scriptListNode)
        # Either the legacy base64 blob or the structured CMSSWConfig
        # serialisation, never both.
        if self.cfgInterface == None:
            configNode = IMProvNode("Configuration",
                                    base64.encodestring(self.configuration),
                                    Encoding="base64")
        else:
            configNode = self.cfgInterface.save()
        node.addNode(appNode)
        node.addNode(appConNode)
        node.addNode(scriptsNode)
        node.addNode(inputNode)
        node.addNode(outputNode)
        node.addNode(pileupNode)
        node.addNode(inpLinksNode)
        node.addNode(configNode)
        if self.userSandbox != None:
            sandboxNode = IMProvNode("UserSandbox", self.userSandbox)
            node.addNode(sandboxNode)
        for child in self.children:
            node.addNode(child.makeIMProv())
        return node

    def __str__(self):
        """string rep for easy inspection"""
        return str(self.makeIMProv())

    def operate(self, operator):
        """
        _operate_

        Recursive callable operation over a payloadNode tree
        starting from this node.

        operator must be a callable object or function, that accepts
        a single argument, that argument being the current node being
        operated on.
        """
        operator(self)
        for child in self.children:
            child.operate(operator)
        return

    def populate(self, improvNode):
        """
        _populate_

        Extract details of this node from improvNode and
        instantiate and populate any children found
        """
        self.unpackPayloadNodeData(improvNode)
        # //
        # // Recursively handle children
        #//
        childQ = IMProvQuery("/PayloadNode/PayloadNode")
        childNodes = childQ(improvNode)
        for item in childNodes:
            newChild = PayloadNode()
            self.addNode(newChild)
            newChild.populate(item)
        return

    def unpackPayloadNodeData(self, improvNode):
        """
        _unpackPayloadNodeData_

        Unpack PayloadNode data from improv Node provided and
        add information to self
        """
        self.name = str(improvNode.attrs["Name"])
        self.type = str(improvNode.attrs["Type"])
        workflowName = improvNode.attrs.get('Workflow', None)
        if workflowName != None:
            self.workflow = str(workflowName)
        # //
        # // Unpack data for this instance
        #//  App details
        appDataQ = IMProvQuery("/%s/Application" % self.__class__.__name__)
        appData = appDataQ(improvNode)[0]
        for appField in appData.children:
            field = str(appField.name)
            value = str(appField.attrs['Value'])
            self.application[field] = value
        # //
        # // App Control details
        #//
        appConDataQ = IMProvQuery("/%s/ApplicationControl/*" % self.__class__.__name__)
        appConData = appConDataQ(improvNode)
        for appConField in appConData:
            field = str(appConField.name)
            value = str(appConField.attrs['Value'])
            self.applicationControls[field] = value
        # //
        # // Script Controls
        #//
        scriptConQ = IMProvQuery("/%s/ScriptControls/ScriptList" % self.__class__.__name__)
        scriptLists = scriptConQ(improvNode)
        for scriptList in scriptLists:
            # Unnamed lists and valueless scripts are skipped.
            listName = scriptList.attrs.get("Name", None)
            if listName == None:
                continue
            listName = str(listName)
            for script in scriptList.children:
                scriptName = script.attrs.get("Value", None)
                if scriptName == None:
                    continue
                self.scriptControls[listName].append(str(scriptName))
        # //
        # // Dataset details
        #//  Input Datasets
        inputDSQ = IMProvQuery("/%s/InputDatasets/DatasetInfo" % self.__class__.__name__)
        inputDS = inputDSQ(improvNode)
        # print improvNode
        for item in inputDS:
            newDS = DatasetInfo()
            newDS.load(item)
            self._InputDatasets.append(newDS)
        # //
        # // Output Datasets
        #//
        outputDSQ = IMProvQuery("/%s/OutputDatasets/DatasetInfo" % self.__class__.__name__)
        outputDS = outputDSQ(improvNode)
        for item in outputDS:
            newDS = DatasetInfo()
            newDS.load(item)
            self._OutputDatasets.append(newDS)
        # //
        # // Pileup Datasets
        #//
        pileupDSQ = IMProvQuery("/%s/PileupDatasets/DatasetInfo" % self.__class__.__name__)
        pileupDS = pileupDSQ(improvNode)
        for item in pileupDS:
            newDS = DatasetInfo()
            newDS.load(item)
            self._PileupDatasets.append(newDS)
        # //
        # // Input Links
        #//
        inpLinkQ = IMProvQuery("/%s/InputLinks/InputLink" % self.__class__.__name__)
        inpLinks = inpLinkQ(improvNode)
        for ilink in inpLinks:
            newLink = InputLink()
            newLink.load(ilink)
            self._InputLinks.append(newLink)
        # //
        # // Configuration
        #//
        configQ = IMProvQuery("/%s/Configuration" % self.__class__.__name__)
        configNodes = configQ(improvNode)
        if len(configNodes) > 0:
            configNode = configNodes[0]
            self.configuration = base64.decodestring(str(configNode.chardata))
        cfgIntQ = IMProvQuery("/%s/CMSSWConfig" % self.__class__.__name__)
        cfgNodes = cfgIntQ(improvNode)
        if len(cfgNodes) > 0:
            cfgNode = cfgNodes[0]
            self.cfgInterface = CMSSWConfig()
            self.cfgInterface.load(cfgNode)
        # //
        # // User sandbox
        #//
        sandboxQ = IMProvQuery("/%s/UserSandbox" % self.__class__.__name__)
        sandboxNodes = sandboxQ(improvNode)
        if len(sandboxNodes) > 0:
            # The last sandbox entry wins when several are present.
            sandboxNode = sandboxNodes[-1]
            self.userSandbox = str(sandboxNode.chardata)
        return
def makeWorkflow(self, testInstance):
    """
    _makeWorkflow_

    Process a test: build a WorkflowSpec for it, register it and
    publish the NewWorkflow/NewDataset messages.  The testInstance
    mapping is updated in place with Name, TotalEvents, EventsPerJob,
    WorkflowSpecId, WorkflowSpecFile and WorkingDir.

    Missing ReleaseValidation parameters are logged and abort
    processing of this test only; the CMSSW environment loader is
    always unloaded before returning.
    """
    loader = CMSSWAPILoader(testInstance['CMSSWArchitecture'],
                            testInstance['CMSSWVersion'],
                            testInstance['CMSPath'])
    loader.load()
    cfgWrapper = CMSSWConfig()
    # The pickle file contains the process object for this test.
    process = pickle.load(open(testInstance['PickleFile'], 'rb'))
    cfgInt = cfgWrapper.loadConfiguration(process)
    cfgInt.validateForProduction()
    cfgAsString = process.dumpPython()
    # //
    # // Get release validation PSet from process
    #//
    relValPSet = getattr(process, "ReleaseValidation", None)
    if relValPSet is None:
        msg = "Unable to extract ReleaseValidation PSet from pickled cfg for \n"
        msg += "%s\n" % testInstance['PickleFile']
        logging.error(msg)
        loader.unload()  # BUG FIX: loader was leaked on this error path
        return
    testName = getattr(relValPSet, "primaryDatasetName", None)
    # BUG FIX: testName.value() was called BEFORE this None check,
    # turning a missing parameter into an AttributeError instead of
    # the logged error below.
    if testName is None:
        msg = "No primaryDatasetName parameter in ReleaseValidation PSet\n"
        msg += "%s\n" % testInstance['PickleFile']
        logging.error(msg)
        loader.unload()
        return
    testInstance['Name'] = testName.value()
    totalEvents = getattr(relValPSet, "totalNumberOfEvents", None)
    if totalEvents is None:
        msg = "No totalNumberOfEvents parameter in ReleaseValidation PSet\n"
        msg += "%s\n" % testInstance['PickleFile']
        logging.error(msg)
        loader.unload()
        return
    testInstance['TotalEvents'] = totalEvents.value()
    # Either an explicit per-job event count or a speed category that
    # maps to one via self.args must be provided.
    eventsPerJob = getattr(relValPSet, "eventsPerJob", None)
    speedCat = getattr(relValPSet, "speedCategory", None)
    if (eventsPerJob is None) and (speedCat is None):
        msg = "ReleaseValidation PSet must contain one of either eventsPerJob or speedCategory\n"
        msg += "%s\n" % testInstance['PickleFile']
        logging.error(msg)
        loader.unload()
        return
    if eventsPerJob is not None:
        testInstance['EventsPerJob'] = eventsPerJob.value()
    else:
        testInstance['SpeedCategory'] = speedCat.value()
        if testInstance['SpeedCategory'] not in self.args:
            msg = "Unknown Speed Category: %s\n" % testInstance['SpeedCategory']
            msg += "In file: %s\n" % testInstance['PickleFile']
            logging.error(msg)
            loader.unload()
            return
        testInstance['EventsPerJob'] = self.args[testInstance['SpeedCategory']]
    # Optional input and pileup datasets.
    inputDataset = getattr(relValPSet, "inputDatasetPath", None)
    pileupDataset = getattr(relValPSet, "pileupDatasetPath", None)
    if pileupDataset is not None:
        testInstance['PileupDataset'] = pileupDataset.value()
    if inputDataset is not None:
        testInstance['InputDataset'] = inputDataset.value()
    msg = "Processing : %s\n" % testInstance['Name']
    msg += "From Pickle: %s\n" % testInstance['PickleFile']
    msg += "TotalEvents: %s\n" % testInstance['TotalEvents']
    msg += "EventsPerJob: %s\n" % testInstance['EventsPerJob']
    msg += "SpeedCategory: %s\n" % testInstance['SpeedCategory']
    logging.info(msg)
    # A workflow already built for this test name is simply reused.
    if testInstance['Name'] in self.workflows:
        testInstance['WorkflowSpecId'] = self.workflows[testInstance['Name']]
        testInstance['WorkflowSpecFile'] = self.workflowFiles[testInstance['Name']]
        testInstance['WorkingDir'] = self.workingDirs[testInstance['Name']]
        loader.unload()
        return
    self.jobCounts[testInstance['Name']] = 1
    workingDir = os.path.join(self.args['ComponentDir'],
                              testInstance['CMSSWVersion'],
                              testInstance['Name'])
    if not os.path.exists(workingDir):
        os.makedirs(workingDir)
    loader.unload()
    # Build the workflow spec itself.
    maker = WorkflowMaker(str(self.timestamp), testInstance['Name'], 'RelVal')
    maker.setCMSSWVersion(testInstance['CMSSWVersion'])
    maker.setPhysicsGroup("RelVal")
    maker.setConfiguration(cfgWrapper, Type="instance")
    maker.setOriginalCfg(cfgAsString)
    psetHash = "NO_PSET_HASH"
    if 'PSetHash' in cfgWrapper.configMetadata:
        psetHash = cfgWrapper.configMetadata['PSetHash']
    maker.setPSetHash(psetHash)
    maker.changeCategory("relval")
    if testInstance['SelectionEfficiency'] is not None:
        selEff = float(testInstance['SelectionEfficiency'])
        maker.addSelectionEfficiency(selEff)
    if testInstance['PileupDataset'] is not None:
        maker.addPileupDataset(testInstance['PileupDataset'], 100)
    if testInstance['InputDataset'] is not None:
        maker.addInputDataset(testInstance['InputDataset'])
        maker.inputDataset["SplitType"] = "events"
        maker.inputDataset["SplitSize"] = testInstance['EventsPerJob']
    spec = maker.makeWorkflow()
    spec.parameters['OnlySites'] = testInstance['Site']
    spec.parameters['DBSURL'] = self.dbsUrl
    # BUG FIX: was "/%s/%s-Workflow.xml" % (workingDir, ...), which
    # prefixed an extra slash onto the already-absolute workingDir.
    specFile = os.path.join(workingDir,
                            "%s-Workflow.xml" % maker.workflowName)
    spec.save(specFile)
    # Book-keeping for future tests with the same name.
    self.workflows[testInstance['Name']] = str(maker.workflowName)
    self.workflowFiles[testInstance['Name']] = specFile
    self.workingDirs[testInstance['Name']] = workingDir
    testInstance['WorkflowSpecId'] = str(maker.workflowName)
    testInstance['WorkflowSpecFile'] = specFile
    testInstance['WorkingDir'] = workingDir
    msg = "Workflow created for test: %s" % testInstance['Name']
    logging.info(msg)
    msg = "Registering Workflow Entity: %s" % maker.workflowName
    logging.debug(msg)
    WEWorkflow.register(
        maker.workflowName,
        {"owner" : "RelValInjector",
         "workflow_spec_file" : specFile,
         })
    msg = "Publishing NewWorkflow/NewDataset for \n"
    msg += " %s\n "% specFile
    logging.debug(msg)
    self.ms.publish("NewWorkflow", specFile)
    self.ms.publish("NewDataset", specFile)
    self.ms.commit()
    return
def __init__(self, cmsswConfigData, isString=False, appControls=None):
    """
    Initialise the generator with a CMSSWConfig template.

    cmsswConfigData -- a CMSSWConfig instance, or its packed string
                       form when isString is True
    isString        -- when True, cmsswConfigData is unpacked into a
                       fresh CMSSWConfig instance
    appControls     -- optional dict of application control settings
    """
    self.template = cmsswConfigData
    # BUG FIX: the default was a shared mutable dict ({}); every
    # instance constructed without appControls aliased the same
    # object, so a mutation through one leaked into all of them.
    self.appControls = {} if appControls is None else appControls
    if isString == True:
        self.template = CMSSWConfig()
        self.template.unpack(cmsswConfigData)
class CfgGenerator:
    """
    _CfgGenerator_

    Callable that stamps per-job settings (output file names, event
    counts, source parameters and random seeds) onto a lightweight
    clone of a template CMSSWConfig and returns the clone.
    """

    def __init__(self, cmsswConfigData, isString=False, appControls=None):
        """
        cmsswConfigData -- a CMSSWConfig instance, or its packed string
                           form when isString is True
        isString        -- when True, cmsswConfigData is unpacked into
                           a fresh CMSSWConfig instance
        appControls     -- optional dict of application control
                           settings (EventMultiplier,
                           SelectionEfficiency, ...)
        """
        self.template = cmsswConfigData
        # BUG FIX: the default was a shared mutable dict ({}); every
        # generator built without appControls aliased the same object.
        self.appControls = {} if appControls is None else appControls
        if isString == True:
            self.template = CMSSWConfig()
            self.template.unpack(cmsswConfigData)

    def __call__(self, jobName, **args):
        """
        _operator()_

        Insert per job information into a copy of the template
        CMSSWConfig object and return it.

        Recognised keyword args: maxEvents, maxEventsWritten,
        skipEvents, firstEvent, firstRun, firstLumi, fileNames.
        """
        newCfg = self.template.lightweightClone()
        # //
        # // Output modules first, use the module name in the
        #//  parameters in case of multiple modules
        for modName in newCfg.outputModules.keys():
            outModule = newCfg.getOutputModule(modName)
            outModule['catalog'] = "%s-%s-Output.xml" % (jobName, modName)
            outModule['fileName'] = "%s-%s.root" % (jobName, modName)
            outModule['logicalFileName'] = "%s-%s.root" % (jobName, modName)
            if 'LFNBase' in outModule:
                outModule['logicalFileName'] = "%s/%s" % (
                    outModule['LFNBase'],
                    outModule['logicalFileName'])
        maxEvents = args.get("maxEvents", None)
        if maxEvents is not None:
            selectionEff = self.appControls.get("SelectionEfficiency", None)
            evMultiplier = self.appControls.get("EventMultiplier", None)
            # //
            # // Adjust number of events for selection efficiency
            #//
            if selectionEff is not None:
                newMaxEv = float(maxEvents) / float(selectionEff)
                maxEvents = int(newMaxEv)
            # // If this node has an Event Multiplier, adjust maxEvents
            #//
            if evMultiplier is not None:
                maxEvents = int(maxEvents) * int(evMultiplier)
            newCfg.setInputMaxEvents(maxEvents)
        maxOutputEvents = args.get("maxEventsWritten", None)
        if maxOutputEvents is not None:
            newCfg.setOutputMaxEvents(maxOutputEvents)
        # Optional source parameters, copied through verbatim.
        skipEvents = args.get("skipEvents", None)
        if skipEvents is not None:
            newCfg.sourceParams['skipEvents'] = skipEvents
        firstEvent = args.get("firstEvent", None)
        if firstEvent is not None:
            newCfg.sourceParams['firstEvent'] = firstEvent
        firstRun = args.get("firstRun", None)
        if firstRun is not None:
            newCfg.sourceParams['firstRun'] = firstRun
        firstLumi = args.get("firstLumi", None)
        if firstLumi is not None:
            newCfg.sourceParams['firstLuminosityBlock'] = firstLumi
        fileNames = args.get("fileNames", None)
        if fileNames is not None:
            # Replaces (not extends) the input file list.
            #newCfg.inputFiles.extend(fileNames)
            newCfg.inputFiles = fileNames
        # NOTE(review): this produces requiredSeeds + 1 seeds -- confirm
        # the extra seed is intentional.
        seeds = [ randomSeed() for i in range(0, newCfg.requiredSeeds+1)]
        newCfg.seeds = seeds
        return newCfg
def unpackPayloadNodeData(self, improvNode):
    """
    _unpackPayloadNodeData_

    Unpack PayloadNode data from improv Node provided and
    add information to self
    """
    self.name = str(improvNode.attrs["Name"])
    self.type = str(improvNode.attrs["Type"])
    workflowName = improvNode.attrs.get("Workflow", None)
    if workflowName != None:
        self.workflow = str(workflowName)
    # //
    # // Unpack data for this instance
    # // App details
    appDataQ = IMProvQuery("/%s/Application" % self.__class__.__name__)
    appData = appDataQ(improvNode)[0]
    # One application attribute per child node.
    for appField in appData.children:
        field = str(appField.name)
        value = str(appField.attrs["Value"])
        self.application[field] = value
    # //
    # // App Control details
    # //
    appConDataQ = IMProvQuery("/%s/ApplicationControl/*" % self.__class__.__name__)
    appConData = appConDataQ(improvNode)
    for appConField in appConData:
        field = str(appConField.name)
        value = str(appConField.attrs["Value"])
        self.applicationControls[field] = value
    # //
    # // Script Controls
    # //
    scriptConQ = IMProvQuery("/%s/ScriptControls/ScriptList" % self.__class__.__name__)
    scriptLists = scriptConQ(improvNode)
    for scriptList in scriptLists:
        # Unnamed lists and valueless script entries are skipped.
        listName = scriptList.attrs.get("Name", None)
        if listName == None:
            continue
        listName = str(listName)
        for script in scriptList.children:
            scriptName = script.attrs.get("Value", None)
            if scriptName == None:
                continue
            self.scriptControls[listName].append(str(scriptName))
    # //
    # // Dataset details
    # // Input Datasets
    inputDSQ = IMProvQuery("/%s/InputDatasets/DatasetInfo" % self.__class__.__name__)
    inputDS = inputDSQ(improvNode)
    # print improvNode
    for item in inputDS:
        newDS = DatasetInfo()
        newDS.load(item)
        self._InputDatasets.append(newDS)
    # //
    # // Output Datasets
    # //
    outputDSQ = IMProvQuery("/%s/OutputDatasets/DatasetInfo" % self.__class__.__name__)
    outputDS = outputDSQ(improvNode)
    for item in outputDS:
        newDS = DatasetInfo()
        newDS.load(item)
        self._OutputDatasets.append(newDS)
    # //
    # // Pileup Datasets
    # //
    pileupDSQ = IMProvQuery("/%s/PileupDatasets/DatasetInfo" % self.__class__.__name__)
    pileupDS = pileupDSQ(improvNode)
    for item in pileupDS:
        newDS = DatasetInfo()
        newDS.load(item)
        self._PileupDatasets.append(newDS)
    # //
    # // Input Links
    # //
    inpLinkQ = IMProvQuery("/%s/InputLinks/InputLink" % self.__class__.__name__)
    inpLinks = inpLinkQ(improvNode)
    for ilink in inpLinks:
        newLink = InputLink()
        newLink.load(ilink)
        self._InputLinks.append(newLink)
    # //
    # // Configuration
    # //
    # Legacy base64 blob and/or structured CMSSWConfig payload.
    configQ = IMProvQuery("/%s/Configuration" % self.__class__.__name__)
    configNodes = configQ(improvNode)
    if len(configNodes) > 0:
        configNode = configNodes[0]
        self.configuration = base64.decodestring(str(configNode.chardata))
    cfgIntQ = IMProvQuery("/%s/CMSSWConfig" % self.__class__.__name__)
    cfgNodes = cfgIntQ(improvNode)
    if len(cfgNodes) > 0:
        cfgNode = cfgNodes[0]
        self.cfgInterface = CMSSWConfig()
        self.cfgInterface.load(cfgNode)
    # //
    # // User sandbox
    # //
    sandboxQ = IMProvQuery("/%s/UserSandbox" % self.__class__.__name__)
    sandboxNodes = sandboxQ(improvNode)
    if len(sandboxNodes) > 0:
        # The last sandbox entry wins when several are present.
        sandboxNode = sandboxNodes[-1]
        self.userSandbox = str(sandboxNode.chardata)
    return
class PayloadNode:
    """
    _PayloadNode_

    Abstract Application entry in a tree like workflow model.

    Each node carries application settings, dataset information,
    script controls and an optional CMSSW configuration, plus links
    to parent/child nodes forming the workflow tree.
    """
    def __init__(self, name=None):
        self.children = []
        self.parent = None
        self.name = None
        self.workflow = None
        if name is not None:
            self.name = name
        self.type = None
        # Application settings: project/version/arch/executable
        self.application = {}
        self.application.setdefault("Project", None)
        self.application.setdefault("Version", None)
        self.application.setdefault("Architecture", None)
        self.application.setdefault("Executable", None)
        self.applicationControls = {}
        self.applicationControls.setdefault("EventMultiplier", None)
        self.applicationControls.setdefault("SelectionEfficiency", None)
        self.applicationControls.setdefault("PerRunFraction", None)
        # //
        # // These lists are deprecated and are maintained here
        # //  for backwards compatibility for short term
        self.inputDatasets = []
        self.outputDatasets = []
        # Script hooks keyed by execution point
        self.scriptControls = {}
        self.scriptControls.setdefault("PreTask", [])
        self.scriptControls.setdefault("PreExe", [])
        self.scriptControls.setdefault("PostExe", [])
        self.scriptControls.setdefault("PostTask", [])
        # //
        # // Dataset information is stored as DatasetInfo objects
        # //
        self._InputDatasets = []
        self._OutputDatasets = []
        self._PileupDatasets = []
        self.configuration = ""
        self.cfgInterface = None
        self.userSandbox = None
        # //
        # // Input Links to other nodes
        # //
        self._InputLinks = []

    def newNode(self, name):
        """
        _newNode_

        Create a new PayloadNode that is a child to this node
        and return it so that it can be configured.

        New Node name must be unique within the tree or it will barf
        """
        newNode = PayloadNode()
        newNode.name = name
        self.addNode(newNode)
        return newNode

    def addInputDataset(self, primaryDS, processedDS):
        """
        _addInputDataset_

        Add a new Input Dataset to this Node.
        Arguments should be:

        - *primaryDS* : The Primary Dataset name of the input dataset
        - *processedDS* : The Processed Dataset name of the input dataset

        The DatasetInfo object is returned by reference for more
        information to be added to it.

        InputModuleName should be the mainInputSource of the PSet for
        the main input dataset. At present this is set elsewhere
        """
        newDataset = DatasetInfo()
        newDataset["PrimaryDataset"] = primaryDS
        newDataset["ProcessedDataset"] = processedDS
        self._InputDatasets.append(newDataset)
        return newDataset

    def addPileupDataset(self, primary, tier, processed):
        """
        _addPileupDataset_

        Add a pileup dataset to this node.  Returns the DatasetInfo
        by reference so further fields can be filled in.
        """
        newDataset = DatasetInfo()
        newDataset["PrimaryDataset"] = primary
        newDataset["DataTier"] = tier
        newDataset["ProcessedDataset"] = processed
        self._PileupDatasets.append(newDataset)
        return newDataset

    def addOutputDataset(self, primaryDS, processedDS, outputModuleName):
        """
        _addOutputDataset_

        Add a new Output Dataset, specifying the Primary and Processed
        Dataset names and the name of the output module in the PSet
        responsible for writing out files for that dataset
        """
        newDataset = DatasetInfo()
        newDataset["PrimaryDataset"] = primaryDS
        newDataset["ProcessedDataset"] = processedDS
        newDataset["OutputModuleName"] = outputModuleName
        self._OutputDatasets.append(newDataset)
        return newDataset

    def addInputLink(self, nodeName, nodeOutputModName,
                     thisNodeSourceName=None, AppearStandalone=False,
                     skipCfgCheck=False):
        """
        _addInputLink_

        Add an input link between this node and another node above it
        in the tree.  This means that output from the named output
        module of the node will be linked to the source on this node.
        If a source name is not provided, the main source will be used.

        Raises RuntimeError when the named node does not exist or sits
        below this node in the tree (it would run after this node).
        """
        # //
        # // Safety checks
        # //  1. Node name must exist
        if nodeName not in listAllNames(self):
            msg = "Error adding input link: Node named %s " % nodeName
            msg += "Does not exist in the node tree"
            raise RuntimeError(msg)
        # //
        # //  2. Must be above this node, IE not in nodes descended
        # //     from this node
        if nodeName in self.listDescendantNames():
            msg = "Error adding input link: Node named %s \n" % nodeName
            msg += "Is below node %s in the tree\n" % self.name
            msg += "%s will run before %s\n" % (self.name, nodeName)
            raise RuntimeError(msg)
        # //
        # // TODO: Check if named source is present
        # //
        link = InputLink(
            InputNode=nodeName,
            InputSource=thisNodeSourceName,
            OutputModule=nodeOutputModName,
            AppearStandalone=AppearStandalone,
        )
        self._InputLinks.append(link)
        return

    def addNode(self, nodeInstance):
        """
        _addNode_

        Add a child node to this node.
        nodeInstance must be an instance of PayloadNode; its name must
        not collide with any name already in the tree.
        """
        if not isinstance(nodeInstance, PayloadNode):
            msg = "Argument supplied to addNode is not a PayloadNode instance"
            raise RuntimeError(msg)
        dupes = intersection(listAllNames(self), listAllNames(nodeInstance))
        if len(dupes) > 0:
            msg = "Duplicate Names already exist in parent tree:\n"
            msg += "The following names already exist in the parent tree:\n"
            for dupe in dupes:
                msg += "  %s\n" % dupe
            msg += "Each PayloadNode within the tree must "
            msg += "have a unique name\n"
            raise RuntimeError(msg)
        self.children.append(nodeInstance)
        nodeInstance.workflow = self.workflow
        nodeInstance.parent = self
        return

    def listDescendantNames(self, result=None):
        """
        _listDescendantNames_

        Return a list of all names of nodes below this node,
        recursively traversing children (this node included).
        """
        if result is None:
            result = []
        result.append(self.name)
        for child in self.children:
            result = child.listDescendantNames(result)
        return result

    def makeIMProv(self):
        """
        _makeIMProv_

        Serialise self and children into an XML DOM friendly node
        structure and return the top IMProvNode.
        """
        node = IMProvNode(self.__class__.__name__, None,
                          Name=str(self.name),
                          Type=str(self.type),
                          Workflow=str(self.workflow))
        appNode = IMProvNode("Application")
        for key, val in self.application.items():
            appNode.addNode(IMProvNode(key, None, Value=val))
        appConNode = IMProvNode("ApplicationControl")
        for key, val in self.applicationControls.items():
            # unset controls are omitted from the serialisation
            if val is None:
                continue
            appConNode.addNode(IMProvNode(key, None, Value=val))
        inputNode = IMProvNode("InputDatasets")
        for inpDS in self._InputDatasets:
            inputNode.addNode(inpDS.save())
        outputNode = IMProvNode("OutputDatasets")
        for outDS in self._OutputDatasets:
            outputNode.addNode(outDS.save())
        pileupNode = IMProvNode("PileupDatasets")
        for puDS in self._PileupDatasets:
            pileupNode.addNode(puDS.save())
        inpLinksNode = IMProvNode("InputLinks")
        for iLink in self._InputLinks:
            inpLinksNode.addNode(iLink.save())
        scriptsNode = IMProvNode("ScriptControls")
        for key, scriptList in self.scriptControls.items():
            scriptListNode = IMProvNode("ScriptList", None, Name=key)
            # plain loop: was a list comprehension used for side effects
            for x in scriptList:
                scriptListNode.addNode(IMProvNode("Script", None, Value=x))
            scriptsNode.addNode(scriptListNode)
        # Legacy raw config is base64 encoded; otherwise the structured
        # CMSSWConfig serialises itself
        if self.cfgInterface is None:
            configNode = IMProvNode("Configuration",
                                    base64.encodestring(self.configuration),
                                    Encoding="base64")
        else:
            configNode = self.cfgInterface.save()
        node.addNode(appNode)
        node.addNode(appConNode)
        node.addNode(scriptsNode)
        node.addNode(inputNode)
        node.addNode(outputNode)
        node.addNode(pileupNode)
        node.addNode(inpLinksNode)
        node.addNode(configNode)
        if self.userSandbox is not None:
            sandboxNode = IMProvNode("UserSandbox", self.userSandbox)
            node.addNode(sandboxNode)
        for child in self.children:
            node.addNode(child.makeIMProv())
        return node

    def __str__(self):
        """string rep for easy inspection"""
        return str(self.makeIMProv())

    def operate(self, operator):
        """
        _operate_

        Recursive callable operation over a payloadNode tree
        starting from this node.

        operator must be a callable object or function, that accepts
        a single argument, that argument being the current node being
        operated on.
        """
        operator(self)
        for child in self.children:
            child.operate(operator)
        return

    def populate(self, improvNode):
        """
        _populate_

        Extract details of this node from improvNode and instantiate
        and populate any children found.
        """
        self.unpackPayloadNodeData(improvNode)
        # //
        # // Recursively handle children
        # //
        childQ = IMProvQuery("/PayloadNode/PayloadNode")
        childNodes = childQ(improvNode)
        for item in childNodes:
            newChild = PayloadNode()
            self.addNode(newChild)
            newChild.populate(item)
        return

    def unpackPayloadNodeData(self, improvNode):
        """
        _unpackPayloadNodeData_

        Unpack PayloadNode data from improv Node provided and add
        information to self.
        """
        self.name = str(improvNode.attrs["Name"])
        self.type = str(improvNode.attrs["Type"])
        workflowName = improvNode.attrs.get("Workflow", None)
        if workflowName is not None:
            self.workflow = str(workflowName)
        # //
        # // App details
        # //
        appDataQ = IMProvQuery("/%s/Application" % self.__class__.__name__)
        appData = appDataQ(improvNode)[0]
        for appField in appData.children:
            field = str(appField.name)
            value = str(appField.attrs["Value"])
            self.application[field] = value
        # //
        # // App Control details
        # //
        appConDataQ = IMProvQuery("/%s/ApplicationControl/*" % self.__class__.__name__)
        appConData = appConDataQ(improvNode)
        for appConField in appConData:
            field = str(appConField.name)
            value = str(appConField.attrs["Value"])
            self.applicationControls[field] = value
        # //
        # // Script Controls: unnamed lists / unnamed scripts skipped
        # //
        scriptConQ = IMProvQuery("/%s/ScriptControls/ScriptList" % self.__class__.__name__)
        scriptLists = scriptConQ(improvNode)
        for scriptList in scriptLists:
            listName = scriptList.attrs.get("Name", None)
            if listName is None:
                continue
            listName = str(listName)
            for script in scriptList.children:
                scriptName = script.attrs.get("Value", None)
                if scriptName is None:
                    continue
                self.scriptControls[listName].append(str(scriptName))
        # //
        # // Dataset details: input, output and pileup sections all
        # //  share the same DatasetInfo load-and-append pattern
        for sectionName, target in (
                ("InputDatasets", self._InputDatasets),
                ("OutputDatasets", self._OutputDatasets),
                ("PileupDatasets", self._PileupDatasets)):
            dsQ = IMProvQuery("/%s/%s/DatasetInfo" % (
                self.__class__.__name__, sectionName))
            for item in dsQ(improvNode):
                newDS = DatasetInfo()
                newDS.load(item)
                target.append(newDS)
        # //
        # // Input Links
        # //
        inpLinkQ = IMProvQuery("/%s/InputLinks/InputLink" % self.__class__.__name__)
        inpLinks = inpLinkQ(improvNode)
        for ilink in inpLinks:
            newLink = InputLink()
            newLink.load(ilink)
            self._InputLinks.append(newLink)
        # //
        # // Configuration
        # //
        configQ = IMProvQuery("/%s/Configuration" % self.__class__.__name__)
        configNodes = configQ(improvNode)
        if len(configNodes) > 0:
            configNode = configNodes[0]
            self.configuration = base64.decodestring(str(configNode.chardata))
        cfgIntQ = IMProvQuery("/%s/CMSSWConfig" % self.__class__.__name__)
        cfgNodes = cfgIntQ(improvNode)
        if len(cfgNodes) > 0:
            cfgNode = cfgNodes[0]
            self.cfgInterface = CMSSWConfig()
            self.cfgInterface.load(cfgNode)
        # //
        # // User sandbox: last node wins when several are present
        # //
        sandboxQ = IMProvQuery("/%s/UserSandbox" % self.__class__.__name__)
        sandboxNodes = sandboxQ(improvNode)
        if len(sandboxNodes) > 0:
            sandboxNode = sandboxNodes[-1]
            self.userSandbox = str(sandboxNode.chardata)
        return
# // # // cfg python parser from CMSSW #// print "CMSSW python parser on %s \n ....it can take a while..."%cfgFile from FWCore.ParameterSet.Config import include from FWCore.ParameterSet.parsecf.pyparsing import * try: cmsCfg = include(cfgFile) except ParseException, ex: print "Error in CMSSW python parser: ParseException \n %s \n"%ex continue except ParseFatalException, ex: print "Error in CMSSW python parser: ParseFatalException \n %s \n"%ex continue cfgWrapper = CMSSWConfig() cfgWrapper.originalCfg = file(cfgFile).read() cfgInt = cfgWrapper.loadConfiguration(cmsCfg) cfgInt.validateForProduction() if testPythonMode: print "Test Python Mode:" print "python cfg parser successful for %s"% prodName print "EdmConfigHash successful for %s" % prodName # print "Python Config File: %s" % pycfgFile print "Hash: %s" % RealPSetHash continue if not workflowsOnly: # use MessageService
def __init__(self, cmsswConfigData, isString=False, appControls=None):
    """
    Initialise the generator with a CMSSWConfig template.

    - *cmsswConfigData* : a CMSSWConfig instance, or its packed string
      form when isString is True
    - *isString* : set True when cmsswConfigData is a packed string to
      be unpacked into a fresh CMSSWConfig
    - *appControls* : optional dict of application controls; defaults
      to an empty dict (was a shared mutable ``{}`` default — fixed to
      a None sentinel so instances never share state)
    """
    self.template = cmsswConfigData
    self.appControls = {} if appControls is None else appControls
    if isString:
        self.template = CMSSWConfig()
        self.template.unpack(cmsswConfigData)
class CfgGenerator:
    """
    _CfgGenerator_

    Callable factory that clones a template CMSSWConfig and stamps
    per-job details (file names, event counts, seeds) into the copy.
    """
    def __init__(self, cmsswConfigData, isString=False, appControls=None):
        """
        - *cmsswConfigData* : CMSSWConfig instance, or its packed string
          form when isString is True
        - *isString* : unpack cmsswConfigData into a fresh CMSSWConfig
        - *appControls* : optional dict of application controls
          (was a shared mutable ``{}`` default — fixed to a None
          sentinel so instances never share state)
        """
        self.template = cmsswConfigData
        self.appControls = {} if appControls is None else appControls
        if isString:
            self.template = CMSSWConfig()
            self.template.unpack(cmsswConfigData)

    def __call__(self, jobName, **args):
        """
        _operator()_

        Insert per job information into a copy of the template
        CMSSWConfig object and return it.

        Recognised keyword args: maxEvents, maxEventsWritten,
        skipEvents, firstEvent, firstRun, firstLumi, fileNames.
        """
        newCfg = self.template.lightweightClone()
        # //
        # // Output modules first: use the module name in the
        # //  parameters in case of multiple modules
        for modName in newCfg.outputModules.keys():
            outModule = newCfg.getOutputModule(modName)
            outModule['catalog'] = "%s-%s-Output.xml" % (jobName, modName)
            outModule['fileName'] = "%s-%s.root" % (jobName, modName)
            outModule['logicalFileName'] = "%s-%s.root" % (jobName, modName)
            # prepend LFNBase when the module declares one
            # (`in` replaces py2-only has_key; same behaviour)
            if 'LFNBase' in outModule:
                outModule['logicalFileName'] = "%s/%s" % (
                    outModule['LFNBase'], outModule['logicalFileName'])
        maxEvents = args.get("maxEvents", None)
        if maxEvents is not None:
            selectionEff = self.appControls.get("SelectionEfficiency", None)
            evMultiplier = self.appControls.get("EventMultiplier", None)
            # //
            # // Adjust number of events for selection efficiency
            # //
            if selectionEff is not None:
                maxEvents = int(float(maxEvents) / float(selectionEff))
            # // If this node has an Event Multiplier, adjust maxEvents
            if evMultiplier is not None:
                maxEvents = int(maxEvents) * int(evMultiplier)
            newCfg.setInputMaxEvents(maxEvents)
        maxOutputEvents = args.get("maxEventsWritten", None)
        if maxOutputEvents is not None:
            newCfg.setOutputMaxEvents(maxOutputEvents)
        # source parameters copied through verbatim when supplied
        skipEvents = args.get("skipEvents", None)
        if skipEvents is not None:
            newCfg.sourceParams['skipEvents'] = skipEvents
        firstEvent = args.get("firstEvent", None)
        if firstEvent is not None:
            newCfg.sourceParams['firstEvent'] = firstEvent
        firstRun = args.get("firstRun", None)
        if firstRun is not None:
            newCfg.sourceParams['firstRun'] = firstRun
        firstLumi = args.get("firstLumi", None)
        if firstLumi is not None:
            newCfg.sourceParams['firstLuminosityBlock'] = firstLumi
        fileNames = args.get("fileNames", None)
        if fileNames is not None:
            # replace (not extend) the input file list
            newCfg.inputFiles = fileNames
        # one seed more than required, matching historical behaviour
        newCfg.seeds = [randomSeed() for _ in range(newCfg.requiredSeeds + 1)]
        return newCfg