def modifyTaskConfiguration(self, taskConf, firstTask=False, generator=False):
    """
    _modifyTaskConfiguration_

    Apply type casting and default values to the task configuration,
    following the argument specifications returned by
    getWorkloadArguments and getTaskArguments.
    """
    # Workload-level arguments: cast any value the task already carries.
    for name, spec in self.getWorkloadArguments().items():
        if name in taskConf:
            taskConf[name] = spec["type"](taskConf[name])

    # Task-level arguments: cast when present, otherwise take the default.
    for name, spec in self.getTaskArguments(firstTask, generator).items():
        if name in taskConf:
            taskConf[name] = spec["type"](taskConf[name])
        else:
            taskConf[name] = spec["default"]

    taskConf["PileupConfig"] = parsePileupConfig(taskConf["MCPileup"],
                                                 taskConf["DataPileup"])
    # Job splitting is only adjusted for the first task of the chain
    if firstTask:
        self.modifyJobSplitting(taskConf, generator)
    return
def testPileupFetcherOnMC(self):
    """
    Run the PileupFetcher against a MonteCarlo workload and verify the
    pileup configuration file is written into every task's sandbox area.
    """
    pileupMcArgs = MonteCarloWorkloadFactory.getTestArguments()
    pileupMcArgs["MCPileup"] = "/Cosmics/ComissioningHI-PromptReco-v1/RECO"
    pileupMcArgs["DataPileup"] = "/HighPileUp/Run2011A-v1/RAW"
    pileupMcArgs["CouchURL"] = os.environ["COUCHURL"]
    pileupMcArgs["CouchDBName"] = "pileupfetcher_t"
    pileupMcArgs["ConfigCacheID"] = self.injectGenerationConfig()
    factory = MonteCarloWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", pileupMcArgs)

    # now that the workload was created and args validated, we can add this PileupConfig
    pileupMcArgs["PileupConfig"] = parsePileupConfig(pileupMcArgs["MCPileup"],
                                                     pileupMcArgs["DataPileup"])

    # Since this is test of the fetcher - The loading from WMBS isn't
    # really necessary because the fetching happens before the workflow
    # is inserted into WMBS: feed the workload instance directly into fetcher:
    fetcher = PileupFetcher()
    creator = SandboxCreator()
    pathBase = "%s/%s" % (self.testDir, testWorkload.name())
    for topLevelTask in testWorkload.taskIterator():
        for taskNode in topLevelTask.nodeIterator():
            # this is how the call to PileupFetcher is happening
            # from the SandboxCreator test
            task = WMTask.WMTaskHelper(taskNode)
            taskPath = "%s/WMSandbox/%s" % (pathBase, task.name())
            fetcher.setWorkingDirectory(taskPath)
            # create Sandbox for the fetcher ...
            creator._makePathonPackage(taskPath)
            fetcher(task)
            self._queryPileUpConfigFile(pileupMcArgs, task, taskPath)
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a ReDigi workload with the given parameters.
    """
    DataProcessing.__call__(self, workloadName, arguments)

    # Transform the pileup as required by the CMSSW step
    self.pileupConfig = parsePileupConfig(self.mcPileup, self.dataPileup)

    # Adjust the pileup splitting
    self.procJobSplitArgs.setdefault("deterministicPileup", self.deterministicPileup)

    # Steps two and three fall back to the workload-level timePerEvent,
    # sizePerEvent and memory when not explicitly provided.
    fallbacks = [("stepTwoTimePerEvent", self.timePerEvent),
                 ("stepTwoSizePerEvent", self.sizePerEvent),
                 ("stepTwoMemory", self.memory),
                 ("stepThreeTimePerEvent", self.timePerEvent),
                 ("stepThreeSizePerEvent", self.sizePerEvent),
                 ("stepThreeMemory", self.memory)]
    for attrName, fallbackValue in fallbacks:
        if getattr(self, attrName) is None:
            setattr(self, attrName, fallbackValue)

    return self.buildWorkload()
def modifyTaskConfiguration(self, taskConf, firstTask=False, generator=False):
    """
    _modifyTaskConfiguration_

    Apply type casting and default values to the task configuration,
    following the argument specifications returned by
    getWorkloadCreateArgs and getChainCreateArgs.
    """
    # Workload-level arguments: cast any value the task already carries.
    for name, spec in self.getWorkloadCreateArgs().items():
        if name in taskConf:
            taskConf[name] = spec["type"](taskConf[name])

    # Chain-level arguments: cast when present, otherwise take the default.
    for name, spec in self.getChainCreateArgs(firstTask, generator).items():
        if name in taskConf:
            taskConf[name] = spec["type"](taskConf[name])
        else:
            taskConf[name] = spec["default"]

    taskConf["PileupConfig"] = parsePileupConfig(taskConf["MCPileup"],
                                                 taskConf["DataPileup"])
    # Job splitting is only adjusted for the first task of the chain
    if firstTask:
        self.modifyJobSplitting(taskConf, generator)
    return
def modifyTaskConfiguration(self, taskConf, firstTask=False, generator=False):
    """
    _modifyTaskConfiguration_

    Modify the TaskConfiguration according to the specifications
    in getWorkloadArguments and getTaskArguments. It does type
    casting and assigns default values, then derives the splitting
    arguments and the parsed pileup configuration.
    """
    # Task-level arguments: fill the default when missing, otherwise type-cast
    taskArguments = self.getTaskArguments(firstTask, generator)
    for argument in taskArguments:
        if argument not in taskConf:
            taskConf[argument] = taskArguments[argument]["default"]
        else:
            taskConf[argument] = taskArguments[argument]["type"](taskConf[argument])
    # Workload-level arguments are only type-cast, never defaulted here
    baseArguments = self.getWorkloadArguments()
    for argument in baseArguments:
        if argument in taskConf:
            taskConf[argument] = baseArguments[argument]["type"](taskConf[argument])

    if generator:
        # Generator tasks are always split by events
        taskConf["SplittingAlgo"] = "EventBased"
        # Adjust totalEvents according to the filter efficiency
        taskConf["RequestNumEvents"] = int(taskConf.get("RequestNumEvents", 0) / \
                                           taskConf.get("FilterEfficiency"))
        taskConf["SizePerEvent"] = taskConf.get("SizePerEvent", self.sizePerEvent) * \
                                   taskConf.get("FilterEfficiency")

    # Default to 8-hour jobs based on the configured time per event
    if taskConf["EventsPerJob"] is None:
        taskConf["EventsPerJob"] = (8.0 * 3600.0) / (taskConf.get("TimePerEvent", self.timePerEvent))
    if taskConf["EventsPerLumi"] is None:
        taskConf["EventsPerLumi"] = taskConf["EventsPerJob"]

    # Translate the splitting algorithm into its splitting arguments
    taskConf["SplittingArguments"] = {}
    if taskConf["SplittingAlgo"] == "EventBased" or taskConf["SplittingAlgo"] == "EventAwareLumiBased":
        taskConf["SplittingArguments"]["events_per_job"] = taskConf["EventsPerJob"]
        if taskConf["SplittingAlgo"] == "EventAwareLumiBased":
            taskConf["SplittingArguments"]["max_events_per_lumi"] = 20000
        else:
            taskConf["SplittingArguments"]["events_per_lumi"] = taskConf["EventsPerLumi"]
        taskConf["SplittingArguments"]["lheInputFiles"] = taskConf["LheInputFiles"]
    elif taskConf["SplittingAlgo"] == "LumiBased":
        taskConf["SplittingArguments"]["lumis_per_job"] = taskConf["LumisPerJob"]
    elif taskConf["SplittingAlgo"] == "FileBased":
        taskConf["SplittingArguments"]["files_per_job"] = taskConf["FilesPerJob"]

    taskConf["PileupConfig"] = parsePileupConfig(taskConf["MCPileup"], taskConf["DataPileup"])
    return
def modifyTaskConfiguration(self, taskConf, firstTask=False, generator=False):
    """
    _modifyTaskConfiguration_

    Modify the TaskConfiguration according to the specifications
    in getWorkloadCreateArgs and getChainCreateArgs. It does type
    casting and assigns default values if key is not present, unless
    default value is None.
    """
    taskArguments = self.getChainCreateArgs(firstTask, generator)
    for argument in taskArguments:
        # Defaults are only applied when they are not None
        if argument not in taskConf and taskArguments[argument]["default"] is not None:
            taskConf[argument] = taskArguments[argument]["default"]
        elif argument in taskConf:
            taskConf[argument] = taskArguments[argument]["type"](taskConf[argument])

    if generator:
        # Generator tasks are always split by events
        taskConf["SplittingAlgo"] = "EventBased"
        # Adjust totalEvents according to the filter efficiency
        taskConf["RequestNumEvents"] = int(taskConf.get("RequestNumEvents", 0) / \
                                           taskConf.get("FilterEfficiency"))
        taskConf["SizePerEvent"] = taskConf.get("SizePerEvent", self.sizePerEvent) * \
                                   taskConf.get("FilterEfficiency")

    # Translate the splitting algorithm into its splitting arguments
    taskConf["SplittingArguments"] = {}
    if taskConf["SplittingAlgo"] in ["EventBased", "EventAwareLumiBased"]:
        # Default to 8-hour jobs based on the configured time per event
        if taskConf.get("EventsPerJob") is None:
            taskConf["EventsPerJob"] = int((8.0 * 3600.0) / taskConf.get("TimePerEvent", self.timePerEvent))
        if taskConf.get("EventsPerLumi") is None:
            taskConf["EventsPerLumi"] = taskConf["EventsPerJob"]
        taskConf["SplittingArguments"]["events_per_job"] = taskConf["EventsPerJob"]
        if taskConf["SplittingAlgo"] == "EventAwareLumiBased":
            taskConf["SplittingArguments"]["max_events_per_lumi"] = 20000
        else:
            taskConf["SplittingArguments"]["events_per_lumi"] = taskConf["EventsPerLumi"]
        taskConf["SplittingArguments"]["lheInputFiles"] = taskConf["LheInputFiles"]
    elif taskConf["SplittingAlgo"] == "LumiBased":
        taskConf["SplittingArguments"]["lumis_per_job"] = taskConf["LumisPerJob"]
    elif taskConf["SplittingAlgo"] == "FileBased":
        taskConf["SplittingArguments"]["files_per_job"] = taskConf["FilesPerJob"]

    taskConf["PileupConfig"] = parsePileupConfig(taskConf.get("MCPileup"),
                                                 taskConf.get("DataPileup"))
    # Adjust the pileup splitting
    taskConf["SplittingArguments"].setdefault("deterministicPileup", taskConf['DeterministicPileup'])
    return
def testPileupFetcherOnMC(self):
    """
    Run the PileupFetcher against a TaskChain workload and verify the
    pileup configuration file is written into every task's sandbox area.
    """
    pileupMcArgs = TaskChainWorkloadFactory.getTestArguments()
    pileupMcArgs['Task1']["MCPileup"] = "/Cosmics/ComissioningHI-PromptReco-v1/RECO"
    pileupMcArgs['Task1']["DataPileup"] = "/HighPileUp/Run2011A-v1/RAW"
    pileupMcArgs['Task1']["ConfigCacheID"] = self.injectGenerationConfig()
    pileupMcArgs["CouchDBName"] = "pileupfetcher_t"
    pileupMcArgs["CouchURL"] = os.environ["COUCHURL"]
    factory = TaskChainWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", pileupMcArgs)

    # now that the workload was created and args validated, we can add this PileupConfig
    pileupMcArgs["PileupConfig"] = parsePileupConfig(pileupMcArgs['Task1']["MCPileup"],
                                                     pileupMcArgs['Task1']["DataPileup"])

    # Since this is test of the fetcher - The loading from WMBS isn't
    # really necessary because the fetching happens before the workflow
    # is inserted into WMBS: feed the workload instance directly into fetcher:
    fetcher = PileupFetcher()
    creator = SandboxCreator()
    pathBase = "%s/%s" % (self.testDir, testWorkload.name())
    for topLevelTask in testWorkload.taskIterator():
        for taskNode in topLevelTask.nodeIterator():
            # this is how the call to PileupFetcher is happening
            # from the SandboxCreator test
            task = WMTask.WMTaskHelper(taskNode)
            taskPath = "%s/WMSandbox/%s" % (pathBase, task.name())
            fetcher.setWorkingDirectory(taskPath)
            # create Sandbox for the fetcher ...
            creator._makePathonPackage(taskPath)
            fetcher(task)
            self._queryPileUpConfigFile(pileupMcArgs, task, taskPath)
def modifyTaskConfiguration(self, taskConf, firstTask = False, generator = False):
    """
    _modifyTaskConfiguration_

    Modify the TaskConfiguration according to the specifications
    in getWorkloadArguments and getTaskArguments. It does type
    casting and assigns default values, then derives the splitting
    arguments and the parsed pileup configuration.
    """
    # Task-level arguments: fill the default when missing, otherwise type-cast
    taskArguments = self.getTaskArguments(firstTask, generator)
    for argument in taskArguments:
        if argument not in taskConf:
            taskConf[argument] = taskArguments[argument]["default"]
        else:
            taskConf[argument] = taskArguments[argument]["type"](taskConf[argument])
    # Workload-level arguments are only type-cast, never defaulted here
    baseArguments = self.getWorkloadArguments()
    for argument in baseArguments:
        if argument in taskConf:
            taskConf[argument] = baseArguments[argument]["type"](taskConf[argument])

    # Default to 8-hour jobs based on the configured time per event
    if taskConf["EventsPerJob"] is None:
        taskConf["EventsPerJob"] = (8.0 * 3600.0)/(taskConf.get("TimePerEvent", self.timePerEvent))
    # Generator tasks are always split by events
    if generator:
        taskConf["SplittingAlgo"] = "EventBased"

    # Translate the splitting algorithm into its splitting arguments
    taskConf["SplittingArguments"] = {}
    if taskConf["SplittingAlgo"] == "EventBased" or taskConf["SplittingAlgo"] == "EventAwareLumiBased":
        taskConf["SplittingArguments"]["events_per_job"] = taskConf["EventsPerJob"]
        if taskConf["SplittingAlgo"] == "EventAwareLumiBased":
            taskConf["SplittingArguments"]["max_events_per_lumi"] = 20000
    elif taskConf["SplittingAlgo"] == "LumiBased":
        taskConf["SplittingArguments"]["lumis_per_job"] = taskConf["LumisPerJob"]
    elif taskConf["SplittingAlgo"] == "FileBased":
        taskConf["SplittingArguments"]["files_per_job"] = taskConf["FilesPerJob"]

    taskConf["PileupConfig"] = parsePileupConfig(taskConf["MCPileup"], taskConf["DataPileup"])
    return
def setupNextSteps(self, task, origArgs):
    """
    _setupNextSteps_

    Modify the step one task to include N more CMSSW steps and
    chain the output between all three steps.
    """
    configCacheUrl = self.configCacheUrl or self.couchURL
    # Map each step's user-given StepName to its (StepN, cmsRunN) pair so
    # later steps can resolve their 'InputStep' reference.
    stepMapping = {}
    stepMapping.setdefault(origArgs['Step1']['StepName'], ('Step1', 'cmsRun1'))
    for i in range(2, self.stepChain + 1):
        currentStepNumber = "Step%d" % i
        currentCmsRun = "cmsRun%d" % i
        stepMapping.setdefault(origArgs[currentStepNumber]['StepName'], (currentStepNumber, currentCmsRun))
        # Work on a copy of the step arguments so origArgs stays untouched
        taskConf = {}
        for k, v in origArgs[currentStepNumber].iteritems():
            taskConf[k] = v
        parentStepNumber = stepMapping.get(taskConf['InputStep'])[0]
        parentCmsRun = stepMapping.get(taskConf['InputStep'])[1]
        parentCmsswStep = task.getStep(parentCmsRun)
        parentCmsswStepHelper = parentCmsswStep.getTypeHelper()
        # Set default values for the task parameters; a step without an
        # InputDataset is treated as a generator step
        self.modifyTaskConfiguration(taskConf, False, 'InputDataset' not in taskConf)
        globalTag = taskConf.get("GlobalTag", self.globalTag)
        frameworkVersion = taskConf.get("CMSSWVersion", self.frameworkVersion)
        scramArch = taskConf.get("ScramArch", self.scramArch)
        childCmssw = parentCmsswStep.addTopStep(currentCmsRun)
        childCmssw.setStepType("CMSSW")
        template = StepFactory.getStepTemplate("CMSSW")
        template(childCmssw.data)
        childCmsswStepHelper = childCmssw.getTypeHelper()
        childCmsswStepHelper.setGlobalTag(globalTag)
        childCmsswStepHelper.setupChainedProcessing(parentCmsRun, taskConf['InputFromOutputModule'])
        childCmsswStepHelper.cmsswSetup(frameworkVersion, softwareEnvironment="", scramArch=scramArch)
        childCmsswStepHelper.setConfigCache(configCacheUrl, taskConf['ConfigCacheID'], self.couchDBName)
        # Pileup check
        taskConf["PileupConfig"] = parsePileupConfig(taskConf["MCPileup"], taskConf["DataPileup"])
        if taskConf["PileupConfig"]:
            self.setupPileup(task, taskConf['PileupConfig'])
        # Handling the output modules
        parentKeepOutput = strToBool(origArgs[parentStepNumber].get('KeepOutput', True))
        parentCmsswStepHelper.keepOutput(parentKeepOutput)
        childKeepOutput = strToBool(taskConf.get('KeepOutput', True))
        childCmsswStepHelper.keepOutput(childKeepOutput)
        self.setupOutputModules(task, taskConf["ConfigCacheID"], currentCmsRun, childKeepOutput, taskConf['StepName'])
    # Closing out the task configuration. The last step output must be saved/merged
    childCmsswStepHelper.keepOutput(True)
    return
def modifyTaskConfiguration(self, taskConf, firstTask=False, generator=False):
    """
    _modifyTaskConfiguration_

    Modify the TaskConfiguration according to the specifications
    in getWorkloadCreateArgs and getChainCreateArgs. It does type
    casting and assigns default values if key is not present, unless
    default value is None.
    """
    taskArguments = self.getChainCreateArgs(firstTask, generator)
    for argument in taskArguments:
        # Defaults are only applied when they are not None
        if argument not in taskConf and taskArguments[argument]["default"] is not None:
            taskConf[argument] = taskArguments[argument]["default"]
        elif argument in taskConf:
            taskConf[argument] = taskArguments[argument]["type"](taskConf[argument])

    if generator:
        # Generator tasks are always split by events
        taskConf["SplittingAlgo"] = "EventBased"
        # Adjust totalEvents according to the filter efficiency
        taskConf["RequestNumEvents"] = int(taskConf.get("RequestNumEvents", 0) / \
                                           taskConf.get("FilterEfficiency"))
        taskConf["SizePerEvent"] = taskConf.get("SizePerEvent", self.sizePerEvent) * \
                                   taskConf.get("FilterEfficiency")

    # Translate the splitting algorithm into its splitting arguments
    taskConf["SplittingArguments"] = {}
    if taskConf["SplittingAlgo"] in ["EventBased", "EventAwareLumiBased"]:
        taskConf["EventsPerJob"], taskConf["EventsPerLumi"] = StdBase.calcEvtsPerJobLumi(taskConf.get("EventsPerJob"),
                                                                                         taskConf.get("EventsPerLumi"),
                                                                                         taskConf.get("TimePerEvent", self.timePerEvent),
                                                                                         taskConf.get("RequestNumEvents"))
        # Keep the first task's values on the workload instance
        if firstTask:
            self.eventsPerJob = taskConf["EventsPerJob"]
            self.eventsPerLumi = taskConf["EventsPerLumi"]
        taskConf["SplittingArguments"]["events_per_job"] = taskConf["EventsPerJob"]
        if taskConf["SplittingAlgo"] == "EventBased":
            taskConf["SplittingArguments"]["events_per_lumi"] = taskConf["EventsPerLumi"]
        else:
            taskConf["SplittingArguments"]["job_time_limit"] = 48 * 3600  # 2 days
        taskConf["SplittingArguments"]["lheInputFiles"] = taskConf["LheInputFiles"]
    elif taskConf["SplittingAlgo"] == "LumiBased":
        taskConf["SplittingArguments"]["lumis_per_job"] = taskConf["LumisPerJob"]
    elif taskConf["SplittingAlgo"] == "FileBased":
        taskConf["SplittingArguments"]["files_per_job"] = taskConf["FilesPerJob"]

    taskConf["SplittingArguments"].setdefault("include_parents", taskConf['IncludeParents'])

    taskConf["PileupConfig"] = parsePileupConfig(taskConf.get("MCPileup"), taskConf.get("DataPileup"))
    # Adjust the pileup splitting
    taskConf["SplittingArguments"].setdefault("deterministicPileup", taskConf['DeterministicPileup'])
    return
def modifyTaskConfiguration(self, taskConf, firstTask=False, generator=False):
    """
    _modifyTaskConfiguration_

    Modify the TaskConfiguration according to the specifications
    in getWorkloadArguments and getTaskArguments. It does type
    casting and assigns default values, then derives the splitting
    arguments and the parsed pileup configuration.
    """
    # Task-level arguments: fill the default when missing, otherwise type-cast
    taskArguments = self.getTaskArguments(firstTask, generator)
    for argument in taskArguments:
        if argument not in taskConf:
            taskConf[argument] = taskArguments[argument]["default"]
        else:
            taskConf[argument] = taskArguments[argument]["type"](taskConf[argument])
    # Workload-level arguments are only type-cast, never defaulted here
    baseArguments = self.getWorkloadArguments()
    for argument in baseArguments:
        if argument in taskConf:
            taskConf[argument] = baseArguments[argument]["type"](taskConf[argument])

    if generator:
        # Generator tasks are always split by events
        taskConf["SplittingAlgo"] = "EventBased"
        # Adjust totalEvents according to the filter efficiency
        taskConf["RequestNumEvents"] = int(taskConf.get("RequestNumEvents", 0) / \
                                           taskConf.get("FilterEfficiency"))
        taskConf["SizePerEvent"] = taskConf.get("SizePerEvent", self.sizePerEvent) * \
                                   taskConf.get("FilterEfficiency")

    # Default to 8-hour jobs based on the configured time per event
    if taskConf["EventsPerJob"] is None:
        taskConf["EventsPerJob"] = int((8.0 * 3600.0) / (taskConf.get("TimePerEvent", self.timePerEvent)))
    if taskConf["EventsPerLumi"] is None:
        taskConf["EventsPerLumi"] = taskConf["EventsPerJob"]

    # Translate the splitting algorithm into its splitting arguments
    taskConf["SplittingArguments"] = {}
    if taskConf["SplittingAlgo"] == "EventBased" or taskConf["SplittingAlgo"] == "EventAwareLumiBased":
        taskConf["SplittingArguments"]["events_per_job"] = taskConf["EventsPerJob"]
        if taskConf["SplittingAlgo"] == "EventAwareLumiBased":
            taskConf["SplittingArguments"]["max_events_per_lumi"] = 20000
        else:
            taskConf["SplittingArguments"]["events_per_lumi"] = taskConf["EventsPerLumi"]
        taskConf["SplittingArguments"]["lheInputFiles"] = taskConf["LheInputFiles"]
    elif taskConf["SplittingAlgo"] == "LumiBased":
        taskConf["SplittingArguments"]["lumis_per_job"] = taskConf["LumisPerJob"]
    elif taskConf["SplittingAlgo"] == "FileBased":
        taskConf["SplittingArguments"]["files_per_job"] = taskConf["FilesPerJob"]

    taskConf["PileupConfig"] = parsePileupConfig(taskConf["MCPileup"], taskConf["DataPileup"])
    # Adjust the pileup splitting
    taskConf["SplittingArguments"].setdefault("deterministicPileup", taskConf['DeterministicPileup'])
    return
def buildWorkload(self):
    """
    _buildWorkload_

    Build a workflow for a MonteCarlo request.  This means a production
    config and merge tasks for each output module.

    Returns the populated workload object.
    """
    self.commonWorkload()
    prodTask = self.workload.newTask("PrivateMC")

    self.workload.setWorkQueueSplitPolicy("MonteCarlo",
                                          self.analysisJobSplitAlgo,
                                          self.analysisJobSplitArgs)
    self.workload.setEndPolicy("SingleShot")
    outputMods = self.setupProcessingTask(prodTask, "PrivateMC", None,
                                          couchURL=self.couchURL,
                                          couchDBName=self.couchDBName,
                                          configCacheUrl=self.configCacheUrl,
                                          configDoc=self.configCacheID,
                                          splitAlgo=self.analysisJobSplitAlgo,
                                          splitArgs=self.analysisJobSplitArgs,
                                          seeding=self.seeding,
                                          totalEvents=self.totalEvents,
                                          userSandbox=self.userSandbox,
                                          userFiles=self.userFiles)
    self.setUserOutput(prodTask)

    # Pileup configuration for the first generation task
    self.pileupConfig = parsePileupConfig(self.mcPileup, self.dataPileup)
    # Pile up support
    if self.pileupConfig:
        self.setupPileup(prodTask, self.pileupConfig)

    # setting the parameters which need to be set for all the tasks
    # sets acquisitionEra, processingVersion, processingString
    self.workload.setTaskPropertiesFromWorkload()

    # set the LFN bases (normally done by request manager)
    # also pass runNumber (workload evaluates it)
    # Fix: the original called `workload.setLFNBase(...)` on an undefined
    # module-level name; the workload instance lives on `self.workload`
    # (see the sibling buildWorkload implementation).
    self.workload.setLFNBase(self.mergedLFNBase, self.unmergedLFNBase,
                             runNumber=self.runNumber)
    return self.workload
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a MonteCarloFromGEN workload with the given parameters.
    """
    DataProcessing.__call__(self, workloadName, arguments)

    # The CMSSW step expects the pileup in its parsed dictionary form
    puConfig = parsePileupConfig(self.mcPileup, self.dataPileup)
    self.pileupConfig = puConfig

    # Adjust the pileup splitting, without clobbering an existing value
    if "deterministicPileup" not in self.procJobSplitArgs:
        self.procJobSplitArgs["deterministicPileup"] = self.deterministicPileup

    return self.buildWorkload()
def buildWorkload(self):
    """
    _buildWorkload_

    Build a workflow for a MonteCarlo request.  This means a production
    config and merge tasks for each output module.
    """
    self.commonWorkload()
    prodTask = self.workload.newTask("PrivateMC")

    self.workload.setWorkQueueSplitPolicy("MonteCarlo", self.analysisJobSplitAlgo,
                                          self.analysisJobSplitArgs)
    self.workload.setEndPolicy("SingleShot")
    outputMods = self.setupProcessingTask(prodTask, "PrivateMC", None,
                                          couchURL = self.couchURL, couchDBName = self.couchDBName,
                                          configCacheUrl = self.configCacheUrl,
                                          configDoc = self.configCacheID,
                                          splitAlgo = self.analysisJobSplitAlgo,
                                          splitArgs = self.analysisJobSplitArgs,
                                          seeding = self.seeding,
                                          totalEvents = self.totalEvents,
                                          userSandbox = self.userSandbox,
                                          userFiles = self.userFiles)
    self.setUserOutput(prodTask)

    # Pileup configuration for the first generation task
    self.pileupConfig = parsePileupConfig(self.mcPileup, self.dataPileup)
    # Pile up support
    if self.pileupConfig:
        self.setupPileup(prodTask, self.pileupConfig)

    # setting the parameters which need to be set for all the tasks
    # sets acquisitionEra, processingVersion, processingString
    self.workload.setTaskPropertiesFromWorkload()

    # set the LFN bases (normally done by request manager)
    # also pass runNumber (workload evaluates it)
    self.workload.setLFNBase(self.mergedLFNBase, self.unmergedLFNBase,
                             runNumber = self.runNumber)
    return self.workload
def __call__(self, workloadName, arguments):
    """
    Store the arguments in attributes with the proper formatting.
    """
    StdBase.__call__(self, workloadName, arguments)

    # Adjust the events by the filter efficiency
    self.totalEvents = int(self.requestNumEvents / self.filterEfficiency)

    # We don't write out every event in MC,
    # adjust the size per event accordingly
    self.sizePerEvent = self.sizePerEvent * self.filterEfficiency

    # Tune the splitting, only EventBased is allowed for MonteCarlo
    # 8h jobs are CMS standard, set the default with that in mind
    self.prodJobSplitAlgo = "EventBased"
    self.eventsPerJob, self.eventsPerLumi = StdBase.calcEvtsPerJobLumi(self.eventsPerJob,
                                                                       self.eventsPerLumi,
                                                                       self.timePerEvent)
    self.prodJobSplitArgs = {"events_per_job": self.eventsPerJob,
                             "events_per_lumi": self.eventsPerLumi,
                             "lheInputFiles": self.lheInputFiles}

    # Transform the pileup as required by the CMSSW step
    self.pileupConfig = parsePileupConfig(self.mcPileup, self.dataPileup)
    # Adjust the pileup splitting
    self.prodJobSplitArgs.setdefault("deterministicPileup", self.deterministicPileup)

    # Production can be extending statistics,
    # need to move the initial lfn counter
    self.previousJobCount = 0
    if self.firstLumi > 1:
        self.previousJobCount = int(math.ceil((self.firstEvent - 1) / self.eventsPerJob))
        self.prodJobSplitArgs["initial_lfn_counter"] = self.previousJobCount

    # Feed values back to save in couch
    arguments['EventsPerJob'] = self.eventsPerJob

    return self.buildWorkload()
def __call__(self, workloadName, arguments):
    """
    Store the arguments in attributes with the proper formatting.
    """
    StdBase.__call__(self, workloadName, arguments)

    # Adjust the events by the filter efficiency
    self.totalEvents = int(self.requestNumEvents / self.filterEfficiency)

    # We don't write out every event in MC,
    # adjust the size per event accordingly
    self.sizePerEvent = self.sizePerEvent * self.filterEfficiency

    # Tune the splitting, only EventBased is allowed for MonteCarlo
    # 8h jobs are CMS standard, set the default with that in mind
    self.prodJobSplitAlgo = "EventBased"
    self.eventsPerJob, self.eventsPerLumi = StdBase.calcEvtsPerJobLumi(self.eventsPerJob,
                                                                       self.eventsPerLumi,
                                                                       self.timePerEvent)
    self.prodJobSplitArgs = {"events_per_job": self.eventsPerJob,
                             "events_per_lumi": self.eventsPerLumi,
                             "lheInputFiles": self.lheInputFiles}

    # Transform the pileup as required by the CMSSW step
    self.pileupConfig = parsePileupConfig(self.mcPileup, self.dataPileup)
    # Adjust the pileup splitting
    self.prodJobSplitArgs.setdefault("deterministicPileup", self.deterministicPileup)

    # Production can be extending statistics,
    # need to move the initial lfn counter
    self.previousJobCount = 0
    if self.firstLumi > 1:
        self.previousJobCount = int(math.ceil((self.firstEvent - 1) / self.eventsPerJob))
        self.prodJobSplitArgs["initial_lfn_counter"] = self.previousJobCount

    # Feed values back to save in couch
    arguments['EventsPerJob'] = self.eventsPerJob

    return self.buildWorkload()
def testInputPileup(self):
    """
    _testInputPileup_

    Verify that the input pileup dataset getter/setter methods work correctly
    """
    task = makeWMTask("TestTask")
    # A fresh task starts with no pileup datasets attached
    self.assertEqual(task.getInputPileupDatasets(), [])

    puConfig = parsePileupConfig("/MC/ProcessedDataset/DataTier",
                                 "/Data/ProcessedDataset/DataTier")
    # then mimic the setupPileup method
    expectedPU = []
    for puList in puConfig.values():
        # there should be only one type and one PU dataset
        task.setInputPileupDatasets(puList)
        expectedPU.extend(puList)
    self.assertItemsEqual(task.getInputPileupDatasets(), expectedPU)

    # Setting None must be rejected
    with self.assertRaises(ValueError):
        task.setInputPileupDatasets(None)
def __call__(self, workloadName, arguments):
    """
    Store the arguments in attributes with the proper formatting.
    """
    StdBase.__call__(self, workloadName, arguments)
    # Adjust the events by the filter efficiency
    self.totalEvents = int(self.requestNumEvents / self.filterEfficiency)

    # We don't write out every event in MC,
    # adjust the size per event accordingly
    self.sizePerEvent = self.sizePerEvent * self.filterEfficiency

    # Tune the splitting, only EventBased is allowed for MonteCarlo
    # 8h jobs are CMS standard, set the default with that in mind
    self.prodJobSplitAlgo = "EventBased"
    if self.eventsPerJob is None:
        self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
    if self.eventsPerLumi is None:
        self.eventsPerLumi = self.eventsPerJob
    self.prodJobSplitArgs = {"events_per_job": self.eventsPerJob,
                             "events_per_lumi": self.eventsPerLumi,
                             "lheInputFiles": self.lheInputFiles,
                             }

    # Transform the pileup as required by the CMSSW step
    self.pileupConfig = parsePileupConfig(self.mcPileup, self.dataPileup)

    # Production can be extending statistics,
    # need to move the initial lfn counter
    self.previousJobCount = 0
    if self.firstLumi > 1:
        lumisPerJob = int(float(self.eventsPerJob) / self.eventsPerLumi)
        # NOTE(review): `/` here looks like Python-2 integer division when
        # both operands are ints — confirm the intended rounding behavior
        self.previousJobCount = self.firstLumi / lumisPerJob
        self.prodJobSplitArgs["initial_lfn_counter"] = self.previousJobCount

    return self.buildWorkload()
def buildWorkload(self):
    """
    _buildWorkload_

    Build a workflow for a MonteCarlo request.  This means a production
    config and merge tasks for each output module.
    """
    self.commonWorkload()
    prodTask = self.workload.newTask("PrivateMC")

    self.workload.setWorkQueueSplitPolicy("MonteCarlo",
                                          self.analysisJobSplitAlgo,
                                          self.analysisJobSplitArgs)
    self.workload.setEndPolicy("SingleShot")

    # All keyword settings for the single generation task
    taskSettings = dict(couchURL=self.couchURL,
                        couchDBName=self.couchDBName,
                        configCacheUrl=self.configCacheUrl,
                        configDoc=self.configCacheID,
                        splitAlgo=self.analysisJobSplitAlgo,
                        splitArgs=self.analysisJobSplitArgs,
                        seeding=self.seeding,
                        totalEvents=self.totalEvents,
                        userSandbox=self.userSandbox,
                        userFiles=self.userFiles)
    self.setupProcessingTask(prodTask, "PrivateMC", None, **taskSettings)
    self.setUserOutput(prodTask)

    # Pileup configuration for the first generation task
    self.pileupConfig = parsePileupConfig(self.mcPileup, self.dataPileup)
    # Pile up support
    if self.pileupConfig:
        self.setupPileup(prodTask, self.pileupConfig)

    return self.workload
def setupNextSteps(self, task, origArgs):
    """
    _setupNextSteps_

    Modify the step one task to include N more CMSSW steps and
    chain the output between all three steps.
    """
    configCacheUrl = self.configCacheUrl or self.couchURL
    # Map each step's user-given StepName to its (StepN, cmsRunN) pair so
    # later steps can resolve their 'InputStep' reference.
    stepMapping = {}
    stepMapping.setdefault(origArgs['Step1']['StepName'], ('Step1', 'cmsRun1'))
    for i in range(2, self.stepChain + 1):
        currentStepNumber = "Step%d" % i
        currentCmsRun = "cmsRun%d" % i
        stepMapping.setdefault(origArgs[currentStepNumber]['StepName'],
                               (currentStepNumber, currentCmsRun))
        # Work on a copy of the step arguments so origArgs stays untouched
        taskConf = {}
        for k, v in origArgs[currentStepNumber].iteritems():
            taskConf[k] = v
        parentStepNumber = stepMapping.get(taskConf['InputStep'])[0]
        parentCmsRun = stepMapping.get(taskConf['InputStep'])[1]
        parentCmsswStep = task.getStep(parentCmsRun)
        parentCmsswStepHelper = parentCmsswStep.getTypeHelper()
        # Set default values for the task parameters; a step without an
        # InputDataset is treated as a generator step
        self.modifyTaskConfiguration(taskConf, False, 'InputDataset' not in taskConf)
        globalTag = taskConf.get("GlobalTag", self.globalTag)
        frameworkVersion = taskConf.get("CMSSWVersion", self.frameworkVersion)
        scramArch = taskConf.get("ScramArch", self.scramArch)
        childCmssw = parentCmsswStep.addTopStep(currentCmsRun)
        childCmssw.setStepType("CMSSW")
        template = StepFactory.getStepTemplate("CMSSW")
        template(childCmssw.data)
        childCmsswStepHelper = childCmssw.getTypeHelper()
        childCmsswStepHelper.setGlobalTag(globalTag)
        childCmsswStepHelper.setupChainedProcessing(parentCmsRun, taskConf['InputFromOutputModule'])
        childCmsswStepHelper.cmsswSetup(frameworkVersion, softwareEnvironment="", scramArch=scramArch)
        childCmsswStepHelper.setConfigCache(configCacheUrl, taskConf['ConfigCacheID'], self.couchDBName)
        # Pileup check
        taskConf["PileupConfig"] = parsePileupConfig(taskConf["MCPileup"], taskConf["DataPileup"])
        if taskConf["PileupConfig"]:
            self.setupPileup(task, taskConf['PileupConfig'])
        # Handling the output modules
        parentKeepOutput = strToBool(origArgs[parentStepNumber].get('KeepOutput', True))
        parentCmsswStepHelper.keepOutput(parentKeepOutput)
        childKeepOutput = strToBool(taskConf.get('KeepOutput', True))
        childCmsswStepHelper.keepOutput(childKeepOutput)
        self.setupOutputModules(task, taskConf["ConfigCacheID"], currentCmsRun,
                                childKeepOutput, taskConf['StepName'])
    # Closing out the task configuration. The last step output must be saved/merged
    childCmsswStepHelper.keepOutput(True)
    return
def setupNextSteps(self, task, origArgs):
    """
    _setupNextSteps_

    Modify the step one task to include N more CMSSW steps and
    chain the output between all three steps.
    """
    configCacheUrl = self.configCacheUrl or self.couchURL
    for i in range(2, self.stepChain + 1):
        # Each new cmsRunN step is chained to the previous cmsRun(N-1)
        inputStepName = "cmsRun%d" % (i-1)
        parentCmsswStep = task.getStep(inputStepName)
        parentCmsswStepHelper = parentCmsswStep.getTypeHelper()
        # Intermediate step output is not kept
        parentCmsswStepHelper.keepOutput(False)
        currentStepName = "cmsRun%d" % i
        # Work on a copy of the step arguments so origArgs stays untouched
        taskConf = {}
        for k, v in origArgs["Step%d" % i].iteritems():
            taskConf[k] = v
        # Set default values to task parameters; a step without an
        # InputDataset is treated as a generator step
        self.modifyTaskConfiguration(taskConf, False, 'InputDataset' not in taskConf)
        globalTag = taskConf.get("GlobalTag", self.globalTag)
        childCmssw = parentCmsswStep.addTopStep(currentStepName)
        childCmssw.setStepType("CMSSW")
        template = StepFactory.getStepTemplate("CMSSW")
        template(childCmssw.data)
        childCmsswHelper = childCmssw.getTypeHelper()
        childCmsswHelper.setGlobalTag(globalTag)
        childCmsswHelper.setupChainedProcessing(inputStepName, taskConf['InputFromOutputModule'])
        # Assuming we cannot change the CMSSW version inside the same job
        childCmsswHelper.cmsswSetup(self.frameworkVersion, softwareEnvironment="",
                                    scramArch=self.scramArch)
        childCmsswHelper.setConfigCache(configCacheUrl, taskConf['ConfigCacheID'], self.couchDBName)
        childCmsswHelper.keepOutput(False)
        # Pileup check
        taskConf["PileupConfig"] = parsePileupConfig(taskConf["MCPileup"], taskConf["DataPileup"])
        if taskConf["PileupConfig"]:
            self.setupPileup(task, taskConf['PileupConfig'])
        # Handling the output modules
        outputMods = {}
        configOutput = self.determineOutputModules(configDoc=taskConf['ConfigCacheID'],
                                                   couchURL=configCacheUrl,
                                                   couchDBName=self.couchDBName)
        for outputModuleName in configOutput.keys():
            outputModule = self.addOutputModule(task, outputModuleName,
                                                self.inputPrimaryDataset,
                                                configOutput[outputModuleName]["dataTier"],
                                                configOutput[outputModuleName]["filterName"],
                                                stepName=currentStepName)
            outputMods[outputModuleName] = outputModule
    # Closing out the task configuration
    # Only the last step output is important :-)
    childCmsswHelper.keepOutput(True)
    self.addMergeTasks(task, currentStepName, outputMods)
    # Override task parameters by the workload ones in case of their absence
    self.updateCommonParams(task, taskConf)
    return
def setupNextSteps(self, task, origArgs):
    """
    _setupNextSteps_

    Extend the step-one task with N more CMSSW steps, chaining each new
    cmsRun step to the output of the step named by its 'InputStep'.
    """
    self.stepParentageMapping.setdefault(origArgs['Step1']['StepName'], {})
    for stepIdx in range(2, self.stepChain + 1):
        currentStepNumber = "Step%d" % stepIdx
        currentCmsRun = "cmsRun%d" % stepIdx
        taskConf = dict(viewitems(origArgs[currentStepNumber]))

        # locate the parent step this one consumes its input from
        stepInfo = self.stepMapping.get(taskConf['InputStep'])
        parentStepNumber = stepInfo[0]
        parentCmsRun = stepInfo[1]
        parentStep = task.getStep(parentCmsRun)
        parentHelper = parentStep.getTypeHelper()

        # Set default values for the task parameters
        self.modifyTaskConfiguration(taskConf, False, 'InputDataset' not in taskConf)
        globalTag = self.getStepValue('GlobalTag', taskConf, self.globalTag)
        frameworkVersion = self.getStepValue('CMSSWVersion', taskConf, self.frameworkVersion)
        scramArch = self.getStepValue('ScramArch', taskConf, self.scramArch)
        prepId = self.getStepValue('PrepID', taskConf, self.prepID)

        cmsswStep = parentStep.addTopStep(currentCmsRun)
        cmsswStep.setStepType("CMSSW")
        StepFactory.getStepTemplate("CMSSW")(cmsswStep.data)

        cmsswHelper = cmsswStep.getTypeHelper()
        cmsswHelper.setPrepId(prepId)
        cmsswHelper.setGlobalTag(globalTag)
        cmsswHelper.setupChainedProcessing(parentCmsRun, taskConf['InputFromOutputModule'])
        cmsswHelper.cmsswSetup(frameworkVersion, softwareEnvironment="", scramArch=scramArch)
        cmsswHelper.setConfigCache(self.configCacheUrl, taskConf['ConfigCacheID'], self.couchDBName)

        # multicore settings: per-step values win over the workload defaults
        multicore = taskConf['Multicore'] if taskConf['Multicore'] > 0 else self.multicore
        eventStreams = self.eventStreams
        if taskConf.get("EventStreams") is not None and taskConf['EventStreams'] >= 0:
            eventStreams = taskConf['EventStreams']
        cmsswHelper.setNumberOfCores(multicore, eventStreams)

        # Pileup check
        taskConf["PileupConfig"] = parsePileupConfig(taskConf["MCPileup"], taskConf["DataPileup"])
        if taskConf["PileupConfig"]:
            self.setupPileup(task, taskConf['PileupConfig'], stepName=currentCmsRun)

        # Handling the output modules in order to decide whether we should
        # stage them out and report them in the Report.pkl file
        parentKeepOutput = strToBool(origArgs[parentStepNumber].get('KeepOutput', True))
        parentHelper.keepOutput(parentKeepOutput)
        childKeepOutput = strToBool(taskConf.get('KeepOutput', True))
        cmsswHelper.keepOutput(childKeepOutput)
        self.setupOutputModules(task, taskConf, currentCmsRun, childKeepOutput)

    # Closing out the task configuration. The last step output must be saved/merged
    cmsswHelper.keepOutput(True)
    return
def setupNextSteps(self, task, origArgs):
    """
    _setupNextSteps_

    Append N extra CMSSW steps to the step-one task, chaining every new
    cmsRun step to the output of the one before it.
    """
    configCacheUrl = self.configCacheUrl or self.couchURL
    for stepIdx in range(2, self.stepChain + 1):
        inputStepName = "cmsRun%d" % (stepIdx - 1)
        currentStepName = "cmsRun%d" % stepIdx

        # the upstream step only feeds this one; do not keep its output
        parentStep = task.getStep(inputStepName)
        parentStep.getTypeHelper().keepOutput(False)

        # Set default values to task parameters
        taskConf = dict(origArgs["Step%d" % stepIdx])
        self.modifyTaskConfiguration(taskConf, False, 'InputDataset' not in taskConf)

        # per-step overrides fall back to the workload-level settings
        globalTag = taskConf.get("GlobalTag", self.globalTag)
        frameworkVersion = taskConf.get("CMSSWVersion", self.frameworkVersion)
        scramArch = taskConf.get("ScramArch", self.scramArch)

        cmsswStep = parentStep.addTopStep(currentStepName)
        cmsswStep.setStepType("CMSSW")
        StepFactory.getStepTemplate("CMSSW")(cmsswStep.data)

        cmsswHelper = cmsswStep.getTypeHelper()
        cmsswHelper.setGlobalTag(globalTag)
        cmsswHelper.setupChainedProcessing(inputStepName, taskConf['InputFromOutputModule'])
        # Assuming we cannot change the CMSSW version inside the same job
        cmsswHelper.cmsswSetup(frameworkVersion, softwareEnvironment="", scramArch=scramArch)
        cmsswHelper.setConfigCache(configCacheUrl, taskConf['ConfigCacheID'], self.couchDBName)
        cmsswHelper.keepOutput(False)

        # Pileup check
        taskConf["PileupConfig"] = parsePileupConfig(taskConf["MCPileup"], taskConf["DataPileup"])
        if taskConf["PileupConfig"]:
            self.setupPileup(task, taskConf['PileupConfig'])

        # Handling the output modules
        outputMods = {}
        configOutput = self.determineOutputModules(configDoc=taskConf['ConfigCacheID'],
                                                   couchURL=configCacheUrl,
                                                   couchDBName=self.couchDBName)
        for modName in configOutput:
            outputMods[modName] = self.addOutputModule(task, modName, self.inputPrimaryDataset,
                                                       configOutput[modName]["dataTier"],
                                                       configOutput[modName]["filterName"],
                                                       stepName=currentStepName)

    # Closing out the task configuration
    # Only the last step output is important :-)
    cmsswHelper.keepOutput(True)
    self.addMergeTasks(task, currentStepName, outputMods)
    return
def setupNextSteps(self, task, origArgs):
    """
    _setupNextSteps_

    Modify the step one task to include N more CMSSW steps and chain
    the output between all three steps.

    :param task: top-level task helper object, extended in place
    :param origArgs: dict of request arguments keyed by "StepN"
    :return: None; all changes are applied to `task` in place
    """
    self.stepParentageMapping.setdefault(origArgs['Step1']['StepName'], {})
    for i in range(2, self.stepChain + 1):
        currentStepNumber = "Step%d" % i
        currentCmsRun = "cmsRun%d" % i
        taskConf = {}
        for k, v in origArgs[currentStepNumber].items():
            taskConf[k] = v

        # resolve which earlier step this one chains its input from
        parentStepNumber = self.stepMapping.get(taskConf['InputStep'])[0]
        parentCmsRun = self.stepMapping.get(taskConf['InputStep'])[1]
        parentCmsswStep = task.getStep(parentCmsRun)
        parentCmsswStepHelper = parentCmsswStep.getTypeHelper()

        # Set default values for the task parameters
        self.modifyTaskConfiguration(taskConf, False, 'InputDataset' not in taskConf)
        globalTag = self.getStepValue('GlobalTag', taskConf, self.globalTag)
        frameworkVersion = self.getStepValue('CMSSWVersion', taskConf, self.frameworkVersion)
        scramArch = self.getStepValue('ScramArch', taskConf, self.scramArch)

        currentCmssw = parentCmsswStep.addTopStep(currentCmsRun)
        currentCmssw.setStepType("CMSSW")
        template = StepFactory.getStepTemplate("CMSSW")
        template(currentCmssw.data)
        currentCmsswStepHelper = currentCmssw.getTypeHelper()
        currentCmsswStepHelper.setGlobalTag(globalTag)
        currentCmsswStepHelper.setupChainedProcessing(parentCmsRun, taskConf['InputFromOutputModule'])
        currentCmsswStepHelper.cmsswSetup(frameworkVersion, softwareEnvironment="", scramArch=scramArch)
        currentCmsswStepHelper.setConfigCache(self.configCacheUrl, taskConf['ConfigCacheID'], self.couchDBName)

        # multicore settings: per-step values override workload-level defaults
        multicore = self.multicore
        eventStreams = self.eventStreams
        if taskConf['Multicore'] > 0:
            multicore = taskConf['Multicore']
        # BUGFIX: guard against a missing/None EventStreams value before the
        # numeric comparison -- "None >= 0" raises TypeError on Python 3.
        # This matches the viewitems-based sibling implementation in this file.
        if taskConf.get('EventStreams') is not None and taskConf['EventStreams'] >= 0:
            eventStreams = taskConf['EventStreams']
        currentCmsswStepHelper.setNumberOfCores(multicore, eventStreams)

        # Pileup check
        taskConf["PileupConfig"] = parsePileupConfig(taskConf["MCPileup"], taskConf["DataPileup"])
        if taskConf["PileupConfig"]:
            self.setupPileup(task, taskConf['PileupConfig'], stepName=currentCmsRun)

        # Handling the output modules in order to decide whether we should
        # stage them out and report them in the Report.pkl file
        parentKeepOutput = strToBool(origArgs[parentStepNumber].get('KeepOutput', True))
        parentCmsswStepHelper.keepOutput(parentKeepOutput)
        childKeepOutput = strToBool(taskConf.get('KeepOutput', True))
        currentCmsswStepHelper.keepOutput(childKeepOutput)
        self.setupOutputModules(task, taskConf, currentCmsRun, childKeepOutput)

    # Closing out the task configuration. The last step output must be saved/merged
    currentCmsswStepHelper.keepOutput(True)
    return