def __call__(self, workloadName, arguments): """ _call_ Create a ReReco workload with the given parameters. """ DataProcessing.__call__(self, workloadName, arguments) # Arrange the skims in a skimConfig object (i.e. a list of skim configurations) self.skimConfigs = [] skimIndex = 1 while "SkimName%s" % skimIndex in arguments: skimConfig = {} skimConfig["SkimName"] = arguments["SkimName%s" % skimIndex] skimConfig["SkimInput"] = arguments["SkimInput%s" % skimIndex] skimConfig["ConfigCacheID"] = arguments["Skim%sConfigCacheID" % skimIndex] skimConfig["TimePerEvent"] = float(arguments.get("SkimTimePerEvent%s" % skimIndex, self.timePerEvent)) skimConfig["SizePerEvent"] = float(arguments.get("SkimSizePerEvent%s" % skimIndex, self.sizePerEvent)) skimConfig["Memory"] = float(arguments.get("SkimMemory%s" % skimIndex, self.memory)) skimConfig["SkimJobSplitAlgo"] = arguments.get("SkimSplittingAlgo%s" % skimIndex, "FileBased") skimConfig["SkimJobSplitArgs"] = {"include_parents" : True} if skimConfig["SkimJobSplitAlgo"] == "FileBased": skimConfig["SkimJobSplitArgs"]["files_per_job"] = int(arguments.get("SkimFilesPerJob%s" % skimIndex, 1)) elif skimConfig["SkimJobSplitAlgo"] == "EventBased" or skimConfig["SkimJobSplitAlgo"] == "EventAwareLumiBased": skimConfig["SkimJobSplitArgs"]["events_per_job"] = int(arguments.get("SkimEventsPerJob%s" % skimIndex, int((8.0 * 3600.0) / skimConfig["TimePerEvent"]))) if skimConfig["SkimJobSplitAlgo"] == "EventAwareLumiBased": skimConfig["SkimJobSplitAlgo"]["max_events_per_lumi"] = 20000 elif skimConfig["SkimJobSplitAlgo"] == "LumiBased": skimConfig["SkimJobSplitArgs"["lumis_per_job"]] = int(arguments.get("SkimLumisPerJob%s" % skimIndex, 8)) self.skimConfigs.append(skimConfig) skimIndex += 1 return self.buildWorkload()
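# A minimal sketch of the numbered skim arguments the loop above consumes;
# all names and values below are hypothetical, not taken from a real request.
# The loop keeps reading "SkimName1", "SkimName2", ... until an index is
# missing, so skim blocks must be numbered consecutively starting at 1.
exampleSkimArguments = {
    "SkimName1": "PromptSkim",                    # hypothetical skim name
    "SkimInput1": "RECOoutput",                   # output module fed into the skim
    "Skim1ConfigCacheID": "abc123",               # placeholder ConfigCache document id
    "SkimSplittingAlgo1": "EventAwareLumiBased",  # falls back to "FileBased" if absent
    "SkimEventsPerJob1": 50000,                   # otherwise derived from TimePerEvent
}
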
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a DQMHarvest workload with the given parameters.
    """
    DataProcessing.__call__(self, workloadName, arguments)

    self.workload = self.createWorkload()
    self.workload.setDashboardActivity("harvesting")

    splitArgs = {"runs_per_job": 1}
    if self.dqmHarvestUnit == "multiRun":
        # then it should result in a single job in the end, very high number of runs
        splitArgs["runs_per_job"] = 999999
    self.workload.setWorkQueueSplitPolicy("Dataset", "Harvest", splitArgs)

    # also creates the logCollect job by default
    self.addDQMHarvestTask(uploadProxy=self.dqmUploadProxy,
                           periodic_harvest_interval=self.periodicHarvestInterval,
                           dqmHarvestUnit=self.dqmHarvestUnit)

    # setting the parameters which need to be set for all the tasks
    # sets acquisitionEra, processingVersion, processingString
    self.workload.setTaskPropertiesFromWorkload()

    self.reportWorkflowToDashboard(self.workload.getDashboardActivity())

    return self.workload

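# A standalone sketch of the harvest splitting choice made above: the default
# harvests run-by-run, while "multiRun" uses an effectively unbounded
# runs_per_job so all runs collapse into a single harvesting job. The helper
# name is hypothetical, for illustration only.
def exampleHarvestSplitArgs(dqmHarvestUnit):
    return {"runs_per_job": 999999 if dqmHarvestUnit == "multiRun" else 1}

assert exampleHarvestSplitArgs("byRun") == {"runs_per_job": 1}
assert exampleHarvestSplitArgs("multiRun") == {"runs_per_job": 999999}
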
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a ReDigi workload with the given parameters.
    """
    DataProcessing.__call__(self, workloadName, arguments)

    # Transform the pileup as required by the CMSSW step
    self.pileupConfig = parsePileupConfig(self.mcPileup, self.dataPileup)
    # Adjust the pileup splitting
    self.procJobSplitArgs.setdefault("deterministicPileup", self.deterministicPileup)

    # Adjust the sizePerEvent, timePerEvent and memory for steps two and three
    if self.stepTwoTimePerEvent is None:
        self.stepTwoTimePerEvent = self.timePerEvent
    if self.stepTwoSizePerEvent is None:
        self.stepTwoSizePerEvent = self.sizePerEvent
    if self.stepTwoMemory is None:
        self.stepTwoMemory = self.memory
    if self.stepThreeTimePerEvent is None:
        self.stepThreeTimePerEvent = self.timePerEvent
    if self.stepThreeSizePerEvent is None:
        self.stepThreeSizePerEvent = self.sizePerEvent
    if self.stepThreeMemory is None:
        self.stepThreeMemory = self.memory

    return self.buildWorkload()

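# A minimal sketch of what parsePileupConfig is expected to produce, assuming
# it simply maps each non-empty pileup dataset onto its pileup type; the exact
# return structure is an assumption here, not confirmed by this code.
def exampleParsePileupConfig(mcPileup, dataPileup):
    pileupConfig = {}
    if mcPileup:
        pileupConfig["mc"] = [mcPileup]      # assumed key for MC pileup
    if dataPileup:
        pileupConfig["data"] = [dataPileup]  # assumed key for data pileup
    return pileupConfig
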
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a MonteCarloFromGEN workload with the given parameters.
    """
    DataProcessing.__call__(self, workloadName, arguments)

    return self.buildWorkload()

def __call__(self, workloadName, arguments):
    """
    _call_

    Create a MonteCarloFromGEN workload with the given parameters.
    """
    DataProcessing.__call__(self, workloadName, arguments)

    # Transform the pileup as required by the CMSSW step
    self.pileupConfig = parsePileupConfig(self.mcPileup, self.dataPileup)
    # Adjust the pileup splitting
    self.procJobSplitArgs.setdefault("deterministicPileup", self.deterministicPileup)

    return self.buildWorkload()

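# A small illustration of why setdefault is used above: a splitting argument
# that was already set explicitly is preserved, and the spec-level default is
# only applied when the key is missing. Values here are hypothetical.
exampleSplitArgs = {"deterministicPileup": True}
exampleSplitArgs.setdefault("deterministicPileup", False)
assert exampleSplitArgs["deterministicPileup"] is True
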
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a ReReco workload with the given parameters.
    """
    DataProcessing.__call__(self, workloadName, arguments)

    # These are mostly place holders because the job splitting algo and
    # parameters will be updated after the workflow has been created.
    self.procJobSplitArgs = {}
    if self.procJobSplitAlgo in ["EventBased", "EventAwareLumiBased"]:
        if self.eventsPerJob is None:
            self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
        self.procJobSplitArgs["events_per_job"] = self.eventsPerJob
        if self.procJobSplitAlgo == "EventAwareLumiBased":
            self.procJobSplitArgs["job_time_limit"] = 96 * 3600  # 4 days in seconds
            self.procJobSplitArgs["allowCreationFailure"] = False
    elif self.procJobSplitAlgo == "LumiBased":
        self.procJobSplitArgs["lumis_per_job"] = self.lumisPerJob
    elif self.procJobSplitAlgo == "FileBased":
        self.procJobSplitArgs["files_per_job"] = self.filesPerJob

    self.skimJobSplitArgs = {}
    if self.skimJobSplitAlgo in ["EventBased", "EventAwareLumiBased"]:
        if self.eventsPerJob is None:
            self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
        if self.skimJobSplitAlgo == "EventAwareLumiBased":
            self.skimJobSplitArgs["job_time_limit"] = 48 * 3600  # 2 days
            self.skimJobSplitArgs["allowCreationFailure"] = False
        self.skimJobSplitArgs["events_per_job"] = self.eventsPerJob
    elif self.skimJobSplitAlgo == "LumiBased":
        self.skimJobSplitArgs["lumis_per_job"] = self.lumisPerJob
    elif self.skimJobSplitAlgo == "FileBased":
        self.skimJobSplitArgs["files_per_job"] = self.filesPerJob
    # Note: the per-algorithm skim arguments built above are unconditionally
    # superseded here; any "SkimJobSplitArgs" passed in wins, otherwise a
    # file-based default is used.
    self.skimJobSplitArgs = arguments.get("SkimJobSplitArgs",
                                          {"files_per_job": 1, "include_parents": True})

    return self.buildWorkload()

def __call__(self, workloadName, arguments):
    """
    _call_

    Create a ReReco workload with the given parameters.
    """
    DataProcessing.__call__(self, workloadName, arguments)

    # These are mostly place holders because the job splitting algo and
    # parameters will be updated after the workflow has been created.
    self.procJobSplitArgs = {}
    if self.procJobSplitAlgo in ["EventBased", "EventAwareLumiBased"]:
        if self.eventsPerJob is None:
            self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
        self.procJobSplitArgs["events_per_job"] = self.eventsPerJob
        if self.procJobSplitAlgo == "EventAwareLumiBased":
            self.procJobSplitArgs["job_time_limit"] = 96 * 3600  # 4 days in seconds
    elif self.procJobSplitAlgo == "LumiBased":
        self.procJobSplitArgs["lumis_per_job"] = self.lumisPerJob
    elif self.procJobSplitAlgo == "FileBased":
        self.procJobSplitArgs["files_per_job"] = self.filesPerJob

    self.skimJobSplitArgs = {}
    if self.skimJobSplitAlgo in ["EventBased", "EventAwareLumiBased"]:
        if self.eventsPerJob is None:
            self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
        if self.skimJobSplitAlgo == "EventAwareLumiBased":
            self.skimJobSplitArgs["job_time_limit"] = 48 * 3600  # 2 days
        self.skimJobSplitArgs["events_per_job"] = self.eventsPerJob
    elif self.skimJobSplitAlgo == "LumiBased":
        self.skimJobSplitArgs["lumis_per_job"] = self.lumisPerJob
    elif self.skimJobSplitAlgo == "FileBased":
        self.skimJobSplitArgs["files_per_job"] = self.filesPerJob
    self.skimJobSplitArgs = arguments.get("SkimJobSplitArgs",
                                          {"files_per_job": 1, "include_parents": True})

    return self.buildWorkload()

def __call__(self, workloadName, arguments):
    """
    _call_

    Create a ReReco workload with the given parameters.
    """
    DataProcessing.__call__(self, workloadName, arguments)

    # These are mostly place holders because the job splitting algo and
    # parameters will be updated after the workflow has been created.
    self.procJobSplitArgs = {}
    if self.procJobSplitAlgo in ["EventBased", "EventAwareLumiBased"]:
        if self.eventsPerJob is None:
            self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
        self.procJobSplitArgs["events_per_job"] = self.eventsPerJob
        if self.procJobSplitAlgo == "EventAwareLumiBased":
            self.procJobSplitArgs["max_events_per_lumi"] = 100000
    elif self.procJobSplitAlgo == "LumiBased":
        self.procJobSplitArgs["lumis_per_job"] = self.lumisPerJob
    elif self.procJobSplitAlgo == "FileBased":
        self.procJobSplitArgs["files_per_job"] = self.filesPerJob

    self.skimJobSplitArgs = {}
    if self.skimJobSplitAlgo in ["EventBased", "EventAwareLumiBased"]:
        if self.eventsPerJob is None:
            self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
        self.skimJobSplitArgs["events_per_job"] = self.eventsPerJob
        if self.skimJobSplitAlgo == "EventAwareLumiBased":
            self.skimJobSplitArgs["max_events_per_lumi"] = 20000
    elif self.skimJobSplitAlgo == "LumiBased":
        self.skimJobSplitArgs["lumis_per_job"] = self.lumisPerJob
    elif self.skimJobSplitAlgo == "FileBased":
        self.skimJobSplitArgs["files_per_job"] = self.filesPerJob
    self.skimJobSplitArgs = arguments.get("SkimJobSplitArgs",
                                          {"files_per_job": 1, "include_parents": True})

    return self.buildWorkload()

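# A worked example of the events_per_job default used by the ReReco variants
# above: the formula targets roughly eight hours of processing per job.
# The timePerEvent value here is hypothetical.
timePerEvent = 10.0                                # seconds per event (assumed)
eventsPerJob = int((8.0 * 3600.0) / timePerEvent)  # 28800 / 10 = 2880 events
assert eventsPerJob == 2880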