def determineOutputModules(self, scenarioFunc = None, scenarioArgs = None,
                           configDoc = None, couchURL = None, couchDBName = None):
    """
    _determineOutputModules_

    Determine the output module names and associated metadata for the
    given config.

    If configDoc is set (neither None nor the empty string) the
    information is read from the ConfigCache document with that ID,
    using couchURL and couchDBName. Otherwise it is derived from the
    scenario:

    - "promptReco" / "expressProcessing": one entry per item of
      scenarioArgs['outputs'], keyed by the item's 'moduleLabel'.
    - "alcaSkim": one "ALCARECOStream<skim>" entry per item of
      scenarioArgs['skims'], with data tier "ALCARECO".

    Returns a dictionary mapping module label to a dictionary holding
    'dataTier' and 'filterName'; it is empty when nothing matches.
    """
    # scenarioArgs defaults to None; treat that as "no arguments" so the
    # .get() lookups below do not fail with AttributeError.
    scenarioArgs = scenarioArgs or {}
    outputModules = {}

    if configDoc is not None and configDoc != "":
        configCache = ConfigCache(couchURL, couchDBName)
        configCache.loadByID(configDoc)
        outputModules = configCache.getOutputModuleInfo()
    elif scenarioFunc in ["promptReco", "expressProcessing"]:
        for output in scenarioArgs.get('outputs', []):
            dataTier = output['dataTier']
            moduleLabel = output['moduleLabel']
            # filterName is optional in the output description.
            filterName = output.get('filterName', None)
            outputModules[moduleLabel] = {'dataTier' : dataTier,
                                          'filterName' : filterName}
    elif scenarioFunc == "alcaSkim":
        for alcaSkim in scenarioArgs.get('skims', []):
            moduleLabel = "ALCARECOStream%s" % alcaSkim
            outputModules[moduleLabel] = {'dataTier' : "ALCARECO",
                                          'filterName' : alcaSkim}

    return outputModules
def determineOutputModules(self, scenarioFunc=None, scenarioArgs=None,
                           configDoc=None, couchURL=None, couchDBName=None,
                           configCacheUrl=None):
    """
    _determineOutputModules_

    Determine the output module names and associated metadata for the
    given config.

    If configDoc is set (neither None nor the empty string) the
    information is read from the ConfigCache document with that ID;
    configCacheUrl is used as the database URL when given, couchURL
    otherwise. Without a configDoc the result is derived from the
    scenario:

    - 'outputs' present and scenarioFunc in "promptReco",
      "expressProcessing", "repack": one entry per output, carrying
      'dataTier' plus 'primaryDataset' / 'filterName' when the output
      defines them.
    - 'writeTiers' present and scenarioFunc == "promptReco": one
      "<tier>output" entry per tier.
    - scenarioFunc == "alcaSkim": one "ALCARECOStream<skim>" entry per
      skim; skims whose name starts with "PromptCalibProd" get data tier
      "ALCAPROMPT", all others "ALCARECO".

    Returns a dictionary mapping module label to its metadata dictionary.
    """
    # set default scenarioArgs to empty dictionary if it is None.
    scenarioArgs = scenarioArgs or {}
    outputModules = {}

    if configDoc is not None and configDoc != "":
        url = configCacheUrl or couchURL
        configCache = ConfigCache(url, couchDBName)
        configCache.loadByID(configDoc)
        outputModules = configCache.getOutputModuleInfo()
    elif 'outputs' in scenarioArgs and scenarioFunc in ["promptReco", "expressProcessing", "repack"]:
        for output in scenarioArgs.get('outputs', []):
            moduleLabel = output['moduleLabel']
            outputModules[moduleLabel] = {'dataTier': output['dataTier']}
            # Use the "in" operator rather than dict.has_key(), which only
            # exists on Python 2 dictionaries.
            if 'primaryDataset' in output:
                outputModules[moduleLabel]['primaryDataset'] = output['primaryDataset']
            if 'filterName' in output:
                outputModules[moduleLabel]['filterName'] = output['filterName']
    elif 'writeTiers' in scenarioArgs and scenarioFunc == "promptReco":
        for dataTier in scenarioArgs.get('writeTiers'):
            moduleLabel = "%soutput" % dataTier
            outputModules[moduleLabel] = {'dataTier': dataTier}
    elif scenarioFunc == "alcaSkim":
        for alcaSkim in scenarioArgs.get('skims', []):
            moduleLabel = "ALCARECOStream%s" % alcaSkim
            if alcaSkim.startswith("PromptCalibProd"):
                dataTier = "ALCAPROMPT"
            else:
                dataTier = "ALCARECO"
            outputModules[moduleLabel] = {
                'dataTier': dataTier,
                'primaryDataset': scenarioArgs.get('primaryDataset'),
                'filterName': alcaSkim,
            }

    return outputModules
def validateConfigCacheExists(self, configID, couchURL, couchDBName, getOutputModules=False):
    """
    _validateConfigCacheExists_

    Load the ConfigCache document identified by configID and check the
    output module information it provides.

    Problems are reported through self.raiseValidationException: a
    configID of '' or ' ', a ConfigCacheException while loading the
    document, any exception while reading the output module info, an
    output module without a dataTier, and a dataTier/filterName pair that
    appears more than once.

    Returns the output module info when getOutputModules is true,
    otherwise None.
    """
    if configID in ('', ' '):
        self.raiseValidationException(
            msg="ConfigCacheID is invalid and cannot be loaded")

    configCache = ConfigCache(dbURL=couchURL, couchDBName=couchDBName, id=configID)
    try:
        configCache.loadByID(configID=configID)
    except ConfigCacheException:
        self.raiseValidationException(
            msg="Failure to load ConfigCache while validating workload")

    try:
        outputModuleInfo = configCache.getOutputModuleInfo()
    except Exception:
        # Something's gone wrong with trying to open the configCache
        self.raiseValidationException(
            msg="Error in getting output modules from ConfigCache during workload validation. Check ConfigCache formatting!")

    # Filter names already seen, grouped by data tier.
    filtersByTier = {}
    for moduleInfo in outputModuleInfo.values():
        tier = moduleInfo.get('dataTier', None)
        filterName = moduleInfo.get('filterName', None)
        if not tier:
            self.raiseValidationException(msg="No DataTier in output module.")

        seenFilters = filtersByTier.setdefault(tier, [])
        if filterName in seenFilters:
            # Then we've seen this combination before
            self.raiseValidationException(
                msg="Duplicate dataTier/filterName combination.")
        else:
            seenFilters.append(filterName)

    return outputModuleInfo if getOutputModules else None
def determineOutputModules(self, scenarioFunc = None, scenarioArgs = None,
                           configDoc = None, couchURL = None, couchDBName = None,
                           configCacheUrl = None):
    """
    _determineOutputModules_

    Work out the output module labels, and the metadata attached to each,
    for the given config.

    With a configDoc (neither None nor the empty string) the answer comes
    from the ConfigCache document of that ID; ConfigCache objects are kept
    in self.config_cache keyed by (url, couchDBName) and reused. Without a
    configDoc the answer is built from scenarioFunc and scenarioArgs.
    """
    # A missing scenarioArgs is treated as an empty dictionary.
    scenarioArgs = scenarioArgs or {}

    if configDoc is not None and configDoc != "":
        cacheKey = (configCacheUrl or couchURL, couchDBName)
        if cacheKey not in self.config_cache:
            self.config_cache[cacheKey] = ConfigCache(cacheKey[0], couchDBName, True)
        cachedConfig = self.config_cache[cacheKey]
        # TODO: need to change to DataCache (configCache.loadDocument)
        cachedConfig.loadByID(configDoc)
        return cachedConfig.getOutputModuleInfo()

    moduleInfo = {}
    if 'outputs' in scenarioArgs and scenarioFunc in ("promptReco", "expressProcessing", "repack"):
        for outputSpec in scenarioArgs.get('outputs', []):
            label = outputSpec['moduleLabel']
            moduleInfo[label] = {'dataTier': outputSpec['dataTier']}
            # These two keys are copied only when the output defines them.
            for optionalKey in ('primaryDataset', 'filterName'):
                if optionalKey in outputSpec:
                    moduleInfo[label][optionalKey] = outputSpec[optionalKey]
    elif 'writeTiers' in scenarioArgs and scenarioFunc == "promptReco":
        for tier in scenarioArgs.get('writeTiers'):
            moduleInfo["%soutput" % tier] = {'dataTier': tier}
    elif scenarioFunc == "alcaSkim":
        for skimName in scenarioArgs.get('skims', []):
            isPromptCalib = skimName.startswith("PromptCalibProd")
            moduleInfo["ALCARECOStream%s" % skimName] = {
                'dataTier': "ALCAPROMPT" if isPromptCalib else "ALCARECO",
                'primaryDataset': scenarioArgs.get('primaryDataset'),
                'filterName': skimName,
            }

    return moduleInfo
def validateConfigCacheExists(self, configID, couchURL, couchDBName, getOutputModules = False):
    """
    _validateConfigCacheExists_

    Try to load the ConfigCache document with the given ID and check that
    its output modules are usable.

    Every failure is signalled with self.raiseValidationException: an ID
    equal to '' or ' ', a ConfigCacheException on load, any exception
    while fetching the output module info, a module lacking a dataTier,
    or two modules sharing the same dataTier and filterName.

    The output module info is returned only when getOutputModules is
    true; otherwise the result is None.
    """
    idIsBlank = (configID == '' or configID == ' ')
    if idIsBlank:
        self.raiseValidationException(msg = "ConfigCacheID is invalid and cannot be loaded")

    configCache = ConfigCache(dbURL = couchURL, couchDBName = couchDBName, id = configID)
    try:
        configCache.loadByID(configID = configID)
    except ConfigCacheException:
        self.raiseValidationException(msg = "Failure to load ConfigCache while validating workload")

    try:
        outputModuleInfo = configCache.getOutputModuleInfo()
    except Exception:
        # Something's gone wrong with trying to open the configCache
        failure = "Error in getting output modules from ConfigCache during workload validation. Check ConfigCache formatting!"
        self.raiseValidationException(msg = failure)

    # dataTier -> list of filter names already encountered for that tier
    seenCombinations = {}
    for module in outputModuleInfo.values():
        dataTier = module.get('dataTier', None)
        filterName = module.get('filterName', None)
        if not dataTier:
            self.raiseValidationException(msg = "No DataTier in output module.")

        if dataTier not in seenCombinations:
            seenCombinations[dataTier] = []
        knownFilters = seenCombinations[dataTier]

        if filterName not in knownFilters:
            knownFilters.append(filterName)
        else:
            # Then we've seen this combination before
            self.raiseValidationException(msg = "Duplicate dataTier/filterName combination.")

    if not getOutputModules:
        return None
    return outputModuleInfo
def determineOutputModules(self, scenarioFunc = None, scenarioArgs = None,
                           configDoc = None, couchURL = None, couchDBName = None,
                           configCacheUrl = None):
    """
    _determineOutputModules_

    Determine the output module names and associated metadata for the
    given config.

    If configDoc is set (neither None nor the empty string) the
    information is read from the ConfigCache document with that ID;
    configCacheUrl is preferred over couchURL as the database URL.
    Otherwise the result is derived from the scenario:

    - 'outputs' present and scenarioFunc in "promptReco",
      "expressProcessing", "repack": one entry per output with its
      'dataTier' and, when defined, 'primaryDataset' and 'filterName'.
    - 'writeTiers' present and scenarioFunc == "promptReco": one
      "<tier>output" entry per tier.
    - scenarioFunc == "alcaSkim": one "ALCARECOStream<skim>" entry per
      skim; the skim named exactly "PromptCalibProd" gets data tier
      "ALCAPROMPT", every other skim "ALCARECO".

    Returns a dictionary mapping module label to its metadata dictionary.
    """
    # scenarioArgs defaults to None; without this the membership tests
    # below raise TypeError when no scenario arguments are supplied.
    scenarioArgs = scenarioArgs or {}
    outputModules = {}

    if configDoc is not None and configDoc != "":
        url = configCacheUrl or couchURL
        configCache = ConfigCache(url, couchDBName)
        configCache.loadByID(configDoc)
        outputModules = configCache.getOutputModuleInfo()
    elif 'outputs' in scenarioArgs and scenarioFunc in ["promptReco", "expressProcessing", "repack"]:
        for output in scenarioArgs.get('outputs', []):
            moduleLabel = output['moduleLabel']
            outputModules[moduleLabel] = {'dataTier' : output['dataTier']}
            # "in" replaces dict.has_key(), which Python 3 dictionaries
            # no longer provide.
            if 'primaryDataset' in output:
                outputModules[moduleLabel]['primaryDataset'] = output['primaryDataset']
            if 'filterName' in output:
                outputModules[moduleLabel]['filterName'] = output['filterName']
    elif 'writeTiers' in scenarioArgs and scenarioFunc == "promptReco":
        for dataTier in scenarioArgs.get('writeTiers'):
            moduleLabel = "%soutput" % dataTier
            outputModules[moduleLabel] = {'dataTier' : dataTier}
    elif scenarioFunc == "alcaSkim":
        for alcaSkim in scenarioArgs.get('skims', []):
            moduleLabel = "ALCARECOStream%s" % alcaSkim
            if alcaSkim == "PromptCalibProd":
                dataTier = "ALCAPROMPT"
            else:
                dataTier = "ALCARECO"
            outputModules[moduleLabel] = {
                'dataTier' : dataTier,
                'primaryDataset' : scenarioArgs.get('primaryDataset'),
                'filterName' : alcaSkim,
            }

    return outputModules
def determineOutputModules(self, scenarioName = None, scenarioArgs = None,
                           configDoc = None, couchURL = None, couchDBName = None):
    """
    _determineOutputModules_

    Determine the output module names and associated metadata for the
    given config.

    If configDoc is set (neither None nor the empty string) the
    information is read from the ConfigCache document with that ID, using
    couchURL and couchDBName. Otherwise one "output<tier><tier>" entry is
    produced for every data tier listed in scenarioArgs['writeTiers'],
    each with 'filterName' set to None. scenarioName is accepted but not
    consulted.

    Returns a dictionary mapping module name to its metadata dictionary.
    """
    if configDoc is not None and configDoc != "":
        configCache = ConfigCache(couchURL, couchDBName)
        configCache.loadByID(configDoc)
        return configCache.getOutputModuleInfo()

    # scenarioArgs defaults to None; treat that as "no write tiers" rather
    # than failing with AttributeError on .get().
    scenarioArgs = scenarioArgs or {}

    outputModules = {}
    for dataTier in scenarioArgs.get("writeTiers", []):
        outputModuleName = "output%s%s" % (dataTier, dataTier)
        outputModules[outputModuleName] = {"dataTier": dataTier,
                                           "filterName": None}
    return outputModules
def determineOutputModules(
    self, scenarioFunc=None, scenarioArgs=None, configDoc=None, couchURL=None, couchDBName=None
):
    """
    _determineOutputModules_

    Determine the output module names and associated metadata for the
    given config.

    If configDoc is set (neither None nor the empty string) the
    information is read from the ConfigCache document with that ID, using
    couchURL and couchDBName. Otherwise the result is derived from the
    scenario:

    - "outputs" present and scenarioFunc in "promptReco",
      "expressProcessing", "repack": one entry per output with its
      "dataTier" and, when defined, "primaryDataset" and "filterName".
    - "writeTiers" present and scenarioFunc == "promptReco": one
      "<tier>output" entry per tier.
    - scenarioFunc == "alcaSkim": one "ALCARECOStream<skim>" entry per
      skim, always with data tier "ALCARECO".

    Returns a dictionary mapping module label to its metadata dictionary.
    """
    # scenarioArgs defaults to None; without this the membership tests
    # below raise TypeError when no scenario arguments are supplied.
    scenarioArgs = scenarioArgs or {}
    outputModules = {}

    if configDoc is not None and configDoc != "":
        configCache = ConfigCache(couchURL, couchDBName)
        configCache.loadByID(configDoc)
        outputModules = configCache.getOutputModuleInfo()
    elif "outputs" in scenarioArgs and scenarioFunc in ["promptReco", "expressProcessing", "repack"]:
        for output in scenarioArgs.get("outputs", []):
            moduleLabel = output["moduleLabel"]
            outputModules[moduleLabel] = {"dataTier": output["dataTier"]}
            # "in" replaces dict.has_key(), which Python 3 dictionaries
            # no longer provide.
            if "primaryDataset" in output:
                outputModules[moduleLabel]["primaryDataset"] = output["primaryDataset"]
            if "filterName" in output:
                outputModules[moduleLabel]["filterName"] = output["filterName"]
    elif "writeTiers" in scenarioArgs and scenarioFunc == "promptReco":
        for dataTier in scenarioArgs.get("writeTiers"):
            moduleLabel = "%soutput" % dataTier
            outputModules[moduleLabel] = {"dataTier": dataTier}
    elif scenarioFunc == "alcaSkim":
        for alcaSkim in scenarioArgs.get("skims", []):
            moduleLabel = "ALCARECOStream%s" % alcaSkim
            outputModules[moduleLabel] = {
                "dataTier": "ALCARECO",
                "primaryDataset": scenarioArgs.get("primaryDataset"),
                "filterName": alcaSkim,
            }

    return outputModules
def determineOutputModules(
    self, scenarioFunc=None, scenarioArgs=None, configDoc=None, couchURL=None, couchDBName=None, configCacheUrl=None
):
    """
    _determineOutputModules_

    Determine the output module names and associated metadata for the
    given config.

    If configDoc is set (neither None nor the empty string) the
    information is read from the ConfigCache document with that ID.
    ConfigCache objects are kept in self.config_cache keyed by
    (url, couchDBName) and reused; configCacheUrl is preferred over
    couchURL as the url. Otherwise the result is derived from the
    scenario:

    - "outputs" present and scenarioFunc in "promptReco",
      "expressProcessing", "repack": one entry per output with its
      "dataTier" and, when defined, "primaryDataset" and "filterName";
      plus one "SKIMStream<skim>" entry per item of "PhysicsSkims", whose
      data tier comes from a fixed table and falls back to "USER".
    - scenarioFunc == "alcaSkim": one "ALCARECOStream<skim>" entry per
      skim; skims whose name starts with "PromptCalibProd" get data tier
      "ALCAPROMPT", all others "ALCARECO".

    Returns a dictionary mapping module label to its metadata dictionary.
    """
    # set default scenarioArgs to empty dictionary if it is None.
    scenarioArgs = scenarioArgs or {}
    outputModules = {}

    if configDoc is not None and configDoc != "":
        url = configCacheUrl or couchURL
        if (url, couchDBName) in self.config_cache:
            configCache = self.config_cache[(url, couchDBName)]
        else:
            configCache = ConfigCache(url, couchDBName, True)
            self.config_cache[(url, couchDBName)] = configCache
        # TODO: need to change to DataCache
        # configCache.loadDocument(configDoc)
        configCache.loadByID(configDoc)
        outputModules = configCache.getOutputModuleInfo()
    elif "outputs" in scenarioArgs and scenarioFunc in ["promptReco", "expressProcessing", "repack"]:
        for output in scenarioArgs.get("outputs", []):
            moduleLabel = output["moduleLabel"]
            outputModules[moduleLabel] = {"dataTier": output["dataTier"]}
            if "primaryDataset" in output:
                outputModules[moduleLabel]["primaryDataset"] = output["primaryDataset"]
            if "filterName" in output:
                outputModules[moduleLabel]["filterName"] = output["filterName"]

        # Built once, outside the loop: the table does not depend on the
        # skim being processed.
        skimToDataTier = {
            "LogError": "RAW-RECO",
            "LogErrorMonitor": "USER",
            "ZElectron": "RAW-RECO",
            "ZMu": "RAW-RECO",
            "MuTau": "RAW-RECO",
            "TopMuEG": "RAW-RECO",
            "EcalActivity": "RAW-RECO",
            "CosmicSP": "RAW-RECO",
            "CosmicTP": "RAW-RECO",
            "ZMM": "RAW-RECO",
            "Onia": "RECO",
            "HighPtJet": "RAW-RECO",
            "D0Meson": "RECO",
            "Photon": "AOD",
            "ZEE": "AOD",
            "BJet": "AOD",
            "OniaCentral": "RECO",
            "OniaPeripheral": "RECO",
            "SingleTrack": "AOD",
            "MinBias": "AOD",
            "OniaUPC": "RAW-RECO",
            "HighMET": "RECO",
            "BPHSkim": "USER",
        }
        for physicsSkim in scenarioArgs.get("PhysicsSkims", []):
            # Skims missing from the table default to the "USER" tier.
            dataTier = skimToDataTier.get(physicsSkim, "USER")
            moduleLabel = "SKIMStream%s" % physicsSkim
            outputModules[moduleLabel] = {"dataTier": dataTier, "filterName": physicsSkim}
    elif scenarioFunc == "alcaSkim":
        for alcaSkim in scenarioArgs.get("skims", []):
            moduleLabel = "ALCARECOStream%s" % alcaSkim
            if alcaSkim.startswith("PromptCalibProd"):
                dataTier = "ALCAPROMPT"
            else:
                dataTier = "ALCARECO"
            outputModules[moduleLabel] = {
                "dataTier": dataTier,
                "primaryDataset": scenarioArgs.get("primaryDataset"),
                "filterName": alcaSkim,
            }

    return outputModules